% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/select_greedy.R, R/select_n_to_m.R
\name{select_greedy}
\alias{select_greedy}
\alias{select_n_to_m}
\title{Select matching pairs enforcing one-to-one linkage}
\usage{
select_greedy(pairs, threshold = NULL, weight, var = "select",
  preselect = NULL, id_x = NULL, id_y = NULL, ...)

select_n_to_m(pairs, threshold = NULL, weight = NULL, var = "select",
  preselect = NULL, n = 1, m = 1, id_x = NULL, id_y = NULL, ...)
}
\arguments{
\item{pairs}{a \code{pairs} object, such as generated by 
\code{\link{pair_blocking}}}

\item{threshold}{the threshold to apply. Pairs with a score above the 
threshold are selected.}

\item{weight}{name of the score/weight variable of the pairs. When not given
and \code{attr(pairs, "score")} is defined, that is used.}

\item{var}{the name of the new variable to create in pairs. This will be a
logical variable with a value of \code{TRUE} for the selected pairs.}

\item{preselect}{a logical variable with the same length as \code{pairs} has
rows, or the name of such a variable in \code{pairs}. Pairs are only 
selected when \code{preselect} is \code{TRUE}. This interacts with 
\code{threshold} (pairs have to be selected with both conditions).}

\item{id_x}{an integer vector with the same length as the number of rows in 
\code{pairs}, or the name of a column in \code{x}. This vector should 
identify unique objects in \code{x}. When not specified it is assumed that
each element in \code{x} is unique.}

\item{id_y}{an integer vector with the same length as the number of rows in 
\code{pairs}, or the name of a column in \code{y}. This vector should 
identify unique objects in \code{y}. When not specified it is assumed that
each element in \code{y} is unique.}

\item{...}{passed on to other methods.}

\item{n}{the number of records from \code{x} that can at most be linked to a
record in \code{y}.}

\item{m}{the number of records from \code{y} that can at most be linked to a
record in \code{x}.}
}
\value{
Returns the \code{pairs} with the variable given by \code{var} added. This
is a logical variable indicating which pairs are selected as matches.
}
\description{
Select matching pairs enforcing one-to-one linkage
}
\details{
Both methods force one-to-one matching. \code{select_greedy} uses a greedy 
algorithm that selects the first pair with the highest weight. 
\code{select_n_to_m} tries to optimise the total weight of all of the 
selected pairs. In general this will result in a better selection. However,
\code{select_n_to_m} uses much more memory and is much slower and, therefore,
can only be used when the number of possible pairs is not too large.
}
\examples{
data("linkexample1", "linkexample2")
pairs <- pair_blocking(linkexample1, linkexample2, "postcode")
pairs <- compare_pairs(pairs, c("lastname", "firstname", "address", "sex"))
pairs <- score_simsum(pairs)

# Select pairs with a simsum > 0 and force one-to-one linkage
pairs <- select_n_to_m(pairs, 0, var = "ntom")
pairs <- select_greedy(pairs, 0, var = "greedy")
table(pairs[c("ntom", "greedy")])

\dontshow{gc()}

}
