% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/salso.R
\name{salso}
\alias{salso}
\title{Perform Sequentially-Allocated Latent Structure Optimization}
\usage{
salso(expectedPairwiseAllocationMatrix, structure = c("clustering",
  "featureAllocation")[1], loss = c("squaredError", "absoluteError", "binder",
  "lowerBoundVariationOfInformation")[1], nCandidates = 100,
  budgetInSeconds = 10, maxSize = 0, maxScans = 10, multicore = TRUE)
}
\arguments{
\item{expectedPairwiseAllocationMatrix}{An \code{n}-by-\code{n} symmetric matrix
whose \code{(i,j)} element gives the estimated expected number of times that items
\code{i} and \code{j} are in the same subset (i.e., cluster or feature).}

\item{structure}{Either \code{"clustering"} or \code{"featureAllocation"} to indicate
whether the optimization seeks to produce a clustering or a feature allocation.}

\item{loss}{One of \code{"squaredError"}, \code{"absoluteError"}, \code{"binder"}, or
\code{"lowerBoundVariationOfInformation"} to indicate the optimization should seek to
minimize squared error loss, absolute error loss, Binder loss (Binder 1978), or the lower
bound of the variation of information loss (Wade & Ghahramani 2017), respectively.  When
\code{structure="clustering"}, the first three are equivalent.  When
\code{structure="featureAllocation"}, only the first two are valid.}

\item{nCandidates}{The (maximum) number of candidates to consider.  Fewer than
\code{nCandidates} may be considered if the time in \code{budgetInSeconds} is exceeded.
The computational cost is linear in the number of candidates and there are rapidly
diminishing returns to more candidates.}

\item{budgetInSeconds}{The (maximum) number of seconds to devote to the optimization.
When this time is exceeded, no more candidates are considered.}

\item{maxSize}{Either zero or a positive integer.  If a positive integer, the
optimization is constrained to produce solutions whose number of clusters or number of
features is no more than the supplied value.  If zero, the size is not constrained.
To avoid overfitting in feature allocation estimation, it is recommended that
\code{maxSize} be close to the mean number of features (i.e., columns) in the
feature allocations that generated the \code{expectedPairwiseAllocationMatrix}.}

\item{maxScans}{The maximum number of reallocation scans after the initial allocation.
The actual number of scans may be less than \code{maxScans} since the algorithm stops
if the result does not change between scans.}

\item{multicore}{Logical indicating whether computations should take advantage of
multiple CPU cores.}
}
\value{
A clustering (as a vector of cluster labels) or a feature allocation (as a binary
matrix of feature indicators).
}
\description{
This function implements the sequentially-allocated latent structure optimization (SALSO)
method to find a clustering or feature allocation that minimizes various loss functions.
The SALSO method was presented at the workshop "Bayesian Nonparametric Inference: Dependence
Structures and their Applications" in Oaxaca, Mexico on December 6, 2017.
}
\examples{
\donttest{
probabilities <- expectedPairwiseAllocationMatrix(iris.clusterings)
salso(probabilities)

expectedCounts <- expectedPairwiseAllocationMatrix(USArrests.featureAllocations)
salso(expectedCounts,"featureAllocation")
}

}
\references{
Wade, S. and Ghahramani, Z. (2017). Bayesian cluster analysis: Point estimation and credible balls. Bayesian Analysis.

Binder, D. (1978). Bayesian Cluster Analysis. Biometrika, 65: 31--38.
}
\seealso{
\code{\link{expectedPairwiseAllocationMatrix}}, \code{\link{dlso}}
}
\author{
David B. Dahl \email{dahl@stat.byu.edu}
}
