% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/salso.R
\name{salso}
\alias{salso}
\title{SALSO Greedy Search}
\usage{
salso(
  x,
  loss = VI(),
  maxNClusters = 0,
  nRuns = 16,
  maxZealousAttempts = 10,
  probSequentialAllocation = 0.5,
  nCores = 0,
  ...
)
}
\arguments{
\item{x}{A \eqn{B}-by-\eqn{n} matrix, where each of the \eqn{B} rows
represents a clustering of \eqn{n} items using cluster labels. For the
\eqn{b}th clustering, items \eqn{i} and \eqn{j} are in the same cluster if
\code{x[b,i] == x[b,j]}.}

\item{loss}{The loss function to use, as indicated by \code{"binder"},
\code{"omARI"}, \code{"VI"}, \code{"NVI"}, \code{"ID"}, \code{"NID"}, or
the result of calling a function with these names. Also supported are
\code{"binder.psm"}, \code{"VI.lb"}, \code{"omARI.approx"}, or the result
of calling a function with these names, in which case \code{x} above can
optionally be a pairwise similarity matrix, i.e., an \eqn{n}-by-\eqn{n}
symmetric matrix whose \eqn{(i,j)} element gives the (estimated)
probability that items \eqn{i} and \eqn{j} are in the same subset (i.e.,
cluster) of a partition (i.e., clustering). The loss functions
\code{"binder.psm"}, \code{"VI.lb"}, and \code{"omARI.approx"} are
generally not recommended and the current implementation requires that
\code{maxZealousAttempts = 0} and \code{probSequentialAllocation = 1.0}.}

\item{maxNClusters}{The maximum number of clusters that can be considered by
the optimization algorithm, which has important implications for the
interpretability of the resulting clustering and can greatly influence the
RAM needed for the optimization algorithm. If the supplied value is zero
and \code{x} is a matrix of clusterings, the optimization is constrained by
the maximum number of clusters among the clusterings in \code{x}. If the
supplied value is zero and \code{x} is a pairwise similarity matrix, there
is no constraint.}

\item{nRuns}{The number of runs to try, although the actual number may differ
for the following reasons: 1. The actual number is a multiple of the number
of cores specified by the \code{nCores} argument, and 2. The search is
curtailed when the \code{seconds} threshold is exceeded.}

\item{maxZealousAttempts}{The maximum number of attempts for zealous updates,
in which entire clusters are destroyed and items are sequentially
reallocated. While zealous updates may be helpful in optimization, they
also take more CPU time which might be better used trying additional runs.}

\item{probSequentialAllocation}{For the initial allocation, the probability
of sequential allocation instead of using \code{sample(1:K, ncol(x),
TRUE)}, where \code{K} is set according to the \code{maxNClusters}
argument.}

\item{nCores}{The number of CPU cores to use, i.e., the number of
simultaneous runs at any given time. A value of zero indicates that all
cores on the system should be used.}

\item{...}{Extra arguments not intended for the end user, including: 1.
\code{seconds}: Instead of performing all the requested number of runs,
curtail the search after the specified expected number of seconds. Note
that the function will finish earlier if all the requested runs are
completed. The specified number of seconds does not account for the overhead
involved in starting the search and returning results. 2. \code{maxScans}: The
maximum number of full reallocation scans. The actual number of scans may
be less than \code{maxScans} since the method stops if the result does not
change between scans, and 3. \code{probSingletonsInitialization}: When
doing a sequential allocation to obtain the initial allocation, the
probability of placing the first \code{maxNClusters} randomly-selected
items in singleton subsets.}
}
\value{
An integer vector giving the estimated partition, encoded using
  cluster labels.
}
\description{
This function provides a partition to summarize a partition distribution
using the SALSO greedy search method (Dahl, Johnson, and Müller, 2021). The
implementation currently supports the minimization of several partition
estimation criteria. For details on these criteria, see
\code{\link{partition.loss}}.
}
\details{
The initial version of the SALSO method was presented at the workshop
"Bayesian Nonparametric Inference: Dependence Structures and their
Applications" in Oaxaca, Mexico on December 6, 2017. See
\url{https://www.birs.ca/events/2017/5-day-workshops/17w5060/schedule}.
}
\examples{
# For examples, use 'nCores=1' per CRAN rules, but in practice omit this.

draws <- iris.clusterings
salso(draws, loss=VI(), nRuns=1, nCores=1)
salso(draws, loss=VI(a=0.7), nRuns=1, nCores=1)
salso(draws, loss=binder(), nRuns=1, nCores=1)
salso(iris.clusterings, binder(a=list(nClusters=3, upper=5)), nRuns=4, nCores=1)

}
\references{
D. B. Dahl, D. J. Johnson, and P. Müller (2021), Search Algorithms and Loss
Functions for Bayesian Clustering, \href{https://arxiv.org/abs/2105.04451}{arXiv:2105.04451}.
}
\seealso{
\code{\link{partition.loss}}, \code{\link{psm}},
  \code{\link{summary.salso.estimate}}, \code{\link{dlso}}
}
