% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/clustering.R
\name{calcPurity}
\alias{calcPurity}
\title{Calculate purity by comparing two cluster labeling variables}
\usage{
calcPurity(
  object,
  trueCluster,
  useCluster = NULL,
  useDatasets = NULL,
  verbose = getOption("ligerVerbose", TRUE),
  classes.compare = trueCluster
)
}
\arguments{
\item{object}{A \linkS4class{liger} object, with the clustering result
present in cellMeta.}

\item{trueCluster}{Either the name of one variable in \code{cellMeta(object)}
or a factor object with annotation that matches all cells being
considered.}

\item{useCluster}{The name of one variable in \code{cellMeta(object)}.
Default \code{NULL} uses default clusters.}

\item{useDatasets}{A character vector of the names, or a numeric or logical
vector of the indices, of the datasets to be considered for the purity
calculation. Default \code{NULL} uses all datasets.}

\item{verbose}{Logical. Whether to show progress information. Default
\code{getOption("ligerVerbose")}, or \code{TRUE} if the option has not been
set.}

\item{classes.compare}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#superseded}{\figure{lifecycle-superseded.svg}{options: alt='[Superseded]'}}}{\strong{[Superseded]}} Use
\code{trueCluster} instead.}
}
\value{
A numeric scalar, the purity of the clustering result indicated by
\code{useCluster} compared to \code{trueCluster}.
}
\description{
This function calculates the purity of the clustering result obtained with
LIGER with respect to an external clustering (an existing "true" annotation).
Purity can sometimes be a more useful metric when the clustering to be tested
contains more subgroups or clusters than the true clusters. Purity ranges
from 0 to 1, with a score of 1 representing a pure, accurate clustering.

The true clustering annotation must be specified as the baseline. We suggest
storing it in \code{cellMeta(object)} so that it can be easily used by many
other visualization and evaluation functions.

The purity can be calculated for only specified datasets, since true
annotation might not be available for all datasets. Evaluation for only one
or a few datasets can be done by specifying \code{useDatasets}. If
\code{useDatasets} is specified, \code{trueCluster} and \code{useCluster}
are checked to ensure that they match the cells in the specified datasets.
}
\examples{
# Assume the true cluster in `pbmcPlot` is "leiden_cluster"
# generate fake new labeling
fake <- sample(1:7, ncol(pbmcPlot), replace = TRUE)
# Insert into cellMeta
pbmcPlot$new <- factor(fake)
calcPurity(pbmcPlot, trueCluster = "leiden_cluster", useCluster = "new")

# Now assume we have an existing baseline annotation only for the "stim" dataset
nStim <- ncol(dataset(pbmcPlot, "stim"))
stimTrueLabel <- factor(fake[1:nStim])
# Insert into cellMeta
cellMeta(pbmcPlot, "stim_true_label", useDatasets = "stim") <- stimTrueLabel
# Assume "leiden_cluster" is the clustering result we obtained and that needs
# to be evaluated
calcPurity(pbmcPlot, trueCluster = "stim_true_label",
           useCluster = "leiden_cluster", useDatasets = "stim")
}
