% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/sig_estimate.R
\name{sig_estimate}
\alias{sig_estimate}
\alias{show_sig_number_survey}
\alias{show_sig_number_survey2}
\title{Estimate Signature Number}
\usage{
sig_estimate(
  nmf_matrix,
  range = 2:5,
  nrun = 10,
  use_random = FALSE,
  method = "brunet",
  seed = 123456,
  cores = 1,
  keep_nmfObj = FALSE,
  save_plots = FALSE,
  plot_basename = file.path(tempdir(), "nmf"),
  what = "all",
  verbose = FALSE
)

show_sig_number_survey(
  object,
  x = "rank",
  left_y = "cophenetic",
  right_y = "rss",
  left_name = left_y,
  right_name = toupper(right_y),
  left_color = "black",
  right_color = "red",
  left_shape = 16,
  right_shape = 18,
  shape_size = 4,
  highlight = NULL
)

show_sig_number_survey2(
  x,
  y = NULL,
  what = c("all", "cophenetic", "rss", "residuals", "dispersion", "evar", "sparseness",
    "sparseness.basis", "sparseness.coef", "silhouette", "silhouette.coef",
    "silhouette.basis", "silhouette.consensus"),
  na.rm = FALSE,
  xlab = "Total signatures",
  ylab = "",
  main = "Signature number survey using NMF package"
)
}
\arguments{
\item{nmf_matrix}{a \code{matrix} used for NMF decomposition, with rows indicating samples and columns indicating components.}

\item{range}{a \code{numeric} vector containing the ranks of factorization to try. Note that duplicates are removed
and values are sorted in increasing order. The results are notably returned in this order.}

\item{nrun}{a \code{numeric} giving the number of runs to perform for each value in \code{range}. Setting \code{nrun} to 30~50 is
enough to achieve a robust result.}

\item{use_random}{Whether to generate random data from the input to test measurements. Default is \code{FALSE}.}

\item{method}{specification of the NMF algorithm. Use 'brunet' as default.
Available methods for NMF decompositions are 'brunet', 'lee', 'ls-nmf', 'nsNMF', 'offset'.}

\item{seed}{specification of the starting point or seeding method, which will compute a starting point,
usually using data from the target matrix in order to provide a good guess.}

\item{cores}{number of cpu cores to run NMF.}

\item{keep_nmfObj}{default is \code{FALSE}, if \code{TRUE}, keep NMF objects from runs, and the result may be huge.}

\item{save_plots}{if \code{TRUE}, save signature number survey plot to local machine.}

\item{plot_basename}{when saving plots, set a custom basename for the file path.}

\item{what}{a character vector whose elements partially match one of the following item,
which correspond to the measures computed by \code{summary()} on each – multi-run – NMF result:
'all', 'cophenetic', 'rss', 'residuals', 'dispersion', 'evar', 'silhouette'
(and more specific \verb{*.coef}, \verb{*.basis}, \verb{*.consensus}), 'sparseness'
(and more specific \verb{*.coef}, \verb{*.basis}).
It specifies which measure must be plotted (what='all' plots all the measures).}

\item{verbose}{if \code{TRUE}, print extra message.}

\item{object}{a \code{Survey} object generated from \link{sig_estimate}, or
a \code{data.frame} containing at least a rank column and a column for
one measure.}

\item{x}{a \code{data.frame} or \code{NMF.rank} object obtained from \code{\link[=sig_estimate]{sig_estimate()}}.}

\item{left_y}{column name for left y axis.}

\item{right_y}{column name for right y axis.}

\item{left_name}{label name for left y axis.}

\item{right_name}{label name for right y axis.}

\item{left_color}{color for left axis.}

\item{right_color}{color for right axis.}

\item{left_shape, right_shape, shape_size}{shape setting.}

\item{highlight}{an \code{integer} giving the \code{x} value to highlight.}

\item{y}{for random simulation,
a \code{data.frame} or \code{NMF.rank} object obtained from \code{\link[=sig_estimate]{sig_estimate()}}.}

\item{na.rm}{single logical that specifies if the rank
  for which the measures are NA values should be removed
  from the graph or not (default to \code{FALSE}).  This is
  useful when plotting results which include NAs due to
  error during the estimation process. See argument
  \code{stop} for \code{nmfEstimateRank}.}

\item{xlab}{x-axis label}

\item{ylab}{y-axis label}

\item{main}{main title}
}
\value{
\itemize{
\item sig_estimate: a \code{list} containing information about the NMF run and the rank survey.
}

\itemize{
\item show_sig_number_survey: a \code{ggplot} object
}

\itemize{
\item show_sig_number_survey2: a \code{ggplot} object
}
}
\description{
Use the \strong{NMF} package to evaluate the optimal number of signatures.
This is used along with \link{sig_extract}.
Users should run \code{library(NMF)} first. If NMF objects are returned,
the result can be further visualized by NMF plot methods like
\code{NMF::consensusmap()} and \code{NMF::basismap()}.

\code{sig_estimate()} shows a comprehensive rank survey generated by the
\strong{NMF} package, but sometimes
it is hard to consider all measures. \code{show_sig_number_survey()} provides a
one or two y-axis visualization method to help users determine
the optimal signature number (showing both
stability ("cophenetic") and error (RSS) by default).
Users can also set custom measures to show.

\code{show_sig_number_survey2()} is modified from the \strong{NMF} package to
better help users explore the survey of signature numbers.
}
\details{
The most common approach is to choose the smallest rank for which the cophenetic correlation coefficient
starts decreasing (used by this function). Another approach is to choose the rank for which the plot
of the residual sum of squares (RSS) between the input matrix and its estimate shows an inflection point.
For more custom features, please use \link[NMF:nmfEstimateRank]{NMF::nmfEstimateRank} directly.
}
\examples{
\donttest{
load(system.file("extdata", "toy_copynumber_tally_M.RData",
  package = "sigminer", mustWork = TRUE
))
library(NMF)
cn_estimate <- sig_estimate(cn_tally_M$nmf_matrix,
  cores = 1, nrun = 5,
  verbose = TRUE
)

p <- show_sig_number_survey2(cn_estimate$survey)
p

# Show two measures
show_sig_number_survey(cn_estimate)
# Show one measure
p1 <- show_sig_number_survey(cn_estimate, right_y = NULL)
p1
p2 <- add_h_arrow(p, x = 4.1, y = 0.953, label = "selected number")
p2

# Show data from a data.frame
p3 <- show_sig_number_survey(cn_estimate$survey)
p3
# Show other measures
head(cn_estimate$survey)
p4 <- show_sig_number_survey(cn_estimate$survey,
  right_y = "dispersion",
  right_name = "dispersion"
)
p4
p5 <- show_sig_number_survey(cn_estimate$survey,
  right_y = "evar",
  right_name = "evar"
)
p5
}
}
\references{
Gaujoux, Renaud, and Cathal Seoighe. "A flexible R package for nonnegative matrix factorization." BMC bioinformatics 11.1 (2010): 367.
}
\seealso{
\link{sig_extract} for extracting signatures using the \strong{NMF} package, \link{sig_auto_extract} for
extracting signatures using the automatic relevance determination technique.

\link{sig_estimate} for estimating signature number for \link{sig_extract},
\link{show_sig_number_survey2} for more visualization methods.
}
\author{
Shixiang Wang
}
