% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/insilico_train.r
\name{insilico.train}
\alias{insilico.train}
\title{Modified InSilicoVA methods with training data}
\usage{
insilico.train(data, train, cause, causes.table = NULL, thre = 0.95,
  type = c("quantile", "fixed", "empirical")[1], isNumeric = FALSE,
  updateCondProb = TRUE, keepProbbase.level = TRUE, CondProb = NULL,
  CondProbNum = NULL, datacheck = TRUE, datacheck.missing = TRUE,
  warning.write = FALSE, external.sep = TRUE, Nsim = 4000, thin = 10,
  burnin = 2000, auto.length = TRUE, conv.csmf = 0.02, jump.scale = 0.1,
  levels.prior = NULL, levels.strength = NULL, trunc.min = 1e-04,
  trunc.max = 0.9999, subpop = NULL, java_option = "-Xmx1g", seed = 1,
  phy.code = NULL, phy.cat = NULL, phy.unknown = NULL,
  phy.external = NULL, phy.debias = NULL, exclude.impossible.cause = TRUE,
  indiv.CI = NULL, ...)
}
\arguments{
\item{data}{The original data to be used. It is suggested to use input similar
to that of InterVA4, with the first column being death IDs, followed by 245 symptoms.
The only difference in input is that InSilicoVA takes three levels: ``present'',
``absent'', and ``missing (no data)''. Similar to InterVA software,
``present'' symptoms take value ``Y''; ``absent'' symptoms take value
``NA'' or ``''. For missing symptoms, e.g., questions not asked or answered
in the original interview, corrupted data, etc., the input should be coded
by ``.'' to distinguish from ``absent'' category. The order of the columns does
not matter as long as the column names are correct. It can also include more 
unused columns than the standard InterVA4 input. But the first column should be 
the death ID. For example input data format, see \code{RandomVA1} and 
\code{RandomVA2}.}

\item{train}{Training data. It should be in the same format as the testing data
and contain one additional column (see \code{cause} below) specifying the known
cause of death. The first column is also assumed to be the death ID.}

\item{cause}{the name of the column in \code{train} that contains cause of death.}

\item{causes.table}{The list of causes of death used in training data.}

\item{thre}{a numerical value between 0 and 1. It specifies the maximum proportion of
missing values allowed for a symptom to be considered in the model. The default value is
0.95, meaning that if a symptom has more than 95\% missing values in the training data, it
will be removed.}

\item{type}{Three methods of learning conditional probabilities are provided: ``empirical'', ``quantile'',
or ``fixed''. Since InSilicoVA works with ranked conditional probabilities P(S|C), ``quantile''
means the rankings of the P(S|C) are obtained by matching them to the same quantiles of the
default InterVA P(S|C) distribution, and ``fixed'' means the P(S|C) are matched to the closest values
in the default InterVA P(S|C) table. Empirically, both types of rankings produce similar results.
``empirical'', on the other hand, means no ranking is calculated and the empirical conditional
probabilities are used directly. If ``empirical'' is chosen, \code{updateCondProb} will be forced
to be FALSE.}

\item{isNumeric}{Indicator of whether the input is already in numeric form. If the
input is coded numerically such that 1 for ``present'', 0 for ``absent'',
and -1 for ``missing'', this indicator could be set to TRUE to avoid
conversion to standard InterVA format.}

\item{updateCondProb}{Logical indicator. If FALSE, then fit InSilicoVA model without 
re-estimating conditional probabilities.}

\item{keepProbbase.level}{see \code{\link{insilico}} for more detail.}

\item{CondProb}{see \code{\link{insilico}} for more detail.}

\item{CondProbNum}{see \code{\link{insilico}} for more detail.}

\item{datacheck}{Not Implemented.}

\item{datacheck.missing}{Not Implemented.}

\item{warning.write}{Not Implemented.}

\item{external.sep}{Not Implemented.}

\item{Nsim}{see \code{\link{insilico}} for more detail.}

\item{thin}{see \code{\link{insilico}} for more detail.}

\item{burnin}{see \code{\link{insilico}} for more detail.}

\item{auto.length}{see \code{\link{insilico}} for more detail.}

\item{conv.csmf}{see \code{\link{insilico}} for more detail.}

\item{jump.scale}{see \code{\link{insilico}} for more detail.}

\item{levels.prior}{see \code{\link{insilico}} for more detail.}

\item{levels.strength}{see \code{\link{insilico}} for more detail.}

\item{trunc.min}{see \code{\link{insilico}} for more detail.}

\item{trunc.max}{see \code{\link{insilico}} for more detail.}

\item{subpop}{see \code{\link{insilico}} for more detail.}

\item{java_option}{see \code{\link{insilico}} for more detail.}

\item{seed}{see \code{\link{insilico}} for more detail.}

\item{phy.code}{see \code{\link{insilico}} for more detail.}

\item{phy.cat}{see \code{\link{insilico}} for more detail.}

\item{phy.unknown}{see \code{\link{insilico}} for more detail.}

\item{phy.external}{see \code{\link{insilico}} for more detail.}

\item{phy.debias}{see \code{\link{insilico}} for more detail.}

\item{exclude.impossible.cause}{Not Implemented.}

\item{indiv.CI}{see \code{\link{insilico}} for more detail.}

\item{...}{not used}
}
\value{
\code{insilico} object
}
\description{
This function implements the InSilicoVA model with non-InterVA4 input data.
}
\details{
Please see \code{\link{insilico}} for more details about choosing chain length and
operating system differences. This function implements InSilicoVA with customized
input format and training data.

For more details on the model specification, see the paper at
\url{http://arxiv.org/abs/1411.3042}.
}
