% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/Baseline.R
\name{calcBaseline}
\alias{calcBaseline}
\title{Calculate the BASELINe PDFs}
\usage{
calcBaseline(db, sequenceColumn = "CLONAL_SEQUENCE",
  germlineColumn = "CLONAL_GERMLINE", testStatistic = c("local",
  "focused", "imbalanced"), regionDefinition = NULL,
  targetingModel = HH_S5F, mutationDefinition = NULL,
  calcStats = FALSE, nproc = 1)
}
\arguments{
\item{db}{\code{data.frame} containing sequence data and annotations.}

\item{sequenceColumn}{\code{character} name of the column in \code{db} 
containing input sequences.}

\item{germlineColumn}{\code{character} name of the column in \code{db} 
containing germline sequences.}

\item{testStatistic}{\code{character} indicating the statistical framework 
used to test for selection. One of 
\code{c("local", "focused", "imbalanced")}.}

\item{regionDefinition}{\link{RegionDefinition} object defining the regions
and boundaries of the Ig sequences.}

\item{targetingModel}{\link{TargetingModel} object. Default is \link{HH_S5F}.}

\item{mutationDefinition}{\link{MutationDefinition} object defining replacement
and silent mutation criteria. If \code{NULL} then 
replacement and silent mutations are determined by exact 
amino acid identity. Note, if the input \code{data.frame} 
already contains observed and expected mutation frequency 
columns then mutations will not be recalculated and this
argument will be ignored.}

\item{calcStats}{\code{logical} indicating whether or not to calculate the 
summary statistics \code{data.frame} stored in the 
\code{stats} slot of a \link{Baseline} object.}

\item{nproc}{number of cores to distribute the operation over. If 
\code{nproc=0} then the \code{cluster} has already been
set and will not be reset.}
}
\value{
A \code{Baseline} object containing the modified \code{db} and BASELINe 
posterior probability density functions (PDFs) for each of the sequences.
}
\description{
\code{calcBaseline} calculates the BASELINe posterior probability density 
functions (PDFs) for sequences in the given Change-O \code{data.frame}.
}
\details{
Calculates the BASELINe posterior probability density function (PDF) for 
sequences in the provided \code{db}. 
         
\strong{Note}: Individual sequences within clonal groups are not, strictly speaking, 
independent events and it is generally appropriate to only analyze selection 
pressures on an effective sequence for each clonal group. For this reason,
it is strongly recommended that the input \code{db} contains one effective 
sequence per clone. Effective clonal sequences can be obtained by calling 
the \link{collapseClones} function.
                  
If the \code{db} does not contain the 
required columns to calculate the PDFs (namely MU_COUNT & MU_EXPECTED)
then the function will:
  \enumerate{
  \item  Calculate the numbers of observed mutations.
  \item  Calculate the expected frequencies of mutations and modify the provided 
         \code{db}. The modified \code{db} will be included as part of the 
         returned \code{Baseline} object.
}
         
The \code{testStatistic} argument indicates the statistical framework used to test 
for selection. The available test statistics are:
\itemize{
  \item   \code{local} = CDR_R / (CDR_R + CDR_S).
  \item   \code{focused} = CDR_R / (CDR_R + CDR_S + FWR_S).
  \item   \code{imbalanced} = (CDR_R + CDR_S) / (CDR_R + CDR_S + FWR_S + FWR_R).
}
For \code{focused}, the \code{regionDefinition} must contain only two regions. If more 
than two regions are defined, the \code{local} test statistic will be used.
For further information on the framework of these tests, see Uduman et al. (2011).
}
\examples{
# Load and subset example data
data(ExampleDb, package="alakazam")
db <- subset(ExampleDb, ISOTYPE == "IgG" & SAMPLE == "+7d")

# Collapse clones
db <- collapseClones(db, sequenceColumn="SEQUENCE_IMGT",
                     germlineColumn="GERMLINE_IMGT_D_MASK",
                     method="thresholdedFreq", minimumFrequency=0.6,
                     includeAmbiguous=FALSE, breakTiesStochastic=FALSE)
 
# Calculate BASELINe
baseline <- calcBaseline(db, 
                         sequenceColumn="CLONAL_SEQUENCE",
                         germlineColumn="CLONAL_GERMLINE", 
                         testStatistic="focused",
                         regionDefinition=IMGT_V,
                         targetingModel=HH_S5F,
                         nproc=1)
                         
}
\references{
\enumerate{
  \item  Hershberg U, et al. Improved methods for detecting selection by mutation 
           analysis of Ig V region sequences. 
           Int Immunol. 2008 20(5):683-94.
  \item  Uduman M, et al. Detecting selection in immunoglobulin sequences. 
           Nucleic Acids Res. 2011 39(Web Server issue):W499-504.
  \item  Yaari G, et al. Models of somatic hypermutation targeting and substitution based
           on synonymous mutations from high-throughput immunoglobulin sequencing data.
           Front Immunol. 2013 4(November):358.
 }
}
\seealso{
See \link{Baseline} for the return object.
         See \link{groupBaseline} and \link{summarizeBaseline} for further processing.
         See \link{plotBaselineSummary} and \link{plotBaselineDensity} for plotting results.
}
