% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/diff_abundance.R
\name{diff_abundance}
\alias{diff_abundance}
\title{Calculate differential abundance between conditions}
\usage{
diff_abundance(
  data,
  sample,
  condition,
  grouping,
  intensity_log2,
  missingness,
  comparison,
  mean = NULL,
  sd = NULL,
  n_samples = NULL,
  ref_condition,
  filter_NA_missingness = TRUE,
  method = c("t-test", "t-test_mean_sd", "moderated_t-test", "proDA"),
  p_adj_method = "BH",
  retain_columns = NULL
)
}
\arguments{
\item{data}{A data frame containing at least the input variables that are required for the selected method. Ideally the output of \code{assign_missingness} or \code{impute} is used.}

\item{sample}{The column in the data frame containing the sample name. Is not required if \code{method = "t-test_mean_sd"}.}

\item{condition}{The column in the data frame containing the conditions.}

\item{grouping}{The column in the data frame containing precursor or peptide identifiers.}

\item{intensity_log2}{The column in the data frame containing intensity values. The intensity values need to be log2 transformed. Is not required if \code{method = "t-test_mean_sd"}.}

\item{missingness}{The column in the data frame containing missingness information. Can be obtained by calling \code{assign_missingness}.
Is not required if \code{method = "t-test_mean_sd"}.}

\item{comparison}{The column in the data frame containing comparison information of treatment/reference condition pairs. Can be obtained by
calling \code{assign_missingness}. Is not required if \code{method = "t-test_mean_sd"}.}

\item{mean}{The column in the data frame containing mean values for two conditions. Is only required if \code{method = "t-test_mean_sd"}.}

\item{sd}{The column in the data frame containing standard deviations for two conditions. Is only required if \code{method = "t-test_mean_sd"}.}

\item{n_samples}{The column in the data frame containing the number of samples per condition for two conditions. Is only required if \code{method = "t-test_mean_sd"}.}

\item{ref_condition}{The condition that is used as a reference for differential abundance calculation.}

\item{filter_NA_missingness}{A logical, default is \code{TRUE}. For all methods except \code{"t-test_mean_sd"} missingness information has to be provided.
If a reference/treatment pair has too few samples to be considered robust, it is annotated with \code{NA} as missingness. If this argument
is \code{TRUE}, these reference/treatment pairs are filtered out.}

\item{method}{A character vector, specifies the method used for statistical testing. Methods include Welch test ("\code{t-test}"), a Welch test on means,
standard deviations and number of replicates ("\code{t-test_mean_sd}") and a moderated t-test based on the \code{limma} package ("\code{moderated_t-test}").
More information on the moderated t-test can be found in the \code{limma} documentation. Furthermore, the \code{proDA} package specific method ("\code{proDA}") can
be used to infer means across samples based on a probabilistic dropout model. This eliminates the need for data imputation since missing values are inferred from the
model. More information can be found in the \code{proDA} documentation.}

\item{p_adj_method}{A character vector, specifies the p-value correction method. Possible methods are c("holm", "hochberg", "hommel", "bonferroni", "BH",
"BY", "fdr", "none"). Default method is \code{"BH"}.}

\item{retain_columns}{A vector indicating if certain columns should be retained from the input data frame. Default is not retaining
additional columns \code{retain_columns = NULL}. Specific columns can be retained by providing their names (not in quotation marks,
just like other column names, but in a vector).}
}
\value{
A data frame that contains differential abundances (\code{diff}), p-values (\code{pval}) and adjusted p-values (\code{adj_pval}) for each protein,
peptide or precursor (depending on the \code{grouping} variable) and the associated treatment/reference pair.
Depending on the method the data frame contains additional columns:
\itemize{
\item{"t-test": }{The \code{std_error} column contains the standard error of the differential abundances. \code{n_obs} contains the number of
observations for the specific protein, peptide or precursor (depending on the \code{grouping} variable) and the associated treatment/reference pair.}
\item{"t-test_mean_sd": }{\code{mean_control} and \code{mean_treated} columns contain the means for the reference and treatment condition, respectively.
\code{sd_control} and \code{sd_treated} columns contain the standard deviations for the reference and treatment condition, respectively.
\code{n_control} and \code{n_treated} columns contain the numbers of samples for the reference and treatment condition, respectively. The \code{std_error}
column contains the standard error of the differential abundances. \code{t_statistic} contains the t-statistic for the t-test.}
\item{"moderated_t-test": }{\code{CI_2.5} and \code{CI_97.5} give the 2.5\% and 97.5\% confidence interval borders for the differential abundance. \code{avg_abundance}
contains average abundances for treatment/reference pairs (mean of the two group means). \code{t_statistic} contains the t-statistic for the t-test. \code{B} contains the
B-statistic, which is the log-odds that the protein, peptide or precursor (depending on \code{grouping}) has a differential abundance between the two groups. Suppose B=1.5.
The odds of differential abundance are exp(1.5)=4.48, i.e., about four and a half to one. The probability that there is a differential abundance is 4.48/(1+4.48)=0.82,
i.e., the probability is about 82\% that this group is differentially abundant. A B-statistic of zero corresponds to a 50-50 chance that the group is differentially
abundant. \code{n_obs} contains the number of observations for the specific protein, peptide or precursor (depending on the \code{grouping} variable) and the
associated treatment/reference pair.}
\item{"proDA": }{The \code{std_error} column contains the standard error of the differential abundances. \code{avg_abundance} contains average abundances for
treatment/reference pairs (mean of the two group means). \code{t_statistic} contains the t-statistic for the t-test. \code{n_obs} contains the number of
observations for the specific protein, peptide or precursor (depending on the \code{grouping} variable) and the associated treatment/reference pair.}
}
}
\description{
Performs differential abundance calculations and statistical testing on data frames with protein, peptide or precursor data. Different methods for statistical testing are available.
}
\examples{
\dontrun{
diff_abundance(
  data,
  sample = r_file_name,
  condition = r_condition,
  grouping = eg_precursor_id,
  intensity_log2 = normalised_intensity_log2,
  missingness = missingness,
  comparison = comparison,
  ref_condition = "control",
  method = "t-test",
  retain_columns = c(pg_protein_accessions)
)
}
}
