% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/summaryfactorlist.R
\name{summary_factorlist}
\alias{summary_factorlist}
\title{Summarise a set of factors (or continuous variables) by a dependent variable}
\usage{
summary_factorlist(
  .data,
  dependent = NULL,
  explanatory = NULL,
  formula = NULL,
  cont = "mean",
  cont_nonpara = NULL,
  cont_cut = 5,
  cont_range = TRUE,
  p = FALSE,
  p_cont_para = "aov",
  p_cat = "chisq",
  column = TRUE,
  total_col = FALSE,
  orderbytotal = FALSE,
  digits = c(1, 1, 3, 1, 0),
  na_include = FALSE,
  na_include_dependent = FALSE,
  na_complete_cases = FALSE,
  na_to_p = FALSE,
  na_to_prop = TRUE,
  fit_id = FALSE,
  add_dependent_label = FALSE,
  dependent_label_prefix = "Dependent: ",
  dependent_label_suffix = "",
  add_col_totals = FALSE,
  include_col_totals_percent = TRUE,
  col_totals_rowname = NULL,
  col_totals_prefix = "",
  add_row_totals = FALSE,
  include_row_totals_percent = TRUE,
  include_row_missing_col = TRUE,
  row_totals_colname = "Total N",
  row_missing_colname = "Missing N",
  catTest = NULL,
  weights = NULL
)
}
\arguments{
\item{.data}{Dataframe.}

\item{dependent}{Character vector of length 1:  name of dependent variable (2
to 5 factor levels).}

\item{explanatory}{Character vector of any length: name(s) of explanatory
variables.}

\item{formula}{an object of class "formula" (or one that can be coerced to 
that class). Optional instead of standard dependent/explanatory format. 
Do not include if using dependent/explanatory.}

\item{cont}{Summary for continuous explanatory variables: "mean" (standard
deviation) or "median" (interquartile range). If "median" then
non-parametric hypothesis test performed (see below).}

\item{cont_nonpara}{Numeric vector of form e.g. \code{c(1,2)}. Specify which
variables to perform non-parametric hypothesis tests on and summarise with
"median".}

\item{cont_cut}{Numeric: number of unique values in continuous variable at
which to consider it a factor.}

\item{cont_range}{Logical. Median is shown with 1st and 3rd quartiles.}

\item{p}{Logical: Include null hypothesis statistical test.}

\item{p_cont_para}{Character. Continuous variable parametric test. One of
  either "aov" (analysis of variance) or "t.test" for Welch two sample
  t-test. Note continuous non-parametric test is always Kruskal-Wallis
  (kruskal.test) which in two-group setting is equivalent to Mann-Whitney U /
  Wilcoxon rank sum test.

  For continuous dependent and continuous explanatory, the parametric test
  p-value returned is for the Pearson correlation coefficient. The
  non-parametric equivalent is the p-value for the Spearman correlation
  coefficient.}

\item{p_cat}{Character. Categorical variable test. One of either "chisq" or
"fisher".}

\item{column}{Logical: Compute margins by column rather than row.}

\item{total_col}{Logical: include a total column summing across factor
levels.}

\item{orderbytotal}{Logical: order final table by total column high to low.}

\item{digits}{Number of digits to round to (1) mean/median, (2) standard
deviation / interquartile range, (3) p-value, (4) count percentage, 
(5) weighted count.}

\item{na_include}{Logical: make explanatory variables missing data explicit
(\code{NA}).}

\item{na_include_dependent}{Logical: make dependent variable missing data
explicit.}

\item{na_complete_cases}{Logical: include only rows with complete data.}

\item{na_to_p}{Logical: include missing as group in statistical test.}

\item{na_to_prop}{Logical: include missing in calculation of column proportions.}

\item{fit_id}{Logical: allows merging via \code{\link{finalfit_merge}}.}

\item{add_dependent_label}{Logical. Add the dependent variable label to the
top left of the table.}

\item{dependent_label_prefix}{Add text before dependent label.}

\item{dependent_label_suffix}{Add text after dependent label.}

\item{add_col_totals}{Logical. Include column total n.}

\item{include_col_totals_percent}{Include column percentage of total.}

\item{col_totals_rowname}{Character. Row name for column totals.}

\item{col_totals_prefix}{Character. Prefix to column totals, e.g. "N=".}

\item{add_row_totals}{Logical. Include row totals. Note this differs from
\code{total_col} above particularly for continuous explanatory variables.}

\item{include_row_totals_percent}{Include row percentage of total.}

\item{include_row_missing_col}{Logical. Include missing data total for each
row. Only used when \code{add_row_totals} is \code{TRUE}.}

\item{row_totals_colname}{Character. Column name for row totals.}

\item{row_missing_colname}{Character. Column name for missing data totals for
each row.}

\item{catTest}{Deprecated. See \code{p_cat} above.}

\item{weights}{Character vector of length 1: name of column to use for weights. 
Explanatory continuous variables are multiplied by weights. 
Explanatory categorical variables are counted with a frequency weight (sum(weights)).}
}
\value{
Returns a \code{factorlist} dataframe.
}
\description{
A function that takes a single dependent variable with a vector of
explanatory variable names (continuous or categorical variables) to produce a
summary table.
}
\details{
This function aims to produce publication-ready summary tables for
categorical or continuous dependent variables. It usually takes a categorical
dependent variable to produce a cross table of counts and proportions
expressed as percentages or summarised continuous explanatory variables.
However, it will take a continuous dependent variable to produce mean
(standard deviation) or median (interquartile range) for use with linear
regression models.
}
\examples{
library(finalfit)
library(dplyr)
# Load example dataset, modified version of survival::colon
data(colon_s)

# Table 1 - Patient demographics ----
explanatory = c("age", "age.factor", "sex.factor", "obstruct.factor")
dependent = "perfor.factor"
colon_s \%>\%
  summary_factorlist(dependent, explanatory, p=TRUE)

# summary_factorlist() is also commonly used to summarise any number of
# variables by an outcome variable (say dead yes/no).

# Table 2 - 5 yr mortality ----
explanatory = c("age.factor", "sex.factor", "obstruct.factor", "perfor.factor")
dependent = "mort_5yr"
colon_s \%>\%
  summary_factorlist(dependent, explanatory)
}
\seealso{
\code{\link{fit2df}} \code{\link{ff_column_totals}}
  \code{\link{ff_row_totals}} \code{\link{ff_label}} \code{\link{ff_glimpse}}
  \code{\link{ff_percent_only}}. For lots of examples, see \url{https://finalfit.org/}
}
\concept{finalfit wrappers}
