% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/drtmle.R
\name{drtmle}
\alias{drtmle}
\title{TMLE estimate of the average treatment effect with doubly-robust inference}
\usage{
drtmle(
  Y,
  A,
  W,
  DeltaA = as.numeric(!is.na(A)),
  DeltaY = as.numeric(!is.na(Y)),
  a_0 = unique(A[!is.na(A)]),
  family = if (all(Y \%in\% c(0, 1))) {     stats::binomial() } else {    
    stats::gaussian() },
  stratify = FALSE,
  SL_Q = NULL,
  SL_g = NULL,
  SL_Qr = NULL,
  SL_gr = NULL,
  n_SL = 1,
  avg_over = "drtmle",
  se_cv = "none",
  se_cvFolds = ifelse(se_cv == "partial", 10, 1),
  targeted_se = se_cv != "partial",
  glm_Q = NULL,
  glm_g = NULL,
  glm_Qr = NULL,
  glm_gr = NULL,
  adapt_g = FALSE,
  guard = c("Q", "g"),
  reduction = "univariate",
  returnModels = FALSE,
  returnNuisance = TRUE,
  cvFolds = 1,
  maxIter = 3,
  tolIC = 1/length(Y),
  tolg = 0.01,
  verbose = FALSE,
  Qsteps = 2,
  Qn = NULL,
  gn = NULL,
  use_future = FALSE,
  ...
)
}
\arguments{
\item{Y}{A \code{numeric} vector of continuous or binary outcomes.}

\item{A}{A \code{numeric} vector of discrete-valued treatment assignment.}

\item{W}{A \code{data.frame} of named covariates.}

\item{DeltaA}{A \code{numeric} vector of missing treatment indicators (assumed
to be equal to 0 if missing and 1 if observed).}

\item{DeltaY}{A \code{numeric} vector of missing outcome indicators (assumed
to be equal to 0 if missing and 1 if observed).}

\item{a_0}{A \code{numeric} vector of fixed treatment values at which to
return marginal mean estimates.}

\item{family}{A \code{family} object equal to either \code{binomial()} or
\code{gaussian()}, to be passed to the \code{SuperLearner} or \code{glm}
function.}

\item{stratify}{A \code{boolean} indicating whether to estimate the outcome
regression separately for different values of \code{A} (if \code{TRUE}) or
to pool across \code{A} (if \code{FALSE}).}

\item{SL_Q}{A vector of characters or a list describing the Super Learner
library to be used for the outcome regression. See
\code{\link[SuperLearner]{SuperLearner}} for details.}

\item{SL_g}{A vector of characters describing the super learner library to be
used for each of the propensity score regressions (\code{DeltaA}, \code{A},
and \code{DeltaY}). To use the same library for each of the regressions (or
if there is no missing data in \code{A} nor \code{Y}), a single library may
be input. See \code{\link[SuperLearner]{SuperLearner}} for details on how
super learner libraries can be specified.}

\item{SL_Qr}{A vector of characters or a list describing the Super Learner
library to be used for the reduced-dimension outcome regression.}

\item{SL_gr}{A vector of characters or a list describing the Super Learner
library to be used for the reduced-dimension propensity score.}

\item{n_SL}{Number of repeated Super Learners to run (default 1) for
each nuisance parameter. Repeat Super Learners more times to obtain more stable
inference.}

\item{avg_over}{If multiple Super Learners are run, on which scale should the
results be aggregated. Options include: \code{"SL"} = 
repeated nuisance parameter estimates are averaged before subsequently 
generating a single vector of point estimates based on the averaged models;
\code{"drtmle"} = repeated vectors of point estimates are generated and 
averaged. Both can be specified, recognizing that this adds considerable
computational expense. In this case, the final estimates are the average 
of \code{n_SL} point estimates where each is built by averaging \code{n_SL} 
fits. If \code{NULL}, no averaging is performed (in which case \code{n_SL} 
should be set equal to 1).}

\item{se_cv}{Should cross-validated nuisance parameter estimates be used 
for computing standard errors? 
Options are \code{"none"} = no cross-validation is performed; \code{"partial"} = 
only applicable if Super Learner is used for nuisance parameter estimates; 
\code{"full"} = full cross-validation is performed. See vignette for further 
details. Ignored if \code{cvFolds > 1}, since then
cross-validated nuisance parameter estimates are used by default and it is 
assumed that you want full cross-validated standard errors.}

\item{se_cvFolds}{If cross-validated nuisance parameter estimates are used
to compute standard errors, how many folds should be used in this computation. 
If \code{se_cv = "partial"}, then this option sets the number of folds used
by the \code{SuperLearner} fitting procedure.}

\item{targeted_se}{A boolean indicating whether the targeted nuisance 
parameters should be used in standard error computation or the initial 
estimators. If \code{se_cv} is not set to \code{"none"}, this option is 
ignored and standard errors are computed based on non-targeted, cross-validated 
nuisance parameter fits.}

\item{glm_Q}{A character describing a formula to be used in the call to
\code{glm} for the outcome regression. Ignored if \code{SL_Q!=NULL}.}

\item{glm_g}{A list of characters describing the formulas to be used
for each of the propensity score regressions (\code{DeltaA}, \code{A}, and
\code{DeltaY}). To use the same formula for each of the regressions (or if
there are no missing data in \code{A} nor \code{Y}), a single character
formula may be input. In general the formulas can reference any variable in 
\code{colnames(W)}, unless \code{adapt_g = TRUE} in which case the formulas
should reference variables \code{QaW} where \code{a} takes values in \code{a_0}.}

\item{glm_Qr}{A character describing a formula to be used in the call to
\code{glm} for reduced-dimension outcome regression. Ignored if
\code{SL_Qr!=NULL}. The formula should use the variable name \code{'gn'}.}

\item{glm_gr}{A character describing a formula to be used in the call to
\code{glm} for the reduced-dimension propensity score. Ignored if
\code{SL_gr!=NULL}. The formula should use the variable name \code{'Qn'} and
\code{'gn'} if \code{reduction='bivariate'} and \code{'Qn'} otherwise.}

\item{adapt_g}{A boolean indicating whether the propensity score should be 
outcome adaptive. If \code{TRUE} then the propensity score is estimated as the
regression of \code{A} onto covariates \code{QaW} for \code{a} in each value
contained in \code{a_0}. See vignette for more details.}

\item{guard}{A character vector indicating what pattern of misspecifications
to guard against. If \code{guard} contains \code{"Q"}, then the TMLE guards
against misspecification of the outcome regression by estimating the
reduced-dimension outcome regression specified by \code{glm_Qr} or
\code{SL_Qr}. If \code{guard} contains \code{"g"} then the TMLE
(additionally) guards against misspecification of the propensity score by
estimating the reduced-dimension propensity score specified by \code{glm_gr}
or \code{SL_gr}. If \code{guard} is set to \code{NULL}, then only standard TMLE
and one-step estimators are computed.}

\item{reduction}{A character equal to \code{"univariate"} for a univariate
misspecification correction (default) or \code{"bivariate"} for the
bivariate version.}

\item{returnModels}{A boolean indicating whether to return model fits for the
outcome regression, propensity score, and reduced-dimension regressions.}

\item{returnNuisance}{A boolean indicating whether to return the estimated 
nuisance regressions evaluated on the observed data. Defaults to \code{TRUE}. 
If \code{n_SL} is large and \code{"drtmle"} is in \code{avg_over}, then 
consider setting to \code{FALSE} in order to reduce size of resultant object.}

\item{cvFolds}{A numeric equal to the number of folds to be used in
cross-validated fitting of nuisance parameters. If \code{cvFolds = 1}, no
cross-validation is used. Alternatively, \code{cvFolds} may be entered as a
vector of fold assignments for observations, in which case its length should
be the same length as \code{Y}.}

\item{maxIter}{A numeric that sets the maximum number of iterations the TMLE
can perform in its fluctuation step.}

\item{tolIC}{A numeric that defines the stopping criterion based on the
empirical mean of the influence function.}

\item{tolg}{A numeric indicating the minimum value for estimates of the
propensity score.}

\item{verbose}{A boolean indicating whether to print status updates.}

\item{Qsteps}{A numeric equal to 1 or 2 indicating whether the fluctuation
submodel for the outcome regression should be fit using a single
minimization (\code{Qsteps = 1}) or a backfitting-type minimization
(\code{Qsteps=2}). The latter was found to be more stable in simulations and
is the default.}

\item{Qn}{An optional list of outcome regression estimates. If specified, the
function will ignore the nuisance parameter estimation specified by
\code{SL_Q} and \code{glm_Q}. The entries in the list should correspond to
the outcome regression evaluated at \code{A} and the observed values of
\code{W}, with order determined by the input to \code{a_0} (e.g., if
\code{a_0 = c(0, 1)} then \code{Qn[[1]]} should be outcome regression at
\code{A} = 0 and \code{Qn[[2]]} should be outcome regression at
\code{A} = 1).}

\item{gn}{An optional list of propensity score estimates. If specified, the
function will ignore the nuisance parameter estimation specified by
\code{SL_g} and \code{glm_g}. The entries in the list should correspond to
the propensity for the observed values of \code{W}, with order determined by
the input to \code{a_0} (e.g., if \code{a_0 = c(0,1)} then \code{gn[[1]]}
should be propensity of \code{A} = 0 and \code{gn[[2]]} should be propensity
of \code{A} = 1).}

\item{use_future}{A boolean indicating whether to use \code{future_lapply} or
instead to just use \code{lapply}. The latter can make it easier to track down
errors.}

\item{...}{Other options (not currently used).}
}
\value{
An object of class \code{"drtmle"}.
\describe{
 \item{\code{drtmle}}{A \code{list} of doubly-robust point estimates and
       a doubly-robust covariance matrix.}
 \item{\code{nuisance_drtmle}}{A \code{list} of the final TMLE estimates of
       the outcome regression (\code{$QnStar}), propensity score
       (\code{$gnStar}), and reduced-dimension regressions (\code{$QrnStar},
       \code{$grnStar}) evaluated at the observed data values.}
 \item{\code{ic_drtmle}}{A \code{list} of the empirical mean of the efficient
       influence function (\code{$eif}) and the extra pieces of the influence
       function resulting from misspecification. All should be smaller than
       \code{tolIC} (unless \code{maxIter} was reached first). Also includes
       a matrix of the influence function values at the estimated nuisance
       parameters evaluated at the observed data.}
 \item{\code{aiptw_c}}{A \code{list} of doubly-robust point estimates and
       a non-doubly-robust covariance matrix. Theory does not guarantee
       performance of inference for these estimators, but simulation studies
       showed they often perform adequately.}
 \item{\code{nuisance_aiptw}}{A \code{list} of the initial estimates of the
       outcome regression, propensity score, and reduced-dimension
       regressions evaluated at the observed data values.}
 \item{\code{tmle}}{A \code{list} of doubly-robust point estimates and
       non-doubly-robust covariance for the standard TMLE estimator.}
 \item{\code{aiptw}}{A \code{list} of doubly-robust point estimates and
       non-doubly-robust covariance matrix for the standard AIPTW estimator.}
 \item{\code{gcomp}}{A \code{list} of non-doubly-robust point estimates and
       non-doubly-robust covariance matrix for the standard G-computation
       estimator. If super learner is used there is no guarantee of correct
       inference for this estimator.}
 \item{\code{QnMod}}{The fitted object for the outcome regression. Returns
       \code{NULL} if \code{returnModels = FALSE}.}
 \item{\code{gnMod}}{The fitted object for the propensity score. Returns
       \code{NULL} if \code{returnModels = FALSE}.}
 \item{\code{QrnMod}}{The fitted object for the reduced-dimension regression
       that guards against misspecification of the outcome regression.
       Returns \code{NULL} if \code{returnModels = FALSE}.}
 \item{\code{grnMod}}{The fitted object for the reduced-dimension regression
       that guards against misspecification of the propensity score. Returns
       \code{NULL} if \code{returnModels = FALSE}.}
 \item{\code{a_0}}{The treatment levels that were requested for computation
       of covariate-adjusted means.}
}
}
\description{
TMLE estimate of the average treatment effect with doubly-robust inference
}
\examples{
# load super learner
library(SuperLearner)
# simulate data
set.seed(123456)
n <- 100
W <- data.frame(W1 = runif(n), W2 = rnorm(n))
A <- rbinom(n, 1, plogis(W$W1 - W$W2))
Y <- rbinom(n, 1, plogis(W$W1 * W$W2 * A))
# A quick example of drtmle:
# We note that more flexible super learner libraries
# are available, and that we recommend the user use more flexible
# libraries for SL_Qr and SL_gr for general use.
fit1 <- drtmle(
  W = W, A = A, Y = Y, a_0 = c(1, 0),
  family = binomial(),
  stratify = FALSE,
  SL_Q = c("SL.glm", "SL.mean", "SL.glm.interaction"),
  SL_g = c("SL.glm", "SL.mean", "SL.glm.interaction"),
  SL_Qr = "SL.glm",
  SL_gr = "SL.glm", maxIter = 1
)
}
