% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/resampling.R
\name{Resample}
\alias{Resample}
\title{Resampling observations}
\usage{
Resample(data, family = NULL, tau = 0.5, resampling = "subsampling", ...)
}
\arguments{
\item{data}{vector or matrix of data. In regression, this should be the
outcome data.}

\item{family}{type of regression model. This argument is defined as in
\code{\link[glmnet]{glmnet}}. Possible values include \code{"gaussian"}
(linear regression), \code{"binomial"} (logistic regression),
\code{"multinomial"} (multinomial regression), and \code{"cox"} (survival
analysis).}

\item{tau}{subsample size, expressed as the proportion of observations to
include in the subsample. Only used if \code{resampling="subsampling"} and
\code{cpss=FALSE}.}

\item{resampling}{resampling approach. Possible values are:
\code{"subsampling"} for sampling without replacement of a proportion
\code{tau} of the observations, or \code{"bootstrap"} for sampling with
replacement generating a resampled dataset with as many observations as in
the full sample. Alternatively, this argument can be a function to use for
resampling. This function must use arguments named \code{data} and
\code{tau} and return the IDs of observations to be included in the
resampled dataset.}

\item{...}{additional parameters passed to the function provided in
\code{resampling}.}
}
\value{
A vector of resampled IDs.
}
\description{
Generates a vector of resampled observation IDs.
}
\details{
With categorical outcomes (i.e. when the \code{family} argument is set to
  \code{"binomial"}, \code{"multinomial"} or \code{"cox"}), the resampling is
  done such that the proportion of observations from each of the categories
  is representative of that of the full sample.
}
\examples{
## Linear regression framework
# Simulating a dataset with the default settings
simul <- SimulateRegression()

# Subsampling: IDs are drawn without replacement, so none should be duplicated
ids <- Resample(
  data = simul$ydata,
  family = "gaussian"
)
sum(duplicated(ids))

# Bootstrapping: IDs are drawn with replacement, so duplicates can occur
ids <- Resample(
  data = simul$ydata,
  family = "gaussian",
  resampling = "bootstrap"
)
sum(duplicated(ids))

## Logistic regression framework
# Simulating a dataset for the binomial family
simul <- SimulateRegression(family = "binomial")

# Subsampling: the proportion of each outcome category in the subsample
# should be representative of that of the full sample
ids <- Resample(
  data = simul$ydata,
  family = "binomial"
)
sum(duplicated(ids))
prop.table(table(simul$ydata))
prop.table(table(simul$ydata[ids]))

# Data simulation for a binary confounder
# One value is generated per observation of the simulated outcome, so that the
# confounder stays aligned with the data if the sample size is changed
conf <- ifelse(runif(n = NROW(simul$ydata)) > 0.5, yes = 1, no = 0)

# User-defined resampling function
# Draws, without replacement, a proportion tau of the observations within each
# stratum defined by the outcome class (coded as 0 or 1 in data) and the level
# of the confounder Z. Returns the IDs (positions) of the sampled observations.
BalancedResampling <- function(data, tau, Z, ...) {
  ids <- integer(0)
  for (z in unique(Z)) {
    for (y in c("0", "1")) {
      stratum <- which((data == y) & (Z == z))
      # Number of IDs to draw (fractional sizes are rounded down)
      n_draw <- floor(tau * length(stratum))
      # Sampling positions within the stratum: sample(stratum, ...) would draw
      # from 1:stratum instead if the stratum contained a single ID
      ids <- c(ids, stratum[sample.int(length(stratum), size = n_draw)])
    }
  }
  return(ids)
}

# Resampling with the user-defined function, keeping proportions by Y and Z
ids <- Resample(
  data = simul$ydata,
  family = "binomial",
  resampling = BalancedResampling,
  Z = conf
)
prop.table(table(simul$ydata, conf))
prop.table(table(simul$ydata[ids], conf[ids]))

# Stability selection using the user-defined resampling function
# (Z is supplied as an additional argument alongside the resampling function)
stab <- VariableSelection(
  xdata = simul$xdata,
  ydata = simul$ydata,
  family = "binomial",
  resampling = BalancedResampling,
  Z = conf
)
}
