% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/calibrate_to_estimate.R
\name{calibrate_to_estimate}
\alias{calibrate_to_estimate}
\title{Calibrate weights from a primary survey to estimated totals from a control survey,
with replicate-weight adjustments that account for variance of the control totals}
\usage{
calibrate_to_estimate(
  rep_design,
  estimate,
  vcov_estimate,
  cal_formula,
  calfun = survey::cal.linear,
  bounds = list(lower = -Inf, upper = Inf),
  verbose = FALSE,
  maxit = 50,
  epsilon = 1e-07,
  variance = NULL,
  col_selection = NULL
)
}
\arguments{
\item{rep_design}{A replicate design object for the primary survey, created with either the \code{survey} or \code{srvyr} packages.}

\item{estimate}{A vector of estimated control totals.
The names of entries must match the names from calling \code{svytotal(x = cal_formula, design = rep_design)}.}

\item{vcov_estimate}{A variance-covariance matrix for the estimated control totals.
The column names and row names must match the names of \code{estimate}.}

\item{cal_formula}{A formula listing the variables to use for calibration.
All of these variables must be included in \code{rep_design}.}

\item{calfun}{A calibration function from the \code{survey} package,
such as \link[survey]{cal.linear}, \link[survey]{cal.raking}, or \link[survey]{cal.logit}.
Use \code{cal.linear} for ordinary post-stratification, and \code{cal.raking} for raking.
See \link[survey]{calibrate} for additional details.}

\item{bounds}{Parameter passed to \link[survey]{grake} for calibration. See \link[survey]{calibrate} for details.}

\item{verbose}{Parameter passed to \link[survey]{grake} for calibration. See \link[survey]{calibrate} for details.}

\item{maxit}{Parameter passed to \link[survey]{grake} for calibration. See \link[survey]{calibrate} for details.}

\item{epsilon}{Parameter passed to \link[survey]{grake} for calibration. \cr
After calibration, the absolute difference between each calibration target and the calibrated estimate
will be no larger than \code{epsilon} times (1 plus the absolute value of the target).
See \link[survey]{calibrate} for details.}

\item{variance}{Parameter passed to \link[survey]{grake} for calibration. See \link[survey]{calibrate} for details.}

\item{col_selection}{Optional parameter to determine which replicate columns
will have their control totals perturbed. If supplied, \code{col_selection} must be an integer vector
with length equal to the length of \code{estimate}.}
}
\value{
A replicate design object, with full-sample weights calibrated to totals from \code{estimate},
and replicate weights adjusted to account for variance of the control totals.
The element \code{col_selection} indicates which replicate columns of the calibrated primary survey
had their control totals perturbed.
}
\description{
Calibrate the weights of a primary survey to match estimated totals from a control survey,
using adjustments to the replicate weights to account for the variance of the estimated control totals.
The adjustments to replicate weights are conducted using the method proposed by Fuller (1998).
This method can be used to implement general calibration as well as post-stratification or raking specifically
(see the details for the \code{calfun} parameter).
}
\details{
With the Fuller method, each of \code{k} randomly-selected replicate columns from the primary survey
is calibrated to control totals formed by perturbing the \code{k}-dimensional vector of
estimated control totals using a spectral decomposition of the variance-covariance matrix
of the estimated control totals. Other replicate columns are simply calibrated to the unperturbed control totals.
\cr

Because the set of replicate columns whose control totals are perturbed should be random,
there are two ways to ensure that this selection is reproducible.
The user can either call \link[base]{set.seed} before using the function,
or supply a vector of randomly-selected column indices to the argument \code{col_selection}.
}
\examples{
\dontrun{

# Build the primary survey as a jackknife replicate design ----

  suppressPackageStartupMessages(library(survey))
  data(api)

  primary_survey <- as.svrepdesign(
    svydesign(id = ~dnum, weights = ~pw, data = apiclus1, fpc = ~fpc),
    type = "JK1"
  )

# Build the control survey as a jackknife replicate design ----

  control_survey <- as.svrepdesign(
    svydesign(id = ~ 1, fpc = ~fpc, data = apisrs),
    type = "JK1"
  )

# Estimate the control totals and their variance-covariance matrix ----

  control_totals <- svytotal(
    x = ~ stype + enroll,
    design = control_survey
  )
  control_estimates <- coef(control_totals)
  control_vcov <- vcov(control_totals)

# Calibrate to totals of one categorical and one numeric variable ----

  calibrated_design <- calibrate_to_estimate(
    rep_design    = primary_survey,
    estimate      = control_estimates,
    vcov_estimate = control_vcov,
    cal_formula   = ~ stype + enroll
  )

# Compare estimates before and after calibration ----

  ##_ Calibration variables: after calibration, the estimates and
  ##_ standard errors agree with those from the control survey

    svytotal(x = ~ stype + enroll, design = primary_survey)
    svytotal(x = ~ stype + enroll, design = control_survey)
    svytotal(x = ~ stype + enroll, design = calibrated_design)

  ##_ Other variables: their estimates shift as well

    svymean(x = ~ api00 + api99, design = primary_survey)
    svymean(x = ~ api00 + api99, design = control_survey)
    svymean(x = ~ api00 + api99, design = calibrated_design)

# Compare weights before and after calibration ----

  summarize_rep_weights(primary_survey, type = 'overall')
  summarize_rep_weights(calibrated_design, type = 'overall')

# Reproduce the result by reusing the columns selected for the Fuller method ----

  selected_columns <- calibrated_design$col_selection
  print(selected_columns)

  calibrated_design <- calibrate_to_estimate(
    rep_design    = primary_survey,
    estimate      = control_estimates,
    vcov_estimate = control_vcov,
    cal_formula   = ~ stype + enroll,
    col_selection = selected_columns
  )
}
}
\references{
Fuller, W.A. (1998).
"Replication variance estimation for two-phase samples."
\strong{Statistica Sinica}, \emph{8}: 1153-1164.

Opsomer, J.D. and A. Erciulescu (2021).
"Replication variance estimation after sample-based calibration."
\strong{Survey Methodology}, \emph{47}: 265-277.
}
