% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/xvalidate.R
\name{xvalidate}
\alias{xvalidate}
\title{Implementing Cross Validation}
\usage{
xvalidate(
  y,
  x,
  fes,
  IDs,
  testID = NULL,
  tol = 1e-08,
  hdfetol = 1e-04,
  colcheck = TRUE,
  init_mu = NULL,
  init_x = NULL,
  init_z = NULL,
  verbose = FALSE,
  cluster = NULL,
  penalty = "lasso",
  method = "placeholder",
  standardize = TRUE,
  penweights = rep(1, ncol(x_reg)),
  lambda = 0
)
}
\arguments{
\item{y}{Dependent variable (a vector)}

\item{x}{Regressor matrix.}

\item{fes}{List of fixed effects.}

\item{IDs}{A vector of fold IDs for k-fold cross validation. If left unspecified, each observation
is assigned to a different fold (warning: this is likely to be very resource-intensive).}

\item{testID}{Optional. A number indicating which ID to hold out during cross-validation. If left
unspecified, the function cycles through all IDs and reports the average RMSE.}

\item{tol}{Tolerance parameter for convergence of the IRLS algorithm.}

\item{hdfetol}{Tolerance parameter for the within-transformation step,
passed on to \code{lfe::demeanlist}.}

\item{colcheck}{Logical. If \code{TRUE}, checks for perfect multicollinearity in \code{x}.}

\item{init_mu}{Optional: initial values of the conditional mean \eqn{\mu}, to be used as weights in the
first iteration of the algorithm.}

\item{init_x}{Optional: initial values of the independent variables.}

\item{init_z}{Optional: initial values of the transformed dependent variable, to be used in the
first iteration of the algorithm.}

\item{verbose}{Logical. If \code{TRUE}, it prints information to the screen while evaluating.}

\item{cluster}{Optional: a vector classifying observations into clusters (to use when calculating SEs).}

\item{penalty}{A string indicating the penalty type. Currently supported: "lasso" and "ridge".}

\item{method}{The user can set this equal to "plugin" to perform the plugin algorithm with
coefficient-specific penalty weights (see details). Otherwise, a single global penalty is used.}

\item{standardize}{Logical. If \code{TRUE}, x variables are standardized before estimation.}

\item{penweights}{Optional: a vector of coefficient-specific penalties to use in plugin lasso when
\code{method == "plugin"}.}

\item{lambda}{Penalty parameter, to be passed on to \code{penhdfeppml_int} or \code{penhdfeppml_cluster_int}.}
}
\value{
A list with two elements:
\itemize{
\item \code{rmse}: root mean squared error (RMSE).
\item \code{mu}: conditional means.
}
}
\description{
This is the internal function called by \code{mlfitppml_int} to perform cross-validation, if the
option is enabled. It is also available on a stand-alone basis in case it is needed, but generally
users will be better served by using the wrapper \code{mlfitppml}.
}
\details{
\code{xvalidate} carries out cross-validation with the user-provided IDs by holding out each one of
them, sequentially, as in the k-fold procedure (unless \code{testID} is specified, in which case
it just uses this ID for validation). After filtering out the holdout sample, the function simply
calls \code{\link{penhdfeppml_int}} or \code{\link{penhdfeppml_cluster_int}} to estimate the
coefficients, predicts the conditional means for the held-out observations, and finally calculates
the root mean squared error (RMSE).
}
\section{References}{

Breinlich, H., Corradi, V., Rocha, N., Ruta, M., Santos Silva, J.M.C. and T. Zylkin (2021).
"Machine Learning in International Trade Research: Evaluating the Impact of Trade Agreements",
Policy Research Working Paper; No. 9629. World Bank, Washington, DC.

Correia, S., P. Guimaraes and T. Zylkin (2020). "Fast Poisson estimation with high dimensional
fixed effects", \emph{Stata Journal}, 20, 90-115.

Gaure, S. (2013). "OLS with multiple high dimensional category variables",
\emph{Computational Statistics & Data Analysis}, 66, 8-18.

Friedman, J., T. Hastie, and R. Tibshirani (2010). "Regularization paths for generalized linear
models via coordinate descent", \emph{Journal of Statistical Software}, 33, 1-22.

Belloni, A., V. Chernozhukov, C. Hansen and D. Kozbur (2016). "Inference in high dimensional panel
models with an application to gun control", \emph{Journal of Business & Economic Statistics}, 34, 590-605.
}

\examples{
# First, we need to transform the data. Start by filtering the data set to keep only countries in
# the Americas:
americas <- countries$iso[countries$region == "Americas"]
trade <- trade[(trade$imp \%in\% americas) & (trade$exp \%in\% americas), ]
# Now generate the needed x, y and fes objects:
y <- trade$export
x <- data.matrix(trade[, -1:-6])
fes <- list(exp_time = interaction(trade$exp, trade$time),
            imp_time = interaction(trade$imp, trade$time),
            pair     = interaction(trade$exp, trade$imp))
# We also need to create the IDs. We split the data set by agreement, not observation, so that all
# rows sharing an agreement id (column 5 of trade) are assigned to the same fold:
id <- unique(trade[, 5])
nfolds <- 10
unique_ids <- data.frame(id = id, fold = sample(1:nfolds, size = length(id), replace = TRUE))
# Look up each row's fold with match() rather than merge(): merge() sorts its result on the key,
# so the fold vector would no longer line up row-by-row with y, x and fes.
cross_ids <- data.frame(id = trade[, 5],
                        fold = unique_ids$fold[match(trade[, 5], unique_ids$id)])
# Finally, we try xvalidate with a lasso penalty (the default) and a single lambda value:
\donttest{reg <- xvalidate(y = y, x = x, fes = fes, lambda = 0.001,
                         IDs = cross_ids$fold, verbose = TRUE)}

}
