\name{gesso.cv}
\alias{gesso.cv}
%- Also NEED an '\alias' for EACH other topic documented here.
\title{
Cross-Validation
}
\description{
Performs \code{nfolds}-fold cross-validation to tune hyperparameters \code{lambda_1} and \code{lambda_2} for the gesso model.
}
\usage{
gesso.cv(G, E, Y, C = NULL, normalize = TRUE, normalize_response = FALSE, grid = NULL,
         grid_size = 20, grid_min_ratio = NULL, alpha = NULL, family = "gaussian", 
         type_measure = "loss", fold_ids = NULL, nfolds = 4, 
         parallel = TRUE, seed = 42, tolerance = 1e-3, max_iterations = 5000, 
         min_working_set_size = 100, verbose = TRUE)
}
%- maybe also 'usage' for other objects documented here.
\arguments{
  \item{G}{matrix of main effects of size \code{n x p}, variables organized by columns}
  \item{E}{vector of environmental measurements}
  \item{Y}{outcome vector. Set \code{family="gaussian"} for the continuous outcome and 
          \code{family="binomial"} for the binary outcome with 0/1 levels}
  \item{C}{matrix of confounders of size \code{n x m}, variables organized by columns} 
  \item{normalize}{\code{TRUE} to normalize matrix \code{G} and vector \code{E}}
  \item{normalize_response}{\code{TRUE} to normalize vector \code{Y} (for \code{family="gaussian"})}
  \item{grid}{grid sequence for tuning hyperparameters; the same grid is used for \code{lambda_1} and \code{lambda_2}}
  \item{grid_size}{specify \code{grid_size} to generate grid automatically. Grid is generated by calculating \code{max_lambda} from the data (smallest lambda such that all the coefficients are zero). \code{min_lambda} is calculated as a product of \code{max_lambda} and \code{grid_min_ratio}. The program then generates \code{grid_size} values equidistant on the log10 scale from \code{min_lambda} to \code{max_lambda}}
  \item{grid_min_ratio}{parameter to determine \code{min_lambda} (smallest value for the grid of lambdas),
  default is 0.1 for p > n, 0.01 otherwise}
  \item{alpha}{if \code{NULL} independent 2D grid is used for (\code{lambda_1}, \code{lambda_2}), else 1D grid is used where \code{lambda_2} = \code{alpha} * \code{lambda_1}, i.e. (\code{lambda_1}, \code{alpha} * \code{lambda_1})}
  \item{family}{\code{"gaussian"} for continuous outcome and \code{"binomial"} for binary}
  \item{type_measure}{loss to use for cross-validation. Specify \code{type_measure="loss"} for negative log-likelihood or \code{type_measure="auc"} for AUC (for \code{family="binomial"} only)}
  \item{fold_ids}{option to input custom folds assignments}
  \item{tolerance}{tolerance for the dual gap convergence criterion}
  \item{max_iterations}{maximum number of iterations}
  \item{min_working_set_size}{minimum size of the working set}
  \item{nfolds}{number of cross-validation splits}
  \item{parallel}{\code{TRUE} to enable parallel cross-validation}
  \item{seed}{set random seed to control random folds assignments}
  \item{verbose}{\code{TRUE} to print messages}
}
\value{
A list of objects
\item{cv_result}{a tibble with cross-validation results: the loss averaged across folds and the number of non-zero coefficients for each value of the (\code{lambda_1}, \code{lambda_2}) path. Could be used for custom parameter tuning (e.g. select (\code{lambda_1}, \code{lambda_2}) with a certain number of non-zero main effects and/or a certain number of interactions).
\itemize{
  \item{\code{mean_loss  }}{averaged across folds loss value, vector of size \code{lambda_1}*\code{lambda_2}}
  \item{\code{mean_beta_g_nonzero  }}{averaged across folds number of non-zero main effects, vector of size \code{lambda_1}*\code{lambda_2}}
  \item{\code{mean_beta_gxe_nonzero  }}{  averaged across folds number of non-zero interactions, vector of size \code{lambda_1}*\code{lambda_2}}
  \item{\code{lambda_1  }}{\code{lambda_1} path, decreasing}
  \item{\code{lambda_2  }}{\code{lambda_2} path, oscillating}
}}
\item{lambda_min}{a tibble of optimal (\code{lambda_1}, \code{lambda_2}) values, tuning parameter values that give minimum cross-validation loss (\code{mean_loss})}
%\item{lambda_se}{a tibble of optimal (\code{lambda_1}, \code{lambda_2}) values}
\item{fit}{list, return of the function gesso.fit on the full data}
\item{grid}{vector of values used for hyperparameters tuning}
\item{full_cv_result}{inner variables}
}
\examples{
data = data.gen()
tune_model = gesso.cv(data$G_train, data$E_train, data$Y_train, 
                      grid_size=20, parallel=TRUE, nfolds=3)
gxe_coefficients = gesso.coef(tune_model$fit, tune_model$lambda_min)$beta_gxe        
g_coefficients = gesso.coef(tune_model$fit, tune_model$lambda_min)$beta_g          
}

