% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/StatModels.R
\name{tune_classification_model}
\alias{tune_classification_model}
\title{Privacy-preserving Hyperparameter Tuning for Binary Classification Models}
\usage{
tune_classification_model(
  models,
  X,
  y,
  upper.bounds,
  lower.bounds,
  add.bias = FALSE
)
}
\arguments{
\item{models}{Vector of binary classification model objects, each initialized
with a different combination of hyperparameter values from the search space
for tuning. Each model should be initialized with the same epsilon privacy
parameter value eps. The tuned model satisfies eps-level differential
privacy.}

\item{X}{Dataframe of data to be used in tuning the model. Note it is assumed
the data rows and corresponding labels are randomly shuffled.}

\item{y}{Vector or matrix of true labels for each row of X.}

\item{upper.bounds}{Numeric vector giving upper bounds on the values in each
column of X. Should be of length ncol(X). The values are assumed to be in
the same order as the corresponding columns of X. Any value in the columns
of X larger than the corresponding upper bound is clipped at the bound.}

\item{lower.bounds}{Numeric vector giving lower bounds on the values in each
column of X. Should be of length ncol(X). The values are assumed to be in
the same order as the corresponding columns of X. Any value in the columns
of X smaller than the corresponding lower bound is clipped at the bound.}

\item{add.bias}{Boolean indicating whether to add a bias term to X. Defaults
to FALSE.}
}
\value{
Single model object, selected from the input \code{models}, with tuned
parameters.
}
\description{
This function implements the privacy-preserving hyperparameter tuning
function for binary classification \insertCite{chaudhuri2011}{DPpack} using
the exponential mechanism. It accepts a list of models with various chosen
hyperparameters, a dataset X with corresponding labels y, upper and lower
bounds on the columns of X, and a boolean indicating whether to add bias in
the construction of each of the models. The data are split into m+1 equal
groups, where m is the number of models being compared. One group is set
aside as the validation group, and each of the other m groups is used to
train exactly one of the given m models. The number of errors on the validation set
is counted for each model and used as the utility values in the exponential
mechanism (\code{\link{ExponentialMechanism}}) to select a tuned model in a
privacy-preserving way.
}
\examples{
# Simulate K classes of N points each; rows of class j get the label j - 1
N <- 200
K <- 2
centers <- c(-.2, .2) # Mean of the noise term for class 1 and class 2
ramp <- seq(-.25, .25, length.out = N) # Same grid is reused for every class
X <- data.frame()
y <- data.frame()
for (j in seq_len(K)) {
  noise <- stats::rnorm(N, centers[j], .1)
  X <- rbind(X, data.frame(x1 = 3 * ramp, x2 = noise - ramp))
  y <- rbind(y, data.frame(matrix(j-1, N, 1)))
}

# Hold out every 10th row as test data Xtest and ytest; keep the rest for tuning
holdout <- seq(1, N * K, by = 10)
Xtest <- X[holdout, ]
ytest <- y[holdout, , drop = FALSE]
X <- X[-holdout, ]
y <- as.matrix(y[-holdout, , drop = FALSE])

# Candidate gamma values for tuning the logistic regression model
grid.search <- c(100, 1, .0001)

# Build one logistic regression object per candidate gamma
eps <- 1 # Privacy budget should be the same for all models
models <- lapply(
  grid.search,
  function(gamma) LogisticRegressionDP$new("l2", eps, gamma)
)

# Tune using data and bounds for X based on its construction
upper.bounds <- rep(1, ncol(X))
lower.bounds <- -upper.bounds
tuned.model <- tune_classification_model(models, X, y, upper.bounds, lower.bounds)
tuned.model$gamma # Gives resulting selected hyperparameter

# The tuned.model result can be used like a trained LogisticRegressionDP model
# Predict labels for the held-out points and count the misclassifications
predicted.y <- tuned.model$predict(Xtest)
n.errors <- sum(predicted.y != ytest)

}
\references{
\insertRef{chaudhuri2011}{DPpack}
}
