% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/simsl.main.R
\name{simsl}
\alias{simsl}
\title{Single-index models with a surface-link (main function)}
\usage{
simsl(y, A, X, mu.hat = NULL, family = "gaussian", bs = c("ps",
  "ps"), k = c(8, 8), knots = NULL, sp = NULL, method = "GCV.Cp",
  beta.ini = NULL, beta.ini.gam = FALSE, ind.to.be.positive = 1,
  pen.order = 0, lambda = 0, max.iter = 30, eps.iter = 10^{-2},
  trace.iter = TRUE, center.X = TRUE, scale.X = TRUE,
  si.main.effect = TRUE, bootstrap = FALSE, nboot = 200,
  boot.conf = 0.95, seed = 1357)
}
\arguments{
\item{y}{an n-by-1 vector of treatment outcomes; y is assumed to follow an exponential family distribution; any distribution supported by \code{mgcv::gam} can be used.}

\item{A}{an n-by-1 vector of the treatment variable; each element is assumed to take a value on a continuum.}

\item{X}{an n-by-p matrix of baseline covariates.}

\item{mu.hat}{an n-by-1 vector of the fitted main effect term of the model provided by the user; the default is \code{NULL} and it is taken as a vector of zeros; the optimal choice for this vector is h(E(y|X)), where h is the canonical link function.}

\item{family}{specifies the distribution of y; e.g., "gaussian", "binomial", "poisson"; the default is "gaussian"; can be any family supported by \code{mgcv::gam}.}

\item{bs}{type of basis for representing the treatment-specific smooths; the default is "ps" (p-splines); any basis supported by \code{mgcv::gam} can be used, e.g., "cr" (cubic regression splines).}

\item{k}{basis dimension; the same number (k) is used for all treatment groups, however, the smooths of different treatments have different roughness parameters.}

\item{knots}{a list containing user specified knot values to be used for basis construction (for the treatment and the index variables, respectively).}

\item{sp}{a vector of smoothing parameters associated with the 2-dimensional smooth}

\item{method}{the smoothing parameter estimation method; "GCV.Cp" to use GCV for unknown scale parameter and Mallows' Cp/UBRE/AIC for known scale; any method supported by \code{mgcv::gam} can be used.}

\item{beta.ini}{an initial solution of \code{beta.coef}; a p-by-1 vector; the default is \code{NULL}.}

\item{beta.ini.gam}{if \code{TRUE}, employ a \code{mgcv::gam} smooth function representation of the variable A effect when initializing \code{beta.coef}; otherwise use a linear model representation for the A effect at initialization.}

\item{ind.to.be.positive}{for identifiability of the solution \code{beta.coef}, we restrict the jth component of \code{beta.coef} to be positive; by default \code{j=1}.}

\item{pen.order}{0 indicates the ridge penalty; 1 indicates the 1st difference penalty; 2 indicates the 2nd difference penalty, used in a penalized least squares (LS) estimation of \code{beta.coef}.}

\item{lambda}{a regularization parameter associated with the penalized LS of \code{beta.coef}.}

\item{max.iter}{an integer specifying the maximum number of iterations for \code{beta.coef} update.}

\item{eps.iter}{a value specifying the convergence criterion of the algorithm.}

\item{trace.iter}{if \code{TRUE}, trace the estimation process and print the differences in \code{beta.coef}.}

\item{center.X}{if \code{TRUE}, center X to have zero mean.}

\item{scale.X}{if \code{TRUE}, scale X to have unit variance.}

\item{si.main.effect}{if \code{TRUE}, once the convergence in the estimates of \code{beta.coef} is reached, include the main effect associated with the fitted single-index (beta.coef'X) in the final surface-link estimate.}

\item{bootstrap}{if \code{TRUE}, compute bootstrap confidence intervals for the single-index coefficients, \code{beta.coef}; the default is \code{FALSE}.}

\item{nboot}{when \code{bootstrap=TRUE}, a value specifying the number of bootstrap replications.}

\item{boot.conf}{a value specifying the confidence level of the bootstrap confidence intervals; the default is \code{boot.conf = 0.95}.}

\item{seed}{when \code{bootstrap=TRUE}, randomization seed used in bootstrap resampling.}
}
\value{
a list of information of the fitted SIMSL including
 \item{beta.coef}{the estimated single-index coefficients.} \item{g.fit}{a \code{mgcv::gam} object containing information about the estimated 2-dimensional link function.} \item{beta.ini}{the initial value used in the estimation of \code{beta.coef}} \item{beta.path}{solution path of \code{beta.coef} over the iterations} \item{d.beta}{records the change in \code{beta.coef} over the solution path, \code{beta.path}} \item{X.scale}{sd of pretreatment covariates X} \item{X.center}{mean of pretreatment covariates X} \item{A.range}{range of the observed treatment variable A} \item{p}{number of baseline covariates X} \item{n}{number of subjects} \item{boot.ci}{\code{boot.conf}-level bootstrap CIs (LB, UB) associated with \code{beta.coef}} \item{boot.mat}{a (nboot x p) matrix of bootstrap estimates of \code{beta.coef}}
}
\description{
\code{simsl} is the wrapper function for fitting a single-index model with a surface-link (SIMSL).
The function estimates a linear combination (a single-index) of baseline covariates X, and models a nonlinear interactive structure between the single-index and a treatment variable defined on a continuum, via estimating a smooth link function on the index-treatment domain.
}
\details{
SIMSL captures the effect of covariates via a single-index and their interaction with the treatment via a 2-dimensional smooth link function.
Interaction effects are determined by shapes of the link surface.
The SIMSL allows comparing different individual treatment levels and constructing individual treatment rules,
as functions of a biomarker signature (single-index), efficiently utilizing information on patients' characteristics.
The resulting \code{simsl} object can be used to estimate an optimal dose rule for a new patient with baseline clinical information.
}
\examples{

set.seed(1234)
n <- 200       # training sample size
n.test <- 500  # testing sample size

## simulation 1: quadratic loss around a linear optimal-dose function
p <- 30

# training data: covariates on (-1, 1), doses on (0, 2)
X <- matrix(runif(n * p, min = -1, max = 1), ncol = p)
A <- runif(n, min = 0, max = 2)
f_opt <- 1 + 0.5 * X[, 2] + 0.5 * X[, 1]  # dose maximizing the mean outcome
mu <- 8 + 4 * X[, 1] - 2 * X[, 2] - 2 * X[, 3] - 25 * (f_opt - A)^2
y <- rnorm(length(mu), mean = mu, sd = 1)

# fit SIMSL on the training data
simsl.obj <- simsl(y = y, A = A, X = X)

# testing data, drawn the same way as the training data
X.test <- matrix(runif(n.test * p, min = -1, max = 1), ncol = p)
A.test <- runif(n.test, min = 0, max = 2)
f_opt.test <- 1 + 0.5 * X.test[, 2] + 0.5 * X.test[, 1]

# make prediction based on the estimated SIMSL
pred <- pred.simsl(simsl.obj, newx = X.test)

# the "value" of the estimated treatment rule; the "oracle" value is 8.
value <- mean(8 + 4 * X.test[, 1] - 2 * X.test[, 2] - 2 * X.test[, 3] -
                25 * (f_opt.test - pred$trt.rule)^2)
value

## simulation 2: nonlinear optimal-dose function with an absolute-deviation loss
p <- 10

# training data
X <- matrix(runif(n * p, min = -1, max = 1), ncol = p)
A <- runif(n, min = 0, max = 2)
# optimal dose: a step function of X[, 1] plus smooth terms in X[, 4] and X[, 7]
f_opt <- I(X[, 1] > -0.5) * I(X[, 1] < 0.5) * 0.6 + 1.2 * I(X[, 1] > 0.5) +
  1.2 * I(X[, 1] < -0.5) + X[, 4]^2 + 0.5 * log(abs(X[, 7]) + 1) - 0.6
mu <- 8 + 4 * cos(2 * pi * X[, 2]) - 2 * X[, 4] - 8 * X[, 5]^3 - 15 * abs(f_opt - A)
y <- rnorm(length(mu), mean = mu, sd = 1)
Xq <- cbind(X, X^2)  # include a quadratic term

# fit SIMSL on the augmented covariates
simsl.obj <- simsl(y = y, A = A, X = Xq)

# testing data, drawn the same way as the training data
X.test <- matrix(runif(n.test * p, min = -1, max = 1), ncol = p)
A.test <- runif(n.test, min = 0, max = 2)
f_opt.test <- I(X.test[, 1] > -0.5) * I(X.test[, 1] < 0.5) * 0.6 +
  1.2 * I(X.test[, 1] > 0.5) + 1.2 * I(X.test[, 1] < -0.5) + X.test[, 4]^2 +
  0.5 * log(abs(X.test[, 7]) + 1) - 0.6
Xq.test <- cbind(X.test, X.test^2)

# make prediction based on the estimated SIMSL
pred <- pred.simsl(simsl.obj, newx = Xq.test)

# the "value" of the estimated treatment rule; the "oracle" value is 8.
value <- mean(8 + 4 * cos(2 * pi * X.test[, 2]) - 2 * X.test[, 4] - 8 * X.test[, 5]^3 -
                15 * abs(f_opt.test - pred$trt.rule))
value


\donttest{
 ### air pollution data application
 # load the chicago data set and print its first rows
 data(chicago); head(chicago)
 # drop the third column, then keep only rows that are complete in the remaining columns
 chicago <- chicago[,-3][complete.cases(chicago[,-3]), ]
 # (optional diagnostics, left commented out)
 #plot(chicago$death)
 #chicago$death[2856:2859]
 chicago <- chicago[-c(2856:2859), ]  # get rid of the gross outliers in y
 #plot(chicago$pm10median)
 # remove the single row with the largest pm10median
 chicago <- chicago[-which.max(chicago$pm10median), ]  # get rid of the gross outliers in x

 # create lagged variables: returns a length(x)-by-n.lag matrix whose column i
 # holds x shifted down by (i - 1) positions; the first (i - 1) entries of
 # column i stay NA because no earlier observation exists for them.
 lagard <- function(x, n.lag = 5) {
   n <- length(x)
   X <- matrix(NA, n, n.lag)
   # seq_len() gives an empty loop when n.lag is 0 (1:0 would count down), and
   # min(n.lag, n) leaves lags longer than the series as all-NA columns
   # instead of indexing out of bounds.
   for (i in seq_len(min(n.lag, n))) {
     X[i:n, i] <- x[seq_len(n - i + 1)]
   }
   X
 }
 # store the lagged pm10median values (a matrix with one column per lag) as chicago$pm10
 chicago$pm10 <- lagard(chicago$pm10median)
 # drop the rows left incomplete by the NA padding of the lagged columns
 chicago <- chicago[complete.cases(chicago), ]
 # create season variable: time modulo 365, rounded
 chicago$time.day <- round(chicago$time \%\%  365)

 # fit SIMSL for modeling the season-by-pm10 interactions on their effects on outcomes
 # NOTE(review): chicago[,7] is presumably the lagged pm10 matrix added above -- confirm
 # against the column layout of the data set.
 # bs = c("cc", "ps") selects the bases named "cc" and "ps" for the two margins of the smooth
 simsl.obj <- simsl(y = chicago$death, A = chicago$time.day, X=chicago[,7], bs= c("cc", "ps"),
                    beta.ini.gam = TRUE, family=poisson(), method = "REML")
 simsl.obj$beta.coef  # the estimated single-index coefficients
 summary(simsl.obj$g.fit)
 # bootstrap confidence intervals (left commented out; nboot is kept small here)
 #simsl.obj.boot <- simsl(y = chicago$death, A = chicago$time.day, X=chicago[,7],
 #                        bs= c("cc", "ps"), family=poisson(), beta.ini.gam = TRUE,
 #                        method = "REML", bootstrap = TRUE, nboot=5)  # nboot =500
 #simsl.obj.boot$boot.ci


 # additive (no-interaction) fit for comparison with the interaction surface
 # NOTE(review): model[,3] of g.fit is presumably the fitted single-index -- confirm.
 additive.fit  <- mgcv::gam(chicago$death ~
                              s(simsl.obj$g.fit$model[,3], k=8, bs="ps") +
                              s(chicago$time.day, k=8, bs="cc"),
                            family = poisson(), method = "REML")
 # select=2 plots the second smooth term (time.day); shift adds the first coefficient
 plot(additive.fit, shift= additive.fit$coefficients[1], select=2,
      ylab= "Linear predictor", xlab= "A", main = expression(paste("Individual A effect")))
 # select=1 plots the first smooth term (the single-index)
 plot(additive.fit, shift= additive.fit$coefficients[1], select = 1,
      xlab= expression(paste(beta*minute,"x")), ylab= " ",
      main = expression(paste("Individual ", beta*minute,"x effect")))
 # perspective plot of the fitted 2-dimensional surface over A and the single-index
 mgcv::vis.gam(simsl.obj$g.fit, view=c("A","single.index"), theta=-135, phi = 30,color="heat", se=1,
               ylab = "single-index", zlab = " ", main=expression(paste("Interaction surface ")))



 ### Warfarin data application
 data(warfarin)
 X <- warfarin$X
 A <- warfarin$A
 # outcome: negative absolute distance of INR from 2.5, so values nearer 0 are better
 y <- -abs(warfarin$INR - 2.5)  # the target INR is 2.5
 X[,1:3] <- scale(X[,1:3]) # standardize continuous variables

 # Estimate the main effect, using an additive model for continuous variables and
 # a linear model for the indicator variables
 mu.fit <- mgcv::gam(y-mean(y)  ~ X[, 4:13] +
                       s(X[,1], k=5, bs="ps")+
                       s(X[,2], k=5, bs="ps") +
                       s(X[,3], k=5, bs="ps"), method="REML")
 summary(mu.fit)
 # fitted values of the main-effect model, supplied to simsl through mu.hat
 mu.hat <- predict(mu.fit)
 # fit SIMSL (we do not scale/center X for the interpretability of the indicator variables in X).
 simsl.obj <- simsl(y, A, X, mu.hat=mu.hat, scale.X = FALSE, center.X=FALSE, method="REML")
 simsl.obj$beta.coef
 # bootstrap confidence intervals (left commented out; nboot is kept small here)
 #simsl.obj.boot <- simsl(y, A, X, mu.hat=mu.hat, scale.X=FALSE, center.X=FALSE,
 #                        bootstrap = TRUE, nboot=5, method="REML")  # nboot = 500
 #simsl.obj.boot$boot.ci


 # additive (no-interaction) fit of the main-effect-adjusted outcome, for comparison
 # NOTE(review): model[,3] of g.fit is presumably the fitted single-index -- confirm.
 additive.fit  <- mgcv::gam(y-mu.hat ~
                              s(A, k=8, bs="ps") +
                              s(simsl.obj$g.fit$model[,3], k=8, bs="ps"),
                           method = "REML" )
 # select=1 plots the first smooth term (A); shift adds the first coefficient
 plot(additive.fit, shift= additive.fit$coefficients[1], select=1,
      ylab= "Y", main = expression(paste("Individual A effect")))
 # select=2 plots the second smooth term (the single-index)
 plot(additive.fit, shift= additive.fit$coefficients[1], select=2,
      xlab= expression(paste(beta*minute,"x")), ylab= " ",
      main = expression(paste("Individual ", beta*minute,"x effect")))
 # perspective plot of the fitted 2-dimensional surface over A and the single-index
 mgcv::vis.gam(simsl.obj$g.fit, view=c("A","single.index"), theta=55, phi = 30,color="heat", se=1,
               ylab = "single-index", zlab = "Y", main=expression(paste("Interaction surface ")))
}
}
\seealso{
\code{pred.simsl},  \code{fit.simsl}
}
\author{
Park, Petkova, Tarpey, Ogden
}
