% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utility.R
\name{generate.data}
\alias{generate.data}
\title{Generate simulated data}
\usage{
generate.data(
  n,
  p,
  support.size = NULL,
  rho = 0,
  family = c("gaussian", "binomial", "poisson", "cox", "mgaussian", "multinomial"),
  beta = NULL,
  cortype = 1,
  snr = 10,
  weibull.shape = 1,
  uniform.max = 1,
  y.dim = 3,
  class.num = 3,
  seed = 1
)
}
\arguments{
\item{n}{The number of observations.}

\item{p}{The number of predictors of interest.}

\item{support.size}{The number of nonzero coefficients in the underlying regression
model. Can be omitted if \code{beta} is supplied.}

\item{rho}{A parameter used to characterize the pairwise correlation in
predictors. Default is \code{0}.}

\item{family}{The distribution of the simulated response. \code{"gaussian"} for
univariate quantitative response, \code{"binomial"} for binary classification response,  
\code{"poisson"} for counting response, \code{"cox"} for right-censored response, 
\code{"mgaussian"} for multivariate quantitative response, 
\code{"multinomial"} for multi-classification response.}

\item{beta}{The coefficient values in the underlying regression model. 
If it is supplied, \code{support.size} would be omitted.}

\item{cortype}{The correlation structure. 
\code{cortype = 1} denotes the independence structure, 
where the covariance matrix has \eqn{(i,j)} entry equal to \eqn{I(i = j)}.
\code{cortype = 2} denotes the exponential structure,
where the covariance matrix has \eqn{(i,j)} entry equals \eqn{rho^{|i-j|}}.
\code{cortype = 3} denotes the constant structure, 
where the non-diagonal entries of covariance 
matrix are \eqn{rho} and diagonal entries are 1.}

\item{snr}{A numerical value controlling the signal-to-noise ratio (SNR). The SNR is defined as
the variance of \eqn{x\beta} divided
by the variance of a Gaussian noise: \eqn{\frac{Var(x\beta)}{\sigma^2}}.
The Gaussian noise \eqn{\epsilon} is set with mean 0 and variance \eqn{\sigma^2}.
The noise is added to the linear predictor \eqn{\eta = x\beta}. Default is \code{snr = 10}.}

\item{weibull.shape}{The shape parameter of the Weibull distribution. 
It works only when \code{family = "cox"}. 
Default: \code{weibull.shape = 1}.}

\item{uniform.max}{A parameter controlling censored rate. 
A large value implies a small censored rate; 
otherwise, a large censored rate. 
It works only when \code{family = "cox"}. 
Default is \code{uniform.max = 1}.}

\item{y.dim}{The dimension of the response. It works only when \code{family = "mgaussian"}. Default: \code{y.dim = 3}.}

\item{class.num}{The number of classes. It works only when \code{family = "multinomial"}. Default: \code{class.num = 3}.}

\item{seed}{Random seed. Default: \code{seed = 1}.}
}
\value{
A \code{list} object comprising:
\item{x}{Design matrix of predictors.} 
\item{y}{Response variable.}
\item{beta}{The coefficients used in the underlying regression model.}
}
\description{
Generate simulated data under the 
generalized linear model and Cox proportional hazard model.
}
\details{
For \code{family = "gaussian"}, the data model is 
\deqn{Y = X \beta + \epsilon.}
The underlying regression coefficient \eqn{\beta} has 
uniform distribution [m, 100m] and \eqn{m = 5 \sqrt{2\log(p)/n}.}

For \code{family = "binomial"}, the data model is \deqn{Prob(Y = 1) = \exp(X
\beta + \epsilon)/(1 + \exp(X \beta + \epsilon)).}
The underlying regression coefficient \eqn{\beta} has 
uniform distribution [2m, 10m] and \eqn{m = 5 \sqrt{2\log(p)/n}.}

For \code{family = "poisson"}, the data is modeled to have 
a Poisson distribution: 
\deqn{Y \sim Poisson(\exp(X \beta + \epsilon)).}
The underlying regression coefficient \eqn{\beta} has 
uniform distribution [2m, 10m] and \eqn{m = \sqrt{2\log(p)/n}/3.}

For \code{family = "cox"}, the model for failure time \eqn{T} is
\deqn{T = (-\log(U) / \exp(X \beta))^{1/weibull.shape},}
where \eqn{U} is a uniform random variable with range [0, 1].
The censoring time \eqn{C} is generated from 
uniform distribution \eqn{[0, uniform.max]},
then we define the censor status as 
\eqn{\delta = I(T \le C)} and observed time as \eqn{R = \min\{T, C\}}.
The underlying regression coefficient \eqn{\beta} has 
uniform distribution [2m, 10m], 
where \eqn{m = 5 \sqrt{2\log(p)/n}}.

In the above models, \eqn{\epsilon \sim N(0,
\sigma^2 ),} where \eqn{\sigma^2} is determined by the \code{snr}.
}
\examples{

# Generate simulated data
n <- 200
p <- 20
support.size <- 5
dataset <- generate.data(n, p, support.size)
str(dataset)

}
\author{
Jin Zhu
}
