% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/glm.sdf.R
\name{glm.sdf}
\alias{glm.sdf}
\alias{logit.sdf}
\alias{probit.sdf}
\alias{glm}
\title{EdSurvey Generalized Linear Models}
\usage{
glm.sdf(formula, family = binomial(link = "logit"), data,
  weightVar = NULL, relevels = list(), 
  varMethod=c("jackknife", "Taylor"), jrrIMax = 1,
  omittedLevels = TRUE, defaultConditions = TRUE, recode = NULL,
  returnNumberOfPSU=FALSE, returnVarEstInputs = FALSE)

logit.sdf(formula, data, weightVar = NULL, relevels = list(),
  varMethod = c("jackknife", "Taylor"), jrrIMax = 1,
  omittedLevels = TRUE, defaultConditions = TRUE, recode = NULL,
  returnNumberOfPSU = FALSE, returnVarEstInputs = FALSE)

probit.sdf(formula, data, weightVar = NULL, relevels = list(),
  varMethod = c("jackknife", "Taylor"), jrrIMax = 1,
  omittedLevels = TRUE, defaultConditions = TRUE, recode = NULL,
  returnVarEstInputs = FALSE)
}
\arguments{
\item{formula}{a \ifelse{latex}{\code{formula}}{\code{\link[stats]{formula}}} for the
linear model. See \ifelse{latex}{\code{glm}}{\code{\link[stats]{glm}}}.
For logit and probit, we recommend using the \code{I()} function 
to define the level used for success. (See Examples.)}

\item{family}{the \code{glm.sdf} function currently fits only the binomial
outcome models, such as logit and probit, although other link
functions are available for binomial models. See the \code{link}
argument in the help for 
\ifelse{latex}{\code{family}}{\code{\link[stats]{family}}}.}

\item{data}{an \code{edsurvey.data.frame}}

\item{weightVar}{character indicating the weight variable to use (see Details).
The \code{weightVar} must be one of the weights for the
\code{edsurvey.data.frame}. If \code{NULL}, uses the default
for the \code{edsurvey.data.frame}.}

\item{relevels}{a list; used to change the contrasts from the
default treatment contrasts to the treatment contrasts with a chosen omitted
group. The name of each element should be the variable name, and the value 
should be the group to be omitted.}

\item{varMethod}{a character set to \dQuote{jackknife} or \dQuote{Taylor} that indicates the variance
estimation method to be used. See Details.}

\item{jrrIMax}{the \eqn{V_{jrr}} term (see Details) can be estimated with
any positive number of plausible values and is estimated using the smaller
of the number of available plausible values and \code{jrrIMax}. When
\code{jrrIMax} is set to \code{Inf}, all plausible values will be used.
Higher values of \code{jrrIMax} lead to longer computing times and more
accurate variance estimates.}

\item{omittedLevels}{a logical value. When set to the default value of \code{TRUE}, drops
those levels of all factor variables that are specified
in \code{edsurvey.data.frame}. Use \code{print} on an
\code{edsurvey.data.frame} to see the omitted levels.}

\item{defaultConditions}{a logical value. When set to the default value of \code{TRUE}, uses
the default conditions stored in an \code{edsurvey.data.frame}
to subset the data. Use \code{print} on an
\code{edsurvey.data.frame} to see the default conditions.}

\item{recode}{a list of lists to recode variables. Defaults to \code{NULL}. Can be set as
\code{recode=} \code{list(}\code{var1=} \code{list(from=} \code{c("a",} \code{"b",} \code{"c"),} \code{to=}\code{"d"))}. See Examples.}

\item{returnNumberOfPSU}{a logical value set to \code{TRUE} to return the number of 
primary sampling units (PSUs)}

\item{returnVarEstInputs}{a logical value set to \code{TRUE} to return the
inputs to the jackknife and imputation variance
estimates. This is intended to allow for
the computation
of covariances between estimates.}
}
\value{
An \code{edsurveyGlm} with the following elements:
   \item{call}{the function call}
   \item{formula}{the formula used to fit the model}
   \item{coef}{the estimates of the coefficients}
   \item{se}{the standard error estimates of the coefficients}
   \item{Vimp}{the estimated variance due to uncertainty in the scores (plausible value variables)}
   \item{Vjrr}{the estimated variance due to sampling}
   \item{M}{the number of plausible values}
   \item{nPSU}{the number of PSUs used in calculation}
   \item{varm}{the variance estimates under the various plausible values}
   \item{coefm}{the values of the coefficients under the various plausible values}
   \item{coefmat}{the coefficient matrix (typically produced by the summary of a model)}
   \item{weight}{the name of the weight variable}
   \item{npv}{the number of plausible values}
   \item{njk}{the number of jackknife replicates used}
   \item{varMethod}{the variance estimation method used (jackknife or Taylor series)}
   \item{varEstInputs}{when \code{returnVarEstInputs} is \code{TRUE},
                       this element is returned. These are
                       used for calculating covariances with
                       \code{\link{varEstToCov}}.}
}
\description{
Fits a logit or probit model that
             uses weights and variance estimates
             appropriate for the \code{edsurvey.data.frame},
             \code{light.edsurvey.data.frame}, or \code{edsurvey.data.frame.list}.
}
\details{
This function implements an estimator that correctly handles left-hand side
variables that are logical, allows for survey sampling weights, and estimates
variances using jackknife replication or Taylor series.
The vignette titled
\href{https://www.air.org/sites/default/files/EdSurvey-Statistics.pdf}{Statistics}
describes estimation of the reported statistics. 

The coefficients are estimated
using the sample weights according to the section
\dQuote{Estimation of Weighted Means When Plausible Values Are Not Present}
or the section 
\dQuote{Estimation of Weighted Means When Plausible Values Are Present,}
depending on whether there are assessment variables or variables with plausible values
in them.

How the standard errors of the coefficients are estimated depends on the
presence of plausible values (assessment variables).
Once the variance is obtained, the \emph{t} statistic
is given by \deqn{t=\frac{\hat{\beta}}{\sqrt{\mathrm{var}(\hat{\beta})}}} where
\eqn{ \hat{\beta} } is the estimated coefficient and \eqn{\mathrm{var}(\hat{\beta})} is
the variance of that estimate.

Note that \code{logit.sdf} and \code{probit.sdf} are included for convenience only;
they give the same results as a call to \code{glm.sdf} with the binomial family
and the link function named in the function call (logit or probit).
By default, \code{glm.sdf} fits a logistic regression when \code{family} is not set,
so \code{glm.sdf} and \code{logit.sdf} are expected to give the same results in that case.
Other types of generalized linear models are not supported.

\subsection{Variance estimation of coefficients}{
  All variance estimation methods are shown in the vignette titled
\href{https://www.air.org/sites/default/files/EdSurvey-Statistics.pdf}{Statistics}.
  When the predicted
  value does not have plausible values and \code{varMethod} is set to
  \code{jackknife}, the variance of the coefficients
  is estimated according to the section
\dQuote{Estimation of Standard Errors of Weighted Means When
        Plausible Values Are Not Present, Using the Jackknife Method.}

  When plausible values are present and \code{varMethod} is set to
  \code{jackknife}, the
  variance of the coefficients is estimated according to the section
\dQuote{Estimation of Standard Errors of Weighted Means When
        Plausible Values Are Present, Using the Jackknife Method.}

  When the predicted
  value does not have plausible values and \code{varMethod} is set to
  \code{Taylor}, the variance of the coefficients
  is estimated according to the section
\dQuote{Estimation of Standard Errors of Weighted Means When
        Plausible Values Are Not Present, Using the Taylor Series Method.}

  When plausible values are present and \code{varMethod} is set to
  \code{Taylor}, the
  variance of the coefficients is estimated according to the section
\dQuote{Estimation of Standard Errors of Weighted Means When
        Plausible Values Are Present, Using the Taylor Series Method.}
}
}
\examples{
\dontrun{
# read in the example data (generated, not real student data)
sdf <- readNAEP(system.file("extdata/data", "M36NT2PM.dat", package = "NAEPprimer"))

# By default uses jackknife variance method using replicate weights
table(sdf$b013801)
logit1 <- logit.sdf(I(b013801 \%in\% c("26-100", ">100")) ~ dsex + b017451, data=sdf)
# use summary to get detailed results
summary(logit1)

# Taylor series variance estimation
logit1t <- logit.sdf(I(b013801 \%in\% c("26-100", ">100")) ~ dsex + b017451, data=sdf,
                     varMethod="Taylor")
summary(logit1t)

logit2 <- logit.sdf(I(composite >= 300) ~ dsex + b013801, data=sdf)
summary(logit2)

logit3 <- glm.sdf(I(composite >= 300) ~ dsex + b013801, data=sdf, 
                  family=quasibinomial(link="logit"))

summary(logit3)
}
}
\seealso{
\ifelse{latex}{\code{glm}}{\code{\link[stats]{glm}}}
}
\author{
Paul Bailey
}
