% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/lm.sdf.R
\name{lm.sdf}
\alias{lm.sdf}
\alias{lm}
\title{EdSurvey Linear Models}
\usage{
lm.sdf(formula, data, weightVar = NULL, relevels = list(),
              varMethod = c("jackknife", "Taylor"), jrrIMax = 1,
              omittedLevels = TRUE, defaultConditions = TRUE, recode = NULL,
              returnVarEstInputs = FALSE, returnNumberOfPSU = FALSE,
              standardizeWithSamplingVar = FALSE)
}
\arguments{
\item{formula}{a \ifelse{latex}{\code{formula}}{\code{\link[stats]{formula}}} for the
linear model. See \ifelse{latex}{\code{lm}}{\code{\link[stats]{lm}}}.
If \emph{y} is left blank, the default subject scale or subscale variable
will be used. (You can find the default using
\code{\link{showPlausibleValues}}.)
If \emph{y} is a variable for a subject scale or subscale (one of the
names shown by \code{\link{showPlausibleValues}}),
then that subject scale or subscale is used.}

\item{data}{an \code{edsurvey.data.frame}, a \code{light.edsurvey.data.frame},
or an \code{edsurvey.data.frame.list}}

\item{weightVar}{a character indicating the weight variable to use (see Details).
The \code{weightVar} must be one of the weights for the
\code{edsurvey.data.frame}. If \code{NULL}, the default weight
for the \code{edsurvey.data.frame} is used.}

\item{relevels}{a list; used when the user wants to change the contrasts from the
default treatment contrasts to the treatment contrasts with a chosen omitted
group. The name of each element should be the variable name, and the value 
should be the group to be omitted.}

\item{varMethod}{a character set to \dQuote{jackknife} or \dQuote{Taylor} that indicates the variance
estimation method to be used. See Details.}

\item{jrrIMax}{when using the jackknife variance estimation method, the \eqn{V_{jrr}} term
(see Details) can be estimated with any positive number of plausible values.
It is estimated using the smaller of the number of available plausible values
and \code{jrrIMax}. When
\code{jrrIMax} is set to \code{Inf}, all plausible values will be used.
Higher values of \code{jrrIMax} lead to longer computing times and more
accurate variance estimates.}

\item{omittedLevels}{a logical value. When set to the default value of \code{TRUE}, drops
those levels of all factor variables that are specified
in an \code{edsurvey.data.frame}. Use \code{print} on an
\code{edsurvey.data.frame} to see the omitted levels.}

\item{defaultConditions}{a logical value. When set to the default value of \code{TRUE}, uses
the default conditions stored in an \code{edsurvey.data.frame}
to subset the data. Use \code{print} on an
\code{edsurvey.data.frame} to see the default conditions.}

\item{recode}{a list of lists to recode variables. Defaults to \code{NULL}. Can be set as
\code{recode=}\code{list(}\code{var1} \code{=} \code{list(}\code{from=} \code{c("a",} \code{"b",} \code{"c"),} \code{to=} \code{"d"))}. See Examples.}

\item{returnVarEstInputs}{a logical value set to \code{TRUE} to return the
inputs to the jackknife and imputation variance
estimates. This is intended to allow for
the computation
of covariances between estimates.}

\item{returnNumberOfPSU}{a logical value set to \code{TRUE} to return the number of 
primary sampling units (PSU).}

\item{standardizeWithSamplingVar}{a logical value indicating whether the standardized coefficients
should have the variance of the regressors and outcome measured
with sampling variance. Defaults to \code{FALSE}.}
}
\value{
An \code{edsurvey.lm} with the following elements:
   \item{call}{the function call}
   \item{formula}{the formula used to fit the model}
   \item{coef}{the estimates of the coefficients}
   \item{se}{the standard error estimates of the coefficients}
   \item{Vimp}{the estimated variance from uncertainty in the scores (plausible value variables)}
   \item{Vjrr}{the estimated variance from sampling}
   \item{M}{the number of plausible values}
   \item{varm}{the variance estimates under the various plausible values}
   \item{coefm}{the values of the coefficients under the various plausible values}
   \item{coefmat}{the coefficient matrix (typically produced by the summary of a model)}
   \item{r.squared}{the coefficient of determination}
   \item{weight}{the name of the weight variable}
   \item{npv}{the number of plausible values}
   \item{jrrIMax}{the \code{jrrIMax} value used in computation}
   \item{njk}{the number of jackknife replicates used; set to \code{NA}
              when Taylor series variance estimates are used}
   \item{varMethod}{one of \code{Taylor series} or \code{jackknife}}
   \item{residuals}{residuals from the average regression coefficients}
   \item{PV.residuals}{residuals from the coefficients estimated separately for each plausible value}
   \item{PV.fitted.values}{fitted values from the coefficients estimated separately for each plausible value}
   \item{B}{imputation variance covariance matrix, before multiplication by (M+1)/M}
   \item{U}{sampling variance covariance matrix}
   \item{rbar}{average relative increase in variance; see van Buuren (2012, eq. 2.29)}
   \item{nPSU}{number of PSUs used in calculation}
   \item{n0}{number of rows on \code{edsurvey.data.frame} before any conditions were applied}
   \item{nUsed}{number of observations with valid data and weights larger than zero}
   \item{data}{data used for the computation}
   \item{Xstdev}{standard deviations of regressors, used for computing standardized
                 regression coefficients when \code{standardizeWithSamplingVar} is set to
                 \code{FALSE} (the default)}
   \item{varSummary}{the result of running \code{summary2} (unweighted) on each variable in the
                     regression}
   \item{varEstInputs}{when \code{returnVarEstInputs} is \code{TRUE},
                       this element is returned. These are
                       used for calculating covariances with
                       \code{\link{varEstToCov}}.}
   \item{standardizeWithSamplingVar}{when \code{standardizeWithSamplingVar}
                                     is set to \code{TRUE} this element is
                                     returned. Calculates the standard deviation
                                     of the standardized
                                     regression coefficients like any other
                                     variable.}
}
\description{
Fits a linear model that uses weights and variance estimates appropriate for the data.
}
\details{
This function implements an estimator that correctly handles left-hand
side variables that are either numeric or plausible values, allows for survey
sampling weights, and estimates variances using the jackknife replication method.
The vignette titled
\href{https://www.air.org/sites/default/files/EdSurvey-Statistics.pdf}{Statistics}
 describes estimation of the reported statistics. 

Regardless of the variance estimation, the coefficients are estimated
using the sample weights according to the sections
\dQuote{Estimation of Weighted Means When Plausible Values Are Not Present}
or
\dQuote{Estimation of Weighted Means When Plausible Values Are Present,}
depending on whether there are assessment variables or variables with plausible values
in them.

How the standard errors of the coefficients are estimated depends on the
value of \code{varMethod} and the presence of plausible values (assessment variables),
but once the standard error is obtained, the \emph{t} statistic
is given by \deqn{t=\frac{\hat{\beta}}{\sqrt{\mathrm{var}(\hat{\beta})}}} where
\eqn{ \hat{\beta} } is the estimated coefficient and \eqn{\mathrm{var}(\hat{\beta})} is
the variance of that estimate.

The \bold{coefficient of determination (\emph{R}-squared value)} is similarly estimated by
averaging the \emph{R}-squared values across the plausible values.


\subsection{Variance estimation of coefficients}{
  All variance estimation methods are shown in the vignette titled
\href{https://www.air.org/sites/default/files/EdSurvey-Statistics.pdf}{Statistics}.
  When \code{varMethod} is set to \code{jackknife} and the predicted
  value does not have plausible values, the variance of the coefficients
  is estimated according to the section
\dQuote{Estimation of Standard Errors of Weighted Means When
        Plausible Values Are Not Present, Using the Jackknife Method.}

  When plausible values are present and \code{varMethod} is \code{jackknife}, the
  variance of the coefficients is estimated according to the section
\dQuote{Estimation of Standard Errors of Weighted Means When
        Plausible Values Are Present, Using the Jackknife Method.}

  When plausible values are not present and \code{varMethod} is \code{Taylor}, the
  variance of the coefficients is estimated according to the section
\dQuote{Estimation of Standard Errors of Weighted Means When Plausible
        Values Are Not Present, Using the Taylor Series Method.}

  When plausible values are present and \code{varMethod} is \code{Taylor}, the
  variance of the coefficients is estimated according to the section
\dQuote{Estimation of Standard Errors of Weighted Means When Plausible
        Values Are Present, Using the Taylor Series Method.}
}
}
\examples{
\dontrun{
# read in the example data (generated, not real student data)
sdf <- readNAEP(system.file("extdata/data", "M36NT2PM.dat", package = "NAEPprimer"))

# By default uses jackknife variance method using replicate weights
lm1 <- lm.sdf(composite ~ dsex + b017451, data=sdf)
lm1

# for more detailed results use summary
summary(lm1)

# to specify a variance method, use varMethod
lm2 <- lm.sdf(composite ~ dsex + b017451, data=sdf, varMethod="Taylor")
lm2
summary(lm2)

# Use relevels to set a new omitted category
lm3 <- lm.sdf(composite ~ dsex + b017451, data=sdf, relevels=list(dsex="Female"))
summary(lm3)

# Use recode to change values for specified variables
lm4 <- lm.sdf(composite ~ dsex + b017451, data=sdf,
              recode=list(b017451=list(from=c("Never or hardly ever",
                                              "Once every few weeks",
                                              "About once a week"),
                                       to=c("Infrequently")),
                          b017451=list(from=c("2 or 3 times a week","Every day"),
                                       to=c("Frequently"))))
# Note: "Infrequently" is the dropped level for the recoded b017451
summary(lm4)
}
}
\references{
Binder, D. A. (1983). On the variances of asymptotically normal estimators from complex surveys. \emph{International Statistical Review}, \emph{51}(3), 279--292. 

Rubin, D. B. (1987). \emph{Multiple imputation for nonresponse in surveys}. New York, NY: Wiley.

van Buuren, S. (2012). \emph{Flexible imputation of missing data}. New York, NY: CRC Press.

Weisberg, S. (1985). \emph{Applied linear regression} (2nd ed.). New York, NY: Wiley.
}
\seealso{
\ifelse{latex}{\code{lm}}{\code{\link[stats]{lm}}}
}
\author{
Paul Bailey
}
