% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/edsurvey.data.frame.R, R/getAttributes.R,
%   R/setAttributes.R, R/utilities.R
\name{edsurvey.data.frame}
\alias{edsurvey.data.frame}
\alias{$.edsurvey.data.frame}
\alias{$<-.edsurvey.data.frame}
\alias{\%in\%,edsurvey.data.frame,ANY-method}
\alias{\%in\%,edsurvey.data.frame.list,ANY-method}
\alias{getAttributes}
\alias{setAttributes}
\alias{getPSUVar}
\alias{getStratumVar}
\title{EdSurvey Class Constructors and Helpers}
\usage{
edsurvey.data.frame(
  userConditions,
  defaultConditions,
  dataList = list(),
  weights,
  pvvars,
  subject,
  year,
  assessmentCode,
  dataType,
  gradeLevel,
  achievementLevels,
  omittedLevels,
  survey,
  country,
  psuVar,
  stratumVar,
  jkSumMultiplier,
  recodes = NULL,
  validateFactorLabels = FALSE,
  forceLower = TRUE,
  reqDecimalConversion = TRUE,
  fr2Path = NULL,
  dim0 = NULL,
  cacheDataLevelName = NULL
)

\method{$}{edsurvey.data.frame}(x, i)

\method{$}{edsurvey.data.frame}(x, name) <- value

\S4method{\%in\%}{edsurvey.data.frame,ANY}(x, table)

\S4method{\%in\%}{edsurvey.data.frame.list,ANY}(x, table)

getAttributes(data, attribute = NULL, errorCheck = TRUE)

setAttributes(data, attribute, value)

getPSUVar(
  data,
  weightVar = attributes(getAttributes(data, "weights"))[["default"]]
)

getStratumVar(
  data,
  weightVar = attributes(getAttributes(data, "weights"))[["default"]]
)
}
\arguments{
\item{userConditions}{a list of user conditions that includes subsetting or recoding conditions}

\item{defaultConditions}{a list of default conditions that often are set for each survey}

\item{dataList}{a list of \code{dataListItem} objects to model the data structure of the survey}

\item{weights}{a list that stores information regarding weight variables. See Details.}

\item{pvvars}{a list that stores information regarding plausible values. See Details.}

\item{subject}{a character that indicates the subject domain of the given data}

\item{year}{a character or numeric that indicates the year of the given data}

\item{assessmentCode}{a character that indicates the code of the assessment.
Can be \code{National} or \code{International}.}

\item{dataType}{a character that indicates the unit level of the main data.
Examples include \code{student}, \code{teacher}, \code{school},
\code{Adult Data}.}

\item{gradeLevel}{a character that indicates the grade level of the given data}

\item{achievementLevels}{a list of achievement-level categories and cutpoints}

\item{omittedLevels}{a list of default omitted levels for the given data}

\item{survey}{a character that indicates the name of the survey}

\item{country}{a character that indicates the country of the given data}

\item{psuVar}{a character that indicates the primary sampling unit (PSU) variable. Ignored when weights have \code{psuVar} defined.}

\item{stratumVar}{a character that indicates the stratum variable. Ignored when weights have \code{stratumVar} defined.}

\item{jkSumMultiplier}{a numeric value of the jackknife coefficient (used in calculating the jackknife replication estimation)}

\item{recodes}{a list of variable recodes of the given data}

\item{validateFactorLabels}{a Boolean that indicates whether the \code{getData} function needs to validate factor variables}

\item{forceLower}{a Boolean; when set to \code{TRUE}, will automatically lowercase variable names}

\item{reqDecimalConversion}{a Boolean; when set to \code{TRUE}, a \code{getData} call will multiply the raw file value by a decimal multiplier}

\item{fr2Path}{a character file location for NAEP assessments to identify the location of the codebook file in \code{fr2} format}

\item{dim0}{numeric vector of length two. To speed construction, the dimensions of the data can be provided}

\item{cacheDataLevelName}{a character value set to match the named element in the \code{dataList} to utilize the data caching scheme. See Details.}

\item{x}{an \code{edsurvey.data.frame}}

\item{i}{a character, the column name to extract}

\item{name}{a character vector of the column to edit}

\item{value}{in the \code{$<-} assignment method, the value assigned to the column \code{name}; in \code{setAttributes}, the new value of the given \code{attribute}}

\item{table}{an \code{edsurvey.data.frame} or \code{edsurvey.data.frame.list} where \code{x} is searched for}

\item{data}{an \code{edsurvey.data.frame}}

\item{attribute}{a character, name of an attribute to get or set}

\item{errorCheck}{logical; see Details}

\item{weightVar}{a character indicating the full sample weights. Required in \code{getPSUVar} and \code{getStratumVar} when there is no default weight.}
}
\value{
An object of class \code{edsurvey.data.frame} with the following elements:

\emph{Elements that store data connections and data codebooks}
   \item{\code{dataList}}{a \code{list} object containing the survey's \code{dataListItem} objects}
\emph{Elements that store sample design and default subsetting information of the given survey data}
   \item{\code{userConditions}}{a list containing all user conditions, set using the \code{subset.edsurvey.data.frame} method}
   \item{\code{defaultConditions}}{the default subsample conditions}
   \item{\code{weights}}{a list containing the weights. See Details.}
   \item{\code{stratumVar}}{a character that indicates the default strata identification variable name in the data. Often used in Taylor series estimation.}
   \item{\code{psuVar}}{a character that indicates the default PSU (sampling unit) identification variable name in the data. Often used in Taylor series estimation.}
   \item{\code{pvvars}}{a list containing the plausible values. See Details.}
   \item{\code{achievementLevels}}{default achievement cutoff scores and names. See Details.}
   \item{\code{omittedLevels}}{the levels of the factor variables that will be omitted from the \code{edsurvey.data.frame}}
\emph{Elements that store descriptive information of the survey}
   \item{\code{survey}}{the type of survey data}
   \item{\code{subject}}{the subject of the data}
   \item{\code{year}}{the year of assessment}
   \item{\code{assessmentCode}}{the assessment code}
   \item{\code{dataType}}{the type of data (e.g., \code{student} or \code{school})}
   \item{\code{gradeLevel}}{the grade of the dataset contained in the \code{edsurvey.data.frame}}
\emph{Elements used in \code{mml.sdf}}
   \item{\code{dichotParamTab}}{IRT item parameters for dichotomous items in a data frame}
   \item{\code{polyParamTab}}{IRT item parameters for polytomous items in a data frame}
   \item{\code{adjustedData}}{IRT item parameter adjustment information in a data frame}
   \item{\code{testData}}{IRT transformation constants in a data frame}
   \item{\code{scoreCard}}{item scoring information in a data frame}
   \item{\code{scoreDict}}{generic scoring information in a data frame}
   \item{\code{scoreFunction}}{a function that turns the variables with items in them into numeric scores}
}
\description{
Two new classes in \code{EdSurvey} are described in this section: the \code{edsurvey.data.frame}
             and \code{light.edsurvey.data.frame}. The \code{edsurvey.data.frame}
             class stores metadata about survey data, and data are stored on the
             disk (via the \code{LaF} package), allowing gigabytes of data to be used easily on a machine otherwise
             inappropriate for manipulating large datasets.
             The \code{light.edsurvey.data.frame} is typically generated
             by the \code{getData} function and stores the data in a
             \code{data.frame}.
             Both classes use attributes to manage metadata and allow
             for correct statistics to be used in calculating results; the
             \code{getAttributes} function acts as an accessor for these attributes, whereas
             \code{setAttributes} acts as a mutator for the attributes.
             As a convenience, \code{edsurvey.data.frame}
             implements the \code{$} function to extract a variable.
}
\details{
The \code{weights} list has an element named after each weight variable name
that is a list with elements \code{jkbase} and \code{jksuffixes}. The
\code{jkbase} variable is a single character indicating the jackknife replicate
weight base name, whereas \code{jksuffixes} is a vector with one element for each
jackknife replicate weight. When the two are pasted together, they should form
the complete set of the jackknife replicate weights. The \code{weights} argument
also can have an attribute that is the default weight. If the primary sampling
unit and stratum variables change by weight, they also can be defined on the weight
list as \code{psuVar} and \code{stratumVar}. When this option is used, it overrides
the \code{psuVar} and \code{stratumVar} on the \code{edsurvey.data.frame},
which can be left blank. A weight must define only one of \code{psuVar}
and \code{stratumVar}.

The \code{pvvars} list has an element for each subject or subscale score
that has plausible values. Each element is a list with a \code{varnames}
element that indicates the column names of the plausible values and an
\code{achievementLevel} element that is a named vector of the
achievement-level cutpoints.

An \code{edsurvey.data.frame} implements a unique data caching mechanism that allows users to create and merge data columns for flexibility.
This \code{cache} object is a single \code{data.frame} that is an element in the \code{edsurvey.data.frame}. To accommodate studies with complex data models
the cache can only support one data level at this time. The \code{cacheDataLevelName} parameter indicates which named element in the \code{dataList}
is used as the cache level. The default value \code{cacheDataLevelName = NULL} will set the first item in the \code{dataList} as the \code{cache} level for an \code{edsurvey.data.frame}.
}
\section{EdSurvey Classes}{

\code{edsurvey.data.frame} is an object that stores connection to data on the
disk along with important survey sample design information.

\code{edsurvey.data.frame.list} is a list of \code{edsurvey.data.frame}
objects. It often is used in trend or cross-regional analysis in the
\code{\link{gap}} function. See \code{\link{edsurvey.data.frame.list}} for
more information on how to create an \code{edsurvey.data.frame.list}. Users
also can refer to the vignette titled
\href{https://www.air.org/sites/default/files/EdSurvey-Trend.pdf}{\emph{Using EdSurvey for Trend Analysis}}
for examples.

Besides \code{edsurvey.data.frame} class, the \code{EdSurvey} package also
implements the \code{light.edsurvey.data.frame} class, which can be used by both
\code{EdSurvey} and non-\code{EdSurvey} functions. More particularly,
\code{light.edsurvey.data.frame} is a \code{data.frame} that has basic
survey and sample design information (i.e., plausible values and weights), which
will be used for variance estimation in analytical functions. Because it
also is a base R \code{data.frame}, users can apply base R functions for
data manipulation.
See the vignette titled
\href{https://www.air.org/sites/default/files/EdSurvey-getData.pdf}{\emph{Using the \code{getData} Function in EdSurvey}}
for more examples.

Many functions will remove attributes from a data frame, such as
a \code{light.edsurvey.data.frame}, and the
\code{\link{rebindAttributes}} function can add them back.

Users can get a \code{light.edsurvey.data.frame} object by using the
\code{\link{getData}} method with \code{addAttributes=TRUE}.
}

\section{Basic Methods for EdSurvey Classes}{

\emph{Extracting a column from an \code{edsurvey.data.frame}}

Users can extract a column from an \code{edsurvey.data.frame} object using \code{$} or \code{[]} like a normal data frame.

\emph{Extracting and updating attributes of an object of class \code{edsurvey.data.frame} or \code{light.edsurvey.data.frame}}

Users can use the \code{getAttributes} method to extract any attribute of
an \code{edsurvey.data.frame} or a \code{light.edsurvey.data.frame}.
The \code{errorCheck} parameter has a default value of \code{TRUE}, which throws an error if an attribute is not found.
Setting \code{errorCheck = FALSE} will suppress error checking, and return \code{NULL} if an attribute can't be found.

A \code{light.edsurvey.data.frame} will not have attributes related to data connection
because data have already been read in memory.

If users want to update an attribute (e.g., \code{omittedLevels}), they can
use the \code{setAttributes} method.
}

\examples{
\dontrun{
# read in the example data (generated, not real student data)
sdf <- readNAEP(path=system.file("extdata/data", "M36NT2PM.dat", package="NAEPprimer"))

# run a base R function on a column of edsurvey.data.frame
table(sdf$dsex)
# assignment
table(sdf$b013801)
sdf$books <- ifelse(sdf$b013801 \%in\% c("0-10", "11-25"), "0-25 books", "26+ books")
table(sdf$books, sdf$b013801)

# extract default omitted levels of NAEP primer data
getAttributes(data=sdf, attribute="omittedLevels")
#[1] "Multiple" NA         "Omitted"

# update default omitted levels of NAEP primer data
sdf <- setAttributes(data=sdf,
	                 attribute="omittedLevels",
	                 value=c("Multiple", "Omitted", NA, "(Missing)"))
getAttributes(data=sdf, attribute="omittedLevels")
#[1] "Multiple"  "Omitted"   NA          "(Missing)"
}
}
\seealso{
\code{\link{rebindAttributes}}
}
\author{
Tom Fink, Trang Nguyen, and Paul Bailey
}
