% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/aaa_fabricate.R, R/add_level.R,
%   R/modify_level.R, R/nest_level.R
\name{fabricate}
\alias{fabricate}
\alias{add_level}
\alias{modify_level}
\alias{nest_level}
\title{Fabricate data}
\usage{
fabricate(..., data = NULL, N = NULL, ID_label = NULL)

add_level(N = NULL, ..., nest = TRUE)

modify_level(..., by = NULL)

nest_level(N = NULL, ...)
}
\arguments{
\item{...}{Variable or level-generating arguments, such as
\code{my_var = rnorm(N)}. For \code{fabricate}, you may also pass
\code{add_level()} or \code{modify_level()} arguments, which define a level
of a multi-level dataset. See examples.}

\item{data}{(optional) user-provided data that forms the basis of the
fabrication, e.g. you can add variables to existing data. Provide either
\code{N} or \code{data} (\code{N} is the number of rows of the data if
\code{data} is provided). If \code{data} and \code{N} are not provided,
fabricatr will try to interpret the first unnamed argument as either \code{data}
or \code{N} based on type.}

\item{N}{(optional) number of units to draw. If provided as
\code{fabricate(N = 5)}, this determines the number of units in the
single-level data. If provided in \code{add_level}, e.g.
\code{fabricate(cities = add_level(N = 5))}, \code{N} determines the number
of units in a specific level of a hierarchical dataset.}

\item{ID_label}{(optional) variable name for the ID variable, e.g. \code{citizen_ID}.}

\item{nest}{(Default TRUE) Boolean determining whether data in an
\code{add_level()} call will be nested under the current working data frame
or create a separate hierarchy of levels. See our vignette for
cross-classified, non-nested data for details.}

\item{by}{(optional) quoted name of the variable by which \code{modify_level} splits the data in its split-modify-combine operation.}
}
\value{
A \code{data.frame}.
}
\description{
\code{fabricate} helps you simulate a dataset before you collect it. You can
either start with your own data and add simulated variables to it (by passing
\code{data} to \code{fabricate()}) or start from scratch by defining
\code{N}. Create hierarchical data with multiple levels of data such as
citizens within cities within states using \code{add_level()} or modify
existing hierarchical data using \code{modify_level()}. You can use any R
function to create each variable. Use \code{cross_levels()} and
\code{link_levels()} to make more complex designs such as panel or
cross-classified data.
}
\details{
We also provide several built-in options to easily create variables, including
\code{\link{draw_binary}}, \code{\link{draw_count}}, \code{\link{draw_likert}},
and intra-cluster correlated variables \code{\link{draw_binary_icc}} and
\code{\link{draw_normal_icc}}.
}
\examples{


# Draw a single-level dataset with a covariate
building_df <- fabricate(
  N = 100,
  height_ft = runif(N, 300, 800)
)
head(building_df)

# Start with existing data instead
building_modified <- fabricate(
  data = building_df,
  rent = rnorm(N, mean = height_ft * 100, sd = height_ft * 30)
)

# Draw a two-level hierarchical dataset
# containing cities within regions
multi_level_df <- fabricate(
 regions = add_level(N = 5),
 cities = add_level(N = 2, pollution = rnorm(N, mean = 5)))
head(multi_level_df)

# Start with existing data and add a nested level:
company_df <- fabricate(
 data = building_df,
 company_id = add_level(N=10, is_headquarters = sample(c(0, 1), N, replace=TRUE))
)

# Start with existing data and add variables to hierarchical data
# at levels which are already present in the existing data.
# Note: do not provide N when adding variables to an existing level
fabricate(
  data = multi_level_df,
  regions = modify_level(watershed = sample(c(0, 1), N, replace = TRUE)),
  cities = modify_level(runoff = rnorm(N))
)

# fabricatr can add variables that are higher-level summaries of lower-level
# variables via split-modify-combine logic and the by argument of modify_level()

multi_level_df <-
 fabricate(
   regions = add_level(N = 5, elevation = rnorm(N)),
   cities = add_level(N = 2, pollution = rnorm(N, mean = 5)),
   cities = modify_level(by = "regions", regional_pollution = mean(pollution))
 )

# fabricatr can also make panel or cross-classified data. For more
# information about syntax for this functionality please read our vignette
# or check the documentation for link_levels():
cross_classified <- fabricate(
  primary_schools = add_level(N = 50, ps_quality = runif(N, 0, 10)),
  secondary_schools = add_level(N = 100, ss_quality = runif(N, 0, 10), nest=FALSE),
  students = link_levels(N = 2000,
                          by=join(ps_quality, ss_quality, rho = 0.5),
                          student_quality = ps_quality + 3*ss_quality + rnorm(N)))
}
\seealso{
\code{\link{link_levels}}
}
