% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/misc_and_utility.R
\name{umx_residualize}
\alias{umx_residualize}
\title{Easily residualize variables in long or wide dataframes, returning them changed in-place}
\usage{
umx_residualize(var, covs = NULL, suffixes = NULL, data)
}
\arguments{
\item{var}{The base name of the variable you want to residualize. Alternatively, a
regression \code{\link[=formula]{formula()}} containing var on the lhs, and covs on the rhs}

\item{covs}{Covariates to residualize on.}

\item{suffixes}{Suffixes that identify the variable for each twin, e.g. \code{c("_T1", "_T2")}.
It is up to you to check that all variables are present!}

\item{data}{The dataframe containing all the variables}
}
\value{
\itemize{
\item dataframe with var residualized in place (i.e., under its original column name)
}
}
\description{
Residualize one or more variables against covariates, and return the
complete dataframe with the residualized variables in place.
Optionally, this also works on wide (i.e., twin) data. Just supply suffixes to identify
the paired-wide columns (see examples).
}
\details{
In R, residuals for a variable can be found with the following statement:

\code{tmp = residuals(lm(var ~ cov1 + cov2, data = data, na.action = na.exclude))}

This tmp variable could then be written over the old data, e.g. \code{data$var = tmp}.

umx_residualize obviates the user having to build the lm, set na.action, or replace the data.
In addition, it has the powerful feature of operating on a list of variables, and of operating on
wide data, expanding the var name using a set of variable-name suffixes.
}
\examples{
# Residualize mpg on cylinders and displacement.
# The whole dataframe is returned, with mpg residualized under its original name.
r1 = umx_residualize("mpg", c("cyl", "disp"), data = mtcars)

# Validate against the equivalent hand-built lm
r2 = residuals(lm(mpg ~ cyl + disp, data = mtcars, na.action = na.exclude))
all(r1$mpg == r2)

# =============================
# = Use the formula interface =
# =============================
# var is taken from the lhs of the formula, covs from the rhs
r1 = umx_residualize(mpg ~ cyl + I(cyl^2) + disp, data = mtcars)

# validate against using lm
r2 = residuals(lm(mpg ~ cyl + I(cyl^2) + disp, data = mtcars, na.action = na.exclude))
all(r1$mpg == r2)

# ===========================================================
# = Residualize twin data (i.e. wide or "1 family per row") =
# ===========================================================
# Make some toy "twin" data to demonstrate with:
# each variable is duplicated into a _T1 and a _T2 column
tmp = mtcars
tmp$mpg_T1  = tmp$mpg_T2  = tmp$mpg
tmp$cyl_T1  = tmp$cyl_T2  = tmp$cyl
tmp$disp_T1 = tmp$disp_T2 = tmp$disp

# Base names only: the suffixes are appended to var and covs for each twin
covs = c("cyl", "disp")
tmp = umx_residualize(var = "mpg", covs = covs, suffixes = c("_T1", "_T2"), data = tmp)

# Columns 12:17 are the six twin columns appended to mtcars' original 11
str(tmp[1:5, 12:17])

# ===================================
# = Residualize several DVs at once =
# ===================================
# Name the argument in full (covs) rather than relying on partial matching of "cov"
df1 = umx_residualize(c("mpg", "hp"), covs = c("cyl", "disp"), data = tmp)
df2 = residuals(lm(hp ~ cyl + disp, data = tmp, na.action = na.exclude))
all(df1$hp == df2)
}
\references{
\itemize{
\item \url{https://tbates.github.io},  \url{https://github.com/tbates/umx}
}
}
\seealso{
Other Twin Data functions: 
\code{\link{umx_long2wide}()},
\code{\link{umx_make_TwinData}()},
\code{\link{umx_make_twin_data_nice}()},
\code{\link{umx_scale_wide_twin_data}()},
\code{\link{umx_wide2long}()},
\code{\link{umx}}
}
\concept{Twin Data functions}
