% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dicho.R
\name{dicho}
\alias{dicho}
\title{Dichotomize variables}
\usage{
dicho(x, ..., dich.by = "median", as.num = FALSE, var.label = NULL,
  val.labels = NULL, append = FALSE, suffix = "_d")
}
\arguments{
\item{x}{A vector or data frame.}

\item{...}{Optional, unquoted names of variables that should be selected for
further processing. Required if \code{x} is a data frame (rather than a
vector) and only selected variables from \code{x} should be processed.
You may also use functions like \code{:} or dplyr's \code{\link[dplyr]{select_helpers}}.
See 'Examples' or \href{../doc/design_philosophy.html}{package-vignette}.}

\item{dich.by}{Indicates the split criterion where a variable is dichotomized.
Must be one of the following values (may be abbreviated):
\describe{
  \item{\code{"median"} or \code{"md"}}{by default, \code{x} is split into two groups at the median.}
  \item{\code{"mean"} or \code{"m"}}{splits \code{x} into two groups at the mean of \code{x}.}
  \item{numeric value}{splits \code{x} into two groups at the specified value. Note that the value is inclusive, i.e. \code{dich.by = 10} will split \code{x} into one group with values from lowest to 10 and another group with values greater than 10.}
  }}

\item{as.num}{Logical, if \code{TRUE}, return value will be numeric, not a factor.}

\item{var.label}{Optional string, to set variable label attribute for the
returned variable (see vignette \href{https://cran.r-project.org/package=sjlabelled/vignettes/intro_sjlabelled.html}{Labelled Data and the sjlabelled-Package}).
If \code{NULL} (default), variable label attribute of \code{x} will
be used (if present). If empty, variable label attributes will be removed.}

\item{val.labels}{Optional character vector (of length two), to set value label
attributes of dichotomized variable (see \code{\link[sjlabelled]{set_labels}}).
If \code{NULL} (default), no value labels will be set.}

\item{append}{Logical, if \code{TRUE} and \code{x} is a data frame,
\code{x} including the new variables as additional columns is returned;
if \code{FALSE} (the default), only the new variables are returned.}

\item{suffix}{String value, will be appended to variable (column) names of
\code{x}, if \code{x} is a data frame. If \code{x} is not a data
frame, this argument will be ignored. The default value to suffix
column names in a data frame depends on the function call:
\itemize{
  \item recoded variables (\code{rec()}) will be suffixed with \code{"_r"}
  \item recoded variables (\code{recode_to()}) will be suffixed with \code{"_r0"}
  \item dichotomized variables (\code{dicho()}) will be suffixed with \code{"_d"}
  \item grouped variables (\code{split_var()}) will be suffixed with \code{"_g"}
  \item grouped variables (\code{group_var()}) will be suffixed with \code{"_gr"}
  \item standardized variables (\code{std()}) will be suffixed with \code{"_z"}
  \item centered variables (\code{center()}) will be suffixed with \code{"_c"}
}}
}
\value{
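\code{x}, dichotomized. If \code{x} is a data frame, only
        the dichotomized variables will be returned, unless
        \code{append = TRUE}, in which case \code{x} including the new
        variables as additional columns is returned.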
}
\description{
Dichotomizes variables into dummy variables (0/1). Dichotomization is
               done either by median, mean or a specified value (see \code{dich.by}).
}
\details{
\code{dicho()} also works on grouped data frames (see \code{\link[dplyr]{group_by}}).
         In this case, dichotomization is applied separately to each group
         in \code{x}, so the split value may differ between groups.
         See 'Examples'.
}
\note{
Variable label attributes are preserved (unless changed via
      \code{var.label}-argument).
}
\examples{
data(efc)
summary(efc$c12hour)
# split at median
table(dicho(efc$c12hour))
# split at mean
table(dicho(efc$c12hour, dich.by = "mean"))
# split into values from lowest to 30, and values above 30
table(dicho(efc$c12hour, dich.by = 30))

# sample data frame, values from 1-4
head(efc[, 6:10])

# dichotomized values (1 to 2 = 0, 3 to 4 = 1)
library(dplyr)
efc \%>\%
  select(6:10) \%>\%
  dicho(dich.by = 2) \%>\%
  head()

# dichotomize several variables in a data frame
dicho(efc, c12hour, e17age, c160age)

# dichotomize and set labels
frq(dicho(efc, e42dep, var.label = "Dependency (dichotomized)",
          val.labels = c("lower", "higher")))

# also works with grouped data frames
mtcars \%>\%
  dicho(disp) \%>\%
  table()

mtcars \%>\%
  group_by(cyl) \%>\%
  dicho(disp) \%>\%
  table()

# dichotomizing grouped data frames leads to different
# results for a dichotomized variable, because the split
# value is different for each group.
# compare:
mtcars \%>\%
  group_by(cyl) \%>\%
  summarise(median = median(disp))

median(mtcars$disp)

}
