% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/manip.r
\name{mutate}
\alias{mutate}
\alias{transmute}
\title{Create or transform variables}
\usage{
mutate(.data, ...)

transmute(.data, ...)
}
\arguments{
\item{.data}{A tbl. All main verbs are S3 generics and provide methods
for \code{\link[=tbl_df]{tbl_df()}}, \code{\link[dtplyr:tbl_dt]{dtplyr::tbl_dt()}} and \code{\link[dbplyr:tbl_dbi]{dbplyr::tbl_dbi()}}.}

\item{...}{Name-value pairs of expressions, each with length 1 or the same
length as the number of rows in the group (if using \code{\link[=group_by]{group_by()}}) or in the entire
input (if not using groups). The name of each argument will be the name of
a new variable, and the value will be its corresponding value.  Use a \code{NULL}
value in \code{mutate()} to drop a variable.  New variables overwrite existing variables
of the same name.

The arguments in \code{...} are automatically \link[rlang:quo]{quoted} and
\link[rlang:eval_tidy]{evaluated} in the context of the data
frame. They support \link[rlang:quasiquotation]{unquoting} and
splicing. See \code{vignette("programming")} for an introduction to
these concepts.}
}
\value{
An object of the same class as \code{.data}.
}
\description{
\code{mutate()} adds new variables and preserves existing ones;
\code{transmute()} adds new variables and drops existing ones.  Both
functions preserve the number of rows of the input.
New variables overwrite existing variables of the same name.
}
\section{Useful functions available in calculations of variables}{

\itemize{
\item \code{\link{+}}, \code{\link{-}}, \code{\link[=log]{log()}}, etc., for their usual mathematical meanings
\item \code{\link[=lead]{lead()}}, \code{\link[=lag]{lag()}}
\item \code{\link[=dense_rank]{dense_rank()}}, \code{\link[=min_rank]{min_rank()}}, \code{\link[=percent_rank]{percent_rank()}}, \code{\link[=row_number]{row_number()}},
\code{\link[=cume_dist]{cume_dist()}}, \code{\link[=ntile]{ntile()}}
\item \code{\link[=cumsum]{cumsum()}}, \code{\link[=cummean]{cummean()}}, \code{\link[=cummin]{cummin()}}, \code{\link[=cummax]{cummax()}}, \code{\link[=cumany]{cumany()}}, \code{\link[=cumall]{cumall()}}
\item \code{\link[=na_if]{na_if()}}, \code{\link[=coalesce]{coalesce()}}
\item \code{\link[=if_else]{if_else()}}, \code{\link[=recode]{recode()}}, \code{\link[=case_when]{case_when()}}
}
}

\section{Grouped tibbles}{


Because mutating expressions are computed within groups, they may
yield different results on grouped tibbles. This will be the case
as soon as an aggregating, lagging, or ranking function is
involved. Compare this ungrouped mutate:\preformatted{starwars \%>\%
  mutate(mass / mean(mass, na.rm = TRUE)) \%>\%
  pull()
}

With the grouped equivalent:\preformatted{starwars \%>\%
  group_by(gender) \%>\%
  mutate(mass / mean(mass, na.rm = TRUE)) \%>\%
  pull()
}

The former normalises \code{mass} by the global average whereas the
latter normalises by the averages within gender levels.

Note that you can't overwrite a grouping variable within
\code{mutate()}.

\code{mutate()} does not evaluate the expressions when the group is empty.
}

\section{Scoped mutation and transmutation}{


The three \link{scoped} variants of \code{mutate()} (\code{\link[=mutate_all]{mutate_all()}},
\code{\link[=mutate_if]{mutate_if()}} and \code{\link[=mutate_at]{mutate_at()}}) and the three variants of
\code{transmute()} (\code{\link[=transmute_all]{transmute_all()}}, \code{\link[=transmute_if]{transmute_if()}} and
\code{\link[=transmute_at]{transmute_at()}}) make it easy to apply a transformation to a
selection of variables.
}

\section{Tidy data}{

When applied to a data frame, row names are silently dropped. To preserve them,
convert them to an explicit variable with \code{\link[tibble:rownames_to_column]{tibble::rownames_to_column()}}.
}

\examples{
# Newly created variables are available immediately
mtcars \%>\% as_tibble() \%>\% mutate(
  cyl2 = cyl * 2,
  cyl4 = cyl2 * 2
)

# You can also use mutate() to remove variables and
# modify existing variables
mtcars \%>\% as_tibble() \%>\% mutate(
  mpg = NULL,
  disp = disp * 0.0163871 # convert to litres
)


# window functions are useful for grouped mutates
mtcars \%>\%
 group_by(cyl) \%>\%
 mutate(rank = min_rank(desc(mpg)))
# see `vignette("window-functions")` for more details

# You can drop variables by setting them to NULL
mtcars \%>\% mutate(cyl = NULL)

# mutate() vs transmute() ------------------------
# mutate() keeps all existing variables
mtcars \%>\%
  mutate(displ_l = disp / 61.0237)

# transmute() keeps only the variables you create
mtcars \%>\%
  transmute(displ_l = disp / 61.0237)


# The mutate operation may yield different results on grouped
# tibbles because the expressions are computed within groups.
# The following normalises `mass` by the global average:
starwars \%>\%
  mutate(mass / mean(mass, na.rm = TRUE)) \%>\%
  pull()

# Whereas this normalises `mass` by the averages within gender
# levels:
starwars \%>\%
  group_by(gender) \%>\%
  mutate(mass / mean(mass, na.rm = TRUE)) \%>\%
  pull()

# Note that you can't overwrite grouping variables:
gdf <- mtcars \%>\% group_by(cyl)
try(mutate(gdf, cyl = cyl * 100))


# Refer to column names stored as strings with the `.data` pronoun:
vars <- c("mass", "height")
mutate(starwars, prod = .data[[vars[[1]]]] * .data[[vars[[2]]]])

# For more complex cases, knowledge of tidy evaluation and the
# unquote operator `!!` is required. See https://tidyeval.tidyverse.org/
#
# One useful and simple tidy eval technique is to use `!!` to
# bypass the data frame and its columns. Here is how to divide the
# column `mass` by an object of the same name:
mass <- 100
mutate(starwars, mass = mass / !!mass)
}
\seealso{
Other single table verbs: \code{\link{arrange}},
  \code{\link{filter}}, \code{\link{select}},
  \code{\link{slice}}, \code{\link{summarise}}
}
\concept{single table verbs}
