% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/stat-summary-bin.R, R/stat-summary.r
\name{stat_summary_bin}
\alias{stat_summary}
\alias{stat_summary_bin}
\title{Summarise y values at unique/binned x.}
\usage{
stat_summary_bin(mapping = NULL, data = NULL, geom = "pointrange",
  fun.data = NULL, fun.y = NULL, fun.ymax = NULL, fun.ymin = NULL,
  fun.args = list(), na.rm = FALSE, position = "identity",
  show.legend = NA, inherit.aes = TRUE, ...)

stat_summary(mapping = NULL, data = NULL, geom = "pointrange",
  fun.data = NULL, fun.y = NULL, fun.ymax = NULL, fun.ymin = NULL,
  fun.args = list(), na.rm = FALSE, position = "identity",
  show.legend = NA, inherit.aes = TRUE, ...)
}
\arguments{
\item{mapping}{Set of aesthetic mappings created by \code{\link{aes}} or
\code{\link{aes_}}. If specified and \code{inherit.aes = TRUE} (the
default), is combined with the default mapping at the top level of the
plot. You only need to supply \code{mapping} if there isn't a mapping
defined for the plot.}

\item{data}{A data frame. If specified, overrides the default data frame
defined at the top level of the plot.}

\item{geom}{The geometric object used to display the summarised data.
Use to override the default, \code{"pointrange"}.}

\item{fun.data}{A function that is given the complete data and should
return a data frame with variables \code{ymin}, \code{y}, and \code{ymax}.}

\item{fun.ymin, fun.y, fun.ymax}{Alternatively, supply three individual
functions that are each passed a vector of y values and should return a
single number.}

\item{fun.args}{Optional additional arguments passed on to the functions.}

\item{na.rm}{If \code{FALSE} (the default), removes missing values with
a warning.  If \code{TRUE} silently removes missing values.}

\item{position}{Position adjustment, either as a string, or the result of
a call to a position adjustment function.}

\item{show.legend}{logical. Should this layer be included in the legends?
\code{NA}, the default, includes if any aesthetics are mapped.
\code{FALSE} never includes, and \code{TRUE} always includes.}

\item{inherit.aes}{If \code{FALSE}, overrides the default aesthetics,
rather than combining with them. This is most useful for helper functions
that define both data and aesthetics and shouldn't inherit behaviour from
the default plot specification, e.g. \code{\link{borders}}.}

\item{...}{other arguments passed on to \code{\link{layer}}. There are
  three types of arguments you can use here:

  \itemize{
  \item Aesthetics: to set an aesthetic to a fixed value, like
     \code{color = "red"} or \code{size = 3}.
  \item Other arguments to the layer, for example you can override the
    default \code{stat} associated with the layer.
  \item Other arguments passed on to the stat.
  }}
}
\description{
\code{stat_summary} operates on unique \code{x}; \code{stat_summary_bin}
operates on binned \code{x}. They are more flexible versions of
\code{\link{stat_bin}}: instead of just counting, they can compute any
aggregate.
}
\section{Aesthetics}{

\Sexpr[results=rd,stage=build]{ggplot2:::rd_aesthetics("stat", "summary")}
}

\section{Summary functions}{

You can either supply summary functions individually (\code{fun.y},
\code{fun.ymax}, \code{fun.ymin}), or as a single function (\code{fun.data}):

\describe{
  \item{fun.data}{Complete summary function. Should take numeric vector as
     input and return data frame as output}
  \item{fun.ymin}{ymin summary function (should take numeric vector and
    return single number)}
  \item{fun.y}{y summary function (should take numeric vector and return
    single number)}
  \item{fun.ymax}{ymax summary function (should take numeric vector and
    return single number)}
}

A simple vector function is easiest to work with as you can return a single
number, but is somewhat less flexible. If your summary function computes
multiple values at once (e.g. ymin and ymax), use \code{fun.data}.

If no aggregation functions are supplied, will default to
\code{\link{mean_se}}.
}
\examples{
# Start from the raw data and overlay a summary computed at each unique x
d <- ggplot(mtcars, aes(cyl, mpg)) + geom_point()
d + stat_summary(fun.data = "mean_cl_boot", colour = "red", size = 2)

# You can supply individual functions to summarise the value at
# each x:
d + stat_summary(fun.y = "median", colour = "red", size = 2)
d + stat_summary(fun.y = "mean", colour = "red", size = 2)
d + aes(colour = factor(vs)) + stat_summary(fun.y = mean, geom = "line")

d + stat_summary(fun.y = mean, fun.ymin = min, fun.ymax = max,
  colour = "red")

# With a continuous x, summarise y within bins of x via the
# "summary_bin" stat:
d <- ggplot(diamonds, aes(carat, price))
d + geom_smooth()
d + geom_line(stat = "summary_bin", binwidth = 0.1, fun.y = "mean")

# stat_summary_bin also works with a discrete x:
d <- ggplot(diamonds, aes(cut))
d + geom_bar()
d + stat_summary_bin(aes(y = price), fun.y = "mean", geom = "bar")
\donttest{
# A set of useful summary functions is provided from the Hmisc package.
# This helper passes the chosen summary function to stat_summary and
# draws the result in red, using a crossbar unless another geom is given:
stat_sum_df <- function(fun, geom = "crossbar", ...) {
  stat_summary(fun.data = fun, geom = geom, colour = "red", width = 0.2, ...)
}

# Don't use ylim to zoom into a summary plot - this throws the
# data away
p <- ggplot(mtcars, aes(cyl, mpg)) + stat_summary(
  fun.y = "mean",
  geom = "point"
)
p
p + ylim(15, 30)
# Instead use coord_cartesian
p + coord_cartesian(ylim = c(15, 30))

# Go back to the mtcars scatterplot: the summaries below need a y
# aesthetic and group by cyl, neither of which the diamonds plot has.
d <- ggplot(mtcars, aes(cyl, mpg)) + geom_point()

# The crossbar geom needs grouping to be specified when used with
# a continuous x axis.
d + stat_sum_df("mean_cl_boot", mapping = aes(group = cyl))
d + stat_sum_df("mean_sdl", mapping = aes(group = cyl))
# Arguments meant for the summary function itself go in fun.args
d + stat_sum_df("mean_sdl", fun.args = list(mult = 1),
  mapping = aes(group = cyl))
d + stat_sum_df("median_hilow", mapping = aes(group = cyl))

# There are lots of different geoms you can use to display the summaries

d + stat_sum_df("mean_cl_normal", mapping = aes(group = cyl))
d + stat_sum_df("mean_cl_normal", geom = "errorbar")
d + stat_sum_df("mean_cl_normal", geom = "pointrange")
d + stat_sum_df("mean_cl_normal", geom = "smooth")

# Summaries are more useful with a bigger data set:
mpg2 <- subset(mpg, cyl != 5L)
# mult is an argument of the summary function, so it is supplied through
# fun.args rather than as a layer argument
m <- ggplot(mpg2, aes(x = cyl, y = hwy)) +
  geom_point() +
  stat_summary(fun.data = "mean_sdl", geom = "linerange",
               colour = "red", size = 2, fun.args = list(mult = 1)) +
  xlab("cyl")
m
# An example with highly skewed distributions:
if (require("ggplot2movies")) {
  set.seed(596)
  mov <- movies[sample(nrow(movies), 1000), ]
  m2 <- ggplot(mov, aes(x = factor(round(rating)), y = votes)) + geom_point()
  m2 <- m2 + stat_summary(fun.data = "mean_cl_boot", geom = "crossbar",
                          colour = "red", width = 0.3) + xlab("rating")
  # Inside the braces of if () only the value of the last expression is
  # auto-printed, so each plot is printed explicitly.
  print(m2)
  # Notice how the overplotting skews our visual perception of the mean:
  # supplementing the raw data with summary statistics is _very_ important.

  # Next, we'll look at votes on a log scale.

  # Transforming the scale means the data are transformed
  # first, after which statistics are computed:
  print(m2 + scale_y_log10())
  # Transforming the coordinate system occurs after the
  # statistic has been computed. This means we're calculating the summary
  # on the raw data and stretching the geoms onto the log scale. Compare
  # the widths of the standard errors.
  print(m2 + coord_trans(y = "log10"))
}
}
}
\seealso{
\code{\link{geom_errorbar}}, \code{\link{geom_pointrange}},
 \code{\link{geom_linerange}}, \code{\link{geom_crossbar}} for geoms to
 display summarised data
}

