% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/f_dataSquashing.R
\name{autoSquash}
\alias{autoSquash}
\title{Automated data squashing}
\usage{
autoSquash(data, keep_pts = c(100, 75, 50, 25), cut_offs = c(500, 1000,
  10000, 1e+05, 5e+05, 1e+06, 5e+06), num_super_pts = c(50, 75, 150, 500,
  750, 1000, 2000, 5000))
}
\arguments{
\item{data}{A data frame (typically from \code{\link{processRaw}}) containing
columns named \emph{N}, \emph{E}, and (possibly) \emph{weight}. Can contain
additional columns, which will be ignored.}

\item{keep_pts}{A vector of whole numbers for the number of points to leave
unsquashed for each count (\emph{N}). See the 'Details' section.}

\item{cut_offs}{A vector of whole numbers giving the cutoff values for the
number of unsquashed points, used to determine how many "super points" to end
up with after squashing each count (\emph{N}). See the 'Details' section.}

\item{num_super_pts}{A vector of whole numbers for the number of
"super points" to end up with after squashing each count (\emph{N}). Length
must be 1 more than length of \code{cut_offs}. See the 'Details' section.}
}
\value{
A data frame with column names \emph{N}, \emph{E}, and
  \emph{weight} containing the reduced data set.
}
\description{
\code{autoSquash} squashes data by calling \code{\link{squashData}} once for
  each count (\emph{N}), removing the need to repeatedly squash the same data
  set.
}
\details{
See \code{\link{squashData}} for details on squashing a given
  count (\emph{N}).

The elements in \code{keep_pts} determine how many points are left
  unsquashed for each count (\emph{N}). The first element in \code{keep_pts}
  is used for the smallest \emph{N} (usually 1). Each successive element is
  used for each successive \emph{N}. Once the last element is reached, it is
  used for all other \emph{N}.

For counts that are squashed, \code{cut_offs} and
  \code{num_super_pts} determine how the points are squashed. For instance,
  by default, if a given \emph{N} contains fewer than 500 points to be
  squashed, then those points are squashed to 50 "super points".
}
\examples{
data(caers)
proc <- processRaw(caers)
table(proc$N)

squash1 <- autoSquash(proc)
ftable(squash1[, c("N", "weight")])

squash2 <- autoSquash(proc, keep_pts = c(50, 5))
ftable(squash2[, c("N", "weight")])

squash3 <- autoSquash(proc, keep_pts = 100,
                      cut_offs = c(250, 500),
                      num_super_pts = c(20, 60, 125)
)
ftable(squash3[, c("N", "weight")])

}
\references{
DuMouchel W, Pregibon D (2001). "Empirical Bayes Screening for
  Multi-item Associations." In \emph{Proceedings of the Seventh ACM SIGKDD
  International Conference on Knowledge Discovery and Data Mining}, KDD '01,
  pp. 67-76. ACM, New York, NY, USA. ISBN 1-58113-391-X.
}
\seealso{
\code{\link{processRaw}} for data preparation and
  \code{\link{squashData}} for squashing individual counts
}
\keyword{openEBGM}
