\name{rvgt.ftable}
\Rdversion{1.1}
\alias{rvgt.ftable}
\alias{rvgt.ftable.default}

\title{Create RVG Frequency Table for Random Variate Generator}

\description{
  Function for creating frequency tables for random variate
  generators. Thus a histogram is computed and the bin counts are stored
  in an array which can be used to visualize possible defects of the
  pseudo-random variate generator and run goodness-of-fit tests.

  Currently the function only works for generators for continuous
  univariate distributions.
}

\usage{
rvgt.ftable(n, rep=1, rdist, qdist, pdist, \dots,
            breaks = 101, trunc=NULL, exactu=FALSE, plot=FALSE)
}

\arguments{
  \item{n}{sample size for one repetition.}
  \item{rep}{number of repetitions.}
  \item{rdist}{random variate generator for a continuous univariate distribution.}
  \item{qdist}{quantile function for the distribution.}
  \item{pdist}{cumulative distribution function for the distribution.}
  \item{\dots }{parameters to be passed to \code{rdist}, \code{qdist}
    and \code{pdist}.}
  \item{breaks}{one of:
    \itemize{
      \item
      a single number giving the number of cells of histogram; or
      \item
      a vector giving the break points between histogram cells (in
      uniform scale). Notice that in the latter case the break points
      are automatically sorted and the first and last entries are set
      to 0 and 1, resp. Moreover, they must be different from each other.
    }
  }
  \item{trunc}{boundaries of truncated domain. (optional)}
  \item{exactu}{logical.
    If \code{TRUE} then the exact locations of the given break points
    are used. Otherwise, these points are slightly shifted in order to
    accelerate execution time, see details below.}
  \item{plot}{logical. If \code{TRUE}, a histogram is plotted.}
}

\details{
  \code{rvgt.ftable} returns tables of bin counts similar to the
  \code{\link{hist}} function. Bins can be specified either by the
  number of break points between the cells of the histogram, or by a
  list of break points in the \eqn{u}-scale.
  In the former case the break points are constructed such that all bins
  of the histogram have equal probability for the distribution under the
  null hypothesis, i.e., the break points are equidistributed in the
  \eqn{u}-scale using the formula \eqn{u_i=i/(breaks-1)} where
  \eqn{i=0,\dots,breaks-1}. 

  When the quantile function \code{qdist} is given, then these points
  are transformed into breaking points in the \eqn{x}-scale using
  \code{qdist}\eqn{(u_i)}. Thus the histogram can be computed directly
  for random points \eqn{X} that are generated by means of \code{rdist}.

  Otherwise the cumulative distribution function \code{pdist} must be
  given. If \code{exactu} is \code{TRUE},
  then all non-uniform random points \eqn{X} are first
  transformed into uniformly distributed random numbers
  \eqn{U=}\code{pdist}\eqn{(X)} for which the histogram is created.
  This is slower than directly using \eqn{X} but it is numerically more
  robust as round-off errors in \code{qdist} have much more influence
  than those in \code{pdist}.

  If \code{trunc} is given, then functions \code{qdist} and
  \code{pdist} are rescaled to this given domain. It is recommended to
  provide \code{pdist} even when \code{qdist} is given.
  
  If \code{exactu} is \code{FALSE} \emph{and} the quantile function
  \code{qdist} is missing, then the first sample of size \code{n} is
  used to estimate the quantiles for the given break points using
  function \code{\link{quantile}}. The break points in \eqn{u}-scale are
  then recomputed using these quantiles by means of the given
  probability function \code{pdist}.
  This is usually (much) faster than calling \code{pdist} on each
  generated point. However, the break points are slightly
  perturbed (but this does not affect the correctness of the
  frequency table).
  
  The argument \code{rep} allows creating multiple such arrays of bin
  counts and storing them in one table. This has the following
  advantages:
  \itemize{
    \item It allows for huge total sample sizes that would otherwise
    exceed the available memory.
    \item It can be used to visualize test results for increasing
    sample sizes.
    \item It allows for a two-level test.
  }
}

\note{
  It is important that all given functions -- \code{rdist},
  \code{qdist}, and \code{pdist} -- accept the same arguments passed to
  \code{rvgt.ftable} via \code{\dots}.
}

\value{
  An object of class \code{"rvgt.ftable"} which is a list with components:

  \item{n}{sample size.}
  \item{rep}{number of repetitions.}
  \item{ubreaks}{an array of break points in \eqn{u}-scale.}
  \item{xbreaks}{an array of break points in \eqn{x}-scale.}
  \item{count}{a matrix of \code{rep} rows and (\code{breaks}\eqn{-1})
    columns that contains the bin counts. The results for each
    repetition are stored row wise.}
}

\references{
  W. H\"ormann, J. Leydold, and G. Derflinger (2004):
  Automatic Nonuniform Random Variate Generation.
  Springer-Verlag, Berlin Heidelberg
}

\author{
  Sougata Chaudhuri \email{sgtchaudhuri@gmail.com},
  Josef Leydold \email{josef.leydold@wu.ac.at}
}

\seealso{
  See \code{\link{plot.rvgt.ftable}} for the syntax of the plotting
  method.
}

\examples{
## Create a frequency table for the normal distribution with mean 1 and
## standard deviation 2. Using 51 break points results in 50 bins.
## Use 5 repetitions of sample size 10^5 each, i.e., 5 times 10^5
## random variates in total.
ft <- rvgt.ftable(n=1e5,rep=5, rdist=rnorm,qdist=qnorm, breaks=51, mean=1,sd=2)

## Show histogram
plot(ft)

## Run a chi-square test
rvgt.chisq(ft)

## The following plots a histogram in a single call.
rvgt.ftable(n=1e5,rep=5, rdist=rnorm,qdist=qnorm, plot=TRUE)

## Use the cumulative distribution function when the quantile function
## is not available or if its round-off errors have serious impact.
ft <- rvgt.ftable(n=1e5,rep=5, rdist=rnorm,pdist=pnorm )
plot(ft)

## Create a frequency table for the normal distribution with
## non-equidistributed break points (given in u-scale; they are sorted
## automatically and the first and last entries are set to 0 and 1)
ft <- rvgt.ftable(n=1e5,rep=5, rdist=rnorm,qdist=qnorm, breaks=1/(1:100))
plot(ft)

## A (naive) generator for a truncated normal distribution:
## rejection sampling from the standard normal distribution, keeping
## only variates larger than 1. Returns a numeric vector of length n.
rdist <- function(n) {
  x <- numeric(n)
  ## seq_len(n) gives an empty loop for n = 0, whereas 1:n would
  ## iterate over c(1, 0) and return a vector of length 1
  for (i in seq_len(n)) {
    ## draw standard normal variates until one exceeds 1
    repeat {
      x[i] <- rnorm(1)
      if (x[i] > 1) break
    }
  }
  return(x)
}
## Create a frequency table for this generator. The argument 'trunc'
## gives the boundaries of the truncated domain; pdist and qdist are
## rescaled to this domain.
ft <- rvgt.ftable(n=1e3,rep=5, rdist=rdist,
                  pdist=pnorm, qdist=qnorm, trunc=c(1,Inf))
plot(ft)

}

\keyword{distribution}
\keyword{datagen}
