% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/rarify.R
\name{rarify}
\alias{rarify}
\title{Rarify (subsample) biological sample to fixed count}
\usage{
rarify(inbug, sample.ID, abund, subsiz, mySeed = NA, verbose = FALSE)
}
\arguments{
\item{inbug}{Input data frame.  Needs 3 columns (SampleID, taxonomicID,
Count).}

\item{sample.ID}{Column name in inbug for sample identifier.}

\item{abund}{Column name in inbug for organism count.}

\item{subsiz}{Target subsample size for each sample.}

\item{mySeed}{Seed for the random number generator.  If provided, repeated
runs with the same inbug data will produce the same results.  Default = NA (a
random seed will be used).}

\item{verbose}{Boolean value indicating whether status messages are output to
the console.  Default = FALSE.}
}
\value{
Returns a data frame with the same three columns but the abund field
has been modified so the total count for each sample is no longer above the
target (subsiz).
}
\description{
Takes as input a 3 column data frame (SampleID, TaxonID, Count) and returns a
similar data frame with revised Counts.

The other inputs are subsample size (target number of organisms in each
sample) and seed. The seed is given so the results can be reproduced from the
same input file.  If no seed is given, a random seed is used.
}
\details{
rarify function:
 R function to rarify (subsample) a macroinvertebrate sample down to a fixed
 count.  By John Van Sickle, USEPA (email: \email{VanSickle.John@epa.gov}).
 Version 1.0, 06/10/05.
}
\examples{
# Reduce 12 samples (each with more than 500 organisms) to 500 organisms each.

# Example biological data
df_biodata <- data_bio2rarify
dim(df_biodata)

# Target subsample size and candidate seeds; only Seed_US is passed to rarify()
mySize  <- 500
Seed_OR <- 18590214
Seed_WA <- 18891111
Seed_US <- 17760704

# Run the subsampling
bugs_mysize <- rarify(
  inbug     = df_biodata,
  sample.ID = "SampleID",
  abund     = "N_Taxa",
  subsiz    = mySize,
  mySeed    = Seed_US,
  verbose   = FALSE
)

# Dimensions of the result
dim(bugs_mysize)

# Pair each original count with its subsampled count
key_cols <- c("SampleID", "TaxaID")
df_compare <- merge(x = df_biodata, y = bugs_mysize, by = key_cols,
                    suffixes = c("_Orig", "_500"))
df_compare <- df_compare[, c(key_cols, "N_Taxa_Orig", "N_Taxa_500")]

# Per-sample totals before and after subsampling
tbl_totals <- aggregate(cbind(N_Taxa_Orig, N_Taxa_500) ~ SampleID,
                        data = df_compare,
                        FUN = sum)

\donttest{
# Write the subsampled data to a tab-delimited file in the session temp dir
out_file <- file.path(tempdir(), paste0("bugs.", mySize, ".txt"))
write.table(bugs_mysize, file = out_file, sep = "\t")
}
}
