% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/corpus.R
\docType{methods}
\name{subset}
\alias{subset}
\alias{subset,corpus-method}
\alias{subset,character-method}
\alias{subset,subcorpus-method}
\alias{subset,remote_corpus-method}
\title{Subsetting corpora and subcorpora}
\usage{
\S4method{subset}{corpus}(x, subset, regex = FALSE, ...)

\S4method{subset}{character}(x, ...)

\S4method{subset}{subcorpus}(x, subset, ...)

\S4method{subset}{remote_corpus}(x, subset)
}
\arguments{
\item{x}{A \code{corpus} or \code{subcorpus} object. A corpus may also be
specified by a length-one \code{character} vector.}

\item{subset}{A \code{logical} expression indicating elements or rows to
keep. The expression may be unevaluated (using \code{quote} or
\code{bquote}).}

\item{regex}{A \code{logical} value. If \code{TRUE}, values for s-attributes
defined using the three dots (...) are interpreted as regular expressions
and passed into a \code{grep} call for subsetting a table with the regions
and values of structural attributes. If \code{FALSE} (the default), values
for s-attributes must match exactly.}

\item{...}{An expression that will be used to create a subcorpus from
s-attributes, supplied as named arguments (e.g.
\code{speaker = "Angela Dorothea Merkel"}).}
}
\description{
The structural attributes of a corpus (s-attributes) can be used
  to generate subcorpora (i.e. \code{subcorpus} class objects) by applying
  the \code{subset}-method. To obtain a \code{subcorpus}, the
  \code{subset}-method can be applied to a corpus represented by a
  \code{corpus} object or by a length-one \code{character} vector (as a
  shortcut), and to a \code{subcorpus} object.
}
\examples{
# presumably makes the corpora shipped with the polmineR package available
# (GERMAPARLMINI is used throughout the examples below) - TODO confirm
use("polmineR")

# examples for standard and non-standard evaluation
a <- corpus("GERMAPARLMINI")

# subsetting a corpus object using non-standard evaluation: the second
# argument is a logical expression that refers to s-attributes by name
sc <- subset(a, speaker == "Angela Dorothea Merkel")
sc <- subset(a, speaker == "Bärbel Höhn")
sc <- subset(a, speaker == "Angela Dorothea Merkel" & date == "2009-10-28")
sc <- subset(a, grepl("Merkel", speaker))
sc <- subset(a, grepl("Merkel", speaker) & date == "2009-10-28")

# subsetting a corpus specified by a length-one character vector (shortcut
# that avoids creating the corpus object first)
sc <- subset("GERMAPARLMINI", grepl("Merkel", speaker))
sc <- subset("GERMAPARLMINI", speaker == "Angela Dorothea Merkel")
sc <- subset("GERMAPARLMINI", speaker == "Angela Dorothea Merkel" & date == "2009-10-28")
sc <- subset("GERMAPARLMINI", grepl("Merkel", speaker) & date == "2009-10-28")

# subsetting a corpus using the (old) logic of the partition-method:
# s-attributes are passed as named arguments via the three dots; values must
# match exactly unless regex = TRUE, in which case they are interpreted as
# regular expressions
sc <- subset(a, speaker = "Angela Dorothea Merkel")
sc <- subset(a, speaker = "Bärbel Höhn")
sc <- subset(a, speaker = "Angela Dorothea Merkel", date = "2009-10-28")
sc <- subset(a, speaker = "Merkel", regex = TRUE)
sc <- subset(a, speaker = c("Merkel", "Kauder"), regex = TRUE)
sc <- subset(a, speaker = "Merkel", date = "2009-10-28", regex = TRUE)

# providing the value for an s-attribute as a variable: the subset expression
# is passed unevaluated using quote
who <- "Volker Kauder"
sc <- subset(a, quote(speaker == who))

# use bquote for quasiquotation when using a variable for subsetting in a
# loop: the current value of the loop variable is substituted into the
# expression before it is passed to subset (note that the loop variable
# overwrites the variable who assigned above)
for (who in c("Angela Dorothea Merkel", "Volker Kauder", "Ronald Pofalla")){
   sc <- subset(a, bquote(speaker == .(who)))
   if (interactive()) print(size(sc))
}

# equivalent procedure with lapply (DOES NOT WORK YET)
# NOTE(review): this code is flagged as not working, yet it is not excluded
# from execution when the examples are run - confirm whether it runs cleanly
b <- lapply(
  c("Angela Dorothea Merkel", "Volker Kauder", "Ronald Pofalla"),
  function(who) subset(a, bquote(speaker == .(who)))
)
# apply size to every element of the list returned by lapply
sapply(b, size)
}
\seealso{
The methods applicable for the \code{subcorpus} object resulting
  from subsetting a corpus or subcorpus are described in the documentation of
  the \code{\link{subcorpus-class}}. Note that the \code{subset}-method can also be
  applied to \code{\link{textstat-class}} objects (and objects inheriting from
  this class).
}
