% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/corpus-methods-quanteda.R
\name{texts}
\alias{texts}
\alias{texts<-}
\alias{as.character.corpus}
\title{Get or assign corpus texts}
\usage{
texts(x, groups = NULL, ...)

texts(x) <- value

\method{as.character}{corpus}(x, ...)
}
\arguments{
\item{x}{a quanteda \link{corpus} or character object}

\item{groups}{either: a character vector containing the names of document
variables to be used for grouping; or a factor (or object that can be
coerced into a factor) equal in length to the number of documents, used for
aggregating the texts through concatenation}

\item{...}{unused}

\item{value}{character vector of the new texts}
}
\value{
For \code{texts}, a character vector of the texts in the corpus.

For \code{texts <-}, the corpus with its texts replaced by \code{value}.

\code{as.character(x)} is equivalent to \code{texts(x)}.
}
\description{
Get or replace the texts in a \link{corpus} object, with grouping options. 
Works for plain character vectors too, if \code{groups} is a factor.
}
\details{
\code{as.character(x)}, where \code{x} is a corpus, is equivalent to
calling \code{texts(x)}.
}
\note{
As a matter of good practice in text analysis workflows, you are strongly
  encouraged \emph{not} to modify the substance of the texts in a corpus.
  Rather, this sort of processing is better performed through downstream 
  operations.  For instance, do not lowercase the texts in a corpus, or you 
  will never be able to recover the original case.  Rather, apply 
  \code{\link{toLower}} to the corpus and use the result as an input, e.g. to
  \code{\link{tokenize}}.
}
\examples{
# character counts of the texts in the subset of data_corpus_inaugural
# selected by the condition Year < 1806
nchar(texts(corpus_subset(data_corpus_inaugural, Year < 1806)))

# grouping on a document variable: the variable name "President" is passed
# as the groups argument, and nchar() is applied to the grouped result
nchar(texts(corpus_subset(data_corpus_inaugural, Year < 1806), groups = "President"))

# grouping a character vector using a factor: the first call shows the
# ungrouped character counts for comparison; the second groups the same five
# texts by a factor built from the "President" values of the first five
# corpus documents
nchar(data_char_inaugural[1:5])
nchar(texts(data_char_inaugural[1:5], 
            groups = as.factor(data_corpus_inaugural[1:5, "President"])))

# replacing texts: build a small two-document corpus with British spellings
BritCorpus <- corpus(c("We must prioritise honour in our neighbourhood.", 
                       "Aluminium is a valourous metal."))
# assign regex-substituted texts back into the corpus; patterns and
# replacements are supplied as parallel vectors, and vectorize_all = FALSE
# asks stringi to apply every pattern/replacement pair to each text
# (see the stringi documentation for the exact semantics)
texts(BritCorpus) <- 
    stringi::stri_replace_all_regex(texts(BritCorpus),
                                   c("ise", "([nlb])our", "nium"),
                                   c("ize", "$1or", "num"),
                                   vectorize_all = FALSE)
# show the texts after the replacement
texts(BritCorpus)
# the replacement form can also be combined with indexing to change a
# single text, here the second one
texts(BritCorpus)[2] <- "New text number 2."
texts(BritCorpus)
}
\keyword{corpus}
