% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/preprocessCorpus.R
\name{toDocumentTermMatrix}
\alias{toDocumentTermMatrix}
\title{Default preprocessing of corpus and conversion to document-term matrix}
\usage{
toDocumentTermMatrix(x, language = "english", minWordLength = 3,
  sparsity = NULL, removeStopwords = TRUE, stemming = TRUE,
  weighting = function(x) tm::weightTfIdf(x, normalize = FALSE))
}
\arguments{
\item{x}{\code{\link[tm]{Corpus}} object which should be processed}

\item{language}{Default language used for preprocessing (i.e. stop word removal and stemming)}

\item{minWordLength}{Minimum length of words used for cut-off; i.e. shorter words are 
removed. Default is 3.}

\item{sparsity}{A numeric value for the maximal allowed sparsity; it must be greater than zero
and smaller than one. Default is \code{NULL} in order to suppress this functionality.}

\item{removeStopwords}{Flag indicating whether to remove stopwords or not (default: \code{TRUE})}

\item{stemming}{Perform stemming (default: TRUE)}

\item{weighting}{Function used for weighting of words; default is a link to the tf-idf scheme.}
}
\value{
Object of \code{\link[tm]{DocumentTermMatrix}}
}
\description{
Preprocess existing corpus of type \code{\link[tm]{Corpus}} according to default operations. 
This helper function groups all standard preprocessing steps such that the usage of the 
package is more convenient. The result is a document-term matrix.
}
\seealso{
\code{\link[tm]{DocumentTermMatrix}} for the underlying class
}
\keyword{corpus}
\keyword{preprocessing}
