% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/textParsers.R
\name{token}
\alias{token}
\title{Tokenize (or split) text and emit n-word combinations from a document}
\usage{
token(n, tokenSep = "+", ignoreCase = FALSE,
  delimiter = "[ \\\\t\\\\b\\\\f\\\\r]+", punctuation = NULL,
  stemming = FALSE, stopWords = FALSE, sep = " ", minLength = 1)
}
\arguments{
\item{n}{number of words}

\item{tokenSep}{a character string to separate the tokens when \code{n > 1}}

\item{ignoreCase}{logical: treat text as-is (\code{FALSE}) or convert to all lowercase
(\code{TRUE}). Default is \code{FALSE}. Note that if \code{stemming} is set to 
\code{TRUE}, tokens will always be converted to lowercase, so this option 
will be ignored.}

\item{delimiter}{character or string that divides one word from the next. 
You can use a regular expression as the \code{delimiter} value.}

\item{punctuation}{a regular expression that specifies the punctuation characters 
the parser will remove before it evaluates the input text.}

\item{stemming}{logical: If true, apply Porter2 Stemming to each token to reduce 
it to its root form. Default is \code{FALSE}.}

\item{stopWords}{logical or string with the name of the file that contains stop words
that should be ignored when parsing text. If \code{TRUE}, stop words are ignored
without naming a specific file. Each stop word in the file is specified on a separate line.}

\item{sep}{a character string to separate multiple text columns.}

\item{minLength}{exclude tokens shorter than minLength characters.}
}
\value{
pluggable token parser
}
\description{
When \code{n = 1}, simply tokenizes text and emits words with counts. When \code{n > 1},
tokenized words are combined into permutations of length \code{n} within
each document.
}

