% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/ngram_asweka.r
\name{Tokenize-AsWeka}
\alias{Tokenize-AsWeka}
\alias{ngram_asweka}
\title{Weka-like n-gram Tokenization}
\usage{
ngram_asweka(str, min = 2, max = 2, sep = " ")
}
\arguments{
\item{str}{The input text.}

\item{min, max}{The minimum and maximum 'n' as in 'n-gram'.}

\item{sep}{A set of separator characters for the "words".  See details for
information about how this works; it behaves a little differently
from \code{sep} arguments in other R functions.}
}
\value{
A vector of n-grams, listed in blocks of decreasing n, with the n-grams in
order within each block.  The output matches that of RWeka's n-gram tokenizer.
}
\description{
An n-gram tokenizer with identical output to the \code{NGramTokenizer}
function from the RWeka package.
}
\details{
This n-gram tokenizer behaves similarly to the tokenizer in RWeka, in
both its input and its return value.  Unlike the tokenizer \code{ngram()},
the return value is not a special class of external pointers; it is a vector,
and can therefore be serialized via \code{save()} or \code{saveRDS()}.
}
\examples{
library(ngram)

str <- "A B A C A B B"
ngram_asweka(str, min=2, max=4)

}
\seealso{
\code{\link{ngram}}
}
\keyword{Tokenization}

