% Generated by roxygen2 (4.1.1): do not edit by hand
% Please edit documentation in R/ngrams.R
\name{ngrams}
\alias{ngrams}
\alias{ngrams.character}
\alias{ngrams.tokenizedTexts}
\alias{skipgrams}
\alias{skipgrams.character}
\alias{skipgrams.tokenizedTexts}
\title{Create ngrams}
\usage{
ngrams(x, ...)

\method{ngrams}{character}(x, n = 2, window = 1, concatenator = "_", ...)

\method{ngrams}{tokenizedTexts}(x, n = 2, window = 1, concatenator = "_",
  ...)

skipgrams(x, ...)

\method{skipgrams}{character}(x, n = 2, k = 1, concatenator = "_", ...)

\method{skipgrams}{tokenizedTexts}(x, n = 2, k = 1, concatenator = "_",
  ...)
}
\arguments{
\item{x}{a tokenizedTexts object or a character vector of tokens}

\item{...}{additional arguments passed to \code{\link[parallel]{mclapply}},
which applies \code{ngrams.character()} to the \code{tokenizedTexts} list object}

\item{n}{integer vector specifying the number of elements to be concatenated
in each ngram}

\item{window}{integer vector specifying the adjacency width for tokens
forming the ngrams, default is 1 for only immediately neighbouring words}

\item{concatenator}{character for combining words, default is \code{_}
(underscore) character}

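\item{k}{for skip-grams only, \code{k} is the maximum number of skips
allowed between the tokens forming a skip-gram; it is passed through to
\code{ngrams} as a \code{window} value of \code{1:(k+1)}}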
}
\value{
a tokenizedTexts object consisting of a list of character vectors of
  ngrams, one list element per text, or a character vector if called on a
  simple character vector
}
\description{
Create a set of ngrams (words in sequence) from tokenized text(s)
}
\details{
Normally, \code{\link{ngrams}} will be called through
  \code{\link{tokenize}}, but these functions are also exported in case a
  user wants to perform lower-level ngram construction on tokenized texts.

  \code{\link{skipgrams}} is a wrapper to \code{\link{ngrams}} that simply
  passes through a \code{window} value of \code{1:(k+1)}, conforming to the
  definition of skip-grams found in Guthrie et al. (2006): A \eqn{k} skip-gram is
  an ngram which is a superset of all ngrams and each \eqn{(k-i)} skip-gram until
  \eqn{(k-i)==0} (which includes 0 skip-grams).
}
\examples{
ngrams(LETTERS, n = 2, window = 2)
ngrams(LETTERS, n = 3, window = 2)
ngrams(LETTERS, n = 3, window = 3)

tokens <- tokenize("the quick brown fox jumped over the lazy dog.",
                   removePunct = TRUE, simplify = TRUE)
ngrams(tokens, n = 1:3)
ngrams(tokens, n = c(2,4), window = 1:2, concatenator = " ")

# skipgrams
tokens <- tokenize(toLower("Insurgents killed in ongoing fighting."),
                   removePunct = TRUE, simplify = TRUE)
skipgrams(tokens, n = 2, k = 2, concatenator = " ")
skipgrams(tokens, n = 3, k = 2, concatenator = " ")
}
\author{
Ken Benoit
}
\references{
\href{http://homepages.inf.ed.ac.uk/ballison/pdf/lrec_skipgrams.pdf}{Guthrie,
  D, B Allison, W Liu, and L Guthrie. 2006. "A Closer Look at Skip-Gram
  Modelling."}
}

