% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/language_model.R
\name{language_model}
\alias{language_model}
\alias{language_model.language_model}
\alias{language_model.kgram_freqs}
\title{k-gram Language Models}
\usage{
language_model(object, ...)

\method{language_model}{language_model}(object, ...)

\method{language_model}{kgram_freqs}(object, smoother = "ml", N = param(object, "N"), ...)
}
\arguments{
\item{object}{an object which stores the information required to build the
k-gram model. At present, necessarily a \code{kgram_freqs} object, or a
\code{language_model} object of which a copy is desired (see Details).}

\item{...}{possible additional parameters required by the smoother.}

\item{smoother}{a length one character vector. Indicates the smoothing
technique to be applied to compute k-gram continuation probabilities. A list
of available smoothers can be obtained with \code{smoothers()}, and
further information on a particular smoother through
\code{info()}.}

\item{N}{a length one integer. Maximum order of k-grams to use in the language
model. This must be less than or equal to the order of the underlying
\code{kgram_freqs} object.}
}
\value{
A \code{language_model} object.
}
\description{
Build a k-gram language model.
\subsection{Principal methods supported by objects of class \code{language_model}}{
\itemize{
\item \code{probability()}: compute word continuation and sentence probabilities.
See \link[kgrams]{probability}.
\item \code{sample_sentences()}: generate random text by sampling from the
language model probability distribution at arbitrary temperature. See
\link[kgrams]{sample_sentences}.
\item \code{perplexity()}: compute the language model perplexity on a test
corpus. See \link[kgrams]{perplexity}.
}
}
}
\details{
These generics are used to construct objects of class \code{language_model}.
The \code{language_model} method is only needed to create copies of
\code{language_model} objects (that is to say, new copies which are not
altered by methods which modify the original object in place,
see e.g. \link[kgrams]{parameters}). The discussion below focuses on
language models and the \code{kgram_freqs} method.

\link[kgrams]{kgrams} supports several k-gram language models, including
Interpolated Kneser-Ney, Stupid Backoff and others
(see \link[kgrams]{smoothers}). The objects created by
\code{language_model()} have methods for computing word continuation and
sentence probabilities (see \link[kgrams]{probability}),
random text generation (see \link[kgrams]{sample_sentences})
and other types of language modeling tasks such as computing perplexities and
word prediction accuracies.

Smoothers often have tuning parameters, which need to be specified by
(exact) name through the \code{...} arguments; otherwise,
\code{language_model()} will use default values and, once per session, throw
a warning. \code{info(smoother)} lists all parameters needed by a
specific smoother, together with their allowed parameter space.

The run-time of \code{language_model()} may vary substantially for different
smoothing methods, depending on whether or not a method requires the
computation of additional quantities (that is to say, beyond k-gram counts)
in order to operate (this is, for instance, the case for the Kneser-Ney
smoother).
}
\examples{
# Create an interpolated Kneser-Ney 2-gram language model

freqs <- kgram_freqs("a a b a a b a b a b a b", 2)
model <- language_model(freqs, "kn", D = 0.5)
model
summary(model)
probability("a" \%|\% "b", model)

# For more examples, see ?probability, ?sample_sentences and ?perplexity.

}
\author{
Valerio Gherardi
}
