% Generated by roxygen2 (4.1.1): do not edit by hand
% Please edit documentation in R/dictionaries.R
\name{applyDictionary}
\alias{applyDictionary}
\alias{applyDictionary.dfm}
\title{apply a dictionary or thesaurus to an object}
\usage{
applyDictionary(x, dictionary, ...)

\method{applyDictionary}{dfm}(x, dictionary, exclusive = TRUE,
  valuetype = c("glob", "regex", "fixed"), case_insensitive = TRUE,
  capkeys = !exclusive, verbose = TRUE, ...)
}
\arguments{
\item{x}{object to which dictionary or thesaurus will be applied}

\item{dictionary}{the \link{dictionary}-class object that will be applied to \code{x}}

\item{...}{not used}

\item{exclusive}{if \code{TRUE}, remove all features not in dictionary;
otherwise, replace features matching dictionary values with their keys while
leaving other features unaffected}

\item{valuetype}{how to interpret dictionary values: \code{"glob"} for
"glob"-style wildcard expressions (the format used in Wordstat and LIWC
formatted dictionary values); \code{"regex"} for regular expressions; or
\code{"fixed"} for exact matching (entire words, for instance)}

\item{case_insensitive}{ignore the case of dictionary values if \code{TRUE}}

\item{capkeys}{if \code{TRUE}, convert dictionary or thesaurus keys to
uppercase to distinguish them from other features}

\item{verbose}{print status messages if \code{TRUE}}
}
\value{
an object of the type passed with the value-matching features replaced by dictionary keys
}
\description{
Convert features into equivalence classes defined by values of a dictionary
object.
}
\note{
Selecting only features defined in a "dictionary" is traditionally
known in text analysis as a dictionary method, even though technically this is more like a thesaurus.
If a more truly thesaurus-like application is desired, set \code{exclusive = FALSE} to convert features
defined as values in a dictionary into their keys, while keeping all other features.
}
\examples{
myDict <- dictionary(list(christmas = c("Christmas", "Santa", "holiday"),
                          opposition = c("Opposition", "reject", "notincorpus"),
                          taxglob = "tax*",
                          taxregex = "tax.+$",
                          country = c("United_States", "Sweden")))
myDfm <- dfm(c("My Christmas was ruined by your opposition tax plan.",
               "Does the United_States or Sweden have more progressive taxation?"),
             ignoredFeatures = stopwords("english"), verbose = FALSE)
myDfm

# glob format
applyDictionary(myDfm, myDict, valuetype = "glob")
applyDictionary(myDfm, myDict, valuetype = "glob", case_insensitive = FALSE)

# regex v. glob format: note that "united_states" is a regex match for "tax*"
applyDictionary(myDfm, myDict, valuetype = "glob")
applyDictionary(myDfm, myDict, valuetype = "regex", case_insensitive = TRUE)

# fixed format: no pattern matching
applyDictionary(myDfm, myDict, valuetype = "fixed")
applyDictionary(myDfm, myDict, valuetype = "fixed", case_insensitive = FALSE)
}

