% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/compare_corpora.r
\name{tCorpus$compare_subset}
\alias{tCorpus$compare_subset}
\alias{compare_subset}
\title{Compare vocabulary of a subset of a tCorpus to the rest of the tCorpus}
\arguments{
\item{feature}{the column name of the feature that is to be compared}

\item{subset_x}{an expression to subset the tCorpus. The vocabulary of the subset will be compared to the rest of the tCorpus}

\item{subset_meta_x}{like subset_x, but using the meta data}

\item{query_x}{like subset_x, but using a query search to select documents (see \link{tCorpus$search_contexts})}

\item{query_feature}{if query_x is used, the column name of the feature used in the query search.}

\item{smooth}{Laplace smoothing is used for the calculation of the ratio of the relative term frequency. Here you can set the added value.}

\item{min_ratio}{threshold for the ratio value, which is the ratio of the relative frequency of a term in the subset (x) to its relative frequency in the rest of the tCorpus (y)}

\item{min_chi2}{threshold for the chi^2 value}

\item{yates_cor}{mode for using Yates' correction in the chi^2 calculation. Can be turned on ("yes") or off ("no"), or set to "auto", in which case Cochran's rule is used to determine whether Yates' correction is used.}

\item{what}{choose whether to compare the frequency ("freq") of terms, or the document frequency ("docfreq"). This also affects how chi^2 is calculated, comparing either freq relative to vocabulary size or docfreq relative to corpus size (N)}
}
\value{
A vocabularyComparison object
}
\description{
\strong{Usage:}
}
\details{
## R6 method for class tCorpus. Use as tc$method (where tc is a tCorpus object).
\preformatted{compare_subset(feature, subset_x=NULL, subset_meta_x=NULL, query_x=NULL, query_feature='token', smooth=0.1, min_ratio=NULL, min_chi2=NULL, yates_cor=c('auto','yes','no'), what=c('freq','docfreq','cooccurrence'))}
}
\examples{
tc = create_tcorpus(sotu_texts, doc_column = 'id')

tc$preprocess('token', 'feature', remove_stopwords = TRUE, use_stemming = TRUE)

comp = tc$compare_subset('feature', subset_meta_x = president == 'Barack Obama')
comp = comp[order(-comp$chi),]
head(comp)
\dontrun{
plot(comp)
}

comp = tc$compare_subset('feature', query_x = 'terroris*')
comp = comp[order(-comp$chi),]
head(comp, 10)
}
