% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/feature_preparation.r
\name{term_union}
\alias{term_union}
\title{Combine terms in a dtm}
\usage{
term_union(dtm, simmat, as_dfm = T, verbose = F, sep = "|", par = NA)
}
\arguments{
\item{dtm}{A quanteda \link[quanteda]{dfm} or a dgCMatrix.}

\item{simmat}{A similarity matrix in dgCMatrix format, for instance one created with \link{term_char_sim}.}

\item{as_dfm}{If TRUE, return the result as a quanteda dfm.}

\item{verbose}{If TRUE, report progress.}

\item{sep}{The separator used for pasting the terms}

\item{par}{If TRUE, add parentheses to colnames before combining. This is mainly for internal use, as it keeps
the grouping unambiguous when OR (term_union) and AND (term_intersect) operations are combined.
If NA, this is based on whether parentheses are present.}
}
\value{
A dgCMatrix or quanteda dfm
}
\description{
Given a dtm and a similarity (adjacency) matrix, group clusters of similar terms (simmat > 0) into a single column.
Column names will be concatenated with the \code{sep} separator, by default "|" (read as OR).
}
\examples{
# Four short documents containing spelling variants of the same name.
docs = c('That guy Gadaffi','Do you mean Kadaffi?',
         'Nah more like Gadaffel','What Gargamel?')

# Tokenize before building the dfm: calling dfm() directly on a character
# vector is deprecated in quanteda version 3 and later.
dfm = quanteda::dfm(quanteda::tokens(docs))

# Term-by-term similarity matrix computed from the column names (the terms).
simmat = term_char_sim(colnames(dfm), same_start=0)

# Group clusters of similar terms (simmat > 0) into single columns.
term_union(dfm, simmat, verbose = FALSE)
}
