% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/compare_documents.r
\name{tCorpus$compare_documents}
\alias{tCorpus$compare_documents}
\alias{compare_documents}
\title{Calculate the similarity of documents}
\arguments{
\item{feature}{the column name of the feature that is to be used for the comparison.}

\item{date_col}{the name of the column that contains the document date, as a date with time in POSIXct. If given together with hour_window, only documents within the given hour_window will be compared.}

\item{hour_window}{an integer, or a vector of two integers such as c(0,36), that specifies the time window in hours. If given together with date_col, only documents within the given hour_window will be compared.}

\item{measure}{the similarity measure. Currently supports 'cosine' (cosine similarity, symmetric) and 'overlap_pct' (asymmetric).}

\item{weight}{a weighting scheme for the document-term matrix. Default is term-frequency inverse document frequency with normalized rows (document length).}

\item{ngrams}{an integer. If given, ngrams of this length are used}

\item{from_subset}{An expression to select a subset. If given, only this subset will be compared to other documents}

\item{to_subset}{An expression to select a subset. If given, documents are only compared to this subset}
}
\description{
Calculate the similarity of documents
}
\section{Usage}{

## R6 method for class tCorpus. Use as tc$method (where tc is a tCorpus object).
\preformatted{compare_documents(feature='token', date_col=NULL, hour_window=NULL, measure=c('cosine','overlap_pct'), min_similarity=0, weight=c('norm_tfidf', 'tfidf', 'termfreq','docfreq'), ngrams=NA, from_subset=NULL, to_subset=NULL)}
}

\examples{
## Three short documents. The date column is converted to POSIXct, which is
## the type documented for the column referenced by date_col.
d = data.frame(text = c('a b c d e',
                        'e f g h i j k',
                        'a b c'),
               date = as.POSIXct(c('2010-01-01','2010-01-01','2012-01-01')))
tc = create_tcorpus(d)

## Compare all documents using the default settings and inspect the
## result as a data.frame
g = tc$compare_documents()
igraph::get.data.frame(g)

## Use the asymmetric overlap_pct measure instead
g = tc$compare_documents(measure = 'overlap_pct')
igraph::get.data.frame(g)

## Only compare documents that fall within the given hour window,
## based on the dates in the date column
g = tc$compare_documents(date_col = 'date', hour_window = c(0,36))
igraph::get.data.frame(g)
}
