% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/model.R
\name{keyATM_read}
\alias{keyATM_read}
\title{Read texts}
\usage{
keyATM_read(
  texts,
  encoding = "UTF-8",
  check = TRUE,
  keep_docnames = FALSE,
  progress_bar = FALSE,
  split = 0
)
}
\arguments{
\item{texts}{input. keyATM takes a quanteda dfm (dgCMatrix), data.frame, \pkg{tibble} tbl_df, or a vector of file paths.}

\item{encoding}{character. Only used when \code{texts} is a vector of file paths. Default is \code{"UTF-8"}.}

\item{check}{logical. If \code{TRUE}, check whether there is anything wrong with the structure of texts. Default is \code{TRUE}.}

\item{keep_docnames}{logical. If \code{TRUE}, it keeps the document names in a quanteda dfm. Default is \code{FALSE}.}

\item{progress_bar}{logical. If \code{TRUE}, it shows a progress bar (currently it only supports a quanteda object). Default is \code{FALSE}.}

\item{split}{numeric. This option works only with a quanteda dfm. It creates two subsets of the dfm by randomly splitting each document (i.e., the total number of documents is the same in the two subsets). This option specifies the split proportion. Default is \code{0}.}
}
\value{
A \code{keyATM_docs} object. The first element is a list whose elements are split texts. The length of the list equals the number of documents.
}
\description{
Read texts and create a \code{keyATM_docs} object, which is a list of texts.
}
\examples{
\dontrun{
 # Use quanteda dfm
 keyATM_docs <- keyATM_read(texts = quanteda_dfm)

 # Use data.frame or tibble (texts should be stored in a column named `text`)
 keyATM_docs <- keyATM_read(texts = data_frame_object)
 keyATM_docs <- keyATM_read(texts = tibble_object)

 # Use a vector that stores full paths to the text files
 files <- list.files(doc_folder, pattern = "*.txt", full.names = TRUE)
 keyATM_docs <- keyATM_read(texts = files)

}
}
