% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/character-shingles-tokenizers.R
\name{tokenize_character_shingles}
\alias{tokenize_character_shingles}
\title{Character shingle tokenizers}
\usage{
tokenize_character_shingles(x, n = 3L, n_min = n, lowercase = TRUE,
  strip_non_alphanum = TRUE, simplify = FALSE)
}
\arguments{
\item{x}{A character vector or a list of character vectors to be tokenized
into character shingles. If \code{x} is a character vector, it can be of
any length, and each element will be tokenized separately. If \code{x} is a
list of character vectors, each element of the list should have a length of
1.}

\item{n}{The number of characters in each shingle, or the maximum number
of characters when \code{n_min} is less than \code{n}. This must be an
integer greater than or equal to 1.}

\item{n_min}{The minimum number of characters in each shingle. This must be
an integer greater than or equal to 1, and less than or equal to \code{n}.}

\item{lowercase}{Should the characters be made lower case?}

\item{strip_non_alphanum}{Should punctuation and white space be stripped?}

\item{simplify}{\code{FALSE} by default so that a consistent value is
returned regardless of length of input. If \code{TRUE}, then an input with
a single element will return a character vector of tokens instead of a
list.}
}
\value{
A list of character vectors containing the tokens, with one element
  in the list for each element that was passed as input. If \code{simplify =
  TRUE} and only a single element was passed as input, then the output is a
  character vector of tokens.
}
\description{
The character shingle tokenizer functions like an n-gram tokenizer, except
the units that are shingled are characters instead of words. Options to the
function let you determine whether non-alphanumeric characters like
punctuation should be retained or discarded.
}
\examples{
x <- c("Now is the hour of our discontent")
tokenize_character_shingles(x)
tokenize_character_shingles(x, n = 5)
tokenize_character_shingles(x, n = 5, strip_non_alphanum = FALSE)
tokenize_character_shingles(x, n = 5, n_min = 3, strip_non_alphanum = FALSE)

}

