% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/qdapRegex-package.R
\docType{data}
\name{regex_usa}
\alias{regex_usa}
\title{Canned Regular Expressions (United States of America)}
\format{A list with 54 elements}
\usage{
data(regex_usa)
}
\description{
A dataset containing a list of U.S. specific, canned regular expressions for use
in various functions within the \pkg{qdapRegex} package.
}
\details{
The following canned regular expressions are included:
\describe{
  \item{rm_abbreviation}{abbreviations containing single lower case or capital letter followed by a period and then an optional space (this must be repeated 2 or more times)}
  \item{rm_between}{Remove characters between a left and right boundary including the boundaries; note contains \code{"\%s"} that is replaced by \code{\link[base]{sprintf}} and is not a valid regex on its own}
  \item{rm_between2}{Remove characters between a left and right boundary NOT including the boundaries; note contains \code{"\%s"} that is replaced by \code{\link[base]{sprintf}} and is not a valid regex on its own}
  \item{rm_caps}{words containing 2 or more consecutive upper case letters and no lower case}
  \item{rm_caps_phrase}{phrases of 1 word or more containing 1 or more consecutive upper case letters and no lower case; if phrase is one word long then phrase must be 2 or more consecutive capital letters}
  \item{rm_citation}{substring that looks for in-text and parenthetical APA6 style citations (attempts to exclude references)}
  \item{rm_citation2}{substring that looks for in-text APA6 style citations (attempts to exclude references)}
  \item{rm_citation3}{substring that looks for parenthetical APA6 style citations (attempts to exclude references)}
  \item{rm_city_state}{substring with \emph{city} (single lower case word or multiple consecutive capitalized words before a comma and state) & \emph{state} (2 consecutive capital letters)}
  \item{rm_city_state_zip}{substring with \emph{city} (single lower case word or multiple consecutive capitalized words before a comma and state) & \emph{state} (2 consecutive capital letters) & \emph{zip code} (exactly 5 or 5+4 consecutive digits)}
  \item{rm_date}{dates in the form of 2 digit month, 2 digit day, and 2 or 4 digit year.  Separator between month, day, and year may be dot (.), slash (/), or dash (-)}
  \item{rm_date2}{dates in the form of 3-9 letters followed by one or more spaces, 2 digits, a comma (,), one or more spaces, and 4 digits}
  \item{rm_date3}{dates in the form of XXXX-XX-XX; hyphen separated string of 4 digit year, 2 digit month, and 2 digit day}
  \item{rm_date4}{dates in the form of any of \code{rm_date}, \code{rm_date2}, and \code{rm_date3}}
  \item{rm_dollar}{substring with dollar sign ($) followed by (1) just dollars (no decimal), (2) dollars and cents (whole number and decimal), or (3) just cents (decimal value); dollars may contain commas}
  \item{rm_email}{substring with (1) alphanumeric characters or dash (-), plus (+), or underscore (_) (\emph{This may be repeated}) (2) followed by at (@), followed by the same regex sequence as before the at (@), and ending with dot (.) and 2-14 digits}
  \item{rm_emoticon}{common emoticons (logic is complicated to explain in words) using ">?[:;=8XB]\{1\}[-~+o^]?[|\")(&gt;DO>\{pP3/]+|</?3|XD+|D:<|x[-~+o^]?[|\")(&gt;DO>\{pP3/]+" regex pattern; general pattern is optional hat character, followed by eyes character, followed by optional nose character, and ending with a mouth character}
  \item{rm_endmark}{substring of the last endmark group in a string; endmarks include (! ? . * OR |)}
  \item{rm_endmark2}{substring of the last endmark group in a string; endmarks include (! ? OR .)}
  \item{rm_endmark3}{substring of the last endmark group in a string; endmarks include (! ? . * | ; OR :)}
  \item{rm_hash}{substring that begins with a hash (#) followed by a word}
  \item{rm_nchar_words}{substring of letters (that may contain apostrophes) n letters long (apostrophe not counted in length); note contains \code{"\%s"} that is replaced by \code{\link[base]{sprintf}} and is not a valid regex on its own}
  \item{rm_nchar_words2}{substring of letters (that may contain apostrophes) n letters long (apostrophe counted in length); note contains \code{"\%s"} that is replaced by \code{\link[base]{sprintf}} and is not a valid regex on its own}
  \item{rm_non_ascii}{substring of 2 digits or letters a-f inside of a left and right angle brace in the form of \code{"<a4>"}}
  \item{rm_non_words}{substring of any character that isn't a letter, apostrophe, or single space}
  \item{rm_number}{substring that may begin with dash (-) for negatives, and is (1) just whole number (no decimal), (2) whole number and decimal, or (3) just decimal value; regex pattern provided by Jason Gray}
  \item{rm_percent}{substring beginning with (1) just whole number (no decimal), (2) whole number and decimal, or (3) just decimal value and followed by a percent sign (\%)}
  \item{rm_phone}{phone numbers in the form of optional country code, valid 3 digit prefix, and 7 digits (may contain hyphens and parentheses); logic is complex to explain (see \url{http://stackoverflow.com/a/21008254/1000343} for more)}
  \item{rm_postal_code}{U.S. state abbreviations (and District of Columbia) that are constrained to just possible U.S. state names, not just two consecutive capital letters; taken from Mike Hamilton's submission found at \url{http://regexlib.com/REDetails.aspx?regexp_id=2177}}
  \item{rm_repeated_characters}{substring with a repetition of repeated characters within a word; regex pattern retrieved from \href{http://stackoverflow.com}{StackOverflow}'s, \href{http://stackoverflow.com/users/3679490/vks}{vks}: \url{http://stackoverflow.com/a/29438461/1000343}}
  \item{rm_repeated_phrases}{substring with a phrase (a sequence of 1 or more words) that is repeated 2 or more times (case is ignored; separating periods and commas are ignored); regex pattern retrieved from \href{http://stackoverflow.com}{StackOverflow}'s, \href{http://stackoverflow.com/users/2725969/brodieg}{BrodieG}: \url{http://stackoverflow.com/a/28786617/1000343}}
  \item{rm_repeated_words}{substring with a word (marked with a boundary) that is repeated 2 or more times (case is ignored)}
  \item{rm_tag}{substring that begins with an at (@) followed by a word}
  \item{rm_tag2}{Twitter substring that begins with an at (@) followed by a word composed of alpha-numeric characters and underscores, no longer than 15 characters}
  \item{rm_title_name}{substring beginning with title (Mrs., Mr., Ms., Dr.) that is case independent or full title (Miss, Mizz, mizz) followed by a single lower case word or multiple capitalized words}
  \item{rm_time}{substring that (1) must begin with 0-2 digits, (2) must be followed by a single colon (:), (3) optionally may be followed by either a colon (:) or a dot (.), (4) optionally may be followed by 1-infinite digits (if previous condition is true)}
  \item{rm_time2}{substring that is identical to \code{rm_time} with the additional search for Ante Meridiem/Post Meridiem abbreviations (e.g., AM, p.m., etc.)}
  \item{rm_transcript_time}{substring that is specific to transcription time stamps in the form of HH:MM:SS.OS where OS is milliseconds.  HH: and .OS are optional. The SS.OS period divide may also be a comma or additional colon.  The HH:SS divide may also be a period.  String may be affixed with pound sign (#).}
  \item{rm_twitter_url}{\href{https://twitter.com/}{Twitter} short link/url; substring optionally beginning with \emph{http}, followed by \emph{t.co} ending on a space or end of string (whichever comes first)}
  \item{rm_url}{substring beginning with \emph{http}, \emph{www.}, or \emph{ftp} and ending on a space or end of string (whichever comes first); note that this regex is simple and may not cover all valid URLs or may include invalid URLs}
  \item{rm_url2}{substring beginning with \emph{http}, \emph{www.}, or \emph{ftp} and more constrained than \code{rm_url}; based on @imme_emosol's response from \url{https://mathiasbynens.be/demo/url-regex}}
  \item{rm_url3}{substring beginning with \emph{http} or \emph{ftp} and more constrained than \code{rm_url} & \code{rm_url2} though light-weight, making it ideal for validation purposes; taken from @imme_emosol's response found \url{https://mathiasbynens.be/demo/url-regex}}
  \item{rm_white}{substring of white space(s); this regular expression combines \code{rm_white_bracket}, \code{rm_white_colon}, \code{rm_white_comma}, \code{rm_white_endmark}, \code{rm_white_lead}, \code{rm_white_trail}, and \code{rm_white_multiple}}
  \item{rm_white_bracket}{substring of white space(s) following left brackets ("\{", "(", "[") or preceding right brackets ("\}", ")", "]")}
  \item{rm_white_colon}{substring of white space(s) preceding colon(s)/semicolon(s)}
  \item{rm_white_comma}{substring of white space(s) preceding a comma}
  \item{rm_white_endmark}{substring of white space(s) preceding a single occurrence/combination of period(s), question mark(s), and exclamation point(s)}
  \item{rm_white_lead}{substring of leading white space(s)}
  \item{rm_white_lead_trail}{substring of leading/trailing white space(s)}
  \item{rm_white_multiple}{substring of multiple, consecutive white spaces}
  \item{rm_white_punctuation}{substring of white space(s) preceding a comma or a single occurrence/combination of colon(s), semicolon(s), period(s), question mark(s), and exclamation point(s)}
  \item{rm_white_trail}{substring of trailing white space(s)}
  \item{rm_zip}{substring of 5 digits optionally followed by a dash and 4 more digits} 
}
}
\section{Extra}{
 Use \code{qdapRegex:::examine_regex()} to interactively explore the 
regular expressions in \code{regex_usa}.  This will provide a browser + console
based breakdown of each regex in the dictionary.
}

\keyword{datasets}
