% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/rows_not_duplicated.R
\name{rows_not_duplicated}
\alias{rows_not_duplicated}
\title{Verify that row data are not duplicated}
\usage{
rows_not_duplicated(x, cols = NULL, preconditions = NULL,
  brief = NULL, warn_count = NULL, notify_count = NULL,
  warn_fraction = NULL, notify_fraction = NULL, tbl_name = NULL,
  db_type = NULL, creds_file = NULL, initial_sql = NULL,
  file_path = NULL, col_types = NULL)
}
\arguments{
\item{x}{An agent object of class \code{ptblank_agent}.}

\item{cols}{An optional grouping of columns to check for duplication. If not
provided, the validation checks for duplicate records using data across all
columns.}

\item{preconditions}{An optional statement of filtering conditions that may
reduce the number of rows for validation for the current validation step.
The statements are executed for every row of the table in focus and are
often referred to as predicate statements (they either return \code{TRUE} or
\code{FALSE} for every row evaluated, where rows evaluated as \code{TRUE} are the
rows that are retained for the validation step). For example, if a table
has columns \code{a}, \code{b}, and \code{c}, and column \code{a} has numerical data, we can
write a statement \code{a < 5} that retains only those rows in the table where
values in column \code{a} are less than five.}

\item{brief}{An optional, text-based description for the validation step.}

\item{warn_count}{The threshold number for individual validations returning a
\code{FALSE} result before applying the \code{warn} flag.}

\item{notify_count}{The threshold number for individual validations returning
a \code{FALSE} result before applying the \code{notify} flag.}

\item{warn_fraction}{The threshold fraction for individual validations
returning a \code{FALSE} result over the entire set of individual validations.
Beyond this threshold, the \code{warn} flag will be applied.}

\item{notify_fraction}{The threshold fraction for individual validations
returning a \code{FALSE} result over the entire set of individual validations.
Beyond this threshold, the \code{notify} flag will be applied.}

\item{tbl_name}{The name of the local or remote table.}

\item{db_type}{If the table is located in a database, the type of database is
required here. Currently, this can be either \code{PostgreSQL} or \code{MySQL}.}

\item{creds_file}{If a connection to a database is required for reaching the
table specified in \code{tbl_name}, then a path to a credentials file can be
used to establish that connection. The credentials file is an \code{RDS} file
containing a character vector with the following items in the specified
order: (1) database name (\code{dbname}), (2) the \code{host} name, (3) the \code{port},
(4) the username (\code{user}), and (5) the \code{password}. This file can be easily
created using the \code{\link[=create_creds_file]{create_creds_file()}} function.}

\item{initial_sql}{When accessing a remote table, this offers the option to
supply an initial query component before conducting validations. An entire
SQL statement can be provided here, or, as a shortcut, the initial
\code{SELECT...} statement can be omitted for simple queries (e.g., \code{WHERE a > 1 AND b = 'one'}).}

\item{file_path}{An optional path for a tabular data file to be loaded for
this verification step. Valid types are CSV and TSV files.}

\item{col_types}{If validating a CSV or TSV file, an optional column
specification can be provided here as a string. This string representation
is where each character represents one column and the mappings are: \code{c} ->
character, \code{i} -> integer, \code{n} -> number, \code{d} -> double, \code{l} -> logical,
\code{D} -> date, \code{T} -> date time, \code{t} -> time, \code{?} -> guess, or \code{_/-}, which
skips the column.}
}
\value{
A \pkg{pointblank} agent object.
}
\description{
Verification step where row data should contain no duplicates.
}
\examples{
# Validate that no rows of the
# `small_table` CSV file are duplicated
# across columns `a` and `b`; do this
# by creating an agent, focussing on
# that table, creating a
# `rows_not_duplicated()` step, and then
# interrogating the table
agent <-
  create_agent() \%>\%
  focus_on(
    file_name = 
      system.file(
        "extdata", "small_table.csv",
        package = "pointblank"),
    col_types = "TDicidlc") \%>\%
  rows_not_duplicated(
    cols = a & b) \%>\%
  interrogate()

# Determine if all validation
# steps have passed by using
# `all_passed()`
all_passed(agent)

}
