% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/01_UNIVARIATE_ANALYSIS.R
\name{imp.outliers}
\alias{imp.outliers}
\title{Imputation methods for outliers}
\usage{
imp.outliers(
  db,
  sc = c(NA, NaN, Inf),
  method = "iqr",
  range = 1.5,
  upper.pct = 0.95,
  lower.pct = 0.05
)
}
\arguments{
\item{db}{Data frame of risk factors supplied for imputation.}

\item{sc}{Vector of all special case elements. Default values are \code{c(NA, NaN, Inf)}. These values are
excluded both from the calculation of the imputed values and from the replacement.}

\item{method}{Imputation method. Available options are: \code{"iqr"} and \code{"percentile"}. Method \code{"iqr"}
identifies outliers by the rule used for the boxplot (five-number summary), while for the
\code{"percentile"} method the user defines the lower and upper limits for replacement.
Default value is \code{"iqr"}.}

\item{range}{Determines how far the plot whiskers extend out from the box. If range is positive,
the whiskers extend to the most extreme data point which is no more than range times the
interquartile range from the box. A value of zero causes the whiskers to extend to
the data extremes. Default \code{range} is set to 1.5.}

\item{upper.pct}{Upper limit for the percentile method. All values above this limit will be replaced by the value
identified at this percentile. Default value is set to the \eqn{95^{th}} percentile (0.95).
This parameter is used only if the selected \code{method} is \code{"percentile"}.}

\item{lower.pct}{Lower limit for the percentile method. All values below this limit will be replaced by the value
identified at this percentile. Default value is set to the \eqn{5^{th}} percentile (0.05).
This parameter is used only if the selected \code{method} is \code{"percentile"}.}
}
\value{
This function returns a list of two data frames. The first data frame contains the analyzed risk factors with
imputed values for outliers, while the second data frame presents the imputation report. Using the imputation report,
for each risk factor, the user can inspect the imputation info (\code{info}), the imputation method (\code{imputation.method}),
the imputed values (\code{imputation.val.upper} and \code{imputation.val.lower}), and
the number of imputed observations (\code{imputation.num.upper} and \code{imputation.num.lower}).
}
\description{
\code{imp.outliers} replaces a predefined share of the smallest and largest values with less
extreme values. This procedure is applicable only to numeric risk factors.
}
\examples{
# Load the package quietly and attach the example data set
suppressMessages(library(PDtoolkit))
data(gcd)

# Prepare the input: set some ages to missing, add a binned version of age
# (second element of the ndr.bin result) and add a column that is entirely NA
gcd$age[1:20] <- NA
gcd$age.bin <- ndr.bin(
  x = gcd$age,
  y = gcd$qual,
  sc.method = "separately",
  y.type = "bina"
)[[2]]
gcd$dummy1 <- NA

# Input used by both calls below: gcd without its first column
rf <- gcd[, -1]

# IQR method
res.iqr <- imp.outliers(
  db = rf,
  method = "iqr",
  range = 1.5
)
# Analyzed risk factors with imputed values
head(res.iqr[[1]])
# Imputation report
res.iqr[[2]]

# Percentile method
res.pct <- imp.outliers(
  db = rf,
  method = "percentile",
  upper.pct = 0.95,
  lower.pct = 0.05
)
# Analyzed risk factors with imputed values
head(res.pct[[1]])
# Imputation report
res.pct[[2]]
}
