% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/emp_kl.R
\name{emp_kl_div}
\alias{emp_kl_div}
\title{Calculate the empirical KL divergence for a representative dataset as compared to the true dataset}
\usage{
emp_kl_div(
  true_dat,
  rep_dat,
  categoric_vars,
  numeric_vars,
  l_m = 10,
  weights = rep(1, nrow(rep_dat))
)
}
\arguments{
\item{true_dat}{The true dataset}

\item{rep_dat}{A representative dataset}

\item{categoric_vars}{A vector of column positions or column names for the categoric variables.}

\item{numeric_vars}{A vector of column positions or column names for the numeric variables.}

\item{l_m}{Approximate number of true data points to be in each bin for numeric variables. Default is 10.}

\item{weights}{If weighted frequencies are desired, pass a vector of weights with one entry per row of the representative dataset. Default is equal weights of 1.}
}
\description{
Calculate the empirical KL divergence for a representative dataset as compared to the true dataset
}
\details{
This function computes the estimated KL divergence between two samples of data
using the empirical distribution functions of the representative dataset and the true dataset,
with continuous variables transformed to categorical using a histogram approach with
statistically equivalent data-dependent bins, as detailed in

Wang, Qing, Sanjeev R. Kulkarni, and Sergio Verdú. "Divergence estimation of continuous distributions based on data-dependent partitions." IEEE Transactions on Information Theory 51.9 (2005): 3064-3074.
}
\examples{

## load the example data
data("rl_reg1")

## build a representative dataset by random prototyping
rep_dat_random <- represent(
  rl_reg1,
  identity.rl_reg1,
  "proto_random",
  id = FALSE,
  parallel = FALSE
)

## columns to compare, grouped by variable type
cat_vars <- c("sex")
num_vars <- c("income", "bp")
keep_cols <- c(cat_vars, num_vars)

## empirical KL divergence of the representative data against the true data
true_dat <- rl_reg1[unique(identity.rl_reg1), keep_cols]
emp_kl_div(true_dat, rep_dat_random[, keep_cols], cat_vars, num_vars)

}
