% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/vtreat.R
\name{mkCrossFrameCExperiment}
\alias{mkCrossFrameCExperiment}
\title{Run categorical cross-frame experiment.}
\usage{
mkCrossFrameCExperiment(dframe, varlist, outcomename, outcometarget, ...,
  weights = c(), minFraction = 0.02, smFactor = 0, rareCount = 0,
  rareSig = 1, collarProb = 0, codeRestriction = NULL,
  customCoders = NULL, scale = FALSE, doCollar = FALSE,
  splitFunction = NULL, ncross = 3, forceSplit = FALSE,
  catScaling = TRUE, verbose = TRUE, parallelCluster = NULL,
  use_parallel = TRUE)
}
\arguments{
\item{dframe}{Data frame to learn treatments from (training data), must have at least 1 row.}

\item{varlist}{Names of columns to treat (effective variables).}

\item{outcomename}{Name of column holding outcome variable. dframe[[outcomename]] must be only finite non-missing values.}

\item{outcometarget}{Value/level of outcome to be considered "success",  and there must be a cut such that dframe[[outcomename]]==outcometarget at least twice and dframe[[outcomename]]!=outcometarget at least twice.}

\item{...}{no additional arguments, declared to force named binding of later arguments}

\item{weights}{optional training weights for each row}

\item{minFraction}{optional minimum frequency a categorical level must have to be converted to an indicator column.}

\item{smFactor}{optional smoothing factor for impact coding models.}

\item{rareCount}{optional integer, allow levels with this count or below to be pooled into a shared rare-level.  Defaults to 0 or off.}

\item{rareSig}{optional numeric, suppress levels from pooling at this significance value or greater.  Defaults to 1 or off.}

\item{collarProb}{what fraction of the data (pseudo-probability) to collar data at if doCollar is set during \code{\link{prepare.treatmentplan}}.}

\item{codeRestriction}{what types of variables to produce (character array of level codes, NULL means no restriction).}

\item{customCoders}{map from code names to custom categorical variable encoding functions (please see \url{https://github.com/WinVector/vtreat/blob/master/extras/CustomLevelCoders.md}).}

\item{scale}{optional if TRUE replace numeric variables with regression ("move to outcome-scale").}

\item{doCollar}{optional if TRUE collar numeric variables by cutting off after a tail-probability specified by collarProb during treatment design.}

\item{splitFunction}{(optional) see \code{vtreat::buildEvalSets}.}

\item{ncross}{optional scalar>=2 number of cross-validation rounds to design.}

\item{forceSplit}{logical, if TRUE force cross-validated significance calculations on all variables.}

\item{catScaling}{optional, if TRUE use glm() linkspace, if FALSE use lm() for scaling.}

\item{verbose}{if TRUE print progress.}

\item{parallelCluster}{(optional) a cluster object created by package parallel or package snow.}

\item{use_parallel}{logical, if TRUE use parallel methods.}
}
\value{
list with treatments and crossFrame
}
\description{
Builds a \code{\link{designTreatmentsC}} treatment plan and a data frame prepared 
from \code{dframe} that is "cross" in the sense each row is treated using a treatment
plan built from a subset of dframe disjoint from the given row.
The goal is to try to supply a method of breaking nested model bias other than splitting
into calibration, training, test sets.
}
\examples{

# Build a synthetic data set with two high-cardinality categorical variables.
set.seed(23525)
zip <- paste('z',1:100)
N <- 200
# zip is drawn independently for every row; zip2 is only 20 draws, which
# data.frame() recycles to N rows, so every zip2 level repeats several times.
d <- data.frame(zip=sample(zip,N,replace=TRUE),
                zip2=sample(zip,20,replace=TRUE),
                y=runif(N))
# Per-level effect, named by level so it can be looked up by zip code.
del <- runif(length(zip))
names(del) <- zip
# as.character() keeps this a lookup by name even when data.frame() has
# converted zip2 to a factor; indexing by a factor would use its integer
# level codes instead of the level names.
d$y <- d$y + del[as.character(d$zip2)]
# Binary outcome: TRUE when y is at or above its mean.
d$yc <- d$y>=mean(d$y)
# Design the treatment plan and the cross-frame in one step.
cC <- mkCrossFrameCExperiment(d,c('zip','zip2'),'yc',TRUE,
  rareCount=2,rareSig=0.9)
# Only zip2 carries signal about the outcome, so its coding correlates better.
cor(as.numeric(cC$crossFrame$yc),cC$crossFrame$zip_catB)  # poor
cor(as.numeric(cC$crossFrame$yc),cC$crossFrame$zip2_catB) # better
# Pull out the two components of the returned list.
treatments <- cC$treatments
dTrainV <- cC$crossFrame

}
\seealso{
\code{\link{designTreatmentsC}}, \code{\link{designTreatmentsN}}, \code{\link{prepare.treatmentplan}}
}
