% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/folda.R
\name{folda}
\alias{folda}
\title{Forward Uncorrelated Linear Discriminant Analysis}
\usage{
folda(
  datX,
  response,
  subsetMethod = c("forward", "all"),
  testStat = c("Pillai", "Wilks"),
  correction = TRUE,
  alpha = 0.1,
  prior = NULL,
  misClassCost = NULL,
  missingMethod = c("medianFlag", "newLevel"),
  downSampling = FALSE,
  kSample = NULL
)
}
\arguments{
\item{datX}{A data frame containing the predictor variables.}

\item{response}{A factor representing the response variable with multiple
classes.}

\item{subsetMethod}{A character string specifying the method for variable
selection. Options are \code{"forward"} for forward selection or \code{"all"} for
using all variables. Default is \code{"forward"}.}

\item{testStat}{A character string specifying the test statistic to use for
forward selection. Options are \code{"Pillai"} or \code{"Wilks"}. Default is
\code{"Pillai"}.}

\item{correction}{A logical value indicating whether to apply a multiple
comparison correction during forward selection. Default is \code{TRUE}.}

\item{alpha}{A numeric value between 0 and 1 specifying the significance
level for the test statistic during forward selection. Default is 0.1.}

\item{prior}{A numeric vector representing the prior probabilities for each
class in the response variable. If \code{NULL}, the observed class frequencies
are used as the prior. Default is \code{NULL}.}

\item{misClassCost}{A square matrix \eqn{C}, where each element \eqn{C_{ij}}
represents the cost of classifying an observation into class \eqn{i} given
that it truly belongs to class \eqn{j}. If \code{NULL}, a default matrix with
equal misclassification costs for all class pairs is used. Default is
\code{NULL}.}

\item{missingMethod}{A character vector of length 2 specifying how to handle
missing values for numerical and categorical variables, respectively.
Default is \code{c("medianFlag", "newLevel")}.}

\item{downSampling}{A logical value indicating whether to perform
downsampling, either to balance the class distribution in the training data
or to speed up the program. Default is \code{FALSE}.}

\item{kSample}{An integer specifying the maximum number of samples to take
from each class during downsampling. If \code{NULL}, the number of samples is
limited to the size of the smallest class. Default is \code{NULL}.}
}
\value{
A list of class \code{ULDA} containing the following components:
\item{scaling}{The matrix of scaling coefficients for the linear
discriminants.} \item{groupMeans}{The group means of the linear
discriminant scores.} \item{prior}{The prior probabilities for each class.}
\item{misClassCost}{The misclassification cost matrix.}
\item{misReference}{A reference for handling missing values.}
\item{terms}{The terms used in the model formula.} \item{xlevels}{The
levels of the factors used in the model.} \item{varIdx}{The indices of the
selected variables.} \item{varSD}{The standard deviations of the selected
variables.} \item{varCenter}{The means of the selected variables.}
\item{statPillai}{Pillai's trace statistic.} \item{pValue}{The p-value
associated with Pillai's trace.} \item{predGini}{The Gini index of the
predictions on the training data.} \item{confusionMatrix}{The confusion
matrix for the training data predictions.} \item{forwardInfo}{Information
about the forward selection process, if applicable.} \item{stopInfo}{A
message indicating why forward selection stopped, if applicable.}
}
\description{
This function fits a ULDA (Uncorrelated Linear Discriminant Analysis) model
to the provided data, with an option for forward selection of variables based
on Pillai's trace or Wilks' Lambda. It can also handle missing values,
perform downsampling, and compute the linear discriminant scores and group
means for classification. The function returns a fitted ULDA model object.
}
\examples{
# Fit the ULDA model using all variables
fit <- folda(datX = iris[, -5], response = iris[, 5], subsetMethod = "all")

# Fit the ULDA model with forward selection
fit <- folda(datX = iris[, -5], response = iris[, 5], subsetMethod = "forward")
}
\references{
Howland, P., Jeon, M., & Park, H. (2003). Structure
preserving dimension reduction for clustered text data based on the
generalized singular value decomposition. \emph{SIAM Journal on Matrix
Analysis and Applications}.

Wang, S. (2024). A New Forward Discriminant Analysis Framework Based On
Pillai's Trace and ULDA. \emph{arXiv preprint arXiv:2409.03136}. Available
at \url{https://arxiv.org/abs/2409.03136}.
}
