% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/hstats.R
\name{hstats}
\alias{hstats}
\alias{hstats.default}
\alias{hstats.ranger}
\alias{hstats.Learner}
\alias{hstats.explainer}
\title{Calculate Interaction Statistics}
\usage{
hstats(object, ...)

\method{hstats}{default}(
  object,
  X,
  v = colnames(X),
  pred_fun = stats::predict,
  n_max = 300L,
  w = NULL,
  pairwise_m = 5L,
  threeway_m = pairwise_m,
  verbose = TRUE,
  ...
)

\method{hstats}{ranger}(
  object,
  X,
  v = colnames(X),
  pred_fun = function(m, X, ...) stats::predict(m, X, ...)$predictions,
  n_max = 300L,
  w = NULL,
  pairwise_m = 5L,
  threeway_m = pairwise_m,
  verbose = TRUE,
  ...
)

\method{hstats}{Learner}(
  object,
  X,
  v = colnames(X),
  pred_fun = NULL,
  n_max = 300L,
  w = NULL,
  pairwise_m = 5L,
  threeway_m = pairwise_m,
  verbose = TRUE,
  ...
)

\method{hstats}{explainer}(
  object,
  X = object[["data"]],
  v = colnames(X),
  pred_fun = object[["predict_function"]],
  n_max = 300L,
  w = object[["weights"]],
  pairwise_m = 5L,
  threeway_m = pairwise_m,
  verbose = TRUE,
  ...
)
}
\arguments{
\item{object}{Fitted model object.}

\item{...}{Additional arguments passed to \code{pred_fun(object, X, ...)},
for instance \code{type = "response"} in a \code{\link[=glm]{glm()}} model.}

\item{X}{A data.frame or matrix serving as background dataset.}

\item{v}{Vector of feature names, by default \code{colnames(X)}.}

\item{pred_fun}{Prediction function of the form \verb{function(object, X, ...)},
providing \eqn{K \ge 1} predictions per row. Its first argument represents the
model \code{object}, its second argument a data structure like \code{X}. Additional arguments
(such as \code{type = "response"} in a GLM) can be passed via \code{...}. The default,
\code{\link[stats:predict]{stats::predict()}}, will work in most cases. Note that column names in a resulting
matrix of predictions will be used as default column names in the results.}

\item{n_max}{If \code{X} has more than \code{n_max} rows, a random sample of \code{n_max} rows is
selected from \code{X}. In this case, set a random seed for reproducibility.}

\item{w}{Optional vector of case weights for each row of \code{X}.}

\item{pairwise_m}{Number of features for which pairwise statistics are to be
calculated. The features are selected based on Friedman and Popescu's overall
interaction strength \eqn{H^2_j}. Set to 0 to avoid pairwise calculations.
For multivariate predictions, the union of the column-wise strongest variable
names is taken. This can lead to very long run-times.}

\item{threeway_m}{Same as \code{pairwise_m}, but controlling the number of features for
which three-way interactions should be calculated. Not larger than \code{pairwise_m}.
Set to 0 to avoid three-way calculations.}

\item{verbose}{Should a progress bar be shown? The default is \code{TRUE}.}
}
\value{
An object of class "hstats" containing these elements:
\itemize{
\item \code{X}: Input \code{X} (sampled to \code{n_max} rows).
\item \code{w}: Input \code{w} (sampled to \code{n_max} values, or \code{NULL}).
\item \code{v}: Same as input \code{v}.
\item \code{f}: Matrix with (centered) predictions \eqn{F}.
\item \code{mean_f2}: (Weighted) column means of squared \code{f}. Used to normalize most statistics.
\item \code{F_j}: List of matrices, each representing (centered)
partial dependence functions \eqn{F_j}.
\item \code{F_not_j}: List of matrices with (centered) partial dependence
functions \eqn{F_{\setminus j}} of other features.
\item \code{K}: Number of columns of prediction matrix.
\item \code{pred_names}: Column names of prediction matrix.
\item \code{v_pairwise}: Subset of \code{v} with largest \code{h2_overall()} used for pairwise
calculations.
\item \code{combs2}: Named list of variable pairs for which pairwise partial
dependence functions are available.
\item \code{F_jk}: List of matrices, each representing (centered) bivariate
partial dependence functions \eqn{F_{jk}}.
\item \code{v_threeway}: Subset of \code{v} with largest \code{h2_overall()} used for three-way
calculations.
\item \code{combs3}: Named list of variable triples for which three-way partial
dependence functions are available.
\item \code{F_jkl}: List of matrices, each representing (centered) three-way
partial dependence functions \eqn{F_{jkl}}.
}
}
\description{
This is the main function of the package. It does the expensive calculations behind
the following H-statistics:
\itemize{
\item Total interaction strength \eqn{H^2}, a statistic measuring the proportion of
prediction variability unexplained by main effects of \code{v}, see \code{\link[=h2]{h2()}} for details.
\item Friedman and Popescu's statistic \eqn{H^2_j} of overall interaction strength per
feature, see \code{\link[=h2_overall]{h2_overall()}} for details.
\item Friedman and Popescu's statistic \eqn{H^2_{jk}} of pairwise interaction strength,
see \code{\link[=h2_pairwise]{h2_pairwise()}} for details.
\item Friedman and Popescu's statistic \eqn{H^2_{jkl}} of three-way interaction strength,
see \code{\link[=h2_threeway]{h2_threeway()}} for details.
}

Furthermore, it allows the calculation of an experimental partial-dependence-based
measure of feature importance, \eqn{\textrm{PDI}_j^2}. It equals the proportion of
prediction variability unexplained by other features, see \code{\link[=pd_importance]{pd_importance()}}
for details. (This statistic is not shown by \code{summary()} or \code{plot()}.)

Instead of using \code{summary()}, interaction statistics can also be obtained via the
more flexible functions \code{\link[=h2]{h2()}}, \code{\link[=h2_overall]{h2_overall()}}, \code{\link[=h2_pairwise]{h2_pairwise()}}, and
\code{\link[=h2_threeway]{h2_threeway()}}.
}
\section{Methods (by class)}{
\itemize{
\item \code{hstats(default)}: Default hstats method.

\item \code{hstats(ranger)}: Method for "ranger" models.

\item \code{hstats(Learner)}: Method for "mlr3" models.

\item \code{hstats(explainer)}: Method for DALEX "explainer".

}}
\examples{
# MODEL 1: Linear regression
fit <- lm(Sepal.Length ~ . + Petal.Width:Species, data = iris)
s <- hstats(fit, X = iris[-1])
s
plot(s)
summary(s)
  
# Absolute pairwise interaction strengths
h2_pairwise(s, normalize = FALSE, squared = FALSE, plot = FALSE)

# MODEL 2: Multi-response linear regression
fit <- lm(as.matrix(iris[1:2]) ~ Petal.Length + Petal.Width * Species, data = iris)
s <- hstats(fit, X = iris[3:5], verbose = FALSE)
plot(s)
summary(s)

# MODEL 3: Gamma GLM with log link
fit <- glm(Sepal.Length ~ ., data = iris, family = Gamma(link = log))

# No interactions for additive features, at least on link scale
s <- hstats(fit, X = iris[-1], verbose = FALSE)
summary(s)

# On original scale, we have interactions everywhere...
s <- hstats(fit, X = iris[-1], type = "response", verbose = FALSE)

# All three types use different denominators
plot(s, which = 1:3, ncol = 1)

# All statistics on same scale (of predictions)
plot(s, which = 1:3, squared = FALSE, normalize = FALSE, facet_scale = "free_y")
}
\references{
Friedman, Jerome H., and Bogdan E. Popescu. \emph{"Predictive Learning via Rule Ensembles."}
The Annals of Applied Statistics 2, no. 3 (2008): 916-54.
}
\seealso{
\code{\link[=h2]{h2()}}, \code{\link[=h2_overall]{h2_overall()}}, \code{\link[=h2_pairwise]{h2_pairwise()}}, \code{\link[=h2_threeway]{h2_threeway()}},
and \code{\link[=pd_importance]{pd_importance()}} for specific statistics calculated from the resulting object.
}
