% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/hdMTD_BIC.R
\name{hdMTD_BIC}
\alias{hdMTD_BIC}
\title{The Bayesian Information Criterion (BIC) method for inference in MTD models}
\usage{
hdMTD_BIC(
  X,
  d,
  S = seq_len(d),
  minl = 1,
  maxl = length(S),
  xi = 1/2,
  A = NULL,
  byl = FALSE,
  BICvalue = FALSE,
  single_matrix = FALSE,
  indep_part = TRUE,
  zeta = maxl,
  warn = FALSE,
  ...
)
}
\arguments{
\item{X}{A vector or single-column data frame containing a chain sample (\code{X[1]} is the most recent).}

\item{d}{A positive integer representing an upper bound for the chain order.}

\item{S}{A numeric vector of positive integers from which this function will select
a set of relevant lags. Typically, \code{S} is a subset of \code{1:d}. If \code{S}
is not provided, by default \code{S=1:d}.}

\item{minl}{A positive integer. \code{minl} represents the smallest length of any relevant lag
set this function might return. If \code{minl == maxl}, this function will return the
subset of \code{S} of length \code{minl} with the lowest BIC. If \code{minl < maxl}, the function
will consider subsets ranging from length \code{minl} to length \code{maxl} when searching for
the subset of \code{S} with the smallest BIC.}

\item{maxl}{A positive integer equal to or greater than \code{minl} but less than the number
of elements in \code{S} (\code{maxl = length(S)} is accepted but in this case the output will
always be \code{S}). \code{maxl} represents the largest length of any relevant lag set this
function might return.}

\item{xi}{The BIC penalization term constant. Defaults to 1/2. A smaller \code{xi} (near 0)
weakens the penalty on the number of parameters, so larger lag sets are penalized less.}

\item{A}{A vector with positive integers representing the state space. If not provided,
this function will set \code{A=sort(unique(X))}.}

\item{byl}{Logical. If \code{TRUE}, the function will look, for each length from \code{minl}
to \code{maxl}, for the set with the smallest BIC, and return one such set per length.
If \code{minl==maxl}, setting \code{byl=TRUE} or \code{FALSE} makes no difference, since the
function will only calculate the BIC for sets with \code{maxl} elements in the relevant lag set.}

\item{BICvalue}{Logical. If \code{TRUE}, the function will also return the calculated values of
the BIC for the estimated relevant lag sets.}

\item{single_matrix}{Logical. If \code{TRUE}, the chain sample is thought to come from an MTD model
where the stochastic matrices \eqn{p_j} are constant across all lags \eqn{j\in \Lambda}. In practice,
this means the user believes the stochastic matrices for every lag in \code{S} are the same, which reduces
the number of parameters in the penalization term.}

\item{indep_part}{Logical. If \code{FALSE}, there is no independent distribution and \eqn{\lambda_0=0}, which
reduces the number of parameters in the penalization term.}

\item{zeta}{A positive integer representing the number of distinct matrices \eqn{p_j}
in the MTD, which affects the number of parameters in the penalization term. Defaults
to \code{maxl}. See more in \emph{Details}.}

\item{warn}{Logical. If \code{TRUE}, the function warns the user when \code{A} is set automatically.}

\item{...}{Additional arguments (not used in this function, but maintained for compatibility with \code{\link[=hdMTD]{hdMTD()}}).}
}
\value{
Returns a vector with the estimated relevant lag set using BIC. It might return more
than one set if \code{minl < maxl} and \code{byl = TRUE}. Additionally, it can return the
BIC values of the returned lag sets if \code{BICvalue = TRUE}.
}
\description{
A function for estimating the relevant lag set \eqn{\Lambda} of a Markov chain using the
Bayesian Information Criterion (BIC). This method selects the set of lags
that minimizes a penalized negative log-likelihood for a given sample; see \emph{References} below for
details on the method.
}
\details{
\strong{Criterion.} For each candidate lag set \eqn{T} contained in \eqn{S} with
size \eqn{l = |T|} where \code{minl <= l <= maxl}, \code{hdMTD_BIC()} evaluates
\deqn{BIC(T) = - L_T + xi * df(T) * log(N),}
where \eqn{N = length(X)} and
\deqn{L_T = \sum_{x_T \in A^T} \sum_{a \in A} N(a x_T) * log( \hat{p}(a | x_T) ).}
The empirical conditionals are
\deqn{\hat{p}(a | x_T) = N(a x_T) / N(x_T),}
computed from the sample counts (same quantities returned by
\code{\link{freqTab}} and \code{\link{empirical_probs}}).

\strong{Degrees of freedom.} The parameter count \eqn{df(T)} is the number of free
parameters of an MTD model with lag set \eqn{T} and state space \eqn{A}, honoring the
constraints \code{single_matrix}, \code{indep_part}, and \code{zeta}:
\deqn{df(T) = w_{df} + p0_{df} + |A| * (|A| - 1) * zeta.}
Here \eqn{zeta} is the number of distinct \eqn{p_j} matrices allowed across lags
(by default \eqn{zeta = l}; setting \code{single_matrix = TRUE} forces \eqn{zeta = 1}).
The weight and independent-part contributions are:
\eqn{w_{df} = l} if \code{indep_part} is \code{TRUE}, otherwise \eqn{w_{df} = l - 1};
\eqn{p0_{df} = |A| - 1} if \code{indep_part} is \code{TRUE}, otherwise \eqn{p0_{df} = 0}.

\strong{Scale.} With \code{xi = 1/2} (the default), \eqn{BIC} equals one half of the
classical Schwarz BIC \eqn{-2 * L_T + df(T) * log(N)}; minimizing either criterion selects
the same lag set.

\strong{Note.} The likelihood term \eqn{L_T} sums over the \eqn{N - max(T)} effective
transitions, while the penalty uses \eqn{log(N)} (this matches the implementation).

Note that the upper bound for the order of the chain (\code{d}) affects the estimation
of the transition probabilities. If we run the function with a certain order parameter \code{d},
only the sequences of length \code{d} that appeared in the sample will be counted. Therefore,
all transition probabilities, and hence all BIC values, will be calculated with respect to
that \code{d}. If we use another value for \code{d} to run the function, even if the output
agrees with that of the previous run, its BIC value might change a little.

The parameter \code{zeta} indicates the number of distinct matrices \eqn{p_j} in the MTD.
If \code{zeta = 1}, all matrices \eqn{p_j} are identical; if \code{zeta = 2}, there exist
two groups of distinct matrices, and so on. The largest value for \code{zeta} is \code{maxl}
since this is the largest number of matrices \eqn{p_j}. When \code{minl<maxl},
for each \code{minl} \eqn{\leq} \code{l} \eqn{\leq} \code{maxl}, \code{zeta = min(zeta,l)}.
If \code{single_matrix = TRUE} then \code{zeta} is set to 1.
}
\examples{
# Simulate a chain from an MTD model
set.seed(1)
M <- MTDmodel(Lambda = c(1, 3), A = c(1, 2), lam0 = 0.05)
X <- perfectSample(M, N = 400)

# Fit using BIC with a single lag
hdMTD_BIC(X, d = 6, minl = 1, maxl = 1)

# Fit using BIC with lag selection and extract BIC value
hdMTD_BIC(X, d = 3, minl = 1, maxl = 2, BICvalue = TRUE)

}
\references{
Imre Csiszár, Paul C. Shields (2000).
The consistency of the BIC Markov order estimator.
\emph{The Annals of Statistics}, \emph{28}(6), 1601-1619.
\doi{10.1214/aos/1015957472}
}
