% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/rollCast.R
\name{rollCast}
\alias{rollCast}
\title{Backtesting Semi-ARMA Models with Rolling Forecasts}
\usage{
rollCast(
  y,
  p = NULL,
  q = NULL,
  K = 5,
  method = c("norm", "boot"),
  alpha = 0.95,
  np.fcast = c("lin", "const"),
  it = 10000,
  n.start = 1000,
  msg = 1000,
  argsSmoots = list(),
  plot = TRUE,
  argsPlot = list()
)
}
\arguments{
\item{y}{a numeric vector that represents the equidistant time series assumed
to follow a Semi-ARMA model; must be ordered from past to present.}

\item{p}{an integer value \eqn{\geq 0}{\ge 0} that defines the AR order
\eqn{p} of the underlying ARMA(\eqn{p,q}) model within \code{y}; is set to
\code{NULL} by default; if no value is passed to \code{p} but one is passed
to \code{q}, \code{p} is set to \code{0}; if both \code{p} and \code{q} are
\code{NULL}, optimal orders following the BIC for
\eqn{0 \leq p,q \leq 5}{0 \le p,q \le 5} are chosen; decimal numbers will be
rounded off to integers.}

\item{q}{an integer value \eqn{\geq 0}{\ge 0} that defines the MA order
\eqn{q} of the underlying ARMA(\eqn{p,q}) model within \code{y}; is set to
\code{NULL} by default; if no value is passed to \code{q} but one is passed
to \code{p}, \code{q} is set to \code{0}; if both \code{p} and \code{q} are
\code{NULL}, optimal orders following the BIC for
\eqn{0 \leq p,q \leq 5}{0 \le p,q \le 5} are chosen; decimal numbers will be
rounded off to integers.}

\item{K}{a single, positive integer value that defines the number of
out-of-sample observations; the last \code{K} observations in \code{y} are
treated as the out-of-sample observations, whereas the rest of the
observations in \code{y} are the in-sample values.}

\item{method}{a character object; defines the method used for the calculation
of the forecasting intervals; with \code{"norm"} the intervals are obtained
under the assumption of normally distributed innovations; with \code{"boot"}
the intervals are obtained via a bootstrap; is set to \code{"norm"} by
default.}

\item{alpha}{a numeric vector of length 1 with \eqn{0 < } \code{alpha}
\eqn{ < 1}; the forecasting intervals will be obtained based on the
confidence level (\eqn{100}\code{alpha})-percent; is set to
\code{alpha = 0.95} by default, i.e., a \eqn{95}-percent confidence level.}

\item{np.fcast}{a character object; defines the forecasting method used
for the nonparametric trend; for \code{np.fcast = "lin"} the trend
is extrapolated linearly based on the last two trend estimates; for
\code{np.fcast = "const"}, the last trend estimate is used as a constant
estimate for future values; is set to \code{"lin"} by default.}

\item{it}{an integer that represents the total number of iterations, i.e.,
the number of simulated series; is set to \code{10000} by default; only
necessary, if \code{method = "boot"}; decimal
numbers will be rounded off to integers.}

\item{n.start}{an integer that defines the 'burn-in' number
of observations for the simulated ARMA series via bootstrap; is set to
\code{1000} by default; only necessary, if \code{method = "boot"}; decimal
numbers will be rounded off to integers.}

\item{msg}{an integer \eqn{\geq 1}{\ge 1}; controls the iteration status
report that is periodically printed to the R console if \code{method = "boot"};
for \code{msg = NA}, nothing will be printed; for any positive integer, a
message 'Iteration: \eqn{i}' is shown in the console whenever \eqn{i} is
divisible by \code{msg} without remainder; is set to \code{msg = 1000} by
default; decimal numbers will be rounded off to integers.}

\item{argsSmoots}{a list that contains arguments that will be passed to
\code{\link{msmooth}} for the estimation of the nonparametric trend
function; by default, the default values of \code{msmooth} are used.}

\item{plot}{a logical value that controls the graphical output; for the
default (\code{plot = TRUE}), the original series with the obtained point
forecasts as well as the forecasting intervals will be plotted; for
\code{plot = FALSE}, no plot will be created.}

\item{argsPlot}{a list; additional arguments for the standard plot function,
e.g., \code{xlim}, \code{type}, ..., can be passed to it; arguments with
respect to plotted graphs, e.g., the argument \code{col}, only affect the
original series \code{y}; please note that in accordance with the argument
\code{x} (lower case) of the standard plot function, an additional numeric
vector with time points can be implemented via the argument \code{x} (lower
case).}
}
\value{
A list with different elements is returned. The elements are as follows.
\describe{
\item{alpha}{a single numeric value; it states the confidence level
(\eqn{100}\code{alpha})-percent that has been considered for the forecasting
intervals.}
\item{breach}{a logical vector that states whether the \eqn{K} true
out-of-sample observations lie outside of the forecasting intervals,
respectively; a breach is denoted by \code{TRUE}.}
\item{breach.val}{a numeric vector that contains the margin of the breaches
(in absolute terms) for the \eqn{K} out-of-sample time points; if a breach
did not occur, the respective element is set to zero.}
\item{error}{a numeric vector that contains the simulated empirical
values of the forecasting error for \code{method = "boot"}; otherwise,
it is set to \code{NULL}.}
\item{fcast.rest}{a numeric vector that contains the \eqn{K} point forecasts
of the parametric part of the model.}
\item{fcast.roll}{a numeric matrix that contains the \eqn{K} rolling point
forecasts as well as the values of the respective forecasting intervals
for the complete model;
the first row contains the point forecasts, the lower boundary values
are in the second row and the upper values of the forecasting intervals
can be found in the third row.}
\item{fcast.trend}{a numeric vector that contains the \eqn{K} obtained trend
forecasts.}
\item{K}{a positive integer; states the number of out-of-sample observations
as well as the number of forecasts for the out-of-sample time points.}
\item{MASE}{the obtained value of the mean absolute scaled error for the
selected model.}
\item{method}{a character object that states, whether the forecasting
intervals were obtained via a bootstrap (\code{method = "boot"}) or under
the normality assumption for the innovations (\code{method = "norm"}).}
\item{model.nonpar}{the output (usually a list) of the nonparametric
trend estimation via \code{\link{msmooth}}.}
\item{model.par}{the output (usually a list) of the parametric ARMA
estimation of the detrended series via \code{\link[stats]{arima}}.}
\item{n}{the number of observations (in-sample & out-of-sample
observations).}
\item{n.in}{the number of in-sample observations (\code{n - n.out}).}
\item{n.out}{the number of out-of-sample observations (equals \code{K}).}
\item{np.fcast}{a character object that states the applied forecasting
method for the nonparametric trend function; either a linear
(\code{np.fcast = "lin"}) or a constant (\code{np.fcast = "const"})
extrapolation is possible.}
\item{quants}{a numeric vector of length 2 with the
\eqn{[100(1 -} \code{alpha}\eqn{)/2]}-percent and
\{\eqn{100}\eqn{[1 - (1 -} \code{alpha}\eqn{)/2]}\}-percent quantiles of
the forecasting error distribution.}
\item{RMSSE}{the obtained value of the root mean squared scaled error for
the selected model.}
\item{y}{a numeric vector that contains all true observations (in-sample &
out-of-sample observations).}
\item{y.in}{a numeric vector that contains all in-sample observations.}
\item{y.out}{a numeric vector that contains the \eqn{K} out-of-sample
observations.}
}
}
\description{
A simple backtest of Semi-ARMA models via rolling forecasts can be
implemented.
}
\details{
Define that an observed, equidistant time series \eqn{y_t}{y_[t]}, with
\eqn{t = 1, 2, ..., n}, follows
\deqn{y_t = m(x_t) + \epsilon_t,}{y_[t] = m(x_[t]) + \epsilon_[t],}
where \eqn{x_t = t/n}{x_[t] = t/n} is the rescaled time on the closed
interval \eqn{[0,1]} and \eqn{m(x_t)}{m(x_[t])} is a nonparametric and
deterministic trend function (see Beran and Feng, 2002, and Feng, Gries and
Fritz, 2020).
\eqn{\epsilon_t}{\epsilon_[t]}, on the other hand, is a stationary process
with \eqn{E(\epsilon_t) = 0}{E(\epsilon_[t]) = 0} and short-range dependence.
For the purpose of this function, \eqn{\epsilon_t}{\epsilon_[t]} is assumed
to follow an autoregressive-moving-average (ARMA) model with
\deqn{\epsilon_t = \zeta_t + \beta_1 \epsilon_{t-1} + ... + \beta_p
\epsilon_{t-p} + \alpha_1 \zeta_{t-1} + ... +
\alpha_q \zeta_{t-q}.}{\epsilon_[t] = \zeta_[t] + \beta_[1] \epsilon_[t-1] +
... + \beta_[p] \epsilon_[t-p] + \alpha_[1] \zeta_[t-1] + ... +
\alpha_[q] \zeta_[t-q].}
Here, the random variables \eqn{\zeta_t}{\zeta_[t]} are independent and
identically distributed (i.i.d.) with zero mean and a constant variance
and the coefficients \eqn{\alpha_j}{\alpha_[j]} and \eqn{\beta_i}{\beta_[i]},
\eqn{i = 1, 2, ..., p} and \eqn{j = 1, 2, ..., q}, are real numbers.
The combination of both previous formulas will be called a semiparametric
ARMA (Semi-ARMA) model.

An explicit forecasting method of Semi-ARMA models is described in
\code{\link{modelCast}}. To backtest a selected model, a slightly adjusted
procedure is used. The data is divided into in-sample and
out-of-sample values (usually the last \eqn{K = 5} observations in the data
are reserved for the out-of-sample observations). A model is fitted to the
in-sample data, whereas one-step rolling point forecasts and forecasting
intervals are obtained for the out-of-sample time points. The proposed
forecasts of the trend are either a linear or a constant extrapolation of
the trend with negligible forecasting intervals, whereas the point forecasts
of the stationary rest term are obtained via the selected ARMA(\eqn{p,q})
model (see Fritz et al., 2020). The corresponding forecasting intervals
are calculated under the assumption that the innovations
\eqn{\zeta_t}{\zeta_[t]} are either normally distributed (see e.g. pp.
93-94 in Brockwell and Davis, 2016) or via a forward bootstrap (see Lu and
Wang, 2020). For a one-step forecast for time point \eqn{t}, all observations
until time point \eqn{t-1} are assumed to be known.

The function calculates three important values for backtesting: the number
of breaches, i.e. the number of true observations that lie outside of the
forecasting intervals, the mean absolute scaled error (MASE, see Hyndman
and Koehler, 2006) and the root mean squared scaled error (RMSSE, see
Hyndman and Koehler, 2006). For the MASE, a value \eqn{< 1}
indicates a better average forecasting potential than a naive forecasting
approach.
Furthermore, it is independent of the scale of the data and can thus be
used to compare forecasts of different datasets. Closely related is the
RMSSE; here, however, the mean of the squared forecasting errors is computed
and scaled by the mean of the squared errors of the naive forecasting
approach. The root of that value is the RMSSE. Due to the close relation,
the interpretation of the RMSSE is similar but not identical to the
interpretation of the MASE. Of course, a value close to zero is preferred
in both cases.

To make use of the function, a numeric vector with the values of a time
series that is assumed to follow a Semi-ARMA model needs to be passed to
the argument \code{y}. Moreover, the arguments \code{p} and \code{q}
represent the AR and MA orders, respectively, of the underlying ARMA
process in the parametric part of the model. If both values are set to
\code{NULL}, an optimal order in accordance with the Bayesian Information
Criterion (BIC) will be selected. If only one of the values is \code{NULL},
it will be changed to zero instead. \code{K} defines the number of the
out-of-sample observations; these will be cut off the end of \code{y}, while
the remaining observations are treated as the in-sample observations. For the
\eqn{K} out-of-sample time points, rolling forecasts will be obtained.
\code{method} describes the method to use for the computation of the
prediction intervals. Under the normality assumption for the innovations
\eqn{\zeta_t}{\zeta_[t]}, intervals can be obtained via
\code{method = "norm"}. However, if the assumption does not hold, a
bootstrap can be implemented as well (\code{method = "boot"}). Both
approaches are explained in more detail in \code{\link{normCast}} and
\code{\link{bootCast}}, respectively. With \code{alpha}, the confidence
level of the forecasting intervals can be adjusted, as the
(\eqn{100}\code{alpha})-percent forecasting intervals will be computed. By
means of the argument \code{np.fcast}, the forecasting method for the
nonparametric trend function can be defined. Selectable are a linear
(\code{np.fcast = "lin"}) and a constant (\code{np.fcast = "const"})
extrapolation. For more information on these methods, we refer the reader to
\code{\link{trendCast}}.

\code{it}, \code{n.start} and \code{msg} are only
relevant for \code{method = "boot"}. With \code{it} the total number of
bootstrap iterations is defined, whereas \code{n.start} regulates, how
many 'burn-in' observations are generated for each simulated ARMA process
in the bootstrap. Since a bootstrap may take a longer computation time,
the argument \code{msg} helps adjusting the frequency of messages printed
to the R console that inform about the iteration status. Additional
information on these three function arguments can be found in
\code{\link{bootCast}}.

The argument \code{argsSmoots} is a list. In this list, different arguments
of the function \code{\link{msmooth}} can be implemented to adjust the
estimation of the nonparametric part of the complete model. The arguments
of the smoothing function are described in \code{\link{msmooth}}.

\code{rollCast} allows for a quick plot of the results. If the logical
argument \code{plot} is set to \code{TRUE}, a graphic with default
settings is created. Nevertheless, users are allowed to implement further
arguments of the standard plot function in the list \code{argsPlot}. For
example, the limits of the plot can be adjusted by \code{xlim} and
\code{ylim}. Furthermore, an argument \code{x} can be included in
\code{argsPlot} with the actual equidistant time points of the whole series
(in-sample & out-of-sample observations). Otherwise, simply \code{1:n} is
used as the in-sample time points by default.

NOTE:

Within this function, the \code{\link[stats]{arima}} function of the
\code{stats} package with its method \code{"CSS-ML"} is used throughout for
the estimation of ARMA models. Furthermore, to increase the performance,
C++ code via the \code{\link[Rcpp:Rcpp-package]{Rcpp}} and
\code{\link[RcppArmadillo:RcppArmadillo-package]{RcppArmadillo}} packages
was implemented.
}
\examples{
lgdp <- log(smoots::gdpUS$GDP)
time <- seq(from = 1947.25, to = 2019.5, by = 0.25)
backtest <- rollCast(lgdp, K = 5,
 argsPlot = list(x = time, xlim = c(2012, 2019.5), col = "forestgreen",
 type = "b", pch = 20, lty = 2, main = "Example"))
backtest

}
\references{
Beran, J., and Feng, Y. (2002). Local polynomial fitting with long-memory,
short-memory and antipersistent errors. Annals of the Institute of
Statistical Mathematics, 54, 291-311.

Brockwell, P. J., and Davis, R. A. (2016). Introduction to time series
and forecasting, 3rd edition. Springer.

Fritz, M., Forstinger, S., Feng, Y., and Gries, T. (2020). Forecasting
economic growth processes for developing economies. Unpublished.

Feng, Y., Gries, T. and Fritz, M. (2020). Data-driven
local polynomial for the trend and its derivatives in economic time
series. Journal of Nonparametric Statistics, 32:2, 510-533.

Hyndman, R. J., and Koehler, A. B. (2006). Another look at measures of
forecast accuracy. International Journal of Forecasting, 22:4, 679-688.

Lu, X., and Wang, L. (2020). Bootstrap prediction interval for ARMA models
with unknown orders. REVSTAT–Statistical Journal, 18:3, 375-396.
}
\author{
\itemize{
\item Yuanhua Feng (Department of Economics, Paderborn University), \cr
Author of the Algorithms \cr
Website: \url{https://wiwi.uni-paderborn.de/en/dep4/feng/}
\item Dominik Schulz (Research Assistant) (Department of Economics, Paderborn
University), \cr
Package Creator and Maintainer
}
}
