% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/explain_forecast.R
\name{explain_forecast}
\alias{explain_forecast}
\title{Explain a Forecast from Time Series Models with Dependence-Aware (Conditional/Observational) Shapley Values}
\usage{
explain_forecast(
  model,
  y,
  xreg = NULL,
  train_idx = NULL,
  explain_idx,
  explain_y_lags,
  explain_xreg_lags = explain_y_lags,
  horizon,
  approach,
  phi0,
  max_n_coalitions = NULL,
  iterative = NULL,
  group_lags = TRUE,
  group = NULL,
  n_MC_samples = 1000,
  seed = NULL,
  predict_model = NULL,
  get_model_specs = NULL,
  verbose = "basic",
  extra_computation_args = list(),
  iterative_args = list(),
  output_args = list(),
  ...
)
}
\arguments{
\item{model}{Model object.
The model whose predictions you want to explain.
Run \code{\link[=get_supported_models]{get_supported_models()}}
for a table of which models \code{explain} supports natively. Unsupported models
can still be explained by passing \code{predict_model} and (optionally) \code{get_model_specs},
see details for more information.}

\item{y}{Matrix, data.frame/data.table or a numeric vector.
Contains the endogenous variables used to estimate the (conditional) distributions
needed to properly estimate the conditional expectations in the Shapley formula
including the observations to be explained.}

\item{xreg}{Matrix, data.frame/data.table or a numeric vector.
Contains the exogenous variables used to estimate the (conditional) distributions
needed to properly estimate the conditional expectations in the Shapley formula
including the observations to be explained.
As exogenous variables are used contemporaneously when producing a forecast,
this item should contain \code{nrow(y) + horizon} rows.}

\item{train_idx}{Numeric vector.
The row indices in \code{y} and \code{xreg} denoting points in time to use when estimating the conditional
expectations in the Shapley value formula.
If \code{train_idx = NULL} (default) all indices not selected to be explained will be used.}

\item{explain_idx}{Numeric vector.
The row indices in \code{y} and \code{xreg} denoting points in time to explain.}

\item{explain_y_lags}{Numeric vector.
Denotes the number of lags that should be used for each variable in \code{y} when making a forecast.}

\item{explain_xreg_lags}{Numeric vector.
If \code{xreg != NULL}, denotes the number of lags that should be used for each variable in \code{xreg} when making a forecast.}

\item{horizon}{Numeric.
The forecast horizon to explain. Passed to the \code{predict_model} function.}

\item{approach}{Character vector of length \code{1} or one less than the number of features.
All elements should either be \code{"gaussian"}, \code{"copula"}, \code{"empirical"}, \code{"ctree"}, \code{"vaeac"},
\code{"categorical"}, \code{"timeseries"}, \code{"independence"}, \code{"regression_separate"}, or \code{"regression_surrogate"}.
The two regression approaches cannot be combined with any other approach.
See details for more information.}

\item{phi0}{Numeric.
The prediction value for unseen data, i.e., an estimate of the expected prediction without conditioning on any
features.
Typically set this equal to the mean of the response in the training data, but alternatives such as the mean
of the training predictions are also reasonable.}

\item{max_n_coalitions}{Integer.
Upper limit on the number of unique feature/group coalitions to use in the iterative procedure
(if \code{iterative = TRUE}).
If \code{iterative = FALSE}, it represents the number of feature/group coalitions to use directly.
The quantity refers to the number of unique feature coalitions if \code{group = NULL},
and group coalitions if \code{group != NULL}.
\code{max_n_coalitions = NULL} corresponds to \code{2^n_features}.}

\item{iterative}{Logical or NULL.
If \code{NULL} (default), set to \code{TRUE} if there are more than 5 features/groups, and \code{FALSE} otherwise.
If \code{TRUE}, Shapley values are estimated iteratively for faster, sufficiently accurate results.
First an initial number of coalitions is sampled, then bootstrapping estimates the variance of the Shapley values.
A convergence criterion determines if the variances are sufficiently small. If not, additional samples are added.
The process repeats until the variances are below the threshold.
Specifics for the iterative process and convergence criterion are set via \code{iterative_args}.}

\item{group_lags}{Logical.
If \code{TRUE} all lags of each variable are grouped together and explained as a group.
If \code{FALSE} all lags of each variable are explained individually.}

\item{group}{List.
If \code{NULL}, regular feature-wise Shapley values are computed.
If provided, group-wise Shapley values are computed.
\code{group} then has length equal to the number of groups.
Each list element contains the character vectors with the features included in the corresponding group.
See
\href{https://martinjullum.com/publication/jullum-2021-efficient/jullum-2021-efficient.pdf}{Jullum et al. (2021)}
for more information on group-wise Shapley values.}

\item{n_MC_samples}{Positive integer.
For most approaches, it indicates the maximum number of samples to use in the Monte Carlo integration
of every conditional expectation.
For \code{approach="ctree"}, \code{n_MC_samples} corresponds to the number of samples
from the leaf node (see an exception related to the \code{ctree.sample} argument in \code{\link[=setup_approach.ctree]{setup_approach.ctree()}}).
For \code{approach="empirical"}, \code{n_MC_samples} is the \eqn{K} parameter in equations (14-15) of
Aas et al. (2021), i.e. the maximum number of observations (with largest weights) that is used, see also the
\code{empirical.eta} argument of \code{\link[=setup_approach.empirical]{setup_approach.empirical()}}.}

\item{seed}{Positive integer.
Specifies the seed before any code involving randomness is run.
If \code{NULL} (default), no seed is set in the calling environment.}

\item{predict_model}{Function.
Prediction function to use when \code{model} is not natively supported.
(Run \code{\link[=get_supported_models]{get_supported_models()}} for a list of natively supported models.)
The function must have two arguments, \code{model} and \code{newdata}, which specify the model
and a data.frame/data.table to compute predictions for, respectively.
The function must give the prediction as a numeric vector.
\code{NULL} (the default) uses functions specified internally.
Can also be used to override the default function for natively supported model classes.}

\item{get_model_specs}{Function.
An optional function for checking model/data consistency when \code{model} is not natively supported.
(Run \code{\link[=get_supported_models]{get_supported_models()}} for a list of natively supported models.)
The function takes \code{model} as an argument and provides a list with 3 elements:
\describe{
\item{labels}{Character vector with the names of each feature.}
\item{classes}{Character vector with the class of each feature.}
\item{factor_levels}{Character vector with the levels for any categorical features.}
}
If \code{NULL} (the default), internal functions are used for natively supported model classes, and checking is
disabled for unsupported model classes.
Can also be used to override the default function for natively supported model classes.}

\item{verbose}{String vector or NULL.
Controls verbosity (printout detail level) via one or more of \code{"basic"}, \code{"progress"},
\code{"convergence"}, \code{"shapley"} and \code{"vS_details"}.
\code{"basic"} (default) displays basic information about the computation and messages about parameters/checks.
\code{"progress"} displays where in the calculation process the function currently is.
\code{"convergence"} displays how close the Shapley value estimates are to convergence
(only when \code{iterative = TRUE}).
\code{"shapley"} displays intermediate Shapley value estimates and standard deviations (only when \code{iterative = TRUE}),
and the final estimates.
\code{"vS_details"} displays information about the v(S) estimates,
most relevant for \code{approach \%in\% c("regression_separate", "regression_surrogate", "vaeac")}.
\code{NULL} means no printout.
Any combination can be used, e.g., \code{verbose = c("basic", "vS_details")}.}

\item{extra_computation_args}{Named list.
Specifies extra arguments related to the computation of the Shapley values.
See \code{\link[=get_extra_comp_args_default]{get_extra_comp_args_default()}} for description of the arguments and their default values.}

\item{iterative_args}{Named list.
Specifies the arguments for the iterative procedure.
See \code{\link[=get_iterative_args_default]{get_iterative_args_default()}} for description of the arguments and their default values.}

\item{output_args}{Named list.
Specifies certain arguments related to the output of the function.
See \code{\link[=get_output_args_default]{get_output_args_default()}} for description of the arguments and their default values.}

\item{...}{
  Arguments passed on to \code{\link[=setup_approach.categorical]{setup_approach.categorical}}, \code{\link[=setup_approach.copula]{setup_approach.copula}}, \code{\link[=setup_approach.ctree]{setup_approach.ctree}}, \code{\link[=setup_approach.empirical]{setup_approach.empirical}}, \code{\link[=setup_approach.gaussian]{setup_approach.gaussian}}, \code{\link[=setup_approach.independence]{setup_approach.independence}}, \code{\link[=setup_approach.timeseries]{setup_approach.timeseries}}, \code{\link[=setup_approach.vaeac]{setup_approach.vaeac}}
  \describe{
    \item{\code{categorical.joint_prob_dt}}{Data.table. (Optional)
Containing the joint probability distribution for each combination of feature
values.
\code{NULL} means it is estimated from the \code{x_train} and \code{x_explain}.}
    \item{\code{categorical.epsilon}}{Numeric value. (Optional)
If \code{categorical.joint_prob_dt} is not supplied, probabilities/frequencies are
estimated using \code{x_train}. If certain observations occur in \code{x_explain} and NOT in \code{x_train},
then epsilon is used as the proportion of times that these observations occur in the training data.
In theory, this proportion should be zero, but this causes an error later in the Shapley computation.}
    \item{\code{internal}}{List.
Not used directly, but passed through from \code{\link[=explain]{explain()}}.}
    \item{\code{ctree.mincriterion}}{Numeric scalar or vector.
Either a scalar or vector of length equal to the number of features in the model.
The value is equal to 1 - \eqn{\alpha} where \eqn{\alpha} is the nominal level of the conditional independence tests.
If it is a vector, this indicates which value to use when conditioning on various numbers of features.
The default value is 0.95.}
    \item{\code{ctree.minsplit}}{Numeric scalar.
Determines the minimum value that the sum of the left and right daughter nodes must reach for a split.
The default value is 20.}
    \item{\code{ctree.minbucket}}{Numeric scalar.
Determines the minimum sum of weights in a terminal node required for a split.
The default value is 7.}
    \item{\code{ctree.sample}}{Boolean.
If \code{TRUE} (default), then the method always samples \code{n_MC_samples} observations from the leaf nodes
(with replacement).
If \code{FALSE} and the number of observations in the leaf node is less than \code{n_MC_samples},
the method will take all observations in the leaf.
If \code{FALSE} and the number of observations in the leaf node is more than \code{n_MC_samples},
the method will sample \code{n_MC_samples} observations (with replacement).
This means that there will always be sampling in the leaf unless
\code{ctree.sample = FALSE} \emph{and} the number of observations in the node is less than \code{n_MC_samples}.}
    \item{\code{empirical.type}}{Character. (default = \code{"fixed_sigma"})
Must be one of \code{"independence"}, \code{"fixed_sigma"}, \code{"AICc_each_k"}, or \code{"AICc_full"}.
Note: \code{"empirical.type = independence"} is deprecated; use \code{approach = "independence"} instead.
\code{"fixed_sigma"} uses a fixed bandwidth (set through \code{empirical.fixed_sigma}) in the kernel density estimation.
\code{"AICc_each_k"} and \code{"AICc_full"} optimize the bandwidth using the AICc criterion, with respectively
one bandwidth per coalition size and one bandwidth for all coalition sizes.}
    \item{\code{empirical.eta}}{Numeric scalar.
Needs to be \verb{0 < eta <= 1}.
The default value is 0.95.
Represents the minimum proportion of the total empirical weight that data samples should use.
For example, if \code{eta = .8}, we choose the \code{K} samples with the largest weights so that the sum of the weights
accounts for 80\% of the total weight.
\code{eta} is the \eqn{\eta} parameter in equation (15) of
\href{https://martinjullum.com/publication/aas-2021-explaining/aas-2021-explaining.pdf}{Aas et al. (2021)}.}
    \item{\code{empirical.fixed_sigma}}{Positive numeric scalar.
The default value is 0.1.
Represents the kernel bandwidth in the distance computation used when conditioning on all different coalitions.
Only used when \code{empirical.type = "fixed_sigma"}.}
    \item{\code{empirical.n_samples_aicc}}{Positive integer.
Number of samples to consider in AICc optimization.
The default value is 1000.
Only used when \code{empirical.type} is either \code{"AICc_each_k"} or \code{"AICc_full"}.}
    \item{\code{empirical.eval_max_aicc}}{Positive integer.
Maximum number of iterations when optimizing the AICc.
The default value is 20.
Only used when \code{empirical.type} is either \code{"AICc_each_k"} or \code{"AICc_full"}.}
    \item{\code{empirical.start_aicc}}{Numeric.
Start value of the \code{sigma} parameter when optimizing the AICc.
The default value is 0.1.
Only used when \code{empirical.type} is either \code{"AICc_each_k"} or \code{"AICc_full"}.}
    \item{\code{empirical.cov_mat}}{Numeric matrix. (Optional)
The covariance matrix of the data generating distribution used to define the Mahalanobis distance.
\code{NULL} means it is estimated from \code{x_train}.}
    \item{\code{gaussian.mu}}{Numeric vector. (Optional)
Containing the mean of the data generating distribution.
\code{NULL} means it is estimated from the \code{x_train}.}
    \item{\code{gaussian.cov_mat}}{Numeric matrix. (Optional)
Containing the covariance matrix of the data generating distribution.
\code{NULL} means it is estimated from the \code{x_train}.}
    \item{\code{timeseries.fixed_sigma}}{Positive numeric scalar.
Represents the kernel bandwidth in the distance computation.
The default value is 2.}
    \item{\code{timeseries.bounds}}{Numeric vector of length two.
Specifies the lower and upper bounds of the timeseries.
The default is \code{c(NULL, NULL)}, i.e. no bounds.
If one or both of these bounds are not \code{NULL}, we restrict the sampled time series to be between these bounds.
This is useful if the underlying time series are scaled between 0 and 1, for example.}
    \item{\code{vaeac.depth}}{Positive integer (default is \code{3}). The number of hidden layers
in the neural networks of the masked encoder, full encoder, and decoder.}
    \item{\code{vaeac.width}}{Positive integer (default is \code{32}). The number of neurons in each
hidden layer in the neural networks of the masked encoder, full encoder, and decoder.}
    \item{\code{vaeac.latent_dim}}{Positive integer (default is \code{8}). The number of dimensions in the latent space.}
    \item{\code{vaeac.lr}}{Positive numeric (default is \code{0.001}). The learning rate used in the \code{\link[torch:optim_adam]{torch::optim_adam()}} optimizer.}
    \item{\code{vaeac.activation_function}}{A \code{\link[torch:nn_module]{torch::nn_module()}} representing an activation function, e.g.,
\code{\link[torch:nn_relu]{torch::nn_relu()}} (default), \code{\link[torch:nn_leaky_relu]{torch::nn_leaky_relu()}}, \code{\link[torch:nn_selu]{torch::nn_selu()}}, or \code{\link[torch:nn_sigmoid]{torch::nn_sigmoid()}}.}
    \item{\code{vaeac.n_vaeacs_initialize}}{Positive integer (default is \code{4}). The number of different vaeac models to initiate
in the start. Pick the best performing one after \code{vaeac.extra_parameters$epochs_initiation_phase}
epochs (default is \code{2}) and continue training that one.}
    \item{\code{vaeac.epochs}}{Positive integer (default is \code{100}). The number of epochs to train the final vaeac model.
This includes \code{vaeac.extra_parameters$epochs_initiation_phase}, where the default is \code{2}.}
    \item{\code{vaeac.extra_parameters}}{Named list with extra parameters to the \code{vaeac} approach. See
\code{\link[=vaeac_get_extra_para_default]{vaeac_get_extra_para_default()}} for description of possible additional parameters and their default values.}
  }}
}
\value{
Object of class \code{c("shapr", "list")}. Contains the following items:
\describe{
\item{\code{shapley_values_est}}{data.table with the estimated Shapley values with explained observation in the rows and
features along the columns.
The column \code{none} is the prediction not devoted to any of the features (given by the argument \code{phi0}).}
\item{\code{shapley_values_sd}}{data.table with the standard deviation of the Shapley values reflecting the uncertainty
in the coalition sampling part of the kernelSHAP procedure.
These are, by definition, 0 when all coalitions are used.
Only present when \code{extra_computation_args$compute_sd=TRUE}, which is the default when \code{iterative = TRUE}.}
\item{\code{internal}}{List with the different parameters, data, functions and other output used internally.}
\item{\code{pred_explain}}{Numeric vector with the predictions for the explained observations.}
\item{\code{MSEv}}{List with the values of the MSEv evaluation criterion for the approach. See the
\href{https://norskregnesentral.github.io/shapr/articles/general_usage.html#msev-evaluation-criterion}{MSEv evaluation section in the general usage vignette}
for details.}
\item{\code{timing}}{List containing timing information for the different parts of the computation.
\code{summary} contains the time stamps for the start and end time in addition to the total execution time.
\code{overall_timing_secs} gives the time spent on different parts of the explanation computation.
\code{main_computation_timing_secs} further decomposes the main computation time into different parts of the
computation for each iteration of the iterative estimation routine, if used.}
}
}
\description{
Computes dependence-aware Shapley values for observations in \code{explain_idx} from the specified
\code{model} by using the method specified in \code{approach} to estimate the conditional expectation.
See
\href{https://martinjullum.com/publication/aas-2021-explaining/aas-2021-explaining.pdf}{Aas et al. (2021)}
for a thorough introduction to dependence-aware prediction explanation with Shapley values.
For an overview of the methodology and capabilities of the \code{shapr} package, see the software paper
\href{https://arxiv.org/pdf/2504.01842}{Jullum et al. (2025)}, or the pkgdown site at
\href{https://norskregnesentral.github.io/shapr/}{norskregnesentral.github.io/shapr/}.
}
\details{
This function explains a forecast of length \code{horizon}. The argument \code{train_idx}
is analogous to \code{x_train} in \code{explain()}; however, it only contains the time indices of where
in the data the forecast should start for each training sample. In the same way \code{explain_idx}
defines the time index (indices) which will precede a forecast to be explained.

As any autoregressive forecast model will require a set of lags to make a forecast at an
arbitrary point in time, \code{explain_y_lags} and \code{explain_xreg_lags} define how many lags
are required to "refit" the model at any given time index. This allows the different
approaches to work in the same way they do for time-invariant models.

See the \href{https://norskregnesentral.github.io/shapr/articles/general_usage.html#forecasting}{
forecasting section of the general usage vignette} for further details.
See also the software paper \href{https://arxiv.org/pdf/2504.01842}{Jullum et al. (2025, Sec. 6)}
for a more detailed introduction to the methodology, and additional examples.
}
\examples{
\donttest{
# Load example data
data("airquality")
data <- data.table::as.data.table(airquality)

# Fit an AR(2) model.
model_ar_temp <- ar(data$Temp, order = 2)

# Calculate the zero prediction values for a three step forecast.
p0_ar <- rep(mean(data$Temp), 3)

# Empirical approach, explaining forecasts starting at T = 152 and T = 153.
explain_forecast(
  model = model_ar_temp,
  y = data[, "Temp"],
  train_idx = 2:151,
  explain_idx = 152:153,
  explain_y_lags = 2,
  horizon = 3,
  approach = "empirical",
  phi0 = p0_ar,
  group_lags = FALSE
)
}

}
\references{
\itemize{
\item \href{https://arxiv.org/pdf/2504.01842}{
Jullum, M., Olsen, L. H. B., Lachmann, J., & Redelmeier, A. (2025). shapr: Explaining Machine Learning Models
with Conditional Shapley Values in R and Python. arXiv preprint arXiv:2504.01842.}
\item \href{https://martinjullum.com/publication/aas-2021-explaining/aas-2021-explaining.pdf}{
Aas, K., Jullum, M., & Løland, A. (2021). Explaining individual predictions when features are dependent:
More accurate approximations to Shapley values. Artificial Intelligence, 298, 103502}
\item \href{https://proceedings.neurips.cc/paper_files/paper/2020/file/0d770c496aa3da6d2c3f2bd19e7b9d6b-Paper.pdf}{
Frye, C., Rowat, C., & Feige, I. (2020). Asymmetric Shapley values:
incorporating causal knowledge into model-agnostic explainability.
Advances in neural information processing systems, 33, 1229-1239}
\item \href{https://proceedings.neurips.cc/paper/2020/file/32e54441e6382a7fbacbbbaf3c450059-Paper.pdf}{
Heskes, T., Sijben, E., Bucur, I. G., & Claassen, T. (2020). Causal shapley values:
Exploiting causal knowledge to explain individual predictions of complex models.
Advances in neural information processing systems, 33, 4778-4789}
\item \href{https://martinjullum.com/publication/jullum-2021-efficient/jullum-2021-efficient.pdf}{
Jullum, M., Redelmeier, A. & Aas, K. (2021). Efficient and simple prediction explanations with
groupShapley: A practical perspective. Italian Workshop on Explainable Artificial Intelligence 2021.}
\item \href{https://martinjullum.com/publication/redelmeier-2020-explaining/redelmeier-2020-explaining.pdf}{
Redelmeier, A., Jullum, M., & Aas, K. (2020). Explaining predictive models with mixed features using Shapley
values and conditional inference trees. In Machine Learning and Knowledge Extraction:
International Cross-Domain Conference, CD-MAKE 2020, Dublin, Ireland, August 25-28, 2020, Proceedings 4
(pp. 117-137). Springer International Publishing.}
\item \href{https://www.theoj.org/joss-papers/joss.02027/10.21105.joss.02027.pdf}{
Sellereite, N., & Jullum, M. (2019). shapr: An R-package for explaining machine learning models with
dependence-aware Shapley values. Journal of Open Source Software, 5(46), 2027}
\item \href{https://www.jmlr.org/papers/volume23/21-1413/21-1413.pdf}{
Olsen, L. H., Glad, I. K., Jullum, M., & Aas, K. (2022). Using Shapley values and variational autoencoders to
explain predictive models with dependent mixed features. Journal of machine learning research, 23(213), 1-51}
\item \href{https://link.springer.com/content/pdf/10.1007/s10618-024-01016-z.pdf}{
Olsen, L. H. B., Glad, I. K., Jullum, M., & Aas, K. (2024). A comparative study of methods for estimating
model-agnostic Shapley value explanations. Data Mining and Knowledge Discovery, 1-48}
\item \href{https://arxiv.org/pdf/2410.04883}{
Olsen, L. H. B., & Jullum, M. (2024). Improving the Sampling Strategy in KernelSHAP. arXiv e-prints, arXiv-2410}
}
}
\author{
Jon Lachmann, Martin Jullum
}
