 #' Multi-Horizon Probabilistic Ensemble with Copulas for Time Series Forecasting
#'
#' @param ts Numeric vector (time series levels).
#' @param horizon Integer, number of steps ahead.
#' @param n_variants Integer, number of model variants per horizon (ensemble size).
#' @param engines Character vector of supported mean-model engines ('rpart', 'glmnet', 'knn').
#' @param dists Character vector of supported residual distributions (gaussian, laplace, student, logistic, asymmetric_laplace, skew_normal, skew_t.
#' @param h_options Character vector for supported heteroscedastic scale models (tree, ridge).
#' @param alpha,beta Numeric weights combining CRPS and calibration error.
#' @param temperature Softmax temperature for ensemble weighting (>0).
#' @param dates Vector for date formats. Default: NULL.
#' @param ci Numeric scalar, confidence interval for plot. Default: 0.9.
#' @param n_testing Backtest spacing used inside components.
#' @param seed Optional integer seed for reproducibility.
#'
#' @return
#' A list of class `c("organik","list")` with elements:
#' \itemize{
#'   \item `model_list`: list of horizon-wise ensemble models.
#'   \item `growth_pred_funs`: list of marginal predictors for growth.
#'   \item `level_pred_funs`: list of marginal predictors for level.
#'   \item `cor_mat`: horizon-by-horizon correlation (after cleaning / nearPD).
#'   \item `path_prediction(n_paths, probs, copula=c("gaussian","t"), df, return_increments, seed)`:
#'     function that simulates joint paths and returns summaries (means, quantiles,
#'     cumulative growth paths, level paths, and incremental returns if requested).
#'   \item `plot`: plot with prediction in the confidence interval.
#' }
#'
#' @examples
#' \donttest{
#' set.seed(1)
#' y <- cumsum(rnorm(200, sd = 0.5)) + 10
#' obj <- organik(y, horizon = 4,
#'   n_variants = 3,
#'   engines = "knn",
#'   dists = c("gaussian","laplace"),
#'   h_options = "tree",
#'   n_testing = 3, seed = 123)
#' # joint path simulation for next 4 steps:
#' path <- obj$path_prediction(n_paths = 100)
#' str(path$level_quants)
#' }
#'
#' @export
#' @importFrom stats median runif cor dnorm pnorm qnorm rnorm rgamma rchisq dt pt qt rt quantile sd approxfun predict
#' @importFrom utils head tail
#' @importFrom Matrix nearPD
#' @importFrom rpart rpart rpart.control
#' @importFrom glmnet glmnet cv.glmnet
#' @importFrom imputeTS na_kalman
#' @importFrom graphics axis axis.Date lines par polygon
#' @importFrom grDevices recordPlot adjustcolor
#' @importFrom MASS ginv
#'
organik <- function(ts, horizon, n_variants = 10,
                    engines = c("rpart","glmnet","knn"),
                    dists   = c("gaussian","laplace","student","logistic","asymmetric_laplace","skew_normal","skew_t"),
                    h_options = c("tree", "ridge"),
                    alpha = 1.0, beta = 1.0, temperature = 1,
                    dates = NULL, ci = 0.95,
                    n_testing = 30,
                    seed = 42) {
  if (!is.null(seed)) set.seed(seed)

  # --- minimal length checks ---
  stopifnot("horizon must be >= 1" = horizon >= 1L)
  stopifnot("n_testing must be >= 1" = n_testing >= 1L)
  n_rows <- floor(length(ts)/horizon)
  # Conservative rule of thumb: need at least horizon + n_testing + 2 observations
  stopifnot("time series too short for given horizon and backtesting" =
              n_rows >= n_testing + 5L)

  tts <- dts(ts, 1)
  reframed <- smart_reframer(tts, horizon, horizon)
  X <- head(reframed, -1)

  # y_mat: cumulative growth targets for each step-ahead (1..h)
  Y_rows <- tail(reframed, -1)
  y_mat <- t(apply(Y_rows, 1, function(r) cumprod(1 + r) - 1))
  colnames(y_mat) <- paste0("t", seq_len(horizon))

  # ---------- Train one ensemble per horizon ----------
  model_list <- lapply(seq_len(horizon), function(h) {
    ensemble(
      X, y_mat[, h], n_variants,
      engines, dists, h_options,
      alpha, beta, temperature,
      n_testing, seed = seed
    )
  })
  names(model_list) <- paste0("t", 1:horizon)

  # ---------- One-step-ahead predictors ----------
  new_data <- tail(X, 1)
  last_level <- tail(ts, 1)

  growth_pred_funs <- lapply(model_list, function(m) m$predict(new_data, level = NULL))
  names(growth_pred_funs) <- paste0("t", seq_len(horizon))

  level_pred_funs <- lapply(model_list, function(m) m$predict(new_data, level = last_level))
  names(level_pred_funs) <- paste0("t", seq_len(horizon))

  # ---------- Empirical correlation on cumulative targets ----------
  #cor_mat <- suppressWarnings(cor(y_mat, use = "pairwise.complete.obs", method = "spearman"))
  #cor_mat[!is.finite(cor_mat)] <- 0
  #cor_mat <- 2 * sin(cor_mat * pi/6)
  #diag(cor_mat) <- 1

  t_corr_from_kendall <- function(Y) {
    Tau <- suppressWarnings(stats::cor(Y, method = "kendall", use = "pairwise.complete.obs"))
    R   <- sin(pi * Tau / 2)
    diag(R) <- 1
    R <- (R + t(R)) / 2
    ensure_pd <- function(S) {
      ev <- tryCatch(eigen(S, symmetric = TRUE, only.values = TRUE)$values, error = function(e) NA)
      if (all(is.finite(ev)) && min(ev) > 1e-8) return(S)
      if (requireNamespace("Matrix", quietly = TRUE)) as.matrix(Matrix::nearPD(S, corr = TRUE)$mat)
      else S + diag(1e-6, nrow(S))
    }
    ensure_pd(R)
  }

  cor_mat <- t_corr_from_kendall(y_mat)

  # ---------- Joint path prediction via copula ----------
  path_prediction <- function(n_paths = 5000,
                              probs = c(0.05, 0.5, 0.95),
                              copula = c("gaussian", "t"),
                              df = 5,
                              return_increments = TRUE,
                              seed = NULL) {
    copula <- match.arg(copula)
    if (!is.null(seed)) set.seed(seed)

    H <- length(growth_pred_funs)
    if (H != ncol(cor_mat)) stop("cor_mat and growth_pred_funs length mismatch.")
    if (H == 0) stop("No horizons available for path prediction.")

    ensure_pd <- function(S) {
      vals <- tryCatch(eigen(S, symmetric = TRUE, only.values = TRUE)$values, error = function(e) NA)
      if (all(is.finite(vals)) && min(vals) > 1e-10) return(S)
      if (requireNamespace("Matrix", quietly = TRUE)) {
        return(as.matrix(nearPD(S, corr = TRUE)$mat))
      } else {
        return(S + diag(1e-6, nrow(S)))
      }
    }
    Sigma <- ensure_pd(cor_mat)
    L <- tryCatch(chol(Sigma), error = function(e) chol(ensure_pd(Sigma)))

    Z0 <- matrix(rnorm(n_paths * H), n_paths, H)
    Z  <- Z0 %*% L

    if (copula == "gaussian") {
      U <- pnorm(Z)
    } else {
      if (!is.finite(df) || df <= 1) stop("df must be > 1 for t-copula.")
      W  <- rgamma(n_paths, shape = df/2, rate = df/2)  # χ²_ν / ν
      Tm <- Z / sqrt(W)
      U  <- pt(Tm, df = df)
    }

    Gcum <- matrix(NA_real_, n_paths, H)
    for (h in seq_len(H)) {
      qh <- growth_pred_funs[[h]]$qfun
      uh <- pmin(pmax(U[, h], 1e-12), 1 - 1e-12)
      Gcum[, h] <- qh(uh)
    }
    colnames(Gcum) <- paste0("t", seq_len(H))

    Lvl <- last_level * (1 + Gcum)
    colnames(Lvl) <- paste0("t", seq_len(H))

    Rincr <- NULL
    if (return_increments) {
      Rincr <- Gcum
      if (H >= 2) {
        for (h in 2:H) {
          Rincr[, h] <- (1 + Gcum[, h]) / pmax(1 + Gcum[, h - 1], 1e-12) - 1
        }
      }
      colnames(Rincr) <- paste0("t", seq_len(H))
    }

    q_summary <- function(M) {
      out <- sapply(probs, function(p) apply(M, 2, quantile, probs = p, na.rm = TRUE))
      colnames(out) <- paste0("q", formatC(probs * 100, width = 2, format = "d"))
      out
    }

    list(
      n_paths            = n_paths,
      copula             = copula,
      df                 = if (copula == "t") df else NA_real_,
      last_level         = last_level,
      cum_growth_paths   = Gcum,
      level_paths        = Lvl,
      incr_return_paths  = Rincr,
      cum_growth_mean    = colMeans(Gcum),
      level_mean         = colMeans(Lvl),
      cum_growth_quants  = q_summary(Gcum),
      level_quants       = q_summary(Lvl),
      cor_used           = Sigma
    )
  }

  plot <- plot_graph(ts, level_pred_funs, alpha = 1 - ci, dates)

  structure(list(
    model_list        = model_list,
    growth_pred_funs  = growth_pred_funs,
    level_pred_funs   = level_pred_funs,
    cor_mat           = cor_mat,
    path_prediction   = path_prediction,
    plot = plot
  ),
  class = c("organik","list"))
}

#' @keywords internal

ensemble <- function(
    X, y,
    n_variants = 12,
    engines = c("rpart","glmnet","knn"),
    dists   = c("gaussian","laplace","student","logistic","asymmetric_laplace","skew_normal","skew_t"),
    h_options = c("ridge","tree"),
    alpha = 1.0, beta = 1.0, temperature = 0.25,
    n_testing = 50, mc_crps_n = 1000, cap_to_y = TRUE,
    max_warn_retries = 2,
    seed = NULL
) {
  if (!is.null(seed)) set.seed(seed)
  X <- as.matrix(X); p <- ncol(X)
  `%||%` <- function(a,b) if (is.null(a)) b else a
  clamp01 <- function(z) pmin(pmax(z, 0), 1)
  safe_mean <- function(v) { v <- v[is.finite(v)]; if (!length(v)) NA_real_ else mean(v) }

  fmt_num <- function(z) {
    if (is.null(z) || length(z) == 0 || !is.finite(z)) return("NA")
    if (!is.numeric(z)) return(as.character(z))
    if (any(abs(z) >= 1e3 | abs(z) < 1e-3)) format(z, digits = 4, scientific = TRUE)
    else format(z, digits = 4, scientific = FALSE)
  }

  summarize_cfg <- function(cfg, model_obj) {
    eng_str <- switch(cfg$engine,
                      "rpart" = {
                        rc <- cfg$engine_params$rpart_control
                        paste0("engine=rpart{cp=", fmt_num(rc$cp),
                               ", minsplit=", fmt_num(rc$minsplit),
                               ", minbucket=", fmt_num(rc$minbucket),
                               ", maxdepth=", fmt_num(rc$maxdepth),
                               ", usesurrogate=", fmt_num(rc$usesurrogate), "}")
                      },
                      "glmnet" = {
                        lam <- tryCatch(model_obj$engine_fit$lambda, error = function(e) NA_real_)
                        al  <- tryCatch(model_obj$engine_fit$alpha,  error = function(e) NA_real_)
                        paste0("engine=glmnet{alpha=", fmt_num(al),
                               ", lambda=", fmt_num(lam),
                               ", standardize=", cfg$engine_params$standardize %||% TRUE, "}")
                      },
                      "knn" = paste0("engine=knn{k=", fmt_num(cfg$engine_params$k),
                                     ", weight=", cfg$engine_params$weight, "}")
    )

    dist_str <- switch(cfg$dist,
                       "student"            = paste0("dist=student{nu=", fmt_num(cfg$dist_params$nu), "}"),
                       "asymmetric_laplace" = paste0("dist=asymmetric_laplace{tau=", fmt_num(cfg$dist_params$tau), "}"),
                       "skew_normal"        = paste0("dist=skew_normal{alpha=", fmt_num(cfg$dist_params$alpha), "}"),
                       "skew_t"             = paste0("dist=skew_t{alpha=", fmt_num(cfg$dist_params$alpha),
                                                     ", nu=", fmt_num(cfg$dist_params$nu), "}"),
                       paste0("dist=", cfg$dist)
    )

    het_str <- switch(cfg$hetero,
                      "ridge" = {
                        sel_lambda <- tryCatch(model_obj$hetero_fit$lambda, error = function(e) NA_real_)
                        paste0("hetero=ridge{lambda=", fmt_num(sel_lambda),
                               ", kfold=", fmt_num(cfg$h_params$ridge_kfold), "}")
                      },
                      "tree" = {
                        tc <- cfg$h_params$tree_control
                        paste0("hetero=tree{cp=", fmt_num(tc$cp),
                               ", minbucket=", fmt_num(tc$minbucket),
                               ", maxdepth=", fmt_num(tc$maxdepth),
                               ", usesurrogate=", fmt_num(tc$usesurrogate), "}")
                      },
                      paste0("hetero=", cfg$hetero)
    )

    paste(eng_str, dist_str, het_str, sep = " | ")
  }

  cfg_signature <- function(cfg) {
    onum <- function(z) paste0(format(z, digits = 6), collapse = ",")
    olist <- function(x) paste0(names(x), "=", vapply(x, function(v) {
      if (is.list(v)) return(olist(v))
      if (is.numeric(v)) return(onum(v))
      if (is.logical(v)) return(paste0(v, collapse = ","))
      paste0(v, collapse = ",")
    }, character(1L)), collapse = ";")
    paste(
      paste0("E:", cfg$engine),
      paste0("D:", cfg$dist),
      paste0("H:", cfg$hetero),
      paste0("EP{", olist(cfg$engine_params), "}"),
      paste0("DP{", olist(cfg$dist_params), "}"),
      paste0("HP{", olist(cfg$h_params), "}"),
      sep = " | "
    )
  }

  plan_variants <- function(n, engines, dists, heteros, seed = NULL) {
    if (!is.null(seed)) set.seed(seed + 777)
    eng_seq  <- rep_len(sample(engines), n)
    dist_seq <- rep_len(sample(dists), n)
    het_seq  <- rep_len(sample(heteros), n)
    data.frame(engine = eng_seq, dist = dist_seq, hetero = het_seq, stringsAsFactors = FALSE)
  }

  sample_cfg <- function(engine, dist, hetero, p) {
    if (engine == "rpart") {
      eparams <- list(
        rpart_control = rpart.control(
          cp            = sample(seq(0.0001, 0.001, length.out = 100), 1),
          minsplit      = sample(5:40, 1),
          minbucket     = sample(5:30, 1),
          maxdepth      = sample(2:9, 1),
          usesurrogate  = sample(0:2, 1)
        )
      )
    } else if (engine == "glmnet") {
      eparams <- list(
        alpha        = runif(1, 0, 1),
        lambda       = NULL,
        standardize  = TRUE
      )
    } else if (engine == "knn") {
      eparams <- list(
        k = sample(5:50, 1),
        weight = sample(c("uniform","distance"), 1)
      )
    }

    dparams <- switch(dist,
                      "student"            = list(nu = sample(3:15, 1)),
                      "asymmetric_laplace" = list(tau = runif(1, 0.2, 0.8)),
                      "skew_normal"        = list(alpha = sample(seq(1.2, 3.0, length.out = 100), 1)),
                      "skew_t"             = list(alpha = sample(seq(1.2, 3.0, length.out = 100), 1), nu = sample(3:15, 1)),
                      list()
    )

    hparams <- switch(hetero,
                      "ridge" = list(
                        ridge_lambda = NULL,
                        ridge_lambda_grid = c(1e-6, 1e-4, 1e-3, 1e-2, 1e-1, 1, 10),
                        ridge_kfold  = sample(c(3,4,5), 1),
                        standardize  = TRUE
                      ),
                      "tree" = list(
                        tree_control = rpart.control(
                          cp        = sample(seq(0.0001, 0.001, length.out = 100), 1),
                          minbucket = sample(5:30, 1),
                          maxdepth  = sample(2:9, 1),
                          usesurrogate = sample(0:2, 1)
                        )
                      )
    )

    list(engine = engine, engine_params = eparams,
         dist = dist, dist_params = dparams,
         hetero = hetero, h_params = hparams)
  }

  train_variant_with_retry <- function(cfg) {
    this_cfg <- cfg
    for (attempt in 0:max_warn_retries) {
      model_i <- withCallingHandlers(
        component_fit(
          X, y,
          engine = this_cfg$engine,
          engine_params = this_cfg$engine_params,
          dist   = this_cfg$dist,
          dist_params = this_cfg$dist_params,
          hetero = this_cfg$hetero,
          h_params = this_cfg$h_params,
          backtest  = TRUE,
          n_testing = n_testing,
          mc_crps_n = mc_crps_n,
          cap_to_y  = cap_to_y,
          seed = seed
        ),
        warning = function(w) { invokeRestart("muffleWarning") }
      )
      return(list(model = model_i, cfg = this_cfg))
    }
  }

  planner <- plan_variants(n_variants, engines, dists, h_options, seed)
  sig_seen <- new.env(parent = emptyenv())
  variants <- vector("list", n_variants)
  meta     <- vector("list", n_variants)
  max_resample <- 50L

  for (i in seq_len(n_variants)) {
    base_e <- planner$engine[i]; base_d <- planner$dist[i]; base_h <- planner$hetero[i]
    cfg_i <- NULL; sig_i <- NULL

    for (attempt in 1:max_resample) {
      cfg_try <- sample_cfg(base_e, base_d, base_h, p = p)
      sig_try <- cfg_signature(cfg_try)
      if (is.null(sig_seen[[sig_try]])) { cfg_i <- cfg_try; sig_i <- sig_try; break }
    }
    if (is.null(cfg_i)) {
      alt_e <- sample(setdiff(engines, base_e), 1)
      cfg_i <- sample_cfg(alt_e, base_d, base_h, p = p)
      sig_i <- cfg_signature(cfg_i)
    }
    sig_seen[[sig_i]] <- TRUE

    tr <- train_variant_with_retry(cfg_i)
    model_i <- tr$model; cfg_used <- tr$cfg
    variants[[i]] <- model_i

    bt <- model_i$backtest
    mean_crsp <-safe_mean(bt$crsp)
    cal_last  <- tail(bt$calibration_error, 1)

    meta[[i]] <- data.frame(
      id = i,
      engine = cfg_used$engine,
      dist   = cfg_used$dist,
      hetero = cfg_used$hetero,
      mean_crsp = mean_crsp,
      calibration_error = cal_last,
      specific_params = summarize_cfg(cfg_used, model_i),
      stringsAsFactors = FALSE
    )
  }

  meta_df <- do.call(rbind, meta)

  #minmax <- function(x) (x - min(x))/(max(x) - min(x))

  scaled_crps <- as.vector(-scale(meta_df$mean_crsp))
  scaled_cals <- as.vector(-scale(meta_df$calibration_error))
  meta_df$score <- alpha * scaled_crps + beta * scaled_cals


  softmax <- function(v, temperature = 1)
  {
    if (any(!is.finite(v))) {v[!is.finite(v)] <- mean(v[is.finite(v)], na.rm = TRUE) + 3 * sd(v[is.finite(v)], na.rm = TRUE)}
    z <- scale(v)
    zz <- z - max(z)
    exp_z <- exp(zz)/max(temperature, 1e-6)
    w <- exp_z/sum(exp_z)
    return(w)
  }

  s <- meta_df$score
  weights <- softmax(s, temperature)
  meta_df$weight <- weights

  make_mixture_predfun <- function(pfuns_list, weights, level = NULL,
                                   lo_q = 1e-4, hi_q = 1 - 1e-4,
                                   n_grid = 2000, max_expand = 6L) {
    k <- length(pfuns_list)
    w <- as.numeric(weights)

    # Mixture density / CDF defined on:
    #  - growth scale if level is NULL
    #  - level scale if level is not NULL (internally mapped to growth)
    dfun <- function(x) {
      if (!is.null(level)) {
        x <- (x / level) - 1  # map level -> growth
      }
      x <- as.numeric(x)
      dens_mat <- vapply(pfuns_list, function(pf) pf$dfun(x), numeric(length(x)))
      as.numeric(dens_mat %*% w)
    }

    pfun <- function(q) {
      if (!is.null(level)) {
        q <- (q / level) - 1  # map level -> growth
      }
      q <- as.numeric(q)
      cdf_mat <- vapply(pfuns_list, function(pf) pf$pfun(q), numeric(length(q)))
      as.numeric(cdf_mat %*% w)
    }

    # Support bounds first on growth scale
    q_los <- vapply(pfuns_list, function(pf) pf$qfun(lo_q), numeric(1))
    q_his <- vapply(pfuns_list, function(pf) pf$qfun(hi_q), numeric(1))
    lo_g <- min(q_los)
    hi_g <- max(q_his)
    if (!is.finite(lo_g) || !is.finite(hi_g) || !(hi_g > lo_g)) {
      mu_guess <- vapply(pfuns_list, function(pf) pf$qfun(0.5), numeric(1))
      lo_g <- min(mu_guess) - 10
      hi_g <- max(mu_guess) + 10
    }

    # Map support to level scale if needed
    if (is.null(level)) {
      lower <- lo_g
      upper <- hi_g
    } else {
      lower <- level * (1 + lo_g)
      upper <- level * (1 + hi_g)
    }

    make_cdf_qfun_safe <- function(pdf, lower, upper, n_grid, max_expand = 6L) {
      expand <- 0L
      repeat {
        x_grid <- seq(lower, upper, length.out = n_grid)
        dx <- diff(x_grid)[1]
        dens <- pdf(x_grid)
        dens[!is.finite(dens)] <- 0
        dens <- pmax(dens, 0)
        cdf_vals <- c(0, cumsum((dens[-1] + dens[-length(dens)]) * 0.5) * dx)
        mass <- cdf_vals[length(cdf_vals)]
        if (!is.finite(mass) || mass <= 0) {
          if (expand < max_expand) {
            expand <- expand + 1L
            rng <- (upper - lower) * 0.5
            lower <- lower - rng
            upper <- upper + rng
            next
          } else {
            cdf_vals <- seq(0, 1, length.out = length(x_grid))
            cdf_fun  <- approxfun(x_grid, cdf_vals, yleft = 0, yright = 1, rule = 2)
            qfun_fun <- approxfun(cdf_vals, x_grid, yleft = lower, yright = upper, rule = 2)
            return(list(cdf = cdf_fun, qfun = qfun_fun))
          }
        } else {
          cdf_vals <- cdf_vals / mass
        }
        cdf_vals <- pmin(pmax(cdf_vals, 0), 1)
        cdf_vals <- cummax(cdf_vals)
        if (length(unique(cdf_vals)) < 2L) {
          if (expand < max_expand) {
            expand <- expand + 1L
            rng <- (upper - lower) * 0.5
            lower <- lower - rng
            upper <- upper + rng
            next
          } else {
            cdf_vals <- seq(0, 1, length.out = length(x_grid))
          }
        } else {
          jitter <- seq(0, (length(cdf_vals) - 1)) * 1e-14
          cdf_vals <- pmin(pmax(cdf_vals + jitter, 0), 1)
          cdf_vals <- (cdf_vals - min(cdf_vals)) /
            max(1e-12, (max(cdf_vals) - min(cdf_vals)))
        }
        cdf_fun  <- approxfun(x_grid, cdf_vals, yleft = 0, yright = 1, rule = 2)
        qfun_fun <- approxfun(cdf_vals, x_grid, yleft = lower, yright = upper, rule = 2)
        return(list(cdf = cdf_fun, qfun = qfun_fun))
      }
    }

    # Grid is now on growth scale (level = NULL) or level scale (level != NULL),
    # consistent with dfun()'s interpretation.
    grid <- make_cdf_qfun_safe(function(xx) dfun(xx), lower, upper, n_grid, max_expand)

    qfun <- function(p) {
      grid$qfun(clamp01(p))  # no extra level*(1+...) here
    }

    rfun <- function(n) {
      comp <- sample.int(k, size = n, replace = TRUE, prob = w)
      out <- numeric(n)
      for (j in seq_len(k)) {
        idx <- which(comp == j)
        if (length(idx)) out[idx] <- pfuns_list[[j]]$rfun(length(idx))
      }
      if (!is.null(level)) {
        out <- level * (1 + out)  # map growth -> level
      }
      out
    }

    list(dfun = dfun, pfun = pfun, qfun = qfun, rfun = rfun)
  }

  x_names <- if (is.null(colnames(X))) paste0("x", seq_len(ncol(X))) else colnames(X)
  predictor <- function(Xnew, level = NULL) {
    if (is.vector(Xnew)) Xnew <- matrix(as.numeric(Xnew), ncol = ncol(X))
    if (is.data.frame(Xnew)) Xnew <- as.matrix(Xnew)
    colnames(Xnew) <- x_names
    m <- nrow(Xnew); out <- vector("list", m)
    for (i in seq_len(m)) {
      pf_i <- lapply(variants, function(mod) mod$predict(Xnew[i, , drop = FALSE]))
      out[[i]] <- make_mixture_predfun(pf_i, weights, level)
    }
    if (m == 1L) out[[1]] else out
  }

  engine_counts <- sort(table(vapply(meta_df$engine, identity, "")), decreasing = TRUE)
  hetero_counts <- sort(table(vapply(meta_df$hetero, identity, "")), decreasing = TRUE)
  dist_counts   <- sort(table(vapply(meta_df$dist,   identity, "")), decreasing = TRUE)
  dup_flags <- duplicated(meta_df$specific_params)
  dup_rows  <- meta_df[dup_flags, , drop = FALSE]

  out <- list(
    type        = "ensemble_prob_resid",
    n_models    = n_variants,
    variants    = variants,
    weights     = weights,
    leaderboard = meta_df[order(-meta_df$score), ],
    predict     = predictor
  )
  attr(out$leaderboard, "engine_counts") <- engine_counts
  attr(out$leaderboard, "hetero_counts") <- hetero_counts
  attr(out$leaderboard, "dist_counts")   <- dist_counts
  attr(out$leaderboard, "duplicates")    <- dup_rows
  out
}
#'
#'
#' @keywords internal
#'
safe_standardize <- function(X) {
X <- as.matrix(X)
if (!ncol(X)) return(list(Xs = X, mu = numeric(0), sd = numeric(0), keep = integer(0)))
mu <- colMeans(X)
sd <- apply(X, 2, sd)
keep <- which(is.finite(sd) & sd > 1e-12)
if (length(keep) == 0L) {
  return(list(Xs = matrix(numeric(nrow(X)), nrow(X), 0L),
              mu = numeric(0), sd = numeric(0), keep = integer(0)))
}
Xs <- scale(X[, keep, drop = FALSE], center = mu[keep], scale = sd[keep])
list(Xs = Xs, mu = mu[keep], sd = sd[keep], keep = keep)
}
#'
#'
#' @keywords internal
#'
safe_solve <- function(A, b) {
  tryCatch(
    solve(A, b, tol = 1e-10),
    error = function(e) {
      if (is.matrix(A) && nrow(A) == ncol(A) && nrow(A) > 0) {
        Ae <- A + diag(1e-8, nrow(A))
        tryCatch(solve(Ae, b, tol = 1e-10),
                 error = function(e2) {
                   if (requireNamespace("MASS", quietly = TRUE)) ginv(Ae) %*% b
                   else stop(e)
                 })
      } else stop(e)
    }
  )
}
#'
#'
#' @keywords internal
#'
build_hetero_ridge <- function(X, proxy, params, seed = NULL) {
  lambda_grid <- params$ridge_lambda_grid %||% c(1e-6,1e-4,1e-3,1e-2,1e-1,1,10)
  kfold       <- params$ridge_kfold       %||% 5
  standardize <- params$standardize       %||% TRUE
  lambda_sel  <- params$ridge_lambda

  proxy <- as.numeric(proxy); proxy[!is.finite(proxy)] <- mean(proxy[is.finite(proxy)], na.rm = TRUE)
  if (!length(proxy) || all(!is.finite(proxy))) proxy <- rep(0, nrow(X))

  cv_ridge <- function(Xm, yv, lambda_grid, kfold = 5, standardize = TRUE, seed = NULL) {
    n <- nrow(Xm); if (!is.null(seed)) set.seed(seed)
    kfold <- min(max(2L, kfold), max(2L, n))
    folds <- sample(rep(1:kfold, length.out = n))
    rmse <- numeric(length(lambda_grid))
    for (li in seq_along(lambda_grid)) {
      lam <- lambda_grid[li]; se <- 0
      for (k in 1:kfold) {
        tr <- folds != k; te <- !tr
        mod <- fit_ridge(Xm[tr,,drop=FALSE], yv[tr], lam, standardize)
        pred <- predict_ridge(mod, Xm[te,,drop=FALSE])
        se <- se + mean((pred - yv[te])^2)
      }
      rmse[li] <- sqrt(se / kfold)
    }
    lambda_grid[ which.min(rmse) ]
  }

  if (is.null(lambda_sel)) lambda_sel <- cv_ridge(X, proxy, lambda_grid, kfold, standardize, seed)
  mod <- fit_ridge(X, proxy, lambda_sel, standardize)

  scale_fun <- function(Xnew) {
    Xnew <- as.matrix(Xnew)
    if (!is.null(mod$intercept_only) && mod$intercept_only) {
      const_s <- pmax(exp(mean(proxy, na.rm = TRUE)), 1e-8)
      return(rep(const_s, nrow(Xnew)))
    }
    s_hat <- exp(predict_ridge(mod, Xnew))
    pmax(as.numeric(s_hat), 1e-8)
  }

  list(scale_fun = scale_fun, fit_info = list(method = "ridge", lambda = lambda_sel))
}
#'
#'
#' @keywords internal
#'
fit_ridge <- function(X, y, lambda, standardize = TRUE) {
X <- as.matrix(X); y <- as.numeric(y)
n <- nrow(X); p <- ncol(X)
if (is.null(n) || is.null(p) || p == 0L || n < 2L) {
  b0 <- if (length(y)) mean(y, na.rm = TRUE) else 0
  return(list(beta = matrix(b0, nrow = 1), mu = numeric(0), sd = numeric(0),
              keep = integer(0), standardize = FALSE, intercept_only = TRUE))
}
if (standardize) {
  ss <- safe_standardize(X); Xs <- ss$Xs; mu <- ss$mu; sd <- ss$sd; keep <- ss$keep
} else { Xs <- X; mu <- rep(0, p); sd <- rep(1, p); keep <- seq_len(p) }

Xd <- cbind(1, Xs)
I  <- diag(ncol(Xd)); I[1,1] <- 0
beta <- tryCatch(
  safe_solve(crossprod(Xd) + lambda * I, crossprod(Xd, y)),
  error = function(e) matrix(mean(y, na.rm = TRUE), nrow = 1)
)
list(beta = beta, mu = mu, sd = sd, keep = keep,
     standardize = standardize, intercept_only = FALSE)
}
#'
#'
#'@keywords internal
#'
predict_ridge <- function(mod, Xnew) {
Xnew <- as.matrix(Xnew)
if (!is.null(mod$intercept_only) && mod$intercept_only)
  return(rep(drop(mod$beta), nrow(Xnew)))
if (length(mod$keep) == 0L)
  return(rep(drop(mod$beta), nrow(Xnew)))
Xk <- Xnew[, mod$keep, drop = FALSE]
Xs <- if (mod$standardize) scale(Xk, center = mod$mu, scale = mod$sd) else Xk
drop(cbind(1, Xs) %*% mod$beta)
}
#'
#' @keywords internal
#'
component_fit <- function(
    X, y,
    engine = c("rpart","glmnet","knn"),
    engine_params = list(
      rpart_control = NULL,
      # glmnet
      alpha       = 0.5,
      lambda      = NULL,   # if NULL -> CV
      standardize = TRUE,
      # knn
      k = 25,
      weight = c("uniform","distance")
    ),
    dist = c("gaussian","laplace","student","logistic","asymmetric_laplace","skew_normal","skew_t"),
    dist_params = list(nu = 5, tau = 0.5, alpha = 2.0),
    hetero = c("ridge","none","tree"),
    h_params = list(
      # ridge
      ridge_lambda      = NULL,
      ridge_lambda_grid = c(1e-6,1e-4,1e-3,1e-2,1e-1,1,10),
      ridge_kfold       = 5,
      standardize       = TRUE,
      # tree
      tree_control      = NULL
    ),
    backtest  = TRUE,
    n_testing = 30,
    mc_crps_n = 1000,
    cap_to_y  = TRUE,
    skew_grid_n  = 1000,
    skewt_grid_n = 1200,
    seed = NULL
    ) {
      if (!is.null(seed)) set.seed(seed)
      engine <- match.arg(engine)
      dist   <- match.arg(dist)
      hetero <- match.arg(hetero)

      if (is.vector(X)) X <- matrix(as.numeric(X), ncol = 1)
      if (is.data.frame(X)) X <- as.matrix(X)
      stopifnot(is.matrix(X), is.numeric(X), is.numeric(y), nrow(X) == length(y))
      n <- nrow(X); p <- ncol(X)
      colnames(X) <- if (is.null(colnames(X))) paste0("x", seq_len(p)) else colnames(X)
      x_names <- colnames(X)
      y <- as.numeric(y)
      y_min <- min(y, na.rm = TRUE); y_max <- max(y, na.rm = TRUE)
      eps <- 1e-12
      `%||%` <- function(a,b) if (is.null(a)) b else a

      # ---- mean model by engine ----
      mu_model <- switch(engine,
                         "rpart" = {
                           if (!requireNamespace("rpart", quietly = TRUE)) stop("Install 'rpart'.")
                           df <- data.frame(y = y, X)
                           ctrl <- engine_params$rpart_control %||% rpart.control(cp = 0.001, minbucket = 5)
                           fit <- rpart(y ~ ., data = df, method = "anova", control = ctrl)
                           engine_fit <- list(method = "rpart", lambda = NA_real_, alpha = NA_real_)
                           function(Xnew) {
                             if (is.vector(Xnew)) Xnew <- matrix(as.numeric(Xnew), ncol = p)
                             if (is.data.frame(Xnew)) Xnew <- as.matrix(Xnew)
                             colnames(Xnew) <- x_names
                             as.numeric(predict(fit, newdata = as.data.frame(Xnew)))
                           }
                         },
                         "glmnet" = {
                           if (!requireNamespace("glmnet", quietly = TRUE)) stop("Install 'glmnet'.")
                           a <- engine_params$alpha %||% 0.5
                           if (is.null(engine_params$lambda)) {
                             cv <- cv.glmnet(x = X, y = y, alpha = a, family = "gaussian",
                                                     standardize = engine_params$standardize %||% TRUE)
                             lam <- cv$lambda.min
                             fit <- glmnet(x = X, y = y, alpha = a, lambda = lam,
                                                   family = "gaussian", standardize = engine_params$standardize %||% TRUE)
                           } else {
                             lam <- engine_params$lambda
                             fit <- glmnet(x = X, y = y, alpha = a, lambda = lam,
                                                   family = "gaussian", standardize = engine_params$standardize %||% TRUE)
                           }
                           engine_fit <- list(method = "glmnet", lambda = lam, alpha = a)
                           function(Xnew) {
                             if (is.vector(Xnew)) Xnew <- matrix(as.numeric(Xnew), ncol = p)
                             if (is.data.frame(Xnew)) Xnew <- as.matrix(Xnew)
                             colnames(Xnew) <- x_names
                             as.numeric(predict(fit, newx = Xnew, s = lam))
                           }
                         },
                         "knn" = {
                           k  <- max(1L, as.integer(engine_params$k %||% 25))
                           wt <- match.arg(engine_params$weight %||% "uniform", c("uniform","distance"))
                           Xtr <- X; ytr <- y
                           engine_fit <- list(method = "knn", lambda = NA_real_, alpha = NA_real_)
                           function(Xnew) {
                             if (is.vector(Xnew)) Xnew <- matrix(as.numeric(Xnew), ncol = p)
                             if (is.data.frame(Xnew)) Xnew <- as.matrix(Xnew)
                             colnames(Xnew) <- x_names
                             m <- nrow(Xnew); out <- numeric(m)
                             for (i in seq_len(m)) {
                               d <- sqrt(rowSums((Xtr - matrix(Xnew[i,], nrow(Xtr), p, byrow=TRUE))^2))
                               ord <- order(d); nn <- ord[seq_len(min(k, length(ord)))]
                               if (wt == "uniform") out[i] <- mean(ytr[nn])
                               else { w <- 1 / pmax(d[nn], 1e-12); out[i] <- sum(w * ytr[nn]) / sum(w) }
                             }
                             out
                           }
                         }
      )

      mu_hat <- mu_model(X)
      res    <- y - mu_hat
      res    <- res[is.finite(res)]

      dist_key <- dist
      glob <- switch(dist_key,
                     "gaussian" = list(sigma = sd(res)),
                     "laplace"  = list(b     = mean(abs(res))),
                     "student"  = { nu <- max(2.1, as.numeric(dist_params$nu %||% 5))
                     sd_r <- sd(res); s <- if (is.finite(sd_r) && sd_r > 0) sd_r * sqrt((nu - 2)/nu) else 1
                     list(s = s, nu = nu) },
                     "logistic" = { sd_r <- sd(res); s <- if (is.finite(sd_r) && sd_r > 0) sd_r * sqrt(3) / pi else 1; list(s = s) },
                     "asymmetric_laplace" = { tau <- min(max(as.numeric(dist_params$tau %||% 0.5), 1e-6), 1-1e-6)
                     sigma <- mean(ifelse(res >= 0, tau*res, (1-tau)*(-res))); list(sigma = max(sigma, eps), tau = tau) },
                     "skew_normal" = { s <- sd(res); alpha <- as.numeric(dist_params$alpha %||% 2.0); list(s = max(s,eps), alpha = alpha) },
                     "skew_t" = { sd_r <- sd(res); nu <- max(2.1, as.numeric(dist_params$nu %||% 5))
                     s <- if (is.finite(sd_r) && sd_r > 0) sd_r * sqrt((nu - 2)/nu) else 1
                     alpha <- as.numeric(dist_params$alpha %||% 2.0); list(s = max(s,eps), alpha = alpha, nu = nu) }
      )

      proxy <- if (dist_key %in% c("gaussian","skew_normal","skew_t"))
        0.5 * log(pmax(res^2, eps))
      else  log(pmax(abs(res), eps))

      # ----- Ridge helper  --------------------------------------


      cv_ridge <- function(Xm, yv, lambda_grid, kfold = 5, standardize = TRUE, seed = NULL) {
        n <- nrow(Xm); if (!is.null(seed)) set.seed(seed)
        folds <- sample(rep(1:kfold, length.out = n))
        rmse <- numeric(length(lambda_grid))
        for (li in seq_along(lambda_grid)) {
          lam <- lambda_grid[li]; sse <- 0
          for (k in 1:kfold) {
            tr <- folds != k; te <- !tr
            mod <- fit_ridge(Xm[tr,,drop=FALSE], yv[tr], lam, standardize)
            pred <- predict_ridge(mod, Xm[te,,drop=FALSE])
            sse <- sse + mean((pred - yv[te])^2)
          }
          rmse[li] <- sqrt(sse / kfold)
        }
        lambda_grid[ which.min(rmse) ]
      }

      # ----- Heteroscedastic scale model s(x) ------------------------------
      scale_fun <- switch(hetero,
                          "none" = {
                            s0 <- switch(dist_key,
                                         "gaussian" = glob$sigma, "laplace" = glob$b, "student" = glob$s,
                                         "logistic" = glob$s, "asymmetric_laplace" = glob$sigma,
                                         "skew_normal" = glob$s, "skew_t" = glob$s)
                            function(Xnew) {
                              if (is.vector(Xnew)) Xnew <- matrix(as.numeric(Xnew), ncol = p)
                              if (is.data.frame(Xnew)) Xnew <- as.matrix(Xnew)
                              rep(max(s0, 1e-8), nrow(Xnew))
                            }
                          },

                          "ridge" = {
                            out <- tryCatch(
                              build_hetero_ridge(X, proxy, h_params, seed),
                              error = function(e) NULL
                            )
                            if (is.null(out)) {
                              # fallback: constant scale
                              const_s <- pmax(exp(mean(proxy, na.rm = TRUE)), 1e-8)
                              function(Xnew) rep(const_s, nrow(as.matrix(Xnew)))
                            } else {
                              hetero_fit <- out$fit_info  # capture actual λ for leaderboard
                              out$scale_fun
                            }
                          },

                          "tree" = {
                            if (!requireNamespace("rpart", quietly = TRUE)) stop("Install 'rpart' for hetero='tree'.")
                            dfh <- data.frame(proxy = proxy, X)
                            ctrl <- h_params$tree_control %||% rpart.control(cp = 0.001, minbucket = 5)
                            fit_h <- rpart(proxy ~ ., data = dfh, method = "anova", control = ctrl)
                            function(Xnew) {
                              if (is.vector(Xnew)) Xnew <- matrix(as.numeric(Xnew), ncol = p)
                              if (is.data.frame(Xnew)) Xnew <- as.matrix(Xnew)
                              s_hat <- exp(predict(fit_h, newdata = as.data.frame(Xnew)))
                              pmax(as.numeric(s_hat), 1e-8)
                            }
                          }
      )

      selected_lambda_hetero <- if (hetero == "ridge") {
        # 'lam' in the local ridge branch scope
        get0("lam", inherits = TRUE)
      } else if (hetero == "glmnet") {
        get0("hetero_lambda_selected__", ifnotfound = NA_real_, inherits = TRUE)
      } else NA_real_

      # ---- (skew) distribution builders, predictor, backtest (unchanged) --
      make_cdf_qfun_safe <- function(pdf, lower, upper, n_grid, max_expand = 6L) {
        expand <- 0L
        repeat {
          x_grid <- seq(lower, upper, length.out = n_grid)
          dx <- diff(x_grid)[1]
          dens <- pdf(x_grid); dens[!is.finite(dens)] <- 0; dens <- pmax(dens, 0)
          cdf_vals <- c(0, cumsum((dens[-1] + dens[-length(dens)]) * 0.5) * dx)
          mass <- cdf_vals[length(cdf_vals)]
          if (!is.finite(mass) || mass <= 0) {
            if (expand < max_expand) {
              expand <- expand + 1L
              rng <- (upper - lower) * 0.5
              lower <- lower - rng; upper <- upper + rng
              next
            } else {
              cdf_vals <- seq(0, 1, length.out = length(x_grid))
              cdf_fun  <- suppressWarnings(approxfun(x_grid, cdf_vals, yleft = 0, yright = 1, rule = 2))
              qfun_fun <- suppressWarnings(approxfun(cdf_vals, x_grid, yleft = lower, yright = upper, rule = 2))
              return(list(cdf = cdf_fun, qfun = qfun_fun))
            }
          } else cdf_vals <- cdf_vals / mass
          cdf_vals <- pmin(pmax(cdf_vals, 0), 1); cdf_vals <- cummax(cdf_vals)
          if (length(unique(cdf_vals)) < 2L) {
            if (expand < max_expand) {
              expand <- expand + 1L
              rng <- (upper - lower) * 0.5
              lower <- lower - rng; upper <- upper + rng
              next
            } else {
              cdf_vals <- seq(0, 1, length.out = length(x_grid))
            }
          } else {
            jitter <- seq(0, (length(cdf_vals)-1)) * 1e-14
            cdf_vals <- pmin(pmax(cdf_vals + jitter, 0), 1)
            cdf_vals <- (cdf_vals - min(cdf_vals)) / max(1e-12, (max(cdf_vals) - min(cdf_vals)))
          }
          cdf_fun  <- suppressWarnings(approxfun(x_grid, cdf_vals, yleft = 0, yright = 1, rule = 2))
          qfun_fun <- suppressWarnings(approxfun(cdf_vals, x_grid, yleft = lower, yright = upper, rule = 2))
          return(list(cdf = cdf_fun, qfun = qfun_fun))
        }
      }

      builder <- switch(dist_key,
                        "gaussian" = function(mu, s) list(
                          dfun = function(x) dnorm(x, mean = mu, sd = s),
                          pfun = function(q)  pnorm(q, mean = mu, sd = s),
                          qfun = function(p)  qnorm(p, mean = mu, sd = s),
                          rfun = function(n)  rnorm(n, mean = mu, sd = s)
                        ),
                        "laplace" = function(mu, b) {
                          dfun <- function(x) (1/(2*b)) * exp(-abs(x - mu)/b)
                          pfun <- function(q) { z <- (q - mu)/b; ifelse(q < mu, 0.5 * exp(z), 1 - 0.5 * exp(-z)) }
                          qfun <- function(p) { p <- pmin(pmax(as.numeric(p),0),1); lo <- p < 0.5; out <- numeric(length(p));
                          out[lo] <- mu + b*log(2*p[lo]); out[!lo] <- mu - b*log(2*(1-p[!lo])); out }
                          rfun <- function(n) { u <- runif(n) - 0.5; mu - b * sign(u) * log(1 - 2*abs(u)) }
                          list(dfun=dfun,pfun=pfun,qfun=qfun,rfun=rfun)
                        },
                        "student" = function(mu, s) {
                          nu <- glob$nu
                          list(
                            dfun = function(x) (1/s) * dt((x - mu)/s, df = nu),
                            pfun = function(q)         pt((q - mu)/s, df = nu),
                            qfun = function(p) mu + s * qt(p, df = nu),
                            rfun = function(n) mu + s * rt(n, df = nu)
                          )
                        },
                        "logistic" = function(mu, s) list(
                          dfun = function(x) { z <- (x - mu)/s; exp(-z) / (s * (1 + exp(-z))^2) },
                          pfun = function(q) 1/(1 + exp(-(q - mu)/s)),
                          qfun = function(p) { p <- pmin(pmax(p, eps), 1 - eps); mu + s * log(p/(1 - p)) },
                          rfun = function(n) { u <- runif(n); mu + s * log(u/(1 - u)) }
                        ),
                        "asymmetric_laplace" = function(mu, sigma) {
                          tau <- glob$tau
                          list(
                            dfun = function(x) {
                              u <- x - mu; k1 <- tau/sigma; k2 <- (1 - tau)/sigma
                              ifelse(u < 0, (1 - tau) * k1 * exp(u * k1),
                                     tau       * k2 * exp(-u * k2))
                            },
                            pfun = function(q) {
                              u <- q - mu
                              ifelse(u < 0,
                                     (1 - tau) * exp(u * (tau / sigma)),
                                     1 - tau * exp(-u * ((1 - tau) / sigma)))
                            },
                            qfun = function(p) {
                              p <- pmin(pmax(as.numeric(p), 0), 1); cut <- 1 - tau; out <- numeric(length(p))
                              lo <- p < cut
                              out[lo]  <- mu + (sigma / tau)       * log(p[lo] / (1 - tau))
                              out[!lo] <- mu - (sigma / (1 - tau)) * log((1 - p[!lo]) / tau)
                              out
                            },
                            rfun = function(n) {
                              u <- runif(n); cut <- 1 - tau; out <- numeric(n); lo <- u < cut
                              out[lo]  <- mu + (sigma / tau)       * log(u[lo] / (1 - tau))
                              out[!lo] <- mu - (sigma / (1 - tau)) * log((1 - u[!lo]) / tau)
                              out
                            }
                          )
                        },
                        "skew_normal" = function(mu, s) {
                          alpha <- glob$alpha
                          pdf <- function(x) { z <- (x - mu)/s; (1/s) * (2 * dnorm(z) * pnorm(alpha * z)) }
                          rng <- 12 * max(s, 1)
                          grids <- make_cdf_qfun_safe(pdf, lower = mu - rng, upper = mu + rng, n_grid = skew_grid_n)
                          rfn <- function(n) {
                            delta <- alpha / sqrt(1 + alpha^2)
                            u0 <- rnorm(n); u1 <- rnorm(n)
                            z  <- delta * abs(u0) + sqrt(1 - delta^2) * u1
                            mu + s * z
                          }
                          list(dfun = pdf, pfun = grids$cdf, qfun = grids$qfun, rfun = rfn)
                        },
                        "skew_t" = function(mu, s) {
                          alpha <- glob$alpha; nu <- glob$nu
                          pdf <- function(x) { z <- (x - mu)/s; (1/s) * (2 * dt(z, df = nu) *
                                                                           pt(alpha * z * sqrt((nu + 1) / (nu + z^2)), df = nu + 1)) }
                          rng <- 20 * max(s, 1)
                          grids <- make_cdf_qfun_safe(pdf, lower = mu - rng, upper = mu + rng, n_grid = skewt_grid_n)
                          rfn <- function(n) {
                            delta <- alpha / sqrt(1 + alpha^2)
                            u0 <- rnorm(n); u1 <- rnorm(n)
                            zsn <- delta * abs(u0) + sqrt(1 - delta^2) * u1
                            w   <- rchisq(n, df = nu)
                            mu + s * zsn * sqrt(nu / w)
                          }
                          list(dfun = pdf, pfun = grids$cdf, qfun = grids$qfun, rfun = rfn)
                        }
      )

      cap_q <- function(v) if (cap_to_y) pmin(pmax(v, y_min), y_max) else v
      cap_r <- function(v) if (cap_to_y) pmin(pmax(v, y_min), y_max) else v

      predictor <- function(Xnew) {
        if (is.vector(Xnew)) Xnew <- matrix(as.numeric(Xnew), ncol = p)
        if (is.data.frame(Xnew)) Xnew <- as.matrix(Xnew)
        colnames(Xnew) <- x_names
        mu <- mu_model(Xnew)
        sc <- scale_fun(Xnew)
        out <- vector("list", nrow(Xnew))
        for (i in seq_len(nrow(Xnew))) {
          pf <- switch(dist_key,
                       "gaussian"           = builder(mu[i], sc[i]),
                       "laplace"            = builder(mu[i], sc[i]),
                       "student"            = builder(mu[i], sc[i]),
                       "logistic"           = builder(mu[i], sc[i]),
                       "asymmetric_laplace" = builder(mu[i], sc[i]),
                       "skew_normal"        = builder(mu[i], sc[i]),
                       "skew_t"             = builder(mu[i], sc[i])
          )
          qf <- pf$qfun; rf <- pf$rfun
          pf$qfun <- function(p) cap_q(qf(p))
          pf$rfun <- function(n) cap_r(rf(n))
          out[[i]] <- pf
        }
        if (nrow(Xnew) == 1L) out[[1]] else out
      }

      ks_calib <- function(u) {
        u <- sort(u); m <- length(u); if (m == 0) return(NA_real_)
        d_plus  <- max((seq_len(m)/m) - u)
        d_minus <- max(u - ((seq_len(m)-1)/m))
        max(d_plus, d_minus)
      }
      mc_crps <- function(pred, yobs, nsim = 4000L) {
        z  <- pred$rfun(nsim)
        z2 <- pred$rfun(nsim)
        mean(abs(z - yobs)) - 0.5 * mean(abs(z - z2))
      }

      bt_df <- NULL
      if (backtest) {
        stopifnot(n_testing >= 1, n_testing < n)
        testing_points <- tail(seq(1, n, n_testing + 1), -1)
        pits <- numeric(0); rows <- list()
        for (t in testing_points) {
          obj_t <- component_fit(
            X[1:t, , drop=FALSE], y[1:t],
            engine = engine, engine_params = engine_params,
            dist = dist, dist_params = dist_params,
            hetero = hetero, h_params = h_params,
            backtest = FALSE, cap_to_y = cap_to_y,
            skew_grid_n = skew_grid_n, skewt_grid_n = skewt_grid_n
          )
          pred_t <- obj_t$predict(X[t + 1, , drop=FALSE])
          pit_t  <- pred_t$pfun(y[t + 1])
          crps_t <- mc_crps(pred_t, y[t + 1], nsim = mc_crps_n)

          pits <- c(pits, as.numeric(pit_t))
          rows[[length(rows) + 1L]] <- data.frame(
            t = t, train_n = t,
            test_y = y[t + 1],
            pit = as.numeric(pit_t),
            crsp = crps_t,
            calibration_error = ks_calib(pits)
          )
        }
        bt_df <- do.call(rbind, rows)
      }

      structure(list(
        engine = engine,
        dist = dist,
        hetero = hetero,
        predict = predictor,
        backtest = bt_df,
        engine_fit = get0("engine_fit", ifnotfound = list(method = engine, lambda = NA_real_, alpha = NA_real_), inherits = TRUE),
        hetero_fit = list(method = hetero, lambda = selected_lambda_hetero)
      ), class = c("component_fit","list"))
    }
#'
#'
#' @keywords internal
#'
smart_reframer <- function(ts, seq_len, stride)
{
  n_length <- length(ts)
  if(seq_len > n_length | stride > n_length){stop("vector too short for sequence length or stride")}
  if(n_length%%seq_len > 0){ts <- tail(ts, - (n_length%%seq_len))}
  n_length <- length(ts)
  idx <- seq(from = 1, to = (n_length - seq_len + 1), by = 1)
  reframed <- t(sapply(idx, function(x) ts[x:(x+seq_len-1)]))
  if(seq_len == 1){reframed <- t(reframed)}
  idx <- rev(seq(nrow(reframed), 1, - stride))
  reframed <- reframed[idx,,drop = F]
  colnames(reframed) <- paste0("t", 1:seq_len)
  return(reframed)
}
#'
#'
#' @keywords internal
#'
dts <- function(ts, lag = 1, mode = "relative", stride = FALSE)
{
  if(mode == "relative"){trafo_ts <- tail(ts, -lag)/head(ts, -lag)-1}
  if(mode == "log"){trafo_ts <- log(tail(ts, -lag)/head(ts, -lag))}
  trafo_ts[!is.finite(trafo_ts)] <- NA
  if(anyNA(ts)){trafo_ts <- na_kalman(trafo_ts)}
  if(stride){trafo_ts <- trafo_ts[rev(seq(length(trafo_ts), 1, -lag))]}
  return(trafo_ts)
}
#'
#'
#' @keywords internal
#'
plot_graph <- function(ts, pred_funs, alpha = 0.05, dates = NULL,
                            line_size = 1.3, label_size = 11,
                            forcat_band = "seagreen2", forcat_line = "seagreen4",
                            hist_line = "gray43",
                            label_x = "Horizon", label_y = "Forecasted Var",
                            date_format = "%b-%Y") {

  ## ---------- 1. Compute prediction intervals (lower / median / upper) ----------
  pred_list <- lapply(pred_funs, function(f) {
    qs <- quantile(f$rfun(1000),
                   probs = c(alpha, 0.5, 1 - alpha),
                   na.rm = TRUE)
    as.numeric(qs)
  })

  preds <- do.call(rbind, pred_list)
  colnames(preds) <- c("lower", "median", "upper")
  future <- nrow(preds)

  ## ---------- 2. Build x coordinates ----------
  if (is.null(dates)) {
    # numeric horizon
    x_hist   <- seq_along(ts)
    x_forcat <- length(ts) + (1:future)
    x_all    <- c(x_hist, x_forcat)
  } else {
    # date horizon
    x_hist <- as.Date(as.character(dates))
    last_date <- tail(x_hist, 1)
    x_forcat <- last_date + (1:future)
    x_all    <- c(x_hist, x_forcat)
  }

  y_all <- c(ts, preds[, "median"])

  ## ---------- 3. Set up graphical parameters for label size ----------
  old_par <- par(no.readonly = TRUE)
  on.exit(par(old_par))

  scale_cex <- label_size / 11  # 11 is the default in original code
  par(cex.axis = scale_cex, cex.lab = scale_cex * 1.1)

  ## ---------- 4. Create empty plot and axes ----------
  if (is.null(dates)) {
    # numeric x-axis
    plot(x_all, y_all, type = "n",
         xlab = label_x, ylab = label_y)
    axis(1)  # default numeric axis
  } else {
    # date x-axis with custom format
    plot(x_all, y_all, type = "n",
         xlab = label_x, ylab = label_y,
         xaxt = "n")
    axis.Date(1, at = pretty(x_all),
              labels = format(pretty(x_all), format = date_format))
  }

  ## ---------- 5. Historical + full median line ----------
  lines(x_all, y_all, col = hist_line, lwd = line_size)

  ## ---------- 6. Forecast band (ribbon) ----------
  # polygon between lower and upper over the forecast horizon
  xx <- c(x_forcat, rev(x_forcat))
  yy <- c(preds[, "lower"], rev(preds[, "upper"]))

  polygon(xx, yy,
          col = adjustcolor(forcat_band, alpha.f = 0.3),
          border = NA)

  ## ---------- 7. Forecast median line ----------
  lines(x_forcat, preds[, "median"],
        col = forcat_line, lwd = line_size)

  p <- recordPlot()

  return(p)
}
