% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/ml_models.R, R/results.R
\name{soil_preprocess}
\alias{soil_preprocess}
\alias{pcr_preprocess}
\alias{plsr_preprocess}
\alias{lasso_preprocess}
\alias{rf_preprocess}
\alias{cubist_preprocess}
\alias{results}
\title{Soil and Spectral Data Preprocessing for Model Training}
\usage{
pcr_preprocess(soil, vnir.matrix, j, preprocess, type_of_soil)

plsr_preprocess(soil, vnir.matrix, j, preprocess, type_of_soil)

lasso_preprocess(soil, vnir.matrix, j, preprocess, type_of_soil)

rf_preprocess(soil, vnir.matrix, j, preprocess, type_of_soil)

cubist_preprocess(soil, vnir.matrix, j, preprocess, type_of_soil)

results(metric.list, soil_type)
}
\arguments{
\item{soil}{A data frame of soil properties. Must include the target soil variable.}

\item{vnir.matrix}{A numeric matrix of VNIR spectral data.}

\item{j}{A list of index vectors specifying calibration sample sets
(e.g., from \code{\link{merge_of_lab_and_spectrum}}).}

\item{preprocess}{A preprocessing function to apply to the spectral data
(e.g., smoothing, normalization).}

\item{type_of_soil}{An integer index selecting which soil property column to model.}

\item{metric.list}{A list of MSD metric objects returned by one of the
preprocessing/model functions. Each element corresponds to a model fit on a
calibration/validation split.}

\item{soil_type}{Optional. An integer or character string indicating which soil
property was modeled. Currently not used internally; kept for consistency.}
}
\value{
For the \code{*_preprocess()} functions: a list of MSD metric objects for the
calibration and validation sets, specific to the fitted model.

For \code{results()}: a named numeric vector of mean performance metrics across all splits:
  \describe{
    \item{LV}{Latent variable / model index}
    \item{cv-r2}{Cross-validated R-squared for calibration set}
    \item{cv-bias}{Bias in cross-validation for calibration set}
    \item{cv-rmse}{Root mean squared error in cross-validation for calibration set}
    \item{cal-mse}{Mean squared error for calibration set}
    \item{cal-rpiq}{Ratio of performance to interquartile distance for calibration set}
    \item{val-r2}{R-squared for validation set}
    \item{val-bias}{Bias for validation set}
    \item{val-rmse}{Root mean squared error for validation set}
    \item{val-mse}{Mean squared error for validation set}
    \item{val-rpiq}{Ratio of performance to interquartile distance for validation set}
  }
}
\description{
The \code{*_preprocess()} functions fit predictive models for soil properties using VNIR spectral data.
Each function applies a specific machine learning method:
\itemize{
  \item \code{pcr_preprocess()} – Principal Component Regression (PCR)
  \item \code{plsr_preprocess()} – Partial Least Squares Regression (PLSR)
  \item \code{lasso_preprocess()} – LASSO regression
  \item \code{rf_preprocess()} – Random Forest regression
  \item \code{cubist_preprocess()} – Cubist regression
}

\code{results()} computes mean performance metrics across multiple calibration and validation sets.
It is typically used to summarize the results of soil property prediction models
generated by preprocessing functions such as \code{pcr_preprocess()},
\code{plsr_preprocess()}, \code{lasso_preprocess()}, \code{rf_preprocess()},
or \code{cubist_preprocess()}.
}
\details{
All \code{*_preprocess()} functions use the same workflow:
\enumerate{
  \item Combine the selected soil property with preprocessed spectra.
  \item Split data into calibration and validation sets (using sample indices).
  \item Fit the chosen model across multiple calibration/validation partitions.
  \item Generate predictions and compute performance metrics (MSD-based).
}
}
\examples{
\donttest{
# Example with PCR
results_pcr <- pcr_preprocess(soil, vnir.matrix, j, preprocess = scale, type_of_soil = 2)

# Example with Random Forest
results_rf <- rf_preprocess(soil, vnir.matrix, j, preprocess = scale, type_of_soil = 2)
}

\donttest{
msd_list <- pcr_preprocess(soil, vnir.matrix, j, preprocess = scale, type_of_soil = 2)
results_summary <- results(msd_list)
}

}
\seealso{
\code{\link{merge_of_lab_and_spectrum}}, \code{\link{ml_f}}
}
