#' External validity indices
#'
#' \strong{E}xternal \strong{v}alidity indices compare a predicted clustering
#' result with a reference class or gold standard.
#'
#' `ev_nmi` calculates the normalized mutual information (NMI) between the
#' predicted and reference labels. The mutual information is obtained from the
#' entropies as `H(pred) + H(ref) - H(pred, ref)`, truncated at zero, and is
#' then divided by `sqrt(H(pred) * H(ref))`.
#'
#' @param pred.lab predicted labels generated by classifier
#' @param ref.lab reference labels for the observations
#' @param method method of computing the entropy. Can be any one of "emp", "mm",
#'   "shrink", or "sg". Passed on to [infotheo::entropy()].
#'
#' @return `ev_nmi` returns the normalized mutual information. The result is
#'   not finite when either marginal entropy is zero (for instance, the
#'   empirical entropy of a constant label vector is zero), because the
#'   normalizing denominator is then zero.
#' @references Strehl A, Ghosh J. Cluster ensembles: a knowledge reuse framework
#'   for combining multiple partitions. J. Mach. Learn. Res. 2002;3:583-617.
#' @note `ev_nmi` is adapted from [infotheo::mutinformation()]
#' @author Johnson Liu, Derek Chiu
#' @name external_validity
#' @export
#'
#' @examples
#' set.seed(1)
#' E <- matrix(rep(sample(1:4, 1000, replace = TRUE)), nrow = 100, byrow =
#'               FALSE)
#' x <- sample(1:4, 100, replace = TRUE)
#' y <- sample(1:4, 100, replace = TRUE)
#' ev_nmi(x, y)
#' ev_confmat(x, y)
ev_nmi <- function(pred.lab, ref.lab, method = "emp") {
  # Joint entropy of the paired labels, then the two marginal entropies
  h_joint <- infotheo::entropy(data.frame(ref.lab, pred.lab), method)
  h_pred <- infotheo::entropy(pred.lab, method)
  h_ref <- infotheo::entropy(ref.lab, method)

  # Mutual information is non-negative in theory, so clamp negative
  # estimates to 0 (scalar `max()` instead of the vectorised `ifelse()`)
  mi <- max(0, h_pred + h_ref - h_joint)

  # Normalize by the geometric mean of the marginal entropies
  mi / sqrt(h_pred * h_ref)
}

#' @details `ev_confmat` calculates a variety of statistics associated with
#'   confusion matrices. Accuracy, Cohen's kappa, and Matthews correlation
#'   coefficient have direct multiclass definitions, whereas all other
#'   metrics use macro-averaging. The predicted labels are passed through
#'   `relabel_class()` before the confusion matrix is tabulated, and an error
#'   is raised if `pred.lab` contains a label that does not occur in `ref.lab`.
#'
#' @return `ev_confmat` returns a tibble of the following summary statistics
#'   using [yardstick::summary.conf_mat()]:
#' * `accuracy`: Accuracy
#' * `kap`: Cohen's kappa
#' * `sens`: Sensitivity
#' * `spec`: Specificity
#' * `ppv`: Positive predictive value
#' * `npv`: Negative predictive value
#' * `mcc`: Matthews correlation coefficient
#' * `j_index`: Youden's J statistic
#' * `bal_accuracy`: Balanced accuracy
#' * `detection_prevalence`: Detection prevalence
#' * `precision`: alias for `ppv`
#' * `recall`: alias for `sens`
#' * `f_meas`: F Measure
#' @rdname external_validity
#' @export
ev_confmat <- function(pred.lab, ref.lab) {
  # Every predicted label must also occur among the reference labels
  if (!all(unique(pred.lab) %in% unique(ref.lab))) {
    # Message is assembled from pieces so it stays on one line when emitted
    stop(
      "Cluster labels should be the same in the predicted and reference ",
      "classes."
    )
  }

  # Relabel predicted classes and give them the sorted reference levels
  ref.levels <- sort(unique(ref.lab))
  pred.relab <- factor(relabel_class(pred.lab, ref.lab), levels = ref.levels)

  # Confusion matrix and summary statistics
  CM <- table(pred.relab, ref.lab)
  summary(yardstick::conf_mat(CM))
}
