#-------------------- WISE ----------------------



#' @title Calculate an n by n similarity matrix
#' @description Returns an n by n similarity matrix computed from the rows of
#'   \code{data}. Similarities are negated pairwise distances, so the diagonal
#'   is zero and values closer to zero indicate more similar observations.
#' @details For \code{measure = "graph"}, the k nearest neighbors of each
#'   observation are found with \code{FNN::get.knn()} (which searches by
#'   Euclidean distance regardless of \code{metric}); \code{metric} only selects
#'   the weight placed on the retained edges. Entries that are not a
#'   nearest-neighbor edge are set to zero and the matrix is then symmetrized
#'   as \code{0.5 * (S + t(S))}.
#' @import FNN
#' @name wise_sim
#' @param data an n by p numeric data matrix (or data frame), with n being the
#'   sample size and p being the dimension. A numeric vector is treated as an
#'   n by 1 matrix.
#' @param measure the similarity measure: "distance" for distance-based measure; "graph" for k-nearest neighbor graph-based measure. The default is "distance".
#' @param metric character string specifying the distance metric or graph weight. "manhattan" for Manhattan distance (default), "euclidean" for Euclidean distance.
#' @param k the number of nearest neighbors used in the k-nearest neighbor
#'   graph (only used if measure = "graph"). Must be at least 1 and smaller
#'   than n. k = floor(sqrt(n)) if not specified.
#' @return an n by n similarity matrix.
#' @export
#' @examples
#' X <- matrix(rnorm(100), nrow = 10)
#' wise_sim(X, measure = "distance", metric = "manhattan")
wise_sim <- function(data, measure = "distance", metric = "manhattan", k = NULL) {

  # Input checks. A data frame is checked column by column; anything else is
  # checked as a whole (avoids iterating over every matrix element).
  all_numeric <- if (is.data.frame(data)) {
    all(vapply(data, is.numeric, logical(1)))
  } else {
    is.numeric(data)
  }
  if (!all_numeric) {
    stop("'data' must contain only numeric entries.")
  }

  # Work on a matrix from here on so that vectors (n by 1) and data frames are
  # handled identically by both measures.
  data <- as.matrix(data)

  if (anyNA(data)) {
    stop("'data' must not contain NA values.")
  }

  # Reject unsupported options up front instead of failing later with
  # "object 'sim_mat' not found".
  if (!(is.character(measure) && length(measure) == 1L &&
        measure %in% c("distance", "graph"))) {
    stop("Invalid value for 'measure'. Must be one of 'distance' or 'graph'.")
  }
  if (!(is.character(metric) && length(metric) == 1L &&
        metric %in% c("manhattan", "euclidean"))) {
    stop("Invalid value for 'metric'. Must be one of 'manhattan' or 'euclidean'.")
  }

  # Negated pairwise distances; shared by both measures.
  sim_mat <- -as.matrix(dist(data, method = metric))

  if (measure == "graph") {
    n <- nrow(data)
    if (is.null(k)) {
      k <- floor(sqrt(n))
    }
    if (!is.numeric(k) || length(k) != 1L || is.na(k) || k < 1 || k >= n) {
      stop("'k' must be a single number with 1 <= k < n.")
    }

    # Get indices of k nearest neighbors (n by k matrix, one row per point)
    nn_indices <- FNN::get.knn(data, k = k)$nn.index

    # Adjacency of the directed k-NN graph: is_edge[i, j] is TRUE when j is
    # one of the nearest neighbors of i. as.vector() walks nn_indices column
    # by column, hence the row index 1..n is repeated once per column.
    is_edge <- matrix(FALSE, nrow = n, ncol = n)
    is_edge[cbind(rep(seq_len(n), times = ncol(nn_indices)),
                  as.vector(nn_indices))] <- TRUE

    # Keep the distance-based weight on edges only
    sim_mat[!is_edge] <- 0

    # Symmetrize the similarity matrix
    sim_mat <- 0.5 * (sim_mat + t(sim_mat))
  }

  sim_mat
}





#' @title Conducts the serial independence test (WISE) based on a similarity matrix
#' @description Returns the p-value of WISE, the squared test statistic, and related quantities (the chi-square critical value, permutation mean, permutation variance).
#' @details The unstandardized statistic is \code{t = sum(sim * W)}. It is
#'   centered and scaled by its permutation mean and variance, squared, and
#'   compared with a chi-square distribution with 1 degree of freedom. If the
#'   permutation variance is not positive (for example when all off-diagonal
#'   similarities are equal) the squared statistic and p-value are returned as
#'   \code{NaN} with a warning. All returned values are rounded to 4 decimals.
#' @name wise_test
#' @importFrom stats dist pchisq qchisq
#' @param sim an n by n numeric similarity matrix without missing values (n must be at least 5), typically generated from wise_sim().
#' @param dependence design for the weight matrix W:
#' if "proximity", \eqn{W_{ij} =  (1 / (|i - j|^2 + 1))-1};
#' if "periodicity", then \eqn{W_{ij} = |cos(|i-j|\pi/h)|-1};
#' if "customized", users should input their self-defined weight matrix through the parameter "weight".
#' The default is "proximity".
#' @param weight an n by n numeric weight matrix with zero diagonal and no missing values (only used if dependence = "customized").
#' @param alpha the nominal significance level, strictly between 0 and 1 (default is 0.05).
#' @param h the estimated periodicity, a non-zero finite number (default is 4). The parameter is used only if dependence = "periodicity".
#' @return A list containing:
#' \item{p_value}{The p-value of the test.}
#' \item{test_statistic_sq}{The value of the squared test statistic.}
#' \item{critical_value}{The chi-square critical value at the given significance level.}
#' \item{t}{The unstandardized test statistic.}
#' \item{permutation_mean}{The mean of t under the permutation null.}
#' \item{permutation_variance}{The variance of t under the permutation null.}
#' @export
#' @examples
#' library(MASS)
#' n <- 100
#' p <- 50
#'
#' # Example 1: iid data
#' set.seed(123)
#' data_iid <- mvrnorm(n = n, mu = rep(0, p) , Sigma = diag(p))
#' wise_test(
#'  wise_sim(data_iid, measure = "distance", metric = "manhattan"),
#'  dependence = "proximity",
#'  alpha = 0.05
#' )
#'
#' # Example 2: AR(1)
#' set.seed(123)
#' data_ar <- matrix(0, nrow = n, ncol = p)
#' error <- mvrnorm(n = n, mu = rep(0,p), Sigma = diag(p))
#' data_ar[1,] <- error[1,]
#' phi <- 0.1 * diag(p)
#' for (t in 2:n) {
#'   data_ar[t, ] <- phi %*% data_ar[t - 1, ] + error[t,]
#' }
#' wise_test(
#'  wise_sim(data_ar, measure = "distance", metric = "manhattan"),
#'  dependence = "proximity",
#'  alpha = 0.05
#' )
#'
#' # Example 3: NMA(2)
#' set.seed(123)
#' data_nma <- matrix(0, nrow = n, ncol = p)
#' error <- mvrnorm(n = n, mu = rep(0,p), Sigma = diag(p))
#' data_nma[1:2, 1:p] <-error[1:2,1:p]
#' for (i in 3:n) {
#'   data_nma[i, ] <- error[i,]*error[i-1,]*error[i-2,]
#' }
#' wise_test(
#'  wise_sim(data_nma, measure = "distance", metric = "manhattan"),
#'  dependence = "proximity",
#'  alpha = 0.05
#' )
wise_test <- function(sim, dependence = "proximity", alpha = 0.05, weight = NULL, h = 4) {

  # Validate the similarity matrix. as.matrix() lets an all-numeric data frame
  # through, as before.
  sim <- as.matrix(sim)
  if (!is.numeric(sim) || nrow(sim) != ncol(sim)) {
    stop("'sim' must be a square numeric matrix.")
  }
  if (anyNA(sim)) {
    stop("'sim' must not contain NA values.")
  }

  n <- nrow(sim)
  if (n < 5) {
    stop("Sample size 'n' must be at least 5.")
  }

  if (!is.numeric(alpha) || length(alpha) != 1L || is.na(alpha) ||
      alpha <= 0 || alpha >= 1) {
    stop("'alpha' must be a single number strictly between 0 and 1.")
  }

  # Calculate the weight matrix
  idx <- seq_len(n)
  if (dependence == "proximity") {
    B <- outer(idx, idx, function(i, j) (1 / ((abs(i - j))^2 + 1)) - 1)
  } else if (dependence == "periodicity") {
    # h = 0 would divide by zero and fill the weight matrix with NaN
    if (!is.numeric(h) || length(h) != 1L || !is.finite(h) || h == 0) {
      stop("'h' must be a single non-zero finite number.")
    }
    B <- outer(idx, idx, function(i, j) abs(cos(abs(i - j) * pi / h)) - 1)
  } else if (dependence == "customized") {
    # validation checks
    if (is.null(weight)) {
      stop("If dependence = 'customized', you must supply a weight matrix via the 'weight' argument.")
    }
    if (!is.matrix(weight)) {
      stop("'weight' must be a matrix.")
    }
    if (!is.numeric(weight)) {
      stop("'weight' must be numeric.")
    }
    if (!all(dim(weight) == c(n, n))) {
      stop(paste0("'weight' must be an ", n, " x ", n, " matrix."))
    }
    # Checked before the diagonal test: NA == 0 is NA and would break `if`
    if (anyNA(weight)) {
      stop("'weight' must not contain NA values.")
    }
    if (!all(diag(weight) == 0)) {
      stop("The diagonal entries of 'weight' must all be 0.")
    }
    B <- weight
  } else {
    stop("Invalid value for 'dependence'. Must be one of 'proximity', 'periodicity', or 'customized'.")
  }

  # Summary quantities of the similarity matrix (A*) and weight matrix (B*):
  # total sum, sum of squared entries, and sum of squared row sums.
  A1 <- sum(sim)
  A2 <- sum(sim * sim)
  A3 <- sum(rowSums(sim)^2)
  B1 <- sum(B)
  B2 <- sum(B * B)
  B3 <- sum(rowSums(B)^2)

  # Calculate t (named t_stat locally so base::t() is not shadowed)
  t_stat <- sum(sim * B)

  # Calculate the permutation mean of t
  per_mean <- A1 * B1 / (n * (n - 1))

  # Calculate the permutation variance of t
  term1 <- 4 * (n + 1) * (A3 - A1 * A1 / n) * (B3 - B1 * B1 / n) /
    (n * (n - 1) * (n - 2) * (n - 3))
  term2 <- 2 * (A2 - A1 * A1 / (n * (n - 1))) * (B2 - B1 * B1 / (n * (n - 1))) /
    (n * (n - 3))
  term3 <- 4 * (A2 - A1 * A1 / (n * (n - 1))) * (B3 - B1 * B1 / n) /
    (n * (n - 2) * (n - 3))
  term4 <- 4 * (A3 - A1 * A1 / n) * (B2 - B1 * B1 / (n * (n - 1))) /
    (n * (n - 2) * (n - 3))
  per_var <- term1 + term2 - term3 - term4

  # Calculate test statistics and p-value
  if (!is.finite(per_var) || per_var <= 0) {
    # Degenerate input: standardization is undefined, so say so explicitly
    # rather than silently returning NaN/Inf.
    warning("The permutation variance is not positive; the test statistic and p-value are undefined.",
            call. = FALSE)
    test_statistics <- NaN
    p_value <- NaN
  } else {
    test_statistics <- ((t_stat - per_mean) / sqrt(per_var))^2
    # Upper tail computed directly: more accurate than 1 - pchisq() for
    # large statistics.
    p_value <- pchisq(test_statistics, df = 1, lower.tail = FALSE)
  }
  critical_value <- qchisq(1 - alpha, df = 1)

  list(
    p_value              = round(p_value, 4),
    test_statistic_sq    = round(test_statistics, 4),
    critical_value       = round(critical_value, 4),
    t                    = round(t_stat, 4),
    permutation_mean     = round(per_mean, 4),
    permutation_variance = round(per_var, 4)
  )
}
















