#' Eta Squared Statistic for Strength of Association
#'
#' @description
#' Computes the Eta squared statistic (correlation ratio), i.e. the share of
#' the variance of a numeric variable that is explained by a grouping variable.
#'
#' @details
#' The statistic is obtained by variance decomposition on the contingency
#' table \code{table(a, b)}, without fitting a linear model. The distinct
#' values of \code{a} label the rows and the levels of \code{b} label the
#' columns. Eta squared is the between-group variance divided by the sum of
#' the within-group and between-group variances, where all means are weighted
#' by the observed frequencies.
#'
#' Observations with a missing value in either variable are left out by
#' \code{table()}. Factor levels that never occur are ignored. The row labels
#' of the table are converted back with \code{as.numeric()}, so \code{a} must
#' be numeric (or a factor whose labels are numbers).
#'
#' @param a A numeric vector representing the dependent variable.
#' @param b A categorical or numeric vector representing the grouping variable.
#'
#' @return A numeric value between 0 and 1 representing the Eta squared
#'   statistic. \code{NaN} is returned when the statistic is undefined:
#'   no complete observations, or \code{a} is constant.
#'
#' @examples
#' Eta_squared(c(1, 2.5, 4, 7, 7, 11), c("a", "b", "b", "c", "d", "f"))
#'
#' @export
Eta_squared <- function(a, b) {
  # Observed joint frequencies: distinct values of a (rows) by levels of b
  # (columns), stored as a plain numeric matrix.
  obs <- table(a, b)
  counts <- matrix(as.numeric(obs), nrow = nrow(obs), ncol = ncol(obs))

  # Unused factor levels produce all-zero rows/columns; a zero column total
  # would otherwise turn the conditional mean into 0/0.
  used_rows <- rowSums(counts) > 0
  used_cols <- colSums(counts) > 0
  values <- as.numeric(rownames(obs)[used_rows])
  counts <- counts[used_rows, used_cols, drop = FALSE]

  tot <- sum(counts)
  if (tot == 0) {
    return(NaN)
  }
  col_tot <- colSums(counts)
  row_tot <- rowSums(counts)

  # Conditional mean of a within each group. `values` is recycled down the
  # columns, so each count is multiplied by the value of its row.
  cond_mean_a <- colSums(values * counts) / col_tot

  # Overall mean of a, weighted by how often each distinct value occurs.
  # (The plain mean of the distinct values is wrong as soon as values repeat.)
  grand_mean_a <- sum(values * row_tot) / tot

  # Within-group variance: squared deviations from the group mean.
  deviations <- outer(values, cond_mean_a, "-")
  wg_var_a <- sum(deviations^2 * counts) / tot

  # Between-group variance: squared deviations of group means from the
  # overall mean, weighted by group size.
  bg_var_a <- sum((cond_mean_a - grand_mean_a)^2 * col_tot) / tot

  bg_var_a / (wg_var_a + bg_var_a)
}

#--------------------------------------------------------------------------------------------------------------------

#' Chi-squared Statistic for Association Between Two Variables
#'
#' Calculates the Pearson Chi-squared statistic from the contingency table
#' \code{table(a, b)} built from the two input vectors.
#' Both parameters can be categorical (non-numeric) variables.
#' Variable \code{a} is used as rows, and \code{b} as columns in the
#' contingency table.
#'
#' Observations with a missing value in either variable are left out by
#' \code{table()}. Factor levels that never occur are ignored, so they do not
#' create cells with an expected count of zero.
#'
#' @param a First array or vector. Can be numeric or a categorical factor.
#' @param b Second array or vector. Can be numeric or a categorical factor.
#'
#' @return The Chi-squared statistic measuring association between the
#'   variables, or \code{NaN} when there are no complete observations.
#'
#' @examples
#' Chi_squared(c(1, 3, 3, 6, 10), c("a", "b", "c", "d", "e"))
#'
#' @export
Chi_squared <- function(a, b) {
  # Observed joint frequencies as a plain numeric matrix.
  obs <- table(a, b)
  counts <- matrix(as.numeric(obs), nrow = nrow(obs), ncol = ncol(obs))

  # Drop all-zero rows/columns (unused factor levels): their expected counts
  # are zero and would contribute 0/0 to the statistic.
  counts <- counts[rowSums(counts) > 0, colSums(counts) > 0, drop = FALSE]

  tot <- sum(counts)
  if (tot == 0) {
    return(NaN)
  }

  # Expected counts under independence: (row total * column total) / n.
  expected <- outer(rowSums(counts), colSums(counts)) / tot

  sum((counts - expected)^2 / expected)
}

#--------------------------------------------------------------------------------------------------------------------

#' Cramer V Statistic for Strength of Association
#'
#' @description
#' Computes Cramer V to assess the strength of association between two
#' categorical variables.
#'
#' @details
#' This measure is derived from the Pearson Chi-squared statistic of the
#' contingency table \code{table(a, b)}:
#' \code{V = sqrt(Chi2 / (n * (min(r, c) - 1)))}, where \code{n} is the number
#' of observations and \code{r}, \code{c} are the numbers of rows and columns.
#' Both parameters can be categorical variables.
#' Variable \code{a} is treated as rows and \code{b} as columns in the
#' contingency table.
#'
#' Observations with a missing value in either variable are left out by
#' \code{table()}. Factor levels that never occur are ignored, so they affect
#' neither the expected counts nor \code{min(r, c)}.
#'
#' @param a A numeric or categorical vector (used for rows in the contingency table).
#' @param b A numeric or categorical vector (used for columns in the contingency table).
#'
#' @return A numeric value between 0 and 1 representing the strength of
#'   association (Cramer V). \code{NaN} is returned when the statistic is
#'   undefined: no complete observations, or one of the variables takes a
#'   single value only.
#'
#' @examples
#' Cramer_V(c(1, 1, 1, 4, 6), c("a", "b", "c", "c", "c"))
#'
#' @export
Cramer_V <- function(a, b) {
  # Observed joint frequencies as a plain numeric matrix.
  obs <- table(a, b)
  counts <- matrix(as.numeric(obs), nrow = nrow(obs), ncol = ncol(obs))

  # Drop all-zero rows/columns (unused factor levels): they would give zero
  # expected counts and inflate the table dimensions used in the normaliser.
  counts <- counts[rowSums(counts) > 0, colSums(counts) > 0, drop = FALSE]

  tot <- sum(counts)
  if (tot == 0) {
    return(NaN)
  }

  # Expected counts under independence: (row total * column total) / n.
  expected <- outer(rowSums(counts), colSums(counts)) / tot

  chi2 <- sum((counts - expected)^2 / expected)

  # Normalise by the largest value Chi-squared can reach for this table size.
  # With a single row or column this is 0/0, i.e. NaN.
  sqrt(chi2 / (tot * (min(dim(counts)) - 1)))
}
