# Methods for fitting generic log-linear models.

#' Fits a log-linear model to the data provided, using the design matrix provided.
#' Names for the effects will be "rows1", "cols1" etc.  If there are remaining entries,
#' they can be specified as the "effect_names" character vector.
#' This function is a wrapper around a call to glm() that handles some of the details
#' of the call and packages the output in a more convenient form.
#'
#' @importFrom stats glm
#' @importFrom stats poisson
#' @importFrom stats fitted
#' @importFrom stats coef
#' @importFrom stats model.matrix
#' @param n matrix of observed counts to be fit
#' @param x design matrix for predictor variables
#' @param effect_names character vector of additional names to apply to the columns of x
#' The default is NULL, in which case the columns will be labeled "model1" etc.
#' @returns a list containing
#'    x: the design matrix
#'    beta: the regression parameters
#'    se: the vector of standard errors
#'    g_squared: G^2 fit measure
#'    chisq: X^2 fit measure
#'    df: degrees of freedom
#'    expected: matrix of expected frequencies
#' @export
log_linear_fit <- function(n, x, effect_names=NULL) {
  y <- log_linear_matrix_to_vector(n)
  result <- glm(y ~ x - 1, family=poisson(link=log))

  expected <- matrix(fitted(result), nrow=nrow(n), byrow=TRUE)
  pi <- expected / sum(expected)
  g_squared <- likelihood_ratio_chisq(n, pi)
  chisq <- pearson_chisq(n, pi)
  df <- result$df.residual
  se <- summary(result)$coefficients[,2]

  beta <- coef(result)
  names <- log_linear_create_coefficient_names(x, n, effect_names)
  colnames(x) <- names
  names(beta) <- names
  names(se) <- names

  list(x=x, beta=beta, se=se, g_squared=g_squared, chisq=chisq, df=df, expected=expected)
}


#' Creates missing column names
#'
#' @param x the design matrix being modified
#' @param n the matrix of observed counts
#' @param effect_names user specified names to be applied to effects after the intercept
#' and main effects. Default is NULL
#' @returns vector of names to apply to x
log_linear_create_coefficient_names <- function(x, n, effect_names=NULL) {
  names <- colnames(x)
  n_base <- nrow(n) - 1 + ncol(n) - 1 + 1
  n_needed <- sum(is.na(names))
  if (n_needed == 0) {
    return(names)
  }

  names <- log_linear_create_base_names(n, x, names)
  n_needed <- ncol(x) - n_base

  names <- log_linear_add_effect_names(names, effect_names)
  n_needed <- n_needed - length(effect_names)

  names <- log_linear_add_model_names(names, n_needed)

  names
}


log_linear_create_base_names <- function(n, x, names) {
  # main effects
  n_base <- nrow(n) - 1 + ncol(n) - 1 + 1
  n_names <- ncol(x) - sum(is.na(names))
  if (n_names < n_base) {
    names <- c("(Intercept)", paste0("rows", 1:(nrow(n) - 1)), paste0("cols", 1:(ncol(n) - 1)))
  } else {
    names <- names[1:n_base]
  }
  names
}


log_linear_add_effect_names <- function(names, effect_names) {
  # effect_names
  if (!is.null(effect_names)) {
    names <- c(names, effect_names)
  }
  names
}


log_linear_add_model_names <- function(names, n_needed) {
  if (0 < n_needed) {
    names <- c(names, paste0("model", 1:n_needed))
  }
}


#' Design matrix for baseline independence model with main effects for rows and columns.
#'
#' It is intended as a straw-man model as it assumes no agreement beyond chance.
#' @param n the matrix of cell counts
#' @param n_raters number of raters. Currently only 2 (the default) are supported. This is
#' an extension point for future work.
#' @returns the design matrix for the model
#' @export
#' @examples
#' x <- log_linear_main_effect_design(vision_data)
log_linear_main_effect_design <- function(n, n_raters=2) {
  rows <- factor(rep(1:nrow(n), each=ncol(n)))
  cols <- factor(rep(1:ncol(n), nrow(n)))
  dat <- n
  dim(dat) <- NULL
  options("contrasts")
  x <- model.matrix(dat ~ rows + cols,
                    contrasts.arg = list(rows="contr.sum", cols="contr.sum"))
  x
}


#' Creates design matrix for model with main effects and a single agreement parameter delta.
#'
#' The model has main effects for rows and for columns, plus an additional parameter
#' for the agreement (diagonal) cells.
#' @param n the matrix of cell counts
#' @param n_raters number of raters. Currently only 2 (the default) are supported. This is
#' an extension point for future work.
#' @returns design matrix for the model
#' @export
#' @examples
#' x <- log_linear_equal_weight_agreement_design(vision_data)
log_linear_equal_weight_agreement_design <- function(n, n_raters=2) {
  x <- log_linear_main_effect_design(n)
  x_new <- rep(0.0, nrow(x))
  for (i in 1:nrow(n)) {
    index <- (i - 1) * ncol(n) + i
    x_new[index] <- 1.0
  }
  log_linear_append_column(x, x_new)
}


#' Creates design matrix for symmetry model.
#'
#' @param n matrix of observed counts
#' @return design matrix for the model
#' @export
log_linear_symmetry_design <- function(n) {
  I <- nrow(n)
  J <- ncol(n)
  if (I != J) {
    stop("Matrix must be square")
  }
  r <- I
  x <- matrix(0.0, nrow=r * r, ncol=r + r * (r - 1) / 2)
  index <- 1
  col <- 1
  names <- c()
  for (i in 1:r) {
    index <- (i - 1) * r + i
    x[index, col] <- 1.0
    names <- c(names, paste0("level_", i))
    col <- col + 1
  }
  for (i in 1:r) {
    for (j in i:r) {
      if (j == i) {
        next
      } else {
        index <- (i - 1) * r + j
        x[index, col] <- 1.0
        index2 <- (j - 1) * r + i
        x[index2, col] <- 1.0
        names <- c(names, paste0("cell_", i, "_", j))
        col <- col + 1
      }
    }
  }
  colnames(x) <- names
  x
}


#' Creates the design matrix for a quasi-symmetry design
#'
#' @param n matrix of observed counts
#' @returns design matrix for quasi-symmetry design
#' @export
log_linear_quasi_symmetry_model_design <- function(n) {
  I <- nrow(n)
  J <- ncol(n)
  if (I != J) {
    stop("Matrix must be square")
  }
  x <- log_linear_main_effect_design(n)
  names <- colnames(x)
  for (i in 1:I) {
    for (j in i:J) {
      if (j == i) {
        next
      }
      x_new <- rep(0.0, nrow(x))
      index <- (i - 1) * J + j
      x_new[index] <- 1.0
      index <- (j - 1) * I + i
      x_new[index] <- 1.0
      x <- log_linear_append_column(x, x_new)
      names <- c(names, paste0("cell_", i, "_", j))
    }
  }
  colnames(x) <- names
  x
}


#' Adds indicator variables for the diagonal cells in table n.
#'
#' @param n the matrix of observed counts
#' @param x the design matrix to be augmented
#' @returns new design matrix with nrow(n) columns added. The columns are all
#' 0 unless the row corresponds to a diagonal cell in n, in which case the
#' entry is 1
#' @export
#' @examples
#' x <- log_linear_main_effect_design(vision_data)
#' x_prime <- log_linear_add_all_diagonals(vision_data, x)
log_linear_add_all_diagonals <- function(n, x) {
  r <- nrow(n)
  p <- ncol(x)
  x_prime <- matrix(0.0, nrow=nrow(x), ncol=p + r)
  x_prime[, 1:p] <- x
  for (i in 1:r) {
    for (j in 1:r) {
      if (j == i) {
        index <- (i - 1) * r + j
        x_prime[index, p + j] = 1.0
      }
    }
  }
  x_prime
}


#' Creates a vector containing the linear-by-linear vector.
#'
#' Uses the ordinal ranks (1, 2, ..., nrow(n)) as data.
#' @param n the matrix of observed cell counts
#' @param centered should the variables be centered before the product is computed
#' @returns a vector containing the new variable
#' @export
#' @examples
#' linear <- log_linear_create_linear_by_linear(vision_data)
#' x <- log_linear_equal_weight_agreement_design(vision_data)
#' x_prime <- log_linear_append_column(x, linear)
log_linear_create_linear_by_linear <- function(n, centered=FALSE) {
  rows <- nrow(n)
  cols <- ncol(n)
  if (centered) {
    row_mean <- (rows + 1) / 2
    col_mean <- (cols + 1) / 2
  }

  linear_linear <- vector("double", rows * cols)
  index <- 1
  for (i in 1:rows) {
    if (centered) {
      u <- i - row_mean
    } else {
      u <- i
    }
    for (j in 1:cols) {
      if (centered) {
        v <- j - col_mean
      } else {
        v <- j
      }
      linear_linear[index] <- u * v
      index <- index + 1
    }
  }
  linear_linear
}


#' Appends a column to an existing design matrix.
#'
#' Takes the design matrix provided and appends the new column
#' @param x the original design matrix
#' @param x_new the column to be appended
#' @param position column index within the new matrix for the new column.
#' Defaults to last position = appending the column
#' @returns the new design matrix
#' @export
#' @examples
#' x <- log_linear_main_effect_design(vision_data)
#' new_column <- c(1, 0, 0, 0,
#'                 0, 1, 0, 0,
#'                 0, 0, 1, 0,
#'                 0, 0, 0, 1)
#' x_prime <- log_linear_append_column(x, new_column)
log_linear_append_column <- function(x, x_new, position=ncol(x) + 1) {
  if (nrow(x) != length(x_new)) {
    stop(paste0("Length of new column (", length(x_new), ") must equal number of rows in x(",
                nrow(x)))
  }
  x_prime <- matrix(nrow=nrow(x), ncol=ncol(x) + 1)
  x_prime[, 1:(position - 1)] <- x[, 1:(position - 1)]
  x_prime[, position] <- x_new
  if (position <= ncol(x)) {
    x_prime[, (position + 1):ncol(x_prime)] <- x[, position:ncol(x)]
  }
  x_prime
}


#' Removes a column from an existing design matrix.
#'
#' Takes the design matrix provided and removes the column in the position specified
#' @param x the original design matrix
#' @param position column index within the new matrix for the new column.
#' Defaults to last position
#' @returns the new design matrix
#' @export
#' @examples
#' x <- log_linear_main_effect_design(vision_data)
#' linear <- log_linear_create_linear_by_linear(vision_data)
#' x_prime <- log_linear_append_column(x, linear)
#' x_again <- log_linear_remove_column(x_prime, ncol(x_prime))
log_linear_remove_column <- function(x, position=ncol(x)) {
  x_prime <- matrix(nrow=nrow(x), ncol=ncol(x) - 1)
  x_prime[, 1:(position - 1)] <- x[, 1:(position - 1)]
  if (position < ncol(x)) {
    x_prime[, position:ncol(x_prime)] <- x[, (position + 1):ncol(x)]
  }
  x_prime
}


#' Converts a matrix of data into a vector suitable for use in analysis with the
#' design matrices created.
#' Unlike simply calling vector() on the matrix the resulting vector is organized
#' by rows, then columns. This order corresponds to the order in the design matrix.
#'
#' @param dat the matrix to be converted a vector
#' @returns a vector suitable to use as dependent variable, e.g. in a call to glm()
#' @export
log_linear_matrix_to_vector <- function(dat) {
  as.vector(t(dat))
}



#' Computes the logs of the cell frequencies.
#'
#' In the case of an observed 0, epsilon is inserted into the cell before the log is taken.
#' @param n matrix of cell counts
#' @param epsilon amount to be inserted into cell with observed 0.
#' @param all_cells add epsilon to all cells or just those with 0 observed frequencies
#' @returns a list containing: log_n -- a vector of log frequencies and dat -- modified version
#' of the cell counts data
#' @export
log_Linear_create_log_n <- function(n, epsilon=1.0e-6, all_cells=FALSE) {
  log_n <- vector("double", nrow(n) * ncol(n))
  index = 1
  for (i in 1:nrow(n)) {
    for (j in 1:ncol(n)) {
      if (all_cells) {
        n[i, j] <- n[i, j] + epsilon
      } else {
        n[i, j] <- ifelse(0.0 < n[i, j], n[i, j], epsilon)
      }
      log_n[index] <- log(n[i,j])
      index <- index + 1
    }
  }
  list(log_n=log_n, dat=n)
}

