####**********************************************************************
####**********************************************************************
####
####  RANDOM FORESTS FOR SURVIVAL, REGRESSION, AND CLASSIFICATION (RF-SRC)
####  Version 1.2
####
####  Copyright 2012, University of Miami
####
####  This program is free software; you can redistribute it and/or
####  modify it under the terms of the GNU General Public License
####  as published by the Free Software Foundation; either version 2
####  of the License, or (at your option) any later version.
####
####  This program is distributed in the hope that it will be useful,
####  but WITHOUT ANY WARRANTY; without even the implied warranty of
####  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
####  GNU General Public License for more details.
####
####  You should have received a copy of the GNU General Public
####  License along with this program; if not, write to the Free
####  Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
####  Boston, MA  02110-1301, USA.
####
####  ----------------------------------------------------------------
####  Project Partially Funded By: 
####  ----------------------------------------------------------------
####  Dr. Ishwaran's work was funded in part by DMS grant 1148991 from the
####  National Science Foundation and grant R01 CA163739 from the National
####  Cancer Institute.
####
####  Dr. Kogalur's work was funded in part by grant R01 CA163739 from the 
####  National Cancer Institute.
####  ----------------------------------------------------------------
####  Written by:
####  ----------------------------------------------------------------
####    Hemant Ishwaran, Ph.D.
####    Director of Statistical Methodology
####    Professor, Division of Biostatistics
####    Clinical Research Building, Room 1058
####    1120 NW 14th Street
####    University of Miami, Miami FL 33136
####
####    email:  hemant.ishwaran@gmail.com
####    URL:    http://web.ccs.miami.edu/~hishwaran
####    --------------------------------------------------------------
####    Udaya B. Kogalur, Ph.D.
####    Adjunct Staff
####    Dept of Quantitative Health Sciences
####    Cleveland Clinic Foundation
####    
####    Kogalur & Company, Inc.
####    5425 Nestleway Drive, Suite L1
####    Clemmons, NC 27012
####
####    email:  commerce@kogalur.com
####    URL:    http://www.kogalur.com
####    --------------------------------------------------------------
####
####**********************************************************************
####**********************************************************************


adrop <- function(x, d) {
  if (!is.array(x)) {
    x
  }
  else {
    if (d > 1) {
      x[,,1:d, drop = FALSE]
    }
    else {
      if (dim(x)[1] == 1) {
        rbind(x[,,1, drop = TRUE])
      }
      else {
        if (dim(x)[2] == 1) {
          cbind(x[,,1, drop = TRUE])
        }
        else {
          x[,,1, drop = TRUE]
        }
      }
    }
  }
}
amatrix <- function(x, d, names) {
  x <- matrix(x, d, dimnames = names)
  if (ncol(x) > 1) {
    x
   }
  else {
    c(x)
  }
}
amatrix.remove.names <- function(x) {
  if (!is.null(dim(x)) && ncol(x) == 1) {
    unlist(c(x), use.names = FALSE)
  }
  else {
    x
  }
}
atmatrix <- function(x, d, names, keep.names = FALSE) {
  x <- t(matrix(x, ncol = d, dimnames = names))
  if (ncol(x) > 1) {
    x
  }
  else {
    if (keep.names == FALSE) {
      c(x)
    }
    else {
      x.names <- rownames(x) 
      x <- c(x)
      names(x) <- x.names
      x
    }
  }
}
avector <- function(x, name = FALSE) {
  if (!is.null(dim(x)) && nrow(x) > 1 && ncol(x) == 1) {
    x.names <- rownames(x)
    x <- unlist(c(x))
    if (name) names(x) <- x.names else names(x) <- NULL
    x
  }
  else if (!is.null(dim(x)) && nrow(x) == 1 && ncol(x) > 1) {
    x.names <- colnames(x)
    x <- unlist(c(x))
    if (name) names(x) <- x.names else names(x) <- NULL
    x
  }
  else if (!is.null(dim(x)) && nrow(x) == 1 && ncol(x) == 1) {
    unlist(c(x))
  }
  else {
    x
  }
}
available <- function (package, lib.loc = NULL, quietly = TRUE) 
{
  package <- as.character(substitute(package))
  installed <- package %in% installed.packages()
  if (installed) {
    require(package, quietly = TRUE, character.only = TRUE)
  }
  else {
    return(invisible(FALSE))
  }
}
bayes.rule <- function(prob) {
  levels.class <- colnames(prob)
  factor(levels.class[apply(prob, 1, function(x) {
    if (!all(is.na(x))) {
      resample(which(x == max(x, na.rm = TRUE)), 1)
    }
    else {
      NA
    }
  })], levels = levels.class)
}
data.matrix <- function(x) {
  as.data.frame(lapply(x, function(xi) {
     if (is.integer(xi) || is.numeric(xi)) {
       xi
     }
     else if (is.logical(xi) || is.factor(xi)) {
       as.integer(xi)
     }
     else {
       as.numeric(xi)
     }
  }))
}
extract.pred <- function(obj, type, subset, time, which.outcome, oob = FALSE) {
  if (oob == FALSE) {
    pred <- obj$predicted
    surv <- obj$survival
    chf <- obj$chf
    cif <- obj$cif
  }
  else {
    pred <- obj$predicted.oob
    surv <- obj$survival.oob
    chf <- obj$chf.oob
    cif <- obj$cif.oob
  }
  if (obj$family == "surv") {
    n <- length(pred)
    if (missing(subset)) subset <- 1:n
    surv.type <- match.arg(type, c("mort", "rel.freq", "surv"))
    time.idx <-  max(which(obj$time.interest <= time))
    return(switch(surv.type,
             "mort" = pred[subset],
             "rel.freq" = pred[subset]/max(n, na.omit(pred)),
             "surv" =  100 * surv[subset, time.idx]
    ))
  }
  else if (obj$family == "surv-CR") {
    n <- length(pred)
    if (missing(subset)) subset <- 1:n
    if (missing(which.outcome)) which.outcome <- 1  
    cr.type <- match.arg(type, c("years.lost", "cif", "chf"))
    time.idx <-  max(which(obj$time.interest <= time))
    return(switch(cr.type,
             "years.lost" = pred[subset, which.outcome],
             "cif" = cif[subset, time.idx, which.outcome],
             "chf" = chf[subset, time.idx, which.outcome]
    ))
  }
  else if (obj$family == "class") {
    class.type <- match.arg(type, c("response", "prob"))
    if (missing(subset)) subset <- 1:nrow(pred)
    if (missing(which.outcome)) which.outcome <- 1
    prob <- pred[subset,, drop = FALSE]
    return(switch(class.type,
                  "prob" = prob[, which.outcome],
                  "response" =  bayes.rule(prob)))
  }
  else {
    if (missing(subset)) subset <- 1:length(pred)
    return(pred[subset])
  }
}
family.pretty <- function(fmly) {
  switch(fmly,
         "surv"     = "Random Forests [S]RC",
         "surv-CR"  = "Random Forests [S]RC",
         "regr"     = "Random Forests S[R]C",
         "class"    = "Random Forests SR[C]",
         )
}
finalizeFormula <- function(formula.obj, data) {
  yvar.names <- formula.obj$yvar.names
  index <- length(yvar.names)
  fNames <- formula.obj$fNames
  fmly <- formula.obj$family
  if (fNames[index + 1] == ".") {
    xvar.names <- names(data)[!is.element(names(data), fNames[1:index])]
  }
  else {
    xvar.names <- fNames[-c(1:index)]
    not.specified <- !is.element(xvar.names, names(data))
    if (sum(not.specified) > 0) {
      stop("formula appears misspecified, object ", xvar.names[not.specified], " not found")
    }
  }
  return (list(family=fmly, yvar.names=yvar.names, xvar.names=xvar.names))
}
finalizeData <- function(fnames, data, na.action) {
  data <- data[ , is.element(names(data), fnames), drop = FALSE]
  data <- as.data.frame(data.matrix(data))
  if (na.action == "na.omit") {
    data <- na.omit(data)
  }
  if (nrow(data) == 0) {
    stop("No records in the NA-processed data.  Consider na.impute.")
  }
  return (data)
}
get.importance.xvar <- function(importance.xvar, importance, object) {
  if (!is.null(importance)) {
    if (missing(importance.xvar) || is.null(importance.xvar)) {
      importance.xvar <- object$xvar.names
    }
    else {
      importance.xvar <- unique(importance.xvar)
      importance.xvar <- intersect(importance.xvar, object$xvar.names)
    }
    if (length(importance.xvar) == 0) {
      stop("xvar names do not match object xvar matrix")
    }
  }
  else {
    importance.xvar <- 0
  }
  return (importance.xvar)
}
get.nmiss <- function(xvar, yvar = NULL) {
  if (!is.null(yvar)) {
    sum(apply(yvar, 1, function(x){any(is.na(x))}) | apply(xvar, 1, function(x){any(is.na(x))}))
  }
  else {
    sum(apply(xvar, 1, function(x){any(is.na(x))}))
  }
}
get.outcome.target <- function(family, outcome.target) {
    outcome.target <- 1
  outcome.target
}
get.grow.nodesize <- function(fmly, nodesize) {
  if (fmly == "surv"){
    if (is.null(nodesize)) {
      nodesize <- 3
    }
  }
  else if (fmly == "surv-CR"){
   if (is.null(nodesize)) {
     nodesize <- 6
   }
 }
  else if (fmly == "class") {
    if (is.null(nodesize)) {
      nodesize <- 1
    }
  }
  else {
    if (is.null(nodesize)) {
      nodesize <- 5
    }
  }
  nodesize <- round(nodesize)
}  
get.sexp.dim <- function(fmly, event.type, yfactor, splitrule = NULL) {
  if (grepl("surv", fmly)) {
    if (!is.null(splitrule)) {
      if ((length(event.type) > 1) & (splitrule != "logrankscore")) {
        sexp.dim <- length(event.type)
      }
      else {
        sexp.dim <- 1
      }
    }
    else {
      if (length(event.type) > 1) {
        sexp.dim <- length(event.type)
      }
      else {
        sexp.dim <- 1
      }
    }
  }
  else {
    sexp.dim <- 0
    if (fmly == "class") {
      if (!is.null(yfactor$levels)) {
        sexp.dim <- length(yfactor$levels[[1]]) + 1
      }
      else {
        if (!is.null(yfactor$order.levels)) {
          sexp.dim <- length(yfactor$order.levels[[1]]) + 1
        }
      }
      if (sexp.dim == 0) {
        stop("The classification outcome is not a factor")
      }
    }
    else {
      sexp.dim <- 1
    }
  }
  sexp.dim
}
get.event.info <- function(obj, subset = NULL) {
  if (grepl("surv", obj$family)) {
    if (!is.null(obj$yvar)) {
      if (is.null(subset)) {
        subset <- (1:nrow(cbind(obj$yvar)))
      }
      r.dim <- 2
      time <- obj$yvar[subset, 1]
      cens <- obj$yvar[subset, 2]
      event <- na.omit(cens)[na.omit(cens) > 0]
      event.type <- unique(event)
    }
    else {
      r.dim <- 0
      event <- event.type <- cens <- cens <- time <- NULL
    }
    time.interest <- obj$time.interest
  }
  else {
      r.dim <- 1
    event <- event.type <- cens <- time.interest <- cens <- time <- NULL
  }
  return(list(event = event, event.type = event.type, cens = cens,
              time.interest = time.interest, time = time, r.dim = r.dim))
}
get.grow.event.info <- function(yvar, fmly, need.deaths = TRUE, ntime) {
  if (grepl("surv", fmly)) {
    r.dim <- 2
    time <- yvar[, 1]
    cens <- yvar[, 2]
    if (need.deaths & all(na.omit(cens) == 0)) {
      stop("no deaths in data!")
    }
    if (!all(na.omit(time) >= 0)) {
      stop("time must be  positive")
    }
    event.type <- unique(na.omit(cens))
    if (sum(event.type >= 0) != length(event.type)) {
      stop("censoring variable must be coded as NA, 0, or greater than 0.")    
    }
    event <- na.omit(cens)[na.omit(cens) > 0]
    event.type <- unique(event)
    nonMissingOutcome <- which(!is.na(cens) & !is.na(time))
    nonMissingDeathFlag <- (cens[nonMissingOutcome] != 0)
    time.interest <- sort(unique(time[nonMissingOutcome[nonMissingDeathFlag]]))
    if (!missing(ntime) && length(time.interest) > ntime) {
      time.interest <- time.interest[
         unique(round(seq.int(1, length(time.interest), length.out = ntime)))]
    }
  }
  else {
      r.dim <- 1
    event <- event.type <- cens <- time.interest <- cens <- time <- NULL
  }
  return(list(event = event, event.type = event.type, cens = cens, 
              time.interest = time.interest,
              time = time, r.dim = r.dim))
}
get.grow.splitinfo <- function (fmly, splitrule, nsplit, event.type) {
  splitrule.names <- c("logrank",              
                       "logrankscore",         
                       "logrankCR",            
                       "logrankACR",           
                       "random",               
                       "regr",                 
                       "regr.unwt",            
                       "regr.hvwt",            
                       "class",                
                       "class.unwt",           
                       "class.hvwt")           
  nsplit <- round(nsplit)
  if (nsplit < 0) {
    stop("Invalid nsplit value specified.")    
  }
  if (grepl("surv", fmly)) {    
    if (is.null(splitrule)) {
      if (length(event.type) ==  1) {
        splitrule.idx <- which(splitrule.names == "logrank")
      }
      else {
        splitrule.idx <- which(splitrule.names == "logrankCR")
      }
      splitrule <- splitrule.names[splitrule.idx]
    }
    else {
      splitrule.idx <- which(splitrule.names == splitrule)
      if (length(splitrule.idx) != 1) {
        stop("Invalid split rule specified:  ", splitrule)
      }
      if ((length(event.type) ==  1) & (splitrule.idx == which(splitrule.names == "logrankCR"))) {
        stop("Cannot specify logrankCR splitting for right-censored data")
      }
      if ((length(event.type) >   1) & (splitrule.idx == which(splitrule.names == "logrank"))) {
        splitrule.idx <- which(splitrule.names == "logrankACR")
      }
    }
  }
  if (fmly == "class") {    
    if (is.null(splitrule)) {
      splitrule.idx <- which(splitrule.names == "class")
      splitrule <- splitrule.names[splitrule.idx]      
    }
    else {
      if ((splitrule != "class") &
          (splitrule != "class.unwt") &
          (splitrule != "class.hvwt") &
          (splitrule != "random")) {
        stop("Invalid split rule specified:  ", splitrule)
      }
      splitrule.idx <- which(splitrule.names == splitrule)
    }
  }
  if (fmly == "regr") {    
    if (is.null(splitrule)) {
      splitrule.idx <- which(splitrule.names == "regr")
      splitrule <- splitrule.names[splitrule.idx]      
    }
    else {
      if ((splitrule != "regr") &
          (splitrule != "regr.unwt") &
          (splitrule != "regr.hvwt") &
          (splitrule != "random")) {
        stop("Invalid split rule specified:  ", splitrule)
      }
      splitrule.idx <- which(splitrule.names == splitrule)
    }
  }
  if ((splitrule == "random") & (nsplit == 0)) {
    nsplit <- 1
  }
  splitinfo <- list(name = splitrule, index = splitrule.idx, nsplit = nsplit)
  return (splitinfo)
}
get.grow.xvar.wt <- function(weight, n.xvar) {
  if (is.null(weight)) {
    weight <- rep(1/n.xvar, n.xvar)
  }
  else {
    if (any(weight < 0) | length(weight) != n.xvar | all(weight == 0)) {
      weight <- rep(1/n.xvar, n.xvar)
    }
    else {
      weight <-weight/sum(weight)
    }
  }
  return (weight)
}
get.grow.mtry <- function (mtry = NULL, n.xvar, fmly) {
  if (!is.null(mtry)) {
    mtry <- round(mtry)
    if (mtry < 1 | mtry > n.xvar) mtry <- max(1, min(mtry, n.xvar))
  }
  else {
    if (grepl("surv", fmly) | (fmly == "class")) {
      mtry <- max(ceiling(sqrt(n.xvar)), 1)
    }
    else {
      mtry <- max(ceiling(n.xvar/3), 1)
    }
  }
  return (mtry)
}
get.yvar.type <- function(fmly, y.cnt) {
  switch(fmly,
         "surv" = c("T", "S"),
         "surv-CR" = c("T", "S"),
         "regr" = "R",
         "class" = "C",
         )
}
get.yvar.target <- function(fmly, yvar.types, outcome.idx) {
    outcome.idx = 0
  return(outcome.idx)
}
parseFormula <- function (formula, data) {
  if (!inherits(formula, "formula")) {
    stop("'formula' is not a formula object.")
  }
  if (is.null(data)) {
    stop("'data' is missing.")
  }
  if (!is.data.frame(data)) {
    stop("'data' must be a data frame.")
  }
  fNames <- all.vars(formula, max.names = 1e7)
  if ((all.names(formula)[2] == "Surv")) {
    if (sum(is.element(names(data), fNames[1:2])) != 2) {
        stop("Survival formula incorrectly specified.")
    }
    family <- "surv"
    yvar.names <- fNames[1:2]
  }
  else {
    if ((all.names(formula)[2] == "Multivar")) {
      stop("Regression (or) classification formula incorrectly specified.")
    }
    else {
      if (sum(is.element(names(data), fNames[1])) != 1) {
        stop("Regression (or) classification formula incorrectly specified.")
      }
      yvar.names <- fNames[1]
      Y <- data[, yvar.names]
      if ( !(is.factor(Y) | is.numeric(Y)) ) {
        stop("the y-outcome must be either real or a factor.")
      }
      if (is.factor(Y)) {
        family <- "class"
      }
      else {
        family <- "regr"
      }
    }
  }
  return (list(fNames=fNames, family=family, yvar.names=yvar.names))
}
parseMissingData <- function(formula.obj, data) {
  yvar.names <- formula.obj$yvar.names
  resp <- data[, yvar.names, drop = FALSE]
  col.resp.na <- apply(data[, yvar.names, drop = FALSE], 2, function(x) {all(is.na(x))})
  if (any(col.resp.na)) {
    stop("All records are missing for the yvar(s)")
  }
  colPt <- apply(data, 2, function(x){all(is.na(x))})
  if (sum(colPt) >= (ncol(data) - length(yvar.names))) {
    stop("All x-variables have all missing data:  analysis not meaningful.")
  }
  data <- data[, !colPt, drop = FALSE]
  rowPt <- apply(data, 1, function(x){all(is.na(x))})
  if (sum(rowPt) == nrow(data)) {
    stop("Rows of the data have all missing data:  analysis not meaningful.")
  }
  data <- data[!rowPt,, drop = FALSE]
  return(data)
}
resample <- function(x, size, ...) {
    if (length(x) <= 1) {
      if (!missing(size) && size == 0) x[FALSE] else x
    }
    else {
      sample(x, size, ...)
    }  
}
