#' @name HydeUtilities
#' @importFrom dplyr group_by_
#' @importFrom dplyr mutate
#' @importFrom magrittr %>%
#' @export %>%
#' 
#' @title Hyde Network Utility Functions 
#' @description The functions described below are unexported functions that 
#'   are used internally by \code{HydeNet} to prepare modify network objects
#'   and prepare JAGS code.
#'   
#' @details 
#'   \code{termName}: In most model objects, factors in the model are 
#'   represented as [variable][level].  This utility function pulls out the 
#'   [variable] component.  This is typically called from within 
#'   \code{makeJagsReady}.
#'   
#'   \code{decisionOptions}: When compiling multiple JAGS models to evaluate the 
#'   effect of decision nodes, the options for each decision node are extracted
#'   by this utility.
#'   
#'   \code{makeJagsReady}: Manages the presence of factors in interactions and
#'   makes sure the proper numeric factor value is given to the JAGS code.  
#'   This is called from within a \code{writeJagsFormula} call.
#'   
#'   \code{matchLevelNumber}: Assigns the correct numeric value of a level to 
#'   a factor variable in a model.  This is called from within 
#'   \code{makeJagsRead}.
#'   
#'   \code{matchVars}: Given a list of existing node names, the terms of a formula
#'   are matched to the node names.  This allows functions to be used in 
#'   formula defintions.  Most commonly, \code{factor(var)} would get reduced to
#'   \code{var}, which is a node name that JAGS will understand.  There is still
#'   limited ability for translation here, and \code{matchVars} assumes that the 
#'   longest match is the desired match. If you pass a function with two node names,
#'   the longer of the two will be taken and JAGS will likely fail.
#'   
#'   \code{nodeFromFunction}: This is a utility function necessary to make 
#'   \code{modelToNode} work properly.  A node vector was needed to pass to 
#'   \code{matchVars}, and this is the mechanism to generate that vector.
#'   
#'   \code{polyToPow}: converts polynomials generated by the \code{poly} function
#'   to use the \code{pow} function in JAGS.
#'   
#'   \code{validateParameters}: Users may pass parameters to the JAGS code using the 
#'   \code{setNode} function.  If a faulty parameter is given, such as 
#'   \code{lambda = -10} for a poisson distribution (\code{lambda} must be
#'   positive in a Poisson distribution), the error returned by JAGS may not
#'   clear enough to diagnose a problem that presented several steps earlier
#'   in the code.  \code{validateParamters} allows the user to receive instant
#'   feedback on the appropriateness of the code.
#'   
#'   Logical expressions for comparisons are stored in the \code{jagsDists}
#'   data object (\code{data(jagsDists, package='Hyde')}).  This is a utility
#'   function used only within \code{setNode} and is not visible to the user.
#'   
#' @author Jarrod Dalton and Benjamin Nutter

#' @rdname HydeUtilities
#' @importFrom stringr str_extract
#'   
#' @param term Usually the \code{term} column from the output of 
#'   \code{broom::tidy()}
#' @param reg A regular expression, usually provided by \code{factorRegex}

termName <- function(term, reg){
  if (!is.null(reg)){
    return(sapply(term, 
                  function(t, reg){
                    t <- unlist(strsplit(t, ":"))
                    t <- ifelse(grepl(reg, t),
                                stringr::str_extract(t, reg),
                                t)
                    t <- paste(t, collapse=":")
                  },
                  reg))
  }
  else return(term)   
}

#' @rdname HydeUtilities
#' @importFrom stringr perl
#' @importFrom stringr str_extract
#' 
#' @param node Character string indicating a node in a network
#' @param network A Hyde Network Object

decisionOptions <- function(node, network){
  #* In some cases, nodeFitter isn't set for a node.  When nodeFitter is NULL,
  #* we want to skip the "cpt" check and move on to other possibilities.
  #* If it isn't NULL and "cpt" is the fitter, we return dist immediately
  #* to avoid overwriting it in subsequent checks
  if (!is.null(network$nodeFitter[[node]])){
    if (network$nodeFitter[[node]] == "cpt"){
      D <- {if (!is.null(network$nodeData[[node]])) network$nodeData[[node]][[node]] 
            else network$data[[node]]}
      dist <- 1:length(unique(D))
      return(dist)
    }
  }
  #* This uses a regular expression to extract the level number from
  #* the node JAGS model.  For instance
  #* pi.var[1] <- .123; pi.var[2] <- .321; ...
  #* the regular expression pulls out the numbers in between each set of [].
  if (network$nodeType[[node]] == "dcat"){
    dist <- writeJagsModel(network, node)[1]
    dist <- unlist(strsplit(dist, ";"))
    dist <- as.numeric(stringr::str_extract(dist, stringr::regex("(?<=[\\[]).*(?=[\\]])")))
  }
  else if (network$nodeType[[node]] == "dbern"){
    dist <- 0:1
  }
  dist
}

#' @rdname HydeUtilities 
#' @param mdl Output from \code{broom::tidy()}
#' @param regex A regular expression, usually returned by \code{factorRegex}
#' @param nodes A vector of node names, usually passed from \code{network$nodes}.

makeJagsReady <- function(mdl, regex, nodes){
  term_name <- NULL # avoids global binding NOTE on check
  
  mdl$term_name <- termName(as.character(mdl$term), regex)
  mdl$term_name <- matchVars(mdl$term_name, nodes)
  mdl$level_name <- if (!is.null(regex)) gsub(regex, "", mdl$term) else mdl$term
  mdl$factor <- if (!is.null(regex)) grepl(regex, mdl$term) else FALSE
  
  factorRef <- mdl[mdl$factor & !grepl(":", mdl$term_name), 
                   c("term_name", "level_name"), 
                   drop=FALSE]
  if (nrow(factorRef) > 0){
    factorRef <- factorRef %>%
      dplyr::group_by_('term_name') %>%
      dplyr::mutate(level_value = 2:(length(term_name) + 1))
  }
#   factorRef <- plyr::ddply(factorRef,
#                            "term_name",
#                            transform,
#                            level_value = 2:(length(term_name)+1))
  
  mdl <- merge(mdl, factorRef,
               by=c("term_name", "level_name"), all=TRUE)
  
  mdl$jagsVar <- if (nrow(factorRef) > 0)
                   mapply(matchLevelNumber, mdl$term_name, mdl$level_value)
                 else mdl$term_name
  
  #* Change 'poly' to 'pow'
  mdl$jagsVar <- sapply(mdl$jagsVar, polyToPow)
  
  mdl
}

#' @rdname HydeUtilities
#' @param t Usually the \code{term_name} column generated within 
#'   \code{makeJagsReady}
#' @param lev usually the \code{level_value} column generated within
#'   \code{makeJagsReady}

matchLevelNumber <- function(t, lev){
  t <- unlist(strsplit(t, ":"))
  l <- unlist(strsplit(as.character(lev), ":"))
  for (i in 1:length(t)){
    t[i] <- {if (is.na(l[i])) t[i]
             else paste0("(", t[i], " == ", l[i], ")")}
  }
  paste0(t, collapse="*")
}

#' @rdname HydeUtilities
#' @param terms A vector of term names, usually from a \code{broom::tidy} object.
#' @param vnames A vector of term names, usually from \code{network$nodes}.

matchVars <- function(terms, vnames)
{
  Matches <- sapply(vnames, function(p) stringr::str_extract(terms, p))
  Matches <- apply(as.matrix(Matches), 1, function(s) ifelse(is.na(s), "", s))
  Matches <- apply(as.matrix(Matches), 2, function(s) s[which.max(nchar(s))])
  Matches[which(grepl("Intercept", terms))] <- 
    terms[which(grepl("Intercept", terms))]
  Matches
}

#' @rdname HydeUtilities
#' @param node_fn A character string representing a function passed in a model formula

nodeFromFunction <- function(node_fn)
{
  node <- stringr::str_extract(node_fn, "(?<=[(]).+?(?=[)])")
  node <- gsub("([*]|[,]|[/]|\\^)[[:print:]]+", "", node)
  ifelse(is.na(node), node_fn, node)
}

#' @rdname HydeUtilities
#' @param poly A single term for which the polynomial components should be 
#'   converted to the JAGS pow() function.

polyToPow <- function(poly){
  poly <- unlist(strsplit(poly, "[*]"))
  poly <- gsub("poly[(]", "pow(", poly)
  poly <- ifelse(grepl("pow[(]", poly),
                 gsub("\\d{1,2}[)]", "", poly),
                 poly)
  poly <- ifelse(grepl("pow[(]", poly),
                 paste0(poly, ")"),
                 poly)
  poly <- paste0(poly, collapse="*")
  return(poly)
}

  
#' @rdname HydeUtilities
#' @param params The list of parameters given in the \code{...} argument of 
#'   \code{setNode}
#' @param dist The JAGS distribution function name.  Appropriate names are
#'   in the \code{FnName} column of the \code{jagsDists} data object.

validateParameters <- function(params, dist){
  expr <- jagsDists$paramLogic[jagsDists$FnName == dist]
  valid <- sapply(expr, function(e) with(params, eval(parse(text=e))))  
  valid[sapply(params, function(p) p %in% c("fromData", "fromFormula"))] <- TRUE
  return(valid)
}

#' @rdname HydeUtilities
#' 
makeFactorRef <- function(network)
{
  network_factors <- 
    names(network$factorLevels)[!vapply(network$factorLevels, is.null, logical(1))]
  
  if (length(network_factors) == 0) return(NULL)
  
  Ref <- lapply(network_factors,
         function(f){
           data.frame(value = 1:length(network$factorLevels[[f]]),
                      label = network$factorLevels[[f]],
                      stringsAsFactors = FALSE)
         })
  names(Ref) <- network_factors
  
  types <- unlist(network$nodeType[network_factors])
  types <- types[types %in% "dbern"]
  
  Ref[names(types)] <- 
    lapply(Ref[names(types)], 
           function(f){
             f$value <- f$value - 1
             f
           })
  
  Ref[unique(names(Ref))]
  #* The code below was the old way of doing this
  #* before we implemented the `factorLevels` element.
  #* I'm just hesitant to give it up before the 
  #* new system is well tested.
#   dataList <- c(list(network$data), network$nodeData)
#   names(dataList) <- NULL
#   Ref <- do.call("c", lapply(dataList, dataframeFactors))
#   
#   types <- unlist(network$nodeType[names(Ref)])
#   types <- types[types %in% "dbern"]
#   
#   Ref[names(types)] <- 
#     lapply(Ref[names(types)], 
#            function(f){
#              f$value <- f$value - 1
#              f
#            })
#   Ref[unique(names(Ref))]
}

#' @rdname HydeUtilities
#' @param dataframe A data frame.  The data frame will be searched for factors and
#'   a reference table (another data frame) is returned.
#'   
dataframeFactors <- function(dataframe)
{
  if (is.null(dataframe)) return(NULL)
  factor_vars <- names(dataframe)[sapply(dataframe, class) == "factor"]
  reference_list <- 
    lapply(factor_vars,
           function(f) data.frame(value = sort(unique(as.numeric(dataframe[[f]]))),
                                  label = levels(dataframe[[f]]),
                                  stringsAsFactors=FALSE))
  names(reference_list) <- factor_vars
  reference_list
}