#' Dot-and-Whisker Plots of Regression Results
#'
#' \code{dwplot} is a function for quickly and easily generating dot-and-whisker plots of regression models saved in tidy data frames.
#'
#' @param x Either a tidy data.frame (see 'Details'), a model object to be tidied with \code{\link[broom]{tidy}}, or a list of such model objects.
#' @param alpha A number setting the criterion of the confidence intervals. The default value is .05, corresponding to 95-percent confidence intervals.
#' @param dodge_size A number (typically between 0 and 0.3) indicating how much vertical separation should be between different models' coefficients when multiple models are graphed in a single plot.  Lower values tend to look better when the number of independent variables is small, while a higher value may be helpful when many models appear on the same plot.
#' @param order_vars A vector of variable names that specifies the order in which the variables are to appear along the y-axis of the plot.
#'
#' @details \code{dwplot} visualizes regression results saved in tidy data.frames by, e.g., \code{\link[broom]{tidy}} as dot-and-whisker plots generated by \code{\link[ggplot2]{ggplot}}.
#'
#' Tidy data.frames to be plotted should include the variables \code{term} (names of predictors), \code{estimate} (corresponding estimates of coefficients or other quantities of interest), \code{std.error} (corresponding standard errors), and optionally \code{model} (when multiple models are desired on a single plot).
#' In place of \code{std.error} one may substitute \code{lb} (the lower bounds of the confidence intervals of each estimate) and \code{ub} (the corresponding upper bounds).
#'
#' For convenience, \code{dwplot} also accepts as input those model objects that can be tidied by \code{\link[broom]{tidy}}, or a list of such model objects.
#'
#' Because the function takes a data.frame as input, it is easily employed for a wide range of models, including those not supported by \code{\link[broom]{tidy}}.
#' And because the output is a \code{ggplot} object, it can easily be further customized with any additional arguments and layers supported by \code{ggplot2}.
#' Together, these two features make \code{dwplot} extremely flexible.
#'
#' @references
#' Kastellec, Jonathan P. and Leoni, Eduardo L. 2007. "Using Graphs Instead of Tables in Political Science." Perspectives on Politics, 5(4):755-771.
#'
#' @return The function returns a \code{ggplot} object.
#'
#' @import ggplot2
#' @import dplyr
#' @importFrom stats qnorm
#' @importFrom broom tidy
#' @importFrom plyr ldply
#'
#' @examples
#' library(broom)
#' library(dplyr)
#'
#' # Plot regression coefficients from a single model object
#' data(mtcars)
#' m1 <- lm(mpg ~ wt + cyl + disp, data = mtcars)
#'
#' dwplot(m1) +
#'     scale_y_discrete(breaks = 4:1, labels=c("Intercept", "Weight", "Cylinders", "Displacement")) +
#'     theme_bw() + xlab("Coefficient") + ylab("") +
#'     geom_vline(xintercept = 0, colour = "grey50", linetype = 2) +
#'     theme(legend.position="none")
#'
#' # Plot regression coefficients from multiple models on the fly
#'
#' m2 <- update(m1, . ~ . - disp)
#' dwplot(list(full=m1,nodisp=m2))
#'
#' # Plot regression coefficients from multiple models in a tidy data.frame
#' library(dplyr)
#' by_trans <- mtcars %>% group_by(am) %>%
#'     do(tidy(lm(mpg ~ wt + cyl + disp, data = .))) %>% rename(model=am)
#'
#' dwplot(by_trans, dodge_size = .05) +
#'     scale_y_discrete(breaks = 4:1, labels=c("Intercept", "Weight", "Cylinders", "Displacement")) +
#'     theme_bw() + xlab("Coefficient Estimate") + ylab("") +
#'     geom_vline(xintercept = 0, colour = "grey60", linetype = 2) +
#'     ggtitle("Predicting Gas Mileage, OLS Estimates") +
#'     theme(plot.title = element_text(face="bold"),
#'           legend.justification=c(1,0), legend.position=c(1,0),
#'           legend.background = element_rect(colour="grey80"),
#'           legend.title.align = .5) +
#'     scale_colour_grey(start = .4, end = .8,
#'                       name = "Transmission",
#'                       breaks = c(0, 1),
#'                       labels = c("Automatic", "Manual"))
#'
#' @export

dwplot <- function(x, alpha = .05, dodge_size = .15, order_vars = NULL) {
    # Confirm alpha within bounds before doing any work; `||` keeps the
    # condition a single TRUE/FALSE as `if` requires
    if (!is.numeric(alpha) || length(alpha) != 1 || is.na(alpha) ||
        alpha < 0 || alpha > 1) {
        stop("Value of alpha for the confidence intervals should be between 0 and 1.")
    }

    # If x is model object(s), convert to a tidy data.frame
    df <- dw_tidy(x)

    # set variables that will appear in pipelines to NULL to make R CMD check happy
    estimate <- model <- NULL

    n_vars <- length(unique(df$term))

    # Confirm number of models, get model names
    if ("model" %in% names(df)) {
        n_models <- length(unique(df$model))
        # Keep models in order of first appearance rather than alphabetical
        df$model <- factor(df$model, levels = unique(df$model))
    } else {
        # Without a 'model' column every term may appear only once
        if (length(df$term) == n_vars) {
            df$model <- factor("one")
            n_models <- 1
        } else {
            stop("Please add a variable named 'model' to distinguish different models")
        }
    }
    mod_names <- unique(df$model)

    # Specify order of variables if an order is provided
    if (!is.null(order_vars)) {
        df$term <- factor(df$term, levels = order_vars)
        # y positions below follow row order, so the rows themselves must be
        # sorted; with several models add_NAs() does the sorting instead
        if (n_models == 1) {
            df <- df[order(df$term), , drop = FALSE]
        }
    }

    # Add rows of NAs for variables not included in a particular model
    if (n_models > 1) {
        df <- add_NAs(df, n_models, mod_names)
    }

    # Prep arguments to ggplot
    var_names <- df$term

    # First term is drawn at the top (y = n_vars), last at the bottom (y = 1);
    # rev(seq_len()) stays empty when there are no terms
    y_ind <- rep(rev(seq_len(n_vars)), n_models)
    df$y_ind <- y_ind

    # Generate lower and upper bound if not included in results
    has_lb <- "lb" %in% names(df)
    has_ub <- "ub" %in% names(df)
    if (!has_lb || !has_ub) {
        if (!"std.error" %in% names(df)) {
            stop("Please provide either 'std.error' or both 'lb' and 'ub' for the confidence intervals")
        }
        # Normal-approximation interval: estimate +/- z * std.error
        z <- stats::qnorm(1 - alpha / 2)
        lb <- df$estimate - z * df$std.error
        ub <- df$estimate + z * df$std.error

        # Only fill in what is missing so that a bound supplied by the user is
        # kept and no duplicated column names are created
        if (!has_lb) {
            df$lb <- lb
        }
        if (!has_ub) {
            df$ub <- ub
        }
    }

    # Calculate y-axis shift for plotting multiple models
    if (n_models == 1) {
        shift <- 0
    } else {
        shift <- seq(dodge_size, -dodge_size, length.out = n_models)
    }
    shift_index <- data.frame(model = mod_names, shift = shift)
    ## use explicit 'by' to suppress "Joining by:" message
    ## presumably we will never *want* to join by other columns?
    df <- dplyr::left_join(df, shift_index, by = "model")

    # Catch difference between single and multiple models
    if (length(y_ind) != length(var_names)) {
        var_names <- unique(var_names)
    }

    # Make the plot
    p <- ggplot(transform(df, model = factor(model)),
                aes(x = estimate, y = y_ind + shift, colour = model)) +
        geom_point(na.rm = TRUE) +
        geom_segment(aes(x = lb,
                         xend = ub,
                         y = y_ind + shift, yend = y_ind + shift),
                     na.rm = TRUE) +
        scale_y_discrete(breaks = y_ind, labels = var_names) +
        coord_cartesian(ylim = c(.5, n_vars + .5)) +
        ylab("") + xlab("")

    # Omit the legend if there is only one model
    if (length(mod_names) == 1) {
        p <- p + theme(legend.position = "none")
    }

    p
}

# Convert the input of dwplot() into a tidy data.frame.
#
# x: a data.frame (returned unchanged), a plain list of model objects, or a
#    single model object that broom::tidy() can handle.
#
# Returns a data.frame. For a list of models the tidied results are stacked
# and a 'model' column identifies each one: the list names are used where
# present and non-empty, "Model <i>" otherwise.
dw_tidy <- function(x) {
    # Data frames are taken to be tidy already
    if (is.data.frame(x)) {
        return(x)
    }

    # identical() yields exactly one TRUE/FALSE even for objects with several
    # classes (e.g. c("glm", "lm")); `class(x) == "list"` would not, and a
    # condition of length > 1 is an error in `if` in current versions of R
    if (identical(class(x), "list")) {
        # Start from default labels, then let user-supplied names override them
        nm <- paste("Model", seq_along(x))
        nm_orig <- names(x)
        if (!is.null(nm_orig)) {
            # Test the *original* names: unnamed elements of a partially named
            # list have the name "" and must keep their default label
            has_name <- !is.na(nm_orig) & nzchar(nm_orig)
            nm[has_name] <- nm_orig[has_name]
        }
        names(x) <- nm

        ## ldply calls plyr::rbind.fill, so all tidied models
        ##  need not have same columns ...
        df <- plyr::ldply(x, broom::tidy, .id = "model")
    } else if (inherits(x, "lmerMod")) {
        group <- NULL # only for avoiding the NOTE in check.
        # Keep only the rows that tidy() labels as fixed effects
        df <- broom::tidy(x) %>% dplyr::filter(group == "fixed")
    } else {
        df <- broom::tidy(x)
    }
    df
}

# Pad a tidy data.frame of several models so that every model has one row for
# every term found in any of the models.
#
# df:        tidy data.frame with at least 'term', 'model' and 'estimate'
# n_models:  number of models in df
# mod_names: the model identifiers, one per model
#
# Rows added for terms a model lacks carry NA in every column except 'term',
# 'model' and, if present, 'submodel'. The result is sorted by model and, within
# each model, by term, which is the row layout dwplot() relies on when it
# assigns y-axis positions.
add_NAs <- function(df = df, n_models = n_models, mod_names = mod_names) {
    # set variables that will appear in pipelines to NULL to make R CMD check happy
    term <- model <- NULL

    # Factors with levels in order of first appearance fix the sort order used
    # at the end (terms and models are not sorted alphabetically)
    if (!is.factor(df$term)) {
        df$term <- factor(df$term, levels = unique(df$term))
    }
    if (!is.factor(df$model)) {
        df$model <- factor(df$model, levels = unique(df$model))
    }

    all_terms <- unique(df$term)
    # Collect the padded models in a list and bind once instead of growing a
    # data.frame inside the loop
    padded <- vector("list", n_models)
    for (i in seq_len(n_models)) {
        m <- df[df$model == mod_names[[i]], ]
        not_in <- setdiff(all_terms, m$term)
        if (length(not_in) > 0) {
            filler <- data.frame(term = not_in,
                                 model = mod_names[[i]],
                                 stringsAsFactors = FALSE)
            if ("submodel" %in% names(m)) {
                filler$submodel <- m$submodel[1]
            }
            # A full outer merge appends the filler rows; the columns that
            # filler does not have are filled with NA
            m <- merge(m, filler, all = TRUE)
        }
        padded[[i]] <- m
    }
    dft <- do.call(rbind, padded)

    # Sort explicitly by model, then term: arrange() does not sort by the
    # grouping variable on its own, so arrange(term) alone would interleave
    # the models
    df <- dft %>% group_by(model) %>% arrange(model, term) %>% ungroup()

    df$estimate <- as.numeric(df$estimate)
    for (col in intersect(c("std.error", "ub", "lb"), names(df))) {
        df[[col]] <- as.numeric(df[[col]])
    }
    df
}
