#' Apply borderline-SMOTE algorithm
#'
#' `step_bsmote` creates a *specification* of a recipe
#'  step that generate new examples of the minority class using nearest
#'  neighbors of these cases in the border region between classes.
#'
#' @inheritParams recipes::step_center
#' @inheritParams step_upsample
#' @param ... One or more selector functions to choose which
#'  variable is used to sample the data. See [selections()]
#'  for more details. The selection should result in _single
#'  factor variable_. For the `tidy` method, these are not
#'  currently used.
#' @param role Not used by this step since no new variables are
#'  created.
#' @param column A character string of the variable name that will
#'  be populated (eventually) by the `...` selectors.
#' @param neighbors An integer. Number of nearest neighbours that are used
#'  to generate the new examples of the minority class.
#' @param all_neighbors Type of two borderline-SMOTE method. Defaults to FALSE.
#'  See details.
#' @param seed An integer that will be used as the seed when
#' smote-ing.
#' @return An updated version of `recipe` with the new step
#'  added to the sequence of existing steps (if any). For the
#'  `tidy` method, a tibble with columns `terms` which is
#'  the variable used to sample.
#'
#' @details
#' This methods works the same way as [step_smote()], expect that instead of
#' generating points around every point of of the minority class each point is
#' first being classified into the boxes "danger" and "not". For each point the
#' k nearest neightbors is calculated. If all the neighbors comes from a
#' different class it is labeled noise and put intothe "not" box. If more then
#' half of the neightbours comes from a different class it is labeled "danger.
#  Points will be generated around points labeled "danger".
#'
#' If all_neighbors = FALSE then points will be generated between nearest
#' neighbours in its own class. If all_neighbors = TRUE then points will be
#' generated between any nearest neigbors. See examples for visualization.
#'
#' The parameter `neighbors` controls the way the new examples are created.
#' For each currently existing minority class example X new examples will be
#' created (this is controlled by the parameter `over_ratio` as mentioned
#' above). These examples will be generated by using the information from the
#' `neighbors` nearest neighbours of each example of the minority class.
#' The parameter `neighbors` controls how many of these neighbours are used.
#'
#' All columns in the data are sampled and returned by [juice()]
#'  and [bake()].
#'
#' All columns used in this step must be numeric with no missing data.
#'
#' When used in modeling, users should strongly consider using the
#'  option `skip = TRUE` so that the extra sampling is _not_
#'  conducted outside of the training set.
#'
#' @references Hui Han, Wen-Yuan Wang, and Bing-Huan Mao. Borderline-smote:
#' a new over-sampling method in imbalanced data sets learning. In
#' International Conference on Intelligent Computing, pages 878–887. Springer,
#' 2005.
#'
#' @keywords datagen
#' @concept preprocessing
#' @concept subsampling
#' @export
#' @examples
#' library(recipes)
#' library(modeldata)
#' data(credit_data)
#'
#' sort(table(credit_data$Status, useNA = "always"))
#'
#' ds_rec <- recipe(Status ~ Age + Income + Assets, data = credit_data) %>%
#'   step_meanimpute(all_predictors()) %>%
#'   step_bsmote(Status) %>%
#'   prep()
#'
#' sort(table(juice(ds_rec)$Status, useNA = "always"))
#'
#' # since `skip` defaults to TRUE, baking the step has no effect
#' baked_okc <- bake(ds_rec, new_data = credit_data)
#' table(baked_okc$Status, useNA = "always")
#'
#' ds_rec2 <- recipe(Status ~ Age + Income + Assets, data = credit_data) %>%
#'   step_meanimpute(all_predictors()) %>%
#'   step_bsmote(Status, over_ratio = 0.2) %>%
#'   prep()
#'
#' table(juice(ds_rec2)$Status, useNA = "always")
#'
#' library(ggplot2)
#'
#' ggplot(circle_example, aes(x, y, color = class)) +
#'   geom_point() +
#'   labs(title = "Without SMOTE")
#'
#' recipe(class ~ ., data = circle_example) %>%
#'   step_bsmote(class, all_neighbors = FALSE) %>%
#'   prep() %>%
#'   juice() %>%
#'   ggplot(aes(x, y, color = class)) +
#'   geom_point() +
#'   labs(title = "With borderline-SMOTE, all_neighbors = FALSE")
#'
#' recipe(class ~ ., data = circle_example) %>%
#'   step_bsmote(class, all_neighbors = TRUE) %>%
#'   prep() %>%
#'   juice() %>%
#'   ggplot(aes(x, y, color = class)) +
#'   geom_point() +
#'   labs(title = "With borderline-SMOTE, all_neighbors = TRUE")
#'
#' @importFrom recipes rand_id add_step ellipse_check
step_bsmote <-
  function(recipe, ..., role = NA, trained = FALSE,
           column = NULL, over_ratio = 1, neighbors = 5, all_neighbors = FALSE,
           skip = TRUE, seed = sample.int(10^5, 1), id = rand_id("bsmote")) {

    add_step(recipe,
             step_bsmote_new(
               terms = ellipse_check(...),
               role = role,
               trained = trained,
               column = column,
               over_ratio = over_ratio,
               neighbors = neighbors,
               all_neighbors = all_neighbors,
               skip = skip,
               seed = seed,
               id = id
             ))
  }

#' @importFrom recipes step
step_bsmote_new <-
  function(terms, role, trained, column, over_ratio, neighbors, all_neighbors, skip,
           seed, id) {
    step(
      subclass = "bsmote",
      terms = terms,
      role = role,
      trained = trained,
      column = column,
      over_ratio = over_ratio,
      neighbors = neighbors,
      all_neighbors =  all_neighbors,
      skip = skip,
      id = id,
      seed = seed,
      id = id
    )
  }

#' @importFrom recipes bake prep check_type
#' @importFrom dplyr select
#' @export
prep.step_bsmote <- function(x, training, info = NULL, ...) {

  col_name <- terms_select(x$terms, info = info)
  if (length(col_name) != 1)
    stop("Please select a single factor variable.", call. = FALSE)
  if (!is.factor(training[[col_name]]))
    stop(col_name, " should be a factor variable.", call. = FALSE)

  check_type(select(training, -col_name), TRUE)
  check_na(select(training, -col_name), "step_bsmote")

  step_bsmote_new(
    terms = x$terms,
    role = x$role,
    trained = TRUE,
    column = col_name,
    over_ratio = x$over_ratio,
    neighbors = x$neighbors,
    all_neighbors = x$all_neighbors,
    skip = x$skip,
    seed = x$seed,
    id = x$id
  )
}

#' @importFrom tibble as_tibble tibble
#' @importFrom withr with_seed
#' @export
bake.step_bsmote <- function(object, new_data, ...) {

  new_data <- as.data.frame(new_data)
  # bsmote with seed for reproducibility
  with_seed(
    seed = object$seed,
    code = {
      new_data <- bsmote(new_data, object$column,
                         k = object$neighbors, over_ratio = object$over_ratio,
                         all_neighbors = object$all_neighbors)
    }
  )

  as_tibble(new_data)
}

#' @importFrom recipes printer terms_select
#' @export
print.step_bsmote <-
  function(x, width = max(20, options()$width - 26), ...) {
    cat("BorderlineSMOTE based on ", sep = "")
    printer(x$column, x$terms, x$trained, width = width)
    invisible(x)
  }

#' @rdname step_bsmote
#' @param x A `step_bsmote` object.
#' @importFrom generics tidy
#' @importFrom recipes sel2char is_trained
#' @export
tidy.step_bsmote <- function(x, ...) {
  if (is_trained(x)) {
    res <- tibble(terms = x$column)
  }
  else {
    term_names <- sel2char(x$terms)
    res <- tibble(terms = unname(term_names))
  }
  res$id <- x$id
  res
}
