#' Replicate weights
#'
#' @description This function allows calculating replicate weights.
#'
#'
#' @param data A data frame with information on (at least) cluster and strata indicators, and sampling weights. It could be \code{NULL} if the sampling design is indicated in the \code{design} argument (see \code{design}).
#' @param method A character string indicating the method to be applied to define replicate weights. Choose between one of these: \code{JKn}, \code{dCV}, \code{bootstrap}, \code{subbootstrap}, \code{BRR}, \code{split}, \code{extrapolation}.
#' @param cluster A character string indicating the name of the column with cluster identifiers in the data frame indicated in \code{data}. It could be \code{NULL} if the sampling design is indicated in the \code{design} argument (see \code{design}).
#' @param strata A character string indicating the name of the column with strata identifiers in the data frame indicated in \code{data}. It could be \code{NULL} if the sampling design is indicated in the \code{design} argument (see \code{design}).
#' @param weights A character string indicating the name of the column with sampling weights in the data frame indicated in \code{data}. It could be \code{NULL} if the sampling design is indicated in the \code{design} argument (see \code{design}).
#' @param design An object of class \code{survey.design} generated by \code{survey::svydesign()}. It could be \code{NULL} if information about \code{cluster}, \code{strata}, \code{weights} and \code{data} are given.
#' @param k A numeric value indicating the number of folds to be defined. Default is \code{k=10}. Only applies for the \code{dCV} method.
#' @param R A numeric value indicating the number of times the sample is partitioned. Default is \code{R=1}. Only applies for \code{dCV}, \code{split} or \code{extrapolation} methods.
#' @param B A numeric value indicating the number of bootstrap resamples. Default is \code{B=200}. Only applies for \code{bootstrap} and  \code{subbootstrap} methods.
#' @param train.prob A numeric value between 0 and 1, indicating the proportion of clusters (for the method \code{split}) or strata (for the method \code{extrapolation}) to be set in the training sets. Default is \code{train.prob=0.7}. Only applies for \code{split} and \code{extrapolation} methods.
#' @param method.split A character string indicating the way in which replicate weights should be defined in the \code{split} method. Choose one of the following: \code{dCV}, \code{bootstrap} or \code{subbootstrap}. Only applies for \code{split} method.
#' @param rw.test A logical value. If \code{TRUE}, the function returns in the output object the replicate weights to the corresponding test sets. If \code{FALSE}, only the replicate weights of the training sets are returned. Default is \code{rw.test = FALSE}.
#' @param dCV.sw.test A logical value. If \code{TRUE} original sampling weights for the units in the test sets are returned instead of the replicate weights. Default is \code{dCV.sw.test = FALSE}. Only applies for \code{dCV} method.
#'
#' @return This function returns a new data frame with new columns, each of them indicating replicate weights for different subsets.
#'
#' @details
#' Some of these methods (specifically \code{JKn}, \code{bootstrap}, \code{subbootstrap} and \code{BRR}),
#' were previously implemented in the \code{survey} R-package, to which we can access by means of the function
#' \code{as.svrepdesign()} (the names of the methods are kept as in \code{as.svrepdesign()}).
#' Thus, the function \code{replicate.weights()} depends on this function to define replicate weights based on these
#' options. In contrast, \code{dCV}, \code{split} and \code{extrapolation} have been expressly defined to be
#' incorporated into this function.
#'
#' Selecting any of the above-mentioned methods, the object returned by this function is a new data frame,
#' which includes new columns into the original data set, each of them indicating replicate
#' weights for different training (always) and test (optionally, controlled by the argument \code{rw.test}) subsets.
#' The number of new columns and the way in which they are denoted depend on the values set for the arguments,
#' in general, and on the replicate weights method selected, in particular. The new columns indicating training and test sets
#' follow a similar structure for any of the selected methods. Specifically, the structure of the names of the training sets
#' is the following: \code{rw_r_x_train_t} where \code{x=1,...,R} indicates the \code{x}-th partition of the sample and
#' \code{t=1,...,T} the \code{t}-th training set. Similarly, the structure of the new columns indicating the test sets
#' is the following: \code{rw_r_x_test_t} or \code{sw_r_x_test_t}, where \code{x} indicates the partition and \code{t}
#' the number of the test set. In addition, for some of the methods we also indicate the fold or set to which each unit
#' in the data set has been included in each partition. This information is included as \code{fold_t} or \code{set_t},
#' depending on the method. See more detailed information below.
#'
#'
#' @export
#'
#' @examples
#' data(simdata_lasso_binomial)
#'
#' # JKn ---------------------------------------------------------------------
#' newdata <- replicate.weights(data = simdata_lasso_binomial,
#'                              method = "JKn",
#'                              cluster = "cluster",
#'                              strata = "strata",
#'                              weights = "weights",
#'                              rw.test = TRUE)
#'
#' # dCV ---------------------------------------------------------------------
#' newdata <- replicate.weights(data = simdata_lasso_binomial,
#'                              method = "dCV",
#'                              cluster = "cluster",
#'                              strata = "strata",
#'                              weights = "weights",
#'                              k = 10, R = 20,
#'                              rw.test = TRUE)
#'
#' # subbootstrap ------------------------------------------------------------
#' newdata <- replicate.weights(data = simdata_lasso_binomial,
#'                              method = "subbootstrap",
#'                              cluster = "cluster",
#'                              strata = "strata",
#'                              weights = "weights",
#'                              B = 100)
#'
#' # BRR ---------------------------------------------------------------------
#' newdata <- replicate.weights(data = simdata_lasso_binomial,
#'                              method = "BRR",
#'                              cluster = "cluster",
#'                              strata = "strata",
#'                              weights = "weights",
#'                              rw.test = TRUE)
#'
#' # split ---------------------------------------------------------------------
#' newdata <- replicate.weights(data = simdata_lasso_binomial,
#'                              method = "split",
#'                              cluster = "cluster",
#'                              strata = "strata",
#'                              weights = "weights",
#'                              R=20,
#'                              train.prob = 0.5,
#'                              method.split = "subbootstrap",
#'                              rw.test = TRUE)
#'
#' # extrapolation -------------------------------------------------------------
#' newdata <- replicate.weights(data = simdata_lasso_binomial,
#'                             method = "extrapolation",
#'                             cluster = "cluster",
#'                             strata = "strata",
#'                             weights = "weights",
#'                             R=20,
#'                             train.prob = 0.5,
#'                             rw.test = TRUE)
replicate.weights <- function(data,
                              method = c("JKn", "dCV", "bootstrap", "subbootstrap",
                                         "BRR", "split", "extrapolation"),
                              cluster = NULL, strata = NULL, weights = NULL, design = NULL,
                              k = 10, R=1, B = 200,
                              train.prob = 0.7, method.split = c("dCV", "bootstrap", "subbootstrap"),
                              rw.test = FALSE, dCV.sw.test = FALSE){

  # Stops and messages:
  if(method == "split"){
    if(is.null(train.prob)){stop("Selected replicate weights method: 'split'.\nPlease, set a value between 0 and 1 for the argument 'train.prob'.")}
    if(train.prob < 0 | train.prob > 1){stop("Selected replicate weights method: 'split'.\nPlease, set a value between 0 and 1 for the argument 'train.prob'.")}
    if(length(method.split)!=1){stop("Selected replicate weights method: 'split'.\nPlease, set a valid method for the argument 'method.split'. Choose between: 'dCV', 'bootstrap' or 'subbootstrap'.")}
  }

  if(method == "extrapolation"){
    if(is.null(train.prob)){stop("Selected replicate weights method: 'extrapolation'.\nPlease, set a value between 0 and 1 for the argument 'train.prob'.")}
    if(train.prob < 0 | train.prob > 1){stop("Selected replicate weights method: 'extrapolation'.\nPlease, set a value between 0 and 1 for the argument 'train.prob'.")}
  }

  if(method %in% c("JKn", "bootstrap", "subbootstrap", "BRR")){
    if(R!=1){message("Selected method:", method,". For this method, R = 1. Thus, the argument R =",R, "has been ignored.")}
  }

  if(method %in% c("dCV", "split", "extrapolation")){
    if(R != round(R)){stop("The argument 'R' must be an integer greater or equal to 1. R=",R," is not an integer.\nPlease, set a valid value for 'R' or skip the argument to select the default option R=1.")}
    if(R < 1){stop("The argument 'R' must be an integer greater or equal to 1. R=",R," lower than 1.\nPlease, set a valid value for 'R' or skip the argument to select the default option R=1.")}
  }

  if(method != "dCV"){
    if(!is.null(k) & k!=10){message("Selected method:", method,". The argument k =",k, "is not needed and, hence, has been ignored.")}
  }

  if(method == "dCV"){
    if(k != round(k)){stop("The argument 'k' must be an integer. k=",k," is not an integer.\nPlease, set a valid value for 'k' or skip the argument to select the default option k=10.")}
    if(k < 1){stop("The argument 'k' must be a positive integer. k=",k," is not a positive integer.\nPlease, set a valid value for 'k' or skip the argument to select the default option k=10.")}
  }

  if(!(method %in% c("bootstrap", "subbootstrap"))){
    if(!is.null(B) & B!=200){message("Selected method:", method,". The argument B=",B, " is not needed and, hence, has been ignored.")}
  }

  if(method %in% c("bootstrap", "subbootstrap")){
    if(B != round(B)){stop("The argument 'B' must be an integer. B=",B," is not an integer.\nPlease, set a valid value for 'B' or skip the argument to select the default option B=200.")}
    if(B < 1){stop("The argument 'B' must be a positive integer. B=",B," is not a positive integer.\nPlease, set a valid value for 'B' or skip the argument to select the default option B=200.")}
  }


  # Step 0: Notation
  if(!is.null(design)){
    cluster <- as.character(design$call$id[2])
    if(cluster == "1" || cluster == "0"){
      cluster <- NULL
    }
    strata <- as.character(design$call$strata[2])
    weights <- as.character(design$call$weights[2])
    data <- get(design$call$data)
  }


  if(method == "dCV"){

    newdata <- cv.folds(data, k, weights, strata, cluster, R, rw.test, dCV.sw.test)

    if(rw.test==TRUE & dCV.sw.test ==TRUE){
      for(r in 1:R){

        for(kk in 1:k){

          newdata[, paste0("sw_r_",r,"_test_", kk)] <- rep(0, nrow(newdata))
          newdata[which(newdata[,paste0("folds_",r)]==kk), paste0("sw_r_",r,"_test_", kk)] <- newdata[which(newdata[,paste0("folds_",r)]==kk), weights]

        }

      }
    }

  } else {

    if(method == "split"){

      newdata <- rw.split(data, train.prob, method = method.split,
                          weights, strata, cluster, R, rw.test)

    } else {

      if(method == "extrapolation"){

        newdata <- splitstrata(data, train.prob, strata, weights, R, rw.test)

      } else {

      # Define cluster formula
      if(is.null(cluster)) {
        formula.cluster <- as.formula("~1")
      } else {
        formula.cluster <- as.formula(paste0("~", cluster))
      }

      # Define strata formula
      if(!is.null(strata)) {
        formula.strata <- as.formula(paste0("~", strata))
      }

      # Define weights formula
      if(!is.null(weights)){
        formula.weights <- as.formula(paste0("~", weights))
      }

      # Define the design
      des <- survey::svydesign(ids = formula.cluster,
                               strata = formula.strata,
                               weights = formula.weights,
                               data = data, nest=TRUE)


      # Generate replicate weights based on the selected method
      if(method %in% c("JKn", "bootstrap", "subbootstrap", "BRR")){

        if(method %in% c("bootstrap", "subbootstrap")){
          rep.des <- survey::as.svrepdesign(design = des, type = method, replicates = B)
        } else {
          rep.des <- survey::as.svrepdesign(design = des, type = method)
        }

        mat.repw.ind <- apply(rep.des$repweights$weights, 2, function(x){x[rep.des$repweights$index]})
        mat.repw <- apply(mat.repw.ind, 2, function(x){x*data[,weights]})
        colnames(mat.repw) <- paste0("rw_r_1_train_", 1:ncol(mat.repw))
        newdata <- cbind(data, mat.repw)

      }

      # Define replicate weights for the testing set in BRR
      if(method == "BRR" & rw.test == TRUE){

        mat.repw.test <- mat.repw.ind
        colnames(mat.repw.test) <- paste0("rw_r_1_test_", 1:ncol(mat.repw.test))
        mat.repw.test <- -1*(mat.repw.test - 2)
        mat.repw.test <- apply(mat.repw.test, 2, function(x){x*data[,weights]})
        newdata <- cbind(newdata, mat.repw.test)

      }

      # Define each unit as fold in JKn method
      if(method == "JKn" & rw.test == TRUE){

        mat.repw.test <- -mat.repw.ind
        mat.repw.test[which(mat.repw.test==0)] <- 1
        mat.repw.test[which(mat.repw.test < 0)] <- 0
        mat.repw.test <- apply(mat.repw.test, 2, function(x){x*data[,weights]})
        colnames(mat.repw.test) <- paste0("sw_r_1_test_", 1:ncol(mat.repw.test))
        newdata <- cbind(newdata, mat.repw.test)

      }


    }

    }

  }

  return(newdata)

}



