#' @include partition.R partition_bundle.R context.R S4classes.R
#' @include features.R
NULL

#' Trim an object
#' 
#' Generic for methods that trim and adjust objects, for instance by keeping
#' or dropping terms and documents, or by filtering hits. Methods are defined
#' for \code{TermDocumentMatrix}, \code{DocumentTermMatrix} and \code{context}
#' objects; the method can also be applied to \code{features},
#' \code{partition} and \code{partition_bundle} objects.
#' 
#' @param object the object to be trimmed
#' @param termsToKeep character vector, rows (terms) of a
#'   \code{TermDocumentMatrix} whose names are in this vector are kept
#' @param termsToDrop character vector, rows (terms) of a
#'   \code{TermDocumentMatrix} whose names are in this vector are removed
#' @param docsToKeep character vector, columns (documents) of a
#'   \code{TermDocumentMatrix} whose names are in this vector are kept
#' @param docsToDrop character vector, columns (documents) of a
#'   \code{TermDocumentMatrix} whose names are in this vector are removed
#' @param verbose logical, whether to output messages
#' @param ... further arguments; the \code{DocumentTermMatrix} method passes
#'   them on to the \code{TermDocumentMatrix} method
#' @author Andreas Blaette
#' @docType methods
#' @aliases trim trim-method trim,TermDocumentMatrix-method
#' @rdname trim-method
setGeneric(
  "trim",
  function(object, ...) standardGeneric("trim")
)



#' @importFrom Matrix rowSums
#' @importFrom tm stopwords
#' @importFrom slam as.simple_triplet_matrix
#' @rdname trim-method
setMethod("trim", "TermDocumentMatrix", function(object, termsToKeep = NULL, termsToDrop = NULL, docsToKeep = NULL, docsToDrop = NULL, verbose = TRUE){
  # Subset a TermDocumentMatrix by column names (documents) and row names
  # (terms). Each filter is applied only if the respective argument is not
  # NULL; filters are applied in the order docsToKeep, docsToDrop,
  # termsToKeep, termsToDrop. The subsetted object is returned.
  #
  # NOTE: `verbose` is not used by this method. It is kept in the signature
  # so that existing calls passing it continue to work.

  # columns: keep listed documents, then remove unwanted ones
  if (!is.null(docsToKeep)){
    object <- object[, which(colnames(object) %in% docsToKeep)]
  }
  if (!is.null(docsToDrop)){
    object <- object[, which(!(colnames(object) %in% docsToDrop))]
  }

  # rows: keep listed terms, then remove unwanted ones
  if (!is.null(termsToKeep)){
    object <- object[which(rownames(object) %in% termsToKeep), ]
  }
  if (!is.null(termsToDrop)){
    object <- object[which(!(rownames(object) %in% termsToDrop)), ]
  }

  object
})

#' @rdname trim-method
setMethod("trim", "DocumentTermMatrix", function(object, ...){
  # Trimming is delegated: transpose the input, call trim() on the
  # transposed object (forwarding all further arguments), and transpose the
  # result back. Presumably t() turns the DocumentTermMatrix into a
  # TermDocumentMatrix so that the method for that class is dispatched.
  transposed <- t(object)
  trimmed <- trim(transposed, ...)
  t(trimmed)
})




#' @rdname context-class
setMethod("trim", "context", function(object, s_attribute = NULL, positivelist = NULL, p_attribute = p_attributes(object), regex = FALSE, stoplist = NULL, verbose = TRUE, progress = TRUE, ...){
  
  # Trim the hits in the cpos table of a context object. Up to three filters
  # are applied, each only if the respective argument is not NULL:
  #  - s_attribute: per match_id, keep only rows that have the same value in
  #    column "<s_attribute>_int" as the node (the rows with position == 0);
  #  - positivelist: keep a hit only if at least one neighbor of the node
  #    (position != 0) is among the ids of the positivelist tokens;
  #  - stoplist: drop a hit if at least one neighbor of the node is among the
  #    ids of the stoplist tokens.
  # Returns the trimmed object, or (invisibly) NULL with a warning if no hits
  # remain after applying the positivelist or the stoplist.
  
  # accept the argument name 'sAttribute' if it is passed via ...
  if ("sAttribute" %in% names(list(...))) s_attribute <- list(...)[["sAttribute"]]
  
  if (!is.null(s_attribute)){
    stopifnot(length(s_attribute) == 1L)
    sAttrCol <- paste(s_attribute, "int", sep = "_")
    if (!sAttrCol %in% colnames(object@cpos)){
      object <- enrich(object, s_attribute = s_attribute)
    }
    # temporarily rename the column so that it can be addressed as 'struc'
    # in the data.table expression below; the name is restored afterwards
    setnames(object@cpos, old = sAttrCol, new = "struc")
    
    position <- 0 # work around to make data.table syntax pass R CMD check
    struc <- 0 # work around to make data.table syntax pass R CMD check
    
    .message("checking boundaries of regions", verbose = verbose)
    # min is 0 rather than 1: txtProgressBar() requires max > min and would
    # fail if object@count is 1 (assumes object@count is the number of
    # hits - TODO confirm)
    if (progress) pb <- txtProgressBar(min = 0, max = object@count, style = 3)
    .checkBoundary <- function(.SD, .GRP){
      if (progress) setTxtProgressBar(pb, value = .GRP)
      # value of the structural attribute at the (first row of the) node
      struc_hit <- .SD[position == 0][["struc"]][1]
      .SD[struc == struc_hit]
    }
    object@cpos <- object@cpos[, .checkBoundary(.SD, .GRP), by = "match_id"]
    if (progress) close(pb)
    setnames(object@cpos, old = "struc", new = sAttrCol)
  }
  
  if (!is.null(positivelist)){
    .message("filtering by positivelist", verbose = verbose)
    before <- length(unique(object@cpos[["match_id"]]))
    positivelistIds <- .token2id(corpus = object@corpus, p_attribute = p_attribute, token = positivelist, regex = regex)
    # only the first p-attribute is evaluated
    pAttrCol <- paste(p_attribute[1], "id", sep = "_")
    .fn <- function(.SD){
      neighbors <- .SD[[pAttrCol]][.SD[["position"]] != 0] # exclude node
      if (any(neighbors %in% positivelistIds)) return( .SD ) else return( NULL )
    }
    object@cpos <- object@cpos[, .fn(.SD), by = "match_id", with = TRUE]
    after <- length(unique(object@cpos[["match_id"]]))
    .message("number of hits droped due to positivelist:", before - after, verbose = verbose)
    if (nrow(object@cpos) == 0) {
      warning("no remaining hits after applying positivelist, returning NULL object")
      return( invisible(NULL) )
    }
  }
  
  if (!is.null(stoplist)){
    .message("applying stoplist", verbose = verbose)
    before <- length(unique(object@cpos[["match_id"]]))
    stoplistIds <- .token2id(corpus = object@corpus, p_attribute = p_attribute, token = stoplist, regex = regex)
    # only the first p-attribute is evaluated
    pAttrCol <- paste(p_attribute[1], "id", sep = "_")
    .fn <- function(.SD){
      # Select the neighbors of the node by value of 'position'. Removing the
      # node by negative indexing on a vector of row numbers would delete
      # elements by their position within that vector (and would empty the
      # vector if there is no row with position == 0).
      neighbors <- .SD[[pAttrCol]][.SD[["position"]] != 0] # exclude node
      if (any(neighbors %in% stoplistIds)) return( NULL ) else return( .SD )
    }
    object@cpos <- object@cpos[, .fn(.SD), by = "match_id", with = TRUE]
    after <- length(unique(object@cpos[["match_id"]]))
    .message("number of hits droped due to stoplist:", before - after, verbose = verbose)
    if (nrow(object@cpos) == 0) {
      warning("no remaining hits after applying stoplist, returning NULL object")
      # invisible, consistent with the positivelist branch
      return( invisible(NULL) )
    }
  }
  
  object
})
