#' Calculates the importance values for National Forest Inventory Data
#' 
#' @description
#' iv_nfi() function calculates the importance values of tree species based on frequency, density and coverage.
#' It can estimate the entire study area or specific groups within it using the `plotgrp` parameter.
#' It uses the \code{\link[BiodiversityR]{importancevalue}} function from the \pkg{BiodiversityR} package for core calculations.
#' Users have flexibility in specifying data inclusion criteria and analysis levels using parameters `frequency`, `clusterplot`, `largetreearea`, `stockedland`, and `talltree`.
#' These parameters determine whether to include frequency in importance calculations, to treat cluster plots as single plots, to include large tree survey plots, and to focus only on Stocked land and tall trees.
#' 
#' @details
#' The importance value (ranging from 0 to 100) is calculated as the mean of:
#' 1. Relative frequency: (number of plots where a species is observed / total survey plots) * 100
#' 2. Relative density: (total number of individuals of a species / sum of all species' densities) * 100
#' 3. Relative coverage: (total basal area of a species / sum of all species' basal area) * 100
#' 
#' @param data : A `list` generated by \code{\link{read_nfi}} that contains 'plot' and 'tree' data frames.
#' @param sp : A character vector; the column name of species information (e.g., "SP" for species, "GENUS" for genus-level analysis).
#' @param frequency : A logical flag (default TRUE); if TRUE, includes frequency in importance value calculations.
#' @param plotgrp : A character vector; specifies variables from 'plot' table to use for grouping. Use \code{c()} to combine multiple variables.
#' @param continuousplot : A logical flag (default FALSE); if TRUE, includes only plots that have been continuously measured in all NFI cycles (5th, 6th, etc.). If FALSE, includes plots regardless of missing cycle measurements.
#' @param clusterplot : A logical flag (default FALSE); if TRUE, treats each cluster plot as a single unit. If FALSE, calculates for each subplot separately.
#' @param largetreearea : A logical flag (default FALSE); if TRUE, includes large tree survey plots in the analysis. If FALSE, only uses standard tree plots.
#' @param stockedland : A logical flag (default TRUE); if TRUE, includes only stocked land. If FALSE, includes all land types.
#' @param talltree : A logical flag (default TRUE); if TRUE, includes only tall trees. If FALSE, includes both trees and shrubs.
#' 
#' @return  A `data.frame` that includes importance value for tree species. Each row represents a combination of a tree species, a survey cycle (CYCLE), and the groups specified by plotgrp.
#' 
#' @note 
#' - Consider calculating importance by genus rather than species due to potential incompleteness in species classification.
#' - Since the frequencies of each species may be identical across the nation, it may be desirable to exclude frequency from the importance calculation. 
#'  
#' @examples
#' 
#' data("nfi_donghae")
#' 
#' # Calculate importance values without frequency
#' importance <- iv_nfi(nfi_donghae, sp = "SP", frequency = FALSE, continuousplot = TRUE)
#' 
#' # Calculate importance values using genus
#' genus_importance <- iv_nfi(nfi_donghae, sp = "GENUS", continuousplot = TRUE)
#' 
#' @seealso
#' \code{\link[BiodiversityR]{importancevalue}} for calculating the importance values.
#' 
#' @references
#' Curtis, J. T. & McIntosh, R. P. (1951). An upland forest continuum in the prairie-forest border region of Wisconsin. Ecology, 32(3), 476–496.
#'
#' @export 


##  

iv_nfi <- function(data, sp="SP", frequency=TRUE, plotgrp=NULL, continuousplot=FALSE, clusterplot=FALSE, largetreearea=FALSE, stockedland=TRUE, talltree=TRUE){
  
  ## error message-------------------------------------------------------------- 
  required_names <- c("plot", "tree")
  
  if (!all(required_names %in% names(data))) {
    missing_dfs <- required_names[!required_names %in% names(data)]
    stop("Missing required data frames in the list: ", paste(missing_dfs, collapse = ", "), call. = FALSE)
  }
  
  if(!sp %in% names(data$tree)){
    stop(paste0("param 'sp': ", sp," is not a column name in the 'tree' data frame."))
  } 
  
  # `%in%` yields a single TRUE/FALSE even when 'plotgrp' is NULL or holds
  # several column names, so the condition is always a valid scalar.
  if (clusterplot && "FORTYP_SUB" %in% plotgrp){
    stop("When the param 'clusterplot' is set to TRUE, param 'plotgrp' uses FORTYP_CLST (the forest type for the cluster plot) instead of FORTYP_SUB (the forest type for each subplot).")
  }
  
  ## Preprocessing--------------------------------------------------------------
  if (stockedland){
    data <- filter_nfi(data, c("plot$LAND_USECD == '1'"))
  }

  if(talltree){
    data$tree <- data$tree %>% filter(WDY_PLNTS_TYP_CD == "1")
  }

  if(!largetreearea){ 
    data$tree <- data$tree %>% filter(LARGEP_TREE == "0")
  }
  
  if(continuousplot){
    
    # Keep only subplots whose tree records cover every cycle present in 'plot'.
    # NOTE: 'samples_with_all_cycle' is referenced by name inside the
    # expression string handed to filter_nfi(); do not rename it.
    all_cycle <- unique(data$plot$CYCLE)
    samples_with_all_cycle <- data$tree %>%
      group_by(SUB_PLOT) %>%
      filter(all(all_cycle %in% CYCLE)) %>%
      distinct(SUB_PLOT) %>%
      pull(SUB_PLOT)
    
    data <- filter_nfi(data, c("plot$SUB_PLOT %in% samples_with_all_cycle"))

  }
 
  df <- left_join(data$tree[, c('CLST_PLOT', 'SUB_PLOT',"CYCLE", 'WDY_PLNTS_TYP_CD', 
                                'BASAL_AREA', 'LARGEP_TREE', sp)], 
                  data$plot[,c('CLST_PLOT', 'SUB_PLOT', "CYCLE", 'INVYR','LAND_USE', "LAND_USECD", plotgrp)],
                  by = c("CLST_PLOT", "SUB_PLOT", "CYCLE"))
  
  # Column names are kept as plain strings; symbols are only used for the
  # tidy-eval grouping call.
  sp_nm <- sp
  plot_id <- if (clusterplot) "CLST_PLOT" else "SUB_PLOT"
  plotgrp_nm <- c("CYCLE", as.character(plotgrp))
  
  sp_sym <- rlang::sym(sp_nm)
  plot_sym <- rlang::sym(plot_id)
  grp_syms <- rlang::syms(plotgrp)
  
  # Stem count and summed basal area per cycle x plot unit x species (x groups).
  iv_temp <- df %>% 
    group_by(CYCLE, !!plot_sym, !!sp_sym, !!!grp_syms) %>% 
    summarise(count = n(), basal = sum(BASAL_AREA, na.rm=TRUE),.groups = 'drop')
  
  iv_temp <- data.frame(iv_temp)
  
  # One key per CYCLE x plotgrp combination. The character matrix is kept so
  # the individual group values can be looked up later instead of being
  # parsed back out of the pasted key (which breaks on values containing "_").
  grp_mat <- as.matrix(iv_temp[plotgrp_nm])
  iv_temp$factor <- apply(grp_mat, 1, paste, collapse = "_")
  
  ## Calculating importance Value by survey cycle--------------------------------------------------------------
  iv_temp_2<-BiodiversityR::importancevalue.comp(iv_temp, site=plot_id, species=sp_nm, count='count', 
                                                      basal='basal', factor="factor")
  
  # Element 1 holds the group keys; elements 2..n hold one result per key.
  group_levels <- iv_temp_2[[1]]
  
  for(i in seq_along(iv_temp_2)[-1]){
    
    level <- group_levels[i-1]
    first_row <- match(level, iv_temp$factor)
    res <- iv_temp_2[[i]]
    
    if(is.null(dim(res))){
      # A group with a single species comes back as a bare named vector:
      # rebuild a one-row data frame and recover the species label from the
      # column selected by 'sp'.
      res <- as.data.frame(t(res))
      res$species <- as.character(iv_temp[[sp_nm]][first_row])
      
    }else{
      res <- as.data.frame(res)
      res$species <- rownames(res)
      rownames(res) <- NULL
    }
    
    # Attach CYCLE and the grouping variables (as character) for this key.
    for(grp_col in plotgrp_nm){
      res[grp_col] <- as.character(grp_mat[first_row, grp_col])
    }
    
    iv_temp_2[[i]] <- res
  }
  
  iv_temp_2[[1]] <- NULL
  
  iv <- data.table::rbindlist(iv_temp_2, fill=TRUE, use.names=TRUE)
  iv <- as.data.frame(iv)
  
  ## Inclusion of frequency-------------------------------------------------------------- 
  if(frequency){
    # Average the three relative measures contained in 'importance.value'.
    iv$importance.value <- iv$importance.value/3
  }else{
    # Average of relative density and relative dominance only.
    iv$importance.value <- (iv$density.percent + iv$dominance.percent)/2
  }
  
  iv <- iv[, c("CYCLE", "species", setdiff(names(iv), c("CYCLE", "species")))]
  
  
  return(iv)
  
}



 
#' iv_tsvis()
#' 
#' 
#' @description
#' iv_tsvis() is a function that calculates the importance values of tree species based on frequency, density and coverage.
#' It is an internal function used within the tsvis_nfi() function.
#' iv_nfi() calculates importance by cycle, while this function calculates importance by year.
#' 
#' @param data : A `list` generated by \code{\link{read_nfi}} that contains 'plot' and 'tree' data frames.
#' @param sp : A character vector; the column name of species information (e.g., "SP" for species, "GENUS" for genus-level analysis).
#' @param frequency : A logical flag (default TRUE); if TRUE, includes frequency in importance value calculations.
#' @param plotgrp : A character vector; specifies variables from 'plot' table to use for grouping. Use \code{c()} to combine multiple variables.
#' @param clusterplot : A logical flag (default FALSE); if TRUE, treats each cluster plot as a single unit. If FALSE, calculates for each subplot separately.
#' @param largetreearea : A logical flag (default FALSE); if TRUE, includes large tree survey plots in the analysis. If FALSE, only uses standard tree plots.
#' @param stockedland : A logical flag (default TRUE); if TRUE, includes only stocked land. If FALSE, includes all land types.
#' @param talltree : A logical flag (default TRUE); if TRUE, includes only tall trees. If FALSE, includes both trees and shrubs.
#'
#' @return  A `data.frame` that includes importance value for tree species.
#' 
#' @seealso
#' \code{\link[BiodiversityR]{importancevalue}} for calculating the importance values.
#' \code{\link[knfi]{tsvis_nfi}}
#' 
#' @noRd 



iv_tsvis <- function(data, sp="SP", frequency=TRUE, plotgrp=NULL, clusterplot=FALSE, largetreearea=FALSE, stockedland=TRUE, talltree=TRUE){
  
  ## error message-------------------------------------------------------------- 
  required_names <- c("plot", "tree")
  
  if (!all(required_names %in% names(data))) {
    missing_dfs <- required_names[!required_names %in% names(data)]
    stop("Missing required data frames in the list: ", paste(missing_dfs, collapse = ", "), call. = FALSE)
  }
  
  if(!sp %in% names(data$tree)){
    stop(paste0("param 'sp': ", sp," is not a column name in the 'tree' data frame."))
  } 
  
  # `%in%` yields a single TRUE/FALSE even when 'plotgrp' is NULL or holds
  # several column names, so the condition is always a valid scalar.
  if (clusterplot && "FORTYP_SUB" %in% plotgrp){
    stop("When the param 'clusterplot' is set to TRUE, param 'plotgrp' uses FORTYP_CLST (the forest type for the cluster plot) instead of FORTYP_SUB (the forest type for each subplot).")
  }
  
  ## Preprocessing--------------------------------------------------------------
  if (stockedland){
    data <- filter_nfi(data, c("plot$LAND_USECD == '1'"))
  }
  
  if(talltree){
    data$tree <- data$tree %>% filter(WDY_PLNTS_TYP_CD == "1")
  }
  
  if(!largetreearea){ 
    data$tree <- data$tree %>% filter(LARGEP_TREE == "0")
  }
  
  df <- left_join(data$tree[, c('CLST_PLOT', 'SUB_PLOT',"CYCLE", 'WDY_PLNTS_TYP_CD',
                                'BASAL_AREA', 'LARGEP_TREE', sp)],
                  data$plot[,c('CLST_PLOT', 'SUB_PLOT', "CYCLE", 'INVYR','LAND_USE', "LAND_USECD", plotgrp)],
                  by = c("CLST_PLOT", "SUB_PLOT", "CYCLE"))

  # Column names are kept as plain strings; symbols are only used for the
  # tidy-eval grouping call.
  sp_nm <- sp
  plot_id <- if (clusterplot) "CLST_PLOT" else "SUB_PLOT"
  plotgrp_nm <- as.character(plotgrp)

  sp_sym <- rlang::sym(sp_nm)
  plot_sym <- rlang::sym(plot_id)
  grp_syms <- rlang::syms(plotgrp)

  # Stem count and summed basal area per plot unit x species (x groups).
  iv_temp <- df %>%
    group_by(!!plot_sym, !!sp_sym, !!!grp_syms) %>%
    summarise(count = n(), basal = sum(BASAL_AREA, na.rm=TRUE),.groups = 'drop')

  iv_temp <- data.frame(iv_temp)

  if(length(plotgrp_nm) > 0){
    # One key per plotgrp combination. The character matrix is kept so the
    # individual group values can be looked up later instead of being parsed
    # back out of the pasted key (which breaks on values containing "_").
    grp_mat <- as.matrix(iv_temp[plotgrp_nm])
    iv_temp$factor <- apply(grp_mat, 1, paste, collapse = "_")
  }else{
    # No grouping requested: all rows share one key.
    grp_mat <- NULL
    iv_temp$factor <- "forest"
  }

  ## Calculating importance value for each plotgrp combination--------------------------------------------------------------
  iv_temp_2 <-BiodiversityR::importancevalue.comp(iv_temp, site=plot_id, species=sp_nm, count='count',
                                                 basal='basal', factor="factor")

  # Element 1 holds the group keys; elements 2..n hold one result per key.
  group_levels <- iv_temp_2[[1]]

  for(i in seq_along(iv_temp_2)[-1]){

    level <- group_levels[i-1]
    first_row <- match(level, iv_temp$factor)
    res <- iv_temp_2[[i]]

    if(is.null(dim(res))){
      # A group with a single species comes back as a bare named vector:
      # rebuild a one-row data frame and recover the species label from the
      # column selected by 'sp'.
      res <- as.data.frame(t(res))
      res$species <- as.character(iv_temp[[sp_nm]][first_row])

    }else{
      res <- as.data.frame(res)
      res$species <- rownames(res)
      rownames(res) <- NULL
    }

    # Attach the grouping variables (as character) for this key, if any.
    for(grp_col in plotgrp_nm){
      res[grp_col] <- as.character(grp_mat[first_row, grp_col])
    }

    iv_temp_2[[i]] <- res
  }

  iv_temp_2[[1]] <- NULL

  iv <- data.table::rbindlist(iv_temp_2, fill=TRUE, use.names=TRUE)

  iv  <- as.data.frame(iv)
  rownames(iv) <- NULL


  ## Inclusion of frequency--------------------------------------------------------------
  if(frequency){
    # Average the three relative measures contained in 'importance.value'.
    iv$importance.value <- iv$importance.value/3
  }else{
    # Average of relative density and relative dominance only; the frequency
    # columns are dropped from the output.
    iv$importance.value <- (iv$density.percent + iv$dominance.percent)/2
    iv$frequency <- NULL
    iv$frequency.percent <- NULL
  }

  iv <- iv[, c("species", setdiff(names(iv), "species"))]

  return(iv)
  
}




