#' Return a count of records
#'
#' Prior to downloading data it is often valuable to have some estimate of how
#' many records are available, both for deciding if the query is feasible,
#' and for estimating how long it will take to download. Alternatively, for some kinds
#' of reporting, the count of observations may be all that is required, for example
#' for understanding how observations are growing or shrinking in particular
#' locations, or for particular taxa. To this end, `atlas_counts()` takes
#' arguments in the same format as [atlas_occurrences()], and
#' provides either a total count of records matching the criteria, or a
#' `data.frame` of counts matching the criteria supplied to the `group_by`
#' argument.
#'
#' @param request optional `data_request` object: generated by a call to
#' [galah_call()].
#' @param identify `data.frame`: generated by a call to
#' [galah_identify()].
#' @param filter `data.frame`: generated by a call to
#' [galah_filter()]
#' @param geolocate `string`: generated by a call to
#' [galah_geolocate()]
#' @param data_profile `string`: generated by a call to
#' [galah_apply_profile()]
#' @param group_by `data.frame`: An object of class `galah_group_by`,
#' as returned by [galah_group_by()]. Alternatively a vector of field
#' names (see `search_all(fields)` and `show_all(fields)`).
#' @param limit `numeric`: maximum number of categories to return, defaulting to 100.
#' If limit is NULL, all results are returned. For some categories this will
#' take a while.
#' @param type `string`: one of `c("record", "species")`. Defaults to
#' "record". If "species", the number of species matching the criteria will be
#' returned, if "record", the number of records matching the criteria will be
#' returned.
#' @param refresh_cache `logical`: if set to `TRUE` and 
#' `galah_config(caching = TRUE)` then files cached from a previous query will 
#' be replaced by the current query
#' @importFrom glue glue_collapse
#' @importFrom dplyr bind_rows
#' @rdname atlas_counts
#' @return
#' 
#' An object of class `tbl_df` and `data.frame` (aka a tibble) returning: 
#'  * A single number, if `group_by` is not specified or,
#'  * A summary of counts grouped by field(s), if `group_by` is specified
#'
#' @examples \dontrun{
#' # Count all records in the specified atlas
#' atlas_counts()
#' 
#' # Filtered counts
#' galah_call() |>
#'   galah_filter(year == 2015) |>
#'   atlas_counts()
#' }
#' 
#' @export
atlas_counts <- function(request = NULL, 
                         identify = NULL, 
                         filter = NULL, 
                         geolocate = NULL,
                         data_profile = NULL,
                         group_by = NULL, 
                         limit = NULL,
                         type = c("record", "species"),
                         refresh_cache = FALSE
                         ) {
  type <- match.arg(type)

  # Either start a fresh call from the supplied arguments, or check the
  # supplied `request` and update it with them.
  if (is.null(request)) {
    current_call <- galah_call(
      identify = identify,
      filter = filter,
      geolocate = geolocate,
      data_profile = data_profile,
      group_by = group_by,
      limit = limit,
      type = type,
      refresh_cache = refresh_cache
    )
  } else {
    check_data_request(request)
    current_call <- update_galah_call(request, 
      identify = identify,
      filter = filter,
      geolocate = geolocate,
      data_profile = data_profile,
      group_by = group_by,
      limit = limit,
      type = type,
      refresh_cache = refresh_cache
    ) 
  }

  # Keep only the entries whose names are arguments of
  # `atlas_counts_internal()`; subsetting is followed by re-setting the class.
  accepted_args <- names(formals(atlas_counts_internal))
  is_accepted <- names(current_call) %in% accepted_args
  custom_call <- current_call[is_accepted]
  class(custom_call) <- "data_request"

  # Return the cached copy when caching is on, a cache file exists for this
  # call, and the caller has not asked for a refresh.
  caching <- getOption("galah_config")$package$caching
  cache_file <- cache_filename("counts", unlist(custom_call))
  if (caching && file.exists(cache_file) && !refresh_cache) {
    return(read_cache_file(cache_file))
  }

  # Run the query; a NULL result is replaced by an empty tibble so that the
  # attributes below can always be attached.
  result <- do.call(atlas_counts_internal, custom_call)
  if (is.null(result)) {
    result <- tibble()
  }
  attr(result, "data_type") <- "counts"
  attr(result, "data_request") <- custom_call

  # Store the result when caching is switched on
  if (caching) {
    write_cache_file(
      object = result,
      data_type = "counts",
      cache_file = cache_file
    )
  }

  result
}


# Internal worker behind `atlas_counts()`.
#
# Arguments mirror those of `atlas_counts()`. `group_by`, when supplied, is
# read as a data.frame with a `name` column and an `expand` attribute.
#
# Returns:
#  * `group_by = NULL`: a tibble with a single `count` column
#  * otherwise: the result of the grouped lookup function (one call when not
#    expanding, or the row-bound results of one call per combination of
#    levels when expanding)
#  * an empty tibble when a grouped lookup returns NULL
atlas_counts_internal <- function(identify = NULL, 
                                  filter = NULL, 
                                  geolocate = NULL,
                                  data_profile = NULL,
                                  group_by = NULL, 
                                  limit = 100,
                                  type = "record",
                                  refresh_cache = FALSE
                                  ) {

  verbose <- getOption("galah_config")$package$verbose
  
  # check type
  if(is_gbif() && type == "species"){
    abort("Use of `type = 'species'` is not supported for atlas = GBIF")
  }

  # ensure profile works from galah_filter as well as galah_profile
  # (no profile is used for GBIF; otherwise `data_profile` takes precedence
  # over anything extracted from `filter`)
  if(is_gbif()){
    profile <- NULL
  }else{
    if(is.null(data_profile)){
      if(is.null(filter)){
        profile <- NULL
      }else{
        profile <- extract_profile(filter)
      }
    }else{
      profile <- data_profile$data_profile
    }
  }
  
  # set options if group_by = NULL
  if(is.null(group_by)) {
    query <- build_query(identify, filter, geolocate, profile = profile)
    if (type == "species") {
      result <- species_count(query)
    }else{
      result <- record_count(query)
    }
    if(is.null(result)){
      system_down_message("atlas_counts")
    }
    return(tibble(count = result))
  }else{
    # the grouped lookup is called by name via do.call() below
    if(is_gbif()){
      lookup_fun <- "grouped_counts_GBIF"
    }else{
      lookup_fun <- "grouped_counts_LA"
    }
  }
  
  # if all combinations of levels of `group_by` are needed (expand = TRUE)
  # `isTRUE()` + `&&` keep this a scalar test even when the `expand`
  # attribute is missing
  if(isTRUE(attr(group_by, "expand")) && nrow(group_by) > 1){ 
    
    # get counts given the filter provided by the user
    lookup_args <- list(
      identify = identify,
      filter = filter, 
      geolocate = geolocate,
      profile = profile,
      type = type,
      facets = group_by$name, 
      limit = NULL)
    field_values_df <- do.call(lookup_fun, lookup_args)

    # a NULL lookup result is treated as a failed request (as in the
    # non-expanded branch below) rather than being reported as a zero count
    if(is.null(field_values_df)){
      system_down_message("atlas_counts")
      return(tibble())
    }

    # number of non-missing values returned for each grouping field
    n_fields_df <- data.frame(
      facets = group_by$name,
      n_fields = unlist(lapply(
        group_by$name, 
        function(a){length(which(!is.na(field_values_df[[a]])))})))

    if(sum(field_values_df$count) < 1){
      return(tibble(count = 0))
    }

    # work out which to pass as facets vs those we iterate over with lapply
    # (the field with the most values is sent as the facet)
    facets_large <- n_fields_df$facets[which.max(n_fields_df$n_fields)]
    facets_small <- n_fields_df$facets[n_fields_df$facets != facets_large]

    # work out what combinations of `group`s should be sent to the lookup
    levels_df <- expand.grid(
      lapply(
        field_values_df[, 
          which(names(field_values_df) %in% facets_small), 
          drop = FALSE], 
        function(a){a[!is.na(a)]}),
      stringsAsFactors = FALSE)
    levels_list <- split(levels_df, seq_len(nrow(levels_df)))
    # one filter string per combination, e.g. "field1 == 'a' & field2 == 'b'"
    filter_list <- lapply(levels_list, function(a){
      field <- colnames(a)
      value <- paste0("\'", a, "\'")
      paste(
        paste(field, value, sep = " == "),
        collapse = " & ")
    })
    
    # run the lookup the requisite number of times
    if (verbose) { pb <- txtProgressBar(max = 1, style = 3) } # start progressbar
    
    result_list <- lapply(seq_along(levels_list),
      function(a){
        if (verbose) {
          val <- (a / length(levels_list))
          setTxtProgressBar(pb, val)
        }
        filter_this_loop <- galah_filter(filter_list[[a]])    
        filter_final <- rbind(filter, filter_this_loop)
        lookup_args <- list(
          identify = identify,
          filter = filter_final,
          geolocate = geolocate,
          profile = profile,
          facets = facets_large,
          limit = limit,
          type = type)
        counts_query <- do.call(lookup_fun, lookup_args)
        # skip NULL or empty results; the all-NULL check after the loop
        # reports the case where nothing came back at all
        if(!is.null(counts_query) && nrow(counts_query) > 0){   
          as.data.frame(list(levels_list[[a]], counts_query), row.names = NULL)
        }
      }) 
    if(verbose){
      close(pb)
    } # close progress bar
    if (all(unlist(lapply(result_list, is.null)))) {
      system_down_message("atlas_counts")
      return(tibble())
    } else {
      result_list |>
        bind_rows() |>
        tibble()
    } 
     
  # if `groups` is of nrow == 1 (expand = FALSE)
  }else{
    # NOTE(review): the unnamed entries are matched positionally by
    # do.call(), so their order must follow the lookup function's signature
    # - confirm against `grouped_counts_LA` / `grouped_counts_GBIF`
    lookup_args <- list(
      identify, filter, geolocate, profile,
      facets = group_by$name, 
      limit, type, refresh_cache,
      verbose = verbose)
    result <- do.call(lookup_fun, lookup_args)
    if(is.null(result)){
      system_down_message("atlas_counts")
      return(tibble())
    }else{
      result
    }
  } 
}

# get just the record count for a query
# handle too long queries in here?
record_count <- function(query) {
  # The parameter that is zeroed and the name of the field holding the total
  # both depend on whether the GBIF API is in use.
  if (is_gbif()) {
    query$limit <- 0
    total_field <- "count"
  } else {
    query$pageSize <- 0
    total_field <- "totalRecords"
  }

  endpoint <- url_lookup("records_counts")
  response <- url_GET(endpoint, query)

  # Only the total is returned to the caller
  response[[total_field]]
}
# above doesn't work because ALA requires queries get put in an &fq= statement
# whereas gbif just needs list(limit = 0, ...) where ... is named params

# Count species for a query: adds `flimit` and a `facets` entry (taken from
# `species_facets()`) to `query`, then returns the result of
# `total_categories()` for the amended query.
species_count <- function(query) {
  query$flimit <- 1
  query$facets <- species_facets()
  total_categories(query)
}

# Get number of categories of a filter
total_categories <- function(query) {
  query$flimit <- 1
  facets_url <- url_lookup("records_facets") 
  response <- url_GET(facets_url, params = query)

  # No response at all: pass the NULL back to the caller
  if (is.null(response)) {
    return(NULL)
  }

  # Empty response: no categories
  if (length(response) < 1) {
    return(0)
  }

  response$count
}

# Extract filter names and values returned from API
parse_field <- function(fq){
  # Pull out the run of alphanumeric characters at the start of each string
  leading_alnum <- "^[:alnum:]+"
  str_extract(string = fq, pattern = leading_alnum)
}

parse_fq <- function(fq){
  # Drop a leading "<alphanumeric>:" prefix, then strip every double quote
  without_prefix <- sub("^[[:alnum:]]+:",  "", fq)
  gsub("\"", "", without_prefix)
}
