#' Return occurrence records
#'
#' The most common form of data stored by living atlases are observations of
#' individual life forms, known as 'occurrences'. This function allows the
#' user to search for occurrence records that match their specific criteria,
#' and return them as a `data.frame` for analysis. Optionally,
#' the user can also request a DOI for a given download to facilitate citation
#' and re-use of specific data resources.
#'
#' @param request optional `data_request` object: generated by a call to
#' [galah_call()].
#' @param identify `data.frame`: generated by a call to
#' [galah_identify()].
#' @param filter `data.frame`: generated by a call to
#' [galah_filter()]
#' @param geolocate `string`: generated by a call to
#' [galah_geolocate()]
#' @param data_profile `string`: generated by a call to
#' [galah_apply_profile()]
#' @param select `data.frame`: generated by a call to
#' [galah_select()] 
#' @param mint_doi `logical`: by default no DOI will be generated. Set to
#' `TRUE` if you intend to use the data in a publication or similar
#' @param doi  
#'   `r lifecycle::badge("deprecated")` Use `collect_occurrences` instead. 
#'   
#'   `string`: this argument enables retrieval of occurrence records previously 
#'   downloaded from the ALA, using the DOI generated for that download.
#' @param refresh_cache `logical`: if set to `TRUE` and 
#' `galah_config(caching = TRUE)` then files cached from a previous query will 
#' be replaced by the current query
#' @details
#' Note that unless care is taken, some queries can be particularly large.
#' While in most cases this will simply take a long time to process, if the
#' number of requested records is >50 million the call will not return any
#' data. Users can test whether this threshold will be reached by first calling
#' [atlas_counts()] using the same arguments that they intend to pass to
#' `atlas_occurrences()`. It may also be beneficial when requesting a large
#' number of records to show a progress bar by setting `verbose = TRUE` in
#' [galah_config()].
#' @return An object of class `tbl_df` and `data.frame` (aka a tibble) of 
#' occurrences, containing columns as specified by [galah_select()]. 
#' The `data.frame` object has the following attributes:
#' 
#' * a listing of the user-supplied arguments of the `data_request` 
#' (i.e., identify, filter, geolocate, select)
#' * a `doi` of the data download
#' * the `search_url` of the query to ALA API
#' 
#' @examples 
#' \dontrun{
#' # Download occurrence records for a specific taxon
#' galah_config(email = "your_email_here")
#' galah_call() |>
#'   galah_identify("Reptilia") |>
#'   atlas_occurrences()
#'
#' # Download occurrence records in a year range
#' galah_call() |>
#'   galah_identify("Litoria") |>
#'   galah_filter(year >= 2010 & year <= 2020) |>
#'   atlas_occurrences()
#'
#' # Download occurrence records in a WKT-specified area
#' polygon <- "POLYGON((146.24960 -34.05930,
#'                      146.37045 -34.05930,
#'                      146.37045 -34.152549,
#'                      146.24960 -34.15254,
#'                      146.24960 -34.05930))"
#' galah_call() |> 
#'   galah_identify("Reptilia") |>
#'   galah_filter(year >= 2010, year <= 2020) |>
#'   galah_geolocate(polygon) |>
#'   atlas_occurrences()
#' }
#' 
#' @importFrom assertthat assert_that
#' @importFrom rlang caller_env
#' 
#' @export
atlas_occurrences <- function(request = NULL,
                              identify = NULL,
                              filter = NULL,
                              geolocate = NULL,
                              data_profile = NULL,
                              select = NULL,
                              mint_doi = FALSE,
                              doi = NULL, # check missingness code
                              refresh_cache = FALSE
                              ) {
  # Build the query object: when a `request` is supplied (e.g. piped from
  # galah_call()) it is validated and passed to update_galah_call() together
  # with the arguments given here; otherwise a new call is created from the
  # arguments alone.
  if(!is.null(request)){
    check_data_request(request)
    current_call <- update_galah_call(request,
      identify = identify,
      filter = filter,
      geolocate = geolocate,
      data_profile = data_profile,
      select = select,
      mint_doi = mint_doi, # NOTE: check behaviour of update_galah_call here
      doi = doi,
      refresh_cache = refresh_cache)
  }else{
    current_call <- galah_call(
      identify = identify,
      filter = filter,
      geolocate = geolocate,
      data_profile = data_profile,
      select = select,
      mint_doi = mint_doi,
      doi = doi,
      refresh_cache = refresh_cache)
  }

  # choose behaviour depending on whether we are calling LAs or GBIF
  if(is_gbif()){
    function_name <- "occurrences_GBIF"
    current_call$format <- "SIMPLE_CSV"
    arg_names <- names(formals(occurrences_GBIF))
  }else{
    function_name <- "occurrences_LA"
    arg_names <- names(formals(occurrences_LA))
  }

  # subset to the arguments that the chosen internal function accepts
  custom_call <- current_call[names(current_call) %in% arg_names]
  if(!is.null(doi)){
    # when a DOI is given, every other argument is dropped from the call
    custom_call <- custom_call["doi"]
  }
  class(custom_call) <- "data_request"

  # check for caching
  # isTRUE() guards against the option (or its `caching` field) being unset:
  # a NULL here would otherwise make the `if()` conditions below error.
  caching <- isTRUE(getOption("galah_config")$package$caching)
  cache_file <- cache_filename("occurrences", unlist(custom_call))
  if (caching && file.exists(cache_file) && !refresh_cache) {
    return(read_cache_file(cache_file))
  }

  # run function using do.call
  result <- do.call(function_name, custom_call)
  if(is.null(result)){
    # return an empty tibble rather than NULL when nothing comes back
    result <- tibble()
  }
  # record the query that generated this result alongside the data
  attr(result, "data_request") <- custom_call

  # if caching requested, save
  if (caching) {
    write_cache_file(object = result, 
                     data_type = "occurrences",
                     cache_file = cache_file)
  }

  result
}