context("Validate GTFS")


# setup -------------------------------------------------------------------


data_path <- system.file("extdata/spo_gtfs.zip", package = "gtfstools")
gtfs <- read_gtfs(data_path)

full_val <- validate_gtfs(gtfs)
full_val <- attr(full_val, "validation_result")

partial_val_1 <- validate_gtfs(gtfs, "stop_times")
partial_val_1 <- attr(partial_val_1, "validation_result")

partial_val_2 <- validate_gtfs(gtfs, c("stop_times", "agency"))
partial_val_2 <- attr(partial_val_2, "validation_result")

extra_file_gtfs <- gtfs
extra_file_gtfs$extra_file <- extra_file_gtfs$calendar
extra_file_gtfs <- validate_gtfs(extra_file_gtfs)
extra_file_val <- attr(extra_file_gtfs, "validation_result")

extra_field_gtfs <- gtfs
extra_field_gtfs$calendar <- data.table::copy(gtfs$calendar)
extra_field_gtfs$calendar[, extra_field := "ola"]
extra_field_gtfs$shapes <- data.table::copy(gtfs$shapes)
extra_field_gtfs$shapes[, additional_field := 2]
extra_field_gtfs <- validate_gtfs(extra_field_gtfs)
extra_field_val <- attr(extra_field_gtfs, "validation_result")

missing_req_file_gtfs <- gtfs
missing_req_file_gtfs$agency <- NULL
missing_req_file_gtfs <- validate_gtfs(missing_req_file_gtfs, warnings = FALSE)
missing_req_file_val <- attr(missing_req_file_gtfs, "validation_result")

missing_req_field_gtfs <- gtfs
missing_req_field_gtfs$stop_times <- data.table::copy(gtfs$stop_times)
missing_req_field_gtfs$stop_times[, trip_id := NULL]
missing_req_field_gtfs <- validate_gtfs(
  missing_req_field_gtfs,
  warnings = FALSE
)
missing_req_field_val <- attr(missing_req_field_gtfs, "validation_result")

specified_files <- c(
  "agency", "stops", "routes", "trips", "stop_times", "calendar",
  "calendar_dates", "fare_attributes", "fare_rules", "shapes", "frequencies",
  "transfers", "pathways", "levels", "feed_info", "translations", "attributions"
)

required_files <- c(
  "agency", "stops", "routes", "trips", "stop_times", "calendar"
)

# files' fields

agency_field <- c(
  "agency_id", "agency_name", "agency_url", "agency_timezone", "agency_lang",
  "agency_phone", "agency_fare_url", "agency_email"
)

stops_field <- c(
  "stop_id", "stop_code", "stop_name", "stop_desc", "stop_lat", "stop_lon",
  "zone_id", "stop_url", "location_type", "parent_station", "stop_timezone",
  "wheelchair_boarding", "level_id", "platform_code"
)

routes_field <- c(
  "route_id", "agency_id", "route_short_name", "route_long_name", "route_desc",
  "route_type", "route_url", "route_color", "route_text_color",
  "route_sort_order", "continuous_pickup", "continuous_drop_off"
)

trips_field <- c(
  "route_id", "service_id", "trip_id", "trip_headsign", "trip_short_name",
  "direction_id", "block_id", "shape_id", "wheelchair_accessible",
  "bikes_allowed"
)

stop_times_field <- c(
  "trip_id", "arrival_time", "departure_time", "stop_id", "stop_sequence",
  "stop_headsign", "pickup_type", "drop_off_type", "continuous_pickup",
  "continuous_drop_off", "shape_dist_traveled", "timepoint"
)

calendar_field <- c(
  "service_id", "monday", "tuesday", "wednesday", "thursday", "friday",
  "saturday", "sunday", "start_date", "end_date"
)

calendar_dates_field <- c("service_id", "date", "exception_type")

fare_attributes_field <- c(
  "agency_id", "fare_id", "price", "currency_type", "payment_method",
  "transfers", "transfer_duration"
)

fare_rules_field <- c(
  "fare_id", "route_id", "origin_id", "destination_id", "contains_id"
)

shapes_field <- c(
  "shape_id", "shape_pt_lat", "shape_pt_lon", "shape_pt_sequence",
  "shape_dist_traveled"
)

frequencies_field <- c(
  "trip_id", "start_time", "end_time", "headway_secs", "exact_times"
)

transfers_field <- c(
  "from_stop_id", "to_stop_id", "transfer_type", "min_transfer_time"
)

pathways_field <- c(
  "pathway_id", "from_stop_id", "to_stop_id", "pathway_mode",
  "is_bidirectional", "length", "traversal_time", "stair_count", "max_slope",
  "min_width", "signposted_as", "reversed_signposted_as"
)

levels_field <- c("level_id", "level_index", "level_name")

feed_info_field <- c(
  "feed_publisher_name", "feed_publisher_url", "feed_lang", "feed_start_date",
  "feed_end_date", "feed_version", "feed_contact_email", "feed_contact_url"
)

translations_field <- c(
  "table_name", "field_name", "language", "translation", "record_id",
  "record_sub_id", "field_value"
)

attributions_field <- c(
  "attribution_id", "agency_id", "route_id", "trip_id", "organization_name",
  "is_producer", "is_operator", "is_authority", "attribution_url",
  "attribution_email", "attribution_phone"
)


# tests -------------------------------------------------------------------


test_that("raises errors due to incorrect input types", {

  no_class_gtfs <- gtfs
  attr(no_class_gtfs, "class") <- NULL

  expect_error(validate_gtfs(no_class_gtfs))
  expect_error(validate_gtfs(gtfs, files = NA))
  expect_error(validate_gtfs(gtfs, files = as.factor("stop_times")))
  expect_error(validate_gtfs(gtfs, quiet = "TRUE"))
  expect_error(validate_gtfs(gtfs, warnings = "TRUE"))

})

test_that("raises error due to non-existent/mistyped supplied file in gtfs", {
  expect_error(validate_gtfs(gtfs, files = "agency.txt"))
  expect_error(validate_gtfs(gtfs, files = "non-existent-file"))
})

test_that("raises warnings and messages adequately", {
  expect_silent(validate_gtfs(gtfs))
  expect_silent(validate_gtfs(gtfs, "stop_times"))
  expect_silent(validate_gtfs(gtfs, c("stop_times", "agency")))
  expect_silent(validate_gtfs(extra_file_gtfs))
  expect_silent(validate_gtfs(extra_field_gtfs))
  expect_silent(validate_gtfs(missing_req_file_gtfs, warnings = FALSE))
  expect_silent(validate_gtfs(missing_req_field_gtfs, warnings = FALSE))
  expect_message(validate_gtfs(gtfs, quiet = FALSE))
  expect_message(validate_gtfs(gtfs, "stop_times", quiet = FALSE))
  expect_message(validate_gtfs(gtfs, c("stop_times", "agency"), quiet = FALSE))
  expect_message(validate_gtfs(extra_file_gtfs, quiet = FALSE))
  expect_message(validate_gtfs(extra_field_gtfs, quiet = FALSE))
  expect_silent(
    validate_gtfs(missing_req_file_gtfs, warnings = FALSE, quiet = FALSE)
  )
  expect_silent(
    validate_gtfs(missing_req_field_gtfs, warnings = FALSE, quiet = FALSE)
  )
  expect_warning(validate_gtfs(missing_req_file_gtfs))
  expect_warning(validate_gtfs(missing_req_field_gtfs))
})

test_that("results in a dt_gtfs, and validation_result has right col types", {

  # validate_gtfs results in a dt_gtfs

  expect_s3_class(validate_gtfs(gtfs), "dt_gtfs")

  # validation result is a data.table

  expect_s3_class(full_val, "data.table")

  # columns' types

  expect_equal(class(full_val$file), "character")
  expect_equal(class(full_val$file_provided_status), "logical")
  expect_equal(class(full_val$field), "character")
  expect_equal(class(full_val$field_spec), "character")
  expect_equal(class(full_val$field_provided_status), "logical")
  expect_equal(class(full_val$validation_status), "character")
  expect_equal(class(full_val$validation_details), "character")

})

test_that("doesn't change original gtfs (only validation_result attribute)", {

  no_val_gtfs <- gtfs
  attr(no_val_gtfs, "validation_result") <- NULL
  pre_validation_no_val_gtfs <- no_val_gtfs

  validated_gtfs <- validate_gtfs(no_val_gtfs)

  expect_identical(no_val_gtfs, pre_validation_no_val_gtfs)
  expect_false(identical(no_val_gtfs, validated_gtfs))

  # the difference between validated_gtfs and no_val_gtfs is the
  # validation_result

  val_result <- attr(validated_gtfs, "validation_result")
  attr(no_val_gtfs, "validation_result") <- val_result

  expect_identical(no_val_gtfs, validated_gtfs)

})

test_that("validates against the correct files", {

  # all files

  validated_files <- unique(full_val$file)
  expect_equal(sum(validated_files %in% specified_files), 17)

  # only stop_times

  validated_files <- unique(partial_val_1$file)
  expect_true(validated_files == "stop_times")

  # only stop_times and agency

  validated_files <- unique(partial_val_2$file)
  expect_equal(sum(validated_files %in% c("stop_times", "agency")), 2)

})

test_that("validates all fields from desired files", {

  # full validation

  validated_files <- unique(full_val$file)
  invisible(lapply(
    validated_files,
    function (i) {
      supposed_fields <- get(paste0(i, "_field"))
      expect_equal(
        sum(full_val[file == i]$field %in% supposed_fields),
        length(supposed_fields)
      )
    }
  ))

  # partial validation 1 - only stop_times

  supposed_fields <- stop_times_field
  expect_equal(
    sum(partial_val_1$field %in% supposed_fields),
    length(supposed_fields)
  )

  # partial validation 2 - stop_times and agency

  validated_files <- unique(partial_val_2$file)
  invisible(lapply(
    validated_files,
    function (i) {
      supposed_fields <- get(paste0(i, "_field"))
      expect_equal(
        sum(full_val[file == i]$field %in% supposed_fields),
        length(supposed_fields)
      )
    }
  ))

})

test_that("recognizes extra files and fields as extra", {

  # extra file

  expect_equal(
    sum(extra_file_val[file == "extra_file"]$file_spec == "ext"),
    length(extra_file_val[file == "extra_file"]$field)
  )
  expect_equal(
    sum(extra_file_val[file == "extra_file"]$field_spec == "ext"),
    length(extra_file_val[file == "extra_file"]$field)
  )
  expect_equal(
    sum(extra_file_val[file == "extra_file"]$file_provided_status == TRUE),
    length(extra_file_val[file == "extra_file"]$field)
  )
  expect_equal(
    sum(extra_file_val[file == "extra_file"]$field_provided_status == TRUE),
    length(extra_file_val[file == "extra_file"]$field)
  )

  # extra field in required and optional files

  expect_equal(
    extra_field_val[file == "calendar" & field == "extra_field"]$field_spec,
    "ext"
  )
  expect_equal(
    extra_field_val[file == "shapes" & field == "additional_field"]$field_spec,
    "ext"
  )
  expect_equal(
    sum(extra_field_val[file == "calendar"]$field_spec == "ext"),
    1
  )
  expect_equal(
    sum(extra_field_val[file == "shapes"]$field_spec == "ext"),
    1
  )
  expect_true(
    extra_field_val[file == "calendar" & field == "extra_field"]$field_provided_status,
  )
  expect_true(
    extra_field_val[file == "shapes" & field == "additional_field"]$field_provided_status,
  )

})

test_that("attributes have right validation status and details", {

  # ok

  ok_status <- full_val[
    file_provided_status == TRUE & field_provided_status == TRUE
  ]
  expect_equal(sum(ok_status$validation_status == "ok"), nrow(ok_status))
  expect_equal(sum(is.na(ok_status$validation_details)), nrow(ok_status))

  # info: missing_opt_file

  file_info_status <- full_val[
    file_spec == "opt" & file_provided_status == FALSE
  ]
  expect_equal(
    sum(file_info_status$validation_status == "info"),
    nrow(file_info_status)
  )
  expect_equal(
    sum(file_info_status$validation_details == "missing_opt_file"),
    nrow(file_info_status)
  )

  # problem: missing_req_file

  file_problem_status <- missing_req_file_val[
    file_spec == "req" & file_provided_status == FALSE
  ]
  expect_equal(
    sum(file_problem_status$validation_status == "problem"),
    nrow(file_problem_status)
  )
  expect_equal(
    sum(file_problem_status$validation_details == "missing_req_file"),
    nrow(file_problem_status)
  )

  # info: undocumented_file

  file_extra_status <- extra_file_val[file_spec == "ext"]
  expect_equal(
    sum(file_extra_status$validation_status == "info"),
    nrow(file_extra_status)
  )
  expect_equal(
    sum(file_extra_status$validation_details == "undocumented_file"),
    nrow(file_extra_status)
  )

  # info: missing_opt_field

  field_info_status <- full_val[
    file_spec == "req" & file_provided_status == TRUE & field_provided_status == FALSE & field_spec == "opt"
  ]
  expect_equal(
    sum(field_info_status$validation_status == "info"),
    nrow(field_info_status)
  )
  expect_equal(
    sum(field_info_status$validation_details == "missing_opt_field"),
    nrow(field_info_status)
  )

  # problem: missing_req_field

  field_problem_status <- missing_req_field_val[
    file_provided_status == TRUE & field_spec == "req" & field_provided_status == FALSE
  ]
  expect_equal(
    sum(field_problem_status$validation_status == "problem"),
    nrow(field_problem_status)
  )
  expect_equal(
    sum(field_problem_status$validation_details == "missing_req_field"),
    nrow(field_problem_status)
  )

  # info: undocumented_field

  field_extra_status <- extra_field_val[
    file_spec != "ext" & field_spec == "ext"
  ]
  expect_equal(
    sum(field_extra_status$validation_status == "info"),
    nrow(field_extra_status)
  )
  expect_equal(
    sum(field_extra_status$validation_details == "undocumented_field"),
    nrow(field_extra_status)
  )

})

test_that("handles 'calendar' absence and 'translations' presence adequately", {

  # check first that calendar is required and calendar_dates is optional

  expect_equal(
    sum(full_val[file == "calendar"]$file_spec == "req"),
    nrow(full_val[file == "calendar"])
  )

  expect_equal(
    sum(full_val[file == "calendar_dates"]$file_spec == "opt"),
    nrow(full_val[file == "calendar_dates"])
  )

  # remove calendar and check that calendar is now optional and calendar_dates
  # required

  no_calendar_gtfs <- gtfs
  no_calendar_gtfs$calendar <- NULL
  no_calendar_gtfs <- validate_gtfs(no_calendar_gtfs, warnings = FALSE)
  no_calendar_val <- attr(no_calendar_gtfs, "validation_result")

  expect_equal(
    sum(no_calendar_val[file == "calendar"]$file_spec == "opt"),
    nrow(no_calendar_val[file == "calendar"])
  )

  expect_equal(
    sum(no_calendar_val[file == "calendar_dates"]$file_spec == "req"),
    nrow(no_calendar_val[file == "calendar_dates"])
  )

  # check that feed_info is optional

  expect_equal(
    sum(full_val[file == "feed_info"]$file_spec == "opt"),
    nrow(full_val[file == "feed_info"])
  )

  # adds empty translations and check that feed_info becomes required

  translations_gtfs <- gtfs
  translations_gtfs$translations <- data.table::data.table(NULL)
  translations_gtfs <- validate_gtfs(translations_gtfs, warnings = FALSE)
  translations_val <- attr(translations_gtfs, "validation_result")

  expect_equal(
    sum(translations_val[file == "feed_info"]$file_spec == "req"),
    nrow(translations_val[file == "feed_info"])
  )

})
