From bf8399f31533e1fc11df3842a6ec369df5c311ef Mon Sep 17 00:00:00 2001 From: "Evan L. Ray" Date: Tue, 7 Jan 2025 22:19:28 -0500 Subject: [PATCH 1/5] support length 1 arrays and relative metrics --- R/config.R | 21 ++ R/generate_eval_data.R | 45 ++- inst/schema/config_schema.json | 25 +- tests/testthat/_snaps/config.md | 267 ++++++++++++++++++ .../helper-check_exp_scores_for_window.R | 85 ++++++ tests/testthat/test-config.R | 60 +++- tests/testthat/test-generate_eval_data.R | 104 +++---- ...onfig_invalid_rel_metrics_no_baseline.yaml | 90 ++++++ ...config_invalid_rel_metrics_non_metric.yaml | 90 ++++++ .../config_valid_length_one_arrays.yaml | 9 + ...config_valid_mean_median_quantile_rel.yaml | 86 ++++++ .../config_valid_rel_metrics.yaml | 91 ++++++ 12 files changed, 888 insertions(+), 85 deletions(-) create mode 100644 tests/testthat/helper-check_exp_scores_for_window.R create mode 100644 tests/testthat/testdata/test_configs/config_invalid_rel_metrics_no_baseline.yaml create mode 100644 tests/testthat/testdata/test_configs/config_invalid_rel_metrics_non_metric.yaml create mode 100644 tests/testthat/testdata/test_configs/config_valid_length_one_arrays.yaml create mode 100644 tests/testthat/testdata/test_configs/config_valid_mean_median_quantile_rel.yaml create mode 100644 tests/testthat/testdata/test_configs/config_valid_rel_metrics.yaml diff --git a/R/config.R b/R/config.R index dd3596f..f4d3a7c 100644 --- a/R/config.R +++ b/R/config.R @@ -149,6 +149,27 @@ validate_config_targets <- function(webevals_config, task_groups, task_id_names) ) } + # check that relative_metrics is a subset of metrics + extra_relative_metrics <- setdiff( + target$relative_metrics, + target$metrics + ) + if (length(extra_relative_metrics) > 0) { + raise_config_error( + c( + cli::format_inline( + "Requested relative metrics for metrics that were not requested ", + "for {.arg target_id} {.val {target_id}}." 
+ ), + "i" = cli::format_inline("Requested metric{?s}: {.val {target$metrics}}."), + "x" = cli::format_inline( + "Relative metric{?s} not found in the requested metrics: ", + "{.val {extra_relative_metrics}}." + ) + ) + ) + } + # check that disaggregate_by are task id variable names extra_disaggregate_by <- setdiff( target$disaggregate_by, diff --git a/R/generate_eval_data.R b/R/generate_eval_data.R index 524f9d0..63af6fc 100644 --- a/R/generate_eval_data.R +++ b/R/generate_eval_data.R @@ -34,6 +34,9 @@ generate_target_eval_data <- function(hub_path, target) { target_id <- target$target_id metrics <- target$metrics + # if relative_metrics and baseline are not provided, they are NULL + relative_metrics <- target$relative_metrics + baseline <- target$baseline # adding `NULL` at the beginning will calculate overall scores disaggregate_by <- c(list(NULL), as.list(target$disaggregate_by)) eval_windows <- config$eval_windows @@ -50,6 +53,8 @@ generate_target_eval_data <- function(hub_path, model_out_tbl = model_out_tbl, oracle_output = oracle_output, metric_name_to_output_type = metric_name_to_output_type, + relative_metrics = relative_metrics, + baseline = baseline, target_id = target_id, window_name = eval_window$window_name, by = by, @@ -70,18 +75,22 @@ generate_target_eval_data <- function(hub_path, #' out_path/target_id/window_name/by/scores.csv #' @noRd get_and_save_scores <- function(model_out_tbl, oracle_output, metric_name_to_output_type, + relative_metrics, baseline, target_id, window_name, by, out_path) { # Iterate over the output types and calculate scores for each scores <- purrr::map( unique(metric_name_to_output_type$output_type), - ~ hubEvals::score_model_out( - model_out_tbl = model_out_tbl |> dplyr::filter(output_type == !!.x), + ~ get_scores_for_output_type( + model_out_tbl = model_out_tbl, oracle_output = oracle_output, - metrics = metric_name_to_output_type$metric[ - metric_name_to_output_type$output_type == .x - ], - by = c("model_id", by) + 
metric_name_to_output_type = metric_name_to_output_type, + relative_metrics = relative_metrics, + baseline = baseline, + target_id = target_id, + window_name = window_name, + by = by, + output_type = .x ) ) |> purrr::reduce(dplyr::left_join, by = c("model_id", by)) @@ -97,3 +106,27 @@ get_and_save_scores <- function(model_out_tbl, oracle_output, metric_name_to_out file = file.path(target_window_by_out_path, "scores.csv"), row.names = FALSE) } + + +#' Get scores for a target in a given evaluation window for a specific output type. +get_scores_for_output_type <- function(model_out_tbl, oracle_output, metric_name_to_output_type, + relative_metrics, baseline, + target_id, window_name, by, + output_type) { + metrics <- metric_name_to_output_type$metric[ + metric_name_to_output_type$output_type == output_type + ] + if (!is.null(relative_metrics)) { + relative_metrics <- relative_metrics[relative_metrics %in% metrics] + } + scores <- hubEvals::score_model_out( + model_out_tbl = model_out_tbl |> dplyr::filter(.data[["output_type"]] == !!output_type), + oracle_output = oracle_output, + metrics = metrics, + relative_metrics = relative_metrics, + baseline = baseline, + by = c("model_id", by) + ) + + return(scores) +} diff --git a/inst/schema/config_schema.json b/inst/schema/config_schema.json index 76b8e1e..d5309f1 100644 --- a/inst/schema/config_schema.json +++ b/inst/schema/config_schema.json @@ -22,24 +22,37 @@ }, "metrics": { "description": "Names of metrics to compute for this target. These should be names of metrics supported by hubEvals::score_model_out.", - "type": "array", + "type": ["string", "array"], "items": { "type": "string" }, "minItems": 1 }, + "relative_metrics": { + "description": "Optional names of metrics for which to compute pairwise relative skill for this target. 
These should be a subset of the metrics for the target.", + "type": ["string", "array"], + "items": { + "type": "string" + }, + "minItems": 0 + }, + "baseline": { + "description": "Name of the model to use as a baseline for relative skill metrics for this target. Required if relative_metrics is provided.", + "type": "string", + "minItems": 0 + }, "disaggregate_by": { "description": "Optional list of task id columns to disaggregate by. Aggregated scores for each model will always be computed.", - "type": "array", + "type": ["string", "array"], "items": { "type": "string" } } }, - "required": [ - "target_id", - "metrics" - ] + "required": ["target_id", "metrics"], + "dependentRequired": { + "relative_metrics": ["baseline"] + } } }, "eval_windows": { diff --git a/tests/testthat/_snaps/config.md b/tests/testthat/_snaps/config.md index a1b21a7..553c243 100644 --- a/tests/testthat/_snaps/config.md +++ b/tests/testthat/_snaps/config.md @@ -228,6 +228,273 @@ +# read_webevals_config succeeds, valid yaml file with length 1 arrays + + Code + read_config(hub_path, test_path("testdata", "test_configs", + "config_valid_length_one_arrays.yaml")) + Output + $targets + $targets[[1]] + $targets[[1]]$target_id + [1] "wk inc flu hosp" + + $targets[[1]]$metrics + [1] "wis" + + $targets[[1]]$disaggregate_by + [1] "location" + + + + $eval_windows + $eval_windows[[1]] + $eval_windows[[1]]$window_name + [1] "Full season" + + $eval_windows[[1]]$min_round_id + [1] "2023-01-21" + + + + +# read_webevals_config succeeds, valid yaml file with relative metrics + + Code + read_config(hub_path, test_path("testdata", "test_configs", + "config_valid_rel_metrics.yaml")) + Output + $targets + $targets[[1]] + $targets[[1]]$target_id + [1] "wk inc flu hosp" + + $targets[[1]]$metrics + [1] "wis" "ae_median" "interval_coverage_50" + [4] "interval_coverage_95" + + $targets[[1]]$relative_metrics + [1] "wis" "ae_median" + + $targets[[1]]$baseline + [1] "FS-base" + + $targets[[1]]$disaggregate_by + [1] 
"location" "reference_date" "horizon" "target_end_date" + + + $targets[[2]] + $targets[[2]]$target_id + [1] "wk flu hosp rate category" + + $targets[[2]]$metrics + [1] "log_score" "rps" + + $targets[[2]]$disaggregate_by + [1] "location" "reference_date" "horizon" "target_end_date" + + + + $eval_windows + $eval_windows[[1]] + $eval_windows[[1]]$window_name + [1] "Full season" + + $eval_windows[[1]]$min_round_id + [1] "2023-01-21" + + + $eval_windows[[2]] + $eval_windows[[2]]$window_name + [1] "Last 4 weeks" + + $eval_windows[[2]]$min_round_id + [1] "2023-01-21" + + $eval_windows[[2]]$n_last_round_ids + [1] 4 + + + + $task_id_text + $task_id_text$location + $task_id_text$location$US + [1] "United States" + + $task_id_text$location$`01` + [1] "Alabama" + + $task_id_text$location$`02` + [1] "Alaska" + + $task_id_text$location$`04` + [1] "Arizona" + + $task_id_text$location$`05` + [1] "Arkansas" + + $task_id_text$location$`06` + [1] "California" + + $task_id_text$location$`08` + [1] "Colorado" + + $task_id_text$location$`09` + [1] "Connecticut" + + $task_id_text$location$`10` + [1] "Delaware" + + $task_id_text$location$`11` + [1] "District of Columbia" + + $task_id_text$location$`12` + [1] "Florida" + + $task_id_text$location$`13` + [1] "Georgia" + + $task_id_text$location$`15` + [1] "Hawaii" + + $task_id_text$location$`16` + [1] "Idaho" + + $task_id_text$location$`17` + [1] "Illinois" + + $task_id_text$location$`18` + [1] "Indiana" + + $task_id_text$location$`19` + [1] "Iowa" + + $task_id_text$location$`20` + [1] "Kansas" + + $task_id_text$location$`21` + [1] "Kentucky" + + $task_id_text$location$`22` + [1] "Louisiana" + + $task_id_text$location$`23` + [1] "Maine" + + $task_id_text$location$`24` + [1] "Maryland" + + $task_id_text$location$`25` + [1] "Massachusetts" + + $task_id_text$location$`26` + [1] "Michigan" + + $task_id_text$location$`27` + [1] "Minnesota" + + $task_id_text$location$`28` + [1] "Mississippi" + + $task_id_text$location$`29` + [1] "Missouri" + + 
$task_id_text$location$`30` + [1] "Montana" + + $task_id_text$location$`31` + [1] "Nebraska" + + $task_id_text$location$`32` + [1] "Nevada" + + $task_id_text$location$`33` + [1] "New Hampshire" + + $task_id_text$location$`34` + [1] "New Jersey" + + $task_id_text$location$`35` + [1] "New Mexico" + + $task_id_text$location$`36` + [1] "New York" + + $task_id_text$location$`37` + [1] "North Carolina" + + $task_id_text$location$`38` + [1] "North Dakota" + + $task_id_text$location$`39` + [1] "Ohio" + + $task_id_text$location$`40` + [1] "Oklahoma" + + $task_id_text$location$`41` + [1] "Oregon" + + $task_id_text$location$`42` + [1] "Pennsylvania" + + $task_id_text$location$`44` + [1] "Rhode Island" + + $task_id_text$location$`45` + [1] "South Carolina" + + $task_id_text$location$`46` + [1] "South Dakota" + + $task_id_text$location$`47` + [1] "Tennessee" + + $task_id_text$location$`48` + [1] "Texas" + + $task_id_text$location$`49` + [1] "Utah" + + $task_id_text$location$`50` + [1] "Vermont" + + $task_id_text$location$`51` + [1] "Virginia" + + $task_id_text$location$`53` + [1] "Washington" + + $task_id_text$location$`54` + [1] "West Virginia" + + $task_id_text$location$`55` + [1] "Wisconsin" + + $task_id_text$location$`56` + [1] "Wyoming" + + $task_id_text$location$`60` + [1] "American Samoa" + + $task_id_text$location$`66` + [1] "Guam" + + $task_id_text$location$`69` + [1] "Northern Mariana Islands" + + $task_id_text$location$`72` + [1] "Puerto Rico" + + $task_id_text$location$`74` + [1] "U.S. 
Minor Outlying Islands" + + $task_id_text$location$`78` + [1] "Virgin Islands" + + + + # read_webevals_config succeeds, valid yaml file with no min_round_id Code diff --git a/tests/testthat/helper-check_exp_scores_for_window.R b/tests/testthat/helper-check_exp_scores_for_window.R new file mode 100644 index 0000000..4a63eb5 --- /dev/null +++ b/tests/testthat/helper-check_exp_scores_for_window.R @@ -0,0 +1,85 @@ +#' Helper function to check that the output files were created and have the expected contents +#' for one evaluation window. +#' @param out_path The path to the output directory where scores were saved. +#' @param window_name The name of the evaluation window. +#' @param model_out_tbl The model output table, filtered to data for the evaluation window. +#' @param oracle_output The oracle output. +#' @param include_rel Whether to include relative metrics in the expected scores. +check_exp_scores_for_window <- function(out_path, window_name, model_out_tbl, oracle_output, + include_rel = FALSE) { + # check that the output files were created and have the expected contents + # no disaggregation + scores_path <- file.path(out_path, "wk inc flu hosp", window_name, "scores.csv") + testthat::expect_true(file.exists(scores_path)) + + actual_scores <- read.csv(scores_path) + expected_mean_scores <- hubEvals::score_model_out( + model_out_tbl = model_out_tbl |> dplyr::filter(.data[["output_type"]] == "mean"), + oracle_output = oracle_output, + metrics = "se_point", + relative_metrics = if (include_rel) "se_point" else NULL, + baseline = "FS-base", + by = "model_id" + ) + expected_median_scores <- hubEvals::score_model_out( + model_out_tbl = model_out_tbl |> dplyr::filter(.data[["output_type"]] == "median"), + oracle_output = oracle_output, + metrics = "ae_point", + relative_metrics = if (include_rel) "ae_point" else NULL, + baseline = "FS-base", + by = "model_id" + ) + expected_quantile_scores <- hubEvals::score_model_out( + model_out_tbl = model_out_tbl |> 
dplyr::filter(.data[["output_type"]] == "quantile"), + oracle_output = oracle_output, + metrics = c("wis", "ae_median", "interval_coverage_50", "interval_coverage_95"), + relative_metrics = if (include_rel) c("wis", "ae_median") else NULL, + baseline = "FS-base", + by = "model_id" + ) + expected_scores <- expected_mean_scores |> + dplyr::left_join(expected_median_scores, by = "model_id") |> + dplyr::left_join(expected_quantile_scores, by = "model_id") + expect_df_equal_up_to_order(actual_scores, expected_scores, ignore_attr = TRUE) # nolint: object_usage_linter + + for (by in c("location", "reference_date", "horizon", "target_end_date")) { + # check that the output files were created and have the expected contents + # disaggregated by `by` + scores_path <- file.path(out_path, "wk inc flu hosp", window_name, by, "scores.csv") + testthat::expect_true(file.exists(scores_path)) + + actual_scores <- read.csv(scores_path) + if (by %in% c("reference_date", "target_end_date")) { + actual_scores[[by]] <- as.Date(actual_scores[[by]]) + } + + expected_mean_scores <- hubEvals::score_model_out( + model_out_tbl = model_out_tbl |> dplyr::filter(.data[["output_type"]] == "mean"), + oracle_output = oracle_output, + metrics = "se_point", + relative_metrics = if (include_rel) "se_point" else NULL, + baseline = "FS-base", + by = c("model_id", by) + ) + expected_median_scores <- hubEvals::score_model_out( + model_out_tbl = model_out_tbl |> dplyr::filter(.data[["output_type"]] == "median"), + oracle_output = oracle_output, + metrics = "ae_point", + relative_metrics = if (include_rel) "ae_point" else NULL, + baseline = "FS-base", + by = c("model_id", by) + ) + expected_quantile_scores <- hubEvals::score_model_out( + model_out_tbl = model_out_tbl |> dplyr::filter(.data[["output_type"]] == "quantile"), + oracle_output = oracle_output, + metrics = c("wis", "ae_median", "interval_coverage_50", "interval_coverage_95"), + relative_metrics = if (include_rel) c("wis", "ae_median") else NULL, + 
baseline = "FS-base", + by = c("model_id", by) + ) + expected_scores <- expected_mean_scores |> + dplyr::left_join(expected_median_scores, by = c("model_id", by)) |> + dplyr::left_join(expected_quantile_scores, by = c("model_id", by)) + expect_df_equal_up_to_order(actual_scores, expected_scores, ignore_attr = TRUE) # nolint: object_usage_linter + } +} diff --git a/tests/testthat/test-config.R b/tests/testthat/test-config.R index 0bb11ed..e2406ce 100644 --- a/tests/testthat/test-config.R +++ b/tests/testthat/test-config.R @@ -12,6 +12,34 @@ test_that( } ) +test_that( + "read_webevals_config succeeds, valid yaml file with length 1 arrays", + { + hub_path <- test_path("testdata", "ecfh") + expect_snapshot( + read_config( + hub_path, + test_path("testdata", "test_configs", + "config_valid_length_one_arrays.yaml") + ) + ) + } +) + +test_that( + "read_webevals_config succeeds, valid yaml file with relative metrics", + { + hub_path <- test_path("testdata", "ecfh") + expect_snapshot( + read_config( + hub_path, + test_path("testdata", "test_configs", + "config_valid_rel_metrics.yaml") + ) + ) + } +) + test_that( "read_webevals_config succeeds, valid yaml file with no min_round_id", { @@ -85,7 +113,7 @@ test_that( ) test_that( - "read_webevals_config succeeds, round_id_from_variable false", + "read_webevals_config fails, round_id_from_variable false", { hub_path <- test_path("testdata", "test_hub_invalid_rifv_F") expect_error( @@ -203,3 +231,33 @@ test_that( ) } ) + +test_that( + "read_webevals_config fails, invalid relative metrics, not a subset of metrics", + { + hub_path <- test_path("testdata", "ecfh") + expect_error( + read_config( + hub_path, + test_path("testdata", "test_configs", + "config_invalid_rel_metrics_non_metric.yaml") + ), + regexp = 'Relative metric not found in the requested metrics: "log_score".' 
+ ) + } +) + +test_that( + "read_webevals_config fails, invalid relative metrics, no baseline", + { + hub_path <- test_path("testdata", "ecfh") + expect_error( + read_config( + hub_path, + test_path("testdata", "test_configs", + "config_invalid_rel_metrics_no_baseline.yaml") + ), + regexp = "must have property baseline when property relative_metrics is present" + ) + } +) diff --git a/tests/testthat/test-generate_eval_data.R b/tests/testthat/test-generate_eval_data.R index 44f64a5..c80a0e3 100644 --- a/tests/testthat/test-generate_eval_data.R +++ b/tests/testthat/test-generate_eval_data.R @@ -1,77 +1,35 @@ -#' Helper function to check that the output files were created and have the expected contents -#' for one evaluation window. -#' @param out_path The path to the output directory where scores were saved. -#' @param window_name The name of the evaluation window. -#' @param model_out_tbl The model output table, filtered to data for the evaluation window. -#' @param oracle_output The oracle output. 
-check_exp_scores_for_window <- function(out_path, window_name, model_out_tbl, oracle_output) { - # check that the output files were created and have the expected contents - # no disaggregation - scores_path <- file.path(out_path, "wk inc flu hosp", window_name, "scores.csv") - testthat::expect_true(file.exists(scores_path)) - - actual_scores <- read.csv(scores_path) - expected_mean_scores <- hubEvals::score_model_out( - model_out_tbl = model_out_tbl |> dplyr::filter(.data[["output_type"]] == "mean"), - oracle_output = oracle_output, - metrics = "se_point", - by = "model_id" - ) - expected_median_scores <- hubEvals::score_model_out( - model_out_tbl = model_out_tbl |> dplyr::filter(.data[["output_type"]] == "median"), - oracle_output = oracle_output, - metrics = "ae_point", - by = "model_id" - ) - expected_quantile_scores <- hubEvals::score_model_out( - model_out_tbl = model_out_tbl |> dplyr::filter(.data[["output_type"]] == "quantile"), - oracle_output = oracle_output, - metrics = c("wis", "ae_median", "interval_coverage_50", "interval_coverage_95"), - by = "model_id" - ) - expected_scores <- expected_mean_scores |> - dplyr::left_join(expected_median_scores, by = "model_id") |> - dplyr::left_join(expected_quantile_scores, by = "model_id") - expect_df_equal_up_to_order(actual_scores, expected_scores, ignore_attr = TRUE) # nolint: object_usage_linter - - for (by in c("location", "reference_date", "horizon", "target_end_date")) { - # check that the output files were created and have the expected contents - # disaggregated by `by` - scores_path <- file.path(out_path, "wk inc flu hosp", window_name, by, "scores.csv") - testthat::expect_true(file.exists(scores_path)) - - actual_scores <- read.csv(scores_path) - if (by %in% c("reference_date", "target_end_date")) { - actual_scores[[by]] <- as.Date(actual_scores[[by]]) - } - - expected_mean_scores <- hubEvals::score_model_out( - model_out_tbl = model_out_tbl |> dplyr::filter(.data[["output_type"]] == "mean"), - 
oracle_output = oracle_output, - metrics = "se_point", - by = c("model_id", by) - ) - expected_median_scores <- hubEvals::score_model_out( - model_out_tbl = model_out_tbl |> dplyr::filter(.data[["output_type"]] == "median"), - oracle_output = oracle_output, - metrics = "ae_point", - by = c("model_id", by) +test_that( + "generate_eval_data works, integration test, no relative metrics", + { + out_path <- withr::local_tempdir() + hub_path <- test_path("testdata", "ecfh") + model_out_tbl <- hubData::connect_hub(hub_path) |> + dplyr::collect() + oracle_output <- read.csv( + test_path("testdata", "ecfh", "target-data", "oracle-output.csv") ) - expected_quantile_scores <- hubEvals::score_model_out( - model_out_tbl = model_out_tbl |> dplyr::filter(.data[["output_type"]] == "quantile"), - oracle_output = oracle_output, - metrics = c("wis", "ae_median", "interval_coverage_50", "interval_coverage_95"), - by = c("model_id", by) + oracle_output[["target_end_date"]] <- as.Date(oracle_output[["target_end_date"]]) + + generate_eval_data( + hub_path = hub_path, + config_path = test_path("testdata", "test_configs", "config_valid_mean_median_quantile.yaml"), + out_path = out_path, + oracle_output = oracle_output ) - expected_scores <- expected_mean_scores |> - dplyr::left_join(expected_median_scores, by = c("model_id", by)) |> - dplyr::left_join(expected_quantile_scores, by = c("model_id", by)) - expect_df_equal_up_to_order(actual_scores, expected_scores, ignore_attr = TRUE) # nolint: object_usage_linter + + check_exp_scores_for_window(out_path, + "Full season", + model_out_tbl, + oracle_output) + check_exp_scores_for_window(out_path, + "Last 5 weeks", + model_out_tbl |> dplyr::filter(reference_date >= "2022-12-17"), + oracle_output) } -} +) test_that( - "generate_eval_data works, integration test", + "generate_eval_data works, integration test, with relative metrics", { out_path <- withr::local_tempdir() hub_path <- test_path("testdata", "ecfh") @@ -84,7 +42,7 @@ test_that( 
generate_eval_data( hub_path = hub_path, - config_path = test_path("testdata", "test_configs", "config_valid_mean_median_quantile.yaml"), + config_path = test_path("testdata", "test_configs", "config_valid_mean_median_quantile_rel.yaml"), out_path = out_path, oracle_output = oracle_output ) @@ -92,10 +50,12 @@ test_that( check_exp_scores_for_window(out_path, "Full season", model_out_tbl, - oracle_output) + oracle_output, + include_rel = TRUE) check_exp_scores_for_window(out_path, "Last 5 weeks", model_out_tbl |> dplyr::filter(reference_date >= "2022-12-17"), - oracle_output) + oracle_output, + include_rel = TRUE) } ) diff --git a/tests/testthat/testdata/test_configs/config_invalid_rel_metrics_no_baseline.yaml b/tests/testthat/testdata/test_configs/config_invalid_rel_metrics_no_baseline.yaml new file mode 100644 index 0000000..395efcb --- /dev/null +++ b/tests/testthat/testdata/test_configs/config_invalid_rel_metrics_no_baseline.yaml @@ -0,0 +1,90 @@ +targets: +- target_id: wk inc flu hosp + metrics: + - wis + - ae_median + - interval_coverage_50 + - interval_coverage_95 + relative_metrics: + - wis + - ae_median + disaggregate_by: + - location + - reference_date + - horizon + - target_end_date +- target_id: wk flu hosp rate category + metrics: + - log_score + - rps + disaggregate_by: + - location + - reference_date + - horizon + - target_end_date +eval_windows: +- window_name: Full season + min_round_id: '2023-01-21' +- window_name: Last 4 weeks + min_round_id: '2023-01-21' + n_last_round_ids: 4 +task_id_text: + location: + US: United States + '01': Alabama + '02': Alaska + '04': Arizona + '05': Arkansas + '06': California + '08': Colorado + '09': Connecticut + '10': Delaware + '11': District of Columbia + '12': Florida + '13': Georgia + '15': Hawaii + '16': Idaho + '17': Illinois + '18': Indiana + '19': Iowa + '20': Kansas + '21': Kentucky + '22': Louisiana + '23': Maine + '24': Maryland + '25': Massachusetts + '26': Michigan + '27': Minnesota + '28': Mississippi + 
'29': Missouri + '30': Montana + '31': Nebraska + '32': Nevada + '33': New Hampshire + '34': New Jersey + '35': New Mexico + '36': New York + '37': North Carolina + '38': North Dakota + '39': Ohio + '40': Oklahoma + '41': Oregon + '42': Pennsylvania + '44': Rhode Island + '45': South Carolina + '46': South Dakota + '47': Tennessee + '48': Texas + '49': Utah + '50': Vermont + '51': Virginia + '53': Washington + '54': West Virginia + '55': Wisconsin + '56': Wyoming + '60': American Samoa + '66': Guam + '69': Northern Mariana Islands + '72': Puerto Rico + '74': U.S. Minor Outlying Islands + '78': Virgin Islands diff --git a/tests/testthat/testdata/test_configs/config_invalid_rel_metrics_non_metric.yaml b/tests/testthat/testdata/test_configs/config_invalid_rel_metrics_non_metric.yaml new file mode 100644 index 0000000..85f1a58 --- /dev/null +++ b/tests/testthat/testdata/test_configs/config_invalid_rel_metrics_non_metric.yaml @@ -0,0 +1,90 @@ +targets: +- target_id: wk inc flu hosp + metrics: + - wis + - ae_median + - interval_coverage_50 + - interval_coverage_95 + relative_metrics: + - log_score + baseline: FS-base + disaggregate_by: + - location + - reference_date + - horizon + - target_end_date +- target_id: wk flu hosp rate category + metrics: + - log_score + - rps + disaggregate_by: + - location + - reference_date + - horizon + - target_end_date +eval_windows: +- window_name: Full season + min_round_id: '2023-01-21' +- window_name: Last 4 weeks + min_round_id: '2023-01-21' + n_last_round_ids: 4 +task_id_text: + location: + US: United States + '01': Alabama + '02': Alaska + '04': Arizona + '05': Arkansas + '06': California + '08': Colorado + '09': Connecticut + '10': Delaware + '11': District of Columbia + '12': Florida + '13': Georgia + '15': Hawaii + '16': Idaho + '17': Illinois + '18': Indiana + '19': Iowa + '20': Kansas + '21': Kentucky + '22': Louisiana + '23': Maine + '24': Maryland + '25': Massachusetts + '26': Michigan + '27': Minnesota + '28': Mississippi + 
'29': Missouri + '30': Montana + '31': Nebraska + '32': Nevada + '33': New Hampshire + '34': New Jersey + '35': New Mexico + '36': New York + '37': North Carolina + '38': North Dakota + '39': Ohio + '40': Oklahoma + '41': Oregon + '42': Pennsylvania + '44': Rhode Island + '45': South Carolina + '46': South Dakota + '47': Tennessee + '48': Texas + '49': Utah + '50': Vermont + '51': Virginia + '53': Washington + '54': West Virginia + '55': Wisconsin + '56': Wyoming + '60': American Samoa + '66': Guam + '69': Northern Mariana Islands + '72': Puerto Rico + '74': U.S. Minor Outlying Islands + '78': Virgin Islands diff --git a/tests/testthat/testdata/test_configs/config_valid_length_one_arrays.yaml b/tests/testthat/testdata/test_configs/config_valid_length_one_arrays.yaml new file mode 100644 index 0000000..277ba80 --- /dev/null +++ b/tests/testthat/testdata/test_configs/config_valid_length_one_arrays.yaml @@ -0,0 +1,9 @@ +targets: +- target_id: wk inc flu hosp + metrics: + - wis + disaggregate_by: + - location +eval_windows: +- window_name: Full season + min_round_id: '2023-01-21' diff --git a/tests/testthat/testdata/test_configs/config_valid_mean_median_quantile_rel.yaml b/tests/testthat/testdata/test_configs/config_valid_mean_median_quantile_rel.yaml new file mode 100644 index 0000000..6c99e31 --- /dev/null +++ b/tests/testthat/testdata/test_configs/config_valid_mean_median_quantile_rel.yaml @@ -0,0 +1,86 @@ +targets: +- target_id: wk inc flu hosp + metrics: + - se_point + - ae_point + - wis + - ae_median + - interval_coverage_50 + - interval_coverage_95 + relative_metrics: + - se_point + - ae_point + - wis + - ae_median + baseline: FS-base + disaggregate_by: + - location + - reference_date + - horizon + - target_end_date +eval_windows: +- window_name: Full season + min_round_id: '2022-10-22' +- window_name: Last 5 weeks + min_round_id: '2022-10-22' + n_last_round_ids: 5 +task_id_text: + location: + US: United States + '01': Alabama + '02': Alaska + '04': Arizona + 
'05': Arkansas + '06': California + '08': Colorado + '09': Connecticut + '10': Delaware + '11': District of Columbia + '12': Florida + '13': Georgia + '15': Hawaii + '16': Idaho + '17': Illinois + '18': Indiana + '19': Iowa + '20': Kansas + '21': Kentucky + '22': Louisiana + '23': Maine + '24': Maryland + '25': Massachusetts + '26': Michigan + '27': Minnesota + '28': Mississippi + '29': Missouri + '30': Montana + '31': Nebraska + '32': Nevada + '33': New Hampshire + '34': New Jersey + '35': New Mexico + '36': New York + '37': North Carolina + '38': North Dakota + '39': Ohio + '40': Oklahoma + '41': Oregon + '42': Pennsylvania + '44': Rhode Island + '45': South Carolina + '46': South Dakota + '47': Tennessee + '48': Texas + '49': Utah + '50': Vermont + '51': Virginia + '53': Washington + '54': West Virginia + '55': Wisconsin + '56': Wyoming + '60': American Samoa + '66': Guam + '69': Northern Mariana Islands + '72': Puerto Rico + '74': U.S. Minor Outlying Islands + '78': Virgin Islands diff --git a/tests/testthat/testdata/test_configs/config_valid_rel_metrics.yaml b/tests/testthat/testdata/test_configs/config_valid_rel_metrics.yaml new file mode 100644 index 0000000..a95d8e8 --- /dev/null +++ b/tests/testthat/testdata/test_configs/config_valid_rel_metrics.yaml @@ -0,0 +1,91 @@ +targets: +- target_id: wk inc flu hosp + metrics: + - wis + - ae_median + - interval_coverage_50 + - interval_coverage_95 + relative_metrics: + - wis + - ae_median + baseline: FS-base + disaggregate_by: + - location + - reference_date + - horizon + - target_end_date +- target_id: wk flu hosp rate category + metrics: + - log_score + - rps + disaggregate_by: + - location + - reference_date + - horizon + - target_end_date +eval_windows: +- window_name: Full season + min_round_id: '2023-01-21' +- window_name: Last 4 weeks + min_round_id: '2023-01-21' + n_last_round_ids: 4 +task_id_text: + location: + US: United States + '01': Alabama + '02': Alaska + '04': Arizona + '05': Arkansas + '06': 
California + '08': Colorado + '09': Connecticut + '10': Delaware + '11': District of Columbia + '12': Florida + '13': Georgia + '15': Hawaii + '16': Idaho + '17': Illinois + '18': Indiana + '19': Iowa + '20': Kansas + '21': Kentucky + '22': Louisiana + '23': Maine + '24': Maryland + '25': Massachusetts + '26': Michigan + '27': Minnesota + '28': Mississippi + '29': Missouri + '30': Montana + '31': Nebraska + '32': Nevada + '33': New Hampshire + '34': New Jersey + '35': New Mexico + '36': New York + '37': North Carolina + '38': North Dakota + '39': Ohio + '40': Oklahoma + '41': Oregon + '42': Pennsylvania + '44': Rhode Island + '45': South Carolina + '46': South Dakota + '47': Tennessee + '48': Texas + '49': Utah + '50': Vermont + '51': Virginia + '53': Washington + '54': West Virginia + '55': Wisconsin + '56': Wyoming + '60': American Samoa + '66': Guam + '69': Northern Mariana Islands + '72': Puerto Rico + '74': U.S. Minor Outlying Islands + '78': Virgin Islands From 6495df9171a35abe7c1afe6fb96b311be79c1181 Mon Sep 17 00:00:00 2001 From: "Evan L. 
Ray" Date: Wed, 8 Jan 2025 16:54:08 -0500 Subject: [PATCH 2/5] refactor expected scores to fixtures --- .../helper-check_exp_scores_for_window.R | 83 ++++--------------- .../testdata/create_exp_score_fixtures.R | 55 ++++++++++++ .../expected-scores/scores_Full season.csv | 4 + .../scores_Full season_by_horizon.csv | 13 +++ .../scores_Full season_by_location.csv | 7 ++ .../scores_Full season_by_reference_date.csv | 13 +++ .../scores_Full season_by_target_end_date.csv | 49 +++++++++++ .../expected-scores/scores_Last 5 weeks.csv | 4 + .../scores_Last 5 weeks_by_horizon.csv | 13 +++ .../scores_Last 5 weeks_by_location.csv | 7 ++ .../scores_Last 5 weeks_by_reference_date.csv | 7 ++ ...scores_Last 5 weeks_by_target_end_date.csv | 25 ++++++ 12 files changed, 213 insertions(+), 67 deletions(-) create mode 100644 tests/testthat/testdata/create_exp_score_fixtures.R create mode 100644 tests/testthat/testdata/expected-scores/scores_Full season.csv create mode 100644 tests/testthat/testdata/expected-scores/scores_Full season_by_horizon.csv create mode 100644 tests/testthat/testdata/expected-scores/scores_Full season_by_location.csv create mode 100644 tests/testthat/testdata/expected-scores/scores_Full season_by_reference_date.csv create mode 100644 tests/testthat/testdata/expected-scores/scores_Full season_by_target_end_date.csv create mode 100644 tests/testthat/testdata/expected-scores/scores_Last 5 weeks.csv create mode 100644 tests/testthat/testdata/expected-scores/scores_Last 5 weeks_by_horizon.csv create mode 100644 tests/testthat/testdata/expected-scores/scores_Last 5 weeks_by_location.csv create mode 100644 tests/testthat/testdata/expected-scores/scores_Last 5 weeks_by_reference_date.csv create mode 100644 tests/testthat/testdata/expected-scores/scores_Last 5 weeks_by_target_end_date.csv diff --git a/tests/testthat/helper-check_exp_scores_for_window.R b/tests/testthat/helper-check_exp_scores_for_window.R index 4a63eb5..57d6667 100644 --- 
a/tests/testthat/helper-check_exp_scores_for_window.R +++ b/tests/testthat/helper-check_exp_scores_for_window.R @@ -8,78 +8,27 @@ check_exp_scores_for_window <- function(out_path, window_name, model_out_tbl, oracle_output, include_rel = FALSE) { # check that the output files were created and have the expected contents - # no disaggregation - scores_path <- file.path(out_path, "wk inc flu hosp", window_name, "scores.csv") - testthat::expect_true(file.exists(scores_path)) - - actual_scores <- read.csv(scores_path) - expected_mean_scores <- hubEvals::score_model_out( - model_out_tbl = model_out_tbl |> dplyr::filter(.data[["output_type"]] == "mean"), - oracle_output = oracle_output, - metrics = "se_point", - relative_metrics = if (include_rel) "se_point" else NULL, - baseline = "FS-base", - by = "model_id" - ) - expected_median_scores <- hubEvals::score_model_out( - model_out_tbl = model_out_tbl |> dplyr::filter(.data[["output_type"]] == "median"), - oracle_output = oracle_output, - metrics = "ae_point", - relative_metrics = if (include_rel) "ae_point" else NULL, - baseline = "FS-base", - by = "model_id" - ) - expected_quantile_scores <- hubEvals::score_model_out( - model_out_tbl = model_out_tbl |> dplyr::filter(.data[["output_type"]] == "quantile"), - oracle_output = oracle_output, - metrics = c("wis", "ae_median", "interval_coverage_50", "interval_coverage_95"), - relative_metrics = if (include_rel) c("wis", "ae_median") else NULL, - baseline = "FS-base", - by = "model_id" - ) - expected_scores <- expected_mean_scores |> - dplyr::left_join(expected_median_scores, by = "model_id") |> - dplyr::left_join(expected_quantile_scores, by = "model_id") - expect_df_equal_up_to_order(actual_scores, expected_scores, ignore_attr = TRUE) # nolint: object_usage_linter - - for (by in c("location", "reference_date", "horizon", "target_end_date")) { - # check that the output files were created and have the expected contents - # disaggregated by `by` - scores_path <- 
file.path(out_path, "wk inc flu hosp", window_name, by, "scores.csv") + # disaggregated by `by` if non-NULL, otherwise no disaggregation + for (by in list(NULL, "location", "reference_date", "horizon", "target_end_date")) { + if (is.null(by)) { + scores_path <- file.path(out_path, "wk inc flu hosp", window_name, "scores.csv") + } else { + scores_path <- file.path(out_path, "wk inc flu hosp", window_name, by, "scores.csv") + } testthat::expect_true(file.exists(scores_path)) actual_scores <- read.csv(scores_path) - if (by %in% c("reference_date", "target_end_date")) { - actual_scores[[by]] <- as.Date(actual_scores[[by]]) - } - expected_mean_scores <- hubEvals::score_model_out( - model_out_tbl = model_out_tbl |> dplyr::filter(.data[["output_type"]] == "mean"), - oracle_output = oracle_output, - metrics = "se_point", - relative_metrics = if (include_rel) "se_point" else NULL, - baseline = "FS-base", - by = c("model_id", by) + expected_scores_path <- testthat::test_path( + "testdata", "expected-scores", + paste0("scores_", window_name, ifelse(is.null(by), "", paste0("_by_", by)), ".csv") ) - expected_median_scores <- hubEvals::score_model_out( - model_out_tbl = model_out_tbl |> dplyr::filter(.data[["output_type"]] == "median"), - oracle_output = oracle_output, - metrics = "ae_point", - relative_metrics = if (include_rel) "ae_point" else NULL, - baseline = "FS-base", - by = c("model_id", by) - ) - expected_quantile_scores <- hubEvals::score_model_out( - model_out_tbl = model_out_tbl |> dplyr::filter(.data[["output_type"]] == "quantile"), - oracle_output = oracle_output, - metrics = c("wis", "ae_median", "interval_coverage_50", "interval_coverage_95"), - relative_metrics = if (include_rel) c("wis", "ae_median") else NULL, - baseline = "FS-base", - by = c("model_id", by) - ) - expected_scores <- expected_mean_scores |> - dplyr::left_join(expected_median_scores, by = c("model_id", by)) |> - dplyr::left_join(expected_quantile_scores, by = c("model_id", by)) + expected_scores 
<- read.csv(expected_scores_path) + if (!include_rel) { + expected_scores <- expected_scores |> + dplyr::select(-dplyr::contains("relative")) + } + expect_df_equal_up_to_order(actual_scores, expected_scores, ignore_attr = TRUE) # nolint: object_usage_linter } } diff --git a/tests/testthat/testdata/create_exp_score_fixtures.R b/tests/testthat/testdata/create_exp_score_fixtures.R new file mode 100644 index 0000000..78599b0 --- /dev/null +++ b/tests/testthat/testdata/create_exp_score_fixtures.R @@ -0,0 +1,55 @@ +library(testthat) +library(hubData) +library(hubEvals) +library(dplyr) + +hub_path <- testthat::test_path("testdata", "ecfh") +model_out_tbl <- hubData::connect_hub(hub_path) |> + dplyr::collect() +oracle_output <- read.csv( + test_path("testdata", "ecfh", "target-data", "oracle-output.csv") +) +oracle_output[["target_end_date"]] <- as.Date(oracle_output[["target_end_date"]]) + +make_score_fixtures_one_window <- function(window_name, model_out_tbl) { + for (by in list(NULL, "location", "reference_date", "horizon", "target_end_date")) { + expected_mean_scores <- hubEvals::score_model_out( + model_out_tbl = model_out_tbl |> dplyr::filter(.data[["output_type"]] == "mean"), + oracle_output = oracle_output, + metrics = "se_point", + relative_metrics = "se_point", + baseline = "FS-base", + by = c("model_id", by) + ) + expected_median_scores <- hubEvals::score_model_out( + model_out_tbl = model_out_tbl |> dplyr::filter(.data[["output_type"]] == "median"), + oracle_output = oracle_output, + metrics = "ae_point", + relative_metrics = "ae_point", + baseline = "FS-base", + by = c("model_id", by) + ) + expected_quantile_scores <- hubEvals::score_model_out( + model_out_tbl = model_out_tbl |> dplyr::filter(.data[["output_type"]] == "quantile"), + oracle_output = oracle_output, + metrics = c("wis", "ae_median", "interval_coverage_50", "interval_coverage_95"), + relative_metrics = c("wis", "ae_median"), + baseline = "FS-base", + by = c("model_id", by) + ) + expected_scores <- 
expected_mean_scores |> + dplyr::left_join(expected_median_scores, by = c("model_id", by)) |> + dplyr::left_join(expected_quantile_scores, by = c("model_id", by)) + + save_path <- testthat::test_path("testdata", "expected-scores") + if (!dir.exists(save_path)) { + dir.create(save_path, recursive = TRUE) + } + + file_name <- paste0("scores_", window_name, ifelse(is.null(by), "", paste0("_by_", by)), ".csv") + write.csv(expected_scores, file = file.path(save_path, file_name), row.names = FALSE) + } +} + +make_score_fixtures_one_window("Full season", model_out_tbl) +make_score_fixtures_one_window("Last 5 weeks", model_out_tbl |> dplyr::filter(reference_date >= "2022-12-17")) diff --git a/tests/testthat/testdata/expected-scores/scores_Full season.csv b/tests/testthat/testdata/expected-scores/scores_Full season.csv new file mode 100644 index 0000000..8113645 --- /dev/null +++ b/tests/testthat/testdata/expected-scores/scores_Full season.csv @@ -0,0 +1,4 @@ +"model_id","se_point","se_point_relative_skill","se_point_scaled_relative_skill","ae_point","ae_point_relative_skill","ae_point_scaled_relative_skill","wis","ae_median","interval_coverage_50","interval_coverage_95","wis_relative_skill","wis_scaled_relative_skill","ae_median_relative_skill","ae_median_scaled_relative_skill" +"FS-base",39601755.4396545,1.91088968575342,1,3837.4375,1.52457821016562,1,3132.195625,3837.4375,0,0.40625,1.71310584625945,1,1.52457821016562,1 +"MOBS-GLEAM",14746519.7514426,0.71155867160308,0.372370355498847,1915.1875,0.760886172317223,0.49907978957312,1251.3253125,1915.1875,0.4375,0.71875,0.684392983409579,0.399504201625338,0.760886172317223,0.49907978957312 +"PSI-DICE",15241667.712703,0.735450873390352,0.384873537637198,2169.8125,0.862046315450087,0.565432661769736,1559.4625,2169.8125,0.28125,0.6875,0.852923841809011,0.497881577878776,0.862046315450087,0.565432661769736 diff --git a/tests/testthat/testdata/expected-scores/scores_Full season_by_horizon.csv 
b/tests/testthat/testdata/expected-scores/scores_Full season_by_horizon.csv new file mode 100644 index 0000000..b1ec707 --- /dev/null +++ b/tests/testthat/testdata/expected-scores/scores_Full season_by_horizon.csv @@ -0,0 +1,13 @@ +"model_id","horizon","se_point","se_point_relative_skill","se_point_scaled_relative_skill","ae_point","ae_point_relative_skill","ae_point_scaled_relative_skill","wis","ae_median","interval_coverage_50","interval_coverage_95","wis_relative_skill","wis_scaled_relative_skill","ae_median_relative_skill","ae_median_scaled_relative_skill" +"FS-base",0,6970257.38545523,1.27454923973301,1,1573.75,1.25894719773911,1,1177.7875,1573.75,0,0.25,1.62373337952405,1,1.25894719773911,1 +"FS-base",1,29097350.747173,1.71928082758726,1,3469,1.43574766705554,1,2767.495,3469,0,0.5,1.62238807312521,1,1.43574766705554,1 +"FS-base",2,57604120.7825812,1.73531847157679,1,4730.75,1.43991498437613,1,3968.39875,4730.75,0,0.5,1.55262559465219,1,1.43991498437613,1 +"FS-base",3,64735292.8434087,2.49329936252022,1,5576.25,1.8377267323066,1,4615.10125,5576.25,0,0.375,2.03279570421319,1,1.8377267323066,1 +"MOBS-GLEAM",0,7264860.05568886,1.32841893042092,1.04226568029588,1290.25,1.03215670969524,0.819857029388404,672.825,1290.25,0.5,0.75,0.927576843087799,0.571261793829532,1.03215670969524,0.819857029388404 +"MOBS-GLEAM",1,13523503.2122718,0.799065866741583,0.46476750855353,2051.625,0.849125340854086,0.591416834822715,1265.94125,2051.625,0.5,0.75,0.742132500791228,0.457432172415849,0.849125340854086,0.591416834822715 +"MOBS-GLEAM",2,23511851.0974193,0.708292201252593,0.408162658816745,2339.5,0.712081827606183,0.494530465570998,1799.845,2339.5,0.375,0.625,0.704184632002208,0.453544392432842,0.712081827606183,0.494530465570998 +"MOBS-GLEAM",3,14685864.6403903,0.565630513706274,0.226860248796814,1979.375,0.652329137100986,0.354965254427258,1266.69,1979.375,0.375,0.75,0.557934019447527,0.274466351090347,0.652329137100986,0.354965254427258 
+"PSI-DICE",0,3229985.12986495,0.590620240252353,0.463395388612898,962,0.76956772309771,0.611278792692613,481.60125,962,0.125,1,0.663950012413537,0.408903346316717,0.76956772309771,0.611278792692613 +"PSI-DICE",1,12319050.3206928,0.727898124282247,0.42337360633733,1981.875,0.82025725213194,0.571310175843182,1416.75875,1981.875,0.375,0.625,0.830546215438793,0.51192820583235,0.82025725213194,0.571310175843182 +"PSI-DICE",2,27007396.0423284,0.813595148832029,0.468844861711614,3204.25,0.975288820733965,0.677323891560535,2337.735,3204.25,0.25,0.5,0.914632682644162,0.589087727134275,0.975288820733965,0.677323891560535 +"PSI-DICE",3,18410239.3579258,0.709075931207976,0.284392617215144,2531.125,0.834165626596645,0.453911678995741,2001.755,2531.125,0.375,0.625,0.881705242087001,0.433740213175171,0.834165626596645,0.453911678995741 diff --git a/tests/testthat/testdata/expected-scores/scores_Full season_by_location.csv b/tests/testthat/testdata/expected-scores/scores_Full season_by_location.csv new file mode 100644 index 0000000..7226881 --- /dev/null +++ b/tests/testthat/testdata/expected-scores/scores_Full season_by_location.csv @@ -0,0 +1,7 @@ +"model_id","location","se_point","se_point_relative_skill","se_point_scaled_relative_skill","ae_point","ae_point_relative_skill","ae_point_scaled_relative_skill","wis","ae_median","interval_coverage_50","interval_coverage_95","wis_relative_skill","wis_scaled_relative_skill","ae_median_relative_skill","ae_median_scaled_relative_skill" +"FS-base","01",14365.9170528647,0.55883962141545,1,102.9375,0.96400016707104,1,73.93875,102.9375,0,0.6875,1.01550810548672,1,0.96400016707104,1 +"FS-base","US",79189144.9622562,1.91728375259368,1,7571.9375,1.54933892775112,1,6190.4525,7571.9375,0,0.125,1.74202276281089,1,1.54933892775112,1 +"MOBS-GLEAM","01",300919.487866858,11.7058821972324,20.9467649548242,236.5625,2.21538593343284,2.29811778992107,152.659375,236.5625,0.375,0.625,2.09669263668964,2.06467346283241,2.21538593343284,2.29811778992107 
+"MOBS-GLEAM","US",29192120.0150183,0.706783454162007,0.368637898905615,3593.8125,0.735351236759228,0.474622578435176,2349.99125,3593.8125,0.5,0.8125,0.661298709570329,0.379615423912872,0.735351236759228,0.474622578435176 +"PSI-DICE","01",3929.65863864692,0.152865211307584,0.273540395937571,50,0.468245375626492,0.485731633272617,34.195625,50,0.3125,0.75,0.469658120534688,0.462485841321364,0.468245375626492,0.485731633272617 +"PSI-DICE","US",30479405.7667673,0.737950504367565,0.384893734883672,4289.625,0.877725548837983,0.566516165776593,3084.729375,4289.625,0.25,0.625,0.868057510878472,0.498304344472395,0.877725548837983,0.566516165776593 diff --git a/tests/testthat/testdata/expected-scores/scores_Full season_by_reference_date.csv b/tests/testthat/testdata/expected-scores/scores_Full season_by_reference_date.csv new file mode 100644 index 0000000..77887db --- /dev/null +++ b/tests/testthat/testdata/expected-scores/scores_Full season_by_reference_date.csv @@ -0,0 +1,13 @@ +"model_id","reference_date","se_point","se_point_relative_skill","se_point_scaled_relative_skill","ae_point","ae_point_relative_skill","ae_point_scaled_relative_skill","wis","ae_median","interval_coverage_50","interval_coverage_95","wis_relative_skill","wis_scaled_relative_skill","ae_median_relative_skill","ae_median_scaled_relative_skill" +"FS-base",2022-10-22,10208266.5669188,1.94998063334083,1,1994.75,1.6213568310999,1,1780.68125,1994.75,0,0,1.68730459407208,1,1.6213568310999,1 +"FS-base",2022-11-19,83701352.6732892,1.63940181445449,1,5850.25,1.28116609986496,1,5404.70125,5850.25,0,0.375,1.48340297594654,1,1.28116609986496,1 +"FS-base",2022-12-17,21306295.3973807,3.10891393913009,1,2894.375,1.82701375914094,1,1937.05,2894.375,0,0.75,1.73261142871645,1,1.82701375914094,1 +"FS-base",2023-01-14,43191107.1210294,3.1778410567911,1,4610.375,1.98229121430346,1,3406.35,4610.375,0,0.5,2.56485081622142,1,1.98229121430346,1 
+"MOBS-GLEAM",2022-10-22,2877650.25924563,0.549688062931421,0.281894113989051,693,0.56327874869143,0.347411956385512,603.08375,693,0.5,0.5,0.571458806558006,0.338681473733718,0.56327874869143,0.347411956385512 +"MOBS-GLEAM",2022-11-19,41097190.9787231,0.80494289886192,0.490997930931146,4082.125,0.893958406121322,0.697769326097175,2797.1575,4082.125,0.25,0.375,0.767722685817498,0.517541556991702,0.893958406121322,0.697769326097175 +"MOBS-GLEAM",2022-12-17,11393303.8117821,1.6624570518978,0.534738845927327,1834.625,1.1580687083961,0.633858777801771,1061.10375,1834.625,0.5,1,0.949113592475093,0.547793681113033,1.1580687083961,0.633858777801771 +"MOBS-GLEAM",2023-01-14,3617933.95601942,0.266194127276707,0.0837657146847685,1051,0.451891237965012,0.227964102703142,543.95625,1051,0.5,1,0.40957817951803,0.159688889867453,0.451891237965012,0.227964102703142 +"PSI-DICE",2022-10-22,4883993.75672922,0.932939317027852,0.478435170624993,1347.125,1.09495942904898,0.675335255044492,1094.49625,1347.125,0,0.375,1.03710226118215,0.614650291847572,1.09495942904898,0.675335255044492 +"PSI-DICE",2022-11-19,38689808.514466,0.757791125874522,0.462236359136077,3987,0.873126659572088,0.681509337207812,3199.25625,3987,0.25,0.625,0.878084841796866,0.591939517471017,0.873126659572088,0.681509337207812 +"PSI-DICE",2022-12-17,1325989.57383748,0.193482132503959,0.0622346376555207,748.75,0.472632797117437,0.258691427337508,679.86125,748.75,0.875,1,0.608107881413205,0.350977646421104,0.472632797117437,0.258691427337508 +"PSI-DICE",2023-01-14,16066879.0057793,1.18214121291188,0.371995072058629,2596.375,1.11634549283673,0.563159179025567,1264.23625,2596.375,0,0.75,0.951921375580667,0.371141030722034,1.11634549283673,0.563159179025567 diff --git a/tests/testthat/testdata/expected-scores/scores_Full season_by_target_end_date.csv b/tests/testthat/testdata/expected-scores/scores_Full season_by_target_end_date.csv new file mode 100644 index 0000000..4c19d40 --- /dev/null +++ 
b/tests/testthat/testdata/expected-scores/scores_Full season_by_target_end_date.csv @@ -0,0 +1,49 @@ +"model_id","target_end_date","se_point","se_point_relative_skill","se_point_scaled_relative_skill","ae_point","ae_point_relative_skill","ae_point_scaled_relative_skill","wis","ae_median","interval_coverage_50","interval_coverage_95","wis_relative_skill","wis_scaled_relative_skill","ae_median_relative_skill","ae_median_scaled_relative_skill" +"FS-base",2022-10-22,216285.49784771,3.86853672481114,1,353,2.07324050161915,1,212.02,353,0,0,1.92239967613821,1,2.07324050161915,1 +"FS-base",2022-10-29,3467107.14440222,3.53885633168634,1,1400,2.02187326095055,1,1203.76,1400,0,0,1.95605930386773,1,2.02187326095055,1 +"FS-base",2022-11-05,11788212.6451863,1.97891050438188,1,2558,1.60154169511087,1,2317.32,2558,0,0,1.67613393183549,1,1.60154169511087,1 +"FS-base",2022-11-12,25361460.9802391,1.85658122072428,1,3668,1.51437249673553,1,3389.625,3668,0,0,1.61492150895569,1,1.51437249673553,1 +"FS-base",2022-11-19,3653912.05049764,4.41757782959963,1,1390,1.90325785111796,1,1078.76,1390,0,0,2.56531239572762,1,1.90325785111796,1 +"FS-base",2022-11-26,61882926.7067664,1.70436957818155,1,5582,1.28433608386802,1,5142.425,5582,0,0.5,1.48220397976422,1,1.28433608386802,1 +"FS-base",2022-12-03,154904704.802538,1.49573443825615,1,8825.5,1.1988137044987,1,8336.7,8825.5,0,0.5,1.3303629191629,1,1.1988137044987,1 +"FS-base",2022-12-10,114363867.133355,1.82656943886595,1,7603.5,1.31743195769032,1,7060.92,7603.5,0,0.5,1.62424624888108,1,1.31743195769032,1 +"FS-base",2022-12-17,3175383.68141869,0.704102187093623,1,1303,0.932532186623287,1,802.62,1303,0,0.5,1.0219261709135,1,0.932532186623287,1 +"FS-base",2022-12-24,10914437.3525853,2.02860323794238,1,2371.5,2.1175276905063,1,1431.24,2371.5,0,1,1.41285865848852,1,2.1175276905063,1 +"FS-base",2022-12-31,10536520.7055104,6.81438673555078,1,2337.5,1.62564875830736,1,1404.75,2337.5,0,1,1.4301951892091,1,1.62564875830736,1 
+"FS-base",2023-01-07,60598839.8500084,30.2439038289645,1,5565.5,5.28287394851579,1,4109.59,5565.5,0,0.5,3.34901399077292,1,5.28287394851579,1 +"FS-base",2023-01-14,20835448.3120569,1.75445319649409,1,3249,1.38977013494529,1,2617.75,3249,0,0.5,1.90292745531258,1,1.38977013494529,1 +"FS-base",2023-01-21,40124931.7849381,2.77673880767223,1,4522.5,1.78249219444739,1,3292.555,4522.5,0,0.5,2.27799926546679,1,1.78249219444739,1 +"FS-base",2023-01-28,53187044.9770904,4.18873826922126,1,5202,2.28442232579952,1,3814.825,5202,0,0.5,2.93007181293188,1,2.28442232579952,1 +"FS-base",2023-02-04,58617003.4100323,6.83431778418711,1,5468,3.07955311475367,1,3900.27,5468,0,0.5,3.90397251191362,1,3.07955311475367,1 +"MOBS-GLEAM",2022-10-22,22805.4038560825,0.407903179914191,0.10544120656735,88.5,0.519778426043329,0.25070821529745,88.83,88.5,0.5,0.5,0.805427616410514,0.418969908499198,0.519778426043329,0.25070821529745 +"MOBS-GLEAM",2022-10-29,181128.553200142,0.184876873037054,0.0522419832027924,256.5,0.370436065309869,0.183214285714286,287.81,256.5,0.5,0.5,0.467679128934482,0.239092510134911,0.370436065309869,0.183214285714286 +"MOBS-GLEAM",2022-11-05,3195096.74417106,0.536367191521472,0.271041661729421,903,0.565360496749459,0.353010164190774,783.8,903,0.5,0.5,0.566928078889691,0.338235547960575,0.565360496749459,0.353010164190774 +"MOBS-GLEAM",2022-11-12,8111570.33575524,0.593806057453923,0.319838448663329,1524,0.629199477923923,0.415485278080698,1251.895,1524,0.5,0.5,0.596441247174566,0.369331415717078,0.629199477923923,0.415485278080698 +"MOBS-GLEAM",2022-11-19,229016.022573171,0.276880255999742,0.0626769389651786,435.5,0.59630848500854,0.313309352517986,227.45,435.5,0.5,0.5,0.540880552123038,0.210843931921836,0.59630848500854,0.313309352517986 +"MOBS-GLEAM",2022-11-26,25662609.986773,0.706795461783021,0.414696126257502,3694,0.8499350580094,0.661769974919384,2586.02,3694,0.5,0.5,0.745369963733039,0.502879478067254,0.8499350580094,0.661769974919384 
+"MOBS-GLEAM",2022-12-03,88681402.0744181,0.856293082154173,0.57249004920453,6909.5,0.938553429407263,0.782901818593847,5303.975,6909.5,0,0,0.846403452705154,0.636219967133278,0.938553429407263,0.782901818593847 +"MOBS-GLEAM",2022-12-10,49815735.8311282,0.795635045618531,0.435589815907853,5289.5,0.916493238666794,0.695666469389097,3071.185,5289.5,0,0.5,0.706474611788527,0.434955359924769,0.916493238666794,0.695666469389097 +"MOBS-GLEAM",2022-12-17,21404754.7053754,4.74624049069998,6.74084043154503,2955,2.1148370003621,2.26784343821949,1512.54,2955,0.5,1,1.9258231922373,1.88450325185019,2.1148370003621,2.26784343821949 +"MOBS-GLEAM",2022-12-24,23836982.874849,4.43044190741004,2.18398641219949,2976.5,2.65773610406578,1.2551127978073,1524.14,2976.5,0.5,1,1.50456554857934,1.06490875045415,2.65773610406578,1.2551127978073 +"MOBS-GLEAM",2022-12-31,130104.946396882,0.0841440401187458,0.0123479989299352,712.5,0.495518605473366,0.304812834224599,679.09,712.5,0.5,1,0.691390817611683,0.48342409681438,0.495518605473366,0.304812834224599 +"MOBS-GLEAM",2023-01-07,201372.720507006,0.100501877723489,0.0033230458042668,694.5,0.659232046939936,0.124786631928847,528.645,694.5,0.5,1,0.43080684475876,0.128636919984719,0.659232046939936,0.124786631928847 +"MOBS-GLEAM",2023-01-14,7402864.09095083,0.623359688404893,0.355301406529708,1682,0.719480876262846,0.517697753154817,862.48,1682,0.5,1,0.626964710785212,0.329473784738802,0.719480876262846,0.517697753154817 +"MOBS-GLEAM",2023-01-21,4413291.43426512,0.305410054296752,0.109988758558382,1279.5,0.504300445062562,0.282918739635158,665.795,1279.5,0.5,1,0.460639388241491,0.20221226372832,0.504300445062562,0.282918739635158 +"MOBS-GLEAM",2023-01-28,2040800.62469103,0.160722967034098,0.0383702577492334,833,0.365806189425413,0.160130718954248,432.515,833,0.5,1,0.332203970082568,0.113377415739909,0.365806189425413,0.160130718954248 
+"MOBS-GLEAM",2023-02-04,614779.674170713,0.0716788545322083,0.0104880774929804,409.5,0.230628566293275,0.0748902706656913,215.035,409.5,0.5,1,0.215239131931724,0.055133362562079,0.230628566293275,0.0748902706656913 +"PSI-DICE",2022-10-22,35430.4657558039,0.633718207265103,0.163813413790466,158,0.927966003557581,0.447592067988669,71.23,158,0,1,0.645847226352819,0.335958871804547,0.927966003557581,0.447592067988669 +"PSI-DICE",2022-10-29,1497472.35088465,1.52846142035425,0.431908299488921,924.5,1.33515844982056,0.660357142857143,672.71,924.5,0,0,1.09312541894137,0.558840632684256,1.33515844982056,0.660357142857143 +"PSI-DICE",2022-11-05,5612203.99621589,0.942131752720016,0.476086084051736,1764,1.1044251564408,0.689601250977326,1454.925,1764,0,0,1.05235753403749,0.627848117653151,1.1044251564408,0.689601250977326 +"PSI-DICE",2022-11-12,12390868.2140605,0.907071294221524,0.488570757958902,2542,1.04949151763951,0.693020719738277,2179.12,2542,0,0.5,1.03819972964429,0.642879374562083,1.04949151763951,0.693020719738277 +"PSI-DICE",2022-11-19,676235.05850397,0.817567845301119,0.185071520375503,643.5,0.881112537549933,0.46294964028777,303.07,643.5,0,1,0.720706392314482,0.280942934480329,0.881112537549933,0.46294964028777 +"PSI-DICE",2022-11-26,30140437.0403912,0.830123051680458,0.487055778457478,3981.5,0.916084578631409,0.713274811895378,3140.365,3981.5,0.5,0.5,0.905149127291555,0.610677841679752,0.916084578631409,0.713274811895378 +"PSI-DICE",2022-12-03,80859897.561446,0.780769916644506,0.521997686671434,6543,0.888769822506943,0.741374426378109,5565.145,6543,0,0.5,0.888080721120448,0.667547710724867,0.888769822506943,0.741374426378109 +"PSI-DICE",2022-12-10,43082664.3975227,0.68809738692792,0.376715701186336,4780,0.828213948544716,0.628657854935227,3788.445,4780,0.5,0.5,0.871468247812224,0.536537023504019,0.828213948544716,0.628657854935227 
+"PSI-DICE",2022-12-17,1349506.81439674,0.299236500166799,0.424990158604649,708.5,0.507059903470912,0.543745203376823,399.075,708.5,0.5,1,0.508117398840427,0.497215369664349,0.507059903470912,0.543745203376823 +"PSI-DICE",2022-12-24,598632.205240062,0.111264299812846,0.054847738449684,199,0.177688387269978,0.0839131351465317,476.545,199,1,1,0.470424757140252,0.332959531594981,0.177688387269978,0.0839131351465317 +"PSI-DICE",2022-12-31,2696623.46753519,1.74401357920139,0.255931112641852,1785,1.2414045063438,0.763636363636364,993.31,1785,1,1,1.01130249752148,0.707108026339206,1.2414045063438,0.763636363636364 +"PSI-DICE",2023-01-07,659195.808177916,0.328993998504522,0.0108780268699785,302.5,0.287138508566351,0.0543527086515138,850.515,302.5,1,1,0.693107252636451,0.20695860170966,0.287138508566351,0.0543527086515138 +"PSI-DICE",2023-01-14,10858768.1808033,0.914364800769576,0.521167964239081,2338,1.00008697306928,0.719606032625423,1153.03,2338,0,1,0.83817493794253,0.440466049087957,1.00008697306928,0.719606032625423 +"PSI-DICE",2023-01-21,17039659.6862554,1.1791841684355,0.424665137814682,2822.5,1.11245643312941,0.624101713653952,1377.415,2822.5,0,1,0.952983430267054,0.418342290409727,1.11245643312941,0.624101713653952 +"PSI-DICE",2023-01-28,18860859.1441164,1.48538431720322,0.354613781462017,2725,1.19666490538325,0.523836985774702,1337.56,2725,0,0.5,1.02734643243272,0.350621588146245,1.19666490538325,0.523836985774702 +"PSI-DICE",2023-02-04,17508229.011942,2.04133261587809,0.298688571462278,2500,1.40798880520925,0.457205559619605,1188.94,2500,0,0.5,1.19006865635317,0.304835306273668,1.40798880520925,0.457205559619605 diff --git a/tests/testthat/testdata/expected-scores/scores_Last 5 weeks.csv b/tests/testthat/testdata/expected-scores/scores_Last 5 weeks.csv new file mode 100644 index 0000000..3cd8cba --- /dev/null +++ b/tests/testthat/testdata/expected-scores/scores_Last 5 weeks.csv @@ -0,0 +1,4 @@ 
+"model_id","se_point","se_point_relative_skill","se_point_scaled_relative_skill","ae_point","ae_point_relative_skill","ae_point_scaled_relative_skill","wis","ae_median","interval_coverage_50","interval_coverage_95","wis_relative_skill","wis_scaled_relative_skill","ae_median_relative_skill","ae_median_scaled_relative_skill" +"FS-base",32248701.2592051,2.516318304043,1,3752.375,1.80028006322584,1,2671.7,3752.375,0,0.625,2.09158420206952,1,1.80028006322584,1 +"MOBS-GLEAM",7505618.88390074,0.585652303605228,0.232741741243249,1442.8125,0.692219348738606,0.384506479229821,802.53,1442.8125,0.5,1,0.628273784364583,0.300381779391399,0.692219348738606,0.384506479229821 +"PSI-DICE",8696434.28980838,0.678569862626817,0.269667737001535,1672.5625,0.802446696625247,0.445734368233452,972.04875,1672.5625,0.4375,0.875,0.760984320523049,0.363831549200883,0.802446696625247,0.445734368233452 diff --git a/tests/testthat/testdata/expected-scores/scores_Last 5 weeks_by_horizon.csv b/tests/testthat/testdata/expected-scores/scores_Last 5 weeks_by_horizon.csv new file mode 100644 index 0000000..7de7369 --- /dev/null +++ b/tests/testthat/testdata/expected-scores/scores_Last 5 weeks_by_horizon.csv @@ -0,0 +1,13 @@ +"model_id","horizon","se_point","se_point_relative_skill","se_point_scaled_relative_skill","ae_point","ae_point_relative_skill","ae_point_scaled_relative_skill","wis","ae_median","interval_coverage_50","interval_coverage_95","wis_relative_skill","wis_scaled_relative_skill","ae_median_relative_skill","ae_median_scaled_relative_skill" +"FS-base",0,12005415.9967378,1.17910075563572,1,2276,1.1362016338983,1,1710.185,2276,0,0.5,1.46955530884177,1,1.1362016338983,1 +"FS-base",1,25519684.5687617,1.73557665681884,1,3447,1.54610664512698,1,2361.8975,3447,0,0.75,1.76475125714419,1,1.54610664512698,1 +"FS-base",2,31861782.8413004,4.42711075210795,1,3769.75,2.01285819422435,1,2609.7875,3769.75,0,0.75,2.19078914445878,1,2.01285819422435,1 
+"FS-base",3,59607921.6300204,9.85979244980579,1,5516.75,3.40124018345902,1,4004.93,5516.75,0,0.5,3.48430801951263,1,3.40124018345902,1 +"MOBS-GLEAM",0,14403809.3981631,1.41465673076401,1.19977595129373,2318.5,1.15741805280897,1.01867311072056,1187.51,2318.5,0.5,1,1.02042271730993,0.694375169937755,1.15741805280897,1.01867311072056 +"MOBS-GLEAM",1,14125137.1545571,0.96064111818304,0.553499676553504,2128,0.954486492843113,0.617348418914998,1094.9675,2128,0.5,1,0.818132570171667,0.463596536259512,0.954486492843113,0.617348418914998 +"MOBS-GLEAM",2,1085452.78554396,0.150820803773674,0.0340675470343409,772.75,0.412609899751142,0.204987068107965,555.8025,772.75,0.5,1,0.466569053404943,0.212968488813744,0.412609899751142,0.204987068107965 +"MOBS-GLEAM",3,408076.197338859,0.0675001996285132,0.00684600613777045,552,0.340324390496103,0.100058911496805,371.84,552,0.5,1,0.323502556592893,0.0928455678376401,0.340324390496103,0.100058911496805 +"PSI-DICE",0,6104137.49760001,0.5995121816587,0.508448645116394,1523.25,0.760421414251139,0.66926625659051,776.0525,1523.25,0.25,1,0.666858890304222,0.453782777886603,0.760421414251139,0.66926625659051 +"PSI-DICE",1,8819145.94574774,0.599784209529554,0.345582090640068,1510.75,0.677627100123464,0.438279663475486,926.98,1510.75,0.5,1,0.692616474824807,0.392472577662663,0.677627100123464,0.438279663475486 +"PSI-DICE",2,10778741.3058258,1.49767769640803,0.33829686679849,2255,1.2040573587044,0.598182903375555,1165.435,2255,0.5,0.75,0.978325762757436,0.446563178036526,1.2040573587044,0.598182903375555 +"PSI-DICE",3,9083712.41005997,1.50254390000086,0.152391027260463,1401.25,0.863912232214972,0.253999184302352,1019.7275,1401.25,0.5,0.75,0.887167742249569,0.254618058243215,0.863912232214972,0.253999184302352 diff --git a/tests/testthat/testdata/expected-scores/scores_Last 5 weeks_by_location.csv b/tests/testthat/testdata/expected-scores/scores_Last 5 weeks_by_location.csv new file mode 100644 index 0000000..cba0496 --- /dev/null +++ 
b/tests/testthat/testdata/expected-scores/scores_Last 5 weeks_by_location.csv @@ -0,0 +1,7 @@ +"model_id","location","se_point","se_point_relative_skill","se_point_scaled_relative_skill","ae_point","ae_point_relative_skill","ae_point_scaled_relative_skill","wis","ae_median","interval_coverage_50","interval_coverage_95","wis_relative_skill","wis_scaled_relative_skill","ae_median_relative_skill","ae_median_scaled_relative_skill" +"FS-base","01",7934.16128810812,2.64616637050549,1,86.5,1.84494399073852,1,49.975,86.5,0,1,1.73800770724664,1,1.84494399073852,1 +"FS-base","US",64489468.357122,2.516324718005,1,7418.25,1.80001101254491,1,5293.425,7418.25,0,0.25,2.09638683517122,1,1.80001101254491,1 +"MOBS-GLEAM","01",1071.56641993473,0.357384091554102,0.135057302344082,26.625,0.567880158998997,0.307803468208093,15.3625,26.625,0.625,1,0.534270003053056,0.307403701850926,0.567880158998997,0.307803468208093 +"MOBS-GLEAM","US",15010166.2013815,0.585684037969409,0.232753759393085,2859,0.693725809303527,0.385400869477303,1589.6975,2859,0.375,1,0.629577430662491,0.300315485720493,0.693725809303527,0.385400869477303 +"PSI-DICE","01",3170.52744208901,1.05742028544283,0.399604611875116,44.75,0.954465243763572,0.517341040462428,30.96625,44.75,0.375,0.75,1.07693008833469,0.619634817408704,0.954465243763572,0.517341040462428 +"PSI-DICE","US",17389698.0521747,0.6785313658505,0.2696517508235,3300.375,0.800823825771293,0.44489940349813,1913.13125,3300.375,0.5,1,0.757668837558793,0.361416521439333,0.800823825771293,0.44489940349813 diff --git a/tests/testthat/testdata/expected-scores/scores_Last 5 weeks_by_reference_date.csv b/tests/testthat/testdata/expected-scores/scores_Last 5 weeks_by_reference_date.csv new file mode 100644 index 0000000..5f5f958 --- /dev/null +++ b/tests/testthat/testdata/expected-scores/scores_Last 5 weeks_by_reference_date.csv @@ -0,0 +1,7 @@ 
+"model_id","reference_date","se_point","se_point_relative_skill","se_point_scaled_relative_skill","ae_point","ae_point_relative_skill","ae_point_scaled_relative_skill","wis","ae_median","interval_coverage_50","interval_coverage_95","wis_relative_skill","wis_scaled_relative_skill","ae_median_relative_skill","ae_median_scaled_relative_skill" +"FS-base",2022-12-17,21306295.3973807,3.10891393913009,1,2894.375,1.82701375914094,1,1937.05,2894.375,0,0.75,1.73261142871645,1,1.82701375914094,1 +"FS-base",2023-01-14,43191107.1210294,3.1778410567911,1,4610.375,1.98229121430346,1,3406.35,4610.375,0,0.5,2.56485081622142,1,1.98229121430346,1 +"MOBS-GLEAM",2022-12-17,11393303.8117821,1.6624570518978,0.534738845927327,1834.625,1.1580687083961,0.633858777801771,1061.10375,1834.625,0.5,1,0.949113592475093,0.547793681113033,1.1580687083961,0.633858777801771 +"MOBS-GLEAM",2023-01-14,3617933.95601942,0.266194127276707,0.0837657146847685,1051,0.451891237965012,0.227964102703142,543.95625,1051,0.5,1,0.40957817951803,0.159688889867453,0.451891237965012,0.227964102703142 +"PSI-DICE",2022-12-17,1325989.57383748,0.193482132503959,0.0622346376555207,748.75,0.472632797117437,0.258691427337508,679.86125,748.75,0.875,1,0.608107881413205,0.350977646421104,0.472632797117437,0.258691427337508 +"PSI-DICE",2023-01-14,16066879.0057793,1.18214121291188,0.371995072058629,2596.375,1.11634549283673,0.563159179025567,1264.23625,2596.375,0,0.75,0.951921375580667,0.371141030722034,1.11634549283673,0.563159179025567 diff --git a/tests/testthat/testdata/expected-scores/scores_Last 5 weeks_by_target_end_date.csv b/tests/testthat/testdata/expected-scores/scores_Last 5 weeks_by_target_end_date.csv new file mode 100644 index 0000000..214da8a --- /dev/null +++ b/tests/testthat/testdata/expected-scores/scores_Last 5 weeks_by_target_end_date.csv @@ -0,0 +1,25 @@ 
+"model_id","target_end_date","se_point","se_point_relative_skill","se_point_scaled_relative_skill","ae_point","ae_point_relative_skill","ae_point_scaled_relative_skill","wis","ae_median","interval_coverage_50","interval_coverage_95","wis_relative_skill","wis_scaled_relative_skill","ae_median_relative_skill","ae_median_scaled_relative_skill" +"FS-base",2022-12-17,3175383.68141869,0.704102187093623,1,1303,0.932532186623287,1,802.62,1303,0,0.5,1.0219261709135,1,0.932532186623287,1 +"FS-base",2022-12-24,10914437.3525853,2.02860323794238,1,2371.5,2.1175276905063,1,1431.24,2371.5,0,1,1.41285865848852,1,2.1175276905063,1 +"FS-base",2022-12-31,10536520.7055104,6.81438673555078,1,2337.5,1.62564875830736,1,1404.75,2337.5,0,1,1.4301951892091,1,1.62564875830736,1 +"FS-base",2023-01-07,60598839.8500084,30.2439038289645,1,5565.5,5.28287394851579,1,4109.59,5565.5,0,0.5,3.34901399077292,1,5.28287394851579,1 +"FS-base",2023-01-14,20835448.3120569,1.75445319649409,1,3249,1.38977013494529,1,2617.75,3249,0,0.5,1.90292745531258,1,1.38977013494529,1 +"FS-base",2023-01-21,40124931.7849381,2.77673880767223,1,4522.5,1.78249219444739,1,3292.555,4522.5,0,0.5,2.27799926546679,1,1.78249219444739,1 +"FS-base",2023-01-28,53187044.9770904,4.18873826922126,1,5202,2.28442232579952,1,3814.825,5202,0,0.5,2.93007181293188,1,2.28442232579952,1 +"FS-base",2023-02-04,58617003.4100323,6.83431778418711,1,5468,3.07955311475367,1,3900.27,5468,0,0.5,3.90397251191362,1,3.07955311475367,1 +"MOBS-GLEAM",2022-12-17,21404754.7053754,4.74624049069998,6.74084043154503,2955,2.1148370003621,2.26784343821949,1512.54,2955,0.5,1,1.9258231922373,1.88450325185019,2.1148370003621,2.26784343821949 +"MOBS-GLEAM",2022-12-24,23836982.874849,4.43044190741004,2.18398641219949,2976.5,2.65773610406578,1.2551127978073,1524.14,2976.5,0.5,1,1.50456554857934,1.06490875045415,2.65773610406578,1.2551127978073 
+"MOBS-GLEAM",2022-12-31,130104.946396882,0.0841440401187458,0.0123479989299352,712.5,0.495518605473366,0.304812834224599,679.09,712.5,0.5,1,0.691390817611683,0.48342409681438,0.495518605473366,0.304812834224599 +"MOBS-GLEAM",2023-01-07,201372.720507006,0.100501877723489,0.0033230458042668,694.5,0.659232046939936,0.124786631928847,528.645,694.5,0.5,1,0.43080684475876,0.128636919984719,0.659232046939936,0.124786631928847 +"MOBS-GLEAM",2023-01-14,7402864.09095083,0.623359688404893,0.355301406529708,1682,0.719480876262846,0.517697753154817,862.48,1682,0.5,1,0.626964710785212,0.329473784738802,0.719480876262846,0.517697753154817 +"MOBS-GLEAM",2023-01-21,4413291.43426512,0.305410054296752,0.109988758558382,1279.5,0.504300445062562,0.282918739635158,665.795,1279.5,0.5,1,0.460639388241491,0.20221226372832,0.504300445062562,0.282918739635158 +"MOBS-GLEAM",2023-01-28,2040800.62469103,0.160722967034098,0.0383702577492334,833,0.365806189425413,0.160130718954248,432.515,833,0.5,1,0.332203970082568,0.113377415739909,0.365806189425413,0.160130718954248 +"MOBS-GLEAM",2023-02-04,614779.674170713,0.0716788545322083,0.0104880774929804,409.5,0.230628566293275,0.0748902706656913,215.035,409.5,0.5,1,0.215239131931724,0.055133362562079,0.230628566293275,0.0748902706656913 +"PSI-DICE",2022-12-17,1349506.81439674,0.299236500166799,0.424990158604649,708.5,0.507059903470912,0.543745203376823,399.075,708.5,0.5,1,0.508117398840427,0.497215369664349,0.507059903470912,0.543745203376823 +"PSI-DICE",2022-12-24,598632.205240062,0.111264299812846,0.054847738449684,199,0.177688387269978,0.0839131351465317,476.545,199,1,1,0.470424757140252,0.332959531594981,0.177688387269978,0.0839131351465317 +"PSI-DICE",2022-12-31,2696623.46753519,1.74401357920139,0.255931112641852,1785,1.2414045063438,0.763636363636364,993.31,1785,1,1,1.01130249752148,0.707108026339206,1.2414045063438,0.763636363636364 
+"PSI-DICE",2023-01-07,659195.808177916,0.328993998504522,0.0108780268699785,302.5,0.287138508566351,0.0543527086515138,850.515,302.5,1,1,0.693107252636451,0.20695860170966,0.287138508566351,0.0543527086515138 +"PSI-DICE",2023-01-14,10858768.1808033,0.914364800769576,0.521167964239081,2338,1.00008697306928,0.719606032625423,1153.03,2338,0,1,0.83817493794253,0.440466049087957,1.00008697306928,0.719606032625423 +"PSI-DICE",2023-01-21,17039659.6862554,1.1791841684355,0.424665137814682,2822.5,1.11245643312941,0.624101713653952,1377.415,2822.5,0,1,0.952983430267054,0.418342290409727,1.11245643312941,0.624101713653952 +"PSI-DICE",2023-01-28,18860859.1441164,1.48538431720322,0.354613781462017,2725,1.19666490538325,0.523836985774702,1337.56,2725,0,0.5,1.02734643243272,0.350621588146245,1.19666490538325,0.523836985774702 +"PSI-DICE",2023-02-04,17508229.011942,2.04133261587809,0.298688571462278,2500,1.40798880520925,0.457205559619605,1188.94,2500,0,0.5,1.19006865635317,0.304835306273668,1.40798880520925,0.457205559619605 From 221b1b0e4bef9b62d5f89c2d0fe3c6ba3d6e2b42 Mon Sep 17 00:00:00 2001 From: "Evan L. 
Ray" Date: Wed, 8 Jan 2025 17:02:51 -0500 Subject: [PATCH 3/5] no spaces in file name --- tests/testthat/helper-check_exp_scores_for_window.R | 4 +++- tests/testthat/testdata/create_exp_score_fixtures.R | 1 + .../{scores_Full season.csv => scores_Full_season.csv} | 0 ...eason_by_horizon.csv => scores_Full_season_by_horizon.csv} | 0 ...son_by_location.csv => scores_Full_season_by_location.csv} | 0 ...ence_date.csv => scores_Full_season_by_reference_date.csv} | 0 ...end_date.csv => scores_Full_season_by_target_end_date.csv} | 0 .../{scores_Last 5 weeks.csv => scores_Last_5_weeks.csv} | 0 ...eeks_by_horizon.csv => scores_Last_5_weeks_by_horizon.csv} | 0 ...ks_by_location.csv => scores_Last_5_weeks_by_location.csv} | 0 ...nce_date.csv => scores_Last_5_weeks_by_reference_date.csv} | 0 ...nd_date.csv => scores_Last_5_weeks_by_target_end_date.csv} | 0 12 files changed, 4 insertions(+), 1 deletion(-) rename tests/testthat/testdata/expected-scores/{scores_Full season.csv => scores_Full_season.csv} (100%) rename tests/testthat/testdata/expected-scores/{scores_Full season_by_horizon.csv => scores_Full_season_by_horizon.csv} (100%) rename tests/testthat/testdata/expected-scores/{scores_Full season_by_location.csv => scores_Full_season_by_location.csv} (100%) rename tests/testthat/testdata/expected-scores/{scores_Full season_by_reference_date.csv => scores_Full_season_by_reference_date.csv} (100%) rename tests/testthat/testdata/expected-scores/{scores_Full season_by_target_end_date.csv => scores_Full_season_by_target_end_date.csv} (100%) rename tests/testthat/testdata/expected-scores/{scores_Last 5 weeks.csv => scores_Last_5_weeks.csv} (100%) rename tests/testthat/testdata/expected-scores/{scores_Last 5 weeks_by_horizon.csv => scores_Last_5_weeks_by_horizon.csv} (100%) rename tests/testthat/testdata/expected-scores/{scores_Last 5 weeks_by_location.csv => scores_Last_5_weeks_by_location.csv} (100%) rename tests/testthat/testdata/expected-scores/{scores_Last 5 
weeks_by_reference_date.csv => scores_Last_5_weeks_by_reference_date.csv} (100%) rename tests/testthat/testdata/expected-scores/{scores_Last 5 weeks_by_target_end_date.csv => scores_Last_5_weeks_by_target_end_date.csv} (100%) diff --git a/tests/testthat/helper-check_exp_scores_for_window.R b/tests/testthat/helper-check_exp_scores_for_window.R index 57d6667..5f13f13 100644 --- a/tests/testthat/helper-check_exp_scores_for_window.R +++ b/tests/testthat/helper-check_exp_scores_for_window.R @@ -19,9 +19,11 @@ check_exp_scores_for_window <- function(out_path, window_name, model_out_tbl, or actual_scores <- read.csv(scores_path) + file_name <- paste0("scores_", window_name, ifelse(is.null(by), "", paste0("_by_", by)), ".csv") + file_name <- gsub(" ", "_", file_name) expected_scores_path <- testthat::test_path( "testdata", "expected-scores", - paste0("scores_", window_name, ifelse(is.null(by), "", paste0("_by_", by)), ".csv") + file_name ) expected_scores <- read.csv(expected_scores_path) if (!include_rel) { diff --git a/tests/testthat/testdata/create_exp_score_fixtures.R b/tests/testthat/testdata/create_exp_score_fixtures.R index 78599b0..a72ede6 100644 --- a/tests/testthat/testdata/create_exp_score_fixtures.R +++ b/tests/testthat/testdata/create_exp_score_fixtures.R @@ -47,6 +47,7 @@ make_score_fixtures_one_window <- function(window_name, model_out_tbl) { } file_name <- paste0("scores_", window_name, ifelse(is.null(by), "", paste0("_by_", by)), ".csv") + file_name <- gsub(" ", "_", file_name) write.csv(expected_scores, file = file.path(save_path, file_name), row.names = FALSE) } } diff --git a/tests/testthat/testdata/expected-scores/scores_Full season.csv b/tests/testthat/testdata/expected-scores/scores_Full_season.csv similarity index 100% rename from tests/testthat/testdata/expected-scores/scores_Full season.csv rename to tests/testthat/testdata/expected-scores/scores_Full_season.csv diff --git a/tests/testthat/testdata/expected-scores/scores_Full season_by_horizon.csv 
b/tests/testthat/testdata/expected-scores/scores_Full_season_by_horizon.csv similarity index 100% rename from tests/testthat/testdata/expected-scores/scores_Full season_by_horizon.csv rename to tests/testthat/testdata/expected-scores/scores_Full_season_by_horizon.csv diff --git a/tests/testthat/testdata/expected-scores/scores_Full season_by_location.csv b/tests/testthat/testdata/expected-scores/scores_Full_season_by_location.csv similarity index 100% rename from tests/testthat/testdata/expected-scores/scores_Full season_by_location.csv rename to tests/testthat/testdata/expected-scores/scores_Full_season_by_location.csv diff --git a/tests/testthat/testdata/expected-scores/scores_Full season_by_reference_date.csv b/tests/testthat/testdata/expected-scores/scores_Full_season_by_reference_date.csv similarity index 100% rename from tests/testthat/testdata/expected-scores/scores_Full season_by_reference_date.csv rename to tests/testthat/testdata/expected-scores/scores_Full_season_by_reference_date.csv diff --git a/tests/testthat/testdata/expected-scores/scores_Full season_by_target_end_date.csv b/tests/testthat/testdata/expected-scores/scores_Full_season_by_target_end_date.csv similarity index 100% rename from tests/testthat/testdata/expected-scores/scores_Full season_by_target_end_date.csv rename to tests/testthat/testdata/expected-scores/scores_Full_season_by_target_end_date.csv diff --git a/tests/testthat/testdata/expected-scores/scores_Last 5 weeks.csv b/tests/testthat/testdata/expected-scores/scores_Last_5_weeks.csv similarity index 100% rename from tests/testthat/testdata/expected-scores/scores_Last 5 weeks.csv rename to tests/testthat/testdata/expected-scores/scores_Last_5_weeks.csv diff --git a/tests/testthat/testdata/expected-scores/scores_Last 5 weeks_by_horizon.csv b/tests/testthat/testdata/expected-scores/scores_Last_5_weeks_by_horizon.csv similarity index 100% rename from tests/testthat/testdata/expected-scores/scores_Last 5 weeks_by_horizon.csv rename to 
tests/testthat/testdata/expected-scores/scores_Last_5_weeks_by_horizon.csv diff --git a/tests/testthat/testdata/expected-scores/scores_Last 5 weeks_by_location.csv b/tests/testthat/testdata/expected-scores/scores_Last_5_weeks_by_location.csv similarity index 100% rename from tests/testthat/testdata/expected-scores/scores_Last 5 weeks_by_location.csv rename to tests/testthat/testdata/expected-scores/scores_Last_5_weeks_by_location.csv diff --git a/tests/testthat/testdata/expected-scores/scores_Last 5 weeks_by_reference_date.csv b/tests/testthat/testdata/expected-scores/scores_Last_5_weeks_by_reference_date.csv similarity index 100% rename from tests/testthat/testdata/expected-scores/scores_Last 5 weeks_by_reference_date.csv rename to tests/testthat/testdata/expected-scores/scores_Last_5_weeks_by_reference_date.csv diff --git a/tests/testthat/testdata/expected-scores/scores_Last 5 weeks_by_target_end_date.csv b/tests/testthat/testdata/expected-scores/scores_Last_5_weeks_by_target_end_date.csv similarity index 100% rename from tests/testthat/testdata/expected-scores/scores_Last 5 weeks_by_target_end_date.csv rename to tests/testthat/testdata/expected-scores/scores_Last_5_weeks_by_target_end_date.csv From 7dd09cbf0f3204219b5a28702fe522b3bac30c68 Mon Sep 17 00:00:00 2001 From: Evan Ray Date: Wed, 8 Jan 2025 20:22:52 -0500 Subject: [PATCH 4/5] Update tests/testthat/testdata/create_exp_score_fixtures.R Co-authored-by: Zhian N. 
Kamvar --- tests/testthat/testdata/create_exp_score_fixtures.R | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tests/testthat/testdata/create_exp_score_fixtures.R b/tests/testthat/testdata/create_exp_score_fixtures.R index a72ede6..780bce7 100644 --- a/tests/testthat/testdata/create_exp_score_fixtures.R +++ b/tests/testthat/testdata/create_exp_score_fixtures.R @@ -1,8 +1,3 @@ -library(testthat) -library(hubData) -library(hubEvals) -library(dplyr) - hub_path <- testthat::test_path("testdata", "ecfh") model_out_tbl <- hubData::connect_hub(hub_path) |> dplyr::collect() From 8a3632a505d63006b95fc1cb4d806b0d6473cc5b Mon Sep 17 00:00:00 2001 From: "Evan L. Ray" Date: Wed, 8 Jan 2025 20:37:16 -0500 Subject: [PATCH 5/5] make the script go again --- tests/testthat/testdata/create_exp_score_fixtures.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/testthat/testdata/create_exp_score_fixtures.R b/tests/testthat/testdata/create_exp_score_fixtures.R index 780bce7..36e3e22 100644 --- a/tests/testthat/testdata/create_exp_score_fixtures.R +++ b/tests/testthat/testdata/create_exp_score_fixtures.R @@ -1,8 +1,10 @@ +library(rlang) + hub_path <- testthat::test_path("testdata", "ecfh") model_out_tbl <- hubData::connect_hub(hub_path) |> dplyr::collect() oracle_output <- read.csv( - test_path("testdata", "ecfh", "target-data", "oracle-output.csv") + testthat::test_path("testdata", "ecfh", "target-data", "oracle-output.csv") ) oracle_output[["target_end_date"]] <- as.Date(oracle_output[["target_end_date"]])