hubverse-org · elray1 · Dec 24, 2024 · Dec 23, 2024 · Dec 23, 2024 · Dec 24, 2024
diff --git a/R/config.R b/R/config.R
@@ -120,34 +120,26 @@ validate_config_targets <- function(webevals_config, task_groups, task_id_names)
     }
 
     # check that metrics are valid for the available output types
-    output_types_for_target <- purrr::map(
-      task_groups_w_target,
-      function(task_group) names(task_group[["output_type"]])
-    ) |>
-      unlist() |>
-      unique()
-    target_type <- task_groups_w_target[[1]]$target_metadata[[1]]$target_type
-    target_is_ordinal <- target_type == "ordinal"
-
     metric_name_to_output_type <- get_metric_name_to_output_type(
-      target$metrics,
-      output_types_for_target,
-      target_is_ordinal
+      task_groups_w_target,
+      target$metrics
     )
     unsupported_metrics <- setdiff(
       target$metrics,
       metric_name_to_output_type$metric[!is.na(metric_name_to_output_type$output_type)]
     )
 
     if (length(unsupported_metrics) > 0) {
+      available_output_types <- get_output_types(task_groups_w_target) # nolint: object_usage
+      target_is_ordinal <- is_target_ordinal(task_groups_w_target)
       raise_config_error(
         c(
           cli::format_inline(
             "Requested scores for metrics that are incompatible with the ",
             "available output types for {.arg target_id} {.val {target_id}}."
           ),
           "i" = cli::format_inline(
-            "Output type{?s}: {.val {output_types_for_target}}",
+            "Output type{?s}: {.val {available_output_types}}",
             ifelse(target_is_ordinal, " for ordinal target.", ".")
           ),
           "x" = cli::format_inline(
@@ -251,61 +243,6 @@ validate_config_task_id_text <- function(webevals_config, task_groups, task_id_n
 }
 
 
-#' Get a data frame with 1 row for each metric, matching the metric with the
-#' output type to use for calculating the metric.  If the metric is invalid or
-#' can't be calculated from the available_output_types, the output_type will be
-#' NA.
-#'
-#' This implementation is somewhat fragile.  It assumes that all metrics are
-#' either an interval coverage (to be computed based on quantile forecasts) or
-#' a standard metric provided by scoringutils.  If hubEvals eventually supports
-#' other metrics, this function will need to be updated.
-#'
-#' Consider moving this function to hubEvals.
-#' @noRd
-get_metric_name_to_output_type <- function(metrics, available_output_types,
-                                           is_ordinal) {
-  result <- data.frame(
-    metric = metrics,
-    output_type = NA_character_
-  )
-
-  # manually handle interval coverage
-  if ("quantile" %in% available_output_types) {
-    result$output_type[grepl(pattern = "^interval_coverage_", x = metrics)] <- "quantile"
-  }
-
-  # other metrics
-  for (output_type in available_output_types) {
-    supported_metrics <- get_standard_metrics(output_type, is_ordinal)
-    result$output_type[result$metric %in% supported_metrics] <- output_type
-  }
-
-  return(result)
-}
-
-
-#' Get the standard metrics that are supported for a given output type
-#' @noRd
-get_standard_metrics <- function(output_type, is_ordinal) {
-  return(
-    switch(
-      output_type,
-      mean = "se_point",
-      median = "ae_point",
-      quantile = names(scoringutils::get_metrics(scoringutils::example_quantile)),
-      pmf = if (is_ordinal) {
-        names(scoringutils::get_metrics(scoringutils::example_ordinal))
-      } else {
-        names(scoringutils::get_metrics(scoringutils::example_nominal))
-      },
-      cdf = NULL,
-      sample = NULL
-    )
-  )
-}
-
-
 #' Raise an error related to the webevals config file
 #' @noRd
 raise_config_error <- function(msgs) {

diff --git a/R/utils-hub_tasks_config.R b/R/utils-hub_tasks_config.R
@@ -19,3 +19,26 @@ filter_task_groups_to_target <- function(task_groups, target_id) {
 
   return(task_groups)
 }
+
+
+#' Collect the unique output type names that appear in any task group
+#' @noRd
+get_output_types <- function(task_groups) {
+  # names of the `output_type` entry of each task group, one vector per group
+  names_by_group <- purrr::map(
+    task_groups,
+    \(group) names(group[["output_type"]])
+  )
+  # flatten across groups; unique() keeps the first occurrence of each name
+  unique(unlist(names_by_group))
+}
+
+
+#' Get a boolean (always length 1) indicating whether the target is ordinal
+#' @noRd
+is_target_ordinal <- function(task_groups_w_target) {
+  # The task_groups_w_target has been filtered to a single target, so we only
+  # check the first entry; identical() gives FALSE (not logical(0)) if absent
+  target_type <- task_groups_w_target[[1]]$target_metadata[[1]]$target_type
+  identical(target_type, "ordinal")
+}
diff --git a/R/utils-metrics.R b/R/utils-metrics.R
@@ -0,0 +1,59 @@
+#' Get a data frame with 1 row for each metric, matching the metric with the
+#' output type to use for calculating the metric.  If the metric is invalid or
+#' can't be calculated from the available output types for the target, the
+#' output_type will be NA.  Zero metrics yield a zero-row data frame.
+#'
+#' This implementation is somewhat fragile.  It assumes that all metrics are
+#' either an interval coverage (to be computed based on quantile forecasts) or
+#' a standard metric provided by scoringutils.  If hubEvals eventually supports
+#' other metrics, this function will need to be updated.
+#'
+#' @noRd
+get_metric_name_to_output_type <- function(task_groups_w_target, metrics) {
+  # the available output types for the target, based on the hub's tasks config
+  available_output_types <- get_output_types(task_groups_w_target)
+
+  # indicator of whether the target is ordinal
+  target_is_ordinal <- is_target_ordinal(task_groups_w_target)
+
+  # result is a data frame with 1 row for each metric; output_type is built
+  # at full length because data.frame() won't recycle a scalar to zero rows
+  result <- data.frame(
+    metric = metrics,
+    output_type = rep(NA_character_, length(metrics))
+  )
+
+  # manually handle interval coverage
+  if ("quantile" %in% available_output_types) {
+    result$output_type[grepl(pattern = "^interval_coverage_", x = metrics)] <- "quantile"
+  }
+
+  # other metrics; when several output types support a metric, the last wins
+  for (output_type in available_output_types) {
+    supported_metrics <- get_standard_metrics(output_type, target_is_ordinal)
+    result$output_type[result$metric %in% supported_metrics] <- output_type
+  }
+
+  return(result)
+}
+
+
+#' Look up the standard metric names supported for a given output type
+#' @noRd
+get_standard_metrics <- function(output_type, target_is_ordinal) {
+  # metric names are whatever scoringutils::get_metrics() returns for a forecast
+  metric_names_for <- function(example_forecast) {
+    names(scoringutils::get_metrics(example_forecast))
+  }
+  switch(
+    output_type,
+    mean = "se_point",
+    median = "ae_point",
+    quantile = metric_names_for(scoringutils::example_quantile),
+    pmf = metric_names_for(
+      if (target_is_ordinal) scoringutils::example_ordinal else scoringutils::example_nominal
+    ),
+    cdf = ,
+    sample = NULL
+  )
+}
diff --git a/tests/testthat/test-utils-hub_tasks_config.R b/tests/testthat/test-utils-hub_tasks_config.R
@@ -61,3 +61,60 @@ test_that(
     )
   }
 )
+
+
+test_that(
+  "get_output_types works",
+  {
+    # Build a task group whose `output_type` entry has the given names; only
+    # the names matter to get_output_types, the values are placeholders.
+    make_task_group <- function(type_names) {
+      placeholder_values <- paste0(type_names, "_value")
+      list(
+        output_type = as.list(
+          stats::setNames(placeholder_values, type_names)
+        )
+      )
+    }
+
+    # "output_type_2" and "output_type_3" each appear in two task groups
+    task_groups <- list(
+      make_task_group(c("output_type_1", "output_type_2")),
+      make_task_group(c("output_type_2", "output_type_3")),
+      make_task_group("output_type_3")
+    )
+
+    # each output type is reported once, in order of first appearance
+    observed_types <- get_output_types(task_groups)
+    expect_equal(
+      observed_types,
+      c("output_type_1", "output_type_2", "output_type_3")
+    )
+  }
+)
+
+
+test_that(
+  "is_target_ordinal works",
+  {
+    # Minimal task groups for a single target: one task group holding one
+    # target_metadata entry with the requested target_type.
+    task_groups_with_type <- function(target_type) {
+      list(
+        list(
+          target_metadata = list(
+            list(target_type = target_type)
+          )
+        )
+      )
+    }
+
+    # an ordinal target is flagged
+    ordinal_task_groups <- task_groups_with_type("ordinal")
+    expect_true(is_target_ordinal(ordinal_task_groups))
+
+    # a nominal target is not flagged
+    nominal_task_groups <- task_groups_with_type("nominal")
+    expect_false(is_target_ordinal(nominal_task_groups))
+  }
+)
diff --git a/tests/testthat/test-utils-metrics.R b/tests/testthat/test-utils-metrics.R
@@ -0,0 +1,70 @@
+test_that(
+  "get_metric_name_to_output_type works, no ordinal targets",
+  {
+    # Build a task group offering the given output types (with empty
+    # configs) for a target of the given type.
+    make_task_group <- function(output_type_names, target_type) {
+      empty_configs <- rep(list(list()), length(output_type_names))
+      list(
+        output_type = stats::setNames(empty_configs, output_type_names),
+        target_metadata = list(list(target_type = target_type))
+      )
+    }
+
+    task_groups <- list(
+      make_task_group(c("mean", "quantile"), "continuous"),
+      make_task_group("median", "continuous"),
+      make_task_group("pmf", "nominal")
+    )
+
+    # requested metric name -> output type expected to be used for it;
+    # NA marks metrics that cannot be computed for this target
+    expected_output_types <- c(
+      "se_point" = "mean",
+      "ae_point" = "median",
+      "interval_coverage_50" = "quantile",
+      "wis" = "quantile",
+      "ae_median" = "quantile",
+      "NOT A REAL METRIC" = NA_character_,
+      "log_score" = "pmf",
+      # note: the "rps" metric is only supported for ordinal pmf targets
+      "rps" = NA_character_
+    )
+    metrics <- names(expected_output_types)
+
+    expect_equal(
+      get_metric_name_to_output_type(task_groups, metrics),
+      data.frame(
+        metric = metrics,
+        output_type = unname(expected_output_types)
+      )
+    )
+  }
+)
+
+
+test_that(
+  "get_metric_name_to_output_type works, ordinal target",
+  {
+    # a single task group offering only the pmf output type, ordinal target
+    task_groups <- list(list(
+      output_type = list("pmf" = list()),
+      target_metadata = list(list(target_type = "ordinal"))
+    ))
+
+    # metrics expected to have no usable output type for this target ...
+    unsupported_metrics <- c("se_point", "ae_point", "interval_coverage_50",
+                             "wis", "ae_median", "NOT A REAL METRIC")
+    # ... and metrics expected to be computed from the pmf output type
+    pmf_metrics <- c("log_score", "rps")
+    metrics <- c(unsupported_metrics, pmf_metrics)
+
+    expect_equal(
+      get_metric_name_to_output_type(task_groups, metrics),
+      data.frame(
+        metric = metrics,
+        output_type = ifelse(metrics %in% pmf_metrics, "pmf", NA_character_)
+      )
+    )
+  }
+)