-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
shuffle code around in preparation to add support for actually computing scores #8
Merged
Merged
Changes from 2 commits
Commits
Show all changes
4 commits
Select commit
Hold shift + click to select a range
1abd554
shuffle code around in preparation to add support for actually comput…
elray1 d7aa9b6
add explanatory comment to test
elray1 3849574
Apply suggestions from code review
elray1 f823d8e
add explanation of reasoning for is_target_ordinal
elray1 File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -120,34 +120,26 @@ validate_config_targets <- function(webevals_config, task_groups, task_id_names) | |
} | ||
|
||
# check that metrics are valid for the available output types | ||
output_types_for_target <- purrr::map( | ||
task_groups_w_target, | ||
function(task_group) names(task_group[["output_type"]]) | ||
) |> | ||
unlist() |> | ||
unique() | ||
target_type <- task_groups_w_target[[1]]$target_metadata[[1]]$target_type | ||
target_is_ordinal <- target_type == "ordinal" | ||
|
||
metric_name_to_output_type <- get_metric_name_to_output_type( | ||
target$metrics, | ||
output_types_for_target, | ||
target_is_ordinal | ||
task_groups_w_target, | ||
target$metrics | ||
) | ||
unsupported_metrics <- setdiff( | ||
target$metrics, | ||
metric_name_to_output_type$metric[!is.na(metric_name_to_output_type$output_type)] | ||
) | ||
|
||
if (length(unsupported_metrics) > 0) { | ||
available_output_types <- get_output_types(task_groups_w_target) # nolint: object_usage | ||
target_is_ordinal <- get_target_is_ordinal(task_groups_w_target) | ||
elray1 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
raise_config_error( | ||
c( | ||
cli::format_inline( | ||
"Requested scores for metrics that are incompatible with the ", | ||
"available output types for {.arg target_id} {.val {target_id}}." | ||
), | ||
"i" = cli::format_inline( | ||
"Output type{?s}: {.val {output_types_for_target}}", | ||
"Output type{?s}: {.val {available_output_types}}", | ||
ifelse(target_is_ordinal, " for ordinal target.", ".") | ||
), | ||
"x" = cli::format_inline( | ||
|
@@ -251,61 +243,6 @@ validate_config_task_id_text <- function(webevals_config, task_groups, task_id_n | |
} | ||
|
||
|
||
#' Get a data frame with 1 row for each metric, matching the metric with the | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
#' output type to use for calculating the metric. If the metric is invalid or | ||
#' can't be calculated from the available_output_types, the output_type will be | ||
#' NA. | ||
#' | ||
#' This implementation is somewhat fragile. It assumes that all metrics are | ||
#' either an interval coverage (to be computed based on quantile forecasts) or | ||
#' a standard metric provided by scoringutils. If hubEvals eventually supports | ||
#' other metrics, this function will need to be updated. | ||
#' | ||
#' Consider moving this function to hubEvals. | ||
#' @noRd | ||
get_metric_name_to_output_type <- function(metrics, available_output_types,
                                           is_ordinal) {
  # metrics: character vector of requested metric names
  # available_output_types: character vector of output types to match against
  # is_ordinal: passed through to get_standard_metrics() for each output type
  # returns: data frame with columns `metric` and `output_type`, 1 row per
  #   metric; `output_type` is NA for metrics that matched nothing

  # treat a missing metrics entry the same as "no metrics requested", so the
  # result always carries a `metric` column
  if (is.null(metrics)) {
    metrics <- character(0)
  }

  # size the NA column explicitly: data.frame() refuses to recycle a length-1
  # value against a zero-length `metrics` vector and would raise an error
  result <- data.frame(
    metric = metrics,
    output_type = rep(NA_character_, length(metrics))
  )

  # manually handle interval coverage, which is matched by name prefix and is
  # only assigned when quantile output is available
  if ("quantile" %in% available_output_types) {
    is_interval_coverage <- grepl(pattern = "^interval_coverage_", x = metrics)
    result$output_type[is_interval_coverage] <- "quantile"
  }

  # other metrics: if more than one output type supports a metric, the last
  # output type visited by the loop is the one recorded
  for (output_type in available_output_types) {
    supported_metrics <- get_standard_metrics(output_type, is_ordinal)
    result$output_type[result$metric %in% supported_metrics] <- output_type
  }

  result
}
|
||
|
||
#' Get the standard metrics that are supported for a given output type | ||
#' @noRd | ||
get_standard_metrics <- function(output_type, is_ordinal) {
  # names of the metrics scoringutils reports for an example forecast object
  metric_names_for <- function(example_forecast) {
    names(scoringutils::get_metrics(example_forecast))
  }

  # dispatch on the output type; an output type that is not listed yields NULL
  switch(
    output_type,
    mean = "se_point",
    median = "ae_point",
    quantile = metric_names_for(scoringutils::example_quantile),
    # pmf metrics depend on whether the target is ordinal or nominal
    pmf = metric_names_for(
      if (is_ordinal) {
        scoringutils::example_ordinal
      } else {
        scoringutils::example_nominal
      }
    ),
    # no standard metrics are listed for these output types
    cdf = NULL,
    sample = NULL
  )
}
|
||
|
||
#' Raise an error related to the webevals config file | ||
#' @noRd | ||
raise_config_error <- function(msgs) { | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
#' Get a data frame with 1 row for each metric, matching the metric with the | ||
#' output type to use for calculating the metric. If the metric is invalid or | ||
#' can't be calculated from the available output types for the target, the | ||
#' output_type will be NA. | ||
#' | ||
#' This implementation is somewhat fragile. It assumes that all metrics are | ||
#' either an interval coverage (to be computed based on quantile forecasts) or | ||
#' a standard metric provided by scoringutils. If hubEvals eventually supports | ||
#' other metrics, this function will need to be updated. | ||
#' | ||
#' @noRd | ||
get_metric_name_to_output_type <- function(task_groups_w_target, metrics) {
  # task_groups_w_target: task groups for the target, passed unchanged to
  #   get_output_types() and get_target_is_ordinal()
  # metrics: character vector of requested metric names
  # returns: data frame with columns `metric` and `output_type`, 1 row per
  #   metric; `output_type` is NA for metrics that matched nothing

  # the available output types for the target, based on the hub's tasks config
  available_output_types <- get_output_types(task_groups_w_target)

  # indicator of whether the target is ordinal
  target_is_ordinal <- get_target_is_ordinal(task_groups_w_target)

  # treat a missing metrics entry the same as "no metrics requested", so the
  # result always carries a `metric` column
  if (is.null(metrics)) {
    metrics <- character(0)
  }

  # result is a data frame with 1 row for each metric
  # we populate the output type to use for each metric below
  # the NA column is sized explicitly: data.frame() refuses to recycle a
  # length-1 value against a zero-length `metrics` vector and would error
  result <- data.frame(
    metric = metrics,
    output_type = rep(NA_character_, length(metrics))
  )

  # manually handle interval coverage, which is matched by name prefix and is
  # only assigned when quantile output is available
  if ("quantile" %in% available_output_types) {
    is_interval_coverage <- grepl(pattern = "^interval_coverage_", x = metrics)
    result$output_type[is_interval_coverage] <- "quantile"
  }

  # other metrics: if more than one output type supports a metric, the last
  # output type visited by the loop is the one recorded
  for (output_type in available_output_types) {
    supported_metrics <- get_standard_metrics(output_type, target_is_ordinal)
    result$output_type[result$metric %in% supported_metrics] <- output_type
  }

  result
}
|
||
|
||
#' Get the standard metrics that are supported for a given output type | ||
#' @noRd | ||
get_standard_metrics <- function(output_type, target_is_ordinal) {
  # names of the metrics scoringutils reports for an example forecast object
  metric_names_for <- function(example_forecast) {
    names(scoringutils::get_metrics(example_forecast))
  }

  # dispatch on the output type; an output type that is not listed yields NULL
  switch(
    output_type,
    mean = "se_point",
    median = "ae_point",
    quantile = metric_names_for(scoringutils::example_quantile),
    # pmf metrics depend on whether the target is ordinal or nominal
    pmf = metric_names_for(
      if (target_is_ordinal) {
        scoringutils::example_ordinal
      } else {
        scoringutils::example_nominal
      }
    ),
    # no standard metrics are listed for these output types
    cdf = NULL,
    sample = NULL
  )
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
test_that(
  "get_metric_name_to_output_type works, no ordinal targets",
  {
    # build a minimal task group: named, empty output type entries plus a
    # single target_metadata entry carrying the target type
    make_task_group <- function(output_types, target_type) {
      list(
        output_type = stats::setNames(
          rep(list(list()), length(output_types)),
          output_types
        ),
        target_metadata = list(
          list(target_type = target_type)
        )
      )
    }

    task_groups <- list(
      make_task_group(c("mean", "quantile"), "continuous"),
      make_task_group("median", "continuous"),
      make_task_group("pmf", "nominal")
    )

    metrics <- c(
      "se_point", "ae_point", "interval_coverage_50", "wis", "ae_median",
      "NOT A REAL METRIC", "log_score", "rps"
    )

    # note: the "rps" metric is only supported for ordinal pmf targets
    expected <- data.frame(
      metric = metrics,
      output_type = c(
        "mean", "median", rep("quantile", 3),
        NA_character_, "pmf", NA_character_
      )
    )

    actual <- get_metric_name_to_output_type(task_groups, metrics)
    expect_equal(actual, expected)
  }
)
|
||
|
||
test_that(
  "get_metric_name_to_output_type works, ordinal target",
  {
    # a single task group offering only pmf output for an ordinal target
    ordinal_pmf_group <- list(
      output_type = list("pmf" = list()),
      target_metadata = list(list(target_type = "ordinal"))
    )
    task_groups <- list(ordinal_pmf_group)

    metrics <- c(
      "se_point", "ae_point", "interval_coverage_50", "wis", "ae_median",
      "NOT A REAL METRIC", "log_score", "rps"
    )

    # only the last two metrics are expected to match, both via pmf output
    expected <- data.frame(
      metric = metrics,
      output_type = c(rep(NA_character_, 6), "pmf", "pmf")
    )

    actual <- get_metric_name_to_output_type(task_groups, metrics)
    expect_equal(actual, expected)
  }
)
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Moved computation of `output_types_for_target` and `target_is_ordinal` into their own functions in `utils-hub_tasks_config.R`. These functions are now called from within `get_metric_name_to_output_type`.
Slightly awkwardly, to support issuing informative errors, we also compute them below if we need to issue the error.