Skip to content

Commit

Permalink
fixed available_as_ofs, initial_as_of, and current_date PTC options
Browse files Browse the repository at this point in the history
  • Loading branch information
matthewcornell committed Nov 21, 2024
1 parent cc49d33 commit 9853d92
Show file tree
Hide file tree
Showing 6 changed files with 57 additions and 13 deletions.
16 changes: 13 additions & 3 deletions src/hub_predtimechart/generate_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,13 @@ def task_text(task_id, task_value):
return task_value


def get_max_ref_date_or_first_config_ref_date(reference_dates):
    """
    Pick the date to use as a target's initial as_of.

    :param reference_dates: the reference dates available for one target (the values of
        `options['available_as_ofs']`); may be empty
    :return: the maximum of `reference_dates` when it is non-empty, otherwise the minimum of the
        module-level `hub_config.reference_dates`
    """
    if not reference_dates:
        # nothing is available for this target, so fall back to the hub's configured dates
        return min(hub_config.reference_dates)

    return max(reference_dates)


# set `target_variables` and `initial_target_var`. recall that we currently only support one target
options = {}
options['target_variables'] = [{'value': hub_config.fetch_target_id,
Expand All @@ -39,9 +46,12 @@ def task_text(task_id, task_value):
options['initial_interval'] = options['intervals'][-1]

# set `available_as_ofs`, `initial_as_of`, and `current_date`
options['available_as_ofs'] = {hub_config.fetch_target_id: hub_config.reference_dates}
options['initial_as_of'] = hub_config.reference_dates[-1]
options['current_date'] = hub_config.reference_dates[-1]
# available_as_ofs is the subset of hub_config.reference_dates for which
# there is at least one model output file. it is a dict that maps the target id to a sorted list
# of those reference dates (see HubConfig.get_available_as_ofs())
options['available_as_ofs'] = hub_config.get_available_as_ofs()
# initial_as_of is the largest candidate date across all targets, where each target's candidate is
# its max available reference date, or the min of hub_config.reference_dates if it has none
options['initial_as_of'] = max([get_max_ref_date_or_first_config_ref_date(reference_dates)
for reference_dates in options['available_as_ofs'].values()])
# current_date is set to the same date as initial_as_of
options['current_date'] = options['initial_as_of']

# set `models` and `initial_checked_models`
options['models'] = []
Expand Down
27 changes: 27 additions & 0 deletions src/hub_predtimechart/hub_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from pathlib import Path
from typing import Optional

import pandas as pd
import yaml
from jsonschema import validate
from jsonschema.exceptions import ValidationError
Expand Down Expand Up @@ -122,6 +123,32 @@ def model_output_file_for_ref_date(self, model_id: str, reference_date: str) ->
return None


def get_available_as_ofs(self) -> dict[str, list[str]]:
    """
    Finds the reference dates that have at least one model output file containing rows for this
    hub's fetch target.

    :return: a dict with a single key, `self.fetch_target_id`, whose value is the sorted list of
        the `self.reference_dates` for which some model's output file has at least one row where
        `self.target_col_name` equals `self.fetch_target_id`. the list is empty if there are none
    :raises RuntimeError: if a model output file that is examined is neither .csv nor
        .parquet/.pqt
    """
    available_ref_dates = set()
    for reference_date in self.reference_dates:  # ex: ['2022-10-22', '2022-10-29', ...]
        for model_id in self.model_id_to_metadata:  # ex: ['Flusight-baseline', 'MOBS-GLEAM_FLUH', ...]
            model_output_file = self.model_output_file_for_ref_date(model_id, reference_date)
            if not model_output_file:
                continue  # this model has no file for this reference_date

            # only the target column is loaded because that is all the check below needs
            # todo xx extract to function, call from here and _generate_json_files()
            if model_output_file.suffix == '.csv':
                df = pd.read_csv(model_output_file, usecols=[self.target_col_name])
            elif model_output_file.suffix in ['.parquet', '.pqt']:
                df = pd.read_parquet(model_output_file, columns=[self.target_col_name])
            else:
                raise RuntimeError(f"unsupported model output file type: {model_output_file!r}. "
                                   f"Only .csv and .parquet are supported")

            if (df[self.target_col_name] == self.fetch_target_id).any():
                available_ref_dates.add(reference_date)
                break  # one matching file is enough, so skip the remaining models' files

    return {self.fetch_target_id: sorted(available_ref_dates)}


def _validate_hub_ptc_compatibility(ptc_config: dict, tasks: dict, model_metadata_schema: dict):
"""
Validates a hub's predtimechart compatibility as identified in README.MD > Assumptions/limitations .
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,11 +67,10 @@
"intervals": ["0%", "50%", "95%"],
"initial_interval": "95%",
"available_as_ofs": {
"wk inc flu hosp": [
"2023-10-07", "2023-10-14", "2023-10-21", "2023-10-28", "2023-11-04", "2023-11-11", "2023-11-18", "2023-11-25", "2023-12-02", "2023-12-09", "2023-12-16", "2023-12-23", "2023-12-30", "2024-01-06", "2024-01-13", "2024-01-20", "2024-01-27", "2024-02-03", "2024-02-10", "2024-02-17", "2024-02-24", "2024-03-02", "2024-03-09", "2024-03-16", "2024-03-23", "2024-03-30", "2024-04-06", "2024-04-13", "2024-04-20", "2024-04-27", "2024-05-04", "2024-05-11", "2024-11-16", "2024-11-23", "2024-11-30", "2024-12-07", "2024-12-14", "2024-12-21", "2024-12-28", "2025-01-04", "2025-01-11", "2025-01-18", "2025-01-25", "2025-02-01", "2025-02-08", "2025-02-15", "2025-02-22", "2025-03-01", "2025-03-08", "2025-03-15", "2025-03-22", "2025-03-29", "2025-04-05", "2025-04-12", "2025-04-19", "2025-04-26", "2025-05-03", "2025-05-10", "2025-05-17", "2025-05-24", "2025-05-31"]
"wk inc flu hosp": []
},
"initial_as_of": "2025-05-31",
"current_date": "2025-05-31",
"initial_as_of": "2023-10-07",
"current_date": "2023-10-07",
"models": [
"CADPH-FluCAT_Ensemble", "CEPH-Rtrend_fluH", "CMU-TimeSeries", "CU-ensemble", "GT-FluFNP", "ISU_NiemiLab-NLH", "JHU_CSSE-CSSE_Ensemble", "LUcompUncertLab-chimera", "LosAlamos_NAU-CModel_Flu", "MIGHTE-Nsemble", "MOBS-GLEAM_FLUH", "NIH-Flu_ARIMA", "NU_UCSD-GLEAM_AI_FLUH", "PSI-PROF", "SGroup-RandomForest", "SigSci-CREG", "SigSci-TSENS", "Stevens-GBR", "UGA_flucast-Copycat", "UGA_flucast-INFLAenza", "UGuelph-CompositeCurve", "UGuelphensemble-GRYPHON", "UM-DeepOutbreak", "UMass-flusion", "UMass-trends_ensemble", "UNC_IDD-InfluPaint", "UVAFluX-Ensemble", "VTSanghani-Ensemble", "cfa-flumech", "cfarenewal-cfaepimlight", "fjordhest-ensemble"
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,10 +70,10 @@
"intervals": ["0%", "50%", "95%"],
"initial_interval": "95%",
"available_as_ofs": {
"wk inc covid hosp": ["2024-11-09", "2024-11-16", "2024-11-23", "2024-11-30", "2024-12-07", "2024-12-14", "2024-12-21", "2024-12-28", "2025-01-04", "2025-01-11", "2025-01-18", "2025-01-25", "2025-02-01", "2025-02-08", "2025-02-15", "2025-02-22", "2025-03-01", "2025-03-08", "2025-03-15", "2025-03-22", "2025-03-29", "2025-04-05", "2025-04-12", "2025-04-19", "2025-04-26", "2025-05-03", "2025-05-10", "2025-05-17", "2025-05-24", "2025-05-31"]
"wk inc covid hosp": []
},
"initial_as_of": "2025-05-31",
"current_date": "2025-05-31",
"initial_as_of": "2024-11-09",
"current_date": "2024-11-09",
"models": [],
"initial_checked_models": [],
"initial_xaxis_range": null
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,10 +68,10 @@
"intervals": ["0%", "50%", "95%"],
"initial_interval": "95%",
"available_as_ofs": {
"wk inc flu hosp": ["2022-10-22", "2022-10-29", "2022-11-05", "2022-11-12", "2022-11-19", "2022-11-26", "2022-12-03", "2022-12-10", "2022-12-17", "2022-12-24", "2022-12-31", "2023-01-07", "2023-01-14", "2023-01-21", "2023-01-28", "2023-02-04", "2023-02-11", "2023-02-18", "2023-02-25", "2023-03-04", "2023-03-11", "2023-03-18", "2023-03-25", "2023-04-01", "2023-04-08", "2023-04-15", "2023-04-22", "2023-04-29", "2023-05-06", "2023-05-13", "2023-05-20", "2023-05-27"]
"wk inc flu hosp": ["2022-10-22", "2022-11-19", "2022-12-17"]
},
"initial_as_of": "2023-05-27",
"current_date": "2023-05-27",
"initial_as_of": "2022-12-17",
"current_date": "2022-12-17",
"models": ["Flusight-baseline", "MOBS-GLEAM_FLUH", "PSI-DICE"],
"initial_checked_models": ["Flusight-baseline"],
"disclaimer": "Most forecasts have failed to reliably predict rapid changes in the trends of reported cases and hospitalizations. Due to this limitation, they should not be relied upon for decisions about the possibility or timing of rapid changes in trends.",
Expand Down
8 changes: 8 additions & 0 deletions tests/hub_predtimechart/test_hub_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,14 @@ def test_model_output_file_for_ref_date():
assert file is None


def test_get_available_as_ofs():
    """
    get_available_as_ofs() on the example complex forecast hub returns the three expected
    reference dates, keyed by the hub's target.
    """
    example_hub_dir = Path('tests/hubs/example-complex-forecast-hub')
    ptc_config_file = example_hub_dir / 'hub-config/predtimechart-config.yml'
    expected = {'wk inc flu hosp': ['2022-10-22', '2022-11-19', '2022-12-17']}
    assert HubConfig(example_hub_dir, ptc_config_file).get_available_as_ofs() == expected


def test_hub_dir_existence():
with pytest.raises(RuntimeError, match="hub_dir not found"):
hub_dir = Path('tests/hubs/example-complex-forecast-hub')
Expand Down

0 comments on commit 9853d92

Please sign in to comment.