From 0f08335bc164f1fef447a207454d1b18c89a6dac Mon Sep 17 00:00:00 2001
From: Jared Lewis
Date: Thu, 19 Dec 2024 12:52:19 +1100
Subject: [PATCH 1/7] feat: Use an executor specific output directory

---
 .../ref-core/src/ref_core/executor/local.py   | 22 ++++++--
 packages/ref-core/src/ref_core/metrics.py     | 52 ++++++++++++++++++-
 packages/ref-core/tests/unit/test_metrics.py  |  6 ++-
 .../src/ref_metrics_example/example.py        |  4 +-
 .../tests/unit/test_metrics.py                | 22 ++++++--
 ...4a447fbf6d65_metric_add_output_fragment.py | 42 +++++++++++++++
 packages/ref/src/ref/datasets/cmip6.py        |  6 ++-
 .../ref/src/ref/models/metric_execution.py    | 16 +++++-
 packages/ref/src/ref/solver.py                |  4 +-
 9 files changed, 158 insertions(+), 16 deletions(-)
 create mode 100644 packages/ref/alembic/versions/4a447fbf6d65_metric_add_output_fragment.py

diff --git a/packages/ref-core/src/ref_core/executor/local.py b/packages/ref-core/src/ref_core/executor/local.py
index 0a4064f..1177d18 100644
--- a/packages/ref-core/src/ref_core/executor/local.py
+++ b/packages/ref-core/src/ref_core/executor/local.py
@@ -1,4 +1,8 @@
-from ref_core.metrics import Metric, MetricExecutionDefinition, MetricResult
+from attrs import evolve
+from loguru import logger
+
+from ref.config import Config
+from ref_core.metrics import FailedMetricResult, Metric, MetricExecutionDefinition, MetricResult
 
 
 class LocalExecutor:
@@ -12,6 +16,9 @@ class LocalExecutor:
 
     name = "local"
 
+    def __init__(self, config: Config | None = None):
+        self.config = Config.default() if config is None else config
+
     def run_metric(self, metric: Metric, definition: MetricExecutionDefinition) -> MetricResult:
         """
         Run a metric in process
@@ -28,7 +35,12 @@ def run_metric(self, metric: Metric, definition: MetricExecutionDefinition) -> M
         :
             Results from running the metric
         """
-        # TODO: Update fragment use the output directory which may vary depending on the executor
-        definition.output_fragment.mkdir(parents=True, exist_ok=True)
-
-        return metric.run(definition=definition)
+        # TODO: This should be changed to use executor specific configuration
+        definition = evolve(definition, output_directory=self.config.paths.tmp)
+        definition.output_filename().mkdir(parents=True, exist_ok=True)
+
+        try:
+            return metric.run(definition=definition)
+        except Exception:
+            logger.exception(f"Error running metric {metric.slug}")
+            return FailedMetricResult()
diff --git a/packages/ref-core/src/ref_core/metrics.py b/packages/ref-core/src/ref_core/metrics.py
index 06d75f1..ed30023 100644
--- a/packages/ref-core/src/ref_core/metrics.py
+++ b/packages/ref-core/src/ref_core/metrics.py
@@ -37,6 +37,36 @@ class MetricExecutionDefinition:
     Collection of datasets required for the metric execution
     """
 
+    output_directory: pathlib.Path | None = None
+    """
+    Root directory for output data
+
+    This will be resolved by the executor as the output directory may vary depending on where
+    the executor is being run.
+ """ + + def output_filename(self, filename: str | None = None) -> pathlib.Path: + """ + Get the full path to a file in the output directory + + Parameters + ---------- + filename + Name of the file to get the full path for + + Returns + ------- + : + Full path to the file in the output directory + """ + if self.output_directory is None: + raise AssertionError("Output directory is not set") # pragma: no cover + + if filename is None: + return self.output_directory / self.output_fragment + else: + return self.output_directory / self.output_fragment / filename + @frozen class MetricResult: @@ -63,7 +93,9 @@ class MetricResult: # Log info is in the output bundle file already, but is definitely useful @staticmethod - def build(configuration: MetricExecutionDefinition, cmec_output_bundle: dict[str, Any]) -> "MetricResult": + def build_from_output_bundle( + configuration: MetricExecutionDefinition, cmec_output_bundle: dict[str, Any] + ) -> "MetricResult": """ Build a MetricResult from a CMEC output bundle. @@ -82,7 +114,10 @@ def build(configuration: MetricExecutionDefinition, cmec_output_bundle: dict[str A prepared MetricResult object. The output bundle will be written to the output directory. """ - with open(configuration.output_fragment / "output.json", "w") as file_handle: + configuration.output_filename().mkdir(parents=True, exist_ok=True) + bundle_path = configuration.output_filename("output.json") + + with open(bundle_path, "w") as file_handle: json.dump(cmec_output_bundle, file_handle) return MetricResult( output_bundle=configuration.output_fragment / "output.json", @@ -90,6 +125,19 @@ def build(configuration: MetricExecutionDefinition, cmec_output_bundle: dict[str ) +@frozen +class FailedMetricResult(MetricResult): + """ + The result of running a metric. + + The content of the result follows the Earth System Metrics and Diagnostics Standards + ([EMDS](https://github.com/Earth-System-Diagnostics-Standards/EMDS/blob/main/standards.md)). 
+ """ + + def __init__(self): + super().__init__(output_bundle=None, successful=False) + + @frozen(hash=True) class DataRequirement: """ diff --git a/packages/ref-core/tests/unit/test_metrics.py b/packages/ref-core/tests/unit/test_metrics.py index 818de5f..b22d8d5 100644 --- a/packages/ref-core/tests/unit/test_metrics.py +++ b/packages/ref-core/tests/unit/test_metrics.py @@ -2,6 +2,7 @@ import pandas as pd import pytest +from attr import evolve from ref_core.datasets import FacetFilter, SourceDatasetType from ref_core.metrics import DataRequirement, MetricExecutionDefinition, MetricResult @@ -11,7 +12,10 @@ def test_build(self, tmp_path): config = MetricExecutionDefinition( output_fragment=tmp_path, key="mocked-metric-slug", metric_dataset=None ) - result = MetricResult.build(config, {"data": "value"}) + # Setting the output directory generally happens as a side effect of the executor + config = evolve(config, output_directory=tmp_path) + + result = MetricResult.build_from_output_bundle(config, {"data": "value"}) assert result.successful assert result.output_bundle.exists() diff --git a/packages/ref-metrics-example/src/ref_metrics_example/example.py b/packages/ref-metrics-example/src/ref_metrics_example/example.py index 447c0f6..2d3b800 100644 --- a/packages/ref-metrics-example/src/ref_metrics_example/example.py +++ b/packages/ref-metrics-example/src/ref_metrics_example/example.py @@ -120,4 +120,6 @@ def run(self, definition: MetricExecutionDefinition) -> MetricResult: input_files=input_datasets.path.to_list() ) - return MetricResult.build(definition, format_cmec_output_bundle(annual_mean_global_mean_timeseries)) + return MetricResult.build_from_output_bundle( + definition, format_cmec_output_bundle(annual_mean_global_mean_timeseries) + ) diff --git a/packages/ref-metrics-example/tests/unit/test_metrics.py b/packages/ref-metrics-example/tests/unit/test_metrics.py index 7c277db..9b7bd45 100644 --- a/packages/ref-metrics-example/tests/unit/test_metrics.py +++ b/packages/ref-metrics-example/tests/unit/test_metrics.py @@ -1,3 +1,6 @@ +import pathlib +from unittest import mock + import pytest from ref_core.datasets import DatasetCollection, MetricDataset, SourceDatasetType from ref_core.metrics import MetricExecutionDefinition @@ -25,12 +28,17 @@ def test_annual_mean(esgf_data_dir, metric_dataset): assert annual_mean.time.size == 286 -def test_example_metric(tmp_path, metric_dataset, cmip6_data_catalog): +@mock.patch("ref_metrics_example.example.calculate_annual_mean_timeseries") +def test_example_metric(mock_calc, tmp_path, metric_dataset, cmip6_data_catalog): metric = GlobalMeanTimeseries() ds = cmip6_data_catalog.groupby("instance_id").first() + output_directory = tmp_path / "output" + + mock_calc.return_value.attrs.__getitem__.return_value = "ABC" configuration = MetricExecutionDefinition( - output_fragment=tmp_path, + output_directory=output_directory, + output_fragment=pathlib.Path(metric.slug), key="global_mean_timeseries", metric_dataset=MetricDataset( { @@ -41,7 +49,13 @@ def test_example_metric(tmp_path, metric_dataset, cmip6_data_catalog): result = metric.run(configuration) + assert mock_calc.call_count == 1 + + assert result.output_bundle == pathlib.Path(metric.slug) / "output.json" + + output_bundle_path = output_directory / result.output_bundle + assert result.successful - assert result.output_bundle.exists() - assert result.output_bundle.is_file() + assert output_bundle_path.exists() + assert output_bundle_path.is_file() assert result.output_bundle.name == "output.json" diff 
--git a/packages/ref/alembic/versions/4a447fbf6d65_metric_add_output_fragment.py b/packages/ref/alembic/versions/4a447fbf6d65_metric_add_output_fragment.py
new file mode 100644
index 0000000..b382cd8
--- /dev/null
+++ b/packages/ref/alembic/versions/4a447fbf6d65_metric_add_output_fragment.py
@@ -0,0 +1,42 @@
+"""metric_add_output_fragment
+
+Revision ID: 4a447fbf6d65
+Revises: 4b95a617184e
+Create Date: 2024-12-19 12:16:38.605113
+
+"""
+
+from collections.abc import Sequence
+from typing import Union
+
+import sqlalchemy as sa
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision: str = "4a447fbf6d65"
+down_revision: Union[str, None] = "4b95a617184e"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    with op.batch_alter_table("metric_execution_result", schema=None) as batch_op:
+        batch_op.add_column(
+            sa.Column(
+                "output_fragment",
+                sa.String(),
+                nullable=False,
+                default="",  # This is the default value for the migration
+            )
+        )
+
+    # ### end Alembic commands ###
+
+
+def downgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    with op.batch_alter_table("metric_execution_result", schema=None) as batch_op:
+        batch_op.drop_column("output_fragment")
+
+    # ### end Alembic commands ###
diff --git a/packages/ref/src/ref/datasets/cmip6.py b/packages/ref/src/ref/datasets/cmip6.py
index 90a6e94..2844a08 100644
--- a/packages/ref/src/ref/datasets/cmip6.py
+++ b/packages/ref/src/ref/datasets/cmip6.py
@@ -50,7 +50,11 @@ def _fix_parent_variant_label(group: pd.DataFrame) -> pd.DataFrame:
 
         return group
 
-    data_catalog = data_catalog.groupby("instance_id").apply(_fix_parent_variant_label).reset_index(drop=True)
+    data_catalog = (
+        data_catalog.groupby("instance_id")
+        .apply(_fix_parent_variant_label, include_groups=False)
+        .reset_index(level="instance_id")
+    )
 
     data_catalog["branch_time_in_child"] = _clean_branch_time(data_catalog["branch_time_in_child"])
     data_catalog["branch_time_in_parent"] = _clean_branch_time(data_catalog["branch_time_in_parent"])
diff --git a/packages/ref/src/ref/models/metric_execution.py b/packages/ref/src/ref/models/metric_execution.py
index 6a53162..d3aaa63 100644
--- a/packages/ref/src/ref/models/metric_execution.py
+++ b/packages/ref/src/ref/models/metric_execution.py
@@ -98,16 +98,30 @@ class MetricExecutionResult(CreatedUpdatedMixin, Base):
     """
     Represents a run of a metric calculation
 
-    A execution might be run multiple times as new data becomes available.
+    An execution might be run multiple times as new data becomes available.
     """
 
     __tablename__ = "metric_execution_result"
     __table_args__ = (
+        # TODO: This unique constraint is constraining...
+        # If we perform a run with hash A, then run with hash B, then run with hash A again this will fail
+        # This may happen if a dataset is retracted
+        # This will currently result in an IntegrityError so we will know if it ever occurs
        UniqueConstraint("metric_execution_id", "dataset_hash", name="metric_execution_result_ident"),
     )
 
     id: Mapped[int] = mapped_column(primary_key=True)
 
+    output_fragment: Mapped[str] = mapped_column()
+    """
+    Relative directory to store the output of the metric execution.
+
+    During execution this directory is relative to the temporary directory.
+    If the metric execution is successful, the results will be moved to the final output directory
+    and the temporary directory will be cleaned up.
+ This directory may contain multiple input and output files. + """ + metric_execution_id: Mapped[int] = mapped_column(ForeignKey("metric_execution.id")) """ The target metric execution diff --git a/packages/ref/src/ref/solver.py b/packages/ref/src/ref/solver.py index b98f05a..05d7754 100644 --- a/packages/ref/src/ref/solver.py +++ b/packages/ref/src/ref/solver.py @@ -254,7 +254,9 @@ def solve_metrics(db: Database, dry_run: bool = False, solver: MetricSolver | No if metric_execution_model.should_run(info.metric_dataset.hash): logger.info(f"Running metric {metric_execution_model.key}") metric_execution_result = MetricExecutionResult( - metric_execution=metric_execution_model, dataset_hash=info.metric_dataset.hash + metric_execution=metric_execution_model, + dataset_hash=info.metric_dataset.hash, + output_fragment=str(info.output_fragment), ) db.session.add(metric_execution_result) db.session.flush() From 21d4b6bb9f1add72d524c3a5672cf1b49835700a Mon Sep 17 00:00:00 2001 From: Jared Lewis Date: Thu, 19 Dec 2024 13:00:39 +1100 Subject: [PATCH 2/7] refactor: Use a build method for failed executions --- .../ref-core/src/ref_core/executor/local.py | 4 ++-- packages/ref-core/src/ref_core/metrics.py | 19 ++++++++----------- 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/packages/ref-core/src/ref_core/executor/local.py b/packages/ref-core/src/ref_core/executor/local.py index 1177d18..82434ee 100644 --- a/packages/ref-core/src/ref_core/executor/local.py +++ b/packages/ref-core/src/ref_core/executor/local.py @@ -2,7 +2,7 @@ from loguru import logger from ref.config import Config -from ref_core.metrics import FailedMetricResult, Metric, MetricExecutionDefinition, MetricResult +from ref_core.metrics import Metric, MetricExecutionDefinition, MetricResult class LocalExecutor: @@ -43,4 +43,4 @@ def run_metric(self, metric: Metric, definition: MetricExecutionDefinition) -> M return metric.run(definition=definition) except Exception: logger.exception(f"Error running metric {metric.slug}") - return FailedMetricResult() + return MetricResult.build_from_failure() diff --git a/packages/ref-core/src/ref_core/metrics.py b/packages/ref-core/src/ref_core/metrics.py index ed30023..79b2d6e 100644 --- a/packages/ref-core/src/ref_core/metrics.py +++ b/packages/ref-core/src/ref_core/metrics.py @@ -124,18 +124,15 @@ def build_from_output_bundle( successful=True, ) + @staticmethod + def build_from_failure() -> "MetricResult": + """ + Build a failed metric result. -@frozen -class FailedMetricResult(MetricResult): - """ - The result of running a metric. - - The content of the result follows the Earth System Metrics and Diagnostics Standards - ([EMDS](https://github.com/Earth-System-Diagnostics-Standards/EMDS/blob/main/standards.md)). - """ - - def __init__(self): - super().__init__(output_bundle=None, successful=False) + This is a placeholder. + Additional log information should still be captured in the output bundle. 
+        """
+        return MetricResult(output_bundle=None, successful=False)
 
 
 @frozen(hash=True)
From 478479e674fdbb7f69ed3f42f06994515872b622 Mon Sep 17 00:00:00 2001
From: Jared Lewis
Date: Thu, 19 Dec 2024 13:03:24 +1100
Subject: [PATCH 3/7] docs: Changelog

---
 changelog/46.feature.md | 4 ++++
 1 file changed, 4 insertions(+)
 create mode 100644 changelog/46.feature.md

diff --git a/changelog/46.feature.md b/changelog/46.feature.md
new file mode 100644
index 0000000..d1501bf
--- /dev/null
+++ b/changelog/46.feature.md
@@ -0,0 +1,4 @@
+Support differing assumptions about the root paths used by the executors and the ref CLI.
+
+Where possible, path fragments are stored in the database instead of complete paths.
+This makes it possible to move the data folders without needing to update the database.
From c21e4ff155b80b23f1c12dec6798d3bc0c0032c8 Mon Sep 17 00:00:00 2001
From: Jared Lewis
Date: Tue, 7 Jan 2025 10:50:17 +1100
Subject: [PATCH 4/7] refactor: Rename output_bundle to output_fragment

---
 docs/how-to-guides/running-metrics-locally.py      |  2 +-
 packages/ref-core/src/ref_core/metrics.py          | 13 ++++++++-----
 packages/ref-core/tests/conftest.py                |  2 +-
 packages/ref-core/tests/unit/test_executor.py      |  2 +-
 packages/ref-core/tests/unit/test_metrics.py       |  8 ++++----
 .../src/ref_metrics_esmvaltool/example.py          |  4 +++-
 .../tests/unit/test_metrics.py                     | 10 +++++++---
 .../ref-metrics-example/tests/unit/test_metrics.py |  6 +++---
 8 files changed, 28 insertions(+), 19 deletions(-)

diff --git a/docs/how-to-guides/running-metrics-locally.py b/docs/how-to-guides/running-metrics-locally.py
index 7e9b7a2..4c206b2 100644
--- a/docs/how-to-guides/running-metrics-locally.py
+++ b/docs/how-to-guides/running-metrics-locally.py
@@ -163,7 +163,7 @@
 result
 
 # %%
-with open(result.output_bundle) as fh:
+with open(result.output_fragment) as fh:
     # Load the output bundle and pretty print
     loaded_result = json.loads(fh.read())
     print(json.dumps(loaded_result, indent=2))
diff --git a/packages/ref-core/src/ref_core/metrics.py b/packages/ref-core/src/ref_core/metrics.py
index 79b2d6e..4ae6ac8 100644
--- a/packages/ref-core/src/ref_core/metrics.py
+++ b/packages/ref-core/src/ref_core/metrics.py
@@ -79,9 +79,12 @@ class MetricResult:
 
     # Do we want to load a serialised version of the output bundle here or just a file path?
 
-    output_bundle: pathlib.Path | None
+    output_fragment: pathlib.Path | None
     """
-    Path to the output bundle file.
+    Path to the output bundle file relative to the output directory.
+
+    The absolute path of the outputs may differ between executors
+    depending on where the output directory is mounted.
 
     The contents of this file are defined by
     [EMDS standard](https://github.com/Earth-System-Diagnostics-Standards/EMDS/blob/main/standards.md#common-output-bundle-format-)
@@ -120,7 +123,7 @@
         with open(bundle_path, "w") as file_handle:
             json.dump(cmec_output_bundle, file_handle)
         return MetricResult(
-            output_bundle=configuration.output_fragment / "output.json",
+            output_fragment=configuration.output_fragment / "output.json",
             successful=True,
         )
 
@@ -132,7 +135,7 @@ def build_from_failure() -> "MetricResult":
 
         This is a placeholder.
         Additional log information should still be captured in the output bundle.
         """
-        return MetricResult(output_bundle=None, successful=False)
+        return MetricResult(output_fragment=None, successful=False)
 
 
 @frozen(hash=True)
@@ -167,7 +170,7 @@ class DataRequirement:
     """
     The fields to group the datasets by.
- This groupby operation is performed after the data catalog is filtered according to `filters`. + This group by operation is performed after the data catalog is filtered according to `filters`. Each group will contain a unique combination of values from the metadata fields, and will result in a separate execution of the metric. If `group_by=None`, all datasets will be processed together as a single execution. diff --git a/packages/ref-core/tests/conftest.py b/packages/ref-core/tests/conftest.py index 0fbacef..35f06b5 100644 --- a/packages/ref-core/tests/conftest.py +++ b/packages/ref-core/tests/conftest.py @@ -18,7 +18,7 @@ def __init__(self, temp_dir: pathlib.Path) -> None: def run(self, definition: MetricExecutionDefinition) -> MetricResult: return MetricResult( - output_bundle=self.temp_dir / definition.output_fragment / "output.json", + output_fragment=self.temp_dir / definition.output_fragment / "output.json", successful=True, ) diff --git a/packages/ref-core/tests/unit/test_executor.py b/packages/ref-core/tests/unit/test_executor.py index e950950..03fb82c 100644 --- a/packages/ref-core/tests/unit/test_executor.py +++ b/packages/ref-core/tests/unit/test_executor.py @@ -25,7 +25,7 @@ def test_run_metric(self, metric_definition, mock_metric): result = executor.run_metric(mock_metric, metric_definition) assert result.successful - assert result.output_bundle == metric_definition.output_fragment / "output.json" + assert result.output_fragment == metric_definition.output_fragment / "output.json" @pytest.mark.parametrize("executor_name", ["local", None]) diff --git a/packages/ref-core/tests/unit/test_metrics.py b/packages/ref-core/tests/unit/test_metrics.py index b22d8d5..2dc6148 100644 --- a/packages/ref-core/tests/unit/test_metrics.py +++ b/packages/ref-core/tests/unit/test_metrics.py @@ -18,12 +18,12 @@ def test_build(self, tmp_path): result = MetricResult.build_from_output_bundle(config, {"data": "value"}) assert result.successful - assert result.output_bundle.exists() - assert result.output_bundle.is_file() - with open(result.output_bundle) as f: + assert result.output_fragment.exists() + assert result.output_fragment.is_file() + with open(result.output_fragment) as f: assert f.read() == '{"data": "value"}' - assert result.output_bundle.is_relative_to(tmp_path) + assert result.output_fragment.is_relative_to(tmp_path) @pytest.fixture diff --git a/packages/ref-metrics-esmvaltool/src/ref_metrics_esmvaltool/example.py b/packages/ref-metrics-esmvaltool/src/ref_metrics_esmvaltool/example.py index 18a0a8e..1060c56 100644 --- a/packages/ref-metrics-esmvaltool/src/ref_metrics_esmvaltool/example.py +++ b/packages/ref-metrics-esmvaltool/src/ref_metrics_esmvaltool/example.py @@ -105,4 +105,6 @@ def run(self, definition: MetricExecutionDefinition) -> MetricResult: result = next(result_dir.glob("work/timeseries/script1/*.nc")) annual_mean_global_mean_timeseries = xarray.open_dataset(result) - return MetricResult.build(definition, format_cmec_output_bundle(annual_mean_global_mean_timeseries)) + return MetricResult.build_from_output_bundle( + definition, format_cmec_output_bundle(annual_mean_global_mean_timeseries) + ) diff --git a/packages/ref-metrics-esmvaltool/tests/unit/test_metrics.py b/packages/ref-metrics-esmvaltool/tests/unit/test_metrics.py index 3b8b93d..c306e8c 100644 --- a/packages/ref-metrics-esmvaltool/tests/unit/test_metrics.py +++ b/packages/ref-metrics-esmvaltool/tests/unit/test_metrics.py @@ -23,8 +23,10 @@ def metric_dataset(cmip6_data_catalog) -> MetricDataset: def 
test_example_metric(tmp_path, mocker, metric_dataset, cmip6_data_catalog): metric = GlobalMeanTimeseries() ds = cmip6_data_catalog.groupby("instance_id", as_index=False).first() + output_directory = tmp_path / "output" configuration = MetricExecutionDefinition( + output_directory=output_directory, output_fragment=tmp_path, key="global_mean_timeseries", metric_dataset=MetricDataset( @@ -58,7 +60,9 @@ def mock_check_call(cmd, *args, **kwargs): result = metric.run(configuration) + output_bundle_path = output_directory / result.output_fragment + assert result.successful - assert result.output_bundle.exists() - assert result.output_bundle.is_file() - assert result.output_bundle.name == "output.json" + assert output_bundle_path.exists() + assert output_bundle_path.is_file() + assert result.output_fragment.name == "output.json" diff --git a/packages/ref-metrics-example/tests/unit/test_metrics.py b/packages/ref-metrics-example/tests/unit/test_metrics.py index 9b7bd45..4ba0cf4 100644 --- a/packages/ref-metrics-example/tests/unit/test_metrics.py +++ b/packages/ref-metrics-example/tests/unit/test_metrics.py @@ -51,11 +51,11 @@ def test_example_metric(mock_calc, tmp_path, metric_dataset, cmip6_data_catalog) assert mock_calc.call_count == 1 - assert result.output_bundle == pathlib.Path(metric.slug) / "output.json" + assert result.output_fragment == pathlib.Path(metric.slug) / "output.json" - output_bundle_path = output_directory / result.output_bundle + output_bundle_path = output_directory / result.output_fragment assert result.successful assert output_bundle_path.exists() assert output_bundle_path.is_file() - assert result.output_bundle.name == "output.json" + assert result.output_fragment.name == "output.json" From 5504eda4516a69198f1eae4fd61e621f20866a84 Mon Sep 17 00:00:00 2001 From: Jared Lewis Date: Tue, 7 Jan 2025 13:54:24 +1100 Subject: [PATCH 5/7] refactor: Rename output_filename to to_output_path --- docs/how-to-guides/running-metrics-locally.py | 17 ++--- .../ref-core/src/ref_core/executor/local.py | 6 +- packages/ref-core/src/ref_core/metrics.py | 63 +++++++++++++------ packages/ref-core/tests/conftest.py | 8 +-- packages/ref-core/tests/unit/test_executor.py | 2 +- packages/ref-core/tests/unit/test_metrics.py | 12 ++-- .../tests/unit/test_metrics.py | 10 +-- .../tests/unit/test_metrics.py | 15 +++-- pyproject.toml | 1 + uv.lock | 28 +++++++++ 10 files changed, 112 insertions(+), 50 deletions(-) diff --git a/docs/how-to-guides/running-metrics-locally.py b/docs/how-to-guides/running-metrics-locally.py index 4c206b2..200d1ae 100644 --- a/docs/how-to-guides/running-metrics-locally.py +++ b/docs/how-to-guides/running-metrics-locally.py @@ -24,9 +24,6 @@ # This guide will walk you through how to run a metric provider locally. -# %% -# !pip install prettyprinter - # %% tags=["remove_input"] import json from pathlib import Path @@ -35,6 +32,7 @@ import pandas as pd import prettyprinter import ref_metrics_example +from attr import evolve from ref_core.datasets import SourceDatasetType from ref_core.executor import run_metric @@ -138,12 +136,13 @@ # and which datasets should be used for the metric calculation. 
# %%
+output_directory = Path("out")
 definition = metric_executions[0].build_metric_execution_info()
 prettyprinter.pprint(definition)
 
 # %%
 # Update the output fragment to be a subdirectory of the current working directory
-definition = attrs.evolve(definition, output_fragment=Path("out") / definition.output_fragment)
+definition = attrs.evolve(definition, output_fragment=output_directory / definition.output_fragment)
 definition.output_fragment
 
 # %% [markdown]
@@ -163,7 +162,9 @@
 result
 
 # %%
-with open(result.output_fragment) as fh:
+
+output_file = result.definition.to_output_path(result.bundle_filename)
+with open(output_file) as fh:
     # Load the output bundle and pretty print
     loaded_result = json.loads(fh.read())
     print(json.dumps(loaded_result, indent=2))
@@ -175,7 +176,9 @@
 # This will not perform any validation/verification of the output results.
 
 # %%
-direct_result = metric.run(definition=definition)
+direct_result = metric.run(definition=evolve(definition, output_directory=output_directory))
 
 assert direct_result.successful
-direct_result
+prettyprinter.pprint(direct_result)
+
+# %%
diff --git a/packages/ref-core/src/ref_core/executor/local.py b/packages/ref-core/src/ref_core/executor/local.py
index 82434ee..ea4ecd6 100644
--- a/packages/ref-core/src/ref_core/executor/local.py
+++ b/packages/ref-core/src/ref_core/executor/local.py
@@ -37,10 +37,12 @@ def run_metric(self, metric: Metric, definition: MetricExecutionDefinition) -> M
         """
         # TODO: This should be changed to use executor specific configuration
         definition = evolve(definition, output_directory=self.config.paths.tmp)
-        definition.output_filename().mkdir(parents=True, exist_ok=True)
+        execution_output_path = definition.to_output_path(filename=None)
+        execution_output_path.mkdir(parents=True, exist_ok=True)
 
         try:
             return metric.run(definition=definition)
+        # TODO: Copy results to the output directory
         except Exception:
             logger.exception(f"Error running metric {metric.slug}")
-            return MetricResult.build_from_failure()
+            return MetricResult.build_from_failure(definition)
diff --git a/packages/ref-core/src/ref_core/metrics.py b/packages/ref-core/src/ref_core/metrics.py
index 4ae6ac8..032db78 100644
--- a/packages/ref-core/src/ref_core/metrics.py
+++ b/packages/ref-core/src/ref_core/metrics.py
@@ -17,13 +17,6 @@ class MetricExecutionDefinition:
     This represents the information needed by a metric to perform a single execution of the metric
     """
 
-    output_fragment: pathlib.Path
-    """
-    Relative directory to store the output of the metric execution
-
-    This is relative to the temporary directory which may differ by executor.
-    """
-
     key: str
     """
     A unique identifier for the metric execution
@@ -37,6 +30,13 @@
     Collection of datasets required for the metric execution
     """
 
+    output_fragment: pathlib.Path
+    """
+    Relative directory to store the output of the metric execution
+
+    This is relative to the temporary directory which may differ by executor.
+    """
+
     output_directory: pathlib.Path | None = None
     """
     Root directory for output data
@@ -45,9 +45,9 @@
-    def output_filename(self, filename: str | None = None) -> pathlib.Path:
+    def to_output_path(self, filename: str | None) -> pathlib.Path:
         """
-        Get the full path to a file in the output directory
+        Get the absolute path for a file in the output directory
 
         Parameters
         ----------
         filename
@@ -79,9 +79,14 @@ class MetricResult:
 
     # Do we want to load a serialised version of the output bundle here or just a file path?
- output_fragment: pathlib.Path | None + definition: MetricExecutionDefinition + """ + The definition of the metric execution that produced this result. + """ + + bundle_filename: pathlib.Path | None """ - Path to the output bundle file relative to the output directory. + Filename of the output bundle file relative to the execution directory. The absolute path of the outputs may differ between executors depending on where the output directory is mounted. @@ -89,6 +94,7 @@ class MetricResult: The contents of this file are defined by [EMDS standard](https://github.com/Earth-System-Diagnostics-Standards/EMDS/blob/main/standards.md#common-output-bundle-format-) """ + successful: bool """ Whether the metric ran successfully. @@ -97,15 +103,15 @@ class MetricResult: @staticmethod def build_from_output_bundle( - configuration: MetricExecutionDefinition, cmec_output_bundle: dict[str, Any] + definition: MetricExecutionDefinition, cmec_output_bundle: dict[str, Any] ) -> "MetricResult": """ Build a MetricResult from a CMEC output bundle. Parameters ---------- - configuration - The configuration used to run the metric. + definition + The execution defintion. cmec_output_bundle An output bundle in the CMEC format. @@ -117,25 +123,44 @@ def build_from_output_bundle( A prepared MetricResult object. The output bundle will be written to the output directory. """ - configuration.output_filename().mkdir(parents=True, exist_ok=True) - bundle_path = configuration.output_filename("output.json") + definition.to_output_path(filename=None).mkdir(parents=True, exist_ok=True) + bundle_path = definition.to_output_path("output.json") with open(bundle_path, "w") as file_handle: json.dump(cmec_output_bundle, file_handle) return MetricResult( - output_fragment=configuration.output_fragment / "output.json", + definition=definition, + bundle_filename=pathlib.Path("output.json"), successful=True, ) @staticmethod - def build_from_failure() -> "MetricResult": + def build_from_failure(definition: MetricExecutionDefinition) -> "MetricResult": """ Build a failed metric result. This is a placeholder. Additional log information should still be captured in the output bundle. """ - return MetricResult(output_fragment=None, successful=False) + return MetricResult(bundle_filename=None, successful=False, definition=definition) + + def to_output_path(self, filename: str | None) -> pathlib.Path: + """ + Get the absolute path for a file in the output directory + + Parameters + ---------- + filename + Name of the file to get the full path for + + If None the path to the output bundle will be returned + + Returns + ------- + : + Full path to the file in the output directory + """ + return self.definition.to_output_path(filename) @frozen(hash=True) diff --git a/packages/ref-core/tests/conftest.py b/packages/ref-core/tests/conftest.py index 35f06b5..b82f46f 100644 --- a/packages/ref-core/tests/conftest.py +++ b/packages/ref-core/tests/conftest.py @@ -17,9 +17,11 @@ def __init__(self, temp_dir: pathlib.Path) -> None: data_requirements = (DataRequirement(source_type=SourceDatasetType.CMIP6, filters=(), group_by=None),) def run(self, definition: MetricExecutionDefinition) -> MetricResult: + # TODO: This doesn't write output.json, use build function? 
return MetricResult( - output_fragment=self.temp_dir / definition.output_fragment / "output.json", + bundle_filename=self.temp_dir / definition.output_fragment / "output.json", successful=True, + definition=definition, ) @@ -30,9 +32,7 @@ class FailedMetric: data_requirements = (DataRequirement(source_type=SourceDatasetType.CMIP6, filters=(), group_by=None),) def run(self, definition: MetricExecutionDefinition) -> MetricResult: - return MetricResult( - successful=False, - ) + return MetricResult.build_from_failure(definition) @pytest.fixture diff --git a/packages/ref-core/tests/unit/test_executor.py b/packages/ref-core/tests/unit/test_executor.py index 03fb82c..856efcb 100644 --- a/packages/ref-core/tests/unit/test_executor.py +++ b/packages/ref-core/tests/unit/test_executor.py @@ -25,7 +25,7 @@ def test_run_metric(self, metric_definition, mock_metric): result = executor.run_metric(mock_metric, metric_definition) assert result.successful - assert result.output_fragment == metric_definition.output_fragment / "output.json" + assert result.bundle_filename == metric_definition.output_fragment / "output.json" @pytest.mark.parametrize("executor_name", ["local", None]) diff --git a/packages/ref-core/tests/unit/test_metrics.py b/packages/ref-core/tests/unit/test_metrics.py index 2dc6148..7a1b2a1 100644 --- a/packages/ref-core/tests/unit/test_metrics.py +++ b/packages/ref-core/tests/unit/test_metrics.py @@ -18,12 +18,16 @@ def test_build(self, tmp_path): result = MetricResult.build_from_output_bundle(config, {"data": "value"}) assert result.successful - assert result.output_fragment.exists() - assert result.output_fragment.is_file() - with open(result.output_fragment) as f: + + # Convert relative path to absolute path + output_filename = result.to_output_path(result.bundle_filename) + + assert output_filename.exists() + assert output_filename.is_file() + with open(output_filename) as f: assert f.read() == '{"data": "value"}' - assert result.output_fragment.is_relative_to(tmp_path) + assert output_filename.is_relative_to(tmp_path) @pytest.fixture diff --git a/packages/ref-metrics-esmvaltool/tests/unit/test_metrics.py b/packages/ref-metrics-esmvaltool/tests/unit/test_metrics.py index c306e8c..9b0beb7 100644 --- a/packages/ref-metrics-esmvaltool/tests/unit/test_metrics.py +++ b/packages/ref-metrics-esmvaltool/tests/unit/test_metrics.py @@ -25,7 +25,7 @@ def test_example_metric(tmp_path, mocker, metric_dataset, cmip6_data_catalog): ds = cmip6_data_catalog.groupby("instance_id", as_index=False).first() output_directory = tmp_path / "output" - configuration = MetricExecutionDefinition( + definition = MetricExecutionDefinition( output_directory=output_directory, output_fragment=tmp_path, key="global_mean_timeseries", @@ -36,7 +36,7 @@ def test_example_metric(tmp_path, mocker, metric_dataset, cmip6_data_catalog): ), ) - result_dir = configuration.output_fragment / "results" / "recipe_test_a" + result_dir = definition.output_fragment / "results" / "recipe_test_a" result = result_dir / "work" / "timeseries" / "script1" / "result.nc" def mock_check_call(cmd, *args, **kwargs): @@ -58,11 +58,11 @@ def mock_check_call(cmd, *args, **kwargs): ) open_dataset.return_value.attrs.__getitem__.return_value = "ABC" - result = metric.run(configuration) + result = metric.run(definition) - output_bundle_path = output_directory / result.output_fragment + output_bundle_path = definition.output_directory / definition.output_fragment / result.bundle_filename assert result.successful assert output_bundle_path.exists() assert 
output_bundle_path.is_file() - assert result.output_fragment.name == "output.json" + assert result.bundle_filename.name == "output.json" diff --git a/packages/ref-metrics-example/tests/unit/test_metrics.py b/packages/ref-metrics-example/tests/unit/test_metrics.py index 4ba0cf4..b102ecb 100644 --- a/packages/ref-metrics-example/tests/unit/test_metrics.py +++ b/packages/ref-metrics-example/tests/unit/test_metrics.py @@ -1,5 +1,4 @@ import pathlib -from unittest import mock import pytest from ref_core.datasets import DatasetCollection, MetricDataset, SourceDatasetType @@ -28,15 +27,16 @@ def test_annual_mean(esgf_data_dir, metric_dataset): assert annual_mean.time.size == 286 -@mock.patch("ref_metrics_example.example.calculate_annual_mean_timeseries") -def test_example_metric(mock_calc, tmp_path, metric_dataset, cmip6_data_catalog): +def test_example_metric(tmp_path, metric_dataset, cmip6_data_catalog, mocker): metric = GlobalMeanTimeseries() ds = cmip6_data_catalog.groupby("instance_id").first() output_directory = tmp_path / "output" + mock_calc = mocker.patch("ref_metrics_example.example.calculate_annual_mean_timeseries") + mock_calc.return_value.attrs.__getitem__.return_value = "ABC" - configuration = MetricExecutionDefinition( + definition = MetricExecutionDefinition( output_directory=output_directory, output_fragment=pathlib.Path(metric.slug), key="global_mean_timeseries", @@ -47,15 +47,14 @@ def test_example_metric(mock_calc, tmp_path, metric_dataset, cmip6_data_catalog) ), ) - result = metric.run(configuration) + result = metric.run(definition) assert mock_calc.call_count == 1 - assert result.output_fragment == pathlib.Path(metric.slug) / "output.json" + assert str(result.bundle_filename) == "output.json" - output_bundle_path = output_directory / result.output_fragment + output_bundle_path = definition.output_directory / definition.output_fragment / result.bundle_filename assert result.successful assert output_bundle_path.exists() assert output_bundle_path.is_file() - assert result.output_fragment.name == "output.json" diff --git a/pyproject.toml b/pyproject.toml index 1ac75b1..15a8af3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -51,6 +51,7 @@ dev-dependencies = [ "bump-my-version>=0.28.1", "pytest-regressions>=2.5.0", "ipywidgets>=8.1.5", + "prettyprinter>=0.18.0", ] [tool.uv.workspace] diff --git a/uv.lock b/uv.lock index 4cd4d75..e98322e 100644 --- a/uv.lock +++ b/uv.lock @@ -528,6 +528,7 @@ dev = [ { name = "pandas-stubs" }, { name = "pip" }, { name = "pre-commit" }, + { name = "prettyprinter" }, { name = "pytest" }, { name = "pytest-cov" }, { name = "pytest-loguru" }, @@ -567,6 +568,7 @@ dev = [ { name = "pandas-stubs", specifier = ">=2.2.3.241009" }, { name = "pip", specifier = ">=24.3.1" }, { name = "pre-commit", specifier = ">=3.3.1" }, + { name = "prettyprinter", specifier = ">=0.18.0" }, { name = "pytest", specifier = ">=7.3.1" }, { name = "pytest-cov", specifier = ">=4.0.0" }, { name = "pytest-loguru", specifier = ">=0.4.0" }, @@ -584,6 +586,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335 }, ] +[[package]] +name = "colorful" +version = "0.5.6" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "platform_system == 'Windows'" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/fa/5f/38e40c3bc4107c39e4062d943026b8ee25154cb4b185b882f274a1ab65da/colorful-0.5.6.tar.gz", hash = "sha256:b56d5c01db1dac4898308ea889edcb113fbee3e6ec5df4bacffd61d5241b5b8d", size = 209280 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b3/61/39e7db0cb326c9c8f6a49fad4fc9c2f1241f05a4e10f0643fc31ce26a7e0/colorful-0.5.6-py2.py3-none-any.whl", hash = "sha256:eab8c1c809f5025ad2b5238a50bd691e26850da8cac8f90d660ede6ea1af9f1e", size = 201369 }, +] + [[package]] name = "comm" version = "0.2.2" @@ -2625,6 +2639,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/16/8f/496e10d51edd6671ebe0432e33ff800aa86775d2d147ce7d43389324a525/pre_commit-4.0.1-py2.py3-none-any.whl", hash = "sha256:efde913840816312445dc98787724647c65473daefe420785f885e8ed9a06878", size = 218713 }, ] +[[package]] +name = "prettyprinter" +version = "0.18.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorful" }, + { name = "pygments" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/97/41/967b5e033b5b50eebe0b8154a9e9827c517e244b9b612ec3357c40a4a33c/prettyprinter-0.18.0.tar.gz", hash = "sha256:9fe5da7ec53510881dd35d7a5c677ba45f34cfe6a8e78d1abd20652cf82139a8", size = 651884 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9f/d0/9effbeca8f1b8df9d33154de3477a51e55a9c46cb15612dd7791a1624397/prettyprinter-0.18.0-py2.py3-none-any.whl", hash = "sha256:358a58f276cb312e3ca29d7a7f244c91e4e0bda7848249d30e4f36d2eb58b67c", size = 48013 }, +] + [[package]] name = "prometheus-client" version = "0.21.0" @@ -2713,6 +2740,7 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ce/ac/5b1ea50fc08a9df82de7e1771537557f07c2632231bbab652c7e22597908/psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:bb89f0a835bcfc1d42ccd5f41f04870c1b936d8507c6df12b7737febc40f0909", size = 2822712 }, { url = "https://files.pythonhosted.org/packages/c4/fc/504d4503b2abc4570fac3ca56eb8fed5e437bf9c9ef13f36b6621db8ef00/psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:f0c2d907a1e102526dd2986df638343388b94c33860ff3bbe1384130828714b1", size = 2920155 }, { url = "https://files.pythonhosted.org/packages/b2/d1/323581e9273ad2c0dbd1902f3fb50c441da86e894b6e25a73c3fda32c57e/psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f8157bed2f51db683f31306aa497311b560f2265998122abe1dce6428bd86567", size = 2959356 }, + { url = "https://files.pythonhosted.org/packages/08/50/d13ea0a054189ae1bc21af1d85b6f8bb9bbc5572991055d70ad9006fe2d6/psycopg2_binary-2.9.10-cp313-cp313-win_amd64.whl", hash = "sha256:27422aa5f11fbcd9b18da48373eb67081243662f9b46e6fd07c3eb46e4535142", size = 2569224 }, ] [[package]] From 036a961aa31f38dbe380defc17c5e2a20527190d Mon Sep 17 00:00:00 2001 From: Jared Lewis Date: Tue, 7 Jan 2025 13:59:37 +1100 Subject: [PATCH 6/7] chore: Move pytest-mock as dependency of root package --- packages/ref-metrics-esmvaltool/pyproject.toml | 5 ----- pyproject.toml | 1 + uv.lock | 10 ++-------- 3 files changed, 3 insertions(+), 13 deletions(-) diff --git a/packages/ref-metrics-esmvaltool/pyproject.toml b/packages/ref-metrics-esmvaltool/pyproject.toml index def11d4..2d26edf 100644 --- a/packages/ref-metrics-esmvaltool/pyproject.toml +++ b/packages/ref-metrics-esmvaltool/pyproject.toml @@ -30,11 +30,6 @@ dependencies = [ [project.license] text = "Apache-2.0" -[tool.uv] -dev-dependencies = [ - "pytest-mock >= 3.12", -] - [build-system] requires = ["hatchling"] build-backend = 
"hatchling.build" diff --git a/pyproject.toml b/pyproject.toml index 15a8af3..f621d27 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,6 +24,7 @@ text = "Apache-2.0" dev-dependencies = [ "pytest>=7.3.1", "pytest-cov>=4.0.0", + "pytest-mock >= 3.12", "coverage>=7.2.0", "mypy>=1.11.0", "ruff>=0.6.9", diff --git a/uv.lock b/uv.lock index e98322e..c2ce09d 100644 --- a/uv.lock +++ b/uv.lock @@ -532,6 +532,7 @@ dev = [ { name = "pytest" }, { name = "pytest-cov" }, { name = "pytest-loguru" }, + { name = "pytest-mock" }, { name = "pytest-regressions" }, { name = "ruff" }, { name = "towncrier" }, @@ -572,6 +573,7 @@ dev = [ { name = "pytest", specifier = ">=7.3.1" }, { name = "pytest-cov", specifier = ">=4.0.0" }, { name = "pytest-loguru", specifier = ">=0.4.0" }, + { name = "pytest-mock", specifier = ">=3.12" }, { name = "pytest-regressions", specifier = ">=2.5.0" }, { name = "ruff", specifier = ">=0.6.9" }, { name = "towncrier", specifier = ">=24.8.0" }, @@ -3298,11 +3300,6 @@ dependencies = [ { name = "xarray" }, ] -[package.dev-dependencies] -dev = [ - { name = "pytest-mock" }, -] - [package.metadata] requires-dist = [ { name = "pooch", specifier = ">=1.8" }, @@ -3311,9 +3308,6 @@ requires-dist = [ { name = "xarray", specifier = ">=2022" }, ] -[package.metadata.requires-dev] -dev = [{ name = "pytest-mock", specifier = ">=3.12" }] - [[package]] name = "ref-metrics-example" version = "0.1.0" From 9f7a9174e3611577b6381a8b66e4bd69258b1aa7 Mon Sep 17 00:00:00 2001 From: Jared Lewis Date: Wed, 8 Jan 2025 10:53:19 +1100 Subject: [PATCH 7/7] test: Improve coverage --- packages/ref-core/tests/unit/test_executor.py | 9 +++++++++ packages/ref-core/tests/unit/test_metrics.py | 18 ++++++++++++++---- 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/packages/ref-core/tests/unit/test_executor.py b/packages/ref-core/tests/unit/test_executor.py index 856efcb..47d0407 100644 --- a/packages/ref-core/tests/unit/test_executor.py +++ b/packages/ref-core/tests/unit/test_executor.py @@ -27,6 +27,15 @@ def test_run_metric(self, metric_definition, mock_metric): assert result.successful assert result.bundle_filename == metric_definition.output_fragment / "output.json" + def test_raises_exception(self, metric_definition, mock_metric): + executor = LocalExecutor() + + mock_metric.run = lambda definition: 1 / 0 + + result = executor.run_metric(mock_metric, metric_definition) + assert result.successful is False + assert result.bundle_filename is None + @pytest.mark.parametrize("executor_name", ["local", None]) def test_run_metric_local(monkeypatch, executor_name, mock_metric, provider, metric_definition): diff --git a/packages/ref-core/tests/unit/test_metrics.py b/packages/ref-core/tests/unit/test_metrics.py index 7a1b2a1..4d8d944 100644 --- a/packages/ref-core/tests/unit/test_metrics.py +++ b/packages/ref-core/tests/unit/test_metrics.py @@ -8,14 +8,14 @@ class TestMetricResult: - def test_build(self, tmp_path): - config = MetricExecutionDefinition( + def test_build_from_output_bundle(self, tmp_path): + definition = MetricExecutionDefinition( output_fragment=tmp_path, key="mocked-metric-slug", metric_dataset=None ) # Setting the output directory generally happens as a side effect of the executor - config = evolve(config, output_directory=tmp_path) + definition = evolve(definition, output_directory=tmp_path) - result = MetricResult.build_from_output_bundle(config, {"data": "value"}) + result = MetricResult.build_from_output_bundle(definition, {"data": "value"}) assert result.successful @@ -29,6 +29,16 @@ 
def test_build(self, tmp_path): assert output_filename.is_relative_to(tmp_path) + def test_build_from_failure(self): + definition = MetricExecutionDefinition( + output_fragment="output", key="mocked-metric-slug", metric_dataset=None + ) + result = MetricResult.build_from_failure(definition) + + assert not result.successful + assert result.bundle_filename is None + assert result.definition == definition + @pytest.fixture def apply_data_catalog():
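
Usage sketch: how the output-path pieces introduced in this series fit together once [PATCH 5/7] is applied. All class, method, and attribute names below come from the diffs above; the concrete root path, key, fragment, and bundle payload are illustrative stand-ins, and in normal operation the executor (not the caller) resolves output_directory:

    import pathlib

    from attrs import evolve

    from ref_core.metrics import MetricExecutionDefinition, MetricResult

    # Only the relative output_fragment is persisted (see the output_fragment
    # column added by the alembic migration); absolute paths are derived on demand.
    definition = MetricExecutionDefinition(
        key="global_mean_timeseries",
        metric_dataset=None,  # a MetricDataset in real use
        output_fragment=pathlib.Path("global-mean-timeseries"),
    )

    # An executor resolves the root output directory (LocalExecutor uses
    # config.paths.tmp); the path used here is a stand-in.
    definition = evolve(definition, output_directory=pathlib.Path("/tmp/ref-out"))

    # build_from_output_bundle creates the execution directory and writes output.json.
    result = MetricResult.build_from_output_bundle(definition, {"data": "value"})
    assert result.successful
    assert result.bundle_filename == pathlib.Path("output.json")

    # The absolute location is reconstructed from the definition rather than stored,
    # so the data folders can move without rewriting existing results.
    print(result.to_output_path(result.bundle_filename))
    # -> /tmp/ref-out/global-mean-timeseries/output.json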