Skip to content

Commit

Permalink
refactor: Rename output_filename to to_output_path
Browse files Browse the repository at this point in the history
  • Loading branch information
lewisjared committed Jan 7, 2025
1 parent c21e4ff commit 5504eda
Show file tree
Hide file tree
Showing 10 changed files with 112 additions and 50 deletions.
17 changes: 10 additions & 7 deletions docs/how-to-guides/running-metrics-locally.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,6 @@
# This guide will walk you through how to run a metric provider locally.


# %%
# !pip install prettyprinter

# %% tags=["remove_input"]
import json
from pathlib import Path
Expand All @@ -35,6 +32,7 @@
import pandas as pd
import prettyprinter
import ref_metrics_example
from attr import evolve
from ref_core.datasets import SourceDatasetType
from ref_core.executor import run_metric

Expand Down Expand Up @@ -138,12 +136,13 @@
# and which datasets should be used for the metric calculation.

# %%
output_directory = Path("out")
definition = metric_executions[0].build_metric_execution_info()
prettyprinter.pprint(definition)

# %%
# Update the output fragment to be a subdirectory of the current working directory
definition = attrs.evolve(definition, output_fragment=Path("out") / definition.output_fragment)
definition = attrs.evolve(definition, output_fragment=output_directory / definition.output_fragment)
definition.output_fragment

# %% [markdown]
Expand All @@ -163,7 +162,9 @@
result

# %%
with open(result.output_fragment) as fh:

output_file = result.definition.to_output_path(result.bundle_filename)
with open(output_file) as fh:
# Load the output bundle and pretty print
loaded_result = json.loads(fh.read())
print(json.dumps(loaded_result, indent=2))
Expand All @@ -175,7 +176,9 @@
# This will not perform any validation/verification of the output results.

# %%
direct_result = metric.run(definition=definition)
direct_result = metric.run(definition=evolve(definition, output_directory=output_directory))
assert direct_result.successful

direct_result
prettyprinter.pprint(direct_result)

# %%
6 changes: 4 additions & 2 deletions packages/ref-core/src/ref_core/executor/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,12 @@ def run_metric(self, metric: Metric, definition: MetricExecutionDefinition) -> M
"""
# TODO: This should be changed to use executor specific configuration
definition = evolve(definition, output_directory=self.config.paths.tmp)
definition.output_filename().mkdir(parents=True, exist_ok=True)
execution_output_path = definition.to_output_path(filename=None)
execution_output_path.mkdir(parents=True, exist_ok=True)

try:
return metric.run(definition=definition)
# TODO: Copy results to the output directory
except Exception:
logger.exception(f"Error running metric {metric.slug}")
return MetricResult.build_from_failure()
return MetricResult.build_from_failure(definition)
63 changes: 44 additions & 19 deletions packages/ref-core/src/ref_core/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,6 @@ class MetricExecutionDefinition:
This represents the information needed by a metric to perform a single execution of the metric
"""

output_fragment: pathlib.Path
"""
Relative directory to store the output of the metric execution
This is relative to the temporary directory which may differ by executor.
"""

key: str
"""
A unique identifier for the metric execution
Expand All @@ -37,6 +30,13 @@ class MetricExecutionDefinition:
Collection of datasets required for the metric execution
"""

output_fragment: pathlib.Path
"""
Relative directory to store the output of the metric execution
This is relative to the temporary directory which may differ by executor.
"""

output_directory: pathlib.Path | None = None
"""
Root directory for output data
Expand All @@ -45,9 +45,9 @@ class MetricExecutionDefinition:
the executor is being run.
"""

def output_filename(self, filename: str | None = None) -> pathlib.Path:
def to_output_path(self, filename: str | None) -> pathlib.Path:
"""
Get the full path to a file in the output directory
Get the absolute path for a file in the output directory
Parameters
----------
Expand Down Expand Up @@ -79,16 +79,22 @@ class MetricResult:

# Do we want to load a serialised version of the output bundle here or just a file path?

output_fragment: pathlib.Path | None
definition: MetricExecutionDefinition
"""
The definition of the metric execution that produced this result.
"""

bundle_filename: pathlib.Path | None
"""
Path to the output bundle file relative to the output directory.
Filename of the output bundle file relative to the execution directory.
The absolute path of the outputs may differ between executors
depending on where the output directory is mounted.
The contents of this file are defined by
[EMDS standard](https://github.com/Earth-System-Diagnostics-Standards/EMDS/blob/main/standards.md#common-output-bundle-format-)
"""

successful: bool
"""
Whether the metric ran successfully.
Expand All @@ -97,15 +103,15 @@ class MetricResult:

@staticmethod
def build_from_output_bundle(
configuration: MetricExecutionDefinition, cmec_output_bundle: dict[str, Any]
definition: MetricExecutionDefinition, cmec_output_bundle: dict[str, Any]
) -> "MetricResult":
"""
Build a MetricResult from a CMEC output bundle.
Parameters
----------
configuration
The configuration used to run the metric.
definition
The execution definition.
cmec_output_bundle
An output bundle in the CMEC format.
Expand All @@ -117,25 +123,44 @@ def build_from_output_bundle(
A prepared MetricResult object.
The output bundle will be written to the output directory.
"""
configuration.output_filename().mkdir(parents=True, exist_ok=True)
bundle_path = configuration.output_filename("output.json")
definition.to_output_path(filename=None).mkdir(parents=True, exist_ok=True)
bundle_path = definition.to_output_path("output.json")

with open(bundle_path, "w") as file_handle:
json.dump(cmec_output_bundle, file_handle)
return MetricResult(
output_fragment=configuration.output_fragment / "output.json",
definition=definition,
bundle_filename=pathlib.Path("output.json"),
successful=True,
)

@staticmethod
def build_from_failure() -> "MetricResult":
def build_from_failure(definition: MetricExecutionDefinition) -> "MetricResult":
"""
Build a failed metric result.
This is a placeholder.
Additional log information should still be captured in the output bundle.
"""
return MetricResult(output_fragment=None, successful=False)
return MetricResult(bundle_filename=None, successful=False, definition=definition)

def to_output_path(self, filename: str | None) -> pathlib.Path:
"""
Get the absolute path for a file in the output directory
Parameters
----------
filename
Name of the file to get the full path for
If None the path to the output bundle will be returned
Returns
-------
:
Full path to the file in the output directory
"""
return self.definition.to_output_path(filename)


@frozen(hash=True)
Expand Down
8 changes: 4 additions & 4 deletions packages/ref-core/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,11 @@ def __init__(self, temp_dir: pathlib.Path) -> None:
data_requirements = (DataRequirement(source_type=SourceDatasetType.CMIP6, filters=(), group_by=None),)

def run(self, definition: MetricExecutionDefinition) -> MetricResult:
# TODO: This doesn't write output.json, use build function?
return MetricResult(
output_fragment=self.temp_dir / definition.output_fragment / "output.json",
bundle_filename=self.temp_dir / definition.output_fragment / "output.json",
successful=True,
definition=definition,
)


Expand All @@ -30,9 +32,7 @@ class FailedMetric:
data_requirements = (DataRequirement(source_type=SourceDatasetType.CMIP6, filters=(), group_by=None),)

def run(self, definition: MetricExecutionDefinition) -> MetricResult:
return MetricResult(
successful=False,
)
return MetricResult.build_from_failure(definition)


@pytest.fixture
Expand Down
2 changes: 1 addition & 1 deletion packages/ref-core/tests/unit/test_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def test_run_metric(self, metric_definition, mock_metric):

result = executor.run_metric(mock_metric, metric_definition)
assert result.successful
assert result.output_fragment == metric_definition.output_fragment / "output.json"
assert result.bundle_filename == metric_definition.output_fragment / "output.json"


@pytest.mark.parametrize("executor_name", ["local", None])
Expand Down
12 changes: 8 additions & 4 deletions packages/ref-core/tests/unit/test_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,16 @@ def test_build(self, tmp_path):
result = MetricResult.build_from_output_bundle(config, {"data": "value"})

assert result.successful
assert result.output_fragment.exists()
assert result.output_fragment.is_file()
with open(result.output_fragment) as f:

# Convert relative path to absolute path
output_filename = result.to_output_path(result.bundle_filename)

assert output_filename.exists()
assert output_filename.is_file()
with open(output_filename) as f:
assert f.read() == '{"data": "value"}'

assert result.output_fragment.is_relative_to(tmp_path)
assert output_filename.is_relative_to(tmp_path)


@pytest.fixture
Expand Down
10 changes: 5 additions & 5 deletions packages/ref-metrics-esmvaltool/tests/unit/test_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def test_example_metric(tmp_path, mocker, metric_dataset, cmip6_data_catalog):
ds = cmip6_data_catalog.groupby("instance_id", as_index=False).first()
output_directory = tmp_path / "output"

configuration = MetricExecutionDefinition(
definition = MetricExecutionDefinition(
output_directory=output_directory,
output_fragment=tmp_path,
key="global_mean_timeseries",
Expand All @@ -36,7 +36,7 @@ def test_example_metric(tmp_path, mocker, metric_dataset, cmip6_data_catalog):
),
)

result_dir = configuration.output_fragment / "results" / "recipe_test_a"
result_dir = definition.output_fragment / "results" / "recipe_test_a"
result = result_dir / "work" / "timeseries" / "script1" / "result.nc"

def mock_check_call(cmd, *args, **kwargs):
Expand All @@ -58,11 +58,11 @@ def mock_check_call(cmd, *args, **kwargs):
)
open_dataset.return_value.attrs.__getitem__.return_value = "ABC"

result = metric.run(configuration)
result = metric.run(definition)

output_bundle_path = output_directory / result.output_fragment
output_bundle_path = definition.output_directory / definition.output_fragment / result.bundle_filename

assert result.successful
assert output_bundle_path.exists()
assert output_bundle_path.is_file()
assert result.output_fragment.name == "output.json"
assert result.bundle_filename.name == "output.json"
15 changes: 7 additions & 8 deletions packages/ref-metrics-example/tests/unit/test_metrics.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import pathlib
from unittest import mock

import pytest
from ref_core.datasets import DatasetCollection, MetricDataset, SourceDatasetType
Expand Down Expand Up @@ -28,15 +27,16 @@ def test_annual_mean(esgf_data_dir, metric_dataset):
assert annual_mean.time.size == 286


@mock.patch("ref_metrics_example.example.calculate_annual_mean_timeseries")
def test_example_metric(mock_calc, tmp_path, metric_dataset, cmip6_data_catalog):
def test_example_metric(tmp_path, metric_dataset, cmip6_data_catalog, mocker):
metric = GlobalMeanTimeseries()
ds = cmip6_data_catalog.groupby("instance_id").first()
output_directory = tmp_path / "output"

mock_calc = mocker.patch("ref_metrics_example.example.calculate_annual_mean_timeseries")

mock_calc.return_value.attrs.__getitem__.return_value = "ABC"

configuration = MetricExecutionDefinition(
definition = MetricExecutionDefinition(
output_directory=output_directory,
output_fragment=pathlib.Path(metric.slug),
key="global_mean_timeseries",
Expand All @@ -47,15 +47,14 @@ def test_example_metric(mock_calc, tmp_path, metric_dataset, cmip6_data_catalog)
),
)

result = metric.run(configuration)
result = metric.run(definition)

assert mock_calc.call_count == 1

assert result.output_fragment == pathlib.Path(metric.slug) / "output.json"
assert str(result.bundle_filename) == "output.json"

output_bundle_path = output_directory / result.output_fragment
output_bundle_path = definition.output_directory / definition.output_fragment / result.bundle_filename

assert result.successful
assert output_bundle_path.exists()
assert output_bundle_path.is_file()
assert result.output_fragment.name == "output.json"
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ dev-dependencies = [
"bump-my-version>=0.28.1",
"pytest-regressions>=2.5.0",
"ipywidgets>=8.1.5",
"prettyprinter>=0.18.0",
]

[tool.uv.workspace]
Expand Down
Loading

0 comments on commit 5504eda

Please sign in to comment.