From 0f08335bc164f1fef447a207454d1b18c89a6dac Mon Sep 17 00:00:00 2001
From: Jared Lewis
Date: Thu, 19 Dec 2024 12:52:19 +1100
Subject: [PATCH 1/7] feat: Use an executor specific output directory

---
 .../ref-core/src/ref_core/executor/local.py   | 22 ++++++--
 packages/ref-core/src/ref_core/metrics.py     | 52 ++++++++++++++++++-
 packages/ref-core/tests/unit/test_metrics.py  |  6 ++-
 .../src/ref_metrics_example/example.py        |  4 +-
 .../tests/unit/test_metrics.py                | 22 ++++++--
 ...4a447fbf6d65_metric_add_output_fragment.py | 42 +++++++++++++++
 packages/ref/src/ref/datasets/cmip6.py        |  6 ++-
 .../ref/src/ref/models/metric_execution.py    | 16 +++++-
 packages/ref/src/ref/solver.py                |  4 +-
 9 files changed, 158 insertions(+), 16 deletions(-)
 create mode 100644 packages/ref/alembic/versions/4a447fbf6d65_metric_add_output_fragment.py

diff --git a/packages/ref-core/src/ref_core/executor/local.py b/packages/ref-core/src/ref_core/executor/local.py
index 0a4064f..1177d18 100644
--- a/packages/ref-core/src/ref_core/executor/local.py
+++ b/packages/ref-core/src/ref_core/executor/local.py
@@ -1,4 +1,8 @@
-from ref_core.metrics import Metric, MetricExecutionDefinition, MetricResult
+from attrs import evolve
+from loguru import logger
+
+from ref.config import Config
+from ref_core.metrics import FailedMetricResult, Metric, MetricExecutionDefinition, MetricResult
 
 
 class LocalExecutor:
@@ -12,6 +16,9 @@ class LocalExecutor:
 
     name = "local"
 
+    def __init__(self, config: Config | None = None):
+        self.config = Config.default() if config is None else config
+
     def run_metric(self, metric: Metric, definition: MetricExecutionDefinition) -> MetricResult:
         """
         Run a metric in process
@@ -28,7 +35,12 @@ def run_metric(self, metric: Metric, definition: MetricExecutionDefinition) -> M
         :
             Results from running the metric
         """
-        # TODO: Update fragment use the output directory which may vary depending on the executor
-        definition.output_fragment.mkdir(parents=True, exist_ok=True)
-
-        return metric.run(definition=definition)
+        # TODO: This should be changed to use executor specific configuration
+        definition = evolve(definition, output_directory=self.config.paths.tmp)
+        definition.output_filename().mkdir(parents=True, exist_ok=True)
+
+        try:
+            return metric.run(definition=definition)
+        except Exception:
+            logger.exception(f"Error running metric {metric.slug}")
+            return FailedMetricResult()
diff --git a/packages/ref-core/src/ref_core/metrics.py b/packages/ref-core/src/ref_core/metrics.py
index 06d75f1..ed30023 100644
--- a/packages/ref-core/src/ref_core/metrics.py
+++ b/packages/ref-core/src/ref_core/metrics.py
@@ -37,6 +37,36 @@ class MetricExecutionDefinition:
     Collection of datasets required for the metric execution
     """
 
+    output_directory: pathlib.Path | None = None
+    """
+    Root directory for output data
+
+    This will be resolved by the executor as the output directory may vary depending on where
+    the executor is being run.
+ """ + + def output_filename(self, filename: str | None = None) -> pathlib.Path: + """ + Get the full path to a file in the output directory + + Parameters + ---------- + filename + Name of the file to get the full path for + + Returns + ------- + : + Full path to the file in the output directory + """ + if self.output_directory is None: + raise AssertionError("Output directory is not set") # pragma: no cover + + if filename is None: + return self.output_directory / self.output_fragment + else: + return self.output_directory / self.output_fragment / filename + @frozen class MetricResult: @@ -63,7 +93,9 @@ class MetricResult: # Log info is in the output bundle file already, but is definitely useful @staticmethod - def build(configuration: MetricExecutionDefinition, cmec_output_bundle: dict[str, Any]) -> "MetricResult": + def build_from_output_bundle( + configuration: MetricExecutionDefinition, cmec_output_bundle: dict[str, Any] + ) -> "MetricResult": """ Build a MetricResult from a CMEC output bundle. @@ -82,7 +114,10 @@ def build(configuration: MetricExecutionDefinition, cmec_output_bundle: dict[str A prepared MetricResult object. The output bundle will be written to the output directory. """ - with open(configuration.output_fragment / "output.json", "w") as file_handle: + configuration.output_filename().mkdir(parents=True, exist_ok=True) + bundle_path = configuration.output_filename("output.json") + + with open(bundle_path, "w") as file_handle: json.dump(cmec_output_bundle, file_handle) return MetricResult( output_bundle=configuration.output_fragment / "output.json", @@ -90,6 +125,19 @@ def build(configuration: MetricExecutionDefinition, cmec_output_bundle: dict[str ) +@frozen +class FailedMetricResult(MetricResult): + """ + The result of running a metric. + + The content of the result follows the Earth System Metrics and Diagnostics Standards + ([EMDS](https://github.com/Earth-System-Diagnostics-Standards/EMDS/blob/main/standards.md)). 
+ """ + + def __init__(self): + super().__init__(output_bundle=None, successful=False) + + @frozen(hash=True) class DataRequirement: """ diff --git a/packages/ref-core/tests/unit/test_metrics.py b/packages/ref-core/tests/unit/test_metrics.py index 818de5f..b22d8d5 100644 --- a/packages/ref-core/tests/unit/test_metrics.py +++ b/packages/ref-core/tests/unit/test_metrics.py @@ -2,6 +2,7 @@ import pandas as pd import pytest +from attr import evolve from ref_core.datasets import FacetFilter, SourceDatasetType from ref_core.metrics import DataRequirement, MetricExecutionDefinition, MetricResult @@ -11,7 +12,10 @@ def test_build(self, tmp_path): config = MetricExecutionDefinition( output_fragment=tmp_path, key="mocked-metric-slug", metric_dataset=None ) - result = MetricResult.build(config, {"data": "value"}) + # Setting the output directory generally happens as a side effect of the executor + config = evolve(config, output_directory=tmp_path) + + result = MetricResult.build_from_output_bundle(config, {"data": "value"}) assert result.successful assert result.output_bundle.exists() diff --git a/packages/ref-metrics-example/src/ref_metrics_example/example.py b/packages/ref-metrics-example/src/ref_metrics_example/example.py index 447c0f6..2d3b800 100644 --- a/packages/ref-metrics-example/src/ref_metrics_example/example.py +++ b/packages/ref-metrics-example/src/ref_metrics_example/example.py @@ -120,4 +120,6 @@ def run(self, definition: MetricExecutionDefinition) -> MetricResult: input_files=input_datasets.path.to_list() ) - return MetricResult.build(definition, format_cmec_output_bundle(annual_mean_global_mean_timeseries)) + return MetricResult.build_from_output_bundle( + definition, format_cmec_output_bundle(annual_mean_global_mean_timeseries) + ) diff --git a/packages/ref-metrics-example/tests/unit/test_metrics.py b/packages/ref-metrics-example/tests/unit/test_metrics.py index 7c277db..9b7bd45 100644 --- a/packages/ref-metrics-example/tests/unit/test_metrics.py +++ b/packages/ref-metrics-example/tests/unit/test_metrics.py @@ -1,3 +1,6 @@ +import pathlib +from unittest import mock + import pytest from ref_core.datasets import DatasetCollection, MetricDataset, SourceDatasetType from ref_core.metrics import MetricExecutionDefinition @@ -25,12 +28,17 @@ def test_annual_mean(esgf_data_dir, metric_dataset): assert annual_mean.time.size == 286 -def test_example_metric(tmp_path, metric_dataset, cmip6_data_catalog): +@mock.patch("ref_metrics_example.example.calculate_annual_mean_timeseries") +def test_example_metric(mock_calc, tmp_path, metric_dataset, cmip6_data_catalog): metric = GlobalMeanTimeseries() ds = cmip6_data_catalog.groupby("instance_id").first() + output_directory = tmp_path / "output" + + mock_calc.return_value.attrs.__getitem__.return_value = "ABC" configuration = MetricExecutionDefinition( - output_fragment=tmp_path, + output_directory=output_directory, + output_fragment=pathlib.Path(metric.slug), key="global_mean_timeseries", metric_dataset=MetricDataset( { @@ -41,7 +49,13 @@ def test_example_metric(tmp_path, metric_dataset, cmip6_data_catalog): result = metric.run(configuration) + assert mock_calc.call_count == 1 + + assert result.output_bundle == pathlib.Path(metric.slug) / "output.json" + + output_bundle_path = output_directory / result.output_bundle + assert result.successful - assert result.output_bundle.exists() - assert result.output_bundle.is_file() + assert output_bundle_path.exists() + assert output_bundle_path.is_file() assert result.output_bundle.name == "output.json" diff 
--git a/packages/ref/alembic/versions/4a447fbf6d65_metric_add_output_fragment.py b/packages/ref/alembic/versions/4a447fbf6d65_metric_add_output_fragment.py
new file mode 100644
index 0000000..b382cd8
--- /dev/null
+++ b/packages/ref/alembic/versions/4a447fbf6d65_metric_add_output_fragment.py
@@ -0,0 +1,42 @@
+"""metric_add_output_fragment
+
+Revision ID: 4a447fbf6d65
+Revises: 4b95a617184e
+Create Date: 2024-12-19 12:16:38.605113
+
+"""
+
+from collections.abc import Sequence
+from typing import Union
+
+import sqlalchemy as sa
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision: str = "4a447fbf6d65"
+down_revision: Union[str, None] = "4b95a617184e"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    with op.batch_alter_table("metric_execution_result", schema=None) as batch_op:
+        batch_op.add_column(
+            sa.Column(
+                "output_fragment",
+                sa.String(),
+                nullable=False,
+                default="",  # This is the default value for the migration
+            )
+        )
+
+    # ### end Alembic commands ###
+
+
+def downgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    with op.batch_alter_table("metric_execution_result", schema=None) as batch_op:
+        batch_op.drop_column("output_fragment")
+
+    # ### end Alembic commands ###
diff --git a/packages/ref/src/ref/datasets/cmip6.py b/packages/ref/src/ref/datasets/cmip6.py
index 90a6e94..2844a08 100644
--- a/packages/ref/src/ref/datasets/cmip6.py
+++ b/packages/ref/src/ref/datasets/cmip6.py
@@ -50,7 +50,11 @@ def _fix_parent_variant_label(group: pd.DataFrame) -> pd.DataFrame:
 
         return group
 
-    data_catalog = data_catalog.groupby("instance_id").apply(_fix_parent_variant_label).reset_index(drop=True)
+    data_catalog = (
+        data_catalog.groupby("instance_id")
+        .apply(_fix_parent_variant_label, include_groups=False)
+        .reset_index(level="instance_id")
+    )
 
     data_catalog["branch_time_in_child"] = _clean_branch_time(data_catalog["branch_time_in_child"])
     data_catalog["branch_time_in_parent"] = _clean_branch_time(data_catalog["branch_time_in_parent"])
diff --git a/packages/ref/src/ref/models/metric_execution.py b/packages/ref/src/ref/models/metric_execution.py
index 6a53162..d3aaa63 100644
--- a/packages/ref/src/ref/models/metric_execution.py
+++ b/packages/ref/src/ref/models/metric_execution.py
@@ -98,16 +98,30 @@ class MetricExecutionResult(CreatedUpdatedMixin, Base):
     """
     Represents a run of a metric calculation
 
-    A execution might be run multiple times as new data becomes available.
+    An execution might be run multiple times as new data becomes available.
     """
 
     __tablename__ = "metric_execution_result"
     __table_args__ = (
+        # TODO: This unique constraint is constraining...
+        # If we perform a run with hash A, then run with hash B, then run with hash A again this will fail
+        # This may happen if a dataset is retracted
+        # This will currently result in an IntegrityError so we will know if it ever occurs
        UniqueConstraint("metric_execution_id", "dataset_hash", name="metric_execution_result_ident"),
     )
 
     id: Mapped[int] = mapped_column(primary_key=True)
 
+    output_fragment: Mapped[str] = mapped_column()
+    """
+    Relative directory to store the output of the metric execution.
+
+    During execution this directory is relative to the temporary directory.
+    If the metric execution is successful, the results will be moved to the final output directory
+    and the temporary directory will be cleaned up.
+ This directory may contain multiple input and output files. + """ + metric_execution_id: Mapped[int] = mapped_column(ForeignKey("metric_execution.id")) """ The target metric execution diff --git a/packages/ref/src/ref/solver.py b/packages/ref/src/ref/solver.py index b98f05a..05d7754 100644 --- a/packages/ref/src/ref/solver.py +++ b/packages/ref/src/ref/solver.py @@ -254,7 +254,9 @@ def solve_metrics(db: Database, dry_run: bool = False, solver: MetricSolver | No if metric_execution_model.should_run(info.metric_dataset.hash): logger.info(f"Running metric {metric_execution_model.key}") metric_execution_result = MetricExecutionResult( - metric_execution=metric_execution_model, dataset_hash=info.metric_dataset.hash + metric_execution=metric_execution_model, + dataset_hash=info.metric_dataset.hash, + output_fragment=str(info.output_fragment), ) db.session.add(metric_execution_result) db.session.flush() From 21d4b6bb9f1add72d524c3a5672cf1b49835700a Mon Sep 17 00:00:00 2001 From: Jared Lewis Date: Thu, 19 Dec 2024 13:00:39 +1100 Subject: [PATCH 2/7] refactor: Use a build method for failed executions --- .../ref-core/src/ref_core/executor/local.py | 4 ++-- packages/ref-core/src/ref_core/metrics.py | 19 ++++++++----------- 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/packages/ref-core/src/ref_core/executor/local.py b/packages/ref-core/src/ref_core/executor/local.py index 1177d18..82434ee 100644 --- a/packages/ref-core/src/ref_core/executor/local.py +++ b/packages/ref-core/src/ref_core/executor/local.py @@ -2,7 +2,7 @@ from loguru import logger from ref.config import Config -from ref_core.metrics import FailedMetricResult, Metric, MetricExecutionDefinition, MetricResult +from ref_core.metrics import Metric, MetricExecutionDefinition, MetricResult class LocalExecutor: @@ -43,4 +43,4 @@ def run_metric(self, metric: Metric, definition: MetricExecutionDefinition) -> M return metric.run(definition=definition) except Exception: logger.exception(f"Error running metric {metric.slug}") - return FailedMetricResult() + return MetricResult.build_from_failure() diff --git a/packages/ref-core/src/ref_core/metrics.py b/packages/ref-core/src/ref_core/metrics.py index ed30023..79b2d6e 100644 --- a/packages/ref-core/src/ref_core/metrics.py +++ b/packages/ref-core/src/ref_core/metrics.py @@ -124,18 +124,15 @@ def build_from_output_bundle( successful=True, ) + @staticmethod + def build_from_failure() -> "MetricResult": + """ + Build a failed metric result. -@frozen -class FailedMetricResult(MetricResult): - """ - The result of running a metric. - - The content of the result follows the Earth System Metrics and Diagnostics Standards - ([EMDS](https://github.com/Earth-System-Diagnostics-Standards/EMDS/blob/main/standards.md)). - """ - - def __init__(self): - super().__init__(output_bundle=None, successful=False) + This is a placeholder. + Additional log information should still be captured in the output bundle. 
+        """
+        return MetricResult(output_bundle=None, successful=False)
 
 
 @frozen(hash=True)
From 478479e674fdbb7f69ed3f42f06994515872b622 Mon Sep 17 00:00:00 2001
From: Jared Lewis
Date: Thu, 19 Dec 2024 13:03:24 +1100
Subject: [PATCH 3/7] docs: Changelog

---
 changelog/46.feature.md | 4 ++++
 1 file changed, 4 insertions(+)
 create mode 100644 changelog/46.feature.md

diff --git a/changelog/46.feature.md b/changelog/46.feature.md
new file mode 100644
index 0000000..d1501bf
--- /dev/null
+++ b/changelog/46.feature.md
@@ -0,0 +1,4 @@
+Support differing assumptions about the root paths used by the executors and the ref CLI.
+
+Where possible, path fragments are stored in the database instead of complete paths.
+This makes it possible to move the data folders without needing to update the database.
From c21e4ff155b80b23f1c12dec6798d3bc0c0032c8 Mon Sep 17 00:00:00 2001
From: Jared Lewis
Date: Tue, 7 Jan 2025 10:50:17 +1100
Subject: [PATCH 4/7] refactor: Rename output_bundle to output_fragment

---
 docs/how-to-guides/running-metrics-locally.py      |  2 +-
 packages/ref-core/src/ref_core/metrics.py          | 13 ++++++++-----
 packages/ref-core/tests/conftest.py                |  2 +-
 packages/ref-core/tests/unit/test_executor.py      |  2 +-
 packages/ref-core/tests/unit/test_metrics.py       |  8 ++++----
 .../src/ref_metrics_esmvaltool/example.py          |  4 +++-
 .../tests/unit/test_metrics.py                     | 10 +++++++---
 .../ref-metrics-example/tests/unit/test_metrics.py |  6 +++---
 8 files changed, 28 insertions(+), 19 deletions(-)

diff --git a/docs/how-to-guides/running-metrics-locally.py b/docs/how-to-guides/running-metrics-locally.py
index 7e9b7a2..4c206b2 100644
--- a/docs/how-to-guides/running-metrics-locally.py
+++ b/docs/how-to-guides/running-metrics-locally.py
@@ -163,7 +163,7 @@
 result
 
 # %%
-with open(result.output_bundle) as fh:
+with open(result.output_fragment) as fh:
     # Load the output bundle and pretty print
     loaded_result = json.loads(fh.read())
     print(json.dumps(loaded_result, indent=2))
diff --git a/packages/ref-core/src/ref_core/metrics.py b/packages/ref-core/src/ref_core/metrics.py
index 79b2d6e..4ae6ac8 100644
--- a/packages/ref-core/src/ref_core/metrics.py
+++ b/packages/ref-core/src/ref_core/metrics.py
@@ -79,9 +79,12 @@ class MetricResult:
 
     # Do we want to load a serialised version of the output bundle here or just a file path?
 
-    output_bundle: pathlib.Path | None
+    output_fragment: pathlib.Path | None
     """
-    Path to the output bundle file.
+    Path to the output bundle file relative to the output directory.
+
+    The absolute path of the outputs may differ between executors
+    depending on where the output directory is mounted.
 
     The contents of this file are defined by
     [EMDS standard](https://github.com/Earth-System-Diagnostics-Standards/EMDS/blob/main/standards.md#common-output-bundle-format-)
@@ -120,7 +123,7 @@
         with open(bundle_path, "w") as file_handle:
             json.dump(cmec_output_bundle, file_handle)
         return MetricResult(
-            output_bundle=configuration.output_fragment / "output.json",
+            output_fragment=configuration.output_fragment / "output.json",
             successful=True,
         )
 
@@ -132,7 +135,7 @@ def build_from_failure() -> "MetricResult":
 
         This is a placeholder.
         Additional log information should still be captured in the output bundle.
         """
-        return MetricResult(output_bundle=None, successful=False)
+        return MetricResult(output_fragment=None, successful=False)
 
 
 @frozen(hash=True)
@@ -167,7 +170,7 @@ class DataRequirement:
     """
     The fields to group the datasets by.
- This groupby operation is performed after the data catalog is filtered according to `filters`. + This group by operation is performed after the data catalog is filtered according to `filters`. Each group will contain a unique combination of values from the metadata fields, and will result in a separate execution of the metric. If `group_by=None`, all datasets will be processed together as a single execution. diff --git a/packages/ref-core/tests/conftest.py b/packages/ref-core/tests/conftest.py index 0fbacef..35f06b5 100644 --- a/packages/ref-core/tests/conftest.py +++ b/packages/ref-core/tests/conftest.py @@ -18,7 +18,7 @@ def __init__(self, temp_dir: pathlib.Path) -> None: def run(self, definition: MetricExecutionDefinition) -> MetricResult: return MetricResult( - output_bundle=self.temp_dir / definition.output_fragment / "output.json", + output_fragment=self.temp_dir / definition.output_fragment / "output.json", successful=True, ) diff --git a/packages/ref-core/tests/unit/test_executor.py b/packages/ref-core/tests/unit/test_executor.py index e950950..03fb82c 100644 --- a/packages/ref-core/tests/unit/test_executor.py +++ b/packages/ref-core/tests/unit/test_executor.py @@ -25,7 +25,7 @@ def test_run_metric(self, metric_definition, mock_metric): result = executor.run_metric(mock_metric, metric_definition) assert result.successful - assert result.output_bundle == metric_definition.output_fragment / "output.json" + assert result.output_fragment == metric_definition.output_fragment / "output.json" @pytest.mark.parametrize("executor_name", ["local", None]) diff --git a/packages/ref-core/tests/unit/test_metrics.py b/packages/ref-core/tests/unit/test_metrics.py index b22d8d5..2dc6148 100644 --- a/packages/ref-core/tests/unit/test_metrics.py +++ b/packages/ref-core/tests/unit/test_metrics.py @@ -18,12 +18,12 @@ def test_build(self, tmp_path): result = MetricResult.build_from_output_bundle(config, {"data": "value"}) assert result.successful - assert result.output_bundle.exists() - assert result.output_bundle.is_file() - with open(result.output_bundle) as f: + assert result.output_fragment.exists() + assert result.output_fragment.is_file() + with open(result.output_fragment) as f: assert f.read() == '{"data": "value"}' - assert result.output_bundle.is_relative_to(tmp_path) + assert result.output_fragment.is_relative_to(tmp_path) @pytest.fixture diff --git a/packages/ref-metrics-esmvaltool/src/ref_metrics_esmvaltool/example.py b/packages/ref-metrics-esmvaltool/src/ref_metrics_esmvaltool/example.py index 18a0a8e..1060c56 100644 --- a/packages/ref-metrics-esmvaltool/src/ref_metrics_esmvaltool/example.py +++ b/packages/ref-metrics-esmvaltool/src/ref_metrics_esmvaltool/example.py @@ -105,4 +105,6 @@ def run(self, definition: MetricExecutionDefinition) -> MetricResult: result = next(result_dir.glob("work/timeseries/script1/*.nc")) annual_mean_global_mean_timeseries = xarray.open_dataset(result) - return MetricResult.build(definition, format_cmec_output_bundle(annual_mean_global_mean_timeseries)) + return MetricResult.build_from_output_bundle( + definition, format_cmec_output_bundle(annual_mean_global_mean_timeseries) + ) diff --git a/packages/ref-metrics-esmvaltool/tests/unit/test_metrics.py b/packages/ref-metrics-esmvaltool/tests/unit/test_metrics.py index 3b8b93d..c306e8c 100644 --- a/packages/ref-metrics-esmvaltool/tests/unit/test_metrics.py +++ b/packages/ref-metrics-esmvaltool/tests/unit/test_metrics.py @@ -23,8 +23,10 @@ def metric_dataset(cmip6_data_catalog) -> MetricDataset: def 
test_example_metric(tmp_path, mocker, metric_dataset, cmip6_data_catalog): metric = GlobalMeanTimeseries() ds = cmip6_data_catalog.groupby("instance_id", as_index=False).first() + output_directory = tmp_path / "output" configuration = MetricExecutionDefinition( + output_directory=output_directory, output_fragment=tmp_path, key="global_mean_timeseries", metric_dataset=MetricDataset( @@ -58,7 +60,9 @@ def mock_check_call(cmd, *args, **kwargs): result = metric.run(configuration) + output_bundle_path = output_directory / result.output_fragment + assert result.successful - assert result.output_bundle.exists() - assert result.output_bundle.is_file() - assert result.output_bundle.name == "output.json" + assert output_bundle_path.exists() + assert output_bundle_path.is_file() + assert result.output_fragment.name == "output.json" diff --git a/packages/ref-metrics-example/tests/unit/test_metrics.py b/packages/ref-metrics-example/tests/unit/test_metrics.py index 9b7bd45..4ba0cf4 100644 --- a/packages/ref-metrics-example/tests/unit/test_metrics.py +++ b/packages/ref-metrics-example/tests/unit/test_metrics.py @@ -51,11 +51,11 @@ def test_example_metric(mock_calc, tmp_path, metric_dataset, cmip6_data_catalog) assert mock_calc.call_count == 1 - assert result.output_bundle == pathlib.Path(metric.slug) / "output.json" + assert result.output_fragment == pathlib.Path(metric.slug) / "output.json" - output_bundle_path = output_directory / result.output_bundle + output_bundle_path = output_directory / result.output_fragment assert result.successful assert output_bundle_path.exists() assert output_bundle_path.is_file() - assert result.output_bundle.name == "output.json" + assert result.output_fragment.name == "output.json" From 5504eda4516a69198f1eae4fd61e621f20866a84 Mon Sep 17 00:00:00 2001 From: Jared Lewis Date: Tue, 7 Jan 2025 13:54:24 +1100 Subject: [PATCH 5/7] refactor: Rename output_filename to to_output_path --- docs/how-to-guides/running-metrics-locally.py | 17 ++--- .../ref-core/src/ref_core/executor/local.py | 6 +- packages/ref-core/src/ref_core/metrics.py | 63 +++++++++++++------ packages/ref-core/tests/conftest.py | 8 +-- packages/ref-core/tests/unit/test_executor.py | 2 +- packages/ref-core/tests/unit/test_metrics.py | 12 ++-- .../tests/unit/test_metrics.py | 10 +-- .../tests/unit/test_metrics.py | 15 +++-- pyproject.toml | 1 + uv.lock | 28 +++++++++ 10 files changed, 112 insertions(+), 50 deletions(-) diff --git a/docs/how-to-guides/running-metrics-locally.py b/docs/how-to-guides/running-metrics-locally.py index 4c206b2..200d1ae 100644 --- a/docs/how-to-guides/running-metrics-locally.py +++ b/docs/how-to-guides/running-metrics-locally.py @@ -24,9 +24,6 @@ # This guide will walk you through how to run a metric provider locally. -# %% -# !pip install prettyprinter - # %% tags=["remove_input"] import json from pathlib import Path @@ -35,6 +32,7 @@ import pandas as pd import prettyprinter import ref_metrics_example +from attr import evolve from ref_core.datasets import SourceDatasetType from ref_core.executor import run_metric @@ -138,12 +136,13 @@ # and which datasets should be used for the metric calculation. 
# %%
+output_directory = Path("out")
 definition = metric_executions[0].build_metric_execution_info()
 prettyprinter.pprint(definition)
 
 # %%
 # Update the output fragment to be a subdirectory of the current working directory
-definition = attrs.evolve(definition, output_fragment=Path("out") / definition.output_fragment)
+definition = attrs.evolve(definition, output_fragment=output_directory / definition.output_fragment)
 definition.output_fragment
 
 # %% [markdown]
@@ -163,7 +162,9 @@
 result
 
 # %%
-with open(result.output_fragment) as fh:
+
+output_file = result.definition.to_output_path(result.bundle_filename)
+with open(output_file) as fh:
     # Load the output bundle and pretty print
     loaded_result = json.loads(fh.read())
     print(json.dumps(loaded_result, indent=2))
@@ -175,7 +176,9 @@
 # This will not perform any validation/verification of the output results.
 
 # %%
-direct_result = metric.run(definition=definition)
+direct_result = metric.run(definition=evolve(definition, output_directory=output_directory))
 
 assert direct_result.successful
-direct_result
+prettyprinter.pprint(direct_result)
+
+# %%
diff --git a/packages/ref-core/src/ref_core/executor/local.py b/packages/ref-core/src/ref_core/executor/local.py
index 82434ee..ea4ecd6 100644
--- a/packages/ref-core/src/ref_core/executor/local.py
+++ b/packages/ref-core/src/ref_core/executor/local.py
@@ -37,10 +37,12 @@ def run_metric(self, metric: Metric, definition: MetricExecutionDefinition) -> M
         """
         # TODO: This should be changed to use executor specific configuration
         definition = evolve(definition, output_directory=self.config.paths.tmp)
-        definition.output_filename().mkdir(parents=True, exist_ok=True)
+        execution_output_path = definition.to_output_path(filename=None)
+        execution_output_path.mkdir(parents=True, exist_ok=True)
 
         try:
             return metric.run(definition=definition)
+        # TODO: Copy results to the output directory
         except Exception:
             logger.exception(f"Error running metric {metric.slug}")
-            return MetricResult.build_from_failure()
+            return MetricResult.build_from_failure(definition)
diff --git a/packages/ref-core/src/ref_core/metrics.py b/packages/ref-core/src/ref_core/metrics.py
index 4ae6ac8..032db78 100644
--- a/packages/ref-core/src/ref_core/metrics.py
+++ b/packages/ref-core/src/ref_core/metrics.py
@@ -17,13 +17,6 @@ class MetricExecutionDefinition:
     This represents the information needed by a metric to perform a single execution of the metric
     """
 
-    output_fragment: pathlib.Path
-    """
-    Relative directory to store the output of the metric execution
-
-    This is relative to the temporary directory which may differ by executor.
-    """
-
     key: str
     """
     A unique identifier for the metric execution
@@ -37,6 +30,13 @@
     Collection of datasets required for the metric execution
     """
 
+    output_fragment: pathlib.Path
+    """
+    Relative directory to store the output of the metric execution
+
+    This is relative to the temporary directory which may differ by executor.
+    """
+
     output_directory: pathlib.Path | None = None
     """
     Root directory for output data
@@ -45,9 +45,9 @@
-    def output_filename(self, filename: str | None = None) -> pathlib.Path:
+    def to_output_path(self, filename: str | None) -> pathlib.Path:
         """
-        Get the full path to a file in the output directory
+        Get the absolute path for a file in the output directory
 
         Parameters
         ----------
         filename
@@ -79,9 +79,14 @@ class MetricResult:
 
     # Do we want to load a serialised version of the output bundle here or just a file path?
- output_fragment: pathlib.Path | None + definition: MetricExecutionDefinition + """ + The definition of the metric execution that produced this result. + """ + + bundle_filename: pathlib.Path | None """ - Path to the output bundle file relative to the output directory. + Filename of the output bundle file relative to the execution directory. The absolute path of the outputs may differ between executors depending on where the output directory is mounted. @@ -89,6 +94,7 @@ class MetricResult: The contents of this file are defined by [EMDS standard](https://github.com/Earth-System-Diagnostics-Standards/EMDS/blob/main/standards.md#common-output-bundle-format-) """ + successful: bool """ Whether the metric ran successfully. @@ -97,15 +103,15 @@ class MetricResult: @staticmethod def build_from_output_bundle( - configuration: MetricExecutionDefinition, cmec_output_bundle: dict[str, Any] + definition: MetricExecutionDefinition, cmec_output_bundle: dict[str, Any] ) -> "MetricResult": """ Build a MetricResult from a CMEC output bundle. Parameters ---------- - configuration - The configuration used to run the metric. + definition + The execution defintion. cmec_output_bundle An output bundle in the CMEC format. @@ -117,25 +123,44 @@ def build_from_output_bundle( A prepared MetricResult object. The output bundle will be written to the output directory. """ - configuration.output_filename().mkdir(parents=True, exist_ok=True) - bundle_path = configuration.output_filename("output.json") + definition.to_output_path(filename=None).mkdir(parents=True, exist_ok=True) + bundle_path = definition.to_output_path("output.json") with open(bundle_path, "w") as file_handle: json.dump(cmec_output_bundle, file_handle) return MetricResult( - output_fragment=configuration.output_fragment / "output.json", + definition=definition, + bundle_filename=pathlib.Path("output.json"), successful=True, ) @staticmethod - def build_from_failure() -> "MetricResult": + def build_from_failure(definition: MetricExecutionDefinition) -> "MetricResult": """ Build a failed metric result. This is a placeholder. Additional log information should still be captured in the output bundle. """ - return MetricResult(output_fragment=None, successful=False) + return MetricResult(bundle_filename=None, successful=False, definition=definition) + + def to_output_path(self, filename: str | None) -> pathlib.Path: + """ + Get the absolute path for a file in the output directory + + Parameters + ---------- + filename + Name of the file to get the full path for + + If None the path to the output bundle will be returned + + Returns + ------- + : + Full path to the file in the output directory + """ + return self.definition.to_output_path(filename) @frozen(hash=True) diff --git a/packages/ref-core/tests/conftest.py b/packages/ref-core/tests/conftest.py index 35f06b5..b82f46f 100644 --- a/packages/ref-core/tests/conftest.py +++ b/packages/ref-core/tests/conftest.py @@ -17,9 +17,11 @@ def __init__(self, temp_dir: pathlib.Path) -> None: data_requirements = (DataRequirement(source_type=SourceDatasetType.CMIP6, filters=(), group_by=None),) def run(self, definition: MetricExecutionDefinition) -> MetricResult: + # TODO: This doesn't write output.json, use build function? 
return MetricResult( - output_fragment=self.temp_dir / definition.output_fragment / "output.json", + bundle_filename=self.temp_dir / definition.output_fragment / "output.json", successful=True, + definition=definition, ) @@ -30,9 +32,7 @@ class FailedMetric: data_requirements = (DataRequirement(source_type=SourceDatasetType.CMIP6, filters=(), group_by=None),) def run(self, definition: MetricExecutionDefinition) -> MetricResult: - return MetricResult( - successful=False, - ) + return MetricResult.build_from_failure(definition) @pytest.fixture diff --git a/packages/ref-core/tests/unit/test_executor.py b/packages/ref-core/tests/unit/test_executor.py index 03fb82c..856efcb 100644 --- a/packages/ref-core/tests/unit/test_executor.py +++ b/packages/ref-core/tests/unit/test_executor.py @@ -25,7 +25,7 @@ def test_run_metric(self, metric_definition, mock_metric): result = executor.run_metric(mock_metric, metric_definition) assert result.successful - assert result.output_fragment == metric_definition.output_fragment / "output.json" + assert result.bundle_filename == metric_definition.output_fragment / "output.json" @pytest.mark.parametrize("executor_name", ["local", None]) diff --git a/packages/ref-core/tests/unit/test_metrics.py b/packages/ref-core/tests/unit/test_metrics.py index 2dc6148..7a1b2a1 100644 --- a/packages/ref-core/tests/unit/test_metrics.py +++ b/packages/ref-core/tests/unit/test_metrics.py @@ -18,12 +18,16 @@ def test_build(self, tmp_path): result = MetricResult.build_from_output_bundle(config, {"data": "value"}) assert result.successful - assert result.output_fragment.exists() - assert result.output_fragment.is_file() - with open(result.output_fragment) as f: + + # Convert relative path to absolute path + output_filename = result.to_output_path(result.bundle_filename) + + assert output_filename.exists() + assert output_filename.is_file() + with open(output_filename) as f: assert f.read() == '{"data": "value"}' - assert result.output_fragment.is_relative_to(tmp_path) + assert output_filename.is_relative_to(tmp_path) @pytest.fixture diff --git a/packages/ref-metrics-esmvaltool/tests/unit/test_metrics.py b/packages/ref-metrics-esmvaltool/tests/unit/test_metrics.py index c306e8c..9b0beb7 100644 --- a/packages/ref-metrics-esmvaltool/tests/unit/test_metrics.py +++ b/packages/ref-metrics-esmvaltool/tests/unit/test_metrics.py @@ -25,7 +25,7 @@ def test_example_metric(tmp_path, mocker, metric_dataset, cmip6_data_catalog): ds = cmip6_data_catalog.groupby("instance_id", as_index=False).first() output_directory = tmp_path / "output" - configuration = MetricExecutionDefinition( + definition = MetricExecutionDefinition( output_directory=output_directory, output_fragment=tmp_path, key="global_mean_timeseries", @@ -36,7 +36,7 @@ def test_example_metric(tmp_path, mocker, metric_dataset, cmip6_data_catalog): ), ) - result_dir = configuration.output_fragment / "results" / "recipe_test_a" + result_dir = definition.output_fragment / "results" / "recipe_test_a" result = result_dir / "work" / "timeseries" / "script1" / "result.nc" def mock_check_call(cmd, *args, **kwargs): @@ -58,11 +58,11 @@ def mock_check_call(cmd, *args, **kwargs): ) open_dataset.return_value.attrs.__getitem__.return_value = "ABC" - result = metric.run(configuration) + result = metric.run(definition) - output_bundle_path = output_directory / result.output_fragment + output_bundle_path = definition.output_directory / definition.output_fragment / result.bundle_filename assert result.successful assert output_bundle_path.exists() assert 
output_bundle_path.is_file() - assert result.output_fragment.name == "output.json" + assert result.bundle_filename.name == "output.json" diff --git a/packages/ref-metrics-example/tests/unit/test_metrics.py b/packages/ref-metrics-example/tests/unit/test_metrics.py index 4ba0cf4..b102ecb 100644 --- a/packages/ref-metrics-example/tests/unit/test_metrics.py +++ b/packages/ref-metrics-example/tests/unit/test_metrics.py @@ -1,5 +1,4 @@ import pathlib -from unittest import mock import pytest from ref_core.datasets import DatasetCollection, MetricDataset, SourceDatasetType @@ -28,15 +27,16 @@ def test_annual_mean(esgf_data_dir, metric_dataset): assert annual_mean.time.size == 286 -@mock.patch("ref_metrics_example.example.calculate_annual_mean_timeseries") -def test_example_metric(mock_calc, tmp_path, metric_dataset, cmip6_data_catalog): +def test_example_metric(tmp_path, metric_dataset, cmip6_data_catalog, mocker): metric = GlobalMeanTimeseries() ds = cmip6_data_catalog.groupby("instance_id").first() output_directory = tmp_path / "output" + mock_calc = mocker.patch("ref_metrics_example.example.calculate_annual_mean_timeseries") + mock_calc.return_value.attrs.__getitem__.return_value = "ABC" - configuration = MetricExecutionDefinition( + definition = MetricExecutionDefinition( output_directory=output_directory, output_fragment=pathlib.Path(metric.slug), key="global_mean_timeseries", @@ -47,15 +47,14 @@ def test_example_metric(mock_calc, tmp_path, metric_dataset, cmip6_data_catalog) ), ) - result = metric.run(configuration) + result = metric.run(definition) assert mock_calc.call_count == 1 - assert result.output_fragment == pathlib.Path(metric.slug) / "output.json" + assert str(result.bundle_filename) == "output.json" - output_bundle_path = output_directory / result.output_fragment + output_bundle_path = definition.output_directory / definition.output_fragment / result.bundle_filename assert result.successful assert output_bundle_path.exists() assert output_bundle_path.is_file() - assert result.output_fragment.name == "output.json" diff --git a/pyproject.toml b/pyproject.toml index 1ac75b1..15a8af3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -51,6 +51,7 @@ dev-dependencies = [ "bump-my-version>=0.28.1", "pytest-regressions>=2.5.0", "ipywidgets>=8.1.5", + "prettyprinter>=0.18.0", ] [tool.uv.workspace] diff --git a/uv.lock b/uv.lock index 4cd4d75..e98322e 100644 --- a/uv.lock +++ b/uv.lock @@ -528,6 +528,7 @@ dev = [ { name = "pandas-stubs" }, { name = "pip" }, { name = "pre-commit" }, + { name = "prettyprinter" }, { name = "pytest" }, { name = "pytest-cov" }, { name = "pytest-loguru" }, @@ -567,6 +568,7 @@ dev = [ { name = "pandas-stubs", specifier = ">=2.2.3.241009" }, { name = "pip", specifier = ">=24.3.1" }, { name = "pre-commit", specifier = ">=3.3.1" }, + { name = "prettyprinter", specifier = ">=0.18.0" }, { name = "pytest", specifier = ">=7.3.1" }, { name = "pytest-cov", specifier = ">=4.0.0" }, { name = "pytest-loguru", specifier = ">=0.4.0" }, @@ -584,6 +586,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335 }, ] +[[package]] +name = "colorful" +version = "0.5.6" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "platform_system == 'Windows'" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/fa/5f/38e40c3bc4107c39e4062d943026b8ee25154cb4b185b882f274a1ab65da/colorful-0.5.6.tar.gz", hash = "sha256:b56d5c01db1dac4898308ea889edcb113fbee3e6ec5df4bacffd61d5241b5b8d", size = 209280 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b3/61/39e7db0cb326c9c8f6a49fad4fc9c2f1241f05a4e10f0643fc31ce26a7e0/colorful-0.5.6-py2.py3-none-any.whl", hash = "sha256:eab8c1c809f5025ad2b5238a50bd691e26850da8cac8f90d660ede6ea1af9f1e", size = 201369 }, +] + [[package]] name = "comm" version = "0.2.2" @@ -2625,6 +2639,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/16/8f/496e10d51edd6671ebe0432e33ff800aa86775d2d147ce7d43389324a525/pre_commit-4.0.1-py2.py3-none-any.whl", hash = "sha256:efde913840816312445dc98787724647c65473daefe420785f885e8ed9a06878", size = 218713 }, ] +[[package]] +name = "prettyprinter" +version = "0.18.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorful" }, + { name = "pygments" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/97/41/967b5e033b5b50eebe0b8154a9e9827c517e244b9b612ec3357c40a4a33c/prettyprinter-0.18.0.tar.gz", hash = "sha256:9fe5da7ec53510881dd35d7a5c677ba45f34cfe6a8e78d1abd20652cf82139a8", size = 651884 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9f/d0/9effbeca8f1b8df9d33154de3477a51e55a9c46cb15612dd7791a1624397/prettyprinter-0.18.0-py2.py3-none-any.whl", hash = "sha256:358a58f276cb312e3ca29d7a7f244c91e4e0bda7848249d30e4f36d2eb58b67c", size = 48013 }, +] + [[package]] name = "prometheus-client" version = "0.21.0" @@ -2713,6 +2740,7 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ce/ac/5b1ea50fc08a9df82de7e1771537557f07c2632231bbab652c7e22597908/psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:bb89f0a835bcfc1d42ccd5f41f04870c1b936d8507c6df12b7737febc40f0909", size = 2822712 }, { url = "https://files.pythonhosted.org/packages/c4/fc/504d4503b2abc4570fac3ca56eb8fed5e437bf9c9ef13f36b6621db8ef00/psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:f0c2d907a1e102526dd2986df638343388b94c33860ff3bbe1384130828714b1", size = 2920155 }, { url = "https://files.pythonhosted.org/packages/b2/d1/323581e9273ad2c0dbd1902f3fb50c441da86e894b6e25a73c3fda32c57e/psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f8157bed2f51db683f31306aa497311b560f2265998122abe1dce6428bd86567", size = 2959356 }, + { url = "https://files.pythonhosted.org/packages/08/50/d13ea0a054189ae1bc21af1d85b6f8bb9bbc5572991055d70ad9006fe2d6/psycopg2_binary-2.9.10-cp313-cp313-win_amd64.whl", hash = "sha256:27422aa5f11fbcd9b18da48373eb67081243662f9b46e6fd07c3eb46e4535142", size = 2569224 }, ] [[package]] From 036a961aa31f38dbe380defc17c5e2a20527190d Mon Sep 17 00:00:00 2001 From: Jared Lewis Date: Tue, 7 Jan 2025 13:59:37 +1100 Subject: [PATCH 6/7] chore: Move pytest-mock as dependency of root package --- packages/ref-metrics-esmvaltool/pyproject.toml | 5 ----- pyproject.toml | 1 + uv.lock | 10 ++-------- 3 files changed, 3 insertions(+), 13 deletions(-) diff --git a/packages/ref-metrics-esmvaltool/pyproject.toml b/packages/ref-metrics-esmvaltool/pyproject.toml index def11d4..2d26edf 100644 --- a/packages/ref-metrics-esmvaltool/pyproject.toml +++ b/packages/ref-metrics-esmvaltool/pyproject.toml @@ -30,11 +30,6 @@ dependencies = [ [project.license] text = "Apache-2.0" -[tool.uv] -dev-dependencies = [ - "pytest-mock >= 3.12", -] - [build-system] requires = ["hatchling"] build-backend = 
"hatchling.build" diff --git a/pyproject.toml b/pyproject.toml index 15a8af3..f621d27 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,6 +24,7 @@ text = "Apache-2.0" dev-dependencies = [ "pytest>=7.3.1", "pytest-cov>=4.0.0", + "pytest-mock >= 3.12", "coverage>=7.2.0", "mypy>=1.11.0", "ruff>=0.6.9", diff --git a/uv.lock b/uv.lock index e98322e..c2ce09d 100644 --- a/uv.lock +++ b/uv.lock @@ -532,6 +532,7 @@ dev = [ { name = "pytest" }, { name = "pytest-cov" }, { name = "pytest-loguru" }, + { name = "pytest-mock" }, { name = "pytest-regressions" }, { name = "ruff" }, { name = "towncrier" }, @@ -572,6 +573,7 @@ dev = [ { name = "pytest", specifier = ">=7.3.1" }, { name = "pytest-cov", specifier = ">=4.0.0" }, { name = "pytest-loguru", specifier = ">=0.4.0" }, + { name = "pytest-mock", specifier = ">=3.12" }, { name = "pytest-regressions", specifier = ">=2.5.0" }, { name = "ruff", specifier = ">=0.6.9" }, { name = "towncrier", specifier = ">=24.8.0" }, @@ -3298,11 +3300,6 @@ dependencies = [ { name = "xarray" }, ] -[package.dev-dependencies] -dev = [ - { name = "pytest-mock" }, -] - [package.metadata] requires-dist = [ { name = "pooch", specifier = ">=1.8" }, @@ -3311,9 +3308,6 @@ requires-dist = [ { name = "xarray", specifier = ">=2022" }, ] -[package.metadata.requires-dev] -dev = [{ name = "pytest-mock", specifier = ">=3.12" }] - [[package]] name = "ref-metrics-example" version = "0.1.0" From 9f7a9174e3611577b6381a8b66e4bd69258b1aa7 Mon Sep 17 00:00:00 2001 From: Jared Lewis Date: Wed, 8 Jan 2025 10:53:19 +1100 Subject: [PATCH 7/7] test: Improve coverage --- packages/ref-core/tests/unit/test_executor.py | 9 +++++++++ packages/ref-core/tests/unit/test_metrics.py | 18 ++++++++++++++---- 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/packages/ref-core/tests/unit/test_executor.py b/packages/ref-core/tests/unit/test_executor.py index 856efcb..47d0407 100644 --- a/packages/ref-core/tests/unit/test_executor.py +++ b/packages/ref-core/tests/unit/test_executor.py @@ -27,6 +27,15 @@ def test_run_metric(self, metric_definition, mock_metric): assert result.successful assert result.bundle_filename == metric_definition.output_fragment / "output.json" + def test_raises_exception(self, metric_definition, mock_metric): + executor = LocalExecutor() + + mock_metric.run = lambda definition: 1 / 0 + + result = executor.run_metric(mock_metric, metric_definition) + assert result.successful is False + assert result.bundle_filename is None + @pytest.mark.parametrize("executor_name", ["local", None]) def test_run_metric_local(monkeypatch, executor_name, mock_metric, provider, metric_definition): diff --git a/packages/ref-core/tests/unit/test_metrics.py b/packages/ref-core/tests/unit/test_metrics.py index 7a1b2a1..4d8d944 100644 --- a/packages/ref-core/tests/unit/test_metrics.py +++ b/packages/ref-core/tests/unit/test_metrics.py @@ -8,14 +8,14 @@ class TestMetricResult: - def test_build(self, tmp_path): - config = MetricExecutionDefinition( + def test_build_from_output_bundle(self, tmp_path): + definition = MetricExecutionDefinition( output_fragment=tmp_path, key="mocked-metric-slug", metric_dataset=None ) # Setting the output directory generally happens as a side effect of the executor - config = evolve(config, output_directory=tmp_path) + definition = evolve(definition, output_directory=tmp_path) - result = MetricResult.build_from_output_bundle(config, {"data": "value"}) + result = MetricResult.build_from_output_bundle(definition, {"data": "value"}) assert result.successful @@ -29,6 +29,16 @@ 
def test_build(self, tmp_path): assert output_filename.is_relative_to(tmp_path) + def test_build_from_failure(self): + definition = MetricExecutionDefinition( + output_fragment="output", key="mocked-metric-slug", metric_dataset=None + ) + result = MetricResult.build_from_failure(definition) + + assert not result.successful + assert result.bundle_filename is None + assert result.definition == definition + @pytest.fixture def apply_data_catalog():
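
Usage sketch: how the output-path pieces introduced in this series fit together once [PATCH 5/7] is applied. All class, method, and attribute names below come from the diffs above; the concrete root path, key, fragment, and bundle payload are illustrative stand-ins, and in normal operation the executor (not the caller) resolves output_directory:

    import pathlib

    from attrs import evolve

    from ref_core.metrics import MetricExecutionDefinition, MetricResult

    # Only the relative output_fragment is persisted (see the output_fragment
    # column added by the alembic migration); absolute paths are derived on demand.
    definition = MetricExecutionDefinition(
        key="global_mean_timeseries",
        metric_dataset=None,  # a MetricDataset in real use
        output_fragment=pathlib.Path("global-mean-timeseries"),
    )

    # An executor resolves the root output directory (LocalExecutor uses
    # config.paths.tmp); the path used here is a stand-in.
    definition = evolve(definition, output_directory=pathlib.Path("/tmp/ref-out"))

    # build_from_output_bundle creates the execution directory and writes output.json.
    result = MetricResult.build_from_output_bundle(definition, {"data": "value"})
    assert result.successful
    assert result.bundle_filename == pathlib.Path("output.json")

    # The absolute location is reconstructed from the definition rather than stored,
    # so the data folders can move without rewriting existing results.
    print(result.to_output_path(result.bundle_filename))
    # -> /tmp/ref-out/global-mean-timeseries/output.json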