PoC implementation of using Celery for task running #7

Open · wants to merge 13 commits into base: basic-interface
3 changes: 3 additions & 0 deletions .dockerignore
@@ -0,0 +1,3 @@
.esgpull
out
.venv
7 changes: 7 additions & 0 deletions .env.example
@@ -0,0 +1,7 @@
# Example of an environment file for the CMIP-REF project
# This allows for running the project in a development environment outside of a container

CELERY_BROKER_URL=redis://localhost:6379/1

REF_OUTPUT_ROOT=out
REF_ESGF_ROOT=.esgpull/data
21 changes: 21 additions & 0 deletions .github/actions/test-data/action.yml
@@ -0,0 +1,21 @@
name: "Setup test-data cache"
description: "Share a set of cached data for use in the tests"
runs:
using: "composite"
steps:
- name: Cache test data
id: cache-test-data
uses: actions/cache@v4
with:
path: .esgpull
key: test-data-${{ hashFiles('scripts/fetch_test_data.py') }}
- name: Install esgpull
run: |
uv run esgpull self install .esgpull || uv run esgpull self choose .esgpull
shell: bash
- if: ${{ steps.cache-test-data.outputs.cache-hit != 'true' }}
name: Fetch the test data
run: |
uv run esgpull config api.index_node esgf-node.llnl.gov
make fetch-test-data
shell: bash
20 changes: 6 additions & 14 deletions .github/workflows/ci.yaml
@@ -34,6 +34,7 @@ jobs:
- name: Check out repository
uses: actions/checkout@v4
- uses: ./.github/actions/setup
- uses: ./.github/actions/test-data
- name: docs
run: uv run mkdocs build

@@ -58,23 +59,14 @@ jobs:
python-version: ${{ matrix.python-version }}

# Share a cache of test data across all test jobs
- name: Cache test data
id: cache-test-data
uses: actions/cache@v4
with:
path: .esgpull
key: test-data-${{ hashFiles('scripts/fetch_test_data.py') }}
- name: Install esgpull
run: |
uv run esgpull self install .esgpull || uv run esgpull self choose .esgpull
- if: ${{ steps.cache-test-data.outputs.cache-hit != 'true' }}
name: Fetch the test data
run: |
uv run esgpull config api.index_node esgf-node.llnl.gov
make fetch-test-data
- uses: ./.github/actions/test-data

# Run the tests
- name: Run tests
run: |
cp .env.example .env
uv run --package ref-core pytest packages/ref-core -r a -v --doctest-modules --cov=packages/ref-core/src --cov-report=term
uv run --package ref-celery pytest packages/ref-celery -r a -v --doctest-modules --cov=packages/ref-celery/src --cov-report=term
uv run --package ref-metrics-example pytest packages/ref-metrics-example -r a -v --doctest-modules --cov=packages/ref-metrics-example/src --cov-report=term --cov-append
uv run coverage xml
# Run integration tests (without adding to the coverage)
3 changes: 3 additions & 0 deletions .gitignore
@@ -148,3 +148,6 @@ dmypy.json

# Esgpull
.esgpull

# Local output directory
out/
11 changes: 9 additions & 2 deletions Makefile
@@ -30,6 +30,7 @@ pre-commit: ## run all the linting checks of the codebase
.PHONY: mypy
mypy: ## run mypy on the codebase
MYPYPATH=stubs uv run --package ref-core mypy packages/ref-core
MYPYPATH=stubs uv run --package ref-celery mypy packages/ref-celery
MYPYPATH=stubs uv run --package ref-metrics-example mypy packages/ref-metrics-example

.PHONY: ruff-fixes
@@ -38,11 +39,17 @@ ruff-fixes: ## fix the code using ruff
uv run ruff format

.PHONY: test-core
test-core: ## run the tests
test-core: ## run the tests for ref-core
uv run --package ref-core \
pytest packages/ref-core \
-r a -v --doctest-modules --cov=packages/ref-core/src

.PHONY: test-celery
test-celery: ## run the tests for ref-celery
uv run --package ref-celery \
pytest packages/ref-celery \
-r a -v --doctest-modules --cov=packages/ref-celery/src

.PHONY: test-metrics-example
test-metrics-example: ## run the tests
uv run --package ref-metrics-example \
@@ -56,7 +63,7 @@ test-integration: ## run the integration tests
-r a -v

.PHONY: test
test: test-core test-metrics-example test-integration ## run the tests
test: test-core test-celery test-metrics-example test-integration ## run the tests

# Note on code coverage and testing:
# If you want to debug what is going on with coverage, we have found
30 changes: 30 additions & 0 deletions README.md
@@ -44,6 +44,36 @@ mamba install -c conda-forge cmip-ref
conda install -c conda-forge cmip-ref
```

## Getting started

### As a metrics provider

Metrics providers are the core of the REF.
They define the metrics to be calculated and the data used to calculate them,
and expose a consistent interface for the REF to interact with.


These metrics providers can be run as standalone applications or as part of the REF.
See

### As a modelling center

The REF requires a number of different services to be running in order to function.
To make it easy to deploy and use locally,
we provide a `docker-compose` file that will start all the necessary services.

The background services for the REF can be deployed using the following command:

```bash
docker-compose up
```

This will start the following services:
* `redis` - a message broker for the REF
* `flower` - a monitoring tool for the background tasks in REF
* `ref-metrics-example` - a worker for executing the 'example' metric provider

Metric calculations can then be queued up using `scripts/runner.py`

<!--- sec-end-installation -->

5 changes: 5 additions & 0 deletions changelog/7.feature.md
@@ -0,0 +1,5 @@
Added `ref-celery` package for asynchronous task processing.
This enables the `ref-metrics-example` package to be run as a Celery worker inside its own Docker container.

The `ref-metrics-example` package was also dockerized and a `docker-compose` file was added to track
the services required.
17 changes: 17 additions & 0 deletions conftest.py
@@ -15,3 +15,20 @@ def esgf_data_dir() -> Path:
pull = esgpull.Esgpull()

return pull.config.paths.data


@pytest.fixture
def test_dataset(esgf_data_dir) -> Path:
return (
esgf_data_dir
/ "CMIP6"
/ "ScenarioMIP"
/ "CSIRO"
/ "ACCESS-ESM1-5"
/ "ssp126"
/ "r1i1p1f1"
/ "Amon"
/ "tas"
/ "gn"
/ "v20210318"
)
35 changes: 35 additions & 0 deletions docker-compose.yaml
@@ -0,0 +1,35 @@
# Services to run the Rapid Evaluation Framework (REF)
# These can be started by running `docker-compose up` in the root directory of the codebase.

name: cmip-ref
services:
redis:
image: redis:7
restart: always
ports:
- "6379:6379"
flower:
image: mher/flower
restart: always
environment:
- CELERY_BROKER_URL=redis://redis:6379/1
ports:
- "5555:5555"
depends_on:
- redis
metrics-example:
image: ref-metrics-example
restart: always
environment:
- CELERY_BROKER_URL=redis://redis:6379/1
- REF_OUTPUT_ROOT=/output
- REF_ESGF_ROOT=/esgf
build:
context: packages/ref-metrics-example
additional_contexts:
- root=.
command: ref-celery --package ref-metrics-example
volumes:
- ./packages/ref-metrics-example:/app
- .esgpull/data:/esgf
- ./out:/output
12 changes: 11 additions & 1 deletion docs/configuration.md
@@ -7,8 +7,18 @@ The default values for these environment variables are generally suitable,
but if you require updating these values we recommend the use of a `.env` file
to make the changes easier to reproduce in future.

### `CMIP_REF_EXECUTOR`
### `REF_EXECUTOR`

Executor to use for running the metrics.

Defaults to the local executor (`"local"`).

### `REF_ESGF_ROOT`

Path to the root of the downloaded ESGF data.

### `REF_OUTPUT_ROOT`

Path to the root directory where output data should be stored.
This must be shared between any workers and the parent process.
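Taken together, these variables could be resolved like this (a stdlib-only sketch; the `load_ref_config` helper is hypothetical, only the `"local"` default is documented above, and the other fallbacks are taken from `.env.example`):

```python
import os
from pathlib import Path


def load_ref_config(env=None):
    """Resolve REF settings from the environment (hypothetical helper)."""
    env = os.environ if env is None else env
    return {
        # "local" is the documented default executor
        "executor": env.get("REF_EXECUTOR", "local"),
        # Fallback paths mirror .env.example
        "esgf_root": Path(env.get("REF_ESGF_ROOT", ".esgpull/data")),
        "output_root": Path(env.get("REF_OUTPUT_ROOT", "out")),
    }
```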
2 changes: 1 addition & 1 deletion docs/explanation.md
@@ -54,5 +54,5 @@ The following environments are planned to be supported in the future:
* Kubernetes (for cloud-based execution)
* Subprocess (for HPC systems)

The selected executor is defined using the `CMIP_REF_EXECUTOR` environment variable.
The selected executor is defined using the `REF_EXECUTOR` environment variable.
See the [Configuration](configuration.md) page for more information.
9 changes: 6 additions & 3 deletions docs/gen_doc_stubs.py
@@ -111,13 +111,14 @@ def write_module_page(
with mkdocs_gen_files.open(write_file, "w") as fh:
fh.write(f"# {package_full_name}\n")

fh.write("\n")
fh.write(f"::: {package_full_name}")

if sub_packages:
fh.write("\n")
fh.write("## sub-packages \n")
fh.write(f"{create_sub_packages_table(sub_packages)}\n")

fh.write("\n")
fh.write(f"::: {package_full_name}")

if package.__doc__ is None:
summary = ""
else:
@@ -131,7 +132,9 @@


write_module_page("ref_core")
write_module_page("ref_celery")
write_module_page("ref_metrics_example")

with mkdocs_gen_files.open(ROOT_DIR / "NAVIGATION.md", "w") as fh:
print(list(nav.build_literate_nav()))
fh.writelines(nav.build_literate_nav())
92 changes: 92 additions & 0 deletions docs/how-to-guides/running-metrics-locally.py
@@ -0,0 +1,92 @@
# ---
# jupyter:
# jupytext:
# text_representation:
# extension: .py
# format_name: percent
# format_version: '1.3'
# jupytext_version: 1.16.4
# kernelspec:
# display_name: Python 3 (ipykernel)
# language: python
# name: python3
# ---

# %% [markdown]
#
# # Testing metric providers locally
# Metric providers can be run locally without requiring the rest of the REF infrastructure.
# This is useful for testing and debugging metrics.

# Running a metric locally requires that the target REF metrics package, e.g. `ref-metrics-example`,
# and its dependencies are installed in the current Python environment.
#
# This guide will walk you through how to run a metric provider locally.
# %%
import json
import pathlib

import ref_metrics_example
from ref_core.executor import run_metric
from ref_core.metrics import Configuration, TriggerInfo

# %%
provider = ref_metrics_example.provider
provider

# %%
# Relative path to some CMIP6 data
example_dataset = (
pathlib.Path("CMIP6")
/ "ScenarioMIP"
/ "CSIRO"
/ "ACCESS-ESM1-5"
/ "ssp126"
/ "r1i1p1f1"
/ "Amon"
/ "tas"
/ "gn"
/ "v20210318"
)

# %%
configuration = Configuration(output_fragment=pathlib.Path("out"))
trigger = TriggerInfo(dataset=example_dataset)

# %% [markdown]
# ## Metric calculations
#
# Metric calculations are typically run using an [Executor](ref_core.executor.Executor)
# which provides an abstraction to enable metrics to be run in multiple different ways.
#
# The simplest executor is the `LocalExecutor`.
# This executor runs a given metric synchronously in the current process.
#
# The `LocalExecutor` is the default executor when using the `ref_core.executor.run_metric` function.
# This can be overridden by specifying the `REF_EXECUTOR` environment variable.

# %%
result = run_metric("annual-global-mean-timeseries", provider, configuration=configuration, trigger=trigger)
result

# %%
with open(configuration.as_output_path(result.output_bundle)) as fh:
# Load the output bundle and pretty print
loaded_result = json.loads(fh.read())
print(json.dumps(loaded_result, indent=2))

# %% [markdown]
# ### Running directly
#
# The local executor can be bypassed if you need to run the metric calculation directly.
# This will not perform any validation/verification of the output results.

# %%
metric = provider.get("annual-global-mean-timeseries")

direct_result = metric.run(configuration=configuration, trigger=trigger)
assert direct_result.successful

direct_result

# %%
5 changes: 5 additions & 0 deletions mkdocs.yml
@@ -15,6 +15,7 @@ nav:
- Tutorials: tutorials.md
- Further background:
- Explanation: explanation.md
- API: api/
- Development: development.md
- Changelog: changelog.md

@@ -51,6 +52,10 @@ plugins:
- gen-files:
scripts:
- docs/gen_doc_stubs.py
# Make the navigation easier to handle/auto-generate if we wish
# https://oprypin.github.io/mkdocs-literate-nav/
- literate-nav:
nav_file: NAVIGATION.md
# Notebook support.
# Working out how to make this play with nb-exec would be nice,
# then it wouldn't run every time.
17 changes: 17 additions & 0 deletions packages/ref-celery/README.md
@@ -0,0 +1,17 @@
# ref-celery

This package provides Celery task generation from `Provider` and `Metric` definitions.

## CLI tool

The `ref-celery` package provides a CLI tool to start a worker instance from a REF metrics provider.

### Usage

For example, to start a worker instance for the `ref-metrics-example` package:

```bash
ref-celery --package ref-metrics-example
```

This requires the `ref-metrics-example` package to be installed in the current virtual environment.
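The task generation can be pictured as mapping each metric a provider exposes to a named Celery task. A stdlib-only sketch of that naming (all class and attribute names here are illustrative stand-ins, not the real `ref-core`/`ref-celery` API):

```python
# Illustrative stand-ins for a provider and its metrics; the real
# interfaces live in ref-core and may differ.
class Metric:
    def __init__(self, slug):
        self.slug = slug


class Provider:
    def __init__(self, name, metrics):
        self.name = name
        self.metrics = metrics


def task_names(provider):
    # One Celery task per metric, namespaced by the provider name
    return [f"{provider.name}.{metric.slug}" for metric in provider.metrics]


example = Provider("ref_metrics_example", [Metric("annual-global-mean-timeseries")])
print(task_names(example))
```

A worker started with `ref-celery --package ref-metrics-example` would then register one task per name produced by such a mapping.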