From af373aba49b59c6683f5a027fdda1204f44be7cb Mon Sep 17 00:00:00 2001 From: Jared Lewis Date: Tue, 5 Nov 2024 01:31:33 +1100 Subject: [PATCH 01/13] feat: Add celery implementation --- docker-compose.yaml | 16 ++ packages/ref-celery/README.md | 3 + packages/ref-celery/pyproject.toml | 38 +++++ .../ref-celery/src/ref_celery/__init__.py | 7 + packages/ref-celery/src/ref_celery/app.py | 20 +++ packages/ref-celery/src/ref_celery/py.typed | 0 packages/ref-celery/src/ref_celery/tasks.py | 38 +++++ packages/ref-celery/tests/conftest.py | 0 packages/ref-core/src/ref_core/providers.py | 3 + packages/ref-metrics-example/Dockerfile | 6 + packages/ref-metrics-example/pyproject.toml | 1 + .../src/ref_metrics_example/celery.py | 8 + pyproject.toml | 2 + uv.lock | 147 ++++++++++++++++++ 14 files changed, 289 insertions(+) create mode 100644 docker-compose.yaml create mode 100644 packages/ref-celery/README.md create mode 100644 packages/ref-celery/pyproject.toml create mode 100644 packages/ref-celery/src/ref_celery/__init__.py create mode 100644 packages/ref-celery/src/ref_celery/app.py create mode 100644 packages/ref-celery/src/ref_celery/py.typed create mode 100644 packages/ref-celery/src/ref_celery/tasks.py create mode 100644 packages/ref-celery/tests/conftest.py create mode 100644 packages/ref-metrics-example/Dockerfile create mode 100644 packages/ref-metrics-example/src/ref_metrics_example/celery.py diff --git a/docker-compose.yaml b/docker-compose.yaml new file mode 100644 index 0000000..dd623c2 --- /dev/null +++ b/docker-compose.yaml @@ -0,0 +1,16 @@ +name: cmip-ref +services: + redis: + image: redis:7 + restart: always + ports: + - "6379:6379" + flower: + image: mher/flower + restart: always + environment: + - CELERY_BROKER_URL=redis://redis:6379/1 + ports: + - "5555:5555" + depends_on: + - redis diff --git a/packages/ref-celery/README.md b/packages/ref-celery/README.md new file mode 100644 index 0000000..050d4dc --- /dev/null +++ b/packages/ref-celery/README.md @@ -0,0 +1,3 @@ +# ref-celery + +This package provides celery task generation from Provider and Metric definitions. diff --git a/packages/ref-celery/pyproject.toml b/packages/ref-celery/pyproject.toml new file mode 100644 index 0000000..2c20680 --- /dev/null +++ b/packages/ref-celery/pyproject.toml @@ -0,0 +1,38 @@ +[project] +name = "ref-celery" +version = "0.1.0" +description = "Celery app for mananging tasks and workers" +readme = "README.md" +authors = [ + { name = "Jared Lewis", email = "jared.lewis@climate-resource.com" } +] +requires-python = ">=3.10" +classifiers = [ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "Operating System :: OS Independent", + "Intended Audience :: Science/Research", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Topic :: Scientific/Engineering", +] +dependencies = [ + "celery[redis]>=5.4.0", + "ref-core" +] + +[tool.uv] +dev-dependencies = [ + +] + +[tool.uv.sources] +ref-core = { workspace = true } + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" diff --git a/packages/ref-celery/src/ref_celery/__init__.py b/packages/ref-celery/src/ref_celery/__init__.py new file mode 100644 index 0000000..2e220f9 --- /dev/null +++ b/packages/ref-celery/src/ref_celery/__init__.py @@ -0,0 +1,7 @@ +""" +Rapid evaluating CMIP data +""" + +import importlib.metadata + +__version__ = importlib.metadata.version("ref_celery") diff --git a/packages/ref-celery/src/ref_celery/app.py b/packages/ref-celery/src/ref_celery/app.py new file mode 100644 index 0000000..9d5f706 --- /dev/null +++ b/packages/ref-celery/src/ref_celery/app.py @@ -0,0 +1,20 @@ +from celery import Celery + +app = Celery() + + +class Config: + """Celery configuration""" + + broker_url = "redis://localhost:6379/1" + result_backend = "redis://localhost:6379/1" + + +def create_celery_app(name: str) -> Celery: + """ + Create a Celery app + """ + app = Celery(name) + app.config_from_object(Config) + + return app diff --git a/packages/ref-celery/src/ref_celery/py.typed b/packages/ref-celery/src/ref_celery/py.typed new file mode 100644 index 0000000..e69de29 diff --git a/packages/ref-celery/src/ref_celery/tasks.py b/packages/ref-celery/src/ref_celery/tasks.py new file mode 100644 index 0000000..0b450e0 --- /dev/null +++ b/packages/ref-celery/src/ref_celery/tasks.py @@ -0,0 +1,38 @@ +from collections.abc import Callable + +from celery import Celery +from ref_core.metrics import Metric +from ref_core.providers import MetricsProvider + + +def metric_task_factory(metric: Metric) -> Callable: + """ + Create a new task for the given metric + """ + + def task(): + """ + Task to run the metric + """ + return metric.name + + # def task(configuration: Configuration, trigger: TriggerInfo): + # metric.run(configuration, trigger) + + return task + + +def register_celery_tasks(app: Celery, provider: MetricsProvider): + """ + Register all tasks for the given provider + + Parameters + ---------- + app + The Celery app to register the tasks with + provider + The provider to register tasks for + """ + for metric in provider: + print(f"Registering task for metric {metric.name}") + app.task(metric_task_factory(metric), name=f"{provider.name}_{metric.name}", queue=provider.name) diff --git a/packages/ref-celery/tests/conftest.py b/packages/ref-celery/tests/conftest.py new file mode 100644 index 0000000..e69de29 diff --git a/packages/ref-core/src/ref_core/providers.py b/packages/ref-core/src/ref_core/providers.py index 7bc14b6..f81d85a 100644 --- a/packages/ref-core/src/ref_core/providers.py +++ b/packages/ref-core/src/ref_core/providers.py @@ -36,6 +36,9 @@ def register(self, metric: Metric) -> None: raise ValueError("Metric must be an instance of Metric") self._metrics[metric.name.lower()] = metric + def __iter__(self): + return iter(self._metrics.values()) + def get(self, name: str) -> Metric: """ Get a metric by name. diff --git a/packages/ref-metrics-example/Dockerfile b/packages/ref-metrics-example/Dockerfile new file mode 100644 index 0000000..1659da8 --- /dev/null +++ b/packages/ref-metrics-example/Dockerfile @@ -0,0 +1,6 @@ +FROM python:3.11-slim + +WORKDIR /app + +COPY pyproject.toml README.md /app/ +RUN pip install . diff --git a/packages/ref-metrics-example/pyproject.toml b/packages/ref-metrics-example/pyproject.toml index a76af08..e6e6ed8 100644 --- a/packages/ref-metrics-example/pyproject.toml +++ b/packages/ref-metrics-example/pyproject.toml @@ -22,6 +22,7 @@ classifiers = [ ] dependencies = [ "ref-core", + "ref-celery", "xarray >= 2022", "netcdf4", "dask>=2024.10.0", diff --git a/packages/ref-metrics-example/src/ref_metrics_example/celery.py b/packages/ref-metrics-example/src/ref_metrics_example/celery.py new file mode 100644 index 0000000..a1d91e1 --- /dev/null +++ b/packages/ref-metrics-example/src/ref_metrics_example/celery.py @@ -0,0 +1,8 @@ +from ref_celery.app import create_celery_app +from ref_celery.tasks import register_celery_tasks + +from ref_metrics_example import provider + +app = create_celery_app("ref_metrics_example") + +register_celery_tasks(app, provider) diff --git a/pyproject.toml b/pyproject.toml index 511c7c6..dbc903f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,6 +8,7 @@ authors = [ ] requires-python = ">=3.10" dependencies = [ + "ref-celery", "ref-core", "ref-metrics-example", ] @@ -50,6 +51,7 @@ members = ["packages/*"] [tool.uv.sources] ref-core = { workspace = true } ref-metrics-example = { workspace = true } +ref-celery = { workspace = true } [tool.coverage.run] source = ["packages"] diff --git a/uv.lock b/uv.lock index b9f7a23..ae60d3e 100644 --- a/uv.lock +++ b/uv.lock @@ -10,6 +10,7 @@ resolution-markers = [ [manifest] members = [ "cmip-ref", + "ref-celery", "ref-core", "ref-metrics-example", ] @@ -58,6 +59,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c2/12/58f4f11385fddafef5d6f7bfaaf2f42899c8da6b4f95c04b7c3b744851a8/alembic-1.13.3-py3-none-any.whl", hash = "sha256:908e905976d15235fae59c9ac42c4c5b75cfcefe3d27c0fbf7ae15a37715d80e", size = 233217 }, ] +[[package]] +name = "amqp" +version = "5.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "vine" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/32/2c/6eb09fbdeb3c060b37bd33f8873832897a83e7a428afe01aad333fc405ec/amqp-5.2.0.tar.gz", hash = "sha256:a1ecff425ad063ad42a486c902807d1482311481c8ad95a72694b2975e75f7fd", size = 128754 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b3/f0/8e5be5d5e0653d9e1d02b1144efa33ff7d2963dfad07049e02c0fa9b2e8d/amqp-5.2.0-py3-none-any.whl", hash = "sha256:827cb12fb0baa892aad844fd95258143bce4027fdac4fccddbc43330fd281637", size = 50917 }, +] + [[package]] name = "anyio" version = "4.6.0" @@ -152,6 +165,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fa/9f/3c3503693386c4b0f245eaf5ca6198e3b28879ca0a40bde6b0e319793453/async_lru-2.0.4-py3-none-any.whl", hash = "sha256:ff02944ce3c288c5be660c42dbcca0742b32c3b279d6dceda655190240b99224", size = 6111 }, ] +[[package]] +name = "async-timeout" +version = "5.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/61/1f/44d9efc904bbe4d9967433522b691a9c4f1e81c2c64fbe44bad63d5de646/async_timeout-5.0.0.tar.gz", hash = "sha256:49675ec889daacfe65ff66d2dde7dd1447a6f4b2f23721022e4ba121f8772a85", size = 8951 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f3/df/32887f4a54676cf151301faed0261fbae969284cd673744371da67452967/async_timeout-5.0.0-py3-none-any.whl", hash = "sha256:904719a4bd6e0520047d0ddae220aabee67b877f7ca17bf8cea20f67f6247ae0", size = 6064 }, +] + [[package]] name = "attrs" version = "24.2.0" @@ -182,6 +204,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b1/fe/e8c672695b37eecc5cbf43e1d0638d88d66ba3a44c4d321c796f4e59167f/beautifulsoup4-4.12.3-py3-none-any.whl", hash = "sha256:b80878c9f40111313e55da8ba20bdba06d8fa3969fc68304167741bbf9e082ed", size = 147925 }, ] +[[package]] +name = "billiard" +version = "4.2.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/7c/58/1546c970afcd2a2428b1bfafecf2371d8951cc34b46701bea73f4280989e/billiard-4.2.1.tar.gz", hash = "sha256:12b641b0c539073fc8d3f5b8b7be998956665c4233c7c1fcd66a7e677c4fb36f", size = 155031 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/30/da/43b15f28fe5f9e027b41c539abc5469052e9d48fd75f8ff094ba2a0ae767/billiard-4.2.1-py3-none-any.whl", hash = "sha256:40b59a4ac8806ba2c2369ea98d876bc6108b051c227baffd928c644d15d8f3cb", size = 86766 }, +] + [[package]] name = "bleach" version = "6.1.0" @@ -209,6 +240,31 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c8/d5/867e75361fc45f6de75fe277dd085627a9db5ebb511a87f27dc1396b5351/cattrs-24.1.2-py3-none-any.whl", hash = "sha256:67c7495b760168d931a10233f979b28dc04daf853b30752246f4f8471c6d68d0", size = 66446 }, ] +[[package]] +name = "celery" +version = "5.4.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "billiard" }, + { name = "click" }, + { name = "click-didyoumean" }, + { name = "click-plugins" }, + { name = "click-repl" }, + { name = "kombu" }, + { name = "python-dateutil" }, + { name = "tzdata" }, + { name = "vine" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/8a/9c/cf0bce2cc1c8971bf56629d8f180e4ca35612c7e79e6e432e785261a8be4/celery-5.4.0.tar.gz", hash = "sha256:504a19140e8d3029d5acad88330c541d4c3f64c789d85f94756762d8bca7e706", size = 1575692 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/90/c4/6a4d3772e5407622feb93dd25c86ce3c0fee746fa822a777a627d56b4f2a/celery-5.4.0-py3-none-any.whl", hash = "sha256:369631eb580cf8c51a82721ec538684994f8277637edde2dfc0dacd73ed97f64", size = 425983 }, +] + +[package.optional-dependencies] +redis = [ + { name = "redis" }, +] + [[package]] name = "certifi" version = "2024.8.30" @@ -385,6 +441,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/00/2e/d53fa4befbf2cfa713304affc7ca780ce4fc1fd8710527771b58311a3229/click-8.1.7-py3-none-any.whl", hash = "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28", size = 97941 }, ] +[[package]] +name = "click-didyoumean" +version = "0.3.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "click" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/30/ce/217289b77c590ea1e7c24242d9ddd6e249e52c795ff10fac2c50062c48cb/click_didyoumean-0.3.1.tar.gz", hash = "sha256:4f82fdff0dbe64ef8ab2279bd6aa3f6a99c3b28c05aa09cbfc07c9d7fbb5a463", size = 3089 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1b/5b/974430b5ffdb7a4f1941d13d83c64a0395114503cc357c6b9ae4ce5047ed/click_didyoumean-0.3.1-py3-none-any.whl", hash = "sha256:5c4bb6007cfea5f2fd6583a2fb6701a22a41eb98957e63d0fac41c10e7c3117c", size = 3631 }, +] + [[package]] name = "click-params" version = "0.5.0" @@ -399,6 +467,31 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ca/c7/a04832e84f1c613194231a657612aee2e377d63a44a5847386c83c38bbd6/click_params-0.5.0-py3-none-any.whl", hash = "sha256:bbb2efe44197ab896bffcb50f42f22240fb077e6756b568fbdab3e1700b859d6", size = 13152 }, ] +[[package]] +name = "click-plugins" +version = "1.1.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "click" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/5f/1d/45434f64ed749540af821fd7e42b8e4d23ac04b1eda7c26613288d6cd8a8/click-plugins-1.1.1.tar.gz", hash = "sha256:46ab999744a9d831159c3411bb0c79346d94a444df9a3a3742e9ed63645f264b", size = 8164 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e9/da/824b92d9942f4e472702488857914bdd50f73021efea15b4cad9aca8ecef/click_plugins-1.1.1-py2.py3-none-any.whl", hash = "sha256:5d262006d3222f5057fd81e1623d4443e41dcda5dc815c06b442aa3c02889fc8", size = 7497 }, +] + +[[package]] +name = "click-repl" +version = "0.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "click" }, + { name = "prompt-toolkit" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/cb/a2/57f4ac79838cfae6912f997b4d1a64a858fb0c86d7fcaae6f7b58d267fca/click-repl-0.3.0.tar.gz", hash = "sha256:17849c23dba3d667247dc4defe1757fff98694e90fe37474f3feebb69ced26a9", size = 10449 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/52/40/9d857001228658f0d59e97ebd4c346fe73e138c6de1bce61dc568a57c7f8/click_repl-0.3.0-py3-none-any.whl", hash = "sha256:fb7e06deb8da8de86180a33a9da97ac316751c094c6899382da7feeeeb51b812", size = 10289 }, +] + [[package]] name = "cloudpickle" version = "3.1.0" @@ -413,6 +506,7 @@ name = "cmip-ref" version = "0.1.0" source = { virtual = "." } dependencies = [ + { name = "ref-celery" }, { name = "ref-core" }, { name = "ref-metrics-example" }, ] @@ -443,6 +537,7 @@ dev = [ [package.metadata] requires-dist = [ + { name = "ref-celery", editable = "packages/ref-celery" }, { name = "ref-core", editable = "packages/ref-core" }, { name = "ref-metrics-example", editable = "packages/ref-metrics-example" }, ] @@ -1251,6 +1346,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/90/a3/285eb1e79dbbd8e9513a3bb1bb2bb3d4c7c22c8a92efb8449baface0b864/jupytext-1.16.4-py3-none-any.whl", hash = "sha256:76989d2690e65667ea6fb411d8056abe7cd0437c07bd774660b83d62acf9490a", size = 153540 }, ] +[[package]] +name = "kombu" +version = "5.4.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "amqp" }, + { name = "tzdata" }, + { name = "vine" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/38/4d/b93fcb353d279839cc35d0012bee805ed0cf61c07587916bfc35dbfddaf1/kombu-5.4.2.tar.gz", hash = "sha256:eef572dd2fd9fc614b37580e3caeafdd5af46c1eff31e7fba89138cdb406f2cf", size = 442858 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/87/ec/7811a3cf9fdfee3ee88e54d08fcbc3fabe7c1b6e4059826c59d7b795651c/kombu-5.4.2-py3-none-any.whl", hash = "sha256:14212f5ccf022fc0a70453bb025a1dcc32782a588c49ea866884047d66e14763", size = 201349 }, +] + [[package]] name = "liccheck" version = "0.9.2" @@ -2340,6 +2449,33 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ec/d2/3b2ab40f455a256cb6672186bea95cd97b459ce4594050132d71e76f0d6f/pyzmq-26.2.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:90412f2db8c02a3864cbfc67db0e3dcdbda336acf1c469526d3e869394fe001c", size = 550762 }, ] +[[package]] +name = "redis" +version = "5.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "async-timeout", marker = "python_full_version < '3.11.3'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/53/17/2f4a87ffa4cd93714cf52edfa3ea94589e9de65f71e9f99cbcfa84347a53/redis-5.2.0.tar.gz", hash = "sha256:0b1087665a771b1ff2e003aa5bdd354f15a70c9e25d5a7dbf9c722c16528a7b0", size = 4607878 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/12/f5/ffa560ecc4bafbf25f7961c3d6f50d627a90186352e27e7d0ba5b1f6d87d/redis-5.2.0-py3-none-any.whl", hash = "sha256:ae174f2bb3b1bf2b09d54bf3e51fbc1469cf6c10aa03e21141f51969801a7897", size = 261428 }, +] + +[[package]] +name = "ref-celery" +version = "0.1.0" +source = { editable = "packages/ref-celery" } +dependencies = [ + { name = "celery", extra = ["redis"] }, + { name = "ref-core" }, +] + +[package.metadata] +requires-dist = [ + { name = "celery", extras = ["redis"], specifier = ">=5.4.0" }, + { name = "ref-core", editable = "packages/ref-core" }, +] + [[package]] name = "ref-core" version = "0.1.0" @@ -2358,6 +2494,7 @@ source = { editable = "packages/ref-metrics-example" } dependencies = [ { name = "dask" }, { name = "netcdf4" }, + { name = "ref-celery" }, { name = "ref-core" }, { name = "xarray" }, ] @@ -2366,6 +2503,7 @@ dependencies = [ requires-dist = [ { name = "dask", specifier = ">=2024.10.0" }, { name = "netcdf4" }, + { name = "ref-celery", editable = "packages/ref-celery" }, { name = "ref-core", editable = "packages/ref-core" }, { name = "xarray", specifier = ">=2022" }, ] @@ -2961,6 +3099,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/3a/0c/785d317eea99c3739821718f118c70537639aa43f96bfa1d83a71f68eaf6/validators-0.22.0-py3-none-any.whl", hash = "sha256:61cf7d4a62bbae559f2e54aed3b000cea9ff3e2fdbe463f51179b92c58c9585a", size = 26195 }, ] +[[package]] +name = "vine" +version = "5.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/bd/e4/d07b5f29d283596b9727dd5275ccbceb63c44a1a82aa9e4bfd20426762ac/vine-5.1.0.tar.gz", hash = "sha256:8b62e981d35c41049211cf62a0a1242d8c1ee9bd15bb196ce38aefd6799e61e0", size = 48980 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/03/ff/7c0c86c43b3cbb927e0ccc0255cb4057ceba4799cd44ae95174ce8e8b5b2/vine-5.1.0-py3-none-any.whl", hash = "sha256:40fdf3c48b2cfe1c38a49e9ae2da6fda88e4794c810050a728bd7413811fb1dc", size = 9636 }, +] + [[package]] name = "virtualenv" version = "20.26.6" From 6a614ed040da312d0b8cc767aad19fdd62c60c82 Mon Sep 17 00:00:00 2001 From: Jared Lewis Date: Tue, 5 Nov 2024 21:19:50 +1100 Subject: [PATCH 02/13] feat: Add Dockerfile for ref-metrics-example --- .dockerignore | 3 +++ docker-compose.yaml | 8 ++++++ packages/ref-metrics-example/Dockerfile | 36 +++++++++++++++++++++++-- packages/ref-metrics-example/README.md | 4 ++- 4 files changed, 48 insertions(+), 3 deletions(-) create mode 100644 .dockerignore diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..c582285 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,3 @@ +.esgpull + +.venv diff --git a/docker-compose.yaml b/docker-compose.yaml index dd623c2..2562528 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -14,3 +14,11 @@ services: - "5555:5555" depends_on: - redis + metrics-example: + image: ref-metrics-example + restart: always + build: + context: packages/ref-metrics-example + additional_contexts: + - root=. + command: ref-celery worker diff --git a/packages/ref-metrics-example/Dockerfile b/packages/ref-metrics-example/Dockerfile index 1659da8..ee4d823 100644 --- a/packages/ref-metrics-example/Dockerfile +++ b/packages/ref-metrics-example/Dockerfile @@ -1,6 +1,38 @@ -FROM python:3.11-slim +# Build the container for the ref-metrics-package +# +# ref-metrics-example is a relatively simple package so can be directly installed via pip. +# More complex packages may require a multi-stage build to build the package before installing it. + +LABEL maintainer="Jared Lewis " +LABEL description="Docker image with the execution environment for the ref-metrics-example package" + +# Build wheels for the core packages +# This is temporary until the package is published on PyPI +FROM python:3.12-slim-bookworm AS build +COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/ + +WORKDIR /build + +COPY --from=root . /build + +RUN uv build --package ref-core -o /wheels +RUN uv build --package ref-celery -o /wheels + +# Build the container for the ref-metrics-package +FROM python:3.12-slim-bookworm WORKDIR /app COPY pyproject.toml README.md /app/ -RUN pip install . + +# TODO: Remove this section when the package is published +COPY --from=build /wheels /wheels +RUN pip install --no-cache-dir /wheels/*.whl + +# TODO: Uncomment this line when the package is published +# ref-celery is required enable remote execution of the registered metrics +#RUN pip install ref-celery + +COPY . /app + +RUN pip install -e . diff --git a/packages/ref-metrics-example/README.md b/packages/ref-metrics-example/README.md index f8c4c84..c7b0556 100644 --- a/packages/ref-metrics-example/README.md +++ b/packages/ref-metrics-example/README.md @@ -3,4 +3,6 @@ An example of a basic REF metrics provider. This package provides an example of how to implement a REF metrics provider, -that exposes a single metric, `example_metric`, which is a simple counter. +that exposes a single metric, `example`, which calculates annual global means. + +This package will be turned into a template to help new providers get started. From dc589b3b2fbc90c6c226a75bcb4aa437c8c662fc Mon Sep 17 00:00:00 2001 From: Jared Lewis Date: Tue, 5 Nov 2024 22:20:34 +1100 Subject: [PATCH 03/13] feat: Move celery entrypoint with a cli tool --- docker-compose.yaml | 2 +- packages/ref-celery/pyproject.toml | 7 ++- packages/ref-celery/src/ref_celery/cli.py | 49 +++++++++++++++++++ packages/ref-metrics-example/Dockerfile | 7 +-- .../src/ref_metrics_example/celery.py | 8 --- uv.lock | 26 ++++++++++ 6 files changed, 86 insertions(+), 13 deletions(-) create mode 100644 packages/ref-celery/src/ref_celery/cli.py delete mode 100644 packages/ref-metrics-example/src/ref_metrics_example/celery.py diff --git a/docker-compose.yaml b/docker-compose.yaml index 2562528..fab2a9f 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -21,4 +21,4 @@ services: context: packages/ref-metrics-example additional_contexts: - root=. - command: ref-celery worker + command: ref-celery --package ref-metrics-example diff --git a/packages/ref-celery/pyproject.toml b/packages/ref-celery/pyproject.toml index 2c20680..225faaa 100644 --- a/packages/ref-celery/pyproject.toml +++ b/packages/ref-celery/pyproject.toml @@ -22,9 +22,14 @@ classifiers = [ ] dependencies = [ "celery[redis]>=5.4.0", - "ref-core" + "ref-core", + "typer" ] +[project.scripts] +ref-celery = "ref_celery.cli:app" + + [tool.uv] dev-dependencies = [ diff --git a/packages/ref-celery/src/ref_celery/cli.py b/packages/ref-celery/src/ref_celery/cli.py new file mode 100644 index 0000000..95bbeaa --- /dev/null +++ b/packages/ref-celery/src/ref_celery/cli.py @@ -0,0 +1,49 @@ +import importlib + +import typer + +from ref_celery.app import create_celery_app +from ref_celery.tasks import register_celery_tasks + +app = typer.Typer() + + +@app.command() +def start_worker( + loglevel: str = typer.Option("info", help="Log level for the worker"), + package: str = typer.Option(help="Package to import tasks from"), + extra_args: list[str] = typer.Argument(None, help="Additional arguments for the worker"), +) -> None: + """ + Start a Celery worker for the given package. + + A celery worker enables the execution of tasks in the background on multiple different nodes. + This worker will register a celery task for each metric in the provider. + The worker tasks can be executed by sending a celery task with the name '{package_name}_{metric_name}'. + + The package must define a 'provider' variable that is an instance of 'ref_core.MetricsProvider'. + """ + # Create a new celery app + celery_app = create_celery_app("ref_celery") + + # Attempt to import the package + try: + imp = importlib.import_module(package.replace("-", "_")) # type: ignore + except ModuleNotFoundError: + raise ValueError(f"Package '{package}' not found") + + # Get the provider from the package + try: + provider = imp.provider + except AttributeError: + raise ValueError("The package must define a 'provider' variable") + + # Wrap each metrics in the provider with a celery tasks + register_celery_tasks(celery_app, provider) + + argv = ["worker", f"--loglevel={loglevel}", *(extra_args or [])] + celery_app.worker_main(argv=argv) + + +if __name__ == "__main__": + app() diff --git a/packages/ref-metrics-example/Dockerfile b/packages/ref-metrics-example/Dockerfile index ee4d823..ce26312 100644 --- a/packages/ref-metrics-example/Dockerfile +++ b/packages/ref-metrics-example/Dockerfile @@ -3,9 +3,7 @@ # ref-metrics-example is a relatively simple package so can be directly installed via pip. # More complex packages may require a multi-stage build to build the package before installing it. -LABEL maintainer="Jared Lewis " -LABEL description="Docker image with the execution environment for the ref-metrics-example package" - +# TODO: Remove build stage when the package is published # Build wheels for the core packages # This is temporary until the package is published on PyPI FROM python:3.12-slim-bookworm AS build @@ -21,6 +19,9 @@ RUN uv build --package ref-celery -o /wheels # Build the container for the ref-metrics-package FROM python:3.12-slim-bookworm +LABEL maintainer="Jared Lewis " +LABEL description="Docker image with the execution environment for the ref-metrics-example package" + WORKDIR /app COPY pyproject.toml README.md /app/ diff --git a/packages/ref-metrics-example/src/ref_metrics_example/celery.py b/packages/ref-metrics-example/src/ref_metrics_example/celery.py deleted file mode 100644 index a1d91e1..0000000 --- a/packages/ref-metrics-example/src/ref_metrics_example/celery.py +++ /dev/null @@ -1,8 +0,0 @@ -from ref_celery.app import create_celery_app -from ref_celery.tasks import register_celery_tasks - -from ref_metrics_example import provider - -app = create_celery_app("ref_metrics_example") - -register_celery_tasks(app, provider) diff --git a/uv.lock b/uv.lock index ae60d3e..cac320b 100644 --- a/uv.lock +++ b/uv.lock @@ -2468,12 +2468,14 @@ source = { editable = "packages/ref-celery" } dependencies = [ { name = "celery", extra = ["redis"] }, { name = "ref-core" }, + { name = "typer" }, ] [package.metadata] requires-dist = [ { name = "celery", extras = ["redis"], specifier = ">=5.4.0" }, { name = "ref-core", editable = "packages/ref-core" }, + { name = "typer" }, ] [[package]] @@ -2764,6 +2766,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ff/ae/f19306b5a221f6a436d8f2238d5b80925004093fa3edea59835b514d9057/setuptools-75.1.0-py3-none-any.whl", hash = "sha256:35ab7fd3bcd95e6b7fd704e4a1539513edad446c097797f2985e0e4b960772f2", size = 1248506 }, ] +[[package]] +name = "shellingham" +version = "1.5.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/58/15/8b3609fd3830ef7b27b655beb4b4e9c62313a4e8da8c676e142cc210d58e/shellingham-1.5.4.tar.gz", hash = "sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de", size = 10310 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e0/f9/0595336914c5619e5f28a1fb793285925a8cd4b432c9da0a987836c7f822/shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686", size = 9755 }, +] + [[package]] name = "six" version = "1.16.0" @@ -3045,6 +3056,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/00/c0/8f5d070730d7836adc9c9b6408dec68c6ced86b304a9b26a14df072a6e8c/traitlets-5.14.3-py3-none-any.whl", hash = "sha256:b74e89e397b1ed28cc831db7aea759ba6640cb3de13090ca145426688ff1ac4f", size = 85359 }, ] +[[package]] +name = "typer" +version = "0.12.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "click" }, + { name = "rich" }, + { name = "shellingham" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c5/58/a79003b91ac2c6890fc5d90145c662fd5771c6f11447f116b63300436bc9/typer-0.12.5.tar.gz", hash = "sha256:f592f089bedcc8ec1b974125d64851029c3b1af145f04aca64d69410f0c9b722", size = 98953 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a8/2b/886d13e742e514f704c33c4caa7df0f3b89e5a25ef8db02aa9ca3d9535d5/typer-0.12.5-py3-none-any.whl", hash = "sha256:62fe4e471711b147e3365034133904df3e235698399bc4de2b36c8579298d52b", size = 47288 }, +] + [[package]] name = "types-python-dateutil" version = "2.9.0.20241003" From 5239c934ea26965ae35ac8d60a2084f1e9f26592 Mon Sep 17 00:00:00 2001 From: Jared Lewis Date: Wed, 6 Nov 2024 11:06:07 +1100 Subject: [PATCH 04/13] feat: Split celery configuration into separate packages --- docker-compose.yaml | 4 ++ packages/ref-celery/pyproject.toml | 6 +- packages/ref-celery/src/ref_celery/app.py | 13 ++-- .../src/ref_celery/celeryconf/__init__.py | 7 +++ .../src/ref_celery/celeryconf/base.py | 13 ++++ .../src/ref_celery/celeryconf/dev.py | 9 +++ .../src/ref_celery/celeryconf/prod.py | 8 +++ packages/ref-celery/src/ref_celery/runner.py | 20 ++++++ uv.lock | 62 ++++++++++++++++++- 9 files changed, 131 insertions(+), 11 deletions(-) create mode 100644 packages/ref-celery/src/ref_celery/celeryconf/__init__.py create mode 100644 packages/ref-celery/src/ref_celery/celeryconf/base.py create mode 100644 packages/ref-celery/src/ref_celery/celeryconf/dev.py create mode 100644 packages/ref-celery/src/ref_celery/celeryconf/prod.py create mode 100644 packages/ref-celery/src/ref_celery/runner.py diff --git a/docker-compose.yaml b/docker-compose.yaml index fab2a9f..2ea7f1b 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -17,8 +17,12 @@ services: metrics-example: image: ref-metrics-example restart: always + environment: + - CELERY_BROKER_URL=redis://redis:6379/1 build: context: packages/ref-metrics-example additional_contexts: - root=. command: ref-celery --package ref-metrics-example + volumes: + - ./packages/ref-metrics-example:/app diff --git a/packages/ref-celery/pyproject.toml b/packages/ref-celery/pyproject.toml index 225faaa..9a9f550 100644 --- a/packages/ref-celery/pyproject.toml +++ b/packages/ref-celery/pyproject.toml @@ -21,9 +21,11 @@ classifiers = [ "Topic :: Scientific/Engineering", ] dependencies = [ - "celery[redis]>=5.4.0", "ref-core", - "typer" + "celery[redis]>=5.4.0", + "typer>=0.12.0", + "environs>=9", + "loguru>=0.7.2" ] [project.scripts] diff --git a/packages/ref-celery/src/ref_celery/app.py b/packages/ref-celery/src/ref_celery/app.py index 9d5f706..26c9ada 100644 --- a/packages/ref-celery/src/ref_celery/app.py +++ b/packages/ref-celery/src/ref_celery/app.py @@ -1,13 +1,10 @@ -from celery import Celery - -app = Celery() +import os +from celery import Celery -class Config: - """Celery configuration""" +os.environ.setdefault("CELERY_CONFIG_MODULE", "ref_celery.celeryconf.dev") - broker_url = "redis://localhost:6379/1" - result_backend = "redis://localhost:6379/1" +app = Celery() def create_celery_app(name: str) -> Celery: @@ -15,6 +12,6 @@ def create_celery_app(name: str) -> Celery: Create a Celery app """ app = Celery(name) - app.config_from_object(Config) + app.config_from_envvar("CELERY_CONFIG_MODULE") return app diff --git a/packages/ref-celery/src/ref_celery/celeryconf/__init__.py b/packages/ref-celery/src/ref_celery/celeryconf/__init__.py new file mode 100644 index 0000000..6aac0d1 --- /dev/null +++ b/packages/ref-celery/src/ref_celery/celeryconf/__init__.py @@ -0,0 +1,7 @@ +""" +Celery configuration. + +The modules in this package are used to configure Celery for different environments. +The selected environment is determined by the `CELERY_CONFIG_MODULE` environment variable. +The default environment is `ref_celery.celeryconf.dev` which is used when running the app locally. +""" diff --git a/packages/ref-celery/src/ref_celery/celeryconf/base.py b/packages/ref-celery/src/ref_celery/celeryconf/base.py new file mode 100644 index 0000000..1bb4a64 --- /dev/null +++ b/packages/ref-celery/src/ref_celery/celeryconf/base.py @@ -0,0 +1,13 @@ +""" +Base configuration for Celery. + +Other environments can use these settings as a base and override them as needed. +""" + +from environs import Env + +env = Env() +env.read_env() + +broker_url = env.str("CELERY_BROKER_URL", "redis://localhost:6379/1") +result_backend = env.str("CELERY_RESULT_BACKEND", broker_url) diff --git a/packages/ref-celery/src/ref_celery/celeryconf/dev.py b/packages/ref-celery/src/ref_celery/celeryconf/dev.py new file mode 100644 index 0000000..9d65b5c --- /dev/null +++ b/packages/ref-celery/src/ref_celery/celeryconf/dev.py @@ -0,0 +1,9 @@ +"""Configuration for running celery locally""" + +from loguru import logger + +from .base import * # noqa: F403 + +# Currently the dev environment is the same as the base environment + +logger.info("Using dev configuration") diff --git a/packages/ref-celery/src/ref_celery/celeryconf/prod.py b/packages/ref-celery/src/ref_celery/celeryconf/prod.py new file mode 100644 index 0000000..58e4327 --- /dev/null +++ b/packages/ref-celery/src/ref_celery/celeryconf/prod.py @@ -0,0 +1,8 @@ +"""Configuration for running celery in production""" + +from loguru import logger + +from .base import * # noqa: F403 + +# Currently the production environment is the same as the base environment +logger.info("Using prod configuration") diff --git a/packages/ref-celery/src/ref_celery/runner.py b/packages/ref-celery/src/ref_celery/runner.py new file mode 100644 index 0000000..cf5d2c1 --- /dev/null +++ b/packages/ref-celery/src/ref_celery/runner.py @@ -0,0 +1,20 @@ +from ref_celery.app import create_celery_app + + +def main(): + """ + Send a task to the workers + """ + app = create_celery_app("ref_celery") + + # Inquire what tasks are available + i = app.control.inspect() + print(i.registered()) + + res = app.send_task("example_example") + + print(res.get(timeout=10)) + + +if __name__ == "__main__": + main() diff --git a/uv.lock b/uv.lock index cac320b..08b2e85 100644 --- a/uv.lock +++ b/uv.lock @@ -772,6 +772,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8f/d7/9322c609343d929e75e7e5e6255e614fcc67572cfd083959cdef3b7aad79/docutils-0.21.2-py3-none-any.whl", hash = "sha256:dafca5b9e384f0e419294eb4d2ff9fa826435bf15f15b7bd45723e8ad76811b2", size = 587408 }, ] +[[package]] +name = "environs" +version = "11.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "marshmallow" }, + { name = "python-dotenv" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f2/13/3d448cfbed9f1baff5765f49434cd849501351f14fd3f09f0f2e9bd35322/environs-11.0.0.tar.gz", hash = "sha256:069727a8f73d8ba8d033d3cd95c0da231d44f38f1da773bf076cef168d312ee8", size = 25787 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3f/30/ef8a3022e6cdcedfd7ba03ca88ab29e30334f8e958cdbf5ce120912397e8/environs-11.0.0-py3-none-any.whl", hash = "sha256:e0bcfd41c718c07a7db422f9109e490746450da38793fe4ee197f397b9343435", size = 12216 }, +] + [[package]] name = "esgpull" version = "0.7.3" @@ -1382,6 +1395,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/db/bc/83e112abc66cd466c6b83f99118035867cecd41802f8d044638aa78a106e/locket-1.0.0-py2.py3-none-any.whl", hash = "sha256:b6c819a722f7b6bd955b80781788e4a66a55628b858d347536b7e81325a3a5e3", size = 4398 }, ] +[[package]] +name = "loguru" +version = "0.7.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "win32-setctime", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/9e/30/d87a423766b24db416a46e9335b9602b054a72b96a88a241f2b09b560fa8/loguru-0.7.2.tar.gz", hash = "sha256:e671a53522515f34fd406340ee968cb9ecafbc4b36c679da03c18fd8d0bd51ac", size = 145103 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/03/0a/4f6fed21aa246c6b49b561ca55facacc2a44b87d65b8b92362a8e99ba202/loguru-0.7.2-py3-none-any.whl", hash = "sha256:003d71e3d3ed35f0f8984898359d65b79e5b21943f78af86aa5491210429b8eb", size = 62549 }, +] + [[package]] name = "mako" version = "1.3.6" @@ -1473,6 +1499,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0d/87/4c364e0f109eea2402079abecbe33fef4f347b551a11423d1f4e187ea497/MarkupSafe-3.0.1-cp313-cp313t-win_amd64.whl", hash = "sha256:730d86af59e0e43ce277bb83970530dd223bf7f2a838e086b50affa6ec5f9295", size = 15741 }, ] +[[package]] +name = "marshmallow" +version = "3.23.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "packaging" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/6d/30/14d8609f65c8aeddddd3181c06d2c9582da6278f063b27c910bbf9903441/marshmallow-3.23.1.tar.gz", hash = "sha256:3a8dfda6edd8dcdbf216c0ede1d1e78d230a6dc9c5a088f58c4083b974a0d468", size = 177488 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ac/a7/a78ff54e67ef92a3d12126b98eb98ab8abab3de4a8c46d240c87e514d6bb/marshmallow-3.23.1-py3-none-any.whl", hash = "sha256:fece2eb2c941180ea1b7fcbd4a83c51bfdd50093fdd3ad2585ee5e1df2508491", size = 49488 }, +] + [[package]] name = "matplotlib-inline" version = "0.1.7" @@ -2272,6 +2310,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892 }, ] +[[package]] +name = "python-dotenv" +version = "1.0.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/bc/57/e84d88dfe0aec03b7a2d4327012c1627ab5f03652216c63d49846d7a6c58/python-dotenv-1.0.1.tar.gz", hash = "sha256:e324ee90a023d808f1959c46bcbc04446a10ced277783dc6ee09987c37ec10ca", size = 39115 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6a/3e/b68c118422ec867fa7ab88444e1274aa40681c606d59ac27de5a5588f082/python_dotenv-1.0.1-py3-none-any.whl", hash = "sha256:f7b63ef50f1b690dddf550d03497b66d609393b40b564ed0d674909a68ebf16a", size = 19863 }, +] + [[package]] name = "python-json-logger" version = "2.0.7" @@ -2467,6 +2514,8 @@ version = "0.1.0" source = { editable = "packages/ref-celery" } dependencies = [ { name = "celery", extra = ["redis"] }, + { name = "environs" }, + { name = "loguru" }, { name = "ref-core" }, { name = "typer" }, ] @@ -2474,8 +2523,10 @@ dependencies = [ [package.metadata] requires-dist = [ { name = "celery", extras = ["redis"], specifier = ">=5.4.0" }, + { name = "environs", specifier = ">=9" }, + { name = "loguru", specifier = ">=0.7.2" }, { name = "ref-core", editable = "packages/ref-core" }, - { name = "typer" }, + { name = "typer", specifier = ">=0.12.0" }, ] [[package]] @@ -3216,6 +3267,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5a/84/44687a29792a70e111c5c477230a72c4b957d88d16141199bf9acb7537a3/websocket_client-1.8.0-py3-none-any.whl", hash = "sha256:17b44cc997f5c498e809b22cdf2d9c7a9e71c02c8cc2b6c56e7c2d1239bfa526", size = 58826 }, ] +[[package]] +name = "win32-setctime" +version = "1.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6b/dd/f95a13d2b235a28d613ba23ebad55191514550debb968b46aab99f2e3a30/win32_setctime-1.1.0.tar.gz", hash = "sha256:15cf5750465118d6929ae4de4eb46e8edae9a5634350c01ba582df868e932cb2", size = 3676 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0a/e6/a7d828fef907843b2a5773ebff47fb79ac0c1c88d60c0ca9530ee941e248/win32_setctime-1.1.0-py3-none-any.whl", hash = "sha256:231db239e959c2fe7eb1d7dc129f11172354f98361c4fa2d6d2d7e278baa8aad", size = 3604 }, +] + [[package]] name = "wrapt" version = "1.16.0" From 67e39e5b8528736418b9074570c5de98b03945cf Mon Sep 17 00:00:00 2001 From: Jared Lewis Date: Wed, 6 Nov 2024 12:55:47 +1100 Subject: [PATCH 05/13] chore: Pass through to workers --- .dockerignore | 2 +- .gitignore | 3 + docker-compose.yaml | 4 + .../src/ref_celery/celeryconf/base.py | 11 ++- packages/ref-celery/src/ref_celery/env.py | 14 ++++ packages/ref-celery/src/ref_celery/runner.py | 80 ++++++++++++++++++- packages/ref-celery/src/ref_celery/tasks.py | 13 +-- packages/ref-core/src/ref_core/metrics.py | 58 +++++++++++++- packages/ref-core/tests/conftest.py | 4 +- packages/ref-core/tests/unit/test_executor.py | 2 +- packages/ref-core/tests/unit/test_metrics.py | 2 +- packages/ref-metrics-example/Dockerfile | 7 +- .../src/ref_metrics_example/__init__.py | 4 +- .../src/ref_metrics_example/example.py | 10 ++- .../tests/unit/test_metrics.py | 10 +-- 15 files changed, 191 insertions(+), 33 deletions(-) create mode 100644 packages/ref-celery/src/ref_celery/env.py diff --git a/.dockerignore b/.dockerignore index c582285..257d755 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,3 +1,3 @@ .esgpull - +out .venv diff --git a/.gitignore b/.gitignore index f1f0ef4..e89c424 100644 --- a/.gitignore +++ b/.gitignore @@ -148,3 +148,6 @@ dmypy.json # Esgpull .esgpull + +# Local output directory +out/ diff --git a/docker-compose.yaml b/docker-compose.yaml index 2ea7f1b..25b47d4 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -19,6 +19,8 @@ services: restart: always environment: - CELERY_BROKER_URL=redis://redis:6379/1 + - REF_OUTPUT_ROOT=/output + - REF_ESGF_ROOT=/esgf build: context: packages/ref-metrics-example additional_contexts: @@ -26,3 +28,5 @@ services: command: ref-celery --package ref-metrics-example volumes: - ./packages/ref-metrics-example:/app + - .esgpull/data:/esgf + - ./out:/output diff --git a/packages/ref-celery/src/ref_celery/celeryconf/base.py b/packages/ref-celery/src/ref_celery/celeryconf/base.py index 1bb4a64..e8427b7 100644 --- a/packages/ref-celery/src/ref_celery/celeryconf/base.py +++ b/packages/ref-celery/src/ref_celery/celeryconf/base.py @@ -4,10 +4,15 @@ Other environments can use these settings as a base and override them as needed. """ -from environs import Env +from ref_celery.env import load_environment -env = Env() -env.read_env() +env = load_environment() broker_url = env.str("CELERY_BROKER_URL", "redis://localhost:6379/1") result_backend = env.str("CELERY_RESULT_BACKEND", broker_url) +broker_connection_retry_on_startup = True + +# Accept JSON and pickle as content +accept_content = ["json", "pickle"] +task_serializer = "pickle" +result_serializer = "pickle" diff --git a/packages/ref-celery/src/ref_celery/env.py b/packages/ref-celery/src/ref_celery/env.py new file mode 100644 index 0000000..376f11e --- /dev/null +++ b/packages/ref-celery/src/ref_celery/env.py @@ -0,0 +1,14 @@ +from environs import Env + + +def load_environment() -> Env: + """ + Load the environment variables from the `.env` file. + """ + env = Env() + env.read_env() + + return env + + +env = load_environment() diff --git a/packages/ref-celery/src/ref_celery/runner.py b/packages/ref-celery/src/ref_celery/runner.py index cf5d2c1..3f22aa1 100644 --- a/packages/ref-celery/src/ref_celery/runner.py +++ b/packages/ref-celery/src/ref_celery/runner.py @@ -1,19 +1,91 @@ +import pathlib +from pathlib import Path + +from celery.exceptions import NotRegistered +from loguru import logger +from ref_core.metrics import Configuration, TriggerInfo + from ref_celery.app import create_celery_app +from ref_celery.env import env + +config = { + "providers": [ + { + "name": "example", + "metrics": ["annual-global-mean-timeseries"], + } + ] +} + + +def _get_changed_dataset() -> Path: + """ + Get a file that has been changed + + This is a placeholder implementation for now + """ + return ( + pathlib.Path("CMIP6") + / "ScenarioMIP" + / "CSIRO" + / "ACCESS-ESM1-5" + / "ssp126" + / "r1i1p1f1" + / "Amon" + / "tas" + / "gn" + / "v20210318" + ) def main(): """ - Send a task to the workers + Execute a set of celery tasks """ app = create_celery_app("ref_celery") # Inquire what tasks are available i = app.control.inspect() - print(i.registered()) + if i.registered() is None: + logger.error("No tasks are registered by any workers. Check that workers are running") + return + + tasks = [] + + # Create the configuration and trigger objects + root_output_dir = env.path("REF_OUTPUT_ROOT") + logger.info(f"Using output directory {root_output_dir}") + + trigger = TriggerInfo(dataset=_get_changed_dataset()) + + # Create a task for each metric in each provider + for provider in config["providers"]: + for metric in provider["metrics"]: + metric_name = f"{provider['name']}_{metric}" + + configuration = Configuration( + output_fragment=Path(metric_name), + ) + + res = app.send_task( + metric_name, + kwargs=dict( + configuration=configuration, + trigger=trigger, + ), + ) + tasks.append(res) - res = app.send_task("example_example") + # Wait for all tasks to complete + for task in tasks: + try: + print(task.get(timeout=10)) + except NotRegistered: + i = app.control.inspect() - print(res.get(timeout=10)) + logger.error(f"Task {task.name} is not registered by any workers") + logger.info(f"Available tasks are: {i.registered()}") + raise if __name__ == "__main__": diff --git a/packages/ref-celery/src/ref_celery/tasks.py b/packages/ref-celery/src/ref_celery/tasks.py index 0b450e0..3012533 100644 --- a/packages/ref-celery/src/ref_celery/tasks.py +++ b/packages/ref-celery/src/ref_celery/tasks.py @@ -1,7 +1,9 @@ from collections.abc import Callable +from typing import Any from celery import Celery -from ref_core.metrics import Metric +from loguru import logger +from ref_core.metrics import Configuration, Metric, MetricResult, TriggerInfo from ref_core.providers import MetricsProvider @@ -10,14 +12,13 @@ def metric_task_factory(metric: Metric) -> Callable: Create a new task for the given metric """ - def task(): + def task(configuration: Configuration, trigger: TriggerInfo, **kwargs: Any) -> MetricResult: """ Task to run the metric """ - return metric.name + logger.info(f"Running metric {metric.name} with configuration {configuration} and trigger {trigger}") - # def task(configuration: Configuration, trigger: TriggerInfo): - # metric.run(configuration, trigger) + return metric.run(configuration, trigger) return task @@ -26,6 +27,8 @@ def register_celery_tasks(app: Celery, provider: MetricsProvider): """ Register all tasks for the given provider + This is run on worker startup to register all tasks a given provider + Parameters ---------- app diff --git a/packages/ref-core/src/ref_core/metrics.py b/packages/ref-core/src/ref_core/metrics.py index 2d2bb02..b7e3d3e 100644 --- a/packages/ref-core/src/ref_core/metrics.py +++ b/packages/ref-core/src/ref_core/metrics.py @@ -3,6 +3,7 @@ from typing import Any, Protocol, runtime_checkable from attrs import frozen +from ref_celery.env import env @frozen @@ -11,13 +12,57 @@ class Configuration: Configuration that describes the input data sources """ - output_directory: pathlib.Path + output_fragment: pathlib.Path """ - Directory to write output files to + Directory to write output files to relative to the output root. """ # TODO: Add more configuration options here + @staticmethod + def as_output_path(file_fragment: str | pathlib.Path, ensure_parent_exists: bool = False) -> pathlib.Path: + """ + Get the output path for a file in the output directory. + + Parameters + ---------- + file_fragment + Relative path to a file with respect to the output directory. + ensure_parent_exists + Whether to create the parent directory if it does not exist. + + Returns + ------- + : + The path to the file in the output directory. + """ + root_output_dir = env.path("REF_OUTPUT_ROOT") + + output_path = root_output_dir / file_fragment + + if ensure_parent_exists: + output_path.parent.mkdir(parents=True, exist_ok=True) + return output_path + + @staticmethod + def as_esgf_path(file_fragment: str | pathlib.Path) -> pathlib.Path: + """ + Get the output path for a file in the output directory. + + Parameters + ---------- + file_fragment + Relative path to a file with respect to the esgf directory. + + Returns + ------- + : + The path to the file in the esgf directory. + """ + root_output_dir = env.path("REF_ESGF_ROOT") + + return root_output_dir / file_fragment + @frozen class MetricResult: @@ -63,10 +108,15 @@ def build(configuration: Configuration, cmec_output_bundle: dict[str, Any]) -> " A prepared MetricResult object. The output bundle will be written to the output directory. """ - with open(configuration.output_directory / "output.json", "w") as file_handle: + with open( + configuration.as_output_path( + configuration.output_fragment / "output.json", ensure_parent_exists=True + ), + "w", + ) as file_handle: json.dump(cmec_output_bundle, file_handle) return MetricResult( - output_bundle=configuration.output_directory / "output.json", + output_bundle=configuration.output_fragment / "output.json", successful=True, ) diff --git a/packages/ref-core/tests/conftest.py b/packages/ref-core/tests/conftest.py index d4f3143..98f2314 100644 --- a/packages/ref-core/tests/conftest.py +++ b/packages/ref-core/tests/conftest.py @@ -8,7 +8,7 @@ class MockMetric: def run(self, configuration: Configuration, trigger: TriggerInfo) -> MetricResult: return MetricResult( - output_bundle=configuration.output_directory / "output.json", + output_bundle=configuration.output_fragment / "output.json", successful=True, ) @@ -39,5 +39,5 @@ def mock_metric() -> MockMetric: @pytest.fixture def configuration(tmp_path) -> Configuration: return Configuration( - output_directory=tmp_path, + output_fragment=tmp_path, ) diff --git a/packages/ref-core/tests/unit/test_executor.py b/packages/ref-core/tests/unit/test_executor.py index 120986d..71c2023 100644 --- a/packages/ref-core/tests/unit/test_executor.py +++ b/packages/ref-core/tests/unit/test_executor.py @@ -25,7 +25,7 @@ def test_run_metric(self, configuration, mock_metric): result = executor.run_metric(mock_metric, configuration, trigger=None) assert result.successful - assert result.output_bundle == configuration.output_directory / "output.json" + assert result.output_bundle == configuration.output_fragment / "output.json" @pytest.mark.parametrize("executor_name", ["local", None]) diff --git a/packages/ref-core/tests/unit/test_metrics.py b/packages/ref-core/tests/unit/test_metrics.py index d08b557..d114485 100644 --- a/packages/ref-core/tests/unit/test_metrics.py +++ b/packages/ref-core/tests/unit/test_metrics.py @@ -3,7 +3,7 @@ class TestMetricResult: def test_build(self, tmp_path): - config = Configuration(output_directory=tmp_path) + config = Configuration(output_fragment=tmp_path) result = MetricResult.build(config, {"data": "value"}) assert result.successful diff --git a/packages/ref-metrics-example/Dockerfile b/packages/ref-metrics-example/Dockerfile index ce26312..31f4121 100644 --- a/packages/ref-metrics-example/Dockerfile +++ b/packages/ref-metrics-example/Dockerfile @@ -22,7 +22,10 @@ FROM python:3.12-slim-bookworm LABEL maintainer="Jared Lewis " LABEL description="Docker image with the execution environment for the ref-metrics-example package" +ENV C_FORCE_ROOT=false + WORKDIR /app +RUN useradd -ms /bin/bash celery COPY pyproject.toml README.md /app/ @@ -35,5 +38,7 @@ RUN pip install --no-cache-dir /wheels/*.whl #RUN pip install ref-celery COPY . /app - +RUN chown -R celery:celery /app RUN pip install -e . + +USER celery diff --git a/packages/ref-metrics-example/src/ref_metrics_example/__init__.py b/packages/ref-metrics-example/src/ref_metrics_example/__init__.py index f645a72..5fe93a1 100644 --- a/packages/ref-metrics-example/src/ref_metrics_example/__init__.py +++ b/packages/ref-metrics-example/src/ref_metrics_example/__init__.py @@ -6,11 +6,11 @@ from ref_core.providers import MetricsProvider -from ref_metrics_example.example import ExampleMetric +from ref_metrics_example.example import AnnualGlobalMeanTimeseries __version__ = importlib.metadata.version("ref_metrics_example") __core_version__ = importlib.metadata.version("ref_core") # Initialise the metrics manager and register the example metric provider = MetricsProvider("example", __version__) -provider.register(ExampleMetric()) +provider.register(AnnualGlobalMeanTimeseries()) diff --git a/packages/ref-metrics-example/src/ref_metrics_example/example.py b/packages/ref-metrics-example/src/ref_metrics_example/example.py index 0e378c8..ddbaf01 100644 --- a/packages/ref-metrics-example/src/ref_metrics_example/example.py +++ b/packages/ref-metrics-example/src/ref_metrics_example/example.py @@ -74,12 +74,12 @@ def format_cmec_output_bundle(dataset: xr.Dataset) -> dict[str, Any]: return cmec_output -class ExampleMetric: +class AnnualGlobalMeanTimeseries: """ Calculate the annual mean global mean timeseries for a dataset """ - name = "example" + name = "annual-global-mean-timeseries" def run(self, configuration: Configuration, trigger: TriggerInfo | None) -> MetricResult: """ @@ -101,14 +101,16 @@ def run(self, configuration: Configuration, trigger: TriggerInfo | None) -> Metr if trigger is None: # TODO: This should probably raise an exception return MetricResult( - output_bundle=configuration.output_directory / "output.json", + output_bundle=configuration.output_fragment / "output.json", successful=False, ) # This is where one would hook into how ever they want to run # their benchmarking packages. # cmec-driver, python calls, subprocess calls all would work - annual_mean_global_mean_timeseries = calculate_annual_mean_timeseries(trigger.dataset) + annual_mean_global_mean_timeseries = calculate_annual_mean_timeseries( + configuration.as_esgf_path(trigger.dataset) + ) return MetricResult.build( configuration, format_cmec_output_bundle(annual_mean_global_mean_timeseries) diff --git a/packages/ref-metrics-example/tests/unit/test_metrics.py b/packages/ref-metrics-example/tests/unit/test_metrics.py index d9f245f..ec6c963 100644 --- a/packages/ref-metrics-example/tests/unit/test_metrics.py +++ b/packages/ref-metrics-example/tests/unit/test_metrics.py @@ -2,7 +2,7 @@ import pytest from ref_core.metrics import Configuration, TriggerInfo -from ref_metrics_example.example import ExampleMetric, calculate_annual_mean_timeseries +from ref_metrics_example.example import AnnualGlobalMeanTimeseries, calculate_annual_mean_timeseries @pytest.fixture @@ -29,10 +29,10 @@ def test_annual_mean(esgf_data_dir, test_dataset): def test_example_metric(tmp_path, test_dataset): - metric = ExampleMetric() + metric = AnnualGlobalMeanTimeseries() configuration = Configuration( - output_directory=tmp_path, + output_fragment=tmp_path, ) result = metric.run(configuration, trigger=TriggerInfo(dataset=test_dataset)) @@ -44,10 +44,10 @@ def test_example_metric(tmp_path, test_dataset): def test_example_metric_no_trigger(tmp_path, test_dataset): - metric = ExampleMetric() + metric = AnnualGlobalMeanTimeseries() configuration = Configuration( - output_directory=tmp_path, + output_fragment=tmp_path, ) result = metric.run(configuration, trigger=None) From d50ffc57f49ad3448c10161fa9785e0a037e5770 Mon Sep 17 00:00:00 2001 From: Jared Lewis Date: Wed, 6 Nov 2024 13:49:05 +1100 Subject: [PATCH 06/13] docs: Add module docs --- .../ref-celery/src/ref_celery/celeryconf/base.py | 2 +- .../ref-celery/src/ref_celery/celeryconf/dev.py | 1 - packages/ref-celery/src/ref_celery/runner.py | 2 +- packages/ref-celery/src/ref_celery/tasks.py | 15 +++++++++++++++ .../ref_celery => ref-core/src/ref_core}/env.py | 0 packages/ref-core/src/ref_core/metrics.py | 3 ++- 6 files changed, 19 insertions(+), 4 deletions(-) rename packages/{ref-celery/src/ref_celery => ref-core/src/ref_core}/env.py (100%) diff --git a/packages/ref-celery/src/ref_celery/celeryconf/base.py b/packages/ref-celery/src/ref_celery/celeryconf/base.py index e8427b7..95e4f0d 100644 --- a/packages/ref-celery/src/ref_celery/celeryconf/base.py +++ b/packages/ref-celery/src/ref_celery/celeryconf/base.py @@ -4,7 +4,7 @@ Other environments can use these settings as a base and override them as needed. """ -from ref_celery.env import load_environment +from ref_core.env import load_environment env = load_environment() diff --git a/packages/ref-celery/src/ref_celery/celeryconf/dev.py b/packages/ref-celery/src/ref_celery/celeryconf/dev.py index 9d65b5c..eda289e 100644 --- a/packages/ref-celery/src/ref_celery/celeryconf/dev.py +++ b/packages/ref-celery/src/ref_celery/celeryconf/dev.py @@ -5,5 +5,4 @@ from .base import * # noqa: F403 # Currently the dev environment is the same as the base environment - logger.info("Using dev configuration") diff --git a/packages/ref-celery/src/ref_celery/runner.py b/packages/ref-celery/src/ref_celery/runner.py index 3f22aa1..5b7e004 100644 --- a/packages/ref-celery/src/ref_celery/runner.py +++ b/packages/ref-celery/src/ref_celery/runner.py @@ -3,10 +3,10 @@ from celery.exceptions import NotRegistered from loguru import logger +from ref_core.env import env from ref_core.metrics import Configuration, TriggerInfo from ref_celery.app import create_celery_app -from ref_celery.env import env config = { "providers": [ diff --git a/packages/ref-celery/src/ref_celery/tasks.py b/packages/ref-celery/src/ref_celery/tasks.py index 3012533..04f113a 100644 --- a/packages/ref-celery/src/ref_celery/tasks.py +++ b/packages/ref-celery/src/ref_celery/tasks.py @@ -1,3 +1,18 @@ +""" +Task generation and registration for Celery + +This module provides a factory function to create Celery tasks for metrics. +These celery tasks are then registered with the Celery app to enable them to be run asynchronously. + +Since the metric definition may be in a different virtual environment it is not possible to directly +import the provider and create the tasks in both the worker and the main process. + +Instead, the tasks are registered only in the worker process. +The main process can then send tasks to the worker using the task name. +The main process is responsible for tracking what metrics have been registered +and to respond to new workers coming online. +""" + from collections.abc import Callable from typing import Any diff --git a/packages/ref-celery/src/ref_celery/env.py b/packages/ref-core/src/ref_core/env.py similarity index 100% rename from packages/ref-celery/src/ref_celery/env.py rename to packages/ref-core/src/ref_core/env.py diff --git a/packages/ref-core/src/ref_core/metrics.py b/packages/ref-core/src/ref_core/metrics.py index b7e3d3e..5f4811e 100644 --- a/packages/ref-core/src/ref_core/metrics.py +++ b/packages/ref-core/src/ref_core/metrics.py @@ -3,7 +3,8 @@ from typing import Any, Protocol, runtime_checkable from attrs import frozen -from ref_celery.env import env + +from ref_core.env import env @frozen From ab36dda1ddd5c23233bb9583bcb7821ae4f8cb33 Mon Sep 17 00:00:00 2001 From: Jared Lewis Date: Wed, 6 Nov 2024 14:06:47 +1100 Subject: [PATCH 07/13] docs: Fix up docs --- docker-compose.yaml | 2 ++ docs/gen_doc_stubs.py | 9 ++++++--- mkdocs.yml | 5 +++++ packages/ref-celery/src/ref_celery/__init__.py | 4 +++- packages/ref-celery/src/ref_celery/app.py | 8 ++++++++ packages/ref-celery/src/ref_celery/cli.py | 4 ++++ packages/ref-celery/src/ref_celery/runner.py | 4 ++++ pyproject.toml | 1 + uv.lock | 14 ++++++++++++++ 9 files changed, 47 insertions(+), 4 deletions(-) diff --git a/docker-compose.yaml b/docker-compose.yaml index 25b47d4..ca12782 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -1,3 +1,5 @@ +# Services to run the Rapid Evaluation Framework (REF) + name: cmip-ref services: redis: diff --git a/docs/gen_doc_stubs.py b/docs/gen_doc_stubs.py index 0d7cefb..bf51d76 100644 --- a/docs/gen_doc_stubs.py +++ b/docs/gen_doc_stubs.py @@ -111,13 +111,14 @@ def write_module_page( with mkdocs_gen_files.open(write_file, "w") as fh: fh.write(f"# {package_full_name}\n") + fh.write("\n") + fh.write(f"::: {package_full_name}") + if sub_packages: fh.write("\n") + fh.write("## sub-packages \n") fh.write(f"{create_sub_packages_table(sub_packages)}\n") - fh.write("\n") - fh.write(f"::: {package_full_name}") - if package.__doc__ is None: summary = "" else: @@ -131,7 +132,9 @@ def write_module_page( write_module_page("ref_core") +write_module_page("ref_celery") write_module_page("ref_metrics_example") with mkdocs_gen_files.open(ROOT_DIR / "NAVIGATION.md", "w") as fh: + print(list(nav.build_literate_nav())) fh.writelines(nav.build_literate_nav()) diff --git a/mkdocs.yml b/mkdocs.yml index b0461d2..1cb3a6e 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -15,6 +15,7 @@ nav: - Tutorials: tutorials.md - Further background: - Explanation: explanation.md + - API: api/ - Development: development.md - Changelog: changelog.md @@ -51,6 +52,10 @@ plugins: - gen-files: scripts: - docs/gen_doc_stubs.py + # Make the navigation easier to handle/auto-generate if we wish + # https://oprypin.github.io/mkdocs-literate-nav/ + - literate-nav: + nav_file: NAVIGATION.md # Notebook support. # Working out how to make this play with nb-exec would be nice, # then it wouldn't run every time. diff --git a/packages/ref-celery/src/ref_celery/__init__.py b/packages/ref-celery/src/ref_celery/__init__.py index 2e220f9..930e440 100644 --- a/packages/ref-celery/src/ref_celery/__init__.py +++ b/packages/ref-celery/src/ref_celery/__init__.py @@ -1,5 +1,7 @@ """ -Rapid evaluating CMIP data +Celery application for running metrics asynchronously across multiple workers + + """ import importlib.metadata diff --git a/packages/ref-celery/src/ref_celery/app.py b/packages/ref-celery/src/ref_celery/app.py index 26c9ada..0d3da82 100644 --- a/packages/ref-celery/src/ref_celery/app.py +++ b/packages/ref-celery/src/ref_celery/app.py @@ -1,3 +1,7 @@ +""" +Celery app creation +""" + import os from celery import Celery @@ -10,6 +14,10 @@ def create_celery_app(name: str) -> Celery: """ Create a Celery app + + This function creates a new Celery app with the given name and configuration module. + The configuration module is loaded from the environment variable `CELERY_CONFIG_MODULE` + which defaults to `ref_celery.celeryconf.dev` if not set. """ app = Celery(name) app.config_from_envvar("CELERY_CONFIG_MODULE") diff --git a/packages/ref-celery/src/ref_celery/cli.py b/packages/ref-celery/src/ref_celery/cli.py index 95bbeaa..baf9c70 100644 --- a/packages/ref-celery/src/ref_celery/cli.py +++ b/packages/ref-celery/src/ref_celery/cli.py @@ -1,3 +1,7 @@ +""" +CLI for the ref-celery package. +""" + import importlib import typer diff --git a/packages/ref-celery/src/ref_celery/runner.py b/packages/ref-celery/src/ref_celery/runner.py index 5b7e004..8b7e4c5 100644 --- a/packages/ref-celery/src/ref_celery/runner.py +++ b/packages/ref-celery/src/ref_celery/runner.py @@ -1,3 +1,7 @@ +""" +Example script for running a set of metrics asynchronously +""" + import pathlib from pathlib import Path diff --git a/pyproject.toml b/pyproject.toml index dbc903f..a427c86 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,6 +37,7 @@ dev-dependencies = [ "mkdocs-material>=9.5.0", "mkdocs-gen-files>=0.5.0", "mkdocs-section-index>=0.3.9", + "mkdocs-literate-nav>=0.6.1", "mkdocs-autorefs>=1.0.1", "mkdocs-jupyter>=0.24.0", "myst-nb>=1.1.1", diff --git a/uv.lock b/uv.lock index 08b2e85..d0ff099 100644 --- a/uv.lock +++ b/uv.lock @@ -522,6 +522,7 @@ dev = [ { name = "mkdocs-autorefs" }, { name = "mkdocs-gen-files" }, { name = "mkdocs-jupyter" }, + { name = "mkdocs-literate-nav" }, { name = "mkdocs-material" }, { name = "mkdocs-section-index" }, { name = "mkdocstrings", extra = ["python"] }, @@ -553,6 +554,7 @@ dev = [ { name = "mkdocs-autorefs", specifier = ">=1.0.1" }, { name = "mkdocs-gen-files", specifier = ">=0.5.0" }, { name = "mkdocs-jupyter", specifier = ">=0.24.0" }, + { name = "mkdocs-literate-nav", specifier = ">=0.6.1" }, { name = "mkdocs-material", specifier = ">=9.5.0" }, { name = "mkdocs-section-index", specifier = ">=0.3.9" }, { name = "mkdocstrings", extras = ["python"], specifier = ">=0.25.0" }, @@ -1643,6 +1645,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/35/77/e2afd28ea0af09ed75fcd96c00ac854811e5cbe0658059d7770963a46be2/mkdocs_jupyter-0.25.0-py3-none-any.whl", hash = "sha256:d83d71deef19f0401505945bf92ec3bd5b40615af89308e72d5112929f8ee00b", size = 1456119 }, ] +[[package]] +name = "mkdocs-literate-nav" +version = "0.6.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "mkdocs" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/4d/f9/c48a04f3cf484f8016a343c1d7d99c3a1ef01dbb33ceabb1d02e0ecabda7/mkdocs_literate_nav-0.6.1.tar.gz", hash = "sha256:78a7ab6d878371728acb0cdc6235c9b0ffc6e83c997b037f4a5c6ff7cef7d759", size = 16437 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/51/3b/e00d839d3242844c77e248f9572dd34644a04300839a60fe7d6bf652ab19/mkdocs_literate_nav-0.6.1-py3-none-any.whl", hash = "sha256:e70bdc4a07050d32da79c0b697bd88e9a104cf3294282e9cb20eec94c6b0f401", size = 13182 }, +] + [[package]] name = "mkdocs-material" version = "9.5.39" From fa93f55923a31d2f558a86768758fd8f7c30e36c Mon Sep 17 00:00:00 2001 From: Jared Lewis Date: Wed, 6 Nov 2024 20:46:45 +1100 Subject: [PATCH 08/13] docs: Add more --- README.md | 30 ++++++ docker-compose.yaml | 1 + docs/configuration.md | 12 ++- docs/explanation.md | 2 +- docs/how-to-guides/running-metrics-locally.py | 92 +++++++++++++++++++ packages/ref-celery/README.md | 14 +++ packages/ref-core/src/ref_core/env.py | 6 +- .../src/ref_core/executor/__init__.py | 8 +- .../ref-core/src/ref_core/executor/local.py | 2 +- packages/ref-core/tests/unit/test_executor.py | 4 +- pyproject.toml | 1 + .../src/ref_celery => scripts}/runner.py | 3 +- uv.lock | 18 ++++ 13 files changed, 179 insertions(+), 14 deletions(-) create mode 100644 docs/how-to-guides/running-metrics-locally.py rename {packages/ref-celery/src/ref_celery => scripts}/runner.py (99%) diff --git a/README.md b/README.md index b9249cd..fd8f593 100644 --- a/README.md +++ b/README.md @@ -44,6 +44,36 @@ mamba install -c conda-forge cmip-ref conda install -c conda-forge cmip-ref ``` +## Getting started + +### As a metrics provider + +Metrics providers are the core of the REF. +They define the metrics that will be calculated and the data that will be used to calculate them, +by providing a consistent interface for the REF to interact with. + + +These metrics providers can be run as standalone applications or as part of the REF. +See + +### As a modelling center + +The REF requires a number of different services to be running in order to function. +To make it easy to deploy and use locally, +we provide a `docker-compose` file that will start all the necessary services. + +The background services for the REF can be deployed using the following command: + +```bash +docker-compose up +``` + +This will start the following services: +* `redis` - a message broker for the REF +* `flower` - a monitoring tool for the background tasks in REF +* `ref-metrics-example` - A worker for executing the 'example' metric provider + +Metric calculations can then be queued up using `scripts/runner.py` diff --git a/docker-compose.yaml b/docker-compose.yaml index ca12782..596ec5c 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -1,4 +1,5 @@ # Services to run the Rapid Evaluation Framework (REF) +# These can be started by running `docker-compose up` in the root directory of the codebase. name: cmip-ref services: diff --git a/docs/configuration.md b/docs/configuration.md index 938e0bb..ce416b0 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -7,8 +7,18 @@ The default values for these environment variables are generally suitable, but if you require updating these values we recommend the use of a `.env` file to make the changes easier to reproduce in future. -### `CMIP_REF_EXECUTOR` +### `REF_EXECUTOR` Executor to use for running the metrics. Defaults to use the local executor ("local"). + +### `REF_ESGF_ROOT` + +Path to the root of the ESGF output data. + +### `REF_OUTPUT_ROOT` + +Path to the root directory where data should be stored. +This has to be shared between any workers and the parent +process. diff --git a/docs/explanation.md b/docs/explanation.md index acaf71b..3b371f2 100644 --- a/docs/explanation.md +++ b/docs/explanation.md @@ -54,5 +54,5 @@ The following environments are planned to be supported in the future: * Kubernetes (for cloud-based execution) * Subprocess (for HPC systems) -The selected executor is defined using the `CMIP_REF_EXECUTOR` environment variable. +The selected executor is defined using the `REF_EXECUTOR` environment variable. See the [Configuration](configuration.md) page for more information. diff --git a/docs/how-to-guides/running-metrics-locally.py b/docs/how-to-guides/running-metrics-locally.py new file mode 100644 index 0000000..4e0bffc --- /dev/null +++ b/docs/how-to-guides/running-metrics-locally.py @@ -0,0 +1,92 @@ +# --- +# jupyter: +# jupytext: +# text_representation: +# extension: .py +# format_name: percent +# format_version: '1.3' +# jupytext_version: 1.16.4 +# kernelspec: +# display_name: Python 3 (ipykernel) +# language: python +# name: python3 +# --- + +# %% [markdown] +# +# # Testing metric providers locally +# Metric providers can be run locally without requiring the rest of the REF infrastructure. +# This is useful for testing and debugging metrics. + +# Running a metric locally requires that the target REF metrics package, e.g. `ref-metrics-example`, +# and its dependencies are installed in the current Python environment. +# +# This guide will walk you through how to run a metric provider locally. +# %% +import json +import pathlib + +import ref_metrics_example +from ref_core.executor import run_metric +from ref_core.metrics import Configuration, TriggerInfo + +# %% +provider = ref_metrics_example.provider +provider + +# %% +# Relative path to some CMIP6 data +example_dataset = ( + pathlib.Path("CMIP6") + / "ScenarioMIP" + / "CSIRO" + / "ACCESS-ESM1-5" + / "ssp126" + / "r1i1p1f1" + / "Amon" + / "tas" + / "gn" + / "v20210318" +) + +# %% +configuration = Configuration(output_fragment=pathlib.Path("out")) +trigger = TriggerInfo(dataset=example_dataset) + +# %% [markdown] +# ## Metric calculations +# +# Metric calculations are typically run using an [Executor](ref_core.executor.Executor) +# which provides an abstraction to enable metrics to be run in multiple different ways. +# +# The simplest executor is the `LocalExecutor`. +# This executor runs a given metric synchronously in the current process. +# +# The LocalExectuor is the default executor when using the `ref_core.executor.run_metric` function. +# This can be overridden by specifying the `REF_EXECUTOR` environment variable. + +# %% +result = run_metric("annual-global-mean-timeseries", provider, configuration=configuration, trigger=trigger) +result + +# %% +with open(configuration.as_output_path(result.output_bundle)) as fh: + # Load the output bundle and pretty print + loaded_result = json.loads(fh.read()) + print(json.dumps(loaded_result, indent=2)) + +# %% [markdown] +# ### Running directly +# +# The local executor can be bypassed if you need access to running the metric calculation directly. +# This will not perform and validation/verification of the output results. + +# %% +metric = provider.get("annual-global-mean-timeseries") + +direct_result = metric.run(configuration=configuration, trigger=trigger) +assert direct_result.successful + +direct_result + +# %% diff --git a/packages/ref-celery/README.md b/packages/ref-celery/README.md index 050d4dc..5fa5e32 100644 --- a/packages/ref-celery/README.md +++ b/packages/ref-celery/README.md @@ -1,3 +1,17 @@ # ref-celery This package provides celery task generation from Provider and Metric definitions. + +## CLI tool + +The `ref-celery` package provides a CLI tool to start a worker instance from a REF metrics provider. + +### Usage + +For example, to start a worker instance for the `ref-metrics-example` package: + +```bash +ref-celery --package ref-metrics-example +``` + +This requires the `ref-metrics-example` package to be installed in the current virtual environment. diff --git a/packages/ref-core/src/ref_core/env.py b/packages/ref-core/src/ref_core/env.py index 376f11e..de164ac 100644 --- a/packages/ref-core/src/ref_core/env.py +++ b/packages/ref-core/src/ref_core/env.py @@ -5,10 +5,10 @@ def load_environment() -> Env: """ Load the environment variables from the `.env` file. """ - env = Env() - env.read_env() + new_env = Env() + new_env.read_env(verbose=True) - return env + return new_env env = load_environment() diff --git a/packages/ref-core/src/ref_core/executor/__init__.py b/packages/ref-core/src/ref_core/executor/__init__.py index 5c4ceb4..f21f8d5 100644 --- a/packages/ref-core/src/ref_core/executor/__init__.py +++ b/packages/ref-core/src/ref_core/executor/__init__.py @@ -4,7 +4,7 @@ We support running metrics in different environments, such as locally, in a separate process, or in a container. These environments are represented by `Executor` classes. -The `CMIP_REF_EXECUTOR` environment variable determines which executor is used. +The `REF_EXECUTOR` environment variable determines which executor is used. The simplest executor is the `LocalExecutor`, which runs the metric in the same process. This is useful for local testing and debugging. @@ -119,7 +119,7 @@ def run_metric(metric_name: str, /, metrics_provider: MetricsProvider, **kwargs: """ Run a metric using the default executor - The executor is determined by the `CMIP_REF_EXECUTOR` environment variable. + The executor is determined by the `REF_EXECUTOR` environment variable. The arguments will be updated in the future as the metric execution interface is expanded. TODO: migrate to a configuration object rather than relying on environment variables. @@ -138,12 +138,12 @@ def run_metric(metric_name: str, /, metrics_provider: MetricsProvider, **kwargs: : The result of the metric execution """ - executor_name = os.environ.get("CMIP_REF_EXECUTOR", "local") + executor_name = os.environ.get("REF_EXECUTOR", "local") executor = get_executor(executor_name) metric = metrics_provider.get(metric_name) - result = executor.run_metric(metric, trigger=None, **kwargs) + result = executor.run_metric(metric, **kwargs) # TODO: Validate the result # TODO: Log the result diff --git a/packages/ref-core/src/ref_core/executor/local.py b/packages/ref-core/src/ref_core/executor/local.py index 3fdecc2..1d6757e 100644 --- a/packages/ref-core/src/ref_core/executor/local.py +++ b/packages/ref-core/src/ref_core/executor/local.py @@ -12,7 +12,7 @@ class LocalExecutor: the exact manner of which is yet to be determined. """ - name = "local" + name: str = "local" def run_metric( self, metric: Metric, configuration: Configuration, trigger: TriggerInfo | None, **kwargs: Any diff --git a/packages/ref-core/tests/unit/test_executor.py b/packages/ref-core/tests/unit/test_executor.py index 71c2023..feec036 100644 --- a/packages/ref-core/tests/unit/test_executor.py +++ b/packages/ref-core/tests/unit/test_executor.py @@ -31,13 +31,13 @@ def test_run_metric(self, configuration, mock_metric): @pytest.mark.parametrize("executor_name", ["local", None]) def test_run_metric_local(monkeypatch, executor_name, mock_metric, provider, configuration): if executor_name: - monkeypatch.setenv("CMIP_REF_EXECUTOR", executor_name) + monkeypatch.setenv("REF_EXECUTOR", executor_name) result = run_metric("mock", provider, configuration=configuration) assert result.successful def test_run_metric_unknown_executor(monkeypatch, provider): - monkeypatch.setenv("CMIP_REF_EXECUTOR", "missing") + monkeypatch.setenv("REF_EXECUTOR", "missing") with pytest.raises(KeyError): run_metric("mock", metrics_provider=provider, kwarg="test") diff --git a/pyproject.toml b/pyproject.toml index a427c86..9521626 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,6 +44,7 @@ dev-dependencies = [ "jupyterlab>=4.2.0", "jupytext>=1.16.3", "esgpull>=0.7.3", + "notebook>=7", ] [tool.uv.workspace] diff --git a/packages/ref-celery/src/ref_celery/runner.py b/scripts/runner.py similarity index 99% rename from packages/ref-celery/src/ref_celery/runner.py rename to scripts/runner.py index 8b7e4c5..88a9c26 100644 --- a/packages/ref-celery/src/ref_celery/runner.py +++ b/scripts/runner.py @@ -7,11 +7,10 @@ from celery.exceptions import NotRegistered from loguru import logger +from ref_celery.app import create_celery_app from ref_core.env import env from ref_core.metrics import Configuration, TriggerInfo -from ref_celery.app import create_celery_app - config = { "providers": [ { diff --git a/uv.lock b/uv.lock index d0ff099..8c7a2ba 100644 --- a/uv.lock +++ b/uv.lock @@ -528,6 +528,7 @@ dev = [ { name = "mkdocstrings", extra = ["python"] }, { name = "mypy" }, { name = "myst-nb" }, + { name = "notebook" }, { name = "pip" }, { name = "pre-commit" }, { name = "pytest" }, @@ -560,6 +561,7 @@ dev = [ { name = "mkdocstrings", extras = ["python"], specifier = ">=0.25.0" }, { name = "mypy", specifier = ">=1.11.0" }, { name = "myst-nb", specifier = ">=1.1.1" }, + { name = "notebook", specifier = ">=7" }, { name = "pip", specifier = ">=24.3.1" }, { name = "pre-commit", specifier = ">=3.3.1" }, { name = "pytest", specifier = ">=7.3.1" }, @@ -1934,6 +1936,22 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d2/1d/1b658dbd2b9fa9c4c9f32accbfc0205d532c8c6194dc0f2a4c0428e7128a/nodeenv-1.9.1-py2.py3-none-any.whl", hash = "sha256:ba11c9782d29c27c70ffbdda2d7415098754709be8a7056d79a737cd901155c9", size = 22314 }, ] +[[package]] +name = "notebook" +version = "7.2.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "jupyter-server" }, + { name = "jupyterlab" }, + { name = "jupyterlab-server" }, + { name = "notebook-shim" }, + { name = "tornado" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/0f/33/30b83c1c84e368087059bde1269549612584924db156bff53654e165a498/notebook-7.2.2.tar.gz", hash = "sha256:2ef07d4220421623ad3fe88118d687bc0450055570cdd160814a59cf3a1c516e", size = 4948876 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/46/77/53732fbf48196af9e51c2a61833471021c1d77d335d57b96ee3588c0c53d/notebook-7.2.2-py3-none-any.whl", hash = "sha256:c89264081f671bc02eec0ed470a627ed791b9156cad9285226b31611d3e9fe1c", size = 5037123 }, +] + [[package]] name = "notebook-shim" version = "0.2.4" From 80376f0af60fd4d18aea6370de3f7a7bcfe27d60 Mon Sep 17 00:00:00 2001 From: Jared Lewis Date: Wed, 6 Nov 2024 21:35:42 +1100 Subject: [PATCH 09/13] test: Fix core tests --- .env.example | 7 +++++++ .github/workflows/ci.yaml | 2 ++ Makefile | 10 ++++++++-- conftest.py | 17 ++++++++++++++++ packages/ref-core/tests/unit/test_executor.py | 6 ++++-- .../ref-core/tests/unit/test_providers.py | 6 ++++++ .../tests/unit/test_metrics.py | 20 ------------------- ruff.toml | 3 ++- 8 files changed, 46 insertions(+), 25 deletions(-) create mode 100644 .env.example diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..5870cdc --- /dev/null +++ b/.env.example @@ -0,0 +1,7 @@ +# Example of an environment file for the CMIP-REF project +# This allows for running the project in a development environment outside of a container + +CELERY_BROKER_URL=redis://localhost:6379/1 + +REF_OUTPUT_ROOT=out +REF_ESGF_ROOT=.esgpull/data diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 067426f..6cb4de4 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -74,7 +74,9 @@ jobs: make fetch-test-data - name: Run tests run: | + cp .env.example .env uv run --package ref-core pytest packages/ref-core -r a -v --doctest-modules --cov=packages/ref-core/src --cov-report=term + uv run --package ref-celery pytest packages/ref-celery -r a -v --doctest-modules --cov=packages/ref-celery/src --cov-report=term uv run --package ref-metrics-example pytest packages/ref-metrics-example -r a -v --doctest-modules --cov=packages/ref-metrics-example/src --cov-report=term --cov-append uv run coverage xml # Run integration tests (without adding to the coverage) diff --git a/Makefile b/Makefile index b3e9a05..47e1bb5 100644 --- a/Makefile +++ b/Makefile @@ -38,11 +38,17 @@ ruff-fixes: ## fix the code using ruff uv run ruff format .PHONY: test-core -test-core: ## run the tests +test-core: ## run the tests for ref-core uv run --package ref-core \ pytest packages/ref-core \ -r a -v --doctest-modules --cov=packages/ref-core/src +.PHONY: test-celery +test-celery: ## run the tests for ref-celery + uv run --package ref-celery \ + pytest packages/ref-celery \ + -r a -v --doctest-modules --cov=packages/ref-celery/src + .PHONY: test-metrics-example test-metrics-example: ## run the tests uv run --package ref-metrics-example \ @@ -56,7 +62,7 @@ test-integration: ## run the integration tests -r a -v .PHONY: test -test: test-core test-metrics-example test-integration ## run the tests +test: test-core test-celery test-metrics-example test-integration ## run the tests # Note on code coverage and testing: # If you want to debug what is going on with coverage, we have found diff --git a/conftest.py b/conftest.py index cc30b67..9377045 100644 --- a/conftest.py +++ b/conftest.py @@ -15,3 +15,20 @@ def esgf_data_dir() -> Path: pull = esgpull.Esgpull() return pull.config.paths.data + + +@pytest.fixture +def test_dataset(esgf_data_dir) -> Path: + return ( + esgf_data_dir + / "CMIP6" + / "ScenarioMIP" + / "CSIRO" + / "ACCESS-ESM1-5" + / "ssp126" + / "r1i1p1f1" + / "Amon" + / "tas" + / "gn" + / "v20210318" + ) diff --git a/packages/ref-core/tests/unit/test_executor.py b/packages/ref-core/tests/unit/test_executor.py index feec036..f9c60bf 100644 --- a/packages/ref-core/tests/unit/test_executor.py +++ b/packages/ref-core/tests/unit/test_executor.py @@ -1,6 +1,7 @@ import pytest from ref_core.executor import Executor, ExecutorManager, run_metric from ref_core.executor.local import LocalExecutor +from ref_core.metrics import TriggerInfo class TestExecutorManager: @@ -29,10 +30,11 @@ def test_run_metric(self, configuration, mock_metric): @pytest.mark.parametrize("executor_name", ["local", None]) -def test_run_metric_local(monkeypatch, executor_name, mock_metric, provider, configuration): +def test_run_metric_local(monkeypatch, executor_name, mock_metric, provider, configuration, test_dataset): if executor_name: monkeypatch.setenv("REF_EXECUTOR", executor_name) - result = run_metric("mock", provider, configuration=configuration) + trigger = TriggerInfo(dataset=test_dataset) + result = run_metric("mock", provider, configuration=configuration, trigger=trigger) assert result.successful diff --git a/packages/ref-core/tests/unit/test_providers.py b/packages/ref-core/tests/unit/test_providers.py index a9007cf..0931632 100644 --- a/packages/ref-core/tests/unit/test_providers.py +++ b/packages/ref-core/tests/unit/test_providers.py @@ -1,3 +1,4 @@ +import pytest from ref_core.metrics import Metric from ref_core.providers import MetricsProvider @@ -18,6 +19,11 @@ def test_provider_register(self, mock_metric): assert "mock" in provider._metrics assert isinstance(provider.get("mock"), Metric) + def test_provider_register_invalid(self, mock_metric): + provider = MetricsProvider("provider_name", "v0.23") + with pytest.raises(ValueError): + provider.register("invalid") + def test_provider_fixture(self, provider): assert provider.name == "mock_provider" assert provider.version == "v0.1.0" diff --git a/packages/ref-metrics-example/tests/unit/test_metrics.py b/packages/ref-metrics-example/tests/unit/test_metrics.py index ec6c963..9bb529b 100644 --- a/packages/ref-metrics-example/tests/unit/test_metrics.py +++ b/packages/ref-metrics-example/tests/unit/test_metrics.py @@ -1,27 +1,7 @@ -from pathlib import Path - -import pytest from ref_core.metrics import Configuration, TriggerInfo from ref_metrics_example.example import AnnualGlobalMeanTimeseries, calculate_annual_mean_timeseries -@pytest.fixture -def test_dataset(esgf_data_dir) -> Path: - return ( - esgf_data_dir - / "CMIP6" - / "ScenarioMIP" - / "CSIRO" - / "ACCESS-ESM1-5" - / "ssp126" - / "r1i1p1f1" - / "Amon" - / "tas" - / "gn" - / "v20210318" - ) - - def test_annual_mean(esgf_data_dir, test_dataset): annual_mean = calculate_annual_mean_timeseries(test_dataset) diff --git a/ruff.toml b/ruff.toml index 3351c69..7978890 100644 --- a/ruff.toml +++ b/ruff.toml @@ -29,7 +29,8 @@ ignore = [ "test*.py" = [ "D", # Documentation not needed in tests "S101", # S101 Use of `assert` detected - "PLR2004" # Magic value used in comparison + "PLR2004", # Magic value used in comparison + "PLR0913", # Too many arguments in function definition ] "conftest.py" = [ "D", # Documentation not needed in tests From 92cbc391c8ef2da07cf20601989ef59efba4739d Mon Sep 17 00:00:00 2001 From: Jared Lewis Date: Wed, 6 Nov 2024 22:06:32 +1100 Subject: [PATCH 10/13] chore: Pass test suite --- packages/ref-celery/pyproject.toml | 2 +- packages/ref-celery/src/ref_celery/cli.py | 8 +++--- packages/ref-celery/tests/test_cli.py | 33 +++++++++++++++++++++++ packages/ref-celery/tests/test_tasks.py | 1 + uv.lock | 20 ++++++++++++++ 5 files changed, 60 insertions(+), 4 deletions(-) create mode 100644 packages/ref-celery/tests/test_cli.py create mode 100644 packages/ref-celery/tests/test_tasks.py diff --git a/packages/ref-celery/pyproject.toml b/packages/ref-celery/pyproject.toml index 9a9f550..8bc66d3 100644 --- a/packages/ref-celery/pyproject.toml +++ b/packages/ref-celery/pyproject.toml @@ -34,7 +34,7 @@ ref-celery = "ref_celery.cli:app" [tool.uv] dev-dependencies = [ - + "pytest-mock>=3.14.0", ] [tool.uv.sources] diff --git a/packages/ref-celery/src/ref_celery/cli.py b/packages/ref-celery/src/ref_celery/cli.py index baf9c70..eb3af27 100644 --- a/packages/ref-celery/src/ref_celery/cli.py +++ b/packages/ref-celery/src/ref_celery/cli.py @@ -34,13 +34,15 @@ def start_worker( try: imp = importlib.import_module(package.replace("-", "_")) # type: ignore except ModuleNotFoundError: - raise ValueError(f"Package '{package}' not found") + typer.echo(f"Package '{package}' not found") + raise typer.Abort() # Get the provider from the package try: provider = imp.provider except AttributeError: - raise ValueError("The package must define a 'provider' variable") + typer.echo("The package must define a 'provider' variable") + raise typer.Abort() # Wrap each metrics in the provider with a celery tasks register_celery_tasks(celery_app, provider) @@ -49,5 +51,5 @@ def start_worker( celery_app.worker_main(argv=argv) -if __name__ == "__main__": +if __name__ == "__main__": # pragma: no cover app() diff --git a/packages/ref-celery/tests/test_cli.py b/packages/ref-celery/tests/test_cli.py new file mode 100644 index 0000000..1570840 --- /dev/null +++ b/packages/ref-celery/tests/test_cli.py @@ -0,0 +1,33 @@ +from ref_celery.cli import app +from typer.testing import CliRunner + +runner = CliRunner() + + +def test_cli_help(): + result = runner.invoke(app, ["--help"]) + assert result.exit_code == 0 + + +def test_cli_spawns_worker(mocker): + mock_app = mocker.patch("ref_celery.cli.create_celery_app") + result = runner.invoke(app, ["--package", "ref-metrics-example"]) + assert result.exit_code == 0 + + mock_app().worker_main.assert_called_once() + + +def test_cli_wrong_package(): + result = runner.invoke(app, ["--package", "missing"]) + assert result.exit_code == 1 + + print(result.output) + assert "Package 'missing' not found" in result.output + + +def test_cli_missing_provider(): + result = runner.invoke(app, ["--package", "pandas"]) + assert result.exit_code == 1 + + print(result.output) + assert "The package must define a 'provider' variable" in result.output diff --git a/packages/ref-celery/tests/test_tasks.py b/packages/ref-celery/tests/test_tasks.py new file mode 100644 index 0000000..c535280 --- /dev/null +++ b/packages/ref-celery/tests/test_tasks.py @@ -0,0 +1 @@ +# TODO: add tests for executing tasks diff --git a/uv.lock b/uv.lock index 8c7a2ba..1d09c28 100644 --- a/uv.lock +++ b/uv.lock @@ -2330,6 +2330,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/78/3a/af5b4fa5961d9a1e6237b530eb87dd04aea6eb83da09d2a4073d81b54ccf/pytest_cov-5.0.0-py3-none-any.whl", hash = "sha256:4f0764a1219df53214206bf1feea4633c3b558a2925c8b59f144f682861ce652", size = 21990 }, ] +[[package]] +name = "pytest-mock" +version = "3.14.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pytest" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c6/90/a955c3ab35ccd41ad4de556596fa86685bf4fc5ffcc62d22d856cfd4e29a/pytest-mock-3.14.0.tar.gz", hash = "sha256:2719255a1efeceadbc056d6bf3df3d1c5015530fb40cf347c0f9afac88410bd0", size = 32814 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f2/3b/b26f90f74e2986a82df6e7ac7e319b8ea7ccece1caec9f8ab6104dc70603/pytest_mock-3.14.0-py3-none-any.whl", hash = "sha256:0b72c38033392a5f4621342fe11e9219ac11ec9d375f8e2a0c164539e0d70f6f", size = 9863 }, +] + [[package]] name = "python-dateutil" version = "2.9.0.post0" @@ -2552,6 +2564,11 @@ dependencies = [ { name = "typer" }, ] +[package.dev-dependencies] +dev = [ + { name = "pytest-mock" }, +] + [package.metadata] requires-dist = [ { name = "celery", extras = ["redis"], specifier = ">=5.4.0" }, @@ -2561,6 +2578,9 @@ requires-dist = [ { name = "typer", specifier = ">=0.12.0" }, ] +[package.metadata.requires-dev] +dev = [{ name = "pytest-mock", specifier = ">=3.14.0" }] + [[package]] name = "ref-core" version = "0.1.0" From 8727101673e49c5c37b83feeeb97dff5f1de4d03 Mon Sep 17 00:00:00 2001 From: Jared Lewis Date: Wed, 6 Nov 2024 22:36:18 +1100 Subject: [PATCH 11/13] chore: Fix mypy issues --- Makefile | 1 + packages/ref-celery/src/ref_celery/cli.py | 2 +- packages/ref-celery/src/ref_celery/tasks.py | 7 +- packages/ref-core/src/ref_core/metrics.py | 16 ++--- packages/ref-core/src/ref_core/providers.py | 4 +- pyproject.toml | 2 + stubs/celery/__init__.pyi | 13 ++++ tests/integration/conftest.py | 9 +++ uv.lock | 75 +++++++++++++++++++++ 9 files changed, 117 insertions(+), 12 deletions(-) create mode 100644 stubs/celery/__init__.pyi create mode 100644 tests/integration/conftest.py diff --git a/Makefile b/Makefile index 47e1bb5..581102e 100644 --- a/Makefile +++ b/Makefile @@ -30,6 +30,7 @@ pre-commit: ## run all the linting checks of the codebase .PHONY: mypy mypy: ## run mypy on the codebase MYPYPATH=stubs uv run --package ref-core mypy packages/ref-core + MYPYPATH=stubs uv run --package ref-celery mypy packages/ref-celery MYPYPATH=stubs uv run --package ref-metrics-example mypy packages/ref-metrics-example .PHONY: ruff-fixes diff --git a/packages/ref-celery/src/ref_celery/cli.py b/packages/ref-celery/src/ref_celery/cli.py index eb3af27..83f076c 100644 --- a/packages/ref-celery/src/ref_celery/cli.py +++ b/packages/ref-celery/src/ref_celery/cli.py @@ -32,7 +32,7 @@ def start_worker( # Attempt to import the package try: - imp = importlib.import_module(package.replace("-", "_")) # type: ignore + imp = importlib.import_module(package.replace("-", "_")) except ModuleNotFoundError: typer.echo(f"Package '{package}' not found") raise typer.Abort() diff --git a/packages/ref-celery/src/ref_celery/tasks.py b/packages/ref-celery/src/ref_celery/tasks.py index 04f113a..5c16904 100644 --- a/packages/ref-celery/src/ref_celery/tasks.py +++ b/packages/ref-celery/src/ref_celery/tasks.py @@ -18,11 +18,14 @@ from celery import Celery from loguru import logger +from mypy_extensions import Arg, KwArg from ref_core.metrics import Configuration, Metric, MetricResult, TriggerInfo from ref_core.providers import MetricsProvider -def metric_task_factory(metric: Metric) -> Callable: +def metric_task_factory( + metric: Metric, +) -> Callable[[Arg(Configuration, "configuration"), Arg(TriggerInfo, "trigger"), KwArg(Any)], MetricResult]: """ Create a new task for the given metric """ @@ -38,7 +41,7 @@ def task(configuration: Configuration, trigger: TriggerInfo, **kwargs: Any) -> M return task -def register_celery_tasks(app: Celery, provider: MetricsProvider): +def register_celery_tasks(app: Celery, provider: MetricsProvider) -> None: """ Register all tasks for the given provider diff --git a/packages/ref-core/src/ref_core/metrics.py b/packages/ref-core/src/ref_core/metrics.py index 5f4811e..73049ad 100644 --- a/packages/ref-core/src/ref_core/metrics.py +++ b/packages/ref-core/src/ref_core/metrics.py @@ -1,5 +1,5 @@ import json -import pathlib +from pathlib import Path from typing import Any, Protocol, runtime_checkable from attrs import frozen @@ -13,7 +13,7 @@ class Configuration: Configuration that describes the input data sources """ - output_fragment: pathlib.Path + output_fragment: Path """ Directory to write output files to relative to the output root. """ @@ -21,7 +21,7 @@ class Configuration: # TODO: Add more configuration options here @staticmethod - def as_output_path(file_fragment: str | pathlib.Path, ensure_parent_exists: bool = False) -> pathlib.Path: + def as_output_path(file_fragment: str | Path, ensure_parent_exists: bool = False) -> Path: """ Get the output path for a file in the output directory. @@ -37,7 +37,7 @@ def as_output_path(file_fragment: str | pathlib.Path, ensure_parent_exists: bool : The path to the file in the output directory. """ - root_output_dir = env.path("REF_OUTPUT_ROOT") + root_output_dir: Path = env.path("REF_OUTPUT_ROOT", Path("out")) output_path = root_output_dir / file_fragment @@ -46,7 +46,7 @@ def as_output_path(file_fragment: str | pathlib.Path, ensure_parent_exists: bool return output_path @staticmethod - def as_esgf_path(file_fragment: str | pathlib.Path) -> pathlib.Path: + def as_esgf_path(file_fragment: str | Path) -> Path: """ Get the output path for a file in the output directory. @@ -60,7 +60,7 @@ def as_esgf_path(file_fragment: str | pathlib.Path) -> pathlib.Path: : The path to the file in the esgf directory. """ - root_output_dir = env.path("REF_ESGF_ROOT") + root_output_dir: Path = env.path("REF_ESGF_ROOT", Path(".esgpull/data")) return root_output_dir / file_fragment @@ -76,7 +76,7 @@ class MetricResult: # Do we want to load a serialised version of the output bundle here or just a file path? - output_bundle: pathlib.Path | None + output_bundle: Path | None """ Path to the output bundle file. @@ -128,7 +128,7 @@ class TriggerInfo: The reason why the metric was run. """ - dataset: pathlib.Path + dataset: Path """ Path to the dataset that triggered the metric run. """ diff --git a/packages/ref-core/src/ref_core/providers.py b/packages/ref-core/src/ref_core/providers.py index f81d85a..75caded 100644 --- a/packages/ref-core/src/ref_core/providers.py +++ b/packages/ref-core/src/ref_core/providers.py @@ -4,6 +4,8 @@ This defines how metrics packages interoperate with the REF framework. """ +from collections.abc import Iterator + from ref_core.metrics import Metric @@ -36,7 +38,7 @@ def register(self, metric: Metric) -> None: raise ValueError("Metric must be an instance of Metric") self._metrics[metric.name.lower()] = metric - def __iter__(self): + def __iter__(self) -> Iterator[Metric]: return iter(self._metrics.values()) def get(self, name: str) -> Metric: diff --git a/pyproject.toml b/pyproject.toml index 9521626..bf339c9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,6 +23,8 @@ dependencies = [ dev-dependencies = [ "pytest>=7.3.1", "pytest-cov>=4.0.0", + "pytest-celery[redis]>=1.0.0", + "celery-types>=0.22.0", "coverage>=7.2.0", "mypy>=1.11.0", "ruff>=0.6.9", diff --git a/stubs/celery/__init__.pyi b/stubs/celery/__init__.pyi new file mode 100644 index 0000000..e3a3b56 --- /dev/null +++ b/stubs/celery/__init__.pyi @@ -0,0 +1,13 @@ +from collections.abc import Callable +from typing import Any + +class Celery: + def __init__(self, name: str | None = None) -> None: ... + def config_from_envvar(self, envvar: str) -> None: ... + def worker_main(self, argv: list[str] | None = None) -> None: ... + def task( + self, + task: Callable[..., Any], + name: str | None = None, + queue: str | None = None, + ) -> None: ... diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py new file mode 100644 index 0000000..779724f --- /dev/null +++ b/tests/integration/conftest.py @@ -0,0 +1,9 @@ +import pytest +from pytest_celery import CeleryBrokerCluster, RedisTestBroker + + +@pytest.fixture +def celery_broker_cluster(celery_redis_broker: RedisTestBroker) -> CeleryBrokerCluster: + cluster = CeleryBrokerCluster(celery_redis_broker) + yield cluster + cluster.teardown() diff --git a/uv.lock b/uv.lock index 1d09c28..7b6ca63 100644 --- a/uv.lock +++ b/uv.lock @@ -265,6 +265,18 @@ redis = [ { name = "redis" }, ] +[[package]] +name = "celery-types" +version = "0.22.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/57/0a/f7d6089e39b43528d74f99f3f58b9900fe76894e8208ec4f22ffa71e4a73/celery_types-0.22.0.tar.gz", hash = "sha256:0ecad2fa5a6eded0a1f919e5e1e381cc2ff0635fe4b21db53b4661b6876d5b30", size = 26654 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b4/fc/ab9ed137f6a7a54746cb27410e475f6b375dbb9e20f8c2d3317186d0a63e/celery_types-0.22.0-py3-none-any.whl", hash = "sha256:79a66637d1d6af5992d1dc80259d9538869941325e966006f1e795220519b9ac", size = 41166 }, +] + [[package]] name = "certifi" version = "2024.8.30" @@ -513,6 +525,7 @@ dependencies = [ [package.dev-dependencies] dev = [ + { name = "celery-types" }, { name = "coverage" }, { name = "esgpull" }, { name = "jupyterlab" }, @@ -532,6 +545,7 @@ dev = [ { name = "pip" }, { name = "pre-commit" }, { name = "pytest" }, + { name = "pytest-celery", extra = ["redis"] }, { name = "pytest-cov" }, { name = "ruff" }, { name = "towncrier" }, @@ -546,6 +560,7 @@ requires-dist = [ [package.metadata.requires-dev] dev = [ + { name = "celery-types", specifier = ">=0.22.0" }, { name = "coverage", specifier = ">=7.2.0" }, { name = "esgpull", specifier = ">=0.7.3" }, { name = "jupyterlab", specifier = ">=4.2.0" }, @@ -565,6 +580,7 @@ dev = [ { name = "pip", specifier = ">=24.3.1" }, { name = "pre-commit", specifier = ">=3.3.1" }, { name = "pytest", specifier = ">=7.3.1" }, + { name = "pytest-celery", extras = ["redis"], specifier = ">=1.0.0" }, { name = "pytest-cov", specifier = ">=4.0.0" }, { name = "ruff", specifier = ">=0.6.9" }, { name = "towncrier", specifier = ">=24.8.0" }, @@ -767,6 +783,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8e/41/9307e4f5f9976bc8b7fea0b66367734e8faf3ec84bc0d412d8cfabbb66cd/distlib-0.3.8-py2.py3-none-any.whl", hash = "sha256:034db59a0b96f8ca18035f36290806a9a6e6bd9d1ff91e45a7f172eb17e51784", size = 468850 }, ] +[[package]] +name = "docker" +version = "7.1.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pywin32", marker = "sys_platform == 'win32'" }, + { name = "requests" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/91/9b/4a2ea29aeba62471211598dac5d96825bb49348fa07e906ea930394a83ce/docker-7.1.0.tar.gz", hash = "sha256:ad8c70e6e3f8926cb8a92619b832b4ea5299e2831c14284663184e200546fa6c", size = 117834 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e3/26/57c6fb270950d476074c087527a558ccb6f4436657314bfb6cdf484114c4/docker-7.1.0-py3-none-any.whl", hash = "sha256:c96b93b7f0a746f9e77d325bcfb87422a3d8bd4f03136ae8a85b37f1898d5fc0", size = 147774 }, +] + [[package]] name = "docutils" version = "0.21.2" @@ -2317,6 +2347,29 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6b/77/7440a06a8ead44c7757a64362dd22df5760f9b12dc5f11b6188cd2fc27a0/pytest-8.3.3-py3-none-any.whl", hash = "sha256:a6853c7375b2663155079443d2e45de913a911a11d669df02a50814944db57b2", size = 342341 }, ] +[[package]] +name = "pytest-celery" +version = "1.1.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "celery" }, + { name = "debugpy" }, + { name = "docker" }, + { name = "psutil" }, + { name = "pytest-docker-tools" }, + { name = "setuptools" }, + { name = "tenacity" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a3/ed/cf73ad1119f2c9cc009d10a806d21aa165361e4bd04076c76e4f75d51789/pytest_celery-1.1.3.tar.gz", hash = "sha256:ac7eee546b4d9fb5c742eaaece98187f1f5e5f5622fbaa8e7729bb46923c54fc", size = 29770 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9c/a8/13f300c73143caaf22806953ba0e18b1aef1425357a9070cc2de99101b7c/pytest_celery-1.1.3-py3-none-any.whl", hash = "sha256:4cdb5f658dc472509e8be71f745d26bcb8246397661534f5709d2a55edc43286", size = 49032 }, +] + +[package.optional-dependencies] +redis = [ + { name = "redis" }, +] + [[package]] name = "pytest-cov" version = "5.0.0" @@ -2330,6 +2383,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/78/3a/af5b4fa5961d9a1e6237b530eb87dd04aea6eb83da09d2a4073d81b54ccf/pytest_cov-5.0.0-py3-none-any.whl", hash = "sha256:4f0764a1219df53214206bf1feea4633c3b558a2925c8b59f144f682861ce652", size = 21990 }, ] +[[package]] +name = "pytest-docker-tools" +version = "3.1.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "docker" }, + { name = "pytest" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/4f/a2/620ff42d20a2c2b107805a12633a2cb9eb01db3a4eb371a6bc1f71728217/pytest_docker_tools-3.1.3.tar.gz", hash = "sha256:c7e28841839d67b3ac80ad7b345b953701d5ae61ffda97586114244292aeacc0", size = 37136 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/90/f6/961e9b5c6a3006be78d2725713e0d6b2811dc20ae78b2b21b575185b448d/pytest_docker_tools-3.1.3-py3-none-any.whl", hash = "sha256:63e659043160f41d89f94ea42616102594bcc85682aac394fcbc14f14cd1b189", size = 24807 }, +] + [[package]] name = "pytest-mock" version = "3.14.0" @@ -3056,6 +3122,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/40/44/4a5f08c96eb108af5cb50b41f76142f0afa346dfa99d5296fe7202a11854/tabulate-0.9.0-py3-none-any.whl", hash = "sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f", size = 35252 }, ] +[[package]] +name = "tenacity" +version = "9.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/cd/94/91fccdb4b8110642462e653d5dcb27e7b674742ad68efd146367da7bdb10/tenacity-9.0.0.tar.gz", hash = "sha256:807f37ca97d62aa361264d497b0e31e92b8027044942bfa756160d908320d73b", size = 47421 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b6/cb/b86984bed139586d01532a587464b5805f12e397594f19f931c4c2fbfa61/tenacity-9.0.0-py3-none-any.whl", hash = "sha256:93de0c98785b27fcf659856aa9f54bfbd399e29969b0621bc7f762bd441b4539", size = 28169 }, +] + [[package]] name = "terminado" version = "0.18.1" From 7ed54b5c4c2a1578497250bb3a8ef0cc43f9dfc9 Mon Sep 17 00:00:00 2001 From: Jared Lewis Date: Wed, 6 Nov 2024 22:41:36 +1100 Subject: [PATCH 12/13] refactor: Move test data jobs into their standalone action --- .github/actions/test-data/action.yml | 21 +++++++++++++++++++++ .github/workflows/ci.yaml | 18 ++++-------------- 2 files changed, 25 insertions(+), 14 deletions(-) create mode 100644 .github/actions/test-data/action.yml diff --git a/.github/actions/test-data/action.yml b/.github/actions/test-data/action.yml new file mode 100644 index 0000000..6a4a1d5 --- /dev/null +++ b/.github/actions/test-data/action.yml @@ -0,0 +1,21 @@ +name: "Setup test-data cache" +description: "Share a set of cached data for use in the tests" +runs: + using: "composite" + steps: + - name: Cache test data + id: cache-test-data + uses: actions/cache@v4 + with: + path: .esgpull + key: test-data-${{ hashFiles('scripts/fetch_test_data.py') }} + - name: Install esgpull + run: | + uv run esgpull self install .esgpull || uv run esgpull self choose .esgpull + shell: bash + - if: ${{ steps.cache-test-data.outputs.cache-hit != 'true' }} + name: Fetch the test data + run: | + uv run esgpull config api.index_node esgf-node.llnl.gov + make fetch-test-data + shell: bash diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 6cb4de4..5e21249 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -34,6 +34,7 @@ jobs: - name: Check out repository uses: actions/checkout@v4 - uses: ./.github/actions/setup + - uses: ./.github/actions/test-data - name: docs run: uv run mkdocs build @@ -58,20 +59,9 @@ jobs: python-version: ${{ matrix.python-version }} # Share a cache of test data across all test jobs - - name: Cache test data - id: cache-test-data - uses: actions/cache@v4 - with: - path: .esgpull - key: test-data-${{ hashFiles('scripts/fetch_test_data.py') }} - - name: Install esgpull - run: | - uv run esgpull self install .esgpull || uv run esgpull self choose .esgpull - - if: ${{ steps.cache-test-data.outputs.cache-hit != 'true' }} - name: Fetch the test data - run: | - uv run esgpull config api.index_node esgf-node.llnl.gov - make fetch-test-data + - uses: ./.github/actions/test-data + + # Run the tests - name: Run tests run: | cp .env.example .env From 57091071c3a2aeebdaa26852ea55642f8474d01f Mon Sep 17 00:00:00 2001 From: Jared Lewis Date: Thu, 7 Nov 2024 09:51:17 +1100 Subject: [PATCH 13/13] docs: Changelog --- changelog/7.feature.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 changelog/7.feature.md diff --git a/changelog/7.feature.md b/changelog/7.feature.md new file mode 100644 index 0000000..1556f22 --- /dev/null +++ b/changelog/7.feature.md @@ -0,0 +1,5 @@ +Added `ref-celery` package for asynchronous task processing. +This enables the `ref-metrics-example` package to be run as a Celery worker inside its own docker container. + +The `ref-metrics-example` package was also dockerized and a `docker-compose` file was added to track +the services required.