From ad6645bf21c616bbc5c368c5ad3777a10056a6a2 Mon Sep 17 00:00:00 2001 From: Baudouin Raoult Date: Thu, 20 Jun 2024 12:10:38 +0100 Subject: [PATCH 01/64] first commit --- .github/workflows/python-publish.yml | 74 +++++++++ .gitignore | 188 +++++++++++++++++++++ .pre-commit-config.yaml | 70 ++++++++ .readthedocs.yaml | 17 ++ LICENSE | 201 +++++++++++++++++++++++ README.md | 43 ++++- docs/Makefile | 22 +++ docs/_static/logo.png | Bin 0 -> 17128 bytes docs/_static/style.css | 48 ++++++ docs/_templates/.gitkeep | 0 docs/conf.py | 126 ++++++++++++++ docs/index.rst | 61 +++++++ docs/installing.rst | 31 ++++ docs/modules/dates.rst | 8 + docs/requirements.txt | 10 ++ pyproject.toml | 92 +++++++++++ src/anemoi/registry/__init__.py | 9 + src/anemoi/registry/__main__.py | 28 ++++ src/anemoi/registry/commands/__init__.py | 24 +++ src/anemoi/registry/commands/hello.py | 32 ++++ tests/test_graphs.py | 14 ++ 21 files changed, 1097 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/python-publish.yml create mode 100644 .gitignore create mode 100644 .pre-commit-config.yaml create mode 100644 .readthedocs.yaml create mode 100644 LICENSE create mode 100644 docs/Makefile create mode 100644 docs/_static/logo.png create mode 100644 docs/_static/style.css create mode 100644 docs/_templates/.gitkeep create mode 100644 docs/conf.py create mode 100644 docs/index.rst create mode 100644 docs/installing.rst create mode 100644 docs/modules/dates.rst create mode 100644 docs/requirements.txt create mode 100644 pyproject.toml create mode 100644 src/anemoi/registry/__init__.py create mode 100644 src/anemoi/registry/__main__.py create mode 100644 src/anemoi/registry/commands/__init__.py create mode 100644 src/anemoi/registry/commands/hello.py create mode 100644 tests/test_graphs.py diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml new file mode 100644 index 0000000..666f65d --- /dev/null +++ b/.github/workflows/python-publish.yml @@ -0,0 +1,74 @@ +# This workflow will upload a Python Package using Twine when a release is created +# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries + +name: Upload Python Package + +on: + + push: {} + + release: + types: [created] + +jobs: + quality: + name: Code QA + runs-on: ubuntu-latest + steps: + - run: sudo apt-get install -y pandoc # Needed by sphinx for notebooks + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: 3.x + - uses: pre-commit/action@v3.0.1 + + checks: + strategy: + fail-fast: false + matrix: + platform: ["ubuntu-latest", "macos-latest"] + python-version: ["3.10"] + + name: Python ${{ matrix.python-version }} on ${{ matrix.platform }} + runs-on: ${{ matrix.platform }} + + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + + - name: Install + run: | + pip install -e .[all,tests] + pip freeze + + - name: Tests + run: pytest + + deploy: + + if: ${{ github.event_name == 'release' }} + runs-on: ubuntu-latest + needs: [checks, quality] + + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: 3.x + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install build wheel twine + - name: Build and publish + env: + TWINE_USERNAME: __token__ + TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }} + run: | + python -m 
build + twine upload dist/* diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2137d4c --- /dev/null +++ b/.gitignore @@ -0,0 +1,188 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + +*.grib +*.onnx +*.ckpt +*.swp +*.npy +*.download +? +?.* +foo +bar +*.grib +*.nc +*.npz +*.json +*.zarr/ +~$images.pptx +test.py +cutout.png +*.out + +_build/ +? 
+?.*
+~*
+*.sync
+_version.py
+*.code-workspace
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..f6839ac
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,70 @@
+repos:
+# Clear notebook outputs
+- repo: local
+  hooks:
+  - id: clear-notebooks-output
+    name: clear-notebooks-output
+    files: tools/.*\.ipynb$
+    stages: [commit]
+    language: python
+    entry: jupyter nbconvert --ClearOutputPreprocessor.enabled=True --inplace
+    additional_dependencies: [jupyter]
+- repo: https://github.com/pre-commit/pre-commit-hooks
+  rev: v4.6.0
+  hooks:
+  - id: check-yaml # Check YAML files for syntax errors only
+    args: [--unsafe, --allow-multiple-documents]
+  - id: debug-statements # Check for debugger imports and py37+ breakpoint()
+  - id: end-of-file-fixer # Ensure files end in a newline
+  - id: trailing-whitespace # Trailing whitespace checker
+  - id: no-commit-to-branch # Prevent committing to main / master
+  - id: check-added-large-files # Check for large files added to git
+  - id: check-merge-conflict # Check for files that contain merge conflict markers
+- repo: https://github.com/psf/black-pre-commit-mirror
+  rev: 24.4.2
+  hooks:
+  - id: black
+    args: [--line-length=120]
+- repo: https://github.com/pycqa/isort
+  rev: 5.13.2
+  hooks:
+  - id: isort
+    args:
+    - -l 120
+    - --force-single-line-imports
+    - --profile black
+- repo: https://github.com/astral-sh/ruff-pre-commit
+  rev: v0.4.6
+  hooks:
+  - id: ruff
+    # Next line is for documentation code snippets
+    exclude: '^[A-Za-z]\w*_\.py$'
+    args:
+    - --line-length=120
+    - --fix
+    - --exit-non-zero-on-fix
+    - --preview
+- repo: https://github.com/sphinx-contrib/sphinx-lint
+  rev: v0.9.1
+  hooks:
+  - id: sphinx-lint
+# For now, we use it, but it does not support a lot of sphinx features
+- repo: https://github.com/dzhu/rstfmt
+  rev: v0.0.14
+  hooks:
+  - id: rstfmt
+    exclude: 'cli/.*' # Because we use argparse
+- repo: https://github.com/b8raoult/pre-commit-docconvert
+  rev: "0.1.5"
+  hooks:
+  - id: docconvert
+    args: ["numpy"]
+- repo: https://github.com/b8raoult/optional-dependencies-all
+  rev: "0.0.6"
+  hooks:
+  - id: optional-dependencies-all
+    args: ["--inplace", "--exclude-keys=dev,docs,tests", "--group=dev=all,docs,tests"]
+- repo: https://github.com/tox-dev/pyproject-fmt
+  rev: "2.1.3"
+  hooks:
+  - id: pyproject-fmt
diff --git a/.readthedocs.yaml b/.readthedocs.yaml
new file mode 100644
index 0000000..c03429e
--- /dev/null
+++ b/.readthedocs.yaml
@@ -0,0 +1,17 @@
+version: 2
+
+build:
+  os: ubuntu-22.04
+  tools:
+    python: "3.11"
+
+sphinx:
+  configuration: docs/conf.py
+
+python:
+  install:
+  - requirements: docs/requirements.txt
+  - method: pip
+    path: .
+    extra_requirements:
+    - docs
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..261eeb9
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,201 @@
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/README.md b/README.md
index 2941de6..44bc618 100644
--- a/README.md
+++ b/README.md
@@ -1 +1,42 @@
-# anemoi-registry
\ No newline at end of file
+# anemoi-registry
+
+**DISCLAIMER**
+This project is **BETA** and will be **Experimental** for the foreseeable future.
+Interfaces and functionality are likely to change, and the project itself may be scrapped.
+**DO NOT** use this software in any project/software that is operational.
+
+A package to manage a registry of data-driven forecasts.
+
+## Documentation
+
+The documentation can be found at https://anemoi-registry.readthedocs.io/.
+
+## Install
+
+Install via `pip` with:
+
+```
+$ pip install anemoi-registry
+```
+
+## License
+
+```
+Copyright 2022, European Centre for Medium Range Weather Forecasts.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+In applying this licence, ECMWF does not waive the privileges and immunities
+granted to it by virtue of its status as an intergovernmental organisation
+nor does it submit to any jurisdiction.
+```
diff --git a/docs/Makefile b/docs/Makefile
new file mode 100644
index 0000000..6c0762a
--- /dev/null
+++ b/docs/Makefile
@@ -0,0 +1,22 @@
+#!/usr/bin/env make -f
+
+# Minimal makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line, and also
+# from the environment for the first two.
+SPHINXOPTS    ?=
+SPHINXBUILD   ?= sphinx-build
+SOURCEDIR     = .
+BUILDDIR      = _build
+
+# Put it first so that "make" without argument is like "make help".
+help:
+	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+.PHONY: help Makefile
+
+# Catch-all target: route all unknown targets to Sphinx using the new
+# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
+%: Makefile
+	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
diff --git a/docs/_static/logo.png b/docs/_static/logo.png
new file mode 100644
index 0000000000000000000000000000000000000000..f78572e73c02e791661a509be50fb9e90f0d3a56
GIT binary patch
literal 17128
[base85-encoded binary patch data for docs/_static/logo.png omitted]
zkmQ-zpbhnK+yl$7TrQjoiO#?u{FkBA0_}jNi@1y$e;EgKpmwv>+0pNcWG@4I(jLDz zuX+$hw*8Itf=vQ9O?e3kXUA7-uJ=YmzJ4pQ1ns2q{P5{t*o<&u_sKL1 zJfPRK5LU|z>_vizDz{kGA)hGt)b(L;Jik;K;}lpmNkqV@CNIixBFElfw$Y_1)0OyQW;>_ivNO<8M!S49JT?q;>hOIqv+CR z)hkr#Yms#1ckk@D_U*vYl%Um9|NGOBEBgnbppmh$C4UBpt7)^bVkkK4k}PIa_h9EU+w8Xv9x zr8uKf=5!xKFvfE5so0r)Xb_5?CNVf{m92JUQqqK+;otOo{9t%B5sQ?)@yll}M0o5a zdGPxG+?B~@UTdB#)P$38@+&x66nlowBsf+m6%TeA_J>*821-@L6A0+V;Bv(Ipm#Gh zeic#=2Kk4*Ul56eLFkT@4(5Y_L!F9YuNL%;(n29Ks`izP?IH`hJ zn0|pdo>A~@izE&ptJ6Gh?D7uYD-rxo6409jGQCwZ7tsQbqWOKFVa8v4!NSvWzinAO zPM<-~e){Q&yMGb()APXOVxub=8qediUFoGY*E{pqs)RqY*l>WwnAzk#yy?CKQX!`cPNkLtU>U9H|r5oo>sX`JFe ziFjTA&a1*xJWd%K;8rLK1|t{yCciSU(WCB*tXaFJYB|vQNW#Scw_s6UhD{g)vFZ z`Et71hACTdr$SNcVpLkDum>1y|VxibB8G>8j;~-aa6u#Kgmkie;%d?2x?NaFK3;!iV#_cGX(zl>)g|9G3`-@F~W;Z&oI{5?j^IAl0ZFWX(Ntwt$G(^hckq`08O~(S6z1w zrBlVEqwgbRcw_`LPnSI85?k{FRe`zibw~4$WPcqU4JlmJrIERErG$?U;jnFtR^^p$ z1=+!266o(AH2nPUV5gX!N%!V#JAw>q(_D^!ewigKRx6iE7lHhqihQ1~fqD%^Aj^w} z{PC9=2SRw$gZw5kycKX@m_`o2F5yannc%)PbCbkNHA_%KYUss}?I?7bxS?OaGzFBq zH-5O+1sphGO)|mbvf~Ad=Y7+xFKPps*ezM0kx@{H4P5G2rwvL5>U!S2N^PhpOv1qs z!l@!47Dw0bR3`-}LgMP1iW|0d8tSQ4QAWAkL_=SpwCTX%sL)w3$E*En$BpepxI+t3KDMTF7Qj~PW0Ca9PHDQyA$!RU458(A^>L-fwQIh z9exiFFF9yb)O}=h)IL^fYF^&ghrhezda?q)rD}@A6S9l=UpuZVlacY{bb>@PgHI$C zt^r&R1eroU_g9S1J=ZhDyDh8Dz^t*dZ3J#sEN0>t#GL0IORQ$JS##MhFge2WZJ?8f zFb0wrn77K*h8Y*YjS8Nm%M{rfkHtLYTFNr;^6L3y3WeJE_TRu6PGMsrSf22Can(9L zu`Ayvqr6mj2R9?5InY8Wy+N71F)UiAp4buz5mKv)yQ@=H(JEf+S``)d@!>HQH*NjQ;dQ1tHc)Qh1u9s36?nfU@McPm1i9cfhWo!_@nzbr2P^ z=E^vh(;)S`dbI`YoX;=1FdbxPH6RmyZtJo{IrQW>*i!GgUt6mA>iu zTn(^}@3nIX1aL{S3BU^UP{DM~!hF~d`XBR83p;8v`MniwMf_>sp3vzG z1RgD1p;m#Ph~c5qh~vS$FaEr z3xj#UumHRVW#7KekexczPy8@4N1G8&-> zv0#A{q!2A?vQ`V#%cLImQ({x@1h*`w9;Mf{5$dI{%lpv_HM6{wHHN8NsxFNb3ZeR; z3wqkr50@L18-4OQ?&Kg3O`Rr-nYpD>3sT}rNF^kMMnrAzjTT#-8IWTv`r%o(8ptmB zWx9`|n?rzrz>xWw{xu34S7?Vzy4XJ82^Jmbd2^3G{k@$a5D`B%I@%gM*oW*|(aNf_ zuv57g0Vhk;z+b7+Y=Q-5_X>5vgkIvp8yX`2S7x4uC$4mOrY|B`3Js1eZ8*{KOFD!8 zI~pEP=Z|(ry-_Yn3bIn29v^cX)unZWB}kBHarjMKV$l^(L?2g3V7`Oh!;I%h3W*Wc z{V6fC%l!50moyeiZOq5aqdtx~m1sM4WFy-7lQBuC$!*gtF)^|7G>z1+kDW(fl$*(N zLyxIqbK&R%hCi}5srb@Dz6_AM7oA~xqQb($klgjl;$*LNf%s0QHVa7TI5^sPef&;L zA-TY*w=vH6?PrM5X+zfCxIbL3r7G=x>eDj7lPMU`VTZp5GX8JycL)M-kstkYJ^vJ3 T1l9fP=$w?8ylAbkLD2sLIU~(~ literal 0 HcmV?d00001 diff --git a/docs/_static/style.css b/docs/_static/style.css new file mode 100644 index 0000000..9a2b3af --- /dev/null +++ b/docs/_static/style.css @@ -0,0 +1,48 @@ +.wy-side-nav-search { + background-color: #f7f7f7; +} + +/*There is a clash between xarray notebook styles and readthedoc*/ + +.rst-content dl.xr-attrs dt { + all: revert; + font-size: 95%; + white-space: nowrap; +} + +.rst-content dl.xr-attrs dd { + font-size: 95%; +} + +.xr-wrap { + font-size: 85%; +} + +.wy-table-responsive table td, .wy-table-responsive table th { + white-space: inherit; +} + +/* +.wy-table-responsive table td, +.wy-table-responsive table th { + white-space: normal !important; + vertical-align: top !important; +} + +.wy-table-responsive { + margin-bottom: 24px; + max-width: 100%; + overflow: visible; +} */ + +/* Hide notebooks warnings */ +.nboutput .stderr { + display: none; +} + +/* +Set logo size +*/ +.wy-side-nav-search .wy-dropdown > a img.logo, .wy-side-nav-search > a img.logo { + width: 200px; +} diff --git a/docs/_templates/.gitkeep b/docs/_templates/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 0000000..3091144 --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,126 @@ +# Configuration file for the 
+#
+# This file only contains a selection of the most common options. For a full
+# list see the documentation:
+# https://www.sphinx-doc.org/en/master/usage/configuration.html
+
+# -- Path setup --------------------------------------------------------------
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+#
+import datetime
+import os
+import sys
+
+read_the_docs_build = os.environ.get("READTHEDOCS", None) == "True"
+
+sys.path.insert(0, os.path.join(os.path.abspath(".."), "src"))
+
+source_suffix = ".rst"
+master_doc = "index"
+pygments_style = "sphinx"
+html_theme_options = {"logo_only": True}
+html_logo = "_static/logo.png"
+
+
+# -- Project information -----------------------------------------------------
+
+project = "Anemoi Registry"
+
+author = "ECMWF"
+
+year = datetime.datetime.now().year
+if year == 2024:
+    years = "2024"
+else:
+    years = "2024-%s" % (year,)
+
+copyright = "%s, ECMWF" % (years,)
+
+try:
+    from anemoi.registry._version import __version__
+
+    release = __version__
+except ImportError:
+    release = "0.0.0"
+
+
+# -- General configuration ---------------------------------------------------
+
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
+# ones.
+extensions = [
+    "sphinx.ext.todo",
+    "sphinx_rtd_theme",
+    "nbsphinx",
+    "sphinx.ext.graphviz",
+    "sphinx.ext.intersphinx",
+    "sphinx.ext.autodoc",
+    "sphinx.ext.napoleon",
+    "sphinxarg.ext",
+]
+
+# Add any paths that contain templates here, relative to this directory.
+# templates_path = ["_templates"]
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+# This pattern also affects html_static_path and html_extra_path.
+exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", "**.ipynb_checkpoints"]
+
+intersphinx_mapping = {
+    "python": ("https://python.readthedocs.io/en/latest", None),
+    "anemoi-utils": (
+        "https://anemoi-utils.readthedocs.io/en/latest/",
+        ("../../anemoi-utils/docs/_build/html/objects.inv", None),
+    ),
+    "anemoi-datasets": (
+        "https://anemoi-datasets.readthedocs.io/en/latest/",
+        ("../../anemoi-datasets/docs/_build/html/objects.inv", None),
+    ),
+    "anemoi-models": (
+        "https://anemoi-models.readthedocs.io/en/latest/",
+        ("../../anemoi-models/docs/_build/html/objects.inv", None),
+    ),
+    "anemoi-training": (
+        "https://anemoi-training.readthedocs.io/en/latest/",
+        ("../../anemoi-training/docs/_build/html/objects.inv", None),
+    ),
+    "anemoi-inference": (
+        "https://anemoi-inference.readthedocs.io/en/latest/",
+        ("../../anemoi-inference/docs/_build/html/objects.inv", None),
+    ),
+    "anemoi-graphs": (
+        "https://anemoi-graphs.readthedocs.io/en/latest/",
+        ("../../anemoi-graphs/docs/_build/html/objects.inv", None),
+    ),
+    "anemoi-registry": (
+        "https://anemoi-registry.readthedocs.io/en/latest/",
+        ("../../anemoi-registry/docs/_build/html/objects.inv", None),
+    ),
+}
+
+
+# https://www.notion.so/Deepnote-Launch-Buttons-63c642a5e875463495ed2341e83a4b2a
+
+
+# -- Options for HTML output -------------------------------------------------
+
+# The theme to use for HTML and HTML Help pages. See the documentation for
+# a list of builtin themes.
+#
+html_theme = "sphinx_rtd_theme"
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = ["_static"]
+html_css_files = ["style.css"]
+
+
+todo_include_todos = not read_the_docs_build
+
+autodoc_member_order = "bysource"  # Keep file order
diff --git a/docs/index.rst b/docs/index.rst
new file mode 100644
index 0000000..c215922
--- /dev/null
+++ b/docs/index.rst
@@ -0,0 +1,61 @@
+.. _anemoi-registry:
+
+.. _index-page:
+
+#############################################
+ Welcome to `anemoi-registry` documentation!
+#############################################
+
+.. warning::
+
+   This documentation is work in progress.
+
+*Anemoi* is a framework for developing machine learning weather
+forecasting models. It comprises components or packages for preparing
+training datasets, conducting ML model training, and a registry for
+datasets and trained models. *Anemoi* provides tools for operational
+inference, including interfacing to verification software. As a
+framework it seeks to handle many of the complexities that
+meteorological organisations will share, allowing them to easily train
+models from existing recipes but with their own data.
+
+This package provides a series of utility functions for use by the rest
+of the *Anemoi* packages.
+
+- :doc:`installing`
+
+.. toctree::
+   :maxdepth: 1
+   :hidden:
+
+   installing
+
+*********
+ Modules
+*********
+
+.. toctree::
+   :maxdepth: 1
+   :glob:
+
+   modules/*
+
+*****************
+ Anemoi packages
+*****************
+
+- :ref:`anemoi-utils <anemoi-utils:index-page>`
+- :ref:`anemoi-datasets <anemoi-datasets:index-page>`
+- :ref:`anemoi-models <anemoi-models:index-page>`
+- :ref:`anemoi-graphs <anemoi-graphs:index-page>`
+- :ref:`anemoi-training <anemoi-training:index-page>`
+- :ref:`anemoi-inference <anemoi-inference:index-page>`
+- :ref:`anemoi-registry <anemoi-registry:index-page>`
+
+*********
+ License
+*********
+
+*Anemoi* is available under the open source `Apache License`__.
+
+.. __: http://www.apache.org/licenses/LICENSE-2.0.html
diff --git a/docs/installing.rst b/docs/installing.rst
new file mode 100644
index 0000000..37ca68a
--- /dev/null
+++ b/docs/installing.rst
@@ -0,0 +1,31 @@
+############
+ Installing
+############
+
+To install the package, you can use the following command:
+
+.. code:: bash
+
+   pip install anemoi-registry[...options...]
+
+The options are:
+
+- ``dev``: install the development dependencies
+- ``all``: install all the dependencies
+
+**************
+ Contributing
+**************
+
+.. code:: bash
+
+   git clone ...
+   cd anemoi-registry
+   pip install .[dev]
+   pip install -r docs/requirements.txt
+
+You may also have to install pandoc on macOS:
+
+.. code:: bash
+
+   brew install pandoc
diff --git a/docs/modules/dates.rst b/docs/modules/dates.rst
new file mode 100644
index 0000000..558c01b
--- /dev/null
+++ b/docs/modules/dates.rst
@@ -0,0 +1,8 @@
+#######
+ dates
+#######
+
+.. automodule:: anemoi.registry.dates
+   :members:
+   :no-undoc-members:
+   :show-inheritance:
diff --git a/docs/requirements.txt b/docs/requirements.txt
new file mode 100644
index 0000000..8b8bccd
--- /dev/null
+++ b/docs/requirements.txt
@@ -0,0 +1,10 @@
+# These are the requirements for readthedocs
+sphinx
+sphinx_rtd_theme
+nbsphinx
+sphinx_argparse
+
+# Also requires `brew install pandoc` on Mac
+pandoc
+
+rstfmt
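+
+# A hypothetical local documentation build, assuming these requirements are
+# installed and using the docs/Makefile added above:
+#
+#     pip install -r docs/requirements.txt
+#     make -C docs html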
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..e20cc60
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,92 @@
+#!/usr/bin/env python
+# (C) Copyright 2024 ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+# https://packaging.python.org/en/latest/guides/writing-pyproject-toml/
+
+[build-system]
+requires = [
+  "setuptools>=60",
+  "setuptools-scm>=8",
+]
+
+[project]
+name = "anemoi-registry"
+
+description = "A package to manage a registry of data-driven forecasts."
+keywords = [
+  "ai",
+  "registry",
+  "tools",
+]
+
+license = { file = "LICENSE" }
+authors = [
+  { name = "European Centre for Medium-Range Weather Forecasts (ECMWF)", email = "software.support@ecmwf.int" },
+]
+
+requires-python = ">=3.9"
+
+classifiers = [
+  "Development Status :: 4 - Beta",
+  "Intended Audience :: Developers",
+  "License :: OSI Approved :: Apache Software License",
+  "Operating System :: OS Independent",
+  "Programming Language :: Python :: 3 :: Only",
+  "Programming Language :: Python :: 3.9",
+  "Programming Language :: Python :: 3.10",
+  "Programming Language :: Python :: 3.11",
+  "Programming Language :: Python :: 3.12",
+  "Programming Language :: Python :: Implementation :: CPython",
+  "Programming Language :: Python :: Implementation :: PyPy",
+]
+
+dynamic = [
+  "version",
+]
+dependencies = [
+  "anemoi-datasets",
+]
+
+optional-dependencies.all = [
+]
+optional-dependencies.dev = [
+  "nbsphinx",
+  "pandoc",
+  "pytest",
+  "requests",
+  "sphinx",
+  "sphinx-argparse",
+  "sphinx-rtd-theme",
+  "termcolor",
+  "tomli",
+]
+
+optional-dependencies.docs = [
+  "nbsphinx",
+  "pandoc",
+  "requests",
+  "sphinx",
+  "sphinx-argparse",
+  "sphinx-rtd-theme",
+  "termcolor",
+  "tomli",
+]
+
+optional-dependencies.tests = [
+  "pytest",
+]
+
+urls.Documentation = "https://anemoi-registry.readthedocs.io/"
+urls.Homepage = "https://github.com/ecmwf/anemoi-registry/"
+urls.Issues = "https://github.com/ecmwf/anemoi-registry/issues"
+urls.Repository = "https://github.com/ecmwf/anemoi-registry/"
+scripts.anemoi-registry = "anemoi.registry.__main__:main"
+
+[tool.setuptools_scm]
+version_file = "src/anemoi/registry/_version.py"
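+
+# The optional dependency groups above can be combined at install time; a
+# hypothetical example for a development environment:
+#
+#     pip install -e .[dev]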
diff --git a/src/anemoi/registry/__init__.py b/src/anemoi/registry/__init__.py
new file mode 100644
index 0000000..eef2c1d
--- /dev/null
+++ b/src/anemoi/registry/__init__.py
@@ -0,0 +1,9 @@
+# (C) Copyright 2024 European Centre for Medium-Range Weather Forecasts.
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+
+from ._version import __version__ as __version__
diff --git a/src/anemoi/registry/__main__.py b/src/anemoi/registry/__main__.py
new file mode 100644
index 0000000..be940c2
--- /dev/null
+++ b/src/anemoi/registry/__main__.py
@@ -0,0 +1,28 @@
+#!/usr/bin/env python
+# (C) Copyright 2024 ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+#
+
+from anemoi.utils.cli import cli_main
+from anemoi.utils.cli import make_parser
+
+from . import __version__
+from .commands import COMMANDS
+
+
+# For read-the-docs
+def create_parser():
+    return make_parser(__doc__, COMMANDS)
+
+
+def main():
+    cli_main(__version__, __doc__, COMMANDS)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/anemoi/registry/commands/__init__.py b/src/anemoi/registry/commands/__init__.py
new file mode 100644
index 0000000..cebb539
--- /dev/null
+++ b/src/anemoi/registry/commands/__init__.py
@@ -0,0 +1,24 @@
+#!/usr/bin/env python
+# (C) Copyright 2024 ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+#
+
+import os
+
+from anemoi.utils.cli import Command
+from anemoi.utils.cli import Failed
+from anemoi.utils.cli import register_commands
+
+__all__ = ["Command"]
+
+COMMANDS = register_commands(
+    os.path.dirname(__file__),
+    __name__,
+    lambda x: x.command(),
+    lambda name, error: Failed(name, error),
+)
diff --git a/src/anemoi/registry/commands/hello.py b/src/anemoi/registry/commands/hello.py
new file mode 100644
index 0000000..12a0495
--- /dev/null
+++ b/src/anemoi/registry/commands/hello.py
@@ -0,0 +1,32 @@
+#!/usr/bin/env python
+# (C) Copyright 2024 ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+#
+
+"""Command placeholder. Delete when we have real commands."""
+
+from . import Command
+
+
+def say_hello(greetings, who):
+    print(greetings, who)
+
+
+class Hello(Command):
+
+    def add_arguments(self, command_parser):
+        command_parser.add_argument("--greetings", default="hello")
+        command_parser.add_argument("--who", default="world")
+
+    def run(self, args):
+        say_hello(args.greetings, args.who)
+
+
+command = Hello
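+
+
+# A hypothetical invocation, assuming the package is installed with the
+# `anemoi-registry` entry point declared in pyproject.toml:
+#
+#     $ anemoi-registry hello --greetings "Hello" --who "Anemoi"
+#     Hello Anemoi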
diff --git a/tests/test_graphs.py b/tests/test_graphs.py
new file mode 100644
index 0000000..846ee89
--- /dev/null
+++ b/tests/test_graphs.py
@@ -0,0 +1,14 @@
+# (C) Copyright 2024 European Centre for Medium-Range Weather Forecasts.
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+
+def test_graphs():
+    pass
+
+
+if __name__ == "__main__":
+    test_graphs()

From 6ca06c6cb19115ddaefcfb83f3e323d2267810cd Mon Sep 17 00:00:00 2001
From: Baudouin Raoult 
Date: Thu, 20 Jun 2024 13:56:12 +0100
Subject: [PATCH 02/64] Add S3 support

---
 docs/modules/dates.rst    |   8 -
 docs/modules/s3.rst       |   8 +
 src/anemoi/registry/s3.py | 384 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 392 insertions(+), 8 deletions(-)
 delete mode 100644 docs/modules/dates.rst
 create mode 100644 docs/modules/s3.rst
 create mode 100644 src/anemoi/registry/s3.py

diff --git a/docs/modules/dates.rst b/docs/modules/dates.rst
deleted file mode 100644
index 558c01b..0000000
--- a/docs/modules/dates.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-#######
- dates
-#######
-
-.. automodule:: anemoi.registry.dates
-   :members:
-   :no-undoc-members:
-   :show-inheritance:
diff --git a/docs/modules/s3.rst b/docs/modules/s3.rst
new file mode 100644
index 0000000..85f7222
--- /dev/null
+++ b/docs/modules/s3.rst
@@ -0,0 +1,8 @@
+####
+ s3
+####
+
+.. automodule:: anemoi.registry.s3
+   :members:
+   :no-undoc-members:
+   :show-inheritance:
diff --git a/src/anemoi/registry/s3.py b/src/anemoi/registry/s3.py
new file mode 100644
index 0000000..00d8810
--- /dev/null
+++ b/src/anemoi/registry/s3.py
@@ -0,0 +1,384 @@
+# (C) Copyright 2024 European Centre for Medium-Range Weather Forecasts.
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+"""This module provides functions to upload, download, list and delete files and folders on S3.
+The functions of this package expect that the AWS credentials are set up in the environment,
+typically by setting the `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` environment variables or
+by creating a `~/.aws/credentials` file. It is also possible to set the `endpoint_url` in the same file
+to use a different S3 compatible service::
+
+    [default]
+    endpoint_url = https://some-storage.somewhere.world
+    aws_access_key_id = xxxxxxxxxxxxxxxxxxxxxxxx
+    aws_secret_access_key = xxxxxxxxxxxxxxxxxxxxxxxx
+
+"""
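+
+# A minimal usage sketch of this module (the bucket and paths below are
+# hypothetical, and valid AWS credentials are assumed):
+#
+#     from anemoi.registry.s3 import delete, download, list_folders, upload
+#
+#     upload("local-folder", "s3://my-bucket/some/folder/", resume=True, threads=4)
+#     download("s3://my-bucket/some/folder/", "local-copy", resume=True, threads=4)
+#     for subfolder in list_folders("s3://my-bucket/some/"):
+#         print(subfolder)
+#     delete("s3://my-bucket/some/folder/")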
+
+import concurrent
+import logging
+import os
+import threading
+
+import tqdm
+
+from .humanize import bytes
+
+LOGGER = logging.getLogger(__name__)
+
+
+# s3_clients are not thread-safe, so we need to create a new client for each thread
+
+thread_local = threading.local()
+
+
+def _s3_client():
+    import boto3
+
+    if not hasattr(thread_local, "s3_client"):
+        thread_local.s3_client = boto3.client("s3")
+    return thread_local.s3_client
+
+
+def _upload_file(source, target, overwrite=False, resume=False, verbosity=1):
+    from botocore.exceptions import ClientError
+
+    assert target.startswith("s3://")
+
+    _, _, bucket, key = target.split("/", 3)
+
+    size = os.path.getsize(source)
+
+    if verbosity > 0:
+        LOGGER.info(f"Uploading {source} to {target} ({bytes(size)})")
+
+    s3_client = _s3_client()
+
+    try:
+        results = s3_client.head_object(Bucket=bucket, Key=key)
+        remote_size = int(results["ContentLength"])
+    except ClientError as e:
+        if e.response["Error"]["Code"] != "404":
+            raise
+        remote_size = None
+
+    if remote_size is not None:
+        if remote_size != size:
+            LOGGER.warning(f"{target} already exists, but with a different size, re-uploading")
+            overwrite = True
+
+        if resume:
+            LOGGER.info(f"{target} already exists, skipping")
+            return
+
+    if remote_size is not None and not overwrite:
+        raise ValueError(f"{target} already exists, use 'overwrite' to replace or 'resume' to skip")
+
+    if verbosity > 0:
+        with tqdm.tqdm(total=size, unit="B", unit_scale=True, leave=False) as pbar:
+            s3_client.upload_file(source, bucket, key, Callback=lambda x: pbar.update(x))
+    else:
+        s3_client.upload_file(source, bucket, key)
+
+    return size
+
+
+def _local_file_list(source):
+    for root, _, files in os.walk(source):
+        for file in files:
+            yield os.path.join(root, file)
+
+
+def _upload_folder(source, target, overwrite=False, resume=False, threads=1, verbosity=1):
+
+    with concurrent.futures.ThreadPoolExecutor(max_workers=threads) as executor:
+        try:
+            if verbosity > 0:
+                LOGGER.info(f"Uploading {source} to {target}")
+
+            total = 0
+            ready = 0
+
+            futures = []
+            for local_path in _local_file_list(source):
+                relative_path = os.path.relpath(local_path, source)
+                s3_path = os.path.join(target, relative_path)
+                futures.append(
+                    executor.submit(
+                        _upload_file,
+                        local_path,
+                        s3_path,
+                        overwrite,
+                        resume,
+                        verbosity - 1,
+                    )
+                )
+                total += os.path.getsize(local_path)
+
+                if len(futures) % 10000 == 0:
+                    if verbosity > 0:
+                        LOGGER.info(f"Preparing upload, {len(futures):,} files... ({bytes(total)})")
+                    done, _ = concurrent.futures.wait(
+                        futures,
+                        timeout=0.001,
+                        return_when=concurrent.futures.FIRST_EXCEPTION,
+                    )
+                    # Trigger exceptions if any
+                    for n in done:
+                        ready += n.result()
+
+            if verbosity > 0:
+                LOGGER.info(f"Uploading {len(futures):,} files ({bytes(total)})")
+                with tqdm.tqdm(total=total, initial=ready, unit="B", unit_scale=True) as pbar:
+                    for future in futures:
+                        pbar.update(future.result())
+            else:
+                for future in futures:
+                    future.result()
+
+        except Exception:
+            executor.shutdown(wait=False, cancel_futures=True)
+            raise
+
+
+def upload(source, target, overwrite=False, resume=False, threads=1, verbosity=1):
+    """Upload a file or a folder to S3.
+
+    Parameters
+    ----------
+    source : str
+        A path to a file or a folder to upload.
+    target : str
+        A URL to a file or a folder on S3. The url should start with 's3://'.
+    overwrite : bool, optional
+        If the data is already on S3 it will be overwritten, by default False
+    resume : bool, optional
+        If the data is already on S3 it will not be uploaded, unless the remote file
+        has a different size, by default False
+    threads : int, optional
+        The number of threads to use when uploading a directory, by default 1
+    verbosity : int, optional
+        The level of progress information to display, by default 1
+    """
+    if os.path.isdir(source):
+        _upload_folder(source, target, overwrite, resume, threads, verbosity)
+    else:
+        _upload_file(source, target, overwrite, resume, verbosity)
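+
+
+# For example (hypothetical paths), re-running an interrupted folder upload
+# skips objects that already exist on S3 with the expected size:
+#
+#     upload("checkpoints", "s3://my-bucket/checkpoints/", resume=True, threads=8)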
+
+
+def _download_file(source, target, overwrite=False, resume=False, verbosity=0):
+    s3_client = _s3_client()
+    _, _, bucket, key = source.split("/", 3)
+
+    response = s3_client.head_object(Bucket=bucket, Key=key)
+    size = int(response["ContentLength"])
+
+    if verbosity > 0:
+        LOGGER.info(f"Downloading {source} to {target} ({bytes(size)})")
+
+    if overwrite:
+        resume = False
+
+    if resume:
+        if os.path.exists(target):
+            if os.path.getsize(target) != size:
+                LOGGER.warning(f"{target} already exists with a different size, re-downloading")
+            else:
+                if verbosity > 0:
+                    LOGGER.info(f"{target} already exists, skipping")
+                return
+
+    if os.path.exists(target) and not overwrite:
+        raise ValueError(f"{target} already exists, use 'overwrite' to replace or 'resume' to skip")
+
+    if verbosity > 0:
+        with tqdm.tqdm(total=size, unit="B", unit_scale=True, leave=False) as pbar:
+            s3_client.download_file(bucket, key, target, Callback=lambda x: pbar.update(x))
+    else:
+        s3_client.download_file(bucket, key, target)
+
+    return size
+
+
+def _download_folder(source, target, *, overwrite=False, resume=False, verbosity=1, threads=1):
+    assert verbosity > 0
+    source = source.rstrip("/")
+    _, _, bucket, folder = source.split("/", 3)
+
+    with concurrent.futures.ThreadPoolExecutor(max_workers=threads) as executor:
+        try:
+            if verbosity > 0:
+                LOGGER.info(f"Downloading {source} to {target}")
+
+            total = 0
+            ready = 0
+
+            futures = []
+            for o in _list_objects(source):
+                name, size = o["Key"], o["Size"]
+                local_path = os.path.join(target, os.path.relpath(name, folder))
+                os.makedirs(os.path.dirname(local_path), exist_ok=True)
+                futures.append(
+                    executor.submit(
+                        _download_file,
+                        f"s3://{bucket}/{name}",
+                        local_path,
+                        overwrite,
+                        resume,
+                        verbosity - 1,
+                    )
+                )
+                total += size
+                if len(futures) % 10000 == 0:
+                    if verbosity > 0:
+                        LOGGER.info(f"Preparing download, {len(futures):,} files... ({bytes(total)})")
+
+                    done, _ = concurrent.futures.wait(
+                        futures,
+                        timeout=0.001,
+                        return_when=concurrent.futures.FIRST_EXCEPTION,
+                    )
+                    # Trigger exceptions if any
+                    for n in done:
+                        ready += n.result()
+
+            if verbosity > 0:
+                LOGGER.info(f"Downloading {len(futures):,} files ({bytes(total)})")
+                with tqdm.tqdm(total=total, initial=ready, unit="B", unit_scale=True) as pbar:
+                    for future in futures:
+                        pbar.update(future.result())
+            else:
+                for future in futures:
+                    future.result()
+
+        except Exception:
+            executor.shutdown(wait=False, cancel_futures=True)
+            raise
+
+
+def download(source, target, *, overwrite=False, resume=False, verbosity=1, threads=1):
+    """Download a file or a folder from S3.
+
+    Parameters
+    ----------
+    source : str
+        The URL of a file or a folder on S3. The url should start with 's3://'. If the URL ends with a '/' it is
+        assumed to be a folder, otherwise it is assumed to be a file.
+    target : str
+        The local path where the file or folder will be downloaded.
+    overwrite : bool, optional
+        If true, existing local files are replaced even when they match the size on S3, by default False
+    resume : bool, optional
+        If the data is already available locally it will not be downloaded again, unless the remote file
+        has a different size, by default False
+    verbosity : int, optional
+        The level of progress information to display, by default 1
+    threads : int, optional
+        The number of threads to use when downloading a directory, by default 1
+    """
+    assert source.startswith("s3://")
+
+    if source.endswith("/"):
+        _download_folder(
+            source,
+            target,
+            overwrite=overwrite,
+            resume=resume,
+            verbosity=verbosity,
+            threads=threads,
+        )
+    else:
+        _download_file(source, target, overwrite=overwrite, resume=resume, verbosity=verbosity)
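+
+
+# Note the trailing-slash convention (hypothetical URLs): a source ending in
+# '/' is treated as a folder, anything else as a single file:
+#
+#     download("s3://my-bucket/data/2024.grib", "2024.grib")  # one object
+#     download("s3://my-bucket/data/", "data", threads=4)     # whole prefix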
+    target : str
+        The local path where the file or folder will be downloaded.
+    overwrite : bool, optional
+        If the data is already present locally, it will be overwritten, by default False
+    resume : bool, optional
+        If the data is already present locally, it will not be downloaded, unless the remote file
+        has a different size, by default False
+    threads : int, optional
+        The number of threads to use when downloading a directory, by default 1
+    """
+    assert source.startswith("s3://")
+
+    if source.endswith("/"):
+        _download_folder(
+            source,
+            target,
+            overwrite=overwrite,
+            resume=resume,
+            verbosity=verbosity,
+            threads=threads,
+        )
+    else:
+        _download_file(source, target, overwrite=overwrite, resume=resume, verbosity=verbosity)
+
+
+def _list_objects(target, batch=False):
+    s3_client = _s3_client()
+    _, _, bucket, prefix = target.split("/", 3)
+
+    paginator = s3_client.get_paginator("list_objects_v2")
+
+    for page in paginator.paginate(Bucket=bucket, Prefix=prefix):
+        if "Contents" in page:
+            objects = page["Contents"]
+            if batch:
+                yield objects
+            else:
+                yield from objects
+
+
+def _delete_folder(target):
+    s3_client = _s3_client()
+    _, _, bucket, _ = target.split("/", 3)
+
+    for batch in _list_objects(target, batch=True):
+        s3_client.delete_objects(Bucket=bucket, Delete={"Objects": batch})
+        LOGGER.info(f"Deleted {len(batch)} objects")
+
+
+def _delete_file(target):
+    from botocore.exceptions import ClientError
+
+    s3_client = _s3_client()
+    _, _, bucket, key = target.split("/", 3)
+
+    try:
+        s3_client.head_object(Bucket=bucket, Key=key)
+        exists = True
+    except ClientError as e:
+        if e.response["Error"]["Code"] != "404":
+            raise
+        exists = False
+
+    if not exists:
+        LOGGER.warning(f"{target} does not exist. Did you mean to delete a folder? Then add a trailing '/'")
+        return
+
+    LOGGER.info(f"Deleting {target}")
+    print(s3_client.delete_object(Bucket=bucket, Key=key))
+    LOGGER.info(f"{target} is deleted")
+
+
+def delete(target):
+    """Delete a file or a folder from S3.
+
+    Parameters
+    ----------
+    target : str
+        The URL of a file or a folder on S3. The url should start with 's3://'. If the URL ends with a '/' it is
+        assumed to be a folder, otherwise it is assumed to be a file.
+    """
+
+    assert target.startswith("s3://")
+
+    if target.endswith("/"):
+        _delete_folder(target)
+    else:
+        _delete_file(target)
+
+
+def list_folders(folder):
+    """List the subfolders in a folder on S3.
+
+    Parameters
+    ----------
+    folder : str
+        The URL of a folder on S3. The url should start with 's3://'.
+
+    Returns
+    -------
+    list
+        A list with the names of the subfolders in the folder.
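+
+    A hedged example (the bucket name is illustrative)::
+
+        for folder in list_folders("s3://my-bucket/datasets/"):
+            print(folder)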
+    """
+
+    assert folder.startswith("s3://")
+    if not folder.endswith("/"):
+        folder += "/"
+
+    _, _, bucket, prefix = folder.split("/", 3)
+
+    s3_client = _s3_client()
+    paginator = s3_client.get_paginator("list_objects_v2")
+
+    for page in paginator.paginate(Bucket=bucket, Prefix=prefix, Delimiter="/"):
+        if "CommonPrefixes" in page:
+            yield from [folder + _["Prefix"] for _ in page.get("CommonPrefixes")]

From 17fbd55afe0f56dbb91b14b0fc85ef3e6ffcdcb8 Mon Sep 17 00:00:00 2001
From: Baudouin Raoult
Date: Thu, 20 Jun 2024 14:06:16 +0100
Subject: [PATCH 03/64] work on pre-commit

---
 .pre-commit-config.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index f6839ac..f4b6367 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -38,7 +38,7 @@ repos:
     hooks:
       - id: ruff
         # Next line is for documentation code snippets
-        exclude: '^[A-Za-z]\w*_\.py$'
+        exclude: '^[^_].*_\.py$'
         args:
          - --line-length=120
          - --fix

From cf51aaab7308320b986d39a378dd9079b7ead849 Mon Sep 17 00:00:00 2001
From: Baudouin Raoult
Date: Thu, 20 Jun 2024 20:48:25 +0100
Subject: [PATCH 04/64] Fix bug in s3 resume

---
 src/anemoi/registry/s3.py | 80 +++++++++++++++++++++++++++++----------
 1 file changed, 59 insertions(+), 21 deletions(-)

diff --git a/src/anemoi/registry/s3.py b/src/anemoi/registry/s3.py
index 00d8810..36d42e3 100644
--- a/src/anemoi/registry/s3.py
+++ b/src/anemoi/registry/s3.py
@@ -22,6 +22,7 @@
 import logging
 import os
 import threading
+from copy import deepcopy
 
 import tqdm
 
@@ -43,7 +44,9 @@ def _s3_client():
     return thread_local.s3_client
 
 
-def _upload_file(source, target, overwrite=False, resume=False, verbosity=1):
+def _upload_file(source, target, overwrite=False, resume=False, verbosity=1, config=None):
+    # from boto3.s3.transfer import TransferConfig
+    # TransferConfig(use_threads=False)
     from botocore.exceptions import ClientError
 
     assert target.startswith("s3://")
@@ -67,21 +70,21 @@ def _upload_file(source, target, overwrite=False, resume=False, verbosity=1):
 
     if remote_size is not None:
         if remote_size != size:
-            LOGGER.warning(f"{target} already exists, but with different size, re-uploading")
-            overwrite = True
-
-        if resume:
-            LOGGER.info(f"{target} already exists, skipping")
-            return
-
-    if remote_size is not None and not overwrite:
+            LOGGER.warning(
+                f"{target} already exists, but with different size, re-uploading (remote={remote_size}, local={size})"
+            )
+        elif resume:
+            # LOGGER.info(f"{target} already exists, skipping")
+            return size
+
+    if remote_size is not None and not overwrite and not resume:
         raise ValueError(f"{target} already exists, use 'overwrite' to replace or 'resume' to skip")
 
     if verbosity > 0:
         with tqdm.tqdm(total=size, unit="B", unit_scale=True, leave=False) as pbar:
-            s3_client.upload_file(source, bucket, key, Callback=lambda x: pbar.update(x))
+            s3_client.upload_file(source, bucket, key, Callback=lambda x: pbar.update(x), Config=config)
     else:
-        s3_client.upload_file(source, bucket, key)
+        s3_client.upload_file(source, bucket, key, Config=config)
 
     return size
 
@@ -167,11 +170,20 @@ def upload(source, target, overwrite=False, resume=False, threads=1, verbosity=T
         _upload_file(source, target, overwrite, resume)
 
 
-def _download_file(source, target, overwrite=False, resume=False, verbosity=0):
+def _download_file(source, target, overwrite=False, resume=False, verbosity=0, config=None):
+    # from boto3.s3.transfer import TransferConfig
+
     s3_client = _s3_client()
    _, _, bucket, key = source.split("/", 3)
 
-    response = s3_client.head_object(Bucket=bucket, Key=key)
+    try:
+        response = s3_client.head_object(Bucket=bucket, Key=key)
+    except s3_client.exceptions.ClientError as e:
+        print(e.response["Error"]["Code"], e.response["Error"]["Message"], bucket, key)
+        if e.response["Error"]["Code"] == "404":
+            raise ValueError(f"{source} does not exist ({bucket}, {key})")
+        raise
+
     size = int(response["ContentLength"])
 
     if verbosity > 0:
@@ -182,21 +194,22 @@ def _download_file(source, target, overwrite=False, resume=False, verbosity=0):
 
     if resume:
         if os.path.exists(target):
-            if os.path.getsize(target) != size:
-                LOGGER.warning(f"{target} already exists with a different size, re-downloading")
+            local_size = os.path.getsize(target)
+            if local_size != size:
+                LOGGER.warning(f"{target} already exists with a different size, re-downloading (remote={size}, local={local_size})")
             else:
-                if verbosity > 0:
-                    LOGGER.info(f"{target} already exists, skipping")
-                return
+                # if verbosity > 0:
+                #     LOGGER.info(f"{target} already exists, skipping")
+                return size
 
     if os.path.exists(target) and not overwrite:
         raise ValueError(f"{target} already exists, use 'overwrite' to replace or 'resume' to skip")
 
     if verbosity > 0:
         with tqdm.tqdm(total=size, unit="B", unit_scale=True, leave=False) as pbar:
-            s3_client.download_file(bucket, key, target, Callback=lambda x: pbar.update(x))
+            s3_client.download_file(bucket, key, target, Callback=lambda x: pbar.update(x), Config=config)
     else:
-        s3_client.download_file(bucket, key, target)
+        s3_client.download_file(bucket, key, target, Config=config)
 
     return size
 
@@ -299,7 +312,7 @@ def _list_objects(target, batch=False):
 
     for page in paginator.paginate(Bucket=bucket, Prefix=prefix):
         if "Contents" in page:
-            objects = page["Contents"]
+            objects = deepcopy(page["Contents"])
             if batch:
                 yield objects
             else:
@@ -382,3 +395,28 @@ def list_folders(folder):
     for page in paginator.paginate(Bucket=bucket, Prefix=prefix, Delimiter="/"):
         if "CommonPrefixes" in page:
             yield from [folder + _["Prefix"] for _ in page.get("CommonPrefixes")]
+
+
+def object_info(target):
+    """Get information about an object on S3.
+
+    Parameters
+    ----------
+    target : str
+        The URL of a file or a folder on S3. The url should start with 's3://'.
+
+    Returns
+    -------
+    dict
+        A dictionary with information about the object.
+    """
+
+    s3_client = _s3_client()
+    _, _, bucket, key = target.split("/", 3)
+
+    try:
+        return s3_client.head_object(Bucket=bucket, Key=key)
+    except s3_client.exceptions.ClientError as e:
+        if e.response["Error"]["Code"] == "404":
+            raise ValueError(f"{target} does not exist")
+        raise

From b6e9de13bea4fd7be2c32f399110248b79870a3a Mon Sep 17 00:00:00 2001
From: Baudouin Raoult
Date: Tue, 25 Jun 2024 08:55:20 +0000
Subject: [PATCH 05/64] remove s3

---
 src/anemoi/registry/s3.py | 422 --------------------------------------
 1 file changed, 422 deletions(-)
 delete mode 100644 src/anemoi/registry/s3.py

diff --git a/src/anemoi/registry/s3.py b/src/anemoi/registry/s3.py
deleted file mode 100644
index 36d42e3..0000000
--- a/src/anemoi/registry/s3.py
+++ /dev/null
@@ -1,422 +0,0 @@
-# (C) Copyright 2024 European Centre for Medium-Range Weather Forecasts.
-# This software is licensed under the terms of the Apache Licence Version 2.0
-# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
-# In applying this licence, ECMWF does not waive the privileges and immunities
-# granted to it by virtue of its status as an intergovernmental organisation
-# nor does it submit to any jurisdiction.
- -"""This module provides functions to upload, download, list and delete files and folders on S3. -The functions of this package expect that the AWS credentials are set up in the environment -typicaly by setting the `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` environment variables or -by creating a `~/.aws/credentials` file. It is also possible to set the `endpoint_url` in the same file -to use a different S3 compatible service:: - - [default] - endpoint_url = https://some-storage.somewhere.world - aws_access_key_id = xxxxxxxxxxxxxxxxxxxxxxxx - aws_secret_access_key = xxxxxxxxxxxxxxxxxxxxxxxx - -""" - -import concurrent -import logging -import os -import threading -from copy import deepcopy - -import tqdm - -from .humanize import bytes - -LOGGER = logging.getLogger(__name__) - - -# s3_clients are not thread-safe, so we need to create a new client for each thread - -thread_local = threading.local() - - -def _s3_client(): - import boto3 - - if not hasattr(thread_local, "s3_client"): - thread_local.s3_client = boto3.client("s3") - return thread_local.s3_client - - -def _upload_file(source, target, overwrite=False, resume=False, verbosity=1, config=None): - # from boto3.s3.transfer import TransferConfig - # TransferConfig(use_threads=False) - from botocore.exceptions import ClientError - - assert target.startswith("s3://") - - _, _, bucket, key = target.split("/", 3) - - size = os.path.getsize(source) - - if verbosity > 0: - LOGGER.info(f"Uploading {source} to {target} ({bytes(size)})") - - s3_client = _s3_client() - - try: - results = s3_client.head_object(Bucket=bucket, Key=key) - remote_size = int(results["ContentLength"]) - except ClientError as e: - if e.response["Error"]["Code"] != "404": - raise - remote_size = None - - if remote_size is not None: - if remote_size != size: - LOGGER.warning( - f"{target} already exists, but with different size, re-uploading (remote={remote_size}, local={size})" - ) - elif resume: - # LOGGER.info(f"{target} already exists, skipping") - return size - - if remote_size is not None and not overwrite and not resume: - raise ValueError(f"{target} already exists, use 'overwrite' to replace or 'resume' to skip") - - if verbosity > 0: - with tqdm.tqdm(total=size, unit="B", unit_scale=True, leave=False) as pbar: - s3_client.upload_file(source, bucket, key, Callback=lambda x: pbar.update(x), Config=config) - else: - s3_client.upload_file(source, bucket, key, Config=config) - - return size - - -def _local_file_list(source): - for root, _, files in os.walk(source): - for file in files: - yield os.path.join(root, file) - - -def _upload_folder(source, target, overwrite=False, resume=False, threads=1, verbosity=1): - - with concurrent.futures.ThreadPoolExecutor(max_workers=threads) as executor: - try: - if verbosity > 0: - LOGGER.info(f"Uploading {source} to {target}") - - total = 0 - ready = 0 - - futures = [] - for local_path in _local_file_list(source): - relative_path = os.path.relpath(local_path, source) - s3_path = os.path.join(target, relative_path) - futures.append( - executor.submit( - _upload_file, - local_path, - s3_path, - overwrite, - resume, - verbosity - 1, - ) - ) - total += os.path.getsize(local_path) - - if len(futures) % 10000 == 0: - if verbosity > 0: - LOGGER.info(f"Preparing upload, {len(futures):,} files... 
({bytes(total)})") - done, _ = concurrent.futures.wait( - futures, - timeout=0.001, - return_when=concurrent.futures.FIRST_EXCEPTION, - ) - # Trigger exceptions if any - for n in done: - ready += n.result() - - if verbosity > 0: - LOGGER.info(f"Uploading {len(futures):,} files ({bytes(total)})") - with tqdm.tqdm(total=total, initial=ready, unit="B", unit_scale=True) as pbar: - for future in futures: - pbar.update(future.result()) - else: - for future in futures: - future.result() - - except Exception: - executor.shutdown(wait=False, cancel_futures=True) - raise - - -def upload(source, target, overwrite=False, resume=False, threads=1, verbosity=True): - """Upload a file or a folder to S3. - - Parameters - ---------- - source : str - A path to a file or a folder to upload. - target : str - A URL to a file or a folder on S3. The url should start with 's3://'. - overwrite : bool, optional - If the data is alreay on S3 it will be overwritten, by default False - resume : bool, optional - If the data is alreay on S3 it will not be uploaded, unless the remote file - has a different size, by default False - threads : int, optional - The number of threads to use when uploading a directory, by default 1 - """ - if os.path.isdir(source): - _upload_folder(source, target, overwrite, resume, threads) - else: - _upload_file(source, target, overwrite, resume) - - -def _download_file(source, target, overwrite=False, resume=False, verbosity=0, config=None): - # from boto3.s3.transfer import TransferConfig - - s3_client = _s3_client() - _, _, bucket, key = source.split("/", 3) - - try: - response = s3_client.head_object(Bucket=bucket, Key=key) - except s3_client.exceptions.ClientError as e: - print(e.response["Error"]["Code"], e.response["Error"]["Message"], bucket, key) - if e.response["Error"]["Code"] == "404": - raise ValueError(f"{source} does not exist ({bucket}, {key})") - raise - - size = int(response["ContentLength"]) - - if verbosity > 0: - LOGGER.info(f"Downloading {source} to {target} ({bytes(size)})") - - if overwrite: - resume = False - - if resume: - if os.path.exists(target): - local_size = os.path.getsize(target) - if local_size != size: - LOGGER.warning(f"{target} already with different size, re-downloading (remote={size}, local={size})") - else: - # if verbosity > 0: - # LOGGER.info(f"{target} already exists, skipping") - return size - - if os.path.exists(target) and not overwrite: - raise ValueError(f"{target} already exists, use 'overwrite' to replace or 'resume' to skip") - - if verbosity > 0: - with tqdm.tqdm(total=size, unit="B", unit_scale=True, leave=False) as pbar: - s3_client.download_file(bucket, key, target, Callback=lambda x: pbar.update(x), Config=config) - else: - s3_client.download_file(bucket, key, target, Config=config) - - return size - - -def _download_folder(source, target, *, overwrite=False, resume=False, verbosity=1, threads=1): - assert verbosity > 0 - source = source.rstrip("/") - _, _, bucket, folder = source.split("/", 3) - - with concurrent.futures.ThreadPoolExecutor(max_workers=threads) as executor: - try: - if verbosity > 0: - LOGGER.info(f"Downloading {source} to {target}") - - total = 0 - ready = 0 - - futures = [] - for o in _list_objects(source): - name, size = o["Key"], o["Size"] - local_path = os.path.join(target, os.path.relpath(name, folder)) - os.makedirs(os.path.dirname(local_path), exist_ok=True) - futures.append( - executor.submit( - _download_file, - f"s3://{bucket}/{name}", - local_path, - overwrite, - resume, - verbosity - 1, - ) - ) - total += 
size - if len(futures) % 10000 == 0: - if verbosity > 0: - LOGGER.info(f"Preparing download, {len(futures):,} files... ({bytes(total)})") - - done, _ = concurrent.futures.wait( - futures, - timeout=0.001, - return_when=concurrent.futures.FIRST_EXCEPTION, - ) - # Trigger exceptions if any - for n in done: - ready += n.result() - - if verbosity > 0: - LOGGER.info(f"Downloading {len(futures):,} files ({bytes(total)})") - with tqdm.tqdm(total=total, initial=ready, unit="B", unit_scale=True) as pbar: - for future in futures: - pbar.update(future.result()) - else: - for future in futures: - future.result() - - except Exception: - executor.shutdown(wait=False, cancel_futures=True) - raise - - -def download(source, target, *, overwrite=False, resume=False, verbosity=1, threads=1): - """Download a file or a folder from S3. - - Parameters - ---------- - source : str - The URL of a file or a folder on S3. The url should start with 's3://'. If the URL ends with a '/' it is - assumed to be a folder, otherwise it is assumed to be a file. - target : str - The local path where the file or folder will be downloaded. - overwrite : bool, optional - If false, files which have already been download will be skipped, unless their size - does not match their size on S3 , by default False - resume : bool, optional - If the data is alreay on local it will not be downloaded, unless the remote file - has a different size, by default False - threads : int, optional - The number of threads to use when downloading a directory, by default 1 - """ - assert source.startswith("s3://") - - if source.endswith("/"): - _download_folder( - source, - target, - overwrite=overwrite, - resume=resume, - verbosity=verbosity, - threads=threads, - ) - else: - _download_file(source, target, overwrite=overwrite, resume=resume, verbosity=verbosity) - - -def _list_objects(target, batch=False): - s3_client = _s3_client() - _, _, bucket, prefix = target.split("/", 3) - - paginator = s3_client.get_paginator("list_objects_v2") - - for page in paginator.paginate(Bucket=bucket, Prefix=prefix): - if "Contents" in page: - objects = deepcopy(page["Contents"]) - if batch: - yield objects - else: - yield from objects - - -def _delete_folder(target): - s3_client = _s3_client() - _, _, bucket, _ = target.split("/", 3) - - for batch in _list_objects(target, batch=True): - s3_client.delete_objects(Bucket=bucket, Delete={"Objects": batch}) - LOGGER.info(f"Deleted {len(batch)} objects") - - -def _delete_file(target): - from botocore.exceptions import ClientError - - s3_client = _s3_client() - _, _, bucket, key = target.split("/", 3) - - try: - s3_client.head_object(Bucket=bucket, Key=key) - exits = True - except ClientError as e: - if e.response["Error"]["Code"] != "404": - raise - exits = False - - if not exits: - LOGGER.warning(f"{target} does not exist. Did you mean to delete a folder? Then add a trailing '/'") - return - - LOGGER.info(f"Deleting {target}") - print(s3_client.delete_object(Bucket=bucket, Key=key)) - LOGGER.info(f"{target} is deleted") - - -def delete(target): - """Delete a file or a folder from S3. - - Parameters - ---------- - target : str - The URL of a file or a folder on S3. The url should start with 's3://'. If the URL ends with a '/' it is - assumed to be a folder, otherwise it is assumed to be a file. - """ - - assert target.startswith("s3://") - - if target.endswith("/"): - _delete_folder(target) - else: - _delete_file(target) - - -def list_folders(folder): - """List the sub folders in a folder on S3. 
- - Parameters - ---------- - folder : str - The URL of a folder on S3. The url should start with 's3://'. - - Returns - ------- - list - A list of the subfolders names in the folder. - """ - - assert folder.startswith("s3://") - if not folder.endswith("/"): - folder += "/" - - _, _, bucket, prefix = folder.split("/", 3) - - s3_client = _s3_client() - paginator = s3_client.get_paginator("list_objects_v2") - - for page in paginator.paginate(Bucket=bucket, Prefix=prefix, Delimiter="/"): - if "CommonPrefixes" in page: - yield from [folder + _["Prefix"] for _ in page.get("CommonPrefixes")] - - -def object_info(target): - """Get information about an object on S3. - - Parameters - ---------- - target : str - The URL of a file or a folder on S3. The url should start with 's3://'. - - Returns - ------- - dict - A dictionary with information about the object. - """ - - s3_client = _s3_client() - _, _, bucket, key = target.split("/", 3) - - try: - return s3_client.head_object(Bucket=bucket, Key=key) - except s3_client.exceptions.ClientError as e: - if e.response["Error"]["Code"] == "404": - raise ValueError(f"{target} does not exist") - raise From b8f521c1da08fa9862a10c83e34c4bad092c6f01 Mon Sep 17 00:00:00 2001 From: Florian Pinault Date: Tue, 25 Jun 2024 09:35:42 +0000 Subject: [PATCH 06/64] wip wip --- src/anemoi/registry/__init__.py | 14 + src/anemoi/registry/commands/_base.py | 81 +++++ src/anemoi/registry/commands/datasets.py | 106 ++++++ src/anemoi/registry/commands/experiments.py | 118 ++++++ src/anemoi/registry/commands/hello.py | 32 -- src/anemoi/registry/commands/list.py | 59 +++ src/anemoi/registry/commands/weights.py | 112 ++++++ src/anemoi/registry/entry/__init__.py | 313 ++++++++++++++++ src/anemoi/registry/rest.py | 185 ++++++++++ src/anemoi/registry/s3.py | 384 -------------------- tests/{test_graphs.py => test_rest.py} | 12 +- 11 files changed, 998 insertions(+), 418 deletions(-) create mode 100644 src/anemoi/registry/commands/_base.py create mode 100644 src/anemoi/registry/commands/datasets.py create mode 100644 src/anemoi/registry/commands/experiments.py delete mode 100644 src/anemoi/registry/commands/hello.py create mode 100644 src/anemoi/registry/commands/list.py create mode 100644 src/anemoi/registry/commands/weights.py create mode 100644 src/anemoi/registry/entry/__init__.py create mode 100644 src/anemoi/registry/rest.py delete mode 100644 src/anemoi/registry/s3.py rename tests/{test_graphs.py => test_rest.py} (73%) diff --git a/src/anemoi/registry/__init__.py b/src/anemoi/registry/__init__.py index eef2c1d..2d2c2c3 100644 --- a/src/anemoi/registry/__init__.py +++ b/src/anemoi/registry/__init__.py @@ -6,4 +6,18 @@ # nor does it submit to any jurisdiction. +import logging + from ._version import __version__ as __version__ + +LOG = logging.getLogger(__name__) + + +def config(): + from anemoi.utils.config import DotDict + from anemoi.utils.config import load_config + + config = load_config().get("registry") + if not config: + LOG.warning(f"No 'registry' section in config. Config is {load_config()}. Limited functionalities.") + return DotDict(config) diff --git a/src/anemoi/registry/commands/_base.py b/src/anemoi/registry/commands/_base.py new file mode 100644 index 0000000..e64a20b --- /dev/null +++ b/src/anemoi/registry/commands/_base.py @@ -0,0 +1,81 @@ +#!/usr/bin/env python +# (C) Copyright 2024 ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+#
+
+"""Base class shared by the registry commands.
+
+"""
+
+import logging
+import os
+
+from ..entry import CatalogueEntryNotFound
+from . import Command
+
+LOG = logging.getLogger(__name__)
+
+
+class BaseCommand(Command):
+    internal = True
+    timestamp = True
+
+    def check_arguments(self, args):
+        pass
+
+    def is_path(self, name_or_path):
+        return os.path.exists(name_or_path)
+
+    def is_identifier(self, name_or_path):
+        try:
+            self.entry_class(key=name_or_path)
+            return True
+        except CatalogueEntryNotFound:
+            return False
+
+    def run(self, args):
+        args = vars(args)
+        LOG.debug("anemoi-registry args: %s", args)
+        if "command" in args:
+            args.pop("command")
+        name_or_path = args.pop("NAME_OR_PATH")
+
+        if args.get("add_location"):
+            args["add_location"] = self.parse_location(args["add_location"])
+        if args.get("remove_location"):
+            args["remove_location"] = self.parse_location(args["remove_location"])
+
+        if self.is_path(name_or_path):
+            LOG.info(f"Found local {self.kind} at {name_or_path}")
+            self.run_from_path(name_or_path, **args)
+            return
+
+        if self.is_identifier(name_or_path):
+            LOG.info(f"Processing {self.kind} with identifier '{name_or_path}'")
+            self.run_from_identifier(name_or_path, **args)
+            return
+
+        raise ValueError(f"'{name_or_path}' is neither an existing path nor a known {self.kind} identifier")
+
+    def parse_location(self, location):
+        for x in location:
+            if "=" not in x:
+                raise ValueError(f"Invalid location format '{x}', use 'key1=value1 key2=value2' list.")
+        return {x.split("=")[0]: x.split("=")[1] for x in location}
+
+    def warn_unused_arguments(self, kwargs):
+        for k, v in kwargs.items():
+            if v:
+                LOG.warning(f"Ignoring argument {k}={v}")
+
+    def run_from_identifier(self, *args, **kwargs):
+        raise NotImplementedError()
+
+    def run_from_path(self, *args, **kwargs):
+        raise NotImplementedError()
+
+
+command = BaseCommand
diff --git a/src/anemoi/registry/commands/datasets.py b/src/anemoi/registry/commands/datasets.py
new file mode 100644
index 0000000..11acb6b
--- /dev/null
+++ b/src/anemoi/registry/commands/datasets.py
@@ -0,0 +1,106 @@
+#!/usr/bin/env python
+# (C) Copyright 2024 ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+#
+
+"""Command to manage datasets in the catalogue.
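+
+A hedged usage sketch (the dataset name, platform and paths are illustrative, not real entries)::
+
+    anemoi-registry datasets /path/to/dataset.zarr --register
+    anemoi-registry datasets dataset-name --add-location platform=ewc path=s3://bucket/dataset.zarr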
+
+"""
+
+import logging
+
+from ..entry import DatasetCatalogueEntry
+from ._base import BaseCommand
+
+LOG = logging.getLogger(__name__)
+
+
+class Datasets(BaseCommand):
+    internal = True
+    timestamp = True
+    entry_class = DatasetCatalogueEntry
+    kind = "dataset"
+
+    def add_arguments(self, command_parser):
+        command_parser.add_argument("NAME_OR_PATH", help=f"The name or the path of the {self.kind}")
+        command_parser.add_argument("--register", help=f"Register the {self.kind}", action="store_true")
+        command_parser.add_argument(
+            "--unregister",
+            help="Remove from the catalogue (without deleting the underlying data)",
+            action="store_true",
+        )
+        # command_parser.add_argument("--delete", help=f"Delete the {self.kind} from the catalogue and from any other location", action="store_true")
+        command_parser.add_argument("--json", help="Output json record", action="store_true")
+
+        command_parser.add_argument("--set-status", help="Set the status of the dataset")
+        command_parser.add_argument("--add-location", nargs="+", help="Add a location to the dataset")
+
+    def check_arguments(self, args):
+        pass
+
+    def run_from_identifier(
+        self,
+        identifier,
+        add_location,
+        set_status,
+        unregister,
+        json,
+        remove_location=False,
+        **kwargs,
+    ):
+        self.warn_unused_arguments(kwargs)
+
+        entry = self.entry_class(key=identifier)
+
+        if unregister:
+            entry.unregister()
+        if add_location:
+            entry.add_location(**add_location)
+        if remove_location:
+            entry.remove_location(**remove_location)
+        if set_status:
+            entry.set_status(set_status)
+
+        if json:
+            print(entry.as_json())
+
+    def run_from_path(
+        self,
+        path,
+        register,
+        unregister,
+        add_location,
+        json,
+        set_status,
+        # remove_location,
+        # upload,
+        # upload_uri_pattern,
+        **kwargs,
+    ):
+        self.warn_unused_arguments(kwargs)
+
+        entry = self.entry_class(path=path)
+
+        if register:
+            entry.register()
+        if unregister:
+            entry.unregister()
+        if add_location:
+            entry.add_location(**add_location)
+        # if remove_location:
+        #     entry.remove_location(**remove_location)
+        if set_status:
+            entry.set_status(set_status)
+        # if delete:
+        #     entry.delete()
+
+        if json:
+            print(entry.as_json())
+
+
+command = Datasets
diff --git a/src/anemoi/registry/commands/experiments.py b/src/anemoi/registry/commands/experiments.py
new file mode 100644
index 0000000..a8aff83
--- /dev/null
+++ b/src/anemoi/registry/commands/experiments.py
@@ -0,0 +1,118 @@
+#!/usr/bin/env python
+# (C) Copyright 2024 ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+#
+
+"""Command to manage experiments in the catalogue.
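+
+A hedged usage sketch (the expver and paths are illustrative, not real entries)::
+
+    anemoi-registry experiments ./metadata.yaml --register
+    anemoi-registry experiments abcd --add-weights /path/to/checkpoint.ckpt --add-plots /path/to/plot.png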
+
+"""
+
+import logging
+import os
+
+from ..entry import ExperimentCatalogueEntry
+from ._base import BaseCommand
+
+LOG = logging.getLogger(__name__)
+
+
+class Experiments(BaseCommand):
+    internal = True
+    timestamp = True
+    entry_class = ExperimentCatalogueEntry
+    kind = "experiment"
+
+    def add_arguments(self, command_parser):
+        command_parser.add_argument("NAME_OR_PATH", help=f"The name or the path of the {self.kind}")
+        command_parser.add_argument("--register", help=f"Register the {self.kind}", action="store_true")
+        command_parser.add_argument(
+            "--unregister",
+            help="Remove from the catalogue (without deleting the underlying data)",
+            action="store_true",
+        )
+        command_parser.add_argument("--json", help="Output json record", action="store_true")
+        # command_parser.add_argument("--delete", help=f"Delete the {self.kind} from the catalogue and from any other location", action="store_true")
+
+        command_parser.add_argument("--add-weights", nargs="+", help="Add weights to the experiment")
+        command_parser.add_argument("--add-plots", nargs="+", help="Add plots to the experiment")
+        command_parser.add_argument("--overwrite", help="Overwrite if already exists", action="store_true")
+
+    def check_arguments(self, args):
+        pass
+
+    def is_path(self, name_or_path):
+        if not os.path.exists(name_or_path):
+            return False
+        if not name_or_path.endswith(".yaml"):
+            return False
+        return True
+
+    def run_from_identifier(
+        self,
+        identifier,
+        json,
+        add_weights,
+        add_plots,
+        unregister,
+        overwrite,
+        **kwargs,
+    ):
+        self.warn_unused_arguments(kwargs)
+
+        entry = self.entry_class(key=identifier)
+
+        if add_weights:
+            for w in add_weights:
+                entry.add_weights(w)
+        if add_plots:
+            for p in add_plots:
+                entry.add_plots(p)
+
+        if unregister:
+            entry.unregister()
+
+        # if delete:
+        #     entry.delete()
+
+        if json:
+            print(entry.as_json())
+
+    def run_from_path(
+        self,
+        path,
+        register,
+        unregister,
+        add_weights,
+        add_plots,
+        overwrite,
+        json,
+        **kwargs,
+    ):
+        self.warn_unused_arguments(kwargs)
+
+        entry = self.entry_class(path=path)
+
+        if unregister:
+            entry.unregister()
+        if register:
+            entry.register()
+        if add_weights:
+            for w in add_weights:
+                entry.add_weights(w)
+        if add_plots:
+            for p in add_plots:
+                entry.add_plots(p)
+
+        # if delete:
+        #     entry.delete()
+
+        if json:
+            print(entry.as_json())
+
+
+command = Experiments
diff --git a/src/anemoi/registry/commands/hello.py b/src/anemoi/registry/commands/hello.py
deleted file mode 100644
index 12a0495..0000000
--- a/src/anemoi/registry/commands/hello.py
+++ /dev/null
@@ -1,32 +0,0 @@
-#!/usr/bin/env python
-# (C) Copyright 2024 ECMWF.
-#
-# This software is licensed under the terms of the Apache Licence Version 2.0
-# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
-# In applying this licence, ECMWF does not waive the privileges and immunities
-# granted to it by virtue of its status as an intergovernmental organisation
-# nor does it submit to any jurisdiction.
-#
-
-"""Command place holder. Delete when we have real commands.
-
-"""
-
-from . import Command
-
-
-def say_hello(greetings, who):
-    print(greetings, who)
-
-
-class Hello(Command):
-
-    def add_arguments(self, command_parser):
-        command_parser.add_argument("--greetings", default="hello")
-        command_parser.add_argument("--who", default="world")
-
-    def run(self, args):
-        say_hello(args.greetings, args.who)
-
-
-command = Hello
diff --git a/src/anemoi/registry/commands/list.py b/src/anemoi/registry/commands/list.py
new file mode 100644
index 0000000..1f0bade
--- /dev/null
+++ b/src/anemoi/registry/commands/list.py
@@ -0,0 +1,59 @@
+#!/usr/bin/env python
+# (C) Copyright 2024 ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+#
+
+"""Command to list the entries of the catalogue (experiments, weights, datasets).
+
+"""
+
+import json
+import logging
+
+from anemoi.registry.rest import ReadOnlyRest as Rest
+
+from . import Command
+
+LOG = logging.getLogger(__name__)
+
+
+class List(Command):
+    internal = True
+    timestamp = True
+
+    def add_arguments(self, command_parser):
+        sub_parser = command_parser.add_subparsers(dest="subcommand")
+
+        experiment = sub_parser.add_parser("experiments")  # noqa: F841
+        checkpoint = sub_parser.add_parser("weights")  # noqa: F841
+        dataset = sub_parser.add_parser("datasets")  # noqa: F841
+
+    def check_arguments(self, args):
+        pass
+
+    def run(self, args):
+        if not args.subcommand:
+            raise ValueError("Missing subcommand")
+
+        subcommand = f"run_{args.subcommand.replace('-', '_')}"
+        return getattr(self, subcommand)(args)
+
+    def run_experiments(self, args):
+        payload = Rest().get("experiments")
+        print(json.dumps(payload, indent=2))
+
+    def run_weights(self, args):
+        payload = Rest().get("weights")
+        print(json.dumps(payload, indent=2))
+
+    def run_datasets(self, args):
+        payload = Rest().get("datasets")
+        print(json.dumps(payload, indent=2))
+
+
+command = List
diff --git a/src/anemoi/registry/commands/weights.py b/src/anemoi/registry/commands/weights.py
new file mode 100644
index 0000000..75e0e85
--- /dev/null
+++ b/src/anemoi/registry/commands/weights.py
@@ -0,0 +1,112 @@
+#!/usr/bin/env python
+# (C) Copyright 2024 ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+#
+
+"""Command to manage model weights (checkpoints) in the catalogue.
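+
+A hedged usage sketch (the uuid, platform and paths are illustrative)::
+
+    anemoi-registry weights /path/to/checkpoint.ckpt --register
+    anemoi-registry weights <uuid> --add-location platform=hpc path=/path/on/hpc/checkpoint.ckpt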
+ +""" + +import logging + +from ..entry import WeightCatalogueEntry +from ._base import BaseCommand + +LOG = logging.getLogger(__name__) + + +class Weights(BaseCommand): + internal = True + timestamp = True + entry_class = WeightCatalogueEntry + kind = "weights" + + def add_arguments(self, command_parser): + command_parser.add_argument("NAME_OR_PATH", help=f"The name or the path of the {self.kind}") + command_parser.add_argument("--register", help=f"Register the {self.kind}", action="store_true") + command_parser.add_argument( + "--unregister", + help="Remove from catalogue (without deleting all)", + action="store_true", + ) + # command_parser.add_argument("--delete", help=f"Delete the {self.kind} from the catalogue and from any other location", action="store_true") + command_parser.add_argument("--json", help="Output json record", action="store_true") + + command_parser.add_argument("--add-location", nargs="+", help="Add a location to the weights") + + command_parser.add_argument("--overwrite", help="Overwrite any existing weights", action="store_true") + + def check_arguments(self, args): + pass + + def parse_location(self, location): + for x in location: + if "=" not in x: + raise ValueError(f"Invalid location format '{x}', use 'key1=value1 key2=value2' list.") + return {x.split("=")[0]: x.split("=")[1] for x in location} + + def warn_unused_arguments(self, kwargs): + for k, v in kwargs.items(): + if v: + LOG.info(f"Ignoring argument {k}={v}") + + def run_from_identifier( + self, + identifier, + add_location, + json, + unregister, + remove_location=False, + **kwargs, + ): + self.warn_unused_arguments(kwargs) + + entry = self.entry_class(key=identifier) + + if add_location: + entry.add_location(**add_location) + if remove_location: + entry.remove_location(**remove_location) + if unregister: + entry.unregister() + + if json: + print(entry.as_json()) + + def run_from_path( + self, + path, + unregister, + register, + add_location, + overwrite, + json, + remove_location=False, + **kwargs, + ): + self.warn_unused_arguments(kwargs) + + entry = self.entry_class(path=path) + + if unregister: + entry.unregister() + if register: + entry.register(overwrite=overwrite) + + if add_location: + entry.add_location(**add_location) + # if remove_location: + # entry.remove_location(**remove_location) + # if delete: + # entry.delete() + + if json: + print(entry.as_json()) + + +command = Weights diff --git a/src/anemoi/registry/entry/__init__.py b/src/anemoi/registry/entry/__init__.py new file mode 100644 index 0000000..be159a4 --- /dev/null +++ b/src/anemoi/registry/entry/__init__.py @@ -0,0 +1,313 @@ +# (C) Copyright 2023 European Centre for Medium-Range Weather Forecasts. +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. 
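+
+"""Catalogue entries for experiments, weights (checkpoints) and datasets.
+
+A hedged usage sketch (the path and platform are illustrative)::
+
+    from anemoi.registry.entry import DatasetCatalogueEntry
+
+    entry = DatasetCatalogueEntry(path="/path/to/dataset.zarr")
+    entry.register()
+    entry.add_location(platform="ewc", path="s3://bucket/dataset.zarr")
+"""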
+
+import datetime
+import json
+import logging
+import os
+
+import requests
+import yaml
+from anemoi.datasets import open_dataset
+from anemoi.utils.checkpoints import load_metadata as load_checkpoint_metadata
+from anemoi.utils.s3 import upload
+
+from anemoi.registry import config
+from anemoi.registry.rest import AlreadyExists
+from anemoi.registry.rest import Rest
+
+# from anemoi.registry.rest import DryRunRest as Rest
+
+
+def json_dump_pretty(obj, max_line_length=120):
+    """Custom JSON dump function that keeps dicts and lists on one line if they are short enough.
+
+    Parameters
+    ----------
+    obj
+        The object to be dumped as JSON.
+    max_line_length
+        Maximum allowed line length for pretty-printing.
+
+    Returns
+    -------
+    str
+        JSON string.
+    """
+
+    def _format_json(obj, indent_level=0):
+        """Helper function to format JSON objects with custom pretty-print rules.
+
+        Parameters
+        ----------
+        obj
+            The object to format.
+        indent_level
+            Current indentation level.
+
+        Returns
+        -------
+        str
+            Formatted JSON string.
+        """
+        indent = " " * 4 * indent_level
+        if isinstance(obj, dict):
+            items = []
+            for key, value in obj.items():
+                items.append(f'"{key}": {_format_json(value, indent_level + 1)}')
+            line = "{" + ", ".join(items) + "}"
+            if len(line) <= max_line_length:
+                return line
+            else:
+                return "{\n" + ",\n".join([f"{indent}    {item}" for item in items]) + "\n" + indent + "}"
+        elif isinstance(obj, list):
+            items = [_format_json(item, indent_level + 1) for item in obj]
+            line = "[" + ", ".join(items) + "]"
+            if len(line) <= max_line_length:
+                return line
+            else:
+                return "[\n" + ",\n".join([f"{indent}    {item}" for item in items]) + "\n" + indent + "]"
+        elif isinstance(obj, datetime.datetime):
+            return obj.isoformat()
+        else:
+            return json.dumps(obj)
+
+    return _format_json(obj)
+
+
+LOG = logging.getLogger(__name__)
+
+
+class CatalogueEntryNotFound(Exception):
+    pass
+
+
+class CatalogueEntry:
+    record = None
+    path = None
+    key = None
+    rest = Rest()
+
+    def __init__(self, key=None, path=None):
+        assert key is not None or path is not None, "key or path must be provided"
+
+        if path is not None:
+            assert key is None
+            self.load_from_path(path)
+            assert self.record is not None
+        else:
+            assert key is not None
+            self.load_from_key(key)
+            assert self.record is not None
+
+        assert self.key is not None, "key must be provided"
+
+    def as_json(self):
+        return json_dump_pretty(self.record)
+
+    @classmethod
+    def key_exists(cls, key):
+        try:
+            cls._get_record_from_catalogue(key)
+            return True
+        except CatalogueEntryNotFound:
+            return False
+
+    def load_from_key(self, key):
+        self.key = key
+        self.record = self._get_record_from_catalogue(key)
+
+    @classmethod
+    def _get_record_from_catalogue(cls, key):
+        try:
+            return cls.rest.get(f"{cls.collection_api}/{key}")
+        except requests.exceptions.HTTPError as e:
+            if e.response.status_code == 404:
+                raise CatalogueEntryNotFound(f"Could not find any {cls.collection_api} with key={key}")
+            raise
+
+    @property
+    def main_key(self):
+        raise NotImplementedError("Subclasses must implement this property")
+
+    def register(self, ignore_existing=True, overwrite=False):
+        try:
+            return self.rest.post(self.collection_api, self.record)
+        except AlreadyExists:
+            if ignore_existing:
+                return
+            if overwrite is True:
+                LOG.warning(f"{self.key} already exists. Replacing the existing entry.")
+                return self.replace()
+            raise
+
+    def replace(self):
+        return self.rest.put(f"{self.collection_api}/{self.key}", self.record)
+
+    def patch(self, payload):
+        return self.rest.patch(f"{self.collection_api}/{self.key}", payload)
+
+    def unregister(self, force=False):
+        if not self.rest.config.get("allow_delete"):
+            raise ValueError("Unregister not allowed")
+        return self.rest.delete(f"{self.collection_api}/{self.key}?force=True")
+
+    def __repr__(self):
+        return json.dumps(self.record, indent=2)
+
+
+class ExperimentCatalogueEntry(CatalogueEntry):
+    collection_api = "experiments"
+    main_key = "expver"
+
+    def load_from_path(self, path):
+        assert os.path.exists(path), f"{path} does not exist"
+        assert path.endswith(".yaml"), f"{path} must be a yaml file"
+
+        with open(path, "r") as file:
+            config = yaml.safe_load(file)
+
+        metadata = config.pop("metadata")
+        metadata["config"] = config
+        expver = metadata["expver"]
+
+        self.key = expver
+        self.record = dict(expver=expver, metadata=metadata)
+
+    def add_plots(self, path, target=None):
+        """target is a pattern: s3://bucket/{expver}/{basename}"""
+
+        if not os.path.exists(path):
+            raise FileNotFoundError(f"Could not find plot at {path}")
+
+        if target is None:
+            target = config()["plots_uri_pattern"]
+        basename = os.path.basename(path)
+        target = target.format(expver=self.key, basename=basename, filename=basename)
+
+        LOG.info(f"Uploading {path} to {target}.")
+        upload(path, target, overwrite=True)
+
+        dic = dict(url=target, name=basename, path=path)
+        patch = [{"op": "add", "path": "/plots/-", "value": dic}]
+        self.patch(patch)
+
+    def add_weights(self, path):
+        """target is a pattern: s3://bucket/{uuid}"""
+
+        weights = WeightCatalogueEntry(path=path)
+        if not WeightCatalogueEntry.key_exists(weights.key):
+            weights.register(ignore_existing=False, overwrite=False)
+            weights.upload(path, overwrite=False)
+        else:
+            other = WeightCatalogueEntry(key=weights.key)
+            if other.record["metadata"]["timestamp"] == weights.record["metadata"]["timestamp"]:
+                LOG.info(
+                    f"Not updating weights with key={weights.key}, because it already exists and has the same timestamp"
+                )
+            else:
+                raise ValueError(f"Conflicting weights with key={weights.key}")
+
+        dic = dict(uuid=weights.key, path=path)
+        patch = [{"op": "add", "path": "/checkpoints/-", "value": dic}]
+        self.patch(patch)
+
+
+class WeightCatalogueEntry(CatalogueEntry):
+    collection_api = "weights"
+    main_key = "uuid"
+
+    def add_location(self, platform, path):
+        patch = [{"op": "add", "path": f"/locations/{platform}", "value": {"path": path}}]
+        self.patch(patch)
+
+    def default_location(self, **kwargs):
+        uri = config()["weights_uri_pattern"]
+        uri = uri.format(uuid=self.key, **kwargs)
+        return uri
+
+    def default_platform(self):
+        return config()["weights_platform"]
+
+    def upload(self, path, target=None, overwrite=False):
+        if target is None:
+            target = self.default_location()
+
+        LOG.info(f"Uploading {path} to {target}.")
+        upload(path, target, overwrite=overwrite, resume=not overwrite)
+        return target
+
+    def register(self, ignore_existing=True, overwrite=False):
+        assert self.path is not None, "path must be provided"
+
+        platform = self.default_platform()
+        target = self.upload(self.path)
+        super().register(ignore_existing=ignore_existing, overwrite=overwrite)
+        self.add_location(platform=platform, path=target)
+
+    def load_from_path(self, path):
+        self.path = path
+        assert os.path.exists(path), f"{path} does not exist"
+
+        metadata = load_checkpoint_metadata(path)
+        assert "path" not in metadata
+        metadata["path"] = os.path.abspath(path)
+
+        metadata["size"] = os.path.getsize(path)
+
+        uuid = metadata.get("uuid")
+        if uuid is None:
+            uuid = metadata["run_id"]
+            LOG.warning(f"Could not find 'uuid' in {path}, using 'run_id' instead: {uuid}")
+
+        self.key = uuid
+        self.record = dict(uuid=uuid, metadata=metadata)
+
+
+class DatasetCatalogueEntry(CatalogueEntry):
+    collection_api = "datasets"
+    main_key = "name"
+
+    def set_status(self, status):
+        patch = [{"op": "add", "path": "/status", "value": status}]
+        self.patch(patch)
+
+    def add_location(self, platform, path):
+        patch = [{"op": "add", "path": f"/locations/{platform}", "value": {"path": path}}]
+        self.patch(patch)
+
+    def load_from_path(self, path):
+        import zarr
+
+        if not path.startswith("/") and not path.startswith("s3://"):
+            LOG.warning(f"Dataset path is not absolute: {path}")
+        if not os.path.exists(path) and not path.startswith("s3://"):
+            LOG.warning(f"Dataset path does not exist: {path}")
+        if not (path.endswith(".zarr") or path.endswith(".zip")):
+            LOG.warning("Dataset path extension is neither .zarr nor .zip")
+
+        name, _ = os.path.splitext(os.path.basename(path))
+
+        z = zarr.open(path)
+        ds = open_dataset(path)
+
+        metadata = z.attrs.asdict()
+
+        assert "statistics" not in metadata
+        metadata["statistics"] = {k: v.tolist() for k, v in ds.statistics.items()}
+
+        assert "shape" not in metadata
+        metadata["shape"] = z.data.shape
+
+        assert "dtype" not in metadata
+        metadata["dtype"] = str(ds.dtype)
+
+        assert "chunks" not in metadata
+        metadata["chunks"] = ds.chunks
+
+        self.key = name
+        self.record = dict(name=name, metadata=metadata)
diff --git a/src/anemoi/registry/rest.py b/src/anemoi/registry/rest.py
new file mode 100644
index 0000000..d4b471a
--- /dev/null
+++ b/src/anemoi/registry/rest.py
@@ -0,0 +1,185 @@
+# (C) Copyright 2023 European Centre for Medium-Range Weather Forecasts.
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+import datetime
+import logging
+import os
+import socket
+import sys
+from getpass import getuser
+
+import requests
+from requests.exceptions import HTTPError
+
+from anemoi.registry import config
+from anemoi.registry._version import version
+
+LOG = logging.getLogger(__name__)
+# LOG.setLevel(logging.DEBUG)
+
+"""~/.aws/credentials
+
+[default]
+endpoint_url = https://object-store.os-api.cci1.ecmwf.int
+aws_access_key_id=xxx
+aws_secret_access_key=xxxx
+
+"""
+
+
+class AlreadyExists(ValueError):
+    pass
+
+
+def tidy(d):
+    if isinstance(d, dict):
+        return {k: tidy(v) for k, v in d.items()}
+
+    if isinstance(d, list):
+        return [tidy(v) for v in d if v is not None]
+
+    # jsonschema does not support datetime.date
+    if isinstance(d, datetime.datetime):
+        return d.isoformat()
+
+    if isinstance(d, datetime.date):
+        return d.isoformat()
+
+    return d
+
+
+class BaseRest:
+    def __init__(self):
+        self.config = config()
+
+    def get(self, collection):
+        LOG.debug(f"GET {collection}")
+        try:
+            r = requests.get(f"{self.config.api_url}/{collection}", headers={"Authorization": f"Bearer {self.token}"})
+            self.raise_for_status(r)
+            return r.json()
+        except Exception as e:
+            LOG.error(e)
+            raise
+
+    def post(self, collection, data):
+        LOG.debug(f"POST {collection} { {k:'...' for k,v in data.items()} }")
+
+    def patch(self, collection, data):
+        LOG.debug(f"PATCH {collection} {data}")
+
+    def put(self, collection, data):
+        LOG.debug(f"PUT {collection} {data}")
+
+    def delete(self, collection):
+        LOG.debug(f"DELETE {collection}")
+
+    def trace_info(self):
+        trace = {}
+        trace["tool_path"] = __file__
+        trace["tool_cmd"] = sys.argv
+        trace["user"] = getuser()
+        trace["host"] = socket.gethostname()
+        trace["pid"] = os.getpid()
+        trace["timestamp"] = datetime.datetime.now().isoformat()
+        trace["version"] = version
+        return trace
+
+    def trace_info_dict(self):
+        return dict(_trace_info=self.trace_info())
+
+    @property
+    def token(self):
+        return self.config.api_token
+
+    def raise_for_status(self, r):
+        try:
+            r.raise_for_status()
+        except HTTPError as e:
+            # add the response text to the exception message
+            text = r.text
+            text = text[:1000] + "..." if len(text) > 1000 else text
+            e.args = (f"{e.args[0]} : {text}",)
+            raise e
+
+
+class ReadOnlyRest(BaseRest):
+    pass
+
+
+class Rest(BaseRest):
+    def raise_for_status(self, r):
+        try:
+            r.raise_for_status()
+        except HTTPError as e:
+            # add the response text to the exception message
+            text = r.text
+            text = text[:1000] + "..." if len(text) > 1000 else text
+            e.args = (f"{e.args[0]} : {text}",)
+            raise e
+
+    def post(self, collection, data):
+        super().post(collection, data)
+        try:
+            r = requests.post(
+                f"{self.config.api_url}/{collection}",
+                json=tidy(data),
+                headers={"Authorization": f"Bearer {self.token}"},
+            )
+            self.raise_for_status(r)
+            return r.json()
+        except requests.exceptions.HTTPError as e:
+            if e.response.status_code == 409:
+                raise AlreadyExists(f"{e} Already exists in {collection}")
+            else:
+                LOG.error(f"Error in post to {collection} with data: {data}")
+                LOG.error(e)
+                raise
+        except Exception as e:
+            LOG.error(f"Error in post to {collection} with data: {data}")
+            LOG.error(e)
+            raise
+
+    def patch(self, collection, data):
+        super().patch(collection, data)
+        try:
+            r = requests.patch(
+                f"{self.config.api_url}/{collection}",
+                json=tidy(data),
+                headers={"Authorization": f"Bearer {self.token}"},
+            )
+            self.raise_for_status(r)
+            return r.json()
+        except Exception as e:
+            LOG.error(e)
+            raise
+
+    def put(self, collection, data):
+        super().put(collection, data)
+        try:
+            r = requests.put(
+                f"{self.config.api_url}/{collection}",
+                json=tidy(data),
+                headers={"Authorization": f"Bearer {self.token}"},
+            )
+            self.raise_for_status(r)
+            return r.json()
+        except Exception as e:
+            LOG.error(e)
+            raise
+
+    def delete(self, collection):
+        super().delete(collection)
+        try:
+            r = requests.delete(
+                f"{self.config.api_url}/{collection}", headers={"Authorization": f"Bearer {self.token}"}
+            )
+            self.raise_for_status(r)
+            return r.json()
+        except Exception as e:
+            LOG.error(e)
+            raise
diff --git a/src/anemoi/registry/s3.py b/src/anemoi/registry/s3.py
deleted file mode 100644
index 00d8810..0000000
--- a/src/anemoi/registry/s3.py
+++ /dev/null
@@ -1,384 +0,0 @@
-# (C) Copyright 2024 European Centre for Medium-Range Weather Forecasts.
-# This software is licensed under the terms of the Apache Licence Version 2.0
-# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
-# In applying this licence, ECMWF does not waive the privileges and immunities
-# granted to it by virtue of its status as an intergovernmental organisation
-# nor does it submit to any jurisdiction.
-
-"""This module provides functions to upload, download, list and delete files and folders on S3.
-The functions of this package expect that the AWS credentials are set up in the environment -typicaly by setting the `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` environment variables or -by creating a `~/.aws/credentials` file. It is also possible to set the `endpoint_url` in the same file -to use a different S3 compatible service:: - - [default] - endpoint_url = https://some-storage.somewhere.world - aws_access_key_id = xxxxxxxxxxxxxxxxxxxxxxxx - aws_secret_access_key = xxxxxxxxxxxxxxxxxxxxxxxx - -""" - -import concurrent -import logging -import os -import threading - -import tqdm - -from .humanize import bytes - -LOGGER = logging.getLogger(__name__) - - -# s3_clients are not thread-safe, so we need to create a new client for each thread - -thread_local = threading.local() - - -def _s3_client(): - import boto3 - - if not hasattr(thread_local, "s3_client"): - thread_local.s3_client = boto3.client("s3") - return thread_local.s3_client - - -def _upload_file(source, target, overwrite=False, resume=False, verbosity=1): - from botocore.exceptions import ClientError - - assert target.startswith("s3://") - - _, _, bucket, key = target.split("/", 3) - - size = os.path.getsize(source) - - if verbosity > 0: - LOGGER.info(f"Uploading {source} to {target} ({bytes(size)})") - - s3_client = _s3_client() - - try: - results = s3_client.head_object(Bucket=bucket, Key=key) - remote_size = int(results["ContentLength"]) - except ClientError as e: - if e.response["Error"]["Code"] != "404": - raise - remote_size = None - - if remote_size is not None: - if remote_size != size: - LOGGER.warning(f"{target} already exists, but with different size, re-uploading") - overwrite = True - - if resume: - LOGGER.info(f"{target} already exists, skipping") - return - - if remote_size is not None and not overwrite: - raise ValueError(f"{target} already exists, use 'overwrite' to replace or 'resume' to skip") - - if verbosity > 0: - with tqdm.tqdm(total=size, unit="B", unit_scale=True, leave=False) as pbar: - s3_client.upload_file(source, bucket, key, Callback=lambda x: pbar.update(x)) - else: - s3_client.upload_file(source, bucket, key) - - return size - - -def _local_file_list(source): - for root, _, files in os.walk(source): - for file in files: - yield os.path.join(root, file) - - -def _upload_folder(source, target, overwrite=False, resume=False, threads=1, verbosity=1): - - with concurrent.futures.ThreadPoolExecutor(max_workers=threads) as executor: - try: - if verbosity > 0: - LOGGER.info(f"Uploading {source} to {target}") - - total = 0 - ready = 0 - - futures = [] - for local_path in _local_file_list(source): - relative_path = os.path.relpath(local_path, source) - s3_path = os.path.join(target, relative_path) - futures.append( - executor.submit( - _upload_file, - local_path, - s3_path, - overwrite, - resume, - verbosity - 1, - ) - ) - total += os.path.getsize(local_path) - - if len(futures) % 10000 == 0: - if verbosity > 0: - LOGGER.info(f"Preparing upload, {len(futures):,} files... 
({bytes(total)})") - done, _ = concurrent.futures.wait( - futures, - timeout=0.001, - return_when=concurrent.futures.FIRST_EXCEPTION, - ) - # Trigger exceptions if any - for n in done: - ready += n.result() - - if verbosity > 0: - LOGGER.info(f"Uploading {len(futures):,} files ({bytes(total)})") - with tqdm.tqdm(total=total, initial=ready, unit="B", unit_scale=True) as pbar: - for future in futures: - pbar.update(future.result()) - else: - for future in futures: - future.result() - - except Exception: - executor.shutdown(wait=False, cancel_futures=True) - raise - - -def upload(source, target, overwrite=False, resume=False, threads=1, verbosity=True): - """Upload a file or a folder to S3. - - Parameters - ---------- - source : str - A path to a file or a folder to upload. - target : str - A URL to a file or a folder on S3. The url should start with 's3://'. - overwrite : bool, optional - If the data is alreay on S3 it will be overwritten, by default False - resume : bool, optional - If the data is alreay on S3 it will not be uploaded, unless the remote file - has a different size, by default False - threads : int, optional - The number of threads to use when uploading a directory, by default 1 - """ - if os.path.isdir(source): - _upload_folder(source, target, overwrite, resume, threads) - else: - _upload_file(source, target, overwrite, resume) - - -def _download_file(source, target, overwrite=False, resume=False, verbosity=0): - s3_client = _s3_client() - _, _, bucket, key = source.split("/", 3) - - response = s3_client.head_object(Bucket=bucket, Key=key) - size = int(response["ContentLength"]) - - if verbosity > 0: - LOGGER.info(f"Downloading {source} to {target} ({bytes(size)})") - - if overwrite: - resume = False - - if resume: - if os.path.exists(target): - if os.path.getsize(target) != size: - LOGGER.warning(f"{target} already with different size, re-downloading") - else: - if verbosity > 0: - LOGGER.info(f"{target} already exists, skipping") - return - - if os.path.exists(target) and not overwrite: - raise ValueError(f"{target} already exists, use 'overwrite' to replace or 'resume' to skip") - - if verbosity > 0: - with tqdm.tqdm(total=size, unit="B", unit_scale=True, leave=False) as pbar: - s3_client.download_file(bucket, key, target, Callback=lambda x: pbar.update(x)) - else: - s3_client.download_file(bucket, key, target) - - return size - - -def _download_folder(source, target, *, overwrite=False, resume=False, verbosity=1, threads=1): - assert verbosity > 0 - source = source.rstrip("/") - _, _, bucket, folder = source.split("/", 3) - - with concurrent.futures.ThreadPoolExecutor(max_workers=threads) as executor: - try: - if verbosity > 0: - LOGGER.info(f"Downloading {source} to {target}") - - total = 0 - ready = 0 - - futures = [] - for o in _list_objects(source): - name, size = o["Key"], o["Size"] - local_path = os.path.join(target, os.path.relpath(name, folder)) - os.makedirs(os.path.dirname(local_path), exist_ok=True) - futures.append( - executor.submit( - _download_file, - f"s3://{bucket}/{name}", - local_path, - overwrite, - resume, - verbosity - 1, - ) - ) - total += size - if len(futures) % 10000 == 0: - if verbosity > 0: - LOGGER.info(f"Preparing download, {len(futures):,} files... 
({bytes(total)})") - - done, _ = concurrent.futures.wait( - futures, - timeout=0.001, - return_when=concurrent.futures.FIRST_EXCEPTION, - ) - # Trigger exceptions if any - for n in done: - ready += n.result() - - if verbosity > 0: - LOGGER.info(f"Downloading {len(futures):,} files ({bytes(total)})") - with tqdm.tqdm(total=total, initial=ready, unit="B", unit_scale=True) as pbar: - for future in futures: - pbar.update(future.result()) - else: - for future in futures: - future.result() - - except Exception: - executor.shutdown(wait=False, cancel_futures=True) - raise - - -def download(source, target, *, overwrite=False, resume=False, verbosity=1, threads=1): - """Download a file or a folder from S3. - - Parameters - ---------- - source : str - The URL of a file or a folder on S3. The url should start with 's3://'. If the URL ends with a '/' it is - assumed to be a folder, otherwise it is assumed to be a file. - target : str - The local path where the file or folder will be downloaded. - overwrite : bool, optional - If false, files which have already been download will be skipped, unless their size - does not match their size on S3 , by default False - resume : bool, optional - If the data is alreay on local it will not be downloaded, unless the remote file - has a different size, by default False - threads : int, optional - The number of threads to use when downloading a directory, by default 1 - """ - assert source.startswith("s3://") - - if source.endswith("/"): - _download_folder( - source, - target, - overwrite=overwrite, - resume=resume, - verbosity=verbosity, - threads=threads, - ) - else: - _download_file(source, target, overwrite=overwrite, resume=resume, verbosity=verbosity) - - -def _list_objects(target, batch=False): - s3_client = _s3_client() - _, _, bucket, prefix = target.split("/", 3) - - paginator = s3_client.get_paginator("list_objects_v2") - - for page in paginator.paginate(Bucket=bucket, Prefix=prefix): - if "Contents" in page: - objects = page["Contents"] - if batch: - yield objects - else: - yield from objects - - -def _delete_folder(target): - s3_client = _s3_client() - _, _, bucket, _ = target.split("/", 3) - - for batch in _list_objects(target, batch=True): - s3_client.delete_objects(Bucket=bucket, Delete={"Objects": batch}) - LOGGER.info(f"Deleted {len(batch)} objects") - - -def _delete_file(target): - from botocore.exceptions import ClientError - - s3_client = _s3_client() - _, _, bucket, key = target.split("/", 3) - - try: - s3_client.head_object(Bucket=bucket, Key=key) - exits = True - except ClientError as e: - if e.response["Error"]["Code"] != "404": - raise - exits = False - - if not exits: - LOGGER.warning(f"{target} does not exist. Did you mean to delete a folder? Then add a trailing '/'") - return - - LOGGER.info(f"Deleting {target}") - print(s3_client.delete_object(Bucket=bucket, Key=key)) - LOGGER.info(f"{target} is deleted") - - -def delete(target): - """Delete a file or a folder from S3. - - Parameters - ---------- - target : str - The URL of a file or a folder on S3. The url should start with 's3://'. If the URL ends with a '/' it is - assumed to be a folder, otherwise it is assumed to be a file. - """ - - assert target.startswith("s3://") - - if target.endswith("/"): - _delete_folder(target) - else: - _delete_file(target) - - -def list_folders(folder): - """List the sub folders in a folder on S3. - - Parameters - ---------- - folder : str - The URL of a folder on S3. The url should start with 's3://'. 
- - Returns - ------- - list - A list of the subfolders names in the folder. - """ - - assert folder.startswith("s3://") - if not folder.endswith("/"): - folder += "/" - - _, _, bucket, prefix = folder.split("/", 3) - - s3_client = _s3_client() - paginator = s3_client.get_paginator("list_objects_v2") - - for page in paginator.paginate(Bucket=bucket, Prefix=prefix, Delimiter="/"): - if "CommonPrefixes" in page: - yield from [folder + _["Prefix"] for _ in page.get("CommonPrefixes")] diff --git a/tests/test_graphs.py b/tests/test_rest.py similarity index 73% rename from tests/test_graphs.py rename to tests/test_rest.py index 846ee89..7912358 100644 --- a/tests/test_graphs.py +++ b/tests/test_rest.py @@ -6,9 +6,17 @@ # nor does it submit to any jurisdiction. -def test_graphs(): +import requests + +from anemoi.registry.rest import Rest + +# patch requests to intercept all requests +requests.request = Rest.request + + +def test_rest(): pass if __name__ == "__main__": - test_graphs() + test_rest() From 722e15276bcc8016c5c92df926bac4d7c1132ebd Mon Sep 17 00:00:00 2001 From: Florian Pinault Date: Wed, 26 Jun 2024 13:57:47 +0200 Subject: [PATCH 07/64] doc structure --- docs/cli/datasets.rst | 10 ++++++++++ docs/cli/experiments.rst | 10 ++++++++++ docs/cli/introduction.rst | 27 +++++++++++++++++++++++++++ docs/cli/list.rst | 10 ++++++++++ docs/cli/weights.rst | 10 ++++++++++ docs/index.rst | 19 +++++++++++++++++++ docs/modules/s3.rst | 8 -------- 7 files changed, 86 insertions(+), 8 deletions(-) create mode 100644 docs/cli/datasets.rst create mode 100644 docs/cli/experiments.rst create mode 100644 docs/cli/introduction.rst create mode 100644 docs/cli/list.rst create mode 100644 docs/cli/weights.rst delete mode 100644 docs/modules/s3.rst diff --git a/docs/cli/datasets.rst b/docs/cli/datasets.rst new file mode 100644 index 0000000..7511501 --- /dev/null +++ b/docs/cli/datasets.rst @@ -0,0 +1,10 @@ +datasets +======== + + + +.. argparse:: + :module: anemoi.registry.__main__ + :func: create_parser + :prog: anemoi-registry + :path: datasets diff --git a/docs/cli/experiments.rst b/docs/cli/experiments.rst new file mode 100644 index 0000000..10ae558 --- /dev/null +++ b/docs/cli/experiments.rst @@ -0,0 +1,10 @@ +experiments +=========== + + + +.. argparse:: + :module: anemoi.registry.__main__ + :func: create_parser + :prog: anemoi-registry + :path: experiments diff --git a/docs/cli/introduction.rst b/docs/cli/introduction.rst new file mode 100644 index 0000000..ed2f3ee --- /dev/null +++ b/docs/cli/introduction.rst @@ -0,0 +1,27 @@ +Introduction +============ + +When you install the `anemoi-registry` package, this will also install command line tool +called ``anemoi-registry`` which can be used to manage the registry. + +The tool can provide help with the ``--help`` options: + +.. code-block:: bash + + % anemoi-registry --help + +The commands are: + +.. toctree:: + :maxdepth: 1 + + list + datasets + experiments + weights + +.. argparse:: + :module: anemoi.registry.__main__ + :func: create_parser + :prog: anemoi-registry + :nosubcommands: diff --git a/docs/cli/list.rst b/docs/cli/list.rst new file mode 100644 index 0000000..4599dc8 --- /dev/null +++ b/docs/cli/list.rst @@ -0,0 +1,10 @@ +list +==== + + + +.. argparse:: + :module: anemoi.registry.__main__ + :func: create_parser + :prog: anemoi-registry + :path: list diff --git a/docs/cli/weights.rst b/docs/cli/weights.rst new file mode 100644 index 0000000..d4819c1 --- /dev/null +++ b/docs/cli/weights.rst @@ -0,0 +1,10 @@ +weights +======== + + + +.. 
argparse:: + :module: anemoi.registry.__main__ + :func: create_parser + :prog: anemoi-registry + :path: weights diff --git a/docs/index.rst b/docs/index.rst index c215922..c1e0db4 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -30,6 +30,25 @@ of the *Anemoi* packages. installing +**Command line tool** + +- :doc:`cli/introduction` +- :doc:`cli/list` +- :doc:`cli/datasets` +- :doc:`cli/weights` +- :doc:`cli/experiments` + +.. toctree:: + :maxdepth: 1 + :hidden: + :caption: Command line tool + + cli/introduction + cli/list + cli/datasets + cli/experiments + cli/weights + ********* Modules ********* diff --git a/docs/modules/s3.rst b/docs/modules/s3.rst deleted file mode 100644 index 85f7222..0000000 --- a/docs/modules/s3.rst +++ /dev/null @@ -1,8 +0,0 @@ -#### - s3 -#### - -.. automodule:: anemoi.registry.s3 - :members: - :no-undoc-members: - :show-inheritance: From e91f31d7033cef094b287bfdfe0670e450375c1b Mon Sep 17 00:00:00 2001 From: Florian Pinault Date: Wed, 26 Jun 2024 13:58:57 +0200 Subject: [PATCH 08/64] first draft refactor fix typo refactor wip update github action to trigger on pull requests refactor wip remove trace_info add recipy cleanup refactor refactor clean tasks raise when missing config added entry cli wip wip first / rename queues to tasks refactor wip wip secret config with no "." fix fix fix use tmp to download wip wip wip wip added heartbeat fix up up up --- .github/workflows/python-publish.yml | 2 +- README.md | 2 +- pyproject.toml | 11 +- src/anemoi/registry/__init__.py | 11 +- src/anemoi/registry/commands/__init__.py | 2 +- src/anemoi/registry/commands/_base.py | 81 ------ src/anemoi/registry/commands/base.py | 98 +++++++ src/anemoi/registry/commands/datasets.py | 82 ++---- src/anemoi/registry/commands/entry.py | 208 +++++++++++++++ src/anemoi/registry/commands/experiments.py | 73 +---- src/anemoi/registry/commands/list.py | 81 ++++-- src/anemoi/registry/commands/tasks.py | 124 +++++++++ src/anemoi/registry/commands/weights.py | 78 +----- src/anemoi/registry/commands/worker.py | 76 ++++++ src/anemoi/registry/entry/__init__.py | 278 ++------------------ src/anemoi/registry/entry/dataset.py | 68 +++++ src/anemoi/registry/entry/experiment.py | 78 ++++++ src/anemoi/registry/entry/weights.py | 68 +++++ src/anemoi/registry/rest.py | 234 ++++++++-------- src/anemoi/registry/tasks.py | 138 ++++++++++ src/anemoi/registry/utils.py | 15 ++ src/anemoi/registry/workers.py | 256 ++++++++++++++++++ 22 files changed, 1399 insertions(+), 665 deletions(-) delete mode 100644 src/anemoi/registry/commands/_base.py create mode 100644 src/anemoi/registry/commands/base.py create mode 100644 src/anemoi/registry/commands/entry.py create mode 100644 src/anemoi/registry/commands/tasks.py create mode 100644 src/anemoi/registry/commands/worker.py create mode 100644 src/anemoi/registry/entry/dataset.py create mode 100644 src/anemoi/registry/entry/experiment.py create mode 100644 src/anemoi/registry/entry/weights.py create mode 100644 src/anemoi/registry/tasks.py create mode 100644 src/anemoi/registry/utils.py create mode 100644 src/anemoi/registry/workers.py diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml index 666f65d..759adae 100644 --- a/.github/workflows/python-publish.yml +++ b/.github/workflows/python-publish.yml @@ -6,7 +6,7 @@ name: Upload Python Package on: push: {} - + pull_request: release: types: [created] diff --git a/README.md b/README.md index 44bc618..5d5ec0d 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ This project is 
**BETA** and will be **Experimental** for the foreseeable future Interfaces and functionality are likely to change, and the project itself may be scrapped. **DO NOT** use this software in any project/software that is operational. -A package to manahe a registry or data-driven forecasts. +A package to manage a registry or data-driven forecasts. ## Documentation diff --git a/pyproject.toml b/pyproject.toml index e20cc60..c38b821 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -51,15 +51,19 @@ dynamic = [ ] dependencies = [ "anemoi-datasets", + "jsonpatch", + "requests", ] optional-dependencies.all = [ + "boto3", ] + optional-dependencies.dev = [ + "boto3", "nbsphinx", "pandoc", "pytest", - "requests", "sphinx", "sphinx-argparse", "sphinx-rtd-theme", @@ -70,7 +74,6 @@ optional-dependencies.dev = [ optional-dependencies.docs = [ "nbsphinx", "pandoc", - "requests", "sphinx", "sphinx-argparse", "sphinx-rtd-theme", @@ -78,6 +81,10 @@ optional-dependencies.docs = [ "tomli", ] +optional-dependencies.s3 = [ + "boto3", +] + optional-dependencies.tests = [ "pytest", ] diff --git a/src/anemoi/registry/__init__.py b/src/anemoi/registry/__init__.py index 2d2c2c3..83be460 100644 --- a/src/anemoi/registry/__init__.py +++ b/src/anemoi/registry/__init__.py @@ -14,10 +14,13 @@ def config(): - from anemoi.utils.config import DotDict from anemoi.utils.config import load_config - config = load_config().get("registry") + config = load_config(secrets=["api_token"]) if not config: - LOG.warning(f"No 'registry' section in config. Config is {load_config()}. Limited functionalities.") - return DotDict(config) + raise ValueError("Anemoi config is required.") + + config = config.get("registry") + if not config: + raise ValueError("Section 'registry' is missing in config.") + return config diff --git a/src/anemoi/registry/commands/__init__.py b/src/anemoi/registry/commands/__init__.py index cebb539..6523e77 100644 --- a/src/anemoi/registry/commands/__init__.py +++ b/src/anemoi/registry/commands/__init__.py @@ -19,6 +19,6 @@ COMMANDS = register_commands( os.path.dirname(__file__), __name__, - lambda x: x.command(), + lambda x: x.command() if hasattr(x, "command") else None, lambda name, error: Failed(name, error), ) diff --git a/src/anemoi/registry/commands/_base.py b/src/anemoi/registry/commands/_base.py deleted file mode 100644 index e64a20b..0000000 --- a/src/anemoi/registry/commands/_base.py +++ /dev/null @@ -1,81 +0,0 @@ -#!/usr/bin/env python -# (C) Copyright 2024 ECMWF. -# -# This software is licensed under the terms of the Apache Licence Version 2.0 -# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -# In applying this licence, ECMWF does not waive the privileges and immunities -# granted to it by virtue of its status as an intergovernmental organisation -# nor does it submit to any jurisdiction. -# - -"""Command place holder. Delete when we have real commands. - -""" - -import logging -import os - -from ..entry import CatalogueEntryNotFound -from . 
import Command
-
-LOG = logging.getLogger(__name__)
-
-
-class BaseCommand(Command):
-    internal = True
-    timestamp = True
-
-    def check_arguments(self, args):
-        pass
-
-    def is_path(self, name_or_path):
-        return os.path.exists(name_or_path)
-
-    def is_identifier(self, name_or_path):
-        try:
-            self.entry_class(key=name_or_path)
-            return True
-        except CatalogueEntryNotFound:
-            return False
-
-    def run(self, args):
-        args = vars(args)
-        LOG.debug("anemoi-registry args:", args)
-        if "command" in args:
-            args.pop("command")
-        name_or_path = args.pop("NAME_OR_PATH")
-
-        if args.get("add_location"):
-            args["add_location"] = self.parse_location(args["add_location"])
-        if args.get("remove_location"):
-            args["remove_location"] = self.parse_location(args["remove_location"])
-
-        if self.is_path(name_or_path):
-            LOG.info(f"Found local {self.kind} at {name_or_path}")
-            self.run_from_path(name_or_path, **args)
-            return
-
-        if self.is_identifier(name_or_path):
-            LOG.info(f"Processing {self.kind} with identifier '{name_or_path}'")
-            self.run_from_identifier(name_or_path, **args)
-            return
-
-    def parse_location(self, location):
-        for x in location:
-            if "=" not in x:
-                raise ValueError(f"Invalid location format '{x}', use 'key1=value1 key2=value2' list.")
-        return {x.split("=")[0]: x.split("=")[1] for x in location}
-
-    def warn_unused_arguments(self, kwargs):
-        for k, v in kwargs.items():
-            if v:
-                LOG.warn(f"Ignoring argument {k}={v}")
-
-    def run_from_identifier(self, *args, **kwargs):
-        raise NotImplementedError()
-
-    def run_from_path(self, *args, **kwargs):
-        raise NotImplementedError()
-
-
-command = BaseCommand
diff --git a/src/anemoi/registry/commands/base.py b/src/anemoi/registry/commands/base.py
new file mode 100644
index 0000000..2599df6
--- /dev/null
+++ b/src/anemoi/registry/commands/base.py
@@ -0,0 +1,98 @@
+#!/usr/bin/env python
+# (C) Copyright 2024 ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+#
+
+"""Base class for the registry commands that operate on catalogue entries.
+
+"""
+
+import logging
+import os
+
+from ..entry import CatalogueEntryNotFound
+from . import Command
+
+LOG = logging.getLogger(__name__)
+
+
+class BaseCommand(Command):
+    internal = True
+    timestamp = True
+
+    def check_arguments(self, args):
+        pass
+
+    def is_path(self, name_or_path):
+        return os.path.exists(name_or_path)
+
+    def is_identifier(self, name_or_path):
+        try:
+            self.entry_class(key=name_or_path)
+            return True
+        except CatalogueEntryNotFound:
+            return False
+
+    def process_task(self, entry, args, k, func_name=None, /, **kwargs):
+        """
+        Call the method `k` on the entry object.
+        The args/kwargs given to the method are extracted from the argument `k` in the `args` object.
+
+        Additionally, the argument `k` is cast to the correct type,
+        depending on whether it is a string, int, float, list, dict, or boolean.
+
+        The provided **kwargs are also passed to the method.
+        The method name can be changed by providing the `func_name` argument.
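+
+        For example (an illustrative sketch of the dispatch above): if
+        `args.set_status` is the string "experimental", then calling
+        `process_task(entry, args, "set_status")` results in
+        `entry.set_status("experimental")`.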
+ """ + + assert isinstance(k, str), k + if func_name is None: + func_name = k + + v = getattr(args, k) + + if v is None: + return + if v is True: + LOG.debug(f"{entry.key} : Processing task {k}") + return getattr(entry, func_name)(**kwargs) + if v is False: + return + if isinstance(v, (str, int, float)): + LOG.debug(f"{entry.key} : Processing task {k} with {v}") + return getattr(entry, func_name)(v, **kwargs) + if isinstance(v, list): + v_str = ", ".join(str(x) for x in v) + LOG.debug(f"{entry.key} : Processing task {k} with {v_str}") + return getattr(entry, func_name)(*v, **kwargs) + if isinstance(v, dict): + v_str = ", ".join(f"{k_}={v_}" for k_, v_ in v.items()) + LOG.debug(f"{entry.key} : Processing task {k} with {v_str}") + return getattr(entry, func_name)(**v, **kwargs) + raise ValueError(f"Invalid task {k}={v}. type(v)= {type(v)}") + + def run(self, args): + LOG.debug(f"anemoi-registry args: {args}") + name_or_path = args.NAME_OR_PATH + entry = self.get_entry(name_or_path) + self._run(entry, args) + + def get_entry(self, name_or_path): + if self.is_path(name_or_path): + LOG.info(f"Found local {self.kind} at {name_or_path}") + return self.entry_class(path=name_or_path) + + if self.is_identifier(name_or_path): + LOG.info(f"Processing {self.kind} with identifier '{name_or_path}'") + return self.entry_class(key=name_or_path) + + def run_from_identifier(self, *args, **kwargs): + raise NotImplementedError() + + def run_from_path(self, *args, **kwargs): + raise NotImplementedError() diff --git a/src/anemoi/registry/commands/datasets.py b/src/anemoi/registry/commands/datasets.py index 11acb6b..7833ded 100644 --- a/src/anemoi/registry/commands/datasets.py +++ b/src/anemoi/registry/commands/datasets.py @@ -14,13 +14,15 @@ import logging -from ..entry import DatasetCatalogueEntry -from ._base import BaseCommand +from ..entry.dataset import DatasetCatalogueEntry +from .base import BaseCommand LOG = logging.getLogger(__name__) class Datasets(BaseCommand): + """Manage datasets in the catalogue. Register, add locations, set status, etc.""" + internal = True timestamp = True entry_class = DatasetCatalogueEntry @@ -35,72 +37,26 @@ def add_arguments(self, command_parser): action="store_true", ) # command_parser.add_argument("--delete", help=f"Delete the {self.kind} from the catalogue and from any other location", action="store_true") - command_parser.add_argument("--json", help="Output json record", action="store_true") - command_parser.add_argument("--set-status", help="Set the status to the dataset") - command_parser.add_argument("--add-location", nargs="+", help="Add a location to the dataset") + command_parser.add_argument("--add-recipe", help="Add a recipe file") + command_parser.add_argument( + "--add-location", + nargs="+", + help="Path to add a location to the dataset. 
Implies --platform", + ) + command_parser.add_argument("--platform", help="Platform to add the location to.") def check_arguments(self, args): pass - def run_from_identifier( - self, - identifier, - add_location, - set_status, - unregister, - json, - remove_location=False, - **kwargs, - ): - self.warn_unused_arguments(kwargs) - - entry = self.entry_class(key=identifier) - - if unregister: - entry.unregister() - if add_location: - entry.add_location(**add_location) - if remove_location: - entry.remove_location(**remove_location) - if set_status: - entry.set_status(set_status) - - if json: - print(entry.as_json()) - - def run_from_path( - self, - path, - register, - unregister, - add_location, - json, - set_status, - # remove_location, - # upload, - # upload_uri_pattern, - **kwargs, - ): - self.warn_unused_arguments(kwargs) - - entry = self.entry_class(path=path) - - if register: - entry.register() - if unregister: - entry.unregister() - if add_location: - entry.add_location(**add_location) - # if remove_location: - # entry.remove_location(**remove_location) - if set_status: - entry.set_status(set_status) - # if delete: - # entry.delete() - - if json: - print(entry.as_json()) + def _run(self, entry, args): + # order matters + self.process_task(entry, args, "unregister") + self.process_task(entry, args, "register") + # self.process_task(entry, args, "remove_location") + self.process_task(entry, args, "add_location", platform=args.platform) + self.process_task(entry, args, "add_recipe") + self.process_task(entry, args, "set_status") command = Datasets diff --git a/src/anemoi/registry/commands/entry.py b/src/anemoi/registry/commands/entry.py new file mode 100644 index 0000000..71aee7f --- /dev/null +++ b/src/anemoi/registry/commands/entry.py @@ -0,0 +1,208 @@ +#!/usr/bin/env python +# (C) Copyright 2024 ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. +# + +"""Command place holder. Delete when we have real commands. + +""" + +import json +import logging +import os +import subprocess +from tempfile import TemporaryDirectory + +import jsonpatch +import yaml + +from anemoi.registry.rest import Rest + +from . import Command + +LOG = logging.getLogger(__name__) + + +class Entry(Command): + """Admin tool. Manage entries in the catalogue.""" + + internal = True + timestamp = True + + def add_arguments(self, command_parser): + command_parser.add_argument("path", help="API path to the entry.") + + group = command_parser.add_mutually_exclusive_group(required=True) + + group.add_argument( + "--dump", + action="store_true", + help=( + "Extract the metadata from the entry and print it to the standard output" + " or the file specified by ``--output``, in JSON or YAML format." + ), + ) + group.add_argument( + "--load", + action="store_true", + help="Set the metadata in the entry from the content of a file specified by the ``--input`` argument.", + ) + + group.add_argument( + "--edit", + action="store_true", + help="Edit the metadata in place, using the specified editor. 
See the ``--editor`` argument for more information.",
+        )
+
+        group.add_argument("--remove", action="store_true", help="Remove the entry.")
+
+        command_parser.add_argument("--input", help="The input file name to be used by the ``--load`` option.")
+        command_parser.add_argument("--output", help="The output file name to be used by the ``--dump`` option.")
+        command_parser.add_argument(
+            "--create",
+            action="store_true",
+            help="Create the entry if it does not exist. Use with ``--load``.",
+        )
+
+        command_parser.add_argument(
+            "--editor",
+            help="Editor to use for the ``--edit`` option. Defaults to ``$EDITOR`` if defined, else ``vi``.",
+            default=os.environ.get("EDITOR", "vi"),
+        )
+
+        command_parser.add_argument(
+            "--json", action="store_true", help="Use the JSON format with ``--dump`` and ``--edit``."
+        )
+
+        command_parser.add_argument(
+            "--yaml", action="store_true", help="Use the YAML format with ``--dump`` and ``--edit``."
+        )
+
+    def check_arguments(self, args):
+        pass
+
+    def run(self, args):
+        path = args.path
+        if "/" not in path[1:] or not path.startswith("/"):
+            raise ValueError(f"Invalid API path {path}")
+
+        _, collection, *_ = path.split("/")
+        if collection not in ("datasets", "experiments", "weights", "tasks"):
+            LOG.warning(f"Unknown collection {collection}")
+
+        if args.edit:
+            return self.edit(args)
+
+        if args.remove:
+            return self.remove(args)
+
+        if args.dump:
+            return self.dump(args)
+
+        if args.load:
+            return self.load(args)
+
+    def edit(self, args):
+        rest = Rest()
+
+        if args.json:
+            ext = "json"
+            dump = json.dump
+            load = json.load
+            kwargs = {"indent": 4, "sort_keys": True}
+        else:
+            ext = "yaml"
+            dump = yaml.dump
+            load = yaml.safe_load
+            kwargs = {"default_flow_style": False}
+
+        with TemporaryDirectory() as temp_dir:
+
+            path = os.path.join(temp_dir, f"anemoi-registry-edit.{ext}")
+
+            metadata = rest.get(args.path)
+
+            with open(path, "w") as f:
+                dump(metadata, f, **kwargs)
+
+            subprocess.check_call([args.editor, path])
+
+            with open(path) as f:
+                edited = load(f)
+
+            if edited != metadata:
+                patch = jsonpatch.make_patch(metadata, edited)
+                patch = list(patch)
+                LOG.debug(f"Applying patch to {args.path}: {patch}")
+                rest.patch(args.path, patch)
+                LOG.info(f"{args.path} has been updated.")
+            else:
+                LOG.info("No changes made.")
+
+    def dump(self, args):
+        rest = Rest()
+
+        if args.output:
+            file = open(args.output, "w")
+        else:
+            file = None
+
+        metadata = rest.get(args.path)
+
+        if args.yaml:
+            print(yaml.dump(metadata, indent=2, sort_keys=True), file=file)
+            return
+
+        if args.json or True:
+            print(json.dumps(metadata, indent=4, sort_keys=True), file=file)
+            return
+
+    def load(self, args):
+        rest = Rest()
+
+        if args.input is None:
+            raise ValueError("Please specify a value for --input")
+
+        _, ext = os.path.splitext(args.input)
+        if ext == ".json" or args.json:
+            with open(args.input) as f:
+                edited = json.load(f)
+
+        elif ext in (".yaml", ".yml") or args.yaml:
+            with open(args.input) as f:
+                edited = yaml.safe_load(f)
+
+        else:
+            raise ValueError(f"Unknown file extension {ext}. Please specify --json or --yaml")
+
+        if rest.exists(args.path):
+            # if the entry exists, we patch it.
+            metadata = rest.get(args.path)
+            patch = jsonpatch.make_patch(metadata, edited)
+            patch = list(patch)
+            LOG.debug(f"Applying patch to {args.path}: {patch}")
+            rest.patch(args.path, patch)
+            LOG.info(f"{args.path} has been updated.")
+
+        else:
+            # if the entry does not exist, we post it if requested.
+            if not args.create:
+                LOG.error(f"Entry in {args.path} does not exist. 
Using --create to create it.") + raise ValueError(f"Entry in {args.path} does not exists. Using --create to create it.") + + _, collection, *_ = args.path.split("/") + res = rest.post(collection, edited) + LOG.info(f"Entry in {collection} has been created : {res}.") + + def remove(self, args): + rest = Rest() + rest.delete(args.path) + LOG.info(f"{args.path} has been deleted.") + + +command = Entry diff --git a/src/anemoi/registry/commands/experiments.py b/src/anemoi/registry/commands/experiments.py index a8aff83..1e89a22 100644 --- a/src/anemoi/registry/commands/experiments.py +++ b/src/anemoi/registry/commands/experiments.py @@ -15,13 +15,15 @@ import logging import os -from ..entry import ExperimentCatalogueEntry -from ._base import BaseCommand +from ..entry.experiment import ExperimentCatalogueEntry +from .base import BaseCommand LOG = logging.getLogger(__name__) class Experiments(BaseCommand): + """Manage experiments in the catalogue. Register, unregister, add weights, add plots, etc.""" + internal = True timestamp = True entry_class = ExperimentCatalogueEntry @@ -35,7 +37,6 @@ def add_arguments(self, command_parser): help="Remove from catalogue (without deleting all)", action="store_true", ) - command_parser.add_argument("--json", help="Output json record", action="store_true") # command_parser.add_argument("--delete", help=f"Delete the {self.kind} from the catalogue and from any other location", action="store_true") command_parser.add_argument("--add-weights", nargs="+", help="Add weights to the experiment") @@ -52,67 +53,11 @@ def is_path(self, name_or_path): return False return True - def run_from_identifier( - self, - identifier, - json, - add_weights, - add_plots, - unregister, - overwrite, - **kwargs, - ): - self.warn_unused_arguments(kwargs) - - entry = self.entry_class(key=identifier) - - if add_weights: - for w in add_weights: - entry.add_weights(w) - if add_plots: - for p in add_plots: - entry.add_plots(p) - - if unregister: - entry.unregister() - - # if delete: - # entry.delete() - - if json: - print(entry.as_json()) - - def run_from_path( - self, - path, - register, - unregister, - add_weights, - add_plots, - overwrite, - json, - **kwargs, - ): - self.warn_unused_arguments(kwargs) - - entry = self.entry_class(path=path) - - if unregister: - entry.unregister() - if register: - entry.register() - if add_weights: - for w in add_weights: - entry.add_weights(w) - if add_plots: - for p in add_plots: - entry.add_plots(p) - - # if delete: - # entry.delete() - - if json: - print(entry.as_json()) + def _run(self, entry, args): + self.process_task(entry, args, "unregister") + self.process_task(entry, args, "register", overwrite=args.overwrite) + self.process_task(entry, args, "add_weights") + self.process_task(entry, args, "add_plots") command = Experiments diff --git a/src/anemoi/registry/commands/list.py b/src/anemoi/registry/commands/list.py index 1f0bade..28ac7a8 100644 --- a/src/anemoi/registry/commands/list.py +++ b/src/anemoi/registry/commands/list.py @@ -12,10 +12,15 @@ """ -import json +import datetime import logging -from anemoi.registry.rest import ReadOnlyRest as Rest +from anemoi.utils.humanize import json_pretty_dump +from anemoi.utils.humanize import when +from anemoi.utils.text import table + +from anemoi.registry.rest import RestItemList +from anemoi.registry.utils import list_to_dict from . 
import Command @@ -23,15 +28,27 @@ class List(Command): + """List elements in the catalogue.""" + internal = True timestamp = True def add_arguments(self, command_parser): sub_parser = command_parser.add_subparsers(dest="subcommand") - experiment = sub_parser.add_parser("experiments") # noqa: F841 - checkpoint = sub_parser.add_parser("weights") # noqa: F841 - dataset = sub_parser.add_parser("datasets") # noqa: F841 + experiment = sub_parser.add_parser("experiments") + experiment.add_argument("filter", nargs="*") + + checkpoint = sub_parser.add_parser("weights") + checkpoint.add_argument("filter", nargs="*") + + dataset = sub_parser.add_parser("datasets") + dataset.add_argument("filter", nargs="*") + + # tasks = sub_parser.add_parser("tasks") + # tasks.add_argument("filter", nargs="*") + # tasks.add_argument("-l", "--long", help="Details", action="store_true") + # tasks.add_argument("--sort", help="Sort by date", choices=["created", "updated"], default="updated") def check_arguments(self, args): pass @@ -40,20 +57,46 @@ def run(self, args): if not args.subcommand: raise ValueError("Missing subcommand") - subcommand = f"run_{args.subcommand.replace('-', '_')}" - return getattr(self, subcommand)(args) - - def run_experiments(self, args): - payload = Rest().get("experiments") - print(json.dumps(payload, indent=2)) - - def run_weights(self, args): - payload = Rest().get("weights") - print(json.dumps(payload, indent=2)) - - def run_datasets(self, args): - payload = Rest().get("datasets") - print(json.dumps(payload, indent=2)) + getattr(self, f"run_{args.subcommand}", self._run_default)(args) + + def _run_default(self, args): + collection = args.subcommand + request = list_to_dict(args.filter) + payload = RestItemList(collection).get(params=request) + print(json_pretty_dump(payload)) + + def run_tasks(self, args): + collection = "tasks" + request = list_to_dict(args.filter) + data = RestItemList(collection).get(params=request) + self.print_tasks(data, long=args.long, sort=args.sort) + + def print_tasks(self, data, long=False, sort="updated"): + data = sorted(data, key=lambda x: x[sort]) + + rows = [] + for v in data: + if not isinstance(v, dict): + raise ValueError(v) + created = datetime.datetime.fromisoformat(v.pop("created")) + updated = datetime.datetime.fromisoformat(v.pop("updated")) + + uuid = v.pop("uuid") + content = " ".join(f"{k}={v}" for k, v in v.items()) + if not long: + content = content[:20] + "..." + rows.append( + [ + when(created), + when(updated), + v.pop("status"), + v.pop("progress", ""), + content, + uuid, + ] + ) + print(table(rows, ["Created", "Updated", "Status", "%", "Details", "UUID"], ["<", "<", "<", "<", "<", "<"])) + return command = List diff --git a/src/anemoi/registry/commands/tasks.py b/src/anemoi/registry/commands/tasks.py new file mode 100644 index 0000000..6de4bc0 --- /dev/null +++ b/src/anemoi/registry/commands/tasks.py @@ -0,0 +1,124 @@ +#!/usr/bin/env python +# (C) Copyright 2024 ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. +# + +"""Command place holder. Delete when we have real commands. 
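+
+Example usage (illustrative values; the flags are defined in `add_arguments` below):
+
+    anemoi-registry tasks --list
+    anemoi-registry tasks <uuid> --set-status finished --set-progress 100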
+ +""" + +import logging + +from anemoi.registry.commands.base import BaseCommand +from anemoi.registry.entry import CatalogueEntryNotFound +from anemoi.registry.tasks import TaskCatalogueEntry +from anemoi.registry.tasks import TaskCatalogueEntryList +from anemoi.registry.utils import list_to_dict + +LOG = logging.getLogger(__name__) + + +class Tasks(BaseCommand): + """Admin tool. Manage tasks in the catalogue.""" + + internal = True + timestamp = True + entry_class = TaskCatalogueEntry + + collection = "tasks" + + def add_arguments(self, command_parser): + command_parser.add_argument("TASK", help="The uuid of the task", nargs="?") + command_parser.add_argument("--set-status", help="Set status of the given task", metavar="STATUS") + command_parser.add_argument( + "--set-progress", help="Set progress of the given task (0 to 100 percents)", type=int, metavar="N" + ) + command_parser.add_argument("--own", help="Take ownership of a task", action="store_true") + command_parser.add_argument("--disown", help="Release a task and requeue it", action="store_true") + + group = command_parser.add_mutually_exclusive_group() + group.add_argument("--new", help="Add a new queue entry", nargs="*", metavar="K=V") + group.add_argument( + "--take-one", help="Take ownership of the oldest entry with status=queued", nargs="*", metavar="K=V" + ) + group.add_argument("--list", help="List tasks", nargs="*", metavar="K=V") + group.add_argument("--delete-many", help="Batch remove multiple tasks", nargs="*", metavar="K=V") + + command_parser.add_argument( + "--sort", + help="Sort by date. Use with --list, --take-one", + choices=["created", "updated"], + default="updated", + ) + command_parser.add_argument("-l", "--long", help="Details, use with --list", action="store_true") + command_parser.add_argument("-y", "--yes", help="Assume yes", action="store_true") + + def run(self, args): + if args.TASK is not None and (args.new is not None or args.take_one is not None or args.list is not None): + raise ValueError("Cannot use positional argument TASK with --new, --take-one or --list") + + if args.TASK: + return self.run_with_uuid(args.TASK, args) + if args.new is not None: + self.run_new(args) + if args.take_one is not None: + self.run_take_one(args) + if args.list is not None: + self.run_list(args) + if args.delete_many is not None: + assert args.TASK is None + self.run_delete_many(args) + + def run_with_uuid(self, uuid, args): + + uuid = args.TASK + entry = self.entry_class(key=uuid) + + self.process_task(entry, args, "disown", "release_ownership") + self.process_task(entry, args, "own", "take_ownership") + self.process_task(entry, args, "set_status") + self.process_task(entry, args, "set_progress") + + def run_new(self, args): + cat = TaskCatalogueEntryList() + new = list_to_dict(args.new) + uuid = cat.add_new_task(**new) + print(uuid) + + def run_list(self, args): + cat = TaskCatalogueEntryList(*args.list, sort=args.sort) + print(cat.to_str(args.long)) + + def run_delete_many(self, args): + cat = TaskCatalogueEntryList(*args.delete_many, sort=args.sort) + if not cat: + LOG.info("No tasks found") + return + if not args.yes: + print(f"Do you really want to delete these {len(cat)} entries? 
(y/n)", end=" ") + if input("").lower() != "y": + return + while cat: + try: + entry = cat[0] + entry.unregister() + LOG.info(f"Task {entry.key} deleted.") + except CatalogueEntryNotFound: + LOG.warning(f"Task {entry.key} not found.") + LOG.info(f"{len(cat)} tasks deleted.") + + def run_take_one(self, args): + cat = TaskCatalogueEntryList(*args.take_one, status="queued", sort=args.sort) + uuid = cat.take_last() + if uuid is None: + return + else: + print(uuid) + + +command = Tasks diff --git a/src/anemoi/registry/commands/weights.py b/src/anemoi/registry/commands/weights.py index 75e0e85..0a66d6b 100644 --- a/src/anemoi/registry/commands/weights.py +++ b/src/anemoi/registry/commands/weights.py @@ -14,13 +14,15 @@ import logging -from ..entry import WeightCatalogueEntry -from ._base import BaseCommand +from ..entry.weights import WeightCatalogueEntry +from .base import BaseCommand LOG = logging.getLogger(__name__) class Weights(BaseCommand): + """Manage weights in the catalogue. Register, add locations, etc.""" + internal = True timestamp = True entry_class = WeightCatalogueEntry @@ -35,78 +37,18 @@ def add_arguments(self, command_parser): action="store_true", ) # command_parser.add_argument("--delete", help=f"Delete the {self.kind} from the catalogue and from any other location", action="store_true") - command_parser.add_argument("--json", help="Output json record", action="store_true") - - command_parser.add_argument("--add-location", nargs="+", help="Add a location to the weights") + command_parser.add_argument("--add-location", help="Add a location to the weights.") + command_parser.add_argument("--platform", help="Platform where to add the location.") command_parser.add_argument("--overwrite", help="Overwrite any existing weights", action="store_true") def check_arguments(self, args): pass - def parse_location(self, location): - for x in location: - if "=" not in x: - raise ValueError(f"Invalid location format '{x}', use 'key1=value1 key2=value2' list.") - return {x.split("=")[0]: x.split("=")[1] for x in location} - - def warn_unused_arguments(self, kwargs): - for k, v in kwargs.items(): - if v: - LOG.info(f"Ignoring argument {k}={v}") - - def run_from_identifier( - self, - identifier, - add_location, - json, - unregister, - remove_location=False, - **kwargs, - ): - self.warn_unused_arguments(kwargs) - - entry = self.entry_class(key=identifier) - - if add_location: - entry.add_location(**add_location) - if remove_location: - entry.remove_location(**remove_location) - if unregister: - entry.unregister() - - if json: - print(entry.as_json()) - - def run_from_path( - self, - path, - unregister, - register, - add_location, - overwrite, - json, - remove_location=False, - **kwargs, - ): - self.warn_unused_arguments(kwargs) - - entry = self.entry_class(path=path) - - if unregister: - entry.unregister() - if register: - entry.register(overwrite=overwrite) - - if add_location: - entry.add_location(**add_location) - # if remove_location: - # entry.remove_location(**remove_location) - # if delete: - # entry.delete() - - if json: - print(entry.as_json()) + def _run(self, entry, args): + self.process_task(entry, args, "unregister") + self.process_task(entry, args, "register", overwrite=args.overwrite) + self.process_task(entry, args, "add_location", platform=args.platform) command = Weights diff --git a/src/anemoi/registry/commands/worker.py b/src/anemoi/registry/commands/worker.py new file mode 100644 index 0000000..a90ae74 --- /dev/null +++ b/src/anemoi/registry/commands/worker.py @@ -0,0 +1,76 
@@
+#!/usr/bin/env python
+# (C) Copyright 2024 ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+#
+
+"""Run a registry worker: take ownership of queued tasks and process them.
+
+"""
+
+import logging
+
+from anemoi.registry.commands.base import BaseCommand
+from anemoi.registry.tasks import TaskCatalogueEntry
+from anemoi.registry.utils import list_to_dict
+from anemoi.registry.workers import Worker
+
+LOG = logging.getLogger(__name__)
+
+
+class WorkerCommand(BaseCommand):
+    """Run a worker, taking ownership of tasks and running them."""
+
+    internal = True
+    timestamp = True
+    entry_class = TaskCatalogueEntry
+
+    collection = "tasks"
+
+    def add_arguments(self, command_parser):
+        command_parser.add_argument("--timeout", help="Die with timeout (SIGALRM) after TIMEOUT seconds.", type=int)
+        command_parser.add_argument("--wait", help="Check for new tasks every WAIT seconds.", type=int, default=60)
+
+        command_parser.add_argument(
+            "action",
+            help="Action to perform",
+            choices=["transfer-dataset"],
+            default="transfer-dataset",
+            nargs="?",
+        )
+        command_parser.add_argument(
+            "--target-dir", help="The actual target directory where the worker will write.", default="."
+        )
+        command_parser.add_argument("--published-target-dir", help="The target directory published in the catalogue.")
+        command_parser.add_argument("--destination", help="Platform destination (e.g. leonardo, lumi, marenostrum)")
+        command_parser.add_argument("--request", help="Filter tasks to process (key=value list)", nargs="*", default=[])
+        command_parser.add_argument("--threads", help="Number of threads to use", type=int, default=1)
+        command_parser.add_argument("--heartbeat", help="Heartbeat interval", type=int, default=60)
+        command_parser.add_argument(
+            "--max-no-heartbeat",
+            help="Max interval without heartbeat before considering that a task needs to be freed.",
+            type=int,
+            default=0,
+        )
+        command_parser.add_argument("--loop", help="Run in a loop", action="store_true")
+        command_parser.add_argument(
+            "--check-todo",
+            help="See if there are tasks for this worker and exit with 0 if there are tasks to do.",
+            action="store_true",
+        )
+
+    def run(self, args):
+        kwargs = vars(args)
+        kwargs["request"] = list_to_dict(kwargs["request"])
+        kwargs.pop("command")
+        kwargs.pop("debug")
+        kwargs.pop("version")
+
+        Worker(**kwargs).run()
+
+
+command = WorkerCommand
diff --git a/src/anemoi/registry/entry/__init__.py b/src/anemoi/registry/entry/__init__.py
index be159a4..00c513f 100644
--- a/src/anemoi/registry/entry/__init__.py
+++ b/src/anemoi/registry/entry/__init__.py
@@ -5,80 +5,20 @@
 # granted to it by virtue of its status as an intergovernmental organisation
 # nor does it submit to any jurisdiction.
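+
+# A CatalogueEntry wraps a single record in one of the catalogue collections.
+# Subclasses define `collection` and `main_key`, and an entry is constructed
+# either from a catalogue key or from a local path.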
-import datetime import json import logging -import os +from functools import cached_property -import requests -import yaml -from anemoi.datasets import open_dataset -from anemoi.utils.checkpoints import load_metadata as load_checkpoint_metadata -from anemoi.utils.s3 import upload +from anemoi.utils.humanize import json_pretty_dump from anemoi.registry import config from anemoi.registry.rest import AlreadyExists -from anemoi.registry.rest import Rest +from anemoi.registry.rest import RestItem +from anemoi.registry.rest import RestItemList # from anemoi.registry.rest import DryRunRest as Rest -def json_dump_pretty(obj, max_line_length=120): - """Custom JSON dump function that keeps dicts and lists on one line if they are short enough. - - Parameters - ---------- - obj - The object to be dumped as JSON. - max_line_length - Maximum allowed line length for pretty-printing. - - Returns - ------- - unknown - JSON string. - """ - - def _format_json(obj, indent_level=0): - """Helper function to format JSON objects with custom pretty-print rules. - - Parameters - ---------- - obj - The object to format. - indent_level - Current indentation level. - - Returns - ------- - unknown - Formatted JSON string. - """ - indent = " " * 4 * indent_level - if isinstance(obj, dict): - items = [] - for key, value in obj.items(): - items.append(f'"{key}": {_format_json(value, indent_level + 1)}') - line = "{" + ", ".join(items) + "}" - if len(line) <= max_line_length: - return line - else: - return "{\n" + ",\n".join([f"{indent} {item}" for item in items]) + "\n" + indent + "}" - elif isinstance(obj, list): - items = [_format_json(item, indent_level + 1) for item in obj] - line = "[" + ", ".join(items) + "]" - if len(line) <= max_line_length: - return line - else: - return "[\n" + ",\n".join([f"{indent} {item}" for item in items]) + "\n" + indent + "]" - elif isinstance(obj, datetime.datetime): - return obj.isoformat() - else: - return json.dumps(obj) - - return _format_json(obj) - - LOG = logging.getLogger(__name__) @@ -90,7 +30,6 @@ class CatalogueEntry: record = None path = None key = None - rest = Rest() def __init__(self, key=None, path=None): assert key is not None or path is not None, "key or path must be provided" @@ -106,208 +45,49 @@ def __init__(self, key=None, path=None): assert self.key is not None, "key must be provided" + self.rest_item = RestItem(self.collection, self.key) + self.rest_collection = RestItemList(self.collection) + def as_json(self): - return json_dump_pretty(self.record) + return json_pretty_dump(self.record) @classmethod def key_exists(cls, key): - try: - cls._get_record_from_catalogue(key) - return True - except CatalogueEntryNotFound: - return False + return RestItem(cls.collection, key).exists() def load_from_key(self, key): - self.key = key - self.record = self._get_record_from_catalogue(key) - - @classmethod - def _get_record_from_catalogue(cls, key): - try: - return cls.rest.get(f"{cls.collection_api}/{key}") - except requests.exceptions.HTTPError as e: - if e.response.status_code == 404: - raise CatalogueEntryNotFound(f"Could not find any {cls.collection_api} with key={key}") - raise + rest_item = RestItem(self.collection, key) + if rest_item.exists(): + self.key = key + self.record = rest_item.get() + else: + raise CatalogueEntryNotFound(f"Could not find any {self.collection} with key={key}") @property def main_key(self): raise NotImplementedError("Subclasses must implement this property") - def register(self, ignore_existing=True, overwrite=False): + def register(self, 
overwrite=False, ignore_existing=True): + assert self.record, "record must be set" try: - return self.rest.post(self.collection_api, self.record) + return self.rest_collection.post(self.record) except AlreadyExists: - if ignore_existing: - return if overwrite is True: LOG.warning(f"{self.key} already exists. Deleting existing one to overwrite it.") - return self.replace() + return self.rest_item.put(self.record) + if ignore_existing: + LOG.info(f"{self.key} already exists. Ok.") + return raise - def replace(self): - return self.rest.put(f"{self.collection_api}/{self.key}", self.record) + def json(self): + print(self.as_json()) - def patch(self, payload): - return self.rest.patch(f"{self.collection_api}/{self.key}", payload) + def patch(self, data): + return self.rest_item.patch(data) - def unregister(self, force=False): - if not self.rest.config.get("allow_delete"): - raise ValueError("Unregister not allowed") - return self.rest.delete(f"{self.collection_api}/{self.key}?force=True") + def unregister(self): + return self.rest_item.delete() def __repr__(self): - return json.dumps(self.record, indent=2) - - -class ExperimentCatalogueEntry(CatalogueEntry): - collection_api = "experiments" - main_key = "expver" - - def load_from_path(self, path): - assert os.path.exists(path), f"{path} does not exist" - assert path.endswith(".yaml"), f"{path} must be a yaml file" - - with open(path, "r") as file: - config = yaml.safe_load(file) - - metadata = config.pop("metadata") - metadata["config"] = config - expver = metadata["expver"] - - self.key = expver - self.record = dict(expver=expver, metadata=metadata) - - def add_plots(self, path, target=None): - """target is a pattern: s3://bucket/{expver}/{basename}""" - - if not os.path.exists(path): - raise FileNotFoundError(f"Could not find plot at {path}") - - if target is None: - target = config()["plots_uri_pattern"] - basename = os.path.basename(path) - target = target.format(expver=self.key, basename=basename, filename=basename) - - LOG.info(f"Uploading {path} to {target}.") - upload(path, target, overwrite=True) - - dic = dict(url=target, name=basename, path=path) - patch = [{"op": "add", "path": "/plots/-", "value": dic}] - self.patch(patch) - - def add_weights(self, path): - """target is a pattern: s3://bucket/{uuid}""" - - weights = WeightCatalogueEntry(path=path) - if not WeightCatalogueEntry.key_exists(weights.key): - weights.register(ignore_existing=False, overwrite=False) - weights.upload(path, overwrite=False) - else: - other = WeightCatalogueEntry(key=weights.key) - if other.record["metadata"]["timestamp"] == weights.record["metadata"]["timestamp"]: - LOG.info( - f"Not updating weights with key={weights.key}, because it already exists and has the same timestamp" - ) - else: - raise ValueError(f"Conflicting weights with key={weights.key}") - - dic = dict(uuid=weights.key, path=path) - patch = [{"op": "add", "path": "/checkpoints/-", "value": dic}] - self.patch(patch) - - -class WeightCatalogueEntry(CatalogueEntry): - collection_api = "weights" - main_key = "uuid" - - def add_location(self, platform, path): - patch = [{"op": "add", "path": f"/locations/{platform}", "value": {"path": path}}] - self.patch(patch) - - def default_location(self, **kwargs): - uri = config()["weights_uri_pattern"] - uri = uri.format(uuid=self.key, **kwargs) - return uri - - def default_platform(self): - return config()["weights_platform"] - - def upload(self, path, target=None, overwrite=False): - if target is None: - target = self.default_location() - - 
LOG.info(f"Uploading {path} to {target}.") - upload(path, target, overwrite=overwrite, resume=not overwrite) - return target - - def register(self, overwrite=False): - assert self.path is not None, "path must be provided" - - platform = self.default_platform() - target = self.upload(self.path) - self.register(overwrite=overwrite) - self.add_location(platform=platform, path=target) - - def load_from_path(self, path): - self.path = path - assert os.path.exists(path), f"{path} does not exist" - - metadata = load_checkpoint_metadata(path) - assert "path" not in metadata - metadata["path"] = os.path.abspath(path) - - metadata["size"] = os.path.getsize(path) - - uuid = metadata.get("uuid") - if uuid is None: - uuid = metadata["run_id"] - LOG.warning(f"Could not find 'uuid' in {path}, using 'run_id' instead: {uuid}") - - self.key = uuid - self.record = dict(uuid=uuid, metadata=metadata) - - -class DatasetCatalogueEntry(CatalogueEntry): - collection_api = "datasets" - main_key = "name" - - def set_status(self, status): - patch = [{"op": "add", "path": "/status", "value": status}] - self.patch(patch) - - def add_location(self, platform, path): - patch = [{"op": "add", "path": f"/locations/{platform}", "value": {"path": path}}] - self.patch(patch) - - def load_from_path(self, path): - import zarr - - if not path.startswith("/") and not path.startswith("s3://"): - LOG.warning(f"Dataset path is not absolute: {path}") - if not os.path.exists(path) and not path.startswith("s3://"): - LOG.warning(f"Dataset path does not exist: {path}") - if not path.endswith(".zarr") or path.endswith(".zip"): - LOG.warning("Dataset path extension is neither .zarr nor .zip") - - name, _ = os.path.splitext(os.path.basename(path)) - - z = zarr.open(path) - ds = open_dataset(path) - - metadata = z.attrs.asdict() - - assert "statistics" not in metadata - metadata["statistics"] = {k: v.tolist() for k, v in ds.statistics.items()} - - assert "shape" not in metadata - metadata["shape"] = z.data.shape - - assert "dtype" not in metadata - metadata["dtype"] = str(ds.dtype) - - assert "chunks" not in metadata - metadata["chunks"] = ds.chunks - - self.key = name - self.record = dict(name=name, metadata=metadata) + return f"{self.__class__.__name__}({self.rest_collection}, {self.key})" diff --git a/src/anemoi/registry/entry/dataset.py b/src/anemoi/registry/entry/dataset.py new file mode 100644 index 0000000..33157f4 --- /dev/null +++ b/src/anemoi/registry/entry/dataset.py @@ -0,0 +1,68 @@ +# (C) Copyright 2023 European Centre for Medium-Range Weather Forecasts. +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +import logging +import os + +import yaml +from anemoi.datasets import open_dataset + +from . 
import CatalogueEntry
+
+LOG = logging.getLogger(__name__)
+
+
+class DatasetCatalogueEntry(CatalogueEntry):
+    collection = "datasets"
+    main_key = "name"
+
+    def set_status(self, status):
+        self.rest_item.patch([{"op": "add", "path": "/status", "value": status}])
+
+    def add_location(self, path, platform):
+        self.rest_item.patch([{"op": "add", "path": f"/locations/{platform}", "value": {"path": path}}])
+
+    def add_recipe(self, file):
+        if not os.path.exists(file):
+            raise FileNotFoundError(f"Recipe file not found: {file}")
+        if not file.endswith(".yaml"):
+            LOG.warning("Recipe file extension is not .yaml")
+        with open(file) as f:
+            recipe = yaml.safe_load(f)
+        self.rest_item.patch([{"op": "add", "path": "/recipe", "value": recipe}])
+
+    def load_from_path(self, path):
+        import zarr
+
+        if not path.startswith("/") and not path.startswith("s3://"):
+            LOG.warning(f"Dataset path is not absolute: {path}")
+        if not os.path.exists(path) and not path.startswith("s3://"):
+            LOG.warning(f"Dataset path does not exist: {path}")
+        if not (path.endswith(".zarr") or path.endswith(".zip")):
+            LOG.warning("Dataset path extension is neither .zarr nor .zip")
+
+        name, _ = os.path.splitext(os.path.basename(path))
+
+        z = zarr.open(path)
+        ds = open_dataset(path)
+
+        metadata = z.attrs.asdict()
+
+        assert "statistics" not in metadata
+        metadata["statistics"] = {k: v.tolist() for k, v in ds.statistics.items()}
+
+        assert "shape" not in metadata
+        metadata["shape"] = z.data.shape
+
+        assert "dtype" not in metadata
+        metadata["dtype"] = str(ds.dtype)
+
+        assert "chunks" not in metadata
+        metadata["chunks"] = ds.chunks
+
+        self.key = name
+        self.record = dict(name=name, metadata=metadata)
diff --git a/src/anemoi/registry/entry/experiment.py b/src/anemoi/registry/entry/experiment.py
new file mode 100644
index 0000000..ac05a1f
--- /dev/null
+++ b/src/anemoi/registry/entry/experiment.py
@@ -0,0 +1,78 @@
+# (C) Copyright 2023 European Centre for Medium-Range Weather Forecasts.
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+import logging
+import os
+
+import yaml
+from anemoi.utils.s3 import upload
+
+from .. import config
+from . 
import CatalogueEntry
+from .weights import WeightCatalogueEntry
+
+LOG = logging.getLogger(__name__)
+
+
+class ExperimentCatalogueEntry(CatalogueEntry):
+    collection = "experiments"
+    main_key = "expver"
+
+    def load_from_path(self, path):
+        assert os.path.exists(path), f"{path} does not exist"
+        assert path.endswith(".yaml"), f"{path} must be a yaml file"
+
+        with open(path, "r") as file:
+            config = yaml.safe_load(file)
+
+        metadata = config.pop("metadata")
+        metadata["config"] = config
+        expver = metadata["expver"]
+
+        self.key = expver
+        self.record = dict(expver=expver, metadata=metadata)
+
+    def add_plots(self, *paths, **kwargs):
+        for path in paths:
+            self._add_one_plot(path, **kwargs)
+
+    def _add_one_plot(self, path, **kwargs):
+        """Upload one plot and attach it to the experiment. The upload target is
+        built from the ``plots_uri_pattern`` config entry, a pattern such as
+        s3://bucket/{expver}/{basename}."""
+
+        if not os.path.exists(path):
+            raise FileNotFoundError(f"Could not find plot at {path}")
+
+        target = config()["plots_uri_pattern"]
+        basename = os.path.basename(path)
+        target = target.format(expver=self.key, basename=basename, filename=basename)
+
+        LOG.info(f"Uploading {path} to {target}.")
+        upload(path, target, overwrite=True)
+
+        dic = dict(url=target, name=basename, path=path)
+        self.rest_item.patch([{"op": "add", "path": "/plots/-", "value": dic}])
+
+    def add_weights(self, *paths, **kwargs):
+        for path in paths:
+            self._add_one_weights(path, **kwargs)
+
+    def _add_one_weights(self, path, **kwargs):
+        weights = WeightCatalogueEntry(path=path)
+        if not WeightCatalogueEntry.key_exists(weights.key):
+            weights.register(ignore_existing=False, overwrite=False)
+            weights.upload(path, overwrite=False)
+        else:
+            other = WeightCatalogueEntry(key=weights.key)
+            if other.record["metadata"]["timestamp"] == weights.record["metadata"]["timestamp"]:
+                LOG.info(
+                    f"Not updating weights with key={weights.key}, because it already exists and has the same timestamp"
+                )
+            else:
+                raise ValueError(f"Conflicting weights with key={weights.key}")
+
+        dic = dict(uuid=weights.key, path=path)
+        self.rest_item.patch([{"op": "add", "path": "/checkpoints/-", "value": dic}])
diff --git a/src/anemoi/registry/entry/weights.py b/src/anemoi/registry/entry/weights.py
new file mode 100644
index 0000000..0c5a6aa
--- /dev/null
+++ b/src/anemoi/registry/entry/weights.py
@@ -0,0 +1,68 @@
+# (C) Copyright 2023 European Centre for Medium-Range Weather Forecasts.
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+import logging
+import os
+
+from anemoi.utils.checkpoints import load_metadata as load_checkpoint_metadata
+from anemoi.utils.s3 import upload
+
+from .. import config
+from . 
import CatalogueEntry + +LOG = logging.getLogger(__name__) + + +class WeightCatalogueEntry(CatalogueEntry): + collection = "weights" + main_key = "uuid" + + def add_location(self, path, platform): + self.rest_item.patch([{"op": "add", "path": f"/locations/{platform}", "value": {"path": path}}]) + + def default_location(self, **kwargs): + uri = config()["weights_uri_pattern"] + uri = uri.format(uuid=self.key, **kwargs) + return uri + + def default_platform(self): + return config()["weights_platform"] + + def upload(self, path, target=None, overwrite=False): + if target is None: + target = self.default_location() + + LOG.info(f"Uploading {path} to {target}.") + upload(path, target, overwrite=overwrite, resume=not overwrite) + return target + + def register(self, overwrite=False): + assert self.path is not None, "path must be provided" + + super().register(overwrite=overwrite) + + platform = self.default_platform() + target = self.upload(self.path) + self.add_location(platform=platform, path=target) + + def load_from_path(self, path): + self.path = path + assert os.path.exists(path), f"{path} does not exist" + + metadata = load_checkpoint_metadata(path) + assert "path" not in metadata + metadata["path"] = os.path.abspath(path) + + metadata["size"] = os.path.getsize(path) + + uuid = metadata.get("uuid") + if uuid is None: + uuid = metadata["run_id"] + LOG.warning(f"Could not find 'uuid' in {path}, using 'run_id' instead: {uuid}") + + self.key = uuid + self.record = dict(uuid=uuid, metadata=metadata) diff --git a/src/anemoi/registry/rest.py b/src/anemoi/registry/rest.py index d4b471a..4d06759 100644 --- a/src/anemoi/registry/rest.py +++ b/src/anemoi/registry/rest.py @@ -9,14 +9,14 @@ import logging import os import socket -import sys from getpass import getuser import requests from requests.exceptions import HTTPError from anemoi.registry import config -from anemoi.registry._version import version + +from ._version import __version__ LOG = logging.getLogger(__name__) # LOG.setLevel(logging.DEBUG) @@ -52,51 +52,85 @@ def tidy(d): return d -class BaseRest: +def trace_info(): + trace = {} + trace["user"] = getuser() + trace["host"] = socket.gethostname() + trace["pid"] = os.getpid() + trace["version"] = __version__ + return trace + + +class Rest: def __init__(self): - self.config = config() + self.session = requests.Session() + self.session.headers.update({"Authorization": f"Bearer {self.token}"}) + for k, v in trace_info().items(): + self.session.headers.update({f"x-anemoi-registry-{k}": str(v)}) + + @property + def token(self): + return config().api_token + + def get(self, path, params=None, errors={}): + self.log_debug("GET", path, params) + + kwargs = dict() + if params is not None: + kwargs["params"] = params + + r = self.session.get(f"{config().api_url}/{path}", **kwargs) + self.raise_for_status(r, errors=errors) + return r.json() - def get(self, collection): - LOG.debug(f"GET {collection}") + def exists(self, *args, **kwargs): try: - r = requests.get(f"{self.config.api_url}/{collection}", headers={"Authorization": f"Bearer {self.token}"}) - self.raise_for_status(r) - return r.json() - except Exception as e: - LOG.error(e) - raise (e) - - def post(self, collection, data): - LOG.debug(f"POST {collection} { {k:'...' 
for k,v in data.items()} }") - - def patch(self, collection, data): - LOG.debug(f"PATCH {collection} {data}") - - def put(self, collection, data): - LOG.debug(f"PUT {collection} {data}") - - def delete(self, collection): - LOG.debug(f"DELETE {collection}") - - def trace_info(self): - trace = {} - trace["tool_path"] = __file__ - trace["tool_cmd"] = sys.argv - trace["user"] = getuser() - trace["host"] = socket.gethostname() - trace["pid"] = os.getpid() - trace["timestamp"] = datetime.datetime.now().isoformat() - trace["version"] = version - return trace + self.get(*args, **kwargs) + return True + except HTTPError as e: + if e.response.status_code == 404: + return False + + def put(self, path, data, errors={}): + self.log_debug("PUT", path, data) + if not data: + raise ValueError(f"PUT data must be provided for {path}") + r = self.session.put(f"{config().api_url}/{path}", json=tidy(data)) + self.raise_for_status(r, errors=errors) + return r.json() + + def patch(self, path, data, errors={}): + self.log_debug("PATCH", path, data) + if not data: + raise ValueError(f"PATCH data must be provided for {path}") + r = self.session.patch(f"{config().api_url}/{path}", json=tidy(data)) + self.raise_for_status(r, errors=errors) + return r.json() + + def post(self, path, data, errors={}): + r = self.session.post(f"{config().api_url}/{path}", json=tidy(data)) + self.raise_for_status(r, errors=errors) + return r.json() + + def delete(self, path, errors={}): + if not config().get("allow_delete"): + raise ValueError("Unregister not allowed") + r = self.session.delete(f"{config().api_url}/{path}", params=dict(force=True)) + self.raise_for_status(r, errors=errors) + return r.json() + + def log_debug(self, verb, collection, data): + if len(str(data)) > 100: + if isinstance(data, dict): + data = {k: "..." for k, v in data.items()} + else: + data = str(data)[:100] + "..." + LOG.debug(f"{verb} {collection} {data}") def trace_info_dict(self): return dict(_trace_info=self.trace_info()) - @property - def token(self): - return self.config.api_token - - def raise_for_status(self, r): + def raise_for_status(self, r, errors={}): try: r.raise_for_status() except HTTPError as e: @@ -104,82 +138,58 @@ def raise_for_status(self, r): text = r.text text = text[:1000] + "..." if len(text) > 1000 else text e.args = (f"{e.args[0]} : {text}",) - raise e + exception_handler = errors.get(e.response.status_code) + errcode = e.response.status_code + LOG.debug("HTTP error: ", errcode, exception_handler) + if exception_handler: + raise exception_handler(e) + else: + raise e -class ReadOnlyRest(BaseRest): - pass +class RestItem: + def __init__(self, collection, key): + self.collection = collection + self.key = key + self.rest = Rest() + self.path = f"{collection}/{key}" -class Rest(BaseRest): - def raise_for_status(self, r): + def exists(self): try: - r.raise_for_status() + self.get() + return True except HTTPError as e: - # add the response text to the exception message - text = r.text - text = text[:1000] + "..." 
if len(text) > 1000 else text - e.args = (f"{e.args[0]} : {text}",) - raise e + if e.response.status_code == 404: + return False - def post(self, collection, data): - super().post(collection, data) - try: - r = requests.post( - f"{self.config.api_url}/{collection}", - json=tidy(data), - headers={"Authorization": f"Bearer {self.token}"}, - ) - self.raise_for_status(r) - return r.json() - except requests.exceptions.HTTPError as e: - if e.response.status_code == 409: - raise AlreadyExists(f"{e}Already exists in {collection}") - else: - LOG.error(f"Error in post to {collection} with data:{data}") - LOG.error(e) - raise - except Exception as e: - LOG.error(f"Error in post to {collection} with data:{data}") - LOG.error(e) - raise - - def patch(self, collection, data): - super().patch(collection, data) - try: - r = requests.patch( - f"{self.config.api_url}/{collection}", - json=tidy(data), - headers={"Authorization": f"Bearer {self.token}"}, - ) - self.raise_for_status(r) - return r.json() - except Exception as e: - LOG.error(e) - raise (e) - - def put(self, collection, data): - super().put(collection, data) - try: - r = requests.put( - f"{self.config.api_url}/{collection}", - json=tidy(data), - headers={"Authorization": f"Bearer {self.token}"}, - ) - self.raise_for_status(r) - return r.json() - except Exception as e: - LOG.error(e) - raise (e) - - def delete(self, collection): - super().delete(collection) - try: - r = requests.delete( - f"{self.config.api_url}/{collection}", headers={"Authorization": f"Bearer {self.token}"} - ) - self.raise_for_status(r) - return r.json() - except Exception as e: - LOG.error(e) - raise (e) + def get(self, params=None, errors={}): + return self.rest.get(self.path, params=params, errors=errors) + + def patch(self, data): + return self.rest.patch(self.path, data) + + def put(self, data): + return self.rest.put(self.path, data) + + def delete(self): + return self.rest.delete(self.path) + + def __repr__(self): + return f"{self.__class__.__name__}({self.collection}, {self.key})" + + +class RestItemList: + def __init__(self, collection): + self.collection = collection + self.rest = Rest() + self.path = collection + + def get(self, params=None, errors={}): + return self.rest.get(self.path, params=params, errors=errors) + + def post(self, data): + return self.rest.post(self.path, data, errors={409: AlreadyExists}) + + def __repr__(self): + return f"{self.__class__.__name__}({self.collection})" diff --git a/src/anemoi/registry/tasks.py b/src/anemoi/registry/tasks.py new file mode 100644 index 0000000..9ce0567 --- /dev/null +++ b/src/anemoi/registry/tasks.py @@ -0,0 +1,138 @@ +# (C) Copyright 2023 European Centre for Medium-Range Weather Forecasts. +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. 
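The refactored ``Rest``/``RestItem`` layer above drives the catalogue with plain HTTP verbs and RFC 6902 JSON Patch documents. A minimal sketch of how an entry can be inspected and patched through it (the collection and entry names are illustrative, and a valid ``api_url``/``api_token`` configuration is assumed):

.. code:: python

    from anemoi.registry.rest import RestItem

    # RestItem(collection, key) maps onto {api_url}/{collection}/{key}
    item = RestItem("datasets", "some-dataset-name")  # illustrative name

    if item.exists():  # GET on the entry, returns False on a 404
        # JSON Patch: add or overwrite the "status" field of the entry
        item.patch([{"op": "add", "path": "/status", "value": "ready"}])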
+ +import datetime +import logging + +from anemoi.utils.humanize import when +from anemoi.utils.text import table + +from anemoi.registry.entry import CatalogueEntry +from anemoi.registry.rest import RestItemList +from anemoi.registry.rest import trace_info +from anemoi.registry.utils import list_to_dict + +LOG = logging.getLogger(__name__) + + +class TaskCatalogueEntryList: + collection = "tasks" + main_key = "uuid" + + def __init__(self, *args, sort="updated", **kwargs): + if args: + for k, v in list_to_dict(args).items(): + if k in kwargs: + raise ValueError(f"Duplicate argument {k}={v} and {k}={kwargs[k]}") + kwargs[k] = v + self.kwargs = kwargs + self.sort = sort + + self.rest_collection = RestItemList(self.collection) + + def get(self): + data = self.rest_collection.get(params=self.kwargs) + return sorted(data, key=lambda x: x[self.sort]) + + def __iter__(self): + for v in self.get(): + yield TaskCatalogueEntry(key=v["uuid"]) + + def __getitem__(self, key): + return list(self)[key] + + def __len__(self): + return len(self.get()) + + def add_new_task(self, **kwargs): + kwargs = kwargs.copy() + assert "action" in kwargs, kwargs + kwargs["action"] = kwargs["action"].replace("_", "-").lower() + + # actor_factory(**kwargs).check() + + res = self.rest_collection.post(kwargs) + uuid = res["uuid"] + LOG.debug(f"New task created {uuid}: {res}") + return uuid + + def take_last(self): + uuids = [v["uuid"] for v in self.get()] + if not uuids: + LOG.info("No available task has been found.") + return + latest = uuids[-1] + + entry = TaskCatalogueEntry(key=latest) + res = entry.take_ownership() + LOG.debug(f"Task {latest} taken: {res}") + uuid = res["uuid"] + return uuid + + def to_str(self, long): + rows = [] + for v in self.get(): + if not isinstance(v, dict): + raise ValueError(v) + created = datetime.datetime.fromisoformat(v.pop("created")) + updated = datetime.datetime.fromisoformat(v.pop("updated")) + + uuid = v.pop("uuid") + status = v.pop("status") + progress = v.pop("progress", "") + action = v.pop("action", "") + if not long: + if "worker" in v: + v["worker"] = v["worker"].get("host") + content = " ".join(f"{k}={v}" for k, v in v.items()) + rows.append( + [action, when(created, use_utc=True), when(updated, use_utc=True), status, progress, content, uuid] + ) + return table( + rows, + ["Action", "Created", "Updated", "Status", "%", "Details", "UUID"], + ["<", "<", "<", "<", "<", "<", "<"], + ) + + +class TaskCatalogueEntry(CatalogueEntry): + collection = "tasks" + main_key = "uuid" + + def set_status(self, status): + patch = [{"op": "add", "path": "/status", "value": status}] + self.rest_item.patch(patch) + + def unregister(self): + return self.rest_item.delete() + + def take_ownership(self): + trace = trace_info() + trace["timestamp"] = datetime.datetime.now().isoformat() + return self.rest_item.patch( + [ + {"op": "test", "path": "/status", "value": "queued"}, + {"op": "replace", "path": "/status", "value": "running"}, + {"op": "add", "path": "/worker", "value": trace}, + ] + ) + + def release_ownership(self): + self.rest_item.patch( + [ + {"op": "test", "path": "/status", "value": "running"}, + {"op": "replace", "path": "/status", "value": "queued"}, + {"op": "remove", "path": "/worker"}, + ] + ) + + def set_progress(self, progress): + assert isinstance(progress, int), progress + if not (0 <= progress <= 100): + raise ValueError("Progress must be between 0 and 100") + patch = [{"op": "add", "path": "/progress", "value": progress}] + self.rest_item.patch(patch) diff --git 
a/src/anemoi/registry/utils.py b/src/anemoi/registry/utils.py new file mode 100644 index 0000000..c990add --- /dev/null +++ b/src/anemoi/registry/utils.py @@ -0,0 +1,15 @@ +# (C) Copyright 2024 European Centre for Medium-Range Weather Forecasts. +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + + +def list_to_dict(lst): + assert isinstance(lst, (tuple, list)), f"lst must be a list. Got {lst} of type {type(lst)}." + for x in lst: + assert isinstance(x, str), f"lst must be a list of strings. Got {x} of type {type(x)}." + if "=" not in x: + raise ValueError(f"Invalid key-value pairs format '{x}', use 'key1=value1 key2=value2' list.") + return {x.split("=")[0]: x.split("=")[1] for x in lst} diff --git a/src/anemoi/registry/workers.py b/src/anemoi/registry/workers.py new file mode 100644 index 0000000..edc17e9 --- /dev/null +++ b/src/anemoi/registry/workers.py @@ -0,0 +1,256 @@ +# (C) Copyright 2023 European Centre for Medium-Range Weather Forecasts. +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +import datetime +import logging +import os +import signal +import sys +import threading +import time + +from anemoi.utils.humanize import when + +from anemoi.registry.entry.dataset import DatasetCatalogueEntry +from anemoi.registry.tasks import TaskCatalogueEntryList + +# from anemoi.utils.provenance import trace_info + +LOG = logging.getLogger(__name__) + + +class Worker: + def __init__( + self, + action, + destination, + timeout=None, + wait=60, + stop_if_finished=True, + target_dir=".", + publish_target_dir=None, + auto_register=True, + threads=1, + heartbeat=60, + max_no_heartbeat=0, + loop=False, + check_todo=False, + request={}, + ): + """Run a worker that will process tasks in the queue. + timeout: Kill itself after `timeout` seconds. + wait: When no task is found, wait `wait` seconds before checking again. 
+ """ + assert action == "transfer-dataset", action + + if not destination: + raise ValueError("No destination platform specified") + if not action: + raise ValueError("No action specified") + + self.destination = destination + self.target_dir = target_dir + self.publish_target_dir = publish_target_dir or target_dir + self.request = request + self.threads = threads + self.heartbeat = heartbeat + self.max_no_heartbeat = max_no_heartbeat + self.loop = loop + self.check_todo = check_todo + + self.wait = wait + self.stop_if_finished = stop_if_finished + self.auto_register = auto_register + if timeout: + signal.alarm(timeout) + + if not os.path.exists(target_dir): + raise ValueError(f"Target directory {target_dir} must already exist") + + def run(self): + if self.check_todo: + task = self.choose_task() + if task: + LOG.info("There are tasks to do.") + sys.exit(0) + else: + LOG.info("No tasks to do.") + sys.exit(1) + + if self.loop: + while True: + res = self.process_one_task() + + if self.stop_if_finished and res is None: + LOG.info("All tasks have been processed, stopping.") + return + + LOG.info(f"Waiting {self.wait} seconds before checking again.") + time.sleep(self.wait) + else: + self.process_one_task() + + def choose_task(self): + request = self.request.copy() + request["destination"] = request.get("destination", self.destination) + request["action"] = "transfer-dataset" + + # if a task is queued, take it + for entry in TaskCatalogueEntryList(status="queued", **request): + return entry + + # else if a task is running, check if it has been running for too long, and free it + if self.max_no_heartbeat == 0: + return None + + cat = TaskCatalogueEntryList(status="running", **request) + if not cat: + LOG.info("No queued tasks found") + else: + LOG.info(cat.to_str(long=True)) + for entry in cat: + updated = datetime.datetime.fromisoformat(entry.record["updated"]) + LOG.info(f"Task {entry.key} is already running, last update {when(updated, use_utc=True)}.") + if (datetime.datetime.utcnow() - updated).total_seconds() > self.max_no_heartbeat: + LOG.warning( + f"Task {entry.key} has been running for more than {self.max_no_heartbeat} seconds, freeing it." 
+ ) + entry.release_ownership() + + def process_one_task(self): + entry = self.choose_task() + if not entry: + return False + + uuid = entry.key + LOG.info(f"Processing task {uuid}: {entry}") + self.parse_entry(entry) # for checking only + + entry.take_ownership() + self.process_entry_with_heartbeat(entry) + LOG.info(f"Task {uuid} completed.") + entry.unregister() + LOG.info(f"Task {uuid} deleted.") + return True + + def process_entry_with_heartbeat(self, entry): + STOP = [] + + # create another thread to send heartbeat + def send_heartbeat(): + while True: + try: + entry.set_status("running") + except Exception: + return + for _ in range(self.heartbeat): + time.sleep(1) + if len(STOP) > 0: + STOP.pop() + return + + thread = threading.Thread(target=send_heartbeat) + thread.start() + + try: + self.process_entry(entry) + finally: + STOP.append(1) # stop the heartbeat thread + thread.join() + + def process_entry(self, entry): + destination, source, dataset = self.parse_entry(entry) + dataset_entry = DatasetCatalogueEntry(key=dataset) + + LOG.info(f"Transferring {dataset} from '{source}' to '{destination}'") + + def get_source_path(): + e = dataset_entry.record + if "locations" not in e: + raise ValueError(f"Dataset {dataset} has no locations") + locations = e["locations"] + + if source not in locations: + raise ValueError( + f"Dataset {dataset} is not available at {source}. Available locations: {list(locations.keys())}" + ) + + if "path" not in locations[source]: + raise ValueError(f"Dataset {dataset} has no path at {source}") + + path = locations[source]["path"] + + return path + + source_path = get_source_path() + basename = os.path.basename(source_path) + target_path = os.path.join(self.target_dir, basename) + if os.path.exists(target_path): + LOG.error(f"Target path {target_path} already exists, skipping.") + return + + from anemoi.utils.s3 import download + + LOG.info(f"Source path: {source_path}") + LOG.info(f"Target path: {target_path}") + + if source_path.startswith("s3://"): + source_path = source_path + "/" if not source_path.endswith("/") else source_path + + if target_path.startswith("s3://"): + # untested + download(source_path, target_path, resume=True, threads=self.threads) + return + else: + target_tmp_path = os.path.join(self.target_dir + "-downloading", basename) + os.makedirs(os.path.dirname(target_tmp_path), exist_ok=True) + download(source_path, target_tmp_path, resume=True, threads=self.threads) + os.rename(target_tmp_path, target_path) + + if self.auto_register: + published_target_path = os.path.join(self.publish_target_dir, basename) + dataset_entry.add_location(platform=destination, path=published_target_path) + + @classmethod + def parse_entry(cls, entry): + data = entry.record.copy() + + assert isinstance(data, dict), data + assert data["action"] == "transfer-dataset", data["action"] + + def is_alphanumeric(s): + assert isinstance(s, str), s + return all(c.isalnum() or c in ("-", "_") for c in s) + + destination = data.pop("destination") + source = data.pop("source") + dataset = data.pop("dataset") + assert is_alphanumeric(destination), destination + assert is_alphanumeric(source), source + assert is_alphanumeric(dataset), dataset + for k in data: + if k not in ("action", "status", "progress", "created", "updated", "uuid"): + LOG.warning(f"Unknown key {k}=data[k]") + data = None + + if "/" in destination: + raise ValueError(f"Destination {destination} must not contain '/', this is a platform name") + if "." 
in destination: + raise ValueError(f"Destination {destination} must not contain '.', this is a platform name") + + if "/" in source: + raise ValueError(f"Source {source} must not contain '/', this is a platform name") + if "." in source: + raise ValueError(f"Source {source} must not contain '.', this is a platform name") + + if "." in dataset: + raise ValueError(f"The dataset {dataset} must not contain a '.', this is the name of the dataset.") + + assert isinstance(destination, str), destination + assert isinstance(source, str), source + assert isinstance(dataset, str), dataset + return destination, source, dataset From f537f57b5d04e17d04b9b8a438886fea59108079 Mon Sep 17 00:00:00 2001 From: Florian Pinault Date: Mon, 8 Jul 2024 18:20:28 +0200 Subject: [PATCH 09/64] fix typo --- src/anemoi/registry/workers.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/anemoi/registry/workers.py b/src/anemoi/registry/workers.py index edc17e9..c392f58 100644 --- a/src/anemoi/registry/workers.py +++ b/src/anemoi/registry/workers.py @@ -32,7 +32,7 @@ def __init__( wait=60, stop_if_finished=True, target_dir=".", - publish_target_dir=None, + published_target_dir=None, auto_register=True, threads=1, heartbeat=60, @@ -54,7 +54,7 @@ def __init__( self.destination = destination self.target_dir = target_dir - self.publish_target_dir = publish_target_dir or target_dir + self.published_target_dir = published_target_dir or target_dir self.request = request self.threads = threads self.heartbeat = heartbeat @@ -212,7 +212,7 @@ def get_source_path(): os.rename(target_tmp_path, target_path) if self.auto_register: - published_target_path = os.path.join(self.publish_target_dir, basename) + published_target_path = os.path.join(self.published_target_dir, basename) dataset_entry.add_location(platform=destination, path=published_target_path) @classmethod From c6fce74ba56ff780ec41c633e3fc4a824ce1b355 Mon Sep 17 00:00:00 2001 From: Florian Pinault Date: Tue, 9 Jul 2024 12:04:50 +0200 Subject: [PATCH 10/64] clean --- src/anemoi/registry/commands/tasks.py | 4 ++- src/anemoi/registry/tasks.py | 35 ++++++++++++++++++--------- src/anemoi/registry/workers.py | 2 +- 3 files changed, 27 insertions(+), 14 deletions(-) diff --git a/src/anemoi/registry/commands/tasks.py b/src/anemoi/registry/commands/tasks.py index 6de4bc0..720d123 100644 --- a/src/anemoi/registry/commands/tasks.py +++ b/src/anemoi/registry/commands/tasks.py @@ -103,14 +103,16 @@ def run_delete_many(self, args): print(f"Do you really want to delete these {len(cat)} entries? 
(y/n)", end=" ") if input("").lower() != "y": return + count = 0 while cat: try: entry = cat[0] entry.unregister() + count += 1 LOG.info(f"Task {entry.key} deleted.") except CatalogueEntryNotFound: LOG.warning(f"Task {entry.key} not found.") - LOG.info(f"{len(cat)} tasks deleted.") + LOG.info(f"{count} tasks deleted.") def run_take_one(self, args): cat = TaskCatalogueEntryList(*args.take_one, status="queued", sort=args.sort) diff --git a/src/anemoi/registry/tasks.py b/src/anemoi/registry/tasks.py index 9ce0567..78d5339 100644 --- a/src/anemoi/registry/tasks.py +++ b/src/anemoi/registry/tasks.py @@ -85,18 +85,29 @@ def to_str(self, long): status = v.pop("status") progress = v.pop("progress", "") action = v.pop("action", "") - if not long: - if "worker" in v: - v["worker"] = v["worker"].get("host") - content = " ".join(f"{k}={v}" for k, v in v.items()) - rows.append( - [action, when(created, use_utc=True), when(updated, use_utc=True), status, progress, content, uuid] - ) - return table( - rows, - ["Action", "Created", "Updated", "Status", "%", "Details", "UUID"], - ["<", "<", "<", "<", "<", "<", "<"], - ) + source = v.pop("source", "") + destination = v.pop("destination", "") + dataset = v.pop("dataset", "") + row = [ + when(created, use_utc=True), + when(updated, use_utc=True), + status, + action, + source, + destination, + dataset, + progress, + uuid, + ] + rows.append(row) + if long: + content = " ".join(f"{k}={v}" for k, v in v.items()) + row.append(content) + cols = ["Created", "Updated", "Status", "Action", "Src", "Dest", "Dataset", "%", "UUID"] + if long: + cols.append("More") + + return table(rows, cols, "<" * len(cols)) class TaskCatalogueEntry(CatalogueEntry): diff --git a/src/anemoi/registry/workers.py b/src/anemoi/registry/workers.py index c392f58..020bd67 100644 --- a/src/anemoi/registry/workers.py +++ b/src/anemoi/registry/workers.py @@ -202,7 +202,7 @@ def get_source_path(): source_path = source_path + "/" if not source_path.endswith("/") else source_path if target_path.startswith("s3://"): - # untested + LOG.warning("Uploading to S3 is experimental and has not been tested yet.") download(source_path, target_path, resume=True, threads=self.threads) return else: From c33f3fbc268f16c1d4cd123629e55471fdc332fb Mon Sep 17 00:00:00 2001 From: Florian Pinault Date: Tue, 9 Jul 2024 13:53:09 +0200 Subject: [PATCH 11/64] doc --- docs/cli/introduction.rst | 27 ----------------- docs/cli/list.rst | 5 ++++ docs/configuring.rst | 33 +++++++++++++++++++++ docs/index.rst | 18 +++-------- docs/installing.rst | 5 ++++ src/anemoi/registry/commands/datasets.py | 10 +++---- src/anemoi/registry/commands/experiments.py | 16 ++++++---- src/anemoi/registry/commands/list.py | 23 +++++++++----- src/anemoi/registry/commands/weights.py | 10 ++++--- 9 files changed, 83 insertions(+), 64 deletions(-) delete mode 100644 docs/cli/introduction.rst create mode 100644 docs/configuring.rst diff --git a/docs/cli/introduction.rst b/docs/cli/introduction.rst deleted file mode 100644 index ed2f3ee..0000000 --- a/docs/cli/introduction.rst +++ /dev/null @@ -1,27 +0,0 @@ -Introduction -============ - -When you install the `anemoi-registry` package, this will also install command line tool -called ``anemoi-registry`` which can be used to manage the registry. - -The tool can provide help with the ``--help`` options: - -.. code-block:: bash - - % anemoi-registry --help - -The commands are: - -.. toctree:: - :maxdepth: 1 - - list - datasets - experiments - weights - -.. 
argparse:: - :module: anemoi.registry.__main__ - :func: create_parser - :prog: anemoi-registry - :nosubcommands: diff --git a/docs/cli/list.rst b/docs/cli/list.rst index 4599dc8..0d3f625 100644 --- a/docs/cli/list.rst +++ b/docs/cli/list.rst @@ -1,6 +1,11 @@ list ==== +.. warning:: + + This feature is experimental and may change in future versions, its output format may also change. + It is provided for convenience in development and testing and debugging. + It is not recommended for production use. .. argparse:: diff --git a/docs/configuring.rst b/docs/configuring.rst new file mode 100644 index 0000000..c8718e1 --- /dev/null +++ b/docs/configuring.rst @@ -0,0 +1,33 @@ +############# + Configuring +############# + +The following configuration files are used to store the registry +configuration. These files allow configuring the catalogue urls, s3 +buckets, API token and object storage credentials. + +The first config file is ``~/.config/anemoi/config.toml``. + +.. code:: + + [registry] + api_url = "https://anemoi.ecmwf.int/api/v1" # Required + + plots_uri_pattern = "s3://ml-artefacts/{expver}/{basename}" # Optional + datasets_uri_pattern = "s3://ml-datasets/{name}" # Optional + weights_uri_pattern = "s3://ml-weights/{uuid}.ckpt" # Optional + weights_platform = "ewc" # Optional + +The second config file is ``~/.config/anemoi/config.secret.toml``. This +file should have the right permissions set to avoid unauthorized access +(`chmod 600 `). All keys are required. + +.. code:: + + [registry] + api_token = "xxxxxxxxxxx" + + [object-storage] + endpoint_url = "https://xxxxxxxxxxxx.xxx" + aws_access_key_id = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + aws_secret_access_key = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" diff --git a/docs/index.rst b/docs/index.rst index c1e0db4..6bf5078 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -23,41 +23,31 @@ This package provides a series of utility functions for used by the rest of the *Anemoi* packages. - :doc:`installing` +- :doc:`configuring` .. toctree:: :maxdepth: 1 :hidden: installing + configuring **Command line tool** -- :doc:`cli/introduction` -- :doc:`cli/list` - :doc:`cli/datasets` - :doc:`cli/weights` - :doc:`cli/experiments` +- :doc:`cli/list` .. toctree:: :maxdepth: 1 :hidden: :caption: Command line tool - cli/introduction - cli/list cli/datasets cli/experiments cli/weights - -********* - Modules -********* - -.. toctree:: - :maxdepth: 1 - :glob: - - modules/* + cli/list ***************** Anemoi packages diff --git a/docs/installing.rst b/docs/installing.rst index 37ca68a..9020926 100644 --- a/docs/installing.rst +++ b/docs/installing.rst @@ -12,6 +12,11 @@ The options are: - ``dev``: install the development dependencies - ``all``: install all the dependencies +- ``s3``: install the dependencies for S3 support + +When you install the `anemoi-registry` package, this will also install +command line tool called ``anemoi-registry`` which can be used to manage +an anemoi catalogue. 
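The configuration described above can also be read back programmatically. A small sketch, assuming the two configuration files from the configuring page are in place (the printed values depend on your setup):

.. code:: python

    from anemoi.registry import config

    cfg = config()  # "registry" section, config.toml merged with config.secret.toml
    print(cfg["api_url"])              # e.g. https://anemoi.ecmwf.int/api/v1
    print(cfg["weights_uri_pattern"])  # e.g. s3://ml-weights/{uuid}.ckpt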
 **************
  Contributing
diff --git a/src/anemoi/registry/commands/datasets.py b/src/anemoi/registry/commands/datasets.py
index 7833ded..28764c6 100644
--- a/src/anemoi/registry/commands/datasets.py
+++ b/src/anemoi/registry/commands/datasets.py
@@ -29,16 +29,16 @@ class Datasets(BaseCommand):
     kind = "dataset"
 
     def add_arguments(self, command_parser):
-        command_parser.add_argument("NAME_OR_PATH", help=f"The name or the path of the {self.kind}")
-        command_parser.add_argument("--register", help=f"Register the {self.kind}", action="store_true")
+        command_parser.add_argument("NAME_OR_PATH", help=f"The name or the path of a {self.kind}.")
+        command_parser.add_argument("--register", help=f"Register a {self.kind} in the catalogue.", action="store_true")
         command_parser.add_argument(
             "--unregister",
-            help="Remove from catalogue (without deleting all)",
+            help=f"Remove a {self.kind} from the catalogue (without deleting it from its locations).",
             action="store_true",
         )
         # command_parser.add_argument("--delete", help=f"Delete the {self.kind} from the catalogue and from any other location", action="store_true")
-        command_parser.add_argument("--set-status", help="Set the status to the dataset")
-        command_parser.add_argument("--add-recipe", help="Add a recipe file")
+        command_parser.add_argument("--set-status", help=f"Set the status of the {self.kind}.")
+        command_parser.add_argument("--add-recipe", help=f"Add the recipe file used to [re-]build the {self.kind}.")
         command_parser.add_argument(
             "--add-location",
             nargs="+",
diff --git a/src/anemoi/registry/commands/experiments.py b/src/anemoi/registry/commands/experiments.py
index 1e89a22..f75a8ba 100644
--- a/src/anemoi/registry/commands/experiments.py
+++ b/src/anemoi/registry/commands/experiments.py
@@ -30,18 +30,22 @@ class Experiments(BaseCommand):
     kind = "experiment"
 
     def add_arguments(self, command_parser):
-        command_parser.add_argument("NAME_OR_PATH", help=f"The name or the path of the {self.kind}")
-        command_parser.add_argument("--register", help=f"Register the {self.kind}", action="store_true")
+        command_parser.add_argument(
+            "NAME_OR_PATH", help="Name of an experiment or the path of an experiment config file."
+ ) + command_parser.add_argument( + "--register", help=f"Register the {self.kind} in the catalogue.", action="store_true" + ) command_parser.add_argument( "--unregister", - help="Remove from catalogue (without deleting all)", + help="Remove from catalogue (without deleting the experiment from other locations)", action="store_true", ) # command_parser.add_argument("--delete", help=f"Delete the {self.kind} from the catalogue and from any other location", action="store_true") - command_parser.add_argument("--add-weights", nargs="+", help="Add weights to the experiment") - command_parser.add_argument("--add-plots", nargs="+", help="Add plots to the experiment") - command_parser.add_argument("--overwrite", help="Overwrite if already exists", action="store_true") + command_parser.add_argument("--add-weights", nargs="+", help="Add weights to the experiment.") + command_parser.add_argument("--add-plots", nargs="+", help="Add plots to the experiment.") + command_parser.add_argument("--overwrite", help="Overwrite if already exists.", action="store_true") def check_arguments(self, args): pass diff --git a/src/anemoi/registry/commands/list.py b/src/anemoi/registry/commands/list.py index 28ac7a8..bd2dbbe 100644 --- a/src/anemoi/registry/commands/list.py +++ b/src/anemoi/registry/commands/list.py @@ -36,14 +36,21 @@ class List(Command): def add_arguments(self, command_parser): sub_parser = command_parser.add_subparsers(dest="subcommand") - experiment = sub_parser.add_parser("experiments") - experiment.add_argument("filter", nargs="*") - - checkpoint = sub_parser.add_parser("weights") - checkpoint.add_argument("filter", nargs="*") - - dataset = sub_parser.add_parser("datasets") - dataset.add_argument("filter", nargs="*") + experiment = sub_parser.add_parser( + "experiments", + help="List experiments in the catalogue, for admin and debug purposes Current output is JSON and may change.", + ) + experiment.add_argument( + "filter", nargs="*", help="Filter experiments with a list of key=value.", metavar="key=value" + ) + + checkpoint = sub_parser.add_parser("weights", help="List weights in the catalogue.") + checkpoint.add_argument( + "filter", nargs="*", help="Filter experiments with a list of key=value.", metavar="key=value" + ) + + dataset = sub_parser.add_parser("datasets", help="List datasets in the catalogue.") + dataset.add_argument("filter", nargs="*", help="Filter datasets with a list of key=value.", metavar="key=value") # tasks = sub_parser.add_parser("tasks") # tasks.add_argument("filter", nargs="*") diff --git a/src/anemoi/registry/commands/weights.py b/src/anemoi/registry/commands/weights.py index 0a66d6b..b191113 100644 --- a/src/anemoi/registry/commands/weights.py +++ b/src/anemoi/registry/commands/weights.py @@ -29,18 +29,20 @@ class Weights(BaseCommand): kind = "weights" def add_arguments(self, command_parser): - command_parser.add_argument("NAME_OR_PATH", help=f"The name or the path of the {self.kind}") - command_parser.add_argument("--register", help=f"Register the {self.kind}", action="store_true") + command_parser.add_argument("NAME_OR_PATH", help=f"Name or path of a {self.kind}.") + command_parser.add_argument( + "--register", help=f"Register the {self.kind} in the catalogue.", action="store_true" + ) command_parser.add_argument( "--unregister", - help="Remove from catalogue (without deleting all)", + help="Remove from catalogue (without deleting it from its actual locations).", action="store_true", ) # command_parser.add_argument("--delete", help=f"Delete the {self.kind} from the 
catalogue and from any other location", action="store_true") command_parser.add_argument("--add-location", help="Add a location to the weights.") command_parser.add_argument("--platform", help="Platform where to add the location.") - command_parser.add_argument("--overwrite", help="Overwrite any existing weights", action="store_true") + command_parser.add_argument("--overwrite", help="Overwrite any existing weights.", action="store_true") def check_arguments(self, args): pass From e3ce614662f928c5cc5ae0639ce84afcb3170e82 Mon Sep 17 00:00:00 2001 From: Florian Pinault Date: Tue, 9 Jul 2024 14:03:50 +0200 Subject: [PATCH 12/64] add warning for s3 support --- src/anemoi/registry/__init__.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/anemoi/registry/__init__.py b/src/anemoi/registry/__init__.py index 83be460..9558f3e 100644 --- a/src/anemoi/registry/__init__.py +++ b/src/anemoi/registry/__init__.py @@ -12,6 +12,11 @@ LOG = logging.getLogger(__name__) +try: + import boto3 +except ImportError: + LOG.warning("boto3 package is not available. To have S3 support, reinstall with : pip install anemoi-registry[s3]") + def config(): from anemoi.utils.config import load_config From cd5d9ca636c1a17832686ba8fa29956fa1487ad5 Mon Sep 17 00:00:00 2001 From: Florian Pinault Date: Tue, 9 Jul 2024 14:16:35 +0200 Subject: [PATCH 13/64] add default config --- src/anemoi/registry/__init__.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/src/anemoi/registry/__init__.py b/src/anemoi/registry/__init__.py index 9558f3e..e458dbb 100644 --- a/src/anemoi/registry/__init__.py +++ b/src/anemoi/registry/__init__.py @@ -7,6 +7,7 @@ import logging +import os from ._version import __version__ as __version__ @@ -21,11 +22,6 @@ def config(): from anemoi.utils.config import load_config - config = load_config(secrets=["api_token"]) - if not config: - raise ValueError("Anemoi config is required.") - - config = config.get("registry") - if not config: - raise ValueError("Section 'registry' is missing in config.") - return config + default_config = os.path.join(os.path.dirname(__file__), "config.yaml") + config = load_config(secrets=["api_token"], defaults=default_config) + return config.get("registry") From 399a5cbe766a5c68fe6f60a97c57f04c20feb20f Mon Sep 17 00:00:00 2001 From: Florian Pinault Date: Tue, 9 Jul 2024 16:05:17 +0200 Subject: [PATCH 14/64] wip --- docs/configuring.rst | 17 +- src/anemoi/registry/commands/worker.py | 7 +- src/anemoi/registry/config.yaml | 7 + src/anemoi/registry/workers.py | 256 ------------------------- 4 files changed, 19 insertions(+), 268 deletions(-) create mode 100644 src/anemoi/registry/config.yaml delete mode 100644 src/anemoi/registry/workers.py diff --git a/docs/configuring.rst b/docs/configuring.rst index c8718e1..374f3c2 100644 --- a/docs/configuring.rst +++ b/docs/configuring.rst @@ -6,21 +6,22 @@ The following configuration files are used to store the registry configuration. These files allow configuring the catalogue urls, s3 buckets, API token and object storage credentials. -The first config file is ``~/.config/anemoi/config.toml``. +The first config file is ``~/.config/anemoi/config.toml``. All keys in +this file are optional and have default values. .. 
code:: [registry] - api_url = "https://anemoi.ecmwf.int/api/v1" # Required + api_url = "https://anemoi.ecmwf.int/api/v1" - plots_uri_pattern = "s3://ml-artefacts/{expver}/{basename}" # Optional - datasets_uri_pattern = "s3://ml-datasets/{name}" # Optional - weights_uri_pattern = "s3://ml-weights/{uuid}.ckpt" # Optional - weights_platform = "ewc" # Optional + plots_uri_pattern = "s3://ml-artefacts/{expver}/{basename}" + datasets_uri_pattern = "s3://ml-datasets/{name}" + weights_uri_pattern = "s3://ml-weights/{uuid}.ckpt" + weights_platform = "ewc" The second config file is ``~/.config/anemoi/config.secret.toml``. This -file should have the right permissions set to avoid unauthorized access -(`chmod 600 `). All keys are required. +file must have the right permissions set to avoid unauthorized access +(`chmod 600 `). All keys in this file have no default values. .. code:: diff --git a/src/anemoi/registry/commands/worker.py b/src/anemoi/registry/commands/worker.py index a90ae74..2a33d92 100644 --- a/src/anemoi/registry/commands/worker.py +++ b/src/anemoi/registry/commands/worker.py @@ -17,7 +17,7 @@ from anemoi.registry.commands.base import BaseCommand from anemoi.registry.tasks import TaskCatalogueEntry from anemoi.registry.utils import list_to_dict -from anemoi.registry.workers import Worker +from anemoi.registry.workers import TransferDatasetWorker LOG = logging.getLogger(__name__) @@ -38,8 +38,7 @@ def add_arguments(self, command_parser): command_parser.add_argument( "action", help="Action to perform", - choices=["transfer-dataset"], - default="transfer-dataset", + choices=["transfer-dataset", "delete-dataset"], nargs="?", ) command_parser.add_argument( @@ -70,7 +69,7 @@ def run(self, args): kwargs.pop("debug") kwargs.pop("version") - Worker(**kwargs).run() + TransferDatasetWorker(**kwargs).run() command = WorkerCommand diff --git a/src/anemoi/registry/config.yaml b/src/anemoi/registry/config.yaml new file mode 100644 index 0000000..1e79be2 --- /dev/null +++ b/src/anemoi/registry/config.yaml @@ -0,0 +1,7 @@ +registry: + api_url: "https://anemoi.ecmwf.int/api/v1" + + plots_uri_pattern: "s3://ml-artefacts/{expver}/{basename}" + datasets_uri_pattern: "s3://ml-datasets/{name}" + weights_uri_pattern: "s3://ml-weights/{uuid}.ckpt" + weights_platform: "ewc" diff --git a/src/anemoi/registry/workers.py b/src/anemoi/registry/workers.py deleted file mode 100644 index 020bd67..0000000 --- a/src/anemoi/registry/workers.py +++ /dev/null @@ -1,256 +0,0 @@ -# (C) Copyright 2023 European Centre for Medium-Range Weather Forecasts. -# This software is licensed under the terms of the Apache Licence Version 2.0 -# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -# In applying this licence, ECMWF does not waive the privileges and immunities -# granted to it by virtue of its status as an intergovernmental organisation -# nor does it submit to any jurisdiction. 
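The worker command now delegates to ``TransferDatasetWorker``, which polls the task catalogue. How a queued task is typically located and claimed, sketched with illustrative filter values (``ewc`` is a placeholder platform name):

.. code:: python

    from anemoi.registry.tasks import TaskCatalogueEntryList

    # queued transfer tasks for one destination, oldest "updated" first
    cat = TaskCatalogueEntryList(status="queued", action="transfer-dataset", destination="ewc")
    for entry in cat:
        entry.take_ownership()  # JSON Patch "test" op: fails if the task is no longer queued
        break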
- -import datetime -import logging -import os -import signal -import sys -import threading -import time - -from anemoi.utils.humanize import when - -from anemoi.registry.entry.dataset import DatasetCatalogueEntry -from anemoi.registry.tasks import TaskCatalogueEntryList - -# from anemoi.utils.provenance import trace_info - -LOG = logging.getLogger(__name__) - - -class Worker: - def __init__( - self, - action, - destination, - timeout=None, - wait=60, - stop_if_finished=True, - target_dir=".", - published_target_dir=None, - auto_register=True, - threads=1, - heartbeat=60, - max_no_heartbeat=0, - loop=False, - check_todo=False, - request={}, - ): - """Run a worker that will process tasks in the queue. - timeout: Kill itself after `timeout` seconds. - wait: When no task is found, wait `wait` seconds before checking again. - """ - assert action == "transfer-dataset", action - - if not destination: - raise ValueError("No destination platform specified") - if not action: - raise ValueError("No action specified") - - self.destination = destination - self.target_dir = target_dir - self.published_target_dir = published_target_dir or target_dir - self.request = request - self.threads = threads - self.heartbeat = heartbeat - self.max_no_heartbeat = max_no_heartbeat - self.loop = loop - self.check_todo = check_todo - - self.wait = wait - self.stop_if_finished = stop_if_finished - self.auto_register = auto_register - if timeout: - signal.alarm(timeout) - - if not os.path.exists(target_dir): - raise ValueError(f"Target directory {target_dir} must already exist") - - def run(self): - if self.check_todo: - task = self.choose_task() - if task: - LOG.info("There are tasks to do.") - sys.exit(0) - else: - LOG.info("No tasks to do.") - sys.exit(1) - - if self.loop: - while True: - res = self.process_one_task() - - if self.stop_if_finished and res is None: - LOG.info("All tasks have been processed, stopping.") - return - - LOG.info(f"Waiting {self.wait} seconds before checking again.") - time.sleep(self.wait) - else: - self.process_one_task() - - def choose_task(self): - request = self.request.copy() - request["destination"] = request.get("destination", self.destination) - request["action"] = "transfer-dataset" - - # if a task is queued, take it - for entry in TaskCatalogueEntryList(status="queued", **request): - return entry - - # else if a task is running, check if it has been running for too long, and free it - if self.max_no_heartbeat == 0: - return None - - cat = TaskCatalogueEntryList(status="running", **request) - if not cat: - LOG.info("No queued tasks found") - else: - LOG.info(cat.to_str(long=True)) - for entry in cat: - updated = datetime.datetime.fromisoformat(entry.record["updated"]) - LOG.info(f"Task {entry.key} is already running, last update {when(updated, use_utc=True)}.") - if (datetime.datetime.utcnow() - updated).total_seconds() > self.max_no_heartbeat: - LOG.warning( - f"Task {entry.key} has been running for more than {self.max_no_heartbeat} seconds, freeing it." 
- ) - entry.release_ownership() - - def process_one_task(self): - entry = self.choose_task() - if not entry: - return False - - uuid = entry.key - LOG.info(f"Processing task {uuid}: {entry}") - self.parse_entry(entry) # for checking only - - entry.take_ownership() - self.process_entry_with_heartbeat(entry) - LOG.info(f"Task {uuid} completed.") - entry.unregister() - LOG.info(f"Task {uuid} deleted.") - return True - - def process_entry_with_heartbeat(self, entry): - STOP = [] - - # create another thread to send heartbeat - def send_heartbeat(): - while True: - try: - entry.set_status("running") - except Exception: - return - for _ in range(self.heartbeat): - time.sleep(1) - if len(STOP) > 0: - STOP.pop() - return - - thread = threading.Thread(target=send_heartbeat) - thread.start() - - try: - self.process_entry(entry) - finally: - STOP.append(1) # stop the heartbeat thread - thread.join() - - def process_entry(self, entry): - destination, source, dataset = self.parse_entry(entry) - dataset_entry = DatasetCatalogueEntry(key=dataset) - - LOG.info(f"Transferring {dataset} from '{source}' to '{destination}'") - - def get_source_path(): - e = dataset_entry.record - if "locations" not in e: - raise ValueError(f"Dataset {dataset} has no locations") - locations = e["locations"] - - if source not in locations: - raise ValueError( - f"Dataset {dataset} is not available at {source}. Available locations: {list(locations.keys())}" - ) - - if "path" not in locations[source]: - raise ValueError(f"Dataset {dataset} has no path at {source}") - - path = locations[source]["path"] - - return path - - source_path = get_source_path() - basename = os.path.basename(source_path) - target_path = os.path.join(self.target_dir, basename) - if os.path.exists(target_path): - LOG.error(f"Target path {target_path} already exists, skipping.") - return - - from anemoi.utils.s3 import download - - LOG.info(f"Source path: {source_path}") - LOG.info(f"Target path: {target_path}") - - if source_path.startswith("s3://"): - source_path = source_path + "/" if not source_path.endswith("/") else source_path - - if target_path.startswith("s3://"): - LOG.warning("Uploading to S3 is experimental and has not been tested yet.") - download(source_path, target_path, resume=True, threads=self.threads) - return - else: - target_tmp_path = os.path.join(self.target_dir + "-downloading", basename) - os.makedirs(os.path.dirname(target_tmp_path), exist_ok=True) - download(source_path, target_tmp_path, resume=True, threads=self.threads) - os.rename(target_tmp_path, target_path) - - if self.auto_register: - published_target_path = os.path.join(self.published_target_dir, basename) - dataset_entry.add_location(platform=destination, path=published_target_path) - - @classmethod - def parse_entry(cls, entry): - data = entry.record.copy() - - assert isinstance(data, dict), data - assert data["action"] == "transfer-dataset", data["action"] - - def is_alphanumeric(s): - assert isinstance(s, str), s - return all(c.isalnum() or c in ("-", "_") for c in s) - - destination = data.pop("destination") - source = data.pop("source") - dataset = data.pop("dataset") - assert is_alphanumeric(destination), destination - assert is_alphanumeric(source), source - assert is_alphanumeric(dataset), dataset - for k in data: - if k not in ("action", "status", "progress", "created", "updated", "uuid"): - LOG.warning(f"Unknown key {k}=data[k]") - data = None - - if "/" in destination: - raise ValueError(f"Destination {destination} must not contain '/', this is a platform name") - 
if "." in destination: - raise ValueError(f"Destination {destination} must not contain '.', this is a platform name") - - if "/" in source: - raise ValueError(f"Source {source} must not contain '/', this is a platform name") - if "." in source: - raise ValueError(f"Source {source} must not contain '.', this is a platform name") - - if "." in dataset: - raise ValueError(f"The dataset {dataset} must not contain a '.', this is the name of the dataset.") - - assert isinstance(destination, str), destination - assert isinstance(source, str), source - assert isinstance(dataset, str), dataset - return destination, source, dataset From cc68fdf7ebaddc5ecc3e8c43a08cbaddbb3397c8 Mon Sep 17 00:00:00 2001 From: Florian Pinault Date: Tue, 9 Jul 2024 16:50:42 +0200 Subject: [PATCH 15/64] upload artefact --- src/anemoi/registry/commands/experiments.py | 2 + src/anemoi/registry/commands/upload.py | 48 ++++ src/anemoi/registry/config.yaml | 1 + src/anemoi/registry/entry/experiment.py | 24 +- src/anemoi/registry/workers/__init__.py | 268 ++++++++++++++++++++ 5 files changed, 335 insertions(+), 8 deletions(-) create mode 100644 src/anemoi/registry/commands/upload.py create mode 100644 src/anemoi/registry/workers/__init__.py diff --git a/src/anemoi/registry/commands/experiments.py b/src/anemoi/registry/commands/experiments.py index f75a8ba..cf95166 100644 --- a/src/anemoi/registry/commands/experiments.py +++ b/src/anemoi/registry/commands/experiments.py @@ -45,6 +45,7 @@ def add_arguments(self, command_parser): command_parser.add_argument("--add-weights", nargs="+", help="Add weights to the experiment.") command_parser.add_argument("--add-plots", nargs="+", help="Add plots to the experiment.") + command_parser.add_argument("--add-artefacts", nargs="+", help="Add artefacts to the experiment.") command_parser.add_argument("--overwrite", help="Overwrite if already exists.", action="store_true") def check_arguments(self, args): @@ -61,6 +62,7 @@ def _run(self, entry, args): self.process_task(entry, args, "unregister") self.process_task(entry, args, "register", overwrite=args.overwrite) self.process_task(entry, args, "add_weights") + self.process_task(entry, args, "add_artefacts") self.process_task(entry, args, "add_plots") diff --git a/src/anemoi/registry/commands/upload.py b/src/anemoi/registry/commands/upload.py new file mode 100644 index 0000000..65439e7 --- /dev/null +++ b/src/anemoi/registry/commands/upload.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python +# (C) Copyright 2024 ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. +# + +"""Command place holder. Delete when we have real commands. + +""" + +import logging +import os + +LOG = logging.getLogger(__name__) + +UPLOAD_ALLOWED = False +if os.environ.get("ANEMOI_FORCE_UPLOAD"): + UPLOAD_ALLOWED = True + + +class Upload: + """Just upload.""" + + internal = True + timestamp = True + + def add_arguments(self, command_parser): + command_parser.add_argument("path", help="Path to upload.") + command_parser.add_argument( + "target", + help="Target s3 path. 
Please consider using `anemoi-registry experiment --add-artefacts path`",
+        )
+        command_parser.add_argument("--overwrite", help="Overwrite if already exists.", action="store_true")
+
+    def run(self, args):
+        if not UPLOAD_ALLOWED:
+            LOG.error("Direct upload not allowed.")
+            return
+        from anemoi.utils.s3 import upload
+
+        upload(args.path, args.target, overwrite=args.overwrite)
+
+
+command = Upload
diff --git a/src/anemoi/registry/config.yaml b/src/anemoi/registry/config.yaml
index 1e79be2..789af5e 100644
--- a/src/anemoi/registry/config.yaml
+++ b/src/anemoi/registry/config.yaml
@@ -2,6 +2,7 @@ registry:
   api_url: "https://anemoi.ecmwf.int/api/v1"
 
   plots_uri_pattern: "s3://ml-artefacts/{expver}/{basename}"
+  artefacts_uri_pattern: "s3://ml-artefacts/{expver}/{basename}"
   datasets_uri_pattern: "s3://ml-datasets/{name}"
   weights_uri_pattern: "s3://ml-weights/{uuid}.ckpt"
   weights_platform: "ewc"
diff --git a/src/anemoi/registry/entry/experiment.py b/src/anemoi/registry/entry/experiment.py
index ac05a1f..56481c1 100644
--- a/src/anemoi/registry/entry/experiment.py
+++ b/src/anemoi/registry/entry/experiment.py
@@ -41,12 +41,24 @@ def add_plots(self, *paths, **kwargs):
             self._add_one_plot(path, **kwargs)
 
     def _add_one_plot(self, path, **kwargs):
-        """target is a pattern: s3://bucket/{expver}/{basename}"""
+        return self._add_one_plot_or_artefact("plot", path, **kwargs)
 
+    def add_weights(self, *paths, **kwargs):
+        for path in paths:
+            self._add_one_weights(path, **kwargs)
+
+    def add_artefacts(self, *paths, **kwargs):
+        for path in paths:
+            self._add_one_artefact(path, **kwargs)
+
+    def _add_one_artefact(self, path, **kwargs):
+        return self._add_one_plot_or_artefact("artefact", path, **kwargs)
+
+    def _add_one_plot_or_artefact(self, kind, path, **kwargs):
         if not os.path.exists(path):
-            raise FileNotFoundError(f"Could not find plot at {path}")
+            raise FileNotFoundError(f"Could not find {kind} to upload at {path}")
 
-        target = config()["plots_uri_pattern"]
+        target = config()[f"{kind}s_uri_pattern"]
         basename = os.path.basename(path)
         target = target.format(expver=self.key, basename=basename, filename=basename)
 
@@ -54,11 +66,7 @@ def _add_one_plot(self, path, **kwargs):
         upload(path, target, overwrite=True)
 
         dic = dict(url=target, name=basename, path=path)
-        self.rest_item.patch([{"op": "add", "path": "/plots/-", "value": dic}])
-
-    def add_weights(self, *paths, **kwargs):
-        for path in paths:
-            self._add_one_weights(path, **kwargs)
+        self.rest_item.patch([{"op": "add", "path": f"/{kind}s/-", "value": dic}])
 
     def _add_one_weights(self, path, **kwargs):
         weights = WeightCatalogueEntry(path=path)
diff --git a/src/anemoi/registry/workers/__init__.py b/src/anemoi/registry/workers/__init__.py
new file mode 100644
index 0000000..e01a462
--- /dev/null
+++ b/src/anemoi/registry/workers/__init__.py
@@ -0,0 +1,268 @@
+# (C) Copyright 2023 European Centre for Medium-Range Weather Forecasts.
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
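The ``_add_one_plot_or_artefact`` refactor above builds the upload target from the configured URI patterns. A sketch of the expansion with placeholder values (``abcd`` and ``loss.png`` are illustrative):

.. code:: python

    from anemoi.registry import config

    pattern = config()["plots_uri_pattern"]  # default: s3://ml-artefacts/{expver}/{basename}
    target = pattern.format(expver="abcd", basename="loss.png", filename="loss.png")
    # -> "s3://ml-artefacts/abcd/loss.png" (unused keyword fields are simply ignored)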
+ +import datetime +import logging +import os +import signal +import sys +import threading +import time + +from anemoi.utils.humanize import when + +from anemoi.registry.entry.dataset import DatasetCatalogueEntry +from anemoi.registry.tasks import TaskCatalogueEntryList + +# from anemoi.utils.provenance import trace_info + +LOG = logging.getLogger(__name__) + + +class Worker: + def __init__( + self, + action, + # generic worker options + heartbeat=60, + max_no_heartbeat=0, + loop=False, + check_todo=False, + timeout=None, + wait=60, + stop_if_finished=True, + ): + """Run a worker that will process tasks in the queue. + timeout: Kill itself after `timeout` seconds. + wait: When no task is found, wait `wait` seconds before checking again. + """ + self.heartbeat = heartbeat + self.max_no_heartbeat = max_no_heartbeat + self.loop = loop + self.check_todo = check_todo + + self.wait = wait + self.stop_if_finished = stop_if_finished + if timeout: + signal.alarm(timeout) + + def run(self): + if self.check_todo: + task = self.choose_task() + if task: + LOG.info("There are tasks to do.") + sys.exit(0) + else: + LOG.info("No tasks to do.") + sys.exit(1) + + if self.loop: + while True: + res = self.process_one_task() + + if self.stop_if_finished and res is None: + LOG.info("All tasks have been processed, stopping.") + return + + LOG.info(f"Waiting {self.wait} seconds before checking again.") + time.sleep(self.wait) + else: + self.process_one_task() + + def choose_task(self): + request = self.request.copy() + request["destination"] = request.get("destination", self.destination) + request["action"] = "transfer-dataset" + + # if a task is queued, take it + for entry in TaskCatalogueEntryList(status="queued", **request): + return entry + + # else if a task is running, check if it has been running for too long, and free it + if self.max_no_heartbeat == 0: + return None + + cat = TaskCatalogueEntryList(status="running", **request) + if not cat: + LOG.info("No queued tasks found") + else: + LOG.info(cat.to_str(long=True)) + for entry in cat: + updated = datetime.datetime.fromisoformat(entry.record["updated"]) + LOG.info(f"Task {entry.key} is already running, last update {when(updated, use_utc=True)}.") + if (datetime.datetime.utcnow() - updated).total_seconds() > self.max_no_heartbeat: + LOG.warning( + f"Task {entry.key} has been running for more than {self.max_no_heartbeat} seconds, freeing it." 
+
+    def process_one_task(self):
+        entry = self.choose_task()
+        if not entry:
+            return None
+
+        uuid = entry.key
+        LOG.info(f"Processing task {uuid}: {entry}")
+        self.parse_entry(entry)  # for checking only
+
+        entry.take_ownership()
+        self.process_entry_with_heartbeat(entry)
+        LOG.info(f"Task {uuid} completed.")
+        entry.unregister()
+        LOG.info(f"Task {uuid} deleted.")
+        return True
+
+    def process_entry_with_heartbeat(self, entry):
+        STOP = []
+
+        # create another thread to send heartbeat
+        def send_heartbeat():
+            while True:
+                try:
+                    entry.set_status("running")
+                except Exception:
+                    return
+                for _ in range(self.heartbeat):
+                    time.sleep(1)
+                    if len(STOP) > 0:
+                        STOP.pop()
+                        return
+
+        thread = threading.Thread(target=send_heartbeat)
+        thread.start()
+
+        try:
+            self.process_entry(entry)
+        finally:
+            STOP.append(1)  # stop the heartbeat thread
+            thread.join()
+
+    def process_entry(self, entry):
+        destination, source, dataset = self.parse_entry(entry)
+        dataset_entry = DatasetCatalogueEntry(key=dataset)
+
+        LOG.info(f"Transferring {dataset} from '{source}' to '{destination}'")
+
+        def get_source_path():
+            e = dataset_entry.record
+            if "locations" not in e:
+                raise ValueError(f"Dataset {dataset} has no locations")
+            locations = e["locations"]
+
+            if source not in locations:
+                raise ValueError(
+                    f"Dataset {dataset} is not available at {source}. Available locations: {list(locations.keys())}"
+                )
+
+            if "path" not in locations[source]:
+                raise ValueError(f"Dataset {dataset} has no path at {source}")
+
+            path = locations[source]["path"]
+
+            return path
+
+        source_path = get_source_path()
+        basename = os.path.basename(source_path)
+        target_path = os.path.join(self.target_dir, basename)
+        if os.path.exists(target_path):
+            LOG.error(f"Target path {target_path} already exists, skipping.")
+            return
+
+        from anemoi.utils.s3 import download
+
+        LOG.info(f"Source path: {source_path}")
+        LOG.info(f"Target path: {target_path}")
+
+        if source_path.startswith("s3://"):
+            source_path = source_path + "/" if not source_path.endswith("/") else source_path
+
+        if target_path.startswith("s3://"):
+            LOG.warning("Uploading to S3 is experimental and has not been tested yet.")
+            download(source_path, target_path, resume=True, threads=self.threads)
+            return
+        else:
+            target_tmp_path = os.path.join(self.target_dir + "-downloading", basename)
+            os.makedirs(os.path.dirname(target_tmp_path), exist_ok=True)
+            download(source_path, target_tmp_path, resume=True, threads=self.threads)
+            os.rename(target_tmp_path, target_path)
+
+        if self.auto_register:
+            published_target_path = os.path.join(self.published_target_dir, basename)
+            dataset_entry.add_location(platform=destination, path=published_target_path)
+
+    @classmethod
+    def parse_entry(cls, entry):
+        data = entry.record.copy()
+
+        assert isinstance(data, dict), data
+        assert data["action"] == "transfer-dataset", data["action"]
+
+        def is_alphanumeric(s):
+            assert isinstance(s, str), s
+            return all(c.isalnum() or c in ("-", "_") for c in s)
+
+        destination = data.pop("destination")
+        source = data.pop("source")
+        dataset = data.pop("dataset")
+        assert is_alphanumeric(destination), destination
+        assert is_alphanumeric(source), source
+        assert is_alphanumeric(dataset), dataset
+        for k in data:
+            if k not in ("action", "status", "progress", "created", "updated", "uuid"):
+                LOG.warning(f"Unknown key {k}={data[k]}")
+        data = None
+
+        if "/" in destination:
+            raise ValueError(f"Destination {destination} must not contain '/', this is a platform name")
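+        # Platform and dataset identifiers are bare names: reject anything
+        # that looks like a path or a dotted name.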
if "." in destination: + raise ValueError(f"Destination {destination} must not contain '.', this is a platform name") + + if "/" in source: + raise ValueError(f"Source {source} must not contain '/', this is a platform name") + if "." in source: + raise ValueError(f"Source {source} must not contain '.', this is a platform name") + + if "." in dataset: + raise ValueError(f"The dataset {dataset} must not contain a '.', this is the name of the dataset.") + + assert isinstance(destination, str), destination + assert isinstance(source, str), source + assert isinstance(dataset, str), dataset + return destination, source, dataset + + +class TransferDatasetWorker(Worker): + def __init__( + self, + action, + # specific worker options + destination, + target_dir=".", + published_target_dir=None, + auto_register=True, + threads=1, + request={}, + **kwargs, + ): + super().__init__(action, **kwargs) + + assert action == "transfer-dataset", action + + if not destination: + raise ValueError("No destination platform specified") + if not action: + raise ValueError("No action specified") + + self.destination = destination + self.target_dir = target_dir + self.published_target_dir = published_target_dir or target_dir + self.request = request + self.threads = threads + + self.auto_register = auto_register + if not os.path.exists(target_dir): + raise ValueError(f"Target directory {target_dir} must already exist") From 5e505f9d43b417ede310b3d11decd0377907e5e8 Mon Sep 17 00:00:00 2001 From: Florian Pinault Date: Wed, 10 Jul 2024 14:51:01 +0200 Subject: [PATCH 16/64] commit tests --- src/anemoi/registry/entry/experiment.py | 2 +- tests/dataset_recipe.yaml | 43 ++++++++++++++ tests/test_all.py | 78 +++++++++++++++++++++++++ 3 files changed, 122 insertions(+), 1 deletion(-) create mode 100644 tests/dataset_recipe.yaml create mode 100644 tests/test_all.py diff --git a/src/anemoi/registry/entry/experiment.py b/src/anemoi/registry/entry/experiment.py index 56481c1..456f8c5 100644 --- a/src/anemoi/registry/entry/experiment.py +++ b/src/anemoi/registry/entry/experiment.py @@ -66,7 +66,7 @@ def _add_one_plot_or_artefact(self, kind, path, **kwargs): upload(path, target, overwrite=True) dic = dict(url=target, name=basename, path=path) - self.rest_item.patch([{"op": "add", "path": "/{kind}s/-", "value": dic}]) + self.rest_item.patch([{"op": "add", "path": f"/{kind}s/-", "value": dic}]) def _add_one_weights(self, path, **kwargs): weights = WeightCatalogueEntry(path=path) diff --git a/tests/dataset_recipe.yaml b/tests/dataset_recipe.yaml new file mode 100644 index 0000000..6e96a27 --- /dev/null +++ b/tests/dataset_recipe.yaml @@ -0,0 +1,43 @@ +common: + mars_request: &mars_request + expver: "0001" + class: ea + grid: 20./20. 
+ +dates: + start: 2020-12-30 00:00:00 + end: 2021-01-03 12:00:00 + frequency: 12h + +input: + join: + - mars: + <<: *mars_request + param: [2t] + levtype: sfc + stream: oper + type: an + + - mars: + <<: *mars_request + param: [q, t] + levtype: pl + level: [50, 100] + stream: oper + type: an + + - accumulations: + <<: *mars_request + levtype: sfc + param: [cp, tp] + # accumulation_period: 6h + + - forcings: + template: ${input.join.0.mars} + param: + - cos_latitude + +naming_scheme: "{param}_{levelist}{level_units}_{accumultion_period}" + +statistics: + end: 2021 diff --git a/tests/test_all.py b/tests/test_all.py new file mode 100644 index 0000000..90b6099 --- /dev/null +++ b/tests/test_all.py @@ -0,0 +1,78 @@ +import os +import subprocess + +TEST_DATASET_INPUT = "aifs-ea-an-oper-0001-mars-20p0-1979-1979-6h-v0-testing" + +pid = os.getpid() +TMP_DATASET = f"{TEST_DATASET_INPUT}-{pid}" + +TMP_DATASET_PATH = f"./data/{TMP_DATASET}.zarr" + + +def run(*args): + print(" ".join(args)) + try: + subprocess.check_call(args) + except Exception as e: + e.add_note = f"Command failed: {' '.join(args)}" + + raise + + +def setup_module(): + run("anemoi-datasets", "create", "dataset_recipe.yaml", TMP_DATASET_PATH, "--overwrite") + assert os.path.exists(TMP_DATASET_PATH) + + +# def teardown_module(): +# run("anemoi-registry", "datasets", TMP_DATASET, "--unregister") +# os.remove(TMP_DATASET_PATH) + + +def test_datasets(): + run("anemoi-registry", "datasets", TMP_DATASET_PATH, "--register") + # assert run("anemoi-registry", "datasets", TMP_DATASET) == 1 + run("anemoi-registry", "datasets", TMP_DATASET) + run("anemoi-registry", "datasets", TMP_DATASET, "--add-recipe", "./data/recipe.yaml") + run("anemoi-registry", "datasets", TMP_DATASET, "--set-status", "testing") + run("anemoi-registry", "datasets", TMP_DATASET, "--add-location", "/the/dataset/path", "--platform", "atos") + run( + "anemoi-registry", + "datasets", + TMP_DATASET, + "--add-location", + "/the/dataset/path/other", + "--platform", + "leonardo", + ) + + +def test_weights(): + run("anemoi-registry", "weights", "a5275e04-0000-0000-a0f6-be19591b09fe", "--unregister") + # assert run("anemoi-registry", "weights", "a5275e04-0000-0000-a0f6-be19591b09fe") == 1 + run("anemoi-registry", "weights", "./data/test-checkpoint.ckpt", "--register") + run("anemoi-registry", "weights", "a5275e04-0000-0000-a0f6-be19591b09fe") + run( + "anemoi-registry", + "weights", + "./data/test-checkpoint.ckpt", + "--add-location", + "s3://ml-weights/a5275e04-0000-0000-a0f6-be19591b09fe.ckpt", + "--platform", + "ewc", + ) + + +def test_experiments(): + run("anemoi-registry", "experiments", "./data/config.yaml", "--unregister") + # assert run("anemoi-registry", "experiments", "i4df") == 1 + run("anemoi-registry", "experiments", "./data/config.yaml", "--register") + run("anemoi-registry", "experiments", "i4df") + run("anemoi-registry", "experiments", "i4df", "--add-plots", "./data/quaver.pdf") + run("anemoi-registry", "experiments", "i4df", "--add-weights", "./data/test-checkpoint.ckpt") + + +def test_list_commands(): + run("anemoi-registry", "list", "experiments", ">", "e.txt") + run("anemoi-registry", "list", "weights", ">", "w.txt") + run("anemoi-registry", "list", "datasets", ">", "d.txt") From b6c27d5d6fe8f86ead9686d0ba4ce3be0144b3b9 Mon Sep 17 00:00:00 2001 From: Florian Pinault Date: Wed, 10 Jul 2024 15:10:51 +0200 Subject: [PATCH 17/64] test files --- tests/data/config.yaml | 54 ++++++++++++++++++++++++++++++++++++++++++ tests/data/recipe.yaml | 5 ++++ 2 files changed, 59 
insertions(+) create mode 100644 tests/data/config.yaml create mode 100644 tests/data/recipe.yaml diff --git a/tests/data/config.yaml b/tests/data/config.yaml new file mode 100644 index 0000000..8a9bb75 --- /dev/null +++ b/tests/data/config.yaml @@ -0,0 +1,54 @@ +dates: + end: 2023-08-31 00:00:00 + frequency: 24 + start: 2023-06-01 00:00:00 +description: Minimal config +ecflow: + host: ecflow-gen-mlx-001 + limits: + gpu: 10 + port: 3141 + target_running_user: mlx +evaluation: + name: quaver + scores: scorecard +input: + name: mars +metadata: + config_home: /home/mafp/prepml/i4df + expver: i4df + input: scorecard.yaml + owner: mafp + prepml_command: /home/mafp/venvs/mafp-dev24-02/bin/python3 -m prepml + prepml_module_version: '0.1' + time: '2024-02-22T17:10:31.433359' +model: + name: aifs +output: + class: rd + database: marsrd + name: mars +platform: + flavours: + cpu: + host: + slurm: ac-login + late: -c +23:59 + submit_arguments: + account: ecaifs + gpu: + host: + slurm: ac-login + late: -c +00:10 + submit_arguments: + account: ecaifs + cpus-per-task: '32' + gres: gpu:1 + mem: 64G + nice: '100' + partition: gpu + time: 0-00:10 + name: atos +runner: + name: ai-models-module + version: auto diff --git a/tests/data/recipe.yaml b/tests/data/recipe.yaml new file mode 100644 index 0000000..478c6a2 --- /dev/null +++ b/tests/data/recipe.yaml @@ -0,0 +1,5 @@ +- match: + class: od + stream: elda + info: + runs: [6, 18] From 75880fcdca167b442643243f104b3e79a51a0536 Mon Sep 17 00:00:00 2001 From: Florian Pinault Date: Wed, 10 Jul 2024 13:32:07 +0000 Subject: [PATCH 18/64] more tests --- tests/{data => }/config.yaml | 0 tests/{data => }/recipe.yaml | 0 tests/test_all.py | 87 +++++++++++++++++++++++------------- 3 files changed, 56 insertions(+), 31 deletions(-) rename tests/{data => }/config.yaml (100%) rename tests/{data => }/recipe.yaml (100%) mode change 100644 => 100755 tests/test_all.py diff --git a/tests/data/config.yaml b/tests/config.yaml similarity index 100% rename from tests/data/config.yaml rename to tests/config.yaml diff --git a/tests/data/recipe.yaml b/tests/recipe.yaml similarity index 100% rename from tests/data/recipe.yaml rename to tests/recipe.yaml diff --git a/tests/test_all.py b/tests/test_all.py old mode 100644 new mode 100755 index 90b6099..e3d7885 --- a/tests/test_all.py +++ b/tests/test_all.py @@ -1,61 +1,76 @@ +#!/usr/bin/env python import os import subprocess -TEST_DATASET_INPUT = "aifs-ea-an-oper-0001-mars-20p0-1979-1979-6h-v0-testing" +DATASET = "aifs-ea-an-oper-0001-mars-20p0-1979-1979-6h-v0-testing" +DATASET_PATH = f"{DATASET}.zarr" pid = os.getpid() -TMP_DATASET = f"{TEST_DATASET_INPUT}-{pid}" -TMP_DATASET_PATH = f"./data/{TMP_DATASET}.zarr" +TMP_DATASET = f"{DATASET}-{pid}" +TMP_DATASET_PATH = f"{TMP_DATASET}.zarr" def run(*args): print(" ".join(args)) try: - subprocess.check_call(args) + subprocess.check_call(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE) except Exception as e: e.add_note = f"Command failed: {' '.join(args)}" - raise def setup_module(): - run("anemoi-datasets", "create", "dataset_recipe.yaml", TMP_DATASET_PATH, "--overwrite") - assert os.path.exists(TMP_DATASET_PATH) + run("anemoi-registry", "experiments", "./config.yaml", "--register") + run("anemoi-registry", "weights", "./test-checkpoint.ckpt", "--register") + + if not os.path.exists(DATASET_PATH): + run("anemoi-datasets", "create", "dataset_recipe.yaml", DATASET_PATH, "--overwrite") + assert os.path.exists(DATASET_PATH) + + os.symlink(DATASET_PATH, TMP_DATASET_PATH) + 
run("anemoi-registry", "datasets", TMP_DATASET_PATH, "--register") + print("✅ Setup done") -# def teardown_module(): -# run("anemoi-registry", "datasets", TMP_DATASET, "--unregister") -# os.remove(TMP_DATASET_PATH) +def teardown_module(): + print("✅ Start teardown") + e = None + try: + run("anemoi-registry", "weights", "a5275e04-0000-0000-a0f6-be19591b09fe", "--unregister") + except Exception as e: + print(e) + + try: + run("anemoi-registry", "experiments", "./config.yaml", "--unregister") + except Exception as e: + print(e) + + try: + run("anemoi-registry", "datasets", TMP_DATASET, "--unregister") + os.remove(TMP_DATASET_PATH) + except Exception as e: + print(e) + if e: + raise e def test_datasets(): - run("anemoi-registry", "datasets", TMP_DATASET_PATH, "--register") # assert run("anemoi-registry", "datasets", TMP_DATASET) == 1 run("anemoi-registry", "datasets", TMP_DATASET) - run("anemoi-registry", "datasets", TMP_DATASET, "--add-recipe", "./data/recipe.yaml") + run("anemoi-registry", "datasets", TMP_DATASET, "--add-recipe", "./recipe.yaml") run("anemoi-registry", "datasets", TMP_DATASET, "--set-status", "testing") run("anemoi-registry", "datasets", TMP_DATASET, "--add-location", "/the/dataset/path", "--platform", "atos") - run( - "anemoi-registry", - "datasets", - TMP_DATASET, - "--add-location", - "/the/dataset/path/other", - "--platform", - "leonardo", - ) + run("anemoi-registry", "datasets", TMP_DATASET, "--add-location", "/other/path", "--platform", "leonardo") def test_weights(): - run("anemoi-registry", "weights", "a5275e04-0000-0000-a0f6-be19591b09fe", "--unregister") # assert run("anemoi-registry", "weights", "a5275e04-0000-0000-a0f6-be19591b09fe") == 1 - run("anemoi-registry", "weights", "./data/test-checkpoint.ckpt", "--register") run("anemoi-registry", "weights", "a5275e04-0000-0000-a0f6-be19591b09fe") run( "anemoi-registry", "weights", - "./data/test-checkpoint.ckpt", + "./test-checkpoint.ckpt", "--add-location", "s3://ml-weights/a5275e04-0000-0000-a0f6-be19591b09fe.ckpt", "--platform", @@ -64,15 +79,25 @@ def test_weights(): def test_experiments(): - run("anemoi-registry", "experiments", "./data/config.yaml", "--unregister") # assert run("anemoi-registry", "experiments", "i4df") == 1 - run("anemoi-registry", "experiments", "./data/config.yaml", "--register") run("anemoi-registry", "experiments", "i4df") - run("anemoi-registry", "experiments", "i4df", "--add-plots", "./data/quaver.pdf") - run("anemoi-registry", "experiments", "i4df", "--add-weights", "./data/test-checkpoint.ckpt") + run("anemoi-registry", "experiments", "i4df", "--add-plots", "./quaver.pdf") + run("anemoi-registry", "experiments", "i4df", "--add-weights", "./test-checkpoint.ckpt") def test_list_commands(): - run("anemoi-registry", "list", "experiments", ">", "e.txt") - run("anemoi-registry", "list", "weights", ">", "w.txt") - run("anemoi-registry", "list", "datasets", ">", "d.txt") + run("anemoi-registry", "list", "experiments") + run("anemoi-registry", "list", "weights") + run("anemoi-registry", "list", "datasets") + + +if __name__ == "__main__": + test_list_commands() + + setup_module() + try: + test_datasets() + test_weights() + test_experiments() + finally: + teardown_module() From 6b747215ac2aa0c736a62d9e485556fc1bfa0c4e Mon Sep 17 00:00:00 2001 From: Florian Pinault Date: Wed, 10 Jul 2024 13:50:41 +0000 Subject: [PATCH 19/64] tests --- tests/dummy-checkpoint.ckpt | Bin 0 -> 46325 bytes tests/dummy-quaver.pdf | Bin 0 -> 281 bytes ..._recipe.yaml => dummy-recipe-dataset.yaml} | 0 ...nfig.yaml => 
dummy-recipe-experiment.yaml} | 0
 tests/recipe.yaml | 5 ---
 tests/test_all.py | 42 ++++++++++--------
 6 files changed, 24 insertions(+), 23 deletions(-)
 create mode 100644 tests/dummy-checkpoint.ckpt
 create mode 100644 tests/dummy-quaver.pdf
 rename tests/{dataset_recipe.yaml => dummy-recipe-dataset.yaml} (100%)
 rename tests/{config.yaml => dummy-recipe-experiment.yaml} (100%)
 delete mode 100644 tests/recipe.yaml

diff --git a/tests/dummy-checkpoint.ckpt b/tests/dummy-checkpoint.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..8bbcbf4e572fc43c53a1325a91e64630c4576412
GIT binary patch
literal 46325
[base85-encoded binary data omitted]

literal 0
HcmV?d00001

diff --git a/tests/dummy-quaver.pdf b/tests/dummy-quaver.pdf
new file mode 100644
index 0000000000000000000000000000000000000000..77299a4deefeaf5084e12b4644a3cf6bf29aec0d
GIT binary patch
literal 281
[base85-encoded binary data omitted]

literal 0
HcmV?d00001

diff --git a/tests/dataset_recipe.yaml b/tests/dummy-recipe-dataset.yaml
similarity index 100%
rename from tests/dataset_recipe.yaml
rename to tests/dummy-recipe-dataset.yaml
diff --git a/tests/config.yaml b/tests/dummy-recipe-experiment.yaml
similarity index 100%
rename from tests/config.yaml
rename to tests/dummy-recipe-experiment.yaml
diff --git a/tests/recipe.yaml b/tests/recipe.yaml
deleted file mode 100644
index 478c6a2..0000000
--- a/tests/recipe.yaml
+++ /dev/null
@@ -1,5 +0,0 @@
-- match:
-    class: od
-    stream: elda
-  info:
-    runs: [6, 18]
diff --git a/tests/test_all.py b/tests/test_all.py
index e3d7885..cd30a34 100755
--- a/tests/test_all.py
+++ b/tests/test_all.py
@@ -21,44 +21,44 @@ def run(*args):
 
 
 def setup_module():
-    run("anemoi-registry", "experiments", "./config.yaml", "--register")
-    run("anemoi-registry", "weights", "./test-checkpoint.ckpt", "--register")
+    teardown_module(raise_if_error=False)
+    run("anemoi-registry", "experiments", "./dummy-recipe-experiment.yaml", "--register")
+    run("anemoi-registry", "weights", "./dummy-checkpoint.ckpt", "--register")
 
     if not os.path.exists(DATASET_PATH):
-        run("anemoi-datasets", "create", "dataset_recipe.yaml", DATASET_PATH, "--overwrite")
+        run("anemoi-datasets", "create", "./dummy-recipe-dataset.yaml", DATASET_PATH, "--overwrite")
         assert os.path.exists(DATASET_PATH)
 
     os.symlink(DATASET_PATH, TMP_DATASET_PATH)
 
     run("anemoi-registry", "datasets", TMP_DATASET_PATH, "--register")
-    print("✅ Setup done")
+    
print("# Setup done") -def teardown_module(): - print("✅ Start teardown") - e = None +def teardown_module(raise_if_error=True): + error = None try: run("anemoi-registry", "weights", "a5275e04-0000-0000-a0f6-be19591b09fe", "--unregister") except Exception as e: - print(e) + error = e try: - run("anemoi-registry", "experiments", "./config.yaml", "--unregister") + run("anemoi-registry", "experiments", "./dummy-recipe-experiment.yaml", "--unregister") except Exception as e: - print(e) + error = e try: run("anemoi-registry", "datasets", TMP_DATASET, "--unregister") os.remove(TMP_DATASET_PATH) except Exception as e: - print(e) - if e: - raise e + error = e + if error and raise_if_error: + raise error def test_datasets(): # assert run("anemoi-registry", "datasets", TMP_DATASET) == 1 run("anemoi-registry", "datasets", TMP_DATASET) - run("anemoi-registry", "datasets", TMP_DATASET, "--add-recipe", "./recipe.yaml") + run("anemoi-registry", "datasets", TMP_DATASET, "--add-recipe", "./dummy-recipe-dataset.yaml") run("anemoi-registry", "datasets", TMP_DATASET, "--set-status", "testing") run("anemoi-registry", "datasets", TMP_DATASET, "--add-location", "/the/dataset/path", "--platform", "atos") run("anemoi-registry", "datasets", TMP_DATASET, "--add-location", "/other/path", "--platform", "leonardo") @@ -70,7 +70,7 @@ def test_weights(): run( "anemoi-registry", "weights", - "./test-checkpoint.ckpt", + "./dummy-checkpoint.ckpt", "--add-location", "s3://ml-weights/a5275e04-0000-0000-a0f6-be19591b09fe.ckpt", "--platform", @@ -79,10 +79,9 @@ def test_weights(): def test_experiments(): - # assert run("anemoi-registry", "experiments", "i4df") == 1 run("anemoi-registry", "experiments", "i4df") - run("anemoi-registry", "experiments", "i4df", "--add-plots", "./quaver.pdf") - run("anemoi-registry", "experiments", "i4df", "--add-weights", "./test-checkpoint.ckpt") + run("anemoi-registry", "experiments", "i4df", "--add-plots", "./dummy-quaver.pdf") + run("anemoi-registry", "experiments", "i4df", "--add-weights", "./dummy-checkpoint.ckpt") def test_list_commands(): @@ -93,11 +92,18 @@ def test_list_commands(): if __name__ == "__main__": test_list_commands() + print() + print("# Start setup") setup_module() try: + print() test_datasets() + print() test_weights() + print() test_experiments() + print() finally: + print("# Start teardown") teardown_module() From d383425234e3e59c65efc5fd8b08092907edde49 Mon Sep 17 00:00:00 2001 From: Florian Pinault Date: Wed, 10 Jul 2024 14:01:04 +0000 Subject: [PATCH 20/64] tests --- .github/workflows/python-publish.yml | 3 ++- tests/test_rest.py | 22 ---------------------- 2 files changed, 2 insertions(+), 23 deletions(-) delete mode 100644 tests/test_rest.py diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml index 759adae..e322a28 100644 --- a/.github/workflows/python-publish.yml +++ b/.github/workflows/python-publish.yml @@ -45,7 +45,8 @@ jobs: pip freeze - name: Tests - run: pytest + run: | + cd tests && python3 test_all.py deploy: diff --git a/tests/test_rest.py b/tests/test_rest.py deleted file mode 100644 index 7912358..0000000 --- a/tests/test_rest.py +++ /dev/null @@ -1,22 +0,0 @@ -# (C) Copyright 2024 European Centre for Medium-Range Weather Forecasts. -# This software is licensed under the terms of the Apache Licence Version 2.0 -# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 
-# In applying this licence, ECMWF does not waive the privileges and immunities -# granted to it by virtue of its status as an intergovernmental organisation -# nor does it submit to any jurisdiction. - - -import requests - -from anemoi.registry.rest import Rest - -# patch requests to intercept all requests -requests.request = Rest.request - - -def test_rest(): - pass - - -if __name__ == "__main__": - test_rest() From 4de9c438da8bc938c6bc1956cd1bbc3da9ca4fec Mon Sep 17 00:00:00 2001 From: Florian Pinault Date: Thu, 11 Jul 2024 13:06:21 +0200 Subject: [PATCH 21/64] polish --- src/anemoi/registry/commands/entry.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/anemoi/registry/commands/entry.py b/src/anemoi/registry/commands/entry.py index 71aee7f..b57262f 100644 --- a/src/anemoi/registry/commands/entry.py +++ b/src/anemoi/registry/commands/entry.py @@ -200,9 +200,12 @@ def load(self, args): LOG.info(f"Entry in {collection} has been created : {res}.") def remove(self, args): + path = args.path + if not path.startswith("/"): + path = "/" + path rest = Rest() - rest.delete(args.path) - LOG.info(f"{args.path} has been deleted.") + rest.delete(path) + LOG.info(f"{path} has been deleted.") command = Entry From ced772c9991f094050ff2b93908af2c6a80eb20a Mon Sep 17 00:00:00 2001 From: Florian Pinault Date: Thu, 11 Jul 2024 16:06:05 +0000 Subject: [PATCH 22/64] cleaning tests --- tests/dummy-recipe-dataset.yaml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tests/dummy-recipe-dataset.yaml b/tests/dummy-recipe-dataset.yaml index 6e96a27..0cbd891 100644 --- a/tests/dummy-recipe-dataset.yaml +++ b/tests/dummy-recipe-dataset.yaml @@ -36,8 +36,3 @@ input: template: ${input.join.0.mars} param: - cos_latitude - -naming_scheme: "{param}_{levelist}{level_units}_{accumultion_period}" - -statistics: - end: 2021 From 6521a9899f9735cb76b11636f3bd8c0c1ca8a7f5 Mon Sep 17 00:00:00 2001 From: Florian Pinault Date: Thu, 11 Jul 2024 16:14:01 +0000 Subject: [PATCH 23/64] cleaning tests --- src/anemoi/registry/commands/datasets.py | 4 ++-- src/anemoi/registry/entry/dataset.py | 2 +- tests/dummy-recipe-dataset.yaml | 4 ++-- tests/test_all.py | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/anemoi/registry/commands/datasets.py b/src/anemoi/registry/commands/datasets.py index 28764c6..cc91a5d 100644 --- a/src/anemoi/registry/commands/datasets.py +++ b/src/anemoi/registry/commands/datasets.py @@ -38,7 +38,7 @@ def add_arguments(self, command_parser): ) # command_parser.add_argument("--delete", help=f"Delete the {self.kind} from the catalogue and from any other location", action="store_true") command_parser.add_argument("--set-status", help="Set the status to the {self.kind}.") - command_parser.add_argument("--add-recipe", help="Add the recipe file to [re-]build the {self.kind}.") + command_parser.add_argument("--set-recipe", help="Set the recipe file to [re-]build the {self.kind}.") command_parser.add_argument( "--add-location", nargs="+", @@ -55,7 +55,7 @@ def _run(self, entry, args): self.process_task(entry, args, "register") # self.process_task(entry, args, "remove_location") self.process_task(entry, args, "add_location", platform=args.platform) - self.process_task(entry, args, "add_recipe") + self.process_task(entry, args, "set_recipe") self.process_task(entry, args, "set_status") diff --git a/src/anemoi/registry/entry/dataset.py b/src/anemoi/registry/entry/dataset.py index 33157f4..4355241 100644 --- a/src/anemoi/registry/entry/dataset.py +++ 
b/src/anemoi/registry/entry/dataset.py
@@ -26,7 +26,7 @@ def set_status(self, status):
     def add_location(self, path, platform):
         self.rest_item.patch([{"op": "add", "path": f"/locations/{platform}", "value": {"path": path}}])

-    def add_recipe(self, file):
+    def set_recipe(self, file):
         if not os.path.exists(file):
             raise FileNotFoundError(f"Recipe file not found: {file}")
         if not file.endswith(".yaml"):
diff --git a/tests/dummy-recipe-dataset.yaml b/tests/dummy-recipe-dataset.yaml
index 0cbd891..8df2ce7 100644
--- a/tests/dummy-recipe-dataset.yaml
+++ b/tests/dummy-recipe-dataset.yaml
@@ -6,8 +6,8 @@ common:

 dates:
   start: 2020-12-30 00:00:00
-  end: 2021-01-03 12:00:00
-  frequency: 12h
+  end: 2021-01-03 18:00:00
+  frequency: 6h

 input:
   join:
diff --git a/tests/test_all.py b/tests/test_all.py
index cd30a34..f61e745 100755
--- a/tests/test_all.py
+++ b/tests/test_all.py
@@ -58,7 +58,7 @@ def teardown_module(raise_if_error=True):
 def test_datasets():
     # assert run("anemoi-registry", "datasets", TMP_DATASET) == 1
     run("anemoi-registry", "datasets", TMP_DATASET)
-    run("anemoi-registry", "datasets", TMP_DATASET, "--add-recipe", "./dummy-recipe-dataset.yaml")
+    run("anemoi-registry", "datasets", TMP_DATASET, "--set-recipe", "./dummy-recipe-dataset.yaml")
     run("anemoi-registry", "datasets", TMP_DATASET, "--set-status", "testing")
     run("anemoi-registry", "datasets", TMP_DATASET, "--add-location", "/the/dataset/path", "--platform", "atos")
     run("anemoi-registry", "datasets", TMP_DATASET, "--add-location", "/other/path", "--platform", "leonardo")

From 75c2f91fd394f4cdb019ea75f1530540938ad9af Mon Sep 17 00:00:00 2001
From: Florian Pinault
Date: Fri, 12 Jul 2024 13:28:24 +0000
Subject: [PATCH 24/64] add archive

---
 src/anemoi/registry/commands/experiments.py | 48 ++++++++++++++--
 src/anemoi/registry/commands/list.py | 3 -
 src/anemoi/registry/config.yaml | 4 +-
 src/anemoi/registry/entry/experiment.py | 64 +++++++++++++++++----
 tests/dummy-recipe-dataset.yaml | 4 +-
 tests/dummy-recipe-experiment.yaml | 55 ++----------------
 6 files changed, 105 insertions(+), 73 deletions(-)

diff --git a/src/anemoi/registry/commands/experiments.py b/src/anemoi/registry/commands/experiments.py
index cf95166..b25d7b4 100644
--- a/src/anemoi/registry/commands/experiments.py
+++ b/src/anemoi/registry/commands/experiments.py
@@ -43,13 +43,34 @@ def add_arguments(self, command_parser):
         )
         # command_parser.add_argument("--delete", help=f"Delete the {self.kind} from the catalogue and from any other location", action="store_true")

-        command_parser.add_argument("--add-weights", nargs="+", help="Add weights to the experiment.")
+        command_parser.add_argument(
+            "--add-weights",
+            nargs="+",
+            help=(
+                "Add weights to the experiment and upload them to S3. "
+                "Skip upload if these weights are already uploaded."
+            ),
+        )
         command_parser.add_argument("--add-plots", nargs="+", help="Add plots to the experiment.")
-        command_parser.add_argument("--add-artefacts", nargs="+", help="Add artefacts to the experiment.")
-        command_parser.add_argument("--overwrite", help="Overwrite if already exists.", action="store_true")

-    def check_arguments(self, args):
-        pass
+        command_parser.add_argument(
+            "--set-archive", help="Input file to register as an archive metadata file to the catalogue."
+        )
+        command_parser.add_argument(
+            "--get-archive", help="Output file to save the archive metadata file from the catalogue."
+        )
+        command_parser.add_argument(
+            "--archive-platform",
+            help="Archive platform. Only relevant for --set-archive and --get-archive.",
+        )
+        command_parser.add_argument(
+            "--run-number", help="The run number of the experiment. Relevant for --set-archive and --get-archive."
+        )
+        command_parser.add_argument(
+            "--archive-extra-metadata", help="Extra metadata. A list of key=value pairs.", nargs="+"
+        )
+
+        command_parser.add_argument("--overwrite", help="Overwrite if already exists.", action="store_true")

     def is_path(self, name_or_path):
         if not os.path.exists(name_or_path):
@@ -62,8 +83,23 @@ def _run(self, entry, args):
         self.process_task(entry, args, "unregister")
         self.process_task(entry, args, "register", overwrite=args.overwrite)
         self.process_task(entry, args, "add_weights")
-        self.process_task(entry, args, "add_artefacts")
         self.process_task(entry, args, "add_plots")
+        self.process_task(
+            entry,
+            args,
+            "set_archive",
+            run_number=args.run_number,
+            platform=args.archive_platform,
+            overwrite=args.overwrite,
+            extras=args.archive_extra_metadata,
+        )
+        self.process_task(
+            entry,
+            args,
+            "get_archive",
+            run_number=args.run_number,
+            platform=args.archive_platform,
+        )


 command = Experiments
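
A quick usage sketch of the archive options added above (the expver i4df comes from the test suite; the platform name, run number, file names and key=value pairs are illustrative assumptions, not taken from the commit):

    # upload an archive metadata file for run 1 of experiment i4df
    anemoi-registry experiments i4df --set-archive ./archive.tar --run-number 1 --archive-platform atos --archive-extra-metadata user=alice

    # fetch it back into a new local file
    anemoi-registry experiments i4df --get-archive ./archive-copy.tar --run-number 1 --archive-platform atos

Each key=value pair passed with --archive-extra-metadata is stored alongside the archive entry in the catalogue (see set_archive in entry/experiment.py below).
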
diff --git a/src/anemoi/registry/commands/list.py b/src/anemoi/registry/commands/list.py
index bd2dbbe..935741b 100644
--- a/src/anemoi/registry/commands/list.py
+++ b/src/anemoi/registry/commands/list.py
@@ -57,9 +57,6 @@ def add_arguments(self, command_parser):
         # tasks.add_argument("-l", "--long", help="Details", action="store_true")
         # tasks.add_argument("--sort", help="Sort by date", choices=["created", "updated"], default="updated")

-    def check_arguments(self, args):
-        pass
-
     def run(self, args):
         if not args.subcommand:
             raise ValueError("Missing subcommand")
diff --git a/src/anemoi/registry/config.yaml b/src/anemoi/registry/config.yaml
index 789af5e..b7d82ab 100644
--- a/src/anemoi/registry/config.yaml
+++ b/src/anemoi/registry/config.yaml
@@ -1,8 +1,10 @@
 registry:
   api_url: "https://anemoi.ecmwf.int/api/v1"
+  web_url: "https://anemoi.ecmwf.int"
+
+  artefacts_uri_base: "s3://ml-artefacts"

   plots_uri_pattern: "s3://ml-artefacts/{expver}/{basename}"
-  artefacts_uri_pattern: "s3://ml-artefacts/{expver}/{basename}"
   datasets_uri_pattern: "s3://ml-datasets/{name}"
   weights_uri_pattern: "s3://ml-weights/{uuid}.ckpt"
   weights_platform: "ewc"
diff --git a/src/anemoi/registry/entry/experiment.py b/src/anemoi/registry/entry/experiment.py
index 456f8c5..9311545 100644
--- a/src/anemoi/registry/entry/experiment.py
+++ b/src/anemoi/registry/entry/experiment.py
@@ -5,10 +5,12 @@
 # granted to it by virtue of its status as an intergovernmental organisation
 # nor does it submit to any jurisdiction.

+import datetime
 import logging
 import os

 import yaml
+from anemoi.utils.s3 import download
 from anemoi.utils.s3 import upload

 from ..
import config @@ -34,27 +36,63 @@ def load_from_path(self, path): expver = metadata["expver"] self.key = expver - self.record = dict(expver=expver, metadata=metadata) + self.record = dict(expver=expver, metadata=metadata, runs={}) def add_plots(self, *paths, **kwargs): for path in paths: self._add_one_plot(path, **kwargs) - def _add_one_plot(self, path, **kwargs): - return self._add_one_plot_or_artefact("plot", path, **kwargs) - def add_weights(self, *paths, **kwargs): for path in paths: self._add_one_weights(path, **kwargs) - def add_artefacts(self, *paths, **kwargs): - for path in paths: - self._add_one_artefact(path, **kwargs) + def set_archive(self, path, platform, run_number, overwrite, extras): + if run_number is None: + raise ValueError("run_number must be set") + if platform is None: + raise ValueError("platform must be set") + if not os.path.exists(path): + raise FileNotFoundError(f"Could not find archive to upload at {path}") + extras = {v.split("=")[0]: v.split("=")[1] for v in extras} - def _add_one_artefact(self, path, **kwargs): - return self._add_one_plot_or_artefact("artefact", path, **kwargs) + _, ext = os.path.splitext(path) + target = config()["artefacts_uri_base"] + f"/{self.key}/runs/{run_number}/{platform}{ext}" + LOG.info(f"Uploading {path} to {target}.") + upload(path, target, overwrite=overwrite) + + dic = dict(url=target, path=path, updated=datetime.datetime.utcnow().isoformat(), **extras) + + if "runs" not in self.record: + # for backwards compatibility, create '/runs' if it does not exist + e = self.__class__(key=self.key) + if "runs" not in e.record: + e.rest_item.patch([{"op": "add", "path": "/runs", "value": {}}]) + self.record["runs"] = {} + + if str(run_number) not in self.record["runs"]: + # add run_number if it does not exist + self.rest_item.patch( + [ + {"op": "add", "path": "/runs", "value": self.record["runs"]}, + {"op": "add", "path": f"/runs/{run_number}", "value": dict(archives={})}, + ] + ) + + self.rest_item.patch([{"op": "add", "path": f"/runs/{run_number}/archives/{platform}", "value": dic}]) + + def get_archive(self, path, run_number, platform): + if os.path.exists(path): + raise FileExistsError(f"Path {path} already exists") + if run_number not in self.record["runs"]: + raise ValueError(f"Run number {run_number} not found") + if platform not in self.record["runs"][run_number]["archives"]: + raise ValueError(f"Platform {platform} not found") + url = self.record["runs"][run_number]["archives"][platform]["url"] + print(url) + download(url, path) - def _add_one_plot_or_artefact(self, kind, path, **kwargs): + def _add_one_plot(self, path, **kwargs): + kind = "plot" if not os.path.exists(path): raise FileNotFoundError(f"Could not find {kind} to upload at {path}") @@ -70,10 +108,16 @@ def _add_one_plot_or_artefact(self, kind, path, **kwargs): def _add_one_weights(self, path, **kwargs): weights = WeightCatalogueEntry(path=path) + if not WeightCatalogueEntry.key_exists(weights.key): + # weights with this uuid does not exist, register and upload them weights.register(ignore_existing=False, overwrite=False) weights.upload(path, overwrite=False) + else: + # Weights with this uuid already exist + # Skip if the weights are the same + # Raise an error if the weights are different other = WeightCatalogueEntry(key=weights.key) if other.record["metadata"]["timestamp"] == weights.record["metadata"]["timestamp"]: LOG.info( diff --git a/tests/dummy-recipe-dataset.yaml b/tests/dummy-recipe-dataset.yaml index 8df2ce7..2a73081 100644 --- 
a/tests/dummy-recipe-dataset.yaml +++ b/tests/dummy-recipe-dataset.yaml @@ -5,8 +5,8 @@ common: grid: 20./20. dates: - start: 2020-12-30 00:00:00 - end: 2021-01-03 18:00:00 + start: 1979-01-01 00:00:00 + end: 1979-01-03 18:00:00 frequency: 6h input: diff --git a/tests/dummy-recipe-experiment.yaml b/tests/dummy-recipe-experiment.yaml index 8a9bb75..5a22ec0 100644 --- a/tests/dummy-recipe-experiment.yaml +++ b/tests/dummy-recipe-experiment.yaml @@ -1,54 +1,7 @@ +description: Minimal config dates: - end: 2023-08-31 00:00:00 - frequency: 24 start: 2023-06-01 00:00:00 -description: Minimal config -ecflow: - host: ecflow-gen-mlx-001 - limits: - gpu: 10 - port: 3141 - target_running_user: mlx + end: 2023-06-03 00:00:00 + frequency: 24 evaluation: - name: quaver - scores: scorecard -input: - name: mars -metadata: - config_home: /home/mafp/prepml/i4df - expver: i4df - input: scorecard.yaml - owner: mafp - prepml_command: /home/mafp/venvs/mafp-dev24-02/bin/python3 -m prepml - prepml_module_version: '0.1' - time: '2024-02-22T17:10:31.433359' -model: - name: aifs -output: - class: rd - database: marsrd - name: mars -platform: - flavours: - cpu: - host: - slurm: ac-login - late: -c +23:59 - submit_arguments: - account: ecaifs - gpu: - host: - slurm: ac-login - late: -c +00:10 - submit_arguments: - account: ecaifs - cpus-per-task: '32' - gres: gpu:1 - mem: 64G - nice: '100' - partition: gpu - time: 0-00:10 - name: atos -runner: - name: ai-models-module - version: auto + name: quaver-basic From c19de87565134b0d9db08beae0240eb1005941a5 Mon Sep 17 00:00:00 2001 From: Florian Pinault Date: Sat, 13 Jul 2024 22:49:44 +0200 Subject: [PATCH 25/64] workers up --- src/anemoi/registry/commands/worker.py | 59 ++--- src/anemoi/registry/entry/dataset.py | 3 + src/anemoi/registry/workers/__init__.py | 205 +++++------------- src/anemoi/registry/workers/delete_dataset.py | 71 ++++++ .../registry/workers/transfer_dataset.py | 119 ++++++++++ 5 files changed, 280 insertions(+), 177 deletions(-) create mode 100644 src/anemoi/registry/workers/delete_dataset.py create mode 100644 src/anemoi/registry/workers/transfer_dataset.py diff --git a/src/anemoi/registry/commands/worker.py b/src/anemoi/registry/commands/worker.py index 2a33d92..c2b866e 100644 --- a/src/anemoi/registry/commands/worker.py +++ b/src/anemoi/registry/commands/worker.py @@ -17,7 +17,7 @@ from anemoi.registry.commands.base import BaseCommand from anemoi.registry.tasks import TaskCatalogueEntry from anemoi.registry.utils import list_to_dict -from anemoi.registry.workers import TransferDatasetWorker +from anemoi.registry.workers import get_worker_class LOG = logging.getLogger(__name__) @@ -35,41 +35,44 @@ def add_arguments(self, command_parser): command_parser.add_argument("--timeout", help="Die with timeout (SIGALARM) after TIMEOUT seconds.", type=int) command_parser.add_argument("--wait", help="Check for new task every WAIT seconds.", type=int, default=60) - command_parser.add_argument( - "action", - help="Action to perform", - choices=["transfer-dataset", "delete-dataset"], - nargs="?", - ) - command_parser.add_argument( + subparsers = command_parser.add_subparsers(dest="action", help="Action to perform") + + transfer = subparsers.add_parser("transfer-dataset", help="Transfer dataset") + transfer.add_argument( "--target-dir", help="The actual target directory where the worker will write.", default="." 
) - command_parser.add_argument("--published-target-dir", help="The target directory published in the catalogue.") - command_parser.add_argument("--destination", help="Platform destination (e.g. leonardo, lumi, marenostrum)") - command_parser.add_argument("--request", help="Filter tasks to process (key=value list)", nargs="*", default=[]) - command_parser.add_argument("--threads", help="Number of threads to use", type=int, default=1) - command_parser.add_argument("--heartbeat", help="Heartbeat interval", type=int, default=60) - command_parser.add_argument( - "--max-no-heartbeat", - help="Max interval without heartbeat before considering task needs to be freed.", - type=int, - default=0, - ) - command_parser.add_argument("--loop", help="Run in a loop", action="store_true") - command_parser.add_argument( - "--check-todo", - help="See if there are tasks for this worker and exit with 0 if there are task to do.", - action="store_true", - ) + transfer.add_argument("--published-target-dir", help="The target directory published in the catalogue.") + transfer.add_argument("--destination", help="Platform destination (e.g. leonardo, lumi, marenostrum)") + transfer.add_argument("--threads", help="Number of threads to use", type=int, default=1) + + delete = subparsers.add_parser("delete-dataset", help="Delete dataset") + delete.add_argument("--platform", help="Platform destination (e.g. leonardo, lumi, marenostrum)") + + for subparser in [transfer, delete]: + subparser.add_argument( + "--filter-tasks", help="Filter tasks to process (key=value list)", nargs="*", default=[] + ) + subparser.add_argument("--heartbeat", help="Heartbeat interval", type=int, default=60) + subparser.add_argument( + "--max-no-heartbeat", + help="Max interval without heartbeat before considering task needs to be freed.", + type=int, + default=0, + ) + subparser.add_argument("--loop", help="Run in a loop", action="store_true") + subparser.add_argument( + "--check-todo", + help="See if there are tasks for this worker and exit with 0 if there are task to do.", + action="store_true", + ) def run(self, args): kwargs = vars(args) - kwargs["request"] = list_to_dict(kwargs["request"]) + kwargs["filter_tasks"] = list_to_dict(kwargs["filter_tasks"]) kwargs.pop("command") kwargs.pop("debug") kwargs.pop("version") - - TransferDatasetWorker(**kwargs).run() + get_worker_class(kwargs.pop("action"))(**kwargs).run() command = WorkerCommand diff --git a/src/anemoi/registry/entry/dataset.py b/src/anemoi/registry/entry/dataset.py index 4355241..028838f 100644 --- a/src/anemoi/registry/entry/dataset.py +++ b/src/anemoi/registry/entry/dataset.py @@ -26,6 +26,9 @@ def set_status(self, status): def add_location(self, path, platform): self.rest_item.patch([{"op": "add", "path": f"/locations/{platform}", "value": {"path": path}}]) + def remove_location(self, platform): + self.rest_item.patch([{"op": "remove", "path": f"/locations/{platform}"}]) + def set_recipe(self, file): if not os.path.exists(file): raise FileNotFoundError(f"Recipe file not found: {file}") diff --git a/src/anemoi/registry/workers/__init__.py b/src/anemoi/registry/workers/__init__.py index e01a462..c600f50 100644 --- a/src/anemoi/registry/workers/__init__.py +++ b/src/anemoi/registry/workers/__init__.py @@ -7,7 +7,6 @@ import datetime import logging -import os import signal import sys import threading @@ -15,7 +14,6 @@ from anemoi.utils.humanize import when -from anemoi.registry.entry.dataset import DatasetCatalogueEntry from anemoi.registry.tasks import TaskCatalogueEntryList # from 
anemoi.utils.provenance import trace_info @@ -24,10 +22,10 @@ class Worker: + name = None + def __init__( self, - action, - # generic worker options heartbeat=60, max_no_heartbeat=0, loop=False, @@ -49,9 +47,14 @@ def __init__( self.stop_if_finished = stop_if_finished if timeout: signal.alarm(timeout) + self.filter_tasks = {"action": self.name} def run(self): + if self.check_todo: + # Check if there are tasks to do + # exit with 0 if there are. + # exit with 1 if there are none. task = self.choose_task() if task: LOG.info("There are tasks to do.") @@ -61,6 +64,7 @@ def run(self): sys.exit(1) if self.loop: + # Process tasks in a loop for ever while True: res = self.process_one_task() @@ -71,59 +75,33 @@ def run(self): LOG.info(f"Waiting {self.wait} seconds before checking again.") time.sleep(self.wait) else: + # Process one task self.process_one_task() - def choose_task(self): - request = self.request.copy() - request["destination"] = request.get("destination", self.destination) - request["action"] = "transfer-dataset" - - # if a task is queued, take it - for entry in TaskCatalogueEntryList(status="queued", **request): - return entry - - # else if a task is running, check if it has been running for too long, and free it - if self.max_no_heartbeat == 0: - return None - - cat = TaskCatalogueEntryList(status="running", **request) - if not cat: - LOG.info("No queued tasks found") - else: - LOG.info(cat.to_str(long=True)) - for entry in cat: - updated = datetime.datetime.fromisoformat(entry.record["updated"]) - LOG.info(f"Task {entry.key} is already running, last update {when(updated, use_utc=True)}.") - if (datetime.datetime.utcnow() - updated).total_seconds() > self.max_no_heartbeat: - LOG.warning( - f"Task {entry.key} has been running for more than {self.max_no_heartbeat} seconds, freeing it." - ) - entry.release_ownership() - def process_one_task(self): - entry = self.choose_task() - if not entry: + task = self.choose_task() + if not task: return False - uuid = entry.key - LOG.info(f"Processing task {uuid}: {entry}") - self.parse_entry(entry) # for checking only + uuid = task.key + LOG.info(f"Processing task {uuid}: {task}") + self.parse_task(task) # for checking only - entry.take_ownership() - self.process_entry_with_heartbeat(entry) + task.take_ownership() + self.process_task_with_heartbeat(task) LOG.info(f"Task {uuid} completed.") - entry.unregister() + task.unregister() LOG.info(f"Task {uuid} deleted.") return True - def process_entry_with_heartbeat(self, entry): + def process_task_with_heartbeat(self, task): STOP = [] # create another thread to send heartbeat def send_heartbeat(): while True: try: - entry.set_status("running") + task.set_status("running") except Exception: return for _ in range(self.heartbeat): @@ -136,133 +114,62 @@ def send_heartbeat(): thread.start() try: - self.process_entry(entry) + self.process_task(task) finally: STOP.append(1) # stop the heartbeat thread thread.join() - def process_entry(self, entry): - destination, source, dataset = self.parse_entry(entry) - dataset_entry = DatasetCatalogueEntry(key=dataset) - - LOG.info(f"Transferring {dataset} from '{source}' to '{destination}'") - - def get_source_path(): - e = dataset_entry.record - if "locations" not in e: - raise ValueError(f"Dataset {dataset} has no locations") - locations = e["locations"] - - if source not in locations: - raise ValueError( - f"Dataset {dataset} is not available at {source}. 
Available locations: {list(locations.keys())}" - ) - - if "path" not in locations[source]: - raise ValueError(f"Dataset {dataset} has no path at {source}") - - path = locations[source]["path"] - - return path - - source_path = get_source_path() - basename = os.path.basename(source_path) - target_path = os.path.join(self.target_dir, basename) - if os.path.exists(target_path): - LOG.error(f"Target path {target_path} already exists, skipping.") - return - - from anemoi.utils.s3 import download - - LOG.info(f"Source path: {source_path}") - LOG.info(f"Target path: {target_path}") - - if source_path.startswith("s3://"): - source_path = source_path + "/" if not source_path.endswith("/") else source_path - - if target_path.startswith("s3://"): - LOG.warning("Uploading to S3 is experimental and has not been tested yet.") - download(source_path, target_path, resume=True, threads=self.threads) - return - else: - target_tmp_path = os.path.join(self.target_dir + "-downloading", basename) - os.makedirs(os.path.dirname(target_tmp_path), exist_ok=True) - download(source_path, target_tmp_path, resume=True, threads=self.threads) - os.rename(target_tmp_path, target_path) - - if self.auto_register: - published_target_path = os.path.join(self.published_target_dir, basename) - dataset_entry.add_location(platform=destination, path=published_target_path) - @classmethod - def parse_entry(cls, entry): - data = entry.record.copy() - + def parse_task(cls, task, *keys): + data = task.record.copy() assert isinstance(data, dict), data - assert data["action"] == "transfer-dataset", data["action"] def is_alphanumeric(s): assert isinstance(s, str), s return all(c.isalnum() or c in ("-", "_") for c in s) - destination = data.pop("destination") - source = data.pop("source") - dataset = data.pop("dataset") - assert is_alphanumeric(destination), destination - assert is_alphanumeric(source), source - assert is_alphanumeric(dataset), dataset + for k in keys: + value = data.pop(k) + assert is_alphanumeric(value), (k, value) for k in data: if k not in ("action", "status", "progress", "created", "updated", "uuid"): LOG.warning(f"Unknown key {k}=data[k]") - data = None - - if "/" in destination: - raise ValueError(f"Destination {destination} must not contain '/', this is a platform name") - if "." in destination: - raise ValueError(f"Destination {destination} must not contain '.', this is a platform name") + return [task.record[k] for k in keys] - if "/" in source: - raise ValueError(f"Source {source} must not contain '/', this is a platform name") - if "." in source: - raise ValueError(f"Source {source} must not contain '.', this is a platform name") - - if "." 
in dataset: - raise ValueError(f"The dataset {dataset} must not contain a '.', this is the name of the dataset.") + def choose_task(self): + for task in TaskCatalogueEntryList(status="queued", **self.filter_tasks): + LOG.info("Found task") + return task + LOG.info("No queued tasks found") - assert isinstance(destination, str), destination - assert isinstance(source, str), source - assert isinstance(dataset, str), dataset - return destination, source, dataset + if self.max_no_heartbeat == 0: + return None + cat = TaskCatalogueEntryList(status="running", **self.filter_tasks) + if not cat: + LOG.info("No queued tasks found") + else: + LOG.info(cat.to_str(long=True)) -class TransferDatasetWorker(Worker): - def __init__( - self, - action, - # specific worker options - destination, - target_dir=".", - published_target_dir=None, - auto_register=True, - threads=1, - request={}, - **kwargs, - ): - super().__init__(action, **kwargs) + # if a task is running, check if it has been running for too long, and free it + for task in cat: + updated = datetime.datetime.fromisoformat(task.record["updated"]) + LOG.info(f"Task {task.key} is already running, last update {when(updated, use_utc=True)}.") + if (datetime.datetime.utcnow() - updated).total_seconds() > self.max_no_heartbeat: + LOG.warning( + f"Task {task.key} has been running for more than {self.max_no_heartbeat} seconds, freeing it." + ) + task.release_ownership() - assert action == "transfer-dataset", action + def process_task(self, task): + raise NotImplementedError("Subclasses must implement this method.") - if not destination: - raise ValueError("No destination platform specified") - if not action: - raise ValueError("No action specified") - self.destination = destination - self.target_dir = target_dir - self.published_target_dir = published_target_dir or target_dir - self.request = request - self.threads = threads +def get_worker_class(action): + from .delete_dataset import DeleteDatasetWorker + from .transfer_dataset import TransferDatasetWorker - self.auto_register = auto_register - if not os.path.exists(target_dir): - raise ValueError(f"Target directory {target_dir} must already exist") + return { + "transfer-dataset": TransferDatasetWorker, + "delete-dataset": DeleteDatasetWorker, + }[action] diff --git a/src/anemoi/registry/workers/delete_dataset.py b/src/anemoi/registry/workers/delete_dataset.py new file mode 100644 index 0000000..75b4d4f --- /dev/null +++ b/src/anemoi/registry/workers/delete_dataset.py @@ -0,0 +1,71 @@ +# (C) Copyright 2023 European Centre for Medium-Range Weather Forecasts. +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +import logging +import os + +from anemoi.registry.entry.dataset import DatasetCatalogueEntry + +from . 
import Worker + +LOG = logging.getLogger(__name__) + + +class DeleteDatasetWorker(Worker): + name = "delete-dataset" + + def __init__( + self, + platform, + filter_tasks={}, + **kwargs, + ): + super().__init__(**kwargs) + + if not platform: + raise ValueError("No destination platform specified") + + self.platform = platform + self.filter_tasks.update(filter_tasks) + self.filter_tasks["platform"] = self.platform + + def process_task(self, task): + platform, dataset = self.parse_task(task) + entry = DatasetCatalogueEntry(key=dataset) + assert platform == self.platform, (platform, self.platform) + + locations = entry.record.get("locations", {}) + if platform not in locations: + LOG.warning(f"Dataset {dataset} has no locations on '{platform}'. Ignoring delete request.") + return + + path = locations[platform]["path"] + LOG.warning(f"Deleting {path} from '{platform}'") + + tmp_path = path + ".deleting" + os.rename(path, tmp_path) + # shutil.rmtree(tmp_path) + LOG.warning(f"Deleted {path} from '{platform}'") + + entry.remove_location(platform) + LOG.warning(f"Removed location from catalogue {path} from '{platform}'") + + @classmethod + def parse_task(cls, task): + assert task.record["action"] == "delete-dataset", task.record["action"] + + platform, dataset = super().parse_task(task, "platform", "dataset") + + if "/" in platform: + raise ValueError(f"Platform {platform} must not contain '/', this is a platform name") + if "." in platform: + raise ValueError(f"Platform {platform} must not contain '.', this is a platform name") + + if "." in dataset: + raise ValueError(f"The dataset {dataset} must not contain a '.', this is the name of the dataset.") + + return platform, dataset diff --git a/src/anemoi/registry/workers/transfer_dataset.py b/src/anemoi/registry/workers/transfer_dataset.py new file mode 100644 index 0000000..7070421 --- /dev/null +++ b/src/anemoi/registry/workers/transfer_dataset.py @@ -0,0 +1,119 @@ +# (C) Copyright 2023 European Centre for Medium-Range Weather Forecasts. +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +import logging +import os + +from anemoi.registry.entry.dataset import DatasetCatalogueEntry + +from . 
import Worker + +LOG = logging.getLogger(__name__) + + +class TransferDatasetWorker(Worker): + name = "transfer-dataset" + + def __init__( + self, + destination, + target_dir=".", + published_target_dir=None, + auto_register=True, + threads=1, + filter_tasks={}, + **kwargs, + ): + super().__init__(**kwargs) + + if not destination: + raise ValueError("No destination platform specified") + + if not os.path.exists(target_dir): + raise ValueError(f"Target directory {target_dir} must already exist") + + self.destination = destination + self.target_dir = target_dir + self.published_target_dir = published_target_dir or target_dir + self.threads = threads + self.filter_tasks.update(filter_tasks) + self.filter_tasks["destination"] = self.destination + self.auto_register = auto_register + + def process_task(self, task): + destination, source, dataset = self.parse_task(task) + entry = DatasetCatalogueEntry(key=dataset) + + LOG.info(f"Transferring {dataset} from '{source}' to '{destination}'") + + def get_source_path(): + e = entry.record + if "locations" not in e: + raise ValueError(f"Dataset {dataset} has no locations") + locations = e["locations"] + + if source not in locations: + raise ValueError( + f"Dataset {dataset} is not available at {source}. Available locations: {list(locations.keys())}" + ) + + if "path" not in locations[source]: + raise ValueError(f"Dataset {dataset} has no path at {source}") + + path = locations[source]["path"] + + return path + + source_path = get_source_path() + basename = os.path.basename(source_path) + target_path = os.path.join(self.target_dir, basename) + if os.path.exists(target_path): + LOG.error(f"Target path {target_path} already exists, skipping.") + return + + from anemoi.utils.s3 import download + + LOG.info(f"Source path: {source_path}") + LOG.info(f"Target path: {target_path}") + + if source_path.startswith("s3://"): + source_path = source_path + "/" if not source_path.endswith("/") else source_path + + if target_path.startswith("s3://"): + LOG.warning("Uploading to S3 is experimental and has not been tested yet.") + download(source_path, target_path, resume=True, threads=self.threads) + return + else: + target_tmp_path = os.path.join(self.target_dir + "-downloading", basename) + os.makedirs(os.path.dirname(target_tmp_path), exist_ok=True) + download(source_path, target_tmp_path, resume=True, threads=self.threads) + os.rename(target_tmp_path, target_path) + + if self.auto_register: + published_target_path = os.path.join(self.published_target_dir, basename) + entry.add_location(platform=destination, path=published_target_path) + + @classmethod + def parse_task(cls, task): + assert task.record["action"] == "transfer-dataset", task.record["action"] + + destination, source, dataset = super().parse_task(task, "destination", "source", "dataset") + + if "/" in destination: + raise ValueError(f"Destination {destination} must not contain '/', this is a platform name") + if "." in destination: + raise ValueError(f"Destination {destination} must not contain '.', this is a platform name") + + if "/" in source: + raise ValueError(f"Source {source} must not contain '/', this is a platform name") + if "." in source: + raise ValueError(f"Source {source} must not contain '.', this is a platform name") + + if "." 
in dataset: + raise ValueError(f"The dataset {dataset} must not contain a '.', this is the name of the dataset.") + + return destination, source, dataset From a13be1812f9834bd8a7aa03210df441ff328c6c1 Mon Sep 17 00:00:00 2001 From: Florian Pinault Date: Mon, 15 Jul 2024 09:26:42 +0000 Subject: [PATCH 26/64] added progress, clean --- src/anemoi/registry/commands/base.py | 3 -- src/anemoi/registry/commands/datasets.py | 3 -- src/anemoi/registry/commands/entry.py | 3 -- src/anemoi/registry/commands/weights.py | 3 -- src/anemoi/registry/entry/__init__.py | 23 +++++++++---- src/anemoi/registry/tasks.py | 8 +++-- .../registry/workers/transfer_dataset.py | 33 +++++++++++++++++-- tests/dummy-recipe-experiment.yaml | 18 +++++++--- 8 files changed, 66 insertions(+), 28 deletions(-) diff --git a/src/anemoi/registry/commands/base.py b/src/anemoi/registry/commands/base.py index 2599df6..8bf1168 100644 --- a/src/anemoi/registry/commands/base.py +++ b/src/anemoi/registry/commands/base.py @@ -25,9 +25,6 @@ class BaseCommand(Command): internal = True timestamp = True - def check_arguments(self, args): - pass - def is_path(self, name_or_path): return os.path.exists(name_or_path) diff --git a/src/anemoi/registry/commands/datasets.py b/src/anemoi/registry/commands/datasets.py index cc91a5d..c4579fb 100644 --- a/src/anemoi/registry/commands/datasets.py +++ b/src/anemoi/registry/commands/datasets.py @@ -46,9 +46,6 @@ def add_arguments(self, command_parser): ) command_parser.add_argument("--platform", help="Platform to add the location to.") - def check_arguments(self, args): - pass - def _run(self, entry, args): # order matters self.process_task(entry, args, "unregister") diff --git a/src/anemoi/registry/commands/entry.py b/src/anemoi/registry/commands/entry.py index b57262f..8012d17 100644 --- a/src/anemoi/registry/commands/entry.py +++ b/src/anemoi/registry/commands/entry.py @@ -83,9 +83,6 @@ def add_arguments(self, command_parser): "--yaml", action="store_true", help="Use the YAML format with ``--dump`` and ``--edit``." 
) - def check_arguments(self, args): - pass - def run(self, args): path = args.path if "/" not in path[1:] or not path.startswith("/"): diff --git a/src/anemoi/registry/commands/weights.py b/src/anemoi/registry/commands/weights.py index b191113..6d74196 100644 --- a/src/anemoi/registry/commands/weights.py +++ b/src/anemoi/registry/commands/weights.py @@ -44,9 +44,6 @@ def add_arguments(self, command_parser): command_parser.add_argument("--platform", help="Platform where to add the location.") command_parser.add_argument("--overwrite", help="Overwrite any existing weights.", action="store_true") - def check_arguments(self, args): - pass - def _run(self, entry, args): self.process_task(entry, args, "unregister") self.process_task(entry, args, "register", overwrite=args.overwrite) diff --git a/src/anemoi/registry/entry/__init__.py b/src/anemoi/registry/entry/__init__.py index 00c513f..f972567 100644 --- a/src/anemoi/registry/entry/__init__.py +++ b/src/anemoi/registry/entry/__init__.py @@ -31,18 +31,26 @@ class CatalogueEntry: path = None key = None - def __init__(self, key=None, path=None): + def __init__(self, key=None, path=None, must_exist=True): assert key is not None or path is not None, "key or path must be provided" if path is not None: assert key is None self.load_from_path(path) - assert self.record is not None - else: - assert key is not None - self.load_from_key(key) - assert self.record is not None + if key is not None: + assert path is None + if self.key_exists(key): + # found in catalogue so load it + self.load_from_key(key) + else: + # not found in catalogue, so create a new one + if must_exist: + raise CatalogueEntryNotFound(f"Could not find any {self.collection} with key={key}") + else: + self.create_from_new_new(key) + + assert self.record is not None assert self.key is not None, "key must be provided" self.rest_item = RestItem(self.collection, self.key) @@ -55,6 +63,9 @@ def as_json(self): def key_exists(cls, key): return RestItem(cls.collection, key).exists() + def exists(self): + return self.rest_item.exists() + def load_from_key(self, key): rest_item = RestItem(self.collection, key) if rest_item.exists(): diff --git a/src/anemoi/registry/tasks.py b/src/anemoi/registry/tasks.py index 78d5339..836509a 100644 --- a/src/anemoi/registry/tasks.py +++ b/src/anemoi/registry/tasks.py @@ -142,8 +142,10 @@ def release_ownership(self): ) def set_progress(self, progress): - assert isinstance(progress, int), progress - if not (0 <= progress <= 100): - raise ValueError("Progress must be between 0 and 100") + # progress can be a dict or an int + if isinstance(progress, int): + if not (0 <= progress <= 100): + raise ValueError("Progress must be between 0 and 100") + progress = dict(percent=progress) patch = [{"op": "add", "path": "/progress", "value": progress}] self.rest_item.patch(patch) diff --git a/src/anemoi/registry/workers/transfer_dataset.py b/src/anemoi/registry/workers/transfer_dataset.py index 7070421..3285963 100644 --- a/src/anemoi/registry/workers/transfer_dataset.py +++ b/src/anemoi/registry/workers/transfer_dataset.py @@ -5,6 +5,7 @@ # granted to it by virtue of its status as an intergovernmental organisation # nor does it submit to any jurisdiction. 
+import datetime import logging import os @@ -15,6 +16,32 @@ LOG = logging.getLogger(__name__) +class Progress: + latest_progress = None + + def __init__(self, task, frequency=60): + self.task = task + self.frequency = frequency + + def __call__(self, number_of_files, total_size, total_transferred, transfering, **kwargs): + now = datetime.datetime.utcnow() + + if self.latest_progress is not None and (now - self.latest_progress).seconds < self.frequency: + # already updated recently + return + + p = dict( + number_of_files=number_of_files, + total_size=total_size, + total_transferred=total_transferred, + transfering=transfering, + **kwargs, + ) + p["percentage"] = 100 * total_transferred / total_size if total_size and transfering else 0 + + self.task.set_progress(p) + + class TransferDatasetWorker(Worker): name = "transfer-dataset" @@ -83,14 +110,16 @@ def get_source_path(): if source_path.startswith("s3://"): source_path = source_path + "/" if not source_path.endswith("/") else source_path + progress = Progress(task, frequency=60) + if target_path.startswith("s3://"): LOG.warning("Uploading to S3 is experimental and has not been tested yet.") - download(source_path, target_path, resume=True, threads=self.threads) + download(source_path, target_path, resume=True, threads=self.threads, progress=progress) return else: target_tmp_path = os.path.join(self.target_dir + "-downloading", basename) os.makedirs(os.path.dirname(target_tmp_path), exist_ok=True) - download(source_path, target_tmp_path, resume=True, threads=self.threads) + download(source_path, target_tmp_path, resume=True, threads=self.threads, progress=progress) os.rename(target_tmp_path, target_path) if self.auto_register: diff --git a/tests/dummy-recipe-experiment.yaml b/tests/dummy-recipe-experiment.yaml index 5a22ec0..2e080c0 100644 --- a/tests/dummy-recipe-experiment.yaml +++ b/tests/dummy-recipe-experiment.yaml @@ -1,7 +1,15 @@ -description: Minimal config dates: - start: 2023-06-01 00:00:00 - end: 2023-06-03 00:00:00 + end: 2023-08-31 00:00:00 frequency: 24 -evaluation: - name: quaver-basic + start: 2023-06-01 00:00:00 +description: Minimal config +input: + name: mars +metadata: + config_home: /home/user/prepml/i4df + expver: i4df + input: scorecard.yaml + owner: user + prepml_command: /home/user/venvs/user-dev24-02/bin/python3 -m prepml + prepml_module_version: '0.1' + time: '2024-02-22T17:10:31.433359' From 65fec898a23dcc903962beecf1814a80f18396c4 Mon Sep 17 00:00:00 2001 From: Florian Pinault Date: Mon, 15 Jul 2024 09:31:00 +0000 Subject: [PATCH 27/64] fix --- src/anemoi/registry/entry/__init__.py | 5 ++--- src/anemoi/registry/entry/experiment.py | 7 +++++++ 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/anemoi/registry/entry/__init__.py b/src/anemoi/registry/entry/__init__.py index f972567..c7509fd 100644 --- a/src/anemoi/registry/entry/__init__.py +++ b/src/anemoi/registry/entry/__init__.py @@ -33,13 +33,12 @@ class CatalogueEntry: def __init__(self, key=None, path=None, must_exist=True): assert key is not None or path is not None, "key or path must be provided" + assert key is None or path is None, "key and path are mutually exclusive" if path is not None: - assert key is None self.load_from_path(path) if key is not None: - assert path is None if self.key_exists(key): # found in catalogue so load it self.load_from_key(key) @@ -48,7 +47,7 @@ def __init__(self, key=None, path=None, must_exist=True): if must_exist: raise CatalogueEntryNotFound(f"Could not find any {self.collection} with key={key}") else: - 
self.create_from_new_new(key) + self.create_from_new_key(key) assert self.record is not None assert self.key is not None, "key must be provided" diff --git a/src/anemoi/registry/entry/experiment.py b/src/anemoi/registry/entry/experiment.py index 9311545..baac8ae 100644 --- a/src/anemoi/registry/entry/experiment.py +++ b/src/anemoi/registry/entry/experiment.py @@ -8,6 +8,7 @@ import datetime import logging import os +from getpass import getuser import yaml from anemoi.utils.s3 import download @@ -24,6 +25,12 @@ class ExperimentCatalogueEntry(CatalogueEntry): collection = "experiments" main_key = "expver" + def create_from_new_key(self, key): + assert self.key_exists(key) is False, f"{self.collection} with key={key} already exists" + metadata = dict(expver=key, user=getuser()) + self.key = key + self.record = dict(expver=key, metadata=metadata, runs={}) + def load_from_path(self, path): assert os.path.exists(path), f"{path} does not exist" assert path.endswith(".yaml"), f"{path} must be a yaml file" From be2e1ed33d315d7dfa1138ae45ed19f08e47301e Mon Sep 17 00:00:00 2001 From: Florian Pinault Date: Mon, 15 Jul 2024 14:03:06 +0000 Subject: [PATCH 28/64] clean --- tests/dummy-recipe-experiment.yaml | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/tests/dummy-recipe-experiment.yaml b/tests/dummy-recipe-experiment.yaml index 2e080c0..be4183e 100644 --- a/tests/dummy-recipe-experiment.yaml +++ b/tests/dummy-recipe-experiment.yaml @@ -1,15 +1,4 @@ -dates: - end: 2023-08-31 00:00:00 - frequency: 24 - start: 2023-06-01 00:00:00 -description: Minimal config -input: - name: mars +description: Initialising... metadata: - config_home: /home/user/prepml/i4df expver: i4df - input: scorecard.yaml owner: user - prepml_command: /home/user/venvs/user-dev24-02/bin/python3 -m prepml - prepml_module_version: '0.1' - time: '2024-02-22T17:10:31.433359' From 20b54a4cb4d9b6b6095d0c95c337b2e19cb8c178 Mon Sep 17 00:00:00 2001 From: Florian Pinault Date: Mon, 15 Jul 2024 20:37:11 +0000 Subject: [PATCH 29/64] experimental run number and adding archive information to an experiment --- src/anemoi/registry/entry/experiment.py | 69 +++++++++++++++++-------- 1 file changed, 48 insertions(+), 21 deletions(-) diff --git a/src/anemoi/registry/entry/experiment.py b/src/anemoi/registry/entry/experiment.py index baac8ae..cd893f1 100644 --- a/src/anemoi/registry/entry/experiment.py +++ b/src/anemoi/registry/entry/experiment.py @@ -53,14 +53,48 @@ def add_weights(self, *paths, **kwargs): for path in paths: self._add_one_weights(path, **kwargs) - def set_archive(self, path, platform, run_number, overwrite, extras): + def set_run_status(self, run_number, status): + self.rest_item.patch([{"op": "add", "path": f"/runs/{run_number}/status", "value": status}]) + + def create_new_run(self, **kwargs): + runs = self.record.get("runs", {}) + numbers = [int(k) for k in runs.keys()] + new = max(numbers) + 1 if numbers else 1 + self._ensure_run_exists(new, **kwargs) + return new + + def _ensure_run_exists(self, run_number, **kwargs): + e = self.__class__(key=self.key) + + if "runs" not in e.record: + # for backwards compatibility, create '/runs' if it does not exist + e.rest_item.patch([{"op": "add", "path": "/runs", "value": {}}]) + e.record["runs"] = {} + + # add run_number if it does not exist + if str(run_number) not in self.record["runs"]: + e.rest_item.patch( + [ + {"op": "test", "path": "/runs", "value": e.record["runs"]}, + {"op": "add", "path": f"/runs/{run_number}", "value": dict(archives={}, **kwargs)}, + 
] + ) + e.record["runs"] = {str(run_number): dict(archives={}, **kwargs)} + self.record = e.record + + def set_archive(self, path, platform, run_number, overwrite=True, extras={}): + if not os.path.exists(path): + raise FileNotFoundError(f"Could not find archive to upload at {path}") + if run_number is None: raise ValueError("run_number must be set") + run_number = str(run_number) + if platform is None: raise ValueError("platform must be set") - if not os.path.exists(path): - raise FileNotFoundError(f"Could not find archive to upload at {path}") - extras = {v.split("=")[0]: v.split("=")[1] for v in extras} + + if isinstance(extras, list): + extras = {v.split("=")[0]: v.split("=")[1] for v in extras} _, ext = os.path.splitext(path) target = config()["artefacts_uri_base"] + f"/{self.key}/runs/{run_number}/{platform}{ext}" @@ -69,33 +103,26 @@ def set_archive(self, path, platform, run_number, overwrite, extras): dic = dict(url=target, path=path, updated=datetime.datetime.utcnow().isoformat(), **extras) - if "runs" not in self.record: - # for backwards compatibility, create '/runs' if it does not exist - e = self.__class__(key=self.key) - if "runs" not in e.record: - e.rest_item.patch([{"op": "add", "path": "/runs", "value": {}}]) - self.record["runs"] = {} - - if str(run_number) not in self.record["runs"]: - # add run_number if it does not exist - self.rest_item.patch( - [ - {"op": "add", "path": "/runs", "value": self.record["runs"]}, - {"op": "add", "path": f"/runs/{run_number}", "value": dict(archives={})}, - ] - ) + self._ensure_run_exists(run_number) self.rest_item.patch([{"op": "add", "path": f"/runs/{run_number}/archives/{platform}", "value": dic}]) - def get_archive(self, path, run_number, platform): + def get_archive(self, path, *, platform, run_number): if os.path.exists(path): raise FileExistsError(f"Path {path} already exists") + + run_number = str(run_number) + if run_number == "latest": + run_number = str(max([int(k) for k in self.record["runs"].keys()])) + LOG.info(f"Using latest run number {run_number}") if run_number not in self.record["runs"]: raise ValueError(f"Run number {run_number} not found") + if platform not in self.record["runs"][run_number]["archives"]: raise ValueError(f"Platform {platform} not found") + url = self.record["runs"][run_number]["archives"][platform]["url"] - print(url) + LOG.info(f"Downloading {url} to {path}.") download(url, path) def _add_one_plot(self, path, **kwargs): From 3e1d70ba6e938d6f9a12b577d6d64b96f3aecfe1 Mon Sep 17 00:00:00 2001 From: Florian Pinault Date: Tue, 16 Jul 2024 11:31:40 +0200 Subject: [PATCH 30/64] update progress bar --- .../registry/workers/transfer_dataset.py | 24 +++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/src/anemoi/registry/workers/transfer_dataset.py b/src/anemoi/registry/workers/transfer_dataset.py index 3285963..59c1384 100644 --- a/src/anemoi/registry/workers/transfer_dataset.py +++ b/src/anemoi/registry/workers/transfer_dataset.py @@ -8,6 +8,7 @@ import datetime import logging import os +from copy import deepcopy from anemoi.registry.entry.dataset import DatasetCatalogueEntry @@ -17,30 +18,45 @@ class Progress: - latest_progress = None + latest = None def __init__(self, task, frequency=60): self.task = task self.frequency = frequency + self.first_progress = None + self.first_transfer_progress = None + self.previous_progress = None def __call__(self, number_of_files, total_size, total_transferred, transfering, **kwargs): now = datetime.datetime.utcnow() - if self.latest_progress 
is not None and (now - self.latest_progress).seconds < self.frequency: + if self.latest is not None and (now - self.latest).seconds < self.frequency: # already updated recently return - p = dict( + progress = dict( number_of_files=number_of_files, total_size=total_size, total_transferred=total_transferred, transfering=transfering, + timestamp=now.isoformat(), + percentage=100 * total_transferred / total_size if total_size and transfering else 0, **kwargs, ) - p["percentage"] = 100 * total_transferred / total_size if total_size and transfering else 0 + if self.first_progress is None: + self.first_progress = progress + if self.first_transfer_progress is None and transfering: + self.first_transfer_progress = progress + + p = deepcopy(progress) + p["first_progress"] = self.first_progress + p["first_transfer_progress"] = self.first_transfer_progress + p["previous_progress"] = self.previous_progress self.task.set_progress(p) + self.previous_progress = progress + class TransferDatasetWorker(Worker): name = "transfer-dataset" From 90a0fb1e757396c18c6a0f5418896d00c068586a Mon Sep 17 00:00:00 2001 From: Florian Pinault Date: Tue, 16 Jul 2024 14:50:52 +0200 Subject: [PATCH 31/64] update utc time --- src/anemoi/registry/tasks.py | 2 +- src/anemoi/registry/workers/transfer_dataset.py | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/anemoi/registry/tasks.py b/src/anemoi/registry/tasks.py index 836509a..039ce62 100644 --- a/src/anemoi/registry/tasks.py +++ b/src/anemoi/registry/tasks.py @@ -83,7 +83,7 @@ def to_str(self, long): uuid = v.pop("uuid") status = v.pop("status") - progress = v.pop("progress", "") + progress = v.pop("progress", {}).get("percentage", "") action = v.pop("action", "") source = v.pop("source", "") destination = v.pop("destination", "") diff --git a/src/anemoi/registry/workers/transfer_dataset.py b/src/anemoi/registry/workers/transfer_dataset.py index 59c1384..72a375a 100644 --- a/src/anemoi/registry/workers/transfer_dataset.py +++ b/src/anemoi/registry/workers/transfer_dataset.py @@ -34,12 +34,16 @@ def __call__(self, number_of_files, total_size, total_transferred, transfering, # already updated recently return + timestamp = now.isoformat() + if not timestamp.endswith("Z"): + timestamp += "Z" + progress = dict( number_of_files=number_of_files, total_size=total_size, total_transferred=total_transferred, transfering=transfering, - timestamp=now.isoformat(), + timestamp=timestamp, percentage=100 * total_transferred / total_size if total_size and transfering else 0, **kwargs, ) From a60349e35459b440b161355a63389d7709576425 Mon Sep 17 00:00:00 2001 From: Florian Pinault Date: Tue, 16 Jul 2024 15:14:40 +0200 Subject: [PATCH 32/64] disable tests on github --- .github/workflows/python-publish.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml index e322a28..8099bd1 100644 --- a/.github/workflows/python-publish.yml +++ b/.github/workflows/python-publish.yml @@ -46,7 +46,8 @@ jobs: - name: Tests run: | - cd tests && python3 test_all.py + # disable tests on github to avoid giving away the token + # cd tests && python3 test_all.py deploy: From 9ff9080cda2d2084d056df1a73e90ef40302cf4d Mon Sep 17 00:00:00 2001 From: Baudouin Raoult Date: Tue, 16 Jul 2024 16:00:51 +0100 Subject: [PATCH 33/64] fix doc --- docs/requirements.txt | 2 +- pyproject.toml | 5 +++-- src/anemoi/registry/__init__.py | 5 ----- 3 files changed, 4 insertions(+), 8 deletions(-) diff --git 
a/docs/requirements.txt b/docs/requirements.txt index 8b8bccd..0d25054 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -2,7 +2,7 @@ sphinx sphinx_rtd_theme nbsphinx -sphinx_argparse +sphinx_argparse<0.5.0 # Also requires `brew install pandoc` on Mac pandoc diff --git a/pyproject.toml b/pyproject.toml index c38b821..f73b9d6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -49,6 +49,7 @@ classifiers = [ dynamic = [ "version", ] + dependencies = [ "anemoi-datasets", "jsonpatch", @@ -65,7 +66,7 @@ optional-dependencies.dev = [ "pandoc", "pytest", "sphinx", - "sphinx-argparse", + "sphinx-argparse<0.5", "sphinx-rtd-theme", "termcolor", "tomli", @@ -75,7 +76,7 @@ optional-dependencies.docs = [ "nbsphinx", "pandoc", "sphinx", - "sphinx-argparse", + "sphinx-argparse<0.5", "sphinx-rtd-theme", "termcolor", "tomli", diff --git a/src/anemoi/registry/__init__.py b/src/anemoi/registry/__init__.py index e458dbb..b5c5815 100644 --- a/src/anemoi/registry/__init__.py +++ b/src/anemoi/registry/__init__.py @@ -13,11 +13,6 @@ LOG = logging.getLogger(__name__) -try: - import boto3 -except ImportError: - LOG.warning("boto3 package is not available. To have S3 support, reinstall with : pip install anemoi-registry[s3]") - def config(): from anemoi.utils.config import load_config From e0abe34008bde66d5f3f0bca5403e80a0bdc1921 Mon Sep 17 00:00:00 2001 From: Baudouin Raoult Date: Tue, 16 Jul 2024 16:11:11 +0100 Subject: [PATCH 34/64] fix doc --- .readthedocs.yaml | 1 - docs/requirements.txt | 10 ---------- pyproject.toml | 4 ---- 3 files changed, 15 deletions(-) delete mode 100644 docs/requirements.txt diff --git a/.readthedocs.yaml b/.readthedocs.yaml index c03429e..06c8ab4 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -10,7 +10,6 @@ sphinx: python: install: - - requirements: docs/requirements.txt - method: pip path: . extra_requirements: diff --git a/docs/requirements.txt b/docs/requirements.txt deleted file mode 100644 index 0d25054..0000000 --- a/docs/requirements.txt +++ /dev/null @@ -1,10 +0,0 @@ -# These are the requirements for readthedoc -sphinx -sphinx_rtd_theme -nbsphinx -sphinx_argparse<0.5.0 - -# Also requires `brew install pandoc` on Mac -pandoc - -rstfmt diff --git a/pyproject.toml b/pyproject.toml index f73b9d6..8e86399 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -68,8 +68,6 @@ optional-dependencies.dev = [ "sphinx", "sphinx-argparse<0.5", "sphinx-rtd-theme", - "termcolor", - "tomli", ] optional-dependencies.docs = [ @@ -78,8 +76,6 @@ optional-dependencies.docs = [ "sphinx", "sphinx-argparse<0.5", "sphinx-rtd-theme", - "termcolor", - "tomli", ] optional-dependencies.s3 = [ From e5db77447b3c675d6cad7ca22aea50916c358a59 Mon Sep 17 00:00:00 2001 From: Florian Pinault Date: Tue, 16 Jul 2024 17:40:26 +0200 Subject: [PATCH 35/64] clean doc --- docs/index.rst | 2 -- src/anemoi/registry/commands/datasets.py | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/docs/index.rst b/docs/index.rst index 6bf5078..49849d0 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -36,7 +36,6 @@ of the *Anemoi* packages. - :doc:`cli/datasets` - :doc:`cli/weights` -- :doc:`cli/experiments` - :doc:`cli/list` .. toctree:: @@ -45,7 +44,6 @@ of the *Anemoi* packages. 
:caption: Command line tool cli/datasets - cli/experiments cli/weights cli/list diff --git a/src/anemoi/registry/commands/datasets.py b/src/anemoi/registry/commands/datasets.py index c4579fb..0a601a6 100644 --- a/src/anemoi/registry/commands/datasets.py +++ b/src/anemoi/registry/commands/datasets.py @@ -44,7 +44,7 @@ def add_arguments(self, command_parser): nargs="+", help="Path to add a location to the dataset. Implies --platform", ) - command_parser.add_argument("--platform", help="Platform to add the location to.") + command_parser.add_argument("--platform", help="Platform to add the location to. Implies --add-location") def _run(self, entry, args): # order matters From 3f65cdd1af6fa239673ac6869a4ac8f142d88bd8 Mon Sep 17 00:00:00 2001 From: Florian Pinault Date: Tue, 16 Jul 2024 18:08:38 +0200 Subject: [PATCH 36/64] fix --- src/anemoi/registry/workers/transfer_dataset.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/anemoi/registry/workers/transfer_dataset.py b/src/anemoi/registry/workers/transfer_dataset.py index 72a375a..f6a5576 100644 --- a/src/anemoi/registry/workers/transfer_dataset.py +++ b/src/anemoi/registry/workers/transfer_dataset.py @@ -33,6 +33,7 @@ def __call__(self, number_of_files, total_size, total_transferred, transfering, if self.latest is not None and (now - self.latest).seconds < self.frequency: # already updated recently return + self.latest = now timestamp = now.isoformat() if not timestamp.endswith("Z"): From 1e6db51c8646419120564c176cfa94a0ea029c92 Mon Sep 17 00:00:00 2001 From: Florian Pinault Date: Thu, 18 Jul 2024 10:18:19 +0200 Subject: [PATCH 37/64] faster progress update --- src/anemoi/registry/workers/transfer_dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/anemoi/registry/workers/transfer_dataset.py b/src/anemoi/registry/workers/transfer_dataset.py index f6a5576..1c05bfb 100644 --- a/src/anemoi/registry/workers/transfer_dataset.py +++ b/src/anemoi/registry/workers/transfer_dataset.py @@ -131,7 +131,7 @@ def get_source_path(): if source_path.startswith("s3://"): source_path = source_path + "/" if not source_path.endswith("/") else source_path - progress = Progress(task, frequency=60) + progress = Progress(task, frequency=10) if target_path.startswith("s3://"): LOG.warning("Uploading to S3 is experimental and has not been tested yet.") From 73a65f1e423127a24af01e8857b244857cc5d3f7 Mon Sep 17 00:00:00 2001 From: Florian Pinault Date: Thu, 18 Jul 2024 11:44:01 +0200 Subject: [PATCH 38/64] clean workers --- src/anemoi/registry/commands/worker.py | 28 +++++---- src/anemoi/registry/config.yaml | 14 +++++ src/anemoi/registry/workers/__init__.py | 60 +++++++++++++------ src/anemoi/registry/workers/delete_dataset.py | 2 +- src/anemoi/registry/workers/dummy.py | 23 +++++++ .../registry/workers/transfer_dataset.py | 22 ++++--- 6 files changed, 110 insertions(+), 39 deletions(-) create mode 100644 src/anemoi/registry/workers/dummy.py diff --git a/src/anemoi/registry/commands/worker.py b/src/anemoi/registry/commands/worker.py index c2b866e..1be55a2 100644 --- a/src/anemoi/registry/commands/worker.py +++ b/src/anemoi/registry/commands/worker.py @@ -17,7 +17,7 @@ from anemoi.registry.commands.base import BaseCommand from anemoi.registry.tasks import TaskCatalogueEntry from anemoi.registry.utils import list_to_dict -from anemoi.registry.workers import get_worker_class +from anemoi.registry.workers import run_worker LOG = logging.getLogger(__name__) @@ -32,8 +32,6 @@ class WorkerCommand(BaseCommand): collection = "tasks" def 
add_arguments(self, command_parser):
-        command_parser.add_argument("--timeout", help="Die with timeout (SIGALARM) after TIMEOUT seconds.", type=int)
-        command_parser.add_argument("--wait", help="Check for new task every WAIT seconds.", type=int, default=60)
 
         subparsers = command_parser.add_subparsers(dest="action", help="Action to perform")
 
@@ -43,21 +41,24 @@ def add_arguments(self, command_parser):
         )
         transfer.add_argument("--published-target-dir", help="The target directory published in the catalogue.")
         transfer.add_argument("--destination", help="Platform destination (e.g. leonardo, lumi, marenostrum)")
-        transfer.add_argument("--threads", help="Number of threads to use", type=int, default=1)
+        transfer.add_argument("--threads", help="Number of threads to use", type=int)
+        transfer.add_argument("--filter-tasks", help="Filter tasks to process (key=value list)", nargs="*", default=[])
 
         delete = subparsers.add_parser("delete-dataset", help="Delete dataset")
         delete.add_argument("--platform", help="Platform destination (e.g. leonardo, lumi, marenostrum)")
+        delete.add_argument("--filter-tasks", help="Filter tasks to process (key=value list)", nargs="*", default=[])
 
-        for subparser in [transfer, delete]:
-            subparser.add_argument(
-                "--filter-tasks", help="Filter tasks to process (key=value list)", nargs="*", default=[]
-            )
-            subparser.add_argument("--heartbeat", help="Heartbeat interval", type=int, default=60)
+        dummy = subparsers.add_parser("dummy", help="Dummy worker for test purposes")
+        dummy.add_argument("--arg")
+
+        for subparser in [transfer, delete, dummy]:
+            subparser.add_argument("--timeout", help="Die with timeout (SIGALRM) after TIMEOUT seconds.", type=int)
+            subparser.add_argument("--wait", help="Check for new task every WAIT seconds.", type=int)
+            subparser.add_argument("--heartbeat", help="Heartbeat interval", type=int)
             subparser.add_argument(
                 "--max-no-heartbeat",
                 help="Max interval without heartbeat before considering task needs to be freed.",
                 type=int,
-                default=0,
             )
             subparser.add_argument("--loop", help="Run in a loop", action="store_true")
             subparser.add_argument(
@@ -68,11 +69,14 @@ def add_arguments(self, command_parser):
 
     def run(self, args):
         kwargs = vars(args)
-        kwargs["filter_tasks"] = list_to_dict(kwargs["filter_tasks"])
+        if "filter_tasks" in kwargs:
+            kwargs["filter_tasks"] = list_to_dict(kwargs["filter_tasks"])
         kwargs.pop("command")
         kwargs.pop("debug")
         kwargs.pop("version")
-        get_worker_class(kwargs.pop("action"))(**kwargs).run()
+        action = kwargs.pop("action")
+        kwargs = {k: v for k, v in kwargs.items() if v is not None}
+        run_worker(action, **kwargs)
 
 
 command = WorkerCommand
diff --git a/src/anemoi/registry/config.yaml b/src/anemoi/registry/config.yaml
index b7d82ab..7720c09 100644
--- a/src/anemoi/registry/config.yaml
+++ b/src/anemoi/registry/config.yaml
@@ -8,3 +8,17 @@ registry:
   datasets_uri_pattern: "s3://ml-datasets/{name}"
   weights_uri_pattern: "s3://ml-weights/{uuid}.ckpt"
   weights_platform: "ewc"
+
+  workers:
+    # These are the default values for the workers
+    # they are experimental and can change in the future
+    heartbeat: 60
+    max_no_heartbeat: -1
+    wait: 10
+    transfer-dataset:
+      target_dir: "."
+      published_target_dir: null
+      threads: 1
+      auto_register: true
+    dummy:
+      arg: default_value
diff --git a/src/anemoi/registry/workers/__init__.py b/src/anemoi/registry/workers/__init__.py
index c600f50..b39e50f 100644
--- a/src/anemoi/registry/workers/__init__.py
+++ b/src/anemoi/registry/workers/__init__.py
@@ -14,6 +14,7 @@
 
 from anemoi.utils.humanize import when
 
+from anemoi.registry import config
 from anemoi.registry.tasks import TaskCatalogueEntryList
 
 # from anemoi.utils.provenance import trace_info
@@ -26,13 +27,12 @@ class Worker:
 
     def __init__(
         self,
-        heartbeat=60,
-        max_no_heartbeat=0,
+        heartbeat,
+        max_no_heartbeat,
+        wait,
         loop=False,
         check_todo=False,
         timeout=None,
-        wait=60,
-        stop_if_finished=True,
     ):
         """Run a worker that will process tasks in the queue.
         timeout: Kill itself after `timeout` seconds.
@@ -44,7 +44,6 @@ def __init__(
         self.check_todo = check_todo
 
         self.wait = wait
-        self.stop_if_finished = stop_if_finished
         if timeout:
             signal.alarm(timeout)
         self.filter_tasks = {"action": self.name}
@@ -66,14 +65,15 @@ def run(self):
         if self.loop:
             # Process tasks in a loop for ever
             while True:
-                res = self.process_one_task()
-
-                if self.stop_if_finished and res is None:
-                    LOG.info("All tasks have been processed, stopping.")
-                    return
+                try:
+                    self.process_one_task()
+                    LOG.info(f"Waiting {self.wait} seconds before checking again.")
+                    time.sleep(self.wait)
+                except Exception as e:
+                    LOG.error(f"Error while processing task: {e}")
+                    LOG.error("Waiting 60 seconds after this error before checking again.")
+                    time.sleep(60)
 
-                LOG.info(f"Waiting {self.wait} seconds before checking again.")
-                time.sleep(self.wait)
         else:
             # Process one task
             self.process_one_task()
@@ -114,7 +114,7 @@ def send_heartbeat():
         thread.start()
 
         try:
-            self.process_task(task)
+            self.worker_process_task(task)
         finally:
             STOP.append(1)  # stop the heartbeat thread
             thread.join()
@@ -155,21 +155,47 @@ def choose_task(self):
         for task in cat:
             updated = datetime.datetime.fromisoformat(task.record["updated"])
             LOG.info(f"Task {task.key} is already running, last update {when(updated, use_utc=True)}.")
-            if (datetime.datetime.utcnow() - updated).total_seconds() > self.max_no_heartbeat:
+            if (
+                self.max_no_heartbeat >= 0
+                and (datetime.datetime.utcnow() - updated).total_seconds() > self.max_no_heartbeat
+            ):
                 LOG.warning(
                     f"Task {task.key} has been running for more than {self.max_no_heartbeat} seconds, freeing it."
) task.release_ownership() - def process_task(self, task): + def worker_process_task(self, task): raise NotImplementedError("Subclasses must implement this method.") -def get_worker_class(action): +def run_worker(action, **kwargs): + from anemoi.registry.workers.dummy import DummyWorker + from .delete_dataset import DeleteDatasetWorker from .transfer_dataset import TransferDatasetWorker - return { + workers_config = config().get("workers", {}) + worker_config = workers_config.get(action, {}) + + LOG.debug(kwargs) + + for k, v in worker_config.items(): + if k not in kwargs: + kwargs[k] = v + + LOG.debug(kwargs) + + for k, v in workers_config.items(): + if isinstance(v, dict): + continue + if k not in kwargs: + kwargs[k] = v + + LOG.info(f"Running worker {action} with kwargs {kwargs}") + + cls = { "transfer-dataset": TransferDatasetWorker, "delete-dataset": DeleteDatasetWorker, + "dummy": DummyWorker, }[action] + cls(**kwargs).run() diff --git a/src/anemoi/registry/workers/delete_dataset.py b/src/anemoi/registry/workers/delete_dataset.py index 75b4d4f..6bfc820 100644 --- a/src/anemoi/registry/workers/delete_dataset.py +++ b/src/anemoi/registry/workers/delete_dataset.py @@ -33,7 +33,7 @@ def __init__( self.filter_tasks.update(filter_tasks) self.filter_tasks["platform"] = self.platform - def process_task(self, task): + def worker_process_task(self, task): platform, dataset = self.parse_task(task) entry = DatasetCatalogueEntry(key=dataset) assert platform == self.platform, (platform, self.platform) diff --git a/src/anemoi/registry/workers/dummy.py b/src/anemoi/registry/workers/dummy.py new file mode 100644 index 0000000..19c3a63 --- /dev/null +++ b/src/anemoi/registry/workers/dummy.py @@ -0,0 +1,23 @@ +# (C) Copyright 2023 European Centre for Medium-Range Weather Forecasts. +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +import logging + +from . 
import Worker + +LOG = logging.getLogger(__name__) + + +class DummyWorker(Worker): + name = "dummy" + + def __init__(self, arg, **kwargs): + super().__init__(**kwargs) + LOG.warning(f"Dummy worker initialized with kwargs:{kwargs} and args:{arg}") + + def worker_process_task(self, task): + LOG.warning(f"Dummy worker processing task={task}") diff --git a/src/anemoi/registry/workers/transfer_dataset.py b/src/anemoi/registry/workers/transfer_dataset.py index 1c05bfb..0b98abc 100644 --- a/src/anemoi/registry/workers/transfer_dataset.py +++ b/src/anemoi/registry/workers/transfer_dataset.py @@ -78,21 +78,25 @@ def __init__( ): super().__init__(**kwargs) - if not destination: - raise ValueError("No destination platform specified") - - if not os.path.exists(target_dir): - raise ValueError(f"Target directory {target_dir} must already exist") - self.destination = destination self.target_dir = target_dir - self.published_target_dir = published_target_dir or target_dir + self.published_target_dir = published_target_dir self.threads = threads + self.auto_register = auto_register + + if self.published_target_dir is None: + self.published_target_dir = self.target_dir + self.filter_tasks.update(filter_tasks) self.filter_tasks["destination"] = self.destination - self.auto_register = auto_register - def process_task(self, task): + if not self.destination: + raise ValueError("No destination platform specified") + + if not os.path.exists(self.target_dir): + raise ValueError(f"Target directory {self.target_dir} must already exist") + + def worker_process_task(self, task): destination, source, dataset = self.parse_task(task) entry = DatasetCatalogueEntry(key=dataset) From 08cf6c5cb5b226b745ba9eb0ca431900a9897335 Mon Sep 17 00:00:00 2001 From: Florian Pinault Date: Thu, 18 Jul 2024 13:21:57 +0200 Subject: [PATCH 39/64] more robust --- src/anemoi/registry/workers/__init__.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/anemoi/registry/workers/__init__.py b/src/anemoi/registry/workers/__init__.py index b39e50f..bff70e9 100644 --- a/src/anemoi/registry/workers/__init__.py +++ b/src/anemoi/registry/workers/__init__.py @@ -81,18 +81,23 @@ def run(self): def process_one_task(self): task = self.choose_task() if not task: - return False + return uuid = task.key LOG.info(f"Processing task {uuid}: {task}") self.parse_task(task) # for checking only task.take_ownership() - self.process_task_with_heartbeat(task) + try: + self.process_task_with_heartbeat(task) + except Exception as e: + LOG.error(f"Error for task {task}: {e}") + LOG.exception("Exception occurred during task processing:", exc_info=e) + task.release_ownership() + return LOG.info(f"Task {uuid} completed.") task.unregister() LOG.info(f"Task {uuid} deleted.") - return True def process_task_with_heartbeat(self, task): STOP = [] @@ -149,7 +154,7 @@ def choose_task(self): if not cat: LOG.info("No queued tasks found") else: - LOG.info(cat.to_str(long=True)) + LOG.info(f"Tasks list \n{cat.to_str(long=True)}") # if a task is running, check if it has been running for too long, and free it for task in cat: From 8a888e9581da4920e7586dce571a4f42383df0fe Mon Sep 17 00:00:00 2001 From: Florian Pinault Date: Wed, 17 Jul 2024 15:13:15 +0000 Subject: [PATCH 40/64] download utility --- src/anemoi/registry/commands/download.py | 42 ++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 src/anemoi/registry/commands/download.py diff --git a/src/anemoi/registry/commands/download.py b/src/anemoi/registry/commands/download.py 
new file mode 100644
index 0000000..98c56d6
--- /dev/null
+++ b/src/anemoi/registry/commands/download.py
@@ -0,0 +1,42 @@
+#!/usr/bin/env python
+# (C) Copyright 2024 ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+#
+
+"""Download a file or a folder from S3.
+"""
+
+import logging
+import os
+
+LOG = logging.getLogger(__name__)
+
+
+class Download:
+    """Just download."""
+
+    internal = True
+    timestamp = True
+
+    def add_arguments(self, command_parser):
+        command_parser.add_argument("path", help="Path to download.")
+        command_parser.add_argument("target", help="Target path.", nargs="?", default=None)
+        command_parser.add_argument("--overwrite", help="Overwrite if already exists.", action="store_true")
+
+    def run(self, args):
+        from anemoi.utils.s3 import download
+
+        target = args.target
+
+        if target is None:
+            target = os.path.basename(args.path)
+        download(args.path, target, overwrite=args.overwrite)
+
+
+command = Download
From 31718ef45278ffa19e6324ec345253cd1d475963 Mon Sep 17 00:00:00 2001
From: Florian Pinault
Date: Wed, 17 Jul 2024 16:25:22 +0000
Subject: [PATCH 41/64] start support for deleting experiments

---
 src/anemoi/registry/commands/experiments.py   |  6 ++++
 src/anemoi/registry/entry/experiment.py       | 32 ++++++++++++++++---
 src/anemoi/registry/rest.py                   |  1 -
 .../registry/workers/transfer_dataset.py      |  2 --
 4 files changed, 34 insertions(+), 7 deletions(-)

diff --git a/src/anemoi/registry/commands/experiments.py b/src/anemoi/registry/commands/experiments.py
index b25d7b4..9c162e9 100644
--- a/src/anemoi/registry/commands/experiments.py
+++ b/src/anemoi/registry/commands/experiments.py
@@ -41,6 +41,11 @@ def add_arguments(self, command_parser):
             help="Remove from catalogue (without deleting the experiment from other locations)",
             action="store_true",
         )
+        command_parser.add_argument(
+            "--delete-artefacts",
+            help="Remove the experiment's artefacts (such as plots)",
+            action="store_true",
+        )
 
         # command_parser.add_argument("--delete", help=f"Delete the {self.kind} from the catalogue and from any other location", action="store_true")
         command_parser.add_argument(
@@ -80,6 +85,7 @@ def is_path(self, name_or_path):
         return True
 
     def _run(self, entry, args):
+        self.process_task(entry, args, "delete_artefacts")
         self.process_task(entry, args, "unregister")
         self.process_task(entry, args, "register", overwrite=args.overwrite)
         self.process_task(entry, args, "add_weights")
diff --git a/src/anemoi/registry/entry/experiment.py b/src/anemoi/registry/entry/experiment.py
index cd893f1..9dfff14 100644
--- a/src/anemoi/registry/entry/experiment.py
+++ b/src/anemoi/registry/entry/experiment.py
@@ -11,6 +11,7 @@
 from getpass import getuser
 
 import yaml
+from anemoi.utils.s3 import delete
 from anemoi.utils.s3 import download
 from anemoi.utils.s3 import upload
 
@@ -125,12 +126,35 @@ def get_archive(self, path, *, platform, run_number):
         LOG.info(f"Downloading {url} to {path}.")
         download(url, path)
 
+    def delete_artefacts(self):
+        self.delete_all_plots()
+        # self.delete_weights()
+        # self.delete_archives()
+
+    def delete_all_plots(self):
+        plots = self.record.get("plots", [])
+        for plot in plots:
+            url = plot["url"]
+            LOG.info(f"Deleting {url}")
+            if not url.startswith("s3://"):
+
LOG.warning(f"Skipping deletion of {url} because it is not an s3 url") + continue + if f"/{self.key}/" not in url: + LOG.warning(f"Skipping deletion of {url} because it does not belong to this experiment") + continue + delete(url) + self.rest_item.patch( + [ + {"op": "test", "path": "/plots", "value": plots}, + {"op": "add", "path": "/plots", "value": []}, + ] + ) + def _add_one_plot(self, path, **kwargs): - kind = "plot" if not os.path.exists(path): - raise FileNotFoundError(f"Could not find {kind} to upload at {path}") + raise FileNotFoundError(f"Could not find plot to upload at {path}") - target = config()[f"{kind}s_uri_pattern"] + target = config()["plots_uri_pattern"] basename = os.path.basename(path) target = target.format(expver=self.key, basename=basename, filename=basename) @@ -138,7 +162,7 @@ def _add_one_plot(self, path, **kwargs): upload(path, target, overwrite=True) dic = dict(url=target, name=basename, path=path) - self.rest_item.patch([{"op": "add", "path": f"/{kind}s/-", "value": dic}]) + self.rest_item.patch([{"op": "add", "path": "/plots/-", "value": dic}]) def _add_one_weights(self, path, **kwargs): weights = WeightCatalogueEntry(path=path) diff --git a/src/anemoi/registry/rest.py b/src/anemoi/registry/rest.py index 4d06759..4a80d06 100644 --- a/src/anemoi/registry/rest.py +++ b/src/anemoi/registry/rest.py @@ -19,7 +19,6 @@ from ._version import __version__ LOG = logging.getLogger(__name__) -# LOG.setLevel(logging.DEBUG) """~/.aws/credentials diff --git a/src/anemoi/registry/workers/transfer_dataset.py b/src/anemoi/registry/workers/transfer_dataset.py index 0b98abc..ac41f18 100644 --- a/src/anemoi/registry/workers/transfer_dataset.py +++ b/src/anemoi/registry/workers/transfer_dataset.py @@ -36,8 +36,6 @@ def __call__(self, number_of_files, total_size, total_transferred, transfering, self.latest = now timestamp = now.isoformat() - if not timestamp.endswith("Z"): - timestamp += "Z" progress = dict( number_of_files=number_of_files, From 2e541e4db3e16756439c177c08599dfa73fd954c Mon Sep 17 00:00:00 2001 From: Florian Pinault Date: Thu, 18 Jul 2024 14:49:21 +0000 Subject: [PATCH 42/64] implement also upload to s3 --- src/anemoi/registry/commands/worker.py | 1 + src/anemoi/registry/workers/__init__.py | 38 ++++++++++++++++--- src/anemoi/registry/workers/delete_dataset.py | 3 ++ .../registry/workers/transfer_dataset.py | 14 +++++-- 4 files changed, 46 insertions(+), 10 deletions(-) diff --git a/src/anemoi/registry/commands/worker.py b/src/anemoi/registry/commands/worker.py index 1be55a2..7333d0b 100644 --- a/src/anemoi/registry/commands/worker.py +++ b/src/anemoi/registry/commands/worker.py @@ -66,6 +66,7 @@ def add_arguments(self, command_parser): help="See if there are tasks for this worker and exit with 0 if there are task to do.", action="store_true", ) + subparser.add_argument("--dry-run", help="Dry run, do not actually do anything", action="store_true") def run(self, args): kwargs = vars(args) diff --git a/src/anemoi/registry/workers/__init__.py b/src/anemoi/registry/workers/__init__.py index bff70e9..4d9364b 100644 --- a/src/anemoi/registry/workers/__init__.py +++ b/src/anemoi/registry/workers/__init__.py @@ -33,6 +33,7 @@ def __init__( loop=False, check_todo=False, timeout=None, + dry_run=False, ): """Run a worker that will process tasks in the queue. timeout: Kill itself after `timeout` seconds. 
@@ -42,6 +43,7 @@ def __init__( self.max_no_heartbeat = max_no_heartbeat self.loop = loop self.check_todo = check_todo + self.dry_run = dry_run self.wait = wait if timeout: @@ -87,16 +89,16 @@ def process_one_task(self): LOG.info(f"Processing task {uuid}: {task}") self.parse_task(task) # for checking only - task.take_ownership() + self.take_ownership(task) try: self.process_task_with_heartbeat(task) except Exception as e: LOG.error(f"Error for task {task}: {e}") LOG.exception("Exception occurred during task processing:", exc_info=e) - task.release_ownership() + self.release_ownership(task) return LOG.info(f"Task {uuid} completed.") - task.unregister() + self.unregister(task) LOG.info(f"Task {uuid} deleted.") def process_task_with_heartbeat(self, task): @@ -106,7 +108,7 @@ def process_task_with_heartbeat(self, task): def send_heartbeat(): while True: try: - task.set_status("running") + self.set_status(task, "running") except Exception: return for _ in range(self.heartbeat): @@ -145,7 +147,7 @@ def choose_task(self): for task in TaskCatalogueEntryList(status="queued", **self.filter_tasks): LOG.info("Found task") return task - LOG.info("No queued tasks found") + LOG.info(f"No queued tasks found with filter_tasks={self.filter_tasks}") if self.max_no_heartbeat == 0: return None @@ -167,7 +169,31 @@ def choose_task(self): LOG.warning( f"Task {task.key} has been running for more than {self.max_no_heartbeat} seconds, freeing it." ) - task.release_ownership() + self.release_ownership(task) + + def take_ownership(self, task): + if self.dry_run: + LOG.warning(f"Would take ownership of task {task.key} but this is only a dry run.") + return + task.take_ownership() + + def release_ownership(self, task): + if self.dry_run: + LOG.warning(f"Would release ownership of task {task.key} but this is only a dry run.") + return + task.release_ownership() + + def unregister(self, task): + if self.dry_run: + LOG.warning(f"Would unregister task {task.key} but this is only a dry run.") + return + task.unregister() + + def set_status(self, task, status): + if self.dry_run: + LOG.warning(f"Would set status of task {task.key} to {status} but this is only a dry run.") + return + task.set_status(status) def worker_process_task(self, task): raise NotImplementedError("Subclasses must implement this method.") diff --git a/src/anemoi/registry/workers/delete_dataset.py b/src/anemoi/registry/workers/delete_dataset.py index 6bfc820..43d96f1 100644 --- a/src/anemoi/registry/workers/delete_dataset.py +++ b/src/anemoi/registry/workers/delete_dataset.py @@ -42,6 +42,9 @@ def worker_process_task(self, task): if platform not in locations: LOG.warning(f"Dataset {dataset} has no locations on '{platform}'. 
Ignoring delete request.") return + if self.dry_run: + LOG.warning(f"Would delete {locations[platform]['path']} from '{platform}' but this is only a dry run.") + return path = locations[platform]["path"] LOG.warning(f"Deleting {path} from '{platform}'") diff --git a/src/anemoi/registry/workers/transfer_dataset.py b/src/anemoi/registry/workers/transfer_dataset.py index ac41f18..16ae156 100644 --- a/src/anemoi/registry/workers/transfer_dataset.py +++ b/src/anemoi/registry/workers/transfer_dataset.py @@ -91,7 +91,7 @@ def __init__( if not self.destination: raise ValueError("No destination platform specified") - if not os.path.exists(self.target_dir): + if not os.path.exists(self.target_dir) and not self.target_dir.startswith("s3://"): raise ValueError(f"Target directory {self.target_dir} must already exist") def worker_process_task(self, task): @@ -126,6 +126,7 @@ def get_source_path(): return from anemoi.utils.s3 import download + from anemoi.utils.s3 import upload LOG.info(f"Source path: {source_path}") LOG.info(f"Target path: {target_path}") @@ -133,13 +134,18 @@ def get_source_path(): if source_path.startswith("s3://"): source_path = source_path + "/" if not source_path.endswith("/") else source_path + if self.dry_run: + LOG.warning(f"Would tranfer {source_path} to {target_path} but this is only a dry run.") + return + progress = Progress(task, frequency=10) if target_path.startswith("s3://"): - LOG.warning("Uploading to S3 is experimental and has not been tested yet.") - download(source_path, target_path, resume=True, threads=self.threads, progress=progress) - return + # upload to S3 uses function upload() + LOG.info(f"Upload('{source_path}','{target_path}', resume=True, threads={self.threads})") + upload(source_path, target_path, resume=True, threads=self.threads, progress=progress) else: + # download to local uses function download() and a temporary path target_tmp_path = os.path.join(self.target_dir + "-downloading", basename) os.makedirs(os.path.dirname(target_tmp_path), exist_ok=True) download(source_path, target_tmp_path, resume=True, threads=self.threads, progress=progress) From 7a3b0dcf8c00d98d971178f056ff6aff2182a1e0 Mon Sep 17 00:00:00 2001 From: Florian Pinault Date: Fri, 19 Jul 2024 12:03:00 +0000 Subject: [PATCH 43/64] implement datasets --upload --- docs/configuring.rst | 5 -- src/anemoi/registry/commands/base.py | 4 +- src/anemoi/registry/commands/datasets.py | 79 ++++++++++++++++--- src/anemoi/registry/commands/experiments.py | 3 + src/anemoi/registry/commands/weights.py | 15 ++-- src/anemoi/registry/config.yaml | 5 +- src/anemoi/registry/entry/__init__.py | 5 ++ src/anemoi/registry/entry/dataset.py | 69 +++++++++++++++- src/anemoi/registry/entry/weights.py | 3 +- src/anemoi/registry/workers/delete_dataset.py | 5 ++ tests/test_all.py | 29 ++++++- 11 files changed, 190 insertions(+), 32 deletions(-) diff --git a/docs/configuring.rst b/docs/configuring.rst index 374f3c2..3453d99 100644 --- a/docs/configuring.rst +++ b/docs/configuring.rst @@ -14,11 +14,6 @@ this file are optional and have default values. [registry] api_url = "https://anemoi.ecmwf.int/api/v1" - plots_uri_pattern = "s3://ml-artefacts/{expver}/{basename}" - datasets_uri_pattern = "s3://ml-datasets/{name}" - weights_uri_pattern = "s3://ml-weights/{uuid}.ckpt" - weights_platform = "ewc" - The second config file is ``~/.config/anemoi/config.secret.toml``. This file must have the right permissions set to avoid unauthorized access (`chmod 600 `). All keys in this file have no default values. 
diff --git a/src/anemoi/registry/commands/base.py b/src/anemoi/registry/commands/base.py index 8bf1168..4564907 100644 --- a/src/anemoi/registry/commands/base.py +++ b/src/anemoi/registry/commands/base.py @@ -81,11 +81,11 @@ def run(self, args): def get_entry(self, name_or_path): if self.is_path(name_or_path): - LOG.info(f"Found local {self.kind} at {name_or_path}") + LOG.debug(f"Found local {self.kind} at {name_or_path}") return self.entry_class(path=name_or_path) if self.is_identifier(name_or_path): - LOG.info(f"Processing {self.kind} with identifier '{name_or_path}'") + LOG.debug(f"Processing {self.kind} with identifier '{name_or_path}'") return self.entry_class(key=name_or_path) def run_from_identifier(self, *args, **kwargs): diff --git a/src/anemoi/registry/commands/datasets.py b/src/anemoi/registry/commands/datasets.py index 0a601a6..1749eb2 100644 --- a/src/anemoi/registry/commands/datasets.py +++ b/src/anemoi/registry/commands/datasets.py @@ -12,7 +12,9 @@ """ +import argparse import logging +import os from ..entry.dataset import DatasetCatalogueEntry from .base import BaseCommand @@ -29,31 +31,82 @@ class Datasets(BaseCommand): kind = "dataset" def add_arguments(self, command_parser): - command_parser.add_argument("NAME_OR_PATH", help=f"The name or the path of a {self.kind}.") - command_parser.add_argument("--register", help=f"Register a {self.kind} in the catalogue.", action="store_true") + command_parser.add_argument("NAME_OR_PATH", help="The name or the path of a dataset.") + command_parser.add_argument("--register", help="Register a dataset in the catalogue.", action="store_true") command_parser.add_argument( "--unregister", - help=f"Remove a {self.kind} from catalogue (without deleting it from its locations)", + help="Remove a dataset from catalogue (without deleting it from its locations). Ignore all other options.", action="store_true", ) - # command_parser.add_argument("--delete", help=f"Delete the {self.kind} from the catalogue and from any other location", action="store_true") - command_parser.add_argument("--set-status", help="Set the status to the {self.kind}.") - command_parser.add_argument("--set-recipe", help="Set the recipe file to [re-]build the {self.kind}.") + command_parser.add_argument("--url", help="Print the URL of the dataset.", action="store_true") + # command_parser.add_argument("--delete", help=f"Delete the dataset from the catalogue and from any other location", action="store_true") + command_parser.add_argument("--set-status", help="Set the status to the dataset.", metavar="STATUS") command_parser.add_argument( - "--add-location", - nargs="+", - help="Path to add a location to the dataset. Implies --platform", + "--set-recipe", help="Set the recipe file to [re-]build the dataset.", metavar="FILE" ) - command_parser.add_argument("--platform", help="Platform to add the location to. Implies --add-location") + command_parser.add_argument( + "--add-local", + help=("Platform name to add a new location to the NAME_OR_PATH. " "Requires that NAME_OR_PATH is a path."), + metavar="PLATFORM", + ) + + command_parser.add_argument("--add-location", help="Platform name to add a new location.", metavar="PLATFORM") + command_parser.add_argument( + "--uri-pattern", + help="Path of the new location using {name}, such as 's3://ml-datasets/{name}.zarr' . Requires a platform name in --add-location.", + metavar="PATH", + ) + command_parser.add_argument( + "--upload", + help="Upload the dataset. 
Requires a platform name in --add-location.",
+            action=argparse.BooleanOptionalAction,
+            default=False,
+        )
+
+        command_parser.add_argument("--remove-location", help="Platform name to remove.", metavar="PLATFORM")
 
     def _run(self, entry, args):
+        if entry is None:
+            raise ValueError(f"Dataset {args.NAME_OR_PATH} not found in the catalogue and path does not exist.")
+
+        if args.unregister:
+            entry.unregister()
+            return
+
+        if args.add_local and not os.path.exists(args.NAME_OR_PATH):
+            raise ValueError(f"Path {args.NAME_OR_PATH} does not exist. Cannot use --add-local.")
+
+        if args.upload:
+            if not os.path.exists(args.NAME_OR_PATH):
+                raise ValueError(f"Path {args.NAME_OR_PATH} does not exist. Cannot use --upload.")
+            if not args.add_location:
+                raise ValueError("Cannot use --upload without --add-location.")
+
+        if args.uri_pattern is not None:
+            if not args.add_location:
+                raise ValueError("Cannot use --uri-pattern without --add-location.")
+            if "{name}" not in args.uri_pattern:
+                raise ValueError(f"URI pattern {args.uri_pattern} does not contain '{{name}}'")
+
         # order matters
-        self.process_task(entry, args, "unregister")
         self.process_task(entry, args, "register")
-        # self.process_task(entry, args, "remove_location")
-        self.process_task(entry, args, "add_location", platform=args.platform)
         self.process_task(entry, args, "set_recipe")
         self.process_task(entry, args, "set_status")
+        self.process_task(entry, args, "remove_location")
+
+        if args.add_local:
+            entry.add_location(args.add_local, path=args.NAME_OR_PATH)
+
+        if args.upload or args.add_location:
+            path = entry.build_location_path(platform=args.add_location, uri_pattern=args.uri_pattern)
+            if args.upload:
+                entry.upload(source=args.NAME_OR_PATH, target=path, platform=args.add_location)
+            if args.add_location:
+                LOG.info(f"Adding location to {args.add_location}: {path}")
+                entry.add_location(platform=args.add_location, path=path)
+
+        if args.url:
+            print(entry.url)
 
 
 command = Datasets
diff --git a/src/anemoi/registry/commands/experiments.py b/src/anemoi/registry/commands/experiments.py
index 9c162e9..73aa59e 100644
--- a/src/anemoi/registry/commands/experiments.py
+++ b/src/anemoi/registry/commands/experiments.py
@@ -41,6 +41,7 @@ def add_arguments(self, command_parser):
             help="Remove from catalogue (without deleting the experiment from other locations)",
             action="store_true",
         )
+        command_parser.add_argument("--url", help="Print the URL of the experiment.", action="store_true")
         command_parser.add_argument(
             "--delete-artefacts",
             help="Remove the experiment's artefacts (such as plots)",
@@ -106,6 +107,8 @@ def _run(self, entry, args):
             run_number=args.run_number,
             platform=args.archive_platform,
         )
+        if args.url:
+            print(entry.url)
 
 
 command = Experiments
diff --git a/src/anemoi/registry/commands/weights.py b/src/anemoi/registry/commands/weights.py
index 6d74196..d26473e 100644
--- a/src/anemoi/registry/commands/weights.py
+++ b/src/anemoi/registry/commands/weights.py
@@ -35,19 +35,24 @@ def add_arguments(self, command_parser):
         )
         command_parser.add_argument(
             "--unregister",
-            help="Remove from catalogue (without deleting it from its actual locations).",
+            help="Remove from catalogue (without deleting it from its actual locations). 
Ignore all other options.", action="store_true", ) # command_parser.add_argument("--delete", help=f"Delete the {self.kind} from the catalogue and from any other location", action="store_true") - command_parser.add_argument("--add-location", help="Add a location to the weights.") - command_parser.add_argument("--platform", help="Platform where to add the location.") + command_parser.add_argument("--add-location", help="Platform to add location to the weights.") + command_parser.add_argument("--location-path", help="Path of the new location using {{uuid}}.", metavar="PATH") command_parser.add_argument("--overwrite", help="Overwrite any existing weights.", action="store_true") + command_parser.add_argument("--url", help="Print the URL of the dataset.", action="store_true") def _run(self, entry, args): - self.process_task(entry, args, "unregister") + if args.unregister: + entry.unregister() + return self.process_task(entry, args, "register", overwrite=args.overwrite) - self.process_task(entry, args, "add_location", platform=args.platform) + self.process_task(entry, args, "add_location", path=args.location_path) + if args.url: + print(entry.url) command = Weights diff --git a/src/anemoi/registry/config.yaml b/src/anemoi/registry/config.yaml index 7720c09..adaf732 100644 --- a/src/anemoi/registry/config.yaml +++ b/src/anemoi/registry/config.yaml @@ -5,7 +5,10 @@ registry: artefacts_uri_base: "s3://ml-artefacts" plots_uri_pattern: "s3://ml-artefacts/{expver}/{basename}" - datasets_uri_pattern: "s3://ml-datasets/{name}" + + datasets_uri_pattern: "s3://ml-datasets/{name}.zarr" + datasets_platform: "ewc" + weights_uri_pattern: "s3://ml-weights/{uuid}.ckpt" weights_platform: "ewc" diff --git a/src/anemoi/registry/entry/__init__.py b/src/anemoi/registry/entry/__init__.py index c7509fd..7c2b17d 100644 --- a/src/anemoi/registry/entry/__init__.py +++ b/src/anemoi/registry/entry/__init__.py @@ -30,6 +30,11 @@ class CatalogueEntry: record = None path = None key = None + collection = None + + @property + def url(self): + return f"{config()['web_url']}/{self.collection}/{self.key}" def __init__(self, key=None, path=None, must_exist=True): assert key is not None or path is not None, "key or path must be provided" diff --git a/src/anemoi/registry/entry/dataset.py b/src/anemoi/registry/entry/dataset.py index 028838f..749a64b 100644 --- a/src/anemoi/registry/entry/dataset.py +++ b/src/anemoi/registry/entry/dataset.py @@ -5,11 +5,15 @@ # granted to it by virtue of its status as an intergovernmental organisation # nor does it submit to any jurisdiction. +import datetime import logging import os import yaml from anemoi.datasets import open_dataset +from anemoi.utils.humanize import when + +from anemoi.registry import config from . 
import CatalogueEntry
 
@@ -23,12 +27,75 @@ class DatasetCatalogueEntry(CatalogueEntry):
     def set_status(self, status):
         self.rest_item.patch([{"op": "add", "path": "/status", "value": status}])
 
-    def add_location(self, path, platform):
+    def build_location_path(self, platform, uri_pattern=None):
+        if uri_pattern is None:
+            assert platform == config()["datasets_platform"]
+            uri_pattern = config()["datasets_uri_pattern"]
+            LOG.debug(f"Using uri pattern from config: {uri_pattern}")
+        else:
+            LOG.debug(f"Using uri pattern: {uri_pattern}")
+        return uri_pattern.format(name=self.key)
+
+    def add_location(self, platform, path):
+        LOG.debug(f"Adding location to {platform}: {path}")
         self.rest_item.patch([{"op": "add", "path": f"/locations/{platform}", "value": {"path": path}}])
+        return path
 
     def remove_location(self, platform):
         self.rest_item.patch([{"op": "remove", "path": f"/locations/{platform}"}])
 
+    def upload(self, source, target, platform="unknown", resume=True):
+        LOG.info(f"Uploading from {source} to {target}")
+        assert target.startswith("s3://"), target
+
+        source_path = os.path.abspath(source)
+        kwargs = dict(
+            action="transfer-dataset",
+            source="cli",
+            source_path=source_path,
+            destination=platform,
+            target_path=target,
+            dataset=self.key,
+        )
+        LOG.info(f"Task: {kwargs}")
+
+        from anemoi.utils.s3 import upload
+
+        from anemoi.registry.tasks import TaskCatalogueEntry
+        from anemoi.registry.tasks import TaskCatalogueEntryList
+        from anemoi.registry.workers.transfer_dataset import Progress
+
+        def find_or_create_task(**kwargs):
+            lst = TaskCatalogueEntryList(**kwargs)
+
+            if not lst:
+                LOG.info("No running transfer found, starting one.")
+                uuid = TaskCatalogueEntryList().add_new_task(**kwargs)
+                task = TaskCatalogueEntry(key=uuid)
+                return task
+
+            lst = TaskCatalogueEntryList(**kwargs)
+            task = lst[0]
+            updated = datetime.datetime.fromisoformat(task.record["updated"])
+            if resume:
+                LOG.info(f"Resuming from previous transfer (last update {when(updated)})")
+            else:
+                raise ValueError(f"Transfer already in progress (last update {when(updated)})")
+            return task
+
+        task = find_or_create_task(**kwargs)
+        task.set_status("running")
+
+        progress = Progress(task, frequency=10)
+        LOG.info(f"Upload('{source_path}','{target}', resume=True, threads=2)")
+        try:
+            upload(source_path, target, resume=True, threads=2, progress=progress)
+        except:
+            task.set_status("stopped")
+            raise
+
+        task.unregister()
+
     def set_recipe(self, file):
         if not os.path.exists(file):
             raise FileNotFoundError(f"Recipe file not found: {file}")
diff --git a/src/anemoi/registry/entry/weights.py b/src/anemoi/registry/entry/weights.py
index 0c5a6aa..70b975a 100644
--- a/src/anemoi/registry/entry/weights.py
+++ b/src/anemoi/registry/entry/weights.py
@@ -21,8 +21,9 @@ class WeightCatalogueEntry(CatalogueEntry):
     collection = "weights"
     main_key = "uuid"
 
-    def add_location(self, path, platform):
+    def add_location(self, platform, path):
         self.rest_item.patch([{"op": "add", "path": f"/locations/{platform}", "value": {"path": path}}])
+        return path
 
     def default_location(self, **kwargs):
         uri = config()["weights_uri_pattern"]
diff --git a/src/anemoi/registry/workers/delete_dataset.py b/src/anemoi/registry/workers/delete_dataset.py
index 43d96f1..7fcf8c2 100644
--- a/src/anemoi/registry/workers/delete_dataset.py
+++ b/src/anemoi/registry/workers/delete_dataset.py
@@ -50,7 +50,12 @@ def worker_process_task(self, task):
 
         LOG.warning(f"Deleting {path} from '{platform}'")
         tmp_path = path + ".deleting"
+        i = 0
+        while os.path.exists(tmp_path):
+            i 
+= 1 + tmp_path = path + ".deleting." + str(i) os.rename(path, tmp_path) + # shutil.rmtree(tmp_path) LOG.warning(f"Deleted {path} from '{platform}'") diff --git a/tests/test_all.py b/tests/test_all.py index f61e745..c3a6719 100755 --- a/tests/test_all.py +++ b/tests/test_all.py @@ -23,7 +23,10 @@ def run(*args): def setup_module(): teardown_module(raise_if_error=False) run("anemoi-registry", "experiments", "./dummy-recipe-experiment.yaml", "--register") + run("anemoi-registry", "experiments", "./dummy-recipe-experiment.yaml") + run("anemoi-registry", "weights", "./dummy-checkpoint.ckpt", "--register") + run("anemoi-registry", "weights", "./dummy-checkpoint.ckpt") if not os.path.exists(DATASET_PATH): run("anemoi-datasets", "create", "./dummy-recipe-dataset.yaml", DATASET_PATH, "--overwrite") @@ -31,6 +34,7 @@ def setup_module(): os.symlink(DATASET_PATH, TMP_DATASET_PATH) run("anemoi-registry", "datasets", TMP_DATASET_PATH, "--register") + run("anemoi-registry", "datasets", TMP_DATASET_PATH) print("# Setup done") @@ -60,8 +64,25 @@ def test_datasets(): run("anemoi-registry", "datasets", TMP_DATASET) run("anemoi-registry", "datasets", TMP_DATASET, "--set-recipe", "./dummy-recipe-dataset.yaml") run("anemoi-registry", "datasets", TMP_DATASET, "--set-status", "testing") - run("anemoi-registry", "datasets", TMP_DATASET, "--add-location", "/the/dataset/path", "--platform", "atos") - run("anemoi-registry", "datasets", TMP_DATASET, "--add-location", "/other/path", "--platform", "leonardo") + run( + "anemoi-registry", + "datasets", + TMP_DATASET, + "--add-location", + "atos", + "--uri-pattern", + "/the/dataset/path/{name}", + ) + run( + "anemoi-registry", + "datasets", + TMP_DATASET, + "--add-location", + "leonardo", + "--uri-pattern", + "https://other/{name}/path", + ) + run("anemoi-registry", "datasets", TMP_DATASET, "--add-location", "ewc") def test_weights(): @@ -72,9 +93,9 @@ def test_weights(): "weights", "./dummy-checkpoint.ckpt", "--add-location", - "s3://ml-weights/a5275e04-0000-0000-a0f6-be19591b09fe.ckpt", - "--platform", "ewc", + "--location-path", + "s3://ml-weights/a5275e04-0000-0000-a0f6-be19591b09fe.ckpt", ) From 08b59d9c23ddd3567d82a068d93adf787711ee64 Mon Sep 17 00:00:00 2001 From: Florian Pinault Date: Fri, 19 Jul 2024 13:39:31 +0000 Subject: [PATCH 44/64] refactor --- src/anemoi/registry/entry/dataset.py | 14 ++++++++------ tests/test_all.py | 3 +++ 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/src/anemoi/registry/entry/dataset.py b/src/anemoi/registry/entry/dataset.py index 749a64b..0d929bc 100644 --- a/src/anemoi/registry/entry/dataset.py +++ b/src/anemoi/registry/entry/dataset.py @@ -59,11 +59,8 @@ def upload(self, source, target, platform="unknown", resume=True): ) LOG.info(f"Task: {kwargs}") - from anemoi.utils.s3 import upload - from anemoi.registry.tasks import TaskCatalogueEntry from anemoi.registry.tasks import TaskCatalogueEntryList - from anemoi.registry.workers.transfer_dataset import Progress def find_or_create_task(**kwargs): lst = TaskCatalogueEntryList(**kwargs) @@ -84,16 +81,21 @@ def find_or_create_task(**kwargs): return task task = find_or_create_task(**kwargs) - task.set_status("running") + self.transfer(task, source_path, target, resume=True, threads=2) + + def transfer(self, task, source_path, target, resume, threads): + from anemoi.utils.s3 import upload + + from anemoi.registry.workers.transfer_dataset import Progress progress = Progress(task, frequency=10) LOG.info(f"Upload('{source_path}','{target}', resume=True, threads=2)") + 
task.set_status("running") try: - upload(source_path, target, resume=True, threads=2, progress=progress) + upload(source_path, target, resume=resume, threads=threads, progress=progress) except: task.set_status("stopped") raise - task.unregister() def set_recipe(self, file): diff --git a/tests/test_all.py b/tests/test_all.py index c3a6719..ab75dcd 100755 --- a/tests/test_all.py +++ b/tests/test_all.py @@ -84,6 +84,9 @@ def test_datasets(): ) run("anemoi-registry", "datasets", TMP_DATASET, "--add-location", "ewc") + # do not upload the dataset to avoid polluting the s3 bucket, until we have a way to clean it up automatically + # run("anemoi-registry", "datasets", TMP_DATASET_PATH, "--add-location", "ewc", "--upload") + def test_weights(): # assert run("anemoi-registry", "weights", "a5275e04-0000-0000-a0f6-be19591b09fe") == 1 From 40bdac2786f7a45eeaa9a36b7a501140037e79af Mon Sep 17 00:00:00 2001 From: Florian Pinault Date: Mon, 22 Jul 2024 14:51:22 +0000 Subject: [PATCH 45/64] simplify api, and make more robust --- docs/cli/datasets.rst | 1 - src/anemoi/registry/__init__.py | 21 +++++++++++++++++++++ src/anemoi/registry/commands/base.py | 5 ++++- src/anemoi/registry/commands/experiments.py | 4 ++-- src/anemoi/registry/entry/dataset.py | 14 +++++++++++++- src/anemoi/registry/entry/experiment.py | 15 ++++++++++++++- src/anemoi/registry/entry/weights.py | 15 ++++++++++++++- 7 files changed, 68 insertions(+), 7 deletions(-) diff --git a/docs/cli/datasets.rst b/docs/cli/datasets.rst index 7511501..f43dcfc 100644 --- a/docs/cli/datasets.rst +++ b/docs/cli/datasets.rst @@ -2,7 +2,6 @@ datasets ======== - .. argparse:: :module: anemoi.registry.__main__ :func: create_parser diff --git a/src/anemoi/registry/__init__.py b/src/anemoi/registry/__init__.py index b5c5815..763ed82 100644 --- a/src/anemoi/registry/__init__.py +++ b/src/anemoi/registry/__init__.py @@ -20,3 +20,24 @@ def config(): default_config = os.path.join(os.path.dirname(__file__), "config.yaml") config = load_config(secrets=["api_token"], defaults=default_config) return config.get("registry") + + +from .entry.dataset import DatasetCatalogueEntry as Dataset +from .entry.dataset import DatasetCatalogueEntryList as DatasetsList +from .entry.experiment import ExperimentCatalogueEntry as Experiment +from .entry.experiment import ExperimentCatalogueEntryList as ExperimentsList +from .entry.weights import WeightCatalogueEntry as Weights +from .entry.weights import WeightsCatalogueEntryList as WeightsList +from .tasks import TaskCatalogueEntry as Task +from .tasks import TaskCatalogueEntryList as TasksList + +__all__ = [ + "Weights", + "WeightsList", + "Experiment", + "ExperimentsList", + "Dataset", + "DatasetsList", + "Task", + "TasksList", +] diff --git a/src/anemoi/registry/commands/base.py b/src/anemoi/registry/commands/base.py index 4564907..0260d3e 100644 --- a/src/anemoi/registry/commands/base.py +++ b/src/anemoi/registry/commands/base.py @@ -35,7 +35,7 @@ def is_identifier(self, name_or_path): except CatalogueEntryNotFound: return False - def process_task(self, entry, args, k, func_name=None, /, **kwargs): + def process_task(self, entry, args, k, func_name=None, /, _skip_if_not_found=False, **kwargs): """ Call the method `k` on the entry object. The args/kwargs given to the method are extracted from from the argument `k` in the `args` object. @@ -46,6 +46,9 @@ def process_task(self, entry, args, k, func_name=None, /, **kwargs): The provided **kwargs are also passed to the method. 
The method name can be changed by providing the `func_name` argument. """ + if entry is None and _skip_if_not_found: + LOG.warning(f"Cannot find entry {args.NAME_OR_PATH}. Skipping {k}.") + return assert isinstance(k, str), k if func_name is None: diff --git a/src/anemoi/registry/commands/experiments.py b/src/anemoi/registry/commands/experiments.py index 73aa59e..5c5559f 100644 --- a/src/anemoi/registry/commands/experiments.py +++ b/src/anemoi/registry/commands/experiments.py @@ -86,8 +86,8 @@ def is_path(self, name_or_path): return True def _run(self, entry, args): - self.process_task(entry, args, "delete_artefacts") - self.process_task(entry, args, "unregister") + self.process_task(entry, args, "delete_artefacts", _skip_if_not_found=True) + self.process_task(entry, args, "unregister", _skip_if_not_found=True) self.process_task(entry, args, "register", overwrite=args.overwrite) self.process_task(entry, args, "add_weights") self.process_task(entry, args, "add_plots") diff --git a/src/anemoi/registry/entry/dataset.py b/src/anemoi/registry/entry/dataset.py index 0d929bc..6ca9d8c 100644 --- a/src/anemoi/registry/entry/dataset.py +++ b/src/anemoi/registry/entry/dataset.py @@ -14,14 +14,26 @@ from anemoi.utils.humanize import when from anemoi.registry import config +from anemoi.registry.rest import RestItemList from . import CatalogueEntry LOG = logging.getLogger(__name__) +COLLECTION = "datasets" + + +class DatasetCatalogueEntryList(RestItemList): + def __init__(self, **kwargs): + super().__init__(COLLECTION, **kwargs) + + def __iter__(self): + for v in self.get(): + yield DatasetCatalogueEntry(key=v["name"]) + class DatasetCatalogueEntry(CatalogueEntry): - collection = "datasets" + collection = COLLECTION main_key = "name" def set_status(self, status): diff --git a/src/anemoi/registry/entry/experiment.py b/src/anemoi/registry/entry/experiment.py index 9dfff14..7530343 100644 --- a/src/anemoi/registry/entry/experiment.py +++ b/src/anemoi/registry/entry/experiment.py @@ -15,15 +15,28 @@ from anemoi.utils.s3 import download from anemoi.utils.s3 import upload +from anemoi.registry.rest import RestItemList + from .. import config from . import CatalogueEntry from .weights import WeightCatalogueEntry +COLLECTION = "experiments" + LOG = logging.getLogger(__name__) +class ExperimentCatalogueEntryList(RestItemList): + def __init__(self, **kwargs): + super().__init__(COLLECTION, **kwargs) + + def __iter__(self): + for v in self.get(): + yield ExperimentCatalogueEntry(key=v["expver"]) + + class ExperimentCatalogueEntry(CatalogueEntry): - collection = "experiments" + collection = COLLECTION main_key = "expver" def create_from_new_key(self, key): diff --git a/src/anemoi/registry/entry/weights.py b/src/anemoi/registry/entry/weights.py index 70b975a..755b77e 100644 --- a/src/anemoi/registry/entry/weights.py +++ b/src/anemoi/registry/entry/weights.py @@ -11,14 +11,27 @@ from anemoi.utils.checkpoints import load_metadata as load_checkpoint_metadata from anemoi.utils.s3 import upload +from anemoi.registry.rest import RestItemList + from .. import config from . 
import CatalogueEntry
 
+COLLECTION = "weights"
+
 LOG = logging.getLogger(__name__)
 
 
+class WeightsCatalogueEntryList(RestItemList):
+    def __init__(self, **kwargs):
+        super().__init__(COLLECTION, **kwargs)
+
+    def __iter__(self):
+        for v in self.get():
+            yield WeightCatalogueEntry(key=v["uuid"])
+
+
 class WeightCatalogueEntry(CatalogueEntry):
-    collection = "weights"
+    collection = COLLECTION
     main_key = "uuid"
 
     def add_location(self, platform, path):
From 217b86cbc4c48428ec869a8500a53124608d7080 Mon Sep 17 00:00:00 2001
From: Florian Pinault
Date: Tue, 23 Jul 2024 09:59:39 +0000
Subject: [PATCH 46/64] add config to pip

---
 pyproject.toml | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 8e86399..b27e4f3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -18,7 +18,7 @@ requires = [
 
 [project]
 name = "anemoi-registry"
-description = "A package to manahe a registry or data-driven forecasts."
+description = "A package to manage a registry of data-driven forecasts."
 keywords = [
   "ai",
   "registry",
@@ -92,5 +92,10 @@ urls.Issues = "https://github.com/ecmwf/anemoi-registry/issues"
 urls.Repository = "https://github.com/ecmwf/anemoi-registry/"
 scripts.anemoi-registry = "anemoi.registry.__main__:main"
 
+[tool.setuptools.package-data]
+"anemoi.registry" = [
+  "*.yaml",
+]
+
 [tool.setuptools_scm]
 version_file = "src/anemoi/registry/_version.py"
From 84230688d81d0fc5881bcc32b9f9894369a80a94 Mon Sep 17 00:00:00 2001
From: Gert Mertes
Date: Tue, 23 Jul 2024 10:11:59 +0000
Subject: [PATCH 47/64] archive-extra-metadata can be empty

---
 src/anemoi/registry/commands/experiments.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/anemoi/registry/commands/experiments.py b/src/anemoi/registry/commands/experiments.py
index 5c5559f..4babb1b 100644
--- a/src/anemoi/registry/commands/experiments.py
+++ b/src/anemoi/registry/commands/experiments.py
@@ -73,7 +73,7 @@ def add_arguments(self, command_parser):
             "--run-number", help="The run number of the experiment. Relevant --set-archive and --get-archive."
         )
         command_parser.add_argument(
-            "--archive-extra-metadata", help="Extra metadata. A list of key=value pairs.", nargs="+"
+            "--archive-extra-metadata", help="Extra metadata. 
A list of key=value pairs.", nargs="+", default={} ) command_parser.add_argument("--overwrite", help="Overwrite if already exists.", action="store_true") From 663b67967cebf0671b7d26e48e41f931f061ae10 Mon Sep 17 00:00:00 2001 From: Gert Mertes Date: Tue, 23 Jul 2024 10:37:59 +0000 Subject: [PATCH 48/64] Support archive files with multiple extensions --- src/anemoi/registry/entry/experiment.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/anemoi/registry/entry/experiment.py b/src/anemoi/registry/entry/experiment.py index 7530343..481a1ce 100644 --- a/src/anemoi/registry/entry/experiment.py +++ b/src/anemoi/registry/entry/experiment.py @@ -110,8 +110,9 @@ def set_archive(self, path, platform, run_number, overwrite=True, extras={}): if isinstance(extras, list): extras = {v.split("=")[0]: v.split("=")[1] for v in extras} - _, ext = os.path.splitext(path) - target = config()["artefacts_uri_base"] + f"/{self.key}/runs/{run_number}/{platform}{ext}" + base = os.path.basename(path) + ext = base.split(os.extsep, 1)[-1] # everything after the first dot, to support multiple ext like tar.gz + target = config()["artefacts_uri_base"] + f"/{self.key}/runs/{run_number}/{platform}.{ext}" LOG.info(f"Uploading {path} to {target}.") upload(path, target, overwrite=overwrite) From f247b8ce17a662cec717072870714f6def4247dd Mon Sep 17 00:00:00 2001 From: Gert Mertes Date: Tue, 23 Jul 2024 13:20:14 +0000 Subject: [PATCH 49/64] Fix crash if runs doesn't exist yet --- src/anemoi/registry/entry/experiment.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/anemoi/registry/entry/experiment.py b/src/anemoi/registry/entry/experiment.py index 481a1ce..df24b5e 100644 --- a/src/anemoi/registry/entry/experiment.py +++ b/src/anemoi/registry/entry/experiment.py @@ -86,7 +86,7 @@ def _ensure_run_exists(self, run_number, **kwargs): e.record["runs"] = {} # add run_number if it does not exist - if str(run_number) not in self.record["runs"]: + if str(run_number) not in self.record.get("runs", {}): e.rest_item.patch( [ {"op": "test", "path": "/runs", "value": e.record["runs"]}, From c84927260c0db751d173d3c1649e9230cdf1aa8f Mon Sep 17 00:00:00 2001 From: Florian Pinault Date: Wed, 24 Jul 2024 11:51:09 +0200 Subject: [PATCH 50/64] add --remove-archive to experiments --- src/anemoi/registry/commands/experiments.py | 17 ++++++++--------- src/anemoi/registry/entry/experiment.py | 11 +++++++++++ 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/src/anemoi/registry/commands/experiments.py b/src/anemoi/registry/commands/experiments.py index 4babb1b..3cddabd 100644 --- a/src/anemoi/registry/commands/experiments.py +++ b/src/anemoi/registry/commands/experiments.py @@ -65,12 +65,16 @@ def add_arguments(self, command_parser): command_parser.add_argument( "--get-archive", help="Output file to save the archive metadata file from the catalogue." ) + command_parser.add_argument( + "--remove-archive", help="Remove the archive metadata file from the catalogue.", action="store_true" + ) command_parser.add_argument( "--archive-platform", - help="Archive platform. Only relevant for --set-archive and --get-archive.", + help="Archive platform. Only relevant for --set-archive and --get-archive and --remove-archive.", ) command_parser.add_argument( - "--run-number", help="The run number of the experiment. Relevant --set-archive and --get-archive." + "--run-number", + help="The run number of the experiment. 
Relevant --set-archive and --get-archive and --remove-archive.", ) command_parser.add_argument( "--archive-extra-metadata", help="Extra metadata. A list of key=value pairs.", nargs="+", default={} @@ -100,13 +104,8 @@ def _run(self, entry, args): overwrite=args.overwrite, extras=args.archive_extra_metadata, ) - self.process_task( - entry, - args, - "get_archive", - run_number=args.run_number, - platform=args.archive_platform, - ) + self.process_task(entry, args, "get_archive", run_number=args.run_number, platform=args.archive_platform) + self.process_task(entry, args, "remove_archive", run_number=args.run_number, platform=args.archive_platform) if args.url: print(entry.url) diff --git a/src/anemoi/registry/entry/experiment.py b/src/anemoi/registry/entry/experiment.py index df24b5e..1630efa 100644 --- a/src/anemoi/registry/entry/experiment.py +++ b/src/anemoi/registry/entry/experiment.py @@ -122,6 +122,17 @@ def set_archive(self, path, platform, run_number, overwrite=True, extras={}): self.rest_item.patch([{"op": "add", "path": f"/runs/{run_number}/archives/{platform}", "value": dic}]) + def remove_archive(self, platform, run_number): + if run_number is None: + raise ValueError("run_number must be set") + run_number = str(run_number) + + if platform is None: + raise ValueError("platform must be set") + + LOG.info(f"Removing archive for run {run_number} and platform {platform}") + self.rest_item.patch([{"op": "remove", "path": f"/runs/{run_number}/archives/{platform}"}]) + def get_archive(self, path, *, platform, run_number): if os.path.exists(path): raise FileExistsError(f"Path {path} already exists") From 738e48b8f732c16cdf680e6ff136a77284954ae9 Mon Sep 17 00:00:00 2001 From: Florian Pinault Date: Wed, 24 Jul 2024 13:28:44 +0200 Subject: [PATCH 51/64] added --archive-moved --- src/anemoi/registry/commands/experiments.py | 21 ++++-- src/anemoi/registry/entry/experiment.py | 80 +++++++++++++++++---- 2 files changed, 81 insertions(+), 20 deletions(-) diff --git a/src/anemoi/registry/commands/experiments.py b/src/anemoi/registry/commands/experiments.py index 3cddabd..1ebd1e0 100644 --- a/src/anemoi/registry/commands/experiments.py +++ b/src/anemoi/registry/commands/experiments.py @@ -56,25 +56,35 @@ def add_arguments(self, command_parser): "Add weights to the experiment and upload them do s3." "Skip upload if these weights are already uploaded." ), + metavar="FILE", ) - command_parser.add_argument("--add-plots", nargs="+", help="Add plots to the experiment.") + command_parser.add_argument("--add-plots", nargs="+", help="Add plots to the experiment.", metavar="FILE") command_parser.add_argument( - "--set-archive", help="Input file to register as an archive metadata file to the catalogue." + "--set-archive", help="Input file to register as an archive metadata file to the catalogue.", metavar="FILE" ) command_parser.add_argument( - "--get-archive", help="Output file to save the archive metadata file from the catalogue." + "--get-archive", + help="Output file to save the archive metadata file from the catalogue. 
Merge metadata file if there are multiple run numbers.", + metavar="FILE", ) + command_parser.add_argument("--remove-archive", help="Delete the archive metadata.", action="store_true") command_parser.add_argument( - "--remove-archive", help="Remove the archive metadata file from the catalogue.", action="store_true" + "--archive-moved", + help="When archive moved to a new location, move the metadata file and update the catalogue.", + nargs=2, + metavar=("OLD", "NEW"), ) + command_parser.add_argument( "--archive-platform", help="Archive platform. Only relevant for --set-archive and --get-archive and --remove-archive.", + metavar="PLATFORM", ) command_parser.add_argument( "--run-number", - help="The run number of the experiment. Relevant --set-archive and --get-archive and --remove-archive.", + help="The run number of the experiment. Relevant --set-archive and --get-archive and --remove-archive. Can be 'all' or 'latest' when applicable.", + metavar="N", ) command_parser.add_argument( "--archive-extra-metadata", help="Extra metadata. A list of key=value pairs.", nargs="+", default={} @@ -106,6 +116,7 @@ def _run(self, entry, args): ) self.process_task(entry, args, "get_archive", run_number=args.run_number, platform=args.archive_platform) self.process_task(entry, args, "remove_archive", run_number=args.run_number, platform=args.archive_platform) + self.process_task(entry, args, "archive_moved", run_number=args.run_number) if args.url: print(entry.url) diff --git a/src/anemoi/registry/entry/experiment.py b/src/anemoi/registry/entry/experiment.py index 1630efa..ba69bc5 100644 --- a/src/anemoi/registry/entry/experiment.py +++ b/src/anemoi/registry/entry/experiment.py @@ -8,6 +8,7 @@ import datetime import logging import os +import tempfile from getpass import getuser import yaml @@ -123,33 +124,82 @@ def set_archive(self, path, platform, run_number, overwrite=True, extras={}): self.rest_item.patch([{"op": "add", "path": f"/runs/{run_number}/archives/{platform}", "value": dic}]) def remove_archive(self, platform, run_number): - if run_number is None: - raise ValueError("run_number must be set") - run_number = str(run_number) - if platform is None: raise ValueError("platform must be set") - LOG.info(f"Removing archive for run {run_number} and platform {platform}") - self.rest_item.patch([{"op": "remove", "path": f"/runs/{run_number}/archives/{platform}"}]) + run_numbers = self._parse_run_number(run_number) - def get_archive(self, path, *, platform, run_number): - if os.path.exists(path): - raise FileExistsError(f"Path {path} already exists") + for run_number in run_numbers: + LOG.info(f"Removing archive for run {run_number} and platform {platform}") + if run_number not in self.record["runs"]: + LOG.info(f"Archive: skipping run {run_number} because it does not exist") + continue + run_record = self.record["runs"][run_number] + + if platform not in run_record.get("archives", {}): + LOG.info(f"Archive: skipping {platform} for run {run_number} because it does not exist") + continue + + url = run_record["archives"][platform]["url"] + delete(url) + self.rest_item.patch([{"op": "remove", "path": f"/runs/{run_number}/archives/{platform}"}]) + + def _list_run_numbers(self): + return [int(k) for k in self.record.get("runs", {}).keys()] + def _parse_run_number(self, run_number): + assert isinstance(run_number, (str, int)), "run_number must be a string or an integer" run_number = str(run_number) + + if run_number.lower() == "all": + return [str(i) for i in self._list_run_numbers()] + if run_number == "latest": 
-            run_number = str(max([int(k) for k in self.record["runs"].keys()]))
+            run_number = str(max(self._list_run_numbers()))
             LOG.info(f"Using latest run number {run_number}")
+
         if run_number not in self.record["runs"]:
             raise ValueError(f"Run number {run_number} not found")

-        if platform not in self.record["runs"][run_number]["archives"]:
-            raise ValueError(f"Platform {platform} not found")
+        return [run_number]
+
+    def archive_moved(self, old, new, run_number, overwrite=None):
+        run_numbers = self._parse_run_number(run_number)
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            for run_number in run_numbers:
+                tmp_path = os.path.join(tmpdir, str(run_number))
+                self.get_archive(tmp_path, platform=old, run_number=run_number)
+                self.set_archive(tmp_path, platform=new, run_number=run_number, overwrite=overwrite)
+                self.remove_archive(old, run_number)
+
+    def _get_run_record(self, run_number):
+        return self.record.get("runs", {}).get(run_number, {})
+
+    def get_archive(self, path, *, platform, run_number):
+        if os.path.exists(path):
+            raise FileExistsError(f"Path {path} already exists")
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            run_numbers = self._parse_run_number(run_number)
+            for run_number in run_numbers:
+                run_record = self._get_run_record(run_number)
+
+                if platform not in run_record.get("archives", {}):
+                    LOG.info(f"Archive: skipping {platform} for run {run_number} because it does not exist")
+                    continue
+
+                tmp_path = os.path.join(tmpdir, str(run_number))

-        url = self.record["runs"][run_number]["archives"][platform]["url"]
-        LOG.info(f"Downloading {url} to {path}.")
-        download(url, path)
+                url = run_record["archives"][platform]["url"]
+                LOG.info(f"Downloading {url} to {tmp_path}.")
+                download(url, tmp_path)
+                with open(path, "a+") as f:
+                    with open(tmp_path, "r") as tmp:
+                        f.write(tmp.read())

     def delete_artefacts(self):
         self.delete_all_plots()

From 65353fca7ad1ceea4fd78cd517aac262905ff5fe Mon Sep 17 00:00:00 2001
From: Florian Pinault
Date: Wed, 24 Jul 2024 13:52:34 +0200
Subject: [PATCH 52/64] doc

---
 docs/cli/datasets.rst | 51 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)

diff --git a/docs/cli/datasets.rst b/docs/cli/datasets.rst
index f43dcfc..10828cf 100644
--- a/docs/cli/datasets.rst
+++ b/docs/cli/datasets.rst
@@ -2,6 +2,57 @@
 datasets
 ========

+The `datasets` command is used to manage datasets in the registry.
+It can be used to register or unregister a dataset, add and remove information about a dataset, and upload it to the catalogue.
+
+
+The dataset names MUST follow the naming convention documented at .
+For instance `dataset-name` can be `aifs-ea-an-oper-0001-mars-o96-1979-2022-6h-v6`.
+
+**Register**
+
+After creating a new dataset locally (using `anemoi-datasets`), registering it in the catalogue can be done as follows:
+
+.. code-block:: bash
+
+    anemoi-registry datasets /path/to/dataset-name.zarr --register
+
+Write credentials are needed to register a dataset, and should be set in the anemoi configuration file. See :ref:`configuring` for more information.
+
+When registered, the dataset will be added to the catalogue and will be available at the url printed by `anemoi-registry datasets dataset-name --url`.
+Using the default configuration, the url is likely to be `https://anemoi.ecmwf.int/datasets/dataset-name`; read credentials are needed to read this url,
+see :ref:`configuring` for more information.
+
+**Adding metadata**
+
+Additional information should be added to the dataset, such as the recipe used to create it, the status of the dataset,
+and the location of the dataset.
+This can be done as follows:
+
+.. code-block:: bash
+
+    anemoi-registry datasets dataset-name --set-recipe ./recipe.yaml
+    anemoi-registry datasets dataset-name --set-status experimental
+
+    # alternatively the dataset name can be replaced by the dataset path
+
+    anemoi-registry datasets /path/to/dataset-name.zarr --set-recipe ./recipe.yaml
+    anemoi-registry datasets /path/to/dataset-name.zarr --set-status experimental
+
+
+**Uploading to S3**
+
+Uploading the dataset to S3 can be done as follows:
+
+.. code-block:: bash
+
+    anemoi-registry datasets /path/to/dataset-name.zarr --add-location ewc --upload
+
+The credentials to upload the dataset to S3 should be set in the anemoi configuration file.
+See :ref:`configuring` for more information.
+
+
+
 .. argparse::
    :module: anemoi.registry.__main__
    :func: create_parser

From 9527d683bb09d3656c98b4d78f354a1a1680a514 Mon Sep 17 00:00:00 2001
From: Florian Pinault
Date: Wed, 24 Jul 2024 14:10:08 +0200
Subject: [PATCH 53/64] doc

---
 docs/cli/datasets.rst | 27 ++++++++++++++-------------
 docs/configuring.rst  |  2 ++
 2 files changed, 16 insertions(+), 13 deletions(-)

diff --git a/docs/cli/datasets.rst b/docs/cli/datasets.rst
index 10828cf..1c2347f 100644
--- a/docs/cli/datasets.rst
+++ b/docs/cli/datasets.rst
@@ -1,7 +1,6 @@
 datasets
 ========
-
 The `datasets` command is used to manage datasets in the registry.
 It can be used to register or unregister a dataset, add and remove information about a dataset, and upload it to the catalogue.

@@ -9,7 +8,8 @@ It can be used to register or unregister a dataset, add and remove information
 The dataset names MUST follow the naming convention documented at .
 For instance `dataset-name` can be `aifs-ea-an-oper-0001-mars-o96-1979-2022-6h-v6`.

-**Register**
+
+**Registering**

 After creating a new dataset locally (using `anemoi-datasets`), registering it in the catalogue can be done as follows:

@@ -17,11 +17,8 @@ After creating a new dataset locally (using `anemoi-datasets`), registering it i

     anemoi-registry datasets /path/to/dataset-name.zarr --register

-Write credentials are needed to register a dataset, and should be set in the anemoi configuration file. See :ref:`configuring` for more information.
+Write credentials are needed to register a dataset to the catalogue. See :ref:`configuring`.

-When registered, the dataset will be added to the catalogue and will be available at the url printed by `anemoi-registry datasets dataset-name --url`.
-Using the default configuration, the url is likely to be `https://anemoi.ecmwf.int/datasets/dataset-name`; read credentials are needed to read this url,
-see :ref:`configuring` for more information.

 **Adding metadata**

@@ -31,13 +28,15 @@ This can be done as follows:

 .. code-block:: bash

-    anemoi-registry datasets dataset-name --set-recipe ./recipe.yaml
-    anemoi-registry datasets dataset-name --set-status experimental
+    anemoi-registry datasets /path/to/dataset-name.zarr --register --set-recipe ./recipe.yaml --set-status experimental
+

-    # alternatively the dataset name can be replaced by the dataset path
+Alternatively, the metadata can be added to an existing dataset:

-    anemoi-registry datasets /path/to/dataset-name.zarr --set-recipe ./recipe.yaml
-    anemoi-registry datasets /path/to/dataset-name.zarr --set-status experimental
+.. code-block:: bash
+
+    anemoi-registry datasets dataset-name --set-recipe ./recipe.yaml
+    anemoi-registry datasets dataset-name --set-status experimental

 **Uploading to S3**

@@ -48,10 +47,12 @@ Uploading the dataset to S3 can be done as follows:

     anemoi-registry datasets /path/to/dataset-name.zarr --add-location ewc --upload

-The credentials to upload the dataset to S3 should be set in the anemoi configuration file.
-See :ref:`configuring` for more information.
+S3 credentials are required to upload a dataset, see :ref:`configuring`.

+*****************
+Command line help
+*****************

 .. argparse::
    :module: anemoi.registry.__main__
    :func: create_parser

diff --git a/docs/configuring.rst b/docs/configuring.rst
index 3453d99..f8bddda 100644
--- a/docs/configuring.rst
+++ b/docs/configuring.rst
@@ -1,3 +1,5 @@
+.. _configuring:
+
 #############
 Configuring
 #############

From e1cda008b68aa0358d4d3ff96061baeee77cee20 Mon Sep 17 00:00:00 2001
From: Gert Mertes
Date: Wed, 24 Jul 2024 14:48:05 +0000
Subject: [PATCH 54/64] Write archive in binary mode

---
 src/anemoi/registry/entry/experiment.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/anemoi/registry/entry/experiment.py b/src/anemoi/registry/entry/experiment.py
index ba69bc5..1a9245c 100644
--- a/src/anemoi/registry/entry/experiment.py
+++ b/src/anemoi/registry/entry/experiment.py
@@ -197,8 +197,8 @@ def get_archive(self, path, *, platform, run_number):
                 url = run_record["archives"][platform]["url"]
                 LOG.info(f"Downloading {url} to {tmp_path}.")
                 download(url, tmp_path)
-                with open(path, "a+") as f:
-                    with open(tmp_path, "r") as tmp:
+                with open(path, "ba+") as f:
+                    with open(tmp_path, "br") as tmp:
                         f.write(tmp.read())

From fc1f02d8cfd4336ca4da36c8b939ce6da756250d Mon Sep 17 00:00:00 2001
From: Gert Mertes
Date: Thu, 25 Jul 2024 08:05:54 +0000
Subject: [PATCH 55/64] Fix crash when moving a nonexistent archive

---
 src/anemoi/registry/entry/experiment.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/anemoi/registry/entry/experiment.py b/src/anemoi/registry/entry/experiment.py
index 1a9245c..d1abeb9 100644
--- a/src/anemoi/registry/entry/experiment.py
+++ b/src/anemoi/registry/entry/experiment.py
@@ -171,6 +171,9 @@ def archive_moved(self, old, new, run_number, overwrite=None):
             for run_number in run_numbers:
                 tmp_path = os.path.join(tmpdir, str(run_number))
                 self.get_archive(tmp_path, platform=old, run_number=run_number)
+                if not os.path.exists(tmp_path):
+                    LOG.info(f"Skipping {old} -> {new} for run {run_number} because it does not exist")
+                    continue
                 self.set_archive(tmp_path, platform=new, run_number=run_number, overwrite=overwrite)
                 self.remove_archive(old, run_number)

From 26a25227d68118331c87cbc9df870b4fc5ad3f2f Mon Sep 17 00:00:00 2001
From: Florian Pinault
Date: Sat, 27 Jul 2024 09:59:07 +0000
Subject: [PATCH 56/64] set-key

---
 src/anemoi/registry/commands/experiments.py | 14 ++++++++++++++
 src/anemoi/registry/entry/experiment.py     | 12 ++++++++++++
 2 files changed, 26 insertions(+)

diff --git a/src/anemoi/registry/commands/experiments.py b/src/anemoi/registry/commands/experiments.py
index 1ebd1e0..ec68326 100644
--- a/src/anemoi/registry/commands/experiments.py
+++ b/src/anemoi/registry/commands/experiments.py
@@ -59,6 +59,18 @@ def add_arguments(self, command_parser):
         metavar="FILE",
     )
     command_parser.add_argument("--add-plots", nargs="+", help="Add plots to the experiment.", metavar="FILE")
+    command_parser.add_argument(
+        "--set-key",
+        nargs=2,
+        help="Set VALUE in the KEY to the 
experiment catalogue. Replace existing value.", + metavar=("KEY", "VALUE"), + ) + command_parser.add_argument( + "--set-key-json", + nargs=2, + help="Set the content of a FILE in the KEY to the experiment catalogue. Replace existing value.", + metavar=("KEY", "FILE"), + ) command_parser.add_argument( "--set-archive", help="Input file to register as an archive metadata file to the catalogue.", metavar="FILE" @@ -105,6 +117,8 @@ def _run(self, entry, args): self.process_task(entry, args, "register", overwrite=args.overwrite) self.process_task(entry, args, "add_weights") self.process_task(entry, args, "add_plots") + self.process_task(entry, args, "set_key", run_number=args.run_number) + self.process_task(entry, args, "set_key_json", run_number=args.run_number) self.process_task( entry, args, diff --git a/src/anemoi/registry/entry/experiment.py b/src/anemoi/registry/entry/experiment.py index d1abeb9..489453f 100644 --- a/src/anemoi/registry/entry/experiment.py +++ b/src/anemoi/registry/entry/experiment.py @@ -242,6 +242,18 @@ def _add_one_plot(self, path, **kwargs): dic = dict(url=target, name=basename, path=path) self.rest_item.patch([{"op": "add", "path": "/plots/-", "value": dic}]) + def set_key_json(self, key, file, run_number): + with open(file, "r") as f: + value = yaml.safe_load(f) + return self.set_key(key, value, run_number) + + def set_key(self, key, value, run_number): + if run_number is None: + self.rest_item.patch([{"op": "add", "path": f"/{key}", "value": value}]) + else: + self._ensure_run_exists(run_number) + self.rest_item.patch([{"op": "add", "path": f"/runs/{run_number}/{key}", "value": value}]) + def _add_one_weights(self, path, **kwargs): weights = WeightCatalogueEntry(path=path) From 2fb9725c1c870a64d4387633c607c7864f296742 Mon Sep 17 00:00:00 2001 From: Florian Pinault Date: Tue, 30 Jul 2024 12:21:16 +0000 Subject: [PATCH 57/64] add delete dataset --- src/anemoi/registry/commands/datasets.py | 8 +++-- src/anemoi/registry/commands/list.py | 39 +++++++++++++++++++++++- src/anemoi/registry/entry/dataset.py | 19 +++++++++++- 3 files changed, 62 insertions(+), 4 deletions(-) diff --git a/src/anemoi/registry/commands/datasets.py b/src/anemoi/registry/commands/datasets.py index 1749eb2..dea69b0 100644 --- a/src/anemoi/registry/commands/datasets.py +++ b/src/anemoi/registry/commands/datasets.py @@ -39,7 +39,6 @@ def add_arguments(self, command_parser): action="store_true", ) command_parser.add_argument("--url", help="Print the URL of the dataset.", action="store_true") - # command_parser.add_argument("--delete", help=f"Delete the dataset from the catalogue and from any other location", action="store_true") command_parser.add_argument("--set-status", help="Set the status to the dataset.", metavar="STATUS") command_parser.add_argument( "--set-recipe", help="Set the recipe file to [re-]build the dataset.", metavar="FILE" @@ -64,6 +63,11 @@ def add_arguments(self, command_parser): ) command_parser.add_argument("--remove-location", help="Platform name to remove.", metavar="PLATFORM") + command_parser.add_argument( + "--DELETE", + help="Delete the dataset when removing a location. 
Requires --remove-location.", + action="store_true", + ) def _run(self, entry, args): if entry is None: @@ -92,7 +96,7 @@ def _run(self, entry, args): self.process_task(entry, args, "register") self.process_task(entry, args, "set_recipe") self.process_task(entry, args, "set_status") - self.process_task(entry, args, "remove_location") + self.process_task(entry, args, "remove_location", delete=args.DELETE) if args.add_local: entry.add_location(args.add_local, path=args.NAME_OR_PATH) diff --git a/src/anemoi/registry/commands/list.py b/src/anemoi/registry/commands/list.py index 935741b..8f02a90 100644 --- a/src/anemoi/registry/commands/list.py +++ b/src/anemoi/registry/commands/list.py @@ -43,14 +43,17 @@ def add_arguments(self, command_parser): experiment.add_argument( "filter", nargs="*", help="Filter experiments with a list of key=value.", metavar="key=value" ) + experiment.add_argument("--json", help="Output as JSON", action="store_true") checkpoint = sub_parser.add_parser("weights", help="List weights in the catalogue.") checkpoint.add_argument( "filter", nargs="*", help="Filter experiments with a list of key=value.", metavar="key=value" ) + checkpoint.add_argument("--json", help="Output as JSON", action="store_true") dataset = sub_parser.add_parser("datasets", help="List datasets in the catalogue.") dataset.add_argument("filter", nargs="*", help="Filter datasets with a list of key=value.", metavar="key=value") + dataset.add_argument("--json", help="Output as JSON", action="store_true") # tasks = sub_parser.add_parser("tasks") # tasks.add_argument("filter", nargs="*") @@ -67,7 +70,41 @@ def _run_default(self, args): collection = args.subcommand request = list_to_dict(args.filter) payload = RestItemList(collection).get(params=request) - print(json_pretty_dump(payload)) + if args.json: + print(json_pretty_dump(payload)) + else: + for v in payload: + print(v["name"]) + + def run_datasets(self, args): + collection = args.subcommand + request = list_to_dict(args.filter) + payload = RestItemList(collection).get(params=request) + if args.json: + print(json_pretty_dump(payload)) + else: + for v in payload: + print(v["name"]) + + def run_weights(self, args): + collection = args.subcommand + request = list_to_dict(args.filter) + payload = RestItemList(collection).get(params=request) + if args.json: + print(json_pretty_dump(payload)) + else: + for v in payload: + print(v["uuid"]) + + def run_experiments(self, args): + collection = args.subcommand + request = list_to_dict(args.filter) + payload = RestItemList(collection).get(params=request) + if args.json: + print(json_pretty_dump(payload)) + else: + for v in payload: + print(v["expver"]) def run_tasks(self, args): collection = "tasks" diff --git a/src/anemoi/registry/entry/dataset.py b/src/anemoi/registry/entry/dataset.py index 6ca9d8c..ae4b7e3 100644 --- a/src/anemoi/registry/entry/dataset.py +++ b/src/anemoi/registry/entry/dataset.py @@ -53,9 +53,26 @@ def add_location(self, platform, path): self.rest_item.patch([{"op": "add", "path": f"/locations/{platform}", "value": {"path": path}}]) return path - def remove_location(self, platform): + def remove_location(self, platform, *, delete): + if delete: + self.delete(platform) self.rest_item.patch([{"op": "remove", "path": f"/locations/{platform}"}]) + def delete(self, platform): + if not config().get("allow_delete"): + raise ValueError("Delete not allowed by configuration") + + path = self.record.get("locations", {}).get(platform, {}).get("path") + if path is None: + LOG.warning(f"Nothing to delete 
for {self.key} on platform {platform}")
+            return
+        if path.startswith("s3://"):
+            from anemoi.utils.s3 import delete
+
+            return delete(path + "/")
+        else:
+            LOG.warning(f"Location is not an s3 path: {path}. Delete not implemented.")
+
     def upload(self, source, target, platform="unknown", resume=True):
         LOG.info(f"Uploading from {source} to {target} ")
         assert target.startswith("s3://"), target

From 939644289148aafb6956a6ff4dcf9bbb77c4b614 Mon Sep 17 00:00:00 2001
From: Florian Pinault
Date: Fri, 2 Aug 2024 13:40:03 +0000
Subject: [PATCH 58/64] autocompletion

---
 pyproject.toml                  | 2 +-
 src/anemoi/registry/__main__.py | 6 ++++++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index b27e4f3..6fc9eaa 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -90,7 +90,7 @@ urls.Documentation = "https://anemoi-registry.readthedocs.io/"
 urls.Homepage = "https://github.com/ecmwf/anemoi-registry/"
 urls.Issues = "https://github.com/ecmwf/anemoi-registry/issues"
 urls.Repository = "https://github.com/ecmwf/anemoi-registry/"
-scripts.anemoi-registry = "anemoi.registry.__main__:main"
+scripts.anemoi-registry = "anemoi.registry.__main__:main_PYTHON_ARGCOMPLETE_OK"

diff --git a/src/anemoi/registry/__main__.py b/src/anemoi/registry/__main__.py
index be940c2..6c51a12 100644
--- a/src/anemoi/registry/__main__.py
+++ b/src/anemoi/registry/__main__.py
@@ -24,5 +24,11 @@ def main():
     cli_main(__version__, __doc__, COMMANDS)


+def main_PYTHON_ARGCOMPLETE_OK():
+    # this is a workaround for argcomplete
+    # to make sure the string "PYTHON_ARGCOMPLETE_OK" is in the final executable script
+    return main()
+
+
 if __name__ == "__main__":
     main()

From 11a06d3ec2f84233ca7d97f28ae52c157a46af17 Mon Sep 17 00:00:00 2001
From: Gert Mertes
Date: Fri, 9 Aug 2024 10:17:36 +0000
Subject: [PATCH 59/64] fix: register weights ignore_existing crash

---
 src/anemoi/registry/entry/weights.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/anemoi/registry/entry/weights.py b/src/anemoi/registry/entry/weights.py
index 755b77e..a8fe6dd 100644
--- a/src/anemoi/registry/entry/weights.py
+++ b/src/anemoi/registry/entry/weights.py
@@ -54,10 +54,10 @@ def upload(self, path, target=None, overwrite=False):
         upload(path, target, overwrite=overwrite, resume=not overwrite)
         return target

-    def register(self, overwrite=False):
+    def register(self, **kwargs):
         assert self.path is not None, "path must be provided"

-        super().register(overwrite=overwrite)
+        super().register(**kwargs)

         platform = self.default_platform()
         target = self.upload(self.path)

From 6f4c827c289598f632ceee54d04c32933c02f2fb Mon Sep 17 00:00:00 2001
From: Helen Theissen
Date: Fri, 9 Aug 2024 15:43:03 +0100
Subject: [PATCH 60/64] Feature/add changelog and action (#2)

* ci: add readthedocs pr update

* ci: add changelog update pr action

* docs: add minimal changelog

* ci: replace deploy workflow with cd-pypi

* docs: update changelog

* Update CHANGELOG.md

---------

Co-authored-by: Jesper Dramsch

---
 .github/workflows/changelog-pr-update.yml   | 15 +++++++++
 .github/workflows/python-publish.yml        | 26 ++-------------
 .github/workflows/readthedocs-pr-update.yml | 22 +++++++++++++
 CHANGELOG.md                                | 35 +++++++++++++++++++++
 4 files changed, 75 insertions(+), 23 deletions(-)
 create mode 100644 .github/workflows/changelog-pr-update.yml
 create mode 100644 .github/workflows/readthedocs-pr-update.yml
 create mode 100644 CHANGELOG.md

diff --git a/.github/workflows/changelog-pr-update.yml 
b/.github/workflows/changelog-pr-update.yml new file mode 100644 index 0000000..4bc51df --- /dev/null +++ b/.github/workflows/changelog-pr-update.yml @@ -0,0 +1,15 @@ +name: Check Changelog Update on PR +on: + pull_request: + types: [assigned, opened, synchronize, reopened, labeled, unlabeled] + branches: + - main + - develop +jobs: + Check-Changelog: + name: Check Changelog Action + runs-on: ubuntu-20.04 + steps: + - uses: tarides/changelog-check-action@v2 + with: + changelog: CHANGELOG.md diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml index 8099bd1..472e29b 100644 --- a/.github/workflows/python-publish.yml +++ b/.github/workflows/python-publish.yml @@ -35,7 +35,7 @@ jobs: steps: - uses: actions/checkout@v4 - - uses: actions/setup-python@v2 + - uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} @@ -51,26 +51,6 @@ jobs: deploy: - if: ${{ github.event_name == 'release' }} - runs-on: ubuntu-latest needs: [checks, quality] - - steps: - - uses: actions/checkout@v4 - - - name: Set up Python - uses: actions/setup-python@v2 - with: - python-version: 3.x - - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install build wheel twine - - name: Build and publish - env: - TWINE_USERNAME: __token__ - TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }} - run: | - python -m build - twine upload dist/* + uses: ecmwf-actions/reusable-workflows/.github/workflows/cd-pypi.yml@v2 + secrets: inherit diff --git a/.github/workflows/readthedocs-pr-update.yml b/.github/workflows/readthedocs-pr-update.yml new file mode 100644 index 0000000..61232f2 --- /dev/null +++ b/.github/workflows/readthedocs-pr-update.yml @@ -0,0 +1,22 @@ +name: Read the Docs PR Preview +on: + pull_request_target: + types: + - opened + - synchronize + - reopened + # Execute this action only on PRs that touch + # documentation files. + paths: + - "docs/**" + +permissions: + pull-requests: write + +jobs: + documentation-links: + runs-on: ubuntu-latest + steps: + - uses: readthedocs/actions/preview@v1 + with: + project-slug: "anemoi-registry" diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..c0c2cd4 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,35 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +Please add your functional changes to the appropriate section in the PR. +Keep it human-readable, your future self will thank you! + +## [Unreleased] + +### Added +- CI workflows to check for updates in the changelog and the documentation. 
+
+### Changed
+- Replaces the deploy workflow with cd-pypi
+
+### Removed
+
+## [0.1.0] Minor Release
+
+### Added
+- Implementation of the following commands: upload (to S3), download
+
+## [0.0.1] Initial Release
+
+### Added
+- Documentation
+- Initial code release for anemoi-registry: Cataloguing for model checkpoints and datasets
+
+
+## Git Diffs:
+[0.1.0]: https://github.com/ecmwf/anemoi-registry/compare/0.0.1...0.1.0
+[0.0.1]: https://github.com/ecmwf/anemoi-registry/releases/tag/0.0.1

From b60bcd97f433d080050b6018ab4d150aaff60599 Mon Sep 17 00:00:00 2001
From: Florian Pinault
Date: Mon, 16 Sep 2024 13:21:45 +0200
Subject: [PATCH 61/64] Update configuring.rst

---
 docs/configuring.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/configuring.rst b/docs/configuring.rst
index f8bddda..17baa2e 100644
--- a/docs/configuring.rst
+++ b/docs/configuring.rst
@@ -16,7 +16,7 @@ this file are optional and have default values.

    [registry]
    api_url = "https://anemoi.ecmwf.int/api/v1"

-The second config file is ``~/.config/anemoi/config.secret.toml``. This
+The second config file is ``~/.config/anemoi/config.secrets.toml``. This
 file must have the right permissions set to avoid unauthorized access
 (`chmod 600 `). All keys in this file have no default values.

From aca0b0f9dc22a4b87b9622589c1d0b127977a0d2 Mon Sep 17 00:00:00 2001
From: Florian Pinault
Date: Mon, 16 Sep 2024 14:44:43 +0200
Subject: [PATCH 62/64] Update configuring.rst

---
 docs/configuring.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/configuring.rst b/docs/configuring.rst
index 17baa2e..9d92bd7 100644
--- a/docs/configuring.rst
+++ b/docs/configuring.rst
@@ -8,7 +8,7 @@ The following configuration files are used to store the registry
 configuration. These files allow configuring the catalogue urls, s3
 buckets, API token and object storage credentials.

-The first config file is ``~/.config/anemoi/config.toml``. All keys in
+The first config file is ``~/.config/anemoi/settings.toml``. All keys in
 this file are optional and have default values.

@@ -16,7 +16,7 @@ this file are optional and have default values.

    [registry]
    api_url = "https://anemoi.ecmwf.int/api/v1"

-The second config file is ``~/.config/anemoi/config.secrets.toml``. This
+The second config file is ``~/.config/anemoi/settings.secrets.toml``. This
 file must have the right permissions set to avoid unauthorized access
 (`chmod 600 `). All keys in this file have no default values.
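Note: patches 61 and 62 together settle on two files, ``~/.config/anemoi/settings.toml`` for plain settings and ``~/.config/anemoi/settings.secrets.toml`` for credentials. The sketch below shows how such a pair can be loaded and merged. It is not the anemoi implementation; apart from the two file names and the ``api_url`` default documented above, everything in it (the merge order, the permission check, the use of ``tomllib``) is an illustrative assumption.

.. code-block:: python

    import os
    import stat
    import tomllib  # Python 3.11+; older interpreters can use the third-party "tomli"
    from pathlib import Path

    SETTINGS = Path.home() / ".config" / "anemoi" / "settings.toml"
    SECRETS = Path.home() / ".config" / "anemoi" / "settings.secrets.toml"


    def load_registry_config():
        # Documented default; everything else comes from the two files.
        merged = {"registry": {"api_url": "https://anemoi.ecmwf.int/api/v1"}}
        for path in (SETTINGS, SECRETS):
            if not path.exists():
                continue
            if path == SECRETS:
                # Mirror the chmod 600 requirement: refuse group/world-readable secrets.
                mode = stat.S_IMODE(os.stat(path).st_mode)
                if mode & 0o077:
                    raise PermissionError(f"{path} must be chmod 600, found {oct(mode)}")
            with path.open("rb") as f:
                for section, values in tomllib.load(f).items():
                    # Assumes table-valued top-level sections, e.g. [registry].
                    merged.setdefault(section, {}).update(values)
        return merged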
From 73f8011f82799bf8830de92773dd3e4ce9762f59 Mon Sep 17 00:00:00 2001
From: Florian Pinault
Date: Tue, 17 Sep 2024 18:45:40 +0000
Subject: [PATCH 63/64] fix uploading datasets

---
 src/anemoi/registry/rest.py  | 3 +++
 src/anemoi/registry/tasks.py | 3 ++-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/anemoi/registry/rest.py b/src/anemoi/registry/rest.py
index 4a80d06..47c2f99 100644
--- a/src/anemoi/registry/rest.py
+++ b/src/anemoi/registry/rest.py
@@ -114,6 +114,9 @@ def post(self, path, data, errors={}):
     def delete(self, path, errors={}):
         if not config().get("allow_delete"):
             raise ValueError("Unregister not allowed")
+        return self.unprotected_delete(path, errors=errors)
+
+    def unprotected_delete(self, path, errors={}):
         r = self.session.delete(f"{config().api_url}/{path}", params=dict(force=True))
         self.raise_for_status(r, errors=errors)
         return r.json()

diff --git a/src/anemoi/registry/tasks.py b/src/anemoi/registry/tasks.py
index 039ce62..a67767d 100644
--- a/src/anemoi/registry/tasks.py
+++ b/src/anemoi/registry/tasks.py
@@ -119,7 +119,8 @@ def set_status(self, status):
         self.rest_item.patch(patch)

     def unregister(self):
-        return self.rest_item.delete()
+        # deleting a task is unprotected because non-admin users should be able to delete their own tasks
+        return self.rest_item.unprotected_delete()

     def take_ownership(self):
         trace = trace_info()

From dc51b5110acf96a42980351f24f5e9d52725c54b Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 17 Sep 2024 18:47:19 +0000
Subject: [PATCH 64/64] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---
 docs/configuring.rst | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/docs/configuring.rst b/docs/configuring.rst
index 9d92bd7..76a98cd 100644
--- a/docs/configuring.rst
+++ b/docs/configuring.rst
@@ -16,9 +16,10 @@ this file are optional and have default values.

    [registry]
    api_url = "https://anemoi.ecmwf.int/api/v1"

-The second config file is ``~/.config/anemoi/settings.secrets.toml``. This
-file must have the right permissions set to avoid unauthorized access
-(`chmod 600 `). All keys in this file have no default values.
+The second config file is ``~/.config/anemoi/settings.secrets.toml``.
+This file must have the right permissions set to avoid unauthorized
+access (`chmod 600 `). All keys in this file have no default
+values.

 .. code::
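A closing note on the mechanism shared by most of the commits above: every catalogue update (``--set-archive``, ``--remove-archive``, ``--set-key``, locations, plots) is expressed as a JSON Patch (RFC 6902) document passed to ``rest_item.patch``. The applier below is only a local sketch of the semantics of the three operations used in this series, assuming plain nested dictionaries; in reality the operations are executed server-side by the catalogue API, not by client code like this. The platform name and URL in the usage example are made-up placeholders.

.. code-block:: python

    def apply_json_patch(record, ops):
        """Apply RFC 6902 "test", "add" and "remove" operations to a nested dict, in place."""
        for op in ops:
            *parents, leaf = [part for part in op["path"].split("/") if part]
            target = record
            for key in parents:
                target = target[key]
            if op["op"] == "test":
                # Guard against concurrent edits: fail if the value has changed.
                if target[leaf] != op["value"]:
                    raise ValueError(f"test failed at {op['path']}")
            elif op["op"] == "add":
                if isinstance(target, dict):
                    target[leaf] = op["value"]
                elif leaf == "-":  # "/plots/-" style paths append to a list
                    target.append(op["value"])
                else:
                    target.insert(int(leaf), op["value"])
            elif op["op"] == "remove":
                del target[leaf]  # only dict paths are removed in this series
            else:
                raise NotImplementedError(op["op"])


    # Roughly the sequence the experiment entry sends when creating a run
    # and attaching an archive ("ewc" and the s3 url are placeholders):
    entry = {"runs": {}}
    apply_json_patch(
        entry,
        [
            {"op": "test", "path": "/runs", "value": {}},
            {"op": "add", "path": "/runs/1", "value": {"archives": {}}},
            {"op": "add", "path": "/runs/1/archives/ewc", "value": {"url": "s3://example-bucket/archive.tar.gz"}},
        ],
    )
    assert entry["runs"]["1"]["archives"]["ewc"]["url"].startswith("s3://")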