From d3d6958329ab6132042b0a24e743a67fda8146f2 Mon Sep 17 00:00:00 2001 From: RoryPTB <47696929+RoryPTB@users.noreply.github.com> Date: Fri, 2 Feb 2024 16:01:14 +0100 Subject: [PATCH] Template refactor: removal of originating centre and sub centre (#47) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Removed header centre and subcentre from mappings * Updated pytest * Added centre and subcentre env variable * Improved implementation of centre and subcentre envvariables * Added brief documentation * Small code cleanup * Documentation updates * Updated GitHub actions with env variables * Centre and subcentre default to missing + documentation updates * Pytest updates * Updated missing value * Pytest again ¯\_(ツ)_/¯ --- .github/workflows/tests.yml | 4 +- .gitignore | 2 + Dockerfile | 6 ++- README.md | 22 ++++++++- docs/source/quickstart.rst | 21 ++++++++ synop2bufr/__init__.py | 49 +++++++++++++++++-- .../resources/synop-mappings-307080.json | 4 +- .../resources/synop-mappings-307096.json | 4 +- tests/test_synop2bufr.py | 12 ++--- 9 files changed, 107 insertions(+), 17 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 7fb3eb7..434ffd5 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -8,7 +8,9 @@ jobs: strategy: matrix: python-version: [3.7, 3.8, 3.9] - + env: + BUFR_ORIGINATING_CENTRE: 123 + BUFR_ORIGINATING_SUBCENTRE: 123 steps: - uses: actions/checkout@v2 - uses: actions/setup-python@v2 diff --git a/.gitignore b/.gitignore index cdcf867..189ff22 100644 --- a/.gitignore +++ b/.gitignore @@ -42,6 +42,8 @@ nosetests.xml logs .vscode/ .vscode/settings.json +# Ignore decoded CSV files +decoded_*.csv # pycharm .idea diff --git a/Dockerfile b/Dockerfile index 848b1f1..f8f2940 100644 --- a/Dockerfile +++ b/Dockerfile @@ -14,9 +14,13 @@ RUN echo "Acquire::Check-Valid-Until \"false\";\nAcquire::Check-Date \"false\";" && pip3 install --no-cache-dir 
https://github.com/wmo-im/csv2bufr/archive/refs/tags/v0.7.4.zip \ && pip3 install --no-cache-dir https://github.com/wmo-im/pymetdecoder/archive/refs/tags/v0.1.10.zip +# Environment variables + ENV LOG_LEVEL=INFO +# The following need to be changed to the correct values for your centre! +ENV BUFR_ORIGINATING_CENTRE=65535 +ENV BUFR_ORIGINATING_SUBCENTRE=65535 -#WORKDIR /build # copy the app COPY . /build diff --git a/README.md b/README.md index 2b90a4b..6b0cc49 100644 --- a/README.md +++ b/README.md @@ -13,6 +13,24 @@ The synop2bufr Python module contains both a command line interface and API to c Dependencies are listed in [requirements.txt](https://github.com/wmo-im/synop2bufr/blob/main/requirements.txt). Dependencies are automatically installed during synop2bufr installation. +### Setting Environment Variables + +Before using synop2bufr, we highly encourage you to set the `BUFR_ORIGINATING_CENTRE` and `BUFR_ORIGINATING_SUBCENTRE` environment variables. These variables are used to specify the originating centre and subcentre of the SYNOP messages. **Without these set, they will default to missing (65535).** + +It is recommended that you set these environment variables in the Dockerfile, by editing the following lines with your originating centre and subcentre values: + +```bash +ENV BUFR_ORIGINATING_CENTRE=<centre_code> +ENV BUFR_ORIGINATING_SUBCENTRE=<subcentre_code> +``` + +Alternatively, you can set these environment variables in your shell if you want to run synop2bufr on your local machine. Here's how you can do it in a Bash shell: + +```bash +export BUFR_ORIGINATING_CENTRE=<centre_code> +export BUFR_ORIGINATING_SUBCENTRE=<subcentre_code> +``` + ## Running To run synop2bufr from a Docker container: @@ -33,9 +51,9 @@ synop2bufr data transform --metadata data/station_list.csv --year 2023 --month 0 To run synop2bufr inside a Lambda function on Amazon Web Services, please refer to [aws-lambda/README.md](aws-lambda/README.md) and use this [Dockerfile](aws-lambda/Dockerfile) to build the container image for the Lambda function. 
-## Usage Guide +## API Usage Guide -Here we detail how synop2bufr can be used. +Here we detail how the synop2bufr API can be used in Python. To begin, suppose we have some SYNOP data. diff --git a/docs/source/quickstart.rst b/docs/source/quickstart.rst index 3042dc7..e94b278 100644 --- a/docs/source/quickstart.rst +++ b/docs/source/quickstart.rst @@ -31,6 +31,27 @@ Command line interface (CLI) --month \ +Setting Environment Variables +----------------------------- + +Before running the `synop2bufr data transform` command, we highly encourage you to set the `BUFR_ORIGINATING_CENTRE` and `BUFR_ORIGINATING_SUBCENTRE` environment variables. These variables are used to specify the originating centre and subcentre of the SYNOP messages. **Without these set, they will default to missing (65535).** + +It is recommended that you set these environment variables in the Dockerfile, by editing the following lines with your originating centre and subcentre values: + +.. code-block:: shell + + ENV BUFR_ORIGINATING_CENTRE=<centre_code> + ENV BUFR_ORIGINATING_SUBCENTRE=<subcentre_code> + +Alternatively, you can set these environment variables in your shell if you want to run synop2bufr on your local machine. Here's how you can do it in a Bash shell: + +.. code-block:: shell + + export BUFR_ORIGINATING_CENTRE=<centre_code> + export BUFR_ORIGINATING_SUBCENTRE=<subcentre_code> + +Now, you can run the `synop2bufr data transform` command as described in the previous section. + Input FM-12 file (input-fm12.txt) --------------------------------- The FM-12 input data format is described in the `WMO Manual on Codes, Volume I.1 `__. 
diff --git a/synop2bufr/__init__.py b/synop2bufr/__init__.py index d7ed07e..18c3118 100644 --- a/synop2bufr/__init__.py +++ b/synop2bufr/__init__.py @@ -1270,10 +1270,19 @@ def transform(data: str, metadata: str, year: int, :returns: iterator """ + # ============================================= # Make warning and error messages array global + # ============================================= global warning_msgs global error_msgs + # Boolean to ensure environment variable warning is only displayed once + # Note: The resetting of the warning_msgs array for + # each report necessitates this approach, because + # we want to ensure the warning is only appended + # to the first conversion + can_var_warning_be_displayed = True + # =================== # First parse metadata file # =================== @@ -1302,7 +1311,6 @@ def transform(data: str, metadata: str, year: int, error_msgs.append(str(e)) fh.close() - # metadata = metadata_dict[wsi] else: LOGGER.error("Invalid metadata") raise ValueError("Invalid metadata") @@ -1494,12 +1502,47 @@ def transform(data: str, metadata: str, year: int, else: # If station has not been found in the station # list, don't repeat warning unnecessarily - if not (f"Station {tsi} not found in station file" - in warning_msgs): + if f"Station {tsi} not found in station file" not in warning_msgs: # noqa LOGGER.warning(f"Invalid metadata for station {tsi} found in station file, unable to parse") # noqa warning_msgs.append(f"Invalid metadata for station {tsi} found in station file, unable to parse") # noqa + # Add information to the mappings if conversion_success[tsi]: + # First check if the BUFR header centre + # and subcentre codes are present + missing_env_vars = [] + + if os.environ.get("BUFR_ORIGINATING_CENTRE") is None: + missing_env_vars.append("BUFR_ORIGINATING_CENTRE") + else: + # Add the BUFR header centre and subcentre to mappings + mapping["header"].append({ + "eccodes_key": "bufrHeaderCentre", + "value": 
f"const:{os.environ.get('BUFR_ORIGINATING_CENTRE')}" # noqa + }) + + if os.environ.get("BUFR_ORIGINATING_SUBCENTRE") is None: + missing_env_vars.append("BUFR_ORIGINATING_SUBCENTRE") + else: + mapping["header"].append({ + "eccodes_key": "bufrHeaderSubCentre", + "value": f"const:{os.environ.get('BUFR_ORIGINATING_SUBCENTRE')}" # noqa + }) + + # If either of these environment variables is not set, + # we will default to missing and warn the user once + if missing_env_vars and can_var_warning_be_displayed: + # Display warning messages + for var in missing_env_vars: + var_warning = f"The {var} environment variable is not set, will default to missing!" # noqa + LOGGER.warning(var_warning) + warning_msgs.append(var_warning) + can_var_warning_be_displayed = False + # Stop duplicated warnings + can_var_warning_be_displayed = False + + # Now we need to add the mappings for the cloud groups + # of section 3 and 4 try: for idx in range(num_s3_clouds): # Build the dictionary of mappings for section 3 diff --git a/synop2bufr/resources/synop-mappings-307080.json b/synop2bufr/resources/synop-mappings-307080.json index c498366..fc08afb 100644 --- a/synop2bufr/resources/synop-mappings-307080.json +++ b/synop2bufr/resources/synop-mappings-307080.json @@ -8,8 +8,8 @@ "header":[ {"eccodes_key": "edition", "value": "const:4"}, {"eccodes_key": "masterTableNumber", "value": "const:0"}, - {"eccodes_key": "bufrHeaderCentre", "value": "const:0"}, - {"eccodes_key": "bufrHeaderSubCentre", "value": "const:0"}, + {"eccodes_key": "bufrHeaderCentre", "value": "const:65535"}, + {"eccodes_key": "bufrHeaderSubCentre", "value": "const:65535"}, {"eccodes_key": "updateSequenceNumber", "value": "const:0"}, {"eccodes_key": "dataCategory", "value": "const:0"}, {"eccodes_key": "internationalDataSubCategory", "value": "const:2"}, diff --git a/synop2bufr/resources/synop-mappings-307096.json b/synop2bufr/resources/synop-mappings-307096.json index 33b6c1b..36a59a3 100644 --- 
a/synop2bufr/resources/synop-mappings-307096.json +++ b/synop2bufr/resources/synop-mappings-307096.json @@ -8,8 +8,8 @@ "header":[ {"eccodes_key": "edition", "value": "const:4"}, {"eccodes_key": "masterTableNumber", "value": "const:0"}, - {"eccodes_key": "bufrHeaderCentre", "value": "const:0"}, - {"eccodes_key": "bufrHeaderSubCentre", "value": "const:0"}, + {"eccodes_key": "bufrHeaderCentre", "value": "const:65535"}, + {"eccodes_key": "bufrHeaderSubCentre", "value": "const:65535"}, {"eccodes_key": "updateSequenceNumber", "value": "const:0"}, {"eccodes_key": "dataCategory", "value": "const:0"}, {"eccodes_key": "internationalDataSubCategory", "value": "const:2"}, diff --git a/tests/test_synop2bufr.py b/tests/test_synop2bufr.py index 64cf34f..81f5852 100644 --- a/tests/test_synop2bufr.py +++ b/tests/test_synop2bufr.py @@ -149,9 +149,9 @@ def test_bufr_307080(multiple_reports_307080, metadata_string): for item in result: msgs[item['_meta']['id']] = item # Test the md5 keys - assert msgs['WIGOS_0-20000-0-15015_20220321T120000']['_meta']['properties']['md5'] == 'f1595e9f82880b650de227fa007eb770' # noqa - assert msgs['WIGOS_0-20000-0-15020_20220321T120000']['_meta']['properties']['md5'] == '21cd8741f8615cc7b0df70060c3a98ff' # noqa - assert msgs['WIGOS_0-20000-0-15090_20220321T120000']['_meta']['properties']['md5'] == 'f0b736dba245b34985f757b0597e3d54' # noqa + assert msgs['WIGOS_0-20000-0-15015_20220321T120000']['_meta']['properties']['md5'] == '1e564e1ec2d679bbc120141ba031ab7a' # noqa + assert msgs['WIGOS_0-20000-0-15020_20220321T120000']['_meta']['properties']['md5'] == 'db62277233118df3f1cf7b6a073f1cbe' # noqa + assert msgs['WIGOS_0-20000-0-15090_20220321T120000']['_meta']['properties']['md5'] == '538db43645fb4b2459edfcb467048b7a' # noqa # Test the bufr template used for all the reports # (they should be the same for every report) @@ -168,9 +168,9 @@ def test_bufr_307096(multiple_reports_307096, metadata_string): for item in result: msgs[item['_meta']['id']] = item # 
Test the md5 keys - assert msgs['WIGOS_0-20000-0-15015_20220321T120000']['_meta']['properties']['md5'] == '27c990045879acc2eedddb7fdc70db4d' # noqa - assert msgs['WIGOS_0-20000-0-15020_20220321T120000']['_meta']['properties']['md5'] == '9db622c40d53aae4ce4f38a658f36d86' # noqa - assert msgs['WIGOS_0-20000-0-15090_20220321T120000']['_meta']['properties']['md5'] == '89f424b9fc38a6db69c7b195bd71d92f' # noqa + assert msgs['WIGOS_0-20000-0-15015_20220321T120000']['_meta']['properties']['md5'] == '5f1744ec26875630efca0e1583cddca9' # noqa + assert msgs['WIGOS_0-20000-0-15020_20220321T120000']['_meta']['properties']['md5'] == 'e2dc1199d4e38fae25d26ded815597da' # noqa + assert msgs['WIGOS_0-20000-0-15090_20220321T120000']['_meta']['properties']['md5'] == '7c352acb43530946f2445a95eb349e68' # noqa # Test the bufr template used for all the reports # (they should be the same for every report)