From d3d6958329ab6132042b0a24e743a67fda8146f2 Mon Sep 17 00:00:00 2001
From: RoryPTB <47696929+RoryPTB@users.noreply.github.com>
Date: Fri, 2 Feb 2024 16:01:14 +0100
Subject: [PATCH] Template refactor: removal of originating centre and sub
 centre (#47)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Removed header centre and subcentre from mappings

* Updated pytest

* Added centre and subcentre env variable

* Improved implementation of centre and subcentre env variables

* Added brief documentation

* Small code cleanup

* Documentation updates

* Updated GitHub actions with env variables

* Centre and subcentre default to missing + documentation updates

* Pytest updates

* Updated missing value

* Pytest again ¯\_(ツ)_/¯
---
 .github/workflows/tests.yml                   |  4 +-
 .gitignore                                    |  2 +
 Dockerfile                                    |  6 ++-
 README.md                                     | 22 ++++++++-
 docs/source/quickstart.rst                    | 21 ++++++++
 synop2bufr/__init__.py                        | 49 +++++++++++++++++--
 .../resources/synop-mappings-307080.json      |  4 +-
 .../resources/synop-mappings-307096.json      |  4 +-
 tests/test_synop2bufr.py                      | 12 ++---
 9 files changed, 107 insertions(+), 17 deletions(-)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 7fb3eb7..434ffd5 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -8,7 +8,9 @@ jobs:
     strategy:
       matrix:
         python-version: [3.7, 3.8, 3.9]
-
+    env:
+      BUFR_ORIGINATING_CENTRE: 123
+      BUFR_ORIGINATING_SUBCENTRE: 123
     steps:
     - uses: actions/checkout@v2
     - uses: actions/setup-python@v2
diff --git a/.gitignore b/.gitignore
index cdcf867..189ff22 100644
--- a/.gitignore
+++ b/.gitignore
@@ -42,6 +42,8 @@ nosetests.xml
 logs
 .vscode/
 .vscode/settings.json
+# Ignore decoded CSV files
+decoded_*.csv
 
 # pycharm
 .idea
diff --git a/Dockerfile b/Dockerfile
index 848b1f1..f8f2940 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -14,9 +14,13 @@ RUN echo "Acquire::Check-Valid-Until \"false\";\nAcquire::Check-Date \"false\";"
     && pip3 install --no-cache-dir https://github.com/wmo-im/csv2bufr/archive/refs/tags/v0.7.4.zip \
     && pip3 install --no-cache-dir https://github.com/wmo-im/pymetdecoder/archive/refs/tags/v0.1.10.zip
 
+# Environment variables
+
 ENV LOG_LEVEL=INFO
+# The following need to be changed to the correct values for your centre!
+ENV BUFR_ORIGINATING_CENTRE=65535
+ENV BUFR_ORIGINATING_SUBCENTRE=65535
 
-#WORKDIR /build
 # copy the app
 COPY . /build
 
diff --git a/README.md b/README.md
index 2b90a4b..6b0cc49 100644
--- a/README.md
+++ b/README.md
@@ -13,6 +13,24 @@ The synop2bufr Python module contains both a command line interface and API to c
 
 Dependencies are listed in [requirements.txt](https://github.com/wmo-im/synop2bufr/blob/main/requirements.txt). Dependencies are automatically installed during synop2bufr installation.
 
+### Setting Environment Variables
+
+Before using synop2bufr, we highly encourage you to set the `BUFR_ORIGINATING_CENTRE` and `BUFR_ORIGINATING_SUBCENTRE` environment variables. These variables are used to specify the originating centre and subcentre of the SYNOP messages. **Without these set, they will default to missing (65535).**
+
+It is recommended that you set these environment variables in the Dockerfile, by editing the following lines with your originating centre and subcentre values:
+
+```bash
+ENV BUFR_ORIGINATING_CENTRE=<centre_value>
+ENV BUFR_ORIGINATING_SUBCENTRE=<subcentre_value>
+```
+
+Alternatively, you can set these environment variables in your shell if you want to run synop2bufr on your local machine. Here's how you can do it in a Bash shell:
+
+```bash
+export BUFR_ORIGINATING_CENTRE=<centre_value>
+export BUFR_ORIGINATING_SUBCENTRE=<subcentre_value>
+```
+
 ## Running
 
 To run synop2bufr from a Docker container:
@@ -33,9 +51,9 @@ synop2bufr data transform --metadata data/station_list.csv --year 2023 --month 0
 
 To run synop2bufr inside a Lambda function on Amazon Web Services, please refer to [aws-lambda/README.md](aws-lambda/README.md) and use this [Dockerfile](aws-lambda/Dockerfile) to build the container image for the Lambda function.
 
-## Usage Guide
+## API Usage Guide
 
-Here we detail how synop2bufr can be used.
+Here we detail how the synop2bufr API can be used in Python.
 
 To begin, suppose we have some SYNOP data.
 
diff --git a/docs/source/quickstart.rst b/docs/source/quickstart.rst
index 3042dc7..e94b278 100644
--- a/docs/source/quickstart.rst
+++ b/docs/source/quickstart.rst
@@ -31,6 +31,27 @@ Command line interface (CLI)
        --month <month-of-observation> \
        <input-fm12.txt>
 
+Setting Environment Variables
+-----------------------------
+
+Before running the `synop2bufr data transform` command, we highly encourage you to set the `BUFR_ORIGINATING_CENTRE` and `BUFR_ORIGINATING_SUBCENTRE` environment variables. These variables are used to specify the originating centre and subcentre of the SYNOP messages. **Without these set, they will default to missing (65535).**
+
+It is recommended that you set these environment variables in the Dockerfile, by editing the following lines with your originating centre and subcentre values:
+
+.. code-block:: shell
+
+  ENV BUFR_ORIGINATING_CENTRE=<centre_value>
+  ENV BUFR_ORIGINATING_SUBCENTRE=<subcentre_value>
+
+Alternatively, you can set these environment variables in your shell if you want to run synop2bufr on your local machine. Here's how you can do it in a Bash shell:
+
+.. code-block:: shell
+
+   export BUFR_ORIGINATING_CENTRE=<centre_value>
+   export BUFR_ORIGINATING_SUBCENTRE=<subcentre_value>
+
+Now, you can run the `synop2bufr data transform` command as described in the previous section.
+
 Input FM-12 file (input-fm12.txt)
 ---------------------------------
 The FM-12 input data format is described in the `WMO Manual on Codes, Volume I.1 <https://library.wmo.int/doc_num.php?explnum_id=10235>`__.
diff --git a/synop2bufr/__init__.py b/synop2bufr/__init__.py
index d7ed07e..18c3118 100644
--- a/synop2bufr/__init__.py
+++ b/synop2bufr/__init__.py
@@ -1270,10 +1270,19 @@ def transform(data: str, metadata: str, year: int,
 
     :returns: iterator
     """
+    # =============================================
     # Make warning and error messages array global
+    # =============================================
     global warning_msgs
     global error_msgs
 
+    # Boolean to ensure environment variable warning is only displayed once
+    # Note: The resetting of the warning_msgs array for
+    # each report necessitates this approach, because
+    # we want to ensure the warning is only appended
+    # to the first conversion
+    can_var_warning_be_displayed = True
+
     # ===================
     # First parse metadata file
     # ===================
@@ -1302,7 +1311,6 @@ def transform(data: str, metadata: str, year: int,
                 error_msgs.append(str(e))
 
         fh.close()
-        # metadata = metadata_dict[wsi]
     else:
         LOGGER.error("Invalid metadata")
         raise ValueError("Invalid metadata")
@@ -1494,12 +1502,47 @@ def transform(data: str, metadata: str, year: int,
                 else:
                     # If station has not been found in the station
                     # list, don't repeat warning unnecessarily
-                    if not (f"Station {tsi} not found in station file"
-                            in warning_msgs):
+                    if f"Station {tsi} not found in station file" not in warning_msgs:  # noqa
                         LOGGER.warning(f"Invalid metadata for station {tsi} found in station file, unable to parse")  # noqa
                         warning_msgs.append(f"Invalid metadata for station {tsi} found in station file, unable to parse")  # noqa
 
+            # Add information to the mappings
             if conversion_success[tsi]:
+                # First check if the BUFR header centre
+                # and subcentre codes are present
+                missing_env_vars = []
+
+                if os.environ.get("BUFR_ORIGINATING_CENTRE") is None:
+                    missing_env_vars.append("BUFR_ORIGINATING_CENTRE")
+                else:
+                    # Add the BUFR header centre and subcentre to mappings
+                    mapping["header"].append({
+                        "eccodes_key": "bufrHeaderCentre",
+                        "value": f"const:{os.environ.get('BUFR_ORIGINATING_CENTRE')}"  # noqa
+                    })
+
+                if os.environ.get("BUFR_ORIGINATING_SUBCENTRE") is None:
+                    missing_env_vars.append("BUFR_ORIGINATING_SUBCENTRE")
+                else:
+                    mapping["header"].append({
+                        "eccodes_key": "bufrHeaderSubCentre",
+                        "value": f"const:{os.environ.get('BUFR_ORIGINATING_SUBCENTRE')}"  # noqa
+                    })
+
+                # If either of these environment variables are not set,
+                # we will default to missing and warn the user once
+                if missing_env_vars and can_var_warning_be_displayed:
+                    # Display warning messages
+                    for var in missing_env_vars:
+                        var_warning = f"The {var} environment variable is not set, will default to missing!"  # noqa
+                        LOGGER.warning(var_warning)
+                        warning_msgs.append(var_warning)
+                        can_var_warning_be_displayed = False
+                    # Stop duplicated warnings
+                    can_var_warning_be_displayed = False
+
+                # Now we need to add the mappings for the cloud groups
+                # of section 3 and 4
                 try:
                     for idx in range(num_s3_clouds):
                         # Build the dictionary of mappings for section 3
diff --git a/synop2bufr/resources/synop-mappings-307080.json b/synop2bufr/resources/synop-mappings-307080.json
index c498366..fc08afb 100644
--- a/synop2bufr/resources/synop-mappings-307080.json
+++ b/synop2bufr/resources/synop-mappings-307080.json
@@ -8,8 +8,8 @@
     "header":[
         {"eccodes_key": "edition", "value": "const:4"},
         {"eccodes_key": "masterTableNumber", "value": "const:0"},
-        {"eccodes_key": "bufrHeaderCentre", "value": "const:0"},
-        {"eccodes_key": "bufrHeaderSubCentre", "value": "const:0"},
+        {"eccodes_key": "bufrHeaderCentre", "value": "const:65535"},
+        {"eccodes_key": "bufrHeaderSubCentre", "value": "const:65535"},
         {"eccodes_key": "updateSequenceNumber", "value": "const:0"},
         {"eccodes_key": "dataCategory", "value": "const:0"},
         {"eccodes_key": "internationalDataSubCategory", "value": "const:2"},
diff --git a/synop2bufr/resources/synop-mappings-307096.json b/synop2bufr/resources/synop-mappings-307096.json
index 33b6c1b..36a59a3 100644
--- a/synop2bufr/resources/synop-mappings-307096.json
+++ b/synop2bufr/resources/synop-mappings-307096.json
@@ -8,8 +8,8 @@
     "header":[
         {"eccodes_key": "edition", "value": "const:4"},
         {"eccodes_key": "masterTableNumber", "value": "const:0"},
-        {"eccodes_key": "bufrHeaderCentre", "value": "const:0"},
-        {"eccodes_key": "bufrHeaderSubCentre", "value": "const:0"},
+        {"eccodes_key": "bufrHeaderCentre", "value": "const:65535"},
+        {"eccodes_key": "bufrHeaderSubCentre", "value": "const:65535"},
         {"eccodes_key": "updateSequenceNumber", "value": "const:0"},
         {"eccodes_key": "dataCategory", "value": "const:0"},
         {"eccodes_key": "internationalDataSubCategory", "value": "const:2"},
diff --git a/tests/test_synop2bufr.py b/tests/test_synop2bufr.py
index 64cf34f..81f5852 100644
--- a/tests/test_synop2bufr.py
+++ b/tests/test_synop2bufr.py
@@ -149,9 +149,9 @@ def test_bufr_307080(multiple_reports_307080, metadata_string):
     for item in result:
         msgs[item['_meta']['id']] = item
     # Test the md5 keys
-    assert msgs['WIGOS_0-20000-0-15015_20220321T120000']['_meta']['properties']['md5'] == 'f1595e9f82880b650de227fa007eb770'  # noqa
-    assert msgs['WIGOS_0-20000-0-15020_20220321T120000']['_meta']['properties']['md5'] == '21cd8741f8615cc7b0df70060c3a98ff'  # noqa
-    assert msgs['WIGOS_0-20000-0-15090_20220321T120000']['_meta']['properties']['md5'] == 'f0b736dba245b34985f757b0597e3d54'  # noqa
+    assert msgs['WIGOS_0-20000-0-15015_20220321T120000']['_meta']['properties']['md5'] == '1e564e1ec2d679bbc120141ba031ab7a'  # noqa
+    assert msgs['WIGOS_0-20000-0-15020_20220321T120000']['_meta']['properties']['md5'] == 'db62277233118df3f1cf7b6a073f1cbe'  # noqa
+    assert msgs['WIGOS_0-20000-0-15090_20220321T120000']['_meta']['properties']['md5'] == '538db43645fb4b2459edfcb467048b7a'  # noqa
 
     # Test the bufr template used for all the reports
     # (they should be the same for every report)
@@ -168,9 +168,9 @@ def test_bufr_307096(multiple_reports_307096, metadata_string):
     for item in result:
         msgs[item['_meta']['id']] = item
     # Test the md5 keys
-    assert msgs['WIGOS_0-20000-0-15015_20220321T120000']['_meta']['properties']['md5'] == '27c990045879acc2eedddb7fdc70db4d'  # noqa
-    assert msgs['WIGOS_0-20000-0-15020_20220321T120000']['_meta']['properties']['md5'] == '9db622c40d53aae4ce4f38a658f36d86'  # noqa
-    assert msgs['WIGOS_0-20000-0-15090_20220321T120000']['_meta']['properties']['md5'] == '89f424b9fc38a6db69c7b195bd71d92f'  # noqa
+    assert msgs['WIGOS_0-20000-0-15015_20220321T120000']['_meta']['properties']['md5'] == '5f1744ec26875630efca0e1583cddca9'  # noqa
+    assert msgs['WIGOS_0-20000-0-15020_20220321T120000']['_meta']['properties']['md5'] == 'e2dc1199d4e38fae25d26ded815597da'  # noqa
+    assert msgs['WIGOS_0-20000-0-15090_20220321T120000']['_meta']['properties']['md5'] == '7c352acb43530946f2445a95eb349e68'  # noqa
 
     # Test the bufr template used for all the reports
     # (they should be the same for every report)