diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index d764d4cb..7ebafcdb 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -26,7 +26,7 @@ jobs: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}" strategy: matrix: - python-version: [3.7] + python-version: [3.8] steps: - uses: actions/checkout@v2 - uses: actions/setup-python@v2 diff --git a/Makefile b/Makefile index 16a8c9f8..b8b0bc94 100644 --- a/Makefile +++ b/Makefile @@ -54,7 +54,7 @@ help: @echo " createdb: create PostgreSQL database" @echo " dropdb: drop PostgreSQL database" @echo " setup: create models and search index" - @echo " setup_data: download core metadata + @echo " setup_data: download core metadata" @echo " teardown: delete models and search index" @echo " test: run tests" @echo " coverage: run code coverage" @@ -92,7 +92,7 @@ init: woudc-data-registry admin init -d $(DATA) package: - python setup.py sdist bdist_wheel + python3 setup.py sdist bdist_wheel setup: woudc-data-registry admin registry setup @@ -107,6 +107,6 @@ teardown: woudc-data-registry admin search teardown test: - python setup.py test + python3 setup.py test .PHONY: clean coverage createdb dropdb flake8 help init package setup setup_data teardown test diff --git a/README.md b/README.md index 7ce5195d..62c42218 100644 --- a/README.md +++ b/README.md @@ -31,19 +31,20 @@ are automatically installed during installation. python3 -m venv --system-site-packages woudc-data-registry cd woudc-data-registry source bin/activate +cd .. # clone woudc-extcsv and install git clone https://github.com/woudc/woudc-extcsv.git cd woudc-extcsv pip install -r requirements.txt -python setup.py install +python3 setup.py install cd .. # clone codebase and install git clone https://github.com/woudc/woudc-data-registry.git cd woudc-data-registry -python setup.py build -python setup.py install +python3 setup.py build +python3 setup.py install # for PostgreSQL backends pip install -r requirements-pg.txt @@ -69,10 +70,6 @@ woudc-data-registry admin registry setup woudc-data-registry admin search setup # load core metadata - -# fetch WMO country list -mkdir data -curl -o data/wmo-countries.json https://www.wmo.int/cpdb/data/membersandterritories.json woudc-data-registry admin init -d data/ # cleanups @@ -104,35 +101,35 @@ eval "$(_WOUDC_DATA_REGISTRY_COMPLETE=source woudc-data-registry)" # list all instances of foo (where foo is one of: # project|dataset|contributor|country|station|instrument|deployment) woudc-data-registry list - e.g. +# e.g. woudc-data-registry contributor list # show a specific instance of foo with a given registry identifier woudc-data-registry show - e.g. +# e.g. woudc-data-registry station show 023 woudc-data-registry instrument show ECC:2Z:4052:002:OzoneSonde # add a new instance of foo (contributor|country|station|instrument|deployment) woudc-data-registry add - e.g. +# e.g. woudc-data-registry deployment add -s 001 -c MSC:WOUDC woudc-data-registry contributor add -id foo -n "Contributor name" -c Canada -w IV -u https://example.org -e you@example.org -f foouser -g -75,45 # update an existing instance of foo with a given registry identifier woudc-data-registry update -id - e.g. +# e.g. woudc-data-registry station update -n "New station name" woudc-data-registry deployment update --end-date 'Deployment end date' # delete an instance of foo with a given registry identifier woudc-data-registry delete - e.g. +# e.g. woudc-data-registry deployment delete 018:MSC:WOUDC # for more information about options on operation (add|update): woudc-data-registry --help - e.g. +# e.g. woudc-data-registry instrument update --help ``` @@ -223,7 +220,7 @@ pip install -r requirements-dev.txt # build local copy of https://woudc.github.io/woudc-data-registry cd docs make html -python -m http.server # view on http://localhost:8000/ +python3 -m http.server # view on http://localhost:8000/ ``` #### Running Tests @@ -231,10 +228,10 @@ python -m http.server # view on http://localhost:8000/ ```bash # run tests like this: cd woudc_data_registry/tests -python test_data_registry.py +python3 test_data_registry.py # or this: -python setup.py test +python3 setup.py test # measure code coverage coverage run --source=woudc_data_registry -m unittest woudc_data_registry.tests.test_data_registry diff --git a/data-registry-ingest.sh b/data-registry-ingest.sh index 4e674359..82397d93 100644 --- a/data-registry-ingest.sh +++ b/data-registry-ingest.sh @@ -1,54 +1,60 @@ # The purpose of this script is to ingest all of the operational WOUDC Archive datasets and rename the generated ingest reports to avoid overwriting # Inital setup and table initialization of WOUDC Data Registry is required to run this script +# Replace YYYY-MM-DD with today's date before proceeding -woudc-data-registry data ingest /path/to/TotalOzone_1.0_1 -y -r /path/to/operator/report -mv /path/to/operator/report/operator-report.csv /path/to/operator/report/TotalOzone_1.0_1.csv -mv /path/to/operator/report/run_report /path/to/operator/report/TotalOzone_1.0_1_run_report +woudc-data-registry data ingest /apps/data/web/woudc-archive/Archive-NewFormat/TotalOzone_1.0_1 -y -r /apps/data/wdr-ingest/YYYY-MM-DD +mv /apps/data/wdr-ingest/YYYY-MM-DD/operator-report.csv /apps/data/wdr-ingest/YYYY-MM-DD/TotalOzone_1.0_1.csv +mv /apps/data/wdr-ingest/YYYY-MM-DD/run_report /apps/data/wdr-ingest/YYYY-MM-DD/TotalOzone_1.0_1_run_report -woudc-data-registry data ingest /path/to/TotalOzone_2.0_1 -y -r /path/to/operator/report -mv /path/to/operator/report/operator-report.csv /path/to/operator/report/TotalOzone_2.0_1.csv -mv /path/to/operator/report/run_report /path/to/operator/report/TotalOzone_2.0_1_run_report +woudc-data-registry data ingest /apps/data/web/woudc-archive/Archive-NewFormat/TotalOzone_2.0_1 -y -r /apps/data/wdr-ingest/YYYY-MM-DD +mv /apps/data/wdr-ingest/YYYY-MM-DD/operator-report.csv /apps/data/wdr-ingest/YYYY-MM-DD/TotalOzone_2.0_1.csv +mv /apps/data/wdr-ingest/YYYY-MM-DD/run_report /apps/data/wdr-ingest/YYYY-MM-DD/TotalOzone_2.0_1_run_report -woudc-data-registry data ingest /path/to/Broad-band_1.0_1 -y -r /path/to/operator/report -mv /path/to/operator/report/operator-report.csv /path/to/operator/report/Broad-band_1.0_1.csv -mv /path/to/operator/report/run_report /path/to/operator/report/Broad-band_1.0_1_run_report +woudc-data-registry data ingest /apps/data/web/woudc-archive/Archive-NewFormat/Broad-band_1.0_1 -y -r /apps/data/wdr-ingest/YYYY-MM-DD +mv /apps/data/wdr-ingest/YYYY-MM-DD/operator-report.csv /apps/data/wdr-ingest/YYYY-MM-DD/Broad-band_1.0_1.csv +mv /apps/data/wdr-ingest/YYYY-MM-DD/run_report /apps/data/wdr-ingest/YYYY-MM-DD/Broad-band_1.0_1_run_report -woudc-data-registry data ingest /path/to/Broad-band_2.0_1 -y -r /path/to/operator/report -mv /path/to/operator/report/operator-report.csv /path/to/operator/report/Broad-band_2.0_1.csv -mv /path/to/operator/report/run_report /path/to/operator/report/Broad-band_2.0_1_run_report +woudc-data-registry data ingest /apps/data/web/woudc-archive/Archive-NewFormat/Broad-band_2.0_1 -y -r /apps/data/wdr-ingest/YYYY-MM-DD +mv /apps/data/wdr-ingest/YYYY-MM-DD/operator-report.csv /apps/data/wdr-ingest/YYYY-MM-DD/Broad-band_2.0_1.csv +mv /apps/data/wdr-ingest/YYYY-MM-DD/run_report /apps/data/wdr-ingest/YYYY-MM-DD/Broad-band_2.0_1_run_report -woudc-data-registry data ingest /path/to/Lidar_1.0_1 -y -r /path/to/operator/report -mv /path/to/operator/report/operator-report.csv /path/to/operator/report/Lidar_1.0_1.csv -mv /path/to/operator/report/run_report /path/to/operator/report/Lidar_1.0_1_run_report +woudc-data-registry data ingest /apps/data/web/woudc-archive/Archive-NewFormat/Lidar_1.0_1 -y -r /apps/data/wdr-ingest/YYYY-MM-DD +mv /apps/data/wdr-ingest/YYYY-MM-DD/operator-report.csv /apps/data/wdr-ingest/YYYY-MM-DD/Lidar_1.0_1.csv +mv /apps/data/wdr-ingest/YYYY-MM-DD/run_report /apps/data/wdr-ingest/YYYY-MM-DD/Lidar_1.0_1_run_report -woudc-data-registry data ingest /path/to/Multi-band_1.0_1 -y -r /path/to/operator/report -mv /path/to/operator/report/operator-report.csv /path/to/operator/report/Multi-band_1.0_1.csv -mv /path/to/operator/report/run_report /path/to/operator/report/Multi-band_1.0_1_run_report +woudc-data-registry data ingest /apps/data/web/woudc-archive/Archive-NewFormat/Multi-band_1.0_1 -y -r /apps/data/wdr-ingest/YYYY-MM-DD +mv /apps/data/wdr-ingest/YYYY-MM-DD/operator-report.csv /apps/data/wdr-ingest/YYYY-MM-DD/Multi-band_1.0_1.csv +mv /apps/data/wdr-ingest/YYYY-MM-DD/run_report /apps/data/wdr-ingest/YYYY-MM-DD/Multi-band_1.0_1_run_report -woudc-data-registry data ingest /path/to/RocketSonde_1.0_1 -y -r /path/to/operator/report -mv /path/to/operator/report/operator-report.csv /path/to/operator/report/RocketSonde_1.0_1.csv -mv /path/to/operator/report/run_report /path/to/operator/report/RocketSonde_1.0_1_run_report +woudc-data-registry data ingest /apps/data/web/woudc-archive/Archive-NewFormat/RocketSonde_1.0_1 -y -r /apps/data/wdr-ingest/YYYY-MM-DD +mv /apps/data/wdr-ingest/YYYY-MM-DD/operator-report.csv /apps/data/wdr-ingest/YYYY-MM-DD/RocketSonde_1.0_1.csv +mv /apps/data/wdr-ingest/YYYY-MM-DD/run_report /apps/data/wdr-ingest/YYYY-MM-DD/RocketSonde_1.0_1_run_report -woudc-data-registry data ingest /path/to/OzoneSonde_1.0_1 -y -r /path/to/operator/report -mv /path/to/operator/report/operator-report.csv /path/to/operator/report/OzoneSonde_1.0_1.csv -mv /path/to/operator/report/run_report /path/to/operator/report/OzoneSonde_1.0_1_run_report +woudc-data-registry data ingest /apps/data/web/woudc-archive/Archive-NewFormat/OzoneSonde_1.0_1 -y -r /apps/data/wdr-ingest/YYYY-MM-DD +mv /apps/data/wdr-ingest/YYYY-MM-DD/operator-report.csv /apps/data/wdr-ingest/YYYY-MM-DD/OzoneSonde_1.0_1.csv +mv /apps/data/wdr-ingest/YYYY-MM-DD/run_report /apps/data/wdr-ingest/YYYY-MM-DD/OzoneSonde_1.0_1_run_report -woudc-data-registry data ingest /path/to/TotalOzoneObs_1.0_1 -y -r /path/to/operator/report -mv /path/to/operator/report/operator-report.csv /path/to/operator/report/TotalOzoneObs_1.0_1.csv -mv /path/to/operator/report/run_report /path/to/operator/report/TotalOzoneObs_1.0_1_run_report +woudc-data-registry data ingest /apps/data/web/woudc-archive/Archive-NewFormat/TotalOzoneObs_1.0_1 -y -r /apps/data/wdr-ingest/YYYY-MM-DD +mv /apps/data/wdr-ingest/YYYY-MM-DD/operator-report.csv /apps/data/wdr-ingest/YYYY-MM-DD/TotalOzoneObs_1.0_1.csv +mv /apps/data/wdr-ingest/YYYY-MM-DD/run_report /apps/data/wdr-ingest/YYYY-MM-DD/TotalOzoneObs_1.0_1_run_report -woudc-data-registry data ingest /path/to/Spectral_1.0_1 -y -r /path/to/operator/report -mv /path/to/operator/report/operator-report.csv /path/to/operator/report/Spectral_1.0_1.csv -mv /path/to/operator/report/run_report /path/to/operator/report/Spectral_1.0_1_run_report +woudc-data-registry data ingest /apps/data/web/woudc-archive/Archive-NewFormat/Spectral_1.0_1 -y -r /apps/data/wdr-ingest/YYYY-MM-DD +mv /apps/data/wdr-ingest/YYYY-MM-DD/operator-report.csv /apps/data/wdr-ingest/YYYY-MM-DD/Spectral_1.0_1.csv +mv /apps/data/wdr-ingest/YYYY-MM-DD/run_report /apps/data/wdr-ingest/YYYY-MM-DD/Spectral_1.0_1_run_report -woudc-data-registry data ingest /path/to/Spectral_2.0_1 -y -r /path/to/operator/report -mv /path/to/operator/report/operator-report.csv /path/to/operator/report/Spectral_2.0_1.csv -mv /path/to/operator/report/run_report /path/to/operator/report/Spectral_2.0_1_run_report +woudc-data-registry data ingest /apps/data/web/woudc-archive/Archive-NewFormat/Spectral_2.0_1 -y -r /apps/data/wdr-ingest/YYYY-MM-DD +mv /apps/data/wdr-ingest/YYYY-MM-DD/operator-report.csv /apps/data/wdr-ingest/YYYY-MM-DD/Spectral_2.0_1.csv +mv /apps/data/wdr-ingest/YYYY-MM-DD/run_report /apps/data/wdr-ingest/YYYY-MM-DD/Spectral_2.0_1_run_report -woudc-data-registry data ingest /path/to/UmkehrN14_1.0_1 -y -r /path/to/operator/report -mv /path/to/operator/report/operator-report.csv /path/to/operator/report/UmkehrN14_1.0_1.csv -mv /path/to/operator/report/run_report /path/to/operator/report/UmkehrN14_1.0_1_run_report +woudc-data-registry data ingest /apps/data/web/woudc-archive/Archive-NewFormat/UmkehrN14_1.0_1 -y -r /apps/data/wdr-ingest/YYYY-MM-DD +mv /apps/data/wdr-ingest/YYYY-MM-DD/operator-report.csv /apps/data/wdr-ingest/YYYY-MM-DD/UmkehrN14_1.0_1.csv +mv /apps/data/wdr-ingest/YYYY-MM-DD/run_report /apps/data/wdr-ingest/YYYY-MM-DD/UmkehrN14_1.0_1_run_report -woudc-data-registry data ingest /path/to/UmkehrN14_2.0_1 -y -r /path/to/operator/report -mv /path/to/operator/report/operator-report.csv /path/to/operator/report/UmkehrN14_2.0_1.csv -mv /path/to/operator/report/run_report /path/to/operator/report/UmkehrN14_2.0_1_run_report +woudc-data-registry data ingest /apps/data/web/woudc-archive/Archive-NewFormat/UmkehrN14_2.0_1 -y -r /apps/data/wdr-ingest/YYYY-MM-DD +mv /apps/data/wdr-ingest/YYYY-MM-DD/operator-report.csv /apps/data/wdr-ingest/YYYY-MM-DD/UmkehrN14_2.0_1.csv +mv /apps/data/wdr-ingest/YYYY-MM-DD/run_report /apps/data/wdr-ingest/YYYY-MM-DD/UmkehrN14_2.0_1_run_report + +## Table generation process into registry for totalozone, uv-index and ozonesonde +woudc-data-registry product totalozone generate -y /apps/data/web/woudc-archive/Archive-NewFormat/ +woudc-data-registry product uv-index generate -y /apps/data/web/woudc-archive/Archive-NewFormat/ +woudc-data-registry product ozonesonde generate -y /apps/data/web/woudc-archive/Archive-NewFormat/ diff --git a/debian/control b/debian/control index a50b9cc2..bf798281 100644 --- a/debian/control +++ b/debian/control @@ -2,14 +2,14 @@ Source: woudc-data-registry Section: python Priority: optional Maintainer: WOUDC -Build-Depends: debhelper (>= 9), python3, python3-setuptools +Build-Depends: debhelper (>= 9), python3 (>= 3.8.10), python3-setuptools Standards-Version: 3.9.5 -X-Python-Version: >= 3.4 +X-Python-Version: >= 3.8 Vcs-Git: https://github.com/woudc/woudc-data-registry.git Package: woudc-data-registry Architecture: all -Depends: elasticsearch (>=5.5.0), postgresql, python3-click, python-elasticsearch, python3-psycopg2, python3-requests, python3-sqlalchemy, woudc-extcsv +Depends: elasticsearch (>= 8.12.0), postgresql, python3-click, python3-elasticsearch, python3-psycopg2, python3-requests, python3-sqlalchemy, woudc-extcsv Homepage: https://woudc.org Description: WOUDC Data Registry is a platform that manages Ozone and Ultraviolet Radiation data in support of the World Ozone and Ultraviolet diff --git a/default.env b/default.env index b48a7e2a..0293ed04 100644 --- a/default.env +++ b/default.env @@ -14,22 +14,25 @@ export WDR_DB_USERNAME=postgres export WDR_DB_PASSWORD=postgres export PGPASSWORD=$WDR_DB_PASSWORD -# SQLite +# alternative SQLite export WDR_DB_TYPE=sqlite export WDR_DB_NAME=test.db ## search index configuration export WDR_SEARCH_TYPE=elasticsearch -export WDR_SEARCH_INDEX_BASENAME=woudc-data-registry -export WDR_SEARCH_URL=http://localhost:9200/ -export WDR_SEARCH_USERNAME=elasticsearch -export WDR_SEARCH_PASSWORD=elasticsearch +export WDR_SEARCH_INDEX_BASENAME=woudc_data_registry +export WDR_SEARCH_USERNAME=elastic +export WDR_SEARCH_PASSWORD= +export WDR_SEARCH_URL=https://${WDR_SEARCH_USERNAME}:${WDR_SEARCH_PASSWORD}@woudc-geo-dev3.cmc.ec.gc.ca/elasticsearch/ +# export WDR_SEARCH_URL=http://username:password@localhost:9200/ ## waf configuration export WDR_WAF_BASEURL=https://woudc.org/archive/ export WDR_WAF_BASEDIR=/tmp -# table configurations +# table configurations; optional: WDR_TABLE_SCHEMA, WDR_TABLE_CONFIG +export WDR_TABLE_SCHEMA=/path/to/data/tables-schema.json +export WDR_TABLE_CONFIG=/path/to/data/migrate/tables-backfilling.yml export WDR_ERROR_CONFIG=/path/to/data/errors.csv export WDR_ALIAS_CONFIG=/path/to/data/aliases.yml export WDR_EXTRA_CONFIG=/path/to/data/extra-options.yml @@ -37,5 +40,5 @@ export WDR_EXTRA_CONFIG=/path/to/data/extra-options.yml # UV Index formula table configuration export WDR_UV_INDEX_FORMULA_LOOKUP=/path/to/data/uv-index-formula-lookup.csv -# enable shell autocompletion -eval "$(_WOUDC_DATA_REGISTRY_COMPLETE=source woudc-data-registry)" +# enable shell autocompletion - optional +# eval "$(_WOUDC_DATA_REGISTRY_COMPLETE=source woudc-data-registry)" diff --git a/docs/installation.rst b/docs/installation.rst index caed175d..dec6ca14 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -33,8 +33,8 @@ Instructions | pip install -r requirements.txt # Core dependencies | pip install -r requirements-pg.txt # For PostgreSQL backends | - | python setup.py build - | python setup.py install + | python3 setup.py build + | python3 setup.py install * Set up the project:: | . /path/to/environment/config.env # Set environment variables diff --git a/migration/bps/get-bps-metadata.sh b/migration/bps/get-bps-metadata.sh old mode 100644 new mode 100755 index f22a636b..62a43fd8 --- a/migration/bps/get-bps-metadata.sh +++ b/migration/bps/get-bps-metadata.sh @@ -72,19 +72,19 @@ fi PROJECTS_QUERY="SELECT DISTINCT(project.project_acronym) AS project_id FROM project" -DATASETS_QUERY="SELECT data_category AS dataset_id, data_class, data_level FROM dataset_type_definition" +DATASETS_QUERY="SELECT data_category AS dataset_id, data_class, data_level FROM dataset_type_definition ORDER BY data_category, data_level" -CONTRIBUTORS_QUERY="SELECT agency.agency_name AS name, agency.acronym AS acronym, country.country_code AS country_id, project.project_acronym AS project_id, country.wmo_region AS wmo_region_id, agency.url, REPLACE(email.email_address, ',', ';') AS email, agency.ftpdir AS ftp_username, DATE(agency.eff_start_datetime) AS start_date, DATE(agency.eff_end_datetime) AS end_date, ST_X(agency.the_geom) AS x, ST_Y(agency.the_geom) AS y FROM agency JOIN country USING (country_id) JOIN email USING (email_id) JOIN project USING (project_id)" +CONTRIBUTORS_QUERY="SELECT agency.agency_name AS name, agency.acronym AS acronym, country.country_code AS country_id, project.project_acronym AS project_id, country.wmo_region AS wmo_region_id, agency.url, REPLACE(email.email_address, ',', ';') AS email, agency.ftpdir AS ftp_username, DATE(agency.eff_start_datetime) AS start_date, DATE(agency.eff_end_datetime) AS end_date, ST_X(agency.the_geom) AS x, ST_Y(agency.the_geom) AS y FROM agency JOIN country USING (country_id) JOIN email USING (email_id) JOIN project USING (project_id) ORDER BY name, acronym, country_id" -STATIONS_QUERY="SELECT DISTINCT ON (station_id) platform.woudc_platform_identifier AS station_id, platform.platform_name AS station_name, platform_type AS station_type, gaw.gaw_platform_identifier AS gaw_id, country.country_code AS country_id, country.wmo_region AS wmo_region_id, DATE(platform.eff_start_datetime) AS start_date, DATE(platform.eff_end_datetime) AS end_date, ST_X(gaw.the_geom) AS x, ST_Y(gaw.the_geom) AS y, ST_Z(gaw.the_geom) AS z FROM platform JOIN platform_type_definition USING (platform_type_id) JOIN agency USING (agency_id) JOIN country ON platform.country_id = country.country_id JOIN platform_gaw_properties gaw ON platform.platform_id = gaw.platform_id" +STATIONS_QUERY="SELECT DISTINCT ON (station_id) platform.woudc_platform_identifier AS station_id, platform.platform_name AS station_name, platform_type AS station_type, gaw.gaw_platform_identifier AS gaw_id, country.country_code AS country_id, country.wmo_region AS wmo_region_id, DATE(platform.eff_start_datetime) AS start_date, DATE(platform.eff_end_datetime) AS end_date, ST_X(gaw.the_geom) AS x, ST_Y(gaw.the_geom) AS y, ST_Z(gaw.the_geom) AS z FROM platform JOIN platform_type_definition USING (platform_type_id) JOIN agency USING (agency_id) JOIN country ON platform.country_id = country.country_id JOIN platform_gaw_properties gaw ON platform.platform_id = gaw.platform_id ORDER BY station_id" -STATION_NAMES_QUERY="(SELECT DISTINCT woudc_platform_identifier AS station_id, data_payload.platform_name AS name FROM data_payload FULL JOIN platform ON data_payload.platform_id = platform.platform_id WHERE data_payload.platform_name IS NOT NULL) UNION (SELECT DISTINCT woudc_platform_identifier AS station_id, platform_name AS name FROM platform) ORDER BY station_id" +STATION_NAMES_QUERY="(SELECT DISTINCT woudc_platform_identifier AS station_id, data_payload.platform_name AS name FROM data_payload FULL JOIN platform ON data_payload.platform_id = platform.platform_id WHERE data_payload.platform_name IS NOT NULL) UNION (SELECT DISTINCT woudc_platform_identifier AS station_id, platform_name AS name FROM platform) ORDER BY station_id, name" -INSTRUMENTS_QUERY="SELECT platform.woudc_platform_identifier AS station_id, dtd.data_category AS dataset_id, dtd.data_level AS data_level, itd.instrument_type AS name, im.instrument_model AS model, instrument.instrument_serial_number AS serial, agency.acronym AS contributor, project_acronym AS project, DATE(instrument.eff_start_datetime) AS start_date, DATE(instrument.eff_end_datetime) AS end_date, ST_X(instrument.the_geom) AS x, ST_Y(instrument.the_geom) AS y, ST_Z(instrument.the_geom) AS z FROM instrument JOIN platform USING (platform_id) JOIN agency USING (agency_id) JOIN project USING (project_id) JOIN dataset_type_definition dtd USING (dataset_type_id) JOIN instrument_model im USING (instrument_model_id) JOIN instrument_type_definition itd USING (instrument_type_id)" +INSTRUMENTS_QUERY="SELECT platform.woudc_platform_identifier AS station_id, dtd.data_category AS dataset_id, dtd.data_level AS data_level, itd.instrument_type AS name, im.instrument_model AS model, instrument.instrument_serial_number AS serial, agency.acronym AS contributor, project_acronym AS project, DATE(instrument.eff_start_datetime) AS start_date, DATE(instrument.eff_end_datetime) AS end_date, ST_X(instrument.the_geom) AS x, ST_Y(instrument.the_geom) AS y, ST_Z(instrument.the_geom) AS z FROM instrument JOIN platform USING (platform_id) JOIN agency USING (agency_id) JOIN project USING (project_id) JOIN dataset_type_definition dtd USING (dataset_type_id) JOIN instrument_model im USING (instrument_model_id) JOIN instrument_type_definition itd USING (instrument_type_id) ORDER BY station_id, dataset_id, data_level, name, model, serial" -DEPLOYMENTS_QUERY="SELECT platform.woudc_platform_identifier AS station_id, CONCAT(agency.acronym, ':', project.project_acronym) AS contributor_id, DATE(platform.eff_start_datetime) AS start_date, DATE(platform.eff_end_datetime) AS end_date FROM agency JOIN platform USING (agency_id) JOIN project USING (project_id)" +DEPLOYMENTS_QUERY="SELECT platform.woudc_platform_identifier AS station_id, CONCAT(agency.acronym, ':', project.project_acronym) AS contributor_id, DATE(platform.eff_start_datetime) AS start_date, DATE(platform.eff_end_datetime) AS end_date FROM agency JOIN platform USING (agency_id) JOIN project USING (project_id) ORDER BY station_id, contributor_id" -NOTIFICATIONS_QUERY="SELECT title_en, title_fr, description_en, description_fr, tags_en, tags_fr, published, banner, visible, ST_X(the_geom) AS x, ST_Y(the_geom) AS y FROM notifications" +NOTIFICATIONS_QUERY="SELECT title_en, title_fr, description_en, description_fr, tags_en, tags_fr, published, banner, visible, ST_X(the_geom) AS x, ST_Y(the_geom) AS y FROM notifications ORDER BY published" echo "Extracting metadata from woudc-archive" export PGPASSWORD=$WOUDC_ARCHIVE_PASSWORD diff --git a/requirements-dev.txt b/requirements-dev.txt index 04f09f62..e9eb642c 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,5 +1,6 @@ alembic coverage flake8 +pypandoc sphinx wheel diff --git a/requirements-pg.txt b/requirements-pg.txt index 658130bb..37ec460f 100644 --- a/requirements-pg.txt +++ b/requirements-pg.txt @@ -1 +1 @@ -psycopg2 +psycopg2-binary diff --git a/requirements.txt b/requirements.txt index c4b92ebd..9b0693ee 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ click -elasticsearch<8 -jsonschema<4.4.0 +elasticsearch +jsonschema pyyaml requests sqlalchemy -woudc-extcsv>=0.5.0 \ No newline at end of file +woudc-extcsv diff --git a/woudc_data_registry/__init__.py b/woudc_data_registry/__init__.py index 2e1a70a9..14bd0430 100644 --- a/woudc_data_registry/__init__.py +++ b/woudc_data_registry/__init__.py @@ -57,7 +57,7 @@ from woudc_data_registry.models import admin from woudc_data_registry.product import product -__version__ = '0.1.dev0' +__version__ = '0.2.dev0' setup_logger(config.WDR_LOGGING_LOGLEVEL, config.WDR_LOGGING_LOGFILE) diff --git a/woudc_data_registry/config.py b/woudc_data_registry/config.py index 922d7435..dd43eda4 100644 --- a/woudc_data_registry/config.py +++ b/woudc_data_registry/config.py @@ -18,7 +18,7 @@ # those files. Users are asked to read the 3rd Party Licenses # referenced with those assets. # -# Copyright (c) 2021 Government of Canada +# Copyright (c) 2024 Government of Canada # # Permission is hereby granted, free of charge, to any person # obtaining a copy of this software and associated documentation @@ -53,26 +53,30 @@ LOGGER = logging.getLogger(__name__) WDR_LOGGING_LOGLEVEL = os.getenv('WDR_LOGGING_LOGLEVEL', 'ERROR') -WDR_LOGGING_LOGFILE = os.getenv('WDR_LOGGING_LOGFILE', None) +WDR_LOGGING_LOGFILE = os.getenv('WDR_LOGGING_LOGFILE') WDR_DB_DEBUG = str2bool(os.getenv('WDR_DB_DEBUG', False)) -WDR_DB_TYPE = os.getenv('WDR_DB_TYPE', None) -WDR_DB_HOST = os.getenv('WDR_DB_HOST', None) +WDR_DB_TYPE = os.getenv('WDR_DB_TYPE') +WDR_DB_HOST = os.getenv('WDR_DB_HOST') WDR_DB_PORT = int(os.getenv('WDR_DB_PORT', 5432)) -WDR_DB_USERNAME = os.getenv('WDR_DB_USERNAME', None) -WDR_DB_PASSWORD = os.getenv('WDR_DB_PASSWORD', None) -WDR_DB_NAME = os.getenv('WDR_DB_NAME', None) +WDR_DB_USERNAME = os.getenv('WDR_DB_USERNAME') +WDR_DB_PASSWORD = os.getenv('WDR_DB_PASSWORD') +WDR_DB_NAME = os.getenv('WDR_DB_NAME') WDR_SEARCH_TYPE = os.getenv('WDR_SEARCH_TYPE', 'elasticsearch') -WDR_SEARCH_INDEX_BASENAME = os.getenv('WDR_SEARCH_INDEX_BASENAME', None) -WDR_SEARCH_URL = os.getenv('WDR_SEARCH_URL', None) -WDR_SEARCH_USERNAME = os.getenv('WDR_SEARCH_USERNAME', None) -WDR_SEARCH_PASSWORD = os.getenv('WDR_SEARCH_PASSWORD', None) -WDR_WAF_BASEDIR = os.getenv('WDR_WAF_BASEDIR', None) +WDR_SEARCH_URL = os.getenv('WDR_SEARCH_URL') +WDR_SEARCH_INDEX_BASENAME = os.getenv('WDR_SEARCH_INDEX_BASENAME') +WDR_WAF_BASEDIR = os.getenv('WDR_WAF_BASEDIR') WDR_WAF_BASEURL = os.getenv('WDR_WAF_BASEURL', 'https://woudc.org/archive') -WDR_ERROR_CONFIG = os.getenv('WDR_ERROR_CONFIG', None) -WDR_ALIAS_CONFIG = os.getenv('WDR_ALIAS_CONFIG', None) -WDR_EXTRA_CONFIG = os.getenv('WDR_EXTRA_CONFIG', None) -WDR_UV_INDEX_FORMULA_LOOKUP = os.getenv('WDR_UV_INDEX_FORMULA_LOOKUP', None) +WDR_ERROR_CONFIG = os.getenv('WDR_ERROR_CONFIG') +WDR_ALIAS_CONFIG = os.getenv('WDR_ALIAS_CONFIG') +WDR_EXTRA_CONFIG = os.getenv('WDR_EXTRA_CONFIG') +WDR_UV_INDEX_FORMULA_LOOKUP = os.getenv('WDR_UV_INDEX_FORMULA_LOOKUP') + +if not WDR_SEARCH_INDEX_BASENAME: + msg = 'WDR_SEARCH_INDEX_BASENAME was not set. \ + Defaulting to: woudc_data_registry' + LOGGER.warning(msg) + WDR_SEARCH_INDEX_BASENAME = 'woudc_data_registry' if WDR_SEARCH_URL is not None: WDR_SEARCH_URL = WDR_SEARCH_URL.rstrip('/') @@ -90,7 +94,7 @@ msg = 'WDR_DB_NAME e is not set!' LOGGER.error(msg) raise EnvironmentError(msg) - WDR_DATABASE_URL = '{}:///{}'.format(WDR_DB_TYPE, WDR_DB_NAME) + WDR_DATABASE_URL = f'{WDR_DB_TYPE}:///{WDR_DB_NAME}' else: if None in [WDR_DB_USERNAME, WDR_DB_PASSWORD, WDR_SEARCH_TYPE, WDR_SEARCH_URL, WDR_WAF_BASEDIR, WDR_WAF_BASEURL]: @@ -98,12 +102,10 @@ LOGGER.error(msg) raise EnvironmentError(msg) - WDR_DATABASE_URL = '{}://{}:{}@{}:{}/{}'.format(WDR_DB_TYPE, - WDR_DB_USERNAME, - WDR_DB_PASSWORD, - WDR_DB_HOST, - WDR_DB_PORT, - WDR_DB_NAME) + auth = f'{WDR_DB_USERNAME}:{WDR_DB_PASSWORD}' + host_port_name = f'{WDR_DB_HOST}:{WDR_DB_PORT}/{WDR_DB_NAME}' + + WDR_DATABASE_URL = f'{WDR_DB_TYPE}://{auth}@{host_port_name}' if None in [WDR_ERROR_CONFIG, WDR_EXTRA_CONFIG]: msg = 'Central configuration environment variables are not set!' @@ -115,6 +117,6 @@ with open(WDR_EXTRA_CONFIG) as extra_config_file: EXTRAS = yaml.safe_load(extra_config_file) except Exception as err: - msg = 'Failed to read extra configurations file due to: {}'.format(err) + msg = f'Failed to read extra configurations file: {err}' LOGGER.error(msg) raise EnvironmentError(msg) diff --git a/woudc_data_registry/controller.py b/woudc_data_registry/controller.py index 5d6b7f01..45065103 100644 --- a/woudc_data_registry/controller.py +++ b/woudc_data_registry/controller.py @@ -18,7 +18,7 @@ # those files. Users are asked to read the 3rd Party Licenses # referenced with those assets. # -# Copyright (c) 2019 Government of Canada +# Copyright (c) 2024 Government of Canada # # Permission is hereby granted, free of charge, to any person # obtaining a copy of this software and associated documentation @@ -113,7 +113,7 @@ def orchestrate(source, working_dir, metadata_only=False, run_report = RunReport(working_dir) for file_to_process, contributor in run_: - click.echo('Processing filename: {}'.format(file_to_process)) + click.echo(f'Processing filename: {file_to_process}') LOGGER.info('Detecting file') if not is_text_file(file_to_process): @@ -171,7 +171,7 @@ def orchestrate(source, working_dir, metadata_only=False, passed.append(file_to_process) except UnicodeDecodeError as err: - LOGGER.error('Unknown file format: {}'.format(err)) + LOGGER.error(f'Unknown file format: {err}') click.echo('Not ingested') failed.append(file_to_process) @@ -179,7 +179,7 @@ def orchestrate(source, working_dir, metadata_only=False, op_report.write_failing_file(file_to_process, contributor) run_report.write_failing_file(file_to_process, contributor) except NonStandardDataError as err: - LOGGER.error('Invalid Extended CSV: {}'.format(err.errors)) + LOGGER.error(f'Invalid Extended CSV: {err.errors}') click.echo('Not ingested') failed.append(file_to_process) @@ -187,7 +187,7 @@ def orchestrate(source, working_dir, metadata_only=False, op_report.write_failing_file(file_to_process, contributor) run_report.write_failing_file(file_to_process, contributor) except MetadataValidationError as err: - LOGGER.error('Invalid Extended CSV: {}'.format(err.errors)) + LOGGER.error(f'Invalid Extended CSV: {err.errors}') click.echo('Not ingested') failed.append(file_to_process) @@ -195,7 +195,7 @@ def orchestrate(source, working_dir, metadata_only=False, op_report.write_failing_file(file_to_process, contributor) run_report.write_failing_file(file_to_process, contributor) except Exception as err: - click.echo('Processing failed: {}'.format(err)) + click.echo(f'Processing failed: {err}') failed.append(file_to_process) op_report.write_failing_file(file_to_process, contributor) @@ -205,12 +205,11 @@ def orchestrate(source, working_dir, metadata_only=False, for name in files_to_process: if name in passed: - click.echo('Pass: {}'.format(name)) + click.echo(f'Pass: {name}') elif name in failed: - click.echo('Fail: {}'.format(name)) + click.echo(f'Fail: {name}') - click.echo('({}/{} files passed)' - .format(len(passed), len(files_to_process))) + click.echo(f'({len(passed)}/{len(files_to_process)} files passed)') @click.group() diff --git a/woudc_data_registry/dataset_validators.py b/woudc_data_registry/dataset_validators.py index 6d925eac..5790869b 100644 --- a/woudc_data_registry/dataset_validators.py +++ b/woudc_data_registry/dataset_validators.py @@ -18,7 +18,7 @@ # those files. Users are asked to read the 3rd Party Licenses # referenced with those assets. # -# Copyright (c) 2019 Government of Canada +# Copyright (c) 2024 Government of Canada # # Permission is hereby granted, free of charge, to any person # obtaining a copy of this software and associated documentation @@ -80,7 +80,7 @@ def get_validator(dataset, reporter): elif dataset in DATASETS: return DatasetValidator(reporter) else: - raise ValueError('Invalid dataset {}'.format(dataset)) + raise ValueError(f'Invalid dataset {dataset}') class DatasetValidator(object): @@ -179,7 +179,7 @@ def check_time_series(self, extcsv): dates_encountered = {} rows_to_remove = [] - extcsv.extcsv['DAILY'].pop('comments', None) + extcsv.extcsv['DAILY'].pop('comments') daily_columns = zip(*extcsv.extcsv['DAILY'].values()) is_string = False @@ -250,7 +250,7 @@ def check_timestamps(self, extcsv): success = True timestamp1_date = extcsv.extcsv['TIMESTAMP']['Date'] - timestamp1_time = extcsv.extcsv['TIMESTAMP'].get('Time', None) + timestamp1_time = extcsv.extcsv['TIMESTAMP'].get('Time') daily_dates = extcsv.extcsv['DAILY']['Date'] timestamp1_startline = extcsv.line_num('TIMESTAMP') @@ -357,7 +357,7 @@ def derive_monthly_from_daily(self, extcsv): LOGGER.debug('Regenerating #MONTHLY table from data') dates_column = extcsv.extcsv['DAILY']['Date'] - ozone_column = extcsv.extcsv['DAILY'].get('ColumnO3', None) + ozone_column = extcsv.extcsv['DAILY'].get('ColumnO3') daily_fieldline = extcsv.line_num('DAILY') + 1 daily_valueline = daily_fieldline + 1 @@ -429,7 +429,7 @@ def check_time_series(self, extcsv): LOGGER.debug('Assessing order of #OBSERVATIONS.Time column') success = True - extcsv.extcsv['OBSERVATIONS'].pop('comments', None) + extcsv.extcsv['OBSERVATIONS'].pop('comments') observations = zip(*extcsv.extcsv['OBSERVATIONS'].values()) observations_valueline = extcsv.line_num('OBSERVATIONS') + 2 @@ -628,14 +628,14 @@ def check_time_series(self, extcsv): level = extcsv.extcsv['CONTENT']['Level'] data_table = 'N14_VALUES' if level == 1.0 else 'C_PROFILE' - LOGGER.debug('Assessing order of #{}.Date column'.format(data_table)) + LOGGER.debug(f'Assessing order of #{data_table}.Date column') success = True data_table_valueline = extcsv.line_num(data_table) + 2 dates_encountered = {} rows_to_remove = [] - extcsv.extcsv[data_table].pop('comments', None) + extcsv.extcsv[data_table].pop('comments') columns = zip(*extcsv.extcsv[data_table].values()) in_order = True @@ -694,7 +694,7 @@ def check_timestamps(self, extcsv): data_table = 'N14_VALUES' if level == 1.0 else 'C_PROFILE' timestamp1_date = extcsv.extcsv['TIMESTAMP']['Date'] - timestamp1_time = extcsv.extcsv['TIMESTAMP'].get('Time', None) + timestamp1_time = extcsv.extcsv['TIMESTAMP'].get('Time') observation_dates = extcsv.extcsv[data_table]['Date'] timestamp1_startline = extcsv.line_num('TIMESTAMP') diff --git a/woudc_data_registry/epicentre/contributor.py b/woudc_data_registry/epicentre/contributor.py index 874a9d9a..9755dcf7 100644 --- a/woudc_data_registry/epicentre/contributor.py +++ b/woudc_data_registry/epicentre/contributor.py @@ -18,7 +18,7 @@ # those files. Users are asked to read the 3rd Party Licenses # referenced with those assets. # -# Copyright (c) 2019 Government of Canada +# Copyright (c) 2024 Government of Canada # # Permission is hereby granted, free of charge, to any person # obtaining a copy of this software and associated documentation @@ -74,7 +74,7 @@ def list_(ctx): """List all contributors""" for c in get_metadata(Contributor): - click.echo('{} {}'.format(c.contributor_id.ljust(24), c.name)) + click.echo(f'{c.contributor_id.ljust(24)} {c.name}') @click.command('show') @@ -128,7 +128,7 @@ def add(ctx, name, acronym, country, project, wmo_region, result = add_metadata(Contributor, contributor_, save_to_registry, save_to_index) - click.echo('Contributor {} added'.format(result.contributor_id)) + click.echo(f'Contributor {result.contributor_id} added') @click.command('update') @@ -179,7 +179,7 @@ def update(ctx, identifier, name, acronym, country, project, update_metadata(Contributor, identifier, contributor_, save_to_registry, save_to_index) - click.echo('Contributor {} updated'.format(identifier)) + click.echo(f'Contributor {identifier} updated') @click.command('delete') @@ -192,13 +192,13 @@ def delete(ctx, identifier): click.echo('Contributor not found') return - q = 'Are you sure you want to delete contributor {}?'.format(identifier) + q = f'Are you sure you want to delete contributor {identifier}?' if click.confirm(q): # noqa delete_metadata(Contributor, identifier, save_to_registry, save_to_index) - click.echo('Contributor {} deleted'.format(identifier)) + click.echo(f'Contributor {identifier} deleted') contributor.add_command(list_) diff --git a/woudc_data_registry/epicentre/deployment.py b/woudc_data_registry/epicentre/deployment.py index 1f926b47..b993e934 100644 --- a/woudc_data_registry/epicentre/deployment.py +++ b/woudc_data_registry/epicentre/deployment.py @@ -18,7 +18,7 @@ # those files. Users are asked to read the 3rd Party Licenses # referenced with those assets. # -# Copyright (c) 2019 Government of Canada +# Copyright (c) 2024 Government of Canada # # Permission is hereby granted, free of charge, to any person # obtaining a copy of this software and associated documentation @@ -92,7 +92,7 @@ def list_(ctx): """List all deployments""" for c in get_metadata(Deployment): - click.echo('{} @ {}'.format(c.contributor_id.ljust(20), c.station_id)) + click.echo(f'{c.contributor_id.ljust(20)} @ {c.station_id}') @click.command('show') @@ -137,7 +137,7 @@ def add(ctx, station, contributor, start_date, end_date): result = add_metadata(Deployment, deployment_, save_to_registry, save_to_index) - click.echo('Deployment {} added'.format(result.deployment_id)) + click.echo(f'Deployment {result.deployment_id} added') @click.command('update') @@ -170,7 +170,7 @@ def update(ctx, identifier, station, contributor, start_date, end_date): update_metadata(Deployment, identifier, deployment_, save_to_registry, save_to_index) - click.echo('Deployment {} updated'.format(identifier)) + click.echo(f'Deployment {identifier} updated') @click.command('delete') @@ -183,13 +183,13 @@ def delete(ctx, identifier): click.echo('Contributor not found') return - q = 'Are you sure you want to delete deployment {}?'.format(identifier) + q = f'Are you sure you want to delete deployment {identifier}?' if click.confirm(q): # noqa delete_metadata(Deployment, identifier, save_to_registry, save_to_index) - click.echo('Deployment {} deleted'.format(identifier)) + click.echo(f'Deployment {identifier} deleted') deployment.add_command(list_) diff --git a/woudc_data_registry/epicentre/instrument.py b/woudc_data_registry/epicentre/instrument.py index f5b0e2e4..e82a9b72 100644 --- a/woudc_data_registry/epicentre/instrument.py +++ b/woudc_data_registry/epicentre/instrument.py @@ -18,7 +18,7 @@ # those files. Users are asked to read the 3rd Party Licenses # referenced with those assets. # -# Copyright (c) 2019 Government of Canada +# Copyright (c) 2024 Government of Canada # # Permission is hereby granted, free of charge, to any person # obtaining a copy of this software and associated documentation @@ -110,9 +110,8 @@ def list_(ctx): for c in get_metadata(Instrument): descriptor = ' '.join([c.name, c.model, c.serial]) - station = '{}{}'.format(c.station.station_type, c.station_id) - click.echo('{} - {}, {}'.format(descriptor.ljust(30), station, - c.dataset_id)) + station = f'{c.station.station_type}{c.station_id}' + click.echo(f'{descriptor.ljust(30)} - {station}, {c.dataset_id}') @click.command('show') @@ -167,7 +166,7 @@ def add(ctx, station, dataset, contributor, name, model, serial, geometry): result = add_metadata(Instrument, instrument_, save_to_registry, save_to_index) - click.echo('Instrument {} added'.format(result.instrument_id)) + click.echo(f'Instrument {result.instrument_id} added') @click.command('update') @@ -226,7 +225,7 @@ def update(ctx, identifier, station, dataset, update_metadata(Instrument, identifier, instrument_, save_to_registry, save_to_index) - click.echo('Instrument {} updated'.format(identifier)) + click.echo(f'Instrument {identifier} updated') @click.command('delete') @@ -239,12 +238,12 @@ def delete(ctx, identifier): click.echo('Instrument not found') return - q = 'Are you sure you want to delete instrument {}?'.format(identifier) + q = f'Are you sure you want to delete instrument {identifier}?' if click.confirm(q): # noqa delete_metadata(Instrument, identifier, save_to_registry, save_to_index) - click.echo('Instrument {} deleted'.format(identifier)) + click.echo(f'Instrument {identifier} deleted') instrument.add_command(list_) diff --git a/woudc_data_registry/epicentre/metadata.py b/woudc_data_registry/epicentre/metadata.py index 03389306..fde1c472 100644 --- a/woudc_data_registry/epicentre/metadata.py +++ b/woudc_data_registry/epicentre/metadata.py @@ -18,7 +18,7 @@ # those files. Users are asked to read the 3rd Party Licenses # referenced with those assets. # -# Copyright (c) 2019 Government of Canada +# Copyright (c) 2024 Government of Canada # # Permission is hereby granted, free of charge, to any person # obtaining a copy of this software and associated documentation @@ -65,12 +65,12 @@ def get_metadata(entity, identifier=None): :returns: `list` of all matching objects """ - LOGGER.debug('Querying metadata objects {}'.format(entity)) + LOGGER.debug(f'Querying metadata objects {entity}') prop = getattr(entity, entity.id_field) if identifier is None: res = REGISTRY.session.query(entity).order_by(prop) else: - LOGGER.debug('Quering identifier {}'.format(identifier)) + LOGGER.debug(f'Quering identifier {identifier}') res = REGISTRY.session.query(entity).filter( prop == identifier).all() @@ -83,7 +83,7 @@ def get_metadata(entity, identifier=None): term = 'results' else: term = 'result' - LOGGER.debug('Found {} {}'.format(count, term)) + LOGGER.debug(f'Found {count} {term}') return res @@ -106,7 +106,7 @@ def add_metadata(entity, dict_, save_to_registry=True, save_to_index=True): Country.name_en == dict_['country_id']) if results.count() == 0: - msg = 'Invalid country: {}'.format(dict_['country_id']) + msg = f"Invalid country: {dict_['country']}" LOGGER.error(msg) raise ValueError(msg) @@ -118,7 +118,7 @@ def add_metadata(entity, dict_, save_to_registry=True, save_to_index=True): Contributor.contributor_id == dict_['contributor_id']) if results.count() == 0: - msg = 'Invalid contributor: {}'.format(dict_['contributor_id']) + msg = f"Invalid contributor: {dict_['contributor_id']}" LOGGER.error(msg) raise ValueError(msg) @@ -160,12 +160,11 @@ def update_metadata(entity, identifier, dict_, records = get_metadata(entity, identifier) if len(records) == 0: - msg = 'identifier {} not found'.format(identifier) + msg = f'identifier {identifier} not found' LOGGER.warning(msg) raise ValueError(msg) else: - LOGGER.debug('Updating metadata entity {}, identifier {}' - .format(entity, identifier)) + LOGGER.debug(f'Updating metadata entity {entity}, identifier {identifier}') # noqa obj = records[0] if 'station_name' in dict_ and 'station_id' in dict_: @@ -188,8 +187,7 @@ def update_metadata(entity, identifier, dict_, try: obj.generate_ids() except Exception as err: - LOGGER.warning('Unable to generate IDS due to: {}' - .format(str(err))) + LOGGER.warning(f'Unable to generate IDS: {err}') if save_to_index and getattr(obj, entity.id_field) != identifier: SEARCH_INDEX.unindex(entity, identifier) @@ -215,8 +213,7 @@ def delete_metadata(entity, identifier, :returns: `bool` of whether the operation was successful. """ - LOGGER.debug('Updating metadata entity {}, identifier {}'.format( - entity, identifier)) + LOGGER.debug(f'Updating metadata entity {entity}, identifier {identifier}') # noqa prop = getattr(entity, entity.id_field) REGISTRY.session.query(entity).filter(prop == identifier).delete() diff --git a/woudc_data_registry/epicentre/notification.py b/woudc_data_registry/epicentre/notification.py index 36d0bd82..7864152a 100644 --- a/woudc_data_registry/epicentre/notification.py +++ b/woudc_data_registry/epicentre/notification.py @@ -18,7 +18,7 @@ # those files. Users are asked to read the 3rd Party Licenses # referenced with those assets. # -# Copyright (c) 2019 Government of Canada +# Copyright (c) 2024 Government of Canada # # Permission is hereby granted, free of charge, to any person # obtaining a copy of this software and associated documentation @@ -75,7 +75,7 @@ def list_(ctx): """List all news notifications""" for c in get_metadata(Notification): - click.echo('{} {}'.format(c.published_date, c.title_en)) + click.echo(f'{c.published_date} {c.title_en}') @click.command('show') @@ -112,7 +112,7 @@ def add(ctx, identifier, path): added = add_metadata(Notification, notification, save_to_registry, save_to_index) - click.echo('Notification {} added'.format(added.notification_id)) + click.echo(f'Notification {added.notification_id} added') @click.command('update') @@ -133,7 +133,7 @@ def update(ctx, identifier, path): update_metadata(Notification, identifier, notification, save_to_registry, save_to_index) - click.echo('Notification {} updated'.format(identifier)) + click.echo(f'Notification {identifier} updated') @click.command('delete') @@ -146,14 +146,13 @@ def delete(ctx, identifier): click.echo('Station not found') return - q = 'Are you sure you want to delete news notification {}?' \ - .format(identifier) + q = f'Are you sure you want to delete news notification {identifier}?' if click.confirm(q): # noqa delete_metadata(Notification, identifier, save_to_registry, save_to_index) - click.echo('News notification {} deleted'.format(identifier)) + click.echo(f'News notification {identifier} deleted') notification.add_command(list_) diff --git a/woudc_data_registry/epicentre/station.py b/woudc_data_registry/epicentre/station.py index 24a25437..91e83ccd 100644 --- a/woudc_data_registry/epicentre/station.py +++ b/woudc_data_registry/epicentre/station.py @@ -18,7 +18,7 @@ # those files. Users are asked to read the 3rd Party Licenses # referenced with those assets. # -# Copyright (c) 2019 Government of Canada +# Copyright (c) 2024 Government of Canada # # Permission is hereby granted, free of charge, to any person # obtaining a copy of this software and associated documentation @@ -69,7 +69,7 @@ def build_station_name(ecsv): station_id = str(ecsv.extcsv['PLATFORM']['ID']) station_name = ecsv.extcsv['PLATFORM']['Name'] - name_id = '{}:{}'.format(station_id, station_name) + name_id = f'{station_id}:{station_name}' observation_time = ecsv.extcsv['TIMESTAMP']['Date'] model = { @@ -95,7 +95,7 @@ def list_(ctx): """List all stations""" for c in get_metadata(Station): - click.echo('{} {}'.format(c.station_id.ljust(3), c.station_name.name)) + click.echo(f'{c.station_id.ljust(3)} {c.station_name.name}') @click.command('show') @@ -152,7 +152,7 @@ def add(ctx, identifier, name, type_, gaw_id, country, } add_metadata(Station, station_, save_to_registry, save_to_index) - click.echo('Station {} added'.format(identifier)) + click.echo(f'Station {identifier} added') @click.command('update') @@ -204,7 +204,7 @@ def update(ctx, identifier, name, type_, gaw_id, country, update_metadata(Station, identifier, station_, save_to_registry, save_to_index) - click.echo('Station {} updated'.format(identifier)) + click.echo(f'Station {identifier} updated') @click.command('delete') @@ -217,12 +217,12 @@ def delete(ctx, identifier): click.echo('Station not found') return - q = 'Are you sure you want to delete station {}?'.format(identifier) + q = f'Are you sure you want to delete station {identifier}?' if click.confirm(q): # noqa delete_metadata(Station, identifier, save_to_registry, save_to_index) - click.echo('Station {} deleted'.format(identifier)) + click.echo(f'Station {identifier} deleted') station.add_command(list_) diff --git a/woudc_data_registry/generate_metadata.py b/woudc_data_registry/generate_metadata.py index 12458a37..559f867b 100644 --- a/woudc_data_registry/generate_metadata.py +++ b/woudc_data_registry/generate_metadata.py @@ -19,7 +19,7 @@ # those files. Users are asked to read the 3rd Party Licenses # referenced with those assets. # -# Copyright (c) 2022 Government of Canada +# Copyright (c) 2024 Government of Canada # # Permission is hereby granted, free of charge, to any person # obtaining a copy of this software and associated documentation @@ -114,9 +114,7 @@ def generate_metadata(woudc_yaml): else: search_id = key2.lower().replace('-', '') snapshot_id = search_id - uri = '{}/{}/{}/{}'.format( - uri_pre, key, key1, search_id - ) + uri = f'{uri_pre}/{key}/{key1}/{search_id}' time_begin, time_end = \ value1['extent']['time'].split('/') dataset_md["id"] = key2 @@ -233,11 +231,7 @@ def generate_metadata(woudc_yaml): if value2['waf_dir'] != 'none': dataset_md['properties']['waf'] = { - 'url': - '{}/Archive-NewFormat/{}'.format( - WOUDC_ARCHIVE, - value2['waf_dir'] - ), + 'url': f"{WOUDC_ARCHIVE}/Archive-NewFormat/{value2['waf_dir']}", # noqa 'linktype': 'WWW:LINK', 'function': 'download', 'label_en': value2['waf_dir'], @@ -250,13 +244,7 @@ def generate_metadata(woudc_yaml): } dataset_md['properties']['dataset_snapshots'] = { - 'url': - '{}/Summaries' - '/dataset-snapshots/' - '{}.zip'.format( - WOUDC_ARCHIVE, - snapshot_id - ), + 'url': f'{WOUDC_ARCHIVE}/Summaries/dataset-snapshots/{snapshot_id}.zip', # noqa 'linktype': 'WWW:LINK', 'function': 'download', 'label_en': value2['label_en'], @@ -268,10 +256,7 @@ def generate_metadata(woudc_yaml): } dataset_md['properties']['wms'] = { - 'url': '{}?service={}' - '&version={}&' - 'request=GetCapabilities' - .format(WOUDC_OWS, 'WMS', '1.3.0'), + 'url': f'{WOUDC_OWS}?service=WMS&version=1.3.0&request=GetCapabilities', # noqa 'linktype': 'OGC:WMS', 'function': 'download', 'label_en': key2, @@ -281,10 +266,7 @@ def generate_metadata(woudc_yaml): } dataset_md['properties']['wfs'] = { - 'url': '{}?service={}' - '&version={}&' - 'request=GetCapabilities' - .format(WOUDC_OWS, 'WFS', '1.3.0'), + 'url': f'{WOUDC_OWS}?service=WFS&version=1.1.0&request=GetCapabilities', # noqa 'linktype': 'OGC:WFS', 'function': 'download', 'label_en': key2, @@ -296,8 +278,7 @@ def generate_metadata(woudc_yaml): } dataset_md['properties']['search'] = { - 'url': - '{}?dataset={}'.format(WOUDC_DATA, key2), + 'url': f'{WOUDC_DATA}?dataset={key2}', 'linktype': 'WWW:LINK', 'function': 'search', 'label_en': value2['label_en'], @@ -409,9 +390,7 @@ def update_extents(): for curr_level in levels: is_included = False for level in md_loads['properties']['levels']: - if level['label_en'] == 'Level {}'.format( - curr_level - ): + if level['label_en'] == f'Level {curr_level}': is_included = True if not is_included: if dataset.startswith(('TotalOzone', 'UmkehrN14')): @@ -420,7 +399,7 @@ def update_extents(): label_en = dataset # Add level item if it does not already exist md_loads['properties']['levels'].append( - {'label_en': 'Level {}'.format(curr_level), + {'label_en': f'Level {curr_level}', 'networks': [{ 'label_en': label_en, 'instruments': [] @@ -442,9 +421,7 @@ def update_extents(): subquery ) for level in md_loads['properties']['levels']: - if level['label_en'] == 'Level {}'.format( - curr_level - ): + if level['label_en'] == f'Level {curr_level}': for ins in instruments: is_included = False otherIndex = [False, -1] @@ -524,13 +501,13 @@ def update_extents(): for curr_level in levels: is_included = False for level in md_loads['properties']['levels']: - if level['label_en'] == 'Level {}'.format(curr_level): + if level['label_en'] == f'Level {curr_level}': is_included = True if not is_included: # Add level item if it does not already exist md_loads['properties']['levels'].append( { - 'label_en': 'Level {}'.format(curr_level), + 'label_en': f'Level {curr_level}', 'networks': [{ 'label_en': inputs[input_table]['label_en'], 'instruments': [] @@ -548,7 +525,7 @@ def update_extents(): subquery ) for level in md_loads['properties']['levels']: - if level['label_en'] == 'Level {}'.format(curr_level): + if level['label_en'] == f'Level {curr_level}': for ins in instruments: is_included = False for n in level['networks']: diff --git a/woudc_data_registry/models.py b/woudc_data_registry/models.py index 03c51a49..a2511d2e 100644 --- a/woudc_data_registry/models.py +++ b/woudc_data_registry/models.py @@ -18,7 +18,7 @@ # those files. Users are asked to read the 3rd Party Licenses # referenced with those assets. # -# Copyright (c) 2021 Government of Canada +# Copyright (c) 2024 Government of Canada # # Permission is hereby granted, free of charge, to any person # obtaining a copy of this software and associated documentation @@ -132,7 +132,7 @@ def __geo_interface__(self): } def __repr__(self): - return 'Country ({}, {})'.format(self.country_id, self.name_en) + return f'Country ({self.country_id}, {self.name_en})' class Contributor(base): @@ -231,7 +231,7 @@ def __geo_interface__(self): } def __repr__(self): - return 'Contributor ({}, {})'.format(self.contributor_id, self.name) + return f'Contributor ({self.contributor_id}, {self.name})' def generate_ids(self): """Builds and sets class ID field from other attributes""" @@ -277,7 +277,7 @@ def __geo_interface__(self): } def __repr__(self): - return 'Dataset ({})'.format(self.dataset_id) + return f'Dataset ({self.dataset_id})' class Instrument(base): @@ -357,10 +357,9 @@ def __geo_interface__(self): else: dataset_folder = 'UmkehrN14_2.0_1' else: - dataset_folder = '{}_1.0_1'.format(self.dataset_id) + dataset_folder = f'{self.dataset_id}_1.0_1' - station_folder = '{}{}'.format(self.station.station_type.lower(), - self.station_id) + station_folder = f'{self.station.station_type.lower()}{self.station_id}' # noqa instrument_folder = self.name.lower() return { @@ -386,7 +385,7 @@ def __geo_interface__(self): } def __repr__(self): - return 'Instrument ({})'.format(self.instrument_id) + return f'Instrument ({self.instrument_id})' def generate_ids(self): """Builds and sets class ID field from other attributes""" @@ -428,7 +427,7 @@ def __geo_interface__(self): } def __repr__(self): - return 'Project ({})'.format(self.discovery_metadata_id) + return f'Project ({self.discovery_metadata_id})' class Project(base): @@ -456,7 +455,7 @@ def __geo_interface__(self): } def __repr__(self): - return 'Project ({})'.format(self.project_id) + return f'Project ({self.project_id})' class Station(base): @@ -498,8 +497,7 @@ def __init__(self, dict_): """serializer""" self.station_id = dict_['station_id'] - self.station_name_id = '{}:{}' \ - .format(self.station_id, dict_['station_name']) + self.station_name_id = f"{self.station_id}:{dict_['station_name']}" self.station_type = dict_['station_type'] self._name = dict_['station_name'] @@ -542,7 +540,7 @@ def name(self): def __geo_interface__(self): gaw_baseurl = 'https://gawsis.meteoswiss.ch/GAWSIS/index.html#' \ '/search/station/stationReportDetails' - gaw_pagename = '0-20008-0-{}'.format(self.gaw_id) + gaw_pagename = f'0-20008-0-{self.gaw_id}' return { 'id': self.station_id, @@ -561,13 +559,12 @@ def __geo_interface__(self): 'end_date': strftime_rfc3339(self.end_date), 'last_validated_datetime': strftime_rfc3339(self.last_validated_datetime), - 'gaw_url': '{}/{}'.format(gaw_baseurl, gaw_pagename) + 'gaw_url': f'{gaw_baseurl}/{gaw_pagename}' } } def __repr__(self): - return 'Station ({}, {})'.format(self.station_id, - self.station_name.name) + return f'Station ({self.station_id}, {self.station_name.name})' class StationName(base): @@ -590,7 +587,7 @@ def __init__(self, dict_): self.generate_ids() def __repr__(self): - return 'Station name ({}, {})'.format(self.station_id, self.name) + return f'Station name ({self.station_id}, {self.name})' def generate_ids(self): """Builds and sets class ID field from other attributes""" @@ -674,7 +671,7 @@ def __geo_interface__(self): } def __repr__(self): - return 'Deployment ({})'.format(self.deployment_id) + return f'Deployment ({self.deployment_id})' def generate_ids(self): """Builds and sets class ID field from other attributes""" @@ -934,15 +931,12 @@ def get_waf_path(self, basepath): else: dataset_only = self.content_category - datasetdirname = '{}_{}_{}'.format(dataset_only, - self.content_level, - self.content_form) + datasetdirname = f'{dataset_only}_{self.content_level}_{self.content_form}' # noqa' url_tokens = [ basepath.rstrip('/'), 'Archive-NewFormat', - datasetdirname, - '{}{}'.format(self.platform_type.lower(), self.station_id), + datasetdirname, f'{self.platform_type.lower()}{self.station_id}', self.instrument_name.lower(), self.timestamp_date.strftime('%Y'), self.filename @@ -981,8 +975,8 @@ def __geo_interface__(self): 'timestamp_utcoffset': self.timestamp_utcoffset, 'timestamp_date': strftime_rfc3339(self.timestamp_date), - 'timestamp_time': None if self.timestamp_time is None \ - else self.timestamp_time.isoformat(), + 'timestamp_time': (None if self.timestamp_time is None + else self.timestamp_time.isoformat()), 'timestamp_utc': strftime_rfc3339(self.timestamp_utc), 'published': self.published, @@ -1004,7 +998,7 @@ def __geo_interface__(self): } def __repr__(self): - return 'DataRecord({}, {})'.format(self.data_record_id, self.url) + return f'DataRecord({self.data_record_id}, {self.url})' class Contribution(base): @@ -1087,7 +1081,7 @@ def __geo_interface__(self): } def __repr__(self): - return 'Contribution ({})'.format(self.contribution_id) + return f'Contribution ({self.contribution_id})' def generate_ids(self): """Builds and sets class ID field from other attributes""" @@ -1179,7 +1173,7 @@ def __geo_interface__(self): } def __repr__(self): - return 'Notification ({})'.format(self.notification_id) + return f'Notification ({self.notification_id})' class PeerDataRecord(base): @@ -1250,8 +1244,7 @@ def __init__(self, dict_): self.contributor_acronym = dict_['contributor_acronym'] self.station_id = dict_['station_id'] - self.station_name_id = '{}:{}' \ - .format(self.station_id, dict_['station_name']) + self.station_name_id = f"{self.station_id}:{dict_['station_name']}" self.station_type = dict_['station_type'] self.country_id = dict_['country_id'] self.gaw_id = dict_.get('gaw_id') @@ -1297,7 +1290,7 @@ def contributor_url(self): def __geo_interface__(self): gaw_baseurl = 'https://gawsis.meteoswiss.ch/GAWSIS/index.html#' \ '/search/station/stationReportDetails' - gaw_pagename = '0-20008-0-{}'.format(self.gaw_id) + gaw_pagename = f'0-20008-0-{self.gaw_id}' return { 'id': self.es_id, @@ -1310,7 +1303,7 @@ def __geo_interface__(self): 'station_id': self.station_id, 'station_name': self.name, 'station_type': self.station_type, - 'gaw_url': '{}/{}'.format(gaw_baseurl, gaw_pagename), + 'gaw_url': f'{gaw_baseurl}/{gaw_pagename}', 'gaw_id': self.gaw_id, 'contributor_acronym': self.contributor_acronym, 'contributor_url': @@ -1327,7 +1320,7 @@ def __geo_interface__(self): } def __repr__(self): - return 'PeerDataRecord({})'.format(self.url) + return f'PeerDataRecord({self.url})' class UVIndex(base): @@ -1428,16 +1421,14 @@ def timestamp_utc(self): def get_waf_path(self, dict_): """generate WAF url""" - datasetdirname = '{}_{}_{}'.format(self.dataset_id, - dict_['dataset_level'], - dict_['dataset_form']) + datasetdirname = f"{self.dataset_id}_{dict_['dataset_level']}_{dict_['dataset_form']}" # noqa timestamp_date = datetime.datetime.strptime( dict_['timestamp_date'], '%Y-%m-%d').date() url_tokens = [ config.WDR_WAF_BASEURL.rstrip('/'), 'Archive-NewFormat', datasetdirname, - '{}{}'.format(dict_['station_type'].lower(), self.station_id), + f"{dict_['station_type'].lower()}{self.station_id}", # noqa dict_['instrument_name'].lower(), timestamp_date.strftime('%Y'), dict_['filename'] @@ -1449,7 +1440,7 @@ def get_waf_path(self, dict_): def __geo_interface__(self): gaw_baseurl = 'https://gawsis.meteoswiss.ch/GAWSIS/index.html#' \ '/search/station/stationReportDetails' - gaw_pagename = '0-20008-0-{}'.format(self.station.gaw_id) + gaw_pagename = f'0-20008-0-{self.station.gaw_id}' return { 'id': self.uv_id, @@ -1462,7 +1453,7 @@ def __geo_interface__(self): 'station_id': self.station_id, 'station_name': self.station.station_name.name, 'station_gaw_id': self.station.gaw_id, - 'station_gaw_url': '{}/{}'.format(gaw_baseurl, gaw_pagename), + 'station_gaw_url': f'{gaw_baseurl}/{gaw_pagename}', 'contributor_name': self.instrument.deployment.contributor.name, 'contributor_acronym': @@ -1489,7 +1480,7 @@ def __geo_interface__(self): } def __repr__(self): - return 'UV_Index ({})'.format(self.uv_id) + return f'UV_Index ({self.uv_id})' def generate_ids(self): """Builds and sets class ID field from other attributes""" @@ -1590,16 +1581,14 @@ def __init__(self, dict_): def get_waf_path(self, dict_): """generate WAF url""" - datasetdirname = '{}_{}_{}'.format(self.dataset_id, - dict_['dataset_level'], - dict_['dataset_form']) + datasetdirname = f"{self.dataset_id}_{dict_['dataset_level']}_{dict_['dataset_form']}" # noqa timestamp_date = datetime.datetime.strptime( dict_['timestamp_date'], '%Y-%m-%d').date() url_tokens = [ config.WDR_WAF_BASEURL.rstrip('/'), 'Archive-NewFormat', datasetdirname, - '{}{}'.format(dict_['station_type'].lower(), self.station_id), + f"{dict_['station_type'].lower()}{self.station_id}", dict_['instrument_name'].lower(), timestamp_date.strftime('%Y'), self.file_name @@ -1611,7 +1600,7 @@ def get_waf_path(self, dict_): def __geo_interface__(self): gaw_baseurl = 'https://gawsis.meteoswiss.ch/GAWSIS/index.html#' \ '/search/station/stationReportDetails' - gaw_pagename = '0-20008-0-{}'.format(self.station.gaw_id) + gaw_pagename = f'0-20008-0-{self.station.gaw_id}' return { 'id': self.ozone_id, @@ -1624,7 +1613,7 @@ def __geo_interface__(self): 'station_id': self.station_id, 'station_name': self.station.station_name.name, 'station_gaw_id': self.station.gaw_id, - 'station_gaw_url': '{}/{}'.format(gaw_baseurl, gaw_pagename), + 'station_gaw_url': f'{gaw_baseurl}/{gaw_pagename}', 'contributor_name': self.instrument.deployment.contributor.name, 'contributor_acronym': @@ -1659,7 +1648,7 @@ def __geo_interface__(self): } def __repr__(self): - return 'TotalOzone ({})'.format(self.ozone_id) + return f'TotalOzone ({self.ozone_id})' def generate_ids(self): """Builds and sets class ID field from other attributes""" @@ -1763,16 +1752,14 @@ def __init__(self, dict_): def get_waf_path(self, dict_): """generate WAF url""" - datasetdirname = '{}_{}_{}'.format(self.dataset_id, - dict_['dataset_level'], - dict_['dataset_form']) + datasetdirname = f"{self.dataset_id}_{dict_['dataset_level']}_{ dict_['dataset_form']}" # noqa timestamp_date = datetime.datetime.strptime( dict_['timestamp_date'], '%Y-%m-%d').date() url_tokens = [ config.WDR_WAF_BASEURL.rstrip('/'), 'Archive-NewFormat', datasetdirname, - '{}{}'.format(dict_['station_type'].lower(), self.station_id), + f"{dict_['station_type'].lower()}{self.station_id}", dict_['instrument_name'].lower(), timestamp_date.strftime('%Y'), self.file_name @@ -1784,7 +1771,7 @@ def get_waf_path(self, dict_): def __geo_interface__(self): gaw_baseurl = 'https://gawsis.meteoswiss.ch/GAWSIS/index.html#' \ '/search/station/stationReportDetails' - gaw_pagename = '0-20008-0-{}'.format(self.station.gaw_id) + gaw_pagename = f'0-20008-0-{self.station.gaw_id}' return { 'id': self.ozone_id, @@ -1797,7 +1784,7 @@ def __geo_interface__(self): 'station_id': self.station_id, 'station_name': self.station.station_name.name, 'station_gaw_id': self.station.gaw_id, - 'station_gaw_url': '{}/{}'.format(gaw_baseurl, gaw_pagename), + 'station_gaw_url': f'{gaw_baseurl}/{gaw_pagename}', 'contributor_name': self.instrument.deployment.contributor.name, 'contributor_acronym': @@ -1819,7 +1806,7 @@ def __geo_interface__(self): } def __repr__(self): - return 'OzoneSonde ({})'.format(self.ozone_id) + return f'OzoneSonde ({self.ozone_id})' def generate_ids(self): """Builds and sets class ID field from other attributes""" @@ -1953,6 +1940,8 @@ def admin(): @click.pass_context def show_config(ctx): + masked = None + env_vars = [ 'WDR_LOGGING_LOGLEVEL', 'WDR_LOGGING_LOGFILE', @@ -1964,10 +1953,7 @@ def show_config(ctx): 'WDR_DB_PASSWORD', 'WDR_DB_NAME', 'WDR_SEARCH_TYPE', - 'WDR_SEARCH_INDEX_BASENAME', 'WDR_SEARCH_URL', - 'WDR_SEARCH_USERNAME', - 'WDR_SEARCH_PASSWORD', 'WDR_WAF_BASEDIR', 'WDR_WAF_BASEURL', 'WDR_ERROR_CONFIG', @@ -1977,17 +1963,14 @@ def show_config(ctx): ] for env_var in env_vars: - if env_var in ['WDR_DB_PASSWORD', 'WDR_SEARCH_PASSWORD']: - s = '{}: {}'.format(env_var, '*'*len(getattr(config, env_var))) + if env_var == 'WDR_DB_PASSWORD': + masked = '*' * len(getattr(config, env_var)) + s = '{env_var}: {masked}' elif env_var == 'WDR_DATABASE_URL' and config.WDR_DB_TYPE == 'postgresql': # noqa - value1 = getattr(config, env_var) - value_to_find = ':{}@'.format(config.WDR_DB_PASSWORD) - value_to_replace = ':{}@'.format('*'*len(config.WDR_DB_PASSWORD)) - value = value1.replace(value_to_find, value_to_replace) - - s = '{}: {}'.format(env_var, value) + value = config.WDR_DATABASE_URL.replace(config.WDR_DB_PASSWORD, masked) # noqa + s = f'{env_var}: {value}' else: - s = '{}: {}'.format(env_var, getattr(config, env_var)) + s = f'{env_var}: {getattr(config, env_var)}' click.echo(s) @@ -2006,7 +1989,7 @@ def setup(ctx): base.metadata.create_all(engine, checkfirst=True) click.echo('Done') except (OperationalError, ProgrammingError) as err: - click.echo('ERROR: {}'.format(err)) + click.echo(f'ERROR: {err}') @click.command() @@ -2023,7 +2006,7 @@ def teardown(ctx): base.metadata.drop_all(engine, checkfirst=True) click.echo('Done') except (OperationalError, ProgrammingError) as err: - click.echo('ERROR: {}'.format(err)) + click.echo(f'ERROR: {err}') @click.command() @@ -2041,10 +2024,10 @@ def init(ctx, datadir, init_search_index): raise click.ClickException('Missing required data directory') wmo_countries = os.path.join(datadir, 'wmo-countries.json') - countries = os.path.join(datadir, 'countries.json') + countries = os.path.join(datadir, 'init', 'countries.json') contributors = os.path.join(datadir, 'contributors.csv') stations = os.path.join(datadir, 'stations.csv') - ships = os.path.join(datadir, 'ships.csv') + ships = os.path.join(datadir, 'init', 'ships.csv') station_names = os.path.join(datadir, 'station-names.csv') datasets = os.path.join(datadir, 'datasets.csv') projects = os.path.join(datadir, 'projects.csv') @@ -2279,16 +2262,16 @@ def sync(ctx): plural_name = clazz.__tablename__ plural_caps = ''.join(map(str.capitalize, plural_name.split('_'))) - enabled_flag = '{}_enabled'.format(plural_name) + enabled_flag = f'{plural_name}_enabled' if not search_index_config.get(enabled_flag, True): - click.echo('{} index frozen (skipping)'.format(plural_caps)) + click.echo(f'{plural_caps} index frozen (skipping)') continue - click.echo('{}...'.format(plural_caps)) + click.echo(f'{plural_caps}...') if plural_caps == 'DataRecords': capacity = 10000 for obj in registry_.session.query(clazz).yield_per(1): - LOGGER.debug('Querying chunk of {}'.format(clazz)) + LOGGER.debug(f'Querying chunk of {clazz}') registry_contents.append(obj) if len(registry_contents) > capacity: @@ -2335,16 +2318,16 @@ def product_sync(ctx): plural_name = product.__tablename__ plural_caps = ''.join(map(str.capitalize, plural_name.split('_'))) - enabled_flag = '{}_enabled'.format(plural_name) + enabled_flag = f'{plural_name}_enabled' if not search_index_config.get(enabled_flag, True): - click.echo('{} index frozen (skipping)'.format(plural_caps)) + click.echo(f'{plural_caps} index frozen (skipping)') - click.echo('{}...'.format(plural_caps)) + click.echo(f'{plural_caps}...') registry_contents = [] # Sync product to elasticsearch for obj in registry_.session.query(product).yield_per(1): - LOGGER.debug('Querying chunk of {}'.format(product)) + LOGGER.debug(f'Querying chunk of {product}') registry_contents.append(obj) diff --git a/woudc_data_registry/peer/eubrewnet.py b/woudc_data_registry/peer/eubrewnet.py index 7a16a084..73768c1c 100644 --- a/woudc_data_registry/peer/eubrewnet.py +++ b/woudc_data_registry/peer/eubrewnet.py @@ -18,7 +18,7 @@ # those files. Users are asked to read the 3rd Party Licenses # referenced with those assets. # -# Copyright (c) 2021 Government of Canada +# Copyright (c) 2024 Government of Canada # # Permission is hereby granted, free of charge, to any person # obtaining a copy of this software and associated documentation @@ -102,7 +102,7 @@ def parse_index(csv_dict_reader): yield properties else: LOGGER.debug('No station metadata found.') - msg = 'Failed to persist PeerDataRecord({})'.format(row['Link']) + msg = f"Failed to persist PeerDataRecord {row['Link']}" LOGGER.error(msg) yield {} @@ -130,7 +130,7 @@ def index(ctx, file_index): PeerDataRecord.source == 'eubrewnet').delete() registry_.session.commit() - click.echo('Indexing EUBREWNET records from {}'.format(file_index)) + click.echo(f'Indexing EUBREWNET records from {file_index}') with open(file_index, encoding='utf-8') as csvfile: reader = csv.DictReader(csvfile) for dict_row in parse_index(reader): diff --git a/woudc_data_registry/peer/ndacc.py b/woudc_data_registry/peer/ndacc.py index daad084e..40ad6cdb 100644 --- a/woudc_data_registry/peer/ndacc.py +++ b/woudc_data_registry/peer/ndacc.py @@ -18,7 +18,7 @@ # those files. Users are asked to read the 3rd Party Licenses # referenced with those assets. # -# Copyright (c) 2021 Government of Canada +# Copyright (c) 2024 Government of Canada # # Permission is hereby granted, free of charge, to any person # obtaining a copy of this software and associated documentation @@ -102,7 +102,7 @@ def parse_index(csv_dict_reader): yield properties else: LOGGER.debug('No station metadata found.') - msg = 'Failed to persist PeerDataRecord({})'.format(row['url']) + msg = f"Failed to persist PeerDataRecord {row['url']}" LOGGER.error(msg) yield {} @@ -132,7 +132,7 @@ def index(ctx, file_index): PeerDataRecord.source == 'ndacc').delete() registry_.session.commit() - click.echo('Indexing NDACC records from {}'.format(file_index)) + click.echo(f'Indexing NDACC records from {file_index}') with open(file_index, encoding='utf-8') as csvfile: reader = csv.DictReader(csvfile) diff --git a/woudc_data_registry/processing.py b/woudc_data_registry/processing.py index 125a0c3f..d0bbc259 100644 --- a/woudc_data_registry/processing.py +++ b/woudc_data_registry/processing.py @@ -18,7 +18,7 @@ # those files. Users are asked to read the 3rd Party Licenses # referenced with those assets. # -# Copyright (c) 2019 Government of Canada +# Copyright (c) 2024 Government of Canada # # Permission is hereby granted, free of charge, to any person # obtaining a copy of this software and associated documentation @@ -169,8 +169,8 @@ def validate(self, extcsv, metadata_only=False, verify_only=False, if not deployment_ok: deployment_id = ':'.join([platform_id, agency, project]) - deployment_name = '{}@{}'.format(agency, platform_id) - LOGGER.warning('Deployment {} not found'.format(deployment_id)) + deployment_name = f'{agency}@{platform_id}' + LOGGER.warning(f'Deployment {deployment_id} not found') if verify_only: LOGGER.info('Verify mode. Skipping deployment addition.') @@ -180,8 +180,7 @@ def validate(self, extcsv, metadata_only=False, verify_only=False, self._add_to_report(202) else: - msg = 'Deployment {} not added. Skipping file.' \ - .format(deployment_name) + msg = f'Deployment {deployment_name} not added. Skipping file.' # noqa LOGGER.warning(msg) line = self.extcsv.line_num('PLATFORM') + 2 @@ -211,7 +210,7 @@ def validate(self, extcsv, metadata_only=False, verify_only=False, if old_serial != new_serial: LOGGER.debug('Attempting to search instrument serial' - ' number {}'.format(new_serial)) + f' number {new_serial}') self.extcsv.extcsv['INSTRUMENT']['Number'] = new_serial instrument_ok = self.check_instrument() @@ -219,8 +218,8 @@ def validate(self, extcsv, metadata_only=False, verify_only=False, if not instrument_ok: # Attempt to add a new record with the new serial number # using name and model from the registry - LOGGER.warning('No instrument with serial {} found' - ' in registry'.format(old_serial)) + LOGGER.warning(f'No instrument with serial {old_serial} ' + 'found in registry') self.extcsv.extcsv['INSTRUMENT']['Number'] = old_serial if verify_only: @@ -315,7 +314,7 @@ def persist(self): else: LOGGER.info('Beginning persistence to data registry') for model in self._registry_updates: - LOGGER.debug('Saving {} to registry'.format(str(model))) + LOGGER.debug(f'Saving {model} to registry') self.registry.save(model) if isinstance(model, DataRecord): @@ -341,7 +340,7 @@ def persist(self): allow_update_model = False if allow_update_model: - LOGGER.debug('Saving {} to search index'.format(model)) + LOGGER.debug(f'Saving {model} to search index') self.search_index.index(type(model), model.__geo_interface__) @@ -376,8 +375,7 @@ def add_deployment(self, bypass=False): LOGGER.info('Bypass mode. Skipping permission check.') allow_add_deployment = True else: - response = input('Deployment {} not found. Add? (y/n) [n]: ' - .format(deployment.deployment_id)) + response = input(f'Deployment {deployment.deployment_id} not found. Add? (y/n) [n]: ') # noqa allow_add_deployment = response.lower() in ['y', 'yes'] if not allow_add_deployment: @@ -410,8 +408,7 @@ def add_station_name(self, bypass=False): LOGGER.info('Bypass mode. Skipping permission check') allow_add_station_name = True else: - response = input('Station name {} not found. Add? (y/n) [n]: ' - .format(station_name_object.station_name_id)) + response = input(f'Station name {station_name_object.station_name_id} not found. Add? (y/n) [n]: ') # noqa allow_add_station_name = response.lower() in ['y', 'yes'] if not allow_add_station_name: @@ -443,8 +440,7 @@ def add_instrument(self, bypass=False): LOGGER.info('Bypass mode. Skipping permission check') allow_add_instrument = True else: - response = input('Instrument {} not found. Add? (y/n) [n]: ' - .format(instrument.instrument_id)) + response = input(f'Instrument {instrument.instrument_id} not found. Add? (y/n) [n]: ') # noqa allow_add_instrument = response.lower() in ['y', 'yes'] if allow_add_instrument: @@ -511,8 +507,7 @@ def add_contribution(self, bypass=False): LOGGER.info('Bypass mode. Skipping permission check') allow_add_contribution = True else: - response = input('Contribution {} not found. Add? (y/n) [n]: ' - .format(contribution.contribution_id)) + response = input(f'Contribution {contribution.contribution_id} not found. Add? (y/n) [n]: ') # noqa allow_add_contribution = response.lower() in ['y', 'yes'] if allow_add_contribution: @@ -545,11 +540,10 @@ def check_contribution(self): 'contribution_id', contribution_id) if not contribution: - LOGGER.warning('Contribution {} not found'.format(contribution_id)) + LOGGER.warning(f'Contribution {contribution_id} not found') return False else: - LOGGER.warning('Found contribution match for {}' - .format(contribution_id)) + LOGGER.warning(f'Found contribution match for {contribution_id}') if not isinstance(timestamp_date, (str, int)): if contribution.start_date > timestamp_date: contribution.start_date = timestamp_date @@ -573,11 +567,11 @@ def check_project(self): project = self.extcsv.extcsv['CONTENT']['Class'] - LOGGER.debug('Validating project {}'.format(project)) + LOGGER.debug(f'Validating project {project}') self.projects = self.registry.query_distinct(Project.project_id) if project in self.projects: - LOGGER.debug('Match found for project {}'.format(project)) + LOGGER.debug(f'Match found for project {project}') return True else: line = self.extcsv.line_num('CONTENT') + 2 @@ -600,14 +594,14 @@ def check_dataset(self): if dataset == 'UmkehrN14': dataset = '_'.join([dataset, str(level)]) - LOGGER.debug('Validating dataset {}'.format(dataset)) + LOGGER.debug(f'Validating dataset {dataset}') dataset_model = {'dataset_id': dataset} fields = ['dataset_id'] response = self.registry.query_multiple_fields(Dataset, dataset_model, fields, fields) if response: - LOGGER.debug('Match found for dataset {}'.format(dataset)) + LOGGER.debug(f'Match found for dataset {dataset}') self.extcsv.extcsv['CONTENT']['Category'] = response.dataset_id return True else: @@ -643,10 +637,9 @@ def check_contributor(self): agency = replacement self.extcsv.extcsv['DATA_GENERATION']['Agency'] = agency - LOGGER.debug('Validating contributor {} under project {}' - .format(agency, project)) + LOGGER.debug(f'Validating contributor {agency} under project {project}') # noqa contributor = { - 'contributor_id': '{}:{}'.format(agency, project), + 'contributor_id': f'{agency}:{project}', 'project_id': project } @@ -657,8 +650,7 @@ def check_contributor(self): contributor_name = result.acronym self.extcsv.extcsv['DATA_GENERATION']['Agency'] = contributor_name - LOGGER.debug('Match found for contributor ID {}' - .format(result.contributor_id)) + LOGGER.debug(f'Match found for contributor ID {result.contributor_id}') # noqa else: line = self.extcsv.line_num('DATA_GENERATION') + 2 if not self._add_to_report(67, line): @@ -687,10 +679,10 @@ def check_station(self, bypass=False, verify=False): pl_type = self.extcsv.extcsv['PLATFORM']['Type'] name = self.extcsv.extcsv['PLATFORM']['Name'] country = self.extcsv.extcsv['PLATFORM']['Country'] - # gaw_id = self.extcsv.extcsv['PLATFORM'].get('GAW_ID', None) + # gaw_id = self.extcsv.extcsv['PLATFORM'].get('GAW_ID') # TODO: consider adding and checking #PLATFORM_Type - LOGGER.debug('Validating station {}:{}'.format(identifier, name)) + LOGGER.debug(f'Validating station {identifier}:{name}') valueline = self.extcsv.line_num('PLATFORM') + 2 water_codes = ['*IW', 'IW', 'XZ'] @@ -718,7 +710,7 @@ def check_station(self, bypass=False, verify=False): response = self.registry.query_by_field(Station, 'station_id', identifier) if response: - LOGGER.debug('Validated station with id: {}'.format(identifier)) + LOGGER.debug(f'Validated station with id: {identifier}') else: self._add_to_report(71, valueline) return False @@ -728,7 +720,7 @@ def check_station(self, bypass=False, verify=False): type_ok = pl_type in platform_types if type_ok: - LOGGER.debug('Validated station type {}'.format(type_ok)) + LOGGER.debug(f'Validated station type {type_ok}') elif not self._add_to_report(72, valueline): success = False @@ -739,13 +731,11 @@ def check_station(self, bypass=False, verify=False): name_ok = bool(response) if name_ok: self.extcsv.extcsv['PLATFORM']['Name'] = name = response.name - LOGGER.debug('Validated with name {} for id {}'.format( - name, identifier)) + LOGGER.debug(f'Validated with name {name} for id {identifier}') elif verify: LOGGER.info('Verify mode. Skipping station name addition.') elif self.add_station_name(bypass=bypass): - LOGGER.info('Added new station name {}' - .format(station['current_name'])) + LOGGER.info(f"Added new station name {station['current_name']}") elif not self._add_to_report(73, valueline, name=name): success = False @@ -757,9 +747,7 @@ def check_station(self, bypass=False, verify=False): if country_ok: country = response.country self.extcsv.extcsv['PLATFORM']['Country'] = country.country_id - LOGGER.debug('Validated with country: {} ({}) for id: {}' - .format(country.name_en, country.country_id, - identifier)) + LOGGER.debug(f'Validated with country: {country.name_en} ({country.country_id}) for id: {identifier}') # noqa elif not self._add_to_report(74, valueline): success = False @@ -790,11 +778,10 @@ def check_deployment(self): deployment = self.registry.query_by_field(Deployment, 'deployment_id', deployment_id) if not deployment: - LOGGER.warning('Deployment {} not found'.format(deployment_id)) + LOGGER.warning(f'Deployment {deployment_id} not found') return False else: - LOGGER.debug('Found deployment match for {}' - .format(deployment_id)) + LOGGER.debug(f'Found deployment match for {deployment_id}') if not isinstance(timestamp_date, (str, int)): if deployment.start_date > timestamp_date: deployment.start_date = timestamp_date @@ -891,12 +878,10 @@ def check_instrument(self): Instrument, model, fields, case_insensitive) if not response: - LOGGER.warning('No instrument {} found in registry' - .format(instrument.instrument_id)) + LOGGER.warning(f'No instrument {instrument.instrument_id} found in registry') # noqa return False else: - LOGGER.debug('Found instrument match for {}' - .format(instrument.instrument_id)) + LOGGER.debug(f'Found instrument match for {instrument.instrument_id}') # noqa self.extcsv.extcsv['INSTRUMENT']['Number'] = response.serial return True @@ -919,7 +904,7 @@ def check_location(self): lat = self.extcsv.extcsv['LOCATION']['Latitude'] lon = self.extcsv.extcsv['LOCATION']['Longitude'] - height = self.extcsv.extcsv['LOCATION'].get('Height', None) + height = self.extcsv.extcsv['LOCATION'].get('Height') valueline = self.extcsv.line_num('LOCATION') + 2 try: @@ -1082,8 +1067,8 @@ def check_data_generation(self): success = True - dg_date = self.extcsv.extcsv['DATA_GENERATION'].get('Date', None) - version = self.extcsv.extcsv['DATA_GENERATION'].get('Version', None) + dg_date = self.extcsv.extcsv['DATA_GENERATION'].get('Date') + version = self.extcsv.extcsv['DATA_GENERATION'].get('Version') valueline = self.extcsv.line_num('DATA_GENERATION') @@ -1142,7 +1127,7 @@ def check_time_series(self): success = True dg_date = self.extcsv.extcsv['DATA_GENERATION']['Date'] - ts_time = self.extcsv.extcsv['TIMESTAMP'].get('Time', None) + ts_time = self.extcsv.extcsv['TIMESTAMP'].get('Time') for table, body in self.extcsv.extcsv.items(): if table == 'DATA_GENERATION': diff --git a/woudc_data_registry/product/ozonesonde/ozonesonde_generator.py b/woudc_data_registry/product/ozonesonde/ozonesonde_generator.py index 048af9a8..8a2b4554 100644 --- a/woudc_data_registry/product/ozonesonde/ozonesonde_generator.py +++ b/woudc_data_registry/product/ozonesonde/ozonesonde_generator.py @@ -18,7 +18,7 @@ # those files. Users are asked to read the 3rd Party Licenses # referenced with those assets. # -# Copyright (c) 2021 Government of Canada +# Copyright (c) 2024 Government of Canada # # Permission is hereby granted, free of charge, to any person # obtaining a copy of this software and associated documentation @@ -80,12 +80,12 @@ def execute(path, bypass): for filename in filenames: ipath = os.path.join(dirname, filename) contents = read_file(ipath) - LOGGER.debug('Parsing extcsv {}'.format(ipath)) + LOGGER.debug(f'Parsing extcsv {ipath}') try: extcsv = ExtendedCSV(contents) except Exception as err: - msg = 'Unable to parse extcsv {}: {}'.format(ipath, err) + msg = f'Unable to parse extcsv {ipath}: {err}' LOGGER.error(msg) continue @@ -110,8 +110,7 @@ def execute(path, bypass): instrument_height = extcsv.extcsv['LOCATION']['Height'][0] timestamp_date = extcsv.extcsv['TIMESTAMP']['Date'][0] except Exception as err: - msg = 'Unable to get metadata from extcsv {}: {}'.format( - ipath, err) + msg = f'Unable to get metadata from extcsv {ipath}: {err}' LOGGER.error(msg) continue @@ -197,9 +196,8 @@ def execute(path, bypass): allow_add_instrument = True else: response = \ - input('Instrument {} not found. ' - 'Add? (y/n) [n]: ' - .format(instrument_id)) + input(f'Instrument {instrument_id} not found. ' + 'Add? (y/n) [n]: ') allow_add_instrument = \ response.lower() in ['y', 'yes'] @@ -260,7 +258,6 @@ def execute(path, bypass): LOGGER.debug('Done get_data().') print(count) - print(count) def conv(i): diff --git a/woudc_data_registry/product/totalozone/totalozone_generator.py b/woudc_data_registry/product/totalozone/totalozone_generator.py index dddfe52b..368bb5fb 100644 --- a/woudc_data_registry/product/totalozone/totalozone_generator.py +++ b/woudc_data_registry/product/totalozone/totalozone_generator.py @@ -18,7 +18,7 @@ # those files. Users are asked to read the 3rd Party Licenses # referenced with those assets. # -# Copyright (c) 2021 Government of Canada +# Copyright (c) 2024 Government of Canada # # Permission is hereby granted, free of charge, to any person # obtaining a copy of this software and associated documentation @@ -80,12 +80,12 @@ def execute(path, bypass): for filename in filenames: ipath = os.path.join(dirname, filename) contents = read_file(ipath) - LOGGER.debug('Parsing extcsv {}'.format(ipath)) + LOGGER.debug(f'Parsing extcsv {ipath}') try: extcsv = ExtendedCSV(contents) except Exception as err: - msg = 'Unable to parse extcsv {}: {}'.format(ipath, err) + msg = f'Unable to parse extcsv {ipath}: {err}' LOGGER.error(msg) continue @@ -110,8 +110,7 @@ def execute(path, bypass): instrument_height = extcsv.extcsv['LOCATION']['Height'][0] timestamp_date = extcsv.extcsv['TIMESTAMP']['Date'][0] except Exception as err: - msg = 'Unable to get metadata from extcsv {}: {}'.format( - ipath, err) + msg = f'Unable to get metadata from extcsv {ipath}: {err}' LOGGER.error(msg) continue @@ -158,9 +157,8 @@ def execute(path, bypass): allow_add_instrument = True else: response = \ - input('Instrument {} not found. ' - 'Add? (y/n) [n]: ' - .format(instrument_id)) + input(f'Instrument {instrument_id} not found. ' + 'Add? (y/n) [n]: ') allow_add_instrument = \ response.lower() in ['y', 'yes'] diff --git a/woudc_data_registry/product/uv_index/uv_index_generator.py b/woudc_data_registry/product/uv_index/uv_index_generator.py index fd6dd48b..ac8bf3db 100644 --- a/woudc_data_registry/product/uv_index/uv_index_generator.py +++ b/woudc_data_registry/product/uv_index/uv_index_generator.py @@ -18,7 +18,7 @@ # those files. Users are asked to read the 3rd Party Licenses # referenced with those assets. # -# Copyright (c) 2021 Government of Canada +# Copyright (c) 2024 Government of Canada # # Permission is hereby granted, free of charge, to any person # obtaining a copy of this software and associated documentation @@ -105,12 +105,12 @@ def execute(path, formula_lookup, update, start_year, end_year, bypass): ipath = os.path.join(dirname, filename) contents = read_file(ipath) - LOGGER.debug('Parsing extcsv {}'.format(ipath)) + LOGGER.debug(f'Parsing extcsv {ipath}') try: extcsv = ExtendedCSV(contents) except Exception as err: - msg = 'Unable to parse extcsv {}: {}'.format(ipath, err) + msg = f'Unable to parse extcsv {ipath}: {err}' LOGGER.error(msg) continue @@ -136,15 +136,14 @@ def execute(path, formula_lookup, update, start_year, end_year, bypass): instrument_height = extcsv.extcsv['LOCATION']['Height'][0] timestamp_date = extcsv.extcsv['TIMESTAMP']['Date'][0] except Exception as err: - msg = 'Unable to get data from extcsv {}: {}'.format( - ipath, err) + msg = f'Unable to get data from extcsv {ipath}: {err}' LOGGER.error(msg) continue if len(station_id) == 2: station_id = station_id.zfill(3) - station = '{}{}'.format(station_type.upper(), station_id) + station = f'{station_type.upper()}{station_id}' if station in formula_lookup.keys(): if dataset.lower() == 'spectral': @@ -170,8 +169,7 @@ def execute(path, formula_lookup, update, start_year, end_year, bypass): formula_lookup, max_index) except Exception as err: - msg = 'Unable to compute UV for file {}: {}'.format( # noqa - ipath, err) + msg = f'Unable to compute UV for file {ipath}: {err}' # noqa LOGGER.error(msg) continue elif dataset.lower() == 'broad-band': @@ -182,13 +180,11 @@ def execute(path, formula_lookup, update, start_year, end_year, bypass): country, formula_lookup) except Exception as err: - msg = 'Unable to compute UV for file {}: {}'.format( # noqa - ipath, err) + msg = f'Unable to compute UV for file {ipath}: {err}' # noqa LOGGER.error(msg) continue else: - msg = 'Unsupported dataset {}. Skipping.'.format( - dataset) + msg = f'Unsupported dataset {dataset}. Skipping.' LOGGER.error(msg) # form ids for data insert @@ -209,9 +205,8 @@ def execute(path, formula_lookup, update, start_year, end_year, bypass): allow_add_instrument = True else: response = \ - input('Instrument {} not found. ' - 'Add? (y/n) [n]: ' - .format(instrument_id)) + input(f'Instrument {instrument_id} not found. ' + 'Add? (y/n) [n]: ') allow_add_instrument = \ response.lower() in ['y', 'yes'] @@ -344,8 +339,7 @@ def compute_uv_index(ipath, extcsv, dataset, station, time = extcsv.extcsv[global_summary_t]['Time'][0] utcoffset = extcsv.extcsv[timestamp_t]['UTCOffset'][0] except Exception as err: - msg = 'Unable to get value from file {}: {}'.format( - ipath, err) + msg = f'Unable to get value from file {ipath}: {err}' LOGGER.error(msg) pass @@ -355,23 +349,20 @@ def compute_uv_index(ipath, extcsv, dataset, station, try: uv = float(uv) except ValueError as err: - msg = ('Unable to make UVIndex: {} value into a float.' - ' Time: {}, file: {}: {}'.format(uv, time, - ipath, err)) + msg = (f'Unable to make UVIndex: {uv} value into a float.' # noqa + f' Time: {time}, file: {ipath}: {err}') LOGGER.error(msg) pass except Exception as err: - msg = ('Unable to get {}.UVIndex' - ' from file: {}. Time: {}: {}'.format( - global_summary_nsf_t, ipath, time, err)) + msg = (f'Unable to get {global_summary_nsf_t}.UVIndex' + f' from file: {ipath}. Time: {time}: {err}') LOGGER.error(msg) pass try: zen_angle = extcsv.extcsv[global_summary_nsf_t]['SZA'][0] except Exception as err: - msg = ('Unable to get {}.SZA from file {}: {}'.format( - global_summary_nsf_t, ipath, err)) + msg = f'Unable to get {global_summary_nsf_t}.SZA from file {ipath}: {err}' # noqa LOGGER.error(msg) pass @@ -383,8 +374,7 @@ def compute_uv_index(ipath, extcsv, dataset, station, intcie_f = float(intcie) except Exception as err: msg = ('Unable to convert to float intcie:' - ' {}. File: {}. Time: {}: {}'.format( - intcie, ipath, time, err)) + f' {intcie}. File: {ipath}. Time: {time}: {err}') # noqa LOGGER.error(msg) continue # compute @@ -393,7 +383,7 @@ def compute_uv_index(ipath, extcsv, dataset, station, elif '/' in formula: uv = intcie_f / 40 else: - msg = 'Unknown formula: {}'.format(formula) + msg = f'Unknown formula: {formula}' LOGGER.error(msg) continue @@ -401,15 +391,13 @@ def compute_uv_index(ipath, extcsv, dataset, station, zen_angle = \ extcsv.extcsv[global_summary_t]['ZenAngle'][0] except Exception as err: - msg = ('Unable to get {}.ZenAngle from file: {}' - 'Time: {}: {}'.format( - global_summary_t, ipath, time, err)) + msg = (f'Unable to get {global_summary_t}.ZenAngle from file: {ipath}' # noqa + f'Time: {time}: {err}') LOGGER.error(msg) pass except Exception as err: - msg = ('Unable to get {}.IntCIE from file: {}. Time: {}.' - ': {}'.format(global_summary_t, ipath, time, err)) + msg = f'Unable to get {global_summary_t}.IntCIE from file: {ipath}. Time: {time}: {err}' # noqa LOGGER.error(msg) continue @@ -438,8 +426,7 @@ def compute_uv_index(ipath, extcsv, dataset, station, if instrument_name.lower() == 'kipp_zonen': formula = formula_lookup[station]['kipp_zonen']['GLOBAL'] except KeyError as err: - msg = ('Unable to get broad-band formula for file {}: {}'.format( - ipath, err)) + msg = f'Unable to get broad-band formula for file {ipath}: {err}' LOGGER.error(msg) raise err @@ -448,7 +435,7 @@ def compute_uv_index(ipath, extcsv, dataset, station, date = extcsv.extcsv['TIMESTAMP']['Date'][0] utcoffset = extcsv.extcsv['TIMESTAMP']['UTCOffset'][0] except Exception as err: - msg = 'Unable to get value from file {}: {}'.format(ipath, err) + msg = f'Unable to get value from file {ipath}: {err}' LOGGER.error(msg) raise err @@ -456,15 +443,15 @@ def compute_uv_index(ipath, extcsv, dataset, station, try: times = extcsv.extcsv['GLOBAL']['Time'] except Exception as err: - msg = ('Unable to get GLOBAL.Time values from file {}: {}' - 'Trying DIFFUSE.Time'.format(ipath, err)) + msg = (f'Unable to get GLOBAL.Time values from file {ipath}: {err}' + 'Trying DIFFUSE.Time') LOGGER.error(msg) # try DIFFUSE if times is None: try: times = extcsv.extcsv['DIFFUSE']['Time'] except Exception as err: - msg = 'Unable to get DIFFUSE.Time {}: {}'.format(ipath, err) + msg = f'Unable to get DIFFUSE.Time {ipath}: {err}' LOGGER.error(msg) raise err @@ -474,8 +461,8 @@ def compute_uv_index(ipath, extcsv, dataset, station, try: irradiances = extcsv.extcsv['GLOBAL']['Irradiance'] except Exception as err: - msg = ('Unable to get GLOBAL.Irradiance values from file {}:' - '{}. Trying DIFFUSE.Irradiance'.format(ipath, err)) + msg = (f'Unable to get GLOBAL.Irradiance values from file {ipath}:' + f'{err}. Trying DIFFUSE.Irradiance') LOGGER.error(msg) # try DIFFUSE if irradiances is None: @@ -483,7 +470,7 @@ def compute_uv_index(ipath, extcsv, dataset, station, irradiances = extcsv.extcsv['DIFFUSE']['Irradiance'] except Exception as err: msg = ('Unable to get DIFFUSE.Irradiance values from file' - '{}: {}'.format(ipath, err)) + f'{ipath}: {err}') LOGGER.error(msg) raise err @@ -503,7 +490,7 @@ def compute_uv_index(ipath, extcsv, dataset, station, irradiance_f = float(irradiance) except Exception: msg = ('Unable to make float for irradiance:' - ' {}. Time: {}'.format(irradiance, time)) + f' {irradiance}. Time: {time}') LOGGER.error(msg) continue diff --git a/woudc_data_registry/registry.py b/woudc_data_registry/registry.py index 61e48549..c0175522 100644 --- a/woudc_data_registry/registry.py +++ b/woudc_data_registry/registry.py @@ -18,7 +18,7 @@ # those files. Users are asked to read the 3rd Party Licenses # referenced with those assets. # -# Copyright (c) 2019 Government of Canada +# Copyright (c) 2024 Government of Canada # # Permission is hereby granted, free of charge, to any person # obtaining a copy of this software and associated documentation @@ -76,7 +76,7 @@ def query_index_by_category(self, domain, category): :returns: List of all objects of that class in the registry. """ - LOGGER.debug('Querying records for {} by category'.format(domain)) + LOGGER.debug(f'Querying records for {domain} by category') values = self.session.query(domain).filter_by( content_category=category) @@ -90,7 +90,7 @@ def query_full_index(self, domain): :returns: List of all objects of that class in the registry. """ - LOGGER.debug('Querying all records for {}'.format(domain)) + LOGGER.debug(f'Querying all records for {domain}') values = self.session.query(domain).all() return values @@ -103,7 +103,7 @@ def query_distinct(self, domain): :returns: list of distinct values """ - LOGGER.debug('Querying distinct values for {}'.format(domain)) + LOGGER.debug(f'Querying distinct values for {domain}') values = [v[0] for v in self.session.query(domain).distinct()] return values @@ -124,11 +124,12 @@ def query_distinct_by_fields( :returns: list of distinct values """ - LOGGER.debug('Querying distinct values for {}'.format(domain)) - conditions = [] target_fields = values.keys() + LOGGER.debug(f'Querying distinct values \ + by fields {target_fields} for {domain}') + for field in target_fields: table_field = getattr(obj, field) if case_insensitive: @@ -158,7 +159,7 @@ def query_distinct_in( """ LOGGER.debug( - 'Querying distinct values for {} from subquery'.format(domain) + f'Querying distinct values for {domain} from subquery' ) results = [v[0] for v in self.session.query(domain).filter( field.in_(subquery)).distinct()] @@ -180,11 +181,10 @@ def query_by_field(self, obj, by, value, case_insensitive=False): field = getattr(obj, by) if case_insensitive: - LOGGER.debug('Querying for LOWER({}) = LOWER({})' - .format(field, value)) + LOGGER.debug(f'Querying for LOWER({field}) = LOWER({value})') condition = func.lower(field) == value.lower() else: - LOGGER.debug('Querying for {} = {}'.format(field, value)) + LOGGER.debug(f'Querying for {field} = {value}') condition = field == value return self.session.query(obj).filter(condition).first() @@ -206,11 +206,10 @@ def query_extents( if by is not None: field = getattr(obj, by) if case_insensitive: - LOGGER.debug('Querying for LOWER({}) = LOWER({})' - .format(field, value)) + LOGGER.debug(f'Querying for LOWER({field}) = LOWER({value})') condition = func.lower(field) == value.lower() else: - LOGGER.debug('Querying for {} = {}'.format(field, value)) + LOGGER.debug(f'Querying for {field} = {value}') condition = field == value results = self.session.query( @@ -253,11 +252,10 @@ def query_by_pattern(self, obj, by, pattern, case_insensitive=False): field = getattr(obj, by) if case_insensitive: - LOGGER.debug('Querying for LOWER({}) LIKE {}' - .format(field, pattern.lower())) + LOGGER.debug(f'Querying for LOWER({field}) LIKE {pattern.lower()}') # noqa condition = func.lower(field).like(pattern.lower()) else: - LOGGER.debug('Querying for {} LIKE {}'.format(field, pattern)) + LOGGER.debug(f'Querying for {field} LIKE {pattern}') condition = field.like(pattern) return self.session.query(obj).filter(condition).first() @@ -308,11 +306,10 @@ def update_by_field( field = getattr(obj, by) if case_insensitive: - LOGGER.debug('Querying for LOWER({}) = LOWER({})' - .format(field, value)) + LOGGER.debug(f'Querying for LOWER({field}) = LOWER({value})') condition = func.lower(field) == value.lower() else: - LOGGER.debug('Querying for {} = {}'.format(field, value)) + LOGGER.debug(f'Querying for {field} = {value}') condition = field == value self.session.query(obj).filter(condition).update(new_value) @@ -322,35 +319,32 @@ def update_by_field( def save(self, obj=None): """ - helper function to save object to registry + Helper function to save object to registry. :param obj: object to save (default None) :returns: void """ + if obj is None: + LOGGER.warning('obj is none while trying to save, skipping') + return registry_config = config.EXTRAS.get('registry', {}) try: - if obj is not None: - flag_name = '_'.join([obj.__tablename__, 'enabled']) - if registry_config.get(flag_name, True): - self.session.add(obj) - # self.session.merge(obj) - else: - LOGGER.info('Registry persistence for model {} disabled,' - ' skipping'.format(obj.__tablename__)) - return - - try: - self.session.commit() - except SQLAlchemyError as err: - LOGGER.error('Failed to persist {} due to: {}' - .format(obj, err)) - self.session.rollback() - - LOGGER.debug('Saving {}'.format(obj)) - except DataError as err: - LOGGER.error('Failed to save to registry: {}'.format(err)) + flag_name = '_'.join([obj.__tablename__, 'enabled']) + if registry_config.get(flag_name, True): + # Use merge if needed: self.session.merge(obj) + self.session.add(obj) + else: + LOGGER.info(f'Registry persistence for \ + model {obj.__tablename__} disabled, skipping') + return + + LOGGER.debug(f'Committing save of {obj}') + self.session.commit() + + except (SQLAlchemyError, DataError) as err: + LOGGER.error(f'Failed to save to registry: {err}') self.session.rollback() def close_session(self): diff --git a/woudc_data_registry/report.py b/woudc_data_registry/report.py index 49be49b2..d80c6500 100644 --- a/woudc_data_registry/report.py +++ b/woudc_data_registry/report.py @@ -18,7 +18,7 @@ # those files. Users are asked to read the 3rd Party Licenses # referenced with those assets. # -# Copyright (c) 2019 Government of Canada +# Copyright (c) 2024 Government of Canada # # Permission is hereby granted, free of charge, to any person # obtaining a copy of this software and associated documentation @@ -443,7 +443,7 @@ def add_message(self, error_code, line=None, **kwargs): error_class, message_template = self._error_definitions[error_code] message = message_template.format(**kwargs) except KeyError: - msg = 'Unrecognized error code {}'.format(error_code) + msg = f'Unrecognized error code {error_code}' LOGGER.error(msg) raise ValueError(msg) @@ -628,7 +628,7 @@ def write(self): process_results = self._contributor_status[contributor] for status, filepath in process_results: - package += '{}: {}\n'.format(status, filepath) + package += f'{status}: {filepath}\n' blocks.append(package) @@ -685,7 +685,7 @@ def filepath(self): """ today = date.today().strftime('%Y-%m-%d') - filename = 'failed-files-{}'.format(today) + filename = f'failed-files-{today}' return os.path.join(self._output_directory, filename) @@ -698,7 +698,7 @@ def find_operator_reports(self): """ run_number = 1 - parent_dir = '{}/run{}'.format(self._working_directory, run_number) + parent_dir = f'{self._working_directory}/run{run_number}' operator_report_pattern = r'operator-report-\d{4}-\d{2}-\d{2}.csv' operator_report_paths = [] @@ -710,7 +710,7 @@ def find_operator_reports(self): operator_report_paths.append(fullpath) run_number += 1 - parent_dir = '{}/run{}'.format(self._working_directory, run_number) + parent_dir = f'{self._working_directory}/run{run_number}' return operator_report_paths @@ -886,16 +886,15 @@ def write(self, addresses): if contributor in addresses: email = addresses[contributor] - header = '{} ({})'.format(contributor, email) + header = f'{contributor} ({email})' else: header = contributor - feedback_block = '{}\n' \ - 'Total files received: {}\n' \ - 'Number of passed files: {}\n' \ - 'Number of manually repaired files: {}\n' \ - 'Number of failed files: {}\n' \ - .format(header, total_count, pass_count, fix_count, fail_count) + feedback_block = f'{header}\n' \ + f'Total files received: {total_count}\n' \ + f'Number of passed files: {pass_count}\n' \ + f'Number of manually repaired files: {fix_count}\n' \ + f'Number of failed files: {fail_count}\n' \ if fail_count > 0: fail_summary = 'Summary of Failures:\n' diff --git a/woudc_data_registry/search.py b/woudc_data_registry/search.py index ae4e1668..c5fce16d 100644 --- a/woudc_data_registry/search.py +++ b/woudc_data_registry/search.py @@ -18,7 +18,7 @@ # those files. Users are asked to read the 3rd Party Licenses # referenced with those assets. # -# Copyright (c) 2019 Government of Canada +# Copyright (c) 2024 Government of Canada # # Permission is hereby granted, free of charge, to any person # obtaining a copy of this software and associated documentation @@ -44,7 +44,6 @@ # ================================================================= import logging -from urllib.parse import urlparse import click from elasticsearch import Elasticsearch, helpers @@ -1066,32 +1065,8 @@ def __init__(self): self.index_basename = config.WDR_SEARCH_INDEX_BASENAME LOGGER.debug('Connecting to Elasticsearch') - url_parsed = urlparse(self.url) - url_settings = { - 'host': url_parsed.hostname - } - - if url_parsed.port is None: # proxy to default HTTP(S) port - if url_parsed.scheme == 'https': - url_settings['port'] = 443 - url_settings['scheme'] = url_parsed.scheme - else: - url_settings['port'] = 80 - else: # was set explictly - url_settings['port'] = url_parsed.port - - if url_parsed.path is not None: - url_settings['url_prefix'] = url_parsed.path - - LOGGER.debug('URL settings: {}'.format(url_settings)) - AUTH = (config.WDR_SEARCH_USERNAME, config.WDR_SEARCH_PASSWORD) - if None in AUTH: - self.connection = Elasticsearch([url_settings]) - else: - LOGGER.debug('Connecting using username {}'.format(AUTH[0])) - self.connection = Elasticsearch([url_settings], http_auth=AUTH, - verify_certs=False) + self.connection = Elasticsearch(self.url) self.headers = {'Content-Type': 'application/json'} @@ -1103,10 +1078,7 @@ def generate_index_name(self, index_name): :returns: fully qualified index name """ - if self.index_basename is not None: - return '{}.{}'.format(self.index_basename, index_name) - - return index_name + return f'{self.index_basename}.{index_name}' def create(self): """create search indexes""" @@ -1115,7 +1087,7 @@ def create(self): for key, definition in MAPPINGS.items(): # Skip indexes that have been manually disabled. - enabled_flag = '{}_enabled'.format(key) + enabled_flag = f'{key}_enabled' if not search_index_config.get(enabled_flag, True): continue @@ -1155,14 +1127,14 @@ def delete(self): for key, definition in MAPPINGS.items(): # Skip indexes that have been manually disabled. - enabled_flag = '{}_enabled'.format(key) + enabled_flag = f'{key}_enabled' if not search_index_config.get(enabled_flag, True): continue index_name = self.generate_index_name(definition['index']) try: - self.connection.indices.delete(index_name) + self.connection.indices.delete(index=index_name) except NotFoundError as err: LOGGER.error(err) raise SearchIndexError(err) @@ -1196,10 +1168,10 @@ def index(self, domain, target): """ search_index_config = config.EXTRAS.get('search_index', {}) - enabled_flag = '{}_enabled'.format(domain.__tablename__) + enabled_flag = f'{domain.__tablename__}_enabled' if not search_index_config.get(enabled_flag, True): - msg = '{} index is currently frozen'.format(domain.__tablename__) + msg = f'{domain.__tablename__} index is currently frozen' LOGGER.warning(msg) return False @@ -1213,23 +1185,29 @@ def index(self, domain, target): 'doc_as_upsert': True } - LOGGER.debug('Indexing 1 document into {}'.format(index_name)) - self.connection.update(index=index_name, id=target['id'], - body=wrapper) + LOGGER.debug(f'Indexing 1 document into {index_name}') + self.connection.update( + index=index_name, + id=target['id'], + body=wrapper + ) else: # Index/update multiple documents using bulk API. wrapper = ({ '_op_type': 'update', '_index': index_name, - '_type': '_doc', '_id': document['id'], 'doc': document, 'doc_as_upsert': True } for document in target) - LOGGER.debug('Indexing documents into {}'.format(index_name)) - helpers.bulk(self.connection, wrapper, - raise_on_error=False, raise_on_exception=False) + LOGGER.debug(f'Indexing documents into {index_name}') + helpers.bulk( + self.connection, + wrapper, + raise_on_error=False, + raise_on_exception=False + ) return True @@ -1244,10 +1222,10 @@ def unindex(self, domain, target): """ search_index_config = config.EXTRAS.get('search_index', {}) - enabled_flag = '{}_enabled'.format(domain.__tablename__) + enabled_flag = f'{domain.__tablename__}_enabled' if not search_index_config.get(enabled_flag, True): - msg = '{} index is currently frozen'.format(domain.__tablename__) + msg = f'{domain.__tablename__} index is currently frozen' LOGGER.warning(msg) return False @@ -1259,7 +1237,7 @@ def unindex(self, domain, target): result = self.connection.delete(index=index_name, id=target) if result['result'] != 'deleted': - msg = 'Data record {} does not exist'.format(target) + msg = f'Data record {target} does not exist' LOGGER.error(msg) raise SearchIndexError(msg) elif isinstance(target, dict): @@ -1267,7 +1245,7 @@ def unindex(self, domain, target): result = self.connection.delete(index=index_name, id=target['id']) if result['result'] != 'deleted': - msg = 'Data record {} does not exist'.format(target['id']) + msg = f"Data record {target['id']} does not exist" LOGGER.error(msg) raise SearchIndexError(msg) else: @@ -1275,7 +1253,6 @@ def unindex(self, domain, target): wrapper = ({ '_op_type': 'delete', '_index': index_name, - '_type': '_doc', '_id': document['id'] } for document in target) @@ -1295,10 +1272,10 @@ def unindex_except(self, domain, targets): """ search_index_config = config.EXTRAS.get('search_index', {}) - enabled_flag = '{}_enabled'.format(domain.__tablename__) + enabled_flag = f'{domain.__tablename__}_enabled' if not search_index_config.get(enabled_flag, True): - msg = '{} index is currently frozen'.format(domain.__tablename__) + msg = f'{domain.__tablename__} index is currently frozen' LOGGER.warning(msg) return False @@ -1319,7 +1296,7 @@ def unindex_except(self, domain, targets): } } - self.connection.delete_by_query(index_name, query) + self.connection.delete_by_query(index=index_name, body=query) return True diff --git a/woudc_data_registry/tests/test_data_registry.py b/woudc_data_registry/tests/test_data_registry.py index d498ba37..bad8bb73 100644 --- a/woudc_data_registry/tests/test_data_registry.py +++ b/woudc_data_registry/tests/test_data_registry.py @@ -18,7 +18,7 @@ # those files. Users are asked to read the 3rd Party Licenses # referenced with those assets. # -# Copyright (c) 2019 Government of Canada +# Copyright (c) 2024 Government of Canada # # Permission is hereby granted, free of charge, to any person # obtaining a copy of this software and associated documentation @@ -873,7 +873,8 @@ def test_get_validator(self): for dataset in datasets: with report.OperatorReport() as null_reporter: - validator_name = '{}Validator'.format(dataset.replace('-', '')) + dataset2 = dataset.replace('-', '') + validator_name = f'{dataset2}Validator' validator = dv.get_validator(dataset, null_reporter) if hasattr(dv, validator_name): @@ -1209,7 +1210,7 @@ def _helper_test_umkehr(self, level): # Test a file with unique, out-of-order dates contents = util.read_file(resolve_test_data_path( - 'data/umkehr/{}-disordered.csv'.format(prefix))) + f'data/umkehr/{prefix}-disordered.csv')) ecsv = dummy_extCSV(contents) ecsv.validate_metadata_tables() @@ -1227,7 +1228,7 @@ def _helper_test_umkehr(self, level): # Test a file with non-unique (and out-of-order) dates contents = util.read_file(resolve_test_data_path( - 'data/umkehr/{}-duplicated.csv'.format(prefix))) + f'data/umkehr/{prefix}-duplicated.csv')) ecsv = dummy_extCSV(contents) ecsv.validate_metadata_tables() @@ -1246,7 +1247,7 @@ def _helper_test_umkehr(self, level): # Test file where each TIMESTAMP.Date disagrees with the data table contents = util.read_file(resolve_test_data_path( - 'data/umkehr/{}-mismatch-timestamp-date.csv'.format(prefix))) + f'data/umkehr/{prefix}-mismatch-timestamp-date.csv')) ecsv = dummy_extCSV(contents) ecsv.validate_metadata_tables() @@ -1265,7 +1266,7 @@ def _helper_test_umkehr(self, level): # Test file where TIMESTAMP.Times do not match between tables contents = util.read_file(resolve_test_data_path( - 'data/umkehr/{}-mismatch-timestamp-time.csv'.format(prefix))) + f'data/umkehr/{prefix}-mismatch-timestamp-time.csv')) ecsv = dummy_extCSV(contents) ecsv.validate_metadata_tables() @@ -1280,7 +1281,7 @@ def _helper_test_umkehr(self, level): # Test that missing second TIMESTAMP table is detected/filled in contents = util.read_file(resolve_test_data_path( - 'data/umkehr/{}-missing-timestamp.csv'.format(prefix))) + f'data/umkehr/{prefix}-missing-timestamp.csv')) ecsv = dummy_extCSV(contents) ecsv.validate_metadata_tables() @@ -1302,7 +1303,7 @@ def _helper_test_umkehr(self, level): # Test a file with no issues contents = util.read_file(resolve_test_data_path( - 'data/umkehr/{}-correct.csv'.format(prefix))) + f'data/umkehr/{prefix}-correct.csv')) ecsv = dummy_extCSV(contents) ecsv.validate_metadata_tables() diff --git a/woudc_data_registry/tests/test_report_generation.py b/woudc_data_registry/tests/test_report_generation.py index 330ae9ed..1b5759ac 100644 --- a/woudc_data_registry/tests/test_report_generation.py +++ b/woudc_data_registry/tests/test_report_generation.py @@ -1,3 +1,47 @@ +# ================================================================= +# +# Terms and Conditions of Use +# +# Unless otherwise noted, computer program source code of this +# distribution # is covered under Crown Copyright, Government of +# Canada, and is distributed under the MIT License. +# +# The Canada wordmark and related graphics associated with this +# distribution are protected under trademark law and copyright law. +# No permission is granted to use them outside the parameters of +# the Government of Canada's corporate identity program. For +# more information, see +# http://www.tbs-sct.gc.ca/fip-pcim/index-eng.asp +# +# Copyright title to all 3rd party software distributed with this +# software is held by the respective copyright holders as noted in +# those files. Users are asked to read the 3rd Party Licenses +# referenced with those assets. +# +# Copyright (c) 2024 Government of Canada +# +# Permission is hereby granted, free of charge, to any person +# obtaining a copy of this software and associated documentation +# files (the "Software"), to deal in the Software without +# restriction, including without limitation the rights to use, +# copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following +# conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +# ================================================================= import csv import pathlib @@ -99,7 +143,7 @@ def test_passing_operator_report(self): """Test that a passing file is written in the operator report""" filename = '20080101.Kipp_Zonen.UV-S-E-T.000560.PMOD-WRC.csv' - infile = resolve_test_data_path('data/general/{}'.format(filename)) + infile = resolve_test_data_path(f'data/general/{filename}') contents = util.read_file(infile) with report.OperatorReport(SANDBOX_DIR) as op_report: @@ -136,7 +180,7 @@ def test_warning_operator_report(self): """Test that file warnings are written in the operator report""" filename = 'ecsv-trailing-commas.csv' - infile = resolve_test_data_path('data/general/{}'.format(filename)) + infile = resolve_test_data_path(f'data/general/{filename}') contents = util.read_file(infile) with report.OperatorReport(SANDBOX_DIR) as op_report: @@ -184,7 +228,7 @@ def test_failing_operator_report(self): """Test that a failing file is written in the operator report""" filename = 'ecsv-missing-instrument-name.csv' - infile = resolve_test_data_path('data/general/{}'.format(filename)) + infile = resolve_test_data_path(f'data/general/{filename}') contents = util.read_file(infile) ecsv = None @@ -197,8 +241,7 @@ def test_failing_operator_report(self): agency = ecsv.extcsv['DATA_GENERATION']['Agency'] ecsv.validate_dataset_tables() - raise AssertionError('Parsing of {} did not fail' - .format(infile)) + raise AssertionError(f'Parsing of {infile} did not fail') except (MetadataValidationError, NonStandardDataError): output_path = os.path.join(SANDBOX_DIR, 'run1') @@ -337,7 +380,7 @@ def test_passing_run_report(self): """Test that a passing file is written to the run report""" filename = '20080101.Kipp_Zonen.UV-S-E-T.000560.PMOD-WRC.csv' - infile = resolve_test_data_path('data/general/{}'.format(filename)) + infile = resolve_test_data_path(f'data/general/{filename}') contents = util.read_file(infile) run_report = report.RunReport(SANDBOX_DIR) @@ -360,13 +403,13 @@ def test_passing_run_report(self): self.assertEqual(len(lines), 2) self.assertEqual(lines[0], agency) - self.assertEqual(lines[1], 'Pass: {}'.format(infile)) + self.assertEqual(lines[1], f'Pass: {infile}') def test_failing_run_report(self): """Test that a failing file is written to the run report""" filename = 'ecsv-missing-instrument-name.csv' - infile = resolve_test_data_path('data/general/{}'.format(filename)) + infile = resolve_test_data_path(f'data/general/{filename}') contents = util.read_file(infile) ecsv = None @@ -382,8 +425,7 @@ def test_failing_run_report(self): agency = ecsv.extcsv['DATA_GENERATION']['Agency'] ecsv.validate_dataset_tables() - raise AssertionError('Parsing of {} did not fail' - .format(infile)) + raise AssertionError(f'Parsing of {infile} did not fail') except (MetadataValidationError, NonStandardDataError): output_path = os.path.join(SANDBOX_DIR, 'run_report') @@ -396,13 +438,13 @@ def test_failing_run_report(self): self.assertEqual(len(lines), 2) self.assertEqual(lines[0], agency) - self.assertEqual(lines[1], 'Fail: {}'.format(infile)) + self.assertEqual(lines[1], f'Fail: {infile}') def test_non_extcsv_run_report(self): """Test that an unparseable file is written to the run report""" filename = 'not-an-ecsv.dat' - infile = resolve_test_data_path('data/general/{}'.format(filename)) + infile = resolve_test_data_path(f'data/general/{filename}') contents = util.read_file(infile) agency = 'UNKNOWN' @@ -412,8 +454,7 @@ def test_non_extcsv_run_report(self): try: _ = ExtendedCSV(contents, error_bank) - raise AssertionError('Parsing of {} did not fail' - .format(infile)) + raise AssertionError(f'Parsing of {infile} did not fail') except (MetadataValidationError, NonStandardDataError): output_path = os.path.join(SANDBOX_DIR, 'run_report') @@ -426,7 +467,7 @@ def test_non_extcsv_run_report(self): self.assertEqual(len(lines), 2) self.assertEqual(lines[0], agency) - self.assertEqual(lines[1], 'Fail: {}'.format(infile)) + self.assertEqual(lines[1], f'Fail: {infile}') def test_mixed_run_report(self): """ @@ -626,14 +667,12 @@ def test_find_operator_report_many_runs(self): email_report = report.EmailSummary(project_root) operator_reports = email_report.find_operator_reports() - expected_path_pattern = \ - 'data/reports/six_reports/run{}/operator-report-9999-12-31.csv' self.assertEqual(6, len(operator_reports)) for run_number in range(1, 6 + 1): expected_path = resolve_test_data_path( - expected_path_pattern.format(run_number)) + f'data/reports/six_reports/run{run_number}/operator-report-9999-12-31.csv') # noqa self.assertIn(expected_path, set(operator_reports)) def test_email_summary_single_pass(self): @@ -647,7 +686,7 @@ def test_email_summary_single_pass(self): email_report.write(emails) today = datetime.now().strftime('%Y-%m-%d') - output_filename = 'failed-files-{}'.format(today) + output_filename = f'failed-files-{today}' output_path = os.path.join(SANDBOX_DIR, output_filename) self.assertTrue(os.path.exists(output_path)) @@ -672,7 +711,7 @@ def test_email_summary_single_fail(self): email_report.write(emails) today = datetime.now().strftime('%Y-%m-%d') - output_filename = 'failed-files-{}'.format(today) + output_filename = f'failed-files-{today}' output_path = os.path.join(SANDBOX_DIR, output_filename) self.assertTrue(os.path.exists(output_path)) @@ -704,7 +743,7 @@ def test_email_summary_one_run_mixed_pass_fail(self): email_report.write(emails) today = datetime.now().strftime('%Y-%m-%d') - output_filename = 'failed-files-{}'.format(today) + output_filename = f'failed-files-{today}' output_path = os.path.join(SANDBOX_DIR, output_filename) self.assertTrue(os.path.exists(output_path)) @@ -739,7 +778,7 @@ def test_email_summary_multiple_causes_one_group(self): email_report.write(emails) today = datetime.now().strftime('%Y-%m-%d') - output_filename = 'failed-files-{}'.format(today) + output_filename = f'failed-files-{today}' output_path = os.path.join(SANDBOX_DIR, output_filename) self.assertTrue(os.path.exists(output_path)) @@ -779,7 +818,7 @@ def test_email_summary_multiple_agencies(self): email_report.write(emails) today = datetime.now().strftime('%Y-%m-%d') - output_filename = 'failed-files-{}'.format(today) + output_filename = f'failed-files-{today}' output_path = os.path.join(SANDBOX_DIR, output_filename) self.assertTrue(os.path.exists(output_path)) @@ -837,7 +876,7 @@ def test_email_summary_multiple_runs(self): email_report.write(emails) today = datetime.now().strftime('%Y-%m-%d') - output_filename = 'failed-files-{}'.format(today) + output_filename = f'failed-files-{today}' output_path = os.path.join(SANDBOX_DIR, output_filename) self.assertTrue(os.path.exists(output_path)) @@ -893,7 +932,7 @@ def test_email_summary_single_fix(self): email_report.write(emails) today = datetime.now().strftime('%Y-%m-%d') - output_filename = 'failed-files-{}'.format(today) + output_filename = f'failed-files-{today}' output_path = os.path.join(SANDBOX_DIR, output_filename) self.assertTrue(os.path.exists(output_path)) @@ -925,7 +964,7 @@ def test_email_report_mixed_pass_fix(self): email_report.write(emails) today = datetime.now().strftime('%Y-%m-%d') - output_filename = 'failed-files-{}'.format(today) + output_filename = f'failed-files-{today}' output_path = os.path.join(SANDBOX_DIR, output_filename) self.assertTrue(os.path.exists(output_path)) @@ -960,7 +999,7 @@ def test_email_report_mixed_fail_fix(self): email_report.write(emails) today = datetime.now().strftime('%Y-%m-%d') - output_filename = 'failed-files-{}'.format(today) + output_filename = f'failed-files-{today}' output_path = os.path.join(SANDBOX_DIR, output_filename) self.assertTrue(os.path.exists(output_path)) @@ -1002,7 +1041,7 @@ def test_email_summary_fix_but_still_fail(self): email_report.write(emails) today = datetime.now().strftime('%Y-%m-%d') - output_filename = 'failed-files-{}'.format(today) + output_filename = f'failed-files-{today}' output_path = os.path.join(SANDBOX_DIR, output_filename) self.assertTrue(os.path.exists(output_path)) @@ -1034,7 +1073,7 @@ def test_email_summary_mixed_pass_fix_fail(self): email_report.write(emails) today = datetime.now().strftime('%Y-%m-%d') - output_filename = 'failed-files-{}'.format(today) + output_filename = f'failed-files-{today}' output_path = os.path.join(SANDBOX_DIR, output_filename) self.assertTrue(os.path.exists(output_path)) @@ -1085,7 +1124,7 @@ def test_email_summary_multiple_causes(self): email_report.write(emails) today = datetime.now().strftime('%Y-%m-%d') - output_filename = 'failed-files-{}'.format(today) + output_filename = f'failed-files-{today}' output_path = os.path.join(SANDBOX_DIR, output_filename) self.assertTrue(os.path.exists(output_path)) diff --git a/woudc_data_registry/util.py b/woudc_data_registry/util.py index 6061fd70..c1e6e948 100644 --- a/woudc_data_registry/util.py +++ b/woudc_data_registry/util.py @@ -18,7 +18,7 @@ # those files. Users are asked to read the 3rd Party Licenses # referenced with those assets. # -# Copyright (c) 2019 Government of Canada +# Copyright (c) 2024 Government of Canada # # Permission is hereby granted, free of charge, to any person # obtaining a copy of this software and associated documentation @@ -89,13 +89,13 @@ def read_file(filename, encoding='utf-8'): :returns: buffer of file contents """ - LOGGER.debug('Reading file {} (encoding {})'.format(filename, encoding)) + LOGGER.debug(f'Reading file {filename} (encoding {encoding})') try: with io.open(filename, encoding=encoding) as fh: return fh.read().strip() except UnicodeDecodeError as err: - LOGGER.warning('utf-8 decoding failed: {}'.format(err)) + LOGGER.warning(f'utf-8 decoding failed: {err}') LOGGER.info('Trying latin-1') with io.open(filename, encoding='latin-1') as fh: return fh.read().strip() @@ -178,7 +178,7 @@ def json_serial(obj): serial = obj.isoformat() return serial - msg = '{} type {} not serializable'.format(obj, type(obj)) + msg = f'{obj} type {type(obj)} not serializable' LOGGER.error(msg) raise TypeError(msg)