diff --git a/.github/workflows/cc-checker-ugrid-test.yml b/.github/workflows/cc-checker-ugrid-test.yml index 4e7c50763..fd129011c 100644 --- a/.github/workflows/cc-checker-ugrid-test.yml +++ b/.github/workflows/cc-checker-ugrid-test.yml @@ -3,7 +3,6 @@ name: UGRID Plugin Tests on: pull_request: push: - branches: [master] jobs: run: @@ -12,32 +11,24 @@ jobs: steps: - uses: actions/checkout@v3 - - name: Setup Conda - uses: s-weigand/setup-conda@v1 + - name: Setup Micromamba + uses: mamba-org/provision-with-micromamba@v15 with: - activate-conda: false - conda-channels: conda-forge + environment-file: false - - name: Python ${{ matrix.python-version }} + - name: Setup Env shell: bash -l {0} - run: | - conda create --name TEST python=${{ matrix.python-version }} python=3 pip --file requirements.txt --file test_requirements.txt --strict-channel-priority - source activate TEST - pip install -e . --no-deps --force-reinstall - - - name: Conda Info - shell: bash -l {0} - run: | - source activate TEST - conda info --all - conda list + run: > + micromamba create --name TEST python=3 pip --file requirements.txt --file test_requirements.txt --channel conda-forge + && micromamba activate TEST + && pip install -e . --no-deps --force-reinstall - name: cc-plugin-glider tests shell: bash -l {0} - run: | - source activate TEST - git clone https://github.com/ioos/cc-checker-ugrid.git - cd cc-checker-ugrid - pip install -e . --no-deps --force-reinstall - conda install --file requirements.txt --file requirements-dev.txt ; - pytest -s -rxs -v cc_plugin_ugrid + run: > + micromamba activate TEST + && git clone https://github.com/ioos/cc-checker-ugrid.git + && cd cc-checker-ugrid + && micromamba install --file requirements.txt --file requirements-dev.txt --channel conda-forge + && pip install -e . 
--no-deps --force-reinstall + && pytest -s -rxs -v cc_plugin_ugrid diff --git a/.github/workflows/cc-plugin-glider-test.yml b/.github/workflows/cc-plugin-glider-test.yml index ddee4bfa8..e1de1fa28 100644 --- a/.github/workflows/cc-plugin-glider-test.yml +++ b/.github/workflows/cc-plugin-glider-test.yml @@ -12,11 +12,11 @@ jobs: - uses: actions/checkout@v3 - name: Setup Micromamba - uses: mamba-org/provision-with-micromamba@v14 + uses: mamba-org/provision-with-micromamba@v15 with: environment-file: false - - name: Python + - name: Setup Env shell: bash -l {0} run: > micromamba create --name TEST python=3 pip --file requirements.txt --file test_requirements.txt --channel conda-forge diff --git a/.github/workflows/cc-plugin-sgrid-test.yml b/.github/workflows/cc-plugin-sgrid-test.yml index 160df5e59..272c92580 100644 --- a/.github/workflows/cc-plugin-sgrid-test.yml +++ b/.github/workflows/cc-plugin-sgrid-test.yml @@ -12,11 +12,11 @@ jobs: - uses: actions/checkout@v3 - name: Setup Micromamba - uses: mamba-org/provision-with-micromamba@main + uses: mamba-org/provision-with-micromamba@v15 with: environment-file: false - - name: Python + - name: Setup Env shell: bash -l {0} run: > micromamba create --name TEST python=3 pip --file requirements.txt --file test_requirements.txt --channel conda-forge diff --git a/.github/workflows/cc-plugin-ugrid-test.yml b/.github/workflows/cc-plugin-ugrid-test.yml index 134523832..fd129011c 100644 --- a/.github/workflows/cc-plugin-ugrid-test.yml +++ b/.github/workflows/cc-plugin-ugrid-test.yml @@ -12,11 +12,11 @@ jobs: - uses: actions/checkout@v3 - name: Setup Micromamba - uses: mamba-org/provision-with-micromamba@v14 + uses: mamba-org/provision-with-micromamba@v15 with: environment-file: false - - name: Python + - name: Setup Env shell: bash -l {0} run: > micromamba create --name TEST python=3 pip --file requirements.txt --file test_requirements.txt --channel conda-forge diff --git a/.github/workflows/codecov.yml b/.github/workflows/codecov.yml index b125cb73e..e5a2340b7 100644 --- a/.github/workflows/codecov.yml +++ b/.github/workflows/codecov.yml @@ -12,11 +12,11 @@ jobs: - uses: actions/checkout@v3 - name: Setup Micromamba - uses: mamba-org/provision-with-micromamba@v14 + uses: mamba-org/provision-with-micromamba@v15 with: environment-file: false - - name: Python + - name: Setup Env shell: bash -l {0} run: > micromamba create --name TEST python=3 pip --file requirements.txt --file test_requirements.txt --channel conda-forge diff --git a/.github/workflows/default-tests.yml b/.github/workflows/default-tests.yml index 381767939..c3b7c20ee 100644 --- a/.github/workflows/default-tests.yml +++ b/.github/workflows/default-tests.yml @@ -17,11 +17,11 @@ jobs: - uses: actions/checkout@v3 - name: Setup Micromamba - uses: mamba-org/provision-with-micromamba@v14 + uses: mamba-org/provision-with-micromamba@v15 with: environment-file: false - - name: Python ${{ matrix.python-version }} + - name: Setup Env ${{ matrix.python-version }} shell: bash -l {0} run: > micromamba create --name TEST python=${{ matrix.python-version }} pip --file requirements.txt --file test_requirements.txt --channel conda-forge diff --git a/.github/workflows/deploy-docs.yml b/.github/workflows/deploy-docs.yml new file mode 100644 index 000000000..2936f8db6 --- /dev/null +++ b/.github/workflows/deploy-docs.yml @@ -0,0 +1,48 @@ + +name: Documentation + +on: + pull_request: + push: + release: + types: + - published + +jobs: + build-docs: + runs-on: ubuntu-latest + + steps: + - name: checkout + uses: 
actions/checkout@v3 + with: + fetch-depth: 0 + + - name: Setup Mamba + uses: mamba-org/provision-with-micromamba@v15 + with: + environment-file: false + + - name: Build environment + shell: bash -l {0} + run: | + micromamba create --name TEST python=3 --file requirements.txt --file test_requirements.txt --channel conda-forge + micromamba activate TEST + python -m pip install -e . --no-deps --force-reinstall + + - name: Build documentation + shell: bash -l {0} + run: | + set -e + micromamba activate TEST + pushd docs + cp ../README.md source/quickintro.md + make clean html linkcheck + popd + + - name: Deploy + if: success() && github.event_name == 'release' + uses: peaceiris/actions-gh-pages@v3 + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + publish_dir: docs/build/html diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml index 14ebe106a..634998781 100644 --- a/.github/workflows/integration-tests.yml +++ b/.github/workflows/integration-tests.yml @@ -3,7 +3,6 @@ name: Integration Tests on: pull_request: push: - branches: [master,develop] jobs: run: @@ -13,11 +12,11 @@ jobs: - uses: actions/checkout@v3 - name: Setup Micromamba - uses: mamba-org/provision-with-micromamba@v14 + uses: mamba-org/provision-with-micromamba@v15 with: environment-file: false - - name: Create Environment + - name: Setup Env shell: bash -l {0} run: > micromamba create --name TEST python=3 pip --file requirements.txt --file test_requirements.txt --channel conda-forge diff --git a/.github/workflows/pypi.yml b/.github/workflows/pypi.yml index 70dc4221f..0a2bb6861 100644 --- a/.github/workflows/pypi.yml +++ b/.github/workflows/pypi.yml @@ -3,8 +3,6 @@ name: Publish to PyPI on: pull_request: push: - branches: - - master release: types: - published @@ -29,7 +27,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v4 with: - python-version: "3.10" + python-version: "3.11" - name: Install build tools run: | @@ -49,7 +47,7 @@ jobs: - name: Publish a Python distribution to PyPI if: success() && github.event_name == 'release' - uses: pypa/gh-action-pypi-publish@v1.8.5 + uses: pypa/gh-action-pypi-publish@release/v1 with: user: __token__ password: ${{ secrets.PYPI_ACCESS_TOKEN }} diff --git a/.gitignore b/.gitignore index 8302bdaf6..6809e5fe4 100644 --- a/.gitignore +++ b/.gitignore @@ -110,3 +110,7 @@ activate # coverage output coverage/ coverage.xml + +# shpinx docs +docs/source/generated/ +docs/source/quickintro.md diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4532c0e6f..eea05f7e4 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -10,21 +10,13 @@ repos: exclude: compliance_checker/tests/data - id: check-docstring-first - id: check-added-large-files + - id: check-json + - id: check-merge-conflict + - id: check-yaml - id: requirements-txt-fixer - -- repo: https://github.com/PyCQA/flake8 - rev: 6.0.0 - hooks: - - id: flake8 - exclude: docs/source/conf.py - args: [--max-line-length=200, "--ignore=E203,E501,W503", "--select=select=C,E,F,W,B,B950"] - -- repo: https://github.com/pycqa/isort - rev: 5.12.0 - hooks: - - id: isort - additional_dependencies: [toml] - args: ["--profile", "black", "--filter-files"] + args: + - requirements.txt + - test_requirements.txt - repo: https://github.com/psf/black rev: 23.3.0 @@ -32,20 +24,35 @@ repos: - id: black language_version: python3 +- repo: https://github.com/asottile/add-trailing-comma + rev: v2.4.0 + hooks: + - id: add-trailing-comma + + +- repo: https://github.com/charliermarsh/ruff-pre-commit 
+ rev: v0.0.267 + hooks: + - id: ruff + +- repo: https://github.com/tox-dev/pyproject-fmt + rev: 0.11.2 + hooks: + - id: pyproject-fmt + - repo: https://github.com/codespell-project/codespell rev: v2.2.4 hooks: - id: codespell args: - - --ignore-words-list=degreee,varn,poit,uint,sur,herat,claus,dedent,dedenting,dedents,dedented + - --ignore-words-list=degreeE,degreee,varn,poit,uint,sur,herat,claus,tung,messsages exclude: > (?x)^( .*\.xml| .*\.cdl| - .*_version\.py| - .*versioneer\.py| - compliance_checker/tests/cassettes/test_netcdf_content_type\.yaml| - compliance_checker/data/seanames\.csv + .*\.yaml| + .*_version.py| + .*versioneer.py )$ ci: diff --git a/README.md b/README.md index 1f91e0557..f4c70c4e2 100644 --- a/README.md +++ b/README.md @@ -1,17 +1,17 @@ # IOOS Compliance Checker -[![Build Status](https://travis-ci.org/ioos/compliance-checker.svg)](https://travis-ci.org/ioos/compliance-checker) -[![codecov](https://codecov.io/gh/ioos/compliance-checker/branch/master/graph/badge.svg)](https://codecov.io/gh/ioos/compliance-checker) +[![Tests](https://github.com/ioos/compliance-checker/actions/workflows/default-tests.yml/badge.svg)](https://github.com/ioos/compliance-checker/actions/workflows/default-tests.yml) +[![codecov](https://codecov.io/gh/ioos/compliance-checker/branch/develop/graph/badge.svg)](https://app.codecov.io/gh/ioos/compliance-checker) The IOOS Compliance Checker is a python based tool for data providers to check for completeness and community standard compliance of local or remote [netCDF](https://en.wikipedia.org/wiki/NetCDF) files against [CF](http://cfconventions.org/Data/cf-conventions/cf-conventions-1.7/cf-conventions.html) and -[ACDD](http://wiki.esipfed.org/index.php/Attribute_Convention_for_Data_Discovery_1-3) +[ACDD](https://wiki.esipfed.org/Attribute_Convention_for_Data_Discovery_1-3) file standards. The python module can be used as a command-line tool or as a library that can be integrated into other software. -A [web-based version](https://data.ioos.us/compliance/index.html) of the Compliance +A [web-based version](https://compliance.ioos.us/index.html) of the Compliance Checker was developed to enable a broader audience and improve accessibility for the checker. With the web version, providers can simply provide a link or upload their datasets and get the full suite of capabilities that Compliance Checker offers. @@ -19,17 +19,17 @@ datasets and get the full suite of capabilities that Compliance Checker offers. 
It currently supports the following sources and standards: -| Standard | Source | .nc/OPeNDAP/.cdl | SOS | -| ---------------------------------------------------------------------------------------------------- | ----------- | ------ | ------------------------------- | -| [ACDD (1.1, 1.3)](http://wiki.esipfed.org/index.php/Attribute_Convention_for_Data_Discovery_1-3) | Built-in | X | - | -| [CF (1.8)](http://cfconventions.org/Data/cf-conventions/cf-conventions-1.8/cf-conventions.html) | Built-in | X | - | -| [CF (1.7)](http://cfconventions.org/Data/cf-conventions/cf-conventions-1.7/cf-conventions.html) | Built-in | X | - | -| [CF (1.6)](http://cfconventions.org/cf-conventions/v1.6.0/cf-conventions.html) | Built-in | X | - | -| IOOS SOS | Built-in | - | GetCapabilities, DescribeSensor | +| Standard | Source | .nc/OPeNDAP/.cdl | SOS | +| ---------------------------------------------------------------------------------------------------- | ----------- | ------ | ------------------------------- | +| [ACDD (1.1, 1.3)](https://wiki.esipfed.org/Attribute_Convention_for_Data_Discovery_1-3) | Built-in | X | - | +| [CF (1.8)](http://cfconventions.org/Data/cf-conventions/cf-conventions-1.8/cf-conventions.html) | Built-in | X | - | +| [CF (1.7)](http://cfconventions.org/Data/cf-conventions/cf-conventions-1.7/cf-conventions.html) | Built-in | X | - | +| [CF (1.6)](http://cfconventions.org/cf-conventions/v1.6.0/cf-conventions.html) | Built-in | X | - | +| IOOS SOS | Built-in | - | GetCapabilities, DescribeSensor | | [IOOS (1.1)](https://ioos.github.io/ioos-metadata/ioos-metadata-profile-v1-1.html#ioos-netcdf-metadata-profile-attributes) | Built-in | X | - | -| [IOOS (1.2)](https://ioos.github.io/ioos-metadata/ioos-metadata-profile-v1-2.html) | Built-in | X | - | -| [Glider DAC](https://github.com/ioos/ioosngdac/wiki/NGDAC-NetCDF-File-Format-Version-2) | [ioos/cc-plugin-glider](https://github.com/ioos/cc-plugin-glider) | X | - | -| [NCEI (1.1, 2.0)](https://www.nodc.noaa.gov/data/formats/netcdf/v2.0/) | [ioos/cc-plugin-ncei](https://github.com/ioos/cc-plugin-ncei) | X | - | +| [IOOS (1.2)](https://ioos.github.io/ioos-metadata/ioos-metadata-profile-v1-2.html) | Built-in | X | - | +| [Glider DAC](https://github.com/ioos/ioosngdac/wiki/NetCDF-Specification) | [ioos/cc-plugin-glider](https://github.com/ioos/cc-plugin-glider) | X | - | +| [NCEI (1.1, 2.0)](https://www.ncei.noaa.gov/data/oceans/ncei/formats/netcdf/v2.0/index.html) | [ioos/cc-plugin-ncei](https://github.com/ioos/cc-plugin-ncei) | X | - | ## Advice to data providers @@ -45,11 +45,11 @@ If you feel you will need to run a batch of files through the Compliance Checker the IOOS Program Office Operations Division for assistance. -# [The Compliance Checker Web Tool](https://data.ioos.us/compliance/) +# [The Compliance Checker Web Tool](https://compliance.ioos.us/index.html) The IOOS Compliance Checker front end companion. 
-[https://data.ioos.us/compliance/](https://data.ioos.us/compliance/) +[https://compliance.ioos.us/index.html](https://compliance.ioos.us/index.html) Source Code is available on GitHub: @@ -74,11 +74,11 @@ Here are a couple examples: **HTML Output** -https://data.ioos.us/compliance/api/run?report_format=html&test=acdd&url=http://sos.maracoos.org/stable/dodsC/hrecos/stationHRMARPH-agg.ncml +https://compliance.ioos.us/api/run?report_format=html&test=acdd&url=http://sos.maracoos.org/stable/dodsC/hrecos/stationHRMARPH-agg.ncml **JSON Output** -https://data.ioos.us/compliance/api/run?report_format=json&test=acdd&url=http://sos.maracoos.org/stable/dodsC/hrecos/stationHRMARPH-agg.ncml +https://compliance.ioos.us/api/run?report_format=json&test=acdd&url=http://sos.maracoos.org/stable/dodsC/hrecos/stationHRMARPH-agg.ncml # The Compliance Checker Command Line Tool @@ -371,20 +371,20 @@ with open(output_filename, 'r') as fp: ## Compliance Checker Plug-Ins -Separate Plug-ins have been developed to complement the master Compliance Checker tool with +Separate Plug-ins have been developed to complement the Compliance Checker tool with specifications for preparing data to be submitted to different data assembly centers. The version numbering of these plug-ins are not necessarily link to the version of the -master Compliance Checker, but they are all designed to run with the master Compliance Checker tool. +Compliance Checker, but they are all designed to run with the Compliance Checker tool. ### Current Plug-in Releases: - [GliderDAC](https://github.com/ioos/cc-plugin-glider/releases) -This is a checker for [GliderDAC](https://github.com/ioos/ioosngdac/wiki/NGDAC-NetCDF-File-Format-Version-2) files +This is a checker for [GliderDAC](https://github.com/ioos/ioosngdac/wiki/NetCDF-Specification) files - [NCEI](https://github.com/ioos/cc-plugin-ncei/releases) - [link](https://github.com/ioos/cc-plugin-ncei) -This is a checker for NCEI netCDF Templates [v1.1](https://www.nodc.noaa.gov/data/formats/netcdf/v1.1/) and [v2.0](https://www.nodc.noaa.gov/data/formats/netcdf/v2.0/) files. +This is a checker for NCEI netCDF Templates [v1.1](https://www.ncei.noaa.gov/data/oceans/ncei/formats/netcdf/v1.1/index.html) and [v2.0](https://www.ncei.noaa.gov/data/oceans/ncei/formats/netcdf/v2.0/index.html) files. These plug-ins must be installed separately but work on top of the base compliance checker software. diff --git a/cchecker.py b/cchecker.py index 4155f057f..90329cdf4 100755 --- a/cchecker.py +++ b/cchecker.py @@ -34,7 +34,7 @@ def parse_options(opts): try: checker_type, checker_opt = opt_str.split(":", 1) except ValueError: - warnings.warn("Could not split option {}, ignoring".format(opt_str)) + warnings.warn(f"Could not split option {opt_str}, ignoring", stacklevel=2) else: options_dict[checker_type].add(checker_opt) return options_dict @@ -112,7 +112,7 @@ def main(): both high and medium priority issues, while skipping low priority issues. Cannot be used with `-i`/`--include-checks` option. - """ + """, ), action="append", ) @@ -125,7 +125,7 @@ def main(): Specifies checks to include. Can only take the form of ``. Cannot be specified along with `-s`/`skip_checks`. - """ + """, ), action="append", ) @@ -181,7 +181,7 @@ def main(): 'cf:enable_appendix_a_checks' - Allow check results against CF Appendix A for attribute location and data types.
- """ + """, ), ) @@ -206,7 +206,10 @@ def main(): ) parser.add_argument( - "-l", "--list-tests", action="store_true", help="List the available tests" + "-l", + "--list-tests", + action="store_true", + help="List the available tests", ) parser.add_argument( @@ -291,7 +294,7 @@ def main(): if args.format != "json": print( "Running Compliance Checker on the datasets from: {}".format( - args.dataset_location + args.dataset_location, ), file=sys.stderr, ) @@ -313,7 +316,7 @@ def main(): if args.format != "json": print( "Running Compliance Checker on the dataset from: {}".format( - dataset + dataset, ), file=sys.stderr, ) diff --git a/compliance_checker/__init__.py b/compliance_checker/__init__.py index 1a25bdeff..783dd071c 100644 --- a/compliance_checker/__init__.py +++ b/compliance_checker/__init__.py @@ -21,7 +21,7 @@ class MemoizedDataset(Dataset): @lru_cache(128) def get_variables_by_attributes(self, **kwargs): - return super(MemoizedDataset, self).get_variables_by_attributes(**kwargs) + return super().get_variables_by_attributes(**kwargs) @contextmanager diff --git a/compliance_checker/acdd.py b/compliance_checker/acdd.py index 008e79a65..603b4156e 100644 --- a/compliance_checker/acdd.py +++ b/compliance_checker/acdd.py @@ -153,7 +153,7 @@ def check_var_long_name(self, ds): if not check: msgs.append("long_name") results.append( - Result(BaseCheck.HIGH, check, self._var_header.format(variable), msgs) + Result(BaseCheck.HIGH, check, self._var_header.format(variable), msgs), ) return results @@ -172,7 +172,7 @@ def check_var_standard_name(self, ds): if not check: msgs.append("standard_name") results.append( - Result(BaseCheck.HIGH, check, self._var_header.format(variable), msgs) + Result(BaseCheck.HIGH, check, self._var_header.format(variable), msgs), ) return results @@ -188,7 +188,7 @@ def check_var_units(self, ds): msgs = [] # Check units and dims for variable unit_check = hasattr(ds.variables[variable], "units") - no_dim_check = getattr(ds.variables[variable], "dimensions") == tuple() + no_dim_check = ds.variables[variable].dimensions == () # Check if we have no dimensions. 
If no dims, skip test if no_dim_check: continue @@ -197,8 +197,11 @@ def check_var_units(self, ds): msgs.append("units") results.append( Result( - BaseCheck.HIGH, unit_check, self._var_header.format(variable), msgs - ) + BaseCheck.HIGH, + unit_check, + self._var_header.format(variable), + msgs, + ), ) return results @@ -247,13 +250,13 @@ def check_lat_extents(self, ds): "geospatial_lat_extents_match", [ "Could not convert one of geospatial_lat_min ({}) or max ({}) to float see CF-1.6 spec chapter 4.1" - "".format(ds.geospatial_lat_min, ds.geospatial_lat_max) + "".format(ds.geospatial_lat_min, ds.geospatial_lat_max), ], ) # identify lat var(s) as per CF 4.1 lat_vars = {} # var -> number of criteria passed - for name, var in ds.variables.items(): + for _name, var in ds.variables.items(): # must have units if not hasattr(var, "units"): continue @@ -281,7 +284,7 @@ def check_lat_extents(self, ds): False, "geospatial_lat_extents_match", [ - "Could not find lat variable to test extent of geospatial_lat_min/max, see CF-1.6 spec chapter 4.1" + "Could not find lat variable to test extent of geospatial_lat_min/max, see CF-1.6 spec chapter 4.1", ], ) @@ -295,25 +298,26 @@ def check_lat_extents(self, ds): var._name: np.nanmax(var) for var in final_lats if not np.isnan(var).all() } - min_pass = any((np.isclose(lat_min, min_val) for min_val in obs_mins.values())) - max_pass = any((np.isclose(lat_max, max_val) for max_val in obs_maxs.values())) + min_pass = any(np.isclose(lat_min, min_val) for min_val in obs_mins.values()) + max_pass = any(np.isclose(lat_max, max_val) for max_val in obs_maxs.values()) allpass = sum((min_pass, max_pass)) msgs = [] if not min_pass: msgs.append( - "Data for possible latitude variables (%s) did not match geospatial_lat_min value (%s)" - % (obs_mins, lat_min) + f"Data for possible latitude variables ({obs_mins}) did not match geospatial_lat_min value ({lat_min})", ) if not max_pass: msgs.append( - "Data for possible latitude variables (%s) did not match geospatial_lat_max value (%s)" - % (obs_maxs, lat_max) + f"Data for possible latitude variables ({obs_maxs}) did not match geospatial_lat_max value ({lat_max})", ) return Result( - BaseCheck.MEDIUM, (allpass, 2), "geospatial_lat_extents_match", msgs + BaseCheck.MEDIUM, + (allpass, 2), + "geospatial_lat_extents_match", + msgs, ) def check_lon_extents(self, ds): @@ -344,13 +348,13 @@ def check_lon_extents(self, ds): "geospatial_lon_extents_match", [ "Could not convert one of geospatial_lon_min ({}) or max ({}) to float see CF-1.6 spec chapter 4.1" - "".format(ds.geospatial_lon_min, ds.geospatial_lon_max) + "".format(ds.geospatial_lon_min, ds.geospatial_lon_max), ], ) # identify lon var(s) as per CF 4.2 lon_vars = {} # var -> number of criteria passed - for name, var in ds.variables.items(): + for _name, var in ds.variables.items(): # must have units if not hasattr(var, "units"): continue @@ -378,7 +382,7 @@ def check_lon_extents(self, ds): False, "geospatial_lon_extents_match", [ - "Could not find lon variable to test extent of geospatial_lon_min/max, see CF-1.6 spec chapter 4.2" + "Could not find lon variable to test extent of geospatial_lon_min/max, see CF-1.6 spec chapter 4.2", ], ) @@ -392,25 +396,26 @@ def check_lon_extents(self, ds): var._name: np.nanmax(var) for var in final_lons if not np.isnan(var).all() } - min_pass = any((np.isclose(lon_min, min_val) for min_val in obs_mins.values())) - max_pass = any((np.isclose(lon_max, max_val) for max_val in obs_maxs.values())) + min_pass = any(np.isclose(lon_min, min_val) for 
min_val in obs_mins.values()) + max_pass = any(np.isclose(lon_max, max_val) for max_val in obs_maxs.values()) allpass = sum((min_pass, max_pass)) msgs = [] if not min_pass: msgs.append( - "Data for possible longitude variables (%s) did not match geospatial_lon_min value (%s)" - % (obs_mins, lon_min) + f"Data for possible longitude variables ({obs_mins}) did not match geospatial_lon_min value ({lon_min})", ) if not max_pass: msgs.append( - "Data for possible longitude variables (%s) did not match geospatial_lon_max value (%s)" - % (obs_maxs, lon_max) + f"Data for possible longitude variables ({obs_maxs}) did not match geospatial_lon_max value ({lon_max})", ) return Result( - BaseCheck.MEDIUM, (allpass, 2), "geospatial_lon_extents_match", msgs + BaseCheck.MEDIUM, + (allpass, 2), + "geospatial_lon_extents_match", + msgs, ) def verify_geospatial_bounds(self, ds): @@ -436,13 +441,13 @@ def verify_geospatial_bounds(self, ds): ( "Could not parse WKT from geospatial_bounds," ' possible bad value: "{}"'.format(ds.geospatial_bounds) - ) + ), ], variable_name="geospatial_bounds", ) # parsed OK else: - return ratable_result(True, "Global Attributes", tuple()) + return ratable_result(True, "Global Attributes", ()) def _check_total_z_extents(self, ds, z_variable): """ @@ -465,7 +470,10 @@ def _check_total_z_extents(self, ds, z_variable): msgs.append("geospatial_vertical_max cannot be cast to float") if len(msgs) > 0: return Result( - BaseCheck.MEDIUM, (0, total), "geospatial_vertical_extents_match", msgs + BaseCheck.MEDIUM, + (0, total), + "geospatial_vertical_extents_match", + msgs, ) zvalue = ds.variables[z_variable][:] @@ -478,23 +486,24 @@ def _check_total_z_extents(self, ds, z_variable): msgs.append( "Cannot compare geospatial vertical extents " "against min/max of data, as non-masked data " - "length is zero" + "length is zero", ) return Result( - BaseCheck.MEDIUM, (0, total), "geospatial_vertical_extents_match", msgs + BaseCheck.MEDIUM, + (0, total), + "geospatial_vertical_extents_match", + msgs, ) else: zmin = zvalue.min() zmax = zvalue.max() if not np.isclose(vert_min, zmin): msgs.append( - "geospatial_vertical_min != min(%s) values, %s != %s" - % (z_variable, vert_min, zmin) + f"geospatial_vertical_min != min({z_variable}) values, {vert_min} != {zmin}", ) if not np.isclose(vert_max, zmax): msgs.append( - "geospatial_vertical_max != max(%s) values, %s != %s" - % (z_variable, vert_min, zmax) + f"geospatial_vertical_max != max({z_variable}) values, {vert_min} != {zmax}", ) return Result( @@ -520,14 +529,12 @@ def _check_scalar_vertical_extents(self, ds, z_variable): zvalue = ds.variables[z_variable][:].item() if not np.isclose(vert_min, vert_max): msgs.append( - "geospatial_vertical_min != geospatial_vertical_max for scalar depth values, %s != %s" - % (vert_min, vert_max) + f"geospatial_vertical_min != geospatial_vertical_max for scalar depth values, {vert_min} != {vert_max}", ) if not np.isclose(vert_max, zvalue): msgs.append( - "geospatial_vertical_max != %s values, %s != %s" - % (z_variable, vert_max, zvalue) + f"geospatial_vertical_max != {z_variable} values, {vert_max} != {zvalue}", ) return Result( @@ -556,10 +563,10 @@ def check_vertical_extents(self, ds): False, "geospatial_vertical_extents_match", [ - "Could not find vertical variable to test extent of geospatial_vertical_min/geospatial_vertical_max, see CF-1.6 spec chapter 4.3" + "Could not find vertical variable to test extent of geospatial_vertical_min/geospatial_vertical_max, see CF-1.6 spec chapter 4.3", ], ) - if 
ds.variables[z_variable].dimensions == tuple(): + if ds.variables[z_variable].dimensions == (): return self._check_scalar_vertical_extents(ds, z_variable) return self._check_total_z_extents(ds, z_variable) @@ -583,7 +590,7 @@ def check_time_extents(self, ds): False, "time_coverage_extents_match", [ - "time_coverage attributes are not formatted properly. Use the ISO 8601:2004 date format, preferably the extended format." + "time_coverage attributes are not formatted properly. Use the ISO 8601:2004 date format, preferably the extended format.", ], ) @@ -595,7 +602,7 @@ def check_time_extents(self, ds): False, "time_coverage_extents_match", [ - "Could not find time variable to test extent of time_coverage_start/time_coverage_end, see CF-1.6 spec chapter 4.4" + "Could not find time variable to test extent of time_coverage_start/time_coverage_end, see CF-1.6 spec chapter 4.4", ], ) @@ -607,7 +614,8 @@ def check_time_extents(self, ds): # in the same time zone and cause erroneous results. # Pendulum uses UTC by default, but we are being explicit here time0 = pendulum.instance( - num2pydate(ds.variables[timevar][0], ds.variables[timevar].units), "UTC" + num2pydate(ds.variables[timevar][0], ds.variables[timevar].units), + "UTC", ) time1 = pendulum.instance( num2pydate(ds.variables[timevar][-1], ds.variables[timevar].units), @@ -629,15 +637,19 @@ def check_time_extents(self, ds): if start_dt > timedelta(hours=1): msgs.append( "Date time mismatch between time_coverage_start and actual " - "time values %s (time_coverage_start) != %s (time[0])" - % (t_min.isoformat(), time0.isoformat()) + "time values {} (time_coverage_start) != {} (time[0])".format( + t_min.isoformat(), + time0.isoformat(), + ), ) score -= 1 if end_dt > timedelta(hours=1): msgs.append( "Date time mismatch between time_coverage_end and actual " - "time values %s (time_coverage_end) != %s (time[N])" - % (t_max.isoformat(), time1.isoformat()) + "time values {} (time_coverage_end) != {} (time[N])".format( + t_max.isoformat(), + time1.isoformat(), + ), ) score -= 1 @@ -653,18 +665,20 @@ def verify_convention_version(self, ds): ): if convention == "ACDD-" + self._cc_spec_version: return ratable_result( - (2, 2), None, [] + (2, 2), + None, + [], ) # name=None so grouped with Globals # if no/wrong ACDD convention, return appropriate result # Result will have name "Global Attributes" to group with globals - m = ["Conventions does not contain 'ACDD-{}'".format(self._cc_spec_version)] + m = [f"Conventions does not contain 'ACDD-{self._cc_spec_version}'"] return ratable_result((1, 2), "Global Attributes", m) except AttributeError: # NetCDF attribute not found m = [ "No Conventions attribute present; must contain ACDD-{}".format( - self._cc_spec_version - ) + self._cc_spec_version, + ), ] # Result will have name "Global Attributes" to group with globals return ratable_result((0, 2), "Global Attributes", m) @@ -680,7 +694,7 @@ class ACDD1_1Check(ACDDNCCheck): register_checker = True def __init__(self): - super(ACDD1_1Check, self).__init__() + super().__init__() self.rec_atts.extend(["keywords_vocabulary"]) self.sug_atts.extend( @@ -689,7 +703,7 @@ def __init__(self): "publisher_url", # publisher "publisher_email", # publisher "geospatial_vertical_positive", - ] + ], ) @@ -699,7 +713,7 @@ class ACDD1_3Check(ACDDNCCheck): register_checker = True def __init__(self): - super(ACDD1_3Check, self).__init__() + super().__init__() self.high_rec_atts.extend([("Conventions", self.verify_convention_version)]) self.rec_atts.extend( @@ -711,7 +725,7 @@ def 
__init__(self): "publisher_url", # publisher "publisher_email", # publisher "source", - ] + ], ) self.sug_atts.extend( @@ -730,15 +744,17 @@ def __init__(self): "date_metadata_modified", "program", "publisher_institution", - ] + ], ) # override the ISO date checks in def _check_attr_is_iso_date(attr, ds): - result_name = "{}_is_iso".format(attr) + result_name = f"{attr}_is_iso" if not hasattr(ds, attr): return ratable_result( - (0, 2), result_name, ["Attr {} is not present".format(attr)] + (0, 2), + result_name, + [f"Attr {attr} is not present"], ) else: iso_check, msgs = datetime_is_iso(getattr(ds, attr)) @@ -750,8 +766,7 @@ def _check_attr_is_iso_date(attr, ds): self.rec_atts["date_created"] = partial(_check_attr_is_iso_date, "date_created") self.sug_atts = kvp_convert(self.sug_atts) for k in ( - "date_{}".format(suffix) - for suffix in ("issued", "modified", "metadata_modified") + f"date_{suffix}" for suffix in ("issued", "modified", "metadata_modified") ): self.sug_atts[k] = partial(_check_attr_is_iso_date, k) @@ -764,7 +779,7 @@ def check_metadata_link(self, ds): if not hasattr(ds, "metadata_link"): return msgs = [] - meta_link = getattr(ds, "metadata_link") + meta_link = ds.metadata_link if "http" not in meta_link: msgs.append("Metadata URL should include http:// or https://") valid_link = len(msgs) == 0 @@ -778,7 +793,7 @@ def check_id_has_no_blanks(self, ds): """ if not hasattr(ds, "id"): return - if " " in getattr(ds, "id"): + if " " in ds.id: return Result( BaseCheck.MEDIUM, False, @@ -803,8 +818,11 @@ def check_var_coverage_content_type(self, ds): msgs.append("coverage_content_type") results.append( Result( - BaseCheck.HIGH, check, self._var_header.format(variable), msgs - ) + BaseCheck.HIGH, + check, + self._var_header.format(variable), + msgs, + ), ) continue @@ -821,8 +839,7 @@ def check_var_coverage_content_type(self, ds): } if ctype not in valid_ctypes: msgs.append( - 'coverage_content_type "%s" not in %s' - % (variable, sorted(valid_ctypes)) + f'coverage_content_type "{variable}" not in {sorted(valid_ctypes)}', ) results.append( Result( @@ -830,7 +847,7 @@ def check_var_coverage_content_type(self, ds): check, # append to list self._var_header.format(variable), msgs, - ) + ), ) return results diff --git a/compliance_checker/base.py b/compliance_checker/base.py index cfc3ec140..34981221c 100644 --- a/compliance_checker/base.py +++ b/compliance_checker/base.py @@ -59,7 +59,7 @@ def csv_splitter(input_string): return list(itertools.chain.from_iterable(csv_contents)) -class ValidationObject(object): +class ValidationObject: validator_fail_msg = "" expected_type = None @@ -126,7 +126,7 @@ def validator_func(self, input_value): # Simple class for Generic File type (default to this if file not recognised) -class GenericFile(object): +class GenericFile: """ Simple class for any file. Has same path lookup as netCDF4.Dataset. """ @@ -138,7 +138,7 @@ def filepath(self): return self.fpath -class BaseCheck(object): +class BaseCheck: HIGH = 3 MEDIUM = 2 LOW = 1 @@ -179,7 +179,9 @@ def get_test_ctx(self, severity, name, variable=None): # per check? 
If so, it could be eliminated from key hierarchy if severity not in self._defined_results[name][variable]: self._defined_results[name][variable][severity] = TestCtx( - severity, name, variable=variable + severity, + name, + variable=variable, ) return self._defined_results[name][variable][severity] @@ -194,7 +196,7 @@ def __del__(self): cfutil.get_time_variables.cache_clear() -class BaseNCCheck(object): +class BaseNCCheck: """ Base Class for NetCDF Dataset supporting Check Suites. """ @@ -220,7 +222,7 @@ def std_check(cls, dataset, name): return name in dataset.ncattrs() -class BaseSOSGCCheck(object): +class BaseSOSGCCheck: """ Base class for SOS-GetCapabilities supporting Check Suites. """ @@ -228,7 +230,7 @@ class BaseSOSGCCheck(object): supported_ds = [SensorObservationService_1_0_0] -class BaseSOSDSCheck(object): +class BaseSOSDSCheck: """ Base class for SOS-DescribeSensor supporting Check Suites. """ @@ -236,7 +238,7 @@ class BaseSOSDSCheck(object): supported_ds = [SensorML] -class Result(object): +class Result: """ Holds the result of a check method. @@ -277,16 +279,16 @@ def __init__( self.variable_name = variable_name def __repr__(self): - ret = "{} (*{}): {}".format(self.name, self.weight, self.value) + ret = f"{self.name} (*{self.weight}): {self.value}" if len(self.msgs): if len(self.msgs) == 1: - ret += " ({})".format(self.msgs[0]) + ret += f" ({self.msgs[0]})" else: - ret += " ({!s} msgs)".format(len(self.msgs)) + ret += f" ({len(self.msgs)!s} msgs)" if len(self.children): - ret += " ({!s} children)".format(len(self.children)) + ret += f" ({len(self.children)!s} children)" ret += "\n" + pprint.pformat(self.children) return ret @@ -307,7 +309,7 @@ def __eq__(self, other): return self.serialize() == other.serialize() -class TestCtx(object): +class TestCtx: """ Simple struct object that holds score values and messages to compile into a result """ @@ -439,7 +441,7 @@ def attr_check(kvp, ds, priority, ret_val, gname=None, var_name=None): msgs = [] name, other = kvp if var_name is not None: - display_name = "attribute {} in variable {}".format(name, var_name) + display_name = f"attribute {name} in variable {var_name}" base_context = ds.variables[var_name] else: display_name = name @@ -447,7 +449,7 @@ def attr_check(kvp, ds, priority, ret_val, gname=None, var_name=None): if other is None: res = std_check(ds, name) if not res: - msgs = ["{} not present".format(display_name)] + msgs = [f"{display_name} not present"] else: try: # see if this attribute is a string, try stripping @@ -455,7 +457,7 @@ def attr_check(kvp, ds, priority, ret_val, gname=None, var_name=None): att_strip = base_context.getncattr(name).strip() if not att_strip: res = False - msgs = ["{} is empty or completely whitespace".format(display_name)] + msgs = [f"{display_name} is empty or completely whitespace"] # if not a string/has no strip method we should be OK except AttributeError: pass @@ -468,19 +470,20 @@ def attr_check(kvp, ds, priority, ret_val, gname=None, var_name=None): name=gname if gname else name, msgs=msgs, variable_name=var_name, - ) + ), ) elif hasattr(other, "__iter__"): # redundant, we could easily do this with a hasattr # check instead res = std_check_in(base_context, name, other) if res == 0: - msgs.append("{} not present".format(display_name)) + msgs.append(f"{display_name} not present") elif res == 1: msgs.append( "{} present, but not in expected value list ({})".format( - display_name, sorted(other) - ) + display_name, + sorted(other), + ), ) ret_val.append( @@ -490,7 +493,7 @@ def attr_check(kvp, 
ds, priority, ret_val, gname=None, var_name=None): gname if gname else name, # groups Globals if supplied msgs, variable_name=var_name, - ) + ), ) # if we have an XPath expression, call it on the document elif type(other) is etree.XPath: @@ -498,11 +501,15 @@ def attr_check(kvp, ds, priority, ret_val, gname=None, var_name=None): # no execution path for variable res = xpath_check(ds._root, other) if not res: - msgs = ["XPath for {} not found".format(display_name)] + msgs = [f"XPath for {display_name} not found"] ret_val.append( Result( - priority, res, gname if gname else name, msgs, variable_name=var_name - ) + priority, + res, + gname if gname else name, + msgs, + variable_name=var_name, + ), ) # check if this is a subclass of ValidationObject elif isinstance(other, ValidationObject): @@ -524,16 +531,16 @@ def attr_check(kvp, ds, priority, ret_val, gname=None, var_name=None): check_val = attr_result[1] if not isinstance(check_val, str): res = False - msgs = ["{} must be a string".format(name)] + msgs = [f"{name} must be a string"] elif not other.search(check_val): res = False - msgs = ["{} must match regular expression {}".format(name, other)] + msgs = [f"{name} must match regular expression {other}"] else: res = True msgs = [] ret_val.append( - Result(priority, value=res, name=gname if gname else name, msgs=msgs) + Result(priority, value=res, name=gname if gname else name, msgs=msgs), ) # if the attribute is a function, call it @@ -542,7 +549,7 @@ def attr_check(kvp, ds, priority, ret_val, gname=None, var_name=None): # starting with "check". Avoid naming check functions # starting with check if you want to pass them in with # a tuple to avoid them being checked more than once - elif hasattr(other, "__call__"): + elif callable(other): # check that the attribute is actually present. 
# This reduces boilerplate in functions by not needing # to check whether the attribute is present every time @@ -551,7 +558,7 @@ def attr_check(kvp, ds, priority, ret_val, gname=None, var_name=None): res = other(base_context) # call the method on the dataset if not res: - msgs = ["{} not present".format(display_name)] + msgs = [f"{display_name} not present"] ret_val.append( Result( priority, @@ -559,14 +566,14 @@ def attr_check(kvp, ds, priority, ret_val, gname=None, var_name=None): gname if gname else name, msgs, variable_name=var_name, - ) + ), ) else: ret_val.append(res(priority)) # unsupported second type in second else: raise TypeError( - "Second arg in tuple has unsupported type: {}".format(type(other)) + f"Second arg in tuple has unsupported type: {type(other)}", ) return ret_val @@ -622,7 +629,10 @@ def score_group(group_name=None): Please do not using scoring groups and update your plugins if necessary """ - warnings.warn("Score_group is deprecated as of Compliance Checker v3.2.") + warnings.warn( + "Score_group is deprecated as of Compliance Checker v3.2.", + stacklevel=2, + ) def _inner(func): def _dec(s, ds): diff --git a/compliance_checker/cf/appendix_d.py b/compliance_checker/cf/appendix_d.py index c11c5d915..627e5b8eb 100644 --- a/compliance_checker/cf/appendix_d.py +++ b/compliance_checker/cf/appendix_d.py @@ -95,7 +95,7 @@ {"s", "C", "eta", "depth", "depth_c"}, ocean_computed_standard_names, ), - } + }, ) diff --git a/compliance_checker/cf/appendix_e.py b/compliance_checker/cf/appendix_e.py index 59392dad9..9921ad6d4 100644 --- a/compliance_checker/cf/appendix_e.py +++ b/compliance_checker/cf/appendix_e.py @@ -27,5 +27,5 @@ "mean_of_upper_decile", "range", "root_mean_square", - } + }, ) diff --git a/compliance_checker/cf/appendix_f.py b/compliance_checker/cf/appendix_f.py index cf4508d92..d2a8a981f 100644 --- a/compliance_checker/cf/appendix_f.py +++ b/compliance_checker/cf/appendix_f.py @@ -77,7 +77,7 @@ "projected_crs_name": {"type": "S", "extra_condition": True}, "reference_ellipsoid_name": {"type": "S", "extra_condition": False}, "towgs84": {"type": "N", "extra_condition": True}, - } + }, ) @@ -193,7 +193,7 @@ ("false_easting", "false_northing"), ("projection_x_coordinate", "projection_y_coordinate"), ], - } + }, ) # horizontal datum names from https://github.com/cf-convention/cf-conventions/wiki/csv/horiz_datum.csv diff --git a/compliance_checker/cf/cf_1_6.py b/compliance_checker/cf/cf_1_6.py index d4752bfde..4519f2fbb 100644 --- a/compliance_checker/cf/cf_1_6.py +++ b/compliance_checker/cf/cf_1_6.py @@ -26,8 +26,7 @@ class CF1_6Check(CFNCCheck): """CF-1.6-specific implementation of CFBaseCheck; supports checking netCDF datasets. 
These checks are translated documents: - http://cf-pcmdi.llnl.gov/documents/cf-conventions/1.6/cf-conventions.html - http://cf-pcmdi.llnl.gov/conformance/requirements-and-recommendations/1.6/""" + https://cfconventions.org/cf-conventions/v1.6.0/cf-conventions.html""" register_checker = True _cc_spec = "cf" @@ -37,9 +36,18 @@ class CF1_6Check(CFNCCheck): _cc_display_headers = {3: "Errors", 2: "Warnings", 1: "Info"} appendix_a = appendix_a_base appendix_d_parametric_coords = dimless_vertical_coordinates_1_6 + _allowed_numeric_var_types = { + np.character, + np.bytes_, # "|S1" dtype, byte array used as string + np.int8, + np.int16, + np.int32, + np.float32, + np.float64, + } def __init__(self, options=None): # initialize with parent methods and data - super(CF1_6Check, self).__init__(options) + super().__init__(options) self.cell_methods = cell_methods16 self.grid_mapping_dict = grid_mapping_dict16 @@ -77,23 +85,13 @@ def check_data_types(self, ds): if ( v.dtype is not str and v.dtype.kind != "S" - and all( - v.dtype.type != t - for t in ( - np.character, - np.dtype("|S1"), - np.dtype("b"), - np.dtype("i2"), - np.dtype("i4"), - np.float32, - np.double, - ) - ) + and v.dtype.type not in self._allowed_numeric_var_types ): fails.append( "The variable {} failed because the datatype is {}".format( - k, v.datatype - ) + k, + v.datatype, + ), ) return Result( BaseCheck.HIGH, @@ -128,7 +126,7 @@ def check_child_attr_data_types(self, ds): "_FillValue", } - for var_name, var in ds.variables.items(): + for _var_name, var in ds.variables.items(): for att_name in special_attrs.intersection(var.ncattrs()): self._parent_var_attr_type_check(att_name, var, ctx) return ctx.to_result() @@ -173,10 +171,10 @@ def check_add_offset_scale_factor_type(self, ds): results = [] add_offset_vars = ds.get_variables_by_attributes( - add_offset=lambda x: x is not None + add_offset=lambda x: x is not None, ) scale_factor_vars = ds.get_variables_by_attributes( - scale_factor=lambda x: x is not None + scale_factor=lambda x: x is not None, ) both = set(add_offset_vars).intersection(scale_factor_vars) @@ -186,7 +184,7 @@ def check_add_offset_scale_factor_type(self, ds): both_msgs.append( "When both scale_factor and add_offset " f"are supplied for variable {both_var.name}, " - "they must have the same type" + "they must have the same type", ) results.append( Result( @@ -194,7 +192,7 @@ def check_add_offset_scale_factor_type(self, ds): not bool(both_msgs), self.section_titles["8.1"], both_msgs, - ) + ), ) for _att_vars_tup in ( @@ -202,14 +200,13 @@ def check_add_offset_scale_factor_type(self, ds): ("scale_factor", scale_factor_vars), ): results.extend( - list( - map( - lambda var: self._check_add_offset_scale_factor_type( - var, _att_vars_tup[0] - ), - _att_vars_tup[1], + [ + self._check_add_offset_scale_factor_type( + var, + _att_vars_tup[0], ) - ) + for var in _att_vars_tup[1] + ], ) return results @@ -341,7 +338,7 @@ def check_dimension_names(self, ds): for dimension, count in dims.items(): if count > 1: fails.append( - "%s has two or more dimensions named %s" % (k, dimension) + f"{k} has two or more dimensions named {dimension}", ) return Result( @@ -399,7 +396,9 @@ def check_dimension_order(self, ds): "unlimited).".format( name, self._get_pretty_dimension_order_with_type( - ds, name, dimension_order + ds, + name, + dimension_order, ), ), ) @@ -417,7 +416,7 @@ def check_fill_value_equal_missing_value(self, ds): fails = [] total = 0 - for name, variable in ds.variables.items(): + for _name, variable in ds.variables.items(): # 
If the variable have a defined _FillValue a defined missing_value check it. if hasattr(variable, "_FillValue") and hasattr(variable, "missing_value"): @@ -425,8 +424,8 @@ def check_fill_value_equal_missing_value(self, ds): if variable._FillValue != variable.missing_value: fails.append( "For the variable {} the missing_value must be equal to the _FillValue".format( - variable.name - ) + variable.name, + ), ) return Result( @@ -448,7 +447,7 @@ def check_valid_range_or_valid_min_max_present(self, ds): fails = [] total = 0 - for name, variable in ds.variables.items(): + for _name, variable in ds.variables.items(): if hasattr(variable, "valid_max") and ( hasattr(variable, "valid_min") or hasattr(variable, "valid_range") ): @@ -457,8 +456,8 @@ def check_valid_range_or_valid_min_max_present(self, ds): fails.append( "For the variable {} the valid_range attribute must not be present " "if the valid_min and/or valid_max attributes are present".format( - variable.name - ) + variable.name, + ), ) return Result( @@ -498,7 +497,8 @@ def check_fill_value_outside_valid_range(self, ds): valid_fill_range.assert_true( False, "{};\n\t{}:valid_range must be a numeric type not a string".format( - m, name + m, + name, ), ) continue @@ -509,15 +509,16 @@ def check_fill_value_outside_valid_range(self, ds): if isinstance(variable.valid_min, str): valid_fill_range.assert_true( False, - "{}:valid_min must be a numeric type not a string".format(name), + f"{name}:valid_min must be a numeric type not a string", ) if isinstance(variable.valid_max, str): valid_fill_range.assert_true( False, - "{}:valid_max must be a numeric type not a string".format(name), + f"{name}:valid_max must be a numeric type not a string", ) if isinstance(variable.valid_min, str) or isinstance( - variable.valid_max, str + variable.valid_max, + str, ): continue rmin = variable.valid_min @@ -564,6 +565,26 @@ def check_convention_globals(self, ds): ) return valid_globals.to_result() + # IMPLEMENTATION + def check_coordinate_variables_strict_monotonicity(self, ds): + """ + Checks that data in coordinate variables is either monotonically + increasing or decreasing + """ + + ret_val = [] + for coord_var_name in self._find_coord_vars(ds): + coord_var = ds.variables[coord_var_name] + arr_diff = np.diff(coord_var) + monotonicity = TestCtx(BaseCheck.HIGH, self.section_titles["5"]) + monotonicity.assert_true( + np.all(arr_diff > 0) or np.all(arr_diff < 0), + f'Coordinate variable "{coord_var_name}" must be strictly monotonic', + ) + ret_val.append(monotonicity.to_result()) + + return ret_val + def check_convention_possibly_var_attrs(self, ds): """ Check variable and global attributes are strings for recommended attributes under CF §2.6.2 @@ -649,7 +670,7 @@ def check_units(self, ds): dimless_vert = { var.name for var in ds.get_variables_by_attributes( - standard_name=lambda s: s in self.appendix_d_parametric_coords + standard_name=lambda s: s in self.appendix_d_parametric_coords, ) if not hasattr(var, "units") } @@ -661,7 +682,7 @@ def check_units(self, ds): + auxiliary_coordinates + geophysical_variables + forecast_variables - + modifier_variables + + modifier_variables, ) # standard names with modifiers require proper units, *except* for flags, where they should not be present - dimless_vert ) @@ -686,7 +707,7 @@ def check_units(self, ds): standard_name = getattr(variable, "standard_name", None) standard_name, standard_name_modifier = self._split_standard_name( - standard_name + standard_name, ) units = getattr(variable, "units", None) @@ -700,7 +721,8 @@ 
def check_units(self, ds): if units is not None and units_attr_is_string.assert_true( isinstance(units, str), "units ({}) attribute of '{}' must be a string compatible with UDUNITS".format( - units, variable.name + units, + variable.name, ), ): valid_udunits = self._check_valid_udunits(ds, name) @@ -733,16 +755,17 @@ def _check_valid_cf_units(self, ds, variable_name): units = getattr(variable, "units", None) standard_name_full = getattr(variable, "standard_name", None) standard_name, standard_name_modifier = self._split_standard_name( - standard_name_full + standard_name_full, ) std_name_units_dimensionless = cfutil.is_dimensionless_standard_name( - self._std_names._root, standard_name + self._std_names._root, + standard_name, ) # 3) units are not deprecated valid_units.assert_true( units not in deprecated, - 'units for {}, "{}" are deprecated by CF 1.6'.format(variable_name, units), + f'units for {variable_name}, "{units}" are deprecated by CF 1.6', ) # 4/5) Modifiers, if present, have the appropriate units, or none for # status_flag @@ -786,7 +809,7 @@ def _check_valid_cf_units(self, ds, variable_name): valid_units.assert_true( should_be_dimensionless or units is not None, "units attribute is required for {} when variable is not a dimensionless quantity".format( - variable_name + variable_name, ), ) @@ -796,14 +819,14 @@ def _check_valid_cf_units(self, ds, variable_name): # 2) units attribute must be a string valid_units.assert_true( should_be_dimensionless or isinstance(units, str), - "units attribute for {} needs to be a string".format(variable_name), + f"units attribute for {variable_name} needs to be a string", ) try: units_conv = Unit(units) except ValueError: valid_units.messages.append( - f'Unit string "{units}" is not recognized by UDUnits' + f'Unit string "{units}" is not recognized by UDUnits', ) valid_units.out_of += 1 return valid_units @@ -837,7 +860,8 @@ def _check_valid_udunits(self, ds, variable_name): standard_name = getattr(variable, "standard_name", None) standard_name, standard_name_modifier = self._split_standard_name(standard_name) std_name_units_dimensionless = cfutil.is_dimensionless_standard_name( - self._std_names._root, standard_name + self._std_names._root, + standard_name, ) # If the variable is supposed to be dimensionless, it automatically passes @@ -852,7 +876,8 @@ def _check_valid_udunits(self, ds, variable_name): valid_udunits.assert_true( should_be_dimensionless or are_udunits or units is None, 'units for {}, "{}" are not recognized by UDUNITS'.format( - variable_name, units + variable_name, + units, ), ) return valid_udunits.to_result() @@ -874,7 +899,8 @@ def _check_valid_standard_units(self, ds, variable_name): # If the variable is supposed to be dimensionless, it automatically passes std_name_units_dimensionless = cfutil.is_dimensionless_standard_name( - self._std_names._root, standard_name + self._std_names._root, + standard_name, ) if std_name_units_dimensionless: @@ -988,7 +1014,7 @@ def check_standard_name(self, ds): standard_name = getattr(ncvar, "standard_name", None) standard_name, standard_name_modifier = self._split_standard_name( - standard_name + standard_name, ) long_name = getattr(ncvar, "long_name", None) long_or_std_name = TestCtx(BaseCheck.HIGH, self.section_titles["3.3"]) @@ -996,7 +1022,7 @@ def check_standard_name(self, ds): long_name_present = True long_or_std_name.assert_true( isinstance(long_name, str), - "Attribute long_name for variable {} must be a string".format(name), + f"Attribute long_name for variable {name} must be a 
string", ) else: long_name_present = False @@ -1014,16 +1040,18 @@ def check_standard_name(self, ds): valid_std_name.assert_true( isinstance(standard_name, str), "Attribute standard_name for variable {} must be a string".format( - name + name, ), ) valid_std_name.out_of += 1 if standard_name not in self._std_names: err_msg = "standard_name {} is not defined in Standard Name Table v{}.".format( - standard_name or "undefined", self._std_names._version + standard_name or "undefined", + self._std_names._version, ) close_matches = difflib.get_close_matches( - standard_name, self._std_names + standard_name, + self._std_names, ) if close_matches: err_msg += f" Possible close match(es): {close_matches}" @@ -1040,7 +1068,8 @@ def check_standard_name(self, ds): standard_name_modifier in valid_modifiers, 'Standard name modifier "{}" for variable {} is not a valid modifier ' "according to CF Appendix C".format( - standard_name_modifier, name + standard_name_modifier, + name, ), ) @@ -1052,7 +1081,7 @@ def check_standard_name(self, ds): long_or_std_name.assert_true( long_name_present or standard_name_present, "Attribute long_name or/and standard_name is highly recommended for variable {}".format( - name + name, ), ) ret_val.append(long_or_std_name.to_result()) @@ -1078,7 +1107,7 @@ def check_ancillary_variables(self, ds): ret_val = [] for ncvar in ds.get_variables_by_attributes( - ancillary_variables=lambda x: x is not None + ancillary_variables=lambda x: x is not None, ): name = ncvar.name valid_ancillary = TestCtx(BaseCheck.HIGH, self.section_titles["3.4"]) @@ -1098,7 +1127,7 @@ def check_ancillary_variables(self, ds): for ancillary_variable in ancillary_variables.split(): valid_ancillary.assert_true( ancillary_variable in ds.variables, - "{} is not a variable in this dataset".format(ancillary_variable), + f"{ancillary_variable} is not a variable in this dataset", ) ret_val.append(valid_ancillary.to_result()) @@ -1149,7 +1178,7 @@ def check_flags(self, ds): # Check that the variable defines mask or values valid_flags_var.assert_true( flag_values is not None or flag_masks is not None, - "{} does not define either flag_masks or flag_values".format(name), + f"{name} does not define either flag_masks or flag_values", ) ret_val.append(valid_flags_var.to_result()) @@ -1181,8 +1210,8 @@ def check_flags(self, ds): if not allvr.value: allvr.msgs = [ "flag masks and flag values for '{}' combined don't equal flag values".format( - name - ) + name, + ), ] ret_val.append(allvr) @@ -1219,7 +1248,7 @@ def _check_flag_values(self, ds, name): flag_set = np.unique(flag_values) valid_values.assert_true( flag_set.size == np.array(flag_values).size, - "{}'s flag_values must be independent and can not be repeated".format(name), + f"{name}'s flag_values must be independent and can not be repeated", ) # IMPLEMENTATION CONFORMANCE 3.5 REQUIRED 1/8 @@ -1281,7 +1310,7 @@ def _check_flag_masks(self, ds, name): valid_masks.assert_true( type_ok, - "{}'s data type must be capable of bit-field expression".format(name), + f"{name}'s data type must be capable of bit-field expression", ) if isinstance(flag_meanings, str): @@ -1314,12 +1343,12 @@ def _check_flag_meanings(self, ds, name): valid_meanings.assert_true( flag_meanings is not None, - "{}'s flag_meanings attribute is required for flag variables".format(name), + f"{name}'s flag_meanings attribute is required for flag variables", ) valid_meanings.assert_true( isinstance(flag_meanings, str), - "{}'s flag_meanings attribute must be a string".format(name), + f"{name}'s 
flag_meanings attribute must be a string", ) # We can't perform any additional checks if it's not a string @@ -1327,7 +1356,8 @@ def _check_flag_meanings(self, ds, name): return valid_meanings.to_result() valid_meanings.assert_true( - len(flag_meanings) > 0, "{}'s flag_meanings can't be empty".format(name) + len(flag_meanings) > 0, + f"{name}'s flag_meanings can't be empty", ) # IMPLEMENTATION CONFORMANCE REQUIRED 3.5 3/8 @@ -1338,9 +1368,9 @@ def _check_flag_meanings(self, ds, name): valid_meanings.assert_true( False, "{}'s flag_meanings attribute defined an illegal flag meaning ".format( - name + name, ) - + "{}".format(meaning), + + f"{meaning}", ) return valid_meanings.to_result() @@ -1410,7 +1440,7 @@ def _check_axis(self, ds, name): axis_is_string = (isinstance(axis, str),) valid_axis.assert_true( axis_is_string and len(axis) > 0, - "{}'s axis attribute must be a non-empty string".format(name), + f"{name}'s axis attribute must be a non-empty string", ) # If axis isn't a string we can't continue any checks @@ -1419,8 +1449,7 @@ def _check_axis(self, ds, name): valid_axis.assert_true( axis in allowed_axis, - "{}'s axis attribute must be T, X, Y, or Z, ".format(name) - + "currently {}".format(axis), + f"{name}'s axis attribute must be T, X, Y, or Z, " + f"currently {axis}", ) return valid_axis.to_result() @@ -1480,7 +1509,7 @@ def check_latitude(self, ds): valid_latitude = TestCtx(BaseCheck.HIGH, self.section_titles["4.1"]) valid_latitude.assert_true( units is not None, - "latitude variable '{}' must define units".format(latitude), + f"latitude variable '{latitude}' must define units", ) ret_val.append(valid_latitude.to_result()) @@ -1512,7 +1541,10 @@ def check_latitude(self, ds): "".format(latitude) ) recommended_units = Result( - BaseCheck.LOW, (1, 1), self.section_titles["4.1"], [msg] + BaseCheck.LOW, + (1, 1), + self.section_titles["4.1"], + [msg], ) ret_val.append(recommended_units) @@ -1586,7 +1618,7 @@ def check_longitude(self, ds): valid_longitude = TestCtx(BaseCheck.HIGH, self.section_titles["4.2"]) valid_longitude.assert_true( units is not None, - "longitude variable '{}' must define units".format(longitude), + f"longitude variable '{longitude}' must define units", ) ret_val.append(valid_longitude.to_result()) @@ -1618,7 +1650,10 @@ def check_longitude(self, ds): "".format(longitude) ) recommended_units = Result( - BaseCheck.LOW, (1, 1), self.section_titles["4.2"], [msg] + BaseCheck.LOW, + (1, 1), + self.section_titles["4.2"], + [msg], ) ret_val.append(recommended_units) @@ -1635,7 +1670,9 @@ def check_longitude(self, ds): return ret_val def check_dimensional_vertical_coordinate( - self, ds, dimless_vertical_coordinates=dimless_vertical_coordinates_1_6 + self, + ds, + dimless_vertical_coordinates=dimless_vertical_coordinates_1_6, ): """ Check units for variables defining vertical position are valid under @@ -1699,7 +1736,12 @@ def check_dimensional_vertical_coordinate( return ret_val def _check_dimensionless_vertical_coordinate_1_6( - self, ds, vname, deprecated_units, ret_val, dim_vert_coords_dict + self, + ds, + vname, + deprecated_units, + ret_val, + dim_vert_coords_dict, ): """ Check that a dimensionless vertical coordinate variable is valid under @@ -1762,7 +1804,7 @@ def check_dimensionless_vertical_coordinates(self, ds): deprecated_units, self._check_dimensionless_vertical_coordinate_1_6, dimless_vertical_coordinates_1_6, - ) + ), ) return ret_val @@ -1821,11 +1863,44 @@ def check_time_coordinate(self, ds): reasoning = None if not correct_units: reasoning = ["%s does 
not have correct time units" % name]
-            result = Result(
-                BaseCheck.HIGH, correct_units, self.section_titles["4.4"], reasoning
-            )
-            ret_val.append(result)
-
+                result = Result(
+                    BaseCheck.HIGH,
+                    correct_units,
+                    self.section_titles["4.4"],
+                    reasoning,
+                )
+                ret_val.append(result)
+                continue
+            # IMPLEMENTATION CONFORMANCE 4.4 RECOMMENDED 1/2
+            if hasattr(variable, "climatology"):
+                year_match = regex.match(r"\w+ since (?P<year>\d{1,4})", variable.units)
+                # year should always exist at this point if it's been parsed as
+                # valid date
+                if int(year_match.group("year")) == 0:
+                    message = (
+                        f"Time coordinate variable {variable.name}'s "
+                        "use of year 0 for climatological time is "
+                        "deprecated"
+                    )
+                    result = Result(
+                        BaseCheck.MEDIUM,
+                        False,
+                        self.section_titles["4.4"],
+                        [message],
+                    )
+                    ret_val.append(result)
+            # IMPLEMENTATION CONFORMANCE 4.4 RECOMMENDED 2/2
+            # catch non-recommended months or years time interval
+            unit = Unit(variable.units)
+            if unit.is_long_time_interval():
+                message = f"Using relative time interval of months or years is not recommended for coordinate variable {variable.name}"
+                result = Result(
+                    BaseCheck.MEDIUM,
+                    False,
+                    self.section_titles["4.4"],
+                    [message],
+                )
+                ret_val.append(result)
         return ret_val
 
     def check_calendar(self, ds):
@@ -1870,7 +1945,7 @@ def check_calendar(self, ds):
         :rtype: list
         :return: List of results
         """
-        valid_calendars = {
+        standard_calendars = {
             "gregorian",
             "standard",
             "proleptic_gregorian",
@@ -1896,10 +1971,32 @@ def check_standard_calendar_no_cross(time_var):
             # should be made for time coordinate variables anyways, so errors
             # should be caught where implemented there
             crossover_date = cftime.DatetimeGregorian(1582, 10, 15)
-            times = cftime.num2date(time_var[:].compressed(), time_var.units)
+            # has_year_zero set to true in order to just check crossover,
+            # actual year less than or equal to zero check handled elsewhere
+            # when standard/Gregorian, or Julian calendars used.
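# A minimal, self-contained sketch of the two RECOMMENDED checks added above,
# not part of the patch itself. It assumes the same third-party packages the
# checker already imports (the `regex` module and cf_units); the units strings
# are hypothetical examples rather than values from any dataset under test.
import regex
from cf_units import Unit

climatology_units = "days since 0-1-1 00:00:00"  # reference year 0
year = regex.match(r"\w+ since (?P<year>\d{1,4})", climatology_units).group("year")
print(int(year) == 0)  # True -> year 0 for climatological time is deprecated

interval_units = "months since 1970-01-01"
print(Unit(interval_units).is_long_time_interval())  # True -> months/years intervals are discouraged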
- no_cross_1582 = ~np.any(times < crossover_date) - if no_cross_1582: + # WARNING: might fail here if months_since are used and suppress + # usual warning + try: + times = cftime.num2date( + time_var[:].compressed(), + time_var.units, + has_year_zero=True, + ) + except ValueError: + return Result( + BaseCheck.LOW, + False, + self.section_titles["4.4"], + [ + "Miscellaneous failure when attempting to calculate crossover, possible malformed date", + ], + ) + + crossover_1582 = np.any(times < crossover_date) and np.any( + times >= crossover_date, + ) + if not crossover_1582: reasoning = ( f"Variable {time_var.name} has standard or Gregorian " "calendar and does not cross 1582-10-15T00:00Z" @@ -1912,15 +2009,22 @@ def check_standard_calendar_no_cross(time_var): ) return Result( - BaseCheck.LOW, no_cross_1582, self.section_titles["4.4"], [reasoning] + BaseCheck.LOW, + not crossover_1582, + self.section_titles["4.4"], + [reasoning], ) # if has a calendar, check that it is within the valid values # otherwise no calendar is valid - for time_var in ds.get_variables_by_attributes( - calendar=lambda c: c is not None - ): + # this will only fetch variables with time units defined + for time_var_name in cfutil.get_time_variables(ds): + if time_var_name not in {var.name for var in util.find_coord_vars(ds)}: + continue + time_var = ds.variables[time_var_name] + if not hasattr(time_var, "calendar"): + continue if time_var.calendar.lower() == "gregorian": reasoning = ( f"For time variable {time_var.name}, when using " @@ -1929,7 +2033,10 @@ def check_standard_calendar_no_cross(time_var): "the calendar attribute" ) result = Result( - BaseCheck.LOW, False, self.section_titles["4.4"], [reasoning] + BaseCheck.LOW, + False, + self.section_titles["4.4.1"], + [reasoning], ) ret_val.append(result) # check here and in the below case that time does not cross @@ -1939,12 +2046,12 @@ def check_standard_calendar_no_cross(time_var): ret_val.append(check_standard_calendar_no_cross(time_var)) # if a nonstandard calendar, then leap_years and leap_months must # must be present - if time_var.calendar.lower() not in valid_calendars: + if time_var.calendar.lower() not in standard_calendars: result = self._check_leap_time(time_var) # passes if the calendar is valid, otherwise notify of invalid # calendar else: - result = Result(BaseCheck.LOW, True, self.section_titles["4.4"], None) + result = Result(BaseCheck.LOW, True, self.section_titles["4.4.1"]) ret_val.append(result) return ret_val @@ -1964,7 +2071,7 @@ def _check_leap_time(self, time_variable): leap_time.messages.append( f"For nonstandard calendar on variable {time_variable.name}, " "attribute month_lengths must be supplied as a 12-element " - "integer array" + "integer array", ) return leap_time.to_result() # If leap years are included, then attributes leap_month and @@ -2033,12 +2140,12 @@ def check_aux_coordinates(self, ds): # required for contiguous count_vars = ds.get_variables_by_attributes( - sample_dimension=lambda x: x is not None + sample_dimension=lambda x: x is not None, ) # required for indexed index_vars = ds.get_variables_by_attributes( - instance_dimension=lambda x: x is not None + instance_dimension=lambda x: x is not None, ) # if these attributes exist, we don't need to test @@ -2084,7 +2191,10 @@ def check_aux_coordinates(self, ds): "dimensions for auxiliary coordinate variable {} ({}) " "are not a subset of dimensions for variable {} ({})" "".format( - aux_coord, ", ".join(aux_coord_dims), name, ", ".join(dim_set) + aux_coord, + ", 
".join(aux_coord_dims), + name, + ", ".join(dim_set), ), ) ret_val.append(valid_aux_coords.to_result()) @@ -2125,7 +2235,9 @@ def check_duplicate_axis(self, ds): no_duplicates.assert_true( len(coords) <= 1, "'{}' has duplicate axis {} defined by [{}]".format( - name, axis, ", ".join(sorted(coords)) + name, + axis, + ", ".join(sorted(coords)), ), ) @@ -2300,11 +2412,11 @@ def check_reduced_horizontal_grid(self, ds): # Make sure it's associated with valid lat and valid lon valid_rgrid.assert_true( len(coord_set.intersection(lons)) > 0, - "{} must be associated with a valid longitude coordinate".format(name), + f"{name} must be associated with a valid longitude coordinate", ) valid_rgrid.assert_true( len(coord_set.intersection(lats)) > 0, - "{} must be associated with a valid latitude coordinate".format(name), + f"{name} must be associated with a valid latitude coordinate", ) valid_rgrid.assert_true( len(axis_map["C"]) == 1, @@ -2369,7 +2481,7 @@ def _check_grid_mapping_attr_condition(self, attr, attr_name): else: raise NotImplementedError( - "Evaluation for {} not yet implemented".format(attr_name) + f"Evaluation for {attr_name} not yet implemented", ) def _evaluate_latitude_of_projection_origin(self, val): @@ -2585,7 +2697,8 @@ def check_geographic_region(self, ds): valid_region.assert_true( "".join(region.astype(str)).lower() in region_list, "6.1.1 '{}' specified by '{}' is not a valid region".format( - "".join(region.astype(str)), var.name + "".join(region.astype(str)), + var.name, ), ) ret_val.append(valid_region.to_result()) @@ -2640,7 +2753,7 @@ def check_cell_boundaries(self, ds): ret_val = [] reasoning = [] for variable_name, boundary_variable_name in cfutil.get_cell_boundary_map( - ds + ds, ).items(): variable = ds.variables[variable_name] valid = True @@ -2649,9 +2762,10 @@ def check_cell_boundaries(self, ds): valid = False reasoning.append( "Boundary variable {} referenced by {} not ".format( - boundary_variable_name, variable.name + boundary_variable_name, + variable.name, ) - + "found in dataset variables" + + "found in dataset variables", ) else: boundary_variable = ds.variables[boundary_variable_name] @@ -2661,30 +2775,30 @@ def check_cell_boundaries(self, ds): valid = False reasoning.append( "Boundary variable {} specified by {}".format( - boundary_variable.name, variable.name + boundary_variable.name, + variable.name, ) + " should have at least two dimensions to enclose the base " - + "case of a one dimensionsal variable" + + "case of a one dimensionsal variable", ) if boundary_variable.ndim != variable.ndim + 1: valid = False reasoning.append( - "The number of dimensions of the variable %s is %s, but the " - "number of dimensions of the boundary variable %s is %s. The boundary variable " - "should have %s dimensions" - % ( + "The number of dimensions of the variable {} is {}, but the " + "number of dimensions of the boundary variable {} is {}. The boundary variable " + "should have {} dimensions".format( variable.name, variable.ndim, boundary_variable.name, boundary_variable.ndim, variable.ndim + 1, - ) + ), ) if variable.dimensions[:] != boundary_variable.dimensions[: variable.ndim]: valid = False reasoning.append( "Boundary variable coordinates (for {}) are in improper order: {}. 
Bounds-specific dimensions should be last"
-                    "".format(variable.name, boundary_variable.dimensions)
+                    "".format(variable.name, boundary_variable.dimensions),
                 )
 
             # ensure p vertices form a valid simplex given previous a...n
@@ -2700,14 +2814,102 @@ def check_cell_boundaries(self, ds):
                         variable.name,
                         len(variable.dimensions) + 1,
                         boundary_variable.dimensions[:-1],
-                    )
+                    ),
                 )
         result = Result(
-            BaseCheck.MEDIUM, valid, self.section_titles["7.1"], reasoning
+            BaseCheck.MEDIUM,
+            valid,
+            self.section_titles["7.1"],
+            reasoning,
         )
         ret_val.append(result)
         return ret_val
 
+    def _cell_measures_core(self, ds, var, external_set, variable_template):
+        # IMPLEMENTATION CONFORMANCE REQUIRED 1/2
+        reasoning = []
+        search_str = (
+            r"^(?P<measure_type>area|volume):\s+(?P<cell_measure_var_name>\w+)$"
+        )
+        search_res = regex.match(search_str, var.cell_measures)
+        if not search_res:
+            valid = False
+            reasoning.append(
+                "The cell_measures attribute for variable {} "
+                "is formatted incorrectly. It should take the "
+                "form of either 'area: cell_var' or "
+                "'volume: cell_var' where cell_var is an existing name of "
+                "a variable describing the cell measures.".format(var.name),
+            )
+        else:
+            valid = True
+            cell_measure_var_name = search_res.group("cell_measure_var_name")
+            cell_measure_type = search_res.group("measure_type")
+            # TODO: cache previous results
+            if cell_measure_var_name not in set(ds.variables.keys()).union(
+                external_set,
+            ):
+                valid = False
+                reasoning.append(
+                    f"Cell measure variable {cell_measure_var_name} referred to by "
+                    f"{var.name} is not present in {variable_template}s",
+                )
+            # CF 1.7+ assume external variables -- further checks can't be run here
+            elif cell_measure_var_name in external_set:
+                # can't test anything on an external var
+                return Result(
+                    BaseCheck.MEDIUM,
+                    valid,
+                    (self.section_titles["7.2"]),
+                    reasoning,
+                )
+
+            else:
+                cell_measure_var = ds.variables[cell_measure_var_name]
+                if not hasattr(cell_measure_var, "units"):
+                    valid = False
+                    reasoning.append(
+                        "Cell measure variable {} is required "
+                        "to have units attribute defined".format(cell_measure_var_name),
+                    )
+                else:
+                    # IMPLEMENTATION CONFORMANCE REQUIRED 2/2
+                    # verify this combination {area: 'm2', volume: 'm3'}
+
+                    # key is valid measure types, value is expected
+                    # exponent
+                    exponent_lookup = {"area": 2, "volume": 3}
+                    exponent = exponent_lookup[search_res.group("measure_type")]
+                    conversion_failure_msg = (
+                        f'Variable "{cell_measure_var.name}" must have units which are convertible '
+                        f'to UDUNITS "m{exponent}" when variable is referred to by a {variable_template} with '
+                        f'cell_methods attribute with a measure type of "{cell_measure_type}".'
+ ) + try: + cell_measure_units = Unit(cell_measure_var.units) + except ValueError: + valid = False + reasoning.append(conversion_failure_msg) + else: + if not cell_measure_units.is_convertible(Unit(f"m{exponent}")): + valid = False + reasoning.append(conversion_failure_msg) + if not set(cell_measure_var.dimensions).issubset(var.dimensions): + valid = False + reasoning.append( + "Cell measure variable {} must have " + "dimensions which are a subset of " + "those defined in variable {}.".format( + cell_measure_var_name, + var.name, + ), + ) + + return Result(BaseCheck.MEDIUM, valid, (self.section_titles["7.2"]), reasoning) + def check_cell_measures(self, ds): """ 7.2 To indicate extra information about the spatial properties of a @@ -2730,86 +2932,11 @@ def check_cell_measures(self, ds): :return: List of results """ ret_val = [] - reasoning = [] variables = ds.get_variables_by_attributes( - cell_measures=lambda c: c is not None + cell_measures=lambda c: c is not None, ) for var in variables: - # IMPLEMENTATION CONFORMANCE REQUIRED 1/2 - search_str = r"^(?:area|volume): (\w+)$" - search_res = regex.search(search_str, var.cell_measures) - if not search_res: - valid = False - reasoning.append( - "The cell_measures attribute for variable {} " - "is formatted incorrectly. It should take the" - " form of either 'area: cell_var' or " - "'volume: cell_var' where cell_var is the " - "variable describing the cell measures".format(var.name) - ) - else: - valid = True - cell_meas_var_name = search_res.groups()[0] - # TODO: cache previous results - if cell_meas_var_name not in ds.variables: - valid = False - reasoning.append( - "Cell measure variable {} referred to by " - "{} is not present in dataset variables".format( - cell_meas_var_name, var.name - ) - ) - else: - cell_meas_var = ds.variables[cell_meas_var_name] - if not hasattr(cell_meas_var, "units"): - valid = False - reasoning.append( - "Cell measure variable {} is required " - "to have units attribute defined.".format( - cell_meas_var_name - ) - ) - else: - # IMPLEMENTATION CONFORMANCE REQUIRED 2/2 - # verify this combination {area: 'm2', volume: 'm3'} - dic_expected = {"area": "m2", "volume": "m3"} - dic_to_be_verified = { - (var.cell_measures).split(":")[0]: cell_meas_var.units - } - - if not set(dic_to_be_verified).issubset(dic_expected): - valid = False - reasoning.append( - "Cell measure variable {} must have " - "units that are consistent with the measure type." - "i.e. {}.".format(cell_meas_var_name, dic_expected) - ) - - # verify units are recognized by UDUNITS - valid_udunits = self._check_valid_udunits( - ds, cell_meas_var_name - ) - if valid_udunits.value[0] != valid_udunits.value[1]: - valid = False - reasoning.append( - "Cell measure variable {} referred to by " - "{} has a unit {} not recognized by UDUNITS".format( - cell_meas_var_name, var.name, cell_meas_var.units - ) - ) - if not set(cell_meas_var.dimensions).issubset(var.dimensions): - valid = False - reasoning.append( - "Cell measure variable {} must have " - "dimensions which are a subset of " - "those defined in variable {}.".format( - cell_meas_var_name, var.name - ) - ) - - result = Result( - BaseCheck.MEDIUM, valid, (self.section_titles["7.2"]), reasoning - ) + result = self._cell_measures_core(ds, var, set(), "dataset variable") ret_val.append(result) return ret_val @@ -2842,7 +2969,7 @@ def check_cell_methods(self, ds): # CONFORMANCE IMPLEMENTATION 7.3 1/3 psep = regex.compile( r"(?P\w+: )+(?P\w+) ?(?Pwhere (?P\w+) " - r"?(?Pover (?P\w+))?| ?)(?:\((?P[^)]*)\))?" 
+ r"?(?Pover (?P\w+))?| ?)(?:\((?P[^)]*)\))?", ) for var in ds.get_variables_by_attributes(cell_methods=lambda x: x is not None): @@ -2852,7 +2979,8 @@ def check_cell_methods(self, ds): method = getattr(var, "cell_methods", "") valid_attribute = TestCtx( - BaseCheck.HIGH, self.section_titles["7.3"] + BaseCheck.HIGH, + self.section_titles["7.3"], ) # changed from 7.1 to 7.3 valid_attribute.assert_true( regex.match(psep, method) is not None, @@ -2906,8 +3034,9 @@ def check_cell_methods(self, ds): # not sure what to do if a comment contains a colon! ret_val.append( self._check_cell_methods_paren_info( - match.group("paren_contents"), var - ).to_result() + match.group("paren_contents"), + var, + ).to_result(), ) return ret_val @@ -2932,20 +3061,22 @@ def _check_cell_methods_paren_info(self, paren_contents, var): # we need the count of the matches, and re.findall() only returns # groups if they are present and we wish to see if the entire match # object concatenated together is the same as the original string - pmatches = [m for m in regex.finditer(kv_pair_pat, paren_contents)] + pmatches = list(regex.finditer(kv_pair_pat, paren_contents)) for i, pmatch in enumerate(pmatches): keyword, val = pmatch.groups() if keyword == "interval:": valid_info.out_of += 2 interval_matches = regex.match( - r"^\s*(?P\S+)\s+(?P\S+)\s*$", val + r"^\s*(?P\S+)\s+(?P\S+)\s*$", + val, ) # attempt to get the number for the interval if not interval_matches: valid_info.messages.append( '§7.3.3 {}:cell_methods contains an interval specification that does not parse: "{}". Should be in format "interval: "'.format( - var.name, val - ) + var.name, + val, + ), ) else: try: @@ -2953,8 +3084,9 @@ def _check_cell_methods_paren_info(self, paren_contents, var): except ValueError: valid_info.messages.append( '§7.3.3 {}:cell_methods contains an interval value that does not parse as a numeric value: "{}".'.format( - var.name, interval_matches.group("interval_number") - ) + var.name, + interval_matches.group("interval_number"), + ), ) else: valid_info.score += 1 @@ -2965,8 +3097,9 @@ def _check_cell_methods_paren_info(self, paren_contents, var): except ValueError: valid_info.messages.append( '§7.3.3 {}:cell_methods interval units "{}" is not parsable by UDUNITS.'.format( - var.name, interval_matches.group("interval_units") - ) + var.name, + interval_matches.group("interval_units"), + ), ) else: valid_info.score += 1 @@ -2979,16 +3112,16 @@ def _check_cell_methods_paren_info(self, paren_contents, var): if len(pmatches) == 1: valid_info.messages.append( "§7.3.3 If there is no standardized information, the keyword comment: should be omitted for variable {}".format( - var.name - ) + var.name, + ), ) # otherwise check that the comment is the last # item in the parentheses elif i != len(pmatches) - 1: valid_info.messages.append( '§7.3.3 The non-standard "comment:" element must come after any standard elements in cell_methods for variable {}'.format( - var.name - ) + var.name, + ), ) # else: @@ -2997,8 +3130,9 @@ def _check_cell_methods_paren_info(self, paren_contents, var): valid_info.out_of += 1 valid_info.messages.append( '§7.3.3 Invalid cell_methods keyword "{}" for variable {}. 
Must be one of [interval, comment]'.format( - keyword, var.name - ) + keyword, + var.name, + ), ) # Ensure concatenated reconstructed matches are the same as the @@ -3006,7 +3140,8 @@ def _check_cell_methods_paren_info(self, paren_contents, var): valid_info.assert_true( "".join(m.group(0) for m in pmatches) == paren_contents, "§7.3.3 Parenthetical content inside {}:cell_methods is not well formed: {}".format( - var.name, paren_contents + var.name, + paren_contents, ), ) @@ -3057,7 +3192,7 @@ def check_climatological_statistics(self, ds): # find any climatology axis variables; any variables which contain climatological stats will use # these variables as coordinates clim_time_coord_vars = ds.get_variables_by_attributes( - climatology=lambda s: s is not None + climatology=lambda s: s is not None, ) # first, to determine whether or not we have a valid climatological time @@ -3071,17 +3206,20 @@ def check_climatological_statistics(self, ds): climatology_ctx.out_of += 1 climatology_ctx.messages.append( f"Variable {clim_coord_var.name} is not detected as a time " - "coordinate variable, but has climatology attribute" + "coordinate variable, but has climatology attribute", ) # IMPLEMENTATION CONFORMANCE 7.4 REQUIRED if hasattr(clim_coord_var, "bounds"): climatology_ctx.out_of += 1 climatology_ctx.messages.append( f"Variable {clim_coord_var.name} has a climatology " - "attribute and cannot also have a bounds attribute." + "attribute and cannot also have a bounds attribute.", ) result = Result( - BaseCheck.MEDIUM, False, (self.section_titles["7.4"]), reasoning + BaseCheck.MEDIUM, + False, + (self.section_titles["7.4"]), + reasoning, ) # IMPLEMENTATION CONFORMANCE 7.4 REQUIRED 2/6 @@ -3090,7 +3228,7 @@ def check_climatological_statistics(self, ds): climatology_ctx.out_of += 1 climatology_ctx.messages.append( f"Variable {clim_coord_var.name} must have a climatology " - "attribute which is a string" + "attribute which is a string", ) ret_val.append(climatology_ctx.to_result()) continue @@ -3098,8 +3236,8 @@ def check_climatological_statistics(self, ds): climatology_ctx.out_of += 1 climatology_ctx.messages.append( "Variable {} referenced in time's climatology attribute does not exist".format( - ds.variables["time"].climatology - ) + ds.variables["time"].climatology, + ), ) else: clim_var = ds.variables[clim_coord_var.climatology] @@ -3108,16 +3246,17 @@ def check_climatological_statistics(self, ds): if clim_var.dtype is str or not np.issubdtype(clim_var, np.number): climatology_ctx.out_of += 1 climatology_ctx.messages.append( - f"Climatology variable {clim_var.name} is not a numeric type" + f"Climatology variable {clim_var.name} is not a numeric type", ) # IMPLEMENTATION CONFORMANCE 7.4 REQUIRED 6/6 if hasattr(clim_var, "_FillValue") or hasattr( - clim_var, "missing_value" + clim_var, + "missing_value", ): climatology_ctx.out_of += 1 climatology_ctx.messages.append( f"Climatology variable {clim_var.name} may not contain " - "attributes _FillValue or missing_value" + "attributes _FillValue or missing_value", ) # IMPLEMENTATION CONFORMANCE 7.4 REQUIRED 5/6 @@ -3143,8 +3282,8 @@ def check_climatological_statistics(self, ds): total_climate_count += 1 reasoning.append( "Climatology variable coordinates are in improper order: {}. 
Bounds-specific dimensions should be last".format( - ds.variables[clim_coord_var.climatology].dimensions - ) + ds.variables[clim_coord_var.climatology].dimensions, + ), ) result = Result( BaseCheck.MEDIUM, @@ -3164,8 +3303,8 @@ def check_climatological_statistics(self, ds): ): reasoning.append( 'Climatology dimension "{}" should only contain two elements'.format( - ds.variables[clim_coord_var.climatology].name - ) + ds.variables[clim_coord_var.climatology].name, + ), ) total_climate_count += 1 result = Result( @@ -3189,7 +3328,7 @@ def check_climatological_statistics(self, ds): # "time: method1 within years time: method2 over years (sidereal years)" meth_regex = "(?:{})".format( - "|".join(methods) + "|".join(methods), ) # "or" comparison for the methods re_string = ( r"^time: {0} within (years|days)" # regex string to test @@ -3199,17 +3338,17 @@ def check_climatological_statistics(self, ds): # find any variables with a valid climatological cell_methods for cell_method_var in ds.get_variables_by_attributes( - cell_methods=lambda s: s is not None + cell_methods=lambda s: s is not None, ): if any( - [dim in all_clim_coord_var_names for dim in cell_method_var.dimensions] + dim in all_clim_coord_var_names for dim in cell_method_var.dimensions ): total_climate_count += 1 if not regex.search(re_string, cell_method_var.cell_methods): reasoning.append( 'The "time: method within years/days over years/days" format is not correct in variable {}.'.format( - cell_method_var.name - ) + cell_method_var.name, + ), ) else: valid_climate_count += 1 @@ -3272,7 +3411,7 @@ def check_packed_data(self, ds): if type(add_offset) != type(scale_factor): valid = False reasoning.append( - "Attributes add_offset and scale_factor have different data type." + "Attributes add_offset and scale_factor have different data type.", ) # IMPLEMENTATION CONFORMANCE 8.1 REQUIRED 2/3 # scale_factor and add_offset must be floating point or double @@ -3284,7 +3423,7 @@ def check_packed_data(self, ds): if not isinstance(scale_factor, (float, np.floating)): valid = False reasoning.append( - "Attributes add_offset and scale_factor are not of type float or double." + "Attributes add_offset and scale_factor are not of type float or double.", ) else: # Check variable type is byte, short or int @@ -3299,11 +3438,14 @@ def check_packed_data(self, ds): # IMPLEMENTATION CONFORMANCE REQUIRED 3/3 # IMPLEMENTATION CONFORMANCE REQUIRED 3/3 reasoning.append( - "Variable is not of type byte, short, or int as required for different type add_offset/scale_factor." 
+ "Variable is not of type byte, short, or int as required for different type add_offset/scale_factor.", ) result = Result( - BaseCheck.MEDIUM, valid, self.section_titles["8.1"], reasoning + BaseCheck.MEDIUM, + valid, + self.section_titles["8.1"], + reasoning, ) ret_val.append(result) reasoning = [] @@ -3314,33 +3456,32 @@ def check_packed_data(self, ds): if var._FillValue.dtype.type != var.dtype.type: valid = False reasoning.append( - "Type of %s:_FillValue attribute (%s) does not match variable type (%s)" - % (name, var._FillValue.dtype.name, var.dtype.name) + f"Type of {name}:_FillValue attribute ({var._FillValue.dtype.name}) does not match variable type ({var.dtype.name})", ) if hasattr(var, "valid_min"): if var.valid_min.dtype.type != var.dtype.type: valid = False reasoning.append( - "Type of %svalid_min attribute (%s) does not match variable type (%s)" - % (name, var.valid_min.dtype.name, var.dtype.name) + f"Type of {name}valid_min attribute ({var.valid_min.dtype.name}) does not match variable type ({var.dtype.name})", ) if hasattr(var, "valid_max"): if var.valid_max.dtype.type != var.dtype.type: valid = False reasoning.append( - "Type of %s:valid_max attribute (%s) does not match variable type (%s)" - % (name, var.valid_max.dtype.name, var.dtype.name) + f"Type of {name}:valid_max attribute ({var.valid_max.dtype.name}) does not match variable type ({var.dtype.name})", ) if hasattr(var, "valid_range"): if var.valid_range.dtype.type != var.dtype.type: valid = False reasoning.append( - "Type of %s:valid_range attribute (%s) does not match variable type (%s)" - % (name, var.valid_range.dtype.name, var.dtype.name) + f"Type of {name}:valid_range attribute ({var.valid_range.dtype.name}) does not match variable type ({var.dtype.name})", ) result = Result( - BaseCheck.MEDIUM, valid, self.section_titles["8.1"], reasoning + BaseCheck.MEDIUM, + valid, + self.section_titles["8.1"], + reasoning, ) ret_val.append(result) @@ -3386,7 +3527,7 @@ def check_compression_gathering(self, ds): """ ret_val = [] for compress_var in ds.get_variables_by_attributes( - compress=lambda s: s is not None + compress=lambda s: s is not None, ): valid = True reasoning = [] @@ -3396,8 +3537,8 @@ def check_compression_gathering(self, ds): valid = False reasoning.append( "Compression variable {} may only have one dimension".format( - compress_var.name - ) + compress_var.name, + ), ) # IMPLEMENTATION CONFORMANCE 8.2 REQUIRED 1/3 # ensure compression variable is a proper index, and thus is an @@ -3408,8 +3549,8 @@ def check_compression_gathering(self, ds): valid = False reasoning.append( "Compression variable {} must be an integer type to form a proper array index".format( - compress_var.name - ) + compress_var.name, + ), ) # IMPLEMENTATION CONFORMANCE 8.2 REQUIRED 2/3 # make sure all the variables referred to are contained by the @@ -3419,8 +3560,9 @@ def check_compression_gathering(self, ds): valid = False reasoning.append( "The following dimensions referenced by the compress attribute of variable {} do not exist: {}".format( - compress_var.name, not_in_dims - ) + compress_var.name, + not_in_dims, + ), ) # IMPLEMENTATION CONFORMANCE 8.2 REQUIRED 3/3 # The values of the associated coordinate variable must be in the range @@ -3442,11 +3584,14 @@ def check_compression_gathering(self, ds): reasoning.append( "The dimenssion size {} referenced by the compress attribute is not " "in the range (0, The product of the compressed dimension sizes minus 1)".format( - coord_size - ) + coord_size, + ), ) result = Result( - 
BaseCheck.MEDIUM, valid, self.section_titles["8.2"], reasoning + BaseCheck.MEDIUM, + valid, + self.section_titles["8.2"], + reasoning, ) ret_val.append(result) @@ -3481,7 +3626,8 @@ def check_feature_type(self, ds): feature_type = getattr(ds, "featureType", None) valid_feature_type = TestCtx( - BaseCheck.HIGH, "§9.1 Dataset contains a valid featureType" + BaseCheck.HIGH, + "§9.1 Dataset contains a valid featureType", ) valid_feature_type.assert_true( feature_type is None or feature_type.lower() in feature_list, @@ -3566,7 +3712,7 @@ def check_variable_features(self, ds): [ "{} ({})".format(ftr, ", ".join(vrs)) for ftr, vrs in feature_types_found.items() - ] + ], ) all_same_features = TestCtx(BaseCheck.HIGH, self.section_titles["9.1"]) all_same_features.assert_true( diff --git a/compliance_checker/cf/cf_1_7.py b/compliance_checker/cf/cf_1_7.py index a72b1038a..fbfac7401 100644 --- a/compliance_checker/cf/cf_1_7.py +++ b/compliance_checker/cf/cf_1_7.py @@ -5,7 +5,6 @@ import numpy as np import pyproj -import regex from compliance_checker import cfutil from compliance_checker.base import BaseCheck, Result, TestCtx @@ -51,11 +50,11 @@ class CF1_7Check(CF1_6Check): "cf_section": "2.6.3", }, "scale_factor": {"Type": "N", "attr_loc": {"D", "C"}, "cf_section": "8.1"}, - } + }, ) def __init__(self, options=None): - super(CF1_7Check, self).__init__(options) + super().__init__(options) self.cell_methods = cell_methods17 self.grid_mapping_dict = grid_mapping_dict17 @@ -130,7 +129,7 @@ def check_actual_range(self, ds): len(variable.actual_range) != 2 ): # TODO is the attr also a numpy array? if so, .size msgs.append( - "actual_range of '{}' must be 2 elements".format(name) + f"actual_range of '{name}' must be 2 elements", ) ret_val.append( Result( # putting result into list @@ -138,20 +137,20 @@ def check_actual_range(self, ds): (score, out_of), self.section_titles["2.5"], msgs, - ) + ), ) continue # no need to keep checking if already completely wrong else: score += 1 except TypeError: # in case it's just a single number - msgs.append("actual_range of '{}' must be 2 elements".format(name)) + msgs.append(f"actual_range of '{name}' must be 2 elements") ret_val.append( Result( # putting result into list BaseCheck.HIGH, (score, out_of), self.section_titles["2.5"], msgs, - ) + ), ) continue @@ -164,12 +163,13 @@ def check_actual_range(self, ds): # fail. 
out_of += 1 if not np.isclose( - variable.actual_range[0], variable[:].min() + variable.actual_range[0], + variable[:].min(), ) or not np.isclose(variable.actual_range[1], variable[:].max()): msgs.append( "actual_range elements of '{}' inconsistent with its min/max values".format( - name - ) + name, + ), ) else: score += 1 @@ -182,8 +182,8 @@ def check_actual_range(self, ds): ): msgs.append( '"{}"\'s actual_range must be within valid_range'.format( - name - ) + name, + ), ) else: score += 1 @@ -195,8 +195,9 @@ def check_actual_range(self, ds): if variable.actual_range[0] < variable.valid_min: msgs.append( '"{}"\'s actual_range first element must be >= valid_min ({})'.format( - name, variable.valid_min - ) + name, + variable.valid_min, + ), ) else: score += 1 @@ -205,16 +206,20 @@ def check_actual_range(self, ds): if variable.actual_range[1] > variable.valid_max: msgs.append( '"{}"\'s actual_range second element must be <= valid_max ({})'.format( - name, variable.valid_max - ) + name, + variable.valid_max, + ), ) else: score += 1 ret_val.append( Result( # putting result into list - BaseCheck.HIGH, (score, out_of), self.section_titles["2.5"], msgs - ) + BaseCheck.HIGH, + (score, out_of), + self.section_titles["2.5"], + msgs, + ), ) return ret_val @@ -237,7 +242,7 @@ def check_cell_boundaries(self, ds): ret_val = [] reasoning = [] for variable_name, boundary_variable_name in cfutil.get_cell_boundary_map( - ds + ds, ).items(): variable = ds.variables[variable_name] valid = True @@ -250,9 +255,10 @@ def check_cell_boundaries(self, ds): valid = False reasoning.append( "Boundary variable {} referenced by {} not ".format( - boundary_variable_name, variable.name + boundary_variable_name, + variable.name, ) - + "found in dataset variables" + + "found in dataset variables", ) else: boundary_variable = ds.variables[boundary_variable_name] @@ -264,30 +270,30 @@ def check_cell_boundaries(self, ds): valid = False reasoning.append( "Boundary variable {} specified by {}".format( - boundary_variable.name, variable.name + boundary_variable.name, + variable.name, ) + " should have at least two dimensions to enclose the base " - + "case of a one dimensionsal variable" + + "case of a one dimensionsal variable", ) if boundary_variable.ndim != variable.ndim + 1: valid = False reasoning.append( - "The number of dimensions of the variable %s is %s, but the " - "number of dimensions of the boundary variable %s is %s. The boundary variable " - "should have %s dimensions" - % ( + "The number of dimensions of the variable {} is {}, but the " + "number of dimensions of the boundary variable {} is {}. The boundary variable " + "should have {} dimensions".format( variable.name, variable.ndim, boundary_variable.name, boundary_variable.ndim, variable.ndim + 1, - ) + ), ) if variable.dimensions[:] != boundary_variable.dimensions[: variable.ndim]: valid = False reasoning.append( "Boundary variable coordinates (for {}) are in improper order: {}. 
Bounds-specific dimensions should be last" - "".format(variable.name, boundary_variable.dimensions) + "".format(variable.name, boundary_variable.dimensions), ) # 7.1 Required 2/5: continue @@ -304,7 +310,7 @@ def check_cell_boundaries(self, ds): variable.name, len(variable.dimensions) + 1, boundary_variable.dimensions[:-1], - ) + ), ) # 7.1 Required 3/5: @@ -313,9 +319,10 @@ def check_cell_boundaries(self, ds): valid = False reasoning.append( "Boundary variable {} specified by {}".format( - boundary_variable.name, variable.name + boundary_variable.name, + variable.name, ) - + "must be a numeric data type " + + "must be a numeric data type ", ) # 7.1 Required 4/5: @@ -335,7 +342,7 @@ def check_cell_boundaries(self, ds): getattr(boundary_variable, item), variable.name, getattr(variable, item), - ) + ), ) # 7.1 Required 5/5: @@ -346,8 +353,9 @@ def check_cell_boundaries(self, ds): valid = False reasoning.append( "'{}' has 'formula_terms' attr, bounds variable '{}' must also have 'formula_terms'".format( - variable_name, boundary_variable_name - ) + variable_name, + boundary_variable_name, + ), ) # 7.1 Recommendations 2/2 @@ -373,12 +381,16 @@ def check_cell_boundaries(self, ds): valid = False reasoning.append( "The Boundary variables '{}' should not have the attributes: '{}'".format( - boundary_variable_name, unwanted_attributes - ) + boundary_variable_name, + unwanted_attributes, + ), ) result = Result( - BaseCheck.MEDIUM, valid, self.section_titles["7.1"], reasoning + BaseCheck.MEDIUM, + valid, + self.section_titles["7.1"], + reasoning, ) ret_val.append(result) return ret_val @@ -394,7 +406,7 @@ def check_cell_boundaries_interval(self, ds): ret_val = [] reasoning = [] for variable_name, boundary_variable_name in cfutil.get_cell_boundary_map( - ds + ds, ).items(): valid = True @@ -416,11 +428,14 @@ def check_cell_boundaries_interval(self, ds): variable[ii], boundary_variable_name, boundary_variable[ii], - ) + ), ) result = Result( - BaseCheck.MEDIUM, valid, self.section_titles["7.1"], reasoning + BaseCheck.MEDIUM, + valid, + self.section_titles["7.1"], + reasoning, ) ret_val.append(result) print(ret_val) @@ -453,74 +468,26 @@ def check_cell_measures(self, ds): :return: List of results """ ret_val = [] - reasoning = [] variables = ds.get_variables_by_attributes( - cell_measures=lambda c: c is not None + cell_measures=lambda c: c is not None, ) - for var in variables: - search_str = r"^(?:area|volume): (\w+)$" - search_res = regex.search(search_str, var.cell_measures) - if not search_res: - valid = False - reasoning.append( - "The cell_measures attribute for variable {} " - "is formatted incorrectly. 
It should take the" - " form of either 'area: cell_var' or " - "'volume: cell_var' where cell_var is the " - "variable describing the cell measures".format(var.name) - ) + try: + external_variables_str = ds.getncattr("external_variables") + if external_variables_str is not None: + external_variables_names = set(external_variables_str.split(" ")) else: - valid = True - cell_meas_var_name = search_res.groups()[0] - # TODO: cache previous results - - # if the dataset has external_variables, get it - try: - external_variables = ds.getncattr("external_variables") - except AttributeError: - external_variables = [] - if cell_meas_var_name not in ds.variables: - if cell_meas_var_name not in external_variables: - valid = False - reasoning.append( - "Cell measure variable {} referred to by {} is not present in dataset variables".format( - cell_meas_var_name, var.name - ) - ) - else: - valid = True - - # make Result - result = Result( - BaseCheck.MEDIUM, valid, (self.section_titles["7.2"]), reasoning - ) - ret_val.append(result) - continue # can't test anything on an external var - - else: - cell_meas_var = ds.variables[cell_meas_var_name] - if not hasattr(cell_meas_var, "units"): - valid = False - reasoning.append( - "Cell measure variable {} is required " - "to have units attribute defined.".format( - cell_meas_var_name - ) - ) - if not set(cell_meas_var.dimensions).issubset(var.dimensions): - valid = False - reasoning.append( - "Cell measure variable {} must have " - "dimensions which are a subset of " - "those defined in variable {}.".format( - cell_meas_var_name, var.name - ) - ) - - result = Result( - BaseCheck.MEDIUM, valid, (self.section_titles["7.2"]), reasoning + external_variables_names = set() + except (ValueError, AttributeError): + external_variables_names = set() + for var in variables: + ret_val.append( + self._cell_measures_core( + ds, + var, + external_variables_names, + "dataset or external variable", + ), ) - ret_val.append(result) return ret_val @@ -560,8 +527,9 @@ def _check_grid_mapping_attr_condition(self, attr, attr_name): return self._evaluate_towgs84(attr) else: # invoke method from 1.6, as these names are all still valid - return super(CF1_7Check, self)._check_grid_mapping_attr_condition( - attr, attr_name + return super()._check_grid_mapping_attr_condition( + attr, + attr_name, ) def _check_gmattr_existence_condition_geoid_name_geoptl_datum_name(self, var): @@ -603,22 +571,18 @@ def _check_gmattr_existence_condition_ell_pmerid_hdatum(self, var): _ncattrs = set(var.ncattrs()) if any( - [ - x in _ncattrs - for x in [ - "reference_ellipsoid_name", - "prime_meridian_name", - "horizontal_datum_name", - ] + x in _ncattrs + for x in [ + "reference_ellipsoid_name", + "prime_meridian_name", + "horizontal_datum_name", ] ) and ( - not set( - [ - "reference_ellipsoid_name", - "prime_meridian_name", - "horizontal_datum_name", - ] - ).issubset(_ncattrs) + not { + "reference_ellipsoid_name", + "prime_meridian_name", + "horizontal_datum_name", + }.issubset(_ncattrs) ): return (False, msg) @@ -823,20 +787,23 @@ def _evaluate_towgs84(self, val): elif not val.shape: # single value return (False, msg) - elif not (val.size in (3, 6, 7)): + elif val.size not in (3, 6, 7): return (False, msg) else: return (True, msg) def check_grid_mapping(self, ds): - super(CF1_7Check, self).check_grid_mapping.__doc__ - prev_return = super(CF1_7Check, self).check_grid_mapping(ds) + # FIXME: Looks like this is not needed. 
+ # super().check_grid_mapping.__doc__ + prev_return = super().check_grid_mapping(ds) grid_mapping_variables = cfutil.get_grid_mapping_variables(ds) for var_name in sorted(grid_mapping_variables): var = ds.variables[var_name] test_ctx = self.get_test_ctx( - BaseCheck.HIGH, self.section_titles["5.6"], var.name + BaseCheck.HIGH, + self.section_titles["5.6"], + var.name, ) # TODO: check cases where crs_wkt provides part of a necessary @@ -854,8 +821,8 @@ def check_grid_mapping(self, ds): except pyproj.exceptions.CRSError as crs_error: test_ctx.messages.append( "Cannot parse crs_wkt attribute to CRS using Proj4. Proj4 error: {}".format( - str(crs_error) - ) + str(crs_error), + ), ) else: test_ctx.score += 1 @@ -881,7 +848,7 @@ def check_grid_mapping(self, ds): test_ctx.messages.append( "Cannot have both 'geoid_name' and " "'geopotential_datum_name' attributes in " - "grid mapping variable '{}'".format(var.name) + "grid mapping variable '{}'".format(var.name), ) elif len_vdatum_name_attrs == 1: # should be one or zero attrs @@ -891,14 +858,17 @@ def check_grid_mapping(self, ds): v_datum_attr = next(iter(vert_datum_attrs)) v_datum_value = getattr(var, v_datum_attr) v_datum_str_valid = self._process_v_datum_str( - v_datum_value, conn + v_datum_value, + conn, ) invalid_msg = ( "Vertical datum value '{}' for " "attribute '{}' in grid mapping " "variable '{}' is not valid".format( - v_datum_value, v_datum_attr, var.name + v_datum_value, + v_datum_attr, + var.name, ) ) test_ctx.assert_true(v_datum_str_valid, invalid_msg) @@ -906,7 +876,8 @@ def check_grid_mapping(self, ds): # if we hit an error, skip the check warn( "Error occurred while trying to query " - "Proj4 SQLite database at {}: {}".format(proj_db_path, str(e)) + "Proj4 SQLite database at {}: {}".format(proj_db_path, str(e)), + stacklevel=2, ) prev_return[var.name] = test_ctx.to_result() @@ -921,7 +892,8 @@ def check_standard_name_deprecated_modifiers(self, ds): deprecated_var_names = cfutil._find_standard_name_modifier_variables(ds, True) if deprecated_var_names: warn( - f"Deprecated standard_name modifiers found on variables {deprecated_var_names}" + f"Deprecated standard_name modifiers found on variables {deprecated_var_names}", + stacklevel=2, ) def _process_v_datum_str(self, v_datum_str, conn): @@ -936,7 +908,12 @@ def _process_v_datum_str(self, v_datum_str, conn): return len(res_set.fetchall()) > 0 def _check_dimensionless_vertical_coordinate_1_7( - self, ds, vname, deprecated_units, ret_val, dim_vert_coords_dict + self, + ds, + vname, + deprecated_units, + ret_val, + dim_vert_coords_dict, ): """ Check that a dimensionless vertical coordinate variable is valid under @@ -951,18 +928,26 @@ def _check_dimensionless_vertical_coordinate_1_7( standard_name = getattr(variable, "standard_name", None) formula_terms = getattr(variable, "formula_terms", None) # Skip the variable if it's dimensional + correct_computed_std_name_ctx = TestCtx( + BaseCheck.MEDIUM, + self.section_titles["4.3"], + ) + # IMPLEMENTATION CONFORMANCE 4.3.3 REQUIRED + correct_computed_std_name_ctx.assert_true( + not (formula_terms is None and hasattr(variable, "computed_standard_name")), + f"Variable {vname} should have formula_terms attribute when " + "computed_standard_name attribute is defined", + ) if formula_terms is None and standard_name not in dim_vert_coords_dict: return # assert that the computed_standard_name is maps to the standard_name correctly - correct_computed_std_name_ctx = TestCtx( - BaseCheck.MEDIUM, self.section_titles["4.3"] - ) _comp_std_name = 
dim_vert_coords_dict[standard_name][1] correct_computed_std_name_ctx.assert_true( getattr(variable, "computed_standard_name", None) in _comp_std_name, "§4.3.3 The standard_name of `{}` must map to the correct computed_standard_name, `{}`".format( - vname, sorted(_comp_std_name) + vname, + sorted(_comp_std_name), ), ) ret_val.append(correct_computed_std_name_ctx.to_result()) @@ -1001,7 +986,7 @@ def check_dimensionless_vertical_coordinates(self, ds): deprecated_units, self._check_dimensionless_vertical_coordinate_1_6, dimless_vertical_coordinates_1_7, - ) + ), ) ret_val.extend( @@ -1010,7 +995,7 @@ def check_dimensionless_vertical_coordinates(self, ds): deprecated_units, self._check_dimensionless_vertical_coordinate_1_7, dimless_vertical_coordinates_1_7, - ) + ), ) return ret_val diff --git a/compliance_checker/cf/cf_1_8.py b/compliance_checker/cf/cf_1_8.py index 6fcc99bb2..3c1f5d793 100644 --- a/compliance_checker/cf/cf_1_8.py +++ b/compliance_checker/cf/cf_1_8.py @@ -37,13 +37,13 @@ class CF1_8Check(CF1_7Check): NON_ROOT_GROUP_OPT = ["title", "history"] def __init__(self, options=None): - super(CF1_8Check, self).__init__(options) + super().__init__(options) self.section_titles.update( { "2.7": "§2.7 Groups", "6.1.2": "§6.1.2 Taxon Names and Identifiers", "7.5": "§7.5 Geometries", - } + }, ) def check_groups(self, ds: MemoizedDataset): @@ -85,18 +85,17 @@ def check_groups(self, ds: MemoizedDataset): ctx_hi = TestCtx(BaseCheck.HIGH, self.section_titles["2.7"]) ctx_lo = TestCtx(BaseCheck.LOW, self.section_titles["2.7"]) + # IMPLEMENTATION CONFORMANCE 2.7 REQUIRED 1/4 # Make sure `Conventions` & `external_variables` attributes are only present in the # root group. for gname in ds.groups: - ginstance = ds.createGroup( - gname - ) # returns existing Group; doesn't create a new one + ginstance = ds.groups[gname] for attr in ginstance.ncattrs(): if attr in CF1_8Check.ROOT_GROUP_ONLY_ATTRS: ctx_hi.messages.append( f'§2.7.2 Attribute "{ attr }" MAY ONLY be used in the root group ' - "and SHALL NOT be duplicated or overridden in child groups." + "and SHALL NOT be duplicated or overridden in child groups.", ) results.append(ctx_hi.to_result()) @@ -106,7 +105,7 @@ def check_groups(self, ds: MemoizedDataset): f"§2.7.2 Note: attribute '{ attr }' found on non-root group '{ gname }'. " "This is optional for non-root groups. It is allowed in order to provide additional " "provenance and description of the subsidiary data. It does not override " - "attributes from parent groups." 
+ "attributes from parent groups.", ) results.append(ctx_lo.to_result()) @@ -118,7 +117,7 @@ def check_geometry(self, ds: Dataset): :returns list: List of error messages """ vars_with_geometry = ds.get_variables_by_attributes( - geometry=lambda g: g is not None + geometry=lambda g: g is not None, ) results = [] unique_geometry_var_names = {var.geometry for var in vars_with_geometry} @@ -128,28 +127,28 @@ def check_geometry(self, ds: Dataset): for geometry_var_name in unique_geometry_var_names: if geometry_var_name not in ds.variables: geom_valid.messages.append( - "Cannot find geometry variable " f"named {geometry_var_name}" + "Cannot find geometry variable " f"named {geometry_var_name}", ) results.append(geom_valid.to_result()) continue else: geometry_var = ds.variables[geometry_var_name] - geometry_type = getattr(geometry_var, "geometry_type") + geometry_type = geometry_var.geometry_type try: node_coord_var_names = geometry_var.node_coordinates except AttributeError: geom_valid.messages.append( "Could not find required attribute " '"node_coordinates" in geometry ' - f'variable "{geometry_var_name}"' + f'variable "{geometry_var_name}"', ) results.append(geom_valid.to_result()) if not isinstance(node_coord_var_names, str): geom_valid.messages.append( 'Attribute "node_coordinates" in geometry ' f'variable "{geometry_var_name}" must be ' - "a string" + "a string", ) results.append(geom_valid.to_result()) continue @@ -166,21 +165,24 @@ def check_geometry(self, ds: Dataset): "The following referenced node coordinate" "variables for geometry variable" f'"{geometry_var_name}" were not found: ' - f"{not_found_node_vars}" + f"{not_found_node_vars}", ) results.append(geom_valid.to_result()) continue node_count = reference_attr_variables( - ds, getattr(geometry_var, "node_count", None) + ds, + getattr(geometry_var, "node_count", None), ) # multipart lines and polygons only part_node_count = reference_attr_variables( - ds, getattr(geometry_var, "part_node_count", None) + ds, + getattr(geometry_var, "part_node_count", None), ) # polygons with interior geometry only interior_ring = reference_attr_variables( - ds, getattr(geometry_var, "interior_ring", None) + ds, + getattr(geometry_var, "interior_ring", None), ) if geometry_type == "point": @@ -189,14 +191,17 @@ def check_geometry(self, ds: Dataset): geometry = LineGeometry(node_coord_vars, node_count, part_node_count) elif geometry_type == "polygon": geometry = PolygonGeometry( - node_coord_vars, node_count, part_node_count, interior_ring + node_coord_vars, + node_count, + part_node_count, + interior_ring, ) else: geom_valid.messages.append( f'For geometry variable "{geometry_var_name}' 'the attribute "geometry_type" must exist' "and have one of the following values:" - '"point", "line", "polygon"' + '"point", "line", "polygon"', ) results.append(geom_valid.to_result()) continue @@ -255,7 +260,7 @@ def match_taxa_standard_names(standard_name_string): ) taxa_quantifier_variables = ds.get_variables_by_attributes( - standard_name=match_taxa_standard_names + standard_name=match_taxa_standard_names, ) # If there are no matches, there either are no taxa variables # or the standard names are not appropriate, which will be picked up @@ -266,10 +271,11 @@ def match_taxa_standard_names(standard_name_string): for taxon_quantifier_variable in taxa_quantifier_variables: valid_taxa = TestCtx(BaseCheck.HIGH, self.section_titles["6.1.2"]) if not isinstance( - getattr(taxon_quantifier_variable, "coordinates", None), str + getattr(taxon_quantifier_variable, 
"coordinates", None), + str, ): valid_taxa.add_failure( - f'{taxon_quantifier_variable.name} must have a string valued "coordinates" attribute' + f'{taxon_quantifier_variable.name} must have a string valued "coordinates" attribute', ) continue @@ -278,12 +284,12 @@ def match_taxa_standard_names(standard_name_string): if invalid_coord_vars: valid_taxa.add_failure( 'The following values for "coordinates" attributes were not found in the dataset\'s variables ' - f"{invalid_coord_vars}" + f"{invalid_coord_vars}", ) if len(coordinate_var_names) > 2: valid_taxa.add_failure( - "coordinates attribute for taxon data must either reference one or two variable names" + "coordinates attribute for taxon data must either reference one or two variable names", ) continue @@ -316,7 +322,7 @@ def match_taxa_standard_names(standard_name_string): else: valid_taxa.add_failure( f"coordinates attribute for variable {taxon_quantifier_variable} must consist of " - 'variables containing standard names of either just "biological_taxon_name", or "biological_taxon_name" and "biological_taxon_identifier"' + 'variables containing standard names of either just "biological_taxon_name", or "biological_taxon_name" and "biological_taxon_identifier"', ) ret_val.append(valid_taxa.to_result()) @@ -336,7 +342,8 @@ def handle_lsid(self, taxon_lsid_variable, taxon_name_variable): r"(?P\w+)(?::(?P\w+))?" ) for taxon_lsid, taxon_name in zip( - taxon_lsid_variable[:], taxon_name_variable[:] + taxon_lsid_variable[:], + taxon_name_variable[:], ): # TODO: handle case where LSID is not present. This can happen # if the species is not present in the database desired. @@ -358,7 +365,7 @@ def handle_lsid(self, taxon_lsid_variable, taxon_name_variable): "- http://lsid.info/urn:lsid.info::/\n" "- http://lsid.info/urn:lsid.info::/:\n" "- http://www.lsid.info/urn:lsid.info::/\n" - "- http://www.lsid.info/urn:lsid.info::/:" + "- http://www.lsid.info/urn:lsid.info::/:", ) continue if lsid_str.startswith("urn"): @@ -378,12 +385,12 @@ def handle_lsid(self, taxon_lsid_variable, taxon_name_variable): messages.append( "http://lsid.info returned an error message " f"for submitted LSID string '{lsid_str}': " - f"{problem_text}" + f"{problem_text}", ) else: messages.append( "Error occurred attempting to check LSID " - f"'{lsid_str}': {str(e)}" + f"'{lsid_str}': {str(e)}", ) continue @@ -401,13 +408,13 @@ def handle_lsid(self, taxon_lsid_variable, taxon_name_variable): except requests.exceptions.RequestException as e: # noqa: F841 messages.append( "Aphia ID {taxon_match['object_id'] returned " - "other error: {str(e)}" + "other error: {str(e)}", ) # record not found in database if response.status_code == 204: messages.append( "Aphia ID {taxon_match['object_id'] " - "not found in WoRMS database" + "not found in WoRMS database", ) # good case, parse JSON elif response.status_code == 200: @@ -416,14 +423,14 @@ def handle_lsid(self, taxon_lsid_variable, taxon_name_variable): messages.append( "Supplied taxon name and WoRMS valid name do not match. " f"Supplied taxon name is '{taxon_name_str}', WoRMS valid name " - f"is '{valid_name}.'" + f"is '{valid_name}.'", ) # Misc non-error code. Should not reach here. 
else: messages.append( f"Aphia ID {taxon_match['object_id']}" "returned an unhandled HTTP status " - f"code {response.status_code}" + f"code {response.status_code}", ) continue @@ -439,12 +446,12 @@ def handle_lsid(self, taxon_lsid_variable, taxon_name_variable): except requests.exceptions.RequestException as e: if itis_response.status_code == 404: messages.append( - "itis.gov TSN " f"{taxon_match['object_id']} not found." + "itis.gov TSN " f"{taxon_match['object_id']} not found.", ) continue else: messages.append( - "itis.gov identifier returned other " f"error: {str(e)}" + "itis.gov identifier returned other " f"error: {str(e)}", ) continue json_contents = itis_response.json() @@ -454,7 +461,7 @@ def handle_lsid(self, taxon_lsid_variable, taxon_name_variable): messages.append( "Supplied taxon name and ITIS scientific name do not match. " f"Supplied taxon name is '{taxon_name_str}', ITIS scientific name " - f"for TSN {taxon_match['object_id']} is '{combined_name}.'" + f"for TSN {taxon_match['object_id']} is '{combined_name}.'", ) else: @@ -463,13 +470,14 @@ def handle_lsid(self, taxon_lsid_variable, taxon_name_variable): "LSID URNs of the form " "'urn:lsid:marinespecies.org:taxname:' or " "'urn:lsid:itis.gov:itis_tsn:'. Assuming " - "pass condition" + "pass condition", + stacklevel=1, ) return messages -class GeometryStorage(object): +class GeometryStorage: """Abstract base class for geometries""" def __init__(self, coord_vars, node_count): @@ -488,7 +496,7 @@ def check_geometry(self): if invalid_vars: self.errors.append( "The following geometry variables " - f"have non-numeric contents: {invalid_vars}" + f"have non-numeric contents: {invalid_vars}", ) def _split_mulitpart_geometry(self): @@ -511,7 +519,7 @@ def check_geometry(self): "For a point geometry, coordinate " "variables must be the same length as " "node_count defined, or must be " - "length 1 if node_count is not set" + "length 1 if node_count is not set", ) return self.errors @@ -534,7 +542,7 @@ def check_geometry(self): "Coordinate variables must be the same length. " "If node_count is specified, this value must " "also sum to the length of the coordinate " - "variables." + "variables.", ) # if a multipart if self.node_count is not None: @@ -544,19 +552,19 @@ def check_geometry(self): "Coordinate variables must be the same " "length. If node_count is specified, this " "value must also sum to the length of the " - "coordinate variables." 
+ "coordinate variables.", ) if self.part_node_count is not None: if not np.issubdtype(self.part_node_count.dtype, np.integer): geom_errors.append( "when part_node_count is specified, it must " - "be an array of integers" + "be an array of integers", ) same_node_count = len(self.coord_vars[0]) == self.node_count[:].sum() if not same_node_count: geom_errors.append( "The sum of part_node_count must be equal " - "to the value of node_count" + "to the value of node_count", ) return geom_errors @@ -590,9 +598,9 @@ def check_polygon_orientation(self, transposed_coords, interior=False): try: polygon = Polygon(transposed_coords.tolist()) - except ValueError: - raise ValueError( - "Polygon contains too few points to perform orientation test" + except ValueError as err: + raise ValueError from err( + "Polygon contains too few points to perform orientation test", ) ccw = polygon.exterior.is_ccw @@ -623,7 +631,8 @@ def check_geometry(self): extent_slice = slice(extents[i], extents[i + 1]) poly_sliced = np.vstack([cv[extent_slice] for cv in self.coord_vars]).T pass_orientation = self.check_polygon_orientation( - poly_sliced, ring_orientation[i] + poly_sliced, + ring_orientation[i], ) if not pass_orientation: orient_fix = ( diff --git a/compliance_checker/cf/cf_1_9.py b/compliance_checker/cf/cf_1_9.py index c499448cf..4f05fef96 100644 --- a/compliance_checker/cf/cf_1_9.py +++ b/compliance_checker/cf/cf_1_9.py @@ -1,7 +1,11 @@ +import cftime +import numpy as np +import regex from netCDF4 import Dataset from compliance_checker import cfutil -from compliance_checker.base import BaseCheck, TestCtx +from compliance_checker.base import BaseCheck, Result, TestCtx +from compliance_checker.cf import util from compliance_checker.cf.cf_1_8 import CF1_8Check from compliance_checker.cf.util import VariableReferenceError, reference_attr_variables @@ -9,34 +13,97 @@ class CF1_9Check(CF1_8Check): _cc_spec_version = "1.9" _cc_url = "http://cfconventions.org/Data/cf-conventions/cf-conventions-1.9/cf-conventions.html" + _allowed_numeric_var_types = CF1_8Check._allowed_numeric_var_types.union( + {np.ubyte, np.uint16, np.uint32, np.uint64}, + ) def __init__(self, options=None): - super(CF1_9Check, self).__init__(options) + super().__init__(options) self.section_titles.update({"5.8": "§5.8 Domain Variables"}) - def check_calendar(self, ds): - # IMPLEMENTATION CONFORMANCE 4.4.1 RECOMMENDED CF 1.9 - super(CF1_9Check, self).check_calendar.__doc__ - prev_return = super(CF1_9Check, self).check_calendar(ds) - time_var_candidate_name = cfutil.get_time_variable(ds) - time_var_name = ( - time_var_candidate_name - if time_var_candidate_name in self._find_coord_vars(ds) - else None - ) - # most datasets should have a time coordinate variable - test_ctx = self.get_test_ctx( - BaseCheck.HIGH, self.section_titles["4.4"], time_var_name - ) - if time_var_name is None: - return prev_return + def check_time_coordinate_variable_has_calendar(self, ds): + """ + Ensure that time coordinate variables have a calendar attribute + """ + ret_val = [] + for name in cfutil.get_time_variables(ds): + # DRY: get rid of time coordinate variable boilerplate + if name not in {var.name for var in util.find_coord_vars(ds)}: + continue + time_var = ds.variables[name] + if not hasattr(time_var, "calendar") or not isinstance( + time_var.calendar, + str, + ): + result = Result( + BaseCheck.MEDIUM, + True, + self.section_titles["4.4.1"], + [ + f'Time coordinate variable "{name}" should have a ' + 'string valued attribute "calendar"', + ], + ) + 
ret_val.append(result) + continue + if time_var.calendar.lower() in {"gregorian", "julian", "standard"}: + try: + reference_year = cftime.num2date( + 0, + time_var.units, + time_var.calendar, + has_year_zero=True, + ).year + # will fail on months, certain other time specifications + except ValueError: + continue + if reference_year == 0: + reasoning = ( + f'For time variable "{time_var.name}", when using ' + "the Gregorian or Julian calendars, the use of year " + "zero is not recommended. Furthermore, the use of year " + "zero to signify a climatological variable as in COARDS " + "is deprecated in CF." + ) + result = Result( + BaseCheck.MEDIUM, + False, + self.section_titles["4.4.1"], + [reasoning], + ) + + ret_val.append(result) + return ret_val - test_ctx.assert_true( - hasattr(ds.variables[time_var_name], "calendar"), - f'Time coordinate variable "{time_var_name}" ' - "should have a calendar attribute", + def check_time_coordinate(self, ds): + # FIXME: Looks like this is not needed. + # super().check_calendar.__doc__ + prev_return = super().check_time_coordinate(ds) + seconds_regex = regex.compile( + r"\w+ since \d{1,4}-\d{1,2}-\d{1,2}[ T]" + r"\d{1,2}:\d{1,2}:(?P<seconds>\d{1,2})", ) - prev_return.append(test_ctx.to_result()) + for name in cfutil.get_time_variables(ds): + # DRY: get rid of time coordinate variable boilerplate + if name not in {var.name for var in util.find_coord_vars(ds)}: + continue + time_var = ds.variables[name] + test_ctx = self.get_test_ctx( + BaseCheck.HIGH, + self.section_titles["4.4"], + name, + ) + try: + match = regex.match(seconds_regex, time_var.units) + except AttributeError: + # not much can be done if there are no units + continue + test_ctx.assert_true( + match.group("seconds") is None or int(match.group("seconds")) < 60, + f'Time coordinate variable "{name}" must have ' + "units with seconds less than 60", + ) + prev_return.append(test_ctx.to_result()) return prev_return def check_domain_variables(self, ds: Dataset): @@ -46,14 +113,17 @@ def check_domain_variables(self, ds: Dataset): for domain_var in ( var for var in ds.get_variables_by_attributes( - coordinates=lambda c: c is not None + coordinates=lambda c: c is not None, ) + # IMPLICIT if not var.dimensions ): domain_valid = TestCtx(BaseCheck.MEDIUM, self.section_titles["5.8"]) domain_valid.out_of += 1 domain_coord_vars = reference_attr_variables( - ds, domain_var.coordinates, " " + ds, + domain_var.coordinates, + " ", ) errors = [ maybe_error.name @@ -67,7 +137,7 @@ def check_domain_variables(self, ds: Dataset): "variables referenced in " "coordinates attribute from " "domain variable " - f"{domain_var.name}: {errors_str}" + f"{domain_var.name}: {errors_str}", ) else: @@ -75,7 +145,7 @@ def check_domain_variables(self, ds: Dataset): if long_name is None or not isinstance(long_name, str): domain_valid.messages.append( f"For domain variable {domain_var.name} " - f"it is recommended that attribute long_name be present and a string" + f"it is recommended that attribute long_name be present and a string", ) results.append(domain_valid.to_result()) continue @@ -88,7 +158,7 @@ def check_domain_variables(self, ds: Dataset): domain_valid.messages.append( f"The following attributes appear in variable {domain_var.name} " "and CF Appendix A, but are not for use in domain variables: " - f"{appendix_a_not_recommended_attrs}" + f"{appendix_a_not_recommended_attrs}", ) # no errors occurred diff --git a/compliance_checker/cf/cf_base.py b/compliance_checker/cf/cf_base.py index 3fe3f1dc1..a6f6ed5d7 100644 --- 
a/compliance_checker/cf/cf_base.py +++ b/compliance_checker/cf/cf_base.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- import logging import os import sys @@ -29,7 +28,7 @@ def __init__(self, options=None): # Each default dict is a key, value mapping from the dataset object to # a list of variables - super(CFBaseCheck, self).__init__(options) + super().__init__(options) self._coord_vars = defaultdict(list) self._ancillary_vars = defaultdict(list) self._clim_vars = defaultdict(list) @@ -58,6 +57,7 @@ def __init__(self, options=None): "4.2": "§4.2 Longitude Coordinate", "4.3": "§4.3 Vertical Coordinate", "4.4": "§4.4 Time Coordinate", + "4.4.1": "§4.4.1 Calendar", "4.5": "§4.5 Discrete Axis", "5": "§5 Coordinate Systems", "5.1": "§5.1 Independent Latitude, Longitude, Vertical, and Time Axes", @@ -103,7 +103,7 @@ def setup(self, ds): self._find_cf_standard_name_table(ds) self._find_geophysical_vars(ds) coord_containing_vars = ds.get_variables_by_attributes( - coordinates=lambda val: isinstance(val, str) + coordinates=lambda val: isinstance(val, str), ) # coordinate data variables @@ -175,11 +175,13 @@ def check_grid_mapping(self, ds): # Check the grid_mapping attribute to be a non-empty string and that its reference exists for variable in ds.get_variables_by_attributes( - grid_mapping=lambda x: x is not None + grid_mapping=lambda x: x is not None, ): grid_mapping = getattr(variable, "grid_mapping", None) defines_grid_mapping = self.get_test_ctx( - BaseCheck.HIGH, self.section_titles["5.6"], variable.name + BaseCheck.HIGH, + self.section_titles["5.6"], + variable.name, ) defines_grid_mapping.assert_true( (isinstance(grid_mapping, str) and grid_mapping), @@ -192,26 +194,28 @@ def check_grid_mapping(self, ds): if ":" in grid_mapping and self._cc_spec_version >= "1.7": colon_count = grid_mapping.count(":") re_all = regex.findall( - r"(\w+):\s*((?:\w+\s+)*(?:\w+)(?![\w:]))", grid_mapping + r"(\w+):\s*((?:\w+\s+)*(?:\w+)(?![\w:]))", + grid_mapping, ) if colon_count != len(re_all): defines_grid_mapping.out_of += 1 defines_grid_mapping.messages.append( - "Could not consume entire grid_mapping expression, please check for well-formedness" + "Could not consume entire grid_mapping expression, please check for well-formedness", ) else: for grid_var_name, coord_var_str in re_all: defines_grid_mapping.assert_true( grid_var_name in ds.variables, "grid mapping variable {} must exist in this dataset".format( - grid_var_name + grid_var_name, ), ) for ref_var in coord_var_str.split(): defines_grid_mapping.assert_true( ref_var in ds.variables, "Coordinate-related variable {} referenced by grid_mapping variable {} must exist in this dataset".format( - ref_var, grid_var_name + ref_var, + grid_var_name, ), ) @@ -220,7 +224,7 @@ def check_grid_mapping(self, ds): defines_grid_mapping.assert_true( grid_var_name in ds.variables, "grid mapping variable {} must exist in this dataset".format( - grid_var_name + grid_var_name, ), ) ret_val[variable.name] = defines_grid_mapping.to_result() @@ -228,7 +232,9 @@ def check_grid_mapping(self, ds): # Check the grid mapping variables themselves for grid_var_name in grid_mapping_variables: valid_grid_mapping = self.get_test_ctx( - BaseCheck.HIGH, self.section_titles["5.6"], grid_var_name + BaseCheck.HIGH, + self.section_titles["5.6"], + grid_var_name, ) grid_var = ds.variables[grid_var_name] @@ -237,7 +243,7 @@ def check_grid_mapping(self, ds): # Grid mapping name must be in appendix F valid_grid_mapping.assert_true( grid_mapping_name in self.grid_mapping_dict, - 
"{} is not a valid grid_mapping_name.".format(grid_mapping_name) + f"{grid_mapping_name} is not a valid grid_mapping_name." + " See Appendix F for valid grid mappings", ) @@ -259,7 +265,8 @@ def check_grid_mapping(self, ds): valid_grid_mapping.assert_true( hasattr(grid_var, req), "{} is a required attribute for grid mapping {}".format( - req, grid_mapping_name + req, + grid_mapping_name, ), ) @@ -272,7 +279,7 @@ def check_grid_mapping(self, ds): number_found += 1 valid_grid_mapping.assert_true( number_found == 1, - "grid mapping {}".format(grid_mapping_name) + f"grid mapping {grid_mapping_name}" + "must define exactly one of these attributes: " + "{}".format(" or ".join(at_least_attr)), ) @@ -281,13 +288,13 @@ def check_grid_mapping(self, ds): expected_std_names = grid_mapping[2] for expected_std_name in expected_std_names: found_vars = ds.get_variables_by_attributes( - standard_name=expected_std_name + standard_name=expected_std_name, ) valid_grid_mapping.assert_true( len(found_vars) == 1, - "grid mapping {} requires exactly ".format(grid_mapping_name) + f"grid mapping {grid_mapping_name} requires exactly " + "one variable with standard_name " - + "{} to be defined".format(expected_std_name), + + f"{expected_std_name} to be defined", ) ret_val[grid_var_name] = valid_grid_mapping.to_result() @@ -307,7 +314,8 @@ def check_conventions_version(self, ds): valid = False reasoning = [] correct_version_string = "{}-{}".format( - self._cc_spec, self._cc_spec_version + self._cc_spec, + self._cc_spec_version, ).upper() if hasattr(ds, "Conventions"): conventions = regex.split(r",|\s+", getattr(ds, "Conventions", "")) @@ -318,13 +326,16 @@ def check_conventions_version(self, ds): else: reasoning = [ "§2.6.1 Conventions global attribute does not contain " - '"{}"'.format(correct_version_string) + '"{}"'.format(correct_version_string), ] else: valid = False reasoning = ["§2.6.1 Conventions field is not present"] return Result( - BaseCheck.MEDIUM, valid, self.section_titles["2.6"], msgs=reasoning + BaseCheck.MEDIUM, + valid, + self.section_titles["2.6"], + msgs=reasoning, ) def _check_dimensionless_vertical_coordinates( @@ -388,21 +399,30 @@ def _check_formula_terms(self, ds, coord, dimless_coords_dict): # The pattern for formula terms is always component: variable_name # the regex grouping always has component names in even positions and # the corresponding variable name in odd positions. - matches = regex.findall( - r"([A-Za-z][A-Za-z0-9_]*: )([A-Za-z][A-Za-z0-9_]*)", variable.formula_terms + poorly_formed_formula_terms = ("Attribute formula_terms is not well-formed",) + matches = list( + regex.finditer( + r"(\w+):\s+(\w+)(?:\s+(?!$)|$)", + variable.formula_terms, + ), ) - terms = set(m[0][:-2] for m in matches) + if not matches: + valid_formula_terms.add_failure(poorly_formed_formula_terms) + return valid_formula_terms.to_result() + + terms = {m.group(1) for m in matches} # get the variables named in the formula terms and check if any # are not present in the dataset - missing_vars = sorted(set(m[1] for m in matches) - set(ds.variables)) + missing_vars = sorted({m.group(2) for m in matches} - set(ds.variables)) missing_fmt = "The following variable(s) referenced in {}:formula_terms are not present in the dataset: {}" valid_formula_terms.assert_true( - len(missing_vars) == 0, missing_fmt.format(coord, ", ".join(missing_vars)) + len(missing_vars) == 0, + missing_fmt.format(coord, ", ".join(missing_vars)), ) # try to reconstruct formula_terms by adding space in between the regex # matches. 
If it doesn't exactly match the original, the formatting # of the attribute is incorrect - reconstructed_formula = " ".join(m[0] + m[1] for m in matches) + reconstructed_formula = "".join(m.group(0) for m in matches) valid_formula_terms.assert_true( reconstructed_formula == formula_terms, "Attribute formula_terms is not well-formed", @@ -471,7 +491,10 @@ def _parent_var_attr_type_check(self, attr_name, var, ctx): type_match, "Attribute '{}' (type: {}) and parent variable '{}' (type: {}) " "must have equivalent datatypes".format( - attr_name, val_type, var.name, var.dtype.type + attr_name, + val_type, + var.name, + var.dtype.type, ), ) @@ -544,7 +567,7 @@ def _find_ancillary_vars(self, ds, refresh=False): # Invalidate the cache at all costs self._ancillary_vars[ds] = [] - for name, var in ds.variables.items(): + for _name, var in ds.variables.items(): if hasattr(var, "ancillary_variables"): for anc_name in var.ancillary_variables.split(" "): if anc_name in ds.variables: @@ -614,7 +637,8 @@ def _find_cf_standard_name_table(self, ds): except IndexError: warn( "Cannot extract CF standard name version number " - "from standard_name_vocabulary string" + "from standard_name_vocabulary string", + stacklevel=2, ) return False else: @@ -626,7 +650,8 @@ def _find_cf_standard_name_table(self, ds): warn( "Cannot convert standard name table to lowercase. This can " "occur if a non-string standard_name_vocabulary global " - "attribute is supplied" + "attribute is supplied", + stacklevel=2, ) return False @@ -636,7 +661,7 @@ def _find_cf_standard_name_table(self, ds): # If the packaged version is what we're after, then we're good if version == self._std_names._version: print( - "Using packaged standard name table v{0}".format(version), + f"Using packaged standard name table v{version}", file=sys.stderr, ) return False @@ -645,19 +670,21 @@ def _find_cf_standard_name_table(self, ds): try: data_directory = util.create_cached_data_dir() location = os.path.join( - data_directory, "cf-standard-name-table-test-{0}.xml".format(version) + data_directory, + f"cf-standard-name-table-test-{version}.xml", ) # Did we already download this before? if not os.path.isfile(location): util.download_cf_standard_name_table(version, location) print( - "Using downloaded standard name table v{0}".format(version), + f"Using downloaded standard name table v{version}", file=sys.stderr, ) else: print( - "Using cached standard name table v{0} from {1}".format( - version, location + "Using cached standard name table v{} from {}".format( + version, + location, ), file=sys.stderr, ) @@ -667,8 +694,9 @@ def _find_cf_standard_name_table(self, ds): except Exception as e: # There was an error downloading the CF table. 
That's ok, we'll just use the packaged version warn( - "Problem fetching standard name table:\n{0}\n" - "Using packaged v{1}".format(e, self._std_names._version) + f"Problem fetching standard name table:\n{e}\n" + f"Using packaged v{self._std_names._version}", + stacklevel=2, ) return False @@ -730,7 +758,7 @@ def _find_metadata_vars(self, ds, refresh=False): self._metadata_vars[ds] = [] for name, var in ds.variables.items(): if name in self._find_ancillary_vars(ds) or name in self._find_coord_vars( - ds + ds, ): continue @@ -881,7 +909,7 @@ def _get_instance_dimensions(self, ds): """ ret_val = [] for variable in ds.get_variables_by_attributes( - cf_role=lambda x: isinstance(x, str) + cf_role=lambda x: isinstance(x, str), ): if variable.ndim > 0: ret_val.append(variable.dimensions[0]) @@ -921,7 +949,7 @@ def _get_pretty_dimension_order_with_type(self, ds, name, dim_types): """ dim_names = [] for dim, dim_type in zip(ds.variables[name].dimensions, dim_types): - dim_name = "{} ({}".format(dim, dim_type) + dim_name = f"{dim} ({dim_type}" if ds.dimensions[dim].isunlimited(): dim_name += ", unlimited)" else: @@ -1003,7 +1031,8 @@ def att_loc_print_helper(att_letter): """ return "{} ({})".format( - attr_location_ident.get(att_letter, "other"), att_letter + attr_location_ident.get(att_letter, "other"), + att_letter, ) def _att_loc_msg(att_loc): @@ -1028,16 +1057,17 @@ def _att_loc_msg(att_loc): valid_loc = att_loc_print_helper(loc_sort[0]) elif att_loc_len == 2: valid_loc = "{} and {}".format( - att_loc_print_helper(loc_sort[0]), att_loc_print_helper(loc_sort[1]) + att_loc_print_helper(loc_sort[0]), + att_loc_print_helper(loc_sort[1]), ) # shouldn't be reached under normal circumstances, as any attribute # should be either G, C, or D but if another # category is added, this will be useful. else: valid_loc = ", ".join(loc_sort[:-1]) + ", and {}".format( - att_loc_print_helper(loc_sort[-1]) + att_loc_print_helper(loc_sort[-1]), ) - return "This attribute may only appear in {}.".format(valid_loc) + return f"This attribute may only appear in {valid_loc}." for global_att_name in possible_global_atts: global_att = ds.getncattr(global_att_name) @@ -1048,7 +1078,8 @@ def _att_loc_msg(att_loc): subsection_test = ".".join(att_dict["cf_section"].split(".")[:2]) section_loc = self.section_titles.get( - subsection_test, att_dict["cf_section"] + subsection_test, + att_dict["cf_section"], ) else: section_loc = None @@ -1058,7 +1089,7 @@ def _att_loc_msg(att_loc): if "G" not in att_loc: test_ctx.messages.append( '[Appendix A] Attribute "{}" should not be present in global (G) ' - "attributes. {}".format(global_att_name, valid_loc_warn) + "attributes. 
{}".format(global_att_name, valid_loc_warn), ) else: result = self._handle_dtype_check(global_att, global_att_name, att_dict) @@ -1080,11 +1111,12 @@ def _att_loc_msg(att_loc): att_dict = self.appendix_a[att_name] if att_dict["cf_section"] is not None: subsection_test = ".".join( - att_dict["cf_section"].split(".")[:2] + att_dict["cf_section"].split(".")[:2], ) section_loc = self.section_titles.get( - subsection_test, att_dict["cf_section"] + subsection_test, + att_dict["cf_section"], ) else: section_loc = None @@ -1101,7 +1133,7 @@ def _att_loc_msg(att_loc): att_loc_print_helper(coord_letter), var_name, valid_loc_warn, - ) + ), ) else: result = self._handle_dtype_check(att, att_name, att_dict, var) @@ -1134,20 +1166,20 @@ def _check_attr_type(self, attr_name, attr_type, attribute, variable=None): if attr_type == "S": if not isinstance(attribute, str): - return [False, "{} must be a string".format(attr_name)] + return [False, f"{attr_name} must be a string"] else: # if it's not a string, it should have a numpy dtype underlying_dtype = getattr(attribute, "dtype", None) # TODO check for np.nan separately if underlying_dtype is None: - return [False, "{} must be a numeric type".format(attr_name)] + return [False, f"{attr_name} must be a numeric type"] # both D and N should be some kind of numeric value is_numeric = np.issubdtype(underlying_dtype, np.number) if attr_type == "N": if not is_numeric: - return [False, "{} must be a numeric type".format(attr_name)] + return [False, f"{attr_name} must be a numeric type"] elif attr_type == "D": # TODO: handle edge case where variable is unset here temp_ctx = TestCtx() @@ -1157,14 +1189,15 @@ def _check_attr_type(self, attr_name, attr_type, attribute, variable=None): return ( False, "{} must be numeric and must be equivalent to {} dtype".format( - attr_name, var_dtype + attr_name, + var_dtype, ), ) else: # If we reached here, we fell off with an unrecognized type return ( False, - "{} has unrecognized type '{}'".format(attr_name, attr_type), + f"{attr_name} has unrecognized type '{attr_type}'", ) # pass if all other possible failure conditions have been evaluated return (True, None) @@ -1185,12 +1218,12 @@ def _handle_dtype_check(self, attribute, attr_name, attr_dict, variable=None): attr_type = attr_dict["Type"] if variable is None and "G" not in attr_dict["attr_loc"]: raise ValueError( - "Non-global attributes must be associated with a " " variable" + "Non-global attributes must be associated with a " " variable", ) attr_str = ( - "Global attribute {}".format(attr_name) + f"Global attribute {attr_name}" if "G" in attr_dict["attr_loc"] and variable is None - else "Attribute {} in variable {}".format(attr_name, variable.name) + else f"Attribute {attr_name} in variable {variable.name}" ) # check the type diff --git a/compliance_checker/cf/util.py b/compliance_checker/cf/util.py index 279b66a5c..d033cd528 100644 --- a/compliance_checker/cf/util.py +++ b/compliance_checker/cf/util.py @@ -1,4 +1,3 @@ -import io import itertools import os import sys @@ -191,8 +190,8 @@ def __str__(self): ) -class StandardNameTable(object): - class NameEntry(object): +class StandardNameTable: + class NameEntry: def __init__(self, entrynode): self.canonical_units = self._get(entrynode, "canonical_units", True) self.grib = self._get(entrynode, "grib") @@ -210,18 +209,20 @@ def _get(self, entrynode, attrname, required=False): def __init__(self, cached_location=None): if cached_location: - with io.open(cached_location, "r", encoding="utf-8") as fp: + with open(cached_location, 
encoding="utf-8") as fp: resource_text = fp.read() elif os.environ.get("CF_STANDARD_NAME_TABLE") and os.path.exists( - os.environ["CF_STANDARD_NAME_TABLE"] + os.environ["CF_STANDARD_NAME_TABLE"], ): - with io.open( - os.environ["CF_STANDARD_NAME_TABLE"], "r", encoding="utf-8" + with open( + os.environ["CF_STANDARD_NAME_TABLE"], + encoding="utf-8", ) as fp: resource_text = fp.read() else: resource_text = get_data( - "compliance_checker", "data/cf-standard-name-table.xml" + "compliance_checker", + "data/cf-standard-name-table.xml", ) parser = etree.XMLParser(remove_blank_text=True) @@ -246,7 +247,7 @@ def __getitem__(self, key): if len(entryids) != 1: raise Exception( "Inconsistency in standard name table, could not lookup alias for %s" - % key + % key, ) key = entryids[0].text @@ -286,22 +287,24 @@ def download_cf_standard_name_table(version, location=None): location is None ): # This case occurs when updating the packaged version from command line location = resource_filename( - "compliance_checker", "data/cf-standard-name-table.xml" + "compliance_checker", + "data/cf-standard-name-table.xml", ) if version == "latest": url = "http://cfconventions.org/Data/cf-standard-names/current/src/cf-standard-name-table.xml" else: - url = "http://cfconventions.org/Data/cf-standard-names/{0}/src/cf-standard-name-table.xml".format( - version + url = "http://cfconventions.org/Data/cf-standard-names/{}/src/cf-standard-name-table.xml".format( + version, ) r = requests.get(url, allow_redirects=True) r.raise_for_status() print( - "Downloading cf-standard-names table version {0} from: {1}".format( - version, url + "Downloading cf-standard-names table version {} from: {}".format( + version, + url, ), file=sys.stderr, ) @@ -316,7 +319,8 @@ def create_cached_data_dir(): """ writable_directory = os.path.join(os.path.expanduser("~"), ".local", "share") data_directory = os.path.join( - os.environ.get("XDG_DATA_HOME", writable_directory), "compliance-checker" + os.environ.get("XDG_DATA_HOME", writable_directory), + "compliance-checker", ) if not os.path.isdir(data_directory): os.makedirs(data_directory) @@ -380,7 +384,8 @@ def is_time_variable(varname, var): satisfied |= getattr(var, "standard_name", "") == "time" satisfied |= getattr(var, "axis", "") == "T" satisfied |= units_convertible( - "seconds since 1900-01-01", getattr(var, "units", "") + "seconds since 1900-01-01", + getattr(var, "units", ""), ) return satisfied @@ -446,12 +451,14 @@ def string_from_var_type(variable): else: raise TypeError( f"Variable '{variable.name} has non-string/character' " - f"dtype {variable.dtype}" + f"dtype {variable.dtype}", ) def reference_attr_variables( - dataset: Dataset, attributes_string: str, split_by: str = None + dataset: Dataset, + attributes_string: str, + split_by: str = None, ): """ Attempts to reference variables in the string, optionally splitting by @@ -461,7 +468,8 @@ def reference_attr_variables( return None elif split_by is None: return dataset.variables.get( - attributes_string, VariableReferenceError(attributes_string) + attributes_string, + VariableReferenceError(attributes_string), ) else: string_proc = attributes_string.split(split_by) diff --git a/compliance_checker/cfutil.py b/compliance_checker/cfutil.py index 7ace15fff..e583570f0 100644 --- a/compliance_checker/cfutil.py +++ b/compliance_checker/cfutil.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- """ compliance_checker/cfutil.py """ @@ -68,8 +67,8 @@ def attr_membership(attr_val, value_set, attr_type=str, modifier_fn=lambda x: x) if 
not isinstance(attr_val, attr_type): warnings.warn( - "Attribute is of type {}, {} expected. " - "Attempting to cast to expected type.".format(type(attr_val), attr_type) + f"Attribute is of type {type(attr_val)!r}, {attr_type!r} expected. Attempting to cast to expected type.", + stacklevel=2, ) try: # if the expected type is str, try casting to unicode type @@ -80,7 +79,7 @@ def attr_membership(attr_val, value_set, attr_type=str, modifier_fn=lambda x: x) new_attr_val = attr_type(attr_val) # catch casting errors except (ValueError, UnicodeEncodeError): - warnings.warn("Could not cast to type {}".format(attr_type)) + warnings.warn(f"Could not cast to type {attr_type}", stacklevel=2) return False else: new_attr_val = attr_val @@ -89,8 +88,8 @@ def attr_membership(attr_val, value_set, attr_type=str, modifier_fn=lambda x: x) is_in_set = modifier_fn(new_attr_val) in value_set except Exception as e: warnings.warn( - "Could not apply modifier function {} to value: " - " {}".format(modifier_fn, e.msg) + f"Could not apply modifier function {modifier_fn} to value: {e.msg}", + stacklevel=2, ) return False @@ -109,7 +108,7 @@ def is_dimensionless_standard_name(standard_name_table, standard_name): if not isinstance(standard_name, str): return False found_standard_name = standard_name_table.find( - ".//entry[@id='{}']".format(standard_name) + f".//entry[@id='{standard_name}']", ) if found_standard_name is not None: canonical_units = Unit(found_standard_name.find("canonical_units").text) @@ -123,13 +122,13 @@ def get_sea_names(): """ Returns a list of NODC sea names - source of list: https://www.nodc.noaa.gov/General/NODC-Archive/seanamelist.txt + source of list: https://www.ncei.noaa.gov/resources/ocean-data-format-codes """ global _SEA_NAMES if _SEA_NAMES is None: buf = {} with open( - resource_filename("compliance_checker", "data/seanames.csv"), "r" + resource_filename("compliance_checker", "data/seanames.csv"), ) as f: reader = csv.reader(f) for code, sea_name in reader: @@ -173,15 +172,15 @@ def is_geophysical(ds, variable): if not isinstance(standard_name_test, str): warnings.warn( - "Variable {} has non string standard name, " - "Attempting cast to string".format(variable) + f"Variable {variable} has non string standard name, Attempting cast to string", + stacklevel=2, ) try: standard_name = str(standard_name_test) except ValueError: warnings.warn( - "Unable to cast standard name to string, excluding " - "from geophysical variables" + "Unable to cast standard name to string, excluding from geophysical variables", + stacklevel=2, ) else: standard_name = standard_name_test @@ -280,7 +279,7 @@ def get_auxiliary_coordinate_variables(ds): aux_vars = [] # get any variables referecned by the coordinates attribute for ncvar in ds.get_variables_by_attributes( - coordinates=lambda x: isinstance(x, str) + coordinates=lambda x: isinstance(x, str), ): # split the coordinates into individual variable names referenced_variables = ncvar.coordinates.split(" ") @@ -310,7 +309,7 @@ def get_auxiliary_coordinate_variables(ds): # Some datasets like ROMS use multiple variables to define coordinates for ncvar in ds.get_variables_by_attributes( - standard_name=lambda x: x in coordinate_standard_names + standard_name=lambda x: x in coordinate_standard_names, ): if ncvar.name not in aux_vars: aux_vars.append(ncvar.name) @@ -503,7 +502,9 @@ def get_latitude_variables(nc): latitude_variables.append(variable.name) check_fn = partial( - attr_membership, value_set=VALID_LAT_UNITS, modifier_fn=lambda s: s.lower() + 
attr_membership, + value_set=VALID_LAT_UNITS, + modifier_fn=lambda s: s.lower(), ) for variable in nc.get_variables_by_attributes(units=check_fn): if variable.name not in latitude_variables: @@ -568,7 +569,9 @@ def get_longitude_variables(nc): longitude_variables.append(variable.name) check_fn = partial( - attr_membership, value_set=VALID_LON_UNITS, modifier_fn=lambda s: s.lower() + attr_membership, + value_set=VALID_LON_UNITS, + modifier_fn=lambda s: s.lower(), ) for variable in nc.get_variables_by_attributes(units=check_fn): if variable.name not in longitude_variables: @@ -741,14 +744,15 @@ def match_modifier_variables(standard_name_str): matches = re.search(r"^\w+ +\w+", standard_name_str) else: matches = re.search( - r"^\w+ +(?:status_flag|number_of_observations)$", standard_name_str + r"^\w+ +(?:status_flag|number_of_observations)$", + standard_name_str, ) return bool(matches) return [ var.name for var in ds.get_variables_by_attributes( - standard_name=match_modifier_variables + standard_name=match_modifier_variables, ) ] @@ -929,7 +933,7 @@ def is_dataset_valid_ragged_array_repr_featureType(nc, feature_type: str): or (len(cf_role_vars) > 2 and is_compound) ): return False - cf_role_var = nc.get_variables_by_attributes(cf_role="{}_id".format(ftype))[0] + cf_role_var = nc.get_variables_by_attributes(cf_role=f"{ftype}_id")[0] if ( cf_role_var.cf_role.split("_id")[0].lower() != ftype ): # if cf_role_var returns None, this should raise an error? @@ -948,10 +952,10 @@ def is_dataset_valid_ragged_array_repr_featureType(nc, feature_type: str): # if the index/count variable is present, we check that only one of # each is present and that their dimensions are correct index_vars = nc.get_variables_by_attributes( - instance_dimension=lambda x: x is not None + instance_dimension=lambda x: x is not None, ) count_vars = nc.get_variables_by_attributes( - sample_dimension=lambda x: x is not None + sample_dimension=lambda x: x is not None, ) # if the featureType isn't compound, shouldn't have both count and index @@ -1751,7 +1755,8 @@ def isTrajectoryProfile(nc, variable): # NOTE # does this take into account single trajectory profile? 
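# (both DSG representations are tried below: the variable counts as a trajectoryProfile if either is_trajectory_profile_orthogonal or is_trajectory_profile_incomplete matches)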
if is_trajectory_profile_orthogonal( - nc, variable + nc, + variable, ) or is_trajectory_profile_incomplete(nc, variable): return True diff --git a/compliance_checker/ioos.py b/compliance_checker/ioos.py index dbe27d06e..bd06160a0 100644 --- a/compliance_checker/ioos.py +++ b/compliance_checker/ioos.py @@ -47,8 +47,9 @@ def _has_attr(cls, ds, attr, concept_name, priority=BaseCheck.HIGH): if not val: msgs.append( "Attr '{}' (IOOS concept: '{}') not found in dataset".format( - attr, concept_name - ) + attr, + concept_name, + ), ) return Result(priority, val, concept_name, msgs) @@ -64,8 +65,10 @@ def _has_var_attr(cls, dataset, vname, attr, concept_name, priority=BaseCheck.HI val = False msgs.append( "Variable '{}' not present while checking for attr '{}' for IOOS concept: '{}'".format( - vname, attr, concept_name - ) + vname, + attr, + concept_name, + ), ) else: v = dataset.variables[vname] @@ -73,8 +76,10 @@ def _has_var_attr(cls, dataset, vname, attr, concept_name, priority=BaseCheck.HI val = False msgs.append( "Attr '{}' not present on var '{}' while checking for IOOS concept: '{}'".format( - attr, vname, concept_name - ) + attr, + vname, + concept_name, + ), ) return Result(priority, val, concept_name, msgs) @@ -151,19 +156,34 @@ def check_global_attributes(self, ds): self._has_attr(ds, "acknowledgement", "Platform Sponsor"), self._has_attr(ds, "publisher_email", "Station Publisher Email"), self._has_attr( - ds, "publisher_email", "Service Contact Email", BaseCheck.MEDIUM + ds, + "publisher_email", + "Service Contact Email", + BaseCheck.MEDIUM, ), self._has_attr( - ds, "institution", "Service Provider Name", BaseCheck.MEDIUM + ds, + "institution", + "Service Provider Name", + BaseCheck.MEDIUM, ), self._has_attr( - ds, "publisher_name", "Service Contact Name", BaseCheck.MEDIUM + ds, + "publisher_name", + "Service Contact Name", + BaseCheck.MEDIUM, ), self._has_attr( - ds, "Conventions", "Data Format Template Version", BaseCheck.MEDIUM + ds, + "Conventions", + "Data Format Template Version", + BaseCheck.MEDIUM, ), self._has_attr( - ds, "publisher_name", "Station Publisher Name", BaseCheck.HIGH + ds, + "publisher_name", + "Station Publisher Name", + BaseCheck.HIGH, ), ] @@ -193,10 +213,13 @@ def check_variable_names(self, ds): if "standard_name" in v.ncattrs(): count += 1 else: - msgs.append("Variable '{}' missing standard_name attr".format(k)) + msgs.append(f"Variable '{k}' missing standard_name attr") return Result( - BaseCheck.MEDIUM, (count, len(ds.variables)), "Variable Names", msgs + BaseCheck.MEDIUM, + (count, len(ds.variables)), + "Variable Names", + msgs, ) def check_altitude_units(self, ds): @@ -214,7 +237,10 @@ def check_altitude_units(self, ds): return Result(BaseCheck.LOW, val, "Altitude Units", msgs) return Result( - BaseCheck.LOW, (0, 0), "Altitude Units", ["Dataset has no 'z' variable"] + BaseCheck.LOW, + (0, 0), + "Altitude Units", + ["Dataset has no 'z' variable"], ) def check_variable_units(self, ds): @@ -228,10 +254,13 @@ def check_variable_units(self, ds): if "units" in v.ncattrs(): count += 1 else: - msgs.append("Variable '{}' missing units attr".format(k)) + msgs.append(f"Variable '{k}' missing units attr") return Result( - BaseCheck.MEDIUM, (count, len(ds.variables)), "Variable Units", msgs + BaseCheck.MEDIUM, + (count, len(ds.variables)), + "Variable Units", + msgs, ) @@ -329,7 +358,7 @@ def check_platform_variables(self, ds): "which contains the details of the platform. 
If multiple platforms are " "involved, a variable should be defined for each platform and referenced " "from the geophysical variable in a space separated string." - ) + ), ] return [Result(BaseCheck.HIGH, val, "platform variables", msgs)] @@ -351,7 +380,10 @@ def check_platform_variable_attributes(self, ds): results += [ self._has_var_attr(ds, platform, "long_name", "Platform Long Name"), self._has_var_attr( - ds, platform, "short_name", "Platform Short Name" + ds, + platform, + "short_name", + "Platform Short Name", ), self._has_var_attr(ds, platform, "ioos_code", "Platform IOOS Code"), self._has_var_attr(ds, platform, "type", "Platform Type"), @@ -368,7 +400,11 @@ def check_geophysical_vars_fill_value(self, ds): for geo_var in get_geophysical_variables(ds): results.append( self._has_var_attr( - ds, geo_var, "_FillValue", "_FillValue", BaseCheck.MEDIUM + ds, + geo_var, + "_FillValue", + "_FillValue", + BaseCheck.MEDIUM, ), ) return results @@ -383,7 +419,10 @@ def check_geophysical_vars_standard_name(self, ds): for geo_var in get_geophysical_variables(ds): results.append( self._has_var_attr( - ds, geo_var, "standard_name", "geophysical variables standard_name" + ds, + geo_var, + "standard_name", + "geophysical variables standard_name", ), ) return results @@ -461,64 +500,56 @@ def __init__(self): ] self.cf1_7._std_names._names.extend(self._qartod_std_names) - self._default_check_var_attrs = set( - [ - ("_FillValue", BaseCheck.MEDIUM), - ("missing_value", BaseCheck.MEDIUM), - # ( "standard_name", BaseCheck.HIGH # already checked in CF1_7Check.check_standard_name() - # ( "units", BaseCheck.HIGH # already checked in CF1_7Check.check_units() - ] - ) + self._default_check_var_attrs = { + ("_FillValue", BaseCheck.MEDIUM), + ("missing_value", BaseCheck.MEDIUM), + # ( "standard_name", BaseCheck.HIGH # already checked in CF1_7Check.check_standard_name() + # ( "units", BaseCheck.HIGH # already checked in CF1_7Check.check_units() + } # geophysical variables must have the following attrs: self.geophys_check_var_attrs = self._default_check_var_attrs.union( - set( - [ - ("standard_name_url", BaseCheck.MEDIUM), - # ( "platform", BaseCheck.HIGH) # checked under check_single_platform() - # ( "wmo_platform_code", BaseCheck.HIGH) # only "if applicable", see check_wmo_platform_code() - # ( "ancillary_variables", BaseCheck.HIGH) # only "if applicable", see _check_var_gts_ingest() - # ("accuracy", BaseCheck.MEDIUM), see check_accuracy - ("precision", BaseCheck.MEDIUM), - ("resolution", BaseCheck.MEDIUM), - ] - ) + { + ("standard_name_url", BaseCheck.MEDIUM), + # ( "platform", BaseCheck.HIGH) # checked under check_single_platform() + # ( "wmo_platform_code", BaseCheck.HIGH) # only "if applicable", see check_wmo_platform_code() + # ( "ancillary_variables", BaseCheck.HIGH) # only "if applicable", see _check_var_gts_ingest() + # ("accuracy", BaseCheck.MEDIUM), see check_accuracy + ("precision", BaseCheck.MEDIUM), + ("resolution", BaseCheck.MEDIUM), + }, ) # valid contributor_role values - self.valid_contributor_roles = set( - [ # NERC and NOAA - "author", - "coAuthor", - "collaborator", - "contributor", - "custodian", - "distributor", - "editor", - "funder", - "mediator", - "originator", - "owner", - "pointOfContact", - "principalInvestigator", - "processor", - "publisher", - "resourceProvider", - "rightsHolder", - "sponsor", - "stakeholder", - "user", - ] - ) + self.valid_contributor_roles = { # NERC and NOAA + "author", + "coAuthor", + "collaborator", + "contributor", + "custodian", + "distributor", + "editor", 
+ "funder", + "mediator", + "originator", + "owner", + "pointOfContact", + "principalInvestigator", + "processor", + "publisher", + "resourceProvider", + "rightsHolder", + "sponsor", + "stakeholder", + "user", + } - self.valid_contributor_role_vocabs = set( - [ - "http://vocab.nerc.ac.uk/collection/G04/current/", - "https://vocab.nerc.ac.uk/collection/G04/current/", - "http://www.ngdc.noaa.gov/wiki/index.php?title=ISO_19115_and_19115-2_CodeList_Dictionaries#CI_RoleCode", - "https://www.ngdc.noaa.gov/wiki/index.php?title=ISO_19115_and_19115-2_CodeList_Dictionaries#CI_RoleCode", - ] - ) + self.valid_contributor_role_vocabs = { + "http://vocab.nerc.ac.uk/collection/G04/current/", + "https://vocab.nerc.ac.uk/collection/G04/current/", + "http://www.ngdc.noaa.gov/wiki/index.php?title=ISO_19115_and_19115-2_CodeList_Dictionaries#CI_RoleCode", + "https://www.ngdc.noaa.gov/wiki/index.php?title=ISO_19115_and_19115-2_CodeList_Dictionaries#CI_RoleCode", + } self.required_atts = [ ("Conventions", IOOS1_2_ConventionsValidator()), @@ -609,7 +640,7 @@ def _find_platform_vars(self, ds): Set of variables which are platform variables. """ plat_vars = ds.get_variables_by_attributes( - platform=lambda p: isinstance(p, str) + platform=lambda p: isinstance(p, str), ) return { ds.variables[var.platform] @@ -723,7 +754,7 @@ def check_contributor_role_and_vocabulary(self, ds): role_val, "contributor_role", None if role_val else [role_msg.format(_role)], - ) + ), ) except TypeError: role_results.append( @@ -731,8 +762,8 @@ def check_contributor_role_and_vocabulary(self, ds): BaseCheck.MEDIUM, False, "contributor_role", - ["contributor_role '{}' must be of type 'string'".format(role)], - ) + [f"contributor_role '{role}' must be of type 'string'"], + ), ) else: role_results.append( @@ -741,7 +772,7 @@ def check_contributor_role_and_vocabulary(self, ds): False, "contributor_role", ["contributor_role should be present"], - ) + ), ) vocb_results = [] @@ -756,7 +787,7 @@ def check_contributor_role_and_vocabulary(self, ds): vocb_val, "contributor_role_vocabulary", None if vocb_val else [vocb_msg.format(_vocb)], - ) + ), ) except TypeError: vocb_results.append( @@ -766,10 +797,10 @@ def check_contributor_role_and_vocabulary(self, ds): "contributor_role_vocabulary", [ "contributor_role_vocabulary '{}' must be of type 'string'".format( - vocb - ) + vocb, + ), ], - ) + ), ) else: vocb_results.append( @@ -778,7 +809,7 @@ def check_contributor_role_and_vocabulary(self, ds): False, "contributor_role_vocabulary", ["contributor_role_vocabulary should be present"], - ) + ), ) return role_results + vocb_results @@ -799,7 +830,9 @@ def check_geophysical_vars_have_attrs(self, ds): # get geophysical variables geophys_vars = get_geophysical_variables(ds) # list of str results = self._check_vars_have_attrs( # list - ds, geophys_vars, self.geophys_check_var_attrs + ds, + geophys_vars, + self.geophys_check_var_attrs, ) return results @@ -841,7 +874,7 @@ def check_accuracy(self, ds): r, "geophysical_variable:accuracy", [msg.format(v=v)], - ) + ), ) return results @@ -870,7 +903,7 @@ def _check_vars_have_attrs(self, ds, vars_to_check, atts_to_check): attr_tuple[0], # attribute name attr_tuple[0], # attribute name used as 'concept_name' attr_tuple[1], # priority level - ) + ), ) return results @@ -910,7 +943,7 @@ def check_cf_role_variables(self, ds): ( f"Invalid featureType '{feature_type_attr}'; please see the " "IOOS 1.2 Profile and CF-1.7 Conformance documents for valid featureType" - ) + ), ], ) @@ -933,7 +966,9 @@ def 
check_cf_role_variables(self, ds): elif feature_type == "point": return Result( - BaseCheck.MEDIUM, True, "CF DSG: featureType=trajectoryProfile" + BaseCheck.MEDIUM, + True, + "CF DSG: featureType=trajectoryProfile", ) else: @@ -946,7 +981,7 @@ def check_cf_role_variables(self, ds): f"Invalid featureType '{feature_type_attr}'; " "please see the IOOS 1.2 Profile and CF-1.7 " "Conformance documents for valid featureType" - ) + ), ], ) @@ -968,7 +1003,7 @@ def _check_feattype_timeseries_cf_role(self, ds): ( "The IOOS-1.2 Profile specifies a single variable " "must be present with attribute cf_role=timeseries_id" - ) + ), ] else: @@ -983,8 +1018,10 @@ def _check_feattype_timeseries_cf_role(self, ds): _val = _dimsize == 1 msgs = [ ts_msg.format( - cf_role="timeseries_id", dim_type="station", dim_len=_dimsize - ) + cf_role="timeseries_id", + dim_type="station", + dim_len=_dimsize, + ), ] return Result( @@ -1014,7 +1051,7 @@ def _check_feattype_timeseriesprof_cf_role(self, ds): ( "Datasets of featureType=timeSeriesProfile must have variables " "containing cf_role=timeseries_id and cf_role=profile_id" - ) + ), ] else: @@ -1034,12 +1071,17 @@ def _check_feattype_timeseriesprof_cf_role(self, ds): _val = _ts_id_dimsize == 1 and _pf_id_dimsize >= 1 msgs = [ ts_prof_msg.format( - cf_role="timeseries_id", dim_type="station", dim_len=_ts_id_dimsize - ) + cf_role="timeseries_id", + dim_type="station", + dim_len=_ts_id_dimsize, + ), ] return Result( - BaseCheck.HIGH, _val, "CF DSG: featureType=timeSeriesProfile", msgs + BaseCheck.HIGH, + _val, + "CF DSG: featureType=timeSeriesProfile", + msgs, ) def _check_feattype_trajectory_cf_role(self, ds): @@ -1058,7 +1100,7 @@ def _check_feattype_trajectory_cf_role(self, ds): ( "Datasets of featureType=trajectory must have a variable " "containing cf_role=trajectory_id" - ) + ), ] else: @@ -1073,8 +1115,10 @@ def _check_feattype_trajectory_cf_role(self, ds): _val = _dimsize == 1 msgs = [ trj_msg.format( - cf_role="trajectory_id", dim_type="station", dim_len=_dimsize - ) + cf_role="trajectory_id", + dim_type="station", + dim_len=_dimsize, + ), ] return Result(BaseCheck.HIGH, _val, "CF DSG: featureType=trajectory", msgs) @@ -1099,7 +1143,7 @@ def _check_feattype_trajectoryprof_cf_role(self, ds): ( "Datasets of featureType=trajectoryProfile must have variables " "containing cf_role=trajectory_id and cf_role=profile_id" - ) + ), ] else: @@ -1120,12 +1164,17 @@ def _check_feattype_trajectoryprof_cf_role(self, ds): _val = _trj_id_dimsize == 1 and _prf_id_dimsize >= 1 msgs = [ trj_prof_msg.format( - cf_role="trajectory_id", dim_type="station", dim_len=_trj_id_dimsize - ) + cf_role="trajectory_id", + dim_type="station", + dim_len=_trj_id_dimsize, + ), ] return Result( - BaseCheck.HIGH, _val, "CF DSG: featureType=trajectoryProfile", msgs + BaseCheck.HIGH, + _val, + "CF DSG: featureType=trajectoryProfile", + msgs, ) def _check_feattype_profile_cf_role(self, ds): @@ -1141,7 +1190,7 @@ def _check_feattype_profile_cf_role(self, ds): if (not cf_role_vars) or (len(cf_role_vars) > 1): _val = False msgs = [ - "None or multiple variables found with cf_role=profile_id; only one is allowed" + "None or multiple variables found with cf_role=profile_id; only one is allowed", ] else: @@ -1156,8 +1205,10 @@ def _check_feattype_profile_cf_role(self, ds): _val = _dimsize == 1 msgs = [ prof_msg.format( - cf_role="profile_id", dim_type="profile", dim_len=_dimsize - ) + cf_role="profile_id", + dim_type="profile", + dim_len=_dimsize, + ), ] return Result(BaseCheck.HIGH, _val, "CF DSG: 
featureType=profile", msgs) @@ -1195,11 +1246,11 @@ def check_creator_and_publisher_type(self, ds): pass_stat = False messages.append( "If specified, {} must be in value list " - "({})".format(global_att_name, sorted(expected_types)) + "({})".format(global_att_name, sorted(expected_types)), ) result_list.append( - Result(BaseCheck.MEDIUM, pass_stat, global_att_name, messages) + Result(BaseCheck.MEDIUM, pass_stat, global_att_name, messages), ) return result_list @@ -1252,7 +1303,7 @@ def check_single_platform(self, ds): num_platforms = len(platform_set) if num_platforms > 1 and glb_platform: msg = "A dataset may only have one platform; {} found".format( - len(platform_set) + len(platform_set), ) val = False @@ -1276,7 +1327,7 @@ def check_single_platform(self, ds): def check_platform_vocabulary(self, ds): """ The platform_vocabulary attribute is recommended to be a URL to - http://mmisw.org/ont/ioos/platform or + https://mmisw.org/ont/ioos/platform or http://vocab.nerc.ac.uk/collection/L06/current/. However, it is required to at least be a URL. @@ -1291,7 +1342,10 @@ def check_platform_vocabulary(self, ds): pvocab = getattr(ds, "platform_vocabulary", "") val = bool(validators.url(pvocab)) return Result( - BaseCheck.MEDIUM, val, "platform_vocabulary", None if val else [m] + BaseCheck.MEDIUM, + val, + "platform_vocabulary", + None if val else [m], ) def _check_gts_ingest_val(self, val): @@ -1387,7 +1441,7 @@ def check_gts_ingest_global(self, ds): is_valid_string = self._check_gts_ingest_val(gts_ingest_value) fail_message = [ - 'Global attribute "gts_ingest" must be a string "true" or "false"' + 'Global attribute "gts_ingest" must be a string "true" or "false"', ] return Result( BaseCheck.HIGH, @@ -1452,7 +1506,7 @@ def check_gts_ingest_requirements(self, ds): """ Check which variables qualify for ingest. - According to https://ioos.github.io/ioos-metadata/ioos-metadata-profile-v1-2.html#requirements-for-ioos-dataset-gts-ingest, + According to https://ioos.github.io/ioos-metadata/ioos-metadata-profile-v1-2.html#requirements-for-ioos-dataset-ndbcgts-ingest, the gts_ingest is "required, if applicable". Any variables which a user would like ingested must also contain the gts_ingest attribute with a value of true. 
The variable must: @@ -1488,18 +1542,16 @@ def check_gts_ingest_requirements(self, ds): var_passed_ingest_reqs = set() for v in ds.get_variables_by_attributes(gts_ingest=lambda x: x == "true"): var_passed_ingest_reqs.add( - (v.name, self._var_qualifies_for_gts_ingest(ds, v)) + (v.name, self._var_qualifies_for_gts_ingest(ds, v)), ) # always show which variables have passed - _var_passed = map( - lambda y: y[0], filter(lambda x: x[1], var_passed_ingest_reqs) - ) + _var_passed = (y[0] for y in filter(lambda x: x[1], var_passed_ingest_reqs)) - all_passed_ingest_reqs = all(map(lambda x: x[1], var_passed_ingest_reqs)) + all_passed_ingest_reqs = all(x[1] for x in var_passed_ingest_reqs) if not all_passed_ingest_reqs: - _var_failed = map( - lambda y: y[0], filter(lambda x: not x[1], var_passed_ingest_reqs) + _var_failed = ( + y[0] for y in filter(lambda x: not x[1], var_passed_ingest_reqs) ) return Result( @@ -1535,8 +1587,9 @@ def check_instrument_variables(self, ds): compnt = getattr(ds.variables[instr], "component", None) m = [ "component attribute of {} ({}) must be a string".format( - instr, compnt - ) + instr, + compnt, + ), ] if compnt: results.append( @@ -1545,18 +1598,19 @@ def check_instrument_variables(self, ds): isinstance(compnt, str), "instrument_variable", m, - ) + ), ) else: results.append( - Result(BaseCheck.MEDIUM, True, "instrument_variable", m) + Result(BaseCheck.MEDIUM, True, "instrument_variable", m), ) disct = getattr(ds.variables[instr], "discriminant", None) m = [ "discriminant attribute of {} ({}) must be a string".format( - instr, disct - ) + instr, + disct, + ), ] if disct: results.append( @@ -1565,11 +1619,11 @@ def check_instrument_variables(self, ds): isinstance(disct, str), "instrument_variable", m, - ) + ), ) else: results.append( - Result(BaseCheck.MEDIUM, True, "instrument_variable", m) + Result(BaseCheck.MEDIUM, True, "instrument_variable", m), ) return results @@ -1593,7 +1647,7 @@ def check_qartod_variables_flags(self, ds): results = [] # get qartod variables for v in ds.get_variables_by_attributes( - standard_name=lambda x: x in self._qartod_std_names + standard_name=lambda x: x in self._qartod_std_names, ): missing_msg = "flag_{} not present on {}" @@ -1606,7 +1660,7 @@ def check_qartod_variables_flags(self, ds): False, "qartod_variables flags", missing_msg.format("values", v.name), - ) + ), ) else: # if exist, test @@ -1619,7 +1673,7 @@ def check_qartod_variables_flags(self, ds): False, "qartod_variables flags", missing_msg.format("meanings", v.name), - ) + ), ) else: # if exist, test @@ -1647,7 +1701,7 @@ def check_qartod_variables_references(self, ds): results = [] for v in ds.get_variables_by_attributes( - standard_name=lambda x: x in self._qartod_std_names + standard_name=lambda x: x in self._qartod_std_names, ): attval = getattr(v, "references", None) if attval is None: @@ -1658,7 +1712,7 @@ def check_qartod_variables_references(self, ds): val = False else: msg = '"references" attribute for variable "{}" must be a valid URL'.format( - v.name + v.name, ) val = bool(validators.url(attval)) @@ -1668,7 +1722,7 @@ def check_qartod_variables_references(self, ds): val, "qartod_variable:references", None if val else [msg], - ) + ), ) return results @@ -1736,7 +1790,7 @@ def check_instrument_make_model_calib_date(self, ds): None if valid else [f"Attribute {v}:make_model ({mm}) should be a string"], - ) + ), ) # calibration_date @@ -1746,7 +1800,7 @@ def check_instrument_make_model_calib_date(self, ds): re.match( 
r"^(-?(?:[1-9][0-9]*)?[0-9]{4})-(1[0-2]|0[1-9])-(3[01]|0[1-9]|[12][0-9])T(2[0-3]|[01][0-9]):([0-5][0-9]):([0-5][0-9])(\.[0-9]+)?(Z|[+-](?:2[0-3]|[01][0-9]):[0-5][0-9])?$", cd, - ) + ), ) results.append( Result( @@ -1756,9 +1810,9 @@ def check_instrument_make_model_calib_date(self, ds): None if valid else [ - f"Attribute {v}:calibration_date ({cd}) should be an ISO-8601 string" + f"Attribute {v}:calibration_date ({cd}) should be an ISO-8601 string", ], - ) + ), ) return results diff --git a/compliance_checker/protocols/netcdf.py b/compliance_checker/protocols/netcdf.py index 519f132d2..415a94ecf 100644 --- a/compliance_checker/protocols/netcdf.py +++ b/compliance_checker/protocols/netcdf.py @@ -83,7 +83,8 @@ def is_remote_netcdf(ds_str): head_req.raise_for_status() except requests.exceptions.RequestException as e: warnings.warn( - "Received exception when making HEAD request to {}: {}".format(ds_str, e) + f"Received exception when making HEAD request to {ds_str}: {e}", + stacklevel=2, ) content_type = None else: diff --git a/compliance_checker/protocols/opendap.py b/compliance_checker/protocols/opendap.py index 3ae4e8900..c78363861 100644 --- a/compliance_checker/protocols/opendap.py +++ b/compliance_checker/protocols/opendap.py @@ -25,19 +25,19 @@ def create_DAP_variable_str(url): """ # get dds - with urllib.request.urlopen("{}.dds".format(url)) as resp: + with urllib.request.urlopen(f"{url}.dds") as resp: _str = resp.read().decode()[8:] # remove beginning and ending braces, split on newlines no_braces_newlines = list( - filter(lambda x: "{" not in x and "}" not in x, _str.split("\n")) + filter(lambda x: "{" not in x and "}" not in x, _str.split("\n")), ) # remove all the extra space used in the DDS string - no_spaces = list(filter(None, map(lambda x: x.strip(" "), no_braces_newlines))) + no_spaces = list(filter(None, (x.strip(" ") for x in no_braces_newlines))) # now need to split from type, grab only the variable and remove ; - vars_only = list(map(lambda x: x.split(" ")[-1].strip(";"), no_spaces)) + vars_only = [x.split(" ")[-1].strip(";") for x in no_spaces] # encode as proper URL characters varstr = urllib.parse.quote(",".join(vars_only)) diff --git a/compliance_checker/runner.py b/compliance_checker/runner.py index 928aee3a6..114bcac1f 100644 --- a/compliance_checker/runner.py +++ b/compliance_checker/runner.py @@ -1,4 +1,3 @@ -import io import json import os import sys @@ -22,7 +21,7 @@ def stdout_redirector(stream): sys.stdout = old_stdout -class ComplianceChecker(object): +class ComplianceChecker: """ Compliance Checker runner class. 
@@ -41,7 +40,7 @@ def run_checker( skip_checks=None, include_checks=None, output_filename="-", - output_format=["text"], + output_format="text", options=None, ): """ @@ -85,7 +84,7 @@ def run_checker( if not score_groups: raise ValueError( - "No checks found, please check the name of the checker(s) and that they are installed" + "No checks found, please check the name of the checker(s) and that they are installed", ) else: score_dict[loc] = score_groups @@ -109,9 +108,9 @@ def run_checker( if len(output_format) > 1: # Update file name if needed output_filename = "{}.txt".format( - os.path.splitext(output_filename)[0] + os.path.splitext(output_filename)[0], ) - with io.open(output_filename, "w", encoding="utf-8") as f: + with open(output_filename, "w", encoding="utf-8") as f: with stdout_redirector(f): cls.stdout_output(cs, score_dict, verbose, limit) @@ -119,7 +118,7 @@ def run_checker( # Update file name if needed if len(output_format) > 1 and output_filename != "-": output_filename = "{}.html".format( - os.path.splitext(output_filename)[0] + os.path.splitext(output_filename)[0], ) cls.html_output(cs, score_dict, output_filename, ds_loc, limit) @@ -127,7 +126,7 @@ def run_checker( # Update file name if needed if len(output_format) > 1 and output_filename != "-": output_filename = "{}.json".format( - os.path.splitext(output_filename)[0] + os.path.splitext(output_filename)[0], ) cls.json_output(cs, score_dict, output_filename, ds_loc, limit, out_fmt) @@ -158,11 +157,18 @@ def stdout_output(cls, cs, score_dict, verbose, limit): for checker, rpair in score_groups.items(): groups, errors = rpair score_list, points, out_of = cs.standard_output( - ds, limit, checker, groups + ds, + limit, + checker, + groups, ) # send list of grouped result objects to stdout & reasoning_routine cs.standard_output_generation( - groups, limit, points, out_of, check=checker + groups, + limit, + points, + out_of, + check=checker, ) return groups @@ -178,21 +184,27 @@ def html_output(cls, cs, score_dict, output_filename, ds_loc, limit): """ checkers_html = [] for ds, score_groups in score_dict.items(): - for checker, (groups, errors) in score_groups.items(): + for checker, (groups, _errors) in score_groups.items(): checkers_html.append(cs.checker_html_output(checker, groups, ds, limit)) html = cs.html_output(checkers_html) if output_filename == "-": print(html) else: - with io.open(output_filename, "w", encoding="utf8") as f: + with open(output_filename, "w", encoding="utf8") as f: f.write(html) return groups @classmethod def json_output( - cls, cs, score_dict, output_filename, ds_loc, limit, output_type="json" + cls, + cs, + score_dict, + output_filename, + ds_loc, + limit, + output_type="json", ): """ Generates JSON output for the ocmpliance score(s) @@ -209,7 +221,7 @@ def json_output( # json output keys out at the top level by if len(score_dict) > 1 and output_type != "json_new": raise ValueError( - "output_type must be set to 'json_new' if outputting multiple datasets to a single json file or stdout" + "output_type must be set to 'json_new' if outputting multiple datasets to a single json file or stdout", ) if output_type == "json": @@ -233,7 +245,7 @@ def json_output( if output_filename == "-": print(json_results) else: - with io.open(output_filename, "w", encoding="utf8") as f: + with open(output_filename, "w", encoding="utf8") as f: f.write(json_results) return groups @@ -259,12 +271,13 @@ def check_errors(cls, score_groups, verbose): ) for check_name, epair in errors.items(): print( - "%s.%s: %s" % (checker, 
check_name, epair[0]), file=sys.stderr + f"{checker}.{check_name}: {epair[0]}", + file=sys.stderr, ) if verbose > 0: traceback.print_tb( - epair[1].tb_next.tb_next + epair[1].tb_next.tb_next, ) # skip first two as they are noise from the running itself @TODO search for check_name print(file=sys.stderr) diff --git a/compliance_checker/suite.py b/compliance_checker/suite.py index ba3e3fe8e..3eb6ecad8 100644 --- a/compliance_checker/suite.py +++ b/compliance_checker/suite.py @@ -13,12 +13,12 @@ import warnings from collections import defaultdict from datetime import datetime, timezone +from distutils.version import StrictVersion from operator import itemgetter from pathlib import Path from urllib.parse import urlparse import requests -from distutils.version import StrictVersion from lxml import etree as ET from netCDF4 import Dataset from owslib.sos import SensorObservationService @@ -49,13 +49,13 @@ def extract_docstring_summary(docstring): r"^(?=.)", " ", textwrap.dedent( - re.split(r"\n\s*:\w", docstring, flags=re.MULTILINE)[0] + re.split(r"\n\s*:\w", docstring, flags=re.MULTILINE)[0], ).strip(), flags=re.MULTILINE, ) -class CheckSuite(object): +class CheckSuite: checkers = ( {} ) # Base dict of checker names to BaseCheck derived types, override this in your CheckSuite implementation @@ -90,11 +90,11 @@ def _print_suites(self, verbose=0): for checker in sorted(self.checkers.keys()): version = getattr(self.checkers[checker], "_cc_checker_version", "???") if verbose > 0: - print(" - {} (v{})".format(checker, version)) + print(f" - {checker} (v{version})") elif ":" in checker and not checker.endswith( - ":latest" + ":latest", ): # Skip the "latest" output - print(" - {}".format(checker)) + print(f" - {checker}") def _print_checker(self, checker_obj): """ @@ -106,10 +106,10 @@ def _print_checker(self, checker_obj): check_functions = self._get_checks(checker_obj, {}, defaultdict(lambda: None)) for c, _ in check_functions: - print("- {}".format(c.__name__)) + print(f"- {c.__name__}") if c.__doc__ is not None: u_doc = c.__doc__ - print("\n{}\n".format(extract_docstring_summary(u_doc))) + print(f"\n{extract_docstring_summary(u_doc)}\n") @classmethod def add_plugin_args(cls, parser): @@ -149,10 +149,11 @@ def _load_checkers(cls, checkers): try: check_obj = c.resolve() if hasattr(check_obj, "_cc_spec") and hasattr( - check_obj, "_cc_spec_version" + check_obj, + "_cc_spec_version", ): check_version_str = ":".join( - (check_obj._cc_spec, check_obj._cc_spec_version) + (check_obj._cc_spec, check_obj._cc_spec_version), ) cls.checkers[check_version_str] = check_obj # TODO: remove this once all checkers move over to the new @@ -161,7 +162,9 @@ def _load_checkers(cls, checkers): # if _cc_spec and _cc_spec_version attributes aren't # present, fall back to using name attribute checker_name = getattr(check_obj, "name", None) or getattr( - check_obj, "_cc_spec", None + check_obj, + "_cc_spec", + None, ) warnings.warn( "Checker for {} should implement both " @@ -169,10 +172,11 @@ def _load_checkers(cls, checkers): 'attributes. "name" attribute is deprecated. 
' "Assuming checker is latest version.", DeprecationWarning, + stacklevel=2, ) # append "unknown" to version string since no versioning # info was provided - cls.checkers["{}:unknown".format(checker_name)] = check_obj + cls.checkers[f"{checker_name}:unknown"] = check_obj except Exception as e: print("Could not load", c, ":", e, file=sys.stderr) @@ -253,7 +257,10 @@ def _run_check(self, check_method, ds, max_level): return check_val else: check_val = fix_return_value( - val, check_method.__func__.__name__, check_method, check_method.__self__ + val, + check_method.__func__.__name__, + check_method, + check_method.__self__, ) if max_level is None or check_val.weight > max_level: return [check_val] @@ -270,7 +277,7 @@ def _get_check_versioned_name(self, check_name): """ if ":" not in check_name or ":latest" in check_name: check_name = ":".join( - (check_name.split(":")[0], self.checkers[check_name]._cc_spec_version) + (check_name.split(":")[0], self.checkers[check_name]._cc_spec_version), ) return check_name @@ -300,7 +307,7 @@ def _get_valid_checkers(self, ds, checker_names): ] valid = [] - all_checked = set(a[1] for a in args) # only class types + all_checked = {a[1] for a in args} # only class types checker_queue = set(args) while len(checker_queue): name, a = checker_queue.pop() @@ -341,8 +348,10 @@ def _process_skip_checks(cls, skip_checks): warnings.warn( "Skip specifier '{}' on check '{}' not found," " defaulting to skip entire check".format( - split_check_spec[1], check_name - ) + split_check_spec[1], + check_name, + ), + stacklevel=2, ) check_max_level = BaseCheck.HIGH @@ -351,7 +360,10 @@ def _process_skip_checks(cls, skip_checks): return check_dict def run(self, ds, skip_checks, *checker_names): - warnings.warn("suite.run is deprecated, use suite.run_all in calls " "instead") + warnings.warn( + "suite.run is deprecated, use suite.run_all in calls instead", + stacklevel=2, + ) return self.run_all(ds, checker_names, skip_checks=skip_checks) def run_all(self, ds, checker_names, include_checks=None, skip_checks=None): @@ -376,7 +388,9 @@ def run_all(self, ds, checker_names, include_checks=None, skip_checks=None): if len(checkers) == 0: print( - "No valid checkers found for tests '{}'".format(",".join(checker_names)) + "No valid checkers found for tests '{}'".format( + ",".join(checker_names), + ), ) for checker_name, checker_class in checkers: @@ -510,7 +524,7 @@ def named_function(result): aggregates["cc_spec_version"] = self.checkers[check_name]._cc_spec_version aggregates["cc_url"] = self._get_check_url(aggregates["testname"]) aggregates["report_timestamp"] = datetime.now(timezone.utc).strftime( - "%Y-%m-%dT%H:%M:%SZ" + "%Y-%m-%dT%H:%M:%SZ", ) aggregates["cc_version"] = __version__ return aggregates @@ -557,7 +571,7 @@ def checker_html_output(self, check_name, groups, source_name, limit): from jinja2 import Environment, PackageLoader self.j2 = Environment( - loader=PackageLoader(self.templates_root, "data/templates") + loader=PackageLoader(self.templates_root, "data/templates"), ) template = self.j2.get_template("ccheck.html.j2") @@ -617,22 +631,24 @@ def standard_output(self, ds, limit, check_name, groups): print("\n") print("-" * width) print("IOOS Compliance Checker Report".center(width)) - print("Version {}".format(__version__).center(width)) + print(f"Version {__version__}".center(width)) print( "Report generated {}".format( - datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") - ).center(width) + datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"), + 
).center(width), ) - print("{}".format(check_name).center(width)) - print("{}".format(check_url).center(width)) + print(f"{check_name}".center(width)) + print(f"{check_url}".center(width)) print("-" * width) if issue_count > 0: print("Corrective Actions".center(width)) plural = "" if issue_count == 1 else "s" print( "{} has {} potential issue{}".format( - os.path.basename(ds), issue_count, plural - ) + os.path.basename(ds), + issue_count, + plural, + ), ) return [groups, points, out_of] @@ -679,7 +695,9 @@ def process_table(res, check): reasons = res.msgs else: child_reasons = self.reasoning_routine( - res.children, check, _top_level=False + res.children, + check, + _top_level=False, ) # there shouldn't be messages if there are children # is this a valid assumption? @@ -719,9 +737,9 @@ def process_table(res, check): print("") # join alphabetized reasons together reason_str = "\n".join( - "* {}".format(r) for r in sorted(reasons, key=lambda x: x[0]) + f"* {r}" for r in sorted(reasons, key=lambda x: x[0]) ) - proc_str = "{}\n{}".format(issue, reason_str) + proc_str = f"{issue}\n{reason_str}" print(proc_str) proc_strs.append(proc_str) has_printed = True @@ -742,7 +760,7 @@ def process_doc(self, doc): elif xml_doc.tag == "{http://www.opengis.net/sensorML/1.0.1}SensorML": ds = SensorML(xml_doc) else: - raise ValueError("Unrecognized XML root element: {}".format(xml_doc.tag)) + raise ValueError(f"Unrecognized XML root element: {xml_doc.tag}") return ds def generate_dataset(self, cdl_path): @@ -763,24 +781,26 @@ def generate_dataset(self, cdl_path): # generate netCDF-4 file iostat = subprocess.run( - ["ncgen", "-k", "nc4", "-o", ds_str, cdl_path], stderr=subprocess.PIPE + ["ncgen", "-k", "nc4", "-o", ds_str, cdl_path], + stderr=subprocess.PIPE, ) if iostat.returncode != 0: # if not successful, create netCDF classic file print( - "netCDF-4 file could not be generated from cdl file with " + "message:" + "netCDF-4 file could not be generated from cdl file with " + "message:", ) print(iostat.stderr.decode()) print("Trying to create netCDF Classic file instead.") iostat = subprocess.run( - ["ncgen", "-k", "nc3", "-o", ds_str, cdl_path], stderr=subprocess.PIPE + ["ncgen", "-k", "nc3", "-o", ds_str, cdl_path], + stderr=subprocess.PIPE, ) if iostat.returncode != 0: # Exit program if neither a netCDF Classic nor a netCDF-4 file # could be created. 
print( "netCDF Classic file could not be generated from cdl file" - + "with message:" + + "with message:", ) print(iostat.stderr.decode()) sys.exit(1) @@ -809,7 +829,8 @@ def check_remote_netcdf(self, ds_str): response = requests.get(ds_str, allow_redirects=True, timeout=60) try: return MemoizedDataset( - urlparse(response.url).path, memory=response.content + urlparse(response.url).path, + memory=response.content, ) except OSError: # handle case when netCDF C libs weren't compiled with @@ -839,7 +860,7 @@ def load_remote_dataset(self, ds_str): variables_str = opendap.create_DAP_variable_str(ds_str) # join to create a URL to an .ncCF resource - ds_str = "{}.ncCF?{}".format(ds_str, variables_str) + ds_str = f"{ds_str}.ncCF?{variables_str}" nc_remote_result = self.check_remote_netcdf(ds_str) if nc_remote_result: @@ -859,7 +880,7 @@ def load_remote_dataset(self, ds_str): return self.process_doc(response.content) else: raise ValueError( - "Unknown service with content-type: {}".format(content_type) + f"Unknown service with content-type: {content_type}", ) def load_local_dataset(self, ds_str): @@ -957,14 +978,18 @@ def group_func(r): else: max_weight = max([x.weight for x in v]) sum_scores = tuple( - map(sum, list(zip(*([self._translate_value(x.value) for x in v])))) + map(sum, list(zip(*([self._translate_value(x.value) for x in v])))), ) msgs = sum([x.msgs for x in v], []) ret_val.append( Result( - name=k, weight=max_weight, value=sum_scores, children=cv, msgs=msgs - ) + name=k, + weight=max_weight, + value=sum_scores, + children=cv, + msgs=msgs, + ), ) return ret_val diff --git a/compliance_checker/tests/__init__.py b/compliance_checker/tests/__init__.py index bce133fa9..d8060c2ac 100644 --- a/compliance_checker/tests/__init__.py +++ b/compliance_checker/tests/__init__.py @@ -14,7 +14,10 @@ def shortDescription(self): def __repr__(self): name = self.id() name = name.split(".") - return "%s ( %s )" % (name[-1], ".".join(name[:-2]) + ":" + ".".join(name[-2:])) + return "{} ( {} )".format( + name[-1], + ".".join(name[:-2]) + ":" + ".".join(name[-2:]), + ) __str__ = __repr__ @@ -51,7 +54,10 @@ def shortDescription(self): def __repr__(self): name = self.id() name = name.split(".") - return "%s ( %s )" % (name[-1], ".".join(name[:-2]) + ":" + ".".join(name[-2:])) + return "{} ( {} )".format( + name[-1], + ".".join(name[:-2]) + ":" + ".".join(name[-2:]), + ) __str__ = __repr__ diff --git a/compliance_checker/tests/conftest.py b/compliance_checker/tests/conftest.py index c83e5f44e..482bd814d 100644 --- a/compliance_checker/tests/conftest.py +++ b/compliance_checker/tests/conftest.py @@ -92,7 +92,7 @@ def new_nc_file(tmpdir): """ nc_file_path = os.path.join(tmpdir, "example.nc") if os.path.exists(nc_file_path): - raise IOError("File Exists: %s" % nc_file_path) + raise OSError("File Exists: %s" % nc_file_path) nc = Dataset(nc_file_path, "w") # no need for cleanup, built-in tmpdir fixture will handle it return nc @@ -102,7 +102,7 @@ def new_nc_file(tmpdir): def tmp_txt_file(tmpdir): file_path = os.path.join(tmpdir, "output.txt") if os.path.exists(file_path): - raise IOError("File Exists: %s" % file_path) + raise OSError("File Exists: %s" % file_path) return file_path diff --git a/compliance_checker/tests/helpers.py b/compliance_checker/tests/helpers.py index 9852ad5d9..a07c1aa3a 100644 --- a/compliance_checker/tests/helpers.py +++ b/compliance_checker/tests/helpers.py @@ -16,8 +16,11 @@ def __init__(self, filename=None): temp_filename = tempfile.NamedTemporaryFile(suffix=".nc", delete=True).name else: 
temp_filename = filename - super(MockNetCDF, self).__init__( - temp_filename, "w", diskless=True, persist=False + super().__init__( + temp_filename, + "w", + diskless=True, + persist=False, ) @@ -28,34 +31,30 @@ class MockTimeSeries(MockNetCDF): """ def __init__(self, filename=None, default_fill_value=None): - super(MockTimeSeries, self).__init__(filename) + super().__init__(filename) self.createDimension("time", 500) for name, std_name, units, axis in ( - ("time", "time", "seconds since 1970-01-01", "T"), + ("time", "time", "seconds since 1970-01-01 00:00:00", "T"), ("lon", "longitude", "degrees_east", "X"), ("lat", "latitude", "degrees_north", "Y"), ("depth", "depth", "m", "Z"), ): var = self.createVariable( - name, "d", ("time",), fill_value=default_fill_value + name, + "d", + ("time",), + fill_value=default_fill_value, ) var.standard_name = std_name var.units = units var.axis = axis # give some applicable units - self.variables["time"].units = "seconds since 2019-04-11T00:00:00" - self.variables["time"].axis = "T" - self.variables["lat"].units = "degree_north" - self.variables["lat"].axis = "Y" - self.variables["lon"].units = "degree_east" - self.variables["lon"].axis = "X" - self.variables["depth"].units = "meters" - self.variables["depth"].axis = "Z" + self.variables["time"].calendar = "standard" self.variables["depth"].positive = "down" -class MockVariable(object): +class MockVariable: """ For mocking a dataset variable. Constructor optionally takes a NetCDF variable, the NetCDF attributes of which will be copied over to this @@ -127,7 +126,7 @@ class MockRaggedArrayRepr(MockNetCDF): """ def __init__(self, feature_type: str, structure="contiguous"): - super(MockRaggedArrayRepr, self).__init__() + super().__init__() if structure.lower() not in ("contiguous", "indexed"): raise ValueError("Must initialize MockRaggedArray as contiguous or indexed") @@ -162,30 +161,38 @@ def __init__(self, feature_type: str, structure="contiguous"): # has the station dimension and cf_role _var_name = feature_type.lower().split("profile")[0] self.createVariable( - "{}_id_variable".format(_var_name), + f"{_var_name}_id_variable", str, ("STATION_DIMENSION",), fill_value=None, ) # set the cf_role - self.variables["{}_id_variable".format(_var_name)].setncattr( - "cf_role", "{}_id".format(_var_name) + self.variables[f"{_var_name}_id_variable"].setncattr( + "cf_role", + f"{_var_name}_id", ) # there will be one for the profile self.createVariable( - "profile_id_variable", str, ("INSTANCE_DIMENSION",), fill_value=None + "profile_id_variable", + str, + ("INSTANCE_DIMENSION",), + fill_value=None, ) self.variables["profile_id_variable"].setncattr("cf_role", "profile_id") # will need a station index variable self.createVariable( - "station_index_variable", int, ("INSTANCE_DIMENSION",), fill_value=None + "station_index_variable", + int, + ("INSTANCE_DIMENSION",), + fill_value=None, ) self.variables["station_index_variable"].setncattr( - "instance_dimension", "STATION_DIMENSION" + "instance_dimension", + "STATION_DIMENSION", ) # also need counter variable, as compound featureTypes @@ -200,19 +207,21 @@ def __init__(self, feature_type: str, structure="contiguous"): ) self.variables["counter_var"].setncattr( - "sample_dimension", "SAMPLE_DIMENSION" + "sample_dimension", + "SAMPLE_DIMENSION", ) else: # just a single featureType self.createVariable( - "{}_id_variable".format(feature_type), + f"{feature_type}_id_variable", str, ("INSTANCE_DIMENSION",), fill_value=None, ) - 
self.variables["{}_id_variable".format(feature_type)].setncattr( - "cf_role", "{}_id".format(feature_type) + self.variables[f"{feature_type}_id_variable"].setncattr( + "cf_role", + f"{feature_type}_id", ) if structure == "contiguous": @@ -225,7 +234,8 @@ def __init__(self, feature_type: str, structure="contiguous"): ) self.variables["counter_var"].setncattr( - "sample_dimension", "SAMPLE_DIMENSION" + "sample_dimension", + "SAMPLE_DIMENSION", ) else: @@ -237,5 +247,6 @@ def __init__(self, feature_type: str, structure="contiguous"): fill_value=None, ) self.variables["index_var"].setncattr( - "instance_dimension", "INSTANCE_DIMENSION" + "instance_dimension", + "INSTANCE_DIMENSION", ) diff --git a/compliance_checker/tests/resources.py b/compliance_checker/tests/resources.py index dd3df549d..4bcf314cc 100644 --- a/compliance_checker/tests/resources.py +++ b/compliance_checker/tests/resources.py @@ -37,11 +37,11 @@ def generate_dataset(cdl_path, nc_path): "bad2dim": get_filename("tests/data/non-comp/bad2dim.cdl"), "bounds_bad_order": get_filename("tests/data/non-comp/bounds_bad_order.cdl"), "bounds_bad_num_coords": get_filename( - "tests/data/non-comp/bounds_bad_num_coords.cdl" + "tests/data/non-comp/bounds_bad_num_coords.cdl", ), "cell_measure": get_filename("tests/data/cell_measure.cdl"), "cf_example_cell_measures": get_filename( - "tests/data/examples/cf_example_cell_measures.cdl" + "tests/data/examples/cf_example_cell_measures.cdl", ), "chap2": get_filename("tests/data/chap2.cdl"), "climatology": get_filename("tests/data/climatology.cdl"), @@ -58,7 +58,7 @@ def generate_dataset(cdl_path, nc_path): "forecast_reference": get_filename("tests/data/forecast_reference.cdl"), "fvcom": get_filename("tests/data/examples/fvcom.cdl"), "ghrsst": get_filename( - "tests/data/20160919092000-ABOM-L3S_GHRSST-SSTfnd-AVHRR_D-1d_dn_truncate.cdl" + "tests/data/20160919092000-ABOM-L3S_GHRSST-SSTfnd-AVHRR_D-1d_dn_truncate.cdl", ), "glcfs": get_filename("tests/data/examples/glcfs.cdl"), "grid-boundaries": get_filename("tests/data/grid-boundaries.cdl"), @@ -66,10 +66,10 @@ def generate_dataset(cdl_path, nc_path): "hycom_global": get_filename("tests/data/examples/hycom_global.cdl"), "h_point": get_filename("tests/data/appendix_h/point.cdl"), "h_timeseries-incomplete": get_filename( - "tests/data/appendix_h/timeseries-incomplete.cdl" + "tests/data/appendix_h/timeseries-incomplete.cdl", ), "h_timeseries-orthogonal": get_filename( - "tests/data/appendix_h/timeseries-orthogonal.cdl" + "tests/data/appendix_h/timeseries-orthogonal.cdl", ), "h_timeseries-single": get_filename("tests/data/appendix_h/timeseries-single.cdl"), "illegal-vertical": get_filename("tests/data/illegal-vertical.cdl"), @@ -84,15 +84,15 @@ def generate_dataset(cdl_path, nc_path): "mapping": get_filename("tests/data/mapping.cdl"), "multi-dim-coordinates": get_filename("tests/data/multi-dim-coordinates.cdl"), "multi-timeseries-orthogonal": get_filename( - "tests/data/multi-timeseries-orthogonal.cdl" + "tests/data/multi-timeseries-orthogonal.cdl", ), "multi-timeseries-incomplete": get_filename( - "tests/data/multi-timeseries-incomplete.cdl" + "tests/data/multi-timeseries-incomplete.cdl", ), "ncei_gold_point_1": get_filename("tests/data/ncei_gold_point_1.cdl"), "ncei_gold_point_2": get_filename("tests/data/ncei_gold_point_2.cdl"), "NCEI_profile_template_v2_0": get_filename( - "tests/data/NCEI_profile_template_v2.0_2016-09-22_181835.151325.cdl" + "tests/data/NCEI_profile_template_v2.0_2016-09-22_181835.151325.cdl", ), "ocos": 
get_filename("tests/data/examples/ocos.cdl"), "ooi_glider": get_filename("tests/data/examples/ooi_glider.cdl"), @@ -106,7 +106,7 @@ def generate_dataset(cdl_path, nc_path): "rhgrid": get_filename("tests/data/rhgrid.cdl"), "rutgers": get_filename("tests/data/ru07-20130824T170228_rt0.cdl"), "scalar_coordinate_variable": get_filename( - "tests/data/scalar_coordinate_variable.cdl" + "tests/data/scalar_coordinate_variable.cdl", ), "self-referencing-var": get_filename("tests/data/self-referencing-var.cdl"), "self_referencing": get_filename("tests/data/non-comp/self_referencing.cdl"), @@ -117,31 +117,31 @@ def generate_dataset(cdl_path, nc_path): "taxonomy_example": get_filename("tests/data/taxonomy_example.cdl"), "timeseries": get_filename("tests/data/timeseries.cdl"), "timeseries-profile-single-station": get_filename( - "tests/data/timeseries-profile-single-station.cdl" + "tests/data/timeseries-profile-single-station.cdl", ), "timeseries-profile-multi-station": get_filename( - "tests/data/timeseries-profile-multi-station.cdl" + "tests/data/timeseries-profile-multi-station.cdl", ), "timeseries-profile-single-ortho-time": get_filename( - "tests/data/timeseries-profile-single-ortho-time.cdl" + "tests/data/timeseries-profile-single-ortho-time.cdl", ), "timeseries-profile-multi-ortho-time": get_filename( - "tests/data/timeseries-profile-multi-ortho-time.cdl" + "tests/data/timeseries-profile-multi-ortho-time.cdl", ), "timeseries-profile-ortho-depth": get_filename( - "tests/data/timeseries-profile-ortho-depth.cdl" + "tests/data/timeseries-profile-ortho-depth.cdl", ), "timeseries-profile-incomplete": get_filename( - "tests/data/timeseries-profile-incomplete.cdl" + "tests/data/timeseries-profile-incomplete.cdl", ), "time_units": get_filename("tests/data/non-comp/time_units.cdl"), "trajectory-complete": get_filename("tests/data/trajectory-complete.cdl"), "trajectory-implied": get_filename("tests/data/trajectory-implied.cdl"), "trajectory-profile-orthogonal": get_filename( - "tests/data/trajectory-profile-orthogonal.cdl" + "tests/data/trajectory-profile-orthogonal.cdl", ), "trajectory-profile-incomplete": get_filename( - "tests/data/trajectory-profile-incomplete.cdl" + "tests/data/trajectory-profile-incomplete.cdl", ), "trajectory": get_filename("tests/data/trajectory.cdl"), "trajectory-single": get_filename("tests/data/trajectory-single.cdl"), diff --git a/compliance_checker/tests/test_acdd.py b/compliance_checker/tests/test_acdd.py index 453ce58c3..13b84bb08 100644 --- a/compliance_checker/tests/test_acdd.py +++ b/compliance_checker/tests/test_acdd.py @@ -106,14 +106,15 @@ def test_highly_recommended(self): Checks that all highly recommended attributes are present """ assert check_varset_nonintersect( - self.expected["Highly Recommended"], self.acdd_highly_recommended + self.expected["Highly Recommended"], + self.acdd_highly_recommended, ) # Check the reference dataset, NCEI 1.1 Gold Standard Point missing = ["\"Conventions\" does not contain 'ACDD-1.3'"] results = self.acdd.check_high(self.ds) for result in results: - if result.msgs and all([m in missing for m in result.msgs]): + if result.msgs and all(m in missing for m in result.msgs): # only the Conventions check should have failed self.assert_result_is_bad(result) self.assert_result_is_good(result) @@ -133,7 +134,8 @@ def test_recommended(self): # 'geospatial_bounds' attribute currently has its own separate check # from the list of required atts assert check_varset_nonintersect( - self.expected["Recommended"], self.acdd_recommended + 
self.expected["Recommended"], + self.acdd_recommended, ) ncei_exceptions = [ @@ -144,7 +146,7 @@ def test_recommended(self): results = self.acdd.check_recommended(self.ds) for result in results: if (result.msgs) and all( - [m in ncei_exceptions for m in result.msgs] + m in ncei_exceptions for m in result.msgs ): # we're doing string comparisons, this is kind of hacky... self.assert_result_is_bad(result) continue @@ -165,7 +167,8 @@ def test_suggested(self): Checks that all suggested attributes are present """ assert check_varset_nonintersect( - self.expected["Suggested"], self.acdd_suggested + self.expected["Suggested"], + self.acdd_suggested, ) # Attributes that are missing from NCEI but should be there @@ -178,7 +181,7 @@ def test_suggested(self): results = self.acdd.check_suggested(self.ds) for result in results: if (result.msgs) and all( - [m in missing for m in result.msgs] + m in missing for m in result.msgs ): # we're doing string comparisons, this is kind of hacky... self.assert_result_is_bad(result) continue @@ -311,7 +314,8 @@ def test_highly_recommended(self): Checks that all highly recommended attributes are present """ assert check_varset_nonintersect( - self.expected["Highly Recommended"], self.acdd_highly_recommended + self.expected["Highly Recommended"], + self.acdd_highly_recommended, ) results = self.acdd.check_high(self.ds) @@ -324,7 +328,8 @@ def test_recommended(self): Checks that all recommended attributes are present """ assert check_varset_nonintersect( - self.expected["Recommended"], self.acdd_recommended + self.expected["Recommended"], + self.acdd_recommended, ) results = self.acdd.check_recommended(self.ds) @@ -334,7 +339,7 @@ def test_recommended(self): ] for result in results: if (result.msgs) and all( - [m in ncei_exceptions for m in result.msgs] + m in ncei_exceptions for m in result.msgs ): # we're doing string comparisons, this is kind of hacky... self.assert_result_is_bad(result) continue @@ -346,7 +351,8 @@ def test_suggested(self): Checks that all suggested attributes are present """ assert check_varset_nonintersect( - self.expected["Suggested"], self.acdd_suggested + self.expected["Suggested"], + self.acdd_suggested, ) results = self.acdd.check_suggested(self.ds) @@ -358,7 +364,7 @@ def test_suggested(self): ] for result in results: if (result.msgs) and all( - [m in ncei_exceptions for m in result.msgs] + m in ncei_exceptions for m in result.msgs ): # we're doing string comparisons, this is kind of hacky... 
self.assert_result_is_bad(result) continue @@ -470,7 +476,9 @@ def test_check_lat_extents(self): # create dataset using MockDataset, give it lat/lon dimensions ds = MockTimeSeries() ds.variables["lat"][:] = np.linspace( - -135.0, -130.0, num=500 + -135.0, + -130.0, + num=500, ) # arbitrary, but matches time dim size # test no values, expect failure diff --git a/compliance_checker/tests/test_base.py b/compliance_checker/tests/test_base.py index 87b307353..18cc6c7b5 100644 --- a/compliance_checker/tests/test_base.py +++ b/compliance_checker/tests/test_base.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- """Tests for base compliance checker class""" import os @@ -33,13 +32,19 @@ def test_attr_presence(self): self.ds.test = "" base.attr_check(attr, self.ds, priority, rv2) assert rv2[0] == base.Result( - priority, False, "test", ["test is empty or completely whitespace"] + priority, + False, + "test", + ["test is empty or completely whitespace"], ) # test with whitespace in the form of a space and a tab self.ds.test = " " base.attr_check(attr, self.ds, priority, rv3) assert rv3[0] == base.Result( - priority, False, "test", ["test is empty or completely whitespace"] + priority, + False, + "test", + ["test is empty or completely whitespace"], ) # test with actual string contents self.ds.test = "abc 123" diff --git a/compliance_checker/tests/test_cf.py b/compliance_checker/tests/test_cf.py index 388053b11..70c8677c2 100644 --- a/compliance_checker/tests/test_cf.py +++ b/compliance_checker/tests/test_cf.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- import copy import json @@ -36,6 +35,7 @@ from compliance_checker.suite import CheckSuite from compliance_checker.tests import BaseTestCase from compliance_checker.tests.helpers import ( + MockNetCDF, MockRaggedArrayRepr, MockTimeSeries, MockVariable, @@ -86,7 +86,7 @@ def new_nc_file(self): """ nc_file_path = os.path.join(gettempdir(), "example.nc") if os.path.exists(nc_file_path): - raise IOError("File Exists: %s" % nc_file_path) + raise OSError("File Exists: %s" % nc_file_path) nc = Dataset(nc_file_path, "w") self.addCleanup(os.remove, nc_file_path) self.addCleanup(nc.close) @@ -121,6 +121,12 @@ def test_coord_data_vars(self): # present in coord_data_vars self.assertEqual(self.cf.coord_data_vars, {"time", "sigma"}) + ds = MockTimeSeries() + ds.variables["time"][:3] = np.array([20, -2, 0]) + result = self.cf.check_coordinate_variables_strict_monotonicity(ds) + _, _, messages = get_results(result) + assert 'Coordinate variable "time" must be strictly monotonic' in messages + # -------------------------------------------------------------------------------- # Compliance Tests # -------------------------------------------------------------------------------- @@ -145,7 +151,7 @@ def test_check_data_types(self): dataset = self.load_dataset(STATIC_FILES["string"]) if dataset.file_format != "NETCDF4": raise RuntimeError( - "netCDF file of wrong format (not netCDF4) was created for checking" + "netCDF file of wrong format (not netCDF4) was created for checking", ) result = self.cf.check_data_types(dataset) assert result.value[0] == result.value[1] @@ -169,12 +175,14 @@ def test_check_child_attr_data_types(self): # create dataset using MockDataset (default constructor gives it time dimension) ds = MockTimeSeries() ds.createVariable( - "temp", np.float64, dimensions=("time") + "temp", + np.float64, + dimensions=("time"), ) # add variable "temp" with dimension "time" # check where no special data attrs are present, should 
result good result = self.cf.check_child_attr_data_types( - ds + ds, ) # checks all special attrs for all variables self.assert_result_is_good(result) @@ -641,12 +649,13 @@ def test_cell_measures(self): dataset = self.load_dataset(STATIC_FILES["bad_cell_measure1"]) results = self.cf.check_cell_measures(dataset) score, out_of, messages = get_results(results) - message = ( - "The cell_measures attribute for variable PS is formatted incorrectly. " + expected_message = ( + "The cell_measures attribute for variable PS is formatted incorrectly. " "It should take the form of either 'area: cell_var' or 'volume: cell_var' " - "where cell_var is the variable describing the cell measures" + "where cell_var is an existing name of a variable describing the " + "cell measures." ) - assert message in messages + assert expected_message in messages dataset = self.load_dataset(STATIC_FILES["bad_cell_measure2"]) results = self.cf.check_cell_measures(dataset) @@ -654,6 +663,42 @@ def test_cell_measures(self): message = "Cell measure variable box_area referred to by PS is not present in dataset variables" assert message in messages + dataset = MockTimeSeries() + dataset.createVariable("PS", "d", ("time",)) # dtype=double, dims=time + dataset.variables["PS"].setncattr("cell_measures", "area: cell_area") + # ensure the cell_measures var is in the dataset + dataset.createVariable("cell_area", "d", ("time",)) + dataset.variables["cell_area"].setncattr("units", "m3") + # TEST CONFORMANCE 7.2 REQUIRED + # inappropriate length exponent for area + expected_fail_msg = ( + 'Variable "cell_area" must have units which are convertible ' + 'to UDUNITS "m2" when variable is referred to by a dataset variable with ' + 'cell_methods attribute with a measure type of "area".' + ) + results = self.cf.check_cell_measures(dataset) + score, out_of, messages = get_results(results) + assert expected_fail_msg in messages + + # set erroneous units that aren't convertible to UDUnits length + # units + dataset.variables["cell_area"].setncattr("units", "s3") + results = self.cf.check_cell_measures(dataset) + score, out_of, messages = get_results(results) + assert expected_fail_msg in messages + + # TEST CONFORMANCE 7.2 REQUIRED 1/2 + dataset.createDimension("depth2", 5) + dataset.variables["PS"].setncattr("cell_measures", "area: cell_area2") + dataset.createVariable("cell_area2", "f8", ("time", "depth2")) + dataset.variables["cell_area2"].setncattr("units", "m2") + results = self.cf.check_cell_measures(dataset) + score, out_of, messages = get_results(results) + assert ( + "Cell measure variable cell_area2 must have dimensions which are a subset of those defined in variable PS." 
+ in messages + ) + def test_climatology_cell_methods(self): """ Checks that climatology cell_methods strings are properly validated @@ -665,7 +710,7 @@ def test_climatology_cell_methods(self): score, out_of, messages = get_results(results) self.assertEqual(score, out_of) temp_var = dataset.variables["temperature"] = MockVariable( - dataset.variables["temperature"] + dataset.variables["temperature"], ) temp_var.cell_methods = "INVALID" results = self.cf.check_climatological_statistics(dataset) @@ -699,9 +744,9 @@ def test_climatology_cell_methods(self): score, out_of, messages = get_results(results) self.assertEqual(score, out_of) - # TEST CONFORMMANCE 7.4 REQUIRED 5/6 + # TEST CONFORMANCE 7.4 REQUIRED 5/6 dataset.variables["climatology_bounds"] = MockVariable( - dataset.variables["climatology_bounds"] + dataset.variables["climatology_bounds"], ) clim_bounds = dataset.variables["climatology_bounds"] clim_bounds.standard_name = "forecast_reference_time" @@ -777,7 +822,8 @@ def test_download_standard_name_table(self): data_directory = create_cached_data_dir() location = os.path.join( - data_directory, "cf-standard-name-table-test-{0}.xml".format(version) + data_directory, + f"cf-standard-name-table-test-{version}.xml", ) download_cf_standard_name_table(version, location) @@ -821,7 +867,7 @@ def test_check_flags(self): imperfect = [r.value for r in results if r.value[0] < r.value[1]] assert len(imperfect) == 4 dataset.variables["conductivity_qc"] = MockVariable( - dataset.variables["conductivity_qc"] + dataset.variables["conductivity_qc"], ) # Test with single element. Will fail, but should not throw exception. dataset.variables["conductivity_qc"].flag_values = np.array([1], dtype=np.int8) @@ -1056,21 +1102,21 @@ def test_appendix_d(self): "atmosphere_ln_pressure_coordinate", {"p0", "lev"}, dimless_vertical_coordinates_1_6, - ) + ), ) self.assertTrue( no_missing_terms( "atmosphere_sigma_coordinate", {"sigma", "ps", "ptop"}, dimless_vertical_coordinates_1_6, - ) + ), ) self.assertTrue( no_missing_terms( "atmosphere_hybrid_sigma_pressure_coordinate", {"a", "b", "ps"}, dimless_vertical_coordinates_1_6, - ) + ), ) # test alternative terms for # 'atmosphere_hybrid_sigma_pressure_coordinate' @@ -1079,7 +1125,7 @@ def test_appendix_d(self): "atmosphere_hybrid_sigma_pressure_coordinate", {"ap", "b", "ps"}, dimless_vertical_coordinates_1_6, - ) + ), ) # check that an invalid set of terms fails self.assertFalse( @@ -1087,14 +1133,14 @@ def test_appendix_d(self): "atmosphere_hybrid_sigma_pressure_coordinate", {"a", "b", "p"}, dimless_vertical_coordinates_1_6, - ) + ), ) self.assertTrue( no_missing_terms( "atmosphere_hybrid_height_coordinate", {"a", "b", "orog"}, dimless_vertical_coordinates_1_6, - ) + ), ) # missing terms should cause failure self.assertFalse( @@ -1102,7 +1148,7 @@ def test_appendix_d(self): "atmosphere_hybrid_height_coordinate", {"a", "b"}, dimless_vertical_coordinates_1_6, - ) + ), ) # excess terms should cause failure self.assertFalse( @@ -1110,42 +1156,42 @@ def test_appendix_d(self): "atmosphere_hybrid_height_coordinate", {"a", "b", "c", "orog"}, dimless_vertical_coordinates_1_6, - ) + ), ) self.assertTrue( no_missing_terms( "atmosphere_sleve_coordinate", {"a", "b1", "b2", "ztop", "zsurf1", "zsurf2"}, dimless_vertical_coordinates_1_6, - ) + ), ) self.assertTrue( no_missing_terms( "ocean_sigma_coordinate", {"sigma", "eta", "depth"}, dimless_vertical_coordinates_1_6, - ) + ), ) self.assertTrue( no_missing_terms( "ocean_s_coordinate", {"s", "eta", "depth", "a", "b", "depth_c"}, 
dimless_vertical_coordinates_1_6, - ) + ), ) self.assertTrue( no_missing_terms( "ocean_sigma_z_coordinate", {"sigma", "eta", "depth", "depth_c", "zlev"}, dimless_vertical_coordinates_1_6, - ) + ), ) self.assertTrue( no_missing_terms( "ocean_double_sigma_coordinate", {"sigma", "depth", "z1", "z2", "a", "href", "k_c"}, dimless_vertical_coordinates_1_6, - ) + ), ) def test_dimensionless_vertical(self): @@ -1187,6 +1233,11 @@ def test_dimensionless_vertical(self): assert len([r for r in results if r.value[0] < r.value[1]]) == 2 assert all(r.name == "§4.3 Vertical Coordinate" for r in results) + # blank string is not valid and won't match, ensure this is caught + lev2.formula_terms = "" + results = self.cf.check_dimensionless_vertical_coordinates(dataset) + assert "Attribute formula_terms is not well-formed" + def test_is_time_variable(self): var1 = MockVariable() var1.standard_name = "time" @@ -1211,20 +1262,23 @@ def test_dimensionless_standard_names(self): # canonical_units are K, should be False self.assertFalse( cfutil.is_dimensionless_standard_name( - std_names_xml_root, "sea_water_temperature" - ) + std_names_xml_root, + "sea_water_temperature", + ), ) # canonical_units are 1, should be True self.assertTrue( cfutil.is_dimensionless_standard_name( - std_names_xml_root, "sea_water_practical_salinity" - ) + std_names_xml_root, + "sea_water_practical_salinity", + ), ) # canonical_units are 1e-3, should be True self.assertTrue( cfutil.is_dimensionless_standard_name( - std_names_xml_root, "sea_water_salinity" - ) + std_names_xml_root, + "sea_water_salinity", + ), ) def test_check_time_coordinate(self): @@ -1233,6 +1287,7 @@ def test_check_time_coordinate(self): for r in results: self.assertTrue(r.value) + # TEST CONFORMANCE 4.4 REQUIRED 1/2 dataset = self.load_dataset(STATIC_FILES["bad"]) results = self.cf.check_time_coordinate(dataset) @@ -1240,6 +1295,23 @@ def test_check_time_coordinate(self): assert "time does not have correct time units" in messages assert (scored, out_of) == (1, 2) + # TEST CONFORMANCE 4.4 REQUIRED 2/2, RECOMMENDED 1, 2/2 + dataset = MockTimeSeries() + # NB: >= 60 seconds is nonstandard, but isn't actually a CF requirement + # until CF 1.9 onwards + dataset.variables["time"].units = "months since 0-1-1 23:00:60" + dataset.variables[ + "time" + ].climatology = ( + "nonexistent_variable_reference_only_used_to_test_year_zero_failure" + ) + results = self.cf.check_time_coordinate(dataset) + scored, out_of, messages = get_results(results) + assert scored < out_of + assert ( + "Using relative time interval of months or years is not recommended for coordinate variable time" + in messages + ) def test_check_calendar(self): """Load a dataset with an invalid calendar attribute (non-comp/bad.nc). 
@@ -1261,7 +1333,12 @@ def test_check_calendar(self): assert bad_month_msg in messages dataset = MockTimeSeries() - dataset.variables["time"] + # no calendar should not raise an issue on time coordinate variables + del dataset.variables["time"].calendar + results = self.cf.check_calendar(dataset) + scored, out_of, messages = get_results(results) + assert not messages + # test case insensivity valid_calendars = ( "GREGORIAN", @@ -1276,6 +1353,10 @@ def test_check_calendar(self): "NONE", ) for calendar_uppercase in valid_calendars: + # need to make a new MockTimeSeries when attribute deleted for + # calendar attributes to work properly + dataset = MockTimeSeries() + dataset.calendar = calendar_uppercase results = self.cf.check_calendar(dataset) scored, out_of, messages = get_results(results) assert scored == out_of @@ -1288,7 +1369,8 @@ def test_check_calendar(self): assert bad_month_msg in messages dataset.variables["time"].month_lengths = np.array( - [31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31], dtype=int + [31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31], + dtype=int, ) results = self.cf.check_calendar(dataset) scored, out_of, messages = get_results(results) @@ -1434,7 +1516,7 @@ def test_is_geophysical(self): dataset = self.load_dataset(STATIC_FILES["string"]) if dataset.file_format != "NETCDF4": raise RuntimeError( - "netCDF file of wrong format (not netCDF4) was created for checking" + "netCDF file of wrong format (not netCDF4) was created for checking", ) try: result = cfutil.is_geophysical(dataset, "j") @@ -1442,7 +1524,7 @@ def test_is_geophysical(self): pytest.fail( "Test probably fails because var.dtype.kind or var.dtype.char " "was tested on string-type variable. Consider checking for " - "`var.dtype is str`" + "`var.dtype is str`", ) assert not result # assert False @@ -1509,7 +1591,8 @@ def test_check_attr_type(self): res = self.cf._check_attr_type(att_name, att_type, att, _var) self.assertFalse(res[0]) self.assertEqual( - res[1], "test_att must be numeric and must be equivalent to float64 dtype" + res[1], + "test_att must be numeric and must be equivalent to float64 dtype", ) def test_check_grid_mapping_attr_condition(self): @@ -1669,7 +1752,7 @@ def test_check_standard_name_modifier_units(self): n_failed = out_of - scored assert n_failed == 1 expected_messages = { - "units attribute for variable temp_flag must be unset when status_flag standard name modifier is set" + "units attribute for variable temp_flag must be unset when status_flag standard name modifier is set", } assert set(messages) == expected_messages @@ -1751,8 +1834,8 @@ def test_check_cell_methods(self): results_list = list(chain(*(r.msgs for r in results if r.msgs))) # check the results only have expected headers - assert set([r.name for r in results]).issubset( - set(["§7.1 Cell Boundaries", "§7.3 Cell Methods"]) + assert {r.name for r in results}.issubset( + {"§7.1 Cell Boundaries", "§7.3 Cell Methods"}, ) # check that all the expected variables have been hit @@ -1785,7 +1868,7 @@ def test_check_cell_methods(self): self.assertTrue( '§7.3.3 The non-standard "comment:" element must come after any standard elements in cell_methods for variable temperature' - in messages + in messages, ) # standalone comments require no keyword @@ -1802,7 +1885,7 @@ def test_check_cell_methods(self): scored, out_of, messages = get_results(results) self.assertTrue( '§7.3.3 Invalid cell_methods keyword "invalid_keyword:" for variable temperature. 
Must be one of [interval, comment]' - in messages + in messages, ) # check that "parenthetical elements" are well-formed (they should not be) @@ -2060,8 +2143,6 @@ def test_check_cell_boundaries_interval(self): assert (score, out_of) == (1, 2) def test_cell_measures(self): - """Over-ride the test_cell_measures from CF1_6""" - # create a temporary variable and test this only with MockTimeSeries() as dataset: dataset.createVariable("PS", "d", ("time",)) # dtype=double, dims=time @@ -2075,37 +2156,72 @@ def test_cell_measures(self): score, out_of, messages = get_results(results) assert (score == out_of) and (score > 0) - # same thing, but test that the cell_area variable is in - # the global attr "external_variables" + # bad measure, not area or volume + dataset.variables["PS"].cell_measures = "length: cell_area" + results = self.cf.check_cell_measures(dataset) + score, out_of, messages = get_results(results) + assert ( + "The cell_measures attribute for variable PS is formatted " + "incorrectly. It should take the form of either 'area: " + "cell_var' or 'volume: cell_var' where cell_var is an " + "existing name of a variable describing the cell measures." in messages + ) - with MockTimeSeries() as dataset: - dataset.createVariable("PS", "d", ("time",)) # dtype=double, dims=time - dataset.variables["PS"].setncattr("cell_measures", "area: cell_area") - dataset.setncattr("external_variables", ["cell_area"]) + # proper measure type, but referenced variable does not exist + dataset.variables["PS"].cell_measures = "area: NONEXISTENT_VAR" + results = self.cf.check_cell_measures(dataset) + score, out_of, messages = get_results(results) + assert ( + "Cell measure variable NONEXISTENT_VAR referred to by " + "PS is not present in dataset or external variables" in messages + ) - # run the check + dataset.variables["PS"].cell_measures = "area: no_units" + dataset.createVariable("no_units", "i2", ()) results = self.cf.check_cell_measures(dataset) score, out_of, messages = get_results(results) - assert score > 0 - assert score == out_of + assert ( + "Cell measure variable no_units is required to have units " + "attribute defined" in messages + ) + + # cell_area variable is in + # the global attr "external_variables" + + dataset = MockTimeSeries() + dataset.createVariable("PS", "d", ("time",)) # dtype=double, dims=time + dataset.variables["PS"].setncattr("cell_measures", "area: cell_area") + dataset.setncattr("external_variables", "cell_area") + + # run the check + results = self.cf.check_cell_measures(dataset) + score, out_of, messages = get_results(results) + assert score > 0 + assert score == out_of + + # Non-string external variables, just treat as empty + dataset.setncattr("external_variables", 1) + results = self.cf.check_cell_measures(dataset) + score, out_of, messages = get_results(results) + message = "Cell measure variable cell_area referred to by PS is not present in dataset or external variables" # now test a dataset with a poorly formatted cell_measure attr dataset = self.load_dataset(STATIC_FILES["bad_cell_measure1"]) results = self.cf.check_cell_measures(dataset) score, out_of, messages = get_results(results) - message = ( - "The cell_measures attribute for variable PS is formatted incorrectly. " + expected_message = ( + "The cell_measures attribute for variable PS is formatted incorrectly. 
" "It should take the form of either 'area: cell_var' or 'volume: cell_var' " - "where cell_var is the variable describing the cell measures" + "where cell_var is an existing name of a variable describing the cell measures." ) - assert message in messages + assert expected_message in messages # test a dataset where the cell_measure attr is not in the dataset or external_variables # check for the variable should fail dataset = self.load_dataset(STATIC_FILES["bad_cell_measure2"]) results = self.cf.check_cell_measures(dataset) score, out_of, messages = get_results(results) - message = "Cell measure variable box_area referred to by PS is not present in dataset variables" + message = "Cell measure variable box_area referred to by PS is not present in dataset or external variables" assert message in messages def test_variable_features(self): @@ -2140,7 +2256,7 @@ def test_process_vdatum(self): alt_name TEXT NOT NULL CHECK (length(alt_name) >= 2), source TEXT ); - """ + """, ) cur.execute( """ @@ -2155,18 +2271,18 @@ def test_process_vdatum(self): deprecated BOOLEAN NOT NULL CHECK (deprecated IN (0, 1)), CONSTRAINT pk_vertical_datum PRIMARY KEY (auth_name, code) ); - """ + """, ) cur.execute( """INSERT INTO alias_name VALUES ('vertical_datum', 'EPSG', '5103', 'NAVD88', 'EPSG'); - """ + """, ) cur.execute( """INSERT INTO vertical_datum VALUES ('EPSG', '5101', 'Ordnance Datum Newlyn', NULL, NULL, - 'EPSG', '2792', '0')""" + 'EPSG', '2792', '0')""", ) cur.close() @@ -2220,7 +2336,7 @@ def test_check_grid_mapping_crs_wkt(self): for m in messages if m != "false_easting is a required attribute for grid mapping stereographic" - ] + ], ) self.assertEqual(msg_len, 0) @@ -2331,14 +2447,14 @@ def test_appendix_d(self): "ocean_s_coordinate_g1", {"s", "C", "eta", "depth", "depth_c"}, dimless_vertical_coordinates_1_7, - ) + ), ) self.assertTrue( no_missing_terms( "ocean_s_coordinate_g2", {"s", "C", "eta", "depth", "depth_c"}, dimless_vertical_coordinates_1_7, - ) + ), ) def test_check_dimensionless_vertical_coordinate_1_7(self): @@ -2354,10 +2470,12 @@ def test_check_dimensionless_vertical_coordinate_1_7(self): with MockTimeSeries() as dataset: dataset.createVariable("lev", "d") # dtype=double, dims=1 dataset.variables["lev"].setncattr( - "standard_name", "atmosphere_sigma_coordinate" + "standard_name", + "atmosphere_sigma_coordinate", ) dataset.variables["lev"].setncattr( - "formula_terms", "sigma: lev ps: PS ptop: PTOP" + "formula_terms", + "sigma: lev ps: PS ptop: PTOP", ) dataset.createVariable("PS", "d", ("time",)) # dtype=double, dims=time @@ -2374,8 +2492,7 @@ def test_check_dimensionless_vertical_coordinate_1_7(self): # one should have failed, as no computed_standard_name is assigned score, out_of, messages = get_results(ret_val) - assert score == 0 - assert out_of == 1 + assert score < out_of # this time, assign computed_standard_name ret_val = [] @@ -2420,6 +2537,16 @@ def test_dimensionless_vertical(self): assert scored < out_of assert all(r.name == "§4.3 Vertical Coordinate" for r in results) + # TEST CONFORMANCE 4.3.3 REQUIRED + del dataset.variables["lev"].formula_terms + results = self.cf.check_dimensionless_vertical_coordinates(dataset) + + # FIXME: get_results messages variable doesn't return message here + assert ( + "Variable lev should have formula_terms attribute when " + "computed_standard_name attribute is defined" in results[-1].msgs + ) + def test_check_attr_type(self): """ Ensure the _check_attr_type method works as expected. 
@@ -2444,20 +2571,20 @@ def test_check_attr_type(self): _var.test_att = np.float64(45) attr_type = "D" self.assertTrue( - self.cf._check_attr_type(att_name, attr_type, _var.test_att, _var)[0] + self.cf._check_attr_type(att_name, attr_type, _var.test_att, _var)[0], ) # check failures _var.test_att = "my_attr_value" attr_type = "N" # should be numeric self.assertFalse( - self.cf._check_attr_type(att_name, attr_type, _var.test_att)[0] + self.cf._check_attr_type(att_name, attr_type, _var.test_att)[0], ) _var.test_att = np.int8(64) attr_type = "S" # should be string self.assertFalse( - self.cf._check_attr_type(att_name, attr_type, _var.test_att)[0] + self.cf._check_attr_type(att_name, attr_type, _var.test_att)[0], ) nc_obj = MockTimeSeries() @@ -2467,7 +2594,7 @@ def test_check_attr_type(self): _var.test_att = np.int8(45) attr_type = "D" # should match self.assertFalse( - self.cf._check_attr_type(att_name, attr_type, _var.test_att, _var)[0] + self.cf._check_attr_type(att_name, attr_type, _var.test_att, _var)[0], ) def test_check_grid_mapping_attr_condition(self): @@ -2588,7 +2715,7 @@ def test_check_gmattr_existence_condition_geoid_name_geoptl_datum_name(self): dataset.createVariable("lev", "d") # dtype=double, dims=1 dataset.variables["lev"].setncattr("geoid_name", "blah") res = self.cf._check_gmattr_existence_condition_geoid_name_geoptl_datum_name( - dataset.variables["lev"] + dataset.variables["lev"], ) self.assertTrue(res[0]) dataset.close() @@ -2597,7 +2724,7 @@ def test_check_gmattr_existence_condition_geoid_name_geoptl_datum_name(self): dataset.createVariable("lev", "d") # dtype=double, dims=1 dataset.variables["lev"].setncattr("geopotential_datum_name", "blah") res = self.cf._check_gmattr_existence_condition_geoid_name_geoptl_datum_name( - dataset.variables["lev"] + dataset.variables["lev"], ) self.assertTrue(res[0]) dataset.close() @@ -2608,7 +2735,7 @@ def test_check_gmattr_existence_condition_geoid_name_geoptl_datum_name(self): dataset.variables["lev"].setncattr("geopotential_datum_name", "blah") dataset.variables["lev"].setncattr("geoid_name", "blah") res = self.cf._check_gmattr_existence_condition_geoid_name_geoptl_datum_name( - dataset.variables["lev"] + dataset.variables["lev"], ) self.assertFalse(res[0]) dataset.close() @@ -2621,7 +2748,7 @@ def test_check_gmattr_existence_condition_ell_pmerid_hdatum(self): dataset.variables["lev"].setncattr("prime_meridian_name", "blah") dataset.variables["lev"].setncattr("horizontal_datum_name", "blah") res = self.cf._check_gmattr_existence_condition_ell_pmerid_hdatum( - dataset.variables["lev"] + dataset.variables["lev"], ) self.assertTrue(res[0]) dataset.close() @@ -2631,7 +2758,7 @@ def test_check_gmattr_existence_condition_ell_pmerid_hdatum(self): dataset.createVariable("lev", "d") # dtype=double, dims=1 dataset.variables["lev"].setncattr("reference_ellipsoid_name", "blah") res = self.cf._check_gmattr_existence_condition_ell_pmerid_hdatum( - dataset.variables["lev"] + dataset.variables["lev"], ) self.assertFalse(res[0]) dataset.close() @@ -2642,7 +2769,7 @@ def test_check_gmattr_existence_condition_ell_pmerid_hdatum(self): dataset.variables["lev"].setncattr("reference_ellipsoid_name", "blah") dataset.variables["lev"].setncattr("prime_meridian_name", "blah") res = self.cf._check_gmattr_existence_condition_ell_pmerid_hdatum( - dataset.variables["lev"] + dataset.variables["lev"], ) self.assertFalse(res[0]) dataset.close() @@ -2698,7 +2825,9 @@ def test_check_add_offset_scale_factor_type(self): # floating point add_offset/scale_factor for 
var_bytes in ("1", "2", "4"): coarse_temp = dataset.createVariable( - f"coarse_temp_{var_bytes}", f"i{var_bytes}", dimensions=("time",) + f"coarse_temp_{var_bytes}", + f"i{var_bytes}", + dimensions=("time",), ) coarse_temp.setncattr("scale_factor", np.float32(23.0)) coarse_temp.setncattr("add_offset", np.double(-2.1)) @@ -2724,6 +2853,20 @@ class TestCF1_8(BaseTestCase): def setUp(self): self.cf = CF1_8Check() + def test_groups(self): + dataset = MockTimeSeries() + # TEST CONFORMANCE 2.7 REQUIRED 1/4 + nonroot_group = dataset.createGroup("nonroot") + nonroot_group.setncattr("Conventions", "CF-1.8") + nonroot_group.setncattr("external_variables", "ext1") + results = self.cf.check_groups(dataset) + bad_msg_template = '§2.7.2 Attribute "{}" MAY ONLY be used in the root group and SHALL NOT be duplicated or overridden in child groups.' + bad_messages = { + bad_msg_template.format(attr_name) + for attr_name in ["Conventions", "external_variables"] + } + assert bad_messages == set(results[0].msgs) + def test_point_geometry_simple(self): dataset = MockTimeSeries() fake_data = dataset.createVariable("someData", "f8", ("time",)) @@ -2767,7 +2910,7 @@ def test_polygon_geometry(self): dataset = self.load_dataset(STATIC_FILES["polygon_geometry"]) self.cf.check_geometry(dataset) dataset.variables["interior_ring"] = MockVariable( - dataset.variables["interior_ring"] + dataset.variables["interior_ring"], ) # Flip sign indicator for interior rings. Should cause failure flip_ring_bits = (dataset.variables["interior_ring"][:] == 0).astype(int) @@ -2814,7 +2957,7 @@ def test_bad_lsid(self): ] = "http://www.lsid.info/urn:lsid:marinespecies.org:taxname:99999999999" results = self.cf.check_taxa(dataset) assert messages[0].startswith( - "Taxon id must match one of the following forms:" + "Taxon id must match one of the following forms:", ) assert results[0].value[0] < results[0].value[1] @@ -2826,8 +2969,8 @@ def test_taxonomy_data_worms_valid(self): # assume LSID lookups for WoRMS return valid HTTP status code m.get( re.compile( - r"^http://www.lsid.info/urn:lsid:marinespecies.org:taxname:\d+$" - ) + r"^http://www.lsid.info/urn:lsid:marinespecies.org:taxname:\d+$", + ), ) response_1 = json.dumps( { @@ -2858,7 +3001,7 @@ def test_taxonomy_data_worms_valid(self): "isExtinct": None, "match_type": "exact", "modified": "2020-10-06T15:25:25.040Z", - } + }, ) m.get( "http://www.marinespecies.org/rest/AphiaRecordByAphiaID/104464", @@ -2893,7 +3036,7 @@ def test_taxonomy_data_worms_valid(self): "isExtinct": None, "match_type": "exact", "modified": "2004-12-21T15:54:05Z", - } + }, ) m.get( "http://www.marinespecies.org/rest/AphiaRecordByAphiaID/104466", @@ -2945,7 +3088,7 @@ def test_taxonomy_data_itis_valid(self): assert result.msgs == [ "Supplied taxon name and ITIS scientific name do not match. 
" "Supplied taxon name is 'Morone saxitilis', ITIS scientific name " - "for TSN 162139 is 'Esox lucius.'" + "for TSN 162139 is 'Esox lucius.'", ] def test_taxonomy_skip_lsid(self): @@ -2997,30 +3140,78 @@ class TestCF1_9(BaseTestCase): def setUp(self): self.cf = CF1_9Check() + def test_check_data_types(self): + """Check the unsigned int datatypes for variables CF 1.9 added""" + dataset = MockTimeSeries() + for bytes_count in [1, 2, 4, 8]: + dataset.createVariable(f"var_{bytes_count}_ubytes", f"u{bytes_count}", ()) + + result = self.cf.check_data_types(dataset) + assert result.value[0] == result.value[1] + + def test_time_variable_over_sixty_seconds(self): + dataset = MockTimeSeries() + # TEST CF CONFORMANCE 4.4 REQUIRED + dataset.variables["time"].units = "months since 0-1-1 23:00:60" + results = self.cf.check_time_coordinate(dataset) + scored, out_of, messages = get_results(results) + assert ( + 'Time coordinate variable "time" must have units with seconds less than 60' + in messages + ) + def test_time_variable_has_calendar(self): + self.cf = CF1_9Check() # TEST CONFORMANCE 4.4.1 RECOMMENDED CF 1.9 dataset = MockTimeSeries() - results = self.cf.check_calendar(dataset) + del dataset.variables["time"].calendar + results = self.cf.check_time_coordinate_variable_has_calendar(dataset) + scored, out_of, messages = get_results(results) assert ( - results[0].msgs[0] == 'Time coordinate variable "time" should have a ' - "calendar attribute" + 'Time coordinate variable "time" should have a string valued attribute "calendar"' + in messages ) # FIXME: NetCDF files shouldn't normally be modified so we can usually # depend on cached results. Here we need to recreate the checker # instance in order to not have previous results included pass condition - self.cf = CF1_9Check() dataset.variables["time"].calendar = "standard" results = self.cf.check_calendar(dataset) # no time coordinate present, i.e. there is no time variable name with # the same name as the time dimension name. self.cf = CF1_9Check() - dataset = MockTimeSeries() - dataset.variables["time2"] = dataset.variables["time"] - del dataset.variables["time"] + # need to manually construct the netCDF object here -- + # get_variables_by_attributes appears to be interfering here + dataset = MockNetCDF() + dataset.createDimension("time", 500) + dataset.createVariable("time2", "f8", ("time",)) + dataset.variables["time2"].standard_name = "time" + dataset.variables["time2"].units = "seconds since 1970-01-01 00:00:00" + dataset.variables["time2"].axis = "T" results = self.cf.check_calendar(dataset) # results array should be empty as no time coordinate variable detected assert not results + # TEST CONFORMANCE 4.4.1 + dataset = MockTimeSeries() + dataset.variables["time"].units = "months since 0-1-1 23:00:60" + results = self.cf.check_calendar(dataset) + scored, out_of, messages = get_results(results) + + # test greater than or equal to one zero year for Julian and Gregorian + # calendars + dataset = MockTimeSeries() + dataset.variables["time"].units = "seconds since 0-01-01 00:00:00" + for calendar_name in ("standard", "julian", "gregorian"): + dataset.variables["time"].calendar = calendar_name + results = self.cf.check_time_coordinate_variable_has_calendar(dataset) + scored, out_of, messages = get_results(results) + assert ( + 'For time variable "time", when using the Gregorian or Julian ' + "calendars, the use of year zero is not recommended. 
" + "Furthermore, the use of year zero to signify a climatological " + "variable as in COARDS is deprecated in CF." in messages + ) + def test_domain(self): dataset = MockTimeSeries() domain_var = dataset.createVariable("domain", "c", ()) @@ -3037,7 +3228,7 @@ def test_domain(self): self.assertTrue(results[0].msgs) self.assertTrue( results[0].msgs[0] - == "For domain variable domain it is recommended that attribute long_name be present and a string" + == "For domain variable domain it is recommended that attribute long_name be present and a string", ) # bad coordinates variable @@ -3049,7 +3240,7 @@ def test_domain(self): results[0].msgs[0] == "Could not find the following variables referenced in " "coordinates attribute from domain variable domain: " - "xyxz, abc" + "xyxz, abc", ) del dataset @@ -3075,7 +3266,7 @@ def test_is_variable_valid_ragged_array_repr_featureType(self): v = nc.createVariable("data1", "d", ("SAMPLE_DIMENSION",), fill_value=None) v.setncattr("cf_role", "blah") self.assertFalse( - cfutil.is_variable_valid_ragged_array_repr_featureType(nc, "data1") + cfutil.is_variable_valid_ragged_array_repr_featureType(nc, "data1"), ) # add geophysical variable with correct dimension @@ -3084,7 +3275,7 @@ def test_is_variable_valid_ragged_array_repr_featureType(self): v.setncattr("standard_name", "sea_water_pressure") # test the variable self.assertTrue( - cfutil.is_variable_valid_ragged_array_repr_featureType(nc, "data1") + cfutil.is_variable_valid_ragged_array_repr_featureType(nc, "data1"), ) # add good variable and another variable, this time with the improper dimension @@ -3096,10 +3287,10 @@ def test_is_variable_valid_ragged_array_repr_featureType(self): # good variable should pass, second should fail self.assertTrue( - cfutil.is_variable_valid_ragged_array_repr_featureType(nc, "data1") + cfutil.is_variable_valid_ragged_array_repr_featureType(nc, "data1"), ) self.assertFalse( - cfutil.is_variable_valid_ragged_array_repr_featureType(nc, "data2") + cfutil.is_variable_valid_ragged_array_repr_featureType(nc, "data2"), ) def test_is_dataset_valid_ragged_array_repr_featureType(self): @@ -3109,7 +3300,7 @@ def test_is_dataset_valid_ragged_array_repr_featureType(self): nc = MockRaggedArrayRepr("timeseries", "indexed") self.assertTrue( - cfutil.is_dataset_valid_ragged_array_repr_featureType(nc, "timeseries") + cfutil.is_dataset_valid_ragged_array_repr_featureType(nc, "timeseries"), ) # we'll add another cf_role variable @@ -3117,7 +3308,7 @@ def test_is_dataset_valid_ragged_array_repr_featureType(self): v = nc.createVariable("var2", "i", ("INSTANCE_DIMENSION",), fill_value=None) v.setncattr("cf_role", "yeetyeet_id") self.assertFalse( - cfutil.is_dataset_valid_ragged_array_repr_featureType(nc, "timeseries") + cfutil.is_dataset_valid_ragged_array_repr_featureType(nc, "timeseries"), ) # we'll add another index variable, also bad @@ -3125,13 +3316,13 @@ def test_is_dataset_valid_ragged_array_repr_featureType(self): v = nc.createVariable("index_var2", "i", ("SAMPLE_DIMENSION",), fill_value=None) v.setncattr("instance_dimension", "INSTANCE_DIMENSION") self.assertFalse( - cfutil.is_dataset_valid_ragged_array_repr_featureType(nc, "timeseries") + cfutil.is_dataset_valid_ragged_array_repr_featureType(nc, "timeseries"), ) # ----- timeseries, contiguous ----- # nc = MockRaggedArrayRepr("timeseries", "contiguous") self.assertTrue( - cfutil.is_dataset_valid_ragged_array_repr_featureType(nc, "timeseries") + cfutil.is_dataset_valid_ragged_array_repr_featureType(nc, "timeseries"), ) # add another cf_role 
var, bad @@ -3139,23 +3330,26 @@ def test_is_dataset_valid_ragged_array_repr_featureType(self): v = nc.createVariable("var2", "i", ("INSTANCE_DIMENSION",), fill_value=None) v.setncattr("cf_role", "yeetyeet_id") self.assertFalse( - cfutil.is_dataset_valid_ragged_array_repr_featureType(nc, "timeseries") + cfutil.is_dataset_valid_ragged_array_repr_featureType(nc, "timeseries"), ) # add another count variable, bad v = nc.createVariable( - "count_var2", "i", ("INSTANCE_DIMENSION",), fill_value=None + "count_var2", + "i", + ("INSTANCE_DIMENSION",), + fill_value=None, ) v.setncattr("sample_dimension", "SAMPLE_DIMENSION") self.assertFalse( - cfutil.is_dataset_valid_ragged_array_repr_featureType(nc, "timeseries") + cfutil.is_dataset_valid_ragged_array_repr_featureType(nc, "timeseries"), ) # ----- profile, indexed ----- # nc = MockRaggedArrayRepr("profile", "indexed") self.assertTrue( - cfutil.is_dataset_valid_ragged_array_repr_featureType(nc, "profile") + cfutil.is_dataset_valid_ragged_array_repr_featureType(nc, "profile"), ) # add another cf_role var @@ -3163,7 +3357,7 @@ def test_is_dataset_valid_ragged_array_repr_featureType(self): v = nc.createVariable("var2", "i", ("INSTANCE_DIMENSION",), fill_value=None) v.setncattr("cf_role", "yeetyeet_id") self.assertFalse( - cfutil.is_dataset_valid_ragged_array_repr_featureType(nc, "profile") + cfutil.is_dataset_valid_ragged_array_repr_featureType(nc, "profile"), ) # we'll add another index variable, also bad @@ -3171,13 +3365,13 @@ def test_is_dataset_valid_ragged_array_repr_featureType(self): v = nc.createVariable("index_var2", "i", ("SAMPLE_DIMENSION",), fill_value=None) v.setncattr("instance_dimension", "INSTANCE_DIMENSION") self.assertFalse( - cfutil.is_dataset_valid_ragged_array_repr_featureType(nc, "profile") + cfutil.is_dataset_valid_ragged_array_repr_featureType(nc, "profile"), ) # ----- profile, contiguous ----- # nc = MockRaggedArrayRepr("profile", "contiguous") self.assertTrue( - cfutil.is_dataset_valid_ragged_array_repr_featureType(nc, "profile") + cfutil.is_dataset_valid_ragged_array_repr_featureType(nc, "profile"), ) # add another cf_role var @@ -3185,23 +3379,26 @@ def test_is_dataset_valid_ragged_array_repr_featureType(self): v = nc.createVariable("var2", "i", ("INSTANCE_DIMENSION",), fill_value=None) v.setncattr("cf_role", "yeetyeet_id") self.assertFalse( - cfutil.is_dataset_valid_ragged_array_repr_featureType(nc, "profile") + cfutil.is_dataset_valid_ragged_array_repr_featureType(nc, "profile"), ) # we'll add another count variable, also bad nc = MockRaggedArrayRepr("profile", "contiguous") v = nc.createVariable( - "index_var2", "i", ("INSTANCE_DIMENSION",), fill_value=None + "index_var2", + "i", + ("INSTANCE_DIMENSION",), + fill_value=None, ) v.setncattr("sample_dimension", "SAMPLE_DIMENSION") self.assertFalse( - cfutil.is_dataset_valid_ragged_array_repr_featureType(nc, "profile") + cfutil.is_dataset_valid_ragged_array_repr_featureType(nc, "profile"), ) # ----- trajectory, indexed ----- # nc = MockRaggedArrayRepr("trajectory", "indexed") self.assertTrue( - cfutil.is_dataset_valid_ragged_array_repr_featureType(nc, "trajectory") + cfutil.is_dataset_valid_ragged_array_repr_featureType(nc, "trajectory"), ) # add another cf_role var @@ -3209,7 +3406,7 @@ def test_is_dataset_valid_ragged_array_repr_featureType(self): v = nc.createVariable("var2", "i", ("INSTANCE_DIMENSION",), fill_value=None) v.setncattr("cf_role", "yeetyeet_id") self.assertFalse( - cfutil.is_dataset_valid_ragged_array_repr_featureType(nc, "trajectory") + 
cfutil.is_dataset_valid_ragged_array_repr_featureType(nc, "trajectory"), ) # we'll add another index variable, also bad @@ -3217,13 +3414,13 @@ def test_is_dataset_valid_ragged_array_repr_featureType(self): v = nc.createVariable("index_var2", "i", ("SAMPLE_DIMENSION",), fill_value=None) v.setncattr("instance_dimension", "INSTANCE_DIMENSION") self.assertFalse( - cfutil.is_dataset_valid_ragged_array_repr_featureType(nc, "trajectory") + cfutil.is_dataset_valid_ragged_array_repr_featureType(nc, "trajectory"), ) # ----- trajectory, contiguous ----- # nc = MockRaggedArrayRepr("trajectory", "contiguous") self.assertTrue( - cfutil.is_dataset_valid_ragged_array_repr_featureType(nc, "trajectory") + cfutil.is_dataset_valid_ragged_array_repr_featureType(nc, "trajectory"), ) # add another cf_role var @@ -3231,17 +3428,20 @@ def test_is_dataset_valid_ragged_array_repr_featureType(self): v = nc.createVariable("var2", "i", ("INSTANCE_DIMENSION",), fill_value=None) v.setncattr("cf_role", "yeetyeet_id") self.assertFalse( - cfutil.is_dataset_valid_ragged_array_repr_featureType(nc, "trajectory") + cfutil.is_dataset_valid_ragged_array_repr_featureType(nc, "trajectory"), ) # we'll add another count variable, also bad nc = MockRaggedArrayRepr("trajectory", "contiguous") v = nc.createVariable( - "index_var2", "i", ("INSTANCE_DIMENSION",), fill_value=None + "index_var2", + "i", + ("INSTANCE_DIMENSION",), + fill_value=None, ) v.setncattr("sample_dimension", "SAMPLE_DIMENSION") self.assertFalse( - cfutil.is_dataset_valid_ragged_array_repr_featureType(nc, "trajectory") + cfutil.is_dataset_valid_ragged_array_repr_featureType(nc, "trajectory"), ) # ----- now test compound featureType ----- # @@ -3253,8 +3453,9 @@ def test_is_dataset_valid_ragged_array_repr_featureType(self): # has no geophysical vars, so should (?) 
(will) fail self.assertFalse( cfutil.is_dataset_valid_ragged_array_repr_featureType( - nc, "timeseriesprofile" - ) + nc, + "timeseriesprofile", + ), ) # add a geophysical variable and test again @@ -3263,21 +3464,26 @@ def test_is_dataset_valid_ragged_array_repr_featureType(self): v1.setncattr("standard_name", "pressure") self.assertTrue( cfutil.is_dataset_valid_ragged_array_repr_featureType( - nc, "timeseriesprofile" - ) + nc, + "timeseriesprofile", + ), ) nc = MockRaggedArrayRepr("timeSeriesProfile") v1 = nc.createVariable("data1", "i", ("SAMPLE_DIMENSION",), fill_value=None) # add a third cf_role variable - this should fail v = nc.createVariable( - "cf_role_var3", "i", ("INSTANCE_DIMENSION",), fill_value=None + "cf_role_var3", + "i", + ("INSTANCE_DIMENSION",), + fill_value=None, ) v.setncattr("cf_role", "yeetyeet_id") self.assertFalse( cfutil.is_dataset_valid_ragged_array_repr_featureType( - nc, "timeseriesprofile" - ) + nc, + "timeseriesprofile", + ), ) # set the index variable to have an incorrect attr @@ -3287,8 +3493,9 @@ def test_is_dataset_valid_ragged_array_repr_featureType(self): self.assertFalse( cfutil.is_dataset_valid_ragged_array_repr_featureType( - nc, "timeseriesprofile" - ) + nc, + "timeseriesprofile", + ), ) # change the sample_dimension attr on the count variable, bad @@ -3298,20 +3505,25 @@ def test_is_dataset_valid_ragged_array_repr_featureType(self): self.assertFalse( cfutil.is_dataset_valid_ragged_array_repr_featureType( - nc, "timeseriesprofile" - ) + nc, + "timeseriesprofile", + ), ) # give another geophysical data variable a different dimension nc = MockRaggedArrayRepr("timeSeriesProfile") v1 = nc.createVariable("data1", "i", ("SAMPLE_DIMENSION",), fill_value=None) v1 = nc.createVariable( - "data2", "i", ("STATION_DIMENSION",), fill_value=None # bad! + "data2", + "i", + ("STATION_DIMENSION",), + fill_value=None, # bad! ) self.assertFalse( cfutil.is_dataset_valid_ragged_array_repr_featureType( - nc, "timeseriesprofile" - ) + nc, + "timeseriesprofile", + ), ) # ----- trajectoryProfile ----- # @@ -3321,8 +3533,9 @@ def test_is_dataset_valid_ragged_array_repr_featureType(self): # has no geophysical vars, so should (?) 
(will) fail self.assertFalse( cfutil.is_dataset_valid_ragged_array_repr_featureType( - nc, "trajectoryprofile" - ) + nc, + "trajectoryprofile", + ), ) # add a geophysical variable and test again @@ -3331,21 +3544,26 @@ def test_is_dataset_valid_ragged_array_repr_featureType(self): v1.setncattr("standard_name", "pressure") self.assertTrue( cfutil.is_dataset_valid_ragged_array_repr_featureType( - nc, "trajectoryprofile" - ) + nc, + "trajectoryprofile", + ), ) nc = MockRaggedArrayRepr("trajectoryProfile") v1 = nc.createVariable("data1", "i", ("SAMPLE_DIMENSION",), fill_value=None) # add a third cf_role variable - this should fail v = nc.createVariable( - "cf_role_var3", "i", ("INSTANCE_DIMENSION",), fill_value=None + "cf_role_var3", + "i", + ("INSTANCE_DIMENSION",), + fill_value=None, ) v.setncattr("cf_role", "yeetyeet_id") self.assertFalse( cfutil.is_dataset_valid_ragged_array_repr_featureType( - nc, "trajectoryprofile" - ) + nc, + "trajectoryprofile", + ), ) # set the index variable to have an incorrect attr @@ -3355,8 +3573,9 @@ def test_is_dataset_valid_ragged_array_repr_featureType(self): self.assertFalse( cfutil.is_dataset_valid_ragged_array_repr_featureType( - nc, "trajectoryprofile" - ) + nc, + "trajectoryprofile", + ), ) # change the sample_dimension attr on the count variable, bad @@ -3366,18 +3585,23 @@ def test_is_dataset_valid_ragged_array_repr_featureType(self): self.assertFalse( cfutil.is_dataset_valid_ragged_array_repr_featureType( - nc, "trajectoryprofile" - ) + nc, + "trajectoryprofile", + ), ) # give another geophysical data variable a different dimension nc = MockRaggedArrayRepr("trajectoryProfile") v1 = nc.createVariable("data1", "i", ("SAMPLE_DIMENSION",), fill_value=None) v1 = nc.createVariable( - "data2", "i", ("STATION_DIMENSION",), fill_value=None # bad! + "data2", + "i", + ("STATION_DIMENSION",), + fill_value=None, # bad! ) self.assertFalse( cfutil.is_dataset_valid_ragged_array_repr_featureType( - nc, "trajectoryprofile" - ) + nc, + "trajectoryprofile", + ), ) diff --git a/compliance_checker/tests/test_cf_integration.py b/compliance_checker/tests/test_cf_integration.py index 3b102a4f7..bb344f5ea 100644 --- a/compliance_checker/tests/test_cf_integration.py +++ b/compliance_checker/tests/test_cf_integration.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- import pytest @@ -22,8 +21,8 @@ "attribute lat:_CoordianteAxisType should begin with a letter and be composed of letters, digits, and underscores", "attribute lon:_CoordianteAxisType should begin with a letter and be composed of letters, digits, and underscores", "§2.6.2 global attribute history should exist and be a non-empty string", - "standard_name temperature is not defined in Standard Name Table v{}".format( - std_names._version + "standard_name temperature is not defined in Standard Name Table v{}. 
Possible close match(es): ['air_temperature', 'soil_temperature', 'snow_temperature']".format( + std_names._version, ), "temperature's auxiliary coordinate specified by the coordinates attribute, precise_lat, is not a variable in this dataset", "temperature's auxiliary coordinate specified by the coordinates attribute, precise_lon, is not a variable in this dataset", @@ -46,29 +45,29 @@ "Attribute 'valid_range' (type: ) and parent variable 'wind_direction_qc' (type: ) must have equivalent datatypes", "Attribute 'valid_range' (type: ) and parent variable 'visibility_qc' (type: ) must have equivalent datatypes", '§2.6.1 Conventions global attribute does not contain "CF-1.8"', - "standard_name visibility is not defined in Standard Name Table v{}".format( - std_names._version + "standard_name visibility is not defined in Standard Name Table v{}. Possible close match(es): ['visibility_in_air']".format( + std_names._version, ), 'Standard name modifier "data_quality" for variable visibility_qc is not a valid modifier according to CF Appendix C', - "standard_name wind_direction is not defined in Standard Name Table v{}".format( - std_names._version + "standard_name wind_direction is not defined in Standard Name Table v{}. Possible close match(es): ['wind_to_direction', 'wind_from_direction', 'wind_gust_from_direction']".format( + std_names._version, ), 'Standard name modifier "data_quality" for variable wind_direction_qc is not a valid modifier according to CF Appendix C', - "standard_name wind_gust is not defined in Standard Name Table v{}".format( - std_names._version + "standard_name wind_gust is not defined in Standard Name Table v{}. Possible close match(es): ['y_wind_gust', 'x_wind_gust', 'wind_speed_of_gust']".format( + std_names._version, ), 'Standard name modifier "data_quality" for variable wind_gust_qc is not a valid modifier according to CF Appendix C', 'Standard name modifier "data_quality" for variable air_temperature_qc is not a valid modifier according to CF Appendix C', - "standard_name use_wind is not defined in Standard Name Table v{}".format( - std_names._version + "standard_name use_wind is not defined in Standard Name Table v{}. Possible close match(es): ['y_wind', 'x_wind']".format( + std_names._version, ), - "standard_name barometric_pressure is not defined in Standard Name Table v{}".format( - std_names._version + "standard_name barometric_pressure is not defined in Standard Name Table v{}. Possible close match(es): ['air_pressure', 'reference_pressure', 'barometric_altitude']".format( + std_names._version, ), 'Standard name modifier "data_quality" for variable barometric_pressure_qc is not a valid modifier according to CF Appendix C', 'Standard name modifier "data_quality" for variable wind_speed_qc is not a valid modifier according to CF Appendix C', - "standard_name barometric_pressure is not defined in Standard Name Table v{}".format( - std_names._version + "standard_name barometric_pressure is not defined in Standard Name Table v{}. 
Possible close match(es): ['air_pressure', 'reference_pressure', 'barometric_altitude']".format( + std_names._version, ), "CF recommends latitude variable 'lat' to use units degrees_north", "CF recommends longitude variable 'lon' to use units degrees_east", @@ -152,11 +151,11 @@ [ # TODO: referenced/relative time is treated like time units 'Units "hours since 2016-01-01T12:00:00Z" for variable time_offset must be convertible to canonical units "s"', - "standard_name cloud_cover is not defined in Standard Name Table v{}".format( - std_names._version + "standard_name cloud_cover is not defined in Standard Name Table v{}. Possible close match(es): ['land_cover', 'land_cover_lccs', 'cloud_albedo']".format( + std_names._version, ), - "standard_name dew_point is not defined in Standard Name Table v{}".format( - std_names._version + "standard_name dew_point is not defined in Standard Name Table v{}. Possible close match(es): ['dew_point_depression', 'dew_point_temperature']".format( + std_names._version, ), ( "GRID is not a valid CF featureType. It must be one of point, timeseries, " @@ -231,7 +230,10 @@ def get_results(self, check_results, checksuite): cs: instance of CheckSuite object """ aggregation = checksuite.build_structure( - "cf", check_results["cf"][0], "test", 1 + "cf", + check_results["cf"][0], + "test", + 1, ) out_of = 0 scored = 0 @@ -257,7 +259,7 @@ def get_results(self, check_results, checksuite): "loaded_dataset,expected_messages", dataset_stem__expected_messages, indirect=[ - "loaded_dataset" + "loaded_dataset", ], # must be specified to load this param at runtime, instead of at collection ) def test_cf_integration(self, loaded_dataset, expected_messages, cs): @@ -266,7 +268,7 @@ def test_cf_integration(self, loaded_dataset, expected_messages, cs): assert scored < out_of - assert all([m in messages for m in expected_messages]), mult_msgs_diff.format( + assert all(m in messages for m in expected_messages), mult_msgs_diff.format( missing_msgs="\n".join([m for m in expected_messages if m not in messages]), found_msgs="\n".join(messages), ) @@ -305,7 +307,7 @@ def test_fvcom(self, cs, loaded_dataset): raise AssertionError( '"dimensions for auxiliary coordinate variable siglay (node, siglay) ' 'are not a subset of dimensions for variable u (siglay, nele, time)"' - " not in messages" + " not in messages", ) assert ( '§2.6.1 Conventions global attribute does not contain "CF-1.8"' diff --git a/compliance_checker/tests/test_cli.py b/compliance_checker/tests/test_cli.py index 42682d9b4..fc6b4f94a 100644 --- a/compliance_checker/tests/test_cli.py +++ b/compliance_checker/tests/test_cli.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- """ Tests for command line output and parsing diff --git a/compliance_checker/tests/test_feature_detection.py b/compliance_checker/tests/test_feature_detection.py index c2b76fd73..53665e948 100644 --- a/compliance_checker/tests/test_feature_detection.py +++ b/compliance_checker/tests/test_feature_detection.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- """ compliance_checker/tests/test_feature_detection.py """ @@ -24,7 +23,7 @@ def test_point(self): """ with Dataset(resources.STATIC_FILES["point"]) as nc: for variable in util.get_geophysical_variables(nc): - assert util.is_point(nc, variable), "{} is point".format(variable) + assert util.is_point(nc, variable), f"{variable} is point" def test_timeseries(self): """ @@ -33,7 +32,7 @@ def test_timeseries(self): with Dataset(resources.STATIC_FILES["timeseries"]) as nc: for 
variable in util.get_geophysical_variables(nc): assert util.is_timeseries(nc, variable), "{} is timeseries".format( - variable + variable, ) def test_multi_timeseries_orthogonal(self): @@ -43,8 +42,9 @@ def test_multi_timeseries_orthogonal(self): with Dataset(resources.STATIC_FILES["multi-timeseries-orthogonal"]) as nc: for variable in util.get_geophysical_variables(nc): assert util.is_multi_timeseries_orthogonal( - nc, variable - ), "{} is multi-timeseries orthogonal".format(variable) + nc, + variable, + ), f"{variable} is multi-timeseries orthogonal" def test_multi_timeseries_incomplete(self): """ @@ -53,8 +53,9 @@ def test_multi_timeseries_incomplete(self): with Dataset(resources.STATIC_FILES["multi-timeseries-incomplete"]) as nc: for variable in util.get_geophysical_variables(nc): assert util.is_multi_timeseries_incomplete( - nc, variable - ), "{} is multi-timeseries incomplete".format(variable) + nc, + variable, + ), f"{variable} is multi-timeseries incomplete" def test_trajectory(self): """ @@ -63,7 +64,7 @@ def test_trajectory(self): with Dataset(resources.STATIC_FILES["trajectory"]) as nc: for variable in util.get_geophysical_variables(nc): assert util.is_cf_trajectory(nc, variable), "{} is trajectory".format( - variable + variable, ) def test_trajectory_single(self): @@ -73,8 +74,9 @@ def test_trajectory_single(self): with Dataset(resources.STATIC_FILES["trajectory-single"]) as nc: for variable in util.get_geophysical_variables(nc): assert util.is_single_trajectory( - nc, variable - ), "{} is trajectory-single".format(variable) + nc, + variable, + ), f"{variable} is trajectory-single" def test_profile_orthogonal(self): """ @@ -83,8 +85,9 @@ def test_profile_orthogonal(self): with Dataset(resources.STATIC_FILES["profile-orthogonal"]) as nc: for variable in util.get_geophysical_variables(nc): assert util.is_profile_orthogonal( - nc, variable - ), "{} is profile-orthogonal".format(variable) + nc, + variable, + ), f"{variable} is profile-orthogonal" def test_profile_incomplete(self): """ @@ -93,8 +96,9 @@ def test_profile_incomplete(self): with Dataset(resources.STATIC_FILES["profile-incomplete"]) as nc: for variable in util.get_geophysical_variables(nc): assert util.is_profile_incomplete( - nc, variable - ), "{} is profile-incomplete".format(variable) + nc, + variable, + ), f"{variable} is profile-incomplete" def test_timeseries_profile_single_station(self): """ @@ -103,8 +107,9 @@ def test_timeseries_profile_single_station(self): with Dataset(resources.STATIC_FILES["timeseries-profile-single-station"]) as nc: for variable in util.get_geophysical_variables(nc): assert util.is_timeseries_profile_single_station( - nc, variable - ), "{} is timeseries-profile-single-station".format(variable) + nc, + variable, + ), f"{variable} is timeseries-profile-single-station" def test_timeseries_profile_multi_station(self): """ @@ -113,32 +118,35 @@ def test_timeseries_profile_multi_station(self): with Dataset(resources.STATIC_FILES["timeseries-profile-multi-station"]) as nc: for variable in util.get_geophysical_variables(nc): assert util.is_timeseries_profile_multi_station( - nc, variable - ), "{} is timeseries-profile-multi-station".format(variable) + nc, + variable, + ), f"{variable} is timeseries-profile-multi-station" def test_timeseries_profile_single_ortho_time(self): """ Ensures timeseries profile single station ortho time detection works """ with Dataset( - resources.STATIC_FILES["timeseries-profile-single-ortho-time"] + resources.STATIC_FILES["timeseries-profile-single-ortho-time"], ) as 
nc: for variable in util.get_geophysical_variables(nc): assert util.is_timeseries_profile_single_ortho_time( - nc, variable - ), "{} is timeseries-profile-single-ortho-time".format(variable) + nc, + variable, + ), f"{variable} is timeseries-profile-single-ortho-time" def test_timeseries_profile_multi_ortho_time(self): """ Ensures timeseries profile multi station ortho time detection works """ with Dataset( - resources.STATIC_FILES["timeseries-profile-multi-ortho-time"] + resources.STATIC_FILES["timeseries-profile-multi-ortho-time"], ) as nc: for variable in util.get_geophysical_variables(nc): assert util.is_timeseries_profile_multi_ortho_time( - nc, variable - ), "{} is timeseries-profile-multi-ortho-time".format(variable) + nc, + variable, + ), f"{variable} is timeseries-profile-multi-ortho-time" def test_timeseries_profile_ortho_depth(self): """ @@ -147,8 +155,9 @@ def test_timeseries_profile_ortho_depth(self): with Dataset(resources.STATIC_FILES["timeseries-profile-ortho-depth"]) as nc: for variable in util.get_geophysical_variables(nc): assert util.is_timeseries_profile_ortho_depth( - nc, variable - ), "{} is timeseries-profile-ortho-depth".format(variable) + nc, + variable, + ), f"{variable} is timeseries-profile-ortho-depth" def test_timeseries_profile_incomplete(self): """ @@ -157,8 +166,9 @@ def test_timeseries_profile_incomplete(self): with Dataset(resources.STATIC_FILES["timeseries-profile-incomplete"]) as nc: for variable in util.get_geophysical_variables(nc): assert util.is_timeseries_profile_incomplete( - nc, variable - ), "{} is timeseries-profile-incomplete".format(variable) + nc, + variable, + ), f"{variable} is timeseries-profile-incomplete" def test_trajectory_profile_orthogonal(self): """ @@ -167,8 +177,9 @@ def test_trajectory_profile_orthogonal(self): with Dataset(resources.STATIC_FILES["trajectory-profile-orthogonal"]) as nc: for variable in util.get_geophysical_variables(nc): assert util.is_trajectory_profile_orthogonal( - nc, variable - ), "{} is trajectory profile orthogonal".format(variable) + nc, + variable, + ), f"{variable} is trajectory profile orthogonal" def test_trajectory_profile_incomplete(self): """ @@ -177,8 +188,9 @@ def test_trajectory_profile_incomplete(self): with Dataset(resources.STATIC_FILES["trajectory-profile-incomplete"]) as nc: for variable in util.get_geophysical_variables(nc): assert util.is_trajectory_profile_incomplete( - nc, variable - ), "{} is trajectory profile incomplete".format(variable) + nc, + variable, + ), f"{variable} is trajectory profile incomplete" def test_2d_regular_grid(self): """ @@ -187,8 +199,9 @@ def test_2d_regular_grid(self): with Dataset(resources.STATIC_FILES["2d-regular-grid"]) as nc: for variable in util.get_geophysical_variables(nc): assert util.is_2d_regular_grid( - nc, variable - ), "{} is 2D regular grid".format(variable) + nc, + variable, + ), f"{variable} is 2D regular grid" def test_2d_static_grid(self): """ @@ -197,8 +210,9 @@ def test_2d_static_grid(self): with Dataset(resources.STATIC_FILES["2d-static-grid"]) as nc: for variable in util.get_geophysical_variables(nc): assert util.is_2d_static_grid( - nc, variable - ), "{} is a 2D static grid".format(variable) + nc, + variable, + ), f"{variable} is a 2D static grid" def test_3d_regular_grid(self): """ @@ -207,8 +221,9 @@ def test_3d_regular_grid(self): with Dataset(resources.STATIC_FILES["3d-regular-grid"]) as nc: for variable in util.get_geophysical_variables(nc): assert util.is_3d_regular_grid( - nc, variable - ), "{} is 3d regular 
grid".format(variable) + nc, + variable, + ), f"{variable} is 3d regular grid" def test_3d_static_grid(self): """ @@ -217,8 +232,9 @@ def test_3d_static_grid(self): with Dataset(resources.STATIC_FILES["3d-static-grid"]) as nc: for variable in util.get_geophysical_variables(nc): assert util.is_3d_static_grid( - nc, variable - ), "{} is a 3D static grid".format(variable) + nc, + variable, + ), f"{variable} is a 3D static grid" def test_boundaries(self): """ @@ -255,8 +271,8 @@ def test_grid_mapping(self): axis_variables = util.get_axis_variables(nc) assert "rotated_pole" in grid_mapping - assert set(["rlon", "rlat", "lev"]) == set(coordinate_variables) - assert set(["rlon", "rlat", "lev"]) == set(axis_variables) + assert {"rlon", "rlat", "lev"} == set(coordinate_variables) + assert {"rlon", "rlat", "lev"} == set(axis_variables) assert "lat" == util.get_lat_variable(nc) assert "lon" == util.get_lon_variable(nc) @@ -266,10 +282,10 @@ def test_auxiliary_coordinates(self): """ with Dataset(resources.STATIC_FILES["bad_units"]) as nc: coordinate_variables = util.get_coordinate_variables(nc) - assert set(["time"]) == set(coordinate_variables) + assert {"time"} == set(coordinate_variables) aux_coord_vards = util.get_auxiliary_coordinate_variables(nc) - assert set(["lat", "lon"]) == set(aux_coord_vards) + assert {"lat", "lon"} == set(aux_coord_vards) def test_forecast_reference_metadata(self): """ @@ -416,7 +432,7 @@ def test_is_variable_valid_ragged_array_repr_featureType(self): v = nc.createVariable("data1", "d", ("SAMPLE_DIMENSION",), fill_value=None) v.setncattr("cf_role", "blah") self.assertFalse( - util.is_variable_valid_ragged_array_repr_featureType(nc, "data1") + util.is_variable_valid_ragged_array_repr_featureType(nc, "data1"), ) # add geophysical variable with correct dimension @@ -425,7 +441,7 @@ def test_is_variable_valid_ragged_array_repr_featureType(self): v.setncattr("standard_name", "sea_water_pressure") # test the variable self.assertTrue( - util.is_variable_valid_ragged_array_repr_featureType(nc, "data1") + util.is_variable_valid_ragged_array_repr_featureType(nc, "data1"), ) # add good variable and another variable, this time with the improper dimension @@ -437,10 +453,10 @@ def test_is_variable_valid_ragged_array_repr_featureType(self): # good variable should pass, second should fail self.assertTrue( - util.is_variable_valid_ragged_array_repr_featureType(nc, "data1") + util.is_variable_valid_ragged_array_repr_featureType(nc, "data1"), ) self.assertFalse( - util.is_variable_valid_ragged_array_repr_featureType(nc, "data2") + util.is_variable_valid_ragged_array_repr_featureType(nc, "data2"), ) def test_is_dataset_valid_ragged_array_repr_featureType(self): @@ -450,7 +466,7 @@ def test_is_dataset_valid_ragged_array_repr_featureType(self): nc = MockRaggedArrayRepr("timeseries", "indexed") self.assertTrue( - util.is_dataset_valid_ragged_array_repr_featureType(nc, "timeseries") + util.is_dataset_valid_ragged_array_repr_featureType(nc, "timeseries"), ) # we'll add another cf_role variable @@ -458,7 +474,7 @@ def test_is_dataset_valid_ragged_array_repr_featureType(self): v = nc.createVariable("var2", "i", ("INSTANCE_DIMENSION",), fill_value=None) v.setncattr("cf_role", "yeetyeet_id") self.assertFalse( - util.is_dataset_valid_ragged_array_repr_featureType(nc, "timeseries") + util.is_dataset_valid_ragged_array_repr_featureType(nc, "timeseries"), ) # we'll add another index variable, also bad @@ -466,13 +482,13 @@ def test_is_dataset_valid_ragged_array_repr_featureType(self): v = 
nc.createVariable("index_var2", "i", ("SAMPLE_DIMENSION",), fill_value=None) v.setncattr("instance_dimension", "INSTANCE_DIMENSION") self.assertFalse( - util.is_dataset_valid_ragged_array_repr_featureType(nc, "timeseries") + util.is_dataset_valid_ragged_array_repr_featureType(nc, "timeseries"), ) # ----- timeseries, contiguous ----- # nc = MockRaggedArrayRepr("timeseries", "contiguous") self.assertTrue( - util.is_dataset_valid_ragged_array_repr_featureType(nc, "timeseries") + util.is_dataset_valid_ragged_array_repr_featureType(nc, "timeseries"), ) # add another cf_role var, bad @@ -480,23 +496,26 @@ def test_is_dataset_valid_ragged_array_repr_featureType(self): v = nc.createVariable("var2", "i", ("INSTANCE_DIMENSION",), fill_value=None) v.setncattr("cf_role", "yeetyeet_id") self.assertFalse( - util.is_dataset_valid_ragged_array_repr_featureType(nc, "timeseries") + util.is_dataset_valid_ragged_array_repr_featureType(nc, "timeseries"), ) # add another count variable, bad v = nc.createVariable( - "count_var2", "i", ("INSTANCE_DIMENSION",), fill_value=None + "count_var2", + "i", + ("INSTANCE_DIMENSION",), + fill_value=None, ) v.setncattr("sample_dimension", "SAMPLE_DIMENSION") self.assertFalse( - util.is_dataset_valid_ragged_array_repr_featureType(nc, "timeseries") + util.is_dataset_valid_ragged_array_repr_featureType(nc, "timeseries"), ) # ----- profile, indexed ----- # nc = MockRaggedArrayRepr("profile", "indexed") self.assertTrue( - util.is_dataset_valid_ragged_array_repr_featureType(nc, "profile") + util.is_dataset_valid_ragged_array_repr_featureType(nc, "profile"), ) # add another cf_role var @@ -504,7 +523,7 @@ def test_is_dataset_valid_ragged_array_repr_featureType(self): v = nc.createVariable("var2", "i", ("INSTANCE_DIMENSION",), fill_value=None) v.setncattr("cf_role", "yeetyeet_id") self.assertFalse( - util.is_dataset_valid_ragged_array_repr_featureType(nc, "profile") + util.is_dataset_valid_ragged_array_repr_featureType(nc, "profile"), ) # we'll add another index variable, also bad @@ -512,13 +531,13 @@ def test_is_dataset_valid_ragged_array_repr_featureType(self): v = nc.createVariable("index_var2", "i", ("SAMPLE_DIMENSION",), fill_value=None) v.setncattr("instance_dimension", "INSTANCE_DIMENSION") self.assertFalse( - util.is_dataset_valid_ragged_array_repr_featureType(nc, "profile") + util.is_dataset_valid_ragged_array_repr_featureType(nc, "profile"), ) # ----- profile, contiguous ----- # nc = MockRaggedArrayRepr("profile", "contiguous") self.assertTrue( - util.is_dataset_valid_ragged_array_repr_featureType(nc, "profile") + util.is_dataset_valid_ragged_array_repr_featureType(nc, "profile"), ) # add another cf_role var @@ -526,23 +545,26 @@ def test_is_dataset_valid_ragged_array_repr_featureType(self): v = nc.createVariable("var2", "i", ("INSTANCE_DIMENSION",), fill_value=None) v.setncattr("cf_role", "yeetyeet_id") self.assertFalse( - util.is_dataset_valid_ragged_array_repr_featureType(nc, "profile") + util.is_dataset_valid_ragged_array_repr_featureType(nc, "profile"), ) # we'll add another count variable, also bad nc = MockRaggedArrayRepr("profile", "contiguous") v = nc.createVariable( - "index_var2", "i", ("INSTANCE_DIMENSION",), fill_value=None + "index_var2", + "i", + ("INSTANCE_DIMENSION",), + fill_value=None, ) v.setncattr("sample_dimension", "SAMPLE_DIMENSION") self.assertFalse( - util.is_dataset_valid_ragged_array_repr_featureType(nc, "profile") + util.is_dataset_valid_ragged_array_repr_featureType(nc, "profile"), ) # ----- trajectory, indexed ----- # nc = 
MockRaggedArrayRepr("trajectory", "indexed") self.assertTrue( - util.is_dataset_valid_ragged_array_repr_featureType(nc, "trajectory") + util.is_dataset_valid_ragged_array_repr_featureType(nc, "trajectory"), ) # add another cf_role var @@ -550,7 +572,7 @@ def test_is_dataset_valid_ragged_array_repr_featureType(self): v = nc.createVariable("var2", "i", ("INSTANCE_DIMENSION",), fill_value=None) v.setncattr("cf_role", "yeetyeet_id") self.assertFalse( - util.is_dataset_valid_ragged_array_repr_featureType(nc, "trajectory") + util.is_dataset_valid_ragged_array_repr_featureType(nc, "trajectory"), ) # we'll add another index variable, also bad @@ -558,13 +580,13 @@ def test_is_dataset_valid_ragged_array_repr_featureType(self): v = nc.createVariable("index_var2", "i", ("SAMPLE_DIMENSION",), fill_value=None) v.setncattr("instance_dimension", "INSTANCE_DIMENSION") self.assertFalse( - util.is_dataset_valid_ragged_array_repr_featureType(nc, "trajectory") + util.is_dataset_valid_ragged_array_repr_featureType(nc, "trajectory"), ) # ----- trajectory, contiguous ----- # nc = MockRaggedArrayRepr("trajectory", "contiguous") self.assertTrue( - util.is_dataset_valid_ragged_array_repr_featureType(nc, "trajectory") + util.is_dataset_valid_ragged_array_repr_featureType(nc, "trajectory"), ) # add another cf_role var @@ -572,17 +594,20 @@ def test_is_dataset_valid_ragged_array_repr_featureType(self): v = nc.createVariable("var2", "i", ("INSTANCE_DIMENSION",), fill_value=None) v.setncattr("cf_role", "yeetyeet_id") self.assertFalse( - util.is_dataset_valid_ragged_array_repr_featureType(nc, "trajectory") + util.is_dataset_valid_ragged_array_repr_featureType(nc, "trajectory"), ) # we'll add another count variable, also bad nc = MockRaggedArrayRepr("trajectory", "contiguous") v = nc.createVariable( - "index_var2", "i", ("INSTANCE_DIMENSION",), fill_value=None + "index_var2", + "i", + ("INSTANCE_DIMENSION",), + fill_value=None, ) v.setncattr("sample_dimension", "SAMPLE_DIMENSION") self.assertFalse( - util.is_dataset_valid_ragged_array_repr_featureType(nc, "trajectory") + util.is_dataset_valid_ragged_array_repr_featureType(nc, "trajectory"), ) # ----- now test compound featureType ----- # @@ -593,7 +618,10 @@ def test_is_dataset_valid_ragged_array_repr_featureType(self): # NOTE # has no geophysical vars, so should (?) 
(will) fail self.assertFalse( - util.is_dataset_valid_ragged_array_repr_featureType(nc, "timeseriesprofile") + util.is_dataset_valid_ragged_array_repr_featureType( + nc, + "timeseriesprofile", + ), ) # add a geophysical variable and test again @@ -601,18 +629,27 @@ def test_is_dataset_valid_ragged_array_repr_featureType(self): v1 = nc.createVariable("data1", "i", ("SAMPLE_DIMENSION",), fill_value=None) v1.setncattr("standard_name", "pressure") self.assertTrue( - util.is_dataset_valid_ragged_array_repr_featureType(nc, "timeseriesprofile") + util.is_dataset_valid_ragged_array_repr_featureType( + nc, + "timeseriesprofile", + ), ) nc = MockRaggedArrayRepr("timeSeriesProfile") v1 = nc.createVariable("data1", "i", ("SAMPLE_DIMENSION",), fill_value=None) # add a third cf_role variable - this should fail v = nc.createVariable( - "cf_role_var3", "i", ("INSTANCE_DIMENSION",), fill_value=None + "cf_role_var3", + "i", + ("INSTANCE_DIMENSION",), + fill_value=None, ) v.setncattr("cf_role", "yeetyeet_id") self.assertFalse( - util.is_dataset_valid_ragged_array_repr_featureType(nc, "timeseriesprofile") + util.is_dataset_valid_ragged_array_repr_featureType( + nc, + "timeseriesprofile", + ), ) # set the index variable to have an incorrect attr @@ -621,7 +658,10 @@ def test_is_dataset_valid_ragged_array_repr_featureType(self): nc.variables["station_index_variable"].instance_dimension = "SIKE!" self.assertFalse( - util.is_dataset_valid_ragged_array_repr_featureType(nc, "timeseriesprofile") + util.is_dataset_valid_ragged_array_repr_featureType( + nc, + "timeseriesprofile", + ), ) # change the sample_dimension attr on the count variable, bad @@ -630,17 +670,26 @@ def test_is_dataset_valid_ragged_array_repr_featureType(self): nc.variables["counter_var"].sample_dimension = "SIKE!" self.assertFalse( - util.is_dataset_valid_ragged_array_repr_featureType(nc, "timeseriesprofile") + util.is_dataset_valid_ragged_array_repr_featureType( + nc, + "timeseriesprofile", + ), ) # give another geophysical data variable a different dimension nc = MockRaggedArrayRepr("timeSeriesProfile") v1 = nc.createVariable("data1", "i", ("SAMPLE_DIMENSION",), fill_value=None) v1 = nc.createVariable( - "data2", "i", ("STATION_DIMENSION",), fill_value=None # bad! + "data2", + "i", + ("STATION_DIMENSION",), + fill_value=None, # bad! ) self.assertFalse( - util.is_dataset_valid_ragged_array_repr_featureType(nc, "timeseriesprofile") + util.is_dataset_valid_ragged_array_repr_featureType( + nc, + "timeseriesprofile", + ), ) # ----- trajectoryProfile ----- # @@ -649,7 +698,10 @@ def test_is_dataset_valid_ragged_array_repr_featureType(self): # NOTE # has no geophysical vars, so should (?) 
(will) fail self.assertFalse( - util.is_dataset_valid_ragged_array_repr_featureType(nc, "trajectoryprofile") + util.is_dataset_valid_ragged_array_repr_featureType( + nc, + "trajectoryprofile", + ), ) # add a geophysical variable and test again @@ -657,18 +709,27 @@ def test_is_dataset_valid_ragged_array_repr_featureType(self): v1 = nc.createVariable("data1", "i", ("SAMPLE_DIMENSION",), fill_value=None) v1.setncattr("standard_name", "pressure") self.assertTrue( - util.is_dataset_valid_ragged_array_repr_featureType(nc, "trajectoryprofile") + util.is_dataset_valid_ragged_array_repr_featureType( + nc, + "trajectoryprofile", + ), ) nc = MockRaggedArrayRepr("trajectoryProfile") v1 = nc.createVariable("data1", "i", ("SAMPLE_DIMENSION",), fill_value=None) # add a third cf_role variable - this should fail v = nc.createVariable( - "cf_role_var3", "i", ("INSTANCE_DIMENSION",), fill_value=None + "cf_role_var3", + "i", + ("INSTANCE_DIMENSION",), + fill_value=None, ) v.setncattr("cf_role", "yeetyeet_id") self.assertFalse( - util.is_dataset_valid_ragged_array_repr_featureType(nc, "trajectoryprofile") + util.is_dataset_valid_ragged_array_repr_featureType( + nc, + "trajectoryprofile", + ), ) # set the index variable to have an incorrect attr @@ -677,7 +738,10 @@ def test_is_dataset_valid_ragged_array_repr_featureType(self): nc.variables["station_index_variable"].instance_dimension = "SIKE!" self.assertFalse( - util.is_dataset_valid_ragged_array_repr_featureType(nc, "trajectoryprofile") + util.is_dataset_valid_ragged_array_repr_featureType( + nc, + "trajectoryprofile", + ), ) # change the sample_dimension attr on the count variable, bad @@ -686,15 +750,24 @@ def test_is_dataset_valid_ragged_array_repr_featureType(self): nc.variables["counter_var"].sample_dimension = "SIKE!" self.assertFalse( - util.is_dataset_valid_ragged_array_repr_featureType(nc, "trajectoryprofile") + util.is_dataset_valid_ragged_array_repr_featureType( + nc, + "trajectoryprofile", + ), ) # give another geophysical data variable a different dimension nc = MockRaggedArrayRepr("trajectoryProfile") v1 = nc.createVariable("data1", "i", ("SAMPLE_DIMENSION",), fill_value=None) v1 = nc.createVariable( - "data2", "i", ("STATION_DIMENSION",), fill_value=None # bad! + "data2", + "i", + ("STATION_DIMENSION",), + fill_value=None, # bad! 
) self.assertFalse( - util.is_dataset_valid_ragged_array_repr_featureType(nc, "trajectoryprofile") + util.is_dataset_valid_ragged_array_repr_featureType( + nc, + "trajectoryprofile", + ), ) diff --git a/compliance_checker/tests/test_ioos_profile.py b/compliance_checker/tests/test_ioos_profile.py index e536f4fd9..ceb33b6fb 100644 --- a/compliance_checker/tests/test_ioos_profile.py +++ b/compliance_checker/tests/test_ioos_profile.py @@ -326,7 +326,9 @@ def test_check_geophysical_vars_have_attrs(self): # set the necessary attributes ds = MockTimeSeries(default_fill_value=9999999999.0) # time, lat, lon, depth temp = ds.createVariable( - "temp", np.float64, fill_value=9999999999.0 + "temp", + np.float64, + fill_value=9999999999.0, ) # _FillValue temp.setncattr("missing_value", 9999999999.0) temp.setncattr("standard_name", "sea_surface_temperature") @@ -348,7 +350,10 @@ def test_check_accuracy_precision_resolution(self): ds = MockTimeSeries() # time, lat, lon, depth temp = ds.createVariable( - "temp", np.float64, dimensions=("time",), fill_value=9999999999.0 + "temp", + np.float64, + dimensions=("time",), + fill_value=9999999999.0, ) # _FillValue temp.setncattr("standard_name", "sea_water_temperature") results = self.ioos.check_accuracy(ds) @@ -382,7 +387,10 @@ def test_check_geospatial_vars_have_attrs(self): # create geophysical variable ds = MockTimeSeries() # time, lat, lon, depth temp = ds.createVariable( - "temp", np.float64, dimensions=("time",), fill_value=9999999999.0 + "temp", + np.float64, + dimensions=("time",), + fill_value=9999999999.0, ) # _FillValue temp.setncattr("standard_name", "sea_water_temperature") results = self.ioos.check_accuracy(ds) @@ -523,7 +531,8 @@ def test_check_gts_ingest_requirements(self): result = self.ioos.check_gts_ingest_requirements(ds) self.assertFalse(result.value) self.assertIn( - "The following variables qualified for NDBC/GTS Ingest: time\n", result.msgs + "The following variables qualified for NDBC/GTS Ingest: time\n", + result.msgs, ) def test_check_instrument_variables(self): @@ -643,7 +652,7 @@ def test_naming_authority_validation(self): bad_result[1], [ "naming_authority should either be a URL or a " - 'reversed DNS name (e.g "edu.ucar.unidata")' + 'reversed DNS name (e.g "edu.ucar.unidata")', ], ) @@ -796,7 +805,8 @@ def test_check_qartod_variables_references(self): # QARTOD variable with bad references (fail) qr.setncattr( - "references", r"p9q384ht09q38@@####???????////??//\/\/\/\//\/\74ht" + "references", + r"p9q384ht09q38@@####???????////??//\/\/\/\//\/\74ht", ) results = self.ioos.check_qartod_variables_references(ds) self.assertFalse(all(r.value for r in results)) @@ -1190,7 +1200,9 @@ def test_check_instrument_make_model_calib_date(self): temp = ds.createVariable("temperature", "d", dimensions=("time",)) temp.setncattr("instrument", "inst") inst = ds.createVariable( - "inst", "d", dimensions=() + "inst", + "d", + dimensions=(), ) # no make_model or calibration_date results = self.ioos.check_instrument_make_model_calib_date(ds) scored, out_of, messages = get_results(results) diff --git a/compliance_checker/tests/test_ioos_sos.py b/compliance_checker/tests/test_ioos_sos.py index 8b82f1f51..7d852689b 100644 --- a/compliance_checker/tests/test_ioos_sos.py +++ b/compliance_checker/tests/test_ioos_sos.py @@ -12,8 +12,9 @@ class TestIOOSSOSGetCapabilities(unittest.TestCase): def setUp(self): with open( os.path.join( - os.path.dirname(__file__), "data/http_mocks/ncsos_getcapabilities.xml" - ) + os.path.dirname(__file__), + 
"data/http_mocks/ncsos_getcapabilities.xml", + ), ) as f: self.resp = f.read() # need to monkey patch checkers prior to running tests, or no checker @@ -25,10 +26,16 @@ def test_retrieve_getcaps(self): """Method that simulates retrieving SOS GetCapabilities""" url = "http://data.oceansmap.com/thredds/sos/caricoos_ag/VIA/VIA.ncml" httpretty.register_uri( - httpretty.GET, url, content_type="text/xml", body=self.resp + httpretty.GET, + url, + content_type="text/xml", + body=self.resp, ) httpretty.register_uri( - httpretty.HEAD, url, content_type="text/xml", body="HTTP/1.1 200" + httpretty.HEAD, + url, + content_type="text/xml", + body="HTTP/1.1 200", ) ComplianceChecker.run_checker(url, ["ioos_sos"], 1, "normal") @@ -37,8 +44,9 @@ class TestIOOSSOSDescribeSensor(unittest.TestCase): def setUp(self): with open( os.path.join( - os.path.dirname(__file__), "data/http_mocks/ncsos_describesensor.xml" - ) + os.path.dirname(__file__), + "data/http_mocks/ncsos_describesensor.xml", + ), ) as f: self.resp = f.read() # need to monkey patch checkers prior to running tests, or no checker @@ -57,10 +65,16 @@ def test_retrieve_describesensor(self): "&version=1.0.0" ) httpretty.register_uri( - httpretty.GET, url, content_type="text/xml", body=self.resp + httpretty.GET, + url, + content_type="text/xml", + body=self.resp, ) httpretty.register_uri( - httpretty.HEAD, url, content_type="text/xml", body="HTTP/1.1 200" + httpretty.HEAD, + url, + content_type="text/xml", + body="HTTP/1.1 200", ) # need to mock out the HEAD response so that compliance checker # recognizes this as some sort of XML doc instead of an OPeNDAP diff --git a/compliance_checker/tests/test_protocols.py b/compliance_checker/tests/test_protocols.py index 5283f315d..a0aed2207 100644 --- a/compliance_checker/tests/test_protocols.py +++ b/compliance_checker/tests/test_protocols.py @@ -59,7 +59,7 @@ def test_sos(): """ Tests that a connection can be made to an SOS endpoint """ - url = "https://thredds.aoos.org/thredds/sos/aoos/cruises/ecofoci/2dy12.nc" + url = "https://thredds.aoos.org/thredds/dodsC/aoos/cruises/ecofoci/2dy12.nc" cs = CheckSuite() ds = cs.load_dataset(url) assert ds is not None diff --git a/compliance_checker/tests/test_suite.py b/compliance_checker/tests/test_suite.py index 9689de7d1..1708a8d9e 100644 --- a/compliance_checker/tests/test_suite.py +++ b/compliance_checker/tests/test_suite.py @@ -1,4 +1,3 @@ -# coding=utf-8 import os import unittest from pathlib import Path @@ -14,18 +13,22 @@ "2dim": resource_filename("compliance_checker", "tests/data/2dim-grid.nc"), "bad_region": resource_filename("compliance_checker", "tests/data/bad_region.nc"), "bad_data_type": resource_filename( - "compliance_checker", "tests/data/bad_data_type.nc" + "compliance_checker", + "tests/data/bad_data_type.nc", ), "test_cdl": resource_filename("compliance_checker", "tests/data/test_cdl.cdl"), "test_cdl_nc": resource_filename( - "compliance_checker", "tests/data/test_cdl_nc_file.nc" + "compliance_checker", + "tests/data/test_cdl_nc_file.nc", ), "empty": resource_filename("compliance_checker", "tests/data/non-comp/empty.file"), "ru07": resource_filename( - "compliance_checker", "tests/data/ru07-20130824T170228_rt0.nc" + "compliance_checker", + "tests/data/ru07-20130824T170228_rt0.nc", ), "netCDF4": resource_filename( - "compliance_checker", "tests/data/test_cdl_nc4_file.cdl" + "compliance_checker", + "tests/data/test_cdl_nc4_file.cdl", ), } @@ -47,9 +50,9 @@ def __repr__(self): name = self.id() name = name.split(".") if name[0] not in ["ion", "pyon"]: - 
return "%s (%s)" % (name[-1], ".".join(name[:-1])) + return "{} ({})".format(name[-1], ".".join(name[:-1])) else: - return "%s ( %s )" % ( + return "{} ( {} )".format( name[-1], ".".join(name[:-2]) + ":" + ".".join(name[-2:]), ) @@ -75,7 +78,10 @@ def test_unicode_formatting(self): for checker, rpair in score_groups.items(): groups, errors = rpair score_list, points, out_of = self.cs.standard_output( - ds.filepath(), limit, checker, groups + ds.filepath(), + limit, + checker, + groups, ) # This asserts that print is able to generate all of the unicode # output @@ -164,7 +170,10 @@ def test_group_func(self): for checker, rpair in score_groups.items(): groups, errors = rpair score_list, points, out_of = self.cs.standard_output( - ds.filepath(), limit, checker, groups + ds.filepath(), + limit, + checker, + groups, ) # This asserts that print is able to generate all of the unicode output self.cs.standard_output_generation(groups, limit, points, out_of, checker) @@ -196,11 +205,18 @@ def test_cdl_file(self): for checker, rpair in vals.items(): groups, errors = rpair score_list, cdl_points, cdl_out_of = self.cs.standard_output( - ds.filepath(), limit, checker, groups + ds.filepath(), + limit, + checker, + groups, ) # This asserts that print is able to generate all of the unicode output self.cs.standard_output_generation( - groups, limit, cdl_points, cdl_out_of, checker + groups, + limit, + cdl_points, + cdl_out_of, + checker, ) ds.close() @@ -212,11 +228,18 @@ def test_cdl_file(self): for checker, rpair in vals.items(): groups, errors = rpair score_list, nc_points, nc_out_of = self.cs.standard_output( - ds.filepath(), limit, checker, groups + ds.filepath(), + limit, + checker, + groups, ) # This asserts that print is able to generate all of the unicode output self.cs.standard_output_generation( - groups, limit, nc_points, nc_out_of, checker + groups, + limit, + nc_points, + nc_out_of, + checker, ) ds.close() @@ -241,7 +264,10 @@ def test_standard_output_score_header(self): limit = 2 groups, errors = score_groups["cf"] score_list, all_passed, out_of = self.cs.standard_output( - ds.filepath(), limit, "cf", groups + ds.filepath(), + limit, + "cf", + groups, ) assert all_passed < out_of diff --git a/compliance_checker/tests/test_util.py b/compliance_checker/tests/test_util.py index ee860a9b0..7f9124dde 100644 --- a/compliance_checker/tests/test_util.py +++ b/compliance_checker/tests/test_util.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- """ compliance_checker/tests/test_util.py """ diff --git a/doc/ubuntu-install-guide.md b/doc/ubuntu-install-guide.md deleted file mode 100644 index ec8d78400..000000000 --- a/doc/ubuntu-install-guide.md +++ /dev/null @@ -1,59 +0,0 @@ -# Installing Compliance-Checker on Ubuntu - - - -## Installing UDUnits on Ubuntu - -``` -wget 'ftp://ftp.unidata.ucar.edu/pub/udunits/udunits-2.1.24.tar.gz' -tar -zxvf udunits-2.1.24.tar.gz -cd udunits-2.1.24.tar.gz -./configure -make -sudo make install -cd .. -``` - -Linux infrequently updates the shared library cache (ldconfig). 
To force the cache to update: -``` -sudo ldconfig -v -``` - -To ensure that UDUnits is properly installed and recognized by the operating system as a registered shared library: - -``` -ldconfig -p | grep udunits -``` - -You should see: - -``` - libudunits2.so.0 (libc6,x86-64) => /usr/local/lib/libudunits2.so.0 - libudunits2.so (libc6,x86-64) => /usr/local/lib/libudunits2.so -``` - -## Installing lxml on Ubuntu - -### Get the libxml2 and libxslt packages - -``` -sudo apt-get install libxml2-dev -sudo apt-get install libxslt1-dev -pip install lxml -``` - -## Installing Compliance Checker - -``` -pip install compliance-checker -``` - -## Testing your compliance checker installation: - -Run python on your virtual environment and try: - -``` -from compliance_checker.runner import ComplianceCheckerCheckSuite -``` - -If it succeeds, then the Compliance Checker should be working correctly. diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 000000000..356b211e8 --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line. +SPHINXOPTS = +SPHINXBUILD = python -msphinx +SPHINXPROJ = compliance_checker +SOURCEDIR = source +BUILDDIR = build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/source/_templates/autosummary/base.rst b/docs/source/_templates/autosummary/base.rst new file mode 100644 index 000000000..cfedda5d8 --- /dev/null +++ b/docs/source/_templates/autosummary/base.rst @@ -0,0 +1,12 @@ +{{ objname }} +{{ underline }} + +.. currentmodule:: {{ module }} + +.. auto{{ objtype }}:: {{ objname }} + +.. include:: {{fullname}}.examples + +.. raw:: html + +
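The new docs/Makefile above delegates every target to Sphinx's make mode (SPHINXBUILD is `python -msphinx`, with `source` and `build` as the source and build directories). For anyone who wants to drive the same build without make, a minimal sketch follows; it assumes the working directory is `docs/` and that the documentation dependencies added elsewhere in this patch (myst-parser, numpydoc, pydata-sphinx-theme) are installed.

```python
# Minimal sketch of what `make html` does with the new docs/Makefile,
# using Sphinx's Python entry point directly. Assumes the current
# working directory is docs/ and the extensions listed in
# docs/source/conf.py are importable.
from sphinx.cmd.build import main as sphinx_main

# Equivalent to: python -msphinx -M html source build
exit_code = sphinx_main(["-M", "html", "source", "build"])
print("Sphinx exited with", exit_code)
```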
diff --git a/docs/source/_templates/autosummary/class.rst b/docs/source/_templates/autosummary/class.rst new file mode 100644 index 000000000..c6a6773b0 --- /dev/null +++ b/docs/source/_templates/autosummary/class.rst @@ -0,0 +1,62 @@ +{{ objname }} +{{ underline }} + +.. currentmodule:: {{ module }} + +.. autoclass:: {{ objname }} + + {% block attributes_summary %} + {% if attributes %} + + .. rubric:: Attributes Summary + + .. autosummary:: + {% for item in attributes %} + ~{{ name }}.{{ item }} + {%- endfor %} + + {% endif %} + {% endblock %} + + {% block methods_summary %} + {% if methods %} + + .. rubric:: Methods Summary + + .. autosummary:: + {% for item in methods %} + ~{{ name }}.{{ item }} + {%- endfor %} + + {% endif %} + {% endblock %} + + {% block attributes_documentation %} + {% if attributes %} + + .. rubric:: Attributes Documentation + + {% for item in attributes %} + .. autoattribute:: {{ item }} + {%- endfor %} + + {% endif %} + {% endblock %} + + {% block methods_documentation %} + {% if methods %} + + .. rubric:: Methods Documentation + + {% for item in methods %} + .. automethod:: {{ item }} + {%- endfor %} + + {% endif %} + {% endblock %} + +.. include:: {{fullname}}.examples + +.. raw:: html + +
diff --git a/docs/source/_templates/autosummary/module.rst b/docs/source/_templates/autosummary/module.rst new file mode 100644 index 000000000..074165792 --- /dev/null +++ b/docs/source/_templates/autosummary/module.rst @@ -0,0 +1,56 @@ + {% if name == 'io' %} + {% set nice_name = 'Reading Data' %} + {% else %} + {% set nice_name = name | title | escape %} + {% endif %} + +{{ (nice_name ~ ' ``(' ~ fullname ~ ')``')|underline }} + +.. automodule:: {{ fullname }} + + {% block attributes %} + {% if attributes %} + .. rubric:: {{ _('Module Attributes') }} + + .. autosummary:: + {% for item in attributes %} + {{ item }} + {%- endfor %} + {% endif %} + {% endblock %} + + {% block functions %} + {% if functions %} + .. rubric:: {{ _('Functions') }} + + .. autosummary:: + :toctree: ./ + {% for item in functions %} + {{ item }} + {%- endfor %} + {% endif %} + {% endblock %} + + {% block classes %} + {% if classes %} + .. rubric:: {{ _('Classes') }} + + .. autosummary:: + :toctree: ./ + {% for item in classes %} + {{ item }} + {%- endfor %} + {% endif %} + {% endblock %} + + {% block exceptions %} + {% if exceptions %} + .. rubric:: {{ _('Exceptions') }} + + .. autosummary:: + :toctree: ./ + {% for item in exceptions %} + {{ item }} + {%- endfor %} + {% endif %} + {% endblock %} diff --git a/docs/source/_templates/layout.html b/docs/source/_templates/layout.html new file mode 100644 index 000000000..80b059798 --- /dev/null +++ b/docs/source/_templates/layout.html @@ -0,0 +1,21 @@ +{% extends "!layout.html" %} + +{% block fonts %} + + {{ super() }} +{% endblock %} + +{% block extrahead %} + {{ super() }} + +{% endblock %} + +{% block docs_navbar %} + + +{# Added to support a banner with an alert #} + +{% endblock %} diff --git a/docs/source/_templates/versions.html b/docs/source/_templates/versions.html new file mode 100644 index 000000000..21001e725 --- /dev/null +++ b/docs/source/_templates/versions.html @@ -0,0 +1,4 @@ + + diff --git a/docs/source/compliance_checker_api.rst b/docs/source/compliance_checker_api.rst new file mode 100644 index 000000000..ebc2e171a --- /dev/null +++ b/docs/source/compliance_checker_api.rst @@ -0,0 +1,24 @@ +.. _api-index: + +############### +API +############### + +.. currentmodule:: compliance_checker + +.. autosummary:: + :toctree: generated/ + + compliance_checker.cf + compliance_checker.protocols + compliance_checker.acdd + compliance_checker.base + compliance_checker.cfutil + compliance_checker.ioos + compliance_checker.runner + compliance_checker.suite + compliance_checker.util + + +* :ref:`modindex` +* :ref:`genindex` diff --git a/docs/source/conf.py b/docs/source/conf.py new file mode 100644 index 000000000..8ca29f8cf --- /dev/null +++ b/docs/source/conf.py @@ -0,0 +1,167 @@ +#!/usr/bin/env python3 +# +# compliance_checker documentation build configuration file, created by +# sphinx-quickstart on Mon Oct 9 21:28:42 2017. +# +# This file is execfile()d with the current directory set to its +# containing dir. +# +# Note that not all possible configuration values are present in this +# autogenerated file. +# +# All configuration values have a default; values that are commented out +# serve to show the default. + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. 
+# +# import os +# import sys +# sys.path.insert(0, os.path.abspath('.')) + +from datetime import datetime + +import compliance_checker + +version = release = compliance_checker.__version__ +cur_date = datetime.utcnow() + + +# -- General configuration ------------------------------------------------ + +# If your documentation needs a minimal Sphinx version, state it here. +# +needs_sphinx = "2.1" + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + "myst_parser", + "numpydoc", + "sphinx.ext.autodoc", + "sphinx.ext.autosummary", + "sphinx.ext.coverage", + "sphinx.ext.napoleon", + "sphinx.ext.viewcode", +] + +# Make MyST generate anchors for headings +myst_heading_anchors = 2 + +autodoc_typehints = "description" + +# Tweak how docs are formatted +napoleon_use_rtype = False + +# Control main class documentation +autoclass_content = "both" + +# Add any paths that contain templates here, relative to this directory. +templates_path = ["_templates"] + +# Custom sidebar templates, maps document names to template names. +html_sidebars = {"**": ["versions", "sidebar-nav-bs"]} + +# The suffix(es) of source filenames. +# You can specify multiple suffix as a list of string: +# +source_suffix = [".rst", ".md"] + +# Controlling automatically generating summary tables in the docs +autosummary_generate = True +autosummary_ignore_module_all = False + +# The master toctree document. +master_doc = "index" + +# General information about the project. +project = "compliance_checker" +copyright = f"{cur_date:%Y}, Dave Foster" +author = "Dave Foster" + + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +# +# This is also used if you do content translation via gettext catalogs. +# Usually you set "language" from the command line for these cases. +language = "en" + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This patterns also effect to html_static_path and html_extra_path +exclude_patterns = [] + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = "sphinx" + +# If true, `todo` and `todoList` produce output, else they produce nothing. +todo_include_todos = False + + +# -- Options for HTML output ---------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = "pydata_sphinx_theme" + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +# +html_theme_options = { + "external_links": [ + { + "name": "Release Notes", + "url": "https://github.com/ioos/compliance-checker/releases", + }, + ], + "icon_links": [ + { + "name": "GitHub", + "url": "https://github.com/ioos/compliance-checker", + "icon": "fab fa-github-square", + }, + ], + "use_edit_page_button": False, + "navbar_align": "left", + "navbar_start": ["navbar-logo"], + "navbar_center": ["navbar-nav"], + "navbar_end": ["search-field", "navbar-icon-links"], +} + + +# -- Options for HTMLHelp output ------------------------------------------ + +# Output file base name for HTML help builder. +htmlhelp_basename = "compliance_checkerdoc" + + +# -- Options for manual page output --------------------------------------- + +# One entry per manual page. 
List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [ + (master_doc, "compliance_checker", "compliance_checker Documentation", [author], 1), +] + + +# -- Options for Texinfo output ------------------------------------------- + +# Grouping the document tree into Texinfo files. List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + ( + master_doc, + "compliance_checker", + "compliance_checker Documentation", + author, + "compliance_checker", + "One line description of project.", + "Miscellaneous", + ), +] diff --git a/docs/source/faq.md b/docs/source/faq.md new file mode 100644 index 000000000..f461ce886 --- /dev/null +++ b/docs/source/faq.md @@ -0,0 +1,50 @@ +# Frequently Asked Questions + +## What is the Compliance Checker? + +The IOOS Compliance Checker is a Python-based tool for data providers to check for completeness and community standard compliance of local or remote netCDF files against CF and ACDD file standards. +The Python module can be used as a command-line tool or as a library that can be integrated into other software. + +You are currently viewing the web-based version of the Compliance Checker. +It was developed to enable a broader audience and improve accessibility for the checker. +With the web version, +providers can simply provide a link or upload their datasets and get the full suite of capabilities that Compliance Checker offers. + +## What does the Compliance Checker check? + +It currently supports the following sources and standards: +- [ACDD (1.1, 1.3)](https://wiki.esipfed.org/Attribute_Convention_for_Data_Discovery_1-3) +- [CF (1.6, 1.7)](http://cfconventions.org/Data/cf-conventions/cf-conventions-1.7/cf-conventions.html) +- [IOOS (1.1, 1.2)](https://ioos.github.io/ioos-metadata/) +- [Glider DAC](https://github.com/ioos/ioosngdac/wiki/NetCDF-Specification) +- [NCEI (1.1, 2.0)](https://www.ncei.noaa.gov/data/oceans/ncei/formats/netcdf/v2.0/index.html) + +## Can I test an ERDDAP dataset with the Compliance Checker? + +Yes. +When testing an ERDDAP dataset, +please supply the URL to the dataset without a file extension. +For example, +to test this [Morro Bay dataset](https://standards.sensors.ioos.us/erddap/tabledap/morro-bay-bs1-met.html), +you should supply the URL like so: +"https://standards.sensors.ioos.us/erddap/tabledap/morro-bay-bs1-met". + + +## What version of the Compliance Checker is run on [compliance.ioos.us](https://compliance.ioos.us/index.html)? + +This web site is using [version 5.0.0](https://pypi.org/project/compliance-checker/) of the Compliance Checker. + +## Is there an API? + +There sure is. +Check out the details on how to use the web [API here](https://github.com/ioos/compliance-checker-web/wiki/API). + +## Where can I find more information about the Compliance Checker? + +The Compliance Checker is completely open-source and available on [GitHub](https://github.com/ioos/compliance-checker). + +## Disclaimer + +The objective of the IOOS Compliance Checker is to check your file against our interpretation of select dataset metadata standards to use as a guideline in generating compliant files. +The compliance checker should not be considered the authoritative source on whether your file is 100% "compliant". +Instead, we recommend that users use the results as a guide to work towards compliance. 
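The FAQ added above notes that the Compliance Checker can be used as a library as well as from the command line. As context for the new docs, a minimal usage sketch is shown below; it mirrors the invocation documented in the project README, and the dataset path, checker names, and output filename are placeholders rather than anything introduced by this changeset:

```python
from compliance_checker.runner import CheckSuite, ComplianceChecker

# Discover every installed checker (CF, ACDD, IOOS, plus any plugins).
check_suite = CheckSuite()
check_suite.load_all_available_checkers()

# Run the CF and ACDD suites against a local file or OPeNDAP/ERDDAP URL
# and write a JSON report; "/path/to/dataset.nc" is a placeholder.
return_value, errors = ComplianceChecker.run_checker(
    "/path/to/dataset.nc",
    ["cf", "acdd"],
    1,         # verbosity
    "normal",  # criteria: lenient, normal, or strict
    output_filename="report.json",
    output_format="json",
)
```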
diff --git a/docs/source/index.rst b/docs/source/index.rst new file mode 100644 index 000000000..0f67a1527 --- /dev/null +++ b/docs/source/index.rst @@ -0,0 +1,19 @@ +compliance_checker +================== + +Python tool to check your datasets against compliance standards. + +.. toctree:: + :maxdepth: 3 + :caption: Contents: + + quickintro + compliance_checker_api + faq + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` diff --git a/pyproject.toml b/pyproject.toml index 3856b4004..631982623 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,10 @@ [build-system] -requires = ["setuptools>=41.2", "setuptools_scm", "wheel"] build-backend = "setuptools.build_meta" +requires = [ + "setuptools>=42", + "setuptools_scm", + "wheel", +] [tool.pytest.ini_options] markers = [ @@ -8,5 +12,32 @@ markers = [ "slowtest: marks slow tests (deselect with '-m \"not slowtest\"')" ] -[tool.isort] -profile = "black" +[tool.ruff] +select = [ + "A", # flake8-builtins + "B", # flake8-bugbear + "C4", # flake8-comprehensions + "E", # pycodestyle errors + "F", # flakes + "I", # import sorting + "W", # pycodestyle warnings + "UP", # upgrade +] +target-version = "py38" +line-length = 200 + +exclude = [ + "compliance_checker/cf/cf.py", +] + +ignore = [ + "E501", +] + +[tool.ruff.per-file-ignores] +"docs/source/conf.py" = [ + "E402", + "A001", +] +"compliance_checker/__init__.py" = ["B019"] +"compliance_checker/cfutil.py" = ["B028"] diff --git a/requirements.txt b/requirements.txt index 144e32d63..6c3dc9de7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,10 +1,10 @@ cf-units>=2 cftime>=1.1.0 isodate>=0.5.4 -Jinja2>=2.7.3 +jinja2>=2.7.3 lxml>=3.2.1 -netCDF4>=1.5.7 -OWSLib>=0.8.3 +netcdf4>=1.5.7 +owslib>=0.8.3 pendulum>=1.2.4 pygeoif>=0.6 pyproj>=2.2.1 diff --git a/setup.py b/setup.py index e58299faa..00b4877c7 100644 --- a/setup.py +++ b/setup.py @@ -1,16 +1,14 @@ -import io - from setuptools import find_packages, setup def readme(): - with io.open("README.md", encoding="utf-8") as f: + with open("README.md", encoding="utf-8") as f: return f.read() def pip_requirements(fname="requirements.txt"): reqs = [] - with open(fname, "r") as f: + with open(fname) as f: for line in f: line = line.strip() if not line or line.startswith("#"): diff --git a/test_requirements.txt b/test_requirements.txt index 408c68096..1ebf02790 100644 --- a/test_requirements.txt +++ b/test_requirements.txt @@ -3,7 +3,10 @@ codespell flake8 httpretty mypy +myst-parser +numpydoc pre-commit +pydata-sphinx-theme pytest>=2.9.0 pytest-cov>=3.0.0 pytest-vcr
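The requirements.txt hunk above only changes the case of the pinned distribution names; pip resolves names case-insensitively via PEP 503 normalization, so the lowercase spellings install the same packages. A small illustrative sketch, assuming the third-party `packaging` library is installed:

```python
from packaging.utils import canonicalize_name

# PEP 503 normalization lowercases names (and collapses runs of -, _, .),
# so the old and new spellings in requirements.txt resolve identically.
for spelled in ("Jinja2", "jinja2", "netCDF4", "OWSLib"):
    print(f"{spelled} -> {canonicalize_name(spelled)}")
# Jinja2 -> jinja2
# jinja2 -> jinja2
# netCDF4 -> netcdf4
# OWSLib -> owslib
```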