From 32e8eb9c15a1994cfc4a3258d3159ea5119e06bd Mon Sep 17 00:00:00 2001 From: Filipe Fernandes Date: Tue, 5 Nov 2024 14:15:04 +0100 Subject: [PATCH] pyudunits2 --- .github/workflows/cc-plugin-glider-test.yml | 1 + .github/workflows/cc-plugin-ncei-test.yml | 1 + .github/workflows/cc-plugin-og-test.yml | 1 + .github/workflows/cc-plugin-sgrid-test.yml | 1 + .github/workflows/cc-plugin-ugrid-test.yml | 1 + .github/workflows/default-tests.yml | 3 +- .github/workflows/integration-tests.yml | 1 + compliance_checker/cf/cf_1_6.py | 19 +++-- compliance_checker/cf/util.py | 16 ++-- compliance_checker/cfutil.py | 86 +++++++++++++++++++-- compliance_checker/ioos.py | 9 +-- compliance_checker/tests/test_cf.py | 2 +- pyproject.toml | 4 +- requirements.txt | 1 - 14 files changed, 115 insertions(+), 31 deletions(-) diff --git a/.github/workflows/cc-plugin-glider-test.yml b/.github/workflows/cc-plugin-glider-test.yml index bd9d3058..d926b744 100644 --- a/.github/workflows/cc-plugin-glider-test.yml +++ b/.github/workflows/cc-plugin-glider-test.yml @@ -25,6 +25,7 @@ jobs: - name: Install compliance-checker shell: bash -l {0} run: | + pip install git+https://github.com/pelson/pyudunits2.git@5ed17a6a2893c978b797db6e8041f32e537cd432 python -m pip install -v -e . --no-deps --force-reinstall - name: cc-plugin-glider tests diff --git a/.github/workflows/cc-plugin-ncei-test.yml b/.github/workflows/cc-plugin-ncei-test.yml index ae322ad5..1c46b02b 100644 --- a/.github/workflows/cc-plugin-ncei-test.yml +++ b/.github/workflows/cc-plugin-ncei-test.yml @@ -25,6 +25,7 @@ jobs: - name: Install compliance-checker shell: bash -l {0} run: | + pip install git+https://github.com/pelson/pyudunits2.git@5ed17a6a2893c978b797db6e8041f32e537cd432 python -m pip install -e . --no-deps --force-reinstall - name: cc-plugin-ncei tests diff --git a/.github/workflows/cc-plugin-og-test.yml b/.github/workflows/cc-plugin-og-test.yml index c74a033b..7f98b0c2 100644 --- a/.github/workflows/cc-plugin-og-test.yml +++ b/.github/workflows/cc-plugin-og-test.yml @@ -25,6 +25,7 @@ jobs: - name: Install compliance-checker shell: bash -l {0} run: | + pip install git+https://github.com/pelson/pyudunits2.git@5ed17a6a2893c978b797db6e8041f32e537cd432 python -m pip install -e . --no-deps --force-reinstall - name: cc-plugin-og tests diff --git a/.github/workflows/cc-plugin-sgrid-test.yml b/.github/workflows/cc-plugin-sgrid-test.yml index 153603ff..02eb0051 100644 --- a/.github/workflows/cc-plugin-sgrid-test.yml +++ b/.github/workflows/cc-plugin-sgrid-test.yml @@ -25,6 +25,7 @@ jobs: - name: Install compliance-checker shell: bash -l {0} run: | + pip install git+https://github.com/pelson/pyudunits2.git@5ed17a6a2893c978b797db6e8041f32e537cd432 python -m pip install -e . --no-deps --force-reinstall - name: cc-plugin-sgrid tests diff --git a/.github/workflows/cc-plugin-ugrid-test.yml b/.github/workflows/cc-plugin-ugrid-test.yml index 1cb7abf0..4ede3fbe 100644 --- a/.github/workflows/cc-plugin-ugrid-test.yml +++ b/.github/workflows/cc-plugin-ugrid-test.yml @@ -25,6 +25,7 @@ jobs: - name: Install compliance-checker shell: bash -l {0} run: | + pip install git+https://github.com/pelson/pyudunits2.git@5ed17a6a2893c978b797db6e8041f32e537cd432 python -m pip install -e . --no-deps --force-reinstall - name: cc-plugin-ugrid tests diff --git a/.github/workflows/default-tests.yml b/.github/workflows/default-tests.yml index a68f2f28..348c582d 100644 --- a/.github/workflows/default-tests.yml +++ b/.github/workflows/default-tests.yml @@ -9,7 +9,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - python-version: [ "3.9", "3.10", "3.11", "3.12", "3.13" ] + python-version: [ "3.10", "3.11", "3.12", "3.13" ] os: [ windows-latest, ubuntu-latest, macos-latest ] fail-fast: false defaults: @@ -32,6 +32,7 @@ jobs: - name: Install compliance-checker run: | + pip install git+https://github.com/pelson/pyudunits2.git@5ed17a6a2893c978b797db6e8041f32e537cd432 python -m pip install -e . --no-deps --force-reinstall - name: Default Tests diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml index ea54d31a..f9dc02d4 100644 --- a/.github/workflows/integration-tests.yml +++ b/.github/workflows/integration-tests.yml @@ -25,6 +25,7 @@ jobs: - name: Install compliance-checker shell: bash -l {0} run: | + pip install git+https://github.com/pelson/pyudunits2.git@5ed17a6a2893c978b797db6e8041f32e537cd432 python -m pip install -e . --no-deps --force-reinstall - name: Integration Tests diff --git a/compliance_checker/cf/cf_1_6.py b/compliance_checker/cf/cf_1_6.py index c77d5ea4..8e7291a5 100644 --- a/compliance_checker/cf/cf_1_6.py +++ b/compliance_checker/cf/cf_1_6.py @@ -5,7 +5,6 @@ import cftime import numpy as np import regex -from cf_units import Unit from compliance_checker import cfutil from compliance_checker.base import BaseCheck, Result, TestCtx @@ -812,7 +811,7 @@ def _check_valid_cf_units(self, ds, variable_name): ) try: - units_conv = Unit(units) + units_conv = cfutil._units(units) except ValueError: valid_units.messages.append( f'Unit string "{units}" is not recognized by UDUnits', @@ -828,7 +827,7 @@ def _check_valid_cf_units(self, ds, variable_name): # being expressed as "s"/seconds if standard_name not in {"time", "forecast_reference_time"}: valid_units.assert_true( - units_conv.is_convertible(Unit(reference)), + units_conv.is_convertible_to(cfutil._units(reference)), f'Units "{units}" for variable ' f"{variable_name} must be convertible to " f'canonical units "{reference}"', @@ -1494,7 +1493,8 @@ def check_latitude(self, ds): # check that the units aren't in east and north degrees units, # but are convertible to angular units allowed_units.assert_true( - units not in e_n_units and Unit(units) == Unit("degree"), + units not in e_n_units + and cfutil._units(units) == cfutil._units("degree"), f"Grid latitude variable '{latitude}' should use degree equivalent units without east or north components. " f"Current units are {units}", ) @@ -1603,7 +1603,8 @@ def check_longitude(self, ds): # check that the units aren't in east and north degrees units, # but are convertible to angular units allowed_units.assert_true( - units not in e_n_units and Unit(units) == Unit("degree"), + units not in e_n_units + and cfutil._units(units) == cfutil._units("degree"), f"Grid longitude variable '{longitude}' should use degree equivalent units without east or north components. " f"Current units are {units}", ) @@ -2843,12 +2844,14 @@ def _cell_measures_core(self, ds, var, external_set, variable_template): f'cell_methods attribute with a measure type of "{cell_measure_type}".' ) try: - cell_measure_units = Unit(cell_measure_var.units) + cell_measure_units = cfutil._units(cell_measure_var.units) except ValueError: valid = False reasoning.append(conversion_failure_msg) else: - if not cell_measure_units.is_convertible(Unit(f"m{exponent}")): + if not cell_measure_units.is_convertible_to( + cfutil._units(f"m{exponent}"), + ): valid = False reasoning.append(conversion_failure_msg) if not set(cell_measure_var.dimensions).issubset(var.dimensions): @@ -3041,7 +3044,7 @@ def _check_cell_methods_paren_info(self, paren_contents, var): # then the units try: - Unit(interval_matches.group("interval_units")) + cfutil._units(interval_matches.group("interval_units")) except ValueError: valid_info.messages.append( 'ยง7.3.3 {}:cell_methods interval units "{}" is not parsable by UDUNITS.'.format( diff --git a/compliance_checker/cf/util.py b/compliance_checker/cf/util.py index 9443eb33..c13accc7 100644 --- a/compliance_checker/cf/util.py +++ b/compliance_checker/cf/util.py @@ -4,12 +4,11 @@ from pkgutil import get_data import requests -from cf_units import Unit from importlib_resources import files from lxml import etree from netCDF4 import Dataset -from compliance_checker.cfutil import units_convertible +from compliance_checker.cfutil import _units, units_convertible # copied from paegan # paegan may depend on these later @@ -321,7 +320,7 @@ def create_cached_data_dir(): def units_known(units): try: - Unit(units) + _units(units) except ValueError: return False return True @@ -329,7 +328,7 @@ def units_known(units): def units_temporal(units): try: - u = Unit(units) + u = _units(units) except ValueError: return False # IMPLEMENTATION CONFORMANCE REQUIRED 4.4 1/3 @@ -338,7 +337,7 @@ def units_temporal(units): # IMPLEMENTATION CONFORMANCE REQUIRED 4.4 3/3 # check that reference time seconds is not greater than or # equal to 60 - return u.is_time_reference() + return u.is_time_reference def find_coord_vars(ncds): @@ -403,13 +402,13 @@ def compare_unit_types(specified, reference): msgs = [] err_flag = False try: - specified_unit = Unit(specified) + specified_unit = _units(specified) except ValueError: msgs.append(f"Specified conversion unit f{specified} may not be valid UDUnits") err_flag = True try: - reference_unit = Unit(reference) + reference_unit = _units(reference) except ValueError: msgs.append(f"Specified conversion unit f{reference} may not be valid UDUnits") err_flag = True @@ -417,7 +416,8 @@ def compare_unit_types(specified, reference): if err_flag: return msgs - unit_convertible = specified_unit.is_convertible(reference_unit) + # FIXME: This one passed with the wrong syntax!! Our tests are probably not covering this!!! + unit_convertible = specified_unit.is_convertible_to(reference_unit) fail_msg = [f'Units "{specified}" are not convertible to "{reference}"'] return msgs if unit_convertible else fail_msg diff --git a/compliance_checker/cfutil.py b/compliance_checker/cfutil.py index 8bcce331..345f8301 100644 --- a/compliance_checker/cfutil.py +++ b/compliance_checker/cfutil.py @@ -8,7 +8,6 @@ from collections import defaultdict from functools import lru_cache, partial -from cf_units import Unit from importlib_resources import files _UNITLESS_DB = None @@ -111,7 +110,9 @@ def is_dimensionless_standard_name(standard_name_table, standard_name): f".//entry[@id='{standard_name}']", ) if found_standard_name is not None: - canonical_units = Unit(found_standard_name.find("canonical_units").text) + canonical_units = _units( + found_standard_name.find("canonical_units").text, + ) return canonical_units.is_dimensionless() # if the standard name is not found, assume we need units for the time being else: @@ -2028,6 +2029,68 @@ def guess_feature_type(nc, variable): return "reduced-grid" +class _CCUnit: + def __init__(self, units): + import cf_units + + self.u = cf_units.Unit(units) + self.is_time_reference = self.u.is_time_reference() + + def __eq__(self, other): + return self.u == other.u + + def is_convertible_to(self, other): + if isinstance(other, str): + ret = self.u.is_convertible(other) + else: + ret = self.u.is_convertible(other.u) + return ret + + def is_dimensionless(self): + return self.u.is_dimensionless() + + def expanded(self): + return self.u.definition + + +def _units(units: str): + """PLACEHOLDER.""" + # FIXME: Try to make cf_units optional + try: + return _CCUnit(units) + except ImportError: + from pyudunits2 import UnitSystem, UnresolvableUnitException + + ut_system = UnitSystem.from_udunits2_xml() + + # FIXME: cf_units.Unit(None) -> Unit('unknown') + if units is None: + units = "" + # FIXME: Syntax Error when HH:MM:SS is present in time reference. + if "T00:00:00" in units: + units = units.replace("T00:00:00", "") + + # FIXME: cf_units raised only ValueError + try: + u = ut_system.unit(units) + except (SyntaxError, UnresolvableUnitException) as err: + raise ValueError from err + # FIXME: cf_units defined .is_time_reference for time reference units. + u.is_time_reference = False + try: + if hasattr(u._definition, "shift_from"): + u.is_time_reference = True + except KeyError: + # FIXME: hasattr should return None in that case. + # pyudunits2/_expr_graph.py:27, in Node.__getattr__(self, name) + # 25 def __getattr__(self, name): + # 26 # Allow the dictionary to raise KeyError if the key doesn't exist. + # ---> 27 return self._attrs[name] + # KeyError: 'shift_from' + pass + return u + + def units_convertible(units1, units2, reftimeistime=True): """ Return True if a Unit representing the string units1 can be converted @@ -2036,9 +2099,22 @@ def units_convertible(units1, units2, reftimeistime=True): :param str units1: A string representing the units :param str units2: A string representing the units """ + convertible = False try: - u1 = Unit(units1) - u2 = Unit(units2) + u1 = _units(units1) + u2 = _units(units2) except ValueError: return False - return u1.is_convertible(u2) + # FIXME: Workaround for unknown units in cf_units. + if "" in (u1.expanded(), u2.expanded()): + return False + + convertible = u1.is_convertible_to(u2) + # FIXME: Workaround for is_time_reference vs time in cf_units. + # Both are time reference confirm. + if u1.is_time_reference and u2.is_time_reference: + convertible = True + # One is time, the other is not, change it to False. + if sum((u1.is_time_reference, u2.is_time_reference)) == 1: + convertible = False + return convertible diff --git a/compliance_checker/ioos.py b/compliance_checker/ioos.py index 2deed991..8a8015d9 100644 --- a/compliance_checker/ioos.py +++ b/compliance_checker/ioos.py @@ -6,7 +6,6 @@ from numbers import Number import validators -from cf_units import Unit from lxml.etree import XPath from owslib.namespaces import Namespaces @@ -24,6 +23,7 @@ from compliance_checker.cf import util as cf_util # not to be confused with cfutil.py from compliance_checker.cf.cf import CF1_6Check, CF1_7Check from compliance_checker.cfutil import ( + _units, get_geophysical_variables, get_instrument_variables, get_z_variables, @@ -1377,14 +1377,13 @@ def check_vertical_coordinates(self, ds): "mile", "fathom", ) - unit_def_set = { - Unit(unit_str).definition for unit_str in expected_unit_strs + _units(unit_str).expanded() for unit_str in expected_unit_strs } try: - units = Unit(units_str) - pass_stat = units.definition in unit_def_set + units = _units(units_str) + pass_stat = units.expanded() in unit_def_set # unknown unit not convertible to UDUNITS except ValueError: pass_stat = False diff --git a/compliance_checker/tests/test_cf.py b/compliance_checker/tests/test_cf.py index 65088f84..5ae68f00 100644 --- a/compliance_checker/tests/test_cf.py +++ b/compliance_checker/tests/test_cf.py @@ -1298,7 +1298,7 @@ def test_check_time_coordinate(self): dataset = MockTimeSeries() # NB: >= 60 seconds is nonstandard, but isn't actually a CF requirement # until CF 1.9 onwards - dataset.variables["time"].units = "months since 0-1-1 23:00:60" + dataset.variables["time"].units = "months since 0-1-1 23:00:59" dataset.variables["time"].climatology = ( "nonexistent_variable_reference_only_used_to_test_year_zero_failure" ) diff --git a/pyproject.toml b/pyproject.toml index 696cf98f..618e9291 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,7 +9,7 @@ requires = [ [project] name = "compliance-checker" description = "Checks Datasets and SOS endpoints for standards compliance" -readme = "README.md" +readme = { file = "README.md", content-type = "text/markdown" } license = { text = "Apache-2.0" } maintainers = [ { name = "Dave Foster", email = "dave@axiomdatascience.com" }, @@ -40,6 +40,7 @@ dynamic = [ "dependencies", "version", ] +optional-dependencies.extras = [ "cf-units>=2" ] urls.documentation = "https://ioos.github.io/compliance-checker" urls.homepage = "https://compliance.ioos.us/index.html" urls.repository = "https://github.com/ioos/compliance-checker" @@ -81,7 +82,6 @@ compliance_checker = [ dependencies = { file = [ "requirements.txt", ] } -readme = { file = "README.md", content-type = "text/markdown" } [tool.setuptools_scm] write_to = "compliance_checker/_version.py" diff --git a/requirements.txt b/requirements.txt index 1a61c43b..40c38f38 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,3 @@ -cf-units>=2 cftime>=1.1.0 importlib-metadata importlib-resources