From 5cf663d3b429cc7ac204294ef61d976837fa4b7c Mon Sep 17 00:00:00 2001 From: Charles Turner Date: Mon, 21 Oct 2024 11:29:20 +0800 Subject: [PATCH 01/13] - Added some optional test dependencies to pyproject.toml to make running tests easier - Added tests to make sure that we are parsing netCDF files as expected using intake-esm by comparing against a dataset read in directly with xarray (with same additional logic intake-esm uses) - All tests passing when coordinate variable discovery enabled - test_parsed_ncfile_values_0 failing when coordinate discovery disabled --- pyproject.toml | 8 + tests/test_builders.py | 778 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 786 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 1a6c6650..4eb49fb3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,6 +25,14 @@ dependencies = [ ] dynamic = ["version"] +[project.optional-dependencies] +test = [ + "pytest", + "pytest-watcher", + "pytest-cov", + "pytest-sugar", +] + [project.scripts] catalog-build = "access_nri_intake.cli:build" metadata-validate = "access_nri_intake.cli:metadata_validate" diff --git a/tests/test_builders.py b/tests/test_builders.py index c28f9eac..1e9a8223 100644 --- a/tests/test_builders.py +++ b/tests/test_builders.py @@ -6,6 +6,9 @@ import intake import pandas as pd import pytest +import xarray as xr +from intake_esm.source import _get_xarray_open_kwargs, _open_dataset +from intake_esm.utils import OPTIONS from access_nri_intake.source import CORE_COLUMNS, builders from access_nri_intake.source.utils import _AccessNCFileInfo @@ -1095,3 +1098,778 @@ def test_parse_access_ncfile(test_data, builder, filename, expected): expected.path = file assert builder.parse_access_ncfile(file) == expected + + +@pytest.mark.parametrize( + "builder, filename, expected", + [ + ( + builders.AccessOm2Builder, + "access-om2/output000/ocean/ocean_grid.nc", + _AccessNCFileInfo( + path=None, # type: ignore + filename="ocean_grid.nc", +
file_id="ocean_grid", + filename_timestamp=None, + frequency="fx", + start_date="none", + end_date="none", + variable=["geolat_t", "geolon_t", "xt_ocean", "yt_ocean"], + variable_long_name=[ + "tracer latitude", + "tracer longitude", + "tcell longitude", + "tcell latitude", + ], + variable_standard_name=["", "", "", ""], + variable_cell_methods=["time: point", "time: point", "", ""], + variable_units=["degrees_N", "degrees_E", "degrees_E", "degrees_N"], + ), + ), + ( + builders.AccessOm2Builder, + "access-om2/output000/ocean/ocean.nc", + _AccessNCFileInfo( + path=None, # type: ignore + filename="ocean.nc", + file_id="ocean", + filename_timestamp=None, + frequency="1yr", + start_date="1900-01-01, 00:00:00", + end_date="1910-01-01, 00:00:00", + variable=[ + "nv", + "st_ocean", + "temp", + "time", + "time_bounds", + "xt_ocean", + "yt_ocean", + ], + variable_long_name=[ + "vertex number", + "tcell zstar depth", + "Conservative temperature", + "time", + "time axis boundaries", + "tcell longitude", + "tcell latitude", + ], + variable_standard_name=[ + "", + "", + "sea_water_conservative_temperature", + "", + "", + "", + "", + ], + variable_cell_methods=["", "", "time: mean", "", "", "", ""], + variable_units=[ + "none", + "meters", + "K", + "days since 1900-01-01 00:00:00", + "days", + "degrees_E", + "degrees_N", + ], + ), + ), + ( + builders.AccessOm2Builder, + "access-om2/output000/ocean/ocean_month.nc", + _AccessNCFileInfo( + path=None, # type: ignore + filename="ocean_month.nc", + file_id="ocean_month", + filename_timestamp=None, + frequency="1mon", + start_date="1900-01-01, 00:00:00", + end_date="1910-01-01, 00:00:00", + variable=["mld", "nv", "time", "time_bounds", "xt_ocean", "yt_ocean"], + variable_long_name=[ + "mixed layer depth determined by density criteria", + "vertex number", + "time", + "time axis boundaries", + "tcell longitude", + "tcell latitude", + ], + variable_standard_name=[ + "ocean_mixed_layer_thickness_defined_by_sigma_t", + "", + "", + "", + "", 
+ "", + ], + variable_cell_methods=["time: mean", "", "", "", "", ""], + variable_units=[ + "m", + "none", + "days since 1900-01-01 00:00:00", + "days", + "degrees_E", + "degrees_N", + ], + ), + ), + ( + builders.AccessOm2Builder, + "access-om2/output000/ocean/ocean_month_inst_nobounds.nc", + _AccessNCFileInfo( + path=None, # type: ignore + filename="ocean_month_inst_nobounds.nc", + file_id="ocean_month_inst_nobounds", + filename_timestamp=None, + frequency="1mon", + start_date="1900-01-01, 00:00:00", + end_date="1900-02-01, 00:00:00", + variable=["mld", "time", "xt_ocean", "yt_ocean"], + variable_long_name=[ + "mixed layer depth determined by density criteria", + "time", + "tcell longitude", + "tcell latitude", + ], + variable_standard_name=[ + "ocean_mixed_layer_thickness_defined_by_sigma_t", + "", + "", + "", + ], + variable_cell_methods=["time: mean", "", "", ""], + variable_units=[ + "m", + "days since 1900-01-01 00:00:00", + "degrees_E", + "degrees_N", + ], + ), + ), + ( + builders.AccessOm2Builder, + "access-om2/output000/ice/OUTPUT/iceh.1900-01.nc", + _AccessNCFileInfo( + path=None, # type: ignore + filename="iceh.1900-01.nc", + file_id="iceh_XXXX_XX", + filename_timestamp="1900-01", + frequency="1mon", + start_date="1900-01-01, 00:00:00", + end_date="1900-02-01, 00:00:00", + variable=["TLAT", "TLON", "aice_m", "tarea", "time", "time_bounds"], + variable_long_name=[ + "T grid center latitude", + "T grid center longitude", + "ice area (aggregate)", + "area of T grid cells", + "model time", + "boundaries for time-averaging interval", + ], + variable_standard_name=["", "", "", "", "", ""], + variable_cell_methods=["", "", "time: mean", "", "", ""], + variable_units=[ + "degrees_north", + "degrees_east", + "1", + "m^2", + "days since 1900-01-01 00:00:00", + "days since 1900-01-01 00:00:00", + ], + ), + ), + ( + builders.AccessCm2Builder, + "access-cm2/by578/history/atm/netCDF/by578a.pd201501_dai.nc", + _AccessNCFileInfo( + path=None, # type: ignore + 
filename="by578a.pd201501_dai.nc", + file_id="by578a_pdXXXXXX_dai", + filename_timestamp="201501", + frequency="1day", + start_date="2015-01-01, 00:00:00", + end_date="2015-02-01, 00:00:00", + variable=["fld_s03i236"], + variable_long_name=["TEMPERATURE AT 1.5M"], + variable_standard_name=["air_temperature"], + variable_cell_methods=["time: mean"], + variable_units=["K"], + ), + ), + ( + builders.AccessCm2Builder, + "access-cm2/by578/history/ice/iceh_d.2015-01.nc", + _AccessNCFileInfo( + path=None, # type: ignore + filename="iceh_d.2015-01.nc", + file_id="iceh_d_XXXX_XX", + filename_timestamp="2015-01", + frequency="1day", + start_date="2015-01-01, 00:00:00", + end_date="2015-02-01, 00:00:00", + variable=["TLAT", "TLON", "aice", "tarea", "time", "time_bounds"], + variable_long_name=[ + "T grid center latitude", + "T grid center longitude", + "ice area (aggregate)", + "area of T grid cells", + "model time", + "boundaries for time-averaging interval", + ], + variable_standard_name=["", "", "", "", "", ""], + variable_cell_methods=["", "", "time: mean", "", "", ""], + variable_units=[ + "degrees_north", + "degrees_east", + "1", + "m^2", + "days since 1850-01-01 00:00:00", + "days since 1850-01-01 00:00:00", + ], + ), + ), + ( + builders.AccessCm2Builder, + "access-cm2/by578/history/ocn/ocean_daily.nc-20150630", + _AccessNCFileInfo( + path=None, # type: ignore + filename="ocean_daily.nc-20150630", + file_id="ocean_daily", + filename_timestamp=None, + frequency="1day", + start_date="2015-01-01, 00:00:00", + end_date="2015-07-01, 00:00:00", + variable=["nv", "sst", "time", "time_bounds", "xt_ocean", "yt_ocean"], + variable_long_name=[ + "vertex number", + "Potential temperature", + "time", + "time axis boundaries", + "tcell longitude", + "tcell latitude", + ], + variable_standard_name=["", "sea_surface_temperature", "", "", "", ""], + variable_cell_methods=["", "time: mean", "", "", "", ""], + variable_units=[ + "none", + "K", + "days since 1850-01-01 00:00:00", + 
"days", + "degrees_E", + "degrees_N", + ], + ), + ), + ( + builders.AccessCm2Builder, + "access-cm2/by578/history/ocn/ocean_scalar.nc-20150630", + _AccessNCFileInfo( + path=None, # type: ignore + filename="ocean_scalar.nc-20150630", + file_id="ocean_scalar", + filename_timestamp=None, + frequency="1mon", + start_date="2015-01-01, 00:00:00", + end_date="2015-07-01, 00:00:00", + variable=[ + "nv", + "scalar_axis", + "temp_global_ave", + "time", + "time_bounds", + ], + variable_long_name=[ + "vertex number", + "none", + "Global mean temp in liquid seawater", + "time", + "time axis boundaries", + ], + variable_standard_name=[ + "", + "", + "sea_water_potential_temperature", + "", + "", + ], + variable_cell_methods=["", "", "time: mean", "", ""], + variable_units=[ + "none", + "none", + "deg_C", + "days since 1850-01-01 00:00:00", + "days", + ], + ), + ), + ( + builders.AccessEsm15Builder, + "access-esm1-5/history/atm/netCDF/HI-C-05-r1.pa-185001_mon.nc", + _AccessNCFileInfo( + path=None, # type: ignore + filename="HI-C-05-r1.pa-185001_mon.nc", + file_id="HI_C_05_r1_pa_XXXXXX_mon", + filename_timestamp="185001", + frequency="1mon", + start_date="1850-01-01, 00:00:00", + end_date="1850-02-01, 00:00:00", + variable=["fld_s03i236"], + variable_long_name=["TEMPERATURE AT 1.5M"], + variable_standard_name=["air_temperature"], + variable_cell_methods=["time: mean"], + variable_units=["K"], + ), + ), + ( + builders.AccessEsm15Builder, + "access-esm1-5/history/ice/iceh.1850-01.nc", + _AccessNCFileInfo( + path=None, # type: ignore + filename="iceh.1850-01.nc", + file_id="iceh_XXXX_XX", + filename_timestamp="1850-01", + frequency="1mon", + start_date="1850-01-01, 00:00:00", + end_date="1850-02-01, 00:00:00", + variable=["TLAT", "TLON", "aice", "tarea", "time", "time_bounds"], + variable_long_name=[ + "T grid center latitude", + "T grid center longitude", + "ice area (aggregate)", + "area of T grid cells", + "model time", + "boundaries for time-averaging interval", + ], + 
variable_standard_name=["", "", "", "", "", ""], + variable_cell_methods=["", "", "time: mean", "", "", ""], + variable_units=[ + "degrees_north", + "degrees_east", + "1", + "m^2", + "days since 0001-01-01 00:00:00", + "days since 0001-01-01 00:00:00", + ], + ), + ), + ( + builders.AccessEsm15Builder, + "access-esm1-5/history/ocn/ocean_bgc_ann.nc-18501231", + _AccessNCFileInfo( + path=None, # type: ignore + filename="ocean_bgc_ann.nc-18501231", + file_id="ocean_bgc_ann", + filename_timestamp=None, + frequency="1yr", + start_date="1849-12-30, 00:00:00", + end_date="1850-12-30, 00:00:00", + variable=[ + "fgco2_raw", + "nv", + "time", + "time_bounds", + "xt_ocean", + "yt_ocean", + ], + variable_long_name=[ + "Flux into ocean - DIC, inc. anth.", + "vertex number", + "time", + "time axis boundaries", + "tcell longitude", + "tcell latitude", + ], + variable_standard_name=["", "", "", "", "", ""], + variable_cell_methods=["time: mean", "", "", "", "", ""], + variable_units=[ + "mmol/m^2/s", + "none", + "days since 0001-01-01 00:00:00", + "days", + "degrees_E", + "degrees_N", + ], + ), + ), + ( + builders.AccessEsm15Builder, + "access-esm1-5/history/ocn/ocean_bgc.nc-18501231", + _AccessNCFileInfo( + path=None, # type: ignore + filename="ocean_bgc.nc-18501231", + file_id="ocean_bgc", + filename_timestamp=None, + frequency="1mon", + start_date="1849-12-30, 00:00:00", + end_date="1850-12-30, 00:00:00", + variable=[ + "nv", + "o2", + "st_ocean", + "time", + "time_bounds", + "xt_ocean", + "yt_ocean", + ], + variable_long_name=[ + "vertex number", + "o2", + "tcell zstar depth", + "time", + "time axis boundaries", + "tcell longitude", + "tcell latitude", + ], + variable_standard_name=["", "", "", "", "", "", ""], + variable_cell_methods=["", "time: mean", "", "", "", "", ""], + variable_units=[ + "none", + "mmol/m^3", + "meters", + "days since 0001-01-01 00:00:00", + "days", + "degrees_E", + "degrees_N", + ], + ), + ), + ( + builders.AccessOm3Builder, + 
"access-om3/output000/GMOM_JRA_WD.mom6.h.native_1900_01.nc", + _AccessNCFileInfo( + path=None, # type: ignore + filename="GMOM_JRA_WD.mom6.h.native_1900_01.nc", + file_id="GMOM_JRA_WD_mom6_h_native_XXXX_XX", + filename_timestamp="1900_01", + frequency="1mon", + start_date="1900-01-01, 00:00:00", + end_date="1900-02-01, 00:00:00", + variable=[ + "average_DT", + "average_T1", + "average_T2", + "nv", + "thetao", + "time", + "time_bnds", + "xh", + "yh", + "zl", + ], + variable_long_name=[ + "Length of average period", + "Start time for average period", + "End time for average period", + "vertex number", + "Sea Water Potential Temperature", + "time", + "time axis boundaries", + "h point nominal longitude", + "h point nominal latitude", + "Layer pseudo-depth, -z*", + ], + variable_standard_name=[ + "", + "", + "", + "", + "sea_water_potential_temperature", + "", + "", + "", + "", + "", + ], + variable_cell_methods=[ + "", + "", + "", + "", + "area:mean zl:mean yh:mean xh:mean time: mean", + "", + "", + "", + "", + "", + ], + variable_units=[ + "days", + "days since 0001-01-01 00:00:00", + "days since 0001-01-01 00:00:00", + "", + "degC", + "days since 0001-01-01 00:00:00", + "days since 0001-01-01 00:00:00", + "degrees_east", + "degrees_north", + "meter", + ], + ), + ), + ( + builders.AccessOm3Builder, + "access-om3/output000/GMOM_JRA_WD.mom6.h.sfc_1900_01_02.nc", + _AccessNCFileInfo( + path=None, # type: ignore + filename="GMOM_JRA_WD.mom6.h.sfc_1900_01_02.nc", + file_id="GMOM_JRA_WD_mom6_h_sfc_XXXX_XX_XX", + filename_timestamp="1900_01_02", + frequency="1day", + start_date="1900-01-01, 00:00:00", + end_date="1900-01-02, 00:00:00", + variable=[ + "average_DT", + "average_T1", + "average_T2", + "nv", + "time", + "time_bnds", + "tos", + "xh", + "yh", + ], + variable_long_name=[ + "Length of average period", + "Start time for average period", + "End time for average period", + "vertex number", + "time", + "time axis boundaries", + "Sea Surface Temperature", + "h point 
nominal longitude", + "h point nominal latitude", + ], + variable_standard_name=[ + "", + "", + "", + "", + "", + "", + "sea_surface_temperature", + "", + "", + ], + variable_cell_methods=[ + "", + "", + "", + "", + "", + "", + "area:mean yh:mean xh:mean time: mean", + "", + "", + ], + variable_units=[ + "days", + "days since 0001-01-01 00:00:00", + "days since 0001-01-01 00:00:00", + "", + "days since 0001-01-01 00:00:00", + "days since 0001-01-01 00:00:00", + "degC", + "degrees_east", + "degrees_north", + ], + ), + ), + ( + builders.AccessOm3Builder, + "access-om3/output000/GMOM_JRA_WD.mom6.h.static.nc", + _AccessNCFileInfo( + path=None, # type: ignore + filename="GMOM_JRA_WD.mom6.h.static.nc", + file_id="GMOM_JRA_WD_mom6_h_static", + filename_timestamp=None, + frequency="fx", + start_date="none", + end_date="none", + variable=["geolat", "geolon", "xh", "yh"], + variable_long_name=[ + "Latitude of tracer (T) points", + "Longitude of tracer (T) points", + "h point nominal longitude", + "h point nominal latitude", + ], + variable_standard_name=["", "", "", ""], + variable_cell_methods=["time: point", "time: point", "", ""], + variable_units=[ + "degrees_north", + "degrees_east", + "degrees_east", + "degrees_north", + ], + ), + ), + ( + builders.AccessOm3Builder, + "access-om3/output000/GMOM_JRA_WD.mom6.h.z_1900_01.nc", + _AccessNCFileInfo( + path=None, # type: ignore + filename="GMOM_JRA_WD.mom6.h.z_1900_01.nc", + file_id="GMOM_JRA_WD_mom6_h_z_XXXX_XX", + filename_timestamp="1900_01", + frequency="1mon", + start_date="1900-01-01, 00:00:00", + end_date="1900-02-01, 00:00:00", + variable=[ + "average_DT", + "average_T1", + "average_T2", + "nv", + "thetao", + "time", + "time_bnds", + "xh", + "yh", + "z_l", + ], + variable_long_name=[ + "Length of average period", + "Start time for average period", + "End time for average period", + "vertex number", + "Sea Water Potential Temperature", + "time", + "time axis boundaries", + "h point nominal longitude", + "h point 
nominal latitude", + "Depth at cell center", + ], + variable_standard_name=[ + "", + "", + "", + "", + "sea_water_potential_temperature", + "", + "", + "", + "", + "", + ], + variable_cell_methods=[ + "", + "", + "", + "", + "area:mean z_l:mean yh:mean xh:mean time: mean", + "", + "", + "", + "", + "", + ], + variable_units=[ + "days", + "days since 0001-01-01 00:00:00", + "days since 0001-01-01 00:00:00", + "", + "degC", + "days since 0001-01-01 00:00:00", + "days since 0001-01-01 00:00:00", + "degrees_east", + "degrees_north", + "meters", + ], + ), + ), + ( + builders.AccessOm3Builder, + "access-om3/output000/GMOM_JRA_WD.cice.h.1900-01-01.nc", + _AccessNCFileInfo( + path=None, # type: ignore + filename="GMOM_JRA_WD.cice.h.1900-01-01.nc", + file_id="GMOM_JRA_WD_cice_h_XXXX_XX_XX", + filename_timestamp="1900-01-01", + frequency="1day", + start_date="1900-01-01, 00:00:00", + end_date="1900-01-02, 00:00:00", + variable=["TLAT", "TLON", "aice", "tarea", "time", "time_bounds"], + variable_long_name=[ + "T grid center latitude", + "T grid center longitude", + "ice area (aggregate)", + "area of T grid cells", + "time", + "time interval endpoints", + ], + variable_standard_name=["", "", "", "", "", ""], + variable_cell_methods=["", "", "time: mean", "", "", ""], + variable_units=[ + "degrees_north", + "degrees_east", + "1", + "m^2", + "days since 0000-01-01 00:00:00", + "days since 0000-01-01 00:00:00", + ], + ), + ), + ( + builders.AccessOm3Builder, + "access-om3/output000/GMOM_JRA_WD.ww3.hi.1900-01-02-00000.nc", + _AccessNCFileInfo( + path=None, # type: ignore + filename="GMOM_JRA_WD.ww3.hi.1900-01-02-00000.nc", + file_id="GMOM_JRA_WD_ww3_hi_XXXX_XX_XX_XXXXX", + filename_timestamp="1900-01-02-00000", + frequency="fx", # WW3 provides no time bounds + start_date="1900-01-02, 00:00:00", + end_date="1900-01-02, 00:00:00", + variable=["EF", "mapsta"], + variable_long_name=["1D spectral density", "map status"], + variable_standard_name=["", ""], + variable_cell_methods=["", 
""], + variable_units=["m2 s", "unitless"], + ), + ), + ], +) +def test_parsed_ncfile_values(test_data, builder, filename, expected): + # In this test, we will refer to the intake-esm dataset as ie_ds and the + # dataset loaded directly with xarray as xr_ds. + file = str(test_data / Path(filename)) + + # Set the path to the test data directory + expected.path = file + + # First we need to ensure that our builer is grabbing the right info - this + # is just a reproduction of `test_parse_access_ncfile`, but if we aren't + # parsing that correctly, then we can't really compare the datasets + assert builder.parse_access_ncfile(file) == expected + xarray_open_kwargs = _get_xarray_open_kwargs("netcdf") + + ie_ds = _open_dataset( + urlpath=expected.path, + varname=expected.variable, + xarray_open_kwargs=xarray_open_kwargs, + requested_variables=expected.variable, + ).compute() + ie_ds.set_coords(set(ie_ds.variables) - set(ie_ds.attrs[OPTIONS["vars_key"]])) + + """ + We need to perform some additional logic that intake-esm does or we are + going to get all sorts of random errors + """ + breakpoint() + xr_ds = xr.open_dataset(file, **xarray_open_kwargs) + + scalar_variables = [v for v in xr_ds.data_vars if len(xr_ds[v].dims) == 0] + xr_ds = xr_ds.set_coords(scalar_variables) + + xr_ds = xr_ds[expected.variable] + + xr.testing.assert_equal(ie_ds, xr_ds) + """ + ^ We expect this to fail in test case 0, but only if we aren't searching for + coordinate variables. This is going to be difficult to detect & test. + + Note: case zero is a grid file $PROJ_ROOT/tests/data/access-om2/output000/ocean/ocean_grid.nc, + which is why it fails if we don't turn on coordinate/static variable search. + All of the other files aer fine, as far as I can tell. 
+ + """ From 73530af488fe1f2c97e969dfc23cad7f9db750ca Mon Sep 17 00:00:00 2001 From: Charles Turner Date: Tue, 22 Oct 2024 11:58:38 +0800 Subject: [PATCH 02/13] Remove redundant breakpoint --- tests/test_builders.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_builders.py b/tests/test_builders.py index 1e9a8223..c1230261 100644 --- a/tests/test_builders.py +++ b/tests/test_builders.py @@ -1855,7 +1855,6 @@ def test_parsed_ncfile_values(test_data, builder, filename, expected): We need to perform some additional logic that intake-esm does or we are going to get all sorts of random errors """ - breakpoint() xr_ds = xr.open_dataset(file, **xarray_open_kwargs) scalar_variables = [v for v in xr_ds.data_vars if len(xr_ds[v].dims) == 0] From 2d921bab282e4f32e92102e0bc78e28f24cd2c52 Mon Sep 17 00:00:00 2001 From: Charles Turner Date: Tue, 22 Oct 2024 12:16:25 +0800 Subject: [PATCH 03/13] Condensed duplicate tests --- tests/test_builders.py | 749 +---------------------------------------- 1 file changed, 10 insertions(+), 739 deletions(-) diff --git a/tests/test_builders.py b/tests/test_builders.py index c1230261..29ddccbf 100644 --- a/tests/test_builders.py +++ b/tests/test_builders.py @@ -363,743 +363,12 @@ def test_parse_access_filename(builder, filename, expected): @pytest.mark.parametrize( - "builder, filename, expected", - [ - ( - builders.AccessOm2Builder, - "access-om2/output000/ocean/ocean_grid.nc", - _AccessNCFileInfo( - path=None, # type: ignore - filename="ocean_grid.nc", - file_id="ocean_grid", - filename_timestamp=None, - frequency="fx", - start_date="none", - end_date="none", - variable=["geolat_t", "geolon_t", "xt_ocean", "yt_ocean"], - variable_long_name=[ - "tracer latitude", - "tracer longitude", - "tcell longitude", - "tcell latitude", - ], - variable_standard_name=["", "", "", ""], - variable_cell_methods=["time: point", "time: point", "", ""], - variable_units=["degrees_N", "degrees_E", "degrees_E", "degrees_N"], - ), - ), - ( - 
builders.AccessOm2Builder, - "access-om2/output000/ocean/ocean.nc", - _AccessNCFileInfo( - path=None, # type: ignore - filename="ocean.nc", - file_id="ocean", - filename_timestamp=None, - frequency="1yr", - start_date="1900-01-01, 00:00:00", - end_date="1910-01-01, 00:00:00", - variable=[ - "nv", - "st_ocean", - "temp", - "time", - "time_bounds", - "xt_ocean", - "yt_ocean", - ], - variable_long_name=[ - "vertex number", - "tcell zstar depth", - "Conservative temperature", - "time", - "time axis boundaries", - "tcell longitude", - "tcell latitude", - ], - variable_standard_name=[ - "", - "", - "sea_water_conservative_temperature", - "", - "", - "", - "", - ], - variable_cell_methods=["", "", "time: mean", "", "", "", ""], - variable_units=[ - "none", - "meters", - "K", - "days since 1900-01-01 00:00:00", - "days", - "degrees_E", - "degrees_N", - ], - ), - ), - ( - builders.AccessOm2Builder, - "access-om2/output000/ocean/ocean_month.nc", - _AccessNCFileInfo( - path=None, # type: ignore - filename="ocean_month.nc", - file_id="ocean_month", - filename_timestamp=None, - frequency="1mon", - start_date="1900-01-01, 00:00:00", - end_date="1910-01-01, 00:00:00", - variable=["mld", "nv", "time", "time_bounds", "xt_ocean", "yt_ocean"], - variable_long_name=[ - "mixed layer depth determined by density criteria", - "vertex number", - "time", - "time axis boundaries", - "tcell longitude", - "tcell latitude", - ], - variable_standard_name=[ - "ocean_mixed_layer_thickness_defined_by_sigma_t", - "", - "", - "", - "", - "", - ], - variable_cell_methods=["time: mean", "", "", "", "", ""], - variable_units=[ - "m", - "none", - "days since 1900-01-01 00:00:00", - "days", - "degrees_E", - "degrees_N", - ], - ), - ), - ( - builders.AccessOm2Builder, - "access-om2/output000/ocean/ocean_month_inst_nobounds.nc", - _AccessNCFileInfo( - path=None, # type: ignore - filename="ocean_month_inst_nobounds.nc", - file_id="ocean_month_inst_nobounds", - filename_timestamp=None, - frequency="1mon", - 
start_date="1900-01-01, 00:00:00", - end_date="1900-02-01, 00:00:00", - variable=["mld", "time", "xt_ocean", "yt_ocean"], - variable_long_name=[ - "mixed layer depth determined by density criteria", - "time", - "tcell longitude", - "tcell latitude", - ], - variable_standard_name=[ - "ocean_mixed_layer_thickness_defined_by_sigma_t", - "", - "", - "", - ], - variable_cell_methods=["time: mean", "", "", ""], - variable_units=[ - "m", - "days since 1900-01-01 00:00:00", - "degrees_E", - "degrees_N", - ], - ), - ), - ( - builders.AccessOm2Builder, - "access-om2/output000/ice/OUTPUT/iceh.1900-01.nc", - _AccessNCFileInfo( - path=None, # type: ignore - filename="iceh.1900-01.nc", - file_id="iceh_XXXX_XX", - filename_timestamp="1900-01", - frequency="1mon", - start_date="1900-01-01, 00:00:00", - end_date="1900-02-01, 00:00:00", - variable=["TLAT", "TLON", "aice_m", "tarea", "time", "time_bounds"], - variable_long_name=[ - "T grid center latitude", - "T grid center longitude", - "ice area (aggregate)", - "area of T grid cells", - "model time", - "boundaries for time-averaging interval", - ], - variable_standard_name=["", "", "", "", "", ""], - variable_cell_methods=["", "", "time: mean", "", "", ""], - variable_units=[ - "degrees_north", - "degrees_east", - "1", - "m^2", - "days since 1900-01-01 00:00:00", - "days since 1900-01-01 00:00:00", - ], - ), - ), - ( - builders.AccessCm2Builder, - "access-cm2/by578/history/atm/netCDF/by578a.pd201501_dai.nc", - _AccessNCFileInfo( - path=None, # type: ignore - filename="by578a.pd201501_dai.nc", - file_id="by578a_pdXXXXXX_dai", - filename_timestamp="201501", - frequency="1day", - start_date="2015-01-01, 00:00:00", - end_date="2015-02-01, 00:00:00", - variable=["fld_s03i236"], - variable_long_name=["TEMPERATURE AT 1.5M"], - variable_standard_name=["air_temperature"], - variable_cell_methods=["time: mean"], - variable_units=["K"], - ), - ), - ( - builders.AccessCm2Builder, - "access-cm2/by578/history/ice/iceh_d.2015-01.nc", - 
_AccessNCFileInfo( - path=None, # type: ignore - filename="iceh_d.2015-01.nc", - file_id="iceh_d_XXXX_XX", - filename_timestamp="2015-01", - frequency="1day", - start_date="2015-01-01, 00:00:00", - end_date="2015-02-01, 00:00:00", - variable=["TLAT", "TLON", "aice", "tarea", "time", "time_bounds"], - variable_long_name=[ - "T grid center latitude", - "T grid center longitude", - "ice area (aggregate)", - "area of T grid cells", - "model time", - "boundaries for time-averaging interval", - ], - variable_standard_name=["", "", "", "", "", ""], - variable_cell_methods=["", "", "time: mean", "", "", ""], - variable_units=[ - "degrees_north", - "degrees_east", - "1", - "m^2", - "days since 1850-01-01 00:00:00", - "days since 1850-01-01 00:00:00", - ], - ), - ), - ( - builders.AccessCm2Builder, - "access-cm2/by578/history/ocn/ocean_daily.nc-20150630", - _AccessNCFileInfo( - path=None, # type: ignore - filename="ocean_daily.nc-20150630", - file_id="ocean_daily", - filename_timestamp=None, - frequency="1day", - start_date="2015-01-01, 00:00:00", - end_date="2015-07-01, 00:00:00", - variable=["nv", "sst", "time", "time_bounds", "xt_ocean", "yt_ocean"], - variable_long_name=[ - "vertex number", - "Potential temperature", - "time", - "time axis boundaries", - "tcell longitude", - "tcell latitude", - ], - variable_standard_name=["", "sea_surface_temperature", "", "", "", ""], - variable_cell_methods=["", "time: mean", "", "", "", ""], - variable_units=[ - "none", - "K", - "days since 1850-01-01 00:00:00", - "days", - "degrees_E", - "degrees_N", - ], - ), - ), - ( - builders.AccessCm2Builder, - "access-cm2/by578/history/ocn/ocean_scalar.nc-20150630", - _AccessNCFileInfo( - path=None, # type: ignore - filename="ocean_scalar.nc-20150630", - file_id="ocean_scalar", - filename_timestamp=None, - frequency="1mon", - start_date="2015-01-01, 00:00:00", - end_date="2015-07-01, 00:00:00", - variable=[ - "nv", - "scalar_axis", - "temp_global_ave", - "time", - "time_bounds", - ], - 
variable_long_name=[ - "vertex number", - "none", - "Global mean temp in liquid seawater", - "time", - "time axis boundaries", - ], - variable_standard_name=[ - "", - "", - "sea_water_potential_temperature", - "", - "", - ], - variable_cell_methods=["", "", "time: mean", "", ""], - variable_units=[ - "none", - "none", - "deg_C", - "days since 1850-01-01 00:00:00", - "days", - ], - ), - ), - ( - builders.AccessEsm15Builder, - "access-esm1-5/history/atm/netCDF/HI-C-05-r1.pa-185001_mon.nc", - _AccessNCFileInfo( - path=None, # type: ignore - filename="HI-C-05-r1.pa-185001_mon.nc", - file_id="HI_C_05_r1_pa_XXXXXX_mon", - filename_timestamp="185001", - frequency="1mon", - start_date="1850-01-01, 00:00:00", - end_date="1850-02-01, 00:00:00", - variable=["fld_s03i236"], - variable_long_name=["TEMPERATURE AT 1.5M"], - variable_standard_name=["air_temperature"], - variable_cell_methods=["time: mean"], - variable_units=["K"], - ), - ), - ( - builders.AccessEsm15Builder, - "access-esm1-5/history/ice/iceh.1850-01.nc", - _AccessNCFileInfo( - path=None, # type: ignore - filename="iceh.1850-01.nc", - file_id="iceh_XXXX_XX", - filename_timestamp="1850-01", - frequency="1mon", - start_date="1850-01-01, 00:00:00", - end_date="1850-02-01, 00:00:00", - variable=["TLAT", "TLON", "aice", "tarea", "time", "time_bounds"], - variable_long_name=[ - "T grid center latitude", - "T grid center longitude", - "ice area (aggregate)", - "area of T grid cells", - "model time", - "boundaries for time-averaging interval", - ], - variable_standard_name=["", "", "", "", "", ""], - variable_cell_methods=["", "", "time: mean", "", "", ""], - variable_units=[ - "degrees_north", - "degrees_east", - "1", - "m^2", - "days since 0001-01-01 00:00:00", - "days since 0001-01-01 00:00:00", - ], - ), - ), - ( - builders.AccessEsm15Builder, - "access-esm1-5/history/ocn/ocean_bgc_ann.nc-18501231", - _AccessNCFileInfo( - path=None, # type: ignore - filename="ocean_bgc_ann.nc-18501231", - file_id="ocean_bgc_ann", - 
filename_timestamp=None, - frequency="1yr", - start_date="1849-12-30, 00:00:00", - end_date="1850-12-30, 00:00:00", - variable=[ - "fgco2_raw", - "nv", - "time", - "time_bounds", - "xt_ocean", - "yt_ocean", - ], - variable_long_name=[ - "Flux into ocean - DIC, inc. anth.", - "vertex number", - "time", - "time axis boundaries", - "tcell longitude", - "tcell latitude", - ], - variable_standard_name=["", "", "", "", "", ""], - variable_cell_methods=["time: mean", "", "", "", "", ""], - variable_units=[ - "mmol/m^2/s", - "none", - "days since 0001-01-01 00:00:00", - "days", - "degrees_E", - "degrees_N", - ], - ), - ), - ( - builders.AccessEsm15Builder, - "access-esm1-5/history/ocn/ocean_bgc.nc-18501231", - _AccessNCFileInfo( - path=None, # type: ignore - filename="ocean_bgc.nc-18501231", - file_id="ocean_bgc", - filename_timestamp=None, - frequency="1mon", - start_date="1849-12-30, 00:00:00", - end_date="1850-12-30, 00:00:00", - variable=[ - "nv", - "o2", - "st_ocean", - "time", - "time_bounds", - "xt_ocean", - "yt_ocean", - ], - variable_long_name=[ - "vertex number", - "o2", - "tcell zstar depth", - "time", - "time axis boundaries", - "tcell longitude", - "tcell latitude", - ], - variable_standard_name=["", "", "", "", "", "", ""], - variable_cell_methods=["", "time: mean", "", "", "", "", ""], - variable_units=[ - "none", - "mmol/m^3", - "meters", - "days since 0001-01-01 00:00:00", - "days", - "degrees_E", - "degrees_N", - ], - ), - ), - ( - builders.AccessOm3Builder, - "access-om3/output000/GMOM_JRA_WD.mom6.h.native_1900_01.nc", - _AccessNCFileInfo( - path=None, # type: ignore - filename="GMOM_JRA_WD.mom6.h.native_1900_01.nc", - file_id="GMOM_JRA_WD_mom6_h_native_XXXX_XX", - filename_timestamp="1900_01", - frequency="1mon", - start_date="1900-01-01, 00:00:00", - end_date="1900-02-01, 00:00:00", - variable=[ - "average_DT", - "average_T1", - "average_T2", - "nv", - "thetao", - "time", - "time_bnds", - "xh", - "yh", - "zl", - ], - variable_long_name=[ - "Length of 
average period", - "Start time for average period", - "End time for average period", - "vertex number", - "Sea Water Potential Temperature", - "time", - "time axis boundaries", - "h point nominal longitude", - "h point nominal latitude", - "Layer pseudo-depth, -z*", - ], - variable_standard_name=[ - "", - "", - "", - "", - "sea_water_potential_temperature", - "", - "", - "", - "", - "", - ], - variable_cell_methods=[ - "", - "", - "", - "", - "area:mean zl:mean yh:mean xh:mean time: mean", - "", - "", - "", - "", - "", - ], - variable_units=[ - "days", - "days since 0001-01-01 00:00:00", - "days since 0001-01-01 00:00:00", - "", - "degC", - "days since 0001-01-01 00:00:00", - "days since 0001-01-01 00:00:00", - "degrees_east", - "degrees_north", - "meter", - ], - ), - ), - ( - builders.AccessOm3Builder, - "access-om3/output000/GMOM_JRA_WD.mom6.h.sfc_1900_01_02.nc", - _AccessNCFileInfo( - path=None, # type: ignore - filename="GMOM_JRA_WD.mom6.h.sfc_1900_01_02.nc", - file_id="GMOM_JRA_WD_mom6_h_sfc_XXXX_XX_XX", - filename_timestamp="1900_01_02", - frequency="1day", - start_date="1900-01-01, 00:00:00", - end_date="1900-01-02, 00:00:00", - variable=[ - "average_DT", - "average_T1", - "average_T2", - "nv", - "time", - "time_bnds", - "tos", - "xh", - "yh", - ], - variable_long_name=[ - "Length of average period", - "Start time for average period", - "End time for average period", - "vertex number", - "time", - "time axis boundaries", - "Sea Surface Temperature", - "h point nominal longitude", - "h point nominal latitude", - ], - variable_standard_name=[ - "", - "", - "", - "", - "", - "", - "sea_surface_temperature", - "", - "", - ], - variable_cell_methods=[ - "", - "", - "", - "", - "", - "", - "area:mean yh:mean xh:mean time: mean", - "", - "", - ], - variable_units=[ - "days", - "days since 0001-01-01 00:00:00", - "days since 0001-01-01 00:00:00", - "", - "days since 0001-01-01 00:00:00", - "days since 0001-01-01 00:00:00", - "degC", - "degrees_east", - 
"degrees_north", - ], - ), - ), - ( - builders.AccessOm3Builder, - "access-om3/output000/GMOM_JRA_WD.mom6.h.static.nc", - _AccessNCFileInfo( - path=None, # type: ignore - filename="GMOM_JRA_WD.mom6.h.static.nc", - file_id="GMOM_JRA_WD_mom6_h_static", - filename_timestamp=None, - frequency="fx", - start_date="none", - end_date="none", - variable=["geolat", "geolon", "xh", "yh"], - variable_long_name=[ - "Latitude of tracer (T) points", - "Longitude of tracer (T) points", - "h point nominal longitude", - "h point nominal latitude", - ], - variable_standard_name=["", "", "", ""], - variable_cell_methods=["time: point", "time: point", "", ""], - variable_units=[ - "degrees_north", - "degrees_east", - "degrees_east", - "degrees_north", - ], - ), - ), - ( - builders.AccessOm3Builder, - "access-om3/output000/GMOM_JRA_WD.mom6.h.z_1900_01.nc", - _AccessNCFileInfo( - path=None, # type: ignore - filename="GMOM_JRA_WD.mom6.h.z_1900_01.nc", - file_id="GMOM_JRA_WD_mom6_h_z_XXXX_XX", - filename_timestamp="1900_01", - frequency="1mon", - start_date="1900-01-01, 00:00:00", - end_date="1900-02-01, 00:00:00", - variable=[ - "average_DT", - "average_T1", - "average_T2", - "nv", - "thetao", - "time", - "time_bnds", - "xh", - "yh", - "z_l", - ], - variable_long_name=[ - "Length of average period", - "Start time for average period", - "End time for average period", - "vertex number", - "Sea Water Potential Temperature", - "time", - "time axis boundaries", - "h point nominal longitude", - "h point nominal latitude", - "Depth at cell center", - ], - variable_standard_name=[ - "", - "", - "", - "", - "sea_water_potential_temperature", - "", - "", - "", - "", - "", - ], - variable_cell_methods=[ - "", - "", - "", - "", - "area:mean z_l:mean yh:mean xh:mean time: mean", - "", - "", - "", - "", - "", - ], - variable_units=[ - "days", - "days since 0001-01-01 00:00:00", - "days since 0001-01-01 00:00:00", - "", - "degC", - "days since 0001-01-01 00:00:00", - "days since 0001-01-01 00:00:00", - 
"degrees_east", - "degrees_north", - "meters", - ], - ), - ), - ( - builders.AccessOm3Builder, - "access-om3/output000/GMOM_JRA_WD.cice.h.1900-01-01.nc", - _AccessNCFileInfo( - path=None, # type: ignore - filename="GMOM_JRA_WD.cice.h.1900-01-01.nc", - file_id="GMOM_JRA_WD_cice_h_XXXX_XX_XX", - filename_timestamp="1900-01-01", - frequency="1day", - start_date="1900-01-01, 00:00:00", - end_date="1900-01-02, 00:00:00", - variable=["TLAT", "TLON", "aice", "tarea", "time", "time_bounds"], - variable_long_name=[ - "T grid center latitude", - "T grid center longitude", - "ice area (aggregate)", - "area of T grid cells", - "time", - "time interval endpoints", - ], - variable_standard_name=["", "", "", "", "", ""], - variable_cell_methods=["", "", "time: mean", "", "", ""], - variable_units=[ - "degrees_north", - "degrees_east", - "1", - "m^2", - "days since 0000-01-01 00:00:00", - "days since 0000-01-01 00:00:00", - ], - ), - ), - ( - builders.AccessOm3Builder, - "access-om3/output000/GMOM_JRA_WD.ww3.hi.1900-01-02-00000.nc", - _AccessNCFileInfo( - path=None, # type: ignore - filename="GMOM_JRA_WD.ww3.hi.1900-01-02-00000.nc", - file_id="GMOM_JRA_WD_ww3_hi_XXXX_XX_XX_XXXXX", - filename_timestamp="1900-01-02-00000", - frequency="fx", # WW3 provides no time bounds - start_date="1900-01-02, 00:00:00", - end_date="1900-01-02, 00:00:00", - variable=["EF", "mapsta"], - variable_long_name=["1D spectral density", "map status"], - variable_standard_name=["", ""], - variable_cell_methods=["", ""], - variable_units=["m2 s", "unitless"], - ), - ), + "compare_files", + [ + (True), + (False), ], ) -def test_parse_access_ncfile(test_data, builder, filename, expected): - file = str(test_data / Path(filename)) - - # Set the path to the test data directory - expected.path = file - - assert builder.parse_access_ncfile(file) == expected - - @pytest.mark.parametrize( "builder, filename, expected", [ @@ -1829,9 +1098,7 @@ def test_parse_access_ncfile(test_data, builder, filename, expected): ), ], 
) -def test_parsed_ncfile_values(test_data, builder, filename, expected): - # In this test, we will refer to the intake-esm dataset as ie_ds and the - # dataset loaded directly with xarray as xr_ds. +def test_parse_access_ncfile(test_data, builder, filename, expected, compare_files): file = str(test_data / Path(filename)) # Set the path to the test data directory @@ -1841,6 +1108,11 @@ def test_parsed_ncfile_values(test_data, builder, filename, expected): # is just a reproduction of `test_parse_access_ncfile`, but if we aren't # parsing that correctly, then we can't really compare the datasets assert builder.parse_access_ncfile(file) == expected + if not compare_files: + return None + + # In the rest of this test, we will refer to the intake-esm dataset as ie_ds + # and the dataset loaded directly with xarray as xr_ds. xarray_open_kwargs = _get_xarray_open_kwargs("netcdf") ie_ds = _open_dataset( @@ -1870,5 +1142,4 @@ def test_parsed_ncfile_values(test_data, builder, filename, expected): Note: case zero is a grid file $PROJ_ROOT/tests/data/access-om2/output000/ocean/ocean_grid.nc, which is why it fails if we don't turn on coordinate/static variable search. All of the other files aer fine, as far as I can tell. - """ From f28f8a5790399c23b1bb1e331f40443a9856b975 Mon Sep 17 00:00:00 2001 From: Charles Turner Date: Tue, 22 Oct 2024 12:38:34 +0800 Subject: [PATCH 04/13] * Added tox config to test migration to 2.0.7 * Added dynamic xfail setting - necessary to ensure that we are correctly determining whether tests should pass or fail based on whether we are using latest intake-esm release or ACCESS-NRI modified version with coord variable detection. 
Uses a pytest_collection_modifyitems function to create xfails for tests which should fail Merge 226 editable install (#228) * Updated the way that the location of the catalog.yaml file is discovered to work in both editable & regular installations * Changed ci.yml to run tests in a default, rather than editable, installation * Updated pyproject.toml & .github/workflows/ci.yml to ensure correct coverage reporting when running tests using a regular installation rather than an editable one * Added explicit test for metadata_template - must have previously been implicitly run --- .github/workflows/ci.yml | 8 ++++++-- pyproject.toml | 8 +++----- tests/conftest.py | 19 +++++++++++++++++++ tests/test_builders.py | 4 +--- tests/test_cli.py | 5 +++++ tests/test_data.py | 18 ++++++++++++------ tox.ini | 36 ++++++++++++++++++++++++++++++++++++ 7 files changed, 82 insertions(+), 16 deletions(-) create mode 100644 tox.ini diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4ef27595..901c6374 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -32,7 +32,7 @@ jobs: - name: Install source shell: bash -l {0} - run: python -m pip install -e . + run: python -m pip install . - name: List installed packages shell: bash -l {0} @@ -40,7 +40,11 @@ jobs: - name: Run tests shell: bash -l {0} - run: python -m pytest -s .
+ run: coverage run -m --source=access_nri_intake pytest + + - name: Generate coverage report + shell: bash -l {0} + run: coverage xml - name: Upload code coverage uses: codecov/codecov-action@v4 diff --git a/pyproject.toml b/pyproject.toml index 4eb49fb3..b15457c0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,6 +31,7 @@ test = [ "pytest-watcher", "pytest-cov", "pytest-sugar", + "tox", ] [project.scripts] @@ -66,13 +67,10 @@ versionfile_build = "access_nri_intake/_version.py" tag_prefix = "v" parentdir_prefix = "access-nri-intake-" -[tool.pytest.ini_options] -addopts = "--cov=./src --cov-report=xml" - [tool.coverage.run] omit = [ - "src/access_nri_intake/_version.py", - "src/access_nri_intake/data/__init__.py", + "*/_version.py", + "*/data/__init__.py", ] [tool.ruff] diff --git a/tests/conftest.py b/tests/conftest.py index edecd3e7..60e9383e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -8,7 +8,26 @@ here = os.path.abspath(os.path.dirname(__file__)) +_add_xfail = int(os.environ.get("XFAILS", "")) + @fixture(scope="session") def test_data(): return Path(os.path.join(here, "data")) + + +def pytest_collection_modifyitems(config, items): + """ + This function is called by pytest to modify the items collected during test + collection. 
I'm going to use it here to mark the xfail tests in + test_builders::test_parse_access_ncfile when we check the file contents & + """ + for item in items: + if ( + item.name + in ( + "test_parse_access_ncfile[AccessOm2Builder-access-om2/output000/ocean/ocean_grid.nc-expected0-True]", + ) + and _add_xfail + ): + item.add_marker("xfail") diff --git a/tests/test_builders.py b/tests/test_builders.py index 29ddccbf..20d62fe7 100644 --- a/tests/test_builders.py +++ b/tests/test_builders.py @@ -1104,10 +1104,8 @@ def test_parse_access_ncfile(test_data, builder, filename, expected, compare_fil # Set the path to the test data directory expected.path = file - # First we need to ensure that our builer is grabbing the right info - this - # is just a reproduction of `test_parse_access_ncfile`, but if we aren't - # parsing that correctly, then we can't really compare the datasets assert builder.parse_access_ncfile(file) == expected + if not compare_files: return None diff --git a/tests/test_cli.py b/tests/test_cli.py index a8edbbab..086abd82 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -14,6 +14,7 @@ MetadataCheckError, _check_build_args, build, + metadata_template, metadata_validate, ) @@ -163,3 +164,7 @@ def test_metadata_validate_no_file(mockargs): with pytest.raises(FileNotFoundError) as excinfo: metadata_validate() assert "No such file(s)" in str(excinfo.value) + + +def test_metadata_template(): + metadata_template() diff --git a/tests/test_data.py b/tests/test_data.py index 514df27e..dc9b1983 100644 --- a/tests/test_data.py +++ b/tests/test_data.py @@ -2,14 +2,20 @@ # SPDX-License-Identifier: Apache-2.0 import os +from pathlib import Path +import access_nri_intake from access_nri_intake.utils import get_catalog_fp def test_get_catalog_fp(): - _oneup = os.path.abspath(os.path.dirname("../")) - assert str(get_catalog_fp()) == str( - os.path.join( - _oneup, "access-nri-intake-catalog/src/access_nri_intake/data/catalog.yaml" - ) - ) + """ + Check that we're 
getting the correct path to the catalog.yaml file. We need + to ensure that this works both in editable & non-editable installs. + """ + INSTALL_DIR = Path(access_nri_intake.__file__).parent + expected_path = os.path.join(INSTALL_DIR, "data/catalog.yaml") + + catalog_fullpath = get_catalog_fp() + + assert str(catalog_fullpath) == expected_path diff --git a/tox.ini b/tox.ini new file mode 100644 index 00000000..3197b108 --- /dev/null +++ b/tox.ini @@ -0,0 +1,36 @@ +[tox] +env_list = + py{39,310,311}-intake{070,207}-intakeesm{access,202426}, +minversion = 4.23.0 + +[testenv] +description = run the tests with pytest +package = wheel +wheel_build_env = .pkg +deps = + .[test] +setenv = + intakeesmaccess: XFAILS=0 + intakeesm202426: XFAILS=1 +commands = + pytest {tty:--color=yes} {posargs:tests} + +[testenv:intake070] +description = Pin the intake version to 0.7.0 +deps = + intake==0.7.0 + +[testenv:intake207] +description = Pin the intake version to 2.0.7 +deps = + intake==2.0.7 + +[testenv:intakeesm202426] +description = Use the most recent version of intake-esm on PyPI +deps = + intake-esm==2024.2.6 + +[testenv:intakeesmaccess] +description = Use the ACCESS-NRI fork of intake-esm +deps = + git+https://github.com/ACCESS-NRI/intake-esm.git@issue_660#egg=intake-esm \ No newline at end of file From 091245a1f59fa238da1002d8dc49b2f35dae5221 Mon Sep 17 00:00:00 2001 From: Charles Turner Date: Thu, 24 Oct 2024 13:01:15 +0800 Subject: [PATCH 05/13] Changed os.environ.get('XFAILS', default) from default='' to default=0 to fix the integer conversion error raised when the environment variable is not specified.
--- tests/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/conftest.py b/tests/conftest.py index 60e9383e..4cd1cf57 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -8,7 +8,7 @@ here = os.path.abspath(os.path.dirname(__file__)) -_add_xfail = int(os.environ.get("XFAILS", "")) +_add_xfail = int(os.environ.get("XFAILS", 0)) @fixture(scope="session") From 13c4d4dafcf27bd05b0649412b2e3073dbe9626e Mon Sep 17 00:00:00 2001 From: Charles Turner Date: Mon, 28 Oct 2024 09:19:13 +0800 Subject: [PATCH 06/13] - Final tox.ini set up, with as sensible factoring as possible - Relaxed intake version on pyproject.toml to >0.7 - Updated conftest to emit a warning if coordinate discovery is disabled (ie. wrong intake-esm version) and no environment variable is set to inform pytest this is the case --- pyproject.toml | 2 +- tests/conftest.py | 21 ++++++++- tox.ini | 117 +++++++++++++++++++++++++++++++++++++++------- 3 files changed, 121 insertions(+), 19 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index b15457c0..bf267a71 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,7 +16,7 @@ classifiers = [ dependencies = [ "cftime", "ecgtools>=2023.7.13", - "intake==0.7.0", + "intake>=0.7.0", "intake-dataframe-catalog>=0.2.4", "intake-esm>=2023.11.10", "jsonschema", diff --git a/tests/conftest.py b/tests/conftest.py index 4cd1cf57..ef4e092c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -2,13 +2,32 @@ # SPDX-License-Identifier: Apache-2.0 import os +import warnings from pathlib import Path from pytest import fixture here = os.path.abspath(os.path.dirname(__file__)) -_add_xfail = int(os.environ.get("XFAILS", 0)) + +def _get_xfail(): + """ + Get the XFAILS environment variable. We're going to to use a default of 1 + """ + xfails_default = 1 + + try: + return int(os.environ["XFAILS"]) + except KeyError: + warnings.warn( + "XFAILS enabled by default as coordinate discovery disabled by default. 
", + "This will be deprecated when coordinate discovery is enabled by default", + PendingDeprecationWarning, + ) + return xfails_default + + +_add_xfail = _get_xfail() @fixture(scope="session") diff --git a/tox.ini b/tox.ini index 3197b108..b954cd98 100644 --- a/tox.ini +++ b/tox.ini @@ -1,36 +1,119 @@ [tox] env_list = - py{39,310,311}-intake{070,207}-intakeesm{access,202426}, + py{310,311}-take2, + py{310,311}-take2coords, + py{310,311}-main, + py{310,311}-coords, + minversion = 4.23.0 [testenv] -description = run the tests with pytest +description = Run the tests with pytest package = wheel wheel_build_env = .pkg deps = + pytest .[test] +commands = + pytest \ + -W ignore::UserWarning \ + ; Unable to parse $N assets + ; Frequency derived from filename does not match frequency determined from file contents + -W ignore::DeprecationWarning \ + ; PydanticDeprecatedSince20 Warning + -W ignore::RuntimeWarning \ + ; Numpy ABI mismatch warning + {tty:--color=yes} {posargs:tests} + +[testenv:coordsdisabled] +setenv = + XFAILS=1 + ; We expect correctness checks to fail here because coordinate discovery isn't + ; enabled in the main branch of intake-esm. This toggles on xfail marks in + ; conftest.py. + +[testenv:coordsenabled] setenv = - intakeesmaccess: XFAILS=0 - intakeesm202426: XFAILS=1 -commands = - pytest {tty:--color=yes} {posargs:tests} + XFAILS=0 + ; We expect correctness checks to pass here because coordinate discovery is + ; enabled in the main branch of intake-esm. This wont toggle on xfail marks in + ; conftest.py. 
-[testenv:intake070] -description = Pin the intake version to 0.7.0 +[testenv:base-main] +description = Pin the intake version to 0.7.0, run pytest deps = + {[testenv]deps} + git+https://github.com/ACCESS-NRI/intake-dataframe-catalog.git@main#egg=intake_dataframe_catalog intake==0.7.0 -[testenv:intake207] +[testenv:base-take2] description = Pin the intake version to 2.0.7 deps = - intake==2.0.7 + {[testenv]deps} + git+https://github.com/ACCESS-NRI/intake-dataframe-catalog.git@take2#egg=intake_dataframe_catalog + git+https://github.com/ACCESS-NRI/intake-esm.git@take2#egg=intake-esm + intake>=2.0.0 -[testenv:intakeesm202426] -description = Use the most recent version of intake-esm on PyPI +[testenv:base-coords] +description = Use the ACCESS-NRI fork of intake-esm, branch issue_660-coords +deps = + {[testenv]deps} + git+https://github.com/ACCESS-NRI/intake-esm.git@issue_660-coords#egg=intake-esm + git+https://github.com/ACCESS-NRI/intake-dataframe-catalog.git@main#egg=intake_dataframe_catalog + intake==0.7.0 + +[testenv:base-take2coords] +description = Use the ACCESS-NRI fork of intake-esm, branch take2-coords deps = - intake-esm==2024.2.6 + {[testenv]deps} + git+https://github.com/ACCESS-NRI/intake-dataframe-catalog.git@take2#egg=intake_dataframe_catalog + git+https://github.com/ACCESS-NRI/intake-esm.git@take2-coords#egg=intake-esm + intake>=2.0.0 -[testenv:intakeesmaccess] -description = Use the ACCESS-NRI fork of intake-esm -deps = - git+https://github.com/ACCESS-NRI/intake-esm.git@issue_660#egg=intake-esm \ No newline at end of file +[testenv:py310-main] +basepython = python3.10 +deps = {[testenv:base-main]deps} +setenv = {[testenv:coordsdisabled]setenv} +commands = {[testenv]commands} + +[testenv:py311-main] +basepython = python3.11 +deps = {[testenv:base-main]deps} +setenv = {[testenv:coordsdisabled]setenv} +commands = {[testenv]commands} + +[testenv:py310-take2] +basepython = python3.10 +deps = {[testenv:base-take2]deps} +setenv = 
{[testenv:coordsdisabled]setenv} +commands = {[testenv]commands} + +[testenv:py311-take2] +basepython = python3.11 +deps = {[testenv:base-take2]deps} +setenv = {[testenv:coordsdisabled]setenv} +commands = {[testenv]commands} + +[testenv:py310-coords] +basepython = python3.10 +deps = {[testenv:base-coords]deps} +setenv = {[testenv:coordsenabled]setenv} +commands = {[testenv]commands} + +[testenv:py311-coords] +basepython = python3.11 +deps = {[testenv:base-coords]deps} +setenv = {[testenv:coordsenabled]setenv} +commands = {[testenv]commands} + +[testenv:py310-take2coords] +basepython = python3.10 +deps = {[testenv:base-take2coords]deps} +setenv = {[testenv:coordsenabled]setenv} +commands = {[testenv]commands} + +[testenv:py311-take2coords] +basepython = python3.11 +deps = {[testenv:base-take2coords]deps} +setenv = {[testenv:coordsenabled]setenv} +commands = {[testenv]commands} From 9b04e1eee47baa80ef734db4f3712de8626136c0 Mon Sep 17 00:00:00 2001 From: Charles Turner Date: Mon, 28 Oct 2024 12:44:23 +0800 Subject: [PATCH 07/13] Removed some unnecessary dependencies from .[test], updated warning call in conftest to use keyword arguments rather than positional, updated comments in test_parse_access_ncfile to be more descriptive --- pyproject.toml | 3 --- tests/conftest.py | 14 +++++++++----- tests/test_builders.py | 21 +++++++-------------- 3 files changed, 16 insertions(+), 22 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index bf267a71..97964914 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,9 +28,6 @@ dynamic = ["version"] [project.optional-dependencies] test = [ "pytest", - "pytest-watcher", - "pytest-cov", - "pytest-sugar", "tox", ] diff --git a/tests/conftest.py b/tests/conftest.py index ef4e092c..d84ac63a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -20,9 +20,11 @@ def _get_xfail(): return int(os.environ["XFAILS"]) except KeyError: warnings.warn( - "XFAILS enabled by default as coordinate discovery disabled by default. 
", - "This will be deprecated when coordinate discovery is enabled by default", - PendingDeprecationWarning, + message=( + "XFAILS enabled by default as coordinate discovery disabled by default. " + "This will be deprecated when coordinate discovery is enabled by default" + ), + category=PendingDeprecationWarning, ) return xfails_default @@ -38,8 +40,10 @@ def test_data(): def pytest_collection_modifyitems(config, items): """ This function is called by pytest to modify the items collected during test - collection. I'm going to use it here to mark the xfail tests in - test_builders::test_parse_access_ncfile when we check the file contents & + collection. We use it here to mark the xfail tests in + test_builders::test_parse_access_ncfile when we check the file contents & to + ensure we correctly get xfails if we don't have cordinate discovery enabled + in intake-esm. """ for item in items: if ( diff --git a/tests/test_builders.py b/tests/test_builders.py index 20d62fe7..fd64c1be 100644 --- a/tests/test_builders.py +++ b/tests/test_builders.py @@ -1109,8 +1109,13 @@ def test_parse_access_ncfile(test_data, builder, filename, expected, compare_fil if not compare_files: return None - # In the rest of this test, we will refer to the intake-esm dataset as ie_ds - # and the dataset loaded directly with xarray as xr_ds. + """ + In the rest of this test, we refer to the dataset loaded using intake-esm + as ie_ds and the dataset loaded directly with xarray as xr_ds. + + We also need to perform some additional logic that intake-esm does to avoid + xr.testing.assert_equal from failing due to preprocessing differences. 
+ """ xarray_open_kwargs = _get_xarray_open_kwargs("netcdf") ie_ds = _open_dataset( @@ -1121,10 +1126,6 @@ def test_parse_access_ncfile(test_data, builder, filename, expected, compare_fil ).compute() ie_ds.set_coords(set(ie_ds.variables) - set(ie_ds.attrs[OPTIONS["vars_key"]])) - """ - We need to perform some additional logic that intake-esm does or we are - going to get all sorts of random errors - """ xr_ds = xr.open_dataset(file, **xarray_open_kwargs) scalar_variables = [v for v in xr_ds.data_vars if len(xr_ds[v].dims) == 0] @@ -1133,11 +1134,3 @@ def test_parse_access_ncfile(test_data, builder, filename, expected, compare_fil xr_ds = xr_ds[expected.variable] xr.testing.assert_equal(ie_ds, xr_ds) - """ - ^ We expect this to fail in test case 0, but only if we aren't searching for - coordinate variables. This is going to be difficult to detect & test. - - Note: case zero is a grid file $PROJ_ROOT/tests/data/access-om2/output000/ocean/ocean_grid.nc, - which is why it fails if we don't turn on coordinate/static variable search. - All of the other files aer fine, as far as I can tell. 
- """ From 9b2c5c0782700fbcfe1f65a3b2af476b9953fe1d Mon Sep 17 00:00:00 2001 From: Charles Turner Date: Mon, 28 Oct 2024 12:52:35 +0800 Subject: [PATCH 08/13] Added python3.9 to tox.ini --- tox.ini | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tox.ini b/tox.ini index b954cd98..ec2dba0d 100644 --- a/tox.ini +++ b/tox.ini @@ -1,9 +1,9 @@ [tox] env_list = - py{310,311}-take2, - py{310,311}-take2coords, - py{310,311}-main, - py{310,311}-coords, + py{39,310,311}-take2, + py{39,310,311}-take2coords, + py{39,310,311}-main, + py{39,310,311}-coords, minversion = 4.23.0 From d27e3c10fe71643e33f88ed26f249783d20b1d70 Mon Sep 17 00:00:00 2001 From: Charles Turner Date: Mon, 28 Oct 2024 13:08:38 +0800 Subject: [PATCH 09/13] Removed python3.9 again (not supported by intake-esm) --- tox.ini | 49 +++++++++++-------------------------------------- 1 file changed, 11 insertions(+), 38 deletions(-) diff --git a/tox.ini b/tox.ini index ec2dba0d..3dc57857 100644 --- a/tox.ini +++ b/tox.ini @@ -1,9 +1,7 @@ [tox] env_list = - py{39,310,311}-take2, - py{39,310,311}-take2coords, - py{39,310,311}-main, - py{39,310,311}-coords, + py{310,311}-main, + py{310,311}-coords, minversion = 4.23.0 @@ -46,13 +44,6 @@ deps = git+https://github.com/ACCESS-NRI/intake-dataframe-catalog.git@main#egg=intake_dataframe_catalog intake==0.7.0 -[testenv:base-take2] -description = Pin the intake version to 2.0.7 -deps = - {[testenv]deps} - git+https://github.com/ACCESS-NRI/intake-dataframe-catalog.git@take2#egg=intake_dataframe_catalog - git+https://github.com/ACCESS-NRI/intake-esm.git@take2#egg=intake-esm - intake>=2.0.0 [testenv:base-coords] description = Use the ACCESS-NRI fork of intake-esm, branch issue_660-coords @@ -62,13 +53,11 @@ deps = git+https://github.com/ACCESS-NRI/intake-dataframe-catalog.git@main#egg=intake_dataframe_catalog intake==0.7.0 -[testenv:base-take2coords] -description = Use the ACCESS-NRI fork of intake-esm, branch take2-coords -deps = - {[testenv]deps} - 
git+https://github.com/ACCESS-NRI/intake-dataframe-catalog.git@take2#egg=intake_dataframe_catalog - git+https://github.com/ACCESS-NRI/intake-esm.git@take2-coords#egg=intake-esm - intake>=2.0.0 +[testenv:py39-main] +basepython = python3.9 +deps = {[testenv:base-main]deps} +setenv = {[testenv:coordsdisabled]setenv} +commands = {[testenv]commands} [testenv:py310-main] basepython = python3.10 @@ -82,16 +71,10 @@ deps = {[testenv:base-main]deps} setenv = {[testenv:coordsdisabled]setenv} commands = {[testenv]commands} -[testenv:py310-take2] -basepython = python3.10 -deps = {[testenv:base-take2]deps} -setenv = {[testenv:coordsdisabled]setenv} -commands = {[testenv]commands} - -[testenv:py311-take2] -basepython = python3.11 -deps = {[testenv:base-take2]deps} -setenv = {[testenv:coordsdisabled]setenv} +[testenv:py39-coords] +basepython = python3.9 +deps = {[testenv:base-coords]deps} +setenv = {[testenv:coordsenabled]setenv} commands = {[testenv]commands} [testenv:py310-coords] @@ -106,14 +89,4 @@ deps = {[testenv:base-coords]deps} setenv = {[testenv:coordsenabled]setenv} commands = {[testenv]commands} -[testenv:py310-take2coords] -basepython = python3.10 -deps = {[testenv:base-take2coords]deps} -setenv = {[testenv:coordsenabled]setenv} -commands = {[testenv]commands} -[testenv:py311-take2coords] -basepython = python3.11 -deps = {[testenv:base-take2coords]deps} -setenv = {[testenv:coordsenabled]setenv} -commands = {[testenv]commands} From a9943a4b86afe73e770e582be6fb6c3b3d7cda96 Mon Sep 17 00:00:00 2001 From: Charles Turner Date: Mon, 28 Oct 2024 13:09:42 +0800 Subject: [PATCH 10/13] Repinned intake to 0.7.0 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 97964914..2bc6b75e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,7 +16,7 @@ classifiers = [ dependencies = [ "cftime", "ecgtools>=2023.7.13", - "intake>=0.7.0", + "intake==0.7.0", "intake-dataframe-catalog>=0.2.4", "intake-esm>=2023.11.10", 
"jsonschema", From 0e6a5abefbe3f5c5c22b6f74b552a99d650551fe Mon Sep 17 00:00:00 2001 From: Charles Turner Date: Mon, 28 Oct 2024 14:38:34 +0800 Subject: [PATCH 11/13] Changes xfails explanation to be more terse --- tox.ini | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tox.ini b/tox.ini index 3dc57857..2e8cdd98 100644 --- a/tox.ini +++ b/tox.ini @@ -27,15 +27,13 @@ commands = setenv = XFAILS=1 ; We expect correctness checks to fail here because coordinate discovery isn't - ; enabled in the main branch of intake-esm. This toggles on xfail marks in - ; conftest.py. + ; enabled in the main branch of intake-esm. This enables xfail marks in conftest.py. [testenv:coordsenabled] setenv = XFAILS=0 ; We expect correctness checks to pass here because coordinate discovery is - ; enabled in the main branch of intake-esm. This wont toggle on xfail marks in - ; conftest.py. + ; enabled in the main branch of intake-esm. This disables xfail marks in conftest.py. [testenv:base-main] description = Pin the intake version to 0.7.0, run pytest From 83bae0825ef133b8d9d0a777b651573e6ddc846c Mon Sep 17 00:00:00 2001 From: Charles Turner Date: Tue, 29 Oct 2024 04:31:43 +0800 Subject: [PATCH 12/13] Made _get_xfail() docstr clearer --- tests/conftest.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/conftest.py b/tests/conftest.py index d84ac63a..dfc8a12d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -12,7 +12,9 @@ def _get_xfail(): """ - Get the XFAILS environment variable. We're going to to use a default of 1 + Get the XFAILS environment variable. We use a default of 1, indicating we expect + to add xfail marker to `test_parse_access_ncfile[AccessOm2Builder-access-om2/output000/ocean/ocean_grid.nc-expected0-True]` + unless specified. 
""" xfails_default = 1 From 22c36c69dd070fdc7637f67ac8c9565a63765ad4 Mon Sep 17 00:00:00 2001 From: Charles Turner <52199577+charles-turner-1@users.noreply.github.com> Date: Tue, 29 Oct 2024 05:55:56 +0800 Subject: [PATCH 13/13] Intake Take2 (#233) * Rebased 153 onto 187 * gelaxed intake version on pyproject.toml to >0.7 (take2) & added tox environments for take2 --- pyproject.toml | 2 +- tox.ini | 55 +++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 55 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 2bc6b75e..97964914 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,7 +16,7 @@ classifiers = [ dependencies = [ "cftime", "ecgtools>=2023.7.13", - "intake==0.7.0", + "intake>=0.7.0", "intake-dataframe-catalog>=0.2.4", "intake-esm>=2023.11.10", "jsonschema", diff --git a/tox.ini b/tox.ini index 2e8cdd98..8fc25653 100644 --- a/tox.ini +++ b/tox.ini @@ -2,6 +2,8 @@ env_list = py{310,311}-main, py{310,311}-coords, + py{310,311}-take2, + py{310,311}-take2coords minversion = 4.23.0 @@ -27,7 +29,8 @@ commands = setenv = XFAILS=1 ; We expect correctness checks to fail here because coordinate discovery isn't - ; enabled in the main branch of intake-esm. This enables xfail marks in conftest.py. + ; enabled in the main branch of intake-esm. This toggles on xfail marks in + ; conftest.py. 
[testenv:coordsenabled] setenv = @@ -51,6 +54,22 @@ deps = git+https://github.com/ACCESS-NRI/intake-dataframe-catalog.git@main#egg=intake_dataframe_catalog intake==0.7.0 +[testenv:base-take2] +description = Require intake>=2.0.0 (take2), run pytest +deps = + {[testenv]deps} + git+https://github.com/ACCESS-NRI/intake-dataframe-catalog.git@take2#egg=intake_dataframe_catalog + intake>=2.0.0 + + +[testenv:base-take2coords] +description = Use the ACCESS-NRI fork of intake-esm, branch take2-coords +deps = + {[testenv]deps} + git+https://github.com/ACCESS-NRI/intake-esm.git@take2-coords#egg=intake-esm + git+https://github.com/ACCESS-NRI/intake-dataframe-catalog.git@take2#egg=intake_dataframe_catalog + intake>=2.0.0 + [testenv:py39-main] basepython = python3.9 deps = {[testenv:base-main]deps} @@ -87,4 +106,38 @@ deps = {[testenv:base-coords]deps} setenv = {[testenv:coordsenabled]setenv} commands = {[testenv]commands} +[testenv:py39-take2] +basepython = python3.9 +deps = {[testenv:base-take2]deps} +setenv = {[testenv:coordsdisabled]setenv} +commands = {[testenv]commands} + +[testenv:py310-take2] +basepython = python3.10 +deps = {[testenv:base-take2]deps} +setenv = {[testenv:coordsdisabled]setenv} +commands = {[testenv]commands} +[testenv:py311-take2] +basepython = python3.11 +deps = {[testenv:base-take2]deps} +setenv = {[testenv:coordsdisabled]setenv} +commands = {[testenv]commands} + +[testenv:py39-take2coords] +basepython = python3.9 +deps = {[testenv:base-take2coords]deps} +setenv = {[testenv:coordsenabled]setenv} +commands = {[testenv]commands} + +[testenv:py310-take2coords] +basepython = python3.10 +deps = {[testenv:base-take2coords]deps} +setenv = {[testenv:coordsenabled]setenv} +commands = {[testenv]commands} + +[testenv:py311-take2coords] +basepython = python3.11 +deps = {[testenv:base-take2coords]deps} +setenv = {[testenv:coordsenabled]setenv} +commands = {[testenv]commands}