diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml index 06f2694e..890fc757 100644 --- a/.github/workflows/cd.yml +++ b/.github/workflows/cd.yml @@ -12,7 +12,7 @@ jobs: steps: - name: Checkout source - uses: actions/checkout@v4.2.0 + uses: actions/checkout@v4.2.1 - name: Set up Python 3.11 uses: actions/setup-python@v5 @@ -39,7 +39,7 @@ jobs: steps: - name: Checkout source - uses: actions/checkout@v4.2.0 + uses: actions/checkout@v4.2.1 - name: Setup conda environment uses: conda-incubator/setup-miniconda@v3 diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 46b0a3fc..4ef27595 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -6,7 +6,7 @@ jobs: pre-commit: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4.2.0 + - uses: actions/checkout@v4.2.1 - uses: actions/setup-python@v5 - uses: pre-commit/action@v3.0.1 @@ -19,7 +19,7 @@ jobs: steps: - name: Checkout source - uses: actions/checkout@v4.2.0 + uses: actions/checkout@v4.2.1 - name: Setup conda environment uses: conda-incubator/setup-miniconda@v3 diff --git a/.github/workflows/gadi_deploy.yml b/.github/workflows/gadi_deploy.yml index dcae13c4..6d8366dd 100644 --- a/.github/workflows/gadi_deploy.yml +++ b/.github/workflows/gadi_deploy.yml @@ -12,7 +12,7 @@ jobs: steps: - name: Checkout repository ### Latest at time of writing - uses: actions/checkout@v4.2.0 + uses: actions/checkout@v4.2.1 - name: Sync repository to Gadi ### Latest at time of writing uses: up9cloud/action-rsync@v1.4 diff --git a/config/cordex.yaml b/config/cordex.yaml new file mode 100644 index 00000000..ca947025 --- /dev/null +++ b/config/cordex.yaml @@ -0,0 +1,9 @@ +builder: null + +translator: CordexTranslator + +sources: + + - metadata_yaml: /g/data/xp65/admin/intake/metadata/cordex_ig45/metadata.yaml + path: + - /g/data/ig45/catalog/v2/esm/catalog.json diff --git a/config/metadata_sources/cordex-ig45/metadata.yaml b/config/metadata_sources/cordex-ig45/metadata.yaml new file mode 
100644 index 00000000..88b9a197 --- /dev/null +++ b/config/metadata_sources/cordex-ig45/metadata.yaml @@ -0,0 +1,126 @@ +name: cordex_ig45 +experiment_uuid: c7021d1e-7ba2-11ef-beb5-000007d3fe80 +description: 20km regional projections for CORDEX-CMIP6 from the Queensland Future Climate Science Program +long_description: >- + This dataset includes projections at 20km and 10km, formatted to meet the CORDEX-CMIP6 data standards. + The 20km projections were derived from the 10km projections. +model: +- CMIP6 +frequency: +- day +- mon +- 1hr +- fx +variable: +- clt +- tauv +- clh +- clwvi +- ua850 +- sund +- ua100m +- va250 +- uas +- prc +- vas +- mrfso +- rlds +- ta200 +- hus1000 +- hus600 +- prw +- hus850 +- va200 +- tas +- clivi +- zg200 +- rsut +- va600 +- rsdt +- tasmax +- sfcWindmax +- va850 +- mrso +- ps +- hus400 +- ta1000 +- ua250 +- tauu +- pr +- va925 +- snc +- hus200 +- clm +- zg500 +- hurs +- rlut +- hus300 +- rsds +- ua200 +- psl +- ta850 +- va400 +- zg400 +- snm +- ta925 +- prsn +- hus250 +- zg1000 +- ta600 +- zg925 +- huss +- ta500 +- va1000 +- zg700 +- zmla +- hfss +- zg850 +- ua925 +- zg600 +- ua300 +- rsus +- hus500 +- sfcWind +- ts +- va500 +- va100m +- ua500 +- ua700 +- va700 +- soilt +- snd +- ua1000 +- ta700 +- hfls +- tasmin +- zg250 +- cll +- hus700 +- rlus +- va300 +- ua600 +- hus925 +- ta250 +- ua400 +- prhmax +- sftlf +- ta400 +- ta300 +- snw +- zg300 +- orog +- sftlaf +nominal_resolution: +- 20km +- 10km +version: +contact: NCI +email: help@nci.org.au +reference: +license: +url: https://geonetwork.nci.org.au/geonetwork/srv/eng/catalog.search#/metadata/f7465_8388_5100_7022 +parent_experiment: +related_experiments: +notes: +keywords: +- cmip diff --git a/src/access_nri_intake/catalog/translators.py b/src/access_nri_intake/catalog/translators.py index 3f01b9cf..430dd963 100644 --- a/src/access_nri_intake/catalog/translators.py +++ b/src/access_nri_intake/catalog/translators.py @@ -6,6 +6,7 @@ like the ACCESS-NRI catalog """ +from dataclasses 
import dataclass from functools import partial from typing import Callable, Optional @@ -15,7 +16,7 @@ from . import COLUMNS_WITH_ITERABLES -frequency_translations = { +FREQUENCY_TRANSLATIONS = { "3hrPt": "3hr", "6hrPt": "6hr", "daily": "1day", @@ -31,6 +32,36 @@ } +def _to_tuple(series: pd.Series) -> pd.Series: + """ + Make each entry in the provided series a tuple + + Parameters + ---------- + series: :py:class:`~pandas.Series` + A pandas Series or another object with an `apply` method + """ + return series.apply(lambda x: (x,)) + + +def tuplify_series(func: Callable) -> Callable: + """ + Decorator that wraps a function that returns a pandas Series and converts + each entry in the series to a tuple + """ + + def wrapper(*args, **kwargs): + # Check if the first argument is 'self' + if len(args) > 0 and hasattr(args[0], "__class__"): + self = args[0] + series = func(self, *args[1:], **kwargs) + else: + series = func(*args, **kwargs) + return _to_tuple(series) + + return wrapper + + class TranslatorError(Exception): "Generic Exception for the Translator classes" @@ -67,6 +98,7 @@ def __init__(self, source: DataSource, columns: list[str]): column: partial(self._default_translator, column=column) for column in columns } + self._dispatch_keys = _DispatchKeys() def _default_translator(self, column: str) -> pd.Series: """ @@ -161,109 +193,131 @@ def _unique_values(series): return df[self.columns] # Preserve ordering - -class Cmip6Translator(DefaultTranslator): - """ - CMIP6 Translator for translating metadata from the NCI CMIP6 intake datastores. - """ - - def __init__(self, source: DataSource, columns: list[str]): + def set_dispatch( + self, core_colname: str, func: Callable, input_name: Optional[str] = None + ): """ - Initialise a Cmip6Translator + Set a dispatch function for a column. Typically only required when either: + 1. `core_colname != input_name` + 2. A custom translation function (`func`) is required. 
Parameters ---------- - source: :py:class:`~intake.DataSource` - The NCI CMIP6 intake-esm datastore - columns: list of str - The columns to translate to (these are the core columns in the intake-dataframe-catalog) - """ - - super().__init__(source, columns) - self._dispatch["model"] = self._model_translator - self._dispatch["realm"] = self._realm_translator - self._dispatch["frequency"] = self._frequency_translator - self._dispatch["variable"] = self._variable_translator + core_colname: str + The core column name to translate to + input_name: str, optional + The name of the column in the source. If not provided, this defaults + to none, and no translation will occur + func: callable + The function to translate the column + """ + if core_colname not in ["model", "realm", "frequency", "variable"]: + raise TranslatorError( + f"'core_colname' must be one of 'model', 'realm', 'frequency', 'variable', not {core_colname}" + ) + self._dispatch[core_colname] = func + setattr(self._dispatch_keys, core_colname, input_name) - def _model_translator(self): + def _realm_translator(self) -> pd.Series: """ - Return model from source_id + Return realm, fixing a few issues """ - return _to_tuple(self.source.df["source_id"]) + return _cmip_realm_translator(self.source.df[self._dispatch_keys.realm]) - def _realm_translator(self): + @tuplify_series + def _model_translator(self) -> pd.Series: """ - Return realm, fixing a few issues + Return model from dispatch_keys.model """ - return _cmip_realm_translator(self.source.df["realm"]) + return self.source.df[self._dispatch_keys.model] - def _frequency_translator(self): + @tuplify_series + def _frequency_translator(self) -> pd.Series: """ Return frequency, fixing a few issues """ - return _to_tuple( - self.source.df["frequency"].apply( - lambda x: frequency_translations.get(x, x) - ) + return self.source.df[self._dispatch_keys.frequency].apply( + lambda x: FREQUENCY_TRANSLATIONS.get(x, x) ) - def _variable_translator(self): + @tuplify_series 
class Cmip6Translator(DefaultTranslator):
    """
    Translator for the metadata held in the NCI CMIP6 intake datastores.
    """

    def __init__(self, source: DataSource, columns: list[str]):
        """
        Set up a Cmip6Translator and register its column translations

        Parameters
        ----------
        source: :py:class:`~intake.DataSource`
            The NCI CMIP6 intake-esm datastore
        columns: list of str
            The columns to translate to (these are the core columns in the intake-dataframe-catalog)
        """

        super().__init__(source, columns)

        parent = super()
        # Each row: (core column, column name in the source, translation function)
        routes = (
            ("model", "source_id", parent._model_translator),
            ("realm", "realm", parent._realm_translator),
            ("frequency", "frequency", parent._frequency_translator),
            ("variable", "variable_id", parent._variable_translator),
        )
        for core_colname, input_name, func in routes:
            self.set_dispatch(
                input_name=input_name, core_colname=core_colname, func=func
            )
+ """ - def _frequency_translator(self): - """ - Return frequency, fixing a few issues + def __init__(self, source: DataSource, columns: list[str]): """ - return _to_tuple( - self.source.df["frequency"].apply( - lambda x: frequency_translations.get(x, x) - ) - ) + Initialise a Cmip5Translator - def _variable_translator(self): - """ - Return variable as a tuple + Parameters + ---------- + source: :py:class:`~intake.DataSource` + The NCI CMIP5 intake-esm datastore + columns: list of str + The columns to translate to (these are the core columns in the intake-dataframe-catalog) """ - return _to_tuple(self.source.df["variable"]) + + super().__init__(source, columns) + self.set_dispatch( + input_name="model", core_colname="model", func=super()._model_translator + ) + self.set_dispatch( + input_name="realm", core_colname="realm", func=super()._realm_translator + ) + self.set_dispatch( + input_name="frequency", + core_colname="frequency", + func=super()._frequency_translator, + ) + self.set_dispatch( + input_name="variable", + core_colname="variable", + func=super()._variable_translator, + ) class EraiTranslator(DefaultTranslator): @@ -284,13 +338,12 @@ def __init__(self, source: DataSource, columns: list[str]): """ super().__init__(source, columns) - self._dispatch["variable"] = self._variable_translator - def _variable_translator(self): - """ - Return variable as a tuple - """ - return _to_tuple(self.source.df["variable"]) + self.set_dispatch( + input_name="variable", + core_colname="variable", + func=super()._variable_translator, + ) class BarpaTranslator(DefaultTranslator): @@ -311,16 +364,22 @@ def __init__(self, source, columns): """ super().__init__(source, columns) - self._dispatch["model"] = self._model_translator - self._dispatch["realm"] = self._realm_translator - self._dispatch["frequency"] = self._frequency_translator - self._dispatch["variable"] = self._variable_translator - - def _model_translator(self): - """ - Return model from source_id - """ - return 
class CordexTranslator(DefaultTranslator):
    """
    Cordex Translator for translating metadata from the NCI CORDEX intake datastores.
    """

    def __init__(self, source: DataSource, columns: list[str]):
        """
        Initialise a CordexTranslator

        Parameters
        ----------
        source: :py:class:`~intake.DataSource`
            The NCI CORDEX intake-esm datastore
        columns: list of str
            The columns to translate to (these are the core columns in the intake-dataframe-catalog)
        """

        super().__init__(source, columns)
        self.set_dispatch(
            input_name="source_id", core_colname="model", func=super()._model_translator
        )
        self.set_dispatch(
            input_name="variable_id",
            core_colname="variable",
            func=super()._variable_translator,
        )
        # NOTE(review): no "frequency" dispatch is registered here, so that column
        # falls through to the default translator and FREQUENCY_TRANSLATIONS is not
        # applied, unlike the CMIP and BARPA translators - confirm this is intended.
        self.set_dispatch(
            input_name="realm", core_colname="realm", func=self._realm_translator
        )

    def _realm_translator(self) -> pd.Series:
        """
        Return a constant realm of ``("none",)`` for every row of the source dataframe

        Returns
        -------
        :py:class:`~pandas.Series`
            One ``("none",)`` tuple per row, sharing the source dataframe's index
        """
        frame = self.source.df
        # Build the constant column directly instead of a per-row DataFrame.apply
        return pd.Series([("none",)] * len(frame), index=frame.index)
Optional[str] = None + frequency: Optional[str] = None + variable: Optional[str] = None def _cmip_realm_translator(series) -> pd.Series: @@ -359,23 +453,10 @@ def _translate(string: str) -> tuple[str, ...]: } raw_realms = string.split(" ") - realms = [] + realms = set() for realm in raw_realms: realm = translations.get(realm, realm) - if realm not in realms: - realms.append(realm) + realms |= {realm} return tuple(realms) return series.apply(lambda string: _translate(string)) - - -def _to_tuple(series: pd.Series) -> pd.Series: - """ - Make each entry in the provided series a tuple - - Parameters - ---------- - series: :py:class:`~pandas.Series` - A pandas Series or another object with an `apply` method - """ - return series.apply(lambda x: (x,)) diff --git a/src/access_nri_intake/source/builders.py b/src/access_nri_intake/source/builders.py index c1799a6f..ddfcdca7 100644 --- a/src/access_nri_intake/source/builders.py +++ b/src/access_nri_intake/source/builders.py @@ -286,14 +286,14 @@ def parse_access_ncfile( Parameters ---------- - file: str + fname: str The path to the netcdf file time_dim: str The name of the time dimension Returns ------- - output_nc_info: AccessNCFileInfo + output_nc_info: _AccessNCFileInfo A dataclass containing the information parsed from the file Raises @@ -532,9 +532,11 @@ def parser(cls, file): ncinfo_dict = nc_info.to_dict() # Remove exp_id from file id so that members can be part of the same dataset - ncinfo_dict["file_id"] = re.sub(exp_id, "", ncinfo_dict["file_id"]).strip( - "_" - ) + ncinfo_dict["file_id"] = re.sub( + exp_id, + "", + ncinfo_dict["file_id"], + ).strip("_") ncinfo_dict["realm"] = realm_mapping[realm] ncinfo_dict["member"] = exp_id diff --git a/src/access_nri_intake/source/utils.py b/src/access_nri_intake/source/utils.py index b28811c0..4d0ab76e 100644 --- a/src/access_nri_intake/source/utils.py +++ b/src/access_nri_intake/source/utils.py @@ -7,7 +7,7 @@ from dataclasses import asdict, dataclass, field from datetime 
import timedelta from pathlib import Path -from typing import Union +from typing import Optional, Union import cftime import xarray as xr @@ -23,8 +23,8 @@ class _AccessNCFileInfo: Holds information about a NetCDF file that is used to create an intake-esm catalog entry. - ______ - Notes: + Notes + ----- Use of both path and filename seems redundant, but constructing filename from the path using a __post_init__ method makes testing more difficult. On balance, more explicit tests are probably more important than the slight redundancy. @@ -33,7 +33,7 @@ class _AccessNCFileInfo: filename: Union[str, Path] file_id: str path: str - filename_timestamp: Union[str, None] + filename_timestamp: Optional[str] frequency: str start_date: str end_date: str @@ -139,7 +139,7 @@ def _guess_start_end_dates(ts, te, frequency): def get_timeinfo( ds: xr.Dataset, - filename_frequency: Union[str, None], + filename_frequency: Optional[str], time_dim: str, ) -> tuple[str, str, str]: """ @@ -177,7 +177,7 @@ def _todate(t): time_format = "%Y-%m-%d, %H:%M:%S" ts = None te = None - frequency: Union[str, tuple[Union[int, None], str]] = "fx" + frequency: Union[str, tuple[Optional[int], str]] = "fx" has_time = time_dim in ds if has_time: diff --git a/tests/data/esm_datastore/cordex-ig45.csv b/tests/data/esm_datastore/cordex-ig45.csv new file mode 100644 index 00000000..06f6a2fd --- /dev/null +++ b/tests/data/esm_datastore/cordex-ig45.csv @@ -0,0 +1,6 @@ +path,file_type,project_id,resolution,institution_id,source_id,experiment_id,member_id,frequency,variable_id,version,time_range +/g/data/ig45/QldFCP-2/output/CMIP6/DD/AUS-10i/UQ-DEC/ACCESS-CM2/ssp126/r2i1p1f1/CCAMoc-v2112/v1-r1/day/hus200/v20240709/hus200_AUS-10i_ACCESS-CM2_ssp126_r2i1p1f1_UQ-DEC_CCAMoc-v2112_v1-r1_day_20580101-20581231.nc,f,output,AUS-10i,UQ-DEC,ACCESS-CM2,ssp126,r2i1p1f1,day,hus200,v20240709,20580101-20581231 
+/g/data/ig45/QldFCP-2/CORDEX/CMIP6/DD/AUS-20i/UQ-DEC/ACCESS-ESM1-5/ssp126/r20i1p1f1/CCAMoc-v2112/v1-r1/mon/va925/v20240722/va925_AUS-20i_ACCESS-ESM1-5_ssp126_r20i1p1f1_UQ-DEC_CCAMoc-v2112_v1-r1_mon_208101-209012.nc,f,CORDEX,AUS-20i,UQ-DEC,ACCESS-ESM1-5,ssp126,r20i1p1f1,mon,va925,v20240722,208101-209012 +/g/data/ig45/QldFCP-2/CORDEX/CMIP6/DD/AUS-20i/UQ-DEC/ACCESS-ESM1-5/ssp370/r6i1p1f1/CCAM-v2105/v1-r1/mon/clh/v20240722/clh_AUS-20i_ACCESS-ESM1-5_ssp370_r6i1p1f1_UQ-DEC_CCAM-v2105_v1-r1_mon_201501-202012.nc,f,CORDEX,AUS-20i,UQ-DEC,ACCESS-ESM1-5,ssp370,r6i1p1f1,mon,clh,v20240722,201501-202012 +/g/data/ig45/QldFCP-2/output/CMIP6/DD/AUS-10i/UQ-DEC/ACCESS-CM2/ssp126/r2i1p1f1/CCAMoc-v2112/v1-r1/day/ta850/v20240709/ta850_AUS-10i_ACCESS-CM2_ssp126_r2i1p1f1_UQ-DEC_CCAMoc-v2112_v1-r1_day_20340101-20341231.nc,f,output,AUS-10i,UQ-DEC,ACCESS-CM2,ssp126,r2i1p1f1,day,ta850,v20240709,20340101-20341231 +/g/data/ig45/QldFCP-2/CORDEX/CMIP6/DD/AUS-20i/UQ-DEC/NorESM2-MM/ssp126/r1i1p1f1/CCAMoc-v2112/v1-r1/mon/hus200/v20240722/hus200_AUS-20i_NorESM2-MM_ssp126_r1i1p1f1_UQ-DEC_CCAMoc-v2112_v1-r1_mon_201501-202012.nc,f,CORDEX,AUS-20i,UQ-DEC,NorESM2-MM,ssp126,r1i1p1f1,mon,hus200,v20240722,201501-202012 diff --git a/tests/data/esm_datastore/cordex-ig45.json b/tests/data/esm_datastore/cordex-ig45.json new file mode 100644 index 00000000..fab7b871 --- /dev/null +++ b/tests/data/esm_datastore/cordex-ig45.json @@ -0,0 +1,70 @@ +{ + "id": "qldfcp-2-ig45", + "title": "qldfcp-2-ig45", + "description": "Datasets on Gadi, both publised and replicated. 
All file versions present are in the listing\nMaintained By: NCI\nContact: help@nci.org.au", + "assets": { + "column_name": "path", + "format": "netcdf" + }, + "aggregation_control": { + "variable_column_name": "variable_id", + "groupby_attrs": [ + "file_type", + "project_id", + "resolution", + "institution_id", + "source_id", + "experiment_id", + "member_id", + "frequency", + "variable_id", + "version" + ], + "aggregations": [ + { + "type": "join_existing", + "attribute_name": "time_range", + "options": { + "dim": "time" + } + } + ] + }, + "esmcat_version": "0.1.0", + "catalog_file": "cordex-ig45.csv", + "attributes": [ + { + "column_name": "file_type" + }, + { + "column_name": "project_id" + }, + { + "column_name": "resolution" + }, + { + "column_name": "institution_id" + }, + { + "column_name": "source_id" + }, + { + "column_name": "experiment_id" + }, + { + "column_name": "member_id" + }, + { + "column_name": "frequency" + }, + { + "column_name": "variable_id" + }, + { + "column_name": "version" + }, + { + "column_name": "time_range" + } + ] +} diff --git a/tests/test_builders.py b/tests/test_builders.py index c28f9eac..42ed8e5e 100644 --- a/tests/test_builders.py +++ b/tests/test_builders.py @@ -556,8 +556,7 @@ def test_parse_access_filename(builder, filename, expected): variable_cell_methods=["time: mean"], variable_units=["K"], ), - ), - ( + )( builders.AccessCm2Builder, "access-cm2/by578/history/ice/iceh_d.2015-01.nc", _AccessNCFileInfo( @@ -680,8 +679,7 @@ def test_parse_access_filename(builder, filename, expected): variable_cell_methods=["time: mean"], variable_units=["K"], ), - ), - ( + )( builders.AccessEsm15Builder, "access-esm1-5/history/ice/iceh.1850-01.nc", _AccessNCFileInfo( diff --git a/tests/test_translators.py b/tests/test_translators.py index deea65da..29ecb756 100644 --- a/tests/test_translators.py +++ b/tests/test_translators.py @@ -7,15 +7,17 @@ from access_nri_intake.catalog import CORE_COLUMNS, TRANSLATOR_GROUPBY_COLUMNS from 
access_nri_intake.catalog.translators import ( + FREQUENCY_TRANSLATIONS, BarpaTranslator, Cmip5Translator, Cmip6Translator, + CordexTranslator, DefaultTranslator, EraiTranslator, TranslatorError, _cmip_realm_translator, _to_tuple, - frequency_translations, + tuplify_series, ) @@ -68,7 +70,7 @@ def test_cmip_frequency_translator(input, expected): """Test translation of entries in the CMIP frequency column""" series = pd.Series(input) - translated = series.apply(lambda x: frequency_translations.get(x, x)) + translated = series.apply(lambda x: FREQUENCY_TRANSLATIONS.get(x, x)) assert list(translated) == expected @@ -121,6 +123,9 @@ def test_cmip_realm_translator(input, expected): """Test translation of entries in the CMIP realm column""" series = pd.Series(input) translated = _cmip_realm_translator(series) + # Sort expected & translated to make the test less brittle + translated = translated.apply(lambda x: tuple(sorted(x))) + expected = [tuple(sorted(x)) for x in expected] assert list(translated) == expected @@ -185,6 +190,29 @@ def test_DefaultTranslator_error(test_data): assert "Could not translate" in str(excinfo.value) +@pytest.mark.parametrize( + "colname, should_raise", + [ + ("model", False), + ("realm", False), + ("frequency", False), + ("variable", False), + ("random_string", True), + ], +) +def test_DefaultTranslator_set_dispatch(test_data, colname, should_raise): + """Test that only valid translation setups are allowed""" + esmds = intake.open_esm_datastore(test_data / "esm_datastore/cmip5-al33.json") + dtrans = DefaultTranslator(esmds, CORE_COLUMNS) + if should_raise: + with pytest.raises(TranslatorError) as excinfo: + dtrans.set_dispatch(colname, dtrans._model_translator, "model") + assert "'core_colname' must be one of" in str(excinfo.value) + else: + dtrans.set_dispatch(colname, dtrans._model_translator, colname) + assert dtrans._dispatch[colname] == dtrans._model_translator + + @pytest.mark.parametrize( "groupby, n_entries", [ @@ -263,3 +291,38 @@ def 
test_BarpaTranslator(test_data, groupby, n_entries): esmds.description = "description" df = BarpaTranslator(esmds, CORE_COLUMNS).translate(groupby) assert len(df) == n_entries + + +@pytest.mark.parametrize( + "groupby, n_entries", + [(None, 5), (["variable"], 4), (["frequency"], 2), (["realm"], 1)], +) +def test_CordexTranslator(test_data, groupby, n_entries): + """Test CORDEX datastore translator""" + esmds = intake.open_esm_datastore(test_data / "esm_datastore/cordex-ig45.json") + esmds.name = "name" + esmds.description = "description" + df = CordexTranslator(esmds, CORE_COLUMNS).translate(groupby) + assert len(df) == n_entries + + +@pytest.mark.parametrize( + "input_series, expected_output", + [ + (pd.Series([1, 2, 3]), pd.Series([(1,), (2,), (3,)])), + ], +) +def test_tuplify_series(input_series, expected_output): + """Test the _tuplify_series function""" + + @tuplify_series + def tuplify_func(series): + return series + + class TestSeries: + @tuplify_series + def method(self, series): + return series + + assert all(tuplify_func(input_series) == expected_output) + assert all(TestSeries().method(input_series) == expected_output)