From aeb978e3dcd7a6f3e0eee305500b8b23f72b4bab Mon Sep 17 00:00:00 2001 From: Charles Turner Date: Mon, 18 Nov 2024 14:00:25 +0800 Subject: [PATCH 01/24] Updated functions in cli.py to use argv[Optional[Sequence[str]] instead of no args - much easier to test --- src/access_nri_intake/cli.py | 38 ++++++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/src/access_nri_intake/cli.py b/src/access_nri_intake/cli.py index c2073105..951daecf 100644 --- a/src/access_nri_intake/cli.py +++ b/src/access_nri_intake/cli.py @@ -8,7 +8,9 @@ import logging import os import re +from collections.abc import Sequence from pathlib import Path +from typing import Optional import jsonschema import yaml @@ -27,7 +29,9 @@ class MetadataCheckError(Exception): pass -def _parse_build_inputs(config_yamls, build_path): +def _parse_build_inputs( + config_yamls: list[str], build_path: str +) -> list[tuple[str, dict]]: """ Parse build inputs into a list of tuples of CatalogManager methods and args to pass to the methods @@ -74,9 +78,12 @@ def _parse_build_inputs(config_yamls, build_path): return args -def _check_build_args(args_list): +def _check_build_args(args_list: list[dict]) -> None: """ Run some checks on the parsed build argmuents to be passed to the CatalogManager + + Raises: + MetadataCheckError: If there are experiments with the same name or experiment_uuid """ names = [] @@ -87,18 +94,19 @@ def _check_build_args(args_list): if len(names) != len(set(names)): seen = set() - dupes = [name for name in names if name in seen or seen.add(name)] + dupes = [name for name in names if name in seen or seen.add(name)] # type: ignore + # seen.add(name) returns None & so is always Falsey - so what is it doing? raise MetadataCheckError(f"There are experiments with the same name: {dupes}") if len(uuids) != len(set(uuids)): seen = set() - dupes = [uuid for uuid in uuids if uuid in seen or seen.add(uuid)] + dupes = [uuid for uuid in uuids if uuid in seen or seen.add(uuid)] # type: ignore dupes = [name for name, uuid in zip(names, uuids) if uuid in dupes] raise MetadataCheckError( f"There are experiments with the same experiment_uuid: {dupes}" ) -def build(): +def build(argv: Optional[Sequence[str]] = None): """ Build an intake-dataframe-catalog from YAML configuration file(s). """ @@ -166,7 +174,7 @@ def build(): ), ) - args = parser.parse_args() + args = parser.parse_args(argv) config_yamls = args.config_yaml build_base_path = args.build_base_path catalog_base_path = args.catalog_base_path @@ -197,20 +205,20 @@ def _get_project(path): return match.groups()[0] if match else None project = set() - for method, args in parsed_sources: + for method, src_args in parsed_sources: if method == "load": # This is a hack but I don't know how else to get the storage from pre-built datastores - esm_ds = open_esm_datastore(args["path"][0]) + esm_ds = open_esm_datastore(src_args["path"][0]) project |= set(esm_ds.df["path"].map(_get_project)) - project |= {_get_project(path) for path in args["path"]} + project |= {_get_project(path) for path in src_args["path"]} project |= {_get_project(build_base_path)} storage_flags = "+".join(sorted([f"gdata/{proj}" for proj in project])) # Build the catalog cm = CatalogManager(path=metacatalog_path) - for method, args in parsed_sources: - logger.info(f"Adding '{args['name']}' to metacatalog '{metacatalog_path}'") + for method, src_args in parsed_sources: + logger.info(f"Adding '{src_args['name']}' to metacatalog '{metacatalog_path}'") getattr(cm, method)(**args) # Write catalog yaml file @@ -292,9 +300,9 @@ def _get_project(path): yaml_dict, version, version ) elif storage_new != storage_old: - yaml_dict["sources"]["access_nri"]["metadata"][ - "storage" - ] = _combine_storage_flags(storage_new, storage_old) + yaml_dict["sources"]["access_nri"]["metadata"]["storage"] = ( + _combine_storage_flags(storage_new, storage_old) + ) # Set the minimum and maximum catalog versions, if they're not set already # in the 'new catalog' if statement above @@ -352,7 +360,7 @@ def _combine_storage_flags(a: str, b: str) -> str: return "+".join(sorted(list(set(aflags + bflags)))) -def metadata_validate(): +def metadata_validate(argv: Optional[Sequence[str]] = None): """ Check provided metadata.yaml file(s) against the experiment schema """ From 9e785f4a3b441cd6106972da44d399e2c7cb138f Mon Sep 17 00:00:00 2001 From: Charles Turner Date: Mon, 18 Nov 2024 14:00:42 +0800 Subject: [PATCH 02/24] Skeleton of e2e test --- e2e/build_subset.sh | 42 +++++++++++++++++++++++++ e2e/configs/access-om2.yaml | 9 ++++++ e2e/configs/cmip5.yaml | 9 ++++++ e2e/conftest.py | 63 +++++++++++++++++++++++++++++++++++++ e2e/test_end_to_end.py | 6 ++++ 5 files changed, 129 insertions(+) create mode 100644 e2e/build_subset.sh create mode 100644 e2e/configs/access-om2.yaml create mode 100644 e2e/configs/cmip5.yaml create mode 100644 e2e/conftest.py create mode 100644 e2e/test_end_to_end.py diff --git a/e2e/build_subset.sh b/e2e/build_subset.sh new file mode 100644 index 00000000..75e3cab8 --- /dev/null +++ b/e2e/build_subset.sh @@ -0,0 +1,42 @@ +#!/bin/bash -l + +#PBS -P iq82 +#PBS -l storage=gdata/xp65+gdata/ik11+gdata/cj50+gdata/hh5+gdata/p73+gdata/dk92+gdata/al33+gdata/rr3+gdata/fs38+gdata/oi10 +#PBS -q normal +#PBS -W block=true +#PBS -l walltime=03:00:00 +#PBS -l mem=192gb +#PBS -l ncpus=48 +#PBS -l wd +#PBS -j oe + +########################################################################################### +# Copyright 2022 ACCESS-NRI and contributors. See the top-level COPYRIGHT file for details. +# SPDX-License-Identifier: Apache-2.0 + +# Description: +# Generate access-nri intake metacatalog from config files + +########################################################################################### + +set -e + +if [ ! $# -eq 0 ]; then + version=$1 +fi + +module use /g/data/xp65/public/modules +module load conda/access-med-0.6 + +OUTPUT_BASE_PATH=/scratch/tm70/ct1163/test_cat/ +CONFIG_DIR=/g/data/xp65/admin/access-nri-intake-catalog/config +CONFIGS=( cmip5.yaml access-om2.yaml ) + +config_paths=( "${CONFIGS[@]/#/${CONFIG_DIR}/}" ) + +if [ -z "$version" ]; then + catalog-build --build_base_path=${OUTPUT_BASE_PATH} ${config_paths[@]} + +else + catalog-build --build_base_path=${OUTPUT_BASE_PATH} ${config_paths[@]} +fi diff --git a/e2e/configs/access-om2.yaml b/e2e/configs/access-om2.yaml new file mode 100644 index 00000000..8523a262 --- /dev/null +++ b/e2e/configs/access-om2.yaml @@ -0,0 +1,9 @@ +builder: AccessOm2Builder + +translator: DefaultTranslator + +sources: + + - metadata_yaml: /g/data/ik11/outputs/access-om2/1deg_jra55_ryf9091_gadi/metadata.yaml + path: + - /g/data/ik11/outputs/access-om2/1deg_jra55_ryf9091_gadi diff --git a/e2e/configs/cmip5.yaml b/e2e/configs/cmip5.yaml new file mode 100644 index 00000000..a04764a2 --- /dev/null +++ b/e2e/configs/cmip5.yaml @@ -0,0 +1,9 @@ +builder: null + +translator: Cmip5Translator + +sources: + + - metadata_yaml: /g/data/xp65/admin/access-nri-intake-catalog/config/metadata_sources/cmip5-al33/metadata.yaml + path: + - /g/data/al33/catalog/v2/esm/catalog.json \ No newline at end of file diff --git a/e2e/conftest.py b/e2e/conftest.py new file mode 100644 index 00000000..fec3c943 --- /dev/null +++ b/e2e/conftest.py @@ -0,0 +1,63 @@ +# Copyright 2023 ACCESS-NRI and contributors. See the top-level COPYRIGHT file for details. +# SPDX-License-Identifier: Apache-2.0 + +import os +import warnings +from pathlib import Path + +from pytest import fixture + +here = os.path.abspath(os.path.dirname(__file__)) + + +def _get_xfail(): + """ + Get the XFAILS environment variable. We use a default of 1, indicating we expect + to add xfail marker to `test_parse_access_ncfile[AccessOm2Builder-access-om2/output000/ocean/ocean_grid.nc-expected0-True]` + unless specified. + """ + xfails_default = 1 + + try: + return int(os.environ["XFAILS"]) + except KeyError: + warnings.warn( + message=( + "XFAILS enabled by default as coordinate discovery disabled by default. " + "This will be deprecated when coordinate discovery is enabled by default" + ), + category=PendingDeprecationWarning, + ) + return xfails_default + + +_add_xfail = _get_xfail() + + +@fixture(scope="session") +def test_data(): + return Path(os.path.join(here, "data")) + + +@fixture(scope="session") +def BASE_DIR(tmp_path_factory): + yield tmp_path_factory.mktemp("catalog-dir") + + +def pytest_collection_modifyitems(config, items): + """ + This function is called by pytest to modify the items collected during test + collection. We use it here to mark the xfail tests in + test_builders::test_parse_access_ncfile when we check the file contents & to + ensure we correctly get xfails if we don't have cordinate discovery enabled + in intake-esm. + """ + for item in items: + if ( + item.name + in ( + "test_parse_access_ncfile[AccessOm2Builder-access-om2/output000/ocean/ocean_grid.nc-expected0-True]", + ) + and _add_xfail + ): + item.add_marker("xfail") diff --git a/e2e/test_end_to_end.py b/e2e/test_end_to_end.py new file mode 100644 index 00000000..5bfb2db4 --- /dev/null +++ b/e2e/test_end_to_end.py @@ -0,0 +1,6 @@ +def test_build_esm_datastore(): + pass + + +def test_translate_esm_datastore(): + pass From 6206a7ee17153ffa04f4e7f73afe4137c1200626 Mon Sep 17 00:00:00 2001 From: Charles Turner Date: Mon, 18 Nov 2024 14:02:45 +0800 Subject: [PATCH 03/24] Peelin apart argparse for e2e test --- src/access_nri_intake/cli.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/access_nri_intake/cli.py b/src/access_nri_intake/cli.py index 951daecf..6cccc070 100644 --- a/src/access_nri_intake/cli.py +++ b/src/access_nri_intake/cli.py @@ -182,6 +182,10 @@ def build(argv: Optional[Sequence[str]] = None): version = args.version update = not args.no_update + print("\n", "*" * 80) + print(f"{args=}") + return None + if not version.startswith("v"): version = f"v{version}" if not re.match(CATALOG_NAME_FORMAT, version): From a4aab125728d3e1fc013d28e0783a5f0afbfef40 Mon Sep 17 00:00:00 2001 From: Charles Turner Date: Mon, 18 Nov 2024 14:43:22 +0800 Subject: [PATCH 04/24] Pass build in using argparse --- e2e/build_subset.sh | 1 + e2e/test_end_to_end.py | 36 +++++++++++++++++++++++++++++++++++- 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/e2e/build_subset.sh b/e2e/build_subset.sh index 75e3cab8..caffd763 100644 --- a/e2e/build_subset.sh +++ b/e2e/build_subset.sh @@ -27,6 +27,7 @@ fi module use /g/data/xp65/public/modules module load conda/access-med-0.6 +source /home/189/ct1163/end2end/venv/bin/activate OUTPUT_BASE_PATH=/scratch/tm70/ct1163/test_cat/ CONFIG_DIR=/g/data/xp65/admin/access-nri-intake-catalog/config diff --git a/e2e/test_end_to_end.py b/e2e/test_end_to_end.py index 5bfb2db4..4cbc6263 100644 --- a/e2e/test_end_to_end.py +++ b/e2e/test_end_to_end.py @@ -1,5 +1,39 @@ +from access_nri_intake.cli import build + +""" +args=Namespace( + config_yaml=[ + '/scratch/tm70/ct1163/configs/cmip5.yaml', + '/scratch/tm70/ct1163/configs/access-om2.yaml'], + build_base_path='/scratch/tm70/ct1163/test_cat/', + catalog_base_path='./', + catalog_file='metacatalog.csv', + version='v2024-11-18', + no_update=False + ) +""" + + def test_build_esm_datastore(): - pass + build( + [ + "--config_yaml", + "/scratch/tm70/ct1163/configs/cmip5.yaml", + "/scratch/tm70/ct1163/configs/access-om2.yaml", + "--build_base_path", + "/scratch/tm70/ct1163/test_cat/", + "--catalog_base_path", + "./", + "--catalog_file", + "metacatalog.csv", + "--version", + "v2024-11-18", + "--no_update", + "False", + ] + ) + + assert True def test_translate_esm_datastore(): From c7efe9596a12697190e979790b50821ff9e6f7d6 Mon Sep 17 00:00:00 2001 From: Charles Turner Date: Mon, 18 Nov 2024 20:21:59 +1100 Subject: [PATCH 05/24] End to end build test working - now to add queries --- e2e/__init__.py | 0 e2e/test_end_to_end.py | 39 +++++++++++++++++++++++++++++------- src/access_nri_intake/cli.py | 6 +----- 3 files changed, 33 insertions(+), 12 deletions(-) create mode 100644 e2e/__init__.py diff --git a/e2e/__init__.py b/e2e/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/e2e/test_end_to_end.py b/e2e/test_end_to_end.py index 4cbc6263..2f41dae9 100644 --- a/e2e/test_end_to_end.py +++ b/e2e/test_end_to_end.py @@ -1,4 +1,7 @@ from access_nri_intake.cli import build +import os +from datetime import datetime +from .conftest import here """ args=Namespace( @@ -14,27 +17,49 @@ """ -def test_build_esm_datastore(): +def print_directory_tree(root, indent=""): + """ + Pretty print a directory tree - code from chatgpt. + """ + for item in os.listdir(root): + path = os.path.join(root, item) + if os.path.isdir(path): + print(f"{indent}├── {item}/") + print_directory_tree(path, indent + "│ ") + else: + print(f"{indent}├── {item}") + + +def test_build_esm_datastore(BASE_DIR): + # Build our subset of the catalog. This should take ~2 minutes with the PBS + # flags in build_subset.sh + print(f"Building the catalog subset & writing to {BASE_DIR}") + v_num = datetime.now().strftime("v%Y-%m-%d") + print(f"Version number: {v_num}") build( [ - "--config_yaml", - "/scratch/tm70/ct1163/configs/cmip5.yaml", - "/scratch/tm70/ct1163/configs/access-om2.yaml", + f"{here}/configs/cmip5.yaml", + f"{here}/configs/access-om2.yaml", "--build_base_path", - "/scratch/tm70/ct1163/test_cat/", + str(BASE_DIR), "--catalog_base_path", "./", "--catalog_file", "metacatalog.csv", "--version", - "v2024-11-18", + v_num, "--no_update", - "False", ] ) + print_directory_tree(BASE_DIR) + assert True + print("Catalog built successfully. Finish test tomorrow.") + + + def test_translate_esm_datastore(): pass diff --git a/src/access_nri_intake/cli.py b/src/access_nri_intake/cli.py index 6cccc070..e6af1cbf 100644 --- a/src/access_nri_intake/cli.py +++ b/src/access_nri_intake/cli.py @@ -182,10 +182,6 @@ def build(argv: Optional[Sequence[str]] = None): version = args.version update = not args.no_update - print("\n", "*" * 80) - print(f"{args=}") - return None - if not version.startswith("v"): version = f"v{version}" if not re.match(CATALOG_NAME_FORMAT, version): @@ -223,7 +219,7 @@ def _get_project(path): cm = CatalogManager(path=metacatalog_path) for method, src_args in parsed_sources: logger.info(f"Adding '{src_args['name']}' to metacatalog '{metacatalog_path}'") - getattr(cm, method)(**args) + getattr(cm, method)(**src_args) # Write catalog yaml file cat = cm.dfcat From 1f4feea025c3624dbd6cfae8f0687b8728c4cff2 Mon Sep 17 00:00:00 2001 From: Charles Turner Date: Mon, 18 Nov 2024 20:27:48 +1100 Subject: [PATCH 06/24] Pre-commit --- e2e/test_end_to_end.py | 8 ++++---- src/access_nri_intake/cli.py | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/e2e/test_end_to_end.py b/e2e/test_end_to_end.py index 2f41dae9..4d60c11a 100644 --- a/e2e/test_end_to_end.py +++ b/e2e/test_end_to_end.py @@ -1,6 +1,8 @@ -from access_nri_intake.cli import build import os from datetime import datetime + +from access_nri_intake.cli import build + from .conftest import here """ @@ -31,7 +33,7 @@ def print_directory_tree(root, indent=""): def test_build_esm_datastore(BASE_DIR): - # Build our subset of the catalog. This should take ~2 minutes with the PBS + # Build our subset of the catalog. This should take ~2 minutes with the PBS # flags in build_subset.sh print(f"Building the catalog subset & writing to {BASE_DIR}") v_num = datetime.now().strftime("v%Y-%m-%d") @@ -59,7 +61,5 @@ def test_build_esm_datastore(BASE_DIR): print("Catalog built successfully. Finish test tomorrow.") - - def test_translate_esm_datastore(): pass diff --git a/src/access_nri_intake/cli.py b/src/access_nri_intake/cli.py index e6af1cbf..771b3ef8 100644 --- a/src/access_nri_intake/cli.py +++ b/src/access_nri_intake/cli.py @@ -300,9 +300,9 @@ def _get_project(path): yaml_dict, version, version ) elif storage_new != storage_old: - yaml_dict["sources"]["access_nri"]["metadata"]["storage"] = ( - _combine_storage_flags(storage_new, storage_old) - ) + yaml_dict["sources"]["access_nri"]["metadata"][ + "storage" + ] = _combine_storage_flags(storage_new, storage_old) # Set the minimum and maximum catalog versions, if they're not set already # in the 'new catalog' if statement above From 02fdd23089677cb3440bf8c13f1b81c2a7c98a38 Mon Sep 17 00:00:00 2001 From: Charles Turner Date: Tue, 19 Nov 2024 10:54:17 +1100 Subject: [PATCH 07/24] Updated workflow to only run tests in 'tests' dir --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8483669c..d6b88dba 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -40,7 +40,7 @@ jobs: - name: Run tests shell: bash -l {0} - run: coverage run -m --source=access_nri_intake pytest + run: coverage run -m --source=access_nri_intake pytest tests - name: Generate coverage report shell: bash -l {0} From e194eadb4f55bc6d5adafd0a531c73e1888c03b4 Mon Sep 17 00:00:00 2001 From: Charles Turner Date: Wed, 20 Nov 2024 14:02:08 +1100 Subject: [PATCH 08/24] Lots of tests working - mostly just the content tests to finish --- e2e/conftest.py | 6 + e2e/test_end_to_end.py | 470 ++++++++++++++++++++++++++++++++++++++++- pyproject.toml | 4 + 3 files changed, 473 insertions(+), 7 deletions(-) diff --git a/e2e/conftest.py b/e2e/conftest.py index fec3c943..c9ef271d 100644 --- a/e2e/conftest.py +++ b/e2e/conftest.py @@ -3,6 +3,7 @@ import os import warnings +from datetime import datetime from pathlib import Path from pytest import fixture @@ -44,6 +45,11 @@ def BASE_DIR(tmp_path_factory): yield tmp_path_factory.mktemp("catalog-dir") +@fixture(scope="session") +def v_num(): + return datetime.now().strftime("v%Y-%m-%d") + + def pytest_collection_modifyitems(config, items): """ This function is called by pytest to modify the items collected during test diff --git a/e2e/test_end_to_end.py b/e2e/test_end_to_end.py index 4d60c11a..c328eef5 100644 --- a/e2e/test_end_to_end.py +++ b/e2e/test_end_to_end.py @@ -1,5 +1,9 @@ import os -from datetime import datetime +import numpy as np +from pathlib import Path + +import intake +import pytest from access_nri_intake.cli import build @@ -31,12 +35,19 @@ def print_directory_tree(root, indent=""): else: print(f"{indent}├── {item}") +@pytest.fixture(scope="session") +def current_catalog(): + """ + Return the current catalog as an intake catalog. + """ + metacat = intake.cat.access_nri + yield metacat -def test_build_esm_datastore(BASE_DIR): +@pytest.fixture(scope="session") +def metacat(BASE_DIR, v_num): # Build our subset of the catalog. This should take ~2 minutes with the PBS # flags in build_subset.sh print(f"Building the catalog subset & writing to {BASE_DIR}") - v_num = datetime.now().strftime("v%Y-%m-%d") print(f"Version number: {v_num}") build( [ @@ -53,13 +64,458 @@ def test_build_esm_datastore(BASE_DIR): "--no_update", ] ) + cat_path = os.path.join(BASE_DIR, v_num, "metacatalog.csv") + metacat = intake.open_df_catalog(cat_path) + yield metacat + + +def test_catalog_subset_exists(BASE_DIR, v_num, metacat): + assert os.path.exists(os.path.join(BASE_DIR, v_num, "metacatalog.csv")) + + +def test_open_dataframe_catalog(metacat): + assert metacat + print("Catalog opened successfully.") + + +@pytest.mark.parametrize( + "name", + [ + "cmip5_al33", + "1deg_jra55_ryf9091_gadi", + ] +) +def test_datastore_found(metacat, name): + breakpoint() + assert metacat[name] == metacat.search(name=name).to_source() + + + +@pytest.mark.parametrize( + "colname, expected", + [ + ("path", 3700255), + ("file_type", 2), + ("project", 3), + ("institute", 62), + ("model", 74), + ("experiment", 94), + ("frequency", 9), + ("realm", 9), + ("table", 20), + ("ensemble", 240), + ("version", 610), + ("variable", 584), + ("time_range", 31152), + ("derived_variable", -999), + ], +) +def test_cmip5_datastore_nunique(metacat, colname, expected): + + cat = metacat["cmip5_al33"] - print_directory_tree(BASE_DIR) + if colname != "derived_variable": + assert len(cat.df[colname].unique()) == expected + else: + with pytest.raises(KeyError): + assert len(cat.df[colname].unique()) == expected + + +@pytest.mark.parametrize( + "colname, expected", + [ + ('filename',9367), + ('file_id',8), + ('path',9677), + ('filename_timestamp',9361), + ('frequency',3), + ('start_date',9361), + ('end_date',9360), + ('variable',15), + ('variable_long_name',15), + ('variable_standard_name',15), + ('variable_cell_methods',15), + ('variable_units',15), + ('realm',2) + ] +) +def test_om2_datastore_nunique(metacat, colname, expected): + + cat = metacat["1deg_jra55_ryf9091_gadi"] + + + if colname not in ['variable','variable_long_name','variable_standard_name','variable_cell_methods','variable_units']: + assert len(cat.df[colname].unique()) == expected + else: + # These should fail because they contains lists (unhashable) + with pytest.raises(TypeError): + assert len(cat.df[colname].unique()) == expected + # cast to tuple to make them hashable, then check the length + tuplified = cat.df[colname].apply(lambda x : tuple(x)).unique() + assert len(tuplified) == expected + +@pytest.mark.parametrize( + "colname, expected", + [ + ("file_type", {"l", "f"}), + ("project", {"CMIP5", "CORDEX", "isimip2b"}), + ( + "institute", + { + "BNU", + "CMCC", + "IPSL", + "LASG-CESS", + "LASG-IAP", + "MPI-M", + "MRI", + "NASA-GISS", + "NASA-GMAO", + "NIMR-KMA", + }, + ), + ( + "model", + { + "BNU-ESM", + "FGOALS-g2", + "FGOALS-s2", + "GISS-E2-H", + "GISS-E2-H-CC", + "GISS-E2-R", + "GISS-E2-R-CC", + "IPSL-CM5A-LR", + "IPSL-CM5A-MR", + "IPSL-CM5B-LR", + }, + ), + ( + "realm", + { + "atmos", + "ocean", + "seaIce", + "land", + "aerosol", + "ocnBgchem", + "landIce", + "na", + "landonly", + }, + ), + ( + "experiment", + { + "amip", + "esmFdbk2", + "historical", + "midHolocene", + "piControl", + "rcp45", + "rcp85", + "sstClim", + "sstClimAerosol", + "sstClimSulfate", + }, + ), + ("frequency", {'3hr', '6hr', 'daily', 'day', 'fx', 'mon', 'monClim', 'subhr', 'yr'}), + ( + "table", + { + "6hrLev", + "6hrPlev", + "Amon", + "Lmon", + "OImon", + "Omon", + "cfDay", + "cfMon", + "day", + "fx", + }, + ), + ( + "ensemble", + { + "r0i0p0", + "r11i1p1", + "r1i1p1", + "r1i1p2", + "r2i1p1", + "r3i1p1", + "r4i1p1", + "r5i1p1", + "r6i1p1", + "r8i1p1", + }, + ), + ( + "version", + { + "v1", + "v2", + "v20110726", + "v20111119", + "v20111219", + "v20120430", + "v20120526", + "v20120804", + "v20130506", + "v20161204", + }, + ), + ( + "variable", + {"ccb", "pr", "psl", "tas", "tasmax", "tasmin", "tro3", "ua", "va", "wmo"}, + ), + ( + "time_range", + { + "000101-010012", + "185001-234912", + "18520101-18521231", + "18780101-18781231", + "19170101-19171231", + "195001-200512", + "198201010000-198212311800", + "19910101-19911231", + "199201010000-199212311800", + "20010101-20011231", + }, + ), + ], +) +def test_cmip5_metacat_vals_found(metacat, colname, expected): + # Test that the unique values in the column are as expected. I've truncated + # the unique values to the first 10 for brevity because I'm not typing out + # 3700255 unique values. + cat = metacat["cmip5_al33"] + found = set(cat.df[colname].unique()[:10]) + + assert found == expected + +@pytest.mark.parametrize( + "colname, expected", + [ + ('filename',{'iceh.1900-07.nc', 'iceh.1900-05.nc', 'iceh.1900-04.nc', 'iceh.1900-03.nc', 'iceh.1900-08.nc', 'iceh.1900-06.nc', 'iceh.1900-02.nc', 'iceh.1900-09.nc', 'iceh.1900-10.nc', 'iceh.1900-01.nc'}), + ('file_id',{'iceh_XXXX_XX', 'ocean_scalar', 'ocean_snap', 'ocean_grid', 'ocean', 'ocean_wmass', 'ocean_heat', 'ocean_month'}), + ('path',{'/g/data/ik11/outputs/access-om2/1deg_jra55_ryf9091_gadi/output000/ice/OUTPUT/iceh.1900-01.nc', '/g/data/ik11/outputs/access-om2/1deg_jra55_ryf9091_gadi/output000/ice/OUTPUT/iceh.1900-07.nc', '/g/data/ik11/outputs/access-om2/1deg_jra55_ryf9091_gadi/output000/ice/OUTPUT/iceh.1900-08.nc', '/g/data/ik11/outputs/access-om2/1deg_jra55_ryf9091_gadi/output000/ice/OUTPUT/iceh.1900-03.nc', '/g/data/ik11/outputs/access-om2/1deg_jra55_ryf9091_gadi/output000/ice/OUTPUT/iceh.1900-06.nc', '/g/data/ik11/outputs/access-om2/1deg_jra55_ryf9091_gadi/output000/ice/OUTPUT/iceh.1900-10.nc', '/g/data/ik11/outputs/access-om2/1deg_jra55_ryf9091_gadi/output000/ice/OUTPUT/iceh.1900-05.nc', '/g/data/ik11/outputs/access-om2/1deg_jra55_ryf9091_gadi/output000/ice/OUTPUT/iceh.1900-04.nc', '/g/data/ik11/outputs/access-om2/1deg_jra55_ryf9091_gadi/output000/ice/OUTPUT/iceh.1900-09.nc', '/g/data/ik11/outputs/access-om2/1deg_jra55_ryf9091_gadi/output000/ice/OUTPUT/iceh.1900-02.nc'}), + ('filename_timestamp',{'1900-04', '1900-05', '1900-06', '1900-01', '1900-08', '1900-09', '1900-07', '1900-03', '1900-10', '1900-02'}), + ('frequency',{'fx', '1yr', '1mon'}), + ('start_date',{'1900-02-01, 00:00:00', '1900-03-01, 00:00:00', '1900-06-01, 00:00:00', '1900-10-01, 00:00:00', '1900-07-01, 00:00:00', '1900-09-01, 00:00:00', '1900-05-01, 00:00:00', '1900-01-01, 00:00:00', '1900-04-01, 00:00:00', '1900-08-01, 00:00:00'}), + ('end_date',{'1900-02-01, 00:00:00', '1900-03-01, 00:00:00', '1900-06-01, 00:00:00', '1900-10-01, 00:00:00', '1900-07-01, 00:00:00', '1900-09-01, 00:00:00', '1900-11-01, 00:00:00', '1900-05-01, 00:00:00', '1900-04-01, 00:00:00', '1900-08-01, 00:00:00'}), + ('variable',{('scalar_axis', 'time', 'nv', 'ke_tot', 'pe_tot', 'temp_global_ave', 'salt_global_ave', 'rhoave', 'temp_surface_ave', 'salt_surface_ave', 'total_ocean_salt', 'total_ocean_heat', 'eta_global', 'total_ocean_sfc_salt_flux_coupler', 'total_ocean_pme_river', 'total_ocean_river', 'total_ocean_runoff', 'total_ocean_calving', 'total_ocean_melt', 'total_ocean_evap', 'total_ocean_lprec', 'total_ocean_fprec', 'total_ocean_runoff_heat', 'total_ocean_calving_heat', 'total_ocean_river_heat', 'total_ocean_hflux_prec', 'total_ocean_hflux_evap', 'total_ocean_hflux_coupler', 'total_ocean_swflx', 'total_ocean_swflx_vis', 'total_ocean_lw_heat', 'total_ocean_evap_heat', 'total_ocean_fprec_melt_heat', 'total_ocean_calving_melt_heat', 'total_ocean_sens_heat', 'average_T1', 'average_T2', 'average_DT', 'time_bounds'), ('xt_ocean', 'yt_ocean', 'st_ocean', 'st_edges_ocean', 'time', 'nv', 'xu_ocean', 'yu_ocean', 'sw_ocean', 'sw_edges_ocean', 'temp', 'salt', 'age_global', 'u', 'v', 'wt', 'dzt', 'pot_rho_0', 'tx_trans', 'ty_trans', 'tx_trans_gm', 'ty_trans_gm', 'average_T1', 'average_T2', 'average_DT', 'time_bounds'), ('grid_xu_ocean', 'grid_yt_ocean', 'neutral', 'neutralrho_edges', 'time', 'nv', 'grid_xt_ocean', 'grid_yu_ocean', 'tx_trans_nrho', 'ty_trans_nrho', 'tx_trans_nrho_gm', 'ty_trans_nrho_gm', 'tx_trans_nrho_submeso', 'ty_trans_nrho_submeso', 'mass_pmepr_on_nrho', 'average_T1', 'average_T2', 'average_DT', 'time_bounds'), ('xt_ocean', 'yt_ocean', 'st_ocean', 'st_edges_ocean', 'time', 'nv', 'xu_ocean', 'yu_ocean', 'sw_ocean', 'sw_edges_ocean', 'temp', 'salt', 'age_global', 'u', 'v', 'wt', 'dzt', 'pot_rho_0', 'tx_trans', 'ty_trans', 'tx_trans_gm', 'ty_trans_gm', 'tx_trans_submeso', 'ty_trans_submeso', 'temp_xflux_adv', 'temp_yflux_adv', 'temp_xflux_gm', 'temp_yflux_gm', 'temp_xflux_submeso', 'temp_yflux_submeso', 'temp_xflux_ndiffuse', 'temp_yflux_ndiffuse', 'diff_cbt_t', 'average_T1', 'average_T2', 'average_DT', 'time_bounds'), ('xt_ocean', 'yt_ocean', 'time', 'nv', 'xu_ocean', 'yu_ocean', 'sea_level', 'eta_t', 'sea_levelsq', 'mld', 'pme_river', 'river', 'runoff', 'ice_calving', 'evap', 'melt', 'sfc_salt_flux_restore', 'sfc_salt_flux_ice', 'sfc_salt_flux_coupler', 'net_sfc_heating', 'frazil_3d_int_z', 'tau_x', 'tau_y', 'bmf_u', 'bmf_v', 'tx_trans_int_z', 'ty_trans_int_z', 'pbot_t', 'average_T1', 'average_T2', 'average_DT', 'time_bounds'), ('grid_xu_ocean', 'grid_yt_ocean', 'neutral', 'neutralrho_edges', 'time', 'nv', 'grid_xt_ocean', 'grid_yu_ocean', 'tx_trans_nrho', 'ty_trans_nrho', 'tx_trans_nrho_gm', 'ty_trans_nrho_gm', 'tx_trans_nrho_submeso', 'ty_trans_nrho_submeso', 'temp_xflux_adv_on_nrho', 'temp_yflux_adv_on_nrho', 'temp_xflux_submeso_on_nrho', 'temp_yflux_submeso_on_nrho', 'temp_xflux_gm_on_nrho', 'temp_yflux_gm_on_nrho', 'temp_xflux_ndiffuse_on_nrho', 'temp_yflux_ndiffuse_on_nrho', 'mass_pmepr_on_nrho', 'average_T1', 'average_T2', 'average_DT', 'time_bounds'), ('xt_ocean', 'yt_ocean', 'st_ocean', 'st_edges_ocean', 'time', 'nv', 'xu_ocean', 'yu_ocean', 'temp', 'salt', 'age_global', 'u', 'v', 'average_T1', 'average_T2', 'average_DT', 'time_bounds'), ('xt_ocean', 'yt_ocean', 'time', 'xu_ocean', 'yu_ocean', 'geolon_t', 'geolat_t', 'geolon_c', 'geolat_c', 'ht', 'hu', 'dxt', 'dyt', 'dxu', 'dyu', 'area_t', 'area_u', 'kmt', 'kmu', 'drag_coeff'), ('time', 'time_bounds', 'TLON', 'TLAT', 'ULON', 'ULAT', 'NCAT', 'tmask', 'blkmask', 'tarea', 'uarea', 'dxt', 'dyt', 'dxu', 'dyu', 'HTN', 'HTE', 'ANGLE', 'ANGLET', 'hi_m', 'hs_m', 'Tsfc_m', 'aice_m', 'uvel_m', 'vvel_m', 'uatm_m', 'vatm_m', 'sice_m', 'fswdn_m', 'fswup_m', 'flwdn_m', 'snow_ai_m', 'rain_ai_m', 'sst_m', 'sss_m', 'uocn_m', 'vocn_m', 'frzmlt_m', 'fswfac_m', 'fswabs_ai_m', 'albsni_m', 'alvdr_ai_m', 'alidr_ai_m', 'alvdf_ai_m', 'alidf_ai_m', 'albice_m', 'albsno_m', 'flat_ai_m', 'fsens_ai_m', 'flwup_ai_m', 'evap_ai_m', 'Tair_m', 'congel_m', 'frazil_m', 'snoice_m', 'meltt_m', 'melts_m', 'meltb_m', 'meltl_m', 'fresh_ai_m', 'fsalt_ai_m', 'fhocn_ai_m', 'fswthru_ai_m', 'strairx_m', 'strairy_m', 'strtltx_m', 'strtlty_m', 'strcorx_m', 'strcory_m', 'strocnx_m', 'strocny_m', 'strintx_m', 'strinty_m', 'strength_m', 'divu_m', 'shear_m', 'dvidtt_m', 'dvidtd_m', 'daidtt_m', 'daidtd_m', 'mlt_onset_m', 'frz_onset_m', 'trsig_m', 'ice_present_m', 'fcondtop_ai_m', 'aicen_m', 'vicen_m', 'fsurfn_ai_m', 'fcondtopn_ai_m', 'fmelttn_ai_m', 'flatn_ai_m'), ('xt_ocean', 'yt_ocean', 'st_ocean', 'st_edges_ocean', 'time', 'nv', 'xu_ocean', 'yu_ocean', 'sw_ocean', 'sw_edges_ocean', 'grid_xt_ocean', 'grid_yu_ocean', 'potrho', 'potrho_edges', 'temp', 'salt', 'age_global', 'u', 'v', 'wt', 'pot_rho_0', 'ty_trans_rho', 'ty_trans_rho_gm', 'average_T1', 'average_T2', 'average_DT', 'time_bounds')}), + ('variable_long_name',{('tcell longitude', 'tcell latitude', 'time', 'vertex number', 'ucell longitude', 'ucell latitude', 'effective sea level (eta_t + patm/(rho0*g)) on T cells', 'surface height on T cells [Boussinesq (volume conserving) model]', 'square of effective sea level (eta_t + patm/(rho0*g)) on T cells', 'mixed layer depth determined by density criteria', 'mass flux of precip-evap+river via sbc (liquid, frozen, evaporation)', 'mass flux of river (runoff + calving) entering ocean', 'mass flux of liquid river runoff entering ocean', 'mass flux of land ice calving into ocean', 'mass flux from evaporation/condensation (>0 enters ocean)', 'water flux transferred with sea ice form/melt (>0 enters ocean)', 'sfc_salt_flux_restore: flux from restoring term', 'sfc_salt_flux_ice', 'sfc_salt_flux_coupler: flux from the coupler', 'surface ocean heat flux coming through coupler and mass transfer', 'Vertical sum of ocn frazil heat flux over time step', 'i-directed wind stress forcing u-velocity', 'j-directed wind stress forcing v-velocity', 'Bottom u-stress via bottom drag', 'Bottom v-stress via bottom drag', 'T-cell i-mass transport vertically summed', 'T-cell j-mass transport vertically summed', 'bottom pressure on T cells [Boussinesq (volume conserving) model]', 'Start time for average period', 'End time for average period', 'Length of average period', 'time axis boundaries'), ('none', 'time', 'vertex number', 'Globally integrated ocean kinetic energy', 'Globally integrated ocean potential energy', 'Global mean temp in liquid seawater', 'Global mean salt in liquid seawater', 'global mean ocean in-situ density from ocean_density_mod', 'Global mass weighted mean surface temp in liquid seawater', 'Global mass weighted mean surface salt in liquid seawater', 'total mass of salt in liquid seawater', 'Total heat in the liquid ocean referenced to 0degC', 'global ave eta_t plus patm_t/(g*rho0)', 'total_ocean_sfc_salt_flux_coupler', 'total ocean precip-evap+river via sbc (liquid, frozen, evaporation)', 'total liquid river water and calving ice entering ocean', 'total liquid river runoff (>0 water enters ocean)', 'total water entering ocean from calving land ice', 'total liquid water melted from sea ice (>0 enters ocean)', 'total evaporative ocean mass flux (>0 enters ocean)', 'total liquid precip into ocean (>0 enters ocean)', 'total snow falling onto ocean (>0 enters ocean)', 'total ocean heat flux from liquid river runoff', 'total ocean heat flux from calving land ice', 'total heat flux into ocean from liquid+solid runoff (<0 cools ocean)', 'total ocean heat flux from precip transferring water across surface', 'total ocean heat flux from evap transferring water across surface', 'total surface heat flux passed through coupler', 'total shortwave flux into ocean (>0 heats ocean)', 'total visible shortwave into ocean (>0 heats ocean)', 'total longwave flux into ocean (<0 cools ocean)', 'total latent heat flux into ocean (<0 cools ocean)', 'total heat flux to melt frozen precip (<0 cools ocean)', 'total heat flux to melt frozen land ice (<0 cools ocean)', 'total sensible heat into ocean (<0 cools ocean)', 'Start time for average period', 'End time for average period', 'Length of average period', 'time axis boundaries'), ('tcell longitude', 'tcell latitude', 'time', 'ucell longitude', 'ucell latitude', 'tracer longitude', 'tracer latitude', 'uv longitude', 'uv latitude', 'ocean depth on t-cells', 'ocean depth on u-cells', 'ocean dxt on t-cells', 'ocean dyt on t-cells', 'ocean dxu on u-cells', 'ocean dyu on u-cells', 'tracer cell area', 'velocity cell area', 'number of depth levels on t-grid', 'number of depth levels on u-grid', 'Dimensionless bottom drag coefficient'), ('tcell longitude', 'tcell latitude', 'tcell zstar depth', 'tcell zstar depth edges', 'time', 'vertex number', 'ucell longitude', 'ucell latitude', 'Conservative temperature', 'Practical Salinity', 'Age (global)', 'i-current', 'j-current', 'Start time for average period', 'End time for average period', 'Length of average period', 'time axis boundaries'), ('tcell longitude', 'tcell latitude', 'tcell zstar depth', 'tcell zstar depth edges', 'time', 'vertex number', 'ucell longitude', 'ucell latitude', 'ucell zstar depth', 'ucell zstar depth edges', 'Conservative temperature', 'Practical Salinity', 'Age (global)', 'i-current', 'j-current', 'dia-surface velocity T-points', 't-cell thickness', 'potential density referenced to 0 dbar', 'T-cell i-mass transport', 'T-cell j-mass transport', 'T-cell mass i-transport from GM', 'T-cell mass j-transport from GM', 'T-cell mass i-transport from submesoscale param', 'T-cell mass j-transport from submesoscale param', 'cp*rho*dzt*dyt*u*temp', 'cp*rho*dzt*dxt*v*temp', 'cp*gm_xflux*dyt*rho_dzt*temp', 'cp*gm_yflux*dxt*rho_dzt*temp', 'cp*submeso_xflux*dyt*rho_dzt*temp', 'cp*submeso_yflux*dxt*rho_dzt*temp', 'cp*ndiffuse_xflux*dyt*rho_dzt*temp', 'cp*ndiffuse_yflux*dxt*rho_dzt*temp', 'total vert diff_cbt(temp) (w/o neutral included)', 'Start time for average period', 'End time for average period', 'Length of average period', 'time axis boundaries'), ('tcell longitude', 'tcell latitude', 'tcell zstar depth', 'tcell zstar depth edges', 'time', 'vertex number', 'ucell longitude', 'ucell latitude', 'ucell zstar depth', 'ucell zstar depth edges', 'Conservative temperature', 'Practical Salinity', 'Age (global)', 'i-current', 'j-current', 'dia-surface velocity T-points', 't-cell thickness', 'potential density referenced to 0 dbar', 'T-cell i-mass transport', 'T-cell j-mass transport', 'T-cell mass i-transport from GM', 'T-cell mass j-transport from GM', 'Start time for average period', 'End time for average period', 'Length of average period', 'time axis boundaries'), ('ucell longitude', 'tcell latitude', 'neutral density', 'neutral density edges', 'time', 'vertex number', 'tcell longitude', 'ucell latitude', 'T-cell i-mass transport on neutral rho', 'T-cell j-mass transport on neutral rho', 'T-cell i-mass transport from GM on neutral rho', 'T-cell j-mass transport from GM on neutral rho', 'T-cell i-mass transport from submesoscale param on neutral rho', 'T-cell j-mass transport from submesoscale param on neutral rho', 'mass transport from liquid+frozen mass and seaice melt+form (>0 enters ocean) binned to neutral density classes', 'Start time for average period', 'End time for average period', 'Length of average period', 'time axis boundaries'), ('tcell longitude', 'tcell latitude', 'tcell zstar depth', 'tcell zstar depth edges', 'time', 'vertex number', 'ucell longitude', 'ucell latitude', 'ucell zstar depth', 'ucell zstar depth edges', 'tcell longitude', 'ucell latitude', 'potential density', 'potential density edges', 'Conservative temperature', 'Practical Salinity', 'Age (global)', 'i-current', 'j-current', 'dia-surface velocity T-points', 'potential density referenced to 0 dbar', 'T-cell j-mass transport on pot_rho', 'T-cell j-mass transport from GM on pot_rho', 'Start time for average period', 'End time for average period', 'Length of average period', 'time axis boundaries'), ('model time', 'boundaries for time-averaging interval', 'T grid center longitude', 'T grid center latitude', 'U grid center longitude', 'U grid center latitude', 'category maximum thickness', 'ocean grid mask', 'ice grid block mask', 'area of T grid cells', 'area of U grid cells', 'T cell width through middle', 'T cell height through middle', 'U cell width through middle', 'U cell height through middle', 'T cell width on North side', 'T cell width on East side', 'angle grid makes with latitude line on U grid', 'angle grid makes with latitude line on T grid', 'grid cell mean ice thickness', 'grid cell mean snow thickness', 'snow/ice surface temperature', 'ice area (aggregate)', 'ice velocity (x)', 'ice velocity (y)', 'atm velocity (x)', 'atm velocity (y)', 'bulk ice salinity', 'down solar flux', 'upward solar flux', 'down longwave flux', 'snowfall rate', 'rainfall rate', 'sea surface temperature', 'sea surface salinity', 'ocean current (x)', 'ocean current (y)', 'freeze/melt potential', 'shortwave scaling factor', 'snow/ice/ocn absorbed solar flux', 'snow/ice broad band albedo', 'visible direct albedo', 'near IR direct albedo', 'visible diffuse albedo', 'near IR diffuse albedo', 'bare ice albedo', 'snow albedo', 'latent heat flux', 'sensible heat flux', 'upward longwave flux', 'evaporative water flux', 'air temperature', 'congelation ice growth', 'frazil ice growth', 'snow-ice formation', 'top ice melt', 'top snow melt', 'basal ice melt', 'lateral ice melt', 'freshwtr flx ice to ocn', 'salt flux ice to ocean', 'heat flux ice to ocean', 'SW flux thru ice to ocean', 'atm/ice stress (x)', 'atm/ice stress (y)', 'sea sfc tilt stress (x)', 'sea sfc tilt stress (y)', 'coriolis stress (x)', 'coriolis stress (y)', 'ocean/ice stress (x)', 'ocean/ice stress (y)', 'internal ice stress (x)', 'internal ice stress (y)', 'compressive ice strength', 'strain rate (divergence)', 'strain rate (shear)', 'volume tendency thermo', 'volume tendency dynamics', 'area tendency thermo', 'area tendency dynamics', 'melt onset date', 'freeze onset date', 'internal stress tensor trace', 'fraction of time-avg interval that ice is present', 'top surface conductive heat flux', 'ice area, categories', 'ice volume, categories', 'net surface heat flux, categories', 'top sfc conductive heat flux, cat', 'net sfc heat flux causing melt, cat', 'latent heat flux, category'), ('ucell longitude', 'tcell latitude', 'neutral density', 'neutral density edges', 'time', 'vertex number', 'tcell longitude', 'ucell latitude', 'T-cell i-mass transport on neutral rho', 'T-cell j-mass transport on neutral rho', 'T-cell i-mass transport from GM on neutral rho', 'T-cell j-mass transport from GM on neutral rho', 'T-cell i-mass transport from submesoscale param on neutral rho', 'T-cell j-mass transport from submesoscale param on neutral rho', 'cp*rho*dzt*dyt*u*temp binned to neutral density', 'cp*rho*dzt*dxt*v*temp binned to neutral density', 'cp*submeso_xflux*dyt*rho_dzt*temp binned to neutral density', 'cp*submeso_yflux*dxt*rho_dzt*temp binned to neutral density', 'cp*gm_xflux*dyt*rho_dzt*temp binned to neutral density', 'cp*gm_yflux*dxt*rho_dzt*temp binned to neutral density', 'cp*ndiffuse_xflux*dyt*rho_dzt*temp binned to neutral density', 'cp*ndiffuse_yflux*dxt*rho_dzt*temp binned to neutral density', 'mass transport from liquid+frozen mass and seaice melt+form (>0 enters ocean) binned to neutral density classes', 'Start time for average period', 'End time for average period', 'Length of average period', 'time axis boundaries')}), + ('variable_standard_name',{('', '', '', '', '', '', '', '', '', 'sea_floor_depth_below_geoid', '', '', '', '', '', '', '', '', '', ''), ('', '', '', '', '', 'sea_water_potential_temperature', 'sea_water_salinity', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', ''), ('', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', ''), ('', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', ''), ('', '', '', '', '', '', '', '', '', '', 'sea_water_conservative_temperature', 'sea_water_salinity', 'sea_water_age_since_surface_contact', 'sea_water_x_velocity', 'sea_water_y_velocity', '', 'cell_thickness', 'sea_water_potential_density', 'ocean_mass_x_transport', 'ocean_mass_y_transport', '', '', '', '', '', '', '', '', '', '', '', '', 'ocean_vertical_heat_diffusivity', '', '', '', ''), ('', '', '', '', '', '', 'sea_surface_height_above_geoid', '', 'square_of_sea_surface_height_above_geoid', 'ocean_mixed_layer_thickness_defined_by_sigma_t', 'water_flux_into_sea_water', '', 'water_flux_into_sea_water_from_rivers', 'water_flux_into_sea_water_from_icebergs', 'water_evaporation_flux', 'water_flux_into_sea_water_due_to_sea_ice_thermodynamics', '', 'downward_sea_ice_basal_salt_flux', '', '', '', 'surface_downward_x_stress', 'surface_downward_y_stress', '', '', '', '', 'sea_water_pressure_at_sea_floor', '', '', '', ''), ('', '', '', '', '', '', '', '', '', '', '', '', '', '', 'sea_water_conservative_temperature', 'sea_water_salinity', 'sea_water_age_since_surface_contact', 'sea_water_x_velocity', 'sea_water_y_velocity', '', 'sea_water_potential_density', '', '', '', '', '', ''), ('', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', ''), ('', '', '', '', '', '', '', '', 'sea_water_conservative_temperature', 'sea_water_salinity', 'sea_water_age_since_surface_contact', 'sea_water_x_velocity', 'sea_water_y_velocity', '', '', '', ''), ('', '', '', '', '', '', '', '', '', '', 'sea_water_conservative_temperature', 'sea_water_salinity', 'sea_water_age_since_surface_contact', 'sea_water_x_velocity', 'sea_water_y_velocity', '', 'cell_thickness', 'sea_water_potential_density', 'ocean_mass_x_transport', 'ocean_mass_y_transport', '', '', '', '', '', '')}), + ('variable_cell_methods',{('', '', '', '', '', 'time: point', 'time: point', 'time: point', 'time: point', 'time: point', 'time: point', 'time: point', 'time: point', 'time: point', 'time: point', 'time: point', 'time: point', 'time: point', 'time: point', 'time: point'), ('', '', '', '', '', '', '', '', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', '', '', '', ''), ('', '', '', '', '', '', '', '', '', '', '', '', '', '', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', '', '', '', ''), ('', '', '', '', '', '', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', '', '', '', ''), ('', '', '', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', '', '', '', ''), ('', '', '', '', '', '', '', '', '', '', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', '', '', '', ''), ('', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean'), ('', '', '', '', '', '', '', '', '', '', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', '', '', '', ''), ('', '', '', '', '', '', '', '', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', '', '', '', ''), ('', '', '', '', '', '', '', '', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', '', '', '', '')}), + ('variable_units',{('degrees_E', 'degrees_N', 'days since 1900-01-01 00:00:00', 'degrees_E', 'degrees_N', 'degrees_E', 'degrees_N', 'degrees_E', 'degrees_N', 'm', 'm', 'm', 'm', 'm', 'm', 'm^2', 'm^2', 'dimensionless', 'dimensionless', 'dimensionless'), ('none', 'days since 1900-01-01 00:00:00', 'none', '10^15 Joules', '10^15 Joules', 'deg_C', 'psu', 'kg/m^3', 'deg_C', 'psu', 'kg/1e18', 'Joule/1e25', 'meter', 'kg/sec (*1e-15)', '(kg/sec)/1e15', 'kg/sec/1e15', '(kg/sec)/1e15', '(kg/sec)/1e15', '(kg/sec)/1e15', '(kg/sec)/1e15', '(kg/sec)/1e15', '(kg/sec)/1e15', 'Watts/1e15', 'Watts/1e15', 'Watts/1e15', 'Watts/1e15', 'Watts/1e15', 'Watts/1e15', 'Watts/1e15', 'Watts/1e15', 'Watts/1e15', 'Watts/1e15', 'Watts/1e15', 'Watts/1e15', 'Watts/1e15', 'days since 1900-01-01 00:00:00', 'days since 1900-01-01 00:00:00', 'days', 'days'), ('degrees_E', 'degrees_N', 'meters', 'meters', 'days since 1900-01-01 00:00:00', 'none', 'degrees_E', 'degrees_N', 'meters', 'meters', 'K', 'psu', 'yr', 'm/sec', 'm/sec', 'm/sec', 'm', 'kg/m^3', 'kg/s', 'kg/s', 'kg/s', 'kg/s', 'kg/s', 'kg/s', 'Watts', 'Watts', 'Watt', 'Watt', 'Watt', 'Watt', 'Watt', 'Watt', 'm^2/s', 'days since 1900-01-01 00:00:00', 'days since 1900-01-01 00:00:00', 'days', 'days'), ('degrees_E', 'degrees_N', 'kg/m^3', 'kg/m^3', 'days since 1900-01-01 00:00:00', 'none', 'degrees_E', 'degrees_N', 'kg/s', 'kg/s', 'kg/s', 'kg/s', 'kg/s', 'kg/s', 'Watts', 'Watts', 'Watt', 'Watt', 'Watt', 'Watt', 'Watt', 'Watt', 'kg/sec', 'days since 1900-01-01 00:00:00', 'days since 1900-01-01 00:00:00', 'days', 'days'), ('degrees_E', 'degrees_N', 'meters', 'meters', 'days since 1900-01-01 00:00:00', 'none', 'degrees_E', 'degrees_N', 'meters', 'meters', 'K', 'psu', 'yr', 'm/sec', 'm/sec', 'm/sec', 'm', 'kg/m^3', 'kg/s', 'kg/s', 'kg/s', 'kg/s', 'days since 1900-01-01 00:00:00', 'days since 1900-01-01 00:00:00', 'days', 'days'), ('degrees_E', 'degrees_N', 'days since 1900-01-01 00:00:00', 'none', 'degrees_E', 'degrees_N', 'meter', 'meter', 'm^2', 'm', '(kg/m^3)*(m/sec)', '(kg/m^3)*(m/sec)', '(kg/m^3)*(m/sec)', '(kg/m^3)*(m/sec)', '(kg/m^3)*(m/sec)', '(kg/m^3)*(m/sec)', 'kg/(m^2*sec)', 'kg/(m^2*sec)', 'kg/(m^2*sec)', 'Watts/m^2', 'W/m^2', 'N/m^2', 'N/m^2', 'N/m^2', 'N/m^2', 'kg/s', 'kg/s', 'dbar', 'days since 1900-01-01 00:00:00', 'days since 1900-01-01 00:00:00', 'days', 'days'), ('degrees_E', 'degrees_N', 'meters', 'meters', 'days since 1900-01-01 00:00:00', 'none', 'degrees_E', 'degrees_N', 'meters', 'meters', 'degrees_E', 'degrees_N', 'kg/m^3', 'kg/m^3', 'K', 'psu', 'yr', 'm/sec', 'm/sec', 'm/sec', 'kg/m^3', 'kg/s', 'kg/s', 'days since 1900-01-01 00:00:00', 'days since 1900-01-01 00:00:00', 'days', 'days'), ('degrees_E', 'degrees_N', 'kg/m^3', 'kg/m^3', 'days since 1900-01-01 00:00:00', 'none', 'degrees_E', 'degrees_N', 'kg/s', 'kg/s', 'kg/s', 'kg/s', 'kg/s', 'kg/s', 'kg/sec', 'days since 1900-01-01 00:00:00', 'days since 1900-01-01 00:00:00', 'days', 'days'), ('days since 1900-01-01 00:00:00', 'days since 1900-01-01 00:00:00', 'degrees_east', 'degrees_north', 'degrees_east', 'degrees_north', 'm', '', '', 'm^2', 'm^2', 'm', 'm', 'm', 'm', 'm', 'm', 'radians', 'radians', 'm', 'm', 'C', '1', 'm/s', 'm/s', 'm/s', 'm/s', 'ppt', 'W/m^2', 'W/m^2', 'W/m^2', 'cm/day', 'cm/day', 'C', 'ppt', 'm/s', 'm/s', 'W/m^2', '1', 'W/m^2', '%', '%', '%', '%', '%', '%', '%', 'W/m^2', 'W/m^2', 'W/m^2', 'cm/day', 'C', 'cm/day', 'cm/day', 'cm/day', 'cm/day', 'cm/day', 'cm/day', 'cm/day', 'cm/day', 'kg/m^2/s', 'W/m^2', 'W/m^2', 'N/m^2', 'N/m^2', 'N/m^2', 'N/m^2', 'N/m^2', 'N/m^2', 'N/m^2', 'N/m^2', 'N/m^2', 'N/m^2', 'N/m', '%/day', '%/day', 'cm/day', 'cm/day', '%/day', '%/day', 'day of year', 'day of year', 'N/m^2', '1', 'W/m^2', '1', 'm', 'W/m^2', 'W/m^2', 'W/m^2', 'W/m^2'), ('degrees_E', 'degrees_N', 'meters', 'meters', 'days since 1900-01-01 00:00:00', 'none', 'degrees_E', 'degrees_N', 'K', 'psu', 'yr', 'm/sec', 'm/sec', 'days since 1900-01-01 00:00:00', 'days since 1900-01-01 00:00:00', 'days', 'days')}), + ('realm',{'ocean', 'seaIce'} ) + ], +) +def test_om2_metacat_vals_found(metacat, colname, expected, current_catalog): + # Test that the unique values in the column are as expected. I've truncated + # the unique values to the first 10 for brevity because I'm not typing out + # 3700255 unique values. + breakpoint() + cat = metacat["1deg_jra55_ryf9091_gadi"] + if colname not in ['variable','variable_long_name','variable_standard_name','variable_cell_methods','variable_units']: + found = set(cat.df[colname].unique()[:10]) + assert found == expected + else: + # These should fail because they contains lists (unhashable) + with pytest.raises(TypeError): + _found = set(cat.df[colname].unique()[:10]) + # cast to tuple to make them hashable, then check the length + found = set(cat.df[colname].apply(lambda x : tuple(x)).unique()[:10]) + assert found == expected + + # Repeat the test with the current catalog + cat = current_catalog["1deg_jra55_ryf9091_gadi"] + if colname not in ['variable','variable_long_name','variable_standard_name','variable_cell_methods','variable_units']: + found = set(cat.df[colname].unique()[:10]) + assert found >= expected + else: + # These should fail because they contains lists (unhashable) + with pytest.raises(TypeError): + _found = set(cat.df[colname].unique()[:10]) + # cast to tuple to make them hashable, then check the length + found = set(cat.df[colname].apply(lambda x : tuple(x)).unique()[:10]) + assert found >= expected + + + +@pytest.mark.parametrize( + "path, varname, first_ten_mean", + [ + ('/g/data/al33/replicas/CMIP5/combined/LASG-IAP/FGOALS-s2/amip/6hr/atmos/6hrLev/r1i1p1/v1/va/va_6hrLev_FGOALS-s2_amip_r1i1p1_198201010000-198212311800.nc', + 'va', + -6.1719556 + ), + ('/g/data/al33/replicas/CMIP5/combined/CMCC/CMCC-CMS/rcp45/day/seaIce/day/r1i1p1/v20120717/sit/sit_day_CMCC-CMS_rcp45_r1i1p1_20700101-20791231.nc', + 'sit', + np.nan + ), + ('/g/data/al33/replicas/CMIP5/output1/LASG-CESS/FGOALS-g2/abrupt4xCO2/mon/land/Lmon/r1i1p1/v1/prveg/prveg_Lmon_FGOALS-g2_abrupt4xCO2_r1i1p1_063001-063912.nc', + 'prveg', + 0.0 + ), + ('/g/data/al33/replicas/CMIP5/output1/CMCC/CMCC-CM/rcp85/6hr/atmos/6hrPlev/r1i1p1/v20170725/ta/ta_6hrPlev_CMCC-CM_rcp85_r1i1p1_2068030100-2068033118.nc', + 'ta', + 247.55783 + ), + ('/g/data/al33/replicas/CMIP5/combined/MOHC/HadGEM2-CC/rcp45/day/atmos/day/r1i1p1/v20120531/rlut/rlut_day_HadGEM2-CC_rcp45_r1i1p1_20351201-20401130.nc', + 'rlut', + 200.8389 + ), + ('/g/data/al33/replicas/CMIP5/combined/IPSL/IPSL-CM5A-LR/rcp26/day/atmos/cfDay/r1i1p1/v20120114/clw/clw_cfDay_IPSL-CM5A-LR_rcp26_r1i1p1_22060101-22151231.nc', + 'clw', + 0.0 + ), + ('/g/data/al33/replicas/CMIP5/output1/IPSL/IPSL-CM5A-LR/abrupt4xCO2/mon/atmos/Amon/r5i1p1/v20110921/rsds/rsds_Amon_IPSL-CM5A-LR_abrupt4xCO2_r5i1p1_185005-185504.nc', + 'rsds', + 153.31345 + ), + ('/g/data/al33/replicas/CMIP5/combined/MIROC/MIROC5/1pctCO2/mon/ocean/Omon/r1i1p1/v20131009/so/so_Omon_MIROC5_1pctCO2_r1i1p1_228501-228512.nc', + 'so', + 0.0 + ), + ('/g/data/al33/replicas/CMIP5/combined/CCCma/CanCM4/decadal1981/mon/ocean/Omon/r4i1p1/v20120622/hfls/hfls_Omon_CanCM4_decadal1981_r4i1p1_198201-199112.nc', + 'hfls', + np.nan + ), + ('/g/data/al33/replicas/CMIP5/combined/MPI-M/MPI-ESM-LR/decadal1992/mon/land/Lmon/r1i1p1/v20120529/cLitter/cLitter_Lmon_MPI-ESM-LR_decadal1992_r1i1p1_199301-200212.nc', + 'cLitter', + 0.0 + ), + ('/g/data/al33/replicas/CMIP5/output1/NASA-GISS/GISS-E2-R/1pctCO2/mon/aerosol/aero/r1i1p3/v20160425/emiss/emiss_aero_GISS-E2-R_1pctCO2_r1i1p3_192601-195012.nc', + 'emiss', + 0.0 + ), + ('/g/data/al33/replicas/CMIP5/combined/MIROC/MIROC-ESM-CHEM/rcp85/6hr/atmos/6hrLev/r1i1p1/v20111129/hus/hus_6hrLev_MIROC-ESM-CHEM_rcp85_r1i1p1_2063060106-2063070100.nc', + 'hus', + 2.2376184e-05 + ), + ('/g/data/al33/replicas/CMIP5/output1/MOHC/HadCM3/decadal1964/day/atmos/day/r6i3p1/v20140110/va/va_day_HadCM3_decadal1964_r6i3p1_19641101-19741230.nc', + 'va', + -4.4489503 + ), + ('/g/data/al33/replicas/CMIP5/combined/LASG-CESS/FGOALS-g2/rcp45/day/seaIce/day/r1i1p1/v20161204/sit/sit_day_FGOALS-g2_rcp45_r1i1p1_20200101-20201231.nc', + 'sit', + 0.0 + ), + ('/g/data/al33/replicas/CMIP5/output1/NCAR/CCSM4/decadal1991/mon/seaIce/OImon/r3i2p1/v20120529/grCongel/grCongel_OImon_CCSM4_decadal1991_r3i2p1_199101-200012.nc', + 'grCongel', + np.nan + ), + ('/g/data/al33/replicas/CMIP5/output1/LASG-CESS/FGOALS-g2/decadal1960/mon/atmos/Amon/r1i1p1/v3/rsdscs/rsdscs_Amon_FGOALS-g2_decadal1960_r1i1p1_198101-199012.nc', + 'rsdscs', + 81.612854 + ), + ('/g/data/al33/replicas/CMIP5/output1/MRI/MRI-CGCM3/amip/mon/atmos/cfMon/r1i1p1/v20131011/hur/hur_cfMon_MRI-CGCM3_amip_r1i1p1_198901-199812.nc', + 'hur', + 92.70255 + ), + ('/g/data/al33/replicas/CMIP5/combined/INM/inmcm4/amip/3hr/atmos/3hr/r1i1p1/v20110323/huss/huss_3hr_inmcm4_amip_r1i1p1_2006010100-2006123121.nc', + 'huss', + 0.0006068 + ), + ('/g/data/al33/replicas/cordex/output/EAS-22/ICTP/MOHC-HadGEM2-ES/historical/r1i1p1/RegCM4-4/v0/day/ua925/v20190502/ua925_EAS-22_MOHC-HadGEM2-ES_historical_r1i1p1_ICTP-RegCM4-4_v0_day_19800101-19801230.nc', + 'ua925', + -0.32869282 + ), + ('/g/data/al33/replicas/CMIP5/combined/CMCC/CMCC-CM/rcp45/6hr/atmos/6hrPlev/r1i1p1/v20170725/ua/ua_6hrPlev_CMCC-CM_rcp45_r1i1p1_2011010100-2011013118.nc', + 'ua', + -5.155791 + ), + ('/g/data/al33/replicas/CMIP5/output1/NASA-GISS/GISS-E2-H/rcp45/mon/atmos/Amon/r4i1p3/v20160512/ccb/ccb_Amon_GISS-E2-H_rcp45_r4i1p3_215101-220012.nc', + 'ccb', + np.nan + ), + ('/g/data/al33/replicas/CMIP5/output1/MPI-M/MPI-ESM-LR/decadal1971/mon/land/Lmon/r1i1p1/v20120529/grassFrac/grassFrac_Lmon_MPI-ESM-LR_decadal1971_r1i1p1_197201-198112.nc', + 'grassFrac', + 0.0 + ), + ('/g/data/al33/replicas/CMIP5/combined/CNRM-CERFACS/CNRM-CM5/rcp85/6hr/atmos/6hrLev/r1i1p1/v20120525/ta/ta_6hrLev_CNRM-CM5_rcp85_r1i1p1_2095100106-2095110100.nc', + 'ta', + 233.56656 + ), + ('/g/data/al33/replicas/CMIP5/combined/NASA-GISS/GISS-E2-R/historical/mon/atmos/Amon/r5i1p3/v20160503/ch4/ch4_Amon_GISS-E2-R_historical_r5i1p3_197601-200012.nc', + 'ch4', + np.nan + ), + ('/g/data/al33/replicas/CMIP5/output1/ICHEC/EC-EARTH/decadal1965/mon/ocean/Omon/r8i2p1/v20120710/so/so_Omon_EC-EARTH_decadal1965_r8i2p1_196601-197512.nc', + 'so', + 0.0 + ), + ('/g/data/al33/replicas/CMIP5/output1/NOAA-GFDL/GFDL-ESM2G/rcp60/mon/atmos/Amon/r1i1p1/v20120412/evspsbl/evspsbl_Amon_GFDL-ESM2G_rcp60_r1i1p1_202101-202512.nc', + 'evspsbl', + 1.9350772e-08 + ), + ('/g/data/al33/replicas/CMIP5/output1/MOHC/HadGEM2-CC/historical/day/landIce/day/r1i1p1/v20110930/snw/snw_day_HadGEM2-CC_historical_r1i1p1_19691201-19741130.nc', + 'snw', + 106252.55 + ), + ('/g/data/al33/replicas/CMIP5/combined/LASG-CESS/FGOALS-g2/decadal1980/day/atmos/day/r2i1p1/v1/psl/psl_day_FGOALS-g2_decadal1980_r2i1p1_20000101-20001231.nc', + 'psl', + 100025.44 + ), + ('/g/data/al33/replicas/CMIP5/combined/CMCC/CMCC-CMS/piControl/mon/atmos/Amon/r1i1p1/v20120717/clivi/clivi_Amon_CMCC-CMS_piControl_r1i1p1_394401-395312.nc', + 'clivi', + 0.00519617 + ), + ('/g/data/al33/replicas/CMIP5/output1/NASA-GISS/GISS-E2-R/historicalMisc/mon/atmos/Amon/r1i1p315/v20160503/cli/cli_Amon_GISS-E2-R_historicalMisc_r1i1p315_197601-200012.nc', + 'cli', + 3.8851712e-07 + ), + ('/g/data/al33/replicas/CMIP5/output1/MPI-M/MPI-ESM-LR/1pctCO2/mon/atmos/Amon/r1i1p1/v20120308/va/va_Amon_MPI-ESM-LR_1pctCO2_r1i1p1_190001-190912.nc', + 'va', + -4.030592 + ), + ('/g/data/al33/replicas/CMIP5/combined/NCC/NorESM1-ME/rcp85/mon/ocean/Omon/r1i1p1/v20130926/msftmyz/msftmyz_Omon_NorESM1-ME_rcp85_r1i1p1_204501-210012.nc', + 'msftmyz', + np.nan + ), + ('/g/data/al33/replicas/CMIP5/output1/NOAA-GFDL/GFDL-CM2p1/rcp45/mon/ocean/Omon/r3i1p1/v20110601/tauvo/tauvo_Omon_GFDL-CM2p1_rcp45_r3i1p1_201601-202012.nc', + 'tauvo', + np.nan + ), + ('/g/data/al33/replicas/CMIP5/combined/MIROC/MIROC4h/decadal1990/mon/ocean/Omon/r5i1p1/v20120326/wmo/wmo_Omon_MIROC4h_decadal1990_r5i1p1_199301-199306.nc', + 'wmo', + np.nan + ), + ('/g/data/al33/replicas/cordex/output/AUS-44i/CSIRO/CSIRO-BOM-ACCESS1-0/rcp85/r1i1p1/CCAM-2008/v1/day/vas/v20210518/vas_AUS-44i_CSIRO-BOM-ACCESS1-0_rcp85_r1i1p1_CSIRO-CCAM-2008_v1_day_20620101-20621231.nc', + 'vas', + -3.0647216 + ), + ] +) +def test_cmip5_values_correct(metacat,current_catalog,path, varname, first_ten_mean): + """ + All these values are taken from the first 10 values of the first dimension + to minimize the amount of data we need to load. They have been verified against + the production catalogd (as of 2024-11-20). + """ + cmip5_cat = metacat["cmip5_al33"] + esm_ds = cmip5_cat.search(path=path,variable=varname).to_dask() + assert esm_ds + # Subset to the first 10 values in the 0th dimension, first in all others + da = esm_ds[varname] + da = da.isel(**{da.dims[0]: slice(10), }) + da = da.isel(**{dim: 0 for dim in da.dims[1:]}) + da_val = da.mean(dim=da.dims[0], skipna=True).values - assert True + if np.isnan(da_val).all(): + vals_equal = np.isnan(first_ten_mean) + else: + vals_equal = da_val == pytest.approx(first_ten_mean, abs=1e-6) + + assert vals_equal - print("Catalog built successfully. Finish test tomorrow.") + # Check that the data is the same in the current catalog + cmip5_cat = current_catalog["cmip5_al33"] + ... # Repeat above -def test_translate_esm_datastore(): +@pytest.mark.order(after="test_catalog_subset_exists") +def test_built_esm_datastore(): pass diff --git a/pyproject.toml b/pyproject.toml index 97964914..25d196d1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,6 +30,10 @@ test = [ "pytest", "tox", ] +e2e = [ + "pytest", + "pytest-ordering", +] [project.scripts] catalog-build = "access_nri_intake.cli:build" From be30446bad38e7224029e6f0147132576a35457c Mon Sep 17 00:00:00 2001 From: Charles Turner Date: Wed, 20 Nov 2024 14:24:04 +1100 Subject: [PATCH 09/24] Pre-commit --- e2e/test_end_to_end.py | 2254 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 2086 insertions(+), 168 deletions(-) diff --git a/e2e/test_end_to_end.py b/e2e/test_end_to_end.py index c328eef5..3da36431 100644 --- a/e2e/test_end_to_end.py +++ b/e2e/test_end_to_end.py @@ -1,8 +1,7 @@ import os -import numpy as np -from pathlib import Path import intake +import numpy as np import pytest from access_nri_intake.cli import build @@ -35,6 +34,7 @@ def print_directory_tree(root, indent=""): else: print(f"{indent}├── {item}") + @pytest.fixture(scope="session") def current_catalog(): """ @@ -43,6 +43,7 @@ def current_catalog(): metacat = intake.cat.access_nri yield metacat + @pytest.fixture(scope="session") def metacat(BASE_DIR, v_num): # Build our subset of the catalog. This should take ~2 minutes with the PBS @@ -79,18 +80,17 @@ def test_open_dataframe_catalog(metacat): @pytest.mark.parametrize( - "name", - [ - "cmip5_al33", - "1deg_jra55_ryf9091_gadi", - ] + "name", + [ + "cmip5_al33", + "1deg_jra55_ryf9091_gadi", + ], ) def test_datastore_found(metacat, name): breakpoint() assert metacat[name] == metacat.search(name=name).to_source() - @pytest.mark.parametrize( "colname, expected", [ @@ -124,36 +124,42 @@ def test_cmip5_datastore_nunique(metacat, colname, expected): @pytest.mark.parametrize( "colname, expected", [ - ('filename',9367), - ('file_id',8), - ('path',9677), - ('filename_timestamp',9361), - ('frequency',3), - ('start_date',9361), - ('end_date',9360), - ('variable',15), - ('variable_long_name',15), - ('variable_standard_name',15), - ('variable_cell_methods',15), - ('variable_units',15), - ('realm',2) - ] + ("filename", 9367), + ("file_id", 8), + ("path", 9677), + ("filename_timestamp", 9361), + ("frequency", 3), + ("start_date", 9361), + ("end_date", 9360), + ("variable", 15), + ("variable_long_name", 15), + ("variable_standard_name", 15), + ("variable_cell_methods", 15), + ("variable_units", 15), + ("realm", 2), + ], ) def test_om2_datastore_nunique(metacat, colname, expected): cat = metacat["1deg_jra55_ryf9091_gadi"] - - if colname not in ['variable','variable_long_name','variable_standard_name','variable_cell_methods','variable_units']: + if colname not in [ + "variable", + "variable_long_name", + "variable_standard_name", + "variable_cell_methods", + "variable_units", + ]: assert len(cat.df[colname].unique()) == expected else: # These should fail because they contains lists (unhashable) with pytest.raises(TypeError): assert len(cat.df[colname].unique()) == expected # cast to tuple to make them hashable, then check the length - tuplified = cat.df[colname].apply(lambda x : tuple(x)).unique() + tuplified = cat.df[colname].apply(lambda x: tuple(x)).unique() assert len(tuplified) == expected + @pytest.mark.parametrize( "colname, expected", [ @@ -218,7 +224,10 @@ def test_om2_datastore_nunique(metacat, colname, expected): "sstClimSulfate", }, ), - ("frequency", {'3hr', '6hr', 'daily', 'day', 'fx', 'mon', 'monClim', 'subhr', 'yr'}), + ( + "frequency", + {"3hr", "6hr", "daily", "day", "fx", "mon", "monClim", "subhr", "yr"}, + ), ( "table", { @@ -294,31 +303,1915 @@ def test_cmip5_metacat_vals_found(metacat, colname, expected): assert found == expected + @pytest.mark.parametrize( "colname, expected", [ - ('filename',{'iceh.1900-07.nc', 'iceh.1900-05.nc', 'iceh.1900-04.nc', 'iceh.1900-03.nc', 'iceh.1900-08.nc', 'iceh.1900-06.nc', 'iceh.1900-02.nc', 'iceh.1900-09.nc', 'iceh.1900-10.nc', 'iceh.1900-01.nc'}), - ('file_id',{'iceh_XXXX_XX', 'ocean_scalar', 'ocean_snap', 'ocean_grid', 'ocean', 'ocean_wmass', 'ocean_heat', 'ocean_month'}), - ('path',{'/g/data/ik11/outputs/access-om2/1deg_jra55_ryf9091_gadi/output000/ice/OUTPUT/iceh.1900-01.nc', '/g/data/ik11/outputs/access-om2/1deg_jra55_ryf9091_gadi/output000/ice/OUTPUT/iceh.1900-07.nc', '/g/data/ik11/outputs/access-om2/1deg_jra55_ryf9091_gadi/output000/ice/OUTPUT/iceh.1900-08.nc', '/g/data/ik11/outputs/access-om2/1deg_jra55_ryf9091_gadi/output000/ice/OUTPUT/iceh.1900-03.nc', '/g/data/ik11/outputs/access-om2/1deg_jra55_ryf9091_gadi/output000/ice/OUTPUT/iceh.1900-06.nc', '/g/data/ik11/outputs/access-om2/1deg_jra55_ryf9091_gadi/output000/ice/OUTPUT/iceh.1900-10.nc', '/g/data/ik11/outputs/access-om2/1deg_jra55_ryf9091_gadi/output000/ice/OUTPUT/iceh.1900-05.nc', '/g/data/ik11/outputs/access-om2/1deg_jra55_ryf9091_gadi/output000/ice/OUTPUT/iceh.1900-04.nc', '/g/data/ik11/outputs/access-om2/1deg_jra55_ryf9091_gadi/output000/ice/OUTPUT/iceh.1900-09.nc', '/g/data/ik11/outputs/access-om2/1deg_jra55_ryf9091_gadi/output000/ice/OUTPUT/iceh.1900-02.nc'}), - ('filename_timestamp',{'1900-04', '1900-05', '1900-06', '1900-01', '1900-08', '1900-09', '1900-07', '1900-03', '1900-10', '1900-02'}), - ('frequency',{'fx', '1yr', '1mon'}), - ('start_date',{'1900-02-01, 00:00:00', '1900-03-01, 00:00:00', '1900-06-01, 00:00:00', '1900-10-01, 00:00:00', '1900-07-01, 00:00:00', '1900-09-01, 00:00:00', '1900-05-01, 00:00:00', '1900-01-01, 00:00:00', '1900-04-01, 00:00:00', '1900-08-01, 00:00:00'}), - ('end_date',{'1900-02-01, 00:00:00', '1900-03-01, 00:00:00', '1900-06-01, 00:00:00', '1900-10-01, 00:00:00', '1900-07-01, 00:00:00', '1900-09-01, 00:00:00', '1900-11-01, 00:00:00', '1900-05-01, 00:00:00', '1900-04-01, 00:00:00', '1900-08-01, 00:00:00'}), - ('variable',{('scalar_axis', 'time', 'nv', 'ke_tot', 'pe_tot', 'temp_global_ave', 'salt_global_ave', 'rhoave', 'temp_surface_ave', 'salt_surface_ave', 'total_ocean_salt', 'total_ocean_heat', 'eta_global', 'total_ocean_sfc_salt_flux_coupler', 'total_ocean_pme_river', 'total_ocean_river', 'total_ocean_runoff', 'total_ocean_calving', 'total_ocean_melt', 'total_ocean_evap', 'total_ocean_lprec', 'total_ocean_fprec', 'total_ocean_runoff_heat', 'total_ocean_calving_heat', 'total_ocean_river_heat', 'total_ocean_hflux_prec', 'total_ocean_hflux_evap', 'total_ocean_hflux_coupler', 'total_ocean_swflx', 'total_ocean_swflx_vis', 'total_ocean_lw_heat', 'total_ocean_evap_heat', 'total_ocean_fprec_melt_heat', 'total_ocean_calving_melt_heat', 'total_ocean_sens_heat', 'average_T1', 'average_T2', 'average_DT', 'time_bounds'), ('xt_ocean', 'yt_ocean', 'st_ocean', 'st_edges_ocean', 'time', 'nv', 'xu_ocean', 'yu_ocean', 'sw_ocean', 'sw_edges_ocean', 'temp', 'salt', 'age_global', 'u', 'v', 'wt', 'dzt', 'pot_rho_0', 'tx_trans', 'ty_trans', 'tx_trans_gm', 'ty_trans_gm', 'average_T1', 'average_T2', 'average_DT', 'time_bounds'), ('grid_xu_ocean', 'grid_yt_ocean', 'neutral', 'neutralrho_edges', 'time', 'nv', 'grid_xt_ocean', 'grid_yu_ocean', 'tx_trans_nrho', 'ty_trans_nrho', 'tx_trans_nrho_gm', 'ty_trans_nrho_gm', 'tx_trans_nrho_submeso', 'ty_trans_nrho_submeso', 'mass_pmepr_on_nrho', 'average_T1', 'average_T2', 'average_DT', 'time_bounds'), ('xt_ocean', 'yt_ocean', 'st_ocean', 'st_edges_ocean', 'time', 'nv', 'xu_ocean', 'yu_ocean', 'sw_ocean', 'sw_edges_ocean', 'temp', 'salt', 'age_global', 'u', 'v', 'wt', 'dzt', 'pot_rho_0', 'tx_trans', 'ty_trans', 'tx_trans_gm', 'ty_trans_gm', 'tx_trans_submeso', 'ty_trans_submeso', 'temp_xflux_adv', 'temp_yflux_adv', 'temp_xflux_gm', 'temp_yflux_gm', 'temp_xflux_submeso', 'temp_yflux_submeso', 'temp_xflux_ndiffuse', 'temp_yflux_ndiffuse', 'diff_cbt_t', 'average_T1', 'average_T2', 'average_DT', 'time_bounds'), ('xt_ocean', 'yt_ocean', 'time', 'nv', 'xu_ocean', 'yu_ocean', 'sea_level', 'eta_t', 'sea_levelsq', 'mld', 'pme_river', 'river', 'runoff', 'ice_calving', 'evap', 'melt', 'sfc_salt_flux_restore', 'sfc_salt_flux_ice', 'sfc_salt_flux_coupler', 'net_sfc_heating', 'frazil_3d_int_z', 'tau_x', 'tau_y', 'bmf_u', 'bmf_v', 'tx_trans_int_z', 'ty_trans_int_z', 'pbot_t', 'average_T1', 'average_T2', 'average_DT', 'time_bounds'), ('grid_xu_ocean', 'grid_yt_ocean', 'neutral', 'neutralrho_edges', 'time', 'nv', 'grid_xt_ocean', 'grid_yu_ocean', 'tx_trans_nrho', 'ty_trans_nrho', 'tx_trans_nrho_gm', 'ty_trans_nrho_gm', 'tx_trans_nrho_submeso', 'ty_trans_nrho_submeso', 'temp_xflux_adv_on_nrho', 'temp_yflux_adv_on_nrho', 'temp_xflux_submeso_on_nrho', 'temp_yflux_submeso_on_nrho', 'temp_xflux_gm_on_nrho', 'temp_yflux_gm_on_nrho', 'temp_xflux_ndiffuse_on_nrho', 'temp_yflux_ndiffuse_on_nrho', 'mass_pmepr_on_nrho', 'average_T1', 'average_T2', 'average_DT', 'time_bounds'), ('xt_ocean', 'yt_ocean', 'st_ocean', 'st_edges_ocean', 'time', 'nv', 'xu_ocean', 'yu_ocean', 'temp', 'salt', 'age_global', 'u', 'v', 'average_T1', 'average_T2', 'average_DT', 'time_bounds'), ('xt_ocean', 'yt_ocean', 'time', 'xu_ocean', 'yu_ocean', 'geolon_t', 'geolat_t', 'geolon_c', 'geolat_c', 'ht', 'hu', 'dxt', 'dyt', 'dxu', 'dyu', 'area_t', 'area_u', 'kmt', 'kmu', 'drag_coeff'), ('time', 'time_bounds', 'TLON', 'TLAT', 'ULON', 'ULAT', 'NCAT', 'tmask', 'blkmask', 'tarea', 'uarea', 'dxt', 'dyt', 'dxu', 'dyu', 'HTN', 'HTE', 'ANGLE', 'ANGLET', 'hi_m', 'hs_m', 'Tsfc_m', 'aice_m', 'uvel_m', 'vvel_m', 'uatm_m', 'vatm_m', 'sice_m', 'fswdn_m', 'fswup_m', 'flwdn_m', 'snow_ai_m', 'rain_ai_m', 'sst_m', 'sss_m', 'uocn_m', 'vocn_m', 'frzmlt_m', 'fswfac_m', 'fswabs_ai_m', 'albsni_m', 'alvdr_ai_m', 'alidr_ai_m', 'alvdf_ai_m', 'alidf_ai_m', 'albice_m', 'albsno_m', 'flat_ai_m', 'fsens_ai_m', 'flwup_ai_m', 'evap_ai_m', 'Tair_m', 'congel_m', 'frazil_m', 'snoice_m', 'meltt_m', 'melts_m', 'meltb_m', 'meltl_m', 'fresh_ai_m', 'fsalt_ai_m', 'fhocn_ai_m', 'fswthru_ai_m', 'strairx_m', 'strairy_m', 'strtltx_m', 'strtlty_m', 'strcorx_m', 'strcory_m', 'strocnx_m', 'strocny_m', 'strintx_m', 'strinty_m', 'strength_m', 'divu_m', 'shear_m', 'dvidtt_m', 'dvidtd_m', 'daidtt_m', 'daidtd_m', 'mlt_onset_m', 'frz_onset_m', 'trsig_m', 'ice_present_m', 'fcondtop_ai_m', 'aicen_m', 'vicen_m', 'fsurfn_ai_m', 'fcondtopn_ai_m', 'fmelttn_ai_m', 'flatn_ai_m'), ('xt_ocean', 'yt_ocean', 'st_ocean', 'st_edges_ocean', 'time', 'nv', 'xu_ocean', 'yu_ocean', 'sw_ocean', 'sw_edges_ocean', 'grid_xt_ocean', 'grid_yu_ocean', 'potrho', 'potrho_edges', 'temp', 'salt', 'age_global', 'u', 'v', 'wt', 'pot_rho_0', 'ty_trans_rho', 'ty_trans_rho_gm', 'average_T1', 'average_T2', 'average_DT', 'time_bounds')}), - ('variable_long_name',{('tcell longitude', 'tcell latitude', 'time', 'vertex number', 'ucell longitude', 'ucell latitude', 'effective sea level (eta_t + patm/(rho0*g)) on T cells', 'surface height on T cells [Boussinesq (volume conserving) model]', 'square of effective sea level (eta_t + patm/(rho0*g)) on T cells', 'mixed layer depth determined by density criteria', 'mass flux of precip-evap+river via sbc (liquid, frozen, evaporation)', 'mass flux of river (runoff + calving) entering ocean', 'mass flux of liquid river runoff entering ocean', 'mass flux of land ice calving into ocean', 'mass flux from evaporation/condensation (>0 enters ocean)', 'water flux transferred with sea ice form/melt (>0 enters ocean)', 'sfc_salt_flux_restore: flux from restoring term', 'sfc_salt_flux_ice', 'sfc_salt_flux_coupler: flux from the coupler', 'surface ocean heat flux coming through coupler and mass transfer', 'Vertical sum of ocn frazil heat flux over time step', 'i-directed wind stress forcing u-velocity', 'j-directed wind stress forcing v-velocity', 'Bottom u-stress via bottom drag', 'Bottom v-stress via bottom drag', 'T-cell i-mass transport vertically summed', 'T-cell j-mass transport vertically summed', 'bottom pressure on T cells [Boussinesq (volume conserving) model]', 'Start time for average period', 'End time for average period', 'Length of average period', 'time axis boundaries'), ('none', 'time', 'vertex number', 'Globally integrated ocean kinetic energy', 'Globally integrated ocean potential energy', 'Global mean temp in liquid seawater', 'Global mean salt in liquid seawater', 'global mean ocean in-situ density from ocean_density_mod', 'Global mass weighted mean surface temp in liquid seawater', 'Global mass weighted mean surface salt in liquid seawater', 'total mass of salt in liquid seawater', 'Total heat in the liquid ocean referenced to 0degC', 'global ave eta_t plus patm_t/(g*rho0)', 'total_ocean_sfc_salt_flux_coupler', 'total ocean precip-evap+river via sbc (liquid, frozen, evaporation)', 'total liquid river water and calving ice entering ocean', 'total liquid river runoff (>0 water enters ocean)', 'total water entering ocean from calving land ice', 'total liquid water melted from sea ice (>0 enters ocean)', 'total evaporative ocean mass flux (>0 enters ocean)', 'total liquid precip into ocean (>0 enters ocean)', 'total snow falling onto ocean (>0 enters ocean)', 'total ocean heat flux from liquid river runoff', 'total ocean heat flux from calving land ice', 'total heat flux into ocean from liquid+solid runoff (<0 cools ocean)', 'total ocean heat flux from precip transferring water across surface', 'total ocean heat flux from evap transferring water across surface', 'total surface heat flux passed through coupler', 'total shortwave flux into ocean (>0 heats ocean)', 'total visible shortwave into ocean (>0 heats ocean)', 'total longwave flux into ocean (<0 cools ocean)', 'total latent heat flux into ocean (<0 cools ocean)', 'total heat flux to melt frozen precip (<0 cools ocean)', 'total heat flux to melt frozen land ice (<0 cools ocean)', 'total sensible heat into ocean (<0 cools ocean)', 'Start time for average period', 'End time for average period', 'Length of average period', 'time axis boundaries'), ('tcell longitude', 'tcell latitude', 'time', 'ucell longitude', 'ucell latitude', 'tracer longitude', 'tracer latitude', 'uv longitude', 'uv latitude', 'ocean depth on t-cells', 'ocean depth on u-cells', 'ocean dxt on t-cells', 'ocean dyt on t-cells', 'ocean dxu on u-cells', 'ocean dyu on u-cells', 'tracer cell area', 'velocity cell area', 'number of depth levels on t-grid', 'number of depth levels on u-grid', 'Dimensionless bottom drag coefficient'), ('tcell longitude', 'tcell latitude', 'tcell zstar depth', 'tcell zstar depth edges', 'time', 'vertex number', 'ucell longitude', 'ucell latitude', 'Conservative temperature', 'Practical Salinity', 'Age (global)', 'i-current', 'j-current', 'Start time for average period', 'End time for average period', 'Length of average period', 'time axis boundaries'), ('tcell longitude', 'tcell latitude', 'tcell zstar depth', 'tcell zstar depth edges', 'time', 'vertex number', 'ucell longitude', 'ucell latitude', 'ucell zstar depth', 'ucell zstar depth edges', 'Conservative temperature', 'Practical Salinity', 'Age (global)', 'i-current', 'j-current', 'dia-surface velocity T-points', 't-cell thickness', 'potential density referenced to 0 dbar', 'T-cell i-mass transport', 'T-cell j-mass transport', 'T-cell mass i-transport from GM', 'T-cell mass j-transport from GM', 'T-cell mass i-transport from submesoscale param', 'T-cell mass j-transport from submesoscale param', 'cp*rho*dzt*dyt*u*temp', 'cp*rho*dzt*dxt*v*temp', 'cp*gm_xflux*dyt*rho_dzt*temp', 'cp*gm_yflux*dxt*rho_dzt*temp', 'cp*submeso_xflux*dyt*rho_dzt*temp', 'cp*submeso_yflux*dxt*rho_dzt*temp', 'cp*ndiffuse_xflux*dyt*rho_dzt*temp', 'cp*ndiffuse_yflux*dxt*rho_dzt*temp', 'total vert diff_cbt(temp) (w/o neutral included)', 'Start time for average period', 'End time for average period', 'Length of average period', 'time axis boundaries'), ('tcell longitude', 'tcell latitude', 'tcell zstar depth', 'tcell zstar depth edges', 'time', 'vertex number', 'ucell longitude', 'ucell latitude', 'ucell zstar depth', 'ucell zstar depth edges', 'Conservative temperature', 'Practical Salinity', 'Age (global)', 'i-current', 'j-current', 'dia-surface velocity T-points', 't-cell thickness', 'potential density referenced to 0 dbar', 'T-cell i-mass transport', 'T-cell j-mass transport', 'T-cell mass i-transport from GM', 'T-cell mass j-transport from GM', 'Start time for average period', 'End time for average period', 'Length of average period', 'time axis boundaries'), ('ucell longitude', 'tcell latitude', 'neutral density', 'neutral density edges', 'time', 'vertex number', 'tcell longitude', 'ucell latitude', 'T-cell i-mass transport on neutral rho', 'T-cell j-mass transport on neutral rho', 'T-cell i-mass transport from GM on neutral rho', 'T-cell j-mass transport from GM on neutral rho', 'T-cell i-mass transport from submesoscale param on neutral rho', 'T-cell j-mass transport from submesoscale param on neutral rho', 'mass transport from liquid+frozen mass and seaice melt+form (>0 enters ocean) binned to neutral density classes', 'Start time for average period', 'End time for average period', 'Length of average period', 'time axis boundaries'), ('tcell longitude', 'tcell latitude', 'tcell zstar depth', 'tcell zstar depth edges', 'time', 'vertex number', 'ucell longitude', 'ucell latitude', 'ucell zstar depth', 'ucell zstar depth edges', 'tcell longitude', 'ucell latitude', 'potential density', 'potential density edges', 'Conservative temperature', 'Practical Salinity', 'Age (global)', 'i-current', 'j-current', 'dia-surface velocity T-points', 'potential density referenced to 0 dbar', 'T-cell j-mass transport on pot_rho', 'T-cell j-mass transport from GM on pot_rho', 'Start time for average period', 'End time for average period', 'Length of average period', 'time axis boundaries'), ('model time', 'boundaries for time-averaging interval', 'T grid center longitude', 'T grid center latitude', 'U grid center longitude', 'U grid center latitude', 'category maximum thickness', 'ocean grid mask', 'ice grid block mask', 'area of T grid cells', 'area of U grid cells', 'T cell width through middle', 'T cell height through middle', 'U cell width through middle', 'U cell height through middle', 'T cell width on North side', 'T cell width on East side', 'angle grid makes with latitude line on U grid', 'angle grid makes with latitude line on T grid', 'grid cell mean ice thickness', 'grid cell mean snow thickness', 'snow/ice surface temperature', 'ice area (aggregate)', 'ice velocity (x)', 'ice velocity (y)', 'atm velocity (x)', 'atm velocity (y)', 'bulk ice salinity', 'down solar flux', 'upward solar flux', 'down longwave flux', 'snowfall rate', 'rainfall rate', 'sea surface temperature', 'sea surface salinity', 'ocean current (x)', 'ocean current (y)', 'freeze/melt potential', 'shortwave scaling factor', 'snow/ice/ocn absorbed solar flux', 'snow/ice broad band albedo', 'visible direct albedo', 'near IR direct albedo', 'visible diffuse albedo', 'near IR diffuse albedo', 'bare ice albedo', 'snow albedo', 'latent heat flux', 'sensible heat flux', 'upward longwave flux', 'evaporative water flux', 'air temperature', 'congelation ice growth', 'frazil ice growth', 'snow-ice formation', 'top ice melt', 'top snow melt', 'basal ice melt', 'lateral ice melt', 'freshwtr flx ice to ocn', 'salt flux ice to ocean', 'heat flux ice to ocean', 'SW flux thru ice to ocean', 'atm/ice stress (x)', 'atm/ice stress (y)', 'sea sfc tilt stress (x)', 'sea sfc tilt stress (y)', 'coriolis stress (x)', 'coriolis stress (y)', 'ocean/ice stress (x)', 'ocean/ice stress (y)', 'internal ice stress (x)', 'internal ice stress (y)', 'compressive ice strength', 'strain rate (divergence)', 'strain rate (shear)', 'volume tendency thermo', 'volume tendency dynamics', 'area tendency thermo', 'area tendency dynamics', 'melt onset date', 'freeze onset date', 'internal stress tensor trace', 'fraction of time-avg interval that ice is present', 'top surface conductive heat flux', 'ice area, categories', 'ice volume, categories', 'net surface heat flux, categories', 'top sfc conductive heat flux, cat', 'net sfc heat flux causing melt, cat', 'latent heat flux, category'), ('ucell longitude', 'tcell latitude', 'neutral density', 'neutral density edges', 'time', 'vertex number', 'tcell longitude', 'ucell latitude', 'T-cell i-mass transport on neutral rho', 'T-cell j-mass transport on neutral rho', 'T-cell i-mass transport from GM on neutral rho', 'T-cell j-mass transport from GM on neutral rho', 'T-cell i-mass transport from submesoscale param on neutral rho', 'T-cell j-mass transport from submesoscale param on neutral rho', 'cp*rho*dzt*dyt*u*temp binned to neutral density', 'cp*rho*dzt*dxt*v*temp binned to neutral density', 'cp*submeso_xflux*dyt*rho_dzt*temp binned to neutral density', 'cp*submeso_yflux*dxt*rho_dzt*temp binned to neutral density', 'cp*gm_xflux*dyt*rho_dzt*temp binned to neutral density', 'cp*gm_yflux*dxt*rho_dzt*temp binned to neutral density', 'cp*ndiffuse_xflux*dyt*rho_dzt*temp binned to neutral density', 'cp*ndiffuse_yflux*dxt*rho_dzt*temp binned to neutral density', 'mass transport from liquid+frozen mass and seaice melt+form (>0 enters ocean) binned to neutral density classes', 'Start time for average period', 'End time for average period', 'Length of average period', 'time axis boundaries')}), - ('variable_standard_name',{('', '', '', '', '', '', '', '', '', 'sea_floor_depth_below_geoid', '', '', '', '', '', '', '', '', '', ''), ('', '', '', '', '', 'sea_water_potential_temperature', 'sea_water_salinity', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', ''), ('', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', ''), ('', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', ''), ('', '', '', '', '', '', '', '', '', '', 'sea_water_conservative_temperature', 'sea_water_salinity', 'sea_water_age_since_surface_contact', 'sea_water_x_velocity', 'sea_water_y_velocity', '', 'cell_thickness', 'sea_water_potential_density', 'ocean_mass_x_transport', 'ocean_mass_y_transport', '', '', '', '', '', '', '', '', '', '', '', '', 'ocean_vertical_heat_diffusivity', '', '', '', ''), ('', '', '', '', '', '', 'sea_surface_height_above_geoid', '', 'square_of_sea_surface_height_above_geoid', 'ocean_mixed_layer_thickness_defined_by_sigma_t', 'water_flux_into_sea_water', '', 'water_flux_into_sea_water_from_rivers', 'water_flux_into_sea_water_from_icebergs', 'water_evaporation_flux', 'water_flux_into_sea_water_due_to_sea_ice_thermodynamics', '', 'downward_sea_ice_basal_salt_flux', '', '', '', 'surface_downward_x_stress', 'surface_downward_y_stress', '', '', '', '', 'sea_water_pressure_at_sea_floor', '', '', '', ''), ('', '', '', '', '', '', '', '', '', '', '', '', '', '', 'sea_water_conservative_temperature', 'sea_water_salinity', 'sea_water_age_since_surface_contact', 'sea_water_x_velocity', 'sea_water_y_velocity', '', 'sea_water_potential_density', '', '', '', '', '', ''), ('', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', ''), ('', '', '', '', '', '', '', '', 'sea_water_conservative_temperature', 'sea_water_salinity', 'sea_water_age_since_surface_contact', 'sea_water_x_velocity', 'sea_water_y_velocity', '', '', '', ''), ('', '', '', '', '', '', '', '', '', '', 'sea_water_conservative_temperature', 'sea_water_salinity', 'sea_water_age_since_surface_contact', 'sea_water_x_velocity', 'sea_water_y_velocity', '', 'cell_thickness', 'sea_water_potential_density', 'ocean_mass_x_transport', 'ocean_mass_y_transport', '', '', '', '', '', '')}), - ('variable_cell_methods',{('', '', '', '', '', 'time: point', 'time: point', 'time: point', 'time: point', 'time: point', 'time: point', 'time: point', 'time: point', 'time: point', 'time: point', 'time: point', 'time: point', 'time: point', 'time: point', 'time: point'), ('', '', '', '', '', '', '', '', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', '', '', '', ''), ('', '', '', '', '', '', '', '', '', '', '', '', '', '', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', '', '', '', ''), ('', '', '', '', '', '', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', '', '', '', ''), ('', '', '', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', '', '', '', ''), ('', '', '', '', '', '', '', '', '', '', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', '', '', '', ''), ('', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean'), ('', '', '', '', '', '', '', '', '', '', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', '', '', '', ''), ('', '', '', '', '', '', '', '', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', '', '', '', ''), ('', '', '', '', '', '', '', '', 'time: mean', 'time: mean', 'time: mean', 'time: mean', 'time: mean', '', '', '', '')}), - ('variable_units',{('degrees_E', 'degrees_N', 'days since 1900-01-01 00:00:00', 'degrees_E', 'degrees_N', 'degrees_E', 'degrees_N', 'degrees_E', 'degrees_N', 'm', 'm', 'm', 'm', 'm', 'm', 'm^2', 'm^2', 'dimensionless', 'dimensionless', 'dimensionless'), ('none', 'days since 1900-01-01 00:00:00', 'none', '10^15 Joules', '10^15 Joules', 'deg_C', 'psu', 'kg/m^3', 'deg_C', 'psu', 'kg/1e18', 'Joule/1e25', 'meter', 'kg/sec (*1e-15)', '(kg/sec)/1e15', 'kg/sec/1e15', '(kg/sec)/1e15', '(kg/sec)/1e15', '(kg/sec)/1e15', '(kg/sec)/1e15', '(kg/sec)/1e15', '(kg/sec)/1e15', 'Watts/1e15', 'Watts/1e15', 'Watts/1e15', 'Watts/1e15', 'Watts/1e15', 'Watts/1e15', 'Watts/1e15', 'Watts/1e15', 'Watts/1e15', 'Watts/1e15', 'Watts/1e15', 'Watts/1e15', 'Watts/1e15', 'days since 1900-01-01 00:00:00', 'days since 1900-01-01 00:00:00', 'days', 'days'), ('degrees_E', 'degrees_N', 'meters', 'meters', 'days since 1900-01-01 00:00:00', 'none', 'degrees_E', 'degrees_N', 'meters', 'meters', 'K', 'psu', 'yr', 'm/sec', 'm/sec', 'm/sec', 'm', 'kg/m^3', 'kg/s', 'kg/s', 'kg/s', 'kg/s', 'kg/s', 'kg/s', 'Watts', 'Watts', 'Watt', 'Watt', 'Watt', 'Watt', 'Watt', 'Watt', 'm^2/s', 'days since 1900-01-01 00:00:00', 'days since 1900-01-01 00:00:00', 'days', 'days'), ('degrees_E', 'degrees_N', 'kg/m^3', 'kg/m^3', 'days since 1900-01-01 00:00:00', 'none', 'degrees_E', 'degrees_N', 'kg/s', 'kg/s', 'kg/s', 'kg/s', 'kg/s', 'kg/s', 'Watts', 'Watts', 'Watt', 'Watt', 'Watt', 'Watt', 'Watt', 'Watt', 'kg/sec', 'days since 1900-01-01 00:00:00', 'days since 1900-01-01 00:00:00', 'days', 'days'), ('degrees_E', 'degrees_N', 'meters', 'meters', 'days since 1900-01-01 00:00:00', 'none', 'degrees_E', 'degrees_N', 'meters', 'meters', 'K', 'psu', 'yr', 'm/sec', 'm/sec', 'm/sec', 'm', 'kg/m^3', 'kg/s', 'kg/s', 'kg/s', 'kg/s', 'days since 1900-01-01 00:00:00', 'days since 1900-01-01 00:00:00', 'days', 'days'), ('degrees_E', 'degrees_N', 'days since 1900-01-01 00:00:00', 'none', 'degrees_E', 'degrees_N', 'meter', 'meter', 'm^2', 'm', '(kg/m^3)*(m/sec)', '(kg/m^3)*(m/sec)', '(kg/m^3)*(m/sec)', '(kg/m^3)*(m/sec)', '(kg/m^3)*(m/sec)', '(kg/m^3)*(m/sec)', 'kg/(m^2*sec)', 'kg/(m^2*sec)', 'kg/(m^2*sec)', 'Watts/m^2', 'W/m^2', 'N/m^2', 'N/m^2', 'N/m^2', 'N/m^2', 'kg/s', 'kg/s', 'dbar', 'days since 1900-01-01 00:00:00', 'days since 1900-01-01 00:00:00', 'days', 'days'), ('degrees_E', 'degrees_N', 'meters', 'meters', 'days since 1900-01-01 00:00:00', 'none', 'degrees_E', 'degrees_N', 'meters', 'meters', 'degrees_E', 'degrees_N', 'kg/m^3', 'kg/m^3', 'K', 'psu', 'yr', 'm/sec', 'm/sec', 'm/sec', 'kg/m^3', 'kg/s', 'kg/s', 'days since 1900-01-01 00:00:00', 'days since 1900-01-01 00:00:00', 'days', 'days'), ('degrees_E', 'degrees_N', 'kg/m^3', 'kg/m^3', 'days since 1900-01-01 00:00:00', 'none', 'degrees_E', 'degrees_N', 'kg/s', 'kg/s', 'kg/s', 'kg/s', 'kg/s', 'kg/s', 'kg/sec', 'days since 1900-01-01 00:00:00', 'days since 1900-01-01 00:00:00', 'days', 'days'), ('days since 1900-01-01 00:00:00', 'days since 1900-01-01 00:00:00', 'degrees_east', 'degrees_north', 'degrees_east', 'degrees_north', 'm', '', '', 'm^2', 'm^2', 'm', 'm', 'm', 'm', 'm', 'm', 'radians', 'radians', 'm', 'm', 'C', '1', 'm/s', 'm/s', 'm/s', 'm/s', 'ppt', 'W/m^2', 'W/m^2', 'W/m^2', 'cm/day', 'cm/day', 'C', 'ppt', 'm/s', 'm/s', 'W/m^2', '1', 'W/m^2', '%', '%', '%', '%', '%', '%', '%', 'W/m^2', 'W/m^2', 'W/m^2', 'cm/day', 'C', 'cm/day', 'cm/day', 'cm/day', 'cm/day', 'cm/day', 'cm/day', 'cm/day', 'cm/day', 'kg/m^2/s', 'W/m^2', 'W/m^2', 'N/m^2', 'N/m^2', 'N/m^2', 'N/m^2', 'N/m^2', 'N/m^2', 'N/m^2', 'N/m^2', 'N/m^2', 'N/m^2', 'N/m', '%/day', '%/day', 'cm/day', 'cm/day', '%/day', '%/day', 'day of year', 'day of year', 'N/m^2', '1', 'W/m^2', '1', 'm', 'W/m^2', 'W/m^2', 'W/m^2', 'W/m^2'), ('degrees_E', 'degrees_N', 'meters', 'meters', 'days since 1900-01-01 00:00:00', 'none', 'degrees_E', 'degrees_N', 'K', 'psu', 'yr', 'm/sec', 'm/sec', 'days since 1900-01-01 00:00:00', 'days since 1900-01-01 00:00:00', 'days', 'days')}), - ('realm',{'ocean', 'seaIce'} ) + ( + "filename", + { + "iceh.1900-07.nc", + "iceh.1900-05.nc", + "iceh.1900-04.nc", + "iceh.1900-03.nc", + "iceh.1900-08.nc", + "iceh.1900-06.nc", + "iceh.1900-02.nc", + "iceh.1900-09.nc", + "iceh.1900-10.nc", + "iceh.1900-01.nc", + }, + ), + ( + "file_id", + { + "iceh_XXXX_XX", + "ocean_scalar", + "ocean_snap", + "ocean_grid", + "ocean", + "ocean_wmass", + "ocean_heat", + "ocean_month", + }, + ), + ( + "path", + { + "/g/data/ik11/outputs/access-om2/1deg_jra55_ryf9091_gadi/output000/ice/OUTPUT/iceh.1900-01.nc", + "/g/data/ik11/outputs/access-om2/1deg_jra55_ryf9091_gadi/output000/ice/OUTPUT/iceh.1900-07.nc", + "/g/data/ik11/outputs/access-om2/1deg_jra55_ryf9091_gadi/output000/ice/OUTPUT/iceh.1900-08.nc", + "/g/data/ik11/outputs/access-om2/1deg_jra55_ryf9091_gadi/output000/ice/OUTPUT/iceh.1900-03.nc", + "/g/data/ik11/outputs/access-om2/1deg_jra55_ryf9091_gadi/output000/ice/OUTPUT/iceh.1900-06.nc", + "/g/data/ik11/outputs/access-om2/1deg_jra55_ryf9091_gadi/output000/ice/OUTPUT/iceh.1900-10.nc", + "/g/data/ik11/outputs/access-om2/1deg_jra55_ryf9091_gadi/output000/ice/OUTPUT/iceh.1900-05.nc", + "/g/data/ik11/outputs/access-om2/1deg_jra55_ryf9091_gadi/output000/ice/OUTPUT/iceh.1900-04.nc", + "/g/data/ik11/outputs/access-om2/1deg_jra55_ryf9091_gadi/output000/ice/OUTPUT/iceh.1900-09.nc", + "/g/data/ik11/outputs/access-om2/1deg_jra55_ryf9091_gadi/output000/ice/OUTPUT/iceh.1900-02.nc", + }, + ), + ( + "filename_timestamp", + { + "1900-04", + "1900-05", + "1900-06", + "1900-01", + "1900-08", + "1900-09", + "1900-07", + "1900-03", + "1900-10", + "1900-02", + }, + ), + ("frequency", {"fx", "1yr", "1mon"}), + ( + "start_date", + { + "1900-02-01, 00:00:00", + "1900-03-01, 00:00:00", + "1900-06-01, 00:00:00", + "1900-10-01, 00:00:00", + "1900-07-01, 00:00:00", + "1900-09-01, 00:00:00", + "1900-05-01, 00:00:00", + "1900-01-01, 00:00:00", + "1900-04-01, 00:00:00", + "1900-08-01, 00:00:00", + }, + ), + ( + "end_date", + { + "1900-02-01, 00:00:00", + "1900-03-01, 00:00:00", + "1900-06-01, 00:00:00", + "1900-10-01, 00:00:00", + "1900-07-01, 00:00:00", + "1900-09-01, 00:00:00", + "1900-11-01, 00:00:00", + "1900-05-01, 00:00:00", + "1900-04-01, 00:00:00", + "1900-08-01, 00:00:00", + }, + ), + ( + "variable", + { + ( + "scalar_axis", + "time", + "nv", + "ke_tot", + "pe_tot", + "temp_global_ave", + "salt_global_ave", + "rhoave", + "temp_surface_ave", + "salt_surface_ave", + "total_ocean_salt", + "total_ocean_heat", + "eta_global", + "total_ocean_sfc_salt_flux_coupler", + "total_ocean_pme_river", + "total_ocean_river", + "total_ocean_runoff", + "total_ocean_calving", + "total_ocean_melt", + "total_ocean_evap", + "total_ocean_lprec", + "total_ocean_fprec", + "total_ocean_runoff_heat", + "total_ocean_calving_heat", + "total_ocean_river_heat", + "total_ocean_hflux_prec", + "total_ocean_hflux_evap", + "total_ocean_hflux_coupler", + "total_ocean_swflx", + "total_ocean_swflx_vis", + "total_ocean_lw_heat", + "total_ocean_evap_heat", + "total_ocean_fprec_melt_heat", + "total_ocean_calving_melt_heat", + "total_ocean_sens_heat", + "average_T1", + "average_T2", + "average_DT", + "time_bounds", + ), + ( + "xt_ocean", + "yt_ocean", + "st_ocean", + "st_edges_ocean", + "time", + "nv", + "xu_ocean", + "yu_ocean", + "sw_ocean", + "sw_edges_ocean", + "temp", + "salt", + "age_global", + "u", + "v", + "wt", + "dzt", + "pot_rho_0", + "tx_trans", + "ty_trans", + "tx_trans_gm", + "ty_trans_gm", + "average_T1", + "average_T2", + "average_DT", + "time_bounds", + ), + ( + "grid_xu_ocean", + "grid_yt_ocean", + "neutral", + "neutralrho_edges", + "time", + "nv", + "grid_xt_ocean", + "grid_yu_ocean", + "tx_trans_nrho", + "ty_trans_nrho", + "tx_trans_nrho_gm", + "ty_trans_nrho_gm", + "tx_trans_nrho_submeso", + "ty_trans_nrho_submeso", + "mass_pmepr_on_nrho", + "average_T1", + "average_T2", + "average_DT", + "time_bounds", + ), + ( + "xt_ocean", + "yt_ocean", + "st_ocean", + "st_edges_ocean", + "time", + "nv", + "xu_ocean", + "yu_ocean", + "sw_ocean", + "sw_edges_ocean", + "temp", + "salt", + "age_global", + "u", + "v", + "wt", + "dzt", + "pot_rho_0", + "tx_trans", + "ty_trans", + "tx_trans_gm", + "ty_trans_gm", + "tx_trans_submeso", + "ty_trans_submeso", + "temp_xflux_adv", + "temp_yflux_adv", + "temp_xflux_gm", + "temp_yflux_gm", + "temp_xflux_submeso", + "temp_yflux_submeso", + "temp_xflux_ndiffuse", + "temp_yflux_ndiffuse", + "diff_cbt_t", + "average_T1", + "average_T2", + "average_DT", + "time_bounds", + ), + ( + "xt_ocean", + "yt_ocean", + "time", + "nv", + "xu_ocean", + "yu_ocean", + "sea_level", + "eta_t", + "sea_levelsq", + "mld", + "pme_river", + "river", + "runoff", + "ice_calving", + "evap", + "melt", + "sfc_salt_flux_restore", + "sfc_salt_flux_ice", + "sfc_salt_flux_coupler", + "net_sfc_heating", + "frazil_3d_int_z", + "tau_x", + "tau_y", + "bmf_u", + "bmf_v", + "tx_trans_int_z", + "ty_trans_int_z", + "pbot_t", + "average_T1", + "average_T2", + "average_DT", + "time_bounds", + ), + ( + "grid_xu_ocean", + "grid_yt_ocean", + "neutral", + "neutralrho_edges", + "time", + "nv", + "grid_xt_ocean", + "grid_yu_ocean", + "tx_trans_nrho", + "ty_trans_nrho", + "tx_trans_nrho_gm", + "ty_trans_nrho_gm", + "tx_trans_nrho_submeso", + "ty_trans_nrho_submeso", + "temp_xflux_adv_on_nrho", + "temp_yflux_adv_on_nrho", + "temp_xflux_submeso_on_nrho", + "temp_yflux_submeso_on_nrho", + "temp_xflux_gm_on_nrho", + "temp_yflux_gm_on_nrho", + "temp_xflux_ndiffuse_on_nrho", + "temp_yflux_ndiffuse_on_nrho", + "mass_pmepr_on_nrho", + "average_T1", + "average_T2", + "average_DT", + "time_bounds", + ), + ( + "xt_ocean", + "yt_ocean", + "st_ocean", + "st_edges_ocean", + "time", + "nv", + "xu_ocean", + "yu_ocean", + "temp", + "salt", + "age_global", + "u", + "v", + "average_T1", + "average_T2", + "average_DT", + "time_bounds", + ), + ( + "xt_ocean", + "yt_ocean", + "time", + "xu_ocean", + "yu_ocean", + "geolon_t", + "geolat_t", + "geolon_c", + "geolat_c", + "ht", + "hu", + "dxt", + "dyt", + "dxu", + "dyu", + "area_t", + "area_u", + "kmt", + "kmu", + "drag_coeff", + ), + ( + "time", + "time_bounds", + "TLON", + "TLAT", + "ULON", + "ULAT", + "NCAT", + "tmask", + "blkmask", + "tarea", + "uarea", + "dxt", + "dyt", + "dxu", + "dyu", + "HTN", + "HTE", + "ANGLE", + "ANGLET", + "hi_m", + "hs_m", + "Tsfc_m", + "aice_m", + "uvel_m", + "vvel_m", + "uatm_m", + "vatm_m", + "sice_m", + "fswdn_m", + "fswup_m", + "flwdn_m", + "snow_ai_m", + "rain_ai_m", + "sst_m", + "sss_m", + "uocn_m", + "vocn_m", + "frzmlt_m", + "fswfac_m", + "fswabs_ai_m", + "albsni_m", + "alvdr_ai_m", + "alidr_ai_m", + "alvdf_ai_m", + "alidf_ai_m", + "albice_m", + "albsno_m", + "flat_ai_m", + "fsens_ai_m", + "flwup_ai_m", + "evap_ai_m", + "Tair_m", + "congel_m", + "frazil_m", + "snoice_m", + "meltt_m", + "melts_m", + "meltb_m", + "meltl_m", + "fresh_ai_m", + "fsalt_ai_m", + "fhocn_ai_m", + "fswthru_ai_m", + "strairx_m", + "strairy_m", + "strtltx_m", + "strtlty_m", + "strcorx_m", + "strcory_m", + "strocnx_m", + "strocny_m", + "strintx_m", + "strinty_m", + "strength_m", + "divu_m", + "shear_m", + "dvidtt_m", + "dvidtd_m", + "daidtt_m", + "daidtd_m", + "mlt_onset_m", + "frz_onset_m", + "trsig_m", + "ice_present_m", + "fcondtop_ai_m", + "aicen_m", + "vicen_m", + "fsurfn_ai_m", + "fcondtopn_ai_m", + "fmelttn_ai_m", + "flatn_ai_m", + ), + ( + "xt_ocean", + "yt_ocean", + "st_ocean", + "st_edges_ocean", + "time", + "nv", + "xu_ocean", + "yu_ocean", + "sw_ocean", + "sw_edges_ocean", + "grid_xt_ocean", + "grid_yu_ocean", + "potrho", + "potrho_edges", + "temp", + "salt", + "age_global", + "u", + "v", + "wt", + "pot_rho_0", + "ty_trans_rho", + "ty_trans_rho_gm", + "average_T1", + "average_T2", + "average_DT", + "time_bounds", + ), + }, + ), + ( + "variable_long_name", + { + ( + "tcell longitude", + "tcell latitude", + "time", + "vertex number", + "ucell longitude", + "ucell latitude", + "effective sea level (eta_t + patm/(rho0*g)) on T cells", + "surface height on T cells [Boussinesq (volume conserving) model]", + "square of effective sea level (eta_t + patm/(rho0*g)) on T cells", + "mixed layer depth determined by density criteria", + "mass flux of precip-evap+river via sbc (liquid, frozen, evaporation)", + "mass flux of river (runoff + calving) entering ocean", + "mass flux of liquid river runoff entering ocean", + "mass flux of land ice calving into ocean", + "mass flux from evaporation/condensation (>0 enters ocean)", + "water flux transferred with sea ice form/melt (>0 enters ocean)", + "sfc_salt_flux_restore: flux from restoring term", + "sfc_salt_flux_ice", + "sfc_salt_flux_coupler: flux from the coupler", + "surface ocean heat flux coming through coupler and mass transfer", + "Vertical sum of ocn frazil heat flux over time step", + "i-directed wind stress forcing u-velocity", + "j-directed wind stress forcing v-velocity", + "Bottom u-stress via bottom drag", + "Bottom v-stress via bottom drag", + "T-cell i-mass transport vertically summed", + "T-cell j-mass transport vertically summed", + "bottom pressure on T cells [Boussinesq (volume conserving) model]", + "Start time for average period", + "End time for average period", + "Length of average period", + "time axis boundaries", + ), + ( + "none", + "time", + "vertex number", + "Globally integrated ocean kinetic energy", + "Globally integrated ocean potential energy", + "Global mean temp in liquid seawater", + "Global mean salt in liquid seawater", + "global mean ocean in-situ density from ocean_density_mod", + "Global mass weighted mean surface temp in liquid seawater", + "Global mass weighted mean surface salt in liquid seawater", + "total mass of salt in liquid seawater", + "Total heat in the liquid ocean referenced to 0degC", + "global ave eta_t plus patm_t/(g*rho0)", + "total_ocean_sfc_salt_flux_coupler", + "total ocean precip-evap+river via sbc (liquid, frozen, evaporation)", + "total liquid river water and calving ice entering ocean", + "total liquid river runoff (>0 water enters ocean)", + "total water entering ocean from calving land ice", + "total liquid water melted from sea ice (>0 enters ocean)", + "total evaporative ocean mass flux (>0 enters ocean)", + "total liquid precip into ocean (>0 enters ocean)", + "total snow falling onto ocean (>0 enters ocean)", + "total ocean heat flux from liquid river runoff", + "total ocean heat flux from calving land ice", + "total heat flux into ocean from liquid+solid runoff (<0 cools ocean)", + "total ocean heat flux from precip transferring water across surface", + "total ocean heat flux from evap transferring water across surface", + "total surface heat flux passed through coupler", + "total shortwave flux into ocean (>0 heats ocean)", + "total visible shortwave into ocean (>0 heats ocean)", + "total longwave flux into ocean (<0 cools ocean)", + "total latent heat flux into ocean (<0 cools ocean)", + "total heat flux to melt frozen precip (<0 cools ocean)", + "total heat flux to melt frozen land ice (<0 cools ocean)", + "total sensible heat into ocean (<0 cools ocean)", + "Start time for average period", + "End time for average period", + "Length of average period", + "time axis boundaries", + ), + ( + "tcell longitude", + "tcell latitude", + "time", + "ucell longitude", + "ucell latitude", + "tracer longitude", + "tracer latitude", + "uv longitude", + "uv latitude", + "ocean depth on t-cells", + "ocean depth on u-cells", + "ocean dxt on t-cells", + "ocean dyt on t-cells", + "ocean dxu on u-cells", + "ocean dyu on u-cells", + "tracer cell area", + "velocity cell area", + "number of depth levels on t-grid", + "number of depth levels on u-grid", + "Dimensionless bottom drag coefficient", + ), + ( + "tcell longitude", + "tcell latitude", + "tcell zstar depth", + "tcell zstar depth edges", + "time", + "vertex number", + "ucell longitude", + "ucell latitude", + "Conservative temperature", + "Practical Salinity", + "Age (global)", + "i-current", + "j-current", + "Start time for average period", + "End time for average period", + "Length of average period", + "time axis boundaries", + ), + ( + "tcell longitude", + "tcell latitude", + "tcell zstar depth", + "tcell zstar depth edges", + "time", + "vertex number", + "ucell longitude", + "ucell latitude", + "ucell zstar depth", + "ucell zstar depth edges", + "Conservative temperature", + "Practical Salinity", + "Age (global)", + "i-current", + "j-current", + "dia-surface velocity T-points", + "t-cell thickness", + "potential density referenced to 0 dbar", + "T-cell i-mass transport", + "T-cell j-mass transport", + "T-cell mass i-transport from GM", + "T-cell mass j-transport from GM", + "T-cell mass i-transport from submesoscale param", + "T-cell mass j-transport from submesoscale param", + "cp*rho*dzt*dyt*u*temp", + "cp*rho*dzt*dxt*v*temp", + "cp*gm_xflux*dyt*rho_dzt*temp", + "cp*gm_yflux*dxt*rho_dzt*temp", + "cp*submeso_xflux*dyt*rho_dzt*temp", + "cp*submeso_yflux*dxt*rho_dzt*temp", + "cp*ndiffuse_xflux*dyt*rho_dzt*temp", + "cp*ndiffuse_yflux*dxt*rho_dzt*temp", + "total vert diff_cbt(temp) (w/o neutral included)", + "Start time for average period", + "End time for average period", + "Length of average period", + "time axis boundaries", + ), + ( + "tcell longitude", + "tcell latitude", + "tcell zstar depth", + "tcell zstar depth edges", + "time", + "vertex number", + "ucell longitude", + "ucell latitude", + "ucell zstar depth", + "ucell zstar depth edges", + "Conservative temperature", + "Practical Salinity", + "Age (global)", + "i-current", + "j-current", + "dia-surface velocity T-points", + "t-cell thickness", + "potential density referenced to 0 dbar", + "T-cell i-mass transport", + "T-cell j-mass transport", + "T-cell mass i-transport from GM", + "T-cell mass j-transport from GM", + "Start time for average period", + "End time for average period", + "Length of average period", + "time axis boundaries", + ), + ( + "ucell longitude", + "tcell latitude", + "neutral density", + "neutral density edges", + "time", + "vertex number", + "tcell longitude", + "ucell latitude", + "T-cell i-mass transport on neutral rho", + "T-cell j-mass transport on neutral rho", + "T-cell i-mass transport from GM on neutral rho", + "T-cell j-mass transport from GM on neutral rho", + "T-cell i-mass transport from submesoscale param on neutral rho", + "T-cell j-mass transport from submesoscale param on neutral rho", + "mass transport from liquid+frozen mass and seaice melt+form (>0 enters ocean) binned to neutral density classes", + "Start time for average period", + "End time for average period", + "Length of average period", + "time axis boundaries", + ), + ( + "tcell longitude", + "tcell latitude", + "tcell zstar depth", + "tcell zstar depth edges", + "time", + "vertex number", + "ucell longitude", + "ucell latitude", + "ucell zstar depth", + "ucell zstar depth edges", + "tcell longitude", + "ucell latitude", + "potential density", + "potential density edges", + "Conservative temperature", + "Practical Salinity", + "Age (global)", + "i-current", + "j-current", + "dia-surface velocity T-points", + "potential density referenced to 0 dbar", + "T-cell j-mass transport on pot_rho", + "T-cell j-mass transport from GM on pot_rho", + "Start time for average period", + "End time for average period", + "Length of average period", + "time axis boundaries", + ), + ( + "model time", + "boundaries for time-averaging interval", + "T grid center longitude", + "T grid center latitude", + "U grid center longitude", + "U grid center latitude", + "category maximum thickness", + "ocean grid mask", + "ice grid block mask", + "area of T grid cells", + "area of U grid cells", + "T cell width through middle", + "T cell height through middle", + "U cell width through middle", + "U cell height through middle", + "T cell width on North side", + "T cell width on East side", + "angle grid makes with latitude line on U grid", + "angle grid makes with latitude line on T grid", + "grid cell mean ice thickness", + "grid cell mean snow thickness", + "snow/ice surface temperature", + "ice area (aggregate)", + "ice velocity (x)", + "ice velocity (y)", + "atm velocity (x)", + "atm velocity (y)", + "bulk ice salinity", + "down solar flux", + "upward solar flux", + "down longwave flux", + "snowfall rate", + "rainfall rate", + "sea surface temperature", + "sea surface salinity", + "ocean current (x)", + "ocean current (y)", + "freeze/melt potential", + "shortwave scaling factor", + "snow/ice/ocn absorbed solar flux", + "snow/ice broad band albedo", + "visible direct albedo", + "near IR direct albedo", + "visible diffuse albedo", + "near IR diffuse albedo", + "bare ice albedo", + "snow albedo", + "latent heat flux", + "sensible heat flux", + "upward longwave flux", + "evaporative water flux", + "air temperature", + "congelation ice growth", + "frazil ice growth", + "snow-ice formation", + "top ice melt", + "top snow melt", + "basal ice melt", + "lateral ice melt", + "freshwtr flx ice to ocn", + "salt flux ice to ocean", + "heat flux ice to ocean", + "SW flux thru ice to ocean", + "atm/ice stress (x)", + "atm/ice stress (y)", + "sea sfc tilt stress (x)", + "sea sfc tilt stress (y)", + "coriolis stress (x)", + "coriolis stress (y)", + "ocean/ice stress (x)", + "ocean/ice stress (y)", + "internal ice stress (x)", + "internal ice stress (y)", + "compressive ice strength", + "strain rate (divergence)", + "strain rate (shear)", + "volume tendency thermo", + "volume tendency dynamics", + "area tendency thermo", + "area tendency dynamics", + "melt onset date", + "freeze onset date", + "internal stress tensor trace", + "fraction of time-avg interval that ice is present", + "top surface conductive heat flux", + "ice area, categories", + "ice volume, categories", + "net surface heat flux, categories", + "top sfc conductive heat flux, cat", + "net sfc heat flux causing melt, cat", + "latent heat flux, category", + ), + ( + "ucell longitude", + "tcell latitude", + "neutral density", + "neutral density edges", + "time", + "vertex number", + "tcell longitude", + "ucell latitude", + "T-cell i-mass transport on neutral rho", + "T-cell j-mass transport on neutral rho", + "T-cell i-mass transport from GM on neutral rho", + "T-cell j-mass transport from GM on neutral rho", + "T-cell i-mass transport from submesoscale param on neutral rho", + "T-cell j-mass transport from submesoscale param on neutral rho", + "cp*rho*dzt*dyt*u*temp binned to neutral density", + "cp*rho*dzt*dxt*v*temp binned to neutral density", + "cp*submeso_xflux*dyt*rho_dzt*temp binned to neutral density", + "cp*submeso_yflux*dxt*rho_dzt*temp binned to neutral density", + "cp*gm_xflux*dyt*rho_dzt*temp binned to neutral density", + "cp*gm_yflux*dxt*rho_dzt*temp binned to neutral density", + "cp*ndiffuse_xflux*dyt*rho_dzt*temp binned to neutral density", + "cp*ndiffuse_yflux*dxt*rho_dzt*temp binned to neutral density", + "mass transport from liquid+frozen mass and seaice melt+form (>0 enters ocean) binned to neutral density classes", + "Start time for average period", + "End time for average period", + "Length of average period", + "time axis boundaries", + ), + }, + ), + ( + "variable_standard_name", + { + ( + "", + "", + "", + "", + "", + "", + "", + "", + "", + "sea_floor_depth_below_geoid", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + ), + ( + "", + "", + "", + "", + "", + "sea_water_potential_temperature", + "sea_water_salinity", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + ), + ( + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + ), + ( + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + ), + ( + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "sea_water_conservative_temperature", + "sea_water_salinity", + "sea_water_age_since_surface_contact", + "sea_water_x_velocity", + "sea_water_y_velocity", + "", + "cell_thickness", + "sea_water_potential_density", + "ocean_mass_x_transport", + "ocean_mass_y_transport", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "ocean_vertical_heat_diffusivity", + "", + "", + "", + "", + ), + ( + "", + "", + "", + "", + "", + "", + "sea_surface_height_above_geoid", + "", + "square_of_sea_surface_height_above_geoid", + "ocean_mixed_layer_thickness_defined_by_sigma_t", + "water_flux_into_sea_water", + "", + "water_flux_into_sea_water_from_rivers", + "water_flux_into_sea_water_from_icebergs", + "water_evaporation_flux", + "water_flux_into_sea_water_due_to_sea_ice_thermodynamics", + "", + "downward_sea_ice_basal_salt_flux", + "", + "", + "", + "surface_downward_x_stress", + "surface_downward_y_stress", + "", + "", + "", + "", + "sea_water_pressure_at_sea_floor", + "", + "", + "", + "", + ), + ( + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "sea_water_conservative_temperature", + "sea_water_salinity", + "sea_water_age_since_surface_contact", + "sea_water_x_velocity", + "sea_water_y_velocity", + "", + "sea_water_potential_density", + "", + "", + "", + "", + "", + "", + ), + ( + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + ), + ( + "", + "", + "", + "", + "", + "", + "", + "", + "sea_water_conservative_temperature", + "sea_water_salinity", + "sea_water_age_since_surface_contact", + "sea_water_x_velocity", + "sea_water_y_velocity", + "", + "", + "", + "", + ), + ( + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "sea_water_conservative_temperature", + "sea_water_salinity", + "sea_water_age_since_surface_contact", + "sea_water_x_velocity", + "sea_water_y_velocity", + "", + "cell_thickness", + "sea_water_potential_density", + "ocean_mass_x_transport", + "ocean_mass_y_transport", + "", + "", + "", + "", + "", + "", + ), + }, + ), + ( + "variable_cell_methods", + { + ( + "", + "", + "", + "", + "", + "time: point", + "time: point", + "time: point", + "time: point", + "time: point", + "time: point", + "time: point", + "time: point", + "time: point", + "time: point", + "time: point", + "time: point", + "time: point", + "time: point", + "time: point", + ), + ( + "", + "", + "", + "", + "", + "", + "", + "", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "", + "", + "", + "", + ), + ( + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "", + "", + "", + "", + ), + ( + "", + "", + "", + "", + "", + "", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "", + "", + "", + "", + ), + ( + "", + "", + "", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "", + "", + "", + "", + ), + ( + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "", + "", + "", + "", + ), + ( + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + ), + ( + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "", + "", + "", + "", + ), + ( + "", + "", + "", + "", + "", + "", + "", + "", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "", + "", + "", + "", + ), + ( + "", + "", + "", + "", + "", + "", + "", + "", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "time: mean", + "", + "", + "", + "", + ), + }, + ), + ( + "variable_units", + { + ( + "degrees_E", + "degrees_N", + "days since 1900-01-01 00:00:00", + "degrees_E", + "degrees_N", + "degrees_E", + "degrees_N", + "degrees_E", + "degrees_N", + "m", + "m", + "m", + "m", + "m", + "m", + "m^2", + "m^2", + "dimensionless", + "dimensionless", + "dimensionless", + ), + ( + "none", + "days since 1900-01-01 00:00:00", + "none", + "10^15 Joules", + "10^15 Joules", + "deg_C", + "psu", + "kg/m^3", + "deg_C", + "psu", + "kg/1e18", + "Joule/1e25", + "meter", + "kg/sec (*1e-15)", + "(kg/sec)/1e15", + "kg/sec/1e15", + "(kg/sec)/1e15", + "(kg/sec)/1e15", + "(kg/sec)/1e15", + "(kg/sec)/1e15", + "(kg/sec)/1e15", + "(kg/sec)/1e15", + "Watts/1e15", + "Watts/1e15", + "Watts/1e15", + "Watts/1e15", + "Watts/1e15", + "Watts/1e15", + "Watts/1e15", + "Watts/1e15", + "Watts/1e15", + "Watts/1e15", + "Watts/1e15", + "Watts/1e15", + "Watts/1e15", + "days since 1900-01-01 00:00:00", + "days since 1900-01-01 00:00:00", + "days", + "days", + ), + ( + "degrees_E", + "degrees_N", + "meters", + "meters", + "days since 1900-01-01 00:00:00", + "none", + "degrees_E", + "degrees_N", + "meters", + "meters", + "K", + "psu", + "yr", + "m/sec", + "m/sec", + "m/sec", + "m", + "kg/m^3", + "kg/s", + "kg/s", + "kg/s", + "kg/s", + "kg/s", + "kg/s", + "Watts", + "Watts", + "Watt", + "Watt", + "Watt", + "Watt", + "Watt", + "Watt", + "m^2/s", + "days since 1900-01-01 00:00:00", + "days since 1900-01-01 00:00:00", + "days", + "days", + ), + ( + "degrees_E", + "degrees_N", + "kg/m^3", + "kg/m^3", + "days since 1900-01-01 00:00:00", + "none", + "degrees_E", + "degrees_N", + "kg/s", + "kg/s", + "kg/s", + "kg/s", + "kg/s", + "kg/s", + "Watts", + "Watts", + "Watt", + "Watt", + "Watt", + "Watt", + "Watt", + "Watt", + "kg/sec", + "days since 1900-01-01 00:00:00", + "days since 1900-01-01 00:00:00", + "days", + "days", + ), + ( + "degrees_E", + "degrees_N", + "meters", + "meters", + "days since 1900-01-01 00:00:00", + "none", + "degrees_E", + "degrees_N", + "meters", + "meters", + "K", + "psu", + "yr", + "m/sec", + "m/sec", + "m/sec", + "m", + "kg/m^3", + "kg/s", + "kg/s", + "kg/s", + "kg/s", + "days since 1900-01-01 00:00:00", + "days since 1900-01-01 00:00:00", + "days", + "days", + ), + ( + "degrees_E", + "degrees_N", + "days since 1900-01-01 00:00:00", + "none", + "degrees_E", + "degrees_N", + "meter", + "meter", + "m^2", + "m", + "(kg/m^3)*(m/sec)", + "(kg/m^3)*(m/sec)", + "(kg/m^3)*(m/sec)", + "(kg/m^3)*(m/sec)", + "(kg/m^3)*(m/sec)", + "(kg/m^3)*(m/sec)", + "kg/(m^2*sec)", + "kg/(m^2*sec)", + "kg/(m^2*sec)", + "Watts/m^2", + "W/m^2", + "N/m^2", + "N/m^2", + "N/m^2", + "N/m^2", + "kg/s", + "kg/s", + "dbar", + "days since 1900-01-01 00:00:00", + "days since 1900-01-01 00:00:00", + "days", + "days", + ), + ( + "degrees_E", + "degrees_N", + "meters", + "meters", + "days since 1900-01-01 00:00:00", + "none", + "degrees_E", + "degrees_N", + "meters", + "meters", + "degrees_E", + "degrees_N", + "kg/m^3", + "kg/m^3", + "K", + "psu", + "yr", + "m/sec", + "m/sec", + "m/sec", + "kg/m^3", + "kg/s", + "kg/s", + "days since 1900-01-01 00:00:00", + "days since 1900-01-01 00:00:00", + "days", + "days", + ), + ( + "degrees_E", + "degrees_N", + "kg/m^3", + "kg/m^3", + "days since 1900-01-01 00:00:00", + "none", + "degrees_E", + "degrees_N", + "kg/s", + "kg/s", + "kg/s", + "kg/s", + "kg/s", + "kg/s", + "kg/sec", + "days since 1900-01-01 00:00:00", + "days since 1900-01-01 00:00:00", + "days", + "days", + ), + ( + "days since 1900-01-01 00:00:00", + "days since 1900-01-01 00:00:00", + "degrees_east", + "degrees_north", + "degrees_east", + "degrees_north", + "m", + "", + "", + "m^2", + "m^2", + "m", + "m", + "m", + "m", + "m", + "m", + "radians", + "radians", + "m", + "m", + "C", + "1", + "m/s", + "m/s", + "m/s", + "m/s", + "ppt", + "W/m^2", + "W/m^2", + "W/m^2", + "cm/day", + "cm/day", + "C", + "ppt", + "m/s", + "m/s", + "W/m^2", + "1", + "W/m^2", + "%", + "%", + "%", + "%", + "%", + "%", + "%", + "W/m^2", + "W/m^2", + "W/m^2", + "cm/day", + "C", + "cm/day", + "cm/day", + "cm/day", + "cm/day", + "cm/day", + "cm/day", + "cm/day", + "cm/day", + "kg/m^2/s", + "W/m^2", + "W/m^2", + "N/m^2", + "N/m^2", + "N/m^2", + "N/m^2", + "N/m^2", + "N/m^2", + "N/m^2", + "N/m^2", + "N/m^2", + "N/m^2", + "N/m", + "%/day", + "%/day", + "cm/day", + "cm/day", + "%/day", + "%/day", + "day of year", + "day of year", + "N/m^2", + "1", + "W/m^2", + "1", + "m", + "W/m^2", + "W/m^2", + "W/m^2", + "W/m^2", + ), + ( + "degrees_E", + "degrees_N", + "meters", + "meters", + "days since 1900-01-01 00:00:00", + "none", + "degrees_E", + "degrees_N", + "K", + "psu", + "yr", + "m/sec", + "m/sec", + "days since 1900-01-01 00:00:00", + "days since 1900-01-01 00:00:00", + "days", + "days", + ), + }, + ), + ("realm", {"ocean", "seaIce"}), ], ) -def test_om2_metacat_vals_found(metacat, colname, expected, current_catalog): +def test_om2_metacat_vals_found(metacat, colname, expected): # Test that the unique values in the column are as expected. I've truncated # the unique values to the first 10 for brevity because I'm not typing out # 3700255 unique values. breakpoint() cat = metacat["1deg_jra55_ryf9091_gadi"] - if colname not in ['variable','variable_long_name','variable_standard_name','variable_cell_methods','variable_units']: + if colname not in [ + "variable", + "variable_long_name", + "variable_standard_name", + "variable_cell_methods", + "variable_units", + ]: found = set(cat.df[colname].unique()[:10]) assert found == expected else: @@ -326,181 +2219,206 @@ def test_om2_metacat_vals_found(metacat, colname, expected, current_catalog): with pytest.raises(TypeError): _found = set(cat.df[colname].unique()[:10]) # cast to tuple to make them hashable, then check the length - found = set(cat.df[colname].apply(lambda x : tuple(x)).unique()[:10]) + found = set(cat.df[colname].apply(lambda x: tuple(x)).unique()[:10]) assert found == expected - # Repeat the test with the current catalog - cat = current_catalog["1deg_jra55_ryf9091_gadi"] - if colname not in ['variable','variable_long_name','variable_standard_name','variable_cell_methods','variable_units']: - found = set(cat.df[colname].unique()[:10]) - assert found >= expected - else: - # These should fail because they contains lists (unhashable) - with pytest.raises(TypeError): - _found = set(cat.df[colname].unique()[:10]) - # cast to tuple to make them hashable, then check the length - found = set(cat.df[colname].apply(lambda x : tuple(x)).unique()[:10]) - assert found >= expected - - @pytest.mark.parametrize( "path, varname, first_ten_mean", [ - ('/g/data/al33/replicas/CMIP5/combined/LASG-IAP/FGOALS-s2/amip/6hr/atmos/6hrLev/r1i1p1/v1/va/va_6hrLev_FGOALS-s2_amip_r1i1p1_198201010000-198212311800.nc', - 'va', - -6.1719556 + ( + "/g/data/al33/replicas/CMIP5/combined/LASG-IAP/FGOALS-s2/amip/6hr/atmos/6hrLev/r1i1p1/v1/va/va_6hrLev_FGOALS-s2_amip_r1i1p1_198201010000-198212311800.nc", + "va", + -6.1719556, ), - ('/g/data/al33/replicas/CMIP5/combined/CMCC/CMCC-CMS/rcp45/day/seaIce/day/r1i1p1/v20120717/sit/sit_day_CMCC-CMS_rcp45_r1i1p1_20700101-20791231.nc', - 'sit', - np.nan + ( + "/g/data/al33/replicas/CMIP5/combined/CMCC/CMCC-CMS/rcp45/day/seaIce/day/r1i1p1/v20120717/sit/sit_day_CMCC-CMS_rcp45_r1i1p1_20700101-20791231.nc", + "sit", + np.nan, ), - ('/g/data/al33/replicas/CMIP5/output1/LASG-CESS/FGOALS-g2/abrupt4xCO2/mon/land/Lmon/r1i1p1/v1/prveg/prveg_Lmon_FGOALS-g2_abrupt4xCO2_r1i1p1_063001-063912.nc', - 'prveg', - 0.0 + ( + "/g/data/al33/replicas/CMIP5/output1/LASG-CESS/FGOALS-g2/abrupt4xCO2/mon/land/Lmon/r1i1p1/v1/prveg/prveg_Lmon_FGOALS-g2_abrupt4xCO2_r1i1p1_063001-063912.nc", + "prveg", + 0.0, ), - ('/g/data/al33/replicas/CMIP5/output1/CMCC/CMCC-CM/rcp85/6hr/atmos/6hrPlev/r1i1p1/v20170725/ta/ta_6hrPlev_CMCC-CM_rcp85_r1i1p1_2068030100-2068033118.nc', - 'ta', - 247.55783 + ( + "/g/data/al33/replicas/CMIP5/output1/CMCC/CMCC-CM/rcp85/6hr/atmos/6hrPlev/r1i1p1/v20170725/ta/ta_6hrPlev_CMCC-CM_rcp85_r1i1p1_2068030100-2068033118.nc", + "ta", + 247.55783, ), - ('/g/data/al33/replicas/CMIP5/combined/MOHC/HadGEM2-CC/rcp45/day/atmos/day/r1i1p1/v20120531/rlut/rlut_day_HadGEM2-CC_rcp45_r1i1p1_20351201-20401130.nc', - 'rlut', - 200.8389 + ( + "/g/data/al33/replicas/CMIP5/combined/MOHC/HadGEM2-CC/rcp45/day/atmos/day/r1i1p1/v20120531/rlut/rlut_day_HadGEM2-CC_rcp45_r1i1p1_20351201-20401130.nc", + "rlut", + 200.8389, ), - ('/g/data/al33/replicas/CMIP5/combined/IPSL/IPSL-CM5A-LR/rcp26/day/atmos/cfDay/r1i1p1/v20120114/clw/clw_cfDay_IPSL-CM5A-LR_rcp26_r1i1p1_22060101-22151231.nc', - 'clw', - 0.0 + ( + "/g/data/al33/replicas/CMIP5/combined/IPSL/IPSL-CM5A-LR/rcp26/day/atmos/cfDay/r1i1p1/v20120114/clw/clw_cfDay_IPSL-CM5A-LR_rcp26_r1i1p1_22060101-22151231.nc", + "clw", + 0.0, ), - ('/g/data/al33/replicas/CMIP5/output1/IPSL/IPSL-CM5A-LR/abrupt4xCO2/mon/atmos/Amon/r5i1p1/v20110921/rsds/rsds_Amon_IPSL-CM5A-LR_abrupt4xCO2_r5i1p1_185005-185504.nc', - 'rsds', - 153.31345 + ( + "/g/data/al33/replicas/CMIP5/output1/IPSL/IPSL-CM5A-LR/abrupt4xCO2/mon/atmos/Amon/r5i1p1/v20110921/rsds/rsds_Amon_IPSL-CM5A-LR_abrupt4xCO2_r5i1p1_185005-185504.nc", + "rsds", + 153.31345, ), - ('/g/data/al33/replicas/CMIP5/combined/MIROC/MIROC5/1pctCO2/mon/ocean/Omon/r1i1p1/v20131009/so/so_Omon_MIROC5_1pctCO2_r1i1p1_228501-228512.nc', - 'so', - 0.0 + ( + "/g/data/al33/replicas/CMIP5/combined/MIROC/MIROC5/1pctCO2/mon/ocean/Omon/r1i1p1/v20131009/so/so_Omon_MIROC5_1pctCO2_r1i1p1_228501-228512.nc", + "so", + 0.0, ), - ('/g/data/al33/replicas/CMIP5/combined/CCCma/CanCM4/decadal1981/mon/ocean/Omon/r4i1p1/v20120622/hfls/hfls_Omon_CanCM4_decadal1981_r4i1p1_198201-199112.nc', - 'hfls', - np.nan + ( + "/g/data/al33/replicas/CMIP5/combined/CCCma/CanCM4/decadal1981/mon/ocean/Omon/r4i1p1/v20120622/hfls/hfls_Omon_CanCM4_decadal1981_r4i1p1_198201-199112.nc", + "hfls", + np.nan, ), - ('/g/data/al33/replicas/CMIP5/combined/MPI-M/MPI-ESM-LR/decadal1992/mon/land/Lmon/r1i1p1/v20120529/cLitter/cLitter_Lmon_MPI-ESM-LR_decadal1992_r1i1p1_199301-200212.nc', - 'cLitter', - 0.0 + ( + "/g/data/al33/replicas/CMIP5/combined/MPI-M/MPI-ESM-LR/decadal1992/mon/land/Lmon/r1i1p1/v20120529/cLitter/cLitter_Lmon_MPI-ESM-LR_decadal1992_r1i1p1_199301-200212.nc", + "cLitter", + 0.0, ), - ('/g/data/al33/replicas/CMIP5/output1/NASA-GISS/GISS-E2-R/1pctCO2/mon/aerosol/aero/r1i1p3/v20160425/emiss/emiss_aero_GISS-E2-R_1pctCO2_r1i1p3_192601-195012.nc', - 'emiss', - 0.0 + ( + "/g/data/al33/replicas/CMIP5/output1/NASA-GISS/GISS-E2-R/1pctCO2/mon/aerosol/aero/r1i1p3/v20160425/emiss/emiss_aero_GISS-E2-R_1pctCO2_r1i1p3_192601-195012.nc", + "emiss", + 0.0, ), - ('/g/data/al33/replicas/CMIP5/combined/MIROC/MIROC-ESM-CHEM/rcp85/6hr/atmos/6hrLev/r1i1p1/v20111129/hus/hus_6hrLev_MIROC-ESM-CHEM_rcp85_r1i1p1_2063060106-2063070100.nc', - 'hus', - 2.2376184e-05 + ( + "/g/data/al33/replicas/CMIP5/combined/MIROC/MIROC-ESM-CHEM/rcp85/6hr/atmos/6hrLev/r1i1p1/v20111129/hus/hus_6hrLev_MIROC-ESM-CHEM_rcp85_r1i1p1_2063060106-2063070100.nc", + "hus", + 2.2376184e-05, ), - ('/g/data/al33/replicas/CMIP5/output1/MOHC/HadCM3/decadal1964/day/atmos/day/r6i3p1/v20140110/va/va_day_HadCM3_decadal1964_r6i3p1_19641101-19741230.nc', - 'va', - -4.4489503 + ( + "/g/data/al33/replicas/CMIP5/output1/MOHC/HadCM3/decadal1964/day/atmos/day/r6i3p1/v20140110/va/va_day_HadCM3_decadal1964_r6i3p1_19641101-19741230.nc", + "va", + -4.4489503, ), - ('/g/data/al33/replicas/CMIP5/combined/LASG-CESS/FGOALS-g2/rcp45/day/seaIce/day/r1i1p1/v20161204/sit/sit_day_FGOALS-g2_rcp45_r1i1p1_20200101-20201231.nc', - 'sit', - 0.0 + ( + "/g/data/al33/replicas/CMIP5/combined/LASG-CESS/FGOALS-g2/rcp45/day/seaIce/day/r1i1p1/v20161204/sit/sit_day_FGOALS-g2_rcp45_r1i1p1_20200101-20201231.nc", + "sit", + 0.0, ), - ('/g/data/al33/replicas/CMIP5/output1/NCAR/CCSM4/decadal1991/mon/seaIce/OImon/r3i2p1/v20120529/grCongel/grCongel_OImon_CCSM4_decadal1991_r3i2p1_199101-200012.nc', - 'grCongel', - np.nan + ( + "/g/data/al33/replicas/CMIP5/output1/NCAR/CCSM4/decadal1991/mon/seaIce/OImon/r3i2p1/v20120529/grCongel/grCongel_OImon_CCSM4_decadal1991_r3i2p1_199101-200012.nc", + "grCongel", + np.nan, ), - ('/g/data/al33/replicas/CMIP5/output1/LASG-CESS/FGOALS-g2/decadal1960/mon/atmos/Amon/r1i1p1/v3/rsdscs/rsdscs_Amon_FGOALS-g2_decadal1960_r1i1p1_198101-199012.nc', - 'rsdscs', - 81.612854 + ( + "/g/data/al33/replicas/CMIP5/output1/LASG-CESS/FGOALS-g2/decadal1960/mon/atmos/Amon/r1i1p1/v3/rsdscs/rsdscs_Amon_FGOALS-g2_decadal1960_r1i1p1_198101-199012.nc", + "rsdscs", + 81.612854, ), - ('/g/data/al33/replicas/CMIP5/output1/MRI/MRI-CGCM3/amip/mon/atmos/cfMon/r1i1p1/v20131011/hur/hur_cfMon_MRI-CGCM3_amip_r1i1p1_198901-199812.nc', - 'hur', - 92.70255 + ( + "/g/data/al33/replicas/CMIP5/output1/MRI/MRI-CGCM3/amip/mon/atmos/cfMon/r1i1p1/v20131011/hur/hur_cfMon_MRI-CGCM3_amip_r1i1p1_198901-199812.nc", + "hur", + 92.70255, ), - ('/g/data/al33/replicas/CMIP5/combined/INM/inmcm4/amip/3hr/atmos/3hr/r1i1p1/v20110323/huss/huss_3hr_inmcm4_amip_r1i1p1_2006010100-2006123121.nc', - 'huss', - 0.0006068 + ( + "/g/data/al33/replicas/CMIP5/combined/INM/inmcm4/amip/3hr/atmos/3hr/r1i1p1/v20110323/huss/huss_3hr_inmcm4_amip_r1i1p1_2006010100-2006123121.nc", + "huss", + 0.0006068, ), - ('/g/data/al33/replicas/cordex/output/EAS-22/ICTP/MOHC-HadGEM2-ES/historical/r1i1p1/RegCM4-4/v0/day/ua925/v20190502/ua925_EAS-22_MOHC-HadGEM2-ES_historical_r1i1p1_ICTP-RegCM4-4_v0_day_19800101-19801230.nc', - 'ua925', - -0.32869282 + ( + "/g/data/al33/replicas/cordex/output/EAS-22/ICTP/MOHC-HadGEM2-ES/historical/r1i1p1/RegCM4-4/v0/day/ua925/v20190502/ua925_EAS-22_MOHC-HadGEM2-ES_historical_r1i1p1_ICTP-RegCM4-4_v0_day_19800101-19801230.nc", + "ua925", + -0.32869282, ), - ('/g/data/al33/replicas/CMIP5/combined/CMCC/CMCC-CM/rcp45/6hr/atmos/6hrPlev/r1i1p1/v20170725/ua/ua_6hrPlev_CMCC-CM_rcp45_r1i1p1_2011010100-2011013118.nc', - 'ua', - -5.155791 + ( + "/g/data/al33/replicas/CMIP5/combined/CMCC/CMCC-CM/rcp45/6hr/atmos/6hrPlev/r1i1p1/v20170725/ua/ua_6hrPlev_CMCC-CM_rcp45_r1i1p1_2011010100-2011013118.nc", + "ua", + -5.155791, ), - ('/g/data/al33/replicas/CMIP5/output1/NASA-GISS/GISS-E2-H/rcp45/mon/atmos/Amon/r4i1p3/v20160512/ccb/ccb_Amon_GISS-E2-H_rcp45_r4i1p3_215101-220012.nc', - 'ccb', - np.nan + ( + "/g/data/al33/replicas/CMIP5/output1/NASA-GISS/GISS-E2-H/rcp45/mon/atmos/Amon/r4i1p3/v20160512/ccb/ccb_Amon_GISS-E2-H_rcp45_r4i1p3_215101-220012.nc", + "ccb", + np.nan, ), - ('/g/data/al33/replicas/CMIP5/output1/MPI-M/MPI-ESM-LR/decadal1971/mon/land/Lmon/r1i1p1/v20120529/grassFrac/grassFrac_Lmon_MPI-ESM-LR_decadal1971_r1i1p1_197201-198112.nc', - 'grassFrac', - 0.0 + ( + "/g/data/al33/replicas/CMIP5/output1/MPI-M/MPI-ESM-LR/decadal1971/mon/land/Lmon/r1i1p1/v20120529/grassFrac/grassFrac_Lmon_MPI-ESM-LR_decadal1971_r1i1p1_197201-198112.nc", + "grassFrac", + 0.0, ), - ('/g/data/al33/replicas/CMIP5/combined/CNRM-CERFACS/CNRM-CM5/rcp85/6hr/atmos/6hrLev/r1i1p1/v20120525/ta/ta_6hrLev_CNRM-CM5_rcp85_r1i1p1_2095100106-2095110100.nc', - 'ta', - 233.56656 + ( + "/g/data/al33/replicas/CMIP5/combined/CNRM-CERFACS/CNRM-CM5/rcp85/6hr/atmos/6hrLev/r1i1p1/v20120525/ta/ta_6hrLev_CNRM-CM5_rcp85_r1i1p1_2095100106-2095110100.nc", + "ta", + 233.56656, ), - ('/g/data/al33/replicas/CMIP5/combined/NASA-GISS/GISS-E2-R/historical/mon/atmos/Amon/r5i1p3/v20160503/ch4/ch4_Amon_GISS-E2-R_historical_r5i1p3_197601-200012.nc', - 'ch4', - np.nan + ( + "/g/data/al33/replicas/CMIP5/combined/NASA-GISS/GISS-E2-R/historical/mon/atmos/Amon/r5i1p3/v20160503/ch4/ch4_Amon_GISS-E2-R_historical_r5i1p3_197601-200012.nc", + "ch4", + np.nan, ), - ('/g/data/al33/replicas/CMIP5/output1/ICHEC/EC-EARTH/decadal1965/mon/ocean/Omon/r8i2p1/v20120710/so/so_Omon_EC-EARTH_decadal1965_r8i2p1_196601-197512.nc', - 'so', - 0.0 + ( + "/g/data/al33/replicas/CMIP5/output1/ICHEC/EC-EARTH/decadal1965/mon/ocean/Omon/r8i2p1/v20120710/so/so_Omon_EC-EARTH_decadal1965_r8i2p1_196601-197512.nc", + "so", + 0.0, ), - ('/g/data/al33/replicas/CMIP5/output1/NOAA-GFDL/GFDL-ESM2G/rcp60/mon/atmos/Amon/r1i1p1/v20120412/evspsbl/evspsbl_Amon_GFDL-ESM2G_rcp60_r1i1p1_202101-202512.nc', - 'evspsbl', - 1.9350772e-08 + ( + "/g/data/al33/replicas/CMIP5/output1/NOAA-GFDL/GFDL-ESM2G/rcp60/mon/atmos/Amon/r1i1p1/v20120412/evspsbl/evspsbl_Amon_GFDL-ESM2G_rcp60_r1i1p1_202101-202512.nc", + "evspsbl", + 1.9350772e-08, ), - ('/g/data/al33/replicas/CMIP5/output1/MOHC/HadGEM2-CC/historical/day/landIce/day/r1i1p1/v20110930/snw/snw_day_HadGEM2-CC_historical_r1i1p1_19691201-19741130.nc', - 'snw', - 106252.55 + ( + "/g/data/al33/replicas/CMIP5/output1/MOHC/HadGEM2-CC/historical/day/landIce/day/r1i1p1/v20110930/snw/snw_day_HadGEM2-CC_historical_r1i1p1_19691201-19741130.nc", + "snw", + 106252.55, ), - ('/g/data/al33/replicas/CMIP5/combined/LASG-CESS/FGOALS-g2/decadal1980/day/atmos/day/r2i1p1/v1/psl/psl_day_FGOALS-g2_decadal1980_r2i1p1_20000101-20001231.nc', - 'psl', - 100025.44 + ( + "/g/data/al33/replicas/CMIP5/combined/LASG-CESS/FGOALS-g2/decadal1980/day/atmos/day/r2i1p1/v1/psl/psl_day_FGOALS-g2_decadal1980_r2i1p1_20000101-20001231.nc", + "psl", + 100025.44, ), - ('/g/data/al33/replicas/CMIP5/combined/CMCC/CMCC-CMS/piControl/mon/atmos/Amon/r1i1p1/v20120717/clivi/clivi_Amon_CMCC-CMS_piControl_r1i1p1_394401-395312.nc', - 'clivi', - 0.00519617 + ( + "/g/data/al33/replicas/CMIP5/combined/CMCC/CMCC-CMS/piControl/mon/atmos/Amon/r1i1p1/v20120717/clivi/clivi_Amon_CMCC-CMS_piControl_r1i1p1_394401-395312.nc", + "clivi", + 0.00519617, ), - ('/g/data/al33/replicas/CMIP5/output1/NASA-GISS/GISS-E2-R/historicalMisc/mon/atmos/Amon/r1i1p315/v20160503/cli/cli_Amon_GISS-E2-R_historicalMisc_r1i1p315_197601-200012.nc', - 'cli', - 3.8851712e-07 + ( + "/g/data/al33/replicas/CMIP5/output1/NASA-GISS/GISS-E2-R/historicalMisc/mon/atmos/Amon/r1i1p315/v20160503/cli/cli_Amon_GISS-E2-R_historicalMisc_r1i1p315_197601-200012.nc", + "cli", + 3.8851712e-07, ), - ('/g/data/al33/replicas/CMIP5/output1/MPI-M/MPI-ESM-LR/1pctCO2/mon/atmos/Amon/r1i1p1/v20120308/va/va_Amon_MPI-ESM-LR_1pctCO2_r1i1p1_190001-190912.nc', - 'va', - -4.030592 + ( + "/g/data/al33/replicas/CMIP5/output1/MPI-M/MPI-ESM-LR/1pctCO2/mon/atmos/Amon/r1i1p1/v20120308/va/va_Amon_MPI-ESM-LR_1pctCO2_r1i1p1_190001-190912.nc", + "va", + -4.030592, ), - ('/g/data/al33/replicas/CMIP5/combined/NCC/NorESM1-ME/rcp85/mon/ocean/Omon/r1i1p1/v20130926/msftmyz/msftmyz_Omon_NorESM1-ME_rcp85_r1i1p1_204501-210012.nc', - 'msftmyz', - np.nan + ( + "/g/data/al33/replicas/CMIP5/combined/NCC/NorESM1-ME/rcp85/mon/ocean/Omon/r1i1p1/v20130926/msftmyz/msftmyz_Omon_NorESM1-ME_rcp85_r1i1p1_204501-210012.nc", + "msftmyz", + np.nan, ), - ('/g/data/al33/replicas/CMIP5/output1/NOAA-GFDL/GFDL-CM2p1/rcp45/mon/ocean/Omon/r3i1p1/v20110601/tauvo/tauvo_Omon_GFDL-CM2p1_rcp45_r3i1p1_201601-202012.nc', - 'tauvo', - np.nan + ( + "/g/data/al33/replicas/CMIP5/output1/NOAA-GFDL/GFDL-CM2p1/rcp45/mon/ocean/Omon/r3i1p1/v20110601/tauvo/tauvo_Omon_GFDL-CM2p1_rcp45_r3i1p1_201601-202012.nc", + "tauvo", + np.nan, ), - ('/g/data/al33/replicas/CMIP5/combined/MIROC/MIROC4h/decadal1990/mon/ocean/Omon/r5i1p1/v20120326/wmo/wmo_Omon_MIROC4h_decadal1990_r5i1p1_199301-199306.nc', - 'wmo', - np.nan + ( + "/g/data/al33/replicas/CMIP5/combined/MIROC/MIROC4h/decadal1990/mon/ocean/Omon/r5i1p1/v20120326/wmo/wmo_Omon_MIROC4h_decadal1990_r5i1p1_199301-199306.nc", + "wmo", + np.nan, ), - ('/g/data/al33/replicas/cordex/output/AUS-44i/CSIRO/CSIRO-BOM-ACCESS1-0/rcp85/r1i1p1/CCAM-2008/v1/day/vas/v20210518/vas_AUS-44i_CSIRO-BOM-ACCESS1-0_rcp85_r1i1p1_CSIRO-CCAM-2008_v1_day_20620101-20621231.nc', - 'vas', - -3.0647216 + ( + "/g/data/al33/replicas/cordex/output/AUS-44i/CSIRO/CSIRO-BOM-ACCESS1-0/rcp85/r1i1p1/CCAM-2008/v1/day/vas/v20210518/vas_AUS-44i_CSIRO-BOM-ACCESS1-0_rcp85_r1i1p1_CSIRO-CCAM-2008_v1_day_20620101-20621231.nc", + "vas", + -3.0647216, ), - ] + ], ) -def test_cmip5_values_correct(metacat,current_catalog,path, varname, first_ten_mean): +def test_cmip5_values_correct(metacat, current_catalog, path, varname, first_ten_mean): """ - All these values are taken from the first 10 values of the first dimension + All these values are taken from the first 10 values of the first dimension to minimize the amount of data we need to load. They have been verified against the production catalogd (as of 2024-11-20). """ cmip5_cat = metacat["cmip5_al33"] - esm_ds = cmip5_cat.search(path=path,variable=varname).to_dask() + esm_ds = cmip5_cat.search(path=path, variable=varname).to_dask() assert esm_ds # Subset to the first 10 values in the 0th dimension, first in all others da = esm_ds[varname] - da = da.isel(**{da.dims[0]: slice(10), }) + da = da.isel( + **{ + da.dims[0]: slice(10), + } + ) da = da.isel(**{dim: 0 for dim in da.dims[1:]}) da_val = da.mean(dim=da.dims[0], skipna=True).values @@ -508,12 +2426,12 @@ def test_cmip5_values_correct(metacat,current_catalog,path, varname, first_ten_m vals_equal = np.isnan(first_ten_mean) else: vals_equal = da_val == pytest.approx(first_ten_mean, abs=1e-6) - + assert vals_equal # Check that the data is the same in the current catalog cmip5_cat = current_catalog["cmip5_al33"] - ... # Repeat above + ... # Repeat above @pytest.mark.order(after="test_catalog_subset_exists") From 37d3a68f887c8e8aa33e792b0170a8532e825907 Mon Sep 17 00:00:00 2001 From: Charles Turner Date: Wed, 20 Nov 2024 18:49:22 +1100 Subject: [PATCH 10/24] End to end test done & working. Now just needs a workflow trigger --- e2e/test_end_to_end.py | 90 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 84 insertions(+), 6 deletions(-) diff --git a/e2e/test_end_to_end.py b/e2e/test_end_to_end.py index 3da36431..6ba396c6 100644 --- a/e2e/test_end_to_end.py +++ b/e2e/test_end_to_end.py @@ -2429,11 +2429,89 @@ def test_cmip5_values_correct(metacat, current_catalog, path, varname, first_ten assert vals_equal - # Check that the data is the same in the current catalog - cmip5_cat = current_catalog["cmip5_al33"] - ... # Repeat above +@pytest.mark.parametrize( + "path, varname, first_ten_mean", + [ + ( + "/g/data/ik11/outputs/access-om2/1deg_jra55_ryf9091_gadi/output000/ice/OUTPUT/iceh.1900-01.nc", + "aice_m", + np.nan, + ), + ( + "/g/data/ik11/outputs/access-om2/1deg_jra55_ryf9091_gadi/output007/ice/OUTPUT/iceh.1972-09.nc", + "tarea", + 730422336.0, + ), + ( + "/g/data/ik11/outputs/access-om2/1deg_jra55_ryf9091_gadi/output014/ice/OUTPUT/iceh.2045-05.nc", + "fsurfn_ai_m", + np.nan, + ), + ( + "/g/data/ik11/outputs/access-om2/1deg_jra55_ryf9091_gadi/output021/ice/OUTPUT/iceh.2118-01.nc", + "vicen_m", + np.nan, + ), + ( + "/g/data/ik11/outputs/access-om2/1deg_jra55_ryf9091_gadi/output029/ice/OUTPUT/iceh.2190-05.nc", + "sst_m", + np.nan, + ), + ( + "/g/data/ik11/outputs/access-om2/1deg_jra55_ryf9091_gadi/output036/ice/OUTPUT/iceh.2262-11.nc", + "ANGLE", + 0.0, + ), + ( + "/g/data/ik11/outputs/access-om2/1deg_jra55_ryf9091_gadi/output043/ice/OUTPUT/iceh.2335-07.nc", + "meltt_m", + np.nan, + ), + ( + "/g/data/ik11/outputs/access-om2/1deg_jra55_ryf9091_gadi/output050/ice/OUTPUT/iceh.2408-03.nc", + "divu_m", + np.nan, + ), + ( + "/g/data/ik11/outputs/access-om2/1deg_jra55_ryf9091_gadi/output058/ice/OUTPUT/iceh.2480-04.nc", + "blkmask", + 0.01, + ), + ( + "/g/data/ik11/outputs/access-om2/1deg_jra55_ryf9091_gadi/output065/ice/OUTPUT/iceh.2552-12.nc", + "mlt_onset_m", + np.nan, + ), + ( + "/g/data/ik11/outputs/access-om2/1deg_jra55_ryf9091_gadi/output072/ice/OUTPUT/iceh.2625-08.nc", + "alvdf_ai_m", + np.nan, + ), + ], +) +def test_om2_values_correct(metacat, path, varname, first_ten_mean): + """ + All these values are taken from the first 10 values of the first dimension + to minimize the amount of data we need to load. They have been verified against + the production catalog (as of 2024-11-20). + """ + om2_cat = metacat["1deg_jra55_ryf9091_gadi"] + esm_ds = om2_cat.search(path=path).to_dask() + assert esm_ds + # Subset to the first 10 values in the 0th dimension, first in all others + da = esm_ds[varname] + da = da.isel( + **{ + da.dims[0]: slice(10), + } + ) + da = da.isel(**{dim: 0 for dim in da.dims[1:]}) + da_val = da.mean(dim=da.dims[0], skipna=True).values + + if np.isnan(da_val).all(): + vals_equal = np.isnan(first_ten_mean) + else: + vals_equal = da_val == pytest.approx(first_ten_mean, abs=1e-6) -@pytest.mark.order(after="test_catalog_subset_exists") -def test_built_esm_datastore(): - pass + assert vals_equal \ No newline at end of file From 436676a4468a339eb818a3465cc42051383d4c7b Mon Sep 17 00:00:00 2001 From: Charles Turner Date: Wed, 20 Nov 2024 18:51:23 +1100 Subject: [PATCH 11/24] formatting --- e2e/test_end_to_end.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/e2e/test_end_to_end.py b/e2e/test_end_to_end.py index 6ba396c6..5066754a 100644 --- a/e2e/test_end_to_end.py +++ b/e2e/test_end_to_end.py @@ -2514,4 +2514,4 @@ def test_om2_values_correct(metacat, path, varname, first_ten_mean): else: vals_equal = da_val == pytest.approx(first_ten_mean, abs=1e-6) - assert vals_equal \ No newline at end of file + assert vals_equal From 1d69c30b6d9a23dd4e0ccca0af032f876eab683b Mon Sep 17 00:00:00 2001 From: Charles Turner Date: Wed, 20 Nov 2024 18:55:50 +1100 Subject: [PATCH 12/24] Removed unused build_subset.sh file --- e2e/build_subset.sh | 43 ------------------------------------------- 1 file changed, 43 deletions(-) delete mode 100644 e2e/build_subset.sh diff --git a/e2e/build_subset.sh b/e2e/build_subset.sh deleted file mode 100644 index caffd763..00000000 --- a/e2e/build_subset.sh +++ /dev/null @@ -1,43 +0,0 @@ -#!/bin/bash -l - -#PBS -P iq82 -#PBS -l storage=gdata/xp65+gdata/ik11+gdata/cj50+gdata/hh5+gdata/p73+gdata/dk92+gdata/al33+gdata/rr3+gdata/fs38+gdata/oi10 -#PBS -q normal -#PBS -W block=true -#PBS -l walltime=03:00:00 -#PBS -l mem=192gb -#PBS -l ncpus=48 -#PBS -l wd -#PBS -j oe - -########################################################################################### -# Copyright 2022 ACCESS-NRI and contributors. See the top-level COPYRIGHT file for details. -# SPDX-License-Identifier: Apache-2.0 - -# Description: -# Generate access-nri intake metacatalog from config files - -########################################################################################### - -set -e - -if [ ! $# -eq 0 ]; then - version=$1 -fi - -module use /g/data/xp65/public/modules -module load conda/access-med-0.6 -source /home/189/ct1163/end2end/venv/bin/activate - -OUTPUT_BASE_PATH=/scratch/tm70/ct1163/test_cat/ -CONFIG_DIR=/g/data/xp65/admin/access-nri-intake-catalog/config -CONFIGS=( cmip5.yaml access-om2.yaml ) - -config_paths=( "${CONFIGS[@]/#/${CONFIG_DIR}/}" ) - -if [ -z "$version" ]; then - catalog-build --build_base_path=${OUTPUT_BASE_PATH} ${config_paths[@]} - -else - catalog-build --build_base_path=${OUTPUT_BASE_PATH} ${config_paths[@]} -fi From 6511acc167f1450043b1a7449fa1c6daa287e4da Mon Sep 17 00:00:00 2001 From: Charles Turner Date: Mon, 25 Nov 2024 08:36:55 +1100 Subject: [PATCH 13/24] Removed some redundant stuff (Marc's comments) --- e2e/conftest.py | 49 ++---------------------------------- e2e/test_end_to_end.py | 3 --- src/access_nri_intake/cli.py | 2 +- 3 files changed, 3 insertions(+), 51 deletions(-) diff --git a/e2e/conftest.py b/e2e/conftest.py index c9ef271d..58bdf5a9 100644 --- a/e2e/conftest.py +++ b/e2e/conftest.py @@ -1,43 +1,17 @@ # Copyright 2023 ACCESS-NRI and contributors. See the top-level COPYRIGHT file for details. # SPDX-License-Identifier: Apache-2.0 -import os -import warnings from datetime import datetime from pathlib import Path from pytest import fixture -here = os.path.abspath(os.path.dirname(__file__)) - - -def _get_xfail(): - """ - Get the XFAILS environment variable. We use a default of 1, indicating we expect - to add xfail marker to `test_parse_access_ncfile[AccessOm2Builder-access-om2/output000/ocean/ocean_grid.nc-expected0-True]` - unless specified. - """ - xfails_default = 1 - - try: - return int(os.environ["XFAILS"]) - except KeyError: - warnings.warn( - message=( - "XFAILS enabled by default as coordinate discovery disabled by default. " - "This will be deprecated when coordinate discovery is enabled by default" - ), - category=PendingDeprecationWarning, - ) - return xfails_default - - -_add_xfail = _get_xfail() +here = Path(__file__).parent @fixture(scope="session") def test_data(): - return Path(os.path.join(here, "data")) + return Path(here / "data") @fixture(scope="session") @@ -48,22 +22,3 @@ def BASE_DIR(tmp_path_factory): @fixture(scope="session") def v_num(): return datetime.now().strftime("v%Y-%m-%d") - - -def pytest_collection_modifyitems(config, items): - """ - This function is called by pytest to modify the items collected during test - collection. We use it here to mark the xfail tests in - test_builders::test_parse_access_ncfile when we check the file contents & to - ensure we correctly get xfails if we don't have cordinate discovery enabled - in intake-esm. - """ - for item in items: - if ( - item.name - in ( - "test_parse_access_ncfile[AccessOm2Builder-access-om2/output000/ocean/ocean_grid.nc-expected0-True]", - ) - and _add_xfail - ): - item.add_marker("xfail") diff --git a/e2e/test_end_to_end.py b/e2e/test_end_to_end.py index 5066754a..b9edb14a 100644 --- a/e2e/test_end_to_end.py +++ b/e2e/test_end_to_end.py @@ -87,7 +87,6 @@ def test_open_dataframe_catalog(metacat): ], ) def test_datastore_found(metacat, name): - breakpoint() assert metacat[name] == metacat.search(name=name).to_source() @@ -111,7 +110,6 @@ def test_datastore_found(metacat, name): ], ) def test_cmip5_datastore_nunique(metacat, colname, expected): - cat = metacat["cmip5_al33"] if colname != "derived_variable": @@ -140,7 +138,6 @@ def test_cmip5_datastore_nunique(metacat, colname, expected): ], ) def test_om2_datastore_nunique(metacat, colname, expected): - cat = metacat["1deg_jra55_ryf9091_gadi"] if colname not in [ diff --git a/src/access_nri_intake/cli.py b/src/access_nri_intake/cli.py index 771b3ef8..e662ddd6 100644 --- a/src/access_nri_intake/cli.py +++ b/src/access_nri_intake/cli.py @@ -95,7 +95,7 @@ def _check_build_args(args_list: list[dict]) -> None: if len(names) != len(set(names)): seen = set() dupes = [name for name in names if name in seen or seen.add(name)] # type: ignore - # seen.add(name) returns None & so is always Falsey - so what is it doing? + # TODO: We get N-1 errors here - can we get this down to 1? Do we want to? raise MetadataCheckError(f"There are experiments with the same name: {dupes}") if len(uuids) != len(set(uuids)): seen = set() From ab511ab8a0e2808eae09c7f861325dac83e8d1d2 Mon Sep 17 00:00:00 2001 From: Charles Turner Date: Mon, 25 Nov 2024 08:55:30 +1100 Subject: [PATCH 14/24] Moved end to end test into tests dir (yet to see if we can get it all working smoothly from there --- src/access_nri_intake/cli.py | 7 ++--- tests/conftest.py | 33 ++++++++++++++++++---- {e2e => tests/e2e}/__init__.py | 0 {e2e => tests/e2e}/configs/access-om2.yaml | 0 {e2e => tests/e2e}/configs/cmip5.yaml | 0 {e2e => tests/e2e}/conftest.py | 0 {e2e => tests/e2e}/test_end_to_end.py | 28 +++++++++--------- 7 files changed, 44 insertions(+), 24 deletions(-) rename {e2e => tests/e2e}/__init__.py (100%) rename {e2e => tests/e2e}/configs/access-om2.yaml (100%) rename {e2e => tests/e2e}/configs/cmip5.yaml (100%) rename {e2e => tests/e2e}/conftest.py (100%) rename {e2e => tests/e2e}/test_end_to_end.py (99%) diff --git a/src/access_nri_intake/cli.py b/src/access_nri_intake/cli.py index e662ddd6..138c257a 100644 --- a/src/access_nri_intake/cli.py +++ b/src/access_nri_intake/cli.py @@ -95,7 +95,6 @@ def _check_build_args(args_list: list[dict]) -> None: if len(names) != len(set(names)): seen = set() dupes = [name for name in names if name in seen or seen.add(name)] # type: ignore - # TODO: We get N-1 errors here - can we get this down to 1? Do we want to? raise MetadataCheckError(f"There are experiments with the same name: {dupes}") if len(uuids) != len(set(uuids)): seen = set() @@ -300,9 +299,9 @@ def _get_project(path): yaml_dict, version, version ) elif storage_new != storage_old: - yaml_dict["sources"]["access_nri"]["metadata"][ - "storage" - ] = _combine_storage_flags(storage_new, storage_old) + yaml_dict["sources"]["access_nri"]["metadata"]["storage"] = ( + _combine_storage_flags(storage_new, storage_old) + ) # Set the minimum and maximum catalog versions, if they're not set already # in the 'new catalog' if statement above diff --git a/tests/conftest.py b/tests/conftest.py index dfc8a12d..c4838dec 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -3,11 +3,37 @@ import os import warnings +from datetime import datetime from pathlib import Path from pytest import fixture -here = os.path.abspath(os.path.dirname(__file__)) +here = Path(__file__).parent + + +@fixture(scope="session") +def test_data(): + return Path(here / "data") + + +@fixture(scope="session") +def BASE_DIR(tmp_path_factory): + yield tmp_path_factory.mktemp("catalog-dir") + + +@fixture(scope="session") +def v_num(): + return datetime.now().strftime("v%Y-%m-%d") + + +def pytest_addoption(parser): + parser.addoption( + "--e2e", + action="store_true", + default=False, + help="Run end-to-end tests", + dest="e2e", + ) def _get_xfail(): @@ -34,11 +60,6 @@ def _get_xfail(): _add_xfail = _get_xfail() -@fixture(scope="session") -def test_data(): - return Path(os.path.join(here, "data")) - - def pytest_collection_modifyitems(config, items): """ This function is called by pytest to modify the items collected during test diff --git a/e2e/__init__.py b/tests/e2e/__init__.py similarity index 100% rename from e2e/__init__.py rename to tests/e2e/__init__.py diff --git a/e2e/configs/access-om2.yaml b/tests/e2e/configs/access-om2.yaml similarity index 100% rename from e2e/configs/access-om2.yaml rename to tests/e2e/configs/access-om2.yaml diff --git a/e2e/configs/cmip5.yaml b/tests/e2e/configs/cmip5.yaml similarity index 100% rename from e2e/configs/cmip5.yaml rename to tests/e2e/configs/cmip5.yaml diff --git a/e2e/conftest.py b/tests/e2e/conftest.py similarity index 100% rename from e2e/conftest.py rename to tests/e2e/conftest.py diff --git a/e2e/test_end_to_end.py b/tests/e2e/test_end_to_end.py similarity index 99% rename from e2e/test_end_to_end.py rename to tests/e2e/test_end_to_end.py index b9edb14a..22800882 100644 --- a/e2e/test_end_to_end.py +++ b/tests/e2e/test_end_to_end.py @@ -8,6 +8,9 @@ from .conftest import here +e2e = pytest.mark.skipif( + "not config.getoption('--e2e')", +) """ args=Namespace( config_yaml=[ @@ -22,19 +25,7 @@ """ -def print_directory_tree(root, indent=""): - """ - Pretty print a directory tree - code from chatgpt. - """ - for item in os.listdir(root): - path = os.path.join(root, item) - if os.path.isdir(path): - print(f"{indent}├── {item}/") - print_directory_tree(path, indent + "│ ") - else: - print(f"{indent}├── {item}") - - +@e2e @pytest.fixture(scope="session") def current_catalog(): """ @@ -44,6 +35,7 @@ def current_catalog(): yield metacat +@e2e @pytest.fixture(scope="session") def metacat(BASE_DIR, v_num): # Build our subset of the catalog. This should take ~2 minutes with the PBS @@ -70,15 +62,18 @@ def metacat(BASE_DIR, v_num): yield metacat +@e2e def test_catalog_subset_exists(BASE_DIR, v_num, metacat): assert os.path.exists(os.path.join(BASE_DIR, v_num, "metacatalog.csv")) +@e2e def test_open_dataframe_catalog(metacat): assert metacat print("Catalog opened successfully.") +@e2e @pytest.mark.parametrize( "name", [ @@ -90,6 +85,7 @@ def test_datastore_found(metacat, name): assert metacat[name] == metacat.search(name=name).to_source() +@e2e @pytest.mark.parametrize( "colname, expected", [ @@ -119,6 +115,7 @@ def test_cmip5_datastore_nunique(metacat, colname, expected): assert len(cat.df[colname].unique()) == expected +@e2e @pytest.mark.parametrize( "colname, expected", [ @@ -157,6 +154,7 @@ def test_om2_datastore_nunique(metacat, colname, expected): assert len(tuplified) == expected +@e2e @pytest.mark.parametrize( "colname, expected", [ @@ -301,6 +299,7 @@ def test_cmip5_metacat_vals_found(metacat, colname, expected): assert found == expected +@e2e @pytest.mark.parametrize( "colname, expected", [ @@ -2200,7 +2199,6 @@ def test_om2_metacat_vals_found(metacat, colname, expected): # Test that the unique values in the column are as expected. I've truncated # the unique values to the first 10 for brevity because I'm not typing out # 3700255 unique values. - breakpoint() cat = metacat["1deg_jra55_ryf9091_gadi"] if colname not in [ "variable", @@ -2220,6 +2218,7 @@ def test_om2_metacat_vals_found(metacat, colname, expected): assert found == expected +@e2e @pytest.mark.parametrize( "path, varname, first_ten_mean", [ @@ -2427,6 +2426,7 @@ def test_cmip5_values_correct(metacat, current_catalog, path, varname, first_ten assert vals_equal +@e2e @pytest.mark.parametrize( "path, varname, first_ten_mean", [ From efd3fcd6b79fa6604cff67dcaab07a681c4970b4 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 24 Nov 2024 21:57:00 +0000 Subject: [PATCH 15/24] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/access_nri_intake/cli.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/access_nri_intake/cli.py b/src/access_nri_intake/cli.py index 138c257a..64aa05bc 100644 --- a/src/access_nri_intake/cli.py +++ b/src/access_nri_intake/cli.py @@ -299,9 +299,9 @@ def _get_project(path): yaml_dict, version, version ) elif storage_new != storage_old: - yaml_dict["sources"]["access_nri"]["metadata"]["storage"] = ( - _combine_storage_flags(storage_new, storage_old) - ) + yaml_dict["sources"]["access_nri"]["metadata"][ + "storage" + ] = _combine_storage_flags(storage_new, storage_old) # Set the minimum and maximum catalog versions, if they're not set already # in the 'new catalog' if statement above From 7a5628905fd84b24831df7649d43e0692bba3c01 Mon Sep 17 00:00:00 2001 From: Charles Turner Date: Mon, 25 Nov 2024 09:33:31 +1100 Subject: [PATCH 16/24] Added workflow & shell script to submit it to Gadi --- .github/workflows/e2e.yaml | 36 ++++++++++++++++++++++++++++++++++++ bin/test_end_to_end.sh | 27 +++++++++++++++++++++++++++ 2 files changed, 63 insertions(+) create mode 100644 .github/workflows/e2e.yaml create mode 100644 bin/test_end_to_end.sh diff --git a/.github/workflows/e2e.yaml b/.github/workflows/e2e.yaml new file mode 100644 index 00000000..1bb0c89a --- /dev/null +++ b/.github/workflows/e2e.yaml @@ -0,0 +1,36 @@ +name: Run end-to-end tests on Gadi +on: + workflow_dispatch: + inputs: + release_version: + description: 'Release version' + required: true + +jobs: + build: + runs-on: ubuntu-latest + steps: + - name: Checkout repository + ### Latest at time of writing + uses: actions/checkout@v4.2.2 + - name: Sync repository to Gadi + ### Latest at time of writing + uses: up9cloud/action-rsync@v1.4 + env: + HOST: gadi.nci.org.au + TARGET: ${{secrets.GADI_REPO_PATH}} + KEY: ${{secrets.DEPLOY_KEY}} + USER: ${{secrets.GADI_USER}} + PRE_SCRIPT: | + export PROJECT=xp65_w + - name: Run end-to-end tests + uses: appleboy/ssh-action@v1.1.0 + with: + host: gadi.nci.org.au + username: ${{secrets.GADI_USER}} + key: ${{secrets.DEPLOY_KEY}} + script: | + cd ${{secrets.GADI_REPO_PATH}} + qsub bin/build_all.sh + + diff --git a/bin/test_end_to_end.sh b/bin/test_end_to_end.sh new file mode 100644 index 00000000..2f8f8ea3 --- /dev/null +++ b/bin/test_end_to_end.sh @@ -0,0 +1,27 @@ +#!/bin/bash -l + +#PBS -P iq82 +#PBS -l storage=gdata/xp65+gdata/ik11+gdata/cj50+gdata/hh5+gdata/p73+gdata/dk92+gdata/al33+gdata/rr3+gdata/fs38+gdata/oi10 +#PBS -q normal +#PBS -W block=true +#PBS -l walltime=00:30:00 +#PBS -l mem=32gb +#PBS -l ncpus=12 +#PBS -l wd +#PBS -j oe + +########################################################################################### +# Copyright 2022 ACCESS-NRI and contributors. See the top-level COPYRIGHT file for details. +# SPDX-License-Identifier: Apache-2.0 + +# Description: +# Generate access-nri intake metacatalog from config files + +########################################################################################### + +set -e + +module use /g/data/xp65/public/modules +module load conda/access-med-0.6 + +pytest -s --e2e tests \ No newline at end of file From b91d1dfc90d9a0fb65bfd74ad4fb57aa83a35eb3 Mon Sep 17 00:00:00 2001 From: Charles Turner Date: Mon, 25 Nov 2024 09:41:18 +1100 Subject: [PATCH 17/24] Clean up test file (remove unused string, etc etc) --- bin/test_end_to_end.sh | 2 +- tests/e2e/test_end_to_end.py | 14 +------------- 2 files changed, 2 insertions(+), 14 deletions(-) diff --git a/bin/test_end_to_end.sh b/bin/test_end_to_end.sh index 2f8f8ea3..82fe02f1 100644 --- a/bin/test_end_to_end.sh +++ b/bin/test_end_to_end.sh @@ -24,4 +24,4 @@ set -e module use /g/data/xp65/public/modules module load conda/access-med-0.6 -pytest -s --e2e tests \ No newline at end of file +pytest -s --e2e tests diff --git a/tests/e2e/test_end_to_end.py b/tests/e2e/test_end_to_end.py index 22800882..d9742a22 100644 --- a/tests/e2e/test_end_to_end.py +++ b/tests/e2e/test_end_to_end.py @@ -11,18 +11,6 @@ e2e = pytest.mark.skipif( "not config.getoption('--e2e')", ) -""" -args=Namespace( - config_yaml=[ - '/scratch/tm70/ct1163/configs/cmip5.yaml', - '/scratch/tm70/ct1163/configs/access-om2.yaml'], - build_base_path='/scratch/tm70/ct1163/test_cat/', - catalog_base_path='./', - catalog_file='metacatalog.csv', - version='v2024-11-18', - no_update=False - ) -""" @e2e @@ -2212,7 +2200,7 @@ def test_om2_metacat_vals_found(metacat, colname, expected): else: # These should fail because they contains lists (unhashable) with pytest.raises(TypeError): - _found = set(cat.df[colname].unique()[:10]) + _ = set(cat.df[colname].unique()[:10]) # cast to tuple to make them hashable, then check the length found = set(cat.df[colname].apply(lambda x: tuple(x)).unique()[:10]) assert found == expected From 823613b008a3d3e39802ea976e2bfe88c55fe453 Mon Sep 17 00:00:00 2001 From: Charles Turner Date: Mon, 25 Nov 2024 09:41:18 +1100 Subject: [PATCH 18/24] Clean up test file (remove unused string, scope=session => module) --- bin/test_end_to_end.sh | 2 +- tests/e2e/test_end_to_end.py | 18 +++--------------- 2 files changed, 4 insertions(+), 16 deletions(-) diff --git a/bin/test_end_to_end.sh b/bin/test_end_to_end.sh index 2f8f8ea3..82fe02f1 100644 --- a/bin/test_end_to_end.sh +++ b/bin/test_end_to_end.sh @@ -24,4 +24,4 @@ set -e module use /g/data/xp65/public/modules module load conda/access-med-0.6 -pytest -s --e2e tests \ No newline at end of file +pytest -s --e2e tests diff --git a/tests/e2e/test_end_to_end.py b/tests/e2e/test_end_to_end.py index 22800882..f7e932ef 100644 --- a/tests/e2e/test_end_to_end.py +++ b/tests/e2e/test_end_to_end.py @@ -11,22 +11,10 @@ e2e = pytest.mark.skipif( "not config.getoption('--e2e')", ) -""" -args=Namespace( - config_yaml=[ - '/scratch/tm70/ct1163/configs/cmip5.yaml', - '/scratch/tm70/ct1163/configs/access-om2.yaml'], - build_base_path='/scratch/tm70/ct1163/test_cat/', - catalog_base_path='./', - catalog_file='metacatalog.csv', - version='v2024-11-18', - no_update=False - ) -""" @e2e -@pytest.fixture(scope="session") +@pytest.fixture(scope="module") def current_catalog(): """ Return the current catalog as an intake catalog. @@ -36,7 +24,7 @@ def current_catalog(): @e2e -@pytest.fixture(scope="session") +@pytest.fixture(scope="module") def metacat(BASE_DIR, v_num): # Build our subset of the catalog. This should take ~2 minutes with the PBS # flags in build_subset.sh @@ -2212,7 +2200,7 @@ def test_om2_metacat_vals_found(metacat, colname, expected): else: # These should fail because they contains lists (unhashable) with pytest.raises(TypeError): - _found = set(cat.df[colname].unique()[:10]) + _ = set(cat.df[colname].unique()[:10]) # cast to tuple to make them hashable, then check the length found = set(cat.df[colname].apply(lambda x: tuple(x)).unique()[:10]) assert found == expected From 40918c014a45e552072042bb884705a09cbe9e39 Mon Sep 17 00:00:00 2001 From: Charles Turner Date: Mon, 25 Nov 2024 11:40:08 +1100 Subject: [PATCH 19/24] Added reference to e2e tests in docs --- docs/contributing/code.rst | 5 +++++ tests/conftest.py | 5 +++++ tests/e2e/test_end_to_end.py | 8 +++----- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/docs/contributing/code.rst b/docs/contributing/code.rst index 2c888f13..5bfe93b9 100644 --- a/docs/contributing/code.rst +++ b/docs/contributing/code.rst @@ -60,5 +60,10 @@ contributions and submitting a pull request. pytest . + This project has both unit tests and integration tests. Integration tests are disabled by default due to computational + expense, and can only be run on Gadi. To run the full test suite, including integration tests, run:: + + pytest --e2e . + #. Once you are happy with your contribution, go `here `_ and open a new pull request to merge your branch of your fork with the main branch of the base. diff --git a/tests/conftest.py b/tests/conftest.py index c4838dec..ee39c370 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -16,6 +16,11 @@ def test_data(): return Path(here / "data") +@fixture(scope="session") +def config_dir(): + return Path(here / "configs") + + @fixture(scope="session") def BASE_DIR(tmp_path_factory): yield tmp_path_factory.mktemp("catalog-dir") diff --git a/tests/e2e/test_end_to_end.py b/tests/e2e/test_end_to_end.py index f7e932ef..edb31515 100644 --- a/tests/e2e/test_end_to_end.py +++ b/tests/e2e/test_end_to_end.py @@ -6,8 +6,6 @@ from access_nri_intake.cli import build -from .conftest import here - e2e = pytest.mark.skipif( "not config.getoption('--e2e')", ) @@ -25,15 +23,15 @@ def current_catalog(): @e2e @pytest.fixture(scope="module") -def metacat(BASE_DIR, v_num): +def metacat(BASE_DIR, config_dir, v_num): # Build our subset of the catalog. This should take ~2 minutes with the PBS # flags in build_subset.sh print(f"Building the catalog subset & writing to {BASE_DIR}") print(f"Version number: {v_num}") build( [ - f"{here}/configs/cmip5.yaml", - f"{here}/configs/access-om2.yaml", + str(config_dir / "cmip5.yaml"), + str(config_dir / "access-om2.yaml"), "--build_base_path", str(BASE_DIR), "--catalog_base_path", From dd8c0904bbb0c82f365ebb72b0892d4726509785 Mon Sep 17 00:00:00 2001 From: Charles Turner Date: Mon, 25 Nov 2024 12:03:08 +1100 Subject: [PATCH 20/24] Cleaned up fixture, removed redundant second conftest.py, fixed configs location --- tests/conftest.py | 2 +- tests/e2e/conftest.py | 24 ------------------------ 2 files changed, 1 insertion(+), 25 deletions(-) delete mode 100644 tests/e2e/conftest.py diff --git a/tests/conftest.py b/tests/conftest.py index ee39c370..47bc9b54 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -18,7 +18,7 @@ def test_data(): @fixture(scope="session") def config_dir(): - return Path(here / "configs") + return Path(here / "e2e/configs") @fixture(scope="session") diff --git a/tests/e2e/conftest.py b/tests/e2e/conftest.py deleted file mode 100644 index 58bdf5a9..00000000 --- a/tests/e2e/conftest.py +++ /dev/null @@ -1,24 +0,0 @@ -# Copyright 2023 ACCESS-NRI and contributors. See the top-level COPYRIGHT file for details. -# SPDX-License-Identifier: Apache-2.0 - -from datetime import datetime -from pathlib import Path - -from pytest import fixture - -here = Path(__file__).parent - - -@fixture(scope="session") -def test_data(): - return Path(here / "data") - - -@fixture(scope="session") -def BASE_DIR(tmp_path_factory): - yield tmp_path_factory.mktemp("catalog-dir") - - -@fixture(scope="session") -def v_num(): - return datetime.now().strftime("v%Y-%m-%d") From 268c98f5779d994bfdd08c58b3bc65f2bbd78e81 Mon Sep 17 00:00:00 2001 From: Charles Turner Date: Mon, 25 Nov 2024 14:33:52 +1100 Subject: [PATCH 21/24] Cleaning up a few tests where mocks no longer necessary --- src/access_nri_intake/cli.py | 13 ++- tests/test_cli.py | 175 +++++++++++++++-------------------- 2 files changed, 79 insertions(+), 109 deletions(-) diff --git a/src/access_nri_intake/cli.py b/src/access_nri_intake/cli.py index 3827f7c2..dda18b90 100644 --- a/src/access_nri_intake/cli.py +++ b/src/access_nri_intake/cli.py @@ -10,7 +10,6 @@ import re from collections.abc import Sequence from pathlib import Path -from typing import Optional import jsonschema import yaml @@ -105,7 +104,7 @@ def _check_build_args(args_list: list[dict]) -> None: ) -def build(argv: Optional[Sequence[str]] = None): +def build(argv: Sequence[str] | None = None): """ Build an intake-dataframe-catalog from YAML configuration file(s). """ @@ -299,9 +298,9 @@ def _get_project(path): yaml_dict, version, version ) elif storage_new != storage_old: - yaml_dict["sources"]["access_nri"]["metadata"][ - "storage" - ] = _combine_storage_flags(storage_new, storage_old) + yaml_dict["sources"]["access_nri"]["metadata"]["storage"] = ( + _combine_storage_flags(storage_new, storage_old) + ) # Set the minimum and maximum catalog versions, if they're not set already # in the 'new catalog' if statement above @@ -359,7 +358,7 @@ def _combine_storage_flags(a: str, b: str) -> str: return "+".join(sorted(list(set(aflags + bflags)))) -def metadata_validate(argv: Optional[Sequence[str]] = None): +def metadata_validate(argv: Sequence[str] | None = None): """ Check provided metadata.yaml file(s) against the experiment schema """ @@ -371,7 +370,7 @@ def metadata_validate(argv: Optional[Sequence[str]] = None): help="The path to the metadata.yaml file. Multiple file paths can be passed.", ) - args = parser.parse_args() + args = parser.parse_args(argv) files = args.file for f in files: diff --git a/tests/test_cli.py b/tests/test_cli.py index 994b0e3c..17d6e069 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -104,20 +104,6 @@ def test_check_build_args(args, raises): _check_build_args(args) -@mock.patch( - "argparse.ArgumentParser.parse_args", - return_value=argparse.Namespace( - config_yaml=[ - "config/access-om2.yaml", - "config/cmip5.yaml", - ], - build_base_path=None, # Use pytest fixture here? - catalog_base_path=None, - catalog_file="cat.csv", - version=None, - no_update=True, - ), -) @pytest.mark.parametrize( "version", [ @@ -125,45 +111,42 @@ def test_check_build_args(args, raises): "2024-01-01", ], ) -def test_build(mockargs, version, test_data): +def test_build(version, test_data): """Test full catalog build process from config files""" # Update the config_yaml paths - mockargs.return_value.build_base_path = tempfile.TemporaryDirectory().name - mockargs.return_value.catalog_base_path = mockargs.return_value.build_base_path - for i, p in enumerate(mockargs.return_value.config_yaml): - mockargs.return_value.config_yaml[i] = os.path.join(test_data, p) - mockargs.return_value.version = version - - build() + build_base_path = tempfile.TemporaryDirectory().name + + configs = [ + str(test_data / fname) + for fname in ["config/access-om2.yaml", "config/cmip5.yaml"] + ] + + build( + [ + *configs, + "--catalog_file", + "cat.csv", + "--no_update", + "--version", + version, + "--build_base_path", + build_base_path, + "--catalog_base_path", + build_base_path, + ] + ) - # Manually fix the version so we can correctly build the test path - if not mockargs.return_value.version.startswith("v"): - mockargs.return_value.version = f"v{mockargs.return_value.version}" + # manually fix the version so we can correctly build the test path: build + # will do this for us so we need to replicate it here + if not version.startswith("v"): + version = f"v{version}" # Try to open the catalog - build_path = ( - Path(mockargs.return_value.build_base_path) - / mockargs.return_value.version - / mockargs.return_value.catalog_file - ) + build_path = Path(build_base_path) / version / "cat.csv" cat = intake.open_df_catalog(build_path) assert len(cat) == 2 -@mock.patch( - "argparse.ArgumentParser.parse_args", - return_value=argparse.Namespace( - config_yaml=[ - "config/access-om2.yaml", - "config/cmip5.yaml", - ], - build_base_path=tempfile.TemporaryDirectory().name, # Use pytest fixture here? - catalog_base_path=tempfile.TemporaryDirectory().name, - catalog_file="cat.csv", - version="v2024-01-01", - no_update=True, - ), -) @pytest.mark.parametrize( "bad_vers", [ @@ -179,16 +162,31 @@ def test_build(mockargs, version, test_data): "v0.1.2", # Old-style version numbers ], ) -def test_build_bad_version(mockargs, bad_vers, test_data): +def test_build_bad_version(bad_vers, test_data): """Test full catalog build process from config files""" # Update the config_yaml paths - for i, p in enumerate(mockargs.return_value.config_yaml): - mockargs.return_value.config_yaml[i] = os.path.join(test_data, p) + build_base_path = tempfile.TemporaryDirectory().name - mockargs.return_value.version = bad_vers + configs = [ + str(test_data / fname) + for fname in ["config/access-om2.yaml", "config/cmip5.yaml"] + ] with pytest.raises(ValueError): - build() + build( + [ + *configs, + "--catalog_file", + "cat.csv", + "--no_update", + "--version", + bad_vers, + "--build_base_path", + build_base_path, + "--catalog_base_path", + build_base_path, + ] + ) @mock.patch("access_nri_intake.cli.get_catalog_fp") @@ -391,11 +389,13 @@ def test_build_existing_data(mockargs, get_catalog_fp, test_data, min_vers, max_ with Path(get_catalog_fp.return_value).open(mode="r") as fobj: cat_yaml = yaml.safe_load(fobj) - assert cat_yaml["sources"]["access_nri"]["parameters"]["version"].get("min") == ( - min_vers if min_vers is not None else mockargs.return_value.version + assert ( + cat_yaml["sources"]["access_nri"]["parameters"]["version"].get("min") + == (min_vers if min_vers is not None else mockargs.return_value.version) ), f'Min version {cat_yaml["sources"]["access_nri"]["parameters"]["version"].get("min")} does not match expected {min_vers if min_vers is not None else mockargs.return_value.version}' - assert cat_yaml["sources"]["access_nri"]["parameters"]["version"].get("max") == ( - max_vers if max_vers is not None else mockargs.return_value.version + assert ( + cat_yaml["sources"]["access_nri"]["parameters"]["version"].get("max") + == (max_vers if max_vers is not None else mockargs.return_value.version) ), f'Max version {cat_yaml["sources"]["access_nri"]["parameters"]["version"].get("max")} does not match expected {max_vers if max_vers is not None else mockargs.return_value.version}' # Default should always be the newly-built version assert ( @@ -467,11 +467,13 @@ def test_build_existing_data_existing_old_cat( with Path(get_catalog_fp.return_value).open(mode="r") as fobj: cat_yaml = yaml.safe_load(fobj) - assert cat_yaml["sources"]["access_nri"]["parameters"]["version"].get("min") == ( - min_vers if min_vers is not None else mockargs.return_value.version + assert ( + cat_yaml["sources"]["access_nri"]["parameters"]["version"].get("min") + == (min_vers if min_vers is not None else mockargs.return_value.version) ), f'Min version {cat_yaml["sources"]["access_nri"]["parameters"]["version"].get("min")} does not match expected {min_vers if min_vers is not None else mockargs.return_value.version}' - assert cat_yaml["sources"]["access_nri"]["parameters"]["version"].get("max") == ( - max_vers if max_vers is not None else mockargs.return_value.version + assert ( + cat_yaml["sources"]["access_nri"]["parameters"]["version"].get("max") + == (max_vers if max_vers is not None else mockargs.return_value.version) ), f'Max version {cat_yaml["sources"]["access_nri"]["parameters"]["version"].get("max")} does not match expected {max_vers if max_vers is not None else mockargs.return_value.version}' # Default should always be the newly-built version assert ( @@ -544,8 +546,6 @@ def test_build_separation_between_catalog_and_buildbase( exist_ok=False, ) - # import pdb; pdb.set_trace() - build() # The version folders exist in the catalog directory, not the build @@ -889,25 +889,13 @@ def test_build_repeat_altercatalogstruct_multivers( ), f'Default version {cat_second["sources"]["access_nri"]["parameters"]["version"].get("default")} does not match expected v2025-01-01' -@mock.patch( - "argparse.ArgumentParser.parse_args", - return_value=argparse.Namespace( - file=["access-om2/metadata.yaml"], - ), -) -def test_metadata_validate(mockargs, test_data): +def test_metadata_validate(test_data): """Test metadata_validate""" - for i, p in enumerate(mockargs.return_value.file): - mockargs.return_value.file[i] = os.path.join(test_data, p) - metadata_validate() + + file = str(test_data / "access-om2/metadata.yaml") + metadata_validate([file]) -@mock.patch( - "argparse.ArgumentParser.parse_args", - return_value=argparse.Namespace( - file=None, - ), -) @pytest.mark.parametrize( "bad_yaml,e", [ @@ -915,45 +903,28 @@ def test_metadata_validate(mockargs, test_data): ("bad_metadata/doesntexist.yaml", FileNotFoundError), ], ) -def test_metadata_validate_bad(mockargs, test_data, bad_yaml, e): - bad_yaml = os.path.join(test_data, bad_yaml) - mockargs.return_value.file = [bad_yaml] +def test_metadata_validate_bad(test_data, bad_yaml, e): + bad_yaml = str(test_data / bad_yaml) if ( e is None ): # These are situations where an exception is raised, caught, and printed - metadata_validate() + metadata_validate([bad_yaml]) else: with pytest.raises(e): - metadata_validate() + metadata_validate([bad_yaml]) -@mock.patch( - "argparse.ArgumentParser.parse_args", - return_value=argparse.Namespace( - file=[ - "access-om2/metadata.yaml", - "access-om3/metadata.yaml", - ], - ), -) -def test_metadata_validate_multi(mockargs, test_data): +def test_metadata_validate_multi(test_data): """Test metadata_validate""" - # Update the config_yaml paths - for i, p in enumerate(mockargs.return_value.file): - mockargs.return_value.file[i] = os.path.join(test_data, p) - metadata_validate() + files = ["access-om2/metadata.yaml", "access-om3/metadata.yaml"] + files = [str(test_data / f) for f in files] + metadata_validate(files) -@mock.patch( - "argparse.ArgumentParser.parse_args", - return_value=argparse.Namespace( - file="./does/not/exist.yaml", - ), -) -def test_metadata_validate_no_file(mockargs): +def test_metadata_validate_no_file(): """Test metadata_validate""" with pytest.raises(FileNotFoundError) as excinfo: - metadata_validate() + metadata_validate(["./does/not/exist.yaml"]) assert "No such file(s)" in str(excinfo.value) From a63bdab1142f10007eb0cfc02c7a1cf7aaed2b02 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 25 Nov 2024 03:34:06 +0000 Subject: [PATCH 22/24] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/access_nri_intake/cli.py | 6 +++--- tests/test_cli.py | 20 ++++++++------------ 2 files changed, 11 insertions(+), 15 deletions(-) diff --git a/src/access_nri_intake/cli.py b/src/access_nri_intake/cli.py index dda18b90..b3393095 100644 --- a/src/access_nri_intake/cli.py +++ b/src/access_nri_intake/cli.py @@ -298,9 +298,9 @@ def _get_project(path): yaml_dict, version, version ) elif storage_new != storage_old: - yaml_dict["sources"]["access_nri"]["metadata"]["storage"] = ( - _combine_storage_flags(storage_new, storage_old) - ) + yaml_dict["sources"]["access_nri"]["metadata"][ + "storage" + ] = _combine_storage_flags(storage_new, storage_old) # Set the minimum and maximum catalog versions, if they're not set already # in the 'new catalog' if statement above diff --git a/tests/test_cli.py b/tests/test_cli.py index 17d6e069..a530ba90 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -389,13 +389,11 @@ def test_build_existing_data(mockargs, get_catalog_fp, test_data, min_vers, max_ with Path(get_catalog_fp.return_value).open(mode="r") as fobj: cat_yaml = yaml.safe_load(fobj) - assert ( - cat_yaml["sources"]["access_nri"]["parameters"]["version"].get("min") - == (min_vers if min_vers is not None else mockargs.return_value.version) + assert cat_yaml["sources"]["access_nri"]["parameters"]["version"].get("min") == ( + min_vers if min_vers is not None else mockargs.return_value.version ), f'Min version {cat_yaml["sources"]["access_nri"]["parameters"]["version"].get("min")} does not match expected {min_vers if min_vers is not None else mockargs.return_value.version}' - assert ( - cat_yaml["sources"]["access_nri"]["parameters"]["version"].get("max") - == (max_vers if max_vers is not None else mockargs.return_value.version) + assert cat_yaml["sources"]["access_nri"]["parameters"]["version"].get("max") == ( + max_vers if max_vers is not None else mockargs.return_value.version ), f'Max version {cat_yaml["sources"]["access_nri"]["parameters"]["version"].get("max")} does not match expected {max_vers if max_vers is not None else mockargs.return_value.version}' # Default should always be the newly-built version assert ( @@ -467,13 +465,11 @@ def test_build_existing_data_existing_old_cat( with Path(get_catalog_fp.return_value).open(mode="r") as fobj: cat_yaml = yaml.safe_load(fobj) - assert ( - cat_yaml["sources"]["access_nri"]["parameters"]["version"].get("min") - == (min_vers if min_vers is not None else mockargs.return_value.version) + assert cat_yaml["sources"]["access_nri"]["parameters"]["version"].get("min") == ( + min_vers if min_vers is not None else mockargs.return_value.version ), f'Min version {cat_yaml["sources"]["access_nri"]["parameters"]["version"].get("min")} does not match expected {min_vers if min_vers is not None else mockargs.return_value.version}' - assert ( - cat_yaml["sources"]["access_nri"]["parameters"]["version"].get("max") - == (max_vers if max_vers is not None else mockargs.return_value.version) + assert cat_yaml["sources"]["access_nri"]["parameters"]["version"].get("max") == ( + max_vers if max_vers is not None else mockargs.return_value.version ), f'Max version {cat_yaml["sources"]["access_nri"]["parameters"]["version"].get("max")} does not match expected {max_vers if max_vers is not None else mockargs.return_value.version}' # Default should always be the newly-built version assert ( From 54abc39df09a93579216ec5da58f95853a4aabe5 Mon Sep 17 00:00:00 2001 From: Charles Turner Date: Tue, 26 Nov 2024 16:54:58 +1100 Subject: [PATCH 23/24] Fixed workflow trigger & type hint that disappeared --- .github/workflows/e2e.yaml | 13 ------------- src/access_nri_intake/cli.py | 8 ++++---- 2 files changed, 4 insertions(+), 17 deletions(-) diff --git a/.github/workflows/e2e.yaml b/.github/workflows/e2e.yaml index 1bb0c89a..b53d73f9 100644 --- a/.github/workflows/e2e.yaml +++ b/.github/workflows/e2e.yaml @@ -10,19 +10,6 @@ jobs: build: runs-on: ubuntu-latest steps: - - name: Checkout repository - ### Latest at time of writing - uses: actions/checkout@v4.2.2 - - name: Sync repository to Gadi - ### Latest at time of writing - uses: up9cloud/action-rsync@v1.4 - env: - HOST: gadi.nci.org.au - TARGET: ${{secrets.GADI_REPO_PATH}} - KEY: ${{secrets.DEPLOY_KEY}} - USER: ${{secrets.GADI_USER}} - PRE_SCRIPT: | - export PROJECT=xp65_w - name: Run end-to-end tests uses: appleboy/ssh-action@v1.1.0 with: diff --git a/src/access_nri_intake/cli.py b/src/access_nri_intake/cli.py index 555a602e..1d30be30 100644 --- a/src/access_nri_intake/cli.py +++ b/src/access_nri_intake/cli.py @@ -29,7 +29,7 @@ class MetadataCheckError(Exception): def _parse_build_inputs( - config_yamls: list[str], build_path, data_base_path: str + config_yamls: list[str], build_path: str, data_base_path: str ) -> list[tuple[str, dict]]: """ Parse build inputs into a list of tuples of CatalogManager methods and args to @@ -313,9 +313,9 @@ def _get_project(path): yaml_dict, version, version ) elif storage_new != storage_old: - yaml_dict["sources"]["access_nri"]["metadata"][ - "storage" - ] = _combine_storage_flags(storage_new, storage_old) + yaml_dict["sources"]["access_nri"]["metadata"]["storage"] = ( + _combine_storage_flags(storage_new, storage_old) + ) # Set the minimum and maximum catalog versions, if they're not set already # in the 'new catalog' if statement above From 6d543fd1f732a394242dc22d603538472952d8c0 Mon Sep 17 00:00:00 2001 From: Charles Turner Date: Tue, 26 Nov 2024 16:58:06 +1100 Subject: [PATCH 24/24] Pre-commit --- src/access_nri_intake/cli.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/access_nri_intake/cli.py b/src/access_nri_intake/cli.py index 1d30be30..408f7802 100644 --- a/src/access_nri_intake/cli.py +++ b/src/access_nri_intake/cli.py @@ -313,9 +313,9 @@ def _get_project(path): yaml_dict, version, version ) elif storage_new != storage_old: - yaml_dict["sources"]["access_nri"]["metadata"]["storage"] = ( - _combine_storage_flags(storage_new, storage_old) - ) + yaml_dict["sources"]["access_nri"]["metadata"][ + "storage" + ] = _combine_storage_flags(storage_new, storage_old) # Set the minimum and maximum catalog versions, if they're not set already # in the 'new catalog' if statement above