From a48f36302a440cba055295735c4441181e2cb09d Mon Sep 17 00:00:00 2001 From: Tom Vo Date: Mon, 4 Dec 2023 16:44:01 -0800 Subject: [PATCH] Regression testing for lat_lon variables `NET_FLUX_SRF` and `RESTOM` (#754) --- .../671-lat-lon/11_28_23_qa_diffs.py | 70 + .../12-4-23-qa-no-cdms-slice.ipynb | 800 ++++++++++ .../671-lat-lon/12_4_23_qa_no_cdms_slice.py | 100 ++ .../671-lat-lon/671-diags.cfg | 15 + .../671-lat-lon/671-lat-lon.ipynb | 475 ++++++ .../671-lat-lon/ex1.py | 61 + .../671-lat-lon/ex1_3d.py | 41 + .../template_cdat_regression_test.ipynb | 468 ++++++ .../cdat_regression_testing/utils.py | 162 ++ .../template_cdat_regression_test.ipynb | 1333 ----------------- 10 files changed, 2192 insertions(+), 1333 deletions(-) create mode 100644 auxiliary_tools/cdat_regression_testing/671-lat-lon/11_28_23_qa_diffs.py create mode 100644 auxiliary_tools/cdat_regression_testing/671-lat-lon/12-4-23-qa-no-cdms-slice.ipynb create mode 100644 auxiliary_tools/cdat_regression_testing/671-lat-lon/12_4_23_qa_no_cdms_slice.py create mode 100644 auxiliary_tools/cdat_regression_testing/671-lat-lon/671-diags.cfg create mode 100644 auxiliary_tools/cdat_regression_testing/671-lat-lon/671-lat-lon.ipynb create mode 100644 auxiliary_tools/cdat_regression_testing/671-lat-lon/ex1.py create mode 100644 auxiliary_tools/cdat_regression_testing/671-lat-lon/ex1_3d.py create mode 100644 auxiliary_tools/cdat_regression_testing/template_cdat_regression_test.ipynb create mode 100644 auxiliary_tools/cdat_regression_testing/utils.py delete mode 100644 auxiliary_tools/template_cdat_regression_test.ipynb diff --git a/auxiliary_tools/cdat_regression_testing/671-lat-lon/11_28_23_qa_diffs.py b/auxiliary_tools/cdat_regression_testing/671-lat-lon/11_28_23_qa_diffs.py new file mode 100644 index 000000000..757ed9baa --- /dev/null +++ b/auxiliary_tools/cdat_regression_testing/671-lat-lon/11_28_23_qa_diffs.py @@ -0,0 +1,70 @@ +""" +QA diffs + +* NET_FLUX_SRF - test and ref +* RESTOM - test and ref + +""" +# %% 
+import os +import sys + +from e3sm_diags.parameter.core_parameter import CoreParameter +from e3sm_diags.run import runner + +param = CoreParameter() + +# Location of the data. +param.test_data_path = "/global/cfs/cdirs/e3sm/e3sm_diags/test_model_data_for_acme_diags/time-series/E3SM_v1" +param.reference_data_path = "/global/cfs/cdirs/e3sm/e3sm_diags/test_model_data_for_acme_diags/time-series/E3SM_v1" + +# Variables +param.variables = ["NET_FLUX_SRF", "RESTOM"] + +# Set this parameter to True. +# By default, e3sm_diags expects the test data to be climo data. +param.test_timeseries_input = True +# Years to slice the test data, base this off the years in the filenames. +param.test_start_yr = "2011" +param.test_end_yr = "2013" + +# Set this parameter to True. +# By default, e3sm_diags expects the ref data to be climo data. +param.ref_timeseries_input = True +# Years to slice the ref data, base this off the years in the filenames +param.ref_start_yr = "1850" +param.ref_end_yr = "1852" + +# When running with time-series data, you don't need to specify the name of the data. +# But you should, otherwise nothing is displayed when the test/ref name is needed. +param.short_test_name = "historical_H1" +param.short_ref_name = "historical_H1" + +# This parameter modifies the software to accommodate model vs model runs. +# The default setting for run_type is 'model_vs_obs'. +param.run_type = "model_vs_model" +# Name of the folder where the results are stored. +# Change `prefix` to use your directory. +prefix = "/global/cfs/cdirs/e3sm/www/vo13/examples" +param.results_dir = os.path.join(prefix, "run_refactor_single_param") + +# Below are more optional arguments. + +# What plotsets to run the diags on. +# If not defined, then all available sets are used. +param.sets = ["lat_lon"] +# What seasons to run the diags on. +# If not defined, diags are run on ['ANN', 'DJF', 'MAM', 'JJA', 'SON']. +param.seasons = ["ANN"] +# Title of the difference plots. 
+param.diff_title = "Model (2011-2013) - Model (1850-1852)" + +# For running with multiprocessing. +param.multiprocessing = False +# param.num_workers = 24 + +# %% +DIR_PATH = "/global/u2/v/vo13/E3SM-Project/e3sm_diags/auxiliary_tools/cdat_regression_testing/671-lat-lon" +CFG_PATH = os.path.join(DIR_PATH, "671-diags.cfg") +sys.argv.extend(["-d", CFG_PATH]) +runner.run_diags([param]) diff --git a/auxiliary_tools/cdat_regression_testing/671-lat-lon/12-4-23-qa-no-cdms-slice.ipynb b/auxiliary_tools/cdat_regression_testing/671-lat-lon/12-4-23-qa-no-cdms-slice.ipynb new file mode 100644 index 000000000..7f53b053c --- /dev/null +++ b/auxiliary_tools/cdat_regression_testing/671-lat-lon/12-4-23-qa-no-cdms-slice.ipynb @@ -0,0 +1,800 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# CDAT Migration Regression Testing Notebook\n", + "\n", + "Comparing `cdat-migration-fy24` against `main` without the slice_flag.\n", + "\n", + "FINDINGS: Results are all nearly identical.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setup Code\n" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "import glob\n", + "from auxiliary_tools.cdat_regression_testing.utils import (\n", + " get_metrics,\n", + " get_rel_diffs,\n", + " get_num_metrics_above_diff_thres,\n", + " highlight_large_diffs,\n", + " sort_columns,\n", + " update_diffs_to_pct,\n", + " PERCENTAGE_COLUMNS,\n", + ")\n", + "\n", + "import pandas as pd\n", + "\n", + "# TODO: Update DEV_PATH and MAIN_PATH to your diagnostic sets.\n", + "DEV_PATH = \"/global/cfs/cdirs/e3sm/www/vo13/examples_658/ex1_modTS_vs_modTS_3years/lat_lon/model_vs_model\"\n", + "MAIN_PATH = \"/global/cfs/cdirs/e3sm/www/vo13/examples_main_no_slice/ex1_modTS_vs_modTS_3years/lat_lon/model_vs_model\"\n", + "\n", + "DEV_GLOB = sorted(glob.glob(DEV_PATH + \"/*.json\"))\n", + "MAIN_GLOB = sorted(glob.glob(MAIN_PATH + \"/*.json\"))" + ] + 
}, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Get the metrics for the development and `main` branches and their differences.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": {}, + "outputs": [], + "source": [ + "df_metrics_dev = get_metrics(DEV_GLOB)\n", + "df_metrics_main = get_metrics(MAIN_GLOB)\n", + "\n", + "df_metrics_dev2 = df_metrics_dev.reset_index(names=[\"var_key\", \"metric\"])\n", + "df_metrics_dev2 = df_metrics_dev2.loc[\n", + " df_metrics_dev2.var_key.isin(df_metrics_main.index.get_level_values(0).unique())\n", + "]\n", + "df_metrics_dev2 = df_metrics_dev2.set_index([\"var_key\", \"metric\"])\n", + "\n", + "\n", + "df_metrics_diffs = get_rel_diffs(df_metrics_dev2, df_metrics_main)" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
test DIFF (%)ref DIFF (%)test_regrid DIFF (%)ref_regrid DIFF (%)diff DIFF (%)misc DIFF (%)
var_keymetric
LHFLXmin0.000000e+000.000000e+000.000000e+000.000000e+002.066978e-16NaN
max0.000000e+002.061090e-160.000000e+002.061090e-160.000000e+00NaN
mean1.607934e-151.118090e-151.607934e-151.118090e-151.693728e-15NaN
stdNaNNaN5.414602e-165.367577e-16NaNNaN
rmseNaNNaNNaNNaNNaN0.000000e+00
corrNaNNaNNaNNaNNaN0.000000e+00
LWCFmin0.000000e+000.000000e+000.000000e+000.000000e+001.343861e-15NaN
max0.000000e+003.300170e-160.000000e+003.300170e-160.000000e+00NaN
mean1.894919e-151.310068e-151.894919e-151.310068e-152.985093e-14NaN
stdNaNNaN6.638807e-161.639685e-16NaNNaN
rmseNaNNaNNaNNaNNaN0.000000e+00
corrNaNNaNNaNNaNNaN0.000000e+00
NET_FLUX_SRFmin0.000000e+000.000000e+000.000000e+000.000000e+001.873360e-16NaN
max1.825516e-161.706434e-161.825516e-161.706434e-160.000000e+00NaN
mean2.535940e-151.465413e-142.535940e-151.465413e-142.161829e-15NaN
stdNaNNaN3.531975e-164.735973e-16NaNNaN
rmseNaNNaNNaNNaNNaN0.000000e+00
corrNaNNaNNaNNaNNaN0.000000e+00
PRECTmin0.000000e+000.000000e+000.000000e+000.000000e+000.000000e+00NaN
max2.054785e-163.506280e-162.054785e-163.506280e-165.683466e-16NaN
mean1.308796e-151.010973e-151.308796e-151.010973e-150.000000e+00NaN
stdNaNNaN4.107430e-164.058335e-16NaNNaN
rmseNaNNaNNaNNaNNaN0.000000e+00
corrNaNNaNNaNNaNNaN0.000000e+00
PSLmin0.000000e+001.168177e-160.000000e+001.168177e-163.592752e-14NaN
max2.216200e-160.000000e+002.216200e-160.000000e+003.973074e-14NaN
mean1.124213e-151.236728e-151.124213e-151.236728e-151.300318e-14NaN
stdNaNNaN5.030916e-163.531204e-16NaNNaN
rmseNaNNaNNaNNaNNaN1.064570e-15
corrNaNNaNNaNNaNNaN0.000000e+00
RESTOMmin2.226235e-160.000000e+002.226235e-160.000000e+004.666565e-16NaN
max0.000000e+001.620247e-160.000000e+001.620247e-168.168903e-16NaN
mean6.224919e-151.107688e-136.224919e-151.107688e-132.155737e-15NaN
stdNaNNaN3.940225e-163.955053e-16NaNNaN
rmseNaNNaNNaNNaNNaN2.985625e-16
corrNaNNaNNaNNaNNaN0.000000e+00
TREFHTmin2.525625e-161.221719e-162.525625e-161.221719e-161.466053e-15NaN
max1.140829e-161.191418e-161.140829e-161.191418e-163.565723e-16NaN
mean1.443220e-151.026647e-151.443220e-151.026647e-151.076803e-15NaN
stdNaNNaN3.950659e-165.141888e-16NaNNaN
rmseNaNNaNNaNNaNNaN0.000000e+00
corrNaNNaNNaNNaNNaN0.000000e+00
min2.525625e-161.221719e-162.525625e-161.221719e-161.466053e-15NaN
max1.140829e-161.191418e-161.140829e-161.191418e-167.298314e-16NaN
mean9.253642e-158.350693e-159.253642e-158.350693e-159.621027e-15NaN
stdNaNNaN4.922302e-154.475437e-15NaNNaN
rmseNaNNaNNaNNaNNaN0.000000e+00
corrNaNNaNNaNNaNNaN0.000000e+00
\n", + "
" + ], + "text/plain": [ + " test DIFF (%) ref DIFF (%) test_regrid DIFF (%) \\\n", + "var_key metric \n", + "LHFLX min 0.000000e+00 0.000000e+00 0.000000e+00 \n", + " max 0.000000e+00 2.061090e-16 0.000000e+00 \n", + " mean 1.607934e-15 1.118090e-15 1.607934e-15 \n", + " std NaN NaN 5.414602e-16 \n", + " rmse NaN NaN NaN \n", + " corr NaN NaN NaN \n", + "LWCF min 0.000000e+00 0.000000e+00 0.000000e+00 \n", + " max 0.000000e+00 3.300170e-16 0.000000e+00 \n", + " mean 1.894919e-15 1.310068e-15 1.894919e-15 \n", + " std NaN NaN 6.638807e-16 \n", + " rmse NaN NaN NaN \n", + " corr NaN NaN NaN \n", + "NET_FLUX_SRF min 0.000000e+00 0.000000e+00 0.000000e+00 \n", + " max 1.825516e-16 1.706434e-16 1.825516e-16 \n", + " mean 2.535940e-15 1.465413e-14 2.535940e-15 \n", + " std NaN NaN 3.531975e-16 \n", + " rmse NaN NaN NaN \n", + " corr NaN NaN NaN \n", + "PRECT min 0.000000e+00 0.000000e+00 0.000000e+00 \n", + " max 2.054785e-16 3.506280e-16 2.054785e-16 \n", + " mean 1.308796e-15 1.010973e-15 1.308796e-15 \n", + " std NaN NaN 4.107430e-16 \n", + " rmse NaN NaN NaN \n", + " corr NaN NaN NaN \n", + "PSL min 0.000000e+00 1.168177e-16 0.000000e+00 \n", + " max 2.216200e-16 0.000000e+00 2.216200e-16 \n", + " mean 1.124213e-15 1.236728e-15 1.124213e-15 \n", + " std NaN NaN 5.030916e-16 \n", + " rmse NaN NaN NaN \n", + " corr NaN NaN NaN \n", + "RESTOM min 2.226235e-16 0.000000e+00 2.226235e-16 \n", + " max 0.000000e+00 1.620247e-16 0.000000e+00 \n", + " mean 6.224919e-15 1.107688e-13 6.224919e-15 \n", + " std NaN NaN 3.940225e-16 \n", + " rmse NaN NaN NaN \n", + " corr NaN NaN NaN \n", + "TREFHT min 2.525625e-16 1.221719e-16 2.525625e-16 \n", + " max 1.140829e-16 1.191418e-16 1.140829e-16 \n", + " mean 1.443220e-15 1.026647e-15 1.443220e-15 \n", + " std NaN NaN 3.950659e-16 \n", + " rmse NaN NaN NaN \n", + " corr NaN NaN NaN \n", + " min 2.525625e-16 1.221719e-16 2.525625e-16 \n", + " max 1.140829e-16 1.191418e-16 1.140829e-16 \n", + " mean 9.253642e-15 8.350693e-15 
9.253642e-15 \n", + " std NaN NaN 4.922302e-15 \n", + " rmse NaN NaN NaN \n", + " corr NaN NaN NaN \n", + "\n", + " ref_regrid DIFF (%) diff DIFF (%) misc DIFF (%) \n", + "var_key metric \n", + "LHFLX min 0.000000e+00 2.066978e-16 NaN \n", + " max 2.061090e-16 0.000000e+00 NaN \n", + " mean 1.118090e-15 1.693728e-15 NaN \n", + " std 5.367577e-16 NaN NaN \n", + " rmse NaN NaN 0.000000e+00 \n", + " corr NaN NaN 0.000000e+00 \n", + "LWCF min 0.000000e+00 1.343861e-15 NaN \n", + " max 3.300170e-16 0.000000e+00 NaN \n", + " mean 1.310068e-15 2.985093e-14 NaN \n", + " std 1.639685e-16 NaN NaN \n", + " rmse NaN NaN 0.000000e+00 \n", + " corr NaN NaN 0.000000e+00 \n", + "NET_FLUX_SRF min 0.000000e+00 1.873360e-16 NaN \n", + " max 1.706434e-16 0.000000e+00 NaN \n", + " mean 1.465413e-14 2.161829e-15 NaN \n", + " std 4.735973e-16 NaN NaN \n", + " rmse NaN NaN 0.000000e+00 \n", + " corr NaN NaN 0.000000e+00 \n", + "PRECT min 0.000000e+00 0.000000e+00 NaN \n", + " max 3.506280e-16 5.683466e-16 NaN \n", + " mean 1.010973e-15 0.000000e+00 NaN \n", + " std 4.058335e-16 NaN NaN \n", + " rmse NaN NaN 0.000000e+00 \n", + " corr NaN NaN 0.000000e+00 \n", + "PSL min 1.168177e-16 3.592752e-14 NaN \n", + " max 0.000000e+00 3.973074e-14 NaN \n", + " mean 1.236728e-15 1.300318e-14 NaN \n", + " std 3.531204e-16 NaN NaN \n", + " rmse NaN NaN 1.064570e-15 \n", + " corr NaN NaN 0.000000e+00 \n", + "RESTOM min 0.000000e+00 4.666565e-16 NaN \n", + " max 1.620247e-16 8.168903e-16 NaN \n", + " mean 1.107688e-13 2.155737e-15 NaN \n", + " std 3.955053e-16 NaN NaN \n", + " rmse NaN NaN 2.985625e-16 \n", + " corr NaN NaN 0.000000e+00 \n", + "TREFHT min 1.221719e-16 1.466053e-15 NaN \n", + " max 1.191418e-16 3.565723e-16 NaN \n", + " mean 1.026647e-15 1.076803e-15 NaN \n", + " std 5.141888e-16 NaN NaN \n", + " rmse NaN NaN 0.000000e+00 \n", + " corr NaN NaN 0.000000e+00 \n", + " min 1.221719e-16 1.466053e-15 NaN \n", + " max 1.191418e-16 7.298314e-16 NaN \n", + " mean 8.350693e-15 9.621027e-15 NaN 
\n", + " std 4.475437e-15 NaN NaN \n", + " rmse NaN NaN 0.000000e+00 \n", + " corr NaN NaN 0.000000e+00 " + ] + }, + "execution_count": 51, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_metrics_diffs" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. Filter differences to those above maximum threshold (2%).\n", + "\n", + "All values below maximum threshold will be labeled as `NaN`.\n", + "\n", + "- **If all cells in a row are NaN (< 2%)**, the entire row is dropped to make the results easier to parse.\n", + "- Any remaining NaN cells are below < 2% difference and **should be ignored**.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": {}, + "outputs": [], + "source": [ + "df_metrics_diffs_thres = df_metrics_diffs[df_metrics_diffs >= 0.02]\n", + "df_metrics_diffs_thres = df_metrics_diffs_thres.dropna(\n", + " axis=0, how=\"all\", ignore_index=False\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
test DIFF (%)ref DIFF (%)test_regrid DIFF (%)ref_regrid DIFF (%)diff DIFF (%)misc DIFF (%)
var_keymetric
\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: [test DIFF (%), ref DIFF (%), test_regrid DIFF (%), ref_regrid DIFF (%), diff DIFF (%), misc DIFF (%)]\n", + "Index: []" + ] + }, + "execution_count": 57, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_metrics_diffs_thres" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Findings: No metrics are above the 2% threshold after removing the `slice_flag` used in\n", + "the CDAT version of the codebase.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "cdat_regression_test", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/auxiliary_tools/cdat_regression_testing/671-lat-lon/12_4_23_qa_no_cdms_slice.py b/auxiliary_tools/cdat_regression_testing/671-lat-lon/12_4_23_qa_no_cdms_slice.py new file mode 100644 index 000000000..6d898f18e --- /dev/null +++ b/auxiliary_tools/cdat_regression_testing/671-lat-lon/12_4_23_qa_no_cdms_slice.py @@ -0,0 +1,100 @@ +"""This script compares the time series, climatology, and spatial average of the +climatology between the cdat-migration-fy24 and main branches. + +CONCLUSION: + - In the CDAT version of the Dataset class, cdms2.open is being called + with a slice flag (either "co"/"ccb"). In the case of NET_FLUX_SRF, "co" is + being set because time coordinates start at the beginning of the month. + - This adds an additional time coordinate point to the end of the time series + file, which affects the subsequent climatology and spatial averaging + calculations. 
+ +I found omitting the extra time coordinate point before calculating the +climatology and spatial averaging results in an identical result to the xCDAT +version. + +Next options: +1. Add a feature to the Dataset class that adds an extra coordinate point using +the slice flag conditional + +Related lines of code on `main`: + - https://github.com/E3SM-Project/e3sm_diags/blob/633b52c314325e605fe7f62687cc4d00e5a0a3d5/e3sm_diags/driver/utils/dataset.py#L665-L672 + - https://github.com/E3SM-Project/e3sm_diags/blob/633b52c314325e605fe7f62687cc4d00e5a0a3d5/e3sm_diags/driver/utils/dataset.py#L699-L700 + + +""" +import cdms2 +import numpy as np +import xarray as xr + +from e3sm_diags.driver.utils.climo import climo +from e3sm_diags.metrics import mean +from e3sm_diags.metrics.metrics import spatial_avg + +# Path to the netcdf files for NET_FLUX_SRF generated on `cdat-migration-fy24` +# and `main` using `ex1.py`. +DIR_PATH = "/global/u2/v/vo13/E3SM-Project/e3sm_diags/auxiliary_tools/cdat_regression_testing/671-lat-lon" + + +# Compare time series -- identical only if extra time coordinate is removed +# ------------------------------------------------------------------------------ +ds_ts1 = xr.open_dataset(f"{DIR_PATH}/671-ts-input.nc") +ds_ts2 = xr.open_dataset(f"{DIR_PATH}/main-ts-input.nc") +ds_ts2 = ds_ts2.rename({"variable_58": "NET_FLUX_SRF"}) + +# Extra time coordinate because of the cdms2 slice flag ("co" for beginning of +# the month/"ccb" for mid-month) +ds_ts2_sub = ds_ts2.isel(time=slice(0, -1)) +np.testing.assert_allclose(ds_ts1["NET_FLUX_SRF"], ds_ts2_sub["NET_FLUX_SRF"]) + +# Result: True + +# Compare climatologies -- not identical (due to extra coordinate point) +# ------------------------------------------------------------------------------ +ds_climo1 = xr.open_dataset(f"{DIR_PATH}/671-climo.nc") +ds_climo2 = xr.open_dataset(f"{DIR_PATH}/main-climo.nc") +ds_climo2 = ds_climo2.rename({"variable_58": "NET_FLUX_SRF"}) + 
+np.testing.assert_allclose(ds_climo1["NET_FLUX_SRF"], ds_climo2["NET_FLUX_SRF"]) + +# Result: AssertionError: +# Not equal to tolerance rtol=1e-07, atol=0 + +# Mismatched elements: 33024 / 33024 (100%) +# Max absolute difference: 17.20686057 +# Max relative difference: 16932.010712 +# x: array([[-0.439175, -0.439179, -0.439189, ..., -0.439205, -0.439189, +# -0.439179], +# [-0.435518, -0.435518, -0.435514, ..., -0.435508, -0.435514,... +# y: array([[ 0.026507, 0.026507, 0.026507, ..., 0.026508, 0.026507, +# 0.026507], +# [-0.020339, -0.02031 , -0.020224, ..., -0.02008 , -0.020224,... + + +# Compare spatial averages -- not identical (due to extra coordinate point) +# ------------------------------------------------------------------------------ +ds_avg1 = spatial_avg(ds_climo1, "NET_FLUX_SRF") + +ds_avg2 = cdms2.open(f"{DIR_PATH}/main-climo.nc")["variable_58"] +ds_avg2 = mean(ds_avg2) + +np.testing.assert_allclose(ds_avg1, ds_avg2.data) +# Mismatched elements: 1 / 1 (100%) +# Max absolute difference: 0.12231419 +# Max relative difference: 0.23689147 +# x: array(0.394016) +# y: array(0.51633) + + +# Now let's try removing that extra coordinate point from the cdms2 time series +# ----------------------------------------------------------------------------- +ds_ts3 = cdms2.open(f"{DIR_PATH}/main-ts-input.nc")["variable_58"] +ds_ts3_sub = ds_ts3(time=("2011-2-1", "2013-12-1")) + +# Climatologies are now identical! +ds_climo3 = climo(ds_ts3_sub, "ANN") +np.testing.assert_allclose(ds_climo1["NET_FLUX_SRF"].data, ds_climo3.data) + +# Spatial averages are now identical! 
+ds_avg3 = mean(ds_climo3) +np.testing.assert_allclose(ds_avg1, ds_avg3) diff --git a/auxiliary_tools/cdat_regression_testing/671-lat-lon/671-diags.cfg b/auxiliary_tools/cdat_regression_testing/671-lat-lon/671-diags.cfg new file mode 100644 index 000000000..d411a529f --- /dev/null +++ b/auxiliary_tools/cdat_regression_testing/671-lat-lon/671-diags.cfg @@ -0,0 +1,15 @@ +[#] +sets = ["lat_lon"] +case_id = "model_vs_model" +variables = ["NET_FLUX_SRF"] +seasons = ["ANN", "DJF", "MAM", "JJA", "SON"] +contour_levels = [-200, -160, -120, -80, -40, 0, 40, 80, 120, 160, 200] +diff_levels = [-75, -50, -25, -10, -5, -2, 2, 5, 10, 25, 50, 75] + +[#] +sets = ["lat_lon"] +case_id = "model_vs_model" +variables = ["RESTOM"] +seasons = ["ANN", "DJF", "MAM", "JJA", "SON"] +contour_levels = [-120, -100, -80, -60, -40, -20, 0, 20, 40, 60, 80] +diff_levels = [-30, -25, -20, -15, -10, -5, -2, 2, 5, 10, 15, 20, 25, 30] diff --git a/auxiliary_tools/cdat_regression_testing/671-lat-lon/671-lat-lon.ipynb b/auxiliary_tools/cdat_regression_testing/671-lat-lon/671-lat-lon.ipynb new file mode 100644 index 000000000..9a7add496 --- /dev/null +++ b/auxiliary_tools/cdat_regression_testing/671-lat-lon/671-lat-lon.ipynb @@ -0,0 +1,475 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# CDAT Migration Regression Testing Notebook\n", + "\n", + "This notebook is used to perform regression testing between the development and\n", + "production versions of a diagnostic set.\n", + "\n", + "## How it works\n", + "\n", + "It compares the relative differences (%) between two sets of `.json` files in two\n", + "separate directories, one for the refactored code and the other for the `main` branch.\n", + "\n", + "It will display metrics values with relative differences >= 2%. 
Relative differences are used instead of absolute differences because:\n", + "\n", + "- Relative differences are in percentages, which shows the scale of the differences.\n", + "- Absolute differences are just a raw number that doesn't factor in\n", + " floating point size (e.g., 100.00 vs. 0.0001), which can be misleading.\n", + "\n", + "## How to use\n", + "\n", + "PREREQUISITE: The diagnostic set's metrics stored in `.json` files in two directories\n", + "(dev and `main` branches).\n", + "\n", + "1. Make a copy of this notebook under `auxiliary_tools/cdat_regression_testing/`.\n", + "2. Run `mamba create -n cdat_regression_test -y -c conda-forge \"python<3.12\" pandas matplotlib-base ipykernel`\n", + "3. Run `mamba activate cdat_regression_test`\n", + "4. Update `DEV_PATH` and `MAIN_PATH` in the copy of your notebook.\n", + "5. Run all cells IN ORDER.\n", + "6. Review results for any outstanding differences (>= 2%).\n", + " - Debug these differences (e.g., bug in metrics functions, incorrect variable references, etc.)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setup Code\n" + ] + }, + { + "cell_type": "code", + "execution_count": 96, + "metadata": {}, + "outputs": [], + "source": [ + "import glob\n", + "from auxiliary_tools.cdat_regression_testing.utils import (\n", + " get_metrics,\n", + " get_rel_diffs,\n", + " get_num_metrics_above_diff_thres,\n", + " highlight_large_diffs,\n", + " sort_columns,\n", + " update_diffs_to_pct,\n", + " PERCENTAGE_COLUMNS,\n", + ")\n", + "\n", + "import pandas as pd\n", + "\n", + "# TODO: Update DEV_RESULTS and MAIN_RESULTS to your diagnostic sets.\n", + "DEV_PATH = \"/global/cfs/cdirs/e3sm/www/vo13/examples_658/ex1_modTS_vs_modTS_3years/lat_lon/model_vs_model\"\n", + "MAIN_PATH = \"/global/cfs/cdirs/e3sm/www/vo13/examples/ex1_modTS_vs_modTS_3years/lat_lon/model_vs_model\"\n", + "\n", + "DEV_GLOB = sorted(glob.glob(DEV_PATH + \"/*.json\"))\n", + "MAIN_GLOB = sorted(glob.glob(MAIN_PATH + 
\"/*.json\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Get the metrics for the development and `main` branches and their differences.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 109, + "metadata": {}, + "outputs": [], + "source": [ + "df_metrics_dev = get_metrics(DEV_GLOB)\n", + "df_metrics_main = get_metrics(MAIN_GLOB)\n", + "df_metrics_diffs = get_rel_diffs(df_metrics_dev, df_metrics_main)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. Filter differences to those above maximum threshold (2%).\n", + "\n", + "All values below maximum threshold will be labeled as `NaN`.\n", + "\n", + "- **If all cells in a row are NaN (< 2%)**, the entire row is dropped to make the results easier to parse.\n", + "- Any remaining NaN cells are below < 2% difference and **should be ignored**.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 110, + "metadata": {}, + "outputs": [], + "source": [ + "df_metrics_diffs_thres = df_metrics_diffs[df_metrics_diffs >= 0.02]\n", + "df_metrics_diffs_thres = df_metrics_diffs_thres.dropna(\n", + " axis=0, how=\"all\", ignore_index=False\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Combine all DataFrames to get the final result.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 111, + "metadata": {}, + "outputs": [], + "source": [ + "df_metrics_all = pd.concat(\n", + " [df_metrics_dev.add_suffix(\"_dev\"), df_metrics_main.add_suffix(\"_main\")],\n", + " axis=1,\n", + " join=\"outer\",\n", + ")\n", + "df_final = df_metrics_diffs_thres.join(df_metrics_all)\n", + "df_final = sort_columns(df_final)\n", + "df_final = update_diffs_to_pct(df_final)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4. 
Review variables and metrics above difference threshold.\n", + "\n", + "- Red cells are differences >= 2%\n", + "- `nan` cells are differences < 2% and **should be ignored**\n" + ] + }, + { + "cell_type": "code", + "execution_count": 117, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "* Related variables ['FSNTOA', 'LHFLX', 'LWCF', 'NET_FLUX_SRF', 'PRECT', 'PSL', 'RESTOM', 'TREFHT']\n", + "* Number of metrics above 2% max threshold: 11 / 96\n" + ] + } + ], + "source": [ + "remove_metrics = [\"min\", \"max\"]\n", + "df_metrics_sub = df_final.reset_index(names=[\"var_key\", \"metric\"])\n", + "df_metrics_sub = df_metrics_sub[~df_metrics_sub.metric.isin(remove_metrics)]\n", + "get_num_metrics_above_diff_thres(df_metrics_all, df_metrics_sub)" + ] + }, + { + "cell_type": "code", + "execution_count": 118, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + 
" \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
 var_keymetrictest_devtest_maintest DIFF (%)ref_devref_mainref DIFF (%)test_regrid_devtest_regrid_maintest_regrid DIFF (%)ref_regrid_devref_regrid_mainref_regrid DIFF (%)misc_devmisc_mainmisc DIFF (%)
5FSNTOAmean239.859777240.001860nan241.439641241.544384nan239.859777240.001860nan241.439641241.544384nannannannan
8LHFLXmean88.37960988.470270nan88.96955088.976266nan88.37960988.470270nan88.96955088.976266nannannannan
11LWCFmean24.37322424.370539nan24.40669724.391579nan24.37322424.370539nan24.40669724.391579nannannannan
16NET_FLUX_SRFmean0.3940160.51633031.04%-0.0681860.068584200.58%0.3940160.51633031.04%-0.0681860.068584200.58%nannannan
19PRECTmean3.0538023.056760nan3.0748853.074978nan3.0538023.056760nan3.0748853.074978nannannannan
21PSLrmsenannannannannannannannannannannannan1.0428840.9799816.03%
23RESTOMmean0.4815490.65656036.34%0.0180410.162984803.40%0.4815490.65656036.34%0.0180410.162984803.40%nannannan
34TREFHTmean14.76994614.741707nan13.84201313.800258nan14.76994614.741707nan13.84201313.800258nannannannan
35TREFHTmean9.2142249.114572nan8.0833497.957917nan9.2142249.114572nan8.0833497.957917nannannannan
40TREFHTrmsenannannannannannannannannannannannan1.1607181.1799952.68%
41TREFHTrmsenannannannannannannannannannannannan1.3431691.3791412.68%
\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "highlight_large_diffs(df_sub_metrics)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## `NET_FLUX_SRF` and `RESTOM` contain the highest differences and should be investigated further\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "cdat_regression_test", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/auxiliary_tools/cdat_regression_testing/671-lat-lon/ex1.py b/auxiliary_tools/cdat_regression_testing/671-lat-lon/ex1.py new file mode 100644 index 000000000..4bed0817d --- /dev/null +++ b/auxiliary_tools/cdat_regression_testing/671-lat-lon/ex1.py @@ -0,0 +1,61 @@ +# %% +import os + +from e3sm_diags.parameter.core_parameter import CoreParameter +from e3sm_diags.run import runner + +param = CoreParameter() + +# Location of the data. +param.test_data_path = "/global/cfs/cdirs/e3sm/e3sm_diags/test_model_data_for_acme_diags/time-series/E3SM_v1" +param.reference_data_path = "/global/cfs/cdirs/e3sm/e3sm_diags/test_model_data_for_acme_diags/time-series/E3SM_v1" + +# Variables +param.variables = ["PRECT"] + +# Set this parameter to True. +# By default, e3sm_diags expects the test data to be climo data. +param.test_timeseries_input = True +# Years to slice the test data, base this off the years in the filenames. +param.test_start_yr = "2011" +param.test_end_yr = "2013" + +# Set this parameter to True. +# By default, e3sm_diags expects the ref data to be climo data. 
+param.ref_timeseries_input = True +# Years to slice the ref data, base this off the years in the filenames +param.ref_start_yr = "1850" +param.ref_end_yr = "1852" + +# When running with time-series data, you don't need to specify the name of the data. +# But you should, otherwise nothing is displayed when the test/ref name is needed. +param.short_test_name = "historical_H1" +param.short_ref_name = "historical_H1" + +# This parameter modifies the software to accommodate model vs model runs. +# The default setting for run_type is 'model_vs_obs'. +param.run_type = "model_vs_model" +# Name of the folder where the results are stored. +# Change `prefix` to use your directory. +prefix = "/global/cfs/cdirs/e3sm/www/vo13/examples" +param.results_dir = os.path.join(prefix, "run_refactor_single_param") + +# Below are more optional arguments. + +# What plotsets to run the diags on. +# If not defined, then all available sets are used. +param.sets = ["lat_lon"] +# What seasons to run the diags on. +# If not defined, diags are run on ['ANN', 'DJF', 'MAM', 'JJA', 'SON']. +param.seasons = ["ANN"] +# Title of the difference plots. +param.diff_title = "Model (2011-2013) - Model (1850-1852)" + +# For running with multiprocessing. 
+param.multiprocessing = False +# param.num_workers = 24 + +# %% +runner.run_diags([param]) + +# %% diff --git a/auxiliary_tools/cdat_regression_testing/671-lat-lon/ex1_3d.py b/auxiliary_tools/cdat_regression_testing/671-lat-lon/ex1_3d.py new file mode 100644 index 000000000..0cced4771 --- /dev/null +++ b/auxiliary_tools/cdat_regression_testing/671-lat-lon/ex1_3d.py @@ -0,0 +1,41 @@ +# %% +import os + +from e3sm_diags.parameter.core_parameter import CoreParameter +from e3sm_diags.run import runner + +param = CoreParameter() + +# %% +param.sets = ["lat_lon"] +param.case_id = "ERA-Interim" +param.variables = ["T"] +param.seasons = ["ANN"] +param.plevs = [850.0] +param.contour_levels = [240, 245, 250, 255, 260, 265, 270, 275, 280, 285, 290, 295] +param.diff_levels = [-10, -7.5, -5, -4, -3, -2, -1, -0.5, 0.5, 1, 2, 3, 4, 5, 7.5, 10] + +param.test_name = "system tests" +param.short_test_name = "short_system tests" +param.ref_name = "ERA-Interim" +param.reference_name = "ERA-Interim Reanalysis 1979-2015" +param.reference_data_path = ( + "/global/u2/v/vo13/E3SM-Project/e3sm_diags/tests/integration/integration_test_data" +) +param.ref_file = "ta_ERA-Interim_ANN_198001_201401_climo.nc" +param.test_data_path = ( + "/global/u2/v/vo13/E3SM-Project/e3sm_diags/tests/integration/integration_test_data" +) +param.test_file = "T_20161118.beta0.FC5COSP.ne30_ne30.edison_ANN_climo.nc" + +param.backend = "mpl" +prefix = "/global/cfs/cdirs/e3sm/www/vo13/examples" +param.results_dir = os.path.join(prefix, "lat_lon_3d_var_test") +param.debug = True +param.multiprocessing = False + + +# %% +runner.run_diags([param]) + +# %% diff --git a/auxiliary_tools/cdat_regression_testing/template_cdat_regression_test.ipynb b/auxiliary_tools/cdat_regression_testing/template_cdat_regression_test.ipynb new file mode 100644 index 000000000..3cd30e7e5 --- /dev/null +++ b/auxiliary_tools/cdat_regression_testing/template_cdat_regression_test.ipynb @@ -0,0 +1,468 @@ +{ + "cells": [ + { + "cell_type": 
"markdown", + "metadata": {}, + "source": [ + "# CDAT Migration Regression Testing Notebook\n", + "\n", + "This notebook is used to perform regression testing between the development and\n", + "production versions of a diagnostic set.\n", + "\n", + "## How it works\n", + "\n", + "It compares the relative differences (%) between two sets of `.json` files in two\n", + "separate directories, one for the refactored code and the other for the `main` branch.\n", + "\n", + "It will display metrics values with relative differences >= 2%. Relative differences are used instead of absolute differences because:\n", + "\n", + "- Relative differences are in percentages, which shows the scale of the differences.\n", + "- Absolute differences are just a raw number that doesn't factor in\n", + " floating point size (e.g., 100.00 vs. 0.0001), which can be misleading.\n", + "\n", + "## How to use\n", + "\n", + "PREREQUISITE: The diagnostic set's metrics stored in `.json` files in two directories\n", + "(dev and `main` branches).\n", + "\n", + "1. Make a copy of this notebook under `auxiliary_tools/cdat_regression_testing/`.\n", + "2. Run `mamba create -n cdat_regression_test -y -c conda-forge \"python<3.12\" pandas matplotlib-base ipykernel`\n", + "3. Run `mamba activate cdat_regression_test`\n", + "4. Update `DEV_PATH` and `MAIN_PATH` in the copy of your notebook.\n", + "5. Run all cells IN ORDER.\n", + "6. 
Review results for any outstanding differences (>= 2%).\n", + " - Debug these differences (e.g., bug in metrics functions, incorrect variable references, etc.)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setup Code\n" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "import glob\n", + "from auxiliary_tools.cdat_regression_testing.utils import (\n", + " get_metrics,\n", + " get_rel_diffs,\n", + " get_num_metrics_above_diff_thres,\n", + " highlight_large_diffs,\n", + " sort_columns,\n", + " update_diffs_to_pct,\n", + " PERCENTAGE_COLUMNS,\n", + ")\n", + "\n", + "import pandas as pd\n", + "\n", + "# TODO: Update DEV_PATH and MAIN_PATH to your diagnostic sets.\n", + "DEV_PATH = \"/global/cfs/cdirs/e3sm/www/vo13/examples_658/ex1_modTS_vs_modTS_3years/lat_lon/model_vs_model\"\n", + "MAIN_PATH = \"/global/cfs/cdirs/e3sm/www/vo13/examples/ex1_modTS_vs_modTS_3years/lat_lon/model_vs_model\"\n", + "\n", + "DEV_GLOB = sorted(glob.glob(DEV_PATH + \"/*.json\"))\n", + "MAIN_GLOB = sorted(glob.glob(MAIN_PATH + \"/*.json\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Get the metrics for the development and `main` branches and their differences.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "df_metrics_dev = get_metrics(DEV_GLOB)\n", + "df_metrics_main = get_metrics(MAIN_GLOB)\n", + "df_metrics_diffs = get_rel_diffs(df_metrics_dev, df_metrics_main)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. 
Filter differences to those at or above the maximum threshold (2%).\n", + "\n", + "All values below the maximum threshold will be labeled as `NaN`.\n", + "\n", + "- **If all cells in a row are NaN (< 2%)**, the entire row is dropped to make the results easier to parse.\n", + "- Any remaining NaN cells are differences < 2% and **should be ignored**.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "df_metrics_diffs_thres = df_metrics_diffs[df_metrics_diffs >= 0.02]\n", + "df_metrics_diffs_thres = df_metrics_diffs_thres.dropna(\n", + " axis=0, how=\"all\", ignore_index=False\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Combine all DataFrames to get the final result.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [], + "source": [ + "df_metrics_all = pd.concat(\n", + " [df_metrics_dev.add_suffix(\"_dev\"), df_metrics_main.add_suffix(\"_main\")],\n", + " axis=1,\n", + " join=\"outer\",\n", + ")\n", + "df_final = df_metrics_diffs_thres.join(df_metrics_all)\n", + "df_final = sort_columns(df_final)\n", + "df_final = update_diffs_to_pct(df_final)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4. 
Review variables and metrics above difference threshold.\n", + "\n", + "- Red cells are differences >= 2%\n", + "- `nan` cells are differences < 2% and **should be ignored**\n" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "* Related variables ['FSNTOA', 'LHFLX', 'LWCF', 'NET_FLUX_SRF', 'PRECT', 'PSL', 'RESTOM', 'TREFHT']\n", + "* Number of metrics above 2% max threshold: 11 / 96\n" + ] + } + ], + "source": [ + "remove_metrics = [\"min\", \"max\"]\n", + "df_metrics_sub = df_final.reset_index(names=[\"var_key\", \"metric\"])\n", + "df_metrics_sub = df_metrics_sub[~df_metrics_sub.metric.isin(remove_metrics)]\n", + "get_num_metrics_above_diff_thres(df_metrics_all, df_metrics_sub)" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
 var_keymetrictest_devtest_maintest DIFF (%)ref_devref_mainref DIFF (%)test_regrid_devtest_regrid_maintest_regrid DIFF (%)ref_regrid_devref_regrid_mainref_regrid DIFF (%)misc_devmisc_mainmisc DIFF (%)
5FSNTOAmean239.859777240.001860nan241.439641241.544384nan239.859777240.001860nan241.439641241.544384nannannannan
8LHFLXmean88.37960988.470270nan88.96955088.976266nan88.37960988.470270nan88.96955088.976266nannannannan
11LWCFmean24.37322424.370539nan24.40669724.391579nan24.37322424.370539nan24.40669724.391579nannannannan
16NET_FLUX_SRFmean0.3940160.51633031.04%-0.0681860.068584200.58%0.3940160.51633031.04%-0.0681860.068584200.58%nannannan
19PRECTmean3.0538023.056760nan3.0748853.074978nan3.0538023.056760nan3.0748853.074978nannannannan
21PSLrmsenannannannannannannannannannannannan1.0428840.9799816.03%
23RESTOMmean0.4815490.65656036.34%0.0180410.162984803.40%0.4815490.65656036.34%0.0180410.162984803.40%nannannan
34TREFHTmean14.76994614.741707nan13.84201313.800258nan14.76994614.741707nan13.84201313.800258nannannannan
35TREFHTmean9.2142249.114572nan8.0833497.957917nan9.2142249.114572nan8.0833497.957917nannannannan
40TREFHTrmsenannannannannannannannannannannannan1.1607181.1799952.68%
41TREFHTrmsenannannannannannannannannannannannan1.3431691.3791412.68%
\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "highlight_large_diffs(df_metrics_sub)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## `NET_FLUX_SRF` and `RESTOM` contain the highest differences and should be investigated further\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "cdat_regression_test", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/auxiliary_tools/cdat_regression_testing/utils.py b/auxiliary_tools/cdat_regression_testing/utils.py new file mode 100644 index 000000000..9a9e844b1 --- /dev/null +++ b/auxiliary_tools/cdat_regression_testing/utils.py @@ -0,0 +1,162 @@ +import math +from typing import List + +import pandas as pd +from IPython.display import display + +# The names of the columns that store percentage difference values. +PERCENTAGE_COLUMNS = [ + "test DIFF (%)", + "ref DIFF (%)", + "test_regrid DIFF (%)", + "ref_regrid DIFF (%)", + "misc DIFF (%)", +] + + +def get_metrics(filepaths: List[str]) -> pd.DataFrame: + """Get the metrics using a glob of `.json` metric files in a directory. + + Parameters + ---------- + filepaths : List[str] + The filepaths for metrics `.json` files. + + Returns + ------- + pd.DataFrame + The DataFrame containing the metrics for all of the variables in + the results directory. + """ + metrics = [] + + for filepath in filepaths: + df = pd.read_json(filepath) + + filename = filepath.split("/")[-1] + var_key = filename.split("-")[1] + + # Add the variable key to the MultiIndex and update the index + # before stacking to make the DataFrame easier to parse. 
+ multiindex = pd.MultiIndex.from_product([[var_key], [*df.index]]) + df = df.set_index(multiindex) + df.stack() + + metrics.append(df) + + df_final = pd.concat(metrics) + + # Reorder columns and drop "unit" column (string dtype breaks Pandas + # arithmetic). + df_final = df_final[["test", "ref", "test_regrid", "ref_regrid", "diff", "misc"]] + + return df_final + + +def get_rel_diffs(df_actual: pd.DataFrame, df_reference: pd.DataFrame) -> pd.DataFrame: + """Get the relative differences between two DataFrames. + + Formula: abs(actual - reference) / abs(actual) + + Parameters + ---------- + df_actual : pd.DataFrame + The first DataFrame representing "actual" results (dev branch). + df_reference : pd.DataFrame + The second DataFrame representing "reference" results (main branch). + + Returns + ------- + pd.DataFrame + The DataFrame containing the relative differences between + the metrics DataFrames. + """ + df_diff = abs(df_actual - df_reference) / abs(df_actual) + df_diff = df_diff.add_suffix(" DIFF (%)") + + return df_diff + + +def sort_columns(df: pd.DataFrame) -> pd.DataFrame: + """Sorts the order of the columns for the final DataFrame output. + + Parameters + ---------- + df : pd.DataFrame + The final DataFrame output. + + Returns + ------- + pd.DataFrame + The final DataFrame output with sorted columns. + """ + columns = [ + "test_dev", + "test_main", + "test DIFF (%)", + "ref_dev", + "ref_main", + "ref DIFF (%)", + "test_regrid_dev", + "test_regrid_main", + "test_regrid DIFF (%)", + "ref_regrid_dev", + "ref_regrid_main", + "ref_regrid DIFF (%)", + "misc_dev", + "misc_main", + "misc DIFF (%)", + ] + + df_new = df.copy() + df_new = df_new[columns] + + return df_new + + +def update_diffs_to_pct(df: pd.DataFrame): + """Update relative diff columns from float to string percentage. + + Parameters + ---------- + df : pd.DataFrame + The final DataFrame containing metrics and diffs (floats).
+ + Returns + ------- + pd.DataFrame + The final DataFrame containing metrics and diffs (str percentage). + """ + df_new = df.copy() + df_new[PERCENTAGE_COLUMNS] = df_new[PERCENTAGE_COLUMNS].map( + lambda x: "{0:.2f}%".format(x * 100) if not math.isnan(x) else x + ) + + return df_new + + +def highlight_large_diffs(df: pd.DataFrame): + if "var_key" not in df.columns and "metric" not in df.columns: + df_new = df.reset_index(names=["var_key", "metric"]) + else: + df_new = df.copy() + + df_new = df_new.style.map( + lambda x: "background-color : red" if isinstance(x, str) else "", + subset=pd.IndexSlice[:, PERCENTAGE_COLUMNS], + ) + + display(df_new) + + +def get_num_metrics_above_diff_thres( + df_metrics: pd.DataFrame, df_metric_above_thres: pd.DataFrame +): + var_keys = list(df_metric_above_thres.var_key.unique()) + print(f"* Related variables {var_keys}") + + num_rows = df_metrics.shape[0] + num_rows_largest_diffs = df_metric_above_thres.shape[0] + print( + f"* Number of metrics above 2% max threshold: {num_rows_largest_diffs} / {num_rows}" + ) diff --git a/auxiliary_tools/template_cdat_regression_test.ipynb b/auxiliary_tools/template_cdat_regression_test.ipynb deleted file mode 100644 index 8b4d00bd1..000000000 --- a/auxiliary_tools/template_cdat_regression_test.ipynb +++ /dev/null @@ -1,1333 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# CDAT Migration Regression Test (FY24)\n", - "\n", - "This notebook is used to perform regression testing between the development and\n", - "production versions of a diagnostic set.\n", - "\n", - "## How it works\n", - "\n", - "It compares the relative differences (%) between two sets of `.json` files in two\n", - "separate directories, one for the refactored code and the other for the `main` branch.\n", - "\n", - "It will display metrics values with relative differences >= 2%. 
Relative differences are used instead of absolute differences because:\n", - "\n", - "- Relative differences are in percentages, which shows the scale of the differences.\n", - "- Absolute differences are just a raw number that doesn't factor in\n", - " floating point size (e.g., 100.00 vs. 0.0001), which can be misleading.\n", - "\n", - "## How to use\n", - "\n", - "PREREQUISITE: The diagnostic set's metrics stored in `.json` files in two directories\n", - "(dev and `main` branches).\n", - "\n", - "1. Make a copy of this notebook.\n", - "2. Run `mamba create -n cdat_regression_test -y -c conda-forge \"python<3.12\" pandas matplotlib-base ipykernel`\n", - "3. Run `mamba activate cdat_regression_test`\n", - "4. Update `DEV_PATH` and `PROD_PATH` in the copy of your notebook.\n", - "5. Run all cells IN ORDER.\n", - "6. Review results for any outstanding differences (>= 2%).\n", - " - Debug these differences (e.g., bug in metrics functions, incorrect variable references, etc.)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Setup Code\n" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import glob\n", - "import math\n", - "from typing import List\n", - "\n", - "import pandas as pd\n", - "\n", - "# TODO: Update DEV_RESULTS and PROD_RESULTS to your diagnostic sets.\n", - "DEV_PATH = \"/global/cfs/cdirs/e3sm/www/vo13/examples_658/ex1_modTS_vs_modTS_3years/lat_lon/model_vs_model\"\n", - "PROD_PATH = \"/global/cfs/cdirs/e3sm/www/vo13/examples/ex1_modTS_vs_modTS_3years/lat_lon/model_vs_model\"\n", - "\n", - "DEV_GLOB = sorted(glob.glob(DEV_PATH + \"/*.json\"))\n", - "PROD_GLOB = sorted(glob.glob(PROD_PATH + \"/*.json\"))\n", - "\n", - "# The names of the columns that store percentage difference values.\n", - "PERCENTAGE_COLUMNS = [\n", - " \"test DIFF (%)\",\n", - " \"ref DIFF (%)\",\n", - " \"test_regrid DIFF (%)\",\n", - " \"ref_regrid DIFF (%)\",\n", - " \"diff DIFF (%)\",\n", 
- " \"misc DIFF (%)\",\n", - "]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Core Functions\n" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "def get_metrics(filepaths: List[str]) -> pd.DataFrame:\n", - " \"\"\"Get the metrics using a glob of `.json` metric files in a directory.\n", - "\n", - " Parameters\n", - " ----------\n", - " filepaths : List[str]\n", - " The filepaths for metrics `.json` files.\n", - "\n", - " Returns\n", - " -------\n", - " pd.DataFrame\n", - " The DataFrame containing the metrics for all of the variables in\n", - " the results directory.\n", - " \"\"\"\n", - " metrics = []\n", - "\n", - " for filepath in filepaths:\n", - " df = pd.read_json(filepath)\n", - "\n", - " filename = filepath.split(\"/\")[-1]\n", - " var_key = filename.split(\"-\")[1]\n", - "\n", - " # Add the variable key to the MultiIndex and update the index\n", - " # before stacking to make the DataFrame easier to parse.\n", - " multiindex = pd.MultiIndex.from_product([[var_key], [*df.index]])\n", - " df = df.set_index(multiindex)\n", - " df.stack()\n", - "\n", - " metrics.append(df)\n", - "\n", - " df_final = pd.concat(metrics)\n", - "\n", - " # Reorder columns and drop \"unit\" column (string dtype breaks Pandas\n", - " # arithmetic).\n", - " df_final = df_final[[\"test\", \"ref\", \"test_regrid\", \"ref_regrid\", \"diff\", \"misc\"]]\n", - "\n", - " return df_final\n", - "\n", - "\n", - "def get_rel_diffs(df_actual: pd.DataFrame, df_reference: pd.DataFrame) -> pd.DataFrame:\n", - " \"\"\"Get the relative differences between two DataFrames.\n", - "\n", - " Formula: abs(actual - reference) / abs(actual)\n", - "\n", - " Parameters\n", - " ----------\n", - " df_actual : pd.DataFrame\n", - " The first DataFrame representing \"actual\" results (dev branch).\n", - " df_reference : pd.DataFrame\n", - " The second DataFrame representing \"reference\" results (main branch).\n", - "\n", 
- " Returns\n", - " -------\n", - " pd.DataFrame\n", - " The DataFrame containing absolute and relative differences between\n", - " the metrics DataFrames.\n", - " \"\"\"\n", - " df_diff = abs(df_actual - df_reference) / abs(df_actual)\n", - " df_diff = df_diff.add_suffix(\" DIFF (%)\")\n", - "\n", - " return df_diff\n", - "\n", - "\n", - "def sort_columns(df: pd.DataFrame) -> pd.DataFrame:\n", - " \"\"\"Sorts the order of the columns for the final DataFrame output.\n", - "\n", - " Parameters\n", - " ----------\n", - " df : pd.DataFrame\n", - " The final DataFrame output.\n", - "\n", - " Returns\n", - " -------\n", - " pd.DataFrame\n", - " The final DataFrame output with sorted columns.\n", - " \"\"\"\n", - " columns = [\n", - " \"test_dev\",\n", - " \"test_prod\",\n", - " \"test DIFF (%)\",\n", - " \"ref_dev\",\n", - " \"ref_prod\",\n", - " \"ref DIFF (%)\",\n", - " \"test_regrid_dev\",\n", - " \"test_regrid_prod\",\n", - " \"test_regrid DIFF (%)\",\n", - " \"ref_regrid_dev\",\n", - " \"ref_regrid_prod\",\n", - " \"ref_regrid DIFF (%)\",\n", - " \"diff_dev\",\n", - " \"diff_prod\",\n", - " \"diff DIFF (%)\",\n", - " \"misc_dev\",\n", - " \"misc_prod\",\n", - " \"misc DIFF (%)\",\n", - " ]\n", - "\n", - " df_new = df.copy()\n", - " df_new = df_new[columns]\n", - "\n", - " return df_new\n", - "\n", - "\n", - "def update_diffs_to_pct(df: pd.DataFrame):\n", - " \"\"\"Update relative diff columns from float to string percentage.\n", - "\n", - " Parameters\n", - " ----------\n", - " df : pd.DataFrame\n", - " The final DataFrame containing metrics and diffs (floats).\n", - "\n", - " Returns\n", - " -------\n", - " pd.DataFrame\n", - " The final DataFrame containing metrics and diffs (str percentage).\n", - " \"\"\"\n", - " df_new = df.copy()\n", - " df_new[PERCENTAGE_COLUMNS] = df_new[PERCENTAGE_COLUMNS].map(\n", - " lambda x: \"{0:.2f}%\".format(x * 100) if not math.isnan(x) else x\n", - " )\n", - "\n", - " return df_new" - ] - }, - { - "cell_type": "markdown", - 
"metadata": {}, - "source": [ - "## 1. Get the DataFrame containing development and production metrics.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "df_metrics_dev = get_metrics(DEV_GLOB)\n", - "df_metrics_prod = get_metrics(PROD_GLOB)\n", - "df_metrics_all = pd.concat(\n", - " [df_metrics_dev.add_suffix(\"_dev\"), df_metrics_prod.add_suffix(\"_prod\")],\n", - " axis=1,\n", - " join=\"outer\",\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 2. Get DataFrame for differences >= 2%.\n", - "\n", - "- Get the relative differences for all metrics\n", - "- Filter down metrics to those with differences >= 2%\n", - " - If all cells in a row are NaN (< 2%), the entire row is dropped to make the results easier to parse.\n", - " - Any remaining NaN cells are below < 2% difference and **should be ignored**.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "df_metrics_diffs = get_rel_diffs(df_metrics_dev, df_metrics_prod)\n", - "df_metrics_diffs_thres = df_metrics_diffs[df_metrics_diffs >= 0.02]\n", - "df_metrics_diffs_thres = df_metrics_diffs_thres.dropna(\n", - " axis=0, how=\"all\", ignore_index=False\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 3. Combine both DataFrames to get the final result.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "df_final = df_metrics_diffs_thres.join(df_metrics_all)\n", - "df_final = sort_columns(df_final)\n", - "df_final = update_diffs_to_pct(df_final)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 4. 
Display final DataFrame and review results.\n", - "\n", - "- Red cells are differences >= 2%\n", - "- `nan` cells are differences < 2% and **should be ignored**\n" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - 
" \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
 var_keymetrictest_devtest_prodtest DIFF (%)ref_devref_prodref DIFF (%)test_regrid_devtest_regrid_prodtest_regrid DIFF (%)ref_regrid_devref_regrid_prodref_regrid DIFF (%)diff_devdiff_proddiff DIFF (%)misc_devmisc_prodmisc DIFF (%)
0FLUTmax299.911864299.355074nan300.162128299.776167nan299.911864299.355074nan300.162128299.776167nan9.4923599.7888093.12%nannannan
1FLUTmin124.610884125.987072nan122.878196124.148986nan124.610884125.987072nan122.878196124.148986nan-15.505809-17.0323259.84%nannannan
2FSNSmax269.789702269.798166nan272.722362272.184917nan269.789702269.798166nan272.722362272.184917nan20.64792924.85985220.40%nannannan
3FSNSmin16.89742317.7608895.11%16.71013416.2370612.83%16.89742317.7608895.11%16.71013416.2370612.83%-28.822277-28.324921nannannannan
4FSNTOAmax360.624327360.209193nan362.188816361.778529nan360.624327360.209193nan362.188816361.778529nan18.60227622.62426621.62%nannannan
5FSNTOAmean239.859777240.001860nan241.439641241.544384nan239.859777240.001860nan241.439641241.544384nan-1.579864-1.5425242.36%nannannan
6FSNTOAmin44.90704148.2568187.46%47.22350250.3396086.60%44.90704148.2568187.46%47.22350250.3396086.60%-23.576184-23.171864nannannannan
7LHFLXmax282.280453289.0799402.41%275.792933276.297281nan282.280453289.0799402.41%275.792933276.297281nan47.53550353.16892411.85%nannannan
8LHFLXmean88.37960988.470270nan88.96955088.976266nan88.37960988.470270nan88.96955088.976266nan-0.589942-0.50599614.23%nannannan
9LHFLXmin-0.878371-0.54924837.47%-1.176561-0.94611019.59%-0.878371-0.54924837.47%-1.176561-0.94611019.59%-34.375924-33.902769nannannannan
10LWCFmax78.49365377.473220nan86.12195984.993825nan78.49365377.473220nan86.12195984.993825nan9.61605710.79610412.27%nannannan
11LWCFmean24.37322424.370539nan24.40669724.391579nan24.37322424.370539nan24.40669724.391579nan-0.033473-0.02104037.14%nannannan
12LWCFmin-0.667812-0.6171077.59%-1.360010-1.18178713.10%-0.667812-0.6171077.59%-1.360010-1.18178713.10%-10.574643-10.1451884.06%nannannan
13NETCFmax13.22460412.6218254.56%13.71543813.2327163.52%13.22460412.6218254.56%13.71543813.2327163.52%10.89934410.2848255.64%nannannan
14NETCFmin-66.633044-66.008633nan-64.832041-67.3980473.96%-66.633044-66.008633nan-64.832041-67.3980473.96%-17.923932-17.940099nannannannan
15NET_FLUX_SRFmax155.691338156.424180nan166.556120166.506173nan155.691338156.424180nan166.556120166.506173nan59.81944961.6728243.10%nannannan
16NET_FLUX_SRFmean0.3940160.51633031.04%-0.0681860.068584200.58%0.3940160.51633031.04%-0.0681860.068584200.58%0.4622020.4477463.13%nannannan
17NET_FLUX_SRFmin-284.505205-299.5050245.27%-280.893287-290.2029343.31%-284.505205-299.5050245.27%-280.893287-290.2029343.31%-75.857589-85.85208913.18%nannannan
18PRECTmax17.28995117.071276nan20.26486220.138274nan17.28995117.071276nan20.26486220.138274nan2.3441112.4066252.67%nannannan
19PRECTmean3.0538023.056760nan3.0748853.074978nan3.0538023.056760nan3.0748853.074978nan-0.021083-0.01821813.59%nannannan
20PSLmin970.981710971.390765nan973.198437973.235326nan970.981710971.390765nan973.198437973.235326nan-6.328677-6.1046103.54%nannannan
21PSLrmsenannannannannannannannannannannannannannannan1.0428840.9799816.03%
22RESTOMmax84.29550283.821906nan87.70794487.451262nan84.29550283.821906nan87.70794487.451262nan17.39628321.42361623.15%nannannan
23RESTOMmean0.4815490.65656036.34%0.0180410.162984803.40%0.4815490.65656036.34%0.0180410.162984803.40%0.4635080.4935766.49%nannannan
24RESTOMmin-127.667181-129.014673nan-127.417586-128.673508nan-127.667181-129.014673nan-127.417586-128.673508nan-15.226249-14.8696142.34%nannannan
25SHFLXmax114.036895112.859646nan116.870038116.432591nan114.036895112.859646nan116.870038116.432591nan28.32065627.5567552.70%nannannan
26SHFLXmin-88.650312-88.386947nan-85.809438-85.480377nan-88.650312-88.386947nan-85.809438-85.480377nan-27.776625-28.3630532.11%nannannan
27SSTmin-1.788055-1.788055nan-1.676941-1.676941nan-1.788055-1.788055nan-1.676941-1.676941nan-4.513070-2.99327233.68%nannannan
28SWCFmax-0.518025-0.5368443.63%-0.311639-0.3316166.41%-0.518025-0.5368443.63%-0.311639-0.3316166.41%11.66893912.0870773.58%nannannan
29SWCFmin-123.625017-122.042043nan-131.053537-130.430161nan-123.625017-122.042043nan-131.053537-130.430161nan-21.415249-20.8089732.83%nannannan
30TREFHTmax31.14150831.058424nan29.81921029.721868nan31.14150831.058424nan29.81921029.721868nan4.9817575.1261852.90%nannannan
31TREFHTmax31.14150831.058424nan29.81921029.721868nan31.14150831.058424nan29.81921029.721868nan4.8678555.1261852.90%nannannan
32TREFHTmax31.14150831.058424nan29.81921029.721868nan31.14150831.058424nan29.81921029.721868nan4.9817575.1261855.31%nannannan
33TREFHTmax31.14150831.058424nan29.81921029.721868nan31.14150831.058424nan29.81921029.721868nan4.8678555.1261855.31%nannannan
34TREFHTmean14.76994614.741707nan13.84201313.800258nan14.76994614.741707nan13.84201313.800258nan0.9279330.9414492.28%nannannan
35TREFHTmean9.2142249.114572nan8.0833497.957917nan9.2142249.114572nan8.0833497.957917nan1.1308761.1566552.28%nannannan
36TREFHTmin-56.266677-55.623001nan-58.159250-57.542053nan-56.266677-55.623001nan-58.159250-57.542053nan-0.681558-0.6243718.39%nannannan
37TREFHTmin-56.266677-55.623001nan-58.159250-57.542053nan-56.266677-55.623001nan-58.159250-57.542053nan-0.681558-0.6243718.39%nannannan
38TREFHTmin-56.266677-55.623001nan-58.159250-57.542053nan-56.266677-55.623001nan-58.159250-57.542053nan-0.681558-0.6243718.39%nannannan
39TREFHTmin-56.266677-55.623001nan-58.159250-57.542053nan-56.266677-55.623001nan-58.159250-57.542053nan-0.681558-0.6243718.39%nannannan
40TREFHTrmsenannannannannannannannannannannannannannannan1.1607181.1799952.68%
41TREFHTrmsenannannannannannannannannannannannannannannan1.3431691.3791412.68%
\n" - ], - "text/plain": [ - "" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df_final.reset_index(names=[\"var_key\", \"metric\"]).style.map(\n", - " lambda x: \"background-color : red\" if isinstance(x, str) else \"\",\n", - " subset=pd.IndexSlice[:, PERCENTAGE_COLUMNS],\n", - ")" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "cdat_regression_test", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.6" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -}