Update order of functions

E3SM-Project · Dec 14, 2023 · 867603c · 867603c
1 parent 57b945c
commit 867603c
Showing 1 changed file with 158 additions and 136 deletions.
diff --git a/e3sm_diags/derivations/formulas_cosp.py b/e3sm_diags/derivations/formulas_cosp.py
@@ -5,18 +5,19 @@
 
 from e3sm_diags.derivations.formulas import convert_units
 
-# Type annotations for CLOUD_HIST_MAP.
+# A dictionary storing attributes for "prs" and "tau" cloud axes.
+# 1. 'keys': a list of valid variable keys found in the dataset, used for
+#     dynamic mapping.
+# 2. 'default_bnds': an array of floats for default bounds values that are used
+#     if missing. FIXME: These default_bnds might not align with the number of
+#     cloud axis coordinates in a dataset.
+# 3. 'min_mask': the minimum value to standardize the axis on for histogram
+#     plotting.
 CloudAxis = Literal["prs", "tau"]
 CloudHistMapAttrs = TypedDict(
     "CloudHistMapAttrs",
     {"keys": List[str], "default_bnds": np.ndarray, "min_mask": float},
 )
-
-# A dictionary storing attributes for "prs" and "tau" cloud axes.
-# 1. 'keys' -- a list of valid variable keys found in the dataset
-# 2. 'default_bnds' -- an array of floats for default bounds values if missing
-# 3. 'min_mask' -- the minimum value to standardize the axis on for histogram
-#    plotting.
 CLOUD_HIST_MAP: Dict[CloudAxis, CloudHistMapAttrs] = {
     "prs": {
         "keys": ["cosp_htmisr", "misr_cth"],
@@ -54,7 +55,7 @@
 # adjustment ranges. If either value in the (min, max) tuple is None, then
 # the actual value from the axis is used instead.
 AdjRange = Tuple[Optional[float], Optional[float]]
-CLOUD_AXES_ADJ_RANGES: Dict[str, Dict[str, AdjRange]] = {
+CLOUD_BIN_SUM_MAP: Dict[str, Dict[str, AdjRange]] = {
     # ISSCCP
     "CLDTOT_TAU1.3_ISCCP": {"prs": (None, None), "tau": (1.3, None)},
     "CLDTOT_TAU1.3_9.4_ISCCP": {"prs": (None, None), "tau": (1.3, 9.4)},
@@ -78,7 +79,55 @@
 # A dictionary mapping the names of "prs" axes to the unit adjustment value.
 # - COSP v2 "cosp_pr" is in units "Pa" instead of "hPa" (v1).
 # - COSP v2 "cosp_htmisr" is in units "m" instead of "km" (v1).
-PRS_NAMES_TO_ADJ_UNITS = {"cosp_prs": 100, "cosp_htmisr": 1000}
+PRS_UNIT_ADJ_MAP = {"cosp_prs": 100, "cosp_htmisr": 1000}
+
+
+def cosp_histogram_standardize(
+    ds: xr.Dataset, target_var_key: str, var: xr.DataArray
+) -> xr.Dataset:
+    """Standardize cloud top pressure and cloud thickness bins.
+
+    This standardization makes the cloud variable data suitable for plotting.
+
+    Parameters
+    ----------
+    ds : xr.Dataset
+        The dataset with the cloud variable, cloud axes (prs, tau), and
+        cloud axes bounds (optional). If bounds don't exist, they will be added
+        using default values (refer to ``CLOUD_HIST_MAP``).
+    target_var_key : str
+        The target variable key (e.g,. "CLDTOT_TAU1.3_ISCCP").
+    var : xr.DataArray
+        The variable in the dataset used for deriving the target variable
+        (e.g., "CLD_MODIS").
+
+    Returns
+    -------
+    xr.Dataset
+        The dataset with target variable, which is the standardized version
+        of the derived variable, ``var``.
+    """
+    ds_new = ds.copy()
+    prs = _get_cloud_axis(var, "prs")
+    tau = _get_cloud_axis(var, "tau")
+
+    # Mask on the min and max of prs and/or tau, then subset by dropping masked
+    # values.
+    var_std = _subset_cloud_var(var.copy(), prs, tau)
+
+    # Align the dataset dimensions with the standardized variabe. `xr.align`
+    # returns both objects and element 0 is the xr.Dataset that is needed.
+    ds_final: xr.Dataset = xr.align(ds_new, var_std)[0]  # type: ignore
+    ds_final[target_var_key] = var_std
+    ds_final = ds_final.drop_vars(var.name)
+
+    # TODO: These functions don't actually add missing cloud bounds yet
+    # because it is based on the logic from the legacy code, which does not
+    # correctly add bounds if they are missing.
+    ds_final = _add_missing_cloud_bnds(ds_final, prs, "prs")
+    ds_final = _add_missing_cloud_bnds(ds_final, tau, "tau")
+
+    return ds_final
 
 
 def cosp_bin_sum(target_var_key: str, var: xr.DataArray) -> xr.DataArray:
@@ -103,8 +152,8 @@ def cosp_bin_sum(target_var_key: str, var: xr.DataArray) -> xr.DataArray:
     tau = _get_cloud_axis(var, "tau")
 
     # 2. Get the prs and tau axis adjustment ranges if they are set.
-    prs_adj_range = CLOUD_AXES_ADJ_RANGES[target_var_key]["prs"]
-    tau_adj_range = CLOUD_AXES_ADJ_RANGES[target_var_key]["tau"]
+    prs_adj_range = CLOUD_BIN_SUM_MAP[target_var_key]["prs"]
+    tau_adj_range = CLOUD_BIN_SUM_MAP[target_var_key]["tau"]
 
     # 3. Get prs range, lim.
     prs_range = _get_prs_subset_range(prs, prs_adj_range)
@@ -139,6 +188,77 @@ def cosp_bin_sum(target_var_key: str, var: xr.DataArray) -> xr.DataArray:
     return final_var
 
 
+def _get_cloud_axis(var: xr.DataArray, axis: CloudAxis) -> xr.DataArray:
+    da_axis = None
+
+    keys = CLOUD_HIST_MAP[axis]["keys"]
+    for key in keys:
+        if key in var.dims:
+            da_axis = var[key]
+
+            break
+
+    if da_axis is None:
+        raise KeyError(
+            f"The {axis!r} axis is not in the {var.name!r} to "
+            "standardize the cosp histogram."
+        )
+
+    return da_axis
+
+
+def _subset_cloud_var(
+    var: xr.DataArray, prs: xr.DataArray, tau: xr.DataArray
+) -> xr.DataArray:
+    prs_min = CLOUD_HIST_MAP["prs"]["min_mask"]
+    prs_max = prs[-1].item()
+
+    tau_min = CLOUD_HIST_MAP["tau"]["min_mask"]
+    tau_max = tau[-1].item()
+
+    # MISR model and MISR obs
+    if var.name in ["CLD_MISR", "CLMISR"]:
+        # COSP v2 cosp_htmisr in units m instead of km as in v1 and COSP v2
+        # cosp_htmisr[0] equals to 0 instead of -99 as in v1 so the cloud
+        # varable needs to be masked manually by slicing out the first index
+        # on the cosp_htmisr axis.
+        if var.name == "CLD_MISR" and prs.max().item() > 1000:
+            var = var.isel({prs.name: slice(1, None)})
+            prs_max = 1000.0 * prs_max
+
+        cond = (prs >= prs_min) & (prs <= prs_max) & (tau >= tau_min) & (tau <= tau_max)
+    # ISCCP model, # ISCCP obs, and MODIS
+    elif var.name in ["FISCCP1_COSP", "CLISCCP", "CLMODIS"]:
+        cond = (tau >= tau_min) & (tau <= tau_max)
+
+    var_sub = var.where(cond, drop=True)
+
+    return var_sub
+
+
+def _add_missing_cloud_bnds(
+    ds: xr.Dataset, axis_var: xr.DataArray, axis: CloudAxis
+) -> xr.Dataset:
+    # FIXME: ValueError: conflicting sizes for dimension 'misr_cth': length 7
+    # on the data but length 16 on coordinate 'misr_cth'
+    # https://github.com/E3SM-Project/e3sm_diags/issues/761
+    # TODO: Might need to dynamically generate bounds if they don't exist.
+
+    bnds_key = axis_var.attrs.get("bounds")
+    if bnds_key is None or bnds_key not in ds.data_vars.keys():
+        # bnds_data = CLOUD_HIST_MAP[axis]["default_bnds"]
+        # default_bnds = xr.DataArray(
+        #     name=f"{axis_var.name}_bnds",
+        #     data=bnds_data,
+        #     dims=list(axis_var.dims) + ["bnds"],
+        #     coords=axis_var.coords,
+        # )
+        # ds[default_bnds.name] = default_bnds
+        pass
+
+    return ds
+
+
 def _get_prs_subset_range(
     prs: xr.DataArray, prs_adj_range: AdjRange
 ) -> Tuple[float, float]:
@@ -167,8 +287,8 @@ def _get_prs_subset_range(
 
     for act_val, adj_val in zip(act_range, prs_adj_range):
         if adj_val is not None:
-            if prs.name in PRS_NAMES_TO_ADJ_UNITS.keys() and prs.max().item() > 1000:
-                adj_val = adj_val * PRS_NAMES_TO_ADJ_UNITS[str(prs.name)]
+            if prs.name in PRS_UNIT_ADJ_MAP.keys() and prs.max().item() > 1000:
+                adj_val = adj_val * PRS_UNIT_ADJ_MAP[str(prs.name)]
 
             final_range.append(adj_val)
         else:
@@ -212,7 +332,7 @@ def _get_tau_subset_range_and_str(
 
 
 def _get_simulation_str(var_key: str) -> Optional[str]:
-    """Get the prs simulation.
+    """Get the prs simulation string.
 
     Parameters
     ----------
@@ -247,7 +367,7 @@ def _get_prs_cloud_level_str(
     Parameters
     ----------
     var_key: str
-        The key of the variable
+        The key of the variable in the dataset.
     prs_act_range : Tuple[float, float]
         The prs actual range.
     prs_adj_range : AdjRange
@@ -277,13 +397,31 @@ def _get_prs_cloud_level_str(
     return cloud_level
 
 
-def _get_cloud_level(prs_act_range, low_bnds, high_bnds) -> str:
-    """Determines the cloud type based on prs values and the specified boundaries
+def _get_cloud_level(
+    prs_act_range: Tuple[float, float],
+    low_bnds: Tuple[float, float],
+    high_bnds: Tuple[float, float],
+) -> str:
+    """Get the cloud type based on prs values and the specified boundaries
 
     Thresholds for cloud levels:
-      - cloud top height: high cloud (<440hPa or > 7km)
-       - midlevel cloud (440-680hPa, 3-7 km)
-       - low clouds (>680hPa, < 3km)
+      1. cloud top height: high cloud (<440hPa or > 7km)
+      2. mid-level cloud (440-680hPa, 3-7 km)
+      3. low clouds (>680hPa, < 3km)
+
+    Parameters
+    ----------
+    prs_act_range : Tuple[float, float]
+        The prs actual range.
+    low_bnds : Tuple[float, float]
+        The low bounds.
+    high_bnds : Tuple[float, float]
+        The high bounds.
+
+    Returns
+    -------
+    str
+        _description_
     """
     min, max = prs_act_range
 
@@ -295,119 +433,3 @@ def _get_cloud_level(prs_act_range, low_bnds, high_bnds) -> str:
         return "low cloud fraction"
 
     return "total cloud fraction"
-
-
-def cosp_histogram_standardize(
-    ds: xr.Dataset, target_var_key: str, var: xr.DataArray
-) -> xr.Dataset:
-    """Standardize cloud top pressure and cloud thickness bins.
-
-    This standardization makes the cloud variable data suitable for plotting.
-
-    Parameters
-    ----------
-    ds : xr.Dataset
-        The dataset with the cloud variable, cloud axes (prs, tau), and
-        cloud axes bounds (optional). If bounds don't exist, they will be added
-        using default values (refer to ``CLOUD_HIST_MAP``).
-    target_var_key : str
-        The target variable key (e.g,. "CLDTOT_TAU1.3_ISCCP").
-    var : xr.DataArray
-        The variable in the dataset used for deriving the target variable
-        (e.g., "CLD_MODIS").
-
-    Returns
-    -------
-    xr.Dataset
-        The dataset with target variable, which is the standardized version
-        of the derived variable, ``var``.
-    """
-    ds_new = ds.copy()
-    prs = _get_cloud_axis(var, "prs")
-    tau = _get_cloud_axis(var, "tau")
-
-    # Mask on the min and max of prs and/or tau, then subset by dropping masked
-    # values.
-    var_std = _subset_cloud_var(var.copy(), prs, tau)
-
-    # Align the dataset dimensions with the standardized variabe. `xr.align`
-    # returns both objects and element 0 is the xr.Dataset that is needed.
-    ds_final: xr.Dataset = xr.align(ds_new, var_std)[0]  # type: ignore
-    ds_final[target_var_key] = var_std
-    ds_final = ds_final.drop_vars(var.name)
-
-    ds_final = _add_missing_cloud_bnds(ds_final, prs, "prs")
-    ds_final = _add_missing_cloud_bnds(ds_final, tau, "tau")
-
-    return ds_final
-
-
-def _get_cloud_axis(var: xr.DataArray, axis: CloudAxis) -> xr.DataArray:
-    da_axis = None
-
-    keys = CLOUD_HIST_MAP[axis]["keys"]
-    for key in keys:
-        if key in var.dims:
-            da_axis = var[key]
-
-            break
-
-    if da_axis is None:
-        raise KeyError(
-            f"The {axis!r} axis is not in the {var.name!r} to "
-            "standardize the cosp histogram."
-        )
-
-    return da_axis
-
-
-def _subset_cloud_var(
-    var: xr.DataArray, prs: xr.DataArray, tau: xr.DataArray
-) -> xr.DataArray:
-    prs_min = CLOUD_HIST_MAP["prs"]["min_mask"]
-    prs_max = prs[-1].item()
-
-    tau_min = CLOUD_HIST_MAP["tau"]["min_mask"]
-    tau_max = tau[-1].item()
-
-    # MISR model and MISR obs
-    if var.name in ["CLD_MISR", "CLMISR"]:
-        # COSP v2 cosp_htmisr in units m instead of km as in v1 and COSP v2
-        # cosp_htmisr[0] equals to 0 instead of -99 as in v1 so the cloud
-        # varable needs to be masked manually by slicing out the first index
-        # on the cosp_htmisr axis.
-        if var.name == "CLD_MISR" and prs.max().item() > 1000:
-            var = var.isel({prs.name: slice(1, None)})
-            prs_max = 1000.0 * prs_max
-
-        cond = (prs >= prs_min) & (prs <= prs_max) & (tau >= tau_min) & (tau <= tau_max)
-    # ISCCP model, # ISCCP obs, and MODIS
-    elif var.name in ["FISCCP1_COSP", "CLISCCP", "CLMODIS"]:
-        cond = (tau >= tau_min) & (tau <= tau_max)
-
-    var_sub = var.where(cond, drop=True)
-
-    return var_sub
-
-
-def _add_missing_cloud_bnds(
-    ds: xr.Dataset, axis_var: xr.DataArray, axis: CloudAxis
-) -> xr.Dataset:
-    # FIXME: ValueError: conflicting sizes for dimension 'misr_cth': length 7
-    # on the data but length 16 on coordinate 'misr_cth'
-    # https://github.com/E3SM-Project/e3sm_diags/issues/761
-    # TODO: Might need to dynamically generate bounds if they don't exist.
-
-    bnds_key = axis_var.attrs.get("bounds")
-    if bnds_key is None or bnds_key not in ds.data_vars.keys():
-        # bnds_data = CLOUD_HIST_MAP[axis]["default_bnds"]
-        # default_bnds = xr.DataArray(
-        #     name=f"{axis_var.name}_bnds",
-        #     data=bnds_data,
-        #     dims=list(axis_var.dims) + ["bnds"],
-        #     coords=axis_var.coords,
-        # )
-        # ds[default_bnds.name] = default_bnds
-        pass
-
-    return ds