Skip to content

Commit

Permalink
Update order of functions
Browse files Browse the repository at this point in the history
  • Loading branch information
tomvothecoder committed Dec 14, 2023
1 parent 57b945c commit 867603c
Showing 1 changed file with 158 additions and 136 deletions.
294 changes: 158 additions & 136 deletions e3sm_diags/derivations/formulas_cosp.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,19 @@

from e3sm_diags.derivations.formulas import convert_units

# Type annotations for CLOUD_HIST_MAP.
# A dictionary storing attributes for "prs" and "tau" cloud axes.
# 1. 'keys': a list of valid variable keys found in the dataset, used for
# dynamic mapping.
# 2. 'default_bnds': an array of floats for default bounds values that are used
# if missing. FIXME: These default_bnds might not align with the number of
# cloud axis coordinates in a dataset.
# 3. 'min_mask': the minimum value to standardize the axis on for histogram
# plotting.
CloudAxis = Literal["prs", "tau"]
CloudHistMapAttrs = TypedDict(
"CloudHistMapAttrs",
{"keys": List[str], "default_bnds": np.ndarray, "min_mask": float},
)

# A dictionary storing attributes for "prs" and "tau" cloud axes.
# 1. 'keys' -- a list of valid variable keys found in the dataset
# 2. 'default_bnds' -- an array of floats for default bounds values if missing
# 3. 'min_mask' -- the minimum value to standardize the axis on for histogram
# plotting.
CLOUD_HIST_MAP: Dict[CloudAxis, CloudHistMapAttrs] = {
"prs": {
"keys": ["cosp_htmisr", "misr_cth"],
Expand Down Expand Up @@ -54,7 +55,7 @@
# adjustment ranges. If either value in the (min, max) tuple is None, then
# the actual value from the axis is used instead.
AdjRange = Tuple[Optional[float], Optional[float]]
CLOUD_AXES_ADJ_RANGES: Dict[str, Dict[str, AdjRange]] = {
CLOUD_BIN_SUM_MAP: Dict[str, Dict[str, AdjRange]] = {
# ISSCCP
"CLDTOT_TAU1.3_ISCCP": {"prs": (None, None), "tau": (1.3, None)},
"CLDTOT_TAU1.3_9.4_ISCCP": {"prs": (None, None), "tau": (1.3, 9.4)},
Expand All @@ -78,7 +79,55 @@
# A dictionary mapping the names of "prs" axes to the unit adjustment value.
# - COSP v2 "cosp_pr" is in units "Pa" instead of "hPa" (v1).
# - COSP v2 "cosp_htmisr" is in units "m" instead of "km" (v1).
PRS_NAMES_TO_ADJ_UNITS = {"cosp_prs": 100, "cosp_htmisr": 1000}
PRS_UNIT_ADJ_MAP = {"cosp_prs": 100, "cosp_htmisr": 1000}


def cosp_histogram_standardize(
ds: xr.Dataset, target_var_key: str, var: xr.DataArray
) -> xr.Dataset:
"""Standardize cloud top pressure and cloud thickness bins.
This standardization makes the cloud variable data suitable for plotting.
Parameters
----------
ds : xr.Dataset
The dataset with the cloud variable, cloud axes (prs, tau), and
cloud axes bounds (optional). If bounds don't exist, they will be added
using default values (refer to ``CLOUD_HIST_MAP``).
target_var_key : str
The target variable key (e.g,. "CLDTOT_TAU1.3_ISCCP").
var : xr.DataArray
The variable in the dataset used for deriving the target variable
(e.g., "CLD_MODIS").
Returns
-------
xr.Dataset
The dataset with target variable, which is the standardized version
of the derived variable, ``var``.
"""
ds_new = ds.copy()
prs = _get_cloud_axis(var, "prs")
tau = _get_cloud_axis(var, "tau")

# Mask on the min and max of prs and/or tau, then subset by dropping masked
# values.
var_std = _subset_cloud_var(var.copy(), prs, tau)

# Align the dataset dimensions with the standardized variabe. `xr.align`
# returns both objects and element 0 is the xr.Dataset that is needed.
ds_final: xr.Dataset = xr.align(ds_new, var_std)[0] # type: ignore
ds_final[target_var_key] = var_std
ds_final = ds_final.drop_vars(var.name)

# TODO: These functions don't actually add missing cloud bounds yet
# because it is based on the logic from the legacy code, which does not
# correctly add bounds if they are missing.
ds_final = _add_missing_cloud_bnds(ds_final, prs, "prs")
ds_final = _add_missing_cloud_bnds(ds_final, tau, "tau")

return ds_final


def cosp_bin_sum(target_var_key: str, var: xr.DataArray) -> xr.DataArray:
Expand All @@ -103,8 +152,8 @@ def cosp_bin_sum(target_var_key: str, var: xr.DataArray) -> xr.DataArray:
tau = _get_cloud_axis(var, "tau")

# 2. Get the prs and tau axis adjustment ranges if they are set.
prs_adj_range = CLOUD_AXES_ADJ_RANGES[target_var_key]["prs"]
tau_adj_range = CLOUD_AXES_ADJ_RANGES[target_var_key]["tau"]
prs_adj_range = CLOUD_BIN_SUM_MAP[target_var_key]["prs"]
tau_adj_range = CLOUD_BIN_SUM_MAP[target_var_key]["tau"]

# 3. Get prs range, lim.
prs_range = _get_prs_subset_range(prs, prs_adj_range)
Expand Down Expand Up @@ -139,6 +188,77 @@ def cosp_bin_sum(target_var_key: str, var: xr.DataArray) -> xr.DataArray:
return final_var


def _get_cloud_axis(var: xr.DataArray, axis: CloudAxis) -> xr.DataArray:
da_axis = None

keys = CLOUD_HIST_MAP[axis]["keys"]
for key in keys:
if key in var.dims:
da_axis = var[key]

break

if da_axis is None:
raise KeyError(
f"The {axis!r} axis is not in the {var.name!r} to "
"standardize the cosp histogram."
)

return da_axis


def _subset_cloud_var(
var: xr.DataArray, prs: xr.DataArray, tau: xr.DataArray
) -> xr.DataArray:
prs_min = CLOUD_HIST_MAP["prs"]["min_mask"]
prs_max = prs[-1].item()

tau_min = CLOUD_HIST_MAP["tau"]["min_mask"]
tau_max = tau[-1].item()

# MISR model and MISR obs
if var.name in ["CLD_MISR", "CLMISR"]:
# COSP v2 cosp_htmisr in units m instead of km as in v1 and COSP v2
# cosp_htmisr[0] equals to 0 instead of -99 as in v1 so the cloud
# varable needs to be masked manually by slicing out the first index
# on the cosp_htmisr axis.
if var.name == "CLD_MISR" and prs.max().item() > 1000:
var = var.isel({prs.name: slice(1, None)})
prs_max = 1000.0 * prs_max

cond = (prs >= prs_min) & (prs <= prs_max) & (tau >= tau_min) & (tau <= tau_max)
# ISCCP model, # ISCCP obs, and MODIS
elif var.name in ["FISCCP1_COSP", "CLISCCP", "CLMODIS"]:
cond = (tau >= tau_min) & (tau <= tau_max)

var_sub = var.where(cond, drop=True)

return var_sub


def _add_missing_cloud_bnds(
ds: xr.Dataset, axis_var: xr.DataArray, axis: CloudAxis
) -> xr.Dataset:
# FIXME: ValueError: conflicting sizes for dimension 'misr_cth': length 7
# on the data but length 16 on coordinate 'misr_cth'
# https://github.com/E3SM-Project/e3sm_diags/issues/761
# TODO: Might need to dynamically generate bounds if they don't exist.

bnds_key = axis_var.attrs.get("bounds")
if bnds_key is None or bnds_key not in ds.data_vars.keys():
# bnds_data = CLOUD_HIST_MAP[axis]["default_bnds"]
# default_bnds = xr.DataArray(
# name=f"{axis_var.name}_bnds",
# data=bnds_data,
# dims=list(axis_var.dims) + ["bnds"],
# coords=axis_var.coords,
# )
# ds[default_bnds.name] = default_bnds
pass

return ds


def _get_prs_subset_range(
prs: xr.DataArray, prs_adj_range: AdjRange
) -> Tuple[float, float]:
Expand Down Expand Up @@ -167,8 +287,8 @@ def _get_prs_subset_range(

for act_val, adj_val in zip(act_range, prs_adj_range):
if adj_val is not None:
if prs.name in PRS_NAMES_TO_ADJ_UNITS.keys() and prs.max().item() > 1000:
adj_val = adj_val * PRS_NAMES_TO_ADJ_UNITS[str(prs.name)]
if prs.name in PRS_UNIT_ADJ_MAP.keys() and prs.max().item() > 1000:
adj_val = adj_val * PRS_UNIT_ADJ_MAP[str(prs.name)]

final_range.append(adj_val)
else:
Expand Down Expand Up @@ -212,7 +332,7 @@ def _get_tau_subset_range_and_str(


def _get_simulation_str(var_key: str) -> Optional[str]:
"""Get the prs simulation.
"""Get the prs simulation string.
Parameters
----------
Expand Down Expand Up @@ -247,7 +367,7 @@ def _get_prs_cloud_level_str(
Parameters
----------
var_key: str
The key of the variable
The key of the variable in the dataset.
prs_act_range : Tuple[float, float]
The prs actual range.
prs_adj_range : AdjRange
Expand Down Expand Up @@ -277,13 +397,31 @@ def _get_prs_cloud_level_str(
return cloud_level


def _get_cloud_level(prs_act_range, low_bnds, high_bnds) -> str:
"""Determines the cloud type based on prs values and the specified boundaries
def _get_cloud_level(
prs_act_range: Tuple[float, float],
low_bnds: Tuple[float, float],
high_bnds: Tuple[float, float],
) -> str:
"""Get the cloud type based on prs values and the specified boundaries
Thresholds for cloud levels:
- cloud top height: high cloud (<440hPa or > 7km)
- midlevel cloud (440-680hPa, 3-7 km)
- low clouds (>680hPa, < 3km)
1. cloud top height: high cloud (<440hPa or > 7km)
2. mid-level cloud (440-680hPa, 3-7 km)
3. low clouds (>680hPa, < 3km)
Parameters
----------
prs_act_range : Tuple[float, float]
The prs actual range.
low_bnds : Tuple[float, float]
The low bounds.
high_bnds : Tuple[float, float]
The high bounds.
Returns
-------
str
_description_
"""
min, max = prs_act_range

Expand All @@ -295,119 +433,3 @@ def _get_cloud_level(prs_act_range, low_bnds, high_bnds) -> str:
return "low cloud fraction"

return "total cloud fraction"


def cosp_histogram_standardize(
ds: xr.Dataset, target_var_key: str, var: xr.DataArray
) -> xr.Dataset:
"""Standardize cloud top pressure and cloud thickness bins.
This standardization makes the cloud variable data suitable for plotting.
Parameters
----------
ds : xr.Dataset
The dataset with the cloud variable, cloud axes (prs, tau), and
cloud axes bounds (optional). If bounds don't exist, they will be added
using default values (refer to ``CLOUD_HIST_MAP``).
target_var_key : str
The target variable key (e.g,. "CLDTOT_TAU1.3_ISCCP").
var : xr.DataArray
The variable in the dataset used for deriving the target variable
(e.g., "CLD_MODIS").
Returns
-------
xr.Dataset
The dataset with target variable, which is the standardized version
of the derived variable, ``var``.
"""
ds_new = ds.copy()
prs = _get_cloud_axis(var, "prs")
tau = _get_cloud_axis(var, "tau")

# Mask on the min and max of prs and/or tau, then subset by dropping masked
# values.
var_std = _subset_cloud_var(var.copy(), prs, tau)

# Align the dataset dimensions with the standardized variabe. `xr.align`
# returns both objects and element 0 is the xr.Dataset that is needed.
ds_final: xr.Dataset = xr.align(ds_new, var_std)[0] # type: ignore
ds_final[target_var_key] = var_std
ds_final = ds_final.drop_vars(var.name)

ds_final = _add_missing_cloud_bnds(ds_final, prs, "prs")
ds_final = _add_missing_cloud_bnds(ds_final, tau, "tau")

return ds_final


def _get_cloud_axis(var: xr.DataArray, axis: CloudAxis) -> xr.DataArray:
da_axis = None

keys = CLOUD_HIST_MAP[axis]["keys"]
for key in keys:
if key in var.dims:
da_axis = var[key]

break

if da_axis is None:
raise KeyError(
f"The {axis!r} axis is not in the {var.name!r} to "
"standardize the cosp histogram."
)

return da_axis


def _subset_cloud_var(
var: xr.DataArray, prs: xr.DataArray, tau: xr.DataArray
) -> xr.DataArray:
prs_min = CLOUD_HIST_MAP["prs"]["min_mask"]
prs_max = prs[-1].item()

tau_min = CLOUD_HIST_MAP["tau"]["min_mask"]
tau_max = tau[-1].item()

# MISR model and MISR obs
if var.name in ["CLD_MISR", "CLMISR"]:
# COSP v2 cosp_htmisr in units m instead of km as in v1 and COSP v2
# cosp_htmisr[0] equals to 0 instead of -99 as in v1 so the cloud
# varable needs to be masked manually by slicing out the first index
# on the cosp_htmisr axis.
if var.name == "CLD_MISR" and prs.max().item() > 1000:
var = var.isel({prs.name: slice(1, None)})
prs_max = 1000.0 * prs_max

cond = (prs >= prs_min) & (prs <= prs_max) & (tau >= tau_min) & (tau <= tau_max)
# ISCCP model, # ISCCP obs, and MODIS
elif var.name in ["FISCCP1_COSP", "CLISCCP", "CLMODIS"]:
cond = (tau >= tau_min) & (tau <= tau_max)

var_sub = var.where(cond, drop=True)

return var_sub


def _add_missing_cloud_bnds(
ds: xr.Dataset, axis_var: xr.DataArray, axis: CloudAxis
) -> xr.Dataset:
# FIXME: ValueError: conflicting sizes for dimension 'misr_cth': length 7
# on the data but length 16 on coordinate 'misr_cth'
# https://github.com/E3SM-Project/e3sm_diags/issues/761
# TODO: Might need to dynamically generate bounds if they don't exist.

bnds_key = axis_var.attrs.get("bounds")
if bnds_key is None or bnds_key not in ds.data_vars.keys():
# bnds_data = CLOUD_HIST_MAP[axis]["default_bnds"]
# default_bnds = xr.DataArray(
# name=f"{axis_var.name}_bnds",
# data=bnds_data,
# dims=list(axis_var.dims) + ["bnds"],
# coords=axis_var.coords,
# )
# ds[default_bnds.name] = default_bnds
pass

return ds

0 comments on commit 867603c

Please sign in to comment.