diff --git a/ilamb3/__init__.py b/ilamb3/__init__.py index 545c643..9643736 100644 --- a/ilamb3/__init__.py +++ b/ilamb3/__init__.py @@ -10,6 +10,7 @@ from cf_xarray.units import units from ilamb3._version import __version__ # noqa +from ilamb3.config import conf # noqa # additional units that pint/cf-xarray does not handle units.define("kg = 1e3 * g") @@ -42,5 +43,5 @@ def ilamb_catalog() -> pooch.Pooch: return registry -__all__ = ["dataset", "compare", "analysis", "regions", "ilamb_catalog"] +__all__ = ["dataset", "compare", "analysis", "regions", "ilamb_catalog,", "conf"] xr.set_options(keep_attrs=True) diff --git a/ilamb3/analysis/bias.py b/ilamb3/analysis/bias.py index 4527f6b..5e77056 100644 --- a/ilamb3/analysis/bias.py +++ b/ilamb3/analysis/bias.py @@ -13,6 +13,7 @@ import pandas as pd import xarray as xr +import ilamb3.plot as plt from ilamb3 import compare as cmp from ilamb3 import dataset as dset from ilamb3.analysis.base import ILAMBAnalysis @@ -28,6 +29,8 @@ class bias_analysis(ILAMBAnalysis): ---------- required_variable : str The name of the variable to be used in this analysis. + variable_cmap : str + The colormap to use in plots of the comparison variable, optional. Methods ------- @@ -37,8 +40,11 @@ class bias_analysis(ILAMBAnalysis): The method """ - def __init__(self, required_variable: str): # numpydoc ignore=GL08 + def __init__( + self, required_variable: str, variable_cmap: str = "viridis" + ): # numpydoc ignore=GL08 self.req_variable = required_variable + self.cmap = variable_cmap def required_variables(self) -> list[str]: """ @@ -108,7 +114,6 @@ def __call__( varname = self.req_variable if use_uncertainty and "bounds" not in ref[varname].attrs: use_uncertainty = False - # Checks on the database if it is being used if method == "RegionalQuantiles": check_quantile_database(quantile_dbase) @@ -195,7 +200,7 @@ def __call__( if use_uncertainty: ref_out["uncert"] = uncert com_out = bias.to_dataset(name="bias") - com_out["bias_score"] = score + com_out["biasscore"] = score try: lat_name = dset.get_dim_name(com_mean, "lat") lon_name = dset.get_dim_name(com_mean, "lon") @@ -261,7 +266,7 @@ def _scalar( ] ) # Bias Score - bias_scalar_score = _scalar(com_out, "bias_score", region, True, True) + bias_scalar_score = _scalar(com_out, "biasscore", region, True, True) with warnings.catch_warnings(): warnings.filterwarnings( "ignore", "divide by zero encountered in divide", RuntimeWarning @@ -294,3 +299,48 @@ def _scalar( ) dfs.attrs = dict(method=method) return dfs, ref_out, com_out + + def plots( + self, + df: pd.DataFrame, + ref: xr.Dataset, + com: dict[str, xr.Dataset], + ) -> pd.DataFrame: + + # Some initialization + regions = [None if r == "None" else r for r in df["region"].unique()] + com["Reference"] = ref + + # Setup plot data + df = plt.determine_plot_limits(com).set_index("name") + df.loc["mean", ["cmap", "title"]] = [self.cmap, "Period Mean"] + df.loc["bias", ["cmap", "title"]] = ["seismic", "Bias"] + df.loc["biasscore", ["cmap", "title"]] = ["plasma", "Bias Score"] + + # Build up a dataframe of matplotlib axes + axs = [ + { + "name": plot, + "title": df.loc[plot, "title"], + "region": region, + "source": source, + "axis": ( + plt.plot_map( + ds[plot], + region=region, + vmin=df.loc[plot, "low"], + vmax=df.loc[plot, "high"], + cmap=df.loc[plot, "cmap"], + title=source + " " + df.loc[plot, "title"], + ) + if plot in ds + else pd.NA + ), + } + for plot in ["mean", "bias", "biasscore"] + for source, ds in com.items() + for region in regions + ] + axs = pd.DataFrame(axs).dropna(subset=["axis"]) + + return axs diff --git a/ilamb3/analysis/relationship.py b/ilamb3/analysis/relationship.py index 2d13b02..b84d865 100644 --- a/ilamb3/analysis/relationship.py +++ b/ilamb3/analysis/relationship.py @@ -287,7 +287,7 @@ def __call__( analysis_name, f"Score {var_dep} vs {var_ind}", "score", - "", + "1", score, ] ) diff --git a/ilamb3/config.py b/ilamb3/config.py new file mode 100644 index 0000000..11097aa --- /dev/null +++ b/ilamb3/config.py @@ -0,0 +1,101 @@ +"""Configuration for ilamb3""" + +import contextlib +import copy +from pathlib import Path + +import yaml + +import ilamb3.regions as reg + +defaults = { + "build_dir": "./_build", + "regions": [None], +} + + +class Config(dict): + """A global configuration object used in the package.""" + + def __init__(self, filename: Path | None = None, **kwargs): + self.filename = ( + Path(filename) + if filename is not None + else Path.home() / ".config/ilamb3/conf.yaml" + ) + self.filename.parent.mkdir(parents=True, exist_ok=True) + self.reload_all() + self.temp = None + super().__init__(**kwargs) + + def __repr__(self): + return yaml.dump(dict(self)) + + def reset(self): + """Return to defaults.""" + self.clear() + self.update(copy.deepcopy(defaults)) + + def save(self, filename: Path | None = None): + """Save current configuration to file as YAML.""" + filename = filename or self.filename + filename.parent.mkdir(parents=True, exist_ok=True) + with open(filename, "w") as f: + yaml.dump(dict(self), f) + + @contextlib.contextmanager + def _unset(self, temp): + yield + self.clear() + self.update(temp) + + def set( + self, + *, + build_dir: str | None = None, + regions: list[str] | None = None, + ): + """Change ilamb3 configuration options.""" + temp = copy.deepcopy(self) + if build_dir is not None: + self["build_dir"] = str(build_dir) + if regions is not None: + ilamb_regions = reg.Regions() + does_not_exist = set(regions) - set(ilamb_regions._regions) - set([None]) + if does_not_exist: + raise ValueError( + f"Cannot run ILAMB over these regions [{list(does_not_exist)}] which are not registered in our system [{list(ilamb_regions._regions)}]" + ) + self["regions"] = regions + return self._unset(temp) + + def __getitem__(self, item): + if item in self: + return super().__getitem__(item) + elif item in defaults: + return defaults[item] + else: + raise KeyError(item) + + def get(self, key, default=None): + if key in self: + return super().__getitem__(key) + return default + + def reload_all(self): + self.reset() + self.load() + + def load(self, filename: Path | None = None): + """Update global config from YAML file or default file if None.""" + filename = filename or self.filename + if filename.is_file(): + with open(filename) as f: + try: + self.update(yaml.safe_load(f)) + except Exception: + pass + + +conf = Config() +conf.reload_all() diff --git a/ilamb3/dataset.py b/ilamb3/dataset.py index 50a0fe2..e158b36 100644 --- a/ilamb3/dataset.py +++ b/ilamb3/dataset.py @@ -5,8 +5,8 @@ import numpy as np import xarray as xr +import ilamb3.regions as ilreg from ilamb3.exceptions import NoSiteDimension -from ilamb3.regions import Regions def get_dim_name( @@ -93,8 +93,8 @@ def get_coord_name( get_dim_name : A variant when the coordinate is a dimension. """ coord_names = { - "lat": ["lat", "latitude", "Latitude", "y"], - "lon": ["lon", "longitude", "Longitude", "x"], + "lat": ["lat", "latitude", "Latitude", "y", "lat_"], + "lon": ["lon", "longitude", "Longitude", "x", "lon_"], } possible_names = coord_names[coord] coord_name = set(dset.coords).intersection(possible_names) @@ -105,6 +105,28 @@ def get_coord_name( return str(coord_name.pop()) +def is_temporal(da: xr.DataArray) -> bool: + """ + Return if the dataarray is temporal. + + Parameters + ---------- + da : xr.DataArray + The input dataarray. + + Returns + ------- + bool + True if time dimension is present, False otherwise. + """ + try: + get_dim_name(da, "time") + return True + except KeyError: + pass + return False + + def is_spatial(da: xr.DataArray) -> bool: """ Return if the dataarray is spatial. @@ -528,7 +550,7 @@ def integrate_space( only be done in a single dimension at a time. """ if region is not None: - regions = Regions() + regions = ilreg.Regions() dset = regions.restrict_to_region(dset, region) space = [get_dim_name(dset, "lat"), get_dim_name(dset, "lon")] if not isinstance(dset, xr.Dataset): diff --git a/ilamb3/exceptions.py b/ilamb3/exceptions.py index a1d1230..31e78b7 100644 --- a/ilamb3/exceptions.py +++ b/ilamb3/exceptions.py @@ -27,3 +27,31 @@ class NoDatabaseEntry(ILAMBException): class NoSiteDimension(ILAMBException): """The dataset/dataarray does not contain a clear site dimension.""" + + +class AnalysisFailure(ILAMBException): + """ + The analysis function you were running threw an exception. + + Parameters + ---------- + analysis : str + The name of the analysis which has failed. + variable : str + The name of the variable associated with this analysis. + source : str + The name of the source of the reference data. + model : str + The name of the model whose analysis failed. + """ + + def __init__( + self, analysis: str, variable: str, source: str, model: str + ): # numpydoc ignore=GL08 + self.analysis = analysis + self.variable = variable + self.source = source + self.model = model + + def __str__(self): # numpydoc ignore=GL08 + return f"The '{self.analysis}' analysis failed for '{self.variable} | {self.source}' against '{self.model}'" diff --git a/ilamb3/plot.py b/ilamb3/plot.py new file mode 100644 index 0000000..bc5d581 --- /dev/null +++ b/ilamb3/plot.py @@ -0,0 +1,149 @@ +import cartopy.crs as ccrs +import cartopy.feature as cfeature +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import xarray as xr + +import ilamb3.dataset as dset +from ilamb3.regions import Regions + + +def get_extents(da: xr.DataArray) -> list[float]: + """Find the extent of the non-null data.""" + lat = xr.where(da.notnull(), da[dset.get_coord_name(da, "lat")], np.nan) + lon = xr.where(da.notnull(), da[dset.get_coord_name(da, "lon")], np.nan) + return [float(lon.min()), float(lon.max()), float(lat.min()), float(lat.max())] + + +def pick_projection(extents: list[float]) -> tuple[ccrs.Projection, float]: + """Given plot extents choose projection and aspect ratio.""" + if (extents[1] - extents[0]) > 300: + aspect_ratio = 2.0 + proj = ccrs.Robinson(central_longitude=0) + if (extents[2] > 0) and (extents[3] > 75): + aspect_ratio = 1.0 + proj = ccrs.Orthographic(central_latitude=+90, central_longitude=0) + if (extents[2] < -75) and (extents[3] < 0): + aspect_ratio = 1.0 + proj = ccrs.Orthographic(central_latitude=-90, central_longitude=0) + else: + aspect_ratio = max(extents[1], extents[0]) - min(extents[1], extents[0]) + aspect_ratio /= max(extents[3], extents[2]) - min(extents[3], extents[2]) + proj = ccrs.PlateCarree(central_longitude=np.array(extents)[:2].mean()) + return proj, aspect_ratio + + +def finalize_plot(ax: plt.Axes, extents: list[float]) -> plt.Axes: + """Add some final features to our plots.""" + ax.add_feature( + cfeature.NaturalEarthFeature( + "physical", "land", "110m", edgecolor="face", facecolor="0.875" + ), + zorder=-1, + ) + ax.add_feature( + cfeature.NaturalEarthFeature( + "physical", "ocean", "110m", edgecolor="face", facecolor="0.750" + ), + zorder=-1, + ) + # cleanup plotting extents + percent_pad = 0.1 + if (extents[1] - extents[0]) > 330: + extents[:2] = [-180, 180] # set_extent doesn't like (0,360) + extents[2:] = [-90, 90] + else: + dx = percent_pad * (extents[1] - extents[0]) + dy = percent_pad * (extents[3] - extents[2]) + extents = [extents[0] - dx, extents[1] + dx, extents[2] - dy, extents[3] + dy] + ax.set_extent(extents, ccrs.PlateCarree()) + return ax + + +def plot_map(da: xr.DataArray, **kwargs): + + # Process some options + kwargs["cmap"] = plt.get_cmap(kwargs["cmap"] if "cmap" in kwargs else "viridis", 9) + title = kwargs.pop("title") if "title" in kwargs else "" + + # Process region if given + ilamb_regions = Regions() + region = kwargs.pop("region") if "region" in kwargs else None + da = ilamb_regions.restrict_to_region(da, region) + + # Setup figure and its projection + extents = get_extents(da) + proj, aspect = pick_projection(extents) + figsize = kwargs.pop("figsize") if "figsize" in kwargs else (6 * 1.03, 6 / aspect) + _, ax = plt.subplots( + dpi=200, + tight_layout=(kwargs.pop("tight_layout") if "tight_layout" in kwargs else True), + figsize=figsize, + subplot_kw={"projection": proj}, + ) + + # Setup colorbar arguments + cba = {"label": da.attrs["units"]} + if "cbar_kwargs" in kwargs: + cba.update(kwargs.pop("cbar_kwargs")) + + # We can't make temporal maps + if dset.is_temporal(da): + raise ValueError("Cannot make spatio-temporal plots") + + # Space or sites? + if dset.is_spatial(da): + da.plot(ax=ax, transform=ccrs.PlateCarree(), cbar_kwargs=cba, **kwargs) + elif dset.is_site(da): + xr.plot.scatter( + da.to_dataset(), + x=dset.get_coord_name(da, "lon"), + y=dset.get_coord_name(da, "lat"), + hue=da.name, + lw=0, + s=15, + cbar_kwargs=cba, + transform=ccrs.PlateCarree(), + **kwargs, + ) + else: + raise ValueError("plotting error") + ax.set_title(title) + ax = finalize_plot(ax, extents) + return ax + + +def determine_plot_limits( + dsd: xr.Dataset | dict[str, xr.Dataset], + percent_pad: float = 1.0, + symmetrize: list[str] = ["bias"], +) -> pd.DataFrame: + """Return a dataframe with the plot minima and maxima.""" + if isinstance(dsd, xr.Dataset): + dsd = {"key": xr.Dataset} + plots = set([var for _, ds in dsd.items() for var in ds.data_vars]) + out = [] + for plot in plots: + # Score maps are always [0,1] + if "score" in plot: + out.append({"name": plot, "low": 0, "high": 1}) + continue + # Otherwise pick a quantile across all data sources + data = np.quantile( + np.hstack( + [ + np.ma.masked_invalid(ds[plot].to_numpy()).compressed() + for _, ds in dsd.items() + if plot in ds + ] + ), + [percent_pad * 0.01, 1 - percent_pad * 0.01], + ) + # symmetrize + if plot in symmetrize: + vmax = max(np.abs(data)) + data[0] = -vmax + data[1] = vmax + out.append({"name": plot, "low": data[0], "high": data[1]}) + return pd.DataFrame(out) diff --git a/ilamb3/regions.py b/ilamb3/regions.py index fabc706..5115162 100644 --- a/ilamb3/regions.py +++ b/ilamb3/regions.py @@ -1,74 +1,63 @@ """Region definitions for use in the ILAMB system.""" import os -from typing import Literal import numpy as np import xarray as xr - -def get_dim_name( - dset: xr.Dataset | xr.DataArray, dim: Literal["time", "lat", "lon"] -) -> str: - """Return the name of the `dim` dimension from the dataset. - - Parameters - ---------- - dset - The input dataset/dataarray. - dim - The dimension to find in the dataset/dataarray - - Note - ---- - Duplicate of function in ilamb3.database to avoid circular import. - - """ - dim_names = { - "time": ["time"], - "lat": ["lat", "latitude", "Latitude", "y"], - "lon": ["lon", "longitude", "Longitude", "x"], - "depth": ["depth"], - } - possible_names = dim_names[dim] - dim_name = set(dset.dims).intersection(possible_names) - if len(dim_name) != 1: - msg = f"{dim} dimension not found: {dset.dims}] " - msg += f"not in [{','.join(possible_names)}]" - raise KeyError(msg) - return str(dim_name.pop()) +import ilamb3.dataset as dset def restrict_to_bbox( - var: xr.Dataset | xr.DataArray, - lat_name: str, - lon_name: str, - lat0: float, - latf: float, - lon0: float, - lonf: float, + da: xr.DataArray, lat0: float, latf: float, lon0: float, lonf: float ): - """Return the dataset selected to the nearest bounding box. + """Return the dataarray selected to the nearest bounding box. This is awkward because as of `v2023.6.0`, the `method` keyword cannot be used in slices. Note that this routine will sort the dimensions because slicing does not work well on unsorted indices. - """ - var = var.sortby(list(var.dims)) - var = var.sel( + assert isinstance(da, xr.DataArray) + lat_name = dset.get_dim_name(da, "lat") + lon_name = dset.get_dim_name(da, "lon") + da = da.sortby(list(da.dims)) + da = da.sel( { lat_name: slice( - var[lat_name].sel({lat_name: lat0}, method="nearest"), - var[lat_name].sel({lat_name: latf}, method="nearest"), + da[lat_name].sel({lat_name: lat0}, method="nearest"), + da[lat_name].sel({lat_name: latf}, method="nearest"), ), lon_name: slice( - var[lon_name].sel({lon_name: lon0}, method="nearest"), - var[lon_name].sel({lon_name: lonf}, method="nearest"), + da[lon_name].sel({lon_name: lon0}, method="nearest"), + da[lon_name].sel({lon_name: lonf}, method="nearest"), ), } ) - return var + return da + + +def restrict_to_region(da: xr.DataArray, dar: xr.DataArray): + """.""" + assert isinstance(da, xr.DataArray) + lat_name = dset.get_dim_name(da, "lat") + lon_name = dset.get_dim_name(da, "lon") + rlat_name = dset.get_dim_name(dar, "lat") + rlon_name = dset.get_dim_name(dar, "lon") + dar = dar.rename({rlat_name: lat_name, rlon_name: lon_name}) + return restrict_to_bbox( + xr.where( + dar.interp( + {lat_name: da[lat_name], lon_name: da[lon_name]}, + method="nearest", + ), + da, + np.nan, + ), + dar[lat_name].min(), + dar[lat_name].max(), + dar[lon_name].min(), + dar[lon_name].max(), + ) class Regions: @@ -176,41 +165,38 @@ def restrict_to_region( var: xr.Dataset | xr.DataArray, label: str | None, ) -> xr.Dataset | xr.DataArray: - """Given the region label and a variable, return a mask.""" + """Given the region label and a variable, return a mask. + + ILAMB intermediate netCDF files can contain 2 grids defined by (lat,lon) and + (lat_,lon_, the composed grid). For this reason, we need to restrict to regions + one dataarray at a time. + """ if label is None: return var rdata = Regions._regions[label] rtype = rdata[0] - lat_name = get_dim_name(var, "lat") - lon_name = get_dim_name(var, "lon") if rtype == 0: _, _, rlat, rlon = rdata - out = restrict_to_bbox( - var, lat_name, lon_name, rlat[0], rlat[1], rlon[0], rlon[1] - ) + if isinstance(var, xr.DataArray): + out = restrict_to_bbox(var, rlat[0], rlat[1], rlon[0], rlon[1]) + else: + out = xr.Dataset( + { + key: restrict_to_bbox( + var[key], rlat[0], rlat[1], rlon[0], rlon[1] + ) + for key in var + } + ) return out if rtype == 1: _, _, dar = rdata - rlat_name = get_dim_name(dar, "lat") - rlon_name = get_dim_name(dar, "lon") - dar = dar.rename({rlat_name: lat_name, rlon_name: lon_name}) - out = xr.where( - dar.interp( - {lat_name: var[lat_name], lon_name: var[lon_name]}, - method="nearest", - ), - var, - np.nan, - ) - out = restrict_to_bbox( - out, - lat_name, - lon_name, - dar[lat_name].min(), - dar[lat_name].max(), - dar[lon_name].min(), - dar[lon_name].max(), - ) + if isinstance(var, xr.DataArray): + out = restrict_to_region(var, dar) + else: + out = xr.Dataset( + {key: restrict_to_region(var[key], dar) for key in var} + ) return out raise ValueError(f"Region type #{rtype} not recognized") diff --git a/ilamb3/templates/dataset_page.html b/ilamb3/templates/dataset_page.html new file mode 100644 index 0000000..6cea4a0 --- /dev/null +++ b/ilamb3/templates/dataset_page.html @@ -0,0 +1,320 @@ + + + + + +{{ page_header }} + + + + + + + + + +
+
+ + +
+ + +
+

Scalar Table

+
+
+ + +{% for aname,analysis in analyses.items() %} +
+

{{aname}}

+{% for pname,plot in analysis.items() -%} +{% for item in plot -%} +{% for mname,pattern in item.items() %} + +{% endfor -%} +{% endfor -%} +{% endfor %} +
+{% endfor %} + + +
+

All Models

+
+ + +
+ +
+
+ + diff --git a/pyproject.toml b/pyproject.toml index 9b880d1..362f405 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,6 +33,7 @@ dependencies = [ "scipy>=1.15.1", "matplotlib>=3.10.0", "cartopy>=0.24.1", + "pyyaml>=6.0.2", ] [dependency-groups] @@ -65,7 +66,9 @@ version_scheme = "no-guess-dev" local_scheme = "node-and-date" fallback_version = "0.0.0" write_to = "ilamb3/_version.py" -write_to_template = '__version__ = "{version}"' + +[tool.setuptools.package-data] +ilamb3 = ["templates/*.html"] [tool.pytest.ini_options] console_output_style = "count" diff --git a/uv.lock b/uv.lock index 0ce0671..55bfc76 100644 --- a/uv.lock +++ b/uv.lock @@ -651,7 +651,7 @@ wheels = [ [[package]] name = "ilamb3" -version = "2024.5.31.post1.dev7+ge2d377f.d20250124" +version = "2024.5.31.post1.dev19+g7fa5efe.d20250124" source = { editable = "." } dependencies = [ { name = "cartopy" }, @@ -662,6 +662,7 @@ dependencies = [ { name = "pandas" }, { name = "pint-xarray" }, { name = "pooch" }, + { name = "pyyaml" }, { name = "scipy" }, { name = "xarray" }, ] @@ -691,6 +692,7 @@ requires-dist = [ { name = "pandas", specifier = ">=2.2.3" }, { name = "pint-xarray", specifier = ">=0.4" }, { name = "pooch", specifier = ">=1.8.2" }, + { name = "pyyaml", specifier = ">=6.0.2" }, { name = "scipy", specifier = ">=1.15.1" }, { name = "xarray", specifier = ">=2025.1.1" }, ]