Merge pull request #7 from rubisco-sfa/relationships

ADD: relationship analysis and documentation
rubisco-sfa · Jun 3, 2024 · b0a990d · b0a990d
2 parents 87aa59d + 8ad3fa9
commit b0a990d
Show file tree

Hide file tree

Showing 4 changed files with 125 additions and 18 deletions.
diff --git a/docs/index.rst b/docs/index.rst
@@ -38,6 +38,7 @@ This package is being developed and *not* currently listed in PyPI or conda-forg
 
    prelim
    bias
+   relationship
    nbp
 
 .. toctree::

diff --git a/docs/relationship.md b/docs/relationship.md
@@ -0,0 +1,34 @@
+# Relationships
+
+As models are frequently calibrated using the mean state measures, a higher score does not necessarily reflect a more process-oriented model. In order to assess the representation of processes in models, we also evaluate so-called variable-to-variable relationships.
+
+For the purposes of this section, we represent a generic dependent variable as {math}`v` and score its relationship with an independent variable {math}`u`. We form 2 discrete approximations (histograms) using the data collocated in the same spatiotemporal grid cells. The first is a 2D distribution which can be thought as a fraction of the datapoints found in each bin of the independent and dependent variables,
+
+```{math}
+\mathcal{F}_{\mathit{2D}}\left(v,u\right)
+```
+
+We make plots of these functions similar to panels (b) and (c) in the figure below. These example plots come from [Collier, et al., 2018](https://doi.org/10.1029/2018MS001354) and represent the relationship between gross primary productivity and surface air temperature. Panel (b) represents a reference product relationship and panel (c) a model relationship.
+
+[<img src=https://agupubs.onlinelibrary.wiley.com/cms/asset/db948555-3ca8-4a2c-8b4a-622de8109d47/jame20779-fig-0009-m.jpg>](https://doi.org/10.1029/2018MS001354)
+
+We also construct a discrete approximation of the independent variable {math}`u` in bins of the dependent variable {math}`v`, represented as
+
+```{math}
+\mathcal{U}\left(v\right)
+```
+
+In the plots above, the black curve represents the reference response and the maroon the comparison. In the scalar output, we report a score based on the relative RMSE between responses,
+
+```{math}
+\begin{align*}
+\varepsilon &= \frac{
+\sqrt{
+\int \left(\mathcal{U}_{\mathrm{com}}\left(v\right) - \mathcal{U}_{\mathrm{ref}}\left(v\right)\right)^2\ dv
+}
+}{
+ \int \mathcal{U}_{\mathrm{ref}}\left(v\right)^2\ dv
+}\\
+S &= e^{-\varepsilon}
+\end{align*}
+```
diff --git a/ilamb3/analysis/relationship.py b/ilamb3/analysis/relationship.py
@@ -1,8 +1,8 @@
-"""ILAMB Analysis assets for studying relationships between variables.
-
-There is a lot of data to manage for relationships. So we build a relationship class
-and then later create a ILAMBAnalysis ABC which uses it in an ILAMB analysis.
+"""
+The ILAMB relationship (variable to variable) methodology.
 
+There is a lot of data to manage for relationships. So we build a relationship class and
+then later create a ILAMBAnalysis which uses it.
 """
 
 from dataclasses import dataclass, field
@@ -19,7 +19,9 @@
 
 @dataclass
 class Relationship:
-    """A class for developing and comparing relationships from gridded data."""
+    """
+    A class for developing and comparing relationships from gridded data.
+    """
 
     dep: xr.DataArray
     ind: xr.DataArray
@@ -38,6 +40,7 @@ class Relationship:
     _response_std: np.ndarray = field(init=False, default_factory=lambda: None)
 
     def __post_init__(self):
+        """After initialization, perform checks on input data."""
         # check input dataarrays for compatibility
         assert isinstance(self.dep, xr.DataArray)
         assert isinstance(self.ind, xr.DataArray)
@@ -65,17 +68,23 @@ def __post_init__(self):
     def compute_limits(
         self, rel: Union["Relationship", None] = None
     ) -> Union["Relationship", None]:
-        """Compute the limits of the dependent and independent variables.
+        """
+        Compute the limits of the dependent and independent variables.
 
         Parameters
         ----------
+        rel : Relationship, optional
+            An optional and additional relationship who limits you also would like to
+            include in the check.
 
         Returns
         -------
-
+        Relationship or None
+            If a relationship was passed in, this will return it also with its limits
+            set.
         """
 
-        def _singlelimit(var, limit=None):
+        def _singlelimit(var, limit=None):  # numpydoc ignore=GL08
             lim = [var.min(), var.max()]
             delta = 1e-8 * (lim[1] - lim[0])
             lim[0] -= delta
@@ -99,15 +108,16 @@ def _singlelimit(var, limit=None):
         return rel
 
     def build_response(self, nbin: int = 25, eps: float = 3e-3):
-        """Creates a 2D distribution and a functional response
+        """
+        Create the 2D distribution and functional response.
 
         Parameters
         ----------
-        nbin
-            the number of bins to use in both dimensions
-        eps
-            the fraction of points required for a bin in the
-            independent variable be included in the funcitonal responses
+        nbin : int
+            The number of bins to use in both dimensions.
+        eps : float
+            The fraction of points required for a bin in the
+            independent variable be included in the funcitonal responses.
         """
         # if no limits have been created, make them now
         if self._dep_limits is None or self._ind_limits is None:
@@ -164,7 +174,19 @@ def build_response(self, nbin: int = 25, eps: float = 3e-3):
         self._response_std = std
 
     def score_response(self, rel: "Relationship") -> float:
-        """Score the reponse using the relative RMSE error."""
+        """
+        Score the functional response of the relationships.
+
+        Parameters
+        ----------
+        rel : Relationship
+            The other relationship to compare.
+
+        Returns
+        -------
+        float
+            The response score.
+        """
         rel_error = np.linalg.norm(
             self._response_mean - rel._response_mean
         ) / np.linalg.norm(self._response_mean)
@@ -173,20 +195,66 @@ def score_response(self, rel: "Relationship") -> float:
 
 
 class relationship_analysis(ILAMBAnalysis):
-    def __init__(self, dep_variable: str, ind_variable: str):
+    """
+    The ILAMB relationship methodology.
+
+    Parameters
+    ----------
+    dep_variable : str
+        The name of the dependent variable to be used in this analysis.
+    ind_variable : str
+        The name of the independent variable to be used in this analysis.
+
+    Methods
+    -------
+    required_variables
+        What variables are required.
+    __call__
+        The method
+    """
+
+    def __init__(self, dep_variable: str, ind_variable: str):  # numpydoc ignore=GL08
         self.dep_variable = dep_variable
         self.ind_variable = ind_variable
 
     def required_variables(self) -> list[str]:
+        """
+        Return the list of variables required for this analysis.
+
+        Returns
+        -------
+        list
+            The variable names used in this analysis.
+        """
         return [self.dep_variable, self.ind_variable]
 
     def __call__(
         self,
         ref: xr.Dataset,
         com: xr.Dataset,
         regions: list[Union[str, None]] = [None],
-        **kwargs,
     ) -> tuple[pd.DataFrame, xr.Dataset, xr.Dataset]:
+        """
+        Apply the ILAMB relationship methodology on the given datasets.
+
+        Parameters
+        ----------
+        ref : xr.Dataset
+            The reference dataset.
+        com : xr.Dataset
+            The comparison dataset.
+        regions : list
+            A list of region labels over which to apply the analysis.
+
+        Returns
+        -------
+        pd.DataFrame
+            A dataframe with scalar and score information from the comparison.
+        xr.Dataset
+            A dataset containing reference grided information from the comparison.
+        xr.Dataset
+            A dataset containing comparison grided information from the comparison.
+        """
         # Initialize
         analysis_name = "Relationship"
         var_ind = self.ind_variable
@@ -231,4 +299,4 @@ def __call__(
                 "value",
             ],
         )
-        return dfs, None, None
+        return dfs, xr.Dataset(), xr.Dataset()
diff --git a/ilamb3/models/__init__.py b/ilamb3/models/__init__.py
@@ -1 +1,5 @@
 """Model abstractions used in ILAMB."""
+
+from ilamb3.models.esgf import ModelESGF
+
+__all__ = ["ModelESGF"]
-Original file line number
+Diff line change
@@ Expand Up @@
        prelim
        bias
+       relationship
        nbp
     .. toctree::
@@ Expand Down @@