From 968d4f2d24a140f92dfb28861c5dfca8d2e7b543 Mon Sep 17 00:00:00 2001
From: Jennifer A Clark <jennifer.a.clark13@gmail.com>
Date: Thu, 14 Nov 2024 09:24:30 -0500
Subject: [PATCH] Add entropy and enthalpy (#406)

* Add enthalpy and entropy calculations

* Add enthalpy and entropy calculations

* Update tests for coverage

* black

* Add bootstrapping to BAR().fit(use_mbar=True) and adapt tests for code coverage

* Update CHANGES

* Update get_group syntax

* Update method default in bar when use_mbar==True

* Remove enthalpy/entropy for BAR

* Address review comments

* Update docstrings for units.py for the case of entropy values

* Update entropy attribute name from delta_s to delta_sT

* Undo irrelevant addition
---
 CHANGES                                    |  5 +-
 src/alchemlyb/estimators/base.py           | 25 +++++++++-
 src/alchemlyb/estimators/mbar_.py          | 58 +++++++++++++++++++---
 src/alchemlyb/parsing/__init__.py          |  1 -
 src/alchemlyb/postprocessors/units.py      | 11 +++-
 src/alchemlyb/tests/test_fep_estimators.py | 25 ++++++++++
 6 files changed, 111 insertions(+), 14 deletions(-)

diff --git a/CHANGES b/CHANGES
index 103526c9..d11fc903 100644
--- a/CHANGES
+++ b/CHANGES
@@ -12,11 +12,12 @@ The rules for this file:
   * accompany each entry with github issue/PR number (Issue #xyz)
   * release numbers follow "Semantic Versioning" https://semver.org
 
-**/**/**** xiki-tempula
+**/**/**** xiki-tempula, jaclark5
 
   * 2.5.0
 
 Enhancements
+  - Added matrices for entropy and enthalpy for MBAR (PR #406)
   - Parallelise read and preprocess for ABFE workflow. (PR #371)
 
 
@@ -27,7 +28,7 @@ Enhancements
 Fixes
   - [doc] tutorial: use alchemlyb.concat (PR #399)
   - Resolve pandas FutureWarnings in bar_.py and mbar_.py (issue #408 PR #407)
- 
+
 
 09/17/2024 jaclark5, orbeckst
 
diff --git a/src/alchemlyb/estimators/base.py b/src/alchemlyb/estimators/base.py
index 93f3da8a..230ff782 100644
--- a/src/alchemlyb/estimators/base.py
+++ b/src/alchemlyb/estimators/base.py
@@ -1,10 +1,15 @@
 class _EstimatorMixOut:
-    """This class creates view for the d_delta_f_, delta_f_, states_ for the
-    estimator class to consume."""
+    """This class creates view for the attributes: states_, delta_f_, d_delta_f_,
+    delta_h_, d_delta_h_, delta_sT_, d_delta_sT_ for the estimator class to consume.
+    """
 
     _d_delta_f_ = None
     _delta_f_ = None
     _states_ = None
+    _d_delta_h_ = None
+    _delta_h_ = None
+    _d_delta_sT_ = None
+    _delta_sT_ = None
 
     @property
     def d_delta_f_(self):
@@ -14,6 +19,22 @@ def d_delta_f_(self):
     def delta_f_(self):
         return self._delta_f_
 
+    @property
+    def d_delta_h_(self):
+        return self._d_delta_h_
+
+    @property
+    def delta_h_(self):
+        return self._delta_h_
+
+    @property
+    def d_delta_sT_(self):
+        return self._d_delta_sT_
+
+    @property
+    def delta_sT_(self):
+        return self._delta_sT_
+
     @property
     def states_(self):
         return self._states_
diff --git a/src/alchemlyb/estimators/mbar_.py b/src/alchemlyb/estimators/mbar_.py
index 45a648f6..78989cc2 100644
--- a/src/alchemlyb/estimators/mbar_.py
+++ b/src/alchemlyb/estimators/mbar_.py
@@ -57,6 +57,20 @@ class MBAR(BaseEstimator, _EstimatorMixOut):
         The estimated statistical uncertainty (one standard deviation) in
         dimensionless free energy differences.
 
+    delta_h_ : DataFrame
+        The estimated dimensionless enthalpy difference between each state.
+
+    d_delta_h_ : DataFrame
+        The estimated statistical uncertainty (one standard deviation) in
+        dimensionless enthalpy differences.
+
+    delta_sT_ : DataFrame, optional
+        The estimated dimensionless entropy difference between each state.
+
+    d_delta_sT_ : DataFrame
+        The estimated statistical uncertainty (one standard deviation) in
+        dimensionless entropy differences.
+
     theta_ : DataFrame
         The theta matrix.
 
@@ -80,9 +94,11 @@ class MBAR(BaseEstimator, _EstimatorMixOut):
     .. versionchanged:: 2.1.0
         `n_bootstraps` option added.
     .. versionchanged:: 2.4.0
-       Handle initial estimate, initial_f_k, from bar in the instance
-       that not all lambda states represented as column headers are
-       represented in the indices of u_nk.
+        Handle initial estimate, initial_f_k, from bar in the instance
+        that not all lambda states represented as column headers are
+        represented in the indices of u_nk.
+    .. versionchanged:: 2.5.0
+        Added computation of enthalpy and entropy
 
     """
 
@@ -110,7 +126,7 @@ def __init__(
         # handle for pymbar.MBAR object
         self._mbar = None
 
-    def fit(self, u_nk):
+    def fit(self, u_nk, compute_entropy_enthalpy=False):
         """
         Compute overlap matrix of reduced potentials using multi-state
         Bennett acceptance ratio.
@@ -121,6 +137,9 @@ def fit(self, u_nk):
             ``u_nk[n, k]`` is the reduced potential energy of uncorrelated
             configuration ``n`` evaluated at state ``k``.
 
+        compute_entropy_enthalpy : bool, optional, default=False
+            Compute entropy and enthalpy.
+
         """
         # sort by state so that rows from same state are in contiguous blocks
         u_nk = u_nk.sort_index(level=u_nk.index.names[1:])
@@ -171,13 +190,18 @@ def fit(self, u_nk):
             solver_protocol=self.method,
             n_bootstraps=self.n_bootstraps,
         )
-        if self.n_bootstraps == 0:
-            uncertainty_method = None
-        else:
-            uncertainty_method = "bootstrap"
+
+        uncertainty_method = None if self.n_bootstraps == 0 else "bootstrap"
         out = self._mbar.compute_free_energy_differences(
             return_theta=True, uncertainty_method=uncertainty_method
         )
+        if compute_entropy_enthalpy:
+            out.update(
+                self._mbar.compute_entropy_and_enthalpy(
+                    uncertainty_method=uncertainty_method
+                )
+            )
+
         self._delta_f_ = pd.DataFrame(
             out["Delta_f"], columns=self._states_, index=self._states_
         )
@@ -187,9 +211,27 @@ def fit(self, u_nk):
         self.theta_ = pd.DataFrame(
             out["Theta"], columns=self._states_, index=self._states_
         )
+        if compute_entropy_enthalpy:
+            self._delta_h_ = pd.DataFrame(
+                out["Delta_u"], columns=self._states_, index=self._states_
+            )
+            self._d_delta_h_ = pd.DataFrame(
+                out["dDelta_u"], columns=self._states_, index=self._states_
+            )
+            self._delta_sT_ = pd.DataFrame(
+                out["Delta_s"], columns=self._states_, index=self._states_
+            )
+            self._d_delta_sT_ = pd.DataFrame(
+                out["dDelta_s"], columns=self._states_, index=self._states_
+            )
 
         self._delta_f_.attrs = u_nk.attrs
         self._d_delta_f_.attrs = u_nk.attrs
+        if compute_entropy_enthalpy:
+            self._delta_h_.attrs = u_nk.attrs
+            self._d_delta_h_.attrs = u_nk.attrs
+            self._delta_sT_.attrs = u_nk.attrs
+            self._d_delta_sT_.attrs = u_nk.attrs
 
         return self
 
diff --git a/src/alchemlyb/parsing/__init__.py b/src/alchemlyb/parsing/__init__.py
index dc048732..c2f5e1bf 100644
--- a/src/alchemlyb/parsing/__init__.py
+++ b/src/alchemlyb/parsing/__init__.py
@@ -34,5 +34,4 @@ def wrapper(outfile, T, *args, **kwargs):
                 dict_with_df[k].attrs["temperature"] = T
                 dict_with_df[k].attrs["energy_unit"] = "kT"
         return dict_with_df
-
     return wrapper
diff --git a/src/alchemlyb/postprocessors/units.py b/src/alchemlyb/postprocessors/units.py
index 510b4465..0dd14d66 100644
--- a/src/alchemlyb/postprocessors/units.py
+++ b/src/alchemlyb/postprocessors/units.py
@@ -16,6 +16,9 @@
 def to_kT(df, T=None):
     """Convert the unit of a DataFrame to `kT`.
 
+    Note that if entropy values are passed it is assumed that they are
+    multiplied by the temperature, S * T.
+
     If temperature `T` is not provided, the DataFrame need to have attribute
     `temperature` and `energy_unit`. Otherwise, the temperature of the output
     dateframe will be set accordingly.
@@ -25,7 +28,7 @@ def to_kT(df, T=None):
     df : DataFrame
         DataFrame to convert unit.
     T : float
-        Temperature (default: None).
+        Temperature (default: None) in Kelvin.
 
     Returns
     -------
@@ -61,6 +64,9 @@ def to_kT(df, T=None):
 def to_kcalmol(df, T=None):
     """Convert the unit of a DataFrame to kcal/mol.
 
+    Note that if entropy values are passed, the result is S * T in units
+    of kcal/mol.
+
     If temperature `T` is not provided, the DataFrame need to have attribute
     `temperature` and `energy_unit`. Otherwise, the temperature of the output
     dateframe will be set accordingly.
@@ -86,6 +92,9 @@ def to_kcalmol(df, T=None):
 def to_kJmol(df, T=None):
     """Convert the unit of a DataFrame to kJ/mol.
 
+    Note that if entropy values are passed, the result is S * T in units
+    of kJ/mol.
+
     If temperature `T` is not provided, the DataFrame need to have attribute
     `temperature` and `energy_unit`. Otherwise, the temperature of the output
     dateframe will be set accordingly.
diff --git a/src/alchemlyb/tests/test_fep_estimators.py b/src/alchemlyb/tests/test_fep_estimators.py
index 074e3d56..73249590 100644
--- a/src/alchemlyb/tests/test_fep_estimators.py
+++ b/src/alchemlyb/tests/test_fep_estimators.py
@@ -198,6 +198,31 @@ def test_bootstrap(gmx_benzene_Coulomb_u_nk):
     assert mbar_bootstrap_err != mbar_err
 
 
+def test_enthalpy_entropy_mbar(gmx_benzene_Coulomb_u_nk):
+    mbar = MBAR()
+    u_nk = alchemlyb.concat(gmx_benzene_Coulomb_u_nk)
+    mbar.fit(u_nk, compute_entropy_enthalpy=True)
+
+    assert mbar.delta_f_.iloc[0, :].to_numpy() == pytest.approx(
+        np.array([0.0, 1.619069, 2.557990, 2.986302, 3.041156]), abs=1e-6
+    )
+    assert mbar.delta_h_.iloc[0, :].to_numpy() == pytest.approx(
+        np.array([0.0, 1.241970, 1.695000, 1.706555, 1.388906]), abs=1e-6
+    )
+    assert mbar.delta_sT_.iloc[0, :].to_numpy() == pytest.approx(
+        np.array([0.0, -0.377099, -0.862990, -1.279746, -1.652250]), abs=1e-6
+    )
+    assert mbar.d_delta_f_.iloc[0, :].to_numpy() == pytest.approx(
+        np.array([0.0, 0.008802, 0.014432, 0.018097, 0.020879]), abs=1e-6
+    )
+    assert mbar.d_delta_h_.iloc[0, :].to_numpy() == pytest.approx(
+        np.array([0.0, 0.011598, 0.016538, 0.018077, 0.018940]), abs=1e-6
+    )
+    assert mbar.d_delta_sT_.iloc[0, :].to_numpy() == pytest.approx(
+        np.array([0.0, 0.012242, 0.019519, 0.023606, 0.026107]), abs=1e-6
+    )
+
+
 def test_wrong_initial_f_k():
     with pytest.raises(
         ValueError, match="Only `BAR` is supported as string input to `initial_f_k`"