alchemistry · xiki-tempula · Nov 14, 2024 · Sep 18, 2024 · Sep 19, 2024 · Sep 19, 2024
diff --git a/CHANGES b/CHANGES
@@ -12,6 +12,14 @@ The rules for this file:
   * accompany each entry with github issue/PR number (Issue #xyz)
   * release numbers follow "Semantic Versioning" https://semver.org
 
+??/??/2024 jaclark5
+
+  * 2.4.1
+
+Enhancements
+ - Added matrices for entropy and enthalpy for MBAR and BAR,
+   when BAR is substituted with a series of 2-state MBAR calculations.
+   (PR #406)
 
 09/17/2024 jaclark5, orbeckst
 

diff --git a/src/alchemlyb/estimators/bar_.py b/src/alchemlyb/estimators/bar_.py
@@ -1,8 +1,10 @@
 import numpy as np
 import pandas as pd
+import pymbar
 from pymbar.other_estimators import bar as BAR_
 from sklearn.base import BaseEstimator
 
+from .. import concat
 from .base import _EstimatorMixOut
 
 
@@ -18,9 +20,16 @@ class BAR(BaseEstimator, _EstimatorMixOut):
     relative_tolerance : float, optional
         Set to determine the relative tolerance convergence criteria.
 
-    method : str, optional, default='false-position'
-        choice of method to solve BAR nonlinear equations,
-        one of 'self-consistent-iteration' or 'false-position' (default: 'false-position')
+    method : str, optional, default='false-position' or 'robust'
+        Choice of method to solve BAR nonlinear equations,
+        one of 'self-consistent-iteration' or 'false-position' (default: 'false-position').
+        If ``use_mbar == True`` the default is 'robust', see 
+        :class:`~alchemlyb.estimators.mbar_.MBAR` for options.
+
+    n_bootstraps : int, optional
+        Whether to use bootstrap to estimate uncertainty. `0` means use analytic error
+        estimation. 50~200 is a reasonable range to do bootstrap. Currently used when
+        ``use_mbar is True``.
 
     verbose : bool, optional
         Set to True if verbose debug output is desired.
@@ -35,6 +44,20 @@ class BAR(BaseEstimator, _EstimatorMixOut):
         The estimated statistical uncertainty (one standard deviation) in
         dimensionless free energy differences.
 
+    delta_h_ : DataFrame
+        The estimated dimensionless enthalpy difference between each state.
+
+    d_delta_h_ : DataFrame
+        The estimated statistical uncertainty (one standard deviation) in
+        dimensionless enthalpy differences.
+
+    delta_s_ : DataFrame, optional
+        The estimated dimensionless entropy difference between each state.
+
+    d_delta_s_ : DataFrame
+        The estimated statistical uncertainty (one standard deviation) in
+        dimensionless entropy differences.
+
     states_ : list
         Lambda states for which free energy differences were obtained.
 
@@ -57,6 +80,7 @@ class BAR(BaseEstimator, _EstimatorMixOut):
     .. versionchanged:: 2.4.0
        Added assessment of lambda states represented in the indices of u_nk
        to provide meaningful errors to ensure proper use.
+       Added computation of enthalpy and entropy with 2-state MBAR
 
     """
 
@@ -65,29 +89,47 @@ def __init__(
         maximum_iterations=10000,
         relative_tolerance=1.0e-7,
         method="false-position",
+        n_bootstraps=0,
         verbose=False,
     ):
 
         self.maximum_iterations = maximum_iterations
         self.relative_tolerance = relative_tolerance
         self.method = method
+        self.n_bootstraps = n_bootstraps
         self.verbose = verbose
 
         # handle for pymbar.BAR object
         self._bar = None
 
-    def fit(self, u_nk):
+    def fit(self, u_nk, use_mbar=False, compute_entropy_enthalpy=False):
         """
         Compute overlap matrix of reduced potentials using
         Bennett acceptance ratio.
 
+        Sets the attributes: delta_f_ and d_delta_f_
+
         Parameters
         ----------
         u_nk : DataFrame
             u_nk[n,k] is the reduced potential energy of uncorrelated
             configuration n evaluated at state k.
 
+        use_mbar : bool, optional, default=False
+            Use 2-state MBAR instead of BAR. This will allow for the
+            calculation of enthalpic and entropic contributions.
+
+        compute_entropy_enthalpy : bool, optional, default=False
+            Compute entropy and enthalpy from 2-state MBAR. Note
+            that ``use_mbar`` must be ``True``.
+
         """
+
+        if compute_entropy_enthalpy and not use_mbar:
+            raise ValueError(
+                "Cannot compute the enthalpy and entropy with BAR, set use_mbar=True."
+            )
+
         # sort by state so that rows from same state are in contiguous blocks
         u_nk = u_nk.sort_index(level=u_nk.index.names[1:])
 
@@ -114,6 +156,11 @@ def fit(self, u_nk):
         # Now get free energy differences and their uncertainties for each step
         deltas = np.array([])
         d_deltas = np.array([])
+        if compute_entropy_enthalpy:
+            deltas_h = np.array([])
+            d_deltas_h = np.array([])
+            deltas_s = np.array([])
+            d_deltas_s = np.array([])
         for k in range(len(N_k) - 1):
             if N_k[k] == 0 or N_k[k + 1] == 0:
                 continue
@@ -129,18 +176,67 @@ def fit(self, u_nk):
             w_r = uk1.iloc[:, k] - uk1.iloc[:, k + 1]
 
             # now determine df and ddf using pymbar.BAR
+            bar_method = self.method if not use_mbar else 'false-position'
             out = BAR_(
                 w_f,
                 w_r,
-                method=self.method,
+                method=bar_method,
                 maximum_iterations=self.maximum_iterations,
                 relative_tolerance=self.relative_tolerance,
                 verbose=self.verbose,
             )
+            if use_mbar:  # now determine df and ddf using pymbar.MBAR
+                tmp_u_nk = concat(
+                    [
+                        groups.get_group(
+                            self._states_[k]
+                            if isinstance(self._states_[k], tuple)
+                            else (self._states_[k],)
+                        ),
+                            groups.get_group(
+                                self._states_[k + 1]
+                                if isinstance(self._states_[k + 1], tuple)
+                                else (self._states_[k + 1],)
+                        ),
+                    ]
+                )
+                columns = tmp_u_nk.columns.tolist()
+                tmp_u_nk = tmp_u_nk.drop(
+                    [x for x in columns if x not in columns[k : k + 2]], axis=1
+                )
+                mbar_method = self.method if self.method != "false-position" else "robust"
+                mbar = pymbar.MBAR(
+                    tmp_u_nk.T,
+                    N_k[k : k + 2],
+                    maximum_iterations=self.maximum_iterations,
+                    relative_tolerance=self.relative_tolerance,
+                    verbose=self.verbose,
+                    initial_f_k=[0, out["Delta_f"]],
+                    solver_protocol=mbar_method,
+                    n_bootstraps=self.n_bootstraps,
+                )
+                uncertainty_method = None if self.n_bootstraps == 0 else "bootstrap"
+                if compute_entropy_enthalpy:
+                    out = mbar.compute_entropy_and_enthalpy(
+                        uncertainty_method=uncertainty_method
+                    )
+                else:
+                    out = mbar.compute_free_energy_differences(
+                        uncertainty_method=uncertainty_method
+                    )
+                out = {key: val[0, 1] for key, val in out.items()}
 
             df, ddf = out["Delta_f"], out["dDelta_f"]
             deltas = np.append(deltas, df)
             d_deltas = np.append(d_deltas, ddf**2)
+            if compute_entropy_enthalpy:
+                dh, ddh = out["Delta_u"], out["dDelta_u"]
+                deltas_h = np.append(deltas_h, dh)
+                d_deltas_h = np.append(d_deltas_h, ddh**2)
+
+                ds, dds = out["Delta_s"], out["dDelta_s"]
+                deltas_s = np.append(deltas_s, ds)
+                d_deltas_s = np.append(d_deltas_s, dds**2)
 
         if len(deltas) == 0 and len(states) > 1:
             raise ValueError(
@@ -150,36 +246,73 @@ def fit(self, u_nk):
             )
 
         # build matrix of deltas between each state
-        adelta = np.zeros((len(deltas) + 1, len(deltas) + 1))
+        lx = len(deltas)
+        adelta = np.zeros((lx + 1, lx + 1))
         ad_delta = np.zeros_like(adelta)
-
-        for j in range(len(deltas)):
-            out = []
-            dout = []
-            for i in range(len(deltas) - j):
-                out.append(deltas[i : i + j + 1].sum())
+        if compute_entropy_enthalpy:
+            adelta_h, ad_delta_h = np.zeros_like(adelta), np.zeros_like(adelta)
+            adelta_s, ad_delta_s = np.zeros_like(adelta), np.zeros_like(adelta)
+
+        for j in range(lx):
+            out_f, dout_f = np.empty(lx - j), np.empty(lx - j)
+            if compute_entropy_enthalpy:
+                out_h, dout_h = np.empty(lx - j), np.empty(lx - j)
+                out_s, dout_s = np.empty(lx - j), np.empty(lx - j)
+            for i in range(lx - j):
+                out_f[i] = deltas[i : i + j + 1].sum()
 
                 # See https://github.com/alchemistry/alchemlyb/pull/60#issuecomment-430720742
                 # Error estimate generated by BAR ARE correlated
 
                 # Use the BAR uncertainties between two neighbour states
-                if j == 0:
-                    dout.append(d_deltas[i : i + j + 1].sum())
                 # Other uncertainties are unknown at this point
-                else:
-                    dout.append(np.nan)
+                dout_f[i] = d_deltas[i : i + j + 1].sum() if j == 0 else np.nan
 
-            adelta += np.diagflat(np.array(out), k=j + 1)
-            ad_delta += np.diagflat(np.array(dout), k=j + 1)
+                if compute_entropy_enthalpy:
+                    out_h[i] = deltas_h[i : i + j + 1].sum()
+                    out_s[i] = deltas_s[i : i + j + 1].sum()
+
+                    # Use the BAR uncertainties between two neighbour states
+                    # Other uncertainties are unknown at this point
+                    dout_h[i] = d_deltas_h[i : i + j + 1].sum() if j == 0 else np.nan
+                    dout_s[i] = d_deltas_s[i : i + j + 1].sum() if j == 0 else np.nan
+
+            adelta += np.diagflat(out_f, k=j + 1)
+            ad_delta += np.diagflat(dout_f, k=j + 1)
+            if compute_entropy_enthalpy:
+                adelta_h += np.diagflat(out_h, k=j + 1)
+                ad_delta_h += np.diagflat(dout_h, k=j + 1)
+                adelta_s += np.diagflat(out_s, k=j + 1)
+                ad_delta_s += np.diagflat(dout_s, k=j + 1)
 
         # yield standard delta_f_ free energies between each state
         self._delta_f_ = pd.DataFrame(adelta - adelta.T, columns=states, index=states)
+        if compute_entropy_enthalpy:
+            self._delta_h_ = pd.DataFrame(
+                adelta_h - adelta_h.T, columns=states, index=states
+            )
+            self._delta_s_ = pd.DataFrame(
+                adelta_s - adelta_s.T, columns=states, index=states
+            )
 
         # yield standard deviation d_delta_f_ between each state
         self._d_delta_f_ = pd.DataFrame(
             np.sqrt(ad_delta + ad_delta.T), columns=states, index=states
         )
+        if compute_entropy_enthalpy:
+            self._d_delta_h_ = pd.DataFrame(
+                np.sqrt(ad_delta_h + ad_delta_h.T), columns=states, index=states
+            )
+            self._d_delta_s_ = pd.DataFrame(
+                np.sqrt(ad_delta_s + ad_delta_s.T), columns=states, index=states
+            )
+
         self._delta_f_.attrs = u_nk.attrs
         self._d_delta_f_.attrs = u_nk.attrs
+        if compute_entropy_enthalpy:
+            self._delta_h_.attrs = u_nk.attrs
+            self._d_delta_h_.attrs = u_nk.attrs
+            self._delta_s_.attrs = u_nk.attrs
+            self._d_delta_s_.attrs = u_nk.attrs
 
         return self
diff --git a/src/alchemlyb/estimators/base.py b/src/alchemlyb/estimators/base.py
@@ -5,6 +5,10 @@ class _EstimatorMixOut:
     _d_delta_f_ = None
     _delta_f_ = None
     _states_ = None
+    _d_delta_h_ = None
+    _delta_h_ = None
+    _d_delta_s_ = None
+    _delta_s_ = None
 
     @property
     def d_delta_f_(self):
@@ -14,6 +18,22 @@ def d_delta_f_(self):
     def delta_f_(self):
         return self._delta_f_
 
+    @property
+    def d_delta_h_(self):
+        return self._d_delta_h_
+
+    @property
+    def delta_h_(self):
+        return self._delta_h_
+
+    @property
+    def d_delta_s_(self):
+        return self._d_delta_s_
+
+    @property
+    def delta_s_(self):
+        return self._delta_s_
+
     @property
     def states_(self):
         return self._states_