Remove enthalpy/entropy for BAR

alchemistry · Nov 12, 2024 · 74b8341 · 74b8341
1 parent bef7d06
commit 74b8341
Show file tree

Hide file tree

Showing 3 changed files with 19 additions and 195 deletions.
diff --git a/CHANGES b/CHANGES
@@ -18,9 +18,7 @@ The rules for this file:
 
 Enhancements
   - Parallelise read and preprocess for ABFE workflow. (PR #371)
-  - Added matrices for entropy and enthalpy for MBAR and BAR,
-    when BAR is substituted with a series of 2-state MBAR calculations.
-    (PR #406)
+  - Added matrices for entropy and enthalpy for MBAR (PR #406)
 
 
 09/19/2024 orbeckst, jaclark5

diff --git a/src/alchemlyb/estimators/bar_.py b/src/alchemlyb/estimators/bar_.py
@@ -1,10 +1,8 @@
 import numpy as np
 import pandas as pd
-import pymbar
 from pymbar.other_estimators import bar as BAR_
 from sklearn.base import BaseEstimator
 
-from .. import concat
 from .base import _EstimatorMixOut
 
 
@@ -20,16 +18,9 @@ class BAR(BaseEstimator, _EstimatorMixOut):
     relative_tolerance : float, optional
         Set to determine the relative tolerance convergence criteria.
 
-    method : str, optional, default='false-position' or 'robust'
-        Choice of method to solve BAR nonlinear equations,
-        one of 'self-consistent-iteration' or 'false-position' (default: 'false-position').
-        If ``use_mbar == True`` the default is 'robust', see 
-        :class:`~alchemlyb.estimators.mbar_.MBAR` for options.
-
-    n_bootstraps : int, optional
-        Whether to use bootstrap to estimate uncertainty. `0` means use analytic error
-        estimation. 50~200 is a reasonable range to do bootstrap. Currently used when
-        ``use_mbar is True``.
+    method : str, optional, default='false-position'
+        choice of method to solve BAR nonlinear equations,
+        one of 'self-consistent-iteration' or 'false-position' (default: 'false-position')
 
     verbose : bool, optional
         Set to True if verbose debug output is desired.
@@ -44,20 +35,6 @@ class BAR(BaseEstimator, _EstimatorMixOut):
         The estimated statistical uncertainty (one standard deviation) in
         dimensionless free energy differences.
 
-    delta_h_ : DataFrame
-        The estimated dimensionless enthalpy difference between each state.
-
-    d_delta_h_ : DataFrame
-        The estimated statistical uncertainty (one standard deviation) in
-        dimensionless enthalpy differences.
-
-    delta_s_ : DataFrame, optional
-        The estimated dimensionless entropy difference between each state.
-
-    d_delta_s_ : DataFrame
-        The estimated statistical uncertainty (one standard deviation) in
-        dimensionless entropy differences.
-
     states_ : list
         Lambda states for which free energy differences were obtained.
 
@@ -80,7 +57,6 @@ class BAR(BaseEstimator, _EstimatorMixOut):
     .. versionchanged:: 2.4.0
        Added assessment of lambda states represented in the indices of u_nk
        to provide meaningful errors to ensure proper use.
-       Added computation of enthalpy and entropy with 2-state MBAR
 
     """
 
@@ -89,47 +65,29 @@ def __init__(
         maximum_iterations=10000,
         relative_tolerance=1.0e-7,
         method="false-position",
-        n_bootstraps=0,
         verbose=False,
     ):
 
         self.maximum_iterations = maximum_iterations
         self.relative_tolerance = relative_tolerance
         self.method = method
-        self.n_bootstraps = n_bootstraps
         self.verbose = verbose
 
         # handle for pymbar.BAR object
         self._bar = None
 
-    def fit(self, u_nk, use_mbar=False, compute_entropy_enthalpy=False):
+    def fit(self, u_nk):
         """
         Compute overlap matrix of reduced potentials using
         Bennett acceptance ratio.
 
-        Sets the attributes: delta_f_ and d_delta_f_
-
         Parameters
         ----------
         u_nk : DataFrame
             u_nk[n,k] is the reduced potential energy of uncorrelated
             configuration n evaluated at state k.
 
-        use_mbar : bool, optional, default=False
-            Use 2-state MBAR instead of BAR. This will allow for the
-            calculation of enthalpic and entropic contributions.
-
-        compute_entropy_enthalpy : bool, optional, default=False
-            Compute entropy and enthalpy from 2-state MBAR. Note
-            that ``use_mbar`` must be ``True``.
-
         """
-
-        if compute_entropy_enthalpy and not use_mbar:
-            raise ValueError(
-                "Cannot compute the enthalpy and entropy with BAR, set use_mbar=True."
-            )
-
         # sort by state so that rows from same state are in contiguous blocks
         u_nk = u_nk.sort_index(level=u_nk.index.names[1:])
 
@@ -160,11 +118,6 @@ def fit(self, u_nk, use_mbar=False, compute_entropy_enthalpy=False):
         # Now get free energy differences and their uncertainties for each step
         deltas = np.array([])
         d_deltas = np.array([])
-        if compute_entropy_enthalpy:
-            deltas_h = np.array([])
-            d_deltas_h = np.array([])
-            deltas_s = np.array([])
-            d_deltas_s = np.array([])
         for k in range(len(N_k) - 1):
             if N_k[k] == 0 or N_k[k + 1] == 0:
                 continue
@@ -189,67 +142,18 @@ def fit(self, u_nk, use_mbar=False, compute_entropy_enthalpy=False):
             w_r = uk1.iloc[:, k] - uk1.iloc[:, k + 1]
 
             # now determine df and ddf using pymbar.BAR
-            bar_method = self.method if not use_mbar else 'false-position'
             out = BAR_(
                 w_f,
                 w_r,
-                method=bar_method,
+                method=self.method,
                 maximum_iterations=self.maximum_iterations,
                 relative_tolerance=self.relative_tolerance,
                 verbose=self.verbose,
             )
-            if use_mbar:  # now determine df and ddf using pymbar.MBAR
-                tmp_u_nk = concat(
-                    [
-                        groups.get_group(
-                            self._states_[k]
-                            if isinstance(self._states_[k], tuple)
-                            else (self._states_[k],)
-                        ),
-                            groups.get_group(
-                                self._states_[k + 1]
-                                if isinstance(self._states_[k + 1], tuple)
-                                else (self._states_[k + 1],)
-                        ),
-                    ]
-                )
-                columns = tmp_u_nk.columns.tolist()
-                tmp_u_nk = tmp_u_nk.drop(
-                    [x for x in columns if x not in columns[k : k + 2]], axis=1
-                )
-                mbar_method = self.method if self.method != "false-position" else "robust"
-                mbar = pymbar.MBAR(
-                    tmp_u_nk.T,
-                    N_k[k : k + 2],
-                    maximum_iterations=self.maximum_iterations,
-                    relative_tolerance=self.relative_tolerance,
-                    verbose=self.verbose,
-                    initial_f_k=[0, out["Delta_f"]],
-                    solver_protocol=mbar_method,
-                    n_bootstraps=self.n_bootstraps,
-                )
-                uncertainty_method = None if self.n_bootstraps == 0 else "bootstrap"
-                if compute_entropy_enthalpy:
-                    out = mbar.compute_entropy_and_enthalpy(
-                        uncertainty_method=uncertainty_method
-                    )
-                else:
-                    out = mbar.compute_free_energy_differences(
-                        uncertainty_method=uncertainty_method
-                    )
-                out = {key: val[0, 1] for key, val in out.items()}
 
             df, ddf = out["Delta_f"], out["dDelta_f"]
             deltas = np.append(deltas, df)
             d_deltas = np.append(d_deltas, ddf**2)
-            if compute_entropy_enthalpy:
-                dh, ddh = out["Delta_u"], out["dDelta_u"]
-                deltas_h = np.append(deltas_h, dh)
-                d_deltas_h = np.append(d_deltas_h, ddh**2)
-
-                ds, dds = out["Delta_s"], out["dDelta_s"]
-                deltas_s = np.append(deltas_s, ds)
-                d_deltas_s = np.append(d_deltas_s, dds**2)
 
         if len(deltas) == 0 and len(states) > 1:
             raise ValueError(
@@ -259,73 +163,36 @@ def fit(self, u_nk, use_mbar=False, compute_entropy_enthalpy=False):
             )
 
         # build matrix of deltas between each state
-        lx = len(deltas)
-        adelta = np.zeros((lx + 1, lx + 1))
+        adelta = np.zeros((len(deltas) + 1, len(deltas) + 1))
         ad_delta = np.zeros_like(adelta)
-        if compute_entropy_enthalpy:
-            adelta_h, ad_delta_h = np.zeros_like(adelta), np.zeros_like(adelta)
-            adelta_s, ad_delta_s = np.zeros_like(adelta), np.zeros_like(adelta)
-
-        for j in range(lx):
-            out_f, dout_f = np.empty(lx - j), np.empty(lx - j)
-            if compute_entropy_enthalpy:
-                out_h, dout_h = np.empty(lx - j), np.empty(lx - j)
-                out_s, dout_s = np.empty(lx - j), np.empty(lx - j)
-            for i in range(lx - j):
-                out_f[i] = deltas[i : i + j + 1].sum()
+
+        for j in range(len(deltas)):
+            out = []
+            dout = []
+            for i in range(len(deltas) - j):
+                out.append(deltas[i : i + j + 1].sum())
 
                 # See https://github.com/alchemistry/alchemlyb/pull/60#issuecomment-430720742
                 # Error estimate generated by BAR ARE correlated
 
                 # Use the BAR uncertainties between two neighbour states
+                if j == 0:
+                    dout.append(d_deltas[i : i + j + 1].sum())
                 # Other uncertainties are unknown at this point
-                dout_f[i] = d_deltas[i : i + j + 1].sum() if j == 0 else np.nan
-
-                if compute_entropy_enthalpy:
-                    out_h[i] = deltas_h[i : i + j + 1].sum()
-                    out_s[i] = deltas_s[i : i + j + 1].sum()
-
-                    # Use the BAR uncertainties between two neighbour states
-                    # Other uncertainties are unknown at this point
-                    dout_h[i] = d_deltas_h[i : i + j + 1].sum() if j == 0 else np.nan
-                    dout_s[i] = d_deltas_s[i : i + j + 1].sum() if j == 0 else np.nan
+                else:
+                    dout.append(np.nan)
 
-            adelta += np.diagflat(out_f, k=j + 1)
-            ad_delta += np.diagflat(dout_f, k=j + 1)
-            if compute_entropy_enthalpy:
-                adelta_h += np.diagflat(out_h, k=j + 1)
-                ad_delta_h += np.diagflat(dout_h, k=j + 1)
-                adelta_s += np.diagflat(out_s, k=j + 1)
-                ad_delta_s += np.diagflat(dout_s, k=j + 1)
+            adelta += np.diagflat(np.array(out), k=j + 1)
+            ad_delta += np.diagflat(np.array(dout), k=j + 1)
 
         # yield standard delta_f_ free energies between each state
         self._delta_f_ = pd.DataFrame(adelta - adelta.T, columns=states, index=states)
-        if compute_entropy_enthalpy:
-            self._delta_h_ = pd.DataFrame(
-                adelta_h - adelta_h.T, columns=states, index=states
-            )
-            self._delta_s_ = pd.DataFrame(
-                adelta_s - adelta_s.T, columns=states, index=states
-            )
 
         # yield standard deviation d_delta_f_ between each state
         self._d_delta_f_ = pd.DataFrame(
             np.sqrt(ad_delta + ad_delta.T), columns=states, index=states
         )
-        if compute_entropy_enthalpy:
-            self._d_delta_h_ = pd.DataFrame(
-                np.sqrt(ad_delta_h + ad_delta_h.T), columns=states, index=states
-            )
-            self._d_delta_s_ = pd.DataFrame(
-                np.sqrt(ad_delta_s + ad_delta_s.T), columns=states, index=states
-            )
-
         self._delta_f_.attrs = u_nk.attrs
         self._d_delta_f_.attrs = u_nk.attrs
-        if compute_entropy_enthalpy:
-            self._delta_h_.attrs = u_nk.attrs
-            self._d_delta_h_.attrs = u_nk.attrs
-            self._delta_s_.attrs = u_nk.attrs
-            self._d_delta_s_.attrs = u_nk.attrs
 
         return self
diff --git a/src/alchemlyb/tests/test_fep_estimators.py b/src/alchemlyb/tests/test_fep_estimators.py
@@ -223,47 +223,6 @@ def test_enthalpy_entropy_mbar(gmx_benzene_Coulomb_u_nk):
     )
 
 
-def test_enthalpy_entropy_bar(gmx_benzene_Coulomb_u_nk):
-    bar = BAR(n_bootstraps=2)
-    u_nk = alchemlyb.concat(gmx_benzene_Coulomb_u_nk)
-    bar.fit(u_nk, use_mbar=True)
-    lx = len(bar.d_delta_f_) - 1
-    assert bar.delta_f_.iloc[0, :].to_numpy() == pytest.approx(
-        np.array([0.0, 1.609778, 2.547866, 2.984183, 3.044385]), abs=1e-6
-    )
-
-    bar = BAR()
-    bar.fit(u_nk, compute_entropy_enthalpy=True, use_mbar=True)
-    assert bar.delta_f_.iloc[0, :].to_numpy() == pytest.approx(
-        np.array([0.0, 1.609778, 2.547866, 2.984183, 3.044385]), abs=1e-6
-    )
-    assert bar.delta_h_.iloc[0, :].to_numpy() == pytest.approx(
-        np.array([0.0, 1.337356, 2.140400, 2.512384, 2.563935]), abs=1e-6
-    )
-    assert bar.delta_s_.iloc[0, :].to_numpy() == pytest.approx(
-        np.array([0.0, -0.272422, -0.407467, -0.471798, -0.480450]), abs=1e-6
-    )
-    assert np.array(
-        [bar.d_delta_f_.to_numpy()[i + 1, i] for i in range(lx)]
-    ) == pytest.approx(np.array([0.009879, 0.008740, 0.007372, 0.006381]), abs=1e-6)
-    assert np.array(
-        [bar.d_delta_h_.to_numpy()[i + 1, i] for i in range(lx)]
-    ) == pytest.approx(np.array([0.011062, 0.008801, 0.007036, 0.006106]), abs=1e-6)
-    assert np.array(
-        [bar.d_delta_s_.to_numpy()[i + 1, i] for i in range(lx)]
-    ) == pytest.approx(np.array([0.008932, 0.006298, 0.004190, 0.002941]), abs=1e-6)
-
-
-def test_bar_contributions_no_mbar_error(gmx_benzene_Coulomb_u_nk):
-    bar = BAR()
-    u_nk = alchemlyb.concat(gmx_benzene_Coulomb_u_nk)
-    with pytest.raises(
-        ValueError,
-        match="Cannot compute the enthalpy and entropy with BAR, set use_mbar=True.",
-    ):
-        bar.fit(u_nk, compute_entropy_enthalpy=True, use_mbar=False)
-
-
 def test_wrong_initial_f_k():
     with pytest.raises(
         ValueError, match="Only `BAR` is supported as string input to `initial_f_k`"