Skip to content

Commit

Permalink
Remove enthalpy/entropy for BAR
Browse files Browse the repository at this point in the history
  • Loading branch information
jaclark5 committed Nov 12, 2024
1 parent bef7d06 commit 74b8341
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 195 deletions.
4 changes: 1 addition & 3 deletions CHANGES
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,7 @@ The rules for this file:

Enhancements
- Parallelise read and preprocess for ABFE workflow. (PR #371)
- Added matrices for entropy and enthalpy for MBAR and BAR,
when BAR is substituted with a series of 2-state MBAR calculations.
(PR #406)
- Added matrices for entropy and enthalpy for MBAR (PR #406)


09/19/2024 orbeckst, jaclark5
Expand Down
169 changes: 18 additions & 151 deletions src/alchemlyb/estimators/bar_.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
import numpy as np
import pandas as pd
import pymbar
from pymbar.other_estimators import bar as BAR_
from sklearn.base import BaseEstimator

from .. import concat
from .base import _EstimatorMixOut


Expand All @@ -20,16 +18,9 @@ class BAR(BaseEstimator, _EstimatorMixOut):
relative_tolerance : float, optional
Set to determine the relative tolerance convergence criteria.
method : str, optional, default='false-position' or 'robust'
Choice of method to solve BAR nonlinear equations,
one of 'self-consistent-iteration' or 'false-position' (default: 'false-position').
If ``use_mbar == True`` the default is 'robust', see
:class:`~alchemlyb.estimators.mbar_.MBAR` for options.
n_bootstraps : int, optional
Whether to use bootstrap to estimate uncertainty. `0` means use analytic error
estimation. 50~200 is a reasonable range to do bootstrap. Currently used when
``use_mbar is True``.
method : str, optional, default='false-position'
choice of method to solve BAR nonlinear equations,
one of 'self-consistent-iteration' or 'false-position' (default: 'false-position')
verbose : bool, optional
Set to True if verbose debug output is desired.
Expand All @@ -44,20 +35,6 @@ class BAR(BaseEstimator, _EstimatorMixOut):
The estimated statistical uncertainty (one standard deviation) in
dimensionless free energy differences.
delta_h_ : DataFrame
The estimated dimensionless enthalpy difference between each state.
d_delta_h_ : DataFrame
The estimated statistical uncertainty (one standard deviation) in
dimensionless enthalpy differences.
delta_s_ : DataFrame, optional
The estimated dimensionless entropy difference between each state.
d_delta_s_ : DataFrame
The estimated statistical uncertainty (one standard deviation) in
dimensionless entropy differences.
states_ : list
Lambda states for which free energy differences were obtained.
Expand All @@ -80,7 +57,6 @@ class BAR(BaseEstimator, _EstimatorMixOut):
.. versionchanged:: 2.4.0
Added assessment of lambda states represented in the indices of u_nk
to provide meaningful errors to ensure proper use.
Added computation of enthalpy and entropy with 2-state MBAR
"""

Expand All @@ -89,47 +65,29 @@ def __init__(
maximum_iterations=10000,
relative_tolerance=1.0e-7,
method="false-position",
n_bootstraps=0,
verbose=False,
):

self.maximum_iterations = maximum_iterations
self.relative_tolerance = relative_tolerance
self.method = method
self.n_bootstraps = n_bootstraps
self.verbose = verbose

# handle for pymbar.BAR object
self._bar = None

def fit(self, u_nk, use_mbar=False, compute_entropy_enthalpy=False):
def fit(self, u_nk):
"""
Compute overlap matrix of reduced potentials using
Bennett acceptance ratio.
Sets the attributes: delta_f_ and d_delta_f_
Parameters
----------
u_nk : DataFrame
u_nk[n,k] is the reduced potential energy of uncorrelated
configuration n evaluated at state k.
use_mbar : bool, optional, default=False
Use 2-state MBAR instead of BAR. This will allow for the
calculation of enthalpic and entropic contributions.
compute_entropy_enthalpy : bool, optional, default=False
Compute entropy and enthalpy from 2-state MBAR. Note
that ``use_mbar`` must be ``True``.
"""

if compute_entropy_enthalpy and not use_mbar:
raise ValueError(
"Cannot compute the enthalpy and entropy with BAR, set use_mbar=True."
)

# sort by state so that rows from same state are in contiguous blocks
u_nk = u_nk.sort_index(level=u_nk.index.names[1:])

Expand Down Expand Up @@ -160,11 +118,6 @@ def fit(self, u_nk, use_mbar=False, compute_entropy_enthalpy=False):
# Now get free energy differences and their uncertainties for each step
deltas = np.array([])
d_deltas = np.array([])
if compute_entropy_enthalpy:
deltas_h = np.array([])
d_deltas_h = np.array([])
deltas_s = np.array([])
d_deltas_s = np.array([])
for k in range(len(N_k) - 1):
if N_k[k] == 0 or N_k[k + 1] == 0:
continue
Expand All @@ -189,67 +142,18 @@ def fit(self, u_nk, use_mbar=False, compute_entropy_enthalpy=False):
w_r = uk1.iloc[:, k] - uk1.iloc[:, k + 1]

# now determine df and ddf using pymbar.BAR
bar_method = self.method if not use_mbar else 'false-position'
out = BAR_(
w_f,
w_r,
method=bar_method,
method=self.method,
maximum_iterations=self.maximum_iterations,
relative_tolerance=self.relative_tolerance,
verbose=self.verbose,
)
if use_mbar: # now determine df and ddf using pymbar.MBAR
tmp_u_nk = concat(
[
groups.get_group(
self._states_[k]
if isinstance(self._states_[k], tuple)
else (self._states_[k],)
),
groups.get_group(
self._states_[k + 1]
if isinstance(self._states_[k + 1], tuple)
else (self._states_[k + 1],)
),
]
)
columns = tmp_u_nk.columns.tolist()
tmp_u_nk = tmp_u_nk.drop(
[x for x in columns if x not in columns[k : k + 2]], axis=1
)
mbar_method = self.method if self.method != "false-position" else "robust"
mbar = pymbar.MBAR(
tmp_u_nk.T,
N_k[k : k + 2],
maximum_iterations=self.maximum_iterations,
relative_tolerance=self.relative_tolerance,
verbose=self.verbose,
initial_f_k=[0, out["Delta_f"]],
solver_protocol=mbar_method,
n_bootstraps=self.n_bootstraps,
)
uncertainty_method = None if self.n_bootstraps == 0 else "bootstrap"
if compute_entropy_enthalpy:
out = mbar.compute_entropy_and_enthalpy(
uncertainty_method=uncertainty_method
)
else:
out = mbar.compute_free_energy_differences(
uncertainty_method=uncertainty_method
)
out = {key: val[0, 1] for key, val in out.items()}

df, ddf = out["Delta_f"], out["dDelta_f"]
deltas = np.append(deltas, df)
d_deltas = np.append(d_deltas, ddf**2)
if compute_entropy_enthalpy:
dh, ddh = out["Delta_u"], out["dDelta_u"]
deltas_h = np.append(deltas_h, dh)
d_deltas_h = np.append(d_deltas_h, ddh**2)

ds, dds = out["Delta_s"], out["dDelta_s"]
deltas_s = np.append(deltas_s, ds)
d_deltas_s = np.append(d_deltas_s, dds**2)

if len(deltas) == 0 and len(states) > 1:
raise ValueError(
Expand All @@ -259,73 +163,36 @@ def fit(self, u_nk, use_mbar=False, compute_entropy_enthalpy=False):
)

# build matrix of deltas between each state
lx = len(deltas)
adelta = np.zeros((lx + 1, lx + 1))
adelta = np.zeros((len(deltas) + 1, len(deltas) + 1))
ad_delta = np.zeros_like(adelta)
if compute_entropy_enthalpy:
adelta_h, ad_delta_h = np.zeros_like(adelta), np.zeros_like(adelta)
adelta_s, ad_delta_s = np.zeros_like(adelta), np.zeros_like(adelta)

for j in range(lx):
out_f, dout_f = np.empty(lx - j), np.empty(lx - j)
if compute_entropy_enthalpy:
out_h, dout_h = np.empty(lx - j), np.empty(lx - j)
out_s, dout_s = np.empty(lx - j), np.empty(lx - j)
for i in range(lx - j):
out_f[i] = deltas[i : i + j + 1].sum()

for j in range(len(deltas)):
out = []
dout = []
for i in range(len(deltas) - j):
out.append(deltas[i : i + j + 1].sum())

# See https://github.com/alchemistry/alchemlyb/pull/60#issuecomment-430720742
# Error estimate generated by BAR ARE correlated

# Use the BAR uncertainties between two neighbour states
if j == 0:
dout.append(d_deltas[i : i + j + 1].sum())
# Other uncertainties are unknown at this point
dout_f[i] = d_deltas[i : i + j + 1].sum() if j == 0 else np.nan

if compute_entropy_enthalpy:
out_h[i] = deltas_h[i : i + j + 1].sum()
out_s[i] = deltas_s[i : i + j + 1].sum()

# Use the BAR uncertainties between two neighbour states
# Other uncertainties are unknown at this point
dout_h[i] = d_deltas_h[i : i + j + 1].sum() if j == 0 else np.nan
dout_s[i] = d_deltas_s[i : i + j + 1].sum() if j == 0 else np.nan
else:
dout.append(np.nan)

adelta += np.diagflat(out_f, k=j + 1)
ad_delta += np.diagflat(dout_f, k=j + 1)
if compute_entropy_enthalpy:
adelta_h += np.diagflat(out_h, k=j + 1)
ad_delta_h += np.diagflat(dout_h, k=j + 1)
adelta_s += np.diagflat(out_s, k=j + 1)
ad_delta_s += np.diagflat(dout_s, k=j + 1)
adelta += np.diagflat(np.array(out), k=j + 1)
ad_delta += np.diagflat(np.array(dout), k=j + 1)

# yield standard delta_f_ free energies between each state
self._delta_f_ = pd.DataFrame(adelta - adelta.T, columns=states, index=states)
if compute_entropy_enthalpy:
self._delta_h_ = pd.DataFrame(
adelta_h - adelta_h.T, columns=states, index=states
)
self._delta_s_ = pd.DataFrame(
adelta_s - adelta_s.T, columns=states, index=states
)

# yield standard deviation d_delta_f_ between each state
self._d_delta_f_ = pd.DataFrame(
np.sqrt(ad_delta + ad_delta.T), columns=states, index=states
)
if compute_entropy_enthalpy:
self._d_delta_h_ = pd.DataFrame(
np.sqrt(ad_delta_h + ad_delta_h.T), columns=states, index=states
)
self._d_delta_s_ = pd.DataFrame(
np.sqrt(ad_delta_s + ad_delta_s.T), columns=states, index=states
)

self._delta_f_.attrs = u_nk.attrs
self._d_delta_f_.attrs = u_nk.attrs
if compute_entropy_enthalpy:
self._delta_h_.attrs = u_nk.attrs
self._d_delta_h_.attrs = u_nk.attrs
self._delta_s_.attrs = u_nk.attrs
self._d_delta_s_.attrs = u_nk.attrs

return self
41 changes: 0 additions & 41 deletions src/alchemlyb/tests/test_fep_estimators.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,47 +223,6 @@ def test_enthalpy_entropy_mbar(gmx_benzene_Coulomb_u_nk):
)


def test_enthalpy_entropy_bar(gmx_benzene_Coulomb_u_nk):
bar = BAR(n_bootstraps=2)
u_nk = alchemlyb.concat(gmx_benzene_Coulomb_u_nk)
bar.fit(u_nk, use_mbar=True)
lx = len(bar.d_delta_f_) - 1
assert bar.delta_f_.iloc[0, :].to_numpy() == pytest.approx(
np.array([0.0, 1.609778, 2.547866, 2.984183, 3.044385]), abs=1e-6
)

bar = BAR()
bar.fit(u_nk, compute_entropy_enthalpy=True, use_mbar=True)
assert bar.delta_f_.iloc[0, :].to_numpy() == pytest.approx(
np.array([0.0, 1.609778, 2.547866, 2.984183, 3.044385]), abs=1e-6
)
assert bar.delta_h_.iloc[0, :].to_numpy() == pytest.approx(
np.array([0.0, 1.337356, 2.140400, 2.512384, 2.563935]), abs=1e-6
)
assert bar.delta_s_.iloc[0, :].to_numpy() == pytest.approx(
np.array([0.0, -0.272422, -0.407467, -0.471798, -0.480450]), abs=1e-6
)
assert np.array(
[bar.d_delta_f_.to_numpy()[i + 1, i] for i in range(lx)]
) == pytest.approx(np.array([0.009879, 0.008740, 0.007372, 0.006381]), abs=1e-6)
assert np.array(
[bar.d_delta_h_.to_numpy()[i + 1, i] for i in range(lx)]
) == pytest.approx(np.array([0.011062, 0.008801, 0.007036, 0.006106]), abs=1e-6)
assert np.array(
[bar.d_delta_s_.to_numpy()[i + 1, i] for i in range(lx)]
) == pytest.approx(np.array([0.008932, 0.006298, 0.004190, 0.002941]), abs=1e-6)


def test_bar_contributions_no_mbar_error(gmx_benzene_Coulomb_u_nk):
bar = BAR()
u_nk = alchemlyb.concat(gmx_benzene_Coulomb_u_nk)
with pytest.raises(
ValueError,
match="Cannot compute the enthalpy and entropy with BAR, set use_mbar=True.",
):
bar.fit(u_nk, compute_entropy_enthalpy=True, use_mbar=False)


def test_wrong_initial_f_k():
with pytest.raises(
ValueError, match="Only `BAR` is supported as string input to `initial_f_k`"
Expand Down

0 comments on commit 74b8341

Please sign in to comment.