Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add entropy and enthalpy #406

Merged
merged 15 commits into from
Nov 14, 2024
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions CHANGES
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,14 @@ The rules for this file:
* accompany each entry with github issue/PR number (Issue #xyz)
* release numbers follow "Semantic Versioning" https://semver.org

??/??/2024 jaclark5

* 2.4.1

Enhancements
- Added matrices for entropy and enthalpy for MBAR and BAR,
when BAR is substituted with a series of 2-state MBAR calculations.
(PR #406)

09/17/2024 jaclark5, orbeckst

Expand Down
169 changes: 151 additions & 18 deletions src/alchemlyb/estimators/bar_.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import numpy as np
import pandas as pd
import pymbar
from pymbar.other_estimators import bar as BAR_
from sklearn.base import BaseEstimator

from .. import concat
from .base import _EstimatorMixOut


Expand All @@ -18,9 +20,16 @@ class BAR(BaseEstimator, _EstimatorMixOut):
relative_tolerance : float, optional
Set to determine the relative tolerance convergence criteria.

method : str, optional, default='false-position'
choice of method to solve BAR nonlinear equations,
one of 'self-consistent-iteration' or 'false-position' (default: 'false-position')
method : str, optional, default='false-position' or 'robust'
Choice of method to solve BAR nonlinear equations,
one of 'self-consistent-iteration' or 'false-position' (default: 'false-position').
If ``use_mbar == True`` the default is 'robust', see
:class:`~alchemlyb.estimators.mbar_.MBAR` for options.

n_bootstraps : int, optional
Whether to use bootstrap to estimate uncertainty. `0` means use analytic error
estimation. 50~200 is a reasonable range to do bootstrap. Currently used when
``use_mbar is True``.

verbose : bool, optional
Set to True if verbose debug output is desired.
Expand All @@ -35,6 +44,20 @@ class BAR(BaseEstimator, _EstimatorMixOut):
The estimated statistical uncertainty (one standard deviation) in
dimensionless free energy differences.

delta_h_ : DataFrame
The estimated dimensionless enthalpy difference between each state.

d_delta_h_ : DataFrame
The estimated statistical uncertainty (one standard deviation) in
dimensionless enthalpy differences.

delta_s_ : DataFrame, optional
The estimated dimensionless entropy difference between each state.

d_delta_s_ : DataFrame
The estimated statistical uncertainty (one standard deviation) in
dimensionless entropy differences.

states_ : list
Lambda states for which free energy differences were obtained.

Expand All @@ -57,6 +80,7 @@ class BAR(BaseEstimator, _EstimatorMixOut):
.. versionchanged:: 2.4.0
Added assessment of lambda states represented in the indices of u_nk
to provide meaningful errors to ensure proper use.
Added computation of enthalpy and entropy with 2-state MBAR

"""

Expand All @@ -65,29 +89,47 @@ def __init__(
maximum_iterations=10000,
relative_tolerance=1.0e-7,
method="false-position",
n_bootstraps=0,
verbose=False,
):

self.maximum_iterations = maximum_iterations
self.relative_tolerance = relative_tolerance
self.method = method
self.n_bootstraps = n_bootstraps
self.verbose = verbose

# handle for pymbar.BAR object
self._bar = None

def fit(self, u_nk):
def fit(self, u_nk, use_mbar=False, compute_entropy_enthalpy=False):
"""
Compute overlap matrix of reduced potentials using
Bennett acceptance ratio.

Sets the attributes: delta_f_ and d_delta_f_

Parameters
----------
u_nk : DataFrame
u_nk[n,k] is the reduced potential energy of uncorrelated
configuration n evaluated at state k.

use_mbar : bool, optional, default=False
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry do you mind explain to me why do you want to do this in BAR? I think if you want the enthalpic and entropic contributions, you would just run MBAR?

Use 2-state MBAR instead of BAR. This will allow for the
calculation of enthalpic and entropic contributions.

compute_entropy_enthalpy : bool, optional, default=False
Compute entropy and enthalpy from 2-state MBAR. Note
that ``use_mbar`` must be ``True``.

"""

if compute_entropy_enthalpy and not use_mbar:
raise ValueError(
"Cannot compute the enthalpy and entropy with BAR, set use_mbar=True."
)

# sort by state so that rows from same state are in contiguous blocks
u_nk = u_nk.sort_index(level=u_nk.index.names[1:])

Expand All @@ -114,6 +156,11 @@ def fit(self, u_nk):
# Now get free energy differences and their uncertainties for each step
deltas = np.array([])
d_deltas = np.array([])
if compute_entropy_enthalpy:
deltas_h = np.array([])
d_deltas_h = np.array([])
deltas_s = np.array([])
d_deltas_s = np.array([])
for k in range(len(N_k) - 1):
if N_k[k] == 0 or N_k[k + 1] == 0:
continue
Expand All @@ -129,18 +176,67 @@ def fit(self, u_nk):
w_r = uk1.iloc[:, k] - uk1.iloc[:, k + 1]

# now determine df and ddf using pymbar.BAR
bar_method = self.method if not use_mbar else 'false-position'
out = BAR_(
w_f,
w_r,
method=self.method,
method=bar_method,
maximum_iterations=self.maximum_iterations,
relative_tolerance=self.relative_tolerance,
verbose=self.verbose,
)
if use_mbar: # now determine df and ddf using pymbar.MBAR
tmp_u_nk = concat(
[
groups.get_group(
self._states_[k]
if isinstance(self._states_[k], tuple)
else (self._states_[k],)
),
groups.get_group(
self._states_[k + 1]
if isinstance(self._states_[k + 1], tuple)
else (self._states_[k + 1],)
),
]
)
columns = tmp_u_nk.columns.tolist()
tmp_u_nk = tmp_u_nk.drop(
[x for x in columns if x not in columns[k : k + 2]], axis=1
)
mbar_method = self.method if self.method != "false-position" else "robust"
mbar = pymbar.MBAR(
tmp_u_nk.T,
N_k[k : k + 2],
maximum_iterations=self.maximum_iterations,
relative_tolerance=self.relative_tolerance,
verbose=self.verbose,
initial_f_k=[0, out["Delta_f"]],
solver_protocol=mbar_method,
n_bootstraps=self.n_bootstraps,
)
uncertainty_method = None if self.n_bootstraps == 0 else "bootstrap"
if compute_entropy_enthalpy:
out = mbar.compute_entropy_and_enthalpy(
uncertainty_method=uncertainty_method
)
else:
out = mbar.compute_free_energy_differences(
uncertainty_method=uncertainty_method
)
out = {key: val[0, 1] for key, val in out.items()}

df, ddf = out["Delta_f"], out["dDelta_f"]
deltas = np.append(deltas, df)
d_deltas = np.append(d_deltas, ddf**2)
if compute_entropy_enthalpy:
dh, ddh = out["Delta_u"], out["dDelta_u"]
deltas_h = np.append(deltas_h, dh)
d_deltas_h = np.append(d_deltas_h, ddh**2)

ds, dds = out["Delta_s"], out["dDelta_s"]
deltas_s = np.append(deltas_s, ds)
d_deltas_s = np.append(d_deltas_s, dds**2)

if len(deltas) == 0 and len(states) > 1:
raise ValueError(
Expand All @@ -150,36 +246,73 @@ def fit(self, u_nk):
)

# build matrix of deltas between each state
adelta = np.zeros((len(deltas) + 1, len(deltas) + 1))
lx = len(deltas)
adelta = np.zeros((lx + 1, lx + 1))
ad_delta = np.zeros_like(adelta)

for j in range(len(deltas)):
out = []
dout = []
for i in range(len(deltas) - j):
out.append(deltas[i : i + j + 1].sum())
if compute_entropy_enthalpy:
adelta_h, ad_delta_h = np.zeros_like(adelta), np.zeros_like(adelta)
adelta_s, ad_delta_s = np.zeros_like(adelta), np.zeros_like(adelta)

for j in range(lx):
out_f, dout_f = np.empty(lx - j), np.empty(lx - j)
if compute_entropy_enthalpy:
out_h, dout_h = np.empty(lx - j), np.empty(lx - j)
out_s, dout_s = np.empty(lx - j), np.empty(lx - j)
for i in range(lx - j):
out_f[i] = deltas[i : i + j + 1].sum()

# See https://github.com/alchemistry/alchemlyb/pull/60#issuecomment-430720742
# Error estimate generated by BAR ARE correlated

# Use the BAR uncertainties between two neighbour states
if j == 0:
dout.append(d_deltas[i : i + j + 1].sum())
# Other uncertainties are unknown at this point
else:
dout.append(np.nan)
dout_f[i] = d_deltas[i : i + j + 1].sum() if j == 0 else np.nan

adelta += np.diagflat(np.array(out), k=j + 1)
ad_delta += np.diagflat(np.array(dout), k=j + 1)
if compute_entropy_enthalpy:
out_h[i] = deltas_h[i : i + j + 1].sum()
out_s[i] = deltas_s[i : i + j + 1].sum()

# Use the BAR uncertainties between two neighbour states
# Other uncertainties are unknown at this point
dout_h[i] = d_deltas_h[i : i + j + 1].sum() if j == 0 else np.nan
dout_s[i] = d_deltas_s[i : i + j + 1].sum() if j == 0 else np.nan

adelta += np.diagflat(out_f, k=j + 1)
ad_delta += np.diagflat(dout_f, k=j + 1)
if compute_entropy_enthalpy:
adelta_h += np.diagflat(out_h, k=j + 1)
ad_delta_h += np.diagflat(dout_h, k=j + 1)
adelta_s += np.diagflat(out_s, k=j + 1)
ad_delta_s += np.diagflat(dout_s, k=j + 1)

# yield standard delta_f_ free energies between each state
self._delta_f_ = pd.DataFrame(adelta - adelta.T, columns=states, index=states)
if compute_entropy_enthalpy:
self._delta_h_ = pd.DataFrame(
adelta_h - adelta_h.T, columns=states, index=states
)
self._delta_s_ = pd.DataFrame(
adelta_s - adelta_s.T, columns=states, index=states
)

# yield standard deviation d_delta_f_ between each state
self._d_delta_f_ = pd.DataFrame(
np.sqrt(ad_delta + ad_delta.T), columns=states, index=states
)
if compute_entropy_enthalpy:
self._d_delta_h_ = pd.DataFrame(
np.sqrt(ad_delta_h + ad_delta_h.T), columns=states, index=states
)
self._d_delta_s_ = pd.DataFrame(
np.sqrt(ad_delta_s + ad_delta_s.T), columns=states, index=states
)

self._delta_f_.attrs = u_nk.attrs
self._d_delta_f_.attrs = u_nk.attrs
if compute_entropy_enthalpy:
self._delta_h_.attrs = u_nk.attrs
self._d_delta_h_.attrs = u_nk.attrs
self._delta_s_.attrs = u_nk.attrs
self._d_delta_s_.attrs = u_nk.attrs

return self
20 changes: 20 additions & 0 deletions src/alchemlyb/estimators/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@ class _EstimatorMixOut:
_d_delta_f_ = None
_delta_f_ = None
_states_ = None
_d_delta_h_ = None
_delta_h_ = None
_d_delta_s_ = None
_delta_s_ = None

@property
def d_delta_f_(self):
Expand All @@ -14,6 +18,22 @@ def d_delta_f_(self):
def delta_f_(self):
return self._delta_f_

@property
def d_delta_h_(self):
return self._d_delta_h_

@property
def delta_h_(self):
return self._delta_h_

@property
def d_delta_s_(self):
return self._d_delta_s_

@property
def delta_s_(self):
return self._delta_s_

@property
def states_(self):
return self._states_
Loading