Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add entropy and enthalpy #406

Merged
merged 15 commits into from
Nov 14, 2024
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions CHANGES
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,12 @@ The rules for this file:
* accompany each entry with github issue/PR number (Issue #xyz)
* release numbers follow "Semantic Versioning" https://semver.org

**/**/**** xiki-tempula
**/**/**** xiki-tempula, jaclark5

* 2.5.0

Enhancements
- Added matrices for entropy and enthalpy for MBAR (PR #406)
jaclark5 marked this conversation as resolved.
Show resolved Hide resolved
- Parallelise read and preprocess for ABFE workflow. (PR #371)


Expand All @@ -27,7 +28,7 @@ Enhancements
Fixes
- [doc] tutorial: use alchemlyb.concat (PR #399)
- Resolve pandas FutureWarnings in bar_.py and mbar_.py (issue #408 PR #407)


09/17/2024 jaclark5, orbeckst

Expand Down
25 changes: 23 additions & 2 deletions src/alchemlyb/estimators/base.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,15 @@
class _EstimatorMixOut:
"""This class creates view for the d_delta_f_, delta_f_, states_ for the
estimator class to consume."""
"""This class creates view for the attributes: states_, delta_f_, d_delta_f_,
delta_h_, d_delta_h_, delta_sT_, d_delta_sT_ for the estimator class to consume.
"""

_d_delta_f_ = None
_delta_f_ = None
_states_ = None
_d_delta_h_ = None
_delta_h_ = None
_d_delta_sT_ = None
_delta_sT_ = None

@property
def d_delta_f_(self):
Expand All @@ -14,6 +19,22 @@ def d_delta_f_(self):
def delta_f_(self):
return self._delta_f_

@property
def d_delta_h_(self):
return self._d_delta_h_

@property
def delta_h_(self):
return self._delta_h_

@property
def d_delta_sT_(self):
return self._d_delta_sT_

@property
def delta_sT_(self):
return self._delta_sT_

@property
def states_(self):
return self._states_
58 changes: 50 additions & 8 deletions src/alchemlyb/estimators/mbar_.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,20 @@ class MBAR(BaseEstimator, _EstimatorMixOut):
The estimated statistical uncertainty (one standard deviation) in
dimensionless free energy differences.

delta_h_ : DataFrame
The estimated dimensionless enthalpy difference between each state.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is enthalpy dimensionless? Should it be having energy unit?

Copy link
Contributor Author

@jaclark5 jaclark5 Nov 13, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@xiki-tempula Do you want me to change the docstring? I put this since the existing documentation for free energy is:

    delta_f_ : DataFrame
        The estimated dimensionless free energy difference between each state.

and I'm treating enthalpy and entropy the same as free energy... it seems I should change entropy appropriately though. See the new commit.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, then I think this is fine. I will change this in a later PR.


d_delta_h_ : DataFrame
The estimated statistical uncertainty (one standard deviation) in
dimensionless enthalpy differences.

delta_sT_ : DataFrame, optional
The estimated dimensionless entropy difference between each state.

d_delta_sT_ : DataFrame
The estimated statistical uncertainty (one standard deviation) in
dimensionless entropy differences.

theta_ : DataFrame
The theta matrix.

Expand All @@ -80,9 +94,11 @@ class MBAR(BaseEstimator, _EstimatorMixOut):
.. versionchanged:: 2.1.0
`n_bootstraps` option added.
.. versionchanged:: 2.4.0
Handle initial estimate, initial_f_k, from bar in the instance
that not all lambda states represented as column headers are
represented in the indices of u_nk.
Handle initial estimate, initial_f_k, from bar in the instance
that not all lambda states represented as column headers are
represented in the indices of u_nk.
.. versionchanged:: 2.5.0
Added computation of enthalpy and entropy

"""

Expand Down Expand Up @@ -110,7 +126,7 @@ def __init__(
# handle for pymbar.MBAR object
self._mbar = None

def fit(self, u_nk):
def fit(self, u_nk, compute_entropy_enthalpy=False):
"""
Compute overlap matrix of reduced potentials using multi-state
Bennett acceptance ratio.
Expand All @@ -121,6 +137,9 @@ def fit(self, u_nk):
``u_nk[n, k]`` is the reduced potential energy of uncorrelated
configuration ``n`` evaluated at state ``k``.

compute_entropy_enthalpy : bool, optional, default=False
Compute entropy and enthalpy.

"""
# sort by state so that rows from same state are in contiguous blocks
u_nk = u_nk.sort_index(level=u_nk.index.names[1:])
Expand Down Expand Up @@ -171,13 +190,18 @@ def fit(self, u_nk):
solver_protocol=self.method,
n_bootstraps=self.n_bootstraps,
)
if self.n_bootstraps == 0:
uncertainty_method = None
else:
uncertainty_method = "bootstrap"

uncertainty_method = None if self.n_bootstraps == 0 else "bootstrap"
out = self._mbar.compute_free_energy_differences(
return_theta=True, uncertainty_method=uncertainty_method
)
if compute_entropy_enthalpy:
out.update(
self._mbar.compute_entropy_and_enthalpy(
uncertainty_method=uncertainty_method
)
)

self._delta_f_ = pd.DataFrame(
out["Delta_f"], columns=self._states_, index=self._states_
)
Expand All @@ -187,9 +211,27 @@ def fit(self, u_nk):
self.theta_ = pd.DataFrame(
out["Theta"], columns=self._states_, index=self._states_
)
if compute_entropy_enthalpy:
self._delta_h_ = pd.DataFrame(
out["Delta_u"], columns=self._states_, index=self._states_
)
self._d_delta_h_ = pd.DataFrame(
out["dDelta_u"], columns=self._states_, index=self._states_
)
self._delta_sT_ = pd.DataFrame(
Copy link
Collaborator

@xiki-tempula xiki-tempula Nov 13, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm happy for this value to be s*T. Is the mbar output s or s*T?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well the mbar output is reduced entropy, so S/k or sT/(kT) so it's consistent.

Copy link
Collaborator

@xiki-tempula xiki-tempula Nov 14, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I know that the unit is the same but I worried about the magnitude.
The G=H-sT so if the Delta_s is s then the Delta_f should be the same as Delta_u-Delta_s * T. If Delta_s is sT, then Delta_f should be Delta_u-Delta_s

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes I agree, so I'm changing the variable names to make that clear. I'll do whatever the maintainers agree on, maybe @mrshirts or @orbeckst have an opinion?

Summary: Entropy has different units and which need to be handled properly. While the units of enthalpy and free energy are attr['energy_units'] = 'kT', 'kcal/mol' or 'kJ/mol', the equivalent for entropy would then be 'k', 'kcal/mol/K' or 'kJ/mol/K'. The downside of this difference in units would require either having copies of the units.py functions for entropy specifically or add a series of if statements to the functions in units.py to detect and handle entropy units. Also, it's awkward to say that dimensionless entropy has units of 'k'

Alternative: By having the entropy matrix represent S*T (denoted with the variable name delta_sT_) the object can retain the same units as enthalpy and free energy and use the same unit conversion functions, making everything cleaner internally, and avoid the awkward unit "definition" of 'k' for S.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks. I'm merging this PR. I have checked the numerical values and can verify that Delta_s is not s but rather s*T. So it is in the unit of kT, kJ/mol and kcal/mol.

out["Delta_s"], columns=self._states_, index=self._states_
)
self._d_delta_sT_ = pd.DataFrame(
out["dDelta_s"], columns=self._states_, index=self._states_
)

self._delta_f_.attrs = u_nk.attrs
self._d_delta_f_.attrs = u_nk.attrs
if compute_entropy_enthalpy:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see that you do add the units back.

self._delta_h_.attrs = u_nk.attrs
self._d_delta_h_.attrs = u_nk.attrs
self._delta_sT_.attrs = u_nk.attrs
self._d_delta_sT_.attrs = u_nk.attrs

return self

Expand Down
5 changes: 5 additions & 0 deletions src/alchemlyb/parsing/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,13 @@ def wrapper(outfile, T, *args, **kwargs):
dict_with_df = func(outfile, T, *args, **kwargs)
for k in dict_with_df.keys():
if dict_with_df[k] is not None:

if T < 273:
jaclark5 marked this conversation as resolved.
Show resolved Hide resolved
raise ValueError("Temperature must be in Kelvin.")

dict_with_df[k].attrs["temperature"] = T
dict_with_df[k].attrs["energy_unit"] = "kT"

return dict_with_df

return wrapper
13 changes: 12 additions & 1 deletion src/alchemlyb/postprocessors/units.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@
def to_kT(df, T=None):
"""Convert the unit of a DataFrame to `kT`.

Note that if entropy values are passed it is assumed that they are
multiplied by the temperature, S * T.

If temperature `T` is not provided, the DataFrame need to have attribute
`temperature` and `energy_unit`. Otherwise, the temperature of the output
dateframe will be set accordingly.
Expand All @@ -25,7 +28,7 @@ def to_kT(df, T=None):
df : DataFrame
DataFrame to convert unit.
T : float
Temperature (default: None).
Temperature (default: None) in Kelvin.

Returns
-------
Expand All @@ -34,6 +37,8 @@ def to_kT(df, T=None):
"""
new_df = df.copy()
if T is not None:
if T < 273:
raise ValueError("Temperature must be in Kelvin.")
new_df.attrs["temperature"] = T
elif "temperature" not in df.attrs:
raise TypeError("Attribute temperature not found in the input " "Dataframe.")
Expand Down Expand Up @@ -61,6 +66,9 @@ def to_kT(df, T=None):
def to_kcalmol(df, T=None):
"""Convert the unit of a DataFrame to kcal/mol.

Note that if entropy values are passed, the result is S * T in units
of kcal/mol.

If temperature `T` is not provided, the DataFrame need to have attribute
`temperature` and `energy_unit`. Otherwise, the temperature of the output
dateframe will be set accordingly.
Expand All @@ -86,6 +94,9 @@ def to_kcalmol(df, T=None):
def to_kJmol(df, T=None):
"""Convert the unit of a DataFrame to kJ/mol.

Note that if entropy values are passed, the result is S * T in units
of kJ/mol.

If temperature `T` is not provided, the DataFrame need to have attribute
`temperature` and `energy_unit`. Otherwise, the temperature of the output
dateframe will be set accordingly.
Expand Down
25 changes: 25 additions & 0 deletions src/alchemlyb/tests/test_fep_estimators.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,31 @@ def test_bootstrap(gmx_benzene_Coulomb_u_nk):
assert mbar_bootstrap_err != mbar_err


def test_enthalpy_entropy_mbar(gmx_benzene_Coulomb_u_nk):
mbar = MBAR()
u_nk = alchemlyb.concat(gmx_benzene_Coulomb_u_nk)
mbar.fit(u_nk, compute_entropy_enthalpy=True)

assert mbar.delta_f_.iloc[0, :].to_numpy() == pytest.approx(
np.array([0.0, 1.619069, 2.557990, 2.986302, 3.041156]), abs=1e-6
)
assert mbar.delta_h_.iloc[0, :].to_numpy() == pytest.approx(
np.array([0.0, 1.241970, 1.695000, 1.706555, 1.388906]), abs=1e-6
)
assert mbar.delta_sT_.iloc[0, :].to_numpy() == pytest.approx(
np.array([0.0, -0.377099, -0.862990, -1.279746, -1.652250]), abs=1e-6
)
assert mbar.d_delta_f_.iloc[0, :].to_numpy() == pytest.approx(
np.array([0.0, 0.008802, 0.014432, 0.018097, 0.020879]), abs=1e-6
)
assert mbar.d_delta_h_.iloc[0, :].to_numpy() == pytest.approx(
np.array([0.0, 0.011598, 0.016538, 0.018077, 0.018940]), abs=1e-6
)
assert mbar.d_delta_sT_.iloc[0, :].to_numpy() == pytest.approx(
np.array([0.0, 0.012242, 0.019519, 0.023606, 0.026107]), abs=1e-6
)


def test_wrong_initial_f_k():
with pytest.raises(
ValueError, match="Only `BAR` is supported as string input to `initial_f_k`"
Expand Down
Loading