diff --git a/CHANGES b/CHANGES index 993ac8d8..62fd9cea 100644 --- a/CHANGES +++ b/CHANGES @@ -13,6 +13,16 @@ The rules for this file: * release numbers follow "Semantic Versioning" https://semver.org ------------------------------------------------------------------------------ + +??/??/2024 jaclark5 + + * 2.4.0 + +Enhancements + - Addition of `block_average` function in both `convergence` and + `visualization` (Issue #380, PR #381) + + 08/24/2024 xiki-tempula * 2.3.2 diff --git a/docs/convergence.rst b/docs/convergence.rst index c71e65ee..a4a96056 100644 --- a/docs/convergence.rst +++ b/docs/convergence.rst @@ -75,6 +75,31 @@ is, where 0 fully-unequilibrated and 1.0 is fully-equilibrated. :: >>> value = A_c(dhdl_list, tol=2) 0.7085 +Block Average +-------------- +If one obtains suspicious results from the forward / backward convergence plot, +it may be useful to view the block averages of the change in free energy using +:func:`~alchemlyb.convergence.block_average` and +:func:`~alchemlyb.visualisation.plot_block_average` over the course of each +step in lambda individually, the following example is for :math:`\lambda` = 0 + + >>> from alchemtest.gmx import load_benzene + >>> from alchemlyb.parsing.gmx import extract_u_nk + >>> from alchemlyb.visualisation import plot_block_average + >>> from alchemlyb.convergence import block_average + + >>> bz = load_benzene().data + >>> data_list = [extract_u_nk(xvg, T=300) for xvg in bz['Coulomb']] + >>> df = block_average(data_list, 'mbar') + >>> ax = plot_block_average(df) + >>> ax.figure.savefig('dF_t_block_average.png') + +Will give a plot looks like this + +.. figure:: images/dF_t_block_average.png + + A plot of the free energy divided into block averages showing consistency/inconsistency + across the trajectory. Convergence functions --------------------- diff --git a/docs/convergence/alchemlyb.convergence.convergence.rst b/docs/convergence/alchemlyb.convergence.convergence.rst index e5512074..c40e3ad3 100644 --- a/docs/convergence/alchemlyb.convergence.convergence.rst +++ b/docs/convergence/alchemlyb.convergence.convergence.rst @@ -15,3 +15,5 @@ All convergence functions are located in this submodule but for convenience they .. autofunction:: alchemlyb.convergence.fwdrev_cumavg_Rc .. autofunction:: alchemlyb.convergence.A_c + +.. autofunction:: alchemlyb.convergence.block_average diff --git a/docs/images/dF_t_block_average.png b/docs/images/dF_t_block_average.png new file mode 100644 index 00000000..039f4a7a Binary files /dev/null and b/docs/images/dF_t_block_average.png differ diff --git a/docs/visualisation.rst b/docs/visualisation.rst index 703270f4..b84cfa9d 100644 --- a/docs/visualisation.rst +++ b/docs/visualisation.rst @@ -19,6 +19,7 @@ Plotting Functions plot_ti_dhdl plot_dF_state plot_convergence + plot_block_average .. _plot_overlap_matrix: diff --git a/docs/visualisation/alchemlyb.visualisation.plot_block_average.rst b/docs/visualisation/alchemlyb.visualisation.plot_block_average.rst new file mode 100644 index 00000000..952362e9 --- /dev/null +++ b/docs/visualisation/alchemlyb.visualisation.plot_block_average.rst @@ -0,0 +1,6 @@ +alchemlyb.visualisation.plot\_block\_average +============================================= + +.. currentmodule:: alchemlyb.visualisation + +.. autofunction:: plot_block_average \ No newline at end of file diff --git a/src/alchemlyb/convergence/__init__.py b/src/alchemlyb/convergence/__init__.py index d829581f..b031527e 100644 --- a/src/alchemlyb/convergence/__init__.py +++ b/src/alchemlyb/convergence/__init__.py @@ -1 +1 @@ -from .convergence import forward_backward_convergence, fwdrev_cumavg_Rc, A_c +from .convergence import forward_backward_convergence, fwdrev_cumavg_Rc, A_c, block_average diff --git a/src/alchemlyb/convergence/convergence.py b/src/alchemlyb/convergence/convergence.py index d109e1ee..d613f22e 100644 --- a/src/alchemlyb/convergence/convergence.py +++ b/src/alchemlyb/convergence/convergence.py @@ -30,20 +30,26 @@ def forward_backward_convergence( Parameters ---------- df_list : list - List of DataFrame of either dHdl or u_nk. + List of DataFrame of either dHdl or u_nk, where each represents a + different value of lambda. estimator : {'MBAR', 'BAR', 'TI'} Name of the estimators. - See the important note below on the use of "MBAR". .. deprecated:: 1.0.0 Lower case input is also accepted until release 2.0.0. num : int - The number of time points. + The number of blocks used to divide *each* DataFrame and progressively add + to assess convergence. Note that if the DataFrames are different lengths, + the number of samples contributed with each block will be different. error_tol : float The maximum error tolerated for analytic error. If the analytic error is bigger than the error tolerance, the bootstrap error will be used. .. versionadded:: 2.3.0 + .. versionchanged:: 2.4.0 + Clarified docstring, removed incorrect estimation of std for cumulative + result in bar and added check that only a single lambda state is + represented in the indices of each df in df_list. kwargs : dict Keyword arguments to be passed to the estimator. @@ -94,7 +100,16 @@ def forward_backward_convergence( # select estimator class by name my_estimator = estimators_dispatch[estimator](**kwargs) logger.info(f"Use {estimator} estimator for convergence analysis.") - + + # Check that each df in the list has only one value of lambda + for i, df in enumerate(df_list): + lambda_values = list(set([x[1:] for x in df.index.to_numpy()])) + if len(lambda_values) > 1: + ind = [j for j in range(len(lambda_values[0])) if len(list(set([x[j] for x in lambda_values]))) > 1][0] + raise ValueError( + "Provided DataFrame, df_list[{}] has more than one lambda value in df.index[{}]".format(i, ind) + ) + logger.info("Begin forward analysis") forward_list = [] forward_error_list = [] @@ -170,14 +185,9 @@ def _forward_backward_convergence_estimate( my_estimator.initial_f_k = result.delta_f_.iloc[0, :] mean = result.delta_f_.iloc[0, -1] if estimator.lower() == "bar": - error = np.sqrt( - sum( - [ - result.d_delta_f_.iloc[i, i + 1] ** 2 - for i in range(len(result.d_delta_f_) - 1) - ] - ) - ) + # See https://github.com/alchemistry/alchemlyb/pull/60#issuecomment-430720742 + # Error estimate generated by BAR ARE correlated + error = np.nan else: error = result.d_delta_f_.iloc[0, -1] if estimator.lower() == "mbar" and error > error_tol: @@ -262,7 +272,7 @@ def fwdrev_cumavg_Rc(series, precision=0.01, tol=2): float Convergence time fraction :math:`R_c` [Fan2021]_ :class:`pandas.DataFrame` - The DataFrame with moving average. :: + The DataFrame with block average. :: Forward Backward data_fraction 0 3.016442 3.065176 0.1 @@ -389,3 +399,106 @@ def A_c(series_list, precision=0.01, tol=2): d_R_c = sorted_array[-i] - sorted_array[-i - 1] result += d_R_c * sum(R_c_list <= element) / n_R_c return result + + +def block_average(df_list, estimator="MBAR", num=10, **kwargs): + """Free energy estimate for portions of the trajectory. + + Generate the free energy estimate for a series of blocks in time, + with the specified number of equally spaced points. + For example, setting `num` to 10 would give the block averages + which is the free energy estimate from the first 10% alone, then the + next 10% ... of the data. + + Parameters + ---------- + df_list : list + List of DataFrame of either dHdl or u_nk, where each represents a + different value of lambda. + estimator : {'MBAR', 'BAR', 'TI'} + Name of the estimators. + num : int + The number of blocks used to divide *each* DataFrame. Note that + if the DataFrames are different lengths, the number of samples + contributed to each block will be different. + kwargs : dict + Keyword arguments to be passed to the estimator. + + Returns + ------- + :class:`pandas.DataFrame` + The DataFrame with estimate data. :: + + FE FE_Error + 0 3.016442 0.052748 + 1 3.078106 0.037170 + 2 3.072561 0.030186 + 3 3.048325 0.026070 + 4 3.049769 0.023359 + 5 3.034078 0.021260 + 6 3.043274 0.019642 + 7 3.035460 0.018340 + 8 3.042032 0.017319 + 9 3.044149 0.016405 + + + .. versionadded:: 2.4.0 + + """ + logger.info("Start block averaging analysis.") + logger.info("Check data availability.") + if estimator not in (FEP_ESTIMATORS + TI_ESTIMATORS): + msg = f"Estimator {estimator} is not available in {FEP_ESTIMATORS + TI_ESTIMATORS}." + logger.error(msg) + raise ValueError(msg) + else: + # select estimator class by name + estimator_fit = estimators_dispatch[estimator](**kwargs).fit + logger.info(f"Use {estimator} estimator for convergence analysis.") + + # Check that each df in the list has only one value of lambda + for i, df in enumerate(df_list): + lambda_values = list(set([x[1:] for x in df.index.to_numpy()])) + if len(lambda_values) > 1: + ind = [j for j in range(len(lambda_values[0])) if len(list(set([x[j] for x in lambda_values]))) > 1][0] + raise ValueError( + "Provided DataFrame, df_list[{}] has more than one lambda value in df.index[{}]".format(i, ind) + ) + + if estimator in ["BAR"] and len(df_list) > 2: + raise ValueError( + "Restrict to two DataFrames, one with a fep-lambda value and one its forward adjacent state for a " + "meaningful result." + ) + + logger.info("Begin Moving Average Analysis") + average_list = [] + average_error_list = [] + for i in range(1, num): + logger.info("Moving Average Analysis: {:.2f}%".format(100 * i / num)) + sample = [] + for data in df_list: + ind1, ind2 = len(data) // num * (i - 1), len(data) // num * i + sample.append(data[ind1:ind2]) + sample = concat(sample) + result = estimator_fit(sample) + + average_list.append(result.delta_f_.iloc[0, -1]) + if estimator.lower() == "bar": + # See https://github.com/alchemistry/alchemlyb/pull/60#issuecomment-430720742 + # Error estimate generated by BAR ARE correlated + average_error_list.append(np.nan) + else: + average_error_list.append(result.d_delta_f_.iloc[0, -1]) + logger.info( + "{:.2f} +/- {:.2f} kT".format(average_list[-1], average_error_list[-1]) + ) + + convergence = pd.DataFrame( + { + "FE": average_list, + "FE_Error": average_error_list, + } + ) + convergence.attrs = df_list[0].attrs + return convergence diff --git a/src/alchemlyb/estimators/bar_.py b/src/alchemlyb/estimators/bar_.py index bb0afd4d..52353064 100644 --- a/src/alchemlyb/estimators/bar_.py +++ b/src/alchemlyb/estimators/bar_.py @@ -54,6 +54,9 @@ class BAR(BaseEstimator, _EstimatorMixOut): .. versionchanged:: 1.0.0 `delta_f_`, `d_delta_f_`, `states_` are view of the original object. + .. versionchanged:: 2.4.0 + Added assessment of lambda states represented in the indices of u_nk + to provide meaningful errors to ensure proper use. """ @@ -88,7 +91,7 @@ def fit(self, u_nk): # sort by state so that rows from same state are in contiguous blocks u_nk = u_nk.sort_index(level=u_nk.index.names[1:]) - # get a list of the lambda states + # get a list of the lambda states that are sampled self._states_ = u_nk.columns.values.tolist() # group u_nk by lambda states @@ -97,11 +100,25 @@ def fit(self, u_nk): (len(groups.get_group(i)) if i in groups.groups else 0) for i in u_nk.columns ] - + + # Pull lambda states from indices + states = list(set( x[1:] for x in u_nk.index)) + for state in states: + if len(state) == 1: + state = state[0] + if state not in self._states_: + raise ValueError( + f"Indexed lambda state, {state}, is not represented in u_nk columns:" + f" {self._states_}" + ) + # Now get free energy differences and their uncertainties for each step deltas = np.array([]) d_deltas = np.array([]) for k in range(len(N_k) - 1): + if N_k[k] == 0 or N_k[k + 1] == 0: + continue + # get us from lambda step k uk = groups.get_group(self._states_[k]) # get w_F @@ -126,6 +143,13 @@ def fit(self, u_nk): deltas = np.append(deltas, df) d_deltas = np.append(d_deltas, ddf**2) + if len(deltas) == 0 and len(states) > 1: + raise ValueError( + "u_nk does not contain energies computed between any adjacent states.\n" + "To compute the free energy with BAR, ensure that values in u_nk exist" + f" for the columns:\n{states}." + ) + # build matrix of deltas between each state adelta = np.zeros((len(deltas) + 1, len(deltas) + 1)) ad_delta = np.zeros_like(adelta) @@ -150,13 +174,11 @@ def fit(self, u_nk): ad_delta += np.diagflat(np.array(dout), k=j + 1) # yield standard delta_f_ free energies between each state - self._delta_f_ = pd.DataFrame( - adelta - adelta.T, columns=self._states_, index=self._states_ - ) + self._delta_f_ = pd.DataFrame(adelta - adelta.T, columns=states, index=states) # yield standard deviation d_delta_f_ between each state self._d_delta_f_ = pd.DataFrame( - np.sqrt(ad_delta + ad_delta.T), columns=self._states_, index=self._states_ + np.sqrt(ad_delta + ad_delta.T), columns=states, index=states ) self._delta_f_.attrs = u_nk.attrs self._d_delta_f_.attrs = u_nk.attrs diff --git a/src/alchemlyb/estimators/mbar_.py b/src/alchemlyb/estimators/mbar_.py index e0ab594a..fa399cb1 100644 --- a/src/alchemlyb/estimators/mbar_.py +++ b/src/alchemlyb/estimators/mbar_.py @@ -33,7 +33,7 @@ class MBAR(BaseEstimator, _EstimatorMixOut): .. versionchanged:: 2.3.0 The new default is now "BAR" as it provides a substantial speedup over the previous default `None`. - + method : str, optional, default="robust" The optimization routine to use. This can be any of the methods @@ -79,6 +79,11 @@ class MBAR(BaseEstimator, _EstimatorMixOut): default value for `method` was changed from "hybr" to "robust" .. versionchanged:: 2.1.0 `n_bootstraps` option added. + .. versionchanged:: 2.4.0 + Handle initial estimate, initial_f_k, from bar in the instance + that not all lambda states represented as column headers are + represented in the indices of u_nk. + """ def __init__( @@ -135,6 +140,20 @@ def fit(self, u_nk): ) bar.fit(u_nk) initial_f_k = bar.delta_f_.iloc[0, :] + if len(bar.delta_f_.iloc[0, :]) != len(self._states_): + states = [ + x + for i, x in enumerate(self._states_[:-1]) + if N_k[i] > 0 and N_k[i + 1] > 0 + ] + initial_f_k = pd.Series( + [ + initial_f_k.loc(x) if x in states else np.nan + for x in self._states_ + ], + index=self._states_, + dtype=float, + ) else: initial_f_k = self.initial_f_k diff --git a/src/alchemlyb/tests/test_convergence.py b/src/alchemlyb/tests/test_convergence.py index 2bde94b3..7852217b 100644 --- a/src/alchemlyb/tests/test_convergence.py +++ b/src/alchemlyb/tests/test_convergence.py @@ -2,7 +2,13 @@ import pandas as pd import pytest -from alchemlyb.convergence import forward_backward_convergence, fwdrev_cumavg_Rc, A_c +from alchemlyb import concat +from alchemlyb.convergence import ( + forward_backward_convergence, + fwdrev_cumavg_Rc, + A_c, + block_average, +) from alchemlyb.convergence.convergence import _cummean @@ -26,6 +32,113 @@ def test_convergence_fep(gmx_benzene_Coulomb_u_nk, estimator): assert convergence.loc[9, "Backward"] == pytest.approx(3.04, 0.01) +def test_block_average_ti(gmx_benzene_Coulomb_dHdl): + df_avg = block_average(gmx_benzene_Coulomb_dHdl, "TI") + assert df_avg.shape == (9, 2) + assert df_avg.loc[1, "FE"] == pytest.approx(3.18, 0.01) + assert df_avg.loc[1, "FE_Error"] == pytest.approx(0.07, 0.1) + assert df_avg.loc[8, "FE"] == pytest.approx(3.15, 0.01) + assert df_avg.loc[8, "FE_Error"] == pytest.approx(0.07, 0.1) + + +@pytest.mark.parametrize("estimator", ["DUMMY"]) +def test_block_average_error_1(gmx_ABFE_complex_u_nk, estimator): + with pytest.raises(ValueError, match=r"Estimator DUMMY is not available .*"): + _ = block_average(gmx_ABFE_complex_u_nk, estimator) + + +@pytest.mark.parametrize("estimator", ["MBAR"]) +def test_block_average_error_2_mbar(gmx_ABFE_complex_u_nk, estimator): + df_list = gmx_ABFE_complex_u_nk[10:15] + with pytest.raises( + ValueError, + match=r"Provided DataFrame, df_list\[0\] has more than one lambda value in df.index\[0\]", + ): + _ = block_average([concat(df_list)], estimator) + + df_list = gmx_ABFE_complex_u_nk[14:17] + with pytest.raises( + ValueError, + match=r"Provided DataFrame, df_list\[0\] has more than one lambda value in df.index\[1\]", + ): + _ = block_average([concat(df_list)], estimator) + + +@pytest.mark.parametrize("estimator", ["BAR"]) +def test_block_average_error_2_bar(gmx_ABFE_complex_u_nk, estimator): + df_list = gmx_ABFE_complex_u_nk[10:13] + with pytest.raises( + ValueError, + match=r"Restrict to two DataFrames, one with a fep-lambda value .*", + ): + _ = block_average(df_list, estimator) + + df_list = gmx_ABFE_complex_u_nk[14:17] + with pytest.raises( + ValueError, + match=r"Restrict to two DataFrames, one with a fep-lambda value .*", + ): + _ = block_average(df_list, estimator) + + +@pytest.mark.parametrize("estimator", ["BAR"]) +def test_block_average_error_3_bar(gmx_ABFE_complex_u_nk, estimator): + # Test if lambda state column representing one of the two lambda + # states in the df indices is missing from *both* dfs. + df_list = gmx_ABFE_complex_u_nk[10:12] + state1 = list(set(x[1:] for x in df_list[0].index))[0] + df_list[0] = df_list[0].drop(state1, axis=1) + df_list[1] = df_list[1].drop(state1, axis=1) + with pytest.raises( + ValueError, + match=r"Indexed lambda state, .*", + ): + _ = block_average(df_list, estimator) + + +@pytest.mark.parametrize("estimator", ["BAR"]) +def test_block_average_error_4_bar(gmx_ABFE_complex_u_nk, estimator): + # Test if lambda state column representing one of the two lambda + # states in the df indices is missing from *one* dfs. + df_list = gmx_ABFE_complex_u_nk[10:12] + state1 = list(set(x[1:] for x in df_list[0].index))[0] + df_list[0] = df_list[0].drop(state1, axis=1) + with pytest.raises( + ValueError, + match=r"u_nk does not contain energies computed between any adjacent .*", + ): + _ = block_average(df_list, estimator) + + +@pytest.mark.parametrize("estimator", ["BAR"]) +def test_block_average_bar(gmx_ABFE_complex_u_nk, estimator): + df_avg = block_average(gmx_ABFE_complex_u_nk[10:12], estimator) + assert df_avg.shape == (9, 2) + assert df_avg.loc[0, "FE"] == pytest.approx(3.701, 0.01) + assert np.isnan(df_avg.loc[0, "FE_Error"]) + assert df_avg.loc[8, "FE"] == pytest.approx(3.603, 0.01) + assert np.isnan(df_avg.loc[8, "FE_Error"]) + + df_list = gmx_ABFE_complex_u_nk[14:16] + df_list[-1] = df_list[-1].iloc[:-2] + df_avg = block_average(df_list, estimator) + assert df_avg.shape == (9, 2) + assert df_avg.loc[0, "FE"] == pytest.approx(0.651, 0.01) + assert np.isnan(df_avg.loc[0, "FE_Error"]) + assert df_avg.loc[8, "FE"] == pytest.approx(0.901, 0.01) + assert np.isnan(df_avg.loc[8, "FE_Error"]) + + +@pytest.mark.parametrize("estimator", ["MBAR"]) +def test_block_average_mbar(gmx_benzene_Coulomb_u_nk, estimator): + df_avg = block_average([gmx_benzene_Coulomb_u_nk[0]], estimator) + assert df_avg.shape == (9, 2) + assert df_avg.loc[0, "FE"] == pytest.approx(3.41, 0.01) + assert df_avg.loc[0, "FE_Error"] == pytest.approx(0.22, 0.01) + assert df_avg.loc[8, "FE"] == pytest.approx(2.83, 0.01) + assert df_avg.loc[8, "FE_Error"] == pytest.approx(0.33, 0.01) + + def test_convergence_wrong_estimator(gmx_benzene_Coulomb_dHdl): with pytest.raises(ValueError, match="is not available in"): forward_backward_convergence(gmx_benzene_Coulomb_dHdl, "WWW") @@ -52,6 +165,23 @@ def test_convergence_method(gmx_benzene_Coulomb_u_nk): assert len(convergence) == 2 +@pytest.mark.parametrize("estimator", ["MBAR"]) +def test_forward_backward_convergence_mbar(gmx_ABFE_complex_u_nk, estimator): + df_list = gmx_ABFE_complex_u_nk[10:15] + with pytest.raises( + ValueError, + match=r"Provided DataFrame, df_list\[0\] has more than one lambda value in df.index\[0\]", + ): + _ = forward_backward_convergence([concat(df_list)], estimator) + + df_list = gmx_ABFE_complex_u_nk[14:17] + with pytest.raises( + ValueError, + match=r"Provided DataFrame, df_list\[0\] has more than one lambda value in df.index\[1\]", + ): + _ = forward_backward_convergence([concat(df_list)], estimator) + + def test_cummean_short(): """Test the case where the input is shorter than the expected output""" value = _cummean(np.empty(10), 100) diff --git a/src/alchemlyb/tests/test_visualisation.py b/src/alchemlyb/tests/test_visualisation.py index 8be24867..f2535f8e 100644 --- a/src/alchemlyb/tests/test_visualisation.py +++ b/src/alchemlyb/tests/test_visualisation.py @@ -8,7 +8,7 @@ import alchemlyb from alchemlyb.convergence import forward_backward_convergence from alchemlyb.estimators import MBAR, TI, BAR -from alchemlyb.visualisation import plot_convergence +from alchemlyb.visualisation import plot_convergence, plot_block_average from alchemlyb.visualisation.dF_state import plot_dF_state from alchemlyb.visualisation.mbar_matrix import plot_mbar_overlap_matrix from alchemlyb.visualisation.ti_dhdl import plot_ti_dhdl @@ -147,7 +147,7 @@ def test_plot_dF_state( def test_plot_convergence_dataframe(gmx_benzene_Coulomb_u_nk): - df = forward_backward_convergence(gmx_benzene_Coulomb_u_nk, "MBAR") + df = forward_backward_convergence([gmx_benzene_Coulomb_u_nk[0]], "MBAR") ax = plot_convergence(df) assert isinstance(ax, matplotlib.axes.Axes) plt.close(ax.figure) @@ -218,6 +218,47 @@ def test_plot_convergence_final_nan(): plt.close(ax.figure) +def test_plot_block_average(gmx_benzene_Coulomb_u_nk): + data_list = gmx_benzene_Coulomb_u_nk + fe = [] + fe_error = [] + num_points = 10 + for i in range(1, num_points + 1): + slice = int(len(data_list[0]) / num_points * i) + u_nk_coul = alchemlyb.concat([data[:slice] for data in data_list]) + estimate = MBAR().fit(u_nk_coul) + fe.append(estimate.delta_f_.loc[0, 1]) + fe_error.append(estimate.d_delta_f_.loc[0, 1]) + + df = pd.DataFrame( + data={ + "FE": fe, + "FE_Error": fe_error, + } + ) + df.attrs = estimate.delta_f_.attrs + ax = plot_block_average(df) + assert isinstance(ax, matplotlib.axes.Axes) + plt.close(ax.figure) + + ax = plot_block_average(df, units="kJ/mol") + assert isinstance(ax, matplotlib.axes.Axes) + plt.close(ax.figure) + + df = df.drop("FE_Error", axis=1) + ax = plot_block_average(df) + assert isinstance(ax, matplotlib.axes.Axes) + plt.close(ax.figure) + + ax = plot_block_average(df, final_error=1) + assert isinstance(ax, matplotlib.axes.Axes) + plt.close(ax.figure) + + ax = plot_block_average(df, final_error=np.inf) + assert isinstance(ax, matplotlib.axes.Axes) + plt.close(ax.figure) + + class Test_Units: @staticmethod @pytest.fixture() diff --git a/src/alchemlyb/visualisation/__init__.py b/src/alchemlyb/visualisation/__init__.py index 6955dcaf..6aa4c358 100644 --- a/src/alchemlyb/visualisation/__init__.py +++ b/src/alchemlyb/visualisation/__init__.py @@ -1,4 +1,4 @@ -from .convergence import plot_convergence +from .convergence import plot_convergence, plot_block_average from .dF_state import plot_dF_state from .mbar_matrix import plot_mbar_overlap_matrix from .ti_dhdl import plot_ti_dhdl diff --git a/src/alchemlyb/visualisation/convergence.py b/src/alchemlyb/visualisation/convergence.py index b74ad336..bf232b5d 100644 --- a/src/alchemlyb/visualisation/convergence.py +++ b/src/alchemlyb/visualisation/convergence.py @@ -56,11 +56,10 @@ def plot_convergence(dataframe, units=None, final_error=None, ax=None): Keyword arg final_error for plotting a horizontal error bar. The array input has been deprecated. The units default to `None` which uses the units in the input. - .. versionchanged:: 0.6.0 data now takes in dataframe - .. versionadded:: 0.4.0 + """ if units is not None: dataframe = get_unit_converter(units)(dataframe) @@ -136,7 +135,7 @@ def plot_convergence(dataframe, units=None, final_error=None, ax=None): ax.legend( (line1[0], line2[0]), ("Forward", "Reverse"), - loc=9, + loc="best", prop=FP(size=18), frameon=False, ) @@ -144,4 +143,121 @@ def plot_convergence(dataframe, units=None, final_error=None, ax=None): ax.set_ylabel(r"$\Delta G$ ({})".format(units), fontsize=16, color="#151B54") plt.tick_params(axis="x", color="#D2B9D3") plt.tick_params(axis="y", color="#D2B9D3") + plt.tight_layout() + return ax + + +def plot_block_average(dataframe, units=None, final_error=None, ax=None): + """Plot the forward and backward convergence. + + The input could be the result from + :func:`~alchemlyb.convergence.forward_backward_convergence` or + :func:`~alchemlyb.convergence.fwdrev_cumavg_Rc`. The input should be a + :class:`pandas.DataFrame` which has column `FE` and + :attr:`pandas.DataFrame.attrs` should compile with :ref:`note-on-units`. + The errorbar will be plotted if column `FE_Error` and `Backward_Error` + is present. + + `FE`: A column of free energy estimate from some X% block of the data, + where optional `FE_Error` column is the corresponding error. + + `final_error` is the error of the final value and is shown as the error band around the + final value. It can be provided in case an estimate is available that is more appropriate + than the default, which is the error of the last value in `Backward`. + + Parameters + ---------- + dataframe : Dataframe + Output Dataframe has column `Forward`, `Backward` or optionally + `Forward_Error`, `Backward_Error` see :ref:`plot_convergence `. + units : str + The unit of the estimate. The default is `None`, which is to use the + unit in the input. Setting this will change the output unit. + final_error : float + The error (standard deviation) of the final value in ``units``. If not given, takes the + overall error of the time blocks, unless these were not provided, it which case it + equals 1 kT. + ax : matplotlib.axes.Axes + Matplotlib axes object where the plot will be drawn on. If ``ax=None``, + a new axes will be generated. + + Returns + ------- + matplotlib.axes.Axes + An axes with the forward and backward convergence drawn. + + Note + ---- + The code is taken and modified from + `Alchemical Analysis `_. + + .. versionadded:: 2.4.0 + + """ + if units is not None: + dataframe = get_unit_converter(units)(dataframe) + df_avg = dataframe["FE"].to_numpy() + if "FE_Error" in dataframe: + df_avg_error = dataframe["FE_Error"].to_numpy() + else: + df_avg_error = np.zeros(len(df_avg)) + + if ax is None: # pragma: no cover + fig, ax = plt.subplots(figsize=(8, 6)) + + plt.setp(ax.spines["bottom"], color="#D2B9D3", lw=3, zorder=-2) + plt.setp(ax.spines["left"], color="#D2B9D3", lw=3, zorder=-2) + + for dire in ["top", "right"]: + ax.spines[dire].set_color("none") + + ax.xaxis.set_ticks_position("bottom") + ax.yaxis.set_ticks_position("left") + + f_ts = np.linspace(0, 1, len(df_avg) + 1)[1:] + + if final_error is None: + if np.sum(df_avg_error) != 0: + final_error = np.std(df_avg) + else: + final_error = 1.0 + + if np.isfinite(final_error): + line0 = ax.fill_between( + [0, 1], + np.mean(df_avg) - final_error, + np.mean(df_avg) + final_error, + color="#D2B9D3", + zorder=1, + ) + line1 = ax.errorbar( + f_ts, + df_avg, + yerr=df_avg_error, + color="#736AFF", + lw=3, + zorder=2, + marker="o", + mfc="w", + mew=2.5, + mec="#736AFF", + ms=12, + label="Avg FE", + ) + + xticks_spacing = len(f_ts) // 10 or 1 + xticks = f_ts[::xticks_spacing] + plt.xticks(xticks, [f"{i:.2f}" for i in xticks], fontsize=10) + plt.yticks(fontsize=10) + + ax.legend( + loc="best", + prop=FP(size=18), + frameon=False, + ) + ax.set_xlabel(r"Fraction of the Simulation Time", fontsize=16, color="#151B54") + ax.set_ylabel(r"$\Delta G$ ({})".format(units), fontsize=16, color="#151B54") + plt.tick_params(axis="x", color="#D2B9D3") + plt.tick_params(axis="y", color="#D2B9D3") + plt.tight_layout() return ax diff --git a/src/alchemlyb/visualisation/dF_state.py b/src/alchemlyb/visualisation/dF_state.py index 09d685a9..c050cfe5 100644 --- a/src/alchemlyb/visualisation/dF_state.py +++ b/src/alchemlyb/visualisation/dF_state.py @@ -1,7 +1,7 @@ """Functions for Plotting the dF states. To assess the quality of the free energy estimation, The dF between adjacent -lambda states can be ploted to assess the quality of the estimation. +lambda states can be plotted to assess the quality of the estimation. The code for producing the dF states plot is modified based on `Alchemical Analysis `_. @@ -266,3 +266,4 @@ def plot_dF_state( leg.get_frame().set_alpha(0.5) return fig +