Moving average (alchemistry#381)

* Add moving_average function for visualization and convergence testing * Update versionadded * Run Black * Bug fix bar_.py states * Update Changelog * Update the docs * Add tests * Formatting to align with Black * Update tests * Refactor moving_average to align with forward_backward_convergence function * Update tests * Update test_convergence and lambda tests in convergence.moving_average * Adjust convergence.py and tests for codecoverage * black * Update moving_average to block_average for more accurate descriptive name * Address reviewer comments * Update test to align with changed handling of dfs of different length in block_average * Remove incorrect popagation of error in BAR * Add tests and error catch for ill constructed BAR input, u_nk * black * Updated version comments --------- Co-authored-by: Oliver Beckstein <orbeckst@gmail.com>
jaclark5 · Nov 15, 2024 · c17f1f5 · c17f1f5
1 parent 160e109
commit c17f1f5
Show file tree

Hide file tree

Showing 7 changed files with 72 additions and 1 deletion.
diff --git a/CHANGES b/CHANGES
@@ -33,7 +33,7 @@ Fixes
 
 09/17/2024 jaclark5, orbeckst
 
-  * 2.4.0
+  * 2.4.1
 
 Enhancements
   - Addition of `block_average` function in both `convergence` and 

diff --git a/src/alchemlyb/convergence/__init__.py b/src/alchemlyb/convergence/__init__.py
@@ -1,6 +1,10 @@
+<<<<<<< HEAD
 from .convergence import (
     forward_backward_convergence,
     fwdrev_cumavg_Rc,
     A_c,
     block_average,
 )
+=======
+from .convergence import forward_backward_convergence, fwdrev_cumavg_Rc, A_c, block_average
+>>>>>>> 9eb22a1 (Moving average (#381))
diff --git a/src/alchemlyb/convergence/convergence.py b/src/alchemlyb/convergence/convergence.py
@@ -39,7 +39,11 @@ def forward_backward_convergence(
            Lower case input is also accepted until release 2.0.0.
     num : int
         The number of blocks used to divide *each* DataFrame and progressively add
+<<<<<<< HEAD
         to assess convergence. Note that if the DataFrames are different lengths,
+=======
+        to assess convergence. Note that if the DataFrames are different lengths, 
+>>>>>>> 9eb22a1 (Moving average (#381))
         the number of samples contributed with each block will be different.
     error_tol : float
         The maximum error tolerated for analytic error. If the analytic error is
@@ -48,7 +52,11 @@ def forward_backward_convergence(
         .. versionadded:: 2.3.0
         .. versionchanged:: 2.4.0
            Clarified docstring, removed incorrect estimation of std for cumulative
+<<<<<<< HEAD
            result in bar and added check that only a single lambda state is
+=======
+           result in bar and added check that only a single lambda state is 
+>>>>>>> 9eb22a1 (Moving average (#381))
            represented in the indices of each df in df_list.
 
     kwargs : dict
@@ -100,11 +108,16 @@ def forward_backward_convergence(
         # select estimator class by name
         my_estimator = estimators_dispatch[estimator](**kwargs)
         logger.info(f"Use {estimator} estimator for convergence analysis.")
+<<<<<<< HEAD
 
+=======
+
+>>>>>>> 9eb22a1 (Moving average (#381))
     # Check that each df in the list has only one value of lambda
     for i, df in enumerate(df_list):
         lambda_values = list(set([x[1:] for x in df.index.to_numpy()]))
         if len(lambda_values) > 1:
+<<<<<<< HEAD
             ind = [
                 j
                 for j in range(len(lambda_values[0]))
@@ -116,6 +129,13 @@ def forward_backward_convergence(
                 )
             )
 
+=======
+            ind  = [j for j in range(len(lambda_values[0])) if len(list(set([x[j] for x in lambda_values]))) > 1][0]
+            raise ValueError(
+                "Provided DataFrame, df_list[{}] has more than one lambda value in df.index[{}]".format(i, ind)
+            )
+
+>>>>>>> 9eb22a1 (Moving average (#381))
     logger.info("Begin forward analysis")
     forward_list = []
     forward_error_list = []
@@ -466,6 +486,7 @@ def block_average(df_list, estimator="MBAR", num=10, **kwargs):
     for i, df in enumerate(df_list):
         lambda_values = list(set([x[1:] for x in df.index.to_numpy()]))
         if len(lambda_values) > 1:
+<<<<<<< HEAD
             ind = [
                 j
                 for j in range(len(lambda_values[0]))
@@ -475,14 +496,23 @@ def block_average(df_list, estimator="MBAR", num=10, **kwargs):
                 "Provided DataFrame, df_list[{}] has more than one lambda value in df.index[{}]".format(
                     i, ind
                 )
+=======
+            ind  = [j for j in range(len(lambda_values[0])) if len(list(set([x[j] for x in lambda_values]))) > 1][0]
+            raise ValueError(
+                "Provided DataFrame, df_list[{}] has more than one lambda value in df.index[{}]".format(i, ind)
+>>>>>>> 9eb22a1 (Moving average (#381))
             )
 
     if estimator in ["BAR"] and len(df_list) > 2:
         raise ValueError(
             "Restrict to two DataFrames, one with a fep-lambda value and one its forward adjacent state for a "
             "meaningful result."
         )
+<<<<<<< HEAD
 
+=======
+
+>>>>>>> 9eb22a1 (Moving average (#381))
     logger.info("Begin Moving Average Analysis")
     average_list = []
     average_error_list = []

diff --git a/src/alchemlyb/estimators/bar_.py b/src/alchemlyb/estimators/bar_.py
@@ -104,24 +104,41 @@ def fit(self, u_nk):
             )
             for i in u_nk.columns
         ]
+<<<<<<< HEAD
 
         # Pull lambda states from indices
         states = list(set(x[1:] if len(x[1:]) > 1 else x[1] for x in u_nk.index))
         for state in states:
+=======
+
+        # Pull lambda states from indices
+        states = list(set( x[1:] for x in u_nk.index))
+        for state in states:
+            if len(state) == 1:
+                state = state[0]
+>>>>>>> 9eb22a1 (Moving average (#381))
             if state not in self._states_:
                 raise ValueError(
                     f"Indexed lambda state, {state}, is not represented in u_nk columns:"
                     f" {self._states_}"
                 )
+<<<<<<< HEAD
         states.sort(key=lambda x: self._states_.index(x))
 
+=======
+
+>>>>>>> 9eb22a1 (Moving average (#381))
         # Now get free energy differences and their uncertainties for each step
         deltas = np.array([])
         d_deltas = np.array([])
         for k in range(len(N_k) - 1):
             if N_k[k] == 0 or N_k[k + 1] == 0:
                 continue
+<<<<<<< HEAD
 
+=======
+
+>>>>>>> 9eb22a1 (Moving average (#381))
             # get us from lambda step k
             uk = groups.get_group(
                 self._states_[k]
@@ -161,7 +178,11 @@ def fit(self, u_nk):
                 "To compute the free energy with BAR, ensure that values in u_nk exist"
                 f" for the columns:\n{states}."
             )
+<<<<<<< HEAD
 
+=======
+
+>>>>>>> 9eb22a1 (Moving average (#381))
         # build matrix of deltas between each state
         adelta = np.zeros((len(deltas) + 1, len(deltas) + 1))
         ad_delta = np.zeros_like(adelta)

diff --git a/src/alchemlyb/estimators/mbar_.py b/src/alchemlyb/estimators/mbar_.py
@@ -94,12 +94,19 @@ class MBAR(BaseEstimator, _EstimatorMixOut):
     .. versionchanged:: 2.1.0
         `n_bootstraps` option added.
     .. versionchanged:: 2.4.0
+<<<<<<< HEAD
         Handle initial estimate, initial_f_k, from bar in the instance
         that not all lambda states represented as column headers are
         represented in the indices of u_nk.
     .. versionchanged:: 2.5.0
         Added computation of enthalpy and entropy
 
+=======
+       Handle initial estimate, initial_f_k, from bar in the instance
+       that not all lambda states represented as column headers are 
+       represented in the indices of u_nk.
+    
+>>>>>>> 9eb22a1 (Moving average (#381))
     """
 
     def __init__(

diff --git a/src/alchemlyb/visualisation/convergence.py b/src/alchemlyb/visualisation/convergence.py
@@ -59,7 +59,11 @@ def plot_convergence(dataframe, units=None, final_error=None, ax=None):
      .. versionchanged:: 0.6.0
          data now takes in dataframe
      .. versionadded:: 0.4.0
+<<<<<<< HEAD
 
+=======
+     
+>>>>>>> 9eb22a1 (Moving average (#381))
     """
     if units is not None:
         dataframe = get_unit_converter(units)(dataframe)
@@ -192,7 +196,11 @@ def plot_block_average(dataframe, units=None, final_error=None, ax=None):
      `Alchemical Analysis <https://github.com/MobleyLab/alchemical-analysis>`_.
 
      .. versionadded:: 2.4.0
+<<<<<<< HEAD
 
+=======
+     
+>>>>>>> 9eb22a1 (Moving average (#381))
     """
     if units is not None:
         dataframe = get_unit_converter(units)(dataframe)

diff --git a/src/alchemlyb/visualisation/dF_state.py b/src/alchemlyb/visualisation/dF_state.py
@@ -266,3 +266,4 @@ def plot_dF_state(
 
     leg.get_frame().set_alpha(0.5)
     return fig
+
-Original file line number
+Diff line change
@@ Expand Up / @@ -33,7 +33,7 @@ Fixes @@
 /17/2024 jaclark5, orbeckst
-      * 2.4.0
+      * 2.4.1
     Enhancements
       - Addition of `block_average` function in both `convergence` and
@@ Expand Down @@
Original file line number	Diff line number	Diff line change
Expand Up		@@ -266,3 +266,4 @@ def plot_dF_state(

		leg.get_frame().set_alpha(0.5)
		return fig