Clarified optimizer (double descent especially).
WIP test/debug PreferenceLearning
lenhoanglnh committed Jan 9, 2025
1 parent ce41f32 commit c6659f6
Showing 51 changed files with 4,899 additions and 8,830 deletions.
33 changes: 16 additions & 17 deletions solidago/src/solidago/_generative_model/_assessment/base.py
@@ -12,27 +12,26 @@ def __call__(self,
assessments: Assessments
) -> Assessments:
""" Fills in the assessments """
filled_assessments = Assessments()
for (username, criterion, entity_name), assessment_list in assessments:
filled_assessments[username, criterion, entity_name] = list()
for index, assessment in enumerate(assessment_list):
user = users.get(username)
entity = entities.get(entity_name)
public = made_public[user, entity]
a, a_min, a_max = self.sample(assessment, user, entity, public, criterion)
filled_assessments.add_row((user, criterion, entity), dict(assessment) | {
"assessment": a,
"assessment_min": a_min,
"assessment_max": a_max,
})
return filled_assessments
result = Assessments()
for (username, criterion, entity_name), assessment in assessments:
assessment = self.sample(
assessment=assessment,
user=users.get(username),
entity=entities.get(entity_name),
public=made_public[username, entity_name],
criterion=criterion
)
result.add_row((username, criterion, entity_name), assessment)
return result

def sample(self,
assessment: Assessment,
user: User,
entity: Entity,
public: bool,
criterion: str
) -> tuple[float, float, float]:
""" Returns assessment min, max and value """
return np.random.random(), 0, 1
) -> Assessment:
assessment["assessment"] = np.random.random()
assessment["assessment_min"] = 0
assessment["assessment_max"] = 1
return assessment
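
For orientation, the refactor changes the generator contract: sample no longer returns an (assessment, min, max) tuple but fills the Assessment row in place and returns it, and __call__ iterates over (keys, assessment) pairs rather than per-key lists. Concrete generators such as NormalAssessmentGenerator below follow this pattern; a minimal illustrative subclass might look as follows (the class name and the clipped-normal distribution are made up, and AssessmentGenerator is assumed importable from the module above):

import numpy as np

class ClippedNormalAssessmentGenerator(AssessmentGenerator):  # hypothetical example
    def sample(self, assessment, user, entity, public, criterion):
        # Fill the Assessment row in place, as the new base-class default does
        assessment["assessment"] = float(np.clip(np.random.normal(0.5, 0.2), 0.0, 1.0))
        assessment["assessment_min"] = 0
        assessment["assessment_max"] = 1
        return assessment
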
13 changes: 8 additions & 5 deletions solidago/src/solidago/_generative_model/_assessment/normal.py
@@ -8,17 +8,20 @@
class NormalAssessmentGenerator(AssessmentGenerator):
def __init__(self, error_size: float=1):
self.error_size = error_size

def sample(self,
assessment: Assessment,
user: VectorUser,
entity: VectorEntity,
user: User,
entity: Entity,
public: bool,
criterion: str
) -> tuple[float, float, float]:
) -> Assessment:
score = user.vector @ entity.vector / sqrt(user.vector.size)
if "is_trustworthy" in user and not user["is_trustworthy"]:
score = - score
else:
score += self.error_size * normal()
return score, -float("inf"), float("inf")
assessment["assessment"] = score
assessment["assessment_min"] = -float("inf")
assessment["assessment_max"] = float("inf")
return assessment
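
Here the generated assessment is the normalized dot product between the user and entity vectors, negated for untrustworthy users and otherwise perturbed by Gaussian noise of scale error_size, with infinite bounds. A tiny standalone illustration of that formula (the vectors and dimension are made up):

import numpy as np

rng = np.random.default_rng(0)
d = 4
user_vector = rng.normal(size=d)     # hypothetical user preference vector
entity_vector = rng.normal(size=d)   # hypothetical entity feature vector
error_size = 1.0

score = user_vector @ entity_vector / np.sqrt(d)  # latent agreement score
score += error_size * rng.normal()                # noise for a trustworthy user
print(score)  # unbounded: assessment_min / assessment_max are -inf / +inf
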
35 changes: 18 additions & 17 deletions solidago/src/solidago/_generative_model/_comparison/base.py
@@ -9,29 +9,30 @@
class ComparisonGenerator(StateFunction):
def __call__(self, users: Users, entities: Entities, made_public: MadePublic, comparisons: Comparisons) -> Comparisons:
""" Fills in the comparisons """
filled_comparisons = Comparisons()
for (username, criterion, left_name, right_name), comparisons_list in comparisons:
filled_comparisons[username, criterion, left_name, right_name] = list()
for index, comparison in enumerate(comparisons_list):
user = users.get(username)
left = entities.get(left_name)
right = entities.get(right_name)
left_public = made_public[user, left]
right_public = made_public[user, right]
comparison_value, comparison_max = self.sample(user, left, right, left_public, right_public, criterion)
filled_comparisons.add_row((user, criterion, left, right), dict(comparison) | {
"comparison_max": comparison_max,
"comparison": comparison_value
})
return filled_comparisons
result = Comparisons()
for (username, criterion, left_name, right_name), comparison in comparisons:
comparison = self.sample(
comparison=comparison,
user=users.get(username),
left=entities.get(left_name),
right=entities.get(right_name),
left_public=made_public[username, left_name],
right_public=made_public[username, right_name],
criterion=criterion
)
result.add_row((username, criterion, left_name, right_name), comparison)
return result

def sample(self,
comparison: Comparison,
user: User,
left: Entity,
right: Entity,
left_public: bool,
right_public: bool,
criterion: str
) -> tuple[float, float]:
) -> Comparison:
""" Returns comparison max and value """
return (2 * np.random.random() - 1)**2, 1
comparison["comparison"] = (2 * np.random.random() - 1)**2
comparison["comparison_max"] = 1
return comparison
10 changes: 6 additions & 4 deletions solidago/src/solidago/_generative_model/_comparison/thurston.py
@@ -21,20 +21,22 @@ def score_matrix(self, users: VectorUsers, entities: VectorEntities):
return users.vectors @ entities.vectors.T / users.vectors.shape[1]

def sample(self,
comparison: Comparison,
user: User,
left: Entity,
right: Entity,
left_public: bool,
right_public: bool,
criterion: str
) -> tuple[float, float]:
) -> Comparison:
""" `lpublic` and `rpublic` are not used.
Returns comparison max and value. """
score_diff = (user.vector @ (right.vector - left.vector)) / np.sqrt(user.vector.size)
comparison = self.sample_comparison(score_diff)
comparison["comparison"] = self.sample_comparison(score_diff)
if "is_trustworthy" in user and not user["is_trustworthy"]:
comparison = - comparison
return comparison, self.comparison_max
comparison["comparison"] = - comparison["comparison"]
comparison["comparison_max"] = self.comparison_max
return comparison

@abstractmethod
def sample_comparison(self, score_diff: float) -> float:
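
In this Thurstone-style generator the latent difference is user.vector @ (right.vector - left.vector) / sqrt(d); the abstract sample_comparison then turns it into a comparison value, which is negated for untrustworthy users and paired with comparison_max. A rough standalone sketch of such a mapping (the tanh squashing is purely an illustrative stand-in, not the library's root-law-specific implementation):

import numpy as np

def sample_comparison(score_diff: float, comparison_max: float = 1.0) -> float:
    # Illustrative only: squash the latent score difference into
    # (-comparison_max, comparison_max); concrete subclasses derive
    # the real mapping from their root law.
    return comparison_max * float(np.tanh(score_diff))

print(sample_comparison(0.8))   # a bounded comparison whose sign follows score_diff
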
@@ -6,26 +6,28 @@
import numpy as np
import numpy.typing as npt

import solidago.primitives.dichotomy
import solidago.primitives.dichotomy as dichotomy

from solidago._state import *
from solidago.primitives.optimize import coordinate_descent, njit_brentq
from .base import PreferenceLearning


class GeneralizedBradleyTerry(PreferenceLearning):
def __init__(self,
prior_std_dev: float=7.0,
uncertainty_nll_increase: float=1.0,
max_uncertainty: float=1e3
max_uncertainty: float=1e3,
last_comparison_only: bool=True,
):
""" Generalized Bradley Terry is a class of porbability models of comparisons,
introduced in the paper "Generalized Bradley-Terry Models for Score Estimation
from Paired Comparisons" by Julien Fageot, Sadegh Farhadkhani, Lê-Nguyên Hoang
and Oscar Villemaud, and published at AAAI'24.
This implementation leverages coordinate descent, and makes heavy use of numba
to accelerate the computations.
Note that this class only defines the key objects of Generalized Bradley Terry,
without specification of (1) the root law and (2) the optimization method to
compute the maximum a posteriori. Nevertheless, it does implement uncertainty
estimation given the maximum a posteriori, using dichotomic search.
Parameters
----------
@@ -42,6 +44,7 @@ def __init__(self,
self.prior_std_dev = prior_std_dev
self.uncertainty_nll_increase = uncertainty_nll_increase
self.max_uncertainty = max_uncertainty
self.last_comparison_only = last_comparison_only

@abstractmethod
def cumulant_generating_function_derivative(self, score_diffs: Mapping[int, float]) -> Mapping[int, float]:
@@ -169,20 +172,21 @@ def compute_uncertainties(self,
rights[i] is the right uncertainty on scores[i]
"""
compared_entity_indices = comparisons.compared_entity_indices(entity_name2index)
score_diffs = scores[compared_entity_indices["left"]] - scores[compared_entity_indices["right"]]
indices = { loc: np.array(compared_entity_indices[loc]) for loc in ("left", "right") }
score_diffs = scores[indices["left"]] - scores[indices["right"]]
normalized_comparisons = comparisons.normalized_comparisons()
score_log_likelihood = self.negative_log_likelihood(score_diffs, normalized_comparisons)
score_negative_log_likelihood = self.negative_log_likelihood(score_diffs, normalized_comparisons)

kwargs = dict(
self.translated_negative_log_likelihood,
f=self.translated_negative_log_likelihood,
value=score_negative_log_likelihood + self.uncertainty_nll_increase,
error=1e-1,
)

lefts = np.empty_like(scores)
rights = np.empty_like(scores)
for i in range(len(scores)):
indicators = (1 *(left_indices == i) - 1 *(right_indices == i)).to_numpy()
indicators = 1 *(indices["left"] == i) - 1 *(indices["right"] == i)
kwargs["args"] = (score_diffs, normalized_comparisons, indicators)
try:
lefts[i] = - dichotomy.solve(xmin=-self.max_uncertainty, xmax=0.0, **kwargs)
@@ -249,24 +253,40 @@ def translated_negative_log_likelihood(self,
is being estimated.
"""
deviated_score_diffs = indicators * delta + score_diffs
return self.negative_log_likelihood_function(deviated_score_diffs, normalized_comparisons)
return self.negative_log_likelihood(deviated_score_diffs, normalized_comparisons)
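
The uncertainty machinery works as follows: for each entity i, compute_uncertainties searches for the shift of score i (other scores held fixed, hence the indicators vector) that raises the comparison negative log-likelihood by uncertainty_nll_increase above its value at the learned scores; the negative solution, searched in [-max_uncertainty, 0], gives the left uncertainty, and the right one is presumably found symmetrically below the fold. A self-contained sketch of that idea with a plain bisection and a toy one-dimensional likelihood (this is not Solidago's dichotomy.solve API):

import numpy as np

def toy_translated_nll(delta: float) -> float:
    # Toy stand-in for translated_negative_log_likelihood along one coordinate
    return 5.0 * (delta - 0.3) ** 2

def bisect(f, target, xmin, xmax, error=1e-9):
    # Plain bisection on f(x) - target, assuming a single sign change in [xmin, xmax]
    lo, hi = xmin, xmax
    sign_lo = np.sign(f(lo) - target)
    while hi - lo > error:
        mid = (lo + hi) / 2
        if np.sign(f(mid) - target) == sign_lo:
            lo = mid
        else:
            hi = mid
    return (lo + hi) / 2

uncertainty_nll_increase = 1.0
target = toy_translated_nll(0.0) + uncertainty_nll_increase
left_uncertainty = -bisect(toy_translated_nll, target, xmin=-1e3, xmax=0.0)
right_uncertainty = bisect(toy_translated_nll, target, xmin=0.0, xmax=1e3)
print(left_uncertainty, right_uncertainty)  # roughly 0.239 and 0.839
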


class UniformGBT(GeneralizedBradleyTerry):
def __init__(self,
prior_std_dev: float = 7.0,
uncertainty_nll_increase: float = 1.0,
max_uncertainty: float=1e3,
last_comparison_only: bool=True,
):
"""
Parameters (TODO)
""" UniformGBT is the specific instance of the generalized Bradley-Terry models
with a uniform distribution over [-1, 1] as a root law. Find out more
in the paper "Generalized Bradley-Terry Models for Score Estimation
from Paired Comparisons" by Julien Fageot, Sadegh Farhadkhani, Lê-Nguyên Hoang
and Oscar Villemaud, and published at AAAI'24.
Parameters
----------
prior_std_dev: float=7.0
Typical scale of scores.
Technically, it is the standard deviation of the Gaussian prior.
uncertainty_nll_increase: float=1.0
To determine the uncertainty, we compute left_unc (respectively, right_unc)
such that score - left_unc (respectively, score + right_unc) has a likelihood
exp(uncertainty_nll_increase) times lower than that of score.
max_uncertainty: float=1e3
Replaces infinite uncertainties with max_uncertainty
"""
super().__init__(
prior_std_dev=prior_std_dev,
uncertainty_nll_increase=uncertainty_nll_increase,
max_uncertainty=max_uncertainty,
last_comparison_only=last_comparison_only,
)
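
Before the method below: in the Generalized Bradley-Terry family the cumulant generating function is that of the root law, and for the uniform root law on [-1, 1] it is log E[exp(c * x)] = log(sinh(x) / x), which is what cumulant_generating_function evaluates. A quick numerical sanity check of that identity (the grid size and test point are arbitrary):

import numpy as np

# Compare log E[exp(c * x)] for c ~ Uniform[-1, 1] against the closed form
x = 2.3
c = np.linspace(-1.0, 1.0, 200_001)          # dense grid standing in for the uniform law
numeric_cgf = np.log(np.mean(np.exp(c * x)))
closed_form = np.log(np.sinh(x) / x)
print(numeric_cgf, closed_form)              # both approximately 0.764
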

def cumulant_generating_function(self, score_diffs: npt.NDArray) -> npt.NDArray:
@@ -277,9 +297,9 @@ def cumulant_generating_function(self, score_diffs: npt.NDArray) -> npt.NDArray:
"""
score_diffs_abs = np.abs(score_diffs)
return np.where(
score_diffs_abs > 1e-1,
score_diffs_abs > 1,
np.where(
score_diffs_abs < 20.0,
score_diffs_abs < 10.0,
np.log(np.sinh(score_diffs) / score_diffs),
score_diffs_abs - np.log(2) - np.log(score_diffs_abs),
),
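
A note on the thresholds changed above: evaluating log(sinh(x) / x) naively overflows for large |x| and loses precision near 0, hence the three regimes — the exact expression for moderate |x|, the asymptotic |x| - log(2) - log|x| for large |x| (since sinh(x) ≈ exp(|x|) / 2 there), and a small-|x| branch hidden below the fold of this hunk, for which the Taylor expansion x²/6 - x⁴/180 is a natural choice. A standalone sketch of the piecewise evaluation (the thresholds follow the new values in the diff; the small-x polynomial is an assumption about the hidden branch):

import numpy as np

def uniform_gbt_cgf(score_diffs: np.ndarray) -> np.ndarray:
    # log(sinh(x) / x), evaluated stably over three regimes of |x|
    x = np.asarray(score_diffs, dtype=float)
    x_abs = np.abs(x)
    with np.errstate(divide="ignore", invalid="ignore", over="ignore"):
        exact = np.log(np.sinh(x) / x)             # accurate for moderate |x|
        large = x_abs - np.log(2) - np.log(x_abs)  # sinh(x) ~ exp(|x|) / 2
    small = x_abs**2 / 6 - x_abs**4 / 180          # Taylor expansion around 0
    return np.where(x_abs > 1, np.where(x_abs < 10.0, exact, large), small)

print(uniform_gbt_cgf(np.array([1e-8, 0.5, 3.0, 50.0])))
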
@@ -77,42 +77,22 @@ def compute_scores(self,
) -> npt.NDArray:
""" Computes the scores given comparisons """
entity_ordered_comparisons = comparisons.order_by_entities()
def get_derivative_args(entity_index: int, scores: np.ndarray):
def get_partial_derivative_args(entity_index: int, scores: np.ndarray) -> tuple:
entity_name = entities.iloc[entity_index].name
df = entity_ordered_comparisons[entity_name].to_df()
normalized_comparisons = df["comparison"] / df["comparison_max"]
normalized_comparisons = np.array(df["comparison"] / df["comparison_max"])
indices = df["other_name"].map(entity_name2index)
return scores[indices], normalized_comparisons

return coordinate_descent(
self.update_coordinate_function,
get_args=get_derivative_args,
self.partial_derivative,
get_partial_derivative_args=get_partial_derivative_args,
initialization=self.init_scores(entity_name2index, init_multiscores),
error=self.convergence_error,
)

@cached_property
def update_coordinate_function(self) -> Callable[[npt.NDArray, npt.NDArray, float], float]:
xtol = self.convergence_error / 10
partial_derivative = self.partial_derivative

@njit
def njit_update_coordinate_function(
compared_scores: npt.NDArray,
compared_comparisons: npt.NDArray,
init: float
) -> float:
return njit_brentq(
partial_derivative,
args=(compared_scores, compared_comparisons),
xtol=xtol,
a=old_coordinate_value - 1,
b=old_coordinate_value + 1
)
return njit_update_coordinate_function
)

@cached_property
def partial_derivative(self) -> Callable[[float, npt.NDArray, npt.NDArray], float]:
def partial_derivative(self) -> Callable[[int, np.ndarray[np.float64], dict, dict], float]:
""" Computes the partial derivative along a coordinate,
for a given value along the coordinate,
when other coordinates' values are given by the solution.
@@ -124,12 +104,15 @@ def partial_derivative(self) -> Callable[[float, npt.NDArray, npt.NDArray], float]:

@njit
def njit_partial_derivative(
value: float,
coordinate: int,
scores: float,
compared_scores: npt.NDArray,
compared_comparisons: npt.NDArray,
normalized_comparisons: npt.NDArray,
) -> npt.NDArray:
score_diffs = value - compared_scores
return (value / prior_var) + np.sum(cfg_deriv(score_diffs) - compared_comparisons)
score_diffs = scores[coordinate] - compared_scores
nll_derivative = np.sum(cfg_deriv(score_diffs) - normalized_comparisons)
prior_derivative = scores[coordinate] / prior_var
return prior_derivative + nll_derivative

return njit_partial_derivative

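
The rewritten partial derivative is the gradient coordinate of the negative log-posterior: for entity i it equals scores[i] / prior_var plus the sum, over entities j compared with i, of cgf_derivative(scores[i] - scores[j]) minus the normalized comparison comparison / comparison_max. Coordinate descent then root-finds this expression in scores[i], one coordinate at a time (the removed update_coordinate_function did this with njit_brentq), until updates fall below convergence_error. A standalone numerical sketch of the formula (the tanh stand-in for the root law's CGF derivative and prior_std_dev**2 as prior_var are illustrative assumptions):

import numpy as np

def cgf_derivative(score_diffs: np.ndarray) -> np.ndarray:
    # Illustrative stand-in; each GBT variant supplies its own CGF derivative
    return np.tanh(score_diffs)

def partial_derivative(coordinate, scores, compared_scores, normalized_comparisons,
                       prior_std_dev=7.0):
    # Mirrors njit_partial_derivative above: prior term + comparison term
    score_diffs = scores[coordinate] - compared_scores
    prior_term = scores[coordinate] / prior_std_dev**2
    nll_term = np.sum(cgf_derivative(score_diffs) - normalized_comparisons)
    return prior_term + nll_term

scores = np.array([0.0, 1.0, -0.5])
# Entity 0 was compared with entities 1 and 2; comparisons are already normalized
print(partial_derivative(0, scores,
                         compared_scores=scores[np.array([1, 2])],
                         normalized_comparisons=np.array([0.8, -0.2])))
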
6 changes: 0 additions & 6 deletions solidago/src/solidago/_state/_assessments/base.py
@@ -19,12 +19,6 @@ def __init__(self,
):
super().__init__(d, key_names, save_filename)

def default_value(self) -> list:
return list()

def process_stored_value(self, keys: list[str], stored_value: list[dict]) -> list[Assessment]:
return [self.row_cls(v) for v in stored_value]

def get_evaluators(self, entity: Union[str, "Entity"]) -> set[str]:
return self[{ "entity_name": entity }].get_set("username")
