From 84d6639848e71aaea82d609ce9af6a2e6bb35505 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=AA=20Nguy=C3=AAn=20Hoang?= Date: Sun, 5 Jan 2025 22:47:38 +0100 Subject: [PATCH] WIP Debugging pipeline --- solidago/src/solidago/pipeline/base.py | 14 ++++-- .../src/solidago/pipeline/scaling/__init__.py | 2 +- .../src/solidago/pipeline/scaling/mehestan.py | 24 +++++----- ...antile_zero_shift.py => quantile_shift.py} | 9 +--- solidago/src/solidago/pipeline/sequential.py | 14 +++++- .../src/solidago/state/models/user_models.py | 9 ++-- solidago/tests/pipeline/test_pipeline.json | 45 ++++++++++--------- 7 files changed, 67 insertions(+), 50 deletions(-) rename solidago/src/solidago/pipeline/scaling/{quantile_zero_shift.py => quantile_shift.py} (91%) diff --git a/solidago/src/solidago/pipeline/base.py b/solidago/src/solidago/pipeline/base.py index f994ed3043..956c810ab6 100644 --- a/solidago/src/solidago/pipeline/base.py +++ b/solidago/src/solidago/pipeline/base.py @@ -122,10 +122,16 @@ def json_keys(self) -> list: def __str__(self) -> str: return repr(self) - def __repr__(self) -> str: - return f"{type(self).__name__}(\n\t" + "\n\t".join([ - f"{key}={getattr(self, key)}" for key in self.json_keys() - ]) + "\n)" + def __repr__(self, n_indents: int=0) -> str: + def sub_repr(key): + value = getattr(self, key) + return value.__repr__(n_indents + 1) if isinstance(value, StateFunction) else value + + indent = "\t" * (n_indents + 1) + last_indent = "\t" * n_indents + return f"{type(self).__name__}(\n{indent}" + f",\n{indent}".join([ + f"{key}={sub_repr(key)}" for key in self.json_keys() + ]) + f"\n{last_indent})" def to_json(self): return type(self).__name__, { key: getattr(self, key) for key in self.json_keys() } diff --git a/solidago/src/solidago/pipeline/scaling/__init__.py b/solidago/src/solidago/pipeline/scaling/__init__.py index 702f6931ef..f21e05847e 100644 --- a/solidago/src/solidago/pipeline/scaling/__init__.py +++ b/solidago/src/solidago/pipeline/scaling/__init__.py @@ -8,5 +8,5 @@ """ from .mehestan import Mehestan -from .quantile_zero_shift import QuantileShift, QuantileZeroShift +from .quantile_shift import QuantileShift, QuantileZeroShift from .standardize import Standardize diff --git a/solidago/src/solidago/pipeline/scaling/mehestan.py b/solidago/src/solidago/pipeline/scaling/mehestan.py index 4de2628c3e..5ccb06d9ac 100644 --- a/solidago/src/solidago/pipeline/scaling/mehestan.py +++ b/solidago/src/solidago/pipeline/scaling/mehestan.py @@ -16,18 +16,18 @@ class Mehestan(StateFunction): def __init__(self, - lipschitz=0.1, - min_activity=10.0, - n_scalers_max=100, - privacy_penalty=0.5, - large_number_of_activities=1000, - user_comparison_lipschitz=10.0, - p_norm_for_multiplicative_resilience=4.0, - n_entity_to_fully_compare_max=100, - n_diffs_sample_max=1000, - default_multiplicator_dev=0.8, - default_translation_dev=1., - error=1e-5 + lipschitz: float=0.1, + min_activity: float=10.0, + n_scalers_max: float=100, + privacy_penalty: float=0.5, + large_number_of_activities: float=1000, + user_comparison_lipschitz: float=10.0, + p_norm_for_multiplicative_resilience: float=4.0, + n_entity_to_fully_compare_max: float=100, + n_diffs_sample_max: float=1000, + default_multiplicator_dev: float=0.8, + default_translation_dev: float=1., + error: float=1e-5 ): """ Mehestan performs Lipschitz-resilient collaborative scaling. It is based on "Robust Sparse Voting", by Youssef Allouah, diff --git a/solidago/src/solidago/pipeline/scaling/quantile_zero_shift.py b/solidago/src/solidago/pipeline/scaling/quantile_shift.py similarity index 91% rename from solidago/src/solidago/pipeline/scaling/quantile_zero_shift.py rename to solidago/src/solidago/pipeline/scaling/quantile_shift.py index bfe673709d..c0b986af66 100644 --- a/solidago/src/solidago/pipeline/scaling/quantile_zero_shift.py +++ b/solidago/src/solidago/pipeline/scaling/quantile_shift.py @@ -60,17 +60,10 @@ def __call__(self, entities: Entities, user_models: UserModels) -> UserModels: class QuantileZeroShift(QuantileShift): - def __init__( - self, + def __init__(self, zero_quantile: float = 0.15, lipschitz: float = 0.1, error: float = 0.00001 ): super().__init__(zero_quantile, target_score=0.0, lipschitz=lipschitz, error=error) - def to_json(self): - return type(self).__name__, dict( - zero_quantile=self.quantile, - lipschitz=self.lipschitz, - error=self.error - ) diff --git a/solidago/src/solidago/pipeline/sequential.py b/solidago/src/solidago/pipeline/sequential.py index f57c566e37..499308c6b7 100644 --- a/solidago/src/solidago/pipeline/sequential.py +++ b/solidago/src/solidago/pipeline/sequential.py @@ -16,7 +16,13 @@ class Sequential(StateFunction): def __init__(self, **kwargs): super().__init__() for key, value in kwargs.items(): - setattr(self, key, value) + if isinstance(value, StateFunction): + setattr(self, key, value) + elif isinstance(value, (list, tuple)) and len(value) == 2: + import solidago.pipeline as pipeline + setattr(self, key, getattr(pipeline, value[0])(**value[1])) + else: + print(f"Sequential.__init__: Got unhandled input key={key}, type(value)={type(value).__name__}") @property def modules(self): @@ -48,3 +54,9 @@ def load(cls, d: Union[dict, str]) -> "Sequential": d = json.load(d) import solidago.pipeline as pipeline return cls(**{ key: getattr(pipeline, d[key][0])(**d[key][1]) for key in d }) + + def json_keys(self) -> list: + return list( + key for key in self.__dict__ + if key[0] != "_" and hasattr(self, key) + ) diff --git a/solidago/src/solidago/state/models/user_models.py b/solidago/src/solidago/state/models/user_models.py index fe63bdf722..cadf882c6a 100644 --- a/solidago/src/solidago/state/models/user_models.py +++ b/solidago/src/solidago/state/models/user_models.py @@ -22,20 +22,21 @@ def default_value(self) -> ScoringModel: def score(self, entity: Union[str, "Entity", "Entities"]) -> MultiScore: from solidago.state import Entity, Entities if isinstance(entity, (str, Entity)): - result = NestedDictOfTuples(key_names=["username", "criterion"]) + result = MultiScore(key_names=["username", "criterion"]) for username, model in self: multiscore = model(entity) for criterion, score in multiscore: - result[username, criterion] = score.to_triplet() + result[username, criterion] = score return result assert isinstance(entity, Entities) entities = entity - result = NestedDictOfTuples(key_names=["username", "entity_name", "criterion"]) + result = MultiScore(key_names=["username", "entity_name", "criterion"]) for username, model in self: for entity in model.evaluated_entities(entities): multiscore = model(entity) for criterion, score in multiscore: - result[username, str(entity), criterion] = score.to_triplet() + result[username, str(entity), criterion] = score + return result def __getitem__(self, user: Union[str, "User"]) -> ScoringModel: if str(user) not in self.keys(): diff --git a/solidago/tests/pipeline/test_pipeline.json b/solidago/tests/pipeline/test_pipeline.json index 1db5b846c5..ac1bf9e93d 100644 --- a/solidago/tests/pipeline/test_pipeline.json +++ b/solidago/tests/pipeline/test_pipeline.json @@ -18,26 +18,31 @@ "overtrust_ratio": 0.1 }], "scaling": ["Sequential", { - "collaborative_scaling": ["Mehestan", { - "lipschitz": 1, - "min_activity": 1, - "n_scalers_max": 100, - "privacy_penalty": 0.5, - "p_norm_for_multiplicative_resilience": 4.0, - "error": 1e-05 - }], - "common_translation": ["QuantileZeroShift", { - "zero_quantile": 0.15, - "lipschitz": 0.1, - "error": 1e-05 - }], - "common_multiplicator": ["Standardize", { - "zero_quantile": 0.15, - "lipschitz": 0.1, - "error": 1e-05 - }] - } - ], + "collaborative_scaling": ["Mehestan", { + "lipschitz": 1, + "min_activity": 1, + "n_scalers_max": 100, + "privacy_penalty": 0.5, + "large_number_of_activities": 1000, + "user_comparison_lipschitz": 10.0, + "p_norm_for_multiplicative_resilience": 4.0, + "n_entity_to_fully_compare_max": 100, + "n_diffs_sample_max": 1000, + "default_multiplicator_dev": 0.8, + "default_translation_dev": 1.0, + "error": 1e-05 + }], + "common_translation": ["QuantileZeroShift", { + "zero_quantile": 0.15, + "lipschitz": 0.1, + "error": 1e-05 + }], + "common_multiplicator": ["Standardize", { + "dev_quantile": 0.9, + "lipschitz": 0.1, + "error": 1e-05 + }] + }], "aggregation": ["EntitywiseQrQuantile", { "quantile": 0.2, "lipschitz": 0.1,