diff --git a/solidago/src/solidago/pipeline/__init__.py b/solidago/src/solidago/pipeline/__init__.py
index 78e1422c8c..3f45d2e725 100644
--- a/solidago/src/solidago/pipeline/__init__.py
+++ b/solidago/src/solidago/pipeline/__init__.py
@@ -1,7 +1,7 @@
 from .trust_propagation import *
 from .preference_learning import *
 from .voting_rights import *
-# from .scaling import *
+from .scaling import *
 from .aggregation import *
 from .post_process import *

diff --git a/solidago/src/solidago/pipeline/aggregation/__init__.py b/solidago/src/solidago/pipeline/aggregation/__init__.py
index 18df0e3812..6b863472d6 100644
--- a/solidago/src/solidago/pipeline/aggregation/__init__.py
+++ b/solidago/src/solidago/pipeline/aggregation/__init__.py
@@ -4,8 +4,5 @@
 The aggregation may also adjust the user models to the learned global model.
 """

-from .base import Aggregation
 from .average import Average
 from .entitywise_qr_quantile import EntitywiseQrQuantile
-# from .standardized_qr_quantile import StandardizedQrQuantile
-# from .standardized_qr_median import StandardizedQrMedian
diff --git a/solidago/src/solidago/pipeline/aggregation/average.py b/solidago/src/solidago/pipeline/aggregation/average.py
index a7c5b598b0..889452016b 100644
--- a/solidago/src/solidago/pipeline/aggregation/average.py
+++ b/solidago/src/solidago/pipeline/aggregation/average.py
@@ -1,12 +1,10 @@
 import pandas as pd

-from solidago.voting_rights import VotingRights
-from solidago.scoring_model import ScoringModel, DirectScoringModel
+from solidago.state import *
+from solidago.pipeline.base import StateFunction

-from .base import Aggregation
-

-class Average(Aggregation):
+class Average(StateFunction):
     def main(self,
         entities: Entities,
         voting_rights: VotingRights,
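The hunks above retire the aggregation-specific abstract base in favour of the single `StateFunction` base from `solidago.pipeline.base`; the same substitution is repeated below for scaling, post-processing, trust propagation and voting rights. A hedged sketch of what the refactored stages add up to: the constructor arguments are the defaults visible in this diff, but how the stages are wired into a pipeline object is not shown here, so treat the composition itself as hypothetical.

```python
from solidago.pipeline.trust_propagation import LipschiTrust
from solidago.pipeline.voting_rights import AffineOvertrust
from solidago.pipeline.scaling import Mehestan, Standardize, QuantileZeroShift
from solidago.pipeline.aggregation import EntitywiseQrQuantile
from solidago.pipeline.post_process import Squash

# Each object below is now a StateFunction: it exposes a main() method that
# consumes and returns pieces of the shared pipeline state. Preference
# learning is omitted because this diff only shows its abstract base.
stages = [
    LipschiTrust(pretrust_value=0.8, decay=0.8, sink_vouch=5.0),
    AffineOvertrust(privacy_penalty=0.5, min_overtrust=2.0),
    Mehestan(lipschitz=0.1),
    Standardize(dev_quantile=0.9, lipschitz=0.1),
    QuantileZeroShift(),
    EntitywiseQrQuantile(quantile=0.2, lipschitz=0.1, error=1e-5),
    Squash(score_max=100.0),
]
```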
diff --git a/solidago/src/solidago/pipeline/aggregation/base.py b/solidago/src/solidago/pipeline/aggregation/base.py
deleted file mode 100644
index b8802357f8..0000000000
--- a/solidago/src/solidago/pipeline/aggregation/base.py
+++ /dev/null
@@ -1,46 +0,0 @@
-from abc import abstractmethod
-from typing import Mapping
-
-import pandas as pd
-
-from solidago.voting_rights import VotingRights
-from solidago.scoring_model import ScoringModel
-
-
-class Aggregation:
-    @abstractmethod
-    def __call__(
-        self,
-        voting_rights: VotingRights,
-        user_models: Mapping[int, ScoringModel],
-        users: pd.DataFrame,
-        entities: pd.DataFrame
-    ) -> tuple[dict[int, ScoringModel], ScoringModel]:
-        """ Returns scaled user models
-
-        Parameters
-        ----------
-        voting_rights: VotingRights
-            voting_rights[user, entity]: float
-        user_models: dict[int, ScoringModel]
-            user_models[user] is user's scoring model
-        users: DataFrame with columns
-            * user_id (int, index)
-            * trust_score (float)
-        entities: DataFrame with columns
-            * entity_id (int, ind)
-
-        Returns
-        -------
-        updated_user_models[user]: ScoringModel
-            Returns a scaled user model
-        global_model: ScoringModel
-            Returns a global scoring model
-        """
-        raise NotImplementedError
-
-    def to_json(self):
-        return (type(self).__name__, )
-
-    def __str__(self):
-        return type(self).__name__
diff --git a/solidago/src/solidago/pipeline/aggregation/entitywise_qr_quantile.py b/solidago/src/solidago/pipeline/aggregation/entitywise_qr_quantile.py
index ec3e9b2900..2c05ff038e 100644
--- a/solidago/src/solidago/pipeline/aggregation/entitywise_qr_quantile.py
+++ b/solidago/src/solidago/pipeline/aggregation/entitywise_qr_quantile.py
@@ -1,13 +1,12 @@
 import pandas as pd
 import numpy as np

-from .base import Aggregation
-
-from solidago.state import *
 from solidago.primitives.lipschitz import qr_quantile, qr_uncertainty
+from solidago.state import *
+from solidago.pipeline.base import StateFunction


-class EntitywiseQrQuantile(Aggregation):
+class EntitywiseQrQuantile(StateFunction):
     def __init__(self, quantile=0.2, lipschitz=0.1, error=1e-5):
         """ Standardize scores so that only a fraction 1 - dev_quantile
             of the scores is further than 1 away from the median,
@@ -38,7 +37,7 @@ def main(self,
         for criterion, scores_list in all_scores.items():
             if criterion not in rights:
                 continue
-            scores, left_uncs, right_uncs = [ np.array(l) for l in zip(*scores_list)) ]
+            scores, left_uncs, right_uncs = [ np.array(l) for l in zip(*scores_list) ]
             score = qr_quantile(
                 lipschitz=self.lipschitz,
                 quantile=self.quantile,
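`EntitywiseQrQuantile` feeds, for each entity and criterion, the users' scores, uncertainties and voting rights into `qr_quantile`. A small sketch with made-up numbers, calling the primitives positionally the way the removed `StandardizedQrQuantile` did (keyword names beyond `lipschitz` and `quantile` are not shown in this diff):

```python
import numpy as np

from solidago.primitives.lipschitz import qr_quantile, qr_uncertainty

# Hypothetical per-user data for one (entity, criterion) pair.
scores = np.array([1.2, -0.4, 0.8, 2.0])
left_uncs = np.array([0.3, 0.5, 0.2, 0.4])
right_uncs = np.array([0.3, 0.5, 0.2, 0.4])
voting_rights = np.array([1.0, 0.5, 1.0, 0.8])

# Global score: a Lipschitz-resilient 20% quantile of the user scores.
score = qr_quantile(0.1, 0.2, scores, voting_rights, left_uncs, right_uncs, 1e-5)
# Matching uncertainty estimate.
uncertainty = qr_uncertainty(0.1, scores, voting_rights, left_uncs, right_uncs,
                             default_dev=1, error=1e-5)
```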
diff --git a/solidago/src/solidago/pipeline/aggregation/standardized_qr_median.py b/solidago/src/solidago/pipeline/aggregation/standardized_qr_median.py
deleted file mode 100644
index 6b709418bf..0000000000
--- a/solidago/src/solidago/pipeline/aggregation/standardized_qr_median.py
+++ /dev/null
@@ -1,32 +0,0 @@
-import pandas as pd
-import numpy as np
-
-from .standardized_qr_quantile import StandardizedQrQuantile
-
-from solidago.voting_rights import VotingRights
-from solidago.scoring_model import ScoringModel, DirectScoringModel, ScaledScoringModel
-
-
-class StandardizedQrMedian(StandardizedQrQuantile):
-    def __init__(self, dev_quantile=0.9, lipschitz=0.1, error=1e-5):
-        """ Standardize scores so that only a fraction 1 - dev_quantile
-        of the scores is further than 1 away from the median,
-        and then run qr_median to aggregate the scores.
-
-        Parameters
-        ----------
-        qtl_std_dev: float
-        lipschitz: float
-        error: float
-        """
-        super().__init__(0.5, dev_quantile, lipschitz, error)
-
-    def to_json(self):
-        return type(self).__name__, dict(
-            dev_quantile=self.dev_quantile, lipschitz=self.lipschitz, error=self.error
-        )
-
-    def __str__(self):
-        prop_names = ["dev_quantile", "lipschitz", "error"]
-        prop = ", ".join([f"{p}={getattr(self, p)}" for p in prop_names])
-        return f"{type(self).__name__}({prop})"
diff --git a/solidago/src/solidago/pipeline/aggregation/standardized_qr_quantile.py b/solidago/src/solidago/pipeline/aggregation/standardized_qr_quantile.py
deleted file mode 100644
index 32287df3cb..0000000000
--- a/solidago/src/solidago/pipeline/aggregation/standardized_qr_quantile.py
+++ /dev/null
@@ -1,142 +0,0 @@
-import pandas as pd
-import numpy as np
-
-from .base import Aggregation
-
-from solidago.voting_rights import VotingRights
-from solidago.scoring_model import ScoringModel, DirectScoringModel, ScaledScoringModel
-
-from solidago.primitives import qr_quantile, qr_standard_deviation, qr_uncertainty
-
-
-class StandardizedQrQuantile(Aggregation):
-    def __init__(self, quantile=0.2, dev_quantile=0.9, lipschitz=0.1, error=1e-5):
-        """ Standardize scores so that only a fraction 1 - dev_quantile
-        of the scores is further than 1 away from the median,
-        and then run qr_median to aggregate the scores.
-
-        Parameters
-        ----------
-        qtl_std_dev: float
-        lipschitz: float
-        error: float
-        """
-        self.quantile = quantile
-        self.dev_quantile = dev_quantile
-        self.lipschitz = lipschitz
-        self.error = error
-
-    def __call__(
-        self,
-        voting_rights: VotingRights,
-        user_models: dict[int, ScoringModel],
-        users: pd.DataFrame,
-        entities: pd.DataFrame
-    ) -> tuple[dict[int, ScaledScoringModel], ScoringModel]:
-        """ Returns scaled user models
-
-        Parameters
-        ----------
-        voting_rights: VotingRights
-            voting_rights[user, entity]: float
-        user_models: dict[int, ScoringModel]
-            user_models[user] is user's scoring model
-        users: DataFrame with columns
-            * user_id (int, index)
-            * trust_score (float)
-        entities: DataFrame with columns
-            * entity_id (int, ind)
-
-        Returns
-        -------
-        updated_user_models[user]: ScoringModel
-            Returns a scaled user model
-        global_model: ScoringModel
-            Returns a global scoring model
-        """
-        df = _get_user_scores(voting_rights, user_models, entities)
-        std_dev = self._compute_std_dev(df)
-
-        scaled_models = {
-            user_id: ScaledScoringModel(scoring, 1/std_dev)
-            for user_id, scoring in user_models.items()
-        }
-        for column in ("scores", "left_uncertainties", "right_uncertainties"):
-            df[column] /= std_dev
-
-        global_scores = DirectScoringModel()
-        for entity_id, dfe in df.groupby("entity_id"):
-            if entity_id not in entities.index:
-                continue
-            score = qr_quantile(
-                self.lipschitz,
-                self.quantile,
-                np.array(dfe["scores"]),
-                np.array(dfe["voting_rights"]),
-                np.array(dfe["left_uncertainties"]),
-                np.array(dfe["right_uncertainties"]),
-                self.error
-            )
-            uncertainty = qr_uncertainty(
-                self.lipschitz,
-                np.array(dfe["scores"]),
-                np.array(dfe["voting_rights"]),
-                np.array(dfe["left_uncertainties"]),
-                np.array(dfe["right_uncertainties"]),
-                default_dev = 1,
-                error = self.error,
-            )
-            global_scores[entity_id] = score, uncertainty
-        return scaled_models, global_scores
-
-    def _compute_std_dev(self, df):
-        if len(df) == 0:
-            return 1.0
-        return qr_standard_deviation(
-            lipschitz=self.lipschitz,
-            values=df["scores"].to_numpy(),
-            quantile_dev=self.dev_quantile,
-            voting_rights=df["voting_rights"].to_numpy(),
-            left_uncertainties=df["left_uncertainties"].to_numpy(),
-            right_uncertainties=df["right_uncertainties"].to_numpy(),
-            default_dev=1.0,
-            error=self.error
-        )
-
-    def to_json(self):
-        return type(self).__name__, dict(
-            quantile=self.quantile, dev_quantile=self.dev_quantile,
-            lipschitz=self.lipschitz, error=self.error
-        )
-
-    def __str__(self):
-        prop_names = ["quantile", "dev_quantile", "lipschitz", "error"]
-        prop = ", ".join([f"{p}={getattr(self, p)}" for p in prop_names])
-        return f"{type(self).__name__}({prop})"
-
-
-def _get_user_scores(
-    voting_rights: VotingRights, user_models: dict[int, ScoringModel], entities: pd.DataFrame
-):
-    return pd.DataFrame(
-        (
-            dict(
-                user_id=user_id,
-                entity_id=entity_id,
-                voting_rights=voting_rights[user_id, entity_id],
-                scores=score,
-                left_uncertainties=left,
-                right_uncertainties=right,
-            )
-            for user_id, user_model in user_models.items()
-            for entity_id, (score, left, right) in user_model.iter_entities(entities)
-        ),
-        columns=[
-            "user_id",
-            "entity_id",
-            "voting_rights",
-            "scores",
-            "left_uncertainties",
-            "right_uncertainties",
-        ],
-    )
diff --git a/solidago/src/solidago/pipeline/post_process/__init__.py b/solidago/src/solidago/pipeline/post_process/__init__.py
index ba8ee5704c..8ddb0251af 100644
--- a/solidago/src/solidago/pipeline/post_process/__init__.py
+++ b/solidago/src/solidago/pipeline/post_process/__init__.py
@@ -4,6 +4,4 @@
 of yielding more human-readible scores.
 """

-from .base import PostProcess
-from .no_post_process import NoPostProcess
 from .squash import Squash
diff --git a/solidago/src/solidago/pipeline/post_process/base.py b/solidago/src/solidago/pipeline/post_process/base.py
deleted file mode 100644
index 92f786cc53..0000000000
--- a/solidago/src/solidago/pipeline/post_process/base.py
+++ /dev/null
@@ -1,16 +0,0 @@
-from abc import ABC, abstractmethod
-from typing import Mapping
-
-import pandas as pd
-
-from solidago.state import *
-
-
-class PostProcess(StateFunction):
-    def main(self,
-        user_models: UserModels,
-        global_model: ScoringModel,
-    ) -> tuple[UserModels, ScoringModel]:
-        """ Post-processes user models and global models,
-        typically to yield human-readible scores """
-        return user_models, global_model
diff --git a/solidago/src/solidago/pipeline/post_process/squash.py b/solidago/src/solidago/pipeline/post_process/squash.py
index e70b7d6957..d25a2a9e13 100644
--- a/solidago/src/solidago/pipeline/post_process/squash.py
+++ b/solidago/src/solidago/pipeline/post_process/squash.py
@@ -4,10 +4,10 @@
 import pandas as pd

 from solidago.state import *
-from .base import PostProcess
+from solidago.pipeline.base import StateFunction


-class Squash(PostProcess):
+class Squash(StateFunction):
     def __init__(self, score_max: float = 100.0):
         assert score_max > 0
         self.score_max = score_max
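`Squash`, like the `SquashedModel` patched at the end of this diff, rescales scores into a bounded range governed by `score_max`. The squashing function itself is not shown in the diff; the sketch below uses an x over sqrt(1 + x squared) sigmoid as an assumed stand-in, so treat the exact formula as illustrative rather than as the project's definition:

```python
import numpy as np

def squash(score: float, score_max: float = 100.0) -> float:
    # Assumed form: monotone, odd, and bounded in (-score_max, score_max).
    return score_max * score / np.sqrt(1.0 + score ** 2)

assert squash(0.0) == 0.0            # zero stays zero
assert abs(squash(1e6)) < 100.0      # large raw scores saturate below score_max
```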
diff --git a/solidago/src/solidago/pipeline/preference_learning/base.py b/solidago/src/solidago/pipeline/preference_learning/base.py
index 32fd74e510..5baa7381aa 100644
--- a/solidago/src/solidago/pipeline/preference_learning/base.py
+++ b/solidago/src/solidago/pipeline/preference_learning/base.py
@@ -7,10 +7,10 @@
 logger = logging.getLogger(__name__)

 from solidago.state import *
+from solidago.pipeline.base import StateFunction


-
-class PreferenceLearning(ABC):
+class PreferenceLearning(StateFunction, ABC):
     def main(self,
         users: Users,
         entities: Entities,
diff --git a/solidago/src/solidago/pipeline/preference_learning/comparison_learning.py b/solidago/src/solidago/pipeline/preference_learning/comparison_learning.py
deleted file mode 100644
index aa56af99fc..0000000000
--- a/solidago/src/solidago/pipeline/preference_learning/comparison_learning.py
+++ /dev/null
@@ -1,69 +0,0 @@
-from abc import abstractmethod
-from typing import Optional
-import pandas as pd
-
-from solidago.scoring_model import ScoringModel
-
-from .base import PreferenceLearning
-
-
-class ComparisonBasedPreferenceLearning(PreferenceLearning):
-    @abstractmethod
-    def comparison_learning(
-        self,
-        comparisons: pd.DataFrame,
-        entities: pd.DataFrame,
-        initialization: Optional[ScoringModel]=None,
-        updated_entities: Optional[set[int]]=None,
-    ) -> ScoringModel:
-        """ Learns only based on comparisons
-
-        Parameters
-        ----------
-        comparisons: DataFrame with columns
-            * entity_a: int
-            * entity_b: int
-            * score: float
-        entities: DataFrame with columns
-            * entity_id: int, index
-            * May contain others, such as vector representation
-        initialization: ScoringModel or None
-            Starting model, added to facilitate optimization
-            It is not supposed to affect the output of the training
-        updated_entities: set of entities (int)
-            This allows to prioritize coordinate descent, starting with newly evaluated entities
-        """
-        raise NotImplementedError
-
-    def user_learn(
-        self,
-        user_judgments: dict[str, pd.DataFrame],
-        entities: pd.DataFrame,
-        initialization: Optional[ScoringModel] = None,
-        new_judgments: Optional[dict[str, pd.DataFrame]]=None, # TODO: should use Judgements ?
-    ) -> ScoringModel:
-        """ Learns a scoring model, given user judgments of entities
-
-        Parameters
-        ----------
-        user_judgments: dict[str, pd.DataFrame]
-            May contain different forms of judgments,
-            but most likely will contain "comparisons" and/or "assessments"
-        entities: DataFrame with columns
-            * entity_id: int, index
-            * May contain others, such as vector representation
-        initialization: ScoringModel or None
-            Starting model, added to facilitate optimization
-            It is not supposed to affect the output of the training
-        new_judgments: New judgments
-            This allows to prioritize coordinate descent, starting with newly evaluated entities
-
-        Returns
-        -------
-        model: ScoringModel
-        """
-        comparisons, updated_entities = user_judgments["comparisons"], None
-        if new_judgments is not None:
-            new_comparisons = new_judgments["comparisons"]
-            updated_entities = set(new_comparisons["entity_a"]) | set(new_comparisons["entity_b"])
-        return self.comparison_learning(comparisons, entities, initialization, updated_entities)
diff --git a/solidago/src/solidago/pipeline/preference_learning/hookean_model.py b/solidago/src/solidago/pipeline/preference_learning/hookean_model.py
deleted file mode 100644
index e69de29bb2..0000000000
diff --git a/solidago/src/solidago/pipeline/scaling/__init__.py b/solidago/src/solidago/pipeline/scaling/__init__.py
index 94cf4fd627..702f6931ef 100644
--- a/solidago/src/solidago/pipeline/scaling/__init__.py
+++ b/solidago/src/solidago/pipeline/scaling/__init__.py
@@ -7,7 +7,6 @@
 assuming each user has a very specific selection bias of rated entities.
 """

-from .base import Scaling
 from .mehestan import Mehestan
 from .quantile_zero_shift import QuantileShift, QuantileZeroShift
 from .standardize import Standardize
diff --git a/solidago/src/solidago/pipeline/scaling/base.py b/solidago/src/solidago/pipeline/scaling/base.py
deleted file mode 100644
index a4b3d490d8..0000000000
--- a/solidago/src/solidago/pipeline/scaling/base.py
+++ /dev/null
@@ -1,48 +0,0 @@
-from abc import abstractmethod
-from typing import Mapping
-
-import pandas as pd
-
-from solidago.privacy_settings import PrivacySettings
-from solidago.scoring_model import ScoringModel, ScaledScoringModel
-from solidago.voting_rights import VotingRights
-
-
-class Scaling:
-    @abstractmethod
-    def __call__(
-        self,
-        user_models: Mapping[int, ScoringModel],
-        users: pd.DataFrame,
-        entities: pd.DataFrame,
-        voting_rights: VotingRights,
-        privacy: PrivacySettings
-    ) -> dict[int, ScaledScoringModel]:
-        """ Returns scaled user models
-
-        Parameters
-        ----------
-        user_models: dict[int, ScoringModel]
-            user_models[user] is user's scoring model
-        users: DataFrame with columns
-            * user_id (int, index)
-            * trust_score (float)
-        entities: DataFrame with columns
-            * entity_id (int, ind)
-        voting_rights: VotingRights
-            voting_rights[user, entity]: float
-        privacy: PrivacySettings
-            privacy[user, entity] in { True, False, None }
-
-        Returns
-        -------
-        out[user]: ScoringModel
-            Will be scaled by the Scaling method
-        """
-        raise NotImplementedError
-
-    def to_json(self) -> tuple:
-        return (type(self).__name__, )
-
-    def __str__(self):
-        return type(self).__name__
diff --git a/solidago/src/solidago/pipeline/scaling/compose.py b/solidago/src/solidago/pipeline/scaling/compose.py
deleted file mode 100644
index d9afc90433..0000000000
--- a/solidago/src/solidago/pipeline/scaling/compose.py
+++ /dev/null
@@ -1,29 +0,0 @@
-from solidago.scoring_model import ScaledScoringModel
-from .base import Scaling
-from .no_scaling import NoScaling
-
-
-class ScalingCompose(Scaling):
-    """Class used to compose any number of scaling solutions"""
-
-    def __init__(self, *scalings: Scaling):
-        """Composes a list of scalings"""
-        self.scalings = scalings
-
-    def __call__(
-        self, user_models, users, entities, voting_rights, privacy
-    ) -> dict[int, ScaledScoringModel]:
-        if len(self.scalings) == 0:
-            return NoScaling()(user_models)
-        scaled_models = user_models
-        for scaling in self.scalings:
-            scaled_models = scaling(scaled_models, users, entities, voting_rights, privacy)
-        return scaled_models  # type: ignore
-
-    def to_json(self):
-        scalings_json = [scaling.to_json() for scaling in self.scalings]
-        return type(self).__name__, scalings_json
-
-    def __str__(self):
-        prop = ", ".join([str(s) for s in self.scalings])
-        return f"{type(self).__name__}({prop})"
diff --git a/solidago/src/solidago/pipeline/scaling/mehestan.py b/solidago/src/solidago/pipeline/scaling/mehestan.py
index 2f12dd2776..e5e8c24482 100644
--- a/solidago/src/solidago/pipeline/scaling/mehestan.py
+++ b/solidago/src/solidago/pipeline/scaling/mehestan.py
@@ -7,16 +7,14 @@
 logger = logging.getLogger(__name__)

-from solidago.primitives import qr_median, qr_uncertainty, lipschitz_resilient_mean
+from solidago.primitives.lipschitz import qr_median, qr_uncertainty, lipschitz_resilient_mean
 from solidago.primitives.pairs import UnorderedPairs
-from solidago.primitives.datastructure import NestedDictOfRowList
+from solidago.primitives.datastructure import NestedDictOfItems, NestedDictOfRowLists
 from solidago.state import *
+from solidago.pipeline.base import StateFunction

-from .base import Scaling
-from .no_scaling import NoScaling
-

-class Mehestan(Scaling):
+class Mehestan(StateFunction):
     def __init__(
         self,
         lipschitz=0.1,
@@ -105,6 +103,7 @@
             for username in user_models
         })

+    """ A simple way to distribute computations is to parallelize `compute_scales` """
     def compute_scales(self,
         users: Users,      # Must have column "trust_score"
         scores: MultiScore,        # key_names == ["criterion", "username", "entity_name"]
@@ -150,8 +149,8 @@

         logger.info(f"Mehestan 2 for {criterion}. Collaborative scaling of scalers")
         scalers = users.get({ "is_scaler": True })
-        scaler_scores = scores[scalers]
-        scaler_scales, scaler_scores = self.compute_scales(trusts, made_public, scaler_scores, scaler_scores)
+        scaler_scales, scaler_scores = self.scale_to_scalers(trusts, made_public,
+            scores[scalers], scores[scalers], scalees_are_scalers=True)
         end_step2 = timeit.default_timer()
         logger.info(f"Mehestan 2 for {criterion}. Terminated in {int(end_step2 - end_step1)} seconds")

@@ -163,7 +162,7 @@ def scale_criterion(self,
         logger.info(f"Mehestan 3 for {criterion}. Terminated in {int(end - end_step2)} seconds")
         logger.info(f"Succesful Mehestan normalization on {criterion}, in {int(end - start)} seconds")

-        return users, scaler_scales | nonscaler_scales
+        return users, (scaler_scales | nonscaler_scales)

     def penalty(self, public: bool):
         return 1 if public else self.privacy_penalty
@@ -202,49 +201,52 @@

-    def compute_scales(self,
+    def scale_to_scalers(self,
         trusts: NestedDictOfItems, # key_names == ["username"]
         made_public: MadePublic, # key_names == ["username", "entity_name"]
         scaler_scores: MultiScore, # key_names == ["username", "entity_name"]
         scalee_scores: MultiScore, # key_names == ["username", "entity_name"]
+        scalees_are_scalers: bool=False
     ):
+        s = "2" if scalees_are_scalers else "3"
         start = timeit.default_timer()
-        model_norms = self.compute_model_norms(made_public, scalee_scores)
-        end2a = timeit.default_timer()
-        logger.info(f"    Mehestan 3a. Model norms in {int(end2a - start)} seconds")
+        scalee_model_norms = self.compute_model_norms(made_public, scalee_scores)
+        end_a = timeit.default_timer()
+        logger.info(f"    Mehestan {s}a. Model norms in {int(end_a - start)} seconds")

-        end3a = timeit.default_timer()
         weight_lists, ratio_lists = self.ratios(made_public, scaler_scores, scalee_scores)
-        end3b = timeit.default_timer()
-        logger.info(f"    Mehestan 3b. Entity ratios in {int(end3b - end3a)} seconds")
+        end_b = timeit.default_timer()
+        logger.info(f"    Mehestan {s}b. Entity ratios in {int(end_b - end_a)} seconds")
         voting_rights, ratios = self.aggregate_scaler_scores(trusts, weight_lists, ratio_lists)
-        end3c = timeit.default_timer()
-        logger.info(f"    Mehestan 3c. Aggregate ratios in {int(end3c - end3b)} seconds")
-        multiplicators = self.compute_multiplicators(voting_rights, ratios, model_norms)
-        end3d = timeit.default_timer()
-        logger.info(f"    Mehestan 3d. Multiplicators in {int(end3d - end3c)} seconds")
+        end_c = timeit.default_timer()
+        logger.info(f"    Mehestan {s}c. Aggregate ratios in {int(end_c - end_b)} seconds")
+        multiplicators = self.compute_multiplicators(voting_rights, ratios, scalee_model_norms)
+        end_d = timeit.default_timer()
+        logger.info(f"    Mehestan {s}d. Multiplicators in {int(end_d - end_c)} seconds")

         for (scalee_name, entity_name), score in scaler_scores:
             scalee_scores[scalee_name, entity_name] = score * multiplicators[scalee_name]
+        if scalees_are_scalers:
+            scaler_scores = scalee_scores

         weight_lists, diff_lists = self.diffs(made_public, scaler_scores, scalee_scores)
-        end3e = timeit.default_timer()
-        logger.info(f"    Mehestan 3e. Entity diffs in {int(end3e - end3d)} seconds")
+        end_e = timeit.default_timer()
+        logger.info(f"    Mehestan {s}e. Entity diffs in {int(end_e - end_d)} seconds")
         voting_rights, diffs = self.aggregate_scaler_scores(trusts, weight_lists, diff_lists)
-        end3f = timeit.default_timer()
-        logger.info(f"    Mehestan 3f. Aggregate diffs in {int(end3f - end3e)} seconds")
+        end_f = timeit.default_timer()
+        logger.info(f"    Mehestan {s}f. Aggregate diffs in {int(end_f - end_e)} seconds")
         translations = self.compute_translations(voting_rights, diffs)
-        end3g = timeit.default_timer()
-        logger.info(f"    Mehestan 3g. Translations in {int(end3g - end3f)} seconds")
+        end_g = timeit.default_timer()
+        logger.info(f"    Mehestan {s}g. Translations in {int(end_g - end_f)} seconds")

-        scales = ScaleDict({
+        scalee_scales = ScaleDict({
             scalee_name: (
                 *multiplicators[scalee_name].to_triplet(),
                 *translations[scalee_name].to_triplet()
            ) for scalee_name in multiplicators.get_set("scalee_name")
         }, key_names=["username"])
-        return scales, scaler_scores
+        return scalee_scales, scalee_scores


     ############################################
     ##  Methods to estimate the scalers       ##
@@ -278,7 +280,7 @@ def compute_activities(self,
         return np.array([
             self.computer_user_activities(trusts[user], made_public[user], scores[user])
             for user in users
-        }]
+        ])

     def computer_user_activities(self,
         trust: float,
         made_public: MadePublic, # key_names = ["entity_name"]
         scores: MultiScore, # key_names = ["entity_name"]

    #############################################################
@@ -312,9 +314,9 @@ def computer_user_activities(self,

     def aggregate_scaler_scores(self,
-        trusts: NestedDictOfRowList, # key_names == ["username"]
-        weight_lists: NestedDictOfRowList, # key_names == ["scalee_name", "scaler_name"]
-        score_lists: NestedDictOfRowList, # key_names == ["scalee_name", "scaler_name"]
+        trusts: NestedDictOfRowLists, # key_names == ["username"]
+        weight_lists: NestedDictOfRowLists, # key_names == ["scalee_name", "scaler_name"]
+        score_lists: NestedDictOfRowLists, # key_names == ["scalee_name", "scaler_name"]
     ) -> tuple[VotingRights, MultiScore]: # key_names == ["scalee_name", "scaler_name"]
         """ For any two pairs (scalee, scaler), aggregates their ratio or diff data.
         Typically used to transform s_{uvef}'s into s_{uv}, and tau_{uve}'s into tau_{uv}.
@@ -325,7 +327,7 @@
         voting_rights: VotingRights
             Must have key_names == ["scalee_name", "scaler_name"]
             voting_rights[scalee_name, scaler_name] is a list of weights
-        comparisons: NestedDictOfRowList
+        comparisons: NestedDictOfRowLists
             Must have key_names == ["scalee_name", "scaler_name"]
             comparisons[scalee, scaler] is a list of triplets that represent a Score,
             which represent how scalee's scale should be set to match scaler's.
@@ -485,7 +487,7 @@
            each of which is of type Score.
        """
         reordered_scaler_scores = scaler_scores.reorder_keys(["entity_name", "username"])
-        weight_lists, ratio_lists = NestedDictOfRowList(), NestedDictOfRowList()
+        weight_lists, ratio_lists = NestedDictOfRowLists(), NestedDictOfRowLists()

         for scalee_name, _ in scalee_scores:
             scalee_entity_names = scalee_scores[scalee_name].get_set("entity_name")
@@ -601,7 +603,7 @@
            of type Score (i.e. with left and right uncertainties).
        """
         reordered_scaler_scores = scaler_scores.reorder_keys(["entity_name", "username"])
-        weight_lists, diff_lists = NestedDictOfRowList(), NestedDictOfRowList()
+        weight_lists, diff_lists = NestedDictOfRowLists(), NestedDictOfRowLists()

         for scalee_name, _ in scalee_scores:
             scalee_entity_names = scalee_scores[scalee_name].get_set("entity_name")
diff --git a/solidago/src/solidago/pipeline/scaling/no_scaling.py b/solidago/src/solidago/pipeline/scaling/no_scaling.py
deleted file mode 100644
index 8c6deebc42..0000000000
--- a/solidago/src/solidago/pipeline/scaling/no_scaling.py
+++ /dev/null
@@ -1,44 +0,0 @@
-from typing import Mapping
-
-import pandas as pd
-
-from solidago.voting_rights import VotingRights
-from solidago.privacy_settings import PrivacySettings
-from solidago.scoring_model import ScoringModel, ScaledScoringModel
-
-from .base import Scaling
-
-class NoScaling(Scaling):
-    def __call__(
-        self,
-        user_models: Mapping[int, ScoringModel],
-        users: pd.DataFrame = ...,
-        entities: pd.DataFrame = ...,
-        voting_rights: VotingRights = ...,
-        privacy: PrivacySettings = ...,
-    ) -> dict[int, ScaledScoringModel]:
-        """ Returns scaled user models
-
-        Parameters
-        ----------
-        user_models: dict[int, ScoringModel]
-            user_models[user] is user's scoring model
-        users: DataFrame with columns
-            * user_id (int, index)
-            * trust_score (float)
-        entities: DataFrame with columns
-            * entity_id (int, ind)
-        voting_rights: VotingRights
-            voting_rights[user, entity]: float
-        privacy: PrivacySettings
-            privacy[user, entity] in { True, False, None }
-
-        Returns
-        -------
-        out[user]: ScoringModel
-            Will be scaled by the Scaling method
-        """
-        return {
-            user_id: ScaledScoringModel(scoring)
-            for (user_id, scoring) in user_models.items()
-        }
diff --git a/solidago/src/solidago/pipeline/scaling/quantile_zero_shift.py b/solidago/src/solidago/pipeline/scaling/quantile_zero_shift.py
index 3ef8241bd0..f736a6bb8f 100644
--- a/solidago/src/solidago/pipeline/scaling/quantile_zero_shift.py
+++ b/solidago/src/solidago/pipeline/scaling/quantile_zero_shift.py
@@ -3,15 +3,12 @@
 import pandas as pd
 import numpy as np

-from .base import Scaling
-
-from solidago.privacy_settings import PrivacySettings
-from solidago.scoring_model import ScoringModel, ScaledScoringModel
-from solidago.voting_rights import VotingRights
 from solidago.primitives import qr_quantile
+from solidago.state import *
+from solidago.pipeline.base import StateFunction


-class QuantileShift(Scaling):
+class QuantileShift(StateFunction):
     def __init__(self,
         quantile: float = 0.15,
         target_score: float = 0.0,
diff --git a/solidago/src/solidago/pipeline/scaling/standardize.py b/solidago/src/solidago/pipeline/scaling/standardize.py
index 038f324b73..fdfee95f92 100644
--- a/solidago/src/solidago/pipeline/scaling/standardize.py
+++ b/solidago/src/solidago/pipeline/scaling/standardize.py
@@ -2,10 +2,10 @@

 from solidago.primitives import qr_standard_deviation
 from solidago.state import *
-from .base import Scaling
+from solidago.pipeline.base import StateFunction


-class Standardize(Scaling):
+class Standardize(StateFunction):
     def __init__(self, dev_quantile: float=0.9, lipschitz: float=0.1, error: float=1e-5):
         """ The scores are shifted so that their quantile zero_quantile equals zero

diff --git a/solidago/src/solidago/pipeline/trust_propagation/__init__.py b/solidago/src/solidago/pipeline/trust_propagation/__init__.py
index fd774edfd3..0eab44c1f2 100644
--- a/solidago/src/solidago/pipeline/trust_propagation/__init__.py
+++ b/solidago/src/solidago/pipeline/trust_propagation/__init__.py
@@ -4,7 +4,6 @@
 to derive trust scores for the different users.
 """

-from .base import TrustPropagation
 from .trust_all import TrustAll
 from .no_trust_propagation import NoTrustPropagation
 from .lipschitrust import LipschiTrust
diff --git a/solidago/src/solidago/pipeline/trust_propagation/base.py b/solidago/src/solidago/pipeline/trust_propagation/base.py
deleted file mode 100644
index 5b0e8a99df..0000000000
--- a/solidago/src/solidago/pipeline/trust_propagation/base.py
+++ /dev/null
@@ -1,12 +0,0 @@
-from abc import ABC, abstractmethod
-
-from solidago.pipeline.base import StateFunction
-from solidago.state import State, Users, Vouches
-
-
-class TrustPropagation(StateFunction):
-    @abstractmethod
-    def main(self, users: Users, vouches: Vouches) -> Users:
-        """ Propagates user trust through vouches """
-        return self.propagate(state.users, state.vouches)
-
diff --git a/solidago/src/solidago/pipeline/trust_propagation/lipschitrust.py b/solidago/src/solidago/pipeline/trust_propagation/lipschitrust.py
index a6221faa00..eadd3f5ce0 100644
--- a/solidago/src/solidago/pipeline/trust_propagation/lipschitrust.py
+++ b/solidago/src/solidago/pipeline/trust_propagation/lipschitrust.py
@@ -5,13 +5,12 @@

 import numpy as np

-from .base import TrustPropagation
-from solidago.state import State, Users, Vouches
+from solidago.state import *
+from solidago.pipeline.base import StateFunction


-class LipschiTrust(TrustPropagation):
-    def __init__(
-        self,
+class LipschiTrust(StateFunction):
+    def __init__(self,
         pretrust_value: float = 0.8,
         decay: float = 0.8,
         sink_vouch: float = 5.0,
diff --git a/solidago/src/solidago/pipeline/trust_propagation/no_trust_propagation.py b/solidago/src/solidago/pipeline/trust_propagation/no_trust_propagation.py
index d33c9021cd..096cdaa137 100644
--- a/solidago/src/solidago/pipeline/trust_propagation/no_trust_propagation.py
+++ b/solidago/src/solidago/pipeline/trust_propagation/no_trust_propagation.py
@@ -1,8 +1,8 @@
-from .base import TrustPropagation
-from solidago.state import Users, Vouches
+from solidago.state import *
+from solidago.pipeline.base import StateFunction


-class NoTrustPropagation(TrustPropagation):
+class NoTrustPropagation(StateFunction):
     def __init__(self, pretrust_value: float=0.8,):
         """
         Parameters
diff --git a/solidago/src/solidago/pipeline/trust_propagation/trust_all.py b/solidago/src/solidago/pipeline/trust_propagation/trust_all.py
index 124c95b90c..9ce252ee3e 100644
--- a/solidago/src/solidago/pipeline/trust_propagation/trust_all.py
+++ b/solidago/src/solidago/pipeline/trust_propagation/trust_all.py
@@ -1,8 +1,8 @@
-from .base import TrustPropagation
-from solidago.state import Users, Vouches
+from solidago.state import *
+from solidago.pipeline.base import StateFunction


-class TrustAll(TrustPropagation):
+class TrustAll(StateFunction):
     """`TrustAll` is a naive solution that assignes an equal amount of trust to all users"""
     def main(self, users: Users, vouches: Vouches) -> Users:
         users["trust_score"] = 1.
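With the `TrustPropagation` base removed, `LipschiTrust`, `NoTrustPropagation` and `TrustAll` are plain `StateFunction`s that turn pretrust and vouches into a `trust_score` column. The sketch below is a generic damped vouch-propagation loop meant only to convey that idea; it is not the LipschiTrust update rule, which also involves the `sink_vouch` parameter shown above:

```python
import numpy as np

def propagate_trust(pretrust: np.ndarray,   # per-user pretrust, e.g. 0.8 or 0.0
                    vouches: np.ndarray,    # vouches[i, j] > 0 if user i vouches for j
                    decay: float = 0.8,
                    n_iterations: int = 100) -> np.ndarray:
    # Normalize outgoing vouches so no user hands out more than 1 unit of trust.
    weights = vouches / np.maximum(vouches.sum(axis=1, keepdims=True), 1.0)
    trust = pretrust.copy()
    for _ in range(n_iterations):
        # Damped propagation: received trust decays at each hop.
        trust = pretrust + decay * weights.T @ trust
    return np.minimum(trust, 1.0)
```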
""" -from .base import * from .is_trust import * from .affine_overtrust import * diff --git a/solidago/src/solidago/pipeline/voting_rights/affine_overtrust.py b/solidago/src/solidago/pipeline/voting_rights/affine_overtrust.py index 6c382d0ab7..927b6fb653 100644 --- a/solidago/src/solidago/pipeline/voting_rights/affine_overtrust.py +++ b/solidago/src/solidago/pipeline/voting_rights/affine_overtrust.py @@ -5,10 +5,10 @@ from solidago.primitives.dichotomy import solve from solidago.state import * -from .base import VotingRightsAssignment +from solidago.pipeline.base import StateFunction -class AffineOvertrust(VotingRightsAssignment): +class AffineOvertrust(StateFunction): def __init__(self, privacy_penalty: float = 0.5, min_overtrust: float = 2.0, diff --git a/solidago/src/solidago/pipeline/voting_rights/base.py b/solidago/src/solidago/pipeline/voting_rights/base.py deleted file mode 100644 index ca622e2585..0000000000 --- a/solidago/src/solidago/pipeline/voting_rights/base.py +++ /dev/null @@ -1,7 +0,0 @@ -from solidago.state import * -from solidago.pipeline.base import StateFunction - - -class VotingRightsAssignment(StateFunction): - def main(self, voting_rights: VotingRights) -> VotingRights: - return voting_rights diff --git a/solidago/src/solidago/pipeline/voting_rights/is_trust.py b/solidago/src/solidago/pipeline/voting_rights/is_trust.py index 53fa5486a2..17b4aaca62 100644 --- a/solidago/src/solidago/pipeline/voting_rights/is_trust.py +++ b/solidago/src/solidago/pipeline/voting_rights/is_trust.py @@ -1,8 +1,8 @@ from solidago.state import * -from .base import VotingRightsAssignment +from solidago.pipeline.base import StateFunction -class IsTrust(VotingRightsAssignment): +class IsTrust(StateFunction): def __init__(self, privacy_penalty: float=0.5): """ Computes voting_rights simply as the user trust scores, potentially multiplied by the privacy penalty if the vote is private. 
diff --git a/solidago/src/solidago/pipeline/voting_rights/lipshiflow.py b/solidago/src/solidago/pipeline/voting_rights/lipshiflow.py
deleted file mode 100644
index d97e03c982..0000000000
--- a/solidago/src/solidago/pipeline/voting_rights/lipshiflow.py
+++ /dev/null
@@ -1,102 +0,0 @@
-import numpy as np
-import pandas as pd
-
-from solidago import PrivacySettings
-from .base import VotingRights, VotingRightsAssignment
-
-
-class LipschiFlow(VotingRightsAssignment):
-    def __init__(
-        self,
-        max_voting_right: float,
-        lipschitz: float,
-        epsilon: float
-    ):
-        """ privately scored entities are given
-
-        Parameters
-        ----------
-        privacy_penalty: float
-            Penalty on private comparisons
-        """
-        self.max_voting_right = max_voting_right
-        self.lipschitz = lipschitz
-        self.epsilon
-
-    def __call__(
-        self,
-        users: pd.DataFrame,
-        entities: pd.DataFrame,
-        vouches: pd.DataFrame,
-        privacy: PrivacySettings,
-        user_models: dict[int, "ScoringModel"]
-    ) -> tuple[VotingRights, pd.DataFrame]:
-        """Compute voting rights
-
-        Parameters
-        ----------
-        users: DataFrame with columns
-            * user_id (int, index)
-            * trust_score (float)
-        entities: DataFrame with columns
-            * entity_id (int, index)
-        vouches: DataFrame
-            This is not used by VotingRightsWithLimitedOvertrust
-        privacy: PrivacySettings
-            privacy[user, entity] is the privacy setting of user for entity
-            May be True, False or None
-
-        Returns
-        -------
-        voting_rights[user, entity] is the voting right
-            of a user on entity for criterion
-        entities: DataFrame with columns
-            * entity_id (int, index)
-            * cumulative_trust (float)
-            * min_voting_right (float)
-            * overtrust (float)
-        """
-        voting_rights = VotingRights()
-
-        for entity_id, entity in entities.iterrows():
-            state = NetworkState(users, entity, vouches, privacy, users)
-            while state.relay.sum() > self.epsilon:
-                state = self.forward(users, state)
-                state = self.backward(users, state)
-            voting_rights = state.assign(entity, voting_rights)
-        return voting_rights, entities
-
-    def forward(self, users, state):
-        raise NotImplemented
-
-    def backward(self, users, state):
-        raise NotImplemented
-
-    def assign(self, entity, voting_rights):
-        raise NotImplemented
-
-    def to_json(self):
-        return self.__class__.__name__, dict(
-            max_voting_right=self.max_voting_right,
-            lipschitz=self.lipschitz
-        )
-
-    def __str__(self):
-        prop_names = ["max_voting_right", "lipschitz"]
-        prop = ", ".join([f"{p}={getattr(self, p)}" for p in prop_names])
-        return f"{type(self).__name__}({prop})"
-
-
-class NetworkState:
-    def __init__(self, users, entities, vouches, privacy):
-        self.index_to_user_id = users.user_id
-        self.entity = entities
-        self.vouches = vouches
-        self.privacy = privacy
-        self.relay = users.trust_score
-        self.cumulative_relay = np.zeros(len(users))
-        self.assigned = np.zeros(len(users))
-        self.cumulative_flow = [dict() for _ in users]
-
-    def assign(self, entity, voting_rights):
-        raise NotImplemented
diff --git a/solidago/src/solidago/primitives/datastructure/named_dataframe.py b/solidago/src/solidago/primitives/datastructure/named_dataframe.py
index 45f1615748..2c2148c792 100644
--- a/solidago/src/solidago/primitives/datastructure/named_dataframe.py
+++ b/solidago/src/solidago/primitives/datastructure/named_dataframe.py
@@ -1,4 +1,4 @@
-from typing import Union, Optional
+from typing import Union, Optional, Iterable
 from pandas import DataFrame, Series
 from types import SimpleNamespace
 from pathlib import Path
diff --git a/solidago/src/solidago/primitives/datastructure/vector_dataframe.py b/solidago/src/solidago/primitives/datastructure/vector_dataframe.py
index aca5bd4c94..ecfefd1821 100644
--- a/solidago/src/solidago/primitives/datastructure/vector_dataframe.py
+++ b/solidago/src/solidago/primitives/datastructure/vector_dataframe.py
@@ -1,4 +1,4 @@
-from typing import Union, Iterable
+from typing import Union, Optional, Iterable
 from pandas import DataFrame, Series
 from types import SimpleNamespace
 from pathlib import Path
diff --git a/solidago/src/solidago/state/models/post_processed.py b/solidago/src/solidago/state/models/post_processed.py
index 62e3f3585f..5d1cc25e9e 100644
--- a/solidago/src/solidago/state/models/post_processed.py
+++ b/solidago/src/solidago/state/models/post_processed.py
@@ -35,7 +35,7 @@ def post_process(self, score: Union[Score, MultiScore]) -> Union[Score, MultiSco


 class SquashedModel(PostProcessedModel):
-    def __init__(self, parent: ScoringModel, self.max_score: float=100.):
+    def __init__(self, parent: ScoringModel, max_score: float=100.):
         super().__init__(parent)
         self.max_score = max_score

diff --git a/solidago/src/solidago/state/voting_rights/base.py b/solidago/src/solidago/state/voting_rights/base.py
index c499f47880..9ec90cf724 100644
--- a/solidago/src/solidago/state/voting_rights/base.py
+++ b/solidago/src/solidago/state/voting_rights/base.py
@@ -11,4 +11,4 @@ def __init__(self,
         value_name="voting_right",
         save_filename="voting_rights.csv"
     ):
-        super().__init__(d, key_names, value_name, save_filename, , default_value=0)
+        super().__init__(d, key_names, value_name, save_filename, default_value=0)
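The last two hunks are plain syntax fixes: `self.max_score` is not a legal parameter name, and the `VotingRights` constructor call had a stray comma before `default_value=0`. For reference, the corrected constructor pattern in isolation (the class names below are stand-ins, not the solidago classes):

```python
class ParentModel:                      # stand-in for PostProcessedModel
    def __init__(self, parent):
        self.parent = parent

class SquashedLikeModel(ParentModel):   # stand-in for SquashedModel
    def __init__(self, parent, max_score: float = 100.):
        super().__init__(parent)
        self.max_score = max_score      # assigned in the body, not in the signature
```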