From 170d95d6455394cca66da40a35ded71a08a45240 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=AA=20Nguy=C3=AAn=20Hoang?= Date: Fri, 3 Jan 2025 10:23:39 +0100 Subject: [PATCH] WIP Mehestan --- .../src/solidago/pipeline/scaling/__init__.py | 2 - .../src/solidago/pipeline/scaling/mehestan.py | 46 ++++++++----------- 2 files changed, 20 insertions(+), 28 deletions(-) diff --git a/solidago/src/solidago/pipeline/scaling/__init__.py b/solidago/src/solidago/pipeline/scaling/__init__.py index 13c2b1149b..94cf4fd627 100644 --- a/solidago/src/solidago/pipeline/scaling/__init__.py +++ b/solidago/src/solidago/pipeline/scaling/__init__.py @@ -8,8 +8,6 @@ """ from .base import Scaling -from .compose import ScalingCompose -from .no_scaling import NoScaling from .mehestan import Mehestan from .quantile_zero_shift import QuantileShift, QuantileZeroShift from .standardize import Standardize diff --git a/solidago/src/solidago/pipeline/scaling/mehestan.py b/solidago/src/solidago/pipeline/scaling/mehestan.py index 62e6fb935b..ebba8b1944 100644 --- a/solidago/src/solidago/pipeline/scaling/mehestan.py +++ b/solidago/src/solidago/pipeline/scaling/mehestan.py @@ -2,22 +2,17 @@ import numpy as np import pandas as pd - -import logging import timeit +import logging -from .base import Scaling -from .no_scaling import NoScaling +logger = logging.getLogger(__name__) -from solidago.privacy_settings import PrivacySettings -from solidago.scoring_model import ScoringModel, ScaledScoringModel -from solidago.voting_rights import VotingRights from solidago.primitives import qr_median, qr_uncertainty, lipschitz_resilient_mean +from solidago.primitives.pairs import UnorderedPairs +from solidago.state import * -from solidago.utils.pairs import UnorderedPairs - - -logger = logging.getLogger(__name__) +from .base import Scaling +from .no_scaling import NoScaling class Mehestan(Scaling): @@ -32,14 +27,16 @@ def __init__( n_diffs_sample_max=1000, error=1e-5 ): - """ Mehestan performs Lipschitz-resilient ollaborative scaling. - - A simplified version of Mehestan was published in - "Robust Sparse Voting", Youssef Allouah, Rachid Guerraoui, Lȩ Nguyên Hoang - and Oscar Villemaud, published at AISTATS 2024. + """ Mehestan performs Lipschitz-resilient collaborative scaling. + It is based on "Robust Sparse Voting", by Youssef Allouah, + Rachid Guerraoui, Lȩ Nguyên Hoang and Oscar Villemaud, + published at AISTATS 2024. The inclusion of uncertainties is further detailed in - "Solidago: A Modular Pipeline for Collaborative Scoring" + "Solidago: A Modular Pipeline for Collaborative Scoring", + by Lê Nguyên Hoang, Romain Beylerian, Bérangère Colbois, Julien Fageot, + Louis Faucon, Aidan Jungo, Alain Le Noac'h, Adrien Matissart + and Oscar Villemaud. Parameters ---------- @@ -69,14 +66,11 @@ def __init__( self.n_diffs_sample_max = n_diffs_sample_max self.error = error - def __call__( - self, - user_models: Mapping[int, ScoringModel], - users: pd.DataFrame, - entities: pd.DataFrame, - voting_rights: Optional[VotingRights] = None, - privacy: Optional[PrivacySettings] = None - ) -> dict[int, ScaledScoringModel]: + def __call__(self, + users: Users, + entities: Entities, + user_models: UserModels, + ) -> UserModels: """ Returns scaled user models Parameters @@ -107,7 +101,7 @@ def __call__( nonscalers = users[users["is_scaler"] == False] if len(scalers) == 0: logger.warning(" No user qualifies as a scaler. No scaling performed.") - return NoScaling()(user_models) + return user_models end_step1 = timeit.default_timer() logger.info(f"Mehestan 1. Terminated in {int(end_step1 - start)} seconds")