From e5c96e59389bcc1356c810d84ff45d798a99d797 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=AA=20Nguy=C3=AAn=20Hoang?= Date: Thu, 24 Oct 2024 11:41:39 +0200 Subject: [PATCH] Use solidago pipeline "neurips24" to compute scores (#1970) * Added import for vouchers and scores in pipeline/inputs Fixed tiny_tournesol.zip file for testing. Added data_analysis for dataset submission. * Important change: Modified qr_quantile using asymmetric Huber rather than additional term. This implies that the addition of a new user with huge uncertainties will not affect the quantile much. * implement 'get_pipeline_kwargs' in TournesolInput * fix experiments script * read vouches in TournesolInput * [solidago] gbt: estimate asymmetrical uncertainties based on increase of neg. log likelihood by 1 (#1973) --------- Co-authored-by: Louis Faucon * implement 'get_pipeline_kwargs' in TournesolInput * fix experiments script * read vouches in TournesolInput * Fixed experiments calls to Tournesol inputs API * normalize weight per user in Standardize * normalize weight per user in QuantileZeroShift * solidago: fix numerical instability in gbt * fix wrong usage of 'med' in qr_uncertainty, expose high_likelihood_range_threshold in gbt args * add QuantileShift (in addition to QuantileZeroShift) to define target_score different from 0 * lbfgs: raise error when max_iter is reached * update ml_train to call new pipeline, tweaks in solidago to be consistent with existing tournesol tests * fix test_mehestan in solidago, standardize typing to reduce numba compilations * fix mehestan after refactoring * update test about scalings * fix lbfgs initialization when past scores are available --------- Co-authored-by: Adrien Matissart Co-authored-by: Adrien Matissart Co-authored-by: Louis Faucon --- backend/ml/inputs.py | 30 ++- backend/ml/management/commands/ml_train.py | 172 +++++++++++--- backend/ml/outputs.py | 4 +- backend/tests/requirements.txt | 2 - backend/tournesol/lib/public_dataset.py | 10 +- 
.../commands/load_public_dataset.py | 8 +- backend/tournesol/tests/test_ml_run.py | 58 ++--- solidago/experiments/data_analysis.py | 135 +++++++++++ solidago/experiments/engagement_bias.json | 1 - solidago/experiments/resilience.json | 1 - solidago/experiments/synthetic.py | 7 +- solidago/experiments/tournesol.py | 135 +++++++++-- solidago/pyproject.toml | 2 +- solidago/src/solidago/__version__.py | 2 +- .../aggregation/entitywise_qr_quantile.py | 13 +- .../aggregation/standardized_qr_quantile.py | 1 - .../solidago/generative_model/user_model.py | 8 +- solidago/src/solidago/judgments.py | 21 +- solidago/src/solidago/pipeline/__init__.py | 2 + solidago/src/solidago/pipeline/inputs.py | 121 +++++++--- solidago/src/solidago/pipeline/pipeline.py | 38 ++- solidago/src/solidago/post_process/base.py | 4 +- solidago/src/solidago/post_process/squash.py | 5 +- .../solidago/preference_learning/__init__.py | 5 +- .../src/solidago/preference_learning/base.py | 56 +++-- .../generalized_bradley_terry.py | 189 ++++++++------- .../lbfgs_generalized_bradley_terry.py | 217 ++++++++---------- solidago/src/solidago/primitives.py | 35 +-- solidago/src/solidago/scaling/__init__.py | 2 +- solidago/src/solidago/scaling/base.py | 2 +- solidago/src/solidago/scaling/mehestan.py | 110 +++++---- .../solidago/scaling/quantile_zero_shift.py | 87 +++++-- solidago/src/solidago/scaling/standardize.py | 63 +++-- solidago/src/solidago/scoring_model.py | 12 +- solidago/src/solidago/solvers/dichotomy.py | 7 +- solidago/src/solidago/solvers/optimize.py | 66 ++++-- .../solidago/trust_propagation/__init__.py | 3 +- .../src/solidago/trust_propagation/base.py | 17 +- .../trust_propagation/lipschitrust.py | 34 +-- .../trust_propagation/no_trust_propagation.py | 27 +-- .../src/solidago/trust_propagation/noop.py | 25 ++ .../solidago/trust_propagation/trust_all.py | 32 +-- solidago/src/solidago/utils/pairs.py | 31 +-- .../src/solidago/voting_rights/__init__.py | 5 +- .../voting_rights/affine_overtrust.py | 14 +- 
solidago/tests/data/data_1.py | 20 +- solidago/tests/data/data_2.py | 54 +++-- solidago/tests/data/data_3.py | 73 +++--- solidago/tests/data/data_4.py | 36 +-- solidago/tests/data/tiny_tournesol.zip | Bin 695004 -> 696188 bytes solidago/tests/test_aggregation.py | 2 +- solidago/tests/test_judgments.py | 7 - solidago/tests/test_mehestan.py | 133 +---------- solidago/tests/test_preference_learning.py | 27 ++- solidago/tests/test_primitives.py | 24 +- solidago/tests/test_privacy_settings.py | 4 +- solidago/tests/test_scaling.py | 4 +- solidago/tests/test_solvers.py | 34 ++- 58 files changed, 1357 insertions(+), 880 deletions(-) create mode 100644 solidago/experiments/data_analysis.py create mode 100644 solidago/src/solidago/trust_propagation/noop.py delete mode 100644 solidago/tests/test_judgments.py diff --git a/backend/ml/inputs.py b/backend/ml/inputs.py index 9f8272378f..daba070b26 100644 --- a/backend/ml/inputs.py +++ b/backend/ml/inputs.py @@ -2,7 +2,7 @@ from typing import Optional import pandas as pd -from django.db.models import Case, F, QuerySet, When +from django.db.models import Case, F, Q, QuerySet, When from django.db.models.expressions import RawSQL from solidago.pipeline import TournesolInput @@ -14,6 +14,7 @@ ContributorScaling, Entity, ) +from vouch.models import Voucher class MlInputFromDb(TournesolInput): @@ -189,3 +190,30 @@ def get_individual_scores( dtf = pd.DataFrame(values) return dtf[["user_id", "entity", "criteria", "raw_score"]] + + def get_vouches(self): + values = Voucher.objects.filter( + by__is_active=True, + to__is_active=True, + ).values( + voucher=F("by__id"), + vouchee=F("to__id"), + vouch=F("value"), + ) + return pd.DataFrame(values, columns=["voucher", "vouchee", "vouch"]) + + def get_users(self): + values = ( + User.objects + .filter(is_active=True) + .annotate(is_pretrusted=Q(pk__in=User.with_trusted_email())) + .values( + "is_pretrusted", + "trust_score", + user_id=F("id"), + ) + ) + return pd.DataFrame( + data=values, + 
columns=["user_id", "is_pretrusted", "trust_score"], + ).set_index("user_id") diff --git a/backend/ml/management/commands/ml_train.py b/backend/ml/management/commands/ml_train.py index 38496d9cf2..b345613f45 100644 --- a/backend/ml/management/commands/ml_train.py +++ b/backend/ml/management/commands/ml_train.py @@ -1,10 +1,66 @@ +import os +from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor, as_completed + +from django import db +from django.conf import settings from django.core.management.base import BaseCommand +from solidago.aggregation import EntitywiseQrQuantile +from solidago.pipeline import Pipeline +from solidago.post_process.squash import Squash +from solidago.preference_learning import UniformGBT +from solidago.scaling import Mehestan, QuantileShift, ScalingCompose, Standardize +from solidago.trust_propagation import LipschiTrust, NoopTrust +from solidago.voting_rights import AffineOvertrust from ml.inputs import MlInputFromDb -from ml.mehestan.run import MehestanParameters, run_mehestan +from ml.outputs import TournesolPollOutput, save_tournesol_scores from tournesol.models import EntityPollRating, Poll -from tournesol.models.poll import ALGORITHM_LICCHAVI, ALGORITHM_MEHESTAN -from vouch.trust_algo import trust_algo +from tournesol.models.poll import ALGORITHM_MEHESTAN, DEFAULT_POLL_NAME + + +def get_solidago_pipeline(run_trust_propagation: bool = True): + if run_trust_propagation: + trust_algo = LipschiTrust() + else: + trust_algo = NoopTrust() + + aggregation_lipshitz = 0.1 + + return Pipeline( + trust_propagation=trust_algo, + voting_rights=AffineOvertrust(), + # TODO: use LBFGS (faster) implementation. + # Currently requires to install Solidago with "torch" extra. 
+ preference_learning=UniformGBT( + prior_std_dev=7.0, + convergence_error=1e-5, + cumulant_generating_function_error=1e-5, + high_likelihood_range_threshold=0.25, + # max_iter=300, + ), + scaling=ScalingCompose( + Mehestan(), + Standardize( + dev_quantile=0.9, + lipschitz=0.1, + ), + QuantileShift( + quantile=0.1, + # target_score is defined to be the recommendability + # threshold, i.e the therorical max score that can be + # reached by an entity with 2 contributors. + target_score=2*aggregation_lipshitz, + lipschitz=0.1, + error=1e-5, + ), + ), + aggregation=EntitywiseQrQuantile( + quantile=0.5, + lipschitz=aggregation_lipshitz, + error=1e-5, + ), + post_process=Squash(score_max=100.) + ) class Command(BaseCommand): @@ -17,37 +73,87 @@ def add_arguments(self, parser): help="Disable trust scores computation and preserve existing trust_score values", ) parser.add_argument("--main-criterion-only", action="store_true") - parser.add_argument("--alpha", type=float, default=None) - parser.add_argument("-W", type=float, default=None) - parser.add_argument("--score-shift-quantile", type=float, default=None) - parser.add_argument("--score-deviation-quantile", type=float, default=None) def handle(self, *args, **options): - if not options["no_trust_algo"]: - # Update "trust_score" for all users - trust_algo() - - # Update scores for all polls for poll in Poll.objects.filter(active=True): - ml_input = MlInputFromDb(poll_name=poll.name) - - if poll.algorithm == ALGORITHM_MEHESTAN: - kwargs = { - param: options[param] - for param in ["alpha", "W", "score_shift_quantile", "score_deviation_quantile"] - if options[param] is not None - } - parameters = MehestanParameters(**kwargs) - run_mehestan( - ml_input=ml_input, - poll=poll, - parameters=parameters, - main_criterion_only=options["main_criterion_only"], + if poll.algorithm != ALGORITHM_MEHESTAN: + raise ValueError(f"Unknown algorithm {poll.algorithm!r}") + + is_default_poll = (poll.name == DEFAULT_POLL_NAME) + 
self.run_poll_pipeline( + poll=poll, + update_trust_scores=(not options["no_trust_algo"] and is_default_poll), + main_criterion_only=options["main_criterion_only"], + ) + + def run_poll_pipeline( + self, + poll: Poll, + update_trust_scores: bool, + main_criterion_only: bool, + ): + pipeline = get_solidago_pipeline( + run_trust_propagation=update_trust_scores + ) + criteria_list = poll.criterias_list + criteria_to_run = [poll.main_criteria] + if not main_criterion_only: + criteria_to_run.extend( + c for c in criteria_list if c != poll.main_criteria + ) + + if settings.MEHESTAN_MULTIPROCESSING: + # compute each criterion in parallel + cpu_count = os.cpu_count() or 1 + cpu_count -= settings.MEHESTAN_KEEP_N_FREE_CPU + os.register_at_fork(before=db.connections.close_all) + executor = ProcessPoolExecutor(max_workers=max(1, cpu_count)) + else: + # In tests, we might prefer to use a single thread to reduce overhead + # of multiple processes, db connections, and redundant numba compilation + executor = ThreadPoolExecutor(max_workers=1) + + with executor: + futures = [] + for crit in criteria_to_run: + pipeline_input = MlInputFromDb(poll_name=poll.name) + pipeline_output = TournesolPollOutput( + poll_name=poll.name, + criterion=crit, + save_trust_scores_enabled=(update_trust_scores and crit == poll.main_criteria) ) - elif poll.algorithm == ALGORITHM_LICCHAVI: - raise NotImplementedError("Licchavi is no longer supported") - else: - raise ValueError(f"unknown algorithm {repr(poll.algorithm)}'") - self.stdout.write(f"Starting bulk update of sum_trust_score for poll {poll.name}") - EntityPollRating.bulk_update_sum_trust_scores(poll) - self.stdout.write(f"Finished bulk update of sum_trust_score for poll {poll.name}") + + futures.append( + executor.submit( + self.run_pipeline_and_close_db, + pipeline=pipeline, + pipeline_input=pipeline_input, + pipeline_output=pipeline_output, + criterion=crit, + ) + ) + + for fut in as_completed(futures): + # reraise potential exception + 
fut.result() + + save_tournesol_scores(poll) + EntityPollRating.bulk_update_sum_trust_scores(poll) + + self.stdout.write(f"Pipeline for poll {poll.name}: Done") + + @staticmethod + def run_pipeline_and_close_db( + pipeline: Pipeline, + pipeline_input: MlInputFromDb, + pipeline_output: TournesolPollOutput, + criterion: str + ): + pipeline.run( + input=pipeline_input, + criterion=criterion, + output=pipeline_output, + ) + # Closing the connection fixes a warning in tests + # about open connections to the database. + db.connection.close() diff --git a/backend/ml/outputs.py b/backend/ml/outputs.py index 4b5db46f66..32e5c02e23 100644 --- a/backend/ml/outputs.py +++ b/backend/ml/outputs.py @@ -176,8 +176,10 @@ def save_entity_scores( scores: pd.DataFrame, score_mode="default", ): - scores_iterator = scores[["entity_id", "score", "uncertainty"]].itertuples(index=False) + if len(scores) == 0: + return + scores_iterator = scores[["entity_id", "score", "uncertainty"]].itertuples(index=False) with transaction.atomic(): EntityCriteriaScore.objects.filter( poll=self.poll, diff --git a/backend/tests/requirements.txt b/backend/tests/requirements.txt index 896517d1ce..ff6dae73b2 100644 --- a/backend/tests/requirements.txt +++ b/backend/tests/requirements.txt @@ -8,10 +8,8 @@ pylint-django==2.5.3 pylint-json2html==0.4.0 # Unit tests tools -faker==13.15.1 pytest==7.1.3 pytest-html==3.1.1 -pytest-mock==3.8.2 # Pytest for django pytest-django==4.5.2 diff --git a/backend/tournesol/lib/public_dataset.py b/backend/tournesol/lib/public_dataset.py index 8612436d52..58d8e3ef16 100644 --- a/backend/tournesol/lib/public_dataset.py +++ b/backend/tournesol/lib/public_dataset.py @@ -291,7 +291,7 @@ def write_comparisons_file( "criteria", "score", "score_max", - "week_date" + "week_date", ] writer = csv.DictWriter(write_target, fieldnames=fieldnames) writer.writeheader() @@ -413,7 +413,9 @@ def write_vouchers_file(write_target): "to_username": voucher.to.username, "value": voucher.value, } - for 
voucher in Voucher.objects.filter(is_public=True) - .select_related("by", "to") - .order_by("by__username", "to__username") + for voucher in ( + Voucher.objects.filter(is_public=True, by__is_active=True, to__is_active=True) + .select_related("by", "to") + .order_by("by__username", "to__username") + ) ) diff --git a/backend/tournesol/management/commands/load_public_dataset.py b/backend/tournesol/management/commands/load_public_dataset.py index b6e40aba2f..095e0efc8b 100644 --- a/backend/tournesol/management/commands/load_public_dataset.py +++ b/backend/tournesol/management/commands/load_public_dataset.py @@ -105,12 +105,12 @@ def handle(self, *args, **options): entity_1=videos[entity_a], entity_2=videos[entity_b], ) - for _, values in rows.iterrows(): + for values in rows.itertuples(index=False): ComparisonCriteriaScore.objects.create( comparison=comparison, - criteria=values["criteria"], - score=values["score"], - score_max=values["score_max"], + criteria=values.criteria, + score=values.score, + score_max=values.score_max, ) nb_comparisons += 1 print(f"Created {nb_comparisons} comparisons") diff --git a/backend/tournesol/tests/test_ml_run.py b/backend/tournesol/tests/test_ml_run.py index af427ac1dc..021891a7c4 100644 --- a/backend/tournesol/tests/test_ml_run.py +++ b/backend/tournesol/tests/test_ml_run.py @@ -17,6 +17,7 @@ Find more details on https://docs.djangoproject.com/en/4.0/topics/testing/overview/#rollback-emulation """ + from django.core.management import call_command from django.test import TransactionTestCase, override_settings @@ -40,9 +41,7 @@ class TestMlTrain(TransactionTestCase): serialized_rollback = True def setUp(self) -> None: - EmailDomain.objects.create( - domain="@verified.test", status=EmailDomain.STATUS_ACCEPTED - ) + EmailDomain.objects.create(domain="@verified.test", status=EmailDomain.STATUS_ACCEPTED) def test_ml_train(self): user1 = UserFactory(email="user1@verified.test") @@ -56,9 +55,7 @@ def test_ml_train(self): 
self.assertEqual(EntityCriteriaScore.objects.count(), 0) self.assertEqual(ContributorRatingCriteriaScore.objects.count(), 0) call_command("ml_train") - self.assertEqual( - EntityCriteriaScore.objects.filter(score_mode="default").count(), 20 - ) + self.assertEqual(EntityCriteriaScore.objects.filter(score_mode="default").count(), 20) self.assertEqual(ContributorRatingCriteriaScore.objects.count(), 20) # Asserts that all contributors have been assigned a strictly positive voting right self.assertEqual( @@ -120,9 +117,7 @@ def test_ml_on_multiple_polls(self): scores_mode_default = EntityCriteriaScore.objects.filter(score_mode="default") self.assertEqual(scores_mode_default.count(), 28) self.assertEqual(scores_mode_default.filter(poll=poll2).count(), 20) - self.assertEqual( - scores_mode_default.filter(poll=Poll.default_poll()).count(), 8 - ) + self.assertEqual(scores_mode_default.filter(poll=Poll.default_poll()).count(), 8) def test_ml_run_with_video_having_score_zero(self): video = VideoFactory(make_safe_for_poll=False) @@ -138,45 +133,50 @@ def test_ml_run_with_video_having_score_zero(self): rating.refresh_from_db() self.assertAlmostEqual(rating.tournesol_score, 0.0, delta=3) - def test_individual_scaling_are_computed(self): - # User 1 will belong to calibration users (as the most active trusted user) + def test_individual_scalings_are_computed(self): + # User 1 will belong to scaler users (as a sufficiently active trusted user) + user1 = UserFactory(email="user@verified.test") user2 = UserFactory() - + videos = VideoFactory.create_batch(30, make_safe_for_poll=False) for user in [user1, user2]: - ComparisonCriteriaScoreFactory.create_batch( - 10, comparison__user=user, criteria="largely_recommended" - ) + for video1, video2 in zip(videos, videos[1:]): + ComparisonCriteriaScoreFactory( + comparison__user=user, + comparison__entity_1=video1, + comparison__entity_2=video2, + criteria="largely_recommended", + score=10 if user is user1 else -10, + ) 
self.assertEqual(EntityCriteriaScore.objects.count(), 0) self.assertEqual(ContributorRatingCriteriaScore.objects.count(), 0) self.assertEqual(ContributorScaling.objects.count(), 0) + self.assertEqual(ContributorRating.objects.count(), 60) + ContributorRating.objects.update(is_public=True) - call_command("ml_train") + call_command("ml_train", "--main-criterion-only") - self.assertEqual(ContributorRatingCriteriaScore.objects.count(), 40) + self.assertEqual(ContributorRatingCriteriaScore.objects.count(), 60) self.assertEqual(ContributorScaling.objects.count(), 2) # Check scaling values for user1 calibration_scaling = ContributorScaling.objects.get(user=user1) - self.assertAlmostEqual(calibration_scaling.scale, 1.0) - self.assertAlmostEqual(calibration_scaling.translation, 0.0) - # Scaling uncertainties are also defined for scaling calibration users - self.assertAlmostEqual(calibration_scaling.scale_uncertainty, 1.0) - self.assertAlmostEqual(calibration_scaling.translation_uncertainty, 1.0) + self.assertAlmostEqual(calibration_scaling.scale, 0.94, places=2) + self.assertAlmostEqual(calibration_scaling.translation, 0.24, places=2) + self.assertAlmostEqual(calibration_scaling.scale_uncertainty, 1.51, places=2) + self.assertAlmostEqual(calibration_scaling.translation_uncertainty, 1.9, places=1) # Check scaling values for user2 scaling = ContributorScaling.objects.get(user=user2) - self.assertAlmostEqual(scaling.scale, 1.0) - self.assertAlmostEqual(scaling.translation, 0.0) - self.assertAlmostEqual(scaling.scale_uncertainty, 1.0) - self.assertAlmostEqual(scaling.translation_uncertainty, 1.0) + self.assertAlmostEqual(scaling.scale, 0.94, places=2) + self.assertAlmostEqual(scaling.translation, 0.24, places=2) + self.assertAlmostEqual(scaling.scale_uncertainty, 1.51, places=2) + self.assertAlmostEqual(scaling.translation_uncertainty, 1.9, places=1) def test_tournesol_scores_different_trust(self): # 10 pretrusted users - verified_users = [ - 
UserFactory(email=f"user_{n}@verified.test") for n in range(10) - ] + verified_users = [UserFactory(email=f"user_{n}@verified.test") for n in range(10)] # 20 non_verified_users non_verified_users = UserFactory.create_batch(20) diff --git a/solidago/experiments/data_analysis.py b/solidago/experiments/data_analysis.py new file mode 100644 index 0000000000..1c2aed4d9c --- /dev/null +++ b/solidago/experiments/data_analysis.py @@ -0,0 +1,135 @@ +from solidago.pipeline.inputs import TournesolInputFromPublicDataset +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt +import scipy + +data = TournesolInputFromPublicDataset.download() + +criteria = { + "reliability": "Reliable and not misleading", + "importance": "Important and actionable", + "engaging": "Engaging and thought-provoking", + "pedagogy": "Clear and pedagogical", + "layman_friendly": "Layman-friendly", + "diversity_inclusion": "Diversity and inclusion", + "backfire_risk": "Resilience to backfiring risks", + "better_habits": "Encourages better habits", + "entertaining_relaxing": "Entertaining and relaxing" +} +entities = set(data.comparisons.entity_a) | set(data.comparisons.entity_b) +user_ids = set(data.comparisons.user_id) + +def add_comparison_analysis_columns(comparisons): + def is_first_comparison(comparisons): + registered = { e: set() for e in entities } + entity_a_firsts, entity_b_firsts = list(), list() + for _, r in comparisons.iterrows(): + entity_a_first, entity_b_first = False, False + if r.criteria == "largely_recommended" and r.user_id not in registered[r.entity_a]: + registered[r.entity_a].add(r.user_id) + entity_a_first = True + if r.criteria == "largely_recommended" and r.user_id not in registered[r.entity_b]: + registered[r.entity_b].add(r.user_id) + entity_b_first = True + entity_a_firsts.append(entity_a_first) + entity_b_firsts.append(entity_b_first) + return entity_a_firsts, entity_b_firsts + + entity_a_firsts, entity_b_firsts = is_first_comparison(comparisons) + 
comparisons = comparisons.assign(entity_a_first=entity_a_firsts) + comparisons = comparisons.assign(entity_b_first=entity_b_firsts) + + def score_of_first_comparison(comparisons): + first_comparison_score = list() + for _, r in comparisons.iterrows(): + if r.entity_a_first and (not r.entity_b_first): + first_comparison_score.append(r.score) + elif (not r.entity_a_first) and r.entity_b_first: + first_comparison_score.append(- r.score) + else: + first_comparison_score.append(np.nan) + return first_comparison_score + + comparisons = comparisons.assign(first_comparison_score=score_of_first_comparison(comparisons)) + + def has_others(comparisons): + with_others = dict() + for _, r in comparisons[comparisons.criteria != "largely_recommended"].iterrows(): + if r.user_id not in with_others: + with_others[r.user_id] = dict() + if r.entity_a not in with_others[r.user_id]: + with_others[r.user_id][r.entity_a] = set() + with_others[r.user_id][r.entity_a].add(r.entity_b) + has_others = list() + for _, r in comparisons.iterrows(): + has_others.append( + r.user_id in with_others + and r.entity_a in with_others[r.user_id] + and r.entity_b in with_others[r.user_id][r.entity_a] + ) + return has_others + + comparisons = comparisons.assign(has_others=has_others(comparisons)) + + def is_trusted(comparisons): + return [data.users.loc[r.user_id, "trust_score"] >= 0.8 for _, r in comparisons.iterrows()] + + comparisons = comparisons.assign(is_trusted=is_trusted(comparisons)) + + return comparisons + +c = add_comparison_analysis_columns(data.comparisons) + +def add_user_analysis_columns(users, comparisons): + def n_comparisons(users, comparisons): + return [ + len(comparisons[comparisons.user_id == user_id]) + for user_id, _ in data.users.iterrows() + ] + users = users.assign(n_comparisons=n_comparisons(users, comparisons)) + users = users.assign( + n_main_comparisons=n_comparisons( + users, + comparisons[comparisons.criteria == "largely_recommneded"] + ) + ) + return users + +u = 
add_user_analysis_columns(data.users, data.comparisons) + +def add_score_analysis_columns(): + def _unsquash(scores): + for _, row in scores[scores.score == 100.00].iterrows(): + row.score = 99.99 + for _, row in scores[scores.score == -100.00].iterrows(): + row.score = -99.99 + return scores.score / np.sqrt(100.0**2 - scores.score) + + data.collective_scores = data.collective_scores.assign(unsquashed=_unsquash(data.collective_scores.scores)) + data.individual_scores = data.individual_scores.assign(unsquashed=_unsquash(data.individual_scores.scores)) + +def confidence_interval(scores, confidence=0.95): + mean = scores.mean() + z_deviation = np.sqrt(2) * scipy.special.erfinv(confidence) + deviation = z_deviation * np.sqrt( scores.var() / len(scores) ) + return mean - deviation, mean + deviation + +def plot_criteria(comparisons, figsize=(2, 3)): + fig, axs = plt.subplots(3, 3, figsize=figsize) + for n_plot, ax in enumerate(axs.flat): + criterion = list(criteria.keys())[n_plot] + cc = comparisons[comparisons.criteria == criterion] + ax.hist(cc.score, bins=21) + ax.set_title(criteria[criterion]) + +def n_extreme_values(scores, n_std_dev): + mean = scores.mean() + std_dev = np.sqrt(scores.var()) + return len(scores[np.abs(scores - mean) > n_std_dev * std_dev]) + +def plot(comparison_scores, colors=("g", "y", "r"), labels=None): + if labels is None: + plt.hist(comparison_scores, 21, density=True, histtype='bar', color=colors) + else: + plt.hist(comparison_scores, 21, density=True, histtype='bar', color=colors, label=labels) diff --git a/solidago/experiments/engagement_bias.json b/solidago/experiments/engagement_bias.json index b9771540ac..4327b90ab9 100644 --- a/solidago/experiments/engagement_bias.json +++ b/solidago/experiments/engagement_bias.json @@ -49,7 +49,6 @@ }], "preference_learning": ["UniformGBT", { "prior_std_dev": 7, - "comparison_max": 10, "convergence_error": 1e-05, "cumulant_generating_function_error": 1e-05 }], diff --git 
a/solidago/experiments/resilience.json b/solidago/experiments/resilience.json index 59934e46d7..c21b9426bc 100644 --- a/solidago/experiments/resilience.json +++ b/solidago/experiments/resilience.json @@ -49,7 +49,6 @@ }], "preference_learning": ["UniformGBT", { "prior_std_dev": 7, - "comparison_max": 10, "convergence_error": 1e-05, "cumulant_generating_function_error": 1e-05 }], diff --git a/solidago/experiments/synthetic.py b/solidago/experiments/synthetic.py index f07750d92a..adecc2d4a4 100644 --- a/solidago/experiments/synthetic.py +++ b/solidago/experiments/synthetic.py @@ -31,10 +31,13 @@ def sample_correlation(n_users, n_entities, seed, generative_model, pipeline) -> users, voting_rights, user_models, global_model = pipeline(*data) truth = entities["svd0"] - estimate = [global_model(e, row)[0] for e, row in entities.iterrows()] + estimate = [ + global_model(e, row)[0] if global_model(e, row) is not None else 0. + for e, row in entities.iterrows() + ] return np.corrcoef(truth, estimate)[0, 1] -def sample_n_correlations(n_users, n_entities, n_seeds, generative_model, pipeline, thread=True): +def sample_n_correlations(n_users, n_entities, n_seeds, generative_model, pipeline, thread=False): if not thread: return [ sample_correlation(n_users, n_entities, seed, generative_model, pipeline) diff --git a/solidago/experiments/tournesol.py b/solidago/experiments/tournesol.py index ee2a41b36b..a25b403635 100644 --- a/solidago/experiments/tournesol.py +++ b/solidago/experiments/tournesol.py @@ -1,11 +1,15 @@ import logging +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt + from threading import Thread from solidago.pipeline.inputs import TournesolInputFromPublicDataset from solidago.trust_propagation import LipschiTrust from solidago.voting_rights import AffineOvertrust -from solidago.preference_learning import LBFGSUniformGBT +from solidago.preference_learning import UniformGBT from solidago.scaling import ScalingCompose, Mehestan, 
QuantileZeroShift from solidago.aggregation import StandardizedQrQuantile from solidago.post_process import Squash @@ -27,6 +31,16 @@ ch.setLevel(logging.INFO) info_logger.addHandler(ch) +logger.info("Retrieve public dataset") +inputs = TournesolInputFromPublicDataset.download() +video_id_to_entity_id = { + video_id: entity_id + for entity_id, video_id in enumerate(inputs.entity_id_to_video_id) +} + + +# criteria = set(inputs.comparisons["criteria"]) +criteria = { "largely_recommended" } pipeline = Pipeline( trust_propagation=LipschiTrust( @@ -40,12 +54,10 @@ min_overtrust=2.0, overtrust_ratio=0.1, ), - preference_learning=LBFGSUniformGBT( + preference_learning=UniformGBT( prior_std_dev=7, - comparison_max=10, convergence_error=1e-5, cumulant_generating_function_error=1e-5, - n_steps=2, ), scaling=ScalingCompose( Mehestan( @@ -65,7 +77,7 @@ aggregation=StandardizedQrQuantile( quantile=0.2, dev_quantile=0.9, - lipschitz=0.1, + lipschitz=100, error=1e-5 ), post_process= Squash( @@ -73,27 +85,100 @@ ) ) -logger.info("Retrieve public dataset") -inputs = TournesolInputFromPublicDataset.download() +user_outputs, entities, voting_rights, scaled_user_models = dict(), dict(), dict(), dict() -logger.info("Preprocessing data for the pipeline") -users, vouches, entities, privacy = inputs.get_pipeline_objects() + +for c in criteria: + logger.info(f"Running the pipeline for criterion `{c}`") -# criteria = set(inputs.comparisons["criteria"]) -criteria = { "largely_recommended" } + pipeline_objects = inputs.get_pipeline_kwargs(criterion=c) + users = pipeline_objects["users"] + vouches = pipeline_objects["vouches"] + all_entities = pipeline_objects["entities"] + privacy = pipeline_objects["privacy"] + judgments = pipeline_objects["judgments"] + + users = pipeline.trust_propagation(users, vouches) + voting_rights[c], entities[c] = pipeline.voting_rights(users, all_entities, vouches, privacy) + user_models = pipeline.preference_learning(judgments, users, entities[c]) + 
scaled_user_models[c] = pipeline.scaling(user_models, users, entities[c], voting_rights[c], privacy) + +# threads = [Thread(target=run_pipeline, args=(criterion,)) for criterion in criteria] +# for thread in threads: +# thread.start() +# for thread in threads: +# thread.join() + +logger.info(f"Successful pipeline run.") + +scores = inputs.collective_scores -user_outputs, voting_rights, user_models, global_model = dict(), dict(), dict(), dict() -def run_pipeline(criterion): - logger.info(f"Running the pipeline for criterion `{criterion}`") - judgments = inputs.get_judgments(criterion) - output = pipeline(users, vouches, entities, privacy, judgments) - user_outputs[criterion], voting_rights[criterion] = output[0], output[1] - user_models[criterion], global_model[criterion] = output[2], output[3] - -threads = [Thread(target=run_pipeline, args=(criterion,)) for criterion in criteria] -for thread in threads: - thread.start() -for thread in threads: - thread.join() +squashed_user_models, global_model = dict(), dict() +quantiles = [0.1, 0.2, 0.35, 0.5, 0.65, 0.8, 0.9] + +for q in quantiles: + + pipeline.aggregation.quantile = q + squashed_user_models[q], global_model[q] = dict(), dict() + for c in criteria: + user_models, global_model[q][c] = pipeline.aggregation(voting_rights[c], scaled_user_models[c], users, entities[c]) + squashed_user_models[q][c], global_model[q][c] = pipeline.post_process(user_models, global_model[q][c], entities) -logger.info(f"Successful pipeline run.") + q_scores = list() + for _, row in scores.iterrows(): + try: + entity_id = video_id_to_entity_id[row.video] + q_scores.append(global_model[q][row.criteria](entity_id, None)[0]) + except: + q_scores.append(0.) 
+ scores[f"score_q={q}"] = q_scores + +comparisons = inputs.comparisons +s_main = scores[scores.criteria == "largely_recommended"] +c_main = comparisons[comparisons.criteria == "largely_recommended"] + +entity_a_counts = c_main.value_counts("entity_a") +entity_b_counts = c_main.value_counts("entity_b") + +def n_comparisons(video): + total = 0 + if video not in video_id_to_entity_id: + return 0 + if video_id_to_entity_id[video] in entity_a_counts: + total += entity_a_counts[video_id_to_entity_id[video]] + if video_id_to_entity_id[video] in entity_b_counts: + total += entity_b_counts[video_id_to_entity_id[video]] + return total + +def n_contributors(video): + if video not in video_id_to_entity_id: + return 0 + entity = video_id_to_entity_id[video] + contributors = set(c_main[c_main.entity_a == entity].user_id) + contributors |= set(c_main[c_main.entity_b == entity].user_id) + return len(contributors) + +s_main.loc[:,"n_comparisons"] = [n_comparisons(r.video) for _, r in s_main.iterrows()] +s_main.loc[:,"n_contributors"] = [n_contributors(r.video) for _, r in s_main.iterrows()] + +s_top_main = s_main[(s_main.n_comparisons > 100) & (s_main.n_contributors > 20)] +top_entities = set(s_top_main.video) +c_top_main = c_main[(c_main.entity_a.isin(top_entities)) | (c_main.entity_b.isin(top_entities))] + + +ranking = { q: s_top_main.sort_values(f"score_q={q}", ascending=False)["video"] for q in quantiles } +for q in quantiles: + rk = list(ranking[q]) + s_top_main.loc[:, f"ranking_q={q}"] = [ rk.index(r.video) for _, r in s_top_main.iterrows() ] + +ranking_cols = [f"ranking_q={q}" for q in quantiles] + +s_top_main.loc[:, "ranking_delta"] = s_top_main["ranking_q=0.8"] - s_top_main["ranking_q=0.2"] +s_top_main.loc[:, "score_delta"] = s_top_main["ranking_q=0.8"] - s_top_main["ranking_q=0.2"] + +largest_delta = set(s_top_main.sort_values("score_delta")[:5].video) +largest_delta |= set(s_top_main.sort_values("score_delta")[-5:].video) + +s_plot = 
s_top_main[s_top_main.video.isin(largest_delta)][["video"] + ranking_cols].set_index("video") + + diff --git a/solidago/pyproject.toml b/solidago/pyproject.toml index c1588faa7b..f4608c7548 100644 --- a/solidago/pyproject.toml +++ b/solidago/pyproject.toml @@ -30,7 +30,7 @@ dynamic = ["version"] [project.optional-dependencies] test = [ - "pytest >=7.1.3,<8.0.0", + "pytest >=7.1.3,<9.0.0", ] torch = [ "torch>=2.2,<3", diff --git a/solidago/src/solidago/__version__.py b/solidago/src/solidago/__version__.py index ef811d0dd5..fff47f6636 100644 --- a/solidago/src/solidago/__version__.py +++ b/solidago/src/solidago/__version__.py @@ -1,4 +1,4 @@ # Changing the version will automatically publish a new version on PyPI. # (see /.github/workflows/solidago-publish.yml) -__version__ = "0.1.1" +__version__ = "0.2.0" diff --git a/solidago/src/solidago/aggregation/entitywise_qr_quantile.py b/solidago/src/solidago/aggregation/entitywise_qr_quantile.py index 1a5e2b9b52..e0e8b2b09a 100644 --- a/solidago/src/solidago/aggregation/entitywise_qr_quantile.py +++ b/solidago/src/solidago/aggregation/entitywise_qr_quantile.py @@ -4,9 +4,9 @@ from .base import Aggregation from solidago.voting_rights import VotingRights -from solidago.scoring_model import ScoringModel, DirectScoringModel, ScaledScoringModel +from solidago.scoring_model import ScoringModel, DirectScoringModel -from solidago.primitives import qr_quantile, qr_standard_deviation, qr_uncertainty +from solidago.primitives import qr_quantile, qr_uncertainty class EntitywiseQrQuantile(Aggregation): @@ -56,8 +56,7 @@ def __call__( df = _get_user_scores(voting_rights, user_models, entities) global_scores = DirectScoringModel() - for entity, _ in entities.iterrows(): - dfe = df[df["entity_id"] == entity] + for entity_id, dfe in df.groupby("entity_id"): score = qr_quantile( self.lipschitz, self.quantile, @@ -73,11 +72,11 @@ def __call__( np.array(dfe["voting_rights"]), np.array(dfe["left_uncertainties"]), 
np.array(dfe["right_uncertainties"]), - default_dev = 1, + default_dev = 1.0, error = self.error, - median = score, + median = score if self.quantile == 0.5 else None, ) - global_scores[entity] = score, uncertainty + global_scores[entity_id] = score, uncertainty return user_models, global_scores diff --git a/solidago/src/solidago/aggregation/standardized_qr_quantile.py b/solidago/src/solidago/aggregation/standardized_qr_quantile.py index 17a62035d4..32287df3cb 100644 --- a/solidago/src/solidago/aggregation/standardized_qr_quantile.py +++ b/solidago/src/solidago/aggregation/standardized_qr_quantile.py @@ -85,7 +85,6 @@ def __call__( np.array(dfe["right_uncertainties"]), default_dev = 1, error = self.error, - median = score, ) global_scores[entity_id] = score, uncertainty return scaled_models, global_scores diff --git a/solidago/src/solidago/generative_model/user_model.py b/solidago/src/solidago/generative_model/user_model.py index fae0939f4a..ac726c7a3b 100644 --- a/solidago/src/solidago/generative_model/user_model.py +++ b/solidago/src/solidago/generative_model/user_model.py @@ -39,8 +39,8 @@ def __init__( zipf_compare: float=1.5, poisson_compare: float=30.0, n_comparisons_per_entity: float=3.0, - multiplicator_std_dev: float=0, - engagement_bias_std_dev: float=0, + multiplicator_std_dev: float=0.0, + engagement_bias_std_dev: float=0.0, svd_mean: list[float]=[3, 0, 0], svd_dimension: Optional[int]=None, ): @@ -57,8 +57,8 @@ def __init__( zipf_compare: float=1.5, poisson_compare: float=30.0, n_comparisons_per_entity: float=3.0, - multiplicator_std_dev: float=0, - engagement_bias_std_dev: float=0, + multiplicator_std_dev: float=0.0, + engagement_bias_std_dev: float=0.0, svd_mean: mean of the svd representation, svd_dimension: int or None (default), """ diff --git a/solidago/src/solidago/judgments.py b/solidago/src/solidago/judgments.py index b3a6b9693f..89cd1bf239 100644 --- a/solidago/src/solidago/judgments.py +++ b/solidago/src/solidago/judgments.py @@ -1,10 +1,11 
@@ from abc import ABC, abstractmethod +from typing import Optional import pandas as pd class Judgments(ABC): @abstractmethod - def __getitem__(self, user: int) -> dict[str, pd.DataFrame]: + def __getitem__(self, user: int) -> Optional[dict[str, pd.DataFrame]]: """ Returns user's judgments that can be used to infer a user model Parameters @@ -22,8 +23,8 @@ def __getitem__(self, user: int) -> dict[str, pd.DataFrame]: class DataFrameJudgments(Judgments): def __init__( self, - comparisons: pd.DataFrame = None, - assessments: pd.DataFrame = None + comparisons: Optional[pd.DataFrame] = None, + assessments: Optional[pd.DataFrame] = None, ): """ Instantiates judgments from all contributors, based on dataframes @@ -41,22 +42,28 @@ def __init__( * `assessment` * `assessment_type` """ - self.comparisons = comparisons if comparisons is None: self.comparisons = pd.DataFrame(columns=[ "user_id", "entity_a", "entity_b", "comparison", "comparison_max" ]) + else: + self.comparisons = comparisons - self.assessments = assessments if assessments is None: self.assessments = pd.DataFrame(columns=[ "user_id", "entity_id", "assessment", "assessment_type" ]) + else: + self.assessments = assessments def __getitem__(self, user: int): + comparisons = self.comparisons[self.comparisons["user_id"] == user] + assessments = self.assessments[self.assessments["user_id"] == user] + if len(comparisons) == 0 and len(assessments) == 0: + return None return dict( - comparisons=self.comparisons[self.comparisons["user_id"] == user], - assessments=self.assessments[self.assessments["user_id"] == user] + comparisons=comparisons, + assessments=assessments, ) diff --git a/solidago/src/solidago/pipeline/__init__.py b/solidago/src/solidago/pipeline/__init__.py index 57bb379f53..fa97097024 100644 --- a/solidago/src/solidago/pipeline/__init__.py +++ b/solidago/src/solidago/pipeline/__init__.py @@ -1,3 +1,5 @@ from .inputs import TournesolInput from .outputs import PipelineOutput from .pipeline import 
DefaultPipeline, Pipeline + +__all__ = ["TournesolInput", "DefaultPipeline", "Pipeline", "PipelineOutput"] diff --git a/solidago/src/solidago/pipeline/inputs.py b/solidago/src/solidago/pipeline/inputs.py index 686323c37a..48c93e4c00 100644 --- a/solidago/src/solidago/pipeline/inputs.py +++ b/solidago/src/solidago/pipeline/inputs.py @@ -57,6 +57,56 @@ def get_individual_scores( ) -> Optional[pd.DataFrame]: raise NotImplementedError + @abstractmethod + def get_vouches(self): + """Fetch data about vouches shared between users + + Returns: + - DataFrame with columns + * `voucher`: int, user_id of the user who gives the vouch + * `vouchee`: int, user_id of the user who receives the vouch + * `vouch`: float, value of this vouch + """ + raise NotImplementedError + + def get_users(self): + raise NotImplementedError + + def get_pipeline_kwargs(self, criterion: str): + ratings_properties = self.ratings_properties + users = self.get_users() + vouches = self.get_vouches() + comparisons = self.get_comparisons(criteria=criterion) + entities_ids = set(comparisons["entity_a"].unique()) | set( + comparisons["entity_b"].unique() + ) + entities = pd.DataFrame(index=list(entities_ids)) + + privacy = PrivacySettings() + user_entity_pairs = set( + comparisons[["user_id", "entity_a"]].itertuples(index=False, name=None) + ).union(comparisons[["user_id", "entity_b"]].itertuples(index=False, name=None)) + for rating in ratings_properties.itertuples(): + if (rating.user_id, rating.entity_id) in user_entity_pairs: + privacy[(rating.user_id, rating.entity_id)] = not rating.is_public + + judgments = DataFrameJudgments( + comparisons=comparisons.rename( + columns={ + "score": "comparison", + "score_max": "comparison_max", + } + ) + ) + + return { + "users": users, + "vouches": vouches, + "entities": entities, + "privacy": privacy, + "judgments": judgments, + } + class TournesolInputFromPublicDataset(TournesolInput): def __init__(self, dataset_zip: Union[str, BinaryIO]): @@ -72,14 +122,18 @@ def 
__init__(self, dataset_zip: Union[str, BinaryIO]): self.comparisons = pd.read_csv(comparison_file, keep_default_na=False) self.entity_id_to_video_id = pd.Series( list(set(self.comparisons.video_a) | set(self.comparisons.video_b)), - name="video_id" + name="video_id", ) video_id_to_entity_id = { video_id: entity_id for (entity_id, video_id) in self.entity_id_to_video_id.items() } - self.comparisons["entity_a"] = self.comparisons["video_a"].map(video_id_to_entity_id) - self.comparisons["entity_b"] = self.comparisons["video_b"].map(video_id_to_entity_id) + self.comparisons["entity_a"] = self.comparisons["video_a"].map( + video_id_to_entity_id + ) + self.comparisons["entity_b"] = self.comparisons["video_b"].map( + video_id_to_entity_id + ) self.comparisons.drop(columns=["video_a", "video_b"], inplace=True) with (zipfile.Path(zip_file) / "users.csv").open(mode="rb") as users_file: @@ -90,10 +144,25 @@ def __init__(self, dataset_zip: Union[str, BinaryIO]): # Fill trust_score on newly created users for which it was not computed yet self.users.trust_score = pd.to_numeric(self.users.trust_score).fillna(0.0) - username_to_user_id = pd.Series( + self.username_to_user_id = pd.Series( data=self.users.index, index=self.users["public_username"] ) - self.comparisons = self.comparisons.join(username_to_user_id, on="public_username") + self.comparisons = self.comparisons.join(self.username_to_user_id, on="public_username") + + with (zipfile.Path(zip_file) / "vouchers.csv").open(mode="rb") as vouchers_file: + # keep_default_na=False is required otherwise some public usernames + # such as "NA" are converted to float NaN. + self.vouchers = pd.read_csv(vouchers_file, keep_default_na=False) + + with (zipfile.Path(zip_file) / "collective_criteria_scores.csv").open(mode="rb") as collective_scores_file: + # keep_default_na=False is required otherwise some public usernames + # such as "NA" are converted to float NaN. 
+ self.collective_scores = pd.read_csv(collective_scores_file, keep_default_na=False) + + with (zipfile.Path(zip_file) / "individual_criteria_scores.csv").open(mode="rb") as individual_scores_file: + # keep_default_na=False is required otherwise some public usernames + # such as "NA" are converted to float NaN. + self.individual_scores = pd.read_csv(individual_scores_file, keep_default_na=False) @classmethod def download(cls) -> "TournesolInputFromPublicDataset": @@ -137,27 +206,21 @@ def get_individual_scores( ) -> Optional[pd.DataFrame]: # TODO: read contributor scores from individual_scores.csv return None - - def get_pipeline_objects(self): - users = self.users - users = users.assign(is_pretrusted=(users["trust_score"] >= 0.8)) - vouches = pd.DataFrame(columns=["voucher", "vouchee", "vouch"]) - entities_indices = set(self.comparisons["entity_a"]) | set(self.comparisons["entity_b"]) - entities = pd.DataFrame(index=list(entities_indices)) - entities.index.name = "entity_id" - privacy = PrivacySettings() - for (user_id, entity_id) in set( - self.comparisons[["user_id", "entity_a"]].itertuples(index=False, name=None) - ).union( - self.comparisons[["user_id", "entity_b"]].itertuples(index=False, name=None) - ): - privacy[user_id, entity_id] = False - return users, vouches, entities, privacy - - def get_judgments(self, criterion): - comparisons = self.comparisons - if criterion is not None: - comparisons = comparisons[comparisons["criteria"] == criterion] - comparisons = comparisons.rename(columns={"score": "comparison"}) - comparisons = comparisons.assign(comparison_max=[10] * len(comparisons)) - return DataFrameJudgments(comparisons=comparisons) + + def get_vouches(self): + vouchers = self.vouchers[ + self.vouchers.by_username.isin(self.username_to_user_id.index) + & self.vouchers.to_username.isin(self.username_to_user_id.index) + ] + return pd.DataFrame( + { + "voucher": vouchers.by_username.map(self.username_to_user_id), + "vouchee": 
vouchers.to_username.map(self.username_to_user_id), + "vouch": vouchers.value, + } + ) + + def get_users(self): + users = self.ratings_properties.groupby("user_id").first()[["trust_score"]] + users["is_pretrusted"] = users["trust_score"] >= 0.8 + return users diff --git a/solidago/src/solidago/pipeline/pipeline.py b/solidago/src/solidago/pipeline/pipeline.py index 4e99519c57..610a517fe0 100644 --- a/solidago/src/solidago/pipeline/pipeline.py +++ b/solidago/src/solidago/pipeline/pipeline.py @@ -16,6 +16,7 @@ from solidago.aggregation import Aggregation, StandardizedQrMedian, StandardizedQrQuantile, Average, EntitywiseQrQuantile from solidago.post_process import PostProcess, Squash, NoPostProcess +from solidago.pipeline.inputs import TournesolInput from solidago.pipeline.outputs import PipelineOutput logger = logging.getLogger(__name__) @@ -45,7 +46,7 @@ class DefaultPipeline: scaling: Scaling = ScalingCompose( Mehestan( lipschitz=0.1, - min_activity=10, + min_activity=10.0, n_scalers_max=100, privacy_penalty=0.5, p_norm_for_multiplicative_resilience=4.0, @@ -82,7 +83,7 @@ def __init__( aggregation: Aggregation = DefaultPipeline.aggregation, post_process: PostProcess = DefaultPipeline.post_process, ): - """ Instantiates the pipeline components. + """Instantiates the pipeline components. 
Parameters ---------- @@ -118,7 +119,22 @@ def from_json(cls, json) -> "Pipeline": aggregation=aggregation_from_json(json["aggregation"]), post_process=post_process_from_json(json["post_process"]), ) - + + def run( + self, + input: TournesolInput, + criterion: str, + output: Optional[PipelineOutput] = None + ): + # TODO: criterion should be managed by TournesolInput + + # TODO: read existing individual scores from input + # to pass `init_user_models` + return self( + **input.get_pipeline_kwargs(criterion), + output=output, + ) + def __call__( self, users: pd.DataFrame, @@ -148,8 +164,6 @@ def __call__( judgments[user] must yield the judgment data provided by the user init_user_models: dict[int, ScoringModel] user_models[user] is the user's model - skip_set: set[int] - Steps that are skipped in the pipeline Returns ------- @@ -176,8 +190,9 @@ def __call__( output.save_trust_scores(trusts=users) logger.info(f"Pipeline 2. Computing voting rights with {str(self.voting_rights)}") - # FIXME: `privacy` may contain (user, entity) even if user has expressed no judgement + # WARNING: `privacy` may contain (user, entity) even if user has expressed no judgement # about the entity. These users should not be given a voting right on the entity. + # For now, irrelevant privacy values are excluded in `input.get_pipeline_kwargs()` voting_rights, entities = self.voting_rights(users, entities, vouches, privacy) start_step3 = timeit.default_timer() logger.info(f"Pipeline 2. 
Terminated in {np.round(start_step3 - start_step2, 2)} seconds") @@ -229,8 +244,8 @@ def to_json(self): post_process=self.post_process.to_json() ) + @staticmethod def save_individual_scalings( - self, user_models: dict[int, ScaledScoringModel], output: PipelineOutput, ): @@ -251,8 +266,8 @@ def save_individual_scalings( ) output.save_individual_scalings(scalings_df) + @staticmethod def save_individual_scores( - self, user_scorings: dict[int, ScoringModel], raw_user_scorings: dict[int, ScoringModel], voting_rights: VotingRights, @@ -285,9 +300,10 @@ def get_raw_uncertainty(row): _, left_unc, right_unc = raw_scoring return left_unc + right_unc - scores_df["raw_score"] = scores_df.apply(get_raw_score, axis=1) - scores_df["raw_uncertainty"] = scores_df.apply(get_raw_uncertainty, axis=1) - output.save_individual_scores(scores_df) + if len(scores_df) > 0: + scores_df["raw_score"] = scores_df.apply(get_raw_score, axis=1) + scores_df["raw_uncertainty"] = scores_df.apply(get_raw_uncertainty, axis=1) + output.save_individual_scores(scores_df) def trust_propagation_from_json(json): diff --git a/solidago/src/solidago/post_process/base.py b/solidago/src/solidago/post_process/base.py index bbaac07afd..3072b8f912 100644 --- a/solidago/src/solidago/post_process/base.py +++ b/solidago/src/solidago/post_process/base.py @@ -13,7 +13,7 @@ def __call__( user_models: Mapping[int, ScoringModel], global_model: ScoringModel, entities: pd.DataFrame - ) -> tuple[dict[int, ScoringModel], ScoringModel]: + ) -> tuple[Mapping[int, ScoringModel], ScoringModel]: """ Post-processes user models and global models, typically to yield human-readible scores @@ -31,5 +31,5 @@ def __call__( """ raise NotImplementedError - def to_json(self): + def to_json(self) -> tuple: return (type(self).__name__, ) diff --git a/solidago/src/solidago/post_process/squash.py b/solidago/src/solidago/post_process/squash.py index 647c1f2a5f..66c97fc56a 100644 --- a/solidago/src/solidago/post_process/squash.py +++ 
b/solidago/src/solidago/post_process/squash.py @@ -8,7 +8,7 @@ class Squash(PostProcess): - def __init__(self, score_max: float = 100): + def __init__(self, score_max: float = 100.0): self.score_max = score_max def __call__( @@ -16,7 +16,7 @@ def __call__( user_models: Mapping[int, ScoringModel], global_model: ScoringModel, entities: Optional[pd.DataFrame] = None - ) -> tuple[dict[int, ScoringModel], ScoringModel]: + ) -> tuple[Mapping[int, ScoringModel], ScoringModel]: """ Post-processes user models and global models, typically to yield human-readible scores @@ -39,7 +39,6 @@ def __call__( for u in user_models } squashed_global_model = PostProcessedScoringModel(global_model, squash) - return squashed_user_models, squashed_global_model def to_json(self): diff --git a/solidago/src/solidago/preference_learning/__init__.py b/solidago/src/solidago/preference_learning/__init__.py index da46034788..d1776ea8a7 100644 --- a/solidago/src/solidago/preference_learning/__init__.py +++ b/solidago/src/solidago/preference_learning/__init__.py @@ -1,4 +1,4 @@ -""" Step 3 of the pipeline. +""" **Step 3 of the pipeline** Preference learning infers, for each user and based on their data, a model of the user's preferences. 
@@ -13,3 +13,6 @@ from .lbfgs_generalized_bradley_terry import LBFGSUniformGBT except RuntimeError: pass + + +__all__ = ["PreferenceLearning", "UniformGBT", "LBFGSUniformGBT"] diff --git a/solidago/src/solidago/preference_learning/base.py b/solidago/src/solidago/preference_learning/base.py index 5c9ac77a0b..087f362596 100644 --- a/solidago/src/solidago/preference_learning/base.py +++ b/solidago/src/solidago/preference_learning/base.py @@ -1,5 +1,5 @@ from abc import ABC, abstractmethod -from typing import Optional, Union +from typing import Optional import pandas as pd import logging @@ -10,21 +10,24 @@ logger = logging.getLogger(__name__) + class PreferenceLearning(ABC): + MAX_UNCERTAINTY = 1000.0 + def __call__( - self, + self, judgments: Judgments, users: pd.DataFrame, entities: pd.DataFrame, initialization: Optional[dict[int, ScoringModel]] = None, new_judgments: Optional[Judgments] = None, ) -> dict[int, ScoringModel]: - """ Learns a scoring model, given user judgments of entities - + """Learns a scoring model, given user judgments of entities + Parameters ---------- - user_judgments: dict[str, pd.DataFrame] - May contain different forms of judgments, + judgments: + May contain different forms of judgments, but most likely will contain "comparisons" and/or "assessments" entities: DataFrame with columns * entity_id: int, index @@ -32,16 +35,17 @@ def __call__( initialization: dict[int, ScoringModel] or ScoringModel or None Starting models, added to facilitate optimization It is not supposed to affect the output of the training - new_judgments: New judgments - This allows to prioritize coordinate descent, starting with newly evaluated entities - + new_judgments: + New judgments + This allows to prioritize coordinate descent, starting with newly evaluated entities + Returns ------- user_models: dict[int, ScoringModel] user_models[user] is the learned scoring model for user """ assert isinstance(judgments, Judgments) - + user_models = dict() if initialization is 
None else initialization for n_user, user in enumerate(users.index): if n_user % 100 == 0: @@ -52,23 +56,26 @@ def __call__( if initialization is not None: init_model = initialization.get(user) new_judg = None if new_judgments is None else new_judgments[user] - user_models[user] = self.user_learn(judgments[user], entities, init_model, new_judg) + user_judgments = judgments[user] + if user_judgments is None: + continue + user_models[user] = self.user_learn(user_judgments, entities, init_model, new_judg) return user_models - + @abstractmethod def user_learn( - self, + self, user_judgments: dict[str, pd.DataFrame], entities: pd.DataFrame, - initialization: Optional[ScoringModel]=None, - new_judgments: Optional[dict[str, pd.DataFrame]]=None, + initialization: Optional[ScoringModel] = None, + new_judgments: Optional[dict[str, pd.DataFrame]] = None, ) -> ScoringModel: - """ Learns a scoring model, given user judgments of entities - + """Learns a scoring model, given user judgments of entities + Parameters ---------- user_judgments: dict[str, pd.DataFrame] - May contain different forms of judgments, + May contain different forms of judgments, but most likely will contain "comparisons" and/or "assessments" entities: DataFrame with columns * entity_id: int, index @@ -76,17 +83,18 @@ def user_learn( initialization: ScoringModel or None Starting model, added to facilitate optimization It is not supposed to affect the output of the training - new_judgments: New judgments - This allows to prioritize coordinate descent, starting with newly evaluated entities - + new_judgments: + New judgments + This allows to prioritize coordinate descent, starting with newly evaluated entities + Returns ------- model: ScoringModel """ raise NotImplementedError - - def to_json(self): - return (type(self).__name__, ) + + def to_json(self) -> tuple: + return (type(self).__name__,) def __str__(self): return type(self).__name__ diff --git 
a/solidago/src/solidago/preference_learning/generalized_bradley_terry.py b/solidago/src/solidago/preference_learning/generalized_bradley_terry.py index 3e2cc9596e..a9a5dd9086 100644 --- a/solidago/src/solidago/preference_learning/generalized_bradley_terry.py +++ b/solidago/src/solidago/preference_learning/generalized_bradley_terry.py @@ -16,8 +16,9 @@ class GeneralizedBradleyTerry(ComparisonBasedPreferenceLearning): def __init__( self, - prior_std_dev: float=7, + prior_std_dev: float=7.0, convergence_error: float=1e-5, + high_likelihood_range_threshold = 1.0, ): """ @@ -30,8 +31,10 @@ def __init__( """ self.prior_std_dev = prior_std_dev self.convergence_error = convergence_error - + self.high_likelihood_range_threshold = high_likelihood_range_threshold + @property + @abstractmethod def cumulant_generating_function_derivative(self) -> Callable[[npt.NDArray], npt.NDArray]: """ The beauty of the generalized Bradley-Terry model is that it suffices to specify its cumulant generating function derivative to fully define it, @@ -46,23 +49,33 @@ def cumulant_generating_function_derivative(self) -> Callable[[npt.NDArray], npt ------- out: float """ - raise NotImplementedError - + + @property @abstractmethod - def cumulant_generating_function_second_derivative(self, score_diff: float) -> float: - """ We estimate uncertainty by the flatness of the negative log likelihood, - which is directly given by the second derivative of the cumulant generating function. - - Parameters - ---------- - score_diff: float - Score difference - - Returns - ------- - out: float + def log_likelihood_function(self) -> Callable[[npt.NDArray, npt.NDArray], float]: + """The loss function definition is used only to compute uncertainties. """ - pass + + @cached_property + def translated_negative_log_likelihood(self): + """This function is a convex negative log likelihood, translated such + that its minimum has a constant negative value at `delta=0`. 
The + roots of this function are used to compute the uncertainties + intervals. If it has only a single root, then uncertainty on the + other side is considered infinite. + """ + ll_function = self.log_likelihood_function + high_likelihood_range_threshold = self.high_likelihood_range_threshold + + @njit + def f(delta, theta_diff, r, coord_indicator, ll_actual): + return ( + ll_function(theta_diff + delta * coord_indicator, r) + - ll_actual + - high_likelihood_range_threshold + ) + + return f @cached_property def update_coordinate_function(self): @@ -101,16 +114,16 @@ def comparison_learning( """ entities = list(set(comparisons["entity_a"]) | set(comparisons["entity_b"])) entity_coordinates = { entity: c for c, entity in enumerate(entities) } - + comparisons_dict = self.comparisons_dict(comparisons, entity_coordinates) - + init_solution = np.zeros(len(entities)) if initialization is not None: for (entity_id, entity_coord) in entity_coordinates.items(): entity_init_values = initialization(entity_id) if entity_init_values is not None: init_solution[entity_coord] = entity_init_values[0] - + updated_coordinates = list() if updated_entities is None else [ entity_coordinates[entity] for entity in updated_entities ] @@ -121,7 +134,7 @@ def get_derivative_args(coord: int, sol: np.ndarray): sol[indices], comparisons_bis ) - + solution = coordinate_descent( self.update_coordinate_function, get_args=get_derivative_args, @@ -129,29 +142,54 @@ def get_derivative_args(coord: int, sol: np.ndarray): updated_coordinates=updated_coordinates, error=self.convergence_error, ) - - uncertainties = [ - self.hessian_diagonal_element(coordinate, solution, comparisons_dict[coordinate][0]) - for coordinate in range(len(entities)) - ] - - model = DirectScoringModel() + + comparisons = comparisons.assign( + entity_a_coord=comparisons["entity_a"].map(entity_coordinates), + entity_b_coord=comparisons['entity_b'].map(entity_coordinates), + ) + score_diff = solution[comparisons["entity_a_coord"]] - 
solution[comparisons["entity_b_coord"]] + r_actual = (comparisons["comparison"] / comparisons["comparison_max"]).to_numpy() + + uncertainties_left = np.empty_like(solution) + uncertainties_right = np.empty_like(solution) + ll_actual = self.log_likelihood_function(score_diff, r_actual) + for coordinate in range(len(solution)): - model[entities[coordinate]] = solution[coordinate], uncertainties[coordinate] - + comparison_indicator = ( + (comparisons["entity_a_coord"] == coordinate).astype(int) + - (comparisons["entity_b_coord"] == coordinate).astype(int) + ).to_numpy() + try: + uncertainties_left[coordinate] = -1 * njit_brentq( + self.translated_negative_log_likelihood, + args=(score_diff, r_actual, comparison_indicator, ll_actual), + xtol=1e-2, + a=-self.MAX_UNCERTAINTY, + b=0.0, + extend_bounds="no", + ) + except ValueError: + uncertainties_left[coordinate] = self.MAX_UNCERTAINTY + + try: + uncertainties_right[coordinate] = njit_brentq( + self.translated_negative_log_likelihood, + args=(score_diff, r_actual, comparison_indicator, ll_actual), + xtol=1e-2, + a=0.0, + b=self.MAX_UNCERTAINTY, + extend_bounds="no", + ) + except ValueError: + uncertainties_right[coordinate] = self.MAX_UNCERTAINTY + + model = DirectScoringModel() + for coord in range(len(solution)): + model[entities[coord]] = solution[coord], uncertainties_left[coord], uncertainties_right[coord] return model - + def comparisons_dict(self, comparisons, entity_coordinates) -> dict[int, tuple[npt.NDArray, npt.NDArray]]: - comparisons = ( - comparisons[ - ["entity_a","entity_b","comparison", "comparison_max"] - ] - .assign( - pair=comparisons.apply(lambda c: {c["entity_a"], c["entity_b"]}, axis=1) - ) - .drop_duplicates("pair", keep="last") - .drop(columns="pair") - ) + comparisons = comparisons[["entity_a","entity_b","comparison", "comparison_max"]] comparisons_sym = pd.concat( [ comparisons, @@ -176,7 +214,7 @@ def comparisons_dict(self, comparisons, entity_coordinates) -> dict[int, tuple[n coord: 
(group["entity_b"].to_numpy(), group["comparison"].to_numpy()) for (coord, group) in comparisons_sym.groupby("entity_a") } # type: ignore - + @cached_property def partial_derivative(self): """ Computes the partial derivative along a coordinate, @@ -203,41 +241,48 @@ def njit_partial_derivative( ) ) return njit_partial_derivative - - def hessian_diagonal_element( - self, - coordinate: int, - solution: np.ndarray, - comparisons_indices: np.ndarray, - ) -> float: - """ Computes the second partial derivative """ - result = 1 / self.prior_std_dev ** 2 - for coordinate_bis in comparisons_indices: - score_diff = solution[coordinate] - solution[coordinate_bis] - result += self.cumulant_generating_function_second_derivative(score_diff) - return result class UniformGBT(GeneralizedBradleyTerry): - def __init__( self, - prior_std_dev: float = 7, + prior_std_dev: float = 7.0, convergence_error: float = 1e-5, cumulant_generating_function_error: float = 1e-5, + high_likelihood_range_threshold: float = 1.0, ): """ - Parameters + Parameters (TODO) ---------- - initialization: dict[int, float] - previously computed entity scores - error: float - tolerated error """ - super().__init__(prior_std_dev, convergence_error) + super().__init__( + prior_std_dev, + convergence_error, + high_likelihood_range_threshold=high_likelihood_range_threshold + ) self.cumulant_generating_function_error = cumulant_generating_function_error + @cached_property + def log_likelihood_function(self): + @njit + def f(score_diff, r): + score_diff_abs = np.abs(score_diff) + return ( + np.where( + score_diff_abs > 1e-1, + np.where( + score_diff_abs < 20.0, + np.log(np.sinh(score_diff) / score_diff), + score_diff_abs - np.log(2) - np.log(score_diff_abs), + ), + score_diff_abs ** 2 / 6 - score_diff_abs ** 4 / 180, + ) + + r * score_diff + ).sum() + + return f + @cached_property def cumulant_generating_function_derivative(self) -> Callable[[npt.NDArray], npt.NDArray]: tolerance = 
self.cumulant_generating_function_error @@ -252,28 +297,12 @@ def f(score_diff: npt.NDArray): return f - def cumulant_generating_function_second_derivative(self, score_diff: float) -> float: - """We estimate uncertainty by the flatness of the negative log likelihood, - which is directly given by the second derivative of the cumulant generating function. - - Parameters - ---------- - score_diff: float - Score difference - - Returns - ------- - out: float - """ - if np.abs(score_diff) < self.cumulant_generating_function_error: - return (1 / 3) - (score_diff**2 / 15) - return 1 - (1 / np.tanh(score_diff) ** 2) + (1 / score_diff**2) - def to_json(self): return type(self).__name__, dict( prior_std_dev=self.prior_std_dev, convergence_error=self.convergence_error, cumulant_generating_function_error=self.cumulant_generating_function_error, + high_likelihood_range_threshold=self.high_likelihood_range_threshold, ) def __str__(self): diff --git a/solidago/src/solidago/preference_learning/lbfgs_generalized_bradley_terry.py b/solidago/src/solidago/preference_learning/lbfgs_generalized_bradley_terry.py index 46111dce2a..8fcbeab81c 100644 --- a/solidago/src/solidago/preference_learning/lbfgs_generalized_bradley_terry.py +++ b/solidago/src/solidago/preference_learning/lbfgs_generalized_bradley_terry.py @@ -13,15 +13,16 @@ ) from exc from solidago.scoring_model import ScoringModel, DirectScoringModel +from solidago.solvers import dichotomy from .comparison_learning import ComparisonBasedPreferenceLearning - class LBFGSGeneralizedBradleyTerry(ComparisonBasedPreferenceLearning): def __init__( self, prior_std_dev: float = 7, convergence_error: float = 1e-5, - n_steps: int = 3, + max_iter: int = 100, + high_likelihood_range_threshold = 1.0, ): """ @@ -34,7 +35,8 @@ def __init__( """ self.prior_std_dev = prior_std_dev self.convergence_error = convergence_error - self.n_steps = n_steps + self.max_iter = max_iter + self.high_likelihood_range_threshold = high_likelihood_range_threshold 
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") @abstractmethod @@ -54,22 +56,6 @@ def cumulant_generating_function(self, score_diff: torch.Tensor) -> torch.Tensor """ pass - @abstractmethod - def cumulant_generating_function_second_derivative(self, score_diff: float) -> float: - """We estimate uncertainty by the flatness of the negative log likelihood, - which is directly given by the second derivative of the cumulant generating function. - - Parameters - ---------- - score_diff: float - Score difference - - Returns - ------- - out: float - """ - pass - def comparison_learning( self, comparisons: pd.DataFrame, @@ -94,97 +80,117 @@ def comparison_learning( entities = list(set(comparisons["entity_a"]) | set(comparisons["entity_b"])) entity_coordinates = {entity: c for c, entity in enumerate(entities)} - comparisons = ( - comparisons - .assign( - entity_a=comparisons["entity_a"].map(entity_coordinates), - entity_b=comparisons["entity_b"].map(entity_coordinates), - comparison=comparisons["comparison"] / comparisons["comparison_max"], - pair=comparisons.apply(lambda c: {c["entity_a"], c["entity_b"]}, axis=1), - ) - .drop_duplicates("pair", keep="last") - .drop(columns="pair") + comparisons_np = ( + comparisons["entity_a"].map(entity_coordinates).to_numpy(), + comparisons["entity_b"].map(entity_coordinates).to_numpy(), + (comparisons["comparison"] / comparisons["comparison_max"]).to_numpy() ) - solution = torch.normal( - 0, 1, (len(entities),), requires_grad=True, dtype=float, device=self.device + solution = np.random.normal(0.0, 1.0, size=len(entities)) + if initialization is not None: + for (entity_id, values) in initialization.iter_entities(): + entity_coord = entity_coordinates.get(entity_id) + if entity_coord is not None: + score, _left, _right = values + solution[entity_coord] = score + + solution = torch.tensor(solution, requires_grad=True, device=self.device) + lbfgs = torch.optim.LBFGS( + (solution,), + max_iter=self.max_iter, + 
tolerance_change=self.convergence_error, + line_search_fn="strong_wolfe", ) - for (entity, coord) in entity_coordinates.items(): - if initialization is not None and entity in initialization: - solution[coord] = initialization[entity] - - lbfgs = torch.optim.LBFGS((solution,)) def closure(): lbfgs.zero_grad() - loss = self.loss(solution, comparisons) + loss = self.loss(solution, comparisons_np) loss.backward() return loss - for _step in range(self.n_steps): - lbfgs.step(closure) + lbfgs.step(closure) # type: ignore - uncertainties = [ - self.hessian_diagonal_element(entity, solution, comparisons) - for entity in range(len(entities)) - ] + n_iter = lbfgs.state_dict()["state"][0]["n_iter"] + if n_iter >= self.max_iter: + raise RuntimeError(f"LBFGS failed to converge in {n_iter} iterations") + + solution = solution.detach() + if solution.isnan().any(): + raise RuntimeError(f"Nan in solution, state: {lbfgs.state_dict()}") + + def loss_with_delta(delta, comparisons, coord): + solution_with_delta = solution.clone() + solution_with_delta[coord] += delta + return self.loss(solution_with_delta, comparisons, with_regularization=False).item() model = DirectScoringModel() for coordinate in range(len(solution)): + mask = ((comparisons_np[0] == coordinate) | (comparisons_np[1] == coordinate)) + comparisons_np_subset = tuple(arr[mask] for arr in comparisons_np) + ll_solution = self.loss(solution, comparisons_np_subset, with_regularization=False).item() + + try: + uncertainty_left = -1 * dichotomy.solve( + loss_with_delta, + value=ll_solution + self.high_likelihood_range_threshold, + args=(comparisons_np_subset, coordinate), + xmin=-self.MAX_UNCERTAINTY, + xmax=0.0, + error=1e-1, + ) + except ValueError: + uncertainty_left = self.MAX_UNCERTAINTY + + try: + uncertainty_right = dichotomy.solve( + loss_with_delta, + value=ll_solution + self.high_likelihood_range_threshold, + args=(comparisons_np_subset, coordinate), + xmin=0.0, + xmax=self.MAX_UNCERTAINTY, + error=1e-1, + ) + except 
ValueError: + uncertainty_right = self.MAX_UNCERTAINTY + model[entities[coordinate]] = ( - solution[coordinate].detach().numpy(), - uncertainties[coordinate], + solution[coordinate].item(), + uncertainty_left, + uncertainty_right, ) return model - - def loss(self, solution, comparisons) -> torch.Tensor: - score_diff = ( - solution[comparisons["entity_b"].to_numpy()] - - solution[comparisons["entity_a"].to_numpy()] - ) - return ( - torch.sum(solution**2) / (2 * self.prior_std_dev**2) - + self.cumulant_generating_function(score_diff).sum() - - (score_diff * torch.from_numpy(comparisons["comparison"].to_numpy())).sum() + + def loss(self, solution, comparisons, with_regularization=True): + comp_a, comp_b, comp_value = comparisons + score_diff = solution[comp_b] - solution[comp_a] + loss = ( + self.cumulant_generating_function(score_diff).sum() + - (score_diff * torch.from_numpy(comp_value)).sum() ) - - def hessian_diagonal_element( - self, - entity: int, - solution: torch.Tensor, - comparisons: pd.DataFrame, - ) -> float: - """Computes the second partial derivative""" - result = 1 / self.prior_std_dev**2 - c = comparisons[(comparisons["entity_a"] == entity) | (comparisons["entity_b"] == entity)] - for row in c.itertuples(): - score_diff = ( - solution[row.entity_b] - - solution[row.entity_a] - ) - result += self.cumulant_generating_function_second_derivative(score_diff.detach()) - return result + if with_regularization: + loss += torch.sum(solution**2) / (2 * self.prior_std_dev**2) + return loss class LBFGSUniformGBT(LBFGSGeneralizedBradleyTerry): def __init__( self, prior_std_dev: float = 7, - comparison_max: float = 10, convergence_error: float = 1e-5, cumulant_generating_function_error: float = 1e-5, - n_steps: int = 3, + max_iter: int = 100, + high_likelihood_range_threshold: float = 1.0, ): """ - Parameters + Parameters (TODO) ---------- - initialization: dict[int, float] - previously computed entity scores - error: float - tolerated error """ - 
super().__init__(prior_std_dev, convergence_error, n_steps) - self.comparison_max = comparison_max + super().__init__( + prior_std_dev, + convergence_error, + max_iter=max_iter, + high_likelihood_range_threshold=high_likelihood_range_threshold, + ) self.cumulant_generating_function_error = cumulant_generating_function_error def cumulant_generating_function(self, score_diff: torch.Tensor) -> torch.Tensor: @@ -199,60 +205,31 @@ def cumulant_generating_function(self, score_diff: torch.Tensor) -> torch.Tensor ------- out: float """ + score_diff_abs = score_diff.abs() return torch.where( - score_diff != 0, - torch.log(torch.sinh(score_diff) / score_diff), - 0.0 + score_diff_abs > 1e-1, + torch.where( + score_diff_abs < 20.0, + (torch.sinh(score_diff) / score_diff).log(), + score_diff_abs - np.log(2) - score_diff_abs.log(), + ), + score_diff_abs ** 2 / 6 - score_diff_abs ** 4 / 180, ) - def cumulant_generating_function_derivative(self, score_diff: float) -> float: - """ For. - - Parameters - ---------- - score_diff: float - Score difference - - Returns - ------- - out: float - """ - if np.abs(score_diff) < self.cumulant_generating_function_error: - return score_diff / 3 - return 1 / np.tanh(score_diff) - 1 / score_diff - - def cumulant_generating_function_second_derivative(self, score_diff: float) -> float: - """We estimate uncertainty by the flatness of the negative log likelihood, - which is directly given by the second derivative of the cumulant generating function. 
- - Parameters - ---------- - score_diff: float - Score difference - - Returns - ------- - out: float - """ - if np.abs(score_diff) < self.cumulant_generating_function_error: - return (1 / 3) - (score_diff**2 / 15) - return 1 - (1 / np.tanh(score_diff) ** 2) + (1 / score_diff**2) - def to_json(self): return type(self).__name__, dict( prior_std_dev=self.prior_std_dev, - comparison_max=self.comparison_max, convergence_error=self.convergence_error, cumulant_generating_function_error=self.cumulant_generating_function_error, + high_likelihood_range_threshold=self.high_likelihood_range_threshold, ) def __str__(self): prop_names = [ "prior_std_dev", "convergence_error", - "comparison_max", "cumulant_generating_function_error", - "n_steps", + "max_iter", ] prop = ", ".join([f"{p}={getattr(self, p)}" for p in prop_names]) return f"{type(self).__name__}({prop})" diff --git a/solidago/src/solidago/primitives.py b/solidago/src/solidago/primitives.py index 1f39ee860b..7bd30ed667 100644 --- a/solidago/src/solidago/primitives.py +++ b/solidago/src/solidago/primitives.py @@ -9,14 +9,14 @@ @njit def qr_quantile( - lipschitz: float, + lipschitz: float, quantile: float, values: npt.NDArray, voting_rights: Union[npt.NDArray, float]=1.0, left_uncertainties: Optional[npt.NDArray]=None, right_uncertainties: Optional[npt.NDArray]=None, - default_value: float=0, - error: float=1e-5 + default_value: float=0.0, + error: float=1e-5, ) -> float: """ Computes the quadratically regularized quantile, an estimate of the quantile of values,weighted by voting_rights, given left and right @@ -85,25 +85,30 @@ def _qr_quantile_loss_derivative( """Computes the derivative of the loss associated to qr_quantile""" regularization = (variable - default_value) / lipschitz - if quantile == 0.5: - quantile_term = 0.0 - elif isinstance(voting_rights, (int, float)): - quantile_term = (1.0 - 2.0 * quantile) * voting_rights * len(values) - else: - quantile_term = (1.0 - 2.0 * quantile) * np.sum(voting_rights) - 
deltas = variable - values uncertainties_2 = left_uncertainties_2 * (deltas < 0) + right_uncertainties_2 * (deltas > 0) + spacing forces = voting_rights * deltas / np.sqrt(uncertainties_2 + deltas**2) + + if quantile == 0.5: + return regularization + forces.sum() + + left_strength = min(1.0, quantile / (1-quantile)) + right_strength = min(1.0, (1-quantile) / quantile) + + forces = np.where( + forces < 0, + forces * left_strength, + forces * right_strength, + ) - return regularization + quantile_term + forces.sum() + return regularization + forces.sum() @njit def qr_median( lipschitz: float, values: npt.NDArray, - voting_rights: Union[npt.NDArray, float] = 1, + voting_rights: Union[npt.NDArray, float] = 1.0, left_uncertainties: Optional[npt.NDArray] = None, right_uncertainties: Optional[npt.NDArray] = None, default_value: float = 0.0, @@ -256,7 +261,7 @@ def clip_mean( def lipschitz_resilient_mean( lipschitz: float, values: npt.NDArray, - voting_rights: Union[npt.NDArray, float] = 1.0, + voting_rights: Union[npt.NDArray[np.float64], float] = 1.0, left_uncertainties: Optional[npt.NDArray] = None, right_uncertainties: Optional[npt.NDArray] = None, default_value: float = 0.0, @@ -293,11 +298,11 @@ def lipschitz_resilient_mean( if len(values) == 0: return default_value - if isinstance(voting_rights, float): + if isinstance(voting_rights, (float, int)): voting_rights = np.full(values.shape, voting_rights) total_voting_rights = np.sum(voting_rights) - if total_voting_rights == 0: + if total_voting_rights == 0.0: return default_value return clip_mean( diff --git a/solidago/src/solidago/scaling/__init__.py b/solidago/src/solidago/scaling/__init__.py index d89f5e313d..13c2b1149b 100644 --- a/solidago/src/solidago/scaling/__init__.py +++ b/solidago/src/solidago/scaling/__init__.py @@ -11,5 +11,5 @@ from .compose import ScalingCompose from .no_scaling import NoScaling from .mehestan import Mehestan -from .quantile_zero_shift import QuantileZeroShift +from .quantile_zero_shift 
import QuantileShift, QuantileZeroShift from .standardize import Standardize diff --git a/solidago/src/solidago/scaling/base.py b/solidago/src/solidago/scaling/base.py index 984cf57eda..a4b3d490d8 100644 --- a/solidago/src/solidago/scaling/base.py +++ b/solidago/src/solidago/scaling/base.py @@ -41,7 +41,7 @@ def __call__( """ raise NotImplementedError - def to_json(self): + def to_json(self) -> tuple: return (type(self).__name__, ) def __str__(self): diff --git a/solidago/src/solidago/scaling/mehestan.py b/solidago/src/solidago/scaling/mehestan.py index 4fcc53dde5..62e6fb935b 100644 --- a/solidago/src/solidago/scaling/mehestan.py +++ b/solidago/src/solidago/scaling/mehestan.py @@ -1,4 +1,4 @@ -from typing import Callable, Optional +from typing import Callable, Mapping, Optional import numpy as np import pandas as pd @@ -24,7 +24,7 @@ class Mehestan(Scaling): def __init__( self, lipschitz=0.1, - min_activity=10, + min_activity=10.0, n_scalers_max=100, privacy_penalty=0.5, user_comparison_lipschitz=10.0, @@ -71,7 +71,7 @@ def __init__( def __call__( self, - user_models: dict[int, ScoringModel], + user_models: Mapping[int, ScoringModel], users: pd.DataFrame, entities: pd.DataFrame, voting_rights: Optional[VotingRights] = None, @@ -112,7 +112,8 @@ def __call__( logger.info(f"Mehestan 1. Terminated in {int(end_step1 - start)} seconds") logger.info("Mehestan 2. Collaborative scaling of scalers") - scaled_models = self.scale_scalers(user_models, scalers, entities, privacy) + scaler_user_models = {u: m for u,m in user_models.items() if u in scalers.index} + scaled_models = self.scale_scalers(scaler_user_models, scalers, entities, privacy) end_step2 = timeit.default_timer() logger.info(f"Mehestan 2. 
Terminated in {int(end_step2 - end_step1)} seconds") @@ -134,7 +135,7 @@ def __call__( def compute_scalers( self, - user_models: dict[int, ScoringModel], + user_models: Mapping[int, ScoringModel], entities: pd.DataFrame, users: pd.DataFrame, privacy: Optional[PrivacySettings], @@ -160,7 +161,7 @@ def compute_scalers( activities = self.compute_activities(user_models, entities, users, privacy) index_to_user = { index: user for index, user in enumerate(users.index) } np_activities = np.array([ - activities[index_to_user[index]] + activities.get(index_to_user[index], 0.0) for index in range(len(users)) ]) argsort = np.argsort(np_activities) @@ -177,8 +178,7 @@ def scale_scalers(self, user_models, scalers, entities, privacy): end2a = timeit.default_timer() logger.info(f" Mehestan 2a. Model norms in {int(end2a - start)} seconds") - entity_ratios = self.compute_entity_ratios(user_models, user_models, - entities, scalers, scalers, privacy) + entity_ratios = self.compute_entity_ratios(user_models, user_models, entities, privacy) end2b = timeit.default_timer() logger.info(f" Mehestan 2b. Entity ratios in {int(end2b - end2a)} seconds") ratio_voting_rights, ratios, ratio_uncertainties = _aggregate_user_comparisons( @@ -193,8 +193,9 @@ def scale_scalers(self, user_models, scalers, entities, privacy): end2d = timeit.default_timer() logger.info(f" Mehestan 2d. Multiplicators in {int(end2d - end2c)} seconds") - entity_diffs = self.compute_entity_diffs(user_models, user_models, scalers, scalers, - entities, privacy, multiplicators) + entity_diffs = self.compute_entity_diffs( + user_models, user_models, entities, privacy, multiplicators + ) end2e = timeit.default_timer() logger.info(f" Mehestan 2e. 
Entity diffs in {int(end2e - end2d)} seconds") diff_voting_rights, diffs, diff_uncertainties = _aggregate_user_comparisons( @@ -209,30 +210,33 @@ def scale_scalers(self, user_models, scalers, entities, privacy): return { u: ScaledScoringModel( - base_model=user_models[u], + base_model=model, multiplicator=multiplicators[u][0], translation=translations[u][0], multiplicator_left_uncertainty=multiplicators[u][1], multiplicator_right_uncertainty=multiplicators[u][1], translation_left_uncertainty=translations[u][1], translation_right_uncertainty=translations[u][1] - ) for u in scalers.index + ) for u, model in user_models.items() } - def scale_non_scalers(self, user_models, nonscalers, entities, scalers, scaled_models, privacy): + def scale_non_scalers( + self, user_models, nonscalers, entities, scalers, scaled_models, privacy + ): start = timeit.default_timer() model_norms = self.compute_model_norms(user_models, nonscalers, entities, privacy) end2a = timeit.default_timer() logger.info(f" Mehestan 3a. Model norms in {int(end2a - start)} seconds") end3a = timeit.default_timer() - entity_ratios = self.compute_entity_ratios(user_models, scaled_models, entities, - nonscalers, scalers, privacy) + nonscaler_models = {u: m for (u, m) in user_models.items() if u in nonscalers.index} + entity_ratios = self.compute_entity_ratios( + nonscaler_models, scaled_models, entities, privacy + ) end3b = timeit.default_timer() logger.info(f" Mehestan 3b. Entity ratios in {int(end3b - end3a)} seconds") ratio_voting_rights, ratios, ratio_uncertainties = _aggregate_user_comparisons( - scalers, entity_ratios, - error=self.error, lipschitz=self.user_comparison_lipschitz + scalers, entity_ratios, error=self.error, lipschitz=self.user_comparison_lipschitz ) end3c = timeit.default_timer() logger.info(f" Mehestan 3c. 
Aggregate ratios in {int(end3c - end3b)} seconds") @@ -242,8 +246,9 @@ def scale_non_scalers(self, user_models, nonscalers, entities, scalers, scaled_m end3d = timeit.default_timer() logger.info(f" Mehestan 3d. Multiplicators in {int(end3d - end3c)} seconds") - entity_diffs = self.compute_entity_diffs(user_models, scaled_models, nonscalers, scalers, - entities, privacy, multiplicators) + entity_diffs = self.compute_entity_diffs( + nonscaler_models, scaled_models, entities, privacy, multiplicators + ) end3e = timeit.default_timer() logger.info(f" Mehestan 3e. Entity diffs in {int(end3e - end3d)} seconds") diff_voting_rights, diffs, diff_uncertainties = _aggregate_user_comparisons( @@ -255,16 +260,17 @@ def scale_non_scalers(self, user_models, nonscalers, entities, scalers, scaled_m end3g = timeit.default_timer() logger.info(f" Mehestan 3g. Translations in {int(end3g - end3f)} seconds") - return scaled_models | { + return scaled_models | { u: ScaledScoringModel( - base_model=user_models[u], - multiplicator=multiplicators[u][0], + base_model=model, + multiplicator=multiplicators[u][0], translation=translations[u][0], - multiplicator_left_uncertainty=multiplicators[u][1], - multiplicator_right_uncertainty=multiplicators[u][1], + multiplicator_left_uncertainty=multiplicators[u][1], + multiplicator_right_uncertainty=multiplicators[u][1], translation_left_uncertainty=translations[u][1], - translation_right_uncertainty=translations[u][1] - ) for u in nonscalers.index + translation_right_uncertainty=translations[u][1], + ) + for u, model in nonscaler_models.items() } ############################################ @@ -273,7 +279,7 @@ def scale_non_scalers(self, user_models, nonscalers, entities, scalers, scaled_m def compute_activities( self, - user_models: dict[int, ScoringModel], + user_models: Mapping[int, ScoringModel], entities: pd.DataFrame, users: pd.DataFrame, privacy: Optional[PrivacySettings], @@ -300,11 +306,12 @@ def compute_activities( user_id, # type: ignore 
user_models[user_id], # type: ignore entities, - user.get("trust_score", 1.0), # type: ignore + trust_score, # type: ignore privacy, self.privacy_penalty ) - for (user_id, user) in users.iterrows() + for (user_id, trust_score) in users["trust_score"].items() + if user_id in user_models } ############################################ @@ -349,6 +356,7 @@ def compute_model_norms( self.privacy_penalty ) for user in users.index + if user in user_models } def compute_entity_ratios( @@ -356,8 +364,6 @@ def compute_entity_ratios( scalee_models: dict[int, ScoringModel], scaler_models: dict[int, ScoringModel], entities: pd.DataFrame, - scalees: pd.DataFrame, - scalers: pd.DataFrame, privacy: PrivacySettings ) -> dict[int, dict[int, tuple[list[float], list[float], list[float], list[float]]]]: """ Computes the ratios of score differences, with uncertainties, @@ -388,28 +394,28 @@ def compute_entity_ratios( user_entity_ratios = dict() entities_ids = set(entities.index) - for u in scalees.index: + for u, u_model in scalee_models.items(): user_entity_ratios[u] = dict() - u_entities = entities_ids & scalee_models[u].scored_entities() + u_entities = entities_ids & u_model.scored_entities() if len(u_entities) == 0: continue - for v in scalers.index: + for v, v_model in scaler_models.items(): if u == v: user_entity_ratios[u][v] = [1.], [1.], [0.], [0.] 
continue - uv_entities = list(u_entities & scaler_models[v].scored_entities()) + uv_entities = list(u_entities & v_model.scored_entities()) if len(entities) <= 1: continue elif len(entities) <= 100: ratios = self.load_all_ratios(u, v, uv_entities, entities, - scalee_models[u], scaler_models[v], privacy) + u_model, v_model, privacy) if ratios is not None: user_entity_ratios[u][v] = ratios else: ratios = self.sample_ratios(u, v, uv_entities, entities, - scalee_models[u], scaler_models[v], privacy) + u_model, v_model, privacy) if ratios is not None: user_entity_ratios[u][v] = ratios @@ -522,7 +528,7 @@ def compute_multiplicators( return { u: _aggregate(self.lipschitz / (8 * (1e-9 + model_norms[u])), voting_rights[u], ratios[u], uncertainties[u], - default_value=1, default_dev=0.8, error=self.error) + default_value=1.0, default_dev=0.8, error=self.error) for u in voting_rights } @@ -534,8 +540,6 @@ def compute_entity_diffs( self, scalee_models: dict[int, ScoringModel], scaler_models: dict[int, ScoringModel], - scalees: pd.DataFrame, - scalers: pd.DataFrame, entities: pd.DataFrame, privacy: PrivacySettings, multiplicators: dict[int, tuple[float, float]] @@ -569,22 +573,22 @@ def compute_entity_diffs( differences = dict() entities_ids = set(entities.index) - for u in scalees.index: - u_entities = entities_ids & scalee_models[u].scored_entities() + for u, u_model in scalee_models.items(): + u_entities = entities_ids & u_model.scored_entities() differences[u] = dict() - for v in scalers.index: + for v, v_model in scaler_models.items(): if u == v: differences[u][v] = [0.], [1.], [0.], [0.] 
continue - uv_entities = u_entities & scaler_models[v].scored_entities() + uv_entities = u_entities & v_model.scored_entities() if len(entities) == 0: continue differences[u][v] = list(), list(), list(), list() for e in uv_entities: - score_u, left_u, right_u = scalee_models[u](e, entities.loc[e]) - score_v, left_v, right_v = scaler_models[v](e, entities.loc[e]) + score_u, left_u, right_u = u_model(e, entities.loc[e]) + score_v, left_v, right_v = v_model(e, entities.loc[e]) uve_voting_right = 1 if privacy is not None and privacy[u, e]: @@ -642,7 +646,7 @@ def compute_translations( return { u: _aggregate(self.lipschitz / 8, voting_rights[u], diffs[u], uncertainties[u], - default_value=0, default_dev=1, + default_value=0.0, default_dev=1.0, error=self.error, aggregator=lipschitz_resilient_mean) for u in voting_rights } @@ -717,13 +721,17 @@ def _computer_user_activities( activities: dict[int, float] activities[user] is a measure of user's trustworthy activeness. """ - results = 0 + if trust_score <= 0.0: + return 0.0 + + results = 0.0 entity_ids = set(entities.index) - for entity_id, (score, left, right) in user_model.iter_entities(): if entity_id not in entity_ids: continue if score <= left and score >= -right: + # Uncertainty interval contains 0 + # Sign of score is uncertain. 
continue added_quantity = 1.0 if privacy is not None and privacy[user, entity_id]: @@ -843,7 +851,7 @@ def _aggregate( default_value: float, error: float=1e-5, aggregator: Callable = qr_median, - default_dev: float=1, + default_dev: float=1.0, ) -> tuple[float, float]: """ Computes the multiplicators of users with given user_ratios @@ -879,6 +887,6 @@ def _aggregate( right_uncertainties=np.array(uncertainties), default_dev=default_dev, error=error, - median=value + median=value if aggregator is qr_median else None, ) return value, uncertainty diff --git a/solidago/src/solidago/scaling/quantile_zero_shift.py b/solidago/src/solidago/scaling/quantile_zero_shift.py index ccd9bd201a..c8193f1678 100644 --- a/solidago/src/solidago/scaling/quantile_zero_shift.py +++ b/solidago/src/solidago/scaling/quantile_zero_shift.py @@ -1,3 +1,5 @@ +from typing import Mapping + import pandas as pd import numpy as np @@ -9,28 +11,36 @@ from solidago.primitives import qr_quantile -class QuantileZeroShift(Scaling): - def __init__(self, zero_quantile: float=0.15, lipschitz: float=0.1, error: float=1e-5): - """ The scores are shifted so that their quantile zero_quantile equals zero - +class QuantileShift(Scaling): + def __init__( + self, + quantile: float = 0.15, + *, + target_score: float = 0.0, + lipschitz: float = 0.1, + error: float = 1e-5, + ): + """The scores are shifted so that their quantile zero_quantile equals zero + Parameters ---------- zero_quantile: float """ - self.zero_quantile = zero_quantile + self.quantile = quantile + self.target_score = target_score self.lipschitz = lipschitz self.error = error - + def __call__( - self, - user_models: dict[int, ScoringModel], + self, + user_models: Mapping[int, ScoringModel], users: pd.DataFrame, entities: pd.DataFrame, voting_rights: VotingRights, - privacy: PrivacySettings + privacy: PrivacySettings, ) -> dict[int, ScaledScoringModel]: - """ Returns scaled user models - + """Returns scaled user models + Parameters ---------- 
user_models: dict[int, ScoringModel] @@ -50,31 +60,60 @@ def __call__( out[user]: ScoringModel Will be scaled by the Scaling method """ - votes, scores, lefts, rights = list(), list(), list(), list() - for user in user_models: - for entity in user_models[user].scored_entities(entities): - output = user_models[user](entity, entities.loc[entity]) - votes.append(voting_rights[user, entity]) + weights = [] + scores, lefts, rights = [], [], [] + for user_id, user_model in user_models.items(): + n_entities = 0 + for entity_id, output in user_model.iter_entities(entities): + n_entities += 1 scores.append(output[0]) lefts.append(output[1]) rights.append(output[2]) - - shift = - qr_quantile(self.lipschitz, self.zero_quantile, np.array(scores), - np.array(votes), np.array(lefts), np.array(rights), error=self.error) - + if n_entities > 0: + weights.extend([1 / n_entities] * n_entities) + + shift = -qr_quantile( + lipschitz=self.lipschitz, + quantile=self.quantile, + values=np.array(scores), + voting_rights=np.array(weights), + left_uncertainties=np.array(lefts), + right_uncertainties=np.array(rights), + error=self.error, + ) + self.target_score + return { - user: ScaledScoringModel(user_models[user], translation=shift) - for user in user_models + user: ScaledScoringModel(user_model, translation=shift) + for (user, user_model) in user_models.items() } def to_json(self): return type(self).__name__, dict( - zero_quantile=self.zero_quantile, + quantile=self.quantile, + target_score=self.target_score, lipschitz=self.lipschitz, error=self.error ) def __str__(self): - prop_names = ["zero_quantile", "lipschitz", "error"] + prop_names = ["quantile", "lipschitz", "error", "target_score"] prop = ", ".join([f"{p}={getattr(self, p)}" for p in prop_names]) return f"{type(self).__name__}({prop})" + + +class QuantileZeroShift(QuantileShift): + def __init__( + self, + zero_quantile: float = 0.15, + *, + lipschitz: float = 0.1, + error: float = 0.00001 + ): + super().__init__(zero_quantile, 
target_score=0.0, lipschitz=lipschitz, error=error) + + def to_json(self): + return type(self).__name__, dict( + zero_quantile=self.quantile, + lipschitz=self.lipschitz, + error=self.error + ) diff --git a/solidago/src/solidago/scaling/standardize.py b/solidago/src/solidago/scaling/standardize.py index d2eef40c84..bff2534430 100644 --- a/solidago/src/solidago/scaling/standardize.py +++ b/solidago/src/solidago/scaling/standardize.py @@ -1,5 +1,4 @@ import pandas as pd -import numpy as np from .base import Scaling @@ -20,7 +19,7 @@ def __init__(self, dev_quantile: float=0.9, lipschitz: float=0.1, error: float=1 self.dev_quantile = dev_quantile self.lipschitz = lipschitz self.error = error - + def __call__( self, user_models: dict[int, ScoringModel], @@ -29,25 +28,26 @@ def __call__( voting_rights: VotingRights, privacy: PrivacySettings ): - df = _get_user_scores(voting_rights, user_models, entities) + df = _get_user_scores(user_models, entities) std_dev = self._compute_std_dev(df) return { - user: ScaledScoringModel(user_models[user], 1/std_dev) - for user in user_models + user: ScaledScoringModel(user_model, multiplicator=1/std_dev) + for (user, user_model) in user_models.items() } - + def _compute_std_dev(self, df): + w = 1 / df.groupby("user_id")["scores"].transform("size") return qr_standard_deviation( - lipschitz=self.lipschitz, - values=np.array(df["scores"]), + lipschitz=self.lipschitz, + values=df["scores"].to_numpy(), quantile_dev=self.dev_quantile, - voting_rights=np.array(df["voting_rights"]), - left_uncertainties=np.array(df["left_uncertainties"]), - right_uncertainties=np.array(df["right_uncertainties"]), - default_dev=1, - error=self.error + voting_rights=w.to_numpy(), + left_uncertainties=df["left_uncertainties"].to_numpy(), + right_uncertainties=df["right_uncertainties"].to_numpy(), + default_dev=1.0, + error=self.error, ) - + def to_json(self): return type(self).__name__, dict(dev_quantile=self.dev_quantile, lipschitz=self.lipschitz, 
error=self.error) @@ -57,29 +57,24 @@ def __str__(self): prop = ", ".join([f"{p}={getattr(self, p)}" for p in prop_names]) return f"{type(self).__name__}({prop})" -def _get_user_scores( - voting_rights: VotingRights, - user_models: dict[int, ScoringModel], - entities: pd.DataFrame -): - user_list, entity_list, voting_right_list = list(), list(), list() + +def _get_user_scores(user_models: dict[int, ScoringModel], entities: pd.DataFrame): + user_list, entity_list = list(), list() scores, lefts, rights = list(), list(), list() for user_id, scoring_model in user_models.items(): - for entity in scoring_model.scored_entities(entities): + for entity_id, output in scoring_model.iter_entities(entities): user_list.append(user_id) - entity_list.append(entity) - voting_right_list.append(voting_rights[user_id, entity]) - output = scoring_model(entity, entities.loc[entity]) + entity_list.append(entity_id) scores.append(output[0]) lefts.append(output[1]) rights.append(output[2]) - - return pd.DataFrame(dict( - user_id=user_list, - entity_id=entity_list, - voting_rights=voting_right_list, - scores=scores, - left_uncertainties=lefts, - right_uncertainties=rights, - )) - + + return pd.DataFrame( + dict( + user_id=user_list, + entity_id=entity_list, + scores=scores, + left_uncertainties=lefts, + right_uncertainties=rights, + ) + ) diff --git a/solidago/src/solidago/scoring_model.py b/solidago/src/solidago/scoring_model.py index e07783d915..0da70d3c0f 100644 --- a/solidago/src/solidago/scoring_model.py +++ b/solidago/src/solidago/scoring_model.py @@ -89,12 +89,12 @@ class ScaledScoringModel(ScoringModel): def __init__( self, base_model: ScoringModel, - multiplicator: float=1, - translation: float=0, - multiplicator_left_uncertainty: float=0, - multiplicator_right_uncertainty: float=0, - translation_left_uncertainty: float=0, - translation_right_uncertainty: float=0 + multiplicator: float=1.0, + translation: float=0.0, + multiplicator_left_uncertainty: float=0.0, + 
multiplicator_right_uncertainty: float=0.0, + translation_left_uncertainty: float=0.0, + translation_right_uncertainty: float=0.0, ): """ When base_model is itself a scaled scoring model, the scalings are aggregated, so that the base model is actually diff --git a/solidago/src/solidago/solvers/dichotomy.py b/solidago/src/solidago/solvers/dichotomy.py index 1808b58545..a2d0138edb 100644 --- a/solidago/src/solidago/solvers/dichotomy.py +++ b/solidago/src/solidago/solvers/dichotomy.py @@ -4,11 +4,12 @@ def solve( - f: Callable[[float], float], + f: Callable[..., float], value: float = 0, xmin: float = 0, xmax: float = 1, error: float = 1e-6, + args = (), ): """Solves for f(x) == value, using dichotomy search May return an error if f(xmin) * f(xmax) > 0 @@ -29,7 +30,7 @@ def solve( ------- out: float """ - ymin, ymax = f(xmin) - value, f(xmax) - value + ymin, ymax = f(xmin, *args) - value, f(xmax, *args) - value if ymin * ymax > 0: raise ValueError(f"No solution to f(x)={value} was found in [{xmin}, {xmax}]") @@ -40,7 +41,7 @@ def solve( n_iterations = int(np.ceil(np.log2(delta / error))) for _ in range(n_iterations): x = (xmin + xmax) / 2 - y = f(x) - value + y = f(x, *args) - value if y == 0: return x if ymin * y < 0: diff --git a/solidago/src/solidago/solvers/optimize.py b/solidago/src/solidago/solvers/optimize.py index 2e56d048eb..ac31e57c12 100644 --- a/solidago/src/solidago/solvers/optimize.py +++ b/solidago/src/solidago/solvers/optimize.py @@ -6,8 +6,8 @@ Copyright © 2013-2021 Thomas J. Sargent and John Stachurski: BSD-3 All rights reserved. """ -# pylint: skip-file -from typing import Callable, Tuple + +from typing import Callable, Tuple, Literal import numpy as np from numba import njit @@ -41,21 +41,55 @@ def _bisect_interval(a, b, fa, fb) -> Tuple[float, int]: @njit -def njit_brentq(f, args=(), xtol=_xtol, rtol=_rtol, maxiter=_iter, disp=True, a: float=-1.0, b: float=1.0) -> float: - """ `Accelerated brentq. Requires f to be itself jitted via numba. 
+def njit_brentq( + f, + args=(), + xtol=_xtol, + rtol=_rtol, + maxiter=_iter, + a: float = -1.0, + b: float = 1.0, + extend_bounds: Literal["ascending", "descending", "no"] = "ascending", +) -> float: + """Accelerated brentq. Requires f to be itself jitted via numba. Essentially, numba optimizes the execution by running an optimized compilation of the function when it is first called, and by then running the compiled function. - - + Parameters ---------- f : jitted and callable Python function returning a number. `f` must be continuous. + args : tuple, optional(default=()) + Extra arguments to be used in the function call. + xtol : number, optional(default=2e-12) + The computed root ``x0`` will satisfy ``np.allclose(x, x0, + atol=xtol, rtol=rtol)``, where ``x`` is the exact root. The + parameter must be nonnegative. + rtol : number, optional(default=`4*np.finfo(float).eps`) + The computed root ``x0`` will satisfy ``np.allclose(x, x0, + atol=xtol, rtol=rtol)``, where ``x`` is the exact root. + maxiter : number, optional(default=100) + Maximum number of iterations. + a : number + One end of the bracketing interval [a,b]. + b : number + The other end of the bracketing interval [a,b]. + extend_bounds: default: "ascending", + Whether to extend the interval [a,b] to find a root. 
+ ('no': to keep the bounds [a, b], + 'ascending': extend the bounds assuming `f` is ascending, + 'descending': extend the bounds assuming `f` is descending) """ - while f(a, *args) > 0: - a = a - 2 * (b-a) - while f(b, *args) < 0: - b = b + 2 * (b-a) + if extend_bounds == "ascending": + while f(a, *args) > 0: + a = a - 2 * (b - a) + while f(b, *args) < 0: + b = b + 2 * (b - a) + elif extend_bounds == "descending": + while f(a, *args) < 0: + a = a - 2 * (b - a) + while f(b, *args) > 0: + b = b + 2 * (b - a) if xtol <= 0: raise ValueError("xtol is too small (<= 0)") @@ -134,7 +168,7 @@ def njit_brentq(f, args=(), xtol=_xtol, rtol=_rtol, maxiter=_iter, disp=True, a: fcur = f(xcur, *args) funcalls += 1 - if disp and status == _ECONVERR: + if status == _ECONVERR: raise RuntimeError("Failed to converge") return root # type: ignore @@ -143,13 +177,13 @@ def njit_brentq(f, args=(), xtol=_xtol, rtol=_rtol, maxiter=_iter, disp=True, a: def coordinate_descent( update_coordinate_function: Callable[[Tuple, float], float], get_args: Callable[[int, np.ndarray], Tuple], - initialization: np.ndarray, + initialization: np.ndarray, updated_coordinates: list[int], - error: float = 1e-5 + error: float = 1e-5, ): - """ Minimize a loss function with coordinate descent, + """Minimize a loss function with coordinate descent, by leveraging the partial derivatives of the loss - + Parameters ---------- loss_partial_derivative: callable @@ -160,7 +194,7 @@ def coordinate_descent( Initialization point of the coordinate descent error: float Tolerated error - + Returns ------- out: stationary point of the loss diff --git a/solidago/src/solidago/trust_propagation/__init__.py b/solidago/src/solidago/trust_propagation/__init__.py index 3a0e3908d2..1235c10118 100644 --- a/solidago/src/solidago/trust_propagation/__init__.py +++ b/solidago/src/solidago/trust_propagation/__init__.py @@ -1,4 +1,4 @@ -""" Step 1 of the pipeline. 
+""" **Step 1 in the pipeline** Trust propagation is tasked to combine pretrusts and vouches to derive trust scores for the different users. @@ -7,4 +7,5 @@ from .base import TrustPropagation from .no_trust_propagation import NoTrustPropagation from .lipschitrust import LipschiTrust +from .noop import NoopTrust from .trust_all import TrustAll diff --git a/solidago/src/solidago/trust_propagation/base.py b/solidago/src/solidago/trust_propagation/base.py index 1510283dff..a9c40a94dc 100644 --- a/solidago/src/solidago/trust_propagation/base.py +++ b/solidago/src/solidago/trust_propagation/base.py @@ -3,6 +3,10 @@ import pandas as pd class TrustPropagation(ABC): + """ + Base class for Trust Propagation algorithms + """ + @abstractmethod def __call__(self, users: pd.DataFrame, @@ -12,17 +16,24 @@ def __call__(self, Parameters ---------- - users: DataFrame with columns + users: DataFrame + with columns + * user_id (int, index) * is_pretrusted (bool) - vouches: DataFrame with columns + + vouches: DataFrame + with columns + * voucher (str) * vouchee (str) * vouch (float) Returns ------- - users: DataFrame with columns + users: DataFrame + with columns + * user_id (int, index) * is_pretrusted (bool) * trust_score (float) diff --git a/solidago/src/solidago/trust_propagation/lipschitrust.py b/solidago/src/solidago/trust_propagation/lipschitrust.py index 0373cd3927..92615cfacc 100644 --- a/solidago/src/solidago/trust_propagation/lipschitrust.py +++ b/solidago/src/solidago/trust_propagation/lipschitrust.py @@ -17,14 +17,18 @@ def __init__(self, error: float=1e-8 ): """ A robustified variant of PageRank - Inputs: - - pretrust_value is the pretrust of a pretrusted user - (Trust^{pre}_{checkmark} in paper) - - decay is the decay of trusts in voucher's vouchees - (beta in paper) - - sink_vouch is the vouch to none, used to incentivize vouching + + Parameters + ---------- + pretrust_value: + the pretrust of a pretrusted user. 
+ (`Trust^{pre}_{checkmark}` in paper) + decay: + the decay of trusts in voucher's vouchees. + (`beta` in paper) + sink_vouch: is the vouch to none, used to incentivize vouching (V^{sink}_{checkmark} in paper) - - error > 0 is an upper bound on error (in L1 norm) + error: > 0 is an upper bound on error (in L1 norm) (epsilon_{LipschiTrust} in paper) """ assert pretrust_value >= 0 and pretrust_value <= 1 @@ -41,22 +45,6 @@ def __call__(self, users: pd.DataFrame, vouches: pd.DataFrame ) -> pd.DataFrame: - """ - Inputs: - - users: DataFrame with columns - * user_id (int, index) - * is_pretrusted (bool) - - vouches: DataFrame with columns - * voucher (str) - * vouchee (str) - * vouch (float) - - Returns: - - users: DataFrame with columns - * user_id (int, index) - * is_pretrusted (bool) - * trust_score (float) - """ if len(users) == 0: return users.assign(trust_score=[]) diff --git a/solidago/src/solidago/trust_propagation/no_trust_propagation.py b/solidago/src/solidago/trust_propagation/no_trust_propagation.py index 43a11bf575..9c81ab7282 100644 --- a/solidago/src/solidago/trust_propagation/no_trust_propagation.py +++ b/solidago/src/solidago/trust_propagation/no_trust_propagation.py @@ -5,32 +5,19 @@ class NoTrustPropagation(TrustPropagation): def __init__(self, pretrust_value: float=0.8,): + """ + Parameters + ---------- + pretrust_value: + trust score to assign to pretrusted users + """ self.pretrust_value = pretrust_value def __call__(self, users: pd.DataFrame, vouches: pd.DataFrame ) -> pd.DataFrame: - """ Propagates trust through vouch network - - Parameters - ---------- - users: DataFrame with columns - * user_id (int, index) - * is_pretrusted (bool) - vouches: DataFrame with columns - * voucher (str) - * vouchee (str) - * vouch (float) - - Returns - ------- - users: DataFrame with columns - * user_id (int, index) - * is_pretrusted (bool) - * trust_score (float) - """ - return users.assign(trust_score=users["is_pretrusted"] * pretrust_value) + return 
users.assign(trust_score=users["is_pretrusted"] * self.pretrust_value) def __str__(self): return f"{type(self).__name__}(pretrust_value={self.pretrust_value})" diff --git a/solidago/src/solidago/trust_propagation/noop.py b/solidago/src/solidago/trust_propagation/noop.py new file mode 100644 index 0000000000..918ae14d5d --- /dev/null +++ b/solidago/src/solidago/trust_propagation/noop.py @@ -0,0 +1,25 @@ +import pandas as pd + +from .base import TrustPropagation + + +class NoopTrust(TrustPropagation): + """ + Noop for trust propagation: + trust scores are simply read from the input dataframe + """ + + def __call__(self, users: pd.DataFrame, _vouches: pd.DataFrame) -> pd.DataFrame: + return pd.DataFrame( + { + "is_pretrusted": users["is_pretrusted"], + "trust_score": users["trust_score"].fillna(0.0), + }, + index=users.index, + ) + + def __str__(self): + return type(self).__name__ + + def to_json(self): + return (type(self).__name__,) diff --git a/solidago/src/solidago/trust_propagation/trust_all.py b/solidago/src/solidago/trust_propagation/trust_all.py index 53f59e204b..79f3d2e37b 100644 --- a/solidago/src/solidago/trust_propagation/trust_all.py +++ b/solidago/src/solidago/trust_propagation/trust_all.py @@ -1,32 +1,10 @@ -""" TrustAll is a naive solution that assignes an equal amount of trust to all users -""" - from .base import TrustPropagation import pandas as pd -import numpy as np + class TrustAll(TrustPropagation): - def __call__(self, - users: pd.DataFrame, - vouches: pd.DataFrame - ) -> dict[str, float]: - """ - Inputs: - - users: DataFrame with columns - * user_id (int, index) - * is_pretrusted (bool) - - vouches: DataFrame with columns - * voucher (str) - * vouchee (str) - * vouch (float) - - Returns: - - users: DataFrame with columns - * user_id (int, index) - * is_pretrusted (bool) - * trust_score (float) - """ - return users.assign(trust_score=[1.0] * len(users)) - - + """`TrustAll` is a naive solution that assignes an equal amount of trust to all 
users""" + + def __call__(self, users: pd.DataFrame, vouches: pd.DataFrame): + return users.assign(trust_score=1.0) diff --git a/solidago/src/solidago/utils/pairs.py b/solidago/src/solidago/utils/pairs.py index 88fc2e82cd..7d28f1a160 100644 --- a/solidago/src/solidago/utils/pairs.py +++ b/solidago/src/solidago/utils/pairs.py @@ -9,42 +9,43 @@ def __init__(self, elements: Union[list, int]): self.elements = np.arange(elements) else: self.elements = elements - + @property def n_elements(self): return len(self.elements) - + @property def n_pairs(self): - return int( self.n_elements * (self.n_elements - 1) / 2 ) - + return (self.n_elements * (self.n_elements - 1)) // 2 + def __iter__(self): return UnorderedPairsIterator(self) - - def index_to_pair(self, index, p_shuffle: float=0): + + def index_to_pair(self, index: int, p_shuffle: Union[int, float] = 0): assert index >= 0 and index < self.n_pairs - a = int( (1 + np.sqrt(8*index + 1)) / 2 ) - b = index - int(a * (a - 1) / 2) + a = int((1 + np.sqrt(8 * index + 1)) / 2) + b = index - int(a * (a - 1) / 2) try: self.elements[b] except: raise ValueError(b) return swap(self.elements[a], self.elements[b], p_shuffle) - + def sample(self, p_shuffle=0.5) -> tuple[int, int]: index = np.random.randint(self.n_elements) return self.index_to_pair(index, p_shuffle) - - def n_samples(self, n_samples: int, p_shuffle: float=0.5) -> list[tuple[int, int]]: + + def n_samples(self, n_samples: int, p_shuffle: float = 0.5) -> list[tuple[int, int]]: indices = np.arange(self.n_pairs) np.random.shuffle(indices) - return [ self.index_to_pair(index, p_shuffle) for index in indices[:n_samples] ] + return [self.index_to_pair(index, p_shuffle) for index in indices[:n_samples]] + class UnorderedPairsIterator: def __init__(self, pairs): self.pairs = pairs self.a, self.b = 0, -1 - + def __next__(self): self.b += 1 if self.a == self.b: @@ -53,9 +54,9 @@ def __next__(self): if self.a >= self.pairs.n_elements: raise StopIteration return 
self.pairs.elements[self.a], self.pairs.elements[self.b] - -def swap(a, b, p_shuffle=1): + +def swap(a, b, p_shuffle: Union[float, int] = 1): if p_shuffle == 0: return a, b if p_shuffle == 1 or np.random.random() <= p_shuffle: diff --git a/solidago/src/solidago/voting_rights/__init__.py b/solidago/src/solidago/voting_rights/__init__.py index 5587816df3..2504129452 100644 --- a/solidago/src/solidago/voting_rights/__init__.py +++ b/solidago/src/solidago/voting_rights/__init__.py @@ -1,4 +1,4 @@ -""" Step 2 of the pipeline. +""" **Step 2 in the pipeline** Voting rights are assigned per user and per entity, based on users' trust scores and privacy settings. @@ -11,3 +11,6 @@ from .affine_overtrust import AffineOvertrust from .compute_voting_rights import compute_voting_rights + + +__all__ = ["VotingRightsAssignment", "IsTrust", "AffineOvertrust"] diff --git a/solidago/src/solidago/voting_rights/affine_overtrust.py b/solidago/src/solidago/voting_rights/affine_overtrust.py index 8732c4df9a..d44e86a58a 100644 --- a/solidago/src/solidago/voting_rights/affine_overtrust.py +++ b/solidago/src/solidago/voting_rights/affine_overtrust.py @@ -57,7 +57,7 @@ def __call__( * overtrust (float) """ voting_rights = VotingRights() - if len(users) == 0: + if len(users) == 0 or len(entities) == 0: return voting_rights, entities trust_scores = users["trust_score"] @@ -65,7 +65,10 @@ def __call__( for e in entities.index: user_ids = privacy.users(e) privacy_weights = pd.Series( - {u: self.privacy_penalty if privacy[u, e] else 1.0 for u in user_ids} + { + u: self.privacy_penalty if privacy[u, e] else 1.0 + for u in user_ids + } ) (voting_rights_series, cumulative_trust, min_voting_right, overtrust) = ( self.compute_entity_voting_rights( @@ -178,11 +181,10 @@ def min_voting_right( ---------- max_overtrust: float Maximal overtrust allowed for entity_id - users: DataFrame with columns - * user_id (int, index) - * trust_score (float) + trust_scores: + trust score values per user privacy_weights: 
dict[int, float] - privacy_weights[u] is the privacy weight of user u + privacy weight per user Returns ------- diff --git a/solidago/tests/data/data_1.py b/solidago/tests/data/data_1.py index 6fe97f6e48..29e723f9d2 100644 --- a/solidago/tests/data/data_1.py +++ b/solidago/tests/data/data_1.py @@ -95,24 +95,24 @@ learned_models = { 0: DirectScoringModel({ - 0: (0.8543576022084396, 0.22268338112332428, 0.22268338112332428), - 1: (-0.8542675414366053, 0.22268338112332428, 0.22268338112332428) + 0: (0.8543576022084396, 2.62, 4.52), + 1: (-0.8542675414366053, 4.55, 2.62) }), 1: DirectScoringModel({ - 0: (-0.45987485219302987, 0.3040980645995397, 0.3040980645995397), - 1: (0.46000589337922315, 0.3040980645995397, 0.3040980645995397) + 0: (-0.45987485219302987, 3.39, 2.50), + 1: (0.46000589337922315, 2.50, 3.39) }), 2: DirectScoringModel({ - 0: (-0.6411620404227717, 0.26730021787214453, 0.26730021787214453), - 1: (0.6412670367706607, 0.26730021787214453, 0.26730021787214453) + 0: (-0.6411620404227717, 3.84, 2.53), + 1: (0.6412670367706607, 2.53, 3.84), }), 3: DirectScoringModel({ - 0: (2.0611090358800523, 0.07820614406839796, 0.07820614406839796), - 1: (-2.061088795104216, 0.07820614406839796, 0.07820614406839796) + 0: (2.0611090358800523, 3.73, 11.73), + 1: (-2.061088795104216, 11.73, 3.73), }), 4: DirectScoringModel({ - 0: (-4.949746148097695, 0.030612236968599268, 0.030612236968599268), - 1: (4.949747745198173, 0.030612236968599268, 0.030612236968599268) + 0: (-4.949746148097695, 1000.0, 6.26), + 1: (4.949747745198173, 6.26, 1000.0), }) } diff --git a/solidago/tests/data/data_2.py b/solidago/tests/data/data_2.py index a8974c0930..7d934e6b23 100644 --- a/solidago/tests/data/data_2.py +++ b/solidago/tests/data/data_2.py @@ -96,28 +96,38 @@ }) learned_models = { - 0: DirectScoringModel({ - 1: (1.016590197621329, 0.4638561508964967, 0.4638561508964967), - 2: (-0.7877876012816142, 0.5266102124947752, 0.5266102124947752), - 6: (-0.2291324680780755, 0.5846829229672795, 
0.5846829229672795) - }), - 4: DirectScoringModel({ - 1: (-0.29761600676032623, 0.33137621448368626, 0.33137621448368626), - 2: (0.2977647751812212, 0.33137621448368626, 0.33137621448368626) - }), - 2: DirectScoringModel({ - 1: (-4.965658292354929, 0.07061473411881966, 0.07061473411881966), - 2: (0.02121949850814651, 0.06043250186863176, 0.06043250186863176), - 6: (4.944447487603185, 0.030590395515494022, 0.030590395515494022) - }), - 8: DirectScoringModel({ - 1: (0.641162040422771, 0.26729930161403126, 0.26729930161403126), - 6: (-0.6412757158129634, 0.26729930161403126, 0.26729930161403126) - }), - 6: DirectScoringModel({ - 2: (0.6412670367706619, 0.26730021787214453, 0.26730021787214453), - 6: (-0.64116204042277, 0.26730021787214453, 0.26730021787214453) - }) + 0: DirectScoringModel( + { + 1: (1.0166024998812924, 1.86, 2.67), + 2: (-0.7877792323989169, 2.36, 1.84), + 6: (-0.22912573047151286, 2.0, 1.89), + } + ), + 4: DirectScoringModel( + { + 1: (-0.29762516882114326, 3.07, 2.52), + 2: (0.297764775194798, 2.52, 3.07), + } + ), + 2: DirectScoringModel( + { + 1: (-4.965657348164456, 17.7, 3.7), + 2: (0.021224904768728296, 4.57, 10.74), + 6: (4.944450204676572, 6.27, 1000.0), + } + ), + 8: DirectScoringModel( + { + 1: (0.6412670367706624, 2.53, 3.84), + 6: (-0.6411620404227696, 3.84, 2.53), + } + ), + 6: DirectScoringModel( + { + 2: (0.6411620404227699, 2.53, 3.84), + 6: (-0.6412757158129653, 3.84, 2.53), + } + ), } mehestan_scaled_models = { diff --git a/solidago/tests/data/data_3.py b/solidago/tests/data/data_3.py index 77e3e51ad1..584b546446 100644 --- a/solidago/tests/data/data_3.py +++ b/solidago/tests/data/data_3.py @@ -57,7 +57,6 @@ judgments = DataFrameJudgments(pd.DataFrame(dict( # The judgements contain a pair of entities (2, 3) compared twice by user 1 - # The learned models assume than only the last one is considered in the learning process. 
user_id= [0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4], entity_a=[0, 2, 4, 0, 1, 4, 3, 4, 0, 2, 1, 0, 4, 0, 0, 0, 0, 2, 4, 0, 1, 2], entity_b=[2, 3, 3, 2, 2, 3, 2, 1, 4, 3, 0, 4, 1, 1, 2, 3, 4, 3, 3, 1, 2, 3], @@ -127,37 +126,47 @@ }) learned_models = { - 0: DirectScoringModel({ - 0: (5.033398104293532, 0.03096002542681708, 0.03096002542681708), - 2: (-4.70158398961812, 0.16736063832963174, 0.16736063832963174), - 3: (-2.2520973379065916, 0.21329795423450776, 0.21329795423450776), - 4: (1.920367641692402, 0.07689734133169311, 0.07689734133169311) - }), - 1: DirectScoringModel({ - 0: (-0.2647961020419981, 0.4346751126408668, 0.4346751126408668), - 1: (-0.723484071079214, 0.4387030807322584, 0.4387030807322584), - 2: (-1.9388463288575928, 0.5587891835427051, 0.5587891835427051), - 3: (1.5444643844051669, 0.4306522267140735, 0.4306522267140735), - 4: (1.383154679686365, 0.7248330732791874, 0.7248330732791874) - }), - 2: DirectScoringModel({ - 0: (-0.41403365335425246, 0.6232992917299677, 0.6232992917299677), - 1: (0.6217382466724807, 0.5848617488561728, 0.5848617488561728), - 4: (-0.2073399097731925, 0.6429793786107434, 0.6429793786107434) - }), - 3: DirectScoringModel({ - 0: (-0.7616735994053457, 1.136102459884623, 1.136102459884623), - 1: (-0.1376152832458718, 0.32929993602267005, 0.32929993602267005), - 2: (0.26367367798854036, 0.6238313087499935, 0.6238313087499935), - 3: (0.03904465536134892, 0.958527480363601, 0.958527480363601), - 4: (0.5972697855709417, 0.5727475480020277, 0.5727475480020277) - }), - 4: DirectScoringModel({ - 0: (-0.374212225919361, 0.2649557275176049, 0.2649557275176049), - 1: (0.9304055634303148, 0.5771139502767695, 0.5771139502767695), - 2: (0.351959458123829, 0.5817762983596343, 0.5817762983596343), - 3: (-0.9084921677179439, 0.2696180756004697, 0.2696180756004697) - }) + 0: DirectScoringModel( + { + 0: (5.033397746231767, 6.16, 1000.0), + 2: (-4.701576351427851, 7.36, 2.57), + 3: (-2.2520977680270975, 2.93, 3.04), + 
4: (1.920367325723618, 3.78, 11.67), + } + ), + 1: DirectScoringModel( + { + 0: (0.1240287651798575, 1.91, 1.92), + 1: (-0.20925013140334098, 1.99, 1.88), + 2: (-0.7121177373667139, 1.38, 1.23), + 3: (-0.17088798816552617, 1.53, 1.54), + 4: (0.968692135233115, 1.45, 1.90), + } + ), + 2: DirectScoringModel( + { + 0: (-0.41427772976495064, 2.05, 1.77), + 1: (0.6215002710681929, 1.76, 2.22), + 4: (-0.20758081689489893, 1.93, 1.81), + } + ), + 3: DirectScoringModel( + { + 0: (-0.7616827423140184, 1.49, 1.26), + 1: (-0.13763264866961566, 2.55, 3.04), + 2: (0.2636628216175936, 1.78, 2.04), + 3: (0.03903170049617262, 1.5, 1.5), + 4: (0.5972590006109219, 1.79, 2.23), + } + ), + 4: DirectScoringModel( + { + 0: (-0.37421638528904544, 3.82, 2.55), + 1: (0.9304020798748038, 1.77, 2.24), + 2: (0.3519562762627696, 1.89, 2.01), + 3: (-0.9084875543437719, 3.86, 2.51), + } + ), } mehestan_scaled_models = { diff --git a/solidago/tests/data/data_4.py b/solidago/tests/data/data_4.py index 505c74cf46..d6ca467cad 100644 --- a/solidago/tests/data/data_4.py +++ b/solidago/tests/data/data_4.py @@ -346,7 +346,7 @@ 4: ScaledScoringModel( base_model=learned_models[4], multiplicator=1, - translation=0, + translation=0.0, multiplicator_left_uncertainty=1, multiplicator_right_uncertainty=1, translation_left_uncertainty=1, @@ -373,7 +373,7 @@ 9: ScaledScoringModel( base_model=learned_models[9], multiplicator=1, - translation=0, + translation=0.0, multiplicator_left_uncertainty=1, multiplicator_right_uncertainty=1, translation_left_uncertainty=1, @@ -422,10 +422,10 @@ base_model=learned_models[4], multiplicator=1, translation=2.197462409621523, - multiplicator_left_uncertainty=0, - multiplicator_right_uncertainty=0, - translation_left_uncertainty=0, - translation_right_uncertainty=0, + multiplicator_left_uncertainty=0.0, + multiplicator_right_uncertainty=0.0, + translation_left_uncertainty=0.0, + translation_right_uncertainty=0.0, ), 5: ScaledScoringModel( base_model=learned_models[5], @@ -449,28 
+449,28 @@ base_model=learned_models[7], multiplicator=1, translation=2.197462409621523, - multiplicator_left_uncertainty=0, - multiplicator_right_uncertainty=0, - translation_left_uncertainty=0, - translation_right_uncertainty=0, + multiplicator_left_uncertainty=0.0, + multiplicator_right_uncertainty=0.0, + translation_left_uncertainty=0.0, + translation_right_uncertainty=0.0, ), 8: ScaledScoringModel( base_model=learned_models[8], multiplicator=1, translation=2.197462409621523, - multiplicator_left_uncertainty=0, - multiplicator_right_uncertainty=0, - translation_left_uncertainty=0, - translation_right_uncertainty=0, + multiplicator_left_uncertainty=0.0, + multiplicator_right_uncertainty=0.0, + translation_left_uncertainty=0.0, + translation_right_uncertainty=0.0, ), 9: ScaledScoringModel( base_model=learned_models[9], multiplicator=1, translation=2.197462409621523, - multiplicator_left_uncertainty=0, - multiplicator_right_uncertainty=0, - translation_left_uncertainty=0, - translation_right_uncertainty=0, + multiplicator_left_uncertainty=0.0, + multiplicator_right_uncertainty=0.0, + translation_left_uncertainty=0.0, + translation_right_uncertainty=0.0, ), } diff --git a/solidago/tests/data/tiny_tournesol.zip b/solidago/tests/data/tiny_tournesol.zip index 517620c7ce7a9d0f727d6d19abb9871027c90688..1376211624d70407e68a48a6cf918927c58550e9 100644 GIT binary patch delta 1472 zcmcb!RqM|_tqHCi|L!@YfpDX5t1shLU#1JL9PIZYLhY(YEiLXa&cK`2rmQsgU)k8Fo%~`a5FHnykKTv028HQ zXZw^*1@ktM$GWrw4eim>7BW@4d!?A1~G8@6{Z?_b%Z5 z!J@-#`FfmDjnibl7Mzfq8lCs@w(>sKofl#_awHyfo1U1O6d?cIa6zt{$ek&D5<137 zi#8OfG4FSpARheGsA4V4qL}m*4XPru{y(^|Vv@7IT4Qm2aM2XixMl&%ov}^YQ_nnL z-@w1xM(gxzrcyS$!s?d_@%~+_c&gUz$aQ`l!d2q;h2`~%&dJ*8dU+ui7aKjadX%hG z>hF_y`l-mPB*wp2pR4q|2szT{k@6-X)$4+Fe8^Up!;QTQ?5<5duOHJ{0-7mzDztJx3Zqkw65%ZVx5mqb8O9i=lYZ-iK~izQbeVLi+RsY3GF{2ED%_J z#cOjxGoQb(u%PWt<#%)57|uN*_x>ec{j7gwtF~OY|8(}O?;Cbp;%1CdwkUi3cpkfDvW;hJ$(%odtY4>X{>bIZRIo3O`EKrJMh_0hrK`lemh>NwuJ23S9%gH`cDIb(m`{ 
zxAzlE<@sHqtMvW^d#`=9vy?ouG^Y`D# z#Z}1uSk|R{-2cO!IZ=O<^)6p}X^|NylsZA_X30lRK2H0_w>louyIrmS$67mT?Tc`tfDw7Wc*~HSB#T zAmNh#IW((uHN8yw6MzD4i1?Bp?>B6`(epAu{E<=E+q~}j`pDl}GbhS0 ztxXjx-&dM0D)VpEOIbZxsbfLv2l{%an!V+ox75*ht@p$kO*T`XScK1C8@=V-#RgH< zlw+%&eR?7E=ELoW@)Nvrw>)fmcyjxUAsi+H|lG^Gq&JG*$UCxy;anPR!)8Koe4)#bt^j7H}BJmGdjgtpG*GJ~565#pQ2Wq~G? vIgiT}P3Yh}E)!jt{iPM$42&!C$rM1VId8v_uq1K|uN1_r$uTtFTG09Jn* diff --git a/solidago/tests/test_aggregation.py b/solidago/tests/test_aggregation.py index 0cf463915d..0e0930e8f2 100644 --- a/solidago/tests/test_aggregation.py +++ b/solidago/tests/test_aggregation.py @@ -41,7 +41,7 @@ def test_qtlstd_qrmed_invariance(test): the multiplicative scales of input user models, as long as it is the same for all users. """ td = importlib.import_module(f"data.data_{test}") - aggregation = StandardizedQrMedian(dev_quantile=0.9, lipschitz=1000., error=1e-5) + aggregation = StandardizedQrMedian(dev_quantile=0.9, lipschitz=10000., error=1e-7) user_models, global_model = aggregation( td.voting_rights, td.standardized_models, diff --git a/solidago/tests/test_judgments.py b/solidago/tests/test_judgments.py deleted file mode 100644 index ffa05c9403..0000000000 --- a/solidago/tests/test_judgments.py +++ /dev/null @@ -1,7 +0,0 @@ -from solidago.pipeline.inputs import TournesolInputFromPublicDataset -from solidago.judgments import Judgments, DataFrameJudgments - -def test_tournesol_import(): - inputs = TournesolInputFromPublicDataset("tests/data/tiny_tournesol.zip") - judgments = inputs.get_judgments("largely_recommended") - assert "aidjango" in set(judgments.comparisons["public_username"]) diff --git a/solidago/tests/test_mehestan.py b/solidago/tests/test_mehestan.py index ccc8f0b67f..4905ce3228 100644 --- a/solidago/tests/test_mehestan.py +++ b/solidago/tests/test_mehestan.py @@ -5,17 +5,16 @@ from solidago.voting_rights import VotingRights from solidago.privacy_settings import PrivacySettings -from solidago.judgments import DataFrameJudgments from 
solidago.scoring_model import DirectScoringModel, ScaledScoringModel -from solidago.scaling import ScalingCompose, Mehestan, QuantileZeroShift +from solidago.scaling import Mehestan from solidago.scaling.mehestan import (Mehestan, _aggregate_user_comparisons, _aggregate) mehestan = Mehestan( lipschitz=100., - min_activity=1, + min_activity=1.0, n_scalers_max=3, privacy_penalty=0.5, user_comparison_lipschitz=100., @@ -26,130 +25,6 @@ @pytest.mark.parametrize("test", range(5)) def test_learned_models(test): td = importlib.import_module(f"data.data_{test}") + if "trust_score" not in td.users: + td.users["trust_score"] = 1.0 m_models = mehestan(td.learned_models, td.users, td.entities, td.voting_rights, td.privacy) - - -users = pd.DataFrame(dict( - is_pretrusted=[True] * 5, - trust_score=[1.] * 5, -)) -users.index.name = "user_id" - -entities = pd.DataFrame(index=range(5)) -entities.index.name = "entity_id" - -privacy = PrivacySettings({ - 0: { 0: False, 1: False, 2: False, 3: False, 4: False }, - 1: { 0: False, 1: False, 2: False, 3: False, 4: False }, -}) - -voting_rights = VotingRights({ - 0: {0: 1.0, 1: 1.0, 2: 1.0, 3: 1.0, 4: 1.0}, - 1: {0: 1.0, 1: 1.0, 2: 1.0, 3: 1.0, 4: 1.0}, - 2: {0: 1.0, 1: 1.0, 2: 1.0, 3: 1.0, 4: 1.0}, - 3: {0: 1.0, 1: 1.0, 2: 1.0, 3: 1.0, 4: 1.0}, - 4: {0: 1.0, 1: 1.0, 2: 1.0, 3: 1.0, 4: 1.0}, -}) - -learned_models = { - 0: DirectScoringModel({ - 0: (0., 0., 0.), - 1: (1., 0., 0.), - 2: (2., 0., 0.), - 3: (3., 0., 0.), - 4: (4., 0., 0.), - }), - 1: DirectScoringModel({ - 0: (0., 0., 0.), - 1: (5., 0., 0.), - 2: (10., 0., 0), - }), - 2: DirectScoringModel({ - 0: (-5., 0., 0.), - 1: (-6., 0., 0.), - 2: (-7., 0., 0.), - }), - 3: DirectScoringModel({ - 0: (0., 0., 0.), - 1: (4., 0., 0.), - }), - 4: DirectScoringModel({ - 0: (0., 0., 0.), - 2: (2., 0., 0.), - }) -} - -activities = mehestan.compute_activities(learned_models, entities, users, privacy) -is_scaler = mehestan.compute_scalers(learned_models, entities, users, privacy) -users = 
users.assign(is_scaler=is_scaler) -scalers = users[users["is_scaler"]] -nonscalers = users[users["is_scaler"] == False] - -scaler_model_norms = mehestan.compute_model_norms(learned_models, scalers, entities, privacy) -scaler_entity_ratios = mehestan.compute_entity_ratios(learned_models, learned_models, entities, scalers, scalers, privacy) -scaler_ratio_voting_rights, scaler_ratios, scaler_ratio_uncertainties = _aggregate_user_comparisons( - scalers, scaler_entity_ratios, - error=mehestan.error, lipschitz=mehestan.user_comparison_lipschitz -) -scaler_multiplicators = mehestan.compute_multiplicators( - scaler_ratio_voting_rights, scaler_ratios, scaler_ratio_uncertainties, scaler_model_norms -) -scaler_entity_diffs = mehestan.compute_entity_diffs( - learned_models, learned_models, scalers, scalers, entities, privacy, scaler_multiplicators -) -scaler_diff_voting_rights, scaler_diffs, scaler_diff_uncertainties = _aggregate_user_comparisons( - scalers, scaler_entity_diffs, - error=mehestan.error, lipschitz=mehestan.user_comparison_lipschitz -) -scaler_translations = mehestan.compute_translations( - scaler_diff_voting_rights, scaler_diffs, scaler_diff_uncertainties -) - -scaled_models = dict() -for scaler in scalers.index: - scaled_models[scaler] = ScaledScoringModel( - base_model=learned_models[scaler], - multiplicator=scaler_multiplicators[scaler][0], - translation=scaler_translations[scaler][0], - multiplicator_left_uncertainty=scaler_multiplicators[scaler][1], - multiplicator_right_uncertainty=scaler_multiplicators[scaler][1], - translation_left_uncertainty=scaler_translations[scaler][1], - translation_right_uncertainty=scaler_translations[scaler][1] - ) - -nonscaler_model_norms = mehestan.compute_model_norms(learned_models, nonscalers, entities, privacy) -nonscaler_entity_ratios = mehestan.compute_entity_ratios(learned_models, scaled_models, entities, - nonscalers, scalers, privacy) -nonscaler_ratio_voting_rights, nonscaler_ratios, nonscaler_ratio_uncertainties = 
_aggregate_user_comparisons( - scalers, nonscaler_entity_ratios, - error=mehestan.error, - lipschitz=mehestan.user_comparison_lipschitz -) -nonscaler_multiplicators = mehestan.compute_multiplicators( - nonscaler_ratio_voting_rights, nonscaler_ratios, nonscaler_ratio_uncertainties, - nonscaler_model_norms -) - -nonscaler_entity_diffs = mehestan.compute_entity_diffs( - learned_models, scaled_models, nonscalers, scalers, entities, privacy, nonscaler_multiplicators -) -nonscaler_diff_voting_rights, nonscaler_diffs, nonscaler_diff_uncertainties = _aggregate_user_comparisons( - scalers, nonscaler_entity_diffs, - error=mehestan.error, lipschitz=mehestan.user_comparison_lipschitz -) -nonscaler_translations = mehestan.compute_translations( - nonscaler_diff_voting_rights, nonscaler_diffs, nonscaler_diff_uncertainties -) - -scaled_models |= { - u: ScaledScoringModel( - base_model=learned_models[u], - multiplicator=nonscaler_multiplicators[u][0], - translation=nonscaler_translations[u][0], - multiplicator_left_uncertainty=nonscaler_multiplicators[u][1], - multiplicator_right_uncertainty=nonscaler_multiplicators[u][1], - translation_left_uncertainty=nonscaler_translations[u][1], - translation_right_uncertainty=nonscaler_translations[u][1] - ) for u in nonscalers.index -} - diff --git a/solidago/tests/test_preference_learning.py b/solidago/tests/test_preference_learning.py index 010600ae55..0a4dbf3cb7 100644 --- a/solidago/tests/test_preference_learning.py +++ b/solidago/tests/test_preference_learning.py @@ -1,7 +1,11 @@ +import pandas as pd import pytest import importlib +import solidago.preference_learning as preference_learning + + @pytest.mark.parametrize("test", range(4)) def test_uniform_gbt(test): td = importlib.import_module(f"data.data_{test}") @@ -12,10 +16,11 @@ def test_uniform_gbt(test): target = td.learned_models[user](entity, td.entities.loc[entity]) assert output == pytest.approx(target, abs=1e-1), (user, entity) + @pytest.mark.parametrize("test", range(4)) def 
test_lbfgs_uniform_gbt(test): pytest.importorskip("torch") - from solidago.preference_learning import LBFGSUniformGBT + LBFGSUniformGBT = preference_learning.LBFGSUniformGBT td = importlib.import_module(f"data.data_{test}") models = LBFGSUniformGBT()(td.judgments, td.users, td.entities) for user in td.users.index: @@ -24,3 +29,23 @@ def test_lbfgs_uniform_gbt(test): target = td.learned_models[user](entity, td.entities.loc[entity]) assert output == pytest.approx(target, abs=1e-1), (user, entity) + +@pytest.mark.parametrize("method", ["UniformGBT", "LBFGSUniformGBT"]) +def test_gbt_score_zero(method): + if method == "LBFGSUniformGBT": + pytest.importorskip("torch") + gbt = getattr(preference_learning, method)() + model = gbt.comparison_learning( + comparisons=pd.DataFrame( + { + "entity_a": [1, 1], + "entity_b": [2, 3], + "comparison": [0, 0], + "comparison_max": [10, 10], + } + ) + ) + + assert model(entity_id=1) == pytest.approx((0, 1.8, 1.8), abs=0.1) + assert model(entity_id=2) == pytest.approx((0, 2.7, 2.7), abs=0.1) + assert model(entity_id=3) == pytest.approx((0, 2.7, 2.7), abs=0.1) diff --git a/solidago/tests/test_primitives.py b/solidago/tests/test_primitives.py index afcd915654..7ca12a7539 100644 --- a/solidago/tests/test_primitives.py +++ b/solidago/tests/test_primitives.py @@ -11,8 +11,8 @@ def test_qrmedian_resilience(): median = qr_median( - lipschitz=1, - voting_rights=1, + lipschitz=1.0, + voting_rights=1.0, values=np.array([-10.0, 5.0, 10.0]), left_uncertainties=np.array([1e-3, 1e-7, 1e-3]), right_uncertainties=np.array([1e-5, 1e-3, 1e-4]), @@ -61,7 +61,7 @@ def test_qr_quantile_zero_uncertainty_incorrect_prior(): def test_qr_quantile_high_uncertainty(): quantile = qr_quantile( - lipschitz=1, + lipschitz=1.0, quantile=0.5, voting_rights=1.0, values=np.array([1.0, 2.0, 6.0, 9.0]), @@ -76,13 +76,13 @@ def test_qr_quantile_high_uncertainty(): @pytest.mark.parametrize( "lipshitz,w,x,delta,quantile,expected_result", [ - (0.1, np.array([0.1]), 
np.array([0]), np.array([0.1]), 0.5, 0), - (0.1, np.array([0.1]), np.array([0]), np.array([0.1]), 0.1, -0.0073), - (0.1, np.array([0.1]), np.array([0]), np.array([0.1]), 0.9, +0.0073), - (0.1, np.array([1] * 1000), np.array([-1] * 500 + [1] * 500), np.array([0.1] * 1000), 0.10, -1.0712), - (0.1, np.array([1] * 1000), np.array([-1] * 100 + [1] * 900), np.array([1e-6] * 1000), 0.10, 0.), - (10000, np.array([1] * 1000), np.array([-1] * 102 + [1] * 898), np.array([1e-6] * 1000), 0.01, -1), - (1e12, np.array([1000] * 1000), np.arange(1000, 2000, 1), np.array([1e-6] * 1000), 0.90, 1899.3929), + (0.1, np.array([0.1]), np.array([0.]), np.array([0.1]), 0.5, 0), + (0.1, np.array([0.1]), np.array([0.]), np.array([0.1]), 0.1, 0), + (0.1, np.array([0.1]), np.array([0.]), np.array([0.1]), 0.9, 0), + (0.1, np.array([1.] * 1000), np.array([-1.] * 500 + [1.] * 500), np.array([0.1] * 1000), 0.10, -0.986816), + (0.1, np.array([1.] * 1000), np.array([-1.] * 100 + [1.] * 900), np.array([1e-6] * 1000), 0.10, 0.), + (10000., np.array([1.] * 1000), np.array([-1.] * 102 + [1.] * 898), np.array([1e-6] * 1000), 0.01, -1.), + (1e12, np.array([1000.] 
* 1000), np.arange(1000., 2000, 1), np.array([1e-6] * 1000), 0.90, 1899.1817), ] ) def test_qr_quantile_returns_expected_results(lipshitz,w,x,delta,quantile,expected_result): @@ -100,8 +100,8 @@ def test_qr_quantile_returns_expected_results(lipshitz,w,x,delta,quantile,expect def test_qr_standard_deviation(): standard_deviation = qr_standard_deviation( - lipschitz=1, - values=np.array([-4, -2, 0, 2, 4]), + lipschitz=1.0, + values=np.array([-4.0, -2.0, 0.0, 2.0, 4.0]), quantile_dev=0.5, ) assert standard_deviation == pytest.approx(2, abs=1e-3) diff --git a/solidago/tests/test_privacy_settings.py b/solidago/tests/test_privacy_settings.py index 28d828388e..b92cffb441 100644 --- a/solidago/tests/test_privacy_settings.py +++ b/solidago/tests/test_privacy_settings.py @@ -11,10 +11,10 @@ def test_privacy_io(): def test_tournesol_import(): inputs = TournesolInputFromPublicDataset("tests/data/tiny_tournesol.zip") - privacy = inputs.get_pipeline_objects()[3] + privacy = inputs.get_pipeline_kwargs(criterion="largely_recommended")["privacy"] aidjango_id = inputs.users[inputs.users["public_username"] == "aidjango"].index[0] video_id_to_entity_id = { video_id: entity_id for (entity_id, video_id) in inputs.entity_id_to_video_id.items() } - assert not privacy[aidjango_id, video_id_to_entity_id['dBap_Lp-0oc']] + assert privacy[aidjango_id, video_id_to_entity_id['dBap_Lp-0oc']] == False diff --git a/solidago/tests/test_scaling.py b/solidago/tests/test_scaling.py index acbaf198bd..df8d91c6c7 100644 --- a/solidago/tests/test_scaling.py +++ b/solidago/tests/test_scaling.py @@ -13,7 +13,7 @@ def test_score_shift_when_all_scores_are_equal(): )) tau = estimate_positive_score_shift( scaled_individual_scores, - W=1, + W=1.0, quantile=0.05 ) assert tau == pytest.approx(12.12) @@ -27,7 +27,7 @@ def test_all_users_equal_voting_right_for_score_shift(): )) tau = estimate_positive_score_shift( scaled_individual_scores, - W=1, + W=1.0, quantile=1/3 ) assert tau == pytest.approx(0, abs=1e-4) diff 
--git a/solidago/tests/test_solvers.py b/solidago/tests/test_solvers.py index 8aa9982fbb..2a841aaaea 100644 --- a/solidago/tests/test_solvers.py +++ b/solidago/tests/test_solvers.py @@ -1,7 +1,35 @@ import pytest +from numba import njit + +from solidago.solvers.dichotomy import solve as dichotomy_solve +from solidago.solvers.optimize import njit_brentq as brentq -from solidago.solvers.dichotomy import solve def test_dichotomy(): - assert solve(lambda t: t, 1, 0, 3) == pytest.approx(1, abs=1e-4) - assert solve(lambda t: 2 - t**2, 1, 0, 3) == pytest.approx(1, abs=1e-4) + assert dichotomy_solve(lambda t: t, 1, 0, 3) == pytest.approx(1, abs=1e-4) + assert dichotomy_solve(lambda t: 2 - t**2, 1, 0, 3) == pytest.approx(1, abs=1e-4) + + +def test_brentq_fails_to_converge_for_non_zero_method(): + @njit + def one_plus_x_square(x): + return 1 + x * x + + with pytest.raises(ValueError): + brentq(one_plus_x_square, extend_bounds="no") + + +def test_brentq_finds_zeros_of_simple_increasing_linear(): + @njit + def x_plus_five(x): + return x + 5 + + assert brentq(x_plus_five, extend_bounds="ascending") == -5.0 + + +def test_brentq_finds_zeros_of_simple_decreasing_linear(): + @njit + def minus_x_plus_twelve(x): + return -x + 12 + + assert brentq(minus_x_plus_twelve, extend_bounds="descending") == 12.0