WIP Scaling left to adapt.
And then tests, tests, tests...
lenhoanglnh committed Jan 2, 2025
1 parent 0b5634b commit 41bbafa
Showing 26 changed files with 3,152 additions and 3,231 deletions.
6 changes: 3 additions & 3 deletions solidago/src/solidago/pipeline/__init__.py
@@ -1,9 +1,9 @@
from .trust_propagation import *
# from .preference_learning import *
from .preference_learning import *
from .voting_rights import *
# from .scaling import *
# from .aggregation import *
# from .post_process import *
from .aggregation import *
from .post_process import *

from .base import *
from .sequential import *
4 changes: 2 additions & 2 deletions solidago/src/solidago/pipeline/aggregation/__init__.py
@@ -7,5 +7,5 @@
from .base import Aggregation
from .average import Average
from .entitywise_qr_quantile import EntitywiseQrQuantile
from .standardized_qr_quantile import StandardizedQrQuantile
from .standardized_qr_median import StandardizedQrMedian
# from .standardized_qr_quantile import StandardizedQrQuantile
# from .standardized_qr_median import StandardizedQrMedian
80 changes: 35 additions & 45 deletions solidago/src/solidago/pipeline/aggregation/average.py
@@ -7,57 +7,47 @@


class Average(Aggregation):
def __call__(
self,
def main(self,
entities: Entities,
voting_rights: VotingRights,
user_models: dict[int, ScoringModel],
users: pd.DataFrame,
entities: pd.DataFrame
) -> tuple[dict[int, ScoringModel], ScoringModel]:
""" Returns scaled user models
Parameters
----------
voting_rights: VotingRights
voting_rights[user, entity]: float
user_models: dict[int, ScoringModel]
user_models[user] is user's scoring model
users: DataFrame with columns
* user_id (int, index)
* trust_score (float)
entities: DataFrame with columns
* entity_id (int, index)
Returns
-------
updated_user_models[user]: ScoringModel
Returns a scaled user model
global_model: ScoringModel
Returns a global scoring model
"""
user_models: UserModels,
) -> ScoringModel:
""" Returns weighted average of user's scores """
global_model = DirectScoringModel()
voting_rights = voting_rights.reorder_keys(["username", "entity_name", "criterion"])

for entity in entities.index:
for entity in entities:

total_voting_rights, total_scores = 0, 0
total_lefts, total_rights = 0, 0
total_voting_rights, total_scores = dict(), dict()
total_lefts, total_rights = dict(), dict()

for user in user_models:
output = user_models[user](entity, entities.loc[entity])
if output is None:
continue
total_voting_rights += voting_rights[user, entity]
total_scores = voting_rights[user, entity] * output[0]
total_lefts = voting_rights[user, entity] * output[1]
total_rights = voting_rights[user, entity] * output[2]
for user, model in user_models:
multiscore = model(entity)

for criterion, score in multiscore:

if total_voting_rights == 0:
continue
if score.isnan():
continue

score = total_scores / total_voting_rights
left = total_lefts / total_voting_rights
right = total_rights / total_voting_rights
global_model[entity] = score, left, right
for d in (total_voting_rights, total_scores, total_lefts, total_rights):
if criterion not in d:
d[criterion] = 0

return user_models, global_model
value, left_unc, right_unc = score.to_triplet()  # unpack (value, left_unc, right_unc)
total_voting_rights[criterion] += voting_rights[user, entity, criterion]
total_scores[criterion] += voting_rights[user, entity, criterion] * value
total_lefts[criterion] += voting_rights[user, entity, criterion] * left_unc
total_rights[criterion] += voting_rights[user, entity, criterion] * right_unc

for criterion in total_voting_rights:

if total_voting_rights[criterion] == 0:
continue

global_model[entity, criterion] = (
total_scores[criterion] / total_voting_rights[criterion],
total_lefts[criterion] / total_voting_rights[criterion],
total_rights[criterion] / total_voting_rights[criterion],
)

return global_model
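
For intuition, here is a minimal standalone sketch of the per-criterion weighted average that the new Average.main computes (plain dicts instead of solidago's Entities/VotingRights/UserModels types; the helper name and data layout are illustrative, not the library API):

from collections import defaultdict

def weighted_average(scores, voting_rights):
    """scores[(user, criterion)] = (value, left_unc, right_unc),
    voting_rights[(user, criterion)] = float."""
    totals = defaultdict(lambda: [0.0, 0.0, 0.0, 0.0])  # weight, value, left, right
    for (user, criterion), (value, left, right) in scores.items():
        weight = voting_rights.get((user, criterion), 0.0)
        t = totals[criterion]
        t[0] += weight
        t[1] += weight * value
        t[2] += weight * left
        t[3] += weight * right
    return {criterion: (t[1] / t[0], t[2] / t[0], t[3] / t[0])
            for criterion, t in totals.items() if t[0] > 0}

print(weighted_average(
    {("alice", "quality"): (1.0, 0.1, 0.1), ("bob", "quality"): (3.0, 0.2, 0.2)},
    {("alice", "quality"): 1.0, ("bob", "quality"): 3.0},
))  # ≈ {'quality': (2.5, 0.175, 0.175)}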

149 changes: 63 additions & 86 deletions solidago/src/solidago/pipeline/aggregation/entitywise_qr_quantile.py
@@ -3,10 +3,8 @@

from .base import Aggregation

from solidago.voting_rights import VotingRights
from solidago.scoring_model import ScoringModel, DirectScoringModel

from solidago.primitives import qr_quantile, qr_uncertainty
from solidago.state import *
from solidago.primitives.lipschitz import qr_quantile, qr_uncertainty


class EntitywiseQrQuantile(Aggregation):
@@ -25,90 +23,69 @@ def __init__(self, quantile=0.2, lipschitz=0.1, error=1e-5):
self.lipschitz = lipschitz
self.error = error

def __call__(
self,
def main(self,
entities: Entities,
voting_rights: VotingRights,
user_models: dict[int, ScoringModel],
users: pd.DataFrame,
entities: pd.DataFrame
) -> tuple[dict[int, ScoringModel], ScoringModel]:
""" Returns scaled user models
Parameters
----------
voting_rights: VotingRights
voting_rights[user, entity]: float
user_models: dict[int, ScoringModel]
user_models[user] is user's scoring model
users: DataFrame with columns
* user_id (int, index)
* trust_score (float)
entities: DataFrame with columns
* entity_id (int, index)
Returns
-------
updated_user_models[user]: ScoringModel
Returns a scaled user model
global_model: ScoringModel
Returns a global scoring model
"""
df = _get_user_scores(voting_rights, user_models, entities)
user_models: UserModels,
) -> ScoringModel:
""" Returns scaled user models """
global_scores = DirectScoringModel()

for entity_id, dfe in df.groupby("entity_id"):
score = qr_quantile(
self.lipschitz,
self.quantile,
np.array(dfe["scores"]),
np.array(dfe["voting_rights"]),
np.array(dfe["left_uncertainties"]),
np.array(dfe["right_uncertainties"]),
self.error
)
uncertainty = qr_uncertainty(
self.lipschitz,
np.array(dfe["scores"]),
np.array(dfe["voting_rights"]),
np.array(dfe["left_uncertainties"]),
np.array(dfe["right_uncertainties"]),
default_dev = 1.0,
error = self.error,
median = score if self.quantile == 0.5 else None,
)
global_scores[entity_id] = score, uncertainty
voting_rights = voting_rights.reorder_keys(["entity_name", "username", "criterion"])
for entity in entities:
all_scores = self.get_scores(entity, user_models)
rights = self.get_voting_rights(entity, voting_rights, user_models)
for criterion, scores_list in all_scores.items():
if criterion not in rights:
continue
scores, left_uncs, right_uncs = [np.array(l) for l in zip(*scores_list)]
score = qr_quantile(
lipschitz=self.lipschitz,
quantile=self.quantile,
values=scores,
voting_rights=np.array(rights[criterion]),
left_uncertainties=left_uncs,
right_uncertainties=right_uncs,
error=self.error
)
uncertainty = qr_uncertainty(
lipschitz=self.lipschitz,
values=scores,
voting_rights=np.array(rights[criterion]),
left_uncertainties=left_uncs,
right_uncertainties=right_uncs,
default_dev=1.0,
error=self.error,
median=score if self.quantile == 0.5 else None,
)
global_scores[entity, criterion] = score, uncertainty, uncertainty

return user_models, global_scores

def to_json(self):
return type(self).__name__, dict(quantile=self.quantile,
lipschitz=self.lipschitz, error=self.error)

def __str__(self):
prop_names = ["quantile", "lipschitz", "error"]
prop = ", ".join([f"{p}={getattr(self, p)}" for p in prop_names])
return f"{type(self).__name__}({prop})"


def _get_user_scores(
voting_rights: VotingRights,
user_models: dict[int, ScoringModel],
entities: pd.DataFrame
):
user_list, entity_list, voting_right_list = list(), list(), list()
scores, lefts, rights = list(), list(), list()
for user in user_models:
for entity in user_models[user].scored_entities(entities):
user_list.append(user)
entity_list.append(entity)
voting_right_list.append(voting_rights[user, entity])
output = user_models[user](entity, entities.loc[entity])
scores.append(output[0])
lefts.append(output[1])
rights.append(output[2])

return pd.DataFrame(dict(
user_id=user_list, entity_id=entity_list, voting_rights=voting_right_list,
scores=scores, left_uncertainties=lefts, right_uncertainties=rights
))
return global_scores

def get_scores(self,
entity: Entity,
user_models: UserModels,
) -> dict[str, list[MultiScore]]:
""" Collect all user's multiscores of entity """
scores = dict()
for _, model in user_models:
multiscore = model(entity)
for criterion, score in multiscore:
if criterion not in scores:
scores[criterion] = list()
scores[criterion].append(score.to_triplet())
return scores

def get_voting_rights(self,
entity: Entity,
voting_rights: VotingRights,
user_models: UserModels
) -> dict[str, list[float]]:
result = dict()
voting_rights = voting_rights.reorder_keys(["entity_name", "username", "criterion"])
for username, _ in user_models:
for criterion, value in voting_rights[entity, username]:
if criterion not in result:
result[criterion] = list()
result[criterion].append(value)
return result
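
As a rough intuition for the aggregation above, here is a simplified standalone weighted-quantile sketch (illustrative only: solidago's qr_quantile and qr_uncertainty additionally enforce Lipschitz resilience and account for left/right score uncertainties, which this toy version ignores):

import numpy as np

def weighted_quantile(values, weights, quantile=0.2):
    """Smallest value whose cumulative voting-right mass reaches the quantile."""
    order = np.argsort(values)
    values, weights = np.asarray(values, float)[order], np.asarray(weights, float)[order]
    cumulative = np.cumsum(weights) / np.sum(weights)
    return float(values[np.searchsorted(cumulative, quantile)])

# Three users score the same entity; a low quantile keeps the global score cautious.
print(weighted_quantile([-1.0, 0.5, 2.0], [1.0, 1.0, 2.0], quantile=0.2))  # -1.0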
19 changes: 7 additions & 12 deletions solidago/src/solidago/pipeline/pipeline.py
@@ -9,24 +9,19 @@
logger = logging.getLogger(__name__)

from solidago.state import State
from .base import StateFunction
from .sequential import Sequential
from .identity import Identity
from .trust_propagation import TrustPropagation
from .voting_right import VotingRights
from .preference_learning import PreferenceLearning
from .scaling import Scaling
from .aggregation import Aggregation
from .post_process import PostProcess


class Pipeline(Sequential):
def __init__(self,
trust_propagation: TrustPropagation=TrustPropagation(),
voting_rights_assignment: VotingRightsAssignment=VotingRightsAssignment(),
preference_learning: PreferenceLearning=PreferenceLearning(),
scaling: Scaling=Scaling(),
aggregation: Aggregation=Aggregation(),
post_process: PostProcess=PostProcess(),
trust_propagation: StateFunction=Identity(),
voting_rights_assignment: StateFunction=Identity(),
preference_learning: StateFunction=Identity(),
scaling: StateFunction=Identity(),
aggregation: StateFunction=Identity(),
post_process: StateFunction=Identity(),
):
"""Instantiates the pipeline components.
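
A standalone sketch of the design change (not solidago code): every stage now shares the StateFunction interface and defaults to an identity step, so a partially implemented pipeline still runs end to end while individual stages are being adapted.

from typing import Callable

class IdentityStep:
    def __call__(self, state: dict) -> dict:
        return state  # leave the state untouched

class SequentialPipeline:
    def __init__(self, *steps: Callable[[dict], dict]):
        # default to a single identity step when nothing is plugged in
        self.steps = steps or (IdentityStep(),)

    def __call__(self, state: dict) -> dict:
        for step in self.steps:
            state = step(state)
        return state

# Only one stage does real work; the others are identity placeholders.
pipeline = SequentialPipeline(IdentityStep(), lambda s: {**s, "aggregated": True})
print(pipeline({"scores": [1, 2, 3]}))  # {'scores': [1, 2, 3], 'aggregated': True}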
33 changes: 7 additions & 26 deletions solidago/src/solidago/pipeline/post_process/base.py
@@ -3,33 +3,14 @@

import pandas as pd

from solidago.scoring_model import ScoringModel
from solidago.state import *


class PostProcess(ABC):
@abstractmethod
def __call__(
self,
user_models: Mapping[int, ScoringModel],
class PostProcess(StateFunction):
def main(self,
user_models: UserModels,
global_model: ScoringModel,
entities: pd.DataFrame
) -> tuple[Mapping[int, ScoringModel], ScoringModel]:
) -> tuple[UserModels, ScoringModel]:
""" Post-processes user models and global models,
typically to yield human-readable scores
Parameters
----------
user_models: user_model[user] should be a ScoringModel to post-process
global_model: ScoringModel to post-process
entities: DataFrame with columns
* entity_id (int, index)
Returns
-------
user_models: post-processed user models
global_model: post-processed global model
"""
raise NotImplementedError

def to_json(self) -> tuple:
return (type(self).__name__, )
typically to yield human-readable scores """
return user_models, global_model
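
A hypothetical example (not solidago code) of the kind of post-processing the base class describes: mapping unbounded raw scores to a bounded, human-readable range for display. The function name and scale are assumptions for illustration.

import math

def squash(score: float, scale: float = 100.0) -> float:
    """Map an unbounded raw score to the (-scale, scale) interval."""
    return scale * score / math.sqrt(1.0 + score ** 2)

raw_scores = {"entity_1": 2.4, "entity_2": -0.3, "entity_3": 7.1}
readable = {entity: round(squash(score), 1) for entity, score in raw_scores.items()}
print(readable)  # ≈ {'entity_1': 92.3, 'entity_2': -28.7, 'entity_3': 99.0}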
29 changes: 0 additions & 29 deletions solidago/src/solidago/pipeline/post_process/no_post_process.py

This file was deleted.

