From 4a0c75ef31a8f965c11741d4ca3ad16b3638f32d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=AA=20Nguy=C3=AAn=20Hoang?= Date: Wed, 1 Jan 2025 13:49:32 +0100 Subject: [PATCH] WIP Generative model --- .../generative_model/assessment/base.py | 24 ++++++------- .../generative_model/assessment/normal.py | 8 +++-- .../generative_model/comparison/base.py | 24 ++++++------- .../generative_model/comparison/thurston.py | 5 ++- .../generative_model/criterion/base.py | 8 ++--- .../generative_model/engagement/base.py | 36 +++++++++---------- .../generative_model/engagement/simple.py | 23 ++++++------ .../solidago/generative_model/entity/base.py | 4 +-- .../solidago/generative_model/user/base.py | 2 +- .../solidago/generative_model/vouch/base.py | 5 +-- .../generative_model/vouch/erdos_renyi.py | 2 +- solidago/src/solidago/pipeline/base.py | 9 +++-- 12 files changed, 77 insertions(+), 73 deletions(-) diff --git a/solidago/src/solidago/generative_model/assessment/base.py b/solidago/src/solidago/generative_model/assessment/base.py index 55cdb3ce1a..ae6e14e4fb 100644 --- a/solidago/src/solidago/generative_model/assessment/base.py +++ b/solidago/src/solidago/generative_model/assessment/base.py @@ -5,25 +5,23 @@ class AssessmentGenerator(StateFunction): - def __call__(self, state: State) -> assessments: + def main(self, users: Users, entities: Entities, made_public: MadePublic, assessments: Assessments) -> Assessments: """ Fills in the assessments """ - assessments = Assessments() - for (username, entity_name), assessment_list in state.assessments: - assessments[user, entity] = list() + filled_assessments = Assessments() + for (username, entity_name), assessment_list in assessments: + filled_assessments[user, entity] = list() for index, assessment in enumerate(assessment_list): - user = state.users.get(username) - entity = state.entities.get(entity_name) - public = state.made_public[user, entity] - a, a_min, a_max = self.sample(state, assessment, user, entity, public) - if "is_trustworthy" in user and not user["is_trustworthy"]: - a = a_max + a_min - a - assessments[user, entity].append(dict(assessment) | { + user = users.get(username) + entity = entities.get(entity_name) + public = made_public[user, entity] + a, a_min, a_max = self.sample(assessment, user, entity, public) + filled_assessments[user, entity].append(dict(assessment) | { "assessment": a, "assessment_min": a_min, "assessment_max": a_max, }) - return assessments + return filled_assessments - def sample(self, state: State, assessment: Assessment, user: User, entity: Entity, public: bool) -> tuple[float, float, float]: + def sample(self, assessment: Assessment, user: User, entity: Entity, public: bool) -> tuple[float, float, float]: """ Returns assessment min, max and value """ return np.random.random(), 0, 1 diff --git a/solidago/src/solidago/generative_model/assessment/normal.py b/solidago/src/solidago/generative_model/assessment/normal.py index c9137f7bb2..17f2e6655f 100644 --- a/solidago/src/solidago/generative_model/assessment/normal.py +++ b/solidago/src/solidago/generative_model/assessment/normal.py @@ -6,6 +6,10 @@ class NormalAssessmentGenerator(AssessmentGenerator): - def sample(self, state: State, assessment: Assessment, user: VectorUser, entity: VectorEntity, public: bool) -> tuple[float, float, float]: + def sample(self, assessment: Assessment, user: VectorUser, entity: VectorEntity, public: bool) -> tuple[float, float, float]: score = user.vector @ entity.vector / sqrt(user.vector.size) - return score + normal(), -float("inf"), float("inf") + if "is_trustworthy" in user and not user["is_trustworthy"]: + score = - score + else: + score += normal() + return score, -float("inf"), float("inf") diff --git a/solidago/src/solidago/generative_model/comparison/base.py b/solidago/src/solidago/generative_model/comparison/base.py index 5d3a8b19ca..b335adf98d 100644 --- a/solidago/src/solidago/generative_model/comparison/base.py +++ b/solidago/src/solidago/generative_model/comparison/base.py @@ -7,25 +7,23 @@ class ComparisonGenerator(StateFunction): - def __call__(self, state: State) -> None: + def main(self, users: Users, entities: Entities, made_public: MadePublic, comparisons: Comparisons) -> Comparisons: """ Fills in the comparisons """ - comparisons = Comparisons() - for (username, left_name, right_name), comparisons_list in state.comparisons: - comparisons[username, left_name, right_name] = list() + filled_comparisons = Comparisons() + for (username, left_name, right_name), comparisons_list in comparisons: + filled_comparisons[username, left_name, right_name] = list() for index, comparison in enumerate(comparisons_list): - user = state.users.get(username) - left = state.entities.get(left_name) - right = state.entities.get(right_name) - left_public = state.made_public[user, left] - right_public = state.made_public[user, right] + user = users.get(username) + left = entities.get(left_name) + right = entities.get(right_name) + left_public = made_public[user, left] + right_public = made_public[user, right] comparison_value, comparison_max = self.sample(user, left, right, left_public, right_public) - if "is_trustworthy" in user and not user["is_trustworthy"]: - comparison_value = - comparison_value - comparisons[user, left, right].append(dict(comparison) | { + filled_comparisons[user, left, right].append(dict(comparison) | { "comparison_max": comparison_max, "comparison": comparison_value }) - return comparisons + return filled_comparisons def sample(self, user: User, left: Entity, right: Entity, left_public: bool, right_public: bool) -> tuple[float, float]: """ Returns comparison max and value """ diff --git a/solidago/src/solidago/generative_model/comparison/thurston.py b/solidago/src/solidago/generative_model/comparison/thurston.py index bfacfc4472..81304fc210 100644 --- a/solidago/src/solidago/generative_model/comparison/thurston.py +++ b/solidago/src/solidago/generative_model/comparison/thurston.py @@ -24,7 +24,10 @@ def sample(self, user: User, left: Entity, right: Entity, left_public: bool, rig """ `lpublic` and `rpublic` are not used. Returns comparison max and value. """ score_diff = (user.vector @ (right.vector - left.vector)) / np.sqrt(user.vector.size) - return self.sample_comparison(score_diff), self.comparison_max + comparison = self.sample_comparison(score_diff) + if "is_trustworthy" in user and not user["is_trustworthy"]: + comparison = - comparison + return comparison, self.comparison_max @abstractmethod def sample_comparison(self, score_diff: float) -> float: diff --git a/solidago/src/solidago/generative_model/criterion/base.py b/solidago/src/solidago/generative_model/criterion/base.py index 911994dff5..8f134dc2a2 100644 --- a/solidago/src/solidago/generative_model/criterion/base.py +++ b/solidago/src/solidago/generative_model/criterion/base.py @@ -5,13 +5,11 @@ class CriterionGenerator: criteria_cls: type=Criteria def __init__(self, n_criteria: int=0): - assert isinstance(n_criteria, int) and n_criteria >= 0 + assert isinstance(n_criteria, int) and n_criteria > 0 self.n_criteria = n_criteria - def __call__(self, state: State) -> None: - if n_criteria == 0: - return None - state.criteria = self.criteria_cls([ self.sample(c) for c in range(n_criteria) ]) + def main(self) -> Criteria: + return self.criteria_cls([ self.sample(c) for c in range(n_criteria) ]) def sample(self, criterion): return self.criteria_cls.series_cls(name=criterion) diff --git a/solidago/src/solidago/generative_model/engagement/base.py b/solidago/src/solidago/generative_model/engagement/base.py index 280e31ce33..aca61f942c 100644 --- a/solidago/src/solidago/generative_model/engagement/base.py +++ b/solidago/src/solidago/generative_model/engagement/base.py @@ -5,37 +5,37 @@ class EngagementGenerator(StateFunction): - def __call__(self, state: State) -> None: - state.made_public = MadePublic() - state.assessments, state.comparisons = Assessments(), Comparisons() - for user in state.users: - eval_entities = self.sample_evaluated_entities(state, user) + def main(self, users: Users, entities: Entities) -> tuple[MadePublic, Assessments, Comparisons]: + made_public, assessments, comparisons = MadePublic(), Assessments(), Comparisons() + for user in users: + eval_entities = self.sample_evaluated_entities(user, entities) for index, entity in enumerate(eval_entities): - public = self.public(state, user, entity, eval_entities) - state.made_public[user, entity] = public - assess = self.assess(state, user, entity, eval_entities) + public = self.public(user, entity, eval_entities) + made_public[user, entity] = public + assess = self.assess(user, entity, eval_entities) if assess: - state.assessments.add(user, entity) + assessments.add(user, entity) for index2, entity2 in enumerate(eval_entities): if index2 >= index: break - compare = self.compare(state, user, entity, entity2, eval_entities) + compare = self.compare(user, entity, entity2, eval_entities) if compare: - shuffle = self.shuffle(state, user, entity, entity2, eval_entities) + shuffle = self.shuffle(user, entity, entity2, eval_entities) left, right = (entity, entity2) if shuffle else (entity2, entity) - state.comparisons.add(user, left, right) + comparisons.add(user, left, right) + return made_public, assessments, comparisons - def sample_evaluated_entities(self, state: State, user: User) -> Entities: - return type(state.entities)([ e for e in state.entities if random() < 0.5 ]) + def sample_evaluated_entities(self, user: User, entities: Entities) -> Entities: + return type(entities)([ e for e in entities if random() < 0.5 ]) - def public(self, state: State, user: User, entity: Entity, eval_entities: Entities) -> bool: + def public(self, user: User, entity: Entity, eval_entities: Entities) -> bool: return random() < 0.5 - def assess(self, state: State, user: User, entity: Entity, eval_entities: Entities) -> bool: + def assess(self, user: User, entity: Entity, eval_entities: Entities) -> bool: return random() < 0.5 - def compare(self, state: State, user: User, entity1: Entity, entity2: Entity, eval_entities: Entities) -> bool: + def compare(self, user: User, entity1: Entity, entity2: Entity, eval_entities: Entities) -> bool: return random() < 0.5 - def shuffle(self, state: State, user: User, entity1: Entity, entity2: Entity, eval_entities: Entities) -> bool: + def shuffle(self, user: User, entity1: Entity, entity2: Entity, eval_entities: Entities) -> bool: return random() < 0.5 diff --git a/solidago/src/solidago/generative_model/engagement/simple.py b/solidago/src/solidago/generative_model/engagement/simple.py index 50be7e7df0..0018444e8c 100644 --- a/solidago/src/solidago/generative_model/engagement/simple.py +++ b/solidago/src/solidago/generative_model/engagement/simple.py @@ -32,35 +32,36 @@ def __init__( self.p_comparison = p_comparison self._entity_index2id = None - def __call__(self, state: State) -> None: - super().__call__(state) + def main(self, users: Users, entities: Entities) -> tuple[MadePublic, Assessments, Comparisons]: + made_public, assessments, comparisons = super().main(users, entities) self._entity_index2id = None + return made_public, assessments, comparisons - def sample_evaluated_entities(self, state: State, user: User) -> Entities: + def sample_evaluated_entities(self, user: User, entities: Entities) -> Entities: if user["n_comparisons"] <= 0: - return type(state.entities)() + return type(entities)() n_eval_entities = int(2 * user["n_comparisons"] / user["n_comparisons_per_entity"] ) - n_eval_entities = min(len(state.entities), n_eval_entities) + n_eval_entities = min(len(entities), n_eval_entities) p_compare_ab = 2 * user["n_comparisons"] / n_eval_entities**2 # To implement engagement bias, we construct a noisy score-based sort of the entities - scores = state.entities.vectors @ user.vector + scores = entities.vectors @ user.vector noisy_scores = - user["engagement_bias"] * scores + normal(0, 1, len(scores)) argsort = np.argsort(noisy_scores) if self._entity_index2id is None: - self._entity_index2id = { index: str(entity) for index, entity in enumerate(state.entities) } + self._entity_index2id = { index: str(entity) for index, entity in enumerate(entities) } return [ self._entity_index2id[argsort[i]] for i in range(n_eval_entities) ] - def public(self, state: State, user: User, entity: Entity, eval_entities: Entities) -> bool: + def public(self, user: User, entity: Entity, eval_entities: Entities) -> bool: return random() < self.p_public - def assess(self, state: State, user: User, entity: Entity, eval_entities: Entities) -> bool: + def assess(self, user: User, entity: Entity, eval_entities: Entities) -> bool: return random() < self.p_assessment - def compare(self, state: State, user: User, entity1: Entity, entity2: Entity, eval_entities: Entities) -> bool: + def compare(self, user: User, entity1: Entity, entity2: Entity, eval_entities: Entities) -> bool: p_compare_ab = 2 * user["n_comparisons"] / len(eval_entities)**2 return random() < p_compare_ab and random() < self.p_comparison - def shuffle(self, state: State, user: User, entity1: Entity, entity2: Entity, eval_entities: Entities) -> bool: + def shuffle(self, user: User, entity1: Entity, entity2: Entity, eval_entities: Entities) -> bool: return random() < 0.5 diff --git a/solidago/src/solidago/generative_model/entity/base.py b/solidago/src/solidago/generative_model/entity/base.py index d984bf7da4..078af31802 100644 --- a/solidago/src/solidago/generative_model/entity/base.py +++ b/solidago/src/solidago/generative_model/entity/base.py @@ -11,8 +11,8 @@ def __init__(self, n_entities: int=30): assert isinstance(n_entities, int) and n_entities > 0 self.n_entities = n_entities - def __call__(self, state: State) -> None: - state.entities = self.entities_cls([ self.sample(e) for e in range(self.n_entities) ]) + def main(self) -> Entities: + return self.entities_cls([ self.sample(e) for e in range(self.n_entities) ]) def sample(self, entity_name: int) -> Entities: return self.entities_cls.series_cls(name=entity_name) diff --git a/solidago/src/solidago/generative_model/user/base.py b/solidago/src/solidago/generative_model/user/base.py index ae8f396f36..3345986530 100644 --- a/solidago/src/solidago/generative_model/user/base.py +++ b/solidago/src/solidago/generative_model/user/base.py @@ -14,7 +14,7 @@ def __init__(self, n_users: int=30): assert isinstance(n_users, int) and n_users > 0 self.n_users = n_users - def main(self, users: Users, vouches: Vouches) -> Users: + def main(self) -> Users: return self.users_cls([ self.sample(username) for username in range(self.n_users) ]) def sample(self, username: Union[int, str]) -> User: diff --git a/solidago/src/solidago/generative_model/vouch/base.py b/solidago/src/solidago/generative_model/vouch/base.py index ea304c835b..0bde7e4f98 100644 --- a/solidago/src/solidago/generative_model/vouch/base.py +++ b/solidago/src/solidago/generative_model/vouch/base.py @@ -3,8 +3,5 @@ class VouchGenerator(StateFunction): - def __call__(self, state: State) -> None: - state.vouches = self.sample_vouches(state.users) - - def sample_vouches(self, users: Users) -> Vouches: + def main(self, users: Users) -> Vouches: return Vouches() diff --git a/solidago/src/solidago/generative_model/vouch/erdos_renyi.py b/solidago/src/solidago/generative_model/vouch/erdos_renyi.py index 3fd20efdf4..3fa06a7eaa 100644 --- a/solidago/src/solidago/generative_model/vouch/erdos_renyi.py +++ b/solidago/src/solidago/generative_model/vouch/erdos_renyi.py @@ -6,7 +6,7 @@ class ErdosRenyiVouchGenerator(VouchGenerator): - def sample_vouches(self, users: Users) -> Vouches: + def main(self, users: Users) -> Vouches: """ Each vouch is sampled independently, with a probability dependent on users' metadata. Each user must have the two keys `is_trustworthy: bool` and `n_expected_vouches: float`. Trustworthy vouchers only vouch for trustworthy vouchees, diff --git a/solidago/src/solidago/pipeline/base.py b/solidago/src/solidago/pipeline/base.py index 7eb9cafafc..c2fb6dec67 100644 --- a/solidago/src/solidago/pipeline/base.py +++ b/solidago/src/solidago/pipeline/base.py @@ -22,8 +22,13 @@ def __call__(self, state: State, save_directory: Optional[str]=None) -> Any: key: getattr(state, key) for key in self.main.__annotations__ if key != "return" }) - assert isinstance(value, self.main.__annotations__["result"]), "" \ - "Please carefully specify main result type and verify type consistency" + assert "return" in self.main.__annotations__, "" \ + f"Please carefully specify main result type of `{type(self).__name__}`, " \ + f"whose annotation is currently `{self.main.__annotations__}`" + assert isinstance(value, self.main.__annotations__["return"]), "" \ + "Please carefully specify main result type and verify type consistency " \ + f"of `{type(self).__name__}`, " \ + f"whose annotation is currently `{self.main.__annotations__}`" return value @abstractmethod