Skip to content

Commit

Permalink
WIP GenerativeModel
Browse files Browse the repository at this point in the history
  • Loading branch information
lenhoanglnh committed Dec 31, 2024
1 parent cd7e9c3 commit 67e21b4
Show file tree
Hide file tree
Showing 43 changed files with 3,692 additions and 5,300 deletions.
4 changes: 3 additions & 1 deletion solidago/experiments/toy.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
"n_seeds": 2,
"generative_model": {
"user_gen": ["NormalUserGenerator", {
"n_users": 30,
"p_trustworthy": 0.8,
"p_pretrusted": 0.2,
"zipf_vouch": 2.0,
Expand All @@ -23,11 +24,12 @@
"mean": 0.0,
"dimension": 5
}],
"vouch_gen": ["ErdosRenyiVouchGenerator", {}],
"entity_gen": ["NormalEntityGenerator", {
"n_entities": 100,
"mean": 0.0,
"dimension": 5
}],
"vouch_gen": ["ErdosRenyiVouchGenerator", {}],
"engagement_gen": ["SimpleEngagementGenerator", {
"p_public": 0.8,
"p_assessment": 0.5,
Expand Down
22 changes: 12 additions & 10 deletions solidago/experiments/toy.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,24 @@
import numpy as np
import pandas as pd

from pathlib import Path

from solidago import *


dfs = TournesolExport.load_dfs("tests/tiny_tournesol.zip")
t = TournesolExport("tests/tiny_tournesol.zip")

# with open("experiments/toy.json") as f: hps = json.load(f)
# gen = GenerativeModel.load(hps["generative_model"])
with open("experiments/toy.json") as f: hps = json.load(f)
generative_model = GenerativeModel.load(hps["generative_model"])

# users = gen.user_gen(30)
# vouches = gen.vouch_gen(users)
# entities = gen.entity_gen(100)
# criteria = gen.criterion_gen(2)
# made_public, assessments, comparisons = gen.engagement_gen(users, entities, criteria)
# assessments = gen.assessment_gen(users, entities, criteria, made_public, assessments)
# comparisons = gen.comparison_gen(users, entities, criteria, made_public, comparisons)
state = State
generative_model.modules[0](state)
generative_model.modules[1](state)
generative_model.modules[2](state)
generative_model.modules[3](state)
generative_model.modules[4](state)
generative_model.modules[5](state)

# s = generative_model(30, 100, 2, 0)

Expand Down
40 changes: 16 additions & 24 deletions solidago/src/solidago/generative_model/assessment/base.py
Original file line number Diff line number Diff line change
@@ -1,32 +1,24 @@
import numpy as np

from solidago.state import *
from solidago.pipeline import StateFunction


class AssessmentGenerator(StateFunction):
    """ Base assessment generator: fills in the values of the assessments listed in a state.

    Subclasses customise the sampled values by overriding `sample`.
    """

    def __call__(self, state: State) -> None:
        """ Fills in the assessments

        Iterates over `state.assessments`, which yields
        `((username, entity_name), assessment_list)` pairs, and updates every
        assessment in place with the keys "assessment", "assessment_min"
        and "assessment_max".

        Parameters
        ----------
        state: State
            State whose `assessments` are completed in place.
            Its `users`, `entities` and `made_public` are read.
        """
        for (username, entity_name), assessment_list in state.assessments:
            # These lookups only depend on the (user, entity) pair,
            # so they are resolved once rather than once per assessment.
            user = state.users.get(username)
            entity = state.entities.get(entity_name)
            public = state.made_public[user, entity]
            for index, assessment in enumerate(assessment_list):
                a, a_min, a_max = self.sample(state, assessment, user, entity, public)
                state.assessments[user, entity][index] |= {
                    "assessment": a,
                    "assessment_min": a_min,
                    "assessment_max": a_max,
                }

    def sample(self, state: State, assessment: Assessment, user: User, entity: Entity, public: bool) -> tuple[float, float, float]:
        """ Returns assessment value, min and max (in this order)

        The base implementation ignores all its arguments and draws the value
        uniformly in [0, 1), with min 0 and max 1.
        """
        return np.random.random(), 0, 1
12 changes: 3 additions & 9 deletions solidago/src/solidago/generative_model/assessment/normal.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,11 @@
from numpy import sqrt
from numpy.random import normal

from solidago.state import VectorUser, VectorEntity, Criterion
from solidago.state import *
from .base import AssessmentGenerator


class NormalAssessmentGenerator(AssessmentGenerator):
    """ Assessment generator whose values are a noisy user/entity vector score. """

    def sample(self, state: State, assessment: Assessment, user: VectorUser, entity: VectorEntity, public: bool) -> tuple[float, float, float]:
        """ Returns assessment value, min and max (in this order)

        The value is the dot product of the user and entity vectors,
        divided by the square root of the user vector size, plus a standard
        normal noise. The assessment is unbounded: min is -inf and max is +inf.
        `state`, `assessment` and `public` are not used.
        """
        score = user.vector @ entity.vector / sqrt(user.vector.size)
        return score + normal(), -float("inf"), float("inf")
63 changes: 23 additions & 40 deletions solidago/src/solidago/generative_model/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,25 +6,26 @@
import json

from solidago.state import *
from solidago.pipeline import Sequential

from .user import UserGenerator
from .vouch import VouchGenerator
from .entity import EntityGenerator
from .criterion import CriterionGenerator
from .engagement import EngagementGenerator
from .assessment import AssessmentGenerator
from .comparison import ComparisonGenerator

logger = logging.getLogger(__name__)


class GenerativeModel:
def __init__(
self,
class GenerativeModel(Sequential):
# Names of the modules, in the same order as they are passed to
# `super().__init__` (users, entities, vouches, ...): `__call__` zips this
# tuple with `self.modules`, so both orders must match.
module_names = ("user_gen", "entity_gen", "vouch_gen",
    "engagement_gen", "assessment_gen", "comparison_gen")

def __init__(self,
user_gen: UserGenerator = UserGenerator(),
vouch_gen: VouchGenerator = VouchGenerator(),
entity_gen: EntityGenerator = EntityGenerator(),
criterion_gen: CriterionGenerator = CriterionGenerator(),
vouch_gen: VouchGenerator = VouchGenerator(),
engagement_gen: EngagementGenerator = EngagementGenerator(),
assessment_gen: AssessmentGenerator = AssessmentGenerator(),
comparison_gen: ComparisonGenerator = ComparisonGenerator(),
Expand All @@ -35,35 +36,26 @@ def __init__(
----------
user_model: UserModel
Generates users
vouch_model: VouchModel
Generates vouches
entity_model: EntityModel
Generates entities
vouch_model: VouchModel
Generates vouches
engagement_model: EngagementModel
Generates private/public selection, and comparisons to be made
assessment_model: AssessmentModel
Generates assessment values
comparison_model: ComparisonModel
Generates comparisons values, given comparisons to be made and true scores
Generates comparisons values
"""
self.user_gen = user_gen
self.vouch_gen = vouch_gen
self.entity_gen = entity_gen
self.criterion_gen = criterion_gen
self.engagement_gen = engagement_gen
self.assessment_gen = assessment_gen
self.comparison_gen = comparison_gen
super().__init__(user_gen, entity_gen, vouch_gen, engagement_gen,
assessment_gen, comparison_gen)

def __call__(self, n_users: int, n_entities: int, n_criteria: int=1, random_seed: Optional[int]=None) -> State:
def __call__(self, random_seed: Optional[int]=None) -> State:
""" Generates a random dataset, presented as a state.
No processing of the dataset is performed by the generative model.
Parameters
----------
n_users: int
Number of users to generate
n_entities: int
Number of entities to generate
n_criteria: int
Number of criteria to generate
random_seed: None or int
If int, sets numpy seed for reproducibility
Expand All @@ -75,22 +67,12 @@ def __call__(self, n_users: int, n_entities: int, n_criteria: int=1, random_seed
assert type(random_seed) == int
np.random.seed(random_seed)

logger.info(f"Generate {n_users} users using {self.comparison_gen}")
users = self.user_gen(n_users)
logger.info(f"Generate vouches using {self.vouch_gen}")
vouches = self.vouch_gen(users)
logger.info(f"Generate {n_entities} entities using {self.entity_gen}")
entities = self.entity_gen(n_entities)
logger.info(f"Generate {n_criteria} criteria using {self.criterion_gen}")
criteria = self.criterion_gen(n_criteria)
logger.info(f"Generate user engagement using {self.engagement_gen}")
made_public, assessments, comparisons = self.engagement_gen(users, entities, criteria)
logger.info(f"Generate assessments using {self.assessment_gen}")
assessments = self.assessment_gen(users, entities, criteria, made_public, assessments)
logger.info(f"Generate comparisons using {self.comparison_gen}")
comparisons = self.comparison_gen(users, entities, criteria, made_public, comparisons)

return State(users, vouches, entities, criteria, made_public, assessments, comparisons)
state = State()
for name, module in zip(self.module_names, self.modules):
logger.info(f"Running {name} with {type(module).__name__}")
module(state)

return state

@classmethod
def load(cls, d: Union[dict, str]) -> "GenerativeModel":
Expand All @@ -101,4 +83,5 @@ def load(cls, d: Union[dict, str]) -> "GenerativeModel":
return cls(**{ key: getattr(gen, d[key][0])(**d[key][1]) for key in d })

def to_json(self):
    """ Returns a json-serializable description of the generative model

    Returns
    -------
    out: tuple
        `(class_name, modules)`, where `modules` maps each name of
        `self.module_names` to the `to_json()` output of the attribute
        of the same name.
    """
    # NOTE(review): assumes each module is reachable as an attribute named
    # after its entry in `module_names` - confirm against `Sequential`.
    return type(self).__name__, {
        name: getattr(self, name).to_json() for name in self.module_names
    }

53 changes: 17 additions & 36 deletions solidago/src/solidago/generative_model/comparison/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,44 +3,25 @@
import numpy as np

from solidago.state import *
from solidago.pipeline import StateFunction


class ComparisonGenerator(StateFunction):
    """ Base comparison generator: fills in the values of the comparisons listed in a state.

    Subclasses customise the sampled values by overriding `sample`.
    """

    def __call__(self, state: State) -> None:
        """ Fills in the comparisons

        Iterates over `state.comparisons`, which yields
        `((username, left_name, right_name), comparisons_list)` pairs, and
        updates every comparison in place with the keys "comparison_max"
        and "comparison".

        Parameters
        ----------
        state: State
            State whose `comparisons` are completed in place.
            Its `users`, `entities` and `made_public` are read.
        """
        for (username, left_name, right_name), comparisons_list in state.comparisons:
            # These lookups only depend on the (user, left, right) triple,
            # so they are resolved once rather than once per comparison.
            user = state.users.get(username)
            left = state.entities.get(left_name)
            right = state.entities.get(right_name)
            left_public = state.made_public[user, left]
            right_public = state.made_public[user, right]
            for index, _ in enumerate(comparisons_list):
                # Distinct names, so that the sampled value does not shadow the loop variable
                value, value_max = self.sample(user, left, right, left_public, right_public)
                state.comparisons[user, left, right][index] |= {
                    "comparison_max": value_max,
                    "comparison": value
                }

    def sample(self, user: User, left: Entity, right: Entity, left_public: bool, right_public: bool) -> tuple[float, float]:
        """ Returns comparison value and comparison max (in this order)

        The base implementation ignores all its arguments: the value is
        `(2 * u - 1)**2` for `u` uniform in [0, 1), and the max is 1.
        """
        return (2 * np.random.random() - 1)**2, 1
11 changes: 2 additions & 9 deletions solidago/src/solidago/generative_model/comparison/thurston.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,18 +20,11 @@ def __init__(self, comparison_max: float=float("inf")):
def score_matrix(self, users: VectorUsers, entities: VectorEntities):
return users.vectors @ entities.vectors.T / users.vectors.shape[1]

def sample(self, user: User, left: Entity, right: Entity, left_public: bool, right_public: bool) -> tuple[float, float]:
    """ `left_public` and `right_public` are not used.
    Returns comparison value and comparison max (in this order).

    The score difference is the dot product of the user vector with
    `right.vector - left.vector`, divided by the square root of the user
    vector size. The value is drawn by `self.sample_comparison` from this
    score difference, and the max is `self.comparison_max`.
    """
    score_diff = (user.vector @ (right.vector - left.vector)) / np.sqrt(user.vector.size)
    return self.sample_comparison(score_diff), self.comparison_max

@abstractmethod
def sample_comparison(self, score_diff: float) -> float:
Expand Down
25 changes: 13 additions & 12 deletions solidago/src/solidago/generative_model/criterion/base.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,18 @@
from solidago.state import Criterion, Criteria
from solidago.state import *


class CriterionGenerator:
    """ Generates `n_criteria` criteria, named 0 to n_criteria - 1, into a state. """

    # Container class instantiated by `__call__`. Its `series_cls` attribute is
    # the class used by `sample` to build each single criterion.
    criteria_cls: type=Criteria

    def __init__(self, n_criteria: int=0):
        """
        Parameters
        ----------
        n_criteria: int
            Number of criteria to generate (nonnegative).
            If 0, calling the generator leaves the state untouched.
        """
        assert isinstance(n_criteria, int) and n_criteria >= 0
        self.n_criteria = n_criteria

    def __call__(self, state: State) -> None:
        """ Sets `state.criteria` to `self.n_criteria` freshly sampled criteria.
        Does nothing if `self.n_criteria` is 0. """
        # The count is stored on the instance: it must be read through `self`
        if self.n_criteria == 0:
            return None
        state.criteria = self.criteria_cls([ self.sample(c) for c in range(self.n_criteria) ])

    def sample(self, criterion):
        """ Returns a single criterion, built by `criteria_cls.series_cls` with name `criterion` """
        return self.criteria_cls.series_cls(name=criterion)

Loading

0 comments on commit 67e21b4

Please sign in to comment.