tournesol-app · GresilleSiffle · Oct 16, 2023 · Oct 17, 2023 · Oct 17, 2023 · Oct 17, 2023
diff --git a/backend/tournesol/entities/video.py b/backend/tournesol/entities/video.py
@@ -38,6 +38,10 @@ def get_allowed_meta_order_fields(cls) -> List[str]:
         """
         return ["duration", "publication_date"]
 
+    @classmethod
+    def get_filter_date_field(cls) -> str:
+        """
+        Return the lookup path of the field used to filter entities by date.
+
+        The returned path is the one `filter_date_lte` applies its `__lte`
+        lookup to.
+        """
+        return "metadata__publication_date"
+
     @classmethod
     def filter_date_lte(cls, qs, max_date):
         return qs.filter(metadata__publication_date__lte=max_date.isoformat())

diff --git a/backend/tournesol/lib/suggestions/__init__.py b/backend/tournesol/lib/suggestions/__init__.py
diff --git a/backend/tournesol/lib/suggestions/strategies/__init__.py b/backend/tournesol/lib/suggestions/strategies/__init__.py
@@ -0,0 +1 @@
+from .tocompare.classic import ClassicEntitySuggestionStrategy
diff --git a/backend/tournesol/lib/suggestions/strategies/base.py b/backend/tournesol/lib/suggestions/strategies/base.py
@@ -0,0 +1,29 @@
+from abc import ABC, abstractmethod
+
+from core.models import User
+from tournesol.models import Poll
+
+
+class ContributionSuggestionStrategy(ABC):
+    """
+    Abstract Base Class for all contribution suggestion strategies.
+
+    A contribution can be a list of entities to compare, or comparisons to
+    make, etc.
+
+    A strategy is built for one poll and one user. Concrete subclasses must
+    implement `get_results()` and `get_serializer_class()`.
+    """
+
+    def __init__(self, poll: Poll, user: User):
+        # The poll and the user are the only state kept by the base class.
+        self.poll = poll
+        self.user = user
+
+    @abstractmethod
+    def get_results(self):
+        """
+        Return the contributions suggested by the strategy.
+
+        The shape of the results is defined by each concrete strategy.
+        """
+        raise NotImplementedError
+
+    @abstractmethod
+    def get_serializer_class(self):
+        """
+        Return a DRF serializer class that should be used to serialize the
+        results returned by `get_results()`.
+        """
+        raise NotImplementedError
diff --git a/backend/tournesol/lib/suggestions/strategies/tocompare/__init__.py b/backend/tournesol/lib/suggestions/strategies/tocompare/__init__.py
diff --git a/backend/tournesol/lib/suggestions/strategies/tocompare/classic.py b/backend/tournesol/lib/suggestions/strategies/tocompare/classic.py
@@ -0,0 +1,254 @@
+import random
+from dataclasses import dataclass
+
+from django.conf import settings
+
+from core.utils.time import time_ago
+from tournesol.lib.suggestions.strategies.base import ContributionSuggestionStrategy
+from tournesol.models import ContributorRating, Entity, EntityPollRating, RateLater
+from tournesol.models.rate_later import RATE_LATER_AUTO_REMOVE_DEFAULT
+from tournesol.serializers.suggestion import EntityToCompare
+
+
+@dataclass
+class IdPool:
+    """
+    A list of ids paired with the number of ids expected to be taken from it.
+    """
+
+    # The candidate ids; consumers take them from the start of the list.
+    ids: list[int]
+    # The number of ids expected to be picked from `ids`.
+    sample_size: int
+
+
+class ClassicEntitySuggestionStrategy(ContributionSuggestionStrategy):
+    """
+    A contribution strategy that suggests random entities for comparison.
+
+    The entities are retrieved from the following pools:
+        - entities already compared by the user (but not enough)
+        - entities in the user's rate-later list
+        - the recently recommended entities
+        - completed by the all-time recommendations if needed
+
+    Expected future updates:
+        - use the user's preferred language(s) when retrieving the
+          recommendations
+    """
+
+    # The maximum number of results returned by the strategy.
+    max_suggestions = 20
+
+    # The expected number of entities retrieved from each pool. The sum should
+    # match the `max_suggestions`.
+    sample_size_compared = 9
+    sample_size_rate_later = 7
+    sample_size_reco_last_month = 4
+
+    top_recommendations_limit = 400
+    recent_recommendations_days = 30
+
+    def _get_recommendations(self, entity_filters, exclude_ids: list[int]) -> list[int]:
+        """
+        Return the ids of the entities recommended in the strategy's poll.
+
+        A rating counts as a recommendation when its `sum_trust_scores`
+        reaches `settings.RECOMMENDATIONS_MIN_TRUST_SCORES` and its
+        `tournesol_score` exceeds
+        `settings.RECOMMENDATIONS_MIN_TOURNESOL_SCORE`. The ratings are then
+        narrowed by `entity_filters`, and the ids listed in `exclude_ids` are
+        left out.
+        """
+        recommended = EntityPollRating.objects.filter(
+            poll=self.poll,
+            sum_trust_scores__gte=settings.RECOMMENDATIONS_MIN_TRUST_SCORES,
+            tournesol_score__gt=settings.RECOMMENDATIONS_MIN_TOURNESOL_SCORE,
+        ).select_related("entity")
+
+        narrowed = recommended.filter(**entity_filters).exclude(
+            entity_id__in=exclude_ids
+        )
+
+        return narrowed.values_list("entity_id", flat=True)
+
+    def _get_compared_sufficiently(self, entity_filters) -> list[int]:
+        """
+        Return the ids of the entities the user has compared enough times.
+
+        "Enough" means at least as many comparisons as the user's setting
+        `rate_later__auto_remove` for this poll, or
+        `RATE_LATER_AUTO_REMOVE_DEFAULT` when the setting is absent. Only the
+        ratings matching `entity_filters` are considered.
+        """
+        poll_settings = self.user.settings.get(self.poll.name, {})
+        threshold = poll_settings.get(
+            "rate_later__auto_remove", RATE_LATER_AUTO_REMOVE_DEFAULT
+        )
+
+        ratings = (
+            ContributorRating.objects.filter(poll=self.poll, user=self.user)
+            .select_related("entity")
+            .filter(**entity_filters)
+        )
+        sufficiently_compared = ratings.annotate_n_comparisons().filter(
+            n_comparisons__gte=threshold
+        )
+
+        return sufficiently_compared.values_list("entity_id", flat=True)
+
+    def _ids_from_pool_compared(self) -> list[int]:
+        """
+        Pick random ids among the entities the user has started to compare.
+
+        An entity qualifies when the user has compared it at least once, and
+        strictly fewer times than the user's setting
+        `rate_later__auto_remove` (or `RATE_LATER_AUTO_REMOVE_DEFAULT` when
+        the setting is absent). At most `max_suggestions` ids are returned.
+        """
+        poll_settings = self.user.settings.get(self.poll.name, {})
+        upper_bound = poll_settings.get(
+            "rate_later__auto_remove", RATE_LATER_AUTO_REMOVE_DEFAULT
+        )
+
+        ratings = (
+            ContributorRating.objects.filter(poll=self.poll, user=self.user)
+            .select_related("entity")
+            .annotate_n_comparisons()
+        )
+        partially_compared = ratings.filter(n_comparisons__lt=upper_bound).filter(
+            n_comparisons__gt=0
+        )
+
+        # Materialise the ids once, then draw the sample from that list.
+        candidates = list(partially_compared.values_list("entity_id", flat=True))
+        return random.sample(candidates, min(len(candidates), self.max_suggestions))
+
+    def _ids_from_pool_rate_later(self, exclude_ids: list[int]) -> list[int]:
+        """
+        Pick random entity ids from the user's rate-later list.
+
+        The ids listed in `exclude_ids` are never returned, and the sample
+        contains at most `max_suggestions` ids.
+        """
+        rate_later = RateLater.objects.filter(poll=self.poll, user=self.user)
+        remaining = rate_later.exclude(entity_id__in=exclude_ids)
+
+        # Materialise the ids once, then draw the sample from that list.
+        candidates = list(remaining.values_list("entity_id", flat=True))
+        return random.sample(candidates, min(len(candidates), self.max_suggestions))
+
+    def _ids_from_pool_reco_last_month(self, exclude_ids: list[int]) -> list[int]:
+        """
+        Return random entity ids from the recent recommendations.
+
+        A recommendation is recent when the date field of its entity is not
+        older than `recent_recommendations_days` days.
+
+        Only ids of entities that have been compared fewer times than the
+        user's setting `rate_later__auto_remove` are returned. The ids listed
+        in `exclude_ids` are never returned, and the sample contains at most
+        `max_suggestions` ids.
+        """
+        poll = self.poll
+
+        entity_filters = {
+            f"entity__{poll.entity_cls.get_filter_date_field()}__gte": time_ago(
+                days=self.recent_recommendations_days
+            ).isoformat(),
+        }
+
+        recommendations = self._get_recommendations(entity_filters, exclude_ids)
+        # Build a set once so that each membership test below is a constant
+        # time lookup instead of a scan of the sufficiently compared ids.
+        already_compared = set(self._get_compared_sufficiently(entity_filters))
+        results = [reco for reco in recommendations if reco not in already_compared]
+
+        return random.sample(results, min(len(results), self.max_suggestions))
+
+    def _ids_from_pool_reco_all_time(self, exclude_ids: list[int]) -> list[int]:
+        """
+        Return random entity ids from the all-time top recommendations.
+
+        Only the recommendations whose entity date field is older than
+        `recent_recommendations_days` days are considered, and only the
+        `top_recommendations_limit` best ones, ranked by Tournesol score, are
+        kept before sampling.
+
+        Only ids of entities that have been compared fewer times than the
+        user's setting `rate_later__auto_remove` are returned. The ids listed
+        in `exclude_ids` are never returned, and the sample contains at most
+        `max_suggestions` ids.
+        """
+        poll = self.poll
+
+        entity_filters = {
+            f"entity__{poll.entity_cls.get_filter_date_field()}__lt": time_ago(
+                days=self.recent_recommendations_days
+            ).isoformat(),
+        }
+
+        # Order explicitly before slicing: without an ordering, the slice
+        # would keep whichever rows the database returns first rather than
+        # the best rated ones (unless the model defines a default ordering).
+        recommendations = self._get_recommendations(entity_filters, exclude_ids).order_by(
+            "-tournesol_score"
+        )[: self.top_recommendations_limit]
+        # Build a set once so that each membership test below is a constant
+        # time lookup instead of a scan of the sufficiently compared ids.
+        already_compared = set(self._get_compared_sufficiently(entity_filters))
+        results = [reco for reco in recommendations if reco not in already_compared]
+
+        return random.sample(results, min(len(results), self.max_suggestions))
+
+    def _consolidate_results(self, pool1: IdPool, pool2: IdPool, pool3: IdPool):
+        """
+        Merge the ids of the three pools into a single list.
+
+        Each pool contributes up to its own `sample_size` ids. When pool 1 or
+        pool 2 holds fewer ids than expected, the other one may contribute as
+        many extra ids as are missing. If the total is still below
+        `max_suggestions`, pool 3 may contribute extra ids, up to half of the
+        missing amount, rounded up.
+        """
+        # Number of ids each of the first two pools fails to provide.
+        shortfall1 = max(pool1.sample_size - len(pool1.ids[: pool1.sample_size]), 0)
+        shortfall2 = max(pool2.sample_size - len(pool2.ids[: pool2.sample_size]), 0)
+
+        # The slots left free by one pool are offered to the other one.
+        picked1 = pool1.ids[: pool1.sample_size + shortfall2]
+        picked2 = pool2.ids[: pool2.sample_size + shortfall1]
+
+        quota3 = pool3.sample_size
+        still_free = (
+            self.max_suggestions
+            - len(picked1)
+            - len(picked2)
+            - len(pool3.ids[:quota3])
+        )
+
+        if still_free > 0:
+            # Half of the free slots, rounded up.
+            quota3 += (still_free + 1) // 2
+
+        picked3 = pool3.ids[:quota3]
+
+        return picked1 + picked2 + picked3
+
+    def get_serializer_class(self):
+        """
+        Return `EntityToCompare`, the serializer class used for the results
+        of this strategy.
+        """
+        return EntityToCompare
+
+    def get_results(self):
+        """
+        Return the suggested entities.
+
+        The results always come from `get_results_for_user_intermediate()`;
+        the other `get_results_for_user_*` variants are not implemented.
+        """
+        return self.get_results_for_user_intermediate()
+
+    def get_results_for_user_new(self):
+        # Placeholder: this variant is not implemented and always raises.
+        raise NotImplementedError
+
+    def get_results_for_user_intermediate(self):
+        """
+        Return the entities suggested to the user, with their poll ratings
+        prefetched.
+
+        The ids come from three pools: the entities already compared by the
+        user, the user's rate-later list, and the recent recommendations.
+        When these pools cannot provide `max_suggestions` ids with their
+        expected sample sizes, the pools are consolidated, then completed by
+        the all-time recommendations.
+        """
+        compared_ids = self._ids_from_pool_compared()
+        rate_later_ids = self._ids_from_pool_rate_later(compared_ids)
+        recent_reco_ids = self._ids_from_pool_reco_last_month(compared_ids + rate_later_ids)
+
+        head_compared = compared_ids[: self.sample_size_compared]
+        head_rate_later = rate_later_ids[: self.sample_size_rate_later]
+        head_recent_reco = recent_reco_ids[: self.sample_size_reco_last_month]
+        expected_total = len(head_compared) + len(head_rate_later) + len(head_recent_reco)
+
+        if expected_total >= self.max_suggestions:
+            # Every pool provides its expected share, nothing to consolidate.
+            selected = head_compared + head_rate_later + head_recent_reco
+        else:
+            # Allow the empty slots from the pool "compared" to be filled by
+            # the items of the pool "rate-later" and vice-versa.
+            selected = self._consolidate_results(
+                IdPool(compared_ids, self.sample_size_compared),
+                IdPool(rate_later_ids, self.sample_size_rate_later),
+                IdPool(recent_reco_ids, self.sample_size_reco_last_month),
+            )
+
+            missing = self.max_suggestions - len(selected)
+
+            if missing > 0:
+                # Last resort: complete with the all-time recommendations.
+                fallback_ids = self._ids_from_pool_reco_all_time(selected)
+                selected = selected + fallback_ids[:missing]
+
+        return Entity.objects.filter(id__in=selected).with_prefetched_poll_ratings(
+            poll_name=self.poll.name
+        )
+
+    def get_results_for_user_advanced(self):
+        # Placeholder: this variant is not implemented and always raises.
+        raise NotImplementedError
diff --git a/backend/tournesol/models/entity.py b/backend/tournesol/models/entity.py
@@ -60,6 +60,7 @@ def with_prefetched_contributor_ratings(self, poll, user, prefetch_criteria_scor
             ContributorRating.objects.filter(poll=poll, user=user)
             .annotate_n_comparisons()
         )
+
         if prefetch_criteria_scores:
             contributor_ratings = contributor_ratings.prefetch_related("criteria_scores")
 

diff --git a/backend/tournesol/serializers/suggestion.py b/backend/tournesol/serializers/suggestion.py
@@ -0,0 +1,12 @@
+from rest_framework import serializers
+
+from tournesol.serializers.entity import RelatedEntitySerializer
+from tournesol.serializers.poll import CollectiveRatingSerializer
+
+
+class EntityToCompare(serializers.Serializer):
+    """
+    Serialize an object as an `entity` with its `collective_rating`.
+
+    The `entity` field serializes the whole object, and the read-only
+    `collective_rating` field is read from its `single_poll_rating`
+    attribute.
+    """
+
+    entity = RelatedEntitySerializer(source="*")
+    collective_rating = CollectiveRatingSerializer(
+        source="single_poll_rating",
+        read_only=True,
+    )
diff --git a/backend/tournesol/tests/lib/__init__.py b/backend/tournesol/tests/lib/__init__.py
diff --git a/backend/tournesol/tests/lib/suggestions/__init__.py b/backend/tournesol/tests/lib/suggestions/__init__.py
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		from .tocompare.classic import ClassicEntitySuggestionStrategy