Add documentation for tag_db

kozlovsky · Oct 11, 2022 · e7f64ec · e7f64ec
1 parent 4b3e30b
commit e7f64ec
Show file tree

Hide file tree

Showing 2 changed files with 96 additions and 18 deletions.
diff --git a/src/tribler/core/components/tag/community/tag_payload.py b/src/tribler/core/components/tag/community/tag_payload.py
@@ -9,12 +9,12 @@
 class StatementOperation:
     """Do not change the format of the StatementOperation, because this will result in an invalid signature.
     """
-    subject_type: int
+    subject_type: int  # ResourceType enum
     subject: str
-    predicate: int
+    predicate: int  # ResourceType enum
     object: str
-    operation: int
-    clock: int  # this is the lamport-like clock that unique for each quadruple {public_key, subject, predicate, object}
+    operation: int  # Operation enum
+    clock: int  # Lamport-like clock that is unique for each quadruple {public_key, subject, predicate, object}
     creator_public_key: type_from_format('74s')
 
     def __str__(self):

diff --git a/src/tribler/core/components/tag/db/tag_db.py b/src/tribler/core/components/tag/db/tag_db.py
@@ -14,16 +14,22 @@
 
 PUBLIC_KEY_FOR_AUTO_GENERATED_TAGS = b'auto_generated'
 
-SHOW_THRESHOLD = 1
-HIDE_THRESHOLD = -2
+SHOW_THRESHOLD = 1  # how many operations are needed to show a knowledge graph statement in the UI
+HIDE_THRESHOLD = -2  # how many operations are needed to hide a knowledge graph statement in the UI
 
 
 class Operation(IntEnum):
-    ADD = 1
-    REMOVE = 2
+    """ Available types of statement operations."""
+    ADD = 1  # +1 operation
+    REMOVE = 2  # -1 operation
 
 
 class ResourceType(IntEnum):
+    """ Description of available resources within the Knowledge Graph.
+    These types are also used as predicates for the statements.
+
+    Based on https://en.wikipedia.org/wiki/Dublin_Core
+    """
     CONTRIBUTOR = 1
     COVERAGE = 2
     CREATOR = 3
@@ -40,6 +46,7 @@ class ResourceType(IntEnum):
     TITLE = 14
     TYPE = 15
 
+    # this is a section for extra types
     TAG = 101
     TORRENT = 102
 
@@ -65,7 +72,7 @@ class Statement(db.Entity):
             id = orm.PrimaryKey(int, auto=True)
 
             subject = orm.Required(lambda: Resource)
-            predicate = orm.Required(int, default=101, index=True)  # default is the 'HAS_TAG' predicate
+            predicate = orm.Required(int, default=101, index=True)  # default is the 'TAG' predicate
             object = orm.Required(lambda: Resource)
 
             operations = orm.Set(lambda: StatementOp)
@@ -102,7 +109,7 @@ def update_counter(self, operation: Operation, increment: int = 1, is_local_peer
         class Resource(db.Entity):
             id = orm.PrimaryKey(int, auto=True)
             name = orm.Required(str)
-            type = orm.Required(int)
+            type = orm.Required(int)  # ResourceType enum
 
             subject_statements = orm.Set(lambda: Statement, reverse="subject")
             object_statements = orm.Set(lambda: Statement, reverse="object")
@@ -165,7 +172,18 @@ def add_operation(self, operation: StatementOperation, signature: bytes, is_loca
                updated_at=datetime.datetime.utcnow(), auto_generated=is_auto_generated)
         return True
 
-    def add_auto_generated(self, subject_type: ResourceType, subject: str, predicate: ResourceType, obj: str):
+    def add_auto_generated(self, subject_type: ResourceType, subject: str, predicate: ResourceType, obj: str) -> bool:
+        """ Add an autogenerated operation.
+
+        The difference between a "normal" and an "autogenerated" operation is that the autogenerated operation is added
+        with the flag `is_auto_generated=True` and with the `PUBLIC_KEY_FOR_AUTO_GENERATED_TAGS` public key.
+
+        Args:
+            subject_type: the type of the subject being added. See: ResourceType enum.
+            subject: a string that represents the subject of the operation being added.
+            predicate: the enum that represents the predicate of the operation being added.
+            obj: a string that represents the object of the operation being added.
+        """
         operation = StatementOperation(
             subject_type=subject_type,
             subject=subject,
@@ -176,18 +194,29 @@ def add_auto_generated(self, subject_type: ResourceType, subject: str, predicate
             creator_public_key=PUBLIC_KEY_FOR_AUTO_GENERATED_TAGS,
         )
 
-        self.add_operation(operation, signature=b'', is_local_peer=False, is_auto_generated=True,
-                           counter_increment=SHOW_THRESHOLD)
+        return self.add_operation(operation, signature=b'', is_local_peer=False, is_auto_generated=True,
+                                  counter_increment=SHOW_THRESHOLD)
 
     @staticmethod
     def _show_condition(statement):
-        """This function determines show condition for the torrent_tag"""
+        """This function determines show condition for the statement"""
         return statement.local_operation == Operation.ADD.value or \
                not statement.local_operation and statement.score >= SHOW_THRESHOLD
 
-    def _get_resources(self, resource: str, condition: Callable[[], bool], predicate: ResourceType, case_sensitive: bool,
-                       is_normal_direction: bool) -> List[str]:
-        """ Get resources that satisfy a given condition.
+    def _get_resources(self, resource: str, condition: Callable[[], bool], predicate: ResourceType,
+                       case_sensitive: bool, is_normal_direction: bool) -> List[str]:
+        """ Get resources that satisfy a given condition.
+
+        Args:
+            resource: a string that represents a resource.
+            condition: a condition that will be applied for querying statements.
+            predicate: the enum that represents a predicate of querying operations.
+            case_sensitive: if True, Resources are selected in a case-sensitive manner; if False, they are selected
+                in a case-insensitive manner.
+            is_normal_direction: if True, the query follows the 'Subject'->'Object' direction; if False, it follows
+                the reverse 'Object'->'Subject' direction.
+
+        Returns: a list of the strings representing the resources.
         """
         if case_sensitive:
             resources = list(self.instance.Resource.select(lambda r: r.name == resource))
@@ -196,6 +225,7 @@ def _get_resources(self, resource: str, condition: Callable[[], bool], predicate
 
         if not resources:
             return []
+
         result = []
         for resource_entity in resources:
             query = (
@@ -210,20 +240,44 @@ def _get_resources(self, resource: str, condition: Callable[[], bool], predicate
 
     def get_objects(self, subject: str, predicate: ResourceType, case_sensitive: bool = True) -> List[str]:
         """ Get resources that satisfies given subject and predicate.
+
+        Args:
+            subject: a string that represents the subject.
+            predicate: the enum that represents a predicate of querying operations.
+            case_sensitive: if True, Resources are selected in a case-sensitive manner; if False, they are selected
+                in a case-insensitive manner.
+
+        Returns: a list of the strings representing the objects.
         """
         self.logger.debug(f'Get resources for {subject} with {predicate}')
 
         return self._get_resources(subject, self._show_condition, predicate, case_sensitive, is_normal_direction=True)
 
     def get_subjects(self, obj: str, predicate: ResourceType, case_sensitive: bool = True) -> List[str]:
         """ Get list of subjects that could be linked back to the objects.
+
+        Args:
+            obj: a string that represents the object.
+            predicate: the enum that represents a predicate of querying operations.
+            case_sensitive: if True, Resources are selected in a case-sensitive manner; if False, they are selected
+                in a case-insensitive manner.
+
+        Returns: a list of the strings representing the subjects.
         """
         self.logger.debug(f'Get linked back resources for {obj} with {predicate}')
 
         return self._get_resources(obj, self._show_condition, predicate, case_sensitive, is_normal_direction=False)
 
     def get_suggestions(self, subject: str, predicate: ResourceType, case_sensitive: bool = True) -> List[str]:
-        """Get all suggestions for a particular subject.
+        """ Get all suggestions for a particular subject.
+
+        Args:
+            subject: a string that represents the subject.
+            predicate: the enum that represents a predicate of querying operations.
+            case_sensitive: if True, Resources are selected in a case-sensitive manner; if False, they are selected
+                in a case-insensitive manner.
+
+        Returns: a list of the strings representing the objects.
         """
         self.logger.debug(f"Getting suggestions for {subject} with {predicate}")
 
@@ -236,6 +290,18 @@ def show_suggestions_condition(statement):
 
     def get_subjects_intersection(self, objects: Set[str], predicate: ResourceType,
                                   case_sensitive: bool = True) -> Set[str]:
+        """Query the subjects for each of the given objects and the predicate, then return their intersection.
+
+        In Tribler, this method is mostly used for searching by tags.
+
+        Args:
+            objects: a set of strings that represents the objects.
+            predicate: the enum that represents a predicate of querying operations.
+            case_sensitive: if True, Resources are selected in a case-sensitive manner; if False, they are selected
+                in a case-insensitive manner.
+
+        Returns: a set of strings representing the subjects.
+        """
         # FIXME: Ask @kozlovsky how to do it in a proper way
         sets = [set(self.get_subjects(o, predicate, case_sensitive)) for o in objects]
         return set.intersection(*sets)
@@ -274,6 +340,18 @@ def shutdown(self) -> None:
 
     def _get_random_operations_by_condition(self, condition: Callable[[Entity], bool], count: int = 5,
                                             attempts: int = 100) -> Set[Entity]:
+        """ Get `count` random operations that satisfy the given condition.
+
+        This method was introduced as a fast alternative to the native Pony `random` method.
+
+
+        Args:
+            condition: the condition by which the entities will be queried.
+            count: the amount of entities to return.
+            attempts: maximum attempt count for requesting the DB.
+
+        Returns: a set of random operations
+        """
         operations = set()
         for _ in range(attempts):
             if len(operations) == count: