Skip to content

Commit

Permalink
Add documentation for tag_db
Browse files Browse the repository at this point in the history
  • Loading branch information
drew2a committed Oct 11, 2022
1 parent 4b3e30b commit e7f64ec
Show file tree
Hide file tree
Showing 2 changed files with 96 additions and 18 deletions.
8 changes: 4 additions & 4 deletions src/tribler/core/components/tag/community/tag_payload.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,12 @@
class StatementOperation:
"""Do not change the format of the StatementOperation, because this will result in an invalid signature.
"""
subject_type: int
subject_type: int # ResourceType enum
subject: str
predicate: int
predicate: int # ResourceType enum
object: str
operation: int
clock: int # this is the lamport-like clock that unique for each quadruple {public_key, subject, predicate, object}
operation: int # Operation enum
clock: int # This is the Lamport-like clock that is unique for each quadruple {public_key, subject, predicate, object}
creator_public_key: type_from_format('74s')

def __str__(self):
Expand Down
106 changes: 92 additions & 14 deletions src/tribler/core/components/tag/db/tag_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,22 @@

PUBLIC_KEY_FOR_AUTO_GENERATED_TAGS = b'auto_generated'

SHOW_THRESHOLD = 1
HIDE_THRESHOLD = -2
SHOW_THRESHOLD = 1 # how many operations are needed to show a knowledge graph statement in the UI
HIDE_THRESHOLD = -2 # how many operations are needed to hide a knowledge graph statement in the UI


class Operation(IntEnum):
ADD = 1
REMOVE = 2
""" Available types of statement operations."""
ADD = 1 # +1 operation
REMOVE = 2 # -1 operation


class ResourceType(IntEnum):
""" Description of available resources within the Knowledge Graph.
These types are also used as predicates for the statements.
Based on https://en.wikipedia.org/wiki/Dublin_Core
"""
CONTRIBUTOR = 1
COVERAGE = 2
CREATOR = 3
Expand All @@ -40,6 +46,7 @@ class ResourceType(IntEnum):
TITLE = 14
TYPE = 15

# this is a section for extra types
TAG = 101
TORRENT = 102

Expand All @@ -65,7 +72,7 @@ class Statement(db.Entity):
id = orm.PrimaryKey(int, auto=True)

subject = orm.Required(lambda: Resource)
predicate = orm.Required(int, default=101, index=True) # default is the 'HAS_TAG' predicate
predicate = orm.Required(int, default=101, index=True) # default is the 'TAG' predicate
object = orm.Required(lambda: Resource)

operations = orm.Set(lambda: StatementOp)
Expand Down Expand Up @@ -102,7 +109,7 @@ def update_counter(self, operation: Operation, increment: int = 1, is_local_peer
class Resource(db.Entity):
id = orm.PrimaryKey(int, auto=True)
name = orm.Required(str)
type = orm.Required(int)
type = orm.Required(int) # ResourceType enum

subject_statements = orm.Set(lambda: Statement, reverse="subject")
object_statements = orm.Set(lambda: Statement, reverse="object")
Expand Down Expand Up @@ -165,7 +172,18 @@ def add_operation(self, operation: StatementOperation, signature: bytes, is_loca
updated_at=datetime.datetime.utcnow(), auto_generated=is_auto_generated)
return True

def add_auto_generated(self, subject_type: ResourceType, subject: str, predicate: ResourceType, obj: str):
def add_auto_generated(self, subject_type: ResourceType, subject: str, predicate: ResourceType, obj: str) -> bool:
""" Add an autogenerated operation.
The difference between "normal" and "autogenerated" operations is that an autogenerated operation is added
with the flag `is_auto_generated=True` and with the `PUBLIC_KEY_FOR_AUTO_GENERATED_TAGS` public key.
Args:
subject_type: the type of the subject being added. See: ResourceType enum.
subject: a string that represents the subject of the operation being added.
predicate: the enum that represents the predicate of the operation being added.
obj: a string that represents the object of the operation being added.
"""
operation = StatementOperation(
subject_type=subject_type,
subject=subject,
Expand All @@ -176,18 +194,29 @@ def add_auto_generated(self, subject_type: ResourceType, subject: str, predicate
creator_public_key=PUBLIC_KEY_FOR_AUTO_GENERATED_TAGS,
)

self.add_operation(operation, signature=b'', is_local_peer=False, is_auto_generated=True,
counter_increment=SHOW_THRESHOLD)
return self.add_operation(operation, signature=b'', is_local_peer=False, is_auto_generated=True,
counter_increment=SHOW_THRESHOLD)

@staticmethod
def _show_condition(statement):
"""This function determines show condition for the torrent_tag"""
"""This function determines show condition for the statement"""
return statement.local_operation == Operation.ADD.value or \
not statement.local_operation and statement.score >= SHOW_THRESHOLD

def _get_resources(self, resource: str, condition: Callable[[], bool], predicate: ResourceType, case_sensitive: bool,
is_normal_direction: bool) -> List[str]:
""" Get resources that satisfy a given condition.
def _get_resources(self, resource: str, condition: Callable[[], bool], predicate: ResourceType,
case_sensitive: bool, is_normal_direction: bool) -> List[str]:
""" Get resources that satisfies a given condition.
Args:
resource: a string that represents a resource.
condition: a condition that will be applied when querying statements.
predicate: the enum that represents the predicate of the queried statements.
case_sensitive: if True, then Resources will be selected in a case-sensitive manner. If False, then Resources
will be selected in a case-insensitive manner.
is_normal_direction: normality here refers to the direction 'Subject'->'Object'. If this
argument is set to 'False', then the direction 'Object'->'Subject' is used instead.
Returns: a list of strings representing the resources.
"""
if case_sensitive:
resources = list(self.instance.Resource.select(lambda r: r.name == resource))
Expand All @@ -196,6 +225,7 @@ def _get_resources(self, resource: str, condition: Callable[[], bool], predicate

if not resources:
return []

result = []
for resource_entity in resources:
query = (
Expand All @@ -210,20 +240,44 @@ def _get_resources(self, resource: str, condition: Callable[[], bool], predicate

def get_objects(self, subject: str, predicate: ResourceType, case_sensitive: bool = True) -> List[str]:
    """ Get the objects linked to the given subject by the given predicate.

    `self._show_condition` is passed to `_get_resources` as the condition
    applied to the queried statements.

    Args:
        subject: a string that represents the subject.
        predicate: the enum that represents the predicate of the queried statements.
        case_sensitive: if True, the subject is matched in a case-sensitive manner;
            if False, the match is case-insensitive.

    Returns: a list of strings representing the objects.
    """
    self.logger.debug(f'Get resources for {subject} with {predicate}')

    # 'Subject' -> 'Object' is the normal direction of a statement.
    objects = self._get_resources(
        subject,
        self._show_condition,
        predicate,
        case_sensitive,
        is_normal_direction=True,
    )
    return objects

def get_subjects(self, obj: str, predicate: ResourceType, case_sensitive: bool = True) -> List[str]:
    """ Get the subjects that are linked to the given object by the given predicate.

    `self._show_condition` is passed to `_get_resources` as the condition
    applied to the queried statements.

    Args:
        obj: a string that represents the object.
        predicate: the enum that represents the predicate of the queried statements.
        case_sensitive: if True, the object is matched in a case-sensitive manner;
            if False, the match is case-insensitive.

    Returns: a list of strings representing the subjects.
    """
    self.logger.debug(f'Get linked back resources for {obj} with {predicate}')

    # Walk the statements backwards: 'Object' -> 'Subject'.
    subjects = self._get_resources(
        obj,
        self._show_condition,
        predicate,
        case_sensitive,
        is_normal_direction=False,
    )
    return subjects

def get_suggestions(self, subject: str, predicate: ResourceType, case_sensitive: bool = True) -> List[str]:
"""Get all suggestions for a particular subject.
""" Get all suggestions for a particular subject.
Args:
subject: a string that represents the subject.
predicate: the enum that represents a predicate of querying operations.
case_sensitive: if True, then Resources will be selected in case sensitive manner. if False, then Resources
will be selected in case insensitive manner.
Returns: a list of the strings representing the objects.
"""
self.logger.debug(f"Getting suggestions for {subject} with {predicate}")

Expand All @@ -236,6 +290,18 @@ def show_suggestions_condition(statement):

def get_subjects_intersection(self, objects: Set[str], predicate: ResourceType,
                              case_sensitive: bool = True) -> Set[str]:
    """ Get the subjects that are linked to every one of the given objects.

    The subjects are queried separately for each object (see `get_subjects`) and
    the results are intersected. In Tribler, this method is mostly used for
    searching by tags.

    Args:
        objects: a set of strings that represents the objects.
        predicate: the enum that represents the predicate of the queried statements.
        case_sensitive: if True, the objects are matched in a case-sensitive manner;
            if False, the match is case-insensitive.

    Returns: a set of strings representing the subjects. An empty set is returned
        when `objects` is empty.
    """
    if not objects:
        # `set.intersection()` without arguments raises a TypeError, so an empty
        # query is answered with an empty result instead.
        return set()

    # FIXME: Ask @kozlovsky how to do it in a proper way
    subject_sets = [set(self.get_subjects(o, predicate, case_sensitive)) for o in objects]
    return set.intersection(*subject_sets)
Expand Down Expand Up @@ -274,6 +340,18 @@ def shutdown(self) -> None:

def _get_random_operations_by_condition(self, condition: Callable[[Entity], bool], count: int = 5,
attempts: int = 100) -> Set[Entity]:
""" Get `count` random operations that satisfy the given condition.
This method was introduced as a fast alternative to the native Pony `random` method.
Args:
condition: the condition by which the entities will be queried.
count: the number of entities to return.
attempts: the maximum number of attempts to request the DB.
Returns: a set of random operations
"""
operations = set()
for _ in range(attempts):
if len(operations) == count:
Expand Down

0 comments on commit e7f64ec

Please sign in to comment.