Skip to content

Commit

Permalink
Merge pull request #7 from erc-releven/lupl/starlegs-decouple-graph-c…
Browse files Browse the repository at this point in the history
…onstructor

feat: Decouple starlegs constructor from custom data structure
  • Loading branch information
lu-pl authored Sep 25, 2024
2 parents 97a0924 + bec652a commit 572ceef
Show file tree
Hide file tree
Showing 4 changed files with 41 additions and 30 deletions.
34 changes: 19 additions & 15 deletions r11data/starlegs/runner.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
"""Runner for R11data starlegs generation."""

from collections.abc import Iterable
from itertools import chain
from typing import Iterator

from SPARQLWrapper import SPARQLWrapper
from r11data import settings
from r11data.abcs import _ABCRunner
from r11data.starlegs.utils._types import CRMTemplateMap
from r11data.starlegs.utils.sparql_templates import p140_template_map, p141_template_map
from r11data.starlegs.utils._types import StarlegsQuery
from r11data.starlegs.utils.sparql_templates import p140_queries, p141_queries
from r11data.starlegs.utils.starlegs_logging import (
starlegs_final_graph_log,
starlegs_subgraph_log,
Expand All @@ -13,24 +17,22 @@
from rdflib import Graph


def starlegs(queries: Iterable[StarlegsQuery]) -> Graph:
    """Run starlegs construct queries and accumulate results into a Graph instance.

    Every query is sent to the RELEVEN GraphDB repository using the
    credentials from ``settings``. Each result graph is logged and merged
    into one graph, which is logged once more before it is returned.

    Args:
        queries: Queries to execute. A query's ``metadata`` may carry a
            ``"target_class"`` entry; it is only used for the log message.

    Returns:
        The union of all result graphs (an empty graph if ``queries``
        yields nothing).
    """
    graph = Graph()

    sparql = SPARQLWrapper("https://graphdb.r11.eu/repositories/RELEVEN")
    sparql.setCredentials(user=settings.GRAPHDB_USER, passwd=settings.PASSWD)

    for query in queries:
        # Missing metadata is fine: the logger accepts None
        target_class: str | None = query.metadata.get("target_class")

        sparql.setQuery(str(query))
        # NOTE(review): assumes queryAndConvert() yields an rdflib Graph for
        # these construct queries — confirm against the endpoint's response.
        result_graph = sparql.queryAndConvert()

        starlegs_subgraph_log(subgraph=result_graph, target_class=target_class)
        graph += result_graph

    starlegs_final_graph_log(graph)
    return graph
Expand All @@ -39,15 +41,17 @@ def starlegs(*template_maps: CRMTemplateMap) -> Graph:
class StarlegsRunner(_ABCRunner):
    """Runner for Starleg assertions."""

    # Materialised once: a bare ``chain`` iterator stored on the class would be
    # exhausted by the first ``run()`` and leave every later run without queries.
    queries: Iterable[StarlegsQuery] = tuple(chain(p140_queries, p141_queries))

    def persist(self) -> None:
        """Run the conversion and persist the result in r11data/output."""
        graph = self.run()
        output_file = output_starlegs / "starlegs.ttl"

        # Explicit encoding so the written file does not depend on the locale
        with open(output_file, "w", encoding="utf-8") as f:  # type: ignore
            f.write(graph.serialize())

    def run(self) -> Graph:
        """Run all starlegs queries and return the accumulated graph."""
        return starlegs(self.queries)
16 changes: 9 additions & 7 deletions r11data/starlegs/utils/_types.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
"""Custom types for starlegs functionality."""

from dataclasses import dataclass
from string import Template
from collections import UserString


@dataclass
class CRMTemplateMap:
    """Simple dataclass for associating a construct template with applicable CRM classes."""

    sparql_construct_template: Template
    crm_classes: list[str]


class StarlegsQuery(UserString):
    """UserString which can hold additional metadata in a dict.

    Intended for Starlegs SPARQL queries.

    The query text behaves like a string; arbitrary keyword arguments given
    at construction are kept in ``metadata``. Strings derived through the
    inherited string methods are new instances and start with empty metadata.
    """

    def __init__(self, data: str, **metadata):
        # Let UserString normalise the payload so ``self.data`` is a plain str
        super().__init__(data)
        self.metadata: dict = metadata
17 changes: 11 additions & 6 deletions r11data/starlegs/utils/sparql_templates.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
"""SPARQL construct templates for starleg generation."""

from collections.abc import Iterator
from string import Template

from r11data.starlegs.utils._types import CRMTemplateMap
from r11data.starlegs.utils._types import StarlegsQuery


_base_sparql_template: str = """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
Expand Down Expand Up @@ -40,15 +42,18 @@
_p140_template: Template = Template(_p140_sparql_template)
_p141_template: Template = Template(_p141_sparql_template)

# CRM classes each construct template is instantiated for
_p140_target_classes: tuple[str, ...] = (
    "E13_sdhss_P13",
    "E13_sdhss_P26",
    "E13_sdhss_P36",
    "E13_crm_P41",
)
_p141_target_classes: tuple[str, ...] = ("E13_sdhss_P38",)

# Template maps are kept under their existing names for importers that
# still refer to them.
p140_template_map: CRMTemplateMap = CRMTemplateMap(
    sparql_construct_template=_p140_template,
    crm_classes=list(_p140_target_classes),
)
p141_template_map: CRMTemplateMap = CRMTemplateMap(
    sparql_construct_template=_p141_template,
    crm_classes=list(_p141_target_classes),
)

# Tuples rather than module-level ``map`` objects: a ``map`` is a one-shot
# iterator, so every consumer after the first would see no queries at all.
p140_queries: tuple[StarlegsQuery, ...] = tuple(
    StarlegsQuery(_p140_template.substitute(target_class=cls), target_class=cls)
    for cls in _p140_target_classes
)

p141_queries: tuple[StarlegsQuery, ...] = tuple(
    StarlegsQuery(_p141_template.substitute(target_class=cls), target_class=cls)
    for cls in _p141_target_classes
)
4 changes: 2 additions & 2 deletions r11data/starlegs/utils/starlegs_logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,12 @@ def _starlegs_create_count_log(count_mapping: dict[str, int], indent: int = 4) -
return output.getvalue()


def starlegs_subgraph_log(subgraph: Graph, target_class: str):
def starlegs_subgraph_log(subgraph: Graph, target_class: str | None):
"""Logger for intermediary starlegs graph results."""
count_mapping = _starlegs_count_assertions(subgraph)

_log_message = (
f"Running starlegs constructor for {target_class} instances.\n"
f"Running starlegs constructor{'.' if target_class is None else f' for {target_class} instances.'}\n"
f"Generated {len(subgraph)} assertions{':' if subgraph else '.'}\n"
f"{_starlegs_create_count_log(count_mapping=count_mapping)}"
)
Expand Down

0 comments on commit 572ceef

Please sign in to comment.