Skip to content

Commit 83684fe

Browse files
committed
Graph expansion using inoculation or SHACL TripleRules will now expand into a separate named graph if you are working with an RDFLib Dataset instead of a Graph.
1 parent d7241ca commit 83684fe

13 files changed

+351
-139
lines changed

CHANGELOG.md

+20-1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,24 @@ and this project adheres to [Python PEP 440 Versioning](https://www.python.org/d
77
## [Unreleased]
88
- Nothing yet
99

10+
## [0.29.0] - 2024-11-01
11+
12+
### Added
13+
- When validating a Dataset instead of a bare Graph, PySHACL will now expand RDFS and OWL-RL inferences into
14+
a separate named graph, to avoid polluting the datagraph.
15+
- When using SHACL Triple Rules from the SHACL-AF spec, PySHACL will now add the expressed triples into
16+
a separate named graph. This allows you to more easily get the expanded triples back out again afterward.
17+
18+
### Changed
19+
- PySHACL no longer supports older RDFLib versions
20+
- PySHACL relies on the latest OWL-RL version, which in turn relies on the latest RDFLib version
21+
- Therefore PySHACL now requires RDFLib v7.1.1 or newer
22+
- Dropped Python 3.8 support.
23+
- Python developers discontinued Python 3.8 last month (October 2024)
24+
- The next version of RDFLib and OWL-RL will not support Python 3.8
25+
- Removed Python 3.8 from the RDFLib test suite
26+
- Python 3.9-specific typing changes will be incrementally introduced
27+
1028
## [0.28.1] - 2024-10-25
1129

1230
### Fixed
@@ -1182,7 +1200,8 @@ just leaves the files open. Now it is up to the command-line client to close the
11821200

11831201
- Initial version, limited functionality
11841202

1185-
[Unreleased]: https://github.com/RDFLib/pySHACL/compare/v0.28.1...HEAD
1203+
[Unreleased]: https://github.com/RDFLib/pySHACL/compare/v0.29.0...HEAD
1204+
[0.29.0]: https://github.com/RDFLib/pySHACL/compare/v0.28.1...v0.29.0
11861205
[0.28.1]: https://github.com/RDFLib/pySHACL/compare/v0.28.0...v0.28.1
11871206
[0.28.0]: https://github.com/RDFLib/pySHACL/compare/v0.27.0...v0.28.0
11881207
[0.27.0]: https://github.com/RDFLib/pySHACL/compare/v0.26.0...v0.27.0

pyshacl/inference/custom_rdfs_closure.py

+8-3
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
# -*- coding: utf-8 -*-
2+
from typing import Optional, TYPE_CHECKING
3+
24
try:
35
from owlrl import OWL
46

@@ -25,6 +27,9 @@
2527
from owlrl.OWLRL import OWLRL_Semantics
2628
from owlrl.RDFSClosure import RDFS_Semantics as OrigRDFSSemantics
2729

30+
if TYPE_CHECKING:
31+
from rdflib.graph import Graph
32+
2833

2934
class CustomRDFSSemantics(OrigRDFSSemantics):
3035
def one_time_rules(self):
@@ -49,9 +54,9 @@ class CustomRDFSOWLRLSemantics(CustomRDFSSemantics, OWLRL_Semantics):
4954
(OWL.DataRange, OWL.equivalentClass, RDFS.Datatype),
5055
]
5156

52-
def __init__(self, graph, axioms, daxioms, rdfs=True):
53-
OWLRL_Semantics.__init__(self, graph, axioms, daxioms, rdfs)
54-
CustomRDFSSemantics.__init__(self, graph, axioms, daxioms, rdfs)
57+
def __init__(self, graph, axioms, daxioms, rdfs: bool = True, destination: Optional['Graph'] = None):
58+
OWLRL_Semantics.__init__(self, graph, axioms, daxioms, rdfs=rdfs, destination=destination)
59+
CustomRDFSSemantics.__init__(self, graph, axioms, daxioms, rdfs=rdfs, destination=destination)
5560
self.rdfs = True
5661

5762
# noinspection PyMethodMayBeStatic

pyshacl/rdfutil/inoculate.py

+22-96
Original file line numberDiff line numberDiff line change
@@ -2,22 +2,20 @@
22
from typing import TYPE_CHECKING, Dict, Optional, Union
33

44
import rdflib
5-
from rdflib.graph import DATASET_DEFAULT_GRAPH_ID
6-
from rdflib.namespace import NamespaceManager
75

8-
from .clone import clone_blank_node, clone_graph, clone_node
6+
from .clone import clone_blank_node, clone_node, clone_dataset
97
from .consts import OWL, RDF, ConjunctiveLike, GraphLike, OWL_classes, OWL_properties, RDFS_classes, RDFS_properties
108

119
if TYPE_CHECKING:
1210
from rdflib import BNode
13-
from rdflib.term import IdentifiedNode
11+
from rdflib.term import URIRef
1412

1513
from .consts import RDFNode
1614

1715
OWLNamedIndividual = OWL.NamedIndividual
1816

1917

20-
def inoculate(data_graph: rdflib.Graph, ontology: rdflib.Graph) -> rdflib.Graph:
18+
def inoculate(data_graph: rdflib.Graph, ontology: GraphLike) -> rdflib.Graph:
2119
"""
2220
Copies all RDFS and OWL axioms (classes, relationship definitions, and properties)
2321
from the ontology graph into the data_graph.
@@ -33,6 +31,9 @@ def inoculate(data_graph: rdflib.Graph, ontology: rdflib.Graph) -> rdflib.Graph:
3331
ontology_ns = ontology.namespace_manager
3432
data_graph_ns = data_graph.namespace_manager
3533

34+
if isinstance(ontology, (rdflib.ConjunctiveGraph, rdflib.Dataset)):
35+
# always set default context true on the ontology DS
36+
ontology.default_context = True
3637
# Bind any missing ontology namespaces in the DataGraph NS manager.
3738
if ontology_ns is not data_graph_ns:
3839
data_graph_prefixes = {p: n for (p, n) in data_graph_ns.namespaces()}
@@ -108,119 +109,44 @@ def inoculate(data_graph: rdflib.Graph, ontology: rdflib.Graph) -> rdflib.Graph:
108109

109110

110111
def inoculate_dataset(
111-
base_ds: ConjunctiveLike, ontology_ds: GraphLike, target_ds: Optional[Union[ConjunctiveLike, str]] = None
112+
base_ds: ConjunctiveLike, ontology_ds: GraphLike, target_ds: Optional[Union[ConjunctiveLike, str]] = None,
113+
target_graph_identifier: Optional['URIRef'] = None,
112114
):
113115
"""
114116
Make a clone of base_ds (dataset) and add RDFS and OWL triples from ontology_ds
115117
:param base_ds:
116118
:type base_ds: rdflib.Dataset
117119
:param ontology_ds:
118-
:type ontology_ds: rdflib.Dataset
120+
:type ontology_ds: rdflib.Dataset|rdflib.ConjunctiveGraph|rdflib.Graph
119121
:param target_ds:
120122
:type target_ds: rdflib.Dataset|str|NoneType
123+
:param target_graph_identifier:
124+
:type target_graph_identifier: rdflib.URIRef | None
121125
:return: The cloned Dataset with ontology triples from ontology_ds
122126
:rtype: rdflib.Dataset
123127
"""
124128

125-
# TODO: Decide whether we need to clone base_ds before calling this,
126-
# or we clone base_ds as part of this function
127-
default_union: bool = base_ds.default_union
128-
base_named_graphs = [
129-
(
130-
rdflib.Graph(base_ds.store, i, namespace_manager=base_ds.namespace_manager) # type: ignore[arg-type]
131-
if not isinstance(i, rdflib.Graph)
132-
else i
133-
)
134-
for i in base_ds.store.contexts(None)
135-
]
136-
if isinstance(base_ds, rdflib.Dataset) and len(base_named_graphs) < 1:
137-
base_named_graphs = [
138-
rdflib.Graph(base_ds.store, DATASET_DEFAULT_GRAPH_ID, namespace_manager=base_ds.namespace_manager)
139-
]
140-
base_default_context_id = base_ds.default_context.identifier
141129
if target_ds is None:
142-
target_ds = rdflib.Dataset(default_union=default_union)
143-
target_ds.namespace_manager = NamespaceManager(target_ds, 'core')
144-
target_ds.default_context.namespace_manager = target_ds.namespace_manager
130+
target_ds = clone_dataset(base_ds)
131+
elif target_ds is base_ds:
132+
pass
145133
elif target_ds == "inplace" or target_ds == "base":
146134
target_ds = base_ds
147135
elif isinstance(target_ds, str):
148136
raise RuntimeError("target_ds cannot be a string (unless it is 'inplace' or 'base')")
137+
149138
if isinstance(target_ds, (rdflib.ConjunctiveGraph, rdflib.Dataset)):
150139
if not isinstance(target_ds, rdflib.Dataset):
151140
raise RuntimeError("Cannot inoculate ConjunctiveGraph, use Dataset instead.")
152141
else:
153142
raise RuntimeError("Cannot inoculate datasets if target_ds passed in is not a Dataset itself.")
154-
ont_default_context_id: Union[IdentifiedNode, str, None]
155-
if isinstance(ontology_ds, (rdflib.Dataset, rdflib.ConjunctiveGraph)):
156-
ont_graphs = [
157-
(
158-
rdflib.Graph(ontology_ds.store, i, namespace_manager=ontology_ds.namespace_manager) # type: ignore[arg-type]
159-
if not isinstance(i, rdflib.Graph)
160-
else i
161-
)
162-
for i in ontology_ds.store.contexts(None)
163-
]
164-
ont_default_context_id = ontology_ds.default_context.identifier
165-
else:
166-
ont_graphs = [ontology_ds]
167-
ont_default_context_id = None
168-
if target_ds is base_ds or target_ds == "inplace" or target_ds == "base":
169-
target_ds = base_ds
170-
for bg in base_named_graphs:
171-
if len(base_named_graphs) > 1 and bg.identifier == base_default_context_id and len(bg) < 1:
172-
# skip empty default named graph in base_graph
173-
continue
174-
for og in ont_graphs:
175-
if len(ont_graphs) > 1 and og.identifier == ont_default_context_id and len(og) < 1:
176-
# skip empty default named graph in ontology_graph
177-
continue
178-
inoculate(bg, og)
143+
144+
if target_graph_identifier:
145+
dest_graph = target_ds.get_context(target_graph_identifier)
179146
else:
180-
inoculated_graphs = {}
181-
for bg in base_named_graphs:
182-
if len(base_named_graphs) > 1 and bg.identifier == base_default_context_id and len(bg) < 1:
183-
# skip empty default named graph in base_graph
184-
continue
185-
target_g = rdflib.Graph(store=target_ds.store, identifier=bg.identifier)
186-
clone_g = clone_graph(bg, target_graph=target_g)
187-
for og in ont_graphs:
188-
if len(ont_graphs) > 1 and og.identifier == ont_default_context_id and len(og) < 1:
189-
# skip empty default named graph in ontology_graph
190-
continue
191-
inoculate(clone_g, og)
192-
inoculated_graphs[bg.identifier] = clone_g
193-
194-
base_graph_identifiers = [bg.identifier for bg in base_named_graphs]
195-
base_default_context_id = base_ds.default_context.identifier
196-
target_default_context_id = target_ds.default_context.identifier
197-
if base_default_context_id != target_default_context_id:
198-
old_target_default_context = target_ds.default_context
199-
old_target_default_context_id = old_target_default_context.identifier
200-
if isinstance(target_ds, rdflib.Dataset):
201-
new_target_default_context = target_ds.graph(base_default_context_id)
202-
else:
203-
new_target_default_context = target_ds.get_context(base_default_context_id)
204-
target_ds.store.add_graph(new_target_default_context)
205-
target_ds.default_context = new_target_default_context
206-
if old_target_default_context_id not in base_graph_identifiers:
207-
if isinstance(target_ds, rdflib.Dataset):
208-
target_ds.remove_graph(old_target_default_context)
209-
else:
210-
target_ds.store.remove_graph(old_target_default_context)
211-
target_default_context_id = new_target_default_context.identifier
212-
else:
213-
if isinstance(target_ds, rdflib.Dataset):
214-
_ = target_ds.graph(target_default_context_id)
215-
else:
216-
t_default = target_ds.get_context(target_default_context_id)
217-
target_ds.store.add_graph(t_default)
218-
for i, ig in inoculated_graphs.items():
219-
if ig == target_ds.default_context or i == target_default_context_id:
220-
continue
221-
if isinstance(target_ds, rdflib.Dataset):
222-
_ = target_ds.graph(ig) # alias to Dataset.add_graph()
223-
else:
224-
target_ds.store.add_graph(ig)
147+
dest_graph = target_ds.default_context
148+
149+
# inoculate() routine will set default_union on the ontology_ds if it is a Dataset
150+
inoculate(dest_graph, ontology_ds)
225151

226152
return target_ds

pyshacl/rule_expand_runner.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ def mix_in_ontology(self):
9292
else:
9393
to_graph = clone_graph(self.data_graph, identifier=self.data_graph.identifier)
9494
return inoculate(to_graph, self.ont_graph)
95-
return inoculate_dataset(self.data_graph, self.ont_graph, self.data_graph if self.inplace else None)
95+
return inoculate_dataset(self.data_graph, self.ont_graph, self.data_graph if self.inplace else None, URIRef("urn:pyshacl:inoculation"))
9696

9797
def make_executor(self) -> SHACLExecutor:
9898
return SHACLExecutor(
@@ -134,7 +134,7 @@ def run(self) -> GraphLike:
134134
datagraph = clone_graph(datagraph)
135135
has_cloned = True
136136
self.logger.debug(f"Running pre-inferencing with option='{inference_option}'.")
137-
self._run_pre_inference(datagraph, inference_option, logger=self.logger)
137+
self._run_pre_inference(datagraph, inference_option, URIRef("urn:pyshacl:inference"), logger=self.logger)
138138
self.pre_inferenced = True
139139
if not has_cloned and not self.inplace:
140140
# We still need to clone in advanced mode, because of triple rules

pyshacl/rules/__init__.py

+7-4
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ def gather_rules(
8585
ret_rules[shape].append(rule)
8686
return ret_rules
8787

88+
RULES_ITERATE_LIMIT = 100
8889

8990
def apply_rules(
9091
executor: SHACLExecutor,
@@ -98,11 +99,13 @@ def apply_rules(
9899
for shape, rules in sorted_shapes_rules:
99100
# sort the rules by the sh:order before execution
100101
rules = sorted(rules, key=lambda x: x.order)
101-
iterate_limit = 100
102+
_iterate_limit = int(RULES_ITERATE_LIMIT)
102103
while True:
103-
if iterate_limit < 1:
104-
raise ReportableRuntimeError("SHACL Shape Rule iteration exceeded iteration limit of 100.")
105-
iterate_limit -= 1
104+
if _iterate_limit < 1:
105+
raise ReportableRuntimeError(
106+
f"SHACL Shape Rule iteration exceeded iteration limit of {RULES_ITERATE_LIMIT}."
107+
)
108+
_iterate_limit -= 1
106109
this_modified = 0
107110
for r in rules:
108111
if r.deactivated:

pyshacl/rules/shacl_rule.py

+9-4
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,17 @@
11
# -*- coding: utf-8 -*-
22
from decimal import Decimal
3-
from typing import Sequence, Union
3+
from typing import Sequence, TYPE_CHECKING, Optional
44

55
from rdflib import RDF, Literal
66

77
from pyshacl.consts import SH_condition, SH_deactivated, SH_order
88
from pyshacl.errors import RuleLoadError
99
from pyshacl.pytypes import RDFNode, SHACLExecutor
1010

11+
if TYPE_CHECKING:
12+
from pyshacl.pytypes import GraphLike
13+
from rdflib.term import URIRef
14+
1115
RDF_first = RDF.first
1216

1317

@@ -41,7 +45,7 @@ def __init__(self, executor: SHACLExecutor, shape, rule_node, iterate=False):
4145
self.executor = executor
4246
self.shape = shape
4347
self.node = rule_node
44-
self.iterate = False
48+
self.iterate = iterate
4549

4650
deactivated_nodes = list(self.shape.sg.objects(self.node, SH_deactivated))
4751
self._deactivated = len(deactivated_nodes) > 0 and bool(deactivated_nodes[0])
@@ -111,7 +115,8 @@ def filter_conditions(self, focus_nodes: Sequence[RDFNode], data_graph):
111115

112116
def apply(
113117
self,
114-
data_graph,
115-
focus_nodes: Union[Sequence[RDFNode], None] = None,
118+
data_graph: 'GraphLike',
119+
focus_nodes: Optional[Sequence[RDFNode]] = None,
120+
target_graph_identifier: Optional['URIRef'] = None,
116121
):
117122
raise NotImplementedError()

pyshacl/rules/sparql/__init__.py

+17-5
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# -*- coding: utf-8 -*-
2-
from typing import TYPE_CHECKING, List, Sequence, Union
2+
from typing import TYPE_CHECKING, List, Sequence, Union, Optional
33

44
import rdflib
55
from rdflib import Literal
@@ -13,11 +13,13 @@
1313
from ..shacl_rule import SHACLRule
1414

1515
if TYPE_CHECKING:
16+
from rdflib.term import URIRef
1617
from pyshacl.pytypes import GraphLike, RDFNode, SHACLExecutor
1718
from pyshacl.shape import Shape
1819

1920
XSD_string = XSD.string
2021

22+
SPARQL_RULE_ITERATE_LIMIT = 100
2123

2224
class SPARQLRule(SHACLRule):
2325
__slots__ = ("_constructs", "_qh")
@@ -52,7 +54,8 @@ def __init__(self, executor: 'SHACLExecutor', shape: 'Shape', rule_node: 'rdflib
5254
def apply(
5355
self,
5456
data_graph: 'GraphLike',
55-
focus_nodes: Union[Sequence['RDFNode'], None] = None,
57+
focus_nodes: Optional[Sequence['RDFNode']] = None,
58+
target_graph_identifier: Optional['URIRef'] = None,
5659
) -> int:
5760
focus_list: Sequence['RDFNode']
5861
if focus_nodes is not None:
@@ -70,10 +73,12 @@ def apply(
7073
focus_list = filtered_focus_nodes
7174
all_added = 0
7275
SPARQLQueryHelper = get_query_helper_cls()
73-
iterate_limit = 100
76+
iterate_limit = int(SPARQL_RULE_ITERATE_LIMIT)
7477
while True:
7578
if iterate_limit < 1:
76-
raise ReportableRuntimeError("Local SPARQLRule iteration exceeded iteration limit of 100.")
79+
raise ReportableRuntimeError(
80+
f"Local SPARQLRule iteration exceeded iteration limit of {SPARQL_RULE_ITERATE_LIMIT}."
81+
)
7782
iterate_limit -= 1
7883
added = 0
7984
applicable_nodes = self.filter_conditions(focus_list, data_graph)
@@ -101,8 +106,15 @@ def apply(
101106
added += 1
102107
construct_graphs.add(result_graph)
103108
if added > 0:
109+
if isinstance(data_graph, (rdflib.Dataset, rdflib.ConjunctiveGraph)):
110+
if target_graph_identifier is not None:
111+
target_graph = data_graph.get_context(target_graph_identifier)
112+
else:
113+
target_graph = data_graph.default_context
114+
else:
115+
target_graph = data_graph
104116
for g in construct_graphs:
105-
data_graph = clone_graph(g, target_graph=data_graph)
117+
data_graph = clone_graph(g, target_graph=target_graph)
106118
all_added += added
107119
if self.iterate:
108120
continue # Jump up to iterate

0 commit comments

Comments
 (0)