Merge pull request #505 from dice-group/drill_confusion_matrix_sparql_integrate

DRILL shifts the quality computation to triplestore
dice-group · Dec 5, 2024 · 79c58e8 · 79c58e8
2 parents d328569 + d2f976a
commit 79c58e8
Show file tree

Hide file tree

Showing 5 changed files with 55 additions and 14 deletions.
diff --git a/README.md b/README.md
@@ -99,16 +99,16 @@ weighted avg       1.00      1.00      1.00         4
 
 ## Learning OWL Class Expression over DBpedia
 ```python
-from ontolearn.learners import TDL
+from ontolearn.learners import TDL, Drill
 from ontolearn.triple_store import TripleStore
 from ontolearn.learning_problem import PosNegLPStandard
 from owlapy.owl_individual import OWLNamedIndividual
 from owlapy import owl_expression_to_sparql, owl_expression_to_dl
 from ontolearn.utils.static_funcs import save_owl_class_expressions
 # (1) Initialize Triplestore
-kb = TripleStore(url="http://dice-dbpedia.cs.upb.de:9080/sparql")
+kb = TripleStore(url="https://dbpedia.data.dice-research.org/sparql")
 # (3) Initialize a learner.
-model = TDL(knowledge_base=kb)
+model = Drill(knowledge_base=kb) #  or  TDL(knowledge_base=kb)
 # (4) Define a description logic concept learning problem.
 lp = PosNegLPStandard(pos={OWLNamedIndividual("http://dbpedia.org/resource/Angela_Merkel")},
                       neg={OWLNamedIndividual("http://dbpedia.org/resource/Barack_Obama")})
@@ -117,7 +117,7 @@ h = model.fit(learning_problem=lp).best_hypotheses()
 print(h)
 print(owl_expression_to_dl(h))
 print(owl_expression_to_sparql(expression=h))
-save_owl_class_expressions(expressions=h,path="owl_prediction")
+save_owl_class_expressions(expressions=h,path="#owl_prediction")
 ```
 
 For more, please refer to the [examples](https://github.com/dice-group/Ontolearn/tree/develop/examples) folder.

diff --git a/ontolearn/learners/drill.py b/ontolearn/learners/drill.py
@@ -21,7 +21,6 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
 # -----------------------------------------------------------------------------
-
 import pandas as pd
 import json
 from owlapy.class_expression import OWLClassExpression
@@ -42,11 +41,14 @@
 import time
 import os
 # F1 class will be deprecated to become compute_f1_score function.
-from ontolearn.utils.static_funcs import compute_f1_score
+from ontolearn.utils.static_funcs import compute_f1_score, compute_f1_score_from_confusion_matrix
 import random
 from ontolearn.heuristics import CeloeBasedReward
 from ontolearn.data_struct import PrepareBatchOfPrediction
 from tqdm import tqdm
+from owlapy.converter import owl_expression_to_sparql_with_confusion_matrix
+
+from ..triple_store import TripleStore
 from ..utils.static_funcs import make_iterable_verbose
 from owlapy.utils import get_expression_length
 
@@ -162,7 +164,11 @@ def __init__(self, knowledge_base,
                                                max_num_of_concepts_tested=max_num_of_concepts_tested,
                                                max_runtime=max_runtime)
         # CD: Setting this variable here will be removed later.
-        self.quality_func = compute_f1_score
+
+        if isinstance(self.kb, TripleStore):
+            self.quality_func = compute_f1_score_from_confusion_matrix
+        else:
+            self.quality_func = compute_f1_score
 
     def initialize_training_class_expression_learning_problem(self,
                                                               pos: FrozenSet[OWLNamedIndividual],
@@ -301,9 +307,9 @@ def fit(self, learning_problem: PosNegLPStandard, max_runtime=None):
         if max_runtime:
             assert isinstance(max_runtime, float) or isinstance(max_runtime, int)
             self.max_runtime = max_runtime
-
+        # (1) Reinitialize a few attributes to ensure a clean start.
         self.clean()
-        # (1) Initialize the start time
+        # (2) Initialize the start time
         self.start_time = time.time()
         # (2) Two mappings from a unique OWL Concept to integer, where a unique concept represents the type info
         # C(x) s.t. x \in E^+ and  C(y) s.t. y \in E^-.
@@ -429,9 +435,20 @@ def compute_quality_of_class_expression(self, state: RL_State) -> None:
         # (3) Increment the number of tested concepts attribute.
 
         """
+        if isinstance(self.kb,TripleStore):
+            sparql_query=owl_expression_to_sparql_with_confusion_matrix(expression=state.concept,
+                                                           positive_examples=self.pos,
+                                                           negative_examples=self.neg)
+            bindings=self.kb.query_results(sparql_query).json()["results"]["bindings"]
+            assert len(bindings) == 1
+            bindings=bindings.pop()
+            confusion_matrix={k : v["value"]for k,v in bindings.items()}
+            quality = self.quality_func(confusion_matrix=confusion_matrix)
+
 
-        individuals = frozenset([i for i in self.kb.individuals(state.concept)])
-        quality = self.quality_func(individuals=individuals, pos=self.pos, neg=self.neg)
+        else:
+            individuals = frozenset([i for i in self.kb.individuals(state.concept)])
+            quality = self.quality_func(individuals=individuals, pos=self.pos, neg=self.neg)
         state.quality = quality
         self._number_of_tested_concepts += 1
 

diff --git a/ontolearn/triple_store.py b/ontolearn/triple_store.py
@@ -985,9 +985,10 @@ class TripleStore:
     url: str
     def __init__(self, reasoner=None, url: str = None):
 
+        self.url=url
         if reasoner is None:
-            assert url is not None, f"Reasoner:{reasoner} and url of a triplestore {url} cannot be both None."
-            self.g = TripleStoreReasonerOntology(url=url)
+            assert url is not None, f"Reasoner:{reasoner} and url of a triplestore {self.url} cannot be both None."
+            self.g = TripleStoreReasonerOntology(url=self.url)
         else:
             self.g = reasoner
         self.ontology = self.g
@@ -1192,3 +1193,6 @@ def least_general_named_concepts(self):
 
     def query(self, sparql: str):
         yield from self.g.query(sparql_query=sparql)
+
+    def query_results(self, sparql: str):
+        return self.g.query(sparql_query=sparql)
diff --git a/ontolearn/utils/static_funcs.py b/ontolearn/utils/static_funcs.py
@@ -236,6 +236,26 @@ def compute_f1_score(individuals, pos, neg) -> float:
     return f_1
 
 
+def compute_f1_score_from_confusion_matrix(confusion_matrix:dict)->float:
+    tp=int(confusion_matrix["tp"])
+    fn=int(confusion_matrix["fn"])
+    fp=int(confusion_matrix["fp"])
+    tn=int(confusion_matrix["tn"])
+    try:
+        recall = tp / (tp + fn)
+    except ZeroDivisionError:
+        return 0.0
+    try:
+        precision = tp / (tp + fp)
+    except ZeroDivisionError:
+        return 0.0
+
+    if precision == 0 or recall == 0:
+        return 0.0
+
+    f_1 = 2 * ((precision * recall) / (precision + recall))
+    return f_1
+
 def plot_umap_reduced_embeddings(X: pandas.DataFrame, y: List[float], name: str = "umap_visualization.pdf") -> None:  # pragma: no cover
     # TODO:AB: 'umap' is not part of the dependencies !?
     import umap

diff --git a/tests/test_example_concept_learning_evaluation.py b/tests/test_example_concept_learning_evaluation.py
@@ -134,7 +134,7 @@ def test_learning(self):
                                                                                                   0.2,
                                                                                                   0.97,
                                                                                                   0.1,
-                                                                                                  0.92,
+                                                                                                  0.90,
                                                                                                   0.4,
                                                                                                   0.95,
                                                                                                   0.3])):
-Original file line number
+Diff line change
@@ Expand Up / @@ -134,7 +134,7 @@ def test_learning(self): @@
 .2,
 .97,
 .1,
-.92,
+.90,
 .4,
 .95,
 .3])):
@@ Expand Down @@