Merge pull request #464 from dice-group/develop
Ontolearn 0.8.0
Demirrr authored Oct 28, 2024
2 parents 93eedb6 + 8d2da19 commit f0796dd
Showing 51 changed files with 2,111 additions and 1,939 deletions.
124 changes: 104 additions & 20 deletions README.md
@@ -1,12 +1,12 @@
[![Coverage](docs/images/tag_coverage.png)](https://ontolearn-docs-dice-group.netlify.app/usage/09_further_resources#code-coverage)
[![Pypi](docs/images/tag_version.png)](https://pypi.org/project/ontolearn/0.7.1/)
[![Docs](docs/images/tag_docs.png)](https://ontolearn-docs-dice-group.netlify.app/usage/01_introduction)

[![Coverage](https://img.shields.io/badge/coverage-86%25-green)](https://ontolearn-docs-dice-group.netlify.app/usage/09_further_resources#code-coverage)
[![Pypi](https://img.shields.io/badge/pypi-0.8.0-blue)](https://pypi.org/project/ontolearn/0.8.0/)
[![Docs](https://img.shields.io/badge/documentation-0.8.0-yellow)](https://ontolearn-docs-dice-group.netlify.app/usage/01_introduction)
[![Python](https://img.shields.io/badge/python-3.10.13+-4584b6)](https://www.python.org/downloads/release/python-31013/)
 

![Ontolearn](docs/images/Ontolearn_logo.png)
![Ontolearn](docs/_static/images/Ontolearn_logo.png)

# Ontolearn: Learning OWL Class Expression
# Ontolearn: Learning OWL Class Expressions

*Ontolearn* is an open-source software library for learning OWL class expressions at large scale.

@@ -15,7 +15,7 @@ $E^+$ and $E^-$, learning [OWL Class expression](https://www.w3.org/TR/owl2-synt

$$\forall p \in E^+\ \mathcal{K} \models H(p) \wedge \forall n \in E^-\ \mathcal{K} \not \models H(n).$$
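As an illustration, the condition above amounts to a simple coverage check; the sketch below models a hypothesis as a plain Python predicate over individuals (the names and toy data are hypothetical, not Ontolearn API):

```python
def is_solution(hypothesis, positives, negatives):
    """True iff the hypothesis covers every positive and no negative example."""
    return (all(hypothesis(p) for p in positives)
            and not any(hypothesis(n) for n in negatives))

# Toy data: who has children, mirroring the father example further down.
has_child = {"stefan": ["markus"], "heinz": [], "anna": [], "michelle": []}

# "∃ hasChild.⊤" expressed as a predicate over individuals.
h = lambda x: len(has_child[x]) > 0

print(is_solution(h, {"stefan"}, {"heinz", "anna", "michelle"}))  # True
```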

To tackle this supervised learnign problem, ontolearn offers many symbolic, neuro-sybmoloc and deep learning based Learning algorithms:
To tackle this supervised learning problem, Ontolearn offers many symbolic, neuro-symbolic, and deep-learning-based learning algorithms:
- **Drill** → [Neuro-Symbolic Class Expression Learning](https://www.ijcai.org/proceedings/2023/0403.pdf)
- **EvoLearner** → [EvoLearner: Learning Description Logics with Evolutionary Algorithms](https://dl.acm.org/doi/abs/10.1145/3485447.3511925)
- **NCES2** → (soon) [Neural Class Expression Synthesis in ALCHIQ(D)](https://papers.dice-research.org/2023/ECML_NCES2/NCES2_public.pdf)
@@ -42,40 +42,67 @@ wget https://files.dice-research.org/projects/Ontolearn/KGs.zip -O ./KGs.zip &&
# To download learning problems
wget https://files.dice-research.org/projects/Ontolearn/LPs.zip -O ./LPs.zip && unzip LPs.zip
```
```shell
pytest -p no:warnings -x # Running 64 tests takes ~ 6 mins
```

## Learning OWL Class Expression
```python
from ontolearn.learners import TDL
from ontolearn.triple_store import TripleStore
from ontolearn.knowledge_base import KnowledgeBase
from ontolearn.learning_problem import PosNegLPStandard
from owlapy.owl_individual import OWLNamedIndividual
from owlapy import owl_expression_to_sparql, owl_expression_to_dl
# (1) Initialize Triplestore
# (1) Initialize Triplestore or KnowledgeBase
# sudo docker run -p 3030:3030 -e ADMIN_PASSWORD=pw123 stain/jena-fuseki
# Login http://localhost:3030/#/ with admin and pw123
# Create a new dataset called family and upload KGs/Family/family.owl
kb = TripleStore(url="http://localhost:3030/family")
# Login http://localhost:3030/#/ with admin and pw123 and upload KGs/Family/family.owl
# kb = TripleStore(url="http://localhost:3030/family")
kb = KnowledgeBase(path="KGs/Family/father.owl")
# (2) Initialize a learner.
model = TDL(knowledge_base=kb)
model = TDL(knowledge_base=kb, use_nominals=True)
# (3) Define a description logic concept learning problem.
lp = PosNegLPStandard(pos={OWLNamedIndividual("http://example.com/father#stefan")},
neg={OWLNamedIndividual("http://example.com/father#heinz"),
OWLNamedIndividual("http://example.com/father#anna"),
OWLNamedIndividual("http://example.com/father#michelle")})
# (4) Learn description logic concepts best fitting (3).
h = model.fit(learning_problem=lp).best_hypotheses()
print(h)
print(owl_expression_to_dl(h))
print(owl_expression_to_sparql(expression=h))
"""
OWLObjectSomeValuesFrom(property=OWLObjectProperty(IRI('http://example.com/father#','hasChild')),filler=OWLObjectOneOf((OWLNamedIndividual(IRI('http://example.com/father#','markus')),)))
∃ hasChild.{markus}
SELECT
DISTINCT ?x WHERE {
?x <http://example.com/father#hasChild> ?s_1 .
FILTER ( ?s_1 IN (
<http://example.com/father#markus>
) )
}
"""
print(model.classification_report)
"""
Classification Report: Negatives: -1 and Positives 1
              precision    recall  f1-score   support
    Negative       1.00      1.00      1.00         3
    Positive       1.00      1.00      1.00         1
    accuracy                           1.00         4
   macro avg       1.00      1.00      1.00         4
weighted avg       1.00      1.00      1.00         4
"""
```
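The classification report printed above follows the standard precision/recall/F1 definitions; here is a minimal sketch of how such figures follow from confusion counts (illustrative only, not Ontolearn's implementation):

```python
def prf(tp: int, fp: int, fn: int):
    """Precision, recall and F1 from confusion counts."""
    precision = tp / (tp + fp) if tp + fp else 0.0
    recall = tp / (tp + fn) if tp + fn else 0.0
    f1 = (2 * precision * recall / (precision + recall)
          if precision + recall else 0.0)
    return precision, recall, f1

# Perfect fit on the father example: 1 positive, 3 negatives, no errors.
print(prf(tp=1, fp=0, fn=0))  # (1.0, 1.0, 1.0)
```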

## Learning OWL Class Expression over DBpedia
```python
from ontolearn.learners import TDL
from ontolearn.triple_store import TripleStore
from ontolearn.learning_problem import PosNegLPStandard
from owlapy.owl_individual import OWLNamedIndividual
from owlapy import owl_expression_to_sparql, owl_expression_to_dl
from ontolearn.utils.static_funcs import save_owl_class_expressions

# (1) Initialize Triplestore
kb = TripleStore(url="http://dice-dbpedia.cs.upb.de:9080/sparql")
# (2) Initialize a learner.
@@ -134,17 +161,59 @@ TDL (a more scalable learner) can also be used as follows
```python
import json
import requests
response = requests.get('http://0.0.0.0:8000/cel',
headers={'accept': 'application/json', 'Content-Type': 'application/json'},
json={"pos": examples['positive_examples'],
"neg": examples['negative_examples'],
"model": "TDL"})
print(response.json())
```
NCES (another scalable learner) can be used similarly. The following will first train NCES if the provided path `path_to_pretrained_nces` does not exist:
```python
import json
import requests
with open("LPs/Mutagenesis/lps.json") as json_file:
learning_problems = json.load(json_file)["problems"]
## This trains NCES before solving the provided learning problems. Expect poor performance with so few epochs and so little training data.
## If a GPU is available, set `num_of_training_learning_problems` to 10_000 or more, set `nces_train_epochs` to 300 or more, and increase `nces_batch_size`.
for str_target_concept, examples in learning_problems.items():
response = requests.get('http://0.0.0.0:8000/cel',
headers={'accept': 'application/json', 'Content-Type': 'application/json'},
json={"pos": examples['positive_examples'],
"neg": examples['negative_examples'],
"model": "TDL"})
"model": "NCES",
"path_embeddings": "mutagenesis_embeddings/Keci_entity_embeddings.csv",
"path_to_pretrained_nces": None,
# if a pretrained NCES exists at this path, load its weights; otherwise train one and save it
"num_of_training_learning_problems": 100,
"nces_train_epochs": 5,
"nces_batch_size": 16
})
print(response.json())
```

The following will use pretrained weights for NCES:

```python
import json
import requests
with open("LPs/Mutagenesis/lps.json") as json_file:
learning_problems = json.load(json_file)["problems"]
for str_target_concept, examples in learning_problems.items():
response = requests.get('http://0.0.0.0:8000/cel',
headers={'accept': 'application/json', 'Content-Type': 'application/json'},
json={"pos": examples['positive_examples'],
"neg": examples['negative_examples'],
"model": "NCES",
"path_embeddings": "./NCESData/mutagenesis/embeddings/ConEx_entity_embeddings.csv",
"path_to_pretrained_nces": "./NCESData/mutagenesis/trained_models/",
# if a pretrained NCES exists at this path, load its weights; otherwise train one and save it
"num_of_training_learning_problems": 100,
"nces_train_epochs": 5,
"nces_batch_size": 16
})
print(response.json())
```
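Both requests above share the same body shape; below is a small stdlib-only helper for assembling such a payload (the field names are taken from the examples above and are assumptions about the endpoint, not a documented schema):

```python
import json

def cel_payload(pos, neg, model="NCES", **options):
    """Build the JSON body for a /cel request; extra keyword arguments
    (e.g. nces_train_epochs=5) are passed through unchanged."""
    body = {"pos": sorted(pos), "neg": sorted(neg), "model": model}
    body.update(options)
    return json.dumps(body)

print(cel_payload({"http://ex#d1"}, {"http://ex#d2"}, nces_batch_size=16))
```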

</details>

@@ -224,14 +293,29 @@ python examples/concept_learning_cv_evaluation.py --kb ./KGs/Carcinogenesis/carc

## Development


<details> <summary> To see the results </summary>

Creating a feature branch **refactoring** from the develop branch:

```shell
git branch refactoring develop
```

Each feature branch must be merged into the develop branch. Before merging, the tests must run without a problem:
```shell
# To download knowledge graphs
wget https://files.dice-research.org/projects/Ontolearn/KGs.zip -O ./KGs.zip && unzip KGs.zip
# To download learning problems
wget https://files.dice-research.org/projects/Ontolearn/LPs.zip -O ./LPs.zip && unzip LPs.zip
# Download pretrained weights for some models, needed by a few tests
wget https://files.dice-research.org/projects/NCES/NCES_Ontolearn_Data/NCESData.zip -O ./NCESData.zip && unzip NCESData.zip && rm NCESData.zip
wget https://files.dice-research.org/projects/Ontolearn/CLIP/CLIPData.zip && unzip CLIPData.zip && rm CLIPData.zip
pytest -p no:warnings -x # Running 76 tests takes ~ 17 mins
```



</details>

## References
Expand Down
File renamed without changes
Binary file added docs/_static/images/favicon.ico
4 changes: 4 additions & 0 deletions docs/conf.py
@@ -98,6 +98,10 @@
'_static'
]

html_logo = '_static/images/Ontolearn_logo.png'

html_favicon = '_static/images/favicon.ico'

if stanford_theme_mod:
html_theme = 'sphinx_rtd_theme'

Binary file removed docs/images/tag_coverage.png
Binary file removed docs/images/tag_docs.png
Binary file removed docs/images/tag_version.png
2 changes: 1 addition & 1 deletion docs/usage/01_introduction.md
@@ -1,6 +1,6 @@
# About Ontolearn

**Version:** ontolearn 0.7.1
**Version:** ontolearn 0.8.0

**GitHub repository:** [https://github.com/dice-group/Ontolearn](https://github.com/dice-group/Ontolearn)

60 changes: 0 additions & 60 deletions docs/usage/03_examples.md
@@ -261,66 +261,6 @@ if __name__ == '__main__':
start(parser.parse_args())
```

----------------------------------------------------------------------

## Ex. 4: Using Model Adaptor

To simplify the connection between all the
components, there is a
model adaptor available that automatically constructs and connects them.
Here is how to implement the previous example using the [ModelAdapter](ontolearn.mode_adapter.ModelAdapter):

```python
from ontolearn.concept_learner import CELOE
from ontolearn.heuristics import CELOEHeuristic
from ontolearn.metrics import Accuracy
from ontolearn.model_adapter import ModelAdapter
from owlapy.owl_individual import OWLNamedIndividual, IRI
from owlapy.namespaces import Namespaces
from owlapy.owl_ontology_manager import OntologyManager
from owlapy.owl_reasoner import SyncReasoner
from owlapy.render import DLSyntaxObjectRenderer

# Create an reasoner instance
manager = OntologyManager()
onto = manager.load_ontology(IRI.create("KGs/Family/father.owl"))
sync_reasoner = SyncReasoner(onto)

# Define the learning problem
NS = Namespaces('ex', 'http://example.com/father#')
positive_examples = {OWLNamedIndividual(IRI.create(NS, 'stefan')),
OWLNamedIndividual(IRI.create(NS, 'markus')),
OWLNamedIndividual(IRI.create(NS, 'martin'))}
negative_examples = {OWLNamedIndividual(IRI.create(NS, 'heinz')),
OWLNamedIndividual(IRI.create(NS, 'anna')),
OWLNamedIndividual(IRI.create(NS, 'michelle'))}

# Define the learning model using ModelAdapter
# Only the class of the learning algorithm is specified
model = ModelAdapter(learner_type=CELOE,
reasoner=sync_reasoner, # (*)
path="KGs/Family/father.owl",
quality_type=Accuracy,
heuristic_type=CELOEHeuristic, # (*)
expansionPenaltyFactor=0.05,
startNodeBonus=1.0,
nodeRefinementPenalty=0.01,
)

# No need to construct the IRI here ourselves
model.fit(pos=positive_examples, neg=negative_examples)

# Create a Description Logics renderer
dlsr = DLSyntaxObjectRenderer()

# Render the hypothesis to DL syntax
for desc in model.best_hypotheses(1):
print('The result:', dlsr.render(desc.concept), 'has quality', desc.quality)
```

Lines marked with `(*)` are not strictly required as they happen to be
the default choices. For now, you can use ModelAdapter only for EvoLearner, CELOE and OCEL.

-----------------------------------------------------------

In the next guide we will explore the [KnowledgeBase](ontolearn.knowledge_base.KnowledgeBase) class that is needed to
8 changes: 4 additions & 4 deletions docs/usage/04_knowledge_base.md
@@ -2,8 +2,8 @@

In Ontolearn we represent a knowledge base
by the class [KnowledgeBase](ontolearn.knowledge_base.KnowledgeBase) which contains two main class attributes,
an ontology [OWLOntology](https://dice-group.github.io/owlapy/autoapi/owlapy/owl_ontology/index.html#owlapy.owl_ontology.OWLOntology)
and a reasoner [OWLReasoner](https://dice-group.github.io/owlapy/autoapi/owlapy/owl_reasoner/index.html#owlapy.owl_reasoner.OWLReasoner).
an ontology [AbstractOWLOntology](https://dice-group.github.io/owlapy/autoapi/owlapy/owl_ontology/index.html#owlapy.owl_ontology.AbstractOWLOntology)
and a reasoner [AbstractOWLReasoner](https://dice-group.github.io/owlapy/autoapi/owlapy/owl_reasoner/index.html#owlapy.owl_reasoner.AbstractOWLReasoner).
It also contains the class and properties hierarchy as well as other
Ontology-related attributes required for the Structured Machine Learning library.

@@ -19,7 +19,7 @@ Therefore, differently from the ontology you can use methods that require reasoning
the methods for each in the links below:

- [KnowledgeBase](ontolearn.knowledge_base.KnowledgeBase)
- [OWLOntology](https://dice-group.github.io/owlapy/autoapi/owlapy/owl_ontology/index.html#owlapy.owl_ontology.OWLOntology)
- [AbstractOWLOntology](https://dice-group.github.io/owlapy/autoapi/owlapy/owl_ontology/index.html#owlapy.owl_ontology.AbstractOWLOntology)

In summary:

@@ -47,7 +47,7 @@ kb = KnowledgeBase(path="file://KGs/Family/father.owl")
```

What happens in the background is that the ontology located in this path will be loaded
in the `OWLOntology` object of `kb` as done [here](https://dice-group.github.io/owlapy/usage/ontologies.html#loading-an-ontology).
in the `AbstractOWLOntology` object of `kb` as done [here](https://dice-group.github.io/owlapy/usage/ontologies.html#loading-an-ontology).

In our recent version you can also initialize a knowledge base using a dataset hosted in a triplestore.
Since that knowledge base is mainly used for executing a concept learner, we cover that matter more in depth
6 changes: 3 additions & 3 deletions docs/usage/06_concept_learners.md
@@ -53,7 +53,7 @@ hardcoded lines which we can now simply access by loading the json file. Below is
an example file that we are naming `synthetic_problems.json`, showing how it should look:

{
"data_path": "../KGs/Family/family-benchmark_rich_background2.owl",
"data_path": "../KGs/Family/family-benchmark_rich_background.owl",
"learning_problem": {
"positive_examples": [
"http://www.benchmark.org/family#F2F28",
@@ -142,8 +142,8 @@ and `negative_examples` to `OWLNamedIndividual`:
from ontolearn.learning_problem import PosNegLPStandard
from owlapy.owl_individual import IRI, OWLNamedIndividual

typed_pos = set(map(OWLNamedIndividual, map(IRI.create, p)))
typed_neg = set(map(OWLNamedIndividual, map(IRI.create, n)))
typed_pos = set(map(OWLNamedIndividual, map(IRI.create, positive_examples)))
typed_neg = set(map(OWLNamedIndividual, map(IRI.create, negative_examples)))
lp = PosNegLPStandard(pos=typed_pos, neg=typed_neg)
```
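The same map/set pattern, sketched with a stdlib stand-in so it runs without owlapy (the `Individual` NamedTuple and the negative-example IRI below are illustrative, not the real `OWLNamedIndividual` or benchmark data):

```python
from typing import NamedTuple

class Individual(NamedTuple):
    """Stand-in for owlapy's OWLNamedIndividual."""
    iri: str

positive_examples = ["http://www.benchmark.org/family#F2F28"]
negative_examples = ["http://www.benchmark.org/family#NEG1"]  # hypothetical IRI

# Same shape as the snippet above: wrap each IRI string in a typed object.
typed_pos = set(map(Individual, positive_examples))
typed_neg = set(map(Individual, negative_examples))
print(typed_pos)
```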

