diff --git a/README.md b/README.md index aa5190b5..ffd436a2 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,12 @@ -[![Coverage](docs/images/tag_coverage.png)](https://ontolearn-docs-dice-group.netlify.app/usage/09_further_resources#code-coverage) -[![Pypi](docs/images/tag_version.png)](https://pypi.org/project/ontolearn/0.7.1/) -[![Docs](docs/images/tag_docs.png)](https://ontolearn-docs-dice-group.netlify.app/usage/01_introduction) - +[![Coverage](https://img.shields.io/badge/coverage-86%25-green)](https://ontolearn-docs-dice-group.netlify.app/usage/09_further_resources#code-coverage) +[![Pypi](https://img.shields.io/badge/pypi-0.8.0-blue)](https://pypi.org/project/ontolearn/0.8.0/) +[![Docs](https://img.shields.io/badge/documentation-0.8.0-yellow)](https://ontolearn-docs-dice-group.netlify.app/usage/01_introduction) +[![Python](https://img.shields.io/badge/python-3.10.13+-4584b6)](https://www.python.org/downloads/release/python-31013/)   -![Ontolearn](docs/images/Ontolearn_logo.png) +![Ontolearn](docs/_static/images/Ontolearn_logo.png) -# Ontolearn: Learning OWL Class Expression +# Ontolearn: Learning OWL Class Expressions *Ontolearn* is an open-source software library for learning owl class expressions at large scale. 
@@ -15,7 +15,7 @@ $E^+$ and $E^-$, learning [OWL Class expression](https://www.w3.org/TR/owl2-synt $$\forall p \in E^+\ \mathcal{K} \models H(p) \wedge \forall n \in E^-\ \mathcal{K} \not \models H(n).$$ -To tackle this supervised learnign problem, ontolearn offers many symbolic, neuro-sybmoloc and deep learning based Learning algorithms: +To tackle this supervised learning problem, ontolearn offers many symbolic, neuro-symbolic and deep learning based Learning algorithms: - **Drill** → [Neuro-Symbolic Class Expression Learning](https://www.ijcai.org/proceedings/2023/0403.pdf) - **EvoLearner** → [EvoLearner: Learning Description Logics with Evolutionary Algorithms](https://dl.acm.org/doi/abs/10.1145/3485447.3511925) - **NCES2** → (soon) [Neural Class Expression Synthesis in ALCHIQ(D)](https://papers.dice-research.org/2023/ECML_NCES2/NCES2_public.pdf) @@ -42,24 +42,22 @@ wget https://files.dice-research.org/projects/Ontolearn/KGs.zip -O ./KGs.zip && # To download learning problems wget https://files.dice-research.org/projects/Ontolearn/LPs.zip -O ./LPs.zip && unzip LPs.zip ``` -```shell -pytest -p no:warnings -x # Running 64 tests takes ~ 6 mins -``` ## Learning OWL Class Expression ```python from ontolearn.learners import TDL from ontolearn.triple_store import TripleStore +from ontolearn.knowledge_base import KnowledgeBase from ontolearn.learning_problem import PosNegLPStandard from owlapy.owl_individual import OWLNamedIndividual from owlapy import owl_expression_to_sparql, owl_expression_to_dl -# (1) Initialize Triplestore +# (1) Initialize Triplestore or KnowledgeBase # sudo docker run -p 3030:3030 -e ADMIN_PASSWORD=pw123 stain/jena-fuseki -# Login http://localhost:3030/#/ with admin and pw123 -# Create a new dataset called family and upload KGs/Family/family.owl -kb = TripleStore(url="http://localhost:3030/family") +# Login http://localhost:3030/#/ with admin and pw123 and upload KGs/Family/family.owl +# kb = TripleStore(url="http://localhost:3030/family") +kb = 
KnowledgeBase(path="KGs/Family/father.owl") # (2) Initialize a learner. -model = TDL(knowledge_base=kb) +model = TDL(knowledge_base=kb, use_nominals=True) # (3) Define a description logic concept learning problem. lp = PosNegLPStandard(pos={OWLNamedIndividual("http://example.com/father#stefan")}, neg={OWLNamedIndividual("http://example.com/father#heinz"), @@ -67,15 +65,44 @@ lp = PosNegLPStandard(pos={OWLNamedIndividual("http://example.com/father#stefan" OWLNamedIndividual("http://example.com/father#michelle")}) # (4) Learn description logic concepts best fitting (3). h = model.fit(learning_problem=lp).best_hypotheses() -print(h) +print(h) print(owl_expression_to_dl(h)) -print(owl_expression_to_sparql(expression=h)) +print(owl_expression_to_sparql(expression=h)) +""" +OWLObjectSomeValuesFrom(property=OWLObjectProperty(IRI('http://example.com/father#','hasChild')),filler=OWLObjectOneOf((OWLNamedIndividual(IRI('http://example.com/father#','markus')),))) + +∃ hasChild.{markus} + +SELECT + DISTINCT ?x WHERE { +?x <http://example.com/father#hasChild> ?s_1 . + FILTER ( ?s_1 IN ( +<http://example.com/father#markus> + ) ) + } +""" +print(model.classification_report) +""" +Classification Report: Negatives: -1 and Positives 1 + precision recall f1-score support + + Negative 1.00 1.00 1.00 3 + Positive 1.00 1.00 1.00 1 + + accuracy 1.00 4 + macro avg 1.00 1.00 1.00 4 +weighted avg 1.00 1.00 1.00 4 +""" ``` ## Learning OWL Class Expression over DBpedia ```python +from ontolearn.learners import TDL +from ontolearn.triple_store import TripleStore +from ontolearn.learning_problem import PosNegLPStandard +from owlapy.owl_individual import OWLNamedIndividual +from owlapy import owl_expression_to_sparql, owl_expression_to_dl from ontolearn.utils.static_funcs import save_owl_class_expressions - # (1) Initialize Triplestore kb = TripleStore(url="http://dice-dbpedia.cs.upb.de:9080/sparql") # (3) Initialize a learner. 
@@ -134,17 +161,59 @@ TDL (a more scalable learner) can also be used as follows ```python import json import requests +response = requests.get('http://0.0.0.0:8000/cel', + headers={'accept': 'application/json', 'Content-Type': 'application/json'}, + json={"pos": examples['positive_examples'], + "neg": examples['negative_examples'], + "model": "TDL"}) +print(response.json()) +``` +NCES (another scalable learner). The following will first train NCES if the provided path `path_to_pretrained_nces` does not exist +```python +import json +import requests with open(f"LPs/Mutagenesis/lps.json") as json_file: learning_problems = json.load(json_file)["problems"] +## This trains NCES before solving the provided learning problems. Expect poor performance for this number of epochs, and this training data size. +## If GPU is available, set `num_of_training_learning_problems` to 10_000 or more. Set `nces_train_epochs` to 300 or more, and increase `nces_batch_size`. for str_target_concept, examples in learning_problems.items(): response = requests.get('http://0.0.0.0:8000/cel', headers={'accept': 'application/json', 'Content-Type': 'application/json'}, json={"pos": examples['positive_examples'], "neg": examples['negative_examples'], - "model": "TDL"}) + "model": "NCES", + "path_embeddings": "mutagenesis_embeddings/Keci_entity_embeddings.csv", + "path_to_pretrained_nces": None, + # if pretrained_nces exists, load weights, otherwise train one and save it + "num_of_training_learning_problems": 100, + "nces_train_epochs": 5, + "nces_batch_size": 16 + }) print(response.json()) ``` +Now this will use pretrained weights for NCES + +```python +import json +import requests +with open(f"LPs/Mutagenesis/lps.json") as json_file: + learning_problems = json.load(json_file)["problems"] +for str_target_concept, examples in learning_problems.items(): + response = requests.get('http://0.0.0.0:8000/cel', + headers={'accept': 'application/json', 'Content-Type': 'application/json'}, + json={"pos": 
examples['positive_examples'], + "neg": examples['negative_examples'], + "model": "NCES", + "path_embeddings": "./NCESData/mutagenesis/embeddings/ConEx_entity_embeddings.csv", + "path_to_pretrained_nces": "./NCESData/mutagenesis/trained_models/", + # if pretrained_nces exists, load weights, otherwise train one and save it + "num_of_training_learning_problems": 100, + "nces_train_epochs": 5, + "nces_batch_size": 16 + }) + print(response.json()) +``` @@ -224,14 +293,29 @@ python examples/concept_learning_cv_evaluation.py --kb ./KGs/Carcinogenesis/carc ## Development +
To see the results - + Creating a feature branch **refactoring** from development branch ```shell git branch refactoring develop ``` +Each feature branch must be merged to develop branch. To this end, the tests must run without a problem: +```shell +# To download knowledge graphs +wget https://files.dice-research.org/projects/Ontolearn/KGs.zip -O ./KGs.zip && unzip KGs.zip +# To download learning problems +wget https://files.dice-research.org/projects/Ontolearn/LPs.zip -O ./LPs.zip && unzip LPs.zip +# Download weights for some model for few tests +wget https://files.dice-research.org/projects/NCES/NCES_Ontolearn_Data/NCESData.zip -O ./NCESData.zip && unzip NCESData.zip && rm NCESData.zip +wget https://files.dice-research.org/projects/Ontolearn/CLIP/CLIPData.zip && unzip CLIPData.zip && rm CLIPData.zip +pytest -p no:warnings -x # Running 76 tests takes ~ 17 mins +``` + + +
## References diff --git a/docs/images/Ontolearn_logo.png b/docs/_static/images/Ontolearn_logo.png similarity index 100% rename from docs/images/Ontolearn_logo.png rename to docs/_static/images/Ontolearn_logo.png diff --git a/docs/_static/images/favicon.ico b/docs/_static/images/favicon.ico new file mode 100644 index 00000000..42e8969e Binary files /dev/null and b/docs/_static/images/favicon.ico differ diff --git a/docs/conf.py b/docs/conf.py index b3070a48..f58964b6 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -98,6 +98,10 @@ '_static' ] +html_logo = '_static/images/Ontolearn_logo.png' + +html_favicon = '_static/images/favicon.ico' + if stanford_theme_mod: html_theme = 'sphinx_rtd_theme' diff --git a/docs/images/tag_coverage.png b/docs/images/tag_coverage.png deleted file mode 100644 index 6750f355..00000000 Binary files a/docs/images/tag_coverage.png and /dev/null differ diff --git a/docs/images/tag_docs.png b/docs/images/tag_docs.png deleted file mode 100644 index 61808213..00000000 Binary files a/docs/images/tag_docs.png and /dev/null differ diff --git a/docs/images/tag_version.png b/docs/images/tag_version.png deleted file mode 100644 index 8d11f9e9..00000000 Binary files a/docs/images/tag_version.png and /dev/null differ diff --git a/docs/usage/01_introduction.md b/docs/usage/01_introduction.md index 3f74ed32..3e9c8f33 100644 --- a/docs/usage/01_introduction.md +++ b/docs/usage/01_introduction.md @@ -1,6 +1,6 @@ # About Ontolearn -**Version:** ontolearn 0.7.1 +**Version:** ontolearn 0.8.0 **GitHub repository:** [https://github.com/dice-group/Ontolearn](https://github.com/dice-group/Ontolearn) diff --git a/docs/usage/03_examples.md b/docs/usage/03_examples.md index 2867b8bc..57b52950 100644 --- a/docs/usage/03_examples.md +++ b/docs/usage/03_examples.md @@ -261,66 +261,6 @@ if __name__ == '__main__': start(parser.parse_args()) ``` ----------------------------------------------------------------------- - -## Ex. 
4: Using Model Adaptor - -To simplify the connection between all the -components, there is a -model adaptor available that automatically constructs and connects them. -Here is how to implement the previous example using the [ModelAdapter](ontolearn.mode_adapter.ModelAdapter): - -```python -from ontolearn.concept_learner import CELOE -from ontolearn.heuristics import CELOEHeuristic -from ontolearn.metrics import Accuracy -from ontolearn.model_adapter import ModelAdapter -from owlapy.owl_individual import OWLNamedIndividual, IRI -from owlapy.namespaces import Namespaces -from owlapy.owl_ontology_manager import OntologyManager -from owlapy.owl_reasoner import SyncReasoner -from owlapy.render import DLSyntaxObjectRenderer - -# Create an reasoner instance -manager = OntologyManager() -onto = manager.load_ontology(IRI.create("KGs/Family/father.owl")) -sync_reasoner = SyncReasoner(onto) - -# Define the learning problem -NS = Namespaces('ex', 'http://example.com/father#') -positive_examples = {OWLNamedIndividual(IRI.create(NS, 'stefan')), - OWLNamedIndividual(IRI.create(NS, 'markus')), - OWLNamedIndividual(IRI.create(NS, 'martin'))} -negative_examples = {OWLNamedIndividual(IRI.create(NS, 'heinz')), - OWLNamedIndividual(IRI.create(NS, 'anna')), - OWLNamedIndividual(IRI.create(NS, 'michelle'))} - -# Define the learning model using ModelAdapter -# Only the class of the learning algorithm is specified -model = ModelAdapter(learner_type=CELOE, - reasoner=sync_reasoner, # (*) - path="KGs/Family/father.owl", - quality_type=Accuracy, - heuristic_type=CELOEHeuristic, # (*) - expansionPenaltyFactor=0.05, - startNodeBonus=1.0, - nodeRefinementPenalty=0.01, - ) - -# No need to construct the IRI here ourselves -model.fit(pos=positive_examples, neg=negative_examples) - -# Create a Description Logics renderer -dlsr = DLSyntaxObjectRenderer() - -# Render the hypothesis to DL syntax -for desc in model.best_hypotheses(1): - print('The result:', dlsr.render(desc.concept), 'has quality', 
desc.quality) -``` - -Lines marked with `(*)` are not strictly required as they happen to be -the default choices. For now, you can use ModelAdapter only for EvoLearner, CELOE and OCEL. - ----------------------------------------------------------- In the next guide we will explore the [KnowledgeBase](ontolearn.knowledge_base.KnowledgeBase) class that is needed to diff --git a/docs/usage/04_knowledge_base.md b/docs/usage/04_knowledge_base.md index ff207a72..654a0752 100644 --- a/docs/usage/04_knowledge_base.md +++ b/docs/usage/04_knowledge_base.md @@ -2,8 +2,8 @@ In Ontolearn we represent a knowledge base by the class [KnowledgeBase](ontolearn.knowledge_base.KnowledgeBase) which contains two main class attributes, -an ontology [OWLOntology](https://dice-group.github.io/owlapy/autoapi/owlapy/owl_ontology/index.html#owlapy.owl_ontology.OWLOntology) -and a reasoner [OWLReasoner](https://dice-group.github.io/owlapy/autoapi/owlapy/owl_reasoner/index.html#owlapy.owl_reasoner.OWLReasoner). +an ontology [AbstractOWLOntology](https://dice-group.github.io/owlapy/autoapi/owlapy/owl_ontology/index.html#owlapy.owl_ontology.AbstractOWLOntology) +and a reasoner [AbstractOWLReasoner](https://dice-group.github.io/owlapy/autoapi/owlapy/owl_reasoner/index.html#owlapy.owl_reasoner.AbstractOWLReasoner). It also contains the class and properties hierarchy as well as other Ontology-related attributes required for the Structured Machine Learning library. 
@@ -19,7 +19,7 @@ Therefore, differently from the ontology you can use methods that require reason the methods for each in the links below: - [KnowledgeBase](ontolearn.knowledge_base.KnowledgeBase) -- [OWLOntology](https://dice-group.github.io/owlapy/autoapi/owlapy/owl_ontology/index.html#owlapy.owl_ontology.OWLOntology) +- [AbstractOWLOntology](https://dice-group.github.io/owlapy/autoapi/owlapy/owl_ontology/index.html#owlapy.owl_ontology.AbstractOWLOntology) In summary: @@ -47,7 +47,7 @@ kb = KnowledgeBase(path="file://KGs/Family/father.owl") ``` What happens in the background is that the ontology located in this path will be loaded -in the `OWLOntology` object of `kb` as done [here](https://dice-group.github.io/owlapy/usage/ontologies.html#loading-an-ontology). +in the `AbstractOWLOntology` object of `kb` as done [here](https://dice-group.github.io/owlapy/usage/ontologies.html#loading-an-ontology). In our recent version you can also initialize a knowledge base using a dataset hosted in a triplestore. Since that knowledge base is mainly used for executing a concept learner, we cover that matter more in depth diff --git a/docs/usage/06_concept_learners.md b/docs/usage/06_concept_learners.md index 11ee24ac..8631584b 100644 --- a/docs/usage/06_concept_learners.md +++ b/docs/usage/06_concept_learners.md @@ -53,7 +53,7 @@ hardcoded lines which we can now simply access by loading the json file. 
Below i an example file that we are naming `synthetic_problems.json` showing how should it look: { - "data_path": "../KGs/Family/family-benchmark_rich_background2.owl", + "data_path": "../KGs/Family/family-benchmark_rich_background.owl", "learning_problem": { "positive_examples": [ "http://www.benchmark.org/family#F2F28", @@ -142,8 +142,8 @@ and `negative_examples` to `OWLNamedIndividual`: from ontolearn.learning_problem import PosNegLPStandard from owlapy.owl_individual import IRI, OWLNamedIndividual -typed_pos = set(map(OWLNamedIndividual, map(IRI.create, p))) -typed_neg = set(map(OWLNamedIndividual, map(IRI.create, n))) +typed_pos = set(map(OWLNamedIndividual, map(IRI.create, positive_examples))) +typed_neg = set(map(OWLNamedIndividual, map(IRI.create, negative_examples))) lp = PosNegLPStandard(pos=typed_pos, neg=typed_neg) ``` diff --git a/docs/usage/09_further_resources.md b/docs/usage/09_further_resources.md index 4af02ab1..40fd7a53 100644 --- a/docs/usage/09_further_resources.md +++ b/docs/usage/09_further_resources.md @@ -118,41 +118,40 @@ Name Stmts Miss Cover Missing ------------------------------------------------------------------------ ontolearn/__init__.py 1 0 100% ontolearn/abstracts.py 60 0 100% -ontolearn/base_concept_learner.py 160 2 99% 313, 317 +ontolearn/base_concept_learner.py 158 2 99% 311, 315 ontolearn/base_nces.py 38 0 100% -ontolearn/clip_architectures.py 93 0 100% -ontolearn/clip_trainer.py 94 7 93% 85, 94, 97, 102, 109, 122, 145 +ontolearn/clip_architectures.py 93 77 17% 33-41, 45-56, 61-69, 73-84, 90-101, 105-119, 125-131, 137-141 +ontolearn/clip_trainer.py 94 76 19% 45-50, 53-55, 69-75, 78-151 ontolearn/concept_generator.py 95 2 98% 68, 84 -ontolearn/concept_learner.py 699 43 94% 291, 336, 411, 466-467, 533, 972-973, 1033, 1044, 1053, 1065, 1213, 1235, 1237, 1242, 1282-1286, 1325, 1336, 1371, 1374, 1379, 1389, 1391, 1446, 1452, 1457, 1502-1506, 1533, 1542-1545, 1552-1554, 1637, 1639 +ontolearn/concept_learner.py 748 173 77% 219, 294, 
339, 414, 469-470, 536, 975-976, 1036, 1047, 1056, 1068, 1187-1211, 1214-1242, 1245, 1282-1298, 1301-1314, 1320-1382, 1387-1397, 1450, 1458-1463, 1469-1490, 1497-1499, 1544-1548, 1575, 1586-1589, 1596-1598, 1672-1678, 1688-1689, 1694, 1696 ontolearn/data_struct.py 5 0 100% -ontolearn/ea_algorithms.py 57 2 96% 93, 96 +ontolearn/ea_algorithms.py 57 1 98% 93 ontolearn/ea_initialization.py 216 7 97% 93, 97, 310-315 ontolearn/ea_utils.py 88 5 94% 93, 110-111, 114-115 ontolearn/fitness_functions.py 13 0 100% ontolearn/heuristics.py 45 0 100% -ontolearn/knowledge_base.py 342 38 89% 120, 130, 155-156, 158, 161, 168, 172-173, 478-479, 519, 527, 530, 536, 574, 588, 596, 614, 618, 622, 641, 643, 657, 712, 722, 728-733, 782, 1030, 1039, 1049, 1058, 1107 +ontolearn/knowledge_base.py 340 53 84% 120, 130, 153-154, 156, 159, 166, 170-171, 175, 479-480, 512, 520, 528, 531, 537, 571, 574-582, 587-588, 595-597, 618, 622, 626, 641-643, 647, 662, 711, 721, 727-732, 779, 1027, 1036, 1046, 1055, 1104 ontolearn/learners/__init__.py 2 0 100% ontolearn/learners/drill.py 30 0 100% -ontolearn/learners/tree_learner.py 173 13 92% 193, 377, 412, 418, 421-427, 435-437, 440, 443, 469 +ontolearn/learners/tree_learner.py 205 28 86% 190, 273-303, 391, 398, 400-404, 420, 423, 444, 453 ontolearn/learning_problem.py 31 1 97% 98 ontolearn/learning_problem_generator.py 16 0 100% ontolearn/lp_generator/__init__.py 2 0 100% ontolearn/lp_generator/generate_data.py 10 0 100% ontolearn/lp_generator/helper_classes.py 125 14 89% 76, 85-93, 116, 135, 169-170 ontolearn/metrics.py 50 0 100% -ontolearn/model_adapter.py 33 0 100% ontolearn/nces_architectures.py 72 0 100% ontolearn/nces_modules.py 53 5 91% 44-45, 68-69, 72 -ontolearn/nces_trainer.py 127 10 92% 70, 74, 83, 87, 147, 156, 159, 164, 173, 185 +ontolearn/nces_trainer.py 127 11 91% 48, 70, 74, 83, 87, 147, 156, 159, 164, 173, 185 ontolearn/nces_utils.py 24 0 100% -ontolearn/owl_neural_reasoner.py 214 11 95% 63, 96, 124, 129, 140, 205, 290, 485, 498-501 
-ontolearn/refinement_operators.py 521 26 95% 167-168, 226, 299, 400-401, 447, 541, 565, 599-601, 746, 782, 888, 916, 961-963, 970, 991-993, 995, 997, 1065, 1087 +ontolearn/owl_neural_reasoner.py 215 11 95% 57, 93, 121, 126, 137, 193, 281, 475, 488-491 +ontolearn/refinement_operators.py 521 31 94% 167-168, 226, 299, 400-401, 447, 541, 565, 599-601, 746, 782, 867-868, 888, 916, 935, 961-963, 967-968, 970, 991-993, 995, 997, 1065, 1087 ontolearn/search.py 293 25 91% 70, 133, 196, 216, 303, 307, 310, 339, 392, 429, 433, 441, 457, 467, 482, 484, 509, 511, 576-577, 666-667, 762, 766, 770 -ontolearn/utils/__init__.py 33 2 94% 55, 95 +ontolearn/utils/__init__.py 33 2 94% 58, 98 ontolearn/utils/log_config.py 19 0 100% ontolearn/utils/oplogging.py 8 0 100% -ontolearn/utils/static_funcs.py 43 2 95% 55, 86 +ontolearn/utils/static_funcs.py 77 31 60% 63-79, 102-106, 124-135, 151, 180 ontolearn/value_splitter.py 159 6 96% 111-113, 118, 127, 130 ------------------------------------------------------------------------ -TOTAL 4044 221 95% +TOTAL 4123 561 86% ``` \ No newline at end of file diff --git a/examples/concept_learning_cv_evaluation.py b/examples/concept_learning_cv_evaluation.py index c9f3b11a..b70b9c48 100644 --- a/examples/concept_learning_cv_evaluation.py +++ b/examples/concept_learning_cv_evaluation.py @@ -2,6 +2,9 @@ python examples/concept_learning_cv_evaluation.py --lps LPs/Family/lps.json --kb KGs/Family/family.owl --max_runtime 3 --report family.csv python examples/concept_learning_cv_evaluation.py --lps LPs/Carcinogenesis/lps.json --kb KGs/Carcinogenesis/carcinogenesis.owl --max_runtime 3 --report carcinogenesis.csv +python examples/concept_learning_cv_evaluation.py --lps LPs/Carcinogenesis/lps.json --kb KGs/Carcinogenesis/carcinogenesis.owl --max_runtime 3 --report carcinogenesis.csv --path_of_nces_embeddings "TODO" --path_of_clip_embeddings "TODO" + + """ import json import time @@ -23,14 +26,13 @@ from ontolearn.utils.static_funcs import compute_f1_score 
pd.set_option("display.precision", 5) - +""" def get_embedding_path(ftp_link: str, embeddings_path_arg: str, kb_path_arg: str)->str: - """ - ftp_link: ftp link to download data - embeddings_path_arg:local path of an embedding file - kb_path_arg:local path of an RDF KG - """ + # ftp_link: ftp link to download data + # embeddings_path_arg:local path of an embedding file + # kb_path_arg:local path of an RDF KG + if embeddings_path_arg is None or (embeddings_path_arg is not None and not os.path.exists(embeddings_path_arg)): file_name = ftp_link.split("/")[-1] @@ -67,6 +69,7 @@ def get_embedding_path(ftp_link: str, embeddings_path_arg: str, kb_path_arg: str else: return embeddings_path_arg +""" def dl_concept_learning(args): with open(args.lps) as json_file: @@ -86,23 +89,21 @@ def dl_concept_learning(args): kwargs_classifier={"random_state": 1}, max_runtime=args.max_runtime, verbose=0) + nces = NCES(knowledge_base_path=args.kb, quality_func=F1(), - path_of_embeddings=get_embedding_path("https://files.dice-research.org/projects/NCES/NCES_Ontolearn_Data/NCESData.zip",args.path_of_nces_embeddings, args.kb), - pretrained_model_name=["LSTM", "GRU", "SetTransformer"], + path_of_embeddings=args.path_of_nces_embeddings, + learner_names=["LSTM", "GRU", "SetTransformer"], num_predictions=100, verbose=0) - args.path_of_clip_embeddings = get_embedding_path( - "https://files.dice-research.org/projects/Ontolearn/CLIP/CLIPData.zip", - args.path_of_clip_embeddings, args.kb) clip = CLIP(knowledge_base=kb, - #refinement_operator=ExpressRefinement(kb, use_inverse=True, use_numeric_datatypes=True, sample_fillers_count=3, expressivity=0.2), refinement_operator=ModifiedCELOERefinement(kb), quality_func=F1(), max_num_of_concepts_tested=int(1e9), max_runtime=args.max_runtime, path_of_embeddings=args.path_of_clip_embeddings, pretrained_predictor_name=["LSTM", "GRU", "SetTransformer"], load_pretrained=True) + # dictionary to store the data data = dict() if "problems" in settings: @@ -261,7 
+262,7 @@ def dl_concept_learning(args): start_time = time.time() # () Fit model training dataset - pred_nces = nces.fit(train_lp.pos, train_lp.neg).best_hypotheses(n=1) + pred_nces = nces.fit(train_lp).best_hypotheses(n=1) print("NCES ends..", end="\t") rt_nces = time.time() - start_time @@ -281,7 +282,6 @@ def dl_concept_learning(args): print(f"NCES Test Quality: {test_f1_nces:.3f}", end="\t") print(f"NCES Runtime: {rt_nces:.3f}") - #""" print("CLIP starts..", end="\t") start_time = time.time() pred_clip = clip.fit(train_lp).best_hypotheses() @@ -302,7 +302,6 @@ def dl_concept_learning(args): print(f"CLIP Train Quality: {train_f1_clip:.3f}", end="\t") print(f"CLIP Test Quality: {test_f1_clip:.3f}", end="\t") print(f"CLIP Runtime: {rt_clip:.3f}") - #""" df = pd.DataFrame.from_dict(data) df.to_csv(args.report, index=False) diff --git a/examples/concept_learning_with_celoe_heuristic_ma.py b/examples/concept_learning_with_celoe_heuristic_ma.py index 05a9c327..00ab6880 100644 --- a/examples/concept_learning_with_celoe_heuristic_ma.py +++ b/examples/concept_learning_with_celoe_heuristic_ma.py @@ -4,13 +4,10 @@ from ontolearn.concept_learner import CELOE from ontolearn.knowledge_base import KnowledgeBase -from ontolearn.model_adapter import ModelAdapter, Trainer +from ontolearn.executor import Trainer from owlapy.owl_individual import OWLNamedIndividual, IRI from owlapy.class_expression import OWLClass from ontolearn.utils import setup_logging -from owlapy.owl_ontology import Ontology -from owlapy.owl_reasoner import SyncReasoner, BaseReasoner -from typing import cast setup_logging() try: @@ -55,16 +52,12 @@ typed_neg = set(map(OWLNamedIndividual, map(IRI.create, n))) kb = KnowledgeBase(path=settings['data_path']) - reasoner = SyncReasoner(cast(Ontology, kb.ontology()), BaseReasoner.HERMIT) + target_kb = kb.ignore_and_copy(concepts_to_ignore) - model = ModelAdapter(path=settings['data_path'], - ignore=concepts_to_ignore, - reasoner=reasoner, - learner_type=CELOE, - 
max_runtime=5, - max_num_of_concepts_tested=10_000_000_000, - iter_bound=10_000_000_000, - expansionPenaltyFactor=0.01) + model = CELOE(target_kb, + max_runtime=5, + max_num_of_concepts_tested=10_000_000_000, + iter_bound=10_000_000_000,) model = model.fit(pos=typed_pos, neg=typed_neg) @@ -79,6 +72,7 @@ # Using Trainer model2 = CELOE(knowledge_base=kb, max_runtime=5) + reasoner = target_kb.reasoner trainer = Trainer(model, reasoner) trainer.fit(pos=typed_pos, neg=typed_neg) hypotheses = list(model2.best_hypotheses(n=3)) diff --git a/examples/dl_learner.py b/examples/dl_learner.py index fd9be956..22dda609 100644 --- a/examples/dl_learner.py +++ b/examples/dl_learner.py @@ -18,7 +18,12 @@ Author: Caglar Demir """ from ontolearn.binders import DLLearnerBinder +from owlapy.iri import IRI +from owlapy.owl_individual import OWLNamedIndividual import json + +from ontolearn.learning_problem import PosNegLPStandard + # (1) Load learning problems with open('synthetic_problems.json') as json_file: settings = json.load(json_file) @@ -36,9 +41,14 @@ p = examples['positive_examples'] n = examples['negative_examples'] - best_pred_celoe = celoe.fit(pos=p, neg=n, max_runtime=1).best_hypothesis() + positives = {OWLNamedIndividual(IRI.create(i)) for i in p} + negatives = {OWLNamedIndividual(IRI.create(i)) for i in n} + + lp = PosNegLPStandard(pos=positives, neg=negatives) + + best_pred_celoe = celoe.fit(lp, max_runtime=1).best_hypothesis() print(best_pred_celoe) - best_pred_ocel = ocel.fit(pos=p, neg=n, max_runtime=1).best_hypothesis() + best_pred_ocel = ocel.fit(lp, max_runtime=1).best_hypothesis() print(best_pred_ocel) - best_pred_eltl = eltl.fit(pos=p, neg=n, max_runtime=1).best_hypothesis() + best_pred_eltl = eltl.fit(lp, max_runtime=1).best_hypothesis() print(best_pred_eltl) diff --git a/examples/example_reasoner.py b/examples/example_reasoner.py index 677167f9..566f3242 100644 --- a/examples/example_reasoner.py +++ b/examples/example_reasoner.py @@ -118,14 +118,14 @@ onto = 
kb.ontology manager = onto.get_owl_ontology_manager() -manager.add_axiom(onto, OWLEquivalentObjectPropertiesAxiom([r6, r5])) -manager.add_axiom(onto, OWLEquivalentObjectPropertiesAxiom([r5, r6])) -manager.add_axiom(onto, OWLObjectPropertyDomainAxiom(r1, ST)) +onto.add_axiom(OWLEquivalentObjectPropertiesAxiom([r6, r5])) +onto.add_axiom(OWLEquivalentObjectPropertiesAxiom([r5, r6])) +onto.add_axiom(OWLObjectPropertyDomainAxiom(r1, ST)) -manager.add_axiom(onto, OWLSubClassOfAxiom(R, r5Q)) -manager.add_axiom(onto, OWLSubClassOfAxiom(ST, U)) +onto.add_axiom(OWLSubClassOfAxiom(R, r5Q)) +onto.add_axiom(OWLSubClassOfAxiom(ST, U)) -# manager.save_ontology(onto, IRI.create('file:/' + 'test' + '.owl')) +# onto.save(IRI.create('test' + '.owl')) base_reasoner = OntologyReasoner(onto) # reasoner = FastInstanceCheckerReasoner( diff --git a/examples/nces_notebook1.ipynb b/examples/nces_notebook1.ipynb index bc5cb4ec..7732c54d 100644 --- a/examples/nces_notebook1.ipynb +++ b/examples/nces_notebook1.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "registered-vietnamese", + "id": "assured-bible", "metadata": {}, "source": [ "From the main directory \"Ontolearn\", run the commands for NCES data mentioned [here](https://ontolearn-docs-dice-group.netlify.app/usage/02_installation#download-external-files) to download pretrained models and datasets." @@ -10,7 +10,7 @@ }, { "cell_type": "markdown", - "id": "ignored-brunswick", + "id": "pleasant-transaction", "metadata": {}, "source": [ "## Inference with NCES" @@ -19,20 +19,21 @@ { "cell_type": "code", "execution_count": 1, - "id": "operational-boating", + "id": "romantic-valley", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/home/nkouagou/.conda/envs/onto/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + "/opt/conda/envs/onto_env/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from .autonotebook import tqdm as notebook_tqdm\n" ] } ], "source": [ "from ontolearn.concept_learner import NCES\n", + "from ontolearn.learning_problem import PosNegLPStandard\n", "from ontolearn.knowledge_base import KnowledgeBase\n", "from ontolearn.metrics import F1\n", "from owlapy.parser import DLSyntaxParser" @@ -41,27 +42,28 @@ { "cell_type": "code", "execution_count": 2, - "id": "nonprofit-conditions", + "id": "streaming-payday", "metadata": {}, "outputs": [ { - "name": "stderr", + "name": "stdout", "output_type": "stream", "text": [ - "/home/nkouagou/.conda/envs/onto/lib/python3.10/site-packages/torch/cuda/__init__.py:141: UserWarning: CUDA initialization: The NVIDIA driver on your system is too old (found version 6050). Please update your GPU driver by downloading and installing a new version from the URL: http://www.nvidia.com/Download/index.aspx Alternatively, go to: https://pytorch.org to install a PyTorch version that has been compiled with your version of the CUDA driver. 
(Triggered internally at ../c10/cuda/CUDAFunctions.cpp:108.)\n", - " return torch._C._cuda_getDeviceCount() > 0\n" + "\n", + " Loaded NCES weights!\n", + "\n" ] } ], "source": [ - "nces = NCES(knowledge_base_path=\"../NCESData/family/family.owl\", quality_func=F1(), num_predictions=100, learner_name=\"SetTransformer\",\n", - " path_of_embeddings=\"../NCESData/family/embeddings/ConEx_entity_embeddings.csv\", load_pretrained=True, max_length=48, proj_dim=128, rnn_n_layers=2, drop_prob=0.1, num_heads=4, num_seeds=1, num_inds=32, pretrained_model_name=\"SetTransformer\")" + "nces = NCES(knowledge_base_path=\"../NCESData/family/family.owl\", quality_func=F1(), num_predictions=100, learner_names=[\"SetTransformer\"],\n", + " path_of_embeddings=\"../NCESData/family/embeddings/ConEx_entity_embeddings.csv\", load_pretrained=True, max_length=48, proj_dim=128, rnn_n_layers=2, drop_prob=0.1, num_heads=4, num_seeds=1, num_inds=32)" ] }, { "cell_type": "code", "execution_count": 3, - "id": "private-advocacy", + "id": "fifth-sampling", "metadata": {}, "outputs": [], "source": [ @@ -71,7 +73,7 @@ { "cell_type": "code", "execution_count": 4, - "id": "optional-persian", + "id": "compatible-mexico", "metadata": {}, "outputs": [], "source": [ @@ -80,7 +82,7 @@ }, { "cell_type": "markdown", - "id": "worse-cursor", + "id": "durable-venice", "metadata": {}, "source": [ "### Let's learn the concept ``Father''" @@ -89,7 +91,7 @@ { "cell_type": "code", "execution_count": 5, - "id": "varied-laundry", + "id": "eleven-director", "metadata": {}, "outputs": [], "source": [ @@ -99,7 +101,7 @@ { "cell_type": "code", "execution_count": 6, - "id": "elder-mercury", + "id": "graphic-context", "metadata": {}, "outputs": [], "source": [ @@ -109,7 +111,7 @@ { "cell_type": "code", "execution_count": 7, - "id": "precious-kernel", + "id": "hollow-consultancy", "metadata": {}, "outputs": [], "source": [ @@ -119,7 +121,7 @@ { "cell_type": "code", "execution_count": 8, - "id": "oriental-brighton", + "id": 
"subjective-occurrence", "metadata": {}, "outputs": [], "source": [ @@ -128,7 +130,7 @@ }, { "cell_type": "markdown", - "id": "flush-choir", + "id": "interstate-center", "metadata": {}, "source": [ "#### Prediction with SetTransformer" @@ -137,25 +139,26 @@ { "cell_type": "code", "execution_count": 9, - "id": "interesting-sunglasses", + "id": "becoming-python", "metadata": {}, "outputs": [], "source": [ - "nodes = nces.fit(pos, neg).best_hypotheses(3)" + "lp = PosNegLPStandard(pos=pos, neg=neg)\n", + "nodes = nces.fit(lp).best_hypotheses(3)" ] }, { "cell_type": "code", "execution_count": 10, - "id": "finished-schema", + "id": "fabulous-preliminary", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "[ at 0x7485924\tFather\tQuality:1.0\tLength:1\t|Indv.|:60,\n", - " at 0xb329d38\tFather ⊔ Father\tQuality:1.0\tLength:3\t|Indv.|:60,\n", - " at 0x7c09448\tGrandfather ⊔ Father\tQuality:1.0\tLength:3\t|Indv.|:60]" + "[OWLObjectIntersectionOf((OWLClass(IRI('http://www.benchmark.org/family#','Father')), OWLObjectUnionOf((OWLClass(IRI('http://www.benchmark.org/family#','Father')), OWLObjectSomeValuesFrom(property=OWLObjectProperty(IRI('http://www.benchmark.org/family#','hasParent')),filler=OWLClass(IRI('http://www.benchmark.org/family#','PersonWithASibling'))))), OWLObjectAllValuesFrom(property=OWLObjectProperty(IRI('http://www.benchmark.org/family#','hasParent')),filler=OWLClass(IRI('http://www.w3.org/2002/07/owl#','Thing'))))),\n", + " OWLObjectUnionOf((OWLClass(IRI('http://www.benchmark.org/family#','Father')), OWLClass(IRI('http://www.benchmark.org/family#','Father')), OWLClass(IRI('http://www.benchmark.org/family#','Grandfather')), OWLClass(IRI('http://www.benchmark.org/family#','Grandfather')))),\n", + " OWLClass(IRI('http://www.benchmark.org/family#','Father'))]" ] }, "execution_count": 10, @@ -170,7 +173,7 @@ { "cell_type": "code", "execution_count": 11, - "id": "radical-spectrum", + "id": "harmful-consumer", "metadata": {}, "outputs": [ { @@ -190,7 +193,7 
@@ }, { "cell_type": "markdown", - "id": "norwegian-nudist", + "id": "filled-canvas", "metadata": {}, "source": [ "#### Prediction with GRU" @@ -199,29 +202,39 @@ { "cell_type": "code", "execution_count": 12, - "id": "molecular-emperor", + "id": "necessary-illinois", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + " Loaded NCES weights!\n", + "\n" + ] + } + ], "source": [ - "nces.pretrained_model_name = 'GRU'\n", + "nces.learner_names = ['GRU']\n", "nces.refresh()\n", - "nodes = list(nces.fit(pos, neg).best_predictions)" + "nodes = list(nces.fit(lp).best_predictions)" ] }, { "cell_type": "code", "execution_count": 13, - "id": "mechanical-japanese", + "id": "gothic-listing", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "[ at 0x7485954\tFather ⊔ Grandfather\tQuality:1.0\tLength:3\t|Indv.|:60,\n", - " at 0x7c09948\tFather ⊔ Father\tQuality:1.0\tLength:3\t|Indv.|:60,\n", - " at 0x7c27918\tFather ⊔ Father\tQuality:1.0\tLength:3\t|Indv.|:60,\n", - " at 0x7c27944\tFather ⊔ Father ⊔ Grandfather\tQuality:1.0\tLength:5\t|Indv.|:60,\n", - " at 0x7486610\tFather\tQuality:1.0\tLength:1\t|Indv.|:60]" + "[ at 0x69d8928\tFather ⊔ Father\tQuality:1.0\tLength:3\t|Indv.|:60,\n", + " at 0x70389f8\tFather\tQuality:1.0\tLength:1\t|Indv.|:60,\n", + " at 0x7038a54\tFather ⊔ Father\tQuality:1.0\tLength:3\t|Indv.|:60,\n", + " at 0x69d8884\tGrandfather ⊔ Father\tQuality:1.0\tLength:3\t|Indv.|:60,\n", + " at 0x69db160\tFather ⊔ Grandfather\tQuality:1.0\tLength:3\t|Indv.|:60]" ] }, "execution_count": 13, @@ -235,7 +248,7 @@ }, { "cell_type": "markdown", - "id": "certain-winning", + "id": "bizarre-practice", "metadata": {}, "source": [ "#### Prediction with LSTM" @@ -244,29 +257,39 @@ { "cell_type": "code", "execution_count": 14, - "id": "separate-simon", + "id": "certain-corner", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + " Loaded 
NCES weights!\n", + "\n" + ] + } + ], "source": [ - "nces.pretrained_model_name = 'LSTM'\n", + "nces.learner_names = ['LSTM']\n", "nces.refresh()\n", - "nodes = list(nces.fit(pos, neg).best_predictions)" + "nodes = list(nces.fit(lp).best_predictions)" ] }, { "cell_type": "code", "execution_count": 15, - "id": "apart-constitutional", + "id": "renewable-virgin", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "[ at 0x74859e0\tFather\tQuality:1.0\tLength:1\t|Indv.|:60,\n", - " at 0x7c096b4\tFather ⊔ Father ⊔ Grandfather\tQuality:1.0\tLength:5\t|Indv.|:60,\n", - " at 0x74685d4\tFather\tQuality:1.0\tLength:1\t|Indv.|:60,\n", - " at 0x7c22228\tFather ⊔ (Person ⊓ (Grandfather ⊔ (∃ hasParent.(¬Parent))))\tQuality:1.0\tLength:10\t|Indv.|:60,\n", - " at 0x7487460\tFather\tQuality:1.0\tLength:1\t|Indv.|:60]" + "[ at 0x69da730\tFather\tQuality:1.0\tLength:1\t|Indv.|:60,\n", + " at 0x7035258\tMale ⊓ (∃ hasChild.(Female ⊔ (∃ hasParent.(¬Daughter))))\tQuality:1.0\tLength:10\t|Indv.|:60,\n", + " at 0x69bcd10\tFather ⊓ (Father ⊓ (Male ⊔ (∃ hasSibling.(¬Grandfather))))\tQuality:1.0\tLength:10\t|Indv.|:60,\n", + " at 0x69d93b0\tFather ⊔ Father\tQuality:1.0\tLength:3\t|Indv.|:60,\n", + " at 0x7038a78\tFather ⊔ Father\tQuality:1.0\tLength:3\t|Indv.|:60]" ] }, "execution_count": 15, @@ -280,7 +303,7 @@ }, { "cell_type": "markdown", - "id": "streaming-transfer", + "id": "public-cement", "metadata": {}, "source": [ "#### Prediction with ensemble SetTransformer+GRU+LSTM" @@ -289,29 +312,39 @@ { "cell_type": "code", "execution_count": 16, - "id": "welsh-sunrise", + "id": "upset-toyota", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + " Loaded NCES weights!\n", + "\n" + ] + } + ], "source": [ - "nces.pretrained_model_name = ['SetTransformer','GRU','LSTM']\n", - "nces.refresh()\n", - "nodes = list(nces.fit(pos, neg).best_predictions)" + "nces.learner_names = ['SetTransformer','GRU','LSTM']\n", + 
"nces.refresh() # Load model weights\n", + "nodes = list(nces.fit(lp).best_predictions)" ] }, { "cell_type": "code", "execution_count": 17, - "id": "dramatic-astronomy", + "id": "polyphonic-kansas", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "[ at 0x7485980\tFather ⊔ Father\tQuality:1.0\tLength:3\t|Indv.|:60,\n", - " at 0x7486500\tFather\tQuality:1.0\tLength:1\t|Indv.|:60,\n", - " at 0x7472594\tFather\tQuality:1.0\tLength:1\t|Indv.|:60,\n", - " at 0x7486460\tFather\tQuality:1.0\tLength:1\t|Indv.|:60,\n", - " at 0x7486490\tFather ⊔ Grandfather\tQuality:1.0\tLength:3\t|Indv.|:60]" + "[ at 0x69d8970\tFather ⊔ Grandfather\tQuality:1.0\tLength:3\t|Indv.|:60,\n", + " at 0x69d889c\tFather ⊔ Grandfather\tQuality:1.0\tLength:3\t|Indv.|:60,\n", + " at 0x702cc18\tFather\tQuality:1.0\tLength:1\t|Indv.|:60,\n", + " at 0x69d7358\tFather ⊔ Father\tQuality:1.0\tLength:3\t|Indv.|:60,\n", + " at 0x69d7274\tFather\tQuality:1.0\tLength:1\t|Indv.|:60]" ] }, "execution_count": 17, @@ -325,7 +358,7 @@ }, { "cell_type": "markdown", - "id": "involved-syndicate", + "id": "silent-authorization", "metadata": {}, "source": [ "### Scalability of NCES (solving multiple learning problems in a go!)" @@ -334,7 +367,7 @@ { "cell_type": "code", "execution_count": 18, - "id": "olympic-feeling", + "id": "textile-wildlife", "metadata": {}, "outputs": [], "source": [ @@ -344,7 +377,7 @@ { "cell_type": "code", "execution_count": 19, - "id": "uniform-shuttle", + "id": "subjective-columbia", "metadata": {}, "outputs": [], "source": [ @@ -357,7 +390,7 @@ { "cell_type": "code", "execution_count": 20, - "id": "substantial-crossing", + "id": "cathedral-gibson", "metadata": {}, "outputs": [], "source": [ @@ -368,14 +401,14 @@ { "cell_type": "code", "execution_count": 21, - "id": "bizarre-antibody", + "id": "improved-pickup", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Duration: 2.310948610305786 seconds!\n" + "Duration: 0.46558618545532227 seconds!\n" ] 
} ], @@ -389,7 +422,7 @@ { "cell_type": "code", "execution_count": 22, - "id": "increased-singapore", + "id": "filled-sunglasses", "metadata": {}, "outputs": [ { @@ -407,7 +440,7 @@ { "cell_type": "code", "execution_count": 23, - "id": "naughty-chicago", + "id": "killing-conspiracy", "metadata": {}, "outputs": [ { @@ -424,7 +457,7 @@ }, { "cell_type": "markdown", - "id": "inappropriate-separate", + "id": "august-classics", "metadata": {}, "source": [ "### Change pretrained model name, e.g., use ensemble model prediction" @@ -433,17 +466,17 @@ { "cell_type": "code", "execution_count": 24, - "id": "transsexual-migration", + "id": "portuguese-thinking", "metadata": {}, "outputs": [], "source": [ - "nces.pretrained_model_name = ['SetTransformer', 'GRU']" + "nces.learner_names = ['SetTransformer', 'GRU']" ] }, { "cell_type": "code", "execution_count": 25, - "id": "amazing-mixer", + "id": "instant-hearts", "metadata": {}, "outputs": [ { @@ -458,15 +491,25 @@ } ], "source": [ - "nces.pretrained_model_name" + "nces.learner_names" ] }, { "cell_type": "code", "execution_count": 26, - "id": "received-hartford", + "id": "several-bailey", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + " Loaded NCES weights!\n", + "\n" + ] + } + ], "source": [ "nces.refresh()" ] @@ -474,14 +517,14 @@ { "cell_type": "code", "execution_count": 27, - "id": "visible-arizona", + "id": "elementary-kitchen", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Duration: 3.00120210647583 seconds!\n" + "Duration: 0.8283030986785889 seconds!\n" ] } ], @@ -495,7 +538,7 @@ { "cell_type": "code", "execution_count": 28, - "id": "close-poker", + "id": "least-training", "metadata": {}, "outputs": [ { @@ -515,7 +558,7 @@ }, { "cell_type": "markdown", - "id": "sought-degree", + "id": "australian-layout", "metadata": {}, "source": [ "## Training NCES" @@ -524,7 +567,7 @@ { "cell_type": "code", 
"execution_count": 29, - "id": "structured-digit", + "id": "constitutional-style", "metadata": {}, "outputs": [], "source": [ @@ -536,7 +579,7 @@ { "cell_type": "code", "execution_count": 30, - "id": "lesser-ownership", + "id": "exterior-illinois", "metadata": {}, "outputs": [], "source": [ @@ -546,19 +589,27 @@ { "cell_type": "code", "execution_count": 31, - "id": "golden-tulsa", + "id": "sticky-artist", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "!!!Returning untrained models, could not load pretrained. Check the `load_pretrained parameter` or train the models using NCES.train(data).\n" + ] + } + ], "source": [ - "nces = NCES(knowledge_base_path=\"../NCESData/family/family.owl\", learner_name=\"SetTransformer\",\n", + "nces = NCES(knowledge_base_path=\"../NCESData/family/family.owl\", learner_names=[\"SetTransformer\"],\n", " path_of_embeddings=\"../NCESData/family/embeddings/ConEx_entity_embeddings.csv\", max_length=48, proj_dim=128, rnn_n_layers=2, drop_prob=0.1, num_heads=4, num_seeds=1, num_inds=32,\n", - " load_pretrained=False, pretrained_model_name=\"SetTransformer\")" + " load_pretrained=False)" ] }, { "cell_type": "code", "execution_count": 32, - "id": "handed-thermal", + "id": "general-adult", "metadata": {}, "outputs": [ { @@ -569,7 +620,7 @@ "Synthesizer: 515296\n", "********************Trainable model size********************\n", "\n", - "Training on CPU, it may take long...\n", + "GPU available !\n", "\n", "##################################################\n", "\n", @@ -583,14 +634,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "Loss: 0.5600, Soft Acc: 73.91%, Hard Acc: 70.67%: 100%|██████████| 20/20 [00:33<00:00, 1.66s/it]" + "Loss: 0.4141, Soft Acc: 72.98%, Hard Acc: 72.07%: 100%|██████████| 20/20 [00:05<00:00, 3.43it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Top performance: loss: 0.5600, soft accuracy: 74.71% ... 
hard accuracy: 72.45%\n", + "Top performance: loss: 0.4141, soft accuracy: 77.84% ... hard accuracy: 76.40%\n", "\n", "SetTransformer saved\n" ] @@ -604,13 +655,13 @@ } ], "source": [ - "nces.train(list(data.items())[-200:], epochs=20, learning_rate=0.001, save_model=True, storage_path=f\"./NCES-{time.time()}/\")" + "nces.train(list(data.items())[-200:], epochs=20, learning_rate=0.001, save_model=True)" ] }, { "cell_type": "code", "execution_count": null, - "id": "still-twist", + "id": "advised-cause", "metadata": {}, "outputs": [], "source": [] @@ -618,7 +669,7 @@ { "cell_type": "code", "execution_count": null, - "id": "mediterranean-crawford", + "id": "challenging-relations", "metadata": {}, "outputs": [], "source": [] @@ -626,9 +677,9 @@ ], "metadata": { "kernelspec": { - "display_name": "onto", + "display_name": "onto_env", "language": "python", - "name": "onto" + "name": "onto_env" }, "language_info": { "codemirror_mode": { diff --git a/examples/nces_notebook2.ipynb b/examples/nces_notebook2.ipynb index c024718f..86f756ed 100644 --- a/examples/nces_notebook2.ipynb +++ b/examples/nces_notebook2.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "convertible-ethics", + "id": "atmospheric-marker", "metadata": {}, "source": [ "From the main directory \"Ontolearn\", run the commands for NCES data mentioned [here](https://ontolearn-docs-dice-group.netlify.app/usage/02_installation#download-external-files) to download pretrained models and datasets." @@ -11,14 +11,14 @@ { "cell_type": "code", "execution_count": 1, - "id": "intended-bullet", + "id": "specific-german", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/home/nkouagou/.conda/envs/onto/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + "/opt/conda/envs/onto_env/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from .autonotebook import tqdm as notebook_tqdm\n" ] } @@ -26,6 +26,7 @@ "source": [ "from ontolearn.concept_learner import NCES\n", "from ontolearn.knowledge_base import KnowledgeBase\n", + "from ontolearn.learning_problem import PosNegLPStandard\n", "from owlapy.parser import DLSyntaxParser\n", "from owlapy.render import DLSyntaxObjectRenderer\n", "import sys\n", @@ -39,27 +40,28 @@ { "cell_type": "code", "execution_count": 2, - "id": "serial-might", + "id": "functional-messaging", "metadata": {}, "outputs": [ { - "name": "stderr", + "name": "stdout", "output_type": "stream", "text": [ - "/home/nkouagou/.conda/envs/onto/lib/python3.10/site-packages/torch/cuda/__init__.py:141: UserWarning: CUDA initialization: The NVIDIA driver on your system is too old (found version 6050). Please update your GPU driver by downloading and installing a new version from the URL: http://www.nvidia.com/Download/index.aspx Alternatively, go to: https://pytorch.org to install a PyTorch version that has been compiled with your version of the CUDA driver. 
(Triggered internally at ../c10/cuda/CUDAFunctions.cpp:108.)\n", - " return torch._C._cuda_getDeviceCount() > 0\n" + "\n", + " Loaded NCES weights!\n", + "\n" ] } ], "source": [ - "nces = NCES(knowledge_base_path=\"../NCESData/family/family.owl\", quality_func=F1(), num_predictions=100, learner_name=\"SetTransformer\",\n", - " path_of_embeddings=\"../NCESData/family/embeddings/ConEx_entity_embeddings.csv\", load_pretrained=True, max_length=48, proj_dim=128, rnn_n_layers=2, drop_prob=0.1, num_heads=4, num_seeds=1, num_inds=32, pretrained_model_name=\"SetTransformer\")" + "nces = NCES(knowledge_base_path=\"../NCESData/family/family.owl\", quality_func=F1(), num_predictions=100, learner_names=[\"SetTransformer\"],\n", + " path_of_embeddings=\"../NCESData/family/embeddings/ConEx_entity_embeddings.csv\", load_pretrained=True, max_length=48, proj_dim=128, rnn_n_layers=2, drop_prob=0.1, num_heads=4, num_seeds=1, num_inds=32)" ] }, { "cell_type": "code", "execution_count": 3, - "id": "precise-tobacco", + "id": "meaningful-hindu", "metadata": {}, "outputs": [], "source": [ @@ -69,7 +71,7 @@ { "cell_type": "code", "execution_count": 4, - "id": "helpful-bonus", + "id": "divine-denial", "metadata": {}, "outputs": [], "source": [ @@ -79,7 +81,7 @@ { "cell_type": "code", "execution_count": 5, - "id": "pleased-circular", + "id": "written-threshold", "metadata": {}, "outputs": [], "source": [ @@ -89,7 +91,7 @@ { "cell_type": "code", "execution_count": 6, - "id": "published-supplement", + "id": "pleasant-hebrew", "metadata": {}, "outputs": [ { @@ -127,7 +129,7 @@ { "cell_type": "code", "execution_count": 7, - "id": "greek-transition", + "id": "lovely-confusion", "metadata": {}, "outputs": [], "source": [ @@ -137,7 +139,7 @@ { "cell_type": "code", "execution_count": 8, - "id": "administrative-disorder", + "id": "false-solution", "metadata": {}, "outputs": [], "source": [ @@ -147,7 +149,7 @@ }, { "cell_type": "markdown", - "id": "instructional-syndrome", + "id": "rolled-thursday", 
"metadata": {}, "source": [ "#### Input examples can be sets or lists" @@ -156,7 +158,7 @@ { "cell_type": "code", "execution_count": 9, - "id": "coated-pressing", + "id": "ahead-threshold", "metadata": {}, "outputs": [], "source": [ @@ -166,7 +168,7 @@ { "cell_type": "code", "execution_count": 10, - "id": "current-floor", + "id": "serial-music", "metadata": {}, "outputs": [], "source": [ @@ -176,7 +178,7 @@ { "cell_type": "code", "execution_count": 11, - "id": "continental-march", + "id": "raised-royalty", "metadata": {}, "outputs": [ { @@ -194,9 +196,19 @@ "len(neg)" ] }, + { + "cell_type": "code", + "execution_count": 12, + "id": "solid-syndicate", + "metadata": {}, + "outputs": [], + "source": [ + "lp = PosNegLPStandard(pos=pos, neg=neg)" + ] + }, { "cell_type": "markdown", - "id": "sound-fitness", + "id": "failing-disclaimer", "metadata": {}, "source": [ "#### Prediction with SetTransformer (default model)" @@ -204,8 +216,8 @@ }, { "cell_type": "code", - "execution_count": 12, - "id": "solar-desire", + "execution_count": 13, + "id": "bound-decline", "metadata": {}, "outputs": [ { @@ -213,21 +225,21 @@ "output_type": "stream", "text": [ "\n", - "Duration: 0.5292303562164307 seconds\n" + "Duration: 0.48354291915893555 seconds\n" ] } ], "source": [ "t0 = time.time()\n", - "node = list(nces.fit(pos, neg).best_predictions)[0]\n", + "node = list(nces.fit(lp).best_predictions)[0]\n", "t1 = time.time()\n", "print(\"\\nDuration: \", t1-t0, \" seconds\")" ] }, { "cell_type": "code", - "execution_count": 13, - "id": "adult-valuation", + "execution_count": 14, + "id": "current-haven", "metadata": {}, "outputs": [ { @@ -236,7 +248,7 @@ "82" ] }, - "execution_count": 13, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -247,17 +259,17 @@ }, { "cell_type": "code", - "execution_count": 14, - "id": "collect-gothic", + "execution_count": 15, + "id": "lasting-stage", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - " at 0x9fe2fe8\tBrother ⊔ 
Sister ⊔ Daughter\tQuality:1.0\tLength:5\t|Indv.|:82" + " at 0x75f30ec\tBrother ⊔ (Person ⊓ (Daughter ⊔ (∃ hasSibling.Granddaughter)))\tQuality:1.0\tLength:9\t|Indv.|:82" ] }, - "execution_count": 14, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -268,8 +280,8 @@ }, { "cell_type": "code", - "execution_count": 15, - "id": "chronic-horse", + "execution_count": 16, + "id": "sweet-standing", "metadata": {}, "outputs": [ { @@ -288,7 +300,7 @@ "(100.0, 100.0, 100.0, 100.0)" ] }, - "execution_count": 15, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -299,7 +311,7 @@ }, { "cell_type": "markdown", - "id": "noble-psychiatry", + "id": "three-polymer", "metadata": {}, "source": [ "### Ensemble prediction" @@ -307,8 +319,8 @@ }, { "cell_type": "code", - "execution_count": 16, - "id": "desirable-auction", + "execution_count": 17, + "id": "informal-poetry", "metadata": {}, "outputs": [ { @@ -316,23 +328,26 @@ "output_type": "stream", "text": [ "\n", - "Duration: 0.6503381729125977 seconds\n" + " Loaded NCES weights!\n", + "\n", + "\n", + "Duration: 0.4260680675506592 seconds\n" ] } ], "source": [ - "nces.pretrained_model_name = ['SetTransformer','GRU','LSTM']\n", - "nces.refresh()\n", + "nces.learner_names = ['SetTransformer','GRU','LSTM']\n", + "nces.refresh() # Loads model weights\n", "t0 = time.time()\n", - "node = list(nces.fit(pos, neg).best_predictions)[0]\n", + "node = list(nces.fit(lp).best_predictions)[0]\n", "t1 = time.time()\n", "print(\"\\nDuration: \", t1-t0, \" seconds\")" ] }, { "cell_type": "code", - "execution_count": 17, - "id": "victorian-amateur", + "execution_count": 18, + "id": "least-tribe", "metadata": {}, "outputs": [ { @@ -351,7 +366,7 @@ "(100.0, 100.0, 100.0, 100.0)" ] }, - "execution_count": 17, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -362,17 +377,17 @@ }, { "cell_type": "code", - "execution_count": 18, - "id": "domestic-breakfast", + 
"execution_count": 19, + "id": "surrounded-roller", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - " at 0x9fe2c38\tBrother ⊔ Daughter\tQuality:1.0\tLength:3\t|Indv.|:82" + " at 0xa840520\tBrother ⊔ Daughter ⊔ Daughter\tQuality:1.0\tLength:5\t|Indv.|:82" ] }, - "execution_count": 18, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -383,21 +398,21 @@ }, { "cell_type": "code", - "execution_count": 19, - "id": "careful-works", + "execution_count": 20, + "id": "medieval-advance", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "[ at 0x9fe2c38\tBrother ⊔ Daughter\tQuality:1.0\tLength:3\t|Indv.|:82,\n", - " at 0xa05c220\tBrother ⊔ Daughter\tQuality:1.0\tLength:3\t|Indv.|:82,\n", - " at 0x9fe34b0\tBrother ⊔ Daughter\tQuality:1.0\tLength:3\t|Indv.|:82,\n", - " at 0x9fe3ca8\tPersonWithASibling ⊔ Daughter ⊔ Sister\tQuality:1.0\tLength:5\t|Indv.|:82,\n", - " at 0x9fe3e08\tBrother ⊔ Daughter\tQuality:1.0\tLength:3\t|Indv.|:82]" + "[ at 0xa840520\tBrother ⊔ Daughter ⊔ Daughter\tQuality:1.0\tLength:5\t|Indv.|:82,\n", + " at 0x7c63c78\tPersonWithASibling ⊔ Daughter\tQuality:1.0\tLength:3\t|Indv.|:82,\n", + " at 0x7c63c58\tBrother ⊔ Daughter\tQuality:1.0\tLength:3\t|Indv.|:82,\n", + " at 0x7c5ea34\tBrother ⊔ Daughter\tQuality:1.0\tLength:3\t|Indv.|:82,\n", + " at 0x7c5ea74\tPersonWithASibling ⊔ Daughter\tQuality:1.0\tLength:3\t|Indv.|:82]" ] }, - "execution_count": 19, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -408,7 +423,7 @@ }, { "cell_type": "markdown", - "id": "capable-quality", + "id": "conditional-dependence", "metadata": {}, "source": [ "### Complex learning problems, potentially without an exact solution" @@ -416,7 +431,7 @@ }, { "cell_type": "markdown", - "id": "honest-empire", + "id": "fluid-crawford", "metadata": {}, "source": [ "#### First learning problem" @@ -424,8 +439,8 @@ }, { "cell_type": "code", - "execution_count": 20, - "id": "novel-protest", + "execution_count": 21, + 
"id": "revised-major", "metadata": {}, "outputs": [], "source": [ @@ -437,8 +452,18 @@ }, { "cell_type": "code", - "execution_count": 21, - "id": "amended-found", + "execution_count": 22, + "id": "developmental-tender", + "metadata": {}, + "outputs": [], + "source": [ + "lp = PosNegLPStandard(pos=pos, neg=neg)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "failing-patrick", "metadata": {}, "outputs": [ { @@ -447,19 +472,19 @@ "['SetTransformer', 'GRU', 'LSTM']" ] }, - "execution_count": 21, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "nces.pretrained_model_name" + "nces.learner_names" ] }, { "cell_type": "code", - "execution_count": 22, - "id": "elder-fever", + "execution_count": 24, + "id": "balanced-linux", "metadata": {}, "outputs": [ { @@ -467,30 +492,30 @@ "output_type": "stream", "text": [ "\n", - "Duration: 0.6927070617675781 seconds\n" + "Duration: 0.3714330196380615 seconds\n" ] } ], "source": [ "t0 = time.time()\n", - "node = list(nces.fit(pos, neg).best_predictions)[0]\n", + "node = list(nces.fit(lp).best_predictions)[0]\n", "t1 = time.time()\n", "print(\"\\nDuration: \", t1-t0, \" seconds\")" ] }, { "cell_type": "code", - "execution_count": 23, - "id": "chinese-avatar", + "execution_count": 25, + "id": "constitutional-lindsay", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - " at 0xcb9e860\tPerson ⊔ Son\tQuality:0.85227\tLength:3\t|Indv.|:202" + " at 0x75f5058\tPerson ⊔ Child\tQuality:0.85227\tLength:3\t|Indv.|:202" ] }, - "execution_count": 23, + "execution_count": 25, "metadata": {}, "output_type": "execute_result" } @@ -501,8 +526,8 @@ }, { "cell_type": "code", - "execution_count": 24, - "id": "accessory-excess", + "execution_count": 26, + "id": "legal-broadcasting", "metadata": {}, "outputs": [ { @@ -521,7 +546,7 @@ "(74.25699999999999, 74.25699999999999, 100.0, 85.227)" ] }, - "execution_count": 24, + "execution_count": 26, "metadata": {}, "output_type": 
"execute_result" } @@ -532,7 +557,7 @@ }, { "cell_type": "markdown", - "id": "varied-danger", + "id": "ambient-percentage", "metadata": {}, "source": [ "#### Second learning problem" @@ -540,8 +565,8 @@ }, { "cell_type": "code", - "execution_count": 25, - "id": "cardiac-webmaster", + "execution_count": 27, + "id": "speaking-plain", "metadata": {}, "outputs": [], "source": [ @@ -552,8 +577,18 @@ }, { "cell_type": "code", - "execution_count": 26, - "id": "fantastic-piece", + "execution_count": 28, + "id": "upper-oasis", + "metadata": {}, + "outputs": [], + "source": [ + "lp = PosNegLPStandard(pos=pos, neg=neg)" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "weekly-desperate", "metadata": {}, "outputs": [ { @@ -561,40 +596,40 @@ "output_type": "stream", "text": [ "\n", - "Duration: 0.7965126037597656 seconds\n" + "Duration: 0.37883949279785156 seconds\n" ] } ], "source": [ "t0 = time.time()\n", - "node = list(nces.fit(pos, neg).best_predictions)[0]\n", + "node = list(nces.fit(lp).best_predictions)[0]\n", "t1 = time.time()\n", "print(\"\\nDuration: \", t1-t0, \" seconds\")" ] }, { "cell_type": "code", - "execution_count": 27, - "id": "pregnant-prague", + "execution_count": 30, + "id": "fallen-programming", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Accuracy: 57.921%\n", - "Precision: 47.934%\n", - "Recall: 72.5%\n", - "F1: 57.711%\n" + "Accuracy: 39.604%\n", + "Precision: 39.604%\n", + "Recall: 100.0%\n", + "F1: 56.738%\n" ] }, { "data": { "text/plain": [ - "(57.921, 47.934, 72.5, 57.711)" + "(39.604, 39.604, 100.0, 56.738)" ] }, - "execution_count": 27, + "execution_count": 30, "metadata": {}, "output_type": "execute_result" } @@ -606,14 +641,14 @@ { "cell_type": "code", "execution_count": null, - "id": "alternative-lloyd", + "id": "administrative-karen", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", - "id": "latin-spanking", + "id": "personal-basketball", "metadata": {}, 
"source": [ "## Important note\n", @@ -628,7 +663,7 @@ { "cell_type": "code", "execution_count": null, - "id": "anonymous-principle", + "id": "atomic-crime", "metadata": {}, "outputs": [], "source": [] @@ -636,9 +671,9 @@ ], "metadata": { "kernelspec": { - "display_name": "onto", + "display_name": "onto_env", "language": "python", - "name": "onto" + "name": "onto_env" }, "language_info": { "codemirror_mode": { diff --git a/examples/retrieval_eval.py b/examples/retrieval_eval.py index e52a9bd8..140f2c62 100644 --- a/examples/retrieval_eval.py +++ b/examples/retrieval_eval.py @@ -1,9 +1,49 @@ -"""python examples/retrieval_eval.py""" +"""$ python examples/retrieval_eval.py --path_kg KGs/Family/father.owl + +################################################## +Description of generated Concepts +NC denotes the named concepts |NC|=3 +NNC denotes the negated named concepts |NNC|=3 +|NC UNION NC|=9 +|NC Intersection NC|=9 +NC* denotes the union of named concepts and negated named concepts |NC*|=6 +|NC* UNION NC*|=36 +|NC* Intersection NC*|=36 +|exist R* NC*|=12 +|forall R* NC*|=12 +|Max Cardinalities|=36 +|Min Cardinalities|=36 +|exist R* Nominals|=40 +################################################## + +Expression: ∃ hasChild⁻.{heinz , stefan , martin} | Jaccard Similarity:1.0000 | F1 :1.0000 | Runtime Benefits:-0.003: 0%| | 0/232 [00:00 Set[ - Union[OWLQuantifiedObjectRestriction, OWLObjectCardinalityRestriction]]: - """ - Map a set of owl concepts and a set of properties into OWL Restrictions - - Args: - concepts: - properties: - cls (Callable): An owl Restriction class - cardinality: A positive Integer - - Returns: List of OWL Restrictions - - """ - assert isinstance(concepts, Iterable), "Concepts must be an Iterable" - assert isinstance(properties, Iterable), "properties must be an Iterable" - assert isinstance(cls, Callable), "cls must be an Callable" - assert cardinality > 0 - result = set() - for i in concepts: - for j in properties: - if cls == 
OWLObjectMinCardinality or cls == OWLObjectMaxCardinality: - result.add(cls(cardinality=cardinality, property=j, filler=i)) - continue - result.add(cls(j, i)) - return result - - -# @TODO: CD: Perhaps we can remove this function. -def concept_to_retrieval(concepts, retriever) -> List[Tuple[float, Set[str]]]: - results = [] - for c in concepts: - start_time_ = time.time() - retrieval = {i.str for i in retriever.individuals(c)} - results.append((time.time() - start_time_, retrieval)) - return results - - -# @TODO: CD: Perhaps we can remove this function. -def retrieval_eval(expressions, y, yhat, verbose=1): - assert len(y) == len(yhat) - similarities = [] - runtime_diff = [] - number_of_concepts = len(expressions) - for expressions, y_report_i, yhat_report_i in zip(expressions, y, yhat): - runtime_y_i, y_i = y_report_i - runtime_yhat_i, yhat_i = yhat_report_i - - jaccard_sim = jaccard_similarity(y_i, yhat_i) - runtime_benefits = runtime_y_i - runtime_yhat_i - if verbose > 0: - print( - f"Concept:{expressions}\tTrue Size:{len(y_i)}\tPredicted Size:{len(yhat_i)}\tRetrieval Similarity:{jaccard_sim}\tRuntime Benefit:{runtime_benefits:.3f}" - ) - similarities.append(jaccard_sim) - runtime_diff.append(runtime_benefits) - avg_jaccard_sim = sum(similarities) / len(similarities) - avg_runtime_benefits = sum(runtime_diff) / len(runtime_diff) - return number_of_concepts, avg_jaccard_sim, avg_runtime_benefits +import itertools +import ast def execute(args): @@ -105,91 +74,189 @@ def execute(args): else: symbolic_kb = KnowledgeBase(path=args.path_kg) # (2) Initialize Neural OWL Reasoner. - neural_owl_reasoner = TripleStoreNeuralReasoner(path_of_kb=args.path_kg, gamma=args.gamma) + if args.path_kge_model: + neural_owl_reasoner = TripleStoreNeuralReasoner(path_neural_embedding=args.path_kge_model, gamma=args.gamma) + else: + neural_owl_reasoner = TripleStoreNeuralReasoner(path_of_kb=args.path_kg, gamma=args.gamma) + # Fix the random seed. 
+ random.seed(args.seed) ################################################################### - # GENERATE ALCQ CONCEPTS TO EVALUATE RETRIEVAL PERFORMANCES + # GENERATE DL CONCEPTS TO EVALUATE RETRIEVAL PERFORMANCES # (3) R: Extract object properties. - object_properties = {i for i in symbolic_kb.get_object_properties()} + object_properties = sorted({i for i in symbolic_kb.get_object_properties()}) + + # (3.1) Subsample if required. + if args.ratio_sample_object_prob: + object_properties = {i for i in random.sample(population=list(object_properties), + k=max(1, int(len(object_properties) * args.ratio_sample_object_prob)))} + + object_properties = set(object_properties) + # (4) R⁻: Inverse of object properties. object_properties_inverse = {i.get_inverse_property() for i in object_properties} + # (5) R*: R UNION R⁻. object_properties_and_inverse = object_properties.union(object_properties_inverse) # (6) NC: Named owl concepts. - nc = {i for i in symbolic_kb.get_concepts()} + nc = sorted({i for i in symbolic_kb.get_concepts()}) + + + + + if args.ratio_sample_nc: + # (6.1) Subsample if required. + nc = {i for i in random.sample(population=list(nc), k=max(1, int(len(nc) * args.ratio_sample_nc)))} + + nc = set(nc) # return to a set # (7) NC⁻: Complement of NC. nnc = {i.get_object_complement_of() for i in nc} - # (8) UNNC: NC UNION NC⁻. - unnc = nc.union(nnc) - # (9) NC UNION NC. + + # (8) NC*: NC UNION NC⁻. + nc_star = nc.union(nnc) + # (9) Retrieve 10 random Nominals. + if len(symbolic_kb.all_individuals_set())>args.num_nominals: + nominals = set(random.sample(symbolic_kb.all_individuals_set(), args.num_nominals)) + else: + nominals = symbolic_kb.all_individuals_set() + # (10) All combinations of 3 for Nominals, e.g. {martin, heinz, markus} + nominal_combinations = set( OWLObjectOneOf(combination)for combination in itertools.combinations(nominals, 3)) + + # (11) NC UNION NC. unions = concept_reducer(nc, opt=OWLObjectUnionOf) - # (10) NC INTERSECTION NC. 
+ # (12) NC INTERSECTION NC. intersections = concept_reducer(nc, opt=OWLObjectIntersectionOf) - # (11) UNNC UNION UNNC. - unions_unnc = concept_reducer(unnc, opt=OWLObjectUnionOf) - # (12) UNNC INTERACTION UNNC. - intersections_unnc = concept_reducer(unnc, opt=OWLObjectIntersectionOf) - - # (13) \exist r. C s.t. C \in UNNC and r \in R* . - exist_unnc = concept_reducer_properties(concepts=unnc, - properties=object_properties_and_inverse, - cls=OWLObjectSomeValuesFrom) - # (15) \forall r. C s.t. C \in UNNC and r \in R* . - for_all_unnc = concept_reducer_properties(concepts=unnc, - properties=object_properties_and_inverse, - cls=OWLObjectAllValuesFrom) - # (16) >= n r. C and =< n r. C, s.t. C \in UNNC and r \in R* . - min_cardinality_unnc_1, min_cardinality_unnc_2, min_cardinality_unnc_3 = ( - concept_reducer_properties(concepts=unnc, properties=object_properties_and_inverse, cls=OWLObjectMinCardinality, - cardinality=i) + # (13) NC* UNION NC*. + unions_nc_star = concept_reducer(nc_star, opt=OWLObjectUnionOf) + # (14) NC* INTERSECTION NC*. + intersections_nc_star = concept_reducer(nc_star, opt=OWLObjectIntersectionOf) + # (15) \exist r. C s.t. C \in NC* and r \in R* . + exist_nc_star = concept_reducer_properties( + concepts=nc_star, + properties=object_properties_and_inverse, + cls=OWLObjectSomeValuesFrom, + ) + # (16) \forall r. C s.t. C \in NC* and r \in R* . + for_all_nc_star = concept_reducer_properties( + concepts=nc_star, + properties=object_properties_and_inverse, + cls=OWLObjectAllValuesFrom, + ) + # (17) >= n r. C and =< n r. C, s.t. C \in NC* and r \in R* . 
+ min_cardinality_nc_star_1, min_cardinality_nc_star_2, min_cardinality_nc_star_3 = ( + concept_reducer_properties( + concepts=nc_star, + properties=object_properties_and_inverse, + cls=OWLObjectMinCardinality, + cardinality=i, + ) for i in [1, 2, 3] ) - max_cardinality_unnc_1, max_cardinality_unnc_2, max_cardinality_unnc_3 = ( - concept_reducer_properties(concepts=unnc, - properties=object_properties_and_inverse, - cls=OWLObjectMaxCardinality, - cardinality=i) + max_cardinality_nc_star_1, max_cardinality_nc_star_2, max_cardinality_nc_star_3 = ( + concept_reducer_properties( + concepts=nc_star, + properties=object_properties_and_inverse, + cls=OWLObjectMaxCardinality, + cardinality=i, + ) for i in [1, 2, 3] ) + # (18) \exist r. Nominal s.t. Nominal \in Nominals and r \in R* . + exist_nominals = concept_reducer_properties( + concepts=nominal_combinations, + properties=object_properties_and_inverse, + cls=OWLObjectSomeValuesFrom, + ) ################################################################### # Retrieval Results - def concept_retrieval(retriever_func, c) -> Tuple[Set[str], float]: start_time = time.time() return {i.str for i in retriever_func.individuals(c)}, time.time() - start_time + # () Collect the data. data = [] - # Converted to list so that the progress bar works. - concepts=list(chain(nc, unions, intersections, - nnc, unnc, unions_unnc, intersections_unnc, - exist_unnc, for_all_unnc, - min_cardinality_unnc_1, min_cardinality_unnc_2, - min_cardinality_unnc_3, - max_cardinality_unnc_1, max_cardinality_unnc_2, - max_cardinality_unnc_3)) - # Shuffled the data so that the progress bar is not influenced by the order of concepts. + # () Converted to list so that the progress bar works. 
+ concepts = list( + chain( + nc, # named concepts (C) + nnc, # negated named concepts (\neg C) + unions_nc_star, # A set of Union of named concepts and negat + intersections_nc_star, # + exist_nc_star, + for_all_nc_star, + min_cardinality_nc_star_1, min_cardinality_nc_star_1, min_cardinality_nc_star_3, + max_cardinality_nc_star_1, max_cardinality_nc_star_2, max_cardinality_nc_star_3, + exist_nominals)) + print("\n") + print("#"*50) + print("Description of generated Concepts") + print(f"NC denotes the named concepts\t|NC|={len(nc)}") + print(f"NNC denotes the negated named concepts\t|NNC|={len(nnc)}") + print(f"|NC UNION NC|={len(unions)}") + print(f"|NC Intersection NC|={len(intersections)}") + + print(f"NC* denotes the union of named concepts and negated named concepts\t|NC*|={len(nc_star)}") + print(f"|NC* UNION NC*|={len(unions_nc_star)}") + print(f"|NC* Intersection NC*|={len(intersections_nc_star)}") + print(f"|exist R* NC*|={len(exist_nc_star)}") + print(f"|forall R* NC*|={len(for_all_nc_star)}") + + print(f"|Max Cardinalities|={len(max_cardinality_nc_star_1) + len(max_cardinality_nc_star_2)+ len(max_cardinality_nc_star_3)}") + print(f"|Min Cardinalities|={len(min_cardinality_nc_star_1) + len(min_cardinality_nc_star_1)+ len(min_cardinality_nc_star_3)}") + print(f"|exist R* Nominals|={len(exist_nominals)}") + print("#" * 50,end="\n\n") + + + # () Shuffled the data so that the progress bar is not influenced by the order of concepts. + random.shuffle(concepts) - # Converted to list so that the progress bar works. + + # () Iterate over single OWL Class Expressions in ALCQIHO for expression in (tqdm_bar := tqdm(concepts, position=0, leave=True)): + retrieval_y: Set[str] + runtime_y: Set[str] + # () Retrieve the true set of individuals and elapsed runtime. retrieval_y, runtime_y = concept_retrieval(symbolic_kb, expression) + # () Retrieve a set of inferred individuals and elapsed runtime. 
retrieval_neural_y, runtime_neural_y = concept_retrieval(neural_owl_reasoner, expression) + # () Compute the Jaccard similarity. jaccard_sim = jaccard_similarity(retrieval_y, retrieval_neural_y) - data.append({"Expression": owl_expression_to_dl(expression), - "Type": type(expression).__name__, - "Jaccard Similarity": jaccard_sim, - "Runtime Benefits": runtime_y - runtime_neural_y - }) + # () Compute the F1-score. + f1_sim = f1_set_similarity(retrieval_y, retrieval_neural_y) + # () Store the data. + data.append( + { + "Expression": owl_expression_to_dl(expression), + "Type": type(expression).__name__, + "Jaccard Similarity": jaccard_sim, + "F1": f1_sim, + "Runtime Benefits": runtime_y - runtime_neural_y, + "Runtime Neural": runtime_neural_y, + "Symbolic_Retrieval": retrieval_y, + "Symbolic_Retrieval_Neural": retrieval_neural_y, + } + ) + # () Update the progress bar. tqdm_bar.set_description_str( - f"Expression: {owl_expression_to_dl(expression)} | Jaccard Similarity:{jaccard_sim:.4f} | Runtime Benefits:{runtime_y - runtime_neural_y:.3f}") - + f"Expression: {owl_expression_to_dl(expression)} | Jaccard Similarity:{jaccard_sim:.4f} | F1 :{f1_sim:.4f} | Runtime Benefits:{runtime_y - runtime_neural_y:.3f}" + ) + # () Read the data into pandas dataframe df = pd.DataFrame(data) - assert df["Jaccard Similarity"].mean() == 1.0 - + assert df["Jaccard Similarity"].mean() >= args.min_jaccard_similarity + # () Save the experimental results into csv file. df.to_csv(args.path_report) del df - df = pd.read_csv(args.path_report, index_col=0) - numerical_df = df.select_dtypes(include=['number']) + # () Load the saved CSV file. + df = pd.read_csv(args.path_report, index_col=0, converters={'Symbolic_Retrieval': lambda x: ast.literal_eval(x), + 'Symbolic_Retrieval_Neural': lambda x: ast.literal_eval( + x)}) + # () A retrieval result can be parsed into set of instances to python object. 
+ x = df["Symbolic_Retrieval_Neural"].iloc[0] + assert isinstance(x, set) + # () Extract the numerical features. + numerical_df = df.select_dtypes(include=["number"]) + # () Extract the type of owl concepts df_g = df.groupby(by="Type") print(df_g["Type"].count()) mean_df = df_g[numerical_df.columns].mean() @@ -198,131 +265,19 @@ def concept_retrieval(retriever_func, c) -> Tuple[Set[str], float]: def get_default_arguments(): parser = ArgumentParser() - parser.add_argument("--path_kg", type=str, - default="KGs/Family/family-benchmark_rich_background.owl") + parser.add_argument("--path_kg", type=str, default="KGs/Family/father.owl") + parser.add_argument("--path_kge_model", type=str, default=None) parser.add_argument("--endpoint_triple_store", type=str, default=None) - parser.add_argument("--gamma", type=float, default=0.8) - parser.add_argument("--path_report", type=str, default="ALCQ_Retrieval_Results.csv") + parser.add_argument("--gamma", type=float, default=0.9) + parser.add_argument("--seed", type=int, default=1) + parser.add_argument("--ratio_sample_nc", type=float, default=0.2, help="To sample OWL Classes.") + parser.add_argument("--ratio_sample_object_prob", type=float, default=0.1, help="To sample OWL Object Properties.") + parser.add_argument("--min_jaccard_similarity", type=float, default=0.0, help="Minimum Jaccard similarity to be achieve by the reasoner") + parser.add_argument("--num_nominals", type=int, default=10, help="Number of OWL named individuals to be sampled.") + + # H is obtained if the forward chain is applied on KG. + parser.add_argument("--path_report", type=str, default="ALCQHI_Retrieval_Results.csv") return parser.parse_args() - -if __name__ == '__main__': +if __name__ == "__main__": execute(get_default_arguments()) -# @TODO:CD:I guess we can remove the below part. What do you think Luke ? 
-""" -nc_retrieval_results = retrieval_eval( - expressions=nc, - y=concept_to_retrieval(nc, symbolic_kb), - yhat=concept_to_retrieval(nc, neural_owl_reasoner), -) - -unions_nc_retrieval_results = retrieval_eval( - expressions=unions, - y=concept_to_retrieval(unions, symbolic_kb), - yhat=concept_to_retrieval(unions, neural_owl_reasoner), -) -intersections_nc_retrieval_results = retrieval_eval( - expressions=intersections, - y=concept_to_retrieval(intersections, symbolic_kb), - yhat=concept_to_retrieval(intersections, neural_owl_reasoner), -) -nnc_retrieval_results = retrieval_eval( - expressions=nnc, - y=concept_to_retrieval(nnc, symbolic_kb), - yhat=concept_to_retrieval(nnc, neural_owl_reasoner), -) -unnc_retrieval_results = retrieval_eval( - expressions=unnc, - y=concept_to_retrieval(unnc, symbolic_kb), - yhat=concept_to_retrieval(unnc, neural_owl_reasoner), -) -unions_unnc_retrieval_results = retrieval_eval( - expressions=unions_unnc, - y=concept_to_retrieval(unions_unnc, symbolic_kb), - yhat=concept_to_retrieval(unions_unnc, neural_owl_reasoner), -) -intersections_unnc_retrieval_results = retrieval_eval( - expressions=intersections_unnc, - y=concept_to_retrieval(intersections_unnc, symbolic_kb), - yhat=concept_to_retrieval(intersections_unnc, neural_owl_reasoner), -) -exist_unnc_retrieval_results = retrieval_eval( - expressions=exist_unnc, - y=concept_to_retrieval(exist_unnc, symbolic_kb), - yhat=concept_to_retrieval(exist_unnc, neural_owl_reasoner), -) -for_all_unnc_retrieval_results = retrieval_eval( - expressions=for_all_unnc, - y=concept_to_retrieval(for_all_unnc, symbolic_kb), - yhat=concept_to_retrieval(for_all_unnc, neural_owl_reasoner), -) - -( - min_cardinality_unnc_1_retrieval_results, - min_cardinality_unnc_2_retrieval_results, - min_cardinality_unnc_3_retrieval_results, -) = ( - retrieval_eval( - expressions=expressions, - y=concept_to_retrieval(expressions, symbolic_kb), - yhat=concept_to_retrieval(expressions, neural_owl_reasoner), - ) - for 
expressions in [ - min_cardinality_unnc_1, - min_cardinality_unnc_2, - min_cardinality_unnc_3, -] -) - -( - max_cardinality_unnc_1_retrieval_results, - max_cardinality_unnc_2_retrieval_results, - max_cardinality_unnc_3_retrieval_results, -) = ( - retrieval_eval( - expressions=expressions, - y=concept_to_retrieval(expressions, symbolic_kb), - yhat=concept_to_retrieval(expressions, neural_owl_reasoner), - ) - for expressions in [ - max_cardinality_unnc_1, - max_cardinality_unnc_2, - max_cardinality_unnc_3, -] -) - -results = { - "nc_retrieval_results": nc_retrieval_results, - "unions_nc_retrieval_results": unions_nc_retrieval_results, - "intersections_nc_retrieval_results": intersections_nc_retrieval_results, - "nnc_retrieval_results": nnc_retrieval_results, - "unnc_retrieval_results": unnc_retrieval_results, - "unions_unnc_retrieval_results": unions_unnc_retrieval_results, - "intersections_unnc_retrieval_results": intersections_unnc_retrieval_results, - "exist_unnc_retrieval_results": exist_unnc_retrieval_results, - "for_all_unnc_retrieval_results": for_all_unnc_retrieval_results, - "min_cardinality_unnc_1_retrieval_results": min_cardinality_unnc_1_retrieval_results, - "min_cardinality_unnc_2_retrieval_results": min_cardinality_unnc_2_retrieval_results, - "min_cardinality_unnc_3_retrieval_results": min_cardinality_unnc_3_retrieval_results, - "max_cardinality_unnc_1_retrieval_results": max_cardinality_unnc_1_retrieval_results, - "max_cardinality_unnc_2_retrieval_results": max_cardinality_unnc_2_retrieval_results, - "max_cardinality_unnc_3_retrieval_results": max_cardinality_unnc_3_retrieval_results, -} - - -# logger that prints the results -def print_results(results): - print(f"Number of named and negated named concepts: {len(unnc)}") - print( - f"Number of object properties and their inverses: {len(object_properties_and_inverse)}" - ) - print("\n") - print("(Number of Concepts, Jaccard Similarity, Runtime Benefits)") - for k, v in results.items(): - print("\n") - 
print(f"{k}:") - print(v) - - -print_results(results) -""" diff --git a/examples/retrieval_eval_under_incomplete.py b/examples/retrieval_eval_under_incomplete.py new file mode 100644 index 00000000..2a1d544e --- /dev/null +++ b/examples/retrieval_eval_under_incomplete.py @@ -0,0 +1,243 @@ +"""The goal of this script is to perform a retrieval task on an inconsistent or incomplete KB. + Given an input KB, we first generate a number of subgraphs that are either incomplete + or inconsistent. Each subgraph is then evaluated by running a retrieval task, using + a neural method or different symbolic reasoners (HermiT, Pellet, JFact, and Openllet). + For each subgraph, the script computes and records Jaccard similarity scores between + the retrieval results of each reasoner and the expected goal, as well as their runtime. + The result is then saved as a CSV file for further investigation. + + To run the script: python examples/retrieval_eval_under_incomplete.py""" + + + +from argparse import ArgumentParser +from ontolearn.knowledge_base import KnowledgeBase +import pandas as pd +from typing import Set +import time +from ontolearn.incomplete_kb import make_kb_incomplete, make_kb_inconsistent +import os +from ontolearn.utils import jaccard_similarity +import subprocess +from owlapy.class_expression import * +from owlapy.iri import IRI +from owlapy.parser import DLSyntaxParser +import ast +from owlapy import owl_expression_to_dl +from owlapy.owl_ontology_manager import OntologyManager +from owlapy.owlapi_adaptor import OWLAPIAdaptor +import pandas as pd + + +# Create incomplete/noisy KGs +def generate_subgraphs(kb_path: str, directory: str, n: int, ratio: float, operation: str) -> Set[str]: + + """ + Generates a specified number of paths of subgraphs (incomplete or noisy knowledge graphs) + by applying either the "incomplete" or "inconsistent" operation from the functions make_kb_incomplete and + make_kb_inconsistent to the given KB.
+ + Inputs: + --------------- + + kb_path (str): The path to the input KB file. + directory (str): The directory where the generated subgraphs will be stored. + n (int): The number of subgraphs to generate. + ratio (float): The ratio of elements to modify within the KB (as a percentage). + operation (str): The type of operation to perform on the KB. Expected values are + "incomplete" or "inconsistent", which define the type of subgraph to generate. + + Output: + --------------- + + Set[str]: A set containing the file paths of all the generated subgraphs. + """ + + name = kb_path.split('/')[-1].split('.')[0] + rate = int(ratio * 100) + + os.makedirs(directory, exist_ok=True) + + file_paths = set() + + for i in range(1, n + 1): + + + if "incomplete" in operation: + + # output path for the incomplete KGs + output_path = f'{directory}/{operation}_{name}_ratio_{rate}_number_{i}.owl' + + # Check if the file already exists + if not os.path.exists(output_path): + # If file does not exist, generate it + make_kb_incomplete(kb_path, output_path, rate, seed=i) + + else: + output_path = f'{directory}/{operation}_{name}_ratio_{rate}_number_{i}.owl' + + # Check if the file already exists + if not os.path.exists(output_path): + # If file does not exist, generate it + make_kb_inconsistent(kb_path, output_path, rate, seed=i) + + # Add the output path to the set + file_paths.add(output_path) + + return file_paths + +def execute(args): + symbolic_kb = KnowledgeBase(path=args.path_kg) + namespace = list(symbolic_kb.ontology.classes_in_signature())[0].iri.get_namespace() + parser = DLSyntaxParser(namespace) + name_KG = args.path_kg.split('/')[-1].split('.')[0] + ratio_str = str(args.ratio).replace('.', '_') + directory = f"{args.operation}_{name_KG}_{ratio_str}" + paths_of_subgraphs = generate_subgraphs( + kb_path=args.path_kg, + directory=directory, + n=args.number_of_subgraphs, + ratio=args.ratio, + operation=args.operation + ) + path_report = 
f"{directory}/ALCQHI_Retrieval_Results.csv" + + expressions = None + all_results = [] + + for path in paths_of_subgraphs: + + list_jaccard_neural = [] + data = [] + + if args.sample == "Yes": + subprocess.run(['python', 'examples/retrieval_eval.py', "--path_kg", path, "--ratio_sample_nc","0.1", "--ratio_sample_object_prob", "0.2", "--path_report", path_report]) + else: + subprocess.run(['python', 'examples/retrieval_eval.py', "--path_kg", path, "--path_report", path_report]) + + df = pd.read_csv(f"{directory}/ALCQHI_Retrieval_Results.csv", index_col=0) + + expressions = {i for i in df["Expression"].to_list()} + + ontology_path = path + reasoners = ['HermiT', 'Pellet', 'JFact', 'Openllet'] + reasoner_jaccards = {reasoner: [] for reasoner in reasoners} + reasoner_times = {reasoner: [] for reasoner in reasoners} # To store running times + + + owlapi_adaptor = OWLAPIAdaptor(path=ontology_path, name_reasoner='HermiT') + + if owlapi_adaptor.has_consistent_ontology(): + + for expression in expressions: + + print("-"*100) + print("Expression:", expression) + target_concept = parser.parse_expression(expression) + goal_retrieval = {i.str for i in symbolic_kb.individuals(target_concept)} + result_neural_symbolic = df[df["Expression"] == expression]["Symbolic_Retrieval_Neural"].apply(ast.literal_eval).iloc[0] + jaccard_sim_neural = jaccard_similarity(result_neural_symbolic, goal_retrieval) + list_jaccard_neural.append(jaccard_sim_neural) + + result_row = { + "Incomplete_KG": path.split('/')[-1], + "Expression": expression, + "Type": type(parser.parse_expression(expression)).__name__, + "Jaccard_EBR": jaccard_sim_neural, + "Runtime_EBR": df[df["Expression"] == expression]["Runtime Neural"].iloc[0] + } + + + for reasoner in reasoners: + + owlapi_adaptor = OWLAPIAdaptor(path=ontology_path, name_reasoner=reasoner) + + print(f"...Reasoner {reasoner} starts") + + start_time = time.time() # Start timing + + result_symbolic = {i.str for i in (owlapi_adaptor.instances(target_concept, 
direct=False))} + end_time = time.time() # End timing + + elapsed_time = end_time - start_time # Calculate elapsed time + jaccard_sim_symbolic = jaccard_similarity(result_symbolic, goal_retrieval) + reasoner_jaccards[reasoner].append(jaccard_sim_symbolic) + reasoner_times[reasoner].append(elapsed_time) # Store running time + + result_row[f"Jaccard_{reasoner}"] = jaccard_sim_symbolic + result_row[f"Runtime_{reasoner}"] = elapsed_time + + + + data.append(result_row) + + all_results.extend(data) + + + avg_jaccard_neural = sum(list_jaccard_neural) / len(list_jaccard_neural) + avg_jaccard_reasoners = {reasoner: sum(reasoner_jaccards[reasoner]) / len(reasoner_jaccards[reasoner]) for reasoner in reasoners} + avg_time_reasoners = {reasoner: sum(reasoner_times[reasoner]) / len(reasoner_times[reasoner]) for reasoner in reasoners} + + print(f"Average Jaccard neural ({path}):", avg_jaccard_neural) + for reasoner, avg_jaccard in avg_jaccard_reasoners.items(): + print(f"Average Jaccard {reasoner} ({path}):", avg_jaccard) + print(f"Average Runtime {reasoner} ({path}):", avg_time_reasoners[reasoner]) + + else: + + for expression in expressions: + + print("-"*100) + print("Expression:", expression) + + target_concept = parser.parse_expression(expression) + goal_retrieval = {i.str for i in symbolic_kb.individuals(target_concept)} + result_neural_symbolic = df[df["Expression"] == expression]["Symbolic_Retrieval_Neural"].apply(ast.literal_eval).iloc[0] + jaccard_sim_neural = jaccard_similarity(result_neural_symbolic, goal_retrieval) + list_jaccard_neural.append(jaccard_sim_neural) + + result_row = { + "Subgraphs": path.split('/')[-1], + "Expression": expression, + "Type": type(parser.parse_expression(expression)).__name__, + "Jaccard_EBR": jaccard_sim_neural, + "Runtime_EBR": df[df["Expression"] == expression]["Runtime Neural"].iloc[0] + } + + + data.append(result_row) + + all_results.extend(data) + print("The Knowledge base is not consistent, hence other reasoners will fail") + + # 
Create a final DataFrame from all results and write to a CSV file + final_df = pd.DataFrame(all_results) + final_csv_path = f"{directory}/comparison_results.csv" + final_df.to_csv(final_csv_path, index=False) + + print(final_df.head()) + print(f"Results have been saved to {final_csv_path}") + + owlapi_adaptor.stopJVM() # Stop the standard reasoner + + + + +def get_default_arguments(): + parser = ArgumentParser() + parser.add_argument("--path_kg", type=str, default="KGs/Family/family-benchmark_rich_background.owl") + parser.add_argument("--seed", type=int, default=1) + parser.add_argument("--ratio_sample_nc", type=float, default=None, help="To sample OWL Classes.") + parser.add_argument("--ratio_sample_object_prob", type=float, default=None, help="To sample OWL Object Properties.") + parser.add_argument("--path_report", type=str, default="ALCQHI_Retrieval_Incomplete_Results.csv") + parser.add_argument("--number_of_subgraphs", type=int, default=1) + parser.add_argument("--ratio", type=float, default=0.1, \ + help="Percentage of incompleteness or inconsistency from the original KG between 0 and 1") + parser.add_argument("--operation", type=str, default="incomplete", choices=["incomplete", "inconsistent"],\ + help = "Choose to make the KB incomplete or inconsistent") + parser.add_argument("--sample", type=str, default="No", choices=["No", "Yes"], help = "Sample if needed") + return parser.parse_args() + + +if __name__ == "__main__": + execute(get_default_arguments()) diff --git a/examples/train_nces.py b/examples/train_nces.py index 35bac29c..e84f31a1 100644 --- a/examples/train_nces.py +++ b/examples/train_nces.py @@ -20,26 +20,39 @@ def str2bool(v): raise ValueError('Invalid boolean value.') -parser = argparse.ArgumentParser() -parser.add_argument('--kbs', type=str, nargs='+', default=['carcinogenesis'], help='Knowledge base name(s)') -parser.add_argument('--models', type=str, nargs='+', default=['SetTransformer', 'LSTM', 'GRU'], help='Neural models') 
-parser.add_argument('--load_pretrained', type=str2bool, default=False, help='Whether to load the pretrained model') -parser.add_argument('--learning_rate', type=float, default=0.001, help='The learning rate') -parser.add_argument('--epochs', type=int, default=300, help='Number of training epochs') -args = parser.parse_args() - -for kb in args.kbs: - knowledge_base_path = f"./NCESData/{kb}/{kb}.owl" - path_of_embeddings = f"./NCESData/{kb}/embeddings/ConEx_entity_embeddings.csv" - with open(f"./NCESData/{kb}/training_data/Data.json") as file: - training_data = list(json.load(file).items()) - - nces = NCES(knowledge_base_path=knowledge_base_path, learner_name="SetTransformer", - path_of_embeddings=path_of_embeddings, max_length=48, proj_dim=128, rnn_n_layers=2, drop_prob=0.1, - num_heads=4, num_seeds=1, num_inds=32, load_pretrained=args.load_pretrained) - - for model in args.models: - nces.learner_name = model - nces.pretrained_model_name = model - nces.refresh() - nces.train(training_data, epochs=args.epochs, learning_rate=args.learning_rate, save_model=True) +def start(args): + assert (args.kbs is not None), "Argument 'kbs' is required." + assert (args.embeddings is not None), "Argument 'embeddings' is required." + assert (len(args.kbs) == len(args.embeddings)), "There should be embeddings for each knowledge base." + for i, knowledge_base_path in enumerate(args.kbs): + path_of_embeddings = args.embeddings[i] + training_data = None + if args.path_train_data is not None: + try: + with open(args.path_train_data+"/LPs.json") as file: + training_data = list(json.load(file).items()) + except FileNotFoundError: + print("Couldn't find training data in the specified path. Defaulting to generating training data.") + else: + print("Could not find training data. 
Will generate some data and train.") + + + nces = NCES(knowledge_base_path=knowledge_base_path, learner_names=args.models, + path_of_embeddings=path_of_embeddings, max_length=48, proj_dim=128, rnn_n_layers=2, drop_prob=0.1, + num_heads=4, num_seeds=1, num_inds=32, verbose=True, load_pretrained=args.load_pretrained) + + nces.train(training_data, epochs=args.epochs, learning_rate=args.learning_rate, num_workers=2, save_model=True) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--kbs', type=str, nargs='+', default=None, help='Paths of knowledge bases') + parser.add_argument('--embeddings', type=str, nargs='+', default=None, help='Paths of embeddings for each KB.') + parser.add_argument('--path_train_data', type=str, help='Path to training data') + parser.add_argument('--models', type=str, nargs='+', default=['SetTransformer', 'LSTM', 'GRU'], + help='Neural models') + parser.add_argument('--load_pretrained', type=str2bool, default=False, help='Whether to load the pretrained model') + parser.add_argument('--learning_rate', type=float, default=0.001, help='The learning rate') + parser.add_argument('--epochs', type=int, default=300, help='Number of training epochs') + + start(parser.parse_args()) diff --git a/main.py b/main.py index 89df0e44..27296c41 100644 --- a/main.py +++ b/main.py @@ -22,7 +22,7 @@ # SOFTWARE. 
# ----------------------------------------------------------------------------- -from ontolearn.model_adapter import execute +from ontolearn.executor import execute from argparse import ArgumentParser @@ -105,7 +105,7 @@ def get_default_arguments(description=None): help="Random initialization method.", choices=["GROW", "FULL", "RAMPED_HALF_HALF"]) # NCES only - parser.add_argument("--learner_name", type=str, default="SetTransformer", help="Learner name.", + parser.add_argument("--learner_names", type=str, nargs="+", default=["SetTransformer"], help="Learner name.", choices=["SetTransformer", "GRU", "LSTM"]) parser.add_argument("--proj_dim", type=int, default=128, help="Number of projection dimensions.") parser.add_argument("--rnn_n_layers", type=int, default=2, help="Number of RNN layers (only for LSTM and GRU).") @@ -122,8 +122,8 @@ def get_default_arguments(description=None): parser.add_argument("--max_length", type=int, default=48, help="Maximum length") parser.add_argument("--load_pretrained", type=bool, default=True, help="Load pretrained.") parser.add_argument("--sorted_examples", type=bool, default=True, help="Sorted examples.") - parser.add_argument("--pretrained_model_name", type=str, default="SetTransformer", help="Pretrained model name", - choices=["SetTransformer", "GRU", "LSTM"]) +# parser.add_argument("--pretrained_model_name", type=str, default="SetTransformer", help="Pretrained model name", +# choices=["SetTransformer", "GRU", "LSTM"]) if description is None: return parser.parse_args() diff --git a/ontolearn/__init__.py b/ontolearn/__init__.py index 8e19ad19..8b732d2b 100644 --- a/ontolearn/__init__.py +++ b/ontolearn/__init__.py @@ -22,4 +22,4 @@ # SOFTWARE. 
# ----------------------------------------------------------------------------- -__version__ = '0.7.1' +__version__ = '0.8.0' diff --git a/ontolearn/abstracts.py b/ontolearn/abstracts.py index 6d4add7d..90c3e968 100644 --- a/ontolearn/abstracts.py +++ b/ontolearn/abstracts.py @@ -28,7 +28,7 @@ from abc import ABCMeta, abstractmethod from typing import Set, List, Tuple, Iterable, TypeVar, Generic, ClassVar, Optional from owlapy.class_expression import OWLClassExpression -from owlapy.owl_ontology import OWLOntology +from owlapy.abstracts import AbstractOWLOntology from owlapy.utils import iter_count from .data_struct import Experience from .utils import read_csv @@ -355,7 +355,7 @@ class AbstractKnowledgeBase(metaclass=ABCMeta): # CD: This function is used as "a get method". Insteadf either access the atttribute directly # or use it as a property @abstractmethod - def ontology(self) -> OWLOntology: + def ontology(self) -> AbstractOWLOntology: """The base ontology of this knowledge base.""" pass diff --git a/ontolearn/base_concept_learner.py b/ontolearn/base_concept_learner.py index 96a74b57..38a5cbc7 100644 --- a/ontolearn/base_concept_learner.py +++ b/ontolearn/base_concept_learner.py @@ -30,15 +30,14 @@ from typing import List, Tuple, Dict, Optional, Iterable, Generic, TypeVar, ClassVar, Final, Union, cast, Callable, Type import numpy as np import pandas as pd -import os from owlapy.class_expression import OWLClass, OWLClassExpression, OWLThing from owlapy.iri import IRI from owlapy.owl_axiom import OWLDeclarationAxiom, OWLEquivalentClassesAxiom, OWLAxiom from owlapy.owl_individual import OWLNamedIndividual -from owlapy.owl_ontology import OWLOntology -from owlapy.owl_ontology_manager import OWLOntologyManager, AddImport, OWLImportsDeclaration -from owlapy.owl_reasoner import OWLReasoner +from owlapy.abstracts import AbstractOWLOntology, AbstractOWLOntologyManager, AbstractOWLReasoner +from owlapy.owl_ontology_manager import AddImport, OWLImportsDeclaration +from 
owlapy.owl_reasoner import FastInstanceCheckerReasoner, OntologyReasoner from ontolearn.heuristics import CELOEHeuristic from ontolearn.knowledge_base import KnowledgeBase @@ -46,7 +45,6 @@ from ontolearn.refinement_operators import ModifiedCELOERefinement from owlapy.owl_ontology import Ontology from owlapy.owl_ontology_manager import OntologyManager -from owlapy.owl_reasoner import SyncReasoner from owlapy.render import DLSyntaxObjectRenderer from .abstracts import BaseRefinement, AbstractScorer, AbstractHeuristic, \ AbstractConceptNode, AbstractLearningProblem @@ -89,7 +87,7 @@ class BaseConceptLearner(Generic[_N], metaclass=ABCMeta): terminate_on_goal (bool): Whether to stop the algorithm if a perfect solution is found. max_runtime (int): Limit to stop the algorithm after n seconds. _number_of_tested_concepts (int): Yes, you got it. This stores the number of tested concepts. - reasoner (OWLReasoner): The reasoner that this model is using. + reasoner (AbstractOWLReasoner): The reasoner that this model is using. start_time (float): The time when :meth:`fit` starts the execution. Used to calculate the total time :meth:`fit` takes to execute. """ @@ -110,7 +108,7 @@ class BaseConceptLearner(Generic[_N], metaclass=ABCMeta): @abstractmethod def __init__(self, knowledge_base: KnowledgeBase, - reasoner: Optional[OWLReasoner] = None, + reasoner: Optional[AbstractOWLReasoner] = None, quality_func: Optional[AbstractScorer] = None, max_num_of_concepts_tested: Optional[int] = None, max_runtime: Optional[int] = None, @@ -242,7 +240,7 @@ def best_hypotheses(self, n=10) -> Iterable[OWLClassExpression]: def _assign_labels_to_individuals(self, individuals: List[OWLNamedIndividual], hypotheses: List[OWLClassExpression], - reasoner: Optional[OWLReasoner] = None) -> np.ndarray: + reasoner: Optional[AbstractOWLReasoner] = None) -> np.ndarray: """ Use each class expression as a hypothesis, and use it as a binary function to assign 1 or 0 to each individual. 
@@ -302,12 +300,12 @@ def predict(self, individuals: List[OWLNamedIndividual], # If axioms are provided they need to be added to the ontology if axioms is not None: - ontology: OWLOntology = cast(Ontology, self.kb.ontology) - manager: OWLOntologyManager = ontology.get_owl_ontology_manager() + ontology: AbstractOWLOntology = cast(Ontology, self.kb.ontology) + manager: AbstractOWLOntologyManager = ontology.get_owl_ontology_manager() for axiom in axioms: - manager.add_axiom(ontology, axiom) + ontology.add_axiom(axiom) if reasoner is None: - reasoner = SyncReasoner(ontology) + reasoner = FastInstanceCheckerReasoner(ontology, base_reasoner=OntologyReasoner(ontology)) if hypotheses is None: hypotheses = [hyp.concept for hyp in self.best_hypotheses(n)] @@ -324,9 +322,9 @@ def predict(self, individuals: List[OWLNamedIndividual], # Remove the axioms from the ontology if axioms is not None: for axiom in axioms: - manager.remove_axiom(ontology, axiom) + ontology.remove_axiom(axiom) for ind in individuals: - manager.remove_axiom(ontology, OWLDeclarationAxiom(ind)) + ontology.remove_axiom(OWLDeclarationAxiom(ind)) return predictions @@ -334,7 +332,7 @@ def predict(self, individuals: List[OWLNamedIndividual], def number_of_tested_concepts(self): return self._number_of_tested_concepts - def save_best_hypothesis(self, n: int = 10, path: str = 'Predictions', rdf_format: str = 'rdfxml') -> None: + def save_best_hypothesis(self, n: int = 10, path: str = './Predictions', rdf_format: str = 'rdfxml') -> None: """Serialise the best hypotheses to a file. @TODO: CD: This function should be deprecated. 
@TODO: CD: Saving owl class expressions into disk should be disentangled from a concept earner @@ -356,15 +354,15 @@ def save_best_hypothesis(self, n: int = 10, path: str = 'Predictions', rdf_forma if len(best) >= n: logger.warning("There was/were only %d unique result/-s found", len(best)) - manager: OWLOntologyManager = OntologyManager() + manager: AbstractOWLOntologyManager = OntologyManager() - ontology: OWLOntology = manager.create_ontology(IRI.create(NS)) + ontology: AbstractOWLOntology = manager.create_ontology(IRI.create(NS)) manager.load_ontology(IRI.create(self.kb.path)) manager.apply_change(AddImport(ontology, OWLImportsDeclaration(IRI.create('file://' + self.kb.path)))) for ith, h in enumerate(self.best_hypotheses(n=n)): cls_a: OWLClass = OWLClass(IRI.create(NS, "Pred_" + str(ith))) equivalent_classes_axiom = OWLEquivalentClassesAxiom([cls_a, h]) - manager.add_axiom(ontology, equivalent_classes_axiom) + ontology.add_axiom(equivalent_classes_axiom) # @TODO:CD: We should find a way to include information (F1score etc) outside of OWL class expression instances """ try: @@ -375,14 +373,14 @@ def save_best_hypothesis(self, n: int = 10, path: str = 'Predictions', rdf_forma if isinstance(self.quality_func, Accuracy): accuracy = OWLAnnotationAssertionAxiom(cls_a.iri, OWLAnnotation( OWLAnnotationProperty(IRI.create(SNS, "accuracy")), OWLLiteral(quality))) - manager.add_axiom(ontology, accuracy) + ontology.add_axiom(accuracy) elif isinstance(self.quality_func, F1): f1_score = OWLAnnotationAssertionAxiom(cls_a.iri, OWLAnnotation( OWLAnnotationProperty(IRI.create(SNS, "f1_score")), OWLLiteral(quality))) - manager.add_axiom(ontology, f1_score) + ontology.add_axiom(f1_score) """ - manager.save_ontology(ontology, IRI.create('file:/' + path + '.owl')) + ontology.save(IRI.create(path + '.owl')) def load_hypotheses(self, path: str) -> Iterable[OWLClassExpression]: """ @@ -415,7 +413,7 @@ class RefinementBasedConceptLearner(BaseConceptLearner[_N]): terminate_on_goal 
(bool): Whether to stop the algorithm if a perfect solution is found. max_runtime (int): Limit to stop the algorithm after n seconds. _number_of_tested_concepts (int): Yes, you got it. This stores the number of tested concepts. - reasoner (OWLReasoner): The reasoner that this model is using. + reasoner (AbstractOWLReasoner): The reasoner that this model is using. start_time (float): The time when :meth:`fit` starts the execution. Used to calculate the total time :meth:`fit` takes to execute. iter_bound (int): Limit to stop the algorithm after n refinement steps are done. @@ -437,7 +435,7 @@ class RefinementBasedConceptLearner(BaseConceptLearner[_N]): @abstractmethod def __init__(self, knowledge_base: KnowledgeBase, - reasoner: Optional[OWLReasoner] = None, + reasoner: Optional[AbstractOWLReasoner] = None, refinement_operator: Optional[BaseRefinement] = None, heuristic_func: Optional[AbstractHeuristic] = None, quality_func: Optional[AbstractScorer] = None, diff --git a/ontolearn/base_nces.py b/ontolearn/base_nces.py index 226aedd8..ca0730c9 100644 --- a/ontolearn/base_nces.py +++ b/ontolearn/base_nces.py @@ -35,7 +35,7 @@ class BaseNCES: - def __init__(self, knowledge_base_path, learner_name, path_of_embeddings, batch_size=256, learning_rate=1e-4, + def __init__(self, knowledge_base_path, learner_names, path_of_embeddings, batch_size=256, learning_rate=1e-4, decay_rate=0.0, clip_value=5.0, num_workers=4): self.name = "NCES" kb = KnowledgeBase(path=knowledge_base_path) @@ -52,7 +52,7 @@ def __init__(self, knowledge_base_path, learner_name, path_of_embeddings, batch_ self.all_individuals = set([ind.str.split("/")[-1] for ind in kb.individuals()]) self.inv_vocab = np.array(vocab, dtype='object') self.vocab = {vocab[i]: i for i in range(len(vocab))} - self.learner_name = learner_name + self.learner_names = learner_names self.num_examples = self.find_optimal_number_of_examples(kb) self.batch_size = batch_size self.learning_rate = learning_rate diff --git 
a/ontolearn/concept_learner.py b/ontolearn/concept_learner.py index 92992365..9a9d2112 100644 --- a/ontolearn/concept_learner.py +++ b/ontolearn/concept_learner.py @@ -26,8 +26,8 @@ import logging import operator -import random import time +from datetime import datetime from contextlib import contextmanager from itertools import islice, chain from typing import Any, Callable, Dict, FrozenSet, Set, List, Tuple, Iterable, Optional, Union @@ -39,7 +39,7 @@ from owlapy.owl_individual import OWLNamedIndividual from owlapy.owl_literal import OWLLiteral from owlapy.owl_property import OWLDataProperty -from owlapy.owl_reasoner import OWLReasoner +from owlapy.abstracts import AbstractOWLReasoner from torch.utils.data import DataLoader from torch.functional import F from torch.nn.utils.rnn import pad_sequence @@ -78,6 +78,9 @@ from owlapy.utils import OrderedOWLObject from sortedcontainers import SortedSet import os +import json +import glob +from ontolearn.lp_generator import LPGen logger = logging.getLogger(__name__) @@ -104,7 +107,7 @@ class CELOE(RefinementBasedConceptLearner[OENode]): _number_of_tested_concepts (int): Yes, you got it. This stores the number of tested concepts. operator (BaseRefinement): Operator used to generate refinements. quality_func (AbstractScorer) The quality function to be used. - reasoner (OWLReasoner): The reasoner that this model is using. + reasoner (AbstractOWLReasoner): The reasoner that this model is using. search_tree (Dict[OWLClassExpression, TreeNode[OENode]]): Dict to store the TreeNode for a class expression. start_class (OWLClassExpression): The starting class expression for the refinement operation. start_time (float): The time when :meth:`fit` starts the execution. 
Used to calculate the total time :meth:`fit` @@ -131,7 +134,7 @@ class CELOE(RefinementBasedConceptLearner[OENode]): def __init__(self, knowledge_base: KnowledgeBase, - reasoner: Optional[OWLReasoner] = None, + reasoner: Optional[AbstractOWLReasoner] = None, refinement_operator: Optional[BaseRefinement[OENode]] = None, quality_func: Optional[AbstractScorer] = None, heuristic_func: Optional[AbstractHeuristic] = None, @@ -158,7 +161,7 @@ def __init__(self, max_runtime (int): Limit to stop the algorithm after n seconds. Defaults to 5. max_results (int): Maximum hypothesis to store. Defaults to 10. quality_func (AbstractScorer) The quality function to be used. Defaults to `F1`. - reasoner (OWLReasoner): Optionally use a different reasoner. If reasoner=None, the reasoner of + reasoner (AbstractOWLReasoner): Optionally use a different reasoner. If reasoner=None, the reasoner of the :attr:`knowledge_base` is used. terminate_on_goal (bool): Whether to stop the algorithm if a perfect solution is found. Defaults to True. @@ -605,7 +608,7 @@ class OCEL(CELOE): _number_of_tested_concepts (int): Yes, you got it. This stores the number of tested concepts. operator (BaseRefinement): Operator used to generate refinements. quality_func (AbstractScorer) The quality function to be used. - reasoner (OWLReasoner): The reasoner that this model is using. + reasoner (AbstractOWLReasoner): The reasoner that this model is using. search_tree (Dict[OWLClassExpression, TreeNode[OENode]]): Dict to store the TreeNode for a class expression. start_class (OWLClassExpression): The starting class expression for the refinement operation. start_time (float): The time when :meth:`fit` starts the execution. 
Used to calculate the total time :meth:`fit` @@ -618,7 +621,7 @@ class OCEL(CELOE): def __init__(self, knowledge_base: KnowledgeBase, - reasoner: Optional[OWLReasoner] = None, + reasoner: Optional[AbstractOWLReasoner] = None, refinement_operator: Optional[BaseRefinement[OENode]] = None, quality_func: Optional[AbstractScorer] = None, heuristic_func: Optional[AbstractHeuristic] = None, @@ -645,7 +648,7 @@ def __init__(self, max_runtime (int): Limit to stop the algorithm after n seconds. Defaults to 5. max_results (int): Maximum hypothesis to store. Defaults to 10. quality_func (AbstractScorer) The quality function to be used. Defaults to `F1`. - reasoner (OWLReasoner): Optionally use a different reasoner. If reasoner=None, the reasoner of + reasoner (AbstractOWLReasoner): Optionally use a different reasoner. If reasoner=None, the reasoner of the :attr:`knowledge_base` is used. terminate_on_goal (bool): Whether to stop the algorithm if a perfect solution is found. Defaults to True. @@ -706,7 +709,7 @@ class EvoLearner(BaseConceptLearner[EvoLearnerNode]): population_size (int): Population size for the evolutionary algorithm. pset (gp.PrimitiveSetTyped): Contains the primitives that can be used to solve a Strongly Typed GP problem. quality_func: Function to evaluate the quality of solution concepts. - reasoner (OWLReasoner): The reasoner that this model is using. + reasoner (AbstractOWLReasoner): The reasoner that this model is using. start_time (float): The time when :meth:`fit` starts the execution. Used to calculate the total time :meth:`fit` takes to execute. terminate_on_goal (bool): Whether to stop the algorithm if a perfect solution is found. 
@@ -754,7 +757,7 @@ class EvoLearner(BaseConceptLearner[EvoLearnerNode]): def __init__(self, knowledge_base: KnowledgeBase, - reasoner: Optional[OWLReasoner] = None, + reasoner: Optional[AbstractOWLReasoner] = None, quality_func: Optional[AbstractScorer] = None, fitness_func: Optional[AbstractFitness] = None, init_method: Optional[AbstractEAInitialization] = None, @@ -789,7 +792,7 @@ def __init__(self, num_generations (int): Number of generation for the evolutionary algorithm. Defaults to 200. population_size (int): Population size for the evolutionary algorithm. Defaults to 800. quality_func: Function to evaluate the quality of solution concepts. Defaults to `Accuracy`. - reasoner (OWLReasoner): Optionally use a different reasoner. If reasoner=None, the reasoner of + reasoner (AbstractOWLReasoner): Optionally use a different reasoner. If reasoner=None, the reasoner of the :attr:`knowledge_base` is used. terminate_on_goal (bool): Whether to stop the algorithm if a perfect solution is found. Defaults to True. tournament_size (int): The number of evolutionary individuals participating in each tournament. @@ -1144,7 +1147,7 @@ class CLIP(CELOE): _number_of_tested_concepts (int): Yes, you got it. This stores the number of tested concepts. operator (BaseRefinement): Operator used to generate refinements. quality_func (AbstractScorer) The quality function to be used. - reasoner (OWLReasoner): The reasoner that this model is using. + reasoner (AbstractOWLReasoner): The reasoner that this model is using. search_tree (Dict[OWLClassExpression, TreeNode[OENode]]): Dict to store the TreeNode for a class expression. start_class (OWLClassExpression): The starting class expression for the refinement operation. start_time (float): The time when :meth:`fit` starts the execution. 
Used to calculate the total time :meth:`fit` @@ -1162,7 +1165,7 @@ class CLIP(CELOE): def __init__(self, knowledge_base: KnowledgeBase, knowledge_base_path='', - reasoner: Optional[OWLReasoner] = None, + reasoner: Optional[AbstractOWLReasoner] = None, refinement_operator: Optional[BaseRefinement[OENode]] = ExpressRefinement, quality_func: Optional[AbstractScorer] = None, heuristic_func: Optional[AbstractHeuristic] = None, @@ -1226,7 +1229,7 @@ def load_model(predictor_name, load_pretrained): pretrained_model_path = self.path_of_embeddings.split("embeddings")[ 0] + "trained_models/trained_" + predictor_name + ".pt" if load_pretrained and os.path.isfile(pretrained_model_path): - model.load_state_dict(torch.load(pretrained_model_path, map_location=self.device)) + model.load_state_dict(torch.load(pretrained_model_path, map_location=self.device, weights_only=True)) model.eval() print("\n Loaded length predictor!") return model @@ -1399,11 +1402,10 @@ class NCES(BaseNCES): def __init__(self, knowledge_base_path, quality_func: Optional[AbstractScorer] = None, num_predictions=5, - learner_name="SetTransformer", path_of_embeddings="", proj_dim=128, rnn_n_layers=2, drop_prob=0.1, + learner_names=["SetTransformer"], path_of_embeddings="", proj_dim=128, rnn_n_layers=2, drop_prob=0.1, num_heads=4, num_seeds=1, num_inds=32, ln=False, learning_rate=1e-4, decay_rate=0.0, clip_value=5.0, - batch_size=256, num_workers=4, max_length=48, load_pretrained=True, sorted_examples=False, - pretrained_model_name=None, verbose: int = 0): - super().__init__(knowledge_base_path, learner_name, path_of_embeddings, batch_size, learning_rate, decay_rate, + batch_size=256, num_workers=4, max_length=48, load_pretrained=True, sorted_examples=False, verbose: int = 0): + super().__init__(knowledge_base_path, learner_names, path_of_embeddings, batch_size, learning_rate, decay_rate, clip_value, num_workers) self.quality_func = quality_func self.num_predictions = num_predictions @@ -1419,42 +1421,82 @@ def 
__init__(self, knowledge_base_path, self.ln = ln self.load_pretrained = load_pretrained self.sorted_examples = sorted_examples - self.pretrained_model_name = pretrained_model_name self.verbose = verbose self.model = self.get_synthesizer() self.dl_parser = DLSyntaxParser(namespace=self.kb_namespace) self.best_predictions = None - def get_synthesizer(self): - def load_model(learner_name, load_pretrained): - if learner_name == 'SetTransformer': - model = SetTransformer(self.knowledge_base_path, self.vocab, self.inv_vocab, self.max_length, - self.input_size, self.proj_dim, self.num_heads, self.num_seeds, self.num_inds, - self.ln) - elif learner_name == 'GRU': - model = GRU(self.knowledge_base_path, self.vocab, self.inv_vocab, self.max_length, self.input_size, - self.proj_dim, self.rnn_n_layers, self.drop_prob) - elif learner_name == 'LSTM': - model = LSTM(self.knowledge_base_path, self.vocab, self.inv_vocab, self.max_length, self.input_size, - self.proj_dim, self.rnn_n_layers, self.drop_prob) - if load_pretrained: - model_path = self.path_of_embeddings.split("embeddings")[ - 0] + "trained_models/trained_" + learner_name + ".pt" - model.load_state_dict(torch.load(model_path, map_location=self.device)) - model.eval() - if self.verbose > 0: - print("\n Loaded synthesizer model!") - return model + def get_synthesizer(self, path=None): + m1 = SetTransformer(self.knowledge_base_path, self.vocab, self.inv_vocab, self.max_length, + self.input_size, self.proj_dim, self.num_heads, self.num_seeds, self.num_inds, + self.ln) + m2 = GRU(self.knowledge_base_path, self.vocab, self.inv_vocab, self.max_length, self.input_size, + self.proj_dim, self.rnn_n_layers, self.drop_prob) + + m3 = LSTM(self.knowledge_base_path, self.vocab, self.inv_vocab, self.max_length, self.input_size, + self.proj_dim, self.rnn_n_layers, self.drop_prob) + Untrained = [] + for name in self.learner_names: + for m in [m1,m2,m3]: + if m.name == name: + Untrained.append(m) + + Models = [] + + if 
self.load_pretrained: + if path is None: + try: + if len(glob.glob(self.path_of_embeddings.split("embeddings")[0] + "trained_models/*.pt")) == 0: + raise FileNotFoundError + else: + for file_name in glob.glob(self.path_of_embeddings.split("embeddings")[0] + "trained_models/*.pt"): + for m in Untrained: + if m.name in file_name: + try: + m.load_state_dict(torch.load(file_name, map_location=self.device, weights_only=True)) + Models.append(m.eval()) + except Exception as e: + print(e) + pass + except Exception as e: + print(e) + raise RuntimeError + + if Models: + print("\n Loaded NCES weights!\n") + return Models + else: + print("!!!Returning untrained models, could not load pretrained") + return Untrained - if not self.load_pretrained: - return [load_model(self.learner_name, self.load_pretrained)] - elif self.load_pretrained and isinstance(self.pretrained_model_name, str): - return [load_model(self.pretrained_model_name, self.load_pretrained)] - elif self.load_pretrained and isinstance(self.pretrained_model_name, list): - return [load_model(name, self.load_pretrained) for name in self.pretrained_model_name] + elif len(glob.glob(path+"/*.pt")) == 0: + print("No pretrained model found! If directory is empty or does not exist, set the NCES `load_pretrained` parameter to `False` or make sure `save_model` was set to `True` in the .train() method.") + raise FileNotFoundError + else: + for file_name in glob.glob(path+"/*.pt"): + for m in Untrained: + if m.name in file_name: + try: + m.load_state_dict(torch.load(file_name, map_location=self.device, weights_only=True)) + Models.append(m.eval()) + except Exception as e: + print(e) + pass + if Models: + print("\n Loaded NCES weights!\n") + return Models + else: + print("!!!Returning untrained models, could not load pretrained") + return Untrained + else: + print("!!!Returning untrained models, could not load pretrained. 
Check the `load_pretrained parameter` or train the models using NCES.train(data).") + return Untrained - def refresh(self): - self.model = self.get_synthesizer() + + def refresh(self, path=None): + if path is not None: + self.load_pretrained = True + self.model = self.get_synthesizer(path) def sample_examples(self, pos, neg): # pragma: no cover assert type(pos[0]) == type(neg[0]), "The two iterables pos and neg must be of same type" @@ -1507,7 +1549,7 @@ def fit_one(self, pos: Union[Set[OWLNamedIndividual], Set[str]], neg: Union[Set[ Pos = np.random.choice(pos_str, size=(self.num_predictions, len(pos_str)), replace=True) Neg = np.random.choice(neg_str, size=(self.num_predictions, len(neg_str)), replace=True) - assert self.load_pretrained and self.pretrained_model_name, \ + assert self.load_pretrained and self.learner_names, \ "No pretrained model found. Please first train NCES, see the <> method below" dataset = NCESDataLoaderInference([("", Pos_str, Neg_str) for (Pos_str, Neg_str) in zip(Pos, Neg)], @@ -1534,7 +1576,9 @@ def fit_one(self, pos: Union[Set[OWLNamedIndividual], Set[str]], neg: Union[Set[ predictions.append(concept) return predictions - def fit(self, pos: Union[Set[OWLNamedIndividual], Set[str]], neg: Union[Set[OWLNamedIndividual], Set[str]], **kwargs): + def fit(self, learning_problem: PosNegLPStandard, **kwargs): + pos = learning_problem.pos + neg = learning_problem.neg if isinstance(pos, set) or isinstance(pos, frozenset): pos_list = list(pos) neg_list = list(neg) @@ -1572,7 +1616,7 @@ def best_hypotheses(self, n=1) -> Union[OWLClassExpression, Iterable[OWLClassExp elif len(self.best_predictions) == 1 or n == 1: return self.best_predictions[0].concept else: - return self.best_predictions[:n] + return [best.concept for best in self.best_predictions[:n]] def convert_to_list_str_from_iterable(self, data): # pragma: no cover target_concept_str, examples = data[0], data[1:] @@ -1597,7 +1641,7 @@ def fit_from_iterable(self, dataset: Union[List[Tuple[str, 
Set[OWLNamedIndividua - This function returns predictions as owl class expressions, not nodes as in fit """ - assert self.load_pretrained and self.pretrained_model_name, \ + assert self.load_pretrained and self.learner_names, \ "No pretrained model found. Please first train NCES, refer to the <> method" dataset = [self.convert_to_list_str_from_iterable(datapoint) for datapoint in dataset] dataset = NCESDataLoaderInference(dataset, self.instance_embeddings, self.vocab, self.inv_vocab, @@ -1623,20 +1667,39 @@ def fit_from_iterable(self, dataset: Union[List[Tuple[str, Set[OWLNamedIndividua print("Predictions: ", predictions_str) return predictions_as_owl_class_expressions - def train(self, data: Iterable[List[Tuple]], epochs=300, batch_size=None, learning_rate=1e-4, decay_rate=0.0, + @staticmethod + def generate_training_data(kb_path, num_lps=1000, storage_dir="./NCES_Training_Data"): + lp_gen = LPGen(kb_path=kb_path, max_num_lps=num_lps, storage_dir=storage_dir) + lp_gen.generate() + print("Loading generated data...") + with open(f"{storage_dir}/LPs.json") as file: + lps = list(json.load(file).items()) + print("Number of learning problems:", len(lps)) + return lps + + + + def train(self, data: Iterable[List[Tuple]]=None, epochs=50, batch_size=64, num_lps=1000, learning_rate=1e-4, decay_rate=0.0, clip_value=5.0, num_workers=8, save_model=True, storage_path=None, optimizer='Adam', record_runtime=True, example_sizes=None, shuffle_examples=False): + if os.cpu_count() <= num_workers: + num_workers = max(0,os.cpu_count()-1) + if storage_path is None: + currentDateAndTime = datetime.now() + storage_path = f'NCES-Experiment-{currentDateAndTime.strftime("%H-%M-%S")}' + if not os.path.exists(storage_path): + os.mkdir(storage_path) + self.trained_models_path = storage_path+"/trained_models" if batch_size is None: batch_size = self.batch_size + if data is None: + data = self.generate_training_data(self.knowledge_base_path, num_lps=num_lps, storage_dir=storage_path) train_dataset 
= NCESDataLoader(data, self.instance_embeddings, self.vocab, self.inv_vocab, shuffle_examples=shuffle_examples, max_length=self.max_length, example_sizes=example_sizes) - train_dataloader = DataLoader(train_dataset, batch_size=batch_size, num_workers=self.num_workers, + train_dataloader = DataLoader(train_dataset, batch_size=batch_size, num_workers=num_workers, collate_fn=self.collate_batch, shuffle=True) - if storage_path is None: - storage_path = self.knowledge_base_path[:self.knowledge_base_path.rfind("/")] - elif not os.path.exists(storage_path) and (record_runtime or save_model): - os.mkdir(storage_path) + trainer = NCESTrainer(self, epochs=epochs, learning_rate=learning_rate, decay_rate=decay_rate, clip_value=clip_value, num_workers=num_workers, storage_path=storage_path) trainer.train(train_dataloader, save_model, optimizer, record_runtime) diff --git a/ontolearn/model_adapter.py b/ontolearn/executor.py similarity index 62% rename from ontolearn/model_adapter.py rename to ontolearn/executor.py index bf7f3c2b..122e7b49 100644 --- a/ontolearn/model_adapter.py +++ b/ontolearn/executor.py @@ -33,12 +33,10 @@ from owlapy.iri import IRI from owlapy.owl_axiom import OWLAxiom from owlapy.owl_individual import OWLNamedIndividual -from owlapy.owl_reasoner import OWLReasoner +from owlapy.abstracts import AbstractOWLReasoner -from ontolearn.abstracts import AbstractHeuristic, AbstractScorer, BaseRefinement, AbstractKnowledgeBase, \ - AbstractNode +from ontolearn.abstracts import AbstractNode from ontolearn.base_concept_learner import BaseConceptLearner -from owlapy.owl_reasoner import SyncReasoner from ontolearn.concept_learner import CELOE, OCEL, EvoLearner, NCES from ontolearn.ea_algorithms import EASimple from ontolearn.ea_initialization import EARandomWalkInitialization, EARandomInitialization, RandomInitMethod @@ -68,6 +66,7 @@ heuristics = {'celoe': CELOEHeuristic, 'ocel': OCELHeuristic} + def transform_string(input_string): """Used to turn camelCase arguments to 
snake_case""" # Use regex to find all capital letters C and replace them with '_C' @@ -90,6 +89,7 @@ def compute_quality(KB, solution, pos, neg, qulaity_func="f1"): # pragma: no co tn = len(neg.difference(instances)) return func(tp=tp, fn=fn, fp=fp, tn=tn)[-1] + def _get_matching_opts(_Type, optargs, kwargs, *, prefix=None): # pragma: no cover """Find the keys in kwargs that are parameters of _Type. @@ -124,177 +124,8 @@ def p(s): _N = TypeVar('_N', bound=AbstractNode) #: -def ModelAdapter(*args, **kwargs): # pragma: no cover - """Instantiate a model through the model adapter. - - .. warning :: - You should not specify both: the _type and the object. For - example, you should not give both 'reasoner' and 'reasoner_type' because the ModelAdapter cant decide - which one to use, the reasoner object or create a new reasoner instance using 'reasoner_type'. - - Note: - If you give `_type` for an argument you can pass further arguments to construct the instance of that - class. The model adapter will arrange every argument automatically and use them to construct an object - for that certain class type. - - Args: - knowledge_base (AbstractKnowledgeBase): A knowledge base. - knowledge_base_type: A knowledge base type. - ...: Knowledge base arguments. - reasoner: A reasoner. - reasoner_type: A reasoner type. - ...: Reasoner constructor arguments. - refinement_operator_type: A refinement operator type. - ...: Refinement operator arguments. - quality_type: An Abstract Scorer type. - ...: Quality arguments. - heuristic_func (AbstractHeuristic): A heuristic. - heuristic_type: An Abstract Heuristic type. - ...: arguments For the heuristic type. - learner_type: A Base Concept Learner type. - ...: Arguments for the learning algorithm. 
- """ - if "knowledge_base" in kwargs: - kb = kwargs.pop("knowledge_base") - if "reasoner" in kwargs: - kwargs["cl_reasoner"] = kwargs["reasoner"] - kwargs.pop("reasoner") - if "knowledge_base_type" in kwargs: - raise ValueError("both knowledge_base and _type specified") - else: - kb_type = kwargs.pop("knowledge_base_type", None) - if kb_type is None: - kb_type = KnowledgeBase - else: - kb_type = kb_type - if "reasoner" in kwargs: - kwargs["cl_reasoner"] = kwargs["reasoner"] - kb_args = _get_matching_opts(kb_type, {}, kwargs) - try: - kb = kb_type(**kb_args) - except TypeError: - kb = None - if kb is not None: - assert isinstance(kb, AbstractKnowledgeBase) - - if "ignore" in kwargs: - assert isinstance(kb, KnowledgeBase) - target_kb = kb.ignore_and_copy(ignored_classes=kwargs.pop("ignore")) - else: - target_kb = kb - - if "cl_reasoner" in kwargs: - reasoner = kwargs.pop("cl_reasoner") - if "reasoner_type" in kwargs: - raise ValueError("both reasoner and _type specified") - else: - reasoner_type = kwargs.pop("reasoner_type", None) - if reasoner_type is None: - reasoner_type = SyncReasoner - assert issubclass(reasoner_type, OWLReasoner) - reasoner = reasoner_type(**_get_matching_opts( - reasoner_type, {'ontology': target_kb.ontology}, kwargs)) - assert isinstance(reasoner, OWLReasoner) - - if "refinement_operator" in kwargs: - operator = kwargs.pop("refinement_operator") - if "refinement_operator_type" in kwargs: - raise ValueError("both refinement_operator and _type specified") - else: - op_type = kwargs.pop("refinement_operator_type", None) - if op_type is None: - op_type = ModifiedCELOERefinement - assert issubclass(op_type, BaseRefinement) - operator = op_type(**_get_matching_opts( - op_type, { - 'knowledge_base': target_kb - }, kwargs)) - assert isinstance(operator, BaseRefinement) - - if "quality_func" in kwargs: - qual = kwargs.pop("quality_func") - if "quality_type" in kwargs: - raise ValueError("both quality_func and _type specified") - else: - quality_type 
= kwargs.pop("quality_type", None) - if quality_type is None: - quality_type = F1 - assert issubclass(quality_type, AbstractScorer) - qual = quality_type(**_get_matching_opts(quality_type, {}, kwargs)) - assert isinstance(qual, AbstractScorer) - - if "heuristic_func" in kwargs: - heur = kwargs.pop("heuristic_func") - if "heuristic_type" in kwargs: - raise ValueError("both heuristic_func and _type specified") - else: - heuristic_type = kwargs.pop("heuristic_type", None) - if heuristic_type is None: - heuristic_type = CELOEHeuristic - assert issubclass(heuristic_type, AbstractHeuristic) - heur = heuristic_type(**_get_matching_opts(heuristic_type, {}, kwargs)) - assert isinstance(heur, AbstractHeuristic) - - if "learner" in kwargs: - learner = kwargs.pop("learner") - learner_type = type(learner) - if "learner_type" in kwargs: - raise ValueError("both learner and _type specified") - else: - learner_type = kwargs.pop("learner_type", None) - if learner_type is None: - learner_type = CELOE - assert issubclass(learner_type, BaseConceptLearner) - learner_args = _get_matching_opts(learner_type, {}, kwargs) - learner = None - - other_components = dict() - clearkeys = set() - for k in list(kwargs): - if k in kwargs and k.endswith("_type"): - clearkeys.add(k) - cls = kwargs[k] - assert issubclass(cls, object) - other_components[k[:-5]] = (cls, _get_matching_opts(cls, {}, kwargs)) - - for k in clearkeys: - kwargs.pop(k) - - if kwargs: - logger.warning("Unused parameters: %s", kwargs) - - other_instances = dict() - for k in other_components: - cls = other_components[k][0] - logger.debug("Instantiating %s of type %s", k, cls) - - # noinspection PyArgumentList - inst = cls(**_get_matching_opts(cls, { - 'knowledge_base': target_kb, - 'reasoner': reasoner, - 'refinement_operator': operator, - 'quality_func': qual, - 'heuristic_func': heur, - }, other_components[k][1])) - other_instances[k] = inst - - if learner is None: - learner = learner_type(**_get_matching_opts( - learner_type, { 
- **other_instances, - 'knowledge_base': target_kb, - 'reasoner': reasoner, - 'refinement_operator': operator, - 'quality_func': qual, - 'heuristic_func': heur, - }, learner_args - )) - - return learner - - -class Trainer: # pragma: no cover - def __init__(self, learner: BaseConceptLearner, reasoner: OWLReasoner): +class Trainer: # pragma: no cover + def __init__(self, learner: BaseConceptLearner, reasoner: AbstractOWLReasoner): """ A class to disentangle the learner from its training. diff --git a/ontolearn/incomplete_kb.py b/ontolearn/incomplete_kb.py new file mode 100644 index 00000000..4a4574f0 --- /dev/null +++ b/ontolearn/incomplete_kb.py @@ -0,0 +1,203 @@ +from owlready2 import * +import random +from typing import Set + + +def make_kb_incomplete_ass(kb_path, output_path, rate, seed): + """ + Makes the knowledge base incomplete by removing a certain percentage of statements (triples). + + Inputs: + --------------- + + kb_path: Path to the input knowledge base. + output_path: Path to save the modified (incomplete) knowledge base. + rate: Percentage of statements to remove (0-100). + seed: random seed for reproducibility. 
+ + Output: + --------------- + + Incomplete KB at level rate % + """ + + random.seed(seed) + + # Load the ontology + kb = get_ontology(kb_path).load() + + # Get all individuals in the ontology + all_individuals = list(kb.individuals()) + + # Collect all triples (subject-predicate-object) related to the individuals + all_triples = [] + for individual in all_individuals: + for prop in individual.get_properties(): + for value in prop[individual]: + all_triples.append((individual, prop, value)) + + # Calculate the number of triples to remove based on the rate + num_to_remove = int(len(all_triples) * (rate / 100)) + + # Randomly select triples to remove + triples_to_remove = random.sample(all_triples, num_to_remove) + + # Remove the selected triples + for subject, predicate, obj in triples_to_remove: + + predicate[subject].remove(obj) + + # Save the modified ontology to a new file + kb.save(file=output_path, format="rdfxml") + + + + + +def make_kb_incomplete(kb_path, output_path, rate, seed)-> Set[str]: + """ + Makes the knowledge base incomplete by removing a certain percentage of individuals. + + + Inputs: + --------------- + + kb_path: Path to the input knowledge base. + output_path: Path to save the modified (incomplete) knowledge base. + rate: Percentage of individuals to remove (0-100). + seed: random seed for reproducibility. 
+ + Output: + --------------- + + Incomplete KB at level rate % + """ + + random.seed(seed) + + # Load the ontology + kb = get_ontology(kb_path).load() + + # Get all individuals (instances) in the ABox + all_individuals = list(kb.individuals()) + + # Calculate the number of individuals to remove based on the rate + num_to_remove = int(len(all_individuals) * (rate / 100)) + + # Randomly select individuals to remove + individuals_to_remove = random.sample(all_individuals, num_to_remove) + + # Remove the selected individuals + for individual in individuals_to_remove: + destroy_entity(individual) + + # Save the modified ontology to a new file + kb.save(file=output_path, format="rdfxml") + + +def make_kb_inconsistent(kb_path, output_path, rate, seed, max_attempts=100): + """ + This function makes the knowledge base (KB) inconsistent by introducing incorrect statements. + + Parameters: + kb_path (str): Path to the original OWL ontology file. + output_path (str): Path to save the inconsistent ontology file. + rate (float): Percentage of incorrect statements to introduce (0-100). + seed (int): Seed for reproducibility. + max_attempts (int): Maximum attempts to find a valid incorrect statement. + """ + + # Set the random seed for reproducibility + random.seed(seed) + + # Load the ontology + onto = get_ontology(kb_path).load() + + # Get all individuals, classes, and properties + all_individuals = list(onto.individuals()) + all_classes = list(onto.classes()) + all_object_properties = list(onto.object_properties()) + all_data_properties = list(onto.data_properties()) + + def count_triples(): + """Count the number of triples (statements) in the ontology.""" + return len(list(onto.world.sparql(""" + SELECT ?s ?p ?o + WHERE { + ?s ?p ?o . 
+ } + """))) + + def generate_incorrect_class_assertion(individual): + """Generate an incorrect class assertion by adding a disjoint or contradictory class.""" + class_candidates = [cls for cls in all_classes if cls not in individual.is_a] + if not class_candidates: + return None + + selected_class = random.choice(class_candidates) + individual.is_a.append(selected_class) + print(f"Added incorrect class assertion: {individual} rdf:type {selected_class}") + return f"Added incorrect class assertion: {individual} rdf:type {selected_class}" + + def generate_incorrect_object_property(individual): + """Generate an incorrect object property assertion.""" + prop = random.choice(all_object_properties) + incorrect_object = random.choice(all_individuals) + + if incorrect_object not in prop[individual]: + prop[individual].append(incorrect_object) + print(f"Added incorrect object property assertion: {individual} {prop.name} {incorrect_object}") + return f"Added incorrect object property assertion: {individual} {prop.name} {incorrect_object}" + + def generate_incorrect_data_property(individual): + + """Generate an incorrect data property assertion (if exist in the KB).""" + if len(all_data_properties) != 0: + prop = random.choice(all_data_properties) + incorrect_value = "inconsistent_value" # Example of an incorrect data value + + if incorrect_value not in prop[individual]: + setattr(individual, prop.name, incorrect_value) + print(f"Added incorrect data property assertion: {individual} {prop.name} {incorrect_value}") + return f"Added incorrect data property assertion: {individual} {prop.name} {incorrect_value}" + + + + def insert_incorrect_statements(): + """Insert incorrect statements based on the specified rate.""" + num_triples = count_triples() # Use the total number of triples in the KB + num_incorrect = int(num_triples * (rate / 100)) + + incorrect_statements = [] + + for _ in range(num_incorrect): + attempts = 0 + while attempts < max_attempts: + individual = 
random.choice(all_individuals) + statement_type = random.choice(['class', 'object_property']) #could also add data properties later on + + if statement_type == 'class': + result = generate_incorrect_class_assertion(individual) + elif statement_type == 'object_property': + result = generate_incorrect_object_property(individual) + + + if result: + incorrect_statements.append(result) + break + + attempts += 1 + + return incorrect_statements + + # Insert incorrect statements + inconsistencies = insert_incorrect_statements() + + # Save the modified ontology + onto.save(file=output_path, format="rdfxml") + + # Return the list of inconsistencies added + return inconsistencies + + + \ No newline at end of file diff --git a/ontolearn/knowledge_base.py b/ontolearn/knowledge_base.py index c6f279ba..a5632d90 100644 --- a/ontolearn/knowledge_base.py +++ b/ontolearn/knowledge_base.py @@ -37,11 +37,9 @@ from owlapy.owl_datatype import OWLDatatype from owlapy.owl_individual import OWLNamedIndividual from owlapy.owl_literal import BooleanOWLDatatype, NUMERIC_DATATYPES, DoubleOWLDatatype, TIME_DATATYPES, OWLLiteral -from owlapy.owl_ontology import OWLOntology -from owlapy.owl_ontology_manager import OWLOntologyManager +from owlapy.abstracts import AbstractOWLOntology, AbstractOWLReasoner, AbstractOWLOntologyManager from owlapy.owl_property import OWLObjectProperty, OWLDataProperty, OWLObjectPropertyExpression, \ OWLDataPropertyExpression -from owlapy.owl_reasoner import OWLReasoner from owlapy.owl_ontology import Ontology from owlapy.owl_ontology_manager import OntologyManager from owlapy.owl_reasoner import OntologyReasoner, FastInstanceCheckerReasoner @@ -63,7 +61,7 @@ logger = logging.getLogger(__name__) -def depth_Default_ReasonerFactory(onto: OWLOntology) -> OWLReasoner: # pragma: no cover +def depth_Default_ReasonerFactory(onto: AbstractOWLOntology) -> AbstractOWLReasoner: # pragma: no cover assert isinstance(onto, Ontology) base_reasoner = OntologyReasoner(ontology=onto) 
return FastInstanceCheckerReasoner(ontology=onto, base_reasoner=base_reasoner) @@ -105,13 +103,15 @@ class KnowledgeBase(AbstractKnowledgeBase): path: str use_individuals_cache: bool generator: ConceptGenerator + # TODO:CD: We do not benefit from using overloading in the init of KG + # TODO:CD: We need to remove overloading by having a single __init__() filled with default parameters @overload def __init__(self, *, path: str, - ontologymanager_factory: Callable[[], OWLOntologyManager] = OntologyManager( + ontologymanager_factory: Callable[[], AbstractOWLOntologyManager] = OntologyManager( world_store=None), - reasoner_factory: Callable[[OWLOntology], OWLReasoner] = None, + reasoner_factory: Callable[[AbstractOWLOntology], AbstractOWLReasoner] = None, length_metric: Optional[OWLClassExpressionLengthMetric] = None, length_metric_factory: Optional[Callable[[], OWLClassExpressionLengthMetric]] = None, individuals_cache_size=128, @@ -121,8 +121,8 @@ def __init__(self, *, @overload def __init__(self, *, - ontology: OWLOntology, - reasoner: OWLReasoner, + ontology: AbstractOWLOntology, + reasoner: AbstractOWLReasoner, load_class_hierarchy: bool = True, length_metric: Optional[OWLClassExpressionLengthMetric] = None, length_metric_factory: Optional[Callable[[], OWLClassExpressionLengthMetric]] = None, @@ -132,22 +132,20 @@ def __init__(self, *, def __init__(self, *, path: Optional[str] = None, - ontologymanager_factory: Optional[Callable[[], OWLOntologyManager]] = None, - reasoner_factory: Optional[Callable[[OWLOntology], OWLReasoner]] = None, + ontologymanager_factory: Optional[Callable[[], AbstractOWLOntologyManager]] = None, + reasoner_factory: Optional[Callable[[AbstractOWLOntology], AbstractOWLReasoner]] = None, length_metric_factory: Optional[Callable[[], OWLClassExpressionLengthMetric]] = None, - ontology: Optional[OWLOntology] = None, - reasoner: Optional[OWLReasoner] = None, + ontology: Optional[AbstractOWLOntology] = None, + reasoner: Optional[AbstractOWLReasoner] 
= None, length_metric: Optional[OWLClassExpressionLengthMetric] = None, - - individuals_cache_size=128, + individuals_cache_size:int=0, backend_store: bool = False, class_hierarchy: Optional[ClassHierarchy] = None, load_class_hierarchy: bool = True, object_property_hierarchy: Optional[ObjectPropertyHierarchy] = None, data_property_hierarchy: Optional[DatatypePropertyHierarchy] = None, - include_implicit_individuals=False - ): + include_implicit_individuals=False): AbstractKnowledgeBase.__init__(self) self.path = path @@ -172,7 +170,7 @@ def __init__(self, *, self.manager.save_world() logger.debug("Synced world to backend store") - reasoner: OWLReasoner + reasoner: AbstractOWLReasoner if reasoner is not None: self.reasoner = reasoner elif reasoner_factory is not None: @@ -206,7 +204,9 @@ def __init__(self, *, self.dp_ranges = dict() # OWL class expression generator self.generator = ConceptGenerator() - + # TODO:CD: We need to remove these next two lines + # TODO:CD: No caching: Caching must be done by the reasoners and it must be optional. + # TODO:CD: No ind_set. This hinders us scaling large KGs self.use_individuals_cache, self.ind_cache = init_named_individuals(individuals_cache_size) self.ind_set = init_individuals_from_concepts(include_implicit_individuals, reasoner=self.reasoner, @@ -226,11 +226,12 @@ def individuals(self, concept: Optional[OWLClassExpression] = None, named_indivi Returns: Individuals belonging to the given class. 
""" - + # TODO: CD: is_owl_thing workaround must be implemented by reasoner if it is needed if concept is None or concept.is_owl_thing(): for i in self.ind_set: yield i else: + # TODO: CD: Disable caching yield from self.maybe_cache_individuals(concept) def abox(self, individual: Union[OWLNamedIndividual, Iterable[OWLNamedIndividual]] = None, mode='native'): # pragma: no cover @@ -581,6 +582,7 @@ def cache_individuals(self, ce: OWLClassExpression) -> None: self.ind_cache[ce] = frozenset(temp) def maybe_cache_individuals(self, ce: OWLClassExpression) -> Iterable[OWLNamedIndividual]: + # TODO:CD: Disable caching. if self.use_individuals_cache: self.cache_individuals(ce) yield from self.ind_cache[ce] @@ -588,6 +590,7 @@ def maybe_cache_individuals(self, ce: OWLClassExpression) -> Iterable[OWLNamedIn yield from self.reasoner.instances(ce) def maybe_cache_individuals_count(self, ce: OWLClassExpression) -> int: + # TODO:CD: Disable caching. if self.use_individuals_cache: self.cache_individuals(ce) r = self.ind_cache[ce] @@ -595,6 +598,7 @@ def maybe_cache_individuals_count(self, ce: OWLClassExpression) -> int: else: return iter_count(self.reasoner.instances(ce)) + # TODO:CD: Remove this function from KB. Size count should not be done by KB. def individuals_count(self, concept: Optional[OWLClassExpression] = None) -> int: """Returns the number of all individuals belonging to the concept in the ontology. @@ -603,12 +607,12 @@ def individuals_count(self, concept: Optional[OWLClassExpression] = None) -> int Returns: Number of the individuals belonging to the given class. """ - if concept is None or concept.is_owl_thing(): return len(self.ind_set) else: return self.maybe_cache_individuals_count(concept) + # TODO:CD: Delete individuals_set functions. @overload def individuals_set(self, concept: OWLClassExpression): ... 
@@ -644,6 +648,7 @@ def individuals_set(self, else: return frozenset(arg) + # TODO:CD: Redundant def all_individuals_set(self): """Retrieve all the individuals of the knowledge base. @@ -656,6 +661,7 @@ def all_individuals_set(self): else: return frozenset(self.ontology.individuals_in_signature()) + def most_general_object_properties(self, *, domain: OWLClassExpression, inverse: bool = False) \ -> Iterable[OWLObjectProperty]: """Find the most general object property. @@ -682,16 +688,9 @@ def data_properties_for_domain(self, domain: OWLClassExpression, data_properties if domain.is_owl_thing() or inds_domain <= self.individuals_set(self.get_data_property_domains(prop)): yield prop - # in case more types of AbstractLearningProblem are introduced to the project uncomment the method below and use - # decorators - # @singledispatchmethod - # def encode_learning_problem(self, lp: AbstractLearningProblem): - # raise NotImplementedError(lp) - + # TODO:CD: A learning problem (DL concept learning problem) should not be a part of a knowledge base def encode_learning_problem(self, lp: PosNegLPStandard): """ - @TODO: A learning problem (DL concept learning problem) should not be a part of a knowledge base - Provides the encoded learning problem (lp), i.e. the class containing the set of OWLNamedIndividuals as follows: kb_pos --> the positive examples set, @@ -739,13 +738,11 @@ def encode_learning_problem(self, lp: PosNegLPStandard): kb_neg=kb_neg, kb_all=kb_all, kb_diff=kb_all.difference(kb_pos.union(kb_neg))) - + # TODO: CD: A knowledge base is a data structure and the context of "evaluating" a concept seems to be unrelated def evaluate_concept(self, concept: OWLClassExpression, quality_func: AbstractScorer, encoded_learning_problem: EncodedLearningProblem) -> EvaluatedConcept: """Evaluates a concept by using the encoded learning problem examples, in terms of Accuracy or F1-score. 
- @ TODO: A knowledge base is a data structure and the context of "evaluating" a concept seems to be unrelated - Note: This method is useful to tell the quality (e.q) of a generated concept by the concept learners, to get the set of individuals (e.inds) that are classified by this concept and the amount of them (e.ic). @@ -762,7 +759,7 @@ def evaluate_concept(self, concept: OWLClassExpression, quality_func: AbstractSc e.ic = len(e.inds) _, e.q = quality_func.score_elp(e.inds, encoded_learning_problem) return e - + # TODO: CD: We need to do refactoring to remove redundant class methods defined below in our next release def get_leaf_concepts(self, concept: OWLClass): """Get leaf classes. diff --git a/ontolearn/learners/drill.py b/ontolearn/learners/drill.py index abf82af4..8e46aefa 100644 --- a/ontolearn/learners/drill.py +++ b/ontolearn/learners/drill.py @@ -51,8 +51,8 @@ import torch from ontolearn.data_struct import PrepareBatchOfTraining, PrepareBatchOfPrediction from tqdm import tqdm -from owlapy.utils import OWLClassExpressionLengthMetric from ..utils.static_funcs import make_iterable_verbose +from owlapy.utils import get_expression_length class Drill(RefinementBasedConceptLearner): # pragma: no cover @@ -173,7 +173,8 @@ def initialize_training_class_expression_learning_problem(self, neg: FrozenSet[OWLNamedIndividual]) -> RL_State: """ Initialize """ assert isinstance(pos, frozenset) and isinstance(neg, frozenset), "Pos and neg must be sets" - assert 0 < len(pos) and 0 < len(neg) + assert 0 < len(pos) and 0 < len(neg), ("Positive and negative examples must have at least a single item\n" + "fCurrently: Pos:len(pos)\t Neg:len(neg)\n") # print("Initializing learning problem") # (2) Obtain embeddings of positive and negative examples. 
self.init_embeddings_of_examples(pos_uri=pos, neg_uri=neg) @@ -247,7 +248,7 @@ def train(self, dataset: Optional[Iterable[Tuple[str, Set, Set]]] = None, """ if isinstance(self.heuristic_func, CeloeBasedReward): - print("No training") + print("No training...") return self.terminate_training() if self.verbose > 0: @@ -257,6 +258,9 @@ def train(self, dataset: Optional[Iterable[Tuple[str, Set, Set]]] = None, else: training_data = self.generate_learning_problems(num_of_target_concepts, num_learning_problems) + if isinstance(training_data,Iterable) is False: + print(f"We couldn't generate training data on this given knowledge base ({self.kb})") + return self.terminate_training() for (target_owl_ce, positives, negatives) in training_data: print(f"\nGoal Concept:\t {target_owl_ce}\tE^+:[{len(positives)}]\t E^-:[{len(negatives)}]") @@ -319,7 +323,7 @@ def fit(self, learning_problem: PosNegLPStandard, max_runtime=None): root_state = self.initialize_training_class_expression_learning_problem(pos=learning_problem.pos, neg=learning_problem.neg) self.operator.set_input_examples(pos=learning_problem.pos, neg=learning_problem.neg) - assert root_state.quality > 0, f"Root state {root_state} must have quality >0" + assert root_state.quality > 0, f"Root state {root_state} must have the quality >0" # (5) Add root state into search tree root_state.heuristic = root_state.quality self.search_tree.add(root_state) @@ -337,7 +341,7 @@ def fit(self, learning_problem: PosNegLPStandard, max_runtime=None): for _ in make_iterable_verbose(range(0, self.iter_bound), verbose=self.verbose, desc=f"Learning OWL Class Expression at most {self.iter_bound} iteration"): - assert len(self.search_tree) > 0 + assert len(self.search_tree) > 0, "Search Tree cannot be empty!" self.search_tree.show_current_search_tree() # (6.1) Get the most fitting RL-state. 
most_promising = self.next_node_to_expand() @@ -419,8 +423,7 @@ def create_rl_state(self, c: OWLClassExpression, parent_node: Optional[RL_State] is_root: bool = False) -> RL_State: """ Create an RL_State instance.""" rl_state = RL_State(c, parent_node=parent_node, is_root=is_root) - # TODO: Will be fixed by https://github.com/dice-group/owlapy/issues/35 - rl_state.length = OWLClassExpressionLengthMetric.get_default().length(c) + rl_state.length = get_expression_length(c) return rl_state def compute_quality_of_class_expression(self, state: RL_State) -> None: @@ -455,8 +458,8 @@ def sequence_of_actions(self, root_rl_state: RL_State) \ current_state = root_rl_state path_of_concepts = [] rewards = [] - assert current_state.quality > 0 - assert current_state.heuristic is None + assert current_state.quality > 0, f"Root state ({current_state}) must have quality >0. \tCurrently {current_state.quality}" + assert current_state.heuristic is None,f"Root state ({current_state}) must have heuristic value >0 . \tCurrently {current_state.heuristic}" # (1) for _ in range(self.num_of_sequential_actions): assert isinstance(current_state, RL_State) @@ -745,11 +748,15 @@ def generate_learning_problems(self, individuals_j = set(self.kb.individuals(j)) if len(individuals_j) < size_of_examples: continue + # Generate Learning problems from a single target for _ in range(num_of_target_concepts): - lp = (str_dl_concept_i, - set(random.sample(individuals_i, size_of_examples)), - set(random.sample(individuals_j, size_of_examples))) + sampled_positives = set(random.sample(individuals_i, size_of_examples)) + sampled_negatives = set(random.sample(individuals_j, size_of_examples)) + if sampled_negatives== sampled_positives: + print("Sampled Positives and negatives are same. 
We need to ignore this example") + continue + lp = (str_dl_concept_i,sampled_positives,sampled_negatives) examples.append(lp) counter += 1 if counter == num_learning_problems: diff --git a/ontolearn/learners/tree_learner.py b/ontolearn/learners/tree_learner.py index d531275b..2d534212 100644 --- a/ontolearn/learners/tree_learner.py +++ b/ontolearn/learners/tree_learner.py @@ -118,7 +118,6 @@ def explain_inference(clf, X: pd.DataFrame): if leaf_id[sample_id] == node_id: continue - # check if value of the split feature for sample 0 is below threshold if np_X[sample_id, feature[node_id]] <= threshold[node_id]: threshold_sign = "<=" @@ -150,7 +149,7 @@ def explain_inference(clf, X: pd.DataFrame): def concepts_reducer( - concepts: List[OWLClassExpression], reduced_cls: Callable + concepts: List[OWLClassExpression], reduced_cls: Callable ) -> Union[OWLObjectUnionOf, OWLObjectIntersectionOf]: """Reduces a list of OWLClassExpression instances into a single instance of OWLObjectUnionOf or OWLObjectIntersectionOf""" dl_concept_path = None @@ -166,7 +165,6 @@ def concepts_reducer( class TDL: """Tree-based Description Logic Concept Learner""" - def __init__(self, knowledge_base, use_inverse: bool = False, use_data_properties: bool = False, @@ -176,16 +174,15 @@ def __init__(self, knowledge_base, max_runtime: int = 1, grid_search_over: dict = None, grid_search_apply: bool = False, - report_classification: bool = False, + report_classification: bool = True, plot_tree: bool = False, plot_embeddings: bool = False, plot_feature_importance: bool = False, - verbose: int = 1): + verbose: int = 10): assert use_inverse is False, "use_inverse not implemented" assert use_data_properties is False, "use_data_properties not implemented" assert use_card_restrictions is False, "use_card_restrictions not implemented" - self.use_nominals = use_nominals self.use_card_restrictions = use_card_restrictions @@ -200,9 +197,9 @@ def __init__(self, knowledge_base, else: grid_search_over = dict() assert 
( - isinstance(knowledge_base, KnowledgeBase) - or isinstance(knowledge_base, ontolearn.triple_store.TripleStore) - or isinstance(knowledge_base) + isinstance(knowledge_base, KnowledgeBase) + or isinstance(knowledge_base, ontolearn.triple_store.TripleStore) + or isinstance(knowledge_base) ), "knowledge_base must be a KnowledgeBase instance" print(f"Knowledge Base: {knowledge_base}") self.grid_search_over = grid_search_over @@ -225,33 +222,72 @@ def __init__(self, knowledge_base, self.types_of_individuals = dict() self.verbose = verbose self.data_property_cast = dict() - - - def extract_expressions_from_owl_individuals(self, individuals: List[OWLNamedIndividual]) -> List[ - OWLClassExpression]: - features = [] - for i in make_iterable_verbose(individuals, + self.__classification_report = None + self.X = None + self.y = None + + def extract_expressions_from_owl_individuals(self, individuals: List[OWLNamedIndividual]) -> ( + Tuple)[Dict[str, OWLClassExpression],Dict[str, str]]: + # () Store mappings from str dl concept to owl class expression objects. + features = dict() + # () Grouped str dl concepts given str individuals. + individuals_to_feature_mapping = dict() + for owl_named_individual in make_iterable_verbose(individuals, verbose=self.verbose, desc="Extracting information about examples"): - for expression in self.knowledge_base.abox(individual=i, mode="expression"): - features.append(expression) - assert len( - features) > 0, f"First hop features cannot be extracted. Ensure that there are axioms about the examples." - # (5) Obtain unique features from (4). + for owl_class_expression in self.knowledge_base.abox(individual=owl_named_individual, mode="expression"): + str_dl_concept=owl_expression_to_dl(owl_class_expression) + individuals_to_feature_mapping.setdefault(owl_named_individual.str,set()).add(str_dl_concept) + if str_dl_concept not in features: + # A mapping from str dl representation to owl object. 
+ features[str_dl_concept] = owl_class_expression + + assert len(features) > 0, f"First hop features cannot be extracted. Ensure that there are axioms about the examples." if self.verbose > 0: - print("Total extracted features:", len(features)) - features = set(features) - if self.verbose > 0: - print("Unique features:", len(features)) - return list(features) + print("Unique OWL Class Expressions as features :", len(features)) + # () Iterate over features/extracted owl expressions. + # TODO:CD: We need to use parse tensor representation that we can use to train decision tree + X = [] + features = [ v for k,v in features.items()] + for owl_named_individual in make_iterable_verbose(individuals, + verbose=self.verbose, + desc="Constructing Training Data"): + binary_sparse_representation = [] + + features_of_owl_named_individual=individuals_to_feature_mapping[owl_named_individual.str] - def construct_sparse_binary_representations(self, features: List[OWLClassExpression], - examples: List[OWLNamedIndividual]) -> np.array: + for owl_class_expression in features: + if owl_expression_to_dl(owl_class_expression) in features_of_owl_named_individual: + binary_sparse_representation.append(1.0) + else: + binary_sparse_representation.append(0.0) + X.append(binary_sparse_representation) + X = np.array(X) + return X, features + + def construct_sparse_binary_representations(self, + features: List[OWLClassExpression], + examples: List[OWLNamedIndividual], examples_to_features) -> np.array: # () Constructing sparse binary vector representations for examples. # () Iterate over features/extracted owl expressions. 
X = [] - for f in features: + # () + str_owl_named_individual:str + for str_owl_named_individual, list_of_owl_expressions in examples_to_features.items(): + for kk in list_of_owl_expressions: + assert kk in features + # number of rows + for i in examples: + print(i.str) + + exit(1) + + assert len(X)==len(examples) + for f in make_iterable_verbose(features, + verbose=self.verbose, + desc="Creating sparse binary representations for the training"): # () Retrieve instances belonging to a feature/owl class expression + # TODO: Very inefficient. feature_retrieval = {_ for _ in self.knowledge_base.individuals(f)} # () Add 1.0 if positive example found otherwise 0. feature_value_per_example = [] @@ -268,6 +304,8 @@ def construct_sparse_binary_representations(self, features: List[OWLClassExpress def create_training_data(self, learning_problem: PosNegLPStandard) -> Tuple[pd.DataFrame, pd.DataFrame]: # (1) Initialize ordering over positive and negative examples. + if self.verbose > 0: + print("Creating a Training Dataset") positive_examples: List[OWLNamedIndividual] negative_examples: List[OWLNamedIndividual] positive_examples = [i for i in learning_problem.pos] @@ -276,77 +314,19 @@ def create_training_data(self, learning_problem: PosNegLPStandard) -> Tuple[pd.D y = [1.0 for _ in positive_examples] + [0.0 for _ in negative_examples] # (3) Iterate over examples to extract unique features. examples = positive_examples + negative_examples - features = self.extract_expressions_from_owl_individuals(examples) + # For the sake of convenience. sort features in ascending order of string lengths of DL representations. + X, features = self.extract_expressions_from_owl_individuals(examples) # (4) Creating a tabular data for the binary classification problem. 
- X = self.construct_sparse_binary_representations(features, examples) - - + # X = self.construct_sparse_binary_representations(features, examples, examples_to_features) self.features = features X = pd.DataFrame(data=X, index=examples, columns=self.features) y = pd.DataFrame(data=y, index=examples, columns=["label"]) - + # Remove redundant columns same_value_columns = X.apply(lambda col: col.nunique() == 1) X = X.loc[:, ~same_value_columns] - self.features=X.columns.values.tolist() + self.features = X.columns.values.tolist() return X, y - """ - for ith_row, i in enumerate(make_iterable_verbose(examples, - verbose=self.verbose, - desc="Creating supervised binary classification data")): - - # IMPORTANT: None existence is described as 0.0 features. - X_i = [0.0 for _ in range(len(mapping_features))] - expression: [ - OWLClass, - OWLObjectSomeValuesFrom, - OWLObjectMinCardinality, - OWLDataSomeValuesFrom, - ] - # Filling the features - for expression in self.knowledge_base.abox(individual=i, mode="expression"): - if isinstance(expression, OWLDataSomeValuesFrom): - fillers: OWLDataOneOf[OWLLiteral] - fillers = expression.get_filler() - datavalues_in_fillers = list(fillers.values()) - if datavalues_in_fillers[0].is_boolean(): - X_i[mapping_features[expression]] = 1 - elif datavalues_in_fillers[0].is_double(): - X_i[mapping_features[expression]] = 1.0 - else: - raise RuntimeError( - f"Type of literal in OWLDataSomeValuesFrom is not understood:{datavalues_in_fillers}" - ) - elif isinstance(expression, OWLClass) or isinstance( - expression, OWLObjectSomeValuesFrom - ): - assert expression in mapping_features, expression - X_i[mapping_features[expression]] = 1.0 - elif isinstance(expression, OWLObjectMinCardinality): - X_i[mapping_features[expression]] = expression.get_cardinality() - else: - raise RuntimeError( - f"Unrecognized type:{expression}-{type(expression)}" - ) - - X.append(X_i) - # Filling the label - if ith_row < len(positive_examples): - # Sanity checking for 
positive examples. - assert i in positive_examples and i not in negative_examples - label = 1.0 - else: - # Sanity checking for negative examples. - assert i in negative_examples and i not in positive_examples - label = 0.0 - y.append(label) - - self.features = features - X = pd.DataFrame(data=X, index=examples, columns=self.features) - y = pd.DataFrame(data=y, index=examples, columns=["label"]) - return X, y - """ - def construct_owl_expression_from_tree(self, X: pd.DataFrame, y: pd.DataFrame) -> List[OWLObjectIntersectionOf]: """ Construct an OWL class expression from a decision tree""" @@ -356,46 +336,45 @@ def construct_owl_expression_from_tree(self, X: pd.DataFrame, y: pd.DataFrame) - prediction_per_example = [] # () Iterate over reasoning steps of predicting a positive example pos: OWLNamedIndividual - for sequence_of_reasoning_steps, pos in zip( - explain_inference(self.clf, - X=vector_representation_of_positive_examples), positive_examples): + for sequence_of_reasoning_steps, pos in zip(make_iterable_verbose(explain_inference(self.clf, + X=vector_representation_of_positive_examples), + verbose=self.verbose, + desc="Constructing Description Logic Concepts"), positive_examples): concepts_per_reasoning_step = [] for i in sequence_of_reasoning_steps: - if i["inequality"] == ">": owl_class_expression = i["owl_expression"] else: owl_class_expression = i["owl_expression"].get_object_complement_of() - + concepts_per_reasoning_step.append(owl_class_expression) + # TODO : CD: No need to perform retrieval. 
+ """ + print(i,owl_class_expression) retrival_result = pos in {_ for _ in self.knowledge_base.individuals(owl_class_expression)} if retrival_result: concepts_per_reasoning_step.append(owl_class_expression) else: raise RuntimeError("Incorrect retrival") - - pred = concepts_reducer( - concepts=concepts_per_reasoning_step, - reduced_cls=OWLObjectIntersectionOf, - ) + """ + pred = concepts_reducer(concepts=concepts_per_reasoning_step, reduced_cls=OWLObjectIntersectionOf) prediction_per_example.append((pred, pos)) # From list to set to remove identical paths from the root to leafs. - prediction_per_example = { - pred for pred, positive_example in prediction_per_example - } + prediction_per_example = {pred for pred, positive_example in prediction_per_example} return list(prediction_per_example) def fit(self, learning_problem: PosNegLPStandard = None, max_runtime: int = None): """Fit the learner to the given learning problem - (1) Extract multi-hop information about E^+ and E^- denoted by \mathcal{F}. - (1.1) E = list of (E^+ \sqcup E^-). - (2) Build a training data \mathbf{X} \in \mathbb{R}^{ |E| \times |\mathcal{F}| } . - (3) Create binary labels \mathbf{X}. - + (1) Extract multi-hop information about E^+ and E^-. + (2) Create OWL Class Expressions from (1) + (3) Build a binary sparse training data X where + first |E+| rows denote the binary representations of positives + Remaining rows denote the binary representations of E⁻ + (4) Create binary labels. 
(4) Construct a set of DL concept for each e \in E^+ (5) Union (4) @@ -413,29 +392,30 @@ def fit(self, learning_problem: PosNegLPStandard = None, max_runtime: int = None X: pd.DataFrame y: Union[pd.DataFrame, pd.Series] X, y = self.create_training_data(learning_problem=learning_problem) - + # CD: Remember so that if user wants to use them + self.X, self.y = X, y if self.plot_embeddings: plot_umap_reduced_embeddings(X, y.label.to_list(), "umap_visualization.pdf") - if self.grid_search_over: grid_search = sklearn.model_selection.GridSearchCV( tree.DecisionTreeClassifier(**self.kwargs_classifier), - param_grid=self.grid_search_over, - cv=10, - ).fit(X.values, y.values) + param_grid=self.grid_search_over, cv=10, ).fit(X.values, y.values) print(grid_search.best_params_) self.kwargs_classifier.update(grid_search.best_params_) - - self.clf = tree.DecisionTreeClassifier(**self.kwargs_classifier).fit( - X=X.values, y=y.values - ) + # Training + if self.verbose>0: + print("Training starts!") + self.clf = tree.DecisionTreeClassifier(**self.kwargs_classifier).fit(X=X.values, y=y.values) if self.report_classification: if self.verbose > 0: - print("Classification Report: Negatives: -1 and Positives 1 ") - print(sklearn.metrics.classification_report(y.values, self.clf.predict(X.values), - target_names=["Negative", "Positive"])) + self.__classification_report = "Classification Report: Negatives: -1 and Positives 1 \n" + self.__classification_report += sklearn.metrics.classification_report(y.values, + self.clf.predict(X.values), + target_names=["Negative", + "Positive"]) + print(self.__classification_report) if self.plot_tree: plot_decision_tree_of_expressions(feature_names=[owl_expression_to_dl(f) for f in self.features], cart_tree=self.clf) @@ -443,24 +423,28 @@ def fit(self, learning_problem: PosNegLPStandard = None, max_runtime: int = None plot_topk_feature_importance(feature_names=[owl_expression_to_dl(f) for f in self.features], cart_tree=self.clf) - 
self.owl_class_expressions.clear() # Each item can be considered is a path of OWL Class Expressions # starting from the root node in the decision tree and # ending in a leaf node. self.conjunctive_concepts: List[OWLObjectIntersectionOf] + if self.verbose >0: + print("Computing conjunctive_concepts...") self.conjunctive_concepts = self.construct_owl_expression_from_tree(X, y) for i in self.conjunctive_concepts: self.owl_class_expressions.add(i) - - self.disjunction_of_conjunctive_concepts = concepts_reducer( - concepts=self.conjunctive_concepts, reduced_cls=OWLObjectUnionOf - ) + if self.verbose >0: + print("Computing disjunction_of_conjunctive_concepts...") + self.disjunction_of_conjunctive_concepts = concepts_reducer(concepts=self.conjunctive_concepts, reduced_cls=OWLObjectUnionOf) return self + @property + def classification_report(self) -> str: + return self.__classification_report + def best_hypotheses( - self, n=1 + self, n=1 ) -> Tuple[OWLClassExpression, List[OWLClassExpression]]: """Return the prediction""" if n == 1: @@ -474,4 +458,3 @@ def predict(self, X: List[OWLNamedIndividual], proba=True) -> np.ndarray: """ Predict the likelihoods of individuals belonging to the classes""" raise NotImplementedError("Unavailable. 
Predict the likelihoods of individuals belonging to the classes") - diff --git a/ontolearn/learning_problem_generator.py b/ontolearn/learning_problem_generator.py index ef0530e1..e07462df 100644 --- a/ontolearn/learning_problem_generator.py +++ b/ontolearn/learning_problem_generator.py @@ -33,8 +33,8 @@ OWLAnnotationProperty from owlapy.owl_individual import OWLNamedIndividual from owlapy.owl_literal import OWLLiteral -from owlapy.owl_ontology import OWLOntology -from owlapy.owl_ontology_manager import OWLOntologyManager, AddImport, OWLImportsDeclaration +from owlapy.abstracts import AbstractOWLOntology, AbstractOWLOntologyManager +from owlapy.owl_ontology_manager import AddImport, OWLImportsDeclaration from ontolearn.knowledge_base import KnowledgeBase from .refinement_operators import LengthBasedRefinement from .search import Node, RL_State @@ -95,16 +95,16 @@ def export_concepts(self, concepts: List[Node], path: str): assert isinstance(self.kb, KnowledgeBase) from owlapy.owl_ontology_manager import OntologyManager - manager: OWLOntologyManager = OntologyManager() + manager: AbstractOWLOntologyManager = OntologyManager() - ontology: OWLOntology = manager.create_ontology(IRI.create(NS)) + ontology: AbstractOWLOntology = manager.create_ontology(IRI.create(NS)) manager.load_ontology(IRI.create(self.kb.path)) kb_iri = self.kb.ontology().get_ontology_id().get_ontology_iri() manager.apply_change(AddImport(ontology, OWLImportsDeclaration(kb_iri))) for ith, h in enumerate(concepts): cls_a: OWLClass = OWLClass(IRI.create(NS, "Pred_" + str(ith))) equivalent_classes_axiom = OWLEquivalentClassesAxiom([cls_a, h.concept]) - manager.add_axiom(ontology, equivalent_classes_axiom) + ontology.add_axiom(equivalent_classes_axiom) count = None try: @@ -118,9 +118,9 @@ def export_concepts(self, concepts: List[Node], path: str): if count is not None: num_inds = OWLAnnotationAssertionAxiom(cls_a.iri, OWLAnnotation( OWLAnnotationProperty(IRI.create(SNS, "covered_inds")), 
OWLLiteral(count))) - manager.add_axiom(ontology, num_inds) + ontology.add_axiom(num_inds) - manager.save_ontology(ontology, IRI.create('file:/' + path + '.owl')) + ontology.save(IRI.create(path + '.owl')) def concept_individuals_to_string_balanced_examples(self, concept: OWLClassExpression) -> Dict[str, Set]: diff --git a/ontolearn/nces_trainer.py b/ontolearn/nces_trainer.py index 5b4caee3..2123b399 100644 --- a/ontolearn/nces_trainer.py +++ b/ontolearn/nces_trainer.py @@ -103,9 +103,10 @@ def get_optimizer(self, synthesizer, optimizer='Adam'): # pragma: no cover raise ValueError print('Unsupported optimizer') - def show_num_learnable_params(self): + @staticmethod + def show_num_learnable_params(model): print("*"*20+"Trainable model size"+"*"*20) - size = sum([p.numel() for p in self.nces.model.parameters()]) + size = sum([p.numel() for p in model.parameters()]) size_ = 0 print("Synthesizer: ", size) print("*"*20+"Trainable model size"+"*"*20) @@ -134,95 +135,94 @@ def collate_batch(self, batch): # pragma: no cover return pos_emb_list, neg_emb_list, target_labels def map_to_token(self, idx_array): - return self.nces.model.inv_vocab[idx_array] + return self.nces.model[0].inv_vocab[idx_array] def train(self, train_dataloader, save_model=True, optimizer='Adam', record_runtime=True): device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - if isinstance(self.nces.model, list): - self.nces.model = copy.deepcopy(self.nces.model[0]) - model_size = self.show_num_learnable_params() - if device.type == "cpu": - print("Training on CPU, it may take long...") - else: - print("GPU available !") - print() - print("#"*50) - print() - print("{} starts training... 
\n".format(self.nces.model.name)) - print("#"*50, "\n") - synthesizer = copy.deepcopy(self.nces.model).train() - desc = synthesizer.name - if device.type == "cuda": - synthesizer.cuda() - opt = self.get_optimizer(synthesizer=synthesizer, optimizer=optimizer) - if self.decay_rate: - self.scheduler = ExponentialLR(opt, self.decay_rate) - Train_loss = [] - Train_acc = defaultdict(list) - best_score = 0. - if record_runtime: - t0 = time.time() - s_acc, h_acc = 0, 0 - Epochs = trange(self.epochs, desc=f'Loss: {np.nan}, Soft Acc: {s_acc}, Hard Acc: {h_acc}', leave=True) - for e in Epochs: - soft_acc, hard_acc = [], [] - train_losses = [] - for x1, x2, labels in train_dataloader: - target_sequence = self.map_to_token(labels) - if device.type == "cuda": - x1, x2, labels = x1.cuda(), x2.cuda(), labels.cuda() - pred_sequence, scores = synthesizer(x1, x2) - loss = synthesizer.loss(scores, labels) - s_acc, h_acc = self.compute_accuracy(pred_sequence, target_sequence) - soft_acc.append(s_acc) - hard_acc.append(h_acc) - train_losses.append(loss.item()) - opt.zero_grad() - loss.backward() - clip_grad_value_(synthesizer.parameters(), clip_value=self.clip_value) - opt.step() - if self.decay_rate: - self.scheduler.step() - train_soft_acc, train_hard_acc = np.mean(soft_acc), np.mean(hard_acc) - Train_loss.append(np.mean(train_losses)) - Train_acc['soft'].append(train_soft_acc) - Train_acc['hard'].append(train_hard_acc) - Epochs.set_description('Loss: {:.4f}, Soft Acc: {:.2f}%, Hard Acc: {:.2f}%'.format(Train_loss[-1], - train_soft_acc, - train_hard_acc)) - Epochs.refresh() - weights = copy.deepcopy(synthesizer.state_dict()) - if Train_acc['hard'] and Train_acc['hard'][-1] > best_score: - best_score = Train_acc['hard'][-1] - best_weights = weights - synthesizer.load_state_dict(best_weights) - if record_runtime: # pragma: no cover - duration = time.time()-t0 - runtime_info = {"Architecture": synthesizer.name, - "Number of Epochs": self.epochs, "Runtime (s)": duration} - if not 
os.path.exists(self.storage_path+"/runtime/"): - os.mkdir(self.storage_path+"/runtime/") - with open(self.storage_path+"/runtime/runtime"+"_"+desc+".json", "w") as file: - json.dump(runtime_info, file, indent=3) - results_dict = dict() - print("Top performance: loss: {:.4f}, soft accuracy: {:.2f}% ... " - "hard accuracy: {:.2f}%".format(min(Train_loss), max(Train_acc['soft']), max(Train_acc['hard']))) - print() - results_dict.update({"Train Max Soft Acc": max(Train_acc['soft']), "Train Max Hard Acc": max(Train_acc['hard']), - "Train Min Loss": min(Train_loss)}) - - if save_model: # pragma: no cover - if not os.path.exists(self.storage_path+"/results/"): - os.mkdir(self.storage_path+"/results/") - with open(self.storage_path+"/results/"+"results"+"_"+desc+".json", "w") as file: - json.dump(results_dict, file, indent=3) + for model in self.nces.model: + model_size = self.show_num_learnable_params(model) + if device.type == "cpu": + print("Training on CPU, it may take long...") + else: + print("GPU available !") + print() + print("#"*50) + print() + print("{} starts training... \n".format(model.name)) + print("#"*50, "\n") + synthesizer = copy.deepcopy(model).train() + desc = synthesizer.name + if device.type == "cuda": + synthesizer.cuda() + opt = self.get_optimizer(synthesizer=synthesizer, optimizer=optimizer) + if self.decay_rate: + self.scheduler = ExponentialLR(opt, self.decay_rate) + Train_loss = [] + Train_acc = defaultdict(list) + best_score = 0. 
+ if record_runtime: + t0 = time.time() + s_acc, h_acc = 0, 0 + Epochs = trange(self.epochs, desc=f'Loss: {np.nan}, Soft Acc: {s_acc}, Hard Acc: {h_acc}', leave=True) + for e in Epochs: + soft_acc, hard_acc = [], [] + train_losses = [] + for x1, x2, labels in train_dataloader: + target_sequence = self.map_to_token(labels) + if device.type == "cuda": + x1, x2, labels = x1.cuda(), x2.cuda(), labels.cuda() + pred_sequence, scores = synthesizer(x1, x2) + loss = synthesizer.loss(scores, labels) + s_acc, h_acc = self.compute_accuracy(pred_sequence, target_sequence) + soft_acc.append(s_acc) + hard_acc.append(h_acc) + train_losses.append(loss.item()) + opt.zero_grad() + loss.backward() + clip_grad_value_(synthesizer.parameters(), clip_value=self.clip_value) + opt.step() + if self.decay_rate: + self.scheduler.step() + train_soft_acc, train_hard_acc = np.mean(soft_acc), np.mean(hard_acc) + Train_loss.append(np.mean(train_losses)) + Train_acc['soft'].append(train_soft_acc) + Train_acc['hard'].append(train_hard_acc) + Epochs.set_description('Loss: {:.4f}, Soft Acc: {:.2f}%, Hard Acc: {:.2f}%'.format(Train_loss[-1], + train_soft_acc, + train_hard_acc)) + Epochs.refresh() + weights = copy.deepcopy(synthesizer.state_dict()) + if Train_acc['hard'] and Train_acc['hard'][-1] > best_score: + best_score = Train_acc['hard'][-1] + best_weights = weights + synthesizer.load_state_dict(best_weights) + if record_runtime: # pragma: no cover + duration = time.time()-t0 + runtime_info = {"Architecture": synthesizer.name, + "Number of Epochs": self.epochs, "Runtime (s)": duration} + if not os.path.exists(self.storage_path+"/runtime/"): + os.mkdir(self.storage_path+"/runtime/") + with open(self.storage_path+"/runtime/runtime"+"_"+desc+".json", "w") as file: + json.dump(runtime_info, file, indent=3) + results_dict = dict() + print("Top performance: loss: {:.4f}, soft accuracy: {:.2f}% ... 
" + "hard accuracy: {:.2f}%".format(min(Train_loss), max(Train_acc['soft']), max(Train_acc['hard']))) + print() + results_dict.update({"Train Max Soft Acc": max(Train_acc['soft']), "Train Max Hard Acc": max(Train_acc['hard']), + "Train Min Loss": min(Train_loss)}) + + if save_model: # pragma: no cover + if not os.path.exists(self.storage_path+"/results/"): + os.mkdir(self.storage_path+"/results/") + with open(self.storage_path+"/results/"+"results"+"_"+desc+".json", "w") as file: + json.dump(results_dict, file, indent=3) - if not os.path.exists(self.storage_path+"/trained_models/"): - os.mkdir(self.storage_path+"/trained_models/") - torch.save(synthesizer.state_dict(), self.storage_path+"/trained_models/"+"trained_"+desc+".pt") - print("{} saved".format(synthesizer.name)) - if not os.path.exists(self.storage_path+"/metrics/"): - os.mkdir(self.storage_path+"/metrics/") - with open(self.storage_path+"/metrics/"+"metrics_"+desc+".json", "w") as plot_file: - json.dump({"soft acc": Train_acc['soft'], "hard acc": Train_acc['hard'], "loss": Train_loss}, plot_file, - indent=3) + if not os.path.exists(self.storage_path+"/trained_models/"): + os.mkdir(self.storage_path+"/trained_models/") + torch.save(synthesizer.state_dict(), self.storage_path+"/trained_models/"+"trained_"+desc+".pt") + print("{} saved".format(synthesizer.name)) + if not os.path.exists(self.storage_path+"/metrics/"): + os.mkdir(self.storage_path+"/metrics/") + with open(self.storage_path+"/metrics/"+"metrics_"+desc+".json", "w") as plot_file: + json.dump({"soft acc": Train_acc['soft'], "hard acc": Train_acc['hard'], "loss": Train_loss}, plot_file, + indent=3) diff --git a/ontolearn/owl_neural_reasoner.py b/ontolearn/owl_neural_reasoner.py index 35ea5c9c..05231c34 100644 --- a/ontolearn/owl_neural_reasoner.py +++ b/ontolearn/owl_neural_reasoner.py @@ -1,23 +1,16 @@ from owlapy.owl_property import ( OWLDataProperty, - OWLObjectPropertyExpression, OWLObjectInverseOf, OWLObjectProperty, OWLProperty, ) - -from 
owlapy.owl_datatype import OWLDatatype from owlapy.owl_individual import OWLNamedIndividual from owlapy.owl_literal import OWLLiteral - from owlapy.class_expression import * - -from typing import Iterable, Set, Optional, Generator, Union, FrozenSet, Tuple, Callable - +from typing import Generator, Tuple from dicee.knowledge_graph_embeddings import KGE - import os - +import re from collections import Counter @@ -44,7 +37,6 @@ def __init__(self, path_of_kb: str = None, # Train a KGE on the fly from dicee.executer import Execute from dicee.config import Namespace - args = Namespace() args.model = 'Keci' args.scoring_technique = "AllvsAll" @@ -52,10 +44,12 @@ def __init__(self, path_of_kb: str = None, path_of_kb = path_of_kb.replace("/", "_") path_of_kb = path_of_kb.replace(".", "_") args.path_to_store_single_run = path_of_kb - args.num_epochs = 500 + args.num_epochs = 100 args.embedding_dim = 512 args.batch_size = 1024 args.backend = "rdflib" + args.trainer = "PL" + # args.save_embeddings_as_csv = "True" reports = Execute(args).start() path_neural_embedding = reports["path_experiment_folder"] self.model = KGE(path=path_neural_embedding) @@ -70,6 +64,7 @@ def __init__(self, path_of_kb: str = None, self.inferred_owl_individuals = None self.inferred_object_properties = None self.inferred_named_owl_classes = None + @property def set_inferred_individuals(self): if self.inferred_owl_individuals is None: @@ -77,6 +72,7 @@ def set_inferred_individuals(self): return {i for i in self.individuals_in_signature()} else: return self.inferred_owl_individuals + @property def set_inferred_object_properties(self): # pragma: no cover if self.inferred_object_properties is None: @@ -84,6 +80,7 @@ def set_inferred_object_properties(self): # pragma: no cover return {i for i in self.object_properties_in_signature()} else: return self.inferred_object_properties + @property def set_inferred_owl_classes(self): # pragma: no cover if self.inferred_named_owl_classes is None: @@ -157,42 +154,33 @@ 
def abox(self, str_iri: str) -> Generator[ Tuple[ Tuple[OWLNamedIndividual, OWLProperty, OWLClass], Tuple[OWLObjectProperty, OWLObjectProperty, OWLNamedIndividual], - Tuple[OWLObjectProperty, OWLDataProperty, OWLLiteral], - ], - None, - None, - ]: + Tuple[OWLObjectProperty, OWLDataProperty, OWLLiteral]], None,None ]: + # Initialize an owl named individual object. subject_ = OWLNamedIndividual(str_iri) - # for p == type + # Return a triple indicating the type. for cl in self.get_type_individuals(str_iri): - yield ( - subject_, - OWLProperty("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"), - cl, - ) + yield subject_,OWLProperty("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"), cl - # for p == object property + # Return a triple based on an object property. for op in self.object_properties_in_signature(): for o in self.get_object_property_values(str_iri, op): yield subject_, op, o - # for p == data property + # Return a triple based on a data property. for dp in self.data_properties_in_signature(): # pragma: no cover print("these data properties are in the signature: ", dp.str) for l in self.get_data_property_values(str_iri, dp): yield subject_, dp, l def classes_in_signature( - self, confidence_threshold: float = None - ) -> Generator[OWLClass, None, None]: + self, confidence_threshold: float = None) -> Generator[OWLClass, None, None]: if self.inferred_named_owl_classes is None: self.inferred_named_owl_classes = set() for prediction in self.get_predictions( h=None, r="http://www.w3.org/1999/02/22-rdf-syntax-ns#type", t="http://www.w3.org/2002/07/owl#Class", - confidence_threshold=confidence_threshold, - ): + confidence_threshold=confidence_threshold): try: owl_class = OWLClass(prediction[0]) self.inferred_named_owl_classes.add(owl_class) @@ -208,17 +196,20 @@ def most_general_classes( self, confidence_threshold: float = None ) -> Generator[OWLClass, None, None]: # pragma: no cover """At least it has single subclass and there is no superclass""" - for _class in 
self.classes_in_signature(confidence_threshold): - for concept in self.get_direct_parents(_class, confidence_threshold): + + for c in self.classes_in_signature(confidence_threshold): + for x in self.get_direct_parents(c, confidence_threshold): + # Ignore c if (c subclass x) \in KG. break else: # checks if subconcepts is not empty -> there is at least one subclass + # c should have at least a single subclass. if subconcepts := list( self.subconcepts( - named_concept=_class, confidence_threshold=confidence_threshold + named_concept=c, confidence_threshold=confidence_threshold ) ): - yield _class + yield c def least_general_named_concepts( self, confidence_threshold: float = None @@ -316,7 +307,6 @@ def instances(self, expression: OWLClassExpression, named_individuals=False, retrieve its instances => Retrieval(¬A)= All Instance Set-DIFF { x | phi(x, type, A) ≥ γ } """ excluded_individuals = set(self.instances(expression.get_operand(), confidence_threshold)) yield from self.set_inferred_individuals - excluded_individuals - # Handling intersection of class expressions elif isinstance(expression, OWLObjectIntersectionOf): """ Given an OWLObjectIntersectionOf (C ⊓ D), @@ -356,7 +346,6 @@ def instances(self, expression: OWLClassExpression, named_individuals=False, for individual in common_individuals: yield individual """ - elif isinstance(expression, OWLObjectAllValuesFrom): """ Given an OWLObjectAllValuesFrom ∀ r.C, retrieve its instances => @@ -375,8 +364,6 @@ def instances(self, expression: OWLClassExpression, named_individuals=False, to_yield_individuals.add(individual) yield from to_yield_individuals - - elif isinstance(expression, OWLObjectMinCardinality) or isinstance(expression, OWLObjectSomeValuesFrom): """ Given an OWLObjectSomeValuesFrom ∃ r.C, retrieve its instances => @@ -408,34 +395,40 @@ def instances(self, expression: OWLClassExpression, named_individuals=False, for individual, count in result.items(): if count >= cardinality: yield individual - elif 
isinstance(expression, OWLObjectMaxCardinality): + object_property: OWLObjectProperty object_property = expression.get_property() + + filler_expression:OWLClassExpression filler_expression = expression.get_filler() + + cardinality:int cardinality = expression.get_cardinality() - # Get all individuals that are instances of the filler expression - object_individuals = set(self.instances(filler_expression, confidence_threshold)) + # Get all individuals that are instances of the filler expression. + owl_individual:OWLNamedIndividual + object_individuals = { owl_individual for owl_individual + in self.instances(filler_expression, confidence_threshold)} - # Initialize a dictionary to keep track of counts of related individuals for each entity - subject_individuals_count = {individual: 0 for individual in self.set_inferred_individuals} + # Initialize a dictionary to keep track of counts of related individuals for each entity. + owl_individual:OWLNamedIndividual + str_subject_individuals_to_count = {owl_individual.str: (owl_individual,0) for owl_individual in self.set_inferred_individuals} for object_individual in object_individuals: - # Get all individuals related to the object individual via the object property - subject_individuals = ( - self.get_individuals_with_object_property(obj=object_individual, object_property=object_property, - confidence_threshold=confidence_threshold)) + # Get all individuals related to the object individual via the object property. + subject_individuals = self.get_individuals_with_object_property(obj=object_individual, + object_property=object_property, + confidence_threshold=confidence_threshold) - # Update the count of related individuals for each object individual + # Update the count of related individuals for each object individual. 
for subject_individual in subject_individuals: - subject_individuals_count[subject_individual] += 1 - - # Filter out individuals who exceed the specified cardinality - valid_individuals = {ind for ind, count in subject_individuals_count.items() if count <= cardinality} - - yield from valid_individuals - + if subject_individual.str in str_subject_individuals_to_count: + owl_obj, count = str_subject_individuals_to_count[subject_individual.str] + # Increment the count. + str_subject_individuals_to_count[subject_individual.str] = (owl_obj, count+1) + # Filter out individuals who exceed the specified cardinality. + yield from {ind for str_ind, (ind, count) in str_subject_individuals_to_count.items() if count <= cardinality} # Handling union of class expressions elif isinstance(expression, OWLObjectUnionOf): @@ -464,11 +457,8 @@ def individuals_in_signature(self) -> Generator[OWLNamedIndividual, None, None]: try: for cl in self.classes_in_signature(): predictions = self.get_predictions( - h=None, - r="http://www.w3.org/1999/02/22-rdf-syntax-ns#type", - t=cl.str, - confidence_threshold=self.gamma, - ) + h=None, r="http://www.w3.org/1999/02/22-rdf-syntax-ns#type", t=cl.str, + confidence_threshold=self.gamma) for prediction in predictions: try: owl_named_individual = OWLNamedIndividual(prediction[0]) @@ -631,11 +621,11 @@ def get_individuals_of_class( def get_individuals_with_object_property( self, - object_property: OWLObjectProperty, - obj: OWLClass, - confidence_threshold: float = None, - ) -> Generator[OWLNamedIndividual, None, None]: + object_property: OWLObjectProperty, obj: OWLClass, confidence_threshold: float = None ) \ + -> Generator[OWLNamedIndividual, None, None]: + is_inverse = isinstance(object_property, OWLObjectInverseOf) + if is_inverse: object_property = object_property.get_inverse() @@ -643,8 +633,8 @@ def get_individuals_with_object_property( h=obj.str if is_inverse else None, r=object_property.str, t=None if is_inverse else obj.str, - 
confidence_threshold=confidence_threshold, - ): + confidence_threshold=confidence_threshold): + try: yield OWLNamedIndividual(prediction[0]) except Exception as e: # pragma: no cover diff --git a/ontolearn/scripts/run.py b/ontolearn/scripts/run.py index 0f4f7a04..d2fa6fe5 100644 --- a/ontolearn/scripts/run.py +++ b/ontolearn/scripts/run.py @@ -1,3 +1,7 @@ +""" + + +""" # ----------------------------------------------------------------------------- # MIT License # @@ -24,24 +28,23 @@ import argparse +import glob from fastapi import FastAPI import uvicorn from typing import Dict, Iterable, Union, List from owlapy.class_expression import OWLClassExpression from owlapy.iri import IRI from owlapy.owl_individual import OWLNamedIndividual -from ..utils.static_funcs import compute_f1_score -from ..knowledge_base import KnowledgeBase -from ..triple_store import TripleStore -from ..learning_problem import PosNegLPStandard -from ..refinement_operators import LengthBasedRefinement -from ..learners import Drill, TDL -from ..metrics import F1 -from owlapy.render import DLSyntaxObjectRenderer -from ..utils.static_funcs import save_owl_class_expressions +from ontolearn.utils import compute_f1_score +from ontolearn.knowledge_base import KnowledgeBase +from ontolearn.triple_store import TripleStore +from ontolearn.learning_problem import PosNegLPStandard +from ontolearn.learners import Drill, TDL +from ontolearn.concept_learner import NCES +from ontolearn.metrics import F1 +from ontolearn.verbalizer import LLMVerbalizer from owlapy import owl_expression_to_dl import os -from ..verbalizer import LLMVerbalizer app = FastAPI() args = None @@ -74,10 +77,10 @@ def get_drill(data: dict): iter_bound=data.get("iter_bound", 10), # total refinement operation applied max_runtime=data.get("max_runtime", 60), # seconds num_episode=data.get("num_episode", 2), # for the training - use_inverse=True, - use_data_properties=True, - use_card_restrictions=True, - use_nominals=True, + 
use_inverse=data.get("use_inverse", True), + use_data_properties=data.get("use_data_properties", True), + use_card_restrictions=data.get("use_card_restrictions", True), + use_nominals=data.get("use_nominals", True), verbose=1) # (2) Either load the weights of DRILL or train it. if data.get("path_to_pretrained_drill", None) and os.path.isdir(data["path_to_pretrained_drill"]): @@ -89,19 +92,48 @@ def get_drill(data: dict): drill.save(directory=data.get("path_to_pretrained_drill", None)) return drill +def get_nces(data: dict) -> NCES: + """ Load NCES """ + global kb + global args + assert args.path_knowledge_base.endswith(".owl"), "NCES supports only a knowledge base file with extension .owl" + # (1) Init NCES. + nces = NCES(knowledge_base_path=args.path_knowledge_base, + path_of_embeddings=data.get("path_embeddings", None), + quality_func=F1(), + load_pretrained=False, + learner_names=["SetTransformer", "LSTM", "GRU"], + num_predictions=64 + ) + # (2) Either load the weights of NCES or train it. 
+ if data.get("path_to_pretrained_nces", None) and os.path.isdir(data["path_to_pretrained_nces"]) and glob.glob(data["path_to_pretrained_nces"]+"/*.pt"): + nces.refresh(data["path_to_pretrained_nces"]) + else: + nces.train(epochs=data["nces_train_epochs"], batch_size=data["nces_batch_size"], num_lps=data["num_of_training_learning_problems"]) + nces.refresh(nces.trained_models_path) + return nces + def get_tdl(data) -> TDL: global kb - return TDL(knowledge_base=kb) + return TDL(knowledge_base=kb, + use_inverse=False, + use_data_properties=False, + use_nominals=False, + use_card_restrictions=data.get("use_card_restrictions",False), + kwargs_classifier=data.get("kwargs_classifier",None), + verbose=10) -def get_learner(data: dict) -> Union[Drill, TDL]: +def get_learner(data: dict) -> Union[Drill, TDL, NCES, None]: if data["model"] == "Drill": return get_drill(data) elif data["model"] == "TDL": return get_tdl(data) + elif data["model"] == "NCES": + return get_nces(data) else: - raise NotImplementedError(f"There is no learner {data['model']} available") + return None @app.get("/cel") @@ -109,12 +141,14 @@ async def cel(data: dict) -> Dict: global args global kb print("######### CEL Arguments ###############") - print(f"Knowledgebase/Triplestore:{kb}\n") - print(f"Input data:{data}\n") + print(f"Knowledgebase/Triplestore: {kb}\n") + print(f"Input data: {data}\n") print("######### CEL Arguments ###############\n") # (1) Initialize OWL CEL and verbalizer owl_learner = get_learner(data) - verbalizer = LLMVerbalizer() + if owl_learner is None: + return {"Results": f"There is no learner named as {data['model']}. Available models: Drill, TDL, NCES"} + # (2) Read Positives and Negatives. positives = {OWLNamedIndividual(IRI.create(i)) for i in data['pos']} negatives = {OWLNamedIndividual(IRI.create(i)) for i in data['neg']} @@ -126,17 +160,18 @@ async def cel(data: dict) -> Dict: # ()Learning Process. 
results = [] learned_owl_expression: OWLClassExpression - predictions = owl_learner.fit(lp).best_hypotheses(n=data.get("topk", 3)) if not isinstance(predictions, List): predictions = [predictions] - + verbalizer = LLMVerbalizer() for ith, learned_owl_expression in enumerate(predictions): # () OWL to DL dl_learned_owl_expression: str dl_learned_owl_expression = owl_expression_to_dl(learned_owl_expression) # () Get Individuals print(f"Retrieving individuals of {dl_learned_owl_expression}...") + # TODO:CD: With owlapy:1.3.1, we can move the f1 score computation into triple store. + # TODO: By this, we do not need to wait for the retrival results to return an answer to the user individuals: Iterable[OWLNamedIndividual] individuals = kb.individuals(learned_owl_expression) # () F1 score training @@ -167,9 +202,10 @@ def main(): elif args.endpoint_triple_store: kb = TripleStore(url=args.endpoint_triple_store) else: - raise RuntimeError("Either --path_knowledge_base or --endpoint_triplestore must be not None") + raise RuntimeError("Either --path_knowledge_base or --endpoint_triplestore must be provided") uvicorn.run(app, host=args.host, port=args.port) if __name__ == '__main__': main() + diff --git a/ontolearn/search.py b/ontolearn/search.py index 9ed05383..db718ffd 100644 --- a/ontolearn/search.py +++ b/ontolearn/search.py @@ -779,7 +779,10 @@ def get_most_promising(self) -> RL_State: ------- node: A node object """ - assert len(self.items_in_queue.queue) > 0 + assert len(self.items_in_queue.queue) > 0 ,("Search tree is empty. 
" + "\nEnsure that there is at least one " + "owl:Class or" + "owl:ObjectProperty definitions") _, __, dl_representation = self.items_in_queue.get(timeout=1.0) # R node = self.nodes[dl_representation] diff --git a/ontolearn/triple_store.py b/ontolearn/triple_store.py index 11f9927b..2bc2eb60 100644 --- a/ontolearn/triple_store.py +++ b/ontolearn/triple_store.py @@ -27,10 +27,12 @@ import logging import re from itertools import chain -from typing import Iterable, Set, Optional, Generator, Union, FrozenSet, Tuple, Callable +from typing import Iterable, Set, Optional, Generator, Union, Tuple, Callable import requests +from owlapy import owl_expression_to_sparql from owlapy.class_expression import * +from owlapy.class_expression import OWLThing from owlapy.iri import IRI from owlapy.owl_axiom import ( OWLObjectPropertyRangeAxiom, @@ -38,12 +40,13 @@ OWLDataPropertyRangeAxiom, OWLDataPropertyDomainAxiom, OWLClassAxiom, - OWLEquivalentClassesAxiom, + OWLEquivalentClassesAxiom, OWLAxiom, ) from owlapy.owl_datatype import OWLDatatype from owlapy.owl_individual import OWLNamedIndividual from owlapy.owl_literal import OWLLiteral -from owlapy.owl_ontology import OWLOntologyID, OWLOntology +from owlapy.owl_ontology import OWLOntologyID +from owlapy.abstracts import AbstractOWLOntology, AbstractOWLReasonerEx from owlapy.owl_property import ( OWLDataProperty, OWLObjectPropertyExpression, @@ -54,11 +57,7 @@ from requests import Response from requests.exceptions import RequestException, JSONDecodeError from owlapy.converter import Owl2SparqlConverter -from owlapy.owl_reasoner import OWLReasonerEx from ontolearn.knowledge_base import KnowledgeBase -import rdflib -from ontolearn.concept_generator import ConceptGenerator -from owlapy.utils import OWLClassExpressionLengthMetric import traceback from collections import Counter @@ -69,23 +68,6 @@ rdf_prefix = "PREFIX rdf: \n " xsd_prefix = "PREFIX xsd: \n" -# CD: For the sake of efficient software development. 
-limit_posix = "" - -from owlapy import owl_expression_to_sparql - -from dicee.knowledge_graph_embeddings import KGE -import os - - -def rdflib_to_str(sparql_result: rdflib.plugins.sparql.processor.SPARQLResult) -> str: - """ - @TODO: CD: Not quite sure whether we need this continuent function - """ - for result_row in sparql_result: - str_iri: str - yield result_row.x.n3() - def is_valid_url(url) -> bool: """ @@ -110,14 +92,14 @@ def is_valid_url(url) -> bool: return url is not None and regex.search(url) -def get_results_from_ts(triplestore_address: str, query: str, return_type: type): +def send_http_request_to_ts_and_fetch_results(triplestore_address: str, query: str, return_type: Callable): """ Execute the SPARQL query in the given triplestore_address and return the result as the given return_type. Args: triplestore_address (str): The triplestore address where the query will be executed. query (str): SPARQL query where the root variable should be '?x'. - return_type (type): OWLAPY class as type. e.g. OWLClass, OWLNamedIndividual, etc. + return_type (Callable): OWLAPY class as type. e.g. OWLClass, OWLNamedIndividual, etc. Returns: Generator containing the results of the query as the given type. 
@@ -184,59 +166,50 @@ def suf(direct: bool): return " " if direct else "* " -class TripleStoreOntology(OWLOntology): +class TripleStoreOntology(AbstractOWLOntology): def __init__(self, triplestore_address: str): assert is_valid_url(triplestore_address), ( "You should specify a valid URL in the following argument: " - "'triplestore_address' of class `TripleStore`" - ) - + "'triplestore_address' of class `TripleStore`") self.url = triplestore_address def classes_in_signature(self) -> Iterable[OWLClass]: query = owl_prefix + "SELECT DISTINCT ?x WHERE {?x a owl:Class.}" - yield from get_results_from_ts(self.url, query, OWLClass) + yield from send_http_request_to_ts_and_fetch_results(self.url, query, OWLClass) def data_properties_in_signature(self) -> Iterable[OWLDataProperty]: - query = ( - owl_prefix + "SELECT DISTINCT ?x\n " + "WHERE {?x a owl:DatatypeProperty.}" - ) - yield from get_results_from_ts(self.url, query, OWLDataProperty) + query = owl_prefix + "SELECT DISTINCT ?x\n " + "WHERE {?x a owl:DatatypeProperty.}" + yield from send_http_request_to_ts_and_fetch_results(self.url, query, OWLDataProperty) def object_properties_in_signature(self) -> Iterable[OWLObjectProperty]: - query = ( - owl_prefix + "SELECT DISTINCT ?x\n " + "WHERE {?x a owl:ObjectProperty.}" - ) - yield from get_results_from_ts(self.url, query, OWLObjectProperty) + query = owl_prefix + "SELECT DISTINCT ?x\n " + "WHERE {?x a owl:ObjectProperty.}" + yield from send_http_request_to_ts_and_fetch_results(self.url, query, OWLObjectProperty) def individuals_in_signature(self) -> Iterable[OWLNamedIndividual]: - query = ( - owl_prefix + "SELECT DISTINCT ?x\n " + "WHERE {?x a owl:NamedIndividual.}" - ) - yield from get_results_from_ts(self.url, query, OWLNamedIndividual) + query = owl_prefix + "SELECT DISTINCT ?x\n " + "WHERE {?x a owl:NamedIndividual.}" + yield from send_http_request_to_ts_and_fetch_results(self.url, query, OWLNamedIndividual) - def equivalent_classes_axioms( - self, c: OWLClass - ) -> 
Iterable[OWLEquivalentClassesAxiom]: + def equivalent_classes_axioms(self, c: OWLClass) -> Iterable[OWLEquivalentClassesAxiom]: + # TODO:CD: Please fit the query into a single line query = ( - owl_prefix - + "SELECT DISTINCT ?x" - + "WHERE { ?x owl:equivalentClass " - + f"<{c.str}>." - + "FILTER(?x != " - + f"<{c.str}>)}}" + owl_prefix + + "SELECT DISTINCT ?x" + + "WHERE { ?x owl:equivalentClass " + + f"<{c.str}>." + + "FILTER(?x != " + + f"<{c.str}>)}}" ) - for cls in get_results_from_ts(self.url, query, OWLClass): + for cls in send_http_request_to_ts_and_fetch_results(self.url, query, OWLClass): yield OWLEquivalentClassesAxiom([c, cls]) def general_class_axioms(self) -> Iterable[OWLClassAxiom]: - raise NotImplementedError + # TODO:CD: What does general class axiom mean ? Please document this function. + # / RE:AB: Doc strings in the base class + raise NotImplementedError("Currently, ") - def data_property_domain_axioms( - self, pe: OWLDataProperty - ) -> Iterable[OWLDataPropertyDomainAxiom]: - domains = self._get_property_domains(pe) + def data_property_domain_axioms(self, pe: OWLDataProperty) -> Iterable[OWLDataPropertyDomainAxiom]: + domains = self.get_property_domains(pe) if len(domains) == 0: yield OWLDataPropertyDomainAxiom(pe, OWLThing) else: @@ -244,66 +217,67 @@ def data_property_domain_axioms( yield OWLDataPropertyDomainAxiom(pe, dom) def data_property_range_axioms( - self, pe: OWLDataProperty - ): # -> Iterable[OWLDataPropertyRangeAxiom]: - query = ( - rdfs_prefix - + "SELECT DISTINCT ?x WHERE { " - + f"<{pe.str}>" - + " rdfs:range ?x. }" - ) - - ranges = set(get_results_from_ts(self.url, query, OWLDatatype)) - if len(ranges) == 0: - pass - else: - for rng in ranges: - yield OWLDataPropertyRangeAxiom(pe, rng) + self, pe: OWLDataProperty + )-> Iterable[OWLDataPropertyRangeAxiom]: + query = f"{rdfs_prefix}SELECT DISTINCT ?x WHERE {{ <{pe.str}> rdfs:range ?x. 
}}" + for rng in send_http_request_to_ts_and_fetch_results(self.url, query, OWLDatatype): + yield OWLDataPropertyRangeAxiom(pe, rng) def object_property_domain_axioms( - self, pe: OWLObjectProperty + self, pe: OWLObjectProperty ) -> Iterable[OWLObjectPropertyDomainAxiom]: - domains = self._get_property_domains(pe) + domains = self.get_property_domains(pe) if len(domains) == 0: yield OWLObjectPropertyDomainAxiom(pe, OWLThing) else: for dom in domains: yield OWLObjectPropertyDomainAxiom(pe, dom) - def object_property_range_axioms( - self, pe: OWLObjectProperty - ) -> Iterable[OWLObjectPropertyRangeAxiom]: + def object_property_range_axioms(self, pe: OWLObjectProperty) -> Iterable[OWLObjectPropertyRangeAxiom]: query = rdfs_prefix + "SELECT ?x WHERE { " + f"<{pe.str}>" + " rdfs:range ?x. }" - ranges = set(get_results_from_ts(self.url, query, OWLClass)) + # TODO: CD: Why do we need to use set operation ?! \ RE:AB: In order to calculate its length im converting to set + ranges = set(send_http_request_to_ts_and_fetch_results(self.url, query, OWLClass)) if len(ranges) == 0: yield OWLObjectPropertyRangeAxiom(pe, OWLThing) else: for rng in ranges: yield OWLObjectPropertyRangeAxiom(pe, rng) - def _get_property_domains(self, pe: OWLProperty): + def get_property_domains(self, pe: OWLProperty)->Set: if isinstance(pe, OWLObjectProperty) or isinstance(pe, OWLDataProperty): query = ( - rdfs_prefix - + "SELECT ?x WHERE { " - + f"<{pe.str}>" - + " rdfs:domain ?x. }" + rdfs_prefix + + "SELECT ?x WHERE { " + + f"<{pe.str}>" + + " rdfs:domain ?x. }" ) - domains = set(get_results_from_ts(self.url, query, OWLClass)) + # TODO: CD: Why do we need to use set operation ?! 
+ domains = set(send_http_request_to_ts_and_fetch_results(self.url, query, OWLClass)) return domains else: raise NotImplementedError def get_owl_ontology_manager(self): # no manager for this kind of Ontology + # @TODO:CD: Please document this class method / RE:AB: Doc strings in the base class pass def get_ontology_id(self) -> OWLOntologyID: + # @TODO:CD: Please document this class method / RE:AB: Doc strings in the base class + # query = (rdf_prefix + owl_prefix + # "SELECT ?ontologyIRI WHERE { ?ontology rdf:type owl:Ontology . ?ontology rdf:about ?ontologyIRI .}") # return list(get_results_from_ts(self.url, query, OWLOntologyID)).pop() raise NotImplementedError + def add_axiom(self, axiom: Union[OWLAxiom, Iterable[OWLAxiom]]): + """Cant modify a triplestore ontology. Implemented because of the base class.""" + pass + + def remove_axiom(self, axiom: Union[OWLAxiom, Iterable[OWLAxiom]]): + """Cant modify a triplestore ontology. Implemented because of the base class.""" + pass + def __eq__(self, other): if isinstance(other, type(self)): return self.url == other.url @@ -316,7 +290,7 @@ def __repr__(self): return f"TripleStoreOntology({self.url})" -class TripleStoreReasoner(OWLReasonerEx): +class TripleStoreReasoner(AbstractOWLReasonerEx): __slots__ = "ontology" def __init__(self, ontology: TripleStoreOntology): @@ -325,7 +299,7 @@ def __init__(self, ontology: TripleStoreOntology): self._owl2sparql_converter = Owl2SparqlConverter() def data_property_domains( - self, pe: OWLDataProperty, direct: bool = False + self, pe: OWLDataProperty, direct: bool = False ) -> Iterable[OWLClassExpression]: domains = { d.get_domain() for d in self.ontology.data_property_domain_axioms(pe) @@ -336,7 +310,7 @@ def data_property_domains( yield from sub_domains def object_property_domains( - self, pe: OWLObjectProperty, direct: bool = False + self, pe: OWLObjectProperty, direct: bool = False ) -> Iterable[OWLClassExpression]: domains = { d.get_domain() for d in 
self.ontology.object_property_domain_axioms(pe) @@ -347,7 +321,7 @@ def object_property_domains( yield from sub_domains def object_property_ranges( - self, pe: OWLObjectProperty, direct: bool = False + self, pe: OWLObjectProperty, direct: bool = False ) -> Iterable[OWLClassExpression]: ranges = {r.get_range() for r in self.ontology.object_property_range_axioms(pe)} sub_ranges = set(chain.from_iterable([self.sub_classes(d) for d in ranges])) @@ -356,44 +330,42 @@ def object_property_ranges( yield from sub_ranges def equivalent_classes( - self, ce: OWLClassExpression, only_named: bool = True + self, ce: OWLClassExpression, only_named: bool = True ) -> Iterable[OWLClassExpression]: if only_named: if isinstance(ce, OWLClass): query = ( - owl_prefix - + "SELECT DISTINCT ?x " - + "WHERE { {?x owl:equivalentClass " - + f"<{ce.str}>.}}" - + "UNION {" - + f"<{ce.str}>" - + " owl:equivalentClass ?x.}" - + "FILTER(?x != " - + f"<{ce.str}>)}}" + owl_prefix + + "SELECT DISTINCT ?x " + + "WHERE { {?x owl:equivalentClass " + + f"<{ce.str}>.}}" + + "UNION {" + + f"<{ce.str}>" + + " owl:equivalentClass ?x.}" + + "FILTER(?x != " + + f"<{ce.str}>)}}" ) - yield from get_results_from_ts(self.url, query, OWLClass) + yield from send_http_request_to_ts_and_fetch_results(self.url, query, OWLClass) else: - raise NotImplementedError( - "Equivalent classes for complex class expressions is not implemented" - ) + print(f"Equivalent classes for complex class expressions is not implemented\t{ce}") + # raise NotImplementedError(f"Equivalent classes for complex class expressions is not implemented\t{ce}") + yield from {} else: - raise NotImplementedError( - "Finding equivalent complex classes is not implemented" - ) + raise NotImplementedError("Finding equivalent complex classes is not implemented") def disjoint_classes( - self, ce: OWLClassExpression, only_named: bool = True + self, ce: OWLClassExpression, only_named: bool = True ) -> Iterable[OWLClassExpression]: if only_named: if isinstance(ce, 
OWLClass): query = ( - owl_prefix - + " SELECT DISTINCT ?x " - + "WHERE { " - + f"<{ce.str}>" - + " owl:disjointWith ?x .}" + owl_prefix + + " SELECT DISTINCT ?x " + + "WHERE { " + + f"<{ce.str}>" + + " owl:disjointWith ?x .}" ) - yield from get_results_from_ts(self.url, query, OWLClass) + yield from send_http_request_to_ts_and_fetch_results(self.url, query, OWLClass) else: raise NotImplementedError( "Disjoint classes for complex class expressions is not implemented" @@ -404,105 +376,105 @@ def disjoint_classes( ) def different_individuals( - self, ind: OWLNamedIndividual + self, ind: OWLNamedIndividual ) -> Iterable[OWLNamedIndividual]: query = ( - owl_prefix - + rdf_prefix - + "SELECT DISTINCT ?x \n" - + "WHERE{ ?allDifferent owl:distinctMembers/rdf:rest*/rdf:first ?x.\n" - + "?allDifferent owl:distinctMembers/rdf:rest*/rdf:first" - + f"<{ind.str}>" - + ".\n" - + "FILTER(?x != " - + f"<{ind.str}>" - + ")}" + owl_prefix + + rdf_prefix + + "SELECT DISTINCT ?x \n" + + "WHERE{ ?allDifferent owl:distinctMembers/rdf:rest*/rdf:first ?x.\n" + + "?allDifferent owl:distinctMembers/rdf:rest*/rdf:first" + + f"<{ind.str}>" + + ".\n" + + "FILTER(?x != " + + f"<{ind.str}>" + + ")}" ) - yield from get_results_from_ts(self.url, query, OWLNamedIndividual) + yield from send_http_request_to_ts_and_fetch_results(self.url, query, OWLNamedIndividual) def same_individuals(self, ind: OWLNamedIndividual) -> Iterable[OWLNamedIndividual]: query = ( - owl_prefix - + "SELECT DISTINCT ?x " - + "WHERE {{ ?x owl:sameAs " - + f"<{ind.str}>" - + " .}" - + "UNION { " - + f"<{ind.str}>" - + " owl:sameAs ?x.}}" + owl_prefix + + "SELECT DISTINCT ?x " + + "WHERE {{ ?x owl:sameAs " + + f"<{ind.str}>" + + " .}" + + "UNION { " + + f"<{ind.str}>" + + " owl:sameAs ?x.}}" ) - yield from get_results_from_ts(self.url, query, OWLNamedIndividual) + yield from send_http_request_to_ts_and_fetch_results(self.url, query, OWLNamedIndividual) def equivalent_object_properties( - self, op: OWLObjectPropertyExpression + 
self, op: OWLObjectPropertyExpression ) -> Iterable[OWLObjectPropertyExpression]: if isinstance(op, OWLObjectProperty): query = ( - owl_prefix - + "SELECT DISTINCT ?x " - + "WHERE { {?x owl:equivalentProperty " - + f"<{op.str}>.}}" - + "UNION {" - + f"<{op.str}>" - + " owl:equivalentProperty ?x.}" - + "FILTER(?x != " - + f"<{op.str}>)}}" + owl_prefix + + "SELECT DISTINCT ?x " + + "WHERE { {?x owl:equivalentProperty " + + f"<{op.str}>.}}" + + "UNION {" + + f"<{op.str}>" + + " owl:equivalentProperty ?x.}" + + "FILTER(?x != " + + f"<{op.str}>)}}" ) - yield from get_results_from_ts(self.url, query, OWLObjectProperty) + yield from send_http_request_to_ts_and_fetch_results(self.url, query, OWLObjectProperty) elif isinstance(op, OWLObjectInverseOf): query = ( - owl_prefix - + "SELECT DISTINCT ?x " - + "WHERE { ?inverseProperty owl:inverseOf " - + f"<{op.get_inverse().str}> ." - + " {?x owl:equivalentProperty ?inverseProperty .}" - + "UNION { ?inverseProperty owl:equivalentClass ?x.}" - + "FILTER(?x != ?inverseProperty }>)}" + owl_prefix + + "SELECT DISTINCT ?x " + + "WHERE { ?inverseProperty owl:inverseOf " + + f"<{op.get_inverse().str}> ." 
+ + " {?x owl:equivalentProperty ?inverseProperty .}" + + "UNION { ?inverseProperty owl:equivalentClass ?x.}" + + "FILTER(?x != ?inverseProperty }>)}" ) - yield from get_results_from_ts(self.url, query, OWLObjectProperty) + yield from send_http_request_to_ts_and_fetch_results(self.url, query, OWLObjectProperty) def equivalent_data_properties( - self, dp: OWLDataProperty + self, dp: OWLDataProperty ) -> Iterable[OWLDataProperty]: query = ( - owl_prefix - + "SELECT DISTINCT ?x" - + "WHERE { {?x owl:equivalentProperty " - + f"<{dp.str}>.}}" - + "UNION {" - + f"<{dp.str}>" - + " owl:equivalentProperty ?x.}" - + "FILTER(?x != " - + f"<{dp.str}>)}}" + owl_prefix + + "SELECT DISTINCT ?x" + + "WHERE { {?x owl:equivalentProperty " + + f"<{dp.str}>.}}" + + "UNION {" + + f"<{dp.str}>" + + " owl:equivalentProperty ?x.}" + + "FILTER(?x != " + + f"<{dp.str}>)}}" ) - yield from get_results_from_ts(self.url, query, OWLDataProperty) + yield from send_http_request_to_ts_and_fetch_results(self.url, query, OWLDataProperty) def data_property_values( - self, ind: OWLNamedIndividual, pe: OWLDataProperty, direct: bool = True + self, ind: OWLNamedIndividual, pe: OWLDataProperty, direct: bool = True ) -> Iterable[OWLLiteral]: query = "SELECT ?x WHERE { " + f"<{ind.str}>" + f"<{pe.str}>" + " ?x . }" - yield from get_results_from_ts(self.url, query, OWLLiteral) + yield from send_http_request_to_ts_and_fetch_results(self.url, query, OWLLiteral) if not direct: for prop in self.sub_data_properties(pe): yield from self.data_property_values(ind, prop, True) def object_property_values( - self, - ind: OWLNamedIndividual, - pe: OWLObjectPropertyExpression, - direct: bool = True, + self, + ind: OWLNamedIndividual, + pe: OWLObjectPropertyExpression, + direct: bool = True, ) -> Iterable[OWLNamedIndividual]: if isinstance(pe, OWLObjectProperty): query = "SELECT ?x WHERE { " + f"<{ind.str}> " + f"<{pe.str}>" + " ?x . 
}" - yield from get_results_from_ts(self.url, query, OWLNamedIndividual) + yield from send_http_request_to_ts_and_fetch_results(self.url, query, OWLNamedIndividual) elif isinstance(pe, OWLObjectInverseOf): query = ( - owl_prefix - + "SELECT ?x WHERE { ?inverseProperty owl:inverseOf " - + f"<{pe.get_inverse().str}>." - + f"<{ind.str}> ?inverseProperty ?x . }}" + owl_prefix + + "SELECT ?x WHERE { ?inverseProperty owl:inverseOf " + + f"<{pe.get_inverse().str}>." + + f"<{ind.str}> ?inverseProperty ?x . }}" ) - yield from get_results_from_ts(self.url, query, OWLNamedIndividual) + yield from send_http_request_to_ts_and_fetch_results(self.url, query, OWLNamedIndividual) if not direct: for prop in self.sub_object_properties(pe): yield from self.object_property_values(ind, prop, True) @@ -511,7 +483,7 @@ def flush(self) -> None: pass def instances( - self, ce: OWLClassExpression, direct: bool = False, seen_set: Set = None + self, ce: OWLClassExpression, direct: bool = False, seen_set: Set = None ) -> Iterable[OWLNamedIndividual]: if not seen_set: seen_set = set() @@ -523,7 +495,7 @@ def instances( "?x a ?some_cls. \n ?some_cls " "* ", ) - yield from get_results_from_ts(self.url, ce_to_sparql, OWLNamedIndividual) + yield from send_http_request_to_ts_and_fetch_results(self.url, ce_to_sparql, OWLNamedIndividual) if not direct: for cls in self.equivalent_classes(ce): if cls not in seen_set: @@ -531,19 +503,19 @@ def instances( yield from self.instances(cls, direct, seen_set) def sub_classes( - self, ce: OWLClassExpression, direct: bool = False, only_named: bool = True + self, ce: OWLClassExpression, direct: bool = False, only_named: bool = True ) -> Iterable[OWLClassExpression]: if not only_named: raise NotImplementedError("Finding anonymous subclasses not implemented") if isinstance(ce, OWLClass): query = ( - rdfs_prefix - + "SELECT ?x WHERE { ?x rdfs:subClassOf" - + suf(direct) - + f"<{ce.str}>" - + ". 
}" + rdfs_prefix + + "SELECT ?x WHERE { ?x rdfs:subClassOf" + + suf(direct) + + f"<{ce.str}>" + + ". }" ) - results = list(get_results_from_ts(self.url, query, OWLClass)) + results = list(send_http_request_to_ts_and_fetch_results(self.url, query, OWLClass)) if ce in results: results.remove(ce) yield from results @@ -561,7 +533,7 @@ def sub_classes( # yield from get_results_from_ts(self._triplestore_address, query, OWLClass) def super_classes( - self, ce: OWLClassExpression, direct: bool = False, only_named: bool = True + self, ce: OWLClassExpression, direct: bool = False, only_named: bool = True ) -> Iterable[OWLClassExpression]: if not only_named: raise NotImplementedError("Finding anonymous superclasses not implemented") @@ -569,14 +541,14 @@ def super_classes( if ce == OWLThing: return [] query = ( - rdfs_prefix - + "SELECT ?x WHERE { " - + f"<{ce.str}>" - + " rdfs:subClassOf" - + suf(direct) - + "?x. }" + rdfs_prefix + + "SELECT ?x WHERE { " + + f"<{ce.str}>" + + " rdfs:subClassOf" + + suf(direct) + + "?x. 
}" ) - results = list(get_results_from_ts(self.url, query, OWLClass)) + results = list(send_http_request_to_ts_and_fetch_results(self.url, query, OWLClass)) if ce in results: results.remove(ce) if (not direct and OWLThing not in results) or len(results) == 0: @@ -589,177 +561,170 @@ def super_classes( ) def disjoint_object_properties( - self, op: OWLObjectPropertyExpression + self, op: OWLObjectPropertyExpression ) -> Iterable[OWLObjectPropertyExpression]: if isinstance(op, OWLObjectProperty): query = ( - owl_prefix - + rdf_prefix - + "SELECT DISTINCT ?x \n" - + "WHERE{ ?AllDisjointProperties owl:members/rdf:rest*/rdf:first ?x.\n" - + "?AllDisjointProperties owl:members/rdf:rest*/rdf:first" - + f"<{op.str}>" - + ".\n" - + "FILTER(?x != " - + f"<{op.str}>" - + ")}" + owl_prefix + + rdf_prefix + + "SELECT DISTINCT ?x \n" + + "WHERE{ ?AllDisjointProperties owl:members/rdf:rest*/rdf:first ?x.\n" + + "?AllDisjointProperties owl:members/rdf:rest*/rdf:first" + + f"<{op.str}>" + + ".\n" + + "FILTER(?x != " + + f"<{op.str}>" + + ")}" ) - yield from get_results_from_ts(self.url, query, OWLObjectProperty) + yield from send_http_request_to_ts_and_fetch_results(self.url, query, OWLObjectProperty) elif isinstance(op, OWLObjectInverseOf): query = ( - owl_prefix - + " SELECT DISTINCT ?x " - + "WHERE { ?inverseProperty owl:inverseOf " - + f"<{op.get_inverse().str}> ." - + " ?AllDisjointProperties owl:members/rdf:rest*/rdf:first ?x.\n" - + " ?AllDisjointProperties owl:members/rdf:rest*/rdf:first ?inverseProperty.\n" - + " FILTER(?x != ?inverseProperty)}" + owl_prefix + + " SELECT DISTINCT ?x " + + "WHERE { ?inverseProperty owl:inverseOf " + + f"<{op.get_inverse().str}> ." 
+ + " ?AllDisjointProperties owl:members/rdf:rest*/rdf:first ?x.\n" + + " ?AllDisjointProperties owl:members/rdf:rest*/rdf:first ?inverseProperty.\n" + + " FILTER(?x != ?inverseProperty)}" ) - yield from get_results_from_ts(self.url, query, OWLObjectProperty) + yield from send_http_request_to_ts_and_fetch_results(self.url, query, OWLObjectProperty) def disjoint_data_properties( - self, dp: OWLDataProperty + self, dp: OWLDataProperty ) -> Iterable[OWLDataProperty]: query = ( - owl_prefix - + rdf_prefix - + "SELECT DISTINCT ?x \n" - + "WHERE{ ?AllDisjointProperties owl:members/rdf:rest*/rdf:first ?x.\n" - + "?AllDisjointProperties owl:members/rdf:rest*/rdf:first" - + f"<{dp.str}>" - + ".\n" - + "FILTER(?x != " - + f"<{dp.str}>" - + ")}" + owl_prefix + + rdf_prefix + + "SELECT DISTINCT ?x \n" + + "WHERE{ ?AllDisjointProperties owl:members/rdf:rest*/rdf:first ?x.\n" + + "?AllDisjointProperties owl:members/rdf:rest*/rdf:first" + + f"<{dp.str}>" + + ".\n" + + "FILTER(?x != " + + f"<{dp.str}>" + + ")}" ) - yield from get_results_from_ts(self.url, query, OWLDataProperty) + yield from send_http_request_to_ts_and_fetch_results(self.url, query, OWLDataProperty) def all_data_property_values( - self, pe: OWLDataProperty, direct: bool = True + self, pe: OWLDataProperty, direct: bool = True ) -> Iterable[OWLLiteral]: query = "SELECT DISTINCT ?x WHERE { ?y" + f"<{pe.str}>" + " ?x . }" - yield from get_results_from_ts(self.url, query, OWLLiteral) + yield from send_http_request_to_ts_and_fetch_results(self.url, query, OWLLiteral) if not direct: for prop in self.sub_data_properties(pe): yield from self.all_data_property_values(prop, True) def sub_data_properties( - self, dp: OWLDataProperty, direct: bool = False + self, dp: OWLDataProperty, direct: bool = False ) -> Iterable[OWLDataProperty]: query = ( - rdfs_prefix - + "SELECT ?x WHERE { ?x rdfs:subPropertyOf" - + suf(direct) - + f"<{dp.str}>" - + ". 
}" + rdfs_prefix + + "SELECT ?x WHERE { ?x rdfs:subPropertyOf" + + suf(direct) + + f"<{dp.str}>" + + ". }" ) - yield from get_results_from_ts(self.url, query, OWLDataProperty) + yield from send_http_request_to_ts_and_fetch_results(self.url, query, OWLDataProperty) def super_data_properties( - self, dp: OWLDataProperty, direct: bool = False + self, dp: OWLDataProperty, direct: bool = False ) -> Iterable[OWLDataProperty]: query = ( - rdfs_prefix - + "SELECT ?x WHERE {" - + f"<{dp.str}>" - + " rdfs:subPropertyOf" - + suf(direct) - + " ?x. }" + rdfs_prefix + + "SELECT ?x WHERE {" + + f"<{dp.str}>" + + " rdfs:subPropertyOf" + + suf(direct) + + " ?x. }" ) - yield from get_results_from_ts(self.url, query, OWLDataProperty) + yield from send_http_request_to_ts_and_fetch_results(self.url, query, OWLDataProperty) def sub_object_properties( - self, op: OWLObjectPropertyExpression, direct: bool = False + self, op: OWLObjectPropertyExpression, direct: bool = False ) -> Iterable[OWLObjectPropertyExpression]: if isinstance(op, OWLObjectProperty): query = ( - rdfs_prefix - + "SELECT ?x WHERE { ?x rdfs:subPropertyOf" - + suf(direct) - + f"<{op.str}> . FILTER(?x != " - + f"<{op.str}>) }}" + rdfs_prefix + + "SELECT ?x WHERE { ?x rdfs:subPropertyOf" + + suf(direct) + + f"<{op.str}> . FILTER(?x != " + + f"<{op.str}>) }}" ) - yield from get_results_from_ts(self.url, query, OWLObjectProperty) + yield from send_http_request_to_ts_and_fetch_results(self.url, query, OWLObjectProperty) elif isinstance(op, OWLObjectInverseOf): query = ( - rdfs_prefix - + "SELECT ?x " - + "WHERE { ?inverseProperty owl:inverseOf " - + f"<{op.get_inverse().str}> ." - + " ?x rdfs:subPropertyOf" - + suf(direct) - + " ?inverseProperty . }" + rdfs_prefix + + "SELECT ?x " + + "WHERE { ?inverseProperty owl:inverseOf " + + f"<{op.get_inverse().str}> ." + + " ?x rdfs:subPropertyOf" + + suf(direct) + + " ?inverseProperty . 
}" ) - yield from get_results_from_ts(self.url, query, OWLObjectProperty) + yield from send_http_request_to_ts_and_fetch_results(self.url, query, OWLObjectProperty) def super_object_properties( - self, op: OWLObjectPropertyExpression, direct: bool = False + self, op: OWLObjectPropertyExpression, direct: bool = False ) -> Iterable[OWLObjectPropertyExpression]: if isinstance(op, OWLObjectProperty): query = ( - rdfs_prefix - + "SELECT ?x WHERE {" - + f"<{op.str}>" - + " rdfs:subPropertyOf" - + suf(direct) - + " ?x. FILTER(?x != " - + f"<{op.str}>) }}" + rdfs_prefix + + "SELECT ?x WHERE {" + + f"<{op.str}>" + + " rdfs:subPropertyOf" + + suf(direct) + + " ?x. FILTER(?x != " + + f"<{op.str}>) }}" ) - yield from get_results_from_ts(self.url, query, OWLObjectProperty) + yield from send_http_request_to_ts_and_fetch_results(self.url, query, OWLObjectProperty) elif isinstance(op, OWLObjectInverseOf): query = ( - rdfs_prefix - + "SELECT ?x " - + "WHERE { ?inverseProperty owl:inverseOf " - + f"<{op.get_inverse().str}> ." - + " ?inverseProperty rdfs:subPropertyOf" - + suf(direct) - + "?x . }" + rdfs_prefix + + "SELECT ?x " + + "WHERE { ?inverseProperty owl:inverseOf " + + f"<{op.get_inverse().str}> ." + + " ?inverseProperty rdfs:subPropertyOf" + + suf(direct) + + "?x . }" ) - yield from get_results_from_ts(self.url, query, OWLObjectProperty) + yield from send_http_request_to_ts_and_fetch_results(self.url, query, OWLObjectProperty) def types( - self, ind: OWLNamedIndividual, direct: bool = False + self, ind: OWLNamedIndividual, direct: bool = False ) -> Iterable[OWLClass]: if direct: query = "SELECT ?x WHERE {" + f"<{ind.str}> a" + " ?x. }" else: query = ( - rdfs_prefix + "SELECT DISTINCT ?x WHERE {" + f"<{ind.str}> a ?cls. " - " ?cls rdfs:subClassOf* ?x}" + rdfs_prefix + "SELECT DISTINCT ?x WHERE {" + f"<{ind.str}> a ?cls. 
" + " ?cls rdfs:subClassOf* ?x}" ) yield from [ i - for i in get_results_from_ts(self.url, query, OWLClass) + for i in send_http_request_to_ts_and_fetch_results(self.url, query, OWLClass) if i != OWLClass(IRI("http://www.w3.org/2002/07/owl#", "NamedIndividual")) ] - def get_root_ontology(self) -> OWLOntology: + def get_root_ontology(self) -> AbstractOWLOntology: return self.ontology def is_isolated(self): # not needed here pass - def is_using_triplestore(self): - """No use! Deprecated.""" - # TODO: Deprecated! Remove after it is removed from OWLReasoner in owlapy - pass - class TripleStoreKnowledgeBase(KnowledgeBase): url: str ontology: TripleStoreOntology reasoner: TripleStoreReasoner - def __init__(self, triplestore_address: str): - self.url = triplestore_address - self.ontology = TripleStoreOntology(triplestore_address) + def __init__(self, url: str=None): + assert url is not None, "url must be string" + self.url = url + self.ontology = TripleStoreOntology(url) self.reasoner = TripleStoreReasoner(self.ontology) - - super().__init__( - ontology=self.ontology, reasoner=self.reasoner, load_class_hierarchy=False - ) + super().__init__( ontology=self.ontology, reasoner=self.reasoner, load_class_hierarchy=False) def get_direct_sub_concepts(self, concept: OWLClass) -> Iterable[OWLClass]: assert isinstance(concept, OWLClass) @@ -769,16 +734,11 @@ def get_direct_parents(self, concept: OWLClassExpression) -> Iterable[OWLClass]: assert isinstance(concept, OWLClass) yield from self.reasoner.super_classes(concept, direct=True) - def get_all_direct_sub_concepts( - self, concept: OWLClassExpression - ) -> Iterable[OWLClassExpression]: + def get_all_direct_sub_concepts(self, concept: OWLClassExpression) -> Iterable[OWLClassExpression]: assert isinstance(concept, OWLClass) yield from self.reasoner.sub_classes(concept, direct=True) - def get_all_sub_concepts( - self, concept: OWLClassExpression - ) -> Iterable[OWLClassExpression]: - + def get_all_sub_concepts(self, concept: 
OWLClassExpression) -> Iterable[OWLClassExpression]: assert isinstance(concept, OWLClass) yield from self.reasoner.sub_classes(concept, direct=False) @@ -794,8 +754,7 @@ def contains_class(self, concept: OWLClassExpression) -> bool: return concept in self.ontology.classes_in_signature() def most_general_object_properties( - self, *, domain: OWLClassExpression, inverse: bool = False - ) -> Iterable[OWLObjectProperty]: + self, *, domain: OWLClassExpression, inverse: bool = False) -> Iterable[OWLObjectProperty]: assert isinstance(domain, OWLClassExpression) func: Callable func = ( @@ -821,7 +780,7 @@ def data_properties(self) -> Iterable[OWLDataProperty]: yield from self.ontology.data_properties_in_signature() def get_data_properties( - self, ranges: Set[OWLDatatype] = None + self, ranges: Set[OWLDatatype] = None ) -> Iterable[OWLDataProperty]: if ranges is not None: @@ -833,7 +792,7 @@ def get_data_properties( ####################################################################################################################### - +# See https://github.com/dice-group/Ontolearn/issues/451 for the decision behind this seperation class TripleStoreReasonerOntology: @@ -883,19 +842,33 @@ def abox(self, str_iri: str) -> Generator[ o["value"] ) elif o["type"] == "literal": - if o["datatype"] == "http://www.w3.org/2001/XMLSchema#boolean": - yield subject_, OWLDataProperty(p["value"]), OWLLiteral( - value=bool(o["value"]) - ) - elif o["datatype"] == "http://www.w3.org/2001/XMLSchema#double": - yield subject_, OWLDataProperty(p["value"]), OWLLiteral( - value=float(o["value"]) - ) + if data_type := o.get("datatype", None): + if data_type == "http://www.w3.org/2001/XMLSchema#boolean": + yield subject_, OWLDataProperty(p["value"]), OWLLiteral(value=bool(o["value"])) + elif data_type == "http://www.w3.org/2001/XMLSchema#integer": + yield subject_, OWLDataProperty(p["value"]), OWLLiteral(value=float(o["value"])) + elif data_type == 
"http://www.w3.org/2001/XMLSchema#nonNegativeInteger": + # TODO: We do not have http://www.w3.org/2001/XMLSchema#nonNegativeInteger implemented + yield subject_, OWLDataProperty(p["value"]), OWLLiteral(value=float(o["value"])) + elif data_type == "http://www.w3.org/2001/XMLSchema#double": + yield subject_, OWLDataProperty(p["value"]), OWLLiteral(value=float(o["value"])) + else: + # TODO: Unclear for the time being. + # print(f"Currently this type of literal is not supported:{o} but can done easily let us know :)") + continue + """ + # TODO: Converting a SPARQL query becomes an issue with strings. + elif data_type == "http://www.w3.org/2001/XMLSchema#string": + yield subject_, OWLDataProperty(p["value"]), OWLLiteral(value=repr(o["value"])) + elif data_type == "http://www.w3.org/2001/XMLSchema#date": + yield subject_, OWLDataProperty(p["value"]), OWLLiteral(value=repr(o["value"])) + """ + else: - raise NotImplementedError( - f"Currently this type of literal is not supported:{o} " - f"but can done easily let us know :)" - ) + # print(f"Currently this type of literal is not supported:{o} but can done easily let us know :)") + continue + # yield subject_, OWLDataProperty(p["value"]), OWLLiteral(value=repr(o["value"])) + else: raise RuntimeError(f"Unrecognized type {subject_} ({p}) ({o})") @@ -949,13 +922,12 @@ def get_type_individuals(self, individual: str): yield OWLClass(binding["x"]["value"]) def instances( - self, expression: OWLClassExpression, named_individuals: bool = False + self, expression: OWLClassExpression, named_individuals: bool = False ) -> Generator[OWLNamedIndividual, None, None]: assert isinstance(expression, OWLClassExpression) try: - sparql_query = owl_expression_to_sparql( - expression=expression, named_individuals=named_individuals - ) + sparql_query = owl_expression_to_sparql(expression=expression, + named_individuals=named_individuals) except Exception as exc: print(f"Error at converting {expression} into sparql") @@ -963,6 +935,7 @@ def 
instances( print(f"Error at converting {expression} into sparql") raise RuntimeError("Couldn't convert") try: + # TODO:Be aware of the implicit inference of x being OWLNamedIndividual! for binding in self.query(sparql_query).json()["results"]["bindings"]: yield OWLNamedIndividual(binding["x"]["value"]) except: @@ -972,14 +945,14 @@ def instances( def individuals_in_signature(self) -> Generator[OWLNamedIndividual, None, None]: # owl:OWLNamedIndividual is often missing: Perhaps we should add union as well query = ( - owl_prefix + "SELECT DISTINCT ?x\n " + "WHERE {?x a ?y. ?y a owl:Class.}" + owl_prefix + "SELECT DISTINCT ?x\n " + "WHERE {?x a ?y. ?y a owl:Class.}" ) for binding in self.query(query).json()["results"]["bindings"]: yield OWLNamedIndividual(binding["x"]["value"]) def data_properties_in_signature(self) -> Iterable[OWLDataProperty]: query = ( - owl_prefix + "SELECT DISTINCT ?x " + "WHERE {?x a owl:DatatypeProperty.}" + owl_prefix + "SELECT DISTINCT ?x " + "WHERE {?x a owl:DatatypeProperty.}" ) for binding in self.query(query).json()["results"]["bindings"]: yield OWLDataProperty(binding["x"]["value"]) @@ -1008,24 +981,15 @@ def domain_of_double_data_properties(self, prop: OWLDataProperty): query = f"{rdf_prefix}\n{rdfs_prefix}\n{xsd_prefix}SELECT DISTINCT ?x WHERE {{?x <{prop.str}> ?z}}" for binding in self.query(query).json()["results"]["bindings"]: yield OWLNamedIndividual(binding["x"]["value"]) - - class TripleStore: - """Connecting a triple store""" - url: str - def __init__(self, reasoner=None, url: str = None): if reasoner is None: - assert ( - url is not None - ), f"Reasoner:{reasoner} and url of a triplestore {url} cannot be both None." + assert url is not None, f"Reasoner:{reasoner} and url of a triplestore {url} cannot be both None." self.g = TripleStoreReasonerOntology(url=url) else: self.g = reasoner - # This assigment is done as many CEL models are implemented to use both attributes seperately. - # CEL models will be refactored. 
self.ontology = self.g self.reasoner = self.g @@ -1080,8 +1044,8 @@ def __abox_expression(self, individual: OWLNamedIndividual) -> Generator[ ) # Iterating over the mappings of object properties to individuals. for ( - object_property, - list_owl_individuals, + object_property, + list_owl_individuals, ) in object_property_to_individuals.items(): # RETURN: \exists r. {x1,x33, .., x8} => Existential restriction over nominals yield OWLObjectSomeValuesFrom( @@ -1090,11 +1054,11 @@ def __abox_expression(self, individual: OWLNamedIndividual) -> Generator[ owl_class: OWLClass count: int for owl_class, count in Counter( - [ - type_i - for i in list_owl_individuals - for type_i in self.get_types(ind=i, direct=True) - ] + [ + type_i + for i in list_owl_individuals + for type_i in self.get_types(ind=i, direct=True) + ] ).items(): existential_quantifier = OWLObjectSomeValuesFrom( property=object_property, filler=owl_class @@ -1189,9 +1153,9 @@ def get_range_of_double_data_properties(self, prop: OWLDataProperty): yield from self.reasoner.range_of_double_data_properties(prop) def individuals( - self, - concept: Optional[OWLClassExpression] = None, - named_individuals: bool = False, + self, + concept: Optional[OWLClassExpression] = None, + named_individuals: bool = False, ) -> Generator[OWLNamedIndividual, None, None]: """Given an OWL class expression, retrieve all individuals belonging to it. 
Args: @@ -1210,7 +1174,7 @@ def individuals( ) def get_types( - self, ind: OWLNamedIndividual, direct: True + self, ind: OWLNamedIndividual, direct: True ) -> Generator[OWLClass, None, None]: if not direct: raise NotImplementedError("Inferring indirect types not available") diff --git a/ontolearn/utils/__init__.py b/ontolearn/utils/__init__.py index 5a5ec8ec..1674192c 100644 --- a/ontolearn/utils/__init__.py +++ b/ontolearn/utils/__init__.py @@ -28,20 +28,23 @@ import pickle import random import time -from typing import Callable, Set, TypeVar, Tuple, Union +from typing import Callable, TypeVar, Tuple, Union from owlapy.class_expression import OWLClass from owlapy.iri import IRI from owlapy.meta_classes import HasIRI from owlapy.owl_individual import OWLNamedIndividual from ontolearn.utils.log_config import setup_logging # noqa: F401 import pandas as pd -from .static_funcs import compute_f1_score +from .static_funcs import compute_f1_score, f1_set_similarity, concept_reducer, concept_reducer_properties + Factory = Callable from typing import Set + # DEFAULT_FMT = '[{elapsed:0.8f}s] {name}({args}) -> {result}' DEFAULT_FMT = 'Func:{name} took {elapsed:0.8f}s' flag_for_performance = False + def jaccard_similarity(y: Set[str], yhat: Set[str]) -> float: """ Compute Jaccard Similarity diff --git a/ontolearn/utils/static_funcs.py b/ontolearn/utils/static_funcs.py index 355a8ebe..47a17783 100644 --- a/ontolearn/utils/static_funcs.py +++ b/ontolearn/utils/static_funcs.py @@ -30,14 +30,110 @@ from owlapy.class_expression import OWLClass, OWLClassExpression from owlapy.iri import IRI from owlapy.owl_axiom import OWLEquivalentClassesAxiom -from owlapy.owl_ontology import OWLOntology -from owlapy.owl_ontology_manager import OWLOntologyManager, OntologyManager +from owlapy.abstracts import AbstractOWLOntology, AbstractOWLOntologyManager +from owlapy.owl_ontology_manager import OntologyManager from owlapy.owl_hierarchy import ClassHierarchy, ObjectPropertyHierarchy, 
DatatypePropertyHierarchy from owlapy.utils import OWLClassExpressionLengthMetric, LRUCache import traceback -from typing import Iterable from tqdm import tqdm +from typing import Set, Iterable +from owlapy.class_expression import ( + OWLQuantifiedObjectRestriction, + OWLObjectCardinalityRestriction, +) +from owlapy.class_expression import ( + OWLObjectUnionOf, + OWLObjectIntersectionOf, + OWLObjectSomeValuesFrom, + OWLObjectAllValuesFrom, + OWLObjectMinCardinality, + OWLObjectMaxCardinality, + OWLObjectOneOf, +) + + +def f1_set_similarity(y: Set[str], yhat: Set[str]) -> float: + """ + Compute F1 score for two set + :param y: A set of URIs + :param yhat: A set of URIs + :return: + """ + if len(yhat) == len(y) == 0: + return 1.0 + if len(yhat) == 0 or len(y) == 0: + return 0.0 + + tp = len(y.intersection(yhat)) + fp = len(yhat.difference(y)) + fn = len(y.difference(yhat)) + + precision = tp / (tp + fp) + recall = tp / (tp + fn) + + + if precision == 0 and recall == 0: + return 0.0 + + return (2 * precision * recall) / (precision + recall) + + +def concept_reducer(concepts, opt): + """ + Reduces a set of concepts by applying a binary operation to each pair of concepts. + + Args: + concepts (set): A set of concepts to be reduced. + opt (function): A binary function that takes a pair of concepts and returns a single concept. + + Returns: + set: A set containing the results of applying the binary operation to each pair of concepts. + + Example: + >>> concepts = {1, 2, 3} + >>> opt = lambda x: x[0] + x[1] + >>> concept_reducer(concepts, opt) + {2, 3, 4, 5, 6} + + Note: + The operation `opt` should be commutative and associative to ensure meaningful reduction in the context of set operations. 
+ """ + result = set() + for i in concepts: + for j in concepts: + result.add(opt((i, j))) + return result + + +def concept_reducer_properties( + concepts: Set, properties, cls: Callable = None, cardinality: int = 2 +) -> Set[Union[OWLQuantifiedObjectRestriction, OWLObjectCardinalityRestriction]]: + """ + Map a set of owl concepts and a set of properties into OWL Restrictions + + Args: + concepts: + properties: + cls (Callable): An owl Restriction class + cardinality: A positive Integer + + Returns: List of OWL Restrictions + + """ + assert isinstance(concepts, Iterable), "Concepts must be an Iterable" + assert isinstance(properties, Iterable), "properties must be an Iterable" + assert isinstance(cls, Callable), "cls must be an Callable" + assert cardinality > 0 + result = set() + for i in concepts: + for j in properties: + if cls == OWLObjectMinCardinality or cls == OWLObjectMaxCardinality: + result.add(cls(cardinality=cardinality, property=j, filler=i)) + continue + result.add(cls(j, i)) + return result + def make_iterable_verbose(iterable_object, verbose, desc="Default", position=None, leave=True) -> Iterable: if verbose > 0: @@ -141,6 +237,7 @@ def compute_f1_score(individuals, pos, neg) -> float: # pragma: no cover def plot_umap_reduced_embeddings(X: pandas.DataFrame, y: List[float], name: str = "umap_visualization.pdf") -> None: # pragma: no cover + # TODO:AB: 'umap' is not part of the dependencies !? 
import umap reducer = umap.UMAP(random_state=1) embedding = reducer.fit_transform(X) @@ -203,7 +300,7 @@ def plot_topk_feature_importance(feature_names, cart_tree, topk: int = 10)->None def save_owl_class_expressions(expressions: Union[OWLClassExpression, List[OWLClassExpression]], - path: str = 'Predictions', + path: str = './Predictions', rdf_format: str = 'rdfxml') -> None: # pragma: no cover assert isinstance(expressions, OWLClassExpression) or isinstance(expressions[0], OWLClassExpression), "expressions must be either OWLClassExpression or a list of OWLClassExpression" @@ -216,15 +313,15 @@ def save_owl_class_expressions(expressions: Union[OWLClassExpression, List[OWLCl # @TODO: CD: Lazy import. CD: Can we use rdflib to serialize concepts ?! from owlapy.owl_ontology import Ontology # () - manager: OWLOntologyManager = OntologyManager() + manager: AbstractOWLOntologyManager = OntologyManager() # () - ontology: OWLOntology = manager.create_ontology(IRI.create(NS)) + ontology: AbstractOWLOntology = manager.create_ontology(IRI.create(NS)) # () Iterate over concepts for th, i in enumerate(expressions): cls_a = OWLClass(IRI.create(NS, str(th))) equivalent_classes_axiom = OWLEquivalentClassesAxiom([cls_a, i]) try: - manager.add_axiom(ontology, equivalent_classes_axiom) + ontology.add_axiom(equivalent_classes_axiom) except AttributeError: print(traceback.format_exc()) print("Exception at creating OWLEquivalentClassesAxiom") @@ -233,7 +330,7 @@ def save_owl_class_expressions(expressions: Union[OWLClassExpression, List[OWLCl print(i) print(expressions) exit(1) - manager.save_ontology(ontology, IRI.create('file:/' + path + '.owl')) + ontology.save(IRI.create(path + '.owl')) def verbalize(predictions_file_path: str): # pragma: no cover diff --git a/ontolearn/value_splitter.py b/ontolearn/value_splitter.py index 99fd205b..e3c050c4 100644 --- a/ontolearn/value_splitter.py +++ b/ontolearn/value_splitter.py @@ -33,7 +33,7 @@ from owlapy.owl_individual import 
OWLNamedIndividual from owlapy.owl_literal import OWLLiteral from owlapy.owl_property import OWLDataProperty -from owlapy.owl_reasoner import OWLReasoner +from owlapy.abstracts import AbstractOWLReasoner from pandas import Timedelta from scipy.stats import entropy from sortedcontainers import SortedDict @@ -57,7 +57,7 @@ def __init__(self, max_nr_splits: int): self.max_nr_splits = max_nr_splits @abstractmethod - def compute_splits_properties(self, reasoner: OWLReasoner, properties: List[OWLDataProperty]) \ + def compute_splits_properties(self, reasoner: AbstractOWLReasoner, properties: List[OWLDataProperty]) \ -> Dict[OWLDataProperty, List[OWLLiteral]]: pass @@ -81,7 +81,7 @@ class BinningValueSplitter(AbstractValueSplitter): def __init__(self, max_nr_splits: int = 12): super().__init__(max_nr_splits) - def compute_splits_properties(self, reasoner: OWLReasoner, properties: List[OWLDataProperty]) \ + def compute_splits_properties(self, reasoner: AbstractOWLReasoner, properties: List[OWLDataProperty]) \ -> Dict[OWLDataProperty, List[OWLLiteral]]: return {p: self._compute_splits(set(reasoner.all_data_property_values(p))) for p in properties} @@ -145,7 +145,7 @@ def __init__(self, max_nr_splits: int = 2): super().__init__(max_nr_splits) self._prop_to_values = {} - def compute_splits_properties(self, reasoner: OWLReasoner, properties: List[OWLDataProperty], + def compute_splits_properties(self, reasoner: AbstractOWLReasoner, properties: List[OWLDataProperty], pos: Set[OWLNamedIndividual] = None, neg: Set[OWLNamedIndividual] = None) \ -> Dict[OWLDataProperty, List[OWLLiteral]]: assert pos is not None @@ -245,7 +245,7 @@ def _get_inds_below_above(self, value: Values, ind_value_map: 'SortedDict[Values inds_above = list(chain.from_iterable(ind_value_map.values()[idx:])) return inds_below, inds_above - def _get_values_for_inds(self, reasoner: OWLReasoner, property_: OWLDataProperty, inds: Set[OWLNamedIndividual]) \ + def _get_values_for_inds(self, reasoner: 
AbstractOWLReasoner, property_: OWLDataProperty, inds: Set[OWLNamedIndividual]) \ -> Dict[str, Values]: inds_to_value = dict() for ind in inds: diff --git a/report.csv b/report.csv deleted file mode 100644 index 56e46e6c..00000000 --- a/report.csv +++ /dev/null @@ -1,181 +0,0 @@ -LP,Fold,Train-F1-DRILL,Test-F1-DRILL,RT-DRILL,Train-F1-TDL,Test-F1-TDL,RT-TDL -Aunt,0,0.7575757575757577,0.0,10.643711805343628,0.9600000000000001,1.0,1.629626750946045 -Aunt,1,0.6788990825688073,0.6153846153846153,10.086888790130615,0.8787878787878788,1.0,1.5542900562286377 -Aunt,2,0.8131868131868133,0.8,10.152482032775879,0.7241379310344828,0.888888888888889,1.5611705780029297 -Aunt,3,0.8131868131868133,0.8,10.077905893325806,0.9577464788732395,0.5,1.5893428325653076 -Aunt,4,0.8131868131868133,0.7272727272727273,10.255544900894165,0.7241379310344828,0.888888888888889,1.667982578277588 -Aunt,5,0.8043478260869564,0.888888888888889,10.143845319747925,0.911764705882353,1.0,1.662182331085205 -Aunt,6,0.8314606741573033,0.6666666666666666,10.0705246925354,0.9428571428571428,0.6666666666666666,1.5716662406921387 -Aunt,7,0.8131868131868133,0.8,10.195331573486328,0.911764705882353,0.888888888888889,1.5937020778656006 -Aunt,8,0.8222222222222222,0.6,10.059152603149414,0.7241379310344828,0.75,1.5808241367340088 -Aunt,9,0.8131868131868133,0.8,10.066349029541016,0.6037735849056604,0.4,1.592435359954834 -Brother,0,0.9473684210526316,1.0,10.122657060623169,1.0,1.0,0.9948694705963135 -Brother,1,0.9642857142857143,1.0,10.137031316757202,1.0,1.0,0.9953510761260986 -Brother,2,0.9642857142857143,0.8571428571428571,10.191105604171753,1.0,1.0,0.9685995578765869 -Brother,3,0.9473684210526316,1.0,10.152650833129883,1.0,1.0,0.9713225364685059 -Brother,4,0.9473684210526316,1.0,10.30179476737976,1.0,1.0,0.9888601303100586 -Brother,5,0.9473684210526316,1.0,10.508636713027954,1.0,1.0,1.0506775379180908 -Brother,6,0.9642857142857143,0.8571428571428571,10.4213707447052,1.0,1.0,1.0210256576538086 
-Brother,7,0.9642857142857143,0.8571428571428571,10.452847480773926,1.0,1.0,1.0264787673950195 -Brother,8,0.9473684210526316,1.0,10.534273862838745,1.0,1.0,1.0497479438781738 -Brother,9,0.9473684210526316,1.0,10.60268521308899,1.0,1.0,1.075979232788086 -Cousin,0,0.6737967914438503,0.7272727272727273,10.579139709472656,0.7580645161290323,0.6666666666666666,2.819420099258423 -Cousin,1,0.6808510638297872,0.6666666666666666,10.340148448944092,0.6185567010309279,0.6666666666666666,3.015047550201416 -Cousin,2,0.6772486772486772,0.7000000000000001,10.17171049118042,0.7460317460317459,0.75,2.976219654083252 -Cousin,3,0.6772486772486772,0.7000000000000001,10.244986534118652,0.5740740740740741,0.4615384615384615,2.972656011581421 -Cousin,4,0.6808510638297872,0.6666666666666666,10.403497457504272,0.8759124087591241,0.7777777777777778,3.042815685272217 -Cousin,5,0.6808510638297872,0.6666666666666666,10.336796998977661,0.7080745341614906,0.6666666666666665,2.929539203643799 -Cousin,6,0.6808510638297872,0.6666666666666666,10.411279678344727,0.6086956521739131,0.5714285714285714,3.0655336380004883 -Cousin,7,0.6808510638297872,0.6666666666666666,10.340527296066284,0.8244274809160305,0.8750000000000001,2.9445199966430664 -Cousin,8,0.6808510638297872,0.6666666666666666,10.27221393585205,0.6878980891719746,0.588235294117647,2.9294135570526123 -Cousin,9,0.6808510638297872,0.6666666666666666,10.286634683609009,0.7559055118110236,0.5454545454545454,2.9068799018859863 -Daughter,0,0.9176470588235294,0.6666666666666666,10.288658142089844,1.0,1.0,1.8981523513793945 -Daughter,1,0.878048780487805,1.0,10.334213733673096,1.0,1.0,1.880248785018921 -Daughter,2,0.7966101694915254,0.7692307692307693,10.214572668075562,1.0,1.0,1.7235753536224365 -Daughter,3,0.7966101694915254,0.7692307692307693,10.10741114616394,1.0,1.0,1.8292639255523682 -Daughter,4,0.880952380952381,1.0,10.166059255599976,1.0,1.0,1.7984251976013184 
-Daughter,5,0.8941176470588235,0.888888888888889,10.173385858535767,1.0,1.0,1.837914228439331 -Daughter,6,0.8536585365853657,0.5714285714285715,10.180103302001953,1.0,1.0,1.8175277709960938 -Daughter,7,0.9069767441860465,0.7499999999999999,10.132628440856934,1.0,1.0,1.923680067062378 -Daughter,8,0.8941176470588235,0.888888888888889,10.156253337860107,1.0,1.0,1.8762238025665283 -Daughter,9,0.880952380952381,1.0,10.139609098434448,1.0,1.0,2.5373952388763428 -Father,0,0.6987951807228916,1.0,10.306830406188965,1.0,1.0,3.276716709136963 -Father,1,0.9391304347826086,1.0,10.29699969291687,1.0,1.0,3.5884950160980225 -Father,2,0.9473684210526316,0.923076923076923,10.309446096420288,1.0,1.0,3.557835102081299 -Father,3,0.9473684210526316,0.923076923076923,10.290547370910645,1.0,1.0,3.7736170291900635 -Father,4,0.7142857142857143,0.9090909090909091,10.184202909469604,1.0,1.0,3.646970510482788 -Father,5,0.9391304347826086,1.0,10.265981435775757,1.0,1.0,3.6551547050476074 -Father,6,0.9391304347826086,1.0,10.308954954147339,1.0,1.0,3.636713743209839 -Father,7,0.9391304347826086,1.0,10.302457571029663,1.0,1.0,3.6859583854675293 -Father,8,0.9473684210526316,0.923076923076923,11.209972381591797,1.0,1.0,3.5596234798431396 -Father,9,0.9473684210526316,0.923076923076923,10.285754919052124,1.0,1.0,3.618985652923584 -Granddaughter,0,0.8253968253968254,1.0,10.207628011703491,1.0,1.0,1.7590880393981934 -Granddaughter,1,0.875,0.5714285714285715,10.490622758865356,1.0,1.0,1.7864124774932861 -Granddaughter,2,0.84375,0.8571428571428571,10.270440578460693,1.0,1.0,1.8659541606903076 -Granddaughter,3,0.8387096774193549,0.888888888888889,10.441718578338623,1.0,1.0,1.7048046588897705 -Granddaughter,4,0.8787878787878788,0.4,10.173384428024292,1.0,1.0,1.6656403541564941 -Granddaughter,5,0.8253968253968254,1.0,10.291685342788696,1.0,1.0,1.9797186851501465 -Granddaughter,6,0.8571428571428572,0.75,10.245042085647583,1.0,1.0,1.764113426208496 
-Granddaughter,7,0.8,0.7499999999999999,10.178563833236694,1.0,1.0,1.6813058853149414 -Granddaughter,8,0.7906976744186047,0.8571428571428571,10.524157285690308,1.0,1.0,1.7787351608276367 -Granddaughter,9,0.8095238095238095,0.6666666666666666,10.23407506942749,1.0,1.0,1.8210666179656982 -Grandfather,0,0.8611111111111112,1.0,10.134748458862305,1.0,1.0,2.232255220413208 -Grandfather,1,0.8857142857142858,0.8,10.278506755828857,1.0,1.0,2.2302517890930176 -Grandfather,2,0.8732394366197184,0.888888888888889,10.384016513824463,1.0,1.0,2.2753970623016357 -Grandfather,3,0.8732394366197184,0.888888888888889,10.335604429244995,1.0,1.0,2.3278045654296875 -Grandfather,4,0.8611111111111112,1.0,10.371815919876099,1.0,1.0,2.3427388668060303 -Grandfather,5,0.761904761904762,0.6,10.549142837524414,1.0,1.0,2.3183822631835938 -Grandfather,6,0.735632183908046,0.8571428571428571,10.0295090675354,1.0,1.0,2.2775468826293945 -Grandfather,7,0.7529411764705882,0.6666666666666666,10.186664581298828,1.0,1.0,2.277482509613037 -Grandfather,8,0.7529411764705882,0.6666666666666666,10.165103673934937,1.0,1.0,2.3335306644439697 -Grandfather,9,0.7441860465116279,0.7499999999999999,10.00925612449646,1.0,1.0,2.338984966278076 -Grandgranddaughter,0,0.8000000000000002,1.0,10.205824613571167,1.0,1.0,0.756260871887207 -Grandgranddaughter,1,0.8387096774193549,0.6666666666666666,10.060903310775757,1.0,1.0,0.8525564670562744 -Grandgranddaughter,2,0.8666666666666667,0.5,10.286964893341064,1.0,1.0,0.869300127029419 -Grandgranddaughter,3,0.8275862068965518,0.8,10.307080030441284,1.0,1.0,0.8793110847473145 -Grandgranddaughter,4,0.8000000000000002,1.0,10.191157102584839,1.0,1.0,0.9423344135284424 -Grandgranddaughter,5,0.8000000000000002,1.0,11.037566661834717,1.0,1.0,0.7919189929962158 -Grandgranddaughter,6,0.8387096774193549,0.6666666666666666,10.208840131759644,1.0,1.0,0.8553135395050049 -Grandgranddaughter,7,0.8421052631578948,0.6666666666666666,10.235713005065918,1.0,1.0,0.8662207126617432 
-Grandgranddaughter,8,0.8205128205128205,1.0,10.154046058654785,1.0,1.0,0.826848030090332 -Grandgranddaughter,9,0.8421052631578948,0.6666666666666666,10.35183072090149,1.0,1.0,0.8636078834533691 -Grandgrandfather,0,0.8333333333333333,0.8,10.551578044891357,0.6956521739130436,0.6666666666666666,1.1670069694519043 -Grandgrandfather,1,0.8108108108108109,1.0,10.30830192565918,0.0,0.0,1.2312273979187012 -Grandgrandfather,2,0.8571428571428571,0.6666666666666666,10.275844097137451,1.0,0.8,1.0849416255950928 -Grandgrandfather,3,0.8108108108108109,1.0,10.374758958816528,0.125,0.0,1.1502177715301514 -Grandgrandfather,4,0.8333333333333333,0.8,10.05979609489441,0.125,0.0,1.135958194732666 -Grandgrandfather,5,0.8108108108108109,1.0,10.164026260375977,0.0,0.0,1.1486477851867676 -Grandgrandfather,6,0.8333333333333333,0.8,10.10143780708313,0.0,0.0,1.1892061233520508 -Grandgrandfather,7,0.761904761904762,0.5,10.193586111068726,1.0,0.6666666666666666,1.1413133144378662 -Grandgrandfather,8,0.761904761904762,0.5,10.315101146697998,1.0,1.0,1.0958023071289062 -Grandgrandfather,9,0.7441860465116279,0.6666666666666666,10.290321588516235,0.11764705882352941,0.0,1.172348976135254 -Grandgrandmother,0,0.7894736842105263,1.0,10.276443719863892,0.9655172413793104,1.0,0.9460880756378174 -Grandgrandmother,1,0.8108108108108109,0.8,10.275603294372559,0.9655172413793104,0.6666666666666666,0.9455740451812744 -Grandgrandmother,2,0.8108108108108109,0.8,10.207051753997803,1.0,1.0,0.9826905727386475 -Grandgrandmother,3,0.8108108108108109,0.8,10.254411935806274,1.0,0.8,0.9287319183349609 -Grandgrandmother,4,0.7894736842105263,1.0,10.018872499465942,0.5,0.0,0.9984481334686279 -Grandgrandmother,5,0.7894736842105263,1.0,10.130765914916992,0.9655172413793104,1.0,0.9703750610351562 -Grandgrandmother,6,0.8108108108108109,0.8,10.239579916000366,1.0,1.0,0.9493546485900879 -Grandgrandmother,7,0.6956521739130436,0.6666666666666666,10.128279447555542,0.967741935483871,0.6666666666666666,0.9954016208648682 
-Grandgrandmother,8,0.7111111111111111,0.5,10.17303466796875,0.967741935483871,1.0,0.9266934394836426 -Grandgrandmother,9,0.7111111111111111,0.5,10.246387243270874,1.0,1.0,0.9809508323669434 -Grandgrandson,0,0.8936170212765957,0.8571428571428571,10.239697217941284,1.0,0.8571428571428571,1.0497198104858398 -Grandgrandson,1,0.8750000000000001,1.0,10.155253171920776,1.0,0.8,1.1618521213531494 -Grandgrandson,2,0.8750000000000001,1.0,10.455909252166748,1.0,1.0,1.111853837966919 -Grandgrandson,3,0.8936170212765957,0.8571428571428571,10.308926582336426,1.0,0.8571428571428571,1.1918866634368896 -Grandgrandson,4,0.7457627118644068,0.8,10.387377977371216,1.0,0.8,1.0054936408996582 -Grandgrandson,5,0.7586206896551725,0.6666666666666666,10.30250597000122,1.0,1.0,1.072995901107788 -Grandgrandson,6,0.7719298245614035,0.5714285714285715,10.327780723571777,0.9767441860465117,0.8,1.1413650512695312 -Grandgrandson,7,0.7719298245614035,0.5714285714285715,10.09762978553772,1.0,1.0,1.0994288921356201 -Grandgrandson,8,0.88,1.0,10.19813847541809,1.0,1.0,1.094287633895874 -Grandgrandson,9,0.88,1.0,10.096000671386719,1.0,1.0,1.224492073059082 -Grandmother,0,0.911764705882353,1.0,10.2213876247406,1.0,1.0,1.7469537258148193 -Grandmother,1,0.9253731343283582,0.8,10.026697635650635,1.0,1.0,1.9178388118743896 -Grandmother,2,0.911764705882353,0.888888888888889,10.396840810775757,1.0,1.0,1.956364631652832 -Grandmother,3,0.911764705882353,0.888888888888889,10.165223360061646,1.0,1.0,1.7867670059204102 -Grandmother,4,0.898550724637681,1.0,10.26410460472107,1.0,1.0,1.9815576076507568 -Grandmother,5,0.7529411764705882,0.6666666666666666,10.355570793151855,1.0,1.0,1.8425483703613281 -Grandmother,6,0.7529411764705882,0.6666666666666666,10.042956113815308,1.0,1.0,1.8157532215118408 -Grandmother,7,0.761904761904762,0.6,10.119189023971558,1.0,1.0,1.726874589920044 -Grandmother,8,0.761904761904762,0.6,10.227713584899902,1.0,1.0,1.8222780227661133 
-Grandmother,9,0.7441860465116279,0.7499999999999999,10.572446584701538,1.0,1.0,1.916520595550537 -Grandson,0,0.9500000000000001,1.0,10.45121717453003,1.0,1.0,1.996962547302246 -Grandson,1,0.9620253164556963,0.9090909090909091,10.277642250061035,1.0,1.0,2.023207426071167 -Grandson,2,0.9500000000000001,1.0,11.044942378997803,1.0,1.0,1.989884614944458 -Grandson,3,0.7959183673469388,0.8,10.372109174728394,1.0,1.0,2.0029540061950684 -Grandson,4,0.7959183673469388,0.8,10.33963680267334,1.0,1.0,2.1420788764953613 -Grandson,5,0.7959183673469388,0.8,10.458490371704102,1.0,1.0,1.8334505558013916 -Grandson,6,0.962962962962963,0.888888888888889,10.274829387664795,1.0,1.0,2.101997137069702 -Grandson,7,0.962962962962963,0.888888888888889,10.266865730285645,1.0,1.0,2.0819005966186523 -Grandson,8,0.951219512195122,1.0,10.443277835845947,1.0,1.0,2.109687328338623 -Grandson,9,0.975,1.0,10.326062679290771,1.0,1.0,1.8898797035217285 -Mother,0,0.9310344827586207,0.8571428571428571,10.32288408279419,1.0,1.0,3.172806978225708 -Mother,1,0.9152542372881356,1.0,10.23881459236145,1.0,1.0,3.1301231384277344 -Mother,2,0.923076923076923,0.923076923076923,10.288079023361206,1.0,1.0,3.0593740940093994 -Mother,3,0.923076923076923,0.923076923076923,10.358543395996094,1.0,1.0,3.2367610931396484 -Mother,4,0.923076923076923,0.923076923076923,10.306773662567139,1.0,1.0,3.2038753032684326 -Mother,5,0.9152542372881356,1.0,10.297085046768188,1.0,1.0,3.046260118484497 -Mother,6,0.9310344827586207,0.8571428571428571,10.176455020904541,1.0,1.0,3.2601492404937744 -Mother,7,0.923076923076923,0.923076923076923,10.33784818649292,1.0,1.0,3.259394407272339 -Mother,8,0.9152542372881356,1.0,10.229471921920776,1.0,1.0,3.193561315536499 -Mother,9,0.9310344827586207,0.8571428571428571,10.242078065872192,1.0,1.0,3.1572659015655518 -PersonWithASibling,0,0.8421052631578948,0.8421052631578948,10.294052600860596,1.0,1.0,3.8989531993865967 
-PersonWithASibling,1,0.8311688311688311,0.9411764705882353,10.48823881149292,1.0,1.0,3.963196039199829 -PersonWithASibling,2,0.8496732026143791,0.7777777777777778,10.39369249343872,1.0,1.0,4.034543514251709 -PersonWithASibling,3,0.8496732026143791,0.7777777777777778,10.39035439491272,1.0,1.0,3.9486355781555176 -PersonWithASibling,4,0.8387096774193548,0.8750000000000001,10.192833662033081,1.0,1.0,3.937183141708374 -PersonWithASibling,5,0.8441558441558442,0.8235294117647058,10.063442945480347,1.0,1.0,3.8711564540863037 -PersonWithASibling,6,0.8552631578947367,0.7368421052631579,10.32389235496521,1.0,1.0,3.9331045150756836 -PersonWithASibling,7,0.8333333333333333,0.9333333333333333,10.348839521408081,1.0,1.0,3.9498133659362793 -PersonWithASibling,8,0.8387096774193548,0.8750000000000001,10.270437479019165,1.0,1.0,3.9628982543945312 -PersonWithASibling,9,0.8387096774193548,0.8750000000000001,10.42144227027893,1.0,1.0,4.119089841842651 -Sister,0,0.8125,0.8000000000000002,10.321670293807983,1.0,1.0,2.0848755836486816 -Sister,1,1.0,1.0,2.325162410736084,1.0,1.0,2.096190929412842 -Sister,2,0.8351648351648352,1.0,10.194955825805664,1.0,1.0,2.0969724655151367 -Sister,3,0.8539325842696629,0.8,10.341610670089722,1.0,1.0,2.115995407104492 -Sister,4,1.0,1.0,2.207367181777954,1.0,1.0,2.029665470123291 -Sister,5,1.0,1.0,2.150571346282959,1.0,1.0,2.115659475326538 -Sister,6,1.0,1.0,2.1680688858032227,1.0,1.0,2.0719399452209473 -Sister,7,1.0,1.0,2.2187042236328125,1.0,1.0,2.0832901000976562 -Sister,8,1.0,1.0,2.1514315605163574,1.0,1.0,2.09660005569458 -Sister,9,1.0,1.0,2.126575231552124,1.0,1.0,2.0524933338165283 -Son,0,0.7222222222222222,0.8,10.377680540084839,1.0,1.0,2.72068190574646 -Son,1,0.7222222222222222,0.8,10.20944881439209,1.0,1.0,2.587041139602661 -Son,2,0.8245614035087719,0.9090909090909091,10.299796342849731,1.0,1.0,2.6240527629852295 -Son,3,0.8392857142857142,0.7692307692307693,10.14388632774353,1.0,1.0,2.668272018432617 
-Son,4,0.7123287671232876,0.888888888888889,10.040217638015747,1.0,1.0,2.5966758728027344 -Son,5,0.7297297297297298,0.7499999999999999,10.48685622215271,1.0,1.0,2.835683822631836 -Son,6,0.7297297297297298,0.7499999999999999,10.354504108428955,1.0,1.0,2.5557501316070557 -Son,7,0.9894736842105264,1.0,10.458415269851685,1.0,1.0,2.6983001232147217 -Son,8,0.7466666666666666,0.5714285714285715,10.279742240905762,1.0,1.0,2.71370005607605 -Son,9,0.763157894736842,0.33333333333333337,10.05710220336914,1.0,1.0,2.6824440956115723 -Uncle,0,0.8831168831168831,0.888888888888889,10.231672763824463,0.7586206896551724,0.6666666666666666,2.303570508956909 -Uncle,1,0.6666666666666667,0.5714285714285715,10.106208801269531,0.8333333333333333,0.5714285714285715,2.35325026512146 -Uncle,2,0.8947368421052632,0.8,10.182795524597168,0.8474576271186441,0.6666666666666665,2.394861936569214 -Uncle,3,0.8717948717948718,1.0,10.730798482894897,0.7142857142857143,0.8571428571428571,2.00998592376709 -Uncle,4,0.8831168831168831,0.888888888888889,10.475631713867188,0.56,0.0,1.5967051982879639 -Uncle,5,0.8831168831168831,0.888888888888889,10.19606637954712,0.78125,0.75,1.714298963546753 -Uncle,6,0.8717948717948718,1.0,10.286277055740356,0.7796610169491526,0.8571428571428571,1.7564830780029297 -Uncle,7,0.8831168831168831,0.888888888888889,10.25898289680481,0.7142857142857143,0.8571428571428571,1.8795826435089111 -Uncle,8,0.6796116504854368,0.6,10.247749328613281,0.7868852459016394,1.0,1.7332313060760498 -Uncle,9,0.6796116504854368,0.6,10.288705825805664,0.6545454545454545,1.0,1.7731196880340576 diff --git a/setup.py b/setup.py index 1693182f..5b3a6d05 100644 --- a/setup.py +++ b/setup.py @@ -46,7 +46,7 @@ "tqdm>=4.64.0", "transformers>=4.38.1", "pytest>=7.2.2", - "owlapy==1.1.0", + "owlapy==1.3.1", "dicee==0.1.4", "ontosample>=0.2.2", "sphinx>=7.2.6", @@ -94,7 +94,7 @@ def deps_list(*pkgs): setup( name="ontolearn", description="Ontolearn is an open-source software library for structured machine learning 
in Python. Ontolearn includes modules for processing knowledge bases, inductive logic programming and ontology engineering.", - version="0.7.1", + version="0.8.0", packages=find_packages(), install_requires=extras["min"], extras_require=extras, diff --git a/tests/test_celoe.py b/tests/test_celoe.py index d1e320ac..d10947c5 100644 --- a/tests/test_celoe.py +++ b/tests/test_celoe.py @@ -8,8 +8,7 @@ from ontolearn.knowledge_base import KnowledgeBase from ontolearn.concept_learner import CELOE from ontolearn.learning_problem import PosNegLPStandard -from ontolearn.model_adapter import ModelAdapter -from ontolearn.utils import setup_logging, compute_f1_score +from ontolearn.utils import compute_f1_score from owlapy.render import DLSyntaxObjectRenderer PATH_FAMILY = 'KGs/Family/family-benchmark_rich_background.owl' @@ -122,8 +121,7 @@ def test_multiple_fits(self): neg_uncle = set(map(OWLNamedIndividual, map(IRI.create, settings['problems']['Uncle']['negative_examples']))) - - model = ModelAdapter(learner_type=CELOE, knowledge_base=kb, max_runtime=1000, max_num_of_concepts_tested=100) + model = CELOE(knowledge_base=kb, max_runtime=1000, max_num_of_concepts_tested=100) model.fit(pos=pos_aunt, neg=neg_aunt) kb.clean() model.fit(pos=pos_uncle, neg=neg_uncle) @@ -134,7 +132,7 @@ def test_multiple_fits(self): q, str_concept = compute_f1_score(individuals={i for i in kb.individuals(hypotheses[0])}, pos=pos_uncle, neg=neg_uncle), hypotheses[0] kb.clean() kb = KnowledgeBase(path=PATH_FAMILY) - model = ModelAdapter(learner_type=CELOE, knowledge_base=kb, max_runtime=1000, max_num_of_concepts_tested=100) + model = CELOE(knowledge_base=kb, max_runtime=1000, max_num_of_concepts_tested=100) model.fit(pos=pos_uncle, neg=neg_uncle) print("Only fitted on Uncle:") @@ -142,6 +140,5 @@ def test_multiple_fits(self): q2, str_concept2 = compute_f1_score(individuals={i for i in kb.individuals(hypotheses[0])}, pos=pos_uncle, neg=neg_uncle), hypotheses[0] - assert q==q2 - assert 
str_concept==str_concept2 - + assert q == q2 + assert str_concept == str_concept2 diff --git a/tests/test_clip.py b/tests/test_clip.txt similarity index 97% rename from tests/test_clip.py rename to tests/test_clip.txt index 583253d9..039f9018 100644 --- a/tests/test_clip.py +++ b/tests/test_clip.txt @@ -45,4 +45,4 @@ def test_prediction_quality_mutagenesis(self): if __name__ == "__main__": test = TestCLIP() test.test_prediction_quality_family() - test.test_prediction_quality_mutagenesis() \ No newline at end of file + test.test_prediction_quality_mutagenesis() diff --git a/tests/test_clip_trainer.py b/tests/test_clip_trainer.txt similarity index 94% rename from tests/test_clip_trainer.py rename to tests/test_clip_trainer.txt index 6fda15e5..132970b6 100644 --- a/tests/test_clip_trainer.py +++ b/tests/test_clip_trainer.txt @@ -40,6 +40,6 @@ def test_trainer_family(self): data = json.load(f) clip.train(list(data.items())[-100:], epochs=5, learning_rate=0.001, save_model=False, record_runtime=False, storage_path=f"./CLIP-{time.time()}/") -if __name__ == "__main__": - test = TestCLIPTrainer() - test.test_trainer_family() \ No newline at end of file +#if __name__ == "__main__": +# test = TestCLIPTrainer() + # test.test_trainer_family() \ No newline at end of file diff --git a/tests/test_express_refinement.py b/tests/test_express_refinement.py deleted file mode 100644 index b49406a9..00000000 --- a/tests/test_express_refinement.py +++ /dev/null @@ -1,57 +0,0 @@ -import json -import unittest - -from owlapy.class_expression import OWLClass -from owlapy.iri import IRI -from owlapy.owl_individual import OWLNamedIndividual - -from ontolearn.model_adapter import ModelAdapter -from ontolearn.refinement_operators import ExpressRefinement - -NS = 'http://www.benchmark.org/family#' -PATH_FAMILY = 'KGs/Family/family-benchmark_rich_background.owl' - -with open('examples/synthetic_problems.json') as json_file: - settings = json.load(json_file) - - -class 
TestExpressRefinement(unittest.TestCase): - def test_celoe_express(self): - concepts_to_ignore = { - OWLClass(IRI(NS, 'Brother')), - OWLClass(IRI(NS, 'Sister')), - OWLClass(IRI(NS, 'Daughter')), - OWLClass(IRI(NS, 'Mother')), - OWLClass(IRI(NS, 'Grandmother')), - OWLClass(IRI(NS, 'Father')), - OWLClass(IRI(NS, 'Grandparent')), - OWLClass(IRI(NS, 'PersonWithASibling')), - OWLClass(IRI(NS, 'Granddaughter')), - OWLClass(IRI(NS, 'Son')), - OWLClass(IRI(NS, 'Child')), - OWLClass(IRI(NS, 'Grandson')), - OWLClass(IRI(NS, 'Grandfather')), - OWLClass(IRI(NS, 'Grandchild')), - OWLClass(IRI(NS, 'Parent')), - } - model = ModelAdapter(path=PATH_FAMILY, - refinement_operator_type=ExpressRefinement, - ignore=concepts_to_ignore, - # max_runtime=600, - # max_num_of_concepts_tested=10_000_000_000, - # iter_bound=10_000_000_000, - # expansionPenaltyFactor=0.01 - ) - for str_target_concept, examples in settings['problems'].items(): - p = set(examples['positive_examples']) - n = set(examples['negative_examples']) - print('Target concept: ', str_target_concept) - - typed_pos = set(map(OWLNamedIndividual, map(IRI.create, p))) - typed_neg = set(map(OWLNamedIndividual, map(IRI.create, n))) - - model = model.fit(pos=typed_pos, neg=typed_neg) - - hypotheses = list(model.best_hypotheses(n=3)) - [print(_) for _ in hypotheses] - break diff --git a/tests/test_knowledge_base.py b/tests/test_knowledge_base.py index 0a457196..be140478 100644 --- a/tests/test_knowledge_base.py +++ b/tests/test_knowledge_base.py @@ -143,13 +143,13 @@ def test_property_retrieval(self): # indirect object property values of an individual super_in_bond = OWLObjectProperty(IRI.create(self.namespace, 'super_inBond')) - self.mgr.add_axiom(self.onto, OWLSubObjectPropertyOfAxiom(self.in_bond, super_in_bond)) + self.onto.add_axiom(OWLSubObjectPropertyOfAxiom(self.in_bond, super_in_bond)) inds = frozenset(self.kb.get_object_property_values(self.bond5225, super_in_bond, direct=False)) true_inds = {self.d91_32, self.d91_17} 
self.assertEqual(true_inds, inds) inds = frozenset(self.kb.get_object_property_values(self.bond5225, super_in_bond, direct=True)) self.assertEqual(frozenset(), inds) - self.mgr.remove_axiom(self.onto, OWLSubObjectPropertyOfAxiom(self.in_bond, super_in_bond)) + self.onto.remove_axiom(OWLSubObjectPropertyOfAxiom(self.in_bond, super_in_bond)) # data property values of an individual values = frozenset(self.kb.get_data_property_values(self.d91_32, self.charge, direct=True)) @@ -158,13 +158,13 @@ def test_property_retrieval(self): # indirect data property values of an individual super_charge = OWLDataProperty(IRI.create(self.namespace, 'super_charge')) - self.mgr.add_axiom(self.onto, OWLSubDataPropertyOfAxiom(self.charge, super_charge)) + self.onto.add_axiom(OWLSubDataPropertyOfAxiom(self.charge, super_charge)) values = frozenset(self.kb.get_data_property_values(self.d91_32, super_charge, direct=False)) true_values = {OWLLiteral(0.146)} self.assertEqual(true_values, values) values = frozenset(self.kb.get_data_property_values(self.d91_32, super_charge, direct=True)) self.assertEqual(frozenset(), values) - self.mgr.remove_axiom(self.onto, OWLSubDataPropertyOfAxiom(self.charge, super_charge)) + self.onto.remove_axiom(OWLSubDataPropertyOfAxiom(self.charge, super_charge)) # object properties of an individual properties = frozenset(self.kb.get_object_properties_for_ind(self.bond5225, direct=True)) @@ -172,14 +172,14 @@ def test_property_retrieval(self): self.assertEqual(true_properties, properties) # indirect object properties of an individual - self.mgr.add_axiom(self.onto, OWLSubObjectPropertyOfAxiom(self.in_bond, self.has_bond)) + self.onto.add_axiom(OWLSubObjectPropertyOfAxiom(self.in_bond, self.has_bond)) properties = frozenset(self.kb.get_object_properties_for_ind(self.bond5225, direct=False)) true_properties = {self.in_bond, self.has_bond} self.assertEqual(true_properties, properties) properties = frozenset(self.kb.get_object_properties_for_ind(self.bond5225, 
direct=True)) true_properties = {self.in_bond} self.assertEqual(true_properties, properties) - self.mgr.remove_axiom(self.onto, OWLSubObjectPropertyOfAxiom(self.in_bond, self.has_bond)) + self.onto.remove_axiom(OWLSubObjectPropertyOfAxiom(self.in_bond, self.has_bond)) # data properties of an individual properties = frozenset(self.kb.get_data_properties_for_ind(self.d91_32, direct=True)) @@ -187,14 +187,14 @@ def test_property_retrieval(self): self.assertEqual(true_properties, properties) # indirect data properties of an individual - self.mgr.add_axiom(self.onto, OWLSubDataPropertyOfAxiom(self.charge, self.act)) + self.onto.add_axiom(OWLSubDataPropertyOfAxiom(self.charge, self.act)) properties = frozenset(self.kb.get_data_properties_for_ind(self.d91_32, direct=False)) true_properties = {self.charge, self.act} self.assertEqual(true_properties, properties) properties = frozenset(self.kb.get_data_properties_for_ind(self.d91_32, direct=True)) true_properties = {self.charge} self.assertEqual(true_properties, properties) - self.mgr.remove_axiom(self.onto, OWLSubDataPropertyOfAxiom(self.charge, self.act)) + self.onto.remove_axiom(OWLSubDataPropertyOfAxiom(self.charge, self.act)) def test_ignore(self): concepts_to_ignore = {self.bond1, self.compound} diff --git a/tests/test_model_adapter.py b/tests/test_model_adapter.py deleted file mode 100644 index 84c74550..00000000 --- a/tests/test_model_adapter.py +++ /dev/null @@ -1,90 +0,0 @@ -import json -import unittest -from typing import cast - -from ontolearn.concept_learner import CELOE, EvoLearner -from ontolearn.heuristics import CELOEHeuristic -from ontolearn.knowledge_base import KnowledgeBase -from ontolearn.metrics import Accuracy -from ontolearn.model_adapter import ModelAdapter -from ontolearn.refinement_operators import ModifiedCELOERefinement -from owlapy.owl_individual import OWLNamedIndividual -from owlapy.iri import IRI -from owlapy.owl_ontology import Ontology -from owlapy.owl_reasoner import SyncReasoner, 
BaseReasoner - - -class TestModelAdapter(unittest.TestCase): - - def test_celoe_quality_variant_1(self): - with open('examples/synthetic_problems.json') as json_file: - settings = json.load(json_file) - kb_path = "KGs/Family/family-benchmark_rich_background.owl" - kb = KnowledgeBase(path=kb_path) - reasoner = SyncReasoner(cast(Ontology, kb.ontology), BaseReasoner.HERMIT) - op = ModifiedCELOERefinement(knowledge_base=kb, use_negation=False, use_all_constructor=False) - p = set(settings['problems']['Uncle']['positive_examples']) - n = set(settings['problems']['Uncle']['negative_examples']) - typed_pos = set(map(OWLNamedIndividual, map(IRI.create, p))) - typed_neg = set(map(OWLNamedIndividual, map(IRI.create, n))) - - model = ModelAdapter(learner_type=CELOE, - path=kb_path, - reasoner=reasoner, - quality_type=Accuracy, - max_runtime=5, - max_num_of_concepts_tested=10_000_000_000, - iter_bound=10_000_000_000, - refinement_operator=op) - - model = model.fit(pos=typed_pos, neg=typed_neg) - hypothesis = model.best_hypotheses(n=1, return_node=True) - assert hypothesis.quality >= 0.86 - - def test_celoe_quality_variant_2(self): - with open('examples/synthetic_problems.json') as json_file: - settings = json.load(json_file) - kb_path = "KGs/Family/family-benchmark_rich_background.owl" - kb = KnowledgeBase(path=kb_path) - reasoner = SyncReasoner(cast(Ontology, kb.ontology), BaseReasoner.PELLET) - op = ModifiedCELOERefinement(knowledge_base=kb, use_negation=False, use_all_constructor=False) - p = set(settings['problems']['Uncle']['positive_examples']) - n = set(settings['problems']['Uncle']['negative_examples']) - typed_pos = set(map(OWLNamedIndividual, map(IRI.create, p))) - typed_neg = set(map(OWLNamedIndividual, map(IRI.create, n))) - - model = ModelAdapter(learner_type=CELOE, - path=kb_path, - reasoner=reasoner, - quality_type=Accuracy, - max_runtime=5, - max_num_of_concepts_tested=10_000_000_000, - iter_bound=10_000_000_000, - refinement_operator=op, - 
heuristic_type=CELOEHeuristic, - expansionPenaltyFactor=0.05, - startNodeBonus=1.0, - nodeRefinementPenalty=0.01 - ) - - model = model.fit(pos=typed_pos, neg=typed_neg) - hypothesis = model.best_hypotheses(n=1, return_node=True) - assert hypothesis.quality >= 0.59 - - def test_evolearner_quality(self): - with open('examples/synthetic_problems.json') as json_file: - settings = json.load(json_file) - kb = KnowledgeBase(path="KGs/Family/family-benchmark_rich_background.owl") - reasoner = SyncReasoner(cast(Ontology, kb.ontology), BaseReasoner.HERMIT) - p = set(settings['problems']['Uncle']['positive_examples']) - n = set(settings['problems']['Uncle']['negative_examples']) - typed_pos = set(map(OWLNamedIndividual, map(IRI.create, p))) - typed_neg = set(map(OWLNamedIndividual, map(IRI.create, n))) - - model = ModelAdapter(learner_type=EvoLearner, - knowledge_base=kb, - reasoner=reasoner) - - model = model.fit(pos=typed_pos, neg=typed_neg) - hypothesis = model.best_hypotheses(n=1,return_node=True) - assert hypothesis.quality >= 0.9 diff --git a/tests/test_nces.py b/tests/test_nces.py index cddc7ae3..6fd8c2cc 100644 --- a/tests/test_nces.py +++ b/tests/test_nces.py @@ -2,6 +2,7 @@ from ontolearn.knowledge_base import KnowledgeBase from owlapy.parser import DLSyntaxParser from ontolearn.metrics import F1 +from ontolearn.learning_problem import PosNegLPStandard import time import random import unittest @@ -33,21 +34,22 @@ class TestNCES(unittest.TestCase): def test_prediction_quality_family(self): nces = NCES(knowledge_base_path="./NCESData/family/family.owl", quality_func=F1(), num_predictions=100, path_of_embeddings="./NCESData/family/embeddings/ConEx_entity_embeddings.csv", - pretrained_model_name=["LSTM", "GRU", "SetTransformer"]) + learner_names=["LSTM", "GRU", "SetTransformer"]) KB = KnowledgeBase(path=nces.knowledge_base_path) dl_parser = DLSyntaxParser(nces.kb_namespace) brother = dl_parser.parse('Brother') daughter = dl_parser.parse('Daughter') pos = 
set(KB.individuals(brother)).union(set(KB.individuals(daughter))) neg = set(KB.individuals())-set(pos) - node = list(nces.fit(pos, neg).best_predictions)[0] + learning_problem = PosNegLPStandard(pos=pos, neg=neg) + node = list(nces.fit(learning_problem).best_predictions)[0] print("Quality:", node.quality) assert node.quality > 0.95 def test_prediction_quality_mutagenesis(self): nces = NCES(knowledge_base_path="./NCESData/mutagenesis/mutagenesis.owl", quality_func=F1(), num_predictions=100, path_of_embeddings="./NCESData/mutagenesis/embeddings/ConEx_entity_embeddings.csv", - pretrained_model_name=["LSTM", "GRU", "SetTransformer"]) + learner_names=["LSTM", "GRU", "SetTransformer"]) KB = KnowledgeBase(path=nces.knowledge_base_path) dl_parser = DLSyntaxParser(nces.kb_namespace) exists_inbond = dl_parser.parse('∃ hasStructure.Benzene') @@ -57,7 +59,8 @@ def test_prediction_quality_mutagenesis(self): if len(pos) > 500: pos = set(np.random.choice(list(pos), size=min(500, len(pos)), replace=False)) neg = set(neg[:min(1000-len(pos), len(neg))]) - node = list(nces.fit(pos, neg).best_predictions)[0] + learning_problem = PosNegLPStandard(pos=pos, neg=neg) + node = list(nces.fit(learning_problem).best_predictions)[0] print("Quality:", node.quality) assert node.quality > 0.95 diff --git a/tests/test_tdl_regression.py b/tests/test_tdl_regression.py index 96a8fd67..710f01ba 100644 --- a/tests/test_tdl_regression.py +++ b/tests/test_tdl_regression.py @@ -1,6 +1,5 @@ from ontolearn.learners import TDL from ontolearn.knowledge_base import KnowledgeBase -from ontolearn.triple_store import TripleStore from ontolearn.learning_problem import PosNegLPStandard from owlapy.iri import IRI from owlapy.owl_individual import OWLNamedIndividual @@ -35,9 +34,9 @@ def test_regression_family(self): # If not a valid SPARQL query, it should throw an error rdflib.Graph().query(owl_expression_to_sparql(root_variable="?x", expression=h)) # Save the prediction - save_owl_class_expressions(h, 
path="Predictions") + save_owl_class_expressions(h) # (Load the prediction) and check the number of owl class definitions - g = rdflib.Graph().parse("Predictions.owl") + g = rdflib.Graph().parse("./Predictions.owl") # rdflib.Graph() parses named OWL Classes by the order of their definition named_owl_classes = [s for s, p, o in g.triples((None, rdflib.namespace.RDF.type, rdflib.namespace.OWL.Class)) if @@ -65,7 +64,6 @@ def test_regression_mutagenesis(self): assert q >= 0.94 """ - def test_regression_family_triple_store(self): """ # @TODO: CD: Removed because rdflib does not produce correct results