Skip to content

Commit

Permalink
Merge pull request #1 from Minitour/feature/code-enhancements
Browse files Browse the repository at this point in the history
Code refactoring
  • Loading branch information
Minitour authored Jan 3, 2025
2 parents 0639a8b + f66d11d commit fc0c82e
Show file tree
Hide file tree
Showing 14 changed files with 264 additions and 118 deletions.
39 changes: 34 additions & 5 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
name: Publish Python Package
name: CI Pipeline

on:
push:
branches: # Run on pushes to any branch ('**' also matches names containing '/')
- '**'
pull_request: # Run on pull requests to any branch
branches:
- master
- '**'

jobs:
build:
test:
name: Run Unit Tests
runs-on: ubuntu-latest

steps:
Expand All @@ -15,13 +19,38 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v2
with:
python-version: '3.9'
python-version: '3.12'

- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install poetry
poetry install
poetry install --with test
- name: Run Tests
run: |
poetry run pytest
release:
name: Publish Python Package
needs: test # Ensure tests pass before publishing
runs-on: ubuntu-latest
if: github.ref == 'refs/heads/master'

steps:
- uses: actions/checkout@v2

- name: Set up Python
uses: actions/setup-python@v2
with:
python-version: '3.12'

- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install poetry
poetry install
- name: Publish package
env:
Expand Down
39 changes: 34 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,13 +42,25 @@ model = ChatGptModelParaphrase(api_key='sk-xyz', model='gpt-4o', temperature=0.7

```python
from verbalizer.process import Processor
from verbalizer.vocabulary import Vocabulary
from verbalizer import Verbalizer

ontology = 'pizza.ttl'
name = 'pizza'
processor = Processor(llm=model, vocab_ignore=ignore, vocab_rephrased=rephrased, min_statements=1)
processor.process(name, ontology, output_dir='/path/to/my/output')
ontology = Processor.from_file('pizza.ttl')

# create vocabulary and verbalizer
vocab = Vocabulary(ontology, ignore=ignore, rephrased=rephrased)
verbalizer = Verbalizer(vocab, language_model=model)

# start verbalization process
results = Processor.verbalize_with(verbalizer, namespace="pizza", output_dir="./output")
```

## Examples

<details>

<summary>Expand to see examples</summary>

<table border="1">
<tr>
<th>OWL Fragment</th>
Expand Down Expand Up @@ -175,4 +187,21 @@ processor.process(name, ontology, output_dir='/path/to/my/output')
Chicken topping is a type of meat topping that has at least some mild spiciness. It is different from pepperoni sausage topping, hot spiced beef topping, and ham topping.
</td>
</tr>
</table>
</table>

</details>


## Citation

```
@inproceedings{zaitoun2024generating,
title={Generating Ontology-Learning Training-Data through Verbalization},
author={Zaitoun, Antonio and Sagi, Tomer and Peleg, Mor},
booktitle={Proceedings of the AAAI Symposium Series},
volume={4},
number={1},
pages={233--241},
year={2024}
}
```
2 changes: 1 addition & 1 deletion evaluations/processed.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,4 +113,4 @@ def test_evaluation(self):
for ontology_name, contents in ontologies.items():
file = contents['file']
sampler = CustomSampler(samples=contents['samples'])
processor.process(ontology_name, file, data_sampler=sampler)
processor.verbalize_with(ontology_name, file, sampler=sampler)
22 changes: 17 additions & 5 deletions playground.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
from verbalizer.nlp import ChatGptModelParaphrase, LlamaModelParaphrase
from verbalizer.process import Processor
from verbalizer.sampler import Sampler
from verbalizer.verbalizer import Verbalizer
from verbalizer.vocabulary import Vocabulary

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -87,9 +89,19 @@

sampler = Sampler(sample_n=100, seed=42)

ontologies = [
('people', Processor.from_file('./data/people.ttl')),
('pizza', Processor.from_file('./data/pizza.ttl')),
('mondo', Processor.from_file('./data/mondo.owl')),
('fma', Processor.from_file('./data/fma.owl')),
]

vocabularies = [
(namespace, Vocabulary(ontology, ignore=ignore, rephrased=rephrased))
for namespace, ontology in ontologies
]

for model in models:
processor = Processor(llm=model, vocab_ignore=ignore, vocab_rephrased=rephrased, min_statements=1)
processor.process('people', './data/people.ttl')
processor.process('pizza', './data/pizza.ttl')
processor.process('mondo', './data/mondo.owl', data_sampler=sampler)
processor.process('fma', './data/fma.owl', data_sampler=sampler)
for namespace, vocabulary in vocabularies:
verbalizer = Verbalizer(vocabulary, language_model=model)
results = Processor.verbalize_with(verbalizer, namespace=namespace, output_dir="./output", sampler=sampler)
7 changes: 5 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "ontology-verbalizer"
version = "1.0.1"
version = "1.1.0"
description = "A Python package for ontology verbalization"
authors = ["Antonio Zaitoun <tony.z.1711@gmail.com>"]
license = "MIT"
Expand All @@ -10,12 +10,15 @@ packages = [
]
repository = "https://github.com/Minitour/ontology-verbalizer"
[tool.poetry.dependencies]
python = "^3.9"
python = "^3.12"
rdflib = "~7.0.0"
openai = "~1.12.0"
pandas = "~2.2.0"
tqdm = "~4.66.2"

[tool.poetry.group.test.dependencies]
pytest = "~8.3.4"

[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
Empty file added tests/__init__.py
Empty file.
66 changes: 66 additions & 0 deletions tests/test_verbalization.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import unittest

from rdflib import Graph

from verbalizer.process import Processor
from verbalizer.sampler import Sampler
from verbalizer.vocabulary import Vocabulary
from verbalizer import Verbalizer

# Predicate IRIs mapped to short English phrases; handed to Vocabulary as
# its `rephrased` argument in the tests below.
# NOTE(review): presumably each phrase stands in for the predicate in the
# generated text — confirm against Vocabulary.
rename_iri = {
'http://www.w3.org/2002/07/owl#equivalentClass': 'is same as',
'http://www.w3.org/2000/01/rdf-schema#subClassOf': 'is a type of',
'http://www.w3.org/2002/07/owl#intersectionOf': 'all of',
'http://www.w3.org/2002/07/owl#unionOf': 'any of',
'http://www.w3.org/2002/07/owl#disjointWith': 'is different from',
'http://www.w3.org/2002/07/owl#withRestrictions': 'must be'
}
# IRIs handed to Vocabulary as its `ignore` argument in the tests below.
# NOTE(review): presumably these are left out of the verbalization —
# confirm against Vocabulary.
ignore_iri = {
'http://www.w3.org/2002/07/owl#onDatatype',
'http://www.w3.org/2000/01/rdf-schema#seeAlso',
'http://www.w3.org/2000/01/rdf-schema#label',
'http://www.w3.org/2000/01/rdf-schema#comment',
'http://www.w3.org/1999/02/22-rdf-syntax-ns#type',
'http://www.w3.org/2000/01/rdf-schema#isDefinedBy',
'http://www.w3.org/2003/06/sw-vocab-status/ns#term_status',
'http://www.w3.org/2000/01/rdf-schema#Class'
}


class TestVerbalization(unittest.TestCase):
    """Run the verbalization pipeline on ``./data/foaf.owl`` and check result counts."""

    @staticmethod
    def _foaf_verbalizer():
        """Build a ``Verbalizer`` over the FOAF ontology using the module-level IRI sets."""
        graph = Processor.from_file('./data/foaf.owl')
        vocabulary = Vocabulary(graph, ignore=ignore_iri, rephrased=rename_iri)
        return Verbalizer(vocabulary)

    def test_verbalization(self):
        """Without a sampler, expect 12 results; the first fragment must parse as Turtle."""
        results = Processor.verbalize_with(self._foaf_verbalizer(), namespace='foaf')
        self.assertEqual(12, len(results))

        # Prepend a default-prefix declaration; parsing does not work without it.
        turtle_text = '@prefix : <https://zaitoun.dev#> .\n' + results[0]['fragment']
        parsed = Graph()
        parsed.parse(data=turtle_text, format="turtle")

        triples = list(parsed.triples((None, None, None)))
        self.assertEqual(7, len(triples))

    def test_verbalization_with_sampler(self):
        """With a seeded sampler of size 10, expect 7 results."""
        foaf_verbalizer = self._foaf_verbalizer()
        ten_samples = Sampler(sample_n=10, seed=42)

        results = Processor.verbalize_with(
            foaf_verbalizer, namespace='foaf', sampler=ten_samples
        )

        # Although 10 were sampled, only 7 were applicable.
        self.assertEqual(7, len(results))
1 change: 1 addition & 0 deletions verbalizer/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .verbalizer import Verbalizer
7 changes: 5 additions & 2 deletions verbalizer/patterns/owl_disjoint.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from rdflib import URIRef

from verbalizer.patterns import Pattern
from verbalizer.verbalizer import VerbalizationNode, VerbalizationEdge
from verbalizer.verbalizer import VerbalizationNode, VerbalizationEdge, default_patterns
from verbalizer.vocabulary import Vocabulary


Expand Down Expand Up @@ -46,7 +46,8 @@ def normalize(self, node: VerbalizationNode, triple_collector):
relation_display = self.vocab.get_relationship_label(relation)

if relation_display == Vocabulary.IGNORE_VALUE:
triple_collector.append((node.concept, relation, obj))
if self.vocab.should_keep(relation):
triple_collector.append((node.concept, relation, obj))
continue

next_node = VerbalizationNode(obj, parent_path=node.get_parent_path() + [(node.concept, relation)])
Expand All @@ -56,3 +57,5 @@ def normalize(self, node: VerbalizationNode, triple_collector):
triple_collector.append((node.concept, relation, obj))

return [(reference.relationship, reference.node.concept) for reference in node.references]

default_patterns.append(OwlDisjointWith)
4 changes: 3 additions & 1 deletion verbalizer/patterns/owl_first_rest.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from verbalizer.patterns import Pattern
from rdflib import URIRef

from verbalizer.verbalizer import VerbalizationNode, VerbalizationEdge
from verbalizer.verbalizer import VerbalizationNode, VerbalizationEdge, default_patterns


class OwlFirstRestPattern(Pattern):
Expand Down Expand Up @@ -37,3 +37,5 @@ def normalize(self, node: VerbalizationNode, triple_collector):
current = rest_node

return [(reference.relationship, reference.node.concept) for reference in node.references]

default_patterns.append(OwlFirstRestPattern)
4 changes: 3 additions & 1 deletion verbalizer/patterns/owl_restriction.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from rdflib import URIRef

from verbalizer.patterns import Pattern
from verbalizer.verbalizer import VerbalizationNode, VerbalizationEdge
from verbalizer.verbalizer import VerbalizationNode, VerbalizationEdge, default_patterns


class OwlRestrictionPattern(Pattern):
Expand Down Expand Up @@ -127,3 +127,5 @@ def _handle_cardinality(self, quantifier_relation, property_relation, obj_litera
return f'has at least {literal_value}{on_class_label}{property_relation_label}{relation_plural_s}'
elif quantifier_relation.endswith('maxCardinality') or quantifier_relation.endswith('maxQualifiedCardinality'):
return f'has at most {literal_value}{on_class_label}{property_relation_label}{relation_plural_s}'

default_patterns.append(OwlRestrictionPattern)
Loading

0 comments on commit fc0c82e

Please sign in to comment.