-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathgenerate_cell_names.py
94 lines (77 loc) · 3.89 KB
/
generate_cell_names.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import ontospy
import rdflib
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from sqlalchemy.ext.declarative import declarative_base
import inflect
from database_schema import Normalization
# Build the lookup table {surface form -> canonical label} from the Cell
# Ontology (CL).
# The OWL file can be downloaded from https://bioportal.bioontology.org/ontologies/CL
model = ontospy.Ontospy("./data/cl.owl")
# Every descendant of this class is harvested below (presumably CL_0000000 is
# the ontology's root "cell" class -- confirm against the OWL file).
cells = model.getClass("http://purl.obolibrary.org/obo/CL_0000000")

# Synonym strings that are never taken from the ontology. "monocyte" and
# "Treg" are registered by hand when the database is populated further down.
remappings = ["OVA", "monocyte", "Treg"]

# The oboInOwl synonym predicates that are harvested for every class, in the
# order they are applied.
_OBO_IN_OWL = "http://www.geneontology.org/formats/oboInOwl#"
_SYNONYM_PREDICATES = tuple(
    rdflib.term.URIRef(_OBO_IN_OWL + scope)
    for scope in (
        "hasExactSynonym",
        "hasRelatedSynonym",
        "hasBroadSynonym",
        "hasNarrowSynonym",
    )
)

# Collect all canonical labels first: a synonym attached to one class must
# never redirect the canonical label of another class. Previously the winner
# depended on traversal order, which is why names such as "monocyte" had to be
# blacklisted by hand in `remappings`.
_cell_classes = list(cells.descendants())
_canonical_labels = {e.bestLabel() for e in _cell_classes}

synonyms = {}
for e in _cell_classes:
    label = e.bestLabel()
    # A label always maps to itself.
    synonyms[label] = label
    for predicate in _SYNONYM_PREDICATES:
        for syn in e.rdfgraph.objects(subject=e.uri, predicate=predicate):
            name = str(syn)
            if name in remappings or name in _canonical_labels:
                continue
            # Among plain synonyms the last class seen still wins, as before.
            synonyms[name] = label
# Add a plural form for every known name, pointing at the same canonical
# label as its singular. The inflect object gets its own name so that it is
# not confused with the SQLAlchemy `engine` created later in this script.
inflector = inflect.engine()
# Iterate over a snapshot of the keys: plurals are inserted while looping.
for name in list(synonyms):
    plural = inflector.plural_noun(name)
    if plural != name:
        synonyms[plural] = synonyms[name]
print(len(synonyms))

# One name per line. The encoding is explicit so that non-ASCII names are
# written identically on every platform instead of depending on the locale
# default (which can raise UnicodeEncodeError).
with open("data/cell_names.txt", "w", encoding="utf-8") as cellfile:
    cellfile.write("\n".join(synonyms))
# Read the MySQL credentials from the "key=value" lines of the config file.
username = None
password = None
with open("config/database.config", 'r') as f:
    for line in f:
        # Split on the FIRST "=" only, so that a value which itself contains
        # "=" (typical for passwords) is kept whole instead of truncated.
        _, separator, value = line.partition("=")
        if not separator:
            # Not a key=value line; nothing to read from it.
            continue
        if line.startswith("username"):
            username = value.strip()
        elif line.startswith("password"):
            password = value.strip()

# Fail with a clear message rather than a NameError at the connection call.
if username is None or password is None:
    raise ValueError(
        "config/database.config must define both 'username' and 'password'")

# For a local SQLite database use instead:
# engine = create_engine('sqlite:///database.sqlite', echo=False)
# NOTE(review): the credentials are interpolated into the URL verbatim, so
# characters such as "@", ":" or "/" must already be URL-encoded in the
# config file (see the SQLAlchemy database URL documentation).
engine = create_engine(
    'mysql+pymysql://{}:{}@localhost/immuno?charset=utf8mb4'.format(username, password),
    echo=False)
Session = sessionmaker(bind=engine)
Base = declarative_base()
session = Session()
# Rebuild all "cell" normalization rows: drop the existing ones, then insert
# the hand-curated entries followed by everything harvested into `synonyms`.
for cell in session.query(Normalization).filter(Normalization.entity_type == "cell"):
    session.delete(cell)

# Hand-curated entries (text -> reference name). These take precedence over
# anything derived from the ontology.
manual_entries = {
    "monocyte": "monocyte",
    "monocytes": "monocyte",
    "Treg": "regulatory T cell",
    "Tregs": "regulatory T cell",
}
for text, reference_name in manual_entries.items():
    session.add(Normalization(text=text, reference_name=reference_name,
                              entity_type="cell",
                              reference_source="cellontology"))

# The reference file lists every harvested name as "<name>\t<label>". The
# encoding is explicit so non-ASCII names do not depend on the locale default.
with open("data/cell_reference.txt", "w", encoding="utf-8") as cellfile:
    for s, reference_name in synonyms.items():
        cellfile.write('{}\t{}\n'.format(s, reference_name))
        if s in manual_entries:
            # Already inserted above; adding it again would create a
            # duplicate (or conflicting) row for the same text.
            continue
        session.add(Normalization(text=s, reference_name=reference_name,
                                  entity_type="cell",
                                  reference_source="cellontology"))
session.commit()