Skip to content

Commit

Permalink
Merge pull request #1 from di2ag/pipeline_update
Browse files Browse the repository at this point in the history
updated client for the new BKB pipeline and for TRAPI 1.0 specifications
  • Loading branch information
veenhouse authored Nov 19, 2020
2 parents f5eff36 + 31709f9 commit d2c996d
Show file tree
Hide file tree
Showing 3 changed files with 103 additions and 42 deletions.
24 changes: 12 additions & 12 deletions chp_client/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
caching_avail = False


__version__ = '0.0.1'
__version__ = '0.0.2'


class ChpClient:
Expand Down Expand Up @@ -111,8 +111,8 @@ def _get_outcome_prob(self, q_resp):
res = q_resp["message"]["results"][0]
# Find the outcome edge
for qg_id, edge_bind in res["edge_bindings"].items():
edge = kg["edges"][edge_bind["kg_id"]]
if edge["type"] == 'disease_to_phenotypic_feature_association':
edge = kg["edges"][edge_bind[0]["id"]]
if edge["predicate"] == 'biolink:DiseaseToPhenotypicFeatureAssociation':
try:
prob = edge["has_confidence_level"]
break
Expand All @@ -131,24 +131,24 @@ def _get_ranked_wildcards(self, q_resp):
# Extract wildcard types from qg. Numbers are how many wildcard of each type are in qg.
wildcard_types = defaultdict(int)
for node_id, node in qg["nodes"].items():
if "curie" not in node:
wildcard_types[node["type"]] += 1
if "id" not in node:
wildcard_types[node["category"]] += 1
ranks = defaultdict(list)
for _res in res:
for qg_id, edge_bind in _res["edge_bindings"].items():
edge = kg["edges"][edge_bind["kg_id"]]
if "gene" in wildcard_types and edge["type"] == 'gene_to_disease_association':
weight = edge["weight"]
node_curie = edge["source_id"]
edge = kg["edges"][edge_bind[0]["id"]]
if "biolink:Gene" in wildcard_types and edge["predicate"] == 'biolink:GeneToDiseaseAssociation':
weight = edge["value"]
node_curie = edge["subject"]
source_node = kg["nodes"][node_curie]
name = source_node["name"]
ranks["gene"].append({
"weight": weight,
"curie": node_curie,
"name": name})
elif "chemical_substance" in wildcard_types and edge["type"] == 'chemical_to_disease_or_phenotypic_feature_association':
weight = edge["weight"]
node_curie = edge["source_id"]
elif "biolink:Drug" in wildcard_types and edge["predicate"] == 'biolink:ChemicalToDiseaseOrPhenotypicFeatureAssociation':
weight = edge["value"]
node_curie = edge["subject"]
source_node = kg["nodes"][node_curie]
name = source_node["name"]
ranks["gene"].append({
Expand Down
54 changes: 24 additions & 30 deletions chp_client/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def build_query(
message = {
"query_graph": {},
"knowledge_graph": {},
"results": {}
"results": []
}
# empty query graph
message["query_graph"] = {
Expand All @@ -60,27 +60,21 @@ def build_query(
"nodes": {}
}

# empty response graph
message["results"] = [{
"node_bindings": {},
"edge_bindings": {}
}]

node_count = 0
edge_count = 0

# add genes
for gene in genes:
message["query_graph"]["nodes"]['n{}'.format(node_count)] = {
"type":'gene',
"curie": gene
"category":"biolink:Gene",
"id": gene
}
node_count += 1

# add gene wildcards (if applicable)
for _ in range(num_gene_wildcards):
message["query_graph"]["nodes"]['n{}'.format(node_count)] = {
"type": 'gene'
"category": 'biolink:Gene'
}
node_count += 1

Expand All @@ -97,51 +91,51 @@ def build_query(

else:
message["query_graph"]["nodes"]['n{}'.format(node_count)] = {
"type": 'chemical_substance',
"curie": therapeutic
"category": 'biolink:Drug',
"id": therapeutic
}
node_count += 1

# add in disease node
message["query_graph"]["nodes"]['n{}'.format(node_count)] = {
"type": 'disease',
"curie": disease
"category": 'biolink:Disease',
"id": disease
}
node_count += 1

# link all evidence to disease
for node_id, node in message["query_graph"]["nodes"].items():
if node["type"] == 'gene':
if node["category"] == 'biolink:Gene':
message["query_graph"]["edges"]['e{}'.format(edge_count)] = {
"type":'gene_to_disease_association',
"source_id": node_id,
"target_id": 'n{}'.format(node_count - 1) # should be disease node
"predicate":'biolink:GeneToDiseaseAssociation',
"subject": node_id,
"object": 'n{}'.format(node_count - 1) # should be disease node
}
edge_count += 1
elif node["type"] == 'chemical_substance':
elif node["category"] == 'biolink:Drug':
message["query_graph"]["edges"]['e{}'.format(edge_count)] = {
"type":'chemical_to_disease_or_phenotypic_feature_association',
"source_id": node_id,
"target_id": 'n{}'.format(node_count -1) # should be disease node
"predicate":'biolink:ChemicalToDiseaseOrPhenotypicFeatureAssociation',
"subject": node_id,
"object": 'n{}'.format(node_count -1) # should be disease node
}
edge_count += 1

# add target outcome node
outcome_curie, op, value = outcome
message["query_graph"]["nodes"]['n{}'.format(node_count)] = {
"type": 'phenotypic_feature',
"curie": outcome_curie,
"category": 'biolink:PhenotypicFeature',
"id": outcome_curie,
}
node_count += 1

# link disease to target
message["query_graph"]["edges"]['e{}'.format(edge_count)] = {
"type": 'disease_to_phenotypic_feature_association',
"source_id": 'n{}'.format(node_count-2),
"target_id": 'n{}'.format(node_count-1),
"predicate": 'biolink:DiseaseToPhenotypicFeatureAssociation',
"subject": 'n{}'.format(node_count-2),
"object": 'n{}'.format(node_count-1),
"properties": {
"qualifier": op,
"value": value
}
"qualifier": op,
"days": value
}
}
return {"message": message}
67 changes: 67 additions & 0 deletions tests/test_client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
"""
Source code developed by DI2AG.
Thayer School of Engineering at Dartmouth College
Authors: Dr. Eugene Santos, Jr
Mr. Chase Yakaboski,
Mr. Gregory Hyde,
Mr. Luke Veenhuis,
Dr. Keum Joo Kim
"""

import unittest
import json
from chp_client import get_client
from chp_client.query import build_query

class TestClient(unittest.TestCase):
    """Smoke tests driving the client returned by ``get_client()``.

    Every test only prints what the client returns; none of them makes
    assertions about the content of the responses.
    """

    def test_predicates(self):
        """Fetch the predicates from the client and pretty-print them."""
        client = get_client()
        predicates = client.predicates()
        print(json.dumps(predicates, indent=2))

    def test_curies(self):
        """Print the curie types, then at most five curies of each type."""
        client = get_client()
        curies = client.curies()
        print(curies.keys())
        for curie_type in curies.keys():
            terms = curies[curie_type]
            # Cap the preview at five entries per curie type.
            preview_size = min(len(terms), 5)
            for curie in terms[:preview_size]:
                print(json.dumps(curie, indent=2))

    def test_default(self):
        """Query with one gene, a therapeutic and a disease; print the outcome probability."""
        client = get_client()
        query = build_query(
            genes=['ENSEMBL:ENSG00000132155'],
            therapeutic='CHEMBL:CHEMBL88',
            disease='MONDO:0007254',
            outcome=('EFO:0000714', '>=', 1000),
        )
        response = client.query(query)
        probability = client.get_outcome_prob(response)
        print('Probability of survival', probability)

    def test_wildcard(self):
        """Query with one gene wildcard; print the outcome probability and the ranked wildcards."""
        client = get_client()
        query = build_query(
            therapeutic='CHEMBL:CHEMBL88',
            disease='MONDO:0007254',
            outcome=('EFO:0000714', '>=', 1000),
            num_gene_wildcards=1,
        )
        response = client.query(query)
        probability = client.get_outcome_prob(response)
        print('Probability of survival', probability)
        ranked_wildcards = client.get_ranked_wildcards(response)
        print(json.dumps(ranked_wildcards, indent=2))

# Run the unittest test runner when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()

0 comments on commit d2c996d

Please sign in to comment.