Merge pull request #1 from di2ag/pipeline_update

updated client for the new BKB pipeline and for TRAPI 1.0 specifications
di2ag · Nov 19, 2020 · d2c996d · d2c996d
2 parents f5eff36 + 31709f9
commit d2c996d
Show file tree

Hide file tree

Showing 3 changed files with 103 additions and 42 deletions.
diff --git a/chp_client/client.py b/chp_client/client.py
@@ -13,7 +13,7 @@
     caching_avail = False
 
 
-__version__ = '0.0.1'
+__version__ = '0.0.2'
 
 
 class ChpClient:
@@ -111,8 +111,8 @@ def _get_outcome_prob(self, q_resp):
         res = q_resp["message"]["results"][0]
         # Find the outcome edge
         for qg_id, edge_bind  in res["edge_bindings"].items():
-            edge = kg["edges"][edge_bind["kg_id"]]
-            if edge["type"] == 'disease_to_phenotypic_feature_association':
+            edge = kg["edges"][edge_bind[0]["id"]]
+            if edge["predicate"] == 'biolink:DiseaseToPhenotypicFeatureAssociation':
                 try:
                     prob = edge["has_confidence_level"]
                     break
@@ -131,24 +131,24 @@ def _get_ranked_wildcards(self, q_resp):
         # Extract wildcard types from qg. Each value is the number of wildcards of that type in qg.
         wildcard_types = defaultdict(int)
         for node_id, node in qg["nodes"].items():
-            if "curie" not in node:
-                wildcard_types[node["type"]] += 1
+            if "id" not in node:
+                wildcard_types[node["category"]] += 1
         ranks = defaultdict(list)
         for _res in res:
             for qg_id, edge_bind in _res["edge_bindings"].items():
-                edge = kg["edges"][edge_bind["kg_id"]]
-                if "gene" in wildcard_types and edge["type"] == 'gene_to_disease_association':
-                    weight = edge["weight"]
-                    node_curie = edge["source_id"]
+                edge = kg["edges"][edge_bind[0]["id"]]
+                if "biolink:Gene" in wildcard_types and edge["predicate"] == 'biolink:GeneToDiseaseAssociation':
+                    weight = edge["value"]
+                    node_curie = edge["subject"]
                     source_node = kg["nodes"][node_curie]
                     name = source_node["name"]
                     ranks["gene"].append({
                             "weight": weight,
                             "curie": node_curie,
                             "name": name})
-                elif "chemical_substance" in wildcard_types and edge["type"] == 'chemical_to_disease_or_phenotypic_feature_association':
-                    weight = edge["weight"]
-                    node_curie = edge["source_id"]
+                elif "biolink:Drug" in wildcard_types and edge["predicate"] == 'biolink:ChemicalToDiseaseOrPhenotypicFeatureAssociation':
+                    weight = edge["value"]
+                    node_curie = edge["subject"]
                     source_node = kg["nodes"][node_curie]
                     name = source_node["name"]
                     ranks["gene"].append({

diff --git a/chp_client/query.py b/chp_client/query.py
@@ -46,7 +46,7 @@ def build_query(
     message = {
             "query_graph": {},
             "knowledge_graph": {},
-            "results": {}
+            "results": []
             }
     # empty query graph
     message["query_graph"] = {
@@ -60,27 +60,21 @@ def build_query(
             "nodes": {}
             }
 
-    # empty response graph
-    message["results"] = [{
-            "node_bindings": {},
-            "edge_bindings": {}
-            }]
-
     node_count = 0
     edge_count = 0
 
     # add genes
     for gene in genes:
         message["query_graph"]["nodes"]['n{}'.format(node_count)] = {
-                "type":'gene',
-                "curie": gene
+                "category":"biolink:Gene",
+                "id": gene
                 }
         node_count += 1
 
     # add gene wildcards (if applicable)
     for _ in range(num_gene_wildcards):
         message["query_graph"]["nodes"]['n{}'.format(node_count)] = {
-                "type": 'gene'
+                "category": 'biolink:Gene'
                 }
         node_count += 1
 
@@ -97,51 +91,51 @@ def build_query(
 
     else:
         message["query_graph"]["nodes"]['n{}'.format(node_count)] = {
-                "type": 'chemical_substance',
-                "curie": therapeutic
+                "category": 'biolink:Drug',
+                "id": therapeutic
                 }
         node_count += 1
 
     # add in disease node
     message["query_graph"]["nodes"]['n{}'.format(node_count)] = {
-            "type": 'disease',
-            "curie": disease
+            "category": 'biolink:Disease',
+            "id": disease
             }
     node_count += 1
 
     # link all evidence to disease
     for node_id, node in message["query_graph"]["nodes"].items():
-        if node["type"] == 'gene':
+        if node["category"] == 'biolink:Gene':
             message["query_graph"]["edges"]['e{}'.format(edge_count)] = {
-                    "type":'gene_to_disease_association',
-                    "source_id": node_id,
-                    "target_id": 'n{}'.format(node_count - 1)   # should be disease node
+                    "predicate":'biolink:GeneToDiseaseAssociation',
+                    "subject": node_id,
+                    "object": 'n{}'.format(node_count - 1)   # should be disease node
                     }
             edge_count += 1
-        elif node["type"] == 'chemical_substance':
+        elif node["category"] == 'biolink:Drug':
             message["query_graph"]["edges"]['e{}'.format(edge_count)] = {
-                    "type":'chemical_to_disease_or_phenotypic_feature_association',
-                    "source_id": node_id,
-                    "target_id": 'n{}'.format(node_count -1)  # should be disease node
+                    "predicate":'biolink:ChemicalToDiseaseOrPhenotypicFeatureAssociation',
+                    "subject": node_id,
+                    "object": 'n{}'.format(node_count -1)  # should be disease node
                     }
             edge_count += 1
 
     # add target outcome node
     outcome_curie, op, value = outcome
     message["query_graph"]["nodes"]['n{}'.format(node_count)] = {
-            "type": 'phenotypic_feature',
-            "curie": outcome_curie,
+            "category": 'biolink:PhenotypicFeature',
+            "id": outcome_curie,
             }
     node_count += 1
 
     # link disease to target
     message["query_graph"]["edges"]['e{}'.format(edge_count)] = {
-            "type": 'disease_to_phenotypic_feature_association',
-            "source_id": 'n{}'.format(node_count-2),
-            "target_id": 'n{}'.format(node_count-1),
+            "predicate": 'biolink:DiseaseToPhenotypicFeatureAssociation',
+            "subject": 'n{}'.format(node_count-2),
+            "object": 'n{}'.format(node_count-1),
             "properties": {
-                    "qualifier": op,
-                    "value": value
-                    }
+                           "qualifier": op,
+                           "days": value
+                          }
             }
     return {"message": message}
diff --git a/tests/test_client.py b/tests/test_client.py
@@ -0,0 +1,67 @@
+"""
+    Source code developed by DI2AG.
+    Thayer School of Engineering at Dartmouth College
+    Authors:    Dr. Eugene Santos, Jr
+                Mr. Chase Yakaboski,
+                Mr. Gregory Hyde,
+                Mr. Luke Veenhuis,
+                Dr. Keum Joo Kim
+"""
+
+import unittest
+import json
+from chp_client import get_client
+from chp_client.query import build_query
+
+class TestClient(unittest.TestCase):
+    """Exercise the client returned by get_client() and print the results; no assertions are made.
+    """
+
+    def test_predicates(self):
+        """Fetch the predicates from the default client and print them as indented JSON.
+        """
+        default_client = get_client()
+        preds = default_client.predicates()
+        predicates_pretty = json.dumps(preds, indent=2)
+        print(predicates_pretty)
+
+    def test_curies(self):
+        """Fetch the curies, print their types, then print up to five entries of each type as JSON.
+        """
+        default_client = get_client()
+        curies = default_client.curies()
+        print(curies.keys())
+        for curie_type in curies.keys():
+            term_length = min([len(curies[curie_type]),5])
+            for curie in curies[curie_type][:term_length]:
+                curie_pretty = json.dumps(curie, indent=2)
+                print(curie_pretty)
+
+    def test_default(self):
+        """Query with one gene, a therapeutic, a disease and an outcome; print the outcome probability.
+        """
+        default_client = get_client()
+        q = build_query( genes = ['ENSEMBL:ENSG00000132155'],
+                         therapeutic='CHEMBL:CHEMBL88',
+                         disease='MONDO:0007254',
+                         outcome=('EFO:0000714', '>=', 1000) )
+        r = default_client.query(q)
+        prob = default_client.get_outcome_prob(r)
+        print('Probability of survival',prob)
+
+    def test_wildcard(self):
+        """Query with one gene wildcard; print the outcome probability and the ranked wildcards.
+        """
+        default_client = get_client()
+        q = build_query( therapeutic='CHEMBL:CHEMBL88',
+                         disease='MONDO:0007254',
+                         outcome=('EFO:0000714', '>=', 1000),
+                         num_gene_wildcards=1 )
+        r = default_client.query(q)
+        prob = default_client.get_outcome_prob(r)
+        print('Probability of survival',prob)
+        ranked = default_client.get_ranked_wildcards(r)
+        print(json.dumps(ranked, indent=2))
+
+if __name__ == '__main__':
+    unittest.main()