Skip to content

Commit

Permalink
Darwin passing build 1.
Browse files Browse the repository at this point in the history
  • Loading branch information
Chase Yakaboski committed May 8, 2021
1 parent 41323ae commit fe4a6b2
Show file tree
Hide file tree
Showing 11 changed files with 628 additions and 110 deletions.
20 changes: 10 additions & 10 deletions chp_client/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,10 +144,10 @@ def build_wildcard_query(
return query

def build_onehop_query(
q_subject,
q_subject_category,
q_object,
q_object_category,
q_subjects,
q_subject_categories,
q_objects,
q_object_categories,
genes=None,
drugs=None,
outcome=None,
Expand All @@ -163,21 +163,21 @@ def build_onehop_query(
q = message.query_graph

# Add nodes
subject_node = q.add_node(q_subject, q_subject_category)
object_node = q.add_node(q_object, q_object_category)
subject_node = q.add_node(q_subjects, q_subject_categories)
object_node = q.add_node(q_objects, q_object_categories)

# Add edge
try:
edge_predicate = SUBJECT_TO_OBJECT_PREDICATE_MAP[(q_subject_category, q_object_category)]
edge_predicate = SUBJECT_TO_OBJECT_PREDICATE_MAP[(q_subject_categories[0], q_object_categories[0])]
except KeyError:
raise QueryBuildError('Edge from {} to {} is not supported.'.format(q_subject_category, q_object_category))
raise QueryBuildError('Edge from {} to {} is not supported.'.format(q_subject_categories[0], q_object_categories[0]))

edge_id = q.add_edge(subject_node, object_node, edge_predicate)

# Add constraints
if outcome is not None:
q.add_constraint('predicate_proxy', 'CHP:PredicateProxy', '==', outcome_name, edge_id=edge_id)
q.add_constraint(outcome_name, outcome, outcome_op, outcome_value, edge_id=edge_id)
q.add_constraint('predicate_proxy', 'CHP:PredicateProxy', '==', [outcome], edge_id=edge_id)
q.add_constraint(outcome, outcome, outcome_op, outcome_value, edge_id=edge_id)

# Get context
context = []
Expand Down
132 changes: 132 additions & 0 deletions utils/build_batch_one_hop_queries.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@

from chp_client.query import build_onehop_query
import itertools
import tqdm
import logging
import pickle
import random
import json
from collections import defaultdict

from trapi_model.constants import *
from chp.trapi_interface import TrapiInterface

# Script body: builds one batch one-hop query of each supported shape per
# TRAPI version and pickles the resulting query dicts.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# Set number of queries to build
# NOTE: not referenced anywhere below in this script.
NUM_QUERIES = 10

TRAPI_VERSIONS = ['1.0', '1.1']

# Fixed seed so the sampled curies and outcome values are reproducible.
random.seed(111)

# Get curies
logger.info('Getting curies.')
curies = TrapiInterface().get_curies()
logger.info('Got curies.')

# Build the candidate pools once instead of re-listing the keys per sample.
gene_pool = list(curies['biolink:Gene'].keys())
drug_pool = list(curies['biolink:Drug'].keys())


def _sample(pool):
    """Return 1-4 curies drawn from *pool* (with replacement, so duplicates are possible)."""
    # Argument order matters for reproducibility: the size is drawn first,
    # then the members, exactly as the inline expressions did before.
    return random.choices(pool, k=random.randint(1, 4))


# Batch one-hop queries with a survival-time (EFO:0000714) outcome proxy,
# using MONDO:0007254 wherever a disease node is needed.
# Layout of the result: queries[trapi_version][query_name] -> query dict.
queries = defaultdict(lambda: defaultdict(list))
disease = ['MONDO:0007254']
outcome_name = 'survival_time'  # NOTE: not referenced below in this script.
outcome = 'EFO:0000714'
outcome_op = '>'

# Every build below is keyed by trapi_version, so all of them run once per
# TRAPI version. The queries are logged at DEBUG level rather than printed
# and followed by a blocking input(): the pause stalled unattended runs and
# raised EOFError (before anything was pickled) when stdin was not a terminal.
for trapi_version in TRAPI_VERSIONS:
    # Build batch gene to disease query
    genes = _sample(gene_pool)
    q = build_onehop_query(
        genes,
        [BIOLINK_GENE],
        disease,
        [BIOLINK_DISEASE],
        outcome=outcome,
        outcome_op=outcome_op,
        outcome_value=random.randint(500, 1500),
        trapi_version=trapi_version,
    )
    logger.debug('gene_to_disease_proxy (TRAPI %s): %s', trapi_version, q)
    queries[trapi_version]['gene_to_disease_proxy'] = q.to_dict()

    # Build batch drug to disease query
    drugs = _sample(drug_pool)
    q = build_onehop_query(
        drugs,
        [BIOLINK_DRUG],
        disease,
        [BIOLINK_DISEASE],
        outcome=outcome,
        outcome_op=outcome_op,
        outcome_value=random.randint(500, 1500),
        trapi_version=trapi_version,
    )
    logger.debug('drug_to_disease_proxy (TRAPI %s): %s', trapi_version, q)
    queries[trapi_version]['drug_to_disease_proxy'] = q.to_dict()

    # Build batch gene to drug query
    genes = _sample(gene_pool)
    drugs = _sample(drug_pool)
    q = build_onehop_query(
        genes,
        [BIOLINK_GENE],
        drugs,
        [BIOLINK_DRUG],
        outcome=outcome,
        outcome_op=outcome_op,
        outcome_value=random.randint(500, 1500),
        trapi_version=trapi_version,
    )
    logger.debug('gene_to_drug_proxy (TRAPI %s): %s', trapi_version, q)
    queries[trapi_version]['gene_to_drug_proxy'] = q.to_dict()

    # Build batch drug to gene query (drugs are the subject, genes the object).
    # Genes are still sampled before drugs to keep the random sequence stable.
    genes = _sample(gene_pool)
    drugs = _sample(drug_pool)
    q = build_onehop_query(
        drugs,
        [BIOLINK_DRUG],
        genes,
        [BIOLINK_GENE],
        outcome=outcome,
        outcome_op=outcome_op,
        outcome_value=random.randint(500, 1500),
        trapi_version=trapi_version,
    )
    logger.debug('drug_to_gene_proxy (TRAPI %s): %s', trapi_version, q)
    queries[trapi_version]['drug_to_gene_proxy'] = q.to_dict()

    # Build gene to disease with proxy and context (sampled drugs passed as context)
    genes = _sample(gene_pool)
    drugs = _sample(drug_pool)
    q = build_onehop_query(
        genes,
        [BIOLINK_GENE],
        disease,
        [BIOLINK_DISEASE],
        outcome=outcome,
        outcome_op=outcome_op,
        outcome_value=random.randint(500, 1500),
        trapi_version=trapi_version,
        drugs=drugs,
    )
    logger.debug('gene_to_disease_proxy_context (TRAPI %s): %s', trapi_version, q)
    queries[trapi_version]['gene_to_disease_proxy_context'] = q.to_dict()

# Pickle the queries
# dict(...) drops the outer defaultdict, whose lambda factory cannot be pickled.
with open('standard_batch_onehop_queries.pk', 'wb') as f_:
    pickle.dump(dict(queries), f_)
64 changes: 64 additions & 0 deletions utils/build_batch_wildcard_onehop_queries.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
from chp_client.query import build_onehop_query
import itertools
import tqdm
import logging
import pickle
import random
import json
from collections import defaultdict

from trapi_model.constants import *
from chp.trapi_interface import TrapiInterface

# Script body: builds one batch wildcard one-hop query per TRAPI version and
# pickles the resulting query dicts.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# Set number of queries to build
# NOTE: not referenced anywhere below in this script.
NUM_QUERIES = 10

TRAPI_VERSIONS = ['1.0', '1.1']

# Fixed seed so the sampled curies and outcome values are reproducible.
random.seed(111)

# Get curies
logger.info('Getting curies.')
curies = TrapiInterface().get_curies()
logger.info('Got curies.')

# Build the candidate pools once instead of re-listing the keys per iteration.
drug_pool = list(curies["biolink:Drug"].keys())
gene_pool = list(curies["biolink:Gene"].keys())

# Wildcard-to-disease queries with a survival-time (EFO:0000714) outcome proxy.
# Layout of the result: queries[trapi_version][query_name] -> query dict.
queries = defaultdict(lambda: defaultdict(list))
disease = ['MONDO:0007254']
outcome_name = 'survival_time'  # NOTE: not referenced below in this script.
outcome = 'EFO:0000714'
outcome_op = '>'

for trapi_version in TRAPI_VERSIONS:
    # Build batch gene wildcard and drug wildcard to disease query.
    # The drug is drawn before the gene to keep the random sequence stable.
    drug = [random.choice(drug_pool)]
    gene = [random.choice(gene_pool)]
    q = build_onehop_query(
        None,  # no subject curies: the subject is left as a wildcard
        [BIOLINK_GENE, BIOLINK_DRUG],
        disease,
        [BIOLINK_DISEASE],
        outcome=outcome,
        outcome_op=outcome_op,
        outcome_value=random.randint(500, 1500),
        trapi_version=trapi_version,
        drugs=drug,
        genes=gene,
    )
    # Logged at DEBUG level rather than printed and followed by a blocking
    # input(): the pause stalled unattended runs and raised EOFError (before
    # anything was pickled) when stdin was not a terminal.
    logger.debug('batchwildcard_to_disease_proxy (TRAPI %s): %s', trapi_version, q)
    queries[trapi_version]['batchwildcard_to_disease_proxy'] = q.to_dict()

# Pickle the queries
# dict(...) drops the outer defaultdict, whose lambda factory cannot be pickled.
with open('wildcard_batch_onehop_queries.pk', 'wb') as f_:
    pickle.dump(dict(queries), f_)
38 changes: 21 additions & 17 deletions utils/build_drug_wildcard_batch_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import pickle
import random
import json
from collections import defaultdict

from chp.trapi_interface import TrapiInterface

Expand All @@ -15,6 +16,8 @@
# Set number of queries to build
NUM_QUERIES = 10

TRAPI_VERSIONS = ['1.0', '1.1']

# Set seed
random.seed(111)

Expand All @@ -26,24 +29,25 @@
curies = TrapiInterface().get_curies()
#curies = client.curies()
logger.info('Got curies.')
# Build all simple single gene, single drug, breast cancer, survival queries.

queries = []
for _ in range(NUM_QUERIES):
genes = [gene for gene in random.choices(list(curies['biolink:Gene'].keys()), k=random.randint(1,4))]
q = build_wildcard_query(
batch_genes=genes,
disease='MONDO:0007254',
outcome_name='survival_time',
outcome='EFO:0000714',
outcome_op='>=',
outcome_value=random.randint(1, 5000),
trapi_version='1.0',
wildcard_category='drug',
)
#print(q)
#input()
queries.append(q.to_dict())
# Build NUM_QUERIES random drug-wildcard batch queries per TRAPI version (1-4 batch genes, disease MONDO:0007254, survival_time outcome).
queries = defaultdict(list)
for trapi_version in TRAPI_VERSIONS:
for _ in range(NUM_QUERIES):
genes = [gene for gene in random.choices(list(curies['biolink:Gene'].keys()), k=random.randint(1,4))]
q = build_wildcard_query(
batch_genes=genes,
disease='MONDO:0007254',
outcome_name='survival_time',
outcome='EFO:0000714',
outcome_op='>',
outcome_value=random.randint(1, 5000),
trapi_version=trapi_version,
wildcard_category='drug',
)
#print(q)
#input()
queries[trapi_version].append(q.to_dict())

# Pickle the queries
with open('random_drug_wildcard_batch_queries.pk', 'wb') as f_:
Expand Down
34 changes: 19 additions & 15 deletions utils/build_drug_wildcard_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import pickle
import random
import json
from collections import defaultdict

from chp.trapi_interface import TrapiInterface

Expand All @@ -15,6 +16,8 @@
# Set number of queries to build
NUM_QUERIES = 10

TRAPI_VERSIONS = ['1.0', '1.1']

# Set seed
random.seed(111)

Expand All @@ -26,22 +29,23 @@
curies = TrapiInterface().get_curies()
#curies = client.curies()
logger.info('Got curies.')
# Build all simple single gene, single drug, breast cancer, survival queries.

queries = []
for _ in range(NUM_QUERIES):
genes = [gene for gene in random.choices(list(curies['biolink:Gene'].keys()), k=random.randint(1,2))]
q = build_wildcard_query(
genes=genes,
disease='MONDO:0007254',
outcome_name='survival_time',
outcome='EFO:0000714',
outcome_op='>=',
outcome_value=random.randint(1, 5000),
trapi_version='1.0',
wildcard_category='drug',
)
queries.append(q.to_dict())
# Build NUM_QUERIES random drug-wildcard queries per TRAPI version (1-2 genes, disease MONDO:0007254, survival_time outcome).
queries = defaultdict(list)
for trapi_version in TRAPI_VERSIONS:
for _ in range(NUM_QUERIES):
genes = [gene for gene in random.choices(list(curies['biolink:Gene'].keys()), k=random.randint(1,2))]
q = build_wildcard_query(
genes=genes,
disease='MONDO:0007254',
outcome_name='survival_time',
outcome='EFO:0000714',
outcome_op='>',
outcome_value=random.randint(1, 5000),
trapi_version=trapi_version,
wildcard_category='drug',
)
queries[trapi_version].append(q.to_dict())

# Pickle the queries
with open('random_drug_wildcard_queries.pk', 'wb') as f_:
Expand Down
Loading

0 comments on commit fe4a6b2

Please sign in to comment.