From fe4a6b2f0bb1ffcc4ee5bf6071afb9aebbf15271 Mon Sep 17 00:00:00 2001 From: Chase Yakaboski Date: Sat, 8 May 2021 02:24:48 -0400 Subject: [PATCH] Darwin passing build 1. --- chp_client/query.py | 20 +-- utils/build_batch_one_hop_queries.py | 132 +++++++++++++++ utils/build_batch_wildcard_onehop_queries.py | 64 +++++++ utils/build_drug_wildcard_batch_queries.py | 38 +++-- utils/build_drug_wildcard_queries.py | 34 ++-- utils/build_gene_wildcard_batch_queries.py | 39 +++-- utils/build_gene_wildcard_queries.py | 37 +++-- utils/build_one_hop_queries.py | 132 +++++++++++++++ utils/build_random_batch_queries.py | 42 ++--- utils/build_random_queries.py | 35 ++-- utils/build_wildcard_onehop_queries.py | 165 +++++++++++++++++++ 11 files changed, 628 insertions(+), 110 deletions(-) create mode 100644 utils/build_batch_one_hop_queries.py create mode 100644 utils/build_batch_wildcard_onehop_queries.py create mode 100644 utils/build_one_hop_queries.py create mode 100644 utils/build_wildcard_onehop_queries.py diff --git a/chp_client/query.py b/chp_client/query.py index a54e09d..eb28d4c 100644 --- a/chp_client/query.py +++ b/chp_client/query.py @@ -144,10 +144,10 @@ def build_wildcard_query( return query def build_onehop_query( - q_subject, - q_subject_category, - q_object, - q_object_category, + q_subjects, + q_subject_categories, + q_objects, + q_object_categories, genes=None, drugs=None, outcome=None, @@ -163,21 +163,21 @@ def build_onehop_query( q = message.query_graph # Add nodes - subject_node = q.add_node(q_subject, q_subject_category) - object_node = q.add_node(q_object, q_object_category) + subject_node = q.add_node(q_subjects, q_subject_categories) + object_node = q.add_node(q_objects, q_object_categories) # Add edge try: - edge_predicate = SUBJECT_TO_OBJECT_PREDICATE_MAP[(q_subject_category, q_object_category)] + edge_predicate = SUBJECT_TO_OBJECT_PREDICATE_MAP[(q_subject_categories[0], q_object_categories[0])] except KeyError: - raise QueryBuildError('Edge from {} to {} is not supported.'.format(q_subject_category, q_object_category)) + raise QueryBuildError('Edge from {} to {} is not supported.'.format(q_subject_categories[0], q_object_categories[0])) edge_id = q.add_edge(subject_node, object_node, edge_predicate) # Add constraints if outcome is not None: - q.add_constraint('predicate_proxy', 'CHP:PredicateProxy', '==', outcome_name, edge_id=edge_id) - q.add_constraint(outcome_name, outcome, outcome_op, outcome_value, edge_id=edge_id) + q.add_constraint('predicate_proxy', 'CHP:PredicateProxy', '==', [outcome], edge_id=edge_id) + q.add_constraint(outcome, outcome, outcome_op, outcome_value, edge_id=edge_id) # Get context context = [] diff --git a/utils/build_batch_one_hop_queries.py b/utils/build_batch_one_hop_queries.py new file mode 100644 index 0000000..7648424 --- /dev/null +++ b/utils/build_batch_one_hop_queries.py @@ -0,0 +1,132 @@ + +from chp_client.query import build_onehop_query +import itertools +import tqdm +import logging +import pickle +import random +import json +from collections import defaultdict + +from trapi_model.constants import * +from chp.trapi_interface import TrapiInterface + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + +# Set number of queries to build +NUM_QUERIES = 10 + +TRAPI_VERSIONS = ['1.0', '1.1'] + +# Set seed +random.seed(111) + +# Get client +#client = get_client() + +# Get curies +logger.info('Getting curies.') +curies = TrapiInterface().get_curies() +#curies = client.curies() +logger.info('Got curies.') + +# Build all simple single gene, single drug, breast cancer, survival queries. +queries = defaultdict(lambda: defaultdict(list)) +disease = ['MONDO:0007254'] +outcome_name = 'survival_time' +outcome = 'EFO:0000714' +outcome_op = '>' + +# Build batch gene to disease query +for trapi_version in TRAPI_VERSIONS: + genes = [gene for gene in random.choices(list(curies['biolink:Gene'].keys()), k=random.randint(1,4))] + q = build_onehop_query( + genes, + [BIOLINK_GENE], + disease, + [BIOLINK_DISEASE], + outcome=outcome, + outcome_op=outcome_op, + outcome_value=random.randint(500,1500), + trapi_version=trapi_version, + ) + print(q) + input() + queries[trapi_version]['gene_to_disease_proxy'] = q.to_dict() + + +# Build batch drug to disease query + drugs = [gene for gene in random.choices(list(curies['biolink:Drug'].keys()), k=random.randint(1,4))] + q = build_onehop_query( + drugs, + [BIOLINK_DRUG], + disease, + [BIOLINK_DISEASE], + outcome=outcome, + outcome_op=outcome_op, + outcome_value=random.randint(500,1500), + trapi_version=trapi_version, + ) + print(q) + input() + queries[trapi_version]['drug_to_disease_proxy'] = q.to_dict() + +# Build batch gene to drug query + genes = [gene for gene in random.choices(list(curies['biolink:Gene'].keys()), k=random.randint(1,4))] + drugs = [gene for gene in random.choices(list(curies['biolink:Drug'].keys()), k=random.randint(1,4))] + q = build_onehop_query( + genes, + [BIOLINK_GENE], + drugs, + [BIOLINK_DRUG], + outcome=outcome, + outcome_op=outcome_op, + outcome_value=random.randint(500,1500), + trapi_version=trapi_version, + ) + print(q) + input() + queries[trapi_version]['gene_to_drug_proxy'] = q.to_dict() + + +# Build gene to drug query + genes = [gene for gene in random.choices(list(curies['biolink:Gene'].keys()), k=random.randint(1,4))] + drugs = [gene for gene in random.choices(list(curies['biolink:Drug'].keys()), k=random.randint(1,4))] + q = build_onehop_query( + drugs, + [BIOLINK_DRUG], + genes, + [BIOLINK_GENE], + outcome=outcome, + outcome_op=outcome_op, + outcome_value=random.randint(500,1500), + trapi_version=trapi_version, + ) + print(q) + input() + queries[trapi_version]['drug_to_gene_proxy'] = q.to_dict() + +# Build gene to disease with proxy and context + genes = [gene for gene in random.choices(list(curies['biolink:Gene'].keys()), k=random.randint(1,4))] + drugs = [gene for gene in random.choices(list(curies['biolink:Drug'].keys()), k=random.randint(1,4))] + q = build_onehop_query( + genes, + [BIOLINK_GENE], + disease, + [BIOLINK_DISEASE], + outcome=outcome, + outcome_op=outcome_op, + outcome_value=random.randint(500,1500), + trapi_version=trapi_version, + drugs=drugs, + ) + print(q) + input() + queries[trapi_version]['gene_to_disease_proxy_context'] = q.to_dict() + + +# Pickle the queries +with open('standard_batch_onehop_queries.pk', 'wb') as f_: + pickle.dump(dict(queries), f_) diff --git a/utils/build_batch_wildcard_onehop_queries.py b/utils/build_batch_wildcard_onehop_queries.py new file mode 100644 index 0000000..77e15e9 --- /dev/null +++ b/utils/build_batch_wildcard_onehop_queries.py @@ -0,0 +1,64 @@ +from chp_client.query import build_onehop_query +import itertools +import tqdm +import logging +import pickle +import random +import json +from collections import defaultdict + +from trapi_model.constants import * +from chp.trapi_interface import TrapiInterface + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + +# Set number of queries to build +NUM_QUERIES = 10 + +TRAPI_VERSIONS = ['1.0', '1.1'] + +# Set seed +random.seed(111) + +# Get client +#client = get_client() + +# Get curies +logger.info('Getting curies.') +curies = TrapiInterface().get_curies() +#curies = client.curies() +logger.info('Got curies.') + +# Build all simple single gene, single drug, breast cancer, survival queries. +queries = defaultdict(lambda: defaultdict(list)) +disease = ['MONDO:0007254'] +outcome_name = 'survival_time' +outcome = 'EFO:0000714' +outcome_op = '>' + +# Build batch gene to disease query +for trapi_version in TRAPI_VERSIONS: +# Build batch gene wildcard and drug wildcard to disease query + drug = [random.choice(list(curies["biolink:Drug"].keys()))] + gene = [random.choice(list(curies["biolink:Gene"].keys()))] + q = build_onehop_query( + None, + [BIOLINK_GENE, BIOLINK_DRUG], + disease, + [BIOLINK_DISEASE], + outcome=outcome, + outcome_op=outcome_op, + outcome_value=random.randint(500,1500), + trapi_version=trapi_version, + drugs=drug, + genes=gene, + ) + print(q) + input() + queries[trapi_version]['batchwildcard_to_disease_proxy'] = q.to_dict() + +# Pickle the queries +with open('wildcard_batch_onehop_queries.pk', 'wb') as f_: + pickle.dump(dict(queries), f_) diff --git a/utils/build_drug_wildcard_batch_queries.py b/utils/build_drug_wildcard_batch_queries.py index ba5d6ce..ff0b30c 100644 --- a/utils/build_drug_wildcard_batch_queries.py +++ b/utils/build_drug_wildcard_batch_queries.py @@ -5,6 +5,7 @@ import pickle import random import json +from collections import defaultdict from chp.trapi_interface import TrapiInterface @@ -15,6 +16,8 @@ # Set number of queries to build NUM_QUERIES = 10 +TRAPI_VERSIONS = ['1.0', '1.1'] + # Set seed random.seed(111) @@ -26,24 +29,25 @@ curies = TrapiInterface().get_curies() #curies = client.curies() logger.info('Got curies.') -# Build all simple single gene, single drug, breast cancer, survival queries. -queries = [] -for _ in range(NUM_QUERIES): - genes = [gene for gene in random.choices(list(curies['biolink:Gene'].keys()), k=random.randint(1,4))] - q = build_wildcard_query( - batch_genes=genes, - disease='MONDO:0007254', - outcome_name='survival_time', - outcome='EFO:0000714', - outcome_op='>=', - outcome_value=random.randint(1, 5000), - trapi_version='1.0', - wildcard_category='drug', - ) - #print(q) - #input() - queries.append(q.to_dict()) +# Build all simple single gene, single drug, breast cancer, survival queries. +queries = defaultdict(list) +for trapi_version in TRAPI_VERSIONS: + for _ in range(NUM_QUERIES): + genes = [gene for gene in random.choices(list(curies['biolink:Gene'].keys()), k=random.randint(1,4))] + q = build_wildcard_query( + batch_genes=genes, + disease='MONDO:0007254', + outcome_name='survival_time', + outcome='EFO:0000714', + outcome_op='>', + outcome_value=random.randint(1, 5000), + trapi_version=trapi_version, + wildcard_category='drug', + ) + #print(q) + #input() + queries[trapi_version].append(q.to_dict()) # Pickle the queries with open('random_drug_wildcard_batch_queries.pk', 'wb') as f_: diff --git a/utils/build_drug_wildcard_queries.py b/utils/build_drug_wildcard_queries.py index 045c44b..23f3d3c 100644 --- a/utils/build_drug_wildcard_queries.py +++ b/utils/build_drug_wildcard_queries.py @@ -5,6 +5,7 @@ import pickle import random import json +from collections import defaultdict from chp.trapi_interface import TrapiInterface @@ -15,6 +16,8 @@ # Set number of queries to build NUM_QUERIES = 10 +TRAPI_VERSIONS = ['1.0', '1.1'] + # Set seed random.seed(111) @@ -26,22 +29,23 @@ curies = TrapiInterface().get_curies() #curies = client.curies() logger.info('Got curies.') -# Build all simple single gene, single drug, breast cancer, survival queries. -queries = [] -for _ in range(NUM_QUERIES): - genes = [gene for gene in random.choices(list(curies['biolink:Gene'].keys()), k=random.randint(1,2))] - q = build_wildcard_query( - genes=genes, - disease='MONDO:0007254', - outcome_name='survival_time', - outcome='EFO:0000714', - outcome_op='>=', - outcome_value=random.randint(1, 5000), - trapi_version='1.0', - wildcard_category='drug', - ) - queries.append(q.to_dict()) +# Build all simple single gene, single drug, breast cancer, survival queries. +queries = defaultdict(list) +for trapi_version in TRAPI_VERSIONS: + for _ in range(NUM_QUERIES): + genes = [gene for gene in random.choices(list(curies['biolink:Gene'].keys()), k=random.randint(1,2))] + q = build_wildcard_query( + genes=genes, + disease='MONDO:0007254', + outcome_name='survival_time', + outcome='EFO:0000714', + outcome_op='>', + outcome_value=random.randint(1, 5000), + trapi_version=trapi_version, + wildcard_category='drug', + ) + queries[trapi_version].append(q.to_dict()) # Pickle the queries with open('random_drug_wildcard_queries.pk', 'wb') as f_: diff --git a/utils/build_gene_wildcard_batch_queries.py b/utils/build_gene_wildcard_batch_queries.py index 4fa58ea..7e00f7a 100644 --- a/utils/build_gene_wildcard_batch_queries.py +++ b/utils/build_gene_wildcard_batch_queries.py @@ -1,3 +1,4 @@ +from chp_client import get_client from chp_client.query import build_wildcard_query import itertools import tqdm @@ -5,6 +6,7 @@ import pickle import random import json +from collections import defaultdict from chp.trapi_interface import TrapiInterface @@ -15,6 +17,8 @@ # Set number of queries to build NUM_QUERIES = 10 +TRAPI_VERSIONS = ['1.0', '1.1'] + # Set seed random.seed(111) @@ -26,24 +30,25 @@ curies = TrapiInterface().get_curies() #curies = client.curies() logger.info('Got curies.') -# Build all simple single gene, single drug, breast cancer, survival queries. -queries = [] -for _ in range(NUM_QUERIES): - drugs = [gene for gene in random.choices(list(curies['biolink:Drug'].keys()), k=random.randint(1,4))] - q = build_wildcard_query( - batch_drugs=drugs, - disease='MONDO:0007254', - outcome_name='survival_time', - outcome='EFO:0000714', - outcome_op='>=', - outcome_value=random.randint(1, 5000), - trapi_version='1.0', - wildcard_category='gene', - ) - #print(q) - #input() - queries.append(q.to_dict()) +# Build all simple single gene, single drug, breast cancer, survival queries. +queries = defaultdict(list) +for trapi_version in TRAPI_VERSIONS: + for _ in range(NUM_QUERIES): + drugs = [gene for gene in random.choices(list(curies['biolink:Drug'].keys()), k=random.randint(1,4))] + q = build_wildcard_query( + batch_drugs=drugs, + disease='MONDO:0007254', + outcome_name='survival_time', + outcome='EFO:0000714', + outcome_op='>', + outcome_value=random.randint(1, 5000), + trapi_version=trapi_version, + wildcard_category='gene', + ) + #print(q) + #input() + queries[trapi_version].append(q.to_dict()) # Pickle the queries with open('random_gene_wildcard_batch_queries.pk', 'wb') as f_: diff --git a/utils/build_gene_wildcard_queries.py b/utils/build_gene_wildcard_queries.py index 3f6b2af..401cbb0 100644 --- a/utils/build_gene_wildcard_queries.py +++ b/utils/build_gene_wildcard_queries.py @@ -1,3 +1,4 @@ +from chp_client import get_client from chp_client.query import build_wildcard_query import itertools import tqdm @@ -5,6 +6,7 @@ import pickle import random import json +from collections import defaultdict from chp.trapi_interface import TrapiInterface @@ -15,6 +17,8 @@ # Set number of queries to build NUM_QUERIES = 10 +TRAPI_VERSIONS = ['1.0', '1.1'] + # Set seed random.seed(111) @@ -28,22 +32,23 @@ logger.info('Got curies.') # Build all simple single gene, single drug, breast cancer, survival queries. -queries = [] -for _ in range(NUM_QUERIES): - drugs=random.choice(list(curies["biolink:Drug"].keys())) - q = build_wildcard_query( - drugs=[drugs], - disease='MONDO:0007254', - outcome_name='survival_time', - outcome='EFO:0000714', - outcome_op='>=', - outcome_value=random.randint(1, 5000), - trapi_version='1.0', - wildcard_category='gene', - ) - #print(q) - #input() - queries.append(q.to_dict()) +queries = defaultdict(list) +for trapi_version in TRAPI_VERSIONS: + for _ in range(NUM_QUERIES): + drugs=random.choice(list(curies["biolink:Drug"].keys())) + q = build_wildcard_query( + drugs=[drugs], + disease='MONDO:0007254', + outcome_name='survival_time', + outcome='EFO:0000714', + outcome_op='>', + outcome_value=random.randint(1, 5000), + trapi_version=trapi_version, + wildcard_category='gene', + ) + #print(q) + #input() + queries[trapi_version].append(q.to_dict()) # Pickle the queries with open('random_gene_wildcard_queries.pk', 'wb') as f_: diff --git a/utils/build_one_hop_queries.py b/utils/build_one_hop_queries.py new file mode 100644 index 0000000..67ffdc2 --- /dev/null +++ b/utils/build_one_hop_queries.py @@ -0,0 +1,132 @@ + +from chp_client.query import build_onehop_query +import itertools +import tqdm +import logging +import pickle +import random +import json +from collections import defaultdict + +from trapi_model.constants import * +from chp.trapi_interface import TrapiInterface + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + +# Set number of queries to build +NUM_QUERIES = 10 + +TRAPI_VERSIONS = ['1.0', '1.1'] + +# Set seed +random.seed(111) + +# Get client +#client = get_client() + +# Get curies +logger.info('Getting curies.') +curies = TrapiInterface().get_curies() +#curies = client.curies() +logger.info('Got curies.') + +# Build all simple single gene, single drug, breast cancer, survival queries. +queries = defaultdict(lambda: defaultdict(list)) +disease = ['MONDO:0007254'] +outcome_name = 'survival_time' +outcome = 'EFO:0000714' +outcome_op = '>' + +# Build single gene to disease query +for trapi_version in TRAPI_VERSIONS: + gene = [random.choice(list(curies["biolink:Gene"].keys()))] + q = build_onehop_query( + gene, + [BIOLINK_GENE], + disease, + [BIOLINK_DISEASE], + outcome=outcome, + outcome_op=outcome_op, + outcome_value=random.randint(500,1500), + trapi_version=trapi_version, + ) +#print(q) +#input() + queries[trapi_version]['gene_to_disease_proxy'] = q.to_dict() + + +# Build single drug to disease query + drug = [random.choice(list(curies["biolink:Drug"].keys()))] + q = build_onehop_query( + drug, + [BIOLINK_DRUG], + disease, + [BIOLINK_DISEASE], + outcome=outcome, + outcome_op=outcome_op, + outcome_value=random.randint(500,1500), + trapi_version=trapi_version, + ) +#print(q) +#input() + queries[trapi_version]['drug_to_disease_proxy'] = q.to_dict() + +# Build gene to drug query + gene = [random.choice(list(curies["biolink:Gene"].keys()))] + drug = [random.choice(list(curies["biolink:Drug"].keys()))] + q = build_onehop_query( + gene, + [BIOLINK_GENE], + drug, + [BIOLINK_DRUG], + outcome=outcome, + outcome_op=outcome_op, + outcome_value=random.randint(500,1500), + trapi_version=trapi_version, + ) +#print(q) +#input() + queries[trapi_version]['gene_to_drug_proxy'] = q.to_dict() + + +# Build gene to drug query + gene = [random.choice(list(curies["biolink:Gene"].keys()))] + drug = [random.choice(list(curies["biolink:Drug"].keys()))] + q = build_onehop_query( + drug, + [BIOLINK_DRUG], + gene, + [BIOLINK_GENE], + outcome=outcome, + outcome_op=outcome_op, + outcome_value=random.randint(500,1500), + trapi_version=trapi_version, + ) +#print(q) +#input() + queries[trapi_version]['drug_to_gene_proxy'] = q.to_dict() + +# Build gene to disease with proxy and context + gene = [random.choice(list(curies["biolink:Gene"].keys()))] + drug = [random.choice(list(curies["biolink:Drug"].keys()))] + q = build_onehop_query( + gene, + [BIOLINK_GENE], + disease, + [BIOLINK_DISEASE], + outcome=outcome, + outcome_op=outcome_op, + outcome_value=random.randint(500,1500), + trapi_version=trapi_version, + drugs=drug, + ) +#print(q) +#input() + queries[trapi_version]['gene_to_disease_proxy_context'] = q.to_dict() + + +# Pickle the queries +with open('standard_single_onehop_queries.pk', 'wb') as f_: + pickle.dump(dict(queries), f_) diff --git a/utils/build_random_batch_queries.py b/utils/build_random_batch_queries.py index e483da6..e27e063 100644 --- a/utils/build_random_batch_queries.py +++ b/utils/build_random_batch_queries.py @@ -6,6 +6,7 @@ import pickle import random import json +from collections import defaultdict from chp.trapi_interface import TrapiInterface @@ -16,6 +17,8 @@ # Set number of queries to build NUM_QUERIES = 10 +TRAPI_VERSIONS = ['1.0', '1.1'] + # Set seed random.seed(111) @@ -29,25 +32,26 @@ logger.info('Got curies.') # Build all simple single gene, single drug, breast cancer, survival queries. -queries = [] -for _ in range(NUM_QUERIES): - genes = [gene for gene in random.choices(list(curies["biolink:Gene"].keys()), k=random.randint(0,1))] - batch_genes = [gene for gene in random.choices(list(set(curies["biolink:Gene"].keys()) - set(genes)), k=random.randint(1,3))] - drugs = [drug for drug in random.choices(list(curies["biolink:Drug"].keys()), k=random.randint(0,1))] - batch_drugs = [drug for drug in random.choices(list(set(curies["biolink:Drug"].keys()) - set(drugs)), k=random.randint(1,3))] - q = build_standard_query( - genes=genes, - drugs=drugs, - disease='MONDO:0007254', - outcome_name='survival_time', - outcome='EFO:0000714', - outcome_op='>=', - outcome_value=random.randint(1, 5000), - trapi_version='1.0', - batch_genes=batch_genes, - batch_drugs=batch_drugs, - ) - queries.append(q.to_dict()) +queries = defaultdict(list) +for trapi_version in TRAPI_VERSIONS: + for _ in range(NUM_QUERIES): + genes = [gene for gene in random.choices(list(curies["biolink:Gene"].keys()), k=random.randint(0,1))] + batch_genes = [gene for gene in random.choices(list(set(curies["biolink:Gene"].keys()) - set(genes)), k=random.randint(1,3))] + drugs = [drug for drug in random.choices(list(curies["biolink:Drug"].keys()), k=random.randint(0,1))] + batch_drugs = [drug for drug in random.choices(list(set(curies["biolink:Drug"].keys()) - set(drugs)), k=random.randint(1,3))] + q = build_standard_query( + genes=genes, + drugs=drugs, + disease='MONDO:0007254', + outcome_name='survival_time', + outcome='EFO:0000714', + outcome_op='>', + outcome_value=random.randint(1, 5000), + trapi_version=trapi_version, + batch_genes=batch_genes, + batch_drugs=batch_drugs, + ) + queries[trapi_version].append(q.to_dict()) # Pickle the queries with open('random_batch_queries.pk', 'wb') as f_: diff --git a/utils/build_random_queries.py b/utils/build_random_queries.py index 6ba305d..948a839 100644 --- a/utils/build_random_queries.py +++ b/utils/build_random_queries.py @@ -6,6 +6,7 @@ import pickle import random import json +from collections import defaultdict from chp.trapi_interface import TrapiInterface @@ -16,6 +17,8 @@ # Set number of queries to build NUM_QUERIES = 10 +TRAPI_VERSIONS = ['1.0', '1.1'] + # Set seed random.seed(111) @@ -29,22 +32,22 @@ logger.info('Got curies.') # Build all simple single gene, single drug, breast cancer, survival queries. -queries = [] -for _ in range(NUM_QUERIES): - genes = [gene for gene in random.choices(list(curies["biolink:Gene"].keys()), k=random.randint(1,3))] - therapeutic=random.choice(list(curies["biolink:Drug"].keys())) - q = build_standard_query( - genes=genes, - drugs=[therapeutic], - disease='MONDO:0007254', - outcome_name='survival_time', - outcome='EFO:0000714', - outcome_op='>=', - outcome_value=random.randint(1, 5000), - trapi_version='1.0', - ) - print(json.dumps(q.to_dict(), indent=2)) - queries.append(q.to_dict()) +queries = defaultdict(list) +for trapi_version in TRAPI_VERSIONS: + for _ in range(NUM_QUERIES): + genes = [gene for gene in random.choices(list(curies["biolink:Gene"].keys()), k=random.randint(1,3))] + therapeutic=random.choice(list(curies["biolink:Drug"].keys())) + q = build_standard_query( + genes=genes, + drugs=[therapeutic], + disease='MONDO:0007254', + outcome_name='survival_time', + outcome='EFO:0000714', + outcome_op='>', + outcome_value=random.randint(1, 5000), + trapi_version=trapi_version, + ) + queries[trapi_version].append(q.to_dict()) # Pickle the queries with open('random_queries.pk', 'wb') as f_: diff --git a/utils/build_wildcard_onehop_queries.py b/utils/build_wildcard_onehop_queries.py new file mode 100644 index 0000000..a2a08a0 --- /dev/null +++ b/utils/build_wildcard_onehop_queries.py @@ -0,0 +1,165 @@ +from chp_client.query import build_onehop_query +import itertools +import tqdm +import logging +import pickle +import random +import json +from collections import defaultdict + +from trapi_model.constants import * +from chp.trapi_interface import TrapiInterface + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + +# Set number of queries to build +NUM_QUERIES = 10 + +TRAPI_VERSIONS = ['1.0', '1.1'] + +# Set seed +random.seed(111) + +# Get client +#client = get_client() + +# Get curies +logger.info('Getting curies.') +curies = TrapiInterface().get_curies() +#curies = client.curies() +logger.info('Got curies.') + +# Build all simple single gene, single drug, breast cancer, survival queries. +queries = defaultdict(lambda: defaultdict(list)) +disease = ['MONDO:0007254'] +outcome_name = 'survival_time' +outcome = 'EFO:0000714' +outcome_op = '>' + +# Build batch gene to disease query +for trapi_version in TRAPI_VERSIONS: +# Build single gene wildcard to disease query + q = build_onehop_query( + None, + [BIOLINK_GENE], + disease, + [BIOLINK_DISEASE], + outcome=outcome, + outcome_op=outcome_op, + outcome_value=random.randint(500,1500), + trapi_version=trapi_version, + ) + print(q) + input() + queries[trapi_version]['genewildcard_to_disease_proxy'] = q.to_dict() + + +# Build single drug wildcard to disease query + q = build_onehop_query( + None, + [BIOLINK_DRUG], + disease, + [BIOLINK_DISEASE], + outcome=outcome, + outcome_op=outcome_op, + outcome_value=random.randint(500,1500), + trapi_version=trapi_version, + ) + print(q) + input() + queries[trapi_version]['drugwildcard_to_disease_proxy'] = q.to_dict() + +# Build single gene wildcard to disease query + drug = [random.choice(list(curies["biolink:Drug"].keys()))] + q = build_onehop_query( + None, + [BIOLINK_GENE], + disease, + [BIOLINK_DISEASE], + outcome=outcome, + outcome_op=outcome_op, + outcome_value=random.randint(500,1500), + trapi_version=trapi_version, + drugs=drug, + ) + print(q) + input() + queries[trapi_version]['genewildcard_to_disease_proxy_context'] = q.to_dict() + + +# Build single drug wildcard to disease query + gene = [random.choice(list(curies["biolink:Gene"].keys()))] + q = build_onehop_query( + None, + [BIOLINK_DRUG], + disease, + [BIOLINK_DISEASE], + outcome=outcome, + outcome_op=outcome_op, + outcome_value=random.randint(500,1500), + trapi_version=trapi_version, + genes=gene, + ) + print(q) + input() + queries[trapi_version]['drugwildcard_to_disease_proxy_context'] = q.to_dict() + +# Build gene wildcard to drug query + gene = [random.choice(list(curies["biolink:Gene"].keys()))] + drug = [random.choice(list(curies["biolink:Drug"].keys()))] + q = build_onehop_query( + None, + [BIOLINK_GENE], + drug, + [BIOLINK_DRUG], + outcome=outcome, + outcome_op=outcome_op, + outcome_value=random.randint(500,1500), + trapi_version=trapi_version, + ) + print(q) + input() + queries[trapi_version]['genewildcard_to_drug_proxy'] = q.to_dict() + + +# Build drug wildcard to gene query + gene = [random.choice(list(curies["biolink:Gene"].keys()))] + drug = [random.choice(list(curies["biolink:Drug"].keys()))] + q = build_onehop_query( + None, + [BIOLINK_DRUG], + gene, + [BIOLINK_GENE], + outcome=outcome, + outcome_op=outcome_op, + outcome_value=random.randint(500,1500), + trapi_version=trapi_version, + ) + print(q) + input() + queries[trapi_version]['drugwildcard_to_gene_proxy'] = q.to_dict() + +# Build gene wildcard to disease with proxy and context + gene = [random.choice(list(curies["biolink:Gene"].keys()))] + drug = [random.choice(list(curies["biolink:Drug"].keys()))] + q = build_onehop_query( + None, + [BIOLINK_GENE], + disease, + [BIOLINK_DISEASE], + outcome=outcome, + outcome_op=outcome_op, + outcome_value=random.randint(500,1500), + trapi_version=trapi_version, + drugs=drug, + ) + print(q) + input() + queries[trapi_version]['genewildcard_to_disease_proxy_context'] = q.to_dict() + + +# Pickle the queries +with open('wildcard_single_onehop_queries.pk', 'wb') as f_: + pickle.dump(dict(queries), f_)