diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..500c4aa --- /dev/null +++ b/.gitignore @@ -0,0 +1,132 @@ +#SSH Keys +id_rsa* + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ diff --git a/.gitmodules b/.gitmodules index 7064f77..1f28ec7 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,9 @@ [submodule "submodules/reasoner-validator"] path = submodules/reasoner-validator url = git@github.com:di2ag/reasoner-validator.git +[submodule "submodules/trapi_model"] + path = submodules/trapi_model + url = git@github.com:di2ag/trapi_model.git +[submodule "submodules/ncats_testing"] + path = submodules/ncats_testing + url = git@github.com:NCATSTranslator/testing.git diff --git a/chp_client/query.py b/chp_client/query.py index ca8e5e1..a54e09d 100644 --- a/chp_client/query.py +++ b/chp_client/query.py @@ -5,374 +5,10 @@ import json from jsonschema import ValidationError -from chp_client.trapi_constants import * -from chp_client.exceptions import * -from reasoner_validator import validate_QEdge_1_0, validate_QEdge_1_1, \ -validate_QNode_1_0, validate_QNode_1_1, validate_Message_1_0, validate_Message_1_1, \ -validate_QueryGraph_1_0, validate_QueryGraph_1_1 - - -# Constants -SUBJECT_TO_OBJECT_PREDICATE_MAP = { - (BIOLINK_GENE, BIOLINK_DRUG): BIOLINK_GENE_TO_CHEMICAL_PREDICATE, - (BIOLINK_DRUG, BIOLINK_GENE): BIOLINK_CHEMICAL_TO_GENE_PREDICATE, - (BIOLINK_GENE, BIOLINK_DISEASE): BIOLINK_GENE_TO_DISEASE_PREDICATE, - (BIOLINK_DRUG, BIOLINK_DISEASE): BIOLINK_CHEMICAL_TO_DISEASE_OR_PHENOTYPIC_FEATURE_PREDICATE, - } - -class QBaseClass: - def __init__(self, trapi_version): - self.trapi_version = trapi_version - - def json(self, filename=None): - if filename is None: - return json.dumps(self.to_dict()) - else: - with open(filename, 'w') as json_file: - json.dump(self.to_dict(), json_file) - - def __str__(self): - return json.dumps(self.to_dict()) - -class QConstraintOrAdditionalProperty(QBaseClass): - def __init__(self, - trapi_version, - name, - c_id, - operator, - value, - unit_id=None, - unit_name=None, - c_not=False, - ): - self.name = name - self.id = c_id - self.operator = operator - self.value = value - self.unit_id = unit_id - self.unit_name = unit_name - super().__init__(trapi_version) - - def to_dict(self): - if self.trapi_version == '1.0': - return { - self.name: { - "id": self.id, - "operator": self.operator, - "value": self.value, - "unit_id": self.unit_id, - "unit_name": self.unit_name - } - } - elif self.trapi_version == '1.1': - return { - "name": self.name, - "id": self.id, - "operator": self.operator, - "value": self.value, - "unit_id": self.unit_id, - "unit_name": self.unit_name - } - else: - raise UnsupportedTrapiVersion(self.trapi_version) - - -class QNode(QBaseClass): - def __init__(self, - trapi_version, - ids = None, - categories = None, - constraints = None, - ): - self.ids = ids - self.categories = categories - self.constraints = constraints - super().__init__(trapi_version) - - valid, message = self.validate() - if not valid: - raise InvalidTrapiComponent(trapi_version, 'QNode', message) - - def to_dict(self): - if self.trapi_version == '1.0': - _dict = { - "id": self.ids, - "category": self.categories, - } - if self.constraints is not None: - for constraint in self.constraints: - _dict.update(constraint.to_dict()) - return _dict - elif self.trapi_version == '1.1': - ids = self.ids - categories = self.categories - if type(ids) is not list and ids is not None: - ids = [ids] - if type(categories) is not list and categories is not None: - categories = [categories] - _dict = { - "ids": ids, - "categories": categories, - "constraints": [] - } - if self.constraints is not None: - for constraint in self.constraints: - _dict["constraints"].append(constraint.to_dict()) - return _dict - else: - raise UnsupportedTrapiVersion(self.trapi_version) - - def add_constraint(self, - name, - c_id, - operator, - value, - unit_id=None, - unit_name=None, - c_not=False, - edge_id=None, - node_id=None, - ): - if self.constraints is None: - self.constraints = [] - self.constraints.append( - QConstraintOrAdditionalProperty( - trapi_version=self.trapi_version, - name=name, - c_id=c_id, - operator=operator, - value=value, - unit_id=unit_id, - unit_name=unit_name, - c_not=c_not, - ) - ) - valid, message = self.validate() - if not valid: - raise InvalidTrapiComponent(trapi_version, 'QNode', message) - - def validate(self): - _dict = self.to_dict() - try: - if self.trapi_version == '1.0': - validate_QNode_1_0(_dict) - elif self.trapi_version == '1.1': - validate_QNode_1_1(_dict) - else: - raise UnsupportedTrapiVersion(self.trapi_version) - return True, None - except ValidationError as ex: - return False, ex.message - -class QEdge(QBaseClass): - def __init__(self, - trapi_version, - q_subject, - q_object, - predicates=None, - relation=None, - constraints=None, - ): - self.subject = q_subject - self.object = q_object - self.predicates = predicates - self.relation = relation - self.constraints = constraints - super().__init__(trapi_version) - - valid, message = self.validate() - if not valid: - raise InvalidTrapiComponent(trapi_version, 'QEdge', message) +from trapi_model import Message, Query +from trapi_model.constants import * - def to_dict(self): - if self.trapi_version == '1.0': - _dict = { - "predicate": self.predicates, - "relation": self.relation, - "subject": self.subject, - "object": self.object, - } - if self.constraints is not None: - for constraint in self.constraints: - _dict.update(constraint.to_dict()) - return _dict - elif self.trapi_version == '1.1': - predicates = self.predicates - if type(predicates) is not list: - predicates = [predicates] - _dict = { - "predicates": predicates, - "relation": self.relation, - "subject": self.subject, - "object": self.object, - } - if self.constraints is not None: - _dict["constraints"] = [] - for constraint in self.constraints: - _dict["constraints"].append(constraint.to_dict()) - return _dict - - def add_constraint(self, - name, - c_id, - operator, - value, - unit_id=None, - unit_name=None, - c_not=False, - edge_id=None, - node_id=None, - ): - if self.constraints is None: - self.constraints = [] - self.constraints.append( - QConstraintOrAdditionalProperty( - trapi_version=self.trapi_version, - name=name, - c_id=c_id, - operator=operator, - value=value, - unit_id=unit_id, - unit_name=unit_name, - c_not=c_not, - ) - ) - valid, message = self.validate() - if not valid: - raise InvalidTrapiComponent(self.trapi_version, 'QEdge', message) - - def validate(self): - _dict = self.to_dict() - try: - if self.trapi_version == '1.0': - validate_QEdge_1_0(_dict) - elif self.trapi_version == '1.1': - validate_QEdge_1_1(_dict) - else: - raise UnsuppoertedTrapiVersion(self.trapi_version) - return True, None - except ValidationError as ex: - return False, ex.message - -class Query(QBaseClass): - def __init__(self, trapi_version='1.1'): - self.nodes = {} - self.edges = {} - self.node_counter = 0 - self.edge_counter = 0 - super().__init__(trapi_version) - - def add_node(self, ids, categories): - node_id = 'n{}'.format(self.node_counter) - self.node_counter += 1 - self.nodes[node_id] = QNode( - trapi_version=self.trapi_version, - ids=ids, - categories=categories - ) - return node_id - - def add_edge(self, q_subject, q_object, predicates, relation=None): - edge_id = 'e{}'.format(self.edge_counter) - self.edge_counter += 1 - self.edges[edge_id] = QEdge( - trapi_version=self.trapi_version, - q_subject=q_subject, - q_object=q_object, - predicates=predicates, - relation=relation, - ) - return edge_id - - def add_constraint(self, - name, - c_id, - operator, - value, - unit_id=None, - unit_name=None, - c_not=False, - edge_id=None, - node_id=None, - ): - if edge_id is None and node_id is None: - raise ValueError('Must specify either node or edge id.') - elif edge_id is not None and node_id is not None: - raise ValueError('Must specify either node or edge id, not both.') - if edge_id is not None: - q_obj = self.edges[edge_id] - else: - q_obj = self.nodes[node_id] - q_obj.add_constraint( - name, - c_id, - operator, - value, - unit_id=None, - unit_name=None, - c_not=False, - ) - return True - - def to_dict(self): - nodes = {} - edges = {} - for node_id, node in self.nodes.items(): - nodes[node_id] = node.to_dict() - for edge_id, edge in self.edges.items(): - edges[edge_id] = edge.to_dict() - return { - "nodes": nodes, - "edges": edges, - } - - def find_nodes(self, categories=None, ids=None): - matched_node_ids = [] - for node_id, node_info in self.nodes.items(): - if categories is not None: - if node_info.categories != categories: - continue - if ids is not None: - if node_info.ids != ids: - continue - matched_node_ids.append(node_id) - return matched_node_ids - - def make_trapi_message(self, to_json=False): - trapi_message = { - "query_graph": self.to_dict(), - "knowledge_graph": None, - "results": None - } - if to_json: - return json.dumps(trapi_message) - return trapi_message - - def validate_query_graph(self): - _dict = self.to_dict() - try: - if self.trapi_version == '1.0': - validate_QueryGraph_1_0(_dict) - elif self.trapi_version == '1.1': - validate_QueryGraph_1_1(_dict) - else: - raise UnsuppoertedTrapiVersion(self.trapi_version) - return True, None - except ValidationError as ex: - return False, ex.message - - def validate(self): - _dict = self.make_trapi_message() - try: - if self.trapi_version == '1.0': - validate_Message_1_0(_dict) - elif self.trapi_version == '1.1': - validate_Message_1_1(_dict) - else: - raise UnsuppoertedTrapiVersion(self.trapi_version) - return True, None - except ValidationError as ex: - return False, ex.message +from chp_client.exceptions import * def build_standard_query( genes=None, @@ -383,10 +19,15 @@ def build_standard_query( outcome_value=None, disease=None, trapi_version='1.1', + biolink_version=None, + batch_genes=None, + batch_drugs=None, + batch_diseases=None, ): if genes is None and drugs is None: - raise QueryBuildError("Both genes and drugs can't be None.") + if batch_genes is None and batch_drugs is None: + raise QueryBuildError("Both genes and drugs can't be None.") if outcome is None: raise QueryBuildError('You must specify an outcome CURIE.') if outcome_op is None: @@ -394,14 +35,20 @@ def build_standard_query( with your desired TRAPI version's Constraint Object.") if outcome_value is None: raise QueryBuildError('You must specify an outcome value to test.') - if disease is None: + if disease is None and batch_diseases is None: raise QueryBuildError('You must specify a disease.') + if disease is not None and batch_diseases is not None: + raise QueryBuildError('Only specify either diseases or batch diseases not both.') - # Initialize Query - q = Query(trapi_version=trapi_version) + # Initialize Message + message = Message(trapi_version, biolink_version) + q = message.query_graph - # Add disease node - disease_node = q.add_node(disease, BIOLINK_DISEASE) + # Add disease or batch disease node + if disease is not None: + disease_node = q.add_node(disease, BIOLINK_DISEASE) + else: + disease_node = q.add_node(batch_diseases, BIOLINK_DISEASE) if genes is not None: # Add gene nodes @@ -413,6 +60,13 @@ def build_standard_query( for gene_node in gene_nodes: q.add_edge(gene_node, disease_node, BIOLINK_GENE_TO_DISEASE_PREDICATE) + # Setup batch genes + if batch_genes is not None: + if type(batch_genes) is not list: + raise QueryBuildError('Batch genes must be a list.') + batch_gene_node = q.add_node(batch_genes, BIOLINK_GENE) + q.add_edge(batch_gene_node, disease_node, BIOLINK_GENE_TO_DISEASE_PREDICATE) + if drugs is not None: # Add drug nodes if drugs is not None: @@ -424,12 +78,21 @@ def build_standard_query( for drug_node in drug_nodes: q.add_edge(drug_node, disease_node, BIOLINK_CHEMICAL_TO_DISEASE_OR_PHENOTYPIC_FEATURE_PREDICATE) + # Setup batch drugs + if batch_drugs is not None: + if type(batch_drugs) is not list: + raise QueryBuildError('Batch drugs must be a list.') + batch_drug_node = q.add_node(batch_drugs, BIOLINK_DRUG) + q.add_edge(batch_drug_node, disease_node, BIOLINK_CHEMICAL_TO_DISEASE_OR_PHENOTYPIC_FEATURE_PREDICATE) + # Connect drug node to outcome node outcome_node = q.add_node(outcome, BIOLINK_PHENOTYPIC_FEATURE) - q.add_constraint(outcome_name, outcome, outcome_op, outcome_value, node_id=outcome_node) - q.add_edge(disease_node, outcome_node, BIOLINK_DISEASE_TO_PHENOTYPIC_FEATURE_PREDICATE) + phenotype_edge = q.add_edge(disease_node, outcome_node, BIOLINK_DISEASE_TO_PHENOTYPIC_FEATURE_PREDICATE) + q.add_constraint(outcome_name, outcome, outcome_op, outcome_value, edge_id=phenotype_edge) - return q + query = Query(trapi_version=trapi_version, biolink_version=biolink_version) + query.message = message + return query def build_wildcard_query( @@ -442,6 +105,10 @@ def build_wildcard_query( outcome_value=None, disease=None, trapi_version='1.1', + biolink_version=None, + batch_genes=None, + batch_drugs=None, + batch_diseases=None, ): if wildcard_category is None: @@ -449,7 +116,21 @@ def build_wildcard_query( # Build standard query - q = build_standard_query(genes, drugs, outcome, outcome_name, outcome_op, outcome_value, disease, trapi_version=trapi_version) + query = build_standard_query( + genes, + drugs, + outcome, + outcome_name, + outcome_op, + outcome_value, + disease, + trapi_version=trapi_version, + biolink_version=biolink_version, + batch_genes=batch_genes, + batch_drugs=batch_drugs, + batch_diseases=batch_diseases, + ) + q = query.message.query_graph disease_node = q.find_nodes(categories=BIOLINK_DISEASE)[0] wildcard_node = q.add_node(None, wildcard_category) @@ -460,7 +141,7 @@ def build_wildcard_query( q.add_edge(wildcard_node, disease_node, BIOLINK_CHEMICAL_TO_DISEASE_OR_PHENOTYPIC_FEATURE_PREDICATE) else: raise InvalidWildcardCategory(wildcard_category) - return q + return query def build_onehop_query( q_subject, @@ -475,9 +156,11 @@ def build_onehop_query( outcome_value=None, disease=None, trapi_version='1.1', + biolink_version=None, ): # Initialize query - q = Query(trapi_version) + message = Message(trapi_version, biolink_version) + q = message.query_graph # Add nodes subject_node = q.add_node(q_subject, q_subject_category) @@ -514,4 +197,6 @@ def build_onehop_query( q.add_constraint(BIOLINK_DRUG, BIOLINK_DRUG, 'matches', drugs, edge_id=edge_id) if disease is not None: q.add_constraint(BIOLINK_DISEASE, BIOLINK_DISEASE, 'matches', disease, edge_id=edge_id) - return q + query = Query(trapi_version=trapi_version, biolink_version=biolink_version) + query.message = message + return query diff --git a/requirements.txt b/requirements.txt index 39a5fa1..7890005 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,3 @@ requests submodules/reasoner-validator +bmt diff --git a/samples/generate_samples.py b/samples/generate_samples.py index e6b9203..4d54928 100644 --- a/samples/generate_samples.py +++ b/samples/generate_samples.py @@ -46,15 +46,16 @@ def make_gene_wildcard_query(): json.dump(query, f_) def make_drug_wildcard_query(): - query = build_query( - genes=["ENSEMBL:ENSG00000121879"], - disease="MONDO:0007254", - outcome=("EFO:0000714", ">=", 500), - therapeutic_wildcard=True, - ) + for i in [1,2,5,10]: + query = build_query( + genes=["ENSEMBL:ENSG00000012048"], + disease="MONDO:0007254", + outcome=("EFO:0000714", ">=", 365*i), + therapeutic_wildcard=True, + ) - with open('drug_wildcard.json', 'w') as f_: - json.dump(query, f_) + with open('dw_brac1_{}yr.json'.format(i), 'w') as f_: + json.dump(query, f_) def make_one_hop_query(): query = build_query( @@ -67,11 +68,11 @@ def make_one_hop_query(): json.dump(query, f_) def main(): - make_standard_probablistic_query_one_gene() - make_standard_probablistic_query_two_gene() - make_gene_wildcard_query() + #make_standard_probablistic_query_one_gene() + #make_standard_probablistic_query_two_gene() + #make_gene_wildcard_query() make_drug_wildcard_query() - make_one_hop_query() + #make_one_hop_query() if __name__ == "__main__": main() diff --git a/submodules/ncats_testing b/submodules/ncats_testing new file mode 160000 index 0000000..bca5e43 --- /dev/null +++ b/submodules/ncats_testing @@ -0,0 +1 @@ +Subproject commit bca5e436b32faac7815e70f5e5a913b235d18296 diff --git a/submodules/trapi_model b/submodules/trapi_model new file mode 160000 index 0000000..064b458 --- /dev/null +++ b/submodules/trapi_model @@ -0,0 +1 @@ +Subproject commit 064b458589bc85adf31c4edc38d52ed570a9c0d9 diff --git a/tests/generate_regression_tests.py b/tests/generate_regression_tests.py index 8ae2f6e..d1b0cb5 100644 --- a/tests/generate_regression_tests.py +++ b/tests/generate_regression_tests.py @@ -3,6 +3,8 @@ import random import sys +from trapi_model.constants import * + logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) stream_handler = logging.StreamHandler(sys.stdout) @@ -52,32 +54,32 @@ def __init__(self): '1.1', ] self.wildcard_options = [ - 'biolink:Gene', - 'biolink:Drug', + BIOLINK_GENE, + BIOLINK_DRUG, ] # Onehop query options self.q_object_category_options = [ - 'biolink:Gene', - 'biolink:Drug', + BIOLINK_GENE, + BIOLINK_DRUG, ] self.q_subject_category_options = [ - 'biolink:Disease', - 'biolink:Gene', - 'biolink:Drug', + BIOLINK_DISEASE, + BIOLINK_GENE, + BIOLINK_DRUG, ] self.q_category_map = { - "biolink:Gene": [ + BIOLINK_GENE: [ 'GENE0001', 'GENE0002', ], - "biolink:Drug": [ + BIOLINK_DRUG: [ 'DRUG0001', 'DRUG0001', ], - "biolink:Disease": [ + BIOLINK_DISEASE: [ 'DIS0001', 'DIS0002', ], diff --git a/tests/test_client.py b/tests/test_client.py index bca2e83..342532a 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -14,9 +14,11 @@ import sys import os from collections import defaultdict + +from trapi_model import Query + from generate_regression_tests import QueryBuildingRegressionSuite from chp_client.exceptions import QueryBuildError - from chp_client import get_client from chp_client.query import build_standard_query, build_wildcard_query, build_onehop_query @@ -27,6 +29,24 @@ #url = 'http://localhost:8000' url = None + +class TestNcatsRepo(unittest.TestCase): + def setUp(self): + self.ncats_test_dir = os.path.abspath('../submodules/ncats_testing') + + def test_ars_predicates_queries(self): + ars_predicates_dir = os.path.join(self.ncats_test_dir, 'ars-requests/predicates') + # Added supported Query filenames to list below: + supported_query_filenames = [ + 'conditionGene.json', + 'geneCondition.json', + ] + # Run tests + for filename in supported_query_filenames: + filepath = os.path.join(ars_predicates_dir, filename) + query = Query('1.0').load(query_filepath=filepath) + print(json.dumps(query.to_dict(), indent=2)) + class TestQuery2(unittest.TestCase): def setUp(self): self.tester = QueryBuildingRegressionSuite() diff --git a/utils/build_drug_wildcard_batch_queries.py b/utils/build_drug_wildcard_batch_queries.py new file mode 100644 index 0000000..ba5d6ce --- /dev/null +++ b/utils/build_drug_wildcard_batch_queries.py @@ -0,0 +1,50 @@ +from chp_client.query import build_wildcard_query +import itertools +import tqdm +import logging +import pickle +import random +import json + +from chp.trapi_interface import TrapiInterface + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + +# Set number of queries to build +NUM_QUERIES = 10 + +# Set seed +random.seed(111) + +# Get client +#client = get_client() + +# Get curies +logger.info('Getting curies.') +curies = TrapiInterface().get_curies() +#curies = client.curies() +logger.info('Got curies.') +# Build all simple single gene, single drug, breast cancer, survival queries. + +queries = [] +for _ in range(NUM_QUERIES): + genes = [gene for gene in random.choices(list(curies['biolink:Gene'].keys()), k=random.randint(1,4))] + q = build_wildcard_query( + batch_genes=genes, + disease='MONDO:0007254', + outcome_name='survival_time', + outcome='EFO:0000714', + outcome_op='>=', + outcome_value=random.randint(1, 5000), + trapi_version='1.0', + wildcard_category='drug', + ) + #print(q) + #input() + queries.append(q.to_dict()) + +# Pickle the queries +with open('random_drug_wildcard_batch_queries.pk', 'wb') as f_: + pickle.dump(queries, f_) diff --git a/utils/build_drug_wildcard_queries.py b/utils/build_drug_wildcard_queries.py index 32b0551..045c44b 100644 --- a/utils/build_drug_wildcard_queries.py +++ b/utils/build_drug_wildcard_queries.py @@ -1,5 +1,4 @@ -from chp_client import get_client -from chp_client.query import build_query +from chp_client.query import build_wildcard_query import itertools import tqdm import logging @@ -27,19 +26,22 @@ curies = TrapiInterface().get_curies() #curies = client.curies() logger.info('Got curies.') - # Build all simple single gene, single drug, breast cancer, survival queries. + queries = [] for _ in range(NUM_QUERIES): - genes = [gene for gene in random.choices(list(curies["gene"].keys()), k=random.randint(1,2))] - q = build_query( + genes = [gene for gene in random.choices(list(curies['biolink:Gene'].keys()), k=random.randint(1,2))] + q = build_wildcard_query( genes=genes, disease='MONDO:0007254', - outcome=('EFO:0000714', '>=', random.randint(1, 5000)), - therapeutic_wildcard = True, - ) - print(json.dumps(q, indent=2)) - queries.append(q) + outcome_name='survival_time', + outcome='EFO:0000714', + outcome_op='>=', + outcome_value=random.randint(1, 5000), + trapi_version='1.0', + wildcard_category='drug', + ) + queries.append(q.to_dict()) # Pickle the queries with open('random_drug_wildcard_queries.pk', 'wb') as f_: diff --git a/utils/build_gene_wildcard_batch_queries.py b/utils/build_gene_wildcard_batch_queries.py new file mode 100644 index 0000000..4fa58ea --- /dev/null +++ b/utils/build_gene_wildcard_batch_queries.py @@ -0,0 +1,50 @@ +from chp_client.query import build_wildcard_query +import itertools +import tqdm +import logging +import pickle +import random +import json + +from chp.trapi_interface import TrapiInterface + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + +# Set number of queries to build +NUM_QUERIES = 10 + +# Set seed +random.seed(111) + +# Get client +#client = get_client() + +# Get curies +logger.info('Getting curies.') +curies = TrapiInterface().get_curies() +#curies = client.curies() +logger.info('Got curies.') +# Build all simple single gene, single drug, breast cancer, survival queries. + +queries = [] +for _ in range(NUM_QUERIES): + drugs = [gene for gene in random.choices(list(curies['biolink:Drug'].keys()), k=random.randint(1,4))] + q = build_wildcard_query( + batch_drugs=drugs, + disease='MONDO:0007254', + outcome_name='survival_time', + outcome='EFO:0000714', + outcome_op='>=', + outcome_value=random.randint(1, 5000), + trapi_version='1.0', + wildcard_category='gene', + ) + #print(q) + #input() + queries.append(q.to_dict()) + +# Pickle the queries +with open('random_gene_wildcard_batch_queries.pk', 'wb') as f_: + pickle.dump(queries, f_) diff --git a/utils/build_gene_wildcard_queries.py b/utils/build_gene_wildcard_queries.py index 5e52500..3f6b2af 100644 --- a/utils/build_gene_wildcard_queries.py +++ b/utils/build_gene_wildcard_queries.py @@ -1,5 +1,4 @@ -from chp_client import get_client -from chp_client.query import build_query +from chp_client.query import build_wildcard_query import itertools import tqdm import logging @@ -31,15 +30,20 @@ # Build all simple single gene, single drug, breast cancer, survival queries. queries = [] for _ in range(NUM_QUERIES): - therapeutic=random.choice(list(curies["biolink:Drug"].keys())) - q = build_query( - therapeutic=therapeutic, + drugs=random.choice(list(curies["biolink:Drug"].keys())) + q = build_wildcard_query( + drugs=[drugs], disease='MONDO:0007254', - outcome=('EFO:0000714', '>=', random.randint(1, 5000)), - num_gene_wildcards = 1, - ) - print(json.dumps(q, indent=2)) - queries.append(q) + outcome_name='survival_time', + outcome='EFO:0000714', + outcome_op='>=', + outcome_value=random.randint(1, 5000), + trapi_version='1.0', + wildcard_category='gene', + ) + #print(q) + #input() + queries.append(q.to_dict()) # Pickle the queries with open('random_gene_wildcard_queries.pk', 'wb') as f_: diff --git a/utils/build_random_batch_queries.py b/utils/build_random_batch_queries.py new file mode 100644 index 0000000..e483da6 --- /dev/null +++ b/utils/build_random_batch_queries.py @@ -0,0 +1,54 @@ +from chp_client import get_client +from chp_client.query import build_standard_query +import itertools +import tqdm +import logging +import pickle +import random +import json + +from chp.trapi_interface import TrapiInterface + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + +# Set number of queries to build +NUM_QUERIES = 10 + +# Set seed +random.seed(111) + +# Get client +#client = get_client() + +# Get curies +logger.info('Getting curies.') +curies = TrapiInterface().get_curies() +#curies = client.curies() +logger.info('Got curies.') + +# Build all simple single gene, single drug, breast cancer, survival queries. +queries = [] +for _ in range(NUM_QUERIES): + genes = [gene for gene in random.choices(list(curies["biolink:Gene"].keys()), k=random.randint(0,1))] + batch_genes = [gene for gene in random.choices(list(set(curies["biolink:Gene"].keys()) - set(genes)), k=random.randint(1,3))] + drugs = [drug for drug in random.choices(list(curies["biolink:Drug"].keys()), k=random.randint(0,1))] + batch_drugs = [drug for drug in random.choices(list(set(curies["biolink:Drug"].keys()) - set(drugs)), k=random.randint(1,3))] + q = build_standard_query( + genes=genes, + drugs=drugs, + disease='MONDO:0007254', + outcome_name='survival_time', + outcome='EFO:0000714', + outcome_op='>=', + outcome_value=random.randint(1, 5000), + trapi_version='1.0', + batch_genes=batch_genes, + batch_drugs=batch_drugs, + ) + queries.append(q.to_dict()) + +# Pickle the queries +with open('random_batch_queries.pk', 'wb') as f_: + pickle.dump(queries, f_) diff --git a/utils/build_random_queries.py b/utils/build_random_queries.py index 3b58811..6ba305d 100644 --- a/utils/build_random_queries.py +++ b/utils/build_random_queries.py @@ -1,10 +1,11 @@ from chp_client import get_client -from chp_client.query import build_query +from chp_client.query import build_standard_query import itertools import tqdm import logging import pickle import random +import json from chp.trapi_interface import TrapiInterface @@ -30,15 +31,20 @@ # Build all simple single gene, single drug, breast cancer, survival queries. queries = [] for _ in range(NUM_QUERIES): - genes = [gene for gene in random.choices(list(curies["gene"].keys()), k=random.randint(1,3))] - therapeutic=random.choice(list(curies["chemical_substance"].keys())) - q = build_query( + genes = [gene for gene in random.choices(list(curies["biolink:Gene"].keys()), k=random.randint(1,3))] + therapeutic=random.choice(list(curies["biolink:Drug"].keys())) + q = build_standard_query( genes=genes, - therapeutic=therapeutic, + drugs=[therapeutic], disease='MONDO:0007254', - outcome=('EFO:0000714', '>=', random.randint(1, 5000)), + outcome_name='survival_time', + outcome='EFO:0000714', + outcome_op='>=', + outcome_value=random.randint(1, 5000), + trapi_version='1.0', ) - queries.append(q) + print(json.dumps(q.to_dict(), indent=2)) + queries.append(q.to_dict()) # Pickle the queries with open('random_queries.pk', 'wb') as f_: