-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathindex_metabolic_networks.py
executable file
·174 lines (156 loc) · 6.49 KB
/
index_metabolic_networks.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
#!/usr/bin/env python
"""Index metabolic network files. The current (initial) version is limited
to SBML files and psamm-model-collection project YAML files."""
from __future__ import print_function
from __future__ import unicode_literals
import argparse
import json
import logging
import os
import cobra
from psamm.datasource import native, sbml
from psamm.expression.boolean import ParseError
from pymongo import IndexModel
from nosqlbiosets.dbutils import DBconnection
# Module-level logger (the code visible in this file reports via print())
logger = logging.getLogger(__name__)
# Default Elasticsearch index / MongoDB database name (--index option)
INDEX = 'biosets'
# Default Elasticsearch document type / MongoDB collection (--doctype option)
DOCTYPE = 'metabolic_network'
def psamm_yaml_to_sbml(inf):
    """Convert the PSAMM model at path ``inf`` to an SBML file.

    The SBML output is written next to the input, at ``inf + ".sbml"``.
    The reader's name and the output path are printed as progress output.

    Returns the path of the written SBML file, or None when writing fails
    with a TypeError or a PSAMM ParseError (the error is printed).
    """
    model_reader = native.ModelReader.reader_from_path(inf)
    print(model_reader.name)
    metabolic_model = model_reader.create_model()
    sbml_writer = sbml.SBMLWriter(cobra_flux_bounds=True)
    sbml_path = inf + ".sbml"
    print(sbml_path)
    try:
        sbml_writer.write_model(sbml_path, metabolic_model, pretty=True)
    except TypeError as err:
        print("Type error while saving %s in SBML: %s" % (inf, err))
        return None
    except ParseError as err:
        print("Parser error while saving %s in SBML: %s" % (inf, err))
        return None
    else:
        return sbml_path
def sbml_to_cobra_json(inf):
    """Read the SBML file ``inf`` with COBRApy and return the model as a dict."""
    return cobra.io.model_to_dict(cobra.io.read_sbml_model(inf))
class SBMLIndexer(DBconnection):
    """Index metabolic network models with Elasticsearch or MongoDB.

    Models are read from SBML files (.xml/.sbml) or from PSAMM .yaml files,
    which are first converted to SBML. Each model is stored as one document
    whose id is the model name or, when the name is missing, the model id.
    """

    def __init__(self, db, index=INDEX, doctype=DOCTYPE, host=None, port=None):
        """Open the database connection.

        db: "Elasticsearch" selects Elasticsearch; any other value is
            handled as MongoDB.
        index: Elasticsearch index name or MongoDB database name.
        doctype: Elasticsearch document type or MongoDB collection name.
        host, port: server address, passed through to DBconnection.

        NOTE(review): recreateindex=True is always passed to DBconnection;
        presumably an existing index is dropped and recreated on every run
        -- confirm in nosqlbiosets.dbutils.
        """
        self.index = index
        self.doctype = doctype
        self.db = db
        es_indexsettings = {
            "index.mapping.total_fields.limit": 8000,
            "number_of_replicas": 0}
        super(SBMLIndexer, self).__init__(db, index, host, port,
                                          recreateindex=True,
                                          es_indexsettings=es_indexsettings)
        if db != "Elasticsearch":
            print(doctype)
            self.mcl = self.mdbi[doctype]

    def read_and_index_model_files(self, infile):
        """Read and index metabolic network files, PSAMM yaml or SBML.

        infile is either a single model file or a folder. For a folder every
        entry is processed; a sub-folder that contains a "model.yaml" file is
        replaced by that file. Afterwards the Elasticsearch index is
        refreshed, or a MongoDB text index on "name" is created.
        """
        if os.path.isdir(infile):
            for child in os.listdir(infile):
                c = os.path.join(infile, child)
                if os.path.isdir(c) and os.path.exists(
                        os.path.join(c, "model.yaml")):
                    c = os.path.join(c, "model.yaml")
                self.read_and_index_model_file(c)
        else:
            self.read_and_index_model_file(infile)
        if self.db == 'Elasticsearch':
            self.es.indices.refresh(index=self.index)
        else:
            index = IndexModel([("name", "text")])
            self.mdbi[self.doctype].create_indexes([index])

    def read_and_index_model_file(self, infile):
        """Read one PSAMM yaml or SBML file and index it.

        Raises FileNotFoundError when infile does not exist. Errors raised
        while converting or indexing are printed and not propagated, so that
        one bad file does not stop the indexing of a whole folder.
        """
        print("Reading/indexing %s " % infile)
        if not os.path.exists(infile):
            print("Input file not found")
            raise FileNotFoundError(infile)
        if infile.endswith(".yaml"):
            try:
                infile_ = psamm_yaml_to_sbml(infile)
                if infile_ is not None:
                    self.read_and_index_sbml_file(infile_)
                else:
                    print("Unable to process PSAMM yaml file: %s" % infile)
            except Exception as e:
                print("Error while processing PSAMM yaml file: %s, %s" %
                      (infile, e))
        elif infile.endswith((".xml", ".sbml")):
            try:
                self.read_and_index_sbml_file(infile)
            except Exception as e:
                print("Error while processing SBML file: %s, %s" %
                      (infile, e))
        else:
            print(
                "Only .xml, .sbml (for SBML) and .yaml (for PSAMM)"
                " files are supported")

    def read_and_index_sbml_file(self, infile):
        """Read an SBML file and index it with the selected database.

        The COBRApy dict is adjusted before indexing: the metabolites
        mapping of every reaction becomes a list of {"id": ..., "st": ...}
        entries, and a model id of None is removed from the document.
        """
        if os.path.exists(infile):
            r = sbml_to_cobra_json(infile)
            # Changes to the COBRApy json, see readme file in this folder
            if r is not None:
                for react in r['reactions']:
                    react['metabolites'] = [
                        {"id": mid, "st": st}
                        for mid, st in react['metabolites'].items()]
                if r['id'] is None:
                    del r['id']
                if self.db == "Elasticsearch":
                    self.es_index_sbml(1, r)
                else:  # "MongoDB"
                    self.mongodb_index_sbml(1, r)
        else:
            print("SBML file not found")

    @staticmethod
    def _docid(model):
        """Return the document id: the model name if present, else its id.

        Raises KeyError when the model has neither a name nor an id.
        """
        return model['name'] if 'name' in model else model['id']

    def es_index_sbml(self, _, model):
        """Index a model with Elasticsearch; return True on success.

        Errors raised by the Elasticsearch client are printed and reported
        as a False return value.
        """
        docid = self._docid(model)
        try:
            self.es.index(index=self.index, doc_type=self.doctype,
                          id=docid, body=json.dumps(model))
            return True
        except Exception as e:
            print(e)
        return False

    def mongodb_index_sbml(self, _, model):
        """Index a model with MongoDB; return True on success.

        The document stored under the model's id is replaced, or inserted
        when it does not exist yet. Errors raised by the MongoDB client are
        printed and reported as a False return value.
        """
        docid = self._docid(model)
        spec = {"_id": docid}
        try:
            # replace_one is the supported equivalent of the legacy
            # Collection.update(spec, document, upsert=True) call, which
            # was removed in PyMongo 4
            self.mcl.replace_one(spec, model, upsert=True)
            return True
        except Exception as e:
            print(e)
        return False
# Command-line entry point: index one model file, or a folder of model files
if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='Index metabolic network files (SBML, or PSAMM yaml)'
                    ' with Elasticsearch or MongoDB')
    # Required: without an input path the indexer would still be constructed
    # (with recreateindex=True) and the run would then fail with a TypeError
    # in os.path.isdir(None)
    parser.add_argument('-infile', '--infile',
                        required=True,
                        help='Input file or folder name'
                             ' with Metabolic network file(s) in PSAMM .yaml'
                             ' or SBML .xml formats')
    parser.add_argument('--index',
                        default=INDEX,
                        help='Name of the Elasticsearch index'
                             ' or MongoDB database')
    parser.add_argument('--doctype',
                        default=DOCTYPE,
                        help='Name for the Elasticsearch document type or '
                             'MongoDB collection')
    parser.add_argument('--host',
                        help='Elasticsearch or MongoDB server hostname')
    parser.add_argument('--port',
                        help="Elasticsearch or MongoDB server port number")
    parser.add_argument('--db', default='Elasticsearch',
                        help="Database: 'Elasticsearch' or 'MongoDB'")
    args = parser.parse_args()
    indxr = SBMLIndexer(args.db, args.index, args.doctype, args.host, args.port)
    indxr.read_and_index_model_files(args.infile)