-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmcl_helpers.py
executable file
·267 lines (208 loc) · 8.97 KB
/
mcl_helpers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
#!/pkg/python/3.7.4/bin/python3
import sys
import os
from collections import defaultdict
def read_in_slashes_m2m(m2m_path):
from graph_helpers import unmark_node
m2m_pairs = []
with open(m2m_path, 'r') as f:
for alignment in f:
for line in alignment.strip().split('\t'):
node1, node2 = line.split('/')
m2m_pairs.append((unmark_node(node1), unmark_node(node2)))
return m2m_pairs
def read_in_slashes_alignments(path):
from graph_helpers import unmark_node
alignments = []
with open(path, 'r') as f:
for alignment in f:
new_alignment = []
for line in alignment.strip().split('\t'):
node1, node2 = line.split('/')
new_alignment.append((unmark_node(node1), unmark_node(node2)))
alignments.append(new_alignment)
return alignments
def get_nif_str(el):
return('\n'.join([f'{node1}\t{node2}\t1' for node1, node2 in el]))
def gen_nif_file(gtag, overwrite=False):
from graph_helpers import get_nif_path, get_marked_el
from file_helpers import file_exists, write_to_file
nif_path = get_nif_path(gtag)
if overwrite or not file_exists(nif_path):
marked_el = get_marked_el(gtag)
write_to_file(get_nif_str(marked_el), nif_path)
else:
print(f'using old nif file for {gtag}', file=sys.stderr)
def gen_odv_ort_file(gtag1, gtag2, override_k=None, bnstr=None, overwrite=False, notes=''):
from graph_helpers import gtag_to_mark, read_in_adj_set, get_graph_path
from odv_helpers import get_odv_orthologs, odv_ort_to_str, get_odv_ort_path, two_gtags_to_k, two_gtags_to_n, ODV, odv_ort_to_nodes
from file_helpers import file_exists, write_to_file
from analysis_helpers import get_deg_distr
k = two_gtags_to_k(gtag1, gtag2, override_k=override_k)
n = two_gtags_to_n(gtag1, gtag2)
ODV.set_weights_vars(k)
ort_path = get_odv_ort_path(gtag1, gtag2, k, n, bnstr=bnstr, notes=notes)
if overwrite or not file_exists(ort_path):
if notes == 'no1':
no1 = True
alpha = 1
elif notes == 'a80':
no1 = False
alpha = 0.8
elif notes == 'norm':
no1 = False
alpha = 1
odv_ort = get_odv_orthologs(gtag1, gtag2, k, n, bnstr=bnstr, no1=no1, alpha=alpha)
mark1 = gtag_to_mark(gtag1)
mark2 = gtag_to_mark(gtag2)
ort_str = odv_ort_to_str(odv_ort, mark1, mark2)
write_to_file(ort_str, ort_path)
else:
print(f'using old odv ort file for {gtag1}-{gtag2}', file=sys.stderr)
def copy_to_out(gtag1, gtag2, notes=''):
from bash_helpers import run_outsend
from graph_helpers import get_nif_path
from odv_helpers import get_odv_ort_path, two_gtags_to_k, two_gtags_to_n
nif_path1 = get_nif_path(gtag1)
nif_path2 = get_nif_path(gtag2)
k = two_gtags_to_k(gtag1, gtag2)
n = two_gtags_to_n(gtag1, gtag2)
ort_path = get_odv_ort_path(gtag1, gtag2, k, n, notes=notes)
run_outsend(nif_path1)
run_outsend(nif_path2)
run_outsend(ort_path)
def get_mcl_out_fname(gtag1, gtag2, k, n, notes=''):
base = f'{gtag1}-{gtag2}-k{k}-n{n}'
if notes != '':
base += f'-{notes}'
return f'{base}.txt'
def read_in_mcl_out(path):
with open(path, 'r') as f:
pass
def get_mcl_out_path(gtag1, gtag2, k, n, notes=''):
from file_helpers import get_data_path
return get_data_path(f'mcl/{get_mcl_out_fname(gtag1, gtag2, k, n, notes=notes)}')
def get_mcl_paths(gtag1, gtag2, k, n, notes=''):
out_path = get_mcl_out_path(gtag1, gtag2, k, n, notes=notes)
out_path_base = '.'.join(out_path.split('.')[:-1])
ag_path = out_path_base + '.ag'
time_path = out_path_base + '.time'
return out_path, ag_path, time_path
def take_from_out(gtag1, gtag2, notes=''):
from bash_helpers import run_outtake
from odv_helpers import two_gtags_to_k, two_gtags_to_n
k = two_gtags_to_k(gtag1, gtag2)
n = two_gtags_to_n(gtag1, gtag2)
out_fname = get_mcl_out_fname(gtag1, gtag2, k, n, notes=notes)
here_path = get_mcl_out_path(gtag1, gtag2, k, n, notes=notes)
run_outtake(out_fname, here_path) # note: the out here means the output file of mcl, while the out param of run_outtake refers to the remote git repo called "out" (which is named vmcopy lol)
def process_mcl(gtag1, gtag2, notes=''):
from odv_helpers import two_gtags_to_k, two_gtags_to_n
from analysis_helpers import print_distr, get_s3, get_alignment_nc
from graph_helpers import get_graph_path, read_in_adj_set
from ortholog_helpers import get_g1_to_g2_orthologs
k = two_gtags_to_k(gtag1, gtag2)
n = two_gtags_to_n(gtag1, gtag2)
adj_set1 = read_in_adj_set(get_graph_path(gtag1))
adj_set2 = read_in_adj_set(get_graph_path(gtag2))
g1_to_g2_ort = get_g1_to_g2_orthologs(gtag1, gtag2)
out_path = get_mcl_out_path(gtag1, gtag2, k, n, notes=notes)
print(f'evaluating {out_path}')
alignments = read_in_slashes_alignments(out_path)
if False:
lens = defaultdict(int)
for alignment in alignments:
lens[len(alignment)] += 1
print_distr(lens, 'alignment size')
if False:
for alignment in alignments:
alignment_size = len(alignment)
s3 = get_s3(alignment, adj_set1, adj_set2)
if alignment_size == 1 or s3 == 0:
continue
print(alignment_size, s3)
if False:
for alignment in alignments:
alignment_size = len(alignment)
s3 = get_s3(alignment, adj_set1, adj_set2)
nc = get_alignment_nc(alignment, g1_to_g2_ort)
if alignment_size < 8 or s3 == 0:
continue
print(s3, nc)
def get_mcl_tfp_stats(alignments, g1_to_g2_ort, adj_set1, adj_set2):
from analysis_helpers import get_topofunc_perfect_alignments
alignments = [alignment for alignment in alignments if len(alignment) >= 8]
tfp_aligns = get_topofunc_perfect_alignments(alignments, g1_to_g2_ort, adj_set1, adj_set2)
lens = [len(align) for align in tfp_aligns]
max_len = max(lens) if len(lens) > 0 else 0
return len(tfp_aligns), max_len
def prepare_mcl(gtag1, gtag2, notes=''):
from bash_helpers import run_orca_for_gtag
gen_nif_file(gtag1)
gen_nif_file(gtag2)
run_orca_for_gtag(gtag1)
run_orca_for_gtag(gtag2)
gen_odv_ort_file(gtag1, gtag2, notes=notes)
def full_local_run_mcl(gtag1, gtag2, notes=''):
from bash_helpers import run_align_mcl
prepare_mcl(gtag1, gtag2, notes=notes)
p = run_align_mcl(gtag1, gtag2, notes=notes)
if p != None:
p.wait()
process_mcl(gtag1, gtag2, notes=notes)
def wayne_copy_mcl(gtag1, gtag2, notes=''):
from bash_helpers import run_cmd
from general_helpers import get_wayne_path
from odv_helpers import two_gtags_to_k, two_gtags_to_n
k = two_gtags_to_k(gtag1, gtag2)
n = two_gtags_to_n(gtag1, gtag2)
old_path = get_mcl_out_path(gtag1, gtag2, k, n, notes=notes)
new_path = get_wayne_path(f'mcl/{gtag1}-{gtag2}-mclseeds.txt')
p = run_cmd(f'cp {old_path} {new_path}')
def clean_mcl_single(gtag, notes=''):
from odv_helpers import gtag_to_k, get_odv_path
from graph_helpers import get_nif_path
k = gtag_to_k(gtag)
nif_path = get_nif_path(gtag)
odv_path = get_odv_path(gtag, k)
for path in [nif_path, odv_path]:
try:
os.remove(path)
print(f'removed {path}')
except Exception as e:
print(f'failed to remove {path} because of {e}')
def clean_mcl_pair(gtag1, gtag2, notes=''):
from odv_helpers import two_gtags_to_k, two_gtags_to_n, get_odv_ort_path
k = two_gtags_to_k(gtag1, gtag2)
n = two_gtags_to_n(gtag1, gtag2)
ort_path = get_odv_ort_path(gtag1, gtag2, k, n, notes=notes)
out_path, ag_path, time_path = get_mcl_paths(gtag1, gtag2, k, n, notes=notes)
for path in [ort_path, mcl_out_path, ag_path, time_path]:
try:
os.remove(path)
print(f'removed {path}')
except Exception as e:
print(f'failed to remove {path} because of {e}')
def clean_mcl(gtag1, gtag2, notes=''):
clean_mcl_single(gtag1, notes=notes)
clean_mcl_single(gtag2, notes=notes)
clean_mcl_pair(gtag1, gtag2, notes=notes)
if __name__ == '__main__':
mode = sys.argv[1]
gtag1 = sys.argv[2]
gtag2 = sys.argv[3]
notes = sys.argv[4] if len(sys.argv) > 4 else ''
if mode == 'prep':
prepare_mcl(gtag1, gtag2, notes=notes)
# copy_to_out(gtag1, gtag2, notes=notes)
elif mode == 'proc':
# take_from_out(gtag1, gtag2, notes=notes)
process_mcl(gtag1, gtag2, notes=notes)
elif mode == 'full':
full_local_run_mcl(gtag1, gtag2, notes=notes)
elif mode == 'clean':
clean_mcl(gtag1, gtag2, notes=notes)
elif mode == 'wcpy':
wayne_copy_mcl(gtag1, gtag2, notes=notes)
else:
print('USAGE: mcl_helpers.py gtag1 gtag2 mode, where mode is prep or proc')