-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathassociate.py
37 lines (36 loc) · 1.5 KB
/
associate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
#!/usr/bin/env python
"""
associate.py
Associates output of tag.py with SRR and SRP accession numbers from output
of ann.py. All input is read from stdin. cat in this order:
index_to_SRA_accession.tsv
output of tag.py
output of ann.py
"""
import sys
# Column titles for the statistics lines; the captured metadata header is
# appended to these to form the header of the combined table.
_STAT_COLUMNS = [
    'sample or project', 'accession', 'junction count',
    'annotated junction count', 'read count',
    'reads overlapping annotated junctions',
    'proportion of junctions that are annotated',
    'proportion of reads overlapping annotated junctions',
]

# A line is taken to be the metadata header only if it contains every one of
# these substrings.
_HEADER_MARKERS = ('cell line', 'small rna', 'submission date', 'attributes')


def associate(lines):
    """Yield statistics lines with the matching metadata line appended.

    Each input line is classified, in this order of precedence, as:

    1. an index line: exactly 5 tab-separated fields where characters 1-2 of
       field 1 are 'RP' and characters 1-2 of field 2 are 'RS'. Fields 1 and
       4 are both mapped to field 2 (presumably project/run accession ->
       sample accession; confirm against index_to_SRA_accession.tsv).
    2. the metadata header: the first line containing all of
       ``_HEADER_MARKERS``.
    3. a metadata line: any line with more than 10 fields seen after the
       header; stored under its field 9.
    4. a statistics line: anything else. It is emitted with the metadata line
       of the sample that its field 1 maps to appended after a tab.

    Because lookups happen as statistics lines are read, index and metadata
    lines must precede the statistics lines in the input.

    Args:
        lines: iterable of text lines (trailing newlines allowed).

    Yields:
        Output lines without a trailing newline; the combined header line is
        yielded once, immediately before the first statistics line.

    Raises:
        ValueError: a statistics line is reached before any metadata header.
        KeyError: a statistics line's accession, or its sample, is unknown.
        IndexError: a non-blank statistics line has fewer than 2 fields.
    """
    acc_to_sample = {}
    sample_to_line = {}
    header_line = None  # None until the metadata header has been captured
    printed_header_line = False
    for line in lines:
        # NOTE(review): strip() also removes leading/trailing tabs, so empty
        # first/last fields shift the column indices used below. Kept as in
        # the original to avoid changing how existing inputs are classified.
        stripped = line.strip()
        if not stripped:
            # Blank lines (e.g. a trailing newline from cat) carry no data;
            # without this guard they reach tokens[1] below and crash.
            continue
        tokens = stripped.split('\t')
        if (len(tokens) == 5 and tokens[1][1:3] == 'RP'
                and tokens[2][1:3] == 'RS'):
            acc_to_sample[tokens[1]] = tokens[2]
            acc_to_sample[tokens[4]] = tokens[2]
        elif header_line is None and all(
                marker in line for marker in _HEADER_MARKERS):
            header_line = stripped
        elif header_line is not None and len(tokens) > 10:
            sample_to_line[tokens[9]] = stripped
        else:
            if header_line is None:
                raise ValueError(
                    'statistics line encountered before the metadata header; '
                    'check the order in which the inputs were concatenated'
                )
            if not printed_header_line:
                yield '\t'.join(_STAT_COLUMNS) + '\t' + header_line
                printed_header_line = True
            yield stripped + '\t' + sample_to_line[acc_to_sample[tokens[1]]]


def main():
    """Read the concatenated inputs from stdin and write the table to stdout."""
    for out_line in associate(sys.stdin):
        # sys.stdout.write behaves the same on Python 2 and Python 3, unlike
        # the print statement.
        sys.stdout.write(out_line + '\n')


if __name__ == '__main__':
    main()