-
Notifications
You must be signed in to change notification settings - Fork 7
/
make_sacCer3_gap.py
executable file
·40 lines (33 loc) · 1.2 KB
/
make_sacCer3_gap.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
#!/usr/bin/env python
import subprocess
import gzip
import csv
import os
import sys
import mirnylib.genome
# Build the centromere gap file for a sacCer3 genome folder.
#
# The script downloads the sgdOther track from UCSC, collects the span of
# every feature whose name starts with 'CEN', passes the spans to
# mirnylib's Genome.setCentromeres() and then calls createGapFile().
#
# Usage: make_sacCer3_gap.py <path to the folder with the sacCer3 genome>

SGD_OTHER_URL = (
    'http://hgdownload.cse.ucsc.edu/goldenPath/sacCer3/database/'
    'sgdOther.txt.gz')
SGD_OTHER_FILE = 'sgdOther.txt.gz'

if len(sys.argv) != 2:
    print('Please, supply the path to the folder with the sacCer3 genome.')
    sys.exit(1)

print(sys.argv)

print('Download and parse sgdOther track...')
centromere_starts = {}
centromere_ends = {}

# The csv module expects byte strings on Python 2 and text on Python 3.
gzip_mode = 'rb' if sys.version_info[0] < 3 else 'rt'

try:
    # check_call raises CalledProcessError when wget fails, instead of
    # letting a missing or partial download surface later as a gzip error.
    # '-O' pins the output name: without it wget would save to
    # 'sgdOther.txt.gz.1' if a stale copy exists, and the stale copy would
    # be the one that gets parsed.
    subprocess.check_call(['wget', '-O', SGD_OTHER_FILE, SGD_OTHER_URL])

    # NOTE(review): columns 2, 3 and 4 are presumably chromStart, chromEnd
    # and name of the UCSC table layout -- confirm against the table schema.
    with gzip.open(SGD_OTHER_FILE, gzip_mode) as track:
        for row in csv.reader(track, dialect='excel-tab'):
            name = row[4]
            if not name.startswith('CEN'):
                continue
            # Whatever follows the 'CEN' prefix is used as the chromosome
            # number.
            chr_num = int(name[3:])
            start, end = int(row[2]), int(row[3])
            # Several rows may refer to the same centromere; keep the
            # widest span seen so far.
            if chr_num in centromere_starts:
                start = min(start, centromere_starts[chr_num])
                end = max(end, centromere_ends[chr_num])
            centromere_starts[chr_num] = start
            centromere_ends[chr_num] = end
finally:
    # Always clean up, so that a failed run does not leave a stale file
    # behind for the next one.
    if os.path.exists(SGD_OTHER_FILE):
        os.remove(SGD_OTHER_FILE)

print('Save the centromere positions into a .gap file')
# setCentromeres() is given string chromosome labels mapped to
# (start, end) tuples.
centromere_positions = {
    str(chr_num): (centromere_starts[chr_num], centromere_ends[chr_num])
    for chr_num in centromere_starts
}

genome_db = mirnylib.genome.Genome(sys.argv[1])
genome_db.setCentromeres(centromere_positions=centromere_positions)
genome_db.createGapFile()