forked from Phlya/liftOverBedpe
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathliftOverBedpe.py
executable file
·136 lines (107 loc) · 4.62 KB
/
liftOverBedpe.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
from __future__ import print_function
import argparse
import os
try:
from collections.abc import OrderedDict
except ImportError:
from collections import OrderedDict
########################################################################################
# #
# Functions #
# #
########################################################################################
""" Define a function that splits up bedpe files into 2 temp files """
def splitBedpe(bedpe, tmp1="tmp1.bed", tmp2="tmp2.bed", tmp_annot="tmp_annot.txt",
header="F", verbose="F"):
t1 = open(tmp1, 'w')
t2 = open(tmp2, 'w')
bedpein = open(bedpe, 'r')
annotations = open(tmp_annot, 'w')
if header == "T":
headerline = bedpein.readline()
for n, l in enumerate(bedpein):
n = str(n)
e = l.strip().split("\t")
print("\t".join(e[0:3] + [n]), file=t1)
print("\t".join(e[3:6] + [n]), file=t2)
print("\t".join(e[6:] + [n]), file=annotations)
t1.close()
t2.close()
bedpein.close()
annotations.close()
""" Define a function that implements liftOver """
def doliftOver(liftOver,chain,infile,verbose="F"):
cmd = " ".join([liftOver,infile,chain,infile + ".success",infile + ".failure"])
if verbose == "T":
print(cmd)
os.system(cmd)
""" Define a function that merges liftOver """
def mergeliftOver(f1, f2, annotations, outputfile, verbose="F"):
o = open(outputfile,'w')
# read in file 1 and make dictionary
readdict = OrderedDict()
f = open(f1, 'r')
for l in f:
e = l.strip().split('\t')
readdict[e[3]] = e[:3]
f.close()
# read in file2 and print out matches
f = open(f2,'r')
for l in f:
e = l.strip().split('\t')
if e[3] in readdict:
readdict[e[3]] += e[:3]
f = open(annotations,'r')
for l in f:
e = l.strip().split('\t')
if e[-1] in readdict:
readdict[e[-1]] += e[:-1]
for i, val in readdict.items():
print("\t".join(val), file=o)
f.close()
o.close()
########################################################################################
# #
# Parse arguments #
# #
########################################################################################
parser = argparse.ArgumentParser(description='wrapper for liftOver to accomodate bedpe files')
# required arguments
parser.add_argument('--lift', dest='liftOver', help='path to liftOver')
parser.add_argument('--chain', dest='chain', help='(e.g. hg19ToHg18.over.chain)')
parser.add_argument('--i', dest='infile', help='input file in bedpe format')
parser.add_argument('--o', dest='outfile', help='output file')
parser.add_argument('--v', dest='verbose', help='verbose' , default = "F")
parser.add_argument('--h', dest='header', help='T / F if there is a header line', default = "F")
# parse arguments
args = parser.parse_args()
# read in args
LO = args.liftOver
chain = args.chain
bedpeIN = args.infile
bedpeOUT = args.outfile
tmp1 = "tmp1.bed"
tmp2 = "tmp2.bed"
tmp_annot= "tmp_annot.txt"
header = args.header
verbose = args.verbose
#####################################################################################################
# #
# Run the Code #
# #
#####################################################################################################
# break up the files
splitBedpe(bedpeIN, tmp1, tmp2, tmp_annot, header, verbose)
# perform liftOver
doliftOver(LO, chain, tmp1, verbose)
doliftOver(LO, chain, tmp2, verbose)
# merge liftOvered files
mergeliftOver(tmp1+".success", tmp2+".success", tmp_annot, bedpeOUT, verbose)
# remove tmp files
os.remove(tmp1)
os.remove(tmp2)
os.remove(tmp_annot)
os.remove(tmp1+".success")
os.remove(tmp1+".failure")
os.remove(tmp2+".success")
os.remove(tmp2+".failure")