-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdata_loader.py
80 lines (69 loc) · 2.42 KB
/
data_loader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
__author__ = 'hervemn'
import scipy.sparse as scp
import numpy as np
import matplotlib.pyplot as plt
import os
import wx
def file_len(fname):
    """Return the number of lines in the text file at ``fname``.

    The file is streamed line by line, so it is never held in memory as a
    whole.

    :param fname: path of the file to read.
    :returns: the line count; 0 for an empty file.
    """
    # Pre-bind the counter so an empty file (loop body never runs) yields 0
    # instead of failing on an unbound name.
    count = 0
    with open(fname) as f:
        for count, _ in enumerate(f, 1):
            pass
    return count
class data(object):
    """Import raw contact data.

    Reads a fragment contact file and turns it into a sparse matrix of
    per-pair line counts (``create_sparse_dict`` followed by
    ``dok_to_csr``).
    """

    def __init__(self, input_folder, output_folder):
        """Record the input/output paths and count the fragments.

        :param input_folder: folder expected to contain
            ``abs_fragments_contacts_weighted.txt`` and ``fragments_list.txt``.
        :param output_folder: folder used to build the path of
            ``sparse_contacts.txt``.
        """
        self.input_folder = input_folder
        self.contact_file = os.path.join(input_folder, 'abs_fragments_contacts_weighted.txt')
        self.list_frag = os.path.join(input_folder, 'fragments_list.txt')
        self.output_folder = output_folder
        self.sparse_matrix_file = os.path.join(self.output_folder, 'sparse_contacts.txt')
        # One line of the fragment list is not counted as a fragment
        # (presumably a header line -- TODO confirm against the file format).
        self.nfrags = file_len(self.list_frag) - 1

    def create_sparse_dict(self):
        """Build ``self.sparse_dict`` from the contact file.

        The first line of the file is skipped. On every other non-blank line
        the first two whitespace-separated fields are parsed as integer
        fragment ids; any further fields are ignored. Each pair is stored
        with the smaller id first, so ``sparse_dict[f1][f2]`` (``f1 <= f2``)
        holds the number of lines on which that unordered pair appears.

        :raises IndexError: if a non-blank line has fewer than two fields.
        :raises ValueError: if one of the first two fields is not an integer.
        """
        self.sparse_dict = dict()
        # Stream the file inside a context manager so the handle is always
        # closed and the whole file is never held in memory.
        with open(self.contact_file, "r") as handle:
            next(handle, None)  # skip the first line
            for line in handle:
                dat = line.split()
                if not dat:
                    # Blank line (e.g. a trailing newline): nothing to count.
                    continue
                f1, f2 = sorted((int(dat[0]), int(dat[1])))
                row = self.sparse_dict.setdefault(f1, dict())
                row[f2] = row.get(f2, 0) + 1

    def dok_to_csr(self,):
        """Convert ``self.sparse_dict`` into coordinate lists and a CSR matrix.

        Requires ``create_sparse_dict`` to have been called first. Sets:

        * ``out_r`` / ``out_c`` / ``out_d`` -- row ids, column ids and counts
          of the stored entries, with rows in ascending order;
        * ``n_on_pxls`` -- number of stored entries;
        * ``np_csr`` -- ``int32`` array of shape ``(3, n_on_pxls)`` holding
          rows, columns and counts;
        * ``sparse_mat`` -- ``scipy.sparse.csr_matrix`` of shape
          ``(nfrags, nfrags)``. Only the ``f1 <= f2`` entries are filled; the
          matrix is not symmetrised.
        """
        self.out_r = []
        self.out_c = []
        self.out_d = []
        # sorted() works on dict views; dict.keys() has no .sort() on Python 3.
        for r in sorted(self.sparse_dict):
            row = self.sparse_dict[r]
            for c, count in row.items():
                self.out_r.append(r)
                self.out_c.append(c)
                self.out_d.append(count)
        self.n_on_pxls = len(self.out_d)
        self.np_csr = np.zeros((3, self.n_on_pxls), dtype=np.int32)
        self.np_csr[0, :] = self.out_r
        self.np_csr[1, :] = self.out_c
        self.np_csr[2, :] = self.out_d
        # Pass the indices as an explicit (row, col) tuple, the documented
        # csr_matrix((data, (row_ind, col_ind))) form.
        self.sparse_mat = scp.csr_matrix(
            (self.np_csr[2, :], (self.np_csr[0, :], self.np_csr[1, :])),
            shape=(self.nfrags, self.nfrags),
        )
# if __name__ == '__main__':
# app = wx.App()
# frame = wx.Frame(None, -1, '')
# frame.SetToolTip(wx.ToolTip('HiC data loader'))
# frame.SetPosition(wx.Point(0,0))
# frame.SetSize(wx.Size(300,250))
# frame.SetTitle('HiC data loader')
# frame.Show()
#
# app.MainLoop()