-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathcounts.py
186 lines (162 loc) · 6.75 KB
/
counts.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
class Counts(object):
    """A named set of per-dataset contingency tables plus overall totals.

    Attributes:
        name: label stored in the first header field of a counts file.
        filename: path most recently passed to read().
        tables: one table object per dataset (ConTable instances when
            populated by read() or ave_props()).
        num_datasets: how many entries of ``tables`` the arithmetic and
            writing methods iterate over.
        tot_negats, tot_posits: the two totals stored on the second header
            line of a counts file (negatives first, then positives).

    The code runs on both Python 2 and Python 3.
    """

    def __init__(self):
        self.name = ""
        self.filename = ""
        self.tables = []
        self.num_datasets = 0
        self.tot_posits = 0
        self.tot_negats = 0

    def read(self, filename):
        """Load this object from a tab-separated counts file.

        Expected layout: line 1 is ``name<TAB>num_datasets``, line 2 is
        ``tot_negats<TAB>tot_posits``, followed by three-line records
        (dataset name, negative bins, positive bins).  Reading stops at the
        first blank line or at end of file.

        Tables loaded by an earlier read() are replaced, so ``tables`` and
        ``num_datasets`` always describe the same file.

        Raises:
            IOError/OSError: if the file cannot be opened.
            ValueError: if a header or bin line cannot be parsed.
        """
        self.filename = filename
        loaded = []
        # The context manager closes the file even when parsing raises.
        with open(filename, 'r') as handle:
            name, num = handle.readline().split('\t')
            num_datasets = int(num)
            neg, pos = handle.readline().split('\t')
            tot_negats = int(neg)
            tot_posits = int(pos)

            # Contingency tables: name line, then negative and positive bins.
            dataset_name = handle.readline().strip()
            while dataset_name:
                neg_bins = [float(x)
                            for x in handle.readline().strip().split('\t')]
                pos_bins = [float(x)
                            for x in handle.readline().strip().split('\t')]
                loaded.append(ConTable(dataset_name, neg_bins, pos_bins))
                dataset_name = handle.readline().strip()

        # Commit only after the whole file parsed successfully.
        self.name = name
        self.num_datasets = num_datasets
        self.tot_negats = tot_negats
        self.tot_posits = tot_posits
        self.tables = loaded

    def __repr__(self):
        return "Counts: " + self.name

    def __add__(self, other):
        """Return a new Counts with totals and tables summed pairwise.

        The result's ``name`` is deliberately left empty.  ``num_datasets``
        is taken from ``self``; an IndexError is raised if either operand
        holds fewer tables than that.
        """
        sum_count = Counts()
        sum_count.num_datasets = self.num_datasets
        sum_count.tot_negats = self.tot_negats + other.tot_negats
        sum_count.tot_posits = self.tot_posits + other.tot_posits
        sum_count.tables = [self.tables[i] + other.tables[i]
                            for i in range(self.num_datasets)]
        return sum_count

    def __mul__(self, other):
        """Return a new Counts with totals and every table scaled by ``other``."""
        mul_count = Counts()
        mul_count.name = self.name
        mul_count.num_datasets = self.num_datasets
        mul_count.tot_negats = self.tot_negats * other
        mul_count.tot_posits = self.tot_posits * other
        mul_count.tables = [self.tables[i] * other
                            for i in range(self.num_datasets)]
        return mul_count

    def __rmul__(self, other):
        """Support ``scalar * counts``; scaling is commutative."""
        return self.__mul__(other)

    def __truediv__(self, other):
        """Return this object scaled by ``1.0 / other``."""
        return self * (1.0 / other)

    # Python 2 dispatches ``/`` to __div__, Python 3 to __truediv__.
    __div__ = __truediv__

    def write_counts(self, filename, mult=1):
        """Write the header and tables to ``filename`` in the read() format.

        Every bin value is multiplied by ``mult`` and truncated with int()
        before being written.  The header totals are written truncated but
        are NOT scaled by ``mult``.
        """
        with open(filename, 'w') as out:
            out.write(self.name + "\t" + str(self.num_datasets) + "\n")
            out.write(str(int(self.tot_negats)) + "\t" +
                      str(int(self.tot_posits)) + '\n')
            for i in range(self.num_datasets):
                table = self.tables[i]
                out.write(table.name + '\n')
                out.write('\t'.join([str(int(x * mult))
                                     for x in table.neg_bins]) + '\n')
                out.write('\t'.join([str(int(x * mult))
                                     for x in table.pos_bins]) + '\n')

    def to_props(self):
        """Ask every table to compute its proportions in place."""
        for table in self.tables:
            table.to_props()

    @staticmethod
    def ave_props(counts_list, weight_list=None):
        """Average the table proportions of several Counts objects.

        Args:
            counts_list: Counts objects whose tables already carry
                proportions; the dataset count is taken from the first.
            weight_list: optional weights, one per entry of ``counts_list``.
                They are normalised to sum to 1 before use.  A falsy value
                requests a plain unweighted mean.

        Returns:
            A new Counts with both totals set to 1 and one averaged table
            per dataset, or None when ``counts_list`` is empty.
        """
        if not counts_list:
            return None
        ave_counts = Counts()
        ave_counts.num_datasets = counts_list[0].num_datasets
        ave_counts.tot_negats = 1
        ave_counts.tot_posits = 1
        if weight_list:
            # Sum once instead of once per weight.
            weight_total = sum(weight_list)
            weight_list = [float(w) / weight_total for w in weight_list]
        for i in range(ave_counts.num_datasets):
            ave_counts.tables.append(
                ConTable.ave_props([c.tables[i] for c in counts_list],
                                   weight_list))
        return ave_counts

    def counts_from_props(self, mult=100000):
        """Rebuild bin counts from proportions, scaled to ``mult`` per class.

        Both totals are set to ``mult`` and each bin becomes
        ``int(proportion * mult)`` (truncated, not rounded).
        """
        self.tot_negats = mult
        self.tot_posits = mult
        for c_table in self.tables:
            c_table.neg_bins = [int(x * mult) for x in c_table.neg_props]
            c_table.pos_bins = [int(x * mult) for x in c_table.pos_props]
class ConTable(object):
    """Contingency table for one dataset: per-bin negative/positive values.

    Attributes:
        name: dataset label.
        neg_bins, pos_bins: raw per-bin values.
        neg_props, pos_props: per-bin proportions; empty until to_props()
            or ave_props() fills them.

    The code runs on both Python 2 and Python 3.
    """

    def __init__(self, name="", neg_bins=None, pos_bins=None):
        """Create a table, copying the supplied bin sequences.

        The bins are copied so later changes to the caller's sequences do
        not affect this table.  A missing or empty argument yields an empty
        list.
        """
        self.name = name
        self.neg_props = []
        self.pos_props = []
        self.neg_bins = list(neg_bins) if neg_bins else []
        self.pos_bins = list(pos_bins) if pos_bins else []

    def __add__(self, other):
        """Return a new table (named after ``self``) with bins added pairwise.

        Raises:
            TypeError: if ``other`` is not a ConTable.
            IndexError: if ``other`` has fewer bins than ``self``.
        """
        if not isinstance(other, ConTable):
            raise TypeError("cannot add %s to ConTable"
                            % type(other).__name__)
        sum_table = ConTable(self.name)
        # Indexing (rather than zip) keeps a shorter ``other`` an error
        # instead of silently truncating the result.
        sum_table.neg_bins = [x + other.neg_bins[j]
                              for j, x in enumerate(self.neg_bins)]
        sum_table.pos_bins = [x + other.pos_bins[j]
                              for j, x in enumerate(self.pos_bins)]
        return sum_table

    def __radd__(self, other):
        """Reflected addition; same rules as __add__."""
        return self.__add__(other)

    @staticmethod
    def _proportions(bins):
        """Return ``bins`` normalised to sum to 1.

        Bins that sum to zero become a uniform distribution over their own
        length; an empty sequence yields an empty list.
        """
        if not bins:
            return []
        total = sum(bins)
        if total == 0:
            return [1.0 / len(bins)] * len(bins)
        return [float(x) / total for x in bins]

    def to_props(self):
        """Fill neg_props and pos_props from the current bin values."""
        self.neg_props = self._proportions(self.neg_bins)
        self.pos_props = self._proportions(self.pos_bins)

    @staticmethod
    def ave_props(con_table_list, weight_list=None):
        """Average the proportions of several tables bin by bin.

        Args:
            con_table_list: non-empty list of tables whose proportions are
                already populated; the bin count and the result's name come
                from the first one.
            weight_list: optional per-table weights, used as given (they
                are not normalised here).  A falsy value requests a plain
                arithmetic mean.

        Returns:
            A new ConTable with only ``name``, ``neg_props`` and
            ``pos_props`` set.
        """
        num_of_con_tables = len(con_table_list)
        num_of_bins = len(con_table_list[0].neg_props)
        averaged = ConTable()
        averaged.name = con_table_list[0].name
        if not weight_list:
            # float() keeps this a true division on Python 2 as well.
            divisor = float(num_of_con_tables)
            for i in range(num_of_bins):
                averaged.neg_props.append(
                    sum([t.neg_props[i] for t in con_table_list]) / divisor)
                averaged.pos_props.append(
                    sum([t.pos_props[i] for t in con_table_list]) / divisor)
        else:
            for i in range(num_of_bins):
                averaged.neg_props.append(
                    sum([t.neg_props[i] * weight_list[j]
                         for j, t in enumerate(con_table_list)]))
                averaged.pos_props.append(
                    sum([t.pos_props[i] * weight_list[j]
                         for j, t in enumerate(con_table_list)]))
        return averaged

    def __mul__(self, other):
        """Return a new table with every bin multiplied by a real number.

        Raises:
            TypeError: if ``other`` is not a real number.
        """
        # Local import: the file has no import block to extend.
        import numbers
        # numbers.Real covers int, float and Python 2's long without
        # naming ``long``, which does not exist on Python 3.
        if not isinstance(other, numbers.Real):
            raise TypeError("cannot multiply ConTable by %s"
                            % type(other).__name__)
        mul_table = ConTable(self.name)
        mul_table.neg_bins = [x * other for x in self.neg_bins]
        mul_table.pos_bins = [x * other for x in self.pos_bins]
        return mul_table

    def __rmul__(self, other):
        """Support ``scalar * table``; scaling is commutative."""
        return self.__mul__(other)

    def __repr__(self):
        """Return the three-line text form: name, negative bins, positive bins."""
        return (self.name + '\n' +
                '\t'.join([str(x) for x in self.neg_bins]) + '\n' +
                '\t'.join([str(x) for x in self.pos_bins]) + '\n')