-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathbuild_dataset.py
184 lines (129 loc) · 5.48 KB
/
build_dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
from math import ceil
import scipy.misc
from scipy.misc import imresize
import cv2
import sys
import os
import os.path
import string
import h5py
import numpy as np
from collections import namedtuple
import random
import time
import math
import traceback
# Number of grid cells along each image axis; also used as the divisor that
# normalises the centre offsets returned by calc_pose.
delta = 16.0
# Width, in pixels, that every image is resized to before labelling.
W = 512.0
# Height, in pixels, that every image is resized to before labelling.
H = 512.0
# Number of processed images after which generate_dataset starts a new output DB.
max_db_size = 1000
def get_rotation(tl, tr):
    """Return the rotation, in degrees, of the edge running from ``tl`` to ``tr``.

    Args:
        tl: ``(x, y)`` of the top-left corner.
        tr: ``(x, y)`` of the top-right corner.

    Returns:
        float: the ``atan2`` angle of the edge converted to degrees, with
        180 added when the edge's y-delta is negative, otherwise 360 added
        when its x-delta is negative.
    """
    # The original bound this tuple to the misspelled name ``defs`` while every
    # later line read ``diffs``, so the function always raised NameError.
    diffs = (float(tr[0] - tl[0]), float(tr[1] - tl[1]))
    rotation = math.atan2(diffs[1], diffs[0]) * 180.0 / math.pi
    # NOTE(review): these offsets do not normalise the angle into [0, 360)
    # (e.g. a negative y-delta is shifted by 180, not 360). Kept as-is because
    # the intended convention is not visible here -- confirm before changing.
    if diffs[1] < 0:
        rotation += 180
    elif diffs[0] < 0:
        rotation += 360
    return rotation
def calc_pose(tl, tr, bl, br):
    """Compute the normalised pose of a bounding box relative to its grid cell.

    Args:
        tl, tr, bl, br: ``(x, y)`` corners of the box (top-left, top-right,
            bottom-left, bottom-right), in the coordinate system of the
            ``W`` x ``H`` image.

    Returns:
        tuple: ``(dx, dy, w, h, cos_theta, sin_theta)`` where

        * ``dx``, ``dy`` are the offsets of the box centre from the centre of
          the grid cell that contains it, divided by ``delta``;
        * ``w``, ``h`` are the box side lengths divided by ``W`` and ``H``;
        * ``cos_theta``, ``sin_theta`` are the cosine and sine of the angle
          returned by ``get_rotation(tl, tr)``. Note the order is cosine
          first, then sine; ``generate_dataset`` stores them in this order.
    """
    # Box centre: midpoint of the tl-br diagonal.
    x = float(tl[0] + br[0]) / 2.0
    y = float(tl[1] + br[1]) / 2.0

    # Centre (u, v) of the grid cell that contains the box centre.
    cell_W = W / delta
    cell_H = H / delta
    u = math.floor(x / cell_W) * cell_W + (cell_W / 2.0)
    v = math.floor(y / cell_H) * cell_H + (cell_H / 2.0)

    # Rotation of the top edge, in degrees.
    theta = get_rotation(tl, tr)

    if theta != 0.0:
        # Rotated box: side lengths are Euclidean distances between corners.
        # The original divided the squared deltas instead of adding them,
        # which gave wrong lengths and raised ZeroDivisionError whenever an
        # edge had no y-extent.
        w = math.hypot(tr[0] - tl[0], tr[1] - tl[1])
        h = math.hypot(br[0] - tr[0], br[1] - tr[1])
    else:
        # Axis-aligned box: plain coordinate differences suffice.
        w = tr[0] - tl[0]
        h = bl[1] - tl[1]

    # math.cos / math.sin take radians, while get_rotation returns degrees.
    theta_rad = math.radians(theta)
    return (
        (x - u) / delta,
        (y - v) / delta,
        float(w) / W,
        float(h) / H,
        math.cos(theta_rad),
        math.sin(theta_rad),
    )
def id_generator(size = 6, chars = string.ascii_uppercase + string.digits):
    """Return a random identifier made of ``size`` characters taken from ``chars``.

    Args:
        size: number of characters in the identifier (default 6).
        chars: pool of characters to draw from (default: upper-case ASCII
            letters and digits).

    Returns:
        str: the generated identifier; the empty string when ``size`` is 0.
    """
    # ``random.choise`` does not exist (AttributeError); the function is ``choice``.
    return ''.join(random.choice(chars) for _ in range(size))
def create_new_db(path):
    """Create a new HDF5 file with a random, unused name inside ``path``.

    The file is opened for writing, given an empty ``/data`` group, and its
    name is printed.

    Args:
        path: directory in which the file is created.

    Returns:
        The open ``h5py.File`` handle.
    """
    # Keep drawing random names until one does not collide with an existing path.
    while True:
        candidate = path + "/" + id_generator()
        if not os.path.exists(candidate):
            break

    handle = h5py.File(candidate, 'w')
    handle.create_group("/data")
    print("Created DB: " + candidate)
    return handle
def add_res_to_db(db, img, labels):
    """Store one image and its label array in the ``data`` group of ``db``.

    The dataset is named with a random identifier followed by the current
    time in milliseconds; ``labels`` is attached as the ``label`` attribute.
    This is best-effort: any error is printed and the sample is skipped.

    Args:
        db: open, writable HDF5 handle containing a ``data`` group.
        img: image array written as the dataset contents.
        labels: label values; converted with ``np.array`` before storing.
    """
    try:
        seed = id_generator() + "_" + str(int(round(time.time() * 1000)))
        db['data'].create_dataset(seed, data = img)
        db['data'][seed].attrs['label'] = np.array(labels)
    except Exception as exc:
        # Deliberately best-effort, but unlike a bare ``except:`` this lets
        # KeyboardInterrupt / SystemExit propagate.
        print(exc)
def _scaled_boxes(wordBB, w_scale, h_scale):
    """Return ``(tl, tr, br, bl, cx, cy)`` for every box in ``wordBB``, scaled to the resized image."""
    boxes = []
    for k in range(wordBB.shape[-1]):
        bb = wordBB[:, :, k]
        # Appends the first corner as an extra column; only the first four
        # columns are used below.
        bb = np.c_[bb, bb[:, 0]]
        (tl, tr, br, bl) = bb[0:, 0:4].T
        tl = (tl[0] * w_scale, tl[1] * h_scale)
        tr = (tr[0] * w_scale, tr[1] * h_scale)
        br = (br[0] * w_scale, br[1] * h_scale)
        bl = (bl[0] * w_scale, bl[1] * h_scale)
        # Box centre: midpoint of the tl-br diagonal.
        cx = float(tl[0] + br[0]) / 2.0
        cy = float(tl[1] + br[1]) / 2.0
        boxes.append((tl, tr, br, bl, cx, cy))
    return boxes


def _build_label_grid(boxes):
    """Return the ``(cells, cells, 7)`` label array for one image from its scaled boxes."""
    cells = int(delta)
    labels = np.empty((cells, cells, 7), dtype = 'float64')
    h_step = H / delta
    w_step = W / delta
    for i in range(cells):
        # NOTE(review): the x-range is derived from h_step and the y-range
        # from w_step, exactly as in the original; harmless while W == H.
        minX = i * h_step
        maxX = i * h_step + h_step
        for j in range(cells):
            minY = j * w_step
            maxY = j * w_step + w_step
            # A cell with no box centre inside it gets an all-zero pose and
            # zero confidence.
            pose = (0.0, 0.0, 0.0, 0.0, 0.0, 0.0)
            c = 0.0
            for (tl, tr, br, bl, cx, cy) in boxes:
                # The original wrote the midpoint into the same x/y variables
                # that hold the pose, so unmatched cells received a stray
                # midpoint and matched cells could be overwritten by later
                # boxes. The midpoint now lives in cx/cy; as before, the last
                # matching box wins.
                if minX <= cx <= maxX and minY <= cy <= maxY:
                    c = 1.0
                    pose = calc_pose(tl, tr, bl, br)
            labels[i][j] = np.array(list(pose) + [c])
    return labels


def generate_dataset(db_location, output_location):
    """Convert every HDF5 file in ``db_location`` into labelled training DBs.

    Each item of an input file's ``data`` group is resized to ``W`` x ``H``,
    converted to grayscale, and paired with a grid of per-cell labels
    ``[pose (6 values from calc_pose), confidence]`` built from the item's
    ``wordBB`` attribute. Results are written to HDF5 files created in
    ``output_location``; a new output file is started every ``max_db_size``
    processed images. Input files that raise an error are reported and
    skipped.

    Args:
        db_location: directory containing the input HDF5 files.
        output_location: directory for the output files (created if missing).
    """
    if not os.path.exists(output_location):
        os.makedirs(output_location)

    dbs = [f for f in os.listdir(db_location) if os.path.isfile(db_location + "/" + f)]
    random.shuffle(dbs)

    total = 0
    images = 0
    out_db = create_new_db(output_location)
    try:
        for cur_db in dbs:
            try:
                with h5py.File(db_location + "/" + cur_db, 'r') as in_db:
                    count = 0
                    # ``values()`` / ``range`` work on both Python 2 and 3,
                    # unlike ``itervalues()`` / ``xrange``.
                    for item in in_db['data'].values():
                        raw = item[:]
                        orig_dims = raw.shape
                        h_scale = H / float(orig_dims[0])
                        w_scale = W / float(orig_dims[1])

                        img = imresize(raw, (int(H), int(W)), interp = 'bicubic')
                        img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
                        images += 1

                        # Bounding boxes of the words in this image.
                        wordBB = item.attrs['wordBB']
                        labels = _build_label_grid(_scaled_boxes(wordBB, w_scale, h_scale))

                        count += 1
                        total += 1
                        if total == max_db_size:
                            total = 0
                            # Open the replacement first so out_db stays
                            # usable if creation fails, then release the
                            # finished file (it was previously leaked).
                            new_db = create_new_db(output_location)
                            out_db.close()
                            out_db = new_db

                        # No cell has confidence 1: every box centre fell
                        # outside the image. The original author attributes
                        # this to out-of-bounds boxes in the SynthText data.
                        # Such images are reported and not stored.
                        if np.count_nonzero(labels[:,:,6]) == 0:
                            print("FOUND NONZERO- hscale=" + str(h_scale) + ", wscale=" + str(w_scale) + ", size=" + str(orig_dims))
                            print(str(wordBB.T))
                        else:
                            add_res_to_db(out_db, img, labels)
                    print("Extracted " + str(count) + " images from " + cur_db + ".")
            except Exception:
                # Best-effort per input file; a bare ``except:`` would also
                # have swallowed KeyboardInterrupt.
                print("Error loading from " + cur_db + "(" + str(sys.exc_info()[0]) + ") continuing...")
                print(str(sys.exc_info()[1]))
                traceback.print_tb(sys.exc_info()[2])
    finally:
        out_db.close()

    print("Total number of ground truth images: " + str(images))