# data_generator.py
# Forked from vuducnghia/attention-ocr.
from image_util import ImageUtil
from vocabulary import Vocabulary
import os
import random
import numpy as np
from config import *
class Generator:
    """Endless batch generator over (text, image path) examples.

    Examples are read once, at construction time, from a label file whose
    lines have the form ``<image file>;<text>``. Batches are then produced by
    ``examples_generator``, which cycles over the examples forever.

    Attributes:
        folder_image: Directory that the image file names in the label file
            are resolved against.
        folder_label: Path of the label file (despite the name, it is opened
            as a text file, not listed as a directory).
        batch_size: Number of examples per yielded batch (``BATCH_SIZE``).
        max_txt_length: Copied from the module-level ``max_txt_length``;
            not used by this class itself.
        examples: List of ``(text, absolute image path)`` tuples.
        cur_index: Index into ``examples`` of the next example to emit.
        image_util: ``ImageUtil`` used to load each image.
        vocab: ``Vocabulary`` used to encode each text.
    """

    def __init__(self, folder_image, folder_label):
        """Store the paths, load the examples and build the helpers.

        Args:
            folder_image: Directory containing the image files.
            folder_label: Path of the ``;``-separated label file.
        """
        self.folder_image = folder_image
        self.folder_label = folder_label
        # BATCH_SIZE, max_txt_length, image_height and image_width are not
        # defined in this file; presumably they come from
        # ``from config import *`` -- verify against config.
        self.batch_size = BATCH_SIZE
        self.max_txt_length = max_txt_length
        self.examples = []
        self.cur_index = 0
        self.load_data()
        self.image_util = ImageUtil(image_height=image_height, image_width=image_width)
        self.vocab = Vocabulary()

    def load_data(self):
        """Append every usable ``(text, image path)`` pair to ``self.examples``.

        A line is used only if it contains a ``;`` and the image it names
        exists as a regular file under ``self.folder_image``. The text is
        everything after the first ``;`` with surrounding whitespace
        stripped, so the text itself may contain further ``;`` characters.
        Lines that do not qualify are skipped silently.
        """
        with open(self.folder_label, 'r') as f:
            # Iterate the file lazily instead of materialising readlines().
            for line in f:
                image_file, separator, txt = line.partition(';')
                if not separator:
                    continue
                image_file = os.path.abspath(os.path.join(self.folder_image, image_file))
                if os.path.isfile(image_file):
                    self.examples.append((txt.strip(), image_file))

    def examples_generator(self):
        """Yield ``(images, targets)`` batches forever.

        The examples are shuffled in place once, when the generator is first
        advanced, and then traversed cyclically starting from
        ``self.cur_index``. Each batch holds ``self.batch_size`` examples;
        a batch may wrap around the end of the example list.

        Yields:
            A tuple ``(np.array(images), np.array(targets))`` where each
            image is the result of ``self.image_util.load(path)`` and each
            target the result of ``self.vocab.one_hot_encode(text)``.

        Raises:
            ValueError: On the first ``next()`` call if no examples were
                loaded.
        """
        if not self.examples:
            raise ValueError(
                'no examples loaded from %r; cannot generate batches'
                % (self.folder_label,)
            )
        random.shuffle(self.examples)
        while True:
            images, target = [], []
            for _ in range(self.batch_size):
                # Wrap before reading and advance after, so that the example
                # at index 0 is not skipped on the first pass.
                if self.cur_index >= len(self.examples):
                    self.cur_index = 0
                txt, img_path = self.examples[self.cur_index]
                self.cur_index += 1
                images.append(self.image_util.load(img_path))
                target.append(self.vocab.one_hot_encode(txt))
            yield np.array(images), np.array(target)