-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathocr.py
98 lines (85 loc) · 3.77 KB
/
ocr.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
# import numpy as np
# import cv2
# import pytesseract
# from keras.models import load_model
# from keras.preprocessing import image
# pip install numpy
# pip install opencv-python
# pip install Keras
# pip install pytesseract
# Takes in an image file path and a model file path; returns a list of newline indices and a list of predicted character IDs (in that order)
# Image file path example: '../input/testimages7/test.jpg'
# Model file path example: '../input/hasyv2npz/0.42-09.hdf5'
def _pad_to_square(glyph, size):
    """Scale a grayscale crop so its longer side is `size`, then pad it with white to size x size."""
    glyph_height, glyph_width = glyph.shape
    if glyph_height > glyph_width:
        # Tall glyph: fix the height, scale the width, pad left and right.
        # max(1, ...) keeps extremely thin crops from asking for a 0-pixel resize.
        new_width = max(1, int(size * glyph_width / glyph_height))
        glyph = cv2.resize(glyph, (new_width, size))
        pad = size - new_width
        before = pad // 2
        # The second border takes the remainder so an odd `pad` still adds up
        # to exactly `size` (two equal halves would come out one pixel short).
        return cv2.copyMakeBorder(glyph, 0, 0, before, pad - before, cv2.BORDER_CONSTANT, value=255.0)

    # Wide or square glyph: fix the width, scale the height, pad top and bottom.
    new_height = max(1, int(size * glyph_height / glyph_width))
    glyph = cv2.resize(glyph, (size, new_height))
    pad = size - new_height
    before = pad // 2
    return cv2.copyMakeBorder(glyph, before, pad - before, 0, 0, cv2.BORDER_CONSTANT, value=255.0)


def ocr(image_path, model_path):
    """Classify every character tesseract locates in an image.

    The image is binarized, pytesseract supplies one bounding box per
    character, and each box is cropped, squared to 224 x 224, and passed to
    the Keras model loaded from `model_path`. Boxes that cannot be processed
    are reported on stdout and skipped, so one bad box does not abort the run.

    Relies on `load_model`, `cv2`, `pytesseract`, `image` (keras.preprocessing)
    and `np` being available in module scope.

    Args:
        image_path: Path of the image to read, e.g. '../input/testimages7/test.jpg'.
        model_path: Path of the saved model, e.g. '../input/hasyv2npz/0.42-09.hdf5'.

    Returns:
        A tuple `(newlines, characters)`:
        characters is the list of predicted class ids, one per successfully
        processed box, in the order tesseract reported the boxes;
        newlines is the list of indices into `characters` at which a new
        text line starts (index 0 is never listed).

    Raises:
        Whatever `load_model`, `cv2` or `pytesseract` raise for an unreadable
        model or image. Note that `cv2.imread` returns None instead of raising
        for a bad path, so the failure surfaces in the `cvtColor` call.
    """
    CLEARANCE = 8            # extra pixels kept around each tesseract box
    SIZE = 224               # side length of the square image fed to the model
    CONFIDENCE_FLOOR = 0.7   # below this the top prediction may be overridden
    LINE_DROP = 50           # drop in box top (pixels) that marks a new text line

    model = load_model(model_path)

    # Load the image and convert it to pure black and white. With THRESH_OTSU
    # the threshold is chosen by Otsu's method; the 127 is not used.
    img = cv2.imread(image_path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    _, img = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
    height = img.shape[0]

    tops = []        # top coordinate of every box that produced a prediction
    characters = []  # predicted class id for the same boxes

    # One line per character: "<char> <left> <bottom> <right> <top> <page>".
    boxes = pytesseract.image_to_boxes(img)
    for line in boxes.splitlines():
        fields = line.split(' ')
        try:
            left, bottom, right, top = (int(value) for value in fields[1:5])

            # Box y coordinates are measured from the bottom of the image, so
            # they are flipped into array rows. Every bound is clamped at 0
            # because a negative slice index would wrap around to the far edge.
            row_start = max(0, height - top - CLEARANCE)
            row_stop = max(0, height - bottom + CLEARANCE)
            col_start = max(0, left - CLEARANCE)
            col_stop = max(0, right + CLEARANCE)
            crop_img = img[row_start:row_stop, col_start:col_stop]
            if crop_img.size == 0:
                print("error: empty crop for box", fields)
                continue

            # Square the crop, expand it to 3 channels, add the batch axis and
            # scale the pixel values into [0, 1].
            crop_img = _pad_to_square(crop_img, SIZE)
            crop_img = cv2.cvtColor(crop_img, cv2.COLOR_GRAY2RGB)
            crop_img = image.img_to_array(crop_img)
            crop_img = np.expand_dims(crop_img, axis=0)
            crop_img = crop_img / 255.0

            predictions = model.predict(crop_img)
            prediction = np.argmax(predictions)
            second_prediction = np.argsort(np.max(predictions, axis=0))[-2]

            # NOTE(review): ids 0-61 are presumably the alphanumeric classes
            # and 109 / 116 look-alikes of 14 / 24 -- confirm against the
            # charmap, which is not part of this file.
            if (np.max(predictions) < CONFIDENCE_FLOOR
                    and prediction > 61 and second_prediction < 62):
                # Low-confidence id above 61: fall back to the runner-up when
                # that one is below 62.
                prediction = second_prediction
            elif prediction == 109:
                prediction = 14
            elif prediction == 116:
                prediction = 24
        except Exception as exc:
            # Best effort: report the failing box and carry on with the rest.
            # `Exception` (not a bare except) lets Ctrl-C and SystemExit through.
            print("error:", exc)
            continue

        tops.append(top)
        characters.append(prediction)

    # A new text line starts wherever a box sits more than LINE_DROP pixels
    # lower than the box before it.
    newlines = []
    previous_top = 0
    for index, top in enumerate(tops):
        if index > 0 and previous_top - top > LINE_DROP:
            newlines.append(index)
        previous_top = top

    return newlines, characters