-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathocr_orientation_correction.py
81 lines (65 loc) · 2.87 KB
/
ocr_orientation_correction.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import numpy as np
import cv2
import math
from scipy import ndimage
import pytesseract
# read test images
img_1 = cv2.imread("inputs/ocr_image.jpg")
img_2 = cv2.imread("inputs/ocr_image_2.jpg")
'''
Adjust orientation by using the Canny(detect edges) and HoughLines(detect lines) algorithms and open cv
'''
def orientation_correction(img, save_image=True):
img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) # image to greyscale
img_edges = cv2.Canny(img_gray, 100, 100, apertureSize=3) # detect edges
lines = cv2.HoughLinesP(img_edges, 1, math.pi / 180.0, 100, minLineLength=100, maxLineGap=5)# detect lines
angles = []
for line in lines:
for x1, y1, x2, y2 in line:
angle = math.degrees(math.atan2(y2 - y1, x2 - x1))
angles.append(angle)
# Getting the median angle
median_angle = np.median(angles)
# Rotating the image with this median angle
img_rotated = ndimage.rotate(img, median_angle)
if save_image:
cv2.imwrite('outputs/orientation_corrected.jpg', img_rotated)
return img_rotated
img_rotated = orientation_correction(img_1)
# initializing the list for storing the coordinates
coordinates = []
# Defining the event listener (callback function)
def shape_selection(event, x, y, flags, param):
# making coordinates global
global coordinates
# Storing the (x1,y1) coordinates when left mouse button is pressed
if event == cv2.EVENT_LBUTTONDOWN:
coordinates = [(x, y)] # grab starting point (top left of rectangle)
elif event == cv2.EVENT_LBUTTONUP:
coordinates.append((x, y)) # grab finishing point, bottom right of rectangle
# Drawing a rectangle around the region of interest (roi) after mouse button is released
cv2.rectangle(img_1, coordinates[0], coordinates[1], (0, 255, 255), 2)
cv2.imshow("Region of interest", img_1)
# load the image and setup the mouse callback function
image_copy = img_rotated.copy()
cv2.namedWindow("image")
cv2.setMouseCallback("image", shape_selection)
# keep looping until the 'q' key is pressed
while True:
# display the image and wait for a keypress
cv2.imshow("image", image_copy)
key = cv2.waitKey(1) & 0xFF
if key==13: # If 'enter' is pressed, apply OCR
break
if key == ord("c"): # Clear the selection when 'c' is pressed
image = image_copy.copy()
if len(coordinates) == 2: # top left and bottom right coords have been retrieved
image_roi = image_copy[coordinates[0][1]:coordinates[1][1],
coordinates[0][0]:coordinates[1][0]]
cv2.imshow("Selected Region of Interest - Press any key to proceed", image_roi)
extracted_text = pytesseract.image_to_string(image_roi)# alternatives include google or amazon vision
print(f'The text in the selected region is :\n {extracted_text}')
cv2.waitKey(0)
# closing all open windows
cv2.destroyAllWindows()
#perform OCR on the ROI