-
Notifications
You must be signed in to change notification settings - Fork 15
/
image2tokens.py
93 lines (69 loc) · 2.49 KB
/
image2tokens.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import os
from google.oauth2 import service_account
# Module-level side effect: sets GOOGLE_APPLICATION_CREDENTIALS to
# "credential.json" for the whole process as soon as this module is imported.
# NOTE(review): presumably consumed by the Google client libraries for
# service-account authentication; the path is relative, so confirm which
# working directory callers are expected to run from.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "credential.json"
class Token:
    """A piece of recognised text together with its position and size.

    Attributes:
        x: horizontal position of the token.
        y: vertical position of the token.
        width: horizontal extent of the token.
        height: vertical extent of the token.
        text: the token's text.
    """

    def __init__(self, x, y, width, height, text):
        self.x = x
        self.y = y
        self.width = width
        self.height = height
        self.text = text

    def __repr__(self):
        # Readable form for debugging/logging instead of the default
        # "<Token object at 0x...>".
        return "{}(x={!r}, y={!r}, width={!r}, height={!r}, text={!r})".format(
            type(self).__name__,
            self.x,
            self.y,
            self.width,
            self.height,
            self.text,
        )
def extractTokens(image):
    """Turn an OCR response into a flat list of Token objects.

    Walks ``image["fullTextAnnotation"]`` page -> block -> paragraph -> word.
    Symbol texts are concatenated until a word whose last symbol carries a
    ``property.detectedBreak`` entry is reached; at that point a Token is
    emitted that spans from the first word of the run to that word.

    Args:
        image: mapping holding the OCR result.
            NOTE(review): presumably the JSON form of a Cloud Vision
            document-text response -- confirm against the caller.

    Returns:
        A list of Token objects in reading order of the input. The list is
        empty when the response has no ``fullTextAnnotation`` or no words.
    """
    tokens = []
    # Every level is read with .get(): empty/absent sections of the response
    # then yield no tokens instead of raising KeyError/IndexError.
    annotation = image.get("fullTextAnnotation") or {}
    for page in annotation.get("pages", []):
        for block in page.get("blocks", []):
            for paragraph in block.get("paragraphs", []):
                parts = []        # symbol texts of the token being built
                firstWord = None  # first word of the token being built
                lastWord = None
                for word in paragraph.get("words", []):
                    lastWord = word
                    if firstWord is None:
                        firstWord = word
                    symbols = word.get("symbols", [])
                    parts.extend(symbol["text"] for symbol in symbols)

                    # Only the last symbol of a word is inspected for a break.
                    breakType = None
                    if symbols:
                        prop = symbols[-1].get("property") or {}
                        breakType = prop.get("detectedBreak")
                    if breakType is None:
                        continue

                    tokens.append(_buildToken(firstWord, lastWord, "".join(parts)))
                    parts = []
                    firstWord = None
                # Text after the last detected break would otherwise be
                # discarded silently; emit it as a final token.
                if parts:
                    tokens.append(_buildToken(firstWord, lastWord, "".join(parts)))
    return tokens


def _buildToken(firstWord, lastWord, text):
    """Build a Token spanning from firstWord to lastWord with the given text."""
    # NOTE(review): assumes vertices are ordered top-left, top-right,
    # bottom-right, bottom-left -- confirm for rotated text.
    # Coordinates are read with a default of 0 because a JSON response may
    # leave out zero-valued fields.
    first = firstWord["boundingBox"]["vertices"]
    minX = first[0].get("x", 0)
    minY = first[0].get("y", 0)
    maxY = first[3].get("y", 0)
    maxX = lastWord["boundingBox"]["vertices"][1].get("x", 0)
    # Token.y is the lower edge (maxY) of the first word's box; the height is
    # taken from the first word only.
    return Token(minX, maxY, maxX - minX, maxY - minY, text)
def createLine(tokens, maxSpaceBetweenWords=25):
    """Group the tokens of one text line into cells, ordered left to right.

    Tokens are sorted by ``x``. Two neighbouring tokens belong to the same
    cell when the horizontal gap between them (next token's ``x`` minus the
    previous token's ``x + width``) is at most ``maxSpaceBetweenWords``;
    a larger gap starts a new cell.

    Args:
        tokens: iterable of objects exposing ``x``, ``width`` and ``text``.
        maxSpaceBetweenWords: largest gap, in the same units as ``x``, that
            still counts as an ordinary space between words.

    Returns:
        A list of strings, one per cell, each made of the cell's token texts
        joined by single spaces. Empty input yields an empty list.
    """
    ordered = sorted(tokens, key=lambda token: token.x)
    if not ordered:
        return []

    # Cells are collected directly as lists of words, so a tab character
    # inside a token's text cannot be mistaken for a cell boundary.
    cells = [[ordered[0].text]]
    last = ordered[0]
    for token in ordered[1:]:
        gap = token.x - (last.x + last.width)
        if gap > maxSpaceBetweenWords:
            cells.append([token.text])
        else:
            cells[-1].append(token.text)
        last = token
    return [" ".join(words) for words in cells]
def extractLines(tokens, lineHeightRatio=0.85):
    """Group tokens into text lines and split every line into cells.

    Tokens are sorted by ``y``. A token starts a new line when its ``y`` lies
    more than ``lineHeightRatio`` times its own ``height`` below the ``y`` of
    the token that opened the current line. Each finished line is passed to
    ``createLine``.

    Args:
        tokens: iterable of objects exposing ``y`` and ``height`` (plus
            whatever ``createLine`` reads from them).
        lineHeightRatio: fraction of a token's height that its ``y`` must
            exceed the current line's ``y`` by to open a new line.

    Returns:
        A list with one ``createLine`` result per detected line, top to
        bottom. Empty input yields an empty list.
    """
    ordered = sorted(tokens, key=lambda token: token.y)
    if not ordered:
        return []

    lines = []
    currentLine = [ordered[0]]
    # y of the token that opened the current line; it is only updated when a
    # new line starts, so a line is measured from its first token.
    lineY = ordered[0].y
    for token in ordered[1:]:
        if token.y > lineY + token.height * lineHeightRatio:
            lines.append(createLine(currentLine))
            currentLine = []
            lineY = token.y
        currentLine.append(token)
    lines.append(createLine(currentLine))
    return lines