-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathbaidu_OCR.py
133 lines (99 loc) · 3.43 KB
/
baidu_OCR.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
#!/usr/bin/python3
## This script was developed by lunarjoll and is published at git@github.com:lunarjoll/baidu_OCR.git
## Send email to: lunarkindle@yahoo.com
import requests, sys, getopt, configparser, timeit
from aip import AipOcr
# Reference point for the "Running time" figure reported after recognition.
start = timeit.default_timer()

# Install hint: pip3 install baidu-aip (presumably what provides the `aip` module).

# Input and output locations; both stay None until command-line options set them.
input_file = output_file = None

# Request parameters handed to the OCR call.
options = dict(
    detect_direction='true',
    language_type='CHN_ENG',
)
def create_config():
    """Interactively collect the Baidu credentials and save them to disk.

    Prompts on standard input for APP_ID, API_KEY and SECRET_KEY, in that
    order, and writes them to the ``[baidu]`` section of ``baidu_OCR.conf``
    in the current working directory, overwriting any existing file.
    """
    print("go to baidu and create APP_ID. \nhttps://console.bce.baidu.com ")

    credentials = configparser.ConfigParser()
    credentials['baidu'] = {}
    section = credentials['baidu']
    for field in ('APP_ID', 'API_KEY', 'SECRET_KEY'):
        # The prompt is the field name followed by "=".
        section[field] = input(field + "=")

    # The with-statement closes the file; no explicit close() is needed.
    with open('baidu_OCR.conf', 'w') as handle:
        credentials.write(handle)
def usage():
    """Print first-run setup hints followed by the command-line synopsis.

    Returns:
        Always 0.
    """
    setup_hints = (
        " when first use, go to baidu and create APP_ID. \n"
        "and pip3 install baidu-aip \n"
        "and use --init to create config file \n"
        "https://console.bce.baidu.com \n"
    )
    print(setup_hints)

    # One synopsis line per supported invocation, prefixed with the script name.
    script = sys.argv[0]
    for arguments in ('--init', '-i <inputfile> -o <outputfile>'):
        print(script, arguments)
    return 0
# Read the image
def get_file_content(input_file, timeout=30):
    """Return the raw bytes of an image given as a local path or an HTTP(S) URL.

    Args:
        input_file: Path of a local file, or a URL starting with ``http://``
            or ``https://``.
        timeout: Seconds to wait for the server when ``input_file`` is a URL.
            Ignored for local files.

    Returns:
        The file contents, or the HTTP response body, as ``bytes``.

    Raises:
        Exception: With the message ``invalid input_file: <input_file>`` when
            the input cannot be read or downloaded. The underlying error is
            attached as ``__cause__``.
    """
    try:
        if input_file.startswith(('http://', 'https://')):
            # Without a timeout a stalled server would block the script forever.
            response = requests.get(input_file, timeout=timeout)
            # An HTTP error page is not an image; fail here rather than
            # handing its body to the OCR service.
            response.raise_for_status()
            return response.content
        with open(input_file, 'rb') as fp:
            return fp.read()
    except Exception as err:
        raise Exception('invalid input_file: %s' % input_file) from err
# Call the general text-recognition API
def baidu_link(input_file, output_file, options):
    """Recognise the text in ``input_file`` and emit it one entry per line.

    Relies on the module-level ``client`` (used for the ``basicAccurate``
    call) and ``start`` (the timer reference for the running-time report).

    Args:
        input_file: Local path or URL of the image; read via
            ``get_file_content``.
        output_file: Path of the text file to write. When ``None`` the
            recognised text is printed to standard output instead.
        options: Request options passed through to ``client.basicAccurate``.

    Raises:
        RuntimeError: If the response contains no ``words_result`` entry;
            the whole response is included in the message.
    """
    result = client.basicAccurate(get_file_content(input_file), options)
    try:
        words_result = result['words_result']
    except KeyError:
        # Surface the whole response rather than a bare KeyError so the
        # reason for the failure is visible to the user.
        raise RuntimeError('OCR request failed: %s' % (result,)) from None

    lines = [entry['words'] for entry in words_result]
    if output_file is None:
        for line in lines:
            print(line)
    else:
        # The with-statement closes the file even if a write fails.
        # The platform default text encoding is used, as before.
        with open(output_file, "w") as fo:
            fo.writelines(line + "\n" for line in lines)

    # NOTE(review): the source's indentation was lost; the timing report is
    # assumed to run at the end of every call - confirm against the repo.
    end = timeit.default_timer()
    print('Running time: %s Seconds' % (end - start))
# --- Command-line handling -------------------------------------------------
# Short options: -h (help), -i <file> (input), -o <file> (output).
# Long options: --ifile=<file>, --init; --verbose and --version are accepted
# but currently have no effect.
try:
    opts, args = getopt.getopt(sys.argv[1:], "hi:o:", ["verbose", "version", "ifile=", "init"])
except getopt.GetoptError as err:
    # Include the parser's message so the user can see which option was wrong.
    print(sys.argv[0], 'GetoptError:', err)
    sys.exit(2)

for op, value in opts:
    if op in ("-i", "--ifile"):
        input_file = value
    elif op == "-o":
        output_file = value
    elif op == "-h":
        usage()
        sys.exit()
    elif op == "--init":
        create_config()
        sys.exit()

# An input image is mandatory; the output file is optional.
if input_file is None:
    usage()
    sys.exit(2)

# Example of a URL input:
# input_file = 'https://imgsa.baidu.com/forum/pic/item/c0d66dcb39dbb6fdfb44797a0424ab18972b3758.jpg'

# --- Credentials -----------------------------------------------------------
# ConfigParser.read() silently skips a file it cannot open, so a missing or
# incomplete config only shows up as a KeyError on lookup; turn that into a
# readable hint instead of a traceback.
config = configparser.ConfigParser()
config.read("baidu_OCR.conf")
try:
    credentials = [config["baidu"][key] for key in ("APP_ID", "API_KEY", "SECRET_KEY")]
except KeyError as missing:
    print(sys.argv[0], 'baidu_OCR.conf is missing or incomplete (%s); run with --init first' % missing)
    sys.exit(2)

client = AipOcr(*credentials)
baidu_link(input_file, output_file, options)
'''
Legacy response handling, kept for reference only (this string is never executed):

if resp is not None:
    resp = resp.json()
    if int(resp.get('errNum')) != 0:
        raise Exception(resp.get('errMsg'))
    else:
        return resp.get('words_result')[0].get('word')
else:
    return None
#print(result)
'''
## This script was developed by lunarjoll and is published at git@github.com:lunarjoll/baidu_OCR.git
## Send email to: lunarkindle@yahoo.com