-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathclassify-image.py
79 lines (66 loc) · 2.94 KB
/
classify-image.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
#!/usr/bin/env python3
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=DeprecationWarning)
import os
import cv2
import numpy
import string
import random
import argparse
import tensorflow as tf
import tensorflow.keras as keras
def decode(characters, y):
    """Translate model output scores into the predicted captcha string.

    ``y`` is stacked into a 3-D array and the index of the largest score
    along the last axis is taken for every entry.  Only column 0 of that
    index table is kept (presumably the first and only sample of the
    batch -- confirm against the caller), and each index is looked up in
    ``characters`` to build the result.

    Args:
        characters: Indexable sequence of symbols (e.g. a string).
        y: Nested sequence/array of scores with three dimensions.

    Returns:
        The decoded text, one symbol per entry along the first axis.
    """
    scores = numpy.array(y)
    best_indices = numpy.argmax(scores, axis=2)
    first_column = best_indices[:, 0]
    symbols = [characters[index] for index in first_column]
    return ''.join(symbols)
def _load_captcha_batch(image_path):
    """Read one captcha image and shape it as a single-item model input.

    The image is converted to grayscale, histogram-equalized, scaled to
    the 0..1 range and given a trailing channel axis and a leading batch
    axis, i.e. the result has shape (1, height, width, 1).

    Args:
        image_path: Path of the image file to load.

    Returns:
        The preprocessed array, or ``None`` when OpenCV could not decode
        the file (``cv2.imread`` returns ``None`` in that case).
    """
    raw_data = cv2.imread(image_path)
    if raw_data is None:
        return None
    gray = cv2.cvtColor(raw_data, cv2.COLOR_BGR2GRAY)
    gray = cv2.equalizeHist(gray)
    # The grayscale image is 2-D, so the channel axis must be appended with
    # axis=-1; axis=3 is out of range and raises AxisError on current numpy.
    image = numpy.expand_dims(gray, axis=-1) / 255.0
    # Leading batch axis: the model is fed one image at a time.
    return numpy.expand_dims(image, axis=0)


def main():
    """Classify every captcha image in a directory and write a CSV report.

    Command-line options (all required):
        --model-name   Base path of the model; ``<name>.json`` holds the
                       architecture and ``<name>.h5`` the weights.
        --captcha-dir  Directory containing the captcha images.
        --output       File that receives one ``<image>,<text>`` line per
                       classified image.
        --symbols      File whose first line lists the captcha symbols.

    Rows are written sorted by predicted text (ties broken by file name).
    Directory entries that OpenCV cannot read as images are skipped with a
    message instead of aborting the run.

    Raises:
        SystemExit: With status 1 when a required option is missing.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--model-name', help='Model name to use for classification', type=str)
    parser.add_argument('--captcha-dir', help='Where to read the captchas to break', type=str)
    parser.add_argument('--output', help='File where the classifications should be saved', type=str)
    parser.add_argument('--symbols', help='File with the symbols to use in captchas', type=str)
    args = parser.parse_args()

    # Checked in this order so the first missing option is the one reported.
    required_options = (
        (args.model_name, "Please specify the CNN model to use"),
        (args.captcha_dir, "Please specify the directory with captchas to break"),
        (args.output, "Please specify the path to the output file"),
        (args.symbols, "Please specify the captcha symbols file"),
    )
    for value, message in required_options:
        if value is None:
            print(message)
            raise SystemExit(1)

    with open(args.symbols, 'r') as symbols_file:
        captcha_symbols = symbols_file.readline().strip()

    print("Classifying captchas with symbol set {" + captcha_symbols + "}")

    with tf.device('/gpu:0'):
        with open(args.output, 'w') as output_file:
            with open(args.model_name + '.json', 'r') as json_file:
                loaded_model_json = json_file.read()
            model = keras.models.model_from_json(loaded_model_json)
            model.load_weights(args.model_name + '.h5')
            model.compile(loss='categorical_crossentropy',
                          optimizer=keras.optimizers.Adam(1e-3, amsgrad=True),
                          metrics=['accuracy'])

            # A list of (text, file) pairs rather than a dict keyed by the
            # predicted text: two images decoded to the same string must
            # both end up in the report.
            results = []
            for image_name in os.listdir(args.captcha_dir):
                image = _load_captcha_batch(os.path.join(args.captcha_dir, image_name))
                if image is None:
                    print('Skipping unreadable image ' + image_name)
                    continue
                prediction = model.predict(image)
                results.append((decode(captcha_symbols, prediction), image_name))

            for predicted_text, image_name in sorted(results):
                output_file.write(image_name + "," + predicted_text + "\n")
                print('Classified ' + image_name)
# Run the classifier only when executed as a script, not when imported.
if __name__ == "__main__":
    main()