-
Notifications
You must be signed in to change notification settings - Fork 0
/
get_signs.py
50 lines (49 loc) · 2.25 KB
/
get_signs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
from pymongo import MongoClient
import sys
import os
from PIL import Image
import io
import base64
import tqdm
# Export cropped sign images from the "ebl" MongoDB database into per-class
# folders under OUTPUT_ROOT, one PNG per cropped image.
#
# NOTE(review): this script was restored from a copy whose indentation had been
# lost, so the nesting is a reconstruction. It assumes the period check that
# preceded the image fetch only guarded folder creation, which makes the
# "every other period" branch reachable. If that check was meant to guard the
# whole export, remove the fallback in class_folder() so that only
# OWN_FOLDER_PERIODS are written. Confirm against the original file.

# Root folder that receives one sub-folder per class.
OUTPUT_ROOT = "data_other"

# Sign names whose annotations are exported.
sign_filter = ["AN", "DIŠ", "A", "MA", "UD", "NA"]

# Periods that are written to their own "<sign>_<period>" folder.
OWN_FOLDER_PERIODS = ("Neo-Babylonian", "Neo-Assyrian")

# Period whose crops are never written.
SKIPPED_PERIOD = "Ur III"

# Crops from every remaining period are written to this period's folder.
FALLBACK_PERIOD = "Neo-Babylonian"


def class_folder(sign, period):
    """Return the folder a crop of *sign* from *period* is saved to.

    Args:
        sign: Sign name, used as the folder-name prefix.
        period: Value of the fragment's ``script.period`` field.

    Returns:
        ``"data_other/<sign>_<period>"`` for the periods in
        ``OWN_FOLDER_PERIODS``, ``None`` for ``SKIPPED_PERIOD`` (the crop is
        not exported), and the ``FALLBACK_PERIOD`` folder for anything else.
    """
    if period == SKIPPED_PERIOD:
        return None
    if period in OWN_FOLDER_PERIODS:
        return f"{OUTPUT_ROOT}/{sign}_{period}"
    return f"{OUTPUT_ROOT}/{sign}_{FALLBACK_PERIOD}"


def collect_periods(annotations, fragments, sign):
    """Map each cropped-image id annotated as *sign* to a script period.

    Args:
        annotations: Collection whose documents hold an ``annotations`` array
            and a ``fragmentNumber``.
        fragments: Collection queried by ``_id`` for ``script.period``.
        sign: Sign name to match against ``annotations.data.signName``.

    Returns:
        Dict of ``croppedSign.imageId`` -> ``script.period``. Annotations whose
        fragment is not found are left out.

    Raises:
        KeyError: If a matched annotation has no ``croppedSign.imageId`` or a
            found fragment has no ``script.period``.
    """
    pipeline = [
        # The first $match narrows the documents before unwinding; the second
        # one drops the unwound annotations that belong to other signs.
        {"$match": {"annotations.data.signName": sign}},
        {"$unwind": "$annotations"},
        {"$match": {"annotations.data.signName": sign}},
        {
            "$project": {
                "annotations.data.signName": 1,
                "annotations.croppedSign": 1,
                "fragmentNumber": 1,
            }
        },
    ]
    image_id_era = {}
    for doc in tqdm.tqdm(annotations.aggregate(pipeline)):
        image_id = doc["annotations"]["croppedSign"]["imageId"]
        fragment_id = doc["fragmentNumber"]
        for fragment_doc in fragments.find({"_id": fragment_id}, {"script.period": 1}):
            image_id_era[image_id] = fragment_doc["script"]["period"]
    return image_id_era


def main(argv=None):
    """Run the export.

    Args:
        argv: Argument list in ``sys.argv`` layout (program name first, MongoDB
            connection string second). Defaults to ``sys.argv``.

    Raises:
        SystemExit: If no connection string was supplied.
    """
    args = sys.argv if argv is None else argv
    if len(args) < 2:
        # The connection string usually embeds credentials, so it is never
        # echoed back; only the usage line is printed.
        program = args[0] if args else "get_signs.py"
        sys.exit(f"usage: {program} <mongodb-connection-string>")

    client = MongoClient(args[1])
    try:
        database = client["ebl"]
        annotations = database["annotations"]
        fragments = database["fragments"]
        cropped_images = database["cropped_sign_images"]

        os.makedirs(OUTPUT_ROOT, exist_ok=True)

        # Shared across all signs and folders so every file name is unique.
        counter = 0
        for sign in tqdm.tqdm(sign_filter):
            image_id_era = collect_periods(annotations, fragments, sign)
            for image_id in tqdm.tqdm(image_id_era):
                folder = class_folder(sign, image_id_era[image_id])
                if folder is None:
                    # Decided before the fetch so skipped crops are not
                    # downloaded and decoded for nothing.
                    continue
                for doc in cropped_images.find({"_id": image_id}):
                    # The "image" field holds the picture as base64 text.
                    raw = base64.decodebytes(doc["image"].encode("utf-8"))
                    os.makedirs(folder, exist_ok=True)
                    with Image.open(io.BytesIO(raw)) as img:
                        img.save(f"{folder}/{counter}.png")
                    # Advance after every save so no file is overwritten.
                    counter += 1
    finally:
        client.close()


if __name__ == "__main__":
    main()