-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathuploadIMages.py
36 lines (33 loc) · 1.21 KB
/
uploadIMages.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
from pyspark import SparkContext
from pyspark.sql import SQLContext, Row
from io import StringIO
from PIL import Image
import cv2
import os
import numpy as np
import base64
import time
# Directory scanned for input images, relative to the working directory.
# The same directory is used for listing and for reading, so the script does
# not depend on one particular machine's absolute path.
IMAGE_DIR = "./TinyImageNet"
# Alluxio location the sequence file is written to and then read back from.
OUTPUT_URI = "alluxio://localhost:19998/ImageNet20000"
# (width, height) every image is resized to before JPEG encoding.
TARGET_SIZE = (300, 300)


def _encode_image(image_path):
    """Return the image at *image_path* as base64-encoded JPEG bytes.

    The image is resized to ``TARGET_SIZE`` before being encoded.

    Args:
        image_path: Filesystem path of the image to load.

    Returns:
        The base64-encoded JPEG data as ``bytes``, or ``None`` when OpenCV
        cannot read, resize, or encode the file.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread reports an unreadable file by returning None instead of
        # raising, so check explicitly rather than relying on a later error.
        return None
    try:
        resized = cv2.resize(img, TARGET_SIZE)
        encoded_ok, buffer = cv2.imencode(".jpg", resized)
    except cv2.error:
        return None
    if not encoded_ok:
        return None
    return base64.b64encode(buffer)


def main():
    """Encode every image in ``IMAGE_DIR`` and store the result in Alluxio.

    Each readable image becomes a ``(file name, base64 JPEG bytes)`` pair.
    The pairs are written as one Spark sequence file to ``OUTPUT_URI``, which
    is then read back to print the stored record count.  The number of files
    that could not be processed and the elapsed time (directory scan through
    the end of the write) are printed as well.
    """
    sc = SparkContext(appName="image_uploader")
    try:
        start = time.time()
        records = []
        failed = 0
        for file_name in os.listdir(IMAGE_DIR):
            encoded = _encode_image(os.path.join(IMAGE_DIR, file_name))
            if encoded is None:
                failed += 1
            else:
                # saveAsSequenceFile expects key/value pairs, so store tuples.
                records.append((file_name, encoded))
        print(f"{failed} Number of images Failed to Load.")

        sc.parallelize(records).saveAsSequenceFile(OUTPUT_URI)
        end = time.time()

        # Read the file back to confirm how many records were stored.
        print(sc.sequenceFile(OUTPUT_URI).count())
        print("=============================")
        print(f"Time taken: {end-start}")
        print("=============================")
    finally:
        # Release Spark resources even when the upload fails part-way.
        sc.stop()


if __name__ == "__main__":
    main()