-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathcombining_dataset.py
87 lines (66 loc) · 3.11 KB
/
combining_dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# -*- coding: utf-8 -*-
"""Combining_Dataset.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1DS5xp4EcdF3yt7uxVOi5YwXy1n3aGnrU
"""
# Mount Google Drive at /gdrive. The two lines below help in getting the
# authorization code by logging into your Google account.
from google.colab import drive
drive.mount('/gdrive')
# Importing all the required libraries
import os, sys
import numpy as np
import cv2
import matplotlib.pyplot as plt
# Side length (in pixels) that every image is resized to.
IMAGE_SIDE_SIZE = 128

# Folder that receives the combined image dataset.
OUTPUT_DIR = "/gdrive/Shareddrives/ALDA_Project/data/final_dataset"

# Source folder for each galaxy class; the separate paths are gathered here so
# the rest of the script can iterate over the classes uniformly.
INPUT_DIRS = {
    "elliptical": "/gdrive/Shareddrives/ALDA_Project/data/data_extract1_shilpa/elliptical",
    "spiral": "/gdrive/Shareddrives/ALDA_Project/data/data_extract1_shilpa/spiral",
    "irregular": "/gdrive/Shareddrives/ALDA_Project/data/web_scraping_meghana/irregular_aug",
    "invalid": "/gdrive/Shareddrives/ALDA_Project/data/web_scraping_meghana/invalid",
}

# One accumulator array per class. Each starts as a single all-zero uint8 image
# of shape (1, IMAGE_SIDE_SIZE, IMAGE_SIDE_SIZE, 3) that real images are
# concatenated onto.
_SEED_SHAPE = (1, IMAGE_SIDE_SIZE, IMAGE_SIDE_SIZE, 3)
galaxy_npys = {
    class_name: np.zeros(_SEED_SHAPE, dtype=np.uint8) for class_name in INPUT_DIRS
}
# Report how many directory entries each class folder contains.
for galaxy_class, class_dir in INPUT_DIRS.items():
    entry_count = len(os.listdir(class_dir))
    print(galaxy_class, ":\t", entry_count)
# Resize every .jpg of each class to IMAGE_SIDE_SIZE x IMAGE_SIDE_SIZE, write
# the resized copy to OUTPUT_DIR/<class>/<image_name>, and append it to that
# class's array in galaxy_npys. `counter` ends up holding the number of images
# processed across all classes.
counter = 0
for galaxy_class in INPUT_DIRS.keys():
    class_input_dir = INPUT_DIRS[galaxy_class]
    class_output_dir = os.path.join(OUTPUT_DIR, galaxy_class)
    # The output structure used to be created by hand; create it here so the
    # writes below do not depend on that manual step.
    os.makedirs(class_output_dir, exist_ok=True)

    resized_images = []
    # os.listdir returns names in arbitrary order; sort so the saved dataset
    # has the same image order on every run.
    for image_name in sorted(os.listdir(class_input_dir)):
        # Match on the file extension, not on any "jpg" substring in the name.
        if not image_name.lower().endswith(".jpg"):
            continue

        image_path = os.path.join(class_input_dir, image_name)
        temp = cv2.imread(image_path)
        if temp is None:
            # cv2.imread signals an unreadable file by returning None rather
            # than raising; skip it instead of crashing inside cv2.resize.
            print("skipping unreadable image:", image_path)
            continue

        temp = cv2.resize(temp, (IMAGE_SIDE_SIZE, IMAGE_SIDE_SIZE))
        # Write the 3-D image itself (the batch axis is only added when the
        # class array is assembled below).
        cv2.imwrite(os.path.join(class_output_dir, image_name), temp)
        resized_images.append(temp)

        counter += 1
        if counter % 100 == 0:
            print(counter, "images done!")

    if resized_images:
        # Concatenate once per class; doing it once per image re-copies the
        # whole accumulated array on every iteration.
        galaxy_npys[galaxy_class] = np.concatenate(
            (galaxy_npys[galaxy_class], np.stack(resized_images)), axis=0
        )
print(counter, "images DONE!")
# Drop the first entry of every class array, then print the resulting shape
# (number of images followed by the image dimensions).
for galaxy_class, stacked in galaxy_npys.items():
    galaxy_npys[galaxy_class] = stacked[1:]
    print(galaxy_npys[galaxy_class].shape)
# Save each class's image array to OUTPUT_DIR/<class>.npy.
# np.save returns None, so its result is deliberately not bound to a name.
for galaxy_class, class_images in galaxy_npys.items():
    np.save(os.path.join(OUTPUT_DIR, galaxy_class + ".npy"), class_images)
# Reload every per-class .npy file and join them along axis 0 into one array,
# in the key order of galaxy_npys.
# The loaded arrays are concatenated directly: seeding the result with a
# default-dtype (float64) zero row would promote the whole image set to
# float64, and the seed row would have to be sliced off again afterwards.
images = np.concatenate(
    [
        np.load(os.path.join(OUTPUT_DIR, galaxy_class + ".npy"))
        for galaxy_class in galaxy_npys
    ],
    axis=0,
)
print(images.shape)
# Display one sample image per class, each in its own figure so the second
# image is not drawn over the first.
# The arrays were loaded with cv2.imread, which yields channels in BGR order,
# while plt.imshow interprets a 3-channel array as RGB; reversing the last axis
# converts between the two so colours are shown correctly.

# Displaying elliptical image below
plt.figure()
plt.imshow(galaxy_npys["elliptical"][4][..., ::-1])

# Displaying irregular image below
plt.figure()
plt.imshow(galaxy_npys["irregular"][1][..., ::-1])

# Needed when run as a plain script; harmless with an inline notebook backend.
plt.show()