-
Notifications
You must be signed in to change notification settings - Fork 4
/
data_loader.py
107 lines (67 loc) · 3.18 KB
/
data_loader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import os
import zipfile
from pathlib import Path
import numpy as np
from tqdm import tqdm
from mnist_data_downloader import download_mnist_data
def prepare_mnist_data(data):
    """Parse MNIST CSV rows into normalized pixel vectors and integer labels.

    Each row is split on commas; the first field is read as the integer
    label and the remaining fields as pixel values. Pixels are rescaled with
    ``x / 127.5 - 1`` (which maps 0..255 onto [-1, 1]; dividing by 255
    instead would give [0, 1]).

    Args:
        data: Iterable of CSV text lines, e.g. the result of ``readlines()``.
            Surrounding whitespace is stripped and blank lines are skipped.

    Returns:
        A ``(inputs, targets)`` tuple where ``inputs`` is a list of 1-D
        float64 arrays (one per row) and ``targets`` is a list of ``int``.

    Raises:
        ValueError: If a non-blank row has a field that is not numeric.
    """
    inputs, targets = [], []
    for raw_line in tqdm(data, desc="preparing data"):
        fields = raw_line.strip().split(",")
        if fields == [""]:
            # Blank line (e.g. trailing newline at end of file): nothing to parse.
            continue
        # np.asfarray was removed in NumPy 2.0; asarray with an explicit float
        # dtype performs the same string -> float64 conversion.
        pixels = np.asarray(fields[1:], dtype=np.float64)
        inputs.append(pixels / 127.5 - 1)
        targets.append(int(fields[0]))
    return inputs, targets
def load_mnist(path="datasets/mnist/"):
    """Return MNIST inputs and labels, building an ``.npy`` cache on first use.

    If all four cache files (``mnist_train.npy``, ``mnist_test.npy``,
    ``mnist_train_targets.npy``, ``mnist_test_targets.npy``) exist under
    *path*, they are loaded and returned directly; the CSV files are neither
    downloaded nor read in that case. Otherwise both CSV files are downloaded
    if either is missing, parsed with ``prepare_mnist_data``, and the results
    are written to the cache files.

    Args:
        path: Directory that holds the CSV and ``.npy`` files. A trailing
            path separator is optional.

    Returns:
        ``(train_inputs, test_inputs, train_targets, test_targets)``, all as
        NumPy arrays on both the fresh and the cached code path.
    """
    root = Path(path)
    train_csv = root / "mnist_train.csv"
    test_csv = root / "mnist_test.csv"
    train_npy = root / "mnist_train.npy"
    test_npy = root / "mnist_test.npy"
    train_targets_npy = root / "mnist_train_targets.npy"
    test_targets_npy = root / "mnist_test_targets.npy"

    # Require the complete cache: checking only the two input files would let
    # a partially written cache crash later in np.load on the target files.
    cache_files = (train_npy, test_npy, train_targets_npy, test_targets_npy)
    if all(cache_file.exists() for cache_file in cache_files):
        return (
            np.load(train_npy),
            np.load(test_npy),
            np.load(train_targets_npy),
            np.load(test_targets_npy),
        )

    root.mkdir(parents=True, exist_ok=True)

    if not train_csv.exists() or not test_csv.exists():
        train_url = "https://pjreddie.com/media/files/mnist_train.csv"
        test_url = "https://pjreddie.com/media/files/mnist_test.csv"
        # Destinations are built with Path so they always match the existence
        # checks above, even when *path* has no trailing separator.
        download_mnist_data(train_url, str(train_csv))
        download_mnist_data(test_url, str(test_csv))

    # Read inside context managers so the file handles are closed promptly.
    with train_csv.open("r", encoding="utf-8") as handle:
        train_data = handle.readlines()
    with test_csv.open("r", encoding="utf-8") as handle:
        test_data = handle.readlines()

    train_inputs, train_targets = prepare_mnist_data(train_data)
    test_inputs, test_targets = prepare_mnist_data(test_data)

    # np.asfarray was removed in NumPy 2.0; use asarray with a float dtype.
    train_inputs = np.asarray(train_inputs, dtype=np.float64)
    test_inputs = np.asarray(test_inputs, dtype=np.float64)
    # Convert labels to arrays so the first run returns the same types as a
    # later cached run (np.load always yields arrays).
    train_targets = np.asarray(train_targets)
    test_targets = np.asarray(test_targets)

    np.save(train_npy, train_inputs)
    np.save(test_npy, test_inputs)
    np.save(train_targets_npy, train_targets)
    np.save(test_targets_npy, test_targets)

    return train_inputs, test_inputs, train_targets, test_targets
def prepare_utkface_data(path, image_size=(3, 32, 32)):
    """Load every image in a directory into one normalized array.

    The directory listing is shuffled, then each file is opened with PIL,
    resized, moved from channels-last to channels-first layout and rescaled
    with ``x / 127.5 - 1``.

    Args:
        path: Directory containing the image files, as ``str`` or
            ``os.PathLike``.
        image_size: ``(channels, rows, cols)`` of each output image. Only the
            two spatial entries are used for resizing; ``image_size[0]`` is
            not read here, so the channel count comes from the image files.

    Returns:
        A NumPy array stacking all prepared images along the first axis.
    """
    import random

    from PIL import Image

    file_names = os.listdir(path)
    random.shuffle(file_names)

    # PIL's resize expects (width, height), i.e. (cols, rows), whereas the
    # array produced below is (rows, cols, channels). Passing the spatial
    # sizes in this order makes each output's shape end in image_size[1:].
    rows, cols = image_size[1], image_size[2]

    train_inputs = []
    for file_name in tqdm(file_names, desc='preparing data'):
        # os.path.join accepts both str and Path; the previous string
        # concatenation raised TypeError for Path arguments.
        with Image.open(os.path.join(path, file_name)) as image:
            resized = image.resize((cols, rows))
        pixels = np.asarray(resized).transpose(2, 0, 1)
        train_inputs.append(pixels / 127.5 - 1)

    return np.array(train_inputs)
def load_utkface(path="datasets/utkface/", image_size=(3, 32, 32)):
    """Return the prepared UTKFace image array, caching it as ``UTKFace.npy``.

    If ``<path>/UTKFace.npy`` exists and its spatial dimensions match
    *image_size*, it is loaded and returned. Otherwise the images under
    ``<path>/UTKFace`` are prepared with ``prepare_utkface_data`` and the
    result is saved to the cache file. When the ``UTKFace`` directory is
    missing it is first extracted from ``<path>/archive.zip``.

    Args:
        path: Dataset directory; created if it does not exist.
        image_size: Requested image size, forwarded to
            ``prepare_utkface_data``.

    Returns:
        The NumPy array of prepared images.

    Raises:
        FileNotFoundError: If neither a usable cache, the ``UTKFace``
            directory, nor ``archive.zip`` is available.
    """
    path = Path(path)
    save_path = path / 'UTKFace.npy'

    if save_path.exists():
        cached = np.load(save_path)
        # The cache file name does not encode image_size, so reuse the cache
        # only when its trailing (spatial) dimensions match the request;
        # otherwise fall through and rebuild it.
        if cached.ndim == 4 and cached.shape[2:] == tuple(image_size[1:]):
            return cached

    path.mkdir(parents=True, exist_ok=True)

    images_dir = path / 'UTKFace'
    if not images_dir.exists():
        archive = path / 'archive.zip'
        if not archive.exists():
            raise FileNotFoundError(
                f"UTKFace images not found: expected directory {images_dir} "
                f"or archive {archive}"
            )
        with zipfile.ZipFile(archive, 'r') as zip_ref:
            zip_ref.extractall(path)

    # Pass a plain string: the helper may build file paths by string
    # concatenation, which does not work with Path objects.
    train_inputs = prepare_utkface_data(str(images_dir), image_size)
    np.save(save_path, train_inputs)
    return train_inputs