dataset.py
import tensorflow as tf
import tensorflow_datasets as tfds


def prepare_dataset(steps_per_epoch):
    # STL-10 contains 100,000 unlabeled and 5,000 labeled training images;
    # deriving both batch sizes from these counts makes the two splits yield
    # the same number of batches per epoch, so they can be zipped together
    unlabeled_batch_size = 100000 // steps_per_epoch
    labeled_batch_size = 5000 // steps_per_epoch
    batch_size = unlabeled_batch_size + labeled_batch_size
    print(
        f"batch size is {unlabeled_batch_size} (unlabeled)"
        f" + {labeled_batch_size} (labeled)"
    )
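    # shuffle and batch each training split; drop_remainder guarantees full
    # batches, so the two zipped splits always stay in step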
unlabeled_train_dataset = (
tfds.load("stl10", split="unlabelled", as_supervised=True, shuffle_files=True)
.shuffle(buffer_size=5000)
.batch(unlabeled_batch_size, drop_remainder=True)
)
labeled_train_dataset = (
tfds.load("stl10", split="train", as_supervised=True, shuffle_files=True)
.shuffle(buffer_size=5000)
.batch(labeled_batch_size, drop_remainder=True)
)
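    # the test split is batched at the combined size; shuffling is
    # unnecessary for evaluation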
test_dataset = (
tfds.load("stl10", split="test", as_supervised=True)
.batch(batch_size)
        .prefetch(buffer_size=tf.data.AUTOTUNE)
)
# labeled and unlabeled datasets are zipped together
train_dataset = tf.data.Dataset.zip(
(unlabeled_train_dataset, labeled_train_dataset)
    ).prefetch(buffer_size=tf.data.AUTOTUNE)
return batch_size, train_dataset, labeled_train_dataset, test_dataset
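

# Minimal usage sketch; steps_per_epoch=200 is an illustrative value rather
# than one taken from this file. With it, each training batch holds
# 100000 // 200 = 500 unlabeled and 5000 // 200 = 25 labeled images.
if __name__ == "__main__":
    batch_size, train_dataset, labeled_train_dataset, test_dataset = prepare_dataset(
        steps_per_epoch=200
    )
    # each element of train_dataset is a pair of (images, labels) batches:
    # ((unlabeled_images, unlabeled_labels), (labeled_images, labeled_labels));
    # TFDS fills the unlabelled split's labels with -1
    (u_images, u_labels), (l_images, l_labels) = next(iter(train_dataset))
    print(u_images.shape, l_images.shape)  # (500, 96, 96, 3) (25, 96, 96, 3)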