From 5c8558ac7dc1cefc198829b9ea66fccf1a64b344 Mon Sep 17 00:00:00 2001 From: Sam Hains Date: Tue, 1 Aug 2017 17:07:27 +1000 Subject: [PATCH 01/18] start fork of keras-cam for barnes --- .gitignore | 3 +++ cam.py | 7 +++--- data.py | 49 +++++++++++++++++++++++++++++++++++++++--- model.py | 8 +++---- preprocessing.py | 51 ++++++++++++++++++++++++++++++++++++++++++++ verify-image-data.py | 20 +++++++++++++++++ 6 files changed, 128 insertions(+), 10 deletions(-) create mode 100644 .gitignore create mode 100644 preprocessing.py create mode 100644 verify-image-data.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b51e500 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +weights +*.pyc +data diff --git a/cam.py b/cam.py index 29982c4..443bf3b 100644 --- a/cam.py +++ b/cam.py @@ -7,12 +7,13 @@ import argparse def train(dataset_path): - model = get_model() - X, y = load_inria_person(dataset_path) + X, y, nb_classes = load_data(dataset_path) + model = get_model(nb_classes) print "Training.." + print y checkpoint_path="weights.{epoch:02d}-{val_loss:.2f}.hdf5" checkpoint = ModelCheckpoint(checkpoint_path, monitor='val_loss', verbose=0, save_best_only=False, save_weights_only=False, mode='auto') - model.fit(X, y, nb_epoch=40, batch_size=32, validation_split=0.2, verbose=1, callbacks=[checkpoint]) + model.fit(X, y, nb_epoch=40, batch_size=24, validation_split=0.2, verbose=1, callbacks=[checkpoint]) def visualize_class_activation_map(model_path, img_path, output_path): model = load_model(model_path) diff --git a/data.py b/data.py index f8b79d6..015abb8 100644 --- a/data.py +++ b/data.py @@ -1,15 +1,58 @@ import cv2 + import glob import os import numpy as np from keras.utils.np_utils import to_categorical +from preprocessing import preprocess_image_batch + +CLASS_NAME_MAPPING = {} +PER_CLASS_MAX_IMAGES = 24 + + +def load_data(path): + data_set_input_images_files, data_set_input_images_true_label = get_class_wise_images_and_true_label(path) + processed_input_images = [preprocess_image_batch([image]) + for image in data_set_input_images_files] + + global CLASS_NAME_MAPPING + nb_classes = len(CLASS_NAME_MAPPING.keys()) + X_train = np.concatenate(processed_input_images) + + y_out = np.concatenate(data_set_input_images_true_label) + y_out = to_categorical(y_out, nb_classes=nb_classes) # to get sofmax shape of (None, nb_classes) + Y_train = y_out + + + from sklearn.utils import shuffle + X_train, Y_train = shuffle(X_train, Y_train) + + return X_train, Y_train, nb_classes + + +def get_class_wise_images_and_true_label(path): + print('path', path+"/*") + directory = glob.glob(path + '/*') + data_set_input_images = [] + data_set_input_images_true_label = [] + global CLASS_NAME_MAPPING + index = 0 + for sub_directory in directory: + if os.path.isdir(sub_directory): + class_dir_name = sub_directory.split('/')[-1] + CLASS_NAME_MAPPING[index] = class_dir_name + image_class_files = glob.glob(sub_directory + '/*.jpeg')[:PER_CLASS_MAX_IMAGES] + data_set_input_images.extend(image_class_files) + data_set_input_images_true_label.extend([[index]] * len(image_class_files)) + index += 1 + return data_set_input_images, data_set_input_images_true_label def load_inria_person(path): pos_path = os.path.join(path, "pos") - neg_path = os.path.join(path, "/neg") - pos_images = [cv2.resize(cv2.imread(x), (64, 128)) for x in glob.glob(pos_path + "/*.png")] + neg_path = os.path.join(path, "neg") + pos_images = [cv2.resize(cv2.imread(x), (64, 128)) for x in glob.glob(pos_path + "/*.jpeg")] pos_images = [np.transpose(img, (2, 0, 1)) for img in pos_images] - neg_images = [cv2.resize(cv2.imread(x), (64, 128)) for x in glob.glob(neg_path + "/*.png")] + neg_images = [cv2.resize(cv2.imread(x), (64, 128)) for x in glob.glob(neg_path + "/*.jpeg")] neg_images = [np.transpose(img, (2, 0, 1)) for img in neg_images] y = [1] * len(pos_images) + [0] * len(neg_images) y = to_categorical(y, 2) diff --git a/model.py b/model.py index 42df8d4..3f5a8d4 100644 --- a/model.py +++ b/model.py @@ -49,14 +49,14 @@ def VGG16_convolutions(): model.add(Convolution2D(512, 3, 3, activation='relu', name='conv5_3')) return model -def get_model(): +def get_model(nb_classes): model = VGG16_convolutions() - model = load_model_weights(model, "vgg16_weights.h5") + # model = load_model_weights(model, "vgg16.h5") model.add(Lambda(global_average_pooling, output_shape=global_average_pooling_shape)) - model.add(Dense(2, activation = 'softmax', init='uniform')) + model.add(Dense(nb_classes, activation = 'softmax', init='uniform')) sgd = SGD(lr=0.01, decay=1e-6, momentum=0.5, nesterov=True) model.compile(loss = 'categorical_crossentropy', optimizer = sgd, metrics=['accuracy']) return model @@ -80,4 +80,4 @@ def get_output_layer(model, layer_name): # get the symbolic outputs of each "key" layer (we gave them unique names). layer_dict = dict([(layer.name, layer) for layer in model.layers]) layer = layer_dict[layer_name] - return layer \ No newline at end of file + return layer diff --git a/preprocessing.py b/preprocessing.py new file mode 100644 index 0000000..43c37d4 --- /dev/null +++ b/preprocessing.py @@ -0,0 +1,51 @@ +import numpy as np +from scipy.misc import imread +from scipy.misc import imresize + + +VGG_16_IMAGE_SHAPE = (224, 224) + +def preprocess_image_batch(image_paths, crop_size=None, color_mode='rgb', + out=None): + """ + Consistent preprocessing of images batches + :param image_paths: iterable: images to process + :param crop_size: tuple: crop images if specified + :param img_size: tuple: resize images if specified + :param color_mode: Use rgb or change to bgr mode based on type of model you want to use + :param out: append output to this iterable if specified + """ + img_list = [] + img_size = VGG_16_IMAGE_SHAPE + for im_path in image_paths: + img = imread(im_path, mode='RGB') + if img_size: + img = imresize(img, img_size) + + img = img.astype('float32') + # We normalize the colors (in RGB space) with the empirical means on the training set + img[:, :, 0] -= 123.68 + img[:, :, 1] -= 116.779 + img[:, :, 2] -= 103.939 + # We permute the colors to get them in the BGR order + if color_mode == 'bgr': + img[:, :, [0, 1, 2]] = img[:, :, [2, 1, 0]] + img = img.transpose((2, 0, 1)) + + if crop_size: + img = img[:, (img_size[0] - crop_size[0]) // 2:(img_size[0] + crop_size[0]) // 2 + , (img_size[1] - crop_size[1]) // 2:(img_size[1] + crop_size[1]) // 2] + + img_list.append(img) + + try: + img_batch = np.stack(img_list, axis=0) + except: + raise ValueError('when img_size and crop_size are None, images' + ' in image_paths must have the same shapes.') + + if out is not None and hasattr(out, 'append'): + out.append(img_batch) + else: + return img_batch + diff --git a/verify-image-data.py b/verify-image-data.py new file mode 100644 index 0000000..df8e95d --- /dev/null +++ b/verify-image-data.py @@ -0,0 +1,20 @@ +import magic +from subprocess import call +import os + +DATA_DIR = "./dataset" + +classes = os.listdir(DATA_DIR) + +for c in classes: + image_dir = "{}/{}/".format(DATA_DIR, c) + images = os.listdir(image_dir) + + call(["mogrify", "-format", "jpeg", "{}/*.png".format(image_dir)]) + + for image in images: + file_name = image_dir+image + mime = magic.from_file(file_name, mime=True) + if mime != "image/jpeg": + # print('removing', file_name) + os.remove(file_name) From 4b5b8a1dc34915b183dd1781fe16b93f582a0f0f Mon Sep 17 00:00:00 2001 From: Sam Hains Date: Tue, 1 Aug 2017 19:53:33 +1000 Subject: [PATCH 02/18] set up keras flow_from_directory --- cam.py | 35 +++++++++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/cam.py b/cam.py index 443bf3b..79f3ac7 100644 --- a/cam.py +++ b/cam.py @@ -1,19 +1,46 @@ from keras.models import * from keras.callbacks import * +from keras.preprocessing.image import ImageDataGenerator import keras.backend as K from model import * from data import * +from utils import * import cv2 import argparse +from keras.utils.np_utils import to_categorical +BATCH_SIZE = 32 def train(dataset_path): - X, y, nb_classes = load_data(dataset_path) + #X, y, nb_classes = load_data(dataset_path) + + test_generator = get_batches(dataset_path, shuffle=False, batch_size=BATCH_SIZE) + x_train = test_generator.classes + y_train = to_categorical(x_train) + nb_classes = len(y_train[0]) model = get_model(nb_classes) - print "Training.." - print y + checkpoint_path="weights.{epoch:02d}-{val_loss:.2f}.hdf5" checkpoint = ModelCheckpoint(checkpoint_path, monitor='val_loss', verbose=0, save_best_only=False, save_weights_only=False, mode='auto') - model.fit(X, y, nb_epoch=40, batch_size=24, validation_split=0.2, verbose=1, callbacks=[checkpoint]) + model.fit_generator(test_generator, 64, BATCH_SIZE, callbacks=[checkpoint]) + # datagen = ImageDataGenerator( + # featurewise_center=True, + # featurewise_std_normalization=True, + # rotation_range=20, + # width_shift_range=0.2, + # height_shift_range=0.2, + # horizontal_flip=True) + + # compute quantities required for featurewise normalization + # (std, mean, and principal components if ZCA whitening is applied) + # datagen.fit(x_train) + + # fits the model on batches with real-time data augmentation: + # model.fit_generator(datagen.flow(x_train, y_train, batch_size=32), + # steps_per_epoch=len(x_train) / 32, epochs=epochs) + # trn_features = model.predict_generator(batches, batches.nb_sample) + # save_array(model_path + 'train_convlayer_features.bc', trn_features) + # print y + # model.fit(batches, trn_labels, nb_epoch=40, batch_size=BATCH_SIZE, validation_split=0.2, verbose=1, callbacks=[checkpoint]) def visualize_class_activation_map(model_path, img_path, output_path): model = load_model(model_path) From 03ce2bb0dc22284346760fedd4a31c5b7f26809a Mon Sep 17 00:00:00 2001 From: Sam Hains Date: Tue, 1 Aug 2017 19:53:43 +1000 Subject: [PATCH 03/18] add utils file --- utils.py | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 utils.py diff --git a/utils.py b/utils.py new file mode 100644 index 0000000..39d2183 --- /dev/null +++ b/utils.py @@ -0,0 +1,34 @@ +import keras +# from keras import backend as K +# from keras.utils.data_utils import get_file +# from keras.utils import np_utils +# from keras.utils.np_utils import to_categorical +# from keras.models import Sequential, Model +# from keras.layers import Input, Embedding, Reshape, merge, LSTM, Bidirectional +# from keras.layers import TimeDistributed, Activation, SimpleRNN, GRU +# from keras.layers.core import Flatten, Dense, Dropout, Lambda +# from keras.regularizers import l2, activity_l2, l1, activity_l1 +# from keras.layers.normalization import BatchNormalization +# from keras.optimizers import SGD, RMSprop, Adam +# from keras.utils.layer_utils import layer_from_config +# from keras.metrics import categorical_crossentropy, categorical_accuracy +# from keras.layers.convolutional import * +from keras.preprocessing import image +# from keras.preprocessing.text import Tokenizer + + +def get_batches( + dirname, + gen=image.ImageDataGenerator( + rescale=1./255, + shear_range=0.2, + zoom_range=0.2, + horizontal_flip=True), + shuffle=True, + batch_size=32, + class_mode='categorical', + target_size=(224,224)): + return gen.flow_from_directory(dirname, target_size=target_size, + class_mode=class_mode, shuffle=shuffle, batch_size=batch_size) + + From 0adc0829b064ce3200bb3e4f1fed3edbcc719abc Mon Sep 17 00:00:00 2001 From: Sam Hains Date: Tue, 1 Aug 2017 20:10:38 +1000 Subject: [PATCH 04/18] add script to create validation data --- create-validation-data.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 create-validation-data.py diff --git a/create-validation-data.py b/create-validation-data.py new file mode 100644 index 0000000..0fdca57 --- /dev/null +++ b/create-validation-data.py @@ -0,0 +1,30 @@ +import os + +TRAIN_DIR = "./data/train" +VALID_DIR = "./data/valid" + + +classes = os.listdir(TRAIN_DIR) + +def percentage(percent, whole): + return (percent * whole) / 100.0 + +if not os.path.exists(VALID_DIR): + os.mkdir(VALID_DIR) + +for c in classes: + image_dir = "{}/{}/".format(TRAIN_DIR, c) + images = os.listdir(image_dir) + num_of_images_to_move = int(percentage(20, len(images))) + + valid_class_dir = VALID_DIR+"/"+c + train_class_dir = TRAIN_DIR+"/"+c + + if not os.path.exists(valid_class_dir): + os.mkdir(valid_class_dir) + + for image in images[-num_of_images_to_move:]: + new_file_name = valid_class_dir+"/"+image + old_file_name = train_class_dir+"/"+image + print('moving', old_file_name, 'to', new_file_name) + os.rename(old_file_name, new_file_name) From eae81a47d5324c019963d6851cd361674cc789b6 Mon Sep 17 00:00:00 2001 From: Sam Hains Date: Tue, 1 Aug 2017 21:48:38 +1000 Subject: [PATCH 05/18] add verification generator --- cam.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/cam.py b/cam.py index 79f3ac7..5c374ba 100644 --- a/cam.py +++ b/cam.py @@ -10,18 +10,32 @@ from keras.utils.np_utils import to_categorical BATCH_SIZE = 32 +nb_train_samples = 2000 +nb_validation_samples = 800 + def train(dataset_path): #X, y, nb_classes = load_data(dataset_path) - test_generator = get_batches(dataset_path, shuffle=False, batch_size=BATCH_SIZE) - x_train = test_generator.classes + train_generator = get_batches(dataset_path+"/train", shuffle=False, batch_size=BATCH_SIZE) + valid_generator = get_batches(dataset_path+"/train", shuffle=False, batch_size=BATCH_SIZE) + x_train = train_generator.classes y_train = to_categorical(x_train) nb_classes = len(y_train[0]) model = get_model(nb_classes) + nb_train_samples = train_generator.nb_samples + nb_validation_samples = validation_generator.nb_samples + print(nb_train_samples) + print(nb_validation_samples) checkpoint_path="weights.{epoch:02d}-{val_loss:.2f}.hdf5" checkpoint = ModelCheckpoint(checkpoint_path, monitor='val_loss', verbose=0, save_best_only=False, save_weights_only=False, mode='auto') - model.fit_generator(test_generator, 64, BATCH_SIZE, callbacks=[checkpoint]) + model.fit_generator( + train_generator, + steps_per_epoch= nb_train_samples // BATCH_SIZE, + BATCH_SIZE, + validation_data=validation_generator, + validation_steps=nb_validation_samples // BATCH_SIZE, + callbacks=[checkpoint]) # datagen = ImageDataGenerator( # featurewise_center=True, # featurewise_std_normalization=True, From 70c417cdc22a1b3053c48dc2485b59ec03feff9d Mon Sep 17 00:00:00 2001 From: Sam Hains Date: Tue, 1 Aug 2017 21:49:56 +1000 Subject: [PATCH 06/18] update ignore --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index b51e500..3e8b3d6 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,3 @@ -weights +weights/* *.pyc data From 4bb84c3c7053978952becec7fb1e2838462391b9 Mon Sep 17 00:00:00 2001 From: Sam Hains Date: Tue, 1 Aug 2017 21:56:49 +1000 Subject: [PATCH 07/18] clean up code --- cam.py | 45 +++++++++++---------------------------------- 1 file changed, 11 insertions(+), 34 deletions(-) diff --git a/cam.py b/cam.py index 5c374ba..c755e42 100644 --- a/cam.py +++ b/cam.py @@ -9,52 +9,29 @@ import argparse from keras.utils.np_utils import to_categorical BATCH_SIZE = 32 - -nb_train_samples = 2000 -nb_validation_samples = 800 +NB_EPOCHS = 5 +SAMPLES_PER_EPOCH = 500 def train(dataset_path): - #X, y, nb_classes = load_data(dataset_path) - train_generator = get_batches(dataset_path+"/train", shuffle=False, batch_size=BATCH_SIZE) - valid_generator = get_batches(dataset_path+"/train", shuffle=False, batch_size=BATCH_SIZE) + valid_generator = get_batches(dataset_path+"/valid", shuffle=False, batch_size=BATCH_SIZE) x_train = train_generator.classes + x_valid = valid_generator.classes y_train = to_categorical(x_train) nb_classes = len(y_train[0]) model = get_model(nb_classes) - nb_train_samples = train_generator.nb_samples - nb_validation_samples = validation_generator.nb_samples + nb_train_samples = len(x_train) + nb_valid_samples = len(x_valid) print(nb_train_samples) - print(nb_validation_samples) - - checkpoint_path="weights.{epoch:02d}-{val_loss:.2f}.hdf5" + checkpoint_path="weights/weights.{epoch:02d}-{val_loss:.2f}.hdf5" checkpoint = ModelCheckpoint(checkpoint_path, monitor='val_loss', verbose=0, save_best_only=False, save_weights_only=False, mode='auto') model.fit_generator( train_generator, - steps_per_epoch= nb_train_samples // BATCH_SIZE, - BATCH_SIZE, - validation_data=validation_generator, - validation_steps=nb_validation_samples // BATCH_SIZE, + SAMPLES_PER_EPOCH, + NB_EPOCHS, + validation_data=valid_generator, + nb_val_samples=int(SAMPLES_PER_EPOCH*0.2), callbacks=[checkpoint]) - # datagen = ImageDataGenerator( - # featurewise_center=True, - # featurewise_std_normalization=True, - # rotation_range=20, - # width_shift_range=0.2, - # height_shift_range=0.2, - # horizontal_flip=True) - - # compute quantities required for featurewise normalization - # (std, mean, and principal components if ZCA whitening is applied) - # datagen.fit(x_train) - - # fits the model on batches with real-time data augmentation: - # model.fit_generator(datagen.flow(x_train, y_train, batch_size=32), - # steps_per_epoch=len(x_train) / 32, epochs=epochs) - # trn_features = model.predict_generator(batches, batches.nb_sample) - # save_array(model_path + 'train_convlayer_features.bc', trn_features) - # print y - # model.fit(batches, trn_labels, nb_epoch=40, batch_size=BATCH_SIZE, validation_split=0.2, verbose=1, callbacks=[checkpoint]) def visualize_class_activation_map(model_path, img_path, output_path): model = load_model(model_path) From 70d8e6b1077fc7f14b3f274ba37a1dcdd5bc5668 Mon Sep 17 00:00:00 2001 From: Sam Hains Date: Tue, 1 Aug 2017 21:59:09 +1000 Subject: [PATCH 08/18] move scripts into script directory --- create-validation-data.py => scripts/create-validation-data.py | 0 verify-image-data.py => scripts/verify-image-data.py | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename create-validation-data.py => scripts/create-validation-data.py (100%) rename verify-image-data.py => scripts/verify-image-data.py (100%) diff --git a/create-validation-data.py b/scripts/create-validation-data.py similarity index 100% rename from create-validation-data.py rename to scripts/create-validation-data.py diff --git a/verify-image-data.py b/scripts/verify-image-data.py similarity index 100% rename from verify-image-data.py rename to scripts/verify-image-data.py From 172287d1df74c22f47f81913abc7e8bc61c3d33e Mon Sep 17 00:00:00 2001 From: Unknown Date: Wed, 2 Aug 2017 09:59:45 +1000 Subject: [PATCH 09/18] each epoch should run through all the samples --- cam.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/cam.py b/cam.py index c755e42..ae0e057 100644 --- a/cam.py +++ b/cam.py @@ -9,8 +9,7 @@ import argparse from keras.utils.np_utils import to_categorical BATCH_SIZE = 32 -NB_EPOCHS = 5 -SAMPLES_PER_EPOCH = 500 +NB_EPOCHS = 50 def train(dataset_path): train_generator = get_batches(dataset_path+"/train", shuffle=False, batch_size=BATCH_SIZE) @@ -27,10 +26,10 @@ def train(dataset_path): checkpoint = ModelCheckpoint(checkpoint_path, monitor='val_loss', verbose=0, save_best_only=False, save_weights_only=False, mode='auto') model.fit_generator( train_generator, - SAMPLES_PER_EPOCH, + nb_train_samples, NB_EPOCHS, validation_data=valid_generator, - nb_val_samples=int(SAMPLES_PER_EPOCH*0.2), + nb_val_samples=nb_valid_samples, callbacks=[checkpoint]) def visualize_class_activation_map(model_path, img_path, output_path): From 9dd2d6319ac104d0f5e7e85a6a18e9cdc843cfbd Mon Sep 17 00:00:00 2001 From: Sam Hains Date: Wed, 2 Aug 2017 10:56:11 +1000 Subject: [PATCH 10/18] add notes for failed fist test --- NOTES.md | 17 +++++++++++++++++ T1.logs | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+) create mode 100644 NOTES.md create mode 100644 T1.logs diff --git a/NOTES.md b/NOTES.md new file mode 100644 index 0000000..d2d0584 --- /dev/null +++ b/NOTES.md @@ -0,0 +1,17 @@ +## NOTES + + +- How to determine maximum batch size? +- What are the indications that training is going well or badly? +- Is there any down side to my approach with validation script? +- How would i incorporate some hdf5 saving into my workflow? + +#OVERFITTING +- You will notice if training accuracy is much higher than validation + + +#T1 +- After 13 epochs accuracy stops improving +- Accuracy is very low (0.362) +- After 2nd epoch validation accuracy freezes at 0.3360 +- during training the accuracy seemed much higher (~80) but the summaries tell a different story diff --git a/T1.logs b/T1.logs new file mode 100644 index 0000000..42df151 --- /dev/null +++ b/T1.logs @@ -0,0 +1,49 @@ +988/1988 [==============================] - 156s - loss: 1.0860 - acc: 0.4371 - val_loss: 1.1059 - val_acc: 0.2696 +Epoch 2/50 +1988/1988 [==============================] - 145s - loss: 1.1030 - acc: 0.3451 - val_loss: 1.1010 - val_acc: 0.3360 +Epoch 3/50 +1988/1988 [==============================] - 145s - loss: 1.1007 - acc: 0.3682 - val_loss: 1.0996 - val_acc: 0.3360 +Epoch 4/50 +1988/1988 [==============================] - 146s - loss: 1.0995 - acc: 0.3747 - val_loss: 1.0993 - val_acc: 0.3360 +Epoch 5/50 +1988/1988 [==============================] - 145s - loss: 1.1002 - acc: 0.3521 - val_loss: 1.0982 - val_acc: 0.3360 +Epoch 6/50 +1988/1988 [==============================] - 147s - loss: 1.0990 - acc: 0.3682 - val_loss: 1.0981 - val_acc: 0.3360 +Epoch 7/50 +1988/1988 [==============================] - 149s - loss: 1.0988 - acc: 0.3682 - val_loss: 1.0983 - val_acc: 0.3360 +Epoch 8/50 +1988/1988 [==============================] - 148s - loss: 1.1002 - acc: 0.3682 - val_loss: 1.0971 - val_acc: 0.3360 +Epoch 9/50 +1988/1988 [==============================] - 147s - loss: 1.0990 - acc: 0.3682 - val_loss: 1.0969 - val_acc: 0.3360 +Epoch 10/50 +1988/1988 [==============================] - 147s - loss: 1.0990 - acc: 0.3682 - val_loss: 1.0965 - val_acc: 0.3360 +Epoch 11/50 +1988/1988 [==============================] - 124s - loss: 1.0985 - acc: 0.3783 - val_loss: 1.0965 - val_acc: 0.3360 +Epoch 12/50 +1988/1988 [==============================] - 124s - loss: 1.0988 - acc: 0.3783 - val_loss: 1.0961 - val_acc: 0.3360 +Epoch 13/50 +1988/1988 [==============================] - 121s - loss: 1.0980 - acc: 0.3783 - val_loss: 1.0963 - val_acc: 0.3360 +Epoch 14/50 +1988/1988 [==============================] - 119s - loss: 1.0987 - acc: 0.3783 - val_loss: 1.0959 - val_acc: 0.3360 +Epoch 15/50 +1988/1988 [==============================] - 120s - loss: 1.0985 - acc: 0.3783 - val_loss: 1.0958 - val_acc: 0.3360 +Epoch 16/50 +1988/1988 [==============================] - 121s - loss: 1.0983 - acc: 0.3783 - val_loss: 1.0957 - val_acc: 0.3360 + +Epoch 17/50 +1988/1988 [==============================] - 131s - loss: 1.0983 - acc: 0.3783 - val_loss: 1.0955 - val_acc: 0.3360 +Epoch 18/50 +1988/1988 [==============================] - 128s - loss: 1.0977 - acc: 0.3783 - val_loss: 1.0957 - val_acc: 0.3360 +Epoch 19/50 +1988/1988 [==============================] - 128s - loss: 1.0983 - acc: 0.3783 - val_loss: 1.0956 - val_acc: 0.3360 +Epoch 20/50 +1988/1988 [==============================] - 127s - loss: 1.0983 - acc: 0.3783 - val_loss: 1.0955 - val_acc: 0.3360 +Epoch 21/50 +1988/1988 [==============================] - 127s - loss: 1.0981 - acc: 0.3783 - val_loss: 1.0954 - val_acc: 0.3360 +Epoch 22/50 +1988/1988 [==============================] - 127s - loss: 1.0981 - acc: 0.3783 - val_loss: 1.0954 - val_acc: 0.3360 +Epoch 23/50 +1988/1988 [==============================] - 127s - loss: 1.0979 - acc: 0.3783 - val_loss: 1.0955 - val_acc: 0.3360 +Epoch 24/50 +1988/1988 [==============================] - 126s - loss: 1.0981 - acc: 0.3783 - val_loss: 1.0954 - val_acc: 0.3360 +Epoch 25/50 From a53250aaa838bbf0def25eaba015b054c5730fe8 Mon Sep 17 00:00:00 2001 From: Unknown Date: Wed, 2 Aug 2017 11:27:35 +1000 Subject: [PATCH 11/18] update logs for readability --- T1.logs | 72 ++++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 48 insertions(+), 24 deletions(-) diff --git a/T1.logs b/T1.logs index 42df151..019d881 100644 --- a/T1.logs +++ b/T1.logs @@ -1,49 +1,73 @@ -988/1988 [==============================] - 156s - loss: 1.0860 - acc: 0.4371 - val_loss: 1.1059 - val_acc: 0.2696 +988/1988 [==============================] - 156s +- loss: 1.0860 - acc: 0.4371 - val_loss: 1.1059 - val_acc: 0.2696 Epoch 2/50 -1988/1988 [==============================] - 145s - loss: 1.1030 - acc: 0.3451 - val_loss: 1.1010 - val_acc: 0.3360 +1988/1988 [==============================] - 145s +- loss: 1.1030 - acc: 0.3451 - val_loss: 1.1010 - val_acc: 0.3360 Epoch 3/50 -1988/1988 [==============================] - 145s - loss: 1.1007 - acc: 0.3682 - val_loss: 1.0996 - val_acc: 0.3360 +1988/1988 [==============================] - 145s +- loss: 1.1007 - acc: 0.3682 - val_loss: 1.0996 - val_acc: 0.3360 Epoch 4/50 -1988/1988 [==============================] - 146s - loss: 1.0995 - acc: 0.3747 - val_loss: 1.0993 - val_acc: 0.3360 +1988/1988 [==============================] - 146s +- loss: 1.0995 - acc: 0.3747 - val_loss: 1.0993 - val_acc: 0.3360 Epoch 5/50 -1988/1988 [==============================] - 145s - loss: 1.1002 - acc: 0.3521 - val_loss: 1.0982 - val_acc: 0.3360 +1988/1988 [==============================] - 145s +- loss: 1.1002 - acc: 0.3521 - val_loss: 1.0982 - val_acc: 0.3360 Epoch 6/50 -1988/1988 [==============================] - 147s - loss: 1.0990 - acc: 0.3682 - val_loss: 1.0981 - val_acc: 0.3360 +1988/1988 [==============================] - 147s +- loss: 1.0990 - acc: 0.3682 - val_loss: 1.0981 - val_acc: 0.3360 Epoch 7/50 -1988/1988 [==============================] - 149s - loss: 1.0988 - acc: 0.3682 - val_loss: 1.0983 - val_acc: 0.3360 +1988/1988 [==============================] - 149s +- loss: 1.0988 - acc: 0.3682 - val_loss: 1.0983 - val_acc: 0.3360 Epoch 8/50 -1988/1988 [==============================] - 148s - loss: 1.1002 - acc: 0.3682 - val_loss: 1.0971 - val_acc: 0.3360 +1988/1988 [==============================] - 148s +- loss: 1.1002 - acc: 0.3682 - val_loss: 1.0971 - val_acc: 0.3360 Epoch 9/50 -1988/1988 [==============================] - 147s - loss: 1.0990 - acc: 0.3682 - val_loss: 1.0969 - val_acc: 0.3360 +1988/1988 [==============================] - 147s +- loss: 1.0990 - acc: 0.3682 - val_loss: 1.0969 - val_acc: 0.3360 Epoch 10/50 -1988/1988 [==============================] - 147s - loss: 1.0990 - acc: 0.3682 - val_loss: 1.0965 - val_acc: 0.3360 +1988/1988 [==============================] - 147s +- loss: 1.0990 - acc: 0.3682 - val_loss: 1.0965 - val_acc: 0.3360 Epoch 11/50 -1988/1988 [==============================] - 124s - loss: 1.0985 - acc: 0.3783 - val_loss: 1.0965 - val_acc: 0.3360 +1988/1988 [==============================] - 124s +- loss: 1.0985 - acc: 0.3783 - val_loss: 1.0965 - val_acc: 0.3360 Epoch 12/50 -1988/1988 [==============================] - 124s - loss: 1.0988 - acc: 0.3783 - val_loss: 1.0961 - val_acc: 0.3360 +1988/1988 [==============================] - 124s +- loss: 1.0988 - acc: 0.3783 - val_loss: 1.0961 - val_acc: 0.3360 Epoch 13/50 -1988/1988 [==============================] - 121s - loss: 1.0980 - acc: 0.3783 - val_loss: 1.0963 - val_acc: 0.3360 +1988/1988 [==============================] - 121s +- loss: 1.0980 - acc: 0.3783 - val_loss: 1.0963 - val_acc: 0.3360 Epoch 14/50 -1988/1988 [==============================] - 119s - loss: 1.0987 - acc: 0.3783 - val_loss: 1.0959 - val_acc: 0.3360 +1988/1988 [==============================] - 119s +- loss: 1.0987 - acc: 0.3783 - val_loss: 1.0959 - val_acc: 0.3360 Epoch 15/50 -1988/1988 [==============================] - 120s - loss: 1.0985 - acc: 0.3783 - val_loss: 1.0958 - val_acc: 0.3360 +1988/1988 [==============================] - 120s +- loss: 1.0985 - acc: 0.3783 - val_loss: 1.0958 - val_acc: 0.3360 Epoch 16/50 -1988/1988 [==============================] - 121s - loss: 1.0983 - acc: 0.3783 - val_loss: 1.0957 - val_acc: 0.3360 +1988/1988 [==============================] - 121s +- loss: 1.0983 - acc: 0.3783 - val_loss: 1.0957 - val_acc: 0.3360 Epoch 17/50 -1988/1988 [==============================] - 131s - loss: 1.0983 - acc: 0.3783 - val_loss: 1.0955 - val_acc: 0.3360 +1988/1988 [==============================] - 131s +- loss: 1.0983 - acc: 0.3783 - val_loss: 1.0955 - val_acc: 0.3360 Epoch 18/50 -1988/1988 [==============================] - 128s - loss: 1.0977 - acc: 0.3783 - val_loss: 1.0957 - val_acc: 0.3360 +1988/1988 [==============================] - 128s +- loss: 1.0977 - acc: 0.3783 - val_loss: 1.0957 - val_acc: 0.3360 Epoch 19/50 -1988/1988 [==============================] - 128s - loss: 1.0983 - acc: 0.3783 - val_loss: 1.0956 - val_acc: 0.3360 +1988/1988 [==============================] - 128s +- loss: 1.0983 - acc: 0.3783 - val_loss: 1.0956 - val_acc: 0.3360 Epoch 20/50 -1988/1988 [==============================] - 127s - loss: 1.0983 - acc: 0.3783 - val_loss: 1.0955 - val_acc: 0.3360 +1988/1988 [==============================] - 127s +- loss: 1.0983 - acc: 0.3783 - val_loss: 1.0955 - val_acc: 0.3360 Epoch 21/50 -1988/1988 [==============================] - 127s - loss: 1.0981 - acc: 0.3783 - val_loss: 1.0954 - val_acc: 0.3360 +1988/1988 [==============================] - 127s +- loss: 1.0981 - acc: 0.3783 - val_loss: 1.0954 - val_acc: 0.3360 Epoch 22/50 -1988/1988 [==============================] - 127s - loss: 1.0981 - acc: 0.3783 - val_loss: 1.0954 - val_acc: 0.3360 +1988/1988 [==============================] - 127s +- loss: 1.0981 - acc: 0.3783 - val_loss: 1.0954 - val_acc: 0.3360 Epoch 23/50 -1988/1988 [==============================] - 127s - loss: 1.0979 - acc: 0.3783 - val_loss: 1.0955 - val_acc: 0.3360 +1988/1988 [==============================] - 127s +- loss: 1.0979 - acc: 0.3783 - val_loss: 1.0955 - val_acc: 0.3360 Epoch 24/50 -1988/1988 [==============================] - 126s - loss: 1.0981 - acc: 0.3783 - val_loss: 1.0954 - val_acc: 0.3360 +1988/1988 [==============================] - 126s +- loss: 1.0981 - acc: 0.3783 - val_loss: 1.0954 - val_acc: 0.3360 Epoch 25/50 From 2d958497257c2d3b315e290641a1c38e7a25077c Mon Sep 17 00:00:00 2001 From: Sam Hains Date: Thu, 3 Aug 2017 17:43:38 +1000 Subject: [PATCH 12/18] set up master --- .gitignore | 1 + cam.py | 16 ++++++++++++---- model.py | 6 +++++- scripts/create-validation-data.py | 4 ++-- utils.py | 18 ++++++++++-------- 5 files changed, 30 insertions(+), 15 deletions(-) diff --git a/.gitignore b/.gitignore index 3e8b3d6..9a7a43f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ weights/* *.pyc data +weights_1 diff --git a/cam.py b/cam.py index ae0e057..c7f061f 100644 --- a/cam.py +++ b/cam.py @@ -12,8 +12,16 @@ NB_EPOCHS = 50 def train(dataset_path): - train_generator = get_batches(dataset_path+"/train", shuffle=False, batch_size=BATCH_SIZE) + gen = ImageDataGenerator( + rotation_range=15, + rescale=1./255, + shear_range=0.1, + zoom_range=0.1, + horizontal_flip=True) + train_generator = get_batches(dataset_path+"/train", gen=gen, shuffle=True, batch_size=BATCH_SIZE) + # Don't shuffle or Augment validation set valid_generator = get_batches(dataset_path+"/valid", shuffle=False, batch_size=BATCH_SIZE) + x_train = train_generator.classes x_valid = valid_generator.classes y_train = to_categorical(x_train) @@ -21,7 +29,6 @@ def train(dataset_path): model = get_model(nb_classes) nb_train_samples = len(x_train) nb_valid_samples = len(x_valid) - print(nb_train_samples) checkpoint_path="weights/weights.{epoch:02d}-{val_loss:.2f}.hdf5" checkpoint = ModelCheckpoint(checkpoint_path, monitor='val_loss', verbose=0, save_best_only=False, save_weights_only=False, mode='auto') model.fit_generator( @@ -47,11 +54,12 @@ def visualize_class_activation_map(model_path, img_path, output_path): [conv_outputs, predictions] = get_output([img]) conv_outputs = conv_outputs[0, :, :, :] + print "predictions", predictions + target_class = 0 #Create the class activation map. cam = np.zeros(dtype = np.float32, shape = conv_outputs.shape[1:3]) - for i, w in enumerate(class_weights[:, 1]): + for i, w in enumerate(class_weights[:, target_class]): cam += w * conv_outputs[i, :, :] - print "predictions", predictions cam /= np.max(cam) cam = cv2.resize(cam, (height, width)) heatmap = cv2.applyColorMap(np.uint8(255*cam), cv2.COLORMAP_JET) diff --git a/model.py b/model.py index 3f5a8d4..6013bb2 100644 --- a/model.py +++ b/model.py @@ -52,13 +52,17 @@ def VGG16_convolutions(): def get_model(nb_classes): model = VGG16_convolutions() - # model = load_model_weights(model, "vgg16.h5") + model = load_model_weights(model, "vgg16_weights.h5") + print("NUMBER OF CLASSES", nb_classes) model.add(Lambda(global_average_pooling, output_shape=global_average_pooling_shape)) model.add(Dense(nb_classes, activation = 'softmax', init='uniform')) sgd = SGD(lr=0.01, decay=1e-6, momentum=0.5, nesterov=True) model.compile(loss = 'categorical_crossentropy', optimizer = sgd, metrics=['accuracy']) + # model.compile(loss='binary_crossentropy', + # optimizer='rmsprop', + # metrics=['accuracy']) return model def load_model_weights(model, weights_path): diff --git a/scripts/create-validation-data.py b/scripts/create-validation-data.py index 0fdca57..2efd884 100644 --- a/scripts/create-validation-data.py +++ b/scripts/create-validation-data.py @@ -1,7 +1,7 @@ import os -TRAIN_DIR = "./data/train" -VALID_DIR = "./data/valid" +TRAIN_DIR = "../data/train" +VALID_DIR = "../data/valid" classes = os.listdir(TRAIN_DIR) diff --git a/utils.py b/utils.py index 39d2183..0b984f9 100644 --- a/utils.py +++ b/utils.py @@ -19,16 +19,18 @@ def get_batches( dirname, - gen=image.ImageDataGenerator( - rescale=1./255, - shear_range=0.2, - zoom_range=0.2, - horizontal_flip=True), - shuffle=True, + gen=image.ImageDataGenerator(), + shuffle=False, + save_to_dir=None, batch_size=32, class_mode='categorical', target_size=(224,224)): - return gen.flow_from_directory(dirname, target_size=target_size, - class_mode=class_mode, shuffle=shuffle, batch_size=batch_size) + return gen.flow_from_directory( + dirname, + save_to_dir=save_to_dir, + target_size=target_size, + class_mode=class_mode, + shuffle=shuffle, + batch_size=batch_size) From dc287f8bb18515cc27018110a7b5db187c2bfb18 Mon Sep 17 00:00:00 2001 From: Unknown Date: Thu, 3 Aug 2017 17:51:01 +1000 Subject: [PATCH 13/18] create a simple branch for debugging --- NOTES.md | 17 ------- T1.logs | 73 ------------------------------- cam.py | 31 +++++++++---- data.py | 52 +++------------------- model.py | 4 -- preprocessing.py | 51 --------------------- scripts/create-validation-data.py | 30 ------------- scripts/verify-image-data.py | 20 --------- utils.py | 36 --------------- 9 files changed, 28 insertions(+), 286 deletions(-) delete mode 100644 NOTES.md delete mode 100644 T1.logs delete mode 100644 preprocessing.py delete mode 100644 scripts/create-validation-data.py delete mode 100644 scripts/verify-image-data.py delete mode 100644 utils.py diff --git a/NOTES.md b/NOTES.md deleted file mode 100644 index d2d0584..0000000 --- a/NOTES.md +++ /dev/null @@ -1,17 +0,0 @@ -## NOTES - - -- How to determine maximum batch size? -- What are the indications that training is going well or badly? -- Is there any down side to my approach with validation script? -- How would i incorporate some hdf5 saving into my workflow? - -#OVERFITTING -- You will notice if training accuracy is much higher than validation - - -#T1 -- After 13 epochs accuracy stops improving -- Accuracy is very low (0.362) -- After 2nd epoch validation accuracy freezes at 0.3360 -- during training the accuracy seemed much higher (~80) but the summaries tell a different story diff --git a/T1.logs b/T1.logs deleted file mode 100644 index 019d881..0000000 --- a/T1.logs +++ /dev/null @@ -1,73 +0,0 @@ -988/1988 [==============================] - 156s -- loss: 1.0860 - acc: 0.4371 - val_loss: 1.1059 - val_acc: 0.2696 -Epoch 2/50 -1988/1988 [==============================] - 145s -- loss: 1.1030 - acc: 0.3451 - val_loss: 1.1010 - val_acc: 0.3360 -Epoch 3/50 -1988/1988 [==============================] - 145s -- loss: 1.1007 - acc: 0.3682 - val_loss: 1.0996 - val_acc: 0.3360 -Epoch 4/50 -1988/1988 [==============================] - 146s -- loss: 1.0995 - acc: 0.3747 - val_loss: 1.0993 - val_acc: 0.3360 -Epoch 5/50 -1988/1988 [==============================] - 145s -- loss: 1.1002 - acc: 0.3521 - val_loss: 1.0982 - val_acc: 0.3360 -Epoch 6/50 -1988/1988 [==============================] - 147s -- loss: 1.0990 - acc: 0.3682 - val_loss: 1.0981 - val_acc: 0.3360 -Epoch 7/50 -1988/1988 [==============================] - 149s -- loss: 1.0988 - acc: 0.3682 - val_loss: 1.0983 - val_acc: 0.3360 -Epoch 8/50 -1988/1988 [==============================] - 148s -- loss: 1.1002 - acc: 0.3682 - val_loss: 1.0971 - val_acc: 0.3360 -Epoch 9/50 -1988/1988 [==============================] - 147s -- loss: 1.0990 - acc: 0.3682 - val_loss: 1.0969 - val_acc: 0.3360 -Epoch 10/50 -1988/1988 [==============================] - 147s -- loss: 1.0990 - acc: 0.3682 - val_loss: 1.0965 - val_acc: 0.3360 -Epoch 11/50 -1988/1988 [==============================] - 124s -- loss: 1.0985 - acc: 0.3783 - val_loss: 1.0965 - val_acc: 0.3360 -Epoch 12/50 -1988/1988 [==============================] - 124s -- loss: 1.0988 - acc: 0.3783 - val_loss: 1.0961 - val_acc: 0.3360 -Epoch 13/50 -1988/1988 [==============================] - 121s -- loss: 1.0980 - acc: 0.3783 - val_loss: 1.0963 - val_acc: 0.3360 -Epoch 14/50 -1988/1988 [==============================] - 119s -- loss: 1.0987 - acc: 0.3783 - val_loss: 1.0959 - val_acc: 0.3360 -Epoch 15/50 -1988/1988 [==============================] - 120s -- loss: 1.0985 - acc: 0.3783 - val_loss: 1.0958 - val_acc: 0.3360 -Epoch 16/50 -1988/1988 [==============================] - 121s -- loss: 1.0983 - acc: 0.3783 - val_loss: 1.0957 - val_acc: 0.3360 - -Epoch 17/50 -1988/1988 [==============================] - 131s -- loss: 1.0983 - acc: 0.3783 - val_loss: 1.0955 - val_acc: 0.3360 -Epoch 18/50 -1988/1988 [==============================] - 128s -- loss: 1.0977 - acc: 0.3783 - val_loss: 1.0957 - val_acc: 0.3360 -Epoch 19/50 -1988/1988 [==============================] - 128s -- loss: 1.0983 - acc: 0.3783 - val_loss: 1.0956 - val_acc: 0.3360 -Epoch 20/50 -1988/1988 [==============================] - 127s -- loss: 1.0983 - acc: 0.3783 - val_loss: 1.0955 - val_acc: 0.3360 -Epoch 21/50 -1988/1988 [==============================] - 127s -- loss: 1.0981 - acc: 0.3783 - val_loss: 1.0954 - val_acc: 0.3360 -Epoch 22/50 -1988/1988 [==============================] - 127s -- loss: 1.0981 - acc: 0.3783 - val_loss: 1.0954 - val_acc: 0.3360 -Epoch 23/50 -1988/1988 [==============================] - 127s -- loss: 1.0979 - acc: 0.3783 - val_loss: 1.0955 - val_acc: 0.3360 -Epoch 24/50 -1988/1988 [==============================] - 126s -- loss: 1.0981 - acc: 0.3783 - val_loss: 1.0954 - val_acc: 0.3360 -Epoch 25/50 diff --git a/cam.py b/cam.py index c7f061f..1486e1c 100644 --- a/cam.py +++ b/cam.py @@ -1,15 +1,31 @@ from keras.models import * from keras.callbacks import * +from keras.preprocessing import image from keras.preprocessing.image import ImageDataGenerator import keras.backend as K from model import * from data import * -from utils import * import cv2 import argparse -from keras.utils.np_utils import to_categorical BATCH_SIZE = 32 -NB_EPOCHS = 50 +NB_EPOCHS = 40 + +def get_batches( + dirname, + gen=image.ImageDataGenerator(), + shuffle=False, + save_to_dir=None, + batch_size=32, + class_mode='categorical', + target_size=(128,128)): + return gen.flow_from_directory( + dirname, + save_to_dir=save_to_dir, + target_size=target_size, + class_mode=class_mode, + shuffle=shuffle, + batch_size=batch_size) + def train(dataset_path): gen = ImageDataGenerator( @@ -41,12 +57,11 @@ def train(dataset_path): def visualize_class_activation_map(model_path, img_path, output_path): model = load_model(model_path) - original_img = cv2.imread(img_path, 1) + original_img = cv2.resize(cv2.imread(img_path, 1), (224, 224)) width, height, _ = original_img.shape #Reshape to the network input shape (3, w, h). img = np.array([np.transpose(np.float32(original_img), (2, 0, 1))]) - #Get the 512 input weights to the softmax. class_weights = model.layers[-1].get_weights()[0] final_conv_layer = get_output_layer(model, "conv5_3") @@ -54,16 +69,16 @@ def visualize_class_activation_map(model_path, img_path, output_path): [conv_outputs, predictions] = get_output([img]) conv_outputs = conv_outputs[0, :, :, :] - print "predictions", predictions - target_class = 0 #Create the class activation map. cam = np.zeros(dtype = np.float32, shape = conv_outputs.shape[1:3]) - for i, w in enumerate(class_weights[:, target_class]): + for i, w in enumerate(class_weights[:, 1]): cam += w * conv_outputs[i, :, :] + print "predictions", predictions cam /= np.max(cam) cam = cv2.resize(cam, (height, width)) heatmap = cv2.applyColorMap(np.uint8(255*cam), cv2.COLORMAP_JET) heatmap[np.where(cam < 0.2)] = 0 + print(heatmap.shape) img = heatmap*0.5 + original_img cv2.imwrite(output_path, img) diff --git a/data.py b/data.py index 015abb8..d351598 100644 --- a/data.py +++ b/data.py @@ -1,58 +1,16 @@ import cv2 - import glob import os import numpy as np from keras.utils.np_utils import to_categorical -from preprocessing import preprocess_image_batch - -CLASS_NAME_MAPPING = {} -PER_CLASS_MAX_IMAGES = 24 - - -def load_data(path): - data_set_input_images_files, data_set_input_images_true_label = get_class_wise_images_and_true_label(path) - processed_input_images = [preprocess_image_batch([image]) - for image in data_set_input_images_files] - - global CLASS_NAME_MAPPING - nb_classes = len(CLASS_NAME_MAPPING.keys()) - X_train = np.concatenate(processed_input_images) - - y_out = np.concatenate(data_set_input_images_true_label) - y_out = to_categorical(y_out, nb_classes=nb_classes) # to get sofmax shape of (None, nb_classes) - Y_train = y_out - - - from sklearn.utils import shuffle - X_train, Y_train = shuffle(X_train, Y_train) - - return X_train, Y_train, nb_classes - - -def get_class_wise_images_and_true_label(path): - print('path', path+"/*") - directory = glob.glob(path + '/*') - data_set_input_images = [] - data_set_input_images_true_label = [] - global CLASS_NAME_MAPPING - index = 0 - for sub_directory in directory: - if os.path.isdir(sub_directory): - class_dir_name = sub_directory.split('/')[-1] - CLASS_NAME_MAPPING[index] = class_dir_name - image_class_files = glob.glob(sub_directory + '/*.jpeg')[:PER_CLASS_MAX_IMAGES] - data_set_input_images.extend(image_class_files) - data_set_input_images_true_label.extend([[index]] * len(image_class_files)) - index += 1 - return data_set_input_images, data_set_input_images_true_label def load_inria_person(path): - pos_path = os.path.join(path, "pos") - neg_path = os.path.join(path, "neg") - pos_images = [cv2.resize(cv2.imread(x), (64, 128)) for x in glob.glob(pos_path + "/*.jpeg")] + pos_path = "./data/train/pos" + neg_path = "./data/train/neg" + pos_images = [cv2.resize(cv2.imread(x), (128, 128)) for x in glob.glob(pos_path + "/*.jpg")] pos_images = [np.transpose(img, (2, 0, 1)) for img in pos_images] - neg_images = [cv2.resize(cv2.imread(x), (64, 128)) for x in glob.glob(neg_path + "/*.jpeg")] + neg_images = [cv2.resize(cv2.imread(x), (128, 128)) for x in glob.glob(neg_path + "/*.jpg")] + print(pos_path, neg_path) neg_images = [np.transpose(img, (2, 0, 1)) for img in neg_images] y = [1] * len(pos_images) + [0] * len(neg_images) y = to_categorical(y, 2) diff --git a/model.py b/model.py index 6013bb2..f11c904 100644 --- a/model.py +++ b/model.py @@ -54,15 +54,11 @@ def get_model(nb_classes): model = load_model_weights(model, "vgg16_weights.h5") - print("NUMBER OF CLASSES", nb_classes) model.add(Lambda(global_average_pooling, output_shape=global_average_pooling_shape)) model.add(Dense(nb_classes, activation = 'softmax', init='uniform')) sgd = SGD(lr=0.01, decay=1e-6, momentum=0.5, nesterov=True) model.compile(loss = 'categorical_crossentropy', optimizer = sgd, metrics=['accuracy']) - # model.compile(loss='binary_crossentropy', - # optimizer='rmsprop', - # metrics=['accuracy']) return model def load_model_weights(model, weights_path): diff --git a/preprocessing.py b/preprocessing.py deleted file mode 100644 index 43c37d4..0000000 --- a/preprocessing.py +++ /dev/null @@ -1,51 +0,0 @@ -import numpy as np -from scipy.misc import imread -from scipy.misc import imresize - - -VGG_16_IMAGE_SHAPE = (224, 224) - -def preprocess_image_batch(image_paths, crop_size=None, color_mode='rgb', - out=None): - """ - Consistent preprocessing of images batches - :param image_paths: iterable: images to process - :param crop_size: tuple: crop images if specified - :param img_size: tuple: resize images if specified - :param color_mode: Use rgb or change to bgr mode based on type of model you want to use - :param out: append output to this iterable if specified - """ - img_list = [] - img_size = VGG_16_IMAGE_SHAPE - for im_path in image_paths: - img = imread(im_path, mode='RGB') - if img_size: - img = imresize(img, img_size) - - img = img.astype('float32') - # We normalize the colors (in RGB space) with the empirical means on the training set - img[:, :, 0] -= 123.68 - img[:, :, 1] -= 116.779 - img[:, :, 2] -= 103.939 - # We permute the colors to get them in the BGR order - if color_mode == 'bgr': - img[:, :, [0, 1, 2]] = img[:, :, [2, 1, 0]] - img = img.transpose((2, 0, 1)) - - if crop_size: - img = img[:, (img_size[0] - crop_size[0]) // 2:(img_size[0] + crop_size[0]) // 2 - , (img_size[1] - crop_size[1]) // 2:(img_size[1] + crop_size[1]) // 2] - - img_list.append(img) - - try: - img_batch = np.stack(img_list, axis=0) - except: - raise ValueError('when img_size and crop_size are None, images' - ' in image_paths must have the same shapes.') - - if out is not None and hasattr(out, 'append'): - out.append(img_batch) - else: - return img_batch - diff --git a/scripts/create-validation-data.py b/scripts/create-validation-data.py deleted file mode 100644 index 2efd884..0000000 --- a/scripts/create-validation-data.py +++ /dev/null @@ -1,30 +0,0 @@ -import os - -TRAIN_DIR = "../data/train" -VALID_DIR = "../data/valid" - - -classes = os.listdir(TRAIN_DIR) - -def percentage(percent, whole): - return (percent * whole) / 100.0 - -if not os.path.exists(VALID_DIR): - os.mkdir(VALID_DIR) - -for c in classes: - image_dir = "{}/{}/".format(TRAIN_DIR, c) - images = os.listdir(image_dir) - num_of_images_to_move = int(percentage(20, len(images))) - - valid_class_dir = VALID_DIR+"/"+c - train_class_dir = TRAIN_DIR+"/"+c - - if not os.path.exists(valid_class_dir): - os.mkdir(valid_class_dir) - - for image in images[-num_of_images_to_move:]: - new_file_name = valid_class_dir+"/"+image - old_file_name = train_class_dir+"/"+image - print('moving', old_file_name, 'to', new_file_name) - os.rename(old_file_name, new_file_name) diff --git a/scripts/verify-image-data.py b/scripts/verify-image-data.py deleted file mode 100644 index df8e95d..0000000 --- a/scripts/verify-image-data.py +++ /dev/null @@ -1,20 +0,0 @@ -import magic -from subprocess import call -import os - -DATA_DIR = "./dataset" - -classes = os.listdir(DATA_DIR) - -for c in classes: - image_dir = "{}/{}/".format(DATA_DIR, c) - images = os.listdir(image_dir) - - call(["mogrify", "-format", "jpeg", "{}/*.png".format(image_dir)]) - - for image in images: - file_name = image_dir+image - mime = magic.from_file(file_name, mime=True) - if mime != "image/jpeg": - # print('removing', file_name) - os.remove(file_name) diff --git a/utils.py b/utils.py deleted file mode 100644 index 0b984f9..0000000 --- a/utils.py +++ /dev/null @@ -1,36 +0,0 @@ -import keras -# from keras import backend as K -# from keras.utils.data_utils import get_file -# from keras.utils import np_utils -# from keras.utils.np_utils import to_categorical -# from keras.models import Sequential, Model -# from keras.layers import Input, Embedding, Reshape, merge, LSTM, Bidirectional -# from keras.layers import TimeDistributed, Activation, SimpleRNN, GRU -# from keras.layers.core import Flatten, Dense, Dropout, Lambda -# from keras.regularizers import l2, activity_l2, l1, activity_l1 -# from keras.layers.normalization import BatchNormalization -# from keras.optimizers import SGD, RMSprop, Adam -# from keras.utils.layer_utils import layer_from_config -# from keras.metrics import categorical_crossentropy, categorical_accuracy -# from keras.layers.convolutional import * -from keras.preprocessing import image -# from keras.preprocessing.text import Tokenizer - - -def get_batches( - dirname, - gen=image.ImageDataGenerator(), - shuffle=False, - save_to_dir=None, - batch_size=32, - class_mode='categorical', - target_size=(224,224)): - return gen.flow_from_directory( - dirname, - save_to_dir=save_to_dir, - target_size=target_size, - class_mode=class_mode, - shuffle=shuffle, - batch_size=batch_size) - - From 349338bc143df93cf9f3070c78433cf9e8b1b31b Mon Sep 17 00:00:00 2001 From: Sam Hains Date: Sat, 5 Aug 2017 10:53:36 +1000 Subject: [PATCH 14/18] simple notes.md --- NOTES.md | 78 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 NOTES.md diff --git a/NOTES.md b/NOTES.md new file mode 100644 index 0000000..e5d9d5e --- /dev/null +++ b/NOTES.md @@ -0,0 +1,78 @@ +## NOTES + + +- How to determine maximum batch size? +- What are the indications that training is going well or badly? +- Is there any down side to my approach with validation script? +- How would i incorporate some hdf5 saving into my workflow? + +#OVERFITTING +- You will notice if training accuracy is much higher than validation + + +#T1 +- After 13 epochs accuracy stops improving +- Accuracy is very low (0.362) +- After 2nd epoch validation accuracy freezes at 0.3360 +- during training the accuracy seemed much higher (~80) but the summaries tell a different story + +#T2 LOGS :sam-cam +1920/1920 [==============================] - 33s - loss: 0.6843 - acc: 0.5760 - val_loss: 2.6257 - val_acc: 0.5125 +Epoch 2/50 +1920/1920 [==============================] - 30s - loss: 0.6630 - acc: 0.6250 - val_loss: 3.9856 - val_acc: 0.5083 +Epoch 3/50 +1920/1920 [==============================] - 30s - loss: 0.6450 - acc: 0.6552 - val_loss: 5.0728 - val_acc: 0.5021 +Epoch 4/50 +1920/1920 [==============================] - 30s - loss: 0.6335 - acc: 0.6552 - val_loss: 5.7212 - val_acc: 0.5021 +Epoch 5/50 +1920/1920 [==============================] - 30s - loss: 0.6233 - acc: 0.6745 - val_loss: 6.1444 - val_acc: 0.5021 +Epoch 6/50 +1920/1920 [==============================] - 30s - loss: 0.6132 - acc: 0.6875 - val_loss: 6.4218 - val_acc: 0.5021 +Epoch 7/50 +1920/1920 [==============================] - 30s - loss: 0.6050 - acc: 0.6906 - val_loss: 6.6029 - val_acc: 0.5021 +Epoch 8/50 +1920/1920 [==============================] - 32s - loss: 0.5975 - acc: 0.6995 - val_loss: 6.7176 - val_acc: 0.5021 +Epoch 9/50 +1920/1920 [==============================] - 31s - loss: 0.5917 - acc: 0.6979 - val_loss: 6.7884 - val_acc: 0.5021 +Epoch 10/50 +1920/1920 [==============================] - 30s - loss: 0.5845 - acc: 0.7177 - val_loss: 6.8826 - val_acc: 0.5042 +Epoch 11/50 +1920/1920 [==============================] - 30s - loss: 0.5778 - acc: 0.7135 - val_loss: 6.9231 - val_acc: 0.5062 +Epoch 12/50 +1920/1920 [==============================] - 30s - loss: 0.5733 - acc: 0.7198 - val_loss: 6.9043 - val_acc: 0.5083 +Epoch 13/50 +1920/1920 [==============================] - 30s - loss: 0.5681 - acc: 0.7203 - val_loss: 6.9571 - val_acc: 0.5083 +Epoch 14/50 +1920/1920 [==============================] - 30s - loss: 0.5654 - acc: 0.7276 - val_loss: 6.9330 - val_acc: 0.5083 +Epoch 15/50 +1920/1920 [==============================] - 30s - loss: 0.5600 - acc: 0.7286 - val_loss: 6.9109 - val_acc: 0.5104 +Epoch 16/50 +1920/1920 [==============================] - 30s - loss: 0.5567 - acc: 0.7385 - val_loss: 6.9255 - val_acc: 0.5146 +Epoch 17/50 +1920/1920 [==============================] - 30s - loss: 0.5494 - acc: 0.7391 - val_loss: 6.9168 - val_acc: 0.5146 +Epoch 18/50 +1920/1920 [==============================] - 30s - loss: 0.5496 - acc: 0.7385 - val_loss: 6.8663 - val_acc: 0.5146 +Epoch 19/50 +1920/1920 [==============================] - 31s - loss: 0.5452 - acc: 0.7438 - val_loss: 6.8451 - val_acc: 0.5146 +Epoch 20/50 +1920/1920 [==============================] - 31s - loss: 0.5406 - acc: 0.7422 - val_loss: 6.8062 - val_acc: 0.5188 + + +#GITHUB Q + +Hey, + +I am trying to implement a version of your code that will allow for more than 2 classes, as well as larger datasets by using the fit_generator function. I was able to get your code to work fine + +However it seems that something I am doing is breaking the model, as even with 2 classes now, the validation accuracy seems to freeze and it doesn't get any better. + +Here is my observations of the problems. +- After 13 epochs accuracy stops improving +- Accuracy is very low (0.362) +- After 2nd epoch validation accuracy freezes at 0.3360 +- during training the accuracy seemed much higher (~80) but the summaries tell a different story + + +Here is a sample of my training logs: + + From 2d1b213fd1259ef6ccd044d5cce7a0aa1ec2a3d6 Mon Sep 17 00:00:00 2001 From: Sam Hains Date: Sat, 5 Aug 2017 12:12:44 +1000 Subject: [PATCH 15/18] add resolution variable --- cam.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cam.py b/cam.py index 1486e1c..ca6e434 100644 --- a/cam.py +++ b/cam.py @@ -9,6 +9,7 @@ import argparse BATCH_SIZE = 32 NB_EPOCHS = 40 +IMAGE_SIZE = (128, 128) def get_batches( dirname, @@ -17,7 +18,7 @@ def get_batches( save_to_dir=None, batch_size=32, class_mode='categorical', - target_size=(128,128)): + target_size=IMAGE_SIZE): return gen.flow_from_directory( dirname, save_to_dir=save_to_dir, @@ -57,7 +58,7 @@ def train(dataset_path): def visualize_class_activation_map(model_path, img_path, output_path): model = load_model(model_path) - original_img = cv2.resize(cv2.imread(img_path, 1), (224, 224)) + original_img = cv2.resize(cv2.imread(img_path, 1), IMAGE_SIZE) width, height, _ = original_img.shape #Reshape to the network input shape (3, w, h). From f189f07f2b8415335d913bbd2ced5cecc6f293b5 Mon Sep 17 00:00:00 2001 From: Sam Hains Date: Sat, 5 Aug 2017 12:21:42 +1000 Subject: [PATCH 16/18] remove notes --- NOTES.md | 78 -------------------------------------------------------- 1 file changed, 78 deletions(-) delete mode 100644 NOTES.md diff --git a/NOTES.md b/NOTES.md deleted file mode 100644 index e5d9d5e..0000000 --- a/NOTES.md +++ /dev/null @@ -1,78 +0,0 @@ -## NOTES - - -- How to determine maximum batch size? -- What are the indications that training is going well or badly? -- Is there any down side to my approach with validation script? -- How would i incorporate some hdf5 saving into my workflow? - -#OVERFITTING -- You will notice if training accuracy is much higher than validation - - -#T1 -- After 13 epochs accuracy stops improving -- Accuracy is very low (0.362) -- After 2nd epoch validation accuracy freezes at 0.3360 -- during training the accuracy seemed much higher (~80) but the summaries tell a different story - -#T2 LOGS :sam-cam -1920/1920 [==============================] - 33s - loss: 0.6843 - acc: 0.5760 - val_loss: 2.6257 - val_acc: 0.5125 -Epoch 2/50 -1920/1920 [==============================] - 30s - loss: 0.6630 - acc: 0.6250 - val_loss: 3.9856 - val_acc: 0.5083 -Epoch 3/50 -1920/1920 [==============================] - 30s - loss: 0.6450 - acc: 0.6552 - val_loss: 5.0728 - val_acc: 0.5021 -Epoch 4/50 -1920/1920 [==============================] - 30s - loss: 0.6335 - acc: 0.6552 - val_loss: 5.7212 - val_acc: 0.5021 -Epoch 5/50 -1920/1920 [==============================] - 30s - loss: 0.6233 - acc: 0.6745 - val_loss: 6.1444 - val_acc: 0.5021 -Epoch 6/50 -1920/1920 [==============================] - 30s - loss: 0.6132 - acc: 0.6875 - val_loss: 6.4218 - val_acc: 0.5021 -Epoch 7/50 -1920/1920 [==============================] - 30s - loss: 0.6050 - acc: 0.6906 - val_loss: 6.6029 - val_acc: 0.5021 -Epoch 8/50 -1920/1920 [==============================] - 32s - loss: 0.5975 - acc: 0.6995 - val_loss: 6.7176 - val_acc: 0.5021 -Epoch 9/50 -1920/1920 [==============================] - 31s - loss: 0.5917 - acc: 0.6979 - val_loss: 6.7884 - val_acc: 0.5021 -Epoch 10/50 -1920/1920 [==============================] - 30s - loss: 0.5845 - acc: 0.7177 - val_loss: 6.8826 - val_acc: 0.5042 -Epoch 11/50 -1920/1920 [==============================] - 30s - loss: 0.5778 - acc: 0.7135 - val_loss: 6.9231 - val_acc: 0.5062 -Epoch 12/50 -1920/1920 [==============================] - 30s - loss: 0.5733 - acc: 0.7198 - val_loss: 6.9043 - val_acc: 0.5083 -Epoch 13/50 -1920/1920 [==============================] - 30s - loss: 0.5681 - acc: 0.7203 - val_loss: 6.9571 - val_acc: 0.5083 -Epoch 14/50 -1920/1920 [==============================] - 30s - loss: 0.5654 - acc: 0.7276 - val_loss: 6.9330 - val_acc: 0.5083 -Epoch 15/50 -1920/1920 [==============================] - 30s - loss: 0.5600 - acc: 0.7286 - val_loss: 6.9109 - val_acc: 0.5104 -Epoch 16/50 -1920/1920 [==============================] - 30s - loss: 0.5567 - acc: 0.7385 - val_loss: 6.9255 - val_acc: 0.5146 -Epoch 17/50 -1920/1920 [==============================] - 30s - loss: 0.5494 - acc: 0.7391 - val_loss: 6.9168 - val_acc: 0.5146 -Epoch 18/50 -1920/1920 [==============================] - 30s - loss: 0.5496 - acc: 0.7385 - val_loss: 6.8663 - val_acc: 0.5146 -Epoch 19/50 -1920/1920 [==============================] - 31s - loss: 0.5452 - acc: 0.7438 - val_loss: 6.8451 - val_acc: 0.5146 -Epoch 20/50 -1920/1920 [==============================] - 31s - loss: 0.5406 - acc: 0.7422 - val_loss: 6.8062 - val_acc: 0.5188 - - -#GITHUB Q - -Hey, - -I am trying to implement a version of your code that will allow for more than 2 classes, as well as larger datasets by using the fit_generator function. I was able to get your code to work fine - -However it seems that something I am doing is breaking the model, as even with 2 classes now, the validation accuracy seems to freeze and it doesn't get any better. - -Here is my observations of the problems. -- After 13 epochs accuracy stops improving -- Accuracy is very low (0.362) -- After 2nd epoch validation accuracy freezes at 0.3360 -- during training the accuracy seemed much higher (~80) but the summaries tell a different story - - -Here is a sample of my training logs: - - From 4b2f63ec5c9d8454e7e6da8dcf1ef2176bd94552 Mon Sep 17 00:00:00 2001 From: Sam Hains Date: Sat, 5 Aug 2017 12:24:53 +1000 Subject: [PATCH 17/18] remove data file --- data.py | 19 ------------------- 1 file changed, 19 deletions(-) delete mode 100644 data.py diff --git a/data.py b/data.py deleted file mode 100644 index d351598..0000000 --- a/data.py +++ /dev/null @@ -1,19 +0,0 @@ -import cv2 -import glob -import os -import numpy as np -from keras.utils.np_utils import to_categorical - -def load_inria_person(path): - pos_path = "./data/train/pos" - neg_path = "./data/train/neg" - pos_images = [cv2.resize(cv2.imread(x), (128, 128)) for x in glob.glob(pos_path + "/*.jpg")] - pos_images = [np.transpose(img, (2, 0, 1)) for img in pos_images] - neg_images = [cv2.resize(cv2.imread(x), (128, 128)) for x in glob.glob(neg_path + "/*.jpg")] - print(pos_path, neg_path) - neg_images = [np.transpose(img, (2, 0, 1)) for img in neg_images] - y = [1] * len(pos_images) + [0] * len(neg_images) - y = to_categorical(y, 2) - X = np.float32(pos_images + neg_images) - - return X, y From ce7de258e874fb0318fdcb6c7d6c04025d18d858 Mon Sep 17 00:00:00 2001 From: Sam Hains Date: Sat, 5 Aug 2017 12:48:36 +1000 Subject: [PATCH 18/18] add dynamic argmax selection for picking which class to visualize --- cam.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cam.py b/cam.py index ca6e434..4c243fb 100644 --- a/cam.py +++ b/cam.py @@ -72,7 +72,10 @@ def visualize_class_activation_map(model_path, img_path, output_path): #Create the class activation map. cam = np.zeros(dtype = np.float32, shape = conv_outputs.shape[1:3]) - for i, w in enumerate(class_weights[:, 1]): + + class_index = predictions.argmax() + print(class_index) + for i, w in enumerate(class_weights[:, class_index]): cam += w * conv_outputs[i, :, :] print "predictions", predictions cam /= np.max(cam)