# VGG19.py

# -*- coding: utf-8 -*-
"""cnn.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1jGRcwN-2pY2kp-FhxcB8eIpCvXiyKSB3
"""

# Mount Google Drive at /content/drive; every path configured in this script
# (dataset directories, saved model, results CSV) lives under that mount point.
from google.colab import drive
drive.mount('/content/drive')

#
# **************************************************************************************************************************************************
#	Libraries
# **************************************************************************************************************************************************
#

from keras import optimizers
from keras import applications
from keras.models import Model
from keras.layers import Flatten, Dense
from keras.preprocessing.image import ImageDataGenerator
from keras.utils.np_utils import to_categorical
from keras.callbacks import EarlyStopping, ModelCheckpoint
from math import ceil

import datetime
import json
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

#
# **************************************************************************************************************************************************
#	Global Variables
# **************************************************************************************************************************************************
#

# --- Output artefacts -------------------------------------------------------

# full model file
model_filename = '/content/drive/My Drive/FYP/Project/model.h5'

# weights-only files: the final weights and the checkpoint written while training
weights_filename = '/content/drive/My Drive/FYP/Project/model_weights.h5'
modelcheckpoint_filename = '/content/drive/My Drive/FYP/Project/model_checkpoint.h5'

# CSV receiving one predicted label per test file
results_filename = '/content/drive/My Drive/FYP/Project/results.csv'

# --- Dataset locations ------------------------------------------------------

# the validation images are split off from the training directory
# (validation_split), so no separate validation directory is configured
train_data_dir = '/content/drive/My Drive/FYP/Project/data/train'
test_data_dir = '/content/drive/My Drive/FYP/Project/data/test'

# only referenced by the (commented-out) save_to_dir option of the training generator
output_dir = '/content/drive/My Drive/FYP/Project/output'

# --- Network input ----------------------------------------------------------

# square input images, 3 channels
img_width = img_height = 224
channels = 3

# --- Training hyper-parameters ----------------------------------------------

epochs = 34
training_batch_size = 2
validation_batch_size = 1
learning_rate = 0.0001

# --- Labels -----------------------------------------------------------------

# class mode handed to flow_from_directory, and the number of output classes
class_mode = 'categorical'
total_classes = 2

#
# **************************************************************************************************************************************************
#	Functions
# **************************************************************************************************************************************************
#

def summary():
    """Print the layer summary of `vgg19` and plot the training curves.

    Reads the module-level `history` object and draws two figures: training
    vs. validation accuracy, then training vs. validation loss, both against
    the epoch index. Finishes by showing the figures.
    """
    vgg19.summary()

    # pull the four recorded series out of the training history
    recorded = history.history
    train_acc = recorded['acc']
    valid_acc = recorded['val_acc']
    train_loss = recorded['loss']
    valid_loss = recorded['val_loss']
    epoch_axis = range(len(train_acc))

    def draw_curves(train_values, valid_values, train_label, valid_label, title, y_label):
        # training series in red, validation series in blue, on the current figure
        plt.plot(epoch_axis, train_values, 'r', label = train_label)
        plt.plot(epoch_axis, valid_values, 'b', label = valid_label)
        plt.title(title)
        plt.ylabel(y_label)
        plt.xlabel('Epoch')
        plt.legend()

    draw_curves(train_acc, valid_acc,
                'Training Acc', 'Validation Acc',
                'Training and Validation Accuracy', 'Accuracy')

    # open a second figure so the loss curves do not overlay the accuracy ones
    plt.figure()
    draw_curves(train_loss, valid_loss,
                'Training Loss', 'Validation Loss',
                'Training and Validation Loss', 'Loss')

    plt.show()

def saveModel():
    """Persist the trained classifier.

    Writes the full model to `model_filename`, its weights to
    `weights_filename`, and the architecture description to
    ``model_in_json.json`` in the current working directory.
    """
    # Save `vgg19` (VGG19 base + Flatten/Dense head), not `model`: `model` is
    # only the headless convolutional base, so saving it would drop the
    # trained classification layer from every artefact.
    vgg19.save(model_filename)
    vgg19.save_weights(weights_filename)

    # to_json() already returns a JSON string, so json.dump() stores it as a
    # quoted JSON string. The on-disk format is left unchanged on purpose:
    # presumably the loading side does json.load() before model_from_json()
    # -- verify against the code that reads this file.
    architecture_json = vgg19.to_json()
    with open("model_in_json.json", "w") as json_file:
        json.dump(architecture_json, json_file)

#
# **************************************************************************************************************************************************
#	Main Code
# **************************************************************************************************************************************************
#

# Build the network: the VGG19 convolutional base with ImageNet weights and
# without its original top layers, followed by a flatten layer and a softmax
# layer with one unit per class.
model = applications.VGG19(
    weights = 'imagenet',
    include_top = False,
    input_shape = (img_width, img_height, channels),
    classes = total_classes)

# attach the new classification head to the output of the base
flat = Flatten()(model.output)
output = Dense(units = total_classes, activation = "softmax")(flat)

# the complete trainable classifier used by the rest of the script
vgg19 = Model(inputs = model.inputs, outputs = output)

# Fraction of the images under train_data_dir that is held out for validation.
# Both generators below must use the same value so that the "training" and
# "validation" subsets stay complementary.
validation_fraction = 0.4

# Training images: pixel values scaled by 1/255 plus random augmentation.
train_datagen = ImageDataGenerator(rescale = 1. / 255,
                                   rotation_range = 90,
                                   brightness_range = [0.2, 1.0],
                                   shear_range = 0.2,
                                   zoom_range = 0.3,
                                   width_shift_range = 1.0,
                                   height_shift_range = 1.0,
                                   horizontal_flip = True,
                                   vertical_flip = True,
                                   validation_split = validation_fraction)

# Validation images: same scaling but NO random augmentation. Drawing the
# validation subset through train_datagen would distort every validation image
# at random, making val_loss / val_acc noisy and not comparable between epochs.
validation_datagen = ImageDataGenerator(rescale = 1. / 255,
                                        validation_split = validation_fraction)

# current time (start of the timed section)
start = datetime.datetime.now()

# training data
# (add save_to_dir = output_dir, save_prefix = 'Aug' to dump the augmented
# images for visual inspection)
training_set = train_datagen.flow_from_directory(
    train_data_dir,
    target_size = (img_width, img_height),
    batch_size = training_batch_size,
    class_mode = class_mode,
    subset = "training",
    seed = 42,
    shuffle = True)

nb_train_samples = len(training_set.filenames)

# validation data: the held-out part of the same directory
validation_set = validation_datagen.flow_from_directory(
    train_data_dir,
    target_size = (img_width, img_height),
    batch_size = validation_batch_size,
    class_mode = class_mode,
    subset = "validation",
    seed = 42,
    shuffle = True)

nb_validation_samples = len(validation_set.filenames)

# Plain SGD on categorical cross-entropy; the metric is registered as 'acc',
# which is the key the plotting code reads from the training history.
vgg19.compile(loss = 'categorical_crossentropy',
              optimizer = optimizers.SGD(lr = learning_rate),
              metrics = ['acc'])

# TODO: early stopping is disabled for the current configuration; to restore
# it, uncomment the callback below and pass callbacks = [es, mc] to
# fit_generator.
# es = EarlyStopping(monitor = 'val_loss',
#                     min_delta = 1e-5,
#                     patience = 5,
#                     verbose = 1,
#                     mode = 'min',
#                     restore_best_weights = True)

# Keep only the weights of the epoch with the lowest validation loss.
mc = ModelCheckpoint(modelcheckpoint_filename,
                     monitor = 'val_loss',
                     mode = 'min',
                     save_weights_only = True,
                     save_best_only = True)

# Step counts must be whole numbers of batches: round up so that a final,
# partially filled batch is still consumed when the sample count is not a
# multiple of the batch size.
history = vgg19.fit_generator(training_set,
                              steps_per_epoch = ceil(nb_train_samples / training_batch_size),
                              epochs = epochs,
                              callbacks = [mc],
                              validation_data = validation_set,
                              validation_steps = ceil(nb_validation_samples / validation_batch_size))

# Plot the training curves and persist the trained model.
summary()
saveModel()

# Evaluate on the validation subset. The number of steps is derived from the
# validation batch size (rounded up) instead of assuming one image per batch,
# so one pass covers the subset exactly once whatever the batch size is.
(eval_loss, eval_accuracy) = vgg19.evaluate_generator(
    validation_set,
    steps = ceil(nb_validation_samples / validation_batch_size),
    verbose = 1)

print("[INFO] Accuracy: {:.2f}%".format(eval_accuracy * 100))
print("[INFO] Loss: {}".format(eval_loss))

# Wall-clock time since `start` was taken (before the data generators were set up).
end = datetime.datetime.now()
elapsed = end - start
print('[INFO] Time Elapsed: ', elapsed)


# test data
# Apply the same 1/255 scaling the network was trained with; feeding raw
# 0-255 pixel values to a model trained on scaled inputs gives meaningless
# predictions.
test_datagen = ImageDataGenerator(rescale = 1. / 255)

# shuffle must be off: predictions come back in generator order, and they are
# paired below with test_set.filenames, which is always in directory order.
# With shuffling enabled each file would be matched with another file's
# prediction.
test_set = test_datagen.flow_from_directory(
    test_data_dir,
    target_size = (img_width, img_height),
    batch_size = 1,
    class_mode = class_mode,
    shuffle = False)

# one step per batch; reset so prediction starts at the first file
step_size_test = test_set.n // test_set.batch_size
test_set.reset()

pred = vgg19.predict_generator(test_set,
                               steps = step_size_test,
                               verbose = 1)

# index of the highest-scoring class for every test image
predicted_class_indices = np.argmax(pred, axis = 1)

# class_indices maps class name -> index; invert it to translate the
# predicted indices back into class names
labels = (test_set.class_indices)
labels = {index: name for name, index in labels.items()}
predictions = [labels[k] for k in predicted_class_indices]

# write one row per test file: its path and the predicted class name
filenames = test_set.filenames
results = pd.DataFrame({"Filename": filenames,
                        "Prediction": predictions})
results.to_csv(results_filename, index = False)