diff --git a/README.md b/README.md
index 464377e..7cd9fe3 100644
--- a/README.md
+++ b/README.md
@@ -7,6 +7,7 @@ This repository contains code for the following Keras models:
 - ResNet50
 - Inception v3
 - CRNN for music tagging
+- SqueezeNet
 
 All architectures are compatible with both TensorFlow and Theano, and upon instantiation the models will be built according to the image dimension ordering set in your Keras configuration file at `~/.keras/keras.json`. For instance, if you have set `image_dim_ordering=tf`, then any model loaded from this repository will get built according to the TensorFlow dimension ordering convention, "Width-Height-Depth".
@@ -78,6 +79,7 @@ block4_pool_features = model.predict(x)
 - [Deep Residual Learning for Image Recognition](https://arxiv.org/abs/1512.03385) - please cite this paper if you use the ResNet model in your work.
 - [Rethinking the Inception Architecture for Computer Vision](http://arxiv.org/abs/1512.00567) - please cite this paper if you use the Inception v3 model in your work.
 - [Music-auto_tagging-keras](https://github.com/keunwoochoi/music-auto_tagging-keras)
+- [SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and <0.5MB model size](https://arxiv.org/abs/1602.07360) - please cite this paper if you use the SqueezeNet model in your work.
 
 Additionally, don't forget to [cite Keras](https://keras.io/getting-started/faq/#how-should-i-cite-keras) if you use these models.
@@ -88,3 +90,4 @@ Additionally, don't forget to [cite Keras](https://keras.io/getting-started/faq/
 - The ResNet50 weights are ported from the ones [released by Kaiming He](https://github.com/KaimingHe/deep-residual-networks) under the [MIT license](https://github.com/KaimingHe/deep-residual-networks/blob/master/LICENSE).
 - The VGG16 and VGG19 weights are ported from the ones [released by VGG at Oxford](http://www.robots.ox.ac.uk/~vgg/research/very_deep/) under the [Creative Commons Attribution License](https://creativecommons.org/licenses/by/4.0/).
 - The Inception v3 weights are trained by ourselves and are released under the MIT license.
+- The SqueezeNet weights are ported from the ones [released by the DeepScale team](https://github.com/DeepScale/SqueezeNet) under the [BSD license](https://github.com/DeepScale/SqueezeNet/blob/master/LICENSE).
diff --git a/squeezenet.py b/squeezenet.py
new file mode 100644
index 0000000..36ef3fb
--- /dev/null
+++ b/squeezenet.py
@@ -0,0 +1,172 @@
+'''SqueezeNet model for Keras.
+
+# Reference:
+
+- [SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and <0.5MB model size](https://arxiv.org/abs/1602.07360)
+
+# Keras Project Reference:
+
+- [keras-squeezenet](https://github.com/rcmalli/keras-squeezenet)
+
+# Original Project Reference:
+
+- [Original SqueezeNet](https://github.com/DeepScale/SqueezeNet)
+
+'''
+
+from keras.layers import Input, merge
+from keras.layers.convolutional import Convolution2D, MaxPooling2D
+from keras.layers.core import Dropout, Activation
+from keras.layers.pooling import GlobalAveragePooling2D
+from keras.models import Model
+from keras import backend as K
+from keras.utils.layer_utils import convert_all_kernels_in_model
+from keras.utils.data_utils import get_file
+from keras.preprocessing import image
+from imagenet_utils import decode_predictions, preprocess_input
+import numpy as np
+import warnings
+
+
+TH_WEIGHTS_PATH = 'https://github.com/rcmalli/deep-learning-models/releases/download/v0.4/squeezenet_weights_th_dim_ordering_th_kernels.h5'
+TF_WEIGHTS_PATH = 'https://github.com/rcmalli/deep-learning-models/releases/download/v0.4/squeezenet_weights_tf_dim_ordering_tf_kernels.h5'
+
+
+# Modular function for the Fire module: a 1x1 "squeeze" convolution
+# followed by parallel 1x1 and 3x3 "expand" convolutions whose outputs
+# are concatenated along the channel axis.
+
+def fire_module(x, fire_id, squeeze=16, expand=64):
+    sq1x1, exp1x1, exp3x3, relu = "squeeze1x1", "expand1x1", "expand3x3", "relu_"
+    s_id = 'fire' + str(fire_id) + '/'
+
+    # Channels sit on axis 3 with TensorFlow ordering, axis 1 with Theano ordering.
+    if K.image_dim_ordering() == 'tf':
+        c_axis = 3
+    else:
+        c_axis = 1
+
+    x = Convolution2D(squeeze, 1, 1, border_mode='valid', name=s_id + sq1x1)(x)
+    x = Activation('relu', name=s_id + relu + sq1x1)(x)
+
+    left = Convolution2D(expand, 1, 1, border_mode='valid', name=s_id + exp1x1)(x)
+    left = Activation('relu', name=s_id + relu + exp1x1)(left)
+
+    right = Convolution2D(expand, 3, 3, border_mode='same', name=s_id + exp3x3)(x)
+    right = Activation('relu', name=s_id + relu + exp3x3)(right)
+
+    x = merge([left, right], mode='concat', concat_axis=c_axis, name=s_id + 'concat')
+    return x
+
+
+def SqueezeNet(include_top=True, weights='imagenet', input_tensor=None):
+    '''Instantiate the SqueezeNet architecture,
+    optionally loading weights pre-trained
+    on ImageNet. Note that when using TensorFlow,
+    for best performance you should set
+    `image_dim_ordering="tf"` in your Keras config
+    at ~/.keras/keras.json.
+
+    The model and the weights are compatible with both
+    TensorFlow and Theano. The dimension ordering
+    convention used by the model is the one
+    specified in your Keras config file.
+
+    # Arguments
+        include_top: whether to include the convolutional
+            classification block (conv10, global average pooling
+            and softmax) at the top of the network. SqueezeNet
+            has no fully-connected layers.
+        weights: one of `None` (random initialization)
+            or "imagenet" (pre-training on ImageNet).
+        input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
+            to use as image input for the model.
+
+    # Returns
+        A Keras model instance.
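+
+    # Example
+
+    A minimal usage sketch: extracting convolutional features with
+    `include_top=False` (`elephant.jpg` is a placeholder path, assumed
+    to exist next to the script):
+
+    ```python
+    model = SqueezeNet(include_top=False, weights='imagenet')
+    img = image.load_img('elephant.jpg', target_size=(227, 227))
+    x = preprocess_input(np.expand_dims(image.img_to_array(img), axis=0))
+    features = model.predict(x)  # fire9 feature maps after dropout
+    ```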
+    '''
+
+    if weights not in {'imagenet', None}:
+        raise ValueError('The `weights` argument should be either '
+                         '`None` (random initialization) or `imagenet` '
+                         '(pre-training on ImageNet).')
+
+    if K.image_dim_ordering() == 'th':
+        input_shape = (3, 227, 227)
+    else:
+        input_shape = (227, 227, 3)
+
+    if input_tensor is None:
+        img_input = Input(shape=input_shape)
+    else:
+        if not K.is_keras_tensor(input_tensor):
+            img_input = Input(tensor=input_tensor)
+        else:
+            img_input = input_tensor
+
+    x = Convolution2D(64, 3, 3, subsample=(2, 2), border_mode='valid', name='conv1')(img_input)
+    x = Activation('relu', name='relu_conv1')(x)
+    x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), name='pool1')(x)
+
+    x = fire_module(x, fire_id=2, squeeze=16, expand=64)
+    x = fire_module(x, fire_id=3, squeeze=16, expand=64)
+    x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), name='pool3')(x)
+
+    x = fire_module(x, fire_id=4, squeeze=32, expand=128)
+    x = fire_module(x, fire_id=5, squeeze=32, expand=128)
+    x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), name='pool5')(x)
+
+    x = fire_module(x, fire_id=6, squeeze=48, expand=192)
+    x = fire_module(x, fire_id=7, squeeze=48, expand=192)
+    x = fire_module(x, fire_id=8, squeeze=64, expand=256)
+    x = fire_module(x, fire_id=9, squeeze=64, expand=256)
+    x = Dropout(0.5, name='drop9')(x)
+
+    if include_top:
+        # Classification block: 1x1 convolution to 1000 classes, then
+        # global average pooling and softmax (no fully-connected layers).
+        x = Convolution2D(1000, 1, 1, border_mode='valid', name='conv10')(x)
+        x = Activation('relu', name='relu_conv10')(x)
+        x = GlobalAveragePooling2D()(x)
+        x = Activation('softmax', name='loss')(x)
+
+    model = Model(input=img_input, output=[x])
+
+    # load weights
+    if weights == 'imagenet':
+        if K.image_dim_ordering() == 'th':
+            weights_path = get_file('squeezenet_weights_th_dim_ordering_th_kernels.h5',
+                                    TH_WEIGHTS_PATH,
+                                    cache_subdir='models')
+            model.load_weights(weights_path, by_name=True)
+            if K.backend() == 'tensorflow':
+                warnings.warn('You are using the TensorFlow backend, yet you '
+                              'are using the Theano '
+                              'image dimension ordering convention '
+                              '(`image_dim_ordering="th"`). '
+                              'For best performance, set '
+                              '`image_dim_ordering="tf"` in '
+                              'your Keras config '
+                              'at ~/.keras/keras.json.')
+                convert_all_kernels_in_model(model)
+        else:
+            weights_path = get_file('squeezenet_weights_tf_dim_ordering_tf_kernels.h5',
+                                    TF_WEIGHTS_PATH,
+                                    cache_subdir='models')
+            model.load_weights(weights_path, by_name=True)
+            if K.backend() == 'theano':
+                convert_all_kernels_in_model(model)
+    return model
+
+
+if __name__ == '__main__':
+    import time
+
+    model = SqueezeNet()
+
+    start = time.time()
+    img_path = 'elephant.jpg'
+    img = image.load_img(img_path, target_size=(227, 227))
+    x = image.img_to_array(img)
+    x = np.expand_dims(x, axis=0)
+    x = preprocess_input(x)
+    print('Input image shape:', x.shape)
+
+    preds = model.predict(x)
+    print('Predicted:', decode_predictions(preds))
+
+    duration = time.time() - start
+    print('{} s to get output'.format(duration))
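
For reviewers trying out the new module, a minimal sketch of driving it through its `input_tensor` argument (this is not part of the diff; it assumes `squeezenet.py` from this change is importable and that `image_dim_ordering="tf"` is set in `~/.keras/keras.json`):

```python
from keras.layers import Input
from squeezenet import SqueezeNet

# Build the model on a caller-supplied input tensor instead of letting
# SqueezeNet() create its own Input layer (227x227 RGB, tf ordering assumed).
input_tensor = Input(shape=(227, 227, 3))
model = SqueezeNet(include_top=True, weights='imagenet', input_tensor=input_tensor)
model.summary()
```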