mnist_2.1_five_layers_relu_lrdecay.py
# encoding: UTF-8
# Copyright 2016 Google.com
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
import tensorflowvisu
from tensorflow.examples.tutorials.mnist import input_data as mnist_data
from math import exp
print("Tensorflow version " + tf.__version__)
tf.set_random_seed(0)
# neural network with 5 layers: 4 fully connected relu layers and a softmax output layer
#
# · · · · · · · · · ·    (input data, flattened pixels)      X [batch, 784]   # 784 = 28 * 28
# \x/x\x/x\x/x\x/x\x/ -- fully connected layer (relu)        W1 [784, 200]    b1 [200]
#  · · · · · · · · ·                                         a1 [batch, 200]
#   \x/x\x/x\x/x\x/   -- fully connected layer (relu)        W2 [200, 100]    b2 [100]
#    · · · · · · ·                                           a2 [batch, 100]
#     \x/x\x/x\x/     -- fully connected layer (relu)        W3 [100, 60]     b3 [60]
#      · · · · ·                                             a3 [batch, 60]
#       \x/x\x/       -- fully connected layer (relu)        W4 [60, 30]      b4 [30]
#        · · ·                                               a4 [batch, 30]
#         \x/         -- fully connected layer (softmax)     W5 [30, 10]      b5 [10]
#          ·                                                 Y [batch, 10]
# The model is:
#
# a1 = relu(X_flat * W1 + b1)
# a2 = relu(a1 * W2 + b2)
# a3 = relu(a2 * W3 + b3)
# a4 = relu(a3 * W4 + b4)
# Y  = softmax(a4 * W5 + b5)
# X: matrix for 100 grayscale images of 28x28 pixels, flattened (there are 100 images in a mini-batch)
# W1..W5: weight matrices, b1..b5: bias vectors
# +: add with broadcasting: adds the vector to each line of the matrix (numpy)
# softmax(matrix) applies softmax on each line
# softmax(line) applies an exp to each value then divides by the sum of the resulting line
# Y: output matrix with 100 lines and 10 columns
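# a quick numpy sanity check of the softmax formula above (illustrative only, not
# part of the model; uncomment to run):
# import numpy as np
# z = np.array([1.0, 2.0, 3.0])
# print(np.exp(z) / np.sum(np.exp(z)))   # ~[0.090, 0.245, 0.665], values sum to 1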
# Download images and labels into mnist.test (10K images+labels) and mnist.train (60K images+labels)
mnist = mnist_data.read_data_sets("data", one_hot=True, reshape=False, validation_size=0)
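# with reshape=False the images keep their 2D shape: mnist.train.images is
# [60000, 28, 28, 1] (validation_size=0 puts all 60K images in the training set)
# and the one-hot labels are [60000, 10]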
# variable learning rate, fed into the graph at each training step (decayed over time)
alpha = tf.placeholder(tf.float32)
# input X: 28x28 grayscale images, the first dimension (None) will index the images in the mini-batch
X = tf.placeholder(tf.float32, [None, 28, 28, 1])
# correct answers will go here
Y_ = tf.placeholder(tf.float32, [None, 10])
# flatten the images into a single line of pixels
# -1 in the shape definition means "the only possible dimension that will preserve the number of elements"
X_flat = tf.reshape(X, shape=[-1, 784])
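# e.g. a mini-batch of shape [100, 28, 28, 1] is reshaped to [100, 784] here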
#W1 = tf.get_variable('W1', [784, 200], initializer=tf.random_normal_initializer(stddev=0.1))
W1 = tf.Variable(tf.truncated_normal([784, 200], stddev=0.1))
#b1 = tf.get_variable('b1', [200], initializer=tf.zeros_initializer)
b1 = tf.Variable(tf.ones([200])/10)
#W2 = tf.get_variable('W2', [200, 100], initializer=tf.random_normal_initializer(stddev=0.1))
W2 = tf.Variable(tf.truncated_normal([200, 100], stddev=0.1))
#b2 = tf.get_variable('b2', [100], initializer=tf.zeros_initializer)
b2 = tf.Variable(tf.ones([100])/10)
#W3 = tf.get_variable('W3', [100, 60], initializer=tf.random_normal_initializer(stddev=0.1))
W3 = tf.Variable(tf.truncated_normal([100, 60], stddev=0.1))
#b3 = tf.get_variable('b3', [60], initializer=tf.zeros_initializer)
b3 = tf.Variable(tf.ones([60])/10)
#W4 = tf.get_variable('W4', [60, 30], initializer=tf.random_normal_initializer(stddev=0.1))
W4 = tf.Variable(tf.truncated_normal([60, 30], stddev=0.1))
#b4 = tf.get_variable('b4', [30], initializer=tf.zeros_initializer)
b4 = tf.Variable(tf.ones([30])/10)
#W5 = tf.get_variable('W5', [30, 10], initializer=tf.random_normal_initializer(stddev=0.1))
W5 = tf.Variable(tf.truncated_normal([30, 10], stddev=0.1))
#b5 = tf.get_variable('b5', [10], initializer=tf.zeros_initializer)
b5 = tf.Variable(tf.ones([10])/10)
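# note: the biases are initialised at 0.1 rather than 0; with relu activations,
# small positive biases keep more neurons in their active region at the start of
# training and reduce the number of "dead" units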
a1 = tf.nn.relu(tf.matmul(X_flat, W1) + b1)
a2 = tf.nn.relu(tf.matmul(a1, W2) + b2)
a3 = tf.nn.relu(tf.matmul(a2, W3) + b3)
a4 = tf.nn.relu(tf.matmul(a3, W4) + b4)
Ylogits = tf.matmul(a4, W5) + b5
Y = tf.nn.softmax(Ylogits)
# loss function: cross-entropy = - sum( Y_i * log(Yi) )
#           Y: the computed output vector
#           Y_: the desired output vector
# the cross-entropy is computed on the logits with softmax_cross_entropy_with_logits,
# which is numerically stable and avoids log(0) = NaN on confident predictions
# reduce_mean averages over all images in the batch; the * 100 rescales the loss
# for mini-batches of 100 images
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=Ylogits, labels=Y_)) * 100
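# illustrative only: for a single image, if the predicted probability of the correct
# class is p, the per-image cross-entropy is -log(p): p = 0.9 gives ~0.105 while
# p = 0.1 gives ~2.303, so confident wrong answers are penalised much more heavily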
# accuracy of the trained model, between 0 (worst) and 1 (best)
correct_prediction = tf.equal(tf.argmax(Y, 1), tf.argmax(Y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
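# illustrative only: tf.argmax picks the index of the highest value per line, so if
# 92 out of 100 predictions match the labels, the mean of the cast booleans is 0.92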
# training step with the Adam optimiser; the learning rate comes in through the
# 'alpha' placeholder so it can be decayed from one step to the next
train_step = tf.train.AdamOptimizer(alpha).minimize(cross_entropy)
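# a plain gradient descent optimiser would also train this model (commented alternative):
# train_step = tf.train.GradientDescentOptimizer(alpha).minimize(cross_entropy)
# Adam adapts per-parameter step sizes and typically converges faster here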
# matplotlib visualisation
allweights = tf.concat([tf.reshape(W1, [-1]),
                        tf.reshape(W2, [-1]),
                        tf.reshape(W3, [-1]),
                        tf.reshape(W4, [-1]),
                        tf.reshape(W5, [-1])], 0)
allbiases = tf.concat([tf.reshape(b1, [-1]),
                       tf.reshape(b2, [-1]),
                       tf.reshape(b3, [-1]),
                       tf.reshape(b4, [-1]),
                       tf.reshape(b5, [-1])], 0)
I = tensorflowvisu.tf_format_mnist_images(X, Y, Y_) # assembles 10x10 images by default
It = tensorflowvisu.tf_format_mnist_images(X, Y, Y_, 1000, lines=25) # 1000 images on 25 lines
datavis = tensorflowvisu.MnistDataVis()
# init
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)
# You can call this function in a loop to train the model, 100 images at a time
def training_step(i, update_test_data, update_train_data):
    # learning rate decay: exponential decay from lrmax down towards lrmin,
    # with a time constant of 2000 iterations
    # (2000.0 ensures float division under Python 2 as well)
    lrmax = 0.003
    lrmin = 0.0001
    learning_rate = lrmin + (lrmax - lrmin) * exp(-i / 2000.0)
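    # illustrative values of the decay schedule above:
    #   i = 0    -> learning_rate = 0.003   (lrmax)
    #   i = 2000 -> learning_rate ~ 0.00117 (lrmin + (lrmax - lrmin) / e)
    #   i = 5000 -> learning_rate ~ 0.00034, approaching lrmin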
    # training on batches of 100 images with 100 labels
    batch_X, batch_Y = mnist.train.next_batch(100)
    # compute training values for visualisation
    if update_train_data:
        a, c, im, w, b = sess.run([accuracy, cross_entropy, I, allweights, allbiases], feed_dict={X: batch_X, Y_: batch_Y})
        datavis.append_training_curves_data(i, a, c)
        datavis.append_data_histograms(i, w, b)
        datavis.update_image1(im)
        print(str(i) + ": accuracy:" + str(a) + " loss: " + str(c))
    # compute test values for visualisation
    if update_test_data:
        a, c, im = sess.run([accuracy, cross_entropy, It], feed_dict={X: mnist.test.images, Y_: mnist.test.labels})
        datavis.append_test_curves_data(i, a, c)
        datavis.update_image2(im)
        print(str(i) + ": ********* epoch " + str(i*100//mnist.train.images.shape[0]+1) + " ********* test accuracy:" + str(a) + " test loss: " + str(c))
    # the backpropagation training step
    sess.run(train_step, feed_dict={X: batch_X, Y_: batch_Y, alpha: learning_rate})
datavis.animate(training_step, iterations=5000+1, train_data_update_freq=10, test_data_update_freq=50, more_tests_at_start=True)
# to save the animation as a movie, add save_movie=True as an argument to datavis.animate
# to disable the visualisation use the following line instead of the datavis.animate line
# for i in range(2000+1): training_step(i, i % 50 == 0, i % 10 == 0)
print("max test accuracy: " + str(datavis.get_max_test_accuracy()))
# final max test accuracy = 0.9268 (10K iterations). Accuracy should peak above 0.92 in the first 2000 iterations.