# encoding: UTF-8
# Copyright Krzysztof Sopyła ([email protected])
#
#
# Licensed under the MIT license
# Network architecture:
# Five-layer neural network, input layer 28*28=784, output 10 (digits 0-9)
# Output labels use one-hot encoding
# input layer - X[batch, 784]
# 1 layer - W1[784, 200] + b1[200]
# Y1[batch, 200]
# 2 layer - W2[200, 100] + b2[100]
# Y2[batch, 100]
# 3 layer - W3[100, 60] + b3[60]
# Y3[batch, 60]
# 4 layer - W4[60, 30] + b4[30]
# Y4[batch, 30]
# 5 layer - W5[30, 10] + b5[10]
# One-hot encoded labels Y5[batch, 10]
# model
# each hidden layer: Yi = relu(Yi-1 * Wi + bi), output layer: Y = softmax(Y4 * W5 + b5)
# Matrix mul in the first layer: X*W1 - [batch,784]x[784,200] -> [batch,200]
# Training consists of finding good W elements. This will be handled automatically
# by the TensorFlow optimizer
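# As a quick sanity check (worked out from the layer shapes above, not part of
# the original comments), the total number of trainable parameters is:
#   784*200 + 200 = 157,000   (layer 1)
#   200*100 + 100 =  20,100   (layer 2)
#   100*60  + 60  =   6,060   (layer 3)
#   60*30   + 30  =   1,830   (layer 4)
#   30*10   + 10  =     310   (layer 5)
#   total         = 185,300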
import visualizations as vis
import tensorflow as tf
from tensorflow.contrib.learn.python.learn.datasets.mnist import read_data_sets
NUM_ITERS = 5000
DISPLAY_STEP = 100
BATCH = 100
tf.set_random_seed(0)
# Download images and labels
mnist = read_data_sets("MNISTdata", one_hot=True, reshape=False, validation_size=0)
# mnist.test (10K images+labels) -> mnist.test.images, mnist.test.labels
# mnist.train (60K images+labels) -> mnist.train.images, mnist.train.labels
# Placeholder for input images; each data sample is a 28x28 grayscale image
# All the data will be stored in X, a 4-dimensional tensor
# The first dimension (None) will index the images in the mini-batch
X = tf.placeholder(tf.float32, [None, 28, 28, 1])
# correct answers will go here
Y_ = tf.placeholder(tf.float32, [None, 10])
# layer sizes
L1 = 200
L2 = 100
L3 = 60
L4 = 30
L5 = 10
# weights - initialized with random values from a truncated normal distribution (mean=0, stddev=0.1)
# output of one layer is the input to the next
W1 = tf.Variable(tf.truncated_normal([784, L1], stddev=0.1))
b1 = tf.Variable(tf.zeros([L1]))
W2 = tf.Variable(tf.truncated_normal([L1, L2], stddev=0.1))
b2 = tf.Variable(tf.zeros([L2]))
W3 = tf.Variable(tf.truncated_normal([L2, L3], stddev=0.1))
b3 = tf.Variable(tf.zeros([L3]))
W4 = tf.Variable(tf.truncated_normal([L3, L4], stddev=0.1))
b4 = tf.Variable(tf.zeros([L4]))
W5 = tf.Variable(tf.truncated_normal([L4, L5], stddev=0.1))
b5 = tf.Variable(tf.zeros([L5]))
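# NOTE (an aside, not from the original comments): with ReLU activations some
# practitioners initialise biases to a small positive value, e.g. 0.1, so that
# units start in the active region and avoid "dead" neurons; zero biases, as
# used here, also work fine in practice on this task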
# flatten the images: unroll each 28x28 image row by row into a vector of 784 values
# -1 in the shape definition means: compute the size of this dimension automatically
XX = tf.reshape(X, [-1, 784])
# Define model
Y1 = tf.nn.relu(tf.matmul(XX, W1) + b1)
Y2 = tf.nn.relu(tf.matmul(Y1, W2) + b2)
Y3 = tf.nn.relu(tf.matmul(Y2, W3) + b3)
Y4 = tf.nn.relu(tf.matmul(Y3, W4) + b4)
Ylogits = tf.matmul(Y4, W5) + b5
Y = tf.nn.softmax(Ylogits)
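# Note: Ylogits is kept separately because tf.nn.softmax_cross_entropy_with_logits
# below expects raw logits (it applies softmax internally, in a numerically
# stable way); Y itself is only needed for the accuracy computation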
# loss function: cross-entropy = -sum( Y_i * log(Yi) )
# Y: the computed output vector
# Y_: the desired (one-hot) output vector
# In the manual version below, log takes the log of each element and * multiplies
# the tensors element by element; reduce_mean averages over all components, and
# the factor 100 rescales that mean to a per-batch total for batches of 100 images
# cross_entropy = -tf.reduce_mean(Y_ * tf.log(Y)) * 100.0
# Instead we use TensorFlow's fused op, which works on the raw logits and is
# numerically stable
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=Ylogits, labels=Y_)
cross_entropy = tf.reduce_mean(cross_entropy) * 100
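# A worked example with made-up numbers: for an image whose true label is 3,
# Y_ is one-hot with Y_[3]=1; if the network assigns probability Y[3]=0.8 the
# per-image cross-entropy is -log(0.8) ~= 0.22, while a confident miss with
# Y[3]=0.01 costs -log(0.01) ~= 4.6 (values before the *100 batch scaling)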
# accuracy of the trained model, between 0 (worst) and 1 (best)
correct_prediction = tf.equal(tf.argmax(Y, 1), tf.argmax(Y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
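# e.g. (illustrative numbers) argmax(Y, 1) picks the predicted digit for each
# image; if 93 out of a batch of 100 match argmax(Y_, 1), correct_prediction
# holds 93 True values and accuracy evaluates to 0.93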
# training: the Adam optimizer with a fixed learning rate
learning_rate = 0.003
train_step = tf.train.AdamOptimizer(learning_rate).minimize(cross_entropy)
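# For comparison (an alternative, not part of the original experiment), plain
# gradient descent could be swapped in; on this network Adam typically
# converges noticeably faster:
# train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(cross_entropy)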
# matplotlib visualisation
allweights = tf.concat([tf.reshape(W1, [-1]), tf.reshape(W2, [-1]), tf.reshape(W3, [-1]), tf.reshape(W4, [-1]), tf.reshape(W5, [-1])], 0)
allbiases = tf.concat([tf.reshape(b1, [-1]), tf.reshape(b2, [-1]), tf.reshape(b3, [-1]), tf.reshape(b4, [-1]), tf.reshape(b5, [-1])], 0)
# Initializing the variables
init = tf.global_variables_initializer()
train_losses = list()
train_acc = list()
test_losses = list()
test_acc = list()
saver = tf.train.Saver()
# Launch the graph
with tf.Session() as sess:
    sess.run(init)

    for i in range(NUM_ITERS + 1):
        # training on batches of 100 images with 100 labels
        batch_X, batch_Y = mnist.train.next_batch(BATCH)

        if i % DISPLAY_STEP == 0:
            # compute training values for visualisation
            acc_trn, loss_trn, w, b = sess.run([accuracy, cross_entropy, allweights, allbiases], feed_dict={X: batch_X, Y_: batch_Y})
            acc_tst, loss_tst = sess.run([accuracy, cross_entropy], feed_dict={X: mnist.test.images, Y_: mnist.test.labels})

            print("#{} Trn acc={} , Trn loss={} Tst acc={} , Tst loss={}".format(i, acc_trn, loss_trn, acc_tst, loss_tst))

            train_losses.append(loss_trn)
            train_acc.append(acc_trn)
            test_losses.append(loss_tst)
            test_acc.append(acc_tst)

        # the backpropagation training step
        sess.run(train_step, feed_dict={X: batch_X, Y_: batch_Y})
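    # The Saver created above is otherwise unused in this script; a minimal
    # sketch of how the trained weights could be persisted (the checkpoint
    # path is illustrative, not part of the original script):
    # saver.save(sess, "checkpoints/mnist_2.1_5_layer_nn_relu_adam.ckpt")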
title = "MNIST 2.1 5 layers relu adam"
vis.losses_accuracies_plots(train_losses, train_acc, test_losses, test_acc, title, DISPLAY_STEP)
# Results
# mnist_single_layer_nn.py          acc = 0.9237
# mnist__layer_nn.py            TST acc = 0.9534
# mnist__layer_nn_relu_adam.py  TST acc = 0.9771
# sample output for 5k iterations
#0 Trn acc=0.10000000149011612 , Trn loss=229.3443603515625 Tst acc=0.11999999731779099 , Tst loss=230.12518310546875
#100 Trn acc=0.9300000071525574 , Trn loss=30.25579071044922 Tst acc=0.8877000212669373 , Tst loss=35.22196578979492
#200 Trn acc=0.8799999952316284 , Trn loss=33.183040618896484 Tst acc=0.9417999982833862 , Tst loss=19.18865966796875
#300 Trn acc=0.9399999976158142 , Trn loss=21.5306396484375 Tst acc=0.9406999945640564 , Tst loss=19.576183319091797
# ...
#4800 Trn acc=0.949999988079071 , Trn loss=16.546607971191406 Tst acc=0.9739999771118164 , Tst loss=10.48233699798584
#4900 Trn acc=1.0 , Trn loss=0.8173556327819824 Tst acc=0.9768000245094299 , Tst loss=11.440749168395996
#5000 Trn acc=1.0 , Trn loss=0.5762706398963928 Tst acc=0.9771000146865845 , Tst loss=10.08562183380127