forked from SullyChen/Autopilot-TensorFlow
-
Notifications
You must be signed in to change notification settings - Fork 1
/
train.py
134 lines (97 loc) · 3.49 KB
/
train.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
"""Training of the NVIDIA end-to-end deep learning model for self-driving cars.
This script is used to train the NVIDIA end-to-end deep learning model
using images collected from a dashcam.
"""
import os
import tensorflow as tf
import driving_data
import steering_model as sm
#
# Initialize parameters
#
# File and folder paths
MODELDIR = './save'   # checkpoints, graph.pb and frozen_graph.pb are written here
LOGDIR = './logs'     # TensorBoard event files are written here
# Learning parameters
L2NormConst = 0.001   # weight of the L2 regularization term added to the MSE loss
learning_rate = 0.0001  # Adam optimizer learning rate
epochs = 30           # number of full passes over the training set
batch_size = 100      # images per training batch (also reused as the checkpoint interval)
#
# Build a steering model
#
# Initialize TensorFlow session (TF 1.x interactive session, becomes the default)
sess = tf.InteractiveSession()
# Build the steering model graph; exposes model.x, model.y, model.y_, model.keep_prob
model = sm.Model()
# Create the model/checkpoint folder.
# exist_ok=True replaces the check-then-create pattern, which is race-prone
# and fails if the directory appears between the exists() check and makedirs().
os.makedirs(MODELDIR, exist_ok=True)
# Save the (untrained) graph definition to a protobuf file
tf.train.write_graph(sess.graph_def, MODELDIR, 'graph.pb')
#
# Loss function
#
# Mean square error loss between predicted steering (model.y) and label (model.y_)
mse_loss = tf.reduce_mean(tf.square(tf.subtract(model.y_, model.y)))
# L2 regularization loss over every trainable variable, scaled by L2NormConst
train_vars = tf.trainable_variables()
l2_loss = tf.add_n([tf.nn.l2_loss(v) for v in train_vars]) * L2NormConst
# Total training loss minimized by the optimizer
loss = mse_loss + l2_loss
#
# Prepare training
#
# Use the Adam optimizer for gradient descent
optimizer = tf.train.AdamOptimizer(learning_rate)
# Training step minimizes the total (MSE + L2) loss
train_step = optimizer.minimize(loss)
# Initialize all variables — run after minimize() so Adam's slot variables exist
sess.run(tf.global_variables_initializer())
# Create summaries to monitor the loss tensors
tf.summary.scalar("total loss", loss)
tf.summary.scalar("L2 loss", l2_loss)
tf.summary.scalar("MSE loss", mse_loss)
# Merge all summaries into a single op
merged_summary_op = tf.summary.merge_all()
# Create checkpoint saver
saver = tf.train.Saver()
# Ensure the log directory exists BEFORE the writer is pointed at it
# (the original created it only after constructing the FileWriter).
os.makedirs(LOGDIR, exist_ok=True)
# Op to write logs to TensorBoard
summary_writer = tf.summary.FileWriter(LOGDIR, graph=tf.get_default_graph())
#
# Training
#
# Display instructions for TensorBoard
print("\nTraining...")
print("To track the training open TensorBoard:\n\n"
      "tensorboard --logdir " + LOGDIR + "\n\n")

step = 0  # global step counter, incremented once per training batch
# Train over the dataset
for epoch in range(epochs):
    for i in range(int(driving_data.num_images / batch_size)):
        # Load a batch of training images and steering labels
        xs, ys = driving_data.LoadTrainBatch(batch_size)
        # Run one gradient-descent step on training data with dropout enabled
        train_step.run(feed_dict={model.x: xs, model.y_: ys, model.keep_prob: 0.8})
        # Every 10 batches, report the loss on a validation batch (dropout off)
        if i % 10 == 0:
            xs, ys = driving_data.LoadValBatch(batch_size)
            loss_value = loss.eval(feed_dict={model.x: xs, model.y_: ys, model.keep_prob: 1.0})
            print("Epoch: %d, Step: %d, Loss: %g" % (epoch, step, loss_value))
        # Write summary logs at every iteration (uses the most recently loaded batch)
        summary = merged_summary_op.eval(feed_dict={model.x: xs, model.y_: ys, model.keep_prob: 1.0})
        summary_writer.add_summary(summary, global_step=step)
        # Save a checkpoint and a frozen inference graph every batch_size batches
        if i % batch_size == batch_size - 1:
            checkpoint_path = os.path.join(MODELDIR, "model.ckpt")
            filename = saver.save(sess, checkpoint_path, global_step=step)
            # NOTE(review): assumes the model's output op is named 'y' —
            # confirm against steering_model.Model before relying on frozen_graph.pb
            frozen_graph_def = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['y'])
            tf.train.write_graph(frozen_graph_def, MODELDIR, 'frozen_graph.pb', as_text=False)
            # Print inside this branch: 'filename' is only bound here, so
            # referencing it outside would raise NameError before the first save.
            print("Model saved in file: %s" % filename)
        step += 1
# Display instructions for TensorBoard
print("\nTraining completed.\n")
print("To view the training performance open TensorBoard:\n\n"
      "tensorboard --logdir " + LOGDIR + "\n\n")