"""Auto-optimizing a neural network with Hyperopt (TPE algorithm)."""
from neural_net import build_and_train, build_model
from utils import print_json, save_json_result, load_best_hyperspace
from keras.utils import plot_model
import keras.backend as K
from hyperopt import hp, tpe, fmin, Trials
import pickle
import os
import traceback
__author__ = "Guillaume Chevalier"
__copyright__ = "Copyright 2017, Guillaume Chevalier"
__license__ = "MIT License"
__notice__ = (
"Some further edits by Guillaume Chevalier are made on "
"behalf of Vooban Inc. and belongs to Vooban Inc. ")
# See: https://github.com/Vooban/Hyperopt-Keras-CNN-CIFAR-100/blob/master/LICENSE"
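
# The hyperparameter space below is sampled by hyperopt's TPE algorithm. As a
# quick reference on hyperopt semantics: hp.loguniform(label, low, high) draws
# values whose log is uniform in [low, high], so a (-0.6, 0.6) multiplier spans
# roughly exp(-0.6) ~= 0.55x to exp(0.6) ~= 1.82x of the default value;
# hp.quniform draws rounded floats, and hp.choice picks one of the listed options.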
space = {
    # This loguniform scale will multiply the learning rate, so as to make
    # it vary exponentially, in a multiplicative fashion rather than in
    # a linear fashion, to handle its exponentially varying nature:
    'lr_rate_mult': hp.loguniform('lr_rate_mult', -0.5, 0.5),
    # L2 weight decay:
    'l2_weight_reg_mult': hp.loguniform('l2_weight_reg_mult', -1.3, 1.3),
    # Batch size fed for each gradient update:
    'batch_size': hp.quniform('batch_size', 100, 450, 5),
    # Choice of optimizer:
    'optimizer': hp.choice('optimizer', ['Adam', 'Nadam', 'RMSprop']),
    # Coarse labels' importance for weight updates:
    'coarse_labels_weight': hp.uniform('coarse_labels_weight', 0.1, 0.7),
    # Uniform distribution in finding appropriate dropout values, conv layers:
    'conv_dropout_drop_proba': hp.uniform('conv_dropout_proba', 0.0, 0.35),
    # Uniform distribution in finding appropriate dropout values, FC layers:
    'fc_dropout_drop_proba': hp.uniform('fc_dropout_proba', 0.0, 0.6),
    # Use batch normalisation at more places?
    'use_BN': hp.choice('use_BN', [False, True]),

    # Use a first convolution which is special?
    'first_conv': hp.choice(
        'first_conv', [None, hp.choice('first_conv_size', [3, 4])]
    ),
    # Use residual connections? If so, how many more to stack?
    'residual': hp.choice(
        'residual', [None, hp.quniform(
            'residual_units', 1 - 0.499, 4 + 0.499, 1)]
    ),
    # Let's multiply the "default" number of hidden units:
    'conv_hiddn_units_mult': hp.loguniform('conv_hiddn_units_mult', -0.6, 0.6),
    # Number of conv+pool layers stacked:
    'nb_conv_pool_layers': hp.choice('nb_conv_pool_layers', [2, 3]),
    # Starting conv+pool layer for residual connections:
    'conv_pool_res_start_idx': hp.quniform('conv_pool_res_start_idx', 0, 2, 1),
    # The type of pooling used at each subsampling step:
    'pooling_type': hp.choice('pooling_type', [
        'max',  # Max pooling
        'avg',  # Average pooling
        'all_conv',  # All-convolutional: https://arxiv.org/pdf/1412.6806.pdf
        'inception'  # Inspired by: https://arxiv.org/pdf/1602.07261.pdf
    ]),
    # The kernel_size for convolutions:
    'conv_kernel_size': hp.quniform('conv_kernel_size', 2, 4, 1),
    # The kernel_size for residual convolutions:
    'res_conv_kernel_size': hp.quniform('res_conv_kernel_size', 2, 4, 1),

    # Amount of fully-connected units after convolution feature map:
    'fc_units_1_mult': hp.loguniform('fc_units_1_mult', -0.6, 0.6),
    # Use one more FC layer at output:
    'one_more_fc': hp.choice(
        'one_more_fc', [None, hp.loguniform('fc_units_2_mult', -0.6, 0.6)]
    ),
    # Activations that are used everywhere:
    'activation': hp.choice('activation', ['relu', 'elu'])
}
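
# Note: hp.quniform and similar integer-like entries above yield floats (e.g.
# batch_size=305.0, conv_kernel_size=3.0), so the model-building code in
# neural_net.py is assumed to round/cast them to int where needed. To preview a
# random draw from this space without training anything, one could sample it
# directly with hyperopt (illustrative sketch only, not used below):
#
#     from hyperopt.pyll import stochastic
#     print_json(stochastic.sample(space))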


def plot(hyperspace, file_name_prefix):
    """Plot a model from its hyperspace."""
    model = build_model(hyperspace)
    plot_model(
        model,
        to_file='{}.png'.format(file_name_prefix),
        show_shapes=True
    )
    print("Saved model visualization to {}.png.".format(file_name_prefix))
    K.clear_session()
    del model
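
# Note: keras.utils.plot_model needs the `pydot` package and the Graphviz
# binaries installed for the PNG export in plot() above to work.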


def plot_base_model():
    """Plot a basic demo model."""
    space_base_demo_to_plot = {
        'lr_rate_mult': 1.0,
        'l2_weight_reg_mult': 1.0,
        'batch_size': 300,
        'optimizer': 'Nadam',
        'coarse_labels_weight': 0.2,
        'conv_dropout_drop_proba': 0.175,
        'fc_dropout_drop_proba': 0.3,
        'use_BN': True,
        'first_conv': 4,
        'residual': 4,
        'conv_hiddn_units_mult': 1.0,
        'nb_conv_pool_layers': 3,
        'conv_pool_res_start_idx': 0.0,
        'pooling_type': 'inception',
        'conv_kernel_size': 3.0,
        'res_conv_kernel_size': 3.0,
        'fc_units_1_mult': 1.0,
        'one_more_fc': 1.0,
        'activation': 'elu'
    }
    plot(space_base_demo_to_plot, "model_demo")


def plot_best_model():
    """Plot the best model found yet."""
    space_best_model = load_best_hyperspace()
    if space_best_model is None:
        print("No best model to plot. Continuing...")
        return

    print("Best hyperspace yet:")
    print_json(space_best_model)
    plot(space_best_model, "model_best")


def optimize_cnn(hype_space):
    """Build a convolutional neural network and train it."""
    try:
        model, model_name, result, _ = build_and_train(hype_space)

        # Save training results to disk with unique filenames
        save_json_result(model_name, result)

        K.clear_session()
        del model

        return result

    except Exception as err:
        try:
            K.clear_session()
        except:
            pass
        err_str = str(err)
        print(err_str)
        traceback_str = str(traceback.format_exc())
        print(traceback_str)
        return {
            'status': STATUS_FAIL,
            'err': err_str,
            'traceback': traceback_str
        }


def run_a_trial():
    """Run one TPE meta optimisation step and save its results."""
    max_evals = nb_evals = 1

    print("Attempt to resume a past training if it exists:")

    try:
        # https://github.com/hyperopt/hyperopt/issues/267
        trials = pickle.load(open("results.pkl", "rb"))
        print("Found saved Trials! Loading...")
        max_evals = len(trials.trials) + nb_evals
        print("Rerunning from {} trials to add another one.".format(
            len(trials.trials)))
    except:
        trials = Trials()
        print("Starting from scratch: new trials.")

    best = fmin(
        optimize_cnn,
        space,
        algo=tpe.suggest,
        trials=trials,
        max_evals=max_evals
    )
    pickle.dump(trials, open("results.pkl", "wb"))

    print("\nOPTIMIZATION STEP COMPLETE.\n")
if __name__ == "__main__":
"""Plot the model and run the optimisation forever (and saves results)."""
print("Plotting a demo model that would represent "
"a quite normal model (or a bit more huge), "
"and then the best model...")
plot_base_model()
print("Now, we train many models, one after the other. "
"Note that hyperopt has support for cloud "
"distributed training using MongoDB.")
print("\nYour results will be saved in the folder named 'results/'. "
"You can sort that alphabetically and take the greatest one. "
"As you run the optimization, results are consinuously saved into a "
"'results.pkl' file, too. Re-running optimize.py will resume "
"the meta-optimization.\n")
while True:
# Optimize a new model with the TPE Algorithm:
print("OPTIMIZING NEW MODEL:")
try:
run_a_trial()
except Exception as err:
err_str = str(err)
print(err_str)
traceback_str = str(traceback.format_exc())
print(traceback_str)
# Replot best model since it may have changed:
print("PLOTTING BEST MODEL:")
plot_best_model()