-
Notifications
You must be signed in to change notification settings - Fork 0
/
varyLambda.m
73 lines (61 loc) · 2.9 KB
/
varyLambda.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
function [bestParams, train_cost, cv_cost, bestLambda] = varyLambda(lambda_vec, ...
                                                     max_iters, nn_specs, ...
                                                     X_train, y_train, ...
                                                     X_cv, y_cv)
%VARYLAMBDA Train one network per regularization strength and keep the best.
%   [bestParams, train_cost, cv_cost, bestLambda] = VARYLAMBDA(lambda_vec, ...
%       max_iters, nn_specs, X_train, y_train, X_cv, y_cv)
%
%   Inputs:
%     lambda_vec - non-empty vector of regularization strengths to try.
%     max_iters  - 'MaxIter' option for the initial fmincg run of each lambda.
%     nn_specs   - 4-element vector:
%                  [num_hidden_layers, input_layer_size, ...
%                   hidden_layer_size, num_labels].
%     X_train, y_train - data handed to costFunction for training.
%     X_cv, y_cv       - data handed to costFunction for validation.
%
%   Outputs:
%     bestParams - parameters of the model with the lowest validation cost.
%     train_cost - 1-by-numel(lambda_vec) training cost per lambda.
%     cv_cost    - 1-by-numel(lambda_vec) validation cost per lambda.
%     bestLambda - entry of lambda_vec that produced bestParams.
%
%   train_cost and cv_cost are evaluated with lambda = 0 so the penalty term
%   does not distort the comparison between different lambdas.
%
%   Every lambda starts from the same random initial weights.
%
%   Raises an error when lambda_vec is empty.

    if isempty(lambda_vec)
        error('varyLambda:emptyLambda', ...
              'lambda_vec must contain at least one value.');
    end

    % Extra single-step refinement stops once the cost improves by no more
    % than COST_TOL, or after MAX_EXTRA_ITERS steps, whichever comes first.
    COST_TOL = 0.01;
    MAX_EXTRA_ITERS = 50;

    num_hidden_layers = nn_specs(1);
    input_layer_size  = nn_specs(2);
    hidden_layer_size = nn_specs(3);
    num_labels        = nn_specs(4);

    num_lambdas = length(lambda_vec);
    train_cost = zeros(1, num_lambdas);
    cv_cost    = zeros(1, num_lambdas);

    % randomly initialize weights
    initial_params = randInitWeights(num_hidden_layers, input_layer_size, ...
                                     hidden_layer_size, num_labels);

    % Loop-invariant optimizer settings.
    mainOptions = optimset('MaxIter', max_iters);
    stepOptions = optimset('MaxIter', 1);

    bestIdx = 0;
    bestParams = initial_params;   % overwritten on the first iteration

    for i = 1:num_lambdas
        lambda = lambda_vec(i);

        fprintf('\nTraining regularized nn with %.3f lambda\n', lambda);

        % shorthand for the regularized training objective
        costFunctionS = @(p) costFunction(p, num_hidden_layers, ...
                                          input_layer_size, ...
                                          hidden_layer_size, ...
                                          num_labels, ...
                                          X_train, y_train, lambda);

        % finish at least the amount of iterations specified
        [params, ~] = fmincg(costFunctionS, initial_params, mainOptions);
        [prevCost, ~] = costFunctionS(params);

        % Emulated do-while: keep taking single fmincg steps until the cost
        % decrease becomes negligible. The cost should go down every step,
        % so no absolute value is taken; a negative difference ends the loop.
        costDiff = 0;
        iters = 0;
        while (costDiff > COST_TOL || iters == 0) && iters < MAX_EXTRA_ITERS
            % NOTE(review): assumes fmincg's second output is the history of
            % cost values for this call - confirm against the fmincg in use.
            [params, costHistory] = fmincg(costFunctionS, params, stepOptions);
            iters = iters + 1;

            if isempty(costHistory)
                % No cost was reported for this step; treat as converged
                % rather than feeding an empty value into the loop test.
                break;
            end

            thisCost = costHistory(end);
            costDiff = prevCost - thisCost;
            prevCost = thisCost;
        end

        % Evaluate without the regularization term (lambda = 0) so costs for
        % different lambdas are directly comparable.
        [train_cost(i), ~] = costFunction(params, num_hidden_layers, ...
                                          input_layer_size, ...
                                          hidden_layer_size, ...
                                          num_labels, ...
                                          X_train, y_train, 0);
        [cv_cost(i), ~] = costFunction(params, num_hidden_layers, ...
                                       input_layer_size, ...
                                       hidden_layer_size, ...
                                       num_labels, ...
                                       X_cv, y_cv, 0);

        % Keep the first strictly-best model; a NaN incumbent is replaced by
        % any non-NaN candidate. bestParams and bestLambda share one index so
        % they always describe the same model.
        isBetter = (bestIdx == 0) ...
                   || (cv_cost(i) < cv_cost(bestIdx)) ...
                   || (isnan(cv_cost(bestIdx)) && ~isnan(cv_cost(i)));
        if isBetter
            bestIdx = i;
            bestParams = params;
        end
    end

    bestLambda = lambda_vec(bestIdx);

    fprintf('Optimal lambda is %.3f\n', bestLambda);

end