// File: NeuralNetwork.java
// Name: Mo K. Eid ([email protected])
// Desc: A customizable neural network class, built from scratch, that is trained with batch gradient descent
import java.util.Random;
class NeuralNetwork {
// Instance variables related to the structure of the neural network
private int inputSize;
private int[] hiddenLayerSizes;
private int numClassifiers;
// Jagged matrix of flattened weights between consecutive layers: the weight from
// previous-layer node p to current-layer node c is stored at index (c * previousLayerSize) + p
private double[][] weightsForAllLayers;
// Instance variables related to optimization
private double learningRate;
private double regularizationRate;
// Normalization parameters: the per-feature means of the training data
private double[] featureAverages;
NeuralNetwork(int paramInputSize, int[] paramHiddenLayerSizes, int paramNumClassifiers) {
inputSize = paramInputSize;
hiddenLayerSizes = paramHiddenLayerSizes;
numClassifiers = paramNumClassifiers;
weightsForAllLayers = generateRandomWeights();
learningRate = 0.3;
regularizationRate = 10;
}
// Infer a function from labeled training data
void train(int[][] trainingExamples, int[] trainingActual, int numIterations) {
// Center the training data features
calculateFeatureMeans(trainingExamples);
double[][] normalizedExamples = normalizeFeatures(trainingExamples);
for(int i = 0; i < numIterations; i++) {
System.out.println("Training Iteration: " + (i + 1) + " of " + numIterations);
backPropagation(normalizedExamples, trainingActual);
}
}
// Return the fraction of labeled examples the neural network classifies correctly
// (train must be called first so that the feature means are available)
double checkAccuracy(int[][] inputSet, int[] actualSet) {
double[][] normalizedInputSet = normalizeFeatures(inputSet);
double numCorrect = 0.0;
for(int inputIndex = 0; inputIndex < inputSet.length; inputIndex++) {
double[] trainingExample = normalizedInputSet[inputIndex];
double[][] predictions = predict(trainingExample);
double[] predictedOutput = predictions[predictions.length - 1];
int predictedClass = translatePrediction(predictedOutput);
numCorrect += (predictedClass == actualSet[inputIndex]) ? 1 : 0;
}
return numCorrect / inputSet.length;
}
// Backward propagate errors in batch mode and update weights given labeled training data
private void backPropagation(double[][] trainingExamples, int[] trainingActual) {
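// Batch mode: every example's gradient contribution is accumulated first,
// and the weights are updated once, at the end of the pass, using the
// averaged gradient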
// Initialize the gradient accumulator (one flattened array per weight layer)
double[][] gradient = new double[hiddenLayerSizes.length + 1][];
// Learn from each training example
for(int trainingExampleIndex = 0; trainingExampleIndex < trainingExamples.length; trainingExampleIndex++) {
double[][] predictions = predict(trainingExamples[trainingExampleIndex]);
double[][] deltas = new double[weightsForAllLayers.length][];
// Iterate through each weights layer
for(int deltaLayerIndex = deltas.length - 1; deltaLayerIndex >= 0; deltaLayerIndex--) {
boolean onOutputLayer = (deltaLayerIndex == deltas.length - 1);
int currentLayerSize = onOutputLayer ? numClassifiers : hiddenLayerSizes[deltaLayerIndex] + 1;
deltas[deltaLayerIndex] = new double[currentLayerSize];
// Initialize the gradients for this layer if null
if(gradient[deltaLayerIndex] == null)
gradient[deltaLayerIndex] = new double[weightsForAllLayers[deltaLayerIndex].length];
// Calculate output deltas
if(onOutputLayer) {
// Encode the class into a vector
int[] encodedActual = oneHotEncode(trainingActual[trainingExampleIndex]);
for(int nodeIndex = 0; nodeIndex < numClassifiers; nodeIndex++) {
double activationPrime = activatePrime(predictions[predictions.length - 1][nodeIndex]);
deltas[deltaLayerIndex][nodeIndex] = (predictions[predictions.length - 1][nodeIndex] - encodedActual[nodeIndex]) * activationPrime;
}
}
// Calculate hidden layer deltas
else {
boolean nextLayerIsOutput = (deltaLayerIndex == deltas.length - 2);
int nextLayerSize = nextLayerIsOutput ? numClassifiers : hiddenLayerSizes[deltaLayerIndex + 1];
// Iterate through each activation node
for(int nodeIndex = 0; nodeIndex < currentLayerSize; nodeIndex++) {
double delta = 0.0;
// Iterate through each of the next layer's activation nodes
for(int nextNodeIndex = 0; nextNodeIndex < nextLayerSize; nextNodeIndex++) {
int weightIndex = (currentLayerSize * nextNodeIndex) + nodeIndex;
double weightVal = weightsForAllLayers[deltaLayerIndex + 1][weightIndex];
delta += weightVal * deltas[deltaLayerIndex + 1][nextNodeIndex];
}
// Scale by the derivative of this node's own activation: delta_j = f'(a_j) * sum_k(w_jk * delta_k)
deltas[deltaLayerIndex][nodeIndex] = delta * activatePrime(predictions[deltaLayerIndex][nodeIndex]);
}
}
}
// Accumulate this example's contribution to the gradient
for(int deltaLayerIndex = deltas.length - 1; deltaLayerIndex >= 0; deltaLayerIndex--) {
// Get parameters on the current layer
boolean onOutputLayer = (deltaLayerIndex == deltas.length - 1);
int currentLayerSize = onOutputLayer ? numClassifiers : hiddenLayerSizes[deltaLayerIndex];
// Get parameters on the previous layer
boolean previousIsInput = (deltaLayerIndex == 0);
int previousLayerSize = previousIsInput ? inputSize : hiddenLayerSizes[deltaLayerIndex - 1] + 1;
// Use each weight connection to calculate the gradient
for(int previousNodeIndex = 0; previousNodeIndex < previousLayerSize; previousNodeIndex++) {
for(int currentNodeIndex = 0; currentNodeIndex < currentLayerSize; currentNodeIndex++) {
int weightIndex = (currentNodeIndex * previousLayerSize) + previousNodeIndex;
double previousActivation;
// Set the activation to 1 if the previous node is a hidden layer's bias (the input layer carries no bias node)
if(!previousIsInput && previousNodeIndex == previousLayerSize - 1)
previousActivation = 1;
else
previousActivation = previousIsInput ? trainingExamples[trainingExampleIndex][previousNodeIndex] : predictions[deltaLayerIndex - 1][previousNodeIndex];
gradient[deltaLayerIndex][weightIndex] += deltas[deltaLayerIndex][currentNodeIndex] * previousActivation;
}
}
}
}
// Update weights
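// Each weight moves by (learningRate / m) times its accumulated gradient,
// where m is the number of training examples; regularized weights also
// decay by the factor (1 - learningRate * regularizationRate / m)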
for(int deltaLayerIndex = 0; deltaLayerIndex < gradient.length; deltaLayerIndex++) {
boolean onInputLayer = (deltaLayerIndex == 0);
int currentLayerSize = onInputLayer ? inputSize : hiddenLayerSizes[deltaLayerIndex - 1] + 1;
boolean nextIsOutLayer = (deltaLayerIndex == gradient.length - 1);
int nextLayerSize = nextIsOutLayer ? numClassifiers : hiddenLayerSizes[deltaLayerIndex];
for(int nextNodeIndex = 0; nextNodeIndex < nextLayerSize; nextNodeIndex++) {
for(int currentNodeIndex = 0; currentNodeIndex < currentLayerSize; currentNodeIndex++) {
int weightIndex = (nextNodeIndex * currentLayerSize) + currentNodeIndex;
// Apply L2 regularization (weight decay) to every non-bias weight to prevent overfitting;
// the input layer has no bias node, so all of its weights are regularized
if(onInputLayer || currentNodeIndex != currentLayerSize - 1)
weightsForAllLayers[deltaLayerIndex][weightIndex] = weightsForAllLayers[deltaLayerIndex][weightIndex] * (1 - learningRate * regularizationRate / trainingExamples.length) -
gradient[deltaLayerIndex][weightIndex] * learningRate / trainingExamples.length;
else
weightsForAllLayers[deltaLayerIndex][weightIndex] -= gradient[deltaLayerIndex][weightIndex] * learningRate / trainingExamples.length;
}
}
}
}
// Generate a random matrix of weight values between min and max based on the neural network's structure
private double[][] generateRandomWeights() {
// There is one layer of weights between each pair of adjacent node layers
double[][] weights = new double[hiddenLayerSizes.length + 1][];
// The range of weight randomization
double min = -1.0;
double max = 1.0;
// One shared Random instance, rather than constructing a new one per weight
Random random = new Random();
// Iterate through each layer of nodes
for(int weightLayerIndex = 0; weightLayerIndex < weights.length; weightLayerIndex++) {
boolean forHiddenLayer = (weightLayerIndex != weights.length - 1);
int currentLayerSize = forHiddenLayer ? hiddenLayerSizes[weightLayerIndex] : numClassifiers;
boolean previousIsInputLayer = (weightLayerIndex == 0);
int previousLayerSize = previousIsInputLayer ? inputSize : hiddenLayerSizes[weightLayerIndex - 1] + 1;
// Every current-layer node has a weight from every previous-layer node (the + 1 above reserves room for a hidden layer's bias node)
int weightLayerSize = currentLayerSize * previousLayerSize;
weights[weightLayerIndex] = new double[weightLayerSize];
// Set each weight's value to be between min and max
for(int weightIndex = 0; weightIndex < weightLayerSize; weightIndex++) {
double randomValue = min + (max - min) * random.nextDouble();
weights[weightLayerIndex][weightIndex] = randomValue;
}
}
return weights;
}
// Sigmoid function
private double activate(double val) {
return (1 / (1 + Math.exp(-val)));
}
// Derivative of the sigmoid in terms of its output: for a = sigmoid(z), sigmoid'(z) = a * (1 - a),
// so val is expected to be an activation rather than a raw weighted sum
private double activatePrime(double val) {
return val * (1 - val);
}
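// Center each feature by subtracting its training-set mean; note that this
// centers only, it does not scale features to unit variance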
private double[][] normalizeFeatures(int[][] set) {
double[][] normalizedSet = new double[set.length][];
for(int exampleIndex = 0; exampleIndex < normalizedSet.length; exampleIndex++) {
normalizedSet[exampleIndex] = new double[inputSize];
for(int featureIndex = 0; featureIndex < inputSize; featureIndex++) {
double centeredFeature = set[exampleIndex][featureIndex] - featureAverages[featureIndex];
normalizedSet[exampleIndex][featureIndex] = centeredFeature;
}
}
return normalizedSet;
}
// Iterate through every example in a set and calculate the mean for each feature
private void calculateFeatureMeans(int[][] set) {
// Calculate the means
featureAverages = new double[inputSize];
for(int featureIndex = 0; featureIndex < inputSize; featureIndex++) {
for(int exampleIndex = 0; exampleIndex < set.length; exampleIndex++)
featureAverages[featureIndex] += set[exampleIndex][featureIndex];
featureAverages[featureIndex] /= set.length;
}
}
// One-hot encode a class index (e.g., with four classes, 2 -> [0, 0, 1, 0])
private int[] oneHotEncode(int val) {
int[] encodedVal = new int[numClassifiers];
for(int i = 0; i < numClassifiers; i++)
encodedVal[i] = (i == val) ? 1 : 0;
return encodedVal;
}
// Forward propagate an input through the network, returning each layer's activations (the last row is the output layer)
private double[][] predict(double[] input) {
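// Each node's activation is the sigmoid of the weighted sum of the previous
// layer's activations; hidden layers additionally get a constant-1 bias node
// appended after their activations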
double[][] predictions = new double[hiddenLayerSizes.length + 1][];
for(int actLayerIndex = 0; actLayerIndex < predictions.length; actLayerIndex++) {
boolean onOutputLayer = (actLayerIndex == predictions.length - 1);
boolean previousIsInput = (actLayerIndex == 0);
// Compute layer sizes (the + 1 is for the bias node)
int previousLayerSize = previousIsInput ? inputSize : (hiddenLayerSizes[actLayerIndex - 1] + 1);
int currentLayerSize = onOutputLayer ? numClassifiers : (hiddenLayerSizes[actLayerIndex]);
boolean shouldAddBias = !onOutputLayer;
predictions[actLayerIndex] = new double[currentLayerSize + (shouldAddBias ? 1 : 0)];
// Compute activations for each node in the current layer
for(int actNodeIndex = 0; actNodeIndex < currentLayerSize; actNodeIndex++) {
// Sum the connections between the appropriate nodes in the current layer and the previous layer
double sum = 0.0;
for(int previousNodeIndex = 0; previousNodeIndex < previousLayerSize; previousNodeIndex++) {
int weightIndex = (previousLayerSize * actNodeIndex) + previousNodeIndex;
double weightVal = weightsForAllLayers[actLayerIndex][weightIndex];
double previousNodeVal = previousIsInput ? input[previousNodeIndex] : predictions[actLayerIndex - 1][previousNodeIndex];
sum += weightVal * previousNodeVal;
}
// Activate the summed value and assign it in the predictions matrix
double activation = activate(sum);
predictions[actLayerIndex][actNodeIndex] = activation;
}
// Add bias if needed
if(shouldAddBias) {
int biasIndex = hiddenLayerSizes[actLayerIndex];
predictions[actLayerIndex][biasIndex] = 1;
}
}
return predictions;
}
// Decode an output vector to the index of its largest activation (e.g., [0.1, 0.9, 0.2] -> 1)
private int translatePrediction(double[] encodedVal) {
int predictedClass = 0;
for(int classifierIndex = 0; classifierIndex < numClassifiers; classifierIndex++)
if(encodedVal[classifierIndex] > encodedVal[predictedClass])
predictedClass = classifierIndex;
return predictedClass;
}
}
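// A minimal usage sketch, not part of the original file: it trains the network
// on a tiny, hypothetical AND-style dataset and prints the training accuracy.
// The class name, dataset, hidden-layer size, and iteration count are all
// illustrative assumptions rather than anything defined in this repository.
class NeuralNetworkDemo {
public static void main(String[] args) {
// 2 input features, one hidden layer of 4 nodes, 2 output classes
NeuralNetwork network = new NeuralNetwork(2, new int[] {4}, 2);
// Toy labeled data: class 1 only when both features are 1
int[][] examples = {
{0, 0}, {0, 1}, {1, 0}, {1, 1},
{0, 0}, {0, 1}, {1, 0}, {1, 1}
};
int[] labels = {0, 0, 0, 1, 0, 0, 0, 1};
network.train(examples, labels, 500);
System.out.println("Training accuracy: " + network.checkAccuracy(examples, labels));
}
}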