# 3_viz_learning.py — forked from NilsHMeier/LG04QS_Cycling
# Visualizes learning performance (SVM hyperparameter surface, KNN accuracy curves).
import numpy as np
from MachineLearning.LearningAlgorithms import MachineLearning
from MachineLearning.PrepareData import PrepareDataset
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D # noqa: F401 unused import
# Flags controlling which visualizations are generated
DO_SVM = True
DO_KNN = False

# Load the aggregated data and prepare it for training the models
SOURCE_PATH = 'Data/AggregatedData/'
engineer = PrepareDataset(SOURCE_PATH, True, False, 'label', ['x', 'y', 'z'])
data_table = engineer.fill_datatable(['LP', 'NM', 'NB'])
# One aggregate feature per axis/statistic pair, e.g. 'x_mean', 'z_std'
selected_features = [f'{axis}{stat}' for axis in ('x', 'y', 'z')
                     for stat in ('_mean', '_max', '_min', '_std')]
X_train, X_test, Y_train, Y_test = engineer.split_dataset_selected(data_table, selected_features, 0.3)
train_data = dict(x=X_train, y=Y_train)
test_data = dict(x=X_test, y=Y_test)

# Load a second dataset with other data to evaluate the trained models,
# keeping only the label column and the features the models were trained on
training_data = engineer.prepare_evaluation_dataset('Data/EvaluationData/', ['LP', 'NP', 'NM'], 15, 20)
keep_columns = set(X_train.columns) | {'label'}
training_data = training_data.drop(columns=[col for col in training_data.columns
                                            if col not in keep_columns])
if DO_SVM:
    print('Running training visualization on SVM.')
    # Set up the hyperparameter grid to sweep
    Cs = [1, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
    Gammas = [1, 0.5, 0.1, 0.05, 0.01, 0.005, 0.001, 0.0005, 0.0001]
    Scores = []
    # Train a model with each (C, gamma) combination. C is the OUTER loop,
    # so the flat Scores list is ordered C-major.
    for c in Cs:
        for gamma in Gammas:
            score, svm = MachineLearning.support_vector_machine_with_kernel(train_data, test_data, c, gamma)
            Scores.append(score)
            print(f'C={c} & Gamma={gamma} -> Score={score}')
    # meshgrid yields arrays of shape (len(Gammas), len(Cs)). Scores was
    # collected C-major, so reshape to (len(Cs), len(Gammas)) and transpose.
    # (The previous direct reshape to (len(Gammas), len(Cs)) scrambled the
    # score-to-parameter mapping in the plot.)
    X, Y = np.meshgrid(Cs, Gammas)
    Z = np.asarray(Scores).reshape(len(Cs), len(Gammas)).T
    # 3d surface plot showing the score over the (C, gamma) grid.
    # Figure.gca(projection=...) was removed in Matplotlib 3.6 — use
    # add_subplot(projection='3d') instead.
    fig = plt.figure()
    ax = fig.add_subplot(projection='3d')
    surf = ax.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap='brg_r')
    ax.set_xlabel('C')
    ax.set_ylabel('Gamma')
    ax.set_zlabel('Score')
    fig.colorbar(surf, shrink=0.5, aspect=5)
    plt.show()
if DO_KNN:
    # Sweep k = 1..49 and record both accuracy measures for each model
    ks = np.arange(1, 50, 1)
    training_scores = []
    evaluation_scores = []
    # The evaluation features/labels are loop-invariant — split them out once
    eval_features = training_data.drop(columns=['label'])
    eval_labels = training_data['label']
    for k in ks:
        print(f'Calculating best score for {k} neighbours')
        acc, knn = MachineLearning.k_nearest_neighbours(train_data, test_data, k)
        training_scores.append(acc)
        evaluation_scores.append(knn.score(eval_features, eval_labels))
    # Plot the two accuracy curves against k in a single graph
    plt.plot(ks, training_scores, color='b', label='Validation Score')
    plt.plot(ks, evaluation_scores, color='r', label='Test Score')
    plt.xlabel('k Neighbours')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.show()