-
Notifications
You must be signed in to change notification settings - Fork 0
/
regressors.py
94 lines (77 loc) · 3.41 KB
/
regressors.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import sys, os
import matplotlib.pyplot as plt
import numpy as np
import math
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import SGDRegressor
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_val_predict
def load_full_dataset(fname):
    """Load the feature matrix and both target vectors from an ``.npz`` archive.

    Args:
        fname: Path passed to ``numpy.load``. The archive is expected to
            contain arrays named ``X``, ``y_avg`` and ``y_med``.

    Returns:
        A tuple ``(X, y_avg, y_med)`` with the three arrays read from the
        archive.
    """
    # The object returned by np.load for an .npz keeps the archive open;
    # the context manager releases the file handle once the arrays have
    # been read into memory.
    with np.load(fname) as full_dataset:
        X = full_dataset.f.X
        y_avg = full_dataset.f.y_avg
        y_med = full_dataset.f.y_med
    return X, y_avg, y_med
def print_stats(model_name, n_train, n_test, R2_y_avg, cross_val_avg, R2_y_med, cross_val_med):
    """Print a cross-validation summary for one model.

    For each target (``y_avg`` then ``y_med``) the sign-flipped mean of the
    scores and their standard deviation are printed.

    Args:
        model_name: Label printed on the ``MODEL:`` line.
        n_train: Accepted for interface compatibility; not printed.
        n_test: Accepted for interface compatibility; not printed.
        R2_y_avg: Accepted for interface compatibility; not printed.
        cross_val_avg: Cross-validation scores for the ``y_avg`` target.
        R2_y_med: Accepted for interface compatibility; not printed.
        cross_val_med: Cross-validation scores for the ``y_med`` target.
    """
    print('MODEL: ', model_name)
    sections = (
        ('y_avg:', cross_val_avg),
        ('y_med:', cross_val_med),
    )
    for heading, scores in sections:
        print(heading)
        # The mean is negated before printing; the spread is reported as-is.
        print('\tmean:', -np.mean(scores))
        print('\tstd:', np.std(scores), '\n')
def save_scatterplot(predicted, y, model_name, y_mode):
    """Save a measured-vs-predicted scatter plot for one model and target.

    The chart is written to ``./predict_charts/<model_name>_<y_mode>.png``.
    A dashed diagonal from ``(y.min(), y.min())`` to ``(y.max(), y.max())``
    is drawn as a reference line.

    Args:
        predicted: Predicted values, plotted on the vertical axis.
        y: Measured values, plotted on the horizontal axis; must provide
            ``min()`` and ``max()``.
        model_name: Label used as the first part of the output file name.
        y_mode: Label used as the second part of the output file name.
    """
    out_dir = './predict_charts/'
    # savefig does not create missing directories, so ensure it exists.
    os.makedirs(out_dir, exist_ok=True)

    fig, ax = plt.subplots()
    try:
        ax.scatter(y, predicted, edgecolors=(0, 0, 0))
        ax.plot([y.min(), y.max()], [y.min(), y.max()], 'k--', lw=4)
        ax.set_xlabel('Measured')
        ax.set_ylabel('Predicted')
        # Save this specific figure rather than pyplot's implicit current one.
        fig.savefig(out_dir + '{}_{}.png'.format(model_name, y_mode))
    finally:
        # pyplot keeps figures alive until they are closed explicitly;
        # close it so repeated calls do not accumulate open figures.
        plt.close(fig)
def main(fname):
    """Evaluate several regressors on the dataset stored at ``fname``.

    For every model this:
      1. fits on a hold-out split for each target (``y_avg``, ``y_med``) and
         records the model's ``score`` on the held-out part,
      2. runs cross-validation with one fold per sample, scored with
         ``neg_mean_squared_error``,
      3. prints the summary via ``print_stats``,
      4. saves a measured-vs-predicted scatter plot per target from
         cross-validated predictions.

    Args:
        fname: Path to the dataset archive, passed to ``load_full_dataset``.
    """
    models = [
        ('Linear Regression', LinearRegression()),
        ('Stochastic Gradient Descent', SGDRegressor()),
        ('Support Vector Regression', SVR()),
    ]
    train_percent = .8  # fraction of examples for training; complement is for testing

    X, y_avg, y_med = load_full_dataset(fname)
    n_train = int(len(X) * train_percent)
    n_test = len(X) - n_train

    (train_feats, test_feats,
     train_y_avg, test_y_avg,
     train_y_med, test_y_med) = train_test_split(X, y_avg, y_med, train_size=train_percent)

    # One fold per sample; identical for every model, so compute it once.
    splits = len(y_avg)

    for model_name, model in models:
        # Hold-out score for y_avg.
        model.fit(train_feats, train_y_avg)
        score_y_avg = model.score(test_feats, test_y_avg)

        # Hold-out score for y_med (the same estimator object is re-fitted).
        model.fit(train_feats, train_y_med)
        score_y_med = model.score(test_feats, test_y_med)

        cross_val_scores_avg = cross_val_score(
            model, X, y_avg, cv=splits, scoring='neg_mean_squared_error')
        cross_val_scores_med = cross_val_score(
            model, X, y_med, cv=splits, scoring='neg_mean_squared_error')

        print_stats(model_name, n_train, n_test,
                    score_y_avg, cross_val_scores_avg,
                    score_y_med, cross_val_scores_med)

        for y, y_name in [(y_avg, 'avg'), (y_med, 'med')]:
            predicted = cross_val_predict(model, X, y, cv=splits)
            save_scatterplot(predicted, y, model_name, y_name)
if __name__ == '__main__':
    # Usage: python regressors.py [path/to/full_dataset.npz]
    if len(sys.argv) < 2:
        # No path given on the command line: run on the default dataset path.
        main('persistence_image_feature/full_dataset.npz')
        # sys.exit instead of the site-provided exit()/quit(), which are
        # intended for interactive use and are not always available.
        sys.exit(0)
    filename = sys.argv[1]
    if not os.path.exists(filename):
        # A string argument is printed to stderr and yields exit status 1.
        sys.exit("Filename '{}' does not exist".format(filename))
    main(filename)