# spot_check_regression.py
# Spot-check regression models on a CSV dataset and print the top-ranked ones.
import warnings
import pandas as pd
import numpy as np
from numpy import mean
from numpy import std
from matplotlib import pyplot
import sys
import crayons
import regression
# Read the CSV named by the first command-line argument, then split it into
# the feature array (three price columns) and the 'Close Price' target array.
data = pd.read_csv(sys.argv[1])
df = np.array(data.loc[:, ['Prev Close', 'Open Price', 'Last Price']])
y = np.array(data.loc[:, 'Close Price'])
# print the top n results (the boxplot code is currently disabled)
def summarize_results(results, maximize=True, top_n=10):
    """Print a ranked summary of the best-scoring models.

    Args:
        results: Mapping of model name to a sequence of scores for that
            model. Each sequence is reduced with ``mean`` and ``std``.
        maximize: When true, rank from the highest mean score to the
            lowest; otherwise from the lowest to the highest.
        top_n: Maximum number of models to report.

    Returns:
        None. The summary is written to stdout; ``'no results'`` is printed
        when ``results`` is empty. No plot is drawn.
    """
    # Nothing to rank.
    if not results:
        print('no results')
        return

    # Never report more entries than exist; clamp at zero so a negative
    # top_n prints no entries instead of slicing from the end.
    n = max(0, min(top_n, len(results)))

    # (name, mean score) pairs in ascending order of mean score.
    ranked = sorted(
        ((name, mean(scores)) for name, scores in results.items()),
        key=lambda pair: pair[1],
    )
    if maximize:
        # Flip to descending order (e.g. for accuracy-like scores).
        ranked.reverse()

    print()
    for rank, (name, mean_score) in enumerate(ranked[:n], start=1):
        std_score = std(results[name])
        print(crayons.yellow(f'\t[*] RANK => {rank}', bold=True))
        print(crayons.blue(f'\t[*] NAME => {name}', bold=True))
        print(crayons.yellow(f'\t[*] Score => {round(mean_score,3)}', bold=True))
        print(crayons.red(f'\t[*] std score => (+/-){round(std_score,3)}', bold=True))
        # Blank lines separate one ranked entry from the next.
        print("\n\n")
# Progress banners. The CSV has already been read near the top of the file by
# the time these are printed.
print(crayons.red("\n[*] LOADING DATASET... ", bold=True))
print(crayons.red("\n[*] DATASET LOADED...NOW EVALUATING THE BEST MODELS....", bold=True))
# Feature array used for evaluation; the target `y` is already bound above.
X = df

# Fetch the candidate models from the project's `regression` module.
print(crayons.red("\n[*] GETTING THE MODEL LISTS:", bold=True))
models = regression.get_models()

# Score every candidate model on (X, y) with the requested metric.
print(crayons.yellow("\n[*]BEST RESULTING MODELS ACCORDING TO THE DATA...", bold=True))
# NOTE(review): summarize_results defaults to maximize=True (highest mean
# score first) - confirm that is the intended ranking direction for the scores
# evaluate_models returns for this metric.
results = regression.evaluate_models(X, y, models, metric='mean_squared_error')

# Print the ranked summary of the evaluated models.
summarize_results(results)