-
Notifications
You must be signed in to change notification settings - Fork 0
/
spot_check_classification.py
88 lines (76 loc) · 2.56 KB
/
spot_check_classification.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
#importing packages and library
import warnings
from numpy import mean
import numpy as np
import pandas as pd
from pandas import DataFrame
from numpy import std
from matplotlib import pyplot
import sys
import crayons
import classificaton
# The CSV path is the only command-line argument. Exit with a usage message
# when it is missing instead of failing with a bare IndexError.
if len(sys.argv) < 2:
    sys.exit('usage: python spot_check_classification.py <csv_file>')
data = pd.read_csv(sys.argv[1])
# dropping unnecessary columns
data = data.drop(['Symbol', 'Series', 'Date'], axis=1)
# row-over-row change of the closing price: row i minus row i-1
# (the first row has no predecessor, so its difference is NaN)
close_price = data['Close Price']
price_change = close_price - close_price.shift(1)
# Encode the direction of the change as 1.0 for a rise and 0.0 otherwise
# (fall, no change, or missing difference -- NaN > 0 evaluates to False).
# NOTE: despite its name, `features` is the 0/1 column that the script later
# passes as y; it is kept as a float array of shape (n_rows, 1).
features = (price_change > 0).astype(float).to_numpy().reshape(-1, 1)
# Every remaining column except the closing price goes into `labels`, which
# the script later passes as X.
data = data.drop('Close Price', axis=1)
labels = np.array(data)
# print and plot the top n results
# takes the dictionary of results, prints a summary of them, and creates a box plot
# maximize=True if a higher evaluation score is better
def summarize_results(results, maximize=True, top_n=10):
    """Print a ranked summary of model scores and show a box plot of the best.

    Args:
        results: Mapping of model name to a collection of numeric scores.
        maximize: If True, models with a higher mean score rank first;
            otherwise models with a lower mean score rank first.
        top_n: Maximum number of models to print and plot.

    Returns:
        None. The summary is printed to stdout and the plot is displayed
        with ``pyplot.show()``.
    """
    # nothing to rank or plot: no results at all, or no entries requested
    if not results or top_n <= 0:
        print('no results')
        return
    # determine how many results to summarize
    n = min(top_n, len(results))
    # compute (name, mean, std) once per model
    stats = [(name, mean(vals), std(vals)) for name, vals in results.items()]
    # sort ascending by mean score ...
    ranked = sorted(stats, key=lambda item: item[1])
    # ... and flip to descending order when higher is better (e.g. accuracy)
    if maximize:
        ranked.reverse()
    top = ranked[:n]
    # print the top n
    print()
    for rank, (name, mean_score, std_score) in enumerate(top, start=1):
        print(crayons.yellow(f'\t[*] RANK => {rank}', bold=True))
        print(crayons.blue(f'\t[*] NAME => {name}', bold=True))
        print(crayons.yellow(f'\t[*] Score => {round(mean_score,3)}', bold=True))
        print(crayons.red(f'\t[*] std score => (+/-){round(std_score,3)}', bold=True))
        print("\n\n")
    # boxplot for the top n
    names = [name for name, _, _ in top]
    scores = [results[name] for name in names]
    pyplot.boxplot(scores)
    # Label the boxes through xticks rather than boxplot's `labels` keyword,
    # which newer Matplotlib releases deprecate; boxes sit at positions 1..n.
    pyplot.xticks(range(1, len(names) + 1), names, rotation=90)
    pyplot.show()
    # alternative: pyplot.savefig('spotcheck.png') to write the plot to a file
# load dataset
# `labels` is the array built from the remaining CSV columns and `features`
# is the 0/1 close-price movement column; they are passed as X and y.
X = labels
y = features
# get model list, then add gbm models to it
models = classificaton.define_gbm_models(classificaton.define_models())
# evaluate models
results = classificaton.evaluate_models(X, y, models)
# summarize results
summarize_results(results)