-
Notifications
You must be signed in to change notification settings - Fork 0
/
prediction.py
102 lines (56 loc) · 2.4 KB
/
prediction.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
import config, utils
from log import Log
import pandas as pd
import numpy as np
import pickle
from pathlib import Path
from pipeline import Pipeline
class Prediction(Pipeline):
    """Run the inherited ``Pipeline`` steps on a CSV dataset and predict.

    The dataset is loaded once in the constructor; ``predict`` then pushes
    it through the preprocessing methods inherited from ``Pipeline`` and
    returns both the raw predictions and their decoded form.

    NOTE(review): ``utils.get_function_caller()`` is invoked inline in each
    method rather than through a shared logging helper, because it
    presumably inspects the call stack to find the current method name --
    confirm before factoring the trace messages out.
    """

    def __init__(self, user_id, path_to_dataset, random_state=42):
        """Initialise the pipeline state and load the dataset.

        Args:
            user_id: Identifier forwarded to ``Pipeline.__init__`` and kept
                on the instance.
            path_to_dataset: Location of the CSV file; passed unchanged to
                ``pandas.read_csv``.
            random_state: Seed value forwarded to ``Pipeline.__init__`` and
                kept on the instance.
        """
        super().__init__(user_id, path_to_dataset, random_state)
        self.log = Log()
        self.log.print(
            f"{type(self).__name__}.{utils.get_function_caller()} -> enter"
        )

        # The messages below used to be built with a bare comma, which
        # produced a tuple instead of text; format them as strings so every
        # call hands Log.print the same kind of argument.
        self.user_id = user_id
        self.log.print(f"user_id: {self.user_id}")

        self.path_to_dataset = path_to_dataset
        self.log.print(f"path_to_dataset: {self.path_to_dataset}")

        self.random_state = random_state
        self.log.print(f"random_state: {self.random_state}")

        self.dataframe = pd.read_csv(self.path_to_dataset)
        # Filled in by predict().
        self.prediction = None

        self.log.print(
            f"{type(self).__name__}.{utils.get_function_caller()} -> exit"
        )

    def split_dataframe(self):
        """Split the loaded dataframe into features and target.

        ``self.target_column`` is not set in this class; presumably it is
        provided by ``Pipeline`` -- verify against the base class.

        Returns:
            A pair ``(X, y)``: ``X`` holds every column other than
            ``self.target_column`` and ``y`` is the ``self.target_column``
            selection, both taken from a copy of ``self.dataframe``.
        """
        self.log.print(
            f"{type(self).__name__}.{utils.get_function_caller()} -> enter"
        )

        feature_names = [
            col for col in self.dataframe.columns if col != self.target_column
        ]
        # Work on a copy so later steps never touch self.dataframe itself.
        data = self.dataframe.copy()
        X = data[feature_names]
        y = data[self.target_column]

        self.log.print(
            f"{type(self).__name__}.{utils.get_function_caller()} -> exit"
        )
        return X, y

    def decode_prediction(self, data):
        """Decode predictions via the inherited ``decode_target_feature``.

        Args:
            data: Predictions to decode; handed unchanged to the base class.

        Returns:
            Whatever ``Pipeline.decode_target_feature`` returns for ``data``.
        """
        self.log.print(
            f"{type(self).__name__}.{utils.get_function_caller()} -> enter"
        )

        decoded = super().decode_target_feature(data)

        self.log.print(
            f"{type(self).__name__}.{utils.get_function_caller()} -> exit"
        )
        return decoded

    def predict(self):
        """Preprocess the dataset and predict on it.

        The base-class steps are invoked through ``super()`` in a fixed
        order; the results are stored on ``self.X``, ``self.y`` and
        ``self.prediction``. The dataset must contain ``self.target_column``
        because ``split_dataframe`` selects it.

        Returns:
            A pair ``(prediction, prediction_labels)``: the value returned
            by ``Pipeline.predict`` and its decoded counterpart from
            ``decode_prediction``.
        """
        self.log.print(
            f"{type(self).__name__}.{utils.get_function_caller()} -> enter"
        )

        # Dataframe-level preparation performed by the base class.
        super().extract_features()
        super().validate_column_type()
        super().drop_this_first()

        self.X, self.y = self.split_dataframe()

        # Feature/target preprocessing; order is kept exactly as before.
        self.X = super().features_engineering(self.X)
        self.X = super().replace_infinite_numbers(self.X)
        self.X, self.y = super().handle_nan_values(self.X, self.y)
        self.X = super().drop_unnecessary_columns(self.X)
        self.X = super().encode_categorical_data(self.X)
        self.y = super().encode_target_feature(self.y)

        # This method overrides Pipeline.predict, so the base implementation
        # has to be reached explicitly through super().
        self.prediction = super().predict(self.X)
        prediction_labels = self.decode_prediction(self.prediction)

        self.log.print(
            f"{type(self).__name__}.{utils.get_function_caller()} -> exit"
        )
        return self.prediction, prediction_labels