-
Notifications
You must be signed in to change notification settings - Fork 0
/
FeatureSelection.py
123 lines (86 loc) · 3.53 KB
/
FeatureSelection.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
# from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.feature_selection import RFECV
import numpy as np
import pandas as pd
from sklearn import preprocessing
import math
import warnings
# Install a global "ignore" filter: every warning raised after this point
# (including library deprecation notices) is suppressed.
warnings.filterwarnings("ignore")
# Disabled legacy experiment:
# dataset= load_boston()
# Estimators handed to RFECV, keyed by a short label. The code in this file
# only iterates over the values; the keys are never read.
model_dict={'gradient_boost': GradientBoostingRegressor()}
# Disabled legacy helper:
# def transform_to_df(X):
# X_df=pd.DataFrame(data=X, columns= feature_names)
# return X_df
# Values tried for RFECV's `cv` argument: 2, 3 and 4 (np.arange excludes 5).
cv_list = np.arange(2, 5)
# Values tried for RFECV's `step` argument. Per the scikit-learn docs, a value
# in (0, 1) is a fraction of the features removed per iteration, and a value
# >= 1 is an absolute number of features removed per iteration.
step_list= [0.2, 0.3, 0.5, 0.6, 0.7, 1]
class feature_selection:
    """Run RFECV over a grid of estimators, CV settings and step sizes.

    The three constructor arguments are stored unchanged:

    * ``model_dict`` -- mapping whose values are the estimators given to RFECV
      (the keys are not used).
    * ``cv_list`` -- iterable of values for RFECV's ``cv`` argument.
    * ``step_list`` -- iterable of values for RFECV's ``step`` argument.
    """

    def __init__(self,
                 model_dict,
                 cv_list,
                 step_list):
        self.model_dict = model_dict
        self.cv_list = cv_list
        self.step_list = step_list

    def fit_rfecv(self,
                  X,
                  y):
        """Return the columns RFECV keeps for each (model, cv, step) combination.

        Every combination is fitted on the full ``X``; ``X`` itself is never
        modified.  (Previously the rejected columns were dropped from ``X`` in
        place, so the caller's frame shrank and each later combination only
        saw what the earlier ones had left over.)

        Args:
            X: pandas DataFrame of candidate features.
            y: target values, passed straight to ``RFECV.fit``.

        Returns:
            A list of ``pandas.Index`` objects, one per combination that was
            fitted successfully, in iteration order (model, then cv, then
            step).  Combinations whose fit raises are skipped.
        """
        selected_features = []
        for model in self.model_dict.values():
            for cv in self.cv_list:
                for step in self.step_list:
                    try:
                        rfecv = RFECV(estimator=model, cv=cv, step=step)
                        rfecv.fit(X, y)
                    except Exception:
                        # Best-effort grid search: a combination the estimator
                        # rejects is skipped.  Catching Exception (not a bare
                        # except) lets KeyboardInterrupt/SystemExit propagate.
                        continue
                    # support_ is a boolean mask over the columns of X.
                    selected_features.append(X.columns[rfecv.support_])
        return selected_features
def feature_selection_function(X, y):
    """Run RFECV for every module-level (model, cv, step) combination.

    The grid comes from the module-level ``model_dict`` (values only),
    ``cv_list`` and ``step_list``.  Each combination is fitted on the full
    ``X`` and contributes its own reduced copy of the data; ``X`` is left
    untouched.  (Previously columns were dropped from ``X`` in place, so the
    returned list held the same, progressively shrinking frame repeated once
    per combination.)

    Args:
        X: pandas DataFrame of candidate features.
        y: target values, passed straight to ``RFECV.fit``.

    Returns:
        A tuple ``(X_sel_functions, col_name)`` of two parallel lists: the
        reduced DataFrames and their column indexes, one entry per
        combination that fitted successfully.  Combinations whose fit raises
        are skipped.
    """
    X_sel_functions = []
    col_name = []
    for model in model_dict.values():
        for cv in cv_list:
            for step in step_list:
                try:
                    rfecv = RFECV(estimator=model, cv=cv, step=step)
                    rfecv.fit(X, y)
                except Exception:
                    # Best-effort grid search: skip combinations that fail to
                    # fit, but let KeyboardInterrupt/SystemExit propagate.
                    continue
                # support_ is a boolean mask over the columns of X; copy so
                # each result is independent of X and of the other results.
                kept = X.loc[:, rfecv.support_].copy()
                X_sel_functions.append(kept)
                col_name.append(kept.columns)
    return X_sel_functions, col_name
def fs_scaled(X, y, is_scaling=1):
    """Run ``feature_selection_function``, optionally on a standardised copy of ``X``.

    Args:
        X: pandas DataFrame of candidate features.
        y: target values forwarded to ``feature_selection_function``.
        is_scaling: when equal to 1 (the default), ``X`` is transformed with a
            ``StandardScaler`` fitted on ``X`` itself before selection; any
            other value forwards ``X`` unchanged.

    Returns:
        The ``(X_sel_functions, col_name)`` pair produced by
        ``feature_selection_function``.
    """
    features = X
    if is_scaling == 1:
        scaler = preprocessing.StandardScaler().fit(X)
        # Carry over the row index as well as the column labels, so the scaled
        # frame still lines up with the caller's data (the index was
        # previously replaced by a default RangeIndex).
        features = pd.DataFrame(
            data=scaler.transform(X),
            columns=X.columns,
            index=X.index,
        )
    X_sel_functions, col_name = feature_selection_function(features, y)
    return X_sel_functions, col_name
def sand_paper(X):
    """Replace every cell ``v`` of ``X`` with ``(v + 0.01) / 0.01``, in place.

    The arithmetic is done once for the whole frame instead of one
    ``iloc`` read and write per cell.  Columns are swapped in by position, so
    integer columns simply become float columns and duplicate column labels
    are handled.

    Args:
        X: pandas DataFrame with numeric cells.  It is modified in place.

    Returns:
        The same DataFrame object ``X``, after modification.
    """
    offset = 10 ** (-2)
    rescaled = (X + offset) / offset
    for position in range(X.shape[1]):
        # isetitem always installs a new column array rather than writing into
        # the old one, so a dtype change (e.g. int -> float) is not an issue.
        X.isetitem(position, rescaled.iloc[:, position].to_numpy())
    return X
def fs_sanding(X, y, is_scaling=1):
    """Run ``feature_selection_function``, optionally after ``sand_paper``.

    Args:
        X: pandas DataFrame of candidate features.
        y: target values forwarded to ``feature_selection_function``.
        is_scaling: when equal to 1 (the default), ``X`` is first passed
            through ``sand_paper``; any other value forwards ``X`` as is.

    Returns:
        The ``(X_sel_functions, col_name)`` pair produced by
        ``feature_selection_function``.
    """
    # Only a value equal to 1 enables the transform; other values skip it.
    features = sand_paper(X) if is_scaling == 1 else X
    selected_frames, selected_columns = feature_selection_function(features, y)
    return selected_frames, selected_columns