-
Notifications
You must be signed in to change notification settings - Fork 0
/
1-findbestmodel.py
76 lines (64 loc) · 2.06 KB
/
1-findbestmodel.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# Suppress every warning category for the remainder of the run.
import warnings
warnings.filterwarnings("ignore")

# Read the training data.
import pandas as pd
df = pd.read_csv("data/train.csv")

# Drop the columns the original author marked as useless.
df = df.drop(columns=["depth", "table"])

# Label-encode the categorical columns into integer codes.
from sklearn import preprocessing
le = preprocessing.LabelEncoder()
columns = ["cut", "color", "clarity"]
for column_name in columns:
    # The single encoder is refitted on each column in turn, so once the loop
    # finishes `le` only remembers the classes of the last column.
    df[column_name] = le.fit_transform(df[column_name])

# Features are everything except the row id and the target; target is price.
df_noprice = df.drop(columns=["id", "price"])
X = df_noprice
y = df["price"]
# Splitting: hold out 20% of the rows for evaluation.
# NOTE(review): no random_state is passed, so the split -- and every metric
# derived from it -- changes from run to run. Pass a fixed seed if the model
# comparison needs to be reproducible.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Checking: features and target must contain the same number of rows.
# The earlier try/except only evaluated the comparison and discarded its
# result, so the message could never be printed; an explicit test makes the
# check effective.
if X_train.shape[0] != y_train.shape[0]:
    print("Something went wrong when splitting")
# Defining model: import every candidate regressor and register one instance
# of each under a short label. All use constructor defaults except CatBoost,
# which is given loss_function="RMSE" explicitly.
from sklearn.linear_model import LinearRegression as LinReg
from sklearn.linear_model import Lasso, Ridge, SGDRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from catboost import CatBoostRegressor
from xgboost import XGBRegressor

# Insertion order is the order in which the models are later fitted and
# reported, so it is kept exactly as before.
models = {
    # Linear family
    "lr": LinReg(),
    "ridge": Ridge(),
    "lasso": Lasso(),
    "sgd": SGDRegressor(),
    # Neighbours, boosting, kernel
    "knn": KNeighborsRegressor(),
    "grad": GradientBoostingRegressor(),
    "svr": SVR(),
    # Trees and forests
    "randomregressor": RandomForestRegressor(),
    "decisiontree": DecisionTreeRegressor(),
    # Third-party gradient boosting
    "catboost": CatBoostRegressor(loss_function="RMSE"),
    "xgboost": XGBRegressor(),
}
# Fitting: train each registered model on the training split, printing its
# label first so the console shows which estimator is currently running.
for label in models:
    print("Fitting: ", label)
    models[label].fit(X_train, y_train)
# Get errors: predict on the held-out split with every fitted model and print
# MAE, MSE, RMSE and R2 under a header carrying the model's label.
from sklearn import metrics
import numpy as np
for name, model in models.items():
    y_pred = model.predict(X_test)
    # MSE is computed once and reused for RMSE instead of being recalculated.
    mse = metrics.mean_squared_error(y_test, y_pred)
    print(f"------{name}------")
    print('MAE - ', metrics.mean_absolute_error(y_test, y_pred))
    print('MSE - ', mse)
    print('RMSE - ', np.sqrt(mse))
    print('R2 - ', metrics.r2_score(y_test, y_pred))