-
Notifications
You must be signed in to change notification settings - Fork 0
/
3-save_my_model.py
42 lines (30 loc) · 1.03 KB
/
3-save_my_model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# Train a CatBoost price regressor on the full training set and pickle it.
#
# Steps: read data/train.csv, drop the unused columns, label-encode the
# categorical columns, fit on every row (no validation split) and write the
# fitted model to models/my_model.

# Warnings are silenced before the remaining imports so that anything those
# libraries emit at import time is suppressed as well.
import warnings
warnings.filterwarnings("ignore")

import os
import pickle

import pandas as pd
from catboost import CatBoostRegressor
from sklearn import preprocessing

# Read
train = pd.read_csv("data/train.csv")

# Drop columns that are not used as features
train = train.drop(columns=["depth", "table"])

# Label-encode the categorical columns.
# NOTE(review): fit_transform() refits `le` on every call, so after the loop
# it only holds the mapping of the last column; the per-column mappings are
# not stored anywhere. Whoever encodes data for prediction must reproduce the
# same encoding -- confirm that this is done consistently.
le = preprocessing.LabelEncoder()
columns = ["cut", "color", "clarity"]
for col in columns:
    train[col] = le.fit_transform(train[col])

# Features and target (no split this time: the whole dataset is training data)
train_noprice = train.drop(columns=["id", "price"])
X_train = train_noprice
y_train = train["price"]

# Sanity check: features and target must have the same number of rows.
# A bare comparison never raises, so the check has to be an explicit test.
if X_train.shape[0] != y_train.shape[0]:
    raise ValueError("Something went wrong when splitting")

# Define the model
model = CatBoostRegressor(
    loss_function="RMSE",
    depth=6,
    learning_rate=0.1,
    iterations=900,
    l2_leaf_reg=0.25,
    random_strength=10,
    grow_policy="SymmetricTree",
)

# Fit
model.fit(X_train, y_train)

# Save the model. The output directory is created if it is missing so a
# finished training run is not lost, and the file is closed deterministically.
model_path = "models/my_model"
os.makedirs(os.path.dirname(model_path), exist_ok=True)
with open(model_path, "wb") as model_file:
    pickle.dump(model, model_file)