-
Notifications
You must be signed in to change notification settings - Fork 3
/
blend.py
126 lines (100 loc) · 4.1 KB
/
blend.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import Activation, Dense
from keras.layers import Concatenate, LSTM, Input, concatenate
from keras.optimizers import Adagrad
from utils import *
import os
import gc
# All prediction files are assumed to be in movie-user ("mu") order.
# Each entry pairs one model's probe-set prediction file with its qual-set
# prediction file, so the two filename lists line up model by model.
# The NMF model (pmf/predictionsl005k30t10NMF and ...NMFqual) is left out.
_prediction_files = (
    # SVD
    ("submissions/final blend/mu_svd_probe_May25041116_rmse918.pred",
     "submissions/final blend/mu_svd_qual_May25041117_rmse91993.pred"),
    # SVD++
    ("submissions/final blend/mu_svd++_probe_May25155049_rmse917.pred",
     "submissions/final blend/mu_svd++_qual_May25155100_rmse91912.pred"),
    # RBM
    ("submissions/mu_rbm_graphchiMay26May4537.probe",
     "submissions/mu_rbm_graphchiMay26May4537.pred"),
    # PMF
    ("pmf/predictionsl005k65t10",
     "pmf/predictionsl005k65t10qual"),
)
probe_names = [pair[0] for pair in _prediction_files]
qual_names = [pair[1] for pair in _prediction_files]
num_models = len(probe_names)

# Presumably the row counts of the probe and qual sets -- TODO confirm.
probe_length = 1374739
qual_length = 2749898

# Network selector: 1 builds a single Dense output layer, 2 puts a Dense
# layer of n_hidden units in front of it.
method = 1
n_hidden = 30

# Ordering tag handed to save_submission ('mu' = movie-user).
ordering = 'mu'

# Set to True to save a submission on qual.
submit = True

# Set to True to save model parameters for future predictions.
save_model = False
def RMSE(true, pred):
    """Return the root-mean-squared error between ratings and predictions.

    Both inputs are flattened to 1-D before they are compared, so a column
    vector of shape (n, 1) and a flat vector of shape (n,) are matched
    element by element instead of being broadcast into an (n, n) grid.

    Args:
        true: array-like of true ratings (list, ndarray, DataFrame, ...).
        pred: array-like of predicted ratings holding the same number of
            elements as ``true``.

    Returns:
        The RMSE as a plain float (``nan`` when the inputs are empty).
    """
    true_flat = np.asarray(true, dtype=float).ravel()
    pred_flat = np.asarray(pred, dtype=float).ravel()
    return float(np.sqrt(np.mean((true_flat - pred_flat) ** 2)))
# Load the true probe-set ratings that the model predictions are scored and
# trained against. Only the "Rating" column is parsed, so the remaining CSV
# columns never occupy memory.
print('Loading probe set...')
probeSet = pd.read_csv(os.path.join('data', 'mu_probe.csv'), usecols=["Rating"])
# Single-column frame -> 2-D array of doubles with one column.
probeSet = np.array(probeSet.astype('double'))
# The matching load of data/mu_qual.csv is left out on purpose: nothing in
# this script reads qual-set ratings.
def _stack_predictions(paths, truth=None):
    """Read one headerless prediction file per model and stack them.

    Args:
        paths: prediction filenames, one per model.
        truth: optional true ratings; when given, each file's RMSE against
            them is printed as the file is loaded.

    Returns:
        A 2-D array with one row per prediction and one column per file,
        built from the first column of every file.
    """
    per_model = []
    for path in paths:
        frame = pd.read_csv(path, header=None).astype('double')
        if truth is not None:
            print("RMSE for", path, ":", RMSE(truth, frame))
        per_model.append(np.array(frame))
    # (n_files, n_rows, n_cols) --T--> (n_cols, n_rows, n_files); index 0
    # keeps the first column of each file as an (n_rows, n_files) matrix.
    return np.array(per_model).T[0]


# Model predictions on the probe set (scored against the true ratings) and
# on the qual set (no scoring).
probe_list = _stack_predictions(probe_names, truth=probeSet)
qual_list = _stack_predictions(qual_names)

# ndim is the number of array axes, not the shape.
print("probe train dim:", probe_list.ndim)
print("probe test dim:", probeSet.ndim)
print("qual train dim:", qual_list.ndim)
# Build the blending network: each input row holds the num_models per-model
# predictions for one rating, and the output is a single blended prediction.
if method == 1:
    # Single layer: one ReLU unit fed directly by the model predictions.
    nnmodel = Sequential()
    nnmodel.add(Dense(1, input_dim=num_models, activation="relu"))
elif method == 2:
    # One hidden layer of n_hidden ReLU units. Without a non-linearity here
    # the two Dense layers would collapse into the method-1 model.
    nnmodel = Sequential()
    nnmodel.add(Dense(n_hidden, input_dim=num_models, activation="relu"))
    # Input size is inferred from the previous layer.
    nnmodel.add(Dense(1, activation="relu"))
else:
    # Fail here rather than with a NameError on nnmodel further down.
    raise ValueError("method must be 1 or 2, got %r" % (method,))

# NOTE(review): a ReLU output unit that starts out negative receives no
# gradient; consider a linear output if training stalls at a constant loss.
# Plain MSE regression: 'accuracy' is a classification metric and carries no
# meaning for continuous ratings, so only the loss is tracked.
nnmodel.compile(loss='mean_squared_error', optimizer='adam')

# Train on the probe set, then predict the qual set.
nnmodel.fit(probe_list, probeSet, epochs=5, batch_size=128)
blend_pred = nnmodel.predict(qual_list)
# Clamp every blended prediction into [1, 5]. np.clip handles the whole
# array at once and does not depend on a hard-coded prediction count.
blend_pred = np.clip(blend_pred, 1.0, 5.0)

if save_model:
    print('Saving model...')
    # NOTE(review): `dump` is not imported by name in this file; presumably
    # it comes from `from utils import *` -- confirm it exists and that its
    # argument order is (path, object).
    dump.dump(os.path.join('models', 'blend'), nnmodel)

# Write the qual-set submission; the name records the hidden-layer width.
if submit:
    save_submission("blendn" + str(n_hidden), blend_pred, ordering)