swmm_modpods_eval.py
# import modpods and other libraries
import sys
sys.path.append("G:/My Drive/modpods")
import modpods
#print(modpods)
import numpy as np
import pandas as pd
import scipy.stats as stats
import os
import matplotlib.pyplot as plt
import math
#!pip3 install pyswmm
import pyswmm
#!pip3 install swmm-toolkit
import swmm.toolkit.shared_enum  # provides the NodeAttribute and SubcatchAttribute enums used below
import datetime
from matplotlib.gridspec import GridSpec
# for storing models and analyzing performance
import dill as pickle  # so pickle can serialize lambda functions
import time
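
# This script fits reduced-order modpods rainfall-runoff models to node depths
# extracted from a full Ann Arbor SWMM model. It loads training and testing SWMM
# output files, screens a subset of junctions for a usable dry-start response,
# trains delay_io models for several polynomial orders, and saves error metrics,
# plots, and pickled models for each junction/polynomial-order combination.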
"""# load data"""
print("begin loading training data")
start_index = 9
print("start index")
print(start_index)
#with pyswmm.output.Output('/content/drive/MyDrive/SINDy/SWMM-model-reduction/AnnArborFull/full_ann_arbor_090216/full_ann_arbor.out') as out:
with pyswmm.output.Output("G:/My Drive/SINDy/SWMM-model-reduction/AnnArborFull/full_ann_arbor_090216/train.out") as out:
# 1200 junctions, so stepping 30 loads about 40 sites
depths = pd.DataFrame(columns= list(out.nodes.keys())[start_index:-1:30], index = out.node_series(index=0,attribute=swmm.toolkit.shared_enum.NodeAttribute.INVERT_DEPTH))
#depths = pd.DataFrame(columns= list(["88-55483", "1771_PLYMOUTH_2","37035_6"]), index = out.node_series(index=0,attribute=swmm.toolkit.shared_enum.NodeAttribute.INVERT_DEPTH))
for col in depths.columns:
depths[col]= out.node_series(index=col, attribute=swmm.toolkit.shared_enum.NodeAttribute.INVERT_DEPTH).values()
# all subcatchemnts have the same precip time series so just grab the first subcatchment listed
depths['precipitation'] = out.subcatch_series(index=0, attribute=swmm.toolkit.shared_enum.SubcatchAttribute.RAINFALL).values()
# i want precipitation to be the first column
cols = depths.columns.tolist()
cols = cols[-1:] + cols[:-1]
depths = depths[cols]
df_train = depths
#depths.plot(figsize=(25,10))
#x = np.array(depths)
#x_test = np.array(depths)
print(df_train)
print("training data loaded")
print("begin loading testing data")
#with pyswmm.output.Output('/content/drive/MyDrive/SINDy/SWMM-model-reduction/AnnArborFull/full_ann_arbor_090216/test.out') as out:
with pyswmm.output.Output("G:/My Drive/SINDy/SWMM-model-reduction/AnnArborFull/full_ann_arbor_090216/test.out") as out:
test_depths = pd.DataFrame(columns= list(out.nodes.keys())[start_index:-1:30], index = out.node_series(index=0,attribute=swmm.toolkit.shared_enum.NodeAttribute.INVERT_DEPTH))
#test_depths = pd.DataFrame(columns= list(["88-55483", "1771_PLYMOUTH_2","37035_6"]), index = out.node_series(index=0,attribute=swmm.toolkit.shared_enum.NodeAttribute.INVERT_DEPTH))
for col in test_depths.columns:
test_depths[col]= out.node_series(index=col, attribute=swmm.toolkit.shared_enum.NodeAttribute.INVERT_DEPTH).values()
# all subcatchemnts have the same precip time series so just grab the first subcatchment listed
test_depths['precipitation'] = out.subcatch_series(index=0, attribute=swmm.toolkit.shared_enum.SubcatchAttribute.RAINFALL).values()
# i want precipitation to be the first column
cols = test_depths.columns.tolist()
cols = cols[-1:] + cols[:-1]
test_depths = test_depths[cols]
#test_depths.plot(figsize=(25,10))
df_eval = test_depths
#x_test = np.array(test_depths)
#x = np.array(test_depths)
print(df_eval)
print("testing data loaded")
# model is initially dry
windup_timesteps = 0
# model parameters
max_polyorder = 3
max_iter = 100
max_transforms = 2
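
# for every junction that passes the screening below, a modpods delay_io model is
# trained for polynomial orders 1 through max_polyorder, each allowing up to
# max_transforms input transformations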
for depth_col in df_train.columns[1:]:
    # filter out junctions which have no real response in either training or testing data
    if (df_train[depth_col].max() - df_train[depth_col].min() < 0.5
            or df_eval[depth_col].max() - df_eval[depth_col].min() < 0.5):
        print("no response in either training or testing condition. skipping.")
        continue
    # filter out junctions with derivative greater than 50 at the first timestep
    if (abs(df_train[depth_col].diff().iloc[1]) > 50 or abs(df_eval[depth_col].diff().iloc[1]) > 50):
        print("derivative greater than 50 at first timestep. skipping.")
        continue
    # filter out junctions with depth greater than 0.01 at the first timestep (should start dry, especially after subtracting minimum)
    if (df_train[depth_col].iloc[1] > 0.01 or df_eval[depth_col].iloc[1] > 0.01):
        print("depth greater than 0.01 at first timestep. skipping.")
        continue
    # filter out junctions with depth greater than 20% of the maximum at the last timestep (should end nearly dry)
    # some junctions end with a fixed depth, which isn't physical
    if (df_train[depth_col].iloc[-1] > 0.2*df_train[depth_col].max() or df_eval[depth_col].iloc[-1] > 0.2*df_eval[depth_col].max()):
        print("depth greater than 20% of maximum at last timestep. skipping.")
        continue
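
    # this junction passed all four screens; fit models for it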
    print(depth_col)
    shift = False
    for polyorder in range(1, max_polyorder+1):
        start = time.perf_counter()
        rainfall_runoff_model = modpods.delay_io_train(df_train, [depth_col], ['precipitation'],
                                                       windup_timesteps=windup_timesteps,
                                                       init_transforms=1, max_transforms=max_transforms,
                                                       max_iter=max_iter, poly_order=polyorder,
                                                       verbose=False, bibo_stable=True)
        end = time.perf_counter()
        training_time_minutes = (end - start)/60
        if (shift):
            results_folder_path = str("G:/My Drive/PhD Admin and Notes/paper1/revisions-code/swmm_modpods_results/" + str(depth_col) + '_po_' + str(polyorder) + '_shift/')
        else:
            results_folder_path = str("G:/My Drive/PhD Admin and Notes/paper1/revisions-code/swmm_modpods_results/" + str(depth_col) + '_po_' + str(polyorder) + '_no_shift/')
        if not os.path.exists(results_folder_path):
            # create the results directory because it does not exist
            os.makedirs(results_folder_path)
        with open(str(results_folder_path + 'rainfall_runoff_model'), 'wb') as f:
            pickle.dump(rainfall_runoff_model, f)  # binary of full training results
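
        # as used below, delay_io_train returns a dict keyed by number of transformations,
        # where each entry's 'final_model' holds 'error_metrics', 'response', 'simulated',
        # and a 'diverged' flag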
        perf = pd.DataFrame.from_dict(rainfall_runoff_model[1]['final_model']['error_metrics'], orient='columns', dtype='float')
        for num_transforms in range(2, max_transforms+1):
            perf = pd.concat([perf, pd.DataFrame.from_dict(rainfall_runoff_model[num_transforms]['final_model']['error_metrics'])])
        perf.index = range(1, max_transforms+1)
        perf['training_time_minutes'] = training_time_minutes
        perf.to_csv(str(results_folder_path + 'training_error_metrics.csv'))
        print(perf)
        del perf
        # plot the results
        fig, ax = plt.subplots(max_transforms, 1, figsize=(10, 10))
        for i in range(0, max_transforms):
            ax[i].plot(df_train.index[windup_timesteps+1:], rainfall_runoff_model[i+1]['final_model']['response'][depth_col][windup_timesteps+1:], label='observed')
            if (not rainfall_runoff_model[i+1]['final_model']['diverged']):  # simulation didn't diverge, so the simulated data is valid
                ax[i].plot(df_train.index[windup_timesteps+1:], rainfall_runoff_model[i+1]['final_model']['simulated'][:, 0], label='simulated')
            ax[i].set_title(str(i+1) + ' transformation(s)')
            if (i < 1):
                ax[i].legend()
        fig.suptitle("training")
        plt.tight_layout()
        plt.savefig(str(results_folder_path + "training_viz.png"), dpi=300, bbox_inches='tight')
        plt.savefig(str(results_folder_path + "training_viz.svg"), dpi=300, bbox_inches='tight')
        plt.close()

        diverged_sims = list()
        eval_simulations = list()  # for plotting
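        # evaluate each trained model on the testing data; as used here, delay_io_predict
        # returns a dict with the simulated 'prediction', a 'diverged' flag, and 'error_metrics'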
        for num_transforms in range(1, max_transforms+1):
            eval_sim = modpods.delay_io_predict(rainfall_runoff_model, df_eval, num_transforms, evaluation=True)
            eval_simulations.append(eval_sim['prediction'])
            diverged_sims.append(eval_sim['diverged'])
            if (num_transforms < 2):
                eval_perf = pd.DataFrame.from_dict(eval_sim['error_metrics'], orient='columns', dtype='float')
                print(eval_perf)
            else:
                eval_perf = pd.concat([eval_perf, pd.DataFrame.from_dict(eval_sim['error_metrics'])])
                print(eval_perf)
        #print(eval_perf)
        eval_perf.index = range(1, max_transforms+1)
        eval_perf['training_time_minutes'] = training_time_minutes
        eval_perf.to_csv(str(results_folder_path + 'eval_error_metrics.csv'))
        print(eval_perf)
        del eval_perf
        # plot the results
        fig, ax = plt.subplots(max_transforms, 1, figsize=(10, 10))
        for i in range(0, max_transforms):
            ax[i].plot(df_eval.index[windup_timesteps+1:], df_eval[depth_col][windup_timesteps+1:], label='observed')
            if (not diverged_sims[i]):
                ax[i].plot(df_eval.index[windup_timesteps+1:], eval_simulations[i], label='simulated')
            ax[i].set_title(str(i+1) + ' transformation(s)')
            if (i < 1):
                ax[i].legend()
        fig.suptitle("evaluation")
        plt.tight_layout()
        plt.savefig(str(results_folder_path + "eval_viz.png"), dpi=300, bbox_inches='tight')
        plt.savefig(str(results_folder_path + "eval_viz.svg"), dpi=300, bbox_inches='tight')
        plt.close()
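
# A minimal sketch of reloading the artifacts this script saves, assuming the same
# results_folder_path layout used above. load_saved_results is an illustrative helper,
# not part of modpods, and is only defined here (never called by the script itself).
def load_saved_results(results_folder_path):
    # reload the pickled training results and the evaluation error metrics CSV
    with open(str(results_folder_path + 'rainfall_runoff_model'), 'rb') as f:
        saved_model = pickle.load(f)  # dict keyed by number of transformations, as assumed above
    eval_metrics = pd.read_csv(str(results_folder_path + 'eval_error_metrics.csv'), index_col=0)
    return saved_model, eval_metrics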