-
Notifications
You must be signed in to change notification settings - Fork 0
/
usgs_shift_or_no_shift.py
113 lines (100 loc) · 4.61 KB
/
usgs_shift_or_no_shift.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
# -*- coding: utf-8 -*-
"""usgs-shift-or-no-shift.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1zNdwJsl-frHoxcEZj8HwwNRdxh1D35Ls
"""
#from google.colab import drive
#drive.mount('/content/drive',force_remount=True)
#!pip3 install pysindy
#import pysindy as ps
import scipy.stats as stats
import os
import pandas as pd
import numpy as np
pd.set_option("display.precision", 3)
import matplotlib.pyplot as plt
#folder_path = str('/content/drive/MyDrive/PhD Admin and Notes/paper1/revisions-code/usgs_modpods_results')
folder_path = "G:/My Drive/PhD Admin and Notes/paper1/revisions-code/usgs_modpods_results"


def site_id_from_subdir(subdir, root, id_length=8):
    """Return the site id that starts the path of ``subdir`` below ``root``.

    The id is taken as the first ``id_length`` characters of ``subdir``
    relative to ``root``. Working from the relative path (instead of a fixed
    character offset into the absolute path) keeps the result correct when
    ``root`` is changed to a path of a different length.

    Args:
        subdir: Directory reported by ``os.walk`` somewhere below ``root``.
        root: Directory the walk started from.
        id_length: Number of leading characters that form the site id.

    Returns:
        The site id string, or ``""`` when ``subdir`` is ``root`` itself.
    """
    relative = os.path.relpath(str(subdir), str(root))
    if relative == os.curdir:
        # Files sitting directly in ``root`` are not inside a site directory.
        return ""
    return relative[:id_length]


def load_error_metrics(root):
    """Read the ``po_1`` error-metric CSV files found below ``root``.

    A file is considered when its path relative to ``root`` contains
    ``"error_metrics"`` and ``"po_1"`` (the linear models). It is then sorted
    by the keywords in that relative path: ``"training"`` or ``"eval"``, and
    ``"no_shift"`` or not. Keywords are looked up in the relative path only,
    so the name of ``root`` cannot influence the classification.

    If several files of the same kind exist for one site, the one visited
    last by ``os.walk`` replaces the earlier ones.

    Args:
        root: Directory to walk.

    Returns:
        A tuple ``(shifted_train, shifted_eval, noshift_train, noshift_eval)``
        of dicts mapping site id to the DataFrame read from the CSV file.
    """
    shifted_train, shifted_eval = {}, {}
    noshift_train, noshift_eval = {}, {}
    for subdir, _dirs, files in os.walk(root):
        print(subdir)
        for file in files:
            full_path = os.path.join(subdir, file)
            relative_path = os.path.relpath(full_path, root)
            if "error_metrics" not in relative_path:
                continue
            site_id = site_id_from_subdir(subdir, root)
            print(site_id)
            # only look at the linear models
            if "po_1" not in relative_path:
                continue
            is_no_shift = "no_shift" in relative_path
            if "training" in relative_path:
                target = noshift_train if is_no_shift else shifted_train
            elif "eval" in relative_path:
                target = noshift_eval if is_no_shift else shifted_eval
            else:
                # Neither a training nor an evaluation file: nothing to store.
                continue
            target[site_id] = pd.read_csv(full_path)
    return shifted_train, shifted_eval, noshift_train, noshift_eval


# Not filled by this script; kept so the name stays defined.
performance_summaries = dict()
shifted_train, shifted_eval, noshift_train, noshift_eval = load_error_metrics(folder_path)
# Show every training table loaded for the shifted models.
for table in shifted_train.values():
    print(table)

# For each of the four result sets, keep the largest NSE found in every
# site's table.
shift_train_NSE = [table.NSE.max() for table in shifted_train.values()]
shift_eval_NSE = [table.NSE.max() for table in shifted_eval.values()]
noshift_train_NSE = [table.NSE.max() for table in noshift_train.values()]
noshift_eval_NSE = [table.NSE.max() for table in noshift_eval.values()]

for nse_values in (shift_train_NSE, shift_eval_NSE, noshift_train_NSE, noshift_eval_NSE):
    print(nse_values)
def failure_rate(nse_values, threshold):
    """Return the fraction of ``nse_values`` strictly below ``threshold``.

    Args:
        nse_values: Sequence of NSE scores.
        threshold: Scores below this value count as failures.

    Returns:
        The failure fraction as a float, or NaN when ``nse_values`` is empty
        (instead of raising ``ZeroDivisionError``).
    """
    if not nse_values:
        return float("nan")
    failures = sum(1 for value in nse_values if value < threshold)
    return failures / len(nse_values)


# Report order: both splits at NSE < 0 first, then both splits at NSE < -100.
for threshold in (0, -100):
    for split, shifted_values, unshifted_values in (
        ("Train", shift_train_NSE, noshift_train_NSE),
        ("Eval", shift_eval_NSE, noshift_eval_NSE),
    ):
        print("{} Failure Rate (NSE < {})".format(split, threshold))
        print("Shift: ", failure_rate(shifted_values, threshold))
        print("No Shift:", failure_rate(unshifted_values, threshold))

print("length of noshift_eval_nse")
print(len(noshift_eval_NSE))
print("length of shift_eval_nse")
print(len(shift_eval_NSE))
def _cumulative_levels(values):
    """Return evenly spaced levels from 0 to 1, one per entry of ``values``."""
    return np.linspace(0, 1, len(values))


shift_train_bins = _cumulative_levels(shift_train_NSE)
shift_eval_bins = _cumulative_levels(shift_eval_NSE)
noshift_train_bins = _cumulative_levels(noshift_train_NSE)
noshift_eval_bins = _cumulative_levels(noshift_eval_NSE)

plt.figure(figsize=(8, 8))

# One curve per result set: sorted NSE values on x, cumulative level on y.
curves = (
    (shift_train_NSE, shift_train_bins, 'b--', 'Train [Shifted]'),
    (shift_eval_NSE, shift_eval_bins, 'r--', 'Evaluation [Shifted]'),
    (noshift_train_NSE, noshift_train_bins, 'g-', 'Train [Not Shifted]'),
    (noshift_eval_NSE, noshift_eval_bins, 'y-', 'Evaluation [Not Shifted]'),
)
for nse_values, levels, line_format, legend_label in curves:
    plt.plot(np.sort(nse_values), levels, line_format, label=legend_label)

text_size = 'x-large'
plt.xlabel("NSE", fontsize=text_size)
plt.xlim([-1, 1])
plt.ylabel("Cumulative Density", fontsize=text_size)
plt.legend(fontsize=text_size, loc='best')
plt.title("USGS Gaging Stations", fontsize=text_size)

# The figure is written next to the results before it is shown.
figure_path = str(folder_path + '/usgs_shift_or_no_shift.png')
plt.savefig(figure_path, format='png', dpi=600)
plt.show()