# metrics.py
import datetime
import time

import matplotlib.pyplot as plt
import numpy as np


class MetricLogger:
"""
A utility for logging statistics of learning, such as, mean rewards, mean Q values, times, loss values, and
exploration rate.
"""
    def __init__(self, save_dir):
        """
        Constructor that initializes the logger by writing the header of the training log file and setting the
        file names for the images where we plot data. Additionally, all collected metrics are initialized as
        empty lists.
        Args:
            save_dir: the directory where we save the logs
        """
        self.save_log = save_dir / "log"
        # write the log header
        with open(self.save_log, "w") as f:
            f.write(
                f"{'Episode':>8}{'Step':>8}{'Epsilon':>10}{'MeanReward':>15}"
                f"{'MeanLength':>15}{'MeanLoss':>15}{'MeanQValue':>15}"
                f"{'TimeDelta':>15}{'Time':>20}\n"
            )
        # set the file names for the plot images
        self.ep_rewards_plot = save_dir / "reward_plot.jpg"
        self.ep_lengths_plot = save_dir / "length_plot.jpg"
        self.ep_avg_losses_plot = save_dir / "loss_plot.jpg"
        self.ep_avg_qs_plot = save_dir / "q_plot.jpg"
        # history metrics, one entry per episode
        self.ep_rewards = []
        self.ep_lengths = []
        self.ep_avg_losses = []
        self.ep_avg_qs = []
        # moving averages, appended on every call to record()
        self.moving_avg_ep_rewards = []
        self.moving_avg_ep_lengths = []
        self.moving_avg_ep_avg_losses = []
        self.moving_avg_ep_avg_qs = []
        # current episode metrics
        self.init_episode()
        # timing
        self.record_time = time.time()
    def log_step(self, reward, loss, q):
        """
        Record a single step, i.e., the execution of one action in the environment during training.
        Args:
            reward: reward gained in the step
            loss: loss from learning, None if there was no learning update in the step
            q: average Q value from learning, None if there was no learning update in the step
        Returns: None
        """
        self.curr_ep_reward += reward
        self.curr_ep_length += 1
        # compare against None explicitly so that a legitimate loss of 0.0 is still recorded
        if loss is not None:
            self.curr_ep_loss += loss
            self.curr_ep_q += q
            self.curr_ep_loss_length += 1
    def log_episode(self):
        """
        Mark the end of an episode, i.e., store the reward gained in the episode, compute the per-episode
        averages, and reinitialize the current-episode metrics so that a new episode can be collected.
        Returns: None
        """
        self.ep_rewards.append(self.curr_ep_reward)
        self.ep_lengths.append(self.curr_ep_length)
        if self.curr_ep_loss_length == 0:
            ep_avg_loss = 0
            ep_avg_q = 0
        else:
            # compute averages over the learning updates of this episode
            ep_avg_loss = np.round(self.curr_ep_loss / self.curr_ep_loss_length, 5)
            ep_avg_q = np.round(self.curr_ep_q / self.curr_ep_loss_length, 5)
        # append the averages to the history lists
        self.ep_avg_losses.append(ep_avg_loss)
        self.ep_avg_qs.append(ep_avg_q)
        self.init_episode()
    def init_episode(self):
        """
        Reset all per-episode metrics to zero.
        Returns: None
        """
        self.curr_ep_reward = 0.0
        self.curr_ep_length = 0
        self.curr_ep_loss = 0.0
        self.curr_ep_q = 0.0
        self.curr_ep_loss_length = 0
    def record(self, episode, epsilon, step):
        """
        Record statistics from the episodes and the exploration rate. The statistics logged are moving averages
        over the last 100 episodes, which we print to the log file at regular intervals.
        Args:
            episode: the number of episodes so far
            epsilon: the current exploration rate
            step: the number of steps performed so far during training
        Returns: None
        """
        mean_ep_reward = np.round(np.mean(self.ep_rewards[-100:]), 3)
        mean_ep_length = np.round(np.mean(self.ep_lengths[-100:]), 3)
        mean_ep_loss = np.round(np.mean(self.ep_avg_losses[-100:]), 3)
        mean_ep_q = np.round(np.mean(self.ep_avg_qs[-100:]), 3)
        self.moving_avg_ep_rewards.append(mean_ep_reward)
        self.moving_avg_ep_lengths.append(mean_ep_length)
        self.moving_avg_ep_avg_losses.append(mean_ep_loss)
        self.moving_avg_ep_avg_qs.append(mean_ep_q)
        last_record_time = self.record_time
        self.record_time = time.time()
        time_since_last_record = np.round(self.record_time - last_record_time, 3)
        # print episode data to the console
        print(
            f"Episode {episode} - "
            f"Step {step} - "
            f"Epsilon {epsilon} - "
            f"Mean Reward {mean_ep_reward} - "
            f"Mean Length {mean_ep_length} - "
            f"Mean Loss {mean_ep_loss} - "
            f"Mean Q Value {mean_ep_q} - "
            f"Time Delta {time_since_last_record} - "
            f"Time {datetime.datetime.now().strftime('%Y-%m-%dT%H:%M:%S')}"
        )
        # append the data to the log file
        with open(self.save_log, "a") as f:
            f.write(
                f"{episode:8d}{step:8d}{epsilon:10.3f}"
                f"{mean_ep_reward:15.3f}{mean_ep_length:15.3f}{mean_ep_loss:15.3f}{mean_ep_q:15.3f}"
                f"{time_since_last_record:15.3f}"
                f"{datetime.datetime.now().strftime('%Y-%m-%dT%H:%M:%S'):>20}\n"
            )
        # redraw the plot images with the updated moving averages
        for metric in ["ep_rewards", "ep_lengths", "ep_avg_losses", "ep_avg_qs"]:
            plt.plot(getattr(self, f"moving_avg_{metric}"))
            plt.savefig(getattr(self, f"{metric}_plot"))
            plt.clf()
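

# A minimal usage sketch for MetricLogger (assumption: this loop is illustrative and
# not part of the original module; `env` and `agent` are hypothetical stand-ins for a
# Gym-style environment and an RL agent):
#
#   from pathlib import Path
#   logger = MetricLogger(Path("checkpoints"))
#   for episode in range(40_000):
#       state = env.reset()
#       while True:
#           action = agent.act(state)
#           state, reward, done, info = env.step(action)
#           loss, q = agent.learn()  # may return (None, None) if no update happened
#           logger.log_step(reward, loss, q)
#           if done:
#               break
#       logger.log_episode()
#       if episode % 20 == 0:
#           logger.record(episode, agent.exploration_rate, agent.curr_step)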


class EvaluationLogger:
    """
    Another logger class for logging evaluation data, i.e., the data gathered when evaluating intermediate
    policies in the environment.
    """
    def __init__(self, save_dir, eval_mode=False):
        """
        Constructor that initializes the metrics and creates the evaluation log file with its header.
        Args:
            save_dir: the directory where the logs shall be stored
            eval_mode: True if we are in eval mode, i.e., if we evaluate an existing agent rather than an agent
                that is currently being trained
        """
        self.save_log = save_dir / ("eval_mode" if eval_mode else "eval")
        # write the log header
        with open(self.save_log, "w") as f:
            f.write(
                f"{'Episodes':>8}{'Steps':>8}{'Epsilon':>10}{'MeanReward':>15}"
                f"{'MedianReward':>15}{'MaxReward':>15}{'MeanLength':>15}{'MinReward':>15}{'AllRewards':>15}\n"
            )
        self.curr_ep_reward = 0.0
        self.curr_ep_length = 0
        self.ep_rewards = []
        self.ep_lengths = []
    def init_episode(self):
        """
        Initialize the data recorded for the current episode, i.e., reward and episode length.
        Returns: None
        """
        self.curr_ep_reward = 0.0
        self.curr_ep_length = 0
    def log_step(self, reward):
        """
        Log data from a single step, i.e., just the reward.
        Args:
            reward: the reward gained by the executed action
        Returns: None
        """
        self.curr_ep_reward += reward
        self.curr_ep_length += 1
    def log_episode(self):
        """
        End the episode, i.e., store the total reward gained in the episode and the length of the episode.
        Returns: None
        """
        self.ep_rewards.append(self.curr_ep_reward)
        self.ep_lengths.append(self.curr_ep_length)
        # reinitialize for a new episode
        self.init_episode()
    def log_evaluation_cycle(self, mario, old_exp_rate):
        """
        Log all the data from the current evaluation cycle, i.e., statistics such as the mean and median of the
        rewards gained over several evaluation episodes.
        Args:
            mario: the RL agent
            old_exp_rate: the exploration rate (relevant if we evaluate during training)
        Returns: None
        """
        # compute statistics over the evaluation episodes
        mean_ep_reward = np.round(np.mean(self.ep_rewards), 3)
        median_ep_reward = np.round(np.median(self.ep_rewards), 3)
        max_ep_reward = np.round(np.max(self.ep_rewards), 3)
        mean_ep_length = np.round(np.mean(self.ep_lengths), 3)
        min_ep_reward = np.round(np.min(self.ep_rewards), 3)
        rewards_copy = list(self.ep_rewards)
        # reinitialize for the next evaluation cycle
        self.ep_rewards.clear()
        self.ep_lengths.clear()
        # append to the evaluation log file (field widths match the header written in __init__)
        with open(self.save_log, "a") as f:
            f.write(
                f"{mario.curr_episode:8d}{mario.curr_step:8d}{old_exp_rate:10.3f}"
                f"{mean_ep_reward:15.3f}{median_ep_reward:15.3f}{max_ep_reward:15.3f}{mean_ep_length:15.3f}"
                f"{min_ep_reward:15.3f} {rewards_copy}\n"
            )
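

if __name__ == "__main__":
    # Minimal smoke test / usage sketch (assumption: this block is not part of the
    # original module; SimpleNamespace stands in for the real agent and only provides
    # the two attributes that log_evaluation_cycle() reads).
    import random
    import tempfile
    from pathlib import Path
    from types import SimpleNamespace

    save_dir = Path(tempfile.mkdtemp())
    eval_logger = EvaluationLogger(save_dir, eval_mode=True)
    dummy_agent = SimpleNamespace(curr_episode=0, curr_step=0)
    for _ in range(5):  # five evaluation episodes
        for _ in range(10):  # ten steps per episode
            eval_logger.log_step(reward=random.random())
        eval_logger.log_episode()
    eval_logger.log_evaluation_cycle(dummy_agent, old_exp_rate=0.05)
    print(f"evaluation statistics written to {eval_logger.save_log}")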