test2.py
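"""Train a QN agent (imported from QN.py) on the CartPole environment.

Each episode's length and shaped score are printed and written to test2.txt,
the model is checkpointed to save/model.ckpt every TARGET_REPLACE_ITER
episodes, and the average episode length per 100-episode block is plotted.
"""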
import numpy as np
import matplotlib.pyplot as plt
from QN import QN
ydata = []
step_sum = 0  # running sum of episode lengths over the current 100-episode block
agent = QN()

# Optionally restore a previously saved model.
status = input("#: ")
if status == "load":
    agent.saver.restore(agent.sess, "save/model.ckpt")
    print("Model restored")

total_steps = 0
file = open('test2.txt', 'w')
file.write('Run Data:\n')
for i_episode in range(agent.RUN_TIME):
    observation = agent.env.reset()
    t = 0
    score = 0
    while True:
        t = t + 1
        agent.env.render()
        s = observation
        a = agent.choose_action(s)
        observation, reward, done, info = agent.env.step(a)
        # Reshape the reward: r1 rewards keeping the cart near the centre of the
        # track, r2 rewards keeping the pole close to vertical.
        x, x_dot, theta, theta_dot = observation
        r1 = (agent.env.x_threshold - abs(x)) / agent.env.x_threshold - 0.8
        r2 = (agent.env.theta_threshold_radians - abs(theta)) / agent.env.theta_threshold_radians - 0.5
        reward = r1 + r2
        agent.remember([s, a, reward, observation])
        score = score + reward
        if done:
            print("Run {} - Episode finished after {} timesteps".format(i_episode, t + 1))
            step_sum += t + 1
            print("Score: ", score)
            file.write("Run {} - Episode finished after {} timesteps\n".format(i_episode, t + 1))
            file.write("Score: {}\n".format(score))
            print("Epsilon: ", agent.EPSILON, "\n")
            break
        # Start training once enough transitions have been collected.
        if total_steps > 1000:
            agent.train()
        total_steps += 1
    # Periodically copy the online network's weights into the target network
    # and checkpoint the model.
    if i_episode % agent.TARGET_REPLACE_ITER == 0:
        print("Updating Weights")
        file.write("Updating Weights\n")
        agent.update_weights()
        agent.saver.save(agent.sess, "save/model.ckpt")
        print("Model saved")
        file.write("Model saved\n")
    # Every 100 episodes, plot the average episode length over the last block.
    if i_episode % 100 == 0:
        ydata.append(step_sum / 100)
        step_sum = 0
        plt.close()
        plt.plot(range(0, int(i_episode / 100) + 1), ydata)
        plt.ylabel('Steps')
        plt.xlabel('Times')
        plt.show(block=False)
file.close()