-
Notifications
You must be signed in to change notification settings - Fork 1
/
mdqn.py
105 lines (76 loc) · 2.12 KB
/
mdqn.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
#!/usr/bin/env python
import signal
import torch
import os
from environment import Environment
import datageneration
import train
import time
import sys
import subprocess
from subprocess import Popen
from os.path import abspath, dirname, join
import config as cfg
def getValue(filename):
    """Return the integer stored on the last non-blank line of *filename*.

    The file is read directly instead of shelling out to ``tail``, so the
    function does not depend on an external binary being installed.
    Trailing blank lines are skipped, so a stray final newline does not make
    the value unreadable.

    Args:
        filename: Path of a UTF-8 text file whose last non-blank line is an
            integer.

    Returns:
        The parsed integer.

    Raises:
        OSError: If the file cannot be opened (e.g. ``FileNotFoundError``).
        ValueError: If the file has no non-blank line, or that line is not an
            integer.
    """
    with open(filename, encoding="utf-8") as handle:
        stripped_lines = [line.strip() for line in handle]

    # Walk backwards so the most recently appended value wins.
    for text in reversed(stripped_lines):
        if text:
            return int(text)

    raise ValueError("{!r} contains no integer value".format(filename))
def setValue(filename, value):
    """Overwrite *filename* with the string form of *value*.

    The file is truncated and rewritten, so afterwards it contains only
    ``str(value)`` (no trailing newline is added). A ``with`` block guarantees
    the handle is closed even if the write fails.

    Args:
        filename: Path of the file to (re)write.
        value: Any object; it is converted with ``str`` before writing.

    Raises:
        OSError: If the file cannot be opened or written.
    """
    with open(filename, "w") as handle:
        handle.write(str(value))
def openSim(process):
    """Restart the simulator and return the handle of the new instance.

    The given process is sent a terminate request, then the module-level
    ``command`` is launched with ``Popen``. A fixed five-second pause follows
    each of the two steps.

    Args:
        process: ``Popen``-like object for the instance to stop; only its
            ``terminate()`` method is used.

    Returns:
        The ``Popen`` object of the freshly launched simulator.
    """
    pause_seconds = 5

    # Ask the previous instance to stop and give it time to go away.
    process.terminate()
    time.sleep(pause_seconds)

    # Launch a new instance and give it time to come up before it is used.
    relaunched = Popen(command)
    time.sleep(pause_seconds)
    return relaunched
def killSim(process):
    """Request termination of the simulator, then pause for ten seconds.

    Args:
        process: ``Popen``-like object; only its ``terminate()`` method is
            used. The function does not check whether the process has
            actually exited before returning.
    """
    shutdown_pause_seconds = 10
    process.terminate()
    time.sleep(shutdown_pause_seconds)
def signalHandler(sig, frame):
    """Signal handler: terminate the tracked simulator and exit with status 0.

    Operates on the module-level ``process`` object rather than on an
    argument.

    Args:
        sig: Signal number passed by the ``signal`` module (unused).
        frame: Current stack frame passed by the ``signal`` module (unused).

    Raises:
        SystemExit: Always, with exit code 0.
    """
    process.terminate()
    # Equivalent to sys.exit(0), which raises SystemExit itself.
    raise SystemExit(0)
# ---------------------------------------------------------------------------
# Episode driver.
#
# Each episode runs up to two phases, selected by the integer stored in
# files/phase.txt:
#   0 -> data collection (datageneration.main), then the phase file is set to 1
#   1 -> training (train.main), then the phase file is set back to 0
#
# NOTE(review): the nesting below was reconstructed from statement order
# because the indentation was not available; confirm in particular that the
# final killSim/print are meant to run once, after the loop.
# ---------------------------------------------------------------------------
t_episodes = cfg.t_episodes
file_phase = 'files/phase.txt'
# Episode to resume from, as stored by a previous run.
episode = int(torch.load('files/episode.dat'))

# The simulator is only managed by this script when a directory is passed as
# the first command-line argument; the binary is then resolved inside it.
command = './simDRLSR.x86_64'
execute_simulator = len(sys.argv) > 1
if execute_simulator:
    directory = str(sys.argv[1])
    command = abspath(join(directory, command))

phase = getValue(file_phase)

# Placeholder child process so that `process` always refers to a Popen object
# that openSim() and signalHandler() can call terminate() on, even before a
# real simulator has been started.
process = Popen('false')
signal.signal(signal.SIGINT, signalHandler)
print(episode, t_episodes)

for i in range(episode, t_episodes + 1):
    phase = getValue(file_phase)
    if phase == 0:
        print("Episode: ",i," collection data.")
        if execute_simulator:
            process = openSim(process)
        recent_rewards = torch.load('recent_rewards.dat')
        # Loaded but not used further in this script.
        reward_history = torch.load('files/reward_history.dat')
        print(len(recent_rewards))

        # Send "start" over a short-lived connection before collecting data.
        env = Environment(epi=i)
        env.send_data_to_pepper("start")
        time.sleep(1)
        env.close_connection()
        time.sleep(1)

        # Run the data generation phase.
        datageneration.main()
        setValue(file_phase, 1)

        # NOTE(review): this second connection is not closed here.
        env = Environment(epi=i)
        env.send_data_to_pepper("stop")

    # Re-read the phase so training can follow collection in the same pass.
    phase = getValue(file_phase)
    if phase == 1:
        print("Episode: ",i," training model.")
        print("Sending signal to kill simulator")
        if execute_simulator:
            killSim(process)
        # Presumably a stop flag read by the simulator side -- TODO confirm
        # the meaning of the value 9.
        setValue('flag_simulator.txt', 9)
        time.sleep(1)

        # Run the training phase.
        train.main()
        setValue(file_phase, 0)

if execute_simulator:
    killSim(process)
print("Model trained...")