forked from stanfordnlp/wge
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlaunch_jobs.py
113 lines (87 loc) · 3.67 KB
/
launch_jobs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import argparse
import time
from os.path import join
from fabric.api import local
from script_tools import bash_string, upload_code, create_worksheet, task_lists, \
upload_demos
# Parse arguments.
# `--tasks` and `--train_strategy` are validated here so that a bad invocation
# fails before any code/demo upload or worksheet creation happens further down.
arg_parser = argparse.ArgumentParser()
arg_parser.add_argument(
    '-t', '--tasks', required=True,
    help="name of a task set, e.g. 'miniwob-hard'; "
         "see script_tools.py:task_lists for more")
arg_parser.add_argument(
    '-r', '--seed', type=int,
    help='random seed to run with')
arg_parser.add_argument(
    '-s', '--train_strategy', required=True, choices=('bc_rl', 'pge'),
    help="training strategy: 'pge' or 'bc_rl'")
arg_parser.add_argument(
    '-n', '--num_demos', type=int,
    help='number of demos to use, between 0 and 32')
arg_parser.add_argument(
    '-c', '--up_code', action='store_true',
    help='upload code before launching')
arg_parser.add_argument(
    '-d', '--up_demos', action='store_true',
    help='upload demos before launching')
args = arg_parser.parse_args()
# code is uploaded to the `web-agents` worksheet
# demonstrations are uploaded to the `web-agents` worksheet
# run bundles are uploaded to the corresponding task+strategy+seed worksheet
# e.g. a 'miniwob-hard' job using `pge` with seed 0 will go to `web-agents-miniwob-hard-pge-0`
# The `web-agents` worksheet is a home page that points to all the other worksheets.
# Different sets of tasks (such as `miniwob-hard`) are defined at script_tools.py:task_lists
def config_path(sub_path):
    """Return the path of a config file inside the `configs` directory.

    Args:
        sub_path (str): path relative to `configs`, e.g. 'default-base.txt'

    Returns:
        str: `sub_path` joined onto 'configs'
    """
    return join('configs', sub_path)
def launch_job(task, train_strategy, num_demos, worksheet, demo_set, no_demo_filter, seed):
    """Launch a job.

    Builds a `python main.py ...` command, wraps it with `bash_string`, and
    runs it through `python run_codalab.py` using fabric's `local`.
    Sleeps for 10 sec after launching.

    CONFIG FILES ARE PASSED TO main.py IN THE FOLLOWING ORDER:
        - default-base.txt
        - config-mixins/bc-rl.txt (only if train_strategy is 'bc_rl')
        - config-mixins/no-demo-filter.txt (only if no_demo_filter is True)
    (all resolved relative to the configs directory via `config_path`)

    Args:
        task (str): e.g. "click-checkboxes"; passed to main.py as `-t`
        train_strategy (str): can be one of the following:
            - bc_rl: behavior cloning plus RL
            - pge: program-guided exploration
        num_demos (int): number of demonstrations to BC on; set as
            `demonstrations.max_to_use`
        worksheet (str): name of target worksheet; passed to run_codalab.py
            as `-w`
        demo_set (str): name of the demo collection; set as
            `demonstrations.base_dir`
        no_demo_filter (bool): if True, don't filter demos by reward
        seed (int): random seed; passed to main.py as `-r`

    Raises:
        ValueError: if train_strategy is not 'bc_rl' or 'pge'
    """
    # Explicit check rather than `assert`, which is stripped under `python -O`.
    if train_strategy not in ('bc_rl', 'pge'):
        raise ValueError('unknown train_strategy: {!r}'.format(train_strategy))

    # Command that runs the experiment itself.
    docker_cmd_args = [
        'python main.py',
        '-t {}'.format(task),
        '-s "demonstrations.max_to_use = {}"'.format(num_demos),
        '-s "demonstrations.base_dir = {}"'.format(demo_set),
        '-r {}'.format(seed),
        config_path('default-base.txt'),
    ]

    # 'pge' adds no mixin config; only 'bc_rl' does.
    if train_strategy == 'bc_rl':
        docker_cmd_args.append(config_path('config-mixins/bc-rl.txt'))

    if no_demo_filter:
        docker_cmd_args.append(config_path('config-mixins/no-demo-filter.txt'))

    docker_cmd = ' '.join(docker_cmd_args)

    # Command that submits the experiment; the job is named `<task>_<train_strategy>`.
    launch_cmd_args = [
        'python run_codalab.py',
        '-w {}'.format(worksheet),
        '-n {}_{}'.format(task, train_strategy),
        bash_string(docker_cmd),  # presumably quotes the command for the shell -- confirm in script_tools
    ]

    # Backslash-newline keeps this a single shell command spread over several lines.
    launch_cmd = ' \\\n'.join(launch_cmd_args)
    local(launch_cmd)

    time.sleep(10)  # wait a bit between launches
# get tasks
# (looked up before any upload so that an unknown task set fails without side effects)
tasks = task_lists[args.tasks]

# select the right set of demos
if args.tasks == 'few-shot':
    if args.train_strategy == 'bc_rl':
        demo_set = '2017-10-26_third-turk'
    elif args.train_strategy == 'pge':
        demo_set = 'clean-demos'
    else:
        # The rejected value is the training strategy, not the task set.
        raise ValueError(
            'unsupported train_strategy for few-shot tasks: {!r}'.format(args.train_strategy))
else:
    demo_set = '2017-10-16_second-turk'

# decide whether to filter demos
no_demo_filter = (args.tasks == 'few-shot')

# upload the latest code and demos
if args.up_demos:
    upload_demos('web-agents')
if args.up_code:
    upload_code('web-agents')

# set worksheet: one worksheet per (task set, strategy, seed) combination
worksheet = 'web-agents-{}-{}-{}'.format(args.tasks, args.train_strategy, args.seed)
create_worksheet(worksheet)

# launch one job per task in the selected task set
for task in tasks:
    launch_job(task, args.train_strategy, args.num_demos, worksheet,
               demo_set, no_demo_filter, args.seed)