-
Notifications
You must be signed in to change notification settings - Fork 13
/
test_wmg_on_sokoban.py
59 lines (49 loc) · 1.14 KB
/
test_wmg_on_sokoban.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
### CONTROLS (non-tunable) ###
# general
TYPE_OF_RUN = test_episodes # options: train, test, test_episodes, render
NUM_EPISODES_TO_TEST = 1000
MIN_FINAL_REWARD_FOR_SUCCESS = 2.0
LOAD_MODEL_FROM = models/wmg_sokoban.pth
SAVE_MODELS_TO = None
# worker.py
ENV = Sokoban_Env
ENV_RANDOM_SEED = 1
AGENT_RANDOM_SEED = 1
REPORTING_INTERVAL = 10000
TOTAL_STEPS = 100000
ANNEAL_LR = False
# A3cAgent
AGENT_NET = WMG_Network
# WMG
V2 = True
# Sokoban_Env
SOKOBAN_MAX_STEPS = 120
SOKOBAN_DIFFICULTY = unfiltered
SOKOBAN_SPLIT = test
SOKOBAN_ROOM_OVERRIDE = None
SOKOBAN_BOXES_REQUIRED = 4
SOKOBAN_OBSERVATION_FORMAT = factored
### HYPERPARAMETERS (tunable) ###
# Sokoban_Env
SOKOBAN_REWARD_PER_STEP = 0.
SOKOBAN_REWARD_SUCCESS = 2.
# A3cAgent
A3C_T_MAX = 4
LEARNING_RATE = 1.6e-05
DISCOUNT_FACTOR = 0.995
GRADIENT_CLIP = 512.0
ENTROPY_TERM_STRENGTH = 0.02
ADAM_EPS = 1e-10
REWARD_SCALE = 4.
WEIGHT_DECAY = 0.
# WMG
WMG_MAX_OBS = 0
WMG_MAX_MEMOS = 1
WMG_MEMO_SIZE = 2048
WMG_NUM_LAYERS = 10
WMG_NUM_ATTENTION_HEADS = 8
WMG_ATTENTION_HEAD_SIZE = 32
WMG_HIDDEN_SIZE = 8
AC_HIDDEN_LAYER_SIZE = 2880