forked from Farama-Foundation/ViZDoom
-
Notifications
You must be signed in to change notification settings - Fork 1
/
shaping.py
executable file
·80 lines (61 loc) · 2.74 KB
/
shaping.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
#!/usr/bin/env python
#####################################################################
# This script presents how to make use of game variables to implement
# shaping using health_guided.wad scenario
# Health_guided scenario is just like health_gathering
# (see "../../scenarios/README.md") but for each collected medkit global
# variable number 1 in acs script (coresponding to USER1) is increased
# by 100.0. It is not considered a part of reward but will possibly
# reduce learning time.
# <episodes> number of episodes are played.
# Random combination of buttons is chosen for every action.
# Game variables from state and last reward are printed.
#
#####################################################################
from __future__ import print_function
import itertools as it
from random import choice
from time import sleep
from vizdoom import *
game = DoomGame()
# Choose scenario config file you wish to watch.
# Don't load two configs cause the second will overrite the first one.
# Multiple config files are ok but combining these ones doesn't make much sense.
game.load_config("../../examples/config/health_gathering.cfg")
game.set_screen_resolution(ScreenResolution.RES_640X480)
game.init()
# Creates all possible actions.
actions_num = game.get_available_buttons_size()
actions = []
for perm in it.product([False, True], repeat=actions_num):
actions.append(list(perm))
episodes = 10
sleep_time = 0.028
for i in range(episodes):
print("Episode #" + str(i + 1))
# Not needed for the first episode but the loop is nicer.
game.new_episode()
# Use this to remember last shaping reward value.
last_total_shaping_reward = 0
while not game.is_episode_finished():
# Gets the state and possibly to something with it
state = game.get_state()
# Makes a random action and save the reward.
reward = game.make_action(choice(actions))
# Retrieve the shaping reward
fixed_shaping_reward = game.get_game_variable(GameVariable.USER1) # Get value of scripted variable
shaping_reward = doom_fixed_to_double(fixed_shaping_reward) # If value is in DoomFixed format project it to double
shaping_reward = shaping_reward - last_total_shaping_reward
last_total_shaping_reward += shaping_reward
print("State #" + str(state.number))
print("Health: ", state.game_variables[0])
print("Last Reward:", reward)
print("Last Shaping Reward:", shaping_reward)
print("=====================")
# Sleep some time because processing is too fast to watch.
if sleep_time > 0:
sleep(sleep_time)
print("Episode finished!")
print("Total reward:", game.get_total_reward())
print("************************")
game.close()