# TBU_env_evo.py
# Custom Truck Backer Upper Gymnasium Environment
# Description of Truck Backer Upper from : Neuro-Genetic Truck Backer-Upper Controller
# Inspiration of PyGame Code from : https://github.com/johnnycode8/gym_custom_env
import numpy as np
import gymnasium as gym
from gymnasium.envs.registration import register
from gymnasium.utils.env_checker import check_env
import sys
import pygame
# For Declaring Observation and Action Space
from gymnasium import spaces
# My problem code
import sys
sys.path.append('./')
sys.path.append('../')
import trucker_backer_problem_evo as tbu_prob
# Register the environment with Gymnasium when this module is imported, so
# that gym.make('TBU_evo') can construct it. The entry point uses the
# "<module>:<class>" form: the TruckBackerEnv class in the TBU_env_evo module.
register(id = 'TBU_evo', entry_point='TBU_env_evo:TruckBackerEnv')
# Environment Class
class TruckBackerEnv(gym.Env):
    """Gymnasium environment wrapping the Truck Backer-Upper problem.

    The dynamics live in ``tbu_prob.TruckBackerUpper``; this class only adapts
    that object to the Gymnasium API and (optionally) draws it with pygame.

    Observation: ``[x, y, theta_c, theta_t]`` read from the truck object, as a
    float64 vector of shape (4,).
    Action: one continuous value in [-1, 1], forwarded to ``truck.step(u=...)``.
    Reward: 10 on the step where the truck reports reaching the goal, else 0.
    """

    # Gymnasium checks the requested render_mode against "render_modes".
    metadata = {"render_modes": ["human"], 'render_fps': 1}

    # INIT FUNCTION
    def __init__(self, trailer_length=14, cab_length=6, x_bounds=None,
                 y_bounds=None, render_mode=None, fps=1, max_steps=300):
        """Build the truck problem and declare the action/observation spaces.

        Args:
            trailer_length: Forwarded to ``tbu_prob.TruckBackerUpper``.
            cab_length: Forwarded to ``tbu_prob.TruckBackerUpper``.
            x_bounds: ``[min, max]`` for x; defaults to ``[0, 200]``.
            y_bounds: ``[min, max]`` for y; defaults to ``[-100, 100]``.
            render_mode: ``None`` (no rendering) or ``"human"`` (pygame window).
            fps: Frame-rate cap used by ``render`` in human mode.
            max_steps: Steps after which an episode is reported as truncated.
        """
        super().__init__()
        # Fresh lists per instance: a list literal in the signature would be
        # shared (and mutable) across every environment ever created.
        if x_bounds is None:
            x_bounds = [0, 200]
        if y_bounds is None:
            y_bounds = [-100, 100]

        self.render_mode = render_mode
        # Initializing the Problem Object
        self.truck = tbu_prob.TruckBackerUpper(trailer_length, cab_length, x_bounds, y_bounds)

        # Defining Action Space (shape and dtype spelled out; these match what
        # Box infers from scalar bounds)
        self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(1,), dtype=np.float32)

        # Defining Observation Space
        self.observation_space = spaces.Box(
            low=np.array([self.truck.x_bounds[0], self.truck.y_bounds[0], -70.0, -70.0], dtype=np.float64),
            high=np.array([self.truck.x_bounds[1], self.truck.y_bounds[1], 70.0, 70.0], dtype=np.float64),
            shape=(4,),
            dtype=np.float64,
        )

        # Start states that reset() samples from
        self.possible_xys = [np.array([100, 0]), np.array([80, 50]), np.array([80, -50])]
        self.possible_angles = np.array([-90, -30, 0, 30, 90])

        # Episode step counter and its truncation limit
        self.max_steps = max_steps
        self.step_counter = 0

        # Pygame stuff. The window is created lazily on the first render() so
        # that headless use never touches the display.
        self.fps = fps
        self.last_action = ''
        self.window_surface = None
        self.clock = None

    # PYGAME FUNCTION
    def _init_pygame(self):
        """Create the pygame window, clock, font and sprites used by render()."""
        pygame.init()  # initialize pygame
        pygame.display.init()  # Initialize the display module

        # Game clock
        self.clock = pygame.time.Clock()

        # Default font
        self.action_font = pygame.font.SysFont("Calibre", 30)
        self.action_info_height = self.action_font.get_height()

        # For rendering
        self.cell_height = 64
        self.cell_width = 64
        self.cell_size = (self.cell_width, self.cell_height)

        # Define game window size (width, height)
        self.window_size = (self.cell_width * 10, self.cell_height * 3 + self.action_info_height)

        # Initialize game window
        self.window_surface = pygame.display.set_mode(self.window_size)

        # Load & resize sprites (paths are relative to the working directory)
        truck_sprite = pygame.image.load("truck.png")
        self.truck_img = pygame.transform.scale(truck_sprite, self.cell_size)
        target_sprite = pygame.image.load("target.png")
        self.target_img = pygame.transform.scale(target_sprite, self.cell_size)

    def render(self):
        """Draw the truck and target; does nothing unless render_mode is "human"."""
        if self.render_mode != "human":
            return
        # Lazily build the window; without this the first draw would fail
        # because nothing else calls _init_pygame().
        if self.window_surface is None:
            self._init_pygame()

        self._process_events()

        # clear to white background, otherwise earlier frames stay visible
        self.window_surface.fill((255, 255, 255))

        # Draw Truck
        self.window_surface.blit(self.truck_img, (self.truck.x, self.truck.y + 50))
        # Draw Target
        self.window_surface.blit(self.target_img, (0, 50))

        pygame.display.update()

        # Limit frames per second
        self.clock.tick(self.fps)

    def _process_events(self):
        """Exit the program when the window is closed or Escape is pressed."""
        for event in pygame.event.get():
            window_closed = event.type == pygame.QUIT
            escape_pressed = event.type == pygame.KEYDOWN and event.key == pygame.K_ESCAPE
            if window_closed or escape_pressed:
                pygame.quit()
                sys.exit()

    def close(self):
        """Release the pygame window, if one was ever created."""
        if self.window_surface is not None:
            pygame.display.quit()
            pygame.quit()
            self.window_surface = None
            self.clock = None

    def _get_obs(self):
        """Return the four truck state variables as a float64 vector."""
        return np.array(
            [self.truck.x, self.truck.y, self.truck.theta_c, self.truck.theta_t],
            dtype=np.float64,
        )

    # GYM FUNCTION
    def reset(self, seed=None, options=None):
        """Start a new episode from a randomly chosen start position and angle.

        Args:
            seed: Optional seed; seeds ``self.np_random`` so the sampled start
                state is reproducible.
            options: Unused.

        Returns:
            ``(observation, info)`` where ``info`` is an empty dict.
        """
        super().reset(seed=seed)

        # Sample from the env's own seeded generator, not the global np.random,
        # otherwise the seed argument would have no effect.
        start_index = int(self.np_random.integers(len(self.possible_xys)))
        start_x, start_y = self.possible_xys[start_index]
        angle = self.np_random.choice(self.possible_angles)
        self.truck.reset_truck(x_val=start_x, y_val=start_y, angle=angle)
        self.step_counter = 0

        # Observation is simply the four state variables
        obs = self._get_obs()

        # Checking for rendering
        if self.render_mode == "human":
            self.render()

        # I have no info
        return obs, {}

    def step(self, action):
        """Apply one action to the truck and report the outcome.

        Args:
            action: Array-like holding one value; its first element is passed
                to the truck as ``u``.

        Returns:
            ``(observation, reward, terminated, truncated, info)``.
            ``terminated`` is True when the truck reports goal or failure;
            ``truncated`` is True once ``max_steps`` steps have been taken.
        """
        # Adding a step to the counter
        self.step_counter += 1

        # Taking the action (accepts arrays, lists and bare scalars alike)
        steering = np.asarray(action).reshape(-1)[0].item()
        terminated_goal, terminated_fail = self.truck.step(u=steering)
        terminated = bool(terminated_goal or terminated_fail)
        # ">=" so an episode stepped past the limit keeps reporting truncation
        truncated = self.step_counter >= self.max_steps

        # Reward Function: per the original note, based on the paper cited in
        # the header of the truck problem file
        reward = 10 if terminated_goal else 0

        # State Observation
        obs = self._get_obs()

        # Checking for Rendering
        if self.render_mode == "human":
            self.render()

        # Return observation, reward, terminated, truncated, info
        return obs, reward, terminated, truncated, {}
# For unit testing: a manual smoke test that drives the environment with
# random actions until interrupted (Ctrl-C).
if __name__ == "__main__":
    env = gym.make('TBU_evo', render_mode=None)
    try:
        # Reset environment
        obs, info = env.reset()
        print("First Observation is : ", obs)
        rand_action = env.action_space.sample()
        print("Random Action is :", rand_action)

        # Reset environment
        obs, info = env.reset()

        # Take random actions, restarting whenever an episode ends
        num_actions_per_episode = 0
        while True:
            rand_action = env.action_space.sample()
            obs, reward, terminated, truncated, _ = env.step(rand_action)
            num_actions_per_episode += 1
            if reward > 0:
                print("reward is ", reward)
            # An episode ends on termination OR on hitting the step limit
            if terminated or truncated:
                print("Reset after %s actions" % num_actions_per_episode)
                obs, info = env.reset()
                # Start counting afresh so the printed number is per episode
                num_actions_per_episode = 0
    finally:
        # Release environment resources even when interrupted
        env.close()