From 6398c00be550ff47a7c67deaf81602c8118afe24 Mon Sep 17 00:00:00 2001
From: Kunal Kumar Sahoo
Date: Tue, 6 Aug 2024 12:19:20 +0530
Subject: [PATCH] Fix runtime issues in the RLlib and SB3 grid4x4 experiments

---
 experiments/ppo_4x4grid.py |  1 +
 experiments/sb3_grid4x4.py | 30 ++++++------------------------
 2 files changed, 7 insertions(+), 24 deletions(-)

diff --git a/experiments/ppo_4x4grid.py b/experiments/ppo_4x4grid.py
index 5e75608b..fa8cbcce 100755
--- a/experiments/ppo_4x4grid.py
+++ b/experiments/ppo_4x4grid.py
@@ -20,6 +20,7 @@
 
 
 if __name__ == "__main__":
+    print(os.getcwd())
     ray.init()
 
     env_name = "4x4grid"
diff --git a/experiments/sb3_grid4x4.py b/experiments/sb3_grid4x4.py
index bb646fcc..a667db76 100644
--- a/experiments/sb3_grid4x4.py
+++ b/experiments/sb3_grid4x4.py
@@ -5,7 +5,6 @@
 import numpy as np
 import supersuit as ss
 import traci
-from pyvirtualdisplay.smartdisplay import SmartDisplay
 from stable_baselines3 import PPO
 from stable_baselines3.common.callbacks import EvalCallback
 from stable_baselines3.common.evaluation import evaluate_policy
@@ -16,9 +15,8 @@
 
 
 if __name__ == "__main__":
-    RESOLUTION = (3200, 1800)
 
-    env = sumo_rl.grid4x4(use_gui=True, out_csv_name="outputs/grid4x4/ppo_test", virtual_display=RESOLUTION)
+    env = sumo_rl.grid4x4(use_gui=False, out_csv_name="outputs/grid4x4/ppo_train")
 
     max_time = env.unwrapped.env.sim_max_time
     delta_time = env.unwrapped.env.delta_time
@@ -26,7 +24,7 @@
     print("Environment created")
 
     env = ss.pettingzoo_env_to_vec_env_v1(env)
-    env = ss.concat_vec_envs_v1(env, 2, num_cpus=1, base_class="stable_baselines3")
+    env = ss.concat_vec_envs_v1(env, 2, num_cpus=16, base_class="stable_baselines3")
     env = VecMonitor(env)
 
     model = PPO(
@@ -34,16 +32,9 @@
         env,
         verbose=3,
         gamma=0.95,
-        n_steps=256,
-        ent_coef=0.0905168,
         learning_rate=0.00062211,
-        vf_coef=0.042202,
-        max_grad_norm=0.9,
-        gae_lambda=0.99,
-        n_epochs=5,
-        clip_range=0.3,
         batch_size=256,
-        tensorboard_log="./logs/grid4x4/ppo_test",
+        tensorboard_log="./logs/grid4x4/ppo_train",
     )
 
     print("Starting training")
@@ -55,28 +46,19 @@
     print(mean_reward)
     print(std_reward)
 
+    model.save('ppo_output')
+
     # Maximum number of steps before reset, +1 because I'm scared of OBOE
     print("Starting rendering")
     num_steps = (max_time // delta_time) + 1
 
     obs = env.reset()
 
-    if os.path.exists("temp"):
-        shutil.rmtree("temp")
-
-    os.mkdir("temp")
-    # img = disp.grab()
-    # img.save(f"temp/img0.jpg")
-    img = env.render()
 
     for t in trange(num_steps):
         actions, _ = model.predict(obs, state=None, deterministic=False)
         obs, reward, done, info = env.step(actions)
-        img = env.render()
-        img.save(f"temp/img{t}.jpg")
-
-        subprocess.run(["ffmpeg", "-y", "-framerate", "5", "-i", "temp/img%d.jpg", "output.mp4"])
+        env.render()
 
     print("All done, cleaning up")
-    shutil.rmtree("temp")
     env.close()
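
Reviewer note, not part of the patch: with the pyvirtualdisplay capture and
ffmpeg encoding removed, the trained policy now persists only through the
added model.save('ppo_output') call, which writes ppo_output.zip to the
working directory. A minimal sketch of how that file could be restored
later, assuming the same working directory; PPO.load is standard
Stable-Baselines3 API, everything else here is illustrative:

    from stable_baselines3 import PPO

    # Reload the policy saved by sb3_grid4x4.py. Passing an env is only
    # needed to continue training; prediction works without one.
    model = PPO.load("ppo_output")
    # actions, _ = model.predict(obs, deterministic=True)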