
Commit

v0.7 (#26)
* update from scratch configs

* update gym pretraining configs - use fewer epochs

* update robomimic pretraining configs - use fewer epochs

* allow trajectory plotting in eval agent

* add simple vit unet

* update avoid pretraining configs - use fewer epochs

* update furniture pretraining configs - use the same number of epochs as before

* add robomimic diffusion unet pretraining configs

* update robomimic finetuning configs - higher lr

* add vit unet checkpoint urls

* update pretraining and finetuning instructions as configs are updated
allenzren committed Nov 20, 2024
1 parent d2929f6 commit 1d04211
Showing 158 changed files with 3,350 additions and 410 deletions.
5 changes: 5 additions & 0 deletions agent/eval/eval_agent.py
@@ -57,6 +57,7 @@ def __init__(self, cfg):
        self.horizon_steps = cfg.horizon_steps
        self.max_episode_steps = cfg.env.max_episode_steps
        self.reset_at_iteration = cfg.env.get("reset_at_iteration", True)
+        self.save_full_observations = cfg.env.get("save_full_observations", False)
        self.furniture_sparse_reward = (
            cfg.env.specific.get("sparse_reward", False)
            if "specific" in cfg.env
@@ -85,6 +86,10 @@ def __init__(self, cfg):
        assert not (
            self.n_render <= 0 and self.render_video
        ), "Need to set n_render > 0 if saving video"
+        self.traj_plotter = (
+            hydra.utils.instantiate(cfg.plotter)
+            if "plotter" in cfg else None
+        )

    def run(self):
        pass
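For context on the optional plotter lookup above: hydra.utils.instantiate imports the class named by `_target_` and calls it with the remaining keys as keyword arguments. A minimal, standalone sketch of that mechanism (not part of this commit; `DummyPlotter` is a stand-in for the repo's `env.plot_traj.TrajPlotter`):

# Standalone sketch of Hydra instantiation (assumes hydra-core and omegaconf are installed).
from dataclasses import dataclass

import hydra
from omegaconf import OmegaConf


@dataclass
class DummyPlotter:  # stand-in for env.plot_traj.TrajPlotter
    env_type: str
    normalization_path: str


cfg = OmegaConf.create(
    {
        "plotter": {
            "_target_": "__main__.DummyPlotter",
            "env_type": "avoid",
            "normalization_path": "normalization.npz",
        }
    }
)
# Mirrors the agent code: instantiate only if a `plotter` block is present.
traj_plotter = hydra.utils.instantiate(cfg.plotter) if "plotter" in cfg else None
print(traj_plotter)  # DummyPlotter(env_type='avoid', normalization_path='normalization.npz')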
22 changes: 22 additions & 0 deletions agent/eval/eval_diffusion_agent.py
@@ -37,6 +37,11 @@ def run(self):
        prev_obs_venv = self.reset_env_all(options_venv=options_venv)
        firsts_trajs[0] = 1
        reward_trajs = np.zeros((self.n_steps, self.n_envs))
+        if self.save_full_observations:  # state-only
+            obs_full_trajs = np.empty((0, self.n_envs, self.obs_dim))
+            obs_full_trajs = np.vstack(
+                (obs_full_trajs, prev_obs_venv["state"][:, -1][None])
+            )

        # Collect a set of trajectories from env
        for step in range(self.n_steps):
@@ -62,6 +67,13 @@ def run(self):
            )
            reward_trajs[step] = reward_venv
            firsts_trajs[step + 1] = terminated_venv | truncated_venv
+            if self.save_full_observations:  # state-only
+                obs_full_venv = np.array(
+                    [info["full_obs"]["state"] for info in info_venv]
+                )  # n_envs x act_steps x obs_dim
+                obs_full_trajs = np.vstack(
+                    (obs_full_trajs, obs_full_venv.transpose(1, 0, 2))
+                )

            # update for next step
            prev_obs_venv = obs_venv
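The two vstack calls above accumulate one long state trajectory per environment along the time axis. A standalone NumPy sketch of the shape bookkeeping, with made-up sizes (not part of this commit):

import numpy as np

n_envs, obs_dim, act_steps = 4, 11, 3

# The buffer starts empty along the time axis: (0, n_envs, obs_dim).
obs_full_trajs = np.empty((0, n_envs, obs_dim))

# The initial state contributes one time step: (1, n_envs, obs_dim).
obs_full_trajs = np.vstack((obs_full_trajs, np.zeros((1, n_envs, obs_dim))))

# Each env step returns act_steps states per env, shaped (n_envs, act_steps, obs_dim);
# transposing to (act_steps, n_envs, obs_dim) lets vstack append along the time axis.
obs_full_venv = np.zeros((n_envs, act_steps, obs_dim))
obs_full_trajs = np.vstack((obs_full_trajs, obs_full_venv.transpose(1, 0, 2)))

print(obs_full_trajs.shape)  # (4, 4, 11): 1 + act_steps time steps, n_envs, obs_dim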
@@ -108,6 +120,16 @@ def run(self):
            success_rate = 0
            log.info("[WARNING] No episode completed within the iteration!")

+        # Plot state trajectories (only in D3IL)
+        if self.traj_plotter is not None:
+            self.traj_plotter(
+                obs_full_trajs=obs_full_trajs,
+                n_render=self.n_render,
+                max_episode_steps=self.max_episode_steps,
+                render_dir=self.render_dir,
+                itr=0,
+            )
+
        # Log loss and save metrics
        time = timer()
        log.info(
68 changes: 68 additions & 0 deletions cfg/d3il/eval/avoid_m1/eval_diffusion_mlp.yaml
@@ -0,0 +1,68 @@
defaults:
  - _self_
hydra:
  run:
    dir: ${logdir}
_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent

name: ${env_name}_eval_diffusion_mlp_ta${horizon_steps}_td${denoising_steps}
logdir: ${oc.env:DPPO_LOG_DIR}/d3il-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
base_policy_path:
normalization_path: ${oc.env:DPPO_DATA_DIR}/d3il/avoid_m1/normalization.npz

seed: 42
device: cuda:0
env_name: avoiding-m5
obs_dim: 4
action_dim: 2
denoising_steps: 20
cond_steps: 1
horizon_steps: 4
act_steps: 4

n_steps: 25
render_num: 40

plotter:
  _target_: env.plot_traj.TrajPlotter
  env_type: avoid
  normalization_path: ${normalization_path}

env:
  n_envs: 40
  name: ${env_name}
  max_episode_steps: 100
  reset_at_iteration: True
  save_video: False
  best_reward_threshold_for_success: 2
  save_full_observations: True
  wrappers:
    d3il_lowdim:
      normalization_path: ${normalization_path}
    multi_step:
      n_obs_steps: ${cond_steps}
      n_action_steps: ${act_steps}
      max_episode_steps: ${env.max_episode_steps}
      pass_full_observations: ${env.save_full_observations}
      reset_within_step: False

model:
  _target_: model.diffusion.diffusion.DiffusionModel
  predict_epsilon: True
  denoised_clip_value: 1.0
  #
  network_path: ${base_policy_path}
  network:
    _target_: model.diffusion.mlp_diffusion.DiffusionMLP
    time_dim: 16
    mlp_dims: [512, 512, 512]
    activation_type: ReLU
    residual_style: True
    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
    horizon_steps: ${horizon_steps}
    action_dim: ${action_dim}
  horizon_steps: ${horizon_steps}
  obs_dim: ${obs_dim}
  action_dim: ${action_dim}
  denoising_steps: ${denoising_steps}
  device: ${device}
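Note that the `${eval:'...'}` interpolation used for `cond_dim` is not a built-in OmegaConf resolver; the entry script is expected to register one before composing the config. A minimal, standalone sketch of that pattern (the repo's actual registration may differ):

from omegaconf import OmegaConf

# Let ${eval:'...'} evaluate a Python expression after nested
# interpolations such as ${obs_dim} are resolved.
OmegaConf.register_new_resolver("eval", eval, replace=True)

cfg = OmegaConf.create(
    {
        "obs_dim": 4,
        "cond_steps": 1,
        "cond_dim": "${eval:'${obs_dim} * ${cond_steps}'}",
    }
)
print(cfg.cond_dim)  # 4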
4 changes: 2 additions & 2 deletions cfg/d3il/pretrain/avoid_m1/pre_diffusion_mlp.yaml
@@ -25,12 +25,12 @@ wandb:
  run: ${now:%H-%M-%S}_${name}

train:
-  n_epochs: 15000
+  n_epochs: 5000
  batch_size: 16
  learning_rate: 1e-4
  weight_decay: 1e-6
  lr_scheduler:
-    first_cycle_steps: 15000
+    first_cycle_steps: 5000
    warmup_steps: 100
    min_lr: 1e-5
  save_model_freq: 500
4 changes: 2 additions & 2 deletions cfg/d3il/pretrain/avoid_m1/pre_gaussian_mlp.yaml
@@ -24,12 +24,12 @@ wandb:
  run: ${now:%H-%M-%S}_${name}

train:
-  n_epochs: 10000
+  n_epochs: 5000
  batch_size: 16
  learning_rate: 1e-4
  weight_decay: 1e-6
  lr_scheduler:
-    first_cycle_steps: 10000
+    first_cycle_steps: 5000
    warmup_steps: 100
    min_lr: 1e-5
  save_model_freq: 500
6 changes: 3 additions & 3 deletions cfg/d3il/pretrain/avoid_m1/pre_gmm_mlp.yaml
@@ -25,12 +25,12 @@ wandb:
  run: ${now:%H-%M-%S}_${name}

train:
-  n_epochs: 10000
-  batch_size: 32
+  n_epochs: 5000
+  batch_size: 16
  learning_rate: 1e-4
  weight_decay: 1e-6
  lr_scheduler:
-    first_cycle_steps: 10000
+    first_cycle_steps: 5000
    warmup_steps: 100
    min_lr: 1e-5
  save_model_freq: 500
4 changes: 2 additions & 2 deletions cfg/d3il/pretrain/avoid_m2/pre_diffusion_mlp.yaml
@@ -25,12 +25,12 @@ wandb:
  run: ${now:%H-%M-%S}_${name}

train:
-  n_epochs: 15000
+  n_epochs: 5000
  batch_size: 16
  learning_rate: 1e-4
  weight_decay: 1e-6
  lr_scheduler:
-    first_cycle_steps: 15000
+    first_cycle_steps: 5000
    warmup_steps: 100
    min_lr: 1e-5
  save_model_freq: 500
4 changes: 2 additions & 2 deletions cfg/d3il/pretrain/avoid_m2/pre_gaussian_mlp.yaml
@@ -24,12 +24,12 @@ wandb:
  run: ${now:%H-%M-%S}_${name}

train:
-  n_epochs: 10000
+  n_epochs: 5000
  batch_size: 16
  learning_rate: 1e-4
  weight_decay: 1e-6
  lr_scheduler:
-    first_cycle_steps: 10000
+    first_cycle_steps: 5000
    warmup_steps: 100
    min_lr: 1e-5
  save_model_freq: 500
6 changes: 3 additions & 3 deletions cfg/d3il/pretrain/avoid_m2/pre_gmm_mlp.yaml
@@ -25,12 +25,12 @@ wandb:
  run: ${now:%H-%M-%S}_${name}

train:
-  n_epochs: 10000
-  batch_size: 32
+  n_epochs: 5000
+  batch_size: 16
  learning_rate: 1e-4
  weight_decay: 1e-6
  lr_scheduler:
-    first_cycle_steps: 10000
+    first_cycle_steps: 5000
    warmup_steps: 100
    min_lr: 1e-5
  save_model_freq: 500
4 changes: 2 additions & 2 deletions cfg/d3il/pretrain/avoid_m3/pre_diffusion_mlp.yaml
@@ -25,12 +25,12 @@ wandb:
  run: ${now:%H-%M-%S}_${name}

train:
-  n_epochs: 15000
+  n_epochs: 5000
  batch_size: 16
  learning_rate: 1e-4
  weight_decay: 1e-6
  lr_scheduler:
-    first_cycle_steps: 15000
+    first_cycle_steps: 5000
    warmup_steps: 100
    min_lr: 1e-5
  save_model_freq: 500
4 changes: 2 additions & 2 deletions cfg/d3il/pretrain/avoid_m3/pre_gaussian_mlp.yaml
@@ -24,12 +24,12 @@ wandb:
  run: ${now:%H-%M-%S}_${name}

train:
-  n_epochs: 10000
+  n_epochs: 5000
  batch_size: 16
  learning_rate: 1e-4
  weight_decay: 1e-6
  lr_scheduler:
-    first_cycle_steps: 10000
+    first_cycle_steps: 5000
    warmup_steps: 100
    min_lr: 1e-5
  save_model_freq: 500
4 changes: 2 additions & 2 deletions cfg/d3il/pretrain/avoid_m3/pre_gmm_mlp.yaml
@@ -25,12 +25,12 @@ wandb:
  run: ${now:%H-%M-%S}_${name}

train:
-  n_epochs: 10000
+  n_epochs: 5000
  batch_size: 32
  learning_rate: 1e-4
  weight_decay: 1e-6
  lr_scheduler:
-    first_cycle_steps: 10000
+    first_cycle_steps: 5000
    warmup_steps: 100
    min_lr: 1e-5
  save_model_freq: 500
2 changes: 2 additions & 0 deletions cfg/finetuning.md
@@ -1,5 +1,7 @@
## Fine-tuning experiments

+**Update, Nov 20 2024**: In v0.7 we updated the fine-tuning configs, as we find sample efficiency can be improved with a higher actor learning rate and other hyperparameter changes. If you would like to replicate the original experimental results from the paper, please use the configs from v0.6. Otherwise, we recommend starting with the v0.7 configs for your applications.

### Comparing diffusion-based RL algorithms (Sec. 5.1)
Gym configs are under `cfg/gym/finetune/<env_name>/`, and the naming follows `ft_<alg_name>_diffusion_mlp`, e.g., `ft_awr_diffusion_mlp`. `alg_name` is one of `rwr`, `awr`, `dipo`, `idql`, `dql`, `qsm`, `ppo` (DPPO), `ppo_exact` (exact likelihood). They share the same pre-trained checkpoint in each env.
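As a quick illustration of this naming scheme, a hypothetical helper (not part of the repo):

ALG_NAMES = ["rwr", "awr", "dipo", "idql", "dql", "qsm", "ppo", "ppo_exact"]


def finetune_config_name(alg_name: str) -> str:
    """Build the fine-tuning config name for a given algorithm."""
    assert alg_name in ALG_NAMES, f"unknown algorithm: {alg_name}"
    return f"ft_{alg_name}_diffusion_mlp"


print(finetune_config_name("awr"))  # ft_awr_diffusion_mlp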

66 changes: 66 additions & 0 deletions cfg/furniture/eval/lamp_low/eval_diffusion_mlp.yaml
@@ -0,0 +1,66 @@
defaults:
  - _self_
hydra:
  run:
    dir: ${logdir}
_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent

name: ${env_name}_eval_diffusion_mlp_ta${horizon_steps}_td${denoising_steps}
logdir: ${oc.env:DPPO_LOG_DIR}/furniture-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
base_policy_path:
normalization_path: ${oc.env:DPPO_DATA_DIR}/furniture/${env.specific.furniture}_${env.specific.randomness}/normalization.pth

seed: 42
device: cuda:0
env_name: ${env.specific.furniture}_${env.specific.randomness}_dim
obs_dim: 44
action_dim: 10
denoising_steps: 100
cond_steps: 1
horizon_steps: 8
act_steps: 8
use_ddim: True
ddim_steps: 5

n_steps: ${eval:'round(${env.max_episode_steps} / ${act_steps})'}
render_num: 0

env:
  n_envs: 1000
  name: ${env_name}
  env_type: furniture
  max_episode_steps: 1000
  best_reward_threshold_for_success: 2
  specific:
    headless: true
    furniture: lamp
    randomness: low
    normalization_path: ${normalization_path}
    obs_steps: ${cond_steps}
    act_steps: ${act_steps}
    sparse_reward: True

model:
  _target_: model.diffusion.diffusion.DiffusionModel
  predict_epsilon: True
  denoised_clip_value: 1.0
  randn_clip_value: 3
  #
  use_ddim: ${use_ddim}
  ddim_steps: ${ddim_steps}
  network_path: ${base_policy_path}
  network:
    _target_: model.diffusion.mlp_diffusion.DiffusionMLP
    time_dim: 32
    mlp_dims: [1024, 1024, 1024, 1024, 1024, 1024, 1024]
    cond_mlp_dims: [512, 64]
    use_layernorm: True # needed for larger MLP
    residual_style: True
    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
    horizon_steps: ${horizon_steps}
    action_dim: ${action_dim}
  horizon_steps: ${horizon_steps}
  obs_dim: ${obs_dim}
  action_dim: ${action_dim}
  denoising_steps: ${denoising_steps}
  device: ${device}
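The `n_steps` interpolation above is a simple rounding of episode length by action-chunk size; a standalone check (not part of this commit):

max_episode_steps, act_steps = 1000, 8
n_steps = round(max_episode_steps / act_steps)
print(n_steps)  # 125 environment steps (action chunks) per evaluation rollout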
