🗂️ Harmonize run and example batch sizes in RLOO docs (#2439)
The doc lists different `gradient_accumulation_steps` and `per_device_train_batch_size` values than the hyperparameters actually used, which can be verified from the wandb run.
asparius authored Dec 4, 2024
1 parent 52201d3 commit b02189a
Showing 1 changed file with 3 additions and 3 deletions.
6 changes: 3 additions & 3 deletions docs/source/rloo_trainer.md
```diff
@@ -218,8 +218,8 @@ accelerate launch --config_file examples/accelerate_configs/deepspeed_zero2.yaml
     --num_ppo_epochs 2 \
     --num_mini_batches 2 \
     --learning_rate 3e-6 \
-    --per_device_train_batch_size 8 \
-    --gradient_accumulation_steps 8 \
+    --per_device_train_batch_size 16 \
+    --gradient_accumulation_steps 16 \
     --total_episodes 1000000 \
     --model_name_or_path EleutherAI/pythia-1b-deduped \
     --sft_model_path cleanrl/EleutherAI_pythia-1b-deduped__sft__tldr \
@@ -276,4 +276,4 @@ python -m openrlbenchmark.rlops_multi_metrics \
 
 ## RLOOConfig
 
-[[autodoc]] RLOOConfig
+[[autodoc]] RLOOConfig
```
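
For context, the two changed flags multiply into the effective batch size per optimizer update, which is why the doc values must match the run's actual hyperparameters. A back-of-the-envelope sketch, assuming a world size of 8 (`num_processes` here is an illustrative assumption; the real value comes from the accelerate/deepspeed config, which is not shown in this diff):

```python
# Sketch (not part of the commit): how the corrected flags combine into
# the effective batch per optimizer update.
per_device_train_batch_size = 16   # value from the corrected docs
gradient_accumulation_steps = 16   # value from the corrected docs
num_processes = 8                  # assumption: world size set by the launch config

per_process = per_device_train_batch_size * gradient_accumulation_steps
print(per_process)                  # 256 samples per process per update
print(per_process * num_processes)  # 2048 samples globally under the assumed world size
```

With the old doc values (8 × 8), a reader reproducing the run would train with a 4x smaller effective batch than the logged experiment, so the reported curves would not match.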
