diff --git a/examples/scripts/gkd.py b/examples/scripts/gkd.py index 776aac536f..f5a2ab78e6 100644 --- a/examples/scripts/gkd.py +++ b/examples/scripts/gkd.py @@ -57,6 +57,23 @@ --push_to_hub \ --gradient_checkpointing \ --torch_dtype bfloat16 + +# ULD (Universal Logit Distillation; teacher and student use different tokenizers) with LoRA: +python examples/scripts/gkd.py \ + --model_name_or_path Qwen/Qwen2-0.5B-Instruct \ + --teacher_model_name_or_path google/gemma-2-2b-it \ + --dataset_name trl-lib/chatbot_arena_completions \ + --learning_rate 2e-4 \ + --per_device_train_batch_size 4 \ + --gradient_accumulation_steps 8 \ + --output_dir gkd-model \ + --logging_steps 10 \ + --num_train_epochs 1 \ + --push_to_hub \ + --gradient_checkpointing \ + --use_peft \ + --lora_r 64 \ + --lora_alpha 16 """ from accelerate import PartialState