diff --git a/examples/train/multimodal/video.sh b/examples/train/multimodal/video.sh
index 73e344dd0..2c438cb87 100644
--- a/examples/train/multimodal/video.sh
+++ b/examples/train/multimodal/video.sh
@@ -1,8 +1,12 @@
 # 4*80GB
 # You can refer to `https://github.com/QwenLM/Qwen2-VL` for the meaning of the `VIDEO_MAX_PIXELS` parameter.
-NPROC_PER_NODE=4 \
+# Processes per node. --gradient_accumulation_steps below is derived as
+# 16 / nproc_per_node (integer division), so changing this value rescales it.
+nproc_per_node=4
+
+NPROC_PER_NODE=$nproc_per_node \
 CUDA_VISIBLE_DEVICES=0,1,2,3 \
-VIDEO_MAX_PIXELS=200704 \
+VIDEO_MAX_PIXELS=100352 \
 FPS_MAX_FRAMES=24 \
 swift sft \
     --model Qwen/QVQ-72B-Preview \
@@ -17,7 +21,7 @@ swift sft \
     --lora_alpha 32 \
     --target_modules all-linear \
     --freeze_vit true \
-    --gradient_accumulation_steps 4 \
+    --gradient_accumulation_steps $((16 / nproc_per_node)) \
     --eval_steps 50 \
     --save_steps 50 \
     --save_total_limit 5 \