diff --git a/examples/train/multimodal/infer.sh b/examples/train/multimodal/infer.sh
index 10495b366..2e8627319 100644
--- a/examples/train/multimodal/infer.sh
+++ b/examples/train/multimodal/infer.sh
@@ -1,5 +1,5 @@
 # Perform inference using the validation set from the training phase.
-# CUDA_VISIBLE_DEVICES=0,1 \
+CUDA_VISIBLE_DEVICES=0 \
 swift infer \
     --adapters output/vx-xxx/checkpoint-xxx \
     --stream true \
diff --git a/examples/train/multimodal/ocr.sh b/examples/train/multimodal/ocr.sh
index d12a0c02c..739033e2b 100644
--- a/examples/train/multimodal/ocr.sh
+++ b/examples/train/multimodal/ocr.sh
@@ -1,8 +1,8 @@
-# 2*75GB
-CUDA_VISIBLE_DEVICES=0,1 \
+# 20GB GPU memory
+CUDA_VISIBLE_DEVICES=0 \
 MAX_PIXELS=1003520 \
 swift sft \
-    --model Qwen/QVQ-72B-Preview \
+    --model Qwen/Qwen2-VL-7B-Instruct \
     --dataset 'AI-ModelScope/LaTeX_OCR:human_handwrite#20000' \
     --train_type lora \
     --torch_dtype bfloat16 \