From 6488cba1052890ba3590f04b2459e456281196c9 Mon Sep 17 00:00:00 2001
From: Jintao <huangjintao.hjt@alibaba-inc.com>
Date: Wed, 25 Dec 2024 16:32:02 +0800
Subject: [PATCH] fix shell (#2764)

---
 README.md                                                   | 6 +++---
 README_CN.md                                                | 6 +++---
 .../\345\277\253\351\200\237\345\274\200\345\247\213.md"    | 6 +++---
 "docs/source/Instruction/\345\257\274\345\207\272.md"       | 6 +++---
 docs/source_en/GetStarted/Quick-start.md                    | 6 +++---
 docs/source_en/Instruction/Export.md                        | 6 +++---
 examples/export/quantize/awq.sh                             | 4 ++--
 examples/export/quantize/gptq.sh                            | 4 ++--
 examples/notebook/qwen2.5-self-cognition/sft.sh             | 6 +++---
 examples/train/all_to_all/train.sh                          | 2 +-
 examples/train/demo.sh                                      | 6 +++---
 examples/train/full/train.sh                                | 2 +-
 examples/train/lazy_tokenize/train.sh                       | 2 +-
 examples/train/multi-gpu/ddp/train.sh                       | 2 +-
 examples/train/multi-gpu/ddp_device_map/train.sh            | 2 +-
 examples/train/multi-gpu/deepspeed/train_zero2.sh           | 2 +-
 examples/train/multi-gpu/deepspeed/train_zero3.sh           | 2 +-
 examples/train/multi-gpu/fsdp_qlora/train.sh                | 2 +-
 examples/train/multi-node/accelerate/train_node1.sh         | 2 +-
 examples/train/multi-node/accelerate/train_node2.sh         | 2 +-
 examples/train/multi-node/deepspeed/train.sh                | 2 +-
 examples/train/multi-node/dlc/train.sh                      | 2 +-
 examples/train/multi-node/swift/train_node1.sh              | 2 +-
 examples/train/multi-node/swift/train_node2.sh              | 2 +-
 examples/train/multi-node/torchrun/train_node1.sh           | 2 +-
 examples/train/multi-node/torchrun/train_node2.sh           | 2 +-
 examples/train/multimodal/grounding.sh                      | 2 +-
 examples/train/multimodal/ocr.sh                            | 2 +-
 examples/train/multimodal/vqa.sh                            | 2 +-
 examples/train/packing/train.sh                             | 2 +-
 examples/train/plugins/train_loss_scale.sh                  | 2 +-
 examples/train/rlhf/kto.sh                                  | 2 +-
 examples/train/seq_cls/sft.sh                               | 2 +-
 examples/train/sequence_parallel/train.sh                   | 2 +-
 examples/train/streaming/train.sh                           | 2 +-
 examples/train/tuners/adalora/train.sh                      | 2 +-
 examples/train/tuners/adapter/train.sh                      | 2 +-
 examples/train/tuners/boft/train.sh                         | 2 +-
 examples/train/tuners/bone/train.sh                         | 2 +-
 examples/train/tuners/dora/train.sh                         | 2 +-
 examples/train/tuners/galore/train_galore.sh                | 2 +-
 examples/train/tuners/galore/train_qgalore.sh               | 2 +-
 examples/train/tuners/liger/train.sh                        | 2 +-
 examples/train/tuners/lisa/train.sh                         | 2 +-
 examples/train/tuners/llamapro/train.sh                     | 2 +-
 examples/train/tuners/longlora/train.sh                     | 2 +-
 examples/train/tuners/lora-ga/train.sh                      | 2 +-
 examples/train/tuners/lora/train.sh                         | 2 +-
 examples/train/tuners/neftune/train.sh                      | 2 +-
 examples/train/tuners/olora/train.sh                        | 2 +-
 examples/train/tuners/pissa/train.sh                        | 2 +-
 examples/train/tuners/qlora/train.sh                        | 2 +-
 examples/train/tuners/reft/train.sh                         | 2 +-
 examples/train/tuners/unsloth/train.sh                      | 2 +-
 requirements/framework.txt                                  | 2 +-
 55 files changed, 73 insertions(+), 73 deletions(-)

diff --git a/README.md b/README.md
index 9884d0fc3..a572beaed 100644
--- a/README.md
+++ b/README.md
@@ -114,9 +114,9 @@ CUDA_VISIBLE_DEVICES=0 \
 swift sft \
     --model Qwen/Qwen2.5-7B-Instruct \
     --train_type lora \
-    --dataset AI-ModelScope/alpaca-gpt4-data-zh#500 \
-              AI-ModelScope/alpaca-gpt4-data-en#500 \
-              swift/self-cognition#500 \
+    --dataset 'AI-ModelScope/alpaca-gpt4-data-zh#500' \
+              'AI-ModelScope/alpaca-gpt4-data-en#500' \
+              'swift/self-cognition#500' \
     --torch_dtype bfloat16 \
     --num_train_epochs 1 \
     --per_device_train_batch_size 1 \
diff --git a/README_CN.md b/README_CN.md
index 60b7c79a9..c3796e0a5 100644
--- a/README_CN.md
+++ b/README_CN.md
@@ -107,9 +107,9 @@ CUDA_VISIBLE_DEVICES=0 \
 swift sft \
     --model Qwen/Qwen2.5-7B-Instruct \
     --train_type lora \
-    --dataset AI-ModelScope/alpaca-gpt4-data-zh#500 \
-              AI-ModelScope/alpaca-gpt4-data-en#500 \
-              swift/self-cognition#500 \
+    --dataset 'AI-ModelScope/alpaca-gpt4-data-zh#500' \
+              'AI-ModelScope/alpaca-gpt4-data-en#500' \
+              'swift/self-cognition#500' \
     --torch_dtype bfloat16 \
     --num_train_epochs 1 \
     --per_device_train_batch_size 1 \
diff --git "a/docs/source/GetStarted/\345\277\253\351\200\237\345\274\200\345\247\213.md" "b/docs/source/GetStarted/\345\277\253\351\200\237\345\274\200\345\247\213.md"
index d30b25cf8..c69597316 100644
--- "a/docs/source/GetStarted/\345\277\253\351\200\237\345\274\200\345\247\213.md"
+++ "b/docs/source/GetStarted/\345\277\253\351\200\237\345\274\200\345\247\213.md"
@@ -31,9 +31,9 @@ CUDA_VISIBLE_DEVICES=0 \
 swift sft \
     --model Qwen/Qwen2.5-7B-Instruct \
     --train_type lora \
-    --dataset AI-ModelScope/alpaca-gpt4-data-zh#500 \
-              AI-ModelScope/alpaca-gpt4-data-en#500 \
-              swift/self-cognition#500 \
+    --dataset 'AI-ModelScope/alpaca-gpt4-data-zh#500' \
+              'AI-ModelScope/alpaca-gpt4-data-en#500' \
+              'swift/self-cognition#500' \
     --torch_dtype bfloat16 \
     --num_train_epochs 1 \
     --per_device_train_batch_size 1 \
diff --git "a/docs/source/Instruction/\345\257\274\345\207\272.md" "b/docs/source/Instruction/\345\257\274\345\207\272.md"
index 1a48068e8..3e5426f59 100644
--- "a/docs/source/Instruction/\345\257\274\345\207\272.md"
+++ "b/docs/source/Instruction/\345\257\274\345\207\272.md"
@@ -77,7 +77,7 @@ CUDA_VISIBLE_DEVICES=0 swift infer \
 CUDA_VISIBLE_DEVICES=0 swift sft \
     --model Qwen/Qwen2-7B-Instruct \
     --train_type lora \
-    --dataset AI-ModelScope/alpaca-gpt4-data-zh#5000 \
+    --dataset 'AI-ModelScope/alpaca-gpt4-data-zh#5000' \
     --quant_method bnb \
     --quant_bits 4 \
     --torch_dtype bfloat16
@@ -86,7 +86,7 @@ CUDA_VISIBLE_DEVICES=0 swift sft \
 CUDA_VISIBLE_DEVICES=0 swift sft \
     --model Qwen/Qwen2-7B-Instruct \
     --train_type lora \
-    --dataset AI-ModelScope/alpaca-gpt4-data-zh#5000 \
+    --dataset 'AI-ModelScope/alpaca-gpt4-data-zh#5000' \
     --quant_method hqq \
     --quant_bits 4
 
@@ -94,7 +94,7 @@ CUDA_VISIBLE_DEVICES=0 swift sft \
 CUDA_VISIBLE_DEVICES=0 swift sft \
     --model Qwen/Qwen2-7B-Instruct \
     --train_type lora \
-    --dataset AI-ModelScope/alpaca-gpt4-data-zh#5000 \
+    --dataset 'AI-ModelScope/alpaca-gpt4-data-zh#5000' \
     --quant_method eetq \
     --torch_dtype float16
 ```
diff --git a/docs/source_en/GetStarted/Quick-start.md b/docs/source_en/GetStarted/Quick-start.md
index b5d58e09a..c410e4484 100644
--- a/docs/source_en/GetStarted/Quick-start.md
+++ b/docs/source_en/GetStarted/Quick-start.md
@@ -31,9 +31,9 @@ CUDA_VISIBLE_DEVICES=0 \
 swift sft \
     --model Qwen/Qwen2.5-7B-Instruct \
     --train_type lora \
-    --dataset AI-ModelScope/alpaca-gpt4-data-zh#500 \
-              AI-ModelScope/alpaca-gpt4-data-en#500 \
-              swift/self-cognition#500 \
+    --dataset 'AI-ModelScope/alpaca-gpt4-data-zh#500' \
+              'AI-ModelScope/alpaca-gpt4-data-en#500' \
+              'swift/self-cognition#500' \
     --torch_dtype bfloat16 \
     --num_train_epochs 1 \
     --per_device_train_batch_size 1 \
diff --git a/docs/source_en/Instruction/Export.md b/docs/source_en/Instruction/Export.md
index fec9f44f6..175fb7b5e 100644
--- a/docs/source_en/Instruction/Export.md
+++ b/docs/source_en/Instruction/Export.md
@@ -78,7 +78,7 @@ CUDA_VISIBLE_DEVICES=0 swift infer \
 CUDA_VISIBLE_DEVICES=0 swift sft \
     --model Qwen/Qwen2-7B-Instruct \
     --train_type lora \
-    --dataset AI-ModelScope/alpaca-gpt4-data-zh#5000 \
+    --dataset 'AI-ModelScope/alpaca-gpt4-data-zh#5000' \
     --quant_method bnb \
     --quant_bits 4 \
     --torch_dtype bfloat16
@@ -87,7 +87,7 @@ CUDA_VISIBLE_DEVICES=0 swift sft \
 CUDA_VISIBLE_DEVICES=0 swift sft \
     --model Qwen/Qwen2-7B-Instruct \
     --train_type lora \
-    --dataset AI-ModelScope/alpaca-gpt4-data-zh#5000 \
+    --dataset 'AI-ModelScope/alpaca-gpt4-data-zh#5000' \
     --quant_method hqq \
     --quant_bits 4
 
@@ -95,7 +95,7 @@ CUDA_VISIBLE_DEVICES=0 swift sft \
 CUDA_VISIBLE_DEVICES=0 swift sft \
     --model Qwen/Qwen2-7B-Instruct \
     --train_type lora \
-    --dataset AI-ModelScope/alpaca-gpt4-data-zh#5000 \
+    --dataset 'AI-ModelScope/alpaca-gpt4-data-zh#5000' \
     --quant_method eetq \
     --torch_dtype float16
 ```
diff --git a/examples/export/quantize/awq.sh b/examples/export/quantize/awq.sh
index 379ad270e..04ebbaf6c 100644
--- a/examples/export/quantize/awq.sh
+++ b/examples/export/quantize/awq.sh
@@ -1,8 +1,8 @@
 CUDA_VISIBLE_DEVICES=0 \
 swift export \
     --model Qwen/Qwen2.5-1.5B-Instruct \
-    --dataset AI-ModelScope/alpaca-gpt4-data-zh#500 \
-              AI-ModelScope/alpaca-gpt4-data-en#500 \
+    --dataset 'AI-ModelScope/alpaca-gpt4-data-zh#500' \
+              'AI-ModelScope/alpaca-gpt4-data-en#500' \
     --quant_n_samples 128 \
     --quant_batch_size 1 \
     --max_length 2048 \
diff --git a/examples/export/quantize/gptq.sh b/examples/export/quantize/gptq.sh
index f53d251bd..b5f8c43e2 100644
--- a/examples/export/quantize/gptq.sh
+++ b/examples/export/quantize/gptq.sh
@@ -3,8 +3,8 @@ OMP_NUM_THREADS=14 \
 CUDA_VISIBLE_DEVICES=0 \
 swift export \
     --model Qwen/Qwen2.5-1.5B-Instruct \
-    --dataset AI-ModelScope/alpaca-gpt4-data-zh#500 \
-              AI-ModelScope/alpaca-gpt4-data-en#500 \
+    --dataset 'AI-ModelScope/alpaca-gpt4-data-zh#500' \
+              'AI-ModelScope/alpaca-gpt4-data-en#500' \
     --quant_n_samples 128 \
     --quant_batch_size 1 \
     --max_length 2048 \
diff --git a/examples/notebook/qwen2.5-self-cognition/sft.sh b/examples/notebook/qwen2.5-self-cognition/sft.sh
index 119ffd2f1..43f97974f 100644
--- a/examples/notebook/qwen2.5-self-cognition/sft.sh
+++ b/examples/notebook/qwen2.5-self-cognition/sft.sh
@@ -4,9 +4,9 @@ CUDA_VISIBLE_DEVICES=0 \
 swift sft \
     --model Qwen/Qwen2.5-3B-Instruct \
     --train_type lora \
-    --dataset AI-ModelScope/alpaca-gpt4-data-zh#500 \
-              AI-ModelScope/alpaca-gpt4-data-en#500 \
-              swift/self-cognition#500 \
+    --dataset 'AI-ModelScope/alpaca-gpt4-data-zh#500' \
+              'AI-ModelScope/alpaca-gpt4-data-en#500' \
+              'swift/self-cognition#500' \
     --torch_dtype bfloat16 \
     --num_train_epochs 1 \
     --per_device_train_batch_size 1 \
diff --git a/examples/train/all_to_all/train.sh b/examples/train/all_to_all/train.sh
index d99a26585..572eb1f16 100644
--- a/examples/train/all_to_all/train.sh
+++ b/examples/train/all_to_all/train.sh
@@ -7,7 +7,7 @@ image_area=518400 \
 swift sft \
     --model BAAI/Emu3-Gen \
     --train_type lora \
-    --dataset swift/TextCaps#40 \
+    --dataset 'swift/TextCaps#40' \
     --loss_scale react \
     --tools_prompt react_zh \
     --torch_dtype bfloat16 \
diff --git a/examples/train/demo.sh b/examples/train/demo.sh
index 7602459e2..e5d135cb6 100644
--- a/examples/train/demo.sh
+++ b/examples/train/demo.sh
@@ -3,9 +3,9 @@ CUDA_VISIBLE_DEVICES=0 \
 swift sft \
     --model Qwen/Qwen2.5-7B-Instruct \
     --train_type lora \
-    --dataset AI-ModelScope/alpaca-gpt4-data-zh#500 \
-              AI-ModelScope/alpaca-gpt4-data-en#500 \
-              swift/self-cognition#500 \
+    --dataset 'AI-ModelScope/alpaca-gpt4-data-zh#500' \
+              'AI-ModelScope/alpaca-gpt4-data-en#500' \
+              'swift/self-cognition#500' \
     --torch_dtype bfloat16 \
     --num_train_epochs 1 \
     --per_device_train_batch_size 1 \
diff --git a/examples/train/full/train.sh b/examples/train/full/train.sh
index 1190e43bc..ed560bf54 100644
--- a/examples/train/full/train.sh
+++ b/examples/train/full/train.sh
@@ -3,7 +3,7 @@ CUDA_VISIBLE_DEVICES=0 \
 swift sft \
     --model Qwen/Qwen2.5-7B-Instruct \
     --train_type full \
-    --dataset swift/self-cognition#1000 \
+    --dataset 'swift/self-cognition#1000' \
     --num_train_epochs 1 \
     --per_device_train_batch_size 1 \
     --learning_rate 1e-5 \
diff --git a/examples/train/lazy_tokenize/train.sh b/examples/train/lazy_tokenize/train.sh
index d48192b6e..c6f3a168a 100644
--- a/examples/train/lazy_tokenize/train.sh
+++ b/examples/train/lazy_tokenize/train.sh
@@ -3,7 +3,7 @@ CUDA_VISIBLE_DEVICES=0 \
 swift sft \
     --model Qwen/Qwen2.5-7B-Instruct \
     --train_type lora \
-    --dataset swift/self-cognition#1000 \
+    --dataset 'swift/self-cognition#1000' \
     --num_train_epochs 1 \
     --per_device_train_batch_size 1 \
     --learning_rate 1e-4 \
diff --git a/examples/train/multi-gpu/ddp/train.sh b/examples/train/multi-gpu/ddp/train.sh
index 48a3dae7d..6ce56701b 100644
--- a/examples/train/multi-gpu/ddp/train.sh
+++ b/examples/train/multi-gpu/ddp/train.sh
@@ -7,7 +7,7 @@ swift sft \
     --model Qwen/Qwen2.5-7B-Instruct \
     --train_type lora \
     --torch_dtype bfloat16 \
-    --dataset swift/self-cognition#1000 \
+    --dataset 'swift/self-cognition#1000' \
     --num_train_epochs 1 \
     --per_device_train_batch_size 1 \
     --lora_rank 8 \
diff --git a/examples/train/multi-gpu/ddp_device_map/train.sh b/examples/train/multi-gpu/ddp_device_map/train.sh
index a73d656d4..3949ae766 100644
--- a/examples/train/multi-gpu/ddp_device_map/train.sh
+++ b/examples/train/multi-gpu/ddp_device_map/train.sh
@@ -6,7 +6,7 @@ NPROC_PER_NODE=$nproc_per_node \
 swift sft \
     --model Qwen/Qwen2.5-7B-Instruct \
     --train_type lora \
-    --dataset swift/self-cognition#1000 \
+    --dataset 'swift/self-cognition#1000' \
     --torch_dtype bfloat16 \
     --num_train_epochs 1 \
     --per_device_train_batch_size 1 \
diff --git a/examples/train/multi-gpu/deepspeed/train_zero2.sh b/examples/train/multi-gpu/deepspeed/train_zero2.sh
index d7505e1cb..61b92e6fd 100644
--- a/examples/train/multi-gpu/deepspeed/train_zero2.sh
+++ b/examples/train/multi-gpu/deepspeed/train_zero2.sh
@@ -6,7 +6,7 @@ NPROC_PER_NODE=$nproc_per_node \
 swift sft \
     --model Qwen/Qwen2.5-7B-Instruct \
     --train_type lora \
-    --dataset swift/self-cognition#1000 \
+    --dataset 'swift/self-cognition#1000' \
     --torch_dtype bfloat16 \
     --num_train_epochs 1 \
     --per_device_train_batch_size 1 \
diff --git a/examples/train/multi-gpu/deepspeed/train_zero3.sh b/examples/train/multi-gpu/deepspeed/train_zero3.sh
index af7a6a4f2..5bed97bf5 100644
--- a/examples/train/multi-gpu/deepspeed/train_zero3.sh
+++ b/examples/train/multi-gpu/deepspeed/train_zero3.sh
@@ -6,7 +6,7 @@ NPROC_PER_NODE=$nproc_per_node \
 swift sft \
     --model Qwen/Qwen2.5-7B-Instruct \
     --train_type lora \
-    --dataset swift/self-cognition#1000 \
+    --dataset 'swift/self-cognition#1000' \
     --num_train_epochs 1 \
     --per_device_train_batch_size 1 \
     --lora_rank 8 \
diff --git a/examples/train/multi-gpu/fsdp_qlora/train.sh b/examples/train/multi-gpu/fsdp_qlora/train.sh
index 827b55024..8b10a78b5 100644
--- a/examples/train/multi-gpu/fsdp_qlora/train.sh
+++ b/examples/train/multi-gpu/fsdp_qlora/train.sh
@@ -6,7 +6,7 @@ accelerate launch --config_file "./examples/train/fsdp_qlora/fsdp_offload.json"
     swift/cli/sft.py \
     --model Qwen/Qwen2.5-7B-Instruct \
     --train_type lora \
-    --dataset swift/self-cognition#1000 \
+    --dataset 'swift/self-cognition#1000' \
     --num_train_epochs 1 \
     --per_device_train_batch_size 1 \
     --max_length 2048 \
diff --git a/examples/train/multi-node/accelerate/train_node1.sh b/examples/train/multi-node/accelerate/train_node1.sh
index 752c91ade..03f630e56 100644
--- a/examples/train/multi-node/accelerate/train_node1.sh
+++ b/examples/train/multi-node/accelerate/train_node1.sh
@@ -4,7 +4,7 @@ accelerate launch --config_file ./examples/train/multi-node/accelerate/multi_nod
     --model Qwen/Qwen2.5-7B-Instruct \
     --train_type lora \
     --torch_dtype bfloat16 \
-    --dataset swift/self-cognition#1000 \
+    --dataset 'swift/self-cognition#1000' \
     --num_train_epochs 1 \
     --lora_rank 8 \
     --lora_alpha 32 \
diff --git a/examples/train/multi-node/accelerate/train_node2.sh b/examples/train/multi-node/accelerate/train_node2.sh
index 603f502b5..2149a5a83 100644
--- a/examples/train/multi-node/accelerate/train_node2.sh
+++ b/examples/train/multi-node/accelerate/train_node2.sh
@@ -4,7 +4,7 @@ accelerate launch --config_file ./examples/train/multi-node/accelerate/multi_nod
     --model Qwen/Qwen2.5-7B-Instruct \
     --train_type lora \
     --torch_dtype bfloat16 \
-    --dataset swift/self-cognition#1000 \
+    --dataset 'swift/self-cognition#1000' \
     --num_train_epochs 1 \
     --lora_rank 8 \
     --lora_alpha 32 \
diff --git a/examples/train/multi-node/deepspeed/train.sh b/examples/train/multi-node/deepspeed/train.sh
index 253347d78..8616c737e 100644
--- a/examples/train/multi-node/deepspeed/train.sh
+++ b/examples/train/multi-node/deepspeed/train.sh
@@ -5,7 +5,7 @@ deepspeed --hostfile=./examples/train/multi-node-deepspeed/host.txt \
     --model Qwen/Qwen2.5-7B-Instruct \
     --train_type lora \
     --torch_dtype bfloat16 \
-    --dataset swift/self-cognition#1000 \
+    --dataset 'swift/self-cognition#1000' \
     --num_train_epochs 1 \
     --lora_rank 8 \
     --lora_alpha 32 \
diff --git a/examples/train/multi-node/dlc/train.sh b/examples/train/multi-node/dlc/train.sh
index a2aed445e..182088eb2 100644
--- a/examples/train/multi-node/dlc/train.sh
+++ b/examples/train/multi-node/dlc/train.sh
@@ -3,7 +3,7 @@ NODE_RANK=$RANK \
 swift sft \
     --model Qwen/Qwen2.5-7B-Instruct \
     --train_type lora \
-    --dataset swift/self-cognition#1000 \
+    --dataset 'swift/self-cognition#1000' \
     --num_train_epochs 1 \
     --per_device_train_batch_size 1 \
     --lora_rank 8 \
diff --git a/examples/train/multi-node/swift/train_node1.sh b/examples/train/multi-node/swift/train_node1.sh
index 03cc29265..976f757ab 100644
--- a/examples/train/multi-node/swift/train_node1.sh
+++ b/examples/train/multi-node/swift/train_node1.sh
@@ -7,7 +7,7 @@ swift sft \
       --model Qwen/Qwen2.5-7B-Instruct \
       --train_type lora \
       --torch_dtype bfloat16 \
-      --dataset swift/self-cognition#1000 \
+      --dataset 'swift/self-cognition#1000' \
       --num_train_epochs 1 \
       --lora_rank 8 \
       --lora_alpha 32 \
diff --git a/examples/train/multi-node/swift/train_node2.sh b/examples/train/multi-node/swift/train_node2.sh
index cc6a11538..22e4eee12 100644
--- a/examples/train/multi-node/swift/train_node2.sh
+++ b/examples/train/multi-node/swift/train_node2.sh
@@ -7,7 +7,7 @@ swift sft \
       --model Qwen/Qwen2.5-7B-Instruct \
       --train_type lora \
       --torch_dtype bfloat16 \
-      --dataset swift/self-cognition#1000 \
+      --dataset 'swift/self-cognition#1000' \
       --num_train_epochs 1 \
       --lora_rank 8 \
       --lora_alpha 32 \
diff --git a/examples/train/multi-node/torchrun/train_node1.sh b/examples/train/multi-node/torchrun/train_node1.sh
index 007cd8656..0072ca47c 100644
--- a/examples/train/multi-node/torchrun/train_node1.sh
+++ b/examples/train/multi-node/torchrun/train_node1.sh
@@ -4,7 +4,7 @@ torchrun --master_port 29500 --nproc_per_node=4 --nnodes=2 --node_rank=0 --maste
     --model Qwen/Qwen2.5-7B-Instruct \
     --train_type lora \
     --torch_dtype bfloat16 \
-    --dataset swift/self-cognition#1000 \
+    --dataset 'swift/self-cognition#1000' \
     --num_train_epochs 1 \
     --lora_rank 8 \
     --lora_alpha 32 \
diff --git a/examples/train/multi-node/torchrun/train_node2.sh b/examples/train/multi-node/torchrun/train_node2.sh
index 4704717b9..0de8699c3 100644
--- a/examples/train/multi-node/torchrun/train_node2.sh
+++ b/examples/train/multi-node/torchrun/train_node2.sh
@@ -4,7 +4,7 @@ torchrun --master_port 29500 --nproc_per_node=4 --nnodes=2 --node_rank=1 --maste
     --model Qwen/Qwen2.5-7B-Instruct \
     --train_type lora \
     --torch_dtype bfloat16 \
-    --dataset swift/self-cognition#1000 \
+    --dataset 'swift/self-cognition#1000' \
     --num_train_epochs 1 \
     --lora_rank 8 \
     --lora_alpha 32 \
diff --git a/examples/train/multimodal/grounding.sh b/examples/train/multimodal/grounding.sh
index 01f04b3a6..2edd83c52 100644
--- a/examples/train/multimodal/grounding.sh
+++ b/examples/train/multimodal/grounding.sh
@@ -4,7 +4,7 @@ MAX_PIXELS=1003520 \
 swift sft \
     --model Qwen/Qwen2-VL-7B-Instruct \
     --train_type lora \
-    --dataset swift/refcoco:grounding#1000 \
+    --dataset 'swift/refcoco:grounding#1000' \
     --num_train_epochs 1 \
     --learning_rate 1e-4 \
     --lora_rank 8 \
diff --git a/examples/train/multimodal/ocr.sh b/examples/train/multimodal/ocr.sh
index 4e39f45d1..d12a0c02c 100644
--- a/examples/train/multimodal/ocr.sh
+++ b/examples/train/multimodal/ocr.sh
@@ -3,7 +3,7 @@ CUDA_VISIBLE_DEVICES=0,1 \
 MAX_PIXELS=1003520 \
 swift sft \
     --model Qwen/QVQ-72B-Preview \
-    --dataset AI-ModelScope/LaTeX_OCR:human_handwrite#20000 \
+    --dataset 'AI-ModelScope/LaTeX_OCR:human_handwrite#20000' \
     --train_type lora \
     --torch_dtype bfloat16 \
     --num_train_epochs 1 \
diff --git a/examples/train/multimodal/vqa.sh b/examples/train/multimodal/vqa.sh
index f5e0aeeba..0fce44bf3 100644
--- a/examples/train/multimodal/vqa.sh
+++ b/examples/train/multimodal/vqa.sh
@@ -6,7 +6,7 @@ MAX_PIXELS=1003520 \
 swift sft \
     --model Qwen/Qwen2-VL-7B-Instruct \
     --train_type lora \
-    --dataset swift/OK-VQA_train#1000 \
+    --dataset 'swift/OK-VQA_train#1000' \
     --num_train_epochs 1 \
     --per_device_train_batch_size 1 \
     --learning_rate 1e-4 \
diff --git a/examples/train/packing/train.sh b/examples/train/packing/train.sh
index aaf2bac62..68720ae77 100644
--- a/examples/train/packing/train.sh
+++ b/examples/train/packing/train.sh
@@ -7,7 +7,7 @@ swift sft \
     --packing true \
     --max_length 8192 \
     --max_steps 100 \
-    --dataset swift/self-cognition#5000 \
+    --dataset 'swift/self-cognition#5000' \
     --num_train_epochs 1 \
     --per_device_train_batch_size 1 \
     --learning_rate 1e-4 \
diff --git a/examples/train/plugins/train_loss_scale.sh b/examples/train/plugins/train_loss_scale.sh
index 9349a882b..3722c497d 100644
--- a/examples/train/plugins/train_loss_scale.sh
+++ b/examples/train/plugins/train_loss_scale.sh
@@ -5,7 +5,7 @@ CUDA_VISIBLE_DEVICES=0 \
 swift sft \
     --model Qwen/Qwen2.5-7B-Instruct \
     --train_type lora \
-    --dataset swift/self-cognition#1000 \
+    --dataset 'swift/self-cognition#1000' \
     --num_train_epochs 1 \
     --per_device_train_batch_size 1 \
     --learning_rate 1e-4 \
diff --git a/examples/train/rlhf/kto.sh b/examples/train/rlhf/kto.sh
index 96ebe3bed..5d031bce4 100644
--- a/examples/train/rlhf/kto.sh
+++ b/examples/train/rlhf/kto.sh
@@ -6,7 +6,7 @@ swift rlhf \
     --rlhf_type kto \
     --model Qwen/Qwen2.5-7B-Instruct \
     --train_type lora \
-    --dataset AI-ModelScope/ultrafeedback-binarized-preferences-cleaned-kto#10000 \
+    --dataset 'AI-ModelScope/ultrafeedback-binarized-preferences-cleaned-kto#10000' \
     --num_train_epochs 2 \
     --learning_rate 1e-4 \
     --lora_rank 8 \
diff --git a/examples/train/seq_cls/sft.sh b/examples/train/seq_cls/sft.sh
index a7f0d229c..067c6664e 100644
--- a/examples/train/seq_cls/sft.sh
+++ b/examples/train/seq_cls/sft.sh
@@ -4,7 +4,7 @@ CUDA_VISIBLE_DEVICES=0 \
 swift sft \
     --model Qwen/Qwen2.5-7B \
     --train_type lora \
-    --dataset DAMO_NLP/jd:cls#2000 \
+    --dataset 'DAMO_NLP/jd:cls#2000' \
     --torch_dtype bfloat16 \
     --num_train_epochs 1 \
     --per_device_train_batch_size 1 \
diff --git a/examples/train/sequence_parallel/train.sh b/examples/train/sequence_parallel/train.sh
index 14bc6e1d8..1b61f3d5f 100644
--- a/examples/train/sequence_parallel/train.sh
+++ b/examples/train/sequence_parallel/train.sh
@@ -6,7 +6,7 @@ NPROC_PER_NODE=$nproc_per_node \
 swift sft \
     --model Qwen/Qwen2.5-7B-Instruct \
     --train_type lora \
-    --dataset AI-ModelScope/LongAlpaca-12k#5000 \
+    --dataset 'AI-ModelScope/LongAlpaca-12k#5000' \
     --num_train_epochs 1 \
     --sequence_parallel_size 2 \
     --learning_rate 1e-4 \
diff --git a/examples/train/streaming/train.sh b/examples/train/streaming/train.sh
index e941e0a9a..b864a48f2 100644
--- a/examples/train/streaming/train.sh
+++ b/examples/train/streaming/train.sh
@@ -2,7 +2,7 @@ CUDA_VISIBLE_DEVICES=0 \
 swift sft \
     --model Qwen/Qwen2.5-7B-Instruct \
     --train_type lora \
-    --dataset swift/self-cognition#1000 \
+    --dataset 'swift/self-cognition#1000' \
     --streaming true \
     --max_steps 1000 \
     --learning_rate 1e-4 \
diff --git a/examples/train/tuners/adalora/train.sh b/examples/train/tuners/adalora/train.sh
index e8b11ddf9..d22860d1e 100644
--- a/examples/train/tuners/adalora/train.sh
+++ b/examples/train/tuners/adalora/train.sh
@@ -3,7 +3,7 @@ CUDA_VISIBLE_DEVICES=0 \
 swift sft \
     --model Qwen/Qwen2.5-7B-Instruct \
     --train_type adalora \
-    --dataset swift/self-cognition#1000 \
+    --dataset 'swift/self-cognition#1000' \
     --num_train_epochs 1 \
     --per_device_train_batch_size 1 \
     --learning_rate 1e-4 \
diff --git a/examples/train/tuners/adapter/train.sh b/examples/train/tuners/adapter/train.sh
index 958d1b803..d334ae6cb 100644
--- a/examples/train/tuners/adapter/train.sh
+++ b/examples/train/tuners/adapter/train.sh
@@ -3,7 +3,7 @@ CUDA_VISIBLE_DEVICES=0 \
 swift sft \
     --model Qwen/Qwen2.5-7B-Instruct \
     --train_type adapter \
-    --dataset swift/self-cognition#1000 \
+    --dataset 'swift/self-cognition#1000' \
     --num_train_epochs 1 \
     --per_device_train_batch_size 1 \
     --learning_rate 1e-4 \
diff --git a/examples/train/tuners/boft/train.sh b/examples/train/tuners/boft/train.sh
index b6907d0e6..900bf2351 100644
--- a/examples/train/tuners/boft/train.sh
+++ b/examples/train/tuners/boft/train.sh
@@ -4,7 +4,7 @@ swift sft \
     --model Qwen/Qwen2.5-7B-Instruct \
     --train_type boft \
     --label_names labels \
-    --dataset swift/self-cognition#1000 \
+    --dataset 'swift/self-cognition#1000' \
     --num_train_epochs 1 \
     --per_device_train_batch_size 1 \
     --learning_rate 1e-4 \
diff --git a/examples/train/tuners/bone/train.sh b/examples/train/tuners/bone/train.sh
index 88c220fac..1dff4f740 100644
--- a/examples/train/tuners/bone/train.sh
+++ b/examples/train/tuners/bone/train.sh
@@ -4,7 +4,7 @@ swift sft \
     --model Qwen/Qwen2.5-7B-Instruct \
     --train_type bone \
     --label_names labels \
-    --dataset swift/self-cognition#1000 \
+    --dataset 'swift/self-cognition#1000' \
     --num_train_epochs 1 \
     --per_device_train_batch_size 1 \
     --learning_rate 1e-4 \
diff --git a/examples/train/tuners/dora/train.sh b/examples/train/tuners/dora/train.sh
index 2eb780f83..2bc7d9f23 100644
--- a/examples/train/tuners/dora/train.sh
+++ b/examples/train/tuners/dora/train.sh
@@ -4,7 +4,7 @@ swift sft \
     --model Qwen/Qwen2.5-7B-Instruct \
     --train_type lora \
     --use_dora true \
-    --dataset swift/self-cognition#1000 \
+    --dataset 'swift/self-cognition#1000' \
     --num_train_epochs 1 \
     --per_device_train_batch_size 1 \
     --learning_rate 1e-4 \
diff --git a/examples/train/tuners/galore/train_galore.sh b/examples/train/tuners/galore/train_galore.sh
index ac47f68e6..4728e0e49 100644
--- a/examples/train/tuners/galore/train_galore.sh
+++ b/examples/train/tuners/galore/train_galore.sh
@@ -3,7 +3,7 @@ CUDA_VISIBLE_DEVICES=0 \
 swift sft \
     --model Qwen/Qwen2.5-7B-Instruct \
     --train_type full \
-    --dataset swift/self-cognition#1000 \
+    --dataset 'swift/self-cognition#1000' \
     --num_train_epochs 1 \
     --per_device_train_batch_size 1 \
     --learning_rate 1e-5 \
diff --git a/examples/train/tuners/galore/train_qgalore.sh b/examples/train/tuners/galore/train_qgalore.sh
index a250c3a17..cdebbe044 100644
--- a/examples/train/tuners/galore/train_qgalore.sh
+++ b/examples/train/tuners/galore/train_qgalore.sh
@@ -5,7 +5,7 @@ swift sft \
     --model Qwen/Qwen2.5-7B-Instruct \
     --train_type full \
     --torch_dtype bfloat16 \
-    --dataset lvjianjin/AdvertiseGen#1000 \
+    --dataset 'lvjianjin/AdvertiseGen#1000' \
     --num_train_epochs 1 \
     --per_device_train_batch_size 1 \
     --learning_rate 1e-5 \
diff --git a/examples/train/tuners/liger/train.sh b/examples/train/tuners/liger/train.sh
index 068020de9..bb872a710 100644
--- a/examples/train/tuners/liger/train.sh
+++ b/examples/train/tuners/liger/train.sh
@@ -3,7 +3,7 @@ CUDA_VISIBLE_DEVICES=0 \
 swift sft \
     --model Qwen/Qwen2.5-7B-Instruct \
     --train_type lora \
-    --dataset swift/self-cognition#1000 \
+    --dataset 'swift/self-cognition#1000' \
     --use_liger true \
     --num_train_epochs 1 \
     --per_device_train_batch_size 1 \
diff --git a/examples/train/tuners/lisa/train.sh b/examples/train/tuners/lisa/train.sh
index 704d43442..8a8475a43 100644
--- a/examples/train/tuners/lisa/train.sh
+++ b/examples/train/tuners/lisa/train.sh
@@ -3,7 +3,7 @@ CUDA_VISIBLE_DEVICES=0 \
 swift sft \
     --model Qwen/Qwen2.5-7B-Instruct \
     --train_type full \
-    --dataset swift/self-cognition#1000 \
+    --dataset 'swift/self-cognition#1000' \
     --lisa_activated_layers 2 \
     --num_train_epochs 1 \
     --per_device_train_batch_size 1 \
diff --git a/examples/train/tuners/llamapro/train.sh b/examples/train/tuners/llamapro/train.sh
index b1d328c2b..d0956449d 100644
--- a/examples/train/tuners/llamapro/train.sh
+++ b/examples/train/tuners/llamapro/train.sh
@@ -3,7 +3,7 @@ CUDA_VISIBLE_DEVICES=0 \
 swift sft \
     --model Qwen/Qwen2.5-7B-Instruct \
     --train_type llamapro \
-    --dataset swift/self-cognition#1000 \
+    --dataset 'swift/self-cognition#1000' \
     --llamapro_num_new_blocks 4 \
     --num_train_epochs 1 \
     --per_device_train_batch_size 1 \
diff --git a/examples/train/tuners/longlora/train.sh b/examples/train/tuners/longlora/train.sh
index 64c03f42d..35697f21e 100644
--- a/examples/train/tuners/longlora/train.sh
+++ b/examples/train/tuners/longlora/train.sh
@@ -2,7 +2,7 @@ CUDA_VISIBLE_DEVICES=0 \
 swift sft \
     --model LLM-Research/Meta-Llama-3.1-8B-Instruct \
     --train_type longlora \
-    --dataset AI-ModelScope/LongAlpaca-12k#1000 \
+    --dataset 'AI-ModelScope/LongAlpaca-12k#1000' \
     --num_train_epochs 1 \
     --learning_rate 1e-4 \
     --attn_impl flash_attn \
diff --git a/examples/train/tuners/lora-ga/train.sh b/examples/train/tuners/lora-ga/train.sh
index dd132200d..fbfe76cc6 100644
--- a/examples/train/tuners/lora-ga/train.sh
+++ b/examples/train/tuners/lora-ga/train.sh
@@ -3,7 +3,7 @@ CUDA_VISIBLE_DEVICES=0 \
 swift sft \
     --model Qwen/Qwen2-1.5B-Instruct \
     --train_type lora \
-    --dataset swift/self-cognition#1000 \
+    --dataset 'swift/self-cognition#1000' \
     --num_train_epochs 1 \
     --per_device_train_batch_size 1 \
     --learning_rate 1e-4 \
diff --git a/examples/train/tuners/lora/train.sh b/examples/train/tuners/lora/train.sh
index 51058b446..e8c231c67 100644
--- a/examples/train/tuners/lora/train.sh
+++ b/examples/train/tuners/lora/train.sh
@@ -3,7 +3,7 @@ CUDA_VISIBLE_DEVICES=0 \
 swift sft \
     --model Qwen/Qwen2.5-7B-Instruct \
     --train_type lora \
-    --dataset swift/self-cognition#1000 \
+    --dataset 'swift/self-cognition#1000' \
     --num_train_epochs 1 \
     --per_device_train_batch_size 1 \
     --learning_rate 1e-4 \
diff --git a/examples/train/tuners/neftune/train.sh b/examples/train/tuners/neftune/train.sh
index a6dcb1585..bf53a4d90 100644
--- a/examples/train/tuners/neftune/train.sh
+++ b/examples/train/tuners/neftune/train.sh
@@ -3,7 +3,7 @@ CUDA_VISIBLE_DEVICES=0 \
 swift sft \
     --model Qwen/Qwen2.5-7B-Instruct \
     --train_type lora \
-    --dataset swift/self-cognition#1000 \
+    --dataset 'swift/self-cognition#1000' \
     --num_train_epochs 1 \
     --per_device_train_batch_size 1 \
     --neftune_noise_alpha 15 \
diff --git a/examples/train/tuners/olora/train.sh b/examples/train/tuners/olora/train.sh
index 2614fb8d1..1ead995df 100644
--- a/examples/train/tuners/olora/train.sh
+++ b/examples/train/tuners/olora/train.sh
@@ -3,7 +3,7 @@ CUDA_VISIBLE_DEVICES=0 \
 swift sft \
     --model Qwen/Qwen2.5-7B-Instruct \
     --train_type lora \
-    --dataset swift/self-cognition#1000 \
+    --dataset 'swift/self-cognition#1000' \
     --num_train_epochs 1 \
     --per_device_train_batch_size 1 \
     --learning_rate 1e-4 \
diff --git a/examples/train/tuners/pissa/train.sh b/examples/train/tuners/pissa/train.sh
index c258c124e..9139ba441 100644
--- a/examples/train/tuners/pissa/train.sh
+++ b/examples/train/tuners/pissa/train.sh
@@ -3,7 +3,7 @@ CUDA_VISIBLE_DEVICES=0 \
 swift sft \
     --model Qwen/Qwen2.5-7B-Instruct \
     --train_type lora \
-    --dataset swift/self-cognition#1000 \
+    --dataset 'swift/self-cognition#1000' \
     --num_train_epochs 1 \
     --per_device_train_batch_size 1 \
     --learning_rate 1e-4 \
diff --git a/examples/train/tuners/qlora/train.sh b/examples/train/tuners/qlora/train.sh
index fbf97a682..716845374 100644
--- a/examples/train/tuners/qlora/train.sh
+++ b/examples/train/tuners/qlora/train.sh
@@ -2,7 +2,7 @@ CUDA_VISIBLE_DEVICES=0 \
 swift sft \
     --model Qwen/Qwen2.5-7B-Instruct \
     --train_type lora \
-    --dataset swift/self-cognition#1000 \
+    --dataset 'swift/self-cognition#1000' \
     --num_train_epochs 1 \
     --per_device_train_batch_size 1 \
     --learning_rate 1e-4 \
diff --git a/examples/train/tuners/reft/train.sh b/examples/train/tuners/reft/train.sh
index c4a0d593a..0b3853bfe 100644
--- a/examples/train/tuners/reft/train.sh
+++ b/examples/train/tuners/reft/train.sh
@@ -2,7 +2,7 @@ CUDA_VISIBLE_DEVICES=0 \
 swift sft \
     --model Qwen/Qwen2.5-7B-Instruct \
     --train_type reft \
-    --dataset swift/self-cognition#1000 \
+    --dataset 'swift/self-cognition#1000' \
     --reft_intervention_type 'LoreftIntervention' \
     --num_train_epochs 1 \
     --per_device_train_batch_size 1 \
diff --git a/examples/train/tuners/unsloth/train.sh b/examples/train/tuners/unsloth/train.sh
index 87adf7ff1..829114896 100644
--- a/examples/train/tuners/unsloth/train.sh
+++ b/examples/train/tuners/unsloth/train.sh
@@ -4,7 +4,7 @@ swift sft \
     --model Qwen/Qwen2.5-7B-Instruct \
     --tuner_backend unsloth \
     --train_type lora \
-    --dataset swift/self-cognition#1000 \
+    --dataset 'swift/self-cognition#1000' \
     --num_train_epochs 1 \
     --per_device_train_batch_size 1 \
     --learning_rate 1e-4 \
diff --git a/requirements/framework.txt b/requirements/framework.txt
index 541d01414..ee068f99a 100644
--- a/requirements/framework.txt
+++ b/requirements/framework.txt
@@ -28,7 +28,7 @@ sentencepiece
 tensorboard
 tiktoken
 tqdm
-transformers>=4.33,<4.48
+transformers>=4.33,<4.49
 transformers_stream_generator
 trl>=0.11,<0.12
 uvicorn