From 6488cba1052890ba3590f04b2459e456281196c9 Mon Sep 17 00:00:00 2001 From: Jintao Date: Wed, 25 Dec 2024 16:32:02 +0800 Subject: [PATCH] fix shell (#2764) --- README.md | 6 +++--- README_CN.md | 6 +++--- .../\345\277\253\351\200\237\345\274\200\345\247\213.md" | 6 +++--- "docs/source/Instruction/\345\257\274\345\207\272.md" | 6 +++--- docs/source_en/GetStarted/Quick-start.md | 6 +++--- docs/source_en/Instruction/Export.md | 6 +++--- examples/export/quantize/awq.sh | 4 ++-- examples/export/quantize/gptq.sh | 4 ++-- examples/notebook/qwen2.5-self-cognition/sft.sh | 6 +++--- examples/train/all_to_all/train.sh | 2 +- examples/train/demo.sh | 6 +++--- examples/train/full/train.sh | 2 +- examples/train/lazy_tokenize/train.sh | 2 +- examples/train/multi-gpu/ddp/train.sh | 2 +- examples/train/multi-gpu/ddp_device_map/train.sh | 2 +- examples/train/multi-gpu/deepspeed/train_zero2.sh | 2 +- examples/train/multi-gpu/deepspeed/train_zero3.sh | 2 +- examples/train/multi-gpu/fsdp_qlora/train.sh | 2 +- examples/train/multi-node/accelerate/train_node1.sh | 2 +- examples/train/multi-node/accelerate/train_node2.sh | 2 +- examples/train/multi-node/deepspeed/train.sh | 2 +- examples/train/multi-node/dlc/train.sh | 2 +- examples/train/multi-node/swift/train_node1.sh | 2 +- examples/train/multi-node/swift/train_node2.sh | 2 +- examples/train/multi-node/torchrun/train_node1.sh | 2 +- examples/train/multi-node/torchrun/train_node2.sh | 2 +- examples/train/multimodal/grounding.sh | 2 +- examples/train/multimodal/ocr.sh | 2 +- examples/train/multimodal/vqa.sh | 2 +- examples/train/packing/train.sh | 2 +- examples/train/plugins/train_loss_scale.sh | 2 +- examples/train/rlhf/kto.sh | 2 +- examples/train/seq_cls/sft.sh | 2 +- examples/train/sequence_parallel/train.sh | 2 +- examples/train/streaming/train.sh | 2 +- examples/train/tuners/adalora/train.sh | 2 +- examples/train/tuners/adapter/train.sh | 2 +- examples/train/tuners/boft/train.sh | 2 +- examples/train/tuners/bone/train.sh | 2 +- examples/train/tuners/dora/train.sh | 2 +- examples/train/tuners/galore/train_galore.sh | 2 +- examples/train/tuners/galore/train_qgalore.sh | 2 +- examples/train/tuners/liger/train.sh | 2 +- examples/train/tuners/lisa/train.sh | 2 +- examples/train/tuners/llamapro/train.sh | 2 +- examples/train/tuners/longlora/train.sh | 2 +- examples/train/tuners/lora-ga/train.sh | 2 +- examples/train/tuners/lora/train.sh | 2 +- examples/train/tuners/neftune/train.sh | 2 +- examples/train/tuners/olora/train.sh | 2 +- examples/train/tuners/pissa/train.sh | 2 +- examples/train/tuners/qlora/train.sh | 2 +- examples/train/tuners/reft/train.sh | 2 +- examples/train/tuners/unsloth/train.sh | 2 +- requirements/framework.txt | 2 +- 55 files changed, 73 insertions(+), 73 deletions(-) diff --git a/README.md b/README.md index 9884d0fc3..a572beaed 100644 --- a/README.md +++ b/README.md @@ -114,9 +114,9 @@ CUDA_VISIBLE_DEVICES=0 \ swift sft \ --model Qwen/Qwen2.5-7B-Instruct \ --train_type lora \ - --dataset AI-ModelScope/alpaca-gpt4-data-zh#500 \ - AI-ModelScope/alpaca-gpt4-data-en#500 \ - swift/self-cognition#500 \ + --dataset 'AI-ModelScope/alpaca-gpt4-data-zh#500' \ + 'AI-ModelScope/alpaca-gpt4-data-en#500' \ + 'swift/self-cognition#500' \ --torch_dtype bfloat16 \ --num_train_epochs 1 \ --per_device_train_batch_size 1 \ diff --git a/README_CN.md b/README_CN.md index 60b7c79a9..c3796e0a5 100644 --- a/README_CN.md +++ b/README_CN.md @@ -107,9 +107,9 @@ CUDA_VISIBLE_DEVICES=0 \ swift sft \ --model Qwen/Qwen2.5-7B-Instruct \ --train_type lora \ - --dataset AI-ModelScope/alpaca-gpt4-data-zh#500 \ - AI-ModelScope/alpaca-gpt4-data-en#500 \ - swift/self-cognition#500 \ + --dataset 'AI-ModelScope/alpaca-gpt4-data-zh#500' \ + 'AI-ModelScope/alpaca-gpt4-data-en#500' \ + 'swift/self-cognition#500' \ --torch_dtype bfloat16 \ --num_train_epochs 1 \ --per_device_train_batch_size 1 \ diff --git "a/docs/source/GetStarted/\345\277\253\351\200\237\345\274\200\345\247\213.md" "b/docs/source/GetStarted/\345\277\253\351\200\237\345\274\200\345\247\213.md" index d30b25cf8..c69597316 100644 --- "a/docs/source/GetStarted/\345\277\253\351\200\237\345\274\200\345\247\213.md" +++ "b/docs/source/GetStarted/\345\277\253\351\200\237\345\274\200\345\247\213.md" @@ -31,9 +31,9 @@ CUDA_VISIBLE_DEVICES=0 \ swift sft \ --model Qwen/Qwen2.5-7B-Instruct \ --train_type lora \ - --dataset AI-ModelScope/alpaca-gpt4-data-zh#500 \ - AI-ModelScope/alpaca-gpt4-data-en#500 \ - swift/self-cognition#500 \ + --dataset 'AI-ModelScope/alpaca-gpt4-data-zh#500' \ + 'AI-ModelScope/alpaca-gpt4-data-en#500' \ + 'swift/self-cognition#500' \ --torch_dtype bfloat16 \ --num_train_epochs 1 \ --per_device_train_batch_size 1 \ diff --git "a/docs/source/Instruction/\345\257\274\345\207\272.md" "b/docs/source/Instruction/\345\257\274\345\207\272.md" index 1a48068e8..3e5426f59 100644 --- "a/docs/source/Instruction/\345\257\274\345\207\272.md" +++ "b/docs/source/Instruction/\345\257\274\345\207\272.md" @@ -77,7 +77,7 @@ CUDA_VISIBLE_DEVICES=0 swift infer \ CUDA_VISIBLE_DEVICES=0 swift sft \ --model Qwen/Qwen2-7B-Instruct \ --train_type lora \ - --dataset AI-ModelScope/alpaca-gpt4-data-zh#5000 \ + --dataset 'AI-ModelScope/alpaca-gpt4-data-zh#5000' \ --quant_method bnb \ --quant_bits 4 \ --torch_dtype bfloat16 @@ -86,7 +86,7 @@ CUDA_VISIBLE_DEVICES=0 swift sft \ CUDA_VISIBLE_DEVICES=0 swift sft \ --model Qwen/Qwen2-7B-Instruct \ --train_type lora \ - --dataset AI-ModelScope/alpaca-gpt4-data-zh#5000 \ + --dataset 'AI-ModelScope/alpaca-gpt4-data-zh#5000' \ --quant_method hqq \ --quant_bits 4 @@ -94,7 +94,7 @@ CUDA_VISIBLE_DEVICES=0 swift sft \ CUDA_VISIBLE_DEVICES=0 swift sft \ --model Qwen/Qwen2-7B-Instruct \ --train_type lora \ - --dataset AI-ModelScope/alpaca-gpt4-data-zh#5000 \ + --dataset 'AI-ModelScope/alpaca-gpt4-data-zh#5000' \ --quant_method eetq \ --torch_dtype float16 ``` diff --git a/docs/source_en/GetStarted/Quick-start.md b/docs/source_en/GetStarted/Quick-start.md index b5d58e09a..c410e4484 100644 --- a/docs/source_en/GetStarted/Quick-start.md +++ b/docs/source_en/GetStarted/Quick-start.md @@ -31,9 +31,9 @@ CUDA_VISIBLE_DEVICES=0 \ swift sft \ --model Qwen/Qwen2.5-7B-Instruct \ --train_type lora \ - --dataset AI-ModelScope/alpaca-gpt4-data-zh#500 \ - AI-ModelScope/alpaca-gpt4-data-en#500 \ - swift/self-cognition#500 \ + --dataset 'AI-ModelScope/alpaca-gpt4-data-zh#500' \ + 'AI-ModelScope/alpaca-gpt4-data-en#500' \ + 'swift/self-cognition#500' \ --torch_dtype bfloat16 \ --num_train_epochs 1 \ --per_device_train_batch_size 1 \ diff --git a/docs/source_en/Instruction/Export.md b/docs/source_en/Instruction/Export.md index fec9f44f6..175fb7b5e 100644 --- a/docs/source_en/Instruction/Export.md +++ b/docs/source_en/Instruction/Export.md @@ -78,7 +78,7 @@ CUDA_VISIBLE_DEVICES=0 swift infer \ CUDA_VISIBLE_DEVICES=0 swift sft \ --model Qwen/Qwen2-7B-Instruct \ --train_type lora \ - --dataset AI-ModelScope/alpaca-gpt4-data-zh#5000 \ + --dataset 'AI-ModelScope/alpaca-gpt4-data-zh#5000' \ --quant_method bnb \ --quant_bits 4 \ --torch_dtype bfloat16 @@ -87,7 +87,7 @@ CUDA_VISIBLE_DEVICES=0 swift sft \ CUDA_VISIBLE_DEVICES=0 swift sft \ --model Qwen/Qwen2-7B-Instruct \ --train_type lora \ - --dataset AI-ModelScope/alpaca-gpt4-data-zh#5000 \ + --dataset 'AI-ModelScope/alpaca-gpt4-data-zh#5000' \ --quant_method hqq \ --quant_bits 4 @@ -95,7 +95,7 @@ CUDA_VISIBLE_DEVICES=0 swift sft \ CUDA_VISIBLE_DEVICES=0 swift sft \ --model Qwen/Qwen2-7B-Instruct \ --train_type lora \ - --dataset AI-ModelScope/alpaca-gpt4-data-zh#5000 \ + --dataset 'AI-ModelScope/alpaca-gpt4-data-zh#5000' \ --quant_method eetq \ --torch_dtype float16 ``` diff --git a/examples/export/quantize/awq.sh b/examples/export/quantize/awq.sh index 379ad270e..04ebbaf6c 100644 --- a/examples/export/quantize/awq.sh +++ b/examples/export/quantize/awq.sh @@ -1,8 +1,8 @@ CUDA_VISIBLE_DEVICES=0 \ swift export \ --model Qwen/Qwen2.5-1.5B-Instruct \ - --dataset AI-ModelScope/alpaca-gpt4-data-zh#500 \ - AI-ModelScope/alpaca-gpt4-data-en#500 \ + --dataset 'AI-ModelScope/alpaca-gpt4-data-zh#500' \ + 'AI-ModelScope/alpaca-gpt4-data-en#500' \ --quant_n_samples 128 \ --quant_batch_size 1 \ --max_length 2048 \ diff --git a/examples/export/quantize/gptq.sh b/examples/export/quantize/gptq.sh index f53d251bd..b5f8c43e2 100644 --- a/examples/export/quantize/gptq.sh +++ b/examples/export/quantize/gptq.sh @@ -3,8 +3,8 @@ OMP_NUM_THREADS=14 \ CUDA_VISIBLE_DEVICES=0 \ swift export \ --model Qwen/Qwen2.5-1.5B-Instruct \ - --dataset AI-ModelScope/alpaca-gpt4-data-zh#500 \ - AI-ModelScope/alpaca-gpt4-data-en#500 \ + --dataset 'AI-ModelScope/alpaca-gpt4-data-zh#500' \ + 'AI-ModelScope/alpaca-gpt4-data-en#500' \ --quant_n_samples 128 \ --quant_batch_size 1 \ --max_length 2048 \ diff --git a/examples/notebook/qwen2.5-self-cognition/sft.sh b/examples/notebook/qwen2.5-self-cognition/sft.sh index 119ffd2f1..43f97974f 100644 --- a/examples/notebook/qwen2.5-self-cognition/sft.sh +++ b/examples/notebook/qwen2.5-self-cognition/sft.sh @@ -4,9 +4,9 @@ CUDA_VISIBLE_DEVICES=0 \ swift sft \ --model Qwen/Qwen2.5-3B-Instruct \ --train_type lora \ - --dataset AI-ModelScope/alpaca-gpt4-data-zh#500 \ - AI-ModelScope/alpaca-gpt4-data-en#500 \ - swift/self-cognition#500 \ + --dataset 'AI-ModelScope/alpaca-gpt4-data-zh#500' \ + 'AI-ModelScope/alpaca-gpt4-data-en#500' \ + 'swift/self-cognition#500' \ --torch_dtype bfloat16 \ --num_train_epochs 1 \ --per_device_train_batch_size 1 \ diff --git a/examples/train/all_to_all/train.sh b/examples/train/all_to_all/train.sh index d99a26585..572eb1f16 100644 --- a/examples/train/all_to_all/train.sh +++ b/examples/train/all_to_all/train.sh @@ -7,7 +7,7 @@ image_area=518400 \ swift sft \ --model BAAI/Emu3-Gen \ --train_type lora \ - --dataset swift/TextCaps#40 \ + --dataset 'swift/TextCaps#40' \ --loss_scale react \ --tools_prompt react_zh \ --torch_dtype bfloat16 \ diff --git a/examples/train/demo.sh b/examples/train/demo.sh index 7602459e2..e5d135cb6 100644 --- a/examples/train/demo.sh +++ b/examples/train/demo.sh @@ -3,9 +3,9 @@ CUDA_VISIBLE_DEVICES=0 \ swift sft \ --model Qwen/Qwen2.5-7B-Instruct \ --train_type lora \ - --dataset AI-ModelScope/alpaca-gpt4-data-zh#500 \ - AI-ModelScope/alpaca-gpt4-data-en#500 \ - swift/self-cognition#500 \ + --dataset 'AI-ModelScope/alpaca-gpt4-data-zh#500' \ + 'AI-ModelScope/alpaca-gpt4-data-en#500' \ + 'swift/self-cognition#500' \ --torch_dtype bfloat16 \ --num_train_epochs 1 \ --per_device_train_batch_size 1 \ diff --git a/examples/train/full/train.sh b/examples/train/full/train.sh index 1190e43bc..ed560bf54 100644 --- a/examples/train/full/train.sh +++ b/examples/train/full/train.sh @@ -3,7 +3,7 @@ CUDA_VISIBLE_DEVICES=0 \ swift sft \ --model Qwen/Qwen2.5-7B-Instruct \ --train_type full \ - --dataset swift/self-cognition#1000 \ + --dataset 'swift/self-cognition#1000' \ --num_train_epochs 1 \ --per_device_train_batch_size 1 \ --learning_rate 1e-5 \ diff --git a/examples/train/lazy_tokenize/train.sh b/examples/train/lazy_tokenize/train.sh index d48192b6e..c6f3a168a 100644 --- a/examples/train/lazy_tokenize/train.sh +++ b/examples/train/lazy_tokenize/train.sh @@ -3,7 +3,7 @@ CUDA_VISIBLE_DEVICES=0 \ swift sft \ --model Qwen/Qwen2.5-7B-Instruct \ --train_type lora \ - --dataset swift/self-cognition#1000 \ + --dataset 'swift/self-cognition#1000' \ --num_train_epochs 1 \ --per_device_train_batch_size 1 \ --learning_rate 1e-4 \ diff --git a/examples/train/multi-gpu/ddp/train.sh b/examples/train/multi-gpu/ddp/train.sh index 48a3dae7d..6ce56701b 100644 --- a/examples/train/multi-gpu/ddp/train.sh +++ b/examples/train/multi-gpu/ddp/train.sh @@ -7,7 +7,7 @@ swift sft \ --model Qwen/Qwen2.5-7B-Instruct \ --train_type lora \ --torch_dtype bfloat16 \ - --dataset swift/self-cognition#1000 \ + --dataset 'swift/self-cognition#1000' \ --num_train_epochs 1 \ --per_device_train_batch_size 1 \ --lora_rank 8 \ diff --git a/examples/train/multi-gpu/ddp_device_map/train.sh b/examples/train/multi-gpu/ddp_device_map/train.sh index a73d656d4..3949ae766 100644 --- a/examples/train/multi-gpu/ddp_device_map/train.sh +++ b/examples/train/multi-gpu/ddp_device_map/train.sh @@ -6,7 +6,7 @@ NPROC_PER_NODE=$nproc_per_node \ swift sft \ --model Qwen/Qwen2.5-7B-Instruct \ --train_type lora \ - --dataset swift/self-cognition#1000 \ + --dataset 'swift/self-cognition#1000' \ --torch_dtype bfloat16 \ --num_train_epochs 1 \ --per_device_train_batch_size 1 \ diff --git a/examples/train/multi-gpu/deepspeed/train_zero2.sh b/examples/train/multi-gpu/deepspeed/train_zero2.sh index d7505e1cb..61b92e6fd 100644 --- a/examples/train/multi-gpu/deepspeed/train_zero2.sh +++ b/examples/train/multi-gpu/deepspeed/train_zero2.sh @@ -6,7 +6,7 @@ NPROC_PER_NODE=$nproc_per_node \ swift sft \ --model Qwen/Qwen2.5-7B-Instruct \ --train_type lora \ - --dataset swift/self-cognition#1000 \ + --dataset 'swift/self-cognition#1000' \ --torch_dtype bfloat16 \ --num_train_epochs 1 \ --per_device_train_batch_size 1 \ diff --git a/examples/train/multi-gpu/deepspeed/train_zero3.sh b/examples/train/multi-gpu/deepspeed/train_zero3.sh index af7a6a4f2..5bed97bf5 100644 --- a/examples/train/multi-gpu/deepspeed/train_zero3.sh +++ b/examples/train/multi-gpu/deepspeed/train_zero3.sh @@ -6,7 +6,7 @@ NPROC_PER_NODE=$nproc_per_node \ swift sft \ --model Qwen/Qwen2.5-7B-Instruct \ --train_type lora \ - --dataset swift/self-cognition#1000 \ + --dataset 'swift/self-cognition#1000' \ --num_train_epochs 1 \ --per_device_train_batch_size 1 \ --lora_rank 8 \ diff --git a/examples/train/multi-gpu/fsdp_qlora/train.sh b/examples/train/multi-gpu/fsdp_qlora/train.sh index 827b55024..8b10a78b5 100644 --- a/examples/train/multi-gpu/fsdp_qlora/train.sh +++ b/examples/train/multi-gpu/fsdp_qlora/train.sh @@ -6,7 +6,7 @@ accelerate launch --config_file "./examples/train/fsdp_qlora/fsdp_offload.json" swift/cli/sft.py \ --model Qwen/Qwen2.5-7B-Instruct \ --train_type lora \ - --dataset swift/self-cognition#1000 \ + --dataset 'swift/self-cognition#1000' \ --num_train_epochs 1 \ --per_device_train_batch_size 1 \ --max_length 2048 \ diff --git a/examples/train/multi-node/accelerate/train_node1.sh b/examples/train/multi-node/accelerate/train_node1.sh index 752c91ade..03f630e56 100644 --- a/examples/train/multi-node/accelerate/train_node1.sh +++ b/examples/train/multi-node/accelerate/train_node1.sh @@ -4,7 +4,7 @@ accelerate launch --config_file ./examples/train/multi-node/accelerate/multi_nod --model Qwen/Qwen2.5-7B-Instruct \ --train_type lora \ --torch_dtype bfloat16 \ - --dataset swift/self-cognition#1000 \ + --dataset 'swift/self-cognition#1000' \ --num_train_epochs 1 \ --lora_rank 8 \ --lora_alpha 32 \ diff --git a/examples/train/multi-node/accelerate/train_node2.sh b/examples/train/multi-node/accelerate/train_node2.sh index 603f502b5..2149a5a83 100644 --- a/examples/train/multi-node/accelerate/train_node2.sh +++ b/examples/train/multi-node/accelerate/train_node2.sh @@ -4,7 +4,7 @@ accelerate launch --config_file ./examples/train/multi-node/accelerate/multi_nod --model Qwen/Qwen2.5-7B-Instruct \ --train_type lora \ --torch_dtype bfloat16 \ - --dataset swift/self-cognition#1000 \ + --dataset 'swift/self-cognition#1000' \ --num_train_epochs 1 \ --lora_rank 8 \ --lora_alpha 32 \ diff --git a/examples/train/multi-node/deepspeed/train.sh b/examples/train/multi-node/deepspeed/train.sh index 253347d78..8616c737e 100644 --- a/examples/train/multi-node/deepspeed/train.sh +++ b/examples/train/multi-node/deepspeed/train.sh @@ -5,7 +5,7 @@ deepspeed --hostfile=./examples/train/multi-node-deepspeed/host.txt \ --model Qwen/Qwen2.5-7B-Instruct \ --train_type lora \ --torch_dtype bfloat16 \ - --dataset swift/self-cognition#1000 \ + --dataset 'swift/self-cognition#1000' \ --num_train_epochs 1 \ --lora_rank 8 \ --lora_alpha 32 \ diff --git a/examples/train/multi-node/dlc/train.sh b/examples/train/multi-node/dlc/train.sh index a2aed445e..182088eb2 100644 --- a/examples/train/multi-node/dlc/train.sh +++ b/examples/train/multi-node/dlc/train.sh @@ -3,7 +3,7 @@ NODE_RANK=$RANK \ swift sft \ --model Qwen/Qwen2.5-7B-Instruct \ --train_type lora \ - --dataset swift/self-cognition#1000 \ + --dataset 'swift/self-cognition#1000' \ --num_train_epochs 1 \ --per_device_train_batch_size 1 \ --lora_rank 8 \ diff --git a/examples/train/multi-node/swift/train_node1.sh b/examples/train/multi-node/swift/train_node1.sh index 03cc29265..976f757ab 100644 --- a/examples/train/multi-node/swift/train_node1.sh +++ b/examples/train/multi-node/swift/train_node1.sh @@ -7,7 +7,7 @@ swift sft \ --model Qwen/Qwen2.5-7B-Instruct \ --train_type lora \ --torch_dtype bfloat16 \ - --dataset swift/self-cognition#1000 \ + --dataset 'swift/self-cognition#1000' \ --num_train_epochs 1 \ --lora_rank 8 \ --lora_alpha 32 \ diff --git a/examples/train/multi-node/swift/train_node2.sh b/examples/train/multi-node/swift/train_node2.sh index cc6a11538..22e4eee12 100644 --- a/examples/train/multi-node/swift/train_node2.sh +++ b/examples/train/multi-node/swift/train_node2.sh @@ -7,7 +7,7 @@ swift sft \ --model Qwen/Qwen2.5-7B-Instruct \ --train_type lora \ --torch_dtype bfloat16 \ - --dataset swift/self-cognition#1000 \ + --dataset 'swift/self-cognition#1000' \ --num_train_epochs 1 \ --lora_rank 8 \ --lora_alpha 32 \ diff --git a/examples/train/multi-node/torchrun/train_node1.sh b/examples/train/multi-node/torchrun/train_node1.sh index 007cd8656..0072ca47c 100644 --- a/examples/train/multi-node/torchrun/train_node1.sh +++ b/examples/train/multi-node/torchrun/train_node1.sh @@ -4,7 +4,7 @@ torchrun --master_port 29500 --nproc_per_node=4 --nnodes=2 --node_rank=0 --maste --model Qwen/Qwen2.5-7B-Instruct \ --train_type lora \ --torch_dtype bfloat16 \ - --dataset swift/self-cognition#1000 \ + --dataset 'swift/self-cognition#1000' \ --num_train_epochs 1 \ --lora_rank 8 \ --lora_alpha 32 \ diff --git a/examples/train/multi-node/torchrun/train_node2.sh b/examples/train/multi-node/torchrun/train_node2.sh index 4704717b9..0de8699c3 100644 --- a/examples/train/multi-node/torchrun/train_node2.sh +++ b/examples/train/multi-node/torchrun/train_node2.sh @@ -4,7 +4,7 @@ torchrun --master_port 29500 --nproc_per_node=4 --nnodes=2 --node_rank=1 --maste --model Qwen/Qwen2.5-7B-Instruct \ --train_type lora \ --torch_dtype bfloat16 \ - --dataset swift/self-cognition#1000 \ + --dataset 'swift/self-cognition#1000' \ --num_train_epochs 1 \ --lora_rank 8 \ --lora_alpha 32 \ diff --git a/examples/train/multimodal/grounding.sh b/examples/train/multimodal/grounding.sh index 01f04b3a6..2edd83c52 100644 --- a/examples/train/multimodal/grounding.sh +++ b/examples/train/multimodal/grounding.sh @@ -4,7 +4,7 @@ MAX_PIXELS=1003520 \ swift sft \ --model Qwen/Qwen2-VL-7B-Instruct \ --train_type lora \ - --dataset swift/refcoco:grounding#1000 \ + --dataset 'swift/refcoco:grounding#1000' \ --num_train_epochs 1 \ --learning_rate 1e-4 \ --lora_rank 8 \ diff --git a/examples/train/multimodal/ocr.sh b/examples/train/multimodal/ocr.sh index 4e39f45d1..d12a0c02c 100644 --- a/examples/train/multimodal/ocr.sh +++ b/examples/train/multimodal/ocr.sh @@ -3,7 +3,7 @@ CUDA_VISIBLE_DEVICES=0,1 \ MAX_PIXELS=1003520 \ swift sft \ --model Qwen/QVQ-72B-Preview \ - --dataset AI-ModelScope/LaTeX_OCR:human_handwrite#20000 \ + --dataset 'AI-ModelScope/LaTeX_OCR:human_handwrite#20000' \ --train_type lora \ --torch_dtype bfloat16 \ --num_train_epochs 1 \ diff --git a/examples/train/multimodal/vqa.sh b/examples/train/multimodal/vqa.sh index f5e0aeeba..0fce44bf3 100644 --- a/examples/train/multimodal/vqa.sh +++ b/examples/train/multimodal/vqa.sh @@ -6,7 +6,7 @@ MAX_PIXELS=1003520 \ swift sft \ --model Qwen/Qwen2-VL-7B-Instruct \ --train_type lora \ - --dataset swift/OK-VQA_train#1000 \ + --dataset 'swift/OK-VQA_train#1000' \ --num_train_epochs 1 \ --per_device_train_batch_size 1 \ --learning_rate 1e-4 \ diff --git a/examples/train/packing/train.sh b/examples/train/packing/train.sh index aaf2bac62..68720ae77 100644 --- a/examples/train/packing/train.sh +++ b/examples/train/packing/train.sh @@ -7,7 +7,7 @@ swift sft \ --packing true \ --max_length 8192 \ --max_steps 100 \ - --dataset swift/self-cognition#5000 \ + --dataset 'swift/self-cognition#5000' \ --num_train_epochs 1 \ --per_device_train_batch_size 1 \ --learning_rate 1e-4 \ diff --git a/examples/train/plugins/train_loss_scale.sh b/examples/train/plugins/train_loss_scale.sh index 9349a882b..3722c497d 100644 --- a/examples/train/plugins/train_loss_scale.sh +++ b/examples/train/plugins/train_loss_scale.sh @@ -5,7 +5,7 @@ CUDA_VISIBLE_DEVICES=0 \ swift sft \ --model Qwen/Qwen2.5-7B-Instruct \ --train_type lora \ - --dataset swift/self-cognition#1000 \ + --dataset 'swift/self-cognition#1000' \ --num_train_epochs 1 \ --per_device_train_batch_size 1 \ --learning_rate 1e-4 \ diff --git a/examples/train/rlhf/kto.sh b/examples/train/rlhf/kto.sh index 96ebe3bed..5d031bce4 100644 --- a/examples/train/rlhf/kto.sh +++ b/examples/train/rlhf/kto.sh @@ -6,7 +6,7 @@ swift rlhf \ --rlhf_type kto \ --model Qwen/Qwen2.5-7B-Instruct \ --train_type lora \ - --dataset AI-ModelScope/ultrafeedback-binarized-preferences-cleaned-kto#10000 \ + --dataset 'AI-ModelScope/ultrafeedback-binarized-preferences-cleaned-kto#10000' \ --num_train_epochs 2 \ --learning_rate 1e-4 \ --lora_rank 8 \ diff --git a/examples/train/seq_cls/sft.sh b/examples/train/seq_cls/sft.sh index a7f0d229c..067c6664e 100644 --- a/examples/train/seq_cls/sft.sh +++ b/examples/train/seq_cls/sft.sh @@ -4,7 +4,7 @@ CUDA_VISIBLE_DEVICES=0 \ swift sft \ --model Qwen/Qwen2.5-7B \ --train_type lora \ - --dataset DAMO_NLP/jd:cls#2000 \ + --dataset 'DAMO_NLP/jd:cls#2000' \ --torch_dtype bfloat16 \ --num_train_epochs 1 \ --per_device_train_batch_size 1 \ diff --git a/examples/train/sequence_parallel/train.sh b/examples/train/sequence_parallel/train.sh index 14bc6e1d8..1b61f3d5f 100644 --- a/examples/train/sequence_parallel/train.sh +++ b/examples/train/sequence_parallel/train.sh @@ -6,7 +6,7 @@ NPROC_PER_NODE=$nproc_per_node \ swift sft \ --model Qwen/Qwen2.5-7B-Instruct \ --train_type lora \ - --dataset AI-ModelScope/LongAlpaca-12k#5000 \ + --dataset 'AI-ModelScope/LongAlpaca-12k#5000' \ --num_train_epochs 1 \ --sequence_parallel_size 2 \ --learning_rate 1e-4 \ diff --git a/examples/train/streaming/train.sh b/examples/train/streaming/train.sh index e941e0a9a..b864a48f2 100644 --- a/examples/train/streaming/train.sh +++ b/examples/train/streaming/train.sh @@ -2,7 +2,7 @@ CUDA_VISIBLE_DEVICES=0 \ swift sft \ --model Qwen/Qwen2.5-7B-Instruct \ --train_type lora \ - --dataset swift/self-cognition#1000 \ + --dataset 'swift/self-cognition#1000' \ --streaming true \ --max_steps 1000 \ --learning_rate 1e-4 \ diff --git a/examples/train/tuners/adalora/train.sh b/examples/train/tuners/adalora/train.sh index e8b11ddf9..d22860d1e 100644 --- a/examples/train/tuners/adalora/train.sh +++ b/examples/train/tuners/adalora/train.sh @@ -3,7 +3,7 @@ CUDA_VISIBLE_DEVICES=0 \ swift sft \ --model Qwen/Qwen2.5-7B-Instruct \ --train_type adalora \ - --dataset swift/self-cognition#1000 \ + --dataset 'swift/self-cognition#1000' \ --num_train_epochs 1 \ --per_device_train_batch_size 1 \ --learning_rate 1e-4 \ diff --git a/examples/train/tuners/adapter/train.sh b/examples/train/tuners/adapter/train.sh index 958d1b803..d334ae6cb 100644 --- a/examples/train/tuners/adapter/train.sh +++ b/examples/train/tuners/adapter/train.sh @@ -3,7 +3,7 @@ CUDA_VISIBLE_DEVICES=0 \ swift sft \ --model Qwen/Qwen2.5-7B-Instruct \ --train_type adapter \ - --dataset swift/self-cognition#1000 \ + --dataset 'swift/self-cognition#1000' \ --num_train_epochs 1 \ --per_device_train_batch_size 1 \ --learning_rate 1e-4 \ diff --git a/examples/train/tuners/boft/train.sh b/examples/train/tuners/boft/train.sh index b6907d0e6..900bf2351 100644 --- a/examples/train/tuners/boft/train.sh +++ b/examples/train/tuners/boft/train.sh @@ -4,7 +4,7 @@ swift sft \ --model Qwen/Qwen2.5-7B-Instruct \ --train_type boft \ --label_names labels \ - --dataset swift/self-cognition#1000 \ + --dataset 'swift/self-cognition#1000' \ --num_train_epochs 1 \ --per_device_train_batch_size 1 \ --learning_rate 1e-4 \ diff --git a/examples/train/tuners/bone/train.sh b/examples/train/tuners/bone/train.sh index 88c220fac..1dff4f740 100644 --- a/examples/train/tuners/bone/train.sh +++ b/examples/train/tuners/bone/train.sh @@ -4,7 +4,7 @@ swift sft \ --model Qwen/Qwen2.5-7B-Instruct \ --train_type bone \ --label_names labels \ - --dataset swift/self-cognition#1000 \ + --dataset 'swift/self-cognition#1000' \ --num_train_epochs 1 \ --per_device_train_batch_size 1 \ --learning_rate 1e-4 \ diff --git a/examples/train/tuners/dora/train.sh b/examples/train/tuners/dora/train.sh index 2eb780f83..2bc7d9f23 100644 --- a/examples/train/tuners/dora/train.sh +++ b/examples/train/tuners/dora/train.sh @@ -4,7 +4,7 @@ swift sft \ --model Qwen/Qwen2.5-7B-Instruct \ --train_type lora \ --use_dora true \ - --dataset swift/self-cognition#1000 \ + --dataset 'swift/self-cognition#1000' \ --num_train_epochs 1 \ --per_device_train_batch_size 1 \ --learning_rate 1e-4 \ diff --git a/examples/train/tuners/galore/train_galore.sh b/examples/train/tuners/galore/train_galore.sh index ac47f68e6..4728e0e49 100644 --- a/examples/train/tuners/galore/train_galore.sh +++ b/examples/train/tuners/galore/train_galore.sh @@ -3,7 +3,7 @@ CUDA_VISIBLE_DEVICES=0 \ swift sft \ --model Qwen/Qwen2.5-7B-Instruct \ --train_type full \ - --dataset swift/self-cognition#1000 \ + --dataset 'swift/self-cognition#1000' \ --num_train_epochs 1 \ --per_device_train_batch_size 1 \ --learning_rate 1e-5 \ diff --git a/examples/train/tuners/galore/train_qgalore.sh b/examples/train/tuners/galore/train_qgalore.sh index a250c3a17..cdebbe044 100644 --- a/examples/train/tuners/galore/train_qgalore.sh +++ b/examples/train/tuners/galore/train_qgalore.sh @@ -5,7 +5,7 @@ swift sft \ --model Qwen/Qwen2.5-7B-Instruct \ --train_type full \ --torch_dtype bfloat16 \ - --dataset lvjianjin/AdvertiseGen#1000 \ + --dataset 'lvjianjin/AdvertiseGen#1000' \ --num_train_epochs 1 \ --per_device_train_batch_size 1 \ --learning_rate 1e-5 \ diff --git a/examples/train/tuners/liger/train.sh b/examples/train/tuners/liger/train.sh index 068020de9..bb872a710 100644 --- a/examples/train/tuners/liger/train.sh +++ b/examples/train/tuners/liger/train.sh @@ -3,7 +3,7 @@ CUDA_VISIBLE_DEVICES=0 \ swift sft \ --model Qwen/Qwen2.5-7B-Instruct \ --train_type lora \ - --dataset swift/self-cognition#1000 \ + --dataset 'swift/self-cognition#1000' \ --use_liger true \ --num_train_epochs 1 \ --per_device_train_batch_size 1 \ diff --git a/examples/train/tuners/lisa/train.sh b/examples/train/tuners/lisa/train.sh index 704d43442..8a8475a43 100644 --- a/examples/train/tuners/lisa/train.sh +++ b/examples/train/tuners/lisa/train.sh @@ -3,7 +3,7 @@ CUDA_VISIBLE_DEVICES=0 \ swift sft \ --model Qwen/Qwen2.5-7B-Instruct \ --train_type full \ - --dataset swift/self-cognition#1000 \ + --dataset 'swift/self-cognition#1000' \ --lisa_activated_layers 2 \ --num_train_epochs 1 \ --per_device_train_batch_size 1 \ diff --git a/examples/train/tuners/llamapro/train.sh b/examples/train/tuners/llamapro/train.sh index b1d328c2b..d0956449d 100644 --- a/examples/train/tuners/llamapro/train.sh +++ b/examples/train/tuners/llamapro/train.sh @@ -3,7 +3,7 @@ CUDA_VISIBLE_DEVICES=0 \ swift sft \ --model Qwen/Qwen2.5-7B-Instruct \ --train_type llamapro \ - --dataset swift/self-cognition#1000 \ + --dataset 'swift/self-cognition#1000' \ --llamapro_num_new_blocks 4 \ --num_train_epochs 1 \ --per_device_train_batch_size 1 \ diff --git a/examples/train/tuners/longlora/train.sh b/examples/train/tuners/longlora/train.sh index 64c03f42d..35697f21e 100644 --- a/examples/train/tuners/longlora/train.sh +++ b/examples/train/tuners/longlora/train.sh @@ -2,7 +2,7 @@ CUDA_VISIBLE_DEVICES=0 \ swift sft \ --model LLM-Research/Meta-Llama-3.1-8B-Instruct \ --train_type longlora \ - --dataset AI-ModelScope/LongAlpaca-12k#1000 \ + --dataset 'AI-ModelScope/LongAlpaca-12k#1000' \ --num_train_epochs 1 \ --learning_rate 1e-4 \ --attn_impl flash_attn \ diff --git a/examples/train/tuners/lora-ga/train.sh b/examples/train/tuners/lora-ga/train.sh index dd132200d..fbfe76cc6 100644 --- a/examples/train/tuners/lora-ga/train.sh +++ b/examples/train/tuners/lora-ga/train.sh @@ -3,7 +3,7 @@ CUDA_VISIBLE_DEVICES=0 \ swift sft \ --model Qwen/Qwen2-1.5B-Instruct \ --train_type lora \ - --dataset swift/self-cognition#1000 \ + --dataset 'swift/self-cognition#1000' \ --num_train_epochs 1 \ --per_device_train_batch_size 1 \ --learning_rate 1e-4 \ diff --git a/examples/train/tuners/lora/train.sh b/examples/train/tuners/lora/train.sh index 51058b446..e8c231c67 100644 --- a/examples/train/tuners/lora/train.sh +++ b/examples/train/tuners/lora/train.sh @@ -3,7 +3,7 @@ CUDA_VISIBLE_DEVICES=0 \ swift sft \ --model Qwen/Qwen2.5-7B-Instruct \ --train_type lora \ - --dataset swift/self-cognition#1000 \ + --dataset 'swift/self-cognition#1000' \ --num_train_epochs 1 \ --per_device_train_batch_size 1 \ --learning_rate 1e-4 \ diff --git a/examples/train/tuners/neftune/train.sh b/examples/train/tuners/neftune/train.sh index a6dcb1585..bf53a4d90 100644 --- a/examples/train/tuners/neftune/train.sh +++ b/examples/train/tuners/neftune/train.sh @@ -3,7 +3,7 @@ CUDA_VISIBLE_DEVICES=0 \ swift sft \ --model Qwen/Qwen2.5-7B-Instruct \ --train_type lora \ - --dataset swift/self-cognition#1000 \ + --dataset 'swift/self-cognition#1000' \ --num_train_epochs 1 \ --per_device_train_batch_size 1 \ --neftune_noise_alpha 15 \ diff --git a/examples/train/tuners/olora/train.sh b/examples/train/tuners/olora/train.sh index 2614fb8d1..1ead995df 100644 --- a/examples/train/tuners/olora/train.sh +++ b/examples/train/tuners/olora/train.sh @@ -3,7 +3,7 @@ CUDA_VISIBLE_DEVICES=0 \ swift sft \ --model Qwen/Qwen2.5-7B-Instruct \ --train_type lora \ - --dataset swift/self-cognition#1000 \ + --dataset 'swift/self-cognition#1000' \ --num_train_epochs 1 \ --per_device_train_batch_size 1 \ --learning_rate 1e-4 \ diff --git a/examples/train/tuners/pissa/train.sh b/examples/train/tuners/pissa/train.sh index c258c124e..9139ba441 100644 --- a/examples/train/tuners/pissa/train.sh +++ b/examples/train/tuners/pissa/train.sh @@ -3,7 +3,7 @@ CUDA_VISIBLE_DEVICES=0 \ swift sft \ --model Qwen/Qwen2.5-7B-Instruct \ --train_type lora \ - --dataset swift/self-cognition#1000 \ + --dataset 'swift/self-cognition#1000' \ --num_train_epochs 1 \ --per_device_train_batch_size 1 \ --learning_rate 1e-4 \ diff --git a/examples/train/tuners/qlora/train.sh b/examples/train/tuners/qlora/train.sh index fbf97a682..716845374 100644 --- a/examples/train/tuners/qlora/train.sh +++ b/examples/train/tuners/qlora/train.sh @@ -2,7 +2,7 @@ CUDA_VISIBLE_DEVICES=0 \ swift sft \ --model Qwen/Qwen2.5-7B-Instruct \ --train_type lora \ - --dataset swift/self-cognition#1000 \ + --dataset 'swift/self-cognition#1000' \ --num_train_epochs 1 \ --per_device_train_batch_size 1 \ --learning_rate 1e-4 \ diff --git a/examples/train/tuners/reft/train.sh b/examples/train/tuners/reft/train.sh index c4a0d593a..0b3853bfe 100644 --- a/examples/train/tuners/reft/train.sh +++ b/examples/train/tuners/reft/train.sh @@ -2,7 +2,7 @@ CUDA_VISIBLE_DEVICES=0 \ swift sft \ --model Qwen/Qwen2.5-7B-Instruct \ --train_type reft \ - --dataset swift/self-cognition#1000 \ + --dataset 'swift/self-cognition#1000' \ --reft_intervention_type 'LoreftIntervention' \ --num_train_epochs 1 \ --per_device_train_batch_size 1 \ diff --git a/examples/train/tuners/unsloth/train.sh b/examples/train/tuners/unsloth/train.sh index 87adf7ff1..829114896 100644 --- a/examples/train/tuners/unsloth/train.sh +++ b/examples/train/tuners/unsloth/train.sh @@ -4,7 +4,7 @@ swift sft \ --model Qwen/Qwen2.5-7B-Instruct \ --tuner_backend unsloth \ --train_type lora \ - --dataset swift/self-cognition#1000 \ + --dataset 'swift/self-cognition#1000' \ --num_train_epochs 1 \ --per_device_train_batch_size 1 \ --learning_rate 1e-4 \ diff --git a/requirements/framework.txt b/requirements/framework.txt index 541d01414..ee068f99a 100644 --- a/requirements/framework.txt +++ b/requirements/framework.txt @@ -28,7 +28,7 @@ sentencepiece tensorboard tiktoken tqdm -transformers>=4.33,<4.48 +transformers>=4.33,<4.49 transformers_stream_generator trl>=0.11,<0.12 uvicorn