
Commit

update
Jintao-Huang committed Dec 26, 2024
1 parent 1f1abf4 commit 1f0668d
Showing 8 changed files with 67 additions and 5 deletions.
10 changes: 10 additions & 0 deletions examples/train/seq_cls/bert/deploy.sh
@@ -0,0 +1,10 @@
CUDA_VISIBLE_DEVICES=0 \
swift deploy \
    --model output/vx-xxx/checkpoint-xxx \
    --served_model_name bert-base-chinese

# curl http://localhost:8000/v1/chat/completions -H "Content-Type: application/json" -d '{
# "model": "bert-base-chinese",
# "messages": [{"role": "user", "content": "Task: Sentiment Classification\nSentence: 包装差,容易被调包。\nCategory: negative, positive\nOutput:"}],
# "temperature": 0
# }'
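
For reference, the commented curl request above can also be issued from Python. A minimal sketch, assuming the `openai` client package is installed and the server started by this script is listening on localhost:8000 (the API key is a dummy placeholder):

# Mirrors the curl request above against swift deploy's OpenAI-compatible endpoint.
from openai import OpenAI

client = OpenAI(base_url='http://localhost:8000/v1', api_key='EMPTY')  # dummy key

prompt = ('Task: Sentiment Classification\n'
          'Sentence: 包装差,容易被调包。\n'
          'Category: negative, positive\n'
          'Output:')
resp = client.chat.completions.create(
    model='bert-base-chinese',
    messages=[{'role': 'user', 'content': prompt}],
    temperature=0)
print(resp.choices[0].message.content)  # a category label, e.g. 'negative'
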
5 changes: 5 additions & 0 deletions examples/train/seq_cls/bert/infer.sh
@@ -0,0 +1,5 @@
CUDA_VISIBLE_DEVICES=0 \
swift infer \
    --model output/vx-xxx/checkpoint-xxx \
    --load_data_args true \
    --max_batch_size 16
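
The same inference run can be started from Python. A minimal sketch, assuming the `InferArguments` fields mirror the CLI flags one-to-one, as they do in tests/train/test_cls.py below (the checkpoint path is a placeholder):

import os
from swift.llm import InferArguments, infer_main

os.environ['CUDA_VISIBLE_DEVICES'] = '0'
# This is a full fine-tune, so the checkpoint is passed as `model`, not `adapters`.
infer_main(InferArguments(
    model='output/vx-xxx/checkpoint-xxx',  # placeholder path, as in infer.sh
    load_data_args=True,  # reuse the dataset arguments saved during training
    max_batch_size=16))
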
24 changes: 24 additions & 0 deletions examples/train/seq_cls/bert/sft.sh
@@ -0,0 +1,24 @@
# If `num_labels` is provided, the task is treated as a classification task,
# and AutoModelForSequenceClassification is used to load the model.
# BERT-style models do not require a chat template, so they can usually be used without registering one.
CUDA_VISIBLE_DEVICES=0 \
swift sft \
    --model AI-ModelScope/bert-base-chinese \
    --train_type full \
    --dataset 'DAMO_NLP/jd:cls#2000' \
    --torch_dtype bfloat16 \
    --num_train_epochs 1 \
    --per_device_train_batch_size 1 \
    --per_device_eval_batch_size 1 \
    --learning_rate 1e-4 \
    --gradient_accumulation_steps 16 \
    --eval_steps 50 \
    --save_steps 50 \
    --save_total_limit 2 \
    --logging_steps 5 \
    --max_length 512 \
    --output_dir output \
    --warmup_ratio 0.05 \
    --dataloader_num_workers 4 \
    --num_labels 2 \
    --task_type seq_cls
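
The same training run can be launched programmatically. A minimal sketch using the `sft_main`/`TrainArguments` entry points that tests/train/test_cls.py below exercises, assuming the dataclass fields mirror the CLI flags (only the key flags are repeated here):

from swift.llm import TrainArguments, sft_main

result = sft_main(TrainArguments(
    model='AI-ModelScope/bert-base-chinese',
    train_type='full',
    dataset=['DAMO_NLP/jd:cls#2000'],
    num_labels=2,        # switches loading to AutoModelForSequenceClassification
    task_type='seq_cls',
    num_train_epochs=1,
    learning_rate=1e-4,
    max_length=512,
    output_dir='output'))
print(result['last_model_checkpoint'])  # e.g. output/vx-xxx/checkpoint-xxx
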
9 changes: 9 additions & 0 deletions examples/train/seq_cls/qwen2_5/deploy.sh
@@ -0,0 +1,9 @@
CUDA_VISIBLE_DEVICES=0 \
swift deploy \
    --adapters output/vx-xxx/checkpoint-xxx

# curl http://localhost:8000/v1/chat/completions -H "Content-Type: application/json" -d '{
# "model": "Qwen2.5-7B",
# "messages": [{"role": "user", "content": "Task: Sentiment Classification\nSentence: 包装差,容易被调包。\nCategory: negative, positive\nOutput:"}],
# "temperature": 0
# }'
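
Because the adapter checkpoint determines the base model, the served model name (`Qwen2.5-7B` in the curl example) can be looked up rather than hard-coded. A sketch, assuming the server exposes the standard OpenAI-compatible /v1/models route:

import requests

# List the models served by the deployment above (assumed /v1/models route).
resp = requests.get('http://localhost:8000/v1/models')
for m in resp.json()['data']:
    print(m['id'])  # e.g. 'Qwen2.5-7B'
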
@@ -1,6 +1,5 @@
CUDA_VISIBLE_DEVICES=0 \
swift infer \
    --adapters output/vx-xxx/checkpoint-xxx \
    --max_new_tokens 2048 \
    --load_data_args true \
    --max_batch_size 16
File renamed without changes.
2 changes: 2 additions & 0 deletions swift/llm/infer/deploy.py
@@ -121,6 +121,8 @@ def _post_process(self, request_info, response, return_cmpl_response: bool = False):

    def _set_request_config(self, request_config) -> None:
        default_request_config = self.args.get_request_config()
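        # No defaults were configured on the server side; keep the request as-is.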
        if default_request_config is None:
            return
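        # Fill request fields the client left unset (None or empty) from the server defaults.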
        for key, val in asdict(request_config).items():
            default_val = getattr(default_request_config, key)
            if default_val is not None and (val is None or isinstance(val, (list, tuple)) and len(val) == 0):
21 changes: 17 additions & 4 deletions tests/train/test_cls.py
@@ -1,5 +1,7 @@
import os

os.environ['CUDA_VISIBLE_DEVICES'] = '0'

kwargs = {
    'per_device_train_batch_size': 2,
    'per_device_eval_batch_size': 2,
@@ -12,18 +14,29 @@
def test_llm():
    from swift.llm import TrainArguments, sft_main, infer_main, InferArguments
    result = sft_main(
        TrainArguments(model='Qwen/Qwen2.5-7B-Instruct', num_labels=2, dataset=['DAMO_NLP/jd:cls#2000'], **kwargs))
        TrainArguments(
            model='Qwen/Qwen2.5-1.5B-Instruct',
            train_type='lora',
            num_labels=2,
            dataset=['DAMO_NLP/jd:cls#2000'],
            **kwargs))
    last_model_checkpoint = result['last_model_checkpoint']
    infer_main(InferArguments(adapters=last_model_checkpoint, load_data_args=True))


def test_bert():
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'

    from swift.llm import TrainArguments, sft_main, infer_main, InferArguments
    result = sft_main(
        TrainArguments(model='answerdotai/ModernBERT-base', num_labels=2, dataset=['DAMO_NLP/jd:cls#2000'], **kwargs))
        TrainArguments(
            model='answerdotai/ModernBERT-base',
            # model='iic/nlp_structbert_backbone_base_std',
            train_type='full',
            num_labels=2,
            dataset=['DAMO_NLP/jd:cls#2000'],
            **kwargs))
    last_model_checkpoint = result['last_model_checkpoint']
    infer_main(InferArguments(adapters=last_model_checkpoint, load_data_args=True))
    infer_main(InferArguments(model=last_model_checkpoint, load_data_args=True))


if __name__ == '__main__':
