
Commit

update
Jintao-Huang committed Dec 26, 2024
1 parent 1f1abf4 commit 1f0668d
Showing 8 changed files with 67 additions and 5 deletions.
10 changes: 10 additions & 0 deletions examples/train/seq_cls/bert/deploy.sh
@@ -0,0 +1,10 @@
CUDA_VISIBLE_DEVICES=0 \
swift deploy \
    --model output/vx-xxx/checkpoint-xxx \
    --served_model_name bert-base-chinese

# curl http://localhost:8000/v1/chat/completions -H "Content-Type: application/json" -d '{
# "model": "bert-base-chinese",
# "messages": [{"role": "user", "content": "Task: Sentiment Classification\nSentence: 包装差,容易被调包。\nCategory: negative, positive\nOutput:"}],
# "temperature": 0
# }'
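
For reference, the commented curl request above can also be issued from Python. A minimal sketch, assuming the `openai` client package is installed and the server started by this script is listening on localhost:8000 (the API key is a dummy placeholder):

# Mirrors the curl request above against swift deploy's OpenAI-compatible endpoint.
from openai import OpenAI

client = OpenAI(base_url='http://localhost:8000/v1', api_key='EMPTY')  # dummy key

prompt = ('Task: Sentiment Classification\n'
          'Sentence: 包装差,容易被调包。\n'
          'Category: negative, positive\n'
          'Output:')
resp = client.chat.completions.create(
    model='bert-base-chinese',
    messages=[{'role': 'user', 'content': prompt}],
    temperature=0)
print(resp.choices[0].message.content)  # a category label, e.g. 'negative'
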
5 changes: 5 additions & 0 deletions examples/train/seq_cls/bert/infer.sh
@@ -0,0 +1,5 @@
CUDA_VISIBLE_DEVICES=0 \
swift infer \
    --model output/vx-xxx/checkpoint-xxx \
    --load_data_args true \
    --max_batch_size 16
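
The same inference run can be started from Python. A minimal sketch, assuming the `InferArguments` fields mirror the CLI flags one-to-one, as they do in tests/train/test_cls.py below (the checkpoint path is a placeholder):

import os
from swift.llm import InferArguments, infer_main

os.environ['CUDA_VISIBLE_DEVICES'] = '0'
# This is a full fine-tune, so the checkpoint is passed as `model`, not `adapters`.
infer_main(InferArguments(
    model='output/vx-xxx/checkpoint-xxx',  # placeholder path, as in infer.sh
    load_data_args=True,  # reuse the dataset arguments saved during training
    max_batch_size=16))
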
24 changes: 24 additions & 0 deletions examples/train/seq_cls/bert/sft.sh
@@ -0,0 +1,24 @@
# If `num_labels` is provided, the task is treated as a classification task,
# and AutoModelForSequenceClassification is used to load the model.
# BERT-style models do not require a chat template, so they can usually be used without registering one.
CUDA_VISIBLE_DEVICES=0 \
swift sft \
    --model AI-ModelScope/bert-base-chinese \
    --train_type full \
    --dataset 'DAMO_NLP/jd:cls#2000' \
    --torch_dtype bfloat16 \
    --num_train_epochs 1 \
    --per_device_train_batch_size 1 \
    --per_device_eval_batch_size 1 \
    --learning_rate 1e-4 \
    --gradient_accumulation_steps 16 \
    --eval_steps 50 \
    --save_steps 50 \
    --save_total_limit 2 \
    --logging_steps 5 \
    --max_length 512 \
    --output_dir output \
    --warmup_ratio 0.05 \
    --dataloader_num_workers 4 \
    --num_labels 2 \
    --task_type seq_cls
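
The same training run can be launched programmatically. A minimal sketch using the `sft_main`/`TrainArguments` entry points that tests/train/test_cls.py below exercises, assuming the dataclass fields mirror the CLI flags (only the key flags are repeated here):

from swift.llm import TrainArguments, sft_main

result = sft_main(TrainArguments(
    model='AI-ModelScope/bert-base-chinese',
    train_type='full',
    dataset=['DAMO_NLP/jd:cls#2000'],
    num_labels=2,        # switches loading to AutoModelForSequenceClassification
    task_type='seq_cls',
    num_train_epochs=1,
    learning_rate=1e-4,
    max_length=512,
    output_dir='output'))
print(result['last_model_checkpoint'])  # e.g. output/vx-xxx/checkpoint-xxx
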
9 changes: 9 additions & 0 deletions examples/train/seq_cls/qwen2_5/deploy.sh
@@ -0,0 +1,9 @@
CUDA_VISIBLE_DEVICES=0 \
swift deploy \
    --adapters output/vx-xxx/checkpoint-xxx

# curl http://localhost:8000/v1/chat/completions -H "Content-Type: application/json" -d '{
# "model": "Qwen2.5-7B",
# "messages": [{"role": "user", "content": "Task: Sentiment Classification\nSentence: 包装差,容易被调包。\nCategory: negative, positive\nOutput:"}],
# "temperature": 0
# }'
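
Because the adapter checkpoint determines the base model, the served model name (`Qwen2.5-7B` in the curl example) can be looked up rather than hard-coded. A sketch, assuming the server exposes the standard OpenAI-compatible /v1/models route:

import requests

# List the models served by the deployment above (assumed /v1/models route).
resp = requests.get('http://localhost:8000/v1/models')
for m in resp.json()['data']:
    print(m['id'])  # e.g. 'Qwen2.5-7B'
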
@@ -1,6 +1,5 @@
CUDA_VISIBLE_DEVICES=0 \
swift infer \
    --adapters output/vx-xxx/checkpoint-xxx \
    --max_new_tokens 2048 \
    --load_data_args true \
    --max_batch_size 16
File renamed without changes.
2 changes: 2 additions & 0 deletions swift/llm/infer/deploy.py
@@ -121,6 +121,8 @@ def _post_process(self, request_info, response, return_cmpl_response: bool = False):

    def _set_request_config(self, request_config) -> None:
        default_request_config = self.args.get_request_config()
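        # No defaults were configured on the server side; keep the request as-is.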
        if default_request_config is None:
            return
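        # Fill request fields the client left unset (None or empty) from the server defaults.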
        for key, val in asdict(request_config).items():
            default_val = getattr(default_request_config, key)
            if default_val is not None and (val is None or isinstance(val, (list, tuple)) and len(val) == 0):
21 changes: 17 additions & 4 deletions tests/train/test_cls.py
@@ -1,5 +1,7 @@
import os

os.environ['CUDA_VISIBLE_DEVICES'] = '0'

kwargs = {
    'per_device_train_batch_size': 2,
    'per_device_eval_batch_size': 2,
@@ -12,18 +14,29 @@
def test_llm():
    from swift.llm import TrainArguments, sft_main, infer_main, InferArguments
    result = sft_main(
        TrainArguments(model='Qwen/Qwen2.5-7B-Instruct', num_labels=2, dataset=['DAMO_NLP/jd:cls#2000'], **kwargs))
        TrainArguments(
            model='Qwen/Qwen2.5-1.5B-Instruct',
            train_type='lora',
            num_labels=2,
            dataset=['DAMO_NLP/jd:cls#2000'],
            **kwargs))
    last_model_checkpoint = result['last_model_checkpoint']
    infer_main(InferArguments(adapters=last_model_checkpoint, load_data_args=True))


def test_bert():
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'

    from swift.llm import TrainArguments, sft_main, infer_main, InferArguments
    result = sft_main(
        TrainArguments(model='answerdotai/ModernBERT-base', num_labels=2, dataset=['DAMO_NLP/jd:cls#2000'], **kwargs))
        TrainArguments(
            model='answerdotai/ModernBERT-base',
            # model='iic/nlp_structbert_backbone_base_std',
            train_type='full',
            num_labels=2,
            dataset=['DAMO_NLP/jd:cls#2000'],
            **kwargs))
    last_model_checkpoint = result['last_model_checkpoint']
    infer_main(InferArguments(adapters=last_model_checkpoint, load_data_args=True))
    infer_main(InferArguments(model=last_model_checkpoint, load_data_args=True))


if __name__ == '__main__':
