Merge branch 'main' into release/3.0
Jintao-Huang committed Dec 27, 2024
2 parents 11d4180 + 263fc1c commit 4dac876
Showing 9 changed files with 62 additions and 20 deletions.
2 changes: 2 additions & 0 deletions docs/source/Instruction/支持的模型和数据集.md
@@ -344,6 +344,8 @@
|[deepseek-ai/DeepSeek-V2-Chat](https://modelscope.cn/models/deepseek-ai/DeepSeek-V2-Chat)|deepseek_v2|deepseek|transformers>=4.39.3|-|[deepseek-ai/DeepSeek-V2-Chat](https://huggingface.co/deepseek-ai/DeepSeek-V2-Chat)|
|[deepseek-ai/DeepSeek-V2.5](https://modelscope.cn/models/deepseek-ai/DeepSeek-V2.5)|deepseek_v2_5|deepseek_v2_5|transformers>=4.39.3|-|[deepseek-ai/DeepSeek-V2.5](https://huggingface.co/deepseek-ai/DeepSeek-V2.5)|
|[deepseek-ai/DeepSeek-V2.5-1210](https://modelscope.cn/models/deepseek-ai/DeepSeek-V2.5-1210)|deepseek_v2_5|deepseek_v2_5|transformers>=4.39.3|-|[deepseek-ai/DeepSeek-V2.5-1210](https://huggingface.co/deepseek-ai/DeepSeek-V2.5-1210)|
+|[deepseek-ai/DeepSeek-V3-Base](https://modelscope.cn/models/deepseek-ai/DeepSeek-V3-Base)|deepseek_v2_5|deepseek_v2_5|transformers>=4.39.3|-|[deepseek-ai/DeepSeek-V3-Base](https://huggingface.co/deepseek-ai/DeepSeek-V3-Base)|
+|[deepseek-ai/DeepSeek-V3](https://modelscope.cn/models/deepseek-ai/DeepSeek-V3)|deepseek_v2_5|deepseek_v2_5|transformers>=4.39.3|-|[deepseek-ai/DeepSeek-V3](https://huggingface.co/deepseek-ai/DeepSeek-V3)|
|[OpenBuddy/openbuddy-llama-65b-v8-bf16](https://modelscope.cn/models/OpenBuddy/openbuddy-llama-65b-v8-bf16)|openbuddy_llama|openbuddy|-|-|[OpenBuddy/openbuddy-llama-65b-v8-bf16](https://huggingface.co/OpenBuddy/openbuddy-llama-65b-v8-bf16)|
|[OpenBuddy/openbuddy-llama2-13b-v8.1-fp16](https://modelscope.cn/models/OpenBuddy/openbuddy-llama2-13b-v8.1-fp16)|openbuddy_llama|openbuddy|-|-|[OpenBuddy/openbuddy-llama2-13b-v8.1-fp16](https://huggingface.co/OpenBuddy/openbuddy-llama2-13b-v8.1-fp16)|
|[OpenBuddy/openbuddy-llama2-70b-v10.1-bf16](https://modelscope.cn/models/OpenBuddy/openbuddy-llama2-70b-v10.1-bf16)|openbuddy_llama|openbuddy|-|-|[OpenBuddy/openbuddy-llama2-70b-v10.1-bf16](https://huggingface.co/OpenBuddy/openbuddy-llama2-70b-v10.1-bf16)|
2 changes: 2 additions & 0 deletions docs/source_en/Instruction/Supported-models-and-datasets.md
@@ -344,6 +344,8 @@ The table below introduces the models integrated with ms-swift:
|[deepseek-ai/DeepSeek-V2-Chat](https://modelscope.cn/models/deepseek-ai/DeepSeek-V2-Chat)|deepseek_v2|deepseek|transformers>=4.39.3|-|[deepseek-ai/DeepSeek-V2-Chat](https://huggingface.co/deepseek-ai/DeepSeek-V2-Chat)|
|[deepseek-ai/DeepSeek-V2.5](https://modelscope.cn/models/deepseek-ai/DeepSeek-V2.5)|deepseek_v2_5|deepseek_v2_5|transformers>=4.39.3|-|[deepseek-ai/DeepSeek-V2.5](https://huggingface.co/deepseek-ai/DeepSeek-V2.5)|
|[deepseek-ai/DeepSeek-V2.5-1210](https://modelscope.cn/models/deepseek-ai/DeepSeek-V2.5-1210)|deepseek_v2_5|deepseek_v2_5|transformers>=4.39.3|-|[deepseek-ai/DeepSeek-V2.5-1210](https://huggingface.co/deepseek-ai/DeepSeek-V2.5-1210)|
+|[deepseek-ai/DeepSeek-V3-Base](https://modelscope.cn/models/deepseek-ai/DeepSeek-V3-Base)|deepseek_v2_5|deepseek_v2_5|transformers>=4.39.3|-|[deepseek-ai/DeepSeek-V3-Base](https://huggingface.co/deepseek-ai/DeepSeek-V3-Base)|
+|[deepseek-ai/DeepSeek-V3](https://modelscope.cn/models/deepseek-ai/DeepSeek-V3)|deepseek_v2_5|deepseek_v2_5|transformers>=4.39.3|-|[deepseek-ai/DeepSeek-V3](https://huggingface.co/deepseek-ai/DeepSeek-V3)|
|[OpenBuddy/openbuddy-llama-65b-v8-bf16](https://modelscope.cn/models/OpenBuddy/openbuddy-llama-65b-v8-bf16)|openbuddy_llama|openbuddy|-|-|[OpenBuddy/openbuddy-llama-65b-v8-bf16](https://huggingface.co/OpenBuddy/openbuddy-llama-65b-v8-bf16)|
|[OpenBuddy/openbuddy-llama2-13b-v8.1-fp16](https://modelscope.cn/models/OpenBuddy/openbuddy-llama2-13b-v8.1-fp16)|openbuddy_llama|openbuddy|-|-|[OpenBuddy/openbuddy-llama2-13b-v8.1-fp16](https://huggingface.co/OpenBuddy/openbuddy-llama2-13b-v8.1-fp16)|
|[OpenBuddy/openbuddy-llama2-70b-v10.1-bf16](https://modelscope.cn/models/OpenBuddy/openbuddy-llama2-70b-v10.1-bf16)|openbuddy_llama|openbuddy|-|-|[OpenBuddy/openbuddy-llama2-70b-v10.1-bf16](https://huggingface.co/OpenBuddy/openbuddy-llama2-70b-v10.1-bf16)|
10 changes: 1 addition & 9 deletions swift/llm/infer/infer_engine/infer_engine.py
@@ -244,12 +244,4 @@ def func(target, queue, args, kwargs):

@staticmethod
def safe_asyncio_run(coro):
-try:
-loop = asyncio.get_running_loop()
-except RuntimeError:
-loop = None
-if loop:
-result = InferEngine.thread_run(asyncio.run, args=(coro, ))
-else:
-result = asyncio.run(coro)
-return result
+return InferEngine.thread_run(asyncio.run, args=(coro, ))
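The simplification above drops the branch on asyncio.get_running_loop() and always hands the coroutine to a worker thread. A minimal, self-contained sketch of that pattern (illustrative names only; it assumes InferEngine.thread_run is essentially a start/join wrapper around threading.Thread that returns the target's result):

```python
import asyncio
import threading


def run_coro_in_fresh_loop(coro):
    """Run `coro` on a brand-new event loop inside a worker thread.

    Because asyncio.run() creates and closes its own loop in that thread,
    this works even when the caller is already inside a running event loop
    (e.g. a notebook or an async web handler), where calling asyncio.run()
    directly would raise RuntimeError.
    """
    result = {}

    def target():
        result['value'] = asyncio.run(coro)

    t = threading.Thread(target=target)
    t.start()
    t.join()
    return result['value']


async def _demo():
    await asyncio.sleep(0.01)
    return 42


print(run_coro_in_fresh_loop(_demo()))  # 42
```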
2 changes: 2 additions & 0 deletions swift/llm/model/model/deepseek.py
@@ -108,6 +108,8 @@ def _dtype_hook(module, input, output):
ModelGroup([
Model('deepseek-ai/DeepSeek-V2.5', 'deepseek-ai/DeepSeek-V2.5'),
Model('deepseek-ai/DeepSeek-V2.5-1210', 'deepseek-ai/DeepSeek-V2.5-1210'),
+Model('deepseek-ai/DeepSeek-V3-Base', 'deepseek-ai/DeepSeek-V3-Base'),
+Model('deepseek-ai/DeepSeek-V3', 'deepseek-ai/DeepSeek-V3'),
]),
],
TemplateType.deepseek_v2_5,
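With these two entries, the DeepSeek-V3 checkpoints reuse the existing deepseek_v2_5 template. A quick way to sanity-check the registration without downloading weights, mirroring the pattern used in tests/test_align/test_template/test_template.py further down (and assuming, as in that test, that the helpers are exposed via swift.llm):

```python
from swift.llm import get_model_tokenizer, get_template

# load_model=False fetches only the tokenizer and model metadata
tokenizer = get_model_tokenizer('deepseek-ai/DeepSeek-V3', load_model=False)[1]
template = get_template(tokenizer.model_meta.template, tokenizer)
print(tokenizer.model_meta.template)  # expected: deepseek_v2_5
```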
1 change: 1 addition & 0 deletions swift/llm/template/base.py
@@ -610,6 +610,7 @@ def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]:
if (self.template_meta.template_type == 'dummy' and self.use_chat_template and not self.is_training
and self.mode != 'seq_cls'):
template_backend = 'jinja'
+logger.info_once(f'Setting template_backend: {template_backend}')
res_context_list, loss_scale_list, answer_len = (
self._swift_encode(inputs) if template_backend == 'swift' else self._jinja_encode(inputs))
encoded = {}
20 changes: 16 additions & 4 deletions swift/ui/app.py
@@ -1,5 +1,6 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import os
+from copy import copy
from dataclasses import fields
from functools import partial
from typing import List, Union
@@ -72,6 +73,7 @@ def run(self):
for f in fields(self.args):
if getattr(self.args, f.name):
LLMInfer.default_dict[f.name] = getattr(self.args, f.name)

LLMInfer.is_gradio_app = True
LLMInfer.is_multimodal = self.args.model_meta.is_multimodal
LLMInfer.build_ui(LLMInfer)
@@ -93,10 +95,20 @@ def run(self):
value = getattr(self.args, f.name)
if isinstance(value, list):
value = ' '.join([v or '' for v in value])
-LLMInfer.elements()[f.name].value = value
-app.load(LLMInfer.deploy_model, list(LLMInfer.valid_elements().values()),
-[LLMInfer.element('runtime_tab'),
-LLMInfer.element('running_tasks')])
+LLMInfer.elements()[f.name].value = str(value)
+
+args = copy(self.args)
+args.port = find_free_port()
+
+values = []
+for key in LLMInfer.valid_elements():
+if key in args.__dict__:
+value = getattr(args, key)
+else:
+value = LLMInfer.element(key).value
+values.append(value)
+_, running_task = LLMInfer.deploy_model(*values)
+LLMInfer.element('running_tasks').value = running_task['value']
else:
app.load(
partial(LLMTrain.update_input_model, arg_cls=RLHFArguments),
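Instead of wiring deploy_model through app.load, the app now picks a free port, collects the element values directly, and calls deploy_model once at startup. find_free_port is a repository helper; as an illustration only, such a helper is typically implemented by binding to port 0 and letting the OS pick an unused ephemeral port:

```python
import socket


def find_free_port_sketch() -> int:
    # Bind to port 0 so the OS assigns a currently unused port; note there is a
    # small race window between closing this socket and reusing the port.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.bind(('', 0))
        return s.getsockname()[1]


print(find_free_port_sketch())  # e.g. 54123
```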
13 changes: 7 additions & 6 deletions swift/ui/base.py
@@ -4,10 +4,11 @@
import sys
import time
import typing
+from collections import OrderedDict
from dataclasses import fields
from datetime import datetime
from functools import wraps
-from typing import Any, Dict, List, OrderedDict, Type
+from typing import Any, Dict, List, Type

import gradio as gr
import json
@@ -220,12 +221,12 @@ def elements(cls):

@classmethod
def valid_elements(cls):
+valid_elements = OrderedDict()
elements = cls.elements()
-return {
-key: value
-for key, value in elements.items()
-if isinstance(value, (Textbox, Dropdown, Slider, Checkbox)) and key != 'train_record'
-}
+for key, value in elements.items():
+if isinstance(value, (Textbox, Dropdown, Slider, Checkbox)) and key != 'train_record':
+valid_elements[key] = value
+return valid_elements

@classmethod
def element_keys(cls):
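The comprehension is replaced by an explicit OrderedDict built in a single pass, and the import now comes from collections (the runtime class) rather than the typing alias, which Python 3.9+ deprecates in favor of collections.OrderedDict. A tiny illustration of the same filter-and-collect pattern, with placeholder data rather than repository widgets:

```python
from collections import OrderedDict

widgets = {'model': 'Textbox', 'train_record': 'Dropdown', 'port': 'Slider'}

valid = OrderedDict()
for key, value in widgets.items():
    if key != 'train_record':  # skip excluded keys, keep insertion order
        valid[key] = value

print(valid)  # OrderedDict([('model', 'Textbox'), ('port', 'Slider')])
```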
26 changes: 26 additions & 0 deletions swift/ui/llm_infer/llm_infer.py
@@ -1,6 +1,8 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
+import atexit
import os
import re
+import signal
import sys
import time
from copy import deepcopy
@@ -298,13 +300,37 @@ def deploy_model(cls, *args):
cnt += 1
if cnt >= 60:
logger.warning_once(f'Deploy costing too much time, please check log file: {log_file}')
+if cls.is_gradio_app:
+cls.register_clean_hook()
logger.info('Deploy done.')
cls.deployed = True
running_task = Runtime.refresh_tasks(log_file)
if cls.is_gradio_app:
cls.running_task = running_task['value']
return gr.update(open=True), running_task

+@classmethod
+def clean_deployment(cls):
+if not cls.is_gradio_app:
+return
+
+logger.info('Killing deployment')
+_, args = Runtime.parse_info_from_cmdline(cls.running_task)
+os.system(f'pkill -9 -f {args["log_file"]}')
+logger.info('Done.')
+
+@classmethod
+def register_clean_hook(cls):
+atexit.register(LLMInfer.clean_deployment)
+signal.signal(signal.SIGINT, LLMInfer.signal_handler)
+if os.name != 'nt':
+signal.signal(signal.SIGTERM, LLMInfer.signal_handler)
+
+@staticmethod
+def signal_handler(*args, **kwargs):
+LLMInfer.clean_deployment()
+sys.exit(0)

@classmethod
def clear_session(cls):
return '', [], gr.update(value=None), gr.update(value=None), gr.update(value=None), []
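The new register_clean_hook wires the same cleanup into normal interpreter exit (atexit), Ctrl-C (SIGINT), and SIGTERM, the latter only on non-Windows platforms where that signal is actually delivered. A stand-alone sketch of the shutdown pattern (illustrative names; the guard against running cleanup twice is an addition, not repository code):

```python
import atexit
import os
import signal
import sys

_cleaned = False


def cleanup():
    # Idempotent: atexit and a signal handler may both fire on the same exit.
    global _cleaned
    if _cleaned:
        return
    _cleaned = True
    print('stopping deployment subprocess')


def _signal_handler(signum, frame):
    cleanup()
    sys.exit(0)  # raises SystemExit, so atexit handlers still run; cleanup() is guarded


atexit.register(cleanup)
signal.signal(signal.SIGINT, _signal_handler)
if os.name != 'nt':
    signal.signal(signal.SIGTERM, _signal_handler)
```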
6 changes: 5 additions & 1 deletion tests/test_align/test_template/test_template.py
@@ -5,6 +5,9 @@ def test_deepseek_v2_5():
tokenizer = get_model_tokenizer('deepseek-ai/DeepSeek-V2.5-1210', load_model=False)[1]
template = get_template(tokenizer.model_meta.template, tokenizer)
inputs = TemplateInputs(messages=[{
+'role': 'system',
+'content': '000'
+}, {
'role': 'user',
'content': 'aaa'
}, {
@@ -17,8 +20,9 @@
res = template.encode(inputs)
template.print_inputs(res)
template.template_backend = 'jinja'
-res = template.encode(inputs)
+res2 = template.encode(inputs)
template.print_inputs(res)
+assert res['input_ids'] == res2['input_ids']


if __name__ == '__main__':
