Skip to content

Commit

Permalink
support deepseek-v3 (#2781)
Browse files Browse the repository at this point in the history
  • Loading branch information
Jintao-Huang authored Dec 27, 2024
1 parent 1e2fda1 commit 2776550
Show file tree
Hide file tree
Showing 5 changed files with 12 additions and 1 deletion.
2 changes: 2 additions & 0 deletions docs/source/Instruction/支持的模型和数据集.md
Original file line number Diff line number Diff line change
Expand Up @@ -344,6 +344,8 @@
|[deepseek-ai/DeepSeek-V2-Chat](https://modelscope.cn/models/deepseek-ai/DeepSeek-V2-Chat)|deepseek_v2|deepseek|transformers>=4.39.3|-|[deepseek-ai/DeepSeek-V2-Chat](https://huggingface.co/deepseek-ai/DeepSeek-V2-Chat)|
|[deepseek-ai/DeepSeek-V2.5](https://modelscope.cn/models/deepseek-ai/DeepSeek-V2.5)|deepseek_v2_5|deepseek_v2_5|transformers>=4.39.3|-|[deepseek-ai/DeepSeek-V2.5](https://huggingface.co/deepseek-ai/DeepSeek-V2.5)|
|[deepseek-ai/DeepSeek-V2.5-1210](https://modelscope.cn/models/deepseek-ai/DeepSeek-V2.5-1210)|deepseek_v2_5|deepseek_v2_5|transformers>=4.39.3|-|[deepseek-ai/DeepSeek-V2.5-1210](https://huggingface.co/deepseek-ai/DeepSeek-V2.5-1210)|
|[deepseek-ai/DeepSeek-V3-Base](https://modelscope.cn/models/deepseek-ai/DeepSeek-V3-Base)|deepseek_v2_5|deepseek_v2_5|transformers>=4.39.3|-|[deepseek-ai/DeepSeek-V3-Base](https://huggingface.co/deepseek-ai/DeepSeek-V3-Base)|
|[deepseek-ai/DeepSeek-V3](https://modelscope.cn/models/deepseek-ai/DeepSeek-V3)|deepseek_v2_5|deepseek_v2_5|transformers>=4.39.3|-|[deepseek-ai/DeepSeek-V3](https://huggingface.co/deepseek-ai/DeepSeek-V3)|
|[OpenBuddy/openbuddy-llama-65b-v8-bf16](https://modelscope.cn/models/OpenBuddy/openbuddy-llama-65b-v8-bf16)|openbuddy_llama|openbuddy|-|-|[OpenBuddy/openbuddy-llama-65b-v8-bf16](https://huggingface.co/OpenBuddy/openbuddy-llama-65b-v8-bf16)|
|[OpenBuddy/openbuddy-llama2-13b-v8.1-fp16](https://modelscope.cn/models/OpenBuddy/openbuddy-llama2-13b-v8.1-fp16)|openbuddy_llama|openbuddy|-|-|[OpenBuddy/openbuddy-llama2-13b-v8.1-fp16](https://huggingface.co/OpenBuddy/openbuddy-llama2-13b-v8.1-fp16)|
|[OpenBuddy/openbuddy-llama2-70b-v10.1-bf16](https://modelscope.cn/models/OpenBuddy/openbuddy-llama2-70b-v10.1-bf16)|openbuddy_llama|openbuddy|-|-|[OpenBuddy/openbuddy-llama2-70b-v10.1-bf16](https://huggingface.co/OpenBuddy/openbuddy-llama2-70b-v10.1-bf16)|
Expand Down
2 changes: 2 additions & 0 deletions docs/source_en/Instruction/Supported-models-and-datasets.md
Original file line number Diff line number Diff line change
Expand Up @@ -344,6 +344,8 @@ The table below introduces the models integrated with ms-swift:
|[deepseek-ai/DeepSeek-V2-Chat](https://modelscope.cn/models/deepseek-ai/DeepSeek-V2-Chat)|deepseek_v2|deepseek|transformers>=4.39.3|-|[deepseek-ai/DeepSeek-V2-Chat](https://huggingface.co/deepseek-ai/DeepSeek-V2-Chat)|
|[deepseek-ai/DeepSeek-V2.5](https://modelscope.cn/models/deepseek-ai/DeepSeek-V2.5)|deepseek_v2_5|deepseek_v2_5|transformers>=4.39.3|-|[deepseek-ai/DeepSeek-V2.5](https://huggingface.co/deepseek-ai/DeepSeek-V2.5)|
|[deepseek-ai/DeepSeek-V2.5-1210](https://modelscope.cn/models/deepseek-ai/DeepSeek-V2.5-1210)|deepseek_v2_5|deepseek_v2_5|transformers>=4.39.3|-|[deepseek-ai/DeepSeek-V2.5-1210](https://huggingface.co/deepseek-ai/DeepSeek-V2.5-1210)|
|[deepseek-ai/DeepSeek-V3-Base](https://modelscope.cn/models/deepseek-ai/DeepSeek-V3-Base)|deepseek_v2_5|deepseek_v2_5|transformers>=4.39.3|-|[deepseek-ai/DeepSeek-V3-Base](https://huggingface.co/deepseek-ai/DeepSeek-V3-Base)|
|[deepseek-ai/DeepSeek-V3](https://modelscope.cn/models/deepseek-ai/DeepSeek-V3)|deepseek_v2_5|deepseek_v2_5|transformers>=4.39.3|-|[deepseek-ai/DeepSeek-V3](https://huggingface.co/deepseek-ai/DeepSeek-V3)|
|[OpenBuddy/openbuddy-llama-65b-v8-bf16](https://modelscope.cn/models/OpenBuddy/openbuddy-llama-65b-v8-bf16)|openbuddy_llama|openbuddy|-|-|[OpenBuddy/openbuddy-llama-65b-v8-bf16](https://huggingface.co/OpenBuddy/openbuddy-llama-65b-v8-bf16)|
|[OpenBuddy/openbuddy-llama2-13b-v8.1-fp16](https://modelscope.cn/models/OpenBuddy/openbuddy-llama2-13b-v8.1-fp16)|openbuddy_llama|openbuddy|-|-|[OpenBuddy/openbuddy-llama2-13b-v8.1-fp16](https://huggingface.co/OpenBuddy/openbuddy-llama2-13b-v8.1-fp16)|
|[OpenBuddy/openbuddy-llama2-70b-v10.1-bf16](https://modelscope.cn/models/OpenBuddy/openbuddy-llama2-70b-v10.1-bf16)|openbuddy_llama|openbuddy|-|-|[OpenBuddy/openbuddy-llama2-70b-v10.1-bf16](https://huggingface.co/OpenBuddy/openbuddy-llama2-70b-v10.1-bf16)|
Expand Down
2 changes: 2 additions & 0 deletions swift/llm/model/model/deepseek.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,8 @@ def _dtype_hook(module, input, output):
ModelGroup([
Model('deepseek-ai/DeepSeek-V2.5', 'deepseek-ai/DeepSeek-V2.5'),
Model('deepseek-ai/DeepSeek-V2.5-1210', 'deepseek-ai/DeepSeek-V2.5-1210'),
Model('deepseek-ai/DeepSeek-V3-Base', 'deepseek-ai/DeepSeek-V3-Base'),
Model('deepseek-ai/DeepSeek-V3', 'deepseek-ai/DeepSeek-V3'),
]),
],
TemplateType.deepseek_v2_5,
Expand Down
1 change: 1 addition & 0 deletions swift/llm/template/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -610,6 +610,7 @@ def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]:
if (self.template_meta.template_type == 'dummy' and self.use_chat_template and not self.is_training
and self.mode != 'seq_cls'):
template_backend = 'jinja'
logger.info_once(f'Setting template_backend: {template_backend}')
res_context_list, loss_scale_list, answer_len = (
self._swift_encode(inputs) if template_backend == 'swift' else self._jinja_encode(inputs))
encoded = {}
Expand Down
6 changes: 5 additions & 1 deletion tests/test_align/test_template/test_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ def test_deepseek_v2_5():
tokenizer = get_model_tokenizer('deepseek-ai/DeepSeek-V2.5-1210', load_model=False)[1]
template = get_template(tokenizer.model_meta.template, tokenizer)
inputs = TemplateInputs(messages=[{
'role': 'system',
'content': '000'
}, {
'role': 'user',
'content': 'aaa'
}, {
Expand All @@ -17,8 +20,9 @@ def test_deepseek_v2_5():
res = template.encode(inputs)
template.print_inputs(res)
template.template_backend = 'jinja'
res = template.encode(inputs)
res2 = template.encode(inputs)
template.print_inputs(res)
assert res['input_ids'] == res2['input_ids']


if __name__ == '__main__':
Expand Down

0 comments on commit 2776550

Please sign in to comment.