diff --git "a/docs/source/Instruction/\346\224\257\346\214\201\347\232\204\346\250\241\345\236\213\345\222\214\346\225\260\346\215\256\351\233\206.md" "b/docs/source/Instruction/\346\224\257\346\214\201\347\232\204\346\250\241\345\236\213\345\222\214\346\225\260\346\215\256\351\233\206.md" index 8b2ecd8df..0e58a6cdc 100644 --- "a/docs/source/Instruction/\346\224\257\346\214\201\347\232\204\346\250\241\345\236\213\345\222\214\346\225\260\346\215\256\351\233\206.md" +++ "b/docs/source/Instruction/\346\224\257\346\214\201\347\232\204\346\250\241\345\236\213\345\222\214\346\225\260\346\215\256\351\233\206.md" @@ -344,6 +344,8 @@ |[deepseek-ai/DeepSeek-V2-Chat](https://modelscope.cn/models/deepseek-ai/DeepSeek-V2-Chat)|deepseek_v2|deepseek|transformers>=4.39.3|-|[deepseek-ai/DeepSeek-V2-Chat](https://huggingface.co/deepseek-ai/DeepSeek-V2-Chat)| |[deepseek-ai/DeepSeek-V2.5](https://modelscope.cn/models/deepseek-ai/DeepSeek-V2.5)|deepseek_v2_5|deepseek_v2_5|transformers>=4.39.3|-|[deepseek-ai/DeepSeek-V2.5](https://huggingface.co/deepseek-ai/DeepSeek-V2.5)| |[deepseek-ai/DeepSeek-V2.5-1210](https://modelscope.cn/models/deepseek-ai/DeepSeek-V2.5-1210)|deepseek_v2_5|deepseek_v2_5|transformers>=4.39.3|-|[deepseek-ai/DeepSeek-V2.5-1210](https://huggingface.co/deepseek-ai/DeepSeek-V2.5-1210)| +|[deepseek-ai/DeepSeek-V3-Base](https://modelscope.cn/models/deepseek-ai/DeepSeek-V3-Base)|deepseek_v2_5|deepseek_v2_5|transformers>=4.39.3|-|[deepseek-ai/DeepSeek-V3-Base](https://huggingface.co/deepseek-ai/DeepSeek-V3-Base)| +|[deepseek-ai/DeepSeek-V3](https://modelscope.cn/models/deepseek-ai/DeepSeek-V3)|deepseek_v2_5|deepseek_v2_5|transformers>=4.39.3|-|[deepseek-ai/DeepSeek-V3](https://huggingface.co/deepseek-ai/DeepSeek-V3)| |[OpenBuddy/openbuddy-llama-65b-v8-bf16](https://modelscope.cn/models/OpenBuddy/openbuddy-llama-65b-v8-bf16)|openbuddy_llama|openbuddy|-|-|[OpenBuddy/openbuddy-llama-65b-v8-bf16](https://huggingface.co/OpenBuddy/openbuddy-llama-65b-v8-bf16)| |[OpenBuddy/openbuddy-llama2-13b-v8.1-fp16](https://modelscope.cn/models/OpenBuddy/openbuddy-llama2-13b-v8.1-fp16)|openbuddy_llama|openbuddy|-|-|[OpenBuddy/openbuddy-llama2-13b-v8.1-fp16](https://huggingface.co/OpenBuddy/openbuddy-llama2-13b-v8.1-fp16)| |[OpenBuddy/openbuddy-llama2-70b-v10.1-bf16](https://modelscope.cn/models/OpenBuddy/openbuddy-llama2-70b-v10.1-bf16)|openbuddy_llama|openbuddy|-|-|[OpenBuddy/openbuddy-llama2-70b-v10.1-bf16](https://huggingface.co/OpenBuddy/openbuddy-llama2-70b-v10.1-bf16)| diff --git a/docs/source_en/Instruction/Supported-models-and-datasets.md b/docs/source_en/Instruction/Supported-models-and-datasets.md index 6b693513d..abe6200f0 100644 --- a/docs/source_en/Instruction/Supported-models-and-datasets.md +++ b/docs/source_en/Instruction/Supported-models-and-datasets.md @@ -344,6 +344,8 @@ The table below introduces the models integrated with ms-swift: |[deepseek-ai/DeepSeek-V2-Chat](https://modelscope.cn/models/deepseek-ai/DeepSeek-V2-Chat)|deepseek_v2|deepseek|transformers>=4.39.3|-|[deepseek-ai/DeepSeek-V2-Chat](https://huggingface.co/deepseek-ai/DeepSeek-V2-Chat)| |[deepseek-ai/DeepSeek-V2.5](https://modelscope.cn/models/deepseek-ai/DeepSeek-V2.5)|deepseek_v2_5|deepseek_v2_5|transformers>=4.39.3|-|[deepseek-ai/DeepSeek-V2.5](https://huggingface.co/deepseek-ai/DeepSeek-V2.5)| |[deepseek-ai/DeepSeek-V2.5-1210](https://modelscope.cn/models/deepseek-ai/DeepSeek-V2.5-1210)|deepseek_v2_5|deepseek_v2_5|transformers>=4.39.3|-|[deepseek-ai/DeepSeek-V2.5-1210](https://huggingface.co/deepseek-ai/DeepSeek-V2.5-1210)| 
+|[deepseek-ai/DeepSeek-V3-Base](https://modelscope.cn/models/deepseek-ai/DeepSeek-V3-Base)|deepseek_v2_5|deepseek_v2_5|transformers>=4.39.3|-|[deepseek-ai/DeepSeek-V3-Base](https://huggingface.co/deepseek-ai/DeepSeek-V3-Base)| +|[deepseek-ai/DeepSeek-V3](https://modelscope.cn/models/deepseek-ai/DeepSeek-V3)|deepseek_v2_5|deepseek_v2_5|transformers>=4.39.3|-|[deepseek-ai/DeepSeek-V3](https://huggingface.co/deepseek-ai/DeepSeek-V3)| |[OpenBuddy/openbuddy-llama-65b-v8-bf16](https://modelscope.cn/models/OpenBuddy/openbuddy-llama-65b-v8-bf16)|openbuddy_llama|openbuddy|-|-|[OpenBuddy/openbuddy-llama-65b-v8-bf16](https://huggingface.co/OpenBuddy/openbuddy-llama-65b-v8-bf16)| |[OpenBuddy/openbuddy-llama2-13b-v8.1-fp16](https://modelscope.cn/models/OpenBuddy/openbuddy-llama2-13b-v8.1-fp16)|openbuddy_llama|openbuddy|-|-|[OpenBuddy/openbuddy-llama2-13b-v8.1-fp16](https://huggingface.co/OpenBuddy/openbuddy-llama2-13b-v8.1-fp16)| |[OpenBuddy/openbuddy-llama2-70b-v10.1-bf16](https://modelscope.cn/models/OpenBuddy/openbuddy-llama2-70b-v10.1-bf16)|openbuddy_llama|openbuddy|-|-|[OpenBuddy/openbuddy-llama2-70b-v10.1-bf16](https://huggingface.co/OpenBuddy/openbuddy-llama2-70b-v10.1-bf16)| diff --git a/swift/llm/model/model/deepseek.py b/swift/llm/model/model/deepseek.py index 0f6779d4d..598bf068f 100644 --- a/swift/llm/model/model/deepseek.py +++ b/swift/llm/model/model/deepseek.py @@ -108,6 +108,8 @@ def _dtype_hook(module, input, output): ModelGroup([ Model('deepseek-ai/DeepSeek-V2.5', 'deepseek-ai/DeepSeek-V2.5'), Model('deepseek-ai/DeepSeek-V2.5-1210', 'deepseek-ai/DeepSeek-V2.5-1210'), + Model('deepseek-ai/DeepSeek-V3-Base', 'deepseek-ai/DeepSeek-V3-Base'), + Model('deepseek-ai/DeepSeek-V3', 'deepseek-ai/DeepSeek-V3'), ]), ], TemplateType.deepseek_v2_5, diff --git a/swift/llm/template/base.py b/swift/llm/template/base.py index 7384678cc..b130611ee 100644 --- a/swift/llm/template/base.py +++ b/swift/llm/template/base.py @@ -610,6 +610,7 @@ def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: if (self.template_meta.template_type == 'dummy' and self.use_chat_template and not self.is_training and self.mode != 'seq_cls'): template_backend = 'jinja' + logger.info_once(f'Setting template_backend: {template_backend}') res_context_list, loss_scale_list, answer_len = ( self._swift_encode(inputs) if template_backend == 'swift' else self._jinja_encode(inputs)) encoded = {} diff --git a/tests/test_align/test_template/test_template.py b/tests/test_align/test_template/test_template.py index 30f9ec085..e4be3fce8 100644 --- a/tests/test_align/test_template/test_template.py +++ b/tests/test_align/test_template/test_template.py @@ -5,6 +5,9 @@ def test_deepseek_v2_5(): tokenizer = get_model_tokenizer('deepseek-ai/DeepSeek-V2.5-1210', load_model=False)[1] template = get_template(tokenizer.model_meta.template, tokenizer) inputs = TemplateInputs(messages=[{ + 'role': 'system', + 'content': '000' + }, { 'role': 'user', 'content': 'aaa' }, { @@ -17,8 +20,9 @@ def test_deepseek_v2_5(): res = template.encode(inputs) template.print_inputs(res) template.template_backend = 'jinja' - res = template.encode(inputs) + res2 = template.encode(inputs) template.print_inputs(res) + assert res['input_ids'] == res2['input_ids'] if __name__ == '__main__':
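
Note (not part of the diff): a minimal sketch of how the newly registered DeepSeek-V3 entries could be exercised through ms-swift's Python API, mirroring the updated test above. The model ID, the deepseek_v2_5 template mapping, the load_model=False shortcut, and the swift/jinja backend comparison come from this change; the import line and the placeholder message contents are assumptions based on the rest of the test file, which is not shown here.

# Sketch only: assumes the same top-level imports the test file uses.
from swift.llm import TemplateInputs, get_model_tokenizer, get_template

# load_model=False fetches only the tokenizer/config, so the full DeepSeek-V3
# weights are never downloaded; get_model_tokenizer returns (model, tokenizer).
tokenizer = get_model_tokenizer('deepseek-ai/DeepSeek-V3', load_model=False)[1]
# model_meta.template resolves to deepseek_v2_5 per the deepseek.py registration above.
template = get_template(tokenizer.model_meta.template, tokenizer)

# Placeholder conversation, analogous to the test's '000'/'aaa' contents.
inputs = TemplateInputs(messages=[
    {'role': 'system', 'content': '000'},
    {'role': 'user', 'content': 'aaa'},
    {'role': 'assistant', 'content': 'bbb'},
    {'role': 'user', 'content': 'ccc'},
])

# The swift backend and the jinja (chat_template) backend should agree on the
# encoded token ids, which is what the new assert in the test verifies.
res = template.encode(inputs)
template.template_backend = 'jinja'
res2 = template.encode(inputs)
assert res['input_ids'] == res2['input_ids']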