From db3d42b0060b40d606501aae01a83e7fad3b1a44 Mon Sep 17 00:00:00 2001 From: Jintao Date: Tue, 23 Apr 2024 21:00:32 +0800 Subject: [PATCH] fix bugs (#782) --- ...14\346\225\260\346\215\256\351\233\206.md" | 30 +++++++++---------- ...00\344\275\263\345\256\236\350\267\265.md" | 2 ++ .../LLM/Supported-models-datasets.md | 30 +++++++++---------- .../Multi-Modal/deepseek-vl-best-practice.md | 2 ++ swift/llm/utils/model.py | 28 +++++++++++++---- swift/llm/utils/template.py | 8 +++-- swift/utils/utils.py | 2 +- tests/llm/data/multi_modal2.jsonl | 3 ++ tests/llm/test_run.py | 2 +- 9 files changed, 68 insertions(+), 39 deletions(-) create mode 100644 tests/llm/data/multi_modal2.jsonl diff --git "a/docs/source/LLM/\346\224\257\346\214\201\347\232\204\346\250\241\345\236\213\345\222\214\346\225\260\346\215\256\351\233\206.md" "b/docs/source/LLM/\346\224\257\346\214\201\347\232\204\346\250\241\345\236\213\345\222\214\346\225\260\346\215\256\351\233\206.md" index e2b969cc3..6b3e86207 100644 --- "a/docs/source/LLM/\346\224\257\346\214\201\347\232\204\346\250\241\345\236\213\345\222\214\346\225\260\346\215\256\351\233\206.md" +++ "b/docs/source/LLM/\346\224\257\346\214\201\347\232\204\346\250\241\345\236\213\345\222\214\346\225\260\346\215\256\351\233\206.md" @@ -136,21 +136,21 @@ |internlm-7b-chat-8k|[Shanghai_AI_Laboratory/internlm-chat-7b-8k](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-chat-7b-8k/summary)|q_proj, k_proj, v_proj|internlm|✘|✔||-|-| |internlm-20b|[Shanghai_AI_Laboratory/internlm-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-20b/summary)|q_proj, k_proj, v_proj|default-generation-bos|✘|✔||-|[internlm/internlm2-20b](https://huggingface.co/internlm/internlm2-20b)| |internlm-20b-chat|[Shanghai_AI_Laboratory/internlm-chat-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-chat-20b/summary)|q_proj, k_proj, v_proj|internlm|✘|✔||-|[internlm/internlm2-chat-20b](https://huggingface.co/internlm/internlm2-chat-20b)| -|internlm2-1_8b|[Shanghai_AI_Laboratory/internlm2-1_8b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-1_8b/summary)|wqkv|default-generation-bos|✔|✔||-|[internlm/internlm2-1_8b](https://huggingface.co/internlm/internlm2-1_8b)| -|internlm2-1_8b-sft-chat|[Shanghai_AI_Laboratory/internlm2-chat-1_8b-sft](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-1_8b-sft/summary)|wqkv|internlm2|✔|✔||-|[internlm/internlm2-chat-1_8b-sft](https://huggingface.co/internlm/internlm2-chat-1_8b-sft)| -|internlm2-1_8b-chat|[Shanghai_AI_Laboratory/internlm2-chat-1_8b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-1_8b/summary)|wqkv|internlm2|✔|✔||-|[internlm/internlm2-chat-1_8b](https://huggingface.co/internlm/internlm2-chat-1_8b)| -|internlm2-7b-base|[Shanghai_AI_Laboratory/internlm2-base-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-base-7b/summary)|wqkv|default-generation-bos|✔|✔||-|[internlm/internlm2-base-7b](https://huggingface.co/internlm/internlm2-base-7b)| -|internlm2-7b|[Shanghai_AI_Laboratory/internlm2-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-7b/summary)|wqkv|default-generation-bos|✔|✔||-|[internlm/internlm2-7b](https://huggingface.co/internlm/internlm2-7b)| -|internlm2-7b-sft-chat|[Shanghai_AI_Laboratory/internlm2-chat-7b-sft](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-7b-sft/summary)|wqkv|internlm2|✔|✔||-|[internlm/internlm2-chat-7b-sft](https://huggingface.co/internlm/internlm2-chat-7b-sft)| -|internlm2-7b-chat|[Shanghai_AI_Laboratory/internlm2-chat-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-7b/summary)|wqkv|internlm2|✔|✔||-|[internlm/internlm2-chat-7b](https://huggingface.co/internlm/internlm2-chat-7b)| -|internlm2-20b-base|[Shanghai_AI_Laboratory/internlm2-base-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-base-20b/summary)|wqkv|default-generation-bos|✔|✔||-|[internlm/internlm2-base-20b](https://huggingface.co/internlm/internlm2-base-20b)| -|internlm2-20b|[Shanghai_AI_Laboratory/internlm2-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-20b/summary)|wqkv|default-generation-bos|✔|✔||-|[internlm/internlm2-20b](https://huggingface.co/internlm/internlm2-20b)| -|internlm2-20b-sft-chat|[Shanghai_AI_Laboratory/internlm2-chat-20b-sft](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-20b-sft/summary)|wqkv|internlm2|✔|✔||-|[internlm/internlm2-chat-20b-sft](https://huggingface.co/internlm/internlm2-chat-20b-sft)| -|internlm2-20b-chat|[Shanghai_AI_Laboratory/internlm2-chat-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-20b/summary)|wqkv|internlm2|✔|✔||-|[internlm/internlm2-chat-20b](https://huggingface.co/internlm/internlm2-chat-20b)| -|internlm2-math-7b|[Shanghai_AI_Laboratory/internlm2-math-base-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-math-base-7b/summary)|wqkv|default-generation-bos|✔|✔||math|[internlm/internlm2-math-base-7b](https://huggingface.co/internlm/internlm2-math-base-7b)| -|internlm2-math-7b-chat|[Shanghai_AI_Laboratory/internlm2-math-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-math-7b/summary)|wqkv|internlm2|✔|✔||math|[internlm/internlm2-math-7b](https://huggingface.co/internlm/internlm2-math-7b)| -|internlm2-math-20b|[Shanghai_AI_Laboratory/internlm2-math-base-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-math-base-20b/summary)|wqkv|default-generation-bos|✔|✔||math|[internlm/internlm2-math-base-20b](https://huggingface.co/internlm/internlm2-math-base-20b)| -|internlm2-math-20b-chat|[Shanghai_AI_Laboratory/internlm2-math-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-math-20b/summary)|wqkv|internlm2|✔|✔||math|[internlm/internlm2-math-20b](https://huggingface.co/internlm/internlm2-math-20b)| +|internlm2-1_8b|[Shanghai_AI_Laboratory/internlm2-1_8b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-1_8b/summary)|wqkv|default-generation-bos|✔|✔|transformers>=4.35|-|[internlm/internlm2-1_8b](https://huggingface.co/internlm/internlm2-1_8b)| +|internlm2-1_8b-sft-chat|[Shanghai_AI_Laboratory/internlm2-chat-1_8b-sft](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-1_8b-sft/summary)|wqkv|internlm2|✔|✔|transformers>=4.35|-|[internlm/internlm2-chat-1_8b-sft](https://huggingface.co/internlm/internlm2-chat-1_8b-sft)| +|internlm2-1_8b-chat|[Shanghai_AI_Laboratory/internlm2-chat-1_8b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-1_8b/summary)|wqkv|internlm2|✔|✔|transformers>=4.35|-|[internlm/internlm2-chat-1_8b](https://huggingface.co/internlm/internlm2-chat-1_8b)| +|internlm2-7b-base|[Shanghai_AI_Laboratory/internlm2-base-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-base-7b/summary)|wqkv|default-generation-bos|✔|✔|transformers>=4.35|-|[internlm/internlm2-base-7b](https://huggingface.co/internlm/internlm2-base-7b)| +|internlm2-7b|[Shanghai_AI_Laboratory/internlm2-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-7b/summary)|wqkv|default-generation-bos|✔|✔|transformers>=4.35|-|[internlm/internlm2-7b](https://huggingface.co/internlm/internlm2-7b)| +|internlm2-7b-sft-chat|[Shanghai_AI_Laboratory/internlm2-chat-7b-sft](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-7b-sft/summary)|wqkv|internlm2|✔|✔|transformers>=4.35|-|[internlm/internlm2-chat-7b-sft](https://huggingface.co/internlm/internlm2-chat-7b-sft)| +|internlm2-7b-chat|[Shanghai_AI_Laboratory/internlm2-chat-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-7b/summary)|wqkv|internlm2|✔|✔|transformers>=4.35|-|[internlm/internlm2-chat-7b](https://huggingface.co/internlm/internlm2-chat-7b)| +|internlm2-20b-base|[Shanghai_AI_Laboratory/internlm2-base-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-base-20b/summary)|wqkv|default-generation-bos|✔|✔|transformers>=4.35|-|[internlm/internlm2-base-20b](https://huggingface.co/internlm/internlm2-base-20b)| +|internlm2-20b|[Shanghai_AI_Laboratory/internlm2-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-20b/summary)|wqkv|default-generation-bos|✔|✔|transformers>=4.35|-|[internlm/internlm2-20b](https://huggingface.co/internlm/internlm2-20b)| +|internlm2-20b-sft-chat|[Shanghai_AI_Laboratory/internlm2-chat-20b-sft](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-20b-sft/summary)|wqkv|internlm2|✔|✔|transformers>=4.35|-|[internlm/internlm2-chat-20b-sft](https://huggingface.co/internlm/internlm2-chat-20b-sft)| +|internlm2-20b-chat|[Shanghai_AI_Laboratory/internlm2-chat-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-20b/summary)|wqkv|internlm2|✔|✔|transformers>=4.35|-|[internlm/internlm2-chat-20b](https://huggingface.co/internlm/internlm2-chat-20b)| +|internlm2-math-7b|[Shanghai_AI_Laboratory/internlm2-math-base-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-math-base-7b/summary)|wqkv|default-generation-bos|✔|✔|transformers>=4.35|math|[internlm/internlm2-math-base-7b](https://huggingface.co/internlm/internlm2-math-base-7b)| +|internlm2-math-7b-chat|[Shanghai_AI_Laboratory/internlm2-math-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-math-7b/summary)|wqkv|internlm2|✔|✔|transformers>=4.35|math|[internlm/internlm2-math-7b](https://huggingface.co/internlm/internlm2-math-7b)| +|internlm2-math-20b|[Shanghai_AI_Laboratory/internlm2-math-base-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-math-base-20b/summary)|wqkv|default-generation-bos|✔|✔|transformers>=4.35|math|[internlm/internlm2-math-base-20b](https://huggingface.co/internlm/internlm2-math-base-20b)| +|internlm2-math-20b-chat|[Shanghai_AI_Laboratory/internlm2-math-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-math-20b/summary)|wqkv|internlm2|✔|✔|transformers>=4.35|math|[internlm/internlm2-math-20b](https://huggingface.co/internlm/internlm2-math-20b)| |internlm-xcomposer2-7b-chat|[Shanghai_AI_Laboratory/internlm-xcomposer2-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-xcomposer2-7b/summary)|wqkv|internlm-xcomposer2|✔|✘||multi-modal, vision|[internlm/internlm-xcomposer2-7b](https://huggingface.co/internlm/internlm-xcomposer2-7b)| |deepseek-7b|[deepseek-ai/deepseek-llm-7b-base](https://modelscope.cn/models/deepseek-ai/deepseek-llm-7b-base/summary)|q_proj, k_proj, v_proj|default-generation-bos|✔|✔||-|[deepseek-ai/deepseek-llm-7b-base](https://huggingface.co/deepseek-ai/deepseek-llm-7b-base)| |deepseek-7b-chat|[deepseek-ai/deepseek-llm-7b-chat](https://modelscope.cn/models/deepseek-ai/deepseek-llm-7b-chat/summary)|q_proj, k_proj, v_proj|deepseek|✔|✔||-|[deepseek-ai/deepseek-llm-7b-chat](https://huggingface.co/deepseek-ai/deepseek-llm-7b-chat)| diff --git "a/docs/source/Multi-Modal/deepseek-vl\346\234\200\344\275\263\345\256\236\350\267\265.md" "b/docs/source/Multi-Modal/deepseek-vl\346\234\200\344\275\263\345\256\236\350\267\265.md" index 3d757868b..65bf73cc5 100644 --- "a/docs/source/Multi-Modal/deepseek-vl\346\234\200\344\275\263\345\256\236\350\267\265.md" +++ "b/docs/source/Multi-Modal/deepseek-vl\346\234\200\344\275\263\345\256\236\350\267\265.md" @@ -11,6 +11,8 @@ ## 环境准备 ```shell pip install 'ms-swift[llm]' -U + +pip install attrdict ``` 模型链接: diff --git a/docs/source_en/LLM/Supported-models-datasets.md b/docs/source_en/LLM/Supported-models-datasets.md index e58a92a10..92dad624b 100644 --- a/docs/source_en/LLM/Supported-models-datasets.md +++ b/docs/source_en/LLM/Supported-models-datasets.md @@ -136,21 +136,21 @@ The table below introcudes all models supported by SWIFT: |internlm-7b-chat-8k|[Shanghai_AI_Laboratory/internlm-chat-7b-8k](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-chat-7b-8k/summary)|q_proj, k_proj, v_proj|internlm|✘|✔||-|-| |internlm-20b|[Shanghai_AI_Laboratory/internlm-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-20b/summary)|q_proj, k_proj, v_proj|default-generation-bos|✘|✔||-|[internlm/internlm2-20b](https://huggingface.co/internlm/internlm2-20b)| |internlm-20b-chat|[Shanghai_AI_Laboratory/internlm-chat-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-chat-20b/summary)|q_proj, k_proj, v_proj|internlm|✘|✔||-|[internlm/internlm2-chat-20b](https://huggingface.co/internlm/internlm2-chat-20b)| -|internlm2-1_8b|[Shanghai_AI_Laboratory/internlm2-1_8b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-1_8b/summary)|wqkv|default-generation-bos|✔|✔||-|[internlm/internlm2-1_8b](https://huggingface.co/internlm/internlm2-1_8b)| -|internlm2-1_8b-sft-chat|[Shanghai_AI_Laboratory/internlm2-chat-1_8b-sft](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-1_8b-sft/summary)|wqkv|internlm2|✔|✔||-|[internlm/internlm2-chat-1_8b-sft](https://huggingface.co/internlm/internlm2-chat-1_8b-sft)| -|internlm2-1_8b-chat|[Shanghai_AI_Laboratory/internlm2-chat-1_8b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-1_8b/summary)|wqkv|internlm2|✔|✔||-|[internlm/internlm2-chat-1_8b](https://huggingface.co/internlm/internlm2-chat-1_8b)| -|internlm2-7b-base|[Shanghai_AI_Laboratory/internlm2-base-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-base-7b/summary)|wqkv|default-generation-bos|✔|✔||-|[internlm/internlm2-base-7b](https://huggingface.co/internlm/internlm2-base-7b)| -|internlm2-7b|[Shanghai_AI_Laboratory/internlm2-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-7b/summary)|wqkv|default-generation-bos|✔|✔||-|[internlm/internlm2-7b](https://huggingface.co/internlm/internlm2-7b)| -|internlm2-7b-sft-chat|[Shanghai_AI_Laboratory/internlm2-chat-7b-sft](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-7b-sft/summary)|wqkv|internlm2|✔|✔||-|[internlm/internlm2-chat-7b-sft](https://huggingface.co/internlm/internlm2-chat-7b-sft)| -|internlm2-7b-chat|[Shanghai_AI_Laboratory/internlm2-chat-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-7b/summary)|wqkv|internlm2|✔|✔||-|[internlm/internlm2-chat-7b](https://huggingface.co/internlm/internlm2-chat-7b)| -|internlm2-20b-base|[Shanghai_AI_Laboratory/internlm2-base-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-base-20b/summary)|wqkv|default-generation-bos|✔|✔||-|[internlm/internlm2-base-20b](https://huggingface.co/internlm/internlm2-base-20b)| -|internlm2-20b|[Shanghai_AI_Laboratory/internlm2-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-20b/summary)|wqkv|default-generation-bos|✔|✔||-|[internlm/internlm2-20b](https://huggingface.co/internlm/internlm2-20b)| -|internlm2-20b-sft-chat|[Shanghai_AI_Laboratory/internlm2-chat-20b-sft](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-20b-sft/summary)|wqkv|internlm2|✔|✔||-|[internlm/internlm2-chat-20b-sft](https://huggingface.co/internlm/internlm2-chat-20b-sft)| -|internlm2-20b-chat|[Shanghai_AI_Laboratory/internlm2-chat-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-20b/summary)|wqkv|internlm2|✔|✔||-|[internlm/internlm2-chat-20b](https://huggingface.co/internlm/internlm2-chat-20b)| -|internlm2-math-7b|[Shanghai_AI_Laboratory/internlm2-math-base-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-math-base-7b/summary)|wqkv|default-generation-bos|✔|✔||math|[internlm/internlm2-math-base-7b](https://huggingface.co/internlm/internlm2-math-base-7b)| -|internlm2-math-7b-chat|[Shanghai_AI_Laboratory/internlm2-math-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-math-7b/summary)|wqkv|internlm2|✔|✔||math|[internlm/internlm2-math-7b](https://huggingface.co/internlm/internlm2-math-7b)| -|internlm2-math-20b|[Shanghai_AI_Laboratory/internlm2-math-base-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-math-base-20b/summary)|wqkv|default-generation-bos|✔|✔||math|[internlm/internlm2-math-base-20b](https://huggingface.co/internlm/internlm2-math-base-20b)| -|internlm2-math-20b-chat|[Shanghai_AI_Laboratory/internlm2-math-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-math-20b/summary)|wqkv|internlm2|✔|✔||math|[internlm/internlm2-math-20b](https://huggingface.co/internlm/internlm2-math-20b)| +|internlm2-1_8b|[Shanghai_AI_Laboratory/internlm2-1_8b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-1_8b/summary)|wqkv|default-generation-bos|✔|✔|transformers>=4.35|-|[internlm/internlm2-1_8b](https://huggingface.co/internlm/internlm2-1_8b)| +|internlm2-1_8b-sft-chat|[Shanghai_AI_Laboratory/internlm2-chat-1_8b-sft](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-1_8b-sft/summary)|wqkv|internlm2|✔|✔|transformers>=4.35|-|[internlm/internlm2-chat-1_8b-sft](https://huggingface.co/internlm/internlm2-chat-1_8b-sft)| +|internlm2-1_8b-chat|[Shanghai_AI_Laboratory/internlm2-chat-1_8b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-1_8b/summary)|wqkv|internlm2|✔|✔|transformers>=4.35|-|[internlm/internlm2-chat-1_8b](https://huggingface.co/internlm/internlm2-chat-1_8b)| +|internlm2-7b-base|[Shanghai_AI_Laboratory/internlm2-base-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-base-7b/summary)|wqkv|default-generation-bos|✔|✔|transformers>=4.35|-|[internlm/internlm2-base-7b](https://huggingface.co/internlm/internlm2-base-7b)| +|internlm2-7b|[Shanghai_AI_Laboratory/internlm2-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-7b/summary)|wqkv|default-generation-bos|✔|✔|transformers>=4.35|-|[internlm/internlm2-7b](https://huggingface.co/internlm/internlm2-7b)| +|internlm2-7b-sft-chat|[Shanghai_AI_Laboratory/internlm2-chat-7b-sft](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-7b-sft/summary)|wqkv|internlm2|✔|✔|transformers>=4.35|-|[internlm/internlm2-chat-7b-sft](https://huggingface.co/internlm/internlm2-chat-7b-sft)| +|internlm2-7b-chat|[Shanghai_AI_Laboratory/internlm2-chat-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-7b/summary)|wqkv|internlm2|✔|✔|transformers>=4.35|-|[internlm/internlm2-chat-7b](https://huggingface.co/internlm/internlm2-chat-7b)| +|internlm2-20b-base|[Shanghai_AI_Laboratory/internlm2-base-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-base-20b/summary)|wqkv|default-generation-bos|✔|✔|transformers>=4.35|-|[internlm/internlm2-base-20b](https://huggingface.co/internlm/internlm2-base-20b)| +|internlm2-20b|[Shanghai_AI_Laboratory/internlm2-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-20b/summary)|wqkv|default-generation-bos|✔|✔|transformers>=4.35|-|[internlm/internlm2-20b](https://huggingface.co/internlm/internlm2-20b)| +|internlm2-20b-sft-chat|[Shanghai_AI_Laboratory/internlm2-chat-20b-sft](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-20b-sft/summary)|wqkv|internlm2|✔|✔|transformers>=4.35|-|[internlm/internlm2-chat-20b-sft](https://huggingface.co/internlm/internlm2-chat-20b-sft)| +|internlm2-20b-chat|[Shanghai_AI_Laboratory/internlm2-chat-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-20b/summary)|wqkv|internlm2|✔|✔|transformers>=4.35|-|[internlm/internlm2-chat-20b](https://huggingface.co/internlm/internlm2-chat-20b)| +|internlm2-math-7b|[Shanghai_AI_Laboratory/internlm2-math-base-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-math-base-7b/summary)|wqkv|default-generation-bos|✔|✔|transformers>=4.35|math|[internlm/internlm2-math-base-7b](https://huggingface.co/internlm/internlm2-math-base-7b)| +|internlm2-math-7b-chat|[Shanghai_AI_Laboratory/internlm2-math-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-math-7b/summary)|wqkv|internlm2|✔|✔|transformers>=4.35|math|[internlm/internlm2-math-7b](https://huggingface.co/internlm/internlm2-math-7b)| +|internlm2-math-20b|[Shanghai_AI_Laboratory/internlm2-math-base-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-math-base-20b/summary)|wqkv|default-generation-bos|✔|✔|transformers>=4.35|math|[internlm/internlm2-math-base-20b](https://huggingface.co/internlm/internlm2-math-base-20b)| +|internlm2-math-20b-chat|[Shanghai_AI_Laboratory/internlm2-math-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-math-20b/summary)|wqkv|internlm2|✔|✔|transformers>=4.35|math|[internlm/internlm2-math-20b](https://huggingface.co/internlm/internlm2-math-20b)| |internlm-xcomposer2-7b-chat|[Shanghai_AI_Laboratory/internlm-xcomposer2-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-xcomposer2-7b/summary)|wqkv|internlm-xcomposer2|✔|✘||multi-modal, vision|[internlm/internlm-xcomposer2-7b](https://huggingface.co/internlm/internlm-xcomposer2-7b)| |deepseek-7b|[deepseek-ai/deepseek-llm-7b-base](https://modelscope.cn/models/deepseek-ai/deepseek-llm-7b-base/summary)|q_proj, k_proj, v_proj|default-generation-bos|✔|✔||-|[deepseek-ai/deepseek-llm-7b-base](https://huggingface.co/deepseek-ai/deepseek-llm-7b-base)| |deepseek-7b-chat|[deepseek-ai/deepseek-llm-7b-chat](https://modelscope.cn/models/deepseek-ai/deepseek-llm-7b-chat/summary)|q_proj, k_proj, v_proj|deepseek|✔|✔||-|[deepseek-ai/deepseek-llm-7b-chat](https://huggingface.co/deepseek-ai/deepseek-llm-7b-chat)| diff --git a/docs/source_en/Multi-Modal/deepseek-vl-best-practice.md b/docs/source_en/Multi-Modal/deepseek-vl-best-practice.md index ef8bf82bf..262750113 100644 --- a/docs/source_en/Multi-Modal/deepseek-vl-best-practice.md +++ b/docs/source_en/Multi-Modal/deepseek-vl-best-practice.md @@ -9,6 +9,8 @@ ## Environment Preparation ```shell pip install 'ms-swift[llm]' -U + +pip install attrdict ``` Model Link: diff --git a/swift/llm/utils/model.py b/swift/llm/utils/model.py index dec0c8c09..306d570b6 100644 --- a/swift/llm/utils/model.py +++ b/swift/llm/utils/model.py @@ -28,7 +28,7 @@ from swift import get_logger from swift.utils import (get_dist_setting, is_dist, is_local_master, - use_torchacc) + subprocess_run, use_torchacc) from .template import TemplateType from .utils import get_max_model_len @@ -2172,6 +2172,7 @@ def get_model_tokenizer_qwen1half_intx(model_dir: str, 'Shanghai_AI_Laboratory/internlm2-1_8b', LoRATM.internlm2, TemplateType.default_generation_bos, + requires=['transformers>=4.35'], support_flash_attn=True, support_vllm=True, hf_model_id='internlm/internlm2-1_8b') @@ -2181,6 +2182,7 @@ def get_model_tokenizer_qwen1half_intx(model_dir: str, LoRATM.internlm2, TemplateType.internlm2, eos_token='<|im_end|>', + requires=['transformers>=4.35'], support_flash_attn=True, support_vllm=True, hf_model_id='internlm/internlm2-chat-1_8b-sft') @@ -2190,6 +2192,7 @@ def get_model_tokenizer_qwen1half_intx(model_dir: str, LoRATM.internlm2, TemplateType.internlm2, eos_token='<|im_end|>', + requires=['transformers>=4.35'], support_flash_attn=True, support_vllm=True, hf_model_id='internlm/internlm2-chat-1_8b') @@ -2198,6 +2201,7 @@ def get_model_tokenizer_qwen1half_intx(model_dir: str, 'Shanghai_AI_Laboratory/internlm2-math-base-7b', LoRATM.internlm2, TemplateType.default_generation_bos, + requires=['transformers>=4.35'], support_flash_attn=True, support_vllm=True, tags=['math'], @@ -2207,6 +2211,7 @@ def get_model_tokenizer_qwen1half_intx(model_dir: str, 'Shanghai_AI_Laboratory/internlm2-math-base-20b', LoRATM.internlm2, TemplateType.default_generation_bos, + requires=['transformers>=4.35'], support_flash_attn=True, support_vllm=True, tags=['math'], @@ -2217,6 +2222,7 @@ def get_model_tokenizer_qwen1half_intx(model_dir: str, LoRATM.internlm2, TemplateType.internlm2, eos_token='<|im_end|>', + requires=['transformers>=4.35'], support_flash_attn=True, support_vllm=True, tags=['math'], @@ -2227,6 +2233,7 @@ def get_model_tokenizer_qwen1half_intx(model_dir: str, LoRATM.internlm2, TemplateType.internlm2, eos_token='<|im_end|>', + requires=['transformers>=4.35'], support_flash_attn=True, support_vllm=True, tags=['math'], @@ -2237,6 +2244,7 @@ def get_model_tokenizer_qwen1half_intx(model_dir: str, LoRATM.internlm2, TemplateType.internlm2, eos_token='<|im_end|>', + requires=['transformers>=4.35'], support_flash_attn=True, support_vllm=True, hf_model_id='internlm/internlm2-chat-7b-sft') @@ -2246,6 +2254,7 @@ def get_model_tokenizer_qwen1half_intx(model_dir: str, LoRATM.internlm2, TemplateType.internlm2, eos_token='<|im_end|>', + requires=['transformers>=4.35'], support_flash_attn=True, support_vllm=True, hf_model_id='internlm/internlm2-chat-7b') @@ -2255,6 +2264,7 @@ def get_model_tokenizer_qwen1half_intx(model_dir: str, LoRATM.internlm2, TemplateType.internlm2, eos_token='<|im_end|>', + requires=['transformers>=4.35'], support_flash_attn=True, support_vllm=True, hf_model_id='internlm/internlm2-chat-20b-sft') @@ -2264,6 +2274,7 @@ def get_model_tokenizer_qwen1half_intx(model_dir: str, LoRATM.internlm2, TemplateType.internlm2, eos_token='<|im_end|>', + requires=['transformers>=4.35'], support_flash_attn=True, support_vllm=True, hf_model_id='internlm/internlm2-chat-20b') @@ -2272,6 +2283,7 @@ def get_model_tokenizer_qwen1half_intx(model_dir: str, 'Shanghai_AI_Laboratory/internlm2-7b', LoRATM.internlm2, TemplateType.default_generation_bos, + requires=['transformers>=4.35'], support_flash_attn=True, support_vllm=True, hf_model_id='internlm/internlm2-7b') @@ -2280,6 +2292,7 @@ def get_model_tokenizer_qwen1half_intx(model_dir: str, 'Shanghai_AI_Laboratory/internlm2-base-7b', LoRATM.internlm2, TemplateType.default_generation_bos, + requires=['transformers>=4.35'], support_flash_attn=True, support_vllm=True, hf_model_id='internlm/internlm2-base-7b') @@ -2288,6 +2301,7 @@ def get_model_tokenizer_qwen1half_intx(model_dir: str, 'Shanghai_AI_Laboratory/internlm2-20b', LoRATM.internlm2, TemplateType.default_generation_bos, + requires=['transformers>=4.35'], support_flash_attn=True, support_vllm=True, hf_model_id='internlm/internlm2-20b') @@ -2296,6 +2310,7 @@ def get_model_tokenizer_qwen1half_intx(model_dir: str, 'Shanghai_AI_Laboratory/internlm2-base-20b', LoRATM.internlm2, TemplateType.default_generation_bos, + requires=['transformers>=4.35'], support_flash_attn=True, support_vllm=True, hf_model_id='internlm/internlm2-base-20b') @@ -2374,9 +2389,12 @@ def _git_clone_github(github_url: str, if not os.path.exists(local_repo_path): if not github_url.endswith('.git'): github_url = f'{github_url}.git' - command = f'git -C {git_cache_dir} clone {github_url} {local_repo_name}' - logger.info(f'Run the command: `{command}`') - os.system(command) + command = [ + 'git', '-C', git_cache_dir, 'clone', github_url, local_repo_name + ] + command_str = f"git -C '{git_cache_dir}' clone '{github_url}' {local_repo_name}" + logger.info(f'Run the command: `{command_str}`') + subprocess_run(command) logger.info(f'local_repo_path: {local_repo_path}') return local_repo_path @@ -3801,7 +3819,7 @@ def safe_snapshot_download(model_type: str, ignore_file_pattern=ignore_file_pattern) else: model_dir = model_id_or_path - logger.info(f'Loading the model using model_dir: {model_dir}') + logger.info(f'Loading the model using model_dir: {model_dir}') if is_dist() and is_local_master(): dist.barrier() diff --git a/swift/llm/utils/template.py b/swift/llm/utils/template.py index 50e804889..0d0021c41 100644 --- a/swift/llm/utils/template.py +++ b/swift/llm/utils/template.py @@ -869,7 +869,9 @@ def encode( self, example: Dict[str, Any]) -> Tuple[Dict[str, Any], Dict[str, Any]]: example = example.copy() - history = example.pop('history', []) + history = example.pop('history', None) + if history is None: + history = [] example['query'], example['history'], images_path = replace_img_tab( example['query'], history, '') @@ -1116,7 +1118,9 @@ def encode( 'docs/source/Multi-Modal/deepseek-vl最佳实践.md') example = example.copy() - history = example.pop('history', []) + history = example.pop('history', None) + if history is None: + history = [] example['query'], example['history'], images_path = replace_img_tab( example['query'], history, '') diff --git a/swift/utils/utils.py b/swift/utils/utils.py index 2a635065a..86e25bdb7 100644 --- a/swift/utils/utils.py +++ b/swift/utils/utils.py @@ -172,7 +172,7 @@ def get_pai_tensorboard_dir() -> Optional[str]: def subprocess_run(command: List[str], env: Optional[Dict[str, str]] = None, stdout=None, - stderr=None) -> None: + stderr=None): # stdoutm stderr: e.g. subprocess.PIPE. resp = subprocess.run(command, env=env, stdout=stdout, stderr=stderr) resp.check_returncode() diff --git a/tests/llm/data/multi_modal2.jsonl b/tests/llm/data/multi_modal2.jsonl new file mode 100644 index 000000000..1c9894f86 --- /dev/null +++ b/tests/llm/data/multi_modal2.jsonl @@ -0,0 +1,3 @@ +{"query": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg55555", "response": "66666"} +{"query": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeghttps://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpegeeeee", "response": "fffff", "history": [["hello", "123"]]} +{"query": "EEEEE", "response": "FFFFF", "history": [["AAAAA", "BBBBB"], ["CCCCC", "DDDDD"]]} diff --git a/tests/llm/test_run.py b/tests/llm/test_run.py index 0064d0166..3547ba9a1 100644 --- a/tests/llm/test_run.py +++ b/tests/llm/test_run.py @@ -398,7 +398,7 @@ def test_deepseek_vl_chat(self): train_dataset_sample=100, eval_steps=5, custom_train_dataset_path=[ - os.path.join(folder, 'multi_modal.jsonl') + os.path.join(folder, 'multi_modal2.jsonl') ], lazy_tokenize=False))