diff --git "a/docs/source/Instruction/\346\224\257\346\214\201\347\232\204\346\250\241\345\236\213\345\222\214\346\225\260\346\215\256\351\233\206.md" "b/docs/source/Instruction/\346\224\257\346\214\201\347\232\204\346\250\241\345\236\213\345\222\214\346\225\260\346\215\256\351\233\206.md" index 0ae7bd93f..0cf45af52 100644 --- "a/docs/source/Instruction/\346\224\257\346\214\201\347\232\204\346\250\241\345\236\213\345\222\214\346\225\260\346\215\256\351\233\206.md" +++ "b/docs/source/Instruction/\346\224\257\346\214\201\347\232\204\346\250\241\345\236\213\345\222\214\346\225\260\346\215\256\351\233\206.md" @@ -456,6 +456,9 @@ |[damo/nlp_polylm_13b_text_generation](https://modelscope.cn/models/damo/nlp_polylm_13b_text_generation)|polylm|default|-|-|[DAMO-NLP-MT/polylm-13b](https://huggingface.co/DAMO-NLP-MT/polylm-13b)| |[AI-ModelScope/aya-expanse-8b](https://modelscope.cn/models/AI-ModelScope/aya-expanse-8b)|aya|aya|transformers>=4.44.0|-|[CohereForAI/aya-expanse-8b](https://huggingface.co/CohereForAI/aya-expanse-8b)| |[AI-ModelScope/aya-expanse-32b](https://modelscope.cn/models/AI-ModelScope/aya-expanse-32b)|aya|aya|transformers>=4.44.0|-|[CohereForAI/aya-expanse-32b](https://huggingface.co/CohereForAI/aya-expanse-32b)| +|[answerdotai/ModernBERT-base](https://modelscope.cn/models/answerdotai/ModernBERT-base)|modern_bert|dummy|-|-|[answerdotai/ModernBERT-base](https://huggingface.co/answerdotai/ModernBERT-base)| +|[answerdotai/ModernBERT-large](https://modelscope.cn/models/answerdotai/ModernBERT-large)|modern_bert|dummy|-|-|[answerdotai/ModernBERT-large](https://huggingface.co/answerdotai/ModernBERT-large)| +|[iic/nlp_structbert_backbone_base_std](https://modelscope.cn/models/iic/nlp_structbert_backbone_base_std)|bert|dummy|-|-|-| ### 多模态大模型 @@ -466,24 +469,24 @@ |[Qwen/Qwen-VL-Chat-Int4](https://modelscope.cn/models/Qwen/Qwen-VL-Chat-Int4)|qwen_vl|qwen_vl|-|vision|[Qwen/Qwen-VL-Chat-Int4](https://huggingface.co/Qwen/Qwen-VL-Chat-Int4)| |[Qwen/Qwen-Audio-Chat](https://modelscope.cn/models/Qwen/Qwen-Audio-Chat)|qwen_audio|qwen_audio|-|audio|[Qwen/Qwen-Audio-Chat](https://huggingface.co/Qwen/Qwen-Audio-Chat)| |[Qwen/Qwen-Audio](https://modelscope.cn/models/Qwen/Qwen-Audio)|qwen_audio|qwen_audio|-|audio|[Qwen/Qwen-Audio](https://huggingface.co/Qwen/Qwen-Audio)| -|[Qwen/Qwen2-VL-2B-Instruct](https://modelscope.cn/models/Qwen/Qwen2-VL-2B-Instruct)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav|vision, video|[Qwen/Qwen2-VL-2B-Instruct](https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct)| -|[Qwen/Qwen2-VL-7B-Instruct](https://modelscope.cn/models/Qwen/Qwen2-VL-7B-Instruct)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav|vision, video|[Qwen/Qwen2-VL-7B-Instruct](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct)| -|[Qwen/Qwen2-VL-72B-Instruct](https://modelscope.cn/models/Qwen/Qwen2-VL-72B-Instruct)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav|vision, video|[Qwen/Qwen2-VL-72B-Instruct](https://huggingface.co/Qwen/Qwen2-VL-72B-Instruct)| -|[Qwen/Qwen2-VL-2B](https://modelscope.cn/models/Qwen/Qwen2-VL-2B)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav|vision, video|[Qwen/Qwen2-VL-2B](https://huggingface.co/Qwen/Qwen2-VL-2B)| -|[Qwen/Qwen2-VL-7B](https://modelscope.cn/models/Qwen/Qwen2-VL-7B)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav|vision, video|[Qwen/Qwen2-VL-7B](https://huggingface.co/Qwen/Qwen2-VL-7B)| -|[Qwen/Qwen2-VL-72B](https://modelscope.cn/models/Qwen/Qwen2-VL-72B)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav|vision, 
video|[Qwen/Qwen2-VL-72B](https://huggingface.co/Qwen/Qwen2-VL-72B)| -|[Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4](https://modelscope.cn/models/Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav|vision, video|[Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4](https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4)| -|[Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int4](https://modelscope.cn/models/Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int4)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav|vision, video|[Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int4](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int4)| -|[Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int4](https://modelscope.cn/models/Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int4)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav|vision, video|[Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int4](https://huggingface.co/Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int4)| -|[Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int8](https://modelscope.cn/models/Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int8)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav|vision, video|[Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int8](https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int8)| -|[Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int8](https://modelscope.cn/models/Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int8)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav|vision, video|[Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int8](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int8)| -|[Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int8](https://modelscope.cn/models/Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int8)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav|vision, video|[Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int8](https://huggingface.co/Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int8)| -|[Qwen/Qwen2-VL-2B-Instruct-AWQ](https://modelscope.cn/models/Qwen/Qwen2-VL-2B-Instruct-AWQ)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav|vision, video|[Qwen/Qwen2-VL-2B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct-AWQ)| -|[Qwen/Qwen2-VL-7B-Instruct-AWQ](https://modelscope.cn/models/Qwen/Qwen2-VL-7B-Instruct-AWQ)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav|vision, video|[Qwen/Qwen2-VL-7B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct-AWQ)| -|[Qwen/Qwen2-VL-72B-Instruct-AWQ](https://modelscope.cn/models/Qwen/Qwen2-VL-72B-Instruct-AWQ)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav|vision, video|[Qwen/Qwen2-VL-72B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2-VL-72B-Instruct-AWQ)| +|[Qwen/Qwen2-VL-2B-Instruct](https://modelscope.cn/models/Qwen/Qwen2-VL-2B-Instruct)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav, decord|vision, video|[Qwen/Qwen2-VL-2B-Instruct](https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct)| +|[Qwen/Qwen2-VL-7B-Instruct](https://modelscope.cn/models/Qwen/Qwen2-VL-7B-Instruct)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav, decord|vision, video|[Qwen/Qwen2-VL-7B-Instruct](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct)| +|[Qwen/Qwen2-VL-72B-Instruct](https://modelscope.cn/models/Qwen/Qwen2-VL-72B-Instruct)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav, decord|vision, video|[Qwen/Qwen2-VL-72B-Instruct](https://huggingface.co/Qwen/Qwen2-VL-72B-Instruct)| +|[Qwen/Qwen2-VL-2B](https://modelscope.cn/models/Qwen/Qwen2-VL-2B)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav, decord|vision, video|[Qwen/Qwen2-VL-2B](https://huggingface.co/Qwen/Qwen2-VL-2B)| 
+|[Qwen/Qwen2-VL-7B](https://modelscope.cn/models/Qwen/Qwen2-VL-7B)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav, decord|vision, video|[Qwen/Qwen2-VL-7B](https://huggingface.co/Qwen/Qwen2-VL-7B)| +|[Qwen/Qwen2-VL-72B](https://modelscope.cn/models/Qwen/Qwen2-VL-72B)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav, decord|vision, video|[Qwen/Qwen2-VL-72B](https://huggingface.co/Qwen/Qwen2-VL-72B)| +|[Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4](https://modelscope.cn/models/Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav, decord|vision, video|[Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4](https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4)| +|[Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int4](https://modelscope.cn/models/Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int4)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav, decord|vision, video|[Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int4](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int4)| +|[Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int4](https://modelscope.cn/models/Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int4)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav, decord|vision, video|[Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int4](https://huggingface.co/Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int4)| +|[Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int8](https://modelscope.cn/models/Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int8)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav, decord|vision, video|[Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int8](https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int8)| +|[Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int8](https://modelscope.cn/models/Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int8)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav, decord|vision, video|[Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int8](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int8)| +|[Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int8](https://modelscope.cn/models/Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int8)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav, decord|vision, video|[Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int8](https://huggingface.co/Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int8)| +|[Qwen/Qwen2-VL-2B-Instruct-AWQ](https://modelscope.cn/models/Qwen/Qwen2-VL-2B-Instruct-AWQ)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav, decord|vision, video|[Qwen/Qwen2-VL-2B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct-AWQ)| +|[Qwen/Qwen2-VL-7B-Instruct-AWQ](https://modelscope.cn/models/Qwen/Qwen2-VL-7B-Instruct-AWQ)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav, decord|vision, video|[Qwen/Qwen2-VL-7B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct-AWQ)| +|[Qwen/Qwen2-VL-72B-Instruct-AWQ](https://modelscope.cn/models/Qwen/Qwen2-VL-72B-Instruct-AWQ)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav, decord|vision, video|[Qwen/Qwen2-VL-72B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2-VL-72B-Instruct-AWQ)| |[Qwen/Qwen2-Audio-7B-Instruct](https://modelscope.cn/models/Qwen/Qwen2-Audio-7B-Instruct)|qwen2_audio|qwen2_audio|transformers>=4.45, librosa|audio|[Qwen/Qwen2-Audio-7B-Instruct](https://huggingface.co/Qwen/Qwen2-Audio-7B-Instruct)| |[Qwen/Qwen2-Audio-7B](https://modelscope.cn/models/Qwen/Qwen2-Audio-7B)|qwen2_audio|qwen2_audio|transformers>=4.45, librosa|audio|[Qwen/Qwen2-Audio-7B](https://huggingface.co/Qwen/Qwen2-Audio-7B)| -|[Qwen/QVQ-72B-Preview](https://modelscope.cn/models/Qwen/QVQ-72B-Preview)|qvq|qvq|transformers>=4.45, qwen_vl_utils, pyav|vision, 
video|[Qwen/QVQ-72B-Preview](https://huggingface.co/Qwen/QVQ-72B-Preview)| +|[Qwen/QVQ-72B-Preview](https://modelscope.cn/models/Qwen/QVQ-72B-Preview)|qvq|qvq|transformers>=4.45, qwen_vl_utils, pyav, decord|vision, video|[Qwen/QVQ-72B-Preview](https://huggingface.co/Qwen/QVQ-72B-Preview)| |[AIDC-AI/Ovis1.6-Gemma2-9B](https://modelscope.cn/models/AIDC-AI/Ovis1.6-Gemma2-9B)|ovis1_6|ovis1_6|transformers>=4.42|vision|[AIDC-AI/Ovis1.6-Gemma2-9B](https://huggingface.co/AIDC-AI/Ovis1.6-Gemma2-9B)| |[ZhipuAI/glm-4v-9b](https://modelscope.cn/models/ZhipuAI/glm-4v-9b)|glm4v|glm4v|transformers>=4.42|-|[THUDM/glm-4v-9b](https://huggingface.co/THUDM/glm-4v-9b)| |[ZhipuAI/glm-edge-v-2b](https://modelscope.cn/models/ZhipuAI/glm-edge-v-2b)|glm_edge_v|glm_edge_v|transformers>=4.46|vision|[THUDM/glm-edge-v-2b](https://huggingface.co/THUDM/glm-edge-v-2b)| diff --git a/docs/source_en/Instruction/Supported-models-and-datasets.md b/docs/source_en/Instruction/Supported-models-and-datasets.md index 56397f528..b5ac0a249 100644 --- a/docs/source_en/Instruction/Supported-models-and-datasets.md +++ b/docs/source_en/Instruction/Supported-models-and-datasets.md @@ -456,6 +456,9 @@ The table below introduces the models integrated with ms-swift: |[damo/nlp_polylm_13b_text_generation](https://modelscope.cn/models/damo/nlp_polylm_13b_text_generation)|polylm|default|-|-|[DAMO-NLP-MT/polylm-13b](https://huggingface.co/DAMO-NLP-MT/polylm-13b)| |[AI-ModelScope/aya-expanse-8b](https://modelscope.cn/models/AI-ModelScope/aya-expanse-8b)|aya|aya|transformers>=4.44.0|-|[CohereForAI/aya-expanse-8b](https://huggingface.co/CohereForAI/aya-expanse-8b)| |[AI-ModelScope/aya-expanse-32b](https://modelscope.cn/models/AI-ModelScope/aya-expanse-32b)|aya|aya|transformers>=4.44.0|-|[CohereForAI/aya-expanse-32b](https://huggingface.co/CohereForAI/aya-expanse-32b)| +|[answerdotai/ModernBERT-base](https://modelscope.cn/models/answerdotai/ModernBERT-base)|modern_bert|dummy|-|-|[answerdotai/ModernBERT-base](https://huggingface.co/answerdotai/ModernBERT-base)| +|[answerdotai/ModernBERT-large](https://modelscope.cn/models/answerdotai/ModernBERT-large)|modern_bert|dummy|-|-|[answerdotai/ModernBERT-large](https://huggingface.co/answerdotai/ModernBERT-large)| +|[iic/nlp_structbert_backbone_base_std](https://modelscope.cn/models/iic/nlp_structbert_backbone_base_std)|bert|dummy|-|-|-| ### Multimodal large models @@ -466,24 +469,24 @@ The table below introduces the models integrated with ms-swift: |[Qwen/Qwen-VL-Chat-Int4](https://modelscope.cn/models/Qwen/Qwen-VL-Chat-Int4)|qwen_vl|qwen_vl|-|vision|[Qwen/Qwen-VL-Chat-Int4](https://huggingface.co/Qwen/Qwen-VL-Chat-Int4)| |[Qwen/Qwen-Audio-Chat](https://modelscope.cn/models/Qwen/Qwen-Audio-Chat)|qwen_audio|qwen_audio|-|audio|[Qwen/Qwen-Audio-Chat](https://huggingface.co/Qwen/Qwen-Audio-Chat)| |[Qwen/Qwen-Audio](https://modelscope.cn/models/Qwen/Qwen-Audio)|qwen_audio|qwen_audio|-|audio|[Qwen/Qwen-Audio](https://huggingface.co/Qwen/Qwen-Audio)| -|[Qwen/Qwen2-VL-2B-Instruct](https://modelscope.cn/models/Qwen/Qwen2-VL-2B-Instruct)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav|vision, video|[Qwen/Qwen2-VL-2B-Instruct](https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct)| -|[Qwen/Qwen2-VL-7B-Instruct](https://modelscope.cn/models/Qwen/Qwen2-VL-7B-Instruct)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav|vision, video|[Qwen/Qwen2-VL-7B-Instruct](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct)| 
-|[Qwen/Qwen2-VL-72B-Instruct](https://modelscope.cn/models/Qwen/Qwen2-VL-72B-Instruct)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav|vision, video|[Qwen/Qwen2-VL-72B-Instruct](https://huggingface.co/Qwen/Qwen2-VL-72B-Instruct)| -|[Qwen/Qwen2-VL-2B](https://modelscope.cn/models/Qwen/Qwen2-VL-2B)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav|vision, video|[Qwen/Qwen2-VL-2B](https://huggingface.co/Qwen/Qwen2-VL-2B)| -|[Qwen/Qwen2-VL-7B](https://modelscope.cn/models/Qwen/Qwen2-VL-7B)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav|vision, video|[Qwen/Qwen2-VL-7B](https://huggingface.co/Qwen/Qwen2-VL-7B)| -|[Qwen/Qwen2-VL-72B](https://modelscope.cn/models/Qwen/Qwen2-VL-72B)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav|vision, video|[Qwen/Qwen2-VL-72B](https://huggingface.co/Qwen/Qwen2-VL-72B)| -|[Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4](https://modelscope.cn/models/Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav|vision, video|[Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4](https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4)| -|[Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int4](https://modelscope.cn/models/Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int4)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav|vision, video|[Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int4](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int4)| -|[Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int4](https://modelscope.cn/models/Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int4)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav|vision, video|[Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int4](https://huggingface.co/Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int4)| -|[Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int8](https://modelscope.cn/models/Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int8)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav|vision, video|[Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int8](https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int8)| -|[Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int8](https://modelscope.cn/models/Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int8)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav|vision, video|[Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int8](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int8)| -|[Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int8](https://modelscope.cn/models/Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int8)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav|vision, video|[Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int8](https://huggingface.co/Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int8)| -|[Qwen/Qwen2-VL-2B-Instruct-AWQ](https://modelscope.cn/models/Qwen/Qwen2-VL-2B-Instruct-AWQ)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav|vision, video|[Qwen/Qwen2-VL-2B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct-AWQ)| -|[Qwen/Qwen2-VL-7B-Instruct-AWQ](https://modelscope.cn/models/Qwen/Qwen2-VL-7B-Instruct-AWQ)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav|vision, video|[Qwen/Qwen2-VL-7B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct-AWQ)| -|[Qwen/Qwen2-VL-72B-Instruct-AWQ](https://modelscope.cn/models/Qwen/Qwen2-VL-72B-Instruct-AWQ)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav|vision, video|[Qwen/Qwen2-VL-72B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2-VL-72B-Instruct-AWQ)| +|[Qwen/Qwen2-VL-2B-Instruct](https://modelscope.cn/models/Qwen/Qwen2-VL-2B-Instruct)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav, decord|vision, video|[Qwen/Qwen2-VL-2B-Instruct](https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct)| 
+|[Qwen/Qwen2-VL-7B-Instruct](https://modelscope.cn/models/Qwen/Qwen2-VL-7B-Instruct)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav, decord|vision, video|[Qwen/Qwen2-VL-7B-Instruct](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct)| +|[Qwen/Qwen2-VL-72B-Instruct](https://modelscope.cn/models/Qwen/Qwen2-VL-72B-Instruct)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav, decord|vision, video|[Qwen/Qwen2-VL-72B-Instruct](https://huggingface.co/Qwen/Qwen2-VL-72B-Instruct)| +|[Qwen/Qwen2-VL-2B](https://modelscope.cn/models/Qwen/Qwen2-VL-2B)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav, decord|vision, video|[Qwen/Qwen2-VL-2B](https://huggingface.co/Qwen/Qwen2-VL-2B)| +|[Qwen/Qwen2-VL-7B](https://modelscope.cn/models/Qwen/Qwen2-VL-7B)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav, decord|vision, video|[Qwen/Qwen2-VL-7B](https://huggingface.co/Qwen/Qwen2-VL-7B)| +|[Qwen/Qwen2-VL-72B](https://modelscope.cn/models/Qwen/Qwen2-VL-72B)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav, decord|vision, video|[Qwen/Qwen2-VL-72B](https://huggingface.co/Qwen/Qwen2-VL-72B)| +|[Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4](https://modelscope.cn/models/Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav, decord|vision, video|[Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4](https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4)| +|[Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int4](https://modelscope.cn/models/Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int4)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav, decord|vision, video|[Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int4](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int4)| +|[Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int4](https://modelscope.cn/models/Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int4)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav, decord|vision, video|[Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int4](https://huggingface.co/Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int4)| +|[Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int8](https://modelscope.cn/models/Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int8)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav, decord|vision, video|[Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int8](https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int8)| +|[Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int8](https://modelscope.cn/models/Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int8)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav, decord|vision, video|[Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int8](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int8)| +|[Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int8](https://modelscope.cn/models/Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int8)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav, decord|vision, video|[Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int8](https://huggingface.co/Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int8)| +|[Qwen/Qwen2-VL-2B-Instruct-AWQ](https://modelscope.cn/models/Qwen/Qwen2-VL-2B-Instruct-AWQ)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav, decord|vision, video|[Qwen/Qwen2-VL-2B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct-AWQ)| +|[Qwen/Qwen2-VL-7B-Instruct-AWQ](https://modelscope.cn/models/Qwen/Qwen2-VL-7B-Instruct-AWQ)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav, decord|vision, video|[Qwen/Qwen2-VL-7B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct-AWQ)| +|[Qwen/Qwen2-VL-72B-Instruct-AWQ](https://modelscope.cn/models/Qwen/Qwen2-VL-72B-Instruct-AWQ)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav, decord|vision, 
video|[Qwen/Qwen2-VL-72B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2-VL-72B-Instruct-AWQ)| |[Qwen/Qwen2-Audio-7B-Instruct](https://modelscope.cn/models/Qwen/Qwen2-Audio-7B-Instruct)|qwen2_audio|qwen2_audio|transformers>=4.45, librosa|audio|[Qwen/Qwen2-Audio-7B-Instruct](https://huggingface.co/Qwen/Qwen2-Audio-7B-Instruct)| |[Qwen/Qwen2-Audio-7B](https://modelscope.cn/models/Qwen/Qwen2-Audio-7B)|qwen2_audio|qwen2_audio|transformers>=4.45, librosa|audio|[Qwen/Qwen2-Audio-7B](https://huggingface.co/Qwen/Qwen2-Audio-7B)| -|[Qwen/QVQ-72B-Preview](https://modelscope.cn/models/Qwen/QVQ-72B-Preview)|qvq|qvq|transformers>=4.45, qwen_vl_utils, pyav|vision, video|[Qwen/QVQ-72B-Preview](https://huggingface.co/Qwen/QVQ-72B-Preview)| +|[Qwen/QVQ-72B-Preview](https://modelscope.cn/models/Qwen/QVQ-72B-Preview)|qvq|qvq|transformers>=4.45, qwen_vl_utils, pyav, decord|vision, video|[Qwen/QVQ-72B-Preview](https://huggingface.co/Qwen/QVQ-72B-Preview)| |[AIDC-AI/Ovis1.6-Gemma2-9B](https://modelscope.cn/models/AIDC-AI/Ovis1.6-Gemma2-9B)|ovis1_6|ovis1_6|transformers>=4.42|vision|[AIDC-AI/Ovis1.6-Gemma2-9B](https://huggingface.co/AIDC-AI/Ovis1.6-Gemma2-9B)| |[ZhipuAI/glm-4v-9b](https://modelscope.cn/models/ZhipuAI/glm-4v-9b)|glm4v|glm4v|transformers>=4.42|-|[THUDM/glm-4v-9b](https://huggingface.co/THUDM/glm-4v-9b)| |[ZhipuAI/glm-edge-v-2b](https://modelscope.cn/models/ZhipuAI/glm-edge-v-2b)|glm_edge_v|glm_edge_v|transformers>=4.46|vision|[THUDM/glm-edge-v-2b](https://huggingface.co/THUDM/glm-edge-v-2b)| diff --git a/examples/deploy/client/README.md b/examples/deploy/client/README.md deleted file mode 100644 index 5198b501b..000000000 --- a/examples/deploy/client/README.md +++ /dev/null @@ -1 +0,0 @@ -In each client `.py` program, we have added the `run_deploy` context. The `run_deploy` function is a simple way to deploy locally, making it convenient for users to run the program directly. In common deployment scenarios, users only need to remove the deployment context and modify the `host` and `port` in order to use the client. diff --git a/examples/deploy/client/llm/swift_client.py b/examples/deploy/client/llm/swift_client.py index bad18dd86..16df8d3af 100644 --- a/examples/deploy/client/llm/swift_client.py +++ b/examples/deploy/client/llm/swift_client.py @@ -53,6 +53,7 @@ def run_client(host: str = '127.0.0.1', port: int = 8000): DeployArguments) from swift.plugin import InferStats # TODO: The current 'pt' deployment does not support automatic batch. + # NOTE: In a real deployment scenario, please comment out the context of run_deploy. with run_deploy( DeployArguments(model='Qwen/Qwen2.5-1.5B-Instruct', verbose=False, log_interval=-1, infer_backend='vllm')) as port: diff --git a/examples/deploy/client/mllm/swift_client.py b/examples/deploy/client/mllm/swift_client.py index 2dc76e689..85f7f418c 100644 --- a/examples/deploy/client/mllm/swift_client.py +++ b/examples/deploy/client/mllm/swift_client.py @@ -118,6 +118,7 @@ def run_client(host: str = '127.0.0.1', port: int = 8000): DeployArguments) from swift.plugin import InferStats # TODO: The current 'pt' deployment does not support automatic batch. + # NOTE: In a real deployment scenario, please comment out the context of run_deploy. 
with run_deploy( DeployArguments(model='Qwen/Qwen2-VL-2B-Instruct', verbose=False, log_interval=-1, infer_backend='vllm')) as port: diff --git a/examples/notebook/qwen2.5-self-cognition/self-cognition-sft.ipynb b/examples/notebook/qwen2.5-self-cognition/self-cognition-sft.ipynb index 65c43fdc6..1ecd96cad 100644 --- a/examples/notebook/qwen2.5-self-cognition/self-cognition-sft.ipynb +++ b/examples/notebook/qwen2.5-self-cognition/self-cognition-sft.ipynb @@ -8,7 +8,9 @@ "\n", "Here is a demonstration of using python to perform self-cognition SFT of Qwen2.5-3B-Instruct. Through this tutorial, you can quickly understand some details of swift sft, which will be of great help in customizing ms-swift for you~\n", "\n", - "Are you ready? Let's begin the journey..." + "Are you ready? Let's begin the journey...\n", + "\n", + "中文版:https://modelscope.cn/notebook/share/ipynb/4340fdeb/self-cognition-sft.ipynb" ] }, { diff --git a/examples/notebook/qwen2.5-self-cognition/sft.sh b/examples/notebook/qwen2.5-self-cognition/sft.sh index 43f97974f..5881365a0 100644 --- a/examples/notebook/qwen2.5-self-cognition/sft.sh +++ b/examples/notebook/qwen2.5-self-cognition/sft.sh @@ -26,5 +26,5 @@ swift sft \ --warmup_ratio 0.05 \ --dataloader_num_workers 4 \ --dataset_num_proc 4 \ - --model_author 小黄 'Xiao Huang' \ - --model_name '魔搭' 'ModelScope' + --model_name 小黄 'Xiao Huang' \ + --model_author '魔搭' 'ModelScope' diff --git a/examples/infer/pt/all_to_all.sh b/examples/train/all_to_all/infer.sh similarity index 100% rename from examples/infer/pt/all_to_all.sh rename to examples/train/all_to_all/infer.sh diff --git a/examples/train/seq_cls/bert/deploy.sh b/examples/train/seq_cls/bert/deploy.sh new file mode 100644 index 000000000..13825d349 --- /dev/null +++ b/examples/train/seq_cls/bert/deploy.sh @@ -0,0 +1,9 @@ +CUDA_VISIBLE_DEVICES=0 \ +swift deploy \ + --model output/vx-xxx/checkpoint-xxx \ + --served_model_name bert-base-chinese + +# curl http://localhost:8000/v1/chat/completions -H "Content-Type: application/json" -d '{ +# "model": "bert-base-chinese", +# "messages": [{"role": "user", "content": "Task: Sentiment Classification\nSentence: 包装差,容易被调包。\nCategory: negative, positive\nOutput:"}] +# }' diff --git a/examples/train/seq_cls/bert/infer.sh b/examples/train/seq_cls/bert/infer.sh new file mode 100644 index 000000000..abd8f1f02 --- /dev/null +++ b/examples/train/seq_cls/bert/infer.sh @@ -0,0 +1,5 @@ +CUDA_VISIBLE_DEVICES=0 \ +swift infer \ + --model output/vx-xxx/checkpoint-xxx \ + --load_data_args true \ + --max_batch_size 16 diff --git a/examples/train/seq_cls/bert/sft.sh b/examples/train/seq_cls/bert/sft.sh new file mode 100644 index 000000000..35081e0af --- /dev/null +++ b/examples/train/seq_cls/bert/sft.sh @@ -0,0 +1,24 @@ +# If `num_labels` is provided, it will be considered a classification task, +# and AutoModelForSequenceClassification will be used to load the model. +# The BERT model does not require templates, so it can usually be used without registration. 
+CUDA_VISIBLE_DEVICES=0 \ +swift sft \ + --model AI-ModelScope/bert-base-chinese \ + --train_type full \ + --dataset 'DAMO_NLP/jd:cls#2000' \ + --torch_dtype bfloat16 \ + --num_train_epochs 1 \ + --per_device_train_batch_size 1 \ + --per_device_eval_batch_size 1 \ + --learning_rate 1e-4 \ + --gradient_accumulation_steps 16 \ + --eval_steps 50 \ + --save_steps 50 \ + --save_total_limit 2 \ + --logging_steps 5 \ + --max_length 512 \ + --output_dir output \ + --warmup_ratio 0.05 \ + --dataloader_num_workers 4 \ + --num_labels 2 \ + --task_type seq_cls diff --git a/examples/train/seq_cls/qwen2_5/deploy.sh b/examples/train/seq_cls/qwen2_5/deploy.sh new file mode 100644 index 000000000..5476dae49 --- /dev/null +++ b/examples/train/seq_cls/qwen2_5/deploy.sh @@ -0,0 +1,8 @@ +CUDA_VISIBLE_DEVICES=0 \ +swift deploy \ + --adapters output/vx-xxx/checkpoint-xxx + +# curl http://localhost:8000/v1/chat/completions -H "Content-Type: application/json" -d '{ +# "model": "Qwen2.5-7B", +# "messages": [{"role": "user", "content": "Task: Sentiment Classification\nSentence: 包装差,容易被调包。\nCategory: negative, positive\nOutput:"}] +# }' diff --git a/examples/train/seq_cls/infer.sh b/examples/train/seq_cls/qwen2_5/infer.sh similarity index 83% rename from examples/train/seq_cls/infer.sh rename to examples/train/seq_cls/qwen2_5/infer.sh index c994148de..43aa93bcc 100644 --- a/examples/train/seq_cls/infer.sh +++ b/examples/train/seq_cls/qwen2_5/infer.sh @@ -1,6 +1,5 @@ CUDA_VISIBLE_DEVICES=0 \ swift infer \ --adapters output/vx-xxx/checkpoint-xxx \ - --max_new_tokens 2048 \ --load_data_args true \ --max_batch_size 16 diff --git a/examples/train/seq_cls/sft.sh b/examples/train/seq_cls/qwen2_5/sft.sh similarity index 100% rename from examples/train/seq_cls/sft.sh rename to examples/train/seq_cls/qwen2_5/sft.sh diff --git a/swift/llm/__init__.py b/swift/llm/__init__.py index 1c5013173..2173ce93b 100644 --- a/swift/llm/__init__.py +++ b/swift/llm/__init__.py @@ -18,7 +18,8 @@ from .model import (register_model, MODEL_MAPPING, ModelType, get_model_tokenizer, safe_snapshot_download, HfConfigFactory, ModelInfo, ModelMeta, ModelKeys, register_model_arch, MultiModelKeys, ModelArch, get_model_arch, MODEL_ARCH_MAPPING, get_model_info_meta, get_model_name, ModelGroup, - Model, get_model_tokenizer_with_flash_attn, get_model_tokenizer_multimodal, load_by_unsloth) + Model, get_model_tokenizer_with_flash_attn, get_model_tokenizer_multimodal, load_by_unsloth, + git_clone_github) from .dataset import (AlpacaPreprocessor, ResponsePreprocessor, MessagesPreprocessor, AutoPreprocessor, DATASET_MAPPING, MediaResource, register_dataset, register_dataset_info, EncodePreprocessor, LazyLLMDataset, ConstantLengthDataset, standard_keys, load_dataset, DATASET_TYPE, @@ -51,7 +52,7 @@ 'ModelInfo', 'ModelMeta', 'ModelKeys', 'register_model_arch', 'MultiModelKeys', 'ModelArch', 'MODEL_ARCH_MAPPING', 'get_model_arch', 'get_model_info_meta', 'get_model_name', 'register_model', 'ModelGroup', 'Model', 'get_model_tokenizer_with_flash_attn', 'get_model_tokenizer_multimodal', - 'load_by_unsloth' + 'load_by_unsloth', 'git_clone_github' ], 'dataset': [ 'AlpacaPreprocessor', 'ClsPreprocessor', 'ComposePreprocessor', 'MessagesPreprocessor', 'DATASET_MAPPING', diff --git a/swift/llm/argument/infer_args.py b/swift/llm/argument/infer_args.py index ab5f994e1..29a586a07 100644 --- a/swift/llm/argument/infer_args.py +++ b/swift/llm/argument/infer_args.py @@ -132,11 +132,9 @@ def _init_result_path(self, folder_name: str) -> None: def _init_stream(self): self.eval_human = 
not (self.dataset and self.split_dataset_ratio > 0 or self.val_dataset) - if self.stream and self.template: - template_meta = get_template_meta(self.template) - if self.num_beams != 1 or not template_meta.support_stream: - self.stream = False - logger.info('Setting args.stream: False') + if self.stream and self.num_beams != 1: + self.stream = False + logger.info('Setting args.stream: False') def _init_pt_ddp(self): if self.infer_backend != 'pt' or not is_dist(): diff --git a/swift/llm/dataset/dataset/llm.py b/swift/llm/dataset/dataset/llm.py index d0d5b002c..56af8a47e 100644 --- a/swift/llm/dataset/dataset/llm.py +++ b/swift/llm/dataset/dataset/llm.py @@ -9,17 +9,20 @@ from ..register import DatasetMeta, SubsetDataset, register_dataset -def _concat_inst_inp_alpaca_zh(inst: str, inp: str) -> str: - if inp.startswith('输入:'): - inp = inp[3:] - return f'{inst}\n{inp}' +class AlpacaZhPreprocessor(AlpacaPreprocessor): + + @classmethod + def concat_inst_input(cls, instruction, input_): + if input_ and input_.startswith('输入:'): + input_ = input_[3:] + return super().concat_inst_input(instruction, input_) register_dataset( DatasetMeta( ms_dataset_id='AI-ModelScope/alpaca-gpt4-data-zh', hf_dataset_id='llm-wizard/alpaca-gpt4-data-zh', - preprocess_func=AlpacaPreprocessor(concat_inst_input=_concat_inst_inp_alpaca_zh), + preprocess_func=AlpacaZhPreprocessor(), tags=['chat', 'general', '🔥'], )) diff --git a/swift/llm/dataset/preprocessor/core.py b/swift/llm/dataset/preprocessor/core.py index 24c6fa73a..c3d692f4c 100644 --- a/swift/llm/dataset/preprocessor/core.py +++ b/swift/llm/dataset/preprocessor/core.py @@ -312,18 +312,14 @@ def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]: class AlpacaPreprocessor(ResponsePreprocessor): - def __init__(self, - *, - concat_inst_input: Union[Callable[[str, str], str]] = '\n', - columns_mapping: Optional[Dict[str, str]] = None, - **kwargs) -> None: - """Alpaca format preprocessor - - Args: - concat_inst_input: The concat sep between instruction and input - """ - super().__init__(columns_mapping=columns_mapping, **kwargs) - self.concat_inst_input = concat_inst_input + @classmethod + def concat_inst_input(cls, instruction, input_): + if instruction and input_: + query = f'{instruction}\n{input_}' + else: + query = instruction or input_ + assert isinstance(query, str), f'query: {query}' + return query def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]: instruction = row.pop('instruction', None) @@ -331,15 +327,7 @@ def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]: output = row.pop('output', None) if output is not None: row['response'] = output - - if instruction is not None or input_ is not None: - instruction = instruction or '' - input_ = input_ or '' - if isinstance(self.concat_inst_input, str): - query = instruction + self.concat_inst_input + input_ - else: - query = self.concat_inst_input(instruction, input_) - row['query'] = query + row['query'] = self.concat_inst_input(instruction, input_) return super().preprocess(row) diff --git a/swift/llm/infer/deploy.py b/swift/llm/infer/deploy.py index 1d1956da1..ff0ab8ec5 100644 --- a/swift/llm/infer/deploy.py +++ b/swift/llm/infer/deploy.py @@ -121,6 +121,8 @@ def _post_process(self, request_info, response, return_cmpl_response: bool = Fal def _set_request_config(self, request_config) -> None: default_request_config = self.args.get_request_config() + if default_request_config is None: + return for key, val in asdict(request_config).items(): default_val = 
getattr(default_request_config, key) if default_val is not None and (val is None or isinstance(val, (list, tuple)) and len(val) == 0): diff --git a/swift/llm/model/__init__.py b/swift/llm/model/__init__.py index d0db4befb..754d71520 100644 --- a/swift/llm/model/__init__.py +++ b/swift/llm/model/__init__.py @@ -6,4 +6,4 @@ get_default_torch_dtype, get_model_info_meta, get_model_name, get_model_tokenizer, get_model_tokenizer_multimodal, get_model_tokenizer_with_flash_attn, get_model_with_value_head, load_by_unsloth, register_model) -from .utils import HfConfigFactory, ModelInfo, safe_snapshot_download +from .utils import HfConfigFactory, ModelInfo, git_clone_github, safe_snapshot_download diff --git a/swift/llm/model/constant.py b/swift/llm/model/constant.py index 4ac9de3d7..a87f901c7 100644 --- a/swift/llm/model/constant.py +++ b/swift/llm/model/constant.py @@ -94,6 +94,9 @@ class LLMModelType: polylm = 'polylm' aya = 'aya' + modern_bert = 'modern_bert' + bert = 'bert' + class MLLMModelType: qwen_vl = 'qwen_vl' diff --git a/swift/llm/model/model/__init__.py b/swift/llm/model/model/__init__.py index 8808a518b..a972ec64e 100644 --- a/swift/llm/model/model/__init__.py +++ b/swift/llm/model/model/__init__.py @@ -1,2 +1,2 @@ -from . import (baai, baichuan, codefuse, deepseek, gemma, glm, internlm, llama, llava, llm, mamba, microsoft, minicpm, - mistral, mllm, mplug, openbuddy, qwen, telechat, yi) +from . import (baai, baichuan, bert, codefuse, deepseek, gemma, glm, internlm, llama, llava, llm, mamba, microsoft, + minicpm, mistral, mllm, mplug, openbuddy, qwen, telechat, yi) diff --git a/swift/llm/model/model/bert.py b/swift/llm/model/model/bert.py new file mode 100644 index 000000000..f83aef353 --- /dev/null +++ b/swift/llm/model/model/bert.py @@ -0,0 +1,38 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +from transformers import AutoConfig + +from swift.utils import get_logger +from ..constant import LLMModelType +from ..register import Model, ModelGroup, ModelMeta, get_model_tokenizer_from_local, register_model + +logger = get_logger() + + +def get_model_tokenizer_modern_bert(model_dir, *args, **kwargs): + model_config = AutoConfig.from_pretrained(model_dir, trust_remote_code=True) + model_config.reference_compile = False + kwargs['model_config'] = model_config + return get_model_tokenizer_from_local(model_dir, *args, **kwargs) + + +register_model( + ModelMeta( + LLMModelType.modern_bert, [ + ModelGroup([ + Model('answerdotai/ModernBERT-base', 'answerdotai/ModernBERT-base'), + Model('answerdotai/ModernBERT-large', 'answerdotai/ModernBERT-large'), + ]) + ], + None, + get_model_tokenizer_modern_bert, + requires=['transformers>=4.48'], + tags=['bert'])) + +register_model( + ModelMeta( + LLMModelType.bert, [ModelGroup([ + Model('iic/nlp_structbert_backbone_base_std'), + ])], + None, + get_model_tokenizer_from_local, + tags=['bert'])) diff --git a/swift/llm/model/register.py b/swift/llm/model/register.py index ca5003ce3..a98406eb8 100644 --- a/swift/llm/model/register.py +++ b/swift/llm/model/register.py @@ -53,7 +53,7 @@ class ModelMeta: # Used to list the model_ids from modelscope/huggingface, # which participate in the automatic inference of the model_type. 
model_groups: List[ModelGroup] - template: str + template: Optional[str] get_function: GetModelTokenizerFunction model_arch: Optional[str] = None @@ -70,6 +70,8 @@ class ModelMeta: tags: List[str] = field(default_factory=list) def __post_init__(self): + if self.template is None: + self.template = 'dummy' if not isinstance(self.model_groups, (list, tuple)): self.model_groups = [self.model_groups] @@ -508,7 +510,9 @@ def get_model_tokenizer( tokenizer.model_info = model_info tokenizer.model_meta = model_meta - pad_token = tokenizer.pad_token_id or tokenizer.eos_token_id + pad_token = tokenizer.pad_token_id + if pad_token is None: + pad_token = tokenizer.eos_token_id if tokenizer.eos_token_id is None: tokenizer.eos_token_id = pad_token if tokenizer.pad_token_id is None: diff --git a/swift/llm/model/utils.py b/swift/llm/model/utils.py index 7195b266e..efacbc91f 100644 --- a/swift/llm/model/utils.py +++ b/swift/llm/model/utils.py @@ -274,6 +274,8 @@ def git_clone_github(github_url: str, local_repo_name: Optional[str] = None, branch: Optional[str] = None, commit_hash: Optional[str] = None) -> str: + if github_url.endswith('.git'): + github_url = github_url[:-4] git_cache_dir = os.path.join(get_cache_dir(), '_github') os.makedirs(git_cache_dir, exist_ok=True) if local_repo_name is None: @@ -282,8 +284,7 @@ def git_clone_github(github_url: str, local_repo_path = os.path.join(git_cache_dir, local_repo_name) with safe_ddp_context(hash_id=local_repo_path): if not os.path.exists(local_repo_path): - if not github_url.endswith('.git'): - github_url = f'{github_url}.git' + github_url = f'{github_url}.git' command = ['git', '-C', git_cache_dir, 'clone', github_url, local_repo_name] command_str = f"git -C '{git_cache_dir}' clone '{github_url}' {local_repo_name}" if branch is not None: diff --git a/swift/llm/template/base.py b/swift/llm/template/base.py index 1ec5107cf..7384678cc 100644 --- a/swift/llm/template/base.py +++ b/swift/llm/template/base.py @@ -179,6 +179,12 @@ def _kto_encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: encoded['label'] = label return encoded + def _seq_cls_encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: + encoded = self._encode(inputs) + if inputs.label is not None: + encoded['labels'] = int(inputs.label) + return encoded + def encode(self, inputs: Union[TemplateInputs, Dict[str, Any], InferRequest], return_template_inputs: bool = False) -> Dict[str, Any]: @@ -201,8 +207,10 @@ def encode(self, encoded = Template._encode(self, inputs) for key in ['images', 'audios', 'videos']: encoded[key] = getattr(inputs, key) - elif self.mode in {'pt', 'train', 'seq_cls'}: + elif self.mode in {'pt', 'train'}: encoded = self._encode(inputs) + elif self.mode == 'seq_cls': + encoded = self._seq_cls_encode(inputs) elif self.mode == 'rlhf': encoded = self._rlhf_encode(inputs) elif self.mode == 'kto': @@ -599,7 +607,8 @@ def _swift_encode(self, inputs: StdTemplateInputs): def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: template_backend = self.template_backend - if self.template_meta.template_type == 'dummy' and self.use_chat_template and not self.is_training: + if (self.template_meta.template_type == 'dummy' and self.use_chat_template and not self.is_training + and self.mode != 'seq_cls'): template_backend = 'jinja' res_context_list, loss_scale_list, answer_len = ( self._swift_encode(inputs) if template_backend == 'swift' else self._jinja_encode(inputs)) @@ -649,10 +658,6 @@ def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: for k in 
list(encoded.keys()): if k.endswith('loss_scale'): encoded[k] = None - - # sequence_classification - if inputs.label is not None: - encoded['label'] = inputs.label return encoded def _debug_logger(self, generate_ids): @@ -807,7 +812,7 @@ def _seq_cls_data_collator(self, batch: List[Dict[str, Any]], *, padding_to: Optional[int] = None) -> Dict[str, Any]: - labels = [b['label'] for b in batch if b.get('label') is not None] + labels = [b.pop('labels') for b in batch if b.get('labels') is not None] res = self._data_collator(batch, padding_to=padding_to) if labels: res['labels'] = torch.tensor(labels, dtype=torch.long) @@ -930,8 +935,9 @@ def print_inputs(self, inputs: Dict[str, Any], tokenizer_kwargs: Optional[Dict[s if val is not None: key_upper = key.upper() logger.info(f'[{key_upper}_IDS] {val}') - val_str = self.safe_decode(val, **tokenizer_kwargs) - logger.info(f'[{key_upper}] {val_str}') + if isinstance(val, (list, tuple, torch.Tensor)): + val_str = self.safe_decode(val, **tokenizer_kwargs) + logger.info(f'[{key_upper}] {val_str}') if inputs.get('loss_scale') is not None: val = inputs['loss_scale'] logger.info(f'[LOSS_SCALE] {val}') diff --git a/swift/llm/template/template/microsoft.py b/swift/llm/template/template/microsoft.py index d94f441fe..e7a46d9f6 100644 --- a/swift/llm/template/template/microsoft.py +++ b/swift/llm/template/template/microsoft.py @@ -87,7 +87,7 @@ def decode(self, generate_ids: List[int], **kwargs) -> Any: chat_sep=None, suffix=[''], template_cls=FlorenceTemplate, - support_stream=False)) + )) @dataclass diff --git a/swift/llm/template/template/utils.py b/swift/llm/template/template/utils.py index f1c8182fd..fcbdddf19 100644 --- a/swift/llm/template/template/utils.py +++ b/swift/llm/template/template/utils.py @@ -22,8 +22,9 @@ class ChatmlTemplateMeta(TemplateMeta): @dataclass class EmptyTemplateMeta(TemplateMeta): prefix: Prompt = field(default_factory=list) - prompt: Prompt = field(default_factory=list) - chat_sep: Optional[Prompt] = field(default_factory=list) + prompt: Prompt = field(default_factory=lambda: ['{{QUERY}}']) + chat_sep: Optional[Prompt] = None + auto_add_bos: bool = True register_template(ChatmlTemplateMeta(LLMTemplateType.chatml)) diff --git a/swift/llm/template/template_meta.py b/swift/llm/template/template_meta.py index 5ca9ea136..98520516c 100644 --- a/swift/llm/template/template_meta.py +++ b/swift/llm/template/template_meta.py @@ -47,7 +47,6 @@ class TemplateMeta: placeholder_tokens: List[Union[int, str]] = field(default_factory=list) default_tools_prompt: str = 'react_en' - support_stream: bool = True def to_generate_template_meta(self) -> 'TemplateMeta': self = deepcopy(self) @@ -60,7 +59,6 @@ def to_generate_template_meta(self) -> 'TemplateMeta': auto_add_bos=True, stop_words=self.stop_words, placeholder_tokens=self.placeholder_tokens, - support_stream=self.support_stream, ) @staticmethod diff --git a/swift/ui/app.py b/swift/ui/app.py index 5ea31dfea..92d87e5a4 100644 --- a/swift/ui/app.py +++ b/swift/ui/app.py @@ -87,6 +87,13 @@ def run(self): if is_gradio_app: from swift.utils import find_free_port LLMInfer.element('port').value = str(find_free_port()) + for f in fields(self.args): + if getattr(self.args, f.name) and f.name in LLMInfer.elements() and hasattr( + LLMInfer.elements()[f.name], 'value') and f.name != 'port': + value = getattr(self.args, f.name) + if isinstance(value, list): + value = ' '.join([v or '' for v in value]) + LLMInfer.elements()[f.name].value = value app.load(LLMInfer.deploy_model, 
list(LLMInfer.valid_elements().values()), [LLMInfer.element('runtime_tab'), LLMInfer.element('running_tasks')]) diff --git a/swift/ui/llm_infer/llm_infer.py b/swift/ui/llm_infer/llm_infer.py index d855d34d8..6b6b609fe 100644 --- a/swift/ui/llm_infer/llm_infer.py +++ b/swift/ui/llm_infer/llm_infer.py @@ -297,7 +297,7 @@ def deploy_model(cls, *args): time.sleep(1) cnt += 1 if cnt >= 60: - logger.warn(f'Deploy costing too much time, please check log file: {log_file}') + logger.warning_once(f'Deploy costing too much time, please check log file: {log_file}') logger.info('Deploy done.') cls.deployed = True running_task = Runtime.refresh_tasks(log_file) diff --git a/swift/utils/torch_utils.py b/swift/utils/torch_utils.py index 512a50ba7..d318e752e 100644 --- a/swift/utils/torch_utils.py +++ b/swift/utils/torch_utils.py @@ -221,9 +221,9 @@ def find_all_linears(model: nn.Module) -> List[str]: else: linear_cls = [nn.Linear] - # 'score': classification model + # 'score', 'classifier': classification model # 'v_head': reward model - ignore_layers = [lm_head_name, 'score', 'v_head'] + ignore_layers = [lm_head_name, 'score', 'v_head', 'classifier'] return _find_layers( model, lambda name, module: isinstance(module, tuple(linear_cls)) and all(layer not in name for layer in ignore_layers)) diff --git a/tests/general/test_dataset.py b/tests/general/test_dataset.py index cf4da8312..371401fbe 100644 --- a/tests/general/test_dataset.py +++ b/tests/general/test_dataset.py @@ -15,8 +15,11 @@ def test_sft(): # _test_dataset(['AI-ModelScope/Duet-v0.5']) # _test_dataset(['swift/SlimOrca', 'swift/cosmopedia-100k']) # _test_dataset(['OmniData/Zhihu-KOL-More-Than-100-Upvotes']) - _test_dataset(['OmniData/Zhihu-KOL']) - # _test_dataset(['AI-ModelScope/alpaca-gpt4-data-zh#1000', 'AI-ModelScope/alpaca-gpt4-data-en#200']) + # _test_dataset(['OmniData/Zhihu-KOL']) + _test_dataset([ + 'AI-ModelScope/alpaca-gpt4-data-zh#1000', 'AI-ModelScope/alpaca-gpt4-data-en#1000', + 'AI-ModelScope/LongAlpaca-12k#1000' + ]) # _test_dataset(['swift/Infinity-Instruct:all']) # _test_dataset(['swift/sharegpt:all']) # _test_dataset(['AI-ModelScope/sharegpt_gpt4:all']) diff --git a/tests/general/test_stream.py b/tests/general/test_stream.py index 08828d12f..ad2069622 100644 --- a/tests/general/test_stream.py +++ b/tests/general/test_stream.py @@ -3,11 +3,10 @@ def test_local_dataset(): # please use git clone - local_dataset = '/mnt/nas2/huangjintao.hjt/work/datasets/swift-sft-mixture:firefly#100' - dataset = load_dataset(datasets=[local_dataset], streaming=True)[0] - for i, x in enumerate(dataset): - pass - print(i, x) + from swift.llm import git_clone_github + model_dir = git_clone_github('https://www.modelscope.cn/datasets/swift/swift-sft-mixture.git') + dataset = load_dataset(datasets=[f'{model_dir}:firefly'], streaming=True)[0] + print(next(iter(dataset))) def test_hub_dataset(): diff --git a/tests/train/test_cls.py b/tests/train/test_cls.py index 5d20f790b..27ed5b902 100644 --- a/tests/train/test_cls.py +++ b/tests/train/test_cls.py @@ -1,9 +1,11 @@ import os +os.environ['CUDA_VISIBLE_DEVICES'] = '0' + kwargs = { 'per_device_train_batch_size': 2, 'per_device_eval_batch_size': 2, - 'save_steps': 10, + 'save_steps': 50, 'gradient_accumulation_steps': 4, 'num_train_epochs': 1, } @@ -12,10 +14,31 @@ def test_llm(): from swift.llm import TrainArguments, sft_main, infer_main, InferArguments result = sft_main( - TrainArguments(model='Qwen/Qwen2.5-7B-Instruct', num_labels=2, dataset=['DAMO_NLP/jd:cls#2000'], **kwargs)) + TrainArguments( + 
model='Qwen/Qwen2.5-1.5B-Instruct', + train_type='lora', + num_labels=2, + dataset=['DAMO_NLP/jd:cls#2000'], + **kwargs)) last_model_checkpoint = result['last_model_checkpoint'] infer_main(InferArguments(adapters=last_model_checkpoint, load_data_args=True)) +def test_bert(): + + from swift.llm import TrainArguments, sft_main, infer_main, InferArguments + result = sft_main( + TrainArguments( + model='answerdotai/ModernBERT-base', + # model='iic/nlp_structbert_backbone_base_std', + train_type='full', + num_labels=2, + dataset=['DAMO_NLP/jd:cls#2000'], + **kwargs)) + last_model_checkpoint = result['last_model_checkpoint'] + infer_main(InferArguments(model=last_model_checkpoint, load_data_args=True)) + + if __name__ == '__main__': - test_llm() + # test_llm() + test_bert()
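
The Alpaca preprocessor refactor above turns `concat_inst_input` into a classmethod: the base class joins `instruction` and `input` with a single newline, and `AlpacaZhPreprocessor` strips a leading `输入:` ("Input:") marker before delegating to the base class. A minimal sketch of the resulting behavior, assuming ms-swift with this patch installed (the sample strings are hypothetical; the `AlpacaZhPreprocessor` module path is the one added in swift/llm/dataset/dataset/llm.py):

# Behavioral sketch of the refactored Alpaca preprocessing; exercises only the string logic,
# so no model or dataset download is needed.
from swift.llm import AlpacaPreprocessor
from swift.llm.dataset.dataset.llm import AlpacaZhPreprocessor

# Base class: instruction and input are joined with a newline.
assert AlpacaPreprocessor.concat_inst_input('Summarize the text below.', 'Some text.') \
    == 'Summarize the text below.\nSome text.'

# zh variant: a leading '输入:' prefix on the input field is stripped before delegating.
assert AlpacaZhPreprocessor.concat_inst_input('总结下面的文本。', '输入:一些文本。') \
    == '总结下面的文本。\n一些文本。'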
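
The new BERT sequence-classification path can also be driven end to end from Python; the sketch below mirrors examples/train/seq_cls/bert/sft.sh and test_bert() in tests/train/test_cls.py, with hyperparameters taken from those files (the GPU index is illustrative, and the dataset-row comment is an assumption based on the `inputs.label` handling in template/base.py):

# Hedged sketch of the BERT seq_cls flow added in this patch.
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'  # illustrative GPU selection

from swift.llm import TrainArguments, sft_main, infer_main, InferArguments

# With `num_labels` set, the model is loaded via AutoModelForSequenceClassification.
# Custom seq_cls datasets are assumed to carry a `label` field per sample, e.g.
# {"messages": [...], "label": 1}.
result = sft_main(
    TrainArguments(
        model='answerdotai/ModernBERT-base',  # or 'iic/nlp_structbert_backbone_base_std'; ModernBERT requires transformers>=4.48
        train_type='full',
        task_type='seq_cls',
        num_labels=2,
        dataset=['DAMO_NLP/jd:cls#2000'],
        num_train_epochs=1,
        per_device_train_batch_size=2,
        per_device_eval_batch_size=2))
last_model_checkpoint = result['last_model_checkpoint']
infer_main(InferArguments(model=last_model_checkpoint, load_data_args=True))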