diff --git "a/docs/source/Instruction/\346\224\257\346\214\201\347\232\204\346\250\241\345\236\213\345\222\214\346\225\260\346\215\256\351\233\206.md" "b/docs/source/Instruction/\346\224\257\346\214\201\347\232\204\346\250\241\345\236\213\345\222\214\346\225\260\346\215\256\351\233\206.md" index 0cf45af52..8b2ecd8df 100644 --- "a/docs/source/Instruction/\346\224\257\346\214\201\347\232\204\346\250\241\345\236\213\345\222\214\346\225\260\346\215\256\351\233\206.md" +++ "b/docs/source/Instruction/\346\224\257\346\214\201\347\232\204\346\250\241\345\236\213\345\222\214\346\225\260\346\215\256\351\233\206.md" @@ -456,9 +456,9 @@ |[damo/nlp_polylm_13b_text_generation](https://modelscope.cn/models/damo/nlp_polylm_13b_text_generation)|polylm|default|-|-|[DAMO-NLP-MT/polylm-13b](https://huggingface.co/DAMO-NLP-MT/polylm-13b)| |[AI-ModelScope/aya-expanse-8b](https://modelscope.cn/models/AI-ModelScope/aya-expanse-8b)|aya|aya|transformers>=4.44.0|-|[CohereForAI/aya-expanse-8b](https://huggingface.co/CohereForAI/aya-expanse-8b)| |[AI-ModelScope/aya-expanse-32b](https://modelscope.cn/models/AI-ModelScope/aya-expanse-32b)|aya|aya|transformers>=4.44.0|-|[CohereForAI/aya-expanse-32b](https://huggingface.co/CohereForAI/aya-expanse-32b)| -|[answerdotai/ModernBERT-base](https://modelscope.cn/models/answerdotai/ModernBERT-base)|modern_bert|dummy|-|-|[answerdotai/ModernBERT-base](https://huggingface.co/answerdotai/ModernBERT-base)| -|[answerdotai/ModernBERT-large](https://modelscope.cn/models/answerdotai/ModernBERT-large)|modern_bert|dummy|-|-|[answerdotai/ModernBERT-large](https://huggingface.co/answerdotai/ModernBERT-large)| -|[iic/nlp_structbert_backbone_base_std](https://modelscope.cn/models/iic/nlp_structbert_backbone_base_std)|bert|dummy|-|-|-| +|[answerdotai/ModernBERT-base](https://modelscope.cn/models/answerdotai/ModernBERT-base)|modern_bert|dummy|transformers>=4.48|bert|[answerdotai/ModernBERT-base](https://huggingface.co/answerdotai/ModernBERT-base)| +|[answerdotai/ModernBERT-large](https://modelscope.cn/models/answerdotai/ModernBERT-large)|modern_bert|dummy|transformers>=4.48|bert|[answerdotai/ModernBERT-large](https://huggingface.co/answerdotai/ModernBERT-large)| +|[iic/nlp_structbert_backbone_base_std](https://modelscope.cn/models/iic/nlp_structbert_backbone_base_std)|bert|dummy|-|bert|-| ### 多模态大模型 @@ -469,24 +469,24 @@ |[Qwen/Qwen-VL-Chat-Int4](https://modelscope.cn/models/Qwen/Qwen-VL-Chat-Int4)|qwen_vl|qwen_vl|-|vision|[Qwen/Qwen-VL-Chat-Int4](https://huggingface.co/Qwen/Qwen-VL-Chat-Int4)| |[Qwen/Qwen-Audio-Chat](https://modelscope.cn/models/Qwen/Qwen-Audio-Chat)|qwen_audio|qwen_audio|-|audio|[Qwen/Qwen-Audio-Chat](https://huggingface.co/Qwen/Qwen-Audio-Chat)| |[Qwen/Qwen-Audio](https://modelscope.cn/models/Qwen/Qwen-Audio)|qwen_audio|qwen_audio|-|audio|[Qwen/Qwen-Audio](https://huggingface.co/Qwen/Qwen-Audio)| -|[Qwen/Qwen2-VL-2B-Instruct](https://modelscope.cn/models/Qwen/Qwen2-VL-2B-Instruct)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav, decord|vision, video|[Qwen/Qwen2-VL-2B-Instruct](https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct)| -|[Qwen/Qwen2-VL-7B-Instruct](https://modelscope.cn/models/Qwen/Qwen2-VL-7B-Instruct)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav, decord|vision, video|[Qwen/Qwen2-VL-7B-Instruct](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct)| -|[Qwen/Qwen2-VL-72B-Instruct](https://modelscope.cn/models/Qwen/Qwen2-VL-72B-Instruct)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav, decord|vision, 
-|[Qwen/Qwen2-VL-2B](https://modelscope.cn/models/Qwen/Qwen2-VL-2B)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav, decord|vision, video|[Qwen/Qwen2-VL-2B](https://huggingface.co/Qwen/Qwen2-VL-2B)|
-|[Qwen/Qwen2-VL-7B](https://modelscope.cn/models/Qwen/Qwen2-VL-7B)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav, decord|vision, video|[Qwen/Qwen2-VL-7B](https://huggingface.co/Qwen/Qwen2-VL-7B)|
-|[Qwen/Qwen2-VL-72B](https://modelscope.cn/models/Qwen/Qwen2-VL-72B)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav, decord|vision, video|[Qwen/Qwen2-VL-72B](https://huggingface.co/Qwen/Qwen2-VL-72B)|
-|[Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4](https://modelscope.cn/models/Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav, decord|vision, video|[Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4](https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4)|
-|[Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int4](https://modelscope.cn/models/Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int4)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav, decord|vision, video|[Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int4](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int4)|
-|[Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int4](https://modelscope.cn/models/Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int4)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav, decord|vision, video|[Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int4](https://huggingface.co/Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int4)|
-|[Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int8](https://modelscope.cn/models/Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int8)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav, decord|vision, video|[Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int8](https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int8)|
-|[Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int8](https://modelscope.cn/models/Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int8)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav, decord|vision, video|[Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int8](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int8)|
-|[Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int8](https://modelscope.cn/models/Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int8)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav, decord|vision, video|[Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int8](https://huggingface.co/Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int8)|
-|[Qwen/Qwen2-VL-2B-Instruct-AWQ](https://modelscope.cn/models/Qwen/Qwen2-VL-2B-Instruct-AWQ)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav, decord|vision, video|[Qwen/Qwen2-VL-2B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct-AWQ)|
-|[Qwen/Qwen2-VL-7B-Instruct-AWQ](https://modelscope.cn/models/Qwen/Qwen2-VL-7B-Instruct-AWQ)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav, decord|vision, video|[Qwen/Qwen2-VL-7B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct-AWQ)|
-|[Qwen/Qwen2-VL-72B-Instruct-AWQ](https://modelscope.cn/models/Qwen/Qwen2-VL-72B-Instruct-AWQ)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav, decord|vision, video|[Qwen/Qwen2-VL-72B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2-VL-72B-Instruct-AWQ)|
+|[Qwen/Qwen2-VL-2B-Instruct](https://modelscope.cn/models/Qwen/Qwen2-VL-2B-Instruct)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils>=0.0.6, pyav, decord|vision, video|[Qwen/Qwen2-VL-2B-Instruct](https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct)|
+|[Qwen/Qwen2-VL-7B-Instruct](https://modelscope.cn/models/Qwen/Qwen2-VL-7B-Instruct)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils>=0.0.6, pyav, decord|vision, video|[Qwen/Qwen2-VL-7B-Instruct](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct)| +|[Qwen/Qwen2-VL-72B-Instruct](https://modelscope.cn/models/Qwen/Qwen2-VL-72B-Instruct)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils>=0.0.6, pyav, decord|vision, video|[Qwen/Qwen2-VL-72B-Instruct](https://huggingface.co/Qwen/Qwen2-VL-72B-Instruct)| +|[Qwen/Qwen2-VL-2B](https://modelscope.cn/models/Qwen/Qwen2-VL-2B)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils>=0.0.6, pyav, decord|vision, video|[Qwen/Qwen2-VL-2B](https://huggingface.co/Qwen/Qwen2-VL-2B)| +|[Qwen/Qwen2-VL-7B](https://modelscope.cn/models/Qwen/Qwen2-VL-7B)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils>=0.0.6, pyav, decord|vision, video|[Qwen/Qwen2-VL-7B](https://huggingface.co/Qwen/Qwen2-VL-7B)| +|[Qwen/Qwen2-VL-72B](https://modelscope.cn/models/Qwen/Qwen2-VL-72B)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils>=0.0.6, pyav, decord|vision, video|[Qwen/Qwen2-VL-72B](https://huggingface.co/Qwen/Qwen2-VL-72B)| +|[Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4](https://modelscope.cn/models/Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils>=0.0.6, pyav, decord|vision, video|[Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4](https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4)| +|[Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int4](https://modelscope.cn/models/Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int4)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils>=0.0.6, pyav, decord|vision, video|[Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int4](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int4)| +|[Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int4](https://modelscope.cn/models/Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int4)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils>=0.0.6, pyav, decord|vision, video|[Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int4](https://huggingface.co/Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int4)| +|[Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int8](https://modelscope.cn/models/Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int8)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils>=0.0.6, pyav, decord|vision, video|[Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int8](https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int8)| +|[Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int8](https://modelscope.cn/models/Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int8)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils>=0.0.6, pyav, decord|vision, video|[Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int8](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int8)| +|[Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int8](https://modelscope.cn/models/Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int8)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils>=0.0.6, pyav, decord|vision, video|[Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int8](https://huggingface.co/Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int8)| +|[Qwen/Qwen2-VL-2B-Instruct-AWQ](https://modelscope.cn/models/Qwen/Qwen2-VL-2B-Instruct-AWQ)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils>=0.0.6, pyav, decord|vision, video|[Qwen/Qwen2-VL-2B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct-AWQ)| +|[Qwen/Qwen2-VL-7B-Instruct-AWQ](https://modelscope.cn/models/Qwen/Qwen2-VL-7B-Instruct-AWQ)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils>=0.0.6, pyav, decord|vision, video|[Qwen/Qwen2-VL-7B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct-AWQ)| 
+|[Qwen/Qwen2-VL-72B-Instruct-AWQ](https://modelscope.cn/models/Qwen/Qwen2-VL-72B-Instruct-AWQ)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils>=0.0.6, pyav, decord|vision, video|[Qwen/Qwen2-VL-72B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2-VL-72B-Instruct-AWQ)|
|[Qwen/Qwen2-Audio-7B-Instruct](https://modelscope.cn/models/Qwen/Qwen2-Audio-7B-Instruct)|qwen2_audio|qwen2_audio|transformers>=4.45, librosa|audio|[Qwen/Qwen2-Audio-7B-Instruct](https://huggingface.co/Qwen/Qwen2-Audio-7B-Instruct)|
|[Qwen/Qwen2-Audio-7B](https://modelscope.cn/models/Qwen/Qwen2-Audio-7B)|qwen2_audio|qwen2_audio|transformers>=4.45, librosa|audio|[Qwen/Qwen2-Audio-7B](https://huggingface.co/Qwen/Qwen2-Audio-7B)|
-|[Qwen/QVQ-72B-Preview](https://modelscope.cn/models/Qwen/QVQ-72B-Preview)|qvq|qvq|transformers>=4.45, qwen_vl_utils, pyav, decord|vision, video|[Qwen/QVQ-72B-Preview](https://huggingface.co/Qwen/QVQ-72B-Preview)|
+|[Qwen/QVQ-72B-Preview](https://modelscope.cn/models/Qwen/QVQ-72B-Preview)|qvq|qvq|transformers>=4.45, qwen_vl_utils>=0.0.6, pyav, decord|vision, video|[Qwen/QVQ-72B-Preview](https://huggingface.co/Qwen/QVQ-72B-Preview)|
|[AIDC-AI/Ovis1.6-Gemma2-9B](https://modelscope.cn/models/AIDC-AI/Ovis1.6-Gemma2-9B)|ovis1_6|ovis1_6|transformers>=4.42|vision|[AIDC-AI/Ovis1.6-Gemma2-9B](https://huggingface.co/AIDC-AI/Ovis1.6-Gemma2-9B)|
|[ZhipuAI/glm-4v-9b](https://modelscope.cn/models/ZhipuAI/glm-4v-9b)|glm4v|glm4v|transformers>=4.42|-|[THUDM/glm-4v-9b](https://huggingface.co/THUDM/glm-4v-9b)|
|[ZhipuAI/glm-edge-v-2b](https://modelscope.cn/models/ZhipuAI/glm-edge-v-2b)|glm_edge_v|glm_edge_v|transformers>=4.46|vision|[THUDM/glm-edge-v-2b](https://huggingface.co/THUDM/glm-edge-v-2b)|
diff --git a/docs/source_en/Instruction/Supported-models-and-datasets.md b/docs/source_en/Instruction/Supported-models-and-datasets.md
index b5ac0a249..6b693513d 100644
--- a/docs/source_en/Instruction/Supported-models-and-datasets.md
+++ b/docs/source_en/Instruction/Supported-models-and-datasets.md
@@ -456,9 +456,9 @@ The table below introduces the models integrated with ms-swift:
|[damo/nlp_polylm_13b_text_generation](https://modelscope.cn/models/damo/nlp_polylm_13b_text_generation)|polylm|default|-|-|[DAMO-NLP-MT/polylm-13b](https://huggingface.co/DAMO-NLP-MT/polylm-13b)|
|[AI-ModelScope/aya-expanse-8b](https://modelscope.cn/models/AI-ModelScope/aya-expanse-8b)|aya|aya|transformers>=4.44.0|-|[CohereForAI/aya-expanse-8b](https://huggingface.co/CohereForAI/aya-expanse-8b)|
|[AI-ModelScope/aya-expanse-32b](https://modelscope.cn/models/AI-ModelScope/aya-expanse-32b)|aya|aya|transformers>=4.44.0|-|[CohereForAI/aya-expanse-32b](https://huggingface.co/CohereForAI/aya-expanse-32b)|
-|[answerdotai/ModernBERT-base](https://modelscope.cn/models/answerdotai/ModernBERT-base)|modern_bert|dummy|-|-|[answerdotai/ModernBERT-base](https://huggingface.co/answerdotai/ModernBERT-base)|
-|[answerdotai/ModernBERT-large](https://modelscope.cn/models/answerdotai/ModernBERT-large)|modern_bert|dummy|-|-|[answerdotai/ModernBERT-large](https://huggingface.co/answerdotai/ModernBERT-large)|
-|[iic/nlp_structbert_backbone_base_std](https://modelscope.cn/models/iic/nlp_structbert_backbone_base_std)|bert|dummy|-|-|-|
+|[answerdotai/ModernBERT-base](https://modelscope.cn/models/answerdotai/ModernBERT-base)|modern_bert|dummy|transformers>=4.48|bert|[answerdotai/ModernBERT-base](https://huggingface.co/answerdotai/ModernBERT-base)|
+|[answerdotai/ModernBERT-large](https://modelscope.cn/models/answerdotai/ModernBERT-large)|modern_bert|dummy|transformers>=4.48|bert|[answerdotai/ModernBERT-large](https://huggingface.co/answerdotai/ModernBERT-large)| +|[iic/nlp_structbert_backbone_base_std](https://modelscope.cn/models/iic/nlp_structbert_backbone_base_std)|bert|dummy|-|bert|-| ### Multimodal large models @@ -469,24 +469,24 @@ The table below introduces the models integrated with ms-swift: |[Qwen/Qwen-VL-Chat-Int4](https://modelscope.cn/models/Qwen/Qwen-VL-Chat-Int4)|qwen_vl|qwen_vl|-|vision|[Qwen/Qwen-VL-Chat-Int4](https://huggingface.co/Qwen/Qwen-VL-Chat-Int4)| |[Qwen/Qwen-Audio-Chat](https://modelscope.cn/models/Qwen/Qwen-Audio-Chat)|qwen_audio|qwen_audio|-|audio|[Qwen/Qwen-Audio-Chat](https://huggingface.co/Qwen/Qwen-Audio-Chat)| |[Qwen/Qwen-Audio](https://modelscope.cn/models/Qwen/Qwen-Audio)|qwen_audio|qwen_audio|-|audio|[Qwen/Qwen-Audio](https://huggingface.co/Qwen/Qwen-Audio)| -|[Qwen/Qwen2-VL-2B-Instruct](https://modelscope.cn/models/Qwen/Qwen2-VL-2B-Instruct)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav, decord|vision, video|[Qwen/Qwen2-VL-2B-Instruct](https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct)| -|[Qwen/Qwen2-VL-7B-Instruct](https://modelscope.cn/models/Qwen/Qwen2-VL-7B-Instruct)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav, decord|vision, video|[Qwen/Qwen2-VL-7B-Instruct](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct)| -|[Qwen/Qwen2-VL-72B-Instruct](https://modelscope.cn/models/Qwen/Qwen2-VL-72B-Instruct)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav, decord|vision, video|[Qwen/Qwen2-VL-72B-Instruct](https://huggingface.co/Qwen/Qwen2-VL-72B-Instruct)| -|[Qwen/Qwen2-VL-2B](https://modelscope.cn/models/Qwen/Qwen2-VL-2B)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav, decord|vision, video|[Qwen/Qwen2-VL-2B](https://huggingface.co/Qwen/Qwen2-VL-2B)| -|[Qwen/Qwen2-VL-7B](https://modelscope.cn/models/Qwen/Qwen2-VL-7B)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav, decord|vision, video|[Qwen/Qwen2-VL-7B](https://huggingface.co/Qwen/Qwen2-VL-7B)| -|[Qwen/Qwen2-VL-72B](https://modelscope.cn/models/Qwen/Qwen2-VL-72B)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav, decord|vision, video|[Qwen/Qwen2-VL-72B](https://huggingface.co/Qwen/Qwen2-VL-72B)| -|[Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4](https://modelscope.cn/models/Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav, decord|vision, video|[Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4](https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4)| -|[Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int4](https://modelscope.cn/models/Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int4)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav, decord|vision, video|[Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int4](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int4)| -|[Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int4](https://modelscope.cn/models/Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int4)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav, decord|vision, video|[Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int4](https://huggingface.co/Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int4)| -|[Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int8](https://modelscope.cn/models/Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int8)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav, decord|vision, video|[Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int8](https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int8)| 
-|[Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int8](https://modelscope.cn/models/Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int8)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav, decord|vision, video|[Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int8](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int8)|
-|[Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int8](https://modelscope.cn/models/Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int8)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav, decord|vision, video|[Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int8](https://huggingface.co/Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int8)|
-|[Qwen/Qwen2-VL-2B-Instruct-AWQ](https://modelscope.cn/models/Qwen/Qwen2-VL-2B-Instruct-AWQ)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav, decord|vision, video|[Qwen/Qwen2-VL-2B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct-AWQ)|
-|[Qwen/Qwen2-VL-7B-Instruct-AWQ](https://modelscope.cn/models/Qwen/Qwen2-VL-7B-Instruct-AWQ)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav, decord|vision, video|[Qwen/Qwen2-VL-7B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct-AWQ)|
-|[Qwen/Qwen2-VL-72B-Instruct-AWQ](https://modelscope.cn/models/Qwen/Qwen2-VL-72B-Instruct-AWQ)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils, pyav, decord|vision, video|[Qwen/Qwen2-VL-72B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2-VL-72B-Instruct-AWQ)|
+|[Qwen/Qwen2-VL-2B-Instruct](https://modelscope.cn/models/Qwen/Qwen2-VL-2B-Instruct)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils>=0.0.6, pyav, decord|vision, video|[Qwen/Qwen2-VL-2B-Instruct](https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct)|
+|[Qwen/Qwen2-VL-7B-Instruct](https://modelscope.cn/models/Qwen/Qwen2-VL-7B-Instruct)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils>=0.0.6, pyav, decord|vision, video|[Qwen/Qwen2-VL-7B-Instruct](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct)|
+|[Qwen/Qwen2-VL-72B-Instruct](https://modelscope.cn/models/Qwen/Qwen2-VL-72B-Instruct)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils>=0.0.6, pyav, decord|vision, video|[Qwen/Qwen2-VL-72B-Instruct](https://huggingface.co/Qwen/Qwen2-VL-72B-Instruct)|
+|[Qwen/Qwen2-VL-2B](https://modelscope.cn/models/Qwen/Qwen2-VL-2B)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils>=0.0.6, pyav, decord|vision, video|[Qwen/Qwen2-VL-2B](https://huggingface.co/Qwen/Qwen2-VL-2B)|
+|[Qwen/Qwen2-VL-7B](https://modelscope.cn/models/Qwen/Qwen2-VL-7B)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils>=0.0.6, pyav, decord|vision, video|[Qwen/Qwen2-VL-7B](https://huggingface.co/Qwen/Qwen2-VL-7B)|
+|[Qwen/Qwen2-VL-72B](https://modelscope.cn/models/Qwen/Qwen2-VL-72B)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils>=0.0.6, pyav, decord|vision, video|[Qwen/Qwen2-VL-72B](https://huggingface.co/Qwen/Qwen2-VL-72B)|
+|[Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4](https://modelscope.cn/models/Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils>=0.0.6, pyav, decord|vision, video|[Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4](https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4)|
+|[Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int4](https://modelscope.cn/models/Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int4)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils>=0.0.6, pyav, decord|vision, video|[Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int4](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int4)|
+|[Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int4](https://modelscope.cn/models/Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int4)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils>=0.0.6, pyav, decord|vision, video|[Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int4](https://huggingface.co/Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int4)|
+|[Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int8](https://modelscope.cn/models/Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int8)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils>=0.0.6, pyav, decord|vision, video|[Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int8](https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int8)|
+|[Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int8](https://modelscope.cn/models/Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int8)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils>=0.0.6, pyav, decord|vision, video|[Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int8](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int8)|
+|[Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int8](https://modelscope.cn/models/Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int8)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils>=0.0.6, pyav, decord|vision, video|[Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int8](https://huggingface.co/Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int8)|
+|[Qwen/Qwen2-VL-2B-Instruct-AWQ](https://modelscope.cn/models/Qwen/Qwen2-VL-2B-Instruct-AWQ)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils>=0.0.6, pyav, decord|vision, video|[Qwen/Qwen2-VL-2B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct-AWQ)|
+|[Qwen/Qwen2-VL-7B-Instruct-AWQ](https://modelscope.cn/models/Qwen/Qwen2-VL-7B-Instruct-AWQ)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils>=0.0.6, pyav, decord|vision, video|[Qwen/Qwen2-VL-7B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct-AWQ)|
+|[Qwen/Qwen2-VL-72B-Instruct-AWQ](https://modelscope.cn/models/Qwen/Qwen2-VL-72B-Instruct-AWQ)|qwen2_vl|qwen2_vl|transformers>=4.45, qwen_vl_utils>=0.0.6, pyav, decord|vision, video|[Qwen/Qwen2-VL-72B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2-VL-72B-Instruct-AWQ)|
|[Qwen/Qwen2-Audio-7B-Instruct](https://modelscope.cn/models/Qwen/Qwen2-Audio-7B-Instruct)|qwen2_audio|qwen2_audio|transformers>=4.45, librosa|audio|[Qwen/Qwen2-Audio-7B-Instruct](https://huggingface.co/Qwen/Qwen2-Audio-7B-Instruct)|
|[Qwen/Qwen2-Audio-7B](https://modelscope.cn/models/Qwen/Qwen2-Audio-7B)|qwen2_audio|qwen2_audio|transformers>=4.45, librosa|audio|[Qwen/Qwen2-Audio-7B](https://huggingface.co/Qwen/Qwen2-Audio-7B)|
-|[Qwen/QVQ-72B-Preview](https://modelscope.cn/models/Qwen/QVQ-72B-Preview)|qvq|qvq|transformers>=4.45, qwen_vl_utils, pyav, decord|vision, video|[Qwen/QVQ-72B-Preview](https://huggingface.co/Qwen/QVQ-72B-Preview)|
+|[Qwen/QVQ-72B-Preview](https://modelscope.cn/models/Qwen/QVQ-72B-Preview)|qvq|qvq|transformers>=4.45, qwen_vl_utils>=0.0.6, pyav, decord|vision, video|[Qwen/QVQ-72B-Preview](https://huggingface.co/Qwen/QVQ-72B-Preview)|
|[AIDC-AI/Ovis1.6-Gemma2-9B](https://modelscope.cn/models/AIDC-AI/Ovis1.6-Gemma2-9B)|ovis1_6|ovis1_6|transformers>=4.42|vision|[AIDC-AI/Ovis1.6-Gemma2-9B](https://huggingface.co/AIDC-AI/Ovis1.6-Gemma2-9B)|
|[ZhipuAI/glm-4v-9b](https://modelscope.cn/models/ZhipuAI/glm-4v-9b)|glm4v|glm4v|transformers>=4.42|-|[THUDM/glm-4v-9b](https://huggingface.co/THUDM/glm-4v-9b)|
|[ZhipuAI/glm-edge-v-2b](https://modelscope.cn/models/ZhipuAI/glm-edge-v-2b)|glm_edge_v|glm_edge_v|transformers>=4.46|vision|[THUDM/glm-edge-v-2b](https://huggingface.co/THUDM/glm-edge-v-2b)|
diff --git a/swift/llm/model/model/qwen.py b/swift/llm/model/model/qwen.py
index 7a9b4255a..80ef247aa 100644
--- a/swift/llm/model/model/qwen.py
+++ b/swift/llm/model/model/qwen.py
@@ -542,7 +542,7 @@ def get_model_tokenizer_qwen2_vl(model_dir: str,
        get_model_tokenizer_qwen2_vl,
        model_arch=ModelArch.qwen2_vl,
        architectures=['Qwen2VLForConditionalGeneration'],
-        requires=['transformers>=4.45', 'qwen_vl_utils', 'pyav', 'decord'],
+        requires=['transformers>=4.45', 'qwen_vl_utils>=0.0.6', 'pyav', 'decord'],
        tags=['vision', 'video']))

register_model(
@@ -556,7 +556,7 @@ def get_model_tokenizer_qwen2_vl(model_dir: str,
        get_model_tokenizer_qwen2_vl,
        model_arch=ModelArch.qwen2_vl,
        architectures=['Qwen2VLForConditionalGeneration'],
-        requires=['transformers>=4.45', 'qwen_vl_utils', 'pyav', 'decord'],
+        requires=['transformers>=4.45', 'qwen_vl_utils>=0.0.6', 'pyav', 'decord'],
        tags=['vision', 'video']))
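
Both hunks tighten the `requires` list of the affected registrations to `qwen_vl_utils>=0.0.6`. As a quick sanity check, the sketch below shows how such a requirement list can be validated against the current environment. This is a minimal illustration using `packaging` and `importlib.metadata`; the `check_requires` helper is hypothetical and is not ms-swift's actual dependency-checking code.

```python
# Hypothetical helper (not ms-swift's own implementation): verify that
# requirement strings like those in the `requires` lists above are satisfied.
from importlib.metadata import PackageNotFoundError, version

from packaging.requirements import Requirement


def check_requires(requires):
    """Raise ImportError if any requirement spec is missing or too old."""
    for spec in requires:
        req = Requirement(spec)  # e.g. 'qwen_vl_utils>=0.0.6'
        try:
            installed = version(req.name)  # installed distribution version
        except PackageNotFoundError:
            raise ImportError(f'Missing dependency, try: pip install "{spec}"')
        if not req.specifier.contains(installed, prereleases=True):
            raise ImportError(
                f'{req.name}=={installed} is installed, but {spec} is required.')


# The requirements pinned by this diff for the qwen2_vl/qvq registrations.
check_requires(['transformers>=4.45', 'qwen_vl_utils>=0.0.6'])
```

If the check fails, `pip install -U "qwen_vl_utils>=0.0.6"` brings the environment in line with the updated tables.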