diff --git a/README.md b/README.md
index b1eb720..3b80bf3 100644
--- a/README.md
+++ b/README.md
@@ -138,7 +138,8 @@ models:
 - chatglm4: #自定义的模型名称
     alias: null # 别名 例如 gpt4,gpt3
     enable: true # false true 控制是否启动模型worker
-    model_name_or_path: /home/dev/model/THUDM/glm-4-9b-chat/
+    model_config:
+      model_name_or_path: /home/dev/model/THUDM/glm-4-9b-chat/
     model_type: chatglm # qwen yi internlm
     work_mode: vllm # vllm hf lmdeploy-turbomind lmdeploy-pytorch
     # lora: # lora 配置
@@ -162,7 +163,11 @@ models:
 - qwen: #自定义的模型名称
     alias: gpt-4,gpt-3.5-turbo,gpt-3.5-turbo-16k # 别名 例如 gpt4,gpt3
     enable: true # false true 控制是否启动模型worker
-    model_name_or_path: /home/dev/model/qwen/Qwen1___5-14B-Chat/
+    model_config:
+      model_name_or_path: /home/dev/model/qwen/Qwen1___5-14B-Chat/
+      enable_prefix_caching: false
+      dtype: auto
+      max_model_len: 65536
     model_type: qwen # qwen yi internlm
     work_mode: vllm # vllm hf lmdeploy-turbomind lmdeploy-pytorch
     device: gpu # gpu / cpu
@@ -176,7 +181,8 @@ models:
 - bge-base-zh:
     alias: null # 别名
     enable: true # false true
-    model_name_or_path: /home/dev/model/Xorbits/bge-base-zh-v1___5/
+    model_config:
+      model_name_or_path: /home/dev/model/Xorbits/bge-base-zh-v1___5/
     model_type: embedding_infinity # embedding_infinity
     work_mode: hf
     device: gpu # gpu / cpu
@@ -187,7 +193,8 @@ models:
 - bge-reranker-base:
     alias: null # 别名
     enable: true # false true 控制是否启动模型worker
-    model_name_or_path: /home/dev/model/Xorbits/bge-reranker-base/
+    model_config:
+      model_name_or_path: /home/dev/model/Xorbits/bge-reranker-base/
     model_type: embedding_infinity # embedding_infinity
     work_mode: hf
     device: gpu # gpu / cpu
diff --git a/gpt_server/script/config.yaml b/gpt_server/script/config.yaml
index 3207f20..eb1dc75 100644
--- a/gpt_server/script/config.yaml
+++ b/gpt_server/script/config.yaml
@@ -14,10 +14,12 @@ models:
 - minicpmv:
     alias: null
     enable: false
-    model_name_or_path: /home/dev/model/OpenBMB/MiniCPM-V-2_6/
     model_type: minicpmv
+    model_config:
+      model_name_or_path: /home/dev/model/OpenBMB/MiniCPM-V-2_6/
+      enable_prefix_caching: false
+      dtype: auto
     work_mode: lmdeploy-turbomind
-    enable_prefix_caching: false
     device: gpu
     workers:
     - gpus:
@@ -25,10 +27,11 @@ models:
 - internvl2:
     alias: null
     enable: false
-    model_name_or_path: /home/dev/model/OpenGVLab/InternVL2-40B-AWQ/
+    model_config:
+      model_name_or_path: /home/dev/model/OpenGVLab/InternVL2-40B-AWQ/
+      enable_prefix_caching: false
     model_type: internvl2
     work_mode: lmdeploy-turbomind
-    enable_prefix_caching: false
     device: gpu
     workers:
     - gpus:
@@ -36,26 +39,32 @@ models:
 - chatglm4:
     alias: chatglm3
     enable: true
-    model_name_or_path: /home/dev/model/ZhipuAI/glm-4-9b-chat
+    model_config:
+      model_name_or_path: /home/dev/model/ZhipuAI/glm-4-9b-chat
+      enable_prefix_caching: false
     model_type: chatglm
     work_mode: vllm
-    enable_prefix_caching: false
     device: gpu
     workers:
     - gpus:
       - 3
+
 - qwen-72b:
     alias: qwen,gpt-4,gpt-3.5-turbo,gpt-3.5-turbo-16k
     enable: true
-    model_name_or_path: /home/dev/model/qwen/Qwen2___5-72B-Instruct-AWQ/
+    model_config:
+      model_name_or_path: /home/dev/model/qwen/Qwen2___5-72B-Instruct-AWQ/
+      enable_prefix_caching: true
+      dtype: auto
+      max_model_len: 65536
     model_type: qwen
     work_mode: lmdeploy-turbomind
-    enable_prefix_caching: true
    device: gpu
     workers:
     - gpus:
       - 0
       - 1
+
 - piccolo-base-zh:
     alias: null
     enable: true
@@ -78,6 +87,7 @@ models:
     workers:
     - gpus:
       - 2
+
 - bge-reranker-base:
     alias: null
     enable: true