From 334f6811921ad87f3a1e14c70b3231396a2c3020 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E5=AE=87?= <506610466@qq.com> Date: Sat, 26 Oct 2024 15:25:35 +0800 Subject: [PATCH] update config --- gpt_server/script/config.yaml | 6 +- gpt_server/script/config_example.yaml | 170 +++----------------------- gpt_server/serving/server_ui.py | 34 +++--- 3 files changed, 36 insertions(+), 174 deletions(-) diff --git a/gpt_server/script/config.yaml b/gpt_server/script/config.yaml index eb1dc75..1dbc47b 100644 --- a/gpt_server/script/config.yaml +++ b/gpt_server/script/config.yaml @@ -14,11 +14,10 @@ models: - minicpmv: alias: null enable: false - model_type: minicpmv model_config: model_name_or_path: /home/dev/model/OpenBMB/MiniCPM-V-2_6/ enable_prefix_caching: false - dtype: auto + model_type: minicpmv work_mode: lmdeploy-turbomind device: gpu workers: @@ -48,7 +47,6 @@ models: workers: - gpus: - 3 - - qwen-72b: alias: qwen,gpt-4,gpt-3.5-turbo,gpt-3.5-turbo-16k enable: true @@ -64,7 +62,6 @@ models: - gpus: - 0 - 1 - - piccolo-base-zh: alias: null enable: true @@ -87,7 +84,6 @@ models: workers: - gpus: - 2 - - bge-reranker-base: alias: null enable: true diff --git a/gpt_server/script/config_example.yaml b/gpt_server/script/config_example.yaml index 086703f..8642bd4 100644 --- a/gpt_server/script/config_example.yaml +++ b/gpt_server/script/config_example.yaml @@ -18,21 +18,12 @@ model_worker_args: controller_address: http://localhost:21001 models: -- qwenvl: #自定义的模型名称 - alias: null # 别名 例如 gpt4,gpt3 - enable: false # false true - model_name_or_path: /home/dev/model/qwen/Qwen2-VL-7B-Instruct/ - model_type: qwen # qwen chatglm3 yi internlm - work_mode: lmdeploy-turbomind # vllm hf lmdeploy-turbomind lmdeploy-pytorch - device: gpu # gpu / cpu - workers: - - gpus: - # - 1 - - 0 - internvl2: #自定义的模型名称 alias: null # 别名 例如 gpt4,gpt3 enable: false # false true - model_name_or_path: /home/dev/model/OpenGVLab/InternVL2-40B-AWQ/ + model_config: + model_name_or_path: /home/dev/model/OpenGVLab/InternVL2-40B-AWQ/ + enable_prefix_caching: false model_type: internvl2 # qwen yi internlm work_mode: lmdeploy-turbomind # vllm hf lmdeploy-turbomind lmdeploy-pytorch device: gpu # gpu / cpu @@ -42,26 +33,20 @@ models: - 0 # - gpus: # - 0 -- chatglm4: #自定义的模型名称 - alias: chatglm3 # 别名 例如 gpt4,gpt3 - enable: true # false true - model_name_or_path: /home/dev/model/ZhipuAI/glm-4-9b-chat - model_type: chatglm # qwen yi internlm - work_mode: vllm # vllm hf lmdeploy-turbomind lmdeploy-pytorch - device: gpu # gpu / cpu - workers: - - gpus: - # - 1 - - 0 - qwen: #自定义的模型名称 alias: gpt-4,gpt-3.5-turbo,gpt-3.5-turbo-16k # 别名 例如 gpt4,gpt3 enable: false # false true - model_name_or_path: /home/dev/model/qwen/Qwen2___5-7B-Instruct/ + model_config: + model_name_or_path: /home/dev/model/qwen/Qwen2___5-7B-Instruct/ + enable_prefix_caching: true + dtype: auto + max_model_len: 65536 + # lora: + # test_lora: /home/dev/project/LLaMA-Factory/saves/Qwen1.5-14B-Chat/lora/train_2024-03-22-09-01-32/checkpoint-100 + model_type: qwen # qwen yi internlm work_mode: lmdeploy-turbomind # vllm hf lmdeploy-turbomind lmdeploy-pytorch - # lora: - # test_lora: /home/dev/project/LLaMA-Factory/saves/Qwen1.5-14B-Chat/lora/train_2024-03-22-09-01-32/checkpoint-100 device: gpu # gpu / cpu workers: @@ -69,147 +54,23 @@ models: - 1 # - gpus: # - 3 -- qwen-72b: #自定义的模型名称 - alias: qwen,gpt-4,gpt-3.5-turbo,gpt-3.5-turbo-16k # 别名 例如 gpt4,gpt3 - enable: true # false true - model_name_or_path: /home/dev/model/qwen/Qwen2___5-72B-Instruct-AWQ/ - model_type: qwen # qwen yi internlm - work_mode: lmdeploy-turbomind # vllm hf lmdeploy-turbomind lmdeploy-pytorch - enable_prefix_caching: true # false true - device: gpu # gpu / cpu - workers: - - gpus: - - 3 - - 1 - # - gpus: - # - 1 - -- mixtral: #自定义的模型名称 - alias: null # 别名 例如 gpt4,gpt3 - enable: false # false true - model_name_or_path: /home/dev/model/NousResearch/Nous-Hermes-2-Mixtral-8x7B-SFT/ - model_type: qwen # qwen yi internlm - work_mode: vllm # vllm hf lmdeploy-turbomind lmdeploy-pytorch - device: gpu # gpu / cpu - workers: - - gpus: - - 3 - - 0 - - -- llama3: #自定义的模型名称 - alias: null # 别名 例如 gpt4,gpt3 - enable: false # false true - model_name_or_path: /home/dev/model/unsloth/unsloth/llama-3-8b-Instruct/ - model_type: llama # qwen yi internlm - work_mode: hf # vllm hf lmdeploy-turbomind lmdeploy-pytorch - device: gpu # gpu / cpu - workers: - - gpus: - - 0 - -- yi: #自定义的模型名称 - alias: null # 别名 例如 gpt4,gpt3 - enable: false # false true - model_name_or_path: /home/dev/model/01ai/Yi-34B-Chat/ - model_type: yi # qwen yi internlm - work_mode: hf # vllm hf lmdeploy-turbomind lmdeploy-pytorch - device: gpu # gpu / cpu - workers: - - gpus: - - 2 - # - 0 - -- internlm2: #自定义的模型名称 - alias: null # 别名 例如 gpt4,gpt3 - enable: false # false true - model_name_or_path: /home/dev/model/Shanghai_AI_Laboratory/internlm2_5-7b-chat/ - model_type: internlm # qwen yi internlm - work_mode: hf # vllm hf lmdeploy-turbomind lmdeploy-pytorch - device: gpu # gpu / cpu - workers: - - gpus: - - 0 - -# Embedding 模型 -- piccolo-base-zh: - alias: null # 别名 - enable: true # false true - model_name_or_path: /home/dev/model/assets/embeddings/sensenova/piccolo-base-zh/ - model_type: embedding_infinity # embedding_infinity - work_mode: hf - device: gpu # gpu / cpu - workers: - - gpus: - - 2 - -- bce-embedding-base_v1: - alias: text-embedding-ada-002 # 别名 - enable: true # false true - model_name_or_path: /home/dev/model/maidalun1020/bce-embedding-base_v1/ - model_type: embedding_infinity # embedding_infinity - work_mode: hf - device: gpu # gpu / cpu - workers: - - gpus: - - 2 - -- conan: - alias: null # 别名 - enable: true # false true - model_name_or_path: /home/dev/model/model1001/Conan/ - model_type: embedding_infinity # embedding_infinity - work_mode: hf - device: gpu # gpu / cpu - workers: - - gpus: - - 2 - bge-reranker-base: alias: null # 别名 enable: true # false true - model_name_or_path: /home/dev/model/Xorbits/bge-reranker-base/ + model_config: + model_name_or_path: /home/dev/model/Xorbits/bge-reranker-base/ model_type: embedding_infinity # embedding_infinity work_mode: hf device: gpu # gpu / cpu workers: - gpus: - 2 -- puff: - alias: null # 别名 - enable: true # false true - model_name_or_path: /home/dev/model/infgrad/puff-large-v1/ - model_type: embedding_infinity # embedding_infinity - work_mode: hf - device: gpu # gpu / cpu - workers: - - gpus: - - 2 - - acge_text_embedding: alias: text-embedding-ada-002 # 别名 enable: true # false true - model_name_or_path: /home/dev/model/aspire/acge_text_embedding - model_type: embedding_infinity # embedding_infinity - work_mode: hf - device: gpu # gpu / cpu - workers: - - gpus: - - 2 -- yinka: - alias: null # 别名 - enable: false # false true - model_name_or_path: /home/dev/model/Classical/Yinka/ - model_type: embedding_infinity # embedding_infinity - work_mode: hf - device: gpu # gpu / cpu - workers: - - gpus: - - 2 -- xiaobu-embedding: - alias: null # 别名 - enable: true # false true - model_name_or_path: /home/dev/model/lier007/xiaobu-embedding-v2/ + model_config: + model_name_or_path: /home/dev/model/aspire/acge_text_embedding model_type: embedding_infinity # embedding_infinity work_mode: hf device: gpu # gpu / cpu @@ -223,4 +84,3 @@ models: - diff --git a/gpt_server/serving/server_ui.py b/gpt_server/serving/server_ui.py index e6b510b..11a32b6 100644 --- a/gpt_server/serving/server_ui.py +++ b/gpt_server/serving/server_ui.py @@ -142,14 +142,16 @@ def on_change(): st.session_state[f"model_name_{i}"]: { "alias": st.session_state[f"alias_{i}"], "enable": st.session_state[f"enable_{i}"], - "model_name_or_path": st.session_state[ - f"model_name_or_path_{i}" - ], + "model_config": { + "model_name_or_path": st.session_state[ + f"model_name_or_path_{i}" + ], + "enable_prefix_caching": st.session_state[ + f"enable_prefix_caching_{i}" + ], + }, "model_type": st.session_state[f"model_type_{i}"], "work_mode": st.session_state[f"work_mode_{i}"], - "enable_prefix_caching": st.session_state[ - f"enable_prefix_caching_{i}" - ], "device": st.session_state[f"device_{i}"], "workers": yaml.safe_load( st.session_state[f"workers_{i}"] @@ -180,16 +182,18 @@ def on_change(): "new_model_name": { "alias": st.session_state[f"alias_{i}"], "enable": False, - "model_name_or_path": st.session_state[ - f"model_name_or_path_{i}" - ], + "model_config": { + "model_name_or_path": st.session_state[ + f"model_name_or_path_{i}" + ], + "enable_prefix_caching": st.session_state[ + f"enable_prefix_caching_{i}" + ], + }, "model_type": st.session_state[ f"model_type_{i}" ], "work_mode": st.session_state[f"work_mode_{i}"], - "enable_prefix_caching": st.session_state[ - f"enable_prefix_caching_{i}" - ], "device": st.session_state[f"device_{i}"], "workers": yaml.safe_load( st.session_state[f"workers_{i}"] @@ -299,10 +303,12 @@ def on_change(): model_name_input: { "alias": model_alias, "enable": enable, - "model_name_or_path": model_name_or_path, + "model_config": { + "model_name_or_path": model_name_or_path, + "enable_prefix_caching": enable_prefix_caching, + }, "model_type": model_type, "work_mode": work_mode, - "enable_prefix_caching": enable_prefix_caching, "device": device, "workers": workers_value_dict, }