update config

shell-nlp · Oct 26, 2024 · 334f681
1 parent 609cc88
commit 334f681
Show file tree

Hide file tree

Showing 3 changed files with 36 additions and 174 deletions.
diff --git a/gpt_server/script/config.yaml b/gpt_server/script/config.yaml
@@ -14,11 +14,10 @@ models:
 - minicpmv:
     alias: null
     enable: false
-    model_type: minicpmv
     model_config:
       model_name_or_path: /home/dev/model/OpenBMB/MiniCPM-V-2_6/
       enable_prefix_caching: false
-      dtype: auto
+    model_type: minicpmv
     work_mode: lmdeploy-turbomind
     device: gpu
     workers:
@@ -48,7 +47,6 @@ models:
     workers:
     - gpus:
       - 3
-
 - qwen-72b:
     alias: qwen,gpt-4,gpt-3.5-turbo,gpt-3.5-turbo-16k
     enable: true
@@ -64,7 +62,6 @@ models:
     - gpus:
       - 0
       - 1
-
 - piccolo-base-zh:
     alias: null
     enable: true
@@ -87,7 +84,6 @@ models:
     workers:
     - gpus:
       - 2
-
 - bge-reranker-base:
     alias: null
     enable: true

diff --git a/gpt_server/script/config_example.yaml b/gpt_server/script/config_example.yaml
@@ -18,21 +18,12 @@ model_worker_args:
   controller_address: http://localhost:21001
 
 models:
-- qwenvl: #自定义的模型名称
-    alias: null # 别名     例如  gpt4,gpt3
-    enable: false # false true
-    model_name_or_path: /home/dev/model/qwen/Qwen2-VL-7B-Instruct/
-    model_type: qwen # qwen  chatglm3 yi internlm
-    work_mode: lmdeploy-turbomind # vllm hf lmdeploy-turbomind  lmdeploy-pytorch
-    device: gpu # gpu / cpu
-    workers:
-    - gpus:
-      # - 1
-      - 0
 - internvl2: #自定义的模型名称
     alias: null # 别名     例如  gpt4,gpt3
     enable: false # false true
-    model_name_or_path: /home/dev/model/OpenGVLab/InternVL2-40B-AWQ/
+    model_config:
+      model_name_or_path: /home/dev/model/OpenGVLab/InternVL2-40B-AWQ/
+      enable_prefix_caching: false
     model_type: internvl2 # qwen  yi internlm
     work_mode: lmdeploy-turbomind # vllm hf lmdeploy-turbomind  lmdeploy-pytorch
     device: gpu # gpu / cpu
@@ -42,174 +33,44 @@ models:
       - 0
       # - gpus:
       #   - 0
-- chatglm4: #自定义的模型名称
-    alias: chatglm3 # 别名     例如  gpt4,gpt3
-    enable: true # false true
-    model_name_or_path: /home/dev/model/ZhipuAI/glm-4-9b-chat
-    model_type: chatglm # qwen yi internlm
-    work_mode: vllm # vllm hf lmdeploy-turbomind  lmdeploy-pytorch
-    device: gpu # gpu / cpu
-    workers:
-    - gpus:
-      # - 1
-      - 0
 
 - qwen: #自定义的模型名称
     alias: gpt-4,gpt-3.5-turbo,gpt-3.5-turbo-16k # 别名     例如  gpt4,gpt3
     enable: false # false true
-    model_name_or_path: /home/dev/model/qwen/Qwen2___5-7B-Instruct/
+    model_config:
+      model_name_or_path: /home/dev/model/qwen/Qwen2___5-7B-Instruct/
+      enable_prefix_caching: true
+      dtype: auto
+      max_model_len: 65536
+      # lora:
+      #   test_lora: /home/dev/project/LLaMA-Factory/saves/Qwen1.5-14B-Chat/lora/train_2024-03-22-09-01-32/checkpoint-100
+
     model_type: qwen # qwen  yi internlm
     work_mode: lmdeploy-turbomind # vllm hf lmdeploy-turbomind  lmdeploy-pytorch
-    # lora:
-    #   test_lora: /home/dev/project/LLaMA-Factory/saves/Qwen1.5-14B-Chat/lora/train_2024-03-22-09-01-32/checkpoint-100
 
     device: gpu # gpu / cpu
     workers:
     - gpus:
       - 1
       # - gpus:
       #   - 3
-- qwen-72b: #自定义的模型名称
-    alias: qwen,gpt-4,gpt-3.5-turbo,gpt-3.5-turbo-16k # 别名     例如  gpt4,gpt3
-    enable: true # false true
-    model_name_or_path: /home/dev/model/qwen/Qwen2___5-72B-Instruct-AWQ/
-    model_type: qwen # qwen yi internlm
-    work_mode: lmdeploy-turbomind # vllm hf lmdeploy-turbomind  lmdeploy-pytorch
-    enable_prefix_caching: true # false true
-    device: gpu # gpu / cpu
-    workers:
-    - gpus:
-      - 3
-      - 1
-      # - gpus:
-      #   - 1
-
-- mixtral: #自定义的模型名称
-    alias: null # 别名     例如  gpt4,gpt3
-    enable: false # false true
-    model_name_or_path: /home/dev/model/NousResearch/Nous-Hermes-2-Mixtral-8x7B-SFT/
-    model_type: qwen # qwen yi internlm
-    work_mode: vllm # vllm hf lmdeploy-turbomind  lmdeploy-pytorch
-    device: gpu # gpu / cpu
-    workers:
-    - gpus:
-      - 3
-      - 0
-
-
-- llama3: #自定义的模型名称
-    alias: null # 别名     例如  gpt4,gpt3
-    enable: false # false true
-    model_name_or_path: /home/dev/model/unsloth/unsloth/llama-3-8b-Instruct/
-    model_type: llama # qwen yi internlm
-    work_mode: hf # vllm hf lmdeploy-turbomind  lmdeploy-pytorch
-    device: gpu # gpu / cpu
-    workers:
-    - gpus:
-      - 0
-
-- yi: #自定义的模型名称
-    alias: null # 别名     例如  gpt4,gpt3
-    enable: false # false true
-    model_name_or_path: /home/dev/model/01ai/Yi-34B-Chat/
-    model_type: yi # qwen yi internlm
-    work_mode: hf # vllm hf lmdeploy-turbomind  lmdeploy-pytorch
-    device: gpu # gpu / cpu
-    workers:
-    - gpus:
-      - 2
-      # - 0
-
-- internlm2: #自定义的模型名称
-    alias: null # 别名     例如  gpt4,gpt3
-    enable: false # false true
-    model_name_or_path: /home/dev/model/Shanghai_AI_Laboratory/internlm2_5-7b-chat/
-    model_type: internlm # qwen yi internlm
-    work_mode: hf # vllm hf lmdeploy-turbomind  lmdeploy-pytorch
-    device: gpu # gpu / cpu
-    workers:
-    - gpus:
-      - 0
-
-# Embedding 模型
-- piccolo-base-zh:
-    alias: null # 别名   
-    enable: true # false true
-    model_name_or_path: /home/dev/model/assets/embeddings/sensenova/piccolo-base-zh/
-    model_type: embedding_infinity # embedding_infinity
-    work_mode: hf
-    device: gpu # gpu / cpu
-    workers:
-    - gpus:
-      - 2
-
-- bce-embedding-base_v1:
-    alias: text-embedding-ada-002 # 别名   
-    enable: true # false true
-    model_name_or_path: /home/dev/model/maidalun1020/bce-embedding-base_v1/
-    model_type: embedding_infinity # embedding_infinity
-    work_mode: hf
-    device: gpu # gpu / cpu
-    workers:
-    - gpus:
-      - 2
-
-- conan:
-    alias: null # 别名   
-    enable: true # false true
-    model_name_or_path: /home/dev/model/model1001/Conan/
-    model_type: embedding_infinity # embedding_infinity
-    work_mode: hf
-    device: gpu # gpu / cpu
-    workers:
-    - gpus:
-      - 2
 
 - bge-reranker-base:
     alias: null # 别名   
     enable: true # false true
-    model_name_or_path: /home/dev/model/Xorbits/bge-reranker-base/
+    model_config:
+      model_name_or_path: /home/dev/model/Xorbits/bge-reranker-base/
     model_type: embedding_infinity # embedding_infinity
     work_mode: hf
     device: gpu # gpu / cpu
     workers:
     - gpus:
       - 2
-- puff:
-    alias: null # 别名   
-    enable: true # false true
-    model_name_or_path: /home/dev/model/infgrad/puff-large-v1/
-    model_type: embedding_infinity # embedding_infinity
-    work_mode: hf
-    device: gpu # gpu / cpu
-    workers:
-    - gpus:
-      - 2
-
 - acge_text_embedding:
     alias: text-embedding-ada-002 # 别名   
     enable: true # false true
-    model_name_or_path: /home/dev/model/aspire/acge_text_embedding
-    model_type: embedding_infinity # embedding_infinity
-    work_mode: hf
-    device: gpu # gpu / cpu
-    workers:
-    - gpus:
-      - 2
-- yinka:
-    alias: null # 别名   
-    enable: false # false true
-    model_name_or_path: /home/dev/model/Classical/Yinka/
-    model_type: embedding_infinity # embedding_infinity
-    work_mode: hf
-    device: gpu # gpu / cpu
-    workers:
-    - gpus:
-      - 2
-- xiaobu-embedding:
-    alias: null # 别名   
-    enable: true # false true
-    model_name_or_path: /home/dev/model/lier007/xiaobu-embedding-v2/
+    model_config:
+      model_name_or_path: /home/dev/model/aspire/acge_text_embedding
     model_type: embedding_infinity # embedding_infinity
     work_mode: hf
     device: gpu # gpu / cpu
@@ -223,4 +84,3 @@ models:
 
 
 
-
diff --git a/gpt_server/serving/server_ui.py b/gpt_server/serving/server_ui.py
@@ -142,14 +142,16 @@ def on_change():
                             st.session_state[f"model_name_{i}"]: {
                                 "alias": st.session_state[f"alias_{i}"],
                                 "enable": st.session_state[f"enable_{i}"],
-                                "model_name_or_path": st.session_state[
-                                    f"model_name_or_path_{i}"
-                                ],
+                                "model_config": {
+                                    "model_name_or_path": st.session_state[
+                                        f"model_name_or_path_{i}"
+                                    ],
+                                    "enable_prefix_caching": st.session_state[
+                                        f"enable_prefix_caching_{i}"
+                                    ],
+                                },
                                 "model_type": st.session_state[f"model_type_{i}"],
                                 "work_mode": st.session_state[f"work_mode_{i}"],
-                                "enable_prefix_caching": st.session_state[
-                                    f"enable_prefix_caching_{i}"
-                                ],
                                 "device": st.session_state[f"device_{i}"],
                                 "workers": yaml.safe_load(
                                     st.session_state[f"workers_{i}"]
@@ -180,16 +182,18 @@ def on_change():
                                     "new_model_name": {
                                         "alias": st.session_state[f"alias_{i}"],
                                         "enable": False,
-                                        "model_name_or_path": st.session_state[
-                                            f"model_name_or_path_{i}"
-                                        ],
+                                        "model_config": {
+                                            "model_name_or_path": st.session_state[
+                                                f"model_name_or_path_{i}"
+                                            ],
+                                            "enable_prefix_caching": st.session_state[
+                                                f"enable_prefix_caching_{i}"
+                                            ],
+                                        },
                                         "model_type": st.session_state[
                                             f"model_type_{i}"
                                         ],
                                         "work_mode": st.session_state[f"work_mode_{i}"],
-                                        "enable_prefix_caching": st.session_state[
-                                            f"enable_prefix_caching_{i}"
-                                        ],
                                         "device": st.session_state[f"device_{i}"],
                                         "workers": yaml.safe_load(
                                             st.session_state[f"workers_{i}"]
@@ -299,10 +303,12 @@ def on_change():
                     model_name_input: {
                         "alias": model_alias,
                         "enable": enable,
-                        "model_name_or_path": model_name_or_path,
+                        "model_config": {
+                            "model_name_or_path": model_name_or_path,
+                            "enable_prefix_caching": enable_prefix_caching,
+                        },
                         "model_type": model_type,
                         "work_mode": work_mode,
-                        "enable_prefix_caching": enable_prefix_caching,
                         "device": device,
                         "workers": workers_value_dict,
                     }