Skip to content

Commit

Permalink
update config
Browse files Browse the repository at this point in the history
  • Loading branch information
shell-nlp committed Oct 26, 2024
1 parent 609cc88 commit 334f681
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 174 deletions.
6 changes: 1 addition & 5 deletions gpt_server/script/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,10 @@ models:
- minicpmv:
alias: null
enable: false
model_type: minicpmv
model_config:
model_name_or_path: /home/dev/model/OpenBMB/MiniCPM-V-2_6/
enable_prefix_caching: false
dtype: auto
model_type: minicpmv
work_mode: lmdeploy-turbomind
device: gpu
workers:
Expand Down Expand Up @@ -48,7 +47,6 @@ models:
workers:
- gpus:
- 3

- qwen-72b:
alias: qwen,gpt-4,gpt-3.5-turbo,gpt-3.5-turbo-16k
enable: true
Expand All @@ -64,7 +62,6 @@ models:
- gpus:
- 0
- 1

- piccolo-base-zh:
alias: null
enable: true
Expand All @@ -87,7 +84,6 @@ models:
workers:
- gpus:
- 2

- bge-reranker-base:
alias: null
enable: true
Expand Down
170 changes: 15 additions & 155 deletions gpt_server/script/config_example.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,21 +18,12 @@ model_worker_args:
controller_address: http://localhost:21001

models:
- qwenvl: #自定义的模型名称
alias: null # 别名 例如 gpt4,gpt3
enable: false # false true
model_name_or_path: /home/dev/model/qwen/Qwen2-VL-7B-Instruct/
model_type: qwen # qwen chatglm3 yi internlm
work_mode: lmdeploy-turbomind # vllm hf lmdeploy-turbomind lmdeploy-pytorch
device: gpu # gpu / cpu
workers:
- gpus:
# - 1
- 0
- internvl2: #自定义的模型名称
alias: null # 别名 例如 gpt4,gpt3
enable: false # false true
model_name_or_path: /home/dev/model/OpenGVLab/InternVL2-40B-AWQ/
model_config:
model_name_or_path: /home/dev/model/OpenGVLab/InternVL2-40B-AWQ/
enable_prefix_caching: false
model_type: internvl2 # qwen yi internlm
work_mode: lmdeploy-turbomind # vllm hf lmdeploy-turbomind lmdeploy-pytorch
device: gpu # gpu / cpu
Expand All @@ -42,174 +33,44 @@ models:
- 0
# - gpus:
# - 0
- chatglm4: #自定义的模型名称
alias: chatglm3 # 别名 例如 gpt4,gpt3
enable: true # false true
model_name_or_path: /home/dev/model/ZhipuAI/glm-4-9b-chat
model_type: chatglm # qwen yi internlm
work_mode: vllm # vllm hf lmdeploy-turbomind lmdeploy-pytorch
device: gpu # gpu / cpu
workers:
- gpus:
# - 1
- 0

- qwen: #自定义的模型名称
alias: gpt-4,gpt-3.5-turbo,gpt-3.5-turbo-16k # 别名 例如 gpt4,gpt3
enable: false # false true
model_name_or_path: /home/dev/model/qwen/Qwen2___5-7B-Instruct/
model_config:
model_name_or_path: /home/dev/model/qwen/Qwen2___5-7B-Instruct/
enable_prefix_caching: true
dtype: auto
max_model_len: 65536
# lora:
# test_lora: /home/dev/project/LLaMA-Factory/saves/Qwen1.5-14B-Chat/lora/train_2024-03-22-09-01-32/checkpoint-100

model_type: qwen # qwen yi internlm
work_mode: lmdeploy-turbomind # vllm hf lmdeploy-turbomind lmdeploy-pytorch
# lora:
# test_lora: /home/dev/project/LLaMA-Factory/saves/Qwen1.5-14B-Chat/lora/train_2024-03-22-09-01-32/checkpoint-100

device: gpu # gpu / cpu
workers:
- gpus:
- 1
# - gpus:
# - 3
- qwen-72b: #自定义的模型名称
alias: qwen,gpt-4,gpt-3.5-turbo,gpt-3.5-turbo-16k # 别名 例如 gpt4,gpt3
enable: true # false true
model_name_or_path: /home/dev/model/qwen/Qwen2___5-72B-Instruct-AWQ/
model_type: qwen # qwen yi internlm
work_mode: lmdeploy-turbomind # vllm hf lmdeploy-turbomind lmdeploy-pytorch
enable_prefix_caching: true # false true
device: gpu # gpu / cpu
workers:
- gpus:
- 3
- 1
# - gpus:
# - 1

- mixtral: #自定义的模型名称
alias: null # 别名 例如 gpt4,gpt3
enable: false # false true
model_name_or_path: /home/dev/model/NousResearch/Nous-Hermes-2-Mixtral-8x7B-SFT/
model_type: qwen # qwen yi internlm
work_mode: vllm # vllm hf lmdeploy-turbomind lmdeploy-pytorch
device: gpu # gpu / cpu
workers:
- gpus:
- 3
- 0


- llama3: #自定义的模型名称
alias: null # 别名 例如 gpt4,gpt3
enable: false # false true
model_name_or_path: /home/dev/model/unsloth/unsloth/llama-3-8b-Instruct/
model_type: llama # qwen yi internlm
work_mode: hf # vllm hf lmdeploy-turbomind lmdeploy-pytorch
device: gpu # gpu / cpu
workers:
- gpus:
- 0

- yi: #自定义的模型名称
alias: null # 别名 例如 gpt4,gpt3
enable: false # false true
model_name_or_path: /home/dev/model/01ai/Yi-34B-Chat/
model_type: yi # qwen yi internlm
work_mode: hf # vllm hf lmdeploy-turbomind lmdeploy-pytorch
device: gpu # gpu / cpu
workers:
- gpus:
- 2
# - 0

- internlm2: #自定义的模型名称
alias: null # 别名 例如 gpt4,gpt3
enable: false # false true
model_name_or_path: /home/dev/model/Shanghai_AI_Laboratory/internlm2_5-7b-chat/
model_type: internlm # qwen yi internlm
work_mode: hf # vllm hf lmdeploy-turbomind lmdeploy-pytorch
device: gpu # gpu / cpu
workers:
- gpus:
- 0

# Embedding 模型
- piccolo-base-zh:
alias: null # 别名
enable: true # false true
model_name_or_path: /home/dev/model/assets/embeddings/sensenova/piccolo-base-zh/
model_type: embedding_infinity # embedding_infinity
work_mode: hf
device: gpu # gpu / cpu
workers:
- gpus:
- 2

- bce-embedding-base_v1:
alias: text-embedding-ada-002 # 别名
enable: true # false true
model_name_or_path: /home/dev/model/maidalun1020/bce-embedding-base_v1/
model_type: embedding_infinity # embedding_infinity
work_mode: hf
device: gpu # gpu / cpu
workers:
- gpus:
- 2

- conan:
alias: null # 别名
enable: true # false true
model_name_or_path: /home/dev/model/model1001/Conan/
model_type: embedding_infinity # embedding_infinity
work_mode: hf
device: gpu # gpu / cpu
workers:
- gpus:
- 2

- bge-reranker-base:
alias: null # 别名
enable: true # false true
model_name_or_path: /home/dev/model/Xorbits/bge-reranker-base/
model_config:
model_name_or_path: /home/dev/model/Xorbits/bge-reranker-base/
model_type: embedding_infinity # embedding_infinity
work_mode: hf
device: gpu # gpu / cpu
workers:
- gpus:
- 2
- puff:
alias: null # 别名
enable: true # false true
model_name_or_path: /home/dev/model/infgrad/puff-large-v1/
model_type: embedding_infinity # embedding_infinity
work_mode: hf
device: gpu # gpu / cpu
workers:
- gpus:
- 2

- acge_text_embedding:
alias: text-embedding-ada-002 # 别名
enable: true # false true
model_name_or_path: /home/dev/model/aspire/acge_text_embedding
model_type: embedding_infinity # embedding_infinity
work_mode: hf
device: gpu # gpu / cpu
workers:
- gpus:
- 2
- yinka:
alias: null # 别名
enable: false # false true
model_name_or_path: /home/dev/model/Classical/Yinka/
model_type: embedding_infinity # embedding_infinity
work_mode: hf
device: gpu # gpu / cpu
workers:
- gpus:
- 2
- xiaobu-embedding:
alias: null # 别名
enable: true # false true
model_name_or_path: /home/dev/model/lier007/xiaobu-embedding-v2/
model_config:
model_name_or_path: /home/dev/model/aspire/acge_text_embedding
model_type: embedding_infinity # embedding_infinity
work_mode: hf
device: gpu # gpu / cpu
Expand All @@ -223,4 +84,3 @@ models:




34 changes: 20 additions & 14 deletions gpt_server/serving/server_ui.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,14 +142,16 @@ def on_change():
st.session_state[f"model_name_{i}"]: {
"alias": st.session_state[f"alias_{i}"],
"enable": st.session_state[f"enable_{i}"],
"model_name_or_path": st.session_state[
f"model_name_or_path_{i}"
],
"model_config": {
"model_name_or_path": st.session_state[
f"model_name_or_path_{i}"
],
"enable_prefix_caching": st.session_state[
f"enable_prefix_caching_{i}"
],
},
"model_type": st.session_state[f"model_type_{i}"],
"work_mode": st.session_state[f"work_mode_{i}"],
"enable_prefix_caching": st.session_state[
f"enable_prefix_caching_{i}"
],
"device": st.session_state[f"device_{i}"],
"workers": yaml.safe_load(
st.session_state[f"workers_{i}"]
Expand Down Expand Up @@ -180,16 +182,18 @@ def on_change():
"new_model_name": {
"alias": st.session_state[f"alias_{i}"],
"enable": False,
"model_name_or_path": st.session_state[
f"model_name_or_path_{i}"
],
"model_config": {
"model_name_or_path": st.session_state[
f"model_name_or_path_{i}"
],
"enable_prefix_caching": st.session_state[
f"enable_prefix_caching_{i}"
],
},
"model_type": st.session_state[
f"model_type_{i}"
],
"work_mode": st.session_state[f"work_mode_{i}"],
"enable_prefix_caching": st.session_state[
f"enable_prefix_caching_{i}"
],
"device": st.session_state[f"device_{i}"],
"workers": yaml.safe_load(
st.session_state[f"workers_{i}"]
Expand Down Expand Up @@ -299,10 +303,12 @@ def on_change():
model_name_input: {
"alias": model_alias,
"enable": enable,
"model_name_or_path": model_name_or_path,
"model_config": {
"model_name_or_path": model_name_or_path,
"enable_prefix_caching": enable_prefix_caching,
},
"model_type": model_type,
"work_mode": work_mode,
"enable_prefix_caching": enable_prefix_caching,
"device": device,
"workers": workers_value_dict,
}
Expand Down

0 comments on commit 334f681

Please sign in to comment.