Skip to content

Commit

Permalink
update readme
Browse files (browse the repository at this point in the history)
  • Loading branch information
shell-nlp committed Oct 26, 2024
1 parent dbac077 commit 609cc88
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 12 deletions.
15 changes: 11 additions & 4 deletions README.md
Original file line number | Diff line number | Diff line change
Expand Up @@ -138,7 +138,8 @@ models:
- chatglm4: #自定义的模型名称
alias: null # 别名 例如 gpt4,gpt3
enable: true # false true 控制是否启动模型worker
model_name_or_path: /home/dev/model/THUDM/glm-4-9b-chat/
model_config:
model_name_or_path: /home/dev/model/THUDM/glm-4-9b-chat/
model_type: chatglm # qwen yi internlm
work_mode: vllm # vllm hf lmdeploy-turbomind lmdeploy-pytorch
# lora: # lora 配置
Expand All @@ -162,7 +163,11 @@ models:
- qwen: #自定义的模型名称
alias: gpt-4,gpt-3.5-turbo,gpt-3.5-turbo-16k # 别名 例如 gpt4,gpt3
enable: true # false true 控制是否启动模型worker
model_name_or_path: /home/dev/model/qwen/Qwen1___5-14B-Chat/
model_config:
model_name_or_path: /home/dev/model/qwen/Qwen1___5-14B-Chat/
enable_prefix_caching: false
dtype: auto
max_model_len: 65536
model_type: qwen # qwen yi internlm
work_mode: vllm # vllm hf lmdeploy-turbomind lmdeploy-pytorch
device: gpu # gpu / cpu
Expand All @@ -176,7 +181,8 @@ models:
- bge-base-zh:
alias: null # 别名
enable: true # false true
model_name_or_path: /home/dev/model/Xorbits/bge-base-zh-v1___5/
model_config:
model_name_or_path: /home/dev/model/Xorbits/bge-base-zh-v1___5/
model_type: embedding_infinity # embedding_infinity
work_mode: hf
device: gpu # gpu / cpu
Expand All @@ -187,7 +193,8 @@ models:
- bge-reranker-base:
alias: null # 别名
enable: true # false true 控制是否启动模型worker
model_name_or_path: /home/dev/model/Xorbits/bge-reranker-base/
model_config:
model_name_or_path: /home/dev/model/Xorbits/bge-reranker-base/
model_type: embedding_infinity # embedding_infinity
work_mode: hf
device: gpu # gpu / cpu
Expand Down
26 changes: 18 additions & 8 deletions gpt_server/script/config.yaml
Original file line number | Diff line number | Diff line change
Expand Up @@ -14,48 +14,57 @@ models:
- minicpmv:
alias: null
enable: false
model_name_or_path: /home/dev/model/OpenBMB/MiniCPM-V-2_6/
model_type: minicpmv
model_config:
model_name_or_path: /home/dev/model/OpenBMB/MiniCPM-V-2_6/
enable_prefix_caching: false
dtype: auto
work_mode: lmdeploy-turbomind
enable_prefix_caching: false
device: gpu
workers:
- gpus:
- 3
- internvl2:
alias: null
enable: false
model_name_or_path: /home/dev/model/OpenGVLab/InternVL2-40B-AWQ/
model_config:
model_name_or_path: /home/dev/model/OpenGVLab/InternVL2-40B-AWQ/
enable_prefix_caching: false
model_type: internvl2
work_mode: lmdeploy-turbomind
enable_prefix_caching: false
device: gpu
workers:
- gpus:
- 3
- chatglm4:
alias: chatglm3
enable: true
model_name_or_path: /home/dev/model/ZhipuAI/glm-4-9b-chat
model_config:
model_name_or_path: /home/dev/model/ZhipuAI/glm-4-9b-chat
enable_prefix_caching: false
model_type: chatglm
work_mode: vllm
enable_prefix_caching: false
device: gpu
workers:
- gpus:
- 3

- qwen-72b:
alias: qwen,gpt-4,gpt-3.5-turbo,gpt-3.5-turbo-16k
enable: true
model_name_or_path: /home/dev/model/qwen/Qwen2___5-72B-Instruct-AWQ/
model_config:
model_name_or_path: /home/dev/model/qwen/Qwen2___5-72B-Instruct-AWQ/
enable_prefix_caching: true
dtype: auto
max_model_len: 65536
model_type: qwen
work_mode: lmdeploy-turbomind
enable_prefix_caching: true
device: gpu
workers:
- gpus:
- 0
- 1

- piccolo-base-zh:
alias: null
enable: true
Expand All @@ -78,6 +87,7 @@ models:
workers:
- gpus:
- 2

- bge-reranker-base:
alias: null
enable: true
Expand Down

0 comments on commit 609cc88

Please sign in to comment.