From 91d9e89a8d2f31d6452f6a531df89eac4f76cba5 Mon Sep 17 00:00:00 2001 From: hiworldwzj <30762946+hiworldwzj@users.noreply.github.com> Date: Fri, 29 Nov 2024 15:18:10 +0800 Subject: [PATCH] fix mem alloc (#627) Co-authored-by: wangzaijun --- lightllm/common/basemodel/basemodel.py | 11 +++++++++-- .../router/model_infer/mode_backend/base_backend.py | 2 +- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/lightllm/common/basemodel/basemodel.py b/lightllm/common/basemodel/basemodel.py index e7f46c8c..019d1dd6 100755 --- a/lightllm/common/basemodel/basemodel.py +++ b/lightllm/common/basemodel/basemodel.py @@ -71,8 +71,15 @@ def __init__(self, kvargs): self._verify_must() self._verify_params() self._init_quant() - self._init_weights() - self._init_mem_manager() + + # More contiguous GPU memory allocation can give better performance + if self.max_total_token_num is None: + self._init_weights() + self._init_mem_manager() + else: + self._init_mem_manager() + self._init_weights() + self._init_kv_move_buffer() self._check_mem_size() self._init_req_manager() diff --git a/lightllm/server/router/model_infer/mode_backend/base_backend.py b/lightllm/server/router/model_infer/mode_backend/base_backend.py index 7eeb30ed..a7693f69 100644 --- a/lightllm/server/router/model_infer/mode_backend/base_backend.py +++ b/lightllm/server/router/model_infer/mode_backend/base_backend.py @@ -85,7 +85,7 @@ def init_model(self, kvargs): rank=self.tp_rank, world_size=self.world_size, ) - + from lightllm.distributed import set_custom_reduce set_custom_reduce()