From 2b993ad4797d395303a084aa6b72bf2ef20e7593 Mon Sep 17 00:00:00 2001
From: "Wang, Jian4" <61138589+hzjane@users.noreply.github.com>
Date: Wed, 4 Sep 2024 13:50:32 +0800
Subject: [PATCH] vllm update for glm-4 model automatic not_convert (#12003)

---
 python/llm/src/ipex_llm/vllm/xpu/model_convert.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/python/llm/src/ipex_llm/vllm/xpu/model_convert.py b/python/llm/src/ipex_llm/vllm/xpu/model_convert.py
index 7979bfbc62a..065652e7162 100644
--- a/python/llm/src/ipex_llm/vllm/xpu/model_convert.py
+++ b/python/llm/src/ipex_llm/vllm/xpu/model_convert.py
@@ -250,7 +250,8 @@ def _ipex_llm_load_model(self) -> None:
     from ipex_llm import optimize_model
     import os
     not_convert_last_mlp = os.getenv("IPEX_LLM_NOT_CONVERT_LAST_MLP", None)
-    if not_convert_last_mlp is not None:
+    is_glm4_model = "glm-4" in self.model_config.model.lower()
+    if not_convert_last_mlp is not None or is_glm4_model:
         # only use to avoid nan value in last mlp forward running glm4-9b-chat
         modules = ["35.mlp", "36.mlp", "37.mlp", "38.mlp", "39.mlp"]
     else:
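
For context, a minimal self-contained sketch of the selection logic after this patch. The helper name _last_mlp_modules_to_not_convert is hypothetical (not from the diff), and the else branch is assumed to fall back to converting all modules, since its body is not shown in the hunk:

    import os
    from typing import List, Optional

    def _last_mlp_modules_to_not_convert(model_path: str) -> Optional[List[str]]:
        # Opt-in kept from before the patch: any value of the environment
        # variable enables the skip list.
        not_convert_last_mlp = os.getenv("IPEX_LLM_NOT_CONVERT_LAST_MLP", None)
        # New automatic path added by the patch: model paths containing "glm-4"
        # always skip converting the last MLP layers, to avoid NaN values in the
        # last MLP forward when running glm-4-9b-chat.
        is_glm4_model = "glm-4" in model_path.lower()
        if not_convert_last_mlp is not None or is_glm4_model:
            return ["35.mlp", "36.mlp", "37.mlp", "38.mlp", "39.mlp"]
        return None  # assumption: unchanged else branch leaves the skip list unset

    # Example: _last_mlp_modules_to_not_convert("THUDM/glm-4-9b-chat") returns the
    # skip list with no environment variable set, matching the new automatic behavior.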