Skip to content

Commit

Permalink
vllm update for glm-4 model automatic not_convert (intel-analytics#12003)
Browse files Browse the repository at this point in the history
  • Loading branch information
hzjane authored Sep 4, 2024
1 parent 9eaff5e commit 2b993ad
Showing 1 changed file with 2 additions and 1 deletion.
3 changes: 2 additions & 1 deletion python/llm/src/ipex_llm/vllm/xpu/model_convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,8 @@ def _ipex_llm_load_model(self) -> None:
from ipex_llm import optimize_model
import os
not_convert_last_mlp = os.getenv("IPEX_LLM_NOT_CONVERT_LAST_MLP", None)
if not_convert_last_mlp is not None:
is_glm4_model = "glm-4" in self.model_config.model.lower()
if not_convert_last_mlp is not None or is_glm4_model:
# only use to avoid nan value in last mlp forward running glm4-9b-chat
modules = ["35.mlp", "36.mlp", "37.mlp", "38.mlp", "39.mlp"]
else:
Expand Down

0 comments on commit 2b993ad

Please sign in to comment.