From f2edaa5b5d739c94d67400ad81ce948b6e0513b2 Mon Sep 17 00:00:00 2001 From: Gabriele Oliaro Date: Thu, 23 May 2024 22:07:41 +0000 Subject: [PATCH] fix integer overflow in fused moe kernel --- vllm/model_executor/layers/fused_moe/fused_moe.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/vllm/model_executor/layers/fused_moe/fused_moe.py b/vllm/model_executor/layers/fused_moe/fused_moe.py index bd7321ef0..e0dcb7447 100644 --- a/vllm/model_executor/layers/fused_moe/fused_moe.py +++ b/vllm/model_executor/layers/fused_moe/fused_moe.py @@ -116,7 +116,9 @@ def fused_moe_kernel( offs_k[None, :] * stride_ak) off_experts = tl.load(expert_ids_ptr + pid_m) - # off_experts = 0 + off_experts = off_experts.to(tl.int64) + stride_be = stride_be.to(tl.int64) + tl.device_assert(off_experts * stride_be >= 0, "off_experts * stride_be overflows!") b_ptrs = b_ptr + off_experts * stride_be + (offs_k[:, None] * stride_bk + offs_bn[None, :] * stride_bn)