From 82d6aacf15775a736a1e31141e1c69051cb41885 Mon Sep 17 00:00:00 2001
From: intellinjun
Date: Mon, 3 Jun 2024 22:33:25 -0700
Subject: [PATCH] enable qwen2-70b

Signed-off-by: intellinjun
---
 neural_speed/models/qwen/qwen.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/neural_speed/models/qwen/qwen.h b/neural_speed/models/qwen/qwen.h
index 2dacfab02..2bca01fab 100644
--- a/neural_speed/models/qwen/qwen.h
+++ b/neural_speed/models/qwen/qwen.h
@@ -50,7 +50,7 @@ static const model_scratch qwen_mem_req(int n_layers, float scratch_size_ratio =
 static_cast(scratch_size_ratio * 2048) * MB,
 static_cast(scratch_size_ratio * 4096) * MB,
 };
- case 70:
+ case 80:
 return {
 static_cast(scratch_size_ratio * 10*4096) * MB,
 static_cast(scratch_size_ratio * 10*2048) * MB,