Enable Qwen2-70B: change the `case 70` label in qwen_mem_req to `case 80`

Signed-off-by: intellinjun <[email protected]>
intel · Jun 4, 2024 · 82d6aac · 82d6aac
1 parent 7c385a2
commit 82d6aac
Showing 1 changed file with 1 addition and 1 deletion.
diff --git a/neural_speed/models/qwen/qwen.h b/neural_speed/models/qwen/qwen.h
@@ -50,7 +50,7 @@ static const model_scratch qwen_mem_req(int n_layers, float scratch_size_ratio =
           static_cast<unsigned long long>(scratch_size_ratio * 2048) * MB,
           static_cast<unsigned long long>(scratch_size_ratio * 4096) * MB,
       };
-    case 70:
+    case 80:
       return {
           static_cast<unsigned long long>(scratch_size_ratio * 10*4096) * MB,
           static_cast<unsigned long long>(scratch_size_ratio * 10*2048) * MB,