From 011e8ec577fd135cbc02993d3ea9840c516d6a1c Mon Sep 17 00:00:00 2001
From: slaren <slarengh@gmail.com>
Date: Mon, 22 Jan 2024 23:42:41 +0100
Subject: [PATCH] llama : fix not enough space in buffer with Qwen (#5086)

---
 llama.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llama.cpp b/llama.cpp
index 8c906a22f0ba9..f6f1ec0f403a8 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -4440,9 +4440,9 @@ static struct ggml_tensor * llm_build_kv(
 
     // these nodes are added to the graph together so that they are not reordered
     // by doing so, the number of splits in the graph is reduced
+    ggml_build_forward_expand(graph, q_cur);
     ggml_build_forward_expand(graph, k_cur);
     ggml_build_forward_expand(graph, v_cur);
-    ggml_build_forward_expand(graph, q_cur);
 
     llm_build_kv_store(ctx, hparams, kv, graph, k_cur, v_cur, n_ctx, n_tokens, kv_head, cb, il);