From adb9de6c1c7bbb996f9e230b5be7f9f441f0467a Mon Sep 17 00:00:00 2001
From: "Yu, Zhentao" <zhentao.yu@intel.com>
Date: Thu, 29 Feb 2024 08:41:12 +0000
Subject: [PATCH] llama: remove unused Kcur_bs/Vcur_bs/k_bs/v_bs vectors

Signed-off-by: Yu, Zhentao <zhentao.yu@intel.com>
---
 neural_speed/models/llama/llama.cpp | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/neural_speed/models/llama/llama.cpp b/neural_speed/models/llama/llama.cpp
index a06080881..db1ec821a 100644
--- a/neural_speed/models/llama/llama.cpp
+++ b/neural_speed/models/llama/llama.cpp
@@ -268,10 +268,6 @@ static bool llama_model_eval_internal(model_context* ctx, const model_input* inp
         struct ne_tensor* const v_cache =
             ne_view_1d(ctx0, kv_self.v, n_ctx * n_embd_gqa * kv_n_ctx_block,
                        il * n_ctx * ne_element_size(kv_self.v) * n_embd_gqa * kv_n_ctx_block);
-        std::vector<ne_tensor*> Kcur_bs(batch_size);
-        std::vector<ne_tensor*> Vcur_bs(batch_size);
-        std::vector<ne_tensor*> k_bs(batch_size);
-        std::vector<ne_tensor*> v_bs(batch_size);
         // cache = [tokens, beams, requests, layers],
         // tokens = [head_dim, head_num, n_ctx] (may different orders)
         size_t off_N_i = 0;