From adb9de6c1c7bbb996f9e230b5be7f9f441f0467a Mon Sep 17 00:00:00 2001
From: "Yu, Zhentao"
Date: Thu, 29 Feb 2024 08:41:12 +0000
Subject: [PATCH] remove useless code

Signed-off-by: Yu, Zhentao
---
 neural_speed/models/llama/llama.cpp | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/neural_speed/models/llama/llama.cpp b/neural_speed/models/llama/llama.cpp
index a06080881..db1ec821a 100644
--- a/neural_speed/models/llama/llama.cpp
+++ b/neural_speed/models/llama/llama.cpp
@@ -268,10 +268,6 @@ static bool llama_model_eval_internal(model_context* ctx, const model_input* inp
       struct ne_tensor* const v_cache =
           ne_view_1d(ctx0, kv_self.v, n_ctx * n_embd_gqa * kv_n_ctx_block,
                      il * n_ctx * ne_element_size(kv_self.v) * n_embd_gqa * kv_n_ctx_block);
-      std::vector Kcur_bs(batch_size);
-      std::vector Vcur_bs(batch_size);
-      std::vector k_bs(batch_size);
-      std::vector v_bs(batch_size);
       // cache = [tokens, beams, requests, layers],
       // tokens = [head_dim, head_num, n_ctx] (may different orders)
       size_t off_N_i = 0;