llama : minor style
ggerganov authored and dsx1986 committed Oct 29, 2024
1 parent d0798ed commit 6d75550
Showing 1 changed file with 3 additions and 6 deletions.
9 changes: 3 additions & 6 deletions src/llama.cpp
@@ -8711,8 +8711,7 @@ static void llm_build_kv_store(
 
     GGML_ASSERT(kv.size == n_ctx);
 
-    struct ggml_tensor * k_cache_view = ggml_view_1d(ctx, kv.k_l[il], n_tokens*n_embd_k_gqa,
-            (ggml_row_size(kv.k_l[il]->type, n_embd_k_gqa))*kv_head);
+    struct ggml_tensor * k_cache_view = ggml_view_1d(ctx, kv.k_l[il], n_tokens*n_embd_k_gqa, ggml_row_size(kv.k_l[il]->type, n_embd_k_gqa)*kv_head);
     cb(k_cache_view, "k_cache_view", il);
 
     // note: storing RoPE-ed version of K in the KV cache
@@ -8723,8 +8722,7 @@ static void llm_build_kv_store(
     struct ggml_tensor * v_cache_view = nullptr;
 
     if (cparams.flash_attn) {
-        v_cache_view = ggml_view_1d(ctx, kv.v_l[il], n_tokens*n_embd_v_gqa,
-                (kv_head)*ggml_row_size(kv.v_l[il]->type, n_embd_v_gqa));
+        v_cache_view = ggml_view_1d(ctx, kv.v_l[il], n_tokens*n_embd_v_gqa, ggml_row_size(kv.v_l[il]->type, n_embd_v_gqa)*kv_head);
     } else {
         // note: the V cache is transposed when not using flash attention
         v_cache_view = ggml_view_2d(ctx, kv.v_l[il], n_tokens, n_embd_v_gqa,
@@ -9211,8 +9209,7 @@ static struct ggml_tensor * llm_build_kv(
 
     struct ggml_tensor * cur;
 
-    cur = llm_build_kqv(ctx, lctx, kv, graph, wo, wo_b,
-            q_cur, kq_mask, n_tokens, n_kv, kq_scale, cb, il);
+    cur = llm_build_kqv(ctx, lctx, kv, graph, wo, wo_b, q_cur, kq_mask, n_tokens, n_kv, kq_scale, cb, il);
     cb(cur, "kqv_out", il);
 
     return cur;
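Note: the change above is purely stylistic; it joins the wrapped ggml_view_1d and llm_build_kqv calls onto single lines without touching the arguments. For context, the last argument of each ggml_view_1d call is a byte offset, ggml_row_size(kv.k_l[il]->type, n_embd_k_gqa) * kv_head, i.e. it skips kv_head already-written rows of the cache. The sketch below illustrates that arithmetic only; it is not part of the commit or of ggml, it assumes a non-quantized F16 cache (for which a row of n elements occupies n * sizeof(uint16_t) bytes, matching what ggml_row_size returns for such types), and all sizes in it are hypothetical.

```cpp
// Minimal sketch of the 1-D KV-cache view offset arithmetic (illustration only).
// Assumes an F16 cache; n_embd_k_gqa, n_tokens and kv_head are made-up values.
#include <cstdint>
#include <cstdio>

int main() {
    const int64_t n_embd_k_gqa = 1024; // hypothetical K row width per token
    const int64_t n_tokens     = 32;   // hypothetical batch size
    const int64_t kv_head      = 128;  // hypothetical write position in the cache

    const size_t row_size_bytes = n_embd_k_gqa * sizeof(uint16_t); // F16 assumption
    const size_t view_elems     = n_tokens * n_embd_k_gqa;         // elements covered by the view
    const size_t view_offset    = row_size_bytes * kv_head;        // skip kv_head cached rows

    std::printf("view: %zu elements starting at byte offset %zu\n", view_elems, view_offset);
    return 0;
}
```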
