Skip to content

Commit

Permalink
Merge branch 'layla-build' of https://github.com/l3utterfly/llama.cpp
Browse files Browse the repository at this point in the history
…into layla-build
  • Loading branch information
l3utterfly committed Oct 15, 2023
2 parents 1e7c960 + 5448c8c commit 84c2784
Showing 1 changed file with 36 additions and 8 deletions.
44 changes: 36 additions & 8 deletions llama.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9000,7 +9000,21 @@ static void llama_copy_state_data_internal(struct llama_context * ctx, llama_dat
const size_t elt_size = ggml_element_size(kv_self.k);

ggml_context * cpy_ctx = ggml_init({ 4096, NULL, /* no_alloc */ true });
ggml_cgraph gf{};

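// Create a temporary cgraph on the heap without initialising ggml objects; the setup below is inspired by `ggml.c:ggml_new_graph`.
// NOTE(review): no matching free(gf) is visible in this hunk — confirm the graph is released after ggml_graph_compute_helper.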
struct ggml_cgraph * gf = (struct ggml_cgraph *) (malloc(sizeof(ggml_cgraph)));

(*gf).n_nodes = 0;
(*gf).n_leafs = 0;
(*gf).order = GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT;
(*gf).perf_runs = 0;
(*gf).perf_cycles = 0;
(*gf).perf_time_us = 0;

memset((*gf).nodes, 0, sizeof((*gf).nodes));
memset((*gf).grads, 0, sizeof((*gf).grads));
memset((*gf).leafs, 0, sizeof((*gf).leafs));
memset((*gf).visited_hash_table, 0, sizeof((*gf).visited_hash_table));

ggml_tensor * kout3d = ggml_new_tensor_3d(cpy_ctx, kv_self.k->type, n_embd, kv_head, n_layer);
std::vector<uint8_t> kout3d_data(ggml_nbytes(kout3d), 0);
Expand All @@ -9018,9 +9032,9 @@ static void llama_copy_state_data_internal(struct llama_context * ctx, llama_dat
kv_head, n_embd, n_layer,
elt_size*n_ctx, elt_size*n_ctx*n_embd, 0);

ggml_build_forward_expand(&gf, ggml_cpy(cpy_ctx, k3d, kout3d));
ggml_build_forward_expand(&gf, ggml_cpy(cpy_ctx, v3d, vout3d));
ggml_graph_compute_helper(ctx->work_buffer, &gf, /*n_threads*/ 1);
ggml_build_forward_expand(gf, ggml_cpy(cpy_ctx, k3d, kout3d));
ggml_build_forward_expand(gf, ggml_cpy(cpy_ctx, v3d, vout3d));
ggml_graph_compute_helper(ctx->work_buffer, gf, /*n_threads*/ 1);

ggml_free(cpy_ctx);

Expand Down Expand Up @@ -9128,7 +9142,21 @@ size_t llama_set_state_data(struct llama_context * ctx, uint8_t * src) {
const size_t elt_size = ggml_element_size(kv_self.k);

ggml_context * cpy_ctx = ggml_init({ 4096, NULL, /* no_alloc */ true });
ggml_cgraph gf{};

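// Create a temporary cgraph on the heap without initialising ggml objects; the setup below is inspired by `ggml.c:ggml_new_graph`.
// NOTE(review): no matching free(gf) is visible in this hunk — confirm the graph is released after ggml_graph_compute_helper.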
struct ggml_cgraph * gf = (struct ggml_cgraph *) (malloc(sizeof(ggml_cgraph)));

(*gf).n_nodes = 0;
(*gf).n_leafs = 0;
(*gf).order = GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT;
(*gf).perf_runs = 0;
(*gf).perf_cycles = 0;
(*gf).perf_time_us = 0;

memset((*gf).nodes, 0, sizeof((*gf).nodes));
memset((*gf).grads, 0, sizeof((*gf).grads));
memset((*gf).leafs, 0, sizeof((*gf).leafs));
memset((*gf).visited_hash_table, 0, sizeof((*gf).visited_hash_table));

ggml_tensor * kin3d = ggml_new_tensor_3d(cpy_ctx, kv_self.k->type, n_embd, kv_head, n_layer);
kin3d->data = (void *) inp;
Expand All @@ -9146,9 +9174,9 @@ size_t llama_set_state_data(struct llama_context * ctx, uint8_t * src) {
kv_head, n_embd, n_layer,
elt_size*n_ctx, elt_size*n_ctx*n_embd, 0);

ggml_build_forward_expand(&gf, ggml_cpy(cpy_ctx, kin3d, k3d));
ggml_build_forward_expand(&gf, ggml_cpy(cpy_ctx, vin3d, v3d));
ggml_graph_compute_helper(ctx->work_buffer, &gf, /*n_threads*/ 1);
ggml_build_forward_expand(gf, ggml_cpy(cpy_ctx, kin3d, k3d));
ggml_build_forward_expand(gf, ggml_cpy(cpy_ctx, vin3d, v3d));
ggml_graph_compute_helper(ctx->work_buffer, gf, /*n_threads*/ 1);

ggml_free(cpy_ctx);
}
Expand Down

0 comments on commit 84c2784

Please sign in to comment.