From 9ee8aeccd73dbc4a000eb93c1c08eea843c752a3 Mon Sep 17 00:00:00 2001
From: l3utterfly
Date: Sun, 8 Oct 2023 00:36:20 +0800
Subject: [PATCH] fixed memory leak by freeing temporary graph during session
 load

---
NOTE(review): free(gf) is only correct if gf was heap-allocated with
malloc/new outside of cpy_ctx; if the graph was allocated from cpy_ctx's
memory pool, ggml_free(cpy_ctx) above already releases it and this free()
would be an invalid/double free — confirm gf's allocator before merging.
(This note sits between "---" and the diffstat, so "git am" ignores it.)

 llama.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/llama.cpp b/llama.cpp
index 4d5fb7e4be308..242815af2471f 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -8405,6 +8405,10 @@ size_t llama_set_state_data(struct llama_context * ctx, uint8_t * src) {
         ggml_graph_compute_helper(ctx->work_buffer, gf, /*n_threads*/ 1);
 
         ggml_free(cpy_ctx);
+
+        // free our allocated graph
+        free(gf);
+        gf = NULL;
     }
 
     ctx->kv_self.head = kv_head;