diff --git a/neural_speed/models/llama/llama_utils.cpp b/neural_speed/models/llama/llama_utils.cpp index 6e4df8c13..73bb92c9e 100644 --- a/neural_speed/models/llama/llama_utils.cpp +++ b/neural_speed/models/llama/llama_utils.cpp @@ -193,7 +193,7 @@ void Llama::load(model_context* ctx, model_progress_callback progress_callback, // ffn GEMM - if (ml->verify_tensor(layers_i + ".feed_forward")) { + if (ml->verify_tensor(layers_i + ".feed_forward.w1.weight")) { layer.ffn[0] = ml->get_tensor(layers_i + ".feed_forward.w1.weight", {n_embd, n_ff}, backend); layer.ffn[1] = ml->get_tensor(layers_i + ".feed_forward.w2.weight", {n_ff, n_embd}, backend); layer.ffn[2] = ml->get_tensor(layers_i + ".feed_forward.w3.weight", {n_embd, n_ff}, backend);