llama : update llama_model API names (#11063)
* llama : deprecate llama_free_model, add llama_model_free

ggml-ci

* llama : change `llama_load_model_from_file` -> `llama_model_load_from_file`

ggml-ci
ggerganov authored Jan 6, 2025
1 parent 3e6e7a6 commit 47182dd
Showing 23 changed files with 76 additions and 59 deletions.
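For callers of the library, the rename is a drop-in substitution at the load and free sites. Below is a minimal sketch of the new call pattern; the model path is a placeholder, and the API calls are the ones introduced or kept by this commit:

    #include "llama.h"

    int main(void) {
        llama_backend_init();

        llama_model_params mparams = llama_model_default_params();

        // replaces the now-deprecated llama_load_model_from_file
        llama_model * model = llama_model_load_from_file("model.gguf", mparams);
        if (model == NULL) {
            return 1;
        }

        // ... create a context and run inference here ...

        // replaces the now-deprecated llama_free_model
        llama_model_free(model);

        llama_backend_free();
        return 0;
    }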
common/common.cpp (7 additions, 7 deletions)

@@ -846,7 +846,7 @@ struct common_init_result common_init_from_params(common_params & params) {
     } else if (!params.model_url.empty()) {
         model = common_load_model_from_url(params.model_url, params.model, params.hf_token, mparams);
     } else {
-        model = llama_load_model_from_file(params.model.c_str(), mparams);
+        model = llama_model_load_from_file(params.model.c_str(), mparams);
     }

     if (model == NULL) {
@@ -873,7 +873,7 @@ struct common_init_result common_init_from_params(common_params & params) {
     }

     if (!ok) {
-        llama_free_model(model);
+        llama_model_free(model);

         return iparams;
     }
@@ -884,7 +884,7 @@ struct common_init_result common_init_from_params(common_params & params) {
     llama_context * lctx = llama_new_context_with_model(model, cparams);
     if (lctx == NULL) {
         LOG_ERR("%s: failed to create context with model '%s'\n", __func__, params.model.c_str());
-        llama_free_model(model);
+        llama_model_free(model);
         return iparams;
     }

@@ -900,7 +900,7 @@ struct common_init_result common_init_from_params(common_params & params) {
     const auto cvec = common_control_vector_load(params.control_vectors);
     if (cvec.n_embd == -1) {
         llama_free(lctx);
-        llama_free_model(model);
+        llama_model_free(model);

         return iparams;
     }
@@ -913,7 +913,7 @@ struct common_init_result common_init_from_params(common_params & params) {
             params.control_vector_layer_end);
         if (err) {
             llama_free(lctx);
-            llama_free_model(model);
+            llama_model_free(model);

             return iparams;
         }
@@ -926,7 +926,7 @@ struct common_init_result common_init_from_params(common_params & params) {
         if (lora == nullptr) {
             LOG_ERR("%s: failed to apply lora adapter '%s'\n", __func__, la.path.c_str());
             llama_free(lctx);
-            llama_free_model(model);
+            llama_model_free(model);
             return iparams;
         }

@@ -1411,7 +1411,7 @@ struct llama_model * common_load_model_from_url(
         }
     }

-    return llama_load_model_from_file(local_path.c_str(), params);
+    return llama_model_load_from_file(local_path.c_str(), params);
 }

 struct llama_model * common_load_model_from_hf(
examples/batched-bench/batched-bench.cpp (2 additions, 2 deletions)

@@ -38,7 +38,7 @@ int main(int argc, char ** argv) {

     llama_model_params model_params = common_model_params_to_llama(params);

-    llama_model * model = llama_load_model_from_file(params.model.c_str(), model_params);
+    llama_model * model = llama_model_load_from_file(params.model.c_str(), model_params);

     if (model == NULL) {
         fprintf(stderr , "%s: error: unable to load model\n" , __func__);
@@ -194,7 +194,7 @@ int main(int argc, char ** argv) {
     llama_batch_free(batch);

     llama_free(ctx);
-    llama_free_model(model);
+    llama_model_free(model);

     llama_backend_free();
examples/batched/batched.cpp (2 additions, 2 deletions)

@@ -41,7 +41,7 @@ int main(int argc, char ** argv) {

     llama_model_params model_params = common_model_params_to_llama(params);

-    llama_model * model = llama_load_model_from_file(params.model.c_str(), model_params);
+    llama_model * model = llama_model_load_from_file(params.model.c_str(), model_params);

     if (model == NULL) {
         LOG_ERR("%s: error: unable to load model\n" , __func__);
@@ -236,7 +236,7 @@ int main(int argc, char ** argv) {

     llama_sampler_free(smpl);
     llama_free(ctx);
-    llama_free_model(model);
+    llama_model_free(model);

     llama_backend_free();
examples/gritlm/gritlm.cpp (2 additions, 2 deletions)

@@ -165,7 +165,7 @@ int main(int argc, char * argv[]) {

     llama_backend_init();

-    llama_model * model = llama_load_model_from_file(params.model.c_str(), mparams);
+    llama_model * model = llama_model_load_from_file(params.model.c_str(), mparams);

     // create generation context
     llama_context * ctx = llama_new_context_with_model(model, cparams);
@@ -219,7 +219,7 @@ int main(int argc, char * argv[]) {

     llama_sampler_free(smpl);
     llama_free(ctx);
-    llama_free_model(model);
+    llama_model_free(model);
     llama_backend_free();

     return 0;
examples/llama-bench/llama-bench.cpp (4 additions, 4 deletions)

@@ -1526,10 +1526,10 @@ int main(int argc, char ** argv) {
         // keep the same model between tests when possible
         if (!lmodel || !prev_inst || !inst.equal_mparams(*prev_inst)) {
             if (lmodel) {
-                llama_free_model(lmodel);
+                llama_model_free(lmodel);
             }

-            lmodel = llama_load_model_from_file(inst.model.c_str(), inst.to_llama_mparams());
+            lmodel = llama_model_load_from_file(inst.model.c_str(), inst.to_llama_mparams());
             if (lmodel == NULL) {
                 fprintf(stderr, "%s: error: failed to load model '%s'\n", __func__, inst.model.c_str());
                 return 1;
@@ -1540,7 +1540,7 @@ int main(int argc, char ** argv) {
         llama_context * ctx = llama_new_context_with_model(lmodel, inst.to_llama_cparams());
         if (ctx == NULL) {
             fprintf(stderr, "%s: error: failed to create context with model '%s'\n", __func__, inst.model.c_str());
-            llama_free_model(lmodel);
+            llama_model_free(lmodel);
             return 1;
         }
@@ -1626,7 +1626,7 @@ int main(int argc, char ** argv) {
         ggml_threadpool_free_fn(threadpool);
     }

-    llama_free_model(lmodel);
+    llama_model_free(lmodel);

     if (p) {
         p->print_footer();
examples/llava/llava-cli.cpp (3 additions, 3 deletions)

@@ -221,7 +221,7 @@ static struct llama_model * llava_init(common_params * params) {

     llama_model_params model_params = common_model_params_to_llama(*params);

-    llama_model * model = llama_load_model_from_file(params->model.c_str(), model_params);
+    llama_model * model = llama_model_load_from_file(params->model.c_str(), model_params);
     if (model == NULL) {
         LOG_ERR("%s: unable to load model\n" , __func__);
         return NULL;
@@ -265,7 +265,7 @@ static void llava_free(struct llava_context * ctx_llava) {
     }

     llama_free(ctx_llava->ctx_llama);
-    llama_free_model(ctx_llava->model);
+    llama_model_free(ctx_llava->model);
     llama_backend_free();
 }
@@ -323,7 +323,7 @@ int main(int argc, char ** argv) {
         }
     }

-    llama_free_model(model);
+    llama_model_free(model);

     return 0;
 }
examples/llava/minicpmv-cli.cpp (2 additions, 2 deletions)

@@ -31,7 +31,7 @@ static struct llama_model * llava_init(common_params * params) {

     llama_model_params model_params = common_model_params_to_llama(*params);

-    llama_model * model = llama_load_model_from_file(params->model.c_str(), model_params);
+    llama_model * model = llama_model_load_from_file(params->model.c_str(), model_params);
     if (model == NULL) {
         LOG_ERR("%s: unable to load model\n" , __func__);
         return NULL;
@@ -75,7 +75,7 @@ static void llava_free(struct llava_context * ctx_llava) {
     }

     llama_free(ctx_llava->ctx_llama);
-    llama_free_model(ctx_llava->model);
+    llama_model_free(ctx_llava->model);
     llama_backend_free();
 }
examples/llava/qwen2vl-cli.cpp (3 additions, 3 deletions)

@@ -310,7 +310,7 @@ static struct llama_model * llava_init(common_params * params) {

     llama_model_params model_params = common_model_params_to_llama(*params);

-    llama_model * model = llama_load_model_from_file(params->model.c_str(), model_params);
+    llama_model * model = llama_model_load_from_file(params->model.c_str(), model_params);
     if (model == NULL) {
         LOG_ERR("%s: unable to load model\n" , __func__);
         return NULL;
@@ -354,7 +354,7 @@ static void llava_free(struct llava_context * ctx_llava) {
     }

     llama_free(ctx_llava->ctx_llama);
-    llama_free_model(ctx_llava->model);
+    llama_model_free(ctx_llava->model);
     llama_backend_free();
 }
@@ -575,7 +575,7 @@ int main(int argc, char ** argv) {
         }
     }

-    llama_free_model(model);
+    llama_model_free(model);

     return 0;
 }
examples/passkey/passkey.cpp (2 additions, 2 deletions)

@@ -63,7 +63,7 @@ int main(int argc, char ** argv) {

     llama_model_params model_params = common_model_params_to_llama(params);

-    llama_model * model = llama_load_model_from_file(params.model.c_str(), model_params);
+    llama_model * model = llama_model_load_from_file(params.model.c_str(), model_params);

     if (model == NULL) {
         LOG_ERR("%s: unable to load model\n" , __func__);
@@ -266,7 +266,7 @@ int main(int argc, char ** argv) {
     llama_batch_free(batch);

     llama_free(ctx);
-    llama_free_model(model);
+    llama_model_free(model);

     llama_backend_free();
examples/quantize-stats/quantize-stats.cpp (4 additions, 4 deletions)

@@ -309,7 +309,7 @@ int main(int argc, char ** argv) {
         auto mparams = llama_model_default_params();
         mparams.use_mlock = false;

-        model = llama_load_model_from_file(params.model.c_str(), mparams);
+        model = llama_model_load_from_file(params.model.c_str(), mparams);

         if (model == NULL) {
             fprintf(stderr, "%s: error: failed to load model '%s'\n", __func__, params.model.c_str());
@@ -323,7 +323,7 @@ int main(int argc, char ** argv) {

         if (ctx == NULL) {
             fprintf(stderr, "%s: error: failed to create context with model '%s'\n", __func__, params.model.c_str());
-            llama_free_model(model);
+            llama_model_free(model);
             return 1;
         }
     }
@@ -347,7 +347,7 @@ int main(int argc, char ** argv) {
            fprintf(stderr, "%s: error: Quantization should be tested with a float model, "
                "this model contains already quantized layers (%s is type %d)\n", __func__, kv_tensor.first.c_str(), kv_tensor.second->type);
            llama_free(ctx);
-           llama_free_model(model);
+           llama_model_free(model);
            return 1;
        }
        included_layers++;
@@ -409,7 +409,7 @@ int main(int argc, char ** argv) {


    llama_free(ctx);
-   llama_free_model(model);
+   llama_model_free(model);
    // report timing
    {
        const int64_t t_main_end_us = ggml_time_us();
examples/run/run.cpp (1 addition, 1 deletion)

@@ -664,7 +664,7 @@ class LlamaData {
                "\r%*s"
                "\rLoading model",
                get_terminal_width(), " ");
-        llama_model_ptr model(llama_load_model_from_file(opt.model_.c_str(), opt.model_params));
+        llama_model_ptr model(llama_model_load_from_file(opt.model_.c_str(), opt.model_params));
         if (!model) {
             printe("%s: error: unable to load model from file: %s\n", __func__, opt.model_.c_str());
         }
examples/simple-chat/simple-chat.cpp (2 additions, 2 deletions)

@@ -69,7 +69,7 @@ int main(int argc, char ** argv) {
     llama_model_params model_params = llama_model_default_params();
     model_params.n_gpu_layers = ngl;

-    llama_model * model = llama_load_model_from_file(model_path.c_str(), model_params);
+    llama_model * model = llama_model_load_from_file(model_path.c_str(), model_params);
     if (!model) {
         fprintf(stderr , "%s: error: unable to load model\n" , __func__);
         return 1;
@@ -194,7 +194,7 @@ int main(int argc, char ** argv) {
     }
     llama_sampler_free(smpl);
     llama_free(ctx);
-    llama_free_model(model);
+    llama_model_free(model);

     return 0;
 }
examples/simple/simple.cpp (2 additions, 2 deletions)

@@ -83,7 +83,7 @@ int main(int argc, char ** argv) {
     llama_model_params model_params = llama_model_default_params();
     model_params.n_gpu_layers = ngl;

-    llama_model * model = llama_load_model_from_file(model_path.c_str(), model_params);
+    llama_model * model = llama_model_load_from_file(model_path.c_str(), model_params);

     if (model == NULL) {
         fprintf(stderr , "%s: error: unable to load model\n" , __func__);
@@ -199,7 +199,7 @@ int main(int argc, char ** argv) {

     llama_sampler_free(smpl);
     llama_free(ctx);
-    llama_free_model(model);
+    llama_model_free(model);

     return 0;
 }
examples/tokenize/tokenize.cpp (2 additions, 2 deletions)

@@ -338,7 +338,7 @@ int main(int raw_argc, char ** raw_argv) {

     llama_model_params model_params = llama_model_default_params();
     model_params.vocab_only = true;
-    llama_model * model = llama_load_model_from_file(model_path, model_params);
+    llama_model * model = llama_model_load_from_file(model_path, model_params);
     if (!model) {
         fprintf(stderr, "Error: could not load model from file '%s'.\n", model_path);
         return 1;
@@ -408,7 +408,7 @@ int main(int raw_argc, char ** raw_argv) {
     }
     // silence valgrind
     llama_free(ctx);
-    llama_free_model(model);
+    llama_model_free(model);

     return 0;
 }
include/llama-cpp.h (1 addition, 1 deletion)

@@ -9,7 +9,7 @@
 #include "llama.h"

 struct llama_model_deleter {
-    void operator()(llama_model * model) { llama_free_model(model); }
+    void operator()(llama_model * model) { llama_model_free(model); }
 };

 struct llama_context_deleter {
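For context, llama-cpp.h wraps the raw llama_model * in a std::unique_ptr whose deleter is the functor above, so this one-line change keeps the RAII wrapper aligned with the new free function. A minimal usage sketch, assuming the llama_model_ptr alias from the same header and a placeholder model path (examples/run/run.cpp above uses the same pattern):

    #include "llama-cpp.h"

    void load_example() {
        // llama_model_deleter calls llama_model_free when the
        // pointer goes out of scope; no explicit free is needed
        llama_model_ptr model(llama_model_load_from_file("model.gguf", llama_model_default_params()));
        if (!model) {
            // handle load failure
        }
    }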
include/llama.h (10 additions, 3 deletions)

@@ -413,12 +413,19 @@ extern "C" {
     // Call once at the end of the program - currently only used for MPI
     LLAMA_API void llama_backend_free(void);

-    LLAMA_API struct llama_model * llama_load_model_from_file(
+    DEPRECATED(LLAMA_API struct llama_model * llama_load_model_from_file(
                              const char * path_model,
-                             struct llama_model_params params);
+                             struct llama_model_params params),
+            "use llama_model_load_from_file instead");
+
+    LLAMA_API struct llama_model * llama_model_load_from_file(
+                             const char * path_model,
+                             struct llama_model_params params);

-    // TODO: rename to llama_model_free
-    LLAMA_API void llama_free_model(struct llama_model * model);
+    DEPRECATED(LLAMA_API void llama_free_model(struct llama_model * model),
+            "use llama_model_free instead");
+
+    LLAMA_API void llama_model_free(struct llama_model * model);

     // TODO: rename to llama_init_from_model
     LLAMA_API struct llama_context * llama_new_context_with_model(
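The old declarations remain callable: DEPRECATED here is llama.h's wrapper macro, which (assuming the usual definition, not shown in this diff) attaches a compiler deprecation attribute such as __attribute__((deprecated(hint))) on GCC/Clang. Old call sites keep compiling but emit a warning carrying the hint string:

    // still compiles, but warns: "use llama_model_load_from_file instead"
    llama_model * model = llama_load_model_from_file("model.gguf", mparams);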
src/llama-model.cpp (4 additions, 0 deletions)

@@ -2009,6 +2009,10 @@ struct llama_model_params llama_model_default_params() {
 }

 void llama_free_model(struct llama_model * model) {
+    llama_model_free(model);
+}
+
+void llama_model_free(struct llama_model * model) {
     delete model;
 }