Skip to content
This repository has been archived by the owner on Aug 30, 2024. It is now read-only.

Commit

Permalink
gguf refactor
Browse files: browse the repository at this point in the history
  • Loading branch information
Zhenzhong1 committed Jan 9, 2024
1 parent af60e9a commit 428d4c7
Show file tree
Hide file tree
Showing 2 changed files with 195 additions and 163 deletions.
13 changes: 7 additions & 6 deletions neural_speed/application/main_run.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -404,10 +404,11 @@ int main(int argc, char** argv) { // NOLINT
const float mirostat_eta = params.mirostat_eta;
const bool penalize_nl = params.penalize_nl;
model_token id = 0;

if (params.warmup) {
if (ns_log_level() >= 0 && params.warmup) {
// Warmup phase is used to generate static objects (e.g. JIT kernels)
int constexpr WarmUpPromptLen = 32;
{
const std::vector<model_token> tmp(32, ctx->vocab.bos_token_id);
const std::vector<model_token> tmp(WarmUpPromptLen, ctx->vocab.bos_token_id);
std::vector<model_input> inputs = {model_input{
/*.tokens =*/tmp.data(),
/*.n_tokens =*/(uint32_t)tmp.size(),
Expand All @@ -430,8 +431,8 @@ int main(int argc, char** argv) { // NOLINT
/*.tokens =*/tmp.data(),
/*.n_tokens =*/(uint32_t)tmp.size(),
/*.n_prompt_tokens =*/0,
/*.n_past =*/(uint32_t)(params.n_predict - 1),
/*.n_total =*/(uint32_t)(params.n_predict - 1),
/*.n_past =*/WarmUpPromptLen,
/*.n_total =*/WarmUpPromptLen,
/*.request_idx =*/0,
/*.beam_idx =*/0,
/*.padding_side =*/0,
Expand Down Expand Up @@ -771,4 +772,4 @@ int main(int argc, char** argv) { // NOLINT
model_free(ctx);

return 0;
} // NOLINT
} // NOLINT
Loading

0 comments on commit 428d4c7

Please sign in to comment.