Skip to content

Commit

Permalink
server : samplers accept the prompt correctly (ggerganov#10019)
Browse files (browse the repository at this point in the history)
  • Loading branch information
wwoodsTM authored Oct 23, 2024
1 parent 190a37d commit 0a1c750
Showing 1 changed file with 7 additions and 11 deletions.
18 changes: 7 additions & 11 deletions examples/server/server.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -2163,17 +2163,10 @@ struct server_context {
GGML_ASSERT(slot.n_prompt_tokens < slot.n_ctx);
}

common_sampler_reset(slot.smpl);

if (slot.params.cache_prompt) {
// reuse any previously computed tokens that are common with the new prompt
slot.n_past = longest_common_prefix(slot.cache_tokens, prompt_tokens);

// push the prompt into the sampling context (do not apply grammar)
for (int i = 0; i < slot.n_past; ++i) {
common_sampler_accept(slot.smpl, slot.cache_tokens[i], false);
}

// reuse chunks from the cached prompt by shifting their KV cache in the new position
if (params.n_cache_reuse > 0) {
size_t head_c = slot.n_past; // cache
Expand Down Expand Up @@ -2206,8 +2199,6 @@ struct server_context {
for (size_t i = 0; i < n_match; i++) {
slot.cache_tokens[head_p + i] = slot.cache_tokens[head_c + i];

common_sampler_accept(slot.smpl, slot.cache_tokens[head_p + i], false);

slot.n_past++;
}

Expand Down Expand Up @@ -2259,8 +2250,6 @@ struct server_context {

// there is no common part left
slot.n_past = 0;

common_sampler_reset(slot.smpl);
}

SLT_INF(slot, "kv cache rm [%d, end)\n", slot.n_past);
Expand Down Expand Up @@ -2288,6 +2277,13 @@ struct server_context {

GGML_ASSERT(batch.n_tokens > 0);

common_sampler_reset(slot.smpl);

// push all prompt tokens into the sampling context (do not apply grammar)
for (int i = 0; i < slot.n_prompt_tokens; ++i) {
common_sampler_accept(slot.smpl, prompt_tokens[i], false);
}

// extract the logits only for the last token
batch.logits[batch.n_tokens - 1] = true;

Expand Down

0 comments on commit 0a1c750

Please sign in to comment.