diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 3693cf747a72c..8655c097aa51b 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -1837,8 +1837,10 @@ struct server_context { if (slot.ga_n == 1) { if (slot.is_processing() && (int) system_tokens.size() + slot.n_past >= slot.n_ctx - 1) { if (!params.ctx_shift) { + // this check is redundant, but kept on purpose as a safeguard // we should never get here, because generation should already stopped in process_token() - GGML_ASSERT(false && "context shifting is disabled"); + slot.release(); + send_error(slot, "context shift is disabled", ERROR_TYPE_SERVER); continue; }