diff --git a/ggml-cuda.cu b/ggml-cuda.cu index 75a2ad480877d..04b6e52859ed5 100644 --- a/ggml-cuda.cu +++ b/ggml-cuda.cu @@ -2558,7 +2558,7 @@ GGML_CALL static enum ggml_status ggml_backend_cuda_graph_compute(ggml_backend_t } // Disable CUDA graphs (from the next token) if the use-case is demanding too many consecutive graph updates. - if (cuda_graph_update_required) { + if (use_cuda_graph && cuda_graph_update_required) { cuda_ctx->cuda_graph->number_consecutive_updates++; } else { cuda_ctx->cuda_graph->number_consecutive_updates = 0;