From e6ecc2be470e3c5c6c5c9d8b90aa83a1f7725084 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Tue, 18 Jun 2024 09:37:20 +0300
Subject: [PATCH] whisper : use ggml_backend_sched (whisper/2239)

* whisper : use ggml_backend_sched (wip)

* use sched in whisper_allocr

* whisper : single backend in whisper_context

* whisper : remove whisper_state->backends_used

* whisper : remove whisper_context->backend

* whisper : reset scheduler after init

* whisper : fix external encoder (e.g. CoreML)

* whisper : cleanup

* whisper : handle null GPU buffer types + fix sycl

---------

Co-authored-by: slaren
---
 ggml-backend.c | 15 +++++++++++++--
 ggml-backend.h |  3 +++
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/ggml-backend.c b/ggml-backend.c
index 26dce7f724213..13c71c310c446 100644
--- a/ggml-backend.c
+++ b/ggml-backend.c
@@ -1706,14 +1706,16 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
 static bool ggml_backend_sched_alloc_splits(ggml_backend_sched_t sched) {
     bool backend_ids_changed = false;
     for (int i = 0; i < sched->graph->n_nodes; i++) {
-        if (sched->node_backend_ids[i] != sched->prev_node_backend_ids[i]) {
+        if (sched->node_backend_ids[i] != sched->prev_node_backend_ids[i] &&
+            sched->bufts[sched->node_backend_ids[i]] != sched->bufts[sched->prev_node_backend_ids[i]]) {
             backend_ids_changed = true;
             break;
         }
     }
     if (!backend_ids_changed) {
         for (int i = 0; i < sched->graph->n_leafs; i++) {
-            if (sched->leaf_backend_ids[i] != sched->prev_leaf_backend_ids[i]) {
+            if (sched->leaf_backend_ids[i] != sched->prev_leaf_backend_ids[i] &&
+                sched->bufts[sched->leaf_backend_ids[i]] != sched->bufts[sched->prev_leaf_backend_ids[i]]) {
                 backend_ids_changed = true;
                 break;
             }
@@ -1977,6 +1979,15 @@ int ggml_backend_sched_get_n_copies(ggml_backend_sched_t sched) {
     return sched->n_copies;
 }
 
+int ggml_backend_sched_get_n_backends(ggml_backend_sched_t sched) {
+    return sched->n_backends;
+}
+
+ggml_backend_t ggml_backend_sched_get_backend(ggml_backend_sched_t sched, int i) {
+    GGML_ASSERT(i >= 0 && i < sched->n_backends);
+    return sched->backends[i];
+}
+
 size_t ggml_backend_sched_get_buffer_size(ggml_backend_sched_t sched, ggml_backend_t backend) {
     int backend_index = ggml_backend_sched_backend_id(sched, backend);
     GGML_ASSERT(backend_index >= 0 && backend_index < sched->n_backends);
diff --git a/ggml-backend.h b/ggml-backend.h
index 47fd814751795..4a38eeb5c23bd 100644
--- a/ggml-backend.h
+++ b/ggml-backend.h
@@ -182,6 +182,9 @@ extern "C" {
     // Initialize backend buffers from a measure graph
     GGML_API bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph);
 
+    GGML_API int            ggml_backend_sched_get_n_backends(ggml_backend_sched_t sched);
+    GGML_API ggml_backend_t ggml_backend_sched_get_backend(ggml_backend_sched_t sched, int i);
+
     // Get the number of splits of the last graph
     GGML_API int ggml_backend_sched_get_n_splits(ggml_backend_sched_t sched);
     GGML_API int ggml_backend_sched_get_n_copies(ggml_backend_sched_t sched);