Commit ed8e256

wip
slaren committed Jan 2, 2024
1 parent 83e633c commit ed8e256
Showing 4 changed files with 610 additions and 1,179 deletions.
11 changes: 11 additions & 0 deletions ggml-alloc.c
@@ -779,10 +779,21 @@ ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_conte

     if (nbytes == 0) {
         // all the tensors in the context are already allocated
+#ifndef NDEBUG
+        fprintf(stderr, "%s: all tensors in the context are already allocated\n", __func__);
+#endif
         return NULL;
     }

     ggml_backend_buffer_t buffer = ggml_backend_buft_alloc_buffer(buft, nbytes);
+    if (buffer == NULL) {
+        // failed to allocate buffer
+#ifndef NDEBUG
+        fprintf(stderr, "%s: failed to allocate buffer\n", __func__);
+#endif
+        return NULL;
+    }
+
     ggml_tallocr_t tallocr = ggml_tallocr_new_from_buffer(buffer);

     for (struct ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
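
With this change, ggml_backend_alloc_ctx_tensors_from_buft returns NULL both when every tensor in the context is already allocated and when the backend fails to allocate the buffer, so callers should expect and handle a NULL result. A minimal caller sketch, assuming a context created with no_alloc and the CPU buffer type; the helper name alloc_ctx_on_cpu is illustrative, not from the source:

    #include <stdio.h>
    #include "ggml.h"
    #include "ggml-alloc.h"
    #include "ggml-backend.h"

    // hypothetical helper: allocate all tensors of a context in a CPU backend buffer
    static ggml_backend_buffer_t alloc_ctx_on_cpu(struct ggml_context * ctx) {
        ggml_backend_buffer_type_t buft = ggml_backend_cpu_buffer_type();
        ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors_from_buft(ctx, buft);
        if (buf == NULL) {
            // either there was nothing left to allocate or the backend allocation failed;
            // in debug builds the library now prints which case it was
            fprintf(stderr, "no buffer allocated for context\n");
        }
        return buf;
    }
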
25 changes: 19 additions & 6 deletions ggml-backend.c
@@ -809,16 +809,16 @@ static ggml_backend_t sched_backend_from_cur(ggml_backend_sched_t sched, struct
             break;
         }
         ggml_backend_t src_backend = get_buffer_backend(sched, src->buffer);
-        if (src_backend != NULL) {
+        //if (src_backend != NULL) {
             int src_prio = sched_backend_prio(sched, src_backend);
             size_t src_size = ggml_nbytes(src);
-            if (src_prio < cur_prio && src_size >= cur_size) {
+            if (/*src_prio < cur_prio &&*/ src_size >= cur_size) {
                 cur_prio = src_prio;
                 cur_size = src_size;
                 cur_backend = src_backend;
                 SET_CAUSE(node, "1.src%d", i);
             }
-        }
+        //}
     }
     return cur_backend;
 }
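
With the backend NULL check and the priority comparison commented out above, the node is effectively assigned to the backend that holds its largest source tensor. A standalone sketch of that simplified rule, using only functions visible in this file; the helper name pick_backend_for_node is illustrative, not from the source:

    // hypothetical illustration: choose the backend holding the largest source of `node`
    static ggml_backend_t pick_backend_for_node(ggml_backend_sched_t sched, struct ggml_tensor * node) {
        ggml_backend_t best = NULL;
        size_t best_size = 0;
        for (int i = 0; i < GGML_MAX_SRC; i++) {
            struct ggml_tensor * src = node->src[i];
            if (src == NULL) {
                break;
            }
            size_t size = ggml_nbytes(src);
            if (size >= best_size) {
                best_size = size;
                best = get_buffer_backend(sched, src->buffer); // may be NULL if the source has no buffer yet
            }
        }
        return best;
    }
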
@@ -1025,9 +1025,21 @@ static void sched_split_graph(ggml_backend_sched_t sched, struct ggml_cgraph * g
             }
             ggml_tallocr_t src_allocr = node_allocr(src);
             if (src_allocr != node_allocr) {
-                int n_inputs = sched->splits[cur_split].n_inputs++;
-                GGML_ASSERT(n_inputs < GGML_MAX_SPLIT_INPUTS);
-                sched->splits[cur_split].inputs[n_inputs] = (struct ggml_tensor *)src;
+                // check if the input is already in the split
+                bool found = false;
+                for (int k = 0; k < sched->splits[cur_split].n_inputs; k++) {
+                    if (sched->splits[cur_split].inputs[k] == src) {
+                        found = true;
+                        break;
+                    }
+                }
+
+                if (!found) {
+                    int n_inputs = sched->splits[cur_split].n_inputs++;
+                    //printf("split %d input %d: %s (%s)\n", cur_split, n_inputs, src->name, ggml_backend_name(get_allocr_backend(sched, src_allocr)));
+                    GGML_ASSERT(n_inputs < GGML_MAX_SPLIT_INPUTS);
+                    sched->splits[cur_split].inputs[n_inputs] = (struct ggml_tensor *)src;
+                }

                 // create copies
                 size_t id = hash_id(src);
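
The added block above does a linear scan of the inputs already recorded for the current split, so the same source tensor is not appended twice when several nodes of the split consume it. The same check as a hypothetical helper (split_has_input is not part of the source, only an illustration of the idea):

    #include <stdbool.h>

    // hypothetical helper: true if `src` already appears among the first `n_inputs` entries of `inputs`
    static bool split_has_input(struct ggml_tensor ** inputs, int n_inputs, const struct ggml_tensor * src) {
        for (int k = 0; k < n_inputs; k++) {
            if (inputs[k] == src) {
                return true;
            }
        }
        return false;
    }

    // usage at the site above (sketch):
    //   if (!split_has_input(sched->splits[cur_split].inputs, sched->splits[cur_split].n_inputs, src)) { ... }
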
@@ -1316,6 +1328,7 @@ static void graph_init_tensor(struct ggml_hash_set hash_set, struct ggml_tensor

     struct ggml_tensor * dst = node_copies[id];
     if (dst->view_src != NULL) {
+        graph_init_tensor(hash_set, node_copies, node_init, src->view_src);
         ggml_backend_view_init(dst->view_src->buffer, dst);
     }
     else {
4 changes: 4 additions & 0 deletions ggml.c
@@ -2324,6 +2324,10 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
 }

 void ggml_free(struct ggml_context * ctx) {
+    if (ctx == NULL) {
+        return;
+    }
+
     // make this function thread safe
     ggml_critical_section_start();

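
After this change, passing NULL to ggml_free is a no-op, much like free(NULL). A minimal sketch of a cleanup path that relies on this; the struct and function names are illustrative, not from the source:

    #include "ggml.h"

    // hypothetical owner of an optional context
    struct model_state {
        struct ggml_context * ctx; // may be NULL if initialization failed or never ran
    };

    static void model_state_release(struct model_state * st) {
        ggml_free(st->ctx); // safe even when st->ctx is NULL, no separate check needed
        st->ctx = NULL;
    }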

0 comments on commit ed8e256
