From 6f6612570ef1df5e59684654f3e05f8dc4d7811f Mon Sep 17 00:00:00 2001 From: Joe Todd Date: Fri, 14 Jun 2024 22:22:57 +0100 Subject: [PATCH 1/2] Revert "Minor arithmetic improvement to mmvq wrapper kernel (#7172)" This reverts commit 8c570c9496212073079476651c7517c02581101f. The reverted expression `local_id - i * qi_vdr` equals `local_id % (qi / vdr)` only on the first loop iteration; once `i` advances by `blocks_per_warp` the two differ, so the modulo form is restored. --- ggml-sycl.cpp | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/ggml-sycl.cpp b/ggml-sycl.cpp index f6ae84ce93620..0e202ce25a3ea 100644 --- a/ggml-sycl.cpp +++ b/ggml-sycl.cpp @@ -7984,26 +7984,24 @@ static void mul_mat_vec_q(const void * __restrict__ vx, const void * __restrict_ const int blocks_per_row = ncols / qk; const int blocks_per_warp = vdr * WARP_SIZE / qi; - const int qi_vdr = (qi / vdr); // N_threads processing 1 qk block - - // partial sum for each thread +// partial sum for each thread float tmp = 0.0f; const block_q_t * x = (const block_q_t *) vx; const block_q8_1 * y = (const block_q8_1 *) vy; - for (int i = item_ct1.get_local_id(2) / qi_vdr; i < blocks_per_row; + for (int i = item_ct1.get_local_id(2) / (qi / vdr); i < blocks_per_row; i += blocks_per_warp) { - const int ibx = row * blocks_per_row + i; // x block index + const int ibx = row*blocks_per_row + i; // x block index - const int iby = i * (qk / QK8_1); // y block index that aligns with ibx + const int iby = i * (qk/QK8_1); // y block index that aligns with ibx - const int iqs = - vdr * - (item_ct1.get_local_id(2) - - i * qi_vdr); // x block quant index when casting the quants to int + const int iqs = + vdr * + (item_ct1.get_local_id(2) % + (qi / vdr)); // x block quant index when casting the quants to int - tmp += vec_dot_q_sycl(&x[ibx], &y[iby], iqs); + tmp += vec_dot_q_sycl(&x[ibx], &y[iby], iqs); } // sum up partial sums and write back result From ce6e28cc2385d72c386a86062dc9b3dd7fbbc035 Mon Sep 17 00:00:00 2001 From: Joe Todd Date: Tue, 18 Jun 2024 09:57:14 +0100 Subject: [PATCH 2/2] ggml-sycl.cpp: restore indentation of "partial sum" comment --- ggml-sycl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) 
diff --git a/ggml-sycl.cpp b/ggml-sycl.cpp index 0e202ce25a3ea..03e06c3a6a5bc 100644 --- a/ggml-sycl.cpp +++ b/ggml-sycl.cpp @@ -7984,7 +7984,7 @@ static void mul_mat_vec_q(const void * __restrict__ vx, const void * __restrict_ const int blocks_per_row = ncols / qk; const int blocks_per_warp = vdr * WARP_SIZE / qi; -// partial sum for each thread + // partial sum for each thread float tmp = 0.0f; const block_q_t * x = (const block_q_t *) vx;