Skip to content

Commit

Permalink
MUSA: Stop explicitly setting use_mul_mat_vec_q to false
Browse files: browse the repository at this point in the history
Signed-off-by: Xiaodong Ye <[email protected]>
  • Loading branch information
yeahdongcn committed Jul 15, 2024
1 parent 12329e6 commit 0fc2a35
Showing 1 changed file with 5 additions and 7 deletions.
12 changes: 5 additions & 7 deletions ggml/src/ggml-cuda.cu
Original file line number Diff line number Diff line change
Expand Up @@ -1169,17 +1169,21 @@ static cudaError_t ggml_cuda_cpy_tensor_2d(

const char * x = src_ptr + i1_low*nb1 + i2*nb2 + i3*nb3;
if (nb0 == ts && nb1 == ts*ne0/bs) {
printf("nb0 == ts && nb1 == ts*ne0/bs\n");
return cudaMemcpyAsync(dst_ptr, x, i1_diff*nb1, cudaMemcpyDeviceToDevice, stream);
} else if (nb0 == ts) {
printf("nb0 == ts\n");
return cudaMemcpy2DAsync(dst_ptr, ts*ne0/bs, x, nb1, ts*ne0/bs, i1_diff, cudaMemcpyDeviceToDevice, stream);
} else {
printf("else\n");
for (int64_t i1 = 0; i1 < i1_diff; i1++) {
const void * rx = (const void *) ((const char *) x + i1*nb1);
void * rd = (void *) (dst_ptr + i1*ts*ne0/bs);
// pretend the row is a matrix with cols=1
cudaError_t r = cudaMemcpy2DAsync(rd, ts/bs, rx, nb0, ts/bs, ne0, cudaMemcpyDeviceToDevice, stream);
if (r != cudaSuccess) {
return r;
printf("r = %d\n", r);
return cudaSuccess;
}
}
return cudaSuccess;
Expand Down Expand Up @@ -1906,17 +1910,11 @@ static void ggml_cuda_mul_mat(ggml_backend_cuda_context & ctx, const ggml_tensor
const int cc = ggml_cuda_info().devices[id].cc;
use_mul_mat_q = use_mul_mat_q && ggml_cuda_should_use_mmq(src0->type, cc, src1->ne[1]);
any_gpus_with_slow_fp16 = any_gpus_with_slow_fp16 || !fast_fp16_available(cc);
#ifdef GGML_USE_MUSA
use_mul_mat_vec_q = false;
#endif // GGML_USE_MUSA
}
} else {
const int cc = ggml_cuda_info().devices[ctx.device].cc;
use_mul_mat_q = use_mul_mat_q && ggml_cuda_should_use_mmq(src0->type, cc, src1->ne[1]);
any_gpus_with_slow_fp16 = any_gpus_with_slow_fp16 || !fast_fp16_available(cc);
#ifdef GGML_USE_MUSA
use_mul_mat_vec_q = false;
#endif // GGML_USE_MUSA
}

// debug helpers
Expand Down

0 comments on commit 0fc2a35

Please sign in to comment.