diff --git a/include/experimental/kernel/int4_dequantize/int4_dequantize_xe_impl.hpp b/include/experimental/kernel/int4_dequantize/int4_dequantize_xe_impl.hpp index cd46b4033..2ff0523fa 100644 --- a/include/experimental/kernel/int4_dequantize/int4_dequantize_xe_impl.hpp +++ b/include/experimental/kernel/int4_dequantize/int4_dequantize_xe_impl.hpp @@ -252,7 +252,6 @@ struct int4_dequantize_t< typename dequantize_t::arguments_t dequantize_args(start_n, start_k); dequantize_t dequantize; int tile_k_idx = (start_k + k_stride - 1) / k_stride; - SW_BARRIER(); #pragma unroll for (uint32_t i = 0; i < k_dim_loop; i++) { subgroup::tile_load( @@ -264,7 +263,6 @@ struct int4_dequantize_t< zp, zp_payload); } tile_k_idx++; - SW_BARRIER(); mat_qweight_payload.template update_tdesc( mat_qweight_t::tile_size_y); @@ -276,13 +274,11 @@ struct int4_dequantize_t< zp_t::tile_size_y); } } - SW_BARRIER(); dequantize(mat_dequant_weight, mat_qweight, scale, zp, dequantize_args); tile_transpose(mat_dequant_weight); subgroup::tile_store(mat_dequant_weight, mat_dequant_weight_payload); mat_dequant_weight_payload.template update_tdesc( mat_dequant_weight_t::tile_size_y); - SW_BARRIER(); } }; };