Skip to content

Commit

Permalink
Allow short16 for ushort16 intel_sub_group_block_read/write (#2718) (#2833)
Browse files Browse the repository at this point in the history

Per version 1.1.0 of the cl_intel_subgroups_short extension, short16 is
allowed for these builtins.

Signed-off-by: Sidorov, Dmitry <[email protected]>
  • Loading branch information
MrSidims authored Nov 8, 2024
1 parent 0d1b803 commit 3d182dc
Show file tree
Hide file tree
Showing 2 changed files with 75 additions and 4 deletions.
4 changes: 2 additions & 2 deletions lib/SPIRV/OCLUtil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1490,8 +1490,8 @@ std::string getIntelSubgroupBlockDataPostfix(unsigned ElementBitSize,
OSS << VectorNumElements;
break;
case 16:
assert(ElementBitSize == 8 &&
"16 elements vector allowed only for char builtins");
assert((ElementBitSize == 8 || ElementBitSize == 16) &&
"16 elements vector allowed only for char and short builtins");
OSS << VectorNumElements;
break;
default:
Expand Down
75 changes: 73 additions & 2 deletions test/transcoding/cl_intel_sub_groups.ll
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,16 @@
; intel_sub_group_block_write_ul2(image_out, coord, ul2);
; ul2 = intel_sub_group_block_read_ul2(lp);
; intel_sub_group_block_write_ul2(lp, ul2);
;
; uchar16 uc16 = intel_sub_group_block_read_uc16(image_in, coord);
; intel_sub_group_block_write_uc16(image_out, coord, uc16);
; uc16 = intel_sub_group_block_read_uc16(cp);
; intel_sub_group_block_write_uc16(cp, uc16);
;
; ushort16 us16 = intel_sub_group_block_read_us16(image_in, coord);
; intel_sub_group_block_write_us16(image_out, coord, us16);
; us16 = intel_sub_group_block_read_us16(sp);
; intel_sub_group_block_write_us16(sp, us16);
;}

; RUN: llvm-as %s -o %t.bc
Expand Down Expand Up @@ -75,6 +85,16 @@
; CHECK-SPIRV: SubgroupBlockReadINTEL
; CHECK-SPIRV: SubgroupBlockWriteINTEL

; CHECK-SPIRV: SubgroupImageBlockReadINTEL
; CHECK-SPIRV: SubgroupImageBlockWriteINTEL
; CHECK-SPIRV: SubgroupBlockReadINTEL
; CHECK-SPIRV: SubgroupBlockWriteINTEL

; CHECK-SPIRV: SubgroupImageBlockReadINTEL
; CHECK-SPIRV: SubgroupImageBlockWriteINTEL
; CHECK-SPIRV: SubgroupBlockReadINTEL
; CHECK-SPIRV: SubgroupBlockWriteINTEL

; CHECK-SPIRV-LABEL: Return

target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
Expand Down Expand Up @@ -108,6 +128,15 @@ define spir_kernel void @test(<2 x float> %x, i32 %c, %opencl.image2d_ro_t addrs
; CHECK-LLVM-NEXT: call spir_func void @_Z31intel_sub_group_block_write_ul214ocl_image2d_woDv2_iDv2_m(%opencl.image2d_wo_t addrspace(1)* [[IMAGE_OUT]], <2 x i32> [[COORD]], <2 x i64> [[CALL10]])
; CHECK-LLVM-NEXT: [[CALL11:%.*]] = call spir_func <2 x i64> @_Z30intel_sub_group_block_read_ul2PU3AS1Km(i64 addrspace(1)* [[LP:%.*]])
; CHECK-LLVM-NEXT: call spir_func void @_Z31intel_sub_group_block_write_ul2PU3AS1mDv2_m(i64 addrspace(1)* [[LP]], <2 x i64> [[CALL11]])
; CHECK-LLVM-NEXT: [[CALL12:%.*]] = call spir_func <16 x i8> @_Z31intel_sub_group_block_read_uc1614ocl_image2d_roDv2_i(%opencl.image2d_ro_t addrspace(1)* [[IMAGE_IN]], <2 x i32> [[COORD]])
; CHECK-LLVM-NEXT: call spir_func void @_Z32intel_sub_group_block_write_uc1614ocl_image2d_woDv2_iDv16_h(%opencl.image2d_wo_t addrspace(1)* [[IMAGE_OUT]], <2 x i32> [[COORD]], <16 x i8> [[CALL12]])
; CHECK-LLVM-NEXT: [[CALL13:%.*]] = call spir_func <16 x i8> @_Z31intel_sub_group_block_read_uc16PU3AS1Kh(i8 addrspace(1)* [[CP]])
; CHECK-LLVM-NEXT: call spir_func void @_Z32intel_sub_group_block_write_uc16PU3AS1hDv16_h(i8 addrspace(1)* [[CP]], <16 x i8> [[CALL13]])
; CHECK-LLVM-NEXT: [[CALL14:%.*]] = call spir_func <16 x i16> @_Z31intel_sub_group_block_read_us1614ocl_image2d_roDv2_i(%opencl.image2d_ro_t addrspace(1)* [[IMAGE_IN]], <2 x i32> [[COORD]])
; CHECK-LLVM-NEXT: call spir_func void @_Z32intel_sub_group_block_write_us1614ocl_image2d_woDv2_iDv16_t(%opencl.image2d_wo_t addrspace(1)* [[IMAGE_OUT]], <2 x i32> [[COORD]], <16 x i16> [[CALL14]])
; CHECK-LLVM-NEXT: [[CALL15:%.*]] = call spir_func <16 x i16> @_Z31intel_sub_group_block_read_us16PU3AS1Kt(i16 addrspace(1)* [[SP]])
; CHECK-LLVM-NEXT: call spir_func void @_Z32intel_sub_group_block_write_us16PU3AS1tDv16_t(i16 addrspace(1)* [[SP]], <16 x i16> [[CALL15]])

; CHECK-LLVM-NEXT: ret void

; CHECK-LLVM-SPIRV: call spir_func <2 x float> @_Z28__spirv_SubgroupShuffleINTELDv2_fj
Expand All @@ -128,8 +157,16 @@ define spir_kernel void @test(<2 x float> %x, i32 %c, %opencl.image2d_ro_t addrs
; CHECK-LLVM-SPIRV: call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELPU3AS1hDv2_h
; CHECK-LLVM-SPIRV: call spir_func <2 x i64> @_Z42__spirv_SubgroupImageBlockReadINTEL_Rlong2PU3AS133__spirv_Image__void_1_0_0_0_0_0_0Dv2_i(%spirv.Image._void_1_0_0_0_0_0_0 addrspace(1)*
; CHECK-LLVM-SPIRV: call spir_func void @_Z36__spirv_SubgroupImageBlockWriteINTELPU3AS133__spirv_Image__void_1_0_0_0_0_0_1Dv2_iDv2_m(%spirv.Image._void_1_0_0_0_0_0_1 addrspace(1)*
; CHECK-LLVM-SPIRV: call spir_func <2 x i64> @_Z37__spirv_SubgroupBlockReadINTEL_Rlong2PU3AS1Km
; CHECK-LLVM-SPIRV: call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELPU3AS1mDv2_m
; CHECK-LLVM-SPIRV: call spir_func <2 x i64> @_Z37__spirv_SubgroupBlockReadINTEL_Rlong2PU3AS1Km(
; CHECK-LLVM-SPIRV: call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELPU3AS1mDv2_m(
; CHECK-LLVM-SPIRV: call spir_func <16 x i8> @_Z43__spirv_SubgroupImageBlockReadINTEL_Rchar16PU3AS133__spirv_Image__void_1_0_0_0_0_0_0Dv2_i(%spirv.Image._void_1_0_0_0_0_0_0 addrspace(1)*
; CHECK-LLVM-SPIRV: call spir_func void @_Z36__spirv_SubgroupImageBlockWriteINTELPU3AS133__spirv_Image__void_1_0_0_0_0_0_1Dv2_iDv16_h(%spirv.Image._void_1_0_0_0_0_0_1 addrspace(1)*
; CHECK-LLVM-SPIRV: call spir_func <16 x i8> @_Z38__spirv_SubgroupBlockReadINTEL_Rchar16PU3AS1Kh(
; CHECK-LLVM-SPIRV: call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELPU3AS1hDv16_h(
; CHECK-LLVM-SPIRV: call spir_func <16 x i16> @_Z44__spirv_SubgroupImageBlockReadINTEL_Rshort16PU3AS133__spirv_Image__void_1_0_0_0_0_0_0Dv2_i(%spirv.Image._void_1_0_0_0_0_0_0 addrspace(1)*
; CHECK-LLVM-SPIRV: call spir_func void @_Z36__spirv_SubgroupImageBlockWriteINTELPU3AS133__spirv_Image__void_1_0_0_0_0_0_1Dv2_iDv16_t(%spirv.Image._void_1_0_0_0_0_0_1 addrspace(1)*
; CHECK-LLVM-SPIRV: call spir_func <16 x i16> @_Z39__spirv_SubgroupBlockReadINTEL_Rshort16PU3AS1Kt(
; CHECK-LLVM-SPIRV: call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELPU3AS1tDv16_t(

entry:
%call = tail call spir_func <2 x float> @_Z23intel_sub_group_shuffleDv2_fj(<2 x float> %x, i32 %c) #2
Expand Down Expand Up @@ -157,6 +194,16 @@ entry:
%call11 = tail call spir_func <2 x i64> @_Z30intel_sub_group_block_read_ul2PU3AS1Km(i64 addrspace(1)* %lp) #2
tail call spir_func void @_Z31intel_sub_group_block_write_ul2PU3AS1mDv2_m(i64 addrspace(1)* %lp, <2 x i64> %call11) #2

%call12 = tail call spir_func <16 x i8> @_Z31intel_sub_group_block_read_uc1614ocl_image2d_roDv2_i(%opencl.image2d_ro_t addrspace(1)* %image_in, <2 x i32> %coord) #2
tail call spir_func void @_Z32intel_sub_group_block_write_uc1614ocl_image2d_woDv2_iDv16_h(%opencl.image2d_wo_t addrspace(1)* %image_out, <2 x i32> %coord, <16 x i8> %call12) #2
%call13 = tail call spir_func <16 x i8> @_Z31intel_sub_group_block_read_uc16PU3AS1Kh(i8 addrspace(1)* %cp) #2
tail call spir_func void @_Z32intel_sub_group_block_write_uc16PU3AS1hDv16_h(i8 addrspace(1)* %cp, <16 x i8> %call13) #2

%call14 = tail call spir_func <16 x i16> @_Z31intel_sub_group_block_read_us1614ocl_image2d_roDv2_i(%opencl.image2d_ro_t addrspace(1)* %image_in, <2 x i32> %coord) #2
tail call spir_func void @_Z32intel_sub_group_block_write_us1614ocl_image2d_woDv2_iDv16_t(%opencl.image2d_wo_t addrspace(1)* %image_out, <2 x i32> %coord, <16 x i16> %call14) #2
%call15 = tail call spir_func <16 x i16> @_Z31intel_sub_group_block_read_us16PU3AS1Kt(i16 addrspace(1)* %sp) #2
tail call spir_func void @_Z32intel_sub_group_block_write_us16PU3AS1tDv16_t(i16 addrspace(1)* %sp, <16 x i16> %call15) #2

ret void
}

Expand Down Expand Up @@ -220,6 +267,30 @@ declare spir_func <2 x i64> @_Z30intel_sub_group_block_read_ul2PU3AS1Km(i64 addr
; Function Attrs: convergent
declare spir_func void @_Z31intel_sub_group_block_write_ul2PU3AS1mDv2_m(i64 addrspace(1)*, <2 x i64>) local_unnamed_addr #1

; Function Attrs: convergent
declare spir_func <16 x i8> @_Z31intel_sub_group_block_read_uc1614ocl_image2d_roDv2_i(%opencl.image2d_ro_t addrspace(1)*, <2 x i32>) #1

; Function Attrs: convergent
declare spir_func void @_Z32intel_sub_group_block_write_uc1614ocl_image2d_woDv2_iDv16_h(%opencl.image2d_wo_t addrspace(1)*, <2 x i32>, <16 x i8>) #1

; Function Attrs: convergent
declare spir_func <16 x i8> @_Z31intel_sub_group_block_read_uc16PU3AS1Kh(i8 addrspace(1)*) #1

; Function Attrs: convergent
declare spir_func void @_Z32intel_sub_group_block_write_uc16PU3AS1hDv16_h(i8 addrspace(1)*, <16 x i8>) #1

; Function Attrs: convergent
declare spir_func <16 x i16> @_Z31intel_sub_group_block_read_us1614ocl_image2d_roDv2_i(%opencl.image2d_ro_t addrspace(1)*, <2 x i32>) local_unnamed_addr #1

; Function Attrs: convergent
declare spir_func void @_Z32intel_sub_group_block_write_us1614ocl_image2d_woDv2_iDv16_t(%opencl.image2d_wo_t addrspace(1)*, <2 x i32>, <16 x i16>) local_unnamed_addr #1

; Function Attrs: convergent
declare spir_func <16 x i16> @_Z31intel_sub_group_block_read_us16PU3AS1Kt(i16 addrspace(1)*) local_unnamed_addr #1

; Function Attrs: convergent
declare spir_func void @_Z32intel_sub_group_block_write_us16PU3AS1tDv16_t(i16 addrspace(1)*, <16 x i16>) local_unnamed_addr #1

attributes #0 = { convergent nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="128" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { convergent "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { convergent nounwind }
Expand Down

0 comments on commit 3d182dc

Please sign in to comment.