diff --git a/llvm/test/Analysis/GlobalsModRef/barrier_intrinsic.ll b/llvm/test/Analysis/GlobalsModRef/barrier_intrinsic.ll
index 54cd0ced11a5a..97ff17140a9f8 100644
--- a/llvm/test/Analysis/GlobalsModRef/barrier_intrinsic.ll
+++ b/llvm/test/Analysis/GlobalsModRef/barrier_intrinsic.ll
@@ -8,7 +8,7 @@ target triple = "nvptx"
 
 @foo.l.0 = internal unnamed_addr addrspace(3) global i32 undef, align 4
 
-define dso_local spir_kernel void @foo(i32 addrspace(1)* nocapture %0) {
+define dso_local spir_kernel void @foo(ptr addrspace(1) nocapture %0) {
 ; CHECK-LABEL: @foo(
 ; CHECK-NEXT:    [[TMP2:%.*]] = tail call i32 @_Z13get_global_idj(i32 0) #0
 ; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @_Z12get_local_idj(i32 0) #0
@@ -16,13 +16,13 @@ define dso_local spir_kernel void @foo(i32 addrspace(1)* nocapture %0) {
 ; CHECK-NEXT:    br i1 [[TMP4]], label [[TMP5:%.*]], label [[TMP7:%.*]]
 ; CHECK:       5:
 ; CHECK-NEXT:    [[TMP6:%.*]] = add i32 [[TMP2]], 5
-; CHECK-NEXT:    store i32 [[TMP6]], i32 addrspace(3)* @foo.l.0, align 4
+; CHECK-NEXT:    store i32 [[TMP6]], ptr addrspace(3) @foo.l.0, align 4
 ; CHECK-NEXT:    br label [[TMP7]]
 ; CHECK:       7:
 ; CHECK-NEXT:    tail call void @llvm.nvvm.barrier0() #2
-; CHECK-NEXT:    [[TMP8:%.*]] = load i32, i32 addrspace(3)* @foo.l.0, align 4
-; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP0:%.*]], i32 [[TMP2]]
-; CHECK-NEXT:    store i32 [[TMP8]], i32 addrspace(1)* [[TMP9]], align 4
+; CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr addrspace(3) @foo.l.0, align 4
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP0:%.*]], i32 [[TMP2]]
+; CHECK-NEXT:    store i32 [[TMP8]], ptr addrspace(1) [[TMP9]], align 4
 ; CHECK-NEXT:    ret void
 ;
   %2 = tail call i32 @_Z13get_global_idj(i32 0) #0
@@ -32,14 +32,14 @@ define dso_local spir_kernel void @foo(i32 addrspace(1)* nocapture %0) {
 
 5:                                                ; preds = %1
   %6 = add i32 %2, 5
-  store i32 %6, i32 addrspace(3)* @foo.l.0, align 4
+  store i32 %6, ptr addrspace(3) @foo.l.0, align 4
   br label %7
 
 7:                                                ; preds = %5, %1
   tail call void @llvm.nvvm.barrier0() #1
-  %8 = load i32, i32 addrspace(3)* @foo.l.0, align 4
-  %9 = getelementptr inbounds i32, i32 addrspace(1)* %0, i32 %2
-  store i32 %8, i32 addrspace(1)* %9, align 4
+  %8 = load i32, ptr addrspace(3) @foo.l.0, align 4
+  %9 = getelementptr inbounds i32, ptr addrspace(1) %0, i32 %2
+  store i32 %8, ptr addrspace(1) %9, align 4
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/global-offset-dbg.ll b/llvm/test/CodeGen/AMDGPU/global-offset-dbg.ll
index d03e44933c518..787ba90e7c785 100644
--- a/llvm/test/CodeGen/AMDGPU/global-offset-dbg.ll
+++ b/llvm/test/CodeGen/AMDGPU/global-offset-dbg.ll
@@ -7,14 +7,14 @@ target triple = "amdgcn-amd-amdhsa"
 
 ; This test checks that debug information on functions and callsites are preserved
 
-declare i32 addrspace(5)* @llvm.amdgcn.implicit.offset()
+declare ptr addrspace(5) @llvm.amdgcn.implicit.offset()
 ; CHECK-NOT: llvm.amdgcn.implicit.offset
 
 define weak_odr dso_local i64 @_ZTS14other_function() !dbg !11 {
-; CHECK: define weak_odr dso_local i64 @_ZTS14other_function(i32 addrspace(5)* %0) !dbg !11 {
-  %1 = tail call i32 addrspace(5)* @llvm.amdgcn.implicit.offset()
-  %2 = getelementptr inbounds i32, i32 addrspace(5)* %1, i64 2
-  %3 = load i32, i32 addrspace(5)* %2, align 4
+; CHECK: define weak_odr dso_local i64 @_ZTS14other_function(ptr addrspace(5) %0) !dbg !11 {
+  %1 = tail call ptr addrspace(5) @llvm.amdgcn.implicit.offset()
+  %2 = getelementptr inbounds i32, ptr addrspace(5) %1, i64 2
+  %3 = load i32, ptr addrspace(5) %2, align 4
   %4 = zext i32 %3 to i64
   ret i64 %4
 }
 
@@ -24,15 +24,15 @@ define weak_odr dso_local void @_ZTS14example_kernel() !dbg !14 {
 ; CHECK: define weak_odr dso_local void @_ZTS14example_kernel() !dbg !14 {
 entry:
   %0 = call i64 @_ZTS14other_function(), !dbg !15
-; CHECK: %3 = call i64 @_ZTS14other_function(i32 addrspace(5)* %2), !dbg !15
+; CHECK: %2 = call i64 @_ZTS14other_function(ptr addrspace(5) %1), !dbg !15
   ret void
 }
 
-; CHECK: define weak_odr dso_local void @_ZTS14example_kernel_with_offset([3 x i32]* byref([3 x i32]) %0) !dbg !16 {
+; CHECK: define weak_odr dso_local void @_ZTS14example_kernel_with_offset(ptr byref([3 x i32]) %0) !dbg !16 {
 ; CHECK: %1 = alloca [3 x i32], align 4, addrspace(5), !dbg !17
-; CHECK: %2 = bitcast [3 x i32] addrspace(5)* %1 to i32 addrspace(5)*, !dbg !17
-; CHECK: call void @llvm.memcpy.p5i8.p4i8.i64(i8 addrspace(5)* align 4 %4, i8 addrspace(4)* align 1 %3, i64 12, i1 false), !dbg !17
-; CHECK: %5 = call i64 @_ZTS14other_function(i32 addrspace(5)* %2), !dbg !17
+; CHECK: %2 = addrspacecast ptr %0 to ptr addrspace(4), !dbg !17
+; CHECK: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 4 %1, ptr addrspace(4) align 1 %2, i64 12, i1 false), !dbg !17
+; CHECK: %3 = call i64 @_ZTS14other_function(ptr addrspace(5) %1), !dbg !17
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!3, !4}
@@ -43,7 +43,7 @@ entry:
 !2 = !{}
 !3 = !{i32 2, !"Dwarf Version", i32 4}
 !4 = !{i32 2, !"Debug Info Version", i32 3}
-!5 = distinct !{void ()* @_ZTS14example_kernel, !"kernel", i32 1}
+!5 = distinct !{ptr @_ZTS14example_kernel, !"kernel", i32 1}
 !6 = !{i32 1, i32 4}
 !7 = !{null, !"align", i32 8, !"align", i32 65544, !"align", i32 131080}
 !8 = !{null, !"align", i32 16}
diff --git a/llvm/test/CodeGen/AMDGPU/global-offset-invalid-triple.ll b/llvm/test/CodeGen/AMDGPU/global-offset-invalid-triple.ll
index d5e4140f7f6a5..9d58c72fce71c 100644
--- a/llvm/test/CodeGen/AMDGPU/global-offset-invalid-triple.ll
+++ b/llvm/test/CodeGen/AMDGPU/global-offset-invalid-triple.ll
@@ -7,12 +7,12 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
 
 ; This test checks that the pass does not run on nvcl triples.
 
-declare i32 addrspace(5)* @llvm.amdgcn.implicit.offset()
+declare ptr addrspace(5) @llvm.amdgcn.implicit.offset()
 
 define weak_odr dso_local i64 @_ZTS14other_function() {
-  %1 = tail call i32 addrspace(5)* @llvm.amdgcn.implicit.offset()
-  %2 = getelementptr inbounds i32, i32 addrspace(5)* %1, i64 2
-  %3 = load i32, i32 addrspace(5)* %2, align 4
+  %1 = tail call ptr addrspace(5) @llvm.amdgcn.implicit.offset()
+  %2 = getelementptr inbounds i32, ptr addrspace(5) %1, i64 2
+  %3 = load i32, ptr addrspace(5) %2, align 4
   %4 = zext i32 %3 to i64
   ret i64 %4
 }
@@ -26,7 +26,7 @@ entry:
 
 !amdgcn.annotations = !{!0, !1, !2, !1, !3, !3, !3, !3, !4, !4, !3}
 
-!0 = distinct !{void ()* @_ZTS14example_kernel, !"kernel", i32 1}
+!0 = distinct !{ptr @_ZTS14example_kernel, !"kernel", i32 1}
 !1 = !{null, !"align", i32 8}
 !2 = !{null, !"align", i32 8, !"align", i32 65544, !"align", i32 131080}
 !3 = !{null, !"align", i32 16}
diff --git a/llvm/test/CodeGen/AMDGPU/global-offset-multiple-calls-from-one-function.ll b/llvm/test/CodeGen/AMDGPU/global-offset-multiple-calls-from-one-function.ll
index d27003ffee3fe..1fcc09c30bdf5 100644
--- a/llvm/test/CodeGen/AMDGPU/global-offset-multiple-calls-from-one-function.ll
+++ b/llvm/test/CodeGen/AMDGPU/global-offset-multiple-calls-from-one-function.ll
@@ -11,23 +11,23 @@ target triple = "amdgcn-amd-amdhsa"
 ; to other functions that has a variant that takes an offset parameter will have
 ; all calls redirected to the corresponding variants.
 
-declare i32 addrspace(5)* @llvm.amdgcn.implicit.offset()
-; CHECK-NOT: declare i32 addrspace(5)* @llvm.amdgcn.implicit.offset()
+declare ptr addrspace(5) @llvm.amdgcn.implicit.offset()
+; CHECK-NOT: declare ptr addrspace(5) @llvm.amdgcn.implicit.offset()
 
 define weak_odr dso_local i64 @_ZTS14other_function() {
-; CHECK: define weak_odr dso_local i64 @_ZTS14other_function(i32 addrspace(5)* %0) {
-  %1 = tail call i32 addrspace(5)* @llvm.amdgcn.implicit.offset()
-; CHECK-NOT: tail call i32 addrspace(5)* @llvm.amdgcn.implicit.offset()
-  %2 = getelementptr inbounds i32, i32 addrspace(5)* %1, i64 2
-; CHECK: %2 = getelementptr inbounds i32, i32 addrspace(5)* %0, i64 2
-  %3 = load i32, i32 addrspace(5)* %2, align 4
+; CHECK: define weak_odr dso_local i64 @_ZTS14other_function(ptr addrspace(5) %0) {
+  %1 = tail call ptr addrspace(5) @llvm.amdgcn.implicit.offset()
+; CHECK-NOT: tail call ptr addrspace(5) @llvm.amdgcn.implicit.offset()
+  %2 = getelementptr inbounds i32, ptr addrspace(5) %1, i64 2
+; CHECK: %2 = getelementptr inbounds i32, ptr addrspace(5) %0, i64 2
+  %3 = load i32, ptr addrspace(5) %2, align 4
   %4 = zext i32 %3 to i64
 
-  %5 = tail call i32 addrspace(5)* @llvm.amdgcn.implicit.offset()
-; CHECK-NOT: tail call i32 addrspace(5)* @llvm.amdgcn.implicit.offset()
-  %6 = getelementptr inbounds i32, i32 addrspace(5)* %5, i64 2
-; CHECK: %5 = getelementptr inbounds i32, i32 addrspace(5)* %0, i64 2
-  %7 = load i32, i32 addrspace(5)* %6, align 4
+  %5 = tail call ptr addrspace(5) @llvm.amdgcn.implicit.offset()
+; CHECK-NOT: tail call ptr addrspace(5) @llvm.amdgcn.implicit.offset()
+  %6 = getelementptr inbounds i32, ptr addrspace(5) %5, i64 2
+; CHECK: %5 = getelementptr inbounds i32, ptr addrspace(5) %0, i64 2
+  %7 = load i32, ptr addrspace(5) %6, align 4
   %8 = zext i32 %7 to i64
 
   ret i64 %4
@@ -37,32 +37,31 @@ define weak_odr dso_local i64 @_ZTS14other_function() {
 define weak_odr dso_local void @_ZTS14example_kernel() {
 entry:
 ; CHECK: %0 = alloca [3 x i32], align 4, addrspace(5)
-; CHECK: %1 = bitcast [3 x i32] addrspace(5)* %0 to i8 addrspace(5)*
-; CHECK: call void @llvm.memset.p5i8.i64(i8 addrspace(5)* nonnull align 4 dereferenceable(12) %1, i8 0, i64 12, i1 false)
-; CHECK: %2 = getelementptr inbounds [3 x i32], [3 x i32] addrspace(5)* %0, i32 0, i32 0
+; CHECK: call void @llvm.memset.p5.i64(ptr addrspace(5) nonnull align 4 dereferenceable(12) %0, i8 0, i64 12, i1 false)
+; CHECK: %1 = getelementptr inbounds [3 x i32], ptr addrspace(5) %0, i32 0, i32 0
   %0 = call i64 @_ZTS14other_function()
-; CHECK: %3 = call i64 @_ZTS14other_function(i32 addrspace(5)* %2)
+; CHECK: %2 = call i64 @_ZTS14other_function(ptr addrspace(5) %1)
   %1 = call i64 @_ZTS14other_function()
-; CHECK: %4 = call i64 @_ZTS14other_function(i32 addrspace(5)* %2)
+; CHECK: %3 = call i64 @_ZTS14other_function(ptr addrspace(5) %1)
   ret void
 }
 
-; CHECK: define weak_odr dso_local void @_ZTS14example_kernel_with_offset([3 x i32]* byref([3 x i32]) %0) {
+; CHECK: define weak_odr dso_local void @_ZTS14example_kernel_with_offset(ptr byref([3 x i32]) %0) {
 ; CHECK: entry:
 ; CHECK: %1 = alloca [3 x i32], align 4, addrspace(5)
-; CHECK: %2 = bitcast [3 x i32] addrspace(5)* %1 to i32 addrspace(5)*
-; CHECK: call void @llvm.memcpy.p5i8.p4i8.i64(i8 addrspace(5)* align 4 %4, i8 addrspace(4)* align 1 %3, i64 12, i1 false)
-; CHECK: %5 = call i64 @_ZTS14other_function(i32 addrspace(5)* %2)
-; CHECK: %6 = call i64 @_ZTS14other_function(i32 addrspace(5)* %2)
+; CHECK: %2 = addrspacecast ptr %0 to ptr addrspace(4)
+; CHECK: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 4 %1, ptr addrspace(4) align 1 %2, i64 12, i1 false)
+; CHECK: %3 = call i64 @_ZTS14other_function(ptr addrspace(5) %1)
+; CHECK: %4 = call i64 @_ZTS14other_function(ptr addrspace(5) %1)
 ; CHECK: ret void
 ; CHECK: }
 
 !amdgcn.annotations = !{!0, !1, !2, !1, !3, !3, !3, !3, !4, !4, !3}
 ; CHECK: !amdgcn.annotations = !{!0, !1, !2, !1, !3, !3, !3, !3, !4, !4, !3, !5}
 
-!0 = distinct !{void ()* @_ZTS14example_kernel, !"kernel", i32 1}
+!0 = distinct !{ptr @_ZTS14example_kernel, !"kernel", i32 1}
 !1 = !{null, !"align", i32 8}
 !2 = !{null, !"align", i32 8, !"align", i32 65544, !"align", i32 131080}
 !3 = !{null, !"align", i32 16}
 !4 = !{null, !"align", i32 16, !"align", i32 65552, !"align", i32 131088}
-; CHECK: !5 = !{void ([3 x i32]*)* @_ZTS14example_kernel_with_offset, !"kernel", i32 1}
+; CHECK: !5 = !{ptr @_ZTS14example_kernel_with_offset, !"kernel", i32 1}
diff --git a/llvm/test/CodeGen/AMDGPU/global-offset-multiple-entry-points.ll b/llvm/test/CodeGen/AMDGPU/global-offset-multiple-entry-points.ll
index 34417514e3e1e..dbdf823ff3dce 100644
--- a/llvm/test/CodeGen/AMDGPU/global-offset-multiple-entry-points.ll
+++ b/llvm/test/CodeGen/AMDGPU/global-offset-multiple-entry-points.ll
@@ -7,8 +7,8 @@ target triple = "amdgcn-amd-amdhsa"
 
 ; This test checks that the pass works with multiple entry points.
 
-declare i32 addrspace(5)* @llvm.amdgcn.implicit.offset()
-; CHECK-NOT: declare i32 addrspace(5)* @llvm.amdgcn.implicit.offset()
+declare ptr addrspace(5) @llvm.amdgcn.implicit.offset()
+; CHECK-NOT: declare ptr addrspace(5) @llvm.amdgcn.implicit.offset()
 
 ; This function is a kernel entry point that does not use global offset. It will
 ; not get a clone with a global offset parameter.
@@ -19,20 +19,20 @@ entry:
 }
 
 define weak_odr dso_local i64 @_ZTS15common_function() {
-; CHECK: define weak_odr dso_local i64 @_ZTS15common_function(i32 addrspace(5)* %0) {
-  %1 = tail call i32 addrspace(5)* @llvm.amdgcn.implicit.offset()
-; CHECK-NOT: tail call i32 addrspace(5)* @llvm.amdgcn.implicit.offset()
-; CHECK: %2 = getelementptr inbounds i32, i32 addrspace(5)* %0, i64 2
-  %2 = getelementptr inbounds i32, i32 addrspace(5)* %1, i64 2
-  %3 = load i32, i32 addrspace(5)* %2, align 4
+; CHECK: define weak_odr dso_local i64 @_ZTS15common_function(ptr addrspace(5) %0) {
+  %1 = tail call ptr addrspace(5) @llvm.amdgcn.implicit.offset()
+; CHECK-NOT: tail call ptr addrspace(5) @llvm.amdgcn.implicit.offset()
+; CHECK: %2 = getelementptr inbounds i32, ptr addrspace(5) %0, i64 2
+  %2 = getelementptr inbounds i32, ptr addrspace(5) %1, i64 2
+  %3 = load i32, ptr addrspace(5) %2, align 4
   %4 = zext i32 %3 to i64
   ret i64 %4
 }
 
 define weak_odr dso_local i64 @_ZTS14first_function() {
-; CHECK: define weak_odr dso_local i64 @_ZTS14first_function(i32 addrspace(5)* %0) {
+; CHECK: define weak_odr dso_local i64 @_ZTS14first_function(ptr addrspace(5) %0) {
   %1 = call i64 @_ZTS15common_function()
-; CHECK: %2 = call i64 @_ZTS15common_function(i32 addrspace(5)* %0)
+; CHECK: %2 = call i64 @_ZTS15common_function(ptr addrspace(5) %0)
   ret i64 %1
 }
 
@@ -40,27 +40,26 @@ define weak_odr dso_local i64 @_ZTS14first_function() {
 define weak_odr dso_local void @_ZTS12first_kernel() {
 entry:
 ; CHECK: %0 = alloca [3 x i32], align 4
-; CHECK: %1 = bitcast [3 x i32] addrspace(5)* %0 to i8 addrspace(5)*
-; CHECK: call void @llvm.memset.p5i8.i64(i8 addrspace(5)* nonnull align 4 dereferenceable(12) %1, i8 0, i64 12, i1 false)
-; CHECK: %2 = getelementptr inbounds [3 x i32], [3 x i32] addrspace(5)* %0, i32 0, i32 0
+; CHECK: call void @llvm.memset.p5.i64(ptr addrspace(5) nonnull align 4 dereferenceable(12) %0, i8 0, i64 12, i1 false)
+; CHECK: %1 = getelementptr inbounds [3 x i32], ptr addrspace(5) %0, i32 0, i32 0
   %0 = call i64 @_ZTS14first_function()
-; CHECK: %3 = call i64 @_ZTS14first_function(i32 addrspace(5)* %2)
+; CHECK: %2 = call i64 @_ZTS14first_function(ptr addrspace(5) %1)
   ret void
 }
 
-; CHECK: define weak_odr dso_local void @_ZTS12first_kernel_with_offset([3 x i32]* byref([3 x i32]) %0) {
+; CHECK: define weak_odr dso_local void @_ZTS12first_kernel_with_offset(ptr byref([3 x i32]) %0) {
 ; CHECK: entry:
 ; CHECK: %1 = alloca [3 x i32], align 4, addrspace(5)
-; CHECK: %2 = bitcast [3 x i32] addrspace(5)* %1 to i32 addrspace(5)*
-; CHECK: call void @llvm.memcpy.p5i8.p4i8.i64(i8 addrspace(5)* align 4 %4, i8 addrspace(4)* align 1 %3, i64 12, i1 false)
-; CHECK: %5 = call i64 @_ZTS14first_function(i32 addrspace(5)* %2)
+; CHECK: %2 = addrspacecast ptr %0 to ptr addrspace(4)
+; CHECK: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 4 %1, ptr addrspace(4) align 1 %2, i64 12, i1 false)
+; CHECK: %3 = call i64 @_ZTS14first_function(ptr addrspace(5) %1)
 ; CHECK: ret void
 ; CHECK: }
 
 define weak_odr dso_local i64 @_ZTS15second_function() {
-; CHECK: define weak_odr dso_local i64 @_ZTS15second_function(i32 addrspace(5)* %0) {
+; CHECK: define weak_odr dso_local i64 @_ZTS15second_function(ptr addrspace(5) %0) {
   %1 = call i64 @_ZTS15common_function()
-; CHECK: %2 = call i64 @_ZTS15common_function(i32 addrspace(5)* %0)
+; CHECK: %2 = call i64 @_ZTS15common_function(ptr addrspace(5) %0)
   ret i64 %1
 }
 
@@ -68,31 +67,30 @@ define weak_odr dso_local i64 @_ZTS15second_function() {
 define weak_odr dso_local void @_ZTS13second_kernel() {
 entry:
 ; CHECK: %0 = alloca [3 x i32], align 4
-; CHECK: %1 = bitcast [3 x i32] addrspace(5)* %0 to i8 addrspace(5)*
-; CHECK: call void @llvm.memset.p5i8.i64(i8 addrspace(5)* nonnull align 4 dereferenceable(12) %1, i8 0, i64 12, i1 false)
-; CHECK: %2 = getelementptr inbounds [3 x i32], [3 x i32] addrspace(5)* %0, i32 0, i32 0
+; CHECK: call void @llvm.memset.p5.i64(ptr addrspace(5) nonnull align 4 dereferenceable(12) %0, i8 0, i64 12, i1 false)
+; CHECK: %1 = getelementptr inbounds [3 x i32], ptr addrspace(5) %0, i32 0, i32 0
   %0 = call i64 @_ZTS15second_function()
-; CHECK: %3 = call i64 @_ZTS15second_function(i32 addrspace(5)* %2)
+; CHECK: %2 = call i64 @_ZTS15second_function(ptr addrspace(5) %1)
   ret void
 }
 
-; CHECK: define weak_odr dso_local void @_ZTS13second_kernel_with_offset([3 x i32]* byref([3 x i32]) %0) {
+; CHECK: define weak_odr dso_local void @_ZTS13second_kernel_with_offset(ptr byref([3 x i32]) %0) {
 ; CHECK: entry:
 ; CHECK: %1 = alloca [3 x i32], align 4, addrspace(5)
-; CHECK: %2 = bitcast [3 x i32] addrspace(5)* %1 to i32 addrspace(5)*
-; CHEKC: call void @llvm.memcpy.p5i8.p4i8.i64(i8 addrspace(5)* align 4 %4, i8 addrspace(4)* align 1 %3, i64 12, i1 false)
-; CHECK: %5 = call i64 @_ZTS15second_function(i32 addrspace(5)* %2)
+; CHECK: %2 = addrspacecast ptr %0 to ptr addrspace(4)
+; CHECK: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 4 %1, ptr addrspace(4) align 1 %2, i64 12, i1 false)
+; CHECK: %3 = call i64 @_ZTS15second_function(ptr addrspace(5) %1)
 ; CHECK: ret void
 ; CHECK: }
 
 ; This function doesn't get called by a kernel entry point.
 define weak_odr dso_local i64 @_ZTS15no_entry_point() {
-; CHECK: define weak_odr dso_local i64 @_ZTS15no_entry_point(i32 addrspace(5)* %0) {
-  %1 = tail call i32 addrspace(5)* @llvm.amdgcn.implicit.offset()
-; CHECK-NOT: tail call i32 addrspace(5)* @llvm.amdgcn.implicit.offset()
-  %2 = getelementptr inbounds i32, i32 addrspace(5)* %1, i64 2
-; CHECK: %2 = getelementptr inbounds i32, i32 addrspace(5)* %0, i64 2
-  %3 = load i32, i32 addrspace(5)* %2, align 4
+; CHECK: define weak_odr dso_local i64 @_ZTS15no_entry_point(ptr addrspace(5) %0) {
+  %1 = tail call ptr addrspace(5) @llvm.amdgcn.implicit.offset()
+; CHECK-NOT: tail call ptr addrspace(5) @llvm.amdgcn.implicit.offset()
+  %2 = getelementptr inbounds i32, ptr addrspace(5) %1, i64 2
+; CHECK: %2 = getelementptr inbounds i32, ptr addrspace(5) %0, i64 2
+  %3 = load i32, ptr addrspace(5) %2, align 4
   %4 = zext i32 %3 to i64
   ret i64 %4
 }
@@ -100,12 +98,12 @@ define weak_odr dso_local i64 @_ZTS15no_entry_point() {
 !amdgcn.annotations = !{!0, !1, !2, !1, !3, !3, !3, !3, !4, !4, !3, !5, !6}
 ; CHECK: !amdgcn.annotations = !{!0, !1, !2, !1, !3, !3, !3, !3, !4, !4, !3, !5, !6, !7, !8}
 
-!0 = distinct !{void ()* @_ZTS12first_kernel, !"kernel", i32 1}
+!0 = distinct !{ptr @_ZTS12first_kernel, !"kernel", i32 1}
 !1 = !{null, !"align", i32 8}
 !2 = !{null, !"align", i32 8, !"align", i32 65544, !"align", i32 131080}
 !3 = !{null, !"align", i32 16}
 !4 = !{null, !"align", i32 16, !"align", i32 65552, !"align", i32 131088}
-!5 = distinct !{void ()* @_ZTS13second_kernel, !"kernel", i32 1}
-!6 = distinct !{void ()* @_ZTS12third_kernel, !"kernel", i32 1}
-; CHECK: !7 = !{void ([3 x i32]*)* @_ZTS13second_kernel_with_offset, !"kernel", i32 1}
-; CHECK: !8 = !{void ([3 x i32]*)* @_ZTS12first_kernel_with_offset, !"kernel", i32 1}
+!5 = distinct !{ptr @_ZTS13second_kernel, !"kernel", i32 1}
+!6 = distinct !{ptr @_ZTS12third_kernel, !"kernel", i32 1}
+; CHECK: !7 = !{ptr @_ZTS13second_kernel_with_offset, !"kernel", i32 1}
+; CHECK: !8 = !{ptr @_ZTS12first_kernel_with_offset, !"kernel", i32 1}
diff --git a/llvm/test/CodeGen/AMDGPU/global-offset-simple.ll b/llvm/test/CodeGen/AMDGPU/global-offset-simple.ll
index 67081df419990..7a41d914e2068 100644
--- a/llvm/test/CodeGen/AMDGPU/global-offset-simple.ll
+++ b/llvm/test/CodeGen/AMDGPU/global-offset-simple.ll
@@ -7,16 +7,16 @@ target triple = "amdgcn-amd-amdhsa"
 
 ; This test checks that the transformation is applied in the basic case.
 
-declare i32 addrspace(5)* @llvm.amdgcn.implicit.offset()
+declare ptr addrspace(5) @llvm.amdgcn.implicit.offset()
 ; CHECK-NOT: llvm.amdgcn.implicit.offset
 
 define weak_odr dso_local i64 @_ZTS14other_function() {
-; CHECK: define weak_odr dso_local i64 @_ZTS14other_function(i32 addrspace(5)* %0) {
-; CHECK: %2 = getelementptr inbounds i32, i32 addrspace(5)* %0, i64 2
-  %1 = tail call i32 addrspace(5)* @llvm.amdgcn.implicit.offset()
-; CHECK-NOT: tail call i32 addrspace(5)* @llvm.amdgcn.implicit.offset()
-  %2 = getelementptr inbounds i32, i32 addrspace(5)* %1, i64 2
-  %3 = load i32, i32 addrspace(5)* %2, align 4
+; CHECK: define weak_odr dso_local i64 @_ZTS14other_function(ptr addrspace(5) %0) {
+; CHECK: %2 = getelementptr inbounds i32, ptr addrspace(5) %0, i64 2
+  %1 = tail call ptr addrspace(5) @llvm.amdgcn.implicit.offset()
+; CHECK-NOT: tail call ptr addrspace(5) @llvm.amdgcn.implicit.offset()
+  %2 = getelementptr inbounds i32, ptr addrspace(5) %1, i64 2
+  %3 = load i32, ptr addrspace(5) %2, align 4
   %4 = zext i32 %3 to i64
   ret i64 %4
 }
@@ -25,29 +25,27 @@ define weak_odr dso_local i64 @_ZTS14other_function() {
 define weak_odr dso_local void @_ZTS14example_kernel() {
 entry:
 ; CHECK: %0 = alloca [3 x i32], align 4, addrspace(5)
-; CHECK: %1 = bitcast [3 x i32] addrspace(5)* %0 to i8 addrspace(5)*
-; CHECK: call void @llvm.memset.p5i8.i64(i8 addrspace(5)* nonnull align 4 dereferenceable(12) %1, i8 0, i64 12, i1 false)
-; CHECK: %2 = getelementptr inbounds [3 x i32], [3 x i32] addrspace(5)* %0, i32 0, i32 0
+; CHECK: call void @llvm.memset.p5.i64(ptr addrspace(5) nonnull align 4 dereferenceable(12) %0, i8 0, i64 12, i1 false)
+; CHECK: %1 = getelementptr inbounds [3 x i32], ptr addrspace(5) %0, i32 0, i32 0
   %0 = call i64 @_ZTS14other_function()
-; CHECK: %3 = call i64 @_ZTS14other_function(i32 addrspace(5)* %2)
+; CHECK: %2 = call i64 @_ZTS14other_function(ptr addrspace(5) %1)
   ret void
 }
 
-; CHECK: define weak_odr dso_local void @_ZTS14example_kernel_with_offset([3 x i32]* byref([3 x i32]) %0) {
+; CHECK: define weak_odr dso_local void @_ZTS14example_kernel_with_offset(ptr byref([3 x i32]) %0) {
 ; CHECK: entry:
 ; CHECK: %1 = alloca [3 x i32], align 4, addrspace(5)
-; CHECK: %2 = bitcast [3 x i32] addrspace(5)* %1 to i32 addrspace(5)*
-; CHECK: call void @llvm.memcpy.p5i8.p4i8.i64(i8 addrspace(5)* align 4 %4, i8 addrspace(4)* align 1 %3, i64 12, i1 false)
-; CHECK: %5 = call i64 @_ZTS14other_function(i32 addrspace(5)* %2)
+; CHECK: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 4 %1, ptr addrspace(4) align 1 %2, i64 12, i1 false)
+; CHECK: %3 = call i64 @_ZTS14other_function(ptr addrspace(5) %1)
 ; CHECK: ret void
 ; CHECK: }
 
 !amdgcn.annotations = !{!0, !1, !2, !1, !3, !3, !3, !3, !4, !4, !3}
 ; CHECK: !amdgcn.annotations = !{!0, !1, !2, !1, !3, !3, !3, !3, !4, !4, !3, !5}
 
-!0 = distinct !{void ()* @_ZTS14example_kernel, !"kernel", i32 1}
+!0 = distinct !{ptr @_ZTS14example_kernel, !"kernel", i32 1}
 !1 = !{null, !"align", i32 8}
 !2 = !{null, !"align", i32 8, !"align", i32 65544, !"align", i32 131080}
 !3 = !{null, !"align", i32 16}
 !4 = !{null, !"align", i32 16, !"align", i32 65552, !"align", i32 131088}
-; CHECK: !5 = !{void ([3 x i32]*)* @_ZTS14example_kernel_with_offset, !"kernel", i32 1}
+; CHECK: !5 = !{ptr @_ZTS14example_kernel_with_offset, !"kernel", i32 1}
diff --git a/llvm/test/CodeGen/AMDGPU/local-accessor-to-shared-memory-basic-transformation.ll b/llvm/test/CodeGen/AMDGPU/local-accessor-to-shared-memory-basic-transformation.ll
index 8332dc2f3eda3..eb12cc16fa88a 100644
--- a/llvm/test/CodeGen/AMDGPU/local-accessor-to-shared-memory-basic-transformation.ll
+++ b/llvm/test/CodeGen/AMDGPU/local-accessor-to-shared-memory-basic-transformation.ll
@@ -9,15 +9,14 @@ target triple = "amdgcn-amd-amdhsa"
 
 ; CHECK: @_ZTS14example_kernel_shared_mem = external addrspace(3) global [0 x i8], align 4
 
 ; Function Attrs: noinline
-define amdgpu_kernel void @_ZTS14example_kernel(i32 addrspace(3)* %a, i32 addrspace(1)* %b, i32 %c) {
-; CHECK: define amdgpu_kernel void @_ZTS14example_kernel(i32 %0, i32 addrspace(1)* %b, i32 %c) {
+define amdgpu_kernel void @_ZTS14example_kernel(ptr addrspace(3) %a, ptr addrspace(1) %b, i32 %c) {
+; CHECK: define amdgpu_kernel void @_ZTS14example_kernel(i32 %0, ptr addrspace(1) %b, i32 %c) {
 entry:
-; CHECK: %1 = getelementptr inbounds [0 x i8], [0 x i8] addrspace(3)* @_ZTS14example_kernel_shared_mem, i32 0, i32 %0
-; CHECK: %a = bitcast i8 addrspace(3)* %1 to i32 addrspace(3)*
-  %0 = load i32, i32 addrspace(3)* %a
-; CHECK: %2 = load i32, i32 addrspace(3)* %a
-  %1 = load i32, i32 addrspace(1)* %b
-; CHECK: %3 = load i32, i32 addrspace(1)* %b
+; CHECK: %1 = getelementptr inbounds [0 x i8], ptr addrspace(3) @_ZTS14example_kernel_shared_mem, i32 0, i32 %0
+  %0 = load i32, ptr addrspace(3) %a
+; CHECK: %2 = load i32, ptr addrspace(3) %a
+  %1 = load i32, ptr addrspace(1) %b
+; CHECK: %3 = load i32, ptr addrspace(1) %b
   %2 = add i32 %c, %c
 ; CHECK: %4 = add i32 %c, %c
   ret void
@@ -25,8 +24,8 @@ entry:
 
 !amdgcn.annotations = !{!0, !1, !2, !1, !3, !3, !3, !3, !4, !4, !3}
 
-!0 = distinct !{void (i32 addrspace(3)*, i32 addrspace(1)*, i32)* @_ZTS14example_kernel, !"kernel", i32 1}
-; CHECK: !0 = distinct !{void (i32, i32 addrspace(1)*, i32)* @_ZTS14example_kernel, !"kernel", i32 1}
+!0 = distinct !{ptr @_ZTS14example_kernel, !"kernel", i32 1}
+; CHECK: !0 = distinct !{ptr @_ZTS14example_kernel, !"kernel", i32 1}
 !1 = !{null, !"align", i32 8}
 !2 = !{null, !"align", i32 8, !"align", i32 65544, !"align", i32 131080}
 !3 = !{null, !"align", i32 16}
diff --git a/llvm/test/CodeGen/AMDGPU/local-accessor-to-shared-memory-multiple-functions.ll b/llvm/test/CodeGen/AMDGPU/local-accessor-to-shared-memory-multiple-functions.ll
index b7a09c7bd320a..60f0e1de6d01c 100644
--- a/llvm/test/CodeGen/AMDGPU/local-accessor-to-shared-memory-multiple-functions.ll
+++ b/llvm/test/CodeGen/AMDGPU/local-accessor-to-shared-memory-multiple-functions.ll
@@ -8,32 +8,31 @@ target triple = "amdgcn-amd-amdhsa"
 
 ; CHECK: @_ZTS14example_kernel_shared_mem = external addrspace(3) global [0 x i8], align 4
 
-define void @_ZTS14other_function(i32 addrspace(3)* %a, i32 addrspace(1)* %b, i32 %c) {
-; CHECK: define void @_ZTS14other_function(i32 addrspace(3)* %a, i32 addrspace(1)* %b, i32 %c) {
-  %1 = load i32, i32 addrspace(3)* %a
-; CHECK: %1 = load i32, i32 addrspace(3)* %a
-  %2 = load i32, i32 addrspace(1)* %b
-; CHECK: %2 = load i32, i32 addrspace(1)* %b
+define void @_ZTS14other_function(ptr addrspace(3) %a, ptr addrspace(1) %b, i32 %c) {
+; CHECK: define void @_ZTS14other_function(ptr addrspace(3) %a, ptr addrspace(1) %b, i32 %c) {
+  %1 = load i32, ptr addrspace(3) %a
+; CHECK: %1 = load i32, ptr addrspace(3) %a
+  %2 = load i32, ptr addrspace(1) %b
+; CHECK: %2 = load i32, ptr addrspace(1) %b
   %3 = add i32 %c, %c
 ; CHECK: %3 = add i32 %c, %c
   ret void
 }
 
 ; Function Attrs: noinline
-define amdgpu_kernel void @_ZTS14example_kernel(i32 addrspace(3)* %a, i32 addrspace(1)* %b, i32 %c) {
-; CHECK: define amdgpu_kernel void @_ZTS14example_kernel(i32 %0, i32 addrspace(1)* %b, i32 %c) {
+define amdgpu_kernel void @_ZTS14example_kernel(ptr addrspace(3) %a, ptr addrspace(1) %b, i32 %c) {
+; CHECK: define amdgpu_kernel void @_ZTS14example_kernel(i32 %0, ptr addrspace(1) %b, i32 %c) {
 entry:
-; CHECK: %1 = getelementptr inbounds [0 x i8], [0 x i8] addrspace(3)* @_ZTS14example_kernel_shared_mem, i32 0, i32 %0
-; CHECK: %a = bitcast i8 addrspace(3)* %1 to i32 addrspace(3)*
-  call void @_ZTS14other_function(i32 addrspace(3)* %a, i32 addrspace(1)* %b, i32 %c)
-; CHECK: call void @_ZTS14other_function(i32 addrspace(3)* %a, i32 addrspace(1)* %b, i32 %c)
+; CHECK: %1 = getelementptr inbounds [0 x i8], ptr addrspace(3) @_ZTS14example_kernel_shared_mem, i32 0, i32 %0
+  call void @_ZTS14other_function(ptr addrspace(3) %a, ptr addrspace(1) %b, i32 %c)
+; CHECK: call void @_ZTS14other_function(ptr addrspace(3) %a, ptr addrspace(1) %b, i32 %c)
   ret void
 }
 
 !amdgcn.annotations = !{!0, !1, !2, !1, !3, !3, !3, !3, !4, !4, !3}
 
-!0 = distinct !{void (i32 addrspace(3)*, i32 addrspace(1)*, i32)* @_ZTS14example_kernel, !"kernel", i32 1}
-; CHECK: !0 = distinct !{void (i32, i32 addrspace(1)*, i32)* @_ZTS14example_kernel, !"kernel", i32 1}
+!0 = distinct !{ptr @_ZTS14example_kernel, !"kernel", i32 1}
+; CHECK: !0 = distinct !{ptr @_ZTS14example_kernel, !"kernel", i32 1}
 !1 = !{null, !"align", i32 8}
 !2 = !{null, !"align", i32 8, !"align", i32 65544, !"align", i32 131080}
 !3 = !{null, !"align", i32 16}
diff --git a/llvm/test/CodeGen/AMDGPU/local-accessor-to-shared-memory-no-entry-points.ll b/llvm/test/CodeGen/AMDGPU/local-accessor-to-shared-memory-no-entry-points.ll
index 16c9a370fe4c0..7d54793e469de 100644
--- a/llvm/test/CodeGen/AMDGPU/local-accessor-to-shared-memory-no-entry-points.ll
+++ b/llvm/test/CodeGen/AMDGPU/local-accessor-to-shared-memory-no-entry-points.ll
@@ -7,13 +7,13 @@ target triple = "amdgcn-amd-amdhsa"
 
 ; This test checks that no transformation is applied when there are no entry points.
 
 ; Function Attrs: noinline
-define void @_ZTS14example_kernel(i32 addrspace(3)* %a, i32 addrspace(1)* %b, i32 %c) {
-; CHECK: define void @_ZTS14example_kernel(i32 addrspace(3)* %a, i32 addrspace(1)* %b, i32 %c) {
+define void @_ZTS14example_kernel(ptr addrspace(3) %a, ptr addrspace(1) %b, i32 %c) {
+; CHECK: define void @_ZTS14example_kernel(ptr addrspace(3) %a, ptr addrspace(1) %b, i32 %c) {
 entry:
-  %0 = load i32, i32 addrspace(3)* %a
-; CHECK: %0 = load i32, i32 addrspace(3)* %a
-  %1 = load i32, i32 addrspace(1)* %b
-; CHECK: %1 = load i32, i32 addrspace(1)* %b
+  %0 = load i32, ptr addrspace(3) %a
+; CHECK: %0 = load i32, ptr addrspace(3) %a
+  %1 = load i32, ptr addrspace(1) %b
+; CHECK: %1 = load i32, ptr addrspace(1) %b
   %2 = add i32 %c, %c
 ; CHECK: %2 = add i32 %c, %c
   ret void
diff --git a/llvm/test/CodeGen/AMDGPU/local-accessor-to-shared-memory-preserves-types.ll b/llvm/test/CodeGen/AMDGPU/local-accessor-to-shared-memory-preserves-types.ll
index ddce2f80c8fd2..74d8ab3995885 100644
--- a/llvm/test/CodeGen/AMDGPU/local-accessor-to-shared-memory-preserves-types.ll
+++ b/llvm/test/CodeGen/AMDGPU/local-accessor-to-shared-memory-preserves-types.ll
@@ -9,32 +9,28 @@ target triple = "amdgcn-amd-amdhsa"
 
 ; CHECK: @_ZTS14example_kernel_shared_mem = external addrspace(3) global [0 x i8], align 4
 
 ; Function Attrs: noinline
-define amdgpu_kernel void @_ZTS14example_kernel(i32 addrspace(3)* %a, i64 addrspace(3)* %b, i16 addrspace(3)* %c, i8 addrspace(3)* %d) {
+define amdgpu_kernel void @_ZTS14example_kernel(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d) {
 ; CHECK: define amdgpu_kernel void @_ZTS14example_kernel(i32 %0, i32 %1, i32 %2, i32 %3) {
 entry:
-; CHECK: %4 = getelementptr inbounds [0 x i8], [0 x i8] addrspace(3)* @_ZTS14example_kernel_shared_mem, i32 0, i32 %3
-; CHECK: %d = bitcast i8 addrspace(3)* %4 to i8 addrspace(3)*
-; CHECK: %5 = getelementptr inbounds [0 x i8], [0 x i8] addrspace(3)* @_ZTS14example_kernel_shared_mem, i32 0, i32 %2
-; CHECK: %c = bitcast i8 addrspace(3)* %5 to i16 addrspace(3)*
-; CHECK: %6 = getelementptr inbounds [0 x i8], [0 x i8] addrspace(3)* @_ZTS14example_kernel_shared_mem, i32 0, i32 %1
-; CHECK: %b = bitcast i8 addrspace(3)* %6 to i64 addrspace(3)*
-; CHECK: %7 = getelementptr inbounds [0 x i8], [0 x i8] addrspace(3)* @_ZTS14example_kernel_shared_mem, i32 0, i32 %0
-; CHECK: %a = bitcast i8 addrspace(3)* %7 to i32 addrspace(3)*
-  %0 = load i32, i32 addrspace(3)* %a
-; CHECK: %8 = load i32, i32 addrspace(3)* %a
-  %1 = load i64, i64 addrspace(3)* %b
-; CHECK: %9 = load i64, i64 addrspace(3)* %b
-  %2 = load i16, i16 addrspace(3)* %c
-; CHECK: %10 = load i16, i16 addrspace(3)* %c
-  %3 = load i8, i8 addrspace(3)* %d
-; CHECK: %11 = load i8, i8 addrspace(3)* %d
+; CHECK: %4 = getelementptr inbounds [0 x i8], ptr addrspace(3) @_ZTS14example_kernel_shared_mem, i32 0, i32 %3
+; CHECK: %5 = getelementptr inbounds [0 x i8], ptr addrspace(3) @_ZTS14example_kernel_shared_mem, i32 0, i32 %2
+; CHECK: %6 = getelementptr inbounds [0 x i8], ptr addrspace(3) @_ZTS14example_kernel_shared_mem, i32 0, i32 %1
+; CHECK: %7 = getelementptr inbounds [0 x i8], ptr addrspace(3) @_ZTS14example_kernel_shared_mem, i32 0, i32 %0
+  %0 = load i32, ptr addrspace(3) %a
+; CHECK: %8 = load i32, ptr addrspace(3) %a
+  %1 = load i64, ptr addrspace(3) %b
+; CHECK: %9 = load i64, ptr addrspace(3) %b
+  %2 = load i16, ptr addrspace(3) %c
+; CHECK: %10 = load i16, ptr addrspace(3) %c
+  %3 = load i8, ptr addrspace(3) %d
+; CHECK: %11 = load i8, ptr addrspace(3) %d
   ret void
 }
 
 !amdgcn.annotations = !{!0, !1, !2, !1, !3, !3, !3, !3, !4, !4, !3}
 
-!0 = distinct !{void (i32 addrspace(3)*, i64 addrspace(3)*, i16 addrspace(3)*, i8 addrspace(3)*)* @_ZTS14example_kernel, !"kernel", i32 1}
-; CHECK: !0 = distinct !{void (i32, i32, i32, i32)* @_ZTS14example_kernel, !"kernel", i32 1}
+!0 = distinct !{ptr @_ZTS14example_kernel, !"kernel", i32 1}
+; CHECK: !0 = distinct !{ptr @_ZTS14example_kernel, !"kernel", i32 1}
 !1 = !{null, !"align", i32 8}
 !2 = !{null, !"align", i32 8, !"align", i32 65544, !"align", i32 131080}
 !3 = !{null, !"align", i32 16}
diff --git a/llvm/test/CodeGen/AMDGPU/local-accessor-to-shared-memory-triple.ll b/llvm/test/CodeGen/AMDGPU/local-accessor-to-shared-memory-triple.ll
index 9349c2ea7b3d4..1104cc96d214c 100644
--- a/llvm/test/CodeGen/AMDGPU/local-accessor-to-shared-memory-triple.ll
+++ b/llvm/test/CodeGen/AMDGPU/local-accessor-to-shared-memory-triple.ll
@@ -20,15 +20,15 @@ target triple = "amdgcn-amd-amdhsa"
 ; CHECK-INVALID-NOT: - .args:
 
 ; Function Attrs: noinline
-define amdgpu_kernel void @_ZTS14example_kernel(i32 addrspace(3)* %a) {
+define amdgpu_kernel void @_ZTS14example_kernel(ptr addrspace(3) %a) {
 entry:
-  %0 = load i32, i32 addrspace(3)* %a
+  %0 = load i32, ptr addrspace(3) %a
   ret void
 }
 
 !amdgcn.annotations = !{!0, !1, !2, !1, !3, !3, !3, !3, !4, !4, !3}
 
-!0 = distinct !{void (i32 addrspace(3)*)* @_ZTS14example_kernel, !"kernel", i32 1}
+!0 = distinct !{ptr @_ZTS14example_kernel, !"kernel", i32 1}
 !1 = !{null, !"align", i32 8}
 !2 = !{null, !"align", i32 8, !"align", i32 65544, !"align", i32 131080}
 !3 = !{null, !"align", i32 16}
diff --git a/llvm/test/CodeGen/AMDGPU/local-accessor-to-shared-memory-valid-triple.ll b/llvm/test/CodeGen/AMDGPU/local-accessor-to-shared-memory-valid-triple.ll
index 5c5e0e5242b9a..94ed4aeb63eb2 100644
--- a/llvm/test/CodeGen/AMDGPU/local-accessor-to-shared-memory-valid-triple.ll
+++ b/llvm/test/CodeGen/AMDGPU/local-accessor-to-shared-memory-valid-triple.ll
@@ -17,9 +17,9 @@ target triple = "amdgcn-amd-amdhsa"
 ; CHECK-OPT-NEXT: .size: 4
 ; CHECK-OPT-NEXT: .value_kind: by_value
 ; Function Attrs: noinline
-define amdgpu_kernel void @_ZTS14example_kernel(i32 addrspace(3)* %a) {
+define amdgpu_kernel void @_ZTS14example_kernel(ptr addrspace(3) %a) {
 entry:
-  %0 = load i32, i32 addrspace(3)* %a
+  %0 = load i32, ptr addrspace(3) %a
   ret void
 }
 
@@ -27,7 +27,7 @@ entry:
 !llvm.ident = !{!7, !8}
 !llvm.module.flags = !{!9, !10}
 
-!0 = distinct !{void (i32 addrspace(3)*)* @_ZTS14example_kernel, !"kernel", i32 1}
+!0 = distinct !{ptr @_ZTS14example_kernel, !"kernel", i32 1}
 !1 = !{null, !"align", i32 8}
 !2 = !{null, !"align", i32 8, !"align", i32 65544, !"align", i32 131080}
 !3 = !{null, !"align", i32 16}
diff --git a/llvm/test/CodeGen/NVPTX/global-offset-dbg.ll b/llvm/test/CodeGen/NVPTX/global-offset-dbg.ll
index c48499a803e8f..edcbbfd82ef90 100644
--- a/llvm/test/CodeGen/NVPTX/global-offset-dbg.ll
+++ b/llvm/test/CodeGen/NVPTX/global-offset-dbg.ll
@@ -6,14 +6,14 @@ target triple = "nvptx64-nvidia-cuda"
 
 ; This test checks that debug information on functions and callsites are preserved
 
-declare i32* @llvm.nvvm.implicit.offset()
+declare ptr @llvm.nvvm.implicit.offset()
 ; CHECK-NOT: llvm.nvvm.implicit.offset
 
 define weak_odr dso_local i64 @_ZTS14other_function() !dbg !11 {
-; CHECK: define weak_odr dso_local i64 @_ZTS14other_function(i32* %0) !dbg !11 {
-  %1 = tail call i32* @llvm.nvvm.implicit.offset()
-  %2 = getelementptr inbounds i32, i32* %1, i64 2
-  %3 = load i32, i32* %2, align 4
+; CHECK: define weak_odr dso_local i64 @_ZTS14other_function(ptr %0) !dbg !11 {
+  %1 = tail call ptr @llvm.nvvm.implicit.offset()
+  %2 = getelementptr inbounds i32, ptr %1, i64 2
+  %3 = load i32, ptr %2, align 4
   %4 = zext i32 %3 to i64
   ret i64 %4
 }
@@ -23,12 +23,12 @@ define weak_odr dso_local void @_ZTS14example_kernel() !dbg !14 {
 ; CHECK: define weak_odr dso_local void @_ZTS14example_kernel() !dbg !14 {
 entry:
   %0 = call i64 @_ZTS14other_function(), !dbg !15
-; CHECK: %3 = call i64 @_ZTS14other_function(i32* %2), !dbg !15
+; CHECK: %2 = call i64 @_ZTS14other_function(ptr %1), !dbg !15
   ret void
 }
 
-; CHECK: define weak_odr dso_local void @_ZTS14example_kernel_with_offset([3 x i32]* byval([3 x i32]) %0) !dbg !16 {
-; CHECK: %2 = call i64 @_ZTS14other_function(i32* %1), !dbg !17
+; CHECK: define weak_odr dso_local void @_ZTS14example_kernel_with_offset(ptr byval([3 x i32]) %0) !dbg !16 {
+; CHECK: %1 = call i64 @_ZTS14other_function(ptr %0), !dbg !17
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!3, !4}
@@ -40,7 +40,7 @@ entry:
 !2 = !{}
 !3 = !{i32 2, !"Dwarf Version", i32 4}
 !4 = !{i32 2, !"Debug Info Version", i32 3}
-!5 = distinct !{void ()* @_ZTS14example_kernel, !"kernel", i32 1}
+!5 = distinct !{ptr @_ZTS14example_kernel, !"kernel", i32 1}
 !6 = !{i32 1, i32 4}
 !7 = !{null, !"align", i32 8, !"align", i32 65544, !"align", i32 131080}
 !8 = !{null, !"align", i32 16}
diff --git a/llvm/test/CodeGen/NVPTX/global-offset-invalid-triple.ll b/llvm/test/CodeGen/NVPTX/global-offset-invalid-triple.ll
index 65fccbc60fa66..753887300dd18 100644
--- a/llvm/test/CodeGen/NVPTX/global-offset-invalid-triple.ll
+++ b/llvm/test/CodeGen/NVPTX/global-offset-invalid-triple.ll
@@ -7,12 +7,12 @@ target triple = "nvptx64-nvidia-nvcl"
 
 ; This test checks that the pass does not run on nvcl triples.
 
-declare i32* @llvm.nvvm.implicit.offset()
+declare ptr @llvm.nvvm.implicit.offset()
 
 define weak_odr dso_local i64 @_ZTS14other_function() {
-  %1 = tail call i32* @llvm.nvvm.implicit.offset()
-  %2 = getelementptr inbounds i32, i32* %1, i64 2
-  %3 = load i32, i32* %2, align 4
+  %1 = tail call ptr @llvm.nvvm.implicit.offset()
+  %2 = getelementptr inbounds i32, ptr %1, i64 2
+  %3 = load i32, ptr %2, align 4
   %4 = zext i32 %3 to i64
   ret i64 %4
 }
@@ -27,7 +27,7 @@ entry:
 !nvvm.annotations = !{!0, !1, !2, !1, !3, !3, !3, !3, !4, !4, !3}
 !nvvmir.version = !{!5}
 
-!0 = distinct !{void ()* @_ZTS14example_kernel, !"kernel", i32 1}
+!0 = distinct !{ptr @_ZTS14example_kernel, !"kernel", i32 1}
 !1 = !{null, !"align", i32 8}
 !2 = !{null, !"align", i32 8, !"align", i32 65544, !"align", i32 131080}
 !3 = !{null, !"align", i32 16}
diff --git a/llvm/test/CodeGen/NVPTX/global-offset-multiple-calls-from-one-function.ll b/llvm/test/CodeGen/NVPTX/global-offset-multiple-calls-from-one-function.ll
index 08fa0a9ec6426..542e1cc71ef16 100644
--- a/llvm/test/CodeGen/NVPTX/global-offset-multiple-calls-from-one-function.ll
+++ b/llvm/test/CodeGen/NVPTX/global-offset-multiple-calls-from-one-function.ll
@@ -10,23 +10,23 @@ target triple = "nvptx64-nvidia-cuda"
 ; to other functions that has a variant that takes an offset parameter will have
 ; all calls redirected to the corresponding variants.
 
-declare i32* @llvm.nvvm.implicit.offset()
-; CHECK-NOT: declare i32* @llvm.nvvm.implicit.offset()
+declare ptr @llvm.nvvm.implicit.offset()
+; CHECK-NOT: declare ptr @llvm.nvvm.implicit.offset()
 
 define weak_odr dso_local i64 @_ZTS14other_function() {
-; CHECK: define weak_odr dso_local i64 @_ZTS14other_function(i32* %0) {
-  %1 = tail call i32* @llvm.nvvm.implicit.offset()
-; CHECK-NOT: tail call i32* @llvm.nvvm.implicit.offset()
-  %2 = getelementptr inbounds i32, i32* %1, i64 2
-; CHECK: %2 = getelementptr inbounds i32, i32* %0, i64 2
-  %3 = load i32, i32* %2, align 4
+; CHECK: define weak_odr dso_local i64 @_ZTS14other_function(ptr %0) {
+  %1 = tail call ptr @llvm.nvvm.implicit.offset()
+; CHECK-NOT: tail call ptr @llvm.nvvm.implicit.offset()
+  %2 = getelementptr inbounds i32, ptr %1, i64 2
+; CHECK: %2 = getelementptr inbounds i32, ptr %0, i64 2
+  %3 = load i32, ptr %2, align 4
   %4 = zext i32 %3 to i64
 
-  %5 = tail call i32* @llvm.nvvm.implicit.offset()
-; CHECK-NOT: tail call i32* @llvm.nvvm.implicit.offset()
-  %6 = getelementptr inbounds i32, i32* %5, i64 2
-; CHECK: %5 = getelementptr inbounds i32, i32* %0, i64 2
-  %7 = load i32, i32* %6, align 4
+  %5 = tail call ptr @llvm.nvvm.implicit.offset()
+; CHECK-NOT: tail call ptr @llvm.nvvm.implicit.offset()
+  %6 = getelementptr inbounds i32, ptr %5, i64 2
+; CHECK: %5 = getelementptr inbounds i32, ptr %0, i64 2
+  %7 = load i32, ptr %6, align 4
   %8 = zext i32 %7 to i64
 
   ret i64 %4
@@ -36,21 +36,19 @@ define weak_odr dso_local i64 @_ZTS14other_function() {
 define weak_odr dso_local void @_ZTS14example_kernel() {
 entry:
 ; CHECK: %0 = alloca [3 x i32], align 4
-; CHECK: %1 = bitcast [3 x i32]* %0 to i8*
-; CHECK: call void @llvm.memset.p0i8.i64(i8* nonnull align 4 dereferenceable(12) %1, i8 0, i64 12, i1 false)
-; CHECK: %2 = getelementptr inbounds [3 x i32], [3 x i32]* %0, i32 0, i32 0
+; CHECK: call void @llvm.memset.p0.i64(ptr nonnull align 4 dereferenceable(12) %0, i8 0, i64 12, i1 false)
+; CHECK: %1 = getelementptr inbounds [3 x i32], ptr %0, i32 0, i32 0
   %0 = call i64 @_ZTS14other_function()
-; CHECK: %3 = call i64 @_ZTS14other_function(i32* %2)
+; CHECK: %2 = call i64 @_ZTS14other_function(ptr %1)
   %1 = call i64 @_ZTS14other_function()
-; CHECK: %4 = call i64 @_ZTS14other_function(i32* %2)
+; CHECK: %3 = call i64 @_ZTS14other_function(ptr %1)
   ret void
 }
 
-; CHECK: define weak_odr dso_local void @_ZTS14example_kernel_with_offset([3 x i32]* byval([3 x i32]) %0) {
+; CHECK: define weak_odr dso_local void @_ZTS14example_kernel_with_offset(ptr byval([3 x i32]) %0) {
 ; CHECK: entry:
-; CHECK: %1 = bitcast [3 x i32]* %0 to i32*
-; CHECK: %2 = call i64 @_ZTS14other_function(i32* %1)
-; CHECK: %3 = call i64 @_ZTS14other_function(i32* %1)
+; CHECK: %1 = call i64 @_ZTS14other_function(ptr %0)
+; CHECK: %2 = call i64 @_ZTS14other_function(ptr %0)
 ; CHECK: ret void
 ; CHECK: }
 
@@ -58,10 +56,10 @@ entry:
 ; CHECK: !nvvm.annotations = !{!0, !1, !2, !1, !3, !3, !3, !3, !4, !4, !3, !5}
 !nvvmir.version = !{!6}
 
-!0 = distinct !{void ()* @_ZTS14example_kernel, !"kernel", i32 1}
+!0 = distinct !{ptr @_ZTS14example_kernel, !"kernel", i32 1}
 !1 = !{null, !"align", i32 8}
 !2 = !{null, !"align", i32 8, !"align", i32 65544, !"align", i32 131080}
 !3 = !{null, !"align", i32 16}
 !4 = !{null, !"align", i32 16, !"align", i32 65552, !"align", i32 131088}
-; CHECK: !5 = !{void ([3 x i32]*)* @_ZTS14example_kernel_with_offset, !"kernel", i32 1}
+; CHECK: !5 = !{ptr @_ZTS14example_kernel_with_offset, !"kernel", i32 1}
 !6 = !{i32 1, i32 4}
diff --git a/llvm/test/CodeGen/NVPTX/global-offset-multiple-entry-points.ll b/llvm/test/CodeGen/NVPTX/global-offset-multiple-entry-points.ll
index 2c6de2cc330b5..e124469291917 100644
--- a/llvm/test/CodeGen/NVPTX/global-offset-multiple-entry-points.ll
+++ b/llvm/test/CodeGen/NVPTX/global-offset-multiple-entry-points.ll
@@ -6,8 +6,8 @@ target triple = "nvptx64-nvidia-cuda"
 
 ; This test checks that the pass works with multiple entry points.
 
-declare i32* @llvm.nvvm.implicit.offset()
-; CHECK-NOT: declare i32* @llvm.nvvm.implicit.offset()
+declare ptr @llvm.nvvm.implicit.offset()
+; CHECK-NOT: declare ptr @llvm.nvvm.implicit.offset()
 
 ; This function is a kernel entry point that does not use global offset. It will
 ; not get a clone with a global offset parameter.
@@ -18,20 +18,20 @@ entry:
 }
 
 define weak_odr dso_local i64 @_ZTS15common_function() {
-; CHECK: define weak_odr dso_local i64 @_ZTS15common_function(i32* %0) {
-  %1 = tail call i32* @llvm.nvvm.implicit.offset()
-; CHECK-NOT: tail call i32* @llvm.nvvm.implicit.offset()
-; CHECK: %2 = getelementptr inbounds i32, i32* %0, i64 2
-  %2 = getelementptr inbounds i32, i32* %1, i64 2
-  %3 = load i32, i32* %2, align 4
+; CHECK: define weak_odr dso_local i64 @_ZTS15common_function(ptr %0) {
+  %1 = tail call ptr @llvm.nvvm.implicit.offset()
+; CHECK-NOT: tail call ptr @llvm.nvvm.implicit.offset()
+; CHECK: %2 = getelementptr inbounds i32, ptr %0, i64 2
+  %2 = getelementptr inbounds i32, ptr %1, i64 2
+  %3 = load i32, ptr %2, align 4
   %4 = zext i32 %3 to i64
   ret i64 %4
 }
 
 define weak_odr dso_local i64 @_ZTS14first_function() {
-; CHECK: define weak_odr dso_local i64 @_ZTS14first_function(i32* %0) {
+; CHECK: define weak_odr dso_local i64 @_ZTS14first_function(ptr %0) {
   %1 = call i64 @_ZTS15common_function()
-; CHECK: %2 = call i64 @_ZTS15common_function(i32* %0)
+; CHECK: %2 = call i64 @_ZTS15common_function(ptr %0)
   ret i64 %1
 }
 
@@ -39,25 +39,23 @@ define weak_odr dso_local i64 @_ZTS14first_function() {
 define weak_odr dso_local void @_ZTS12first_kernel() {
 entry:
 ; CHECK: %0 = alloca [3 x i32], align 4
-; CHECK: %1 = bitcast [3 x i32]* %0 to i8*
-; CHECK: call void @llvm.memset.p0i8.i64(i8* nonnull align 4 dereferenceable(12) %1, i8 0, i64 12, i1 false)
-; CHECK: %2 = getelementptr inbounds [3 x i32], [3 x i32]* %0, i32 0, i32 0
+; CHECK: call void @llvm.memset.p0.i64(ptr nonnull align 4 dereferenceable(12) %0, i8 0, i64 12, i1 false)
+; CHECK: %1 = getelementptr inbounds [3 x i32], ptr %0, i32 0, i32 0
   %0 = call i64 @_ZTS14first_function()
-; CHECK: %3 = call i64 @_ZTS14first_function(i32* %2)
+; CHECK: %2 = call i64 @_ZTS14first_function(ptr %1)
   ret void
 }
 
-; CHECK: define weak_odr dso_local void @_ZTS12first_kernel_with_offset([3 x i32]* byval([3 x i32]) %0) {
+; CHECK: define weak_odr dso_local void @_ZTS12first_kernel_with_offset(ptr byval([3 x i32]) %0) {
 ; CHECK: entry:
-; CHECK: %1 = bitcast [3 x i32]* %0 to i32*
-; CHECK: %2 = call i64 @_ZTS14first_function(i32* %1)
+; CHECK: %1 = call i64 @_ZTS14first_function(ptr %0)
 ; CHECK: ret void
 ; CHECK: }
 
 define weak_odr dso_local i64 @_ZTS15second_function() {
-; CHECK: define weak_odr dso_local i64 @_ZTS15second_function(i32* %0) {
+; CHECK: define weak_odr dso_local i64 @_ZTS15second_function(ptr %0) {
   %1 = call i64 @_ZTS15common_function()
-; CHECK: %2 = call i64 @_ZTS15common_function(i32* %0)
+; CHECK: %2 = call i64 @_ZTS15common_function(ptr %0)
   ret i64 %1
 }
 
@@ -65,29 +63,27 @@ define weak_odr dso_local i64 @_ZTS15second_function() {
 define weak_odr dso_local void @_ZTS13second_kernel() {
 entry:
 ; CHECK: %0 = alloca [3 x i32], align 4
-; CHECK: %1 = bitcast [3 x i32]* %0 to i8*
-; CHECK: call void @llvm.memset.p0i8.i64(i8* nonnull align 4 dereferenceable(12) %1, i8 0, i64 12, i1 false)
-; CHECK: %2 = getelementptr inbounds [3 x i32], [3 x i32]* %0, i32 0, i32 0
+; CHECK: call void @llvm.memset.p0.i64(ptr nonnull align 4 dereferenceable(12) %0, i8 0, i64 12, i1 false)
+; CHECK: %1 = getelementptr inbounds [3 x i32], ptr %0, i32 0, i32 0
   %0 = call i64 @_ZTS15second_function()
-; CHECK: %3 = call i64 @_ZTS15second_function(i32* %2)
+; CHECK: %2 = call i64 @_ZTS15second_function(ptr %1)
   ret void
 }
 
-; CHECK: define weak_odr dso_local void @_ZTS13second_kernel_with_offset([3 x i32]* byval([3 x i32]) %0) {
+; CHECK: define weak_odr dso_local void @_ZTS13second_kernel_with_offset(ptr byval([3 x i32]) %0) {
 ; CHECK: entry:
-; CHECK: %1 = bitcast [3 x i32]* %0 to i32*
-; CHECK: %2 = call i64 @_ZTS15second_function(i32* %1)
+; CHECK: %1 = call i64 @_ZTS15second_function(ptr %0)
 ; CHECK: ret void
 ; CHECK: }
 
 ; This function doesn't get called by a kernel entry point.
 define weak_odr dso_local i64 @_ZTS15no_entry_point() {
-; CHECK: define weak_odr dso_local i64 @_ZTS15no_entry_point(i32* %0) {
-  %1 = tail call i32* @llvm.nvvm.implicit.offset()
-; CHECK-NOT: tail call i32* @llvm.nvvm.implicit.offset()
-  %2 = getelementptr inbounds i32, i32* %1, i64 2
-; CHECK: %2 = getelementptr inbounds i32, i32* %0, i64 2
-  %3 = load i32, i32* %2, align 4
+; CHECK: define weak_odr dso_local i64 @_ZTS15no_entry_point(ptr %0) {
+  %1 = tail call ptr @llvm.nvvm.implicit.offset()
+; CHECK-NOT: tail call ptr @llvm.nvvm.implicit.offset()
+  %2 = getelementptr inbounds i32, ptr %1, i64 2
+; CHECK: %2 = getelementptr inbounds i32, ptr %0, i64 2
+  %3 = load i32, ptr %2, align 4
   %4 = zext i32 %3 to i64
   ret i64 %4
 }
@@ -96,13 +92,13 @@ define weak_odr dso_local i64 @_ZTS15no_entry_point() {
 ; CHECK: !nvvm.annotations = !{!0, !1, !2, !1, !3, !3, !3, !3, !4, !4, !3, !5, !6, !7, !8}
 !nvvmir.version = !{!9}
 
-!0 = distinct !{void ()* @_ZTS12first_kernel, !"kernel", i32 1}
+!0 = distinct !{ptr @_ZTS12first_kernel, !"kernel", i32 1}
 !1 = !{null, !"align", i32 8}
 !2 = !{null, !"align", i32 8, !"align", i32 65544, !"align", i32 131080}
 !3 = !{null, !"align", i32 16}
 !4 = !{null, !"align", i32 16, !"align", i32 65552, !"align", i32 131088}
-!5 = distinct !{void ()* @_ZTS13second_kernel, !"kernel", i32 1}
-!6 = distinct !{void ()* @_ZTS12third_kernel, !"kernel", i32 1}
-; CHECK: !7 = !{void ([3 x i32]*)* @_ZTS13second_kernel_with_offset, !"kernel", i32 1}
-; CHECK: !8 = !{void ([3 x i32]*)* @_ZTS12first_kernel_with_offset, !"kernel", i32 1}
+!5 = distinct !{ptr @_ZTS13second_kernel, !"kernel", i32 1}
+!6 = distinct !{ptr @_ZTS12third_kernel, !"kernel", i32 1}
+; CHECK: !7 = !{ptr @_ZTS13second_kernel_with_offset, !"kernel", i32 1}
+; CHECK: !8 = !{ptr @_ZTS12first_kernel_with_offset, !"kernel", i32 1}
 !9 = !{i32 1, i32 4}
diff --git a/llvm/test/CodeGen/NVPTX/global-offset-simple.ll b/llvm/test/CodeGen/NVPTX/global-offset-simple.ll
index 254dda71708a0..0d5264e79b6f8 100644
--- a/llvm/test/CodeGen/NVPTX/global-offset-simple.ll
+++ b/llvm/test/CodeGen/NVPTX/global-offset-simple.ll
@@ -6,16 +6,16 @@ target triple = "nvptx64-nvidia-cuda"
 
 ; This test checks that the transformation is applied in the basic case.
 
-declare i32* @llvm.nvvm.implicit.offset()
+declare ptr @llvm.nvvm.implicit.offset()
 ; CHECK-NOT: llvm.nvvm.implicit.offset
 
 define weak_odr dso_local i64 @_ZTS14other_function() {
-; CHECK: define weak_odr dso_local i64 @_ZTS14other_function(i32* %0) {
-; CHECK: %2 = getelementptr inbounds i32, i32* %0, i64 2
-  %1 = tail call i32* @llvm.nvvm.implicit.offset()
-; CHECK-NOT: tail call i32* @llvm.nvvm.implicit.offset()
-  %2 = getelementptr inbounds i32, i32* %1, i64 2
-  %3 = load i32, i32* %2, align 4
+; CHECK: define weak_odr dso_local i64 @_ZTS14other_function(ptr %0) {
+; CHECK: %2 = getelementptr inbounds i32, ptr %0, i64 2
+  %1 = tail call ptr @llvm.nvvm.implicit.offset()
+; CHECK-NOT: tail call ptr @llvm.nvvm.implicit.offset()
+  %2 = getelementptr inbounds i32, ptr %1, i64 2
+  %3 = load i32, ptr %2, align 4
   %4 = zext i32 %3 to i64
   ret i64 %4
 }
@@ -24,18 +24,16 @@ define weak_odr dso_local i64 @_ZTS14other_function() {
 define weak_odr dso_local void @_ZTS14example_kernel() {
 entry:
 ; CHECK: %0 = alloca [3 x i32], align 4
-; CHECK: %1 = bitcast [3 x i32]* %0 to i8*
-; CHECK: call void @llvm.memset.p0i8.i64(i8* nonnull align 4 dereferenceable(12) %1, i8 0, i64 12, i1 false)
-; CHECK: %2 = getelementptr inbounds [3 x i32], [3 x i32]* %0, i32 0, i32 0
+; CHECK: call void @llvm.memset.p0.i64(ptr nonnull align 4 dereferenceable(12) %0, i8 0, i64 12, i1 false)
+; CHECK: %1 = getelementptr inbounds [3 x i32], ptr %0, i32 0, i32 0
   %0 = call i64 @_ZTS14other_function()
-; CHECK: %3 = call i64 @_ZTS14other_function(i32* %2)
+; CHECK: %2 = call i64 @_ZTS14other_function(ptr %1)
   ret void
 }
 
-; CHECK: define weak_odr dso_local void @_ZTS14example_kernel_with_offset([3 x i32]* byval([3 x i32]) %0) {
+; CHECK: define weak_odr dso_local void @_ZTS14example_kernel_with_offset(ptr byval([3 x i32]) %0) {
 ; CHECK: entry:
-; CHECK: %1 = bitcast [3 x i32]* %0 to i32*
-; CHECK: %2 = call i64 @_ZTS14other_function(i32* %1)
+; CHECK: %1 = call i64 @_ZTS14other_function(ptr %0)
 ; CHECK: ret void
 ; CHECK: }
 
@@ -43,10 +41,10 @@ entry:
 ; CHECK: !nvvm.annotations = !{!0, !1, !2, !1, !3, !3, !3, !3, !4, !4, !3, !5}
 !nvvmir.version = !{!6}
 
-!0 = distinct !{void ()* @_ZTS14example_kernel, !"kernel", i32 1}
+!0 = distinct !{ptr @_ZTS14example_kernel, !"kernel", i32 1}
 !1 = !{null, !"align", i32 8}
 !2 = !{null, !"align", i32 8, !"align", i32 65544, !"align", i32 131080}
 !3 = !{null, !"align", i32 16}
 !4 = !{null, !"align", i32 16, !"align", i32 65552, !"align", i32 131088}
-; CHECK: !5 = !{void ([3 x i32]*)* @_ZTS14example_kernel_with_offset, !"kernel", i32 1}
+; CHECK: !5 = !{ptr @_ZTS14example_kernel_with_offset, !"kernel", i32 1}
 !6 = !{i32 1, i32 4}
diff --git a/llvm/test/CodeGen/NVPTX/global-offset-valid-triple.ll b/llvm/test/CodeGen/NVPTX/global-offset-valid-triple.ll
index 2f1554fe0245f..5b5baa19e787f 100644
--- a/llvm/test/CodeGen/NVPTX/global-offset-valid-triple.ll
+++ b/llvm/test/CodeGen/NVPTX/global-offset-valid-triple.ll
@@ -7,12 +7,12 @@ target triple = "nvptx64-nvidia-cuda"
 
 ; This test checks that the pass does run on cuda triples.
-declare i32* @llvm.nvvm.implicit.offset() +declare ptr @llvm.nvvm.implicit.offset() define weak_odr dso_local i64 @_ZTS14other_function() { - %1 = tail call i32* @llvm.nvvm.implicit.offset() - %2 = getelementptr inbounds i32, i32* %1, i64 2 - %3 = load i32, i32* %2, align 4 + %1 = tail call ptr @llvm.nvvm.implicit.offset() + %2 = getelementptr inbounds i32, ptr %1, i64 2 + %3 = load i32, ptr %2, align 4 %4 = zext i32 %3 to i64 ret i64 %4 } @@ -29,7 +29,7 @@ entry: !nvvmir.version = !{!9} !llvm.module.flags = !{!10, !11} -!0 = distinct !{void ()* @_ZTS14example_kernel, !"kernel", i32 1} +!0 = distinct !{ptr @_ZTS14example_kernel, !"kernel", i32 1} !1 = !{null, !"align", i32 8} !2 = !{null, !"align", i32 8, !"align", i32 65544, !"align", i32 131080} !3 = !{null, !"align", i32 16} diff --git a/llvm/test/CodeGen/NVPTX/local-accessor-to-shared-memory-basic-transformation.ll b/llvm/test/CodeGen/NVPTX/local-accessor-to-shared-memory-basic-transformation.ll index 31785f3303a49..8ff198d387dad 100644 --- a/llvm/test/CodeGen/NVPTX/local-accessor-to-shared-memory-basic-transformation.ll +++ b/llvm/test/CodeGen/NVPTX/local-accessor-to-shared-memory-basic-transformation.ll @@ -11,15 +11,14 @@ target triple = "nvptx64-nvidia-cuda" ; CHECK: @_ZTS14example_kernel_shared_mem = external addrspace(3) global [0 x i8], align 4 ; Function Attrs: noinline -define weak_odr dso_local void @_ZTS14example_kernel(i32 addrspace(3)* %a, i32 addrspace(1)* %b, i32 %c) { -; CHECK: define weak_odr dso_local void @_ZTS14example_kernel(i32 %0, i32 addrspace(1)* %b, i32 %c) { +define weak_odr dso_local void @_ZTS14example_kernel(ptr addrspace(3) %a, ptr addrspace(1) %b, i32 %c) { +; CHECK: define weak_odr dso_local void @_ZTS14example_kernel(i32 %0, ptr addrspace(1) %b, i32 %c) { entry: -; CHECK: %1 = getelementptr inbounds [0 x i8], [0 x i8] addrspace(3)* @_ZTS14example_kernel_shared_mem, i32 0, i32 %0 -; CHECK: %a = bitcast i8 addrspace(3)* %1 to i32 addrspace(3)* - %0 = load i32, i32 addrspace(3)* %a -; CHECK: %2 = load i32, i32 addrspace(3)* %a - %1 = load i32, i32 addrspace(1)* %b -; CHECK: %3 = load i32, i32 addrspace(1)* %b +; CHECK: %1 = getelementptr inbounds [0 x i8], ptr addrspace(3) @_ZTS14example_kernel_shared_mem, i32 0, i32 %0 + %0 = load i32, ptr addrspace(3) %a +; CHECK: %2 = load i32, ptr addrspace(3) %a + %1 = load i32, ptr addrspace(1) %b +; CHECK: %3 = load i32, ptr addrspace(1) %b %2 = add i32 %c, %c ; CHECK: %4 = add i32 %c, %c ret void @@ -28,12 +27,12 @@ entry: !nvvm.annotations = !{!0, !1, !2, !1, !3, !3, !3, !3, !4, !4, !3, !5} !nvvmir.version = !{!6} -!0 = distinct !{void (i32 addrspace(3)*, i32 addrspace(1)*, i32)* @_ZTS14example_kernel, !"kernel", i32 1} -; CHECK: !0 = distinct !{void (i32, i32 addrspace(1)*, i32)* @_ZTS14example_kernel, !"kernel", i32 1} +!0 = distinct !{ptr @_ZTS14example_kernel, !"kernel", i32 1} +; CHECK: !0 = distinct !{ptr @_ZTS14example_kernel, !"kernel", i32 1} !1 = !{null, !"align", i32 8} !2 = !{null, !"align", i32 8, !"align", i32 65544, !"align", i32 131080} !3 = !{null, !"align", i32 16} !4 = !{null, !"align", i32 16, !"align", i32 65552, !"align", i32 131088} -; CHECK: !5 = distinct !{void (i32, i32 addrspace(1)*, i32)* @_ZTS14example_kernel, !"maxntidx", i32 256} -!5 = !{void (i32 addrspace(3)*, i32 addrspace(1)*, i32)* @_ZTS14example_kernel, !"maxntidx", i32 256} +; CHECK: !5 = distinct !{ptr @_ZTS14example_kernel, !"maxntidx", i32 256} +!5 = !{ptr @_ZTS14example_kernel, !"maxntidx", i32 256} !6 = !{i32 1, i32 4} diff --git 
a/llvm/test/CodeGen/NVPTX/local-accessor-to-shared-memory-invalid-triple.ll b/llvm/test/CodeGen/NVPTX/local-accessor-to-shared-memory-invalid-triple.ll index 1d04536a8da66..a3fad590360b8 100644 --- a/llvm/test/CodeGen/NVPTX/local-accessor-to-shared-memory-invalid-triple.ll +++ b/llvm/test/CodeGen/NVPTX/local-accessor-to-shared-memory-invalid-triple.ll @@ -9,7 +9,7 @@ target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64" target triple = "nvptx64-nvidia-nvcl" ; Function Attrs: noinline -define weak_odr dso_local void @_ZTS14example_kernel(i32 addrspace(3)* %a) { +define weak_odr dso_local void @_ZTS14example_kernel(ptr addrspace(3) %a) { entry: ret void } @@ -19,7 +19,7 @@ entry: !nvvmir.version = !{!9} !llvm.module.flags = !{!10, !11} -!0 = distinct !{void (i32 addrspace(3)*)* @_ZTS14example_kernel, !"kernel", i32 1} +!0 = distinct !{ptr @_ZTS14example_kernel, !"kernel", i32 1} !1 = !{null, !"align", i32 8} !2 = !{null, !"align", i32 8, !"align", i32 65544, !"align", i32 131080} !3 = !{null, !"align", i32 16} diff --git a/llvm/test/CodeGen/NVPTX/local-accessor-to-shared-memory-multiple-annotations.ll b/llvm/test/CodeGen/NVPTX/local-accessor-to-shared-memory-multiple-annotations.ll index 44f00f446fd28..638d87991c9c0 100644 --- a/llvm/test/CodeGen/NVPTX/local-accessor-to-shared-memory-multiple-annotations.ll +++ b/llvm/test/CodeGen/NVPTX/local-accessor-to-shared-memory-multiple-annotations.ll @@ -9,15 +9,14 @@ target triple = "nvptx64-nvidia-cuda" ; CHECK: @_ZTS14example_kernel_shared_mem = external addrspace(3) global [0 x i8], align 4 ; Function Attrs: noinline -define weak_odr dso_local void @_ZTS14example_kernel(i32 addrspace(3)* %a, i32 addrspace(1)* %b, i32 %c) { -; CHECK: define weak_odr dso_local void @_ZTS14example_kernel(i32 %0, i32 addrspace(1)* %b, i32 %c) { +define weak_odr dso_local void @_ZTS14example_kernel(ptr addrspace(3) %a, ptr addrspace(1) %b, i32 %c) { +; CHECK: define weak_odr dso_local void @_ZTS14example_kernel(i32 %0, ptr addrspace(1) %b, i32 %c) { entry: -; CHECK: %1 = getelementptr inbounds [0 x i8], [0 x i8] addrspace(3)* @_ZTS14example_kernel_shared_mem, i32 0, i32 %0 -; CHECK: %a = bitcast i8 addrspace(3)* %1 to i32 addrspace(3)* - %0 = load i32, i32 addrspace(3)* %a -; CHECK: %2 = load i32, i32 addrspace(3)* %a - %1 = load i32, i32 addrspace(1)* %b -; CHECK: %3 = load i32, i32 addrspace(1)* %b +; CHECK: %1 = getelementptr inbounds [0 x i8], ptr addrspace(3) @_ZTS14example_kernel_shared_mem, i32 0, i32 %0 + %0 = load i32, ptr addrspace(3) %a +; CHECK: %2 = load i32, ptr addrspace(3) %a + %1 = load i32, ptr addrspace(1) %b +; CHECK: %3 = load i32, ptr addrspace(1) %b %2 = add i32 %c, %c ; CHECK: %4 = add i32 %c, %c ret void @@ -26,8 +25,8 @@ entry: !nvvm.annotations = !{!0, !0} !nvvmir.version = !{!5} -!0 = distinct !{void (i32 addrspace(3)*, i32 addrspace(1)*, i32)* @_ZTS14example_kernel, !"kernel", i32 1} -; CHECK: !0 = distinct !{void (i32, i32 addrspace(1)*, i32)* @_ZTS14example_kernel, !"kernel", i32 1} +!0 = distinct !{ptr @_ZTS14example_kernel, !"kernel", i32 1} +; CHECK: !0 = distinct !{ptr @_ZTS14example_kernel, !"kernel", i32 1} !1 = !{null, !"align", i32 8} !2 = !{null, !"align", i32 8, !"align", i32 65544, !"align", i32 131080} !3 = !{null, !"align", i32 16} diff --git a/llvm/test/CodeGen/NVPTX/local-accessor-to-shared-memory-multiple-functions.ll b/llvm/test/CodeGen/NVPTX/local-accessor-to-shared-memory-multiple-functions.ll index f973c736a1aa1..ead341c3bfe72 100644 --- 
+++ b/llvm/test/CodeGen/NVPTX/local-accessor-to-shared-memory-multiple-functions.ll
@@ -8,33 +8,33 @@ target triple = "nvptx64-nvidia-cuda"
 ; CHECK: @_ZTS14example_kernel_shared_mem = external addrspace(3) global [0 x i8], align 4
-define weak_odr dso_local void @_ZTS14other_function(i32 addrspace(3)* %a, i32 addrspace(1)* %b, i32 %c) {
-; CHECK: define weak_odr dso_local void @_ZTS14other_function(i32 addrspace(3)* %a, i32 addrspace(1)* %b, i32 %c) {
-  %1 = load i32, i32 addrspace(3)* %a
-; CHECK: %1 = load i32, i32 addrspace(3)* %a
-  %2 = load i32, i32 addrspace(1)* %b
-; CHECK: %2 = load i32, i32 addrspace(1)* %b
+define weak_odr dso_local void @_ZTS14other_function(ptr addrspace(3) %a, ptr addrspace(1) %b, i32 %c) {
+; CHECK: define weak_odr dso_local void @_ZTS14other_function(ptr addrspace(3) %a, ptr addrspace(1) %b, i32 %c) {
+  %1 = load i32, ptr addrspace(3) %a
+; CHECK: %1 = load i32, ptr addrspace(3) %a
+  %2 = load i32, ptr addrspace(1) %b
+; CHECK: %2 = load i32, ptr addrspace(1) %b
   %3 = add i32 %c, %c
 ; CHECK: %3 = add i32 %c, %c
   ret void
 }
 ; Function Attrs: noinline
-define weak_odr dso_local void @_ZTS14example_kernel(i32 addrspace(3)* %a, i32 addrspace(1)* %b, i32 %c) {
-; CHECK: define weak_odr dso_local void @_ZTS14example_kernel(i32 %0, i32 addrspace(1)* %b, i32 %c) {
+define weak_odr dso_local void @_ZTS14example_kernel(ptr addrspace(3) %a, ptr addrspace(1) %b, i32 %c) {
+; CHECK: define weak_odr dso_local void @_ZTS14example_kernel(i32 %0, ptr addrspace(1) %b, i32 %c) {
 entry:
-; CHECK: %1 = getelementptr inbounds [0 x i8], [0 x i8] addrspace(3)* @_ZTS14example_kernel_shared_mem, i32 0, i32 %0
-; CHECK: %a = bitcast i8 addrspace(3)* %1 to i32 addrspace(3)*
-  call void @_ZTS14other_function(i32 addrspace(3)* %a, i32 addrspace(1)* %b, i32 %c)
-; CHECK: call void @_ZTS14other_function(i32 addrspace(3)* %a, i32 addrspace(1)* %b, i32 %c)
+; CHECK: %1 = getelementptr inbounds [0 x i8], ptr addrspace(3) @_ZTS14example_kernel_shared_mem, i32 0, i32 %0
+; CHECK: %a = bitcast ptr addrspace(3) %1 to ptr addrspace(3)
+  call void @_ZTS14other_function(ptr addrspace(3) %a, ptr addrspace(1) %b, i32 %c)
+; CHECK: call void @_ZTS14other_function(ptr addrspace(3) %a, ptr addrspace(1) %b, i32 %c)
   ret void
 }
 !nvvm.annotations = !{!0, !1, !2, !1, !3, !3, !3, !3, !4, !4, !3}
 !nvvmir.version = !{!5}
-!0 = distinct !{void (i32 addrspace(3)*, i32 addrspace(1)*, i32)* @_ZTS14example_kernel, !"kernel", i32 1}
-; CHECK: !0 = distinct !{void (i32, i32 addrspace(1)*, i32)* @_ZTS14example_kernel, !"kernel", i32 1}
+!0 = distinct !{ptr @_ZTS14example_kernel, !"kernel", i32 1}
+; CHECK: !0 = distinct !{ptr @_ZTS14example_kernel, !"kernel", i32 1}
 !1 = !{null, !"align", i32 8}
 !2 = !{null, !"align", i32 8, !"align", i32 65544, !"align", i32 131080}
 !3 = !{null, !"align", i32 16}
diff --git a/llvm/test/CodeGen/NVPTX/local-accessor-to-shared-memory-no-entry-points.ll b/llvm/test/CodeGen/NVPTX/local-accessor-to-shared-memory-no-entry-points.ll
index 8b3acc6bcf53d..6af963d51869f 100644
--- a/llvm/test/CodeGen/NVPTX/local-accessor-to-shared-memory-no-entry-points.ll
+++ b/llvm/test/CodeGen/NVPTX/local-accessor-to-shared-memory-no-entry-points.ll
@@ -7,13 +7,13 @@ target triple = "nvptx64-nvidia-cuda"
 ; This test checks that no transformation is applied when there are no entry points.
 ; Function Attrs: noinline
-define weak_odr dso_local void @_ZTS14example_kernel(i32 addrspace(3)* %a, i32 addrspace(1)* %b, i32 %c) {
-; CHECK: define weak_odr dso_local void @_ZTS14example_kernel(i32 addrspace(3)* %a, i32 addrspace(1)* %b, i32 %c) {
+define weak_odr dso_local void @_ZTS14example_kernel(ptr addrspace(3) %a, ptr addrspace(1) %b, i32 %c) {
+; CHECK: define weak_odr dso_local void @_ZTS14example_kernel(ptr addrspace(3) %a, ptr addrspace(1) %b, i32 %c) {
 entry:
-  %0 = load i32, i32 addrspace(3)* %a
-; CHECK: %0 = load i32, i32 addrspace(3)* %a
-  %1 = load i32, i32 addrspace(1)* %b
-; CHECK: %1 = load i32, i32 addrspace(1)* %b
+  %0 = load i32, ptr addrspace(3) %a
+; CHECK: %0 = load i32, ptr addrspace(3) %a
+  %1 = load i32, ptr addrspace(1) %b
+; CHECK: %1 = load i32, ptr addrspace(1) %b
   %2 = add i32 %c, %c
 ; CHECK: %2 = add i32 %c, %c
   ret void
diff --git a/llvm/test/CodeGen/NVPTX/local-accessor-to-shared-memory-preserves-types.ll b/llvm/test/CodeGen/NVPTX/local-accessor-to-shared-memory-preserves-types.ll
index 68416cd9d7eae..9dd020c2fb657 100644
--- a/llvm/test/CodeGen/NVPTX/local-accessor-to-shared-memory-preserves-types.ll
+++ b/llvm/test/CodeGen/NVPTX/local-accessor-to-shared-memory-preserves-types.ll
@@ -9,33 +9,33 @@ target triple = "nvptx64-nvidia-cuda"
 ; CHECK: @_ZTS14example_kernel_shared_mem = external addrspace(3) global [0 x i8], align 4
 ; Function Attrs: noinline
-define weak_odr dso_local void @_ZTS14example_kernel(i32 addrspace(3)* %a, i64 addrspace(3)* %b, i16 addrspace(3)* %c, i8 addrspace(3)* %d) {
+define weak_odr dso_local void @_ZTS14example_kernel(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d) {
 ; CHECK: define weak_odr dso_local void @_ZTS14example_kernel(i32 %0, i32 %1, i32 %2, i32 %3) {
 entry:
-; CHECK: %4 = getelementptr inbounds [0 x i8], [0 x i8] addrspace(3)* @_ZTS14example_kernel_shared_mem, i32 0, i32 %3
-; CHECK: %d = bitcast i8 addrspace(3)* %4 to i8 addrspace(3)*
-; CHECK: %5 = getelementptr inbounds [0 x i8], [0 x i8] addrspace(3)* @_ZTS14example_kernel_shared_mem, i32 0, i32 %2
-; CHECK: %c = bitcast i8 addrspace(3)* %5 to i16 addrspace(3)*
-; CHECK: %6 = getelementptr inbounds [0 x i8], [0 x i8] addrspace(3)* @_ZTS14example_kernel_shared_mem, i32 0, i32 %1
-; CHECK: %b = bitcast i8 addrspace(3)* %6 to i64 addrspace(3)*
-; CHECK: %7 = getelementptr inbounds [0 x i8], [0 x i8] addrspace(3)* @_ZTS14example_kernel_shared_mem, i32 0, i32 %0
-; CHECK: %a = bitcast i8 addrspace(3)* %7 to i32 addrspace(3)*
-  %0 = load i32, i32 addrspace(3)* %a
-; CHECK: %8 = load i32, i32 addrspace(3)* %a
-  %1 = load i64, i64 addrspace(3)* %b
-; CHECK: %9 = load i64, i64 addrspace(3)* %b
-  %2 = load i16, i16 addrspace(3)* %c
-; CHECK: %10 = load i16, i16 addrspace(3)* %c
-  %3 = load i8, i8 addrspace(3)* %d
-; CHECK: %11 = load i8, i8 addrspace(3)* %d
+; CHECK: %4 = getelementptr inbounds [0 x i8], ptr addrspace(3) @_ZTS14example_kernel_shared_mem, i32 0, i32 %3
+; CHECK: %d = bitcast ptr addrspace(3) %4 to ptr addrspace(3)
+; CHECK: %5 = getelementptr inbounds [0 x i8], ptr addrspace(3) @_ZTS14example_kernel_shared_mem, i32 0, i32 %2
+; CHECK: %c = bitcast ptr addrspace(3) %5 to ptr addrspace(3)
+; CHECK: %6 = getelementptr inbounds [0 x i8], ptr addrspace(3) @_ZTS14example_kernel_shared_mem, i32 0, i32 %1
+; CHECK: %b = bitcast ptr addrspace(3) %6 to ptr addrspace(3)
+; CHECK: %7 = getelementptr inbounds [0 x i8], ptr addrspace(3) @_ZTS14example_kernel_shared_mem, i32 0, i32 %0
+; CHECK: %a = bitcast ptr addrspace(3) %7 to ptr addrspace(3)
+  %0 = load i32, ptr addrspace(3) %a
+; CHECK: %8 = load i32, ptr addrspace(3) %a
+  %1 = load i64, ptr addrspace(3) %b
+; CHECK: %9 = load i64, ptr addrspace(3) %b
+  %2 = load i16, ptr addrspace(3) %c
+; CHECK: %10 = load i16, ptr addrspace(3) %c
+  %3 = load i8, ptr addrspace(3) %d
+; CHECK: %11 = load i8, ptr addrspace(3) %d
   ret void
 }
 !nvvm.annotations = !{!0, !1, !2, !1, !3, !3, !3, !3, !4, !4, !3}
 !nvvmir.version = !{!5}
-!0 = distinct !{void (i32 addrspace(3)*, i64 addrspace(3)*, i16 addrspace(3)*, i8 addrspace(3)*)* @_ZTS14example_kernel, !"kernel", i32 1}
-; CHECK: !0 = distinct !{void (i32, i32, i32, i32)* @_ZTS14example_kernel, !"kernel", i32 1}
+!0 = distinct !{ptr @_ZTS14example_kernel, !"kernel", i32 1}
+; CHECK: !0 = distinct !{ptr @_ZTS14example_kernel, !"kernel", i32 1}
 !1 = !{null, !"align", i32 8}
 !2 = !{null, !"align", i32 8, !"align", i32 65544, !"align", i32 131080}
 !3 = !{null, !"align", i32 16}
diff --git a/llvm/test/CodeGen/NVPTX/local-accessor-to-shared-memory-triple.ll b/llvm/test/CodeGen/NVPTX/local-accessor-to-shared-memory-triple.ll
index 5e5e2b6138f10..b0c80068434c8 100644
--- a/llvm/test/CodeGen/NVPTX/local-accessor-to-shared-memory-triple.ll
+++ b/llvm/test/CodeGen/NVPTX/local-accessor-to-shared-memory-triple.ll
@@ -11,16 +11,16 @@ target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
 target triple = "nvptx64-nvidia-cuda"
 ; Function Attrs: noinline
-define weak_odr dso_local void @_ZTS14example_kernel(i32 addrspace(3)* %a) {
+define weak_odr dso_local void @_ZTS14example_kernel(ptr addrspace(3) %a) {
 entry:
-  %0 = load i32, i32 addrspace(3)* %a
+  %0 = load i32, ptr addrspace(3) %a
   ret void
 }
 !nvvm.annotations = !{!0, !1, !2, !1, !3, !3, !3, !3, !4, !4, !3}
 !nvvmir.version = !{!5}
-!0 = distinct !{void (i32 addrspace(3)*)* @_ZTS14example_kernel, !"kernel", i32 1}
+!0 = distinct !{ptr @_ZTS14example_kernel, !"kernel", i32 1}
 !1 = !{null, !"align", i32 8}
 !2 = !{null, !"align", i32 8, !"align", i32 65544, !"align", i32 131080}
 !3 = !{null, !"align", i32 16}
diff --git a/llvm/test/CodeGen/NVPTX/local-accessor-to-shared-memory-valid-triple.ll b/llvm/test/CodeGen/NVPTX/local-accessor-to-shared-memory-valid-triple.ll
index 43688a9bbb489..467cea7204f3c 100644
--- a/llvm/test/CodeGen/NVPTX/local-accessor-to-shared-memory-valid-triple.ll
+++ b/llvm/test/CodeGen/NVPTX/local-accessor-to-shared-memory-valid-triple.ll
@@ -10,9 +10,9 @@ target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
 target triple = "nvptx64-nvidia-cuda"
 ; Function Attrs: noinline
-define weak_odr dso_local void @_ZTS14example_kernel(i32 addrspace(3)* %a) {
+define weak_odr dso_local void @_ZTS14example_kernel(ptr addrspace(3) %a) {
 entry:
-  %0 = load i32, i32 addrspace(3)* %a
+  %0 = load i32, ptr addrspace(3) %a
   ret void
 }
@@ -21,7 +21,7 @@ entry:
 !nvvmir.version = !{!9}
 !llvm.module.flags = !{!10, !11}
-!0 = distinct !{void (i32 addrspace(3)*)* @_ZTS14example_kernel, !"kernel", i32 1}
+!0 = distinct !{ptr @_ZTS14example_kernel, !"kernel", i32 1}
 !1 = !{null, !"align", i32 8}
 !2 = !{null, !"align", i32 8, !"align", i32 65544, !"align", i32 131080}
 !3 = !{null, !"align", i32 16}
diff --git a/llvm/test/tools/spirv-to-ir-wrapper/spirv-to-ir-wrapper.ll b/llvm/test/tools/spirv-to-ir-wrapper/spirv-to-ir-wrapper.ll
index 140c31b7b3539..5c3baf8a07587 100644
--- a/llvm/test/tools/spirv-to-ir-wrapper/spirv-to-ir-wrapper.ll
+++ b/llvm/test/tools/spirv-to-ir-wrapper/spirv-to-ir-wrapper.ll
@@ -1,34 +1,32 @@
-; TODO: switch to opaque pointers once llvm-spirv tool is able to handle them.
-
 ; Check for passthrough abilities
-; RUN: llvm-as %s -o %t.bc
+; RUN: llvm-as %s -o %t.bc
 ; RUN: spirv-to-ir-wrapper %t.bc -o %t_1.bc -skip-unknown-input
-; RUN: llvm-dis %t_1.bc -o %t_1.ll
+; RUN: llvm-dis %t_1.bc -o %t_1.ll
 ; RUN: FileCheck %s --input-file %t_1.ll
 ; Check for SPIR-V conversion
-; RUN: llvm-spirv %t.bc -o %t.spv
+; RUN: llvm-spirv %t.bc -o %t.spv
 ; RUN: spirv-to-ir-wrapper %t.spv -o %t_2.bc
-; RUN: llvm-dis %t_2.bc -o %t_2.ll
+; RUN: llvm-dis %t_2.bc -o %t_2.ll
 ; RUNx: FileCheck %s --input-file %t_2.ll
 ; CHECK: target datalayout
 ; CHECK-NEXT: target triple = "spir-unknown-unknown"
 ; CHECK: Function Attrs: nounwind
-; CHECK-NEXT: define spir_kernel void @foo(i32 addrspace(1)* %a)
+; CHECK-NEXT: define spir_kernel void @foo(ptr addrspace(1) %a)
 ; CHECK-NEXT: entry:
-; CHECK-NEXT: %a.addr = alloca i32 addrspace(1)*, align 4
-; CHECK-NEXT: store i32 addrspace(1)* %a, i32 addrspace(1)** %a.addr, align 4
+; CHECK-NEXT: %a.addr = alloca ptr addrspace(1), align 4
+; CHECK-NEXT: store ptr addrspace(1) %a, ptr %a.addr, align 4
 ; CHECK-NEXT: ret void
 target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
 target triple = "spir-unknown-unknown"
 ; Function Attrs: nounwind
-define spir_kernel void @foo(i32 addrspace(1)* %a) #0 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !4 !kernel_arg_type_qual !5 {
+define spir_kernel void @foo(ptr addrspace(1) %a) #0 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !4 !kernel_arg_type_qual !5 {
 entry:
-  %a.addr = alloca i32 addrspace(1)*, align 4
-  store i32 addrspace(1)* %a, i32 addrspace(1)** %a.addr, align 4
+  %a.addr = alloca ptr addrspace(1), align 4
+  store ptr addrspace(1) %a, ptr %a.addr, align 4
   ret void
 }
diff --git a/sycl-fusion/test/internalization/abort-promote-stored-ptr.ll b/sycl-fusion/test/internalization/abort-promote-stored-ptr.ll
index 194f8a4624544..dae651aebfadf 100644
--- a/sycl-fusion/test/internalization/abort-promote-stored-ptr.ll
+++ b/sycl-fusion/test/internalization/abort-promote-stored-ptr.ll
@@ -5,29 +5,29 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 target triple = "spir64-unknown-unknown"
 ; CHECK-LABEL: define {{[^@]+}}@fused_0
-; CHECK-SAME: (float addrspace(1)* align 4 %[[ACC:.*]])
-define spir_kernel void @fused_0(float addrspace(1)* align 4 %acc) !kernel_arg_addr_space !12 !kernel_arg_access_qual !13 !kernel_arg_type !14 !kernel_arg_type_qual !15 !kernel_arg_base_type !14 !kernel_arg_name !16 !sycl.kernel.promote !17 !sycl.kernel.promote.localsize !18 {
+; CHECK-SAME: (ptr addrspace(1) align 4 %[[ACC:.*]])
+define spir_kernel void @fused_0(ptr addrspace(1) align 4 %acc) !kernel_arg_addr_space !12 !kernel_arg_access_qual !13 !kernel_arg_type !14 !kernel_arg_type_qual !15 !kernel_arg_base_type !14 !kernel_arg_name !16 !sycl.kernel.promote !17 !sycl.kernel.promote.localsize !18 {
 ; Scenario: Test private internalization is not performed when the
 ; input pointer is stored in another pointer.
-; CHECK-NEXT: %[[ALLOCA:.*]] = alloca float addrspace(1)*, align 8
-; CHECK-NEXT: store float addrspace(1)* %[[ACC]], float addrspace(1)** %[[ALLOCA]], align 8
-; CHECK-NEXT: %[[ACC_PTR:.*]] = load float addrspace(1)*, float addrspace(1)** %[[ALLOCA]], align 8
-; CHECK-NEXT: store float 7.000000e+00, float addrspace(1)* %[[ACC]], align 4
-; CHECK-NEXT: %[[RES:.*]] = load float, float addrspace(1)* %[[ACC]], align 4
+; CHECK-NEXT: %[[ALLOCA:.*]] = alloca ptr addrspace(1), align 8
+; CHECK-NEXT: store ptr addrspace(1) %[[ACC]], ptr %[[ALLOCA]], align 8
+; CHECK-NEXT: %[[ACC_PTR:.*]] = load ptr addrspace(1), ptr %[[ALLOCA]], align 8
+; CHECK-NEXT: store float 7.000000e+00, ptr addrspace(1) %[[ACC]], align 4
+; CHECK-NEXT: %[[RES:.*]] = load float, ptr addrspace(1) %[[ACC]], align 4
 ; CHECK-NEXT: ret void
-  %alloca = alloca float addrspace(1)*
-  store float addrspace(1)* %acc, float addrspace(1)** %alloca
-  %acc_ptr = load float addrspace(1)*, float addrspace(1)** %alloca
-  store float 7.0, float addrspace(1)* %acc
-  %res = load float, float addrspace(1)* %acc
+  %alloca = alloca ptr addrspace(1)
+  store ptr addrspace(1) %acc, ptr %alloca
+  %acc_ptr = load ptr addrspace(1), ptr %alloca
+  store float 7.0, ptr addrspace(1) %acc
+  %res = load float, ptr addrspace(1) %acc
   ret void
 }
 !12 = !{i32 1}
 !13 = !{!"none"}
-!14 = !{!"float*"}
+!14 = !{!"ptr"}
 !15 = !{!""}
 !16 = !{!"acc"}
 !17 = !{!"private"}
diff --git a/sycl-fusion/test/internalization/promote-local-scalar.ll b/sycl-fusion/test/internalization/promote-local-scalar.ll
index 58bd9d29578ee..7138c848059fd 100644
--- a/sycl-fusion/test/internalization/promote-local-scalar.ll
+++ b/sycl-fusion/test/internalization/promote-local-scalar.ll
@@ -29,55 +29,55 @@ declare spir_func void @__itt_offload_wi_start_wrapper() #3
 declare spir_func void @__itt_offload_wi_finish_wrapper() #3
 ; Function Attrs: noinline nounwind
-declare spir_func void @__itt_offload_wi_finish_stub(i64 addrspace(4)* %group_id, i64 %wi_id) #4
+declare spir_func void @__itt_offload_wi_finish_stub(ptr addrspace(4) %group_id, i64 %wi_id) #4
 ; Function Attrs: noinline nounwind
-declare spir_func void @__itt_offload_wi_start_stub(i64 addrspace(4)* %group_id, i64 %wi_id, i32 %wg_size) #4
+declare spir_func void @__itt_offload_wi_start_stub(ptr addrspace(4) %group_id, i64 %wi_id, i32 %wg_size) #4
-define spir_kernel void @fused_0(float addrspace(1)* align 4 %KernelOne_accTmp, %0* byval(%0) align 8 %KernelOne_accTmp3, float addrspace(1)* align 4 %KernelOne_accIn1, %0* byval(%0) align 8 %KernelOne_accIn16, float addrspace(1)* align 4 %KernelOne_accIn2, float addrspace(1)* align 4 %KernelTwo_accOut, float addrspace(1)* align 4 %KernelTwo_accIn3) !kernel_arg_addr_space !12 !kernel_arg_access_qual !13 !kernel_arg_type !14 !kernel_arg_type_qual !15 !kernel_arg_base_type !14 !kernel_arg_name !16 !sycl.kernel.promote !17 !sycl.kernel.promote.localsize !18 {
+define spir_kernel void @fused_0(ptr addrspace(1) align 4 %KernelOne_accTmp, ptr byval(%0) align 8 %KernelOne_accTmp3, ptr addrspace(1) align 4 %KernelOne_accIn1, ptr byval(%0) align 8 %KernelOne_accIn16, ptr addrspace(1) align 4 %KernelOne_accIn2, ptr addrspace(1) align 4 %KernelTwo_accOut, ptr addrspace(1) align 4 %KernelTwo_accIn3) !kernel_arg_addr_space !12 !kernel_arg_access_qual !13 !kernel_arg_type !14 !kernel_arg_type_qual !15 !kernel_arg_base_type !14 !kernel_arg_name !16 !sycl.kernel.promote !17 !sycl.kernel.promote.localsize !18 {
 ; Scenario: Test the successful local internalization of the first pointer
 ; argument. This means, the first pointer argument has been replaced by a
 ; pointer to local address space (address space 3).
 ; CHECK-LABEL: define {{[^@]+}}@fused_0
-; CHECK-SAME: (float addrspace(3)* align 4 [[KERNELONE_ACCTMP:%.*]], %0* byval([[TYPE0:%.*]]) align 8 [[KERNELONE_ACCTMP3:%.*]], float addrspace(1)* align 4 [[KERNELONE_ACCIN1:%.*]], %0* byval([[TYPE0]]) align 8 [[KERNELONE_ACCIN16:%.*]], float addrspace(1)* align 4 [[KERNELONE_ACCIN2:%.*]], float addrspace(1)* align 4 [[KERNELTWO_ACCOUT:%.*]], float addrspace(1)* align 4 [[KERNELTWO_ACCIN3:%.*]])
+; CHECK-SAME: (ptr addrspace(3) align 4 [[KERNELONE_ACCTMP:%.*]], ptr byval([[TYPE0:%.*]]) align 8 [[KERNELONE_ACCTMP3:%.*]], ptr addrspace(1) align 4 [[KERNELONE_ACCIN1:%.*]], ptr byval([[TYPE0]]) align 8 [[KERNELONE_ACCIN16:%.*]], ptr addrspace(1) align 4 [[KERNELONE_ACCIN2:%.*]], ptr addrspace(1) align 4 [[KERNELTWO_ACCOUT:%.*]], ptr addrspace(1) align 4 [[KERNELTWO_ACCIN3:%.*]])
 ; CHECK: entry:
-; CHECK: [[KERNELONE_ACCTMP31_SROA_0_0__SROA_IDX:%.*]] = getelementptr inbounds [[TYPE0]], %0* [[KERNELONE_ACCTMP3]], i64 0, i32 0, i32 0, i64 0
-; CHECK: [[KERNELONE_ACCTMP31_SROA_0_0_COPYLOAD:%.*]] = load i64, i64* [[KERNELONE_ACCTMP31_SROA_0_0__SROA_IDX]], align 1
+; CHECK: [[KERNELONE_ACCTMP31_SROA_0_0__SROA_IDX:%.*]] = getelementptr inbounds [[TYPE0]], ptr [[KERNELONE_ACCTMP3]], i64 0, i32 0, i32 0, i64 0
+; CHECK: [[KERNELONE_ACCTMP31_SROA_0_0_COPYLOAD:%.*]] = load i64, ptr [[KERNELONE_ACCTMP31_SROA_0_0__SROA_IDX]], align 1
 ; CHECK: [[TMP0:%.*]] = urem i64 [[KERNELONE_ACCTMP31_SROA_0_0_COPYLOAD]], 16
-; CHECK: [[ADD_PTR_I_I:%.*]] = getelementptr inbounds float, float addrspace(3)* [[KERNELONE_ACCTMP]], i64 [[TMP0]]
+; CHECK: [[ADD_PTR_I_I:%.*]] = getelementptr inbounds float, ptr addrspace(3) [[KERNELONE_ACCTMP]], i64 [[TMP0]]
 ; CHECK: [[TMP1:%.*]] = call spir_func i64 @_Z13get_global_idj(i32 0) #[[ATTR1:[0-9]+]]
 ; CHECK: [[ADD_I_I:%.*]] = fadd float
 ; CHECK: [[TMP9:%.*]] = add i64 [[TMP1]], [[TMP0]]
 ; CHECK: [[TMP10:%.*]] = urem i64 [[TMP9]], 16
-; CHECK: [[ARRAYIDX_I13_I_I:%.*]] = getelementptr inbounds float, float addrspace(3)* [[ADD_PTR_I_I]], i64 [[TMP10]]
-; CHECK: store float [[ADD_I_I]], float addrspace(3)* [[ARRAYIDX_I13_I_I]], align 4
-; CHECK: [[KERNELONE_ACCTMP35_SROA_0_0__SROA_IDX:%.*]] = getelementptr inbounds [[TYPE0]], %0* [[KERNELONE_ACCTMP3]], i64 0, i32 0, i32 0, i64 0
-; CHECK: [[KERNELONE_ACCTMP35_SROA_0_0_COPYLOAD:%.*]] = load i64, i64* [[KERNELONE_ACCTMP35_SROA_0_0__SROA_IDX]], align 1
+; CHECK: [[ARRAYIDX_I13_I_I:%.*]] = getelementptr inbounds float, ptr addrspace(3) [[ADD_PTR_I_I]], i64 [[TMP10]]
+; CHECK: store float [[ADD_I_I]], ptr addrspace(3) [[ARRAYIDX_I13_I_I]], align 4
+; CHECK: [[KERNELONE_ACCTMP35_SROA_0_0__SROA_IDX:%.*]] = getelementptr inbounds [[TYPE0]], ptr [[KERNELONE_ACCTMP3]], i64 0, i32 0, i32 0, i64 0
+; CHECK: [[KERNELONE_ACCTMP35_SROA_0_0_COPYLOAD:%.*]] = load i64, ptr [[KERNELONE_ACCTMP35_SROA_0_0__SROA_IDX]], align 1
 ; CHECK: [[TMP11:%.*]] = urem i64 [[KERNELONE_ACCTMP35_SROA_0_0_COPYLOAD]], 16
-; CHECK: [[ADD_PTR_I39_I8:%.*]] = getelementptr inbounds float, float addrspace(3)* [[KERNELONE_ACCTMP]], i64 [[TMP11]]
+; CHECK: [[ADD_PTR_I39_I8:%.*]] = getelementptr inbounds float, ptr addrspace(3) [[KERNELONE_ACCTMP]], i64 [[TMP11]]
 ; CHECK: [[TMP12:%.*]] = call spir_func i64 @_Z13get_global_idj(i32 0) #[[ATTR1]]
 ; CHECK: [[TMP18:%.*]] = add i64 [[TMP12]], [[TMP11]]
 ; CHECK: [[TMP19:%.*]] = urem i64 [[TMP18]], 16
-; CHECK: [[ARRAYIDX_I_I_I11:%.*]] = getelementptr inbounds float, float addrspace(3)* [[ADD_PTR_I39_I8]], i64 [[TMP19]]
-; CHECK: [[TMP20:%.*]] = load float, float addrspace(3)* [[ARRAYIDX_I_I_I11]], align 4
+; CHECK: [[ARRAYIDX_I_I_I11:%.*]] = getelementptr inbounds float, ptr addrspace(3) [[ADD_PTR_I39_I8]], i64 [[TMP19]]
+; CHECK: [[TMP20:%.*]] = load float, ptr addrspace(3) [[ARRAYIDX_I_I_I11]], align 4
 ; CHECK: [[MUL_I_I:%.*]] = fmul float [[TMP20]]
 ; CHECK: store float [[MUL_I_I]],
 ; CHECK-NOT: store
 ; CHECK: ret void
 ;
 entry:
-  %KernelOne_accIn163.sroa.0.0..sroa_idx = getelementptr inbounds %0, %0* %KernelOne_accIn16, i64 0, i32 0, i32 0, i64 0
-  %KernelOne_accIn163.sroa.0.0.copyload = load i64, i64* %KernelOne_accIn163.sroa.0.0..sroa_idx, align 1
-  %KernelOne_accIn162.sroa.0.0..sroa_idx = getelementptr inbounds %0, %0* %KernelOne_accIn16, i64 0, i32 0, i32 0, i64 0
-  %KernelOne_accIn162.sroa.0.0.copyload = load i64, i64* %KernelOne_accIn162.sroa.0.0..sroa_idx, align 1
-  %KernelOne_accTmp31.sroa.0.0..sroa_idx = getelementptr inbounds %0, %0* %KernelOne_accTmp3, i64 0, i32 0, i32 0, i64 0
-  %KernelOne_accTmp31.sroa.0.0.copyload = load i64, i64* %KernelOne_accTmp31.sroa.0.0..sroa_idx, align 1
+  %KernelOne_accIn163.sroa.0.0..sroa_idx = getelementptr inbounds %0, ptr %KernelOne_accIn16, i64 0, i32 0, i32 0, i64 0
+  %KernelOne_accIn163.sroa.0.0.copyload = load i64, ptr %KernelOne_accIn163.sroa.0.0..sroa_idx, align 1
+  %KernelOne_accIn162.sroa.0.0..sroa_idx = getelementptr inbounds %0, ptr %KernelOne_accIn16, i64 0, i32 0, i32 0, i64 0
+  %KernelOne_accIn162.sroa.0.0.copyload = load i64, ptr %KernelOne_accIn162.sroa.0.0..sroa_idx, align 1
+  %KernelOne_accTmp31.sroa.0.0..sroa_idx = getelementptr inbounds %0, ptr %KernelOne_accTmp3, i64 0, i32 0, i32 0, i64 0
+  %KernelOne_accTmp31.sroa.0.0.copyload = load i64, ptr %KernelOne_accTmp31.sroa.0.0..sroa_idx, align 1
   call spir_func void @__itt_offload_wi_start_wrapper() #3
-  %add.ptr.i.i = getelementptr inbounds float, float addrspace(1)* %KernelOne_accTmp, i64 %KernelOne_accTmp31.sroa.0.0.copyload
-  %add.ptr.i39.i = getelementptr inbounds float, float addrspace(1)* %KernelOne_accIn1, i64 %KernelOne_accIn162.sroa.0.0.copyload
-  %add.ptr.i53.i = getelementptr inbounds float, float addrspace(1)* %KernelOne_accIn2, i64 %KernelOne_accIn163.sroa.0.0.copyload
+  %add.ptr.i.i = getelementptr inbounds float, ptr addrspace(1) %KernelOne_accTmp, i64 %KernelOne_accTmp31.sroa.0.0.copyload
+  %add.ptr.i39.i = getelementptr inbounds float, ptr addrspace(1) %KernelOne_accIn1, i64 %KernelOne_accIn162.sroa.0.0.copyload
+  %add.ptr.i53.i = getelementptr inbounds float, ptr addrspace(1) %KernelOne_accIn2, i64 %KernelOne_accIn163.sroa.0.0.copyload
   %0 = call spir_func i64 @_Z13get_global_idj(i32 0) #2
   %1 = insertelement <3 x i64> undef, i64 %0, i32 0
   %2 = call spir_func i64 @_Z13get_global_idj(i32 1) #2
@@ -86,24 +86,24 @@ entry:
   %5 = insertelement <3 x i64> %3, i64 %4, i32 2
   %cmp.i.i.i = icmp ult i64 %0, 2147483648
   call void @llvm.assume(i1 %cmp.i.i.i)
-  %arrayidx.i.i.i = getelementptr inbounds float, float addrspace(1)* %add.ptr.i39.i, i64 %0
-  %6 = load float, float addrspace(1)* %arrayidx.i.i.i, align 4
-  %arrayidx.i9.i.i = getelementptr inbounds float, float addrspace(1)* %add.ptr.i53.i, i64 %0
-  %7 = load float, float addrspace(1)* %arrayidx.i9.i.i, align 4
+  %arrayidx.i.i.i = getelementptr inbounds float, ptr addrspace(1) %add.ptr.i39.i, i64 %0
+  %6 = load float, ptr addrspace(1) %arrayidx.i.i.i, align 4
+  %arrayidx.i9.i.i = getelementptr inbounds float, ptr addrspace(1) %add.ptr.i53.i, i64 %0
+  %7 = load float, ptr addrspace(1) %arrayidx.i9.i.i, align 4
   %add.i.i = fadd float %6, %7
-  %arrayidx.i13.i.i = getelementptr inbounds float, float addrspace(1)* %add.ptr.i.i, i64 %0
-  store float %add.i.i, float addrspace(1)* %arrayidx.i13.i.i, align 4
+  %arrayidx.i13.i.i = getelementptr inbounds float, ptr addrspace(1) %add.ptr.i.i, i64 %0
+  store float %add.i.i, ptr addrspace(1) %arrayidx.i13.i.i, align 4
   call spir_func void @__itt_offload_wi_finish_wrapper() #3
-  %KernelOne_accIn166.sroa.0.0..sroa_idx = getelementptr inbounds %0, %0* %KernelOne_accIn16, i64 0, i32 0, i32 0, i64 0
-  %KernelOne_accIn166.sroa.0.0.copyload = load i64, i64* %KernelOne_accIn166.sroa.0.0..sroa_idx, align 1
-  %KernelOne_accTmp35.sroa.0.0..sroa_idx = getelementptr inbounds %0, %0* %KernelOne_accTmp3, i64 0, i32 0, i32 0, i64 0
-  %KernelOne_accTmp35.sroa.0.0.copyload = load i64, i64* %KernelOne_accTmp35.sroa.0.0..sroa_idx, align 1
-  %KernelOne_accIn164.sroa.0.0..sroa_idx = getelementptr inbounds %0, %0* %KernelOne_accIn16, i64 0, i32 0, i32 0, i64 0
-  %KernelOne_accIn164.sroa.0.0.copyload = load i64, i64* %KernelOne_accIn164.sroa.0.0..sroa_idx, align 1
+  %KernelOne_accIn166.sroa.0.0..sroa_idx = getelementptr inbounds %0, ptr %KernelOne_accIn16, i64 0, i32 0, i32 0, i64 0
+  %KernelOne_accIn166.sroa.0.0.copyload = load i64, ptr %KernelOne_accIn166.sroa.0.0..sroa_idx, align 1
+  %KernelOne_accTmp35.sroa.0.0..sroa_idx = getelementptr inbounds %0, ptr %KernelOne_accTmp3, i64 0, i32 0, i32 0, i64 0
+  %KernelOne_accTmp35.sroa.0.0.copyload = load i64, ptr %KernelOne_accTmp35.sroa.0.0..sroa_idx, align 1
+  %KernelOne_accIn164.sroa.0.0..sroa_idx = getelementptr inbounds %0, ptr %KernelOne_accIn16, i64 0, i32 0, i32 0, i64 0
+  %KernelOne_accIn164.sroa.0.0.copyload = load i64, ptr %KernelOne_accIn164.sroa.0.0..sroa_idx, align 1
   call spir_func void @__itt_offload_wi_start_wrapper() #3
-  %add.ptr.i.i7 = getelementptr inbounds float, float addrspace(1)* %KernelTwo_accOut, i64 %KernelOne_accIn164.sroa.0.0.copyload
-  %add.ptr.i39.i8 = getelementptr inbounds float, float addrspace(1)* %KernelOne_accTmp, i64 %KernelOne_accTmp35.sroa.0.0.copyload
-  %add.ptr.i53.i9 = getelementptr inbounds float, float addrspace(1)* %KernelTwo_accIn3, i64 %KernelOne_accIn166.sroa.0.0.copyload
+  %add.ptr.i.i7 = getelementptr inbounds float, ptr addrspace(1) %KernelTwo_accOut, i64 %KernelOne_accIn164.sroa.0.0.copyload
+  %add.ptr.i39.i8 = getelementptr inbounds float, ptr addrspace(1) %KernelOne_accTmp, i64 %KernelOne_accTmp35.sroa.0.0.copyload
+  %add.ptr.i53.i9 = getelementptr inbounds float, ptr addrspace(1) %KernelTwo_accIn3, i64 %KernelOne_accIn166.sroa.0.0.copyload
   %8 = call spir_func i64 @_Z13get_global_idj(i32 0) #2
   %9 = insertelement <3 x i64> undef, i64 %8, i32 0
   %10 = call spir_func i64 @_Z13get_global_idj(i32 1) #2
@@ -112,13 +112,13 @@ entry:
   %13 = insertelement <3 x i64> %11, i64 %12, i32 2
   %cmp.i.i.i10 = icmp ult i64 %8, 2147483648
   call void @llvm.assume(i1 %cmp.i.i.i10)
-  %arrayidx.i.i.i11 = getelementptr inbounds float, float addrspace(1)* %add.ptr.i39.i8, i64 %8
-  %14 = load float, float addrspace(1)* %arrayidx.i.i.i11, align 4
-  %arrayidx.i9.i.i13 = getelementptr inbounds float, float addrspace(1)* %add.ptr.i53.i9, i64 %8
-  %15 = load float, float addrspace(1)* %arrayidx.i9.i.i13, align 4
+  %arrayidx.i.i.i11 = getelementptr inbounds float, ptr addrspace(1) %add.ptr.i39.i8, i64 %8
+  %14 = load float, ptr addrspace(1) %arrayidx.i.i.i11, align 4
+  %arrayidx.i9.i.i13 = getelementptr inbounds float, ptr addrspace(1) %add.ptr.i53.i9, i64 %8
+  %15 = load float, ptr addrspace(1) %arrayidx.i9.i.i13, align 4
   %mul.i.i = fmul float %14, %15
-  %arrayidx.i13.i.i15 = getelementptr inbounds float, float addrspace(1)* %add.ptr.i.i7, i64 %8
-  store float %mul.i.i, float addrspace(1)* %arrayidx.i13.i.i15, align 4
+  %arrayidx.i13.i.i15 = getelementptr inbounds float, ptr addrspace(1) %add.ptr.i.i7, i64 %8
+  store float %mul.i.i, ptr addrspace(1) %arrayidx.i13.i.i15, align 4
   call spir_func void @__itt_offload_wi_finish_wrapper() #3
   ret void
 }
@@ -133,7 +133,7 @@ attributes #5 = { nounwind }
 !12 = !{i32 1, i32 0, i32 1, i32 0, i32 1, i32 1, i32 1}
 !13 = !{!"none", !"none", !"none", !"none", !"none", !"none", !"none"}
-!14 = !{!"float*", !"class.sycl::_V1::range", !"float*", !"class.sycl::_V1::range", !"float*", !"float*", !"float*"}
+!14 = !{!"ptr", !"class.sycl::_V1::range", !"ptr", !"class.sycl::_V1::range", !"ptr", !"ptr", !"ptr"}
 !15 = !{!"", !"", !"", !"", !"", !"", !""}
 !16 = !{!"KernelOne_accTmp", !"KernelOne_accTmp3", !"KernelOne_accIn1", !"KernelOne_accIn16", !"KernelOne_accIn2", !"KernelTwo_accOut", !"KernelTwo_accIn3"}
 !17 = !{!"local", !"none", !"none", !"none", !"none", !"none", !"none"}
diff --git a/sycl-fusion/test/internalization/promote-local-vec.ll b/sycl-fusion/test/internalization/promote-local-vec.ll
index 2a745166edb8d..f03ebc811fa3c 100644
--- a/sycl-fusion/test/internalization/promote-local-vec.ll
+++ b/sycl-fusion/test/internalization/promote-local-vec.ll
@@ -30,63 +30,63 @@ declare spir_func void @__itt_offload_wi_start_wrapper() #3
 declare spir_func void @__itt_offload_wi_finish_wrapper() #3
 ; Function Attrs: noinline nounwind
-declare spir_func void @__itt_offload_wi_finish_stub(i64 addrspace(4)* %group_id, i64 %wi_id) #4
+declare spir_func void @__itt_offload_wi_finish_stub(ptr addrspace(4) %group_id, i64 %wi_id) #4
 ; Function Attrs: noinline nounwind
-declare spir_func void @__itt_offload_wi_start_stub(i64 addrspace(4)* %group_id, i64 %wi_id, i32 %wg_size) #4
+declare spir_func void @__itt_offload_wi_start_stub(ptr addrspace(4) %group_id, i64 %wi_id, i32 %wg_size) #4
-define spir_kernel void @fused_0(%0 addrspace(1)* align 16 %KernelOne_accTmp, %1* byval(%1) align 8 %KernelOne_accTmp3, %0 addrspace(1)* align 16 %KernelOne_accIn1, %1* byval(%1) align 8 %KernelOne_accIn16, %0 addrspace(1)* align 16 %KernelOne_accIn2, %0 addrspace(1)* align 16 %KernelTwo_accOut, %0 addrspace(1)* align 16 %KernelTwo_accIn3) !kernel_arg_addr_space !12 !kernel_arg_access_qual !13 !kernel_arg_type !14 !kernel_arg_type_qual !15 !kernel_arg_base_type !14 !kernel_arg_name !16 !sycl.kernel.promote !17 !sycl.kernel.promote.localsize !18 {
+define spir_kernel void @fused_0(ptr addrspace(1) align 16 %KernelOne_accTmp, ptr byval(%1) align 8 %KernelOne_accTmp3, ptr addrspace(1) align 16 %KernelOne_accIn1, ptr byval(%1) align 8 %KernelOne_accIn16, ptr addrspace(1) align 16 %KernelOne_accIn2, ptr addrspace(1) align 16 %KernelTwo_accOut, ptr addrspace(1) align 16 %KernelTwo_accIn3) !kernel_arg_addr_space !12 !kernel_arg_access_qual !13 !kernel_arg_type !14 !kernel_arg_type_qual !15 !kernel_arg_base_type !14 !kernel_arg_name !16 !sycl.kernel.promote !17 !sycl.kernel.promote.localsize !18 {
 ; Scenario: Test the successful private internalization of the first pointer
 ; argument. This means, the first pointer argument has been replaced by a
 ; function-local alloca and all accesses have been updated to use this alloca
 ; instead.
 ; CHECK-LABEL: define {{[^@]+}}@fused_0
-; CHECK-SAME: ([[TYPE0:%.*]] addrspace(3)* align 16 [[KERNELONE_ACCTMP:%.*]], %1* byval([[TYPE1:%.*]]) align 8 [[KERNELONE_ACCTMP3:%.*]], [[TYPE0]] addrspace(1)* align 16 [[KERNELONE_ACCIN1:%.*]], %1* byval([[TYPE1]]) align 8 [[KERNELONE_ACCIN16:%.*]], [[TYPE0]] addrspace(1)* align 16 [[KERNELONE_ACCIN2:%.*]], [[TYPE0]] addrspace(1)* align 16 [[KERNELTWO_ACCOUT:%.*]], [[TYPE0]] addrspace(1)* align 16 [[KERNELTWO_ACCIN3:%.*]])
+; CHECK-SAME: (ptr addrspace(3) align 16 [[KERNELONE_ACCTMP:%.*]], ptr byval([[TYPE1:%.*]]) align 8 [[KERNELONE_ACCTMP3:%.*]], ptr addrspace(1) align 16 [[KERNELONE_ACCIN1:%.*]], ptr byval([[TYPE1]]) align 8 [[KERNELONE_ACCIN16:%.*]], ptr addrspace(1) align 16 [[KERNELONE_ACCIN2:%.*]], ptr addrspace(1) align 16 [[KERNELTWO_ACCOUT:%.*]], ptr addrspace(1) align 16 [[KERNELTWO_ACCIN3:%.*]])
 ; CHECK: entry:
-; CHECK: [[KERNELONE_ACCTMP31_SROA_0_0__SROA_IDX:%.*]] = getelementptr inbounds [[TYPE1]], %1* [[KERNELONE_ACCTMP3]], i64 0, i32 0, i32 0, i64 0
-; CHECK: [[KERNELONE_ACCTMP31_SROA_0_0_COPYLOAD:%.*]] = load i64, i64* [[KERNELONE_ACCTMP31_SROA_0_0__SROA_IDX]], align 1
+; CHECK: [[KERNELONE_ACCTMP31_SROA_0_0__SROA_IDX:%.*]] = getelementptr inbounds [[TYPE1]], ptr [[KERNELONE_ACCTMP3]], i64 0, i32 0, i32 0, i64 0
+; CHECK: [[KERNELONE_ACCTMP31_SROA_0_0_COPYLOAD:%.*]] = load i64, ptr [[KERNELONE_ACCTMP31_SROA_0_0__SROA_IDX]], align 1
 ; CHECK: [[TMP0:%.*]] = urem i64 [[KERNELONE_ACCTMP31_SROA_0_0_COPYLOAD]], 16
-; CHECK: [[ADD_PTR_I_I:%.*]] = getelementptr inbounds [[TYPE0]], [[TYPE0]] addrspace(3)* [[KERNELONE_ACCTMP]], i64 [[TMP0]]
+; CHECK: [[ADD_PTR_I_I:%.*]] = getelementptr inbounds [[TYPE0:%.*]], ptr addrspace(3) [[KERNELONE_ACCTMP]], i64 [[TMP0]]
 ; CHECK: [[TMP1:%.*]] = call spir_func i64 @_Z13get_global_idj(i32 0) #[[ATTR1:[0-9]+]]
 ; CHECK: [[ADD_I_I_I:%.*]] = fadd <4 x float>
 ; CHECK: [[TMP9:%.*]] = add i64 [[TMP1]], [[TMP0]]
 ; CHECK: [[TMP10:%.*]] = urem i64 [[TMP9]], 16
-; CHECK: [[ARRAYIDX_I13_I_I:%.*]] = getelementptr inbounds [[TYPE0]], [[TYPE0]] addrspace(3)* [[ADD_PTR_I_I]], i64 [[TMP10]]
+; CHECK: [[ARRAYIDX_I13_I_I:%.*]] = getelementptr inbounds [[TYPE0]], ptr addrspace(3) [[ADD_PTR_I_I]], i64 [[TMP10]]
 ; CHECK: [[TMP11:%.*]] = add i64 0, [[TMP10]]
 ; CHECK: [[TMP12:%.*]] = add i64 [[TMP11]], [[TMP0]]
 ; CHECK: [[TMP13:%.*]] = urem i64 [[TMP12]], 16
-; CHECK: [[REF_TMP_SROA_0_0__SROA_IDX_I_I:%.*]] = getelementptr inbounds [[TYPE0]], [[TYPE0]] addrspace(3)* [[ARRAYIDX_I13_I_I]], i64 [[TMP13]], i32 0
-; CHECK: store <4 x float> [[ADD_I_I_I]], <4 x float> addrspace(3)* [[REF_TMP_SROA_0_0__SROA_IDX_I_I]], align 16
-; CHECK: [[KERNELONE_ACCTMP35_SROA_0_0__SROA_IDX:%.*]] = getelementptr inbounds [[TYPE1]], %1* [[KERNELONE_ACCTMP3]], i64 0, i32 0, i32 0, i64 0
-; CHECK: [[KERNELONE_ACCTMP35_SROA_0_0_COPYLOAD:%.*]] = load i64, i64* [[KERNELONE_ACCTMP35_SROA_0_0__SROA_IDX]], align 1
+; CHECK: [[REF_TMP_SROA_0_0__SROA_IDX_I_I:%.*]] = getelementptr inbounds [[TYPE0]], ptr addrspace(3) [[ARRAYIDX_I13_I_I]], i64 [[TMP13]], i32 0
+; CHECK: store <4 x float> [[ADD_I_I_I]], ptr addrspace(3) [[REF_TMP_SROA_0_0__SROA_IDX_I_I]], align 16
+; CHECK: [[KERNELONE_ACCTMP35_SROA_0_0__SROA_IDX:%.*]] = getelementptr inbounds [[TYPE1]], ptr [[KERNELONE_ACCTMP3]], i64 0, i32 0, i32 0, i64 0
+; CHECK: [[KERNELONE_ACCTMP35_SROA_0_0_COPYLOAD:%.*]] = load i64, ptr [[KERNELONE_ACCTMP35_SROA_0_0__SROA_IDX]], align 1
 ; CHECK: [[TMP14:%.*]] = urem i64 [[KERNELONE_ACCTMP35_SROA_0_0_COPYLOAD]], 16
-; CHECK: [[ADD_PTR_I39_I8:%.*]] = getelementptr inbounds [[TYPE0]], [[TYPE0]] addrspace(3)* [[KERNELONE_ACCTMP]], i64 [[TMP14]]
+; CHECK: [[ADD_PTR_I39_I8:%.*]] = getelementptr inbounds [[TYPE0]], ptr addrspace(3) [[KERNELONE_ACCTMP]], i64 [[TMP14]]
 ; CHECK: [[TMP15:%.*]] = call spir_func i64 @_Z13get_global_idj(i32 0) #[[ATTR1]]
 ; CHECK: [[TMP21:%.*]] = add i64 [[TMP15]], [[TMP14]]
 ; CHECK: [[TMP22:%.*]] = urem i64 [[TMP21]], 16
-; CHECK: [[ARRAYIDX_I_I_I11:%.*]] = getelementptr inbounds [[TYPE0]], [[TYPE0]] addrspace(3)* [[ADD_PTR_I39_I8]], i64 [[TMP22]]
+; CHECK: [[ARRAYIDX_I_I_I11:%.*]] = getelementptr inbounds [[TYPE0]], ptr addrspace(3) [[ADD_PTR_I39_I8]], i64 [[TMP22]]
 ; CHECK: [[TMP23:%.*]] = add i64 0, [[TMP22]]
 ; CHECK: [[TMP24:%.*]] = add i64 [[TMP23]], [[TMP14]]
 ; CHECK: [[TMP25:%.*]] = urem i64 [[TMP24]], 16
-; CHECK: [[M_DATA_I_I_I15:%.*]] = getelementptr inbounds [[TYPE0]], [[TYPE0]] addrspace(3)* [[ARRAYIDX_I_I_I11]], i64 [[TMP25]], i32 0
-; CHECK: [[TMP26:%.*]] = load <4 x float>, <4 x float> addrspace(3)* [[M_DATA_I_I_I15]], align 16
+; CHECK: [[M_DATA_I_I_I15:%.*]] = getelementptr inbounds [[TYPE0]], ptr addrspace(3) [[ARRAYIDX_I_I_I11]], i64 [[TMP25]], i32 0
+; CHECK: [[TMP26:%.*]] = load <4 x float>, ptr addrspace(3) [[M_DATA_I_I_I15]], align 16
 ; CHECK: [[MUL_I_I_I:%.*]] = fmul <4 x float> [[TMP26]]
 ; CHECK: store <4 x float> [[MUL_I_I_I]]
 ; CHECK-NOT: store
 ; CHECK: ret void
 ;
 entry:
-  %KernelOne_accIn163.sroa.0.0..sroa_idx = getelementptr inbounds %1, %1* %KernelOne_accIn16, i64 0, i32 0, i32 0, i64 0
-  %KernelOne_accIn163.sroa.0.0.copyload = load i64, i64* %KernelOne_accIn163.sroa.0.0..sroa_idx, align 1
-  %KernelOne_accIn162.sroa.0.0..sroa_idx = getelementptr inbounds %1, %1* %KernelOne_accIn16, i64 0, i32 0, i32 0, i64 0
-  %KernelOne_accIn162.sroa.0.0.copyload = load i64, i64* %KernelOne_accIn162.sroa.0.0..sroa_idx, align 1
-  %KernelOne_accTmp31.sroa.0.0..sroa_idx = getelementptr inbounds %1, %1* %KernelOne_accTmp3, i64 0, i32 0, i32 0, i64 0
-  %KernelOne_accTmp31.sroa.0.0.copyload = load i64, i64* %KernelOne_accTmp31.sroa.0.0..sroa_idx, align 1
+  %KernelOne_accIn163.sroa.0.0..sroa_idx = getelementptr inbounds %1, ptr %KernelOne_accIn16, i64 0, i32 0, i32 0, i64 0
+  %KernelOne_accIn163.sroa.0.0.copyload = load i64, ptr %KernelOne_accIn163.sroa.0.0..sroa_idx, align 1
+  %KernelOne_accIn162.sroa.0.0..sroa_idx = getelementptr inbounds %1, ptr %KernelOne_accIn16, i64 0, i32 0, i32 0, i64 0
+  %KernelOne_accIn162.sroa.0.0.copyload = load i64, ptr %KernelOne_accIn162.sroa.0.0..sroa_idx, align 1
+  %KernelOne_accTmp31.sroa.0.0..sroa_idx = getelementptr inbounds %1, ptr %KernelOne_accTmp3, i64 0, i32 0, i32 0, i64 0
+  %KernelOne_accTmp31.sroa.0.0.copyload = load i64, ptr %KernelOne_accTmp31.sroa.0.0..sroa_idx, align 1
   call spir_func void @__itt_offload_wi_start_wrapper() #3
-  %add.ptr.i.i = getelementptr inbounds %0, %0 addrspace(1)* %KernelOne_accTmp, i64 %KernelOne_accTmp31.sroa.0.0.copyload
-  %add.ptr.i39.i = getelementptr inbounds %0, %0 addrspace(1)* %KernelOne_accIn1, i64 %KernelOne_accIn162.sroa.0.0.copyload
-  %add.ptr.i53.i = getelementptr inbounds %0, %0 addrspace(1)* %KernelOne_accIn2, i64 %KernelOne_accIn163.sroa.0.0.copyload
+  %add.ptr.i.i = getelementptr inbounds %0, ptr addrspace(1) %KernelOne_accTmp, i64 %KernelOne_accTmp31.sroa.0.0.copyload
+  %add.ptr.i39.i = getelementptr inbounds %0, ptr addrspace(1) %KernelOne_accIn1, i64 %KernelOne_accIn162.sroa.0.0.copyload
+  %add.ptr.i53.i = getelementptr inbounds %0, ptr addrspace(1) %KernelOne_accIn2, i64 %KernelOne_accIn163.sroa.0.0.copyload
   %0 = call spir_func i64 @_Z13get_global_idj(i32 0) #2
   %1 = insertelement <3 x i64> undef, i64 %0, i32 0
   %2 = call spir_func i64 @_Z13get_global_idj(i32 1) #2
@@ -95,27 +95,27 @@ entry:
   %5 = insertelement <3 x i64> %3, i64 %4, i32 2
   %cmp.i.i.i = icmp ult i64 %0, 2147483648
   call void @llvm.assume(i1 %cmp.i.i.i)
-  %arrayidx.i.i.i = getelementptr inbounds %0, %0 addrspace(1)* %add.ptr.i39.i, i64 %0
-  %arrayidx.i9.i.i = getelementptr inbounds %0, %0 addrspace(1)* %add.ptr.i53.i, i64 %0
-  %m_Data.i.i.i = getelementptr inbounds %0, %0 addrspace(1)* %arrayidx.i.i.i, i64 0, i32 0
-  %6 = load <4 x float>, <4 x float> addrspace(1)* %m_Data.i.i.i, align 16
-  %m_Data2.i.i.i = getelementptr inbounds %0, %0 addrspace(1)* %arrayidx.i9.i.i, i64 0, i32 0
-  %7 = load <4 x float>, <4 x float> addrspace(1)* %m_Data2.i.i.i, align 16
+  %arrayidx.i.i.i = getelementptr inbounds %0, ptr addrspace(1) %add.ptr.i39.i, i64 %0
+  %arrayidx.i9.i.i = getelementptr inbounds %0, ptr addrspace(1) %add.ptr.i53.i, i64 %0
+  %m_Data.i.i.i = getelementptr inbounds %0, ptr addrspace(1) %arrayidx.i.i.i, i64 0, i32 0
+  %6 = load <4 x float>, ptr addrspace(1) %m_Data.i.i.i, align 16
+  %m_Data2.i.i.i = getelementptr inbounds %0, ptr addrspace(1) %arrayidx.i9.i.i, i64 0, i32 0
+  %7 = load <4 x float>, ptr addrspace(1) %m_Data2.i.i.i, align 16
   %add.i.i.i = fadd <4 x float> %6, %7
-  %arrayidx.i13.i.i = getelementptr inbounds %0, %0 addrspace(1)* %add.ptr.i.i, i64 %0
-  %ref.tmp.sroa.0.0..sroa_idx.i.i = getelementptr inbounds %0, %0 addrspace(1)* %arrayidx.i13.i.i, i64 0, i32 0
-  store <4 x float> %add.i.i.i, <4 x float> addrspace(1)* %ref.tmp.sroa.0.0..sroa_idx.i.i, align 16
+  %arrayidx.i13.i.i = getelementptr inbounds %0, ptr addrspace(1) %add.ptr.i.i, i64 %0
+  %ref.tmp.sroa.0.0..sroa_idx.i.i = getelementptr inbounds %0, ptr addrspace(1) %arrayidx.i13.i.i, i64 0, i32 0
+  store <4 x float> %add.i.i.i, ptr addrspace(1) %ref.tmp.sroa.0.0..sroa_idx.i.i, align 16
   call spir_func void @__itt_offload_wi_finish_wrapper() #3
-  %KernelOne_accIn166.sroa.0.0..sroa_idx = getelementptr inbounds %1, %1* %KernelOne_accIn16, i64 0, i32 0, i32 0, i64 0
-  %KernelOne_accIn166.sroa.0.0.copyload = load i64, i64* %KernelOne_accIn166.sroa.0.0..sroa_idx, align 1
-  %KernelOne_accTmp35.sroa.0.0..sroa_idx = getelementptr inbounds %1, %1* %KernelOne_accTmp3, i64 0, i32 0, i32 0, i64 0
-  %KernelOne_accTmp35.sroa.0.0.copyload = load i64, i64* %KernelOne_accTmp35.sroa.0.0..sroa_idx, align 1
-  %KernelOne_accIn164.sroa.0.0..sroa_idx = getelementptr inbounds %1, %1* %KernelOne_accIn16, i64 0, i32 0, i32 0, i64 0
-  %KernelOne_accIn164.sroa.0.0.copyload = load i64, i64* %KernelOne_accIn164.sroa.0.0..sroa_idx, align 1
+  %KernelOne_accIn166.sroa.0.0..sroa_idx = getelementptr inbounds %1, ptr %KernelOne_accIn16, i64 0, i32 0, i32 0, i64 0
+  %KernelOne_accIn166.sroa.0.0.copyload = load i64, ptr %KernelOne_accIn166.sroa.0.0..sroa_idx, align 1
+  %KernelOne_accTmp35.sroa.0.0..sroa_idx = getelementptr inbounds %1, ptr %KernelOne_accTmp3, i64 0, i32 0, i32 0, i64 0
+  %KernelOne_accTmp35.sroa.0.0.copyload = load i64, ptr %KernelOne_accTmp35.sroa.0.0..sroa_idx, align 1
+  %KernelOne_accIn164.sroa.0.0..sroa_idx = getelementptr inbounds %1, ptr %KernelOne_accIn16, i64 0, i32 0, i32 0, i64 0
+  %KernelOne_accIn164.sroa.0.0.copyload = load i64, ptr %KernelOne_accIn164.sroa.0.0..sroa_idx, align 1
   call spir_func void @__itt_offload_wi_start_wrapper() #3
-  %add.ptr.i.i7 = getelementptr inbounds %0, %0 addrspace(1)* %KernelTwo_accOut, i64 %KernelOne_accIn164.sroa.0.0.copyload
-  %add.ptr.i39.i8 = getelementptr inbounds %0, %0 addrspace(1)* %KernelOne_accTmp, i64 %KernelOne_accTmp35.sroa.0.0.copyload
-  %add.ptr.i53.i9 = getelementptr inbounds %0, %0 addrspace(1)* %KernelTwo_accIn3, i64 %KernelOne_accIn166.sroa.0.0.copyload
+  %add.ptr.i.i7 = getelementptr inbounds %0, ptr addrspace(1) %KernelTwo_accOut, i64 %KernelOne_accIn164.sroa.0.0.copyload
+  %add.ptr.i39.i8 = getelementptr inbounds %0, ptr addrspace(1) %KernelOne_accTmp, i64 %KernelOne_accTmp35.sroa.0.0.copyload
+  %add.ptr.i53.i9 = getelementptr inbounds %0, ptr addrspace(1) %KernelTwo_accIn3, i64 %KernelOne_accIn166.sroa.0.0.copyload
   %8 = call spir_func i64 @_Z13get_global_idj(i32 0) #2
   %9 = insertelement <3 x i64> undef, i64 %8, i32 0
   %10 = call spir_func i64 @_Z13get_global_idj(i32 1) #2
@@ -124,16 +124,16 @@ entry:
   %13 = insertelement <3 x i64> %11, i64 %12, i32 2
   %cmp.i.i.i10 = icmp ult i64 %8, 2147483648
   call void @llvm.assume(i1 %cmp.i.i.i10)
-  %arrayidx.i.i.i11 = getelementptr inbounds %0, %0 addrspace(1)* %add.ptr.i39.i8, i64 %8
-  %arrayidx.i9.i.i13 = getelementptr inbounds %0, %0 addrspace(1)* %add.ptr.i53.i9, i64 %8
-  %m_Data.i.i.i15 = getelementptr inbounds %0, %0 addrspace(1)* %arrayidx.i.i.i11, i64 0, i32 0
-  %14 = load <4 x float>, <4 x float> addrspace(1)* %m_Data.i.i.i15, align 16
-  %m_Data2.i.i.i16 = getelementptr inbounds %0, %0 addrspace(1)* %arrayidx.i9.i.i13, i64 0, i32 0
-  %15 = load <4 x float>, <4 x float> addrspace(1)* %m_Data2.i.i.i16, align 16
+  %arrayidx.i.i.i11 = getelementptr inbounds %0, ptr addrspace(1) %add.ptr.i39.i8, i64 %8
+  %arrayidx.i9.i.i13 = getelementptr inbounds %0, ptr addrspace(1) %add.ptr.i53.i9, i64 %8
+  %m_Data.i.i.i15 = getelementptr inbounds %0, ptr addrspace(1) %arrayidx.i.i.i11, i64 0, i32 0
+  %14 = load <4 x float>, ptr addrspace(1) %m_Data.i.i.i15, align 16
+  %m_Data2.i.i.i16 = getelementptr inbounds %0, ptr addrspace(1) %arrayidx.i9.i.i13, i64 0, i32 0
+  %15 = load <4 x float>, ptr addrspace(1) %m_Data2.i.i.i16, align 16
   %mul.i.i.i = fmul <4 x float> %14, %15
-  %arrayidx.i13.i.i17 = getelementptr inbounds %0, %0 addrspace(1)* %add.ptr.i.i7, i64 %8
-  %ref.tmp.sroa.0.0..sroa_idx.i.i19 = getelementptr inbounds %0, %0 addrspace(1)* %arrayidx.i13.i.i17, i64 0, i32 0
-  store <4 x float> %mul.i.i.i, <4 x float> addrspace(1)* %ref.tmp.sroa.0.0..sroa_idx.i.i19, align 16
+  %arrayidx.i13.i.i17 = getelementptr inbounds %0, ptr addrspace(1) %add.ptr.i.i7, i64 %8
+  %ref.tmp.sroa.0.0..sroa_idx.i.i19 = getelementptr inbounds %0, ptr addrspace(1) %arrayidx.i13.i.i17, i64 0, i32 0
+  store <4 x float> %mul.i.i.i, ptr addrspace(1) %ref.tmp.sroa.0.0..sroa_idx.i.i19, align 16
   call spir_func void @__itt_offload_wi_finish_wrapper() #3
   ret void
 }
@@ -148,7 +148,7 @@ attributes #5 = { nounwind }
 !12 = !{i32 1, i32 0, i32 1, i32 0, i32 1, i32 1, i32 1}
 !13 = !{!"none", !"none", !"none", !"none", !"none", !"none", !"none"}
-!14 = !{!"class.sycl::_V1::vec*", !"class.sycl::_V1::range", !"class.sycl::_V1::vec*", !"class.sycl::_V1::range", !"class.sycl::_V1::vec*", !"class.sycl::_V1::vec*", !"class.sycl::_V1::vec*"}
+!14 = !{!"ptr", !"class.sycl::_V1::range", !"ptr", !"class.sycl::_V1::range", !"ptr", !"ptr", !"ptr"}
 !15 = !{!"", !"", !"", !"", !"", !"", !""}
 !16 = !{!"KernelOne_accTmp", !"KernelOne_accTmp3", !"KernelOne_accIn1", !"KernelOne_accIn16", !"KernelOne_accIn2", !"KernelTwo_accOut", !"KernelTwo_accIn3"}
 !17 = !{!"local", !"none", !"none", !"none", !"none", !"none", !"none"}
diff --git a/sycl-fusion/test/internalization/promote-private-scalar.ll b/sycl-fusion/test/internalization/promote-private-scalar.ll
index 0d805576d3917..6e1d4cb674ecf 100644
--- a/sycl-fusion/test/internalization/promote-private-scalar.ll
+++ b/sycl-fusion/test/internalization/promote-private-scalar.ll
@@ -29,46 +29,46 @@ declare spir_func void @__itt_offload_wi_start_wrapper() #3
 declare spir_func void @__itt_offload_wi_finish_wrapper() #3
 ; Function Attrs: noinline nounwind
-declare spir_func void @__itt_offload_wi_finish_stub(i64 addrspace(4)* %group_id, i64 %wi_id) #4
+declare spir_func void @__itt_offload_wi_finish_stub(ptr addrspace(4) %group_id, i64 %wi_id) #4
 ; Function Attrs: noinline nounwind
-declare spir_func void @__itt_offload_wi_start_stub(i64 addrspace(4)* %group_id, i64 %wi_id, i32 %wg_size) #4
+declare spir_func void @__itt_offload_wi_start_stub(ptr addrspace(4) %group_id, i64 %wi_id, i32 %wg_size) #4
-define spir_kernel void @fused_0(float addrspace(1)* align 4 %KernelOne_accTmp, %0* byval(%0) align 8 %KernelOne_accTmp3, float addrspace(1)* align 4 %KernelOne_accIn1, %0* byval(%0) align 8 %KernelOne_accIn16, float addrspace(1)* align 4 %KernelOne_accIn2, float addrspace(1)* align 4 %KernelTwo_accOut, float addrspace(1)* align 4 %KernelTwo_accIn3) !kernel_arg_addr_space !12 !kernel_arg_access_qual !13 !kernel_arg_type !14 !kernel_arg_type_qual !15 !kernel_arg_base_type !14 !kernel_arg_name !16 !sycl.kernel.promote !17 !sycl.kernel.promote.localsize !18 {
+define spir_kernel void @fused_0(ptr addrspace(1) align 4 %KernelOne_accTmp, ptr byval(%0) align 8 %KernelOne_accTmp3, ptr addrspace(1) align 4 %KernelOne_accIn1, ptr byval(%0) align 8 %KernelOne_accIn16, ptr addrspace(1) align 4 %KernelOne_accIn2, ptr addrspace(1) align 4 %KernelTwo_accOut, ptr addrspace(1) align 4 %KernelTwo_accIn3) !kernel_arg_addr_space !12 !kernel_arg_access_qual !13 !kernel_arg_type !14 !kernel_arg_type_qual !15 !kernel_arg_base_type !14 !kernel_arg_name !16 !sycl.kernel.promote !17 !sycl.kernel.promote.localsize !18 {
 ; Scenario: Test the successful private internalization of the first pointer
 ; argument. This means, the first pointer argument has been replaced by a
 ; function-local alloca and all accesses have been updated to use this alloca
 ; instead.
 ; CHECK-LABEL: define {{[^@]+}}@fused_0
-; CHECK-SAME: (%0* byval([[TYPE0:%.*]]) align 8 [[KERNELONE_ACCTMP3:%.*]], float addrspace(1)* align 4 [[KERNELONE_ACCIN1:%.*]], %0* byval([[TYPE0]]) align 8 [[KERNELONE_ACCIN16:%.*]], float addrspace(1)* align 4 [[KERNELONE_ACCIN2:%.*]], float addrspace(1)* align 4 [[KERNELTWO_ACCOUT:%.*]], float addrspace(1)* align 4 [[KERNELTWO_ACCIN3:%.*]])
+; CHECK-SAME: (ptr byval([[TYPE0:%.*]]) align 8 [[KERNELONE_ACCTMP3:%.*]], ptr addrspace(1) align 4 [[KERNELONE_ACCIN1:%.*]], ptr byval([[TYPE0]]) align 8 [[KERNELONE_ACCIN16:%.*]], ptr addrspace(1) align 4 [[KERNELONE_ACCIN2:%.*]], ptr addrspace(1) align 4 [[KERNELTWO_ACCOUT:%.*]], ptr addrspace(1) align 4 [[KERNELTWO_ACCIN3:%.*]])
 ; CHECK: entry:
 ; CHECK: [[TMP0:%.*]] = alloca [1 x float], align 4
-; CHECK: [[TMP1:%.*]] = getelementptr inbounds [1 x float], [1 x float]* [[TMP0]], i64 0, i64 0
-; CHECK: [[ADD_PTR_I_I:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 0
+; CHECK: [[TMP1:%.*]] = getelementptr inbounds [1 x float], ptr [[TMP0]], i64 0, i64 0
+; CHECK: [[ADD_PTR_I_I:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 0
 ; CHECK: [[ADD_I_I:%.*]] = fadd float
-; CHECK: [[ARRAYIDX_I13_I_I:%.*]] = getelementptr inbounds float, float* [[ADD_PTR_I_I]], i64 0
-; CHECK: store float [[ADD_I_I]], float* [[ARRAYIDX_I13_I_I]], align 4
-; CHECK: [[ADD_PTR_I39_I8:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 0
-; CHECK: [[ARRAYIDX_I_I_I11:%.*]] = getelementptr inbounds float, float* [[ADD_PTR_I39_I8]], i64 0
-; CHECK: [[TMP16:%.*]] = load float, float* [[ARRAYIDX_I_I_I11]], align 4
+; CHECK: [[ARRAYIDX_I13_I_I:%.*]] = getelementptr inbounds float, ptr [[ADD_PTR_I_I]], i64 0
+; CHECK: store float [[ADD_I_I]], ptr [[ARRAYIDX_I13_I_I]], align 4
+; CHECK: [[ADD_PTR_I39_I8:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 0
+; CHECK: [[ARRAYIDX_I_I_I11:%.*]] = getelementptr inbounds float, ptr [[ADD_PTR_I39_I8]], i64 0
+; CHECK: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX_I_I_I11]], align 4
 ; CHECK: [[MUL_I_I:%.*]] = fmul float [[TMP16]]
 ; CHECK: store float [[MUL_I_I]]
 ; CHECK-NOT: store
 ; CHECK: ret void
 ;
 entry:
-  %KernelOne_accIn163.sroa.0.0..sroa_idx = getelementptr inbounds %0, %0* %KernelOne_accIn16, i64 0, i32 0, i32 0, i64 0
-  %KernelOne_accIn163.sroa.0.0.copyload = load i64, i64* %KernelOne_accIn163.sroa.0.0..sroa_idx, align 1
-  %KernelOne_accIn162.sroa.0.0..sroa_idx = getelementptr inbounds %0, %0* %KernelOne_accIn16, i64 0, i32 0, i32 0, i64 0
-  %KernelOne_accIn162.sroa.0.0.copyload = load i64, i64* %KernelOne_accIn162.sroa.0.0..sroa_idx, align 1
-  %KernelOne_accTmp31.sroa.0.0..sroa_idx = getelementptr inbounds %0, %0* %KernelOne_accTmp3, i64 0, i32 0, i32 0, i64 0
-  %KernelOne_accTmp31.sroa.0.0.copyload = load i64, i64* %KernelOne_accTmp31.sroa.0.0..sroa_idx, align 1
+  %KernelOne_accIn163.sroa.0.0..sroa_idx = getelementptr inbounds %0, ptr %KernelOne_accIn16, i64 0, i32 0, i32 0, i64 0
+  %KernelOne_accIn163.sroa.0.0.copyload = load i64, ptr %KernelOne_accIn163.sroa.0.0..sroa_idx, align 1
+  %KernelOne_accIn162.sroa.0.0..sroa_idx = getelementptr inbounds %0, ptr %KernelOne_accIn16, i64 0, i32 0, i32 0, i64 0
+  %KernelOne_accIn162.sroa.0.0.copyload = load i64, ptr %KernelOne_accIn162.sroa.0.0..sroa_idx, align 1
+  %KernelOne_accTmp31.sroa.0.0..sroa_idx = getelementptr inbounds %0, ptr %KernelOne_accTmp3, i64 0, i32 0, i32 0, i64 0
+  %KernelOne_accTmp31.sroa.0.0.copyload = load i64, ptr %KernelOne_accTmp31.sroa.0.0..sroa_idx, align 1
   call spir_func void @__itt_offload_wi_start_wrapper() #3
-  %add.ptr.i.i = getelementptr inbounds float, float addrspace(1)* %KernelOne_accTmp, i64 %KernelOne_accTmp31.sroa.0.0.copyload
-  %add.ptr.i39.i = getelementptr inbounds float, float addrspace(1)* %KernelOne_accIn1, i64 %KernelOne_accIn162.sroa.0.0.copyload
-  %add.ptr.i53.i = getelementptr inbounds float, float addrspace(1)* %KernelOne_accIn2, i64 %KernelOne_accIn163.sroa.0.0.copyload
+  %add.ptr.i.i = getelementptr inbounds float, ptr addrspace(1) %KernelOne_accTmp, i64 %KernelOne_accTmp31.sroa.0.0.copyload
+  %add.ptr.i39.i = getelementptr inbounds float, ptr addrspace(1) %KernelOne_accIn1, i64 %KernelOne_accIn162.sroa.0.0.copyload
+  %add.ptr.i53.i = getelementptr inbounds float, ptr addrspace(1) %KernelOne_accIn2, i64 %KernelOne_accIn163.sroa.0.0.copyload
   %0 = call spir_func i64 @_Z13get_global_idj(i32 0) #2
   %1 = insertelement <3 x i64> undef, i64 %0, i32 0
   %2 = call spir_func i64 @_Z13get_global_idj(i32 1) #2
@@ -77,24 +77,24 @@ entry:
   %5 = insertelement <3 x i64> %3, i64 %4, i32 2
   %cmp.i.i.i = icmp ult i64 %0, 2147483648
   call void @llvm.assume(i1 %cmp.i.i.i)
-  %arrayidx.i.i.i = getelementptr inbounds float, float addrspace(1)* %add.ptr.i39.i, i64 %0
-  %6 = load float, float addrspace(1)* %arrayidx.i.i.i, align 4
-  %arrayidx.i9.i.i = getelementptr inbounds float, float addrspace(1)* %add.ptr.i53.i, i64 %0
-  %7 = load float, float addrspace(1)* %arrayidx.i9.i.i, align 4
+  %arrayidx.i.i.i = getelementptr inbounds float, ptr addrspace(1) %add.ptr.i39.i, i64 %0
+  %6 = load float, ptr addrspace(1) %arrayidx.i.i.i, align 4
+  %arrayidx.i9.i.i = getelementptr inbounds float, ptr addrspace(1) %add.ptr.i53.i, i64 %0
+  %7 = load float, ptr addrspace(1) %arrayidx.i9.i.i, align 4
   %add.i.i = fadd float %6, %7
-  %arrayidx.i13.i.i = getelementptr inbounds float, float addrspace(1)* %add.ptr.i.i, i64 %0
-  store float %add.i.i, float addrspace(1)* %arrayidx.i13.i.i, align 4
+  %arrayidx.i13.i.i = getelementptr inbounds float, ptr addrspace(1) %add.ptr.i.i, i64 %0
+  store float %add.i.i, ptr addrspace(1) %arrayidx.i13.i.i, align 4
   call spir_func void @__itt_offload_wi_finish_wrapper() #3
-  %KernelOne_accIn166.sroa.0.0..sroa_idx = getelementptr inbounds %0, %0* %KernelOne_accIn16, i64 0, i32 0, i32 0, i64 0
-  %KernelOne_accIn166.sroa.0.0.copyload = load i64, i64* %KernelOne_accIn166.sroa.0.0..sroa_idx, align 1
-  %KernelOne_accTmp35.sroa.0.0..sroa_idx = getelementptr inbounds %0, %0* %KernelOne_accTmp3, i64 0, i32 0, i32 0, i64 0
-  %KernelOne_accTmp35.sroa.0.0.copyload = load i64, i64* %KernelOne_accTmp35.sroa.0.0..sroa_idx, align 1
-  %KernelOne_accIn164.sroa.0.0..sroa_idx = getelementptr inbounds %0, %0* %KernelOne_accIn16, i64 0, i32 0, i32 0, i64 0
-  %KernelOne_accIn164.sroa.0.0.copyload = load i64, i64* %KernelOne_accIn164.sroa.0.0..sroa_idx, align 1
+  %KernelOne_accIn166.sroa.0.0..sroa_idx = getelementptr inbounds %0, ptr %KernelOne_accIn16, i64 0, i32 0, i32 0, i64 0
+  %KernelOne_accIn166.sroa.0.0.copyload = load i64, ptr %KernelOne_accIn166.sroa.0.0..sroa_idx, align 1
+  %KernelOne_accTmp35.sroa.0.0..sroa_idx = getelementptr inbounds %0, ptr %KernelOne_accTmp3, i64 0, i32 0, i32 0, i64 0
+  %KernelOne_accTmp35.sroa.0.0.copyload = load i64, ptr %KernelOne_accTmp35.sroa.0.0..sroa_idx, align 1
+  %KernelOne_accIn164.sroa.0.0..sroa_idx = getelementptr inbounds %0, ptr %KernelOne_accIn16, i64 0, i32 0, i32 0, i64 0
+  %KernelOne_accIn164.sroa.0.0.copyload = load i64, ptr %KernelOne_accIn164.sroa.0.0..sroa_idx, align 1
   call spir_func void @__itt_offload_wi_start_wrapper() #3
-  %add.ptr.i.i7 = getelementptr inbounds float, float addrspace(1)* %KernelTwo_accOut, i64 %KernelOne_accIn164.sroa.0.0.copyload
-  %add.ptr.i39.i8 = getelementptr inbounds float, float addrspace(1)* %KernelOne_accTmp, i64 %KernelOne_accTmp35.sroa.0.0.copyload
-  %add.ptr.i53.i9 = getelementptr inbounds float, float addrspace(1)* %KernelTwo_accIn3, i64 %KernelOne_accIn166.sroa.0.0.copyload
+  %add.ptr.i.i7 = getelementptr inbounds float, ptr addrspace(1) %KernelTwo_accOut, i64 %KernelOne_accIn164.sroa.0.0.copyload
+  %add.ptr.i39.i8 = getelementptr inbounds float, ptr addrspace(1) %KernelOne_accTmp, i64 %KernelOne_accTmp35.sroa.0.0.copyload
+  %add.ptr.i53.i9 = getelementptr inbounds float, ptr addrspace(1) %KernelTwo_accIn3, i64 %KernelOne_accIn166.sroa.0.0.copyload
  %8 = call spir_func i64 @_Z13get_global_idj(i32 0) #2
   %9 = insertelement <3 x i64> undef, i64 %8, i32 0
   %10 = call spir_func i64 @_Z13get_global_idj(i32 1) #2
@@ -103,13 +103,13 @@ entry:
   %13 = insertelement <3 x i64> %11, i64 %12, i32 2
   %cmp.i.i.i10 = icmp ult i64 %8, 2147483648
   call void @llvm.assume(i1 %cmp.i.i.i10)
-  %arrayidx.i.i.i11 = getelementptr inbounds float, float addrspace(1)* %add.ptr.i39.i8, i64 %8
-  %14 = load float, float addrspace(1)* %arrayidx.i.i.i11, align 4
-  %arrayidx.i9.i.i13 = getelementptr inbounds float, float addrspace(1)* %add.ptr.i53.i9, i64 %8
-  %15 = load float, float addrspace(1)* %arrayidx.i9.i.i13, align 4
+  %arrayidx.i.i.i11 = getelementptr inbounds float, ptr addrspace(1) %add.ptr.i39.i8, i64 %8
+  %14 = load float, ptr addrspace(1) %arrayidx.i.i.i11, align 4
+  %arrayidx.i9.i.i13 = getelementptr inbounds float, ptr addrspace(1) %add.ptr.i53.i9, i64 %8
+  %15 = load float, ptr addrspace(1) %arrayidx.i9.i.i13, align 4
   %mul.i.i = fmul float %14, %15
-  %arrayidx.i13.i.i15 = getelementptr inbounds float, float addrspace(1)* %add.ptr.i.i7, i64 %8
-  store float %mul.i.i, float addrspace(1)* %arrayidx.i13.i.i15, align 4
+  %arrayidx.i13.i.i15 = getelementptr inbounds float, ptr addrspace(1) %add.ptr.i.i7, i64 %8
+  store float %mul.i.i, ptr addrspace(1) %arrayidx.i13.i.i15, align 4
   call spir_func void @__itt_offload_wi_finish_wrapper() #3
   ret void
 }
@@ -124,7 +124,7 @@ attributes #5 = { nounwind }
 !12 = !{i32 1, i32 0, i32 1, i32 0, i32 1, i32 1, i32 1}
 !13 = !{!"none", !"none", !"none", !"none", !"none", !"none", !"none"}
-!14 = !{!"float*", !"class.sycl::_V1::range", !"float*", !"class.sycl::_V1::range", !"float*", !"float*", !"float*"}
+!14 = !{!"ptr", !"class.sycl::_V1::range", !"ptr", !"class.sycl::_V1::range", !"ptr", !"ptr", !"ptr"}
 !15 = !{!"", !"", !"", !"", !"", !"", !""}
 !16 = !{!"KernelOne_accTmp", !"KernelOne_accTmp3", !"KernelOne_accIn1", !"KernelOne_accIn16", !"KernelOne_accIn2", !"KernelTwo_accOut", !"KernelTwo_accIn3"}
 !17 = !{!"private", !"none", !"none", !"none", !"none", !"none", !"none"}
diff --git a/sycl-fusion/test/internalization/promote-private-vec.ll b/sycl-fusion/test/internalization/promote-private-vec.ll
index 0cb56002967e2..b682833d9da7a 100644
--- a/sycl-fusion/test/internalization/promote-private-vec.ll
+++ b/sycl-fusion/test/internalization/promote-private-vec.ll
@@ -30,47 +30,47 @@ declare spir_func void @__itt_offload_wi_start_wrapper() #3
 declare spir_func void @__itt_offload_wi_finish_wrapper() #3
 ; Function Attrs: noinline nounwind
-declare spir_func void @__itt_offload_wi_finish_stub(i64 addrspace(4)* %group_id, i64 %wi_id) #4
+declare spir_func void @__itt_offload_wi_finish_stub(ptr addrspace(4) %group_id, i64 %wi_id) #4
 ; Function Attrs: noinline nounwind
spir_func void @__itt_offload_wi_start_stub(i64 addrspace(4)* %group_id, i64 %wi_id, i32 %wg_size) #4 +declare spir_func void @__itt_offload_wi_start_stub(ptr addrspace(4) %group_id, i64 %wi_id, i32 %wg_size) #4 -define spir_kernel void @fused_0(%0 addrspace(1)* align 16 %KernelOne_accTmp, %1* byval(%1) align 8 %KernelOne_accTmp3, %0 addrspace(1)* align 16 %KernelOne_accIn1, %1* byval(%1) align 8 %KernelOne_accIn16, %0 addrspace(1)* align 16 %KernelOne_accIn2, %0 addrspace(1)* align 16 %KernelTwo_accOut, %0 addrspace(1)* align 16 %KernelTwo_accIn3) !kernel_arg_addr_space !12 !kernel_arg_access_qual !13 !kernel_arg_type !14 !kernel_arg_type_qual !15 !kernel_arg_base_type !14 !kernel_arg_name !16 !sycl.kernel.promote !17 !sycl.kernel.promote.localsize !18 { +define spir_kernel void @fused_0(ptr addrspace(1) align 16 %KernelOne_accTmp, ptr byval(%1) align 8 %KernelOne_accTmp3, ptr addrspace(1) align 16 %KernelOne_accIn1, ptr byval(%1) align 8 %KernelOne_accIn16, ptr addrspace(1) align 16 %KernelOne_accIn2, ptr addrspace(1) align 16 %KernelTwo_accOut, ptr addrspace(1) align 16 %KernelTwo_accIn3) !kernel_arg_addr_space !12 !kernel_arg_access_qual !13 !kernel_arg_type !14 !kernel_arg_type_qual !15 !kernel_arg_base_type !14 !kernel_arg_name !16 !sycl.kernel.promote !17 !sycl.kernel.promote.localsize !18 { ; Scenario: Test the successful private internalization of the first pointer ; argument. This means, the first pointer argument has been replaced by a ; function-local alloca and all accesses have been updated to use this alloca ; instead. ; CHECK-LABEL: define {{[^@]+}}@fused_0 -; CHECK-SAME: (%0* byval([[TYPE0:%.*]]) align 8 [[KERNELONE_ACCTMP3:%.*]], [[TYPE2:%.*]] addrspace(1)* align 16 [[KERNELONE_ACCIN1:%.*]], %0* byval([[TYPE0]]) align 8 [[KERNELONE_ACCIN16:%.*]], [[TYPE2]] addrspace(1)* align 16 [[KERNELONE_ACCIN2:%.*]], [[TYPE2]] addrspace(1)* align 16 [[KERNELTWO_ACCOUT:%.*]], [[TYPE2]] addrspace(1)* align 16 [[KERNELTWO_ACCIN3:%.*]]) +; CHECK-SAME: (ptr byval([[TYPE0:%.*]]) align 8 [[KERNELONE_ACCTMP3:%.*]], ptr addrspace(1) align 16 [[KERNELONE_ACCIN1:%.*]], ptr byval([[TYPE0]]) align 8 [[KERNELONE_ACCIN16:%.*]], ptr addrspace(1) align 16 [[KERNELONE_ACCIN2:%.*]], ptr addrspace(1) align 16 [[KERNELTWO_ACCOUT:%.*]], ptr addrspace(1) align 16 [[KERNELTWO_ACCIN3:%.*]]) ; CHECK: entry: -; CHECK: [[TMP0:%.*]] = alloca [1 x [[TYPE2]]], align 16 -; CHECK: [[TMP1:%.*]] = getelementptr inbounds [1 x [[TYPE2]]], [1 x [[TYPE2]]]* [[TMP0]], i64 0, i64 0 -; CHECK: [[ADD_PTR_I_I:%.*]] = getelementptr inbounds [[TYPE2]], [[TYPE2]]* [[TMP1]], i64 0 +; CHECK: [[TMP0:%.*]] = alloca [1 x [[TYPE2:%.*]]], align 16 +; CHECK: [[TMP1:%.*]] = getelementptr inbounds [1 x [[TYPE2]]], ptr [[TMP0]], i64 0, i64 0 +; CHECK: [[ADD_PTR_I_I:%.*]] = getelementptr inbounds [[TYPE2]], ptr [[TMP1]], i64 0 ; CHECK: [[ADD_I_I_I:%.*]] = fadd <4 x float> -; CHECK: [[ARRAYIDX_I13_I_I:%.*]] = getelementptr inbounds [[TYPE2]], [[TYPE2]]* [[ADD_PTR_I_I]], i64 0 -; CHECK: [[REF_TMP_SROA_0_0__SROA_IDX_I_I:%.*]] = getelementptr inbounds [[TYPE2]], [[TYPE2]]* [[ARRAYIDX_I13_I_I]], i64 0, i32 0 -; CHECK: store <4 x float> [[ADD_I_I_I]], <4 x float>* [[REF_TMP_SROA_0_0__SROA_IDX_I_I]], align 16 -; CHECK: [[ADD_PTR_I39_I8:%.*]] = getelementptr inbounds [[TYPE2]], [[TYPE2]]* [[TMP1]], i64 0 -; CHECK: [[ARRAYIDX_I_I_I11:%.*]] = getelementptr inbounds [[TYPE2]], [[TYPE2]]* [[ADD_PTR_I39_I8]], i64 0 -; CHECK: [[M_DATA_I_I_I15:%.*]] = getelementptr inbounds [[TYPE2]], [[TYPE2]]* [[ARRAYIDX_I_I_I11]], i64 0, i32 0 -; CHECK: [[TMP16:%.*]] = 
load <4 x float>, <4 x float>* [[M_DATA_I_I_I15]], align 16 +; CHECK: [[ARRAYIDX_I13_I_I:%.*]] = getelementptr inbounds [[TYPE2]], ptr [[ADD_PTR_I_I]], i64 0 +; CHECK: [[REF_TMP_SROA_0_0__SROA_IDX_I_I:%.*]] = getelementptr inbounds [[TYPE2]], ptr [[ARRAYIDX_I13_I_I]], i64 0, i32 0 +; CHECK: store <4 x float> [[ADD_I_I_I]], ptr [[REF_TMP_SROA_0_0__SROA_IDX_I_I]], align 16 +; CHECK: [[ADD_PTR_I39_I8:%.*]] = getelementptr inbounds [[TYPE2]], ptr [[TMP1]], i64 0 +; CHECK: [[ARRAYIDX_I_I_I11:%.*]] = getelementptr inbounds [[TYPE2]], ptr [[ADD_PTR_I39_I8]], i64 0 +; CHECK: [[M_DATA_I_I_I15:%.*]] = getelementptr inbounds [[TYPE2]], ptr [[ARRAYIDX_I_I_I11]], i64 0, i32 0 +; CHECK: [[TMP16:%.*]] = load <4 x float>, ptr [[M_DATA_I_I_I15]], align 16 ; CHECK: [[MUL_I_I_I:%.*]] = fmul <4 x float> [[TMP16]] ; CHECK: store <4 x float> [[MUL_I_I_I]] ; CHECK-NOT: store ; CHECK: ret void ; entry: - %KernelOne_accIn163.sroa.0.0..sroa_idx = getelementptr inbounds %1, %1* %KernelOne_accIn16, i64 0, i32 0, i32 0, i64 0 - %KernelOne_accIn163.sroa.0.0.copyload = load i64, i64* %KernelOne_accIn163.sroa.0.0..sroa_idx, align 1 - %KernelOne_accIn162.sroa.0.0..sroa_idx = getelementptr inbounds %1, %1* %KernelOne_accIn16, i64 0, i32 0, i32 0, i64 0 - %KernelOne_accIn162.sroa.0.0.copyload = load i64, i64* %KernelOne_accIn162.sroa.0.0..sroa_idx, align 1 - %KernelOne_accTmp31.sroa.0.0..sroa_idx = getelementptr inbounds %1, %1* %KernelOne_accTmp3, i64 0, i32 0, i32 0, i64 0 - %KernelOne_accTmp31.sroa.0.0.copyload = load i64, i64* %KernelOne_accTmp31.sroa.0.0..sroa_idx, align 1 + %KernelOne_accIn163.sroa.0.0..sroa_idx = getelementptr inbounds %1, ptr %KernelOne_accIn16, i64 0, i32 0, i32 0, i64 0 + %KernelOne_accIn163.sroa.0.0.copyload = load i64, ptr %KernelOne_accIn163.sroa.0.0..sroa_idx, align 1 + %KernelOne_accIn162.sroa.0.0..sroa_idx = getelementptr inbounds %1, ptr %KernelOne_accIn16, i64 0, i32 0, i32 0, i64 0 + %KernelOne_accIn162.sroa.0.0.copyload = load i64, ptr %KernelOne_accIn162.sroa.0.0..sroa_idx, align 1 + %KernelOne_accTmp31.sroa.0.0..sroa_idx = getelementptr inbounds %1, ptr %KernelOne_accTmp3, i64 0, i32 0, i32 0, i64 0 + %KernelOne_accTmp31.sroa.0.0.copyload = load i64, ptr %KernelOne_accTmp31.sroa.0.0..sroa_idx, align 1 call spir_func void @__itt_offload_wi_start_wrapper() #3 - %add.ptr.i.i = getelementptr inbounds %0, %0 addrspace(1)* %KernelOne_accTmp, i64 %KernelOne_accTmp31.sroa.0.0.copyload - %add.ptr.i39.i = getelementptr inbounds %0, %0 addrspace(1)* %KernelOne_accIn1, i64 %KernelOne_accIn162.sroa.0.0.copyload - %add.ptr.i53.i = getelementptr inbounds %0, %0 addrspace(1)* %KernelOne_accIn2, i64 %KernelOne_accIn163.sroa.0.0.copyload + %add.ptr.i.i = getelementptr inbounds %0, ptr addrspace(1) %KernelOne_accTmp, i64 %KernelOne_accTmp31.sroa.0.0.copyload + %add.ptr.i39.i = getelementptr inbounds %0, ptr addrspace(1) %KernelOne_accIn1, i64 %KernelOne_accIn162.sroa.0.0.copyload + %add.ptr.i53.i = getelementptr inbounds %0, ptr addrspace(1) %KernelOne_accIn2, i64 %KernelOne_accIn163.sroa.0.0.copyload %0 = call spir_func i64 @_Z13get_global_idj(i32 0) #2 %1 = insertelement <3 x i64> undef, i64 %0, i32 0 %2 = call spir_func i64 @_Z13get_global_idj(i32 1) #2 @@ -79,27 +79,27 @@ entry: %5 = insertelement <3 x i64> %3, i64 %4, i32 2 %cmp.i.i.i = icmp ult i64 %0, 2147483648 call void @llvm.assume(i1 %cmp.i.i.i) - %arrayidx.i.i.i = getelementptr inbounds %0, %0 addrspace(1)* %add.ptr.i39.i, i64 %0 - %arrayidx.i9.i.i = getelementptr inbounds %0, %0 addrspace(1)* %add.ptr.i53.i, i64 %0 - %m_Data.i.i.i = 
getelementptr inbounds %0, %0 addrspace(1)* %arrayidx.i.i.i, i64 0, i32 0 - %6 = load <4 x float>, <4 x float> addrspace(1)* %m_Data.i.i.i, align 16 - %m_Data2.i.i.i = getelementptr inbounds %0, %0 addrspace(1)* %arrayidx.i9.i.i, i64 0, i32 0 - %7 = load <4 x float>, <4 x float> addrspace(1)* %m_Data2.i.i.i, align 16 + %arrayidx.i.i.i = getelementptr inbounds %0, ptr addrspace(1) %add.ptr.i39.i, i64 %0 + %arrayidx.i9.i.i = getelementptr inbounds %0, ptr addrspace(1) %add.ptr.i53.i, i64 %0 + %m_Data.i.i.i = getelementptr inbounds %0, ptr addrspace(1) %arrayidx.i.i.i, i64 0, i32 0 + %6 = load <4 x float>,ptr addrspace(1) %m_Data.i.i.i, align 16 + %m_Data2.i.i.i = getelementptr inbounds %0, ptr addrspace(1) %arrayidx.i9.i.i, i64 0, i32 0 + %7 = load <4 x float>,ptr addrspace(1) %m_Data2.i.i.i, align 16 %add.i.i.i = fadd <4 x float> %6, %7 - %arrayidx.i13.i.i = getelementptr inbounds %0, %0 addrspace(1)* %add.ptr.i.i, i64 %0 - %ref.tmp.sroa.0.0..sroa_idx.i.i = getelementptr inbounds %0, %0 addrspace(1)* %arrayidx.i13.i.i, i64 0, i32 0 - store <4 x float> %add.i.i.i, <4 x float> addrspace(1)* %ref.tmp.sroa.0.0..sroa_idx.i.i, align 16 + %arrayidx.i13.i.i = getelementptr inbounds %0, ptr addrspace(1) %add.ptr.i.i, i64 %0 + %ref.tmp.sroa.0.0..sroa_idx.i.i = getelementptr inbounds %0, ptr addrspace(1) %arrayidx.i13.i.i, i64 0, i32 0 + store <4 x float> %add.i.i.i,ptr addrspace(1) %ref.tmp.sroa.0.0..sroa_idx.i.i, align 16 call spir_func void @__itt_offload_wi_finish_wrapper() #3 - %KernelOne_accIn166.sroa.0.0..sroa_idx = getelementptr inbounds %1, %1* %KernelOne_accIn16, i64 0, i32 0, i32 0, i64 0 - %KernelOne_accIn166.sroa.0.0.copyload = load i64, i64* %KernelOne_accIn166.sroa.0.0..sroa_idx, align 1 - %KernelOne_accTmp35.sroa.0.0..sroa_idx = getelementptr inbounds %1, %1* %KernelOne_accTmp3, i64 0, i32 0, i32 0, i64 0 - %KernelOne_accTmp35.sroa.0.0.copyload = load i64, i64* %KernelOne_accTmp35.sroa.0.0..sroa_idx, align 1 - %KernelOne_accIn164.sroa.0.0..sroa_idx = getelementptr inbounds %1, %1* %KernelOne_accIn16, i64 0, i32 0, i32 0, i64 0 - %KernelOne_accIn164.sroa.0.0.copyload = load i64, i64* %KernelOne_accIn164.sroa.0.0..sroa_idx, align 1 + %KernelOne_accIn166.sroa.0.0..sroa_idx = getelementptr inbounds %1, ptr %KernelOne_accIn16, i64 0, i32 0, i32 0, i64 0 + %KernelOne_accIn166.sroa.0.0.copyload = load i64, ptr %KernelOne_accIn166.sroa.0.0..sroa_idx, align 1 + %KernelOne_accTmp35.sroa.0.0..sroa_idx = getelementptr inbounds %1, ptr %KernelOne_accTmp3, i64 0, i32 0, i32 0, i64 0 + %KernelOne_accTmp35.sroa.0.0.copyload = load i64, ptr %KernelOne_accTmp35.sroa.0.0..sroa_idx, align 1 + %KernelOne_accIn164.sroa.0.0..sroa_idx = getelementptr inbounds %1, ptr %KernelOne_accIn16, i64 0, i32 0, i32 0, i64 0 + %KernelOne_accIn164.sroa.0.0.copyload = load i64, ptr %KernelOne_accIn164.sroa.0.0..sroa_idx, align 1 call spir_func void @__itt_offload_wi_start_wrapper() #3 - %add.ptr.i.i7 = getelementptr inbounds %0, %0 addrspace(1)* %KernelTwo_accOut, i64 %KernelOne_accIn164.sroa.0.0.copyload - %add.ptr.i39.i8 = getelementptr inbounds %0, %0 addrspace(1)* %KernelOne_accTmp, i64 %KernelOne_accTmp35.sroa.0.0.copyload - %add.ptr.i53.i9 = getelementptr inbounds %0, %0 addrspace(1)* %KernelTwo_accIn3, i64 %KernelOne_accIn166.sroa.0.0.copyload + %add.ptr.i.i7 = getelementptr inbounds %0, ptr addrspace(1) %KernelTwo_accOut, i64 %KernelOne_accIn164.sroa.0.0.copyload + %add.ptr.i39.i8 = getelementptr inbounds %0, ptr addrspace(1) %KernelOne_accTmp, i64 %KernelOne_accTmp35.sroa.0.0.copyload + %add.ptr.i53.i9 = 
getelementptr inbounds %0, ptr addrspace(1) %KernelTwo_accIn3, i64 %KernelOne_accIn166.sroa.0.0.copyload %8 = call spir_func i64 @_Z13get_global_idj(i32 0) #2 %9 = insertelement <3 x i64> undef, i64 %8, i32 0 %10 = call spir_func i64 @_Z13get_global_idj(i32 1) #2 @@ -108,16 +108,16 @@ entry: %13 = insertelement <3 x i64> %11, i64 %12, i32 2 %cmp.i.i.i10 = icmp ult i64 %8, 2147483648 call void @llvm.assume(i1 %cmp.i.i.i10) - %arrayidx.i.i.i11 = getelementptr inbounds %0, %0 addrspace(1)* %add.ptr.i39.i8, i64 %8 - %arrayidx.i9.i.i13 = getelementptr inbounds %0, %0 addrspace(1)* %add.ptr.i53.i9, i64 %8 - %m_Data.i.i.i15 = getelementptr inbounds %0, %0 addrspace(1)* %arrayidx.i.i.i11, i64 0, i32 0 - %14 = load <4 x float>, <4 x float> addrspace(1)* %m_Data.i.i.i15, align 16 - %m_Data2.i.i.i16 = getelementptr inbounds %0, %0 addrspace(1)* %arrayidx.i9.i.i13, i64 0, i32 0 - %15 = load <4 x float>, <4 x float> addrspace(1)* %m_Data2.i.i.i16, align 16 + %arrayidx.i.i.i11 = getelementptr inbounds %0, ptr addrspace(1) %add.ptr.i39.i8, i64 %8 + %arrayidx.i9.i.i13 = getelementptr inbounds %0, ptr addrspace(1) %add.ptr.i53.i9, i64 %8 + %m_Data.i.i.i15 = getelementptr inbounds %0, ptr addrspace(1) %arrayidx.i.i.i11, i64 0, i32 0 + %14 = load <4 x float>,ptr addrspace(1) %m_Data.i.i.i15, align 16 + %m_Data2.i.i.i16 = getelementptr inbounds %0, ptr addrspace(1) %arrayidx.i9.i.i13, i64 0, i32 0 + %15 = load <4 x float>,ptr addrspace(1) %m_Data2.i.i.i16, align 16 %mul.i.i.i = fmul <4 x float> %14, %15 - %arrayidx.i13.i.i17 = getelementptr inbounds %0, %0 addrspace(1)* %add.ptr.i.i7, i64 %8 - %ref.tmp.sroa.0.0..sroa_idx.i.i19 = getelementptr inbounds %0, %0 addrspace(1)* %arrayidx.i13.i.i17, i64 0, i32 0 - store <4 x float> %mul.i.i.i, <4 x float> addrspace(1)* %ref.tmp.sroa.0.0..sroa_idx.i.i19, align 16 + %arrayidx.i13.i.i17 = getelementptr inbounds %0, ptr addrspace(1) %add.ptr.i.i7, i64 %8 + %ref.tmp.sroa.0.0..sroa_idx.i.i19 = getelementptr inbounds %0, ptr addrspace(1) %arrayidx.i13.i.i17, i64 0, i32 0 + store <4 x float> %mul.i.i.i,ptr addrspace(1) %ref.tmp.sroa.0.0..sroa_idx.i.i19, align 16 call spir_func void @__itt_offload_wi_finish_wrapper() #3 ret void } @@ -132,7 +132,7 @@ attributes #5 = { nounwind } !12 = !{i32 1, i32 0, i32 1, i32 0, i32 1, i32 1, i32 1} !13 = !{!"none", !"none", !"none", !"none", !"none", !"none", !"none"} -!14 = !{!"class.sycl::_V1::vec*", !"class.sycl::_V1::range", !"class.sycl::_V1::vec*", !"class.sycl::_V1::range", !"class.sycl::_V1::vec*", !"class.sycl::_V1::vec*", !"class.sycl::_V1::vec*"} +!14 = !{!"ptr", !"class.sycl::_V1::range", !"ptr", !"class.sycl::_V1::range", !"ptr", !"ptr", !"ptr"} !15 = !{!"", !"", !"", !"", !"", !"", !""} !16 = !{!"KernelOne_accTmp", !"KernelOne_accTmp3", !"KernelOne_accIn1", !"KernelOne_accIn16", !"KernelOne_accIn2", !"KernelTwo_accOut", !"KernelTwo_accIn3"} !17 = !{!"private", !"none", !"none", !"none", !"none", !"none", !"none"} diff --git a/sycl-fusion/test/kernel-fusion/required_work_group_size.ll b/sycl-fusion/test/kernel-fusion/required_work_group_size.ll index 2ee2d6d869dc3..4ef498da0bc5a 100644 --- a/sycl-fusion/test/kernel-fusion/required_work_group_size.ll +++ b/sycl-fusion/test/kernel-fusion/required_work_group_size.ll @@ -15,21 +15,21 @@ declare spir_func void @__itt_offload_wi_finish_wrapper() #3 ; Function Attrs: nounwind -define spir_kernel void @KernelOne(float addrspace(1)* align 4 %_arg_x) #2 !kernel_arg_addr_space !6 !kernel_arg_access_qual !7 !kernel_arg_type !8 !kernel_arg_type_qual !9 !kernel_arg_base_type !8 
!kernel_arg_name !10 !reqd_group_size !11 { +define spir_kernel void @KernelOne(ptr addrspace(1) align 4 %_arg_x) #2 !kernel_arg_addr_space !6 !kernel_arg_access_qual !7 !kernel_arg_type !8 !kernel_arg_type_qual !9 !kernel_arg_base_type !8 !kernel_arg_name !10 !reqd_group_size !11 { entry: call spir_func void @__itt_offload_wi_start_wrapper() #3 - %0 = addrspacecast float addrspace(1)* %_arg_x to float addrspace(4)* - store float 4.200000e+01, float addrspace(4)* %0, align 4 + %0 = addrspacecast ptr addrspace(1) %_arg_x to ptr addrspace(4) + store float 4.200000e+01, ptr addrspace(4) %0, align 4 call spir_func void @__itt_offload_wi_finish_wrapper() #3 ret void } ; Function Attrs: nounwind -define spir_kernel void @KernelTwo(float addrspace(1)* align 4 %_arg_y) #2 !kernel_arg_addr_space !6 !kernel_arg_access_qual !7 !kernel_arg_type !8 !kernel_arg_type_qual !9 !kernel_arg_base_type !8 !kernel_arg_name !12 { +define spir_kernel void @KernelTwo(ptr addrspace(1) align 4 %_arg_y) #2 !kernel_arg_addr_space !6 !kernel_arg_access_qual !7 !kernel_arg_type !8 !kernel_arg_type_qual !9 !kernel_arg_base_type !8 !kernel_arg_name !12 { entry: call spir_func void @__itt_offload_wi_start_wrapper() #3 - %0 = addrspacecast float addrspace(1)* %_arg_y to float addrspace(4)* - store float 2.500000e+01, float addrspace(4)* %0, align 4 + %0 = addrspacecast ptr addrspace(1) %_arg_y to ptr addrspace(4) + store float 2.500000e+01, ptr addrspace(4) %0, align 4 call spir_func void @__itt_offload_wi_finish_wrapper() #3 ret void } @@ -41,7 +41,7 @@ attributes #3 = { alwaysinline nounwind } !6 = !{i32 1} !7 = !{!"none"} -!8 = !{!"float*"} +!8 = !{!"ptr"} !9 = !{!""} !10 = !{!"_arg_x"} !11 = !{i32 64, i32 1, i32 1} diff --git a/sycl-fusion/test/kernel-fusion/two-kernels-no-identities.ll b/sycl-fusion/test/kernel-fusion/two-kernels-no-identities.ll index e69c585044665..07cd292370895 100644 --- a/sycl-fusion/test/kernel-fusion/two-kernels-no-identities.ll +++ b/sycl-fusion/test/kernel-fusion/two-kernels-no-identities.ll @@ -20,10 +20,10 @@ target triple = "spir64-unknown-unknown" %1 = type { [1 x i64] } ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) -declare void @llvm.lifetime.start.p0i8(i64 immarg %0, i8* nocapture %1) #0 +declare void @llvm.lifetime.start.p0i8(i64 immarg %0, ptr nocapture %1) #0 ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) -declare void @llvm.lifetime.end.p0i8(i64 immarg %0, i8* nocapture %1) #0 +declare void @llvm.lifetime.end.p0i8(i64 immarg %0, ptr nocapture %1) #0 ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) declare void @llvm.assume(i1 noundef %0) #1 @@ -47,10 +47,10 @@ entry: br i1 true, label %return, label %if.end if.end: ; preds = %entry - %0 = bitcast [3 x i64]* %GroupID to i8* - call void @llvm.lifetime.start.p0i8(i64 24, i8* %0) - %arrayinit.begin5 = getelementptr inbounds [3 x i64], [3 x i64]* %GroupID, i64 0, i64 0 - %arrayinit.begin = addrspacecast i64* %arrayinit.begin5 to i64 addrspace(4)* + %0 = bitcast ptr %GroupID to ptr + call void @llvm.lifetime.start.p0i8(i64 24, ptr %0) + %arrayinit.begin5 = getelementptr inbounds [3 x i64], ptr %GroupID, i64 0, i64 0 + %arrayinit.begin = addrspacecast ptr %arrayinit.begin5 to ptr addrspace(4) %1 = call spir_func i64 @_Z26__spirv_BuiltInWorkgroupIdi(i32 0) #2 %2 = insertelement <3 x i64> undef, i64 %1, i32 0 %3 = call spir_func i64 @_Z26__spirv_BuiltInWorkgroupIdi(i32 1) #2 @@ -58,15 +58,15 @@ if.end: ; preds 
= %entry %5 = call spir_func i64 @_Z26__spirv_BuiltInWorkgroupIdi(i32 2) #2 %6 = insertelement <3 x i64> %4, i64 %5, i32 2 %7 = extractelement <3 x i64> %6, i32 0 - store i64 %7, i64 addrspace(4)* %arrayinit.begin, align 8 - %arrayinit.element6 = getelementptr inbounds [3 x i64], [3 x i64]* %GroupID, i64 0, i64 1 - %arrayinit.element = addrspacecast i64* %arrayinit.element6 to i64 addrspace(4)* + store i64 %7, ptr addrspace(4) %arrayinit.begin, align 8 + %arrayinit.element6 = getelementptr inbounds [3 x i64], ptr %GroupID, i64 0, i64 1 + %arrayinit.element = addrspacecast ptr %arrayinit.element6 to ptr addrspace(4) %8 = extractelement <3 x i64> %6, i32 1 - store i64 %8, i64 addrspace(4)* %arrayinit.element, align 8 - %arrayinit.element17 = getelementptr inbounds [3 x i64], [3 x i64]* %GroupID, i64 0, i64 2 - %arrayinit.element1 = addrspacecast i64* %arrayinit.element17 to i64 addrspace(4)* + store i64 %8, ptr addrspace(4) %arrayinit.element, align 8 + %arrayinit.element17 = getelementptr inbounds [3 x i64], ptr %GroupID, i64 0, i64 2 + %arrayinit.element1 = addrspacecast ptr %arrayinit.element17 to ptr addrspace(4) %9 = extractelement <3 x i64> %6, i32 2 - store i64 %9, i64 addrspace(4)* %arrayinit.element1, align 8 + store i64 %9, ptr addrspace(4) %arrayinit.element1, align 8 %10 = call spir_func i64 @_Z29__spirv_BuiltInGlobalLinearIdv() #2 %11 = call spir_func i64 @_Z28__spirv_BuiltInWorkgroupSizei(i32 0) #2 %12 = insertelement <3 x i64> undef, i64 %11, i32 0 @@ -80,8 +80,8 @@ if.end: ; preds = %entry %19 = extractelement <3 x i64> %16, i32 2 %mul2 = mul i64 %mul, %19 %conv = trunc i64 %mul2 to i32 - call spir_func void @__itt_offload_wi_start_stub(i64 addrspace(4)* %arrayinit.begin, i64 %10, i32 %conv) #4 - call void @llvm.lifetime.end.p0i8(i64 24, i8* %0) + call spir_func void @__itt_offload_wi_start_stub(ptr addrspace(4) %arrayinit.begin, i64 %10, i32 %conv) #4 + call void @llvm.lifetime.end.p0i8(i64 24, ptr %0) br label %return return: ; preds = %if.end, %entry @@ -95,10 +95,10 @@ entry: br i1 true, label %return, label %if.end if.end: ; preds = %entry - %0 = bitcast [3 x i64]* %GroupID to i8* - call void @llvm.lifetime.start.p0i8(i64 24, i8* %0) - %arrayinit.begin3 = getelementptr inbounds [3 x i64], [3 x i64]* %GroupID, i64 0, i64 0 - %arrayinit.begin = addrspacecast i64* %arrayinit.begin3 to i64 addrspace(4)* + %0 = bitcast ptr %GroupID to ptr + call void @llvm.lifetime.start.p0i8(i64 24, ptr %0) + %arrayinit.begin3 = getelementptr inbounds [3 x i64], ptr %GroupID, i64 0, i64 0 + %arrayinit.begin = addrspacecast ptr %arrayinit.begin3 to ptr addrspace(4) %1 = call spir_func i64 @_Z26__spirv_BuiltInWorkgroupIdi(i32 0) #2 %2 = insertelement <3 x i64> undef, i64 %1, i32 0 %3 = call spir_func i64 @_Z26__spirv_BuiltInWorkgroupIdi(i32 1) #2 @@ -106,18 +106,18 @@ if.end: ; preds = %entry %5 = call spir_func i64 @_Z26__spirv_BuiltInWorkgroupIdi(i32 2) #2 %6 = insertelement <3 x i64> %4, i64 %5, i32 2 %7 = extractelement <3 x i64> %6, i32 0 - store i64 %7, i64 addrspace(4)* %arrayinit.begin, align 8 - %arrayinit.element4 = getelementptr inbounds [3 x i64], [3 x i64]* %GroupID, i64 0, i64 1 - %arrayinit.element = addrspacecast i64* %arrayinit.element4 to i64 addrspace(4)* + store i64 %7, ptr addrspace(4) %arrayinit.begin, align 8 + %arrayinit.element4 = getelementptr inbounds [3 x i64], ptr %GroupID, i64 0, i64 1 + %arrayinit.element = addrspacecast ptr %arrayinit.element4 to ptr addrspace(4) %8 = extractelement <3 x i64> %6, i32 1 - store i64 %8, i64 addrspace(4)* %arrayinit.element, align 
8 - %arrayinit.element15 = getelementptr inbounds [3 x i64], [3 x i64]* %GroupID, i64 0, i64 2 - %arrayinit.element1 = addrspacecast i64* %arrayinit.element15 to i64 addrspace(4)* + store i64 %8, ptr addrspace(4) %arrayinit.element, align 8 + %arrayinit.element15 = getelementptr inbounds [3 x i64], ptr %GroupID, i64 0, i64 2 + %arrayinit.element1 = addrspacecast ptr %arrayinit.element15 to ptr addrspace(4) %9 = extractelement <3 x i64> %6, i32 2 - store i64 %9, i64 addrspace(4)* %arrayinit.element1, align 8 + store i64 %9, ptr addrspace(4) %arrayinit.element1, align 8 %10 = call spir_func i64 @_Z29__spirv_BuiltInGlobalLinearIdv() #2 - call spir_func void @__itt_offload_wi_finish_stub(i64 addrspace(4)* %arrayinit.begin, i64 %10) #4 - call void @llvm.lifetime.end.p0i8(i64 24, i8* %0) + call spir_func void @__itt_offload_wi_finish_stub(ptr addrspace(4) %arrayinit.begin, i64 %10) #4 + call void @llvm.lifetime.end.p0i8(i64 24, ptr %0) br label %return return: ; preds = %if.end, %entry @@ -125,27 +125,27 @@ return: ; preds = %if.end, %entry } ; Function Attrs: noinline nounwind -declare spir_func void @__itt_offload_wi_finish_stub(i64 addrspace(4)* %group_id, i64 %wi_id) #4 +declare spir_func void @__itt_offload_wi_finish_stub(ptr addrspace(4) %group_id, i64 %wi_id) #4 ; Function Attrs: noinline nounwind -declare spir_func void @__itt_offload_wi_start_stub(i64 addrspace(4)* %group_id, i64 %wi_id, i32 %wg_size) #4 +declare spir_func void @__itt_offload_wi_start_stub(ptr addrspace(4) %group_id, i64 %wi_id, i32 %wg_size) #4 ; Function Attrs: nounwind -define spir_kernel void @_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_E9KernelOne(float addrspace(1)* align 4 %_arg_accTmp, %0* byval(%0) align 8 %_arg_accTmp3, float addrspace(1)* align 4 %_arg_accIn1, %0* byval(%0) align 8 %_arg_accIn16, float addrspace(1)* align 4 %_arg_accIn2, %0* byval(%0) align 8 %_arg_accIn29) #5 !kernel_arg_addr_space !6 !kernel_arg_access_qual !7 !kernel_arg_type !8 !kernel_arg_type_qual !9 !kernel_arg_base_type !8 !kernel_arg_name !10 !spirv.ParameterDecorations !11 { +define spir_kernel void @_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_E9KernelOne(ptr addrspace(1) align 4 %_arg_accTmp, ptr byval(%0) align 8 %_arg_accTmp3, ptr addrspace(1) align 4 %_arg_accIn1, ptr byval(%0) align 8 %_arg_accIn16, ptr addrspace(1) align 4 %_arg_accIn2, ptr byval(%0) align 8 %_arg_accIn29) #5 !kernel_arg_addr_space !6 !kernel_arg_access_qual !7 !kernel_arg_type !8 !kernel_arg_type_qual !9 !kernel_arg_base_type !8 !kernel_arg_name !10 !spirv.ParameterDecorations !11 { entry: call spir_func void @__itt_offload_wi_start_wrapper() #3 - %0 = getelementptr inbounds %0, %0* %_arg_accTmp3, i64 0, i32 0, i32 0, i64 0 - %1 = addrspacecast i64* %0 to i64 addrspace(4)* - %2 = load i64, i64 addrspace(4)* %1, align 8 - %add.ptr.i = getelementptr inbounds float, float addrspace(1)* %_arg_accTmp, i64 %2 - %3 = getelementptr inbounds %0, %0* %_arg_accIn16, i64 0, i32 0, i32 0, i64 0 - %4 = addrspacecast i64* %3 to i64 addrspace(4)* - %5 = load i64, i64 addrspace(4)* %4, align 8 - %add.ptr.i39 = getelementptr inbounds float, float addrspace(1)* %_arg_accIn1, i64 %5 - %6 = getelementptr inbounds %0, %0* %_arg_accIn29, i64 0, i32 0, i32 0, i64 0 - %7 = addrspacecast i64* %6 to i64 addrspace(4)* - %8 = load i64, i64 addrspace(4)* %7, align 8 - %add.ptr.i53 = getelementptr inbounds float, float addrspace(1)* %_arg_accIn2, i64 %8 + %0 = getelementptr inbounds %0, ptr %_arg_accTmp3, i64 0, i32 0, i32 0, i64 0 + %1 = addrspacecast ptr %0 to ptr addrspace(4) + %2 = 
load i64, ptr addrspace(4) %1, align 8 + %add.ptr.i = getelementptr inbounds float, ptr addrspace(1) %_arg_accTmp, i64 %2 + %3 = getelementptr inbounds %0, ptr %_arg_accIn16, i64 0, i32 0, i32 0, i64 0 + %4 = addrspacecast ptr %3 to ptr addrspace(4) + %5 = load i64, ptr addrspace(4) %4, align 8 + %add.ptr.i39 = getelementptr inbounds float, ptr addrspace(1) %_arg_accIn1, i64 %5 + %6 = getelementptr inbounds %0, ptr %_arg_accIn29, i64 0, i32 0, i32 0, i64 0 + %7 = addrspacecast ptr %6 to ptr addrspace(4) + %8 = load i64, ptr addrspace(4) %7, align 8 + %add.ptr.i53 = getelementptr inbounds float, ptr addrspace(1) %_arg_accIn2, i64 %8 %9 = call spir_func i64 @_Z33__spirv_BuiltInGlobalInvocationIdi(i32 0) #2 %10 = insertelement <3 x i64> undef, i64 %9, i32 0 %11 = call spir_func i64 @_Z33__spirv_BuiltInGlobalInvocationIdi(i32 1) #2 @@ -155,36 +155,36 @@ entry: %15 = extractelement <3 x i64> %14, i32 0 %cmp.i.i = icmp ult i64 %15, 2147483648 call void @llvm.assume(i1 %cmp.i.i) - %arrayidx.i.i = getelementptr inbounds float, float addrspace(1)* %add.ptr.i39, i64 %15 - %arrayidx.ascast.i.i = addrspacecast float addrspace(1)* %arrayidx.i.i to float addrspace(4)* - %16 = load float, float addrspace(4)* %arrayidx.ascast.i.i, align 4 - %arrayidx.i9.i = getelementptr inbounds float, float addrspace(1)* %add.ptr.i53, i64 %15 - %arrayidx.ascast.i10.i = addrspacecast float addrspace(1)* %arrayidx.i9.i to float addrspace(4)* - %17 = load float, float addrspace(4)* %arrayidx.ascast.i10.i, align 4 + %arrayidx.i.i = getelementptr inbounds float, ptr addrspace(1) %add.ptr.i39, i64 %15 + %arrayidx.ascast.i.i = addrspacecast ptr addrspace(1) %arrayidx.i.i to ptr addrspace(4) + %16 = load float, ptr addrspace(4) %arrayidx.ascast.i.i, align 4 + %arrayidx.i9.i = getelementptr inbounds float, ptr addrspace(1) %add.ptr.i53, i64 %15 + %arrayidx.ascast.i10.i = addrspacecast ptr addrspace(1) %arrayidx.i9.i to ptr addrspace(4) + %17 = load float, ptr addrspace(4) %arrayidx.ascast.i10.i, align 4 %add.i = fadd float %16, %17 - %arrayidx.i13.i = getelementptr inbounds float, float addrspace(1)* %add.ptr.i, i64 %15 - %arrayidx.ascast.i14.i = addrspacecast float addrspace(1)* %arrayidx.i13.i to float addrspace(4)* - store float %add.i, float addrspace(4)* %arrayidx.ascast.i14.i, align 4 + %arrayidx.i13.i = getelementptr inbounds float, ptr addrspace(1) %add.ptr.i, i64 %15 + %arrayidx.ascast.i14.i = addrspacecast ptr addrspace(1) %arrayidx.i13.i to ptr addrspace(4) + store float %add.i, ptr addrspace(4) %arrayidx.ascast.i14.i, align 4 call spir_func void @__itt_offload_wi_finish_wrapper() #3 ret void } ; Function Attrs: nounwind -define spir_kernel void @_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE0_clES2_E9KernelTwo(float addrspace(1)* align 4 %_arg_accOut, %0* byval(%0) align 8 %_arg_accOut3, float addrspace(1)* align 4 %_arg_accTmp, %0* byval(%0) align 8 %_arg_accTmp6, float addrspace(1)* align 4 %_arg_accIn3, %0* byval(%0) align 8 %_arg_accIn39) #5 !kernel_arg_addr_space !6 !kernel_arg_access_qual !7 !kernel_arg_type !8 !kernel_arg_type_qual !9 !kernel_arg_base_type !8 !kernel_arg_name !17 !spirv.ParameterDecorations !11 { +define spir_kernel void @_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE0_clES2_E9KernelTwo(ptr addrspace(1) align 4 %_arg_accOut, ptr byval(%0) align 8 %_arg_accOut3, ptr addrspace(1) align 4 %_arg_accTmp, ptr byval(%0) align 8 %_arg_accTmp6, ptr addrspace(1) align 4 %_arg_accIn3, ptr byval(%0) align 8 %_arg_accIn39) #5 !kernel_arg_addr_space !6 !kernel_arg_access_qual !7 !kernel_arg_type !8 !kernel_arg_type_qual !9 
!kernel_arg_base_type !8 !kernel_arg_name !17 !spirv.ParameterDecorations !11 { entry: call spir_func void @__itt_offload_wi_start_wrapper() #3 - %0 = getelementptr inbounds %0, %0* %_arg_accOut3, i64 0, i32 0, i32 0, i64 0 - %1 = addrspacecast i64* %0 to i64 addrspace(4)* - %2 = load i64, i64 addrspace(4)* %1, align 8 - %add.ptr.i = getelementptr inbounds float, float addrspace(1)* %_arg_accOut, i64 %2 - %3 = getelementptr inbounds %0, %0* %_arg_accTmp6, i64 0, i32 0, i32 0, i64 0 - %4 = addrspacecast i64* %3 to i64 addrspace(4)* - %5 = load i64, i64 addrspace(4)* %4, align 8 - %add.ptr.i39 = getelementptr inbounds float, float addrspace(1)* %_arg_accTmp, i64 %5 - %6 = getelementptr inbounds %0, %0* %_arg_accIn39, i64 0, i32 0, i32 0, i64 0 - %7 = addrspacecast i64* %6 to i64 addrspace(4)* - %8 = load i64, i64 addrspace(4)* %7, align 8 - %add.ptr.i53 = getelementptr inbounds float, float addrspace(1)* %_arg_accIn3, i64 %8 + %0 = getelementptr inbounds %0, ptr %_arg_accOut3, i64 0, i32 0, i32 0, i64 0 + %1 = addrspacecast ptr %0 to ptr addrspace(4) + %2 = load i64, ptr addrspace(4) %1, align 8 + %add.ptr.i = getelementptr inbounds float, ptr addrspace(1) %_arg_accOut, i64 %2 + %3 = getelementptr inbounds %0, ptr %_arg_accTmp6, i64 0, i32 0, i32 0, i64 0 + %4 = addrspacecast ptr %3 to ptr addrspace(4) + %5 = load i64, ptr addrspace(4) %4, align 8 + %add.ptr.i39 = getelementptr inbounds float, ptr addrspace(1) %_arg_accTmp, i64 %5 + %6 = getelementptr inbounds %0, ptr %_arg_accIn39, i64 0, i32 0, i32 0, i64 0 + %7 = addrspacecast ptr %6 to ptr addrspace(4) + %8 = load i64, ptr addrspace(4) %7, align 8 + %add.ptr.i53 = getelementptr inbounds float, ptr addrspace(1) %_arg_accIn3, i64 %8 %9 = call spir_func i64 @_Z33__spirv_BuiltInGlobalInvocationIdi(i32 0) #2 %10 = insertelement <3 x i64> undef, i64 %9, i32 0 %11 = call spir_func i64 @_Z33__spirv_BuiltInGlobalInvocationIdi(i32 1) #2 @@ -194,16 +194,16 @@ entry: %15 = extractelement <3 x i64> %14, i32 0 %cmp.i.i = icmp ult i64 %15, 2147483648 call void @llvm.assume(i1 %cmp.i.i) - %arrayidx.i.i = getelementptr inbounds float, float addrspace(1)* %add.ptr.i39, i64 %15 - %arrayidx.ascast.i.i = addrspacecast float addrspace(1)* %arrayidx.i.i to float addrspace(4)* - %16 = load float, float addrspace(4)* %arrayidx.ascast.i.i, align 4 - %arrayidx.i9.i = getelementptr inbounds float, float addrspace(1)* %add.ptr.i53, i64 %15 - %arrayidx.ascast.i10.i = addrspacecast float addrspace(1)* %arrayidx.i9.i to float addrspace(4)* - %17 = load float, float addrspace(4)* %arrayidx.ascast.i10.i, align 4 + %arrayidx.i.i = getelementptr inbounds float, ptr addrspace(1) %add.ptr.i39, i64 %15 + %arrayidx.ascast.i.i = addrspacecast ptr addrspace(1) %arrayidx.i.i to ptr addrspace(4) + %16 = load float, ptr addrspace(4) %arrayidx.ascast.i.i, align 4 + %arrayidx.i9.i = getelementptr inbounds float, ptr addrspace(1) %add.ptr.i53, i64 %15 + %arrayidx.ascast.i10.i = addrspacecast ptr addrspace(1) %arrayidx.i9.i to ptr addrspace(4) + %17 = load float, ptr addrspace(4) %arrayidx.ascast.i10.i, align 4 %mul.i = fmul float %16, %17 - %arrayidx.i13.i = getelementptr inbounds float, float addrspace(1)* %add.ptr.i, i64 %15 - %arrayidx.ascast.i14.i = addrspacecast float addrspace(1)* %arrayidx.i13.i to float addrspace(4)* - store float %mul.i, float addrspace(4)* %arrayidx.ascast.i14.i, align 4 + %arrayidx.i13.i = getelementptr inbounds float, ptr addrspace(1) %add.ptr.i, i64 %15 + %arrayidx.ascast.i14.i = addrspacecast ptr addrspace(1) %arrayidx.i13.i to ptr addrspace(4) + store 
float %mul.i, ptr addrspace(4) %arrayidx.ascast.i14.i, align 4 call spir_func void @__itt_offload_wi_finish_wrapper() #3 ret void } @@ -219,7 +219,7 @@ attributes #5 = { nounwind } !6 = !{i32 1, i32 0, i32 1, i32 0, i32 1, i32 0} !7 = !{!"none", !"none", !"none", !"none", !"none", !"none"} -!8 = !{!"float*", !"class.sycl::_V1::range", !"float*", !"class.sycl::_V1::range", !"float*", !"class.sycl::_V1::range"} +!8 = !{!"ptr", !"class.sycl::_V1::range", !"ptr", !"class.sycl::_V1::range", !"ptr", !"class.sycl::_V1::range"} !9 = !{!"", !"", !"", !"", !"", !""} !10 = !{!"_arg_accTmp", !"_arg_accTmp3", !"_arg_accIn1", !"_arg_accIn16", !"_arg_accIn2", !"_arg_accIn29"} !11 = !{!12, !14, !12, !14, !12, !14} @@ -245,18 +245,18 @@ attributes #5 = { nounwind } ; via 'implicit-check-not'. ; FUSION-LABEL: define spir_kernel void @fused_0 -; FUSION-SAME: float addrspace(1)* align 4 -; FUSION-SAME: %0* byval(%0) align 8 -; FUSION-SAME: float addrspace(1)* align 4 -; FUSION-SAME: %0* byval(%0) align 8 -; FUSION-SAME: float addrspace(1)* align 4 -; FUSION-SAME: %0* byval(%0) align 8 -; FUSION-SAME: float addrspace(1)* align 4 -; FUSION-SAME: %0* byval(%0) align 8 -; FUSION-SAME: float addrspace(1)* align 4 -; FUSION-SAME: %0* byval(%0) align 8 -; FUSION-SAME: float addrspace(1)* align 4 -; FUSION-SAME: %0* byval(%0) align 8 +; FUSION-SAME: ptr addrspace(1) align 4 +; FUSION-SAME: ptr byval(%0) align 8 +; FUSION-SAME: ptr addrspace(1) align 4 +; FUSION-SAME: ptr byval(%0) align 8 +; FUSION-SAME: ptr addrspace(1) align 4 +; FUSION-SAME: ptr byval(%0) align 8 +; FUSION-SAME: ptr addrspace(1) align 4 +; FUSION-SAME: ptr byval(%0) align 8 +; FUSION-SAME: ptr addrspace(1) align 4 +; FUSION-SAME: ptr byval(%0) align 8 +; FUSION-SAME: ptr addrspace(1) align 4 +; FUSION-SAME: ptr byval(%0) align 8 ; FUSION-LABEL: entry: ; FUSION-NEXT: call spir_func void @__itt_offload_wi_start_wrapper() ; FUSION: [[IN1:%.*]] = load float @@ -275,18 +275,18 @@ attributes #5 = { nounwind } ; attached to the fused kernel. ; MD-LABEL: define spir_kernel void @fused_0 -; MD-SAME: float addrspace(1)* align 4 %[[ARG1:[^,]+]] -; MD-SAME: %0* byval(%0) align 8 %[[ARG2:[^,]+]] -; MD-SAME: float addrspace(1)* align 4 %[[ARG3:[^,]+]] -; MD-SAME: %0* byval(%0) align 8 %[[ARG4:[^,]+]] -; MD-SAME: float addrspace(1)* align 4 %[[ARG5:[^,]+]] -; MD-SAME: %0* byval(%0) align 8 %[[ARG6:[^,]+]] -; MD-SAME: float addrspace(1)* align 4 %[[ARG7:[^,]+]] -; MD-SAME: %0* byval(%0) align 8 %[[ARG8:[^,]+]] -; MD-SAME: float addrspace(1)* align 4 %[[ARG9:[^,]+]] -; MD-SAME: %0* byval(%0) align 8 %[[ARG10:[^,]+]] -; MD-SAME: float addrspace(1)* align 4 %[[ARG11:[^,]+]] -; MD-SAME: %0* byval(%0) align 8 %[[ARG12:[^)]+]] +; MD-SAME: ptr addrspace(1) align 4 %[[ARG1:[^,]+]] +; MD-SAME: ptr byval(%0) align 8 %[[ARG2:[^,]+]] +; MD-SAME: ptr addrspace(1) align 4 %[[ARG3:[^,]+]] +; MD-SAME: ptr byval(%0) align 8 %[[ARG4:[^,]+]] +; MD-SAME: ptr addrspace(1) align 4 %[[ARG5:[^,]+]] +; MD-SAME: ptr byval(%0) align 8 %[[ARG6:[^,]+]] +; MD-SAME: ptr addrspace(1) align 4 %[[ARG7:[^,]+]] +; MD-SAME: ptr byval(%0) align 8 %[[ARG8:[^,]+]] +; MD-SAME: ptr addrspace(1) align 4 %[[ARG9:[^,]+]] +; MD-SAME: ptr byval(%0) align 8 %[[ARG10:[^,]+]] +; MD-SAME: ptr addrspace(1) align 4 %[[ARG11:[^,]+]] +; MD-SAME: ptr byval(%0) align 8 %[[ARG12:[^)]+]] ; MD-SAME: !kernel_arg_addr_space ![[#ADDR_SPACE:]] ; MD-SAME: !kernel_arg_access_qual ![[#ACCESS_QUAL:]] ; MD-SAME: !kernel_arg_type ![[#ARG_TYPE:]] @@ -296,7 +296,7 @@ attributes #5 = { nounwind } ;. 
 ; MD: [[#ADDR_SPACE]] = !{i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0}
 ; MD: [[#ACCESS_QUAL]] = !{!"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none"}
-; MD: [[#ARG_TYPE]] = !{!"float*", !"class.sycl::_V1::range", !"float*", !"class.sycl::_V1::range", !"float*", !"class.sycl::_V1::range", !"float*", !"class.sycl::_V1::range", !"float*", !"class.sycl::_V1::range", !"float*", !"class.sycl::_V1::range"}
+; MD: [[#ARG_TYPE]] = !{!"ptr", !"class.sycl::_V1::range", !"ptr", !"class.sycl::_V1::range", !"ptr", !"class.sycl::_V1::range", !"ptr", !"class.sycl::_V1::range", !"ptr", !"class.sycl::_V1::range", !"ptr", !"class.sycl::_V1::range"}
 ; MD: [[#TYPE_QUAL]] = !{!"", !"", !"", !"", !"", !"", !"", !"", !"", !"", !"", !""}
 ; MD: [[#ARG_NAME]] = !{!"[[ARG1]]", !"[[ARG2]]", !"[[ARG3]]", !"[[ARG4]]", !"[[ARG5]]", !"[[ARG6]]", !"[[ARG7]]", !"[[ARG8]]", !"[[ARG9]]", !"[[ARG10]]", !"[[ARG11]]", !"[[ARG12]]"}
 ;.
diff --git a/sycl-fusion/test/kernel-fusion/two-kernels-out-is-in.ll b/sycl-fusion/test/kernel-fusion/two-kernels-out-is-in.ll
index 5d0d53089b0e1..82dd904f278db 100644
--- a/sycl-fusion/test/kernel-fusion/two-kernels-out-is-in.ll
+++ b/sycl-fusion/test/kernel-fusion/two-kernels-out-is-in.ll
@@ -21,10 +21,10 @@ target triple = "spir64-unknown-unknown"
 %1 = type { [1 x i64] }
 ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
-declare void @llvm.lifetime.start.p0i8(i64 immarg %0, i8* nocapture %1) #0
+declare void @llvm.lifetime.start.p0i8(i64 immarg %0, ptr nocapture %1) #0
 ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
-declare void @llvm.lifetime.end.p0i8(i64 immarg %0, i8* nocapture %1) #0
+declare void @llvm.lifetime.end.p0i8(i64 immarg %0, ptr nocapture %1) #0
 ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite)
 declare void @llvm.assume(i1 noundef %0) #1
@@ -48,10 +48,10 @@ entry:
 br i1 true, label %return, label %if.end
 if.end: ; preds = %entry
- %0 = bitcast [3 x i64]* %GroupID to i8*
- call void @llvm.lifetime.start.p0i8(i64 24, i8* %0)
- %arrayinit.begin5 = getelementptr inbounds [3 x i64], [3 x i64]* %GroupID, i64 0, i64 0
- %arrayinit.begin = addrspacecast i64* %arrayinit.begin5 to i64 addrspace(4)*
+ %0 = bitcast ptr %GroupID to ptr
+ call void @llvm.lifetime.start.p0i8(i64 24, ptr %0)
+ %arrayinit.begin5 = getelementptr inbounds [3 x i64], ptr %GroupID, i64 0, i64 0
+ %arrayinit.begin = addrspacecast ptr %arrayinit.begin5 to ptr addrspace(4)
 %1 = call spir_func i64 @_Z26__spirv_BuiltInWorkgroupIdi(i32 0) #2
 %2 = insertelement <3 x i64> undef, i64 %1, i32 0
 %3 = call spir_func i64 @_Z26__spirv_BuiltInWorkgroupIdi(i32 1) #2
@@ -59,15 +59,15 @@ if.end: ; preds = %entry
 %5 = call spir_func i64 @_Z26__spirv_BuiltInWorkgroupIdi(i32 2) #2
 %6 = insertelement <3 x i64> %4, i64 %5, i32 2
 %7 = extractelement <3 x i64> %6, i32 0
- store i64 %7, i64 addrspace(4)* %arrayinit.begin, align 8
- %arrayinit.element6 = getelementptr inbounds [3 x i64], [3 x i64]* %GroupID, i64 0, i64 1
- %arrayinit.element = addrspacecast i64* %arrayinit.element6 to i64 addrspace(4)*
+ store i64 %7, ptr addrspace(4) %arrayinit.begin, align 8
+ %arrayinit.element6 = getelementptr inbounds [3 x i64], ptr %GroupID, i64 0, i64 1
+ %arrayinit.element = addrspacecast ptr %arrayinit.element6 to ptr addrspace(4)
 %8 = extractelement <3 x i64> %6, i32 1
- store i64 %8, i64 addrspace(4)* %arrayinit.element, align 8
- %arrayinit.element17 = getelementptr inbounds [3 x i64], [3 x i64]* %GroupID, i64 0, i64 2
- %arrayinit.element1 = addrspacecast i64* %arrayinit.element17 to i64 addrspace(4)*
+ store i64 %8, ptr addrspace(4) %arrayinit.element, align 8
+ %arrayinit.element17 = getelementptr inbounds [3 x i64], ptr %GroupID, i64 0, i64 2
+ %arrayinit.element1 = addrspacecast ptr %arrayinit.element17 to ptr addrspace(4)
 %9 = extractelement <3 x i64> %6, i32 2
- store i64 %9, i64 addrspace(4)* %arrayinit.element1, align 8
+ store i64 %9, ptr addrspace(4) %arrayinit.element1, align 8
 %10 = call spir_func i64 @_Z29__spirv_BuiltInGlobalLinearIdv() #2
 %11 = call spir_func i64 @_Z28__spirv_BuiltInWorkgroupSizei(i32 0) #2
 %12 = insertelement <3 x i64> undef, i64 %11, i32 0
@@ -81,8 +81,8 @@ if.end: ; preds = %entry
 %19 = extractelement <3 x i64> %16, i32 2
 %mul2 = mul i64 %mul, %19
 %conv = trunc i64 %mul2 to i32
- call spir_func void @__itt_offload_wi_start_stub(i64 addrspace(4)* %arrayinit.begin, i64 %10, i32 %conv) #4
- call void @llvm.lifetime.end.p0i8(i64 24, i8* %0)
+ call spir_func void @__itt_offload_wi_start_stub(ptr addrspace(4) %arrayinit.begin, i64 %10, i32 %conv) #4
+ call void @llvm.lifetime.end.p0i8(i64 24, ptr %0)
 br label %return
 return: ; preds = %if.end, %entry
@@ -96,10 +96,10 @@ entry:
 br i1 true, label %return, label %if.end
 if.end: ; preds = %entry
- %0 = bitcast [3 x i64]* %GroupID to i8*
- call void @llvm.lifetime.start.p0i8(i64 24, i8* %0)
- %arrayinit.begin3 = getelementptr inbounds [3 x i64], [3 x i64]* %GroupID, i64 0, i64 0
- %arrayinit.begin = addrspacecast i64* %arrayinit.begin3 to i64 addrspace(4)*
+ %0 = bitcast ptr %GroupID to ptr
+ call void @llvm.lifetime.start.p0i8(i64 24, ptr %0)
+ %arrayinit.begin3 = getelementptr inbounds [3 x i64], ptr %GroupID, i64 0, i64 0
+ %arrayinit.begin = addrspacecast ptr %arrayinit.begin3 to ptr addrspace(4)
 %1 = call spir_func i64 @_Z26__spirv_BuiltInWorkgroupIdi(i32 0) #2
 %2 = insertelement <3 x i64> undef, i64 %1, i32 0
 %3 = call spir_func i64 @_Z26__spirv_BuiltInWorkgroupIdi(i32 1) #2
@@ -107,18 +107,18 @@ if.end: ; preds = %entry
 %5 = call spir_func i64 @_Z26__spirv_BuiltInWorkgroupIdi(i32 2) #2
 %6 = insertelement <3 x i64> %4, i64 %5, i32 2
 %7 = extractelement <3 x i64> %6, i32 0
- store i64 %7, i64 addrspace(4)* %arrayinit.begin, align 8
- %arrayinit.element4 = getelementptr inbounds [3 x i64], [3 x i64]* %GroupID, i64 0, i64 1
- %arrayinit.element = addrspacecast i64* %arrayinit.element4 to i64 addrspace(4)*
+ store i64 %7, ptr addrspace(4) %arrayinit.begin, align 8
+ %arrayinit.element4 = getelementptr inbounds [3 x i64], ptr %GroupID, i64 0, i64 1
+ %arrayinit.element = addrspacecast ptr %arrayinit.element4 to ptr addrspace(4)
 %8 = extractelement <3 x i64> %6, i32 1
- store i64 %8, i64 addrspace(4)* %arrayinit.element, align 8
- %arrayinit.element15 = getelementptr inbounds [3 x i64], [3 x i64]* %GroupID, i64 0, i64 2
- %arrayinit.element1 = addrspacecast i64* %arrayinit.element15 to i64 addrspace(4)*
+ store i64 %8, ptr addrspace(4) %arrayinit.element, align 8
+ %arrayinit.element15 = getelementptr inbounds [3 x i64], ptr %GroupID, i64 0, i64 2
+ %arrayinit.element1 = addrspacecast ptr %arrayinit.element15 to ptr addrspace(4)
 %9 = extractelement <3 x i64> %6, i32 2
- store i64 %9, i64 addrspace(4)* %arrayinit.element1, align 8
+ store i64 %9, ptr addrspace(4) %arrayinit.element1, align 8
 %10 = call spir_func i64 @_Z29__spirv_BuiltInGlobalLinearIdv() #2
- call spir_func void @__itt_offload_wi_finish_stub(i64 addrspace(4)* %arrayinit.begin, i64 %10) #4
- call void @llvm.lifetime.end.p0i8(i64 24, i8* %0)
+ call spir_func void @__itt_offload_wi_finish_stub(ptr addrspace(4) %arrayinit.begin, i64 %10) #4
+ call void @llvm.lifetime.end.p0i8(i64 24, ptr %0)
 br label %return
 return: ; preds = %if.end, %entry
@@ -126,27 +126,27 @@ return: ; preds = %if.end, %entry
 }
 ; Function Attrs: noinline nounwind
-declare spir_func void @__itt_offload_wi_finish_stub(i64 addrspace(4)* %group_id, i64 %wi_id) #4
+declare spir_func void @__itt_offload_wi_finish_stub(ptr addrspace(4) %group_id, i64 %wi_id) #4
 ; Function Attrs: noinline nounwind
-declare spir_func void @__itt_offload_wi_start_stub(i64 addrspace(4)* %group_id, i64 %wi_id, i32 %wg_size) #4
+declare spir_func void @__itt_offload_wi_start_stub(ptr addrspace(4) %group_id, i64 %wi_id, i32 %wg_size) #4
 ; Function Attrs: nounwind
-define spir_kernel void @_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_E9KernelOne(float addrspace(1)* align 4 %_arg_accTmp, %0* byval(%0) align 8 %_arg_accTmp3, float addrspace(1)* align 4 %_arg_accIn1, %0* byval(%0) align 8 %_arg_accIn16, float addrspace(1)* align 4 %_arg_accIn2, %0* byval(%0) align 8 %_arg_accIn29) #5 !kernel_arg_addr_space !6 !kernel_arg_access_qual !7 !kernel_arg_type !8 !kernel_arg_type_qual !9 !kernel_arg_base_type !8 !kernel_arg_name !10 !spirv.ParameterDecorations !11 {
+define spir_kernel void @_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_E9KernelOne(ptr addrspace(1) align 4 %_arg_accTmp, ptr byval(%0) align 8 %_arg_accTmp3, ptr addrspace(1) align 4 %_arg_accIn1, ptr byval(%0) align 8 %_arg_accIn16, ptr addrspace(1) align 4 %_arg_accIn2, ptr byval(%0) align 8 %_arg_accIn29) #5 !kernel_arg_addr_space !6 !kernel_arg_access_qual !7 !kernel_arg_type !8 !kernel_arg_type_qual !9 !kernel_arg_base_type !8 !kernel_arg_name !10 !spirv.ParameterDecorations !11 {
 entry:
 call spir_func void @__itt_offload_wi_start_wrapper() #3
- %0 = getelementptr inbounds %0, %0* %_arg_accTmp3, i64 0, i32 0, i32 0, i64 0
- %1 = addrspacecast i64* %0 to i64 addrspace(4)*
- %2 = load i64, i64 addrspace(4)* %1, align 8
- %add.ptr.i = getelementptr inbounds float, float addrspace(1)* %_arg_accTmp, i64 %2
- %3 = getelementptr inbounds %0, %0* %_arg_accIn16, i64 0, i32 0, i32 0, i64 0
- %4 = addrspacecast i64* %3 to i64 addrspace(4)*
- %5 = load i64, i64 addrspace(4)* %4, align 8
- %add.ptr.i39 = getelementptr inbounds float, float addrspace(1)* %_arg_accIn1, i64 %5
- %6 = getelementptr inbounds %0, %0* %_arg_accIn29, i64 0, i32 0, i32 0, i64 0
- %7 = addrspacecast i64* %6 to i64 addrspace(4)*
- %8 = load i64, i64 addrspace(4)* %7, align 8
- %add.ptr.i53 = getelementptr inbounds float, float addrspace(1)* %_arg_accIn2, i64 %8
+ %0 = getelementptr inbounds %0, ptr %_arg_accTmp3, i64 0, i32 0, i32 0, i64 0
+ %1 = addrspacecast ptr %0 to ptr addrspace(4)
+ %2 = load i64, ptr addrspace(4) %1, align 8
+ %add.ptr.i = getelementptr inbounds float, ptr addrspace(1) %_arg_accTmp, i64 %2
+ %3 = getelementptr inbounds %0, ptr %_arg_accIn16, i64 0, i32 0, i32 0, i64 0
+ %4 = addrspacecast ptr %3 to ptr addrspace(4)
+ %5 = load i64, ptr addrspace(4) %4, align 8
+ %add.ptr.i39 = getelementptr inbounds float, ptr addrspace(1) %_arg_accIn1, i64 %5
+ %6 = getelementptr inbounds %0, ptr %_arg_accIn29, i64 0, i32 0, i32 0, i64 0
+ %7 = addrspacecast ptr %6 to ptr addrspace(4)
+ %8 = load i64, ptr addrspace(4) %7, align 8
+ %add.ptr.i53 = getelementptr inbounds float, ptr addrspace(1) %_arg_accIn2, i64 %8
 %9 = call spir_func i64 @_Z33__spirv_BuiltInGlobalInvocationIdi(i32 0) #2
 %10 = insertelement <3 x i64> undef, i64 %9, i32 0
 %11 = call spir_func i64 @_Z33__spirv_BuiltInGlobalInvocationIdi(i32 1) #2
@@ -156,36 +156,36 @@ entry:
 %15 = extractelement <3 x i64> %14, i32 0
 %cmp.i.i = icmp ult i64 %15, 2147483648
 call void @llvm.assume(i1 %cmp.i.i)
- %arrayidx.i.i = getelementptr inbounds float, float addrspace(1)* %add.ptr.i39, i64 %15
- %arrayidx.ascast.i.i = addrspacecast float addrspace(1)* %arrayidx.i.i to float addrspace(4)*
- %16 = load float, float addrspace(4)* %arrayidx.ascast.i.i, align 4
- %arrayidx.i9.i = getelementptr inbounds float, float addrspace(1)* %add.ptr.i53, i64 %15
- %arrayidx.ascast.i10.i = addrspacecast float addrspace(1)* %arrayidx.i9.i to float addrspace(4)*
- %17 = load float, float addrspace(4)* %arrayidx.ascast.i10.i, align 4
+ %arrayidx.i.i = getelementptr inbounds float, ptr addrspace(1) %add.ptr.i39, i64 %15
+ %arrayidx.ascast.i.i = addrspacecast ptr addrspace(1) %arrayidx.i.i to ptr addrspace(4)
+ %16 = load float, ptr addrspace(4) %arrayidx.ascast.i.i, align 4
+ %arrayidx.i9.i = getelementptr inbounds float, ptr addrspace(1) %add.ptr.i53, i64 %15
+ %arrayidx.ascast.i10.i = addrspacecast ptr addrspace(1) %arrayidx.i9.i to ptr addrspace(4)
+ %17 = load float, ptr addrspace(4) %arrayidx.ascast.i10.i, align 4
 %add.i = fadd float %16, %17
- %arrayidx.i13.i = getelementptr inbounds float, float addrspace(1)* %add.ptr.i, i64 %15
- %arrayidx.ascast.i14.i = addrspacecast float addrspace(1)* %arrayidx.i13.i to float addrspace(4)*
- store float %add.i, float addrspace(4)* %arrayidx.ascast.i14.i, align 4
+ %arrayidx.i13.i = getelementptr inbounds float, ptr addrspace(1) %add.ptr.i, i64 %15
+ %arrayidx.ascast.i14.i = addrspacecast ptr addrspace(1) %arrayidx.i13.i to ptr addrspace(4)
+ store float %add.i, ptr addrspace(4) %arrayidx.ascast.i14.i, align 4
 call spir_func void @__itt_offload_wi_finish_wrapper() #3
 ret void
 }
 ; Function Attrs: nounwind
-define spir_kernel void @_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE0_clES2_E9KernelTwo(float addrspace(1)* align 4 %_arg_accOut, %0* byval(%0) align 8 %_arg_accOut3, float addrspace(1)* align 4 %_arg_accTmp, %0* byval(%0) align 8 %_arg_accTmp6, float addrspace(1)* align 4 %_arg_accIn3, %0* byval(%0) align 8 %_arg_accIn39) #5 !kernel_arg_addr_space !6 !kernel_arg_access_qual !7 !kernel_arg_type !8 !kernel_arg_type_qual !9 !kernel_arg_base_type !8 !kernel_arg_name !17 !spirv.ParameterDecorations !11 {
+define spir_kernel void @_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE0_clES2_E9KernelTwo(ptr addrspace(1) align 4 %_arg_accOut, ptr byval(%0) align 8 %_arg_accOut3, ptr addrspace(1) align 4 %_arg_accTmp, ptr byval(%0) align 8 %_arg_accTmp6, ptr addrspace(1) align 4 %_arg_accIn3, ptr byval(%0) align 8 %_arg_accIn39) #5 !kernel_arg_addr_space !6 !kernel_arg_access_qual !7 !kernel_arg_type !8 !kernel_arg_type_qual !9 !kernel_arg_base_type !8 !kernel_arg_name !17 !spirv.ParameterDecorations !11 {
 entry:
 call spir_func void @__itt_offload_wi_start_wrapper() #3
- %0 = getelementptr inbounds %0, %0* %_arg_accOut3, i64 0, i32 0, i32 0, i64 0
- %1 = addrspacecast i64* %0 to i64 addrspace(4)*
- %2 = load i64, i64 addrspace(4)* %1, align 8
- %add.ptr.i = getelementptr inbounds float, float addrspace(1)* %_arg_accOut, i64 %2
- %3 = getelementptr inbounds %0, %0* %_arg_accTmp6, i64 0, i32 0, i32 0, i64 0
- %4 = addrspacecast i64* %3 to i64 addrspace(4)*
- %5 = load i64, i64 addrspace(4)* %4, align 8
- %add.ptr.i39 = getelementptr inbounds float, float addrspace(1)* %_arg_accTmp, i64 %5
- %6 = getelementptr inbounds %0, %0* %_arg_accIn39, i64 0, i32 0, i32 0, i64 0
- %7 = addrspacecast i64* %6 to i64 addrspace(4)*
- %8 = load i64, i64 addrspace(4)* %7, align 8
- %add.ptr.i53 = getelementptr inbounds float, float addrspace(1)* %_arg_accIn3, i64 %8
+ %0 = getelementptr inbounds %0, ptr %_arg_accOut3, i64 0, i32 0, i32 0, i64 0
+ %1 = addrspacecast ptr %0 to ptr addrspace(4)
+ %2 = load i64, ptr addrspace(4) %1, align 8
+ %add.ptr.i = getelementptr inbounds float, ptr addrspace(1) %_arg_accOut, i64 %2
+ %3 = getelementptr inbounds %0, ptr %_arg_accTmp6, i64 0, i32 0, i32 0, i64 0
+ %4 = addrspacecast ptr %3 to ptr addrspace(4)
+ %5 = load i64, ptr addrspace(4) %4, align 8
+ %add.ptr.i39 = getelementptr inbounds float, ptr addrspace(1) %_arg_accTmp, i64 %5
+ %6 = getelementptr inbounds %0, ptr %_arg_accIn39, i64 0, i32 0, i32 0, i64 0
+ %7 = addrspacecast ptr %6 to ptr addrspace(4)
+ %8 = load i64, ptr addrspace(4) %7, align 8
+ %add.ptr.i53 = getelementptr inbounds float, ptr addrspace(1) %_arg_accIn3, i64 %8
 %9 = call spir_func i64 @_Z33__spirv_BuiltInGlobalInvocationIdi(i32 0) #2
 %10 = insertelement <3 x i64> undef, i64 %9, i32 0
 %11 = call spir_func i64 @_Z33__spirv_BuiltInGlobalInvocationIdi(i32 1) #2
@@ -195,16 +195,16 @@ entry:
 %15 = extractelement <3 x i64> %14, i32 0
 %cmp.i.i = icmp ult i64 %15, 2147483648
 call void @llvm.assume(i1 %cmp.i.i)
- %arrayidx.i.i = getelementptr inbounds float, float addrspace(1)* %add.ptr.i39, i64 %15
- %arrayidx.ascast.i.i = addrspacecast float addrspace(1)* %arrayidx.i.i to float addrspace(4)*
- %16 = load float, float addrspace(4)* %arrayidx.ascast.i.i, align 4
- %arrayidx.i9.i = getelementptr inbounds float, float addrspace(1)* %add.ptr.i53, i64 %15
- %arrayidx.ascast.i10.i = addrspacecast float addrspace(1)* %arrayidx.i9.i to float addrspace(4)*
- %17 = load float, float addrspace(4)* %arrayidx.ascast.i10.i, align 4
+ %arrayidx.i.i = getelementptr inbounds float, ptr addrspace(1) %add.ptr.i39, i64 %15
+ %arrayidx.ascast.i.i = addrspacecast ptr addrspace(1) %arrayidx.i.i to ptr addrspace(4)
+ %16 = load float, ptr addrspace(4) %arrayidx.ascast.i.i, align 4
+ %arrayidx.i9.i = getelementptr inbounds float, ptr addrspace(1) %add.ptr.i53, i64 %15
+ %arrayidx.ascast.i10.i = addrspacecast ptr addrspace(1) %arrayidx.i9.i to ptr addrspace(4)
+ %17 = load float, ptr addrspace(4) %arrayidx.ascast.i10.i, align 4
 %mul.i = fmul float %16, %17
- %arrayidx.i13.i = getelementptr inbounds float, float addrspace(1)* %add.ptr.i, i64 %15
- %arrayidx.ascast.i14.i = addrspacecast float addrspace(1)* %arrayidx.i13.i to float addrspace(4)*
- store float %mul.i, float addrspace(4)* %arrayidx.ascast.i14.i, align 4
+ %arrayidx.i13.i = getelementptr inbounds float, ptr addrspace(1) %add.ptr.i, i64 %15
+ %arrayidx.ascast.i14.i = addrspacecast ptr addrspace(1) %arrayidx.i13.i to ptr addrspace(4)
+ store float %mul.i, ptr addrspace(4) %arrayidx.ascast.i14.i, align 4
 call spir_func void @__itt_offload_wi_finish_wrapper() #3
 ret void
 }
@@ -220,7 +220,7 @@ attributes #5 = { nounwind }
 !6 = !{i32 1, i32 0, i32 1, i32 0, i32 1, i32 0}
 !7 = !{!"none", !"none", !"none", !"none", !"none", !"none"}
-!8 = !{!"float*", !"class.sycl::_V1::range", !"float*", !"class.sycl::_V1::range", !"float*", !"class.sycl::_V1::range"}
+!8 = !{!"ptr", !"class.sycl::_V1::range", !"ptr", !"class.sycl::_V1::range", !"ptr", !"class.sycl::_V1::range"}
 !9 = !{!"", !"", !"", !"", !"", !""}
 !10 = !{!"_arg_accTmp", !"_arg_accTmp3", !"_arg_accIn1", !"_arg_accIn16", !"_arg_accIn2", !"_arg_accIn29"}
 !11 = !{!12, !14, !12, !14, !12, !14}
@@ -254,16 +254,16 @@ attributes #5 = { nounwind }
 ; via 'implicit-check-not'.
 ; FUSION-LABEL: define spir_kernel void @fused_0
-; FUSION-SAME: float addrspace(1)* align 4
-; FUSION-SAME: %0* byval(%0) align 8
-; FUSION-SAME: float addrspace(1)* align 4
-; FUSION-SAME: %0* byval(%0) align 8
-; FUSION-SAME: float addrspace(1)* align 4
-; FUSION-SAME: %0* byval(%0) align 8
-; FUSION-SAME: float addrspace(1)* align 4
-; FUSION-SAME: %0* byval(%0) align 8
-; FUSION-SAME: float addrspace(1)* align 4
-; FUSION-SAME: %0* byval(%0) align 8
+; FUSION-SAME: ptr addrspace(1) align 4
+; FUSION-SAME: ptr byval(%0) align 8
+; FUSION-SAME: ptr addrspace(1) align 4
+; FUSION-SAME: ptr byval(%0) align 8
+; FUSION-SAME: ptr addrspace(1) align 4
+; FUSION-SAME: ptr byval(%0) align 8
+; FUSION-SAME: ptr addrspace(1) align 4
+; FUSION-SAME: ptr byval(%0) align 8
+; FUSION-SAME: ptr addrspace(1) align 4
+; FUSION-SAME: ptr byval(%0) align 8
 ; FUSION-LABEL: entry:
 ; FUSION-NEXT: call spir_func void @__itt_offload_wi_start_wrapper()
 ; FUSION: [[IN1:%.*]] = load float
@@ -283,16 +283,16 @@ attributes #5 = { nounwind }
 ; attached to the fused kernel.
 ; MD-LABEL: define spir_kernel void @fused_0
-; MD-SAME: float addrspace(1)* align 4 %[[ARG1:[^,]+]]
-; MD-SAME: %0* byval(%0) align 8 %[[ARG2:[^,]+]]
-; MD-SAME: float addrspace(1)* align 4 %[[ARG3:[^,]+]]
-; MD-SAME: %0* byval(%0) align 8 %[[ARG4:[^,]+]]
-; MD-SAME: float addrspace(1)* align 4 %[[ARG5:[^,]+]]
-; MD-SAME: %0* byval(%0) align 8 %[[ARG6:[^,]+]]
-; MD-SAME: float addrspace(1)* align 4 %[[ARG7:[^,]+]]
-; MD-SAME: %0* byval(%0) align 8 %[[ARG8:[^,]+]]
-; MD-SAME: float addrspace(1)* align 4 %[[ARG9:[^,]+]]
-; MD-SAME: %0* byval(%0) align 8 %[[ARG10:[^)]+]]
+; MD-SAME: ptr addrspace(1) align 4 %[[ARG1:[^,]+]]
+; MD-SAME: ptr byval(%0) align 8 %[[ARG2:[^,]+]]
+; MD-SAME: ptr addrspace(1) align 4 %[[ARG3:[^,]+]]
+; MD-SAME: ptr byval(%0) align 8 %[[ARG4:[^,]+]]
+; MD-SAME: ptr addrspace(1) align 4 %[[ARG5:[^,]+]]
+; MD-SAME: ptr byval(%0) align 8 %[[ARG6:[^,]+]]
+; MD-SAME: ptr addrspace(1) align 4 %[[ARG7:[^,]+]]
+; MD-SAME: ptr byval(%0) align 8 %[[ARG8:[^,]+]]
+; MD-SAME: ptr addrspace(1) align 4 %[[ARG9:[^,]+]]
+; MD-SAME: ptr byval(%0) align 8 %[[ARG10:[^)]+]]
 ; MD-SAME: !kernel_arg_addr_space ![[#ADDR_SPACE:]]
 ; MD-SAME: !kernel_arg_access_qual ![[#ACCESS_QUAL:]]
 ; MD-SAME: !kernel_arg_type ![[#ARG_TYPE:]]
@@ -302,7 +302,7 @@ attributes #5 = { nounwind }
 ;.
 ; MD: [[#ADDR_SPACE]] = !{i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0}
 ; MD: [[#ACCESS_QUAL]] = !{!"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none"}
-; MD: [[#ARG_TYPE]] = !{!"float*", !"class.sycl::_V1::range", !"float*", !"class.sycl::_V1::range", !"float*", !"class.sycl::_V1::range", !"float*", !"class.sycl::_V1::range", !"float*", !"class.sycl::_V1::range"}
+; MD: [[#ARG_TYPE]] = !{!"ptr", !"class.sycl::_V1::range", !"ptr", !"class.sycl::_V1::range", !"ptr", !"class.sycl::_V1::range", !"ptr", !"class.sycl::_V1::range", !"ptr", !"class.sycl::_V1::range"}
 ; MD: [[#TYPE_QUAL]] = !{!"", !"", !"", !"", !"", !"", !"", !"", !"", !""}
 ; MD: [[#ARG_NAME]] = !{!"[[ARG1]]", !"[[ARG2]]", !"[[ARG3]]", !"[[ARG4]]", !"[[ARG5]]", !"[[ARG6]]", !"[[ARG7]]", !"[[ARG8]]", !"[[ARG9]]", !"[[ARG10]]"}
 ;.
diff --git a/sycl-fusion/test/kernel-fusion/work_group_size_hint.ll b/sycl-fusion/test/kernel-fusion/work_group_size_hint.ll
index e1e51dba740ff..988e295a5a88f 100644
--- a/sycl-fusion/test/kernel-fusion/work_group_size_hint.ll
+++ b/sycl-fusion/test/kernel-fusion/work_group_size_hint.ll
@@ -16,21 +16,21 @@ declare spir_func void @__itt_offload_wi_finish_wrapper() #3
 ; Function Attrs: nounwind
-define spir_kernel void @KernelOne(float addrspace(1)* align 4 %_arg_x) #2 !kernel_arg_addr_space !6 !kernel_arg_access_qual !7 !kernel_arg_type !8 !kernel_arg_type_qual !9 !kernel_arg_base_type !8 !kernel_arg_name !10 !work_group_size_hint !11 {
+define spir_kernel void @KernelOne(ptr addrspace(1) align 4 %_arg_x) #2 !kernel_arg_addr_space !6 !kernel_arg_access_qual !7 !kernel_arg_type !8 !kernel_arg_type_qual !9 !kernel_arg_base_type !8 !kernel_arg_name !10 !work_group_size_hint !11 {
 entry:
 call spir_func void @__itt_offload_wi_start_wrapper() #3
- %0 = addrspacecast float addrspace(1)* %_arg_x to float addrspace(4)*
- store float 4.200000e+01, float addrspace(4)* %0, align 4
+ %0 = addrspacecast ptr addrspace(1) %_arg_x to ptr addrspace(4)
+ store float 4.200000e+01, ptr addrspace(4) %0, align 4
 call spir_func void @__itt_offload_wi_finish_wrapper() #3
 ret void
 }
 ; Function Attrs: nounwind
-define spir_kernel void @KernelTwo(float addrspace(1)* align 4 %_arg_y) #2 !kernel_arg_addr_space !6 !kernel_arg_access_qual !7 !kernel_arg_type !8 !kernel_arg_type_qual !9 !kernel_arg_base_type !8 !kernel_arg_name !12 {
+define spir_kernel void @KernelTwo(ptr addrspace(1) align 4 %_arg_y) #2 !kernel_arg_addr_space !6 !kernel_arg_access_qual !7 !kernel_arg_type !8 !kernel_arg_type_qual !9 !kernel_arg_base_type !8 !kernel_arg_name !12 {
 entry:
 call spir_func void @__itt_offload_wi_start_wrapper() #3
- %0 = addrspacecast float addrspace(1)* %_arg_y to float addrspace(4)*
- store float 2.500000e+01, float addrspace(4)* %0, align 4
+ %0 = addrspacecast ptr addrspace(1) %_arg_y to ptr addrspace(4)
+ store float 2.500000e+01, ptr addrspace(4) %0, align 4
 call spir_func void @__itt_offload_wi_finish_wrapper() #3
 ret void
 }
@@ -42,7 +42,7 @@ attributes #3 = { alwaysinline nounwind }
 !6 = !{i32 1}
 !7 = !{!"none"}
-!8 = !{!"float*"}
+!8 = !{!"ptr"}
 !9 = !{!""}
 !10 = !{!"_arg_x"}
 !11 = !{i32 64, i32 1, i32 1}
diff --git a/sycl-fusion/test/syclcp/syclcp.ll b/sycl-fusion/test/syclcp/syclcp.ll
index 4ebca007b28c8..9bcf678c170c6 100644
--- a/sycl-fusion/test/syclcp/syclcp.ll
+++ b/sycl-fusion/test/syclcp/syclcp.ll
@@ -9,13 +9,13 @@ target triple = "spir64-unknown-unknown"
 %3 = type { %4, %4, [10 x i32] }
 %4 = type { %5, %6 }
 %5 = type { %0, %0, %0 }
-%6 = type { i32 addrspace(1)* }
+%6 = type { ptr addrspace(1) }
 ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
-declare void @llvm.lifetime.start.p0i8(i64 immarg %0, i8* nocapture %1) #0
+declare void @llvm.lifetime.start.p0i8(i64 immarg %0, ptr nocapture %1) #0
 ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
-declare void @llvm.lifetime.end.p0i8(i64 immarg %0, i8* nocapture %1) #0
+declare void @llvm.lifetime.end.p0i8(i64 immarg %0, ptr nocapture %1) #0
 ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite)
 declare void @llvm.assume(i1 noundef %0) #1
@@ -45,13 +45,13 @@ declare spir_func void @__itt_offload_wi_start_wrapper() #4
 declare spir_func void @__itt_offload_wi_finish_wrapper() #4
 ; Function Attrs: noinline nounwind
-declare spir_func void @__itt_offload_wi_finish_stub(i64 addrspace(4)* %group_id, i64 %wi_id) #5
+declare spir_func void @__itt_offload_wi_finish_stub(ptr addrspace(4) %group_id, i64 %wi_id) #5
 ; Function Attrs: noinline nounwind
-declare spir_func void @__itt_offload_wi_start_stub(i64 addrspace(4)* %group_id, i64 %wi_id, i32 %wg_size) #5
+declare spir_func void @__itt_offload_wi_start_stub(ptr addrspace(4) %group_id, i64 %wi_id, i32 %wg_size) #5
-define spir_kernel void @fused_0(%0* byval(%0) align 8 %KernelOne_accTmp3, i32 addrspace(1)* align 4 %KernelOne_accIn1, %0* byval(%0) align 8 %KernelOne_accIn16, i32 addrspace(1)* align 4 %KernelOne_accIn2, i32 addrspace(1)* align 4 %KernelTwo_out, %2* byval(%2) align 4 %KernelTwo_coef) !kernel_arg_addr_space !16 !kernel_arg_access_qual !7 !kernel_arg_type !17 !kernel_arg_type_qual !9 !kernel_arg_base_type !17 !kernel_arg_name !18 !sycl.kernel.constants !19 {
+define spir_kernel void @fused_0(ptr byval(%0) align 8 %KernelOne_accTmp3, ptr addrspace(1) align 4 %KernelOne_accIn1, ptr byval(%0) align 8 %KernelOne_accIn16, ptr addrspace(1) align 4 %KernelOne_accIn2, ptr addrspace(1) align 4 %KernelTwo_out, ptr byval(%2) align 4 %KernelTwo_coef) !kernel_arg_addr_space !16 !kernel_arg_access_qual !7 !kernel_arg_type !17 !kernel_arg_type_qual !9 !kernel_arg_base_type !17 !kernel_arg_name !18 !sycl.kernel.constants !19 {
 ; Scenario: Test constant propagation. Propagates a scalar ([1 x i64]) and
 ; an aggregate ([10 x i32]) constant. The test mainly verifies that the function
 ; signature has been updated, i.e., the propagated arguments have been removed,
 ; CHECK: [[TYPE2:%.*]] = type { [10 x i32] }
 ; CHECK: define {{[^@]+}}@fused_0
-; CHECK-SAME: ([[TYPE0:%.*]]* byval([[TYPE0]]) align 8 [[KernelOne_ACCTMP3:%.*]], i32 addrspace(1)* align 4 [[KernelOne_ACCIN1:%.*]], i32 addrspace(1)* align 4 [[KernelOne_ACCIN2:%.*]], i32 addrspace(1)* align 4 [[KernelTwo_OUT:%.*]])
+; CHECK-SAME: (ptr byval([[TYPE0:%.*]]) align 8 [[KernelOne_ACCTMP3:%.*]], ptr addrspace(1) align 4 [[KernelOne_ACCIN1:%.*]], ptr addrspace(1) align 4 [[KernelOne_ACCIN2:%.*]], ptr addrspace(1) align 4 [[KernelTwo_OUT:%.*]])
 ; CHECK: entry:
 ; CHECK: [[TMP0:%.*]] = alloca [[TYPE0]], align 8
-; CHECK: [[TMP1:%.*]] = getelementptr inbounds [[TYPE0]], [[TYPE0]]* [[TMP0]], i32 0, i32 0, i32 0, i32 0
-; CHECK: store i64 0, i64* [[TMP1]], align 8
+; CHECK: [[TMP1:%.*]] = getelementptr inbounds [[TYPE0]], ptr [[TMP0]], i32 0, i32 0, i32 0, i32 0
+; CHECK: store i64 0, ptr [[TMP1]], align 8
 ; CHECK: [[TMP2:%.*]] = alloca [[TYPE2]], align 8
-; CHECK: [[TMP3:%.*]] = getelementptr inbounds [[TYPE2]], [[TYPE2]]* [[TMP2]], i32 0, i32 0, i32 0
-; CHECK: store i32 0, i32* [[TMP3]], align 4
-; CHECK: [[TMP4:%.*]] = getelementptr inbounds [[TYPE2]], [[TYPE2]]* [[TMP2]], i32 0, i32 0, i32 1
-; CHECK: store i32 1, i32* [[TMP4]], align 4
-; CHECK: [[TMP5:%.*]] = getelementptr inbounds [[TYPE2]], [[TYPE2]]* [[TMP2]], i32 0, i32 0, i32 2
-; CHECK: store i32 2, i32* [[TMP5]], align 4
-; CHECK: [[TMP6:%.*]] = getelementptr inbounds [[TYPE2]], [[TYPE2]]* [[TMP2]], i32 0, i32 0, i32 3
-; CHECK: store i32 3, i32* [[TMP6]], align 4
-; CHECK: [[TMP7:%.*]] = getelementptr inbounds [[TYPE2]], [[TYPE2]]* [[TMP2]], i32 0, i32 0, i32 4
-; CHECK: store i32 4, i32* [[TMP7]], align 4
-; CHECK: [[TMP8:%.*]] = getelementptr inbounds [[TYPE2]], [[TYPE2]]* [[TMP2]], i32 0, i32 0, i32 5
-; CHECK: store i32 5, i32* [[TMP8]], align 4
-; CHECK: [[TMP9:%.*]] = getelementptr inbounds [[TYPE2]], [[TYPE2]]* [[TMP2]], i32 0, i32 0, i32 6
-; CHECK: store i32 6, i32* [[TMP9]], align 4
-; CHECK: [[TMP10:%.*]] = getelementptr inbounds [[TYPE2]], [[TYPE2]]* [[TMP2]], i32 0, i32 0, i32 7
-; CHECK: store i32 7, i32* [[TMP10]], align 4
-; CHECK: [[TMP11:%.*]] = getelementptr inbounds [[TYPE2]], [[TYPE2]]* [[TMP2]], i32 0, i32 0, i32 8
-; CHECK: store i32 8, i32* [[TMP11]], align 4
-; CHECK: [[TMP12:%.*]] = getelementptr inbounds [[TYPE2]], [[TYPE2]]* [[TMP2]], i32 0, i32 0, i32 9
-; CHECK: store i32 9, i32* [[TMP12]], align 4
+; CHECK: [[TMP3:%.*]] = getelementptr inbounds [[TYPE2]], ptr [[TMP2]], i32 0, i32 0, i32 0
+; CHECK: store i32 0, ptr [[TMP3]], align 4
+; CHECK: [[TMP4:%.*]] = getelementptr inbounds [[TYPE2]], ptr [[TMP2]], i32 0, i32 0, i32 1
+; CHECK: store i32 1, ptr [[TMP4]], align 4
+; CHECK: [[TMP5:%.*]] = getelementptr inbounds [[TYPE2]], ptr [[TMP2]], i32 0, i32 0, i32 2
+; CHECK: store i32 2, ptr [[TMP5]], align 4
+; CHECK: [[TMP6:%.*]] = getelementptr inbounds [[TYPE2]], ptr [[TMP2]], i32 0, i32 0, i32 3
+; CHECK: store i32 3, ptr [[TMP6]], align 4
+; CHECK: [[TMP7:%.*]] = getelementptr inbounds [[TYPE2]], ptr [[TMP2]], i32 0, i32 0, i32 4
+; CHECK: store i32 4, ptr [[TMP7]], align 4
+; CHECK: [[TMP8:%.*]] = getelementptr inbounds [[TYPE2]], ptr [[TMP2]], i32 0, i32 0, i32 5
+; CHECK: store i32 5, ptr [[TMP8]], align 4
+; CHECK: [[TMP9:%.*]] = getelementptr inbounds [[TYPE2]], ptr [[TMP2]], i32 0, i32 0, i32 6
+; CHECK: store i32 6, ptr [[TMP9]], align 4
+; CHECK: [[TMP10:%.*]] = getelementptr inbounds [[TYPE2]], ptr [[TMP2]], i32 0, i32 0, i32 7
+; CHECK: store i32 7, ptr [[TMP10]], align 4
+; CHECK: [[TMP11:%.*]] = getelementptr inbounds [[TYPE2]], ptr [[TMP2]], i32 0, i32 0, i32 8
+; CHECK: store i32 8, ptr [[TMP11]], align 4
+; CHECK: [[TMP12:%.*]] = getelementptr inbounds [[TYPE2]], ptr [[TMP2]], i32 0, i32 0, i32 9
+; CHECK: store i32 9, ptr [[TMP12]], align 4
 ; CHECK: [[TMP13:%.*]] = alloca [1 x i32], align 4
-; CHECK: [[TMP14:%.*]] = getelementptr inbounds [1 x i32], [1 x i32]* [[TMP13]], i64 0, i64 0
+; CHECK: [[TMP14:%.*]] = getelementptr inbounds [1 x i32], ptr [[TMP13]], i64 0, i64 0
 ; CHECK: [[KERNELTWO_I:%.*]] = alloca [[TMP3]], align 8
-; CHECK: [[KernelOne_ACCIN163_SROA_0_0__SROA_IDX:%.*]] = getelementptr inbounds [[TMP0]], [[TYPE0]]* [[TMP0]], i64 0, i32 0, i32 0, i64 0
-; CHECK: [[KernelOne_ACCIN163_SROA_0_0_COPYLOAD:%.*]] = load i64, i64*
-; CHECK: [[KernelTwo_COEF6_SROA_0_0__SROA_IDX:%.*]] = getelementptr inbounds [[TYPE2]], [[TYPE2]]* [[TMP2]], i64 0, i32 0, i64 0
-; CHECK: [[KernelTwo_COEF6_SROA_0_0_COPYLOAD:%.*]] = load i32, i32* [[KernelTwo_COEF6_SROA_0_0__SROA_IDX]], align 1
+; CHECK: [[KernelOne_ACCIN163_SROA_0_0__SROA_IDX:%.*]] = getelementptr inbounds [[TMP0]], ptr [[TMP0]], i64 0, i32 0, i32 0, i64 0
+; CHECK: [[KernelOne_ACCIN163_SROA_0_0_COPYLOAD:%.*]] = load i64, ptr
+; CHECK: [[KernelTwo_COEF6_SROA_0_0__SROA_IDX:%.*]] = getelementptr inbounds [[TYPE2]], ptr [[TMP2]], i64 0, i32 0, i64 0
+; CHECK: [[KernelTwo_COEF6_SROA_0_0_COPYLOAD:%.*]] = load i32, ptr [[KernelTwo_COEF6_SROA_0_0__SROA_IDX]], align 1
 ; CHECK: ret void
 ;
 entry:
 %0 = alloca [1 x i32], align 4
- %1 = getelementptr inbounds [1 x i32], [1 x i32]* %0, i64 0, i64 0
+ %1 = getelementptr inbounds [1 x i32], ptr %0, i64 0, i64 0
 %KernelTwo.i = alloca %3, align 8
- %KernelOne_accIn163.sroa.0.0..sroa_idx = getelementptr inbounds %0, %0* %KernelOne_accIn16, i64 0, i32 0, i32 0, i64 0
- %KernelOne_accIn163.sroa.0.0.copyload = load i64, i64*
%KernelOne_accIn163.sroa.0.0..sroa_idx, align 1 - %KernelOne_accIn162.sroa.0.0..sroa_idx = getelementptr inbounds %0, %0* %KernelOne_accIn16, i64 0, i32 0, i32 0, i64 0 - %KernelOne_accIn162.sroa.0.0.copyload = load i64, i64* %KernelOne_accIn162.sroa.0.0..sroa_idx, align 1 - %KernelOne_accTmp31.sroa.0.0..sroa_idx = getelementptr inbounds %0, %0* %KernelOne_accTmp3, i64 0, i32 0, i32 0, i64 0 - %KernelOne_accTmp31.sroa.0.0.copyload = load i64, i64* %KernelOne_accTmp31.sroa.0.0..sroa_idx, align 1 + %KernelOne_accIn163.sroa.0.0..sroa_idx = getelementptr inbounds %0, ptr %KernelOne_accIn16, i64 0, i32 0, i32 0, i64 0 + %KernelOne_accIn163.sroa.0.0.copyload = load i64, ptr %KernelOne_accIn163.sroa.0.0..sroa_idx, align 1 + %KernelOne_accIn162.sroa.0.0..sroa_idx = getelementptr inbounds %0, ptr %KernelOne_accIn16, i64 0, i32 0, i32 0, i64 0 + %KernelOne_accIn162.sroa.0.0.copyload = load i64, ptr %KernelOne_accIn162.sroa.0.0..sroa_idx, align 1 + %KernelOne_accTmp31.sroa.0.0..sroa_idx = getelementptr inbounds %0, ptr %KernelOne_accTmp3, i64 0, i32 0, i32 0, i64 0 + %KernelOne_accTmp31.sroa.0.0.copyload = load i64, ptr %KernelOne_accTmp31.sroa.0.0..sroa_idx, align 1 call spir_func void @__itt_offload_wi_start_wrapper() #4 - %add.ptr.i.i = getelementptr inbounds i32, i32* %1, i64 0 - %add.ptr.i39.i = getelementptr inbounds i32, i32 addrspace(1)* %KernelOne_accIn1, i64 %KernelOne_accIn162.sroa.0.0.copyload - %add.ptr.i53.i = getelementptr inbounds i32, i32 addrspace(1)* %KernelOne_accIn2, i64 %KernelOne_accIn163.sroa.0.0.copyload + %add.ptr.i.i = getelementptr inbounds i32, ptr %1, i64 0 + %add.ptr.i39.i = getelementptr inbounds i32, ptr addrspace(1) %KernelOne_accIn1, i64 %KernelOne_accIn162.sroa.0.0.copyload + %add.ptr.i53.i = getelementptr inbounds i32, ptr addrspace(1) %KernelOne_accIn2, i64 %KernelOne_accIn163.sroa.0.0.copyload %2 = call spir_func i64 @_Z13get_global_idj(i32 0) #2 %3 = call spir_func i64 @_Z13get_global_idj(i32 1) #2 %4 = call spir_func i64 @_Z13get_global_idj(i32 2) #2 %cmp.i.i.i = icmp ult i64 %2, 2147483648 call void @llvm.assume(i1 %cmp.i.i.i) - %arrayidx.i.i.i = getelementptr inbounds i32, i32 addrspace(1)* %add.ptr.i39.i, i64 %2 - %5 = load i32, i32 addrspace(1)* %arrayidx.i.i.i, align 4 - %arrayidx.i9.i.i = getelementptr inbounds i32, i32 addrspace(1)* %add.ptr.i53.i, i64 %2 - %6 = load i32, i32 addrspace(1)* %arrayidx.i9.i.i, align 4 + %arrayidx.i.i.i = getelementptr inbounds i32, ptr addrspace(1) %add.ptr.i39.i, i64 %2 + %5 = load i32, ptr addrspace(1) %arrayidx.i.i.i, align 4 + %arrayidx.i9.i.i = getelementptr inbounds i32, ptr addrspace(1) %add.ptr.i53.i, i64 %2 + %6 = load i32, ptr addrspace(1) %arrayidx.i9.i.i, align 4 %add.i.i = add nsw i32 %5, %6 - %arrayidx.i13.i.i = getelementptr inbounds i32, i32* %add.ptr.i.i, i64 0 - store i32 %add.i.i, i32* %arrayidx.i13.i.i, align 4 + %arrayidx.i13.i.i = getelementptr inbounds i32, ptr %add.ptr.i.i, i64 0 + store i32 %add.i.i, ptr %arrayidx.i13.i.i, align 4 call spir_func void @__itt_offload_wi_finish_wrapper() #4 - %KernelTwo_coef6.sroa.0.0..sroa_idx = getelementptr inbounds %2, %2* %KernelTwo_coef, i64 0, i32 0, i64 0 - %KernelTwo_coef6.sroa.0.0.copyload = load i32, i32* %KernelTwo_coef6.sroa.0.0..sroa_idx, align 1 - %KernelTwo_coef6.sroa.4.0..sroa_idx17 = getelementptr inbounds %2, %2* %KernelTwo_coef, i64 0, i32 0, i64 1 - %KernelTwo_coef6.sroa.4.0.copyload = load i32, i32* %KernelTwo_coef6.sroa.4.0..sroa_idx17, align 1 - %KernelTwo_coef6.sroa.5.0..sroa_idx19 = getelementptr inbounds %2, %2* %KernelTwo_coef, i64 0, i32 
0, i64 2 - %KernelTwo_coef6.sroa.5.0.copyload = load i32, i32* %KernelTwo_coef6.sroa.5.0..sroa_idx19, align 1 - %KernelTwo_coef6.sroa.6.0..sroa_idx21 = getelementptr inbounds %2, %2* %KernelTwo_coef, i64 0, i32 0, i64 3 - %KernelTwo_coef6.sroa.6.0.copyload = load i32, i32* %KernelTwo_coef6.sroa.6.0..sroa_idx21, align 1 - %KernelTwo_coef6.sroa.7.0..sroa_idx23 = getelementptr inbounds %2, %2* %KernelTwo_coef, i64 0, i32 0, i64 4 - %KernelTwo_coef6.sroa.7.0.copyload = load i32, i32* %KernelTwo_coef6.sroa.7.0..sroa_idx23, align 1 - %KernelTwo_coef6.sroa.8.0..sroa_idx25 = getelementptr inbounds %2, %2* %KernelTwo_coef, i64 0, i32 0, i64 5 - %KernelTwo_coef6.sroa.8.0.copyload = load i32, i32* %KernelTwo_coef6.sroa.8.0..sroa_idx25, align 1 - %KernelTwo_coef6.sroa.9.0..sroa_idx27 = getelementptr inbounds %2, %2* %KernelTwo_coef, i64 0, i32 0, i64 6 - %KernelTwo_coef6.sroa.9.0.copyload = load i32, i32* %KernelTwo_coef6.sroa.9.0..sroa_idx27, align 1 - %KernelTwo_coef6.sroa.10.0..sroa_idx29 = getelementptr inbounds %2, %2* %KernelTwo_coef, i64 0, i32 0, i64 7 - %KernelTwo_coef6.sroa.10.0.copyload = load i32, i32* %KernelTwo_coef6.sroa.10.0..sroa_idx29, align 1 - %KernelTwo_coef6.sroa.11.0..sroa_idx31 = getelementptr inbounds %2, %2* %KernelTwo_coef, i64 0, i32 0, i64 8 - %KernelTwo_coef6.sroa.11.0.copyload = load i32, i32* %KernelTwo_coef6.sroa.11.0..sroa_idx31, align 1 - %KernelTwo_coef6.sroa.12.0..sroa_idx33 = getelementptr inbounds %2, %2* %KernelTwo_coef, i64 0, i32 0, i64 9 - %KernelTwo_coef6.sroa.12.0.copyload = load i32, i32* %KernelTwo_coef6.sroa.12.0..sroa_idx33, align 1 - %KernelOne_accIn165.sroa.0.0..sroa_idx = getelementptr inbounds %0, %0* %KernelOne_accIn16, i64 0, i32 0, i32 0, i64 0 - %KernelOne_accIn165.sroa.0.0.copyload = load i64, i64* %KernelOne_accIn165.sroa.0.0..sroa_idx, align 1 - %KernelOne_accTmp34.sroa.0.0..sroa_idx = getelementptr inbounds %0, %0* %KernelOne_accTmp3, i64 0, i32 0, i32 0, i64 0 - %KernelOne_accTmp34.sroa.0.0.copyload = load i64, i64* %KernelOne_accTmp34.sroa.0.0..sroa_idx, align 1 + %KernelTwo_coef6.sroa.0.0..sroa_idx = getelementptr inbounds %2, ptr %KernelTwo_coef, i64 0, i32 0, i64 0 + %KernelTwo_coef6.sroa.0.0.copyload = load i32, ptr %KernelTwo_coef6.sroa.0.0..sroa_idx, align 1 + %KernelTwo_coef6.sroa.4.0..sroa_idx17 = getelementptr inbounds %2, ptr %KernelTwo_coef, i64 0, i32 0, i64 1 + %KernelTwo_coef6.sroa.4.0.copyload = load i32, ptr %KernelTwo_coef6.sroa.4.0..sroa_idx17, align 1 + %KernelTwo_coef6.sroa.5.0..sroa_idx19 = getelementptr inbounds %2, ptr %KernelTwo_coef, i64 0, i32 0, i64 2 + %KernelTwo_coef6.sroa.5.0.copyload = load i32, ptr %KernelTwo_coef6.sroa.5.0..sroa_idx19, align 1 + %KernelTwo_coef6.sroa.6.0..sroa_idx21 = getelementptr inbounds %2, ptr %KernelTwo_coef, i64 0, i32 0, i64 3 + %KernelTwo_coef6.sroa.6.0.copyload = load i32, ptr %KernelTwo_coef6.sroa.6.0..sroa_idx21, align 1 + %KernelTwo_coef6.sroa.7.0..sroa_idx23 = getelementptr inbounds %2, ptr %KernelTwo_coef, i64 0, i32 0, i64 4 + %KernelTwo_coef6.sroa.7.0.copyload = load i32, ptr %KernelTwo_coef6.sroa.7.0..sroa_idx23, align 1 + %KernelTwo_coef6.sroa.8.0..sroa_idx25 = getelementptr inbounds %2, ptr %KernelTwo_coef, i64 0, i32 0, i64 5 + %KernelTwo_coef6.sroa.8.0.copyload = load i32, ptr %KernelTwo_coef6.sroa.8.0..sroa_idx25, align 1 + %KernelTwo_coef6.sroa.9.0..sroa_idx27 = getelementptr inbounds %2, ptr %KernelTwo_coef, i64 0, i32 0, i64 6 + %KernelTwo_coef6.sroa.9.0.copyload = load i32, ptr %KernelTwo_coef6.sroa.9.0..sroa_idx27, align 1 + %KernelTwo_coef6.sroa.10.0..sroa_idx29 
= getelementptr inbounds %2, ptr %KernelTwo_coef, i64 0, i32 0, i64 7 + %KernelTwo_coef6.sroa.10.0.copyload = load i32, ptr %KernelTwo_coef6.sroa.10.0..sroa_idx29, align 1 + %KernelTwo_coef6.sroa.11.0..sroa_idx31 = getelementptr inbounds %2, ptr %KernelTwo_coef, i64 0, i32 0, i64 8 + %KernelTwo_coef6.sroa.11.0.copyload = load i32, ptr %KernelTwo_coef6.sroa.11.0..sroa_idx31, align 1 + %KernelTwo_coef6.sroa.12.0..sroa_idx33 = getelementptr inbounds %2, ptr %KernelTwo_coef, i64 0, i32 0, i64 9 + %KernelTwo_coef6.sroa.12.0.copyload = load i32, ptr %KernelTwo_coef6.sroa.12.0..sroa_idx33, align 1 + %KernelOne_accIn165.sroa.0.0..sroa_idx = getelementptr inbounds %0, ptr %KernelOne_accIn16, i64 0, i32 0, i32 0, i64 0 + %KernelOne_accIn165.sroa.0.0.copyload = load i64, ptr %KernelOne_accIn165.sroa.0.0..sroa_idx, align 1 + %KernelOne_accTmp34.sroa.0.0..sroa_idx = getelementptr inbounds %0, ptr %KernelOne_accTmp3, i64 0, i32 0, i32 0, i64 0 + %KernelOne_accTmp34.sroa.0.0.copyload = load i64, ptr %KernelOne_accTmp34.sroa.0.0..sroa_idx, align 1 call spir_func void @__itt_offload_wi_start_wrapper() #4 - %7 = bitcast %3* %KernelTwo.i to i8* - call void @llvm.lifetime.start.p0i8(i64 104, i8* %7) + %7 = bitcast ptr %KernelTwo.i to ptr + call void @llvm.lifetime.start.p0i8(i64 104, ptr %7) br label %arrayinit.body.i arrayinit.body.i: ; preds = %entry - %8 = getelementptr inbounds %3, %3* %KernelTwo.i, i64 0, i32 2, i64 0 - store i32 %KernelTwo_coef6.sroa.0.0.copyload, i32* %8, align 4 - %9 = getelementptr inbounds %3, %3* %KernelTwo.i, i64 0, i32 2, i64 1 - store i32 %KernelTwo_coef6.sroa.4.0.copyload, i32* %9, align 4 - %10 = getelementptr inbounds %3, %3* %KernelTwo.i, i64 0, i32 2, i64 2 - store i32 %KernelTwo_coef6.sroa.5.0.copyload, i32* %10, align 4 - %11 = getelementptr inbounds %3, %3* %KernelTwo.i, i64 0, i32 2, i64 3 - store i32 %KernelTwo_coef6.sroa.6.0.copyload, i32* %11, align 4 - %12 = getelementptr inbounds %3, %3* %KernelTwo.i, i64 0, i32 2, i64 4 - store i32 %KernelTwo_coef6.sroa.7.0.copyload, i32* %12, align 4 - %13 = getelementptr inbounds %3, %3* %KernelTwo.i, i64 0, i32 2, i64 5 - store i32 %KernelTwo_coef6.sroa.8.0.copyload, i32* %13, align 4 - %14 = getelementptr inbounds %3, %3* %KernelTwo.i, i64 0, i32 2, i64 6 - store i32 %KernelTwo_coef6.sroa.9.0.copyload, i32* %14, align 4 - %15 = getelementptr inbounds %3, %3* %KernelTwo.i, i64 0, i32 2, i64 7 - store i32 %KernelTwo_coef6.sroa.10.0.copyload, i32* %15, align 4 - %16 = getelementptr inbounds %3, %3* %KernelTwo.i, i64 0, i32 2, i64 8 - store i32 %KernelTwo_coef6.sroa.11.0.copyload, i32* %16, align 4 - %17 = getelementptr inbounds %3, %3* %KernelTwo.i, i64 0, i32 2, i64 9 - store i32 %KernelTwo_coef6.sroa.12.0.copyload, i32* %17, align 4 - %add.ptr.i.i7 = getelementptr inbounds i32, i32* %1, i64 0 - %add.ptr.i33.i = getelementptr inbounds i32, i32 addrspace(1)* %KernelTwo_out, i64 %KernelOne_accIn165.sroa.0.0.copyload + %8 = getelementptr inbounds %3, ptr %KernelTwo.i, i64 0, i32 2, i64 0 + store i32 %KernelTwo_coef6.sroa.0.0.copyload, ptr %8, align 4 + %9 = getelementptr inbounds %3, ptr %KernelTwo.i, i64 0, i32 2, i64 1 + store i32 %KernelTwo_coef6.sroa.4.0.copyload, ptr %9, align 4 + %10 = getelementptr inbounds %3, ptr %KernelTwo.i, i64 0, i32 2, i64 2 + store i32 %KernelTwo_coef6.sroa.5.0.copyload, ptr %10, align 4 + %11 = getelementptr inbounds %3, ptr %KernelTwo.i, i64 0, i32 2, i64 3 + store i32 %KernelTwo_coef6.sroa.6.0.copyload, ptr %11, align 4 + %12 = getelementptr inbounds %3, ptr %KernelTwo.i, i64 0, i32 2, i64 4 + 
store i32 %KernelTwo_coef6.sroa.7.0.copyload, ptr %12, align 4 + %13 = getelementptr inbounds %3, ptr %KernelTwo.i, i64 0, i32 2, i64 5 + store i32 %KernelTwo_coef6.sroa.8.0.copyload, ptr %13, align 4 + %14 = getelementptr inbounds %3, ptr %KernelTwo.i, i64 0, i32 2, i64 6 + store i32 %KernelTwo_coef6.sroa.9.0.copyload, ptr %14, align 4 + %15 = getelementptr inbounds %3, ptr %KernelTwo.i, i64 0, i32 2, i64 7 + store i32 %KernelTwo_coef6.sroa.10.0.copyload, ptr %15, align 4 + %16 = getelementptr inbounds %3, ptr %KernelTwo.i, i64 0, i32 2, i64 8 + store i32 %KernelTwo_coef6.sroa.11.0.copyload, ptr %16, align 4 + %17 = getelementptr inbounds %3, ptr %KernelTwo.i, i64 0, i32 2, i64 9 + store i32 %KernelTwo_coef6.sroa.12.0.copyload, ptr %17, align 4 + %add.ptr.i.i7 = getelementptr inbounds i32, ptr %1, i64 0 + %add.ptr.i33.i = getelementptr inbounds i32, ptr addrspace(1) %KernelTwo_out, i64 %KernelOne_accIn165.sroa.0.0.copyload %18 = call spir_func i64 @_Z13get_global_idj(i32 0) #2 %19 = call spir_func i64 @_Z13get_global_idj(i32 1) #2 %20 = call spir_func i64 @_Z13get_global_idj(i32 2) #2 @@ -185,16 +185,16 @@ arrayinit.body.i: ; preds = %entry %sub.i.i.i.i.i = sub i64 %18, %24 %cmp.i.i.i8 = icmp ult i64 %sub.i.i.i.i.i, 2147483648 call void @llvm.assume(i1 %cmp.i.i.i8) - %arrayidx.i.i.i9 = getelementptr inbounds i32, i32* %add.ptr.i.i7, i64 0 - %27 = load i32, i32* %arrayidx.i.i.i9, align 4 + %arrayidx.i.i.i9 = getelementptr inbounds i32, ptr %add.ptr.i.i7, i64 0 + %27 = load i32, ptr %arrayidx.i.i.i9, align 4 %cmp.i13.i.i = icmp ult i64 %21, 2147483648 call void @llvm.assume(i1 %cmp.i13.i.i) - %arrayidx.i.i = getelementptr inbounds %3, %3* %KernelTwo.i, i64 0, i32 2, i64 %21 - %28 = load i32, i32* %arrayidx.i.i, align 4 + %arrayidx.i.i = getelementptr inbounds %3, ptr %KernelTwo.i, i64 0, i32 2, i64 %21 + %28 = load i32, ptr %arrayidx.i.i, align 4 %mul.i.i = mul nsw i32 %27, %28 - %arrayidx.i17.i.i = getelementptr inbounds i32, i32 addrspace(1)* %add.ptr.i33.i, i64 %sub.i.i.i.i.i - store i32 %mul.i.i, i32 addrspace(1)* %arrayidx.i17.i.i, align 4 - call void @llvm.lifetime.end.p0i8(i64 104, i8* %7) + %arrayidx.i17.i.i = getelementptr inbounds i32, ptr addrspace(1) %add.ptr.i33.i, i64 %sub.i.i.i.i.i + store i32 %mul.i.i, ptr addrspace(1) %arrayidx.i17.i.i, align 4 + call void @llvm.lifetime.end.p0i8(i64 104, ptr %7) call spir_func void @__itt_offload_wi_finish_wrapper() #4 ret void } @@ -209,6 +209,6 @@ attributes #5 = { noinline nounwind } !7 = !{!"none", !"none", !"none", !"none", !"none", !"none"} !9 = !{!"", !"", !"", !"", !"", !""} !16 = !{i32 0, i32 1, i32 0, i32 1, i32 1, i32 0} -!17 = !{!"class.sycl::_V1::id", !"int*", !"class.sycl::_V1::id", !"int*", !"int*", !"struct __wrapper_class"} +!17 = !{!"class.sycl::_V1::id", !"ptr", !"class.sycl::_V1::id", !"ptr", !"ptr", !"struct __wrapper_class"} !18 = !{!"KernelOne_accTmp3", !"KernelOne_accIn1", !"KernelOne_accIn16", !"KernelOne_accIn2", !"KernelTwo_out", !"KernelTwo_coef"} !19 = !{!"", !"", !"\00\00\00\00\00\00\00\00", !"", !"", !"\00\00\00\00\01\00\00\00\02\00\00\00\03\00\00\00\04\00\00\00\05\00\00\00\06\00\00\00\07\00\00\00\08\00\00\00\09\00\00\00"}
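The two non-empty strings in !19 (!sycl.kernel.constants) line up with the two arguments the pass removes from @fused_0: on the evidence of the CHECK lines above, each entry appears to hold the raw little-endian bytes of the propagated constant, eight zero bytes for the i64 scalar of %KernelOne_accIn16 and forty bytes for the [10 x i32] aggregate of %KernelTwo_coef. A small decoding sketch under that byte-for-byte assumption:

import struct

# Entry 3 of !19, !"\00\00\00\00\00\00\00\00": the propagated i64 scalar.
scalar = struct.unpack("<q", b"\x00" * 8)[0]

# Entry 6 of !19, !"\00\00\00\00\01\00\00\00...\09\00\00\00": the [10 x i32]
# aggregate, four little-endian bytes per element.
raw = b"".join(struct.pack("<i", i) for i in range(10))
coefficients = list(struct.unpack("<10i", raw))

assert scalar == 0                      # matches "store i64 0" in the checks
assert coefficients == list(range(10))  # matches "store i32 0" .. "store i32 9"

Decoded, these are exactly the values the entry-block CHECK lines expect to be materialized with store i64 0 and store i32 0 through store i32 9 once the propagated arguments are gone from the signature.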