[Opaque pointer] Port tests in AMDGPU/NVPTX/SYCL-FUSION etc to opaque pointers (intel#10889)

This is in preparation for -DINTEL_SYCL_OPAQUEPOINTER_READY=1; the rewrite pattern common to all of these tests is sketched below.

- Port barrier_intrinsic.ll to opaque pointers
- Port spirv-to-ir-wrapper.ll to opaque pointers
- Update NVPTX tests to opaque pointers
- Update AMDGPU tests to opaque pointers
- Update SYCL fusion tests to opaque pointers
jsji authored Aug 21, 2023
1 parent bd1250f commit a08431c
Showing 37 changed files with 835 additions and 858 deletions.
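
The file diffs that follow all apply the same mechanical rewrite from typed pointers to opaque pointers. As a rough before/after sketch (a minimal, hypothetical function for illustration only, not taken from any of the changed files):

; Typed-pointer form (illustrative example, not from this commit):
; the pointee type is spelled inside the pointer type.
define void @example(i32 addrspace(1)* %dst) {
  %p = getelementptr inbounds i32, i32 addrspace(1)* %dst, i64 1
  store i32 0, i32 addrspace(1)* %p, align 4
  ret void
}

; Opaque-pointer form of the same function: every pointer is written as
; ptr plus its address space, and the element type survives only as the
; getelementptr/load/store value type.
define void @example(ptr addrspace(1) %dst) {
  %p = getelementptr inbounds i32, ptr addrspace(1) %dst, i64 1
  store i32 0, ptr addrspace(1) %p, align 4
  ret void
}

Because pointer bitcasts become redundant under opaque pointers, several FileCheck patterns below also drop bitcast lines, renumber SSA values (for example %3 becoming %2), keep an addrspacecast where an address-space change remains, and switch pointee-mangled intrinsic names such as @llvm.memcpy.p5i8.p4i8.i64 to address-space-only mangling (@llvm.memcpy.p5.p4.i64).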
18 changes: 9 additions & 9 deletions llvm/test/Analysis/GlobalsModRef/barrier_intrinsic.ll
@@ -8,21 +8,21 @@ target triple = "nvptx"

@foo.l.0 = internal unnamed_addr addrspace(3) global i32 undef, align 4

-define dso_local spir_kernel void @foo(i32 addrspace(1)* nocapture %0) {
+define dso_local spir_kernel void @foo(ptr addrspace(1) nocapture %0) {
; CHECK-LABEL: @foo(
; CHECK-NEXT: [[TMP2:%.*]] = tail call i32 @_Z13get_global_idj(i32 0) #0
; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @_Z12get_local_idj(i32 0) #0
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 0
; CHECK-NEXT: br i1 [[TMP4]], label [[TMP5:%.*]], label [[TMP7:%.*]]
; CHECK: 5:
; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[TMP2]], 5
-; CHECK-NEXT: store i32 [[TMP6]], i32 addrspace(3)* @foo.l.0, align 4
+; CHECK-NEXT: store i32 [[TMP6]], ptr addrspace(3) @foo.l.0, align 4
; CHECK-NEXT: br label [[TMP7]]
; CHECK: 7:
; CHECK-NEXT: tail call void @llvm.nvvm.barrier0() #2
-; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32 addrspace(3)* @foo.l.0, align 4
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP0:%.*]], i32 [[TMP2]]
-; CHECK-NEXT: store i32 [[TMP8]], i32 addrspace(1)* [[TMP9]], align 4
+; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(3) @foo.l.0, align 4
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP0:%.*]], i32 [[TMP2]]
+; CHECK-NEXT: store i32 [[TMP8]], ptr addrspace(1) [[TMP9]], align 4
; CHECK-NEXT: ret void
;
%2 = tail call i32 @_Z13get_global_idj(i32 0) #0
@@ -32,14 +32,14 @@ define dso_local spir_kernel void @foo(i32 addrspace(1)* nocapture %0) {

5: ; preds = %1
%6 = add i32 %2, 5
-store i32 %6, i32 addrspace(3)* @foo.l.0, align 4
+store i32 %6, ptr addrspace(3) @foo.l.0, align 4
br label %7

7: ; preds = %5, %1
tail call void @llvm.nvvm.barrier0() #1
-%8 = load i32, i32 addrspace(3)* @foo.l.0, align 4
-%9 = getelementptr inbounds i32, i32 addrspace(1)* %0, i32 %2
-store i32 %8, i32 addrspace(1)* %9, align 4
+%8 = load i32, ptr addrspace(3) @foo.l.0, align 4
+%9 = getelementptr inbounds i32, ptr addrspace(1) %0, i32 %2
+store i32 %8, ptr addrspace(1) %9, align 4
ret void
}

22 changes: 11 additions & 11 deletions llvm/test/CodeGen/AMDGPU/global-offset-dbg.ll
@@ -7,14 +7,14 @@ target triple = "amdgcn-amd-amdhsa"

; This test checks that debug information on functions and callsites are preserved

-declare i32 addrspace(5)* @llvm.amdgcn.implicit.offset()
+declare ptr addrspace(5) @llvm.amdgcn.implicit.offset()
; CHECK-NOT: llvm.amdgcn.implicit.offset

define weak_odr dso_local i64 @_ZTS14other_function() !dbg !11 {
-; CHECK: define weak_odr dso_local i64 @_ZTS14other_function(i32 addrspace(5)* %0) !dbg !11 {
-%1 = tail call i32 addrspace(5)* @llvm.amdgcn.implicit.offset()
-%2 = getelementptr inbounds i32, i32 addrspace(5)* %1, i64 2
-%3 = load i32, i32 addrspace(5)* %2, align 4
+; CHECK: define weak_odr dso_local i64 @_ZTS14other_function(ptr addrspace(5) %0) !dbg !11 {
+%1 = tail call ptr addrspace(5) @llvm.amdgcn.implicit.offset()
+%2 = getelementptr inbounds i32, ptr addrspace(5) %1, i64 2
+%3 = load i32, ptr addrspace(5) %2, align 4
%4 = zext i32 %3 to i64
ret i64 %4
}
@@ -24,15 +24,15 @@ define weak_odr dso_local void @_ZTS14example_kernel() !dbg !14 {
; CHECK: define weak_odr dso_local void @_ZTS14example_kernel() !dbg !14 {
entry:
%0 = call i64 @_ZTS14other_function(), !dbg !15
-; CHECK: %3 = call i64 @_ZTS14other_function(i32 addrspace(5)* %2), !dbg !15
+; CHECK: %2 = call i64 @_ZTS14other_function(ptr addrspace(5) %1), !dbg !15
ret void
}

-; CHECK: define weak_odr dso_local void @_ZTS14example_kernel_with_offset([3 x i32]* byref([3 x i32]) %0) !dbg !16 {
+; CHECK: define weak_odr dso_local void @_ZTS14example_kernel_with_offset(ptr byref([3 x i32]) %0) !dbg !16 {
; CHECK: %1 = alloca [3 x i32], align 4, addrspace(5), !dbg !17
-; CHECK: %2 = bitcast [3 x i32] addrspace(5)* %1 to i32 addrspace(5)*, !dbg !17
-; CHECK: call void @llvm.memcpy.p5i8.p4i8.i64(i8 addrspace(5)* align 4 %4, i8 addrspace(4)* align 1 %3, i64 12, i1 false), !dbg !17
-; CHECK: %5 = call i64 @_ZTS14other_function(i32 addrspace(5)* %2), !dbg !17
+; CHECK: %2 = addrspacecast ptr %0 to ptr addrspace(4), !dbg !17
+; CHECK: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 4 %1, ptr addrspace(4) align 1 %2, i64 12, i1 false), !dbg !17
+; CHECK: %3 = call i64 @_ZTS14other_function(ptr addrspace(5) %1), !dbg !17

!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4}
@@ -43,7 +43,7 @@ entry:
!2 = !{}
!3 = !{i32 2, !"Dwarf Version", i32 4}
!4 = !{i32 2, !"Debug Info Version", i32 3}
-!5 = distinct !{void ()* @_ZTS14example_kernel, !"kernel", i32 1}
+!5 = distinct !{ptr @_ZTS14example_kernel, !"kernel", i32 1}
!6 = !{i32 1, i32 4}
!7 = !{null, !"align", i32 8, !"align", i32 65544, !"align", i32 131080}
!8 = !{null, !"align", i32 16}
10 changes: 5 additions & 5 deletions llvm/test/CodeGen/AMDGPU/global-offset-invalid-triple.ll
@@ -7,12 +7,12 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3

; This test checks that the pass does not run on nvcl triples.

-declare i32 addrspace(5)* @llvm.amdgcn.implicit.offset()
+declare ptr addrspace(5) @llvm.amdgcn.implicit.offset()

define weak_odr dso_local i64 @_ZTS14other_function() {
-%1 = tail call i32 addrspace(5)* @llvm.amdgcn.implicit.offset()
-%2 = getelementptr inbounds i32, i32 addrspace(5)* %1, i64 2
-%3 = load i32, i32 addrspace(5)* %2, align 4
+%1 = tail call ptr addrspace(5) @llvm.amdgcn.implicit.offset()
+%2 = getelementptr inbounds i32, ptr addrspace(5) %1, i64 2
+%3 = load i32, ptr addrspace(5) %2, align 4
%4 = zext i32 %3 to i64
ret i64 %4
}
@@ -26,7 +26,7 @@ entry:

!amdgcn.annotations = !{!0, !1, !2, !1, !3, !3, !3, !3, !4, !4, !3}

-!0 = distinct !{void ()* @_ZTS14example_kernel, !"kernel", i32 1}
+!0 = distinct !{ptr @_ZTS14example_kernel, !"kernel", i32 1}
!1 = !{null, !"align", i32 8}
!2 = !{null, !"align", i32 8, !"align", i32 65544, !"align", i32 131080}
!3 = !{null, !"align", i32 16}
@@ -11,23 +11,23 @@ target triple = "amdgcn-amd-amdhsa"
; to other functions that has a variant that takes an offset parameter will have
; all calls redirected to the corresponding variants.

-declare i32 addrspace(5)* @llvm.amdgcn.implicit.offset()
-; CHECK-NOT: declare i32 addrspace(5)* @llvm.amdgcn.implicit.offset()
+declare ptr addrspace(5) @llvm.amdgcn.implicit.offset()
+; CHECK-NOT: declare ptr addrspace(5) @llvm.amdgcn.implicit.offset()

define weak_odr dso_local i64 @_ZTS14other_function() {
-; CHECK: define weak_odr dso_local i64 @_ZTS14other_function(i32 addrspace(5)* %0) {
-%1 = tail call i32 addrspace(5)* @llvm.amdgcn.implicit.offset()
-; CHECK-NOT: tail call i32 addrspace(5)* @llvm.amdgcn.implicit.offset()
-%2 = getelementptr inbounds i32, i32 addrspace(5)* %1, i64 2
-; CHECK: %2 = getelementptr inbounds i32, i32 addrspace(5)* %0, i64 2
-%3 = load i32, i32 addrspace(5)* %2, align 4
+; CHECK: define weak_odr dso_local i64 @_ZTS14other_function(ptr addrspace(5) %0) {
+%1 = tail call ptr addrspace(5) @llvm.amdgcn.implicit.offset()
+; CHECK-NOT: tail call ptr addrspace(5)* @llvm.amdgcn.implicit.offset()
+%2 = getelementptr inbounds i32, ptr addrspace(5) %1, i64 2
+; CHECK: %2 = getelementptr inbounds i32, ptr addrspace(5) %0, i64 2
+%3 = load i32, ptr addrspace(5) %2, align 4
%4 = zext i32 %3 to i64

-%5 = tail call i32 addrspace(5)* @llvm.amdgcn.implicit.offset()
-; CHECK-NOT: tail call i32 addrspace(5)* @llvm.amdgcn.implicit.offset()
-%6 = getelementptr inbounds i32, i32 addrspace(5)* %5, i64 2
-; CHECK: %5 = getelementptr inbounds i32, i32 addrspace(5)* %0, i64 2
-%7 = load i32, i32 addrspace(5)* %6, align 4
+%5 = tail call ptr addrspace(5) @llvm.amdgcn.implicit.offset()
+; CHECK-NOT: tail call ptr addrspace(5)* @llvm.amdgcn.implicit.offset()
+%6 = getelementptr inbounds i32, ptr addrspace(5) %5, i64 2
+; CHECK: %5 = getelementptr inbounds i32, ptr addrspace(5) %0, i64 2
+%7 = load i32, ptr addrspace(5) %6, align 4
%8 = zext i32 %7 to i64

ret i64 %4
@@ -37,32 +37,31 @@ define weak_odr dso_local i64 @_ZTS14other_function() {
define weak_odr dso_local void @_ZTS14example_kernel() {
entry:
; CHECK: %0 = alloca [3 x i32], align 4, addrspace(5)
-; CHECK: %1 = bitcast [3 x i32] addrspace(5)* %0 to i8 addrspace(5)*
-; CHECK: call void @llvm.memset.p5i8.i64(i8 addrspace(5)* nonnull align 4 dereferenceable(12) %1, i8 0, i64 12, i1 false)
-; CHECK: %2 = getelementptr inbounds [3 x i32], [3 x i32] addrspace(5)* %0, i32 0, i32 0
+; CHECK: call void @llvm.memset.p5.i64(ptr addrspace(5) nonnull align 4 dereferenceable(12) %0, i8 0, i64 12, i1 false)
+; CHECK: %1 = getelementptr inbounds [3 x i32], ptr addrspace(5) %0, i32 0, i32 0
%0 = call i64 @_ZTS14other_function()
-; CHECK: %3 = call i64 @_ZTS14other_function(i32 addrspace(5)* %2)
+; CHECK: %2 = call i64 @_ZTS14other_function(ptr addrspace(5) %1)
%1 = call i64 @_ZTS14other_function()
-; CHECK: %4 = call i64 @_ZTS14other_function(i32 addrspace(5)* %2)
+; CHECK: %3 = call i64 @_ZTS14other_function(ptr addrspace(5) %1)
ret void
}

-; CHECK: define weak_odr dso_local void @_ZTS14example_kernel_with_offset([3 x i32]* byref([3 x i32]) %0) {
+; CHECK: define weak_odr dso_local void @_ZTS14example_kernel_with_offset(ptr byref([3 x i32]) %0) {
; CHECK: entry:
; CHECK: %1 = alloca [3 x i32], align 4, addrspace(5)
-; CHECK: %2 = bitcast [3 x i32] addrspace(5)* %1 to i32 addrspace(5)*
-; CHECK: call void @llvm.memcpy.p5i8.p4i8.i64(i8 addrspace(5)* align 4 %4, i8 addrspace(4)* align 1 %3, i64 12, i1 false)
-; CHECK: %5 = call i64 @_ZTS14other_function(i32 addrspace(5)* %2)
-; CHECK: %6 = call i64 @_ZTS14other_function(i32 addrspace(5)* %2)
+; CHECK: %2 = addrspacecast ptr %0 to ptr addrspace(4)
+; CHECK: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 4 %1, ptr addrspace(4) align 1 %2, i64 12, i1 false)
+; CHECK: %3 = call i64 @_ZTS14other_function(ptr addrspace(5) %1)
+; CHECK: %4 = call i64 @_ZTS14other_function(ptr addrspace(5) %1)
; CHECK: ret void
; CHECK: }

!amdgcn.annotations = !{!0, !1, !2, !1, !3, !3, !3, !3, !4, !4, !3}
; CHECK: !amdgcn.annotations = !{!0, !1, !2, !1, !3, !3, !3, !3, !4, !4, !3, !5}

-!0 = distinct !{void ()* @_ZTS14example_kernel, !"kernel", i32 1}
+!0 = distinct !{ptr @_ZTS14example_kernel, !"kernel", i32 1}
!1 = !{null, !"align", i32 8}
!2 = !{null, !"align", i32 8, !"align", i32 65544, !"align", i32 131080}
!3 = !{null, !"align", i32 16}
!4 = !{null, !"align", i32 16, !"align", i32 65552, !"align", i32 131088}
-; CHECK: !5 = !{void ([3 x i32]*)* @_ZTS14example_kernel_with_offset, !"kernel", i32 1}
+; CHECK: !5 = !{ptr @_ZTS14example_kernel_with_offset, !"kernel", i32 1}
76 changes: 37 additions & 39 deletions llvm/test/CodeGen/AMDGPU/global-offset-multiple-entry-points.ll
@@ -7,8 +7,8 @@ target triple = "amdgcn-amd-amdhsa"

; This test checks that the pass works with multiple entry points.

-declare i32 addrspace(5)* @llvm.amdgcn.implicit.offset()
-; CHECK-NOT: declare i32 addrspace(5)* @llvm.amdgcn.implicit.offset()
+declare ptr addrspace(5) @llvm.amdgcn.implicit.offset()
+; CHECK-NOT: declare ptr addrspace(5) @llvm.amdgcn.implicit.offset()

; This function is a kernel entry point that does not use global offset. It will
; not get a clone with a global offset parameter.
@@ -19,93 +19,91 @@ entry:
}

define weak_odr dso_local i64 @_ZTS15common_function() {
-; CHECK: define weak_odr dso_local i64 @_ZTS15common_function(i32 addrspace(5)* %0) {
-%1 = tail call i32 addrspace(5)* @llvm.amdgcn.implicit.offset()
-; CHECK-NOT: tail call i32 addrspace(5)* @llvm.amdgcn.implicit.offset()
-; CHECK: %2 = getelementptr inbounds i32, i32 addrspace(5)* %0, i64 2
-%2 = getelementptr inbounds i32, i32 addrspace(5)* %1, i64 2
-%3 = load i32, i32 addrspace(5)* %2, align 4
+; CHECK: define weak_odr dso_local i64 @_ZTS15common_function(ptr addrspace(5) %0) {
+%1 = tail call ptr addrspace(5) @llvm.amdgcn.implicit.offset()
+; CHECK-NOT: tail call ptr addrspace(5) @llvm.amdgcn.implicit.offset()
+; CHECK: %2 = getelementptr inbounds i32, ptr addrspace(5) %0, i64 2
+%2 = getelementptr inbounds i32, ptr addrspace(5) %1, i64 2
+%3 = load i32, ptr addrspace(5) %2, align 4
%4 = zext i32 %3 to i64
ret i64 %4
}

define weak_odr dso_local i64 @_ZTS14first_function() {
-; CHECK: define weak_odr dso_local i64 @_ZTS14first_function(i32 addrspace(5)* %0) {
+; CHECK: define weak_odr dso_local i64 @_ZTS14first_function(ptr addrspace(5) %0) {
%1 = call i64 @_ZTS15common_function()
-; CHECK: %2 = call i64 @_ZTS15common_function(i32 addrspace(5)* %0)
+; CHECK: %2 = call i64 @_ZTS15common_function(ptr addrspace(5) %0)
ret i64 %1
}

; Function Attrs: noinline
define weak_odr dso_local void @_ZTS12first_kernel() {
entry:
; CHECK: %0 = alloca [3 x i32], align 4
-; CHECK: %1 = bitcast [3 x i32] addrspace(5)* %0 to i8 addrspace(5)*
-; CHECK: call void @llvm.memset.p5i8.i64(i8 addrspace(5)* nonnull align 4 dereferenceable(12) %1, i8 0, i64 12, i1 false)
-; CHECK: %2 = getelementptr inbounds [3 x i32], [3 x i32] addrspace(5)* %0, i32 0, i32 0
+; CHECK: call void @llvm.memset.p5.i64(ptr addrspace(5) nonnull align 4 dereferenceable(12) %0, i8 0, i64 12, i1 false)
+; CHECK: %1 = getelementptr inbounds [3 x i32], ptr addrspace(5) %0, i32 0, i32 0
%0 = call i64 @_ZTS14first_function()
-; CHECK: %3 = call i64 @_ZTS14first_function(i32 addrspace(5)* %2)
+; CHECK: %2 = call i64 @_ZTS14first_function(ptr addrspace(5) %1)
ret void
}

-; CHECK: define weak_odr dso_local void @_ZTS12first_kernel_with_offset([3 x i32]* byref([3 x i32]) %0) {
+; CHECK: define weak_odr dso_local void @_ZTS12first_kernel_with_offset(ptr byref([3 x i32]) %0) {
; CHECK: entry:
; CHECK: %1 = alloca [3 x i32], align 4, addrspace(5)
-; CHECK: %2 = bitcast [3 x i32] addrspace(5)* %1 to i32 addrspace(5)*
-; CHECK: call void @llvm.memcpy.p5i8.p4i8.i64(i8 addrspace(5)* align 4 %4, i8 addrspace(4)* align 1 %3, i64 12, i1 false)
-; CHECK: %5 = call i64 @_ZTS14first_function(i32 addrspace(5)* %2)
+; CHECK: %2 = addrspacecast ptr %0 to ptr addrspace(4)
+; CHECK: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 4 %1, ptr addrspace(4) align 1 %2, i64 12, i1 false)
+; CHECK: %3 = call i64 @_ZTS14first_function(ptr addrspace(5) %1)
; CHECK: ret void
; CHECK: }

define weak_odr dso_local i64 @_ZTS15second_function() {
-; CHECK: define weak_odr dso_local i64 @_ZTS15second_function(i32 addrspace(5)* %0) {
+; CHECK: define weak_odr dso_local i64 @_ZTS15second_function(ptr addrspace(5) %0) {
%1 = call i64 @_ZTS15common_function()
-; CHECK: %2 = call i64 @_ZTS15common_function(i32 addrspace(5)* %0)
+; CHECK: %2 = call i64 @_ZTS15common_function(ptr addrspace(5) %0)
ret i64 %1
}

; Function Attrs: noinline
define weak_odr dso_local void @_ZTS13second_kernel() {
entry:
; CHECK: %0 = alloca [3 x i32], align 4
-; CHECK: %1 = bitcast [3 x i32] addrspace(5)* %0 to i8 addrspace(5)*
-; CHECK: call void @llvm.memset.p5i8.i64(i8 addrspace(5)* nonnull align 4 dereferenceable(12) %1, i8 0, i64 12, i1 false)
-; CHECK: %2 = getelementptr inbounds [3 x i32], [3 x i32] addrspace(5)* %0, i32 0, i32 0
+; CHECK: call void @llvm.memset.p5.i64(ptr addrspace(5) nonnull align 4 dereferenceable(12) %0, i8 0, i64 12, i1 false)
+; CHECK: %1 = getelementptr inbounds [3 x i32], ptr addrspace(5) %0, i32 0, i32 0
%0 = call i64 @_ZTS15second_function()
-; CHECK: %3 = call i64 @_ZTS15second_function(i32 addrspace(5)* %2)
+; CHECK: %2 = call i64 @_ZTS15second_function(ptr addrspace(5) %1)
ret void
}

-; CHECK: define weak_odr dso_local void @_ZTS13second_kernel_with_offset([3 x i32]* byref([3 x i32]) %0) {
+; CHECK: define weak_odr dso_local void @_ZTS13second_kernel_with_offset(ptr byref([3 x i32]) %0) {
; CHECK: entry:
; CHECK: %1 = alloca [3 x i32], align 4, addrspace(5)
-; CHECK: %2 = bitcast [3 x i32] addrspace(5)* %1 to i32 addrspace(5)*
-; CHEKC: call void @llvm.memcpy.p5i8.p4i8.i64(i8 addrspace(5)* align 4 %4, i8 addrspace(4)* align 1 %3, i64 12, i1 false)
-; CHECK: %5 = call i64 @_ZTS15second_function(i32 addrspace(5)* %2)
+; CHECK: %2 = addrspacecast ptr %0 to ptr addrspace(4)
+; CHECK: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 4 %1, ptr addrspace(4) align 1 %2, i64 12, i1 false)
+; CHECK: %3 = call i64 @_ZTS15second_function(ptr addrspace(5) %1)
; CHECK: ret void
; CHECK: }

; This function doesn't get called by a kernel entry point.
define weak_odr dso_local i64 @_ZTS15no_entry_point() {
-; CHECK: define weak_odr dso_local i64 @_ZTS15no_entry_point(i32 addrspace(5)* %0) {
-%1 = tail call i32 addrspace(5)* @llvm.amdgcn.implicit.offset()
-; CHECK-NOT: tail call i32 addrspace(5)* @llvm.amdgcn.implicit.offset()
-%2 = getelementptr inbounds i32, i32 addrspace(5)* %1, i64 2
-; CHECK: %2 = getelementptr inbounds i32, i32 addrspace(5)* %0, i64 2
-%3 = load i32, i32 addrspace(5)* %2, align 4
+; CHECK: define weak_odr dso_local i64 @_ZTS15no_entry_point(ptr addrspace(5) %0) {
+%1 = tail call ptr addrspace(5) @llvm.amdgcn.implicit.offset()
+; CHECK-NOT: tail call ptr addrspace(5) @llvm.amdgcn.implicit.offset()
+%2 = getelementptr inbounds i32, ptr addrspace(5) %1, i64 2
+; CHECK: %2 = getelementptr inbounds i32, ptr addrspace(5) %0, i64 2
+%3 = load i32, ptr addrspace(5) %2, align 4
%4 = zext i32 %3 to i64
ret i64 %4
}

!amdgcn.annotations = !{!0, !1, !2, !1, !3, !3, !3, !3, !4, !4, !3, !5, !6}
; CHECK: !amdgcn.annotations = !{!0, !1, !2, !1, !3, !3, !3, !3, !4, !4, !3, !5, !6, !7, !8}

-!0 = distinct !{void ()* @_ZTS12first_kernel, !"kernel", i32 1}
+!0 = distinct !{ptr @_ZTS12first_kernel, !"kernel", i32 1}
!1 = !{null, !"align", i32 8}
!2 = !{null, !"align", i32 8, !"align", i32 65544, !"align", i32 131080}
!3 = !{null, !"align", i32 16}
!4 = !{null, !"align", i32 16, !"align", i32 65552, !"align", i32 131088}
-!5 = distinct !{void ()* @_ZTS13second_kernel, !"kernel", i32 1}
-!6 = distinct !{void ()* @_ZTS12third_kernel, !"kernel", i32 1}
-; CHECK: !7 = !{void ([3 x i32]*)* @_ZTS13second_kernel_with_offset, !"kernel", i32 1}
-; CHECK: !8 = !{void ([3 x i32]*)* @_ZTS12first_kernel_with_offset, !"kernel", i32 1}
+!5 = distinct !{ptr @_ZTS13second_kernel, !"kernel", i32 1}
+!6 = distinct !{ptr @_ZTS12third_kernel, !"kernel", i32 1}
+; CHECK: !7 = !{ptr @_ZTS13second_kernel_with_offset, !"kernel", i32 1}
+; CHECK: !8 = !{ptr @_ZTS12first_kernel_with_offset, !"kernel", i32 1}