Skip to content

Commit

Permalink
[AMDGPU] Emit amdgcn.if.break in the same BB as amdgcn.loop
Browse files Browse the repository at this point in the history
Before this change if.break was placed in wrong loop level
which resulted in accumulating values only from last iteration
of the inner loop.

Change-Id: Ib76ee5477a7173068d5d1b9ba9c065676265d584
  • Loading branch information
mariusz-sikora-at-amd committed Nov 8, 2024
1 parent e63d145 commit 79609c3
Show file tree
Hide file tree
Showing 3 changed files with 97 additions and 86 deletions.
7 changes: 6 additions & 1 deletion llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -225,8 +225,13 @@ Value *SIAnnotateControlFlow::handleLoopCondition(
if (Instruction *Inst = dyn_cast<Instruction>(Cond)) {
BasicBlock *Parent = Inst->getParent();
Instruction *Insert;
if (L->contains(Inst)) {
if (LI->getLoopFor(Parent) == L) {
// Insert IfBreak in the same BB as Cond, which can help
// SILowerControlFlow to know that it does not have to insert an
// AND with EXEC.
Insert = Parent->getTerminator();
} else if (L->contains(Inst)) {
Insert = Term;
} else {
Insert = L->getHeader()->getFirstNonPHIOrDbgOrLifetime();
}
Expand Down
26 changes: 15 additions & 11 deletions llvm/test/CodeGen/AMDGPU/multilevel-break.ll
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,10 @@ define amdgpu_vs void @multi_else_break(<4 x float> %vec, i32 %ub, i32 %cont) {
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP2]])
; OPT-NEXT: [[TMP6]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP5]], i64 [[PHI_BROKEN]])
; OPT-NEXT: [[TMP7:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP6]])
; OPT-NEXT: [[TMP8]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP4]], i64 [[PHI_BROKEN2]])
; OPT-NEXT: br i1 [[TMP7]], label [[FLOW1]], label [[LOOP]]
; OPT: Flow1:
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP6]])
; OPT-NEXT: [[TMP8]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP4]], i64 [[PHI_BROKEN2]])
; OPT-NEXT: [[TMP9:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP8]])
; OPT-NEXT: br i1 [[TMP9]], label [[IF:%.*]], label [[LOOP_OUTER]]
; OPT: IF:
Expand All @@ -57,33 +57,37 @@ define amdgpu_vs void @multi_else_break(<4 x float> %vec, i32 %ub, i32 %cont) {
; GCN-NEXT: .LBB0_2: ; %LOOP.outer
; GCN-NEXT: ; =>This Loop Header: Depth=1
; GCN-NEXT: ; Child Loop BB0_4 Depth 2
; GCN-NEXT: ; implicit-def: $sgpr6_sgpr7
; GCN-NEXT: ; implicit-def: $sgpr2_sgpr3
; GCN-NEXT: ; implicit-def: $sgpr8_sgpr9
; GCN-NEXT: ; implicit-def: $sgpr6_sgpr7
; GCN-NEXT: s_mov_b64 s[4:5], 0
; GCN-NEXT: s_branch .LBB0_4
; GCN-NEXT: .LBB0_3: ; %Flow
; GCN-NEXT: ; in Loop: Header=BB0_4 Depth=2
; GCN-NEXT: s_or_b64 exec, exec, s[8:9]
; GCN-NEXT: s_and_b64 s[8:9], exec, s[6:7]
; GCN-NEXT: s_or_b64 s[4:5], s[8:9], s[4:5]
; GCN-NEXT: s_or_b64 exec, exec, s[10:11]
; GCN-NEXT: s_and_b64 s[10:11], exec, s[8:9]
; GCN-NEXT: s_or_b64 s[4:5], s[10:11], s[4:5]
; GCN-NEXT: s_andn2_b64 s[2:3], s[2:3], exec
; GCN-NEXT: s_and_b64 s[10:11], s[6:7], exec
; GCN-NEXT: s_or_b64 s[2:3], s[2:3], s[10:11]
; GCN-NEXT: s_andn2_b64 exec, exec, s[4:5]
; GCN-NEXT: s_cbranch_execz .LBB0_1
; GCN-NEXT: .LBB0_4: ; %LOOP
; GCN-NEXT: ; Parent Loop BB0_2 Depth=1
; GCN-NEXT: ; => This Inner Loop Header: Depth=2
; GCN-NEXT: v_cmp_lt_i32_e32 vcc, v0, v4
; GCN-NEXT: s_or_b64 s[2:3], s[2:3], exec
; GCN-NEXT: s_or_b64 s[6:7], s[6:7], exec
; GCN-NEXT: s_and_saveexec_b64 s[8:9], vcc
; GCN-NEXT: s_or_b64 s[8:9], s[8:9], exec
; GCN-NEXT: s_and_saveexec_b64 s[10:11], vcc
; GCN-NEXT: s_cbranch_execz .LBB0_3
; GCN-NEXT: ; %bb.5: ; %ENDIF
; GCN-NEXT: ; in Loop: Header=BB0_4 Depth=2
; GCN-NEXT: v_add_i32_e32 v0, vcc, 1, v0
; GCN-NEXT: s_andn2_b64 s[2:3], s[2:3], exec
; GCN-NEXT: v_cmp_ne_u32_e32 vcc, v5, v0
; GCN-NEXT: s_andn2_b64 s[6:7], s[6:7], exec
; GCN-NEXT: s_and_b64 s[10:11], vcc, exec
; GCN-NEXT: s_or_b64 s[6:7], s[6:7], s[10:11]
; GCN-NEXT: v_cmp_ne_u32_e32 vcc, v5, v0
; GCN-NEXT: s_andn2_b64 s[8:9], s[8:9], exec
; GCN-NEXT: s_and_b64 s[12:13], vcc, exec
; GCN-NEXT: s_or_b64 s[8:9], s[8:9], s[12:13]
; GCN-NEXT: s_branch .LBB0_3
; GCN-NEXT: .LBB0_6: ; %IF
; GCN-NEXT: s_endpgm
Expand Down
150 changes: 76 additions & 74 deletions llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: opt -mtriple=amdgcn-- -S -structurizecfg -si-annotate-control-flow %s | FileCheck -check-prefix=IR %s
; RUN: llc -mtriple=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

Expand Down Expand Up @@ -46,51 +46,52 @@ define amdgpu_kernel void @reduced_nested_loop_conditions(ptr addrspace(3) nocap
; GCN-NEXT: s_cbranch_vccz .LBB0_6
; GCN-NEXT: .LBB0_7: ; %DummyReturnBlock
; GCN-NEXT: s_endpgm
; IR-LABEL: @reduced_nested_loop_conditions(
; IR-NEXT: bb:
; IR-LABEL: define amdgpu_kernel void @reduced_nested_loop_conditions(
; IR-SAME: ptr addrspace(3) nocapture [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
; IR-NEXT: [[BB:.*]]:
; IR-NEXT: [[MY_TMP:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() #[[ATTR4:[0-9]+]]
; IR-NEXT: [[MY_TMP1:%.*]] = getelementptr inbounds i64, ptr addrspace(3) [[ARG:%.*]], i32 [[MY_TMP]]
; IR-NEXT: [[MY_TMP1:%.*]] = getelementptr inbounds i64, ptr addrspace(3) [[ARG]], i32 [[MY_TMP]]
; IR-NEXT: [[MY_TMP2:%.*]] = load volatile i64, ptr addrspace(3) [[MY_TMP1]], align 8
; IR-NEXT: br label [[BB5:%.*]]
; IR: bb3:
; IR-NEXT: br i1 true, label [[BB4:%.*]], label [[BB13:%.*]]
; IR: bb4:
; IR-NEXT: br label [[FLOW:%.*]]
; IR: bb5:
; IR-NEXT: [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP6:%.*]], [[BB10:%.*]] ], [ 0, [[BB:%.*]] ]
; IR-NEXT: [[MY_TMP6:%.*]] = phi i32 [ 0, [[BB]] ], [ [[TMP5:%.*]], [[BB10]] ]
; IR-NEXT: br label %[[BB5:.*]]
; IR: [[BB3:.*]]:
; IR-NEXT: br i1 true, label %[[BB4:.*]], label %[[BB13:.*]]
; IR: [[BB4]]:
; IR-NEXT: br label %[[FLOW:.*]]
; IR: [[BB5]]:
; IR-NEXT: [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP6:%.*]], %[[BB10:.*]] ], [ 0, %[[BB]] ]
; IR-NEXT: [[MY_TMP6:%.*]] = phi i32 [ 0, %[[BB]] ], [ [[TMP5:%.*]], %[[BB10]] ]
; IR-NEXT: [[MY_TMP7:%.*]] = icmp eq i32 [[MY_TMP6]], 1
; IR-NEXT: [[TMP0:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[MY_TMP7]])
; IR-NEXT: [[TMP1:%.*]] = extractvalue { i1, i64 } [[TMP0]], 0
; IR-NEXT: [[TMP2:%.*]] = extractvalue { i1, i64 } [[TMP0]], 1
; IR-NEXT: br i1 [[TMP1]], label [[BB8:%.*]], label [[FLOW]]
; IR: bb8:
; IR-NEXT: br label [[BB13]]
; IR: bb9:
; IR-NEXT: br i1 false, label [[BB3:%.*]], label [[BB9:%.*]]
; IR: bb10:
; IR-NEXT: br i1 [[TMP1]], label %[[BB8:.*]], label %[[FLOW]]
; IR: [[BB8]]:
; IR-NEXT: br label %[[BB13]]
; IR: [[BB9:.*]]:
; IR-NEXT: br i1 false, label %[[BB3]], label %[[BB9]]
; IR: [[BB10]]:
; IR-NEXT: [[TMP3:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP6]])
; IR-NEXT: br i1 [[TMP3]], label [[BB23:%.*]], label [[BB5]]
; IR: Flow:
; IR-NEXT: [[TMP4:%.*]] = phi i1 [ [[MY_TMP22:%.*]], [[BB4]] ], [ true, [[BB5]] ]
; IR-NEXT: [[TMP5]] = phi i32 [ [[MY_TMP21:%.*]], [[BB4]] ], [ undef, [[BB5]] ]
; IR-NEXT: br i1 [[TMP3]], label %[[BB23:.*]], label %[[BB5]]
; IR: [[FLOW]]:
; IR-NEXT: [[TMP4:%.*]] = phi i1 [ [[MY_TMP22:%.*]], %[[BB4]] ], [ true, %[[BB5]] ]
; IR-NEXT: [[TMP5]] = phi i32 [ [[MY_TMP21:%.*]], %[[BB4]] ], [ undef, %[[BB5]] ]
; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP2]])
; IR-NEXT: [[TMP6]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP4]], i64 [[PHI_BROKEN]])
; IR-NEXT: br label [[BB10]]
; IR: bb13:
; IR-NEXT: [[MY_TMP14:%.*]] = phi i1 [ [[MY_TMP22]], [[BB3]] ], [ true, [[BB8]] ]
; IR-NEXT: br label %[[BB10]]
; IR: [[BB13]]:
; IR-NEXT: [[MY_TMP14:%.*]] = phi i1 [ [[MY_TMP22]], %[[BB3]] ], [ true, %[[BB8]] ]
; IR-NEXT: [[MY_TMP15:%.*]] = bitcast i64 [[MY_TMP2]] to <2 x i32>
; IR-NEXT: br i1 [[MY_TMP14]], label [[BB16:%.*]], label [[BB20:%.*]]
; IR: bb16:
; IR-NEXT: br i1 [[MY_TMP14]], label %[[BB16:.*]], label %[[BB20:.*]]
; IR: [[BB16]]:
; IR-NEXT: [[MY_TMP17:%.*]] = extractelement <2 x i32> [[MY_TMP15]], i64 1
; IR-NEXT: [[MY_TMP18:%.*]] = getelementptr inbounds i32, ptr addrspace(3) undef, i32 [[MY_TMP17]]
; IR-NEXT: [[MY_TMP19:%.*]] = load volatile i32, ptr addrspace(3) [[MY_TMP18]], align 4
; IR-NEXT: br label [[BB20]]
; IR: bb20:
; IR-NEXT: [[MY_TMP21]] = phi i32 [ [[MY_TMP19]], [[BB16]] ], [ 0, [[BB13]] ]
; IR-NEXT: [[MY_TMP22]] = phi i1 [ false, [[BB16]] ], [ [[MY_TMP14]], [[BB13]] ]
; IR-NEXT: br label [[BB9]]
; IR: bb23:
; IR-NEXT: br label %[[BB20]]
; IR: [[BB20]]:
; IR-NEXT: [[MY_TMP21]] = phi i32 [ [[MY_TMP19]], %[[BB16]] ], [ 0, %[[BB13]] ]
; IR-NEXT: [[MY_TMP22]] = phi i1 [ false, %[[BB16]] ], [ [[MY_TMP14]], %[[BB13]] ]
; IR-NEXT: br label %[[BB9]]
; IR: [[BB23]]:
; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP6]])
; IR-NEXT: ret void
bb:
Expand Down Expand Up @@ -188,66 +189,67 @@ define amdgpu_kernel void @nested_loop_conditions(ptr addrspace(1) nocapture %ar
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_endpgm
; IR-LABEL: @nested_loop_conditions(
; IR-NEXT: bb:
; IR-LABEL: define amdgpu_kernel void @nested_loop_conditions(
; IR-SAME: ptr addrspace(1) nocapture [[ARG:%.*]]) #[[ATTR0]] {
; IR-NEXT: [[BB:.*]]:
; IR-NEXT: [[MY_TMP1134:%.*]] = load volatile i32, ptr addrspace(1) undef, align 4
; IR-NEXT: [[MY_TMP1235:%.*]] = icmp slt i32 [[MY_TMP1134]], 9
; IR-NEXT: br i1 [[MY_TMP1235]], label [[BB14_LR_PH:%.*]], label [[FLOW:%.*]]
; IR: bb14.lr.ph:
; IR-NEXT: br i1 [[MY_TMP1235]], label %[[BB14_LR_PH:.*]], label %[[FLOW:.*]]
; IR: [[BB14_LR_PH]]:
; IR-NEXT: [[MY_TMP:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() #[[ATTR4]]
; IR-NEXT: [[MY_TMP1:%.*]] = zext i32 [[MY_TMP]] to i64
; IR-NEXT: [[MY_TMP2:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[ARG:%.*]], i64 [[MY_TMP1]]
; IR-NEXT: [[MY_TMP2:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[ARG]], i64 [[MY_TMP1]]
; IR-NEXT: [[MY_TMP3:%.*]] = load i64, ptr addrspace(1) [[MY_TMP2]], align 16
; IR-NEXT: [[MY_TMP932:%.*]] = load <4 x i32>, ptr addrspace(1) undef, align 16
; IR-NEXT: [[MY_TMP1033:%.*]] = extractelement <4 x i32> [[MY_TMP932]], i64 0
; IR-NEXT: br label [[BB14:%.*]]
; IR: Flow3:
; IR-NEXT: br label %[[BB14:.*]]
; IR: [[FLOW3:.*]]:
; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP20:%.*]])
; IR-NEXT: [[TMP0:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[TMP14:%.*]])
; IR-NEXT: [[TMP1:%.*]] = extractvalue { i1, i64 } [[TMP0]], 0
; IR-NEXT: [[TMP2:%.*]] = extractvalue { i1, i64 } [[TMP0]], 1
; IR-NEXT: br i1 [[TMP1]], label [[BB4_BB13_CRIT_EDGE:%.*]], label [[FLOW4:%.*]]
; IR: bb4.bb13_crit_edge:
; IR-NEXT: br label [[FLOW4]]
; IR: Flow4:
; IR-NEXT: [[TMP3:%.*]] = phi i1 [ true, [[BB4_BB13_CRIT_EDGE]] ], [ false, [[FLOW3:%.*]] ]
; IR-NEXT: br i1 [[TMP1]], label %[[BB4_BB13_CRIT_EDGE:.*]], label %[[FLOW4:.*]]
; IR: [[BB4_BB13_CRIT_EDGE]]:
; IR-NEXT: br label %[[FLOW4]]
; IR: [[FLOW4]]:
; IR-NEXT: [[TMP3:%.*]] = phi i1 [ true, %[[BB4_BB13_CRIT_EDGE]] ], [ false, %[[FLOW3]] ]
; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP2]])
; IR-NEXT: br label [[FLOW]]
; IR: bb13:
; IR-NEXT: br label [[BB31:%.*]]
; IR: Flow:
; IR-NEXT: [[TMP4:%.*]] = phi i1 [ [[TMP3]], [[FLOW4]] ], [ true, [[BB:%.*]] ]
; IR-NEXT: br label %[[FLOW]]
; IR: [[BB13:.*]]:
; IR-NEXT: br label %[[BB31:.*]]
; IR: [[FLOW]]:
; IR-NEXT: [[TMP4:%.*]] = phi i1 [ [[TMP3]], %[[FLOW4]] ], [ true, %[[BB]] ]
; IR-NEXT: [[TMP5:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[TMP4]])
; IR-NEXT: [[TMP6:%.*]] = extractvalue { i1, i64 } [[TMP5]], 0
; IR-NEXT: [[TMP7:%.*]] = extractvalue { i1, i64 } [[TMP5]], 1
; IR-NEXT: br i1 [[TMP6]], label [[BB13:%.*]], label [[BB31]]
; IR: bb14:
; IR-NEXT: [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP16:%.*]], [[FLOW1:%.*]] ], [ 0, [[BB14_LR_PH]] ]
; IR-NEXT: [[MY_TMP1037:%.*]] = phi i32 [ [[MY_TMP1033]], [[BB14_LR_PH]] ], [ [[TMP12:%.*]], [[FLOW1]] ]
; IR-NEXT: [[MY_TMP936:%.*]] = phi <4 x i32> [ [[MY_TMP932]], [[BB14_LR_PH]] ], [ [[TMP11:%.*]], [[FLOW1]] ]
; IR-NEXT: br i1 [[TMP6]], label %[[BB13]], label %[[BB31]]
; IR: [[BB14]]:
; IR-NEXT: [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP16:%.*]], %[[FLOW1:.*]] ], [ 0, %[[BB14_LR_PH]] ]
; IR-NEXT: [[MY_TMP1037:%.*]] = phi i32 [ [[MY_TMP1033]], %[[BB14_LR_PH]] ], [ [[TMP12:%.*]], %[[FLOW1]] ]
; IR-NEXT: [[MY_TMP936:%.*]] = phi <4 x i32> [ [[MY_TMP932]], %[[BB14_LR_PH]] ], [ [[TMP11:%.*]], %[[FLOW1]] ]
; IR-NEXT: [[MY_TMP15:%.*]] = icmp eq i32 [[MY_TMP1037]], 1
; IR-NEXT: [[TMP8:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[MY_TMP15]])
; IR-NEXT: [[TMP9:%.*]] = extractvalue { i1, i64 } [[TMP8]], 0
; IR-NEXT: [[TMP10:%.*]] = extractvalue { i1, i64 } [[TMP8]], 1
; IR-NEXT: br i1 [[TMP9]], label [[BB16:%.*]], label [[FLOW1]]
; IR: bb16:
; IR-NEXT: br i1 [[TMP9]], label %[[BB16:.*]], label %[[FLOW1]]
; IR: [[BB16]]:
; IR-NEXT: [[MY_TMP17:%.*]] = bitcast i64 [[MY_TMP3]] to <2 x i32>
; IR-NEXT: br label [[BB18:%.*]]
; IR: Flow1:
; IR-NEXT: [[TMP11]] = phi <4 x i32> [ [[MY_TMP9:%.*]], [[BB21:%.*]] ], [ undef, [[BB14]] ]
; IR-NEXT: [[TMP12]] = phi i32 [ [[MY_TMP10:%.*]], [[BB21]] ], [ undef, [[BB14]] ]
; IR-NEXT: [[TMP13:%.*]] = phi i1 [ [[MY_TMP12:%.*]], [[BB21]] ], [ true, [[BB14]] ]
; IR-NEXT: [[TMP14]] = phi i1 [ [[MY_TMP12]], [[BB21]] ], [ false, [[BB14]] ]
; IR-NEXT: [[TMP15:%.*]] = phi i1 [ false, [[BB21]] ], [ true, [[BB14]] ]
; IR-NEXT: br label %[[BB18:.*]]
; IR: [[FLOW1]]:
; IR-NEXT: [[TMP11]] = phi <4 x i32> [ [[MY_TMP9:%.*]], %[[BB21:.*]] ], [ undef, %[[BB14]] ]
; IR-NEXT: [[TMP12]] = phi i32 [ [[MY_TMP10:%.*]], %[[BB21]] ], [ undef, %[[BB14]] ]
; IR-NEXT: [[TMP13:%.*]] = phi i1 [ [[MY_TMP12:%.*]], %[[BB21]] ], [ true, %[[BB14]] ]
; IR-NEXT: [[TMP14]] = phi i1 [ [[MY_TMP12]], %[[BB21]] ], [ false, %[[BB14]] ]
; IR-NEXT: [[TMP15:%.*]] = phi i1 [ false, %[[BB21]] ], [ true, %[[BB14]] ]
; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP10]])
; IR-NEXT: [[TMP16]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP13]], i64 [[PHI_BROKEN]])
; IR-NEXT: [[TMP17:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP16]])
; IR-NEXT: br i1 [[TMP17]], label [[FLOW2:%.*]], label [[BB14]]
; IR: bb18:
; IR-NEXT: br i1 [[TMP17]], label %[[FLOW2:.*]], label %[[BB14]]
; IR: [[BB18]]:
; IR-NEXT: [[MY_TMP19:%.*]] = load volatile i32, ptr addrspace(1) undef, align 4
; IR-NEXT: [[MY_TMP20:%.*]] = icmp slt i32 [[MY_TMP19]], 9
; IR-NEXT: br i1 [[MY_TMP20]], label [[BB21]], label [[BB18]]
; IR: bb21:
; IR-NEXT: br i1 [[MY_TMP20]], label %[[BB21]], label %[[BB18]]
; IR: [[BB21]]:
; IR-NEXT: [[MY_TMP22:%.*]] = extractelement <2 x i32> [[MY_TMP17]], i64 1
; IR-NEXT: [[MY_TMP23:%.*]] = lshr i32 [[MY_TMP22]], 16
; IR-NEXT: [[MY_TMP24:%.*]] = select i1 undef, i32 undef, i32 [[MY_TMP23]]
Expand All @@ -263,16 +265,16 @@ define amdgpu_kernel void @nested_loop_conditions(ptr addrspace(1) nocapture %ar
; IR-NEXT: [[MY_TMP10]] = extractelement <4 x i32> [[MY_TMP9]], i64 0
; IR-NEXT: [[MY_TMP11:%.*]] = load volatile i32, ptr addrspace(1) undef, align 4
; IR-NEXT: [[MY_TMP12]] = icmp sge i32 [[MY_TMP11]], 9
; IR-NEXT: br label [[FLOW1]]
; IR: Flow2:
; IR-NEXT: br label %[[FLOW1]]
; IR: [[FLOW2]]:
; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP16]])
; IR-NEXT: [[TMP18:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[TMP15]])
; IR-NEXT: [[TMP19:%.*]] = extractvalue { i1, i64 } [[TMP18]], 0
; IR-NEXT: [[TMP20]] = extractvalue { i1, i64 } [[TMP18]], 1
; IR-NEXT: br i1 [[TMP19]], label [[BB31_LOOPEXIT:%.*]], label [[FLOW3]]
; IR: bb31.loopexit:
; IR-NEXT: br label [[FLOW3]]
; IR: bb31:
; IR-NEXT: br i1 [[TMP19]], label %[[BB31_LOOPEXIT:.*]], label %[[FLOW3]]
; IR: [[BB31_LOOPEXIT]]:
; IR-NEXT: br label %[[FLOW3]]
; IR: [[BB31]]:
; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP7]])
; IR-NEXT: store volatile i32 0, ptr addrspace(1) undef, align 4
; IR-NEXT: ret void
Expand Down

0 comments on commit 79609c3

Please sign in to comment.