diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 6ac380a6ab6c..a1a28076881c 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -9640,6 +9640,7 @@ bool BoUpSLP::isTreeTinyAndNotFullyVectorizable(bool ForReduction) const { bool IsAllowedSingleBVNode = VectorizableTree.size() > 1 || (VectorizableTree.size() == 1 && VectorizableTree.front()->getOpcode() && + !VectorizableTree.front()->isAltShuffle() && VectorizableTree.front()->getOpcode() != Instruction::PHI && VectorizableTree.front()->getOpcode() != Instruction::GetElementPtr && allSameBlock(VectorizableTree.front()->Scalars)); @@ -11032,7 +11033,10 @@ Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) { isUsedOutsideBlock(V); }) || (E->State == TreeEntry::NeedToGather && E->Idx == 0 && - all_of(E->Scalars, IsaPred<ExtractElementInst, UndefValue>))) + all_of(E->Scalars, [](Value *V) { + return isa<ExtractElementInst, UndefValue>(V) || + areAllOperandsNonInsts(V); + }))) Res.second = FindLastInst(); else Res.second = FindFirstInst(); diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-opcode-sindle-bv.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-opcode-sindle-bv.ll new file mode 100644 index 000000000000..89268837c9d8 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-opcode-sindle-bv.ll @@ -0,0 +1,25 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s + +define <2 x i32> @test(i32 %arg) { +; CHECK-LABEL: define <2 x i32> @test( +; CHECK-SAME: i32 [[ARG:%.*]]) { +; CHECK-NEXT: bb: +; CHECK-NEXT: [[OR:%.*]] = or i32 [[ARG]], 0 +; CHECK-NEXT: [[MUL:%.*]] = mul i32 0, 1 +; CHECK-NEXT: [[MUL1:%.*]] = mul i32 [[OR]], [[MUL]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 0, [[MUL1]] +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[OR]], i32 0 +; 
CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[MUL]], i32 1 +; CHECK-NEXT: ret <2 x i32> [[TMP1]] +; +bb: + %or = or i32 %arg, 0 + %mul = mul i32 0, 1 + %mul1 = mul i32 %or, %mul + %cmp = icmp ugt i32 0, %mul1 + %0 = insertelement <2 x i32> poison, i32 %or, i32 0 + %1 = insertelement <2 x i32> %0, i32 %mul, i32 1 + ret <2 x i32> %1 +} +