From ea979b24b0a755c9839e32dd716078ea816a0508 Mon Sep 17 00:00:00 2001
From: Matthias Springer <me@m-sp.org>
Date: Sun, 17 Dec 2023 13:19:27 +0900
Subject: [PATCH 01/32] [mlir][SparseTensor][NFC] Remove `isNestedIn` helper
 function (#75729)

Use `Region::findAncestorBlockInRegion` instead of a custom IR
traversal.
---
 .../SparseTensor/Transforms/SparseGPUCodegen.cpp      | 11 +----------
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseGPUCodegen.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseGPUCodegen.cpp
index 30ab2a1f18e3f7..69fd1eb746ffe7 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseGPUCodegen.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseGPUCodegen.cpp
@@ -1155,7 +1155,7 @@ struct ForallRewriter : public OpRewritePattern<scf::ParallelOp> {
           block = arg.getOwner();
         else
           block = val.getDefiningOp()->getBlock();
-        if (!isNestedIn(block, forallOp))
+        if (!forallOp.getRegion().findAncestorBlockInRegion(*block))
           invariants.insert(val);
       }
     });
@@ -1208,15 +1208,6 @@ struct ForallRewriter : public OpRewritePattern<scf::ParallelOp> {
   }
 
 private:
-  // Helper method to see if block appears in given loop.
-  static bool isNestedIn(Block *block, scf::ParallelOp forallOp) {
-    for (Operation *o = block->getParentOp(); o; o = o->getParentOp()) {
-      if (o == forallOp)
-        return true;
-    }
-    return false;
-  }
-
   unsigned numThreads;
 };
 

From 5139299618cfc33eb7b4772cea5a8b60131dfc90 Mon Sep 17 00:00:00 2001
From: Carl Ritson <carl.ritson@amd.com>
Date: Sun, 17 Dec 2023 16:44:16 +0900
Subject: [PATCH 02/32] [AMDGPU] Track physical VGPRs used for SGPR spills
 (#75573)

Physical VGPRs used for SGPR spills need to be tracked independently of
the WWM reserved registers. The WWM reserved set contains extra registers
allocated during the WWM pre-allocation pass.

Without separate tracking, SGPR spills allocated after WWM pre-allocation
can overlap with WWM register usage, e.g. if the frame pointer is spilled
during prologue/epilogue insertion.
---
 llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp            | 3 ++-
 llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h              | 1 +
 llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir | 4 ++--
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 48c341917ddec7..e8142244b7db69 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -349,8 +349,9 @@ bool SIMachineFunctionInfo::allocatePhysicalVGPRForSGPRSpills(
       MBB.addLiveIn(LaneVGPR);
       MBB.sortUniqueLiveIns();
     }
+    SpillPhysVGPRs.push_back(LaneVGPR);
   } else {
-    LaneVGPR = WWMReservedRegs.back();
+    LaneVGPR = SpillPhysVGPRs.back();
   }
 
   SGPRSpillsToPhysicalVGPRLanes[FI].push_back(
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 7ff50c80081d30..dc63ae44c528db 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -502,6 +502,7 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
   unsigned NumVirtualVGPRSpillLanes = 0;
   unsigned NumPhysicalVGPRSpillLanes = 0;
   SmallVector<Register, 2> SpillVGPRs;
+  SmallVector<Register, 2> SpillPhysVGPRs;
   using WWMSpillsMap = MapVector<Register, int>;
   // To track the registers used in instructions that can potentially modify the
   // inactive lanes. The WWM instructions and the writelane instructions for
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir
index 35e205561a4169..1473e667f894cd 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir
@@ -39,7 +39,7 @@ body:             |
   ; GCN-NEXT:   SCRATCH_STORE_DWORD_SADDR $vgpr5, $sgpr33, 12, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.72, addrspace 5)
   ; GCN-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr4, $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.74, addrspace 5)
   ; GCN-NEXT:   $exec_lo = S_MOV_B32 killed $sgpr1
-  ; GCN-NEXT:   $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr0, 4, undef $vgpr4
+  ; GCN-NEXT:   $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr0, 4, undef $vgpr3
   ; GCN-NEXT:   $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 24, implicit-def dead $scc
   ; GCN-NEXT:   renamable $vgpr5 = IMPLICIT_DEF
   ; GCN-NEXT:   $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, $vgpr1
@@ -198,7 +198,7 @@ body:             |
   ; GCN-NEXT:   $sgpr5 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 1
   ; GCN-NEXT:   $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 0
   ; GCN-NEXT:   KILL killed renamable $vgpr5
-  ; GCN-NEXT:   $sgpr0 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 4
+  ; GCN-NEXT:   $sgpr0 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 4
   ; GCN-NEXT:   $sgpr1 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
   ; GCN-NEXT:   $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.69, addrspace 5)
   ; GCN-NEXT:   $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.70, addrspace 5)

From d08b59f3337777acda520469309514cc6d8e4547 Mon Sep 17 00:00:00 2001
From: Fangrui Song <i@maskray.me>
Date: Sun, 17 Dec 2023 00:42:26 -0800
Subject: [PATCH 03/32] [test] Improve MC/X86/index-operations.s

---
 llvm/test/MC/X86/index-operations.s | 26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/llvm/test/MC/X86/index-operations.s b/llvm/test/MC/X86/index-operations.s
index a355b7ae0760d7..899cf4656549f6 100644
--- a/llvm/test/MC/X86/index-operations.s
+++ b/llvm/test/MC/X86/index-operations.s
@@ -1,5 +1,5 @@
 // RUN: not llvm-mc -triple x86_64-unknown-unknown --show-encoding %s 2> %t.err | FileCheck --check-prefix=64 %s
-// RUN: FileCheck --check-prefix=ERR64 < %t.err %s
+// RUN: FileCheck --input-file=%t.err %s --check-prefix=ERR64 --implicit-check-not=error:
 // RUN: not llvm-mc -triple i386-unknown-unknown --show-encoding %s 2> %t.err | FileCheck --check-prefix=32 %s
 // RUN: FileCheck --check-prefix=ERR32 < %t.err %s
 // RUN: not llvm-mc -triple i386-unknown-unknown-code16 --show-encoding %s 2> %t.err | FileCheck --check-prefix=16 %s
@@ -21,7 +21,7 @@ lodsb (%esi), %al
 // 16: lodsb (%esi), %al # encoding: [0x67,0xac]
 
 lodsb (%si), %al
-// ERR64: invalid 16-bit base register
+// ERR64: [[#@LINE-1]]:[[#]]: error: invalid 16-bit base register
 // 32: lodsb (%si), %al # encoding: [0x67,0xac]
 // 16: lodsb (%si), %al # encoding: [0xac]
 
@@ -31,12 +31,12 @@ lodsl %gs:(%esi)
 // 16: lodsl %gs:(%esi), %eax # encoding: [0x67,0x65,0x66,0xad]
 
 lodsl (%edi), %eax
-// ERR64: invalid operand
+// ERR64: [[#@LINE-1]]:[[#]]: error: invalid operand
 // ERR32: invalid operand
 // ERR16: invalid operand
 
 lodsl 44(%edi), %eax
-// ERR64: invalid operand
+// ERR64: [[#@LINE-1]]:[[#]]: error: invalid operand
 // ERR32: invalid operand
 // ERR16: invalid operand
 
@@ -56,7 +56,7 @@ stos %eax, (%edi)
 // 16: stosl %eax, %es:(%edi) # encoding: [0x67,0x66,0xab]
 
 stosb %al, %fs:(%edi)
-// ERR64: invalid operand for instruction
+// ERR64: [[#@LINE-1]]:[[#]]: error: invalid operand for instruction
 // ERR32: invalid operand for instruction
 // ERR16: invalid operand for instruction
 
@@ -86,12 +86,12 @@ scasq %es:(%edi)
 // ERR16: 64-bit
 
 scasl %es:(%edi), %al
-// ERR64: invalid operand
+// ERR64: [[#@LINE-1]]:[[#]]: error: invalid operand
 // ERR32: invalid operand
 // ERR16: invalid operand
 
 scas %es:(%di), %ax
-// ERR64: invalid 16-bit base register
+// ERR64: [[#@LINE-1]]:[[#]]: error: invalid 16-bit base register
 // 16: scasw %es:(%di), %ax # encoding: [0xaf]
 // 32: scasw %es:(%di), %ax # encoding: [0x67,0x66,0xaf]
 
@@ -106,7 +106,7 @@ cmpsw (%edi), (%esi)
 // 16: cmpsw %es:(%edi), (%esi) # encoding: [0x67,0xa7]
 
 cmpsb (%di), (%esi)
-// ERR64: invalid 16-bit base register
+// ERR64: [[#@LINE-1]]:[[#]]: error: invalid 16-bit base register
 // ERR32: mismatching source and destination
 // ERR16: mismatching source and destination
 
@@ -146,7 +146,7 @@ insw %dx, (%edi)
 // 16: insw %dx, %es:(%edi) # encoding: [0x67,0x6d]
 
 insw %dx, (%bx)
-// ERR64: invalid 16-bit base register
+// ERR64: [[#@LINE-1]]:[[#]]: error: invalid 16-bit base register
 // 32: insw %dx, %es:(%di) # encoding: [0x67,0x66,0x6d]
 // 16: insw %dx, %es:(%di) # encoding: [0x6d]
 
@@ -161,18 +161,20 @@ insw %dx, (%rbx)
 // ERR16: 64-bit
 
 movdir64b	291(%si), %ecx
+// ERR64: error: invalid 16-bit base register
 // ERR32: invalid operand
 // ERR16: invalid operand
 
 movdir64b	291(%esi), %cx
+// ERR64: error: invalid operand for instruction
 // ERR32: invalid operand
 // ERR16: invalid operand
 
 movdir64b (%rdx), %r15d
-// ERR64: invalid operand
+// ERR64: [[#@LINE-1]]:[[#]]: error: invalid operand
 
 movdir64b (%edx), %r15
-// ERR64: invalid operand
+// ERR64: [[#@LINE-1]]:[[#]]: error: invalid operand
 
 movdir64b (%eip), %ebx
 // 64: movdir64b (%eip), %ebx # encoding: [0x67,0x66,0x0f,0x38,0xf8,0x1d,0x00,0x00,0x00,0x00]
@@ -185,4 +187,4 @@ movdir64b 291(%esi, %eiz, 4), %ebx
 // 32: movdir64b 291(%esi,%eiz,4), %ebx # encoding: [0x66,0x0f,0x38,0xf8,0x9c,0xa6,0x23,0x01,0x00,0x00]
 
 movdir64b 291(%rsi, %riz, 4), %rbx
-// 64: movdir64b 291(%rsi,%riz,4), %rbx # encoding: [0x66,0x0f,0x38,0xf8,0x9c,0xa6,0x23,0x01,0x00,0x00]
\ No newline at end of file
+// 64: movdir64b 291(%rsi,%riz,4), %rbx # encoding: [0x66,0x0f,0x38,0xf8,0x9c,0xa6,0x23,0x01,0x00,0x00]

From a3952b4f022ce03c778ecc3b44ffff350b512735 Mon Sep 17 00:00:00 2001
From: Kazu Hirata <kazu@google.com>
Date: Sun, 17 Dec 2023 00:57:24 -0800
Subject: [PATCH 04/32] [Analysis] Remove unused forward declarations (NFC)

---
 llvm/include/llvm/Analysis/AliasAnalysis.h          | 1 -
 llvm/include/llvm/Analysis/AliasAnalysisEvaluator.h | 1 -
 llvm/include/llvm/Analysis/InstructionSimplify.h    | 1 -
 3 files changed, 3 deletions(-)

diff --git a/llvm/include/llvm/Analysis/AliasAnalysis.h b/llvm/include/llvm/Analysis/AliasAnalysis.h
index 081783e243678c..e1cfb025fb6580 100644
--- a/llvm/include/llvm/Analysis/AliasAnalysis.h
+++ b/llvm/include/llvm/Analysis/AliasAnalysis.h
@@ -64,7 +64,6 @@ class LoopInfo;
 class PreservedAnalyses;
 class TargetLibraryInfo;
 class Value;
-template <typename> class SmallPtrSetImpl;
 
 /// The possible results of an alias query.
 ///
diff --git a/llvm/include/llvm/Analysis/AliasAnalysisEvaluator.h b/llvm/include/llvm/Analysis/AliasAnalysisEvaluator.h
index 20bcbc592afbdb..e4f152c232aa6b 100644
--- a/llvm/include/llvm/Analysis/AliasAnalysisEvaluator.h
+++ b/llvm/include/llvm/Analysis/AliasAnalysisEvaluator.h
@@ -29,7 +29,6 @@
 namespace llvm {
 class AAResults;
 class Function;
-class FunctionPass;
 
 class AAEvaluator : public PassInfoMixin<AAEvaluator> {
   int64_t FunctionCount = 0;
diff --git a/llvm/include/llvm/Analysis/InstructionSimplify.h b/llvm/include/llvm/Analysis/InstructionSimplify.h
index c626a6522d0177..a29955a06cf4e0 100644
--- a/llvm/include/llvm/Analysis/InstructionSimplify.h
+++ b/llvm/include/llvm/Analysis/InstructionSimplify.h
@@ -45,7 +45,6 @@ class DominatorTree;
 class Function;
 class Instruction;
 struct LoopStandardAnalysisResults;
-class MDNode;
 class Pass;
 template <class T, unsigned n> class SmallSetVector;
 class TargetLibraryInfo;

From 3a1ae2f46db473cfde4baa6e1b090f5dae67e8db Mon Sep 17 00:00:00 2001
From: Rik Huijzer <github@huijzer.xyz>
Date: Sun, 17 Dec 2023 11:42:35 +0100
Subject: [PATCH 05/32] [mlir][vector] Fix invalid `LoadOp` indices being
 created (#75519)

Fixes https://github.com/llvm/llvm-project/issues/71326.

The cause of the issue was that a new `LoadOp` was created which looked
something like:
```mlir
func.func @main(%arg1 : index, %arg2 : index) {
  %alloca_0 = memref.alloca() : memref<vector<1x32xi1>>
  %1 = vector.type_cast %alloca_0 : memref<vector<1x32xi1>> to memref<1xvector<32xi1>>
  %2 = memref.load %1[%arg1, %arg2] : memref<1xvector<32xi1>>
  return
}
```
which crashed inside `LoadOp::verify`. Note that `%alloca_0` is
0-dimensional and `%1` has one dimension, but `memref.load` tries to
index `%1` with two indices.

This is now fixed by using the fact that `unpackOneDim` always unpacks
exactly one dimension:

https://github.com/llvm/llvm-project/blob/1bce61e6b01b38e04260be4f422bbae59c34c766/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp#L897-L903

Therefore, the `LoadOp` on the unpacked mask buffer should index only
that one dimension.

---------

Co-authored-by: Benjamin Maxwell <macdue@dueutil.tech>
---
 .../Conversion/VectorToSCF/VectorToSCF.cpp    | 27 ++++++++++++-------
 mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp      |  6 +++--
 .../Conversion/VectorToSCF/vector-to-scf.mlir | 17 ++++++++++++
 mlir/test/Dialect/MemRef/invalid.mlir         |  9 +++++++
 4 files changed, 47 insertions(+), 12 deletions(-)

diff --git a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp
index 2ee314e9fedfe3..2026d0cd216a9e 100644
--- a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp
+++ b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp
@@ -369,7 +369,7 @@ struct Strategy<TransferReadOp> {
   /// Retrieve the indices of the current StoreOp that stores into the buffer.
   static void getBufferIndices(TransferReadOp xferOp,
                                SmallVector<Value, 8> &indices) {
-    auto storeOp = getStoreOp(xferOp);
+    memref::StoreOp storeOp = getStoreOp(xferOp);
     auto prevIndices = memref::StoreOpAdaptor(storeOp).getIndices();
     indices.append(prevIndices.begin(), prevIndices.end());
   }
@@ -591,8 +591,8 @@ struct PrepareTransferReadConversion
     if (checkPrepareXferOp(xferOp, options).failed())
       return failure();
 
-    auto buffers = allocBuffers(rewriter, xferOp);
-    auto *newXfer = rewriter.clone(*xferOp.getOperation());
+    BufferAllocs buffers = allocBuffers(rewriter, xferOp);
+    Operation *newXfer = rewriter.clone(*xferOp.getOperation());
     newXfer->setAttr(kPassLabel, rewriter.getUnitAttr());
     if (xferOp.getMask()) {
       dyn_cast<TransferReadOp>(newXfer).getMaskMutable().assign(
@@ -885,8 +885,7 @@ struct TransferOpConversion : public VectorToSCFPattern<OpTy> {
     // If the xferOp has a mask: Find and cast mask buffer.
     Value castedMaskBuffer;
     if (xferOp.getMask()) {
-      auto maskBuffer = getMaskBuffer(xferOp);
-      auto maskBufferType = dyn_cast<MemRefType>(maskBuffer.getType());
+      Value maskBuffer = getMaskBuffer(xferOp);
       if (xferOp.isBroadcastDim(0) || xferOp.getMaskType().getRank() == 1) {
         // Do not unpack a dimension of the mask, if:
         // * To-be-unpacked transfer op dimension is a broadcast.
@@ -897,7 +896,8 @@ struct TransferOpConversion : public VectorToSCFPattern<OpTy> {
       } else {
         // It's safe to assume the mask buffer can be unpacked if the data
         // buffer was unpacked.
-        auto castedMaskType = *unpackOneDim(maskBufferType);
+        auto maskBufferType = dyn_cast<MemRefType>(maskBuffer.getType());
+        MemRefType castedMaskType = *unpackOneDim(maskBufferType);
         castedMaskBuffer =
             locB.create<vector::TypeCastOp>(castedMaskType, maskBuffer);
       }
@@ -938,11 +938,18 @@ struct TransferOpConversion : public VectorToSCFPattern<OpTy> {
                   b.setInsertionPoint(newXfer); // Insert load before newXfer.
 
                   SmallVector<Value, 8> loadIndices;
-                  Strategy<OpTy>::getBufferIndices(xferOp, loadIndices);
-                  // In case of broadcast: Use same indices to load from memref
-                  // as before.
-                  if (!xferOp.isBroadcastDim(0))
+                  if (auto memrefType =
+                          castedMaskBuffer.getType().dyn_cast<MemRefType>()) {
+                    // If castedMaskBuffer is a memref, then one dim was
+                    // unpacked; see above.
                     loadIndices.push_back(iv);
+                  } else {
+                    Strategy<OpTy>::getBufferIndices(xferOp, loadIndices);
+                    // In case of broadcast: Use same indices to load from
+                    // memref as before.
+                    if (!xferOp.isBroadcastDim(0))
+                      loadIndices.push_back(iv);
+                  }
 
                   auto mask = b.create<memref::LoadOp>(loc, castedMaskBuffer,
                                                        loadIndices);
diff --git a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp
index 93327a28234ea9..a332fe253ba645 100644
--- a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp
+++ b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp
@@ -1615,8 +1615,10 @@ GetGlobalOp::verifySymbolUses(SymbolTableCollection &symbolTable) {
 //===----------------------------------------------------------------------===//
 
 LogicalResult LoadOp::verify() {
-  if (getNumOperands() != 1 + getMemRefType().getRank())
-    return emitOpError("incorrect number of indices for load");
+  if (static_cast<int64_t>(getIndices().size()) != getMemRefType().getRank()) {
+    return emitOpError("incorrect number of indices for load, expected ")
+           << getMemRefType().getRank() << " but got " << getIndices().size();
+  }
   return success();
 }
 
diff --git a/mlir/test/Conversion/VectorToSCF/vector-to-scf.mlir b/mlir/test/Conversion/VectorToSCF/vector-to-scf.mlir
index ad78f0c945b24d..953fcee0c372fa 100644
--- a/mlir/test/Conversion/VectorToSCF/vector-to-scf.mlir
+++ b/mlir/test/Conversion/VectorToSCF/vector-to-scf.mlir
@@ -740,6 +740,23 @@ func.func @cannot_lower_transfer_read_with_leading_scalable(%arg0: memref<?x4xf3
 
 //  -----
 
+// Check that the `unpackOneDim` case in the `TransferOpConversion` generates valid indices for the LoadOp.
+
+#map1 = affine_map<(d0, d1, d2, d3) -> (d0, 0, 0, d3)>
+func.func @does_not_crash_on_unpack_one_dim(%subview:  memref<1x1x1x1xi32>, %mask: vector<1x1xi1>) -> vector<1x1x1x1xi32> {
+  %c0 = arith.constant 0 : index
+  %c0_i32 = arith.constant 0 : i32
+  %3 = vector.transfer_read %subview[%c0, %c0, %c0, %c0], %c0_i32, %mask {permutation_map = #map1}
+          : memref<1x1x1x1xi32>, vector<1x1x1x1xi32>
+  return %3 : vector<1x1x1x1xi32>
+}
+// CHECK-LABEL: func.func @does_not_crash_on_unpack_one_dim
+// CHECK: %[[ALLOCA_0:.*]] = memref.alloca() : memref<vector<1x1xi1>>
+// CHECK: %[[MASK:.*]] = vector.type_cast %[[ALLOCA_0]] : memref<vector<1x1xi1>> to memref<1xvector<1xi1>>
+// CHECK: memref.load %[[MASK]][%{{.*}}] : memref<1xvector<1xi1>>
+
+//  -----
+
 // FULL-UNROLL-LABEL: @cannot_fully_unroll_transfer_write_of_nd_scalable_vector
 func.func @cannot_fully_unroll_transfer_write_of_nd_scalable_vector(%vec: vector<[4]x[4]xf32>, %memref: memref<?x?xf32>) {
   // FULL-UNROLL-NOT: vector.extract
diff --git a/mlir/test/Dialect/MemRef/invalid.mlir b/mlir/test/Dialect/MemRef/invalid.mlir
index 55b759cbb3ce7c..f9b870f77266e1 100644
--- a/mlir/test/Dialect/MemRef/invalid.mlir
+++ b/mlir/test/Dialect/MemRef/invalid.mlir
@@ -896,6 +896,15 @@ func.func @bad_alloc_wrong_symbol_count() {
 
 // -----
 
+func.func @load_invalid_memref_indexes() {
+  %0 = memref.alloca() : memref<10xi32>
+  %c0 = arith.constant 0 : index
+  // expected-error@+1 {{incorrect number of indices for load, expected 1 but got 2}}
+  %1 = memref.load %0[%c0, %c0] : memref<10xi32>
+}
+
+// -----
+
 func.func @test_store_zero_results() {
 ^bb0:
   %0 = memref.alloc() : memref<1024x64xf32, affine_map<(d0, d1) -> (d0, d1)>, 1>

From 3eaed9e6f574f59d76389c055b047ef5c50afb8a Mon Sep 17 00:00:00 2001
From: melonedo <44501064+melonedo@users.noreply.github.com>
Date: Sun, 17 Dec 2023 19:29:40 +0800
Subject: [PATCH 06/32] [RISCV] Implement intrinsics for XCVbitmanip extension
 in CV32E40P (#74993)

Implement XCVbitmanip intrinsics for CV32E40P according to the
specification.

This commit is part of a patch-set to upstream the vendor specific
extensions of CV32E40P that need LLVM intrinsics to implement Clang
builtins.

Contributors: @CharKeaney, @ChunyuLiao, @jeremybennett, @lewis-revill,
@NandniJamnadas, @PaoloS02, @simonpcook, @xingmingjie.

Spec:
https://github.com/openhwgroup/core-v-sw/blob/05481cf0ef7aa7b09067b14ff3f71faead7ba310/specifications/corev-builtin-spec.md#listing-of-pulp-bit-manipulation-builtins-xcvbitmanip

Previously reviewed on Phabricator: https://reviews.llvm.org/D157510.
Parallel GCC patch:
https://gcc.gnu.org/pipermail/gcc-patches/2023-November/635795.html.

Co-authored-by: melonedo <funanzeng@gmail.com>
---
 llvm/include/llvm/IR/IntrinsicsRISCV.td       |   1 +
 llvm/include/llvm/IR/IntrinsicsRISCVXCV.td    |  37 +++
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp   |  23 +-
 llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td    |  48 +++-
 .../Target/RISCV/RISCVTargetTransformInfo.cpp |   4 +-
 llvm/test/CodeGen/RISCV/xcvbitmanip.ll        | 231 ++++++++++++++++++
 6 files changed, 335 insertions(+), 9 deletions(-)
 create mode 100644 llvm/include/llvm/IR/IntrinsicsRISCVXCV.td
 create mode 100644 llvm/test/CodeGen/RISCV/xcvbitmanip.ll

diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td
index 20c6a525a86ba7..fc830fca392fc5 100644
--- a/llvm/include/llvm/IR/IntrinsicsRISCV.td
+++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td
@@ -1879,3 +1879,4 @@ let TargetPrefix = "riscv" in {
 //===----------------------------------------------------------------------===//
 include "llvm/IR/IntrinsicsRISCVXTHead.td"
 include "llvm/IR/IntrinsicsRISCVXsf.td"
+include "llvm/IR/IntrinsicsRISCVXCV.td"
diff --git a/llvm/include/llvm/IR/IntrinsicsRISCVXCV.td b/llvm/include/llvm/IR/IntrinsicsRISCVXCV.td
new file mode 100644
index 00000000000000..f1590ad66e362b
--- /dev/null
+++ b/llvm/include/llvm/IR/IntrinsicsRISCVXCV.td
@@ -0,0 +1,37 @@
+//===- IntrinsicsRISCVXCV.td - CORE-V intrinsics -----------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines all of the CORE-V vendor intrinsics for RISC-V.
+//
+//===----------------------------------------------------------------------===//
+
+class ScalarCoreVBitManipGprGprIntrinsic
+    : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
+                            [IntrNoMem, IntrSpeculatable]>;
+
+class ScalarCoreVBitManipGprIntrinsic
+    : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty],
+                            [IntrNoMem, IntrSpeculatable]>;
+
+let TargetPrefix = "riscv" in {
+  def int_riscv_cv_bitmanip_extract : ScalarCoreVBitManipGprGprIntrinsic;
+  def int_riscv_cv_bitmanip_extractu : ScalarCoreVBitManipGprGprIntrinsic;
+  def int_riscv_cv_bitmanip_bclr : ScalarCoreVBitManipGprGprIntrinsic;
+  def int_riscv_cv_bitmanip_bset : ScalarCoreVBitManipGprGprIntrinsic;
+
+  def int_riscv_cv_bitmanip_insert
+    : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+                            [IntrNoMem, IntrSpeculatable]>;
+
+  def int_riscv_cv_bitmanip_clb : ScalarCoreVBitManipGprIntrinsic;
+
+  def int_riscv_cv_bitmanip_bitrev
+    : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+                            [IntrNoMem, IntrWillReturn, IntrSpeculatable,
+                            ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>]>;
+} // TargetPrefix = "riscv"
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 4a8ff73ec47295..782a9e1db569f5 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -335,6 +335,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
     if (Subtarget.is64Bit())
       setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Custom);
     setOperationAction({ISD::ROTL, ISD::ROTR}, XLenVT, Custom);
+  } else if (Subtarget.hasVendorXCVbitmanip()) {
+    setOperationAction({ISD::ROTL}, XLenVT, Expand);
   } else {
     setOperationAction({ISD::ROTL, ISD::ROTR}, XLenVT, Expand);
     if (RV64LegalI32 && Subtarget.is64Bit())
@@ -355,9 +357,14 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                            ? Promote
                            : Expand);
 
-  // Zbkb can use rev8+brev8 to implement bitreverse.
-  setOperationAction(ISD::BITREVERSE, XLenVT,
-                     Subtarget.hasStdExtZbkb() ? Custom : Expand);
+
+  if (Subtarget.hasVendorXCVbitmanip()) {
+    setOperationAction(ISD::BITREVERSE, XLenVT, Legal);
+  } else {
+    // Zbkb can use rev8+brev8 to implement bitreverse.
+    setOperationAction(ISD::BITREVERSE, XLenVT,
+                       Subtarget.hasStdExtZbkb() ? Custom : Expand);
+  }
 
   if (Subtarget.hasStdExtZbb()) {
     setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, XLenVT,
@@ -372,13 +379,14 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
       else
         setOperationAction({ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF}, MVT::i32, Custom);
     }
-  } else {
+  } else if (!Subtarget.hasVendorXCVbitmanip()) {
     setOperationAction({ISD::CTTZ, ISD::CTPOP}, XLenVT, Expand);
     if (RV64LegalI32 && Subtarget.is64Bit())
       setOperationAction({ISD::CTTZ, ISD::CTPOP}, MVT::i32, Expand);
   }
 
-  if (Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb()) {
+  if (Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
+      Subtarget.hasVendorXCVbitmanip()) {
     // We need the custom lowering to make sure that the resulting sequence
     // for the 32bit case is efficient on 64bit targets.
     if (Subtarget.is64Bit()) {
@@ -1796,11 +1804,12 @@ bool RISCVTargetLowering::signExtendConstant(const ConstantInt *CI) const {
 }
 
 bool RISCVTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
-  return Subtarget.hasStdExtZbb();
+  return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXCVbitmanip();
 }
 
 bool RISCVTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
-  return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb();
+  return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
+         Subtarget.hasVendorXCVbitmanip();
 }
 
 bool RISCVTargetLowering::isMaskAndCmp0FoldingBeneficial(
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td
index 6622e811bbb86d..924e91e15c348f 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td
@@ -33,7 +33,7 @@ let DecoderNamespace = "XCVbitmanip" in {
 
   class CVBitManipR<bits<7> funct7, string opcodestr>
       : RVInstR<funct7, 0b011, OPC_CUSTOM_1, (outs GPR:$rd),
-                (ins GPR:$rs1, GPR:$rs2), opcodestr, "$rd, $rs1"> {
+                (ins GPR:$rs1), opcodestr, "$rd, $rs1"> {
     let rs2 = 0b00000;
   }
 }
@@ -658,3 +658,49 @@ let Predicates = [HasVendorXCVelw, IsRV32], hasSideEffects = 0,
   // Event load
   def CV_ELW : CVLoad_ri<0b011, "cv.elw">;
 }
+
+def cv_tuimm2 : TImmLeaf<XLenVT, [{return isUInt<2>(Imm);}]>;
+def cv_tuimm5 : TImmLeaf<XLenVT, [{return isUInt<5>(Imm);}]>;
+def cv_uimm10 : ImmLeaf<XLenVT, [{return isUInt<10>(Imm);}]>;
+
+def CV_LO5: SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(N->getZExtValue() & 0x1f, SDLoc(N),
+                                   N->getValueType(0));
+}]>;
+
+def CV_HI5: SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(N->getZExtValue() >> 5, SDLoc(N),
+                                   N->getValueType(0));
+}]>;
+
+multiclass PatCoreVBitManip<Intrinsic intr> {
+  def : PatGprGpr<intr, !cast<RVInst>("CV_" # NAME # "R")>;
+  def : Pat<(intr GPR:$rs1, cv_uimm10:$imm),
+            (!cast<RVInst>("CV_" # NAME) 
+             GPR:$rs1, (CV_HI5 cv_uimm10:$imm), (CV_LO5 cv_uimm10:$imm))>;
+}
+
+let Predicates = [HasVendorXCVbitmanip, IsRV32] in {
+  defm EXTRACT : PatCoreVBitManip<int_riscv_cv_bitmanip_extract>;
+  defm EXTRACTU : PatCoreVBitManip<int_riscv_cv_bitmanip_extractu>;
+  defm BCLR : PatCoreVBitManip<int_riscv_cv_bitmanip_bclr>;
+  defm BSET : PatCoreVBitManip<int_riscv_cv_bitmanip_bset>;
+
+  def : Pat<(int_riscv_cv_bitmanip_insert GPR:$rs1, GPR:$rs2, GPR:$rd),
+            (CV_INSERTR GPR:$rd, GPR:$rs1, GPR:$rs2)>;
+  def : Pat<(int_riscv_cv_bitmanip_insert GPR:$rs1, cv_uimm10:$imm, GPR:$rd),
+            (CV_INSERT GPR:$rd, GPR:$rs1, (CV_HI5 cv_uimm10:$imm), 
+                                          (CV_LO5 cv_uimm10:$imm))>;
+
+  def : PatGpr<cttz, CV_FF1>;
+  def : PatGpr<ctlz, CV_FL1>;
+  def : PatGpr<int_riscv_cv_bitmanip_clb, CV_CLB>;
+  def : PatGpr<ctpop, CV_CNT>;
+
+  def : PatGprGpr<rotr, CV_ROR>;
+
+  def : Pat<(int_riscv_cv_bitmanip_bitrev GPR:$rs1, cv_tuimm5:$pts, 
+             cv_tuimm2:$radix),
+            (CV_BITREV GPR:$rs1, cv_tuimm2:$radix, cv_tuimm5:$pts)>;
+  def : Pat<(bitreverse (XLenVT GPR:$rs)), (CV_BITREV GPR:$rs, 0, 0)>;
+}
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 3a2f2f39cd1c9b..4614446b2150b7 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -172,7 +172,9 @@ RISCVTTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
 TargetTransformInfo::PopcntSupportKind
 RISCVTTIImpl::getPopcntSupport(unsigned TyWidth) {
   assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
-  return ST->hasStdExtZbb() ? TTI::PSK_FastHardware : TTI::PSK_Software;
+  return ST->hasStdExtZbb() || ST->hasVendorXCVbitmanip()
+             ? TTI::PSK_FastHardware
+             : TTI::PSK_Software;
 }
 
 bool RISCVTTIImpl::shouldExpandReduction(const IntrinsicInst *II) const {
diff --git a/llvm/test/CodeGen/RISCV/xcvbitmanip.ll b/llvm/test/CodeGen/RISCV/xcvbitmanip.ll
new file mode 100644
index 00000000000000..d25ff28475c4b7
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/xcvbitmanip.ll
@@ -0,0 +1,231 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -O0 -mtriple=riscv32 -mattr=+xcvbitmanip -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-O0
+; RUN: llc -O3 -mtriple=riscv32 -mattr=+xcvbitmanip -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-O3
+
+declare i32 @llvm.riscv.cv.bitmanip.extract(i32, i32)
+
+define i32 @test.cv.extractr(i32 %a, i32 %b) {
+; CHECK-LABEL: test.cv.extractr:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cv.extractr a0, a0, a1
+; CHECK-NEXT:    ret
+  %1 = call i32 @llvm.riscv.cv.bitmanip.extract(i32 %a, i32 %b)
+  ret i32 %1
+}
+
+define i32 @test.cv.extract(i32 %a) {
+; CHECK-LABEL: test.cv.extract:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cv.extract a0, a0, 2, 1
+; CHECK-NEXT:    ret
+  %1 = call i32 @llvm.riscv.cv.bitmanip.extract(i32 %a, i32 65)
+  ret i32 %1
+}
+
+define i32 @test.cv.extract1023(i32 %a) {
+; CHECK-LABEL: test.cv.extract1023:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cv.extract a0, a0, 31, 31
+; CHECK-NEXT:    ret
+  %1 = call i32 @llvm.riscv.cv.bitmanip.extract(i32 %a, i32 1023)
+  ret i32 %1
+}
+
+declare i32 @llvm.riscv.cv.bitmanip.extractu(i32, i32)
+
+define i32 @test.cv.extractur(i32 %a, i32 %b) {
+; CHECK-LABEL: test.cv.extractur:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cv.extractur a0, a0, a1
+; CHECK-NEXT:    ret
+  %1 = call i32 @llvm.riscv.cv.bitmanip.extractu(i32 %a, i32 %b)
+  ret i32 %1
+}
+
+define i32 @test.cv.extractu(i32 %a) {
+; CHECK-LABEL: test.cv.extractu:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cv.extractu a0, a0, 2, 1
+; CHECK-NEXT:    ret
+  %1 = call i32 @llvm.riscv.cv.bitmanip.extractu(i32 %a, i32 65)
+  ret i32 %1
+}
+
+declare i32 @llvm.riscv.cv.bitmanip.insert(i32, i32, i32)
+
+define i32 @test.cv.insert(i32 %c, i32 %a) {
+; CHECK-LABEL: test.cv.insert:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cv.insert a0, a1, 2, 1
+; CHECK-NEXT:    ret
+  %1 = call i32 @llvm.riscv.cv.bitmanip.insert(i32 %a, i32 65, i32 %c)
+  ret i32 %1
+}
+
+define i32 @test.cv.insertr(i32 %c, i32 %b, i32 %a) {
+; CHECK-LABEL: test.cv.insertr:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cv.insertr a0, a2, a1
+; CHECK-NEXT:    ret
+  %1 = call i32 @llvm.riscv.cv.bitmanip.insert(i32 %a, i32 %b, i32 %c)
+  ret i32 %1
+}
+
+declare i32 @llvm.riscv.cv.bitmanip.bclr(i32, i32)
+
+define i32 @test.cv.bclrr(i32 %a, i32 %b) {
+; CHECK-LABEL: test.cv.bclrr:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cv.bclrr a0, a0, a1
+; CHECK-NEXT:    ret
+  %1 = call i32 @llvm.riscv.cv.bitmanip.bclr(i32 %a, i32 %b)
+  ret i32 %1
+}
+
+define i32 @test.cv.bclr(i32 %a) {
+; CHECK-LABEL: test.cv.bclr:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cv.bclr a0, a0, 2, 1
+; CHECK-NEXT:    ret
+  %1 = call i32 @llvm.riscv.cv.bitmanip.bclr(i32 %a, i32 65)
+  ret i32 %1
+}
+
+declare i32 @llvm.riscv.cv.bitmanip.bset(i32, i32)
+
+define i32 @test.cv.bsetr(i32 %a, i32 %b) {
+; CHECK-LABEL: test.cv.bsetr:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cv.bsetr a0, a0, a1
+; CHECK-NEXT:    ret
+  %1 = call i32 @llvm.riscv.cv.bitmanip.bset(i32 %a, i32 %b)
+  ret i32 %1
+}
+
+define i32 @test.cv.bset(i32 %a) {
+; CHECK-LABEL: test.cv.bset:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cv.bset a0, a0, 2, 1
+; CHECK-NEXT:    ret
+  %1 = call i32 @llvm.riscv.cv.bitmanip.bset(i32 %a, i32 65)
+  ret i32 %1
+}
+
+declare i32 @llvm.cttz.i32(i32, i1)
+
+define i32 @test.cv.ff1(i32 %a) {
+; CHECK-LABEL: test.cv.ff1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cv.ff1 a0, a0
+; CHECK-NEXT:    ret
+  %1 = call i32 @llvm.cttz.i32(i32 %a, i1 0)
+  ret i32 %1
+}
+
+declare i32 @llvm.ctlz.i32(i32, i1)
+
+define i32 @test.cv.fl1(i32 %a) {
+; CHECK-LABEL: test.cv.fl1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cv.fl1 a0, a0
+; CHECK-NEXT:    ret
+  %1 = call i32 @llvm.ctlz.i32(i32 %a, i1 0)
+  ret i32 %1
+}
+
+declare i32 @llvm.riscv.cv.bitmanip.clb(i32)
+
+define i32 @test.cv.clb(i32 %a) {
+; CHECK-LABEL: test.cv.clb:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cv.clb a0, a0
+; CHECK-NEXT:    ret
+  %1 = call i32 @llvm.riscv.cv.bitmanip.clb(i32 %a)
+  ret i32 %1
+}
+
+declare i32 @llvm.ctpop(i32)
+
+define i32 @test.cv.cnt(i32 %a) {
+; CHECK-LABEL: test.cv.cnt:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cv.cnt a0, a0
+; CHECK-NEXT:    ret
+  %1 = call i32 @llvm.ctpop(i32 %a)
+  ret i32 %1
+}
+
+declare i32 @llvm.fshl.i32(i32, i32, i32)
+
+define i32 @test.llvm.fshl.imm(i32 %a) {
+; CHECK-LABEL: test.llvm.fshl.imm:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 30
+; CHECK-NEXT:    cv.ror a0, a0, a1
+; CHECK-NEXT:    ret
+  %1 = call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 2)
+  ret i32 %1
+}
+
+define i32 @test.llvm.fshl.reg(i32 %a, i32 %b) {
+; CHECK-O0-LABEL: test.llvm.fshl.reg:
+; CHECK-O0:       # %bb.0:
+; CHECK-O0-NEXT:    mv a2, a1
+; CHECK-O0-NEXT:    li a1, 0
+; CHECK-O0-NEXT:    sub a1, a1, a2
+; CHECK-O0-NEXT:    cv.ror a0, a0, a1
+; CHECK-O0-NEXT:    ret
+;
+; CHECK-O3-LABEL: test.llvm.fshl.reg:
+; CHECK-O3:       # %bb.0:
+; CHECK-O3-NEXT:    neg a1, a1
+; CHECK-O3-NEXT:    cv.ror a0, a0, a1
+; CHECK-O3-NEXT:    ret
+  %1 = call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 %b)
+  ret i32 %1
+}
+
+declare i32 @llvm.fshr.i32(i32, i32, i32)
+
+define i32 @test.llvm.fshr.imm(i32 %a) {
+; CHECK-LABEL: test.llvm.fshr.imm:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 2
+; CHECK-NEXT:    cv.ror a0, a0, a1
+; CHECK-NEXT:    ret
+  %1 = call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 2)
+  ret i32 %1
+}
+
+define i32 @test.llvm.fshr.reg(i32 %a, i32 %b) {
+; CHECK-LABEL: test.llvm.fshr.reg:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cv.ror a0, a0, a1
+; CHECK-NEXT:    ret
+  %1 = call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 %b)
+  ret i32 %1
+}
+
+declare i32 @llvm.riscv.cv.bitmanip.bitrev(i32, i32, i32)
+
+define i32 @test.cv.bitrev(i32 %a) {
+; CHECK-LABEL: test.cv.bitrev:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cv.bitrev a0, a0, 2, 1
+; CHECK-NEXT:    ret
+  %1 = call i32 @llvm.riscv.cv.bitmanip.bitrev(i32 %a, i32 1, i32 2)
+  ret i32 %1
+}
+
+declare i32 @llvm.bitreverse(i32)
+
+define i32 @test.llvm.bitrev(i32 %a) {
+; CHECK-LABEL: test.llvm.bitrev:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cv.bitrev a0, a0, 0, 0
+; CHECK-NEXT:    ret
+  %1 = call i32 @llvm.bitreverse(i32 %a)
+  ret i32 %1
+}

From fb877c19c048040702bb99423b0f11539192e89c Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Sun, 17 Dec 2023 11:31:12 +0000
Subject: [PATCH 07/32] [X86] combineLoad - don't bother truncating the
 alternative target constant data. NFC.

We only iterate over the original target constant/undef width, so keep the alternative data in its original form.

This should help if we try to merge constant data in the future.
---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 99c492087a4585..13f69883ad6d52 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -49946,12 +49946,10 @@ static SDValue combineLoad(SDNode *N, SelectionDAG &DAG,
         }
         auto MatchingBits = [](const APInt &Undefs, const APInt &UserUndefs,
                                ArrayRef<APInt> Bits, ArrayRef<APInt> UserBits) {
-          if (!UserUndefs.isSubsetOf(Undefs))
-            return false;
           for (unsigned I = 0, E = Undefs.getBitWidth(); I != E; ++I) {
             if (Undefs[I])
               continue;
-            if (Bits[I] != UserBits[I])
+            if (UserUndefs[I] || Bits[I] != UserBits[I])
               return false;
           }
           return true;
@@ -49970,8 +49968,6 @@ static SDValue combineLoad(SDNode *N, SelectionDAG &DAG,
             if (getTargetConstantBitsFromNode(SDValue(N, 0), 8, Undefs, Bits) &&
                 getTargetConstantBitsFromNode(SDValue(User, 0), 8, UserUndefs,
                                               UserBits)) {
-              UserUndefs = UserUndefs.trunc(Undefs.getBitWidth());
-              UserBits.truncate(Bits.size());
               if (MatchingBits(Undefs, UserUndefs, Bits, UserBits)) {
                 SDValue Extract = extractSubVector(
                     SDValue(User, 0), 0, DAG, SDLoc(N), RegVT.getSizeInBits());

From 9f5afc3de95d6f2b5f85024a8cf7f021fef41db0 Mon Sep 17 00:00:00 2001
From: Rik Huijzer <github@huijzer.xyz>
Date: Sun, 17 Dec 2023 12:34:17 +0100
Subject: [PATCH 08/32] Revert "[mlir][vector] Fix invalid `LoadOp` indices
 being created (#75519)"

This reverts commit 3a1ae2f46db473cfde4baa6e1b090f5dae67e8db.
---
 .../Conversion/VectorToSCF/VectorToSCF.cpp    | 27 +++++++------------
 mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp      |  6 ++---
 .../Conversion/VectorToSCF/vector-to-scf.mlir | 17 ------------
 mlir/test/Dialect/MemRef/invalid.mlir         |  9 -------
 4 files changed, 12 insertions(+), 47 deletions(-)

diff --git a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp
index 2026d0cd216a9e..2ee314e9fedfe3 100644
--- a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp
+++ b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp
@@ -369,7 +369,7 @@ struct Strategy<TransferReadOp> {
   /// Retrieve the indices of the current StoreOp that stores into the buffer.
   static void getBufferIndices(TransferReadOp xferOp,
                                SmallVector<Value, 8> &indices) {
-    memref::StoreOp storeOp = getStoreOp(xferOp);
+    auto storeOp = getStoreOp(xferOp);
     auto prevIndices = memref::StoreOpAdaptor(storeOp).getIndices();
     indices.append(prevIndices.begin(), prevIndices.end());
   }
@@ -591,8 +591,8 @@ struct PrepareTransferReadConversion
     if (checkPrepareXferOp(xferOp, options).failed())
       return failure();
 
-    BufferAllocs buffers = allocBuffers(rewriter, xferOp);
-    Operation *newXfer = rewriter.clone(*xferOp.getOperation());
+    auto buffers = allocBuffers(rewriter, xferOp);
+    auto *newXfer = rewriter.clone(*xferOp.getOperation());
     newXfer->setAttr(kPassLabel, rewriter.getUnitAttr());
     if (xferOp.getMask()) {
       dyn_cast<TransferReadOp>(newXfer).getMaskMutable().assign(
@@ -885,7 +885,8 @@ struct TransferOpConversion : public VectorToSCFPattern<OpTy> {
     // If the xferOp has a mask: Find and cast mask buffer.
     Value castedMaskBuffer;
     if (xferOp.getMask()) {
-      Value maskBuffer = getMaskBuffer(xferOp);
+      auto maskBuffer = getMaskBuffer(xferOp);
+      auto maskBufferType = dyn_cast<MemRefType>(maskBuffer.getType());
       if (xferOp.isBroadcastDim(0) || xferOp.getMaskType().getRank() == 1) {
         // Do not unpack a dimension of the mask, if:
         // * To-be-unpacked transfer op dimension is a broadcast.
@@ -896,8 +897,7 @@ struct TransferOpConversion : public VectorToSCFPattern<OpTy> {
       } else {
         // It's safe to assume the mask buffer can be unpacked if the data
         // buffer was unpacked.
-        auto maskBufferType = dyn_cast<MemRefType>(maskBuffer.getType());
-        MemRefType castedMaskType = *unpackOneDim(maskBufferType);
+        auto castedMaskType = *unpackOneDim(maskBufferType);
         castedMaskBuffer =
             locB.create<vector::TypeCastOp>(castedMaskType, maskBuffer);
       }
@@ -938,18 +938,11 @@ struct TransferOpConversion : public VectorToSCFPattern<OpTy> {
                   b.setInsertionPoint(newXfer); // Insert load before newXfer.
 
                   SmallVector<Value, 8> loadIndices;
-                  if (auto memrefType =
-                          castedMaskBuffer.getType().dyn_cast<MemRefType>()) {
-                    // If castedMaskBuffer is a memref, then one dim was
-                    // unpacked; see above.
+                  Strategy<OpTy>::getBufferIndices(xferOp, loadIndices);
+                  // In case of broadcast: Use same indices to load from memref
+                  // as before.
+                  if (!xferOp.isBroadcastDim(0))
                     loadIndices.push_back(iv);
-                  } else {
-                    Strategy<OpTy>::getBufferIndices(xferOp, loadIndices);
-                    // In case of broadcast: Use same indices to load from
-                    // memref as before.
-                    if (!xferOp.isBroadcastDim(0))
-                      loadIndices.push_back(iv);
-                  }
 
                   auto mask = b.create<memref::LoadOp>(loc, castedMaskBuffer,
                                                        loadIndices);
diff --git a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp
index a332fe253ba645..93327a28234ea9 100644
--- a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp
+++ b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp
@@ -1615,10 +1615,8 @@ GetGlobalOp::verifySymbolUses(SymbolTableCollection &symbolTable) {
 //===----------------------------------------------------------------------===//
 
 LogicalResult LoadOp::verify() {
-  if (static_cast<int64_t>(getIndices().size()) != getMemRefType().getRank()) {
-    return emitOpError("incorrect number of indices for load, expected ")
-           << getMemRefType().getRank() << " but got " << getIndices().size();
-  }
+  if (getNumOperands() != 1 + getMemRefType().getRank())
+    return emitOpError("incorrect number of indices for load");
   return success();
 }
 
diff --git a/mlir/test/Conversion/VectorToSCF/vector-to-scf.mlir b/mlir/test/Conversion/VectorToSCF/vector-to-scf.mlir
index 953fcee0c372fa..ad78f0c945b24d 100644
--- a/mlir/test/Conversion/VectorToSCF/vector-to-scf.mlir
+++ b/mlir/test/Conversion/VectorToSCF/vector-to-scf.mlir
@@ -740,23 +740,6 @@ func.func @cannot_lower_transfer_read_with_leading_scalable(%arg0: memref<?x4xf3
 
 //  -----
 
-// Check that the `unpackOneDim` case in the `TransferOpConversion` generates valid indices for the LoadOp.
-
-#map1 = affine_map<(d0, d1, d2, d3) -> (d0, 0, 0, d3)>
-func.func @does_not_crash_on_unpack_one_dim(%subview:  memref<1x1x1x1xi32>, %mask: vector<1x1xi1>) -> vector<1x1x1x1xi32> {
-  %c0 = arith.constant 0 : index
-  %c0_i32 = arith.constant 0 : i32
-  %3 = vector.transfer_read %subview[%c0, %c0, %c0, %c0], %c0_i32, %mask {permutation_map = #map1}
-          : memref<1x1x1x1xi32>, vector<1x1x1x1xi32>
-  return %3 : vector<1x1x1x1xi32>
-}
-// CHECK-LABEL: func.func @does_not_crash_on_unpack_one_dim
-// CHECK: %[[ALLOCA_0:.*]] = memref.alloca() : memref<vector<1x1xi1>>
-// CHECK: %[[MASK:.*]] = vector.type_cast %[[ALLOCA_0]] : memref<vector<1x1xi1>> to memref<1xvector<1xi1>>
-// CHECK: memref.load %[[MASK]][%{{.*}}] : memref<1xvector<1xi1>>
-
-//  -----
-
 // FULL-UNROLL-LABEL: @cannot_fully_unroll_transfer_write_of_nd_scalable_vector
 func.func @cannot_fully_unroll_transfer_write_of_nd_scalable_vector(%vec: vector<[4]x[4]xf32>, %memref: memref<?x?xf32>) {
   // FULL-UNROLL-NOT: vector.extract
diff --git a/mlir/test/Dialect/MemRef/invalid.mlir b/mlir/test/Dialect/MemRef/invalid.mlir
index f9b870f77266e1..55b759cbb3ce7c 100644
--- a/mlir/test/Dialect/MemRef/invalid.mlir
+++ b/mlir/test/Dialect/MemRef/invalid.mlir
@@ -896,15 +896,6 @@ func.func @bad_alloc_wrong_symbol_count() {
 
 // -----
 
-func.func @load_invalid_memref_indexes() {
-  %0 = memref.alloca() : memref<10xi32>
-  %c0 = arith.constant 0 : index
-  // expected-error@+1 {{incorrect number of indices for load, expected 1 but got 2}}
-  %1 = memref.load %0[%c0, %c0] : memref<10xi32>
-}
-
-// -----
-
 func.func @test_store_zero_results() {
 ^bb0:
   %0 = memref.alloc() : memref<1024x64xf32, affine_map<(d0, d1) -> (d0, d1)>, 1>

From b6cce87110072a2db19276e042cd40b06285abbc Mon Sep 17 00:00:00 2001
From: Jie Fu <jiefu@tencent.com>
Date: Sun, 17 Dec 2023 19:59:42 +0800
Subject: [PATCH 09/32] [RISCV] Fix -Wbraced-scalar-init in
 RISCVISelLowering.cpp (NFC)

llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp:339:24:
 error: braces around scalar initializer [-Werror,-Wbraced-scalar-init]
  339 |     setOperationAction({ISD::ROTL}, XLenVT, Expand);
      |                        ^~~~~~~~~~~
1 error generated.
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 782a9e1db569f5..03e994586d0c44 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -336,7 +336,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
       setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Custom);
     setOperationAction({ISD::ROTL, ISD::ROTR}, XLenVT, Custom);
   } else if (Subtarget.hasVendorXCVbitmanip()) {
-    setOperationAction({ISD::ROTL}, XLenVT, Expand);
+    setOperationAction(ISD::ROTL, XLenVT, Expand);
   } else {
     setOperationAction({ISD::ROTL, ISD::ROTR}, XLenVT, Expand);
     if (RV64LegalI32 && Subtarget.is64Bit())

From a418be96de7872f6058207c695ef4698cb1dbb93 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Sun, 17 Dec 2023 13:44:08 +0000
Subject: [PATCH 10/32] [X86] combineLoad - extract target constants at the
 minimum scalar element width.

There is no need to extract at the byte level, and extracting at the minimum scalar element width will make it easier to reconstruct constants in a future patch.
---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 13f69883ad6d52..b80c766c7ffa75 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -49956,6 +49956,7 @@ static SDValue combineLoad(SDNode *N, SelectionDAG &DAG,
         };
         // See if we are loading a constant that matches in the lower
         // bits of a longer constant (but from a different constant pool ptr).
+        EVT UserVT = User->getValueType(0);
         SDValue UserPtr = cast<MemSDNode>(User)->getBasePtr();
         const Constant *LdC = getTargetConstantFromBasePtr(Ptr);
         const Constant *UserC = getTargetConstantFromBasePtr(UserPtr);
@@ -49965,9 +49966,12 @@ static SDValue combineLoad(SDNode *N, SelectionDAG &DAG,
           if (LdSize < UserSize || !ISD::isNormalLoad(User)) {
             APInt Undefs, UserUndefs;
             SmallVector<APInt> Bits, UserBits;
-            if (getTargetConstantBitsFromNode(SDValue(N, 0), 8, Undefs, Bits) &&
-                getTargetConstantBitsFromNode(SDValue(User, 0), 8, UserUndefs,
-                                              UserBits)) {
+            unsigned NumBits = std::min(RegVT.getScalarSizeInBits(),
+                                        UserVT.getScalarSizeInBits());
+            if (getTargetConstantBitsFromNode(SDValue(N, 0), NumBits, Undefs,
+                                              Bits) &&
+                getTargetConstantBitsFromNode(SDValue(User, 0), NumBits,
+                                              UserUndefs, UserBits)) {
               if (MatchingBits(Undefs, UserUndefs, Bits, UserBits)) {
                 SDValue Extract = extractSubVector(
                     SDValue(User, 0), 0, DAG, SDLoc(N), RegVT.getSizeInBits());

From 4b3078ef2d8b4ce833c2b493421486bb25802b32 Mon Sep 17 00:00:00 2001
From: Kazu Hirata <kazu@google.com>
Date: Sun, 17 Dec 2023 09:09:37 -0800
Subject: [PATCH 11/32] [CodeGen] Remove unnecessary includes (NFC)

---
 llvm/include/llvm/CodeGen/AntiDepBreaker.h             | 1 -
 llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h         | 1 -
 llvm/lib/CodeGen/MachineStableHash.cpp                 | 1 -
 llvm/unittests/CodeGen/DwarfStringPoolEntryRefTest.cpp | 1 -
 4 files changed, 4 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/AntiDepBreaker.h b/llvm/include/llvm/CodeGen/AntiDepBreaker.h
index c5c2b574861370..eba642684c95c6 100644
--- a/llvm/include/llvm/CodeGen/AntiDepBreaker.h
+++ b/llvm/include/llvm/CodeGen/AntiDepBreaker.h
@@ -19,7 +19,6 @@
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/Support/Compiler.h"
-#include <cassert>
 #include <utility>
 #include <vector>
 
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index 2faa057f46073c..dc772bb459c956 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -25,7 +25,6 @@
 #include "llvm/CodeGen/LexicalScopes.h"
 #include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/Support/Casting.h"
-#include <cassert>
 #include <cstdint>
 #include <memory>
 
diff --git a/llvm/lib/CodeGen/MachineStableHash.cpp b/llvm/lib/CodeGen/MachineStableHash.cpp
index debb2b3809e3f7..1cd90474898e77 100644
--- a/llvm/lib/CodeGen/MachineStableHash.cpp
+++ b/llvm/lib/CodeGen/MachineStableHash.cpp
@@ -14,7 +14,6 @@
 #include "llvm/CodeGen/MachineStableHash.h"
 #include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/APInt.h"
-#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/Hashing.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
diff --git a/llvm/unittests/CodeGen/DwarfStringPoolEntryRefTest.cpp b/llvm/unittests/CodeGen/DwarfStringPoolEntryRefTest.cpp
index 25db003ea81deb..8e82d11732e254 100644
--- a/llvm/unittests/CodeGen/DwarfStringPoolEntryRefTest.cpp
+++ b/llvm/unittests/CodeGen/DwarfStringPoolEntryRefTest.cpp
@@ -12,7 +12,6 @@
 
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
-#include <string>
 
 using namespace llvm;
 

From 2570c7e284c8ad1ee6db069e22d72b836ae935f6 Mon Sep 17 00:00:00 2001
From: Kazu Hirata <kazu@google.com>
Date: Sun, 17 Dec 2023 09:09:39 -0800
Subject: [PATCH 12/32] [CodeGen] Remove unused forward declarations (NFC)

---
 llvm/include/llvm/CodeGen/AccelTable.h                | 1 -
 llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h | 2 --
 llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h          | 1 -
 llvm/lib/CodeGen/AsmPrinter/DwarfFile.h               | 1 -
 4 files changed, 5 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/AccelTable.h b/llvm/include/llvm/CodeGen/AccelTable.h
index 0f35fd3514fae7..af874aa5e91a32 100644
--- a/llvm/include/llvm/CodeGen/AccelTable.h
+++ b/llvm/include/llvm/CodeGen/AccelTable.h
@@ -103,7 +103,6 @@
 namespace llvm {
 
 class AsmPrinter;
-class DwarfUnit;
 class DwarfDebug;
 class DwarfTypeUnit;
 class MCSymbol;
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index a4e9c92b489767..e7debc652a0a8b 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -28,11 +28,9 @@
 namespace llvm {
 
 class GISelChangeObserver;
-class APFloat;
 class APInt;
 class ConstantFP;
 class GPtrAdd;
-class GStore;
 class GZExtLoad;
 class MachineIRBuilder;
 class MachineInstrBuilder;
diff --git a/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h b/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h
index c9378ace4fd1be..6f553dc85c6462 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h
@@ -18,7 +18,6 @@ namespace llvm {
 class AsmPrinter;
 class DbgVariable;
 class DwarfCompileUnit;
-class MachineInstr;
 class MCSymbol;
 
 /// Byte stream of .debug_loc entries.
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h b/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h
index e10fd2b2642ac9..f76858fc2f36a0 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h
@@ -28,7 +28,6 @@ class DbgLabel;
 class DINode;
 class DILocalScope;
 class DwarfCompileUnit;
-class DwarfTypeUnit;
 class DwarfUnit;
 class LexicalScope;
 class MCSection;

From 6eaf15d05e3d4490bf0b32fea553027ae3a4e996 Mon Sep 17 00:00:00 2001
From: Kazu Hirata <kazu@google.com>
Date: Sun, 17 Dec 2023 09:41:20 -0800
Subject: [PATCH 13/32] [Analysis] Use llvm::erase (NFC)

---
 llvm/unittests/Analysis/DomTreeUpdaterTest.cpp | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/llvm/unittests/Analysis/DomTreeUpdaterTest.cpp b/llvm/unittests/Analysis/DomTreeUpdaterTest.cpp
index 4a5e2d73f962ca..0777bbe3887bce 100644
--- a/llvm/unittests/Analysis/DomTreeUpdaterTest.cpp
+++ b/llvm/unittests/Analysis/DomTreeUpdaterTest.cpp
@@ -375,12 +375,7 @@ TEST(DomTreeUpdater, LazyUpdateDTInheritedPreds) {
   std::vector<BasicBlock *> BasicBlocks;
   BasicBlocks.push_back(BB1);
   BasicBlocks.push_back(BB2);
-  auto Eraser = [&](BasicBlock *BB) {
-    BasicBlocks.erase(
-        std::remove_if(BasicBlocks.begin(), BasicBlocks.end(),
-                       [&](const BasicBlock *i) { return i == BB; }),
-        BasicBlocks.end());
-  };
+  auto Eraser = [&](BasicBlock *BB) { llvm::erase(BasicBlocks, BB); };
   ASSERT_EQ(BasicBlocks.size(), static_cast<size_t>(2));
   // Remove bb2 from F. This has to happen before the call to
   // applyUpdates() for DTU to detect there is no longer an edge between

From 6655581038f8479f0f6942b7d34cbd6556d00a0e Mon Sep 17 00:00:00 2001
From: Kazu Hirata <kazu@google.com>
Date: Sun, 17 Dec 2023 09:41:22 -0800
Subject: [PATCH 14/32] [Dialect] Use llvm::is_contained (NFC)

---
 mlir/lib/Dialect/Mesh/Interfaces/ShardingInterface.cpp | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/mlir/lib/Dialect/Mesh/Interfaces/ShardingInterface.cpp b/mlir/lib/Dialect/Mesh/Interfaces/ShardingInterface.cpp
index 902ad8fc19c5d8..a6f2f435f36d68 100644
--- a/mlir/lib/Dialect/Mesh/Interfaces/ShardingInterface.cpp
+++ b/mlir/lib/Dialect/Mesh/Interfaces/ShardingInterface.cpp
@@ -231,9 +231,7 @@ static LogicalResult fillShardingOption(Operation *op,
       continue;
 
     for (int32_t axis : meshAxes) {
-      if (std::find(shardingOption.shardingArray[i].begin(),
-                    shardingOption.shardingArray[i].end(),
-                    axis) != shardingOption.shardingArray[i].end()) {
+      if (llvm::is_contained(shardingOption.shardingArray[i], axis)) {
         LLVM_DEBUG(DBGS() << "sharding option conflicts because mesh axes "
                           << axis << " duplicate");
         return failure();

From 6561efe142ae2a5d434ff646319b0bfb1dd39dee Mon Sep 17 00:00:00 2001
From: Rik Huijzer <github@huijzer.xyz>
Date: Sun, 17 Dec 2023 20:24:47 +0100
Subject: [PATCH 15/32] [mlir][python][nfc] Test `-print-ir-after-all` (#75742)

The `-print-ir-after-all` functionality was added in
https://github.com/llvm/llvm-project/commit/caa159f044a05f782701a525d8b0e8f346abbd64.
This PR adds a test for it and, with that, some documentation.

---------

Co-authored-by: Maksim Levental <maksim.levental@gmail.com>
---
 mlir/test/python/pass_manager.py | 33 ++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/mlir/test/python/pass_manager.py b/mlir/test/python/pass_manager.py
index 0face028b73ff1..43af80b53166cc 100644
--- a/mlir/test/python/pass_manager.py
+++ b/mlir/test/python/pass_manager.py
@@ -281,3 +281,36 @@ def testPostPassOpInvalidation():
         # CHECK:   return
         # CHECK: }
         log(module)
+
+
+# CHECK-LABEL: TEST: testPrintIrAfterAll
+@run
+def testPrintIrAfterAll():
+    with Context() as ctx:
+        module = ModuleOp.parse(
+            """
+          module {
+            func.func @main() {
+              %0 = arith.constant 10
+              return
+            }
+          }
+        """
+        )
+        pm = PassManager.parse("builtin.module(canonicalize)")
+        ctx.enable_multithreading(False)
+        pm.enable_ir_printing()
+        # CHECK: // -----// IR Dump Before Canonicalizer (canonicalize) ('builtin.module' operation) //----- //
+        # CHECK: module {
+        # CHECK:   func.func @main() {
+        # CHECK:     %[[C10:.*]] = arith.constant 10 : i64
+        # CHECK:     return
+        # CHECK:   }
+        # CHECK: }
+        # CHECK: // -----// IR Dump After Canonicalizer (canonicalize) ('builtin.module' operation) //----- //
+        # CHECK: module {
+        # CHECK:   func.func @main() {
+        # CHECK:     return
+        # CHECK:   }
+        # CHECK: }
+        pm.run(module)

From d14ee76181fba376a04cb50afd9ab30cc406ee90 Mon Sep 17 00:00:00 2001
From: darkbuck <michael.hliao@gmail.com>
Date: Sun, 17 Dec 2023 15:02:10 -0500
Subject: [PATCH 16/32] [GISel][TableGen] Enhance default ops support (#75689)

- Instead of checking the default ops directly, this change queries the DAG
default operands collected while reading patterns. This not only
simplifies the code but also handles a few cases where integer values are
converted from convertible types, such as 'bits'.
- A test case is added to GlobalISelEmitter.td as a regression test for
default 'bits' values.
---
 llvm/test/TableGen/GlobalISelEmitter.td   | 90 +++++++++++++++--------
 llvm/utils/TableGen/GlobalISelEmitter.cpp | 30 +++-----
 2 files changed, 72 insertions(+), 48 deletions(-)

diff --git a/llvm/test/TableGen/GlobalISelEmitter.td b/llvm/test/TableGen/GlobalISelEmitter.td
index eab2acd6cb1e7e..f9d7d2dcccdbb8 100644
--- a/llvm/test/TableGen/GlobalISelEmitter.td
+++ b/llvm/test/TableGen/GlobalISelEmitter.td
@@ -59,6 +59,7 @@ def gi_cimm9 : GICustomOperandRenderer<"renderImm">;
 def m1 : OperandWithDefaultOps <i32, (ops (i32 -1))>;
 def Z : OperandWithDefaultOps <i32, (ops R0)>;
 def m1Z : OperandWithDefaultOps <i32, (ops (i32 -1), R0)>;
+def mb : OperandWithDefaultOps <i32, (ops (i32 0b1101))>;
 
 def HasA : Predicate<"Subtarget->hasA()">;
 def HasB : Predicate<"Subtarget->hasB()">;
@@ -297,7 +298,7 @@ def HasC : Predicate<"Subtarget->hasC()"> { let RecomputePerFunction = 1; }
 // R19C-NEXT:    GIR_AddSimpleTempRegister, /*InsnID*/0, /*TempRegID*/0,
 // R19C-NEXT:    GIR_EraseFromParent, /*InsnID*/0,
 // R19C-NEXT:    GIR_ConstrainSelectedInstOperands, /*InsnID*/0,
-// R19C-NEXT:    // GIR_Coverage, 19,
+// R19C-NEXT:    // GIR_Coverage, 20,
 // R19C-NEXT:    GIR_Done,
 // R19C-NEXT:  // Label [[LABEL_NUM]]: @[[LABEL]]
 //
@@ -330,12 +331,12 @@ def : Pat<(select GPR32:$src1, (complex_rr GPR32:$src2a, GPR32:$src2b),
 // R21O-NEXT:    GIM_CheckType, /*MI*/0, /*Op*/2, /*Type*/GILLT_s32,
 // R21O-NEXT:    GIM_CheckType, /*MI*/0, /*Op*/3, /*Type*/GILLT_s32,
 //
-// R21C-NEXT:  GIM_Try, /*On fail goto*//*Label [[PREV_NUM:[0-9]+]]*/ GIMT_Encode4([[PREV:[0-9]+]]), // Rule ID 19 //
+// R21C-NEXT:  GIM_Try, /*On fail goto*//*Label [[PREV_NUM:[0-9]+]]*/ GIMT_Encode4([[PREV:[0-9]+]]), // Rule ID 20 //
 // R21C-NOT:     GIR_Done,
-// R21C:         // GIR_Coverage, 19,
+// R21C:         // GIR_Coverage, 20,
 // R21C-NEXT:    GIR_Done,
 // R21C-NEXT:  // Label [[PREV_NUM]]: @[[PREV]]
-// R21C-NEXT:  GIM_Try, /*On fail goto*//*Label [[LABEL_NUM:[0-9]+]]*/ GIMT_Encode4([[LABEL:[0-9]+]]), // Rule ID 21 //
+// R21C-NEXT:  GIM_Try, /*On fail goto*//*Label [[LABEL_NUM:[0-9]+]]*/ GIMT_Encode4([[LABEL:[0-9]+]]), // Rule ID 22 //
 //
 // R21O-NEXT:    GIM_CheckRegBankForClass, /*MI*/0, /*Op*/0, /*RC*/GIMT_Encode2(MyTarget::GPR32RegClassID),
 // R21O-NEXT:    GIM_CheckRegBankForClass, /*MI*/0, /*Op*/1, /*RC*/GIMT_Encode2(MyTarget::GPR32RegClassID),
@@ -366,7 +367,7 @@ def : Pat<(select GPR32:$src1, (complex_rr GPR32:$src2a, GPR32:$src2b),
 // R21C-NEXT:    GIR_MergeMemOperands, /*InsnID*/0, /*NumInsns*/1, /*MergeInsnID's*/0
 // R21C-NEXT:    GIR_EraseFromParent, /*InsnID*/0,
 // R21C-NEXT:    GIR_ConstrainSelectedInstOperands, /*InsnID*/0,
-// R21C-NEXT:    // GIR_Coverage, 21,
+// R21C-NEXT:    // GIR_Coverage, 22,
 // R21C-NEXT:    GIR_Done,
 // R21C-NEXT:  // Label [[LABEL_NUM]]: @[[LABEL]]
 //
@@ -390,10 +391,10 @@ def : Pat<(select GPR32:$src1, (complex_rr GPR32:$src2a, GPR32:$src2b),
 // R20O-NEXT:    GIM_CheckType, /*MI*/0, /*Op*/2, /*Type*/GILLT_s32,
 // R20O-NEXT:    GIM_CheckRegBankForClass, /*MI*/0, /*Op*/0, /*RC*/GIMT_Encode2(MyTarget::GPR32RegClassID),
 //
-// R20N:       GIM_Try, /*On fail goto*//*Label [[PREV_NUM:[0-9]+]]*/ GIMT_Encode4([[PREV:[0-9]+]]), // Rule ID 21 //
+// R20N:       GIM_Try, /*On fail goto*//*Label [[PREV_NUM:[0-9]+]]*/ GIMT_Encode4([[PREV:[0-9]+]]), // Rule ID 22 //
 // R20N:       // Label [[PREV_NUM]]: @[[PREV]]
 //
-// R20C-NEXT:  GIM_Try, /*On fail goto*//*Label [[LABEL_NUM:[0-9]+]]*/ GIMT_Encode4([[LABEL:[0-9]+]]), // Rule ID 20 //
+// R20C-NEXT:  GIM_Try, /*On fail goto*//*Label [[LABEL_NUM:[0-9]+]]*/ GIMT_Encode4([[LABEL:[0-9]+]]), // Rule ID 21 //
 //
 // R20N-NEXT:    GIM_CheckNumOperands, /*MI*/0, /*Expected*/3,
 // R20N-NEXT:    GIM_CheckOpcode, /*MI*/0, GIMT_Encode2(TargetOpcode::G_SUB),
@@ -415,7 +416,7 @@ def : Pat<(select GPR32:$src1, (complex_rr GPR32:$src2a, GPR32:$src2b),
 // R20C-NEXT:    GIR_ComplexRenderer, /*InsnID*/0, /*RendererID*/GIMT_Encode2(0),
 // R20C-NEXT:    GIR_EraseFromParent, /*InsnID*/0,
 // R20C-NEXT:    GIR_ConstrainSelectedInstOperands, /*InsnID*/0,
-// R20C-NEXT:    // GIR_Coverage, 20,
+// R20C-NEXT:    // GIR_Coverage, 21,
 // R20C-NEXT:    GIR_Done,
 // R20C-NEXT:  // Label [[LABEL_NUM]]: @[[LABEL]]
 //
@@ -455,7 +456,7 @@ def : Pat<(frag GPR32:$src1, complex:$src2, complex:$src3),
 // R00O-NEXT:    GIM_CheckType, /*MI*/0, /*Op*/2, /*Type*/GILLT_s32,
 // R00O-NEXT:    GIM_CheckRegBankForClass, /*MI*/0, /*Op*/0, /*RC*/GIMT_Encode2(MyTarget::GPR32RegClassID),
 //
-// R00C:       GIM_Try, /*On fail goto*//*Label [[PREV_NUM:[0-9]+]]*/ GIMT_Encode4([[PREV:[0-9]+]]), // Rule ID 20 //
+// R00C:       GIM_Try, /*On fail goto*//*Label [[PREV_NUM:[0-9]+]]*/ GIMT_Encode4([[PREV:[0-9]+]]), // Rule ID 21 //
 // R00C:       // Label [[PREV_NUM]]: @[[PREV]]
 //
 // R00C-NEXT:  GIM_Try, /*On fail goto*//*Label [[LABEL_NUM:[0-9]+]]*/ GIMT_Encode4([[LABEL:[0-9]+]]), // Rule ID 0 //
@@ -517,7 +518,7 @@ def : Pat<(frag GPR32:$src1, complex:$src2, complex:$src3),
 // R00O-NEXT:  GIM_Reject,
 // R00O:       // Label [[DEFAULT_NUM]]: @[[DEFAULT]]
 // R00O-NEXT:  GIM_Reject,
-// R00O-NEXT:  }; // Size: 1978 bytes
+// R00O-NEXT:  }; // Size: 2007 bytes
 
 def INSNBOB : I<(outs GPR32:$dst), (ins GPR32:$src1, GPR32:$src2, GPR32:$src3, GPR32:$src4),
                  [(set GPR32:$dst,
@@ -709,6 +710,35 @@ def XORlike : I<(outs GPR32:$dst), (ins m1Z:$src2, GPR32:$src1),
 def XORManyDefaults : I<(outs GPR32:$dst), (ins m1Z:$src3, Z:$src2, GPR32:$src1),
                         [(set GPR32:$dst, (xor GPR32:$src1, -5))]>;
 
+//===- Test a simple pattern with a default bits operand. -----------------===//
+//
+// NOOPT-NEXT:  GIM_Try, /*On fail goto*//*Label [[LABEL_NUM:[0-9]+]]*/ GIMT_Encode4([[LABEL:[0-9]+]]),
+// NOOPT-NEXT:    GIM_CheckNumOperands, /*MI*/0, /*Expected*/3,
+// NOOPT-NEXT:    GIM_CheckOpcode, /*MI*/0, GIMT_Encode2(TargetOpcode::G_XOR),
+// NOOPT-NEXT:    // MIs[0] DstI[dst]
+// NOOPT-NEXT:    GIM_CheckType, /*MI*/0, /*Op*/0, /*Type*/GILLT_s32,
+// NOOPT-NEXT:    GIM_CheckRegBankForClass, /*MI*/0, /*Op*/0, /*RC*/GIMT_Encode2(MyTarget::GPR32RegClassID),
+// NOOPT-NEXT:    // MIs[0] src1
+// NOOPT-NEXT:    GIM_CheckType, /*MI*/0, /*Op*/1, /*Type*/GILLT_s32,
+// NOOPT-NEXT:    GIM_CheckRegBankForClass, /*MI*/0, /*Op*/1, /*RC*/GIMT_Encode2(MyTarget::GPR32RegClassID),
+// NOOPT-NEXT:    // MIs[0] Operand 2
+// NOOPT-NEXT:    GIM_CheckType, /*MI*/0, /*Op*/2, /*Type*/GILLT_s32,
+// NOOPT-NEXT:    GIM_CheckConstantInt8, /*MI*/0, /*Op*/2, uint8_t(-6)
+// NOOPT-NEXT:    // (xor:{ *:[i32] } GPR32:{ *:[i32] }:$src1, -6:{ *:[i32] }) => (XORIb:{ *:[i32] } GPR32:{ *:[i32] }:$src1)
+// NOOPT-NEXT:    GIR_BuildMI, /*InsnID*/0, /*Opcode*/GIMT_Encode2(MyTarget::XORIb),
+// NOOPT-NEXT:    GIR_Copy, /*NewInsnID*/0, /*OldInsnID*/0, /*OpIdx*/0, // DstI[dst]
+// NOOPT-NEXT:    GIR_AddImm8, /*InsnID*/0, /*Imm*/13,
+// NOOPT-NEXT:    GIR_Copy, /*NewInsnID*/0, /*OldInsnID*/0, /*OpIdx*/1, // src1
+// NOOPT-NEXT:    GIR_EraseFromParent, /*InsnID*/0,
+// NOOPT-NEXT:    GIR_ConstrainSelectedInstOperands, /*InsnID*/0,
+// NOOPT-NEXT:    // GIR_Coverage, 6,
+// NOOPT-NEXT:    GIR_Done,
+// NOOPT-NEXT:  // Label [[LABEL_NUM]]: @[[LABEL]]
+
+// The -6 is just to distinguish it from the other cases.
+def XORIb : I<(outs GPR32:$dst), (ins mb:$src2, GPR32:$src1),
+              [(set GPR32:$dst, (xor GPR32:$src1, -6))]>;
+
 //===- Test a simple pattern with constant immediate operands. ------------===//
 //
 // This must precede the 3-register variants because constant immediates have
@@ -733,7 +763,7 @@ def XORManyDefaults : I<(outs GPR32:$dst), (ins m1Z:$src3, Z:$src2, GPR32:$src1)
 // NOOPT-NEXT:    GIR_Copy, /*NewInsnID*/0, /*OldInsnID*/0, /*OpIdx*/1, // Wm
 // NOOPT-NEXT:    GIR_EraseFromParent, /*InsnID*/0,
 // NOOPT-NEXT:    GIR_ConstrainSelectedInstOperands, /*InsnID*/0,
-// NOOPT-NEXT:    // GIR_Coverage, 22,
+// NOOPT-NEXT:    // GIR_Coverage, 23,
 // NOOPT-NEXT:    GIR_Done,
 // NOOPT-NEXT:  // Label [[LABEL_NUM]]: @[[LABEL]]
 
@@ -774,7 +804,7 @@ def : Pat<(not GPR32:$Wm), (ORN R0, GPR32:$Wm)>;
 // NOOPT-NEXT:    GIR_Copy, /*NewInsnID*/0, /*OldInsnID*/0, /*OpIdx*/2, // src3
 // NOOPT-NEXT:    GIR_EraseFromParent, /*InsnID*/0,
 // NOOPT-NEXT:    GIR_ConstrainSelectedInstOperands, /*InsnID*/0,
-// NOOPT-NEXT:    // GIR_Coverage, 6,
+// NOOPT-NEXT:    // GIR_Coverage, 7,
 // NOOPT-NEXT:    GIR_Done,
 // NOOPT-NEXT:  // Label [[LABEL_NUM]]: @[[LABEL]]
 
@@ -812,7 +842,7 @@ def : Pat<(not GPR32:$Wm), (ORN R0, GPR32:$Wm)>;
 // NOOPT-NEXT:    GIR_Copy, /*NewInsnID*/0, /*OldInsnID*/0, /*OpIdx*/1, // src3
 // NOOPT-NEXT:    GIR_EraseFromParent, /*InsnID*/0,
 // NOOPT-NEXT:    GIR_ConstrainSelectedInstOperands, /*InsnID*/0,
-// NOOPT-NEXT:    // GIR_Coverage, 27,
+// NOOPT-NEXT:    // GIR_Coverage, 28,
 // NOOPT-NEXT:    GIR_Done,
 // NOOPT-NEXT:  // Label [[LABEL_NUM]]: @[[LABEL]]
 
@@ -836,7 +866,7 @@ def MULADD : I<(outs GPR32:$dst), (ins GPR32:$src1, GPR32:$src2, GPR32:$src3),
 // NOOPT-NEXT:    GIR_Copy, /*NewInsnID*/0, /*OldInsnID*/0, /*OpIdx*/0, //  DstI[dst]
 // NOOPT-NEXT:    GIR_EraseFromParent, /*InsnID*/0,
 // NOOPT-NEXT:    GIR_ConstrainSelectedInstOperands, /*InsnID*/0,
-// NOOPT-NEXT:    // GIR_Coverage, 7,
+// NOOPT-NEXT:    // GIR_Coverage, 8,
 // NOOPT-NEXT:    GIR_Done,
 // NOOPT-NEXT:  // Label [[LABEL_NUM]]: @[[LABEL]]
 
@@ -859,7 +889,7 @@ def MOV1 : I<(outs GPR32:$dst), (ins), [(set GPR32:$dst, 1)]>;
 // NOOPT-NEXT:    GIR_CopyConstantAsSImm, /*NewInsnID*/0, /*OldInsnID*/0, // imm
 // NOOPT-NEXT:    GIR_EraseFromParent, /*InsnID*/0,
 // NOOPT-NEXT:    GIR_ConstrainSelectedInstOperands, /*InsnID*/0,
-// NOOPT-NEXT:    // GIR_Coverage, 8,
+// NOOPT-NEXT:    // GIR_Coverage, 9,
 // NOOPT-NEXT:    GIR_Done,
 // NOOPT-NEXT:  // Label [[LABEL_NUM]]: @[[LABEL]]
 
@@ -883,7 +913,7 @@ def MOVimm8 : I<(outs GPR32:$dst), (ins i32imm:$imm), [(set GPR32:$dst, simm8:$i
 // NOOPT-NEXT:    GIR_CopyConstantAsSImm, /*NewInsnID*/0, /*OldInsnID*/0, // imm
 // NOOPT-NEXT:    GIR_EraseFromParent, /*InsnID*/0,
 // NOOPT-NEXT:    GIR_ConstrainSelectedInstOperands, /*InsnID*/0,
-// NOOPT-NEXT:    // GIR_Coverage, 9,
+// NOOPT-NEXT:    // GIR_Coverage, 10,
 // NOOPT-NEXT:    GIR_Done,
 // NOOPT-NEXT:  // Label [[LABEL_NUM]]: @[[LABEL]]
 
@@ -907,7 +937,7 @@ def MOVimm9 : I<(outs GPR32:$dst), (ins i32imm:$imm), [(set GPR32:$dst, simm9:$i
 // NOOPT-NEXT:    GIR_CustomRenderer, /*InsnID*/0, /*OldInsnID*/0, /*Renderer*/GIMT_Encode2(GICR_renderImm), // imm
 // NOOPT-NEXT:    GIR_EraseFromParent, /*InsnID*/0,
 // NOOPT-NEXT:    GIR_ConstrainSelectedInstOperands, /*InsnID*/0,
-// NOOPT-NEXT:    // GIR_Coverage, 10,
+// NOOPT-NEXT:    // GIR_Coverage, 11,
 // NOOPT-NEXT:    GIR_Done,
 // NOOPT-NEXT:  // Label [[LABEL_NUM]]: @[[LABEL]]
 
@@ -930,7 +960,7 @@ def MOVcimm8 : I<(outs GPR32:$dst), (ins i32imm:$imm), [(set GPR32:$dst, cimm8:$
 // NOOPT-NEXT:    GIR_CopyFConstantAsFPImm, /*NewInsnID*/0, /*OldInsnID*/0, // imm
 // NOOPT-NEXT:    GIR_EraseFromParent, /*InsnID*/0,
 // NOOPT-NEXT:    GIR_ConstrainSelectedInstOperands, /*InsnID*/0,
-// NOOPT-NEXT:    // GIR_Coverage, 17,
+// NOOPT-NEXT:    // GIR_Coverage, 18,
 // NOOPT-NEXT:    GIR_Done,
 // NOOPT-NEXT:  // Label [[LABEL_NUM]]: @[[LABEL]]
 
@@ -950,7 +980,7 @@ def MOVcimm8 : I<(outs GPR32:$dst), (ins i32imm:$imm), [(set GPR32:$dst, cimm8:$
 // NOOPT-NEXT:    // (ld:{ *:[i32] } GPR32:{ *:[i32] }:$src1)<<P:Predicate_unindexedload>><<P:Predicate_load>> => (LOAD:{ *:[i32] } GPR32:{ *:[i32] }:$src1)
 // NOOPT-NEXT:    GIR_MutateOpcode, /*InsnID*/0, /*RecycleInsnID*/0, /*Opcode*/GIMT_Encode2(MyTarget::LOAD),
 // NOOPT-NEXT:    GIR_ConstrainSelectedInstOperands, /*InsnID*/0,
-// NOOPT-NEXT:    // GIR_Coverage, 11,
+// NOOPT-NEXT:    // GIR_Coverage, 12,
 // NOOPT-NEXT:    GIR_Done,
 // NOOPT-NEXT:  // Label [[LABEL_NUM]]: @[[LABEL]]
 
@@ -973,7 +1003,7 @@ def LOAD : I<(outs GPR32:$dst), (ins GPR32:$src1),
 // NOOPT-NEXT:    // (ld:{ *:[i32] } GPR32:{ *:[i32] }:$src)<<P:Predicate_unindexedload>><<P:Predicate_load>>  =>  (LOAD:{ *:[i32] } GPR32:{ *:[i32] }:$src)
 // NOOPT-NEXT:    GIR_MutateOpcode, /*InsnID*/0, /*RecycleInsnID*/0, /*Opcode*/GIMT_Encode2(MyTarget::LOAD),
 // NOOPT-NEXT:    GIR_ConstrainSelectedInstOperands, /*InsnID*/0,
-// NOOPT-NEXT:    // GIR_Coverage, 23,
+// NOOPT-NEXT:    // GIR_Coverage, 24,
 // NOOPT-NEXT:    GIR_Done,
 // NOOPT-NEXT:  // Label [[LABEL_NUM]]: @[[LABEL]]
 
@@ -996,7 +1026,7 @@ def : Pat<(load GPR32:$src),
 // NOOPT-NEXT:    // (ld:{ *:[i32] } GPR32:{ *:[i32] }:$src1)<<P:Predicate_unindexedload>><<P:Predicate_sextload>><<P:Predicate_sextloadi16>>  =>  (SEXTLOAD:{ *:[i32] } GPR32:{ *:[i32] }:$src1)
 // NOOPT-NEXT:    GIR_MutateOpcode, /*InsnID*/0, /*RecycleInsnID*/0, /*Opcode*/GIMT_Encode2(MyTarget::SEXTLOAD),
 // NOOPT-NEXT:    GIR_ConstrainSelectedInstOperands, /*InsnID*/0,
-// NOOPT-NEXT:    // GIR_Coverage, 12,
+// NOOPT-NEXT:    // GIR_Coverage, 13,
 // NOOPT-NEXT:    GIR_Done,
 // NOOPT-NEXT:  // Label [[LABEL_NUM]]: @[[LABEL]]
 
@@ -1020,7 +1050,7 @@ def SEXTLOAD : I<(outs GPR32:$dst), (ins GPR32:$src1),
 // NOOPT-NEXT:    // (add:{ *:[i32] } GPR32:{ *:[i32] }:$src1, GPR32:{ *:[i32] }:$src2) => (ADD:{ *:[i32] } GPR32:{ *:[i32] }:$src1, GPR32:{ *:[i32] }:$src2)
 // NOOPT-NEXT:    GIR_MutateOpcode, /*InsnID*/0, /*RecycleInsnID*/0, /*Opcode*/GIMT_Encode2(MyTarget::ADD),
 // NOOPT-NEXT:    GIR_ConstrainSelectedInstOperands, /*InsnID*/0,
-// NOOPT-NEXT:    // GIR_Coverage, 13,
+// NOOPT-NEXT:    // GIR_Coverage, 14,
 // NOOPT-NEXT:    GIR_Done,
 // NOOPT-NEXT:  // Label [[LABEL_NUM]]: @[[LABEL]]
 
@@ -1046,7 +1076,7 @@ def ADD : I<(outs GPR32:$dst), (ins GPR32:$src1, GPR32:$src2),
 // NOOPT-NEXT:    GIR_Copy, /*NewInsnID*/0, /*OldInsnID*/0, /*OpIdx*/1, // src
 // NOOPT-NEXT:    GIR_EraseFromParent, /*InsnID*/0,
 // NOOPT-NEXT:    GIR_ConstrainSelectedInstOperands, /*InsnID*/0,
-// NOOPT-NEXT:    // GIR_Coverage, 14,
+// NOOPT-NEXT:    // GIR_Coverage, 15,
 // NOOPT-NEXT:    GIR_Done,
 // NOOPT-NEXT:  // Label [[LABEL_NUM]]: @[[LABEL]]
 
@@ -1073,7 +1103,7 @@ def DOUBLE : I<(outs GPR32:$dst), (ins GPR32:$src), [(set GPR32:$dst, (add GPR32
 // NOOPT-NEXT:    // (add:{ *:[i32] } i32:{ *:[i32] }:$samename, i32:{ *:[i32] }:$othername)  =>  (InsnWithSpeciallyNamedDef:{ *:[i32] } i32:{ *:[i32] }:$samename, i32:{ *:[i32] }:$othername)
 // NOOPT-NEXT:    GIR_MutateOpcode, /*InsnID*/0, /*RecycleInsnID*/0, /*Opcode*/GIMT_Encode2(MyTarget::InsnWithSpeciallyNamedDef),
 // NOOPT-NEXT:    GIR_ConstrainSelectedInstOperands, /*InsnID*/0,
-// NOOPT-NEXT:    // GIR_Coverage, 24,
+// NOOPT-NEXT:    // GIR_Coverage, 25,
 // NOOPT-NEXT:    GIR_Done,
 // NOOPT-NEXT:  // Label [[LABEL_NUM]]: @[[LABEL]]
 
@@ -1096,7 +1126,7 @@ def : Pat<(add i32:$samename, i32:$othername),
 // NOOPT-NEXT:    // (add:{ *:[i32] } i32:{ *:[i32] }:$src1, i32:{ *:[i32] }:$src2) => (ADD:{ *:[i32] } i32:{ *:[i32] }:$src1, i32:{ *:[i32] }:$src2)
 // NOOPT-NEXT:    GIR_MutateOpcode, /*InsnID*/0, /*RecycleInsnID*/0, /*Opcode*/GIMT_Encode2(MyTarget::ADD),
 // NOOPT-NEXT:    GIR_ConstrainSelectedInstOperands, /*InsnID*/0,
-// NOOPT-NEXT:    // GIR_Coverage, 25,
+// NOOPT-NEXT:    // GIR_Coverage, 26,
 // NOOPT-NEXT:    GIR_Done,
 // NOOPT-NEXT:  // Label [[LABEL_NUM]]: @[[LABEL]]
 
@@ -1125,7 +1155,7 @@ def : Pat<(add i32:$src1, i32:$src2),
 // NOOPT-NEXT:    GIR_Copy, /*NewInsnID*/0, /*OldInsnID*/0, /*OpIdx*/1, // src1
 // NOOPT-NEXT:    GIR_EraseFromParent, /*InsnID*/0,
 // NOOPT-NEXT:    GIR_ConstrainSelectedInstOperands, /*InsnID*/0,
-// NOOPT-NEXT:    // GIR_Coverage, 15,
+// NOOPT-NEXT:    // GIR_Coverage, 16,
 // NOOPT-NEXT:    GIR_Done,
 // NOOPT-NEXT:  // Label [[LABEL_NUM]]: @[[LABEL]]
 
@@ -1148,7 +1178,7 @@ def MUL : I<(outs GPR32:$dst), (ins GPR32:$src2, GPR32:$src1),
 // NOOPT-NEXT:    // (bitconvert:{ *:[i32] } FPR32:{ *:[f32] }:$src1) => (COPY_TO_REGCLASS:{ *:[i32] } FPR32:{ *:[f32] }:$src1, GPR32:{ *:[i32] })
 // NOOPT-NEXT:    GIR_MutateOpcode, /*InsnID*/0, /*RecycleInsnID*/0, /*Opcode*/GIMT_Encode2(TargetOpcode::COPY),
 // NOOPT-NEXT:    GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/0, GIMT_Encode2(MyTarget::GPR32RegClassID),
-// NOOPT-NEXT:    // GIR_Coverage, 26,
+// NOOPT-NEXT:    // GIR_Coverage, 27,
 // NOOPT-NEXT:    GIR_Done,
 // NOOPT-NEXT:  // Label [[LABEL_NUM]]: @[[LABEL]]
 
@@ -1171,7 +1201,7 @@ def : Pat<(i32 (bitconvert FPR32:$src1)),
 // NOOPT-NEXT:    GIR_CopyConstantAsSImm, /*NewInsnID*/0, /*OldInsnID*/0, // imm
 // NOOPT-NEXT:    GIR_EraseFromParent, /*InsnID*/0,
 // NOOPT-NEXT:    GIR_ConstrainSelectedInstOperands, /*InsnID*/0,
-// NOOPT-NEXT:    // GIR_Coverage, 16,
+// NOOPT-NEXT:    // GIR_Coverage, 17,
 // NOOPT-NEXT:    GIR_Done,
 // NOOPT-NEXT:  // Label [[LABEL_NUM]]: @[[LABEL]]
 
@@ -1190,7 +1220,7 @@ def MOVfpimmz : I<(outs FPR32:$dst), (ins f32imm:$imm), [(set FPR32:$dst, fpimmz
 // NOOPT-NEXT:    // (br (bb:{ *:[Other] }):$target) => (BR (bb:{ *:[Other] }):$target)
 // NOOPT-NEXT:    GIR_MutateOpcode, /*InsnID*/0, /*RecycleInsnID*/0, /*Opcode*/GIMT_Encode2(MyTarget::BR),
 // NOOPT-NEXT:    GIR_ConstrainSelectedInstOperands, /*InsnID*/0,
-// NOOPT-NEXT:    // GIR_Coverage, 18,
+// NOOPT-NEXT:    // GIR_Coverage, 19,
 // NOOPT-NEXT:    GIR_Done,
 // NOOPT-NEXT:  // Label [[LABEL_NUM]]: @[[LABEL]]
 
@@ -1198,5 +1228,5 @@ def BR : I<(outs), (ins unknown:$target),
             [(br bb:$target)]>;
 
 // NOOPT-NEXT:    GIM_Reject,
-// NOOPT-NEXT:  }; // Size: 1680 bytes
+// NOOPT-NEXT:  }; // Size: 1738 bytes
 // NOOPT-NEXT:  return MatchTable0;
diff --git a/llvm/utils/TableGen/GlobalISelEmitter.cpp b/llvm/utils/TableGen/GlobalISelEmitter.cpp
index f1b2ff68e34319..c204b9819dc21a 100644
--- a/llvm/utils/TableGen/GlobalISelEmitter.cpp
+++ b/llvm/utils/TableGen/GlobalISelEmitter.cpp
@@ -408,7 +408,7 @@ class GlobalISelEmitter final : public GlobalISelMatchTableExecutorEmitter {
       const TreePatternNode *DstChild, const TreePatternNode *Src);
   Error importDefaultOperandRenderers(action_iterator InsertPt, RuleMatcher &M,
                                       BuildMIAction &DstMIBuilder,
-                                      DagInit *DefaultOps) const;
+                                      const DAGDefaultOperand &DefaultOp) const;
   Error
   importImplicitDefRenderers(BuildMIAction &DstMIBuilder,
                              const std::vector<Record *> &ImplicitDefs) const;
@@ -1681,11 +1681,11 @@ Expected<action_iterator> GlobalISelEmitter::importExplicitUseRenderers(
       // overridden, or which we aren't letting it override; emit the 'default
       // ops' operands.
 
-      const CGIOperandList::OperandInfo &DstIOperand = DstI->Operands[InstOpNo];
-      DagInit *DefaultOps = DstIOperand.Rec->getValueAsDag("DefaultOps");
-      if (auto Error = importDefaultOperandRenderers(InsertPt, M, DstMIBuilder,
-                                                     DefaultOps))
+      Record *OperandNode = DstI->Operands[InstOpNo].Rec;
+      if (auto Error = importDefaultOperandRenderers(
+              InsertPt, M, DstMIBuilder, CGP.getDefaultOperand(OperandNode)))
         return std::move(Error);
+
       ++NumDefaultOps;
       continue;
     }
@@ -1710,22 +1710,16 @@ Expected<action_iterator> GlobalISelEmitter::importExplicitUseRenderers(
 
 Error GlobalISelEmitter::importDefaultOperandRenderers(
     action_iterator InsertPt, RuleMatcher &M, BuildMIAction &DstMIBuilder,
-    DagInit *DefaultOps) const {
-  for (const auto *DefaultOp : DefaultOps->getArgs()) {
-    std::optional<LLTCodeGen> OpTyOrNone;
+    const DAGDefaultOperand &DefaultOp) const {
+  for (const auto &Op : DefaultOp.DefaultOps) {
+    const auto *N = Op.get();
+    if (!N->isLeaf())
+      return failedImport("Could not add default op");
 
-    // Look through ValueType operators.
-    if (const DagInit *DefaultDagOp = dyn_cast<DagInit>(DefaultOp)) {
-      if (const DefInit *DefaultDagOperator =
-              dyn_cast<DefInit>(DefaultDagOp->getOperator())) {
-        if (DefaultDagOperator->getDef()->isSubClassOf("ValueType")) {
-          OpTyOrNone = MVTToLLT(getValueType(DefaultDagOperator->getDef()));
-          DefaultOp = DefaultDagOp->getArg(0);
-        }
-      }
-    }
+    const auto *DefaultOp = N->getLeafValue();
 
     if (const DefInit *DefaultDefOp = dyn_cast<DefInit>(DefaultOp)) {
+      std::optional<LLTCodeGen> OpTyOrNone = MVTToLLT(N->getSimpleType(0));
       auto Def = DefaultDefOp->getDef();
       if (Def->getName() == "undef_tied_input") {
         unsigned TempRegID = M.allocateTempRegID();

From fc520f8b29416a3b0738e6c8c3a6d4eee67e42a6 Mon Sep 17 00:00:00 2001
From: Antonio Frighetto <me@antoniofrighetto.com>
Date: Sun, 17 Dec 2023 17:34:46 +0100
Subject: [PATCH 17/32] [InstCombine] Precommit tests for PR75745 (NFC)

---
 .../test/Transforms/InstCombine/insert-const-shuf.ll |  1 +
 llvm/test/Transforms/InstCombine/vec_shuffle.ll      | 12 ++++++++++++
 2 files changed, 13 insertions(+)

diff --git a/llvm/test/Transforms/InstCombine/insert-const-shuf.ll b/llvm/test/Transforms/InstCombine/insert-const-shuf.ll
index 68dcc45e4b6c36..d2fa651b394497 100644
--- a/llvm/test/Transforms/InstCombine/insert-const-shuf.ll
+++ b/llvm/test/Transforms/InstCombine/insert-const-shuf.ll
@@ -92,6 +92,7 @@ define <3 x float> @twoShufUses(<3 x float> %x) {
 
 ; The inserted scalar constant index is out-of-bounds for the shuffle vector constant.
 
+; FIXME: This is a miscompilation
 define <5 x i8> @longerMask(<3 x i8> %x) {
 ; CHECK-LABEL: @longerMask(
 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <3 x i8> [[X:%.*]], <3 x i8> <i8 poison, i8 1, i8 poison>, <5 x i32> <i32 2, i32 1, i32 4, i32 poison, i32 poison>
diff --git a/llvm/test/Transforms/InstCombine/vec_shuffle.ll b/llvm/test/Transforms/InstCombine/vec_shuffle.ll
index 0081da2c0aad75..e1174007b0fe0b 100644
--- a/llvm/test/Transforms/InstCombine/vec_shuffle.ll
+++ b/llvm/test/Transforms/InstCombine/vec_shuffle.ll
@@ -2332,3 +2332,15 @@ define <2 x float> @uitofp_shuf_narrow(<4 x i32> %x, <4 x i32> %y) {
   %r = shufflevector <4 x float> %nx, <4 x float> %ny, <2 x i32> <i32 3, i32 5>
   ret <2 x float> %r
 }
+
+; FIXME: This is a miscompilation
+define <4 x i16> @blend_elements_from_load(ptr align 8 %_0) {
+; CHECK-LABEL: @blend_elements_from_load(
+; CHECK-NEXT:    [[LOAD:%.*]] = load <3 x i16>, ptr [[_0:%.*]], align 8
+; CHECK-NEXT:    [[RV:%.*]] = shufflevector <3 x i16> <i16 0, i16 poison, i16 poison>, <3 x i16> [[LOAD]], <4 x i32> <i32 0, i32 poison, i32 3, i32 5>
+; CHECK-NEXT:    ret <4 x i16> [[RV]]
+;
+  %load = load <3 x i16>, ptr %_0, align 8
+  %rv = shufflevector <3 x i16> <i16 0, i16 undef, i16 undef>, <3 x i16> %load, <4 x i32> <i32 0, i32 1, i32 3, i32 5>
+  ret <4 x i16> %rv
+}

From 151ddf07a6f7a6c1440c587f2df52b127f29f99c Mon Sep 17 00:00:00 2001
From: Antonio Frighetto <me@antoniofrighetto.com>
Date: Sun, 17 Dec 2023 17:43:38 +0100
Subject: [PATCH 18/32] [InstCombine] Stop propagating `undef` when element is
 demanded

Do not turn demanded `undef` elements into poison in
`SimplifyDemandedVectorElts`: only elements that are already poison are
now recorded in `UndefElts`. This fixes the miscompilation exposed by the
tests precommitted for PR75745.

Proofs: https://alive2.llvm.org/ce/z/WA5oD5.
---
 .../lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp | 2 +-
 llvm/test/Transforms/InstCombine/insert-const-shuf.ll          | 3 +--
 llvm/test/Transforms/InstCombine/vec_shuffle.ll                | 3 +--
 3 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 846116a929b156..2490f5b9b97eb8 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -1378,7 +1378,7 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
       if (!Elt) return nullptr;
 
       Elts.push_back(Elt);
-      if (isa<UndefValue>(Elt))   // Already undef or poison.
+      if (isa<PoisonValue>(Elt)) // Already poison.
         UndefElts.setBit(i);
     }
 
diff --git a/llvm/test/Transforms/InstCombine/insert-const-shuf.ll b/llvm/test/Transforms/InstCombine/insert-const-shuf.ll
index d2fa651b394497..1a6528d8855685 100644
--- a/llvm/test/Transforms/InstCombine/insert-const-shuf.ll
+++ b/llvm/test/Transforms/InstCombine/insert-const-shuf.ll
@@ -92,10 +92,9 @@ define <3 x float> @twoShufUses(<3 x float> %x) {
 
 ; The inserted scalar constant index is out-of-bounds for the shuffle vector constant.
 
-; FIXME: This is a miscompilation
 define <5 x i8> @longerMask(<3 x i8> %x) {
 ; CHECK-LABEL: @longerMask(
-; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <3 x i8> [[X:%.*]], <3 x i8> <i8 poison, i8 1, i8 poison>, <5 x i32> <i32 2, i32 1, i32 4, i32 poison, i32 poison>
+; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <3 x i8> [[X:%.*]], <3 x i8> <i8 undef, i8 1, i8 poison>, <5 x i32> <i32 2, i32 1, i32 4, i32 3, i32 poison>
 ; CHECK-NEXT:    [[INS:%.*]] = insertelement <5 x i8> [[SHUF]], i8 42, i64 4
 ; CHECK-NEXT:    ret <5 x i8> [[INS]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/vec_shuffle.ll b/llvm/test/Transforms/InstCombine/vec_shuffle.ll
index e1174007b0fe0b..978d90d7df94ed 100644
--- a/llvm/test/Transforms/InstCombine/vec_shuffle.ll
+++ b/llvm/test/Transforms/InstCombine/vec_shuffle.ll
@@ -2333,11 +2333,10 @@ define <2 x float> @uitofp_shuf_narrow(<4 x i32> %x, <4 x i32> %y) {
   ret <2 x float> %r
 }
 
-; FIXME: This is a miscompilation
 define <4 x i16> @blend_elements_from_load(ptr align 8 %_0) {
 ; CHECK-LABEL: @blend_elements_from_load(
 ; CHECK-NEXT:    [[LOAD:%.*]] = load <3 x i16>, ptr [[_0:%.*]], align 8
-; CHECK-NEXT:    [[RV:%.*]] = shufflevector <3 x i16> <i16 0, i16 poison, i16 poison>, <3 x i16> [[LOAD]], <4 x i32> <i32 0, i32 poison, i32 3, i32 5>
+; CHECK-NEXT:    [[RV:%.*]] = shufflevector <3 x i16> <i16 0, i16 undef, i16 poison>, <3 x i16> [[LOAD]], <4 x i32> <i32 0, i32 1, i32 3, i32 5>
 ; CHECK-NEXT:    ret <4 x i16> [[RV]]
 ;
   %load = load <3 x i16>, ptr %_0, align 8

From c014454f43bf523fee2bf695c075882b1cefd21c Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo@fhahn.com>
Date: Sun, 17 Dec 2023 21:08:25 +0000
Subject: [PATCH 19/32] [ConstraintElim] Add extra tests with AND and OR of
 conditions.

Add additional tests where one of the operands of the AND/OR implies the
other.
---
 .../and-implied-by-operands.ll                |  84 +++++
 .../or-implied-by-operands.ll                 | 310 ++++++++++++++++++
 2 files changed, 394 insertions(+)
 create mode 100644 llvm/test/Transforms/ConstraintElimination/or-implied-by-operands.ll

diff --git a/llvm/test/Transforms/ConstraintElimination/and-implied-by-operands.ll b/llvm/test/Transforms/ConstraintElimination/and-implied-by-operands.ll
index 22f20f739b9e6f..dc3b0f17c79602 100644
--- a/llvm/test/Transforms/ConstraintElimination/and-implied-by-operands.ll
+++ b/llvm/test/Transforms/ConstraintElimination/and-implied-by-operands.ll
@@ -26,6 +26,31 @@ else:
   ret i1 1
 }
 
+define i1 @test_first_and_condition_implied_by_second_ops(i8 %x) {
+; CHECK-LABEL: @test_first_and_condition_implied_by_second_ops(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[C_1:%.*]] = icmp ugt i8 [[X:%.*]], 10
+; CHECK-NEXT:    [[T_1:%.*]] = icmp ugt i8 [[X]], 5
+; CHECK-NEXT:    [[AND:%.*]] = and i1 [[T_1]], [[C_1]]
+; CHECK-NEXT:    br i1 [[AND]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK:       then:
+; CHECK-NEXT:    ret i1 false
+; CHECK:       else:
+; CHECK-NEXT:    ret i1 true
+;
+entry:
+  %c.1 = icmp ugt i8 %x, 10
+  %t.1 = icmp ugt i8 %x, 5
+  %and = and i1 %t.1, %c.1
+  br i1 %and, label %then, label %else
+
+then:
+  ret i1 0
+
+else:
+  ret i1 1
+}
+
 define i1 @test_second_and_condition_implied_by_first_select_form(i8 %x) {
 ; CHECK-LABEL: @test_second_and_condition_implied_by_first_select_form(
 ; CHECK-NEXT:  entry:
@@ -51,6 +76,31 @@ else:
   ret i1 1
 }
 
+define i1 @test_first_and_condition_implied_by_second_select_form(i8 %x) {
+; CHECK-LABEL: @test_first_and_condition_implied_by_second_select_form(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[C_1:%.*]] = icmp ugt i8 [[X:%.*]], 10
+; CHECK-NEXT:    [[T_1:%.*]] = icmp ugt i8 [[X]], 5
+; CHECK-NEXT:    [[AND:%.*]] = select i1 [[T_1]], i1 [[C_1]], i1 false
+; CHECK-NEXT:    br i1 [[AND]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK:       then:
+; CHECK-NEXT:    ret i1 false
+; CHECK:       else:
+; CHECK-NEXT:    ret i1 true
+;
+entry:
+  %c.1 = icmp ugt i8 %x, 10
+  %t.1 = icmp ugt i8 %x, 5
+  %and = select i1 %t.1, i1 %c.1, i1 false
+  br i1 %and, label %then, label %else
+
+then:
+  ret i1 0
+
+else:
+  ret i1 1
+}
+
 define i1 @test_same_cond_for_and(i8 %x) {
 ; CHECK-LABEL: @test_same_cond_for_and(
 ; CHECK-NEXT:  entry:
@@ -394,3 +444,37 @@ then:
 else:
   ret i1 %t.1
 }
+
+define i1 @and_select_first_implies_second_may_be_poison(ptr noundef %A, ptr noundef %B) {
+; CHECK-LABEL: @and_select_first_implies_second_may_be_poison(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[C_1:%.*]] = icmp ne ptr [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds ptr, ptr [[B]], i64 -1
+; CHECK-NEXT:    [[C_2:%.*]] = icmp ugt ptr [[GEP]], [[A]]
+; CHECK-NEXT:    [[AND:%.*]] = select i1 [[C_2]], i1 true, i1 false
+; CHECK-NEXT:    ret i1 [[AND]]
+;
+entry:
+  %c.1 = icmp ne ptr %A, %B
+  %gep = getelementptr inbounds ptr, ptr %B, i64 -1
+  %c.2 = icmp ugt ptr %gep, %A
+  %and = select i1 %c.2, i1 %c.1, i1 false
+  ret i1 %and
+}
+
+define i1 @and_select_second_implies_first_may_be_poison(ptr noundef %A, ptr noundef %B) {
+; CHECK-LABEL: @and_select_second_implies_first_may_be_poison(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[C_1:%.*]] = icmp ne ptr [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds ptr, ptr [[B]], i64 -1
+; CHECK-NEXT:    [[C_2:%.*]] = icmp ugt ptr [[GEP]], [[A]]
+; CHECK-NEXT:    [[AND:%.*]] = select i1 [[C_1]], i1 [[C_2]], i1 false
+; CHECK-NEXT:    ret i1 [[AND]]
+;
+entry:
+  %c.1 = icmp ne ptr %A, %B
+  %gep = getelementptr inbounds ptr, ptr %B, i64 -1
+  %c.2 = icmp ugt ptr %gep, %A
+  %and = select i1 %c.1, i1 %c.2, i1 false
+  ret i1 %and
+}
diff --git a/llvm/test/Transforms/ConstraintElimination/or-implied-by-operands.ll b/llvm/test/Transforms/ConstraintElimination/or-implied-by-operands.ll
new file mode 100644
index 00000000000000..61e6e250f6dd99
--- /dev/null
+++ b/llvm/test/Transforms/ConstraintElimination/or-implied-by-operands.ll
@@ -0,0 +1,310 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes=constraint-elimination -S %s | FileCheck %s
+
+define i1 @test_second_or_condition_implied_by_first(i8 %x) {
+; CHECK-LABEL: @test_second_or_condition_implied_by_first(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[C_1:%.*]] = icmp ule i8 [[X:%.*]], 10
+; CHECK-NEXT:    [[T_1:%.*]] = icmp ugt i8 [[X]], 5
+; CHECK-NEXT:    [[OR:%.*]] = or i1 [[C_1]], [[T_1]]
+; CHECK-NEXT:    br i1 [[OR]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK:       then:
+; CHECK-NEXT:    ret i1 false
+; CHECK:       else:
+; CHECK-NEXT:    ret i1 true
+;
+entry:
+  %c.1 = icmp ule i8 %x, 10
+  %t.1 = icmp ugt i8 %x, 5
+  %or = or i1 %c.1, %t.1
+  br i1 %or, label %then, label %else
+
+then:
+  ret i1 0
+
+else:
+  ret i1 1
+}
+
+define i1 @test_first_or_condition_implied_by_second_ops(i8 %x) {
+; CHECK-LABEL: @test_first_or_condition_implied_by_second_ops(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[C_1:%.*]] = icmp ule i8 [[X:%.*]], 10
+; CHECK-NEXT:    [[T_1:%.*]] = icmp ugt i8 [[X]], 5
+; CHECK-NEXT:    [[OR:%.*]] = or i1 [[T_1]], [[C_1]]
+; CHECK-NEXT:    br i1 [[OR]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK:       then:
+; CHECK-NEXT:    ret i1 false
+; CHECK:       else:
+; CHECK-NEXT:    ret i1 true
+;
+entry:
+  %c.1 = icmp ule i8 %x, 10
+  %t.1 = icmp ugt i8 %x, 5
+  %or = or i1 %t.1, %c.1
+  br i1 %or, label %then, label %else
+
+then:
+  ret i1 0
+
+else:
+  ret i1 1
+}
+
+define i1 @test_second_or_condition_implied_by_first_select_form(i8 %x) {
+; CHECK-LABEL: @test_second_or_condition_implied_by_first_select_form(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[C_1:%.*]] = icmp ule i8 [[X:%.*]], 10
+; CHECK-NEXT:    [[T_1:%.*]] = icmp ugt i8 [[X]], 5
+; CHECK-NEXT:    [[OR:%.*]] = select i1 [[C_1]], i1 false, i1 [[T_1]]
+; CHECK-NEXT:    br i1 [[OR]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK:       then:
+; CHECK-NEXT:    ret i1 false
+; CHECK:       else:
+; CHECK-NEXT:    ret i1 true
+;
+entry:
+  %c.1 = icmp ule i8 %x, 10
+  %t.1 = icmp ugt i8 %x, 5
+  %or = select i1 %c.1, i1 false, i1 %t.1
+  br i1 %or, label %then, label %else
+
+then:
+  ret i1 0
+
+else:
+  ret i1 1
+}
+
+define i1 @test_first_or_condition_implied_by_second_select_form(i8 %x) {
+; CHECK-LABEL: @test_first_or_condition_implied_by_second_select_form(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[C_1:%.*]] = icmp ule i8 [[X:%.*]], 10
+; CHECK-NEXT:    [[T_1:%.*]] = icmp ugt i8 [[X]], 5
+; CHECK-NEXT:    [[OR:%.*]] = select i1 [[T_1]], i1 false, i1 [[C_1]]
+; CHECK-NEXT:    br i1 [[OR]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK:       then:
+; CHECK-NEXT:    ret i1 false
+; CHECK:       else:
+; CHECK-NEXT:    ret i1 true
+;
+entry:
+  %c.1 = icmp ule i8 %x, 10
+  %t.1 = icmp ugt i8 %x, 5
+  %or = select i1 %t.1, i1 false, i1 %c.1
+  br i1 %or, label %then, label %else
+
+then:
+  ret i1 0
+
+else:
+  ret i1 1
+}
+
+define i1 @test_same_cond_for_or(i8 %x) {
+; CHECK-LABEL: @test_same_cond_for_or(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[C_1:%.*]] = icmp ugt i8 [[X:%.*]], 10
+; CHECK-NEXT:    [[OR:%.*]] = or i1 [[C_1]], [[C_1]]
+; CHECK-NEXT:    br i1 [[OR]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK:       then:
+; CHECK-NEXT:    ret i1 false
+; CHECK:       else:
+; CHECK-NEXT:    ret i1 true
+;
+entry:
+  %c.1 = icmp ugt i8 %x, 10
+  %or = or i1 %c.1, %c.1
+  br i1 %or, label %then, label %else
+
+then:
+  ret i1 0
+
+else:
+  ret i1 1
+}
+
+define i1 @test_same_cond_for_or_select_form(i8 %x) {
+; CHECK-LABEL: @test_same_cond_for_or_select_form(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[C_1:%.*]] = icmp ugt i8 [[X:%.*]], 10
+; CHECK-NEXT:    [[OR:%.*]] = select i1 [[C_1]], i1 false, i1 [[C_1]]
+; CHECK-NEXT:    br i1 [[OR]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK:       then:
+; CHECK-NEXT:    ret i1 false
+; CHECK:       else:
+; CHECK-NEXT:    ret i1 true
+;
+entry:
+  %c.1 = icmp ugt i8 %x, 10
+  %or = select i1 %c.1, i1 false, i1 %c.1
+  br i1 %or, label %then, label %else
+
+then:
+  ret i1 0
+
+else:
+  ret i1 1
+}
+
+define i1 @test_second_or_condition_not_implied_by_first(i8 %x) {
+; CHECK-LABEL: @test_second_or_condition_not_implied_by_first(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[C_1:%.*]] = icmp ugt i8 [[X:%.*]], 10
+; CHECK-NEXT:    [[C_2:%.*]] = icmp ugt i8 [[X]], 5
+; CHECK-NEXT:    [[OR:%.*]] = or i1 [[C_2]], [[C_1]]
+; CHECK-NEXT:    br i1 [[OR]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK:       then:
+; CHECK-NEXT:    ret i1 false
+; CHECK:       else:
+; CHECK-NEXT:    ret i1 true
+;
+entry:
+  %c.1 = icmp ugt i8 %x, 10
+  %c.2 = icmp ugt i8 %x, 5
+  %or = or i1 %c.2, %c.1
+  br i1 %or, label %then, label %else
+
+then:
+  ret i1 0
+
+else:
+  ret i1 1
+}
+
+define i1 @test_remove_variables(i1 %c, ptr %A, i64 %B, ptr %C) {
+; CHECK-LABEL: @test_remove_variables(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[THEN_1:%.*]], label [[EXIT:%.*]]
+; CHECK:       then.1:
+; CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[C:%.*]], align 8
+; CHECK-NEXT:    [[C_1:%.*]] = icmp ult ptr [[TMP0]], [[A:%.*]]
+; CHECK-NEXT:    br i1 [[C_1]], label [[THEN_2:%.*]], label [[ELSE_2:%.*]]
+; CHECK:       then.2:
+; CHECK-NEXT:    [[C_2:%.*]] = icmp ne ptr [[A]], null
+; CHECK-NEXT:    [[C_3:%.*]] = icmp sgt i64 [[B:%.*]], 0
+; CHECK-NEXT:    [[OR:%.*]] = or i1 [[C_2]], [[C_3]]
+; CHECK-NEXT:    ret i1 [[OR]]
+; CHECK:       else.2:
+; CHECK-NEXT:    ret i1 false
+; CHECK:       exit:
+; CHECK-NEXT:    ret i1 true
+;
+entry:
+  br i1 %c, label %then.1, label %exit
+
+then.1:
+  %0 = load ptr, ptr %C, align 8
+  %c.1 = icmp ult ptr %0, %A
+  br i1 %c.1, label %then.2, label %else.2
+
+then.2:
+  %c.2 = icmp ne ptr %A, null
+  %c.3 = icmp sgt i64 %B, 0
+  %or = or i1 %c.2, %c.3
+  ret i1 %or
+
+else.2:
+  ret i1 0
+
+exit:
+  %t = icmp eq ptr null, null
+  ret i1 %t
+}
+
+define i1 @test_or_op_0_simplified(i32 %v) {
+; CHECK-LABEL: @test_or_op_0_simplified(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[C_1:%.*]] = icmp sgt i32 [[V:%.*]], 0
+; CHECK-NEXT:    [[OR:%.*]] = or i1 false, [[C_1]]
+; CHECK-NEXT:    ret i1 [[OR]]
+;
+entry:
+  %c.1 = icmp sgt i32 %v, 0
+  %t.1 = icmp sgt i32 0, 0
+  %or = or i1 %t.1, %c.1
+  ret i1 %or
+}
+
+define i1 @test_or_op_1_simplified(i32 %v) {
+; CHECK-LABEL: @test_or_op_1_simplified(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[C_1:%.*]] = icmp sgt i32 [[V:%.*]], 0
+; CHECK-NEXT:    [[OR:%.*]] = or i1 [[C_1]], false
+; CHECK-NEXT:    ret i1 [[OR]]
+;
+entry:
+  %c.1 = icmp sgt i32 %v, 0
+  %t.1 = icmp sgt i32 0, 0
+  %or = or i1 %c.1, %t.1
+  ret i1 %or
+}
+
+define i1 @test_or_used_in_false_branch(i8 %x) {
+; CHECK-LABEL: @test_or_used_in_false_branch(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[C_1:%.*]] = icmp ule i8 [[X:%.*]], 10
+; CHECK-NEXT:    [[T_1:%.*]] = icmp ule i8 [[X]], 5
+; CHECK-NEXT:    [[OR:%.*]] = or i1 [[C_1]], [[T_1]]
+; CHECK-NEXT:    br i1 [[OR]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK:       then:
+; CHECK-NEXT:    ret i1 [[T_1]]
+; CHECK:       else:
+; CHECK-NEXT:    ret i1 false
+;
+
+entry:
+  %c.1 = icmp ule i8 %x, 10
+  %t.1 = icmp ule i8 %x, 5
+  %or = or i1 %c.1, %t.1
+  br i1 %or, label %then, label %else
+
+then:
+  ret i1 %t.1
+
+else:
+  ret i1 %t.1
+}
+
+define i1 @test_or_used_in_false_branch2(i8 %x) {
+; CHECK-LABEL: @test_or_used_in_false_branch2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[C_1:%.*]] = icmp ugt i8 [[X:%.*]], 10
+; CHECK-NEXT:    [[T_1:%.*]] = icmp ugt i8 [[X]], 5
+; CHECK-NEXT:    [[OR:%.*]] = or i1 [[C_1]], [[T_1]]
+; CHECK-NEXT:    br i1 [[OR]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK:       then:
+; CHECK-NEXT:    ret i1 [[T_1]]
+; CHECK:       else:
+; CHECK-NEXT:    ret i1 false
+;
+
+entry:
+  %c.1 = icmp ugt i8 %x, 10
+  %t.1 = icmp ugt i8 %x, 5
+  %or = or i1 %c.1, %t.1
+  br i1 %or, label %then, label %else
+
+then:
+  ret i1 %t.1
+
+else:
+  ret i1 %t.1
+}
+
+define i1 @select_or_set_operand(ptr noundef %a, ptr noundef %b) {
+; CHECK-LABEL: @select_or_set_operand(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP_EQ:%.*]] = icmp eq ptr [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[INCDEC_PTR12_I:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 1
+; CHECK-NEXT:    [[CMP_EQ_1:%.*]] = icmp eq ptr [[INCDEC_PTR12_I]], [[B]]
+; CHECK-NEXT:    [[OR:%.*]] = select i1 [[CMP_EQ]], i1 true, i1 [[CMP_EQ_1]]
+; CHECK-NEXT:    ret i1 [[OR]]
+;
+entry:
+  %cmp.eq = icmp eq ptr %a, %b
+  %incdec.ptr12.i = getelementptr inbounds i32, ptr %a, i64 1
+  %cmp.eq.1 = icmp eq ptr %incdec.ptr12.i, %b
+  %or = select i1 %cmp.eq, i1 true, i1 %cmp.eq.1
+  ret i1 %or
+}

From aad5c2f887d3cd1c69f798186e6502f0ed6e3dde Mon Sep 17 00:00:00 2001
From: Dimitry Andric <dimitry@andric.com>
Date: Sun, 17 Dec 2023 22:43:34 +0100
Subject: [PATCH 20/32] [cmake] Honor CMAKE_VERBOSE_MAKEFILE when building
 external projects (#75749)

When the top-level CMake invocation has `CMAKE_VERBOSE_MAKEFILE=ON`,
indicating the user wants to have verbose builds (i.e. all executed
commands explicitly echoed), some of the subprojects and runtimes (such
as compiler-rt, libcxx, etc.) do not build in verbose mode. For example,
with Ninja:

```
[ 99% 6252/6308] cd /build/runtimes/builtins-bins && /usr/local/bin/cmake --build .
[  0% 6/308] Building C object CMakeFiles/clang_rt.builtins-i386.dir/absvti2.c.o
[  0% 7/308] Building C object CMakeFiles/clang_rt.builtins-i386.dir/absvdi2.c.o
[  0% 8/308] Building C object CMakeFiles/clang_rt.builtins-i386.dir/absvsi2.c.o
...
```

This is because `llvm_ExternalProject_Add()` and `add_custom_libcxx()`
use CMake's `ExternalProject_Add()` function to configure such
subproject builds, and do not pass through the `CMAKE_VERBOSE_MAKEFILE`
setting.

Similar to what is done in `clang/CMakeLists.txt`, add
`-DCMAKE_VERBOSE_MAKEFILE=ON` to the `ExternalProject_Add()` invocations
in `llvm_ExternalProject_Add()` and `add_custom_libcxx()`, whenever the
top-level CMake invocation had `CMAKE_VERBOSE_MAKEFILE` turned on.
---
 compiler-rt/cmake/Modules/AddCompilerRT.cmake     | 5 +++++
 llvm/cmake/modules/LLVMExternalProjectUtils.cmake | 5 +++++
 2 files changed, 10 insertions(+)

diff --git a/compiler-rt/cmake/Modules/AddCompilerRT.cmake b/compiler-rt/cmake/Modules/AddCompilerRT.cmake
index 4d9b68a3cc25bf..7aca0abc637d4e 100644
--- a/compiler-rt/cmake/Modules/AddCompilerRT.cmake
+++ b/compiler-rt/cmake/Modules/AddCompilerRT.cmake
@@ -670,6 +670,10 @@ macro(add_custom_libcxx name prefix)
   get_property(CXX_FLAGS CACHE CMAKE_CXX_FLAGS PROPERTY VALUE)
   set(LIBCXX_CXX_FLAGS "${LIBCXX_CXX_FLAGS} ${CXX_FLAGS}")
 
+  if(CMAKE_VERBOSE_MAKEFILE)
+    set(verbose -DCMAKE_VERBOSE_MAKEFILE=ON)
+  endif()
+
   ExternalProject_Add(${name}
     DEPENDS ${name}-clobber ${LIBCXX_DEPS}
     PREFIX ${CMAKE_CURRENT_BINARY_DIR}/${name}
@@ -677,6 +681,7 @@ macro(add_custom_libcxx name prefix)
     BINARY_DIR ${prefix}
     CMAKE_ARGS ${CMAKE_PASSTHROUGH_VARIABLES}
                ${compiler_args}
+               ${verbose}
                -DCMAKE_C_FLAGS=${LIBCXX_C_FLAGS}
                -DCMAKE_CXX_FLAGS=${LIBCXX_CXX_FLAGS}
                -DCMAKE_BUILD_TYPE=Release
diff --git a/llvm/cmake/modules/LLVMExternalProjectUtils.cmake b/llvm/cmake/modules/LLVMExternalProjectUtils.cmake
index 4b5b600307ec93..2089f979acd008 100644
--- a/llvm/cmake/modules/LLVMExternalProjectUtils.cmake
+++ b/llvm/cmake/modules/LLVMExternalProjectUtils.cmake
@@ -319,6 +319,10 @@ function(llvm_ExternalProject_Add name source_dir)
     list(APPEND compiler_args -DCMAKE_ASM_COMPILER_TARGET=${ARG_TARGET_TRIPLE})
   endif()
 
+  if(CMAKE_VERBOSE_MAKEFILE)
+    set(verbose -DCMAKE_VERBOSE_MAKEFILE=ON)
+  endif()
+
   ExternalProject_Add(${name}
     DEPENDS ${ARG_DEPENDS} llvm-config
     ${name}-clobber
@@ -330,6 +334,7 @@ function(llvm_ExternalProject_Add name source_dir)
     CMAKE_ARGS ${${nameCanon}_CMAKE_ARGS}
                --no-warn-unused-cli
                ${compiler_args}
+               ${verbose}
                -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}
                ${sysroot_arg}
                -DLLVM_BINARY_DIR=${PROJECT_BINARY_DIR}

From 68c976bf64f50fe9c16a335378a964c166851962 Mon Sep 17 00:00:00 2001
From: Arthur Eubanks <aeubanks@google.com>
Date: Fri, 15 Dec 2023 13:01:55 -0800
Subject: [PATCH 21/32] [X86] Fix referencing local tagged globals

We should treat the medium code model like the small code model.
Classifying non-local references already properly handled this.
---
 llvm/lib/Target/X86/X86Subtarget.cpp          |  9 ++--
 llvm/test/CodeGen/X86/tagged-globals-pic.ll   | 46 ++++++++++++++++++-
 .../test/CodeGen/X86/tagged-globals-static.ll | 26 ++++++++++-
 3 files changed, 74 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Target/X86/X86Subtarget.cpp b/llvm/lib/Target/X86/X86Subtarget.cpp
index d63f1ca1695b20..07f535685e8f97 100644
--- a/llvm/lib/Target/X86/X86Subtarget.cpp
+++ b/llvm/lib/Target/X86/X86Subtarget.cpp
@@ -69,11 +69,11 @@ X86Subtarget::classifyGlobalReference(const GlobalValue *GV) const {
 
 unsigned char
 X86Subtarget::classifyLocalReference(const GlobalValue *GV) const {
+  CodeModel::Model CM = TM.getCodeModel();
   // Tagged globals have non-zero upper bits, which makes direct references
-  // require a 64-bit immediate.  On the small code model this causes relocation
-  // errors, so we go through the GOT instead.
-  if (AllowTaggedGlobals && TM.getCodeModel() == CodeModel::Small && GV &&
-      !isa<Function>(GV))
+  // require a 64-bit immediate. With the small/medium code models this causes
+  // relocation errors, so we go through the GOT instead.
+  if (AllowTaggedGlobals && CM != CodeModel::Large && GV && !isa<Function>(GV))
     return X86II::MO_GOTPCREL_NORELAX;
 
   // If we're not PIC, it's not very interesting.
@@ -83,7 +83,6 @@ X86Subtarget::classifyLocalReference(const GlobalValue *GV) const {
   if (is64Bit()) {
     // 64-bit ELF PIC local references may use GOTOFF relocations.
     if (isTargetELF()) {
-      CodeModel::Model CM = TM.getCodeModel();
       assert(CM != CodeModel::Tiny &&
              "Tiny codesize model not supported on X86");
       // In the large code model, all text is far from any global data, so we
diff --git a/llvm/test/CodeGen/X86/tagged-globals-pic.ll b/llvm/test/CodeGen/X86/tagged-globals-pic.ll
index 4f85b5ed99695d..156487ee163a2a 100644
--- a/llvm/test/CodeGen/X86/tagged-globals-pic.ll
+++ b/llvm/test/CodeGen/X86/tagged-globals-pic.ll
@@ -1,5 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-; RUN: llc --relocation-model=pic < %s | FileCheck %s
+; RUN: llc --relocation-model=pic -code-model=small < %s | FileCheck %s
+; RUN: llc --relocation-model=pic -code-model=medium < %s | FileCheck %s
+; RUN: llc --relocation-model=pic -code-model=large < %s | FileCheck %s --check-prefix=LARGE
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
@@ -12,6 +14,16 @@ define ptr @global_addr() #0 {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movq global@GOTPCREL_NORELAX(%rip), %rax
 ; CHECK-NEXT:    retq
+;
+; LARGE-LABEL: global_addr:
+; LARGE:       # %bb.0:
+; LARGE-NEXT:  .L0$pb:
+; LARGE-NEXT:    leaq .L0$pb(%rip), %rax
+; LARGE-NEXT:    movabsq $_GLOBAL_OFFSET_TABLE_-.L0$pb, %rcx
+; LARGE-NEXT:    addq %rax, %rcx
+; LARGE-NEXT:    movabsq $global@GOT, %rax
+; LARGE-NEXT:    movq (%rcx,%rax), %rax
+; LARGE-NEXT:    retq
   ret ptr @global
 }
 
@@ -21,6 +33,17 @@ define i32 @global_load() #0 {
 ; CHECK-NEXT:    movq global@GOTPCREL_NORELAX(%rip), %rax
 ; CHECK-NEXT:    movl (%rax), %eax
 ; CHECK-NEXT:    retq
+;
+; LARGE-LABEL: global_load:
+; LARGE:       # %bb.0:
+; LARGE-NEXT:  .L1$pb:
+; LARGE-NEXT:    leaq .L1$pb(%rip), %rax
+; LARGE-NEXT:    movabsq $_GLOBAL_OFFSET_TABLE_-.L1$pb, %rcx
+; LARGE-NEXT:    addq %rax, %rcx
+; LARGE-NEXT:    movabsq $global@GOT, %rax
+; LARGE-NEXT:    movq (%rcx,%rax), %rax
+; LARGE-NEXT:    movl (%rax), %eax
+; LARGE-NEXT:    retq
   %load = load i32, ptr @global
   ret i32 %load
 }
@@ -31,6 +54,17 @@ define void @global_store() #0 {
 ; CHECK-NEXT:    movq global@GOTPCREL_NORELAX(%rip), %rax
 ; CHECK-NEXT:    movl $0, (%rax)
 ; CHECK-NEXT:    retq
+;
+; LARGE-LABEL: global_store:
+; LARGE:       # %bb.0:
+; LARGE-NEXT:  .L2$pb:
+; LARGE-NEXT:    leaq .L2$pb(%rip), %rax
+; LARGE-NEXT:    movabsq $_GLOBAL_OFFSET_TABLE_-.L2$pb, %rcx
+; LARGE-NEXT:    addq %rax, %rcx
+; LARGE-NEXT:    movabsq $global@GOT, %rax
+; LARGE-NEXT:    movq (%rcx,%rax), %rax
+; LARGE-NEXT:    movl $0, (%rax)
+; LARGE-NEXT:    retq
   store i32 0, ptr @global
   ret void
 }
@@ -40,6 +74,16 @@ define ptr @func_addr() #0 {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movq func@GOTPCREL(%rip), %rax
 ; CHECK-NEXT:    retq
+;
+; LARGE-LABEL: func_addr:
+; LARGE:       # %bb.0:
+; LARGE-NEXT:  .L3$pb:
+; LARGE-NEXT:    leaq .L3$pb(%rip), %rax
+; LARGE-NEXT:    movabsq $_GLOBAL_OFFSET_TABLE_-.L3$pb, %rcx
+; LARGE-NEXT:    addq %rax, %rcx
+; LARGE-NEXT:    movabsq $func@GOT, %rax
+; LARGE-NEXT:    movq (%rcx,%rax), %rax
+; LARGE-NEXT:    retq
   ret ptr @func
 }
 
diff --git a/llvm/test/CodeGen/X86/tagged-globals-static.ll b/llvm/test/CodeGen/X86/tagged-globals-static.ll
index bddbaa5592da58..0eb21267b06e03 100644
--- a/llvm/test/CodeGen/X86/tagged-globals-static.ll
+++ b/llvm/test/CodeGen/X86/tagged-globals-static.ll
@@ -1,5 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-; RUN: llc --relocation-model=static < %s | FileCheck %s
+; RUN: llc --relocation-model=static -code-model=small < %s | FileCheck %s
+; RUN: llc --relocation-model=static -code-model=medium < %s | FileCheck %s
+; RUN: llc --relocation-model=static -code-model=large < %s | FileCheck %s --check-prefix=LARGE
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
@@ -12,6 +14,11 @@ define ptr @global_addr() #0 {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movq global@GOTPCREL_NORELAX(%rip), %rax
 ; CHECK-NEXT:    retq
+;
+; LARGE-LABEL: global_addr:
+; LARGE:       # %bb.0:
+; LARGE-NEXT:    movabsq $global, %rax
+; LARGE-NEXT:    retq
   ret ptr @global
 }
 
@@ -21,6 +28,12 @@ define i32 @global_load() #0 {
 ; CHECK-NEXT:    movq global@GOTPCREL_NORELAX(%rip), %rax
 ; CHECK-NEXT:    movl (%rax), %eax
 ; CHECK-NEXT:    retq
+;
+; LARGE-LABEL: global_load:
+; LARGE:       # %bb.0:
+; LARGE-NEXT:    movabsq $global, %rax
+; LARGE-NEXT:    movl (%rax), %eax
+; LARGE-NEXT:    retq
   %load = load i32, ptr @global
   ret i32 %load
 }
@@ -31,6 +44,12 @@ define void @global_store() #0 {
 ; CHECK-NEXT:    movq global@GOTPCREL_NORELAX(%rip), %rax
 ; CHECK-NEXT:    movl $0, (%rax)
 ; CHECK-NEXT:    retq
+;
+; LARGE-LABEL: global_store:
+; LARGE:       # %bb.0:
+; LARGE-NEXT:    movabsq $global, %rax
+; LARGE-NEXT:    movl $0, (%rax)
+; LARGE-NEXT:    retq
   store i32 0, ptr @global
   ret void
 }
@@ -40,6 +59,11 @@ define ptr @func_addr() #0 {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl $func, %eax
 ; CHECK-NEXT:    retq
+;
+; LARGE-LABEL: func_addr:
+; LARGE:       # %bb.0:
+; LARGE-NEXT:    movabsq $func, %rax
+; LARGE-NEXT:    retq
   ret ptr @func
 }
 

From 401f0396c3070567ce1ad0b12be7e48713ec0c65 Mon Sep 17 00:00:00 2001
From: Owen Pan <owenpiano@gmail.com>
Date: Sun, 17 Dec 2023 15:07:11 -0800
Subject: [PATCH 22/32] [clang-format] Fix a bug in `IndentExternBlock:
 NoIndent` (#75731)

Fixes #36620.
Fixes #75719.
---
 clang/lib/Format/Format.cpp           | 5 -----
 clang/unittests/Format/FormatTest.cpp | 7 +++++++
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp
index 668e959a9416ba..28271181e07d0c 100644
--- a/clang/lib/Format/Format.cpp
+++ b/clang/lib/Format/Format.cpp
@@ -1315,7 +1315,6 @@ static void expandPresetsBraceWrapping(FormatStyle &Expanded) {
     Expanded.BraceWrapping.AfterStruct = true;
     Expanded.BraceWrapping.AfterUnion = true;
     Expanded.BraceWrapping.AfterExternBlock = true;
-    Expanded.IndentExternBlock = FormatStyle::IEBS_AfterExternBlock;
     Expanded.BraceWrapping.SplitEmptyFunction = true;
     Expanded.BraceWrapping.SplitEmptyRecord = false;
     break;
@@ -1335,7 +1334,6 @@ static void expandPresetsBraceWrapping(FormatStyle &Expanded) {
     Expanded.BraceWrapping.AfterStruct = true;
     Expanded.BraceWrapping.AfterUnion = true;
     Expanded.BraceWrapping.AfterExternBlock = true;
-    Expanded.IndentExternBlock = FormatStyle::IEBS_AfterExternBlock;
     Expanded.BraceWrapping.BeforeCatch = true;
     Expanded.BraceWrapping.BeforeElse = true;
     Expanded.BraceWrapping.BeforeLambdaBody = true;
@@ -1350,7 +1348,6 @@ static void expandPresetsBraceWrapping(FormatStyle &Expanded) {
     Expanded.BraceWrapping.AfterObjCDeclaration = true;
     Expanded.BraceWrapping.AfterStruct = true;
     Expanded.BraceWrapping.AfterExternBlock = true;
-    Expanded.IndentExternBlock = FormatStyle::IEBS_AfterExternBlock;
     Expanded.BraceWrapping.BeforeCatch = true;
     Expanded.BraceWrapping.BeforeElse = true;
     Expanded.BraceWrapping.BeforeLambdaBody = true;
@@ -1375,7 +1372,6 @@ static void expandPresetsBraceWrapping(FormatStyle &Expanded) {
         /*SplitEmptyFunction=*/true,
         /*SplitEmptyRecord=*/true,
         /*SplitEmptyNamespace=*/true};
-    Expanded.IndentExternBlock = FormatStyle::IEBS_AfterExternBlock;
     break;
   case FormatStyle::BS_WebKit:
     Expanded.BraceWrapping.AfterFunction = true;
@@ -1909,7 +1905,6 @@ FormatStyle getMicrosoftStyle(FormatStyle::LanguageKind Language) {
   Style.BraceWrapping.AfterObjCDeclaration = true;
   Style.BraceWrapping.AfterStruct = true;
   Style.BraceWrapping.AfterExternBlock = true;
-  Style.IndentExternBlock = FormatStyle::IEBS_AfterExternBlock;
   Style.BraceWrapping.BeforeCatch = true;
   Style.BraceWrapping.BeforeElse = true;
   Style.BraceWrapping.BeforeWhile = false;
diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp
index 24b2fd599dc397..0e08723aa9e947 100644
--- a/clang/unittests/Format/FormatTest.cpp
+++ b/clang/unittests/Format/FormatTest.cpp
@@ -4571,6 +4571,13 @@ TEST_F(FormatTest, IndentExternBlockStyle) {
                "}",
                Style);
 
+  Style.BreakBeforeBraces = FormatStyle::BS_Allman;
+  verifyFormat("extern \"C\"\n"
+               "{\n"
+               "int i;\n"
+               "}",
+               Style);
+
   Style.BreakBeforeBraces = FormatStyle::BS_Custom;
   Style.BraceWrapping.AfterExternBlock = true;
   Style.IndentExternBlock = FormatStyle::IEBS_Indent;

From f1ab90ab632d137fc3c7deaa237751db31bcb22e Mon Sep 17 00:00:00 2001
From: Kazu Hirata <kazu@google.com>
Date: Sun, 17 Dec 2023 15:36:42 -0800
Subject: [PATCH 23/32] [IR] Use llvm::find (NFC)

---
 llvm/lib/IR/Metadata.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/IR/Metadata.cpp b/llvm/lib/IR/Metadata.cpp
index 7bc25e30b89327..515893d079b8cb 100644
--- a/llvm/lib/IR/Metadata.cpp
+++ b/llvm/lib/IR/Metadata.cpp
@@ -1566,7 +1566,7 @@ void Instruction::updateDIAssignIDMapping(DIAssignID *ID) {
            "Expect existing attachment to be mapped");
 
     auto &InstVec = InstrsIt->second;
-    auto *InstIt = std::find(InstVec.begin(), InstVec.end(), this);
+    auto *InstIt = llvm::find(InstVec, this);
     assert(InstIt != InstVec.end() &&
            "Expect instruction to be mapped to attachment");
     // The vector contains a ptr to this. If this is the only element in the

From 211f5d00e26b62edc80bc86655a73c28e57b6964 Mon Sep 17 00:00:00 2001
From: Kazu Hirata <kazu@google.com>
Date: Sun, 17 Dec 2023 15:36:44 -0800
Subject: [PATCH 24/32] [llvm] Fix typos in documentation

---
 llvm/docs/AliasAnalysis.rst        | 2 +-
 llvm/docs/ConvergentOperations.rst | 2 +-
 llvm/docs/JITLink.rst              | 2 +-
 llvm/docs/LangRef.rst              | 6 +++---
 llvm/docs/NVPTXUsage.rst           | 2 +-
 llvm/docs/TableGen/ProgRef.rst     | 2 +-
 6 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/llvm/docs/AliasAnalysis.rst b/llvm/docs/AliasAnalysis.rst
index 046dd24d7332e2..7afe0e277bd4f7 100644
--- a/llvm/docs/AliasAnalysis.rst
+++ b/llvm/docs/AliasAnalysis.rst
@@ -207,7 +207,7 @@ Writing a new ``AliasAnalysis`` Implementation
 
 Writing a new alias analysis implementation for LLVM is quite straight-forward.
 There are already several implementations that you can use for examples, and the
-following information should help fill in any details.  For a examples, take a
+following information should help fill in any details.  For examples, take a
 look at the `various alias analysis implementations`_ included with LLVM.
 
 Different Pass styles
diff --git a/llvm/docs/ConvergentOperations.rst b/llvm/docs/ConvergentOperations.rst
index 5dd3ac2f3d98b9..332675f3edefd7 100644
--- a/llvm/docs/ConvergentOperations.rst
+++ b/llvm/docs/ConvergentOperations.rst
@@ -607,7 +607,7 @@ those in the caller.
    only if both threads entered the function by executing converged
    dynamic instances of the call-site.
 
-This intrinsic can occur at most once in a function, and only in the the entry
+This intrinsic can occur at most once in a function, and only in the entry
 block of the function. If this intrinsic occurs in a basic block, then it must
 precede any other convergent operation in the same basic block.
 
diff --git a/llvm/docs/JITLink.rst b/llvm/docs/JITLink.rst
index 72607a8c085ad1..b0a0dc77880dfd 100644
--- a/llvm/docs/JITLink.rst
+++ b/llvm/docs/JITLink.rst
@@ -466,7 +466,7 @@ finally transferring linked memory to the executing process.
 
       Calls the ``JITLinkContext``'s ``JITLinkMemoryManager`` to allocate both
       working and target memory for the graph. As part of this process the
-      ``JITLinkMemoryManager`` will update the the addresses of all nodes
+      ``JITLinkMemoryManager`` will update the addresses of all nodes
       defined in the graph to their assigned target address.
 
       Note: This step only updates the addresses of nodes defined in this graph.
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 8f0c45f674ead8..7f4a316a21acee 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -1515,7 +1515,7 @@ Currently, only the following parameter attributes are defined:
     over-alignment specification through language attributes).
 
 ``allocalign``
-    The function parameter marked with this attribute is is the alignment in bytes of the
+    The function parameter marked with this attribute is the alignment in bytes of the
     newly allocated block returned by this function. The returned value must either have
     the specified alignment or be the null pointer. The return value MAY be more aligned
     than the requested alignment, but not less aligned.  Invalid (e.g. non-power-of-2)
@@ -22798,7 +22798,7 @@ Semantics:
 
 The '``llvm.vp.fcmp``' compares its first two operands according to the
 condition code given as the third operand. The operands are compared element by
-element on each enabled lane, where the the semantics of the comparison are
+element on each enabled lane, where the semantics of the comparison are
 defined :ref:`according to the condition code <fcmp_md_cc_sem>`. Masked-off
 lanes are ``poison``.
 
@@ -22856,7 +22856,7 @@ Semantics:
 
 The '``llvm.vp.icmp``' compares its first two operands according to the
 condition code given as the third operand. The operands are compared element by
-element on each enabled lane, where the the semantics of the comparison are
+element on each enabled lane, where the semantics of the comparison are
 defined :ref:`according to the condition code <icmp_md_cc_sem>`. Masked-off
 lanes are ``poison``.
 
diff --git a/llvm/docs/NVPTXUsage.rst b/llvm/docs/NVPTXUsage.rst
index 5c28a3f3eee90d..22acc6c9cb37f5 100644
--- a/llvm/docs/NVPTXUsage.rst
+++ b/llvm/docs/NVPTXUsage.rst
@@ -329,7 +329,7 @@ optimization pipeline before dead-code elimination.
 The NVPTX TargetMachine knows how to schedule ``NVVMReflect`` at the beginning
 of your pass manager; just use the following code when setting up your pass
 manager and the PassBuilder will use ``registerPassBuilderCallbacks`` to let
-NVPTXTargetMachine::registerPassBuilderCallbacks add the the pass to the
+NVPTXTargetMachine::registerPassBuilderCallbacks add the pass to the
 pass manager:
 
 .. code-block:: c++
diff --git a/llvm/docs/TableGen/ProgRef.rst b/llvm/docs/TableGen/ProgRef.rst
index e5420a05dad78c..59ddef975c4877 100644
--- a/llvm/docs/TableGen/ProgRef.rst
+++ b/llvm/docs/TableGen/ProgRef.rst
@@ -661,7 +661,7 @@ The argument values can be specified in two forms:
   argument with name ``a`` and ``a1`` will be assigned to the argument with
   name ``b``.
 
-Required arguments can alse be specified as named argument.
+Required arguments can also be specified as named arguments.
 
 Note that the argument can only be specified once regardless of the way (named
 or positional) to specify and positional arguments should be put before named

From 364d7e775fcad5ef20a5c5788586f79c467b47db Mon Sep 17 00:00:00 2001
From: Kazu Hirata <kazu@google.com>
Date: Sun, 17 Dec 2023 15:51:48 -0800
Subject: [PATCH 25/32] [lldb] Use StringRef::starts_with (NFC)

This patch replaces uses of StringRef::startswith with
StringRef::starts_with for consistency with
std::{string,string_view}::starts_with in C++20.

I'm planning to deprecate and eventually remove
StringRef::{starts,ends}with.
---
 lldb/bindings/python/python-typemaps.swig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lldb/bindings/python/python-typemaps.swig b/lldb/bindings/python/python-typemaps.swig
index 7660e0282c8fcf..8d4b740e5f35ca 100644
--- a/lldb/bindings/python/python-typemaps.swig
+++ b/lldb/bindings/python/python-typemaps.swig
@@ -110,7 +110,7 @@ AND call SWIG_fail at the same time, because it will result in a double free.
         SWIG_fail;
       }
 
-      if (llvm::StringRef(type_name.get()).startswith("SB")) {
+      if (llvm::StringRef(type_name.get()).starts_with("SB")) {
         std::string error_msg = "Input type is invalid: " + type_name.get();
         PyErr_SetString(PyExc_TypeError, error_msg.c_str());
         SWIG_fail;

From 5ac12951b4e9bbfcc5791282d0961ec2b65575e9 Mon Sep 17 00:00:00 2001
From: Kazu Hirata <kazu@google.com>
Date: Sun, 17 Dec 2023 15:52:50 -0800
Subject: [PATCH 26/32] [ADT] Deprecate StringRef::{starts,ends}with (#75491)

This patch deprecates StringRef::{starts,ends}with.  Note that I've
replaced all known uses of StringRef::{starts,ends}with with
StringRef::{starts,ends}_with for consistency with
std::{string,string_view}::{starts,ends}_with in C++20.
---
 llvm/include/llvm/ADT/StringRef.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/llvm/include/llvm/ADT/StringRef.h b/llvm/include/llvm/ADT/StringRef.h
index 4e69d5b633546d..d892333de391ce 100644
--- a/llvm/include/llvm/ADT/StringRef.h
+++ b/llvm/include/llvm/ADT/StringRef.h
@@ -258,7 +258,9 @@ namespace llvm {
       return Length >= Prefix.Length &&
              compareMemory(Data, Prefix.Data, Prefix.Length) == 0;
     }
-    [[nodiscard]] bool startswith(StringRef Prefix) const {
+    [[nodiscard]] LLVM_DEPRECATED(
+        "Use starts_with instead",
+        "starts_with") bool startswith(StringRef Prefix) const {
       return starts_with(Prefix);
     }
 
@@ -271,7 +273,9 @@ namespace llvm {
              compareMemory(end() - Suffix.Length, Suffix.Data, Suffix.Length) ==
                  0;
     }
-    [[nodiscard]] bool endswith(StringRef Suffix) const {
+    [[nodiscard]] LLVM_DEPRECATED(
+        "Use ends_with instead",
+        "ends_with") bool endswith(StringRef Suffix) const {
       return ends_with(Suffix);
     }
 

From dbe9a602561d5eecfc1652aab7e127754cb963c0 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@sifive.com>
Date: Sun, 17 Dec 2023 17:39:49 -0800
Subject: [PATCH 27/32] [RISCV] Correct the VLOperand for masked vssrl/vssra
 intrinsics.

I can't prove this matters for anything, though. The only use of
VLOperand I know of is for handling i64 splat operands to .vx
intrinsics on RV32. Shifts are special and always use XLen for .vx
so they are always legal.
---
 llvm/include/llvm/IR/IntrinsicsRISCV.td | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td
index fc830fca392fc5..a391bc53cdb0e9 100644
--- a/llvm/include/llvm/IR/IntrinsicsRISCV.td
+++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td
@@ -702,7 +702,7 @@ let TargetPrefix = "riscv" in {
                      LLVMMatchType<2>, LLVMMatchType<2>],
                     [ImmArg<ArgIndex<4>>,ImmArg<ArgIndex<6>>, IntrNoMem, IntrHasSideEffects]>,
                     RISCVVIntrinsic {
-    let VLOperand = 6;
+    let VLOperand = 5;
   }
   // For Saturating binary operations.
   // The destination vector type is NOT the same as first source vector.

From dd45be028d2788cc401400e208ab0fa64d929b0a Mon Sep 17 00:00:00 2001
From: Jakub Kuderski <jakub@nod-labs.com>
Date: Sun, 17 Dec 2023 20:59:46 -0500
Subject: [PATCH 28/32] [mlir][gpu] Trim trailing whitespace in dialect docs.
 NFC.

---
 mlir/include/mlir/Dialect/GPU/IR/GPUBase.td | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUBase.td b/mlir/include/mlir/Dialect/GPU/IR/GPUBase.td
index ccb9580adbd1f5..7b9d46fda12f51 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUBase.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUBase.td
@@ -52,14 +52,14 @@ def GPU_Dialect : Dialect {
     /// Returns the numeric value used to identify the private memory address
     /// space.
     static AddressSpace getPrivateAddressSpace() { return AddressSpace::Private; }
-    
-    /// Return true if the given MemRefType has an address space that matches 
+
+    /// Return true if the given MemRefType has an address space that matches
     /// with the gpu::AddressSpaceAttr attribute with value 'workgroup`.
     static bool hasWorkgroupMemoryAddressSpace(MemRefType type);
 
-    /// Return true if the given Attribute is an gpu::AddressSpaceAttr 
+    /// Return true if the given Attribute is an gpu::AddressSpaceAttr
     /// attribute with value 'workgroup`.
-    static bool isWorkgroupMemoryAddressSpace(Attribute memorySpace);  
+    static bool isWorkgroupMemoryAddressSpace(Attribute memorySpace);
   }];
 
   let dependentDialects = ["arith::ArithDialect"];

From 31429e7a89590f88034920edd3e997aeabff8124 Mon Sep 17 00:00:00 2001
From: Akira Hatanaka <ahatanak@gmail.com>
Date: Sun, 17 Dec 2023 18:22:44 -0800
Subject: [PATCH 29/32] [CodeGen] Emit a more accurate alignment for
 non-temporal loads/stores (#75675)

Call EmitPointerWithAlignment to compute the alignment based on the
underlying lvalue's alignment when it's available.
---
 clang/lib/CodeGen/CGBuiltin.cpp    |  8 ++++----
 clang/test/CodeGen/Nontemporal.cpp | 14 ++++++++++++++
 2 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 3327866d2b9623..c96f86a823a461 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -232,19 +232,19 @@ static Value *MakeBinaryAtomicValue(
 
 static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) {
   Value *Val = CGF.EmitScalarExpr(E->getArg(0));
-  Value *Address = CGF.EmitScalarExpr(E->getArg(1));
+  Address Addr = CGF.EmitPointerWithAlignment(E->getArg(1));
 
   Val = CGF.EmitToMemory(Val, E->getArg(0)->getType());
-  LValue LV = CGF.MakeNaturalAlignAddrLValue(Address, E->getArg(0)->getType());
+  LValue LV = CGF.MakeAddrLValue(Addr, E->getArg(0)->getType());
   LV.setNontemporal(true);
   CGF.EmitStoreOfScalar(Val, LV, false);
   return nullptr;
 }
 
 static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) {
-  Value *Address = CGF.EmitScalarExpr(E->getArg(0));
+  Address Addr = CGF.EmitPointerWithAlignment(E->getArg(0));
 
-  LValue LV = CGF.MakeNaturalAlignAddrLValue(Address, E->getType());
+  LValue LV = CGF.MakeAddrLValue(Addr, E->getType());
   LV.setNontemporal(true);
   return CGF.EmitLoadOfScalar(LV, E->getExprLoc());
 }
diff --git a/clang/test/CodeGen/Nontemporal.cpp b/clang/test/CodeGen/Nontemporal.cpp
index e14ca18717928d..5052cb225d4111 100644
--- a/clang/test/CodeGen/Nontemporal.cpp
+++ b/clang/test/CodeGen/Nontemporal.cpp
@@ -46,3 +46,17 @@ void test_all_sizes(void)                 // CHECK-LABEL: test_all_sizes
   vf2 = __builtin_nontemporal_load(&vf1); // CHECK: load <4 x float>{{.*}}align 16, !nontemporal
   vc2 = __builtin_nontemporal_load(&vc1); // CHECK: load <8 x i8>{{.*}}align 8, !nontemporal
 }
+
+struct S { char c[16]; };
+S x;
+
+typedef int v4si __attribute__ ((vector_size(16)));
+
+// CHECK-LABEL: define void @_Z14test_alignmentv()
+// CHECK: load <4 x i32>, ptr @x, align 1, !nontemporal
+// CHECK: store <4 x i32> %1, ptr @x, align 1, !nontemporal
+
+void test_alignment() {
+ auto t =  __builtin_nontemporal_load((v4si*)x.c);
+ __builtin_nontemporal_store(t, (v4si*)x.c);
+}

From 2c668fddadd885384381107be42f936f08ec0c4d Mon Sep 17 00:00:00 2001
From: Jakub Kuderski <jakub@nod-labs.com>
Date: Sun, 17 Dec 2023 21:34:25 -0500
Subject: [PATCH 30/32] [mlir][gpu] Trim trailing whitespace in GPUOps.td. NFC.

---
 mlir/include/mlir/Dialect/GPU/IR/GPUOps.td | 42 +++++++++++-----------
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
index 7cad1cd89fd633..2e1a5f5cc78aed 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
@@ -68,7 +68,7 @@ def GPU_ClusterDimOp : GPU_IndexOp<"cluster_dim"> {
 
 def GPU_ClusterIdOp : GPU_IndexOp<"cluster_id"> {
   let description = [{
-    Returns the cluster id, i.e. the index of the current cluster within the 
+    Returns the cluster id, i.e. the index of the current cluster within the
     grid along the x, y, or z `dimension`.
 
     Example:
@@ -462,23 +462,23 @@ def GPU_GPUFuncOp : GPU_Op<"func", [
 def GPU_DynamicSharedMemoryOp : GPU_Op<"dynamic_shared_memory", [Pure]>
 {
   let summary = "Get the memref for dynamic shared memory";
-  
+
   let description = [{
-    This operation provides a memref pointer to the start of dynamic shared 
+    This operation provides a memref pointer to the start of dynamic shared
     memory, often referred to as workgroup memory. It's important to note that
-    this dynamic shared memory needs to be allocated at kernel launch. One can 
-    conveniently utilize `the dynamic_shared_memory_size` parameter of 
+    this dynamic shared memory needs to be allocated at kernel launch. One can
+    conveniently utilize the `dynamic_shared_memory_size` parameter of
     `gpu.launch` for this purpose.
-   
-    Examples: 
-    ```mlir        
+
+    Examples:
+    ```mlir
     %0 = gpu.dynamic.shared.memory : memref<?xi8, #gpu.address_space<workgroup>>
-    %1 = memref.view %0[%c8192][] : memref<?xi8, #gpu.address_space<workgroup>> 
+    %1 = memref.view %0[%c8192][] : memref<?xi8, #gpu.address_space<workgroup>>
                             to memref<32x64xf32, #gpu.address_space<workgroup>>
-    %2 = memref.view %0[%c16384][] : memref<?xi8, #gpu.address_space<workgroup>> 
+    %2 = memref.view %0[%c16384][] : memref<?xi8, #gpu.address_space<workgroup>>
                             to memref<32x64xf32, #gpu.address_space<workgroup>>
     ```
-  }];  
+  }];
   let arguments = (ins);
   let results = (outs Arg<MemRefRankOf<[I8], [1]>>:$resultMemref);
   let assemblyFormat = [{ attr-dict `:` type($resultMemref) }];
@@ -493,11 +493,11 @@ def GPU_LaunchFuncOp :GPU_Op<"launch_func", [
                      "blockSizeY", "blockSizeZ"]>]>,
     Arguments<(ins Variadic<GPU_AsyncToken>:$asyncDependencies,
                SymbolRefAttr:$kernel,
-               LaunchIndx:$gridSizeX, 
-               LaunchIndx:$gridSizeY, 
+               LaunchIndx:$gridSizeX,
+               LaunchIndx:$gridSizeY,
                LaunchIndx:$gridSizeZ,
-               LaunchIndx:$blockSizeX, 
-               LaunchIndx:$blockSizeY, 
+               LaunchIndx:$blockSizeX,
+               LaunchIndx:$blockSizeY,
                LaunchIndx:$blockSizeZ,
                Optional<LaunchIndx>:$clusterSizeX,
                Optional<LaunchIndx>:$clusterSizeY,
@@ -539,10 +539,10 @@ def GPU_LaunchFuncOp :GPU_Op<"launch_func", [
     The remaining operands if present are passed as arguments to the kernel
     function.
 
-    The `gpu.launch_func` also supports kernel launching with clusters if 
-    supported by the target architecture. The cluster size can be set by 
-    `clusterSizeX`, `clusterSizeY`, and `clusterSizeZ` arguments. When these 
-    arguments are present, the Op launches a kernel that clusters the given 
+    The `gpu.launch_func` also supports kernel launching with clusters if
+    supported by the target architecture. The cluster size can be set by
+    `clusterSizeX`, `clusterSizeY`, and `clusterSizeZ` arguments. When these
+    arguments are present, the Op launches a kernel that clusters the given
     thread blocks. This feature is exclusive to certain architectures.
 
     Example:
@@ -593,7 +593,7 @@ def GPU_LaunchFuncOp :GPU_Op<"launch_func", [
           async                           // (Optional) Don't block host, return token.
           [%t0]                           // (Optional) Execute only after %t0 has completed.
           @kernels::@kernel_1             // Kernel function.
-          clusters in (%cst, %cst, %cst)  // (Optional) Cluster size only for support architectures. 
+          clusters in (%cst, %cst, %cst)  // (Optional) Cluster size only for supported architectures.
           blocks in (%cst, %cst, %cst)    // Grid size.
           threads in (%cst, %cst, %cst)   // Block size.
           dynamic_shared_memory_size %s   // (Optional) Amount of dynamic shared
@@ -659,7 +659,7 @@ def GPU_LaunchFuncOp :GPU_Op<"launch_func", [
   let assemblyFormat = [{
       custom<AsyncDependencies>(type($asyncToken), $asyncDependencies)
       (`<` $asyncObject^ `:` type($asyncObject) `>`)?
-      $kernel      
+      $kernel
       ( `clusters` `in` ` ` `(` $clusterSizeX^ `,` $clusterSizeY `,` $clusterSizeZ `)` )?
       `blocks` `in` ` ` `(` $gridSizeX `,` $gridSizeY `,` $gridSizeZ `)`
       `threads` `in` ` ` `(` $blockSizeX `,` $blockSizeY `,` $blockSizeZ `)`

From 5c1f44193dd6a7d3453fc002130f5cbc7cb351c2 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@sifive.com>
Date: Sun, 17 Dec 2023 19:21:36 -0800
Subject: [PATCH 31/32] [RISCV] Simplify PrintExtension. NFC (#75427)

Instead of using a format string that needs to be parsed, we can use
left_justify to print each string with padding.
---
 llvm/lib/Support/RISCVISAInfo.cpp | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Support/RISCVISAInfo.cpp b/llvm/lib/Support/RISCVISAInfo.cpp
index bbbaf26a7bd493..4a800ceb0c810c 100644
--- a/llvm/lib/Support/RISCVISAInfo.cpp
+++ b/llvm/lib/Support/RISCVISAInfo.cpp
@@ -215,11 +215,12 @@ static void verifyTables() {
 #endif
 }
 
-static void PrintExtension(const std::string Name, const std::string Version,
-                           const std::string Description) {
-  outs() << "    "
-         << format(Description.empty() ? "%-20s%s\n" : "%-20s%-10s%s\n",
-                   Name.c_str(), Version.c_str(), Description.c_str());
+static void PrintExtension(StringRef Name, StringRef Version,
+                           StringRef Description) {
+  outs().indent(4);
+  unsigned VersionWidth = Description.empty() ? 0 : 10;
+  outs() << left_justify(Name, 20) << left_justify(Version, VersionWidth)
+         << Description << "\n";
 }
 
 void llvm::riscvExtensionsHelp(StringMap<StringRef> DescMap) {
@@ -233,7 +234,7 @@ void llvm::riscvExtensionsHelp(StringMap<StringRef> DescMap) {
   for (const auto &E : ExtMap) {
     std::string Version = std::to_string(E.second.MajorVersion) + "." +
                           std::to_string(E.second.MinorVersion);
-    PrintExtension(E.first, Version, DescMap[E.first].str());
+    PrintExtension(E.first, Version, DescMap[E.first]);
   }
 
   outs() << "\nExperimental extensions\n";
@@ -243,7 +244,7 @@ void llvm::riscvExtensionsHelp(StringMap<StringRef> DescMap) {
   for (const auto &E : ExtMap) {
     std::string Version = std::to_string(E.second.MajorVersion) + "." +
                           std::to_string(E.second.MinorVersion);
-    PrintExtension(E.first, Version, DescMap["experimental-" + E.first].str());
+    PrintExtension(E.first, Version, DescMap["experimental-" + E.first]);
   }
 
   outs() << "\nUse -march to specify the target's extension.\n"

From b83b28779ee56236aaf8827398f889334abbd28d Mon Sep 17 00:00:00 2001
From: Yeting Kuo <46629943+yetingk@users.noreply.github.com>
Date: Mon, 18 Dec 2023 11:46:22 +0800
Subject: [PATCH 32/32] [RISCV] Make Zhinx and Zvfh imply Zhinxmin and Zvfhmin
 respectively (#75735)

Zhinxmin is a subset of Zhinx and Zvfhmin is also a subset of Zvfh.
---
 llvm/lib/Support/RISCVISAInfo.cpp     | 4 ++--
 llvm/test/CodeGen/RISCV/attributes.ll | 4 ++++
 llvm/test/MC/RISCV/attribute-arch.s   | 2 +-
 3 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Support/RISCVISAInfo.cpp b/llvm/lib/Support/RISCVISAInfo.cpp
index 4a800ceb0c810c..54363e988b702d 100644
--- a/llvm/lib/Support/RISCVISAInfo.cpp
+++ b/llvm/lib/Support/RISCVISAInfo.cpp
@@ -1014,7 +1014,7 @@ static const char *ImpliedExtsZfbfmin[] = {"f"};
 static const char *ImpliedExtsZfh[] = {"zfhmin"};
 static const char *ImpliedExtsZfhmin[] = {"f"};
 static const char *ImpliedExtsZfinx[] = {"zicsr"};
-static const char *ImpliedExtsZhinx[] = {"zfinx"};
+static const char *ImpliedExtsZhinx[] = {"zhinxmin"};
 static const char *ImpliedExtsZhinxmin[] = {"zfinx"};
 static const char *ImpliedExtsZicntr[] = {"zicsr"};
 static const char *ImpliedExtsZihpm[] = {"zicsr"};
@@ -1030,7 +1030,7 @@ static const char *ImpliedExtsZve64f[] = {"zve64x", "zve32f"};
 static const char *ImpliedExtsZve64x[] = {"zve32x", "zvl64b"};
 static const char *ImpliedExtsZvfbfmin[] = {"zve32f", "zfbfmin"};
 static const char *ImpliedExtsZvfbfwma[] = {"zvfbfmin"};
-static const char *ImpliedExtsZvfh[] = {"zve32f", "zfhmin"};
+static const char *ImpliedExtsZvfh[] = {"zvfhmin", "zfhmin"};
 static const char *ImpliedExtsZvfhmin[] = {"zve32f"};
 static const char *ImpliedExtsZvkn[] = {"zvkb", "zvkned", "zvknhb", "zvkt"};
 static const char *ImpliedExtsZvknc[] = {"zvbc", "zvkn"};
diff --git a/llvm/test/CodeGen/RISCV/attributes.ll b/llvm/test/CodeGen/RISCV/attributes.ll
index b3d4dc8bb638a8..25f6e4a56d9324 100644
--- a/llvm/test/CodeGen/RISCV/attributes.ll
+++ b/llvm/test/CodeGen/RISCV/attributes.ll
@@ -82,6 +82,7 @@
 ; RUN: llc -mtriple=riscv32 -mattr=+zve64x -mattr=+experimental-zvksg %s -o - | FileCheck --check-prefix=RV32ZVKSG %s
 ; RUN: llc -mtriple=riscv32 -mattr=+zve32x -mattr=+experimental-zvksh %s -o - | FileCheck --check-prefix=RV32ZVKSH %s
 ; RUN: llc -mtriple=riscv32 -mattr=+zve32x -mattr=+experimental-zvkt %s -o - | FileCheck --check-prefix=RV32ZVKT %s
+; RUN: llc -mtriple=riscv32 -mattr=+zvfh %s -o - | FileCheck --check-prefix=RV32ZVFH %s
 ; RUN: llc -mtriple=riscv32 -mattr=+experimental-zicond %s -o - | FileCheck --check-prefix=RV32ZICOND %s
 ; RUN: llc -mtriple=riscv32 -mattr=+smaia %s -o - | FileCheck --check-prefixes=CHECK,RV32SMAIA %s
 ; RUN: llc -mtriple=riscv32 -mattr=+ssaia %s -o - | FileCheck --check-prefixes=CHECK,RV32SSAIA %s
@@ -172,6 +173,7 @@
 ; RUN: llc -mtriple=riscv64 -mattr=+zve32x -mattr=+experimental-zvksg %s -o - | FileCheck --check-prefix=RV64ZVKSG %s
 ; RUN: llc -mtriple=riscv64 -mattr=+zve32x -mattr=+experimental-zvksh %s -o - | FileCheck --check-prefix=RV64ZVKSH %s
 ; RUN: llc -mtriple=riscv64 -mattr=+zve32x -mattr=+experimental-zvkt %s -o - | FileCheck --check-prefix=RV64ZVKT %s
+; RUN: llc -mtriple=riscv64 -mattr=+zvfh %s -o - | FileCheck --check-prefix=RV64ZVFH %s
 ; RUN: llc -mtriple=riscv64 -mattr=+experimental-zicond %s -o - | FileCheck --check-prefix=RV64ZICOND %s
 ; RUN: llc -mtriple=riscv64 -mattr=+smaia %s -o - | FileCheck --check-prefixes=CHECK,RV64SMAIA %s
 ; RUN: llc -mtriple=riscv64 -mattr=+ssaia %s -o - | FileCheck --check-prefixes=CHECK,RV64SSAIA %s
@@ -264,6 +266,7 @@
 ; RV32ZVKSG: .attribute 5, "rv32i2p1_zicsr2p0_zve32x1p0_zve64x1p0_zvkb1p0_zvkg1p0_zvks1p0_zvksed1p0_zvksg1p0_zvksh1p0_zvkt1p0_zvl32b1p0_zvl64b1p0"
 ; RV32ZVKSH: .attribute 5, "rv32i2p1_zicsr2p0_zve32x1p0_zvksh1p0_zvl32b1p0"
 ; RV32ZVKT: .attribute 5, "rv32i2p1_zicsr2p0_zve32x1p0_zvkt1p0_zvl32b1p0"
+; RV32ZVFH: .attribute 5, "rv32i2p1_f2p2_zicsr2p0_zfhmin1p0_zve32f1p0_zve32x1p0_zvfh1p0_zvfhmin1p0_zvl32b1p0"
 ; RV32ZICOND: .attribute 5, "rv32i2p1_zicond1p0"
 ; RV32SMAIA: .attribute 5, "rv32i2p1_smaia1p0"
 ; RV32SSAIA: .attribute 5, "rv32i2p1_ssaia1p0"
@@ -353,6 +356,7 @@
 ; RV64ZVKSG: .attribute 5, "rv64i2p1_zicsr2p0_zve32x1p0_zvkb1p0_zvkg1p0_zvks1p0_zvksed1p0_zvksg1p0_zvksh1p0_zvkt1p0_zvl32b1p0"
 ; RV64ZVKSH: .attribute 5, "rv64i2p1_zicsr2p0_zve32x1p0_zvksh1p0_zvl32b1p0"
 ; RV64ZVKT: .attribute 5, "rv64i2p1_zicsr2p0_zve32x1p0_zvkt1p0_zvl32b1p0"
+; RV64ZVFH: .attribute 5, "rv64i2p1_f2p2_zicsr2p0_zfhmin1p0_zve32f1p0_zve32x1p0_zvfh1p0_zvfhmin1p0_zvl32b1p0"
 ; RV64ZICOND: .attribute 5, "rv64i2p1_zicond1p0"
 ; RV64SMAIA: .attribute 5, "rv64i2p1_smaia1p0"
 ; RV64SSAIA: .attribute 5, "rv64i2p1_ssaia1p0"
diff --git a/llvm/test/MC/RISCV/attribute-arch.s b/llvm/test/MC/RISCV/attribute-arch.s
index 25f84f3cc1232b..0fedef007a39ce 100644
--- a/llvm/test/MC/RISCV/attribute-arch.s
+++ b/llvm/test/MC/RISCV/attribute-arch.s
@@ -178,7 +178,7 @@
 # CHECK: attribute      5, "rv32i2p1_zicsr2p0_zfinx1p0_zhinxmin1p0"
 
 .attribute arch, "rv32izfinx_zhinx1p0"
-# CHECK: attribute      5, "rv32i2p1_zicsr2p0_zfinx1p0_zhinx1p0"
+# CHECK: attribute      5, "rv32i2p1_zicsr2p0_zfinx1p0_zhinx1p0_zhinxmin1p0"
 
 .attribute arch, "rv32i_zbkb1p0"
 # CHECK: attribute      5, "rv32i2p1_zbkb1p0"