[CIR][CIRGen] Aarch64 Builtins: add more load/store variants

Now that alignment computation is correct for neon, add more neon types for load/store.
jopperm · May 31, 2024 · 4ffa090 · 4ffa090
1 parent 5a0a234
commit 4ffa090
Show file tree

Hide file tree

Showing 2 changed files with 141 additions and 100 deletions.
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
@@ -1362,10 +1362,15 @@ static mlir::Type GetNeonType(CIRGenFunction *CGF, NeonTypeFlags TypeFlags,
   case NeonTypeFlags::Int8:
   case NeonTypeFlags::Poly8:
     return mlir::cir::VectorType::get(CGF->getBuilder().getContext(),
-                                      CGF->UInt8Ty, V1Ty ? 1 : (8 << IsQuad));
+                                      TypeFlags.isUnsigned() ? CGF->UInt8Ty
+                                                             : CGF->SInt8Ty,
+                                      V1Ty ? 1 : (8 << IsQuad));
   case NeonTypeFlags::Int16:
   case NeonTypeFlags::Poly16:
-    llvm_unreachable("NYI");
+    return mlir::cir::VectorType::get(CGF->getBuilder().getContext(),
+                                      TypeFlags.isUnsigned() ? CGF->UInt16Ty
+                                                             : CGF->SInt16Ty,
+                                      V1Ty ? 1 : (4 << IsQuad));
   case NeonTypeFlags::BFloat16:
     if (AllowBFloatArgsAndRet)
       llvm_unreachable("NYI");
@@ -1377,10 +1382,16 @@ static mlir::Type GetNeonType(CIRGenFunction *CGF, NeonTypeFlags TypeFlags,
     else
       llvm_unreachable("NYI");
   case NeonTypeFlags::Int32:
-    llvm_unreachable("NYI");
+    return mlir::cir::VectorType::get(CGF->getBuilder().getContext(),
+                                      TypeFlags.isUnsigned() ? CGF->UInt32Ty
+                                                             : CGF->SInt32Ty,
+                                      V1Ty ? 1 : (2 << IsQuad));
   case NeonTypeFlags::Int64:
   case NeonTypeFlags::Poly64:
-    llvm_unreachable("NYI");
+    return mlir::cir::VectorType::get(CGF->getBuilder().getContext(),
+                                      TypeFlags.isUnsigned() ? CGF->UInt64Ty
+                                                             : CGF->SInt64Ty,
+                                      V1Ty ? 1 : (1 << IsQuad));
   case NeonTypeFlags::Poly128:
     // FIXME: i128 and f128 doesn't get fully support in Clang and llvm.
     // There is a lot of i128 and f128 API missing.

diff --git a/clang/test/CIR/CodeGen/aarch64-neon-intrinsics.c b/clang/test/CIR/CodeGen/aarch64-neon-intrinsics.c
@@ -8975,54 +8975,75 @@ uint8x16_t test_vld1q_u8(uint8_t const *a) {
   // LLVM:   [[TMP1:%.*]] = load <16 x i8>, ptr %0, align 1,
 }
 
-// NYI-LABEL: @test_vld1q_u16(
-// NYI:   [[TMP2:%.*]] = load <8 x i16>, ptr %a, align 2
-// NYI:   ret <8 x i16> [[TMP2]]
-// uint16x8_t test_vld1q_u16(uint16_t const *a) {
-//   return vld1q_u16(a);
-// }
+uint16x8_t test_vld1q_u16(uint16_t const *a) {
+  return vld1q_u16(a);
+  // CIR-LABEL: @test_vld1q_u16
+  // CIR: %[[CAST:.*]] = cir.cast(bitcast, {{.*}} : !cir.ptr<!void>), !cir.ptr<!cir.vector<!u16i x 8>>
+  // CIR: cir.load align(2) %[[CAST]] : !cir.ptr<!cir.vector<!u16i x 8>>, !cir.vector<!u16i x 8>
+
+  // LLVM-LABEL: @test_vld1q_u16
+  // LLVM:   [[TMP1:%.*]] = load <8 x i16>, ptr %0, align 2,
+}
 
-// NYI-LABEL: @test_vld1q_u32(
-// NYI:   [[TMP2:%.*]] = load <4 x i32>, ptr %a, align 4
-// NYI:   ret <4 x i32> [[TMP2]]
-// uint32x4_t test_vld1q_u32(uint32_t const *a) {
-//   return vld1q_u32(a);
-// }
+uint32x4_t test_vld1q_u32(uint32_t const *a) {
+  return vld1q_u32(a);
+  // CIR-LABEL: @test_vld1q_u32
+  // CIR: %[[CAST:.*]] = cir.cast(bitcast, {{.*}} : !cir.ptr<!void>), !cir.ptr<!cir.vector<!u32i x 4>>
+  // CIR: cir.load align(4) %[[CAST]] : !cir.ptr<!cir.vector<!u32i x 4>>, !cir.vector<!u32i x 4>
 
-// NYI-LABEL: @test_vld1q_u64(
-// NYI:   [[TMP2:%.*]] = load <2 x i64>, ptr %a, align 8
-// NYI:   ret <2 x i64> [[TMP2]]
-// uint64x2_t test_vld1q_u64(uint64_t const *a) {
-//   return vld1q_u64(a);
-// }
+  // LLVM-LABEL: @test_vld1q_u32
+  // LLVM:   [[TMP1:%.*]] = load <4 x i32>, ptr %0, align 4,
+}
 
-// NYI-LABEL: @test_vld1q_s8(
-// NYI:   [[TMP1:%.*]] = load <16 x i8>, ptr %a, align 1
-// NYI:   ret <16 x i8> [[TMP1]]
-// int8x16_t test_vld1q_s8(int8_t const *a) {
-//   return vld1q_s8(a);
-// }
+uint64x2_t test_vld1q_u64(uint64_t const *a) {
+  return vld1q_u64(a);
+  // CIR-LABEL: @test_vld1q_u64
+  // CIR: %[[CAST:.*]] = cir.cast(bitcast, {{.*}} : !cir.ptr<!void>), !cir.ptr<!cir.vector<!u64i x 2>>
+  // CIR: cir.load align(8) %[[CAST]] : !cir.ptr<!cir.vector<!u64i x 2>>, !cir.vector<!u64i x 2>
 
-// NYI-LABEL: @test_vld1q_s16(
-// NYI:   [[TMP2:%.*]] = load <8 x i16>, ptr %a, align 2
-// NYI:   ret <8 x i16> [[TMP2]]
-// int16x8_t test_vld1q_s16(int16_t const *a) {
-//   return vld1q_s16(a);
-// }
+  // LLVM-LABEL: @test_vld1q_u64
+  // LLVM:   [[TMP1:%.*]] = load <2 x i64>, ptr %0, align 8,
+}
 
-// NYI-LABEL: @test_vld1q_s32(
-// NYI:   [[TMP2:%.*]] = load <4 x i32>, ptr %a, align 4
-// NYI:   ret <4 x i32> [[TMP2]]
-// int32x4_t test_vld1q_s32(int32_t const *a) {
-//   return vld1q_s32(a);
-// }
+int8x16_t test_vld1q_s8(int8_t const *a) {
+  return vld1q_s8(a);
+  // CIR-LABEL: @test_vld1q_s8
+  // CIR: %[[CAST:.*]] = cir.cast(bitcast, {{.*}} : !cir.ptr<!void>), !cir.ptr<!cir.vector<!s8i x 16>>
+  // CIR: cir.load align(1) %[[CAST]] : !cir.ptr<!cir.vector<!s8i x 16>>, !cir.vector<!s8i x 16>
 
-// NYI-LABEL: @test_vld1q_s64(
-// NYI:   [[TMP2:%.*]] = load <2 x i64>, ptr %a, align 8
-// NYI:   ret <2 x i64> [[TMP2]]
-// int64x2_t test_vld1q_s64(int64_t const *a) {
-//   return vld1q_s64(a);
-// }
+  // LLVM-LABEL: @test_vld1q_s8
+  // LLVM:   [[TMP1:%.*]] = load <16 x i8>, ptr %0, align 1,
+}
+
+int16x8_t test_vld1q_s16(int16_t const *a) {
+  return vld1q_s16(a);
+  // CIR-LABEL: @test_vld1q_s16
+  // CIR: %[[CAST:.*]] = cir.cast(bitcast, {{.*}} : !cir.ptr<!void>), !cir.ptr<!cir.vector<!s16i x 8>>
+  // CIR: cir.load align(2) %[[CAST]] : !cir.ptr<!cir.vector<!s16i x 8>>, !cir.vector<!s16i x 8>
+
+  // LLVM-LABEL: @test_vld1q_s16
+  // LLVM:   [[TMP1:%.*]] = load <8 x i16>, ptr %0, align 2,
+}
+
+int32x4_t test_vld1q_s32(int32_t const *a) {
+  return vld1q_s32(a);
+  // CIR-LABEL: @test_vld1q_s32
+  // CIR: %[[CAST:.*]] = cir.cast(bitcast, {{.*}} : !cir.ptr<!void>), !cir.ptr<!cir.vector<!s32i x 4>>
+  // CIR: cir.load align(4) %[[CAST]] : !cir.ptr<!cir.vector<!s32i x 4>>, !cir.vector<!s32i x 4>
+
+  // LLVM-LABEL: @test_vld1q_s32
+  // LLVM:   [[TMP1:%.*]] = load <4 x i32>, ptr %0, align 4,
+}
+
+int64x2_t test_vld1q_s64(int64_t const *a) {
+  return vld1q_s64(a);
+  // CIR-LABEL: @test_vld1q_s64
+  // CIR: %[[CAST:.*]] = cir.cast(bitcast, {{.*}} : !cir.ptr<!void>), !cir.ptr<!cir.vector<!s64i x 2>>
+  // CIR: cir.load align(8) %[[CAST]] : !cir.ptr<!cir.vector<!s64i x 2>>, !cir.vector<!s64i x 2>
+
+  // LLVM-LABEL: @test_vld1q_s64
+  // LLVM:   [[TMP1:%.*]] = load <2 x i64>, ptr %0, align 8,
+}
 
 // NYI-LABEL: @test_vld1q_f16(
 // NYI:   [[TMP2:%.*]] = load <8 x half>, ptr %a, align 2
@@ -10187,66 +10208,75 @@ void test_vst1q_u8(uint8_t *a, uint8x16_t b) {
   // LLVM:   store <16 x i8> %{{.*}}, ptr %0, align 1,
 }
 
-// NYI-LABEL: @test_vst1q_u16(
-// NYI:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// NYI:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// NYI:   store <8 x i16> [[TMP3]], ptr %a
-// NYI:   ret void
-// void test_vst1q_u16(uint16_t *a, uint16x8_t b) {
-//   vst1q_u16(a, b);
-// }
+void test_vst1q_u16(uint16_t *a, uint16x8_t b) {
+  vst1q_u16(a, b);
+  // CIR-LABEL: @test_vst1q_u16
+  // CIR: %[[CAST:.*]] = cir.cast(bitcast, {{.*}} : !cir.ptr<!void>), !cir.ptr<!cir.vector<!u16i x 8>>
+  // CIR: cir.store align(2) %{{.*}}, %[[CAST]] : !cir.vector<!u16i x 8>, !cir.ptr<!cir.vector<!u16i x 8>>
 
-// NYI-LABEL: @test_vst1q_u32(
-// NYI:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// NYI:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// NYI:   store <4 x i32> [[TMP3]], ptr %a
-// NYI:   ret void
-// void test_vst1q_u32(uint32_t *a, uint32x4_t b) {
-//   vst1q_u32(a, b);
-// }
+  // LLVM-LABEL: @test_vst1q_u16
+  // LLVM:   store <8 x i16> %{{.*}}, ptr %0, align 2,
+}
 
-// NYI-LABEL: @test_vst1q_u64(
-// NYI:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// NYI:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// NYI:   store <2 x i64> [[TMP3]], ptr %a
-// NYI:   ret void
-// void test_vst1q_u64(uint64_t *a, uint64x2_t b) {
-//   vst1q_u64(a, b);
-// }
+void test_vst1q_u32(uint32_t *a, uint32x4_t b) {
+  vst1q_u32(a, b);
+  // CIR-LABEL: @test_vst1q_u32
+  // CIR: %[[CAST:.*]] = cir.cast(bitcast, {{.*}} : !cir.ptr<!void>), !cir.ptr<!cir.vector<!u32i x 4>>
+  // CIR: cir.store align(4) %{{.*}}, %[[CAST]] : !cir.vector<!u32i x 4>, !cir.ptr<!cir.vector<!u32i x 4>>
 
-// NYI-LABEL: @test_vst1q_s8(
-// NYI:   store <16 x i8> %b, ptr %a
-// NYI:   ret void
-// void test_vst1q_s8(int8_t *a, int8x16_t b) {
-//   vst1q_s8(a, b);
-// }
+  // LLVM-LABEL: @test_vst1q_u32
+  // LLVM:   store <4 x i32> %{{.*}}, ptr %0, align 4,
+}
 
-// NYI-LABEL: @test_vst1q_s16(
-// NYI:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// NYI:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// NYI:   store <8 x i16> [[TMP3]], ptr %a
-// NYI:   ret void
-// void test_vst1q_s16(int16_t *a, int16x8_t b) {
-//   vst1q_s16(a, b);
-// }
+void test_vst1q_u64(uint64_t *a, uint64x2_t b) {
+  vst1q_u64(a, b);
+  // CIR-LABEL: @test_vst1q_u64
+  // CIR: %[[CAST:.*]] = cir.cast(bitcast, {{.*}} : !cir.ptr<!void>), !cir.ptr<!cir.vector<!u64i x 2>>
+  // CIR: cir.store align(8) %{{.*}}, %[[CAST]] : !cir.vector<!u64i x 2>, !cir.ptr<!cir.vector<!u64i x 2>>
 
-// NYI-LABEL: @test_vst1q_s32(
-// NYI:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// NYI:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// NYI:   store <4 x i32> [[TMP3]], ptr %a
-// NYI:   ret void
-// void test_vst1q_s32(int32_t *a, int32x4_t b) {
-//   vst1q_s32(a, b);
-// }
+  // LLVM-LABEL: @test_vst1q_u64
+  // LLVM:   store <2 x i64> %{{.*}}, ptr %0, align 8,
+}
 
-// NYI-LABEL: @test_vst1q_s64(
-// NYI:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// NYI:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// NYI:   store <2 x i64> [[TMP3]], ptr %a
-// NYI:   ret void
-// void test_vst1q_s64(int64_t *a, int64x2_t b) {
-//   vst1q_s64(a, b);
-// }
+void test_vst1q_s8(int8_t *a, int8x16_t b) {
+  vst1q_s8(a, b);
+  // CIR-LABEL: @test_vst1q_s8
+  // CIR: %[[CAST:.*]] = cir.cast(bitcast, {{.*}} : !cir.ptr<!void>), !cir.ptr<!cir.vector<!s8i x 16>>
+  // CIR: cir.store align(1) %{{.*}}, %[[CAST]] : !cir.vector<!s8i x 16>, !cir.ptr<!cir.vector<!s8i x 16>>
+
+  // LLVM-LABEL: @test_vst1q_s8
+  // LLVM:   store <16 x i8> %{{.*}}, ptr %0, align 1,
+}
+
+void test_vst1q_s16(int16_t *a, int16x8_t b) {
+  vst1q_s16(a, b);
+  // CIR-LABEL: @test_vst1q_s16
+  // CIR: %[[CAST:.*]] = cir.cast(bitcast, {{.*}} : !cir.ptr<!void>), !cir.ptr<!cir.vector<!s16i x 8>>
+  // CIR: cir.store align(2) %{{.*}}, %[[CAST]] : !cir.vector<!s16i x 8>, !cir.ptr<!cir.vector<!s16i x 8>>
+
+  // LLVM-LABEL: @test_vst1q_s16
+  // LLVM:   store <8 x i16> %{{.*}}, ptr %0, align 2,
+}
+
+void test_vst1q_s32(int32_t *a, int32x4_t b) {
+  vst1q_s32(a, b);
+  // CIR-LABEL: @test_vst1q_s32
+  // CIR: %[[CAST:.*]] = cir.cast(bitcast, {{.*}} : !cir.ptr<!void>), !cir.ptr<!cir.vector<!s32i x 4>>
+  // CIR: cir.store align(4) %{{.*}}, %[[CAST]] : !cir.vector<!s32i x 4>, !cir.ptr<!cir.vector<!s32i x 4>>
+
+  // LLVM-LABEL: @test_vst1q_s32
+  // LLVM:   store <4 x i32> %{{.*}}, ptr %0, align 4,
+}
+
+void test_vst1q_s64(int64_t *a, int64x2_t b) {
+  vst1q_s64(a, b);
+  // CIR-LABEL: @test_vst1q_s64
+  // CIR: %[[CAST:.*]] = cir.cast(bitcast, {{.*}} : !cir.ptr<!void>), !cir.ptr<!cir.vector<!s64i x 2>>
+  // CIR: cir.store align(8) %{{.*}}, %[[CAST]] : !cir.vector<!s64i x 2>, !cir.ptr<!cir.vector<!s64i x 2>>
+
+  // LLVM-LABEL: @test_vst1q_s64
+  // LLVM:   store <2 x i64> %{{.*}}, ptr %0, align 8,
+}
 
 // NYI-LABEL: @test_vst1q_f16(
 // NYI:   [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>