From 3e70aaca1f1d7de6c68e7bc8cb98798176757895 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Mon, 20 May 2024 16:21:18 +0100 Subject: [PATCH 1/6] ARM64-SVE: LeadingSignCount + LeadingZeroCount --- src/coreclr/jit/hwintrinsiclistarm64sve.h | 2 + .../Arm/Sve.PlatformNotSupported.cs | 103 ++++++ .../src/System/Runtime/Intrinsics/Arm/Sve.cs | 103 ++++++ .../ref/System.Runtime.Intrinsics.cs | 13 + .../GenerateHWIntrinsicTests_Arm.cs | 14 + .../HardwareIntrinsics/Arm/Shared/Helpers.cs | 41 +++ .../_SveMasklessUnaryOpTestTemplate.template | 303 ++++++++++++++++++ 7 files changed, 579 insertions(+) create mode 100644 src/tests/JIT/HardwareIntrinsics/Arm/Shared/_SveMasklessUnaryOpTestTemplate.template diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h index 00b0739604b3c..d9d70072efffa 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64sve.h +++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -68,6 +68,8 @@ HARDWARE_INTRINSIC(Sve, FusedMultiplyAddNegated, HARDWARE_INTRINSIC(Sve, FusedMultiplySubtract, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmls, INS_sve_fmls}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Sve, FusedMultiplySubtractBySelectedScalar, -1, 4, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmls, INS_sve_fmls}, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_FmaIntrinsic|HW_Flag_LowVectorOperation) HARDWARE_INTRINSIC(Sve, FusedMultiplySubtractNegated, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fnmls, INS_sve_fnmls}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Sve, LeadingSignCount, -1, -1, false, {INS_sve_cls, INS_invalid, INS_sve_cls, INS_invalid, INS_sve_cls, INS_invalid, INS_sve_cls, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation) +HARDWARE_INTRINSIC(Sve, LeadingZeroCount, -1, -1, false, {INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation) HARDWARE_INTRINSIC(Sve, LoadVector, -1, 2, true, {INS_sve_ld1b, INS_sve_ld1b, INS_sve_ld1h, INS_sve_ld1h, INS_sve_ld1w, INS_sve_ld1w, INS_sve_ld1d, INS_sve_ld1d, INS_sve_ld1w, INS_sve_ld1d}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, LoadVectorByteZeroExtendToInt16, -1, 2, false, {INS_invalid, INS_invalid, INS_sve_ld1b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, LoadVectorByteZeroExtendToInt32, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation) diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs index 346b2c1de1f00..ee49d604eb7d5 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs @@ -1319,6 +1319,109 @@ internal Arm64() { } /// public static unsafe Vector FusedMultiplySubtractNegated(Vector minuend, Vector left, Vector right) { throw new PlatformNotSupportedException(); } + + /// Count leading sign bits + + /// + /// svuint8_t svcls[_s8]_m(svuint8_t inactive, svbool_t pg, svint8_t op) + /// svuint8_t svcls[_s8]_x(svbool_t pg, svint8_t op) + /// svuint8_t svcls[_s8]_z(svbool_t pg, svint8_t op) + /// CLS Ztied.B, Pg/M, Zop.B + /// + public static unsafe Vector LeadingSignCount(Vector value) => LeadingSignCount(value) { throw new PlatformNotSupportedException(); } + + /// + /// svuint16_t svcls[_s16]_m(svuint16_t inactive, svbool_t pg, svint16_t op) + /// svuint16_t svcls[_s16]_x(svbool_t pg, svint16_t op) + /// svuint16_t svcls[_s16]_z(svbool_t pg, svint16_t op) + /// CLS Ztied.H, Pg/M, Zop.H + /// + public static unsafe Vector LeadingSignCount(Vector value) => LeadingSignCount(value) { throw new PlatformNotSupportedException(); } + + /// + /// svuint32_t svcls[_s32]_m(svuint32_t inactive, svbool_t pg, svint32_t op) + /// svuint32_t svcls[_s32]_x(svbool_t pg, svint32_t op) + /// svuint32_t svcls[_s32]_z(svbool_t pg, svint32_t op) + /// CLS Ztied.S, Pg/M, Zop.S + /// + public static unsafe Vector LeadingSignCount(Vector value) => LeadingSignCount(value) { throw new PlatformNotSupportedException(); } + + /// + /// svuint64_t svcls[_s64]_m(svuint64_t inactive, svbool_t pg, svint64_t op) + /// svuint64_t svcls[_s64]_x(svbool_t pg, svint64_t op) + /// svuint64_t svcls[_s64]_z(svbool_t pg, svint64_t op) + /// CLS Ztied.D, Pg/M, Zop.D + /// + public static unsafe Vector LeadingSignCount(Vector value) => LeadingSignCount(value); { throw new PlatformNotSupportedException(); } + + + /// Count leading zero bits + + /// + /// svuint8_t svclz[_s8]_m(svuint8_t inactive, svbool_t pg, svint8_t op) + /// svuint8_t svclz[_s8]_x(svbool_t pg, svint8_t op) + /// svuint8_t svclz[_s8]_z(svbool_t pg, svint8_t op) + /// CLZ Ztied.B, Pg/M, Zop.B + /// + public static unsafe Vector LeadingZeroCount(Vector value) => LeadingZeroCount(value) { throw new PlatformNotSupportedException(); } + + /// + /// svuint8_t svclz[_u8]_m(svuint8_t inactive, svbool_t pg, svuint8_t op) + /// svuint8_t svclz[_u8]_x(svbool_t pg, svuint8_t op) + /// svuint8_t svclz[_u8]_z(svbool_t pg, svuint8_t op) + /// CLZ Ztied.B, Pg/M, Zop.B + /// + public static unsafe Vector LeadingZeroCount(Vector value) => LeadingZeroCount(value) { throw new PlatformNotSupportedException(); } + + /// + /// svuint16_t svclz[_s16]_m(svuint16_t inactive, svbool_t pg, svint16_t op) + /// svuint16_t svclz[_s16]_x(svbool_t pg, svint16_t op) + /// svuint16_t svclz[_s16]_z(svbool_t pg, svint16_t op) + /// CLZ Ztied.H, Pg/M, Zop.H + /// + public static unsafe Vector LeadingZeroCount(Vector value) => LeadingZeroCount(value) { throw new PlatformNotSupportedException(); } + + /// + /// svuint16_t svclz[_u16]_m(svuint16_t inactive, svbool_t pg, svuint16_t op) + /// svuint16_t svclz[_u16]_x(svbool_t pg, svuint16_t op) + /// svuint16_t svclz[_u16]_z(svbool_t pg, svuint16_t op) + /// CLZ Ztied.H, Pg/M, Zop.H + /// + public static unsafe Vector LeadingZeroCount(Vector value) => LeadingZeroCount(value) { throw new PlatformNotSupportedException(); } + + /// + /// svuint32_t svclz[_s32]_m(svuint32_t inactive, svbool_t pg, svint32_t op) + /// svuint32_t svclz[_s32]_x(svbool_t pg, svint32_t op) + /// svuint32_t svclz[_s32]_z(svbool_t pg, svint32_t op) + /// CLZ Ztied.S, Pg/M, Zop.S + /// + public static unsafe Vector LeadingZeroCount(Vector value) => LeadingZeroCount(value) { throw new PlatformNotSupportedException(); } + + /// + /// svuint32_t svclz[_u32]_m(svuint32_t inactive, svbool_t pg, svuint32_t op) + /// svuint32_t svclz[_u32]_x(svbool_t pg, svuint32_t op) + /// svuint32_t svclz[_u32]_z(svbool_t pg, svuint32_t op) + /// CLZ Ztied.S, Pg/M, Zop.S + /// + public static unsafe Vector LeadingZeroCount(Vector value) => LeadingZeroCount(value) { throw new PlatformNotSupportedException(); } + + /// + /// svuint64_t svclz[_s64]_m(svuint64_t inactive, svbool_t pg, svint64_t op) + /// svuint64_t svclz[_s64]_x(svbool_t pg, svint64_t op) + /// svuint64_t svclz[_s64]_z(svbool_t pg, svint64_t op) + /// CLZ Ztied.D, Pg/M, Zop.D + /// + public static unsafe Vector LeadingZeroCount(Vector value) => LeadingZeroCount(value) { throw new PlatformNotSupportedException(); } + + /// + /// svuint64_t svclz[_u64]_m(svuint64_t inactive, svbool_t pg, svuint64_t op) + /// svuint64_t svclz[_u64]_x(svbool_t pg, svuint64_t op) + /// svuint64_t svclz[_u64]_z(svbool_t pg, svuint64_t op) + /// CLZ Ztied.D, Pg/M, Zop.D + /// + public static unsafe Vector LeadingZeroCount(Vector value) => LeadingZeroCount(value) { throw new PlatformNotSupportedException(); } + + /// LoadVector : Unextended load /// diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs index b6c4be424c056..a549349178eb3 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs @@ -1375,6 +1375,109 @@ internal Arm64() { } /// public static unsafe Vector FusedMultiplySubtractNegated(Vector minuend, Vector left, Vector right) => FusedMultiplySubtractNegated(minuend, left, right); + + /// LeadingSignCount : Count leading sign bits + + /// + /// svuint8_t svcls[_s8]_m(svuint8_t inactive, svbool_t pg, svint8_t op) + /// svuint8_t svcls[_s8]_x(svbool_t pg, svint8_t op) + /// svuint8_t svcls[_s8]_z(svbool_t pg, svint8_t op) + /// CLS Ztied.B, Pg/M, Zop.B + /// + public static unsafe Vector LeadingSignCount(Vector value) => LeadingSignCount(value); + + /// + /// svuint16_t svcls[_s16]_m(svuint16_t inactive, svbool_t pg, svint16_t op) + /// svuint16_t svcls[_s16]_x(svbool_t pg, svint16_t op) + /// svuint16_t svcls[_s16]_z(svbool_t pg, svint16_t op) + /// CLS Ztied.H, Pg/M, Zop.H + /// + public static unsafe Vector LeadingSignCount(Vector value) => LeadingSignCount(value); + + /// + /// svuint32_t svcls[_s32]_m(svuint32_t inactive, svbool_t pg, svint32_t op) + /// svuint32_t svcls[_s32]_x(svbool_t pg, svint32_t op) + /// svuint32_t svcls[_s32]_z(svbool_t pg, svint32_t op) + /// CLS Ztied.S, Pg/M, Zop.S + /// + public static unsafe Vector LeadingSignCount(Vector value) => LeadingSignCount(value); + + /// + /// svuint64_t svcls[_s64]_m(svuint64_t inactive, svbool_t pg, svint64_t op) + /// svuint64_t svcls[_s64]_x(svbool_t pg, svint64_t op) + /// svuint64_t svcls[_s64]_z(svbool_t pg, svint64_t op) + /// CLS Ztied.D, Pg/M, Zop.D + /// + public static unsafe Vector LeadingSignCount(Vector value) => LeadingSignCount(value); + + + /// LeadingZeroCount : Count leading zero bits + + /// + /// svuint8_t svclz[_s8]_m(svuint8_t inactive, svbool_t pg, svint8_t op) + /// svuint8_t svclz[_s8]_x(svbool_t pg, svint8_t op) + /// svuint8_t svclz[_s8]_z(svbool_t pg, svint8_t op) + /// CLZ Ztied.B, Pg/M, Zop.B + /// + public static unsafe Vector LeadingZeroCount(Vector value) => LeadingZeroCount(value); + + /// + /// svuint8_t svclz[_u8]_m(svuint8_t inactive, svbool_t pg, svuint8_t op) + /// svuint8_t svclz[_u8]_x(svbool_t pg, svuint8_t op) + /// svuint8_t svclz[_u8]_z(svbool_t pg, svuint8_t op) + /// CLZ Ztied.B, Pg/M, Zop.B + /// + public static unsafe Vector LeadingZeroCount(Vector value) => LeadingZeroCount(value); + + /// + /// svuint16_t svclz[_s16]_m(svuint16_t inactive, svbool_t pg, svint16_t op) + /// svuint16_t svclz[_s16]_x(svbool_t pg, svint16_t op) + /// svuint16_t svclz[_s16]_z(svbool_t pg, svint16_t op) + /// CLZ Ztied.H, Pg/M, Zop.H + /// + public static unsafe Vector LeadingZeroCount(Vector value) => LeadingZeroCount(value); + + /// + /// svuint16_t svclz[_u16]_m(svuint16_t inactive, svbool_t pg, svuint16_t op) + /// svuint16_t svclz[_u16]_x(svbool_t pg, svuint16_t op) + /// svuint16_t svclz[_u16]_z(svbool_t pg, svuint16_t op) + /// CLZ Ztied.H, Pg/M, Zop.H + /// + public static unsafe Vector LeadingZeroCount(Vector value) => LeadingZeroCount(value); + + /// + /// svuint32_t svclz[_s32]_m(svuint32_t inactive, svbool_t pg, svint32_t op) + /// svuint32_t svclz[_s32]_x(svbool_t pg, svint32_t op) + /// svuint32_t svclz[_s32]_z(svbool_t pg, svint32_t op) + /// CLZ Ztied.S, Pg/M, Zop.S + /// + public static unsafe Vector LeadingZeroCount(Vector value) => LeadingZeroCount(value); + + /// + /// svuint32_t svclz[_u32]_m(svuint32_t inactive, svbool_t pg, svuint32_t op) + /// svuint32_t svclz[_u32]_x(svbool_t pg, svuint32_t op) + /// svuint32_t svclz[_u32]_z(svbool_t pg, svuint32_t op) + /// CLZ Ztied.S, Pg/M, Zop.S + /// + public static unsafe Vector LeadingZeroCount(Vector value) => LeadingZeroCount(value); + + /// + /// svuint64_t svclz[_s64]_m(svuint64_t inactive, svbool_t pg, svint64_t op) + /// svuint64_t svclz[_s64]_x(svbool_t pg, svint64_t op) + /// svuint64_t svclz[_s64]_z(svbool_t pg, svint64_t op) + /// CLZ Ztied.D, Pg/M, Zop.D + /// + public static unsafe Vector LeadingZeroCount(Vector value) => LeadingZeroCount(value); + + /// + /// svuint64_t svclz[_u64]_m(svuint64_t inactive, svbool_t pg, svuint64_t op) + /// svuint64_t svclz[_u64]_x(svbool_t pg, svuint64_t op) + /// svuint64_t svclz[_u64]_z(svbool_t pg, svuint64_t op) + /// CLZ Ztied.D, Pg/M, Zop.D + /// + public static unsafe Vector LeadingZeroCount(Vector value) => LeadingZeroCount(value); + + /// LoadVector : Unextended load /// diff --git a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs index 665f5d41ee98d..a13394dc07f33 100644 --- a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs +++ b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs @@ -4365,6 +4365,19 @@ internal Arm64() { } public static System.Numerics.Vector FusedMultiplySubtractNegated(System.Numerics.Vector minuend, System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } public static System.Numerics.Vector FusedMultiplySubtractNegated(System.Numerics.Vector minuend, System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } + public static System.Numerics.Vector LeadingSignCount(System.Numerics.Vector value) { throw null; } + public static System.Numerics.Vector LeadingSignCount(System.Numerics.Vector value) { throw null; } + public static System.Numerics.Vector LeadingSignCount(System.Numerics.Vector value) { throw null; } + public static System.Numerics.Vector LeadingSignCount(System.Numerics.Vector value) { throw null; } + public static System.Numerics.Vector LeadingZeroCount(System.Numerics.Vector value) { throw null; } + public static System.Numerics.Vector LeadingZeroCount(System.Numerics.Vector value) { throw null; } + public static System.Numerics.Vector LeadingZeroCount(System.Numerics.Vector value) { throw null; } + public static System.Numerics.Vector LeadingZeroCount(System.Numerics.Vector value) { throw null; } + public static System.Numerics.Vector LeadingZeroCount(System.Numerics.Vector value) { throw null; } + public static System.Numerics.Vector LeadingZeroCount(System.Numerics.Vector value) { throw null; } + public static System.Numerics.Vector LeadingZeroCount(System.Numerics.Vector value) { throw null; } + public static System.Numerics.Vector LeadingZeroCount(System.Numerics.Vector value) { throw null; } + public static unsafe System.Numerics.Vector LoadVector(System.Numerics.Vector mask, sbyte* address) { throw null; } public static unsafe System.Numerics.Vector LoadVector(System.Numerics.Vector mask, short* address) { throw null; } public static unsafe System.Numerics.Vector LoadVector(System.Numerics.Vector mask, int* address) { throw null; } diff --git a/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs b/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs index b37ec3a274b17..a4337e54e28c2 100644 --- a/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs +++ b/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs @@ -148,6 +148,7 @@ ("_SveImmTernOpTestTemplate.template", "SveVecImmTernOpTest.template", new Dictionary { ["TemplateName"] = "Simple", ["TemplateValidationLogic"] = SimpleVecOpTest_ValidationLogic, ["TemplateValidationLogicForCndSel"] = SimpleTernVecOpTest_ValidationLogicForCndSel }), ("_SveImmTernOpFirstArgTestTemplate.template", "SveVecImmTernOpFirstArgTest.template", new Dictionary { ["TemplateName"] = "Simple", ["TemplateValidationLogic"] = SimpleVecOpTest_ValidationLogic, ["TemplateValidationLogicForCndSel"] = SimpleTernVecOpTest_ValidationLogicForCndSel }), ("_SveMinimalUnaryOpTestTemplate.template", "SveVecReduceUnOpTest.template", new Dictionary { ["TemplateName"] = "Simple", ["TemplateValidationLogic"] = VecReduceOpTest_ValidationLogic }), + ("_SveMasklessUnaryOpTestTemplate.template","SveMasklessSimpleVecOpTest.template", new Dictionary { ["TemplateName"] = "Simple", ["TemplateValidationLogic"] = SimpleVecOpTest_ValidationLogic }), }; (string templateFileName, Dictionary templateData)[] AdvSimdInputs = new [] @@ -3104,6 +3105,19 @@ ("SveCreateTrueMaskTest.template", new Dictionary { ["TestName"] = "Sve_CreateTrueMaskUInt32", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "CreateTrueMaskUInt32", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt32", ["Op1Type"] = "SveMaskPattern"}), ("SveCreateTrueMaskTest.template", new Dictionary { ["TestName"] = "Sve_CreateTrueMaskUInt64", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "CreateTrueMaskUInt64", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt64", ["Op1Type"] = "SveMaskPattern"}), + ("SveMasklessSimpleVecOpTest.template", new Dictionary { ["TestName"] = "Sve_LeadingSignCount_byte_sbyte", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "LeadingSignCount", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "SByte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()", ["ValidateIterResult"] = "Helpers.CountLeadingSignBits(firstOp[i]) != result[i]", ["GetIterResult"] = "Helpers.CountLeadingSignBits(leftOp[i])"}), + ("SveMasklessSimpleVecOpTest.template", new Dictionary { ["TestName"] = "Sve_LeadingSignCount_ushort_short", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "LeadingSignCount", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt16", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Int16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()", ["ValidateIterResult"] = "Helpers.CountLeadingSignBits(firstOp[i]) != result[i]", ["GetIterResult"] = "Helpers.CountLeadingSignBits(leftOp[i])"}), + ("SveMasklessSimpleVecOpTest.template", new Dictionary { ["TestName"] = "Sve_LeadingSignCount_uint_int", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "LeadingSignCount", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Int32", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["ValidateIterResult"] = "Helpers.CountLeadingSignBits(firstOp[i]) != result[i]", ["GetIterResult"] = "Helpers.CountLeadingSignBits(leftOp[i])"}), + ("SveMasklessSimpleVecOpTest.template", new Dictionary { ["TestName"] = "Sve_LeadingSignCount_ulong_long", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "LeadingSignCount", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt64", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Int64", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt64()", ["ValidateIterResult"] = "((ulong)Helpers.CountLeadingSignBits(firstOp[i])) != result[i]", ["GetIterResult"] = "Helpers.CountLeadingSignBits(leftOp[i])"}), + ("SveMasklessSimpleVecOpTest.template", new Dictionary { ["TestName"] = "Sve_LeadingZeroCount_byte_sbyte", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "LeadingZeroCount", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "SByte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()", ["ValidateIterResult"] = "Helpers.CountLeadingZeroBits(firstOp[i]) != result[i]", ["GetIterResult"] = "Helpers.CountLeadingZeroBits(leftOp[i])"}), + ("SveMasklessSimpleVecOpTest.template", new Dictionary { ["TestName"] = "Sve_LeadingZeroCount_ushort_short", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "LeadingZeroCount", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt16", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Int16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()", ["ValidateIterResult"] = "Helpers.CountLeadingZeroBits(firstOp[i]) != result[i]", ["GetIterResult"] = "Helpers.CountLeadingZeroBits(leftOp[i])"}), + ("SveMasklessSimpleVecOpTest.template", new Dictionary { ["TestName"] = "Sve_LeadingZeroCount_uint_int", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "LeadingZeroCount", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Int32", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["ValidateIterResult"] = "Helpers.CountLeadingZeroBits(firstOp[i]) != result[i]", ["GetIterResult"] = "Helpers.CountLeadingZeroBits(leftOp[i])"}), + ("SveMasklessSimpleVecOpTest.template", new Dictionary { ["TestName"] = "Sve_LeadingZeroCount_ulong_long", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "LeadingZeroCount", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt64", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Int64", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt64()", ["ValidateIterResult"] = "((ulong)Helpers.CountLeadingZeroBits(firstOp[i])) != result[i]", ["GetIterResult"] = "Helpers.CountLeadingZeroBits(leftOp[i])"}), + ("SveSimpleVecOpTest.template", new Dictionary { ["TestName"] = "Sve_LeadingZeroCount_byte", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "LeadingZeroCount", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Byte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["ValidateIterResult"] = "Helpers.CountLeadingZeroBits(firstOp[i]) != result[i]", ["GetIterResult"] = "Helpers.CountLeadingZeroBits(leftOp[i])"}), + ("SveSimpleVecOpTest.template", new Dictionary { ["TestName"] = "Sve_LeadingZeroCount_ushort", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "LeadingZeroCount", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt16", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "UInt16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt16()", ["ValidateIterResult"] = "Helpers.CountLeadingZeroBits(firstOp[i]) != result[i]", ["GetIterResult"] = "Helpers.CountLeadingZeroBits(leftOp[i])"}), + ("SveSimpleVecOpTest.template", new Dictionary { ["TestName"] = "Sve_LeadingZeroCount_uint", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "LeadingZeroCount", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "UInt32", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["ValidateIterResult"] = "Helpers.CountLeadingZeroBits(firstOp[i]) != result[i]", ["GetIterResult"] = "Helpers.CountLeadingZeroBits(leftOp[i])"}), + ("SveSimpleVecOpTest.template", new Dictionary { ["TestName"] = "Sve_LeadingZeroCount_ulong", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "LeadingZeroCount", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt64", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "UInt64", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()", ["ValidateIterResult"] = "Helpers.CountLeadingZeroBits(firstOp[i]) != result[i]", ["GetIterResult"] = "Helpers.CountLeadingZeroBits(leftOp[i])"}), + ("SveLoadMaskedUnOpTest.template", new Dictionary { ["TestName"] = "SveLoadVector_float", ["Isa"] = "Sve", ["Method"] = "LoadVector", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["ValidateIterResult"] = "firstOp[i] != result[i]"}), ("SveLoadMaskedUnOpTest.template", new Dictionary { ["TestName"] = "SveLoadVector_double", ["Isa"] = "Sve", ["Method"] = "LoadVector", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["ValidateIterResult"] = "firstOp[i] != result[i]"}), ("SveLoadMaskedUnOpTest.template", new Dictionary { ["TestName"] = "SveLoadVector_sbyte", ["Isa"] = "Sve", ["Method"] = "LoadVector", ["RetVectorType"] = "Vector", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "SByte", ["Op2BaseType"] = "SByte", ["LargestVectorSize"] = "64", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["ValidateIterResult"] = "firstOp[i] != result[i]"}), diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs index d6b561bd0fab5..e600618035651 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs @@ -28,6 +28,11 @@ public static int CountLeadingSignBits(int op1) return (int)(CountLeadingZeroBits((int)((ulong)op1 ^ ((ulong)op1 >> 1))) - 1); } + public static long CountLeadingSignBits(long op1) + { + return (long)(CountLeadingZeroBits((long)((ulong)op1 ^ ((ulong)op1 >> 1))) - 1); + } + public static sbyte CountLeadingZeroBits(sbyte op1) { return (sbyte)(8 * sizeof(sbyte) - (HighestSetBit(op1) + 1)); @@ -136,6 +141,42 @@ private static int HighestSetBit(uint op1) return -1; } + public static long CountLeadingZeroBits(long op1) + { + return (long)(8 * sizeof(long) - (HighestSetBit(op1) + 1)); + } + + private static int HighestSetBit(long op1) + { + for (int i = 8 * sizeof(long) - 1; i >= 0; i--) + { + if (((ulong)op1 & (1UL << i)) != 0) + { + return i; + } + } + + return -1; + } + + public static ulong CountLeadingZeroBits(ulong op1) + { + return (ulong)(8 * sizeof(ulong) - (HighestSetBit(op1) + 1)); + } + + private static int HighestSetBit(ulong op1) + { + for (int i = 8 * sizeof(ulong) - 1; i >= 0; i--) + { + if (((ulong)op1 & (1UL << i)) != 0) + { + return i; + } + } + + return -1; + } + public static sbyte BitCount(sbyte op1) { int result = 0; diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/_SveMasklessUnaryOpTestTemplate.template b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/_SveMasklessUnaryOpTestTemplate.template new file mode 100644 index 0000000000000..1d789ccbb6133 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/_SveMasklessUnaryOpTestTemplate.template @@ -0,0 +1,303 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +/****************************************************************************** + * This file is auto-generated from a template file by the GenerateTests.csx * + * script in tests\src\JIT\HardwareIntrinsics.Arm\Shared. In order to make * + * changes, please update the corresponding template and run according to the * + * directions listed in the file. * + ******************************************************************************/ + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.Arm; +using Xunit; + +namespace JIT.HardwareIntrinsics.Arm +{ + public static partial class Program + { + [Fact] + public static void {TestName}() + { + var test = new {TemplateName}UnaryOpTest__{TestName}(); + + if (test.IsSupported) + { + // Validates basic functionality works, using Unsafe.Read + test.RunBasicScenario_UnsafeRead(); + + if ({LoadIsa}.IsSupported) + { + // Validates basic functionality works, using Load + test.RunBasicScenario_Load(); + } + + // Validates calling via reflection works, using Unsafe.Read + test.RunReflectionScenario_UnsafeRead(); + + // Validates passing a local works, using Unsafe.Read + test.RunLclVarScenario_UnsafeRead(); + + // Validates passing an instance member of a class works + test.RunClassFldScenario(); + + // Validates passing the field of a local struct works + test.RunStructLclFldScenario(); + + // Validates passing an instance member of a struct works + test.RunStructFldScenario(); + } + else + { + // Validates we throw on unsupported hardware + test.RunUnsupportedScenario(); + } + + if (!test.Succeeded) + { + throw new Exception("One or more scenarios did not complete as expected."); + } + } + } + + public sealed unsafe class {TemplateName}UnaryOpTest__{TestName} + { + private struct DataTable + { + private byte[] inArray1; + private byte[] outArray; + + private GCHandle inHandle1; + private GCHandle outHandle; + + private ulong alignment; + + public DataTable({Op1BaseType}[] inArray1, {RetBaseType}[] outArray, int alignment) + { + int sizeOfinArray1 = inArray1.Length * Unsafe.SizeOf<{Op1BaseType}>(); + int sizeOfoutArray = outArray.Length * Unsafe.SizeOf<{RetBaseType}>(); + if ((alignment != 64 && alignment != 16 && alignment != 8) || (alignment * 2) < sizeOfinArray1 || (alignment * 2) < sizeOfoutArray) + { + throw new ArgumentException($"Invalid value of alignment: {alignment}, sizeOfinArray1: {sizeOfinArray1}, sizeOfoutArray: {sizeOfoutArray}"); + } + + this.inArray1 = new byte[alignment * 2]; + this.outArray = new byte[alignment * 2]; + + this.inHandle1 = GCHandle.Alloc(this.inArray1, GCHandleType.Pinned); + this.outHandle = GCHandle.Alloc(this.outArray, GCHandleType.Pinned); + + this.alignment = (ulong)alignment; + + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray1Ptr), ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), (uint)sizeOfinArray1); + } + + public void* inArray1Ptr => Align((byte*)(inHandle1.AddrOfPinnedObject().ToPointer()), alignment); + public void* outArrayPtr => Align((byte*)(outHandle.AddrOfPinnedObject().ToPointer()), alignment); + + public void Dispose() + { + inHandle1.Free(); + outHandle.Free(); + } + + private static unsafe void* Align(byte* buffer, ulong expectedAlignment) + { + return (void*)(((ulong)buffer + expectedAlignment - 1) & ~(expectedAlignment - 1)); + } + } + + private struct TestStruct + { + public {Op1VectorType}<{Op1BaseType}> _fld1; + + public static TestStruct Create() + { + var testStruct = new TestStruct(); + + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref testStruct._fld1), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + + return testStruct; + } + + public void RunStructFldScenario({TemplateName}UnaryOpTest__{TestName} testClass) + { + var result = {Isa}.{Method}(_fld1); + + Unsafe.Write(testClass._dataTable.outArrayPtr, result); + testClass.ValidateResult(_fld1, testClass._dataTable.outArrayPtr); + } + } + + private static readonly int LargestVectorSize = {LargestVectorSize}; + + private static readonly int Op1ElementCount = Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>() / sizeof({Op1BaseType}); + private static readonly int RetElementCount = Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>() / sizeof({RetBaseType}); + + private static {Op1BaseType}[] _data1 = new {Op1BaseType}[Op1ElementCount]; + + private {Op1VectorType}<{Op1BaseType}> _mask; + private {Op1VectorType}<{Op1BaseType}> _fld1; + + private DataTable _dataTable; + + public {TemplateName}UnaryOpTest__{TestName}() + { + Succeeded = true; + + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _fld1), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + _dataTable = new DataTable(_data1, new {RetBaseType}[RetElementCount], LargestVectorSize); + } + + public bool IsSupported => {Isa}.IsSupported; + + public bool Succeeded { get; set; } + + public void RunBasicScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead)); + + var result = {Isa}.{Method}( + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray1Ptr, _dataTable.outArrayPtr); + } + + public void RunBasicScenario_Load() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load)); + + {Op1VectorType}<{Op1BaseType}> loadMask = Sve.CreateTrueMask{Op1BaseType}(SveMaskPattern.All); + + var result = {Isa}.{Method}( + {LoadIsa}.Load{Op1VectorType}(loadMask, ({Op1BaseType}*)(_dataTable.inArray1Ptr)) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray1Ptr, _dataTable.outArrayPtr); + } + + public void RunReflectionScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead)); + + var result = typeof({Isa}).GetMethod(nameof({Isa}.{Method}), new Type[] { typeof({Op1VectorType}<{Op1BaseType}>) }) + .Invoke(null, new object[] { + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr) + }); + + Unsafe.Write(_dataTable.outArrayPtr, ({RetVectorType}<{RetBaseType}>)(result)); + ValidateResult(_dataTable.inArray1Ptr, _dataTable.outArrayPtr); + } + + public void RunLclVarScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead)); + + var op1 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr); + var result = {Isa}.{Method}(op1); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(op1, _dataTable.outArrayPtr); + } + + public void RunClassFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario)); + + var result = {Isa}.{Method}(_fld1); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_fld1, _dataTable.outArrayPtr); + } + + public void RunStructLclFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario)); + + var test = TestStruct.Create(); + var result = {Isa}.{Method}(test._fld1); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(test._fld1, _dataTable.outArrayPtr); + } + + public void RunStructFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario)); + + var test = TestStruct.Create(); + test.RunStructFldScenario(this); + } + + public void RunUnsupportedScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario)); + + bool succeeded = false; + + try + { + RunBasicScenario_UnsafeRead(); + } + catch (PlatformNotSupportedException) + { + succeeded = true; + } + + if (!succeeded) + { + Succeeded = false; + } + } + + private void ValidateResult({Op1VectorType}<{Op1BaseType}> op1, void* result, [CallerMemberName] string method = "") + { + {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount]; + {RetBaseType}[] outArray = new {RetBaseType}[RetElementCount]; + + Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), op1); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>()); + + ValidateResult(inArray1, outArray, method); + } + + private void ValidateResult(void* op1, void* result, [CallerMemberName] string method = "") + { + {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount]; + {RetBaseType}[] outArray = new {RetBaseType}[RetElementCount]; + + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), ref Unsafe.AsRef(op1), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>()); + + ValidateResult(inArray1, outArray, method); + } + + private void ValidateResult({Op1BaseType}[] firstOp, {RetBaseType}[] result, [CallerMemberName] string method = "") + { + bool succeeded = true; + + {TemplateValidationLogic} + + if (!succeeded) + { + TestLibrary.TestFramework.LogInformation($"{nameof({Isa})}.{nameof({Isa}.{Method})}<{RetBaseType}>({Op1VectorType}<{Op1BaseType}>): {method} failed:"); + TestLibrary.TestFramework.LogInformation($" firstOp: ({string.Join(", ", firstOp)})"); + TestLibrary.TestFramework.LogInformation($" result: ({string.Join(", ", result)})"); + TestLibrary.TestFramework.LogInformation(string.Empty); + + Succeeded = false; + } + } + } +} From ae89eba67bf0c125615cc62edbcc8040ec8e1ef9 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Wed, 22 May 2024 11:07:58 +0100 Subject: [PATCH 2/6] Add popcount --- src/coreclr/jit/hwintrinsiclistarm64sve.h | 45 ++++----- .../Arm/Sve.PlatformNotSupported.cs | 93 ++++++++++++++++++ .../src/System/Runtime/Intrinsics/Arm/Sve.cs | 93 ++++++++++++++++++ .../ref/System.Runtime.Intrinsics.cs | 11 +++ .../GenerateHWIntrinsicTests_Arm.cs | 11 +++ .../HardwareIntrinsics/Arm/Shared/Helpers.cs | 94 +++++++++++++++++++ 6 files changed, 325 insertions(+), 22 deletions(-) diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h index d9d70072efffa..55f66fd445aed 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64sve.h +++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -95,36 +95,37 @@ HARDWARE_INTRINSIC(Sve, LoadVectorUInt16ZeroExtendToUInt32, HARDWARE_INTRINSIC(Sve, LoadVectorUInt16ZeroExtendToUInt64, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1h, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, LoadVectorUInt32ZeroExtendToInt64, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1w, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, LoadVectorUInt32ZeroExtendToUInt64, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1w, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, Max, -1, -1, false, {INS_sve_smax, INS_sve_umax, INS_sve_smax, INS_sve_umax, INS_sve_smax, INS_sve_umax, INS_sve_smax, INS_sve_umax, INS_sve_fmax, INS_sve_fmax}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve, MaxAcross, -1, -1, false, {INS_sve_smaxv, INS_sve_umaxv, INS_sve_smaxv, INS_sve_umaxv, INS_sve_smaxv, INS_sve_umaxv, INS_sve_smaxv, INS_sve_umaxv, INS_sve_fmaxv, INS_sve_fmaxv}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation) -HARDWARE_INTRINSIC(Sve, MaxNumber, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmaxnm, INS_sve_fmaxnm}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve, MaxNumberAcross, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmaxnmv, INS_sve_fmaxnmv}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation) -HARDWARE_INTRINSIC(Sve, Min, -1, -1, false, {INS_sve_smin, INS_sve_umin, INS_sve_smin, INS_sve_umin, INS_sve_smin, INS_sve_umin, INS_sve_smin, INS_sve_umin, INS_sve_fmin, INS_sve_fmin}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve, MinAcross, -1, -1, false, {INS_sve_sminv, INS_sve_uminv, INS_sve_sminv, INS_sve_uminv, INS_sve_sminv, INS_sve_uminv, INS_sve_sminv, INS_sve_uminv, INS_sve_fminv, INS_sve_fminv}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation) -HARDWARE_INTRINSIC(Sve, MinNumber, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fminnm, INS_sve_fminnm}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve, MinNumberAcross, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fminnmv, INS_sve_fminnmv}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation) +HARDWARE_INTRINSIC(Sve, Max, -1, -1, false, {INS_sve_smax, INS_sve_umax, INS_sve_smax, INS_sve_umax, INS_sve_smax, INS_sve_umax, INS_sve_smax, INS_sve_umax, INS_sve_fmax, INS_sve_fmax}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve, MaxAcross, -1, -1, false, {INS_sve_smaxv, INS_sve_umaxv, INS_sve_smaxv, INS_sve_umaxv, INS_sve_smaxv, INS_sve_umaxv, INS_sve_smaxv, INS_sve_umaxv, INS_sve_fmaxv, INS_sve_fmaxv}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation) +HARDWARE_INTRINSIC(Sve, MaxNumber, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmaxnm, INS_sve_fmaxnm}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve, MaxNumberAcross, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmaxnmv, INS_sve_fmaxnmv}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation) +HARDWARE_INTRINSIC(Sve, Min, -1, -1, false, {INS_sve_smin, INS_sve_umin, INS_sve_smin, INS_sve_umin, INS_sve_smin, INS_sve_umin, INS_sve_smin, INS_sve_umin, INS_sve_fmin, INS_sve_fmin}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve, MinAcross, -1, -1, false, {INS_sve_sminv, INS_sve_uminv, INS_sve_sminv, INS_sve_uminv, INS_sve_sminv, INS_sve_uminv, INS_sve_sminv, INS_sve_uminv, INS_sve_fminv, INS_sve_fminv}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation) +HARDWARE_INTRINSIC(Sve, MinNumber, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fminnm, INS_sve_fminnm}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve, MinNumberAcross, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fminnmv, INS_sve_fminnmv}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation) HARDWARE_INTRINSIC(Sve, Multiply, -1, 2, true, {INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_fmul, INS_sve_fmul}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, MultiplyAdd, -1, -1, false, {INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Sve, MultiplyAdd, -1, -1, false, {INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Sve, MultiplyBySelectedScalar, -1, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmul, INS_sve_fmul}, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_LowVectorOperation) -HARDWARE_INTRINSIC(Sve, MultiplyExtended, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmulx, INS_sve_fmulx}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, MultiplySubtract, -1, -1, false, {INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Sve, Negate, -1, -1, false, {INS_sve_neg, INS_invalid, INS_sve_neg, INS_invalid, INS_sve_neg, INS_invalid, INS_sve_neg, INS_invalid, INS_sve_fneg, INS_sve_fneg}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, Or, -1, -1, false, {INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, OrAcross, -1, -1, false, {INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, SignExtend16, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sxth, INS_invalid, INS_sve_sxth, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, SignExtend32, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sxtw, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, SignExtend8, -1, -1, false, {INS_invalid, INS_invalid, INS_sve_sxtb, INS_invalid, INS_sve_sxtb, INS_invalid, INS_sve_sxtb, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, MultiplyExtended, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmulx, INS_sve_fmulx}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, MultiplySubtract, -1, -1, false, {INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Sve, Negate, -1, -1, false, {INS_sve_neg, INS_invalid, INS_sve_neg, INS_invalid, INS_sve_neg, INS_invalid, INS_sve_neg, INS_invalid, INS_sve_fneg, INS_sve_fneg}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, Or, -1, -1, false, {INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, OrAcross, -1, -1, false, {INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, PopCount, -1, -1, false, {INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, SignExtend16, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sxth, INS_invalid, INS_sve_sxth, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, SignExtend32, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sxtw, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, SignExtend8, -1, -1, false, {INS_invalid, INS_invalid, INS_sve_sxtb, INS_invalid, INS_sve_sxtb, INS_invalid, INS_sve_sxtb, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, SignExtendWideningLower, -1, 1, true, {INS_sve_sunpklo, INS_invalid, INS_sve_sunpklo, INS_invalid, INS_sve_sunpklo, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Sve, SignExtendWideningUpper, -1, 1, true, {INS_sve_sunpkhi, INS_invalid, INS_sve_sunpkhi, INS_invalid, INS_sve_sunpkhi, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Sve, Subtract, -1, 2, true, {INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_fsub, INS_sve_fsub}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, SubtractSaturate, -1, 2, true, {INS_sve_sqsub, INS_sve_uqsub, INS_sve_sqsub, INS_sve_uqsub, INS_sve_sqsub, INS_sve_uqsub, INS_sve_sqsub, INS_sve_uqsub, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, UnzipEven, -1, 2, true, {INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Sve, UnzipOdd, -1, 2, true, {INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Sve, Xor, -1, -1, false, {INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, XorAcross, -1, -1, false, {INS_sve_eorv, INS_sve_eorv, INS_sve_eorv, INS_sve_eorv, INS_sve_eorv, INS_sve_eorv, INS_sve_eorv, INS_sve_eorv, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, ZeroExtend16, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_uxth, INS_invalid, INS_sve_uxth, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, ZeroExtend32, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_uxtw, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, ZeroExtend8, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_sve_uxtb, INS_invalid, INS_sve_uxtb, INS_invalid, INS_sve_uxtb, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, Xor, -1, -1, false, {INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, XorAcross, -1, -1, false, {INS_sve_eorv, INS_sve_eorv, INS_sve_eorv, INS_sve_eorv, INS_sve_eorv, INS_sve_eorv, INS_sve_eorv, INS_sve_eorv, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, ZeroExtend16, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_uxth, INS_invalid, INS_sve_uxth, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, ZeroExtend32, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_uxtw, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, ZeroExtend8, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_sve_uxtb, INS_invalid, INS_sve_uxtb, INS_invalid, INS_sve_uxtb, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, ZeroExtendWideningLower, -1, 1, true, {INS_invalid, INS_sve_uunpklo, INS_invalid, INS_sve_uunpklo, INS_invalid, INS_sve_uunpklo, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Sve, ZeroExtendWideningUpper, -1, 1, true, {INS_invalid, INS_sve_uunpkhi, INS_invalid, INS_sve_uunpkhi, INS_invalid, INS_sve_uunpkhi, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Sve, ZipHigh, -1, 2, true, {INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen) diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs index ee49d604eb7d5..e3f178b23bc0c 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs @@ -2593,6 +2593,99 @@ internal Arm64() { } public static unsafe Vector OrAcross(Vector value) { throw new PlatformNotSupportedException(); } + /// Count nonzero bits + + /// + /// svuint8_t svcnt[_s8]_m(svuint8_t inactive, svbool_t pg, svint8_t op) + /// CNT Ztied.B, Pg/M, Zop.B + /// svuint8_t svcnt[_s8]_x(svbool_t pg, svint8_t op) + /// CNT Ztied.B, Pg/M, Ztied.B + /// svuint8_t svcnt[_s8]_z(svbool_t pg, svint8_t op) + /// + public static unsafe Vector PopCount(Vector value) { throw new PlatformNotSupportedException(); } + + /// + /// svuint8_t svcnt[_u8]_m(svuint8_t inactive, svbool_t pg, svuint8_t op) + /// CNT Ztied.B, Pg/M, Zop.B + /// svuint8_t svcnt[_u8]_x(svbool_t pg, svuint8_t op) + /// CNT Ztied.B, Pg/M, Ztied.B + /// svuint8_t svcnt[_u8]_z(svbool_t pg, svuint8_t op) + /// + public static unsafe Vector PopCount(Vector value) { throw new PlatformNotSupportedException(); } + + /// + /// svuint16_t svcnt[_s16]_m(svuint16_t inactive, svbool_t pg, svint16_t op) + /// CNT Ztied.H, Pg/M, Zop.H + /// svuint16_t svcnt[_s16]_x(svbool_t pg, svint16_t op) + /// CNT Ztied.H, Pg/M, Ztied.H + /// svuint16_t svcnt[_s16]_z(svbool_t pg, svint16_t op) + /// + public static unsafe Vector PopCount(Vector value) { throw new PlatformNotSupportedException(); } + + /// + /// svuint16_t svcnt[_u16]_m(svuint16_t inactive, svbool_t pg, svuint16_t op) + /// CNT Ztied.H, Pg/M, Zop.H + /// svuint16_t svcnt[_u16]_x(svbool_t pg, svuint16_t op) + /// CNT Ztied.H, Pg/M, Ztied.H + /// svuint16_t svcnt[_u16]_z(svbool_t pg, svuint16_t op) + /// + public static unsafe Vector PopCount(Vector value) { throw new PlatformNotSupportedException(); } + + /// + /// svuint32_t svcnt[_s32]_m(svuint32_t inactive, svbool_t pg, svint32_t op) + /// CNT Ztied.S, Pg/M, Zop.S + /// svuint32_t svcnt[_s32]_x(svbool_t pg, svint32_t op) + /// CNT Ztied.S, Pg/M, Ztied.S + /// svuint32_t svcnt[_s32]_z(svbool_t pg, svint32_t op) + /// + public static unsafe Vector PopCount(Vector value) { throw new PlatformNotSupportedException(); } + + /// + /// svuint32_t svcnt[_f32]_m(svuint32_t inactive, svbool_t pg, svfloat32_t op) + /// CNT Ztied.S, Pg/M, Zop.S + /// svuint32_t svcnt[_f32]_x(svbool_t pg, svfloat32_t op) + /// CNT Ztied.S, Pg/M, Ztied.S + /// svuint32_t svcnt[_f32]_z(svbool_t pg, svfloat32_t op) + /// + public static unsafe Vector PopCount(Vector value) { throw new PlatformNotSupportedException(); } + + /// + /// svuint32_t svcnt[_u32]_m(svuint32_t inactive, svbool_t pg, svuint32_t op) + /// CNT Ztied.S, Pg/M, Zop.S + /// svuint32_t svcnt[_u32]_x(svbool_t pg, svuint32_t op) + /// CNT Ztied.S, Pg/M, Ztied.S + /// svuint32_t svcnt[_u32]_z(svbool_t pg, svuint32_t op) + /// + public static unsafe Vector PopCount(Vector value) { throw new PlatformNotSupportedException(); } + + /// + /// svuint64_t svcnt[_f64]_m(svuint64_t inactive, svbool_t pg, svfloat64_t op) + /// CNT Ztied.D, Pg/M, Zop.D + /// svuint64_t svcnt[_f64]_x(svbool_t pg, svfloat64_t op) + /// CNT Ztied.D, Pg/M, Ztied.D + /// svuint64_t svcnt[_f64]_z(svbool_t pg, svfloat64_t op) + /// + public static unsafe Vector PopCount(Vector value) { throw new PlatformNotSupportedException(); } + + /// + /// svuint64_t svcnt[_s64]_m(svuint64_t inactive, svbool_t pg, svint64_t op) + /// CNT Ztied.D, Pg/M, Zop.D + /// svuint64_t svcnt[_s64]_x(svbool_t pg, svint64_t op) + /// CNT Ztied.D, Pg/M, Ztied.D + /// svuint64_t svcnt[_s64]_z(svbool_t pg, svint64_t op) + /// + public static unsafe Vector PopCount(Vector value) { throw new PlatformNotSupportedException(); } + + /// + /// svuint64_t svcnt[_u64]_m(svuint64_t inactive, svbool_t pg, svuint64_t op) + /// CNT Ztied.D, Pg/M, Zop.D + /// svuint64_t svcnt[_u64]_x(svbool_t pg, svuint64_t op) + /// CNT Ztied.D, Pg/M, Ztied.D + /// svuint64_t svcnt[_u64]_z(svbool_t pg, svuint64_t op) + /// + public static unsafe Vector PopCount(Vector value) { throw new PlatformNotSupportedException(); } + + /// SignExtend16 : Sign-extend the low 16 bits /// diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs index a549349178eb3..993cc39352ed8 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs @@ -2648,6 +2648,99 @@ internal Arm64() { } public static unsafe Vector OrAcross(Vector value) => OrAcross(value); + /// Count nonzero bits + + /// + /// svuint8_t svcnt[_s8]_m(svuint8_t inactive, svbool_t pg, svint8_t op) + /// CNT Ztied.B, Pg/M, Zop.B + /// svuint8_t svcnt[_s8]_x(svbool_t pg, svint8_t op) + /// CNT Ztied.B, Pg/M, Ztied.B + /// svuint8_t svcnt[_s8]_z(svbool_t pg, svint8_t op) + /// + public static unsafe Vector PopCount(Vector value) => PopCount(value); + + /// + /// svuint8_t svcnt[_u8]_m(svuint8_t inactive, svbool_t pg, svuint8_t op) + /// CNT Ztied.B, Pg/M, Zop.B + /// svuint8_t svcnt[_u8]_x(svbool_t pg, svuint8_t op) + /// CNT Ztied.B, Pg/M, Ztied.B + /// svuint8_t svcnt[_u8]_z(svbool_t pg, svuint8_t op) + /// + public static unsafe Vector PopCount(Vector value) => PopCount(value); + + /// + /// svuint16_t svcnt[_s16]_m(svuint16_t inactive, svbool_t pg, svint16_t op) + /// CNT Ztied.H, Pg/M, Zop.H + /// svuint16_t svcnt[_s16]_x(svbool_t pg, svint16_t op) + /// CNT Ztied.H, Pg/M, Ztied.H + /// svuint16_t svcnt[_s16]_z(svbool_t pg, svint16_t op) + /// + public static unsafe Vector PopCount(Vector value) => PopCount(value); + + /// + /// svuint16_t svcnt[_u16]_m(svuint16_t inactive, svbool_t pg, svuint16_t op) + /// CNT Ztied.H, Pg/M, Zop.H + /// svuint16_t svcnt[_u16]_x(svbool_t pg, svuint16_t op) + /// CNT Ztied.H, Pg/M, Ztied.H + /// svuint16_t svcnt[_u16]_z(svbool_t pg, svuint16_t op) + /// + public static unsafe Vector PopCount(Vector value) => PopCount(value); + + /// + /// svuint32_t svcnt[_s32]_m(svuint32_t inactive, svbool_t pg, svint32_t op) + /// CNT Ztied.S, Pg/M, Zop.S + /// svuint32_t svcnt[_s32]_x(svbool_t pg, svint32_t op) + /// CNT Ztied.S, Pg/M, Ztied.S + /// svuint32_t svcnt[_s32]_z(svbool_t pg, svint32_t op) + /// + public static unsafe Vector PopCount(Vector value) => PopCount(value); + + /// + /// svuint32_t svcnt[_f32]_m(svuint32_t inactive, svbool_t pg, svfloat32_t op) + /// CNT Ztied.S, Pg/M, Zop.S + /// svuint32_t svcnt[_f32]_x(svbool_t pg, svfloat32_t op) + /// CNT Ztied.S, Pg/M, Ztied.S + /// svuint32_t svcnt[_f32]_z(svbool_t pg, svfloat32_t op) + /// + public static unsafe Vector PopCount(Vector value) => PopCount(value); + + /// + /// svuint32_t svcnt[_u32]_m(svuint32_t inactive, svbool_t pg, svuint32_t op) + /// CNT Ztied.S, Pg/M, Zop.S + /// svuint32_t svcnt[_u32]_x(svbool_t pg, svuint32_t op) + /// CNT Ztied.S, Pg/M, Ztied.S + /// svuint32_t svcnt[_u32]_z(svbool_t pg, svuint32_t op) + /// + public static unsafe Vector PopCount(Vector value) => PopCount(value); + + /// + /// svuint64_t svcnt[_f64]_m(svuint64_t inactive, svbool_t pg, svfloat64_t op) + /// CNT Ztied.D, Pg/M, Zop.D + /// svuint64_t svcnt[_f64]_x(svbool_t pg, svfloat64_t op) + /// CNT Ztied.D, Pg/M, Ztied.D + /// svuint64_t svcnt[_f64]_z(svbool_t pg, svfloat64_t op) + /// + public static unsafe Vector PopCount(Vector value) => PopCount(value); + + /// + /// svuint64_t svcnt[_s64]_m(svuint64_t inactive, svbool_t pg, svint64_t op) + /// CNT Ztied.D, Pg/M, Zop.D + /// svuint64_t svcnt[_s64]_x(svbool_t pg, svint64_t op) + /// CNT Ztied.D, Pg/M, Ztied.D + /// svuint64_t svcnt[_s64]_z(svbool_t pg, svint64_t op) + /// + public static unsafe Vector PopCount(Vector value) => PopCount(value); + + /// + /// svuint64_t svcnt[_u64]_m(svuint64_t inactive, svbool_t pg, svuint64_t op) + /// CNT Ztied.D, Pg/M, Zop.D + /// svuint64_t svcnt[_u64]_x(svbool_t pg, svuint64_t op) + /// CNT Ztied.D, Pg/M, Ztied.D + /// svuint64_t svcnt[_u64]_z(svbool_t pg, svuint64_t op) + /// + public static unsafe Vector PopCount(Vector value) => PopCount(value); + + /// SignExtend16 : Sign-extend the low 16 bits /// diff --git a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs index a13394dc07f33..f3953edbd16a4 100644 --- a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs +++ b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs @@ -4522,6 +4522,17 @@ internal Arm64() { } public static System.Numerics.Vector OrAcross(System.Numerics.Vector value) { throw null; } public static System.Numerics.Vector OrAcross(System.Numerics.Vector value) { throw null; } + public static System.Numerics.Vector PopCount(System.Numerics.Vector value) { throw null; } + public static System.Numerics.Vector PopCount(System.Numerics.Vector value) { throw null; } + public static System.Numerics.Vector PopCount(System.Numerics.Vector value) { throw null; } + public static System.Numerics.Vector PopCount(System.Numerics.Vector value) { throw null; } + public static System.Numerics.Vector PopCount(System.Numerics.Vector value) { throw null; } + public static System.Numerics.Vector PopCount(System.Numerics.Vector value) { throw null; } + public static System.Numerics.Vector PopCount(System.Numerics.Vector value) { throw null; } + public static System.Numerics.Vector PopCount(System.Numerics.Vector value) { throw null; } + public static System.Numerics.Vector PopCount(System.Numerics.Vector value) { throw null; } + public static System.Numerics.Vector PopCount(System.Numerics.Vector value) { throw null; } + public static System.Numerics.Vector SignExtend16(System.Numerics.Vector value) { throw null; } public static System.Numerics.Vector SignExtend16(System.Numerics.Vector value) { throw null; } public static System.Numerics.Vector SignExtend32(System.Numerics.Vector value) { throw null; } diff --git a/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs b/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs index a4337e54e28c2..351be48e0f7b5 100644 --- a/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs +++ b/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs @@ -3270,6 +3270,17 @@ ("SveVecReduceUnOpTest.template", new Dictionary { ["TestName"] = "Sve_OrAcross_uint", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "OrAcross", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "UInt32", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["ValidateReduceOpResult"] = "Helpers.OrAcross(firstOp) != result[0]", ["ValidateRemainingResults"] = "result[i] != 0"}), ("SveVecReduceUnOpTest.template", new Dictionary { ["TestName"] = "Sve_OrAcross_ulong", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "OrAcross", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt64", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "UInt64", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()", ["ValidateReduceOpResult"] = "Helpers.OrAcross(firstOp) != result[0]", ["ValidateRemainingResults"] = "result[i] != 0"}), + ("SveMasklessSimpleVecOpTest.template", new Dictionary { ["TestName"] = "Sve_PopCount_uint_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "PopCount", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["ValidateIterResult"] = "(uint)Helpers.BitCount(firstOp[i]) != result[i]", ["GetIterResult"] = "(uint)Helpers.BitCount(leftOp[i])"}), + ("SveMasklessSimpleVecOpTest.template", new Dictionary { ["TestName"] = "Sve_PopCount_ulong_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "PopCount", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt64", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["ValidateIterResult"] = "(ulong)Helpers.BitCount(firstOp[i]) != result[i]", ["GetIterResult"] = "(ulong)Helpers.BitCount(leftOp[i])"}), + ("SveMasklessSimpleVecOpTest.template", new Dictionary { ["TestName"] = "Sve_PopCount_byte_sbyte", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "PopCount", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "SByte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()", ["ValidateIterResult"] = "(byte)Helpers.BitCount(firstOp[i]) != result[i]", ["GetIterResult"] = "(byte)Helpers.BitCount(leftOp[i])"}), + ("SveMasklessSimpleVecOpTest.template", new Dictionary { ["TestName"] = "Sve_PopCount_ushort_short", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "PopCount", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt16", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Int16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()", ["ValidateIterResult"] = "(ushort)Helpers.BitCount(firstOp[i]) != result[i]", ["GetIterResult"] = "(ushort)Helpers.BitCount(leftOp[i])"}), + ("SveMasklessSimpleVecOpTest.template", new Dictionary { ["TestName"] = "Sve_PopCount_uint_int", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "PopCount", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Int32", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["ValidateIterResult"] = "(uint)Helpers.BitCount(firstOp[i]) != result[i]", ["GetIterResult"] = "(uint)Helpers.BitCount(leftOp[i])"}), + ("SveMasklessSimpleVecOpTest.template", new Dictionary { ["TestName"] = "Sve_PopCount_ulong_long", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "PopCount", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt64", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Int64", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt64()", ["ValidateIterResult"] = "(ulong)Helpers.BitCount(firstOp[i]) != result[i]", ["GetIterResult"] = "(ulong)Helpers.BitCount(leftOp[i])"}), + ("SveMasklessSimpleVecOpTest.template", new Dictionary { ["TestName"] = "Sve_PopCount_byte", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "PopCount", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Byte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["ValidateIterResult"] = "Helpers.BitCount(firstOp[i]) != result[i]", ["GetIterResult"] = "Helpers.BitCount(leftOp[i])"}), + ("SveMasklessSimpleVecOpTest.template", new Dictionary { ["TestName"] = "Sve_PopCount_ushort", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "PopCount", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt16", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "UInt16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt16()", ["ValidateIterResult"] = "Helpers.BitCount(firstOp[i]) != result[i]", ["GetIterResult"] = "Helpers.BitCount(leftOp[i])"}), + ("SveMasklessSimpleVecOpTest.template", new Dictionary { ["TestName"] = "Sve_PopCount_uint", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "PopCount", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "UInt32", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["ValidateIterResult"] = "Helpers.BitCount(firstOp[i]) != result[i]", ["GetIterResult"] = "Helpers.BitCount(leftOp[i])"}), + ("SveMasklessSimpleVecOpTest.template", new Dictionary { ["TestName"] = "Sve_PopCount_ulong", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "PopCount", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt64", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "UInt64", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()", ["ValidateIterResult"] = "Helpers.BitCount(firstOp[i]) != result[i]", ["GetIterResult"] = "Helpers.BitCount(leftOp[i])"}), + ("SveSimpleVecOpTest.template", new Dictionary { ["TestName"] = "Sve_SignExtend16_int", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "SignExtend16", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Int32", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["ValidateIterResult"] = "result[i] != Helpers.SignExtend(firstOp[i], 16, false)", ["GetIterResult"] = "Helpers.SignExtend(leftOp[i], 16, false)"}), ("SveSimpleVecOpTest.template", new Dictionary { ["TestName"] = "Sve_SignExtend16_long", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "SignExtend16", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Int64", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Int64", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt64()", ["ValidateIterResult"] = "result[i] != Helpers.SignExtend(firstOp[i], 16, false)", ["GetIterResult"] = "Helpers.SignExtend(leftOp[i], 16, false)"}), ("SveSimpleVecOpTest.template", new Dictionary { ["TestName"] = "Sve_SignExtend32_long", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "SignExtend32", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Int64", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Int64", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt64()", ["ValidateIterResult"] = "result[i] != Helpers.SignExtend(firstOp[i], 32, false)", ["GetIterResult"] = "Helpers.SignExtend(leftOp[i], 32, false)"}), diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs index e600618035651..fa4424056b397 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs @@ -207,6 +207,100 @@ public static byte BitCount(byte op1) return (byte)result; } + public static short BitCount(short op1) + { + int result = 0; + + for (int i = 0; i < 8 * sizeof(short); i++) + { + if (((ulong)op1 & (1UL << i)) != 0) + { + result = result + 1; + } + } + + return (short)result; + } + + public static ushort BitCount(ushort op1) + { + int result = 0; + + for (int i = 0; i < 8 * sizeof(ushort); i++) + { + if (((ulong)op1 & (1UL << i)) != 0) + { + result = result + 1; + } + } + + return (ushort)result; + } + + public static int BitCount(int op1) + { + int result = 0; + + for (int i = 0; i < 8 * sizeof(int); i++) + { + if (((ulong)op1 & (1UL << i)) != 0) + { + result = result + 1; + } + } + + return (int)result; + } + + public static uint BitCount(uint op1) + { + int result = 0; + + for (int i = 0; i < 8 * sizeof(uint); i++) + { + if (((ulong)op1 & (1UL << i)) != 0) + { + result = result + 1; + } + } + + return (uint)result; + } + + public static long BitCount(long op1) + { + int result = 0; + + for (int i = 0; i < 8 * sizeof(long); i++) + { + if (((ulong)op1 & (1UL << i)) != 0) + { + result = result + 1; + } + } + + return (long)result; + } + + public static ulong BitCount(ulong op1) + { + int result = 0; + + for (int i = 0; i < 8 * sizeof(ulong); i++) + { + if (((ulong)op1 & (1UL << i)) != 0) + { + result = result + 1; + } + } + + return (ulong)result; + } + + public static int BitCount(float op1) => BitCount(BitConverter.SingleToInt32Bits(op1)); + + public static long BitCount(double op1) => BitCount(BitConverter.DoubleToInt64Bits(op1)); + public static byte ReverseElementBits(byte op1) { byte val = (byte)op1; From bdb3bb63a58fb374d3b8ffd48e55072886570395 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Wed, 22 May 2024 11:22:20 +0100 Subject: [PATCH 3/6] Fix PlatformNotSupported --- .../Arm/Sve.PlatformNotSupported.cs | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs index e3f178b23bc0c..57d768ac30336 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs @@ -1328,7 +1328,7 @@ internal Arm64() { } /// svuint8_t svcls[_s8]_z(svbool_t pg, svint8_t op) /// CLS Ztied.B, Pg/M, Zop.B /// - public static unsafe Vector LeadingSignCount(Vector value) => LeadingSignCount(value) { throw new PlatformNotSupportedException(); } + public static unsafe Vector LeadingSignCount(Vector value){ throw new PlatformNotSupportedException(); } /// /// svuint16_t svcls[_s16]_m(svuint16_t inactive, svbool_t pg, svint16_t op) @@ -1336,7 +1336,7 @@ internal Arm64() { } /// svuint16_t svcls[_s16]_z(svbool_t pg, svint16_t op) /// CLS Ztied.H, Pg/M, Zop.H /// - public static unsafe Vector LeadingSignCount(Vector value) => LeadingSignCount(value) { throw new PlatformNotSupportedException(); } + public static unsafe Vector LeadingSignCount(Vector value) { throw new PlatformNotSupportedException(); } /// /// svuint32_t svcls[_s32]_m(svuint32_t inactive, svbool_t pg, svint32_t op) @@ -1344,7 +1344,7 @@ internal Arm64() { } /// svuint32_t svcls[_s32]_z(svbool_t pg, svint32_t op) /// CLS Ztied.S, Pg/M, Zop.S /// - public static unsafe Vector LeadingSignCount(Vector value) => LeadingSignCount(value) { throw new PlatformNotSupportedException(); } + public static unsafe Vector LeadingSignCount(Vector value) { throw new PlatformNotSupportedException(); } /// /// svuint64_t svcls[_s64]_m(svuint64_t inactive, svbool_t pg, svint64_t op) @@ -1352,7 +1352,7 @@ internal Arm64() { } /// svuint64_t svcls[_s64]_z(svbool_t pg, svint64_t op) /// CLS Ztied.D, Pg/M, Zop.D /// - public static unsafe Vector LeadingSignCount(Vector value) => LeadingSignCount(value); { throw new PlatformNotSupportedException(); } + public static unsafe Vector LeadingSignCount(Vector value) { throw new PlatformNotSupportedException(); } /// Count leading zero bits @@ -1363,7 +1363,7 @@ internal Arm64() { } /// svuint8_t svclz[_s8]_z(svbool_t pg, svint8_t op) /// CLZ Ztied.B, Pg/M, Zop.B /// - public static unsafe Vector LeadingZeroCount(Vector value) => LeadingZeroCount(value) { throw new PlatformNotSupportedException(); } + public static unsafe Vector LeadingZeroCount(Vector value) { throw new PlatformNotSupportedException(); } /// /// svuint8_t svclz[_u8]_m(svuint8_t inactive, svbool_t pg, svuint8_t op) @@ -1371,7 +1371,7 @@ internal Arm64() { } /// svuint8_t svclz[_u8]_z(svbool_t pg, svuint8_t op) /// CLZ Ztied.B, Pg/M, Zop.B /// - public static unsafe Vector LeadingZeroCount(Vector value) => LeadingZeroCount(value) { throw new PlatformNotSupportedException(); } + public static unsafe Vector LeadingZeroCount(Vector value) { throw new PlatformNotSupportedException(); } /// /// svuint16_t svclz[_s16]_m(svuint16_t inactive, svbool_t pg, svint16_t op) @@ -1379,7 +1379,7 @@ internal Arm64() { } /// svuint16_t svclz[_s16]_z(svbool_t pg, svint16_t op) /// CLZ Ztied.H, Pg/M, Zop.H /// - public static unsafe Vector LeadingZeroCount(Vector value) => LeadingZeroCount(value) { throw new PlatformNotSupportedException(); } + public static unsafe Vector LeadingZeroCount(Vector value) { throw new PlatformNotSupportedException(); } /// /// svuint16_t svclz[_u16]_m(svuint16_t inactive, svbool_t pg, svuint16_t op) @@ -1387,7 +1387,7 @@ internal Arm64() { } /// svuint16_t svclz[_u16]_z(svbool_t pg, svuint16_t op) /// CLZ Ztied.H, Pg/M, Zop.H /// - public static unsafe Vector LeadingZeroCount(Vector value) => LeadingZeroCount(value) { throw new PlatformNotSupportedException(); } + public static unsafe Vector LeadingZeroCount(Vector value) { throw new PlatformNotSupportedException(); } /// /// svuint32_t svclz[_s32]_m(svuint32_t inactive, svbool_t pg, svint32_t op) @@ -1395,7 +1395,7 @@ internal Arm64() { } /// svuint32_t svclz[_s32]_z(svbool_t pg, svint32_t op) /// CLZ Ztied.S, Pg/M, Zop.S /// - public static unsafe Vector LeadingZeroCount(Vector value) => LeadingZeroCount(value) { throw new PlatformNotSupportedException(); } + public static unsafe Vector LeadingZeroCount(Vector value) { throw new PlatformNotSupportedException(); } /// /// svuint32_t svclz[_u32]_m(svuint32_t inactive, svbool_t pg, svuint32_t op) @@ -1403,7 +1403,7 @@ internal Arm64() { } /// svuint32_t svclz[_u32]_z(svbool_t pg, svuint32_t op) /// CLZ Ztied.S, Pg/M, Zop.S /// - public static unsafe Vector LeadingZeroCount(Vector value) => LeadingZeroCount(value) { throw new PlatformNotSupportedException(); } + public static unsafe Vector LeadingZeroCount(Vector value) { throw new PlatformNotSupportedException(); } /// /// svuint64_t svclz[_s64]_m(svuint64_t inactive, svbool_t pg, svint64_t op) @@ -1411,7 +1411,7 @@ internal Arm64() { } /// svuint64_t svclz[_s64]_z(svbool_t pg, svint64_t op) /// CLZ Ztied.D, Pg/M, Zop.D /// - public static unsafe Vector LeadingZeroCount(Vector value) => LeadingZeroCount(value) { throw new PlatformNotSupportedException(); } + public static unsafe Vector LeadingZeroCount(Vector value) { throw new PlatformNotSupportedException(); } /// /// svuint64_t svclz[_u64]_m(svuint64_t inactive, svbool_t pg, svuint64_t op) @@ -1419,7 +1419,7 @@ internal Arm64() { } /// svuint64_t svclz[_u64]_z(svbool_t pg, svuint64_t op) /// CLZ Ztied.D, Pg/M, Zop.D /// - public static unsafe Vector LeadingZeroCount(Vector value) => LeadingZeroCount(value) { throw new PlatformNotSupportedException(); } + public static unsafe Vector LeadingZeroCount(Vector value) { throw new PlatformNotSupportedException(); } /// LoadVector : Unextended load From 8c3b821641f7ac6d6a587e8f04cd80d3cd5f0093 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Wed, 22 May 2024 11:33:16 +0100 Subject: [PATCH 4/6] Fix summary headers for popcount --- .../Arm/Sve.PlatformNotSupported.cs | 30 +++++++------------ .../src/System/Runtime/Intrinsics/Arm/Sve.cs | 30 +++++++------------ 2 files changed, 20 insertions(+), 40 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs index 57d768ac30336..a86d7721a3a82 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs @@ -2597,91 +2597,81 @@ internal Arm64() { } /// /// svuint8_t svcnt[_s8]_m(svuint8_t inactive, svbool_t pg, svint8_t op) - /// CNT Ztied.B, Pg/M, Zop.B /// svuint8_t svcnt[_s8]_x(svbool_t pg, svint8_t op) - /// CNT Ztied.B, Pg/M, Ztied.B /// svuint8_t svcnt[_s8]_z(svbool_t pg, svint8_t op) + /// CNT Ztied.B, Pg/M, Zop.B /// public static unsafe Vector PopCount(Vector value) { throw new PlatformNotSupportedException(); } /// /// svuint8_t svcnt[_u8]_m(svuint8_t inactive, svbool_t pg, svuint8_t op) - /// CNT Ztied.B, Pg/M, Zop.B /// svuint8_t svcnt[_u8]_x(svbool_t pg, svuint8_t op) - /// CNT Ztied.B, Pg/M, Ztied.B /// svuint8_t svcnt[_u8]_z(svbool_t pg, svuint8_t op) + /// CNT Ztied.B, Pg/M, Zop.B /// public static unsafe Vector PopCount(Vector value) { throw new PlatformNotSupportedException(); } /// /// svuint16_t svcnt[_s16]_m(svuint16_t inactive, svbool_t pg, svint16_t op) - /// CNT Ztied.H, Pg/M, Zop.H /// svuint16_t svcnt[_s16]_x(svbool_t pg, svint16_t op) - /// CNT Ztied.H, Pg/M, Ztied.H /// svuint16_t svcnt[_s16]_z(svbool_t pg, svint16_t op) + /// CNT Ztied.H, Pg/M, Zop.H /// public static unsafe Vector PopCount(Vector value) { throw new PlatformNotSupportedException(); } /// /// svuint16_t svcnt[_u16]_m(svuint16_t inactive, svbool_t pg, svuint16_t op) - /// CNT Ztied.H, Pg/M, Zop.H /// svuint16_t svcnt[_u16]_x(svbool_t pg, svuint16_t op) - /// CNT Ztied.H, Pg/M, Ztied.H /// svuint16_t svcnt[_u16]_z(svbool_t pg, svuint16_t op) + /// CNT Ztied.H, Pg/M, Zop.H /// public static unsafe Vector PopCount(Vector value) { throw new PlatformNotSupportedException(); } /// /// svuint32_t svcnt[_s32]_m(svuint32_t inactive, svbool_t pg, svint32_t op) - /// CNT Ztied.S, Pg/M, Zop.S /// svuint32_t svcnt[_s32]_x(svbool_t pg, svint32_t op) - /// CNT Ztied.S, Pg/M, Ztied.S /// svuint32_t svcnt[_s32]_z(svbool_t pg, svint32_t op) + /// CNT Ztied.S, Pg/M, Zop.S /// public static unsafe Vector PopCount(Vector value) { throw new PlatformNotSupportedException(); } /// /// svuint32_t svcnt[_f32]_m(svuint32_t inactive, svbool_t pg, svfloat32_t op) - /// CNT Ztied.S, Pg/M, Zop.S /// svuint32_t svcnt[_f32]_x(svbool_t pg, svfloat32_t op) - /// CNT Ztied.S, Pg/M, Ztied.S /// svuint32_t svcnt[_f32]_z(svbool_t pg, svfloat32_t op) + /// CNT Ztied.S, Pg/M, Zop.S /// public static unsafe Vector PopCount(Vector value) { throw new PlatformNotSupportedException(); } /// /// svuint32_t svcnt[_u32]_m(svuint32_t inactive, svbool_t pg, svuint32_t op) - /// CNT Ztied.S, Pg/M, Zop.S /// svuint32_t svcnt[_u32]_x(svbool_t pg, svuint32_t op) - /// CNT Ztied.S, Pg/M, Ztied.S /// svuint32_t svcnt[_u32]_z(svbool_t pg, svuint32_t op) + /// CNT Ztied.S, Pg/M, Zop.S /// public static unsafe Vector PopCount(Vector value) { throw new PlatformNotSupportedException(); } /// /// svuint64_t svcnt[_f64]_m(svuint64_t inactive, svbool_t pg, svfloat64_t op) - /// CNT Ztied.D, Pg/M, Zop.D /// svuint64_t svcnt[_f64]_x(svbool_t pg, svfloat64_t op) - /// CNT Ztied.D, Pg/M, Ztied.D /// svuint64_t svcnt[_f64]_z(svbool_t pg, svfloat64_t op) + /// CNT Ztied.D, Pg/M, Zop.D /// public static unsafe Vector PopCount(Vector value) { throw new PlatformNotSupportedException(); } /// /// svuint64_t svcnt[_s64]_m(svuint64_t inactive, svbool_t pg, svint64_t op) - /// CNT Ztied.D, Pg/M, Zop.D /// svuint64_t svcnt[_s64]_x(svbool_t pg, svint64_t op) - /// CNT Ztied.D, Pg/M, Ztied.D /// svuint64_t svcnt[_s64]_z(svbool_t pg, svint64_t op) + /// CNT Ztied.D, Pg/M, Zop.D /// public static unsafe Vector PopCount(Vector value) { throw new PlatformNotSupportedException(); } /// /// svuint64_t svcnt[_u64]_m(svuint64_t inactive, svbool_t pg, svuint64_t op) - /// CNT Ztied.D, Pg/M, Zop.D /// svuint64_t svcnt[_u64]_x(svbool_t pg, svuint64_t op) - /// CNT Ztied.D, Pg/M, Ztied.D /// svuint64_t svcnt[_u64]_z(svbool_t pg, svuint64_t op) + /// CNT Ztied.D, Pg/M, Zop.D /// public static unsafe Vector PopCount(Vector value) { throw new PlatformNotSupportedException(); } diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs index 993cc39352ed8..cf9831ec77fa1 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs @@ -2652,91 +2652,81 @@ internal Arm64() { } /// /// svuint8_t svcnt[_s8]_m(svuint8_t inactive, svbool_t pg, svint8_t op) - /// CNT Ztied.B, Pg/M, Zop.B /// svuint8_t svcnt[_s8]_x(svbool_t pg, svint8_t op) - /// CNT Ztied.B, Pg/M, Ztied.B /// svuint8_t svcnt[_s8]_z(svbool_t pg, svint8_t op) + /// CNT Ztied.B, Pg/M, Zop.B /// public static unsafe Vector PopCount(Vector value) => PopCount(value); /// /// svuint8_t svcnt[_u8]_m(svuint8_t inactive, svbool_t pg, svuint8_t op) - /// CNT Ztied.B, Pg/M, Zop.B /// svuint8_t svcnt[_u8]_x(svbool_t pg, svuint8_t op) - /// CNT Ztied.B, Pg/M, Ztied.B /// svuint8_t svcnt[_u8]_z(svbool_t pg, svuint8_t op) + /// CNT Ztied.B, Pg/M, Zop.B /// public static unsafe Vector PopCount(Vector value) => PopCount(value); /// /// svuint16_t svcnt[_s16]_m(svuint16_t inactive, svbool_t pg, svint16_t op) - /// CNT Ztied.H, Pg/M, Zop.H /// svuint16_t svcnt[_s16]_x(svbool_t pg, svint16_t op) - /// CNT Ztied.H, Pg/M, Ztied.H /// svuint16_t svcnt[_s16]_z(svbool_t pg, svint16_t op) + /// CNT Ztied.H, Pg/M, Zop.H /// public static unsafe Vector PopCount(Vector value) => PopCount(value); /// /// svuint16_t svcnt[_u16]_m(svuint16_t inactive, svbool_t pg, svuint16_t op) - /// CNT Ztied.H, Pg/M, Zop.H /// svuint16_t svcnt[_u16]_x(svbool_t pg, svuint16_t op) - /// CNT Ztied.H, Pg/M, Ztied.H /// svuint16_t svcnt[_u16]_z(svbool_t pg, svuint16_t op) + /// CNT Ztied.H, Pg/M, Zop.H /// public static unsafe Vector PopCount(Vector value) => PopCount(value); /// /// svuint32_t svcnt[_s32]_m(svuint32_t inactive, svbool_t pg, svint32_t op) - /// CNT Ztied.S, Pg/M, Zop.S /// svuint32_t svcnt[_s32]_x(svbool_t pg, svint32_t op) - /// CNT Ztied.S, Pg/M, Ztied.S /// svuint32_t svcnt[_s32]_z(svbool_t pg, svint32_t op) + /// CNT Ztied.S, Pg/M, Zop.S /// public static unsafe Vector PopCount(Vector value) => PopCount(value); /// /// svuint32_t svcnt[_f32]_m(svuint32_t inactive, svbool_t pg, svfloat32_t op) - /// CNT Ztied.S, Pg/M, Zop.S /// svuint32_t svcnt[_f32]_x(svbool_t pg, svfloat32_t op) - /// CNT Ztied.S, Pg/M, Ztied.S /// svuint32_t svcnt[_f32]_z(svbool_t pg, svfloat32_t op) + /// CNT Ztied.S, Pg/M, Zop.S /// public static unsafe Vector PopCount(Vector value) => PopCount(value); /// /// svuint32_t svcnt[_u32]_m(svuint32_t inactive, svbool_t pg, svuint32_t op) - /// CNT Ztied.S, Pg/M, Zop.S /// svuint32_t svcnt[_u32]_x(svbool_t pg, svuint32_t op) - /// CNT Ztied.S, Pg/M, Ztied.S /// svuint32_t svcnt[_u32]_z(svbool_t pg, svuint32_t op) + /// CNT Ztied.S, Pg/M, Zop.S /// public static unsafe Vector PopCount(Vector value) => PopCount(value); /// /// svuint64_t svcnt[_f64]_m(svuint64_t inactive, svbool_t pg, svfloat64_t op) - /// CNT Ztied.D, Pg/M, Zop.D /// svuint64_t svcnt[_f64]_x(svbool_t pg, svfloat64_t op) - /// CNT Ztied.D, Pg/M, Ztied.D /// svuint64_t svcnt[_f64]_z(svbool_t pg, svfloat64_t op) + /// CNT Ztied.D, Pg/M, Zop.D /// public static unsafe Vector PopCount(Vector value) => PopCount(value); /// /// svuint64_t svcnt[_s64]_m(svuint64_t inactive, svbool_t pg, svint64_t op) - /// CNT Ztied.D, Pg/M, Zop.D /// svuint64_t svcnt[_s64]_x(svbool_t pg, svint64_t op) - /// CNT Ztied.D, Pg/M, Ztied.D /// svuint64_t svcnt[_s64]_z(svbool_t pg, svint64_t op) + /// CNT Ztied.D, Pg/M, Zop.D /// public static unsafe Vector PopCount(Vector value) => PopCount(value); /// /// svuint64_t svcnt[_u64]_m(svuint64_t inactive, svbool_t pg, svuint64_t op) - /// CNT Ztied.D, Pg/M, Zop.D /// svuint64_t svcnt[_u64]_x(svbool_t pg, svuint64_t op) - /// CNT Ztied.D, Pg/M, Ztied.D /// svuint64_t svcnt[_u64]_z(svbool_t pg, svuint64_t op) + /// CNT Ztied.D, Pg/M, Zop.D /// public static unsafe Vector PopCount(Vector value) => PopCount(value); From ac2b6a5ef32f96c4b056d8beefde5e0424137074 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Wed, 22 May 2024 17:58:36 +0100 Subject: [PATCH 5/6] Use SveSimpleVecOpTest for unsigned popcounts --- .../GenerateHWIntrinsicTests_Arm.cs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs b/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs index 351be48e0f7b5..63c9683afb058 100644 --- a/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs +++ b/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs @@ -3276,10 +3276,10 @@ ("SveMasklessSimpleVecOpTest.template", new Dictionary { ["TestName"] = "Sve_PopCount_ushort_short", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "PopCount", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt16", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Int16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()", ["ValidateIterResult"] = "(ushort)Helpers.BitCount(firstOp[i]) != result[i]", ["GetIterResult"] = "(ushort)Helpers.BitCount(leftOp[i])"}), ("SveMasklessSimpleVecOpTest.template", new Dictionary { ["TestName"] = "Sve_PopCount_uint_int", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "PopCount", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Int32", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["ValidateIterResult"] = "(uint)Helpers.BitCount(firstOp[i]) != result[i]", ["GetIterResult"] = "(uint)Helpers.BitCount(leftOp[i])"}), ("SveMasklessSimpleVecOpTest.template", new Dictionary { ["TestName"] = "Sve_PopCount_ulong_long", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "PopCount", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt64", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Int64", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt64()", ["ValidateIterResult"] = "(ulong)Helpers.BitCount(firstOp[i]) != result[i]", ["GetIterResult"] = "(ulong)Helpers.BitCount(leftOp[i])"}), - ("SveMasklessSimpleVecOpTest.template", new Dictionary { ["TestName"] = "Sve_PopCount_byte", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "PopCount", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Byte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["ValidateIterResult"] = "Helpers.BitCount(firstOp[i]) != result[i]", ["GetIterResult"] = "Helpers.BitCount(leftOp[i])"}), - ("SveMasklessSimpleVecOpTest.template", new Dictionary { ["TestName"] = "Sve_PopCount_ushort", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "PopCount", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt16", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "UInt16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt16()", ["ValidateIterResult"] = "Helpers.BitCount(firstOp[i]) != result[i]", ["GetIterResult"] = "Helpers.BitCount(leftOp[i])"}), - ("SveMasklessSimpleVecOpTest.template", new Dictionary { ["TestName"] = "Sve_PopCount_uint", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "PopCount", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "UInt32", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["ValidateIterResult"] = "Helpers.BitCount(firstOp[i]) != result[i]", ["GetIterResult"] = "Helpers.BitCount(leftOp[i])"}), - ("SveMasklessSimpleVecOpTest.template", new Dictionary { ["TestName"] = "Sve_PopCount_ulong", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "PopCount", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt64", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "UInt64", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()", ["ValidateIterResult"] = "Helpers.BitCount(firstOp[i]) != result[i]", ["GetIterResult"] = "Helpers.BitCount(leftOp[i])"}), + ("SveSimpleVecOpTest.template", new Dictionary { ["TestName"] = "Sve_PopCount_byte", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "PopCount", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Byte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["ValidateIterResult"] = "Helpers.BitCount(firstOp[i]) != result[i]", ["GetIterResult"] = "Helpers.BitCount(leftOp[i])"}), + ("SveSimpleVecOpTest.template", new Dictionary { ["TestName"] = "Sve_PopCount_ushort", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "PopCount", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt16", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "UInt16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt16()", ["ValidateIterResult"] = "Helpers.BitCount(firstOp[i]) != result[i]", ["GetIterResult"] = "Helpers.BitCount(leftOp[i])"}), + ("SveSimpleVecOpTest.template", new Dictionary { ["TestName"] = "Sve_PopCount_uint", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "PopCount", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "UInt32", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["ValidateIterResult"] = "Helpers.BitCount(firstOp[i]) != result[i]", ["GetIterResult"] = "Helpers.BitCount(leftOp[i])"}), + ("SveSimpleVecOpTest.template", new Dictionary { ["TestName"] = "Sve_PopCount_ulong", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "PopCount", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt64", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "UInt64", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()", ["ValidateIterResult"] = "Helpers.BitCount(firstOp[i]) != result[i]", ["GetIterResult"] = "Helpers.BitCount(leftOp[i])"}), ("SveSimpleVecOpTest.template", new Dictionary { ["TestName"] = "Sve_SignExtend16_int", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "SignExtend16", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Int32", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["ValidateIterResult"] = "result[i] != Helpers.SignExtend(firstOp[i], 16, false)", ["GetIterResult"] = "Helpers.SignExtend(leftOp[i], 16, false)"}), ("SveSimpleVecOpTest.template", new Dictionary { ["TestName"] = "Sve_SignExtend16_long", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "SignExtend16", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Int64", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Int64", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt64()", ["ValidateIterResult"] = "result[i] != Helpers.SignExtend(firstOp[i], 16, false)", ["GetIterResult"] = "Helpers.SignExtend(leftOp[i], 16, false)"}), From 56d8a8d2b4c1ff0b11af6efcec78e562e1ae7663 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 22 May 2024 14:21:34 -0700 Subject: [PATCH 6/6] Add HW_Flag_LowMaskedOperation() to LeadingSignCount() and LeadingZeroCount() --- src/coreclr/jit/hwintrinsiclistarm64sve.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h index 1c8a3b3c866ac..ba1514d65fea6 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64sve.h +++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -68,8 +68,8 @@ HARDWARE_INTRINSIC(Sve, FusedMultiplyAddNegated, HARDWARE_INTRINSIC(Sve, FusedMultiplySubtract, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmls, INS_sve_fmls}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Sve, FusedMultiplySubtractBySelectedScalar, -1, 4, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmls, INS_sve_fmls}, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_FmaIntrinsic|HW_Flag_LowVectorOperation) HARDWARE_INTRINSIC(Sve, FusedMultiplySubtractNegated, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fnmls, INS_sve_fnmls}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Sve, LeadingSignCount, -1, -1, false, {INS_sve_cls, INS_invalid, INS_sve_cls, INS_invalid, INS_sve_cls, INS_invalid, INS_sve_cls, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation) -HARDWARE_INTRINSIC(Sve, LeadingZeroCount, -1, -1, false, {INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation) +HARDWARE_INTRINSIC(Sve, LeadingSignCount, -1, -1, false, {INS_sve_cls, INS_invalid, INS_sve_cls, INS_invalid, INS_sve_cls, INS_invalid, INS_sve_cls, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, LeadingZeroCount, -1, -1, false, {INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, LoadVector, -1, 2, true, {INS_sve_ld1b, INS_sve_ld1b, INS_sve_ld1h, INS_sve_ld1h, INS_sve_ld1w, INS_sve_ld1w, INS_sve_ld1d, INS_sve_ld1d, INS_sve_ld1w, INS_sve_ld1d}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, LoadVectorByteZeroExtendToInt16, -1, 2, false, {INS_invalid, INS_invalid, INS_sve_ld1b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, LoadVectorByteZeroExtendToInt32, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)