diff --git a/src/coreclr/jit/fgdiagnostic.cpp b/src/coreclr/jit/fgdiagnostic.cpp index ffc7a933b5cb5..45cdcfea0f46b 100644 --- a/src/coreclr/jit/fgdiagnostic.cpp +++ b/src/coreclr/jit/fgdiagnostic.cpp @@ -3459,6 +3459,14 @@ void Compiler::fgDebugCheckFlags(GenTree* tree, BasicBlock* block) #if defined(TARGET_ARM64) case NI_ArmBase_Yield: + case NI_Sve_PrefetchBytes: + case NI_Sve_PrefetchInt16: + case NI_Sve_PrefetchInt32: + case NI_Sve_PrefetchInt64: + case NI_Sve_GatherPrefetch16Bit: + case NI_Sve_GatherPrefetch32Bit: + case NI_Sve_GatherPrefetch64Bit: + case NI_Sve_GatherPrefetch8Bit: { assert(tree->OperRequiresCallFlag(this)); expectedFlags |= GTF_GLOB_REF; diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index a0a709985c1ea..786e7c92f5f89 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -27459,17 +27459,9 @@ bool GenTreeHWIntrinsic::OperIsMemoryLoad(GenTree** pAddr) const case NI_Sve_Load2xVectorAndUnzip: case NI_Sve_Load3xVectorAndUnzip: case NI_Sve_Load4xVectorAndUnzip: - case NI_Sve_PrefetchBytes: - case NI_Sve_PrefetchInt16: - case NI_Sve_PrefetchInt32: - case NI_Sve_PrefetchInt64: addr = Op(2); break; - case NI_Sve_GatherPrefetch8Bit: - case NI_Sve_GatherPrefetch16Bit: - case NI_Sve_GatherPrefetch32Bit: - case NI_Sve_GatherPrefetch64Bit: case NI_Sve_GatherVector: case NI_Sve_GatherVectorByteZeroExtend: case NI_Sve_GatherVectorInt16SignExtend: @@ -27963,6 +27955,14 @@ bool GenTreeHWIntrinsic::OperRequiresCallFlag() const #if defined(TARGET_ARM64) case NI_ArmBase_Yield: + case NI_Sve_PrefetchBytes: + case NI_Sve_PrefetchInt16: + case NI_Sve_PrefetchInt32: + case NI_Sve_PrefetchInt64: + case NI_Sve_GatherPrefetch16Bit: + case NI_Sve_GatherPrefetch32Bit: + case NI_Sve_GatherPrefetch64Bit: + case NI_Sve_GatherPrefetch8Bit: { return true; } @@ -28145,6 +28145,14 @@ void GenTreeHWIntrinsic::Initialize(NamedIntrinsic intrinsicId) #if defined(TARGET_ARM64) case NI_ArmBase_Yield: + case NI_Sve_PrefetchBytes: + case 
NI_Sve_PrefetchInt16: + case NI_Sve_PrefetchInt32: + case NI_Sve_PrefetchInt64: + case NI_Sve_GatherPrefetch16Bit: + case NI_Sve_GatherPrefetch32Bit: + case NI_Sve_GatherPrefetch64Bit: + case NI_Sve_GatherPrefetch8Bit: { // Mark as a call and global reference, much as is done for GT_KEEPALIVE gtFlags |= (GTF_CALL | GTF_GLOB_REF); diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp index eb48a9b041ae9..4f016940840b2 100644 --- a/src/coreclr/jit/hwintrinsic.cpp +++ b/src/coreclr/jit/hwintrinsic.cpp @@ -1856,14 +1856,6 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, break; #elif defined(TARGET_ARM64) - case NI_Sve_GatherPrefetch8Bit: - case NI_Sve_GatherPrefetch16Bit: - case NI_Sve_GatherPrefetch32Bit: - case NI_Sve_GatherPrefetch64Bit: - assert(varTypeIsSIMD(op2->TypeGet())); - retNode->AsHWIntrinsic()->SetAuxiliaryJitType(getBaseJitTypeOfSIMDType(sigReader.op2ClsHnd)); - break; - case NI_Sve_GatherVector: case NI_Sve_GatherVectorByteZeroExtend: case NI_Sve_GatherVectorInt16SignExtend: @@ -1893,22 +1885,6 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, assert(!isScalar); retNode = gtNewSimdHWIntrinsicNode(nodeRetType, op1, op2, op3, op4, intrinsic, simdBaseJitType, simdSize); - -#if defined(TARGET_ARM64) - switch (intrinsic) - { - case NI_Sve_GatherPrefetch8Bit: - case NI_Sve_GatherPrefetch16Bit: - case NI_Sve_GatherPrefetch32Bit: - case NI_Sve_GatherPrefetch64Bit: - assert(varTypeIsSIMD(op3->TypeGet())); - retNode->AsHWIntrinsic()->SetAuxiliaryJitType(getBaseJitTypeOfSIMDType(sigReader.op3ClsHnd)); - break; - - default: - break; - } -#endif break; } diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index 0d367269c66ff..d69b0a9016575 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -2725,6 +2725,82 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, retNode->AsHWIntrinsic()->SetAuxiliaryJitType(op1BaseJitType); 
break;
         }
+        case NI_Sve_GatherPrefetch8Bit:
+        case NI_Sve_GatherPrefetch16Bit:
+        case NI_Sve_GatherPrefetch32Bit:
+        case NI_Sve_GatherPrefetch64Bit:
+        case NI_Sve_PrefetchBytes:
+        case NI_Sve_PrefetchInt16:
+        case NI_Sve_PrefetchInt32:
+        case NI_Sve_PrefetchInt64:
+        {
+            assert((sig->numArgs == 3) || (sig->numArgs == 4));
+            assert(!isScalar);
+            // Build the 3- or 4-operand prefetch node; operands are fetched last-to-first, immediate operand first.
+            var_types            argType       = TYP_UNKNOWN;
+            CORINFO_CLASS_HANDLE argClass      = NO_CLASS_HANDLE;
+            int                  immLowerBound = 0;
+            int                  immUpperBound = 0;
+
+            CORINFO_ARG_LIST_HANDLE arg1 = sig->args;
+            CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(arg1);
+            CORINFO_ARG_LIST_HANDLE arg3 = info.compCompHnd->getArgNext(arg2);
+
+            HWIntrinsicInfo::lookupImmBounds(intrinsic, simdSize, simdBaseType, 1, &immLowerBound, &immUpperBound);
+
+            if (sig->numArgs == 3)
+            {
+                argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg3, &argClass)));
+                op3     = getArgForHWIntrinsic(argType, argClass);
+
+                assert(HWIntrinsicInfo::isImmOp(intrinsic, op3));
+                op3 = addRangeCheckIfNeeded(intrinsic, op3, mustExpand, immLowerBound, immUpperBound);
+                // op2's class handle also supplies the base type stored as the node's auxiliary type below.
+                argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass)));
+                op2     = getArgForHWIntrinsic(argType, argClass);
+                CorInfoType op2BaseJitType = getBaseJitTypeOfSIMDType(argClass);
+                argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg1, &argClass)));
+                op1     = impPopStack().val;
+
+#ifdef DEBUG
+                // Gather forms must supply a SIMD op2; the other prefetch forms an integral op2.
+                if ((intrinsic == NI_Sve_GatherPrefetch8Bit) || (intrinsic == NI_Sve_GatherPrefetch16Bit) ||
+                    (intrinsic == NI_Sve_GatherPrefetch32Bit) || (intrinsic == NI_Sve_GatherPrefetch64Bit))
+                {
+                    assert(varTypeIsSIMD(op2->TypeGet()));
+                }
+                else
+                {
+                    assert(varTypeIsIntegral(op2->TypeGet()));
+                }
+#endif
+                retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, op3, intrinsic, simdBaseJitType, simdSize);
+                retNode->AsHWIntrinsic()->SetAuxiliaryJitType(op2BaseJitType);
+            }
+            else
+            {
+                CORINFO_ARG_LIST_HANDLE arg4 = info.compCompHnd->getArgNext(arg3);
+                argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg4, &argClass)));
+                op4     = getArgForHWIntrinsic(argType, argClass);
+                // The immediate is operand 4: keep the range-checked node in op4, since op3 is fetched next.
+                assert(HWIntrinsicInfo::isImmOp(intrinsic, op4));
+                op4 = addRangeCheckIfNeeded(intrinsic, op4, mustExpand, immLowerBound, immUpperBound);
+                // Likewise, op3's class handle supplies the auxiliary base type in the 4-operand form.
+                argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg3, &argClass)));
+                op3     = getArgForHWIntrinsic(argType, argClass);
+                CorInfoType op3BaseJitType = getBaseJitTypeOfSIMDType(argClass);
+                argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass)));
+                op2     = getArgForHWIntrinsic(argType, argClass);
+                argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg1, &argClass)));
+                op1     = impPopStack().val;
+
+                assert(varTypeIsSIMD(op3->TypeGet()));
+                retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, op3, op4, intrinsic, simdBaseJitType, simdSize);
+                retNode->AsHWIntrinsic()->SetAuxiliaryJitType(op3BaseJitType);
+            }
+
+            break;
+        }
 
         default:
         {
diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h
index 5149ac6fc867c..6933f8e2f9b81 100644
--- a/src/coreclr/jit/hwintrinsiclistarm64sve.h
+++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h
@@ -84,10 +84,10 @@ HARDWARE_INTRINSIC(Sve, FusedMultiplyAddNegated,
 HARDWARE_INTRINSIC(Sve, FusedMultiplySubtract, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmls, INS_sve_fmls}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen)
 HARDWARE_INTRINSIC(Sve, FusedMultiplySubtractBySelectedScalar, -1, 4, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmls, INS_sve_fmls}, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_FmaIntrinsic|HW_Flag_LowVectorOperation)
HARDWARE_INTRINSIC(Sve, FusedMultiplySubtractNegated, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fnmls, INS_sve_fnmls}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Sve, GatherPrefetch16Bit, -1, -1, false, {INS_invalid, INS_invalid, INS_sve_prfh, INS_sve_prfh, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasImmediateOperand|HW_Flag_HasEnumOperand) -HARDWARE_INTRINSIC(Sve, GatherPrefetch32Bit, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_prfw, INS_sve_prfw, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasImmediateOperand|HW_Flag_HasEnumOperand) -HARDWARE_INTRINSIC(Sve, GatherPrefetch64Bit, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_prfd, INS_sve_prfd, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasImmediateOperand|HW_Flag_HasEnumOperand) -HARDWARE_INTRINSIC(Sve, GatherPrefetch8Bit, -1, -1, false, {INS_sve_prfb, INS_sve_prfb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasImmediateOperand|HW_Flag_HasEnumOperand) +HARDWARE_INTRINSIC(Sve, 
GatherPrefetch16Bit, -1, -1, false, {INS_invalid, INS_invalid, INS_sve_prfh, INS_sve_prfh, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasImmediateOperand|HW_Flag_HasEnumOperand) +HARDWARE_INTRINSIC(Sve, GatherPrefetch32Bit, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_prfw, INS_sve_prfw, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasImmediateOperand|HW_Flag_HasEnumOperand) +HARDWARE_INTRINSIC(Sve, GatherPrefetch64Bit, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_prfd, INS_sve_prfd, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasImmediateOperand|HW_Flag_HasEnumOperand) +HARDWARE_INTRINSIC(Sve, GatherPrefetch8Bit, -1, -1, false, {INS_sve_prfb, INS_sve_prfb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasImmediateOperand|HW_Flag_HasEnumOperand) HARDWARE_INTRINSIC(Sve, GatherVector, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1w, INS_sve_ld1w, INS_sve_ld1d, INS_sve_ld1d, INS_sve_ld1w, INS_sve_ld1d}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, GatherVectorByteZeroExtend, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_sve_ld1b, INS_sve_ld1b, INS_sve_ld1b, INS_sve_ld1b, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, GatherVectorInt16SignExtend, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1sh, INS_sve_ld1sh, INS_sve_ld1sh, INS_sve_ld1sh, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation) @@ -177,10 +177,10 @@ HARDWARE_INTRINSIC(Sve, Not, HARDWARE_INTRINSIC(Sve, Or, -1, -1, false, {INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, OrAcross, -1, -1, false, {INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, PopCount, -1, -1, false, {INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, PrefetchBytes, -1, 3, false, {INS_invalid, INS_sve_prfb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_HasEnumOperand) -HARDWARE_INTRINSIC(Sve, PrefetchInt16, -1, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_sve_prfh, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_HasEnumOperand) -HARDWARE_INTRINSIC(Sve, PrefetchInt32, -1, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_prfw, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_HasEnumOperand) -HARDWARE_INTRINSIC(Sve, PrefetchInt64, -1, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_prfd, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_HasEnumOperand) +HARDWARE_INTRINSIC(Sve, PrefetchBytes, -1, 3, false, {INS_invalid, INS_sve_prfb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_HasEnumOperand) +HARDWARE_INTRINSIC(Sve, PrefetchInt16, -1, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_sve_prfh, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_HasEnumOperand) +HARDWARE_INTRINSIC(Sve, PrefetchInt32, -1, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_prfw, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid}, HW_Category_Special, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_HasEnumOperand) +HARDWARE_INTRINSIC(Sve, PrefetchInt64, -1, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_prfd, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_HasEnumOperand) HARDWARE_INTRINSIC(Sve, ReciprocalEstimate, -1, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_frecpe, INS_sve_frecpe}, HW_Category_SIMD, HW_Flag_Scalable) HARDWARE_INTRINSIC(Sve, ReciprocalExponent, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_frecpx, INS_sve_frecpx}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, ReciprocalSqrtEstimate, -1, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_frsqrte, INS_sve_frsqrte}, HW_Category_SIMD, HW_Flag_Scalable)