Skip to content

Commit

Permalink
JIT ARM64-SVE: Add Sve.LoadVector*ZeroExtendTo*() (dotnet#101291)
Browse files Browse the repository at this point in the history
* JIT ARM64-SVE: Add Sve.LoadVector*ZeroExtendTo*()

Add the following APIs:

LoadVectorByteZeroExtendToInt16
LoadVectorByteZeroExtendToInt32
LoadVectorByteZeroExtendToInt64
LoadVectorByteZeroExtendToUInt16
LoadVectorByteZeroExtendToUInt32
LoadVectorByteZeroExtendToUInt64
LoadVectorInt16SignExtendToInt32
LoadVectorInt16SignExtendToInt64
LoadVectorInt16SignExtendToUInt32
LoadVectorInt16SignExtendToUInt64
LoadVectorInt32SignExtendToInt64
LoadVectorInt32SignExtendToUInt64
LoadVectorSByteSignExtendToInt16
LoadVectorSByteSignExtendToInt32
LoadVectorSByteSignExtendToInt64
LoadVectorSByteSignExtendToUInt16
LoadVectorSByteSignExtendToUInt32
LoadVectorSByteSignExtendToUInt64
LoadVectorUInt16ZeroExtendToInt32
LoadVectorUInt16ZeroExtendToInt64
LoadVectorUInt16ZeroExtendToUInt32
LoadVectorUInt16ZeroExtendToUInt64
LoadVectorUInt32ZeroExtendToInt64
LoadVectorUInt32ZeroExtendToUInt64

* cleanup: remove unwatnted comments

Remove comments that mentions instuctions that APIs are never mapped to.

* fix merge conflict

* fix merge conflict

* fix spacing

* Mark LoadVector*Extend* as having HW_Flag_ExplicitMaskedOperation

---------

Co-authored-by: Kunal Pathak <[email protected]>
  • Loading branch information
2 people authored and matouskozak committed Apr 30, 2024
1 parent de2e1b8 commit 90412d1
Show file tree
Hide file tree
Showing 7 changed files with 535 additions and 1 deletion.
3 changes: 3 additions & 0 deletions src/coreclr/jit/emitarm64sve.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4349,8 +4349,11 @@ void emitter::emitInsSve_R_R_R(instruction ins,
break;

case INS_sve_ld1b:
case INS_sve_ld1sb:
case INS_sve_ld1h:
case INS_sve_ld1sh:
case INS_sve_ld1w:
case INS_sve_ld1sw:
case INS_sve_ld1d:
return emitIns_R_R_R_I(ins, size, reg1, reg2, reg3, 0, opt);

Expand Down
24 changes: 24 additions & 0 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26508,6 +26508,30 @@ bool GenTreeHWIntrinsic::OperIsMemoryLoad(GenTree** pAddr) const
break;

case NI_Sve_LoadVector:
case NI_Sve_LoadVectorByteZeroExtendToInt16:
case NI_Sve_LoadVectorByteZeroExtendToInt32:
case NI_Sve_LoadVectorByteZeroExtendToInt64:
case NI_Sve_LoadVectorByteZeroExtendToUInt16:
case NI_Sve_LoadVectorByteZeroExtendToUInt32:
case NI_Sve_LoadVectorByteZeroExtendToUInt64:
case NI_Sve_LoadVectorInt16SignExtendToInt32:
case NI_Sve_LoadVectorInt16SignExtendToInt64:
case NI_Sve_LoadVectorInt16SignExtendToUInt32:
case NI_Sve_LoadVectorInt16SignExtendToUInt64:
case NI_Sve_LoadVectorInt32SignExtendToInt64:
case NI_Sve_LoadVectorInt32SignExtendToUInt64:
case NI_Sve_LoadVectorSByteSignExtendToInt16:
case NI_Sve_LoadVectorSByteSignExtendToInt32:
case NI_Sve_LoadVectorSByteSignExtendToInt64:
case NI_Sve_LoadVectorSByteSignExtendToUInt16:
case NI_Sve_LoadVectorSByteSignExtendToUInt32:
case NI_Sve_LoadVectorSByteSignExtendToUInt64:
case NI_Sve_LoadVectorUInt16ZeroExtendToInt32:
case NI_Sve_LoadVectorUInt16ZeroExtendToInt64:
case NI_Sve_LoadVectorUInt16ZeroExtendToUInt32:
case NI_Sve_LoadVectorUInt16ZeroExtendToUInt64:
case NI_Sve_LoadVectorUInt32ZeroExtendToInt64:
case NI_Sve_LoadVectorUInt32ZeroExtendToUInt64:
addr = Op(2);
break;
#endif // TARGET_ARM64
Expand Down
24 changes: 24 additions & 0 deletions src/coreclr/jit/hwintrinsiclistarm64sve.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,30 @@ HARDWARE_INTRINSIC(Sve, CreateTrueMaskUInt32,
HARDWARE_INTRINSIC(Sve, CreateTrueMaskUInt64, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid}, HW_Category_EnumPattern, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask)

HARDWARE_INTRINSIC(Sve, LoadVector, -1, 2, true, {INS_sve_ld1b, INS_sve_ld1b, INS_sve_ld1h, INS_sve_ld1h, INS_sve_ld1w, INS_sve_ld1w, INS_sve_ld1d, INS_sve_ld1d, INS_sve_ld1w, INS_sve_ld1d}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorByteZeroExtendToInt16, -1, 2, false, {INS_invalid, INS_invalid, INS_sve_ld1b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorByteZeroExtendToInt32, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorByteZeroExtendToInt64, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1b, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorByteZeroExtendToUInt16, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorByteZeroExtendToUInt32, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1b, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorByteZeroExtendToUInt64, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1b, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorInt16SignExtendToInt32, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1sh, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorInt16SignExtendToInt64, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1sh, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorInt16SignExtendToUInt32, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1sh, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorInt16SignExtendToUInt64, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1sh, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorInt32SignExtendToInt64, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1sw, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorInt32SignExtendToUInt64, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1sw, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorSByteSignExtendToInt16, -1, 2, false, {INS_invalid, INS_invalid, INS_sve_ld1sb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorSByteSignExtendToInt32, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1sb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorSByteSignExtendToInt64, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1sb, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorSByteSignExtendToUInt16, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1sb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorSByteSignExtendToUInt32, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1sb, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorSByteSignExtendToUInt64, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1sb, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorUInt16ZeroExtendToInt32, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1h, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorUInt16ZeroExtendToInt64, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1h, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorUInt16ZeroExtendToUInt32, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1h, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorUInt16ZeroExtendToUInt64, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1h, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorUInt32ZeroExtendToInt64, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1w, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorUInt32ZeroExtendToUInt64, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1w, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)



Expand Down
Loading

0 comments on commit 90412d1

Please sign in to comment.