Skip to content

Commit

Permalink
JIT ARM64-SVE: Add simple bitwise ops (dotnet#101762)
Browse files: browse the repository at this point in the history
* JIT ARM64-SVE: Add simple bitwise ops

And, AndAcross, Or, OrAcross, Xor, XorAcross

* Fix fadda

* Fix unpkh/fexpa/frecpe

* Reorder System.Runtime.Intrinsics.cs

* Fix API head comments
  • Loading branch information
a74nh authored and Ruihan-Yin committed May 30, 2024
1 parent 3e68740 commit 6f6b2c0
Show file tree
Hide file tree
Showing 8 changed files with 1,055 additions and 28 deletions.
46 changes: 23 additions & 23 deletions src/coreclr/jit/codegenarm64test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4798,11 +4798,11 @@ void CodeGen::genArm64EmitterUnitTestsSve()
INS_OPTS_SCALABLE_D); // CLASTB <Zdn>.<T>, <Pg>, <Zdn>.<T>, <Zm>.<T>

// IF_SVE_CN_3A
theEmitter->emitIns_R_R_R(INS_sve_clasta, EA_2BYTE, REG_V12, REG_P1, REG_V15, INS_OPTS_SCALABLE_H,
theEmitter->emitIns_R_R_R(INS_sve_clasta, EA_SCALABLE, REG_V12, REG_P1, REG_V15, INS_OPTS_SCALABLE_H,
INS_SCALABLE_OPTS_WITH_SIMD_SCALAR); // CLASTA <V><dn>, <Pg>, <V><dn>, <Zm>.<T>
theEmitter->emitIns_R_R_R(INS_sve_clastb, EA_4BYTE, REG_V13, REG_P2, REG_V16, INS_OPTS_SCALABLE_S,
theEmitter->emitIns_R_R_R(INS_sve_clastb, EA_SCALABLE, REG_V13, REG_P2, REG_V16, INS_OPTS_SCALABLE_S,
INS_SCALABLE_OPTS_WITH_SIMD_SCALAR); // CLASTB <V><dn>, <Pg>, <V><dn>, <Zm>.<T>
theEmitter->emitIns_R_R_R(INS_sve_clastb, EA_8BYTE, REG_V14, REG_P0, REG_V17, INS_OPTS_SCALABLE_D,
theEmitter->emitIns_R_R_R(INS_sve_clastb, EA_SCALABLE, REG_V14, REG_P0, REG_V17, INS_OPTS_SCALABLE_D,
INS_SCALABLE_OPTS_WITH_SIMD_SCALAR); // CLASTB <V><dn>, <Pg>, <V><dn>, <Zm>.<T>

// IF_SVE_CO_3A
Expand Down Expand Up @@ -5112,11 +5112,11 @@ void CodeGen::genArm64EmitterUnitTestsSve()
INS_OPTS_SCALABLE_H); // FMINP <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>

// IF_SVE_HJ_3A
theEmitter->emitIns_R_R_R(INS_sve_fadda, EA_2BYTE, REG_V21, REG_P6, REG_V14,
theEmitter->emitIns_R_R_R(INS_sve_fadda, EA_SCALABLE, REG_V21, REG_P6, REG_V14,
INS_OPTS_SCALABLE_H); // FADDA <V><dn>, <Pg>, <V><dn>, <Zm>.<T>
theEmitter->emitIns_R_R_R(INS_sve_fadda, EA_4BYTE, REG_V22, REG_P5, REG_V13,
theEmitter->emitIns_R_R_R(INS_sve_fadda, EA_SCALABLE, REG_V22, REG_P5, REG_V13,
INS_OPTS_SCALABLE_S); // FADDA <V><dn>, <Pg>, <V><dn>, <Zm>.<T>
theEmitter->emitIns_R_R_R(INS_sve_fadda, EA_8BYTE, REG_V23, REG_P4, REG_V12,
theEmitter->emitIns_R_R_R(INS_sve_fadda, EA_SCALABLE, REG_V23, REG_P4, REG_V12,
INS_OPTS_SCALABLE_D); // FADDA <V><dn>, <Pg>, <V><dn>, <Zm>.<T>

// IF_SVE_HL_3A
Expand Down Expand Up @@ -5288,13 +5288,13 @@ void CodeGen::genArm64EmitterUnitTestsSve()
INS_OPTS_SCALABLE_H); // FNMLS <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T>

// IF_SVE_AF_3A
theEmitter->emitIns_R_R_R(INS_sve_andv, EA_1BYTE, REG_V0, REG_P0, REG_V0,
theEmitter->emitIns_R_R_R(INS_sve_andv, EA_SCALABLE, REG_V0, REG_P0, REG_V0,
INS_OPTS_SCALABLE_B); // ANDV <V><d>, <Pg>, <Zn>.<T>
theEmitter->emitIns_R_R_R(INS_sve_eorv, EA_2BYTE, REG_V1, REG_P1, REG_V1,
theEmitter->emitIns_R_R_R(INS_sve_eorv, EA_SCALABLE, REG_V1, REG_P1, REG_V1,
INS_OPTS_SCALABLE_H); // EORV <V><d>, <Pg>, <Zn>.<T>
theEmitter->emitIns_R_R_R(INS_sve_orv, EA_4BYTE, REG_V2, REG_P2, REG_V2,
theEmitter->emitIns_R_R_R(INS_sve_orv, EA_SCALABLE, REG_V2, REG_P2, REG_V2,
INS_OPTS_SCALABLE_S); // ORV <V><d>, <Pg>, <Zn>.<T>
theEmitter->emitIns_R_R_R(INS_sve_orv, EA_8BYTE, REG_V3, REG_P3, REG_V3,
theEmitter->emitIns_R_R_R(INS_sve_orv, EA_SCALABLE, REG_V3, REG_P3, REG_V3,
INS_OPTS_SCALABLE_D); // ORV <V><d>, <Pg>, <Zn>.<T>

// IF_SVE_AG_3A
Expand Down Expand Up @@ -5324,13 +5324,13 @@ void CodeGen::genArm64EmitterUnitTestsSve()
#endif // ALL_ARM64_EMITTER_UNIT_TESTS_SVE_UNSUPPORTED

// IF_SVE_AK_3A
theEmitter->emitIns_R_R_R(INS_sve_smaxv, EA_8BYTE, REG_V15, REG_P7, REG_V4,
theEmitter->emitIns_R_R_R(INS_sve_smaxv, EA_SCALABLE, REG_V15, REG_P7, REG_V4,
INS_OPTS_SCALABLE_D); // SMAXV <V><d>, <Pg>, <Zn>.<T>
theEmitter->emitIns_R_R_R(INS_sve_sminv, EA_4BYTE, REG_V16, REG_P6, REG_V14,
theEmitter->emitIns_R_R_R(INS_sve_sminv, EA_SCALABLE, REG_V16, REG_P6, REG_V14,
INS_OPTS_SCALABLE_S); // SMINV <V><d>, <Pg>, <Zn>.<T>
theEmitter->emitIns_R_R_R(INS_sve_umaxv, EA_2BYTE, REG_V17, REG_P5, REG_V24,
theEmitter->emitIns_R_R_R(INS_sve_umaxv, EA_SCALABLE, REG_V17, REG_P5, REG_V24,
INS_OPTS_SCALABLE_H); // UMAXV <V><d>, <Pg>, <Zn>.<T>
theEmitter->emitIns_R_R_R(INS_sve_uminv, EA_1BYTE, REG_V18, REG_P4, REG_V31,
theEmitter->emitIns_R_R_R(INS_sve_uminv, EA_SCALABLE, REG_V18, REG_P4, REG_V31,
INS_OPTS_SCALABLE_B); // UMINV <V><d>, <Pg>, <Zn>.<T>

// IF_SVE_AL_3A
Expand Down Expand Up @@ -6218,13 +6218,13 @@ void CodeGen::genArm64EmitterUnitTestsSve()
INS_OPTS_SCALABLE_D); // COMPACT <Zd>.<T>, <Pg>, <Zn>.<T>

// IF_SVE_CP_3A
theEmitter->emitIns_R_R_R(INS_sve_cpy, EA_1BYTE, REG_V14, REG_P1, REG_V11, INS_OPTS_SCALABLE_B,
theEmitter->emitIns_R_R_R(INS_sve_cpy, EA_SCALABLE, REG_V14, REG_P1, REG_V11, INS_OPTS_SCALABLE_B,
INS_SCALABLE_OPTS_WITH_SIMD_SCALAR); // CPY <Zd>.<T>, <Pg>/M, <V><n>
theEmitter->emitIns_R_R_R(INS_sve_cpy, EA_4BYTE, REG_V13, REG_P2, REG_V10, INS_OPTS_SCALABLE_S,
theEmitter->emitIns_R_R_R(INS_sve_cpy, EA_SCALABLE, REG_V13, REG_P2, REG_V10, INS_OPTS_SCALABLE_S,
INS_SCALABLE_OPTS_WITH_SIMD_SCALAR); // CPY <Zd>.<T>, <Pg>/M, <V><n>
theEmitter->emitIns_R_R_R(INS_sve_mov, EA_2BYTE, REG_V12, REG_P3, REG_V9, INS_OPTS_SCALABLE_H,
theEmitter->emitIns_R_R_R(INS_sve_mov, EA_SCALABLE, REG_V12, REG_P3, REG_V9, INS_OPTS_SCALABLE_H,
INS_SCALABLE_OPTS_WITH_SIMD_SCALAR); // MOV <Zd>.<T>, <Pg>/M, <V><n>
theEmitter->emitIns_R_R_R(INS_sve_mov, EA_8BYTE, REG_V11, REG_P4, REG_V8, INS_OPTS_SCALABLE_D,
theEmitter->emitIns_R_R_R(INS_sve_mov, EA_SCALABLE, REG_V11, REG_P4, REG_V8, INS_OPTS_SCALABLE_D,
INS_SCALABLE_OPTS_WITH_SIMD_SCALAR); // MOV <Zd>.<T>, <Pg>/M, <V><n>

// IF_SVE_CQ_3A
Expand All @@ -6239,13 +6239,13 @@ void CodeGen::genArm64EmitterUnitTestsSve()
INS_OPTS_SCALABLE_B); // MOV <Zd>.<T>, <Pg>/M, <R><n|SP>

// IF_SVE_CR_3A
theEmitter->emitIns_R_R_R(INS_sve_lasta, EA_1BYTE, REG_V6, REG_P1, REG_V27, INS_OPTS_SCALABLE_B,
theEmitter->emitIns_R_R_R(INS_sve_lasta, EA_SCALABLE, REG_V6, REG_P1, REG_V27, INS_OPTS_SCALABLE_B,
INS_SCALABLE_OPTS_WITH_SIMD_SCALAR); // LASTA <V><d>, <Pg>, <Zn>.<T>
theEmitter->emitIns_R_R_R(INS_sve_lasta, EA_2BYTE, REG_V5, REG_P2, REG_V26, INS_OPTS_SCALABLE_H,
theEmitter->emitIns_R_R_R(INS_sve_lasta, EA_SCALABLE, REG_V5, REG_P2, REG_V26, INS_OPTS_SCALABLE_H,
INS_SCALABLE_OPTS_WITH_SIMD_SCALAR); // LASTA <V><d>, <Pg>, <Zn>.<T>
theEmitter->emitIns_R_R_R(INS_sve_lastb, EA_4BYTE, REG_V4, REG_P3, REG_V25, INS_OPTS_SCALABLE_S,
theEmitter->emitIns_R_R_R(INS_sve_lastb, EA_SCALABLE, REG_V4, REG_P3, REG_V25, INS_OPTS_SCALABLE_S,
INS_SCALABLE_OPTS_WITH_SIMD_SCALAR); // LASTB <V><d>, <Pg>, <Zn>.<T>
theEmitter->emitIns_R_R_R(INS_sve_lastb, EA_8BYTE, REG_V3, REG_P4, REG_V24, INS_OPTS_SCALABLE_D,
theEmitter->emitIns_R_R_R(INS_sve_lastb, EA_SCALABLE, REG_V3, REG_P4, REG_V24, INS_OPTS_SCALABLE_D,
INS_SCALABLE_OPTS_WITH_SIMD_SCALAR); // LASTB <V><d>, <Pg>, <Zn>.<T>

// IF_SVE_CS_3A
Expand Down Expand Up @@ -8905,7 +8905,7 @@ void CodeGen::genArm64EmitterUnitTestsSve()
// SUNPKLO <Zd>.<T>, <Zn>.<Tb>
theEmitter->emitIns_R_R(INS_sve_sunpklo, EA_SCALABLE, REG_V1, REG_V5, INS_OPTS_SCALABLE_S);
// UUNPKHI <Zd>.<T>, <Zn>.<Tb>
theEmitter->emitIns_R_R(INS_sve_uunpkhi, EA_SCALABLE, REG_V5, REG_V1, INS_OPTS_SCALABLE_D);
theEmitter->emitIns_R_R(INS_sve_uunpkhi, EA_SCALABLE, REG_V5, REG_V1, INS_OPTS_SCALABLE_B);
// UUNPKLO <Zd>.<T>, <Zn>.<Tb>
theEmitter->emitIns_R_R(INS_sve_uunpklo, EA_SCALABLE, REG_V8, REG_V6, INS_OPTS_SCALABLE_S);

Expand Down
19 changes: 14 additions & 5 deletions src/coreclr/jit/emitarm64sve.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2919,7 +2919,10 @@ void emitter::emitInsSve_R_R_R(instruction ins,

if (sopt == INS_SCALABLE_OPTS_UNPREDICATED)
{
assert(opt == INS_OPTS_SCALABLE_D);
// The instruction only has a .D variant. However, this doesn't matter as
// it operates on bits not lanes. Effectively this means all standard opt
// sizes are supported.
assert(insOptsScalableStandard(opt));
assert(isVectorRegister(reg2)); // nnnnn
fmt = IF_SVE_AU_3A;
}
Expand Down Expand Up @@ -3728,7 +3731,7 @@ void emitter::emitInsSve_R_R_R(instruction ins,
else if (sopt == INS_SCALABLE_OPTS_WITH_SIMD_SCALAR)
{
assert(isFloatReg(reg1));
assert(isValidVectorElemsize(size));
assert(isScalableVectorSize(size));
fmt = IF_SVE_CN_3A;
}
else
Expand All @@ -3748,7 +3751,7 @@ void emitter::emitInsSve_R_R_R(instruction ins,
if (sopt == INS_SCALABLE_OPTS_UNPREDICATED)
{
assert(ins == INS_sve_mov);
assert(opt == INS_OPTS_SCALABLE_D);
assert(insOptsScalableStandard(opt));
assert(isVectorRegister(reg1)); // ddddd
assert(isVectorRegister(reg2)); // nnnnn
assert(isVectorRegister(reg3)); // mmmmm
Expand Down Expand Up @@ -11817,6 +11820,7 @@ BYTE* emitter::emitOutput_InstrSve(BYTE* dst, instrDesc* id)
code |= insEncodeSveElemsize(optGetSveElemsize((insOpts)(id->idInsOpt() + 1))); // xx
dst += emitOutput_Instr(dst, code);
break;

case IF_SVE_BF_2A: // ........xx.xxiii ......nnnnnddddd -- SVE bitwise shift by immediate (unpredicated)
case IF_SVE_FT_2A: // ........xx.xxiii ......nnnnnddddd -- SVE2 bitwise shift and insert
case IF_SVE_FU_2A: // ........xx.xxiii ......nnnnnddddd -- SVE2 bitwise shift right and accumulate
Expand Down Expand Up @@ -12618,7 +12622,7 @@ void emitter::emitInsSveSanityCheck(instrDesc* id)
assert(isVectorRegister(id->idReg1())); // ddddd
assert(isLowPredicateRegister(id->idReg2())); // ggg
assert(isVectorRegister(id->idReg3())); // mmmmm
assert(isValidVectorElemsize(id->idOpSize()));
assert(isScalableVectorSize(id->idOpSize()));
break;

// Scalable to FP SIMD scalar.
Expand Down Expand Up @@ -14389,8 +14393,13 @@ void emitter::emitInsSveSanityCheck(instrDesc* id)
break;

case IF_SVE_BJ_2A: // ........xx...... ......nnnnnddddd -- SVE floating-point exponential accelerator
case IF_SVE_CH_2A: // ........xx...... ......nnnnnddddd -- SVE unpack vector elements
case IF_SVE_HF_2A: // ........xx...... ......nnnnnddddd -- SVE floating-point reciprocal estimate (unpredicated)
assert(insOptsScalableAtLeastHalf(id->idInsOpt()));
assert(isVectorRegister(id->idReg1()));
assert(isVectorRegister(id->idReg2()));
break;

case IF_SVE_CH_2A: // ........xx...... ......nnnnnddddd -- SVE unpack vector elements
assert(insOptsScalableWide(id->idInsOpt()));
assert(isVectorRegister(id->idReg1()));
assert(isVectorRegister(id->idReg2()));
Expand Down
Loading

0 comments on commit 6f6b2c0

Please sign in to comment.