From fcb9b1825c43fe87ec3f740e874c3ca3bcf27e01 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sun, 21 Jul 2024 09:56:06 +0200 Subject: [PATCH] JIT: Add an emitter peephole for post-indexed addressing (#105181) This transforms sequences like ```asm ldr x0, [x1] add x1, x1, #8 ``` into the equivalent ```asm ldr x0, [x1], #8 ``` --- src/coreclr/jit/emitarm64.cpp | 158 ++++++++++++++++++++++++++++++++++ src/coreclr/jit/emitarm64.h | 2 + 2 files changed, 160 insertions(+) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index f831e0aa1b691..422d539d3b8c0 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -5844,6 +5844,12 @@ void emitter::emitIns_R_R_I(instruction ins, return; } + if ((reg1 == reg2) && (EA_SIZE(attr) == EA_PTRSIZE) && emitComp->opts.OptimizationEnabled() && + OptimizePostIndexed(ins, reg1, imm, attr)) + { + return; + } + reg1 = encodingSPtoZR(reg1); reg2 = encodingSPtoZR(reg2); } @@ -11070,6 +11076,37 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) code |= ((code_t)imm << 12); // iiiiiiiii code |= insEncodeReg_Rn(id->idReg2()); // nnnnn dst += emitOutput_Instr(dst, code); + + // With pre or post-indexing we may have a second GC register to + // update. + if (insOptsIndexed(id->idInsOpt()) && !id->idIsSmallDsc()) + { + if (emitInsIsLoad(ins)) + { + // Load will write the destination (reg1). + if (id->idGCref() != GCT_NONE) + { + emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst); + } + else + { + emitGCregDeadUpd(id->idReg1(), dst); + } + } + + // We will always write reg2. + if (id->idGCrefReg2() != GCT_NONE) + { + emitGCregLiveUpd(id->idGCrefReg2(), id->idReg2(), dst); + } + else + { + emitGCregDeadUpd(id->idReg2(), dst); + } + + goto SKIP_GC_UPDATE; + } + break; case IF_LS_2D: // LS_2D .Q.............. ....ssnnnnnttttt Vt Rn @@ -17150,6 +17187,127 @@ bool emitter::IsOptimizableLdrToMov( return true; } +//----------------------------------------------------------------------------------- +// OptimizePostIndexed: Optimize an addition/subtraction from a register by +// replacing the previous instruction with a post-indexed addressing form if +// possible. +// +// Arguments: +// ins - Whether this is an add or subtraction +// reg - The register that is being updated +// imm - Immediate that is being added/subtracted +// +// Returns: +// True if the previous instruction was optimized to perform the add/sub. +// +bool emitter::OptimizePostIndexed(instruction ins, regNumber reg, ssize_t imm, emitAttr regAttr) +{ + assert((ins == INS_add) || (ins == INS_sub)); + + if (!emitCanPeepholeLastIns() || !emitInsIsLoadOrStore(emitLastIns->idIns())) + { + return false; + } + + if ((emitLastIns->idInsFmt() != IF_LS_2A) || emitLastIns->idIsTlsGD()) + { + return false; + } + + // Cannot allow post indexing if the load itself is already modifying the + // register. + regNumber loadStoreDataReg = emitLastIns->idReg1(); + if (loadStoreDataReg == reg) + { + return false; + } + + // We must be updating the same register that the addressing is happening + // on. The SP register is stored as ZR, so make sure to normalize that too. + regNumber loadStoreAddrReg = encodingZRtoSP(emitLastIns->idReg2()); + if (loadStoreAddrReg != reg) + { + return false; + } + + // Only some stores/loads are eligible + switch (emitLastIns->idIns()) + { + case INS_ldrb: + case INS_strb: + case INS_ldurb: + case INS_sturb: + case INS_ldrh: + case INS_strh: + case INS_ldurh: + case INS_sturh: + case INS_ldrsb: + case INS_ldursb: + case INS_ldrsh: + case INS_ldursh: + case INS_ldrsw: + case INS_ldursw: + case INS_ldr: + case INS_str: + case INS_ldur: + case INS_stur: + break; + + default: + return false; + } + + if (ins == INS_sub) + { + imm = -imm; + } + + // Only some post-indexing offsets can be represented. + if ((imm < -256) || (imm >= 256)) + { + return false; + } + + instruction newIns = emitLastIns->idIns(); + emitAttr newAttr; + + switch (emitLastIns->idGCref()) + { + case GCT_BYREF: + newAttr = EA_BYREF; + break; + case GCT_GCREF: + newAttr = EA_GCREF; + break; + default: + newAttr = emitLastIns->idOpSize(); + break; + } + + emitRemoveLastInstruction(); + + instrDesc* id = emitNewInstrCns(newAttr, imm); + id->idIns(newIns); + id->idInsFmt(IF_LS_2C); + id->idInsOpt(INS_OPTS_POST_INDEX); + + id->idReg1(loadStoreDataReg); + id->idReg2(encodingSPtoZR(loadStoreAddrReg)); + + if (EA_IS_BYREF(regAttr)) + { + id->idGCrefReg2(GCT_BYREF); + } + else if (EA_IS_GCREF(regAttr)) + { + id->idGCrefReg2(GCT_GCREF); + } + + dispIns(id); + appendToCurIG(id); + return true; +} + #if defined(FEATURE_SIMD) //----------------------------------------------------------------------------------- // emitStoreSimd12ToLclOffset: store SIMD12 value from dataReg to varNum+offset. diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index 1aeeb46942e5f..8e2ed80c6cdf2 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -193,6 +193,8 @@ FORCEINLINE bool OptimizeLdrStr(instruction ins, int varx = -1, int offs = -1 DEBUG_ARG(bool useRsvdReg = false)); +bool OptimizePostIndexed(instruction ins, regNumber reg, ssize_t imm, emitAttr regAttr); + emitLclVarAddr* emitGetLclVarPairLclVar2(instrDesc* id) { assert(id->idIsLclVarPair());