diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h index 33f3570deae2e..0b58393f1f304 100644 --- a/src/coreclr/jit/emit.h +++ b/src/coreclr/jit/emit.h @@ -782,17 +782,18 @@ class emitter unsigned _idNoGC : 1; // Some helpers don't get recorded in GC tables #ifdef TARGET_ARM64 - opSize _idOpSize : 3; // operand size: 0=1 , 1=2 , 2=4 , 3=8, 4=16 - insOpts _idInsOpt : 6; // options for instructions - unsigned _idLclVar : 1; // access a local on stack + opSize _idOpSize : 3; // operand size: 0=1 , 1=2 , 2=4 , 3=8, 4=16 + insOpts _idInsOpt : 6; // options for instructions + unsigned _idLclVar : 1; // access a local on stack + unsigned _idLclVarPair : 1; // carries information for 2 GC lcl vars. #endif #ifdef TARGET_LOONGARCH64 - // TODO-LoongArch64: maybe delete on future. - opSize _idOpSize : 3; // operand size: 0=1 , 1=2 , 2=4 , 3=8, 4=16 - insOpts _idInsOpt : 6; // loongarch options for special: placeholders. e.g emitIns_R_C, also identifying the - // accessing a local on stack. - unsigned _idLclVar : 1; // access a local on stack. + // TODO-LoongArch64: maybe delete on future. + opSize _idOpSize : 3; // operand size: 0=1 , 1=2 , 2=4 , 3=8, 4=16 + insOpts _idInsOpt : 6; // loongarch options for special: placeholders. e.g emitIns_R_C, also identifying the + // accessing a local on stack. + unsigned _idLclVar : 1; // access a local on stack. 
#endif #ifdef TARGET_RISCV64 @@ -815,7 +816,7 @@ class emitter // x86: 46 bits // amd64: 46 bits // arm: 48 bits - // arm64: 49 bits + // arm64: 50 bits // loongarch64: 46 bits // @@ -827,7 +828,7 @@ class emitter #if defined(TARGET_ARM) #define ID_EXTRA_BITFIELD_BITS (16) #elif defined(TARGET_ARM64) -#define ID_EXTRA_BITFIELD_BITS (17) +#define ID_EXTRA_BITFIELD_BITS (18) #elif defined(TARGET_XARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) #define ID_EXTRA_BITFIELD_BITS (14) #else @@ -867,7 +868,7 @@ class emitter // x86: 52/48 bits // amd64: 53/48 bits // arm: 54/50 bits - // arm64: 56/51 bits + // arm64: 57/52 bits // loongarch64: 53/48 bits CLANG_FORMAT_COMMENT_ANCHOR; @@ -885,7 +886,7 @@ class emitter // x86: 12/16 bits // amd64: 11/16 bits // arm: 10/14 bits - // arm64: 8/13 bits + // arm64: 7/12 bits // loongarch64: 11/16 bits unsigned _idSmallCns : ID_BIT_SMALL_CNS; @@ -1432,6 +1433,16 @@ class emitter { _idLclVar = 1; } +#ifdef TARGET_ARM64 + bool idIsLclVarPair() const + { + return _idLclVarPair != 0; + } + void idSetIsLclVarPair() + { + _idLclVarPair = 1; + } +#endif // TARGET_ARM64 #endif // TARGET_ARMARCH #if defined(TARGET_ARM) @@ -1819,6 +1830,22 @@ class emitter #endif // TARGET_XARCH +#ifdef TARGET_ARM64 + struct instrDescLclVarPair : instrDesc // contains 2 gc vars to be tracked + { + instrDescLclVarPair() = delete; + + emitLclVarAddr iiaLclVar2; + }; + + struct instrDescLclVarPairCns : instrDescCns // contains 2 gc vars to be tracked, with large cons + { + instrDescLclVarPairCns() = delete; + + emitLclVarAddr iiaLclVar2; + }; +#endif + struct instrDescCGCA : instrDesc // call with ... 
{ instrDescCGCA() = delete; @@ -2600,7 +2627,26 @@ class emitter #endif // EMITTER_STATS return (instrDescLbl*)emitAllocAnyInstr(sizeof(instrDescLbl), EA_4BYTE); } #endif // !TARGET_ARM64 + +#if defined(TARGET_ARM64) + instrDescLclVarPair* emitAllocInstrLclVarPair(emitAttr attr) + { + instrDescLclVarPair* result = (instrDescLclVarPair*)emitAllocAnyInstr(sizeof(instrDescLclVarPair), attr); + result->idSetIsLclVarPair(); + return result; + } + + instrDescLclVarPairCns* emitAllocInstrLclVarPairCns(emitAttr attr, cnsval_size_t cns) + { + instrDescLclVarPairCns* result = + (instrDescLclVarPairCns*)emitAllocAnyInstr(sizeof(instrDescLclVarPairCns), attr); + result->idSetIsLargeCns(); + result->idSetIsLclVarPair(); + result->idcCnsVal = cns; + return result; + } +#endif // TARGET_ARM64 instrDescCns* emitAllocInstrCns(emitAttr attr) { @@ -2686,6 +2732,8 @@ class emitter #if !defined(TARGET_ARM64) instrDescLbl* emitNewInstrLbl(); +#else + instrDesc* emitNewInstrLclVarPair(emitAttr attr, cnsval_ssize_t cns); #endif // !TARGET_ARM64 static const BYTE emitFmtToOps[]; @@ -3249,6 +3297,36 @@ inline emitter::instrDescLbl* emitter::emitNewInstrLbl() { return emitAllocInstrLbl(); } +#else +inline emitter::instrDesc* emitter::emitNewInstrLclVarPair(emitAttr attr, cnsval_ssize_t cns) +{ +#if EMITTER_STATS + emitTotalIDescCnt++; + emitTotalIDescCnsCnt++; +#endif // EMITTER_STATS + + if (instrDesc::fitsInSmallCns(cns)) + { + instrDescLclVarPair* id = emitAllocInstrLclVarPair(attr); + id->idSmallCns(cns); +#if EMITTER_STATS + emitSmallCnsCnt++; + if ((cns - ID_MIN_SMALL_CNS) >= (SMALL_CNS_TSZ - 1)) + emitSmallCns[SMALL_CNS_TSZ - 1]++; + else + emitSmallCns[cns - ID_MIN_SMALL_CNS]++; +#endif + return id; + } + else + { + instrDescLclVarPairCns* id = emitAllocInstrLclVarPairCns(attr, cns); +#if EMITTER_STATS + emitLargeCnsCnt++; +#endif + return id; + } +} #endif // !TARGET_ARM64 inline emitter::instrDesc* emitter::emitNewInstrDsp(emitAttr attr, target_ssize_t dsp) 
@@ -3329,10 +3407,22 @@ inline size_t emitter::emitGetInstrDescSize(const instrDesc* id) } else if (id->idIsLargeCns()) { +#ifdef TARGET_ARM64 + if (id->idIsLclVarPair()) + { + return sizeof(instrDescLclVarPairCns); + } +#endif return sizeof(instrDescCns); } else { +#ifdef TARGET_ARM64 + if (id->idIsLclVarPair()) + { + return sizeof(instrDescLclVarPair); + } +#endif return sizeof(instrDesc); } } diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 657f9645de04c..b537146be5b40 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -128,15 +128,29 @@ size_t emitter::emitSizeOfInsDsc(instrDesc* id) if (id->idIsLargeCns()) { - if (id->idIsLargeDsp()) + if (id->idIsLclVarPair()) + { + return sizeof(instrDescLclVarPairCns); + } + else if (id->idIsLargeDsp()) + { return sizeof(instrDescCnsDsp); + } else + { return sizeof(instrDescCns); + } } else { - if (id->idIsLargeDsp()) + if (id->idIsLclVarPair()) + { + return sizeof(instrDescLclVarPair); + } + else if (id->idIsLargeDsp()) + { return sizeof(instrDescDsp); + } else { #if FEATURE_LOOP_ALIGN @@ -2399,18 +2413,7 @@ emitter::code_t emitter::emitInsCode(instruction ins, insFormat fmt) /*static*/ unsigned emitter::NaturalScale_helper(emitAttr size) { assert(size == EA_1BYTE || size == EA_2BYTE || size == EA_4BYTE || size == EA_8BYTE || size == EA_16BYTE); - - unsigned result = 0; - unsigned utemp = (unsigned)size; - - // Compute log base 2 of utemp (aka 'size') - while (utemp > 1) - { - result++; - utemp >>= 1; - } - - return result; + return BitOperations::Log2((unsigned)size); } /************************************************************************ @@ -5686,7 +5689,8 @@ void emitter::emitIns_R_R_I( } // Try to optimize a load/store with an alternative instruction. 
- if (isLdrStr && emitComp->opts.OptimizationEnabled() && OptimizeLdrStr(ins, attr, reg1, reg2, imm, size, fmt)) + if (isLdrStr && emitComp->opts.OptimizationEnabled() && + OptimizeLdrStr(ins, attr, reg1, reg2, imm, size, fmt, false, -1, -1 DEBUG_ARG(false))) { return; } @@ -6493,6 +6497,132 @@ void emitter::emitIns_R_R_R( appendToCurIG(id); } +//----------------------------------------------------------------------------------- +// emitIns_R_R_R_I_LdStPair: Add an "ldp"/"stp" instruction that loads/stores a pair of +// registers from/to the memory addressed by reg3 plus an immediate offset. +// +// Arguments: +// ins - The instruction code +// attr - The emit attribute for register 1 +// attr2 - The emit attribute for register 2 +// reg1 - Register 1 +// reg2 - Register 2 +// reg3 - Register 3 +// imm - Immediate offset, prior to scaling by operand size +// varx1 - LclVar number 1 (-1 if none) +// varx2 - LclVar number 2 (-1 if none) +// offs1 - Memory offset of lclvar number 1 +// offs2 - Memory offset of lclvar number 2 +// +void emitter::emitIns_R_R_R_I_LdStPair(instruction ins, + emitAttr attr, + emitAttr attr2, + regNumber reg1, + regNumber reg2, + regNumber reg3, + ssize_t imm, + int varx1, + int varx2, + int offs1, + int offs2) +{ + assert((ins == INS_stp) || (ins == INS_ldp)); + emitAttr size = EA_SIZE(attr); + insFormat fmt = IF_NONE; + unsigned scale = 0; + + // Is the target a vector register? + if (isVectorRegister(reg1)) + { + assert(isValidVectorLSPDatasize(size)); + assert(isVectorRegister(reg2)); + + scale = NaturalScale_helper(size); + assert((scale >= 2) && (scale <= 4)); + } + else + { + assert(isValidGeneralDatasize(size)); + assert(isGeneralRegisterOrZR(reg2)); + scale = (size == EA_8BYTE) ? 
3 : 2; + } + + reg3 = encodingSPtoZR(reg3); + + fmt = IF_LS_3C; + ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate + if (imm == 0) + { + fmt = IF_LS_3B; + } + else + { + if ((imm & mask) == 0) + { + imm >>= scale; // The immediate is scaled by the size of the ld/st + } + else + { + // Unlike emitIns_S_S_R_R(), we would never come here when + // (imm & mask) != 0. + unreached(); + } + } + + bool validVar1 = varx1 != -1; + bool validVar2 = varx2 != -1; + + instrDesc* id; + + if (validVar1 && validVar2) + { + id = emitNewInstrLclVarPair(attr, imm); + id->idAddr()->iiaLclVar.initLclVarAddr(varx1, offs1); + id->idSetIsLclVar(); + + emitGetLclVarPairLclVar2(id)->initLclVarAddr(varx2, offs2); + } + else + { + id = emitNewInstrCns(attr, imm); + if (validVar1) + { + id->idAddr()->iiaLclVar.initLclVarAddr(varx1, offs1); + id->idSetIsLclVar(); + } + if (validVar2) + { + id->idAddr()->iiaLclVar.initLclVarAddr(varx2, offs2); + id->idSetIsLclVar(); + } + } + + id->idIns(ins); + id->idInsFmt(fmt); + id->idInsOpt(INS_OPTS_NONE); + + id->idReg1(reg1); + id->idReg2(reg2); + id->idReg3(reg3); + + // Record the attribute for the second register in the pair + if (EA_IS_GCREF(attr2)) + { + id->idGCrefReg2(GCT_GCREF); + } + else if (EA_IS_BYREF(attr2)) + { + id->idGCrefReg2(GCT_BYREF); + } + else + { + id->idGCrefReg2(GCT_NONE); + } + + dispIns(id); + appendToCurIG(id); +} + /***************************************************************************** * * Add an instruction referencing three registers and a constant. @@ -7610,8 +7740,9 @@ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int va disp = base + offs; assert((scale >= 0) && (scale <= 4)); - regNumber reg2 = FPbased ? REG_FPBASE : REG_SPBASE; - reg2 = encodingSPtoZR(reg2); + bool useRegForImm = false; + regNumber reg2 = FPbased ? 
REG_FPBASE : REG_SPBASE; + reg2 = encodingSPtoZR(reg2); if (ins == INS_lea) { @@ -7639,9 +7770,8 @@ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int va } else { - bool useRegForImm = false; - ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate - imm = disp; + ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate + imm = disp; if (imm == 0) { fmt = IF_LS_2A; @@ -7683,7 +7813,7 @@ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int va // Try to optimize a load/store with an alternative instruction. if (isLdrStr && emitComp->opts.OptimizationEnabled() && - OptimizeLdrStr(ins, attr, reg1, reg2, imm, size, fmt, true, varx, offs)) + OptimizeLdrStr(ins, attr, reg1, reg2, imm, size, fmt, true, varx, offs DEBUG_ARG(useRegForImm))) { return; } @@ -7917,7 +8047,7 @@ void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber reg1, int va // Try to optimize a store with an alternative instruction. if (isStr && emitComp->opts.OptimizationEnabled() && - OptimizeLdrStr(ins, attr, reg1, reg2, imm, size, fmt, true, varx, offs)) + OptimizeLdrStr(ins, attr, reg1, reg2, imm, size, fmt, true, varx, offs DEBUG_ARG(useRegForImm))) { return; } @@ -11731,31 +11861,81 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) vt = tmpDsc->tdTempType(); } if (vt == TYP_REF || vt == TYP_BYREF) + { emitGCvarDeadUpd(adr + ofs, dst DEBUG_ARG(varNum)); + } } if (emitInsWritesToLclVarStackLocPair(id)) { - unsigned ofs2 = ofs + TARGET_POINTER_SIZE; + int varNum2 = varNum; + int adr2 = adr; + unsigned ofs2 = ofs; + unsigned ofs2Dist; + + if (id->idIsLclVarPair()) + { + bool FPbased2; + + emitLclVarAddr* lclVarAddr2 = emitGetLclVarPairLclVar2(id); + varNum2 = lclVarAddr2->lvaVarNum(); + ofs2 = lclVarAddr2->lvaOffset(); + + // If there are 2 GC vars in this instrDesc, get the 2nd variable + // that should be tracked. 
+ adr2 = emitComp->lvaFrameAddress(varNum2, &FPbased2); + ofs2Dist = EA_SIZE_IN_BYTES(size); +#ifdef DEBUG + assert(FPbased == FPbased2); + if (FPbased) + { + assert(id->idReg3() == REG_FP); + } + else + { + assert(id->idReg3() == REG_SP); + } + assert(varNum2 != -1); +#endif // DEBUG + } + else + { + ofs2Dist = TARGET_POINTER_SIZE; + ofs2 += ofs2Dist; + } + + ofs2 = AlignDown(ofs2, ofs2Dist); + if (id->idGCrefReg2() != GCT_NONE) { - emitGCvarLiveUpd(adr + ofs2, varNum, id->idGCrefReg2(), dst DEBUG_ARG(varNum)); +#ifdef DEBUG + if (id->idGCref() != GCT_NONE) + { + // If 1st register was a gc-var, then make sure the offset + // are correctly set for the 2nd register that is holding + // another gc-var. + assert((adr + ofs + ofs2Dist) == (adr2 + ofs2)); + } +#endif + emitGCvarLiveUpd(adr2 + ofs2, varNum2, id->idGCrefReg2(), dst DEBUG_ARG(varNum2)); } else { // If the type of the local is a gc ref type, update the liveness. var_types vt; - if (varNum >= 0) + if (varNum2 >= 0) { // "Regular" (non-spill-temp) local. - vt = var_types(emitComp->lvaTable[varNum].lvType); + vt = var_types(emitComp->lvaTable[varNum2].lvType); } else { - TempDsc* tmpDsc = codeGen->regSet.tmpFindNum(varNum); + TempDsc* tmpDsc = codeGen->regSet.tmpFindNum(varNum2); vt = tmpDsc->tdTempType(); } if (vt == TYP_REF || vt == TYP_BYREF) - emitGCvarDeadUpd(adr + ofs2, dst DEBUG_ARG(varNum)); + { + emitGCvarDeadUpd(adr2 + ofs2, dst DEBUG_ARG(varNum2)); + } } } } @@ -16180,6 +16360,71 @@ bool emitter::IsRedundantLdStr( return false; } +//----------------------------------------------------------------------------------- +// OptimizeLdrStr: Try to optimize "ldr" or "str" instruction with an alternative +// instruction. 
+// +// Arguments: +// ins - The instruction code +// reg1Attr - The emit attribute for register 1 +// reg1 - Register 1 +// reg2 - Register 2 +// imm - Immediate offset, prior to scaling by operand size +// size - Operand size +// fmt - Instruction format +// localVar - If current instruction has local var +// varx - LclVarNum if this instruction contains local variable +// offs - Stack offset where it is accessed (loaded / stored). +// useRsvdReg - If this instruction needs reserved register. +// +// Return Value: +// "true" if the previous instruction has been overwritten. +// +bool emitter::OptimizeLdrStr(instruction ins, + emitAttr reg1Attr, + regNumber reg1, + regNumber reg2, + ssize_t imm, + emitAttr size, + insFormat fmt, + bool localVar, + int varx, + int offs DEBUG_ARG(bool useRsvdReg)) +{ + assert(ins == INS_ldr || ins == INS_str); + + if (!emitCanPeepholeLastIns() || (emitLastIns->idIns() != ins)) + { + return false; + } + + // Is the ldr/str even necessary? + if (IsRedundantLdStr(ins, reg1, reg2, imm, size, fmt)) + { + return true; + } + + // Register 2 needs conversion to unencoded value for following optimisation checks. + reg2 = encodingZRtoSP(reg2); + + // If the previous instruction was a matching load/store, then try to replace it instead of emitting. + // + if (ReplaceLdrStrWithPairInstr(ins, reg1Attr, reg1, reg2, imm, size, fmt, localVar, varx, offs)) + { + assert(!useRsvdReg); + return true; + } + + // If we have a second LDR instruction from the same source, then try to replace it with a MOV. + if (IsOptimizableLdrToMov(ins, reg1, reg2, imm, size, fmt)) + { + emitIns_Mov(INS_mov, reg1Attr, reg1, emitLastIns->idReg1(), true); + return true; + } + + return false; +} + //----------------------------------------------------------------------------------- // ReplaceLdrStrWithPairInstr: Potentially, overwrite a previously-emitted "ldr" or "str" // instruction with an "ldp" or "stp" instruction. 
@@ -16191,66 +16436,92 @@ bool emitter::IsRedundantLdStr( // reg2 - Register 2 // imm - Immediate offset, prior to scaling by operand size // size - Operand size -// fmt - Instruction format +// fmt - Instruction format +// isCurrLclVar - If current instruction has local var +// varx - LclVarNum if this instruction contains local variable +// offs - Stack offset where it is accessed (loaded / stored). // // Return Value: // "true" if the previous instruction has been overwritten. // -bool emitter::ReplaceLdrStrWithPairInstr( - instruction ins, emitAttr reg1Attr, regNumber reg1, regNumber reg2, ssize_t imm, emitAttr size, insFormat fmt) +bool emitter::ReplaceLdrStrWithPairInstr(instruction ins, + emitAttr reg1Attr, + regNumber reg1, + regNumber reg2, + ssize_t imm, + emitAttr size, + insFormat fmt, + bool isCurrLclVar, + int varx, + int offs) { RegisterOrder optimizationOrder = IsOptimizableLdrStrWithPair(ins, reg1, reg2, imm, size, fmt); - if (optimizationOrder != eRO_none) + if (optimizationOrder == eRO_none) { - regNumber oldReg1 = emitLastIns->idReg1(); + return false; + } - ssize_t oldImm = emitGetInsSC(emitLastIns); - instruction optIns = (ins == INS_ldr) ? INS_ldp : INS_stp; + regNumber prevReg1 = emitLastIns->idReg1(); - emitAttr oldReg1Attr; - switch (emitLastIns->idGCref()) - { - case GCT_GCREF: - oldReg1Attr = EA_GCREF; - break; - case GCT_BYREF: - oldReg1Attr = EA_BYREF; - break; - default: - oldReg1Attr = emitLastIns->idOpSize(); - break; - } + ssize_t prevImm = emitGetInsSC(emitLastIns); + instruction optIns = (ins == INS_ldr) ? INS_ldp : INS_stp; - // Remove the last instruction written. 
- emitRemoveLastInstruction(); + emitAttr prevReg1Attr; + ssize_t prevImmSize = prevImm * size; + ssize_t newImmSize = imm * size; + bool isLastLclVar = emitLastIns->idIsLclVar(); + int prevOffset = -1; + int prevLclVarNum = -1; - // Combine two 32 bit stores of value zero into one 64 bit store - if (ins == INS_str && reg1 == REG_ZR && oldReg1 == REG_ZR && size == EA_4BYTE) - { + if (isLastLclVar) + { + prevOffset = emitLastIns->idAddr()->iiaLclVar.lvaOffset(); + prevLclVarNum = emitLastIns->idAddr()->iiaLclVar.lvaVarNum(); + } - // The first register is at the lower offset for the ascending order - ssize_t offset = (optimizationOrder == eRO_ascending ? oldImm : imm) * size; - emitIns_R_R_I(INS_str, EA_8BYTE, REG_ZR, reg2, offset, INS_OPTS_NONE); - return true; - } + if (!isCurrLclVar) + { + assert((varx == -1) && (offs == -1)); + } - // Emit the new instruction. Make sure to scale the immediate value by the operand size. - if (optimizationOrder == eRO_ascending) - { - // The FIRST register is at the lower offset - emitIns_R_R_R_I(optIns, oldReg1Attr, oldReg1, reg1, reg2, oldImm * size, INS_OPTS_NONE, reg1Attr); - } - else - { - // The SECOND register is at the lower offset - emitIns_R_R_R_I(optIns, reg1Attr, reg1, oldReg1, reg2, imm * size, INS_OPTS_NONE, oldReg1Attr); - } + switch (emitLastIns->idGCref()) + { + case GCT_GCREF: + prevReg1Attr = EA_GCREF; + break; + case GCT_BYREF: + prevReg1Attr = EA_BYREF; + break; + default: + prevReg1Attr = emitLastIns->idOpSize(); + break; + } + + // Remove the last instruction written. + emitRemoveLastInstruction(); + // Combine two 32 bit stores of value zero into one 64 bit store + if ((ins == INS_str) && (reg1 == REG_ZR) && (prevReg1 == REG_ZR) && (size == EA_4BYTE)) + { + // The first register is at the lower offset for the ascending order + ssize_t offset = (optimizationOrder == eRO_ascending ? 
prevImm : imm) * size; + emitIns_R_R_I(INS_str, EA_8BYTE, REG_ZR, reg2, offset, INS_OPTS_NONE); return true; } - return false; + if (optimizationOrder == eRO_ascending) + { + emitIns_R_R_R_I_LdStPair(optIns, prevReg1Attr, reg1Attr, prevReg1, reg1, reg2, prevImmSize, prevLclVarNum, varx, + prevOffset, offs); + } + else + { + emitIns_R_R_R_I_LdStPair(optIns, reg1Attr, prevReg1Attr, reg1, prevReg1, reg2, newImmSize, varx, prevLclVarNum, + offs, prevOffset); + } + + return true; } //----------------------------------------------------------------------------------- @@ -16308,7 +16579,7 @@ emitter::RegisterOrder emitter::IsOptimizableLdrStrWithPair( // For LDR/ STR, there are 9 bits, so we need to limit the range explicitly in software. if ((imm < -64) || (imm > 63) || (prevImm < -64) || (prevImm > 63)) { - // Then one or more of the immediate values is out of range, so we cannot optimise. + // Then one or more of the immediate values is out of range, so we cannot optimize. return eRO_none; } diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index ee0e029c84b68..240f4e03dd585 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -128,66 +128,39 @@ bool IsRedundantMov(instruction ins, emitAttr size, regNumber dst, regNumber src bool IsRedundantLdStr(instruction ins, regNumber reg1, regNumber reg2, ssize_t imm, emitAttr size, insFormat fmt); RegisterOrder IsOptimizableLdrStrWithPair( instruction ins, regNumber reg1, regNumber reg2, ssize_t imm, emitAttr size, insFormat fmt); -bool ReplaceLdrStrWithPairInstr( - instruction ins, emitAttr reg1Attr, regNumber reg1, regNumber reg2, ssize_t imm, emitAttr size, insFormat fmt); +bool ReplaceLdrStrWithPairInstr(instruction ins, + emitAttr reg1Attr, + regNumber reg1, + regNumber reg2, + ssize_t imm, + emitAttr size, + insFormat fmt, + bool localVar = false, + int varx = -1, + int offs = -1); bool IsOptimizableLdrToMov(instruction ins, regNumber reg1, regNumber reg2, ssize_t imm, emitAttr 
size, insFormat fmt); - -// Try to optimize a Ldr or Str with an alternative instruction. -inline bool OptimizeLdrStr(instruction ins, - emitAttr reg1Attr, - regNumber reg1, - regNumber reg2, - ssize_t imm, - emitAttr size, - insFormat fmt, - bool localVar = false, - int varx = 0, - int offs = 0) +FORCEINLINE bool OptimizeLdrStr(instruction ins, + emitAttr reg1Attr, + regNumber reg1, + regNumber reg2, + ssize_t imm, + emitAttr size, + insFormat fmt, + bool localVar = false, + int varx = -1, + int offs = -1 DEBUG_ARG(bool useRsvdReg = false)); + +emitLclVarAddr* emitGetLclVarPairLclVar2(instrDesc* id) { - assert(ins == INS_ldr || ins == INS_str); - - if (!emitCanPeepholeLastIns() || (emitLastIns->idIns() != ins)) - { - return false; - } - - // Is the ldr/str even necessary? - if (IsRedundantLdStr(ins, reg1, reg2, imm, size, fmt)) + assert(id->idIsLclVarPair()); + if (id->idIsLargeCns()) { - return true; + return &(((instrDescLclVarPairCns*)id)->iiaLclVar2); } - - // Register 2 needs conversion to unencoded value for following optimisation checks. - reg2 = encodingZRtoSP(reg2); - - // If the previous instruction was a matching load/store, then try to replace it instead of emitting. - // - bool canReplaceWithPair = true; - if (ins == INS_str) + else { - // For INS_str, don't do this if either instruction had a local GC variable. - // For INS_ldr, it is fine to perform this optimization because the output code already handles the code of - // updating the gc refs. We do not need offset tracking for load cases. - if ((localVar && EA_IS_GCREF_OR_BYREF(reg1Attr)) || - (emitLastIns->idIsLclVar() && (emitLastIns->idGCref() != GCT_NONE))) - { - canReplaceWithPair = false; - } + return &(((instrDescLclVarPair*)id)->iiaLclVar2); } - - if (canReplaceWithPair && ReplaceLdrStrWithPairInstr(ins, reg1Attr, reg1, reg2, imm, size, fmt)) - { - return true; - } - - // If we have a second LDR instruction from the same source, then try to replace it with a MOV. 
- if (IsOptimizableLdrToMov(ins, reg1, reg2, imm, size, fmt)) - { - emitIns_Mov(INS_mov, reg1Attr, reg1, emitLastIns->idReg1(), true); - return true; - } - - return false; } /************************************************************************ @@ -890,6 +863,18 @@ void emitIns_S_R(instruction ins, emitAttr attr, regNumber ireg, int varx, int o void emitIns_S_S_R_R( instruction ins, emitAttr attr, emitAttr attr2, regNumber ireg, regNumber ireg2, int varx, int offs); +void emitIns_R_R_R_I_LdStPair(instruction ins, + emitAttr attr, + emitAttr attr2, + regNumber reg1, + regNumber reg2, + regNumber reg3, + ssize_t imm, + int varx1 = -1, + int varx2 = -1, + int offs1 = -1, + int offs2 = -1); + void emitIns_R_S(instruction ins, emitAttr attr, regNumber ireg, int varx, int offs); void emitIns_R_R_S_S(