Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replace successive "ldr" and "str" instructions with "ldp" and "stp" #77540

Merged
merged 30 commits into from
Jan 27, 2023
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
b88ff31
Replace successive "ldr" and "str" instructions with "ldp" and "stp"
AndyJGraham Sep 6, 2022
f0c918c
No longer use a temporary buffer to build the optimized instruction.
AndyJGraham Oct 31, 2022
f1b236e
Addressed assorted review comments.
AndyJGraham Nov 1, 2022
c0533bd
Now optimizes ascending locations and decending locations with
AndyJGraham Nov 3, 2022
372ee97
Modification to remove last instructions.
AndyJGraham Nov 14, 2022
12fc291
Merge branch 'main'
AndyJGraham Nov 15, 2022
0b377ed
Ongoing improvements to remove previously-emitted instruction
AndyJGraham Nov 29, 2022
46b85f8
Stopped optimization of consecutive instructions that straddled an in…
AndyJGraham Dec 1, 2022
e4741f9
Addressed code change requests in GitHub.
AndyJGraham Dec 1, 2022
2822f64
Merge branch 'main'
AndyJGraham Dec 1, 2022
10a4510
Various fixes to ldp/stp optimization
BruceForstall Dec 2, 2022
d80a69a
Merge pull request #1 from BruceForstall/LdpStp_Modifications_Fixes
AndyJGraham Dec 5, 2022
f6a49bf
Delete unnecessary and incorrect assert
BruceForstall Dec 7, 2022
ed4d070
Merge pull request #2 from BruceForstall/LdpStp_Modifications_FixAsse…
AndyJGraham Dec 7, 2022
4b0e51e
Diagnostic change only, to confirm whether a theory is correct or
AndyJGraham Dec 9, 2022
2997a8e
Revert "Diagnostic change only, to confirm whether a theory is correc…
AndyJGraham Dec 14, 2022
f0907cc
Do not merge. Temporarily removed calls to
AndyJGraham Dec 14, 2022
c5c4234
Modifications to better update the IP mapping table for a replaced in…
AndyJGraham Dec 15, 2022
bb8fdea
Merge branch 'main' of ssh://gerrit.oss.arm.com/enterprise-llt/dotnet…
AndyJGraham Dec 16, 2022
65eed90
Minor formatting change.
AndyJGraham Dec 16, 2022
e03b375
Check for out of range offsets
a74nh Jan 10, 2023
2cef6fc
Don't optimise during prolog/epilog
a74nh Jan 16, 2023
41a9828
Merge branch 'dotnet:main' into LdpStp_Modifications
a74nh Jan 16, 2023
ba89fd3
Fix windows build error
a74nh Jan 16, 2023
1fbf423
Merge branch main
a74nh Jan 19, 2023
ca9a325
IGF_HAS_REMOVED_INSTR is ARM64 only
a74nh Jan 20, 2023
e66ad66
Add OptimizeLdrStr function
a74nh Jan 20, 2023
8b44843
Fix formatting
a74nh Jan 20, 2023
2e7aaf6
Ensure local variables are tracked
a74nh Jan 24, 2023
fe76782
Don't peephole local variables
a74nh Jan 25, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
270 changes: 264 additions & 6 deletions src/coreclr/jit/emitarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5676,6 +5676,14 @@ void emitter::emitIns_R_R_I(
{
return;
}

// If we have replaced an LDR or STR instruction with
// an LDP or STP then we do not want to carry on to
// emit the second instruction.
if (ReplacedLdrStr(ins, attr, reg1, reg2, imm, size, fmt))
{
return;
}
}
else if (isAddSub)
{
Expand Down Expand Up @@ -6491,7 +6499,8 @@ void emitter::emitIns_R_R_R_I(instruction ins,
regNumber reg3,
ssize_t imm,
insOpts opt /* = INS_OPTS_NONE */,
emitAttr attrReg2 /* = EA_UNKNOWN */)
emitAttr attrReg2 /* = EA_UNKNOWN */,
AndyJGraham marked this conversation as resolved.
Show resolved Hide resolved
instrDesc* reuseInstr /* = nullptr */)
{
emitAttr size = EA_SIZE(attr);
emitAttr elemsize = EA_UNKNOWN;
Expand Down Expand Up @@ -6626,6 +6635,7 @@ void emitter::emitIns_R_R_R_I(instruction ins,
scale = (size == EA_8BYTE) ? 3 : 2;
}
isLdSt = true;
fmt = IF_LS_3C;
break;

case INS_ld1:
Expand Down Expand Up @@ -6906,7 +6916,52 @@ void emitter::emitIns_R_R_R_I(instruction ins,
}
assert(fmt != IF_NONE);

instrDesc* id = emitNewInstrCns(attr, imm);
// An "instrDesc" will *always* be required.
// Under normal circumstances the instruction
// will be added to the emitted group. However,
// this is not correct for instructions that
// are going to overwrite already-emitted
// instructions.
instrDesc* id;
AndyJGraham marked this conversation as resolved.
Show resolved Hide resolved
INDEBUG(size_t reusedInstrSize = (reuseInstr != nullptr) ? emitSizeOfInsDsc(reuseInstr) : 0);

// Now the instruction is either emitted OR
// used to overwrite the previously-emitted
// instruction.
if (reuseInstr == nullptr)
{
id = emitNewInstrCns(attr, imm);
}
else
{
id = reuseInstr;
memset(id, 0, sizeof(instrDesc));

// Store the size and handle the two special
// values that indicate GCref and ByRef

if (EA_IS_GCREF(attr))
{
// A special value indicates a GCref pointer value

id->idGCref(GCT_GCREF);
id->idOpSize(EA_PTRSIZE);
}
else if (EA_IS_BYREF(attr))
{
// A special value indicates a Byref pointer value

id->idGCref(GCT_BYREF);
id->idOpSize(EA_PTRSIZE);
}
else
{
id->idGCref(GCT_NONE);
id->idOpSize(EA_SIZE(attr));
}

id->idSmallCns(imm);
}

id->idIns(ins);
id->idInsFmt(fmt);
Expand All @@ -6932,8 +6987,15 @@ void emitter::emitIns_R_R_R_I(instruction ins,
}
}

dispIns(id);
appendToCurIG(id);
assert((reuseInstr == nullptr) || (emitSizeOfInsDsc(reuseInstr) == reusedInstrSize));

// Now the instruction is EITHER emitted OR used to overwrite the previously-emitted instruction.
if (reuseInstr == nullptr)
{
// Then this is the standard exit path and the instruction is to be appended to the instruction group.
dispIns(id);
appendToCurIG(id);
}
}

/*****************************************************************************
Expand Down Expand Up @@ -7623,8 +7685,7 @@ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int va
{
bool useRegForImm = false;
ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate

imm = disp;
imm = disp;
if (imm == 0)
{
fmt = IF_LS_2A;
Expand Down Expand Up @@ -7670,6 +7731,18 @@ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int va

assert(fmt != IF_NONE);

// This handles LDR duplicate instructions

// If we have replaced an LDR or STR instruction with
// an LDP or STP then we do not want to carry on to
// emit the second instruction.
if (ReplacedLdrStr(ins, attr, reg1, reg2, imm, size, fmt))
{
return;
}

// We need to simply emit the instruction unchanged

instrDesc* id = emitNewInstrCns(attr, imm);

id->idIns(ins);
Expand Down Expand Up @@ -7901,6 +7974,14 @@ void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber reg1, int va

assert(fmt != IF_NONE);

// If we have replaced an LDR or STR instruction with
// an LDP or STP then we do not want to carry on to
// emit the second instruction.
if (ReplacedLdrStr(ins, attr, reg1, reg2, imm, size, fmt))
{
return;
}

instrDesc* id = emitNewInstrCns(attr, imm);

id->idIns(ins);
Expand Down Expand Up @@ -16128,4 +16209,181 @@ bool emitter::IsRedundantLdStr(

return false;
}

//-----------------------------------------------------------------------------------
// ReplacedLdrStr: Potentially, overwrite a previously-emitted
// "ldr" or "str" instruction with an "ldp" or
// "stp" instruction.
//
// Arguments:
// ins - The instruction code
// reg1Attr - The emit attribute for register 1
// reg1 - Register 1 number
// reg2 - Register 2 number
// imm - Immediate offset, prior to scaling by operand size
// size - Operand size
// fmt - Instruction format
//
// Return Value:
// "true" if the previous instruction HAS been overwritten.

bool emitter::ReplacedLdrStr(
AndyJGraham marked this conversation as resolved.
Show resolved Hide resolved
instruction ins, emitAttr reg1Attr, regNumber reg1, regNumber reg2, ssize_t imm, emitAttr size, insFormat fmt)
{
if (emitComp->opts.OptimizationEnabled())
AndyJGraham marked this conversation as resolved.
Show resolved Hide resolved
{
RegisterOrder optimizationOrder = IsOptimizableLdrStr(ins, reg1, reg2, imm, size, fmt);

if (optimizationOrder != eRO_none)
{
regNumber oldReg1 = emitLastIns->idReg1();
ssize_t oldImm =
emitLastIns->idIsLargeCns() ? ((instrDescCns*)emitLastIns)->idcCnsVal : emitLastIns->idSmallCns();
instruction optIns = (ins == INS_ldr) ? INS_ldp : INS_stp;

emitAttr oldReg1Attr;
switch (emitLastIns->idGCref())
{
case GCT_GCREF:
oldReg1Attr = EA_GCREF;
break;
case GCT_BYREF:
oldReg1Attr = EA_BYREF;
break;
default:
oldReg1Attr = emitLastIns->idOpSize();
break;
}

// Overwrite the "sub-optimal" instruction with the *optimised* instruction, directly
// into the output buffer.
if (optimizationOrder == eRO_ascending)
{
// The FIRST register is at the lower offset
emitIns_R_R_R_I(optIns, oldReg1Attr, oldReg1, reg1, reg2, oldImm * size, INS_OPTS_NONE, reg1Attr,
emitLastIns);
}
else
{
// The SECOND register is at the lower offset
emitIns_R_R_R_I(optIns, reg1Attr, reg1, oldReg1, reg2, imm * size, INS_OPTS_NONE, oldReg1Attr,
emitLastIns);
}

// And now return true, to indicate that the second instruction descriptor is no longer to be emitted.
return true;
}
}

return false;
}

//-----------------------------------------------------------------------------------
// IsOptimizableLdrStr: Check if it is possible to optimize two "ldr" or "str"
//                      instructions into a single "ldp" or "stp" instruction.
//
// Arguments:
//     ins  - The instruction code
//     reg1 - Register 1 number (the register being loaded or stored)
//     reg2 - Register 2 number (the base address register)
//     imm  - Immediate offset, prior to scaling by operand size
//     size - Operand size
//     fmt  - Instruction format
//
// Return Value:
//     eRO_none       - No optimization of consecutive instructions is possible
//     eRO_ascending  - The previous instruction's register is at the lower offset
//                      and this instruction's register is at the next offset up.
//     eRO_descending - This instruction's register is at the lower offset and the
//                      previous instruction's register is at the next offset up.
//
// Notes:
//     The candidate pair is (emitLastIns, the instruction described by the arguments).
//     NOTE(review): the consecutive-offset test assumes both immediates are expressed
//     in units of the operand size; confirm that no unscaled format can reach here.

emitter::RegisterOrder emitter::IsOptimizableLdrStr(
    instruction ins, regNumber reg1, regNumber reg2, ssize_t imm, emitAttr size, insFormat fmt)
{
    bool isFirstInstrInBlock = (emitCurIGinsCnt == 0) && ((emitCurIG->igFlags & IGF_EXTEND) == 0);

    if (((ins != INS_ldr) && (ins != INS_str)) || isFirstInstrInBlock || (emitLastIns == nullptr))
    {
        // Not a candidate instruction, or there is no usable previous instruction.
        return eRO_none;
    }

    if (ins != emitLastIns->idIns())
    {
        // Not successive ldr or str instructions
        return eRO_none;
    }

    if (emitSizeOfInsDsc(emitLastIns) != sizeof(instrDesc))
    {
        // Not instruction descriptors of the same, standard size.
        return eRO_none;
    }

    regNumber prevReg1   = emitLastIns->idReg1();
    regNumber prevReg2   = emitLastIns->idReg2();
    insFormat lastInsFmt = emitLastIns->idInsFmt();
    emitAttr  prevSize   = emitLastIns->idOpSize();
    ssize_t prevImm = emitLastIns->idIsLargeCns() ? ((instrDescCns*)emitLastIns)->idcCnsVal : emitLastIns->idSmallCns();

    // Signed, *raw* immediate value fits in 7 bits, so for LDP/ STP the raw value is from -64 to +63.
    // For LDR/ STR, there are 9 bits, so we need to limit the range explicitly in software.
    if ((imm < -64) || (imm > 63) || (prevImm < -64) || (prevImm > 63))
    {
        // Then one or more of the immediate values is out of range, so we cannot optimise.
        return eRO_none;
    }

    if ((!isGeneralRegisterOrZR(reg1)) || (!isGeneralRegisterOrZR(prevReg1)))
    {
        // One of the transferred registers is neither a general register
        // nor the zero register, so we cannot optimise.
        return eRO_none;
    }

    if (lastInsFmt != fmt)
    {
        // The formats of the two instructions differ.
        return eRO_none;
    }

    if (emitInsIsLoad(ins))
    {
        if (reg1 == prevReg1)
        {
            // Cannot load to the same register twice.
            return eRO_none;
        }

        if (prevReg1 == reg2)
        {
            // The previous load overwrote the base register, so this load reads from a
            // different address than the previous one did, e.g.
            //     ldr x1, [x1]
            //     ldr x2, [x1, #8]
            // A single "ldp" would read both values relative to the *old* base.
            return eRO_none;
        }
    }

    if (prevSize != size)
    {
        // Operand sizes differ.
        return eRO_none;
    }

    if ((reg2 != prevReg2) || !isGeneralRegisterOrSP(reg2))
    {
        // The "register 2" should be same as previous instruction and should either be a general register or stack
        // pointer.
        return eRO_none;
    }

    // There are two possible orders for consecutive registers.
    // These may be stored to or loaded from increasing or
    // decreasing store locations.
    if (imm == (prevImm + 1))
    {
        // Previous Register 1 is at a lower offset than This Register 1
        return eRO_ascending;
    }

    if (imm == (prevImm - 1))
    {
        // Previous Register 1 is at a higher offset than This Register 1
        return eRO_descending;
    }

    // Not consecutive immediate values.
    return eRO_none;
}

#endif // defined(TARGET_ARM64)
21 changes: 18 additions & 3 deletions src/coreclr/jit/emitarm64.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,17 @@ instrDesc* emitNewInstrCallInd(int argCnt,
emitAttr retSize,
emitAttr secondRetSize);

/************************************************************************/
/*  Relative memory order of the registers of two adjacent ldr/str     */
/*  instructions, as reported by the ldp/stp instruction optimisation   */
/************************************************************************/

enum RegisterOrder
{
    eRO_none       = 0, // the two instructions cannot be combined
    eRO_ascending  = 1, // the earlier instruction's register is at the lower offset
    eRO_descending = 2  // the later instruction's register is at the lower offset
};

/************************************************************************/
/* Private helpers for instruction output */
/************************************************************************/
Expand Down Expand Up @@ -112,7 +123,10 @@ static UINT64 Replicate_helper(UINT64 value, unsigned width, emitAttr size);
static bool IsMovInstruction(instruction ins);
bool IsRedundantMov(instruction ins, emitAttr size, regNumber dst, regNumber src, bool canSkip);
bool IsRedundantLdStr(instruction ins, regNumber reg1, regNumber reg2, ssize_t imm, emitAttr size, insFormat fmt);

bool ReplacedLdrStr(
instruction ins, emitAttr reg1Attr, regNumber reg1, regNumber reg2, ssize_t imm, emitAttr size, insFormat fmt);
RegisterOrder IsOptimizableLdrStr(
instruction ins, regNumber reg1, regNumber reg2, ssize_t imm, emitAttr size, insFormat fmt);
/************************************************************************
*
* This union is used to to encode/decode the special ARM64 immediate values
Expand Down Expand Up @@ -775,8 +789,9 @@ void emitIns_R_R_R_I(instruction ins,
regNumber reg2,
regNumber reg3,
ssize_t imm,
insOpts opt = INS_OPTS_NONE,
emitAttr attrReg2 = EA_UNKNOWN);
insOpts opt = INS_OPTS_NONE,
emitAttr attrReg2 = EA_UNKNOWN,
instrDesc* reuseInstr = nullptr);

void emitIns_R_R_R_Ext(instruction ins,
emitAttr attr,
Expand Down