Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/coreclr/jit/codegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -438,6 +438,7 @@ class CodeGen final : public CodeGenInterface

#if defined(TARGET_ARM64)
void genUnknownSizeFrame();
void genZeroInitializeUnknownSizeFrame();
#endif

#elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
Expand All @@ -464,6 +465,9 @@ class CodeGen final : public CodeGenInterface
void genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pInitRegZeroed, regMaskTP maskArgRegsLiveIn);

void genPoisonFrame(regMaskTP bbRegLiveIn);
#ifdef TARGET_ARM64
void genPoisonUnknownSizeVariable(int varNum, char poisonVal);
#endif

#if defined(TARGET_ARM)

Expand Down
16 changes: 16 additions & 0 deletions src/coreclr/jit/codegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6115,4 +6115,20 @@ BasicBlock* CodeGen::genGetThrowHelper(SpecialCodeKind codeKind)
return excpRaisingBlock;
}

//
void CodeGen::genPoisonUnknownSizeVariable(int varNum, char poisonVal)
{
assert(varNum >= 0);
LclVarDsc* varDsc = m_compiler->lvaGetDesc(varNum);

// We should not see mask locals being address exposed.
assert(varDsc->IsAddressExposed());
noway_assert(varDsc->TypeGet() == TYP_SIMD);

// mov z9.b, #poisonVal
GetEmitter()->emitIns_R_I(INS_sve_mov, EA_SCALABLE, REG_SCRATCH_V, (ssize_t)poisonVal, INS_OPTS_SCALABLE_B);
// str z9, [x19, $index MUL VL]
GetEmitter()->emitIns_S_R(INS_sve_str, EA_SCALABLE, REG_SCRATCH_V, varNum, 0);
}

#endif // TARGET_ARM64
48 changes: 48 additions & 0 deletions src/coreclr/jit/codegenarmarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4917,6 +4917,54 @@ void CodeGen::genUnknownSizeFrame()
GetEmitter()->emitIns_R_R_R_R(INS_msub, EA_8BYTE, REG_SP, rsvd, REG_SCRATCH, REG_SP);
}
}

//----------------------------------------------------------------------------
//
// genZeroInitializeUnknownSizeFrame: Zero-initialize the UnknownSizeFrame stack space.
//
// Remarks:
// This function emits code that assumes the state of sp has not been modified since
// establishing the UnknownSizeFrame. sp must point to the end of the UnknownSizeFrame.
//
void CodeGen::genZeroInitializeUnknownSizeFrame()
{
assert(m_compiler->compUsesUnknownSizeFrame);

unsigned vectorCount = m_compiler->unkSizeFrame.FrameSizeInVectors();

assert(vectorCount > 0);

// z9 <== {0, 0, ...}
GetEmitter()->emitIns_R_I(INS_sve_mov, EA_SCALABLE, REG_SCRATCH_V, 0, INS_OPTS_SCALABLE_B);

// For small vector counts, emit unrolled loop of vector stores.
// Unrolling to a maximum of 5 stores optimizes for code size rather than performance.
// TODO-SVE: Does unrolling further improve performance?
if (vectorCount <= 5)
{
for (unsigned i = 0; i < vectorCount; i++)
{
// str z9, [sp, #i MUL VL]
GetEmitter()->emitIns_R_R_I(INS_sve_str, EA_SCALABLE, REG_SCRATCH_V, REG_SP, i);
}
}
else
{
// $cursor <== x19
inst_Mov(TYP_BYREF, REG_SCRATCH, REG_UNKBASE, false);
BasicBlock* loop = genCreateTempLabel();
// loop:
genDefineInlineTempLabel(loop);
// addvl $cursor, $cursor, #-1
GetEmitter()->emitIns_R_R_I(INS_sve_addvl, EA_8BYTE, REG_SCRATCH, REG_SCRATCH, -1);
// str z9, [$cursor]
GetEmitter()->emitIns_R_R(INS_sve_str, EA_SCALABLE, REG_SCRATCH_V, REG_SCRATCH);
// cmp sp, $cursor
GetEmitter()->emitIns_R_R(INS_cmp, EA_8BYTE, REG_SP, REG_SCRATCH, INS_OPTS_UXTX);
// b.ne loop
GetEmitter()->emitIns_J(INS_bne, loop);
}
}
#endif

/*****************************************************************************
Expand Down
8 changes: 8 additions & 0 deletions src/coreclr/jit/codegencommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8318,6 +8318,14 @@ void CodeGen::genPoisonFrame(regMaskTP regLiveIn)

assert(varDsc->lvOnFrame);

#ifdef TARGET_ARM64
if (m_compiler->lvaIsUnknownSizeLocal(varNum))
{
genPoisonUnknownSizeVariable(varNum, (char)poisonVal);
continue;
}
#endif

unsigned int size = m_compiler->lvaLclStackHomeSize(varNum);
if ((size / TARGET_POINTER_SIZE) > 16)
{
Expand Down
22 changes: 16 additions & 6 deletions src/coreclr/jit/codegenlinear.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -468,14 +468,24 @@ void CodeGen::genCodeForBlock(BasicBlock* block)
}
#endif

#ifndef TARGET_WASM // TODO-WASM: enable genPoisonFrame
// Emit poisoning into the init BB that comes right after prolog.
// We cannot emit this code in the prolog as it might make the prolog too large.
if (m_compiler->compShouldPoisonFrame() && block->IsFirst())
// Emit any code that needs to occur straight after the prolog, but does not want
// to be part of the prolog itself.
if (block->IsFirst())
{
genPoisonFrame(newLiveRegSet);
}
#ifdef TARGET_ARM64
if (m_compiler->compUsesUnknownSizeFrame)
{
genZeroInitializeUnknownSizeFrame();
}
#endif

#ifndef TARGET_WASM // TODO-WASM: enable genPoisonFrame
if (m_compiler->compShouldPoisonFrame())
{
genPoisonFrame(newLiveRegSet);
}
#endif // !TARGET_WASM
}

// Traverse the block in linear order, generating code for each node
// as we encounter it.
Expand Down
28 changes: 18 additions & 10 deletions src/coreclr/jit/emitarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -429,7 +429,7 @@ void emitter::emitInsSanityCheck(instrDesc* id)

case IF_DR_2A: // DR_2A X..........mmmmm ......nnnnn..... Rn Rm
assert(isValidGeneralDatasize(id->idOpSize()));
assert(isGeneralRegister(id->idReg1()));
assert(isGeneralRegisterOrZR(id->idReg1()));
assert(isGeneralRegister(id->idReg2()));
break;

Expand Down Expand Up @@ -2497,13 +2497,6 @@ emitter::code_t emitter::emitInsCode(instruction ins, insFormat fmt)
return (imm >= -256) && (imm <= 255);
}

// true if this 'imm' can be encoded as the scaled offset ([#imm mul vl]) in an SVE ldr/str instruction:
// 'imm' must be a multiple of 16 and imm / 16 must lie in the range [-256, 255]
/*static*/ bool emitter::emitIns_valid_imm_for_scaled_sve_ldst_offset(INT64 imm)
{
// TODO-SVE: This assumes 128bit SVE.
return ((imm % 16) == 0 && (imm / 16) <= 255 && (imm / 16) >= -256);
}

// true if this 'imm' can be encoded as the offset in a ldr/str instruction
/*static*/ bool emitter::emitIns_valid_imm_for_ldst_offset(INT64 imm, emitAttr attr)
{
Expand Down Expand Up @@ -8240,8 +8233,23 @@ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int va
case INS_lea:
// We shouldn't be materializing the address of a mask.
assert(m_compiler->lvaGetActualType(varx) != TYP_MASK);
// addvl reg1, x19, #imm
emitIns_R_R_I(INS_sve_addvl, EA_8BYTE, reg1, REG_UNKBASE, imm);
if (isValidSimm<6>(imm))
{
// addvl reg1, x19, #imm
emitIns_R_R_I(INS_sve_addvl, EA_8BYTE, reg1, REG_UNKBASE, imm);
}
else
{
// Cannot encode immediate, generate `addr = x19 + imm * VL`.
//
// set reg1 = imm
// rdvl rsvd, #1
// madd reg1, reg1, rsvd, x19
regNumber rsvd = codeGen->rsGetRsvdReg();
codeGen->instGen_Set_Reg_To_Imm(EA_8BYTE, reg1, imm);
emitIns_R_I(INS_sve_rdvl, EA_8BYTE, rsvd, 1);
emitIns_R_R_R_R(INS_madd, EA_8BYTE, reg1, reg1, rsvd, REG_UNKBASE);
}
return;

case INS_sve_ldr:
Expand Down
42 changes: 6 additions & 36 deletions src/coreclr/jit/emitarm64sve.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2741,27 +2741,12 @@ void emitter::emitInsSve_R_R_I(instruction ins,
case INS_sve_ldr:
assert(insOptsNone(opt));
assert(isScalableVectorSize(size));
assert(isGeneralRegister(reg2)); // nnnnn
assert(isGeneralRegisterOrSP(reg2)); // nnnnn
assert(insScalableOptsNone(sopt));

// imm is the number of bytes to offset by. The instruction requires a multiple of the
// vector length ([#imm mul vl]). If it doesn't fit then stash the resulting address
// into a register.
if (emitIns_valid_imm_for_scaled_sve_ldst_offset(imm))
{
// TODO-SVE: This assumes 128bit SVE.
imm = imm / 16;
}
else
{
regNumber rsvdReg = codeGen->rsGetRsvdReg();
codeGen->instGen_Set_Reg_To_Base_Plus_Imm(EA_PTRSIZE, rsvdReg, reg2, imm);
reg2 = rsvdReg;
imm = 0;
}

assert(isValidSimm<9>(imm));

reg2 = encodingSPtoZR(reg2);

if (isVectorRegister(reg1))
{
fmt = IF_SVE_IE_2A;
Expand All @@ -2776,27 +2761,12 @@ void emitter::emitInsSve_R_R_I(instruction ins,
case INS_sve_str:
assert(insOptsNone(opt));
assert(isScalableVectorSize(size));
assert(isGeneralRegister(reg2)); // nnnnn
assert(isGeneralRegisterOrSP(reg2)); // nnnnn
assert(insScalableOptsNone(sopt));

// imm is the number of bytes to offset by. The instruction requires a multiple of the
// vector length ([#imm mul vl]). If it doesn't fit then stash the resulting address
// into a register.
if (emitIns_valid_imm_for_scaled_sve_ldst_offset(imm))
{
// TODO-SVE: This assumes 128bit SVE.
imm = imm / 16;
}
else
{
regNumber rsvdReg = codeGen->rsGetRsvdReg();
codeGen->instGen_Set_Reg_To_Base_Plus_Imm(EA_PTRSIZE, rsvdReg, reg2, imm);
reg2 = rsvdReg;
imm = 0;
}

assert(isValidSimm<9>(imm));

reg2 = encodingSPtoZR(reg2);

if (isVectorRegister(reg1))
{
fmt = IF_SVE_JH_2A;
Expand Down
12 changes: 12 additions & 0 deletions src/coreclr/jit/lsrabuild.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2488,6 +2488,18 @@ void LinearScan::buildIntervals()
currentLoc += 2;
}

#ifdef TARGET_ARM64
if (m_compiler->compUsesUnknownSizeFrame && (block == m_compiler->fgFirstBB))
{
regMaskTP killed;
killed.AddRegNumInMask(REG_SCRATCH);
killed.AddRegNumInMask(REG_SCRATCH_V);

addKillForRegs(killed, currentLoc + 1);
currentLoc += 2;
}
#endif

LIR::Range& blockRange = LIR::AsRange(block);
for (GenTree* node : blockRange)
{
Expand Down
3 changes: 2 additions & 1 deletion src/coreclr/jit/targetarm64.h
Original file line number Diff line number Diff line change
Expand Up @@ -138,8 +138,9 @@
#define REG_SHIFT REG_NA
#define RBM_SHIFT RBM_ALLINT

// This is a general scratch register that does not conflict with the argument registers
// Scratch registers that do not conflict with the argument registers, usually for use in function prolog
#define REG_SCRATCH REG_R9
#define REG_SCRATCH_V REG_V9

// This is a general register that can be optionally reserved for other purposes during codegen
#define REG_OPT_RSVD REG_IP1
Expand Down
Loading