Skip to content

Commit b2144ed

Browse files
committed
Rework a little
1 parent f101ba2 commit b2144ed

File tree

2 files changed

+419
-59
lines changed

2 files changed

+419
-59
lines changed

llvm/lib/Target/AArch64/AArch64FrameLowering.cpp

Lines changed: 65 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -2286,21 +2286,6 @@ static bool isFuncletReturnInstr(const MachineInstr &MI) {
22862286
}
22872287
}
22882288

2289-
/// Find a GPR restored in the epilogue that is not reserved.
2290-
static Register findRestoredCalleeSaveGPR(const MachineFunction &MF) {
2291-
const MachineFrameInfo &MFI = MF.getFrameInfo();
2292-
const MachineRegisterInfo &MRI = MF.getRegInfo();
2293-
const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
2294-
for (auto &CS : CSI) {
2295-
Register Reg = CS.getReg();
2296-
if (!CS.isRestored() || MRI.isReserved(Reg) ||
2297-
!AArch64::GPR64RegClass.contains(Reg))
2298-
continue;
2299-
return Reg;
2300-
}
2301-
return AArch64::NoRegister;
2302-
}
2303-
23042289
void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
23052290
MachineBasicBlock &MBB) const {
23062291
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
@@ -2550,49 +2535,69 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
25502535
DeallocateAfter, TII, MachineInstr::FrameDestroy, false,
25512536
NeedsWinCFI, &HasWinCFI);
25522537
} else if (SVEStackSize) {
2553-
// If we have stack realignment or variable sized objects on the stack,
2554-
// restore the stack pointer from the frame pointer prior to SVE CSR
2555-
// restoration.
2556-
if (AFI->isStackRealigned() || MFI.hasVarSizedObjects()) {
2557-
if (int64_t SVECalleeSavedSize = AFI->getSVECalleeSavedStackSize()) {
2558-
// Set SP to start of SVE callee-save area from which they can
2559-
// be reloaded.
2560-
const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2561-
if (!AFI->isStackRealigned() && RegInfo->hasBasePointer(MF)) {
2562-
// If the stack is not realigned we can use the base pointer to find
2563-
// the start of the SVE callee-saves (and deallocate locals).
2564-
emitFrameOffset(
2565-
MBB, RestoreBegin, DL, AArch64::SP, RegInfo->getBaseRegister(),
2566-
StackOffset::getFixed(NumBytes), TII, MachineInstr::FrameDestroy);
2567-
} else {
2568-
Register CalleeSaveBase = AArch64::FP;
2569-
if (int64_t CalleeSaveBaseOffset =
2570-
AFI->getCalleeSaveBaseToFrameRecordOffset()) {
2571-
// This will find a GPR that is about to be restored -- so safe
2572-
// to clobber. SVE functions have a "big stack" so always spill at
2573-
// least one GPR (as a scratch register).
2574-
CalleeSaveBase = findRestoredCalleeSaveGPR(MF);
2575-
assert(CalleeSaveBase != AArch64::NoRegister);
2576-
emitFrameOffset(MBB, RestoreBegin, DL, CalleeSaveBase, AArch64::FP,
2577-
StackOffset::getFixed(-CalleeSaveBaseOffset), TII,
2578-
MachineInstr::FrameDestroy);
2579-
}
2580-
// The code below will deallocate the stack space by moving the
2581-
// SP to the start of the SVE callee-save area.
2582-
emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, CalleeSaveBase,
2583-
StackOffset::getScalable(-SVECalleeSavedSize), TII,
2584-
MachineInstr::FrameDestroy);
2538+
const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2539+
int64_t SVECalleeSavedSize = AFI->getSVECalleeSavedStackSize();
2540+
Register BaseForSVERestore = [&]() -> Register {
2541+
// With stack realignment we must use the FP to restore SVE CSRs (as both
2542+
// the SP and BP can't be used due to the unknown alignment padding).
2543+
if (AFI->isStackRealigned())
2544+
return AArch64::FP;
2545+
// With variable sized objects on the stack, we can use the BP or FP to
2546+
// restore the SVE callee saves. If there are no SVE locals the BP will
2547+
// be more efficient (a single ADD).
2548+
if (MFI.hasVarSizedObjects()) {
2549+
if (DeallocateBefore && !AFI->hasStackHazardSlotIndex()) {
2550+
// If there's SVE locals and no hazard padding we can do:
2551+
// ADDVL SP, X29, #(-SVECalleeSavedSize)
2552+
return AArch64::FP;
25852553
}
2554+
// If there's SVE locals and hazard padding we can choose between:
2555+
// SUB TMP, X29, #(-CalleeSaveBaseOffset)
2556+
// ADDVL SP, TMP, #(-SVECalleeSavedSize)
2557+
// OR:
2558+
// ADD SP, BP, #NumBytes
2559+
// ADDVL SP, SP, #DeallocateBefore
2560+
// This chooses the latter as the "ADDVL" can be omitted if there's no
2561+
// SVE locals.
2562+
assert(RegInfo->hasBasePointer(MF) && "Expected base pointer!");
2563+
return RegInfo->getBaseRegister();
25862564
}
2587-
} else {
2588-
if (AFI->getSVECalleeSavedStackSize()) {
2565+
// In the standard case we use the SP.
2566+
return AArch64::SP;
2567+
}();
2568+
2569+
if (SVECalleeSavedSize && BaseForSVERestore == AArch64::FP) {
2570+
Register CalleeSaveBase = AArch64::FP;
2571+
if (int64_t CalleeSaveBaseOffset =
2572+
AFI->getCalleeSaveBaseToFrameRecordOffset()) {
2573+
// If we have a non-zero offset to the non-SVE CS base we need to
2574+
// compute the base address by subtracting the offset in a temporary
2575+
// register. SVE functions have a "big stack" so there should be at
2576+
// least one scratch register available.
2577+
RegScavenger RS;
2578+
RS.enterBasicBlockEnd(MBB);
2579+
RS.backward(MBBI);
2580+
CalleeSaveBase = RS.FindUnusedReg(&AArch64::GPR64commonRegClass);
2581+
assert(CalleeSaveBase != AArch64::NoRegister);
2582+
emitFrameOffset(MBB, RestoreBegin, DL, CalleeSaveBase, AArch64::FP,
2583+
StackOffset::getFixed(-CalleeSaveBaseOffset), TII,
2584+
MachineInstr::FrameDestroy);
2585+
}
2586+
// The code below will deallocate the stack space by moving the
2587+
// SP to the start of the SVE callee-save area.
2588+
emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, CalleeSaveBase,
2589+
StackOffset::getScalable(-SVECalleeSavedSize), TII,
2590+
MachineInstr::FrameDestroy);
2591+
} else if (BaseForSVERestore == AArch64::SP || SVECalleeSavedSize) {
2592+
if (SVECalleeSavedSize) {
25892593
// Deallocate the non-SVE locals first before we can deallocate (and
25902594
// restore callee saves) from the SVE area.
25912595
emitFrameOffset(
2592-
MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
2596+
MBB, RestoreBegin, DL, AArch64::SP, BaseForSVERestore,
25932597
StackOffset::getFixed(NumBytes), TII, MachineInstr::FrameDestroy,
25942598
false, NeedsWinCFI, &HasWinCFI, EmitCFI && !hasFP(MF),
25952599
SVEStackSize + StackOffset::getFixed(NumBytes + PrologueSaveSize));
2600+
25962601
NumBytes = 0;
25972602
}
25982603

@@ -2602,11 +2607,16 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
26022607
SVEStackSize +
26032608
StackOffset::getFixed(NumBytes + PrologueSaveSize));
26042609

2605-
emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
2606-
DeallocateAfter, TII, MachineInstr::FrameDestroy, false,
2607-
NeedsWinCFI, &HasWinCFI, EmitCFI && !hasFP(MF),
2608-
DeallocateAfter +
2609-
StackOffset::getFixed(NumBytes + PrologueSaveSize));
2610+
if (BaseForSVERestore == AArch64::SP) {
2611+
// Note: If the base is not SP it is the base pointer, in which case the
2612+
// SVE CSs will be implicitly deallocated by setting the SP to the FP to
2613+
// restore the non-SVE CSs.
2614+
emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
2615+
DeallocateAfter, TII, MachineInstr::FrameDestroy, false,
2616+
NeedsWinCFI, &HasWinCFI, EmitCFI && !hasFP(MF),
2617+
DeallocateAfter +
2618+
StackOffset::getFixed(NumBytes + PrologueSaveSize));
2619+
}
26102620
}
26112621
if (EmitCFI)
26122622
emitCalleeSavedSVERestores(MBB, RestoreEnd);

0 commit comments

Comments
 (0)