@@ -2286,21 +2286,6 @@ static bool isFuncletReturnInstr(const MachineInstr &MI) {
2286
2286
}
2287
2287
}
2288
2288
2289
- /// Find a GPR restored in the epilogue that is not reserved.
2290
- static Register findRestoredCalleeSaveGPR (const MachineFunction &MF) {
2291
- const MachineFrameInfo &MFI = MF.getFrameInfo ();
2292
- const MachineRegisterInfo &MRI = MF.getRegInfo ();
2293
- const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo ();
2294
- for (auto &CS : CSI) {
2295
- Register Reg = CS.getReg ();
2296
- if (!CS.isRestored () || MRI.isReserved (Reg) ||
2297
- !AArch64::GPR64RegClass.contains (Reg))
2298
- continue ;
2299
- return Reg;
2300
- }
2301
- return AArch64::NoRegister;
2302
- }
2303
-
2304
2289
void AArch64FrameLowering::emitEpilogue (MachineFunction &MF,
2305
2290
MachineBasicBlock &MBB) const {
2306
2291
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr ();
@@ -2550,49 +2535,69 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
2550
2535
DeallocateAfter, TII, MachineInstr::FrameDestroy, false ,
2551
2536
NeedsWinCFI, &HasWinCFI);
2552
2537
} else if (SVEStackSize) {
2553
- // If we have stack realignment or variable sized objects on the stack,
2554
- // restore the stack pointer from the frame pointer prior to SVE CSR
2555
- // restoration.
2556
- if (AFI->isStackRealigned () || MFI.hasVarSizedObjects ()) {
2557
- if (int64_t SVECalleeSavedSize = AFI->getSVECalleeSavedStackSize ()) {
2558
- // Set SP to start of SVE callee-save area from which they can
2559
- // be reloaded.
2560
- const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo ();
2561
- if (!AFI->isStackRealigned () && RegInfo->hasBasePointer (MF)) {
2562
- // If the stack is not realigned we can use the base pointer to find
2563
- // the start of the SVE callee-saves (and deallocate locals).
2564
- emitFrameOffset (
2565
- MBB, RestoreBegin, DL, AArch64::SP, RegInfo->getBaseRegister (),
2566
- StackOffset::getFixed (NumBytes), TII, MachineInstr::FrameDestroy);
2567
- } else {
2568
- Register CalleeSaveBase = AArch64::FP;
2569
- if (int64_t CalleeSaveBaseOffset =
2570
- AFI->getCalleeSaveBaseToFrameRecordOffset ()) {
2571
- // This will find a GPR that is about to be restored -- so safe
2572
- // to clobber. SVE functions have a "big stack" so always spill at
2573
- // least one GPR (as a scratch register).
2574
- CalleeSaveBase = findRestoredCalleeSaveGPR (MF);
2575
- assert (CalleeSaveBase != AArch64::NoRegister);
2576
- emitFrameOffset (MBB, RestoreBegin, DL, CalleeSaveBase, AArch64::FP,
2577
- StackOffset::getFixed (-CalleeSaveBaseOffset), TII,
2578
- MachineInstr::FrameDestroy);
2579
- }
2580
- // The code below will deallocate the stack space space by moving the
2581
- // SP to the start of the SVE callee-save area.
2582
- emitFrameOffset (MBB, RestoreBegin, DL, AArch64::SP, CalleeSaveBase,
2583
- StackOffset::getScalable (-SVECalleeSavedSize), TII,
2584
- MachineInstr::FrameDestroy);
2538
+ const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo ();
2539
+ int64_t SVECalleeSavedSize = AFI->getSVECalleeSavedStackSize ();
2540
+ Register BaseForSVERestore = [&]() -> Register {
2541
+ // With stack realignment we must use the FP to restore SVE CSRs (as both
2542
+ // the SP and BP can't be used due to the unknown alignment padding).
2543
+ if (AFI->isStackRealigned ())
2544
+ return AArch64::FP;
2545
+ // With variable sized objects on the stack, we can use the BP or FP to
2546
+ // restore the SVE callee saves. If there are no SVE locals the BP will
2547
+ // be more efficient (a single ADD).
2548
+ if (MFI.hasVarSizedObjects ()) {
2549
+ if (DeallocateBefore && !AFI->hasStackHazardSlotIndex ()) {
2550
+ // If there's SVE locals and no hazard padding we can do:
2551
+ // ADDVL SP, X29, #(-SVECalleeSavedSize)
2552
+ return AArch64::FP;
2585
2553
}
2554
+ // If there's SVE locals and hazard padding we can choose between:
2555
+ // SUB TMP, X29, #(-CalleeSaveBaseOffset)
2556
+ // ADDVL SP, TMP, #(-SVECalleeSavedSize)
2557
+ // OR:
2558
+ // ADD SP, BP, #NumBytes
2559
+ // ADDVL SP, SP, #DeallocateBefore
2560
+ // This chooses the latter as the "ADDVL" can be omitted if there's no
2561
+ // SVE locals.
2562
+ assert (RegInfo->hasBasePointer (MF) && " Expected base pointer!" );
2563
+ return RegInfo->getBaseRegister ();
2586
2564
}
2587
- } else {
2588
- if (AFI->getSVECalleeSavedStackSize ()) {
2565
+ // In the standard case we use the SP.
2566
+ return AArch64::SP;
2567
+ }();
2568
+
2569
+ if (SVECalleeSavedSize && BaseForSVERestore == AArch64::FP) {
2570
+ Register CalleeSaveBase = AArch64::FP;
2571
+ if (int64_t CalleeSaveBaseOffset =
2572
+ AFI->getCalleeSaveBaseToFrameRecordOffset ()) {
2573
+ // If we have a non-zero offset to the non-SVE CS base we need to
2574
+ // compute the base address by subtracting the offset in a temporary
2575
+ // register. SVE functions have a "big stack" so there should be at
2576
+ // least one scratch register available.
2577
+ RegScavenger RS;
2578
+ RS.enterBasicBlockEnd (MBB);
2579
+ RS.backward (MBBI);
2580
+ CalleeSaveBase = RS.FindUnusedReg (&AArch64::GPR64commonRegClass);
2581
+ assert (CalleeSaveBase != AArch64::NoRegister);
2582
+ emitFrameOffset (MBB, RestoreBegin, DL, CalleeSaveBase, AArch64::FP,
2583
+ StackOffset::getFixed (-CalleeSaveBaseOffset), TII,
2584
+ MachineInstr::FrameDestroy);
2585
+ }
2586
+ // The code below will deallocate the stack space by moving the
2587
+ // SP to the start of the SVE callee-save area.
2588
+ emitFrameOffset (MBB, RestoreBegin, DL, AArch64::SP, CalleeSaveBase,
2589
+ StackOffset::getScalable (-SVECalleeSavedSize), TII,
2590
+ MachineInstr::FrameDestroy);
2591
+ } else if (BaseForSVERestore == AArch64::SP || SVECalleeSavedSize) {
2592
+ if (SVECalleeSavedSize) {
2589
2593
// Deallocate the non-SVE locals first before we can deallocate (and
2590
2594
// restore callee saves) from the SVE area.
2591
2595
emitFrameOffset (
2592
- MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP ,
2596
+ MBB, RestoreBegin, DL, AArch64::SP, BaseForSVERestore ,
2593
2597
StackOffset::getFixed (NumBytes), TII, MachineInstr::FrameDestroy,
2594
2598
false , NeedsWinCFI, &HasWinCFI, EmitCFI && !hasFP (MF),
2595
2599
SVEStackSize + StackOffset::getFixed (NumBytes + PrologueSaveSize));
2600
+
2596
2601
NumBytes = 0 ;
2597
2602
}
2598
2603
@@ -2602,11 +2607,16 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
2602
2607
SVEStackSize +
2603
2608
StackOffset::getFixed (NumBytes + PrologueSaveSize));
2604
2609
2605
- emitFrameOffset (MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
2606
- DeallocateAfter, TII, MachineInstr::FrameDestroy, false ,
2607
- NeedsWinCFI, &HasWinCFI, EmitCFI && !hasFP (MF),
2608
- DeallocateAfter +
2609
- StackOffset::getFixed (NumBytes + PrologueSaveSize));
2610
+ if (BaseForSVERestore == AArch64::SP) {
2611
+ // Note: If the base is not SP it is the base pointer, in which case the
2612
+ // SVE CSs will be implicitly deallocated by setting the SP to the FP to
2613
+ // restore the non-SVE CSs.
2614
+ emitFrameOffset (MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
2615
+ DeallocateAfter, TII, MachineInstr::FrameDestroy, false ,
2616
+ NeedsWinCFI, &HasWinCFI, EmitCFI && !hasFP (MF),
2617
+ DeallocateAfter +
2618
+ StackOffset::getFixed (NumBytes + PrologueSaveSize));
2619
+ }
2610
2620
}
2611
2621
if (EmitCFI)
2612
2622
emitCalleeSavedSVERestores (MBB, RestoreEnd);
0 commit comments