Skip to content

Commit 1a85f37

Browse files
committed
AArch64: align pair-wise spills on WoS to 16-byte
Adjust the frame setup code for Windows ARM64 to attempt to align pair-wise spills to 16-byte boundaries. This enables us to properly emit the spills for custom clang calling conventions such as preserve_most, which spills x9-x15, registers that are normally volatile. Even when using the ARM64EC opcodes for the unwinding, we cannot represent the spill if it is unaligned.
1 parent 9cca883 commit 1a85f37

File tree

2 files changed

+71
-27
lines changed

2 files changed

+71
-27
lines changed

llvm/lib/Target/AArch64/AArch64FrameLowering.cpp

Lines changed: 39 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1554,8 +1554,10 @@ static bool produceCompactUnwindFrame(const AArch64FrameLowering &AFL,
15541554
!AFL.requiresSaveVG(MF) && !AFI->isSVECC();
15551555
}
15561556

1557-
static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2,
1558-
bool NeedsWinCFI, bool IsFirst,
1557+
static bool invalidateWindowsRegisterPairing(bool SpillExtendedVolatile,
1558+
unsigned SpillCount, unsigned Reg1,
1559+
unsigned Reg2, bool NeedsWinCFI,
1560+
bool IsFirst,
15591561
const TargetRegisterInfo *TRI) {
15601562
// If we are generating register pairs for a Windows function that requires
15611563
// EH support, then pair consecutive registers only. There are no unwind
@@ -1568,8 +1570,18 @@ static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2,
15681570
return true;
15691571
if (!NeedsWinCFI)
15701572
return false;
1573+
1574+
// ARM64EC introduced `save_any_regp`, which expects 16-byte alignment.
1575+
// This is handled by only allowing paired spills for registers spilled at
1576+
// even positions (which should be 16-byte aligned, as other GPRs/FPRs are
1577+
// 8-bytes). We carve out an exception for {FP,LR}, which does not require
1578+
// 16-byte alignment in the uop representation.
15711579
if (TRI->getEncodingValue(Reg2) == TRI->getEncodingValue(Reg1) + 1)
1572-
return false;
1580+
return SpillExtendedVolatile
1581+
? !((Reg1 == AArch64::FP && Reg2 == AArch64::LR) ||
1582+
(SpillCount % 2) == 0)
1583+
: false;
1584+
15731585
// If pairing a GPR with LR, the pair can be described by the save_lrpair
15741586
// opcode. If this is the first register pair, it would end up with a
15751587
// predecrement, but there's no save_lrpair_x opcode, so we can only do this
@@ -1585,12 +1597,15 @@ static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2,
15851597
/// WindowsCFI requires that only consecutive registers can be paired.
15861598
/// LR and FP need to be allocated together when the frame needs to save
15871599
/// the frame-record. This means any other register pairing with LR is invalid.
1588-
static bool invalidateRegisterPairing(unsigned Reg1, unsigned Reg2,
1589-
bool UsesWinAAPCS, bool NeedsWinCFI,
1590-
bool NeedsFrameRecord, bool IsFirst,
1600+
static bool invalidateRegisterPairing(bool SpillExtendedVolatile,
1601+
unsigned SpillCount, unsigned Reg1,
1602+
unsigned Reg2, bool UsesWinAAPCS,
1603+
bool NeedsWinCFI, bool NeedsFrameRecord,
1604+
bool IsFirst,
15911605
const TargetRegisterInfo *TRI) {
15921606
if (UsesWinAAPCS)
1593-
return invalidateWindowsRegisterPairing(Reg1, Reg2, NeedsWinCFI, IsFirst,
1607+
return invalidateWindowsRegisterPairing(SpillExtendedVolatile, SpillCount,
1608+
Reg1, Reg2, NeedsWinCFI, IsFirst,
15941609
TRI);
15951610

15961611
// If we need to store the frame record, don't pair any register
@@ -1688,6 +1703,17 @@ void computeCalleeSaveRegisterPairs(const AArch64FrameLowering &AFL,
16881703
}
16891704

16901705
bool FPAfterSVECalleeSaves = IsWindows && AFI->getSVECalleeSavedStackSize();
1706+
// Windows AAPCS has x9-x15 as volatile registers, x16-x17 as intra-procedural
1707+
// scratch, x18 as platform reserved. However, clang has extended calling
1708+
// conventions such as preserve_most and preserve_all which treat these as
1709+
// CSR. As such, the ARM64 unwind uOPs bias registers by 19. We use ARM64EC
1710+
// uOPs which have separate restrictions. We need to check for that.
1711+
bool SpillExtendedVolatile =
1712+
IsWindows && std::any_of(std::begin(CSI), std::end(CSI),
1713+
[](const CalleeSavedInfo &CSI) {
1714+
const auto &Reg = CSI.getReg();
1715+
return Reg > AArch64::X8 && Reg < AArch64::X19;
1716+
});
16911717

16921718
int ZPRByteOffset = 0;
16931719
int PPRByteOffset = 0;
@@ -1749,17 +1775,19 @@ void computeCalleeSaveRegisterPairs(const AArch64FrameLowering &AFL,
17491775
if (unsigned(i + RegInc) < Count && !HasCSHazardPadding) {
17501776
MCRegister NextReg = CSI[i + RegInc].getReg();
17511777
bool IsFirst = i == FirstReg;
1778+
unsigned SpillCount = NeedsWinCFI ? FirstReg - i : i;
17521779
switch (RPI.Type) {
17531780
case RegPairInfo::GPR:
17541781
if (AArch64::GPR64RegClass.contains(NextReg) &&
1755-
!invalidateRegisterPairing(RPI.Reg1, NextReg, IsWindows,
1756-
NeedsWinCFI, NeedsFrameRecord, IsFirst,
1757-
TRI))
1782+
!invalidateRegisterPairing(
1783+
SpillExtendedVolatile, SpillCount, RPI.Reg1, NextReg, IsWindows,
1784+
NeedsWinCFI, NeedsFrameRecord, IsFirst, TRI))
17581785
RPI.Reg2 = NextReg;
17591786
break;
17601787
case RegPairInfo::FPR64:
17611788
if (AArch64::FPR64RegClass.contains(NextReg) &&
1762-
!invalidateWindowsRegisterPairing(RPI.Reg1, NextReg, NeedsWinCFI,
1789+
!invalidateWindowsRegisterPairing(SpillExtendedVolatile, SpillCount,
1790+
RPI.Reg1, NextReg, NeedsWinCFI,
17631791
IsFirst, TRI))
17641792
RPI.Reg2 = NextReg;
17651793
break;
Lines changed: 32 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
; RUN: llc -mtriple aarch64-unknown-windows-msvc -filetype asm -o - %s | FileCheck %s
2+
; RUN: llc -mtriple aarch64-unknown-windows-msvc -filetype obj -o - %s | llvm-readobj -u - | FileCheck %s -check-prefix CHECK-UNWIND
23

34
declare dso_local void @g(ptr noundef)
45
define dso_local preserve_mostcc void @f(ptr noundef %p) #0 {
@@ -12,23 +13,38 @@ entry:
1213

1314
attributes #0 = { nounwind uwtable(sync) }
1415

15-
; CHECK: stp x9, x10, [sp, #[[OFFSET_0:[0-9]+]]]
16-
; CHECK-NEXT: .seh_save_any_reg_p x9, [[OFFSET_0]]
17-
; CHECK: stp x11, x12, [sp, #[[OFFSET_1:[0-9]+]]]
18-
; CHECK-NEXT: .seh_save_any_reg_p x11, [[OFFSET_1]]
19-
; CHECK: stp x13, x14, [sp, #[[OFFSET_2:[0-9]+]]]
20-
; CHECK-NEXT: .seh_save_any_reg_p x13, [[OFFSET_2]]
21-
; CHECK: str x15, [sp, #[[OFFSET_3:[0-9]+]]]
22-
; CHECK-NEXT: .seh_save_any_reg x15, [[OFFSET_3]]
16+
; CHECK: str x30, [sp, #16]
17+
; CHECK-NEXT: .seh_save_reg x30, 16
18+
; CHECK: str x9, [sp, #24]
19+
; CHECK-NEXT: .seh_save_any_reg x9, 24
20+
; CHECK: stp x10, x11, [sp, #32
21+
; CHECK-NEXT: .seh_save_any_reg_p x10, 32
22+
; CHECK: stp x12, x13, [sp, #48]
23+
; CHECK-NEXT: .seh_save_any_reg_p x12, 48
24+
; CHECK: stp x14, x15, [sp, #64]
25+
; CHECK-NEXT: .seh_save_any_reg_p x14, 64
2326
; CHECK: .seh_endprologue
2427

2528
; CHECK: .seh_startepilogue
26-
; CHECK: ldr x15, [sp, #[[OFFSET_3]]]
27-
; CHECK-NEXT: .seh_save_any_reg x15, [[OFFSET_3]]
28-
; CHECK: ldp x13, x14, [sp, #[[OFFSET_2]]]
29-
; CHECK-NEXT: .seh_save_any_reg_p x13, [[OFFSET_2]]
30-
; CHECK: ldp x11, x12, [sp, #[[OFFSET_1]]]
31-
; CHECK-NEXT: .seh_save_any_reg_p x11, [[OFFSET_1]]
32-
; CHECK: ldp x9, x10, [sp, #[[OFFSET_0]]]
33-
; CHECK-NEXT: .seh_save_any_reg_p x9, [[OFFSET_0]]
29+
; CHECK: ldp x14, x15, [sp, #64]
30+
; CHECK-NEXT: .seh_save_any_reg_p x14, 64
31+
; CHECK: ldp x12, x13, [sp, #48]
32+
; CHECK-NEXT: .seh_save_any_reg_p x12, 48
33+
; CHECK: ldp x10, x11, [sp, #32
34+
; CHECK-NEXT: .seh_save_any_reg_p x10, 32
35+
; CHECK: ldr x9, [sp, #24]
36+
; CHECK-NEXT: .seh_save_any_reg x9, 24
37+
; CHECK: ldr x30, [sp, #16]
38+
; CHECK-NEXT: .seh_save_reg x30, 16
39+
3440
; CHECK: .seh_endepilogue
41+
42+
; CHECK-UNWIND: Prologue [
43+
; CHECK-UNWIND: 0xe74e04 ; stp x14, x15, [sp, #64]
44+
; CHECK-UNWIND: 0xe74c03 ; stp x12, x13, [sp, #48]
45+
; CHECK-UNWIND: 0xe74a02 ; stp x10, x11, [sp, #32]
46+
; CHECK-UNWIND: 0xe70903 ; str x9, [sp, #24]
47+
; CHECK-UNWIND: 0xd2c2 ; str x30, [sp, #16]
48+
; CHECK-UNWIND: 0x05 ; sub sp, #80
49+
; CHECK-UNWIND: 0xe4 ; end
50+
; CHECK-UNWIND: ]

0 commit comments

Comments
 (0)