Skip to content

Commit d13c0c4

Browse files
committed
AArch64: align pair-wise spills on WoS to 16-byte
Adjust the frame setup code for Windows ARM64 to attempt to align pair-wise spills to 16-byte boundaries. This enables us to properly emit the spills for custom clang calling conventions such as `preserve_most`, which spills x9-x15, registers that are normally volatile. Even when using the ARM64EC opcodes for unwinding, we cannot represent a paired spill if it is unaligned.
1 parent 9cca883 commit d13c0c4

File tree

5 files changed

+104
-75
lines changed

5 files changed

+104
-75
lines changed

llvm/lib/Target/AArch64/AArch64FrameLowering.cpp

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1554,8 +1554,9 @@ static bool produceCompactUnwindFrame(const AArch64FrameLowering &AFL,
15541554
!AFL.requiresSaveVG(MF) && !AFI->isSVECC();
15551555
}
15561556

1557-
static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2,
1558-
bool NeedsWinCFI, bool IsFirst,
1557+
static bool invalidateWindowsRegisterPairing(unsigned SpillCount, unsigned Reg1,
1558+
unsigned Reg2, bool NeedsWinCFI,
1559+
bool IsFirst,
15591560
const TargetRegisterInfo *TRI) {
15601561
// If we are generating register pairs for a Windows function that requires
15611562
// EH support, then pair consecutive registers only. There are no unwind
@@ -1568,8 +1569,16 @@ static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2,
15681569
return true;
15691570
if (!NeedsWinCFI)
15701571
return false;
1572+
// ARM64EC introduced `save_any_regp`, which expects 16-byte alignment.
1573+
// This is handled by only allowing paired spills for registers spilled at
1574+
// even positions (which should be 16-byte aligned, as other GPRs/FPRs are
1575+
// 8 bytes). We carve out an exception for {FP,LR}, which does not require
1576+
// 16-byte alignment in the uop representation.
15711577
if (TRI->getEncodingValue(Reg2) == TRI->getEncodingValue(Reg1) + 1)
1572-
return false;
1578+
return ((Reg1 == AArch64::FP && Reg2 == AArch64::LR) ||
1579+
(SpillCount % 2) == 0)
1580+
? false
1581+
: true;
15731582
// If pairing a GPR with LR, the pair can be described by the save_lrpair
15741583
// opcode. If this is the first register pair, it would end up with a
15751584
// predecrement, but there's no save_lrpair_x opcode, so we can only do this
@@ -1585,13 +1594,14 @@ static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2,
15851594
/// WindowsCFI requires that only consecutive registers can be paired.
15861595
/// LR and FP need to be allocated together when the frame needs to save
15871596
/// the frame-record. This means any other register pairing with LR is invalid.
1588-
static bool invalidateRegisterPairing(unsigned Reg1, unsigned Reg2,
1589-
bool UsesWinAAPCS, bool NeedsWinCFI,
1590-
bool NeedsFrameRecord, bool IsFirst,
1597+
static bool invalidateRegisterPairing(unsigned SpillCount, unsigned Reg1,
1598+
unsigned Reg2, bool UsesWinAAPCS,
1599+
bool NeedsWinCFI, bool NeedsFrameRecord,
1600+
bool IsFirst,
15911601
const TargetRegisterInfo *TRI) {
15921602
if (UsesWinAAPCS)
1593-
return invalidateWindowsRegisterPairing(Reg1, Reg2, NeedsWinCFI, IsFirst,
1594-
TRI);
1603+
return invalidateWindowsRegisterPairing(SpillCount, Reg1, Reg2, NeedsWinCFI,
1604+
IsFirst, TRI);
15951605

15961606
// If we need to store the frame record, don't pair any register
15971607
// with LR other than FP.
@@ -1749,18 +1759,19 @@ void computeCalleeSaveRegisterPairs(const AArch64FrameLowering &AFL,
17491759
if (unsigned(i + RegInc) < Count && !HasCSHazardPadding) {
17501760
MCRegister NextReg = CSI[i + RegInc].getReg();
17511761
bool IsFirst = i == FirstReg;
1762+
unsigned SpillCount = NeedsWinCFI ? FirstReg - i : i;
17521763
switch (RPI.Type) {
17531764
case RegPairInfo::GPR:
17541765
if (AArch64::GPR64RegClass.contains(NextReg) &&
1755-
!invalidateRegisterPairing(RPI.Reg1, NextReg, IsWindows,
1766+
!invalidateRegisterPairing(SpillCount, RPI.Reg1, NextReg, IsWindows,
17561767
NeedsWinCFI, NeedsFrameRecord, IsFirst,
17571768
TRI))
17581769
RPI.Reg2 = NextReg;
17591770
break;
17601771
case RegPairInfo::FPR64:
17611772
if (AArch64::FPR64RegClass.contains(NextReg) &&
1762-
!invalidateWindowsRegisterPairing(RPI.Reg1, NextReg, NeedsWinCFI,
1763-
IsFirst, TRI))
1773+
!invalidateWindowsRegisterPairing(SpillCount, RPI.Reg1, NextReg,
1774+
NeedsWinCFI, IsFirst, TRI))
17641775
RPI.Reg2 = NextReg;
17651776
break;
17661777
case RegPairInfo::FPR128:

llvm/test/CodeGen/AArch64/preserve_nonecc_call.ll

Lines changed: 40 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -120,26 +120,30 @@ define void @caller1(ptr %a) {
120120
; WIN-NEXT: .seh_save_regp x27, 64
121121
; WIN-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
122122
; WIN-NEXT: .seh_save_reg x30, 80
123-
; WIN-NEXT: stp d8, d9, [sp, #88] // 16-byte Folded Spill
124-
; WIN-NEXT: .seh_save_fregp d8, 88
125-
; WIN-NEXT: stp d10, d11, [sp, #104] // 16-byte Folded Spill
126-
; WIN-NEXT: .seh_save_fregp d10, 104
127-
; WIN-NEXT: stp d12, d13, [sp, #120] // 16-byte Folded Spill
128-
; WIN-NEXT: .seh_save_fregp d12, 120
129-
; WIN-NEXT: stp d14, d15, [sp, #136] // 16-byte Folded Spill
130-
; WIN-NEXT: .seh_save_fregp d14, 136
123+
; WIN-NEXT: str d8, [sp, #88] // 8-byte Folded Spill
124+
; WIN-NEXT: .seh_save_freg d8, 88
125+
; WIN-NEXT: stp d9, d10, [sp, #96] // 16-byte Folded Spill
126+
; WIN-NEXT: .seh_save_fregp d9, 96
127+
; WIN-NEXT: stp d11, d12, [sp, #112] // 16-byte Folded Spill
128+
; WIN-NEXT: .seh_save_fregp d11, 112
129+
; WIN-NEXT: stp d13, d14, [sp, #128] // 16-byte Folded Spill
130+
; WIN-NEXT: .seh_save_fregp d13, 128
131+
; WIN-NEXT: str d15, [sp, #144] // 8-byte Folded Spill
132+
; WIN-NEXT: .seh_save_freg d15, 144
131133
; WIN-NEXT: .seh_endprologue
132134
; WIN-NEXT: mov x20, x0
133135
; WIN-NEXT: bl callee
134136
; WIN-NEXT: .seh_startepilogue
135-
; WIN-NEXT: ldp d14, d15, [sp, #136] // 16-byte Folded Reload
136-
; WIN-NEXT: .seh_save_fregp d14, 136
137-
; WIN-NEXT: ldp d12, d13, [sp, #120] // 16-byte Folded Reload
138-
; WIN-NEXT: .seh_save_fregp d12, 120
139-
; WIN-NEXT: ldp d10, d11, [sp, #104] // 16-byte Folded Reload
140-
; WIN-NEXT: .seh_save_fregp d10, 104
141-
; WIN-NEXT: ldp d8, d9, [sp, #88] // 16-byte Folded Reload
142-
; WIN-NEXT: .seh_save_fregp d8, 88
137+
; WIN-NEXT: ldr d15, [sp, #144] // 8-byte Folded Reload
138+
; WIN-NEXT: .seh_save_freg d15, 144
139+
; WIN-NEXT: ldp d13, d14, [sp, #128] // 16-byte Folded Reload
140+
; WIN-NEXT: .seh_save_fregp d13, 128
141+
; WIN-NEXT: ldp d11, d12, [sp, #112] // 16-byte Folded Reload
142+
; WIN-NEXT: .seh_save_fregp d11, 112
143+
; WIN-NEXT: ldp d9, d10, [sp, #96] // 16-byte Folded Reload
144+
; WIN-NEXT: .seh_save_fregp d9, 96
145+
; WIN-NEXT: ldr d8, [sp, #88] // 8-byte Folded Reload
146+
; WIN-NEXT: .seh_save_freg d8, 88
143147
; WIN-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
144148
; WIN-NEXT: .seh_save_reg x30, 80
145149
; WIN-NEXT: ldp x27, x28, [sp, #64] // 16-byte Folded Reload
@@ -456,14 +460,16 @@ define i64 @caller3() {
456460
; WIN-NEXT: .seh_save_regp x27, 80
457461
; WIN-NEXT: str x30, [sp, #96] // 8-byte Folded Spill
458462
; WIN-NEXT: .seh_save_reg x30, 96
459-
; WIN-NEXT: stp d8, d9, [sp, #104] // 16-byte Folded Spill
460-
; WIN-NEXT: .seh_save_fregp d8, 104
461-
; WIN-NEXT: stp d10, d11, [sp, #120] // 16-byte Folded Spill
462-
; WIN-NEXT: .seh_save_fregp d10, 120
463-
; WIN-NEXT: stp d12, d13, [sp, #136] // 16-byte Folded Spill
464-
; WIN-NEXT: .seh_save_fregp d12, 136
465-
; WIN-NEXT: stp d14, d15, [sp, #152] // 16-byte Folded Spill
466-
; WIN-NEXT: .seh_save_fregp d14, 152
463+
; WIN-NEXT: str d8, [sp, #104] // 8-byte Folded Spill
464+
; WIN-NEXT: .seh_save_freg d8, 104
465+
; WIN-NEXT: stp d9, d10, [sp, #112] // 16-byte Folded Spill
466+
; WIN-NEXT: .seh_save_fregp d9, 112
467+
; WIN-NEXT: stp d11, d12, [sp, #128] // 16-byte Folded Spill
468+
; WIN-NEXT: .seh_save_fregp d11, 128
469+
; WIN-NEXT: stp d13, d14, [sp, #144] // 16-byte Folded Spill
470+
; WIN-NEXT: .seh_save_fregp d13, 144
471+
; WIN-NEXT: str d15, [sp, #160] // 8-byte Folded Spill
472+
; WIN-NEXT: .seh_save_freg d15, 160
467473
; WIN-NEXT: .seh_endprologue
468474
; WIN-NEXT: mov w8, #24 // =0x18
469475
; WIN-NEXT: mov w20, #1 // =0x1
@@ -492,14 +498,16 @@ define i64 @caller3() {
492498
; WIN-NEXT: str x8, [sp]
493499
; WIN-NEXT: bl callee_with_many_param
494500
; WIN-NEXT: .seh_startepilogue
495-
; WIN-NEXT: ldp d14, d15, [sp, #152] // 16-byte Folded Reload
496-
; WIN-NEXT: .seh_save_fregp d14, 152
497-
; WIN-NEXT: ldp d12, d13, [sp, #136] // 16-byte Folded Reload
498-
; WIN-NEXT: .seh_save_fregp d12, 136
499-
; WIN-NEXT: ldp d10, d11, [sp, #120] // 16-byte Folded Reload
500-
; WIN-NEXT: .seh_save_fregp d10, 120
501-
; WIN-NEXT: ldp d8, d9, [sp, #104] // 16-byte Folded Reload
502-
; WIN-NEXT: .seh_save_fregp d8, 104
501+
; WIN-NEXT: ldr d15, [sp, #160] // 8-byte Folded Reload
502+
; WIN-NEXT: .seh_save_freg d15, 160
503+
; WIN-NEXT: ldp d13, d14, [sp, #144] // 16-byte Folded Reload
504+
; WIN-NEXT: .seh_save_fregp d13, 144
505+
; WIN-NEXT: ldp d11, d12, [sp, #128] // 16-byte Folded Reload
506+
; WIN-NEXT: .seh_save_fregp d11, 128
507+
; WIN-NEXT: ldp d9, d10, [sp, #112] // 16-byte Folded Reload
508+
; WIN-NEXT: .seh_save_fregp d9, 112
509+
; WIN-NEXT: ldr d8, [sp, #104] // 8-byte Folded Reload
510+
; WIN-NEXT: .seh_save_freg d8, 104
503511
; WIN-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload
504512
; WIN-NEXT: .seh_save_reg x30, 96
505513
; WIN-NEXT: ldp x27, x28, [sp, #80] // 16-byte Folded Reload

llvm/test/CodeGen/AArch64/seh-extended-spills.ll

Lines changed: 21 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -12,23 +12,28 @@ entry:
1212

1313
attributes #0 = { nounwind uwtable(sync) }
1414

15-
; CHECK: stp x9, x10, [sp, #[[OFFSET_0:[0-9]+]]]
16-
; CHECK-NEXT: .seh_save_any_reg_p x9, [[OFFSET_0]]
17-
; CHECK: stp x11, x12, [sp, #[[OFFSET_1:[0-9]+]]]
18-
; CHECK-NEXT: .seh_save_any_reg_p x11, [[OFFSET_1]]
19-
; CHECK: stp x13, x14, [sp, #[[OFFSET_2:[0-9]+]]]
20-
; CHECK-NEXT: .seh_save_any_reg_p x13, [[OFFSET_2]]
21-
; CHECK: str x15, [sp, #[[OFFSET_3:[0-9]+]]]
22-
; CHECK-NEXT: .seh_save_any_reg x15, [[OFFSET_3]]
15+
; CHECK: str x30, [sp, #[[OFFSET_0:[0-9]+]]]
16+
; CHECK-NEXT: .seh_save_reg x30, [[OFFSET_0]]
17+
; CHECK: str x9, [sp, #[[OFFSET_1:[0-9]+]]]
18+
; CHECK-NEXT: .seh_save_any_reg x9, [[OFFSET_1]]
19+
; CHECK: stp x10, x11, [sp, #[[OFFSET_2:[0-9]+]]]
20+
; CHECK-NEXT: .seh_save_any_reg_p x10, [[OFFSET_2]]
21+
; CHECK: stp x12, x13, [sp, #[[OFFSET_3:[0-9]+]]]
22+
; CHECK-NEXT: .seh_save_any_reg_p x12, [[OFFSET_3]]
23+
; CHECK: stp x14, x15, [sp, #[[OFFSET_4:[0-9]+]]]
24+
; CHECK-NEXT: .seh_save_any_reg_p x14, [[OFFSET_4]]
2325
; CHECK: .seh_endprologue
2426

2527
; CHECK: .seh_startepilogue
26-
; CHECK: ldr x15, [sp, #[[OFFSET_3]]]
27-
; CHECK-NEXT: .seh_save_any_reg x15, [[OFFSET_3]]
28-
; CHECK: ldp x13, x14, [sp, #[[OFFSET_2]]]
29-
; CHECK-NEXT: .seh_save_any_reg_p x13, [[OFFSET_2]]
30-
; CHECK: ldp x11, x12, [sp, #[[OFFSET_1]]]
31-
; CHECK-NEXT: .seh_save_any_reg_p x11, [[OFFSET_1]]
32-
; CHECK: ldp x9, x10, [sp, #[[OFFSET_0]]]
33-
; CHECK-NEXT: .seh_save_any_reg_p x9, [[OFFSET_0]]
28+
; CHECK: ldp x14, x15, [sp, #[[OFFSET_4]]]
29+
; CHECK-NEXT: .seh_save_any_reg_p x14, [[OFFSET_4]]
30+
; CHECK: ldp x12, x13, [sp, #[[OFFSET_3]]]
31+
; CHECK-NEXT: .seh_save_any_reg_p x12, [[OFFSET_3]]
32+
; CHECK: ldp x10, x11, [sp, #[[OFFSET_2]]]
33+
; CHECK-NEXT: .seh_save_any_reg_p x10, [[OFFSET_2]]
34+
; CHECK: ldr x9, [sp, #[[OFFSET_1]]]
35+
; CHECK-NEXT: .seh_save_any_reg x9, [[OFFSET_1]]
36+
; CHECK: ldr x30, [sp, #[[OFFSET_0]]]
37+
; CHECK-NEXT: .seh_save_reg x30, [[OFFSET_0]]
38+
3439
; CHECK: .seh_endepilogue

llvm/test/CodeGen/AArch64/stack-hazard-windows.ll

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,10 @@ define i32 @fpr_csr_stackobj(double %x) "aarch64_pstate_sm_compatible" "frame-po
1111
; CHECK0-NEXT: .seh_save_reg_x x23, 48
1212
; CHECK0-NEXT: stp x29, x30, [sp, #8] // 16-byte Folded Spill
1313
; CHECK0-NEXT: .seh_save_fplr 8
14-
; CHECK0-NEXT: stp d9, d10, [sp, #24] // 16-byte Folded Spill
15-
; CHECK0-NEXT: .seh_save_fregp d9, 24
14+
; CHECK0-NEXT: str d9, [sp, #24] // 8-byte Folded Spill
15+
; CHECK0-NEXT: .seh_save_freg d9, 24
16+
; CHECK0-NEXT: str d10, [sp, #32] // 8-byte Folded Spill
17+
; CHECK0-NEXT: .seh_save_freg d10, 32
1618
; CHECK0-NEXT: add x29, sp, #8
1719
; CHECK0-NEXT: .seh_add_fp 8
1820
; CHECK0-NEXT: .seh_endprologue
@@ -21,8 +23,10 @@ define i32 @fpr_csr_stackobj(double %x) "aarch64_pstate_sm_compatible" "frame-po
2123
; CHECK0-NEXT: //NO_APP
2224
; CHECK0-NEXT: str d0, [x29, #32]
2325
; CHECK0-NEXT: .seh_startepilogue
24-
; CHECK0-NEXT: ldp d9, d10, [sp, #24] // 16-byte Folded Reload
25-
; CHECK0-NEXT: .seh_save_fregp d9, 24
26+
; CHECK0-NEXT: ldr d10, [sp, #32] // 8-byte Folded Reload
27+
; CHECK0-NEXT: .seh_save_freg d10, 32
28+
; CHECK0-NEXT: ldr d9, [sp, #24] // 8-byte Folded Reload
29+
; CHECK0-NEXT: .seh_save_freg d9, 24
2630
; CHECK0-NEXT: ldp x29, x30, [sp, #8] // 16-byte Folded Reload
2731
; CHECK0-NEXT: .seh_save_fplr 8
2832
; CHECK0-NEXT: ldr x23, [sp], #48 // 8-byte Folded Reload

llvm/test/CodeGen/AArch64/wineh-frame2.mir

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4,20 +4,21 @@
44

55
# CHECK: early-clobber $sp = frame-setup STRXpre killed $x19, $sp, -48
66
# CHECK-NEXT: frame-setup SEH_SaveReg_X 19, -48
7-
# CHECK-NEXT: frame-setup STPDi killed $d8, killed $d9, $sp, 1
8-
# CHECK-NEXT: frame-setup SEH_SaveFRegP 8, 9, 8
9-
# CHECK-NEXT: frame-setup STPDi killed $d10, killed $d11, $sp, 3
10-
# CHECK-NEXT: frame-setup SEH_SaveFRegP 10, 11, 24
11-
# CHECK-NEXT: frame-setup STRDui killed $d12, $sp, 5
12-
# CHECK-NEXT: frame-setup SEH_SaveFReg 12, 40
7+
# CHECK-NEXT: frame-setup STRDui killed $d8, $sp, 1
8+
# CHECK-NEXT: frame-setup SEH_SaveFReg 8, 8
9+
# CHECK-NEXT: frame-setup STPDi killed $d9, killed $d10, $sp, 2
10+
# CHECK-NEXT: frame-setup SEH_SaveFRegP 9, 10, 16
11+
# CHECK-NEXT: frame-setup STPDi killed $d11, killed $d12, $sp, 4
12+
# CHECK-NEXT: frame-setup SEH_SaveFRegP 11, 12, 32
1313
# CHECK-NEXT: frame-setup SEH_PrologEnd
14+
1415
# CHECK: frame-destroy SEH_EpilogStart
15-
# CHECK-NEXT: $d12 = frame-destroy LDRDui $sp, 5
16-
# CHECK-NEXT: frame-destroy SEH_SaveFReg 12, 40
17-
# CHECK-NEXT: $d10, $d11 = frame-destroy LDPDi $sp, 3
18-
# CHECK-NEXT: frame-destroy SEH_SaveFRegP 10, 11, 24
19-
# CHECK-NEXT: $d8, $d9 = frame-destroy LDPDi $sp, 1
20-
# CHECK-NEXT: frame-destroy SEH_SaveFRegP 8, 9, 8
16+
# CHECK-NEXT: $d11, $d12 = frame-destroy LDPDi $sp, 4
17+
# CHECK-NEXT: frame-destroy SEH_SaveFRegP 11, 12, 32
18+
# CHECK-NEXT: $d9, $d10 = frame-destroy LDPDi $sp, 2
19+
# CHECK-NEXT: frame-destroy SEH_SaveFRegP 9, 10, 16
20+
# CHECK-NEXT: $d8 = frame-destroy LDRDui $sp, 1
21+
# CHECK-NEXT: frame-destroy SEH_SaveFReg 8, 8
2122
# CHECK-NEXT: early-clobber $sp, $x19 = frame-destroy LDRXpost $sp, 48
2223
# CHECK-NEXT: frame-destroy SEH_SaveReg_X 19, -48
2324
# CHECK-NEXT: frame-destroy SEH_EpilogEnd

0 commit comments

Comments
 (0)