Skip to content

Commit 205e76f

Browse files
committed
AArch64: align pair-wise spills on WoS to 16-byte
Adjust the frame setup code for Windows ARM64 to attempt to align pair-wise spills to 16-byte boundaries. This enables us to properly emit the spills for custom Clang calling conventions such as `preserve_most`, which spills r9-r15, which are normally nonvolatile registers. Even when using the ARM64EC opcodes for unwinding, we cannot represent a paired spill if it is unaligned.
1 parent a7bf45a commit 205e76f

File tree

5 files changed

+103
-77
lines changed

5 files changed

+103
-77
lines changed

llvm/lib/Target/AArch64/AArch64FrameLowering.cpp

Lines changed: 21 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1554,8 +1554,9 @@ static bool produceCompactUnwindFrame(const AArch64FrameLowering &AFL,
15541554
!AFL.requiresSaveVG(MF) && !AFI->isSVECC();
15551555
}
15561556

1557-
static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2,
1558-
bool NeedsWinCFI, bool IsFirst,
1557+
static bool invalidateWindowsRegisterPairing(unsigned Spilled, unsigned Reg1,
1558+
unsigned Reg2, bool NeedsWinCFI,
1559+
bool IsFirst,
15591560
const TargetRegisterInfo *TRI) {
15601561
// If we are generating register pairs for a Windows function that requires
15611562
// EH support, then pair consecutive registers only. There are no unwind
@@ -1568,8 +1569,14 @@ static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2,
15681569
return true;
15691570
if (!NeedsWinCFI)
15701571
return false;
1572+
// ARM64EC introduced `save_any_regp` which expects 16-byte alignment.
1573+
// Accomodate that by ensuring that we re-align to 16-bytes when doing paired
1574+
// spills. Carve out an exception for {FP,LR} pairs which we perform without
1575+
// 16-byte alignment.
15711576
if (TRI->getEncodingValue(Reg2) == TRI->getEncodingValue(Reg1) + 1)
1572-
return false;
1577+
return ((Reg1 == AArch64::FP && Reg2 == AArch64::LR) || (Spilled % 2) == 0)
1578+
? false
1579+
: true;
15731580
// If pairing a GPR with LR, the pair can be described by the save_lrpair
15741581
// opcode. If this is the first register pair, it would end up with a
15751582
// predecrement, but there's no save_lrpair_x opcode, so we can only do this
@@ -1585,13 +1592,14 @@ static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2,
15851592
/// WindowsCFI requires that only consecutive registers can be paired.
15861593
/// LR and FP need to be allocated together when the frame needs to save
15871594
/// the frame-record. This means any other register pairing with LR is invalid.
1588-
static bool invalidateRegisterPairing(unsigned Reg1, unsigned Reg2,
1589-
bool UsesWinAAPCS, bool NeedsWinCFI,
1590-
bool NeedsFrameRecord, bool IsFirst,
1595+
static bool invalidateRegisterPairing(unsigned Spilled, unsigned Reg1,
1596+
unsigned Reg2, bool UsesWinAAPCS,
1597+
bool NeedsWinCFI, bool NeedsFrameRecord,
1598+
bool IsFirst,
15911599
const TargetRegisterInfo *TRI) {
15921600
if (UsesWinAAPCS)
1593-
return invalidateWindowsRegisterPairing(Reg1, Reg2, NeedsWinCFI, IsFirst,
1594-
TRI);
1601+
return invalidateWindowsRegisterPairing(Spilled, Reg1, Reg2, NeedsWinCFI,
1602+
IsFirst, TRI);
15951603

15961604
// If we need to store the frame record, don't pair any register
15971605
// with LR other than FP.
@@ -1752,15 +1760,15 @@ void computeCalleeSaveRegisterPairs(const AArch64FrameLowering &AFL,
17521760
switch (RPI.Type) {
17531761
case RegPairInfo::GPR:
17541762
if (AArch64::GPR64RegClass.contains(NextReg) &&
1755-
!invalidateRegisterPairing(RPI.Reg1, NextReg, IsWindows,
1756-
NeedsWinCFI, NeedsFrameRecord, IsFirst,
1757-
TRI))
1763+
!invalidateRegisterPairing(i - FirstReg, RPI.Reg1, NextReg,
1764+
IsWindows, NeedsWinCFI, NeedsFrameRecord,
1765+
IsFirst, TRI))
17581766
RPI.Reg2 = NextReg;
17591767
break;
17601768
case RegPairInfo::FPR64:
17611769
if (AArch64::FPR64RegClass.contains(NextReg) &&
1762-
!invalidateWindowsRegisterPairing(RPI.Reg1, NextReg, NeedsWinCFI,
1763-
IsFirst, TRI))
1770+
!invalidateWindowsRegisterPairing(i - FirstReg, RPI.Reg1, NextReg,
1771+
NeedsWinCFI, IsFirst, TRI))
17641772
RPI.Reg2 = NextReg;
17651773
break;
17661774
case RegPairInfo::FPR128:

llvm/test/CodeGen/AArch64/preserve_nonecc_call.ll

Lines changed: 40 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -120,26 +120,30 @@ define void @caller1(ptr %a) {
120120
; WIN-NEXT: .seh_save_regp x27, 64
121121
; WIN-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
122122
; WIN-NEXT: .seh_save_reg x30, 80
123-
; WIN-NEXT: stp d8, d9, [sp, #88] // 16-byte Folded Spill
124-
; WIN-NEXT: .seh_save_fregp d8, 88
125-
; WIN-NEXT: stp d10, d11, [sp, #104] // 16-byte Folded Spill
126-
; WIN-NEXT: .seh_save_fregp d10, 104
127-
; WIN-NEXT: stp d12, d13, [sp, #120] // 16-byte Folded Spill
128-
; WIN-NEXT: .seh_save_fregp d12, 120
129-
; WIN-NEXT: stp d14, d15, [sp, #136] // 16-byte Folded Spill
130-
; WIN-NEXT: .seh_save_fregp d14, 136
123+
; WIN-NEXT: str d8, [sp, #88] // 8-byte Folded Spill
124+
; WIN-NEXT: .seh_save_freg d8, 88
125+
; WIN-NEXT: stp d9, d10, [sp, #96] // 16-byte Folded Spill
126+
; WIN-NEXT: .seh_save_fregp d9, 96
127+
; WIN-NEXT: stp d11, d12, [sp, #112] // 16-byte Folded Spill
128+
; WIN-NEXT: .seh_save_fregp d11, 112
129+
; WIN-NEXT: stp d13, d14, [sp, #128] // 16-byte Folded Spill
130+
; WIN-NEXT: .seh_save_fregp d13, 128
131+
; WIN-NEXT: str d15, [sp, #144] // 8-byte Folded Spill
132+
; WIN-NEXT: .seh_save_freg d15, 144
131133
; WIN-NEXT: .seh_endprologue
132134
; WIN-NEXT: mov x20, x0
133135
; WIN-NEXT: bl callee
134136
; WIN-NEXT: .seh_startepilogue
135-
; WIN-NEXT: ldp d14, d15, [sp, #136] // 16-byte Folded Reload
136-
; WIN-NEXT: .seh_save_fregp d14, 136
137-
; WIN-NEXT: ldp d12, d13, [sp, #120] // 16-byte Folded Reload
138-
; WIN-NEXT: .seh_save_fregp d12, 120
139-
; WIN-NEXT: ldp d10, d11, [sp, #104] // 16-byte Folded Reload
140-
; WIN-NEXT: .seh_save_fregp d10, 104
141-
; WIN-NEXT: ldp d8, d9, [sp, #88] // 16-byte Folded Reload
142-
; WIN-NEXT: .seh_save_fregp d8, 88
137+
; WIN-NEXT: ldr d15, [sp, #144] // 8-byte Folded Reload
138+
; WIN-NEXT: .seh_save_freg d15, 144
139+
; WIN-NEXT: ldp d13, d14, [sp, #128] // 16-byte Folded Reload
140+
; WIN-NEXT: .seh_save_fregp d13, 128
141+
; WIN-NEXT: ldp d11, d12, [sp, #112] // 16-byte Folded Reload
142+
; WIN-NEXT: .seh_save_fregp d11, 112
143+
; WIN-NEXT: ldp d9, d10, [sp, #96] // 16-byte Folded Reload
144+
; WIN-NEXT: .seh_save_fregp d9, 96
145+
; WIN-NEXT: ldr d8, [sp, #88] // 8-byte Folded Reload
146+
; WIN-NEXT: .seh_save_freg d8, 88
143147
; WIN-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
144148
; WIN-NEXT: .seh_save_reg x30, 80
145149
; WIN-NEXT: ldp x27, x28, [sp, #64] // 16-byte Folded Reload
@@ -456,14 +460,16 @@ define i64 @caller3() {
456460
; WIN-NEXT: .seh_save_regp x27, 80
457461
; WIN-NEXT: str x30, [sp, #96] // 8-byte Folded Spill
458462
; WIN-NEXT: .seh_save_reg x30, 96
459-
; WIN-NEXT: stp d8, d9, [sp, #104] // 16-byte Folded Spill
460-
; WIN-NEXT: .seh_save_fregp d8, 104
461-
; WIN-NEXT: stp d10, d11, [sp, #120] // 16-byte Folded Spill
462-
; WIN-NEXT: .seh_save_fregp d10, 120
463-
; WIN-NEXT: stp d12, d13, [sp, #136] // 16-byte Folded Spill
464-
; WIN-NEXT: .seh_save_fregp d12, 136
465-
; WIN-NEXT: stp d14, d15, [sp, #152] // 16-byte Folded Spill
466-
; WIN-NEXT: .seh_save_fregp d14, 152
463+
; WIN-NEXT: str d8, [sp, #104] // 8-byte Folded Spill
464+
; WIN-NEXT: .seh_save_freg d8, 104
465+
; WIN-NEXT: stp d9, d10, [sp, #112] // 16-byte Folded Spill
466+
; WIN-NEXT: .seh_save_fregp d9, 112
467+
; WIN-NEXT: stp d11, d12, [sp, #128] // 16-byte Folded Spill
468+
; WIN-NEXT: .seh_save_fregp d11, 128
469+
; WIN-NEXT: stp d13, d14, [sp, #144] // 16-byte Folded Spill
470+
; WIN-NEXT: .seh_save_fregp d13, 144
471+
; WIN-NEXT: str d15, [sp, #160] // 8-byte Folded Spill
472+
; WIN-NEXT: .seh_save_freg d15, 160
467473
; WIN-NEXT: .seh_endprologue
468474
; WIN-NEXT: mov w8, #24 // =0x18
469475
; WIN-NEXT: mov w20, #1 // =0x1
@@ -492,14 +498,16 @@ define i64 @caller3() {
492498
; WIN-NEXT: str x8, [sp]
493499
; WIN-NEXT: bl callee_with_many_param
494500
; WIN-NEXT: .seh_startepilogue
495-
; WIN-NEXT: ldp d14, d15, [sp, #152] // 16-byte Folded Reload
496-
; WIN-NEXT: .seh_save_fregp d14, 152
497-
; WIN-NEXT: ldp d12, d13, [sp, #136] // 16-byte Folded Reload
498-
; WIN-NEXT: .seh_save_fregp d12, 136
499-
; WIN-NEXT: ldp d10, d11, [sp, #120] // 16-byte Folded Reload
500-
; WIN-NEXT: .seh_save_fregp d10, 120
501-
; WIN-NEXT: ldp d8, d9, [sp, #104] // 16-byte Folded Reload
502-
; WIN-NEXT: .seh_save_fregp d8, 104
501+
; WIN-NEXT: ldr d15, [sp, #160] // 8-byte Folded Reload
502+
; WIN-NEXT: .seh_save_freg d15, 160
503+
; WIN-NEXT: ldp d13, d14, [sp, #144] // 16-byte Folded Reload
504+
; WIN-NEXT: .seh_save_fregp d13, 144
505+
; WIN-NEXT: ldp d11, d12, [sp, #128] // 16-byte Folded Reload
506+
; WIN-NEXT: .seh_save_fregp d11, 128
507+
; WIN-NEXT: ldp d9, d10, [sp, #112] // 16-byte Folded Reload
508+
; WIN-NEXT: .seh_save_fregp d9, 112
509+
; WIN-NEXT: ldr d8, [sp, #104] // 8-byte Folded Reload
510+
; WIN-NEXT: .seh_save_freg d8, 104
503511
; WIN-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload
504512
; WIN-NEXT: .seh_save_reg x30, 96
505513
; WIN-NEXT: ldp x27, x28, [sp, #80] // 16-byte Folded Reload

llvm/test/CodeGen/AArch64/seh-extended-spills.ll

Lines changed: 21 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -12,23 +12,28 @@ entry:
1212

1313
attributes #0 = { nounwind uwtable(sync) }
1414

15-
; CHECK: stp x9, x10, [sp, #[[OFFSET_0:[0-9]+]]]
16-
; CHECK-NEXT: .seh_save_any_reg_p x9, [[OFFSET_0]]
17-
; CHECK: stp x11, x12, [sp, #[[OFFSET_1:[0-9]+]]]
18-
; CHECK-NEXT: .seh_save_any_reg_p x11, [[OFFSET_1]]
19-
; CHECK: stp x13, x14, [sp, #[[OFFSET_2:[0-9]+]]]
20-
; CHECK-NEXT: .seh_save_any_reg_p x13, [[OFFSET_2]]
21-
; CHECK: str x15, [sp, #[[OFFSET_3:[0-9]+]]]
22-
; CHECK-NEXT: .seh_save_any_reg x15, [[OFFSET_3]]
15+
; CHECK: str x30, [sp, #[[OFFSET_0:[0-9]+]]]
16+
; CHECK-NEXT: .seh_save_reg x30, [[OFFSET_0]]
17+
; CHECK: str x9, [sp, #[[OFFSET_1:[0-9]+]]]
18+
; CHECK-NEXT: .seh_save_any_reg x9, [[OFFSET_1]]
19+
; CHECK: stp x10, x11, [sp, #[[OFFSET_2:[0-9]+]]]
20+
; CHECK-NEXT: .seh_save_any_reg_p x10, [[OFFSET_2]]
21+
; CHECK: stp x12, x13, [sp, #[[OFFSET_3:[0-9]+]]]
22+
; CHECK-NEXT: .seh_save_any_reg_p x12, [[OFFSET_3]]
23+
; CHECK: stp x14, x15, [sp, #[[OFFSET_4:[0-9]+]]]
24+
; CHECK-NEXT: .seh_save_any_reg_p x14, [[OFFSET_4]]
2325
; CHECK: .seh_endprologue
2426

2527
; CHECK: .seh_startepilogue
26-
; CHECK: ldr x15, [sp, #[[OFFSET_3]]]
27-
; CHECK-NEXT: .seh_save_any_reg x15, [[OFFSET_3]]
28-
; CHECK: ldp x13, x14, [sp, #[[OFFSET_2]]]
29-
; CHECK-NEXT: .seh_save_any_reg_p x13, [[OFFSET_2]]
30-
; CHECK: ldp x11, x12, [sp, #[[OFFSET_1]]]
31-
; CHECK-NEXT: .seh_save_any_reg_p x11, [[OFFSET_1]]
32-
; CHECK: ldp x9, x10, [sp, #[[OFFSET_0]]]
33-
; CHECK-NEXT: .seh_save_any_reg_p x9, [[OFFSET_0]]
28+
; CHECK: ldp x14, x15, [sp, #[[OFFSET_4]]]
29+
; CHECK-NEXT: .seh_save_any_reg_p x14, [[OFFSET_4]]
30+
; CHECK: ldp x12, x13, [sp, #[[OFFSET_3]]]
31+
; CHECK-NEXT: .seh_save_any_reg_p x12, [[OFFSET_3]]
32+
; CHECK: ldp x10, x11, [sp, #[[OFFSET_2]]]
33+
; CHECK-NEXT: .seh_save_any_reg_p x10, [[OFFSET_2]]
34+
; CHECK: ldr x9, [sp, #[[OFFSET_1]]]
35+
; CHECK-NEXT: .seh_save_any_reg x9, [[OFFSET_1]]
36+
; CHECK: ldr x30, [sp, #[[OFFSET_0]]]
37+
; CHECK-NEXT: .seh_save_reg x30, [[OFFSET_0]]
38+
3439
; CHECK: .seh_endepilogue

llvm/test/CodeGen/AArch64/stack-hazard-windows.ll

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,10 @@ define i32 @fpr_csr_stackobj(double %x) "aarch64_pstate_sm_compatible" "frame-po
1111
; CHECK0-NEXT: .seh_save_reg_x x23, 48
1212
; CHECK0-NEXT: stp x29, x30, [sp, #8] // 16-byte Folded Spill
1313
; CHECK0-NEXT: .seh_save_fplr 8
14-
; CHECK0-NEXT: stp d9, d10, [sp, #24] // 16-byte Folded Spill
15-
; CHECK0-NEXT: .seh_save_fregp d9, 24
14+
; CHECK0-NEXT: str d9, [sp, #24] // 8-byte Folded Spill
15+
; CHECK0-NEXT: .seh_save_freg d9, 24
16+
; CHECK0-NEXT: str d10, [sp, #32] // 8-byte Folded Spill
17+
; CHECK0-NEXT: .seh_save_freg d10, 32
1618
; CHECK0-NEXT: add x29, sp, #8
1719
; CHECK0-NEXT: .seh_add_fp 8
1820
; CHECK0-NEXT: .seh_endprologue
@@ -21,8 +23,10 @@ define i32 @fpr_csr_stackobj(double %x) "aarch64_pstate_sm_compatible" "frame-po
2123
; CHECK0-NEXT: //NO_APP
2224
; CHECK0-NEXT: str d0, [x29, #32]
2325
; CHECK0-NEXT: .seh_startepilogue
24-
; CHECK0-NEXT: ldp d9, d10, [sp, #24] // 16-byte Folded Reload
25-
; CHECK0-NEXT: .seh_save_fregp d9, 24
26+
; CHECK0-NEXT: ldr d10, [sp, #32] // 8-byte Folded Reload
27+
; CHECK0-NEXT: .seh_save_freg d10, 32
28+
; CHECK0-NEXT: ldr d9, [sp, #24] // 8-byte Folded Reload
29+
; CHECK0-NEXT: .seh_save_freg d9, 24
2630
; CHECK0-NEXT: ldp x29, x30, [sp, #8] // 16-byte Folded Reload
2731
; CHECK0-NEXT: .seh_save_fplr 8
2832
; CHECK0-NEXT: ldr x23, [sp], #48 // 8-byte Folded Reload

llvm/test/CodeGen/AArch64/wineh-frame2.mir

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4,20 +4,21 @@
44

55
# CHECK: early-clobber $sp = frame-setup STRXpre killed $x19, $sp, -48
66
# CHECK-NEXT: frame-setup SEH_SaveReg_X 19, -48
7-
# CHECK-NEXT: frame-setup STPDi killed $d8, killed $d9, $sp, 1
8-
# CHECK-NEXT: frame-setup SEH_SaveFRegP 8, 9, 8
9-
# CHECK-NEXT: frame-setup STPDi killed $d10, killed $d11, $sp, 3
10-
# CHECK-NEXT: frame-setup SEH_SaveFRegP 10, 11, 24
11-
# CHECK-NEXT: frame-setup STRDui killed $d12, $sp, 5
12-
# CHECK-NEXT: frame-setup SEH_SaveFReg 12, 40
7+
# CHECK-NEXT: frame-setup STRDui killed $d8, $sp, 1
8+
# CHECK-NEXT: frame-setup SEH_SaveFReg 8, 8
9+
# CHECK-NEXT: frame-setup STPDi killed $d9, killed $d10, $sp, 2
10+
# CHECK-NEXT: frame-setup SEH_SaveFRegP 9, 10, 16
11+
# CHECK-NEXT: frame-setup STPDi killed $d11, killed $d12, $sp, 4
12+
# CHECK-NEXT: frame-setup SEH_SaveFRegP 11, 12, 32
1313
# CHECK-NEXT: frame-setup SEH_PrologEnd
14+
1415
# CHECK: frame-destroy SEH_EpilogStart
15-
# CHECK-NEXT: $d12 = frame-destroy LDRDui $sp, 5
16-
# CHECK-NEXT: frame-destroy SEH_SaveFReg 12, 40
17-
# CHECK-NEXT: $d10, $d11 = frame-destroy LDPDi $sp, 3
18-
# CHECK-NEXT: frame-destroy SEH_SaveFRegP 10, 11, 24
19-
# CHECK-NEXT: $d8, $d9 = frame-destroy LDPDi $sp, 1
20-
# CHECK-NEXT: frame-destroy SEH_SaveFRegP 8, 9, 8
16+
# CHECK-NEXT: $d11, $d12 = frame-destroy LDPDi $sp, 4
17+
# CHECK-NEXT: frame-destroy SEH_SaveFRegP 11, 12, 32
18+
# CHECK-NEXT: $d9, $d10 = frame-destroy LDPDi $sp, 2
19+
# CHECK-NEXT: frame-destroy SEH_SaveFRegP 9, 10, 16
20+
# CHECK-NEXT: $d8 = frame-destroy LDRDui $sp, 1
21+
# CHECK-NEXT: frame-destroy SEH_SaveFReg 8, 8
2122
# CHECK-NEXT: early-clobber $sp, $x19 = frame-destroy LDRXpost $sp, 48
2223
# CHECK-NEXT: frame-destroy SEH_SaveReg_X 19, -48
2324
# CHECK-NEXT: frame-destroy SEH_EpilogEnd

0 commit comments

Comments
 (0)