Skip to content

Conversation

@compnerd
Copy link
Member

@compnerd compnerd commented Nov 7, 2025

Adjust the frame setup code for Windows ARM64 to attempt to align pair-wise spills to 16-byte boundaries. This enables us to properly emit the spills for custom clang calling conventions such as preserve most, which spills r9-r15, which are normally nonvolatile registers. Even when using the ARM64EC opcodes for the unwinding, we cannot represent the spill if it is unaligned.

@llvmbot
Copy link
Member

llvmbot commented Nov 7, 2025

@llvm/pr-subscribers-platform-windows

@llvm/pr-subscribers-backend-aarch64

Author: Saleem Abdulrasool (compnerd)

Changes

Adjust the frame setup code for Windows ARM64 to attempt to align pair-wise spills to 16-byte boundaries. This enables us to properly emit the spills for custom clang calling conventions such as preserve most, which spills r9-r15, which are normally nonvolatile registers. Even when using the ARM64EC opcodes for the unwinding, we cannot represent the spill if it is unaligned.


Full diff: https://github.com/llvm/llvm-project/pull/166902.diff

5 Files Affected:

  • (modified) llvm/lib/Target/AArch64/AArch64FrameLowering.cpp (+21-13)
  • (modified) llvm/test/CodeGen/AArch64/preserve_nonecc_call.ll (+40-32)
  • (modified) llvm/test/CodeGen/AArch64/seh-extended-spills.ll (+21-16)
  • (modified) llvm/test/CodeGen/AArch64/stack-hazard-windows.ll (+8-4)
  • (modified) llvm/test/CodeGen/AArch64/wineh-frame2.mir (+13-12)
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 70c5c29149288..6082e20b8327f 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -1554,8 +1554,9 @@ static bool produceCompactUnwindFrame(const AArch64FrameLowering &AFL,
          !AFL.requiresSaveVG(MF) && !AFI->isSVECC();
 }
 
-static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2,
-                                             bool NeedsWinCFI, bool IsFirst,
+static bool invalidateWindowsRegisterPairing(unsigned Spilled, unsigned Reg1,
+                                             unsigned Reg2, bool NeedsWinCFI,
+                                             bool IsFirst,
                                              const TargetRegisterInfo *TRI) {
   // If we are generating register pairs for a Windows function that requires
   // EH support, then pair consecutive registers only.  There are no unwind
@@ -1568,8 +1569,14 @@ static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2,
     return true;
   if (!NeedsWinCFI)
     return false;
+  // ARM64EC introduced `save_any_regp` which expects 16-byte alignment.
+  // Accommodate that by ensuring that we re-align to 16-bytes when doing paired
+  // spills. Carve out an exception for {FP,LR} pairs which we perform without
+  // 16-byte alignment.
   if (TRI->getEncodingValue(Reg2) == TRI->getEncodingValue(Reg1) + 1)
-    return false;
+    return ((Reg1 == AArch64::FP && Reg2 == AArch64::LR) || (Spilled % 2) == 0)
+               ? false
+               : true;
   // If pairing a GPR with LR, the pair can be described by the save_lrpair
   // opcode. If this is the first register pair, it would end up with a
   // predecrement, but there's no save_lrpair_x opcode, so we can only do this
@@ -1585,13 +1592,14 @@ static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2,
 /// WindowsCFI requires that only consecutive registers can be paired.
 /// LR and FP need to be allocated together when the frame needs to save
 /// the frame-record. This means any other register pairing with LR is invalid.
-static bool invalidateRegisterPairing(unsigned Reg1, unsigned Reg2,
-                                      bool UsesWinAAPCS, bool NeedsWinCFI,
-                                      bool NeedsFrameRecord, bool IsFirst,
+static bool invalidateRegisterPairing(unsigned Spilled, unsigned Reg1,
+                                      unsigned Reg2, bool UsesWinAAPCS,
+                                      bool NeedsWinCFI, bool NeedsFrameRecord,
+                                      bool IsFirst,
                                       const TargetRegisterInfo *TRI) {
   if (UsesWinAAPCS)
-    return invalidateWindowsRegisterPairing(Reg1, Reg2, NeedsWinCFI, IsFirst,
-                                            TRI);
+    return invalidateWindowsRegisterPairing(Spilled, Reg1, Reg2, NeedsWinCFI,
+                                            IsFirst, TRI);
 
   // If we need to store the frame record, don't pair any register
   // with LR other than FP.
@@ -1752,15 +1760,15 @@ void computeCalleeSaveRegisterPairs(const AArch64FrameLowering &AFL,
       switch (RPI.Type) {
       case RegPairInfo::GPR:
         if (AArch64::GPR64RegClass.contains(NextReg) &&
-            !invalidateRegisterPairing(RPI.Reg1, NextReg, IsWindows,
-                                       NeedsWinCFI, NeedsFrameRecord, IsFirst,
-                                       TRI))
+            !invalidateRegisterPairing(i - FirstReg, RPI.Reg1, NextReg,
+                                       IsWindows, NeedsWinCFI, NeedsFrameRecord,
+                                       IsFirst, TRI))
           RPI.Reg2 = NextReg;
         break;
       case RegPairInfo::FPR64:
         if (AArch64::FPR64RegClass.contains(NextReg) &&
-            !invalidateWindowsRegisterPairing(RPI.Reg1, NextReg, NeedsWinCFI,
-                                              IsFirst, TRI))
+            !invalidateWindowsRegisterPairing(i - FirstReg, RPI.Reg1, NextReg,
+                                              NeedsWinCFI, IsFirst, TRI))
           RPI.Reg2 = NextReg;
         break;
       case RegPairInfo::FPR128:
diff --git a/llvm/test/CodeGen/AArch64/preserve_nonecc_call.ll b/llvm/test/CodeGen/AArch64/preserve_nonecc_call.ll
index 9b9717c19321e..575c93d62bd04 100644
--- a/llvm/test/CodeGen/AArch64/preserve_nonecc_call.ll
+++ b/llvm/test/CodeGen/AArch64/preserve_nonecc_call.ll
@@ -120,26 +120,30 @@ define void @caller1(ptr %a) {
 ; WIN-NEXT:    .seh_save_regp x27, 64
 ; WIN-NEXT:    str x30, [sp, #80] // 8-byte Folded Spill
 ; WIN-NEXT:    .seh_save_reg x30, 80
-; WIN-NEXT:    stp d8, d9, [sp, #88] // 16-byte Folded Spill
-; WIN-NEXT:    .seh_save_fregp d8, 88
-; WIN-NEXT:    stp d10, d11, [sp, #104] // 16-byte Folded Spill
-; WIN-NEXT:    .seh_save_fregp d10, 104
-; WIN-NEXT:    stp d12, d13, [sp, #120] // 16-byte Folded Spill
-; WIN-NEXT:    .seh_save_fregp d12, 120
-; WIN-NEXT:    stp d14, d15, [sp, #136] // 16-byte Folded Spill
-; WIN-NEXT:    .seh_save_fregp d14, 136
+; WIN-NEXT:    str d8, [sp, #88] // 8-byte Folded Spill
+; WIN-NEXT:    .seh_save_freg d8, 88
+; WIN-NEXT:    stp d9, d10, [sp, #96] // 16-byte Folded Spill
+; WIN-NEXT:    .seh_save_fregp d9, 96
+; WIN-NEXT:    stp d11, d12, [sp, #112] // 16-byte Folded Spill
+; WIN-NEXT:    .seh_save_fregp d11, 112
+; WIN-NEXT:    stp d13, d14, [sp, #128] // 16-byte Folded Spill
+; WIN-NEXT:    .seh_save_fregp d13, 128
+; WIN-NEXT:    str d15, [sp, #144] // 8-byte Folded Spill
+; WIN-NEXT:    .seh_save_freg d15, 144
 ; WIN-NEXT:    .seh_endprologue
 ; WIN-NEXT:    mov x20, x0
 ; WIN-NEXT:    bl callee
 ; WIN-NEXT:    .seh_startepilogue
-; WIN-NEXT:    ldp d14, d15, [sp, #136] // 16-byte Folded Reload
-; WIN-NEXT:    .seh_save_fregp d14, 136
-; WIN-NEXT:    ldp d12, d13, [sp, #120] // 16-byte Folded Reload
-; WIN-NEXT:    .seh_save_fregp d12, 120
-; WIN-NEXT:    ldp d10, d11, [sp, #104] // 16-byte Folded Reload
-; WIN-NEXT:    .seh_save_fregp d10, 104
-; WIN-NEXT:    ldp d8, d9, [sp, #88] // 16-byte Folded Reload
-; WIN-NEXT:    .seh_save_fregp d8, 88
+; WIN-NEXT:    ldr d15, [sp, #144] // 8-byte Folded Reload
+; WIN-NEXT:    .seh_save_freg d15, 144
+; WIN-NEXT:    ldp d13, d14, [sp, #128] // 16-byte Folded Reload
+; WIN-NEXT:    .seh_save_fregp d13, 128
+; WIN-NEXT:    ldp d11, d12, [sp, #112] // 16-byte Folded Reload
+; WIN-NEXT:    .seh_save_fregp d11, 112
+; WIN-NEXT:    ldp d9, d10, [sp, #96] // 16-byte Folded Reload
+; WIN-NEXT:    .seh_save_fregp d9, 96
+; WIN-NEXT:    ldr d8, [sp, #88] // 8-byte Folded Reload
+; WIN-NEXT:    .seh_save_freg d8, 88
 ; WIN-NEXT:    ldr x30, [sp, #80] // 8-byte Folded Reload
 ; WIN-NEXT:    .seh_save_reg x30, 80
 ; WIN-NEXT:    ldp x27, x28, [sp, #64] // 16-byte Folded Reload
@@ -456,14 +460,16 @@ define i64 @caller3() {
 ; WIN-NEXT:    .seh_save_regp x27, 80
 ; WIN-NEXT:    str x30, [sp, #96] // 8-byte Folded Spill
 ; WIN-NEXT:    .seh_save_reg x30, 96
-; WIN-NEXT:    stp d8, d9, [sp, #104] // 16-byte Folded Spill
-; WIN-NEXT:    .seh_save_fregp d8, 104
-; WIN-NEXT:    stp d10, d11, [sp, #120] // 16-byte Folded Spill
-; WIN-NEXT:    .seh_save_fregp d10, 120
-; WIN-NEXT:    stp d12, d13, [sp, #136] // 16-byte Folded Spill
-; WIN-NEXT:    .seh_save_fregp d12, 136
-; WIN-NEXT:    stp d14, d15, [sp, #152] // 16-byte Folded Spill
-; WIN-NEXT:    .seh_save_fregp d14, 152
+; WIN-NEXT:    str d8, [sp, #104] // 8-byte Folded Spill
+; WIN-NEXT:    .seh_save_freg d8, 104
+; WIN-NEXT:    stp d9, d10, [sp, #112] // 16-byte Folded Spill
+; WIN-NEXT:    .seh_save_fregp d9, 112
+; WIN-NEXT:    stp d11, d12, [sp, #128] // 16-byte Folded Spill
+; WIN-NEXT:    .seh_save_fregp d11, 128
+; WIN-NEXT:    stp d13, d14, [sp, #144] // 16-byte Folded Spill
+; WIN-NEXT:    .seh_save_fregp d13, 144
+; WIN-NEXT:    str d15, [sp, #160] // 8-byte Folded Spill
+; WIN-NEXT:    .seh_save_freg d15, 160
 ; WIN-NEXT:    .seh_endprologue
 ; WIN-NEXT:    mov w8, #24 // =0x18
 ; WIN-NEXT:    mov w20, #1 // =0x1
@@ -492,14 +498,16 @@ define i64 @caller3() {
 ; WIN-NEXT:    str x8, [sp]
 ; WIN-NEXT:    bl callee_with_many_param
 ; WIN-NEXT:    .seh_startepilogue
-; WIN-NEXT:    ldp d14, d15, [sp, #152] // 16-byte Folded Reload
-; WIN-NEXT:    .seh_save_fregp d14, 152
-; WIN-NEXT:    ldp d12, d13, [sp, #136] // 16-byte Folded Reload
-; WIN-NEXT:    .seh_save_fregp d12, 136
-; WIN-NEXT:    ldp d10, d11, [sp, #120] // 16-byte Folded Reload
-; WIN-NEXT:    .seh_save_fregp d10, 120
-; WIN-NEXT:    ldp d8, d9, [sp, #104] // 16-byte Folded Reload
-; WIN-NEXT:    .seh_save_fregp d8, 104
+; WIN-NEXT:    ldr d15, [sp, #160] // 8-byte Folded Reload
+; WIN-NEXT:    .seh_save_freg d15, 160
+; WIN-NEXT:    ldp d13, d14, [sp, #144] // 16-byte Folded Reload
+; WIN-NEXT:    .seh_save_fregp d13, 144
+; WIN-NEXT:    ldp d11, d12, [sp, #128] // 16-byte Folded Reload
+; WIN-NEXT:    .seh_save_fregp d11, 128
+; WIN-NEXT:    ldp d9, d10, [sp, #112] // 16-byte Folded Reload
+; WIN-NEXT:    .seh_save_fregp d9, 112
+; WIN-NEXT:    ldr d8, [sp, #104] // 8-byte Folded Reload
+; WIN-NEXT:    .seh_save_freg d8, 104
 ; WIN-NEXT:    ldr x30, [sp, #96] // 8-byte Folded Reload
 ; WIN-NEXT:    .seh_save_reg x30, 96
 ; WIN-NEXT:    ldp x27, x28, [sp, #80] // 16-byte Folded Reload
diff --git a/llvm/test/CodeGen/AArch64/seh-extended-spills.ll b/llvm/test/CodeGen/AArch64/seh-extended-spills.ll
index ecc22703ef584..e0c2a2e042077 100644
--- a/llvm/test/CodeGen/AArch64/seh-extended-spills.ll
+++ b/llvm/test/CodeGen/AArch64/seh-extended-spills.ll
@@ -12,23 +12,28 @@ entry:
 
 attributes #0 = { nounwind uwtable(sync) }
 
-; CHECK: stp x9, x10, [sp, #[[OFFSET_0:[0-9]+]]]
-; CHECK-NEXT: .seh_save_any_reg_p x9, [[OFFSET_0]]
-; CHECK: stp x11, x12, [sp, #[[OFFSET_1:[0-9]+]]]
-; CHECK-NEXT: .seh_save_any_reg_p x11, [[OFFSET_1]]
-; CHECK: stp x13, x14, [sp, #[[OFFSET_2:[0-9]+]]]
-; CHECK-NEXT: .seh_save_any_reg_p x13, [[OFFSET_2]]
-; CHECK: str x15, [sp, #[[OFFSET_3:[0-9]+]]]
-; CHECK-NEXT: .seh_save_any_reg x15, [[OFFSET_3]]
+; CHECK: str x30, [sp, #[[OFFSET_0:[0-9]+]]]
+; CHECK-NEXT: .seh_save_reg x30, [[OFFSET_0]]
+; CHECK: str x9, [sp, #[[OFFSET_1:[0-9]+]]]
+; CHECK-NEXT: .seh_save_any_reg x9, [[OFFSET_1]]
+; CHECK: stp x10, x11, [sp, #[[OFFSET_2:[0-9]+]]]
+; CHECK-NEXT: .seh_save_any_reg_p x10, [[OFFSET_2]]
+; CHECK: stp x12, x13, [sp, #[[OFFSET_3:[0-9]+]]]
+; CHECK-NEXT: .seh_save_any_reg_p x12, [[OFFSET_3]]
+; CHECK: stp x14, x15, [sp, #[[OFFSET_4:[0-9]+]]]
+; CHECK-NEXT: .seh_save_any_reg_p x14, [[OFFSET_4]]
 ; CHECK: .seh_endprologue
 
 ; CHECK: .seh_startepilogue
-; CHECK: ldr x15, [sp, #[[OFFSET_3]]]
-; CHECK-NEXT: .seh_save_any_reg x15, [[OFFSET_3]]
-; CHECK: ldp x13, x14, [sp, #[[OFFSET_2]]]
-; CHECK-NEXT: .seh_save_any_reg_p x13, [[OFFSET_2]]
-; CHECK: ldp x11, x12, [sp, #[[OFFSET_1]]]
-; CHECK-NEXT: .seh_save_any_reg_p x11, [[OFFSET_1]]
-; CHECK: ldp x9, x10, [sp, #[[OFFSET_0]]]
-; CHECK-NEXT: .seh_save_any_reg_p x9, [[OFFSET_0]]
+; CHECK: ldp x14, x15, [sp, #[[OFFSET_4]]]
+; CHECK-NEXT: .seh_save_any_reg_p x14, [[OFFSET_4]]
+; CHECK: ldp x12, x13, [sp, #[[OFFSET_3]]]
+; CHECK-NEXT: .seh_save_any_reg_p x12, [[OFFSET_3]]
+; CHECK: ldp x10, x11, [sp, #[[OFFSET_2]]]
+; CHECK-NEXT: .seh_save_any_reg_p x10, [[OFFSET_2]]
+; CHECK: ldr x9, [sp, #[[OFFSET_1]]]
+; CHECK-NEXT: .seh_save_any_reg x9, [[OFFSET_1]]
+; CHECK: ldr x30, [sp, #[[OFFSET_0]]]
+; CHECK-NEXT: .seh_save_reg x30, [[OFFSET_0]]
+
 ; CHECK: .seh_endepilogue
diff --git a/llvm/test/CodeGen/AArch64/stack-hazard-windows.ll b/llvm/test/CodeGen/AArch64/stack-hazard-windows.ll
index 927d8b68c46be..f519cabc13157 100644
--- a/llvm/test/CodeGen/AArch64/stack-hazard-windows.ll
+++ b/llvm/test/CodeGen/AArch64/stack-hazard-windows.ll
@@ -11,8 +11,10 @@ define i32 @fpr_csr_stackobj(double %x) "aarch64_pstate_sm_compatible" "frame-po
 ; CHECK0-NEXT:    .seh_save_reg_x x23, 48
 ; CHECK0-NEXT:    stp x29, x30, [sp, #8] // 16-byte Folded Spill
 ; CHECK0-NEXT:    .seh_save_fplr 8
-; CHECK0-NEXT:    stp d9, d10, [sp, #24] // 16-byte Folded Spill
-; CHECK0-NEXT:    .seh_save_fregp d9, 24
+; CHECK0-NEXT:    str d9, [sp, #24] // 8-byte Folded Spill
+; CHECK0-NEXT:    .seh_save_freg d9, 24
+; CHECK0-NEXT:    str d10, [sp, #32] // 8-byte Folded Spill
+; CHECK0-NEXT:    .seh_save_freg d10, 32
 ; CHECK0-NEXT:    add x29, sp, #8
 ; CHECK0-NEXT:    .seh_add_fp 8
 ; CHECK0-NEXT:    .seh_endprologue
@@ -21,8 +23,10 @@ define i32 @fpr_csr_stackobj(double %x) "aarch64_pstate_sm_compatible" "frame-po
 ; CHECK0-NEXT:    //NO_APP
 ; CHECK0-NEXT:    str d0, [x29, #32]
 ; CHECK0-NEXT:    .seh_startepilogue
-; CHECK0-NEXT:    ldp d9, d10, [sp, #24] // 16-byte Folded Reload
-; CHECK0-NEXT:    .seh_save_fregp d9, 24
+; CHECK0-NEXT:    ldr d10, [sp, #32] // 8-byte Folded Reload
+; CHECK0-NEXT:    .seh_save_freg d10, 32
+; CHECK0-NEXT:    ldr d9, [sp, #24] // 8-byte Folded Reload
+; CHECK0-NEXT:    .seh_save_freg d9, 24
 ; CHECK0-NEXT:    ldp x29, x30, [sp, #8] // 16-byte Folded Reload
 ; CHECK0-NEXT:    .seh_save_fplr 8
 ; CHECK0-NEXT:    ldr x23, [sp], #48 // 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/AArch64/wineh-frame2.mir b/llvm/test/CodeGen/AArch64/wineh-frame2.mir
index 1c8cb1e79a998..385d846bd2bfa 100644
--- a/llvm/test/CodeGen/AArch64/wineh-frame2.mir
+++ b/llvm/test/CodeGen/AArch64/wineh-frame2.mir
@@ -4,20 +4,21 @@
 
 # CHECK:       early-clobber $sp = frame-setup STRXpre killed $x19, $sp, -48
 # CHECK-NEXT:  frame-setup SEH_SaveReg_X 19, -48
-# CHECK-NEXT:  frame-setup STPDi killed $d8, killed $d9, $sp, 1
-# CHECK-NEXT:  frame-setup SEH_SaveFRegP 8, 9, 8
-# CHECK-NEXT:  frame-setup STPDi killed $d10, killed $d11, $sp, 3
-# CHECK-NEXT:  frame-setup SEH_SaveFRegP 10, 11, 24
-# CHECK-NEXT:  frame-setup STRDui killed $d12, $sp, 5
-# CHECK-NEXT:  frame-setup SEH_SaveFReg 12, 40
+# CHECK-NEXT:  frame-setup STRDui killed $d8, $sp, 1
+# CHECK-NEXT:  frame-setup SEH_SaveFReg 8, 8
+# CHECK-NEXT:  frame-setup STPDi killed $d9, killed $d10, $sp, 2
+# CHECK-NEXT:  frame-setup SEH_SaveFRegP 9, 10, 16
+# CHECK-NEXT:  frame-setup STPDi killed $d11, killed $d12, $sp, 4
+# CHECK-NEXT:  frame-setup SEH_SaveFRegP 11, 12, 32
 # CHECK-NEXT:  frame-setup SEH_PrologEnd
+
 # CHECK:       frame-destroy SEH_EpilogStart
-# CHECK-NEXT:  $d12 = frame-destroy LDRDui $sp, 5
-# CHECK-NEXT:  frame-destroy SEH_SaveFReg 12, 40
-# CHECK-NEXT:  $d10, $d11 = frame-destroy LDPDi $sp, 3
-# CHECK-NEXT:  frame-destroy SEH_SaveFRegP 10, 11, 24
-# CHECK-NEXT:  $d8, $d9 = frame-destroy LDPDi $sp, 1
-# CHECK-NEXT:  frame-destroy SEH_SaveFRegP 8, 9, 8
+# CHECK-NEXT:  $d11, $d12 = frame-destroy LDPDi $sp, 4
+# CHECK-NEXT:  frame-destroy SEH_SaveFRegP 11, 12, 32
+# CHECK-NEXT:  $d9, $d10 = frame-destroy LDPDi $sp, 2
+# CHECK-NEXT:  frame-destroy SEH_SaveFRegP 9, 10, 16
+# CHECK-NEXT:  $d8 = frame-destroy LDRDui $sp, 1
+# CHECK-NEXT:  frame-destroy SEH_SaveFReg 8, 8
 # CHECK-NEXT:  early-clobber $sp, $x19 = frame-destroy LDRXpost $sp, 48
 # CHECK-NEXT:  frame-destroy SEH_SaveReg_X 19, -48
 # CHECK-NEXT:  frame-destroy SEH_EpilogEnd

Adjust the frame setup code for Windows ARM64 to attempt to align
pair-wise spills to 16-byte boundaries. This enables us to properly emit
the spills for custom clang calling conventions such as preserve most
which spills r9-r15 which are normally nonvolatile registers. Even when
using the ARM64EC opcodes for the unwinding, we cannot represent the
spill if it is unaligned.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

5 participants