diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 22683237fa0a8..a30666506a3cf 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -2535,20 +2535,33 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
                     DeallocateAfter, TII, MachineInstr::FrameDestroy, false,
                     NeedsWinCFI, &HasWinCFI);
   } else if (SVEStackSize) {
-    // If we have stack realignment or variable sized objects on the stack,
-    // restore the stack pointer from the frame pointer prior to SVE CSR
-    // restoration.
-    if (AFI->isStackRealigned() || MFI.hasVarSizedObjects()) {
-      if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) {
-        // Set SP to start of SVE callee-save area from which they can
-        // be reloaded. The code below will deallocate the stack space
-        // space by moving FP -> SP.
-        emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::FP,
-                        StackOffset::getScalable(-CalleeSavedSize), TII,
+    int64_t SVECalleeSavedSize = AFI->getSVECalleeSavedStackSize();
+    // If we have stack realignment or variable-sized objects, we must use the
+    // FP to restore SVE callee saves (as there is an unknown amount of
+    // data/padding between the SP and SVE CS area).
+    Register BaseForSVEDealloc =
+        (AFI->isStackRealigned() || MFI.hasVarSizedObjects()) ? AArch64::FP
+                                                              : AArch64::SP;
+    if (SVECalleeSavedSize && BaseForSVEDealloc == AArch64::FP) {
+      Register CalleeSaveBase = AArch64::FP;
+      if (int64_t CalleeSaveBaseOffset =
+              AFI->getCalleeSaveBaseToFrameRecordOffset()) {
+        // If we have a non-zero offset to the non-SVE CS base we need to
+        // compute the base address by subtracting the offset in a temporary
+        // register first (to avoid briefly deallocating the SVE CS).
+        CalleeSaveBase = MBB.getParent()->getRegInfo().createVirtualRegister(
+            &AArch64::GPR64RegClass);
+        emitFrameOffset(MBB, RestoreBegin, DL, CalleeSaveBase, AArch64::FP,
+                        StackOffset::getFixed(-CalleeSaveBaseOffset), TII,
                         MachineInstr::FrameDestroy);
       }
-    } else {
-      if (AFI->getSVECalleeSavedStackSize()) {
+      // The code below will deallocate the stack space by moving the
+      // SP to the start of the SVE callee-save area.
+      emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, CalleeSaveBase,
+                      StackOffset::getScalable(-SVECalleeSavedSize), TII,
+                      MachineInstr::FrameDestroy);
+    } else if (BaseForSVEDealloc == AArch64::SP) {
+      if (SVECalleeSavedSize) {
         // Deallocate the non-SVE locals first before we can deallocate (and
         // restore callee saves) from the SVE area.
         emitFrameOffset(
diff --git a/llvm/test/CodeGen/AArch64/stack-hazard.ll b/llvm/test/CodeGen/AArch64/stack-hazard.ll
index e169b199733bd..3a33405200132 100644
--- a/llvm/test/CodeGen/AArch64/stack-hazard.ll
+++ b/llvm/test/CodeGen/AArch64/stack-hazard.ll
@@ -3143,3 +3143,1176 @@ entry:
   call void @bar(ptr noundef nonnull %b)
   ret i32 0
 }
+
+
+define i32 @svecc_call_dynamic_alloca(<4 x i16> %P0, i32 %P1, i32 %P2, %P3, i16 %P4) "aarch64_pstate_sm_compatible" {
+; CHECK0-LABEL: svecc_call_dynamic_alloca:
+; CHECK0: // %bb.0: // %entry
+; CHECK0-NEXT: stp x29, x30, [sp, #-64]!
// 16-byte Folded Spill +; CHECK0-NEXT: .cfi_def_cfa_offset 64 +; CHECK0-NEXT: cntd x9 +; CHECK0-NEXT: stp x27, x26, [sp, #32] // 16-byte Folded Spill +; CHECK0-NEXT: stp x9, x28, [sp, #16] // 16-byte Folded Spill +; CHECK0-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill +; CHECK0-NEXT: mov x29, sp +; CHECK0-NEXT: .cfi_def_cfa w29, 64 +; CHECK0-NEXT: .cfi_offset w19, -8 +; CHECK0-NEXT: .cfi_offset w20, -16 +; CHECK0-NEXT: .cfi_offset w26, -24 +; CHECK0-NEXT: .cfi_offset w27, -32 +; CHECK0-NEXT: .cfi_offset w28, -40 +; CHECK0-NEXT: .cfi_offset w30, -56 +; CHECK0-NEXT: .cfi_offset w29, -64 +; CHECK0-NEXT: addvl sp, sp, #-18 +; CHECK0-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 64 - 8 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 64 - 16 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 64 - 24 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x4b, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 64 - 32 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x4c, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 64 - 40 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x4d, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 64 - 48 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 64 - 56 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 64 - 64 * VG +; 
CHECK0-NEXT: mov w9, w0 +; CHECK0-NEXT: mov x8, sp +; CHECK0-NEXT: mov w2, w1 +; CHECK0-NEXT: add x9, x9, #15 +; CHECK0-NEXT: mov x19, sp +; CHECK0-NEXT: and x9, x9, #0x1fffffff0 +; CHECK0-NEXT: sub x8, x8, x9 +; CHECK0-NEXT: mov sp, x8 +; CHECK0-NEXT: //APP +; CHECK0-NEXT: //NO_APP +; CHECK0-NEXT: bl __arm_sme_state +; CHECK0-NEXT: and x20, x0, #0x1 +; CHECK0-NEXT: .cfi_offset vg, -48 +; CHECK0-NEXT: tbz w20, #0, .LBB35_2 +; CHECK0-NEXT: // %bb.1: // %entry +; CHECK0-NEXT: smstop sm +; CHECK0-NEXT: .LBB35_2: // %entry +; CHECK0-NEXT: mov x0, x8 +; CHECK0-NEXT: mov w1, #45 // =0x2d +; CHECK0-NEXT: bl memset +; CHECK0-NEXT: tbz w20, #0, .LBB35_4 +; CHECK0-NEXT: // %bb.3: // %entry +; CHECK0-NEXT: smstart sm +; CHECK0-NEXT: .LBB35_4: // %entry +; CHECK0-NEXT: mov w0, #22647 // =0x5877 +; CHECK0-NEXT: movk w0, #59491, lsl #16 +; CHECK0-NEXT: .cfi_restore vg +; CHECK0-NEXT: addvl sp, x29, #-18 +; CHECK0-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: .cfi_restore z8 +; CHECK0-NEXT: .cfi_restore z9 +; CHECK0-NEXT: .cfi_restore z10 +; CHECK0-NEXT: .cfi_restore z11 +; CHECK0-NEXT: .cfi_restore z12 +; CHECK0-NEXT: .cfi_restore z13 +; CHECK0-NEXT: .cfi_restore z14 +; CHECK0-NEXT: .cfi_restore z15 +; CHECK0-NEXT: mov sp, x29 +; CHECK0-NEXT: .cfi_def_cfa wsp, 64 +; CHECK0-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload +; CHECK0-NEXT: ldr x28, [sp, #24] // 8-byte Folded Reload +; CHECK0-NEXT: ldp x27, x26, [sp, #32] // 16-byte Folded Reload +; CHECK0-NEXT: ldp x29, x30, [sp], #64 // 16-byte Folded Reload +; CHECK0-NEXT: .cfi_def_cfa_offset 0 +; CHECK0-NEXT: .cfi_restore w19 +; CHECK0-NEXT: .cfi_restore w20 +; CHECK0-NEXT: .cfi_restore w26 +; CHECK0-NEXT: .cfi_restore w27 +; CHECK0-NEXT: .cfi_restore 
w28 +; CHECK0-NEXT: .cfi_restore w30 +; CHECK0-NEXT: .cfi_restore w29 +; CHECK0-NEXT: ret +; +; CHECK64-LABEL: svecc_call_dynamic_alloca: +; CHECK64: // %bb.0: // %entry +; CHECK64-NEXT: sub sp, sp, #128 +; CHECK64-NEXT: .cfi_def_cfa_offset 128 +; CHECK64-NEXT: cntd x9 +; CHECK64-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill +; CHECK64-NEXT: stp x9, x28, [sp, #80] // 16-byte Folded Spill +; CHECK64-NEXT: stp x27, x26, [sp, #96] // 16-byte Folded Spill +; CHECK64-NEXT: stp x20, x19, [sp, #112] // 16-byte Folded Spill +; CHECK64-NEXT: add x29, sp, #64 +; CHECK64-NEXT: .cfi_def_cfa w29, 64 +; CHECK64-NEXT: .cfi_offset w19, -8 +; CHECK64-NEXT: .cfi_offset w20, -16 +; CHECK64-NEXT: .cfi_offset w26, -24 +; CHECK64-NEXT: .cfi_offset w27, -32 +; CHECK64-NEXT: .cfi_offset w28, -40 +; CHECK64-NEXT: .cfi_offset w30, -56 +; CHECK64-NEXT: .cfi_offset w29, -64 +; CHECK64-NEXT: addvl sp, sp, #-18 +; CHECK64-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 128 - 8 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x49, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 128 - 16 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x4a, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 128 - 24 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x4b, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 128 - 32 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x4c, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 128 - 40 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x4d, 0x0b, 0x11, 0x80, 0x7f, 0x22, 
0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 128 - 48 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x4e, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 128 - 56 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x4f, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 128 - 64 * VG +; CHECK64-NEXT: sub sp, sp, #64 +; CHECK64-NEXT: mov w9, w0 +; CHECK64-NEXT: mov x8, sp +; CHECK64-NEXT: mov w2, w1 +; CHECK64-NEXT: add x9, x9, #15 +; CHECK64-NEXT: mov x19, sp +; CHECK64-NEXT: and x9, x9, #0x1fffffff0 +; CHECK64-NEXT: sub x8, x8, x9 +; CHECK64-NEXT: mov sp, x8 +; CHECK64-NEXT: //APP +; CHECK64-NEXT: //NO_APP +; CHECK64-NEXT: bl __arm_sme_state +; CHECK64-NEXT: and x20, x0, #0x1 +; CHECK64-NEXT: .cfi_offset vg, -48 +; CHECK64-NEXT: tbz w20, #0, .LBB35_2 +; CHECK64-NEXT: // %bb.1: // %entry +; CHECK64-NEXT: smstop sm +; CHECK64-NEXT: .LBB35_2: // %entry +; CHECK64-NEXT: mov x0, x8 +; CHECK64-NEXT: mov w1, #45 // =0x2d +; CHECK64-NEXT: bl memset +; CHECK64-NEXT: tbz w20, #0, .LBB35_4 +; CHECK64-NEXT: // %bb.3: // %entry +; CHECK64-NEXT: smstart sm +; CHECK64-NEXT: .LBB35_4: // %entry +; CHECK64-NEXT: mov w0, #22647 // =0x5877 +; CHECK64-NEXT: movk w0, #59491, lsl #16 +; CHECK64-NEXT: .cfi_restore vg +; CHECK64-NEXT: sub x8, x29, #64 +; CHECK64-NEXT: addvl sp, x8, #-18 +; CHECK64-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: .cfi_restore z8 +; CHECK64-NEXT: .cfi_restore z9 +; CHECK64-NEXT: .cfi_restore z10 +; CHECK64-NEXT: .cfi_restore z11 +; CHECK64-NEXT: .cfi_restore z12 +; CHECK64-NEXT: .cfi_restore z13 +; CHECK64-NEXT: .cfi_restore z14 +; CHECK64-NEXT: .cfi_restore z15 +; CHECK64-NEXT: sub sp, x29, 
#64 +; CHECK64-NEXT: .cfi_def_cfa wsp, 128 +; CHECK64-NEXT: ldp x20, x19, [sp, #112] // 16-byte Folded Reload +; CHECK64-NEXT: ldr x28, [sp, #88] // 8-byte Folded Reload +; CHECK64-NEXT: ldp x27, x26, [sp, #96] // 16-byte Folded Reload +; CHECK64-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload +; CHECK64-NEXT: add sp, sp, #128 +; CHECK64-NEXT: .cfi_def_cfa_offset 0 +; CHECK64-NEXT: .cfi_restore w19 +; CHECK64-NEXT: .cfi_restore w20 +; CHECK64-NEXT: .cfi_restore w26 +; CHECK64-NEXT: .cfi_restore w27 +; CHECK64-NEXT: .cfi_restore w28 +; CHECK64-NEXT: .cfi_restore w30 +; CHECK64-NEXT: .cfi_restore w29 +; CHECK64-NEXT: ret +; +; CHECK1024-LABEL: svecc_call_dynamic_alloca: +; CHECK1024: // %bb.0: // %entry +; CHECK1024-NEXT: sub sp, sp, #1088 +; CHECK1024-NEXT: .cfi_def_cfa_offset 1088 +; CHECK1024-NEXT: cntd x9 +; CHECK1024-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill +; CHECK1024-NEXT: str x30, [sp, #1032] // 8-byte Folded Spill +; CHECK1024-NEXT: str x9, [sp, #1040] // 8-byte Folded Spill +; CHECK1024-NEXT: str x28, [sp, #1048] // 8-byte Folded Spill +; CHECK1024-NEXT: str x27, [sp, #1056] // 8-byte Folded Spill +; CHECK1024-NEXT: str x26, [sp, #1064] // 8-byte Folded Spill +; CHECK1024-NEXT: str x20, [sp, #1072] // 8-byte Folded Spill +; CHECK1024-NEXT: str x19, [sp, #1080] // 8-byte Folded Spill +; CHECK1024-NEXT: add x29, sp, #1024 +; CHECK1024-NEXT: .cfi_def_cfa w29, 64 +; CHECK1024-NEXT: .cfi_offset w19, -8 +; CHECK1024-NEXT: .cfi_offset w20, -16 +; CHECK1024-NEXT: .cfi_offset w26, -24 +; CHECK1024-NEXT: .cfi_offset w27, -32 +; CHECK1024-NEXT: .cfi_offset w28, -40 +; CHECK1024-NEXT: .cfi_offset w30, -56 +; CHECK1024-NEXT: .cfi_offset w29, -64 +; CHECK1024-NEXT: addvl sp, sp, #-18 +; CHECK1024-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z9, [sp, #16, mul 
vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 1088 - 8 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x49, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 1088 - 16 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x4a, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 1088 - 24 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x4b, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 1088 - 32 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x4c, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 1088 - 40 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x4d, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 1088 - 48 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x4e, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 1088 - 56 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x4f, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 1088 - 64 * VG +; CHECK1024-NEXT: sub sp, sp, #1024 +; CHECK1024-NEXT: mov w9, w0 +; CHECK1024-NEXT: mov x8, sp +; CHECK1024-NEXT: mov w2, w1 +; CHECK1024-NEXT: add x9, x9, #15 +; CHECK1024-NEXT: mov x19, sp +; CHECK1024-NEXT: and x9, x9, #0x1fffffff0 +; CHECK1024-NEXT: sub x8, x8, x9 +; CHECK1024-NEXT: mov sp, x8 +; CHECK1024-NEXT: //APP +; CHECK1024-NEXT: //NO_APP +; CHECK1024-NEXT: bl __arm_sme_state +; CHECK1024-NEXT: and x20, x0, #0x1 +; CHECK1024-NEXT: .cfi_offset vg, -48 +; CHECK1024-NEXT: tbz w20, #0, .LBB35_2 +; CHECK1024-NEXT: // %bb.1: // %entry +; CHECK1024-NEXT: smstop sm +; CHECK1024-NEXT: .LBB35_2: // %entry +; CHECK1024-NEXT: mov x0, x8 +; CHECK1024-NEXT: mov w1, #45 // =0x2d +; CHECK1024-NEXT: bl memset +; CHECK1024-NEXT: tbz w20, #0, .LBB35_4 +; CHECK1024-NEXT: // %bb.3: // %entry +; CHECK1024-NEXT: smstart sm +; CHECK1024-NEXT: .LBB35_4: // %entry +; CHECK1024-NEXT: mov w0, #22647 // =0x5877 +; CHECK1024-NEXT: movk w0, #59491, lsl #16 +; CHECK1024-NEXT: .cfi_restore vg +; CHECK1024-NEXT: sub x8, x29, #1024 +; CHECK1024-NEXT: addvl sp, x8, #-18 +; CHECK1024-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p14, [sp, #5, mul 
vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: .cfi_restore z8 +; CHECK1024-NEXT: .cfi_restore z9 +; CHECK1024-NEXT: .cfi_restore z10 +; CHECK1024-NEXT: .cfi_restore z11 +; CHECK1024-NEXT: .cfi_restore z12 +; CHECK1024-NEXT: .cfi_restore z13 +; CHECK1024-NEXT: .cfi_restore z14 +; CHECK1024-NEXT: .cfi_restore z15 +; CHECK1024-NEXT: sub sp, x29, #1024 +; CHECK1024-NEXT: .cfi_def_cfa wsp, 1088 +; CHECK1024-NEXT: ldr x19, [sp, #1080] // 8-byte Folded Reload +; CHECK1024-NEXT: ldr x20, [sp, #1072] // 8-byte Folded Reload +; CHECK1024-NEXT: ldr x26, [sp, #1064] // 8-byte Folded Reload +; CHECK1024-NEXT: ldr x27, [sp, #1056] // 8-byte Folded Reload +; CHECK1024-NEXT: ldr x28, [sp, #1048] // 8-byte Folded Reload +; CHECK1024-NEXT: ldr x30, [sp, #1032] // 8-byte Folded Reload +; CHECK1024-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload +; CHECK1024-NEXT: add sp, sp, #1088 +; CHECK1024-NEXT: .cfi_def_cfa_offset 0 +; CHECK1024-NEXT: .cfi_restore w19 +; CHECK1024-NEXT: .cfi_restore w20 +; CHECK1024-NEXT: .cfi_restore w26 +; CHECK1024-NEXT: .cfi_restore w27 +; CHECK1024-NEXT: .cfi_restore w28 +; CHECK1024-NEXT: .cfi_restore w30 +; CHECK1024-NEXT: .cfi_restore w29 +; CHECK1024-NEXT: ret +entry: + %ptr = alloca i8, i32 %P1 + tail call void asm sideeffect "", "~{x0},~{x28},~{x27},~{x3}"() #2 + %call = call ptr @memset(ptr noundef nonnull %ptr, i32 noundef 45, i32 noundef %P2) + ret i32 -396142473 +} + + +define i32 @svecc_call_realign(<4 x i16> %P0, i32 %P1, i32 %P2, %P3, i16 %P4) "aarch64_pstate_sm_compatible" { +; CHECK0-LABEL: svecc_call_realign: +; CHECK0: // %bb.0: // %entry +; CHECK0-NEXT: stp x29, x30, [sp, #-64]! 
// 16-byte Folded Spill +; CHECK0-NEXT: .cfi_def_cfa_offset 64 +; CHECK0-NEXT: cntd x9 +; CHECK0-NEXT: stp x28, x27, [sp, #32] // 16-byte Folded Spill +; CHECK0-NEXT: str x9, [sp, #16] // 8-byte Folded Spill +; CHECK0-NEXT: stp x26, x19, [sp, #48] // 16-byte Folded Spill +; CHECK0-NEXT: mov x29, sp +; CHECK0-NEXT: .cfi_def_cfa w29, 64 +; CHECK0-NEXT: .cfi_offset w19, -8 +; CHECK0-NEXT: .cfi_offset w26, -16 +; CHECK0-NEXT: .cfi_offset w27, -24 +; CHECK0-NEXT: .cfi_offset w28, -32 +; CHECK0-NEXT: .cfi_offset w30, -56 +; CHECK0-NEXT: .cfi_offset w29, -64 +; CHECK0-NEXT: addvl sp, sp, #-18 +; CHECK0-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 64 - 8 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 64 - 16 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 64 - 24 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x4b, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 64 - 32 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x4c, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 64 - 40 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x4d, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 64 - 48 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 64 - 56 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 64 - 64 * VG +; CHECK0-NEXT: sub x9, sp, #1024 +; CHECK0-NEXT: 
and sp, x9, #0xffffffffffffffe0 +; CHECK0-NEXT: mov w2, w1 +; CHECK0-NEXT: //APP +; CHECK0-NEXT: //NO_APP +; CHECK0-NEXT: bl __arm_sme_state +; CHECK0-NEXT: and x19, x0, #0x1 +; CHECK0-NEXT: .cfi_offset vg, -48 +; CHECK0-NEXT: tbz w19, #0, .LBB36_2 +; CHECK0-NEXT: // %bb.1: // %entry +; CHECK0-NEXT: smstop sm +; CHECK0-NEXT: .LBB36_2: // %entry +; CHECK0-NEXT: mov x0, sp +; CHECK0-NEXT: mov w1, #45 // =0x2d +; CHECK0-NEXT: bl memset +; CHECK0-NEXT: tbz w19, #0, .LBB36_4 +; CHECK0-NEXT: // %bb.3: // %entry +; CHECK0-NEXT: smstart sm +; CHECK0-NEXT: .LBB36_4: // %entry +; CHECK0-NEXT: mov w0, #22647 // =0x5877 +; CHECK0-NEXT: movk w0, #59491, lsl #16 +; CHECK0-NEXT: .cfi_restore vg +; CHECK0-NEXT: addvl sp, x29, #-18 +; CHECK0-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: .cfi_restore z8 +; CHECK0-NEXT: .cfi_restore z9 +; CHECK0-NEXT: .cfi_restore z10 +; CHECK0-NEXT: .cfi_restore z11 +; CHECK0-NEXT: .cfi_restore z12 +; CHECK0-NEXT: .cfi_restore z13 +; CHECK0-NEXT: .cfi_restore z14 +; CHECK0-NEXT: .cfi_restore z15 +; CHECK0-NEXT: mov sp, x29 +; CHECK0-NEXT: .cfi_def_cfa wsp, 64 +; CHECK0-NEXT: ldp x26, x19, [sp, #48] // 16-byte Folded Reload +; CHECK0-NEXT: ldp x28, x27, [sp, #32] // 16-byte Folded Reload +; CHECK0-NEXT: ldp x29, x30, [sp], #64 // 16-byte Folded Reload +; CHECK0-NEXT: .cfi_def_cfa_offset 0 +; CHECK0-NEXT: .cfi_restore w19 +; CHECK0-NEXT: .cfi_restore w26 +; CHECK0-NEXT: .cfi_restore w27 +; CHECK0-NEXT: .cfi_restore w28 +; CHECK0-NEXT: .cfi_restore w30 +; CHECK0-NEXT: .cfi_restore w29 +; CHECK0-NEXT: ret +; +; CHECK64-LABEL: svecc_call_realign: +; CHECK64: // %bb.0: // %entry +; CHECK64-NEXT: sub sp, sp, #128 +; CHECK64-NEXT: .cfi_def_cfa_offset 128 +; CHECK64-NEXT: cntd x9 +; 
CHECK64-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill +; CHECK64-NEXT: stp x9, x28, [sp, #80] // 16-byte Folded Spill +; CHECK64-NEXT: stp x27, x26, [sp, #96] // 16-byte Folded Spill +; CHECK64-NEXT: str x19, [sp, #112] // 8-byte Folded Spill +; CHECK64-NEXT: add x29, sp, #64 +; CHECK64-NEXT: .cfi_def_cfa w29, 64 +; CHECK64-NEXT: .cfi_offset w19, -16 +; CHECK64-NEXT: .cfi_offset w26, -24 +; CHECK64-NEXT: .cfi_offset w27, -32 +; CHECK64-NEXT: .cfi_offset w28, -40 +; CHECK64-NEXT: .cfi_offset w30, -56 +; CHECK64-NEXT: .cfi_offset w29, -64 +; CHECK64-NEXT: addvl sp, sp, #-18 +; CHECK64-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 128 - 8 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x49, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 128 - 16 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x4a, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 128 - 24 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x4b, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 128 - 32 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x4c, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 128 - 40 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x4d, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 128 - 48 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x4e, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 128 - 56 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x4f, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 
0x22 // $d15 @ cfa - 128 - 64 * VG +; CHECK64-NEXT: sub x9, sp, #1088 +; CHECK64-NEXT: and sp, x9, #0xffffffffffffffe0 +; CHECK64-NEXT: mov w2, w1 +; CHECK64-NEXT: //APP +; CHECK64-NEXT: //NO_APP +; CHECK64-NEXT: bl __arm_sme_state +; CHECK64-NEXT: and x19, x0, #0x1 +; CHECK64-NEXT: .cfi_offset vg, -48 +; CHECK64-NEXT: tbz w19, #0, .LBB36_2 +; CHECK64-NEXT: // %bb.1: // %entry +; CHECK64-NEXT: smstop sm +; CHECK64-NEXT: .LBB36_2: // %entry +; CHECK64-NEXT: mov x0, sp +; CHECK64-NEXT: mov w1, #45 // =0x2d +; CHECK64-NEXT: bl memset +; CHECK64-NEXT: tbz w19, #0, .LBB36_4 +; CHECK64-NEXT: // %bb.3: // %entry +; CHECK64-NEXT: smstart sm +; CHECK64-NEXT: .LBB36_4: // %entry +; CHECK64-NEXT: mov w0, #22647 // =0x5877 +; CHECK64-NEXT: movk w0, #59491, lsl #16 +; CHECK64-NEXT: .cfi_restore vg +; CHECK64-NEXT: sub x8, x29, #64 +; CHECK64-NEXT: addvl sp, x8, #-18 +; CHECK64-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: .cfi_restore z8 +; CHECK64-NEXT: .cfi_restore z9 +; CHECK64-NEXT: .cfi_restore z10 +; CHECK64-NEXT: .cfi_restore z11 +; CHECK64-NEXT: .cfi_restore z12 +; CHECK64-NEXT: .cfi_restore z13 +; CHECK64-NEXT: .cfi_restore z14 +; CHECK64-NEXT: .cfi_restore z15 +; CHECK64-NEXT: sub sp, x29, #64 +; CHECK64-NEXT: .cfi_def_cfa wsp, 128 +; CHECK64-NEXT: ldp x26, x19, [sp, #104] // 16-byte Folded Reload +; CHECK64-NEXT: ldp x28, x27, [sp, #88] // 16-byte Folded Reload +; CHECK64-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload +; CHECK64-NEXT: add sp, sp, #128 +; CHECK64-NEXT: .cfi_def_cfa_offset 0 +; CHECK64-NEXT: .cfi_restore w19 +; CHECK64-NEXT: .cfi_restore w26 +; CHECK64-NEXT: .cfi_restore w27 +; CHECK64-NEXT: .cfi_restore w28 +; CHECK64-NEXT: .cfi_restore w30 +; 
CHECK64-NEXT: .cfi_restore w29 +; CHECK64-NEXT: ret +; +; CHECK1024-LABEL: svecc_call_realign: +; CHECK1024: // %bb.0: // %entry +; CHECK1024-NEXT: sub sp, sp, #1088 +; CHECK1024-NEXT: .cfi_def_cfa_offset 1088 +; CHECK1024-NEXT: cntd x9 +; CHECK1024-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill +; CHECK1024-NEXT: str x30, [sp, #1032] // 8-byte Folded Spill +; CHECK1024-NEXT: str x9, [sp, #1040] // 8-byte Folded Spill +; CHECK1024-NEXT: str x28, [sp, #1048] // 8-byte Folded Spill +; CHECK1024-NEXT: str x27, [sp, #1056] // 8-byte Folded Spill +; CHECK1024-NEXT: str x26, [sp, #1064] // 8-byte Folded Spill +; CHECK1024-NEXT: str x19, [sp, #1072] // 8-byte Folded Spill +; CHECK1024-NEXT: add x29, sp, #1024 +; CHECK1024-NEXT: .cfi_def_cfa w29, 64 +; CHECK1024-NEXT: .cfi_offset w19, -16 +; CHECK1024-NEXT: .cfi_offset w26, -24 +; CHECK1024-NEXT: .cfi_offset w27, -32 +; CHECK1024-NEXT: .cfi_offset w28, -40 +; CHECK1024-NEXT: .cfi_offset w30, -56 +; CHECK1024-NEXT: .cfi_offset w29, -64 +; CHECK1024-NEXT: addvl sp, sp, #-18 +; CHECK1024-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 1088 - 8 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x49, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 1088 - 16 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x4a, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 1088 - 24 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x4b, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 1088 - 32 * VG +; CHECK1024-NEXT: 
.cfi_escape 0x10, 0x4c, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 1088 - 40 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x4d, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 1088 - 48 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x4e, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 1088 - 56 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x4f, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 1088 - 64 * VG +; CHECK1024-NEXT: sub x9, sp, #2048 +; CHECK1024-NEXT: and sp, x9, #0xffffffffffffffe0 +; CHECK1024-NEXT: mov w2, w1 +; CHECK1024-NEXT: //APP +; CHECK1024-NEXT: //NO_APP +; CHECK1024-NEXT: bl __arm_sme_state +; CHECK1024-NEXT: and x19, x0, #0x1 +; CHECK1024-NEXT: .cfi_offset vg, -48 +; CHECK1024-NEXT: tbz w19, #0, .LBB36_2 +; CHECK1024-NEXT: // %bb.1: // %entry +; CHECK1024-NEXT: smstop sm +; CHECK1024-NEXT: .LBB36_2: // %entry +; CHECK1024-NEXT: mov x0, sp +; CHECK1024-NEXT: mov w1, #45 // =0x2d +; CHECK1024-NEXT: bl memset +; CHECK1024-NEXT: tbz w19, #0, .LBB36_4 +; CHECK1024-NEXT: // %bb.3: // %entry +; CHECK1024-NEXT: smstart sm +; CHECK1024-NEXT: .LBB36_4: // %entry +; CHECK1024-NEXT: mov w0, #22647 // =0x5877 +; CHECK1024-NEXT: movk w0, #59491, lsl #16 +; CHECK1024-NEXT: .cfi_restore vg +; CHECK1024-NEXT: sub x8, x29, #1024 +; CHECK1024-NEXT: addvl sp, x8, #-18 +; CHECK1024-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: .cfi_restore z8 +; CHECK1024-NEXT: .cfi_restore z9 +; CHECK1024-NEXT: .cfi_restore z10 +; CHECK1024-NEXT: .cfi_restore z11 +; 
CHECK1024-NEXT: .cfi_restore z12 +; CHECK1024-NEXT: .cfi_restore z13 +; CHECK1024-NEXT: .cfi_restore z14 +; CHECK1024-NEXT: .cfi_restore z15 +; CHECK1024-NEXT: sub sp, x29, #1024 +; CHECK1024-NEXT: .cfi_def_cfa wsp, 1088 +; CHECK1024-NEXT: ldr x19, [sp, #1072] // 8-byte Folded Reload +; CHECK1024-NEXT: ldr x26, [sp, #1064] // 8-byte Folded Reload +; CHECK1024-NEXT: ldr x27, [sp, #1056] // 8-byte Folded Reload +; CHECK1024-NEXT: ldr x28, [sp, #1048] // 8-byte Folded Reload +; CHECK1024-NEXT: ldr x30, [sp, #1032] // 8-byte Folded Reload +; CHECK1024-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload +; CHECK1024-NEXT: add sp, sp, #1088 +; CHECK1024-NEXT: .cfi_def_cfa_offset 0 +; CHECK1024-NEXT: .cfi_restore w19 +; CHECK1024-NEXT: .cfi_restore w26 +; CHECK1024-NEXT: .cfi_restore w27 +; CHECK1024-NEXT: .cfi_restore w28 +; CHECK1024-NEXT: .cfi_restore w30 +; CHECK1024-NEXT: .cfi_restore w29 +; CHECK1024-NEXT: ret +entry: + %ptr = alloca i8, i32 1000, align 32 + tail call void asm sideeffect "", "~{x0},~{x28},~{x27},~{x3}"() #2 + %call = call ptr @memset(ptr noundef nonnull %ptr, i32 noundef 45, i32 noundef %P2) + ret i32 -396142473 +} + + +define i32 @svecc_call_dynamic_and_scalable_alloca(<4 x i16> %P0, i32 %P1, i32 %P2, %P3, i16 %P4) "aarch64_pstate_sm_compatible" { +; CHECK0-LABEL: svecc_call_dynamic_and_scalable_alloca: +; CHECK0: // %bb.0: // %entry +; CHECK0-NEXT: stp x29, x30, [sp, #-64]! // 16-byte Folded Spill +; CHECK0-NEXT: str x28, [sp, #16] // 8-byte Folded Spill +; CHECK0-NEXT: mov x29, sp +; CHECK0-NEXT: stp x27, x26, [sp, #32] // 16-byte Folded Spill +; CHECK0-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill +; CHECK0-NEXT: addvl sp, sp, #-18 +; CHECK0-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: sub sp, 
sp, #48 +; CHECK0-NEXT: addvl sp, sp, #-1 +; CHECK0-NEXT: mov x19, sp +; CHECK0-NEXT: .cfi_def_cfa w29, 64 +; CHECK0-NEXT: .cfi_offset w19, -8 +; CHECK0-NEXT: .cfi_offset w20, -16 +; CHECK0-NEXT: .cfi_offset w26, -24 +; CHECK0-NEXT: .cfi_offset w27, -32 +; CHECK0-NEXT: .cfi_offset w28, -48 +; CHECK0-NEXT: .cfi_offset w30, -56 +; CHECK0-NEXT: .cfi_offset w29, -64 +; CHECK0-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 64 - 8 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 64 - 16 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 64 - 24 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x4b, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 64 - 32 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x4c, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 64 - 40 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x4d, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 64 - 48 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 64 - 56 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 64 - 64 * VG +; CHECK0-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK0-NEXT: ubfiz x8, x0, #2, #32 +; CHECK0-NEXT: mov x9, sp +; CHECK0-NEXT: add x8, x8, #15 +; CHECK0-NEXT: and x8, x8, #0x7fffffff0 +; CHECK0-NEXT: sub x20, x9, x8 +; CHECK0-NEXT: mov sp, x20 +; CHECK0-NEXT: //APP +; CHECK0-NEXT: //NO_APP +; CHECK0-NEXT: add x0, x19, #8 +; CHECK0-NEXT: bl bar +; CHECK0-NEXT: addvl x0, x29, #-19 +; CHECK0-NEXT: bl bar +; CHECK0-NEXT: mov x0, x20 +; CHECK0-NEXT: bl bar +; CHECK0-NEXT: mov w0, #22647 // =0x5877 +; CHECK0-NEXT: movk w0, #59491, lsl #16 +; CHECK0-NEXT: addvl sp, x29, #-18 +; CHECK0-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload 
+; CHECK0-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: mov sp, x29 +; CHECK0-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload +; CHECK0-NEXT: ldr x28, [sp, #16] // 8-byte Folded Reload +; CHECK0-NEXT: ldp x27, x26, [sp, #32] // 16-byte Folded Reload +; CHECK0-NEXT: ldp x29, x30, [sp], #64 // 16-byte Folded Reload +; CHECK0-NEXT: ret +; +; CHECK64-LABEL: svecc_call_dynamic_and_scalable_alloca: +; CHECK64: // %bb.0: // %entry +; CHECK64-NEXT: sub sp, sp, #128 +; CHECK64-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill +; CHECK64-NEXT: add x29, sp, #64 +; CHECK64-NEXT: stp x28, x27, [sp, #80] // 16-byte Folded Spill +; CHECK64-NEXT: stp x26, x20, [sp, #96] // 16-byte Folded Spill +; CHECK64-NEXT: str x19, [sp, #112] // 8-byte Folded Spill +; CHECK64-NEXT: addvl sp, sp, #-18 +; CHECK64-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: sub sp, sp, #112 +; CHECK64-NEXT: addvl sp, sp, #-1 +; CHECK64-NEXT: mov x19, sp +; CHECK64-NEXT: .cfi_def_cfa w29, 64 +; CHECK64-NEXT: .cfi_offset w19, -16 +; CHECK64-NEXT: .cfi_offset w20, -24 +; CHECK64-NEXT: .cfi_offset w26, -32 +; CHECK64-NEXT: .cfi_offset w27, -40 +; CHECK64-NEXT: .cfi_offset w28, -48 +; CHECK64-NEXT: .cfi_offset w30, -56 +; CHECK64-NEXT: .cfi_offset w29, -64 +; CHECK64-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 128 - 8 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x49, 0x0b, 0x11, 0x80, 
0x7f, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 128 - 16 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x4a, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 128 - 24 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x4b, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 128 - 32 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x4c, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 128 - 40 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x4d, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 128 - 48 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x4e, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 128 - 56 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x4f, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 128 - 64 * VG +; CHECK64-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK64-NEXT: ubfiz x8, x0, #2, #32 +; CHECK64-NEXT: mov x9, sp +; CHECK64-NEXT: add x8, x8, #15 +; CHECK64-NEXT: and x8, x8, #0x7fffffff0 +; CHECK64-NEXT: sub x20, x9, x8 +; CHECK64-NEXT: mov sp, x20 +; CHECK64-NEXT: //APP +; CHECK64-NEXT: //NO_APP +; CHECK64-NEXT: add x0, x19, #8 +; CHECK64-NEXT: bl bar +; CHECK64-NEXT: sub x0, x29, #64 +; CHECK64-NEXT: addvl x0, x0, #-19 +; CHECK64-NEXT: bl bar +; CHECK64-NEXT: mov x0, x20 +; CHECK64-NEXT: bl bar +; CHECK64-NEXT: mov w0, #22647 // =0x5877 +; CHECK64-NEXT: sub x8, x29, #64 +; CHECK64-NEXT: movk w0, #59491, lsl #16 +; CHECK64-NEXT: addvl sp, x8, #-18 +; CHECK64-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: sub sp, x29, #64 +; CHECK64-NEXT: ldp 
x20, x19, [sp, #104] // 16-byte Folded Reload +; CHECK64-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload +; CHECK64-NEXT: ldp x27, x26, [sp, #88] // 16-byte Folded Reload +; CHECK64-NEXT: ldp x30, x28, [sp, #72] // 16-byte Folded Reload +; CHECK64-NEXT: add sp, sp, #128 +; CHECK64-NEXT: ret +; +; CHECK1024-LABEL: svecc_call_dynamic_and_scalable_alloca: +; CHECK1024: // %bb.0: // %entry +; CHECK1024-NEXT: sub sp, sp, #1088 +; CHECK1024-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill +; CHECK1024-NEXT: add x29, sp, #1024 +; CHECK1024-NEXT: str x30, [sp, #1032] // 8-byte Folded Spill +; CHECK1024-NEXT: str x28, [sp, #1040] // 8-byte Folded Spill +; CHECK1024-NEXT: str x27, [sp, #1048] // 8-byte Folded Spill +; CHECK1024-NEXT: str x26, [sp, #1056] // 8-byte Folded Spill +; CHECK1024-NEXT: str x20, [sp, #1064] // 8-byte Folded Spill +; CHECK1024-NEXT: str x19, [sp, #1072] // 8-byte Folded Spill +; CHECK1024-NEXT: addvl sp, sp, #-18 +; CHECK1024-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: sub sp, sp, #1072 +; CHECK1024-NEXT: addvl sp, sp, #-1 +; CHECK1024-NEXT: mov x19, sp +; CHECK1024-NEXT: .cfi_def_cfa w29, 64 +; CHECK1024-NEXT: .cfi_offset w19, -16 +; CHECK1024-NEXT: .cfi_offset w20, -24 +; CHECK1024-NEXT: .cfi_offset w26, -32 +; CHECK1024-NEXT: .cfi_offset w27, -40 +; CHECK1024-NEXT: .cfi_offset w28, -48 +; CHECK1024-NEXT: .cfi_offset w30, -56 +; CHECK1024-NEXT: .cfi_offset w29, -64 +; CHECK1024-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 1088 - 8 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x49, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // 
$d9 @ cfa - 1088 - 16 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x4a, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 1088 - 24 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x4b, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 1088 - 32 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x4c, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 1088 - 40 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x4d, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 1088 - 48 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x4e, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 1088 - 56 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x4f, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 1088 - 64 * VG +; CHECK1024-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK1024-NEXT: ubfiz x8, x0, #2, #32 +; CHECK1024-NEXT: mov x9, sp +; CHECK1024-NEXT: add x8, x8, #15 +; CHECK1024-NEXT: and x8, x8, #0x7fffffff0 +; CHECK1024-NEXT: sub x20, x9, x8 +; CHECK1024-NEXT: mov sp, x20 +; CHECK1024-NEXT: //APP +; CHECK1024-NEXT: //NO_APP +; CHECK1024-NEXT: add x0, x19, #8 +; CHECK1024-NEXT: bl bar +; CHECK1024-NEXT: sub x0, x29, #1024 +; CHECK1024-NEXT: addvl x0, x0, #-19 +; CHECK1024-NEXT: bl bar +; CHECK1024-NEXT: mov x0, x20 +; CHECK1024-NEXT: bl bar +; CHECK1024-NEXT: mov w0, #22647 // =0x5877 +; CHECK1024-NEXT: sub x8, x29, #1024 +; CHECK1024-NEXT: movk w0, #59491, lsl #16 +; CHECK1024-NEXT: addvl sp, x8, #-18 +; CHECK1024-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded 
Reload +; CHECK1024-NEXT: sub sp, x29, #1024 +; CHECK1024-NEXT: ldr x19, [sp, #1072] // 8-byte Folded Reload +; CHECK1024-NEXT: ldr x20, [sp, #1064] // 8-byte Folded Reload +; CHECK1024-NEXT: ldr x26, [sp, #1056] // 8-byte Folded Reload +; CHECK1024-NEXT: ldr x27, [sp, #1048] // 8-byte Folded Reload +; CHECK1024-NEXT: ldr x28, [sp, #1040] // 8-byte Folded Reload +; CHECK1024-NEXT: ldr x30, [sp, #1032] // 8-byte Folded Reload +; CHECK1024-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload +; CHECK1024-NEXT: add sp, sp, #1088 +; CHECK1024-NEXT: ret +entry: + %a = alloca i32, i32 10 + %b = alloca <vscale x 4 x i32> + %c = alloca i32, i32 %P1, align 4 + tail call void asm sideeffect "", "~{x0},~{x28},~{x27},~{x3}"() #2 + call void @bar(ptr noundef nonnull %a) + call void @bar(ptr noundef nonnull %b) + call void @bar(ptr noundef nonnull %c) + ret i32 -396142473 +}