Skip to content

Commit 17946da

Browse files
[AIEX] Add a Pass to expand unallocated 2D/3D registers into individual ones
If we don't need a full register, we can expand to individual lanes. Co-Authored-By: Krishnam Tibrewala <[email protected]>
1 parent 9b71552 commit 17946da

12 files changed

+266
-55
lines changed

llvm/lib/Target/AIE/AIE.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ MachineFunctionPass *createAIEEliminateDuplicatePHI();
6060
FunctionPass *createAIEOutlineMemoryGEP();
6161
FunctionPass *createAIESuperRegRewriter();
6262
FunctionPass *createAIEWawRegRewriter();
63+
FunctionPass *createAIEUnallocatedSuperRegRewriter();
6364
FunctionPass *createAIEPostSelectOptimize();
6465
MachineFunctionPass *
6566
createDeadMachineInstructionElim(bool KeepLifetimeInstructions);
@@ -84,6 +85,8 @@ extern char &AIESuperRegRewriterID;
8485
void initializeAIESuperRegRewriterPass(PassRegistry &);
8586
extern char &AIEWawRegRewriterID;
8687
void initializeAIEWawRegRewriterPass(PassRegistry &);
88+
extern char &AIEUnallocatedSuperRegRewriterID;
89+
void initializeAIEUnallocatedSuperRegRewriterPass(PassRegistry &);
8790
extern char &AIEOutlineMemoryGEPID;
8891
void initializeAIEOutlineMemoryGEPPass(PassRegistry &);
8992

llvm/lib/Target/AIE/AIEBaseTargetMachine.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAIETarget() {
158158
initializeAIEPseudoBranchExpansionPass(*PR);
159159
initializeAIESubRegConstrainerPass(*PR);
160160
initializeAIESuperRegRewriterPass(*PR);
161+
initializeAIEUnallocatedSuperRegRewriterPass(*PR);
161162
initializeAIEWawRegRewriterPass(*PR);
162163
initializeAIEOutlineMemoryGEPPass(*PR);
163164
initializeAIEFinalizeBundlePass(*PR);

llvm/lib/Target/AIE/AIESuperRegUtils.cpp

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -188,21 +188,26 @@ LaneBitmask getLiveLanesAt(SlotIndex Index, Register Reg,
188188
return LiveLanes;
189189
}
190190

191-
void rewriteSuperReg(Register Reg, Register AssignedPhysReg,
191+
void rewriteSuperReg(Register Reg, std::optional<Register> AssignedPhysReg,
192192
SmallSet<int, 8> &SubRegs, MachineRegisterInfo &MRI,
193193
const AIEBaseRegisterInfo &TRI, VirtRegMap &VRM,
194194
LiveRegMatrix &LRM, LiveIntervals &LIS,
195195
SlotIndexes &Indexes, LiveDebugVariables &DebugVars) {
196196
LLVM_DEBUG(dbgs() << "Rewriting " << printReg(Reg, &TRI, 0, &MRI) << '\n');
197-
auto *TII = static_cast<const AIEBaseInstrInfo *>(
198-
VRM.getMachineFunction().getSubtarget().getInstrInfo());
197+
MachineFunction &MF = VRM.getMachineFunction();
198+
auto *TII =
199+
static_cast<const AIEBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
199200

200201
// Collect all the subreg indices to rewrite as independent vregs.
201202
SmallMapVector<int, Register, 8> SubRegToVReg;
202203
const TargetRegisterClass *SuperRC = MRI.getRegClass(Reg);
203204
assert(!SubRegs.empty());
204205
for (int SubReg : SubRegs) {
205-
const TargetRegisterClass *SubRC = TRI.getSubRegisterClass(SuperRC, SubReg);
206+
const TargetRegisterClass *SubRC =
207+
AssignedPhysReg.has_value()
208+
? TRI.getSubRegisterClass(SuperRC, SubReg)
209+
: TRI.getLargestLegalSuperClass(
210+
TRI.getSubRegisterClass(SuperRC, SubReg), MF);
206211
SubRegToVReg[SubReg] = MRI.createVirtualRegister(SubRC);
207212
}
208213

@@ -248,7 +253,6 @@ void rewriteSuperReg(Register Reg, Register AssignedPhysReg,
248253
LIS.removeInterval(Reg);
249254

250255
for (auto &[SubRegIdx, VReg] : SubRegToVReg) {
251-
MCRegister SubPhysReg = TRI.getSubReg(AssignedPhysReg, SubRegIdx);
252256
LiveInterval &SubRegLI = LIS.getInterval(VReg);
253257
LLVM_DEBUG(dbgs() << " Assigning Range: " << SubRegLI << '\n');
254258

@@ -259,6 +263,10 @@ void rewriteSuperReg(Register Reg, Register AssignedPhysReg,
259263
LIComponents.push_back(&SubRegLI);
260264
VRM.grow();
261265

266+
if (!AssignedPhysReg.has_value())
267+
continue;
268+
269+
MCRegister SubPhysReg = TRI.getSubReg(*AssignedPhysReg, SubRegIdx);
262270
for (LiveInterval *LI : LIComponents) {
263271
LRM.assign(*LI, SubPhysReg);
264272
VRM.setRequiredPhys(LI->reg(), SubPhysReg);

llvm/lib/Target/AIE/AIESuperRegUtils.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#define LLVM_LIB_TARGET_AIE_AIESUPERREGUTILS_H
1616

1717
#include "llvm/ADT/SmallSet.h"
18+
#include <optional>
1819

1920
namespace llvm {
2021
class Register;
@@ -63,7 +64,7 @@ void rewriteFullCopy(MachineInstr &MI, const std::set<int> &CopySubRegs,
6364
LaneBitmask getLiveLanesAt(SlotIndex Index, Register Reg,
6465
const LiveIntervals &LIS);
6566

66-
void rewriteSuperReg(Register Reg, Register AssignedPhysReg,
67+
void rewriteSuperReg(Register Reg, std::optional<Register> AssignedPhysReg,
6768
SmallSet<int, 8> &SubRegs, MachineRegisterInfo &MRI,
6869
const AIEBaseRegisterInfo &TRI, VirtRegMap &VRM,
6970
LiveRegMatrix &LRM, LiveIntervals &LIS,
Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,171 @@
1+
//===-- AIEUnallocatedSuperRegRewriter.cpp - Constrain tied sub-registers -===//
2+
//
3+
// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
// (c) Copyright 2025 Advanced Micro Devices, Inc. or its affiliates
8+
//
9+
//===----------------------------------------------------------------------===//
10+
11+
#include "AIEBaseInstrInfo.h"
12+
#include "AIEBaseRegisterInfo.h"
13+
#include "AIESuperRegUtils.h"
14+
15+
#include "llvm/ADT/MapVector.h"
16+
#include "llvm/ADT/SmallSet.h"
17+
#include "llvm/CodeGen/LiveDebugVariables.h"
18+
#include "llvm/CodeGen/LiveIntervals.h"
19+
#include "llvm/CodeGen/LiveRegMatrix.h"
20+
#include "llvm/CodeGen/LiveStacks.h"
21+
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
22+
#include "llvm/CodeGen/MachineFunction.h"
23+
#include "llvm/CodeGen/MachineFunctionPass.h"
24+
#include "llvm/CodeGen/MachineInstr.h"
25+
#include "llvm/CodeGen/MachineInstrBuilder.h"
26+
#include "llvm/CodeGen/MachineOperand.h"
27+
#include "llvm/CodeGen/MachineRegisterInfo.h"
28+
#include "llvm/CodeGen/Passes.h"
29+
#include "llvm/CodeGen/SlotIndexes.h"
30+
#include "llvm/CodeGen/TargetInstrInfo.h"
31+
#include "llvm/CodeGen/TargetSubtargetInfo.h"
32+
#include "llvm/CodeGen/VirtRegMap.h"
33+
#include "llvm/Support/Debug.h"
34+
#include "llvm/Support/raw_ostream.h"
35+
36+
using namespace llvm;
37+
38+
#define DEBUG_TYPE "aie-ra-prepare"
39+
40+
namespace {
41+
42+
using RegRewriteInfo = std::vector<std::pair<Register, SmallSet<int, 8>>>;
43+
44+
/// Split large unallocated compound registers into multiple new smaller vregs
45+
/// that can be allocated to scalar registers.
46+
class AIEUnallocatedSuperRegRewriter : public MachineFunctionPass {
47+
48+
public:
49+
static char ID;
50+
AIEUnallocatedSuperRegRewriter() : MachineFunctionPass(ID) {}
51+
52+
void getAnalysisUsage(AnalysisUsage &AU) const override {
53+
AU.setPreservesCFG();
54+
AU.addPreserved<MachineBlockFrequencyInfoWrapperPass>();
55+
AU.addRequired<VirtRegMapWrapperLegacy>();
56+
AU.addPreserved<VirtRegMapWrapperLegacy>();
57+
AU.addRequired<SlotIndexesWrapperPass>();
58+
AU.addPreserved<SlotIndexesWrapperPass>();
59+
AU.addRequired<LiveDebugVariablesWrapperLegacy>();
60+
AU.addPreserved<LiveDebugVariablesWrapperLegacy>();
61+
AU.addRequired<LiveStacksWrapperLegacy>();
62+
AU.addPreserved<LiveStacksWrapperLegacy>();
63+
AU.addRequired<LiveIntervalsWrapperPass>();
64+
AU.addPreserved<LiveIntervalsWrapperPass>();
65+
AU.addRequired<LiveRegMatrixWrapperLegacy>();
66+
AU.addPreserved<LiveRegMatrixWrapperLegacy>();
67+
MachineFunctionPass::getAnalysisUsage(AU);
68+
}
69+
70+
bool runOnMachineFunction(MachineFunction &Fn) override;
71+
};
72+
73+
/// Identify unallocated virtual registers that can be split into subregisters.
74+
/// Returns a list of candidate registers with their rewritable subregister
75+
/// indices, excluding unused registers and those already assigned to physical
76+
/// registers.
77+
static RegRewriteInfo getRewriteCandidates(MachineRegisterInfo &MRI,
78+
const AIEBaseRegisterInfo &TRI,
79+
VirtRegMap &VRM) {
80+
RegRewriteInfo RegistersToRewrite;
81+
for (unsigned VRegIdx = 0, End = MRI.getNumVirtRegs(); VRegIdx != End;
82+
++VRegIdx) {
83+
const Register Reg = Register::index2VirtReg(VRegIdx);
84+
85+
// Ignore unused or already allocated registers.
86+
if (MRI.reg_nodbg_empty(Reg) || VRM.hasPhys(Reg))
87+
continue;
88+
89+
const SmallSet<int, 8> RewritableSubRegs =
90+
AIESuperRegUtils::getRewritableSubRegs(Reg, MRI, TRI);
91+
92+
if (RewritableSubRegs.empty())
93+
continue;
94+
95+
LLVM_DEBUG(dbgs() << "Candidate " << printReg(Reg, &TRI, 0, &MRI) << ":"
96+
<< printRegClassOrBank(Reg, MRI, &TRI) << '\n');
97+
98+
RegistersToRewrite.push_back({Reg, RewritableSubRegs});
99+
}
100+
101+
LLVM_DEBUG(dbgs() << "Found " << RegistersToRewrite.size()
102+
<< " candidate register(s) for rewriting\n");
103+
104+
return RegistersToRewrite;
105+
}
106+
107+
/// Split candidate registers into independent virtual registers for each
108+
/// subregister. Each composite register is rewritten using its subregister
109+
/// indices, with live intervals and debug information updated accordingly.
110+
void rewriteCandidates(RegRewriteInfo &RegistersToRewrite,
111+
MachineRegisterInfo &MRI, const AIEBaseRegisterInfo &TRI,
112+
VirtRegMap &VRM, LiveRegMatrix &LRM, LiveIntervals &LIS,
113+
SlotIndexes &Indexes, LiveDebugVariables &DebugVars) {
114+
115+
LLVM_DEBUG(dbgs() << "Rewriting " << RegistersToRewrite.size()
116+
<< " candidate register(s)\n");
117+
118+
for (auto [VReg, SubRegs] : RegistersToRewrite) {
119+
LLVM_DEBUG(dbgs() << " Rewriting " << printReg(VReg, &TRI, 0, &MRI)
120+
<< " into " << SubRegs.size() << " subregister(s)\n");
121+
std::optional<Register> NoPhysReg = {};
122+
AIESuperRegUtils::rewriteSuperReg(VReg, NoPhysReg, SubRegs, MRI, TRI, VRM,
123+
LRM, LIS, Indexes, DebugVars);
124+
}
125+
}
126+
127+
bool AIEUnallocatedSuperRegRewriter::runOnMachineFunction(MachineFunction &MF) {
128+
LLVM_DEBUG(llvm::dbgs() << "*** Splitting unallocated super-registers: "
129+
<< MF.getName() << " ***\n");
130+
131+
MachineRegisterInfo &MRI = MF.getRegInfo();
132+
VirtRegMap &VRM = getAnalysis<VirtRegMapWrapperLegacy>().getVRM();
133+
LiveRegMatrix &LRM = getAnalysis<LiveRegMatrixWrapperLegacy>().getLRM();
134+
LiveIntervals &LIS = getAnalysis<LiveIntervalsWrapperPass>().getLIS();
135+
SlotIndexes &Indexes = getAnalysis<SlotIndexesWrapperPass>().getSI();
136+
LiveDebugVariables &DebugVars =
137+
getAnalysis<LiveDebugVariablesWrapperLegacy>().getLDV();
138+
auto &TRI =
139+
*static_cast<const AIEBaseRegisterInfo *>(MRI.getTargetRegisterInfo());
140+
141+
LLVM_DEBUG(dbgs() << "Identifying rewrite candidates...\n");
142+
RegRewriteInfo RegistersToRewrite = getRewriteCandidates(MRI, TRI, VRM);
143+
144+
if (RegistersToRewrite.empty()) {
145+
LLVM_DEBUG(dbgs() << "No candidates found, skipping rewrite\n");
146+
return false;
147+
}
148+
149+
LLVM_DEBUG(dbgs() << "Performing register rewrites...\n");
150+
rewriteCandidates(RegistersToRewrite, MRI, TRI, VRM, LRM, LIS, Indexes,
151+
DebugVars);
152+
153+
LLVM_DEBUG(dbgs() << "Successfully rewrote " << RegistersToRewrite.size()
154+
<< " register(s)\n");
155+
156+
return !RegistersToRewrite.empty();
157+
}
158+
159+
} // end anonymous namespace
160+
161+
char AIEUnallocatedSuperRegRewriter::ID = 0;
162+
char &llvm::AIEUnallocatedSuperRegRewriterID =
163+
AIEUnallocatedSuperRegRewriter::ID;
164+
165+
INITIALIZE_PASS(AIEUnallocatedSuperRegRewriter,
166+
"aie-unallocated-superreg-rewrite",
167+
"AIE unallocated super-reg rewrite", false, false)
168+
169+
llvm::FunctionPass *llvm::createAIEUnallocatedSuperRegRewriter() {
170+
return new AIEUnallocatedSuperRegRewriter();
171+
}

llvm/lib/Target/AIE/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,7 @@ add_llvm_target(AIECodeGen
141141
AIE2TargetMachine.cpp
142142
AIE2TargetTransformInfo.cpp
143143
AIETiedRegOperands.cpp
144+
AIEUnallocatedSuperRegRewriter.cpp
144145
ReservedRegsLICM.cpp
145146
AIEOutlineMemoryGEP.cpp
146147
AIEWawRegRewriter.cpp

llvm/lib/Target/AIE/aie2p/AIE2PTargetMachine.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,8 @@ bool AIE2PPassConfig::addRegAssignAndRewriteOptimized() {
114114
addPass(createAIESuperRegRewriter());
115115
addPass(createGreedyRegisterAllocator(onlyAllocate3D2DRegisters));
116116
addPass(createAIESuperRegRewriter());
117+
if (EnableFineGrainedStagedRA)
118+
addPass(createAIEUnallocatedSuperRegRewriter());
117119
}
118120
addPass(createGreedyRegisterAllocator());
119121
if (EnableWAWRegRewrite) {

llvm/test/CodeGen/AIE/aie2p/llc-pipeline-aie2p.ll

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -247,6 +247,7 @@
247247
; AIE-O1-NEXT: AIE super-reg rewrite
248248
; AIE-O1-NEXT: Greedy Register Allocator
249249
; AIE-O1-NEXT: AIE super-reg rewrite
250+
; AIE-O1-NEXT: AIE unallocated super-reg rewrite
250251
; AIE-O1-NEXT: Greedy Register Allocator
251252
; AIE-O1-NEXT: AIE waw-reg rewrite
252253
; AIE-O1-NEXT: Greedy Register Allocator
@@ -472,6 +473,7 @@
472473
; AIE-O23-NEXT: AIE super-reg rewrite
473474
; AIE-O23-NEXT: Greedy Register Allocator
474475
; AIE-O23-NEXT: AIE super-reg rewrite
476+
; AIE-O23-NEXT: AIE unallocated super-reg rewrite
475477
; AIE-O23-NEXT: Greedy Register Allocator
476478
; AIE-O23-NEXT: AIE waw-reg rewrite
477479
; AIE-O23-NEXT: Greedy Register Allocator

llvm/test/CodeGen/AIE/aie2p/ra/staged-ra-spill.mir

Lines changed: 54 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -6,42 +6,68 @@
66
#
77
# (c) Copyright 2023-2025 Advanced Micro Devices, Inc. or its affiliates
88

9+
# RUN: llc -O2 -mtriple=aie2p -verify-machineinstrs --aie-staged-ra -start-before=greedy -aie-staged-ra-fine-grained-alloc=false \
10+
# RUN: -stop-after=virtregrewriter %s -o - | FileCheck %s --check-prefix=RA-STAGED
911
# RUN: llc -O2 -mtriple=aie2p -verify-machineinstrs --aie-staged-ra -start-before=greedy -stop-after=virtregrewriter %s -o - \
10-
# RUN: | FileCheck %s --check-prefix=RA
12+
# RUN: | FileCheck %s --check-prefix=RA-STAGED-FG
1113

12-
# Test what happens the 2D allocation stage needs to spill, and then the
14+
# Test what happens when the 2D allocation stage needs to spill, and then the
1315
# last allocation stage needs to spill again to make space for allocating
14-
# %7:edj = MOV_PD_imm10_pseudo 12.
16+
# %7:edj = MOV_PD_imm10_pseudo 12. Please note that in RA-STAGED-FG
17+
# (FG = fine grained) we can avoid spills by using scalar registers.
1518
---
1619
name: test_spill_2d_last_stage
1720
tracksRegLiveness: true
1821
body: |
1922
bb.1.entry:
2023
liveins: $p0, $p1, $d1, $d2, $d3, $d4, $d5, $d6, $d7
21-
; RA-LABEL: name: test_spill_2d_last_stage
22-
; RA: liveins: $d1, $d2, $d3, $d4, $d5, $d6, $d7, $p0, $p1
23-
; RA-NEXT: {{ $}}
24-
; RA-NEXT: renamable $dn0 = LDA_dms_lda_idx_imm renamable $p1, 0
25-
; RA-NEXT: renamable $m0 = LDA_dms_lda_idx_imm renamable $p1, 4
26-
; RA-NEXT: renamable $dj0 = LDA_dms_lda_idx_imm renamable $p1, 8
27-
; RA-NEXT: ST_D_SPILL renamable $d0, %stack.1, implicit $sp :: (store (s128) into %stack.1, align 4)
28-
; RA-NEXT: renamable $dj0 = MOV_PD_imm11_pseudo 12
29-
; RA-NEXT: renamable $r0 = LDA_dms_lda_idx renamable $p1, killed renamable $dj0
30-
; RA-NEXT: renamable $d0 = LDA_D_SPILL %stack.1, implicit $sp :: (load (s128) from %stack.1, align 4)
31-
; RA-NEXT: renamable $dc0 = COPY killed renamable $r0
32-
; RA-NEXT: ST_D_SPILL killed renamable $d0, %stack.1, implicit $sp :: (store (s128) into %stack.1, align 4)
33-
; RA-NEXT: renamable $dn0 = LDA_dms_lda_idx_imm renamable $p1, 16
34-
; RA-NEXT: renamable $m0 = LDA_dms_lda_idx_imm renamable $p1, 20
35-
; RA-NEXT: renamable $dj0 = LDA_dms_lda_idx_imm renamable $p1, 24
36-
; RA-NEXT: renamable $dc0 = LDA_dms_lda_idx_imm killed renamable $p1, 28
37-
; RA-NEXT: ST_D_SPILL killed renamable $d0, %stack.0, implicit $sp :: (store (s128) into %stack.0, align 4)
38-
; RA-NEXT: renamable $d0 = LDA_D_SPILL %stack.1, implicit $sp :: (load (s128) from %stack.1, align 4)
39-
; RA-NEXT: $p0, $dc0 = PADDA_2D_split killed $p0, killed $m0, killed $dn0, killed $dj0, killed $dc0
40-
; RA-NEXT: ST_D_SPILL renamable $d0, %stack.1, implicit $sp :: (store (s128) into %stack.1, align 4)
41-
; RA-NEXT: renamable $d0 = LDA_D_SPILL %stack.0, implicit $sp :: (load (s128) from %stack.0, align 4)
42-
; RA-NEXT: $p0, dead $dc0 = PADDA_2D_split killed $p0, killed $m0, killed $dn0, killed $dj0, killed $dc0
43-
; RA-NEXT: renamable $d0 = LDA_D_SPILL %stack.1, implicit $sp :: (load (s128) from %stack.1, align 4)
44-
; RA-NEXT: PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc0, implicit $d1, implicit $d2, implicit $d3, implicit $d4, implicit $d5, implicit $d6, implicit $d7
24+
; RA-STAGED-LABEL: name: test_spill_2d_last_stage
25+
; RA-STAGED: liveins: $d1, $d2, $d3, $d4, $d5, $d6, $d7, $p0, $p1
26+
; RA-STAGED-NEXT: {{ $}}
27+
; RA-STAGED-NEXT: renamable $dn0 = LDA_dms_lda_idx_imm renamable $p1, 0
28+
; RA-STAGED-NEXT: renamable $m0 = LDA_dms_lda_idx_imm renamable $p1, 4
29+
; RA-STAGED-NEXT: renamable $dj0 = LDA_dms_lda_idx_imm renamable $p1, 8
30+
; RA-STAGED-NEXT: ST_D_SPILL renamable $d0, %stack.0, implicit $sp :: (store (s128) into %stack.0, align 4)
31+
; RA-STAGED-NEXT: renamable $dj0 = MOV_PD_imm11_pseudo 12
32+
; RA-STAGED-NEXT: renamable $r0 = LDA_dms_lda_idx renamable $p1, killed renamable $dj0
33+
; RA-STAGED-NEXT: renamable $d0 = LDA_D_SPILL %stack.0, implicit $sp :: (load (s128) from %stack.0, align 4)
34+
; RA-STAGED-NEXT: renamable $dc0 = COPY killed renamable $r0
35+
; RA-STAGED-NEXT: ST_D_SPILL killed renamable $d0, %stack.0, implicit $sp :: (store (s128) into %stack.0, align 4)
36+
; RA-STAGED-NEXT: renamable $dn0 = LDA_dms_lda_idx_imm renamable $p1, 16
37+
; RA-STAGED-NEXT: renamable $m0 = LDA_dms_lda_idx_imm renamable $p1, 20
38+
; RA-STAGED-NEXT: renamable $dj0 = LDA_dms_lda_idx_imm renamable $p1, 24
39+
; RA-STAGED-NEXT: renamable $dc0 = LDA_dms_lda_idx_imm killed renamable $p1, 28
40+
; RA-STAGED-NEXT: ST_D_SPILL killed renamable $d0, %stack.1, implicit $sp :: (store (s128) into %stack.1, align 4)
41+
; RA-STAGED-NEXT: renamable $d0 = LDA_D_SPILL %stack.0, implicit $sp :: (load (s128) from %stack.0, align 4)
42+
; RA-STAGED-NEXT: $p0, $dc0 = PADDA_2D_split killed $p0, $m0, $dn0, $dj0, $dc0
43+
; RA-STAGED-NEXT: ST_D_SPILL killed renamable $d0, %stack.0, implicit $sp :: (store (s128) into %stack.0, align 4)
44+
; RA-STAGED-NEXT: renamable $d0 = LDA_D_SPILL %stack.1, implicit $sp :: (load (s128) from %stack.1, align 4)
45+
; RA-STAGED-NEXT: $p0, dead $dc0 = PADDA_2D_split killed $p0, $m0, $dn0, $dj0, $dc0
46+
; RA-STAGED-NEXT: renamable $d0 = LDA_D_SPILL %stack.0, implicit $sp :: (load (s128) from %stack.0, align 4)
47+
; RA-STAGED-NEXT: PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc0, implicit $d1, implicit $d2, implicit $d3, implicit $d4, implicit $d5, implicit $d6, implicit $d7
48+
;
49+
; RA-STAGED-FG-LABEL: name: test_spill_2d_last_stage
50+
; RA-STAGED-FG: liveins: $d1, $d2, $d3, $d4, $d5, $d6, $d7, $p0, $p1
51+
; RA-STAGED-FG-NEXT: {{ $}}
52+
; RA-STAGED-FG-NEXT: renamable $dn0 = LDA_dms_lda_idx_imm renamable $p1, 0
53+
; RA-STAGED-FG-NEXT: renamable $m0 = LDA_dms_lda_idx_imm renamable $p1, 4
54+
; RA-STAGED-FG-NEXT: renamable $r0 = LDA_dms_lda_idx_imm renamable $p1, 8
55+
; RA-STAGED-FG-NEXT: renamable $dj0 = MOV_PD_imm11_pseudo 12
56+
; RA-STAGED-FG-NEXT: renamable $r1 = LDA_dms_lda_idx renamable $p1, killed renamable $dj0
57+
; RA-STAGED-FG-NEXT: renamable $dc0 = COPY killed renamable $r1
58+
; RA-STAGED-FG-NEXT: renamable $dj0 = COPY killed renamable $r0
59+
; RA-STAGED-FG-NEXT: renamable $r1 = LDA_dms_lda_idx_imm renamable $p1, 16
60+
; RA-STAGED-FG-NEXT: renamable $r3 = LDA_dms_lda_idx_imm renamable $p1, 20
61+
; RA-STAGED-FG-NEXT: renamable $r2 = LDA_dms_lda_idx_imm renamable $p1, 24
62+
; RA-STAGED-FG-NEXT: renamable $r0 = LDA_dms_lda_idx_imm killed renamable $p1, 28
63+
; RA-STAGED-FG-NEXT: $p0, $dc0 = PADDA_2D_split killed $p0, killed $m0, killed $dn0, killed $dj0, killed $dc0
64+
; RA-STAGED-FG-NEXT: renamable $r4 = COPY killed renamable $dc0
65+
; RA-STAGED-FG-NEXT: renamable $dc0 = COPY killed renamable $r0
66+
; RA-STAGED-FG-NEXT: renamable $dn0 = COPY killed renamable $r1
67+
; RA-STAGED-FG-NEXT: renamable $dj0 = COPY killed renamable $r2
68+
; RA-STAGED-FG-NEXT: renamable $m0 = COPY killed renamable $r3
69+
; RA-STAGED-FG-NEXT: $p0, dead $dc0 = PADDA_2D_split killed $p0, killed $m0, killed $dn0, killed $dj0, killed $dc0
70+
; RA-STAGED-FG-NEXT: PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $r4, implicit $d1, implicit $d2, implicit $d3, implicit $d4, implicit $d5, implicit $d6, implicit $d7
4571
%20:ep = COPY $p0
4672
%21:ep = COPY $p1
4773
undef %100.sub_dim_size:ed = LDA_dms_lda_idx_imm %21, 0
@@ -58,3 +84,4 @@ body: |
5884
%20:ep, %101.sub_dim_count:ed = PADDA_2D_split %20, %101.sub_mod, %101.sub_dim_size, %101.sub_dim_stride, %101.sub_dim_count
5985
PseudoRET implicit $lr, implicit %20, implicit %100.sub_dim_count, implicit $d1, implicit $d2, implicit $d3, implicit $d4, implicit $d5, implicit $d6, implicit $d7
6086
...
87+

0 commit comments

Comments
 (0)