Skip to content

Commit e668391

Browse files
[AIEX] Add a Pass to expand unallocated 2D/3D into individual ones
If we don't need a full register, we can expand to individual lanes. Co-Authored-By: Krishnam Tibrewala <[email protected]>
1 parent 368cbd2 commit e668391

12 files changed

+270
-55
lines changed

llvm/lib/Target/AIE/AIE.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ MachineFunctionPass *createAIEEliminateDuplicatePHI();
6060
FunctionPass *createAIEOutlineMemoryGEP();
6161
FunctionPass *createAIESuperRegRewriter();
6262
FunctionPass *createAIEWawRegRewriter();
63+
FunctionPass *createAIEUnallocatedSuperRegRewriter();
6364
FunctionPass *createAIEPostSelectOptimize();
6465
MachineFunctionPass *
6566
createDeadMachineInstructionElim(bool KeepLifetimeInstructions);
@@ -84,6 +85,8 @@ extern char &AIESuperRegRewriterID;
8485
void initializeAIESuperRegRewriterPass(PassRegistry &);
8586
extern char &AIEWawRegRewriterID;
8687
void initializeAIEWawRegRewriterPass(PassRegistry &);
88+
extern char &AIEUnallocatedSuperRegRewriterID;
89+
void initializeAIEUnallocatedSuperRegRewriterPass(PassRegistry &);
8790
extern char &AIEOutlineMemoryGEPID;
8891
void initializeAIEOutlineMemoryGEPPass(PassRegistry &);
8992

llvm/lib/Target/AIE/AIEBaseTargetMachine.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAIETarget() {
158158
initializeAIEPseudoBranchExpansionPass(*PR);
159159
initializeAIESubRegConstrainerPass(*PR);
160160
initializeAIESuperRegRewriterPass(*PR);
161+
initializeAIEUnallocatedSuperRegRewriterPass(*PR);
161162
initializeAIEWawRegRewriterPass(*PR);
162163
initializeAIEOutlineMemoryGEPPass(*PR);
163164
initializeAIEFinalizeBundlePass(*PR);

llvm/lib/Target/AIE/AIESuperRegUtils.cpp

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -191,21 +191,26 @@ LaneBitmask getLiveLanesAt(SlotIndex Index, Register Reg,
191191
return LiveLanes;
192192
}
193193

194-
void rewriteSuperReg(Register Reg, Register AssignedPhysReg,
194+
void rewriteSuperReg(Register Reg, std::optional<Register> AssignedPhysReg,
195195
SmallSet<int, 8> &SubRegs, MachineRegisterInfo &MRI,
196196
const AIEBaseRegisterInfo &TRI, VirtRegMap &VRM,
197197
LiveRegMatrix &LRM, LiveIntervals &LIS,
198198
SlotIndexes &Indexes, LiveDebugVariables &DebugVars) {
199199
LLVM_DEBUG(dbgs() << "Rewriting " << printReg(Reg, &TRI, 0, &MRI) << '\n');
200-
auto *TII = static_cast<const AIEBaseInstrInfo *>(
201-
VRM.getMachineFunction().getSubtarget().getInstrInfo());
200+
MachineFunction &MF = VRM.getMachineFunction();
201+
auto *TII =
202+
static_cast<const AIEBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
202203

203204
// Collect all the subreg indices to rewrite as independent vregs.
204205
SmallMapVector<int, Register, 8> SubRegToVReg;
205206
const TargetRegisterClass *SuperRC = MRI.getRegClass(Reg);
206207
assert(!SubRegs.empty());
207208
for (int SubReg : SubRegs) {
208-
const TargetRegisterClass *SubRC = TRI.getSubRegisterClass(SuperRC, SubReg);
209+
const TargetRegisterClass *SubRC =
210+
AssignedPhysReg.has_value()
211+
? TRI.getSubRegisterClass(SuperRC, SubReg)
212+
: TRI.getLargestLegalSuperClass(
213+
TRI.getSubRegisterClass(SuperRC, SubReg), MF);
209214
SubRegToVReg[SubReg] = MRI.createVirtualRegister(SubRC);
210215
}
211216

@@ -251,7 +256,6 @@ void rewriteSuperReg(Register Reg, Register AssignedPhysReg,
251256
LIS.removeInterval(Reg);
252257

253258
for (auto &[SubRegIdx, VReg] : SubRegToVReg) {
254-
MCRegister SubPhysReg = TRI.getSubReg(AssignedPhysReg, SubRegIdx);
255259
LiveInterval &SubRegLI = LIS.getInterval(VReg);
256260
LLVM_DEBUG(dbgs() << " Assigning Range: " << SubRegLI << '\n');
257261

@@ -262,6 +266,10 @@ void rewriteSuperReg(Register Reg, Register AssignedPhysReg,
262266
LIComponents.push_back(&SubRegLI);
263267
VRM.grow();
264268

269+
if (!AssignedPhysReg.has_value())
270+
continue;
271+
272+
MCRegister SubPhysReg = TRI.getSubReg(*AssignedPhysReg, SubRegIdx);
265273
for (LiveInterval *LI : LIComponents) {
266274
LRM.assign(*LI, SubPhysReg);
267275
VRM.setRequiredPhys(LI->reg(), SubPhysReg);

llvm/lib/Target/AIE/AIESuperRegUtils.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#define LLVM_LIB_TARGET_AIE_AIESUPERREGUTILS_H
1616

1717
#include "llvm/ADT/SmallSet.h"
18+
#include <optional>
1819

1920
namespace llvm {
2021
class Register;
@@ -63,7 +64,7 @@ void rewriteFullCopy(MachineInstr &MI, const std::set<int> &CopySubRegs,
6364
LaneBitmask getLiveLanesAt(SlotIndex Index, Register Reg,
6465
const LiveIntervals &LIS);
6566

66-
void rewriteSuperReg(Register Reg, Register AssignedPhysReg,
67+
void rewriteSuperReg(Register Reg, std::optional<Register> AssignedPhysReg,
6768
SmallSet<int, 8> &SubRegs, MachineRegisterInfo &MRI,
6869
const AIEBaseRegisterInfo &TRI, VirtRegMap &VRM,
6970
LiveRegMatrix &LRM, LiveIntervals &LIS,
Lines changed: 175 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,175 @@
1+
//===-- AIEUnallocatedSuperRegRewriter.cpp - Split unallocated super-regs -===//
2+
//
3+
// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
// (c) Copyright 2025 Advanced Micro Devices, Inc. or its affiliates
8+
//
9+
//===----------------------------------------------------------------------===//
10+
11+
#include "AIEBaseInstrInfo.h"
12+
#include "AIEBaseRegisterInfo.h"
13+
#include "AIESuperRegUtils.h"
14+
15+
#include "llvm/ADT/MapVector.h"
16+
#include "llvm/ADT/SmallSet.h"
17+
#include "llvm/CodeGen/LiveDebugVariables.h"
18+
#include "llvm/CodeGen/LiveIntervals.h"
19+
#include "llvm/CodeGen/LiveRegMatrix.h"
20+
#include "llvm/CodeGen/LiveStacks.h"
21+
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
22+
#include "llvm/CodeGen/MachineFunction.h"
23+
#include "llvm/CodeGen/MachineFunctionPass.h"
24+
#include "llvm/CodeGen/MachineInstr.h"
25+
#include "llvm/CodeGen/MachineInstrBuilder.h"
26+
#include "llvm/CodeGen/MachineOperand.h"
27+
#include "llvm/CodeGen/MachineRegisterInfo.h"
28+
#include "llvm/CodeGen/Passes.h"
29+
#include "llvm/CodeGen/SlotIndexes.h"
30+
#include "llvm/CodeGen/TargetInstrInfo.h"
31+
#include "llvm/CodeGen/TargetSubtargetInfo.h"
32+
#include "llvm/CodeGen/VirtRegMap.h"
33+
#include "llvm/Support/Debug.h"
34+
#include "llvm/Support/raw_ostream.h"
35+
36+
using namespace llvm;
37+
38+
#define DEBUG_TYPE "aie-ra-prepare"
39+
40+
namespace {
41+
42+
using RegRewriteInfo = std::vector<std::pair<Register, SmallSet<int, 8>>>;
43+
44+
/// Split large unallocated compound registers into multiple new smaller vregs
45+
/// Than can be allocated to scalar registers. This pass will handle registers
46+
/// that were not allocated by Greedy so far. Currently, it is expected to
47+
/// process registers used in copies created during Greedy's LR split.
48+
/// Registers used in *2d or *3d instructions should be already allocated
49+
/// at this point.
50+
class AIEUnallocatedSuperRegRewriter : public MachineFunctionPass {
51+
52+
public:
53+
static char ID;
54+
AIEUnallocatedSuperRegRewriter() : MachineFunctionPass(ID) {}
55+
56+
void getAnalysisUsage(AnalysisUsage &AU) const override {
57+
AU.setPreservesCFG();
58+
AU.addPreserved<MachineBlockFrequencyInfoWrapperPass>();
59+
AU.addRequired<VirtRegMapWrapperLegacy>();
60+
AU.addPreserved<VirtRegMapWrapperLegacy>();
61+
AU.addRequired<SlotIndexesWrapperPass>();
62+
AU.addPreserved<SlotIndexesWrapperPass>();
63+
AU.addRequired<LiveDebugVariablesWrapperLegacy>();
64+
AU.addPreserved<LiveDebugVariablesWrapperLegacy>();
65+
AU.addRequired<LiveStacksWrapperLegacy>();
66+
AU.addPreserved<LiveStacksWrapperLegacy>();
67+
AU.addRequired<LiveIntervalsWrapperPass>();
68+
AU.addPreserved<LiveIntervalsWrapperPass>();
69+
AU.addRequired<LiveRegMatrixWrapperLegacy>();
70+
AU.addPreserved<LiveRegMatrixWrapperLegacy>();
71+
MachineFunctionPass::getAnalysisUsage(AU);
72+
}
73+
74+
bool runOnMachineFunction(MachineFunction &Fn) override;
75+
};
76+
77+
/// Identify unallocated virtual registers that can be split into subregisters.
78+
/// Returns a list of candidate registers with their rewritable subregister
79+
/// indices, excluding unused registers and those already assigned to physical
80+
/// registers.
81+
static RegRewriteInfo getRewriteCandidates(MachineRegisterInfo &MRI,
82+
const AIEBaseRegisterInfo &TRI,
83+
VirtRegMap &VRM) {
84+
RegRewriteInfo RegistersToRewrite;
85+
for (unsigned VRegIdx = 0, End = MRI.getNumVirtRegs(); VRegIdx != End;
86+
++VRegIdx) {
87+
const Register Reg = Register::index2VirtReg(VRegIdx);
88+
89+
// Ignore un-used or already allocated registers.
90+
if (MRI.reg_nodbg_empty(Reg) || VRM.hasPhys(Reg))
91+
continue;
92+
93+
const SmallSet<int, 8> RewritableSubRegs =
94+
AIESuperRegUtils::getRewritableSubRegs(Reg, MRI, TRI);
95+
96+
if (RewritableSubRegs.empty())
97+
continue;
98+
99+
LLVM_DEBUG(dbgs() << "Candidate " << printReg(Reg, &TRI, 0, &MRI) << ":"
100+
<< printRegClassOrBank(Reg, MRI, &TRI) << '\n');
101+
102+
RegistersToRewrite.push_back({Reg, RewritableSubRegs});
103+
}
104+
105+
LLVM_DEBUG(dbgs() << "Found " << RegistersToRewrite.size()
106+
<< " candidate register(s) for rewriting\n");
107+
108+
return RegistersToRewrite;
109+
}
110+
111+
/// Split candidate registers into independent virtual registers for each
112+
/// subregister. Each composite register is rewritten using its subregister
113+
/// indices, with live intervals and debug information updated accordingly.
114+
void rewriteCandidates(RegRewriteInfo &RegistersToRewrite,
115+
MachineRegisterInfo &MRI, const AIEBaseRegisterInfo &TRI,
116+
VirtRegMap &VRM, LiveRegMatrix &LRM, LiveIntervals &LIS,
117+
SlotIndexes &Indexes, LiveDebugVariables &DebugVars) {
118+
119+
LLVM_DEBUG(dbgs() << "Rewriting " << RegistersToRewrite.size()
120+
<< " candidate register(s)\n");
121+
122+
for (auto [VReg, SubRegs] : RegistersToRewrite) {
123+
LLVM_DEBUG(dbgs() << " Rewriting " << printReg(VReg, &TRI, 0, &MRI)
124+
<< " into " << SubRegs.size() << " subregister(s)\n");
125+
std::optional<Register> NoPhysReg = {};
126+
AIESuperRegUtils::rewriteSuperReg(VReg, NoPhysReg, SubRegs, MRI, TRI, VRM,
127+
LRM, LIS, Indexes, DebugVars);
128+
}
129+
}
130+
131+
/// Pass entry point: find the unallocated super-registers of \p MF that can
/// be split and rewrite each of them into per-subregister virtual registers.
///
/// \returns true if at least one register was rewritten, false when no
/// candidate was found and the function is left untouched.
bool AIEUnallocatedSuperRegRewriter::runOnMachineFunction(MachineFunction &MF) {
  LLVM_DEBUG(llvm::dbgs() << "*** Splitting unallocated super-registers: "
                          << MF.getName() << " ***\n");

  // Candidate discovery only needs the register info and the VirtRegMap.
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const auto &TRI =
      *static_cast<const AIEBaseRegisterInfo *>(MRI.getTargetRegisterInfo());
  VirtRegMap &VRM = getAnalysis<VirtRegMapWrapperLegacy>().getVRM();

  LLVM_DEBUG(dbgs() << "Identifying rewrite candidates...\n");
  RegRewriteInfo Candidates = getRewriteCandidates(MRI, TRI, VRM);

  if (Candidates.empty()) {
    LLVM_DEBUG(dbgs() << "No candidates found, skipping rewrite\n");
    return false;
  }

  // The remaining analyses are only needed for the rewrite itself.
  LiveRegMatrix &LRM = getAnalysis<LiveRegMatrixWrapperLegacy>().getLRM();
  LiveIntervals &LIS = getAnalysis<LiveIntervalsWrapperPass>().getLIS();
  SlotIndexes &Indexes = getAnalysis<SlotIndexesWrapperPass>().getSI();
  LiveDebugVariables &DebugVars =
      getAnalysis<LiveDebugVariablesWrapperLegacy>().getLDV();

  LLVM_DEBUG(dbgs() << "Performing register rewrites...\n");
  rewriteCandidates(Candidates, MRI, TRI, VRM, LRM, LIS, Indexes, DebugVars);

  LLVM_DEBUG(dbgs() << "Successfully rewrote " << Candidates.size()
                    << " register(s)\n");

  // The candidate list is known to be non-empty at this point.
  return true;
}
162+
163+
} // end anonymous namespace
164+
165+
// Definition of the static pass ID declared inside the pass class.
char AIEUnallocatedSuperRegRewriter::ID = 0;
166+
// Publishes the ID through the reference that AIE.h declares as `extern`.
char &llvm::AIEUnallocatedSuperRegRewriterID =
167+
AIEUnallocatedSuperRegRewriter::ID;
168+
169+
// Registers the pass under the argument "aie-unallocated-superreg-rewrite";
// "AIE unallocated super-reg rewrite" is the name the llc pipeline test
// expects in the pass list.
// NOTE(review): presumably this macro also provides
// initializeAIEUnallocatedSuperRegRewriterPass declared in AIE.h - confirm.
INITIALIZE_PASS(AIEUnallocatedSuperRegRewriter,
170+
"aie-unallocated-superreg-rewrite",
171+
"AIE unallocated super-reg rewrite", false, false)
172+
173+
/// Factory for the unallocated super-register rewriter pass.
///
/// \returns a newly heap-allocated pass instance.
/// NOTE(review): ownership presumably passes to the caller / pass manager, as
/// with the other create* factories declared in AIE.h - confirm.
llvm::FunctionPass *llvm::createAIEUnallocatedSuperRegRewriter() {
  auto *Rewriter = new AIEUnallocatedSuperRegRewriter();
  return Rewriter;
}

llvm/lib/Target/AIE/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,7 @@ add_llvm_target(AIECodeGen
141141
AIE2TargetMachine.cpp
142142
AIE2TargetTransformInfo.cpp
143143
AIETiedRegOperands.cpp
144+
AIEUnallocatedSuperRegRewriter.cpp
144145
ReservedRegsLICM.cpp
145146
AIEOutlineMemoryGEP.cpp
146147
AIEWawRegRewriter.cpp

llvm/lib/Target/AIE/aie2p/AIE2PTargetMachine.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,8 @@ bool AIE2PPassConfig::addRegAssignAndRewriteOptimized() {
114114
addPass(createAIESuperRegRewriter());
115115
addPass(createGreedyRegisterAllocator(onlyAllocate3D2DRegisters));
116116
addPass(createAIESuperRegRewriter());
117+
if (EnableFineGrainedStagedRA)
118+
addPass(createAIEUnallocatedSuperRegRewriter());
117119
}
118120
addPass(createGreedyRegisterAllocator());
119121
if (EnableWAWRegRewrite) {

llvm/test/CodeGen/AIE/aie2p/llc-pipeline-aie2p.ll

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -247,6 +247,7 @@
247247
; AIE-O1-NEXT: AIE super-reg rewrite
248248
; AIE-O1-NEXT: Greedy Register Allocator
249249
; AIE-O1-NEXT: AIE super-reg rewrite
250+
; AIE-O1-NEXT: AIE unallocated super-reg rewrite
250251
; AIE-O1-NEXT: Greedy Register Allocator
251252
; AIE-O1-NEXT: AIE waw-reg rewrite
252253
; AIE-O1-NEXT: Greedy Register Allocator
@@ -472,6 +473,7 @@
472473
; AIE-O23-NEXT: AIE super-reg rewrite
473474
; AIE-O23-NEXT: Greedy Register Allocator
474475
; AIE-O23-NEXT: AIE super-reg rewrite
476+
; AIE-O23-NEXT: AIE unallocated super-reg rewrite
475477
; AIE-O23-NEXT: Greedy Register Allocator
476478
; AIE-O23-NEXT: AIE waw-reg rewrite
477479
; AIE-O23-NEXT: Greedy Register Allocator

0 commit comments

Comments
 (0)