Skip to content

Commit fc323cc

Browse files
AMDGPU/GlobalISel: Add waterfall lowering in regbanklegalize
Add rules for G_AMDGPU_BUFFER_LOAD and implement waterfall lowering for divergent operands that must be sgpr.
1 parent ad17135 commit fc323cc

18 files changed

+514
-243
lines changed

llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp

Lines changed: 44 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -117,45 +117,72 @@ static LLT getReadAnyLaneSplitTy(LLT Ty) {
117117
return LLT::scalar(32);
118118
}
119119

120-
static Register buildReadAnyLane(MachineIRBuilder &B, Register VgprSrc,
121-
const RegisterBankInfo &RBI);
122-
123-
static void unmergeReadAnyLane(MachineIRBuilder &B,
124-
SmallVectorImpl<Register> &SgprDstParts,
125-
LLT UnmergeTy, Register VgprSrc,
126-
const RegisterBankInfo &RBI) {
120+
template <typename ReadLaneFnTy>
121+
static Register buildReadLane(MachineIRBuilder &, Register,
122+
const RegisterBankInfo &, ReadLaneFnTy);
123+
124+
template <typename ReadLaneFnTy>
125+
static void
126+
unmergeReadAnyLane(MachineIRBuilder &B, SmallVectorImpl<Register> &SgprDstParts,
127+
LLT UnmergeTy, Register VgprSrc, const RegisterBankInfo &RBI,
128+
ReadLaneFnTy BuildRL) {
127129
const RegisterBank *VgprRB = &RBI.getRegBank(AMDGPU::VGPRRegBankID);
128130
auto Unmerge = B.buildUnmerge({VgprRB, UnmergeTy}, VgprSrc);
129131
for (unsigned i = 0; i < Unmerge->getNumOperands() - 1; ++i) {
130-
SgprDstParts.push_back(buildReadAnyLane(B, Unmerge.getReg(i), RBI));
132+
SgprDstParts.push_back(buildReadLane(B, Unmerge.getReg(i), RBI, BuildRL));
131133
}
132134
}
133135

134-
static Register buildReadAnyLane(MachineIRBuilder &B, Register VgprSrc,
135-
const RegisterBankInfo &RBI) {
136+
template <typename ReadLaneFnTy>
137+
static Register buildReadLane(MachineIRBuilder &B, Register VgprSrc,
138+
const RegisterBankInfo &RBI,
139+
ReadLaneFnTy BuildRL) {
136140
LLT Ty = B.getMRI()->getType(VgprSrc);
137141
const RegisterBank *SgprRB = &RBI.getRegBank(AMDGPU::SGPRRegBankID);
138142
if (Ty.getSizeInBits() == 32) {
139-
return B.buildInstr(AMDGPU::G_AMDGPU_READANYLANE, {{SgprRB, Ty}}, {VgprSrc})
140-
.getReg(0);
143+
Register SgprDst = B.getMRI()->createVirtualRegister({SgprRB, Ty});
144+
return BuildRL(B, SgprDst, VgprSrc).getReg(0);
141145
}
142146

143147
SmallVector<Register, 8> SgprDstParts;
144-
unmergeReadAnyLane(B, SgprDstParts, getReadAnyLaneSplitTy(Ty), VgprSrc, RBI);
148+
unmergeReadAnyLane(B, SgprDstParts, getReadAnyLaneSplitTy(Ty), VgprSrc, RBI,
149+
BuildRL);
145150

146151
return B.buildMergeLikeInstr({SgprRB, Ty}, SgprDstParts).getReg(0);
147152
}
148153

149-
void AMDGPU::buildReadAnyLane(MachineIRBuilder &B, Register SgprDst,
150-
Register VgprSrc, const RegisterBankInfo &RBI) {
154+
template <typename ReadLaneFnTy>
155+
static void buildReadLane(MachineIRBuilder &B, Register SgprDst,
156+
Register VgprSrc, const RegisterBankInfo &RBI,
157+
ReadLaneFnTy BuildReadLane) {
151158
LLT Ty = B.getMRI()->getType(VgprSrc);
152159
if (Ty.getSizeInBits() == 32) {
153-
B.buildInstr(AMDGPU::G_AMDGPU_READANYLANE, {SgprDst}, {VgprSrc});
160+
BuildReadLane(B, SgprDst, VgprSrc);
154161
return;
155162
}
156163

157164
SmallVector<Register, 8> SgprDstParts;
158-
unmergeReadAnyLane(B, SgprDstParts, getReadAnyLaneSplitTy(Ty), VgprSrc, RBI);
165+
unmergeReadAnyLane(B, SgprDstParts, getReadAnyLaneSplitTy(Ty), VgprSrc, RBI,
166+
BuildReadLane);
159167

160168
B.buildMergeLikeInstr(SgprDst, SgprDstParts).getReg(0);
161169
}
170+
171+
void AMDGPU::buildReadAnyLane(MachineIRBuilder &B, Register SgprDst,
172+
Register VgprSrc, const RegisterBankInfo &RBI) {
173+
return buildReadLane(
174+
B, SgprDst, VgprSrc, RBI,
175+
[](MachineIRBuilder &B, Register SgprDst, Register VgprSrc) {
176+
return B.buildInstr(AMDGPU::G_AMDGPU_READANYLANE, {SgprDst}, {VgprSrc});
177+
});
178+
}
179+
180+
void AMDGPU::buildReadFirstLane(MachineIRBuilder &B, Register SgprDst,
181+
Register VgprSrc, const RegisterBankInfo &RBI) {
182+
return buildReadLane(
183+
B, SgprDst, VgprSrc, RBI,
184+
[](MachineIRBuilder &B, Register SgprDst, Register VgprSrc) {
185+
return B.buildIntrinsic(Intrinsic::amdgcn_readfirstlane, SgprDst)
186+
.addReg(VgprSrc);
187+
});
188+
}

llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -51,6 +51,8 @@ class IntrinsicLaneMaskAnalyzer {
5151

5252
void buildReadAnyLane(MachineIRBuilder &B, Register SgprDst, Register VgprSrc,
5353
const RegisterBankInfo &RBI);
54+
void buildReadFirstLane(MachineIRBuilder &B, Register SgprDst, Register VgprSrc,
55+
const RegisterBankInfo &RBI);
5456
}
5557
}
5658

0 commit comments

Comments
 (0)