@@ -117,45 +117,72 @@ static LLT getReadAnyLaneSplitTy(LLT Ty) {
117
117
return LLT::scalar (32 );
118
118
}
119
119
120
// NOTE(review): this span is a rendered diff. Rows starting with '-' are the
// removed text, rows starting with '+' are the added text, unmarked rows are
// unchanged context, and the bare numbers are line-number gutter rows.
//
// Change shown: the forward declaration of the Register-returning
// buildReadAnyLane is replaced by a forward declaration of a templated
// buildReadLane that takes one extra ReadLaneFnTy argument, and
// unmergeReadAnyLane becomes a template that accepts that callable (BuildRL)
// and passes it on.
- static Register buildReadAnyLane (MachineIRBuilder &B, Register VgprSrc,
121
- const RegisterBankInfo &RBI);
122
-
123
- static void unmergeReadAnyLane (MachineIRBuilder &B,
124
- SmallVectorImpl<Register> &SgprDstParts,
125
- LLT UnmergeTy, Register VgprSrc,
126
- const RegisterBankInfo &RBI) {
120
+ template <typename ReadLaneFnTy>
121
+ static Register buildReadLane (MachineIRBuilder &, Register,
122
+ const RegisterBankInfo &, ReadLaneFnTy);
123
+
124
+ template <typename ReadLaneFnTy>
125
+ static void
126
+ unmergeReadAnyLane (MachineIRBuilder &B, SmallVectorImpl<Register> &SgprDstParts,
127
+ LLT UnmergeTy, Register VgprSrc, const RegisterBankInfo &RBI,
128
+ ReadLaneFnTy BuildRL) {
// Body: unmerge VgprSrc into pieces described by {VgprRB, UnmergeTy}, then
// append one register per piece to SgprDstParts. Each appended register is
// whatever the four-argument buildReadLane returns for that piece.
127
129
const RegisterBank *VgprRB = &RBI.getRegBank (AMDGPU::VGPRRegBankID);
128
130
auto Unmerge = B.buildUnmerge ({VgprRB, UnmergeTy}, VgprSrc);
129
131
// The loop stops one short of getNumOperands(); presumably the final operand
// of the unmerge is its source rather than a result - TODO confirm.
for (unsigned i = 0 ; i < Unmerge->getNumOperands () - 1 ; ++i) {
130
- SgprDstParts.push_back (buildReadAnyLane (B, Unmerge.getReg (i), RBI));
132
+ SgprDstParts.push_back (buildReadLane (B, Unmerge.getReg (i), RBI, BuildRL ));
131
133
}
132
134
}
133
135
134
// NOTE(review): rendered diff ('-' removed, '+' added, unmarked = context,
// bare numbers = line-number gutter rows).
//
// Register-returning helper. The change renames buildReadAnyLane to a
// templated buildReadLane and adds a callable parameter, BuildRL.
//
// Behaviour visible here:
//  - The type of VgprSrc is read from the builder's MRI.
//  - If that type is 32 bits wide, the removed code built
//    G_AMDGPU_READANYLANE directly with {SgprRB, Ty} as the destination spec.
//    The added code first creates a virtual register from {SgprRB, Ty}, then
//    calls BuildRL(B, SgprDst, VgprSrc) and returns getReg(0) of its result.
//  - Otherwise the source is split through unmergeReadAnyLane, using
//    getReadAnyLaneSplitTy(Ty) as the piece type, and the collected pieces
//    are recombined with buildMergeLikeInstr({SgprRB, Ty}, ...). The added
//    code passes BuildRL on to unmergeReadAnyLane.
- static Register buildReadAnyLane (MachineIRBuilder &B, Register VgprSrc,
135
- const RegisterBankInfo &RBI) {
136
+ template <typename ReadLaneFnTy>
137
+ static Register buildReadLane (MachineIRBuilder &B, Register VgprSrc,
138
+ const RegisterBankInfo &RBI,
139
+ ReadLaneFnTy BuildRL) {
136
140
LLT Ty = B.getMRI ()->getType (VgprSrc);
137
141
const RegisterBank *SgprRB = &RBI.getRegBank (AMDGPU::SGPRRegBankID);
138
142
if (Ty.getSizeInBits () == 32 ) {
139
- return B. buildInstr (AMDGPU::G_AMDGPU_READANYLANE, {{ SgprRB, Ty}}, {VgprSrc})
140
- .getReg (0 );
143
+ Register SgprDst = B. getMRI ()-> createVirtualRegister ({ SgprRB, Ty});
144
+ return BuildRL (B, SgprDst, VgprSrc) .getReg (0 );
141
145
}
142
146
143
147
SmallVector<Register, 8 > SgprDstParts;
144
- unmergeReadAnyLane (B, SgprDstParts, getReadAnyLaneSplitTy (Ty), VgprSrc, RBI);
148
+ unmergeReadAnyLane (B, SgprDstParts, getReadAnyLaneSplitTy (Ty), VgprSrc, RBI,
149
+ BuildRL);
145
150
146
151
return B.buildMergeLikeInstr ({SgprRB, Ty}, SgprDstParts).getReg (0 );
147
152
}
148
153
149
// NOTE(review): rendered diff ('-' removed, '+' added, unmarked = context,
// bare numbers = line-number gutter rows).
//
// The body that used to be the definition of AMDGPU::buildReadAnyLane becomes
// a file-static template, buildReadLane, that writes into the caller-supplied
// SgprDst and takes a callable, BuildReadLane.
//
// Behaviour visible here:
//  - If the type of VgprSrc is 32 bits wide, the removed code built
//    G_AMDGPU_READANYLANE with {SgprDst} and {VgprSrc}. The added code calls
//    BuildReadLane(B, SgprDst, VgprSrc) instead, then returns.
//  - Otherwise the source is split through unmergeReadAnyLane, using
//    getReadAnyLaneSplitTy(Ty) as the piece type, and the pieces are merged
//    into SgprDst with buildMergeLikeInstr.
- void AMDGPU::buildReadAnyLane (MachineIRBuilder &B, Register SgprDst,
150
- Register VgprSrc, const RegisterBankInfo &RBI) {
154
+ template <typename ReadLaneFnTy>
155
+ static void buildReadLane (MachineIRBuilder &B, Register SgprDst,
156
+ Register VgprSrc, const RegisterBankInfo &RBI,
157
+ ReadLaneFnTy BuildReadLane) {
151
158
LLT Ty = B.getMRI ()->getType (VgprSrc);
152
159
if (Ty.getSizeInBits () == 32 ) {
153
- B. buildInstr (AMDGPU::G_AMDGPU_READANYLANE, { SgprDst}, { VgprSrc} );
160
+ BuildReadLane (B, SgprDst, VgprSrc);
154
161
return ;
155
162
}
156
163
157
164
SmallVector<Register, 8 > SgprDstParts;
158
- unmergeReadAnyLane (B, SgprDstParts, getReadAnyLaneSplitTy (Ty), VgprSrc, RBI);
165
+ unmergeReadAnyLane (B, SgprDstParts, getReadAnyLaneSplitTy (Ty), VgprSrc, RBI,
166
+ BuildReadLane);
159
167
160
168
// NOTE(review): the value of the trailing .getReg (0) is not used; this
// statement is kept only for the merge it builds into SgprDst.
B.buildMergeLikeInstr (SgprDst, SgprDstParts).getReg (0 );
161
169
}
170
+
171
// NOTE(review): rendered diff; every row here is an added ('+') row or a
// line-number gutter row.
//
// New definition of AMDGPU::buildReadAnyLane with the same parameter list as
// the removed one. It forwards to the five-argument buildReadLane template,
// passing a lambda that builds G_AMDGPU_READANYLANE with {SgprDst} and
// {VgprSrc} as its two operand lists.
//
// NOTE(review): the five-argument buildReadLane is declared `static void`, so
// the `return` below returns a void expression from a void function.
+ void AMDGPU::buildReadAnyLane (MachineIRBuilder &B, Register SgprDst,
172
+ Register VgprSrc, const RegisterBankInfo &RBI) {
173
+ return buildReadLane (
174
+ B, SgprDst, VgprSrc, RBI,
175
+ [](MachineIRBuilder &B, Register SgprDst, Register VgprSrc) {
176
+ return B.buildInstr (AMDGPU::G_AMDGPU_READANYLANE, {SgprDst}, {VgprSrc});
177
+ });
178
+ }
179
+
180
// NOTE(review): rendered diff; every row here is an added ('+') row or a
// line-number gutter row.
//
// New entry point AMDGPU::buildReadFirstLane, with the same parameter list as
// AMDGPU::buildReadAnyLane. It forwards to the five-argument buildReadLane
// template, passing a lambda that calls
// B.buildIntrinsic(Intrinsic::amdgcn_readfirstlane, SgprDst) and then adds
// VgprSrc to it with addReg.
//
// NOTE(review): the five-argument buildReadLane is declared `static void`, so
// the `return` below returns a void expression from a void function.
+ void AMDGPU::buildReadFirstLane (MachineIRBuilder &B, Register SgprDst,
181
+ Register VgprSrc, const RegisterBankInfo &RBI) {
182
+ return buildReadLane (
183
+ B, SgprDst, VgprSrc, RBI,
184
+ [](MachineIRBuilder &B, Register SgprDst, Register VgprSrc) {
185
+ return B.buildIntrinsic (Intrinsic::amdgcn_readfirstlane, SgprDst)
186
+ .addReg (VgprSrc);
187
+ });
188
+ }
0 commit comments