@@ -36,10 +36,8 @@ class SIOptimizeExecMaskingPreRA {
3636 LiveIntervals *LIS;
3737 const AMDGPU::LaneMaskConstants &LMC;
3838
39- MCRegister CondReg;
4039 MCRegister ExecReg;
4140
42- bool optimizeVcndVcmpPair (MachineBasicBlock &MBB);
4341 bool optimizeElseBranch (MachineBasicBlock &MBB);
4442
4543public:
@@ -88,193 +86,6 @@ FunctionPass *llvm::createSIOptimizeExecMaskingPreRAPass() {
8886 return new SIOptimizeExecMaskingPreRALegacy ();
8987}
9088
91- // See if there is a def between \p AndIdx and \p SelIdx that needs to live
92- // beyond \p AndIdx.
// Returns true when \p LR is not killed at \p AndIdx and the value live into
// \p AndIdx is not the same value that is live out of \p SelIdx.
93- static bool isDefBetween (const LiveRange &LR, SlotIndex AndIdx,
94- SlotIndex SelIdx) {
95- LiveQueryResult AndLRQ = LR.Query (AndIdx);
96- return (!AndLRQ.isKill () && AndLRQ.valueIn () != LR.Query (SelIdx).valueOut ());
97- }
98-
99- // FIXME: Why do we bother trying to handle physical registers here?
// Register-level overload of isDefBetween. Computes the register slot indexes
// of \p And and \p Sel and applies the LiveRange check to \p Reg: directly on
// its live interval when \p Reg is virtual, otherwise on each register unit of
// the physical register. Returns true if any checked range reports a def.
100- static bool isDefBetween (const SIRegisterInfo &TRI,
101- LiveIntervals *LIS, Register Reg,
102- const MachineInstr &Sel, const MachineInstr &And) {
103- SlotIndex AndIdx = LIS->getInstructionIndex (And).getRegSlot ();
104- SlotIndex SelIdx = LIS->getInstructionIndex (Sel).getRegSlot ();
105-
// Virtual registers have a single live interval to query.
106- if (Reg.isVirtual ())
107- return isDefBetween (LIS->getInterval (Reg), AndIdx, SelIdx);
108-
// Physical registers are checked one register unit at a time.
109- for (MCRegUnit Unit : TRI.regunits (Reg.asMCReg ())) {
110- if (isDefBetween (LIS->getRegUnit (Unit), AndIdx, SelIdx))
111- return true ;
112- }
113-
114- return false ;
115- }
116-
117- // Optimize sequence
118- // %sel = V_CNDMASK_B32_e64 0, 1, %cc
119- // %cmp = V_CMP_NE_U32 1, %sel
120- // $vcc = S_AND_B64 $exec, %cmp
121- // S_CBRANCH_VCC[N]Z
122- // =>
123- // $vcc = S_ANDN2_B64 $exec, %cc
124- // S_CBRANCH_VCC[N]Z
125- //
126- // It is the negation pattern inserted by DAGCombiner::visitBRCOND() in the
127- // rebuildSetCC(). We start with S_CBRANCH to avoid exhaustive search, but
128- // only the first 3 instructions are really needed. S_AND_B64 with exec is a
129- // required part of the pattern since V_CNDMASK_B32 writes zeroes for inactive
130- // lanes.
131- //
132- // Returns true on success.
133- bool SIOptimizeExecMaskingPreRA::optimizeVcndVcmpPair (MachineBasicBlock &MBB) {
// Locate the S_CBRANCH_VCCZ / S_CBRANCH_VCCNZ terminator; without one there is
// nothing to fold in this block.
134- auto I = llvm::find_if (MBB.terminators (), [](const MachineInstr &MI) {
135- unsigned Opc = MI.getOpcode ();
136- return Opc == AMDGPU::S_CBRANCH_VCCZ ||
137- Opc == AMDGPU::S_CBRANCH_VCCNZ; });
138- if (I == MBB.terminators ().end ())
139- return false ;
140-
// The reaching def of CondReg at the branch must be the lane-mask AND opcode
// with two register source operands.
141- auto *And =
142- TRI->findReachingDef (CondReg, AMDGPU::NoSubRegister, *I, *MRI, LIS);
143- if (!And || And->getOpcode () != LMC.AndOpc || !And->getOperand (1 ).isReg () ||
144- !And->getOperand (2 ).isReg ())
145- return false ;
146-
// Exactly one of the AND sources has to be ExecReg; the other source is taken
// as the compare result (CmpReg / CmpSubReg).
147- MachineOperand *AndCC = &And->getOperand (1 );
148- Register CmpReg = AndCC->getReg ();
149- unsigned CmpSubReg = AndCC->getSubReg ();
150- if (CmpReg == Register (ExecReg)) {
151- AndCC = &And->getOperand (2 );
152- CmpReg = AndCC->getReg ();
153- CmpSubReg = AndCC->getSubReg ();
154- } else if (And->getOperand (2 ).getReg () != Register (ExecReg)) {
155- return false ;
156- }
157-
// The compare must be a V_CMP_NE_U32 (e32 or e64) located in the same block as
// the AND.
158- auto *Cmp = TRI->findReachingDef (CmpReg, CmpSubReg, *And, *MRI, LIS);
159- if (!Cmp || !(Cmp->getOpcode () == AMDGPU::V_CMP_NE_U32_e32 ||
160- Cmp->getOpcode () == AMDGPU::V_CMP_NE_U32_e64) ||
161- Cmp->getParent () != And->getParent ())
162- return false ;
163-
// Normalize the compare operands so that Op1 is the register and Op2 the
// immediate; the immediate is required to be 1.
164- MachineOperand *Op1 = TII->getNamedOperand (*Cmp, AMDGPU::OpName::src0);
165- MachineOperand *Op2 = TII->getNamedOperand (*Cmp, AMDGPU::OpName::src1);
166- if (Op1->isImm () && Op2->isReg ())
167- std::swap (Op1, Op2);
168- if (!Op1->isReg () || !Op2->isImm () || Op2->getImm () != 1 )
169- return false ;
170-
// Only a virtual select result is handled.
171- Register SelReg = Op1->getReg ();
172- if (SelReg.isPhysical ())
173- return false ;
174-
// The compared register must be defined by a V_CNDMASK_B32_e64 that has no
// source modifiers set on src0 / src1.
175- auto *Sel = TRI->findReachingDef (SelReg, Op1->getSubReg (), *Cmp, *MRI, LIS);
176- if (!Sel || Sel->getOpcode () != AMDGPU::V_CNDMASK_B32_e64)
177- return false ;
178-
179- if (TII->hasModifiersSet (*Sel, AMDGPU::OpName::src0_modifiers) ||
180- TII->hasModifiersSet (*Sel, AMDGPU::OpName::src1_modifiers))
181- return false ;
182-
// The select must pick between the immediates 0 (src0) and 1 (src1) based on a
// register condition (src2).
183- Op1 = TII->getNamedOperand (*Sel, AMDGPU::OpName::src0);
184- Op2 = TII->getNamedOperand (*Sel, AMDGPU::OpName::src1);
185- MachineOperand *CC = TII->getNamedOperand (*Sel, AMDGPU::OpName::src2);
186- if (!Op1->isImm () || !Op2->isImm () || !CC->isReg () ||
187- Op1->getImm () != 0 || Op2->getImm () != 1 )
188- return false ;
189-
190- Register CCReg = CC->getReg ();
191-
192- // If there was a def between the select and the and, we would need to move it
193- // to fold this.
194- if (isDefBetween (*TRI, LIS, CCReg, *Sel, *And))
195- return false ;
196-
197- // Cannot safely mirror live intervals with PHI nodes, so check for these
198- // before optimization.
199- SlotIndex SelIdx = LIS->getInstructionIndex (*Sel);
200- LiveInterval *SelLI = &LIS->getInterval (SelReg);
201- if (llvm::any_of (SelLI->vnis (),
202- [](const VNInfo *VNI) {
203- return VNI->isPHIDef ();
204- }))
205- return false ;
206-
207- // TODO: Guard against implicit def operands?
208- LLVM_DEBUG (dbgs () << " Folding sequence:\n\t " << *Sel << ' \t ' << *Cmp << ' \t '
209- << *And);
210-
// Build the AndN2 of ExecReg and the original condition in front of the AND,
// writing the same destination and preserving the undef flag and subregister
// of the condition operand.
211- MachineInstr *Andn2 =
212- BuildMI (MBB, *And, And->getDebugLoc (), TII->get (LMC.AndN2Opc ),
213- And->getOperand (0 ).getReg ())
214- .addReg (ExecReg)
215- .addReg (CCReg, getUndefRegState (CC->isUndef ()), CC->getSubReg ());
// Carry the dead flag of the AND's SCC operand over to the new instruction.
216- MachineOperand &AndSCC = And->getOperand (3 );
217- assert (AndSCC.getReg () == AMDGPU::SCC);
218- MachineOperand &Andn2SCC = Andn2->getOperand (3 );
219- assert (Andn2SCC.getReg () == AMDGPU::SCC);
220- Andn2SCC.setIsDead (AndSCC.isDead ());
221-
// Swap the AND for the AndN2 in the slot index maps, then delete the AND.
222- SlotIndex AndIdx = LIS->ReplaceMachineInstrInMaps (*And, *Andn2);
223- And->eraseFromParent ();
224-
225- LLVM_DEBUG (dbgs () << " =>\n\t " << *Andn2 << ' \n ' );
226-
227- // Update live intervals for CCReg before potentially removing CmpReg/SelReg,
228- // and their associated liveness information.
// A virtual CCReg has its interval recomputed only if it had a value live into
// the select; a physical CCReg has all of its register unit ranges dropped.
229- SlotIndex CmpIdx = LIS->getInstructionIndex (*Cmp);
230- if (CCReg.isVirtual ()) {
231- LiveInterval &CCLI = LIS->getInterval (CCReg);
232- auto CCQ = CCLI.Query (SelIdx.getRegSlot ());
233- if (CCQ.valueIn ()) {
234- LIS->removeInterval (CCReg);
235- LIS->createAndComputeVirtRegInterval (CCReg);
236- }
237- } else
238- LIS->removeAllRegUnitsForPhysReg (CCReg);
239-
240- // Try to remove the compare. Its result must be killed at the s_and_b64 if
241- // it is a virtual register, or not read between the cmp and the s_and_b64
// if it is CondReg.
242- LiveInterval *CmpLI = CmpReg.isVirtual () ? &LIS->getInterval (CmpReg) : nullptr ;
243- if ((CmpLI && CmpLI->Query (AndIdx.getRegSlot ()).isKill ()) ||
244- (CmpReg == Register (CondReg) &&
245- std::none_of (std::next (Cmp->getIterator ()), Andn2->getIterator (),
246- [&](const MachineInstr &MI) {
247- return MI.readsRegister (CondReg, TRI);
248- }))) {
249- LLVM_DEBUG (dbgs () << " Erasing: " << *Cmp << ' \n ' );
250- if (CmpLI)
251- LIS->removeVRegDefAt (*CmpLI, CmpIdx.getRegSlot ());
252- LIS->RemoveMachineInstrFromMaps (*Cmp);
253- Cmp->eraseFromParent ();
254-
255- // Try to remove v_cndmask_b32.
256- // Kill status must be checked before shrinking the live range.
257- bool IsKill = SelLI->Query (CmpIdx.getRegSlot ()).isKill ();
258- LIS->shrinkToUses (SelLI);
259- bool IsDead = SelLI->Query (SelIdx.getRegSlot ()).isDeadDef ();
260- if (MRI->use_nodbg_empty (SelReg) && (IsKill || IsDead)) {
261- LLVM_DEBUG (dbgs () << " Erasing: " << *Sel << ' \n ' );
262-
263- LIS->removeVRegDefAt (*SelLI, SelIdx.getRegSlot ());
264- LIS->RemoveMachineInstrFromMaps (*Sel);
// Read the flag before erasing Sel; it is needed after the instruction is gone.
265- bool ShrinkSel = Sel->getOperand (0 ).readsReg ();
266- Sel->eraseFromParent ();
267- if (ShrinkSel) {
268- // The result of the V_CNDMASK was a subreg def which counted as a read
269- // from the other parts of the reg. Shrink their live ranges.
270- LIS->shrinkToUses (SelLI);
271- }
272- }
273- }
274-
// The AND has been replaced at this point, even if Cmp/Sel could not be erased.
275- return true ;
276- }
277-
27889// Optimize sequence
27990// %dst = S_OR_SAVEEXEC %src
28091// ... instructions not modifying exec ...
@@ -368,7 +179,6 @@ bool SIOptimizeExecMaskingPreRALegacy::runOnMachineFunction(
368179}
369180
370181bool SIOptimizeExecMaskingPreRA::run (MachineFunction &MF) {
371- CondReg = MCRegister::from (LMC.VccReg );
372182 ExecReg = MCRegister::from (LMC.ExecReg );
373183
374184 DenseSet<Register> RecalcRegs ({AMDGPU::EXEC_LO, AMDGPU::EXEC_HI});
@@ -381,13 +191,6 @@ bool SIOptimizeExecMaskingPreRA::run(MachineFunction &MF) {
381191 Changed = true ;
382192 }
383193
384- if (optimizeVcndVcmpPair (MBB)) {
385- RecalcRegs.insert (AMDGPU::VCC_LO);
386- RecalcRegs.insert (AMDGPU::VCC_HI);
387- RecalcRegs.insert (AMDGPU::SCC);
388- Changed = true ;
389- }
390-
391194 // Try to remove unneeded instructions before s_endpgm.
392195 if (MBB.succ_empty ()) {
393196 if (MBB.empty ())
0 commit comments