Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 55 additions & 32 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10212,7 +10212,7 @@ static bool followSubRegDef(MachineInstr &MI,
}

MachineInstr *llvm::getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P,
MachineRegisterInfo &MRI) {
const MachineRegisterInfo &MRI) {
assert(MRI.isSSA());
if (!P.Reg.isVirtual())
return nullptr;
Expand Down Expand Up @@ -10748,7 +10748,35 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
if (SrcReg2 && !getFoldableImm(SrcReg2, *MRI, CmpValue))
return false;

const auto optimizeCmpSelect = [&CmpInstr, SrcReg, CmpValue, MRI,
// SCC is already valid after SCCValid.
// SCCRedefine will redefine SCC to the same value already available after
// SCCValid. If there are no intervening SCC conflicts delete SCCRedefine and
// update kill/dead flags if necessary.
const auto optimizeSCC = [this](MachineInstr *SCCValid,
MachineInstr *SCCRedefine) -> bool {
MachineInstr *KillsSCC = nullptr;
for (MachineInstr &MI : make_range(std::next(SCCValid->getIterator()),
SCCRedefine->getIterator())) {
if (MI.modifiesRegister(AMDGPU::SCC, &RI))
return false;
if (MI.killsRegister(AMDGPU::SCC, &RI))
KillsSCC = &MI;
}
if (MachineOperand *SccDef =
SCCValid->findRegisterDefOperand(AMDGPU::SCC, /*TRI=*/nullptr))
SccDef->setIsDead(false);
if (KillsSCC)
KillsSCC->clearRegisterKills(AMDGPU::SCC, /*TRI=*/nullptr);
SCCRedefine->eraseFromParent();


dbgs() << "QQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQ\n";
SCCValid->dump();

return true;
};

const auto optimizeCmpSelect = [&CmpInstr, SrcReg, CmpValue, MRI, optimizeSCC,
this]() -> bool {
if (CmpValue != 0)
return false;
Expand Down Expand Up @@ -10783,25 +10811,33 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
if (!setsSCCifResultIsNonZero(*Def) && !foldableSelect(Def))
return false;

MachineInstr *KillsSCC = nullptr;
for (MachineInstr &MI :
make_range(std::next(Def->getIterator()), CmpInstr.getIterator())) {
if (MI.modifiesRegister(AMDGPU::SCC, &RI))
return false;
if (MI.killsRegister(AMDGPU::SCC, &RI))
KillsSCC = &MI;
}
if (!optimizeSCC(Def, &CmpInstr))
return false;

if (MachineOperand *SccDef =
Def->findRegisterDefOperand(AMDGPU::SCC, /*TRI=*/nullptr))
SccDef->setIsDead(false);
if (KillsSCC)
KillsSCC->clearRegisterKills(AMDGPU::SCC, /*TRI=*/nullptr);
CmpInstr.eraseFromParent();
// If the s_or_b32 result is unused (i.e. it is effectively a 64-bit s_cmp_lg
// of a register pair) and the input is a 64-bit foldableSelect then transform:
//
//   s_or_b32 (S_CSELECT_B64 (non-zero imm), 0), 0
//     => S_CSELECT_B64 (non-zero imm), 0
//
// This is best effort: the result of optimizeSCC is intentionally ignored.
if (Def->getOpcode() == AMDGPU::S_OR_B32 &&
    MRI->use_nodbg_empty(Def->getOperand(0).getReg())) {
  const MachineOperand &OrOpnd1 = Def->getOperand(1);
  const MachineOperand &OrOpnd2 = Def->getOperand(2);

  if (OrOpnd1.isReg() && OrOpnd2.isReg() &&
      OrOpnd1.getReg() != OrOpnd2.getReg()) {
    MachineInstr *Def1 = getVRegSubRegDef(getRegSubRegPair(OrOpnd1), *MRI);
    MachineInstr *Def2 = getVRegSubRegDef(getRegSubRegPair(OrOpnd2), *MRI);
    // getVRegSubRegDef returns nullptr for non-virtual registers, so two
    // null results would compare equal; require a real common def.
    if (Def1 && Def1 == Def2 && foldableSelect(Def1))
      optimizeSCC(Def1, Def);
  }
}
return true;
};

const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI,
const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI, optimizeSCC,
this](int64_t ExpectedValue, unsigned SrcSize,
bool IsReversible, bool IsSigned) -> bool {
// s_cmp_eq_u32 (s_and_b32 $src, 1 << n), 1 << n => s_and_b32 $src, 1 << n
Expand Down Expand Up @@ -10875,21 +10911,8 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
if (IsReversedCC && !MRI->hasOneNonDBGUse(DefReg))
return false;

MachineInstr *KillsSCC = nullptr;
for (MachineInstr &MI :
make_range(std::next(Def->getIterator()), CmpInstr.getIterator())) {
if (MI.modifiesRegister(AMDGPU::SCC, &RI))
return false;
if (MI.killsRegister(AMDGPU::SCC, &RI))
KillsSCC = &MI;
}

MachineOperand *SccDef =
Def->findRegisterDefOperand(AMDGPU::SCC, /*TRI=*/nullptr);
SccDef->setIsDead(false);
if (KillsSCC)
KillsSCC->clearRegisterKills(AMDGPU::SCC, /*TRI=*/nullptr);
CmpInstr.eraseFromParent();
if (!optimizeSCC(Def, &CmpInstr))
return false;

if (!MRI->use_nodbg_empty(DefReg)) {
assert(!IsReversedCC);
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/SIInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -1704,7 +1704,7 @@ TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI,
/// skipping copy like instructions and subreg-manipulation pseudos.
/// Following another subreg of a reg:subreg isn't supported.
MachineInstr *getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P,
MachineRegisterInfo &MRI);
const MachineRegisterInfo &MRI);

/// \brief Return false if EXEC is not changed between the def of \p VReg at \p
/// DefMI and the use at \p UseMI. Should be run on SSA. Currently does not
Expand Down
Loading