Skip to content

[LV] Vectorize maxnum/minnum w/o fast-math flags. #148239

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Jul 18, 2025
3 changes: 3 additions & 0 deletions llvm/include/llvm/Analysis/IVDescriptors.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ enum class RecurKind {
FMul, ///< Product of floats.
FMin, ///< FP min implemented in terms of select(cmp()).
FMax, ///< FP max implemented in terms of select(cmp()).
FMinNum, ///< FP min with llvm.minnum semantics including NaNs.
FMaxNum, ///< FP max with llvm.maxnum semantics including NaNs.
FMinimum, ///< FP min with llvm.minimum semantics
FMaximum, ///< FP max with llvm.maximum semantics
FMinimumNum, ///< FP min with llvm.minimumnum semantics
Expand Down Expand Up @@ -250,6 +252,7 @@ class RecurrenceDescriptor {
/// Returns true if the recurrence kind is a floating-point min/max kind.
static bool isFPMinMaxRecurrenceKind(RecurKind Kind) {
return Kind == RecurKind::FMin || Kind == RecurKind::FMax ||
Kind == RecurKind::FMinNum || Kind == RecurKind::FMaxNum ||
Kind == RecurKind::FMinimum || Kind == RecurKind::FMaximum ||
Kind == RecurKind::FMinimumNum || Kind == RecurKind::FMaximumNum;
}
Expand Down
26 changes: 23 additions & 3 deletions llvm/lib/Analysis/IVDescriptors.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -941,10 +941,30 @@ RecurrenceDescriptor::InstDesc RecurrenceDescriptor::isRecurrenceInstr(
m_Intrinsic<Intrinsic::minimumnum>(m_Value(), m_Value())) ||
match(I, m_Intrinsic<Intrinsic::maximumnum>(m_Value(), m_Value()));
};
if (isIntMinMaxRecurrenceKind(Kind) ||
(HasRequiredFMF() && isFPMinMaxRecurrenceKind(Kind)))
if (isIntMinMaxRecurrenceKind(Kind))
return isMinMaxPattern(I, Kind, Prev);
else if (isFMulAddIntrinsic(I))
if (isFPMinMaxRecurrenceKind(Kind)) {
InstDesc Res = isMinMaxPattern(I, Kind, Prev);
if (!Res.isRecurrence())
return InstDesc(false, I);
if (HasRequiredFMF())
return Res;
// We may be able to vectorize FMax/FMin reductions using maxnum/minnum
// intrinsics with extra checks ensuring the vector loop handles only
// non-NaN inputs.
if (match(I, m_Intrinsic<Intrinsic::maxnum>(m_Value(), m_Value()))) {
assert(Kind == RecurKind::FMax &&
"unexpected recurrence kind for maxnum");
return InstDesc(I, RecurKind::FMaxNum);
}
if (match(I, m_Intrinsic<Intrinsic::minnum>(m_Value(), m_Value()))) {
assert(Kind == RecurKind::FMin &&
"unexpected recurrence kind for minnum");
return InstDesc(I, RecurKind::FMinNum);
}
return InstDesc(false, I);
}
if (isFMulAddIntrinsic(I))
return InstDesc(Kind == RecurKind::FMulAdd, I,
I->hasAllowReassoc() ? nullptr : I);
return InstDesc(false, I);
Expand Down
10 changes: 8 additions & 2 deletions llvm/lib/Transforms/Utils/LoopUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -938,8 +938,10 @@ constexpr Intrinsic::ID llvm::getReductionIntrinsicID(RecurKind RK) {
case RecurKind::UMin:
return Intrinsic::vector_reduce_umin;
case RecurKind::FMax:
case RecurKind::FMaxNum:
return Intrinsic::vector_reduce_fmax;
case RecurKind::FMin:
case RecurKind::FMinNum:
return Intrinsic::vector_reduce_fmin;
case RecurKind::FMaximum:
return Intrinsic::vector_reduce_fmaximum;
Expand Down Expand Up @@ -1037,8 +1039,10 @@ Intrinsic::ID llvm::getMinMaxReductionIntrinsicOp(RecurKind RK) {
case RecurKind::SMax:
return Intrinsic::smax;
case RecurKind::FMin:
case RecurKind::FMinNum:
return Intrinsic::minnum;
case RecurKind::FMax:
case RecurKind::FMaxNum:
return Intrinsic::maxnum;
case RecurKind::FMinimum:
return Intrinsic::minimum;
Expand Down Expand Up @@ -1096,9 +1100,9 @@ Value *llvm::createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left,
Value *Right) {
Type *Ty = Left->getType();
if (Ty->isIntOrIntVectorTy() ||
(RK == RecurKind::FMinimum || RK == RecurKind::FMaximum ||
(RK == RecurKind::FMinNum || RK == RecurKind::FMaxNum ||
RK == RecurKind::FMinimum || RK == RecurKind::FMaximum ||
RK == RecurKind::FMinimumNum || RK == RecurKind::FMaximumNum)) {
// TODO: Add float minnum/maxnum support when FMF nnan is set.
Intrinsic::ID Id = getMinMaxReductionIntrinsicOp(RK);
return Builder.CreateIntrinsic(Ty, Id, {Left, Right}, nullptr,
"rdx.minmax");
Expand Down Expand Up @@ -1308,6 +1312,8 @@ Value *llvm::createSimpleReduction(IRBuilderBase &Builder, Value *Src,
case RecurKind::UMin:
case RecurKind::FMax:
case RecurKind::FMin:
case RecurKind::FMinNum:
case RecurKind::FMaxNum:
case RecurKind::FMinimum:
case RecurKind::FMaximum:
case RecurKind::FMinimumNum:
Expand Down
12 changes: 11 additions & 1 deletion llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,6 @@ class VPBuilder {

/// Create a new ICmp VPInstruction with predicate \p Pred and operands \p A
/// and \p B.
/// TODO: add createFCmp when needed.
VPInstruction *createICmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B,
DebugLoc DL = DebugLoc::getUnknown(),
const Twine &Name = "") {
Expand All @@ -240,6 +239,17 @@ class VPBuilder {
new VPInstruction(Instruction::ICmp, {A, B}, Pred, DL, Name));
}

/// Create a new FCmp VPInstruction with predicate \p Pred and operands \p A
/// and \p B.
VPInstruction *createFCmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B,
DebugLoc DL = DebugLoc::getUnknown(),
const Twine &Name = "") {
assert(Pred >= CmpInst::FIRST_FCMP_PREDICATE &&
Pred <= CmpInst::LAST_FCMP_PREDICATE && "invalid predicate");
return tryInsertInstruction(
new VPInstruction(Instruction::FCmp, {A, B}, Pred, DL, Name));
}

VPInstruction *createPtrAdd(VPValue *Ptr, VPValue *Offset,
DebugLoc DL = DebugLoc::getUnknown(),
const Twine &Name = "") {
Expand Down
18 changes: 14 additions & 4 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4361,10 +4361,14 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {

bool LoopVectorizationPlanner::isCandidateForEpilogueVectorization(
ElementCount VF) const {
// Cross iteration phis such as reductions need special handling and are
// currently unsupported.
if (any_of(OrigLoop->getHeader()->phis(),
[&](PHINode &Phi) { return Legal->isFixedOrderRecurrence(&Phi); }))
// Cross iteration phis such as fixed-order recurrences and FMaxNum/FMinNum
// reductions need special handling and are currently unsupported.
if (any_of(OrigLoop->getHeader()->phis(), [&](PHINode &Phi) {
if (!Legal->isReductionVariable(&Phi))
return Legal->isFixedOrderRecurrence(&Phi);
RecurKind RK = Legal->getRecurrenceDescriptor(&Phi).getRecurrenceKind();
return RK == RecurKind::FMinNum || RK == RecurKind::FMaxNum;
}))
return false;

// Phis with uses outside of the loop require special handling and are
Expand Down Expand Up @@ -8787,6 +8791,12 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
// Adjust the recipes for any inloop reductions.
adjustRecipesForReductions(Plan, RecipeBuilder, Range.Start);

// Apply mandatory transformation to handle FP maxnum/minnum reduction with
// NaNs if possible, bail out otherwise.
if (!VPlanTransforms::runPass(
VPlanTransforms::handleMaxMinNumReductionsWithoutFastMath, *Plan))
return nullptr;
Comment on lines +8796 to +8798
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if (!VPlanTransforms::runPass(
VPlanTransforms::handleMaxMinNumReductionsWithoutFastMath, *Plan))
return nullptr;
// Apply mandatory transformation to handle FP maxnum/minnum reduction with NaNs if possible, bail out otherwise.
if (!VPlanTransforms::runPass(
VPlanTransforms::handleMaxMinNumReductionsWithoutFastMath, *Plan))
return nullptr;

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated, thanks!


// Transform recipes to abstract recipes if it is legal and beneficial and
// clamp the range for better cost estimation.
// TODO: Enable following transform when the EVL-version of extended-reduction
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23202,6 +23202,8 @@ class HorizontalReduction {
case RecurKind::FindFirstIVUMin:
case RecurKind::FindLastIVSMax:
case RecurKind::FindLastIVUMax:
case RecurKind::FMaxNum:
case RecurKind::FMinNum:
case RecurKind::FMaximumNum:
case RecurKind::FMinimumNum:
case RecurKind::None:
Expand Down Expand Up @@ -23339,6 +23341,8 @@ class HorizontalReduction {
case RecurKind::FindFirstIVUMin:
case RecurKind::FindLastIVSMax:
case RecurKind::FindLastIVUMax:
case RecurKind::FMaxNum:
case RecurKind::FMinNum:
case RecurKind::FMaximumNum:
case RecurKind::FMinimumNum:
case RecurKind::None:
Expand Down Expand Up @@ -23441,6 +23445,8 @@ class HorizontalReduction {
case RecurKind::FindFirstIVUMin:
case RecurKind::FindLastIVSMax:
case RecurKind::FindLastIVUMax:
case RecurKind::FMaxNum:
case RecurKind::FMinNum:
case RecurKind::FMaximumNum:
case RecurKind::FMinimumNum:
case RecurKind::None:
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
return ResTy;
}
case Instruction::ICmp:
case Instruction::FCmp:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can the treatment of FCmp here and below be patched independently, or is it tested only now when FCmp is introduced to check isNaN?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, only this patch introduces FCmp VPInstructions, so only tested now.

case VPInstruction::ActiveLaneMask:
assert(inferScalarType(R->getOperand(0)) ==
inferScalarType(R->getOperand(1)) &&
Expand Down
160 changes: 160 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -652,3 +652,163 @@ void VPlanTransforms::attachCheckBlock(VPlan &Plan, Value *Cond,
Term->addMetadata(LLVMContext::MD_prof, BranchWeights);
}
}

bool VPlanTransforms::handleMaxMinNumReductionsWithoutFastMath(VPlan &Plan) {
auto GetMinMaxCompareValue = [](VPReductionPHIRecipe *RedPhiR) -> VPValue * {
auto *MinMaxR = dyn_cast<VPRecipeWithIRFlags>(
RedPhiR->getBackedgeValue()->getDefiningRecipe());
if (!MinMaxR)
return nullptr;

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Worth checking/asserting intrinsic is min/max?

auto *RepR = dyn_cast<VPReplicateRecipe>(MinMaxR);
if (!isa<VPWidenIntrinsicRecipe>(MinMaxR) &&
!(RepR && isa<IntrinsicInst>(RepR->getUnderlyingInstr())))
return nullptr;

#ifndef NDEBUG
Intrinsic::ID RdxIntrinsicId =
RedPhiR->getRecurrenceKind() == RecurKind::FMaxNum ? Intrinsic::maxnum
: Intrinsic::minnum;
assert((isa<VPWidenIntrinsicRecipe>(MinMaxR) &&
cast<VPWidenIntrinsicRecipe>(MinMaxR)->getVectorIntrinsicID() ==
RdxIntrinsicId) ||
(RepR &&
cast<IntrinsicInst>(RepR->getUnderlyingInstr())->getIntrinsicID() ==
RdxIntrinsicId) &&
"Intrinsic did not match recurrence kind");
#endif

if (MinMaxR->getOperand(0) == RedPhiR)
return MinMaxR->getOperand(1);

assert(MinMaxR->getOperand(1) == RedPhiR &&
"Reduction phi operand expected");
return MinMaxR->getOperand(0);
};

VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
VPReductionPHIRecipe *RedPhiR = nullptr;
bool HasUnsupportedPhi = false;
for (auto &R : LoopRegion->getEntryBasicBlock()->phis()) {
if (isa<VPCanonicalIVPHIRecipe, VPWidenIntOrFpInductionRecipe>(&R))
continue;
auto *Cur = dyn_cast<VPReductionPHIRecipe>(&R);
if (!Cur) {
// TODO: Also support fixed-order recurrence phis.
HasUnsupportedPhi = true;
continue;
}
// For now, only a single reduction is supported.
// TODO: Support multiple MaxNum/MinNum reductions and other reductions.
if (RedPhiR)
return false;
if (Cur->getRecurrenceKind() != RecurKind::FMaxNum &&
Cur->getRecurrenceKind() != RecurKind::FMinNum) {
HasUnsupportedPhi = true;
continue;
}
RedPhiR = Cur;
}

if (!RedPhiR)
return true;

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
RecurKind RedPhiRK = RedPhiR->getRecurrenceKind();
if (RedPhiRK != RecurKind::FMaxNum && RedPhiRK != RecurKind::FMinNum)
return true;

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

RedPhiR must be FMaxNum/FMinNum when set, added an assert

// We won't be able to resume execution in the scalar tail, if there are
// unsupported header phis or there is no scalar tail at all, due to
// tail-folding.
if (HasUnsupportedPhi || !Plan.hasScalarTail())
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if (HasUnsupportedPhi || !Plan.hasScalarTail())
// <explanation why Plan needs to have a scalar tail>
if (HasUnsupportedPhi || !Plan.hasScalarTail())

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added, thanks.

return false;

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
VPValue *MinMaxOp = GetMinMaxCompareValue(RedPhiR);
if (!MinMaxOp)
return false;

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Moved, thanks!

VPValue *MinMaxOp = GetMinMaxCompareValue(RedPhiR);
if (!MinMaxOp)
return false;

RecurKind RedPhiRK = RedPhiR->getRecurrenceKind();
assert((RedPhiRK == RecurKind::FMaxNum || RedPhiRK == RecurKind::FMinNum) &&
"unsupported reduction");

/// Check if the vector loop of \p Plan can early exit and restart
/// execution of last vector iteration in the scalar loop. This requires all
/// recipes up to early exit point be side-effect free as they are
/// re-executed. Currently we check that the loop is free of any recipe that
/// may write to memory. Expected to operate on an early VPlan w/o nested
/// regions.
for (VPBlockBase *VPB : vp_depth_first_shallow(
Plan.getVectorLoopRegion()->getEntryBasicBlock())) {
auto *VPBB = cast<VPBasicBlock>(VPB);
for (auto &R : *VPBB) {
if (R.mayWriteToMemory() &&
!match(&R, m_BranchOnCount(m_VPValue(), m_VPValue())))
return false;
}
}

VPBasicBlock *LatchVPBB = LoopRegion->getExitingBasicBlock();
VPBuilder Builder(LatchVPBB->getTerminator());
auto *LatchExitingBranch = cast<VPInstruction>(LatchVPBB->getTerminator());
assert(LatchExitingBranch->getOpcode() == VPInstruction::BranchOnCount &&
"Unexpected terminator");
auto *IsLatchExitTaken =
Builder.createICmp(CmpInst::ICMP_EQ, LatchExitingBranch->getOperand(0),
LatchExitingBranch->getOperand(1));

VPValue *IsNaN = Builder.createFCmp(CmpInst::FCMP_UNO, MinMaxOp, MinMaxOp);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could have an isNaN opcode if its useful.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, would help merge checks when interleaving, but would be good to do as follow-up?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fine. Would also help clarify the purpose of the recipe a bit.

VPValue *AnyNaN = Builder.createNaryOp(VPInstruction::AnyOf, {IsNaN});
auto *AnyExitTaken =
Builder.createNaryOp(Instruction::Or, {AnyNaN, IsLatchExitTaken});
Builder.createNaryOp(VPInstruction::BranchOnCond, AnyExitTaken);
LatchExitingBranch->eraseFromParent();

// If we exit early due to NaNs, compute the final reduction result based on
// the reduction phi at the beginning of the last vector iteration.
auto *RdxResult = find_singleton<VPSingleDefRecipe>(
RedPhiR->users(), [](VPUser *U, bool) -> VPSingleDefRecipe * {
auto *VPI = dyn_cast<VPInstruction>(U);
if (VPI && VPI->getOpcode() == VPInstruction::ComputeReductionResult)
return VPI;
return nullptr;
});

auto *MiddleVPBB = Plan.getMiddleBlock();
Builder.setInsertPoint(MiddleVPBB, MiddleVPBB->begin());
auto *NewSel =
Builder.createSelect(AnyNaN, RedPhiR, RdxResult->getOperand(1));
RdxResult->setOperand(1, NewSel);

auto *ScalarPH = Plan.getScalarPreheader();
// Update resume phis for inductions in the scalar preheader. If AnyNaN is
// true, the resume from the start of the last vector iteration via the
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// true, the resume from the start of the last vector iteration via the
// true, resume from the start of the last vector iteration via the

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated, thanks

// canonical IV, otherwise from the original value.
for (auto &R : ScalarPH->phis()) {
auto *ResumeR = cast<VPPhi>(&R);
VPValue *VecV = ResumeR->getOperand(0);
if (VecV == RdxResult)
continue;
if (auto *DerivedIV = dyn_cast<VPDerivedIVRecipe>(VecV)) {
if (DerivedIV->getNumUsers() == 1 &&
DerivedIV->getOperand(1) == &Plan.getVectorTripCount()) {
auto *NewSel = Builder.createSelect(AnyNaN, Plan.getCanonicalIV(),
&Plan.getVectorTripCount());
DerivedIV->moveAfter(&*Builder.getInsertPoint());
DerivedIV->setOperand(1, NewSel);
continue;
}
}
// Bail out and abandon the current, partially modified, VPlan if we
// encounter resume phi that cannot be updated yet.
if (VecV != &Plan.getVectorTripCount()) {
LLVM_DEBUG(dbgs() << "Found resume phi we cannot update for VPlan with "
"FMaxNum/FMinNum reduction.\n");
return false;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Returning false now, after several recipes have been modified?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, in that case we abandon the transform (and the invalid VPlan wil be discarded). Alternative would be to first check separately before making changes and then updating phis separately.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Worth noting, at-least.
We do look out for values that feed the next iteration by considering supported header phis, so could also check if all values that leave the loop are supported? May be good to issue debug messages and/or remarks when encountering such vectorization obstacles, possibly as follow-up.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks, added a debug message + comment!

}
auto *NewSel = Builder.createSelect(AnyNaN, Plan.getCanonicalIV(), VecV);
ResumeR->setOperand(0, NewSel);
}

auto *MiddleTerm = MiddleVPBB->getTerminator();
Builder.setInsertPoint(MiddleTerm);
VPValue *MiddleCond = MiddleTerm->getOperand(0);
VPValue *NewCond = Builder.createAnd(MiddleCond, Builder.createNot(AnyNaN));
MiddleTerm->setOperand(0, NewCond);
return true;
}
Loading