Skip to content

[VPlan] Run narrowInterleaveGroups during general VPlan optimizations. #149706

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 8 additions & 3 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7253,9 +7253,6 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
VPBasicBlock *VectorPH = cast<VPBasicBlock>(BestVPlan.getVectorPreheader());
VPlanTransforms::optimizeForVFAndUF(BestVPlan, BestVF, BestUF, PSE);
VPlanTransforms::simplifyRecipes(BestVPlan, *Legal->getWidestInductionType());
VPlanTransforms::narrowInterleaveGroups(
BestVPlan, BestVF,
TTI.getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector));
VPlanTransforms::removeDeadRecipes(BestVPlan);

VPlanTransforms::convertToConcreteRecipes(BestVPlan,
Expand Down Expand Up @@ -8364,6 +8361,14 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
!VPlanTransforms::runPass(VPlanTransforms::tryAddExplicitVectorLength,
*Plan, CM.getMaxSafeElements()))
break;

if (auto P = VPlanTransforms::narrowInterleaveGroups(
*Plan,
TTI.getRegisterBitWidth(
TargetTransformInfo::RGK_FixedWidthVector),
SubRange))
VPlans.push_back(std::move(P));

assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid");
VPlans.push_back(std::move(Plan));
}
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -976,6 +976,8 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
} else {
VFxUF.setUnderlyingValue(createStepForVF(Builder, TCTy, State.VF, UF));
}

this->UF.setUnderlyingValue(ConstantInt::get(TCTy, UF));
}

VPIRBasicBlock *VPlan::getExitBlock(BasicBlock *IRBB) const {
Expand Down Expand Up @@ -1252,6 +1254,7 @@ VPlan *VPlan::duplicate() {
}
Old2NewVPValues[&VectorTripCount] = &NewPlan->VectorTripCount;
Old2NewVPValues[&VF] = &NewPlan->VF;
Old2NewVPValues[&UF] = &NewPlan->UF;
Old2NewVPValues[&VFxUF] = &NewPlan->VFxUF;
if (BackedgeTakenCount) {
NewPlan->BackedgeTakenCount = new VPValue();
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -3895,6 +3895,9 @@ class VPlan {
/// Represents the vectorization factor of the loop.
VPValue VF;

/// Represents the symbolic unroll factor of the loop.
VPValue UF;

/// Represents the loop-invariant VF * UF of the vector loop region.
VPValue VFxUF;

Expand Down Expand Up @@ -4050,6 +4053,9 @@ class VPlan {
/// Returns the VF of the vector loop region.
VPValue &getVF() { return VF; };

/// Returns the symbolic UF of the vector loop region.
VPValue &getSymbolicUF() { return UF; };
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

const


/// Returns VF * UF of the vector loop region.
VPValue &getVFxUF() { return VFxUF; }

Expand Down
76 changes: 57 additions & 19 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3146,19 +3146,20 @@ static bool isAlreadyNarrow(VPValue *VPV) {
return RepR && RepR->isSingleScalar();
}

void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
unsigned VectorRegWidth) {
std::unique_ptr<VPlan>
VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, unsigned VectorRegWidth,
VFRange &Range) {
using namespace llvm::VPlanPatternMatch;
VPRegionBlock *VectorLoop = Plan.getVectorLoopRegion();
if (VF.isScalable() || !VectorLoop)
return;
if (Plan.hasScalableVF() || !VectorLoop)
return nullptr;

VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV();
Type *CanonicalIVType = CanonicalIV->getScalarType();
VPTypeAnalysis TypeInfo(CanonicalIVType);

unsigned FixedVF = VF.getFixedValue();
SmallVector<VPInterleaveRecipe *> StoreGroups;
std::optional<unsigned> VFToOptimize;
for (auto &R : *VectorLoop->getEntryBasicBlock()) {
if (isa<VPCanonicalIVPHIRecipe>(&R) ||
match(&R, m_BranchOnCount(m_VPValue(), m_VPValue())))
Expand All @@ -3173,30 +3174,47 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
// * recipes writing to memory except interleave groups
// Only support plans with a canonical induction phi.
if (R.isPhi())
return;
return nullptr;

auto *InterleaveR = dyn_cast<VPInterleaveRecipe>(&R);
if (R.mayWriteToMemory() && !InterleaveR)
return;
return nullptr;

// Do not narrow interleave groups if there are VectorPointer recipes and
// the plan was unrolled. The recipe implicitly uses VF from
// VPTransformState.
// TODO: Remove restriction once the VF for the VectorPointer offset is
// modeled explicitly as operand.
if (isa<VPVectorPointerRecipe>(&R) && Plan.getUF() > 1)
return;
return nullptr;

// All other ops are allowed, but we reject uses that cannot be converted
// when checking all allowed consumers (store interleave groups) below.
if (!InterleaveR)
continue;

// Bail out on non-consecutive interleave groups.
if (!isConsecutiveInterleaveGroup(InterleaveR, FixedVF, TypeInfo,
VectorRegWidth))
return;

// Try to find a single VF, where all interleave groups are consecutive and
// saturate the full vector width. If we already have a candidate VF, check
// if it is applicable for the current InterleaveR, otherwise look for a
// suitable VF across the Plan's VFs.
//
if (VFToOptimize) {
if (!isConsecutiveInterleaveGroup(InterleaveR, *VFToOptimize, TypeInfo,
VectorRegWidth))
return nullptr;
} else {
for (ElementCount VF : Plan.vectorFactors()) {
if (!VF.isFixed())
continue;
if (isConsecutiveInterleaveGroup(InterleaveR, VF.getFixedValue(),
TypeInfo, VectorRegWidth)) {
VFToOptimize = VF.getFixedValue();
break;
}
}
if (!VFToOptimize)
return nullptr;
}
// Skip read interleave groups.
if (InterleaveR->getStoredValues().empty())
continue;
Expand Down Expand Up @@ -3232,24 +3250,44 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
auto *WideMember0 = dyn_cast_or_null<VPWidenRecipe>(
InterleaveR->getStoredValues()[0]->getDefiningRecipe());
if (!WideMember0)
return;
return nullptr;
for (const auto &[I, V] : enumerate(InterleaveR->getStoredValues())) {
auto *R = dyn_cast_or_null<VPWidenRecipe>(V->getDefiningRecipe());
if (!R || R->getOpcode() != WideMember0->getOpcode() ||
R->getNumOperands() > 2)
return;
return nullptr;
if (any_of(enumerate(R->operands()),
[WideMember0, Idx = I](const auto &P) {
const auto &[OpIdx, OpV] = P;
return !canNarrowLoad(WideMember0, OpIdx, OpV, Idx);
}))
return;
return nullptr;
}
StoreGroups.push_back(InterleaveR);
}

if (StoreGroups.empty())
return;
return nullptr;

// All interleave groups in Plan can be narrowed for VFToOptimize. Split the
// original Plan into 2: a) a new clone which contains all VFs of Plan, except
// VFToOptimize, and b) the original Plan with VFToOptimize as single VF.
std::unique_ptr<VPlan> NewPlan;
if (size(Plan.vectorFactors()) != 1) {
NewPlan = std::unique_ptr<VPlan>(Plan.duplicate());
Plan.setVF(ElementCount::getFixed(*VFToOptimize));
bool First = true;
for (ElementCount VF : NewPlan->vectorFactors()) {
if (VF.isFixed() && VF.getFixedValue() == *VFToOptimize)
continue;
if (First) {
NewPlan->setVF(VF);
First = false;
continue;
}
NewPlan->addVF(VF);
}
}

// Convert InterleaveGroup \p R to a single VPWidenLoadRecipe.
auto NarrowOp = [](VPValue *V) -> VPValue * {
Expand Down Expand Up @@ -3314,11 +3352,11 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
// original iteration.
auto *CanIV = Plan.getCanonicalIV();
auto *Inc = cast<VPInstruction>(CanIV->getBackedgeValue());
Inc->setOperand(1, Plan.getOrAddLiveIn(ConstantInt::get(
CanIV->getScalarType(), 1 * Plan.getUF())));
Inc->setOperand(1, &Plan.getSymbolicUF());
Plan.getVF().replaceAllUsesWith(
Plan.getOrAddLiveIn(ConstantInt::get(CanIV->getScalarType(), 1)));
removeDeadRecipes(Plan);
return NewPlan;
}

/// Add branch weight metadata, if the \p Plan's middle block is terminated by a
Expand Down
21 changes: 13 additions & 8 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.h
Original file line number Diff line number Diff line change
Expand Up @@ -234,14 +234,19 @@ struct VPlanTransforms {
/// Add explicit broadcasts for live-ins and VPValues defined in \p Plan's entry block if they are used as vectors.
static void materializeBroadcasts(VPlan &Plan);

/// Try to convert a plan with interleave groups with VF elements to a plan
/// with the interleave groups replaced by wide loads and stores processing VF
/// elements, if all transformed interleave groups access the full vector
/// width (checked via \p VectorRegWidth). This effectively is a very simple
/// form of loop-aware SLP, where we use interleave groups to identify
/// candidates.
static void narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
unsigned VectorRegWidth);
/// Try to find a single VF among \p Plan's VFs for which all interleave
/// groups (with VF elements) can be replaced by wide loads and stores
/// processing VF elements, if all transformed interleave groups access the
/// full vector width (checked via \p VectorRegWidth). If the transformation
/// can be applied, the original \p Plan will be split in 2, if it has
/// multiple VFs: a) a new clone which contains all VFs of Plan, except
/// VFToOptimize, and b) the original Plan with VFToOptimize as single VF. In
/// that case, the new clone is returned.
///
/// This effectively is a very simple form of loop-aware SLP, where we use
/// interleave groups to identify candidates.
static std::unique_ptr<VPlan>
narrowInterleaveGroups(VPlan &Plan, unsigned VectorRegWidth, VFRange &Range);

/// Predicate and linearize the control-flow in the only loop region of
/// \p Plan. If \p FoldTail is true, create a mask guarding the loop
Expand Down
Loading
Loading