25 changes: 22 additions & 3 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7660,9 +7660,9 @@ VPWidenMemoryRecipe *VPRecipeBuilder::tryToWidenMemory(VPInstruction *VPI,
CM.foldTailByMasking() || !GEP
? GEPNoWrapFlags::none()
: GEP->getNoWrapFlags().withoutNoUnsignedWrap();
VectorPtr = new VPVectorEndPointerRecipe(
Ptr, &Plan.getVF(), getLoadStoreType(I),
/*Stride*/ -1, Flags, VPI->getDebugLoc());
VectorPtr = createVectorEndPointerRecipe(Ptr, getLoadStoreType(I),
/*Stride*/ -1, Flags,
VPI->getDebugLoc());
} else {
VectorPtr = new VPVectorPointerRecipe(Ptr, getLoadStoreType(I),
GEP ? GEP->getNoWrapFlags()
@@ -8351,6 +8351,25 @@ VPRecipeBuilder::tryToCreatePartialReduction(VPInstruction *Reduction,
RdxUnordered{/*VFScaleFactor=*/ScaleFactor}, ReductionI->getDebugLoc());
}

VPVectorEndPointerRecipe *VPRecipeBuilder::createVectorEndPointerRecipe(
VPValue *Ptr, Type *SourceElementType, int64_t Stride,
GEPNoWrapFlags GEPFlags, DebugLoc DbgLoc) {
// Offset for Part 0 = Stride * (VF - 1).
VPTypeAnalysis TypeInfo(Plan);
const DataLayout &DL =
Plan.getScalarHeader()->getIRBasicBlock()->getDataLayout();
Type *IndexTy = DL.getIndexType(TypeInfo.inferScalarType(Ptr));
Type *VFTy = TypeInfo.inferScalarType(&Plan.getVF());
VPValue *VF = Builder.createScalarZExtOrTrunc(&Plan.getVF(), IndexTy, VFTy,
DebugLoc::getUnknown());
VPValue *VFMinusOne = Builder.createOverflowingOp(
Instruction::Sub, {VF, Plan.getConstantInt(IndexTy, 1)}, {true, true});
VPValue *StridexVFMinusOne = Builder.createOverflowingOp(
Instruction::Mul, {VFMinusOne, Plan.getConstantInt(IndexTy, Stride)});
return new VPVectorEndPointerRecipe(Ptr, StridexVFMinusOne, SourceElementType,
Stride, GEPFlags, DbgLoc);
}

void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
ElementCount MaxVF) {
if (ElementCount::isKnownGT(MinVF, MaxVF))
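For intuition, here is a minimal standalone model of the part-0 offset the new createVectorEndPointerRecipe helper materializes (a sketch, not LLVM code; the concrete VF and Stride values below are illustrative assumptions):

#include <cassert>
#include <cstdint>

// Models "Offset for Part 0 = Stride * (VF - 1)" from the helper above.
int64_t part0EndPointerOffset(int64_t Stride, int64_t VF) {
  assert(Stride < 0 && "VPVectorEndPointerRecipe requires a negative stride");
  return Stride * (VF - 1);
}

int main() {
  // With VF = 4 and Stride = -1 the offset is -3, so the end pointer
  // addresses the last lane of part 0, as a reversed access requires.
  assert(part0EndPointerOffset(-1, 4) == -3);
}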
9 changes: 9 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
@@ -149,6 +149,9 @@ class VPRecipeBuilder {
: std::make_optional(It->second);
}

void setInsertPoint(VPRecipeBase *IP) { Builder.setInsertPoint(IP); }
VPBuilder &getBuilder() const { return Builder; }

/// Find all possible partial reductions in the loop and track all of those
/// that are valid so recipes can be formed later.
void collectScaledReductions(VFRange &Range);
@@ -162,6 +165,12 @@
VPRecipeBase *tryToCreatePartialReduction(VPInstruction *Reduction,
unsigned ScaleFactor);

/// Create and return a VectorEndPointer recipe.
VPVectorEndPointerRecipe *
createVectorEndPointerRecipe(VPValue *Ptr, Type *SourceElementType,
int64_t Stride, GEPNoWrapFlags GEPFlags,
DebugLoc DbgLoc);

/// Set the recipe created for given ingredient.
void setRecipe(Instruction *I, VPRecipeBase *R) {
assert(!Ingredient2Recipe.contains(I) &&
26 changes: 13 additions & 13 deletions llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1907,27 +1907,27 @@ class LLVM_ABI_FOR_TEST VPWidenGEPRecipe : public VPRecipeWithIRFlags {
/// A recipe to compute a pointer to the last element of each part of a widened
/// memory access of IndexedTy. Used for VPWidenMemoryRecipes or
/// VPInterleaveRecipes that are reversed.
class VPVectorEndPointerRecipe : public VPRecipeWithIRFlags,
public VPUnrollPartAccessor<2> {
Type *IndexedTy;
class VPVectorEndPointerRecipe : public VPRecipeWithIRFlags {
Type *SourceElementTy;

/// The constant stride of the pointer computed by this recipe, expressed in
/// units of IndexedTy.
/// units of SourceElementTy.
int64_t Stride;

public:
VPVectorEndPointerRecipe(VPValue *Ptr, VPValue *VF, Type *IndexedTy,
VPVectorEndPointerRecipe(VPValue *Ptr, VPValue *Offset, Type *SourceElementTy,
int64_t Stride, GEPNoWrapFlags GEPFlags, DebugLoc DL)
: VPRecipeWithIRFlags(VPDef::VPVectorEndPointerSC,
ArrayRef<VPValue *>({Ptr, VF}), GEPFlags, DL),
IndexedTy(IndexedTy), Stride(Stride) {
: VPRecipeWithIRFlags(VPDef::VPVectorEndPointerSC, {Ptr, Offset},
GEPFlags, DL),
SourceElementTy(SourceElementTy), Stride(Stride) {
assert(Stride < 0 && "Stride must be negative");
}

VP_CLASSOF_IMPL(VPDef::VPVectorEndPointerSC)

VPValue *getVFValue() { return getOperand(1); }
const VPValue *getVFValue() const { return getOperand(1); }
int64_t getStride() const { return Stride; }
Type *getSourceElementType() const { return SourceElementTy; }
VPValue *getOffset() const { return getOperand(1); }

void execute(VPTransformState &State) override;

@@ -1953,9 +1953,9 @@ class VPVectorEndPointerRecipe : public VPRecipeWithIRFlags,
}

VPVectorEndPointerRecipe *clone() override {
return new VPVectorEndPointerRecipe(getOperand(0), getVFValue(), IndexedTy,
Stride, getGEPNoWrapFlags(),
getDebugLoc());
return new VPVectorEndPointerRecipe(getOperand(0), getOffset(),
getSourceElementType(), getStride(),
getGEPNoWrapFlags(), getDebugLoc());
}

protected:
26 changes: 4 additions & 22 deletions llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -2488,28 +2488,10 @@ void VPWidenGEPRecipe::printRecipe(raw_ostream &O, const Twine &Indent,

void VPVectorEndPointerRecipe::execute(VPTransformState &State) {
auto &Builder = State.Builder;
unsigned CurrentPart = getUnrollPart(*this);
const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout();
Type *IndexTy = DL.getIndexType(State.TypeAnalysis.inferScalarType(this));

// The wide store needs to start at the last vector element.
Value *RunTimeVF = State.get(getVFValue(), VPLane(0));
if (IndexTy != RunTimeVF->getType())
RunTimeVF = Builder.CreateZExtOrTrunc(RunTimeVF, IndexTy);
// NumElt = Stride * CurrentPart * RunTimeVF
Value *NumElt = Builder.CreateMul(
ConstantInt::get(IndexTy, Stride * (int64_t)CurrentPart), RunTimeVF);
// LastLane = Stride * (RunTimeVF - 1)
Value *LastLane = Builder.CreateSub(RunTimeVF, ConstantInt::get(IndexTy, 1));
if (Stride != 1)
LastLane =
Builder.CreateMul(ConstantInt::getSigned(IndexTy, Stride), LastLane);
Value *Ptr = State.get(getOperand(0), VPLane(0));
Value *ResultPtr =
Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", getGEPNoWrapFlags());
ResultPtr = Builder.CreateGEP(IndexedTy, ResultPtr, LastLane, "",
getGEPNoWrapFlags());

Value *Ptr = State.get(getOperand(0), true);
Value *Offset = State.get(getOffset(), true);
Value *ResultPtr = Builder.CreateGEP(getSourceElementType(), Ptr, Offset, "",
getGEPNoWrapFlags());
Review comment (Contributor): Is this now identical to VPVectorPointerRecipe? Not for this PR, but maybe we can unify them at some point.

State.set(this, ResultPtr, /*IsScalar*/ true);
}

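The removed lines computed the address in two GEP steps (NumElt = Stride * Part * VF, then LastLane = Stride * (VF - 1)); the recipe now takes a single precomputed Offset operand instead. A small standalone check that the two formulations agree (a sketch under the stated assumptions; the per-part recurrence is the one added in VPlanUnroll.cpp further down):

#include <cassert>
#include <cstdint>

// Old lowering: Ptr + Stride*Part*VF, then + Stride*(VF - 1).
int64_t oldOffset(int64_t Stride, int64_t Part, int64_t VF) {
  return Stride * Part * VF + Stride * (VF - 1);
}

// New lowering: part 0 gets Stride*(VF - 1); each later part adds Stride*VF.
int64_t newOffset(int64_t Stride, int64_t Part, int64_t VF) {
  int64_t Offset = Stride * (VF - 1);
  for (int64_t P = 0; P < Part; ++P)
    Offset += Stride * VF;
  return Offset;
}

int main() {
  for (int64_t VF : {1, 2, 4, 8})
    for (int64_t Part = 0; Part < 4; ++Part)
      for (int64_t Stride : {-1, -2, -4})
        assert(oldOffset(Stride, Part, VF) == newOffset(Stride, Part, VF));
}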
61 changes: 45 additions & 16 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -783,6 +783,15 @@ static SmallVector<VPUser *> collectUsersRecursively(VPValue *V) {
return Users.takeVector();
}

static SmallVector<VPValue *> collectOperandsRecursively(VPRecipeBase *R) {
SetVector<VPValue *> Operands(llvm::from_range, R->operands());
for (unsigned I = 0; I != Operands.size(); ++I) {
if (VPRecipeBase *Cur = Operands[I]->getDefiningRecipe())
Operands.insert_range(Cur->operands());
}
return Operands.takeVector();
}

/// Scalarize a VPWidenPointerInductionRecipe by replacing it with a PtrAdd
/// (IndStart, ScalarIVSteps (0, Step)). This is used when the recipe only
/// generates scalar values.
@@ -2843,11 +2852,30 @@ static VPRecipeBase *optimizeMaskToEVL(VPValue *HeaderMask,
VPValue *Addr, *Mask, *EndPtr;

/// Adjust any end pointers so that they point to the end of EVL lanes not VF.
auto AdjustEndPtr = [&CurRecipe, &EVL](VPValue *EndPtr) {
auto *EVLEndPtr = cast<VPVectorEndPointerRecipe>(EndPtr)->clone();
EVLEndPtr->insertBefore(&CurRecipe);
EVLEndPtr->setOperand(1, &EVL);
return EVLEndPtr;
auto AdjustEndPtr = [&EVL, &Plan, &TypeInfo](VPValue *EndPtr) {
auto *VEPR = cast<VPVectorEndPointerRecipe>(EndPtr);
SmallVector<VPValue *> Operands({VEPR->getOffset()});
VPRecipeBase *OffsetR = VEPR->getOffset()->getDefiningRecipe();
append_range(Operands, collectOperandsRecursively(OffsetR));
for (VPValue *Op : reverse(Operands)) {
if (!Op->hasDefiningRecipe())
continue;
VPRecipeBase *Clone = Op->getDefiningRecipe()->clone();
Clone->insertBefore(VEPR);
VPRecipeBase *EVLR = EVL.getDefiningRecipe();
VPBuilder Builder(EVLR->getParent(), std::next(EVLR->getIterator()));
Type *EVLType = TypeInfo.inferScalarType(&EVL);
VPValue *EVLCast = Builder.createScalarZExtOrTrunc(
&EVL, TypeInfo.inferScalarType(&Plan->getVF()), EVLType,
EVLR->getDebugLoc());
Clone->replaceUsesOfWith(&Plan->getVF(), EVLCast);
Op->replaceUsesWithIf(
Clone->getVPSingleValue(), [&Operands](VPUser &U, unsigned Idx) {
return !is_contained(Operands,
cast<VPRecipeBase>(U).getVPSingleValue());
});
Review comment (Contributor): It's pretty unfortunate that changing the VF is a lot more complicated now. If it makes it easier, we should never have an ExplicitVectorLength with UF != 1. Maybe you can just assert here instead of trying to handle it fully generally.

}
return VEPR;
};

if (match(&CurRecipe,
@@ -2858,7 +2886,7 @@

if (match(&CurRecipe,
m_MaskedLoad(m_VPValue(EndPtr), m_RemoveMask(HeaderMask, Mask))) &&
match(EndPtr, m_VecEndPtr(m_VPValue(Addr), m_Specific(&Plan->getVF()))) &&
match(EndPtr, m_VecEndPtr(m_VPValue(Addr), m_VPValue())) &&
Review comment (Contributor): You still need to match here for -1 * (VF - 1), otherwise it's not a correct transform. I guess you could add a helper matcher for that?

cast<VPWidenLoadRecipe>(CurRecipe).isReverse())
return new VPWidenLoadEVLRecipe(cast<VPWidenLoadRecipe>(CurRecipe),
AdjustEndPtr(EndPtr), EVL, Mask);
@@ -2871,7 +2899,7 @@

if (match(&CurRecipe, m_MaskedStore(m_VPValue(EndPtr), m_VPValue(),
m_RemoveMask(HeaderMask, Mask))) &&
match(EndPtr, m_VecEndPtr(m_VPValue(Addr), m_Specific(&Plan->getVF()))) &&
match(EndPtr, m_VecEndPtr(m_VPValue(Addr), m_VPValue())) &&
cast<VPWidenStoreRecipe>(CurRecipe).isReverse())
return new VPWidenStoreEVLRecipe(cast<VPWidenStoreRecipe>(CurRecipe),
AdjustEndPtr(EndPtr), EVL, Mask);
@@ -2916,10 +2944,10 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
VPBasicBlock *Header = LoopRegion->getEntryBasicBlock();

assert(all_of(Plan.getVF().users(),
IsaPred<VPVectorEndPointerRecipe, VPScalarIVStepsRecipe,
VPWidenIntOrFpInductionRecipe>) &&
"User of VF that we can't transform to EVL.");
assert(
all_of(Plan.getVF().users(), IsaPred<VPInstruction, VPScalarIVStepsRecipe,
VPWidenIntOrFpInductionRecipe>) &&
"User of VF that we can't transform to EVL.");
Plan.getVF().replaceUsesWithIf(&EVL, [](VPUser &U, unsigned Idx) {
return isa<VPWidenIntOrFpInductionRecipe, VPScalarIVStepsRecipe>(U);
});
@@ -3412,6 +3440,7 @@ void VPlanTransforms::createInterleaveGroups(
Instruction *IRInsertPos = IG->getInsertPos();
auto *InsertPos =
cast<VPWidenMemoryRecipe>(RecipeBuilder.getRecipe(IRInsertPos));
RecipeBuilder.setInsertPoint(InsertPos);

GEPNoWrapFlags NW = GEPNoWrapFlags::none();
if (auto *Gep = dyn_cast<GetElementPtrInst>(
@@ -3421,6 +3450,7 @@
// Get or create the start address for the interleave group.
VPValue *Addr = Start->getAddr();
VPRecipeBase *AddrDef = Addr->getDefiningRecipe();
VPBuilder &B = RecipeBuilder.getBuilder();
if (AddrDef && !VPDT.properlyDominates(AddrDef, InsertPos)) {
// We cannot re-use the address of member zero because it does not
// dominate the insert position. Instead, use the address of the insert
@@ -3436,18 +3466,17 @@
IG->getIndex(IRInsertPos),
/*IsSigned=*/true);
VPValue *OffsetVPV = Plan.getConstantInt(-Offset);
VPBuilder B(InsertPos);
Addr = B.createNoWrapPtrAdd(InsertPos->getAddr(), OffsetVPV, NW);
}
// If the group is reverse, adjust the index to refer to the last vector
// lane instead of the first. We adjust the index from the first vector
// lane, rather than directly getting the pointer for lane VF - 1, because
// the pointer operand of the interleaved access is supposed to be uniform.
if (IG->isReverse()) {
auto *ReversePtr = new VPVectorEndPointerRecipe(
Addr, &Plan.getVF(), getLoadStoreType(IRInsertPos),
-(int64_t)IG->getFactor(), NW, InsertPos->getDebugLoc());
ReversePtr->insertBefore(InsertPos);
auto *ReversePtr = RecipeBuilder.createVectorEndPointerRecipe(
Addr, getLoadStoreType(IRInsertPos), -(int64_t)IG->getFactor(), NW,
InsertPos->getDebugLoc());
B.insert(ReversePtr);
Addr = ReversePtr;
}
auto *VPIG = new VPInterleaveRecipe(IG, Addr, StoredValues,
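To see why AdjustEndPtr must rewrite the offset's uses of VF to EVL ("point to the end of EVL lanes not VF"), consider a tail iteration where only EVL of the VF lanes are active (a sketch; the concrete numbers are illustrative assumptions, not taken from the patch):

#include <cassert>
#include <cstdint>

int64_t endPointerOffset(int64_t Stride, int64_t ActiveLanes) {
  return Stride * (ActiveLanes - 1);
}

int main() {
  const int64_t Stride = -1, VF = 8, EVL = 5;
  assert(endPointerOffset(Stride, VF) == -7);  // full-VF offset: overshoots
  assert(endPointerOffset(Stride, EVL) == -4); // EVL-adjusted offset is needed
}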
25 changes: 20 additions & 5 deletions llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -313,6 +313,25 @@ void UnrollState::unrollRecipeByUF(VPRecipeBase &R) {
Copy->addOperand(VFxPart);
continue;
}
if (auto *VEPR = dyn_cast<VPVectorEndPointerRecipe>(&R)) {
VPBuilder Builder(VEPR);
VPValue *PrevOffset =
cast<VPVectorEndPointerRecipe>(getValueForPart(VEPR, Part - 1))
->getOffset();
Type *IndexTy = TypeInfo.inferScalarType(PrevOffset);
Type *VFTy = TypeInfo.inferScalarType(&Plan.getVF());
VPValue *VF = Builder.createScalarZExtOrTrunc(
&Plan.getVF(), IndexTy, VFTy, DebugLoc::getUnknown());
// Offset = PrevOffset + Stride * VF.
VPValue *VFxStride = Builder.createOverflowingOp(
Instruction::Mul,
{VF, Plan.getConstantInt(IndexTy, VEPR->getStride())});
VPValue *Offset = Builder.createOverflowingOp(Instruction::Add,
{PrevOffset, VFxStride});
Copy->setOperand(0, VEPR->getOperand(0));
Copy->setOperand(1, Offset);
continue;
}
if (auto *Red = dyn_cast<VPReductionRecipe>(&R)) {
auto *Phi = dyn_cast<VPReductionPHIRecipe>(R.getOperand(0));
if (Phi && Phi->isOrdered()) {
@@ -329,14 +348,10 @@ void UnrollState::unrollRecipeByUF(VPRecipeBase &R) {

// Add operand indicating the part to generate code for, to recipes still
// requiring it.
if (isa<VPScalarIVStepsRecipe, VPWidenCanonicalIVRecipe,
VPVectorEndPointerRecipe>(Copy) ||
if (isa<VPScalarIVStepsRecipe, VPWidenCanonicalIVRecipe>(Copy) ||
match(Copy,
m_VPInstruction<VPInstruction::CanonicalIVIncrementForPart>()))
Copy->addOperand(getConstantInt(Part));

if (isa<VPVectorEndPointerRecipe>(R))
Copy->setOperand(0, R.getOperand(0));
}
}

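A numeric walk-through of the per-part recurrence above, Offset = PrevOffset + Stride * VF (a sketch; VF = 4 and Stride = -2, e.g. a reversed factor-2 interleave group, are assumed values):

#include <cstdint>
#include <cstdio>

int main() {
  const int64_t VF = 4, Stride = -2;
  int64_t Offset = Stride * (VF - 1); // part 0: -6
  for (int Part = 0; Part < 4; ++Part) {
    std::printf("part %d: offset %lld\n", Part, (long long)Offset);
    Offset += Stride * VF; // parts 1..3: -14, -22, -30
  }
}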
3 changes: 1 addition & 2 deletions llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
@@ -171,8 +171,7 @@ bool VPlanVerifier::verifyEVLRecipe(const VPInstruction &EVL) const {
}
return VerifyEVLUse(*R, 2);
})
.Case<VPWidenLoadEVLRecipe, VPVectorEndPointerRecipe,
VPInterleaveEVLRecipe>(
.Case<VPWidenLoadEVLRecipe, VPInterleaveEVLRecipe>(
[&](const VPRecipeBase *R) { return VerifyEVLUse(*R, 1); })
.Case<VPInstructionWithType>(
[&](const VPInstructionWithType *S) { return VerifyEVLUse(*S, 0); })