Skip to content

Commit 628a79e

Browse files
committed
The third conversion method for vp.reverse
This may temporarily cost some performance when EVL tail folding is used.
1 parent 3e387bb commit 628a79e

File tree

3 files changed

+51
-39
lines changed

3 files changed

+51
-39
lines changed

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3484,6 +3484,15 @@ struct VPWidenStoreEVLRecipe final : public VPWidenMemoryRecipe {
34843484
setMask(Mask);
34853485
}
34863486

3487+
VPWidenStoreEVLRecipe(VPWidenStoreRecipe &S, VPValue *Addr,
3488+
VPValue *StoredVal, VPValue &EVL, VPValue *Mask)
3489+
: VPWidenMemoryRecipe(VPDef::VPWidenStoreEVLSC, S.getIngredient(),
3490+
{Addr, StoredVal, &EVL}, S.isConsecutive(),
3491+
S.isReverse(), S, S.getDebugLoc()) {
3492+
assert(isReverse() && "Only reverse access need to set new stored value");
3493+
setMask(Mask);
3494+
}
3495+
34873496
VP_CLASSOF_IMPL(VPDef::VPWidenStoreEVLSC)
34883497

34893498
/// Return the address accessed by this recipe.

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 41 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -2856,25 +2856,43 @@ static VPRecipeBase *optimizeMaskToEVL(VPValue *HeaderMask,
28562856
return new VPWidenLoadEVLRecipe(cast<VPWidenLoadRecipe>(CurRecipe), Addr,
28572857
EVL, Mask);
28582858

2859-
if (match(&CurRecipe,
2859+
VPValue *ReversedVal;
2860+
if (match(&CurRecipe, m_Reverse(m_VPValue(ReversedVal))) &&
2861+
match(ReversedVal,
28602862
m_MaskedLoad(m_VPValue(EndPtr), m_RemoveMask(HeaderMask, Mask))) &&
28612863
match(EndPtr, m_VecEndPtr(m_VPValue(Addr), m_Specific(&Plan->getVF()))) &&
2862-
cast<VPWidenLoadRecipe>(CurRecipe).isReverse())
2863-
return new VPWidenLoadEVLRecipe(cast<VPWidenLoadRecipe>(CurRecipe),
2864-
AdjustEndPtr(EndPtr), EVL, Mask);
2864+
cast<VPWidenLoadRecipe>(ReversedVal)->isReverse()) {
2865+
auto *LoadR = new VPWidenLoadEVLRecipe(
2866+
*cast<VPWidenLoadRecipe>(ReversedVal), AdjustEndPtr(EndPtr), EVL, Mask);
2867+
LoadR->insertBefore(&CurRecipe);
2868+
return new VPWidenIntrinsicRecipe(
2869+
Intrinsic::experimental_vp_reverse, {LoadR, Plan->getTrue(), &EVL},
2870+
TypeInfo.inferScalarType(LoadR), {}, {}, DL);
2871+
}
28652872

28662873
if (match(&CurRecipe, m_MaskedStore(m_VPValue(Addr), m_VPValue(),
28672874
m_RemoveMask(HeaderMask, Mask))) &&
28682875
!cast<VPWidenStoreRecipe>(CurRecipe).isReverse())
28692876
return new VPWidenStoreEVLRecipe(cast<VPWidenStoreRecipe>(CurRecipe), Addr,
28702877
EVL, Mask);
28712878

2872-
if (match(&CurRecipe, m_MaskedStore(m_VPValue(EndPtr), m_VPValue(),
2879+
VPValue *StoredVal;
2880+
if (match(&CurRecipe, m_MaskedStore(m_VPValue(EndPtr), m_VPValue(StoredVal),
28732881
m_RemoveMask(HeaderMask, Mask))) &&
28742882
match(EndPtr, m_VecEndPtr(m_VPValue(Addr), m_Specific(&Plan->getVF()))) &&
2875-
cast<VPWidenStoreRecipe>(CurRecipe).isReverse())
2876-
return new VPWidenStoreEVLRecipe(cast<VPWidenStoreRecipe>(CurRecipe),
2877-
AdjustEndPtr(EndPtr), EVL, Mask);
2883+
cast<VPWidenStoreRecipe>(CurRecipe).isReverse()) {
2884+
if (match(StoredVal, m_Reverse(m_VPValue(ReversedVal)))) {
2885+
auto *NewReverse = new VPWidenIntrinsicRecipe(
2886+
Intrinsic::experimental_vp_reverse,
2887+
{ReversedVal, Plan->getTrue(), &EVL},
2888+
TypeInfo.inferScalarType(ReversedVal), {}, {},
2889+
cast<VPInstruction>(StoredVal)->getDebugLoc());
2890+
NewReverse->insertBefore(&CurRecipe);
2891+
return new VPWidenStoreEVLRecipe(cast<VPWidenStoreRecipe>(CurRecipe),
2892+
AdjustEndPtr(EndPtr), NewReverse, EVL,
2893+
Mask);
2894+
}
2895+
}
28782896

28792897
if (auto *Rdx = dyn_cast<VPReductionRecipe>(&CurRecipe))
28802898
if (Rdx->isConditional() &&
@@ -2947,7 +2965,6 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
29472965
// contained.
29482966
bool ContainsFORs =
29492967
any_of(Header->phis(), IsaPred<VPFirstOrderRecurrencePHIRecipe>);
2950-
VPValue *PrevEVL = nullptr;
29512968
if (ContainsFORs) {
29522969
// TODO: Use VPInstruction::ExplicitVectorLength to get maximum EVL.
29532970
VPValue *MaxEVL = &Plan.getVF();
@@ -2958,42 +2975,28 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
29582975
TypeInfo.inferScalarType(MaxEVL), DebugLoc::getUnknown());
29592976

29602977
Builder.setInsertPoint(Header, Header->getFirstNonPhi());
2961-
PrevEVL = Builder.createScalarPhi({MaxEVL, &EVL}, DebugLoc::getUnknown(),
2962-
"prev.evl");
2963-
}
2964-
2965-
// Transform the recipes must be converted to vector predication intrinsics
2966-
// even if they do not use header mask.
2967-
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
2968-
vp_depth_first_deep(Plan.getVectorLoopRegion()->getEntry()))) {
2969-
for (VPRecipeBase &R : *VPBB) {
2970-
VPWidenIntrinsicRecipe *NewRecipe = nullptr;
2971-
VPValue *V1, *V2;
2972-
if (match(&R, m_VPInstruction<VPInstruction::FirstOrderRecurrenceSplice>(
2973-
m_VPValue(V1), m_VPValue(V2)))) {
2978+
VPValue *PrevEVL = Builder.createScalarPhi(
2979+
{MaxEVL, &EVL}, DebugLoc::getUnknown(), "prev.evl");
2980+
2981+
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
2982+
vp_depth_first_deep(Plan.getVectorLoopRegion()->getEntry()))) {
2983+
for (VPRecipeBase &R : *VPBB) {
2984+
VPValue *V1, *V2;
2985+
if (!match(&R,
2986+
m_VPInstruction<VPInstruction::FirstOrderRecurrenceSplice>(
2987+
m_VPValue(V1), m_VPValue(V2))))
2988+
continue;
29742989
VPValue *Imm = Plan.getOrAddLiveIn(
29752990
ConstantInt::getSigned(Type::getInt32Ty(Plan.getContext()), -1));
2976-
NewRecipe = new VPWidenIntrinsicRecipe(
2991+
VPWidenIntrinsicRecipe *VPSplice = new VPWidenIntrinsicRecipe(
29772992
Intrinsic::experimental_vp_splice,
29782993
{V1, V2, Imm, Plan.getTrue(), PrevEVL, &EVL},
29792994
TypeInfo.inferScalarType(R.getVPSingleValue()), {}, {},
29802995
R.getDebugLoc());
2996+
VPSplice->insertBefore(&R);
2997+
R.getVPSingleValue()->replaceAllUsesWith(VPSplice);
2998+
ToErase.push_back(&R);
29812999
}
2982-
2983-
// TODO: Only convert reverse to vp.reverse if it uses the result of
2984-
// vp.load, or defines the stored value of vp.store.
2985-
if (match(&R, m_Reverse(m_VPValue(V1)))) {
2986-
NewRecipe = new VPWidenIntrinsicRecipe(
2987-
Intrinsic::experimental_vp_reverse, {V1, Plan.getTrue(), &EVL},
2988-
TypeInfo.inferScalarType(R.getVPSingleValue()), {}, {},
2989-
R.getDebugLoc());
2990-
}
2991-
2992-
if (!NewRecipe)
2993-
continue;
2994-
NewRecipe->insertBefore(&R);
2995-
R.getVPSingleValue()->replaceAllUsesWith(NewRecipe);
2996-
ToErase.push_back(&R);
29973000
}
29983001
}
29993002

llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-uniform-store.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@ define void @lshift_significand(i32 %n, ptr nocapture writeonly %dst) {
1515
; CHECK-NEXT: [[TMP0:%.*]] = sub i64 3, [[SPEC_SELECT]]
1616
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
1717
; CHECK: [[VECTOR_PH]]:
18-
; CHECK-NEXT: [[REVERSE:%.*]] = call <vscale x 2 x i64> @llvm.vector.reverse.nxv2i64(<vscale x 2 x i64> zeroinitializer)
1918
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
2019
; CHECK: [[VECTOR_BODY]]:
2120
; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -24,6 +23,7 @@ define void @lshift_significand(i32 %n, ptr nocapture writeonly %dst) {
2423
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[SPEC_SELECT]], [[EVL_BASED_IV]]
2524
; CHECK-NEXT: [[TMP12:%.*]] = sub nuw nsw i64 1, [[OFFSET_IDX]]
2625
; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP12]]
26+
; CHECK-NEXT: [[REVERSE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vp.reverse.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> splat (i1 true), i32 [[TMP1]])
2727
; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP1]] to i64
2828
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 0, [[TMP4]]
2929
; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[TMP4]], 1

0 commit comments

Comments
 (0)