Skip to content

Commit 19fb53d

Browse files
committed
The third conversion method for vp.reverse
This may temporarily cost some performance when EVL tail folding is used.
1 parent 1bda098 commit 19fb53d

File tree

3 files changed

+51
-39
lines changed

3 files changed

+51
-39
lines changed

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3486,6 +3486,15 @@ struct VPWidenStoreEVLRecipe final : public VPWidenMemoryRecipe {
34863486
setMask(Mask);
34873487
}
34883488

3489+
VPWidenStoreEVLRecipe(VPWidenStoreRecipe &S, VPValue *Addr,
3490+
VPValue *StoredVal, VPValue &EVL, VPValue *Mask)
3491+
: VPWidenMemoryRecipe(VPDef::VPWidenStoreEVLSC, S.getIngredient(),
3492+
{Addr, StoredVal, &EVL}, S.isConsecutive(),
3493+
S.isReverse(), S, S.getDebugLoc()) {
3494+
assert(isReverse() && "Only reverse access need to set new stored value");
3495+
setMask(Mask);
3496+
}
3497+
34893498
VP_CLASSOF_IMPL(VPDef::VPWidenStoreEVLSC)
34903499

34913500
/// Return the address accessed by this recipe.

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 41 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -2887,25 +2887,43 @@ static VPRecipeBase *optimizeMaskToEVL(VPValue *HeaderMask,
28872887
return new VPWidenLoadEVLRecipe(cast<VPWidenLoadRecipe>(CurRecipe), Addr,
28882888
EVL, Mask);
28892889

2890-
if (match(&CurRecipe,
2890+
VPValue *ReversedVal;
2891+
if (match(&CurRecipe, m_Reverse(m_VPValue(ReversedVal))) &&
2892+
match(ReversedVal,
28912893
m_MaskedLoad(m_VPValue(EndPtr), m_RemoveMask(HeaderMask, Mask))) &&
28922894
match(EndPtr, m_VecEndPtr(m_VPValue(Addr), m_Specific(&Plan->getVF()))) &&
2893-
cast<VPWidenLoadRecipe>(CurRecipe).isReverse())
2894-
return new VPWidenLoadEVLRecipe(cast<VPWidenLoadRecipe>(CurRecipe),
2895-
AdjustEndPtr(EndPtr), EVL, Mask);
2895+
cast<VPWidenLoadRecipe>(ReversedVal)->isReverse()) {
2896+
auto *LoadR = new VPWidenLoadEVLRecipe(
2897+
*cast<VPWidenLoadRecipe>(ReversedVal), AdjustEndPtr(EndPtr), EVL, Mask);
2898+
LoadR->insertBefore(&CurRecipe);
2899+
return new VPWidenIntrinsicRecipe(
2900+
Intrinsic::experimental_vp_reverse, {LoadR, Plan->getTrue(), &EVL},
2901+
TypeInfo.inferScalarType(LoadR), {}, {}, DL);
2902+
}
28962903

28972904
if (match(&CurRecipe, m_MaskedStore(m_VPValue(Addr), m_VPValue(),
28982905
m_RemoveMask(HeaderMask, Mask))) &&
28992906
!cast<VPWidenStoreRecipe>(CurRecipe).isReverse())
29002907
return new VPWidenStoreEVLRecipe(cast<VPWidenStoreRecipe>(CurRecipe), Addr,
29012908
EVL, Mask);
29022909

2903-
if (match(&CurRecipe, m_MaskedStore(m_VPValue(EndPtr), m_VPValue(),
2910+
VPValue *StoredVal;
2911+
if (match(&CurRecipe, m_MaskedStore(m_VPValue(EndPtr), m_VPValue(StoredVal),
29042912
m_RemoveMask(HeaderMask, Mask))) &&
29052913
match(EndPtr, m_VecEndPtr(m_VPValue(Addr), m_Specific(&Plan->getVF()))) &&
2906-
cast<VPWidenStoreRecipe>(CurRecipe).isReverse())
2907-
return new VPWidenStoreEVLRecipe(cast<VPWidenStoreRecipe>(CurRecipe),
2908-
AdjustEndPtr(EndPtr), EVL, Mask);
2914+
cast<VPWidenStoreRecipe>(CurRecipe).isReverse()) {
2915+
if (match(StoredVal, m_Reverse(m_VPValue(ReversedVal)))) {
2916+
auto *NewReverse = new VPWidenIntrinsicRecipe(
2917+
Intrinsic::experimental_vp_reverse,
2918+
{ReversedVal, Plan->getTrue(), &EVL},
2919+
TypeInfo.inferScalarType(ReversedVal), {}, {},
2920+
cast<VPInstruction>(StoredVal)->getDebugLoc());
2921+
NewReverse->insertBefore(&CurRecipe);
2922+
return new VPWidenStoreEVLRecipe(cast<VPWidenStoreRecipe>(CurRecipe),
2923+
AdjustEndPtr(EndPtr), NewReverse, EVL,
2924+
Mask);
2925+
}
2926+
}
29092927

29102928
if (auto *Rdx = dyn_cast<VPReductionRecipe>(&CurRecipe))
29112929
if (Rdx->isConditional() &&
@@ -2978,7 +2996,6 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
29782996
// contained.
29792997
bool ContainsFORs =
29802998
any_of(Header->phis(), IsaPred<VPFirstOrderRecurrencePHIRecipe>);
2981-
VPValue *PrevEVL = nullptr;
29822999
if (ContainsFORs) {
29833000
// TODO: Use VPInstruction::ExplicitVectorLength to get maximum EVL.
29843001
VPValue *MaxEVL = &Plan.getVF();
@@ -2989,42 +3006,28 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
29893006
TypeInfo.inferScalarType(MaxEVL), DebugLoc::getUnknown());
29903007

29913008
Builder.setInsertPoint(Header, Header->getFirstNonPhi());
2992-
PrevEVL = Builder.createScalarPhi({MaxEVL, &EVL}, DebugLoc::getUnknown(),
2993-
"prev.evl");
2994-
}
2995-
2996-
// Transform the recipes must be converted to vector predication intrinsics
2997-
// even if they do not use header mask.
2998-
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
2999-
vp_depth_first_deep(Plan.getVectorLoopRegion()->getEntry()))) {
3000-
for (VPRecipeBase &R : *VPBB) {
3001-
VPWidenIntrinsicRecipe *NewRecipe = nullptr;
3002-
VPValue *V1, *V2;
3003-
if (match(&R, m_VPInstruction<VPInstruction::FirstOrderRecurrenceSplice>(
3004-
m_VPValue(V1), m_VPValue(V2)))) {
3009+
VPValue *PrevEVL = Builder.createScalarPhi(
3010+
{MaxEVL, &EVL}, DebugLoc::getUnknown(), "prev.evl");
3011+
3012+
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
3013+
vp_depth_first_deep(Plan.getVectorLoopRegion()->getEntry()))) {
3014+
for (VPRecipeBase &R : *VPBB) {
3015+
VPValue *V1, *V2;
3016+
if (!match(&R,
3017+
m_VPInstruction<VPInstruction::FirstOrderRecurrenceSplice>(
3018+
m_VPValue(V1), m_VPValue(V2))))
3019+
continue;
30053020
VPValue *Imm = Plan.getOrAddLiveIn(
30063021
ConstantInt::getSigned(Type::getInt32Ty(Plan.getContext()), -1));
3007-
NewRecipe = new VPWidenIntrinsicRecipe(
3022+
VPWidenIntrinsicRecipe *VPSplice = new VPWidenIntrinsicRecipe(
30083023
Intrinsic::experimental_vp_splice,
30093024
{V1, V2, Imm, Plan.getTrue(), PrevEVL, &EVL},
30103025
TypeInfo.inferScalarType(R.getVPSingleValue()), {}, {},
30113026
R.getDebugLoc());
3027+
VPSplice->insertBefore(&R);
3028+
R.getVPSingleValue()->replaceAllUsesWith(VPSplice);
3029+
ToErase.push_back(&R);
30123030
}
3013-
3014-
// TODO: Only convert reverse to vp.reverse if it uses the result of
3015-
// vp.load, or defines the stored value of vp.store.
3016-
if (match(&R, m_Reverse(m_VPValue(V1)))) {
3017-
NewRecipe = new VPWidenIntrinsicRecipe(
3018-
Intrinsic::experimental_vp_reverse, {V1, Plan.getTrue(), &EVL},
3019-
TypeInfo.inferScalarType(R.getVPSingleValue()), {}, {},
3020-
R.getDebugLoc());
3021-
}
3022-
3023-
if (!NewRecipe)
3024-
continue;
3025-
NewRecipe->insertBefore(&R);
3026-
R.getVPSingleValue()->replaceAllUsesWith(NewRecipe);
3027-
ToErase.push_back(&R);
30283031
}
30293032
}
30303033

llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-uniform-store.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@ define void @lshift_significand(i32 %n, ptr nocapture writeonly %dst) {
1515
; CHECK-NEXT: [[TMP0:%.*]] = sub i64 3, [[SPEC_SELECT]]
1616
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
1717
; CHECK: [[VECTOR_PH]]:
18-
; CHECK-NEXT: [[REVERSE:%.*]] = call <vscale x 2 x i64> @llvm.vector.reverse.nxv2i64(<vscale x 2 x i64> zeroinitializer)
1918
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
2019
; CHECK: [[VECTOR_BODY]]:
2120
; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -24,6 +23,7 @@ define void @lshift_significand(i32 %n, ptr nocapture writeonly %dst) {
2423
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[SPEC_SELECT]], [[EVL_BASED_IV]]
2524
; CHECK-NEXT: [[TMP12:%.*]] = sub nuw nsw i64 1, [[OFFSET_IDX]]
2625
; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP12]]
26+
; CHECK-NEXT: [[REVERSE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vp.reverse.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> splat (i1 true), i32 [[TMP1]])
2727
; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP1]] to i64
2828
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 0, [[TMP4]]
2929
; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[TMP4]], 1

0 commit comments

Comments
 (0)