@@ -2887,25 +2887,43 @@ static VPRecipeBase *optimizeMaskToEVL(VPValue *HeaderMask,
28872887 return new VPWidenLoadEVLRecipe (cast<VPWidenLoadRecipe>(CurRecipe), Addr,
28882888 EVL, Mask);
28892889
2890- if (match (&CurRecipe,
2890+ VPValue *ReversedVal;
2891+ if (match (&CurRecipe, m_Reverse (m_VPValue (ReversedVal))) &&
2892+ match (ReversedVal,
28912893 m_MaskedLoad (m_VPValue (EndPtr), m_RemoveMask (HeaderMask, Mask))) &&
28922894 match (EndPtr, m_VecEndPtr (m_VPValue (Addr), m_Specific (&Plan->getVF ()))) &&
2893- cast<VPWidenLoadRecipe>(CurRecipe).isReverse ())
2894- return new VPWidenLoadEVLRecipe (cast<VPWidenLoadRecipe>(CurRecipe),
2895- AdjustEndPtr (EndPtr), EVL, Mask);
2895+ cast<VPWidenLoadRecipe>(ReversedVal)->isReverse ()) {
2896+ auto *LoadR = new VPWidenLoadEVLRecipe (
2897+ *cast<VPWidenLoadRecipe>(ReversedVal), AdjustEndPtr (EndPtr), EVL, Mask);
2898+ LoadR->insertBefore (&CurRecipe);
2899+ return new VPWidenIntrinsicRecipe (
2900+ Intrinsic::experimental_vp_reverse, {LoadR, Plan->getTrue (), &EVL},
2901+ TypeInfo.inferScalarType (LoadR), {}, {}, DL);
2902+ }
28962903
28972904 if (match (&CurRecipe, m_MaskedStore (m_VPValue (Addr), m_VPValue (),
28982905 m_RemoveMask (HeaderMask, Mask))) &&
28992906 !cast<VPWidenStoreRecipe>(CurRecipe).isReverse ())
29002907 return new VPWidenStoreEVLRecipe (cast<VPWidenStoreRecipe>(CurRecipe), Addr,
29012908 EVL, Mask);
29022909
2903- if (match (&CurRecipe, m_MaskedStore (m_VPValue (EndPtr), m_VPValue (),
2910+ VPValue *StoredVal;
2911+ if (match (&CurRecipe, m_MaskedStore (m_VPValue (EndPtr), m_VPValue (StoredVal),
29042912 m_RemoveMask (HeaderMask, Mask))) &&
29052913 match (EndPtr, m_VecEndPtr (m_VPValue (Addr), m_Specific (&Plan->getVF ()))) &&
2906- cast<VPWidenStoreRecipe>(CurRecipe).isReverse ())
2907- return new VPWidenStoreEVLRecipe (cast<VPWidenStoreRecipe>(CurRecipe),
2908- AdjustEndPtr (EndPtr), EVL, Mask);
2914+ cast<VPWidenStoreRecipe>(CurRecipe).isReverse ()) {
2915+ if (match (StoredVal, m_Reverse (m_VPValue (ReversedVal)))) {
2916+ auto *NewReverse = new VPWidenIntrinsicRecipe (
2917+ Intrinsic::experimental_vp_reverse,
2918+ {ReversedVal, Plan->getTrue (), &EVL},
2919+ TypeInfo.inferScalarType (ReversedVal), {}, {},
2920+ cast<VPInstruction>(StoredVal)->getDebugLoc ());
2921+ NewReverse->insertBefore (&CurRecipe);
2922+ return new VPWidenStoreEVLRecipe (cast<VPWidenStoreRecipe>(CurRecipe),
2923+ AdjustEndPtr (EndPtr), NewReverse, EVL,
2924+ Mask);
2925+ }
2926+ }
29092927
29102928 if (auto *Rdx = dyn_cast<VPReductionRecipe>(&CurRecipe))
29112929 if (Rdx->isConditional () &&
@@ -2978,7 +2996,6 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
29782996 // contained.
29792997 bool ContainsFORs =
29802998 any_of (Header->phis (), IsaPred<VPFirstOrderRecurrencePHIRecipe>);
2981- VPValue *PrevEVL = nullptr ;
29822999 if (ContainsFORs) {
29833000 // TODO: Use VPInstruction::ExplicitVectorLength to get maximum EVL.
29843001 VPValue *MaxEVL = &Plan.getVF ();
@@ -2989,42 +3006,28 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
29893006 TypeInfo.inferScalarType (MaxEVL), DebugLoc::getUnknown ());
29903007
29913008 Builder.setInsertPoint (Header, Header->getFirstNonPhi ());
2992- PrevEVL = Builder.createScalarPhi ({MaxEVL, &EVL}, DebugLoc::getUnknown (),
2993- " prev.evl" );
2994- }
2995-
2996- // Transform the recipes must be converted to vector predication intrinsics
2997- // even if they do not use header mask.
2998- for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
2999- vp_depth_first_deep (Plan.getVectorLoopRegion ()->getEntry ()))) {
3000- for (VPRecipeBase &R : *VPBB) {
3001- VPWidenIntrinsicRecipe *NewRecipe = nullptr ;
3002- VPValue *V1, *V2;
3003- if (match (&R, m_VPInstruction<VPInstruction::FirstOrderRecurrenceSplice>(
3004- m_VPValue (V1), m_VPValue (V2)))) {
3009+ VPValue *PrevEVL = Builder.createScalarPhi (
3010+ {MaxEVL, &EVL}, DebugLoc::getUnknown (), " prev.evl" );
3011+
3012+ for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
3013+ vp_depth_first_deep (Plan.getVectorLoopRegion ()->getEntry ()))) {
3014+ for (VPRecipeBase &R : *VPBB) {
3015+ VPValue *V1, *V2;
3016+ if (!match (&R,
3017+ m_VPInstruction<VPInstruction::FirstOrderRecurrenceSplice>(
3018+ m_VPValue (V1), m_VPValue (V2))))
3019+ continue ;
30053020 VPValue *Imm = Plan.getOrAddLiveIn (
30063021 ConstantInt::getSigned (Type::getInt32Ty (Plan.getContext ()), -1 ));
3007- NewRecipe = new VPWidenIntrinsicRecipe (
3022+ VPWidenIntrinsicRecipe *VPSplice = new VPWidenIntrinsicRecipe (
30083023 Intrinsic::experimental_vp_splice,
30093024 {V1, V2, Imm, Plan.getTrue (), PrevEVL, &EVL},
30103025 TypeInfo.inferScalarType (R.getVPSingleValue ()), {}, {},
30113026 R.getDebugLoc ());
3027+ VPSplice->insertBefore (&R);
3028+ R.getVPSingleValue ()->replaceAllUsesWith (VPSplice);
3029+ ToErase.push_back (&R);
30123030 }
3013-
3014- // TODO: Only convert reverse to vp.reverse if it uses the result of
3015- // vp.load, or defines the stored value of vp.store.
3016- if (match (&R, m_Reverse (m_VPValue (V1)))) {
3017- NewRecipe = new VPWidenIntrinsicRecipe (
3018- Intrinsic::experimental_vp_reverse, {V1, Plan.getTrue (), &EVL},
3019- TypeInfo.inferScalarType (R.getVPSingleValue ()), {}, {},
3020- R.getDebugLoc ());
3021- }
3022-
3023- if (!NewRecipe)
3024- continue ;
3025- NewRecipe->insertBefore (&R);
3026- R.getVPSingleValue ()->replaceAllUsesWith (NewRecipe);
3027- ToErase.push_back (&R);
30283031 }
30293032 }
30303033
0 commit comments