@@ -2856,25 +2856,43 @@ static VPRecipeBase *optimizeMaskToEVL(VPValue *HeaderMask,
     return new VPWidenLoadEVLRecipe(cast<VPWidenLoadRecipe>(CurRecipe), Addr,
                                     EVL, Mask);
 
-  if (match(&CurRecipe,
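+  // A reverse of a reverse masked load becomes a vp.load from the
+  // EVL-adjusted end pointer, followed by a vp.reverse with the same EVL.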
+  VPValue *ReversedVal;
+  if (match(&CurRecipe, m_Reverse(m_VPValue(ReversedVal))) &&
+      match(ReversedVal,
             m_MaskedLoad(m_VPValue(EndPtr), m_RemoveMask(HeaderMask, Mask))) &&
       match(EndPtr, m_VecEndPtr(m_VPValue(Addr), m_Specific(&Plan->getVF()))) &&
-      cast<VPWidenLoadRecipe>(CurRecipe).isReverse())
-    return new VPWidenLoadEVLRecipe(cast<VPWidenLoadRecipe>(CurRecipe),
-                                    AdjustEndPtr(EndPtr), EVL, Mask);
+      cast<VPWidenLoadRecipe>(ReversedVal)->isReverse()) {
+    auto *LoadR = new VPWidenLoadEVLRecipe(
+        *cast<VPWidenLoadRecipe>(ReversedVal), AdjustEndPtr(EndPtr), EVL, Mask);
+    LoadR->insertBefore(&CurRecipe);
+    return new VPWidenIntrinsicRecipe(
+        Intrinsic::experimental_vp_reverse, {LoadR, Plan->getTrue(), &EVL},
+        TypeInfo.inferScalarType(LoadR), {}, {}, DL);
+  }
 
   if (match(&CurRecipe, m_MaskedStore(m_VPValue(Addr), m_VPValue(),
                                       m_RemoveMask(HeaderMask, Mask))) &&
       !cast<VPWidenStoreRecipe>(CurRecipe).isReverse())
     return new VPWidenStoreEVLRecipe(cast<VPWidenStoreRecipe>(CurRecipe), Addr,
                                      EVL, Mask);
 
-  if (match(&CurRecipe, m_MaskedStore(m_VPValue(EndPtr), m_VPValue(),
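+  // For a reverse masked store whose stored value is itself a reverse,
+  // re-reverse the unreversed value with vp.reverse and store it through the
+  // EVL-adjusted end pointer.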
+  VPValue *StoredVal;
+  if (match(&CurRecipe, m_MaskedStore(m_VPValue(EndPtr), m_VPValue(StoredVal),
                                       m_RemoveMask(HeaderMask, Mask))) &&
       match(EndPtr, m_VecEndPtr(m_VPValue(Addr), m_Specific(&Plan->getVF()))) &&
-      cast<VPWidenStoreRecipe>(CurRecipe).isReverse())
-    return new VPWidenStoreEVLRecipe(cast<VPWidenStoreRecipe>(CurRecipe),
-                                     AdjustEndPtr(EndPtr), EVL, Mask);
+      cast<VPWidenStoreRecipe>(CurRecipe).isReverse()) {
+    if (match(StoredVal, m_Reverse(m_VPValue(ReversedVal)))) {
+      auto *NewReverse = new VPWidenIntrinsicRecipe(
+          Intrinsic::experimental_vp_reverse,
+          {ReversedVal, Plan->getTrue(), &EVL},
+          TypeInfo.inferScalarType(ReversedVal), {}, {},
+          cast<VPInstruction>(StoredVal)->getDebugLoc());
+      NewReverse->insertBefore(&CurRecipe);
+      return new VPWidenStoreEVLRecipe(cast<VPWidenStoreRecipe>(CurRecipe),
+                                       AdjustEndPtr(EndPtr), NewReverse, EVL,
+                                       Mask);
+    }
+  }
 
   if (auto *Rdx = dyn_cast<VPReductionRecipe>(&CurRecipe))
     if (Rdx->isConditional() &&
@@ -2947,7 +2965,6 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
   // contained.
   bool ContainsFORs =
       any_of(Header->phis(), IsaPred<VPFirstOrderRecurrencePHIRecipe>);
-  VPValue *PrevEVL = nullptr;
   if (ContainsFORs) {
     // TODO: Use VPInstruction::ExplicitVectorLength to get maximum EVL.
     VPValue *MaxEVL = &Plan.getVF();
@@ -2958,42 +2975,28 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
         TypeInfo.inferScalarType(MaxEVL), DebugLoc::getUnknown());
 
     Builder.setInsertPoint(Header, Header->getFirstNonPhi());
-    PrevEVL = Builder.createScalarPhi({MaxEVL, &EVL}, DebugLoc::getUnknown(),
-                                      "prev.evl");
-  }
-
-  // Transform the recipes must be converted to vector predication intrinsics
-  // even if they do not use header mask.
-  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
-           vp_depth_first_deep(Plan.getVectorLoopRegion()->getEntry()))) {
-    for (VPRecipeBase &R : *VPBB) {
-      VPWidenIntrinsicRecipe *NewRecipe = nullptr;
-      VPValue *V1, *V2;
-      if (match(&R, m_VPInstruction<VPInstruction::FirstOrderRecurrenceSplice>(
-                        m_VPValue(V1), m_VPValue(V2)))) {
+    VPValue *PrevEVL = Builder.createScalarPhi(
+        {MaxEVL, &EVL}, DebugLoc::getUnknown(), "prev.evl");
+
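+    // Replace VPInstruction::FirstOrderRecurrenceSplice with a vp.splice
+    // intrinsic that uses the EVL of the previous iteration (PrevEVL) and
+    // the current EVL.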
+    for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
+             vp_depth_first_deep(Plan.getVectorLoopRegion()->getEntry()))) {
+      for (VPRecipeBase &R : *VPBB) {
+        VPValue *V1, *V2;
+        if (!match(&R,
+                   m_VPInstruction<VPInstruction::FirstOrderRecurrenceSplice>(
+                       m_VPValue(V1), m_VPValue(V2))))
+          continue;
         VPValue *Imm = Plan.getOrAddLiveIn(
             ConstantInt::getSigned(Type::getInt32Ty(Plan.getContext()), -1));
-        NewRecipe = new VPWidenIntrinsicRecipe(
+        VPWidenIntrinsicRecipe *VPSplice = new VPWidenIntrinsicRecipe(
             Intrinsic::experimental_vp_splice,
             {V1, V2, Imm, Plan.getTrue(), PrevEVL, &EVL},
             TypeInfo.inferScalarType(R.getVPSingleValue()), {}, {},
             R.getDebugLoc());
+        VPSplice->insertBefore(&R);
+        R.getVPSingleValue()->replaceAllUsesWith(VPSplice);
+        ToErase.push_back(&R);
       }
-
-      // TODO: Only convert reverse to vp.reverse if it uses the result of
-      // vp.load, or defines the stored value of vp.store.
-      if (match(&R, m_Reverse(m_VPValue(V1)))) {
-        NewRecipe = new VPWidenIntrinsicRecipe(
-            Intrinsic::experimental_vp_reverse, {V1, Plan.getTrue(), &EVL},
-            TypeInfo.inferScalarType(R.getVPSingleValue()), {}, {},
-            R.getDebugLoc());
-      }
-
-      if (!NewRecipe)
-        continue;
-      NewRecipe->insertBefore(&R);
-      R.getVPSingleValue()->replaceAllUsesWith(NewRecipe);
-      ToErase.push_back(&R);
     }
   }
 