@@ -1143,6 +1143,7 @@ class LoopVectorizationCostModel {
11431143 CM_Widen_Reverse, // For consecutive accesses with stride -1.
11441144 CM_Interleave,
11451145 CM_GatherScatter,
1146+ CM_Strided,
11461147 CM_Scalarize,
11471148 CM_VectorCall,
11481149 CM_IntrinsicCall
@@ -6160,6 +6161,17 @@ void LoopVectorizationCostModel::setCostBasedWideningDecision(ElementCount VF) {
61606161 " Expected consecutive stride." );
61616162 InstWidening Decision =
61626163 ConsecutiveStride == 1 ? CM_Widen : CM_Widen_Reverse;
6164+ // Consider using strided load/store for consecutive reverse accesses to
6165+ // achieve more efficient memory operations.
6166+ if (ConsecutiveStride == -1 ) {
6167+ const InstructionCost StridedLoadStoreCost =
6168+ isLegalStridedLoadStore (&I, VF) ? getStridedLoadStoreCost (&I, VF)
6169+ : InstructionCost::getInvalid ();
6170+ if (StridedLoadStoreCost < Cost) {
6171+ Decision = CM_Strided;
6172+ Cost = StridedLoadStoreCost;
6173+ }
6174+ }
61636175 setWideningDecision (&I, VF, Decision, Cost);
61646176 continue ;
61656177 }
@@ -6805,6 +6817,8 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I,
68056817 return TTI::CastContextHint::Normal;
68066818
68076819 switch (getWideningDecision (I, VF)) {
6820+ // TODO: New CastContextHint for strided accesses.
6821+ case LoopVectorizationCostModel::CM_Strided:
68086822 case LoopVectorizationCostModel::CM_GatherScatter:
68096823 return TTI::CastContextHint::GatherScatter;
68106824 case LoopVectorizationCostModel::CM_Interleave:
@@ -8363,6 +8377,7 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
83638377 bool Reverse = Decision == LoopVectorizationCostModel::CM_Widen_Reverse;
83648378 bool Consecutive =
83658379 Reverse || Decision == LoopVectorizationCostModel::CM_Widen;
8380+ bool Strided = Decision == LoopVectorizationCostModel::CM_Strided;
83668381
83678382 VPValue *Ptr = isa<LoadInst>(I) ? Operands[0 ] : Operands[1 ];
83688383 if (Consecutive) {
@@ -8389,12 +8404,12 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
83898404 Ptr = VectorPtr;
83908405 }
83918406 if (LoadInst *Load = dyn_cast<LoadInst>(I))
8392- return new VPWidenLoadRecipe (*Load, Ptr, Mask, Consecutive, Reverse, false ,
8393- I->getDebugLoc ());
8407+ return new VPWidenLoadRecipe (*Load, Ptr, Mask, Consecutive, Reverse,
8408+ Strided, I->getDebugLoc ());
83948409
83958410 StoreInst *Store = cast<StoreInst>(I);
83968411 return new VPWidenStoreRecipe (*Store, Ptr, Operands[0 ], Mask, Consecutive,
8397- Reverse, false , I->getDebugLoc ());
8412+ Reverse, Strided , I->getDebugLoc ());
83988413}
83998414
84008415/// Creates a VPWidenIntOrFpInductionRecipe for \p Phi. If needed, it will also
0 commit comments