diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 082489f70f1c6..05e8aa81ecf46 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7660,9 +7660,9 @@ VPWidenMemoryRecipe *VPRecipeBuilder::tryToWidenMemory(VPInstruction *VPI,
         CM.foldTailByMasking() || !GEP
             ? GEPNoWrapFlags::none()
             : GEP->getNoWrapFlags().withoutNoUnsignedWrap();
-    VectorPtr = new VPVectorEndPointerRecipe(
-        Ptr, &Plan.getVF(), getLoadStoreType(I),
-        /*Stride*/ -1, Flags, VPI->getDebugLoc());
+    VectorPtr = createVectorEndPointerRecipe(Ptr, getLoadStoreType(I),
+                                             /*Stride*/ -1, Flags,
+                                             VPI->getDebugLoc());
   } else {
     VectorPtr = new VPVectorPointerRecipe(Ptr, getLoadStoreType(I),
                                           GEP ? GEP->getNoWrapFlags()
@@ -8351,6 +8351,25 @@ VPRecipeBuilder::tryToCreatePartialReduction(VPInstruction *Reduction,
                              RdxUnordered{/*VFScaleFactor=*/ScaleFactor},
                              ReductionI->getDebugLoc());
 }
 
+VPVectorEndPointerRecipe *VPRecipeBuilder::createVectorEndPointerRecipe(
+    VPValue *Ptr, Type *SourceElementType, int64_t Stride,
+    GEPNoWrapFlags GEPFlags, DebugLoc DbgLoc) {
+  // Offset for Part 0 = Stride * (VF - 1).
+  VPTypeAnalysis TypeInfo(Plan);
+  const DataLayout &DL =
+      Plan.getScalarHeader()->getIRBasicBlock()->getDataLayout();
+  Type *IndexTy = DL.getIndexType(TypeInfo.inferScalarType(Ptr));
+  Type *VFTy = TypeInfo.inferScalarType(&Plan.getVF());
+  VPValue *VF = Builder.createScalarZExtOrTrunc(&Plan.getVF(), IndexTy, VFTy,
+                                                DebugLoc::getUnknown());
+  VPValue *VFMinusOne = Builder.createOverflowingOp(
+      Instruction::Sub, {VF, Plan.getConstantInt(IndexTy, 1)}, {true, true});
+  VPValue *StridexVFMinusOne = Builder.createOverflowingOp(
+      Instruction::Mul, {VFMinusOne, Plan.getConstantInt(IndexTy, Stride)});
+  return new VPVectorEndPointerRecipe(Ptr, StridexVFMinusOne, SourceElementType,
+                                      Stride, GEPFlags, DbgLoc);
+}
+
 void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
                                                         ElementCount MaxVF) {
   if (ElementCount::isKnownGT(MinVF, MaxVF))
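Note on the arithmetic the new helper materializes: the part-0 end pointer of a reversed access sits Stride * (VF - 1) elements from the base. A minimal standalone sketch of that value (plain C++, not the VPlan API; the function name is hypothetical):

  #include <cassert>
  #include <cstdint>

  // Offset for part 0 of a reversed access: the last lane is
  // Stride * (VF - 1) elements from the base pointer (Stride is
  // negative: -1 for plain reversed loads/stores, -Factor for
  // reversed interleave groups).
  static int64_t part0Offset(int64_t Stride, int64_t VF) {
    return Stride * (VF - 1);
  }

  int main() {
    assert(part0Offset(-1, 4) == -3); // fixed VF=4 tests: gep ..., i64 -3
    assert(part0Offset(-1, 8) == -7); // fixed VF=8 tests: gep ..., i64 -7
    assert(part0Offset(-2, 4) == -6); // factor-2 reversed interleave group
  }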
diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
index 1808be118cd2a..54d21ec00a932 100644
--- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
+++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
@@ -149,6 +149,9 @@ class VPRecipeBuilder {
                : std::make_optional(It->second);
   }
 
+  void setInsertPoint(VPRecipeBase *IP) { Builder.setInsertPoint(IP); }
+  VPBuilder &getBuilder() const { return Builder; }
+
   /// Find all possible partial reductions in the loop and track all of those
   /// that are valid so recipes can be formed later.
   void collectScaledReductions(VFRange &Range);
@@ -162,6 +165,12 @@ class VPRecipeBuilder {
   VPRecipeBase *tryToCreatePartialReduction(VPInstruction *Reduction,
                                             unsigned ScaleFactor);
 
+  /// Create and return a VectorEndPointer recipe.
+  VPVectorEndPointerRecipe *
+  createVectorEndPointerRecipe(VPValue *Ptr, Type *SourceElementType,
+                               int64_t Stride, GEPNoWrapFlags GEPFlags,
+                               DebugLoc DbgLoc);
+
   /// Set the recipe created for given ingredient.
   void setRecipe(Instruction *I, VPRecipeBase *R) {
     assert(!Ingredient2Recipe.contains(I) &&
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 7023a5d4af176..c4010afbe3ef1 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1907,27 +1907,27 @@ class LLVM_ABI_FOR_TEST VPWidenGEPRecipe : public VPRecipeWithIRFlags {
 /// A recipe to compute a pointer to the last element of each part of a widened
 /// memory access for widened memory accesses of IndexedTy. Used for
 /// VPWidenMemoryRecipes or VPInterleaveRecipes that are reversed.
-class VPVectorEndPointerRecipe : public VPRecipeWithIRFlags,
-                                 public VPUnrollPartAccessor<2> {
-  Type *IndexedTy;
+class VPVectorEndPointerRecipe : public VPRecipeWithIRFlags {
+  Type *SourceElementTy;
 
   /// The constant stride of the pointer computed by this recipe, expressed in
-  /// units of IndexedTy.
+  /// units of SourceElementTy.
   int64_t Stride;
 
 public:
-  VPVectorEndPointerRecipe(VPValue *Ptr, VPValue *VF, Type *IndexedTy,
+  VPVectorEndPointerRecipe(VPValue *Ptr, VPValue *Offset, Type *SourceElementTy,
                            int64_t Stride, GEPNoWrapFlags GEPFlags, DebugLoc DL)
-      : VPRecipeWithIRFlags(VPDef::VPVectorEndPointerSC,
-                            ArrayRef<VPValue *>({Ptr, VF}), GEPFlags, DL),
-        IndexedTy(IndexedTy), Stride(Stride) {
+      : VPRecipeWithIRFlags(VPDef::VPVectorEndPointerSC, {Ptr, Offset},
+                            GEPFlags, DL),
+        SourceElementTy(SourceElementTy), Stride(Stride) {
     assert(Stride < 0 && "Stride must be negative");
   }
 
   VP_CLASSOF_IMPL(VPDef::VPVectorEndPointerSC)
 
-  VPValue *getVFValue() { return getOperand(1); }
-  const VPValue *getVFValue() const { return getOperand(1); }
+  int64_t getStride() const { return Stride; }
+  Type *getSourceElementType() const { return SourceElementTy; }
+  VPValue *getOffset() const { return getOperand(1); }
 
   void execute(VPTransformState &State) override;
 
@@ -1953,9 +1953,9 @@ class VPVectorEndPointerRecipe : public VPRecipeWithIRFlags,
   }
 
   VPVectorEndPointerRecipe *clone() override {
-    return new VPVectorEndPointerRecipe(getOperand(0), getVFValue(), IndexedTy,
-                                        Stride, getGEPNoWrapFlags(),
-                                        getDebugLoc());
+    return new VPVectorEndPointerRecipe(getOperand(0), getOffset(),
+                                        getSourceElementType(), getStride(),
+                                        getGEPNoWrapFlags(), getDebugLoc());
   }
 
 protected:
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index fb7aaec4e93a9..443a95104e722 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -2488,28 +2488,10 @@ void VPWidenGEPRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
 
 void VPVectorEndPointerRecipe::execute(VPTransformState &State) {
   auto &Builder = State.Builder;
-  unsigned CurrentPart = getUnrollPart(*this);
-  const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout();
-  Type *IndexTy = DL.getIndexType(State.TypeAnalysis.inferScalarType(this));
-
-  // The wide store needs to start at the last vector element.
-  Value *RunTimeVF = State.get(getVFValue(), VPLane(0));
-  if (IndexTy != RunTimeVF->getType())
-    RunTimeVF = Builder.CreateZExtOrTrunc(RunTimeVF, IndexTy);
-  // NumElt = Stride * CurrentPart * RunTimeVF
-  Value *NumElt = Builder.CreateMul(
-      ConstantInt::get(IndexTy, Stride * (int64_t)CurrentPart), RunTimeVF);
-  // LastLane = Stride * (RunTimeVF - 1)
-  Value *LastLane = Builder.CreateSub(RunTimeVF, ConstantInt::get(IndexTy, 1));
-  if (Stride != 1)
-    LastLane =
-        Builder.CreateMul(ConstantInt::getSigned(IndexTy, Stride), LastLane);
-  Value *Ptr = State.get(getOperand(0), VPLane(0));
-  Value *ResultPtr =
-      Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", getGEPNoWrapFlags());
-  ResultPtr = Builder.CreateGEP(IndexedTy, ResultPtr, LastLane, "",
-                                getGEPNoWrapFlags());
-
+  Value *Ptr = State.get(getOperand(0), true);
+  Value *Offset = State.get(getOffset(), true);
+  Value *ResultPtr = Builder.CreateGEP(getSourceElementType(), Ptr, Offset, "",
+                                       getGEPNoWrapFlags());
   State.set(this, ResultPtr, /*IsScalar*/ true);
 }
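The old execute() emitted two GEPs per part from the part number and runtime VF; the new one emits a single GEP by a precomputed offset. A small standalone check of the equivalence, assuming the per-part step introduced in the VPlanUnroll.cpp change further down (plain C++; all names hypothetical):

  #include <cassert>
  #include <cstdint>

  // Old lowering, per part: base + Stride*Part*VF, then + Stride*(VF-1).
  static int64_t oldOffset(int64_t Stride, int64_t Part, int64_t VF) {
    return Stride * Part * VF + Stride * (VF - 1);
  }

  // New lowering: one precomputed offset per part, seeded with
  // Stride*(VF-1) for part 0 and stepped by Stride*VF per part.
  static int64_t newOffset(int64_t Stride, int64_t Part, int64_t VF) {
    int64_t Offset = Stride * (VF - 1);
    for (int64_t P = 0; P < Part; ++P)
      Offset += Stride * VF;
    return Offset;
  }

  int main() {
    for (int64_t Part = 0; Part < 4; ++Part)
      for (int64_t VF = 1; VF <= 16; VF *= 2)
        assert(oldOffset(-1, Part, VF) == newOffset(-1, Part, VF));
  }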
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 9c4f4246deb41..45238d21996a0 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -783,6 +783,15 @@ static SmallVector<VPUser *> collectUsersRecursively(VPValue *V) {
   return Users.takeVector();
 }
 
+static SmallVector<VPValue *> collectOperandsRecursively(VPRecipeBase *R) {
+  SetVector<VPValue *> Operands(llvm::from_range, R->operands());
+  for (unsigned I = 0; I != Operands.size(); ++I) {
+    if (VPRecipeBase *Cur = Operands[I]->getDefiningRecipe())
+      Operands.insert_range(Cur->operands());
+  }
+  return Operands.takeVector();
+}
+
 /// Scalarize a VPWidenPointerInductionRecipe by replacing it with a PtrAdd
 /// (IndStart, ScalarIVSteps (0, Step)). This is used when the recipe only
 /// generates scalar values.
@@ -2843,11 +2852,30 @@ static VPRecipeBase *optimizeMaskToEVL(VPValue *HeaderMask,
   VPValue *Addr, *Mask, *EndPtr;
 
   /// Adjust any end pointers so that they point to the end of EVL lanes not VF.
-  auto AdjustEndPtr = [&CurRecipe, &EVL](VPValue *EndPtr) {
-    auto *EVLEndPtr = cast<VPVectorEndPointerRecipe>(EndPtr)->clone();
-    EVLEndPtr->insertBefore(&CurRecipe);
-    EVLEndPtr->setOperand(1, &EVL);
-    return EVLEndPtr;
+  auto AdjustEndPtr = [&EVL, &Plan, &TypeInfo](VPValue *EndPtr) {
+    auto *VEPR = cast<VPVectorEndPointerRecipe>(EndPtr);
+    SmallVector<VPValue *> Operands({VEPR->getOffset()});
+    VPRecipeBase *OffsetR = VEPR->getOffset()->getDefiningRecipe();
+    append_range(Operands, collectOperandsRecursively(OffsetR));
+    for (VPValue *Op : reverse(Operands)) {
+      if (!Op->hasDefiningRecipe())
+        continue;
+      VPRecipeBase *Clone = Op->getDefiningRecipe()->clone();
+      Clone->insertBefore(VEPR);
+      VPRecipeBase *EVLR = EVL.getDefiningRecipe();
+      VPBuilder Builder(EVLR->getParent(), std::next(EVLR->getIterator()));
+      Type *EVLType = TypeInfo.inferScalarType(&EVL);
+      VPValue *EVLCast = Builder.createScalarZExtOrTrunc(
+          &EVL, TypeInfo.inferScalarType(&Plan->getVF()), EVLType,
+          EVLR->getDebugLoc());
+      Clone->replaceUsesOfWith(&Plan->getVF(), EVLCast);
+      Op->replaceUsesWithIf(
+          Clone->getVPSingleValue(), [&Operands](VPUser &U, unsigned Idx) {
+            return !is_contained(Operands,
+                                 cast<VPRecipeBase>(U).getVPSingleValue());
+          });
+    }
+    return VEPR;
   };
 
   if (match(&CurRecipe,
@@ -2858,7 +2886,7 @@ static VPRecipeBase *optimizeMaskToEVL(VPValue *HeaderMask,
 
   if (match(&CurRecipe,
             m_MaskedLoad(m_VPValue(EndPtr), m_RemoveMask(HeaderMask, Mask))) &&
-      match(EndPtr, m_VecEndPtr(m_VPValue(Addr), m_Specific(&Plan->getVF()))) &&
+      match(EndPtr, m_VecEndPtr(m_VPValue(Addr), m_VPValue())) &&
       cast<VPWidenLoadRecipe>(CurRecipe).isReverse())
     return new VPWidenLoadEVLRecipe(cast<VPWidenLoadRecipe>(CurRecipe),
                                     AdjustEndPtr(EndPtr), EVL, Mask);
@@ -2871,7 +2899,7 @@ static VPRecipeBase *optimizeMaskToEVL(VPValue *HeaderMask,
   if (match(&CurRecipe,
             m_MaskedStore(m_VPValue(EndPtr), m_VPValue(),
                           m_RemoveMask(HeaderMask, Mask))) &&
-      match(EndPtr, m_VecEndPtr(m_VPValue(Addr), m_Specific(&Plan->getVF()))) &&
+      match(EndPtr, m_VecEndPtr(m_VPValue(Addr), m_VPValue())) &&
       cast<VPWidenStoreRecipe>(CurRecipe).isReverse())
     return new VPWidenStoreEVLRecipe(cast<VPWidenStoreRecipe>(CurRecipe),
                                      AdjustEndPtr(EndPtr), EVL, Mask);
@@ -2916,10 +2944,10 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
   VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
   VPBasicBlock *Header = LoopRegion->getEntryBasicBlock();
 
-  assert(all_of(Plan.getVF().users(),
-                IsaPred<VPVectorEndPointerRecipe, VPScalarIVStepsRecipe>) &&
-         "User of VF that we can't transform to EVL.");
+  assert(
+      all_of(Plan.getVF().users(), IsaPred<VPScalarIVStepsRecipe, VPInstruction>) &&
+      "User of VF that we can't transform to EVL.");
   Plan.getVF().replaceUsesWithIf(&EVL, [](VPUser &U, unsigned Idx) {
     return isa<VPScalarIVStepsRecipe>(U);
   });
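Per the comment in optimizeMaskToEVL above, AdjustEndPtr clones the offset chain and substitutes a cast of EVL for VF, so the end pointer covers only the active lanes. A hedged arithmetic sketch of the intended offset (plain C++, not the VPlan API):

  #include <cassert>
  #include <cstdint>

  // End-pointer offset for a reversed access: Stride * (Len - 1),
  // where Len is VF in a plain vector loop and the explicit vector
  // length in an EVL loop.
  static int64_t endOffset(int64_t Stride, int64_t Len) {
    return Stride * (Len - 1);
  }

  int main() {
    // With VF = 8 but only EVL = 5 active lanes, the EVL-adjusted end
    // pointer moves -4 elements instead of -7.
    assert(endOffset(-1, /*VF=*/8) == -7);
    assert(endOffset(-1, /*EVL=*/5) == -4);
  }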
@@ -3412,6 +3440,7 @@ void VPlanTransforms::createInterleaveGroups(
     Instruction *IRInsertPos = IG->getInsertPos();
     auto *InsertPos =
         cast<VPWidenMemoryRecipe>(RecipeBuilder.getRecipe(IRInsertPos));
+    RecipeBuilder.setInsertPoint(InsertPos);
 
     GEPNoWrapFlags NW = GEPNoWrapFlags::none();
     if (auto *Gep = dyn_cast<GetElementPtrInst>(
@@ -3421,6 +3450,7 @@ void VPlanTransforms::createInterleaveGroups(
     // Get or create the start address for the interleave group.
     VPValue *Addr = Start->getAddr();
     VPRecipeBase *AddrDef = Addr->getDefiningRecipe();
+    VPBuilder &B = RecipeBuilder.getBuilder();
     if (AddrDef && !VPDT.properlyDominates(AddrDef, InsertPos)) {
       // We cannot re-use the address of member zero because it does not
       // dominate the insert position. Instead, use the address of the insert
@@ -3436,7 +3466,6 @@ void VPlanTransforms::createInterleaveGroups(
                                             IG->getIndex(IRInsertPos),
                                             /*IsSigned=*/true);
       VPValue *OffsetVPV = Plan.getConstantInt(-Offset);
-      VPBuilder B(InsertPos);
       Addr = B.createNoWrapPtrAdd(InsertPos->getAddr(), OffsetVPV, NW);
     }
     // If the group is reverse, adjust the index to refer to the last vector
@@ -3444,10 +3473,10 @@ void VPlanTransforms::createInterleaveGroups(
     // lane, rather than directly getting the pointer for lane VF - 1, because
     // the pointer operand of the interleaved access is supposed to be uniform.
     if (IG->isReverse()) {
-      auto *ReversePtr = new VPVectorEndPointerRecipe(
-          Addr, &Plan.getVF(), getLoadStoreType(IRInsertPos),
-          -(int64_t)IG->getFactor(), NW, InsertPos->getDebugLoc());
-      ReversePtr->insertBefore(InsertPos);
+      auto *ReversePtr = RecipeBuilder.createVectorEndPointerRecipe(
+          Addr, getLoadStoreType(IRInsertPos), -(int64_t)IG->getFactor(), NW,
+          InsertPos->getDebugLoc());
+      B.insert(ReversePtr);
       Addr = ReversePtr;
     }
     auto *VPIG = new VPInterleaveRecipe(IG, Addr, StoredValues,
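The unroller change below chains one offset per part instead of re-deriving each part's address from the part number. A sketch of the recurrence (plain C++; names hypothetical):

  #include <cassert>
  #include <cstdint>
  #include <vector>

  // Per-part offsets as chained during unrolling:
  //   Offset[0]    = Stride * (VF - 1)
  //   Offset[Part] = Offset[Part - 1] + Stride * VF
  static std::vector<int64_t> unrolledOffsets(int64_t Stride, int64_t VF,
                                              unsigned UF) {
    std::vector<int64_t> Offsets{Stride * (VF - 1)};
    for (unsigned Part = 1; Part != UF; ++Part)
      Offsets.push_back(Offsets.back() + Stride * VF);
    return Offsets;
  }

  int main() {
    // UF=2, Stride=-1: matches the RV64-UF2 checks further down, where the
    // two GEPs off the same base use -(VF-1) and -(VF-1) - VF.
    auto O = unrolledOffsets(-1, /*VF=*/8, /*UF=*/2);
    assert(O[0] == -7 && O[1] == -15);
  }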
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index 8198945764936..d35171929dc5f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -313,6 +313,25 @@ void UnrollState::unrollRecipeByUF(VPRecipeBase &R) {
       Copy->addOperand(VFxPart);
       continue;
     }
+    if (auto *VEPR = dyn_cast<VPVectorEndPointerRecipe>(&R)) {
+      VPBuilder Builder(VEPR);
+      VPValue *PrevOffset =
+          cast<VPVectorEndPointerRecipe>(getValueForPart(VEPR, Part - 1))
+              ->getOffset();
+      Type *IndexTy = TypeInfo.inferScalarType(PrevOffset);
+      Type *VFTy = TypeInfo.inferScalarType(&Plan.getVF());
+      VPValue *VF = Builder.createScalarZExtOrTrunc(
+          &Plan.getVF(), IndexTy, VFTy, DebugLoc::getUnknown());
+      // Offset = PrevOffset + Stride * VF.
+      VPValue *VFxStride = Builder.createOverflowingOp(
+          Instruction::Mul,
+          {VF, Plan.getConstantInt(IndexTy, VEPR->getStride())});
+      VPValue *Offset = Builder.createOverflowingOp(Instruction::Add,
+                                                    {PrevOffset, VFxStride});
+      Copy->setOperand(0, VEPR->getOperand(0));
+      Copy->setOperand(1, Offset);
+      continue;
+    }
     if (auto *Red = dyn_cast<VPReductionRecipe>(&R)) {
       auto *Phi = dyn_cast<VPReductionPHIRecipe>(R.getOperand(0));
       if (Phi && Phi->isOrdered()) {
@@ -329,14 +348,10 @@ void UnrollState::unrollRecipeByUF(VPRecipeBase &R) {
 
     // Add operand indicating the part to generate code for, to recipes still
     // requiring it.
-    if (isa<VPVectorEndPointerRecipe, VPVectorPointerRecipe>(Copy) ||
+    if (isa<VPVectorPointerRecipe>(Copy) ||
        match(Copy,
              m_VPInstruction<VPInstruction::CanonicalIVIncrementForPart>()))
       Copy->addOperand(getConstantInt(Part));
-
-    if (isa<VPVectorEndPointerRecipe>(R))
-      Copy->setOperand(0, R.getOperand(0));
   }
 }
diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
index 2d63d2a787f88..86c08a9fc2be2 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
@@ -171,8 +171,7 @@ bool VPlanVerifier::verifyEVLRecipe(const VPInstruction &EVL) const {
         }
         return VerifyEVLUse(*R, 2);
       })
-      .Case<VPWidenLoadEVLRecipe, VPVectorEndPointerRecipe,
-            VPInterleaveEVLRecipe>(
+      .Case<VPWidenLoadEVLRecipe, VPInterleaveEVLRecipe>(
           [&](const VPRecipeBase *R) { return VerifyEVLUse(*R, 1); })
       .Case<VPInstructionWithType>(
           [&](const VPInstructionWithType *S) { return VerifyEVLUse(*S, 0); })
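Because Stride * (VF - 1) is loop-invariant, the test updates below show the offset computed once in vector.ph instead of once per iteration. A sketch of the hoisted value in the first SVE test (plain C++; constants taken from the CHECK lines):

  #include <cassert>
  #include <cstdint>

  // In the SVE test below, VF = 4 * vscale and Stride = -2 (a reversed
  // factor-2 interleave group), so the hoisted offset is
  //   Stride * (VF - 1) = -2 * (4 * vscale - 1) = 2 - 8 * vscale,
  // i.e. the `shl nuw nsw i64 %vscale, 3` / `sub nsw i64 2, ...` pair
  // that now appears once in vector.ph.
  static int64_t hoistedOffset(int64_t VScale) { return 2 - 8 * VScale; }

  int main() {
    for (int64_t VScale = 1; VScale <= 16; VScale *= 2)
      assert(-2 * (4 * VScale - 1) == hoistedOffset(VScale));
  }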
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll
index 8935010e71676..cd0edf92622bc 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll
@@ -359,6 +359,8 @@ define void @test_reversed_load2_store2(ptr noalias nocapture readonly %A, ptr n
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
+; CHECK-NEXT:    [[TMP5:%.*]] = shl nuw nsw i64 [[TMP0]], 3
+; CHECK-NEXT:    [[TMP8:%.*]] = sub nsw i64 2, [[TMP5]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
 ; CHECK-NEXT:    [[INDUCTION:%.*]] = sub <vscale x 4 x i32> splat (i32 1023), [[TMP2]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = trunc nuw nsw i64 [[TMP1]] to i32
@@ -371,8 +373,6 @@ define void @test_reversed_load2_store2(ptr noalias nocapture readonly %A, ptr n
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <vscale x 4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ST2:%.*]], ptr [[A:%.*]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT:    [[TMP6:%.*]] = shl nuw nsw i64 [[TMP0]], 3
-; CHECK-NEXT:    [[TMP8:%.*]] = sub nsw i64 2, [[TMP6]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 [[TMP8]]
 ; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <vscale x 8 x i32>, ptr [[TMP9]], align 4
 ; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC]])
@@ -383,9 +383,7 @@ define void @test_reversed_load2_store2(ptr noalias nocapture readonly %A, ptr n
 ; CHECK-NEXT:    [[TMP12:%.*]] = add nsw <vscale x 4 x i32> [[REVERSE]], [[VEC_IND]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = sub nsw <vscale x 4 x i32> [[REVERSE1]], [[VEC_IND]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ST2]], ptr [[B:%.*]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT:    [[TMP15:%.*]] = shl nuw nsw i64 [[TMP0]], 3
-; CHECK-NEXT:    [[TMP18:%.*]] = sub nsw i64 2, [[TMP15]]
-; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i64 [[TMP18]]
+; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i64 [[TMP8]]
 ; CHECK-NEXT:    [[REVERSE2:%.*]] = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[TMP12]])
 ; CHECK-NEXT:    [[REVERSE3:%.*]] = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[TMP13]])
 ; CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = call <vscale x 8 x i32> @llvm.vector.interleave2.nxv8i32(<vscale x 4 x i32> [[REVERSE2]], <vscale x 4 x i32> [[REVERSE3]])
@@ -1550,6 +1548,8 @@ define void @interleave_deinterleave_reverse(ptr noalias nocapture readonly %A,
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
+; CHECK-NEXT:    [[TMP6:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+; CHECK-NEXT:    [[TMP9:%.*]] = sub nsw i64 4, [[TMP6]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
 ; CHECK-NEXT:    [[INDUCTION:%.*]] = sub <vscale x 4 x i32> splat (i32 1023), [[TMP2]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = trunc nuw nsw i64 [[TMP1]] to i32
@@ -1562,8 +1562,6 @@ define void @interleave_deinterleave_reverse(ptr noalias nocapture readonly %A,
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <vscale x 4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_XYZT:%.*]], ptr [[A:%.*]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT:    [[TMP6:%.*]] = shl nuw nsw i64 [[TMP0]], 4
-; CHECK-NEXT:    [[TMP9:%.*]] = sub nsw i64 4, [[TMP6]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 [[TMP9]]
 ; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <vscale x 16 x i32>, ptr [[TMP10]], align 4
 ; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave4.nxv16i32(<vscale x 16 x i32> [[WIDE_VEC]])
@@ -1580,9 +1578,7 @@ define void @interleave_deinterleave_reverse(ptr noalias nocapture readonly %A,
 ; CHECK-NEXT:    [[TMP19:%.*]] = mul nsw <vscale x 4 x i32> [[REVERSE4]], [[VEC_IND]]
 ; CHECK-NEXT:    [[TMP20:%.*]] = shl nuw nsw <vscale x 4 x i32> [[REVERSE5]], [[VEC_IND]]
 ; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_XYZT]], ptr [[B:%.*]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT:    [[TMP22:%.*]] = shl nuw nsw i64 [[TMP0]], 4
-; CHECK-NEXT:    [[TMP25:%.*]] = sub nsw i64 4, [[TMP22]]
-; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i64 [[TMP25]]
+; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i64 [[TMP9]]
 ; CHECK-NEXT:    [[REVERSE6:%.*]] = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[TMP17]])
 ; CHECK-NEXT:    [[REVERSE7:%.*]] = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[TMP18]])
 ; CHECK-NEXT:    [[REVERSE8:%.*]] = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[TMP19]])
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll
index ba6bd8f36f5e4..9ee593c658c0f 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll
@@ -24,28 +24,22 @@ define void @vector_reverse_f64(i64 %N, ptr noalias %a, ptr noalias %b) #0{
 ; CHECK-NEXT:    [[TMP6:%.*]] = shl nuw i64 [[TMP4]], 4
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP6]]
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub nsw i64 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT:    [[TMP12:%.*]] = sub i64 1, [[TMP5]]
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP7:%.*]] = xor i64 [[INDEX]], -1
 ; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[N]], [[TMP7]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds double, ptr [[B:%.*]], i64 [[TMP8]]
-; CHECK-NEXT:    [[TMP10:%.*]] = sub i64 1, [[TMP5]]
-; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds double, ptr [[TMP9]], i64 [[TMP10]]
-; CHECK-NEXT:    [[TMP12:%.*]] = sub i64 0, [[TMP5]]
-; CHECK-NEXT:    [[TMP13:%.*]] = sub i64 1, [[TMP5]]
+; CHECK-NEXT:    [[TMP22:%.*]] = sub i64 [[TMP12]], [[TMP5]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds double, ptr [[TMP9]], i64 [[TMP12]]
-; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds double, ptr [[TMP14]], i64 [[TMP13]]
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 8 x double>, ptr [[TMP11]], align 8
+; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds double, ptr [[TMP9]], i64 [[TMP22]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 8 x double>, ptr [[TMP14]], align 8
 ; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <vscale x 8 x double>, ptr [[TMP15]], align 8
 ; CHECK-NEXT:    [[TMP16:%.*]] = fadd <vscale x 8 x double> [[WIDE_LOAD]], splat (double 1.000000e+00)
 ; CHECK-NEXT:    [[TMP17:%.*]] = fadd <vscale x 8 x double> [[WIDE_LOAD1]], splat (double 1.000000e+00)
-; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 [[TMP8]]
-; CHECK-NEXT:    [[TMP19:%.*]] = sub i64 1, [[TMP5]]
-; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds double, ptr [[TMP18]], i64 [[TMP19]]
-; CHECK-NEXT:    [[TMP21:%.*]] = sub i64 0, [[TMP5]]
-; CHECK-NEXT:    [[TMP22:%.*]] = sub i64 1, [[TMP5]]
-; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds double, ptr [[TMP18]], i64 [[TMP21]]
+; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 [[TMP8]]
+; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i64 [[TMP12]]
 ; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i64 [[TMP22]]
 ; CHECK-NEXT:    store <vscale x 8 x double> [[TMP16]], ptr [[TMP20]], align 8
 ; CHECK-NEXT:    store <vscale x 8 x double> [[TMP17]], ptr [[TMP24]], align 8
@@ -101,28 +95,22 @@ define void @vector_reverse_i64(i64 %N, ptr %a, ptr %b) #0 {
 ; CHECK-NEXT:    [[TMP9:%.*]] = shl nuw i64 [[TMP7]], 4
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP9]]
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub nsw i64 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT:    [[TMP15:%.*]] = sub i64 1, [[TMP8]]
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP10:%.*]] = xor i64 [[INDEX]], -1
 ; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[N]], [[TMP10]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP11]]
-; CHECK-NEXT:    [[TMP13:%.*]] = sub i64 1, [[TMP8]]
-; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i64 [[TMP13]]
-; CHECK-NEXT:    [[TMP15:%.*]] = sub i64 0, [[TMP8]]
-; CHECK-NEXT:    [[TMP16:%.*]] = sub i64 1, [[TMP8]]
+; CHECK-NEXT:    [[TMP25:%.*]] = sub i64 [[TMP15]], [[TMP8]]
 ; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i64 [[TMP15]]
-; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i64, ptr [[TMP17]], i64 [[TMP16]]
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 8 x i64>, ptr [[TMP14]], align 8
+; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i64 [[TMP25]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 8 x i64>, ptr [[TMP17]], align 8
 ; CHECK-NEXT:    [[WIDE_LOAD3:%.*]] = load <vscale x 8 x i64>, ptr [[TMP18]], align 8
 ; CHECK-NEXT:    [[TMP19:%.*]] = add <vscale x 8 x i64> [[WIDE_LOAD]], splat (i64 1)
 ; CHECK-NEXT:    [[TMP20:%.*]] = add <vscale x 8 x i64> [[WIDE_LOAD3]], splat (i64 1)
-; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP11]]
-; CHECK-NEXT:    [[TMP22:%.*]] = sub i64 1, [[TMP8]]
-; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i64, ptr [[TMP21]], i64 [[TMP22]]
-; CHECK-NEXT:    [[TMP24:%.*]] = sub i64 0, [[TMP8]]
-; CHECK-NEXT:    [[TMP25:%.*]] = sub i64 1, [[TMP8]]
-; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[TMP21]], i64 [[TMP24]]
+; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP11]]
+; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i64, ptr [[TMP26]], i64 [[TMP15]]
 ; CHECK-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[TMP26]], i64 [[TMP25]]
 ; CHECK-NEXT:    store <vscale x 8 x i64> [[TMP19]], ptr [[TMP23]], align 8
 ; CHECK-NEXT:    store <vscale x 8 x i64> [[TMP20]], ptr [[TMP27]], align 8
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse.ll
index ec874d0b48030..59ae1ee132b10 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse.ll
@@ -11,14 +11,13 @@ define void @vector_reverse_f64(i64 %N, ptr %a, ptr %b) #0 {
 ; CHECK-LABEL: vector_reverse_f64
 ; CHECK-LABEL: vector.body
-; CHECK: %[[GEP:.*]] = getelementptr inbounds double, ptr %{{.*}}, i64 0
+; CHECK: %[[GEP:.*]] = getelementptr inbounds double, ptr %{{.*}}, i64 {{.*}}
 ; CHECK-NEXT: %[[GEP1:.*]] = getelementptr inbounds double, ptr %[[GEP]], i64 -7
 ; CHECK-NEXT: %[[WIDE:.*]] = load <8 x double>, ptr %[[GEP1]], align 8
 ; CHECK-NEXT: %[[REVERSE:.*]] = shufflevector <8 x double> %[[WIDE]], <8 x double> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT: %[[FADD:.*]] = fadd <8 x double> %[[REVERSE]]
 ; CHECK-NEXT: %[[GEP2:.*]] = getelementptr inbounds double, ptr {{.*}}, i64 {{.*}}
-; CHECK-NEXT: %[[GEP3:.*]] = getelementptr inbounds double, ptr %[[GEP2]], i64 0
-; CHECK-NEXT: %[[GEP4:.*]] = getelementptr inbounds double, ptr %[[GEP3]], i64 -7
+; CHECK-NEXT: %[[GEP4:.*]] = getelementptr inbounds double, ptr %[[GEP2]], i64 -7
 ; CHECK-NEXT: %[[REVERSE6:.*]] = shufflevector <8 x double> %[[FADD]], <8 x double> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT: store <8 x double> %[[REVERSE6]], ptr %[[GEP4]], align 8
@@ -44,14 +43,13 @@ define void @vector_reverse_i64(i64 %N, ptr %a, ptr %b) #0 {
 ; CHECK-LABEL: vector_reverse_i64
 ; CHECK-LABEL: vector.body
-; CHECK: %[[GEP:.*]] = getelementptr inbounds i64, ptr %{{.*}}, i64 0
+; CHECK: %[[GEP:.*]] = getelementptr inbounds i64, ptr %{{.*}}, i64 {{.*}}
 ; CHECK-NEXT: %[[GEP1:.*]] = getelementptr inbounds i64, ptr %[[GEP]], i64 -7
 ; CHECK-NEXT: %[[WIDE:.*]] = load <8 x i64>, ptr %[[GEP1]], align 8
 ; CHECK-NEXT: %[[REVERSE:.*]] = shufflevector <8 x i64> %[[WIDE]], <8 x i64> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT: %[[FADD:.*]] = add <8 x i64> %[[REVERSE]]
 ; CHECK-NEXT: %[[GEP2:.*]] = getelementptr inbounds i64, ptr {{.*}}, i64 {{.*}}
-; CHECK-NEXT: %[[GEP3:.*]] = getelementptr inbounds i64, ptr %[[GEP2]], i64 0
-; CHECK-NEXT: %[[GEP4:.*]] = getelementptr inbounds i64, ptr %[[GEP3]], i64 -7
+; CHECK-NEXT: %[[GEP4:.*]] = getelementptr inbounds i64, ptr %[[GEP2]], i64 -7
 ; CHECK-NEXT: %[[REVERSE6:.*]] = shufflevector <8 x i64> %[[FADD]], <8 x i64> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT: store <8 x i64> %[[REVERSE6]], ptr %[[GEP4]], align 8
diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll
index 9f62c7dcda65a..1ab38a0304934 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll
@@ -61,8 +61,7 @@ define void @test_stride-1_4i32(ptr readonly %data, ptr noalias nocapture %dst,
 ; CHECK-NEXT:    [[TMP1:%.*]] = mul nuw nsw i32 [[INDEX]], -1
 ; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i32 [[TMP1]], 2
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[DATA:%.*]], i32 [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 -3
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 -3
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4
 ; CHECK-NEXT:    [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    [[TMP6:%.*]] = add nsw <4 x i32> splat (i32 5), [[REVERSE]]
diff --git a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll
index 9ea95658818fe..42a9bcc62a114 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll
@@ -254,8 +254,7 @@ define void @strides_different_direction(ptr noalias nocapture %A, ptr noalias n
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = sub nsw i32 [[N:%.*]], [[INDEX]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i32 [[TMP3]]
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 0
-; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 -3
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 -3
 ; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP6]], align 4
 ; CHECK-NEXT:    [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD1]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    [[TMP7:%.*]] = add nsw <4 x i32> [[REVERSE]], [[WIDE_LOAD]]
diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll
index f1fbf1dd5d942..e21fa5bb9b146 100644
--- a/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll
+++ b/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll
@@ -261,22 +261,14 @@ define void @f2(ptr noalias %A, ptr noalias %B, i32 %n) {
 ; VF-TWO-CHECK-NEXT:    [[TMP32:%.*]] = add i32 [[TMP24]], [[N]]
 ; VF-TWO-CHECK-NEXT:    [[TMP40:%.*]] = sext i32 [[TMP32]] to i64
 ; VF-TWO-CHECK-NEXT:    [[TMP48:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP40]]
-; VF-TWO-CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 0
-; VF-TWO-CHECK-NEXT:    [[TMP57:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 -3
-; VF-TWO-CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -4
-; VF-TWO-CHECK-NEXT:    [[TMP59:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 -3
-; VF-TWO-CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -8
-; VF-TWO-CHECK-NEXT:    [[TMP61:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 -3
-; VF-TWO-CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -12
-; VF-TWO-CHECK-NEXT:    [[TMP63:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 -3
-; VF-TWO-CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -16
-; VF-TWO-CHECK-NEXT:    [[TMP65:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 -3
-; VF-TWO-CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -20
-; VF-TWO-CHECK-NEXT:    [[TMP67:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 -3
-; VF-TWO-CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -24
-; VF-TWO-CHECK-NEXT:    [[TMP69:%.*]] = getelementptr inbounds float, ptr [[TMP25]], i64 -3
-; VF-TWO-CHECK-NEXT:    [[TMP26:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -28
-; VF-TWO-CHECK-NEXT:    [[TMP71:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i64 -3
+; VF-TWO-CHECK-NEXT:    [[TMP57:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -3
+; VF-TWO-CHECK-NEXT:    [[TMP59:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -7
+; VF-TWO-CHECK-NEXT:    [[TMP61:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -11
+; VF-TWO-CHECK-NEXT:    [[TMP63:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -15
+; VF-TWO-CHECK-NEXT:    [[TMP65:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -19
+; VF-TWO-CHECK-NEXT:    [[TMP67:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -23
+; VF-TWO-CHECK-NEXT:    [[TMP69:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -27
+; VF-TWO-CHECK-NEXT:    [[TMP71:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -31
 ; VF-TWO-CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP57]], align 4
 ; VF-TWO-CHECK-NEXT:    [[REVERSE:%.*]] = shufflevector <4 x float> [[WIDE_LOAD]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; VF-TWO-CHECK-NEXT:    [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP59]], align 4
@@ -340,8 +332,7 @@ define void @f2(ptr noalias %A, ptr noalias %B, i32 %n) {
 ; VF-TWO-CHECK-NEXT:    [[TMP100:%.*]] = add i32 [[TMP99]], [[N]]
 ; VF-TWO-CHECK-NEXT:    [[TMP101:%.*]] = sext i32 [[TMP100]] to i64
 ; VF-TWO-CHECK-NEXT:    [[TMP102:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP101]]
-; VF-TWO-CHECK-NEXT:    [[TMP50:%.*]] = getelementptr inbounds float, ptr [[TMP102]], i64 0
-; VF-TWO-CHECK-NEXT:    [[TMP104:%.*]] = getelementptr inbounds float, ptr [[TMP50]], i64 -1
+; VF-TWO-CHECK-NEXT:    [[TMP104:%.*]] = getelementptr inbounds float, ptr [[TMP102]], i64 -1
 ; VF-TWO-CHECK-NEXT:    [[WIDE_LOAD23:%.*]] = load <2 x float>, ptr [[TMP104]], align 4
 ; VF-TWO-CHECK-NEXT:    [[REVERSE24:%.*]] = shufflevector <2 x float> [[WIDE_LOAD23]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
 ; VF-TWO-CHECK-NEXT:    [[TMP105:%.*]] = fadd fast <2 x float> [[REVERSE24]], splat (float 1.000000e+00)
@@ -384,22 +375,14 @@ define void @f2(ptr noalias %A, ptr noalias %B, i32 %n) {
 ; VF-FOUR-CHECK-NEXT:    [[TMP32:%.*]] = add i32 [[TMP24]], [[N]]
 ; VF-FOUR-CHECK-NEXT:    [[TMP40:%.*]] = sext i32 [[TMP32]] to i64
 ; VF-FOUR-CHECK-NEXT:    [[TMP48:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP40]]
-; VF-FOUR-CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 0
-; VF-FOUR-CHECK-NEXT:    [[TMP57:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 -3
-; VF-FOUR-CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -4
-; VF-FOUR-CHECK-NEXT:    [[TMP59:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 -3
-; VF-FOUR-CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -8
-; VF-FOUR-CHECK-NEXT:    [[TMP61:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 -3
-; VF-FOUR-CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -12
-; VF-FOUR-CHECK-NEXT:    [[TMP63:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 -3
-; VF-FOUR-CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -16
-; VF-FOUR-CHECK-NEXT:    [[TMP65:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 -3
-; VF-FOUR-CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -20
-; VF-FOUR-CHECK-NEXT:    [[TMP67:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 -3
-; VF-FOUR-CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -24
-; VF-FOUR-CHECK-NEXT:    [[TMP69:%.*]] = getelementptr inbounds float, ptr [[TMP25]], i64 -3
-; VF-FOUR-CHECK-NEXT:    [[TMP26:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -28
-; VF-FOUR-CHECK-NEXT:    [[TMP71:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i64 -3
+; VF-FOUR-CHECK-NEXT:    [[TMP57:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -3
+; VF-FOUR-CHECK-NEXT:    [[TMP59:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -7
+; VF-FOUR-CHECK-NEXT:    [[TMP61:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -11
+; VF-FOUR-CHECK-NEXT:    [[TMP63:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -15
+; VF-FOUR-CHECK-NEXT:    [[TMP65:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -19
+; VF-FOUR-CHECK-NEXT:    [[TMP67:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -23
+; VF-FOUR-CHECK-NEXT:    [[TMP69:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -27
+; VF-FOUR-CHECK-NEXT:    [[TMP71:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -31
 ; VF-FOUR-CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP57]], align 4
 ; VF-FOUR-CHECK-NEXT:    [[REVERSE:%.*]] = shufflevector <4 x float> [[WIDE_LOAD]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; VF-FOUR-CHECK-NEXT:    [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP59]], align 4
@@ -463,8 +446,7 @@ define void @f2(ptr noalias %A, ptr noalias %B, i32 %n) {
 ; VF-FOUR-CHECK-NEXT:    [[TMP100:%.*]] = add i32 [[TMP99]], [[N]]
 ; VF-FOUR-CHECK-NEXT:    [[TMP101:%.*]] = sext i32 [[TMP100]] to i64
 ; VF-FOUR-CHECK-NEXT:    [[TMP102:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP101]]
-; VF-FOUR-CHECK-NEXT:    [[TMP50:%.*]] = getelementptr inbounds float, ptr [[TMP102]], i64 0
-; VF-FOUR-CHECK-NEXT:    [[TMP104:%.*]] = getelementptr inbounds float, ptr [[TMP50]], i64 -3
+; VF-FOUR-CHECK-NEXT:    [[TMP104:%.*]] = getelementptr inbounds float, ptr [[TMP102]], i64 -3
 ; VF-FOUR-CHECK-NEXT:    [[WIDE_LOAD23:%.*]] = load <4 x float>, ptr [[TMP104]], align 4
 ; VF-FOUR-CHECK-NEXT:    [[REVERSE24:%.*]] = shufflevector <4 x float> [[WIDE_LOAD23]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; VF-FOUR-CHECK-NEXT:    [[TMP105:%.*]] = fadd fast <4 x float> [[REVERSE24]], splat (float 1.000000e+00)
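The offset is computed in the pointer's index type, so on RV32 the i64 VF is truncated to i32 first, as the RV32 vector.ph checks below show. A small sketch of that index-type handling (plain C++; function name hypothetical):

  #include <cassert>
  #include <cstdint>

  // Bring VF into the GEP index type before the offset arithmetic; on
  // RV32 the index type is i32, so the i64 VF is truncated (the
  // `trunc i64 ... to i32` in the RV32 vector.ph below).
  static int32_t rv32EndOffset(int64_t VF64, int64_t Stride) {
    int32_t VF = static_cast<int32_t>(VF64); // trunc i64 -> i32
    return static_cast<int32_t>(Stride) * (VF - 1);
  }

  int main() {
    assert(rv32EndOffset(/*VF=*/4, /*Stride=*/-1) == -3);
  }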
[[TMP14:%.*]] = add [[REVERSE]], splat (i32 1) ; RV64-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP7]] -; RV64-NEXT: [[TMP16:%.*]] = zext i32 [[TMP19]] to i64 -; RV64-NEXT: [[TMP25:%.*]] = mul i64 0, [[TMP16]] -; RV64-NEXT: [[TMP17:%.*]] = sub i64 [[TMP16]], 1 -; RV64-NEXT: [[TMP18:%.*]] = mul i64 -1, [[TMP17]] -; RV64-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[TMP15]], i64 [[TMP25]] -; RV64-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[TMP20]], i64 [[TMP18]] +; RV64-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[TMP15]], i64 [[TMP6]] ; RV64-NEXT: [[VP_REVERSE1:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[TMP14]], splat (i1 true), i32 [[TMP19]]) ; RV64-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[VP_REVERSE1]], ptr align 4 [[TMP21]], splat (i1 true), i32 [[TMP19]]) ; RV64-NEXT: [[TMP22:%.*]] = zext i32 [[TMP19]] to i64 @@ -62,6 +56,10 @@ define void @vector_reverse_i32(ptr noalias %A, ptr noalias %B) { ; RV32-NEXT: [[ENTRY:.*:]] ; RV32-NEXT: br label %[[VECTOR_PH:.*]] ; RV32: [[VECTOR_PH]]: +; RV32-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; RV32-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; RV32-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32 +; RV32-NEXT: [[TMP3:%.*]] = sub nuw nsw i32 [[TMP2]], 1 ; RV32-NEXT: br label %[[VECTOR_BODY:.*]] ; RV32: [[VECTOR_BODY]]: ; RV32-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -70,20 +68,13 @@ define void @vector_reverse_i32(ptr noalias %A, ptr noalias %B) { ; RV32-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]] ; RV32-NEXT: [[TMP7:%.*]] = add nsw i64 [[OFFSET_IDX]], -1 ; RV32-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP7]] -; RV32-NEXT: [[TMP10:%.*]] = mul i32 0, [[TMP9]] -; RV32-NEXT: [[TMP11:%.*]] = sub i32 [[TMP9]], 1 -; RV32-NEXT: [[TMP12:%.*]] = mul i32 -1, [[TMP11]] -; RV32-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP8]], i32 [[TMP10]] -; RV32-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP13]], i32 [[TMP12]] +; RV32-NEXT: [[TMP10:%.*]] = mul i32 [[TMP3]], -1 +; RV32-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP8]], i32 [[TMP10]] ; RV32-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP14]], splat (i1 true), i32 [[TMP9]]) ; RV32-NEXT: [[REVERSE:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[VP_OP_LOAD]], splat (i1 true), i32 [[TMP9]]) ; RV32-NEXT: [[TMP15:%.*]] = add [[REVERSE]], splat (i32 1) ; RV32-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP7]] -; RV32-NEXT: [[TMP17:%.*]] = mul i32 0, [[TMP9]] -; RV32-NEXT: [[TMP19:%.*]] = sub i32 [[TMP9]], 1 -; RV32-NEXT: [[TMP20:%.*]] = mul i32 -1, [[TMP19]] -; RV32-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[TMP16]], i32 [[TMP17]] -; RV32-NEXT: [[TMP22:%.*]] = getelementptr i32, ptr [[TMP18]], i32 [[TMP20]] +; RV32-NEXT: [[TMP22:%.*]] = getelementptr i32, ptr [[TMP16]], i32 [[TMP10]] ; RV32-NEXT: [[VP_REVERSE1:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[TMP15]], splat (i1 true), i32 [[TMP9]]) ; RV32-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[VP_REVERSE1]], ptr align 4 [[TMP22]], splat (i1 true), i32 [[TMP9]]) ; RV32-NEXT: [[TMP23:%.*]] = zext i32 [[TMP9]] to i64 @@ -110,39 +101,27 @@ define void @vector_reverse_i32(ptr noalias %A, ptr noalias %B) { ; RV64-UF2-NEXT: [[N_VEC:%.*]] = urem i64 1023, [[TMP6]] ; RV64-UF2-NEXT: [[TMP7:%.*]] = sub i64 1023, [[N_VEC]] ; RV64-UF2-NEXT: [[TMP33:%.*]] = sub i64 1023, [[TMP7]] +; RV64-UF2-NEXT: [[TMP12:%.*]] = sub nuw nsw i64 [[TMP5]], 1 +; 
RV64-UF2-NEXT: [[TMP15:%.*]] = mul i64 [[TMP12]], -1 ; RV64-UF2-NEXT: br label %[[VECTOR_BODY:.*]] ; RV64-UF2: [[VECTOR_BODY]]: ; RV64-UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; RV64-UF2-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]] ; RV64-UF2-NEXT: [[TMP8:%.*]] = add nsw i64 [[OFFSET_IDX]], -1 ; RV64-UF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP8]] -; RV64-UF2-NEXT: [[TMP10:%.*]] = mul i64 0, [[TMP5]] -; RV64-UF2-NEXT: [[TMP11:%.*]] = sub i64 [[TMP5]], 1 -; RV64-UF2-NEXT: [[TMP12:%.*]] = mul i64 -1, [[TMP11]] -; RV64-UF2-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 [[TMP10]] -; RV64-UF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i64 [[TMP12]] -; RV64-UF2-NEXT: [[TMP15:%.*]] = mul i64 -1, [[TMP5]] -; RV64-UF2-NEXT: [[TMP16:%.*]] = sub i64 [[TMP5]], 1 -; RV64-UF2-NEXT: [[TMP17:%.*]] = mul i64 -1, [[TMP16]] +; RV64-UF2-NEXT: [[TMP10:%.*]] = mul i64 [[TMP5]], -1 +; RV64-UF2-NEXT: [[TMP11:%.*]] = add i64 [[TMP15]], [[TMP10]] ; RV64-UF2-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 [[TMP15]] -; RV64-UF2-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP18]], i64 [[TMP17]] -; RV64-UF2-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP14]], align 4 +; RV64-UF2-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 [[TMP11]] +; RV64-UF2-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP18]], align 4 ; RV64-UF2-NEXT: [[REVERSE:%.*]] = call @llvm.vector.reverse.nxv4i32( [[WIDE_LOAD]]) ; RV64-UF2-NEXT: [[WIDE_LOAD1:%.*]] = load , ptr [[TMP19]], align 4 ; RV64-UF2-NEXT: [[REVERSE2:%.*]] = call @llvm.vector.reverse.nxv4i32( [[WIDE_LOAD1]]) ; RV64-UF2-NEXT: [[TMP20:%.*]] = add [[REVERSE]], splat (i32 1) ; RV64-UF2-NEXT: [[TMP21:%.*]] = add [[REVERSE2]], splat (i32 1) ; RV64-UF2-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP8]] -; RV64-UF2-NEXT: [[TMP23:%.*]] = mul i64 0, [[TMP5]] -; RV64-UF2-NEXT: [[TMP24:%.*]] = sub i64 [[TMP5]], 1 -; RV64-UF2-NEXT: [[TMP25:%.*]] = mul i64 -1, [[TMP24]] -; RV64-UF2-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[TMP22]], i64 [[TMP23]] -; RV64-UF2-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i64 [[TMP25]] -; RV64-UF2-NEXT: [[TMP28:%.*]] = mul i64 -1, [[TMP5]] -; RV64-UF2-NEXT: [[TMP29:%.*]] = sub i64 [[TMP5]], 1 -; RV64-UF2-NEXT: [[TMP30:%.*]] = mul i64 -1, [[TMP29]] -; RV64-UF2-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[TMP22]], i64 [[TMP28]] -; RV64-UF2-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[TMP31]], i64 [[TMP30]] +; RV64-UF2-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[TMP22]], i64 [[TMP15]] +; RV64-UF2-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[TMP22]], i64 [[TMP11]] ; RV64-UF2-NEXT: [[REVERSE3:%.*]] = call @llvm.vector.reverse.nxv4i32( [[TMP20]]) ; RV64-UF2-NEXT: store [[REVERSE3]], ptr [[TMP27]], align 4 ; RV64-UF2-NEXT: [[REVERSE4:%.*]] = call @llvm.vector.reverse.nxv4i32( [[TMP21]]) @@ -204,6 +183,9 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; RV64-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP14]], [[TMP13]] ; RV64-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; RV64: [[VECTOR_PH]]: +; RV64-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() +; RV64-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP15]], 4 +; RV64-NEXT: [[TMP17:%.*]] = sub nuw nsw i64 [[TMP16]], 1 ; RV64-NEXT: br label %[[VECTOR_BODY:.*]] ; RV64: [[VECTOR_BODY]]: ; RV64-NEXT: 
[[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -214,22 +196,13 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; RV64-NEXT: [[TMP21:%.*]] = add nsw i32 [[OFFSET_IDX]], -1 ; RV64-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 ; RV64-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP22]] -; RV64-NEXT: [[TMP24:%.*]] = zext i32 [[TMP20]] to i64 -; RV64-NEXT: [[TMP28:%.*]] = mul i64 0, [[TMP24]] -; RV64-NEXT: [[TMP25:%.*]] = sub i64 [[TMP24]], 1 -; RV64-NEXT: [[TMP26:%.*]] = mul i64 -1, [[TMP25]] -; RV64-NEXT: [[TMP38:%.*]] = getelementptr i32, ptr [[TMP23]], i64 [[TMP28]] -; RV64-NEXT: [[TMP27:%.*]] = getelementptr i32, ptr [[TMP38]], i64 [[TMP26]] +; RV64-NEXT: [[TMP19:%.*]] = mul i64 [[TMP17]], -1 +; RV64-NEXT: [[TMP27:%.*]] = getelementptr i32, ptr [[TMP23]], i64 [[TMP19]] ; RV64-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP27]], splat (i1 true), i32 [[TMP20]]) ; RV64-NEXT: [[REVERSE:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[VP_OP_LOAD]], splat (i1 true), i32 [[TMP20]]) ; RV64-NEXT: [[TMP29:%.*]] = add [[REVERSE]], splat (i32 1) ; RV64-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP22]] -; RV64-NEXT: [[TMP39:%.*]] = zext i32 [[TMP20]] to i64 -; RV64-NEXT: [[TMP31:%.*]] = mul i64 0, [[TMP39]] -; RV64-NEXT: [[TMP32:%.*]] = sub i64 [[TMP39]], 1 -; RV64-NEXT: [[TMP33:%.*]] = mul i64 -1, [[TMP32]] -; RV64-NEXT: [[TMP34:%.*]] = getelementptr i32, ptr [[TMP30]], i64 [[TMP31]] -; RV64-NEXT: [[TMP35:%.*]] = getelementptr i32, ptr [[TMP34]], i64 [[TMP33]] +; RV64-NEXT: [[TMP35:%.*]] = getelementptr i32, ptr [[TMP30]], i64 [[TMP19]] ; RV64-NEXT: [[VP_REVERSE3:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[TMP29]], splat (i1 true), i32 [[TMP20]]) ; RV64-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[VP_REVERSE3]], ptr align 4 [[TMP35]], splat (i1 true), i32 [[TMP20]]) ; RV64-NEXT: [[TMP36:%.*]] = zext i32 [[TMP20]] to i64 @@ -267,6 +240,10 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; RV32-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i32 [[TMP6]], [[TMP5]] ; RV32-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; RV32: [[VECTOR_PH]]: +; RV32-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() +; RV32-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4 +; RV32-NEXT: [[TMP7:%.*]] = trunc i64 [[TMP10]] to i32 +; RV32-NEXT: [[TMP8:%.*]] = sub nuw nsw i32 [[TMP7]], 1 ; RV32-NEXT: br label %[[VECTOR_BODY:.*]] ; RV32: [[VECTOR_BODY]]: ; RV32-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -277,20 +254,13 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; RV32-NEXT: [[TMP13:%.*]] = add nsw i32 [[OFFSET_IDX]], -1 ; RV32-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 ; RV32-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP14]] -; RV32-NEXT: [[TMP17:%.*]] = mul i32 0, [[TMP16]] -; RV32-NEXT: [[TMP18:%.*]] = sub i32 [[TMP16]], 1 -; RV32-NEXT: [[TMP19:%.*]] = mul i32 -1, [[TMP18]] -; RV32-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[TMP15]], i32 [[TMP17]] -; RV32-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr [[TMP20]], i32 [[TMP19]] +; RV32-NEXT: [[TMP17:%.*]] = mul i32 [[TMP8]], -1 +; RV32-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr [[TMP15]], i32 [[TMP17]] ; RV32-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP28]], splat (i1 true), i32 [[TMP16]]) ; 
RV32-NEXT: [[REVERSE:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[VP_OP_LOAD]], splat (i1 true), i32 [[TMP16]]) ; RV32-NEXT: [[TMP22:%.*]] = add [[REVERSE]], splat (i32 1) ; RV32-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP14]] -; RV32-NEXT: [[TMP21:%.*]] = mul i32 0, [[TMP16]] -; RV32-NEXT: [[TMP26:%.*]] = sub i32 [[TMP16]], 1 -; RV32-NEXT: [[TMP27:%.*]] = mul i32 -1, [[TMP26]] -; RV32-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr [[TMP23]], i32 [[TMP21]] -; RV32-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP24]], i32 [[TMP27]] +; RV32-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP23]], i32 [[TMP17]] ; RV32-NEXT: [[VP_REVERSE3:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[TMP22]], splat (i1 true), i32 [[TMP16]]) ; RV32-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[VP_REVERSE3]], ptr align 4 [[TMP25]], splat (i1 true), i32 [[TMP16]]) ; RV32-NEXT: [[TMP29:%.*]] = zext i32 [[TMP16]] to i64 @@ -346,6 +316,8 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; RV64-UF2-NEXT: [[TMP48:%.*]] = sub i64 [[TMP0]], [[TMP20]] ; RV64-UF2-NEXT: [[DOTCAST:%.*]] = trunc i64 [[TMP20]] to i32 ; RV64-UF2-NEXT: [[TMP21:%.*]] = sub i32 [[N]], [[DOTCAST]] +; RV64-UF2-NEXT: [[TMP26:%.*]] = sub nuw nsw i64 [[TMP18]], 1 +; RV64-UF2-NEXT: [[TMP30:%.*]] = mul i64 [[TMP26]], -1 ; RV64-UF2-NEXT: br label %[[VECTOR_BODY:.*]] ; RV64-UF2: [[VECTOR_BODY]]: ; RV64-UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -354,33 +326,19 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; RV64-UF2-NEXT: [[TMP22:%.*]] = add nsw i32 [[OFFSET_IDX]], -1 ; RV64-UF2-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 ; RV64-UF2-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP23]] -; RV64-UF2-NEXT: [[TMP25:%.*]] = mul i64 0, [[TMP18]] -; RV64-UF2-NEXT: [[TMP26:%.*]] = sub i64 [[TMP18]], 1 -; RV64-UF2-NEXT: [[TMP27:%.*]] = mul i64 -1, [[TMP26]] -; RV64-UF2-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr [[TMP24]], i64 [[TMP25]] -; RV64-UF2-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, ptr [[TMP28]], i64 [[TMP27]] -; RV64-UF2-NEXT: [[TMP30:%.*]] = mul i64 -1, [[TMP18]] -; RV64-UF2-NEXT: [[TMP31:%.*]] = sub i64 [[TMP18]], 1 -; RV64-UF2-NEXT: [[TMP32:%.*]] = mul i64 -1, [[TMP31]] +; RV64-UF2-NEXT: [[TMP27:%.*]] = mul i64 [[TMP18]], -1 +; RV64-UF2-NEXT: [[TMP25:%.*]] = add i64 [[TMP30]], [[TMP27]] ; RV64-UF2-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, ptr [[TMP24]], i64 [[TMP30]] -; RV64-UF2-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, ptr [[TMP33]], i64 [[TMP32]] -; RV64-UF2-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP29]], align 4 +; RV64-UF2-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, ptr [[TMP24]], i64 [[TMP25]] +; RV64-UF2-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP33]], align 4 ; RV64-UF2-NEXT: [[REVERSE:%.*]] = call @llvm.vector.reverse.nxv4i32( [[WIDE_LOAD]]) ; RV64-UF2-NEXT: [[WIDE_LOAD4:%.*]] = load , ptr [[TMP34]], align 4 ; RV64-UF2-NEXT: [[REVERSE5:%.*]] = call @llvm.vector.reverse.nxv4i32( [[WIDE_LOAD4]]) ; RV64-UF2-NEXT: [[TMP35:%.*]] = add [[REVERSE]], splat (i32 1) ; RV64-UF2-NEXT: [[TMP36:%.*]] = add [[REVERSE5]], splat (i32 1) ; RV64-UF2-NEXT: [[TMP37:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP23]] -; RV64-UF2-NEXT: [[TMP38:%.*]] = mul i64 0, [[TMP18]] -; RV64-UF2-NEXT: [[TMP39:%.*]] = sub i64 [[TMP18]], 1 -; RV64-UF2-NEXT: [[TMP40:%.*]] = mul i64 -1, [[TMP39]] -; RV64-UF2-NEXT: [[TMP41:%.*]] = 
getelementptr inbounds i32, ptr [[TMP37]], i64 [[TMP38]] -; RV64-UF2-NEXT: [[TMP42:%.*]] = getelementptr inbounds i32, ptr [[TMP41]], i64 [[TMP40]] -; RV64-UF2-NEXT: [[TMP43:%.*]] = mul i64 -1, [[TMP18]] -; RV64-UF2-NEXT: [[TMP44:%.*]] = sub i64 [[TMP18]], 1 -; RV64-UF2-NEXT: [[TMP45:%.*]] = mul i64 -1, [[TMP44]] -; RV64-UF2-NEXT: [[TMP46:%.*]] = getelementptr inbounds i32, ptr [[TMP37]], i64 [[TMP43]] -; RV64-UF2-NEXT: [[TMP47:%.*]] = getelementptr inbounds i32, ptr [[TMP46]], i64 [[TMP45]] +; RV64-UF2-NEXT: [[TMP42:%.*]] = getelementptr inbounds i32, ptr [[TMP37]], i64 [[TMP30]] +; RV64-UF2-NEXT: [[TMP47:%.*]] = getelementptr inbounds i32, ptr [[TMP37]], i64 [[TMP25]] ; RV64-UF2-NEXT: [[REVERSE6:%.*]] = call @llvm.vector.reverse.nxv4i32( [[TMP35]]) ; RV64-UF2-NEXT: store [[REVERSE6]], ptr [[TMP42]], align 4 ; RV64-UF2-NEXT: [[REVERSE7:%.*]] = call @llvm.vector.reverse.nxv4i32( [[TMP36]]) @@ -455,6 +413,9 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur ; RV64-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP14]], [[TMP13]] ; RV64-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; RV64: [[VECTOR_PH]]: +; RV64-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() +; RV64-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP15]], 4 +; RV64-NEXT: [[TMP17:%.*]] = sub nuw nsw i64 [[TMP16]], 1 ; RV64-NEXT: br label %[[VECTOR_BODY:.*]] ; RV64: [[VECTOR_BODY]]: ; RV64-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -465,22 +426,13 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur ; RV64-NEXT: [[TMP21:%.*]] = add nsw i32 [[OFFSET_IDX]], -1 ; RV64-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 ; RV64-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP22]] -; RV64-NEXT: [[TMP24:%.*]] = zext i32 [[TMP20]] to i64 -; RV64-NEXT: [[TMP28:%.*]] = mul i64 0, [[TMP24]] -; RV64-NEXT: [[TMP25:%.*]] = sub i64 [[TMP24]], 1 -; RV64-NEXT: [[TMP26:%.*]] = mul i64 -1, [[TMP25]] -; RV64-NEXT: [[TMP38:%.*]] = getelementptr float, ptr [[TMP23]], i64 [[TMP28]] -; RV64-NEXT: [[TMP27:%.*]] = getelementptr float, ptr [[TMP38]], i64 [[TMP26]] +; RV64-NEXT: [[TMP19:%.*]] = mul i64 [[TMP17]], -1 +; RV64-NEXT: [[TMP27:%.*]] = getelementptr float, ptr [[TMP23]], i64 [[TMP19]] ; RV64-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP27]], splat (i1 true), i32 [[TMP20]]) ; RV64-NEXT: [[REVERSE:%.*]] = call @llvm.experimental.vp.reverse.nxv4f32( [[VP_OP_LOAD]], splat (i1 true), i32 [[TMP20]]) ; RV64-NEXT: [[TMP29:%.*]] = fadd [[REVERSE]], splat (float 1.000000e+00) ; RV64-NEXT: [[TMP30:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP22]] -; RV64-NEXT: [[TMP39:%.*]] = zext i32 [[TMP20]] to i64 -; RV64-NEXT: [[TMP31:%.*]] = mul i64 0, [[TMP39]] -; RV64-NEXT: [[TMP32:%.*]] = sub i64 [[TMP39]], 1 -; RV64-NEXT: [[TMP33:%.*]] = mul i64 -1, [[TMP32]] -; RV64-NEXT: [[TMP34:%.*]] = getelementptr float, ptr [[TMP30]], i64 [[TMP31]] -; RV64-NEXT: [[TMP35:%.*]] = getelementptr float, ptr [[TMP34]], i64 [[TMP33]] +; RV64-NEXT: [[TMP35:%.*]] = getelementptr float, ptr [[TMP30]], i64 [[TMP19]] ; RV64-NEXT: [[VP_REVERSE3:%.*]] = call @llvm.experimental.vp.reverse.nxv4f32( [[TMP29]], splat (i1 true), i32 [[TMP20]]) ; RV64-NEXT: call void @llvm.vp.store.nxv4f32.p0( [[VP_REVERSE3]], ptr align 4 [[TMP35]], splat (i1 true), i32 [[TMP20]]) ; RV64-NEXT: [[TMP36:%.*]] = zext i32 [[TMP20]] to i64 @@ -518,6 +470,10 @@ define void @vector_reverse_f32(ptr nocapture noundef 
writeonly %A, ptr nocaptur ; RV32-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i32 [[TMP6]], [[TMP5]] ; RV32-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; RV32: [[VECTOR_PH]]: +; RV32-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() +; RV32-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4 +; RV32-NEXT: [[TMP7:%.*]] = trunc i64 [[TMP10]] to i32 +; RV32-NEXT: [[TMP8:%.*]] = sub nuw nsw i32 [[TMP7]], 1 ; RV32-NEXT: br label %[[VECTOR_BODY:.*]] ; RV32: [[VECTOR_BODY]]: ; RV32-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -528,20 +484,13 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur ; RV32-NEXT: [[TMP13:%.*]] = add nsw i32 [[OFFSET_IDX]], -1 ; RV32-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 ; RV32-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP14]] -; RV32-NEXT: [[TMP17:%.*]] = mul i32 0, [[TMP16]] -; RV32-NEXT: [[TMP18:%.*]] = sub i32 [[TMP16]], 1 -; RV32-NEXT: [[TMP19:%.*]] = mul i32 -1, [[TMP18]] -; RV32-NEXT: [[TMP20:%.*]] = getelementptr float, ptr [[TMP15]], i32 [[TMP17]] -; RV32-NEXT: [[TMP28:%.*]] = getelementptr float, ptr [[TMP20]], i32 [[TMP19]] +; RV32-NEXT: [[TMP17:%.*]] = mul i32 [[TMP8]], -1 +; RV32-NEXT: [[TMP28:%.*]] = getelementptr float, ptr [[TMP15]], i32 [[TMP17]] ; RV32-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP28]], splat (i1 true), i32 [[TMP16]]) ; RV32-NEXT: [[REVERSE:%.*]] = call @llvm.experimental.vp.reverse.nxv4f32( [[VP_OP_LOAD]], splat (i1 true), i32 [[TMP16]]) ; RV32-NEXT: [[TMP22:%.*]] = fadd [[REVERSE]], splat (float 1.000000e+00) ; RV32-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP14]] -; RV32-NEXT: [[TMP21:%.*]] = mul i32 0, [[TMP16]] -; RV32-NEXT: [[TMP26:%.*]] = sub i32 [[TMP16]], 1 -; RV32-NEXT: [[TMP27:%.*]] = mul i32 -1, [[TMP26]] -; RV32-NEXT: [[TMP24:%.*]] = getelementptr float, ptr [[TMP23]], i32 [[TMP21]] -; RV32-NEXT: [[TMP25:%.*]] = getelementptr float, ptr [[TMP24]], i32 [[TMP27]] +; RV32-NEXT: [[TMP25:%.*]] = getelementptr float, ptr [[TMP23]], i32 [[TMP17]] ; RV32-NEXT: [[VP_REVERSE3:%.*]] = call @llvm.experimental.vp.reverse.nxv4f32( [[TMP22]], splat (i1 true), i32 [[TMP16]]) ; RV32-NEXT: call void @llvm.vp.store.nxv4f32.p0( [[VP_REVERSE3]], ptr align 4 [[TMP25]], splat (i1 true), i32 [[TMP16]]) ; RV32-NEXT: [[TMP29:%.*]] = zext i32 [[TMP16]] to i64 @@ -597,6 +546,8 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur ; RV64-UF2-NEXT: [[TMP48:%.*]] = sub i64 [[TMP0]], [[TMP20]] ; RV64-UF2-NEXT: [[DOTCAST:%.*]] = trunc i64 [[TMP20]] to i32 ; RV64-UF2-NEXT: [[TMP21:%.*]] = sub i32 [[N]], [[DOTCAST]] +; RV64-UF2-NEXT: [[TMP26:%.*]] = sub nuw nsw i64 [[TMP18]], 1 +; RV64-UF2-NEXT: [[TMP30:%.*]] = mul i64 [[TMP26]], -1 ; RV64-UF2-NEXT: br label %[[VECTOR_BODY:.*]] ; RV64-UF2: [[VECTOR_BODY]]: ; RV64-UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -605,33 +556,19 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur ; RV64-UF2-NEXT: [[TMP22:%.*]] = add nsw i32 [[OFFSET_IDX]], -1 ; RV64-UF2-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 ; RV64-UF2-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP23]] -; RV64-UF2-NEXT: [[TMP25:%.*]] = mul i64 0, [[TMP18]] -; RV64-UF2-NEXT: [[TMP26:%.*]] = sub i64 [[TMP18]], 1 -; RV64-UF2-NEXT: [[TMP27:%.*]] = mul i64 -1, [[TMP26]] -; RV64-UF2-NEXT: [[TMP28:%.*]] = getelementptr 
inbounds float, ptr [[TMP24]], i64 [[TMP25]] -; RV64-UF2-NEXT: [[TMP29:%.*]] = getelementptr inbounds float, ptr [[TMP28]], i64 [[TMP27]] -; RV64-UF2-NEXT: [[TMP30:%.*]] = mul i64 -1, [[TMP18]] -; RV64-UF2-NEXT: [[TMP31:%.*]] = sub i64 [[TMP18]], 1 -; RV64-UF2-NEXT: [[TMP32:%.*]] = mul i64 -1, [[TMP31]] +; RV64-UF2-NEXT: [[TMP27:%.*]] = mul i64 [[TMP18]], -1 +; RV64-UF2-NEXT: [[TMP25:%.*]] = add i64 [[TMP30]], [[TMP27]] ; RV64-UF2-NEXT: [[TMP33:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i64 [[TMP30]] -; RV64-UF2-NEXT: [[TMP34:%.*]] = getelementptr inbounds float, ptr [[TMP33]], i64 [[TMP32]] -; RV64-UF2-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP29]], align 4 +; RV64-UF2-NEXT: [[TMP34:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i64 [[TMP25]] +; RV64-UF2-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP33]], align 4 ; RV64-UF2-NEXT: [[REVERSE:%.*]] = call @llvm.vector.reverse.nxv4f32( [[WIDE_LOAD]]) ; RV64-UF2-NEXT: [[WIDE_LOAD4:%.*]] = load , ptr [[TMP34]], align 4 ; RV64-UF2-NEXT: [[REVERSE5:%.*]] = call @llvm.vector.reverse.nxv4f32( [[WIDE_LOAD4]]) ; RV64-UF2-NEXT: [[TMP35:%.*]] = fadd [[REVERSE]], splat (float 1.000000e+00) ; RV64-UF2-NEXT: [[TMP36:%.*]] = fadd [[REVERSE5]], splat (float 1.000000e+00) ; RV64-UF2-NEXT: [[TMP37:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP23]] -; RV64-UF2-NEXT: [[TMP38:%.*]] = mul i64 0, [[TMP18]] -; RV64-UF2-NEXT: [[TMP39:%.*]] = sub i64 [[TMP18]], 1 -; RV64-UF2-NEXT: [[TMP40:%.*]] = mul i64 -1, [[TMP39]] -; RV64-UF2-NEXT: [[TMP41:%.*]] = getelementptr inbounds float, ptr [[TMP37]], i64 [[TMP38]] -; RV64-UF2-NEXT: [[TMP42:%.*]] = getelementptr inbounds float, ptr [[TMP41]], i64 [[TMP40]] -; RV64-UF2-NEXT: [[TMP43:%.*]] = mul i64 -1, [[TMP18]] -; RV64-UF2-NEXT: [[TMP44:%.*]] = sub i64 [[TMP18]], 1 -; RV64-UF2-NEXT: [[TMP45:%.*]] = mul i64 -1, [[TMP44]] -; RV64-UF2-NEXT: [[TMP46:%.*]] = getelementptr inbounds float, ptr [[TMP37]], i64 [[TMP43]] -; RV64-UF2-NEXT: [[TMP47:%.*]] = getelementptr inbounds float, ptr [[TMP46]], i64 [[TMP45]] +; RV64-UF2-NEXT: [[TMP42:%.*]] = getelementptr inbounds float, ptr [[TMP37]], i64 [[TMP30]] +; RV64-UF2-NEXT: [[TMP47:%.*]] = getelementptr inbounds float, ptr [[TMP37]], i64 [[TMP25]] ; RV64-UF2-NEXT: [[REVERSE6:%.*]] = call @llvm.vector.reverse.nxv4f32( [[TMP35]]) ; RV64-UF2-NEXT: store [[REVERSE6]], ptr [[TMP42]], align 4 ; RV64-UF2-NEXT: [[REVERSE7:%.*]] = call @llvm.vector.reverse.nxv4f32( [[TMP36]]) @@ -684,6 +621,9 @@ define void @vector_reverse_f32_simplify(ptr noalias %A, ptr noalias %B) { ; RV64-NEXT: [[ENTRY:.*:]] ; RV64-NEXT: br label %[[VECTOR_PH:.*]] ; RV64: [[VECTOR_PH]]: +; RV64-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; RV64-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; RV64-NEXT: [[TMP2:%.*]] = sub nuw nsw i64 [[TMP1]], 1 ; RV64-NEXT: br label %[[VECTOR_BODY:.*]] ; RV64: [[VECTOR_BODY]]: ; RV64-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -692,22 +632,13 @@ define void @vector_reverse_f32_simplify(ptr noalias %A, ptr noalias %B) { ; RV64-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]] ; RV64-NEXT: [[TMP7:%.*]] = add nsw i64 [[OFFSET_IDX]], -1 ; RV64-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP7]] -; RV64-NEXT: [[TMP24:%.*]] = zext i32 [[TMP19]] to i64 -; RV64-NEXT: [[TMP9:%.*]] = mul i64 0, [[TMP24]] -; RV64-NEXT: [[TMP10:%.*]] = sub i64 [[TMP24]], 1 -; RV64-NEXT: [[TMP11:%.*]] = mul i64 -1, [[TMP10]] -; RV64-NEXT: [[TMP12:%.*]] = getelementptr float, ptr [[TMP8]], i64 [[TMP9]] -; 
RV64-NEXT: [[TMP13:%.*]] = getelementptr float, ptr [[TMP12]], i64 [[TMP11]]
+; RV64-NEXT: [[TMP6:%.*]] = mul i64 [[TMP2]], -1
+; RV64-NEXT: [[TMP13:%.*]] = getelementptr float, ptr [[TMP8]], i64 [[TMP6]]
 ; RV64-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP19]])
 ; RV64-NEXT: [[REVERSE:%.*]] = call <vscale x 4 x float> @llvm.experimental.vp.reverse.nxv4f32(<vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP19]])
 ; RV64-NEXT: [[TMP14:%.*]] = fadd <vscale x 4 x float> [[REVERSE]], splat (float 1.000000e+00)
 ; RV64-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP7]]
-; RV64-NEXT: [[TMP16:%.*]] = zext i32 [[TMP19]] to i64
-; RV64-NEXT: [[TMP25:%.*]] = mul i64 0, [[TMP16]]
-; RV64-NEXT: [[TMP17:%.*]] = sub i64 [[TMP16]], 1
-; RV64-NEXT: [[TMP18:%.*]] = mul i64 -1, [[TMP17]]
-; RV64-NEXT: [[TMP20:%.*]] = getelementptr float, ptr [[TMP15]], i64 [[TMP25]]
-; RV64-NEXT: [[TMP21:%.*]] = getelementptr float, ptr [[TMP20]], i64 [[TMP18]]
+; RV64-NEXT: [[TMP21:%.*]] = getelementptr float, ptr [[TMP15]], i64 [[TMP6]]
 ; RV64-NEXT: [[VP_REVERSE1:%.*]] = call <vscale x 4 x float> @llvm.experimental.vp.reverse.nxv4f32(<vscale x 4 x float> [[TMP14]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP19]])
 ; RV64-NEXT: call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> [[VP_REVERSE1]], ptr align 4 [[TMP21]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP19]])
 ; RV64-NEXT: [[TMP22:%.*]] = zext i32 [[TMP19]] to i64
@@ -725,6 +656,10 @@ define void @vector_reverse_f32_simplify(ptr noalias %A, ptr noalias %B) {
 ; RV32-NEXT: [[ENTRY:.*:]]
 ; RV32-NEXT: br label %[[VECTOR_PH:.*]]
 ; RV32: [[VECTOR_PH]]:
+; RV32-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; RV32-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
+; RV32-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
+; RV32-NEXT: [[TMP3:%.*]] = sub nuw nsw i32 [[TMP2]], 1
 ; RV32-NEXT: br label %[[VECTOR_BODY:.*]]
 ; RV32: [[VECTOR_BODY]]:
 ; RV32-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -733,20 +668,13 @@ define void @vector_reverse_f32_simplify(ptr noalias %A, ptr noalias %B) {
 ; RV32-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]]
 ; RV32-NEXT: [[TMP7:%.*]] = add nsw i64 [[OFFSET_IDX]], -1
 ; RV32-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP7]]
-; RV32-NEXT: [[TMP10:%.*]] = mul i32 0, [[TMP9]]
-; RV32-NEXT: [[TMP11:%.*]] = sub i32 [[TMP9]], 1
-; RV32-NEXT: [[TMP12:%.*]] = mul i32 -1, [[TMP11]]
-; RV32-NEXT: [[TMP13:%.*]] = getelementptr float, ptr [[TMP8]], i32 [[TMP10]]
-; RV32-NEXT: [[TMP14:%.*]] = getelementptr float, ptr [[TMP13]], i32 [[TMP12]]
+; RV32-NEXT: [[TMP10:%.*]] = mul i32 [[TMP3]], -1
+; RV32-NEXT: [[TMP14:%.*]] = getelementptr float, ptr [[TMP8]], i32 [[TMP10]]
 ; RV32-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP14]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
 ; RV32-NEXT: [[REVERSE:%.*]] = call <vscale x 4 x float> @llvm.experimental.vp.reverse.nxv4f32(<vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
 ; RV32-NEXT: [[TMP15:%.*]] = fadd <vscale x 4 x float> [[REVERSE]], splat (float 1.000000e+00)
 ; RV32-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP7]]
-; RV32-NEXT: [[TMP17:%.*]] = mul i32 0, [[TMP9]]
-; RV32-NEXT: [[TMP19:%.*]] = sub i32 [[TMP9]], 1
-; RV32-NEXT: [[TMP20:%.*]] = mul i32 -1, [[TMP19]]
-; RV32-NEXT: [[TMP18:%.*]] = getelementptr float, ptr [[TMP16]], i32 [[TMP17]]
-; RV32-NEXT: [[TMP22:%.*]] = getelementptr float, ptr [[TMP18]], i32 [[TMP20]]
+; RV32-NEXT: [[TMP22:%.*]] = getelementptr float, ptr [[TMP16]], i32 [[TMP10]]
 ; RV32-NEXT: [[VP_REVERSE1:%.*]] = call <vscale x 4 x float> @llvm.experimental.vp.reverse.nxv4f32(<vscale x 4 x float> [[TMP15]], <vscale x 4 x i1> splat
(i1 true), i32 [[TMP9]]) ; RV32-NEXT: call void @llvm.vp.store.nxv4f32.p0( [[VP_REVERSE1]], ptr align 4 [[TMP22]], splat (i1 true), i32 [[TMP9]]) ; RV32-NEXT: [[TMP23:%.*]] = zext i32 [[TMP9]] to i64 @@ -773,39 +701,27 @@ define void @vector_reverse_f32_simplify(ptr noalias %A, ptr noalias %B) { ; RV64-UF2-NEXT: [[N_VEC:%.*]] = urem i64 1023, [[TMP6]] ; RV64-UF2-NEXT: [[TMP7:%.*]] = sub i64 1023, [[N_VEC]] ; RV64-UF2-NEXT: [[TMP33:%.*]] = sub i64 1023, [[TMP7]] +; RV64-UF2-NEXT: [[TMP12:%.*]] = sub nuw nsw i64 [[TMP5]], 1 +; RV64-UF2-NEXT: [[TMP15:%.*]] = mul i64 [[TMP12]], -1 ; RV64-UF2-NEXT: br label %[[VECTOR_BODY:.*]] ; RV64-UF2: [[VECTOR_BODY]]: ; RV64-UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; RV64-UF2-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]] ; RV64-UF2-NEXT: [[TMP8:%.*]] = add nsw i64 [[OFFSET_IDX]], -1 ; RV64-UF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP8]] -; RV64-UF2-NEXT: [[TMP10:%.*]] = mul i64 0, [[TMP5]] -; RV64-UF2-NEXT: [[TMP11:%.*]] = sub i64 [[TMP5]], 1 -; RV64-UF2-NEXT: [[TMP12:%.*]] = mul i64 -1, [[TMP11]] -; RV64-UF2-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP9]], i64 [[TMP10]] -; RV64-UF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 [[TMP12]] -; RV64-UF2-NEXT: [[TMP15:%.*]] = mul i64 -1, [[TMP5]] -; RV64-UF2-NEXT: [[TMP16:%.*]] = sub i64 [[TMP5]], 1 -; RV64-UF2-NEXT: [[TMP17:%.*]] = mul i64 -1, [[TMP16]] +; RV64-UF2-NEXT: [[TMP10:%.*]] = mul i64 [[TMP5]], -1 +; RV64-UF2-NEXT: [[TMP11:%.*]] = add i64 [[TMP15]], [[TMP10]] ; RV64-UF2-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP9]], i64 [[TMP15]] -; RV64-UF2-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[TMP17]] -; RV64-UF2-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP14]], align 4 +; RV64-UF2-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[TMP9]], i64 [[TMP11]] +; RV64-UF2-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP18]], align 4 ; RV64-UF2-NEXT: [[REVERSE:%.*]] = call @llvm.vector.reverse.nxv4f32( [[WIDE_LOAD]]) ; RV64-UF2-NEXT: [[WIDE_LOAD1:%.*]] = load , ptr [[TMP19]], align 4 ; RV64-UF2-NEXT: [[REVERSE2:%.*]] = call @llvm.vector.reverse.nxv4f32( [[WIDE_LOAD1]]) ; RV64-UF2-NEXT: [[TMP20:%.*]] = fadd [[REVERSE]], splat (float 1.000000e+00) ; RV64-UF2-NEXT: [[TMP21:%.*]] = fadd [[REVERSE2]], splat (float 1.000000e+00) ; RV64-UF2-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP8]] -; RV64-UF2-NEXT: [[TMP23:%.*]] = mul i64 0, [[TMP5]] -; RV64-UF2-NEXT: [[TMP24:%.*]] = sub i64 [[TMP5]], 1 -; RV64-UF2-NEXT: [[TMP25:%.*]] = mul i64 -1, [[TMP24]] -; RV64-UF2-NEXT: [[TMP26:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[TMP23]] -; RV64-UF2-NEXT: [[TMP27:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i64 [[TMP25]] -; RV64-UF2-NEXT: [[TMP28:%.*]] = mul i64 -1, [[TMP5]] -; RV64-UF2-NEXT: [[TMP29:%.*]] = sub i64 [[TMP5]], 1 -; RV64-UF2-NEXT: [[TMP30:%.*]] = mul i64 -1, [[TMP29]] -; RV64-UF2-NEXT: [[TMP31:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[TMP28]] -; RV64-UF2-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, ptr [[TMP31]], i64 [[TMP30]] +; RV64-UF2-NEXT: [[TMP27:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[TMP15]] +; RV64-UF2-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[TMP11]] ; RV64-UF2-NEXT: [[REVERSE3:%.*]] = call @llvm.vector.reverse.nxv4f32( [[TMP20]]) ; RV64-UF2-NEXT: store [[REVERSE3]], ptr [[TMP27]], align 4 ; RV64-UF2-NEXT: 
[[REVERSE4:%.*]] = call @llvm.vector.reverse.nxv4f32( [[TMP21]]) diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reverse-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reverse-load-store.ll index 13990000585ea..d5cd92da59481 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reverse-load-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reverse-load-store.ll @@ -12,6 +12,9 @@ define void @reverse_load_store(i64 %startval, ptr noalias %ptr, ptr noalias %pt ; IF-EVL-NEXT: entry: ; IF-EVL-NEXT: br label [[VECTOR_PH:%.*]] ; IF-EVL: vector.ph: +; IF-EVL-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; IF-EVL-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; IF-EVL-NEXT: [[TMP2:%.*]] = sub nuw nsw i64 [[TMP1]], 1 ; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] ; IF-EVL: vector.body: ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -20,21 +23,12 @@ define void @reverse_load_store(i64 %startval, ptr noalias %ptr, ptr noalias %pt ; IF-EVL-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[STARTVAL:%.*]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], -1 ; IF-EVL-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i64 [[TMP7]] -; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP5]] to i64 -; IF-EVL-NEXT: [[TMP9:%.*]] = mul i64 0, [[TMP18]] -; IF-EVL-NEXT: [[TMP11:%.*]] = sub i64 [[TMP18]], 1 -; IF-EVL-NEXT: [[TMP10:%.*]] = mul i64 -1, [[TMP11]] -; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP8]], i64 [[TMP9]] -; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP16]], i64 [[TMP10]] +; IF-EVL-NEXT: [[TMP6:%.*]] = mul i64 [[TMP2]], -1 +; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP8]], i64 [[TMP6]] ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], splat (i1 true), i32 [[TMP5]]) ; IF-EVL-NEXT: [[VP_REVERSE:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[VP_OP_LOAD]], splat (i1 true), i32 [[TMP5]]) ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[PTR2:%.*]], i64 [[TMP7]] -; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP5]] to i64 -; IF-EVL-NEXT: [[TMP14:%.*]] = mul i64 0, [[TMP19]] -; IF-EVL-NEXT: [[TMP23:%.*]] = sub i64 [[TMP19]], 1 -; IF-EVL-NEXT: [[TMP15:%.*]] = mul i64 -1, [[TMP23]] -; IF-EVL-NEXT: [[TMP22:%.*]] = getelementptr i32, ptr [[TMP13]], i64 [[TMP14]] -; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[TMP22]], i64 [[TMP15]] +; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[TMP13]], i64 [[TMP6]] ; IF-EVL-NEXT: [[VP_REVERSE3:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[VP_REVERSE]], splat (i1 true), i32 [[TMP5]]) ; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[VP_REVERSE3]], ptr align 4 [[TMP17]], splat (i1 true), i32 [[TMP5]]) ; IF-EVL-NEXT: [[TMP20:%.*]] = zext i32 [[TMP5]] to i64 @@ -60,25 +54,19 @@ define void @reverse_load_store(i64 %startval, ptr noalias %ptr, ptr noalias %pt ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; NO-VP-NEXT: [[TMP6:%.*]] = sub i64 [[STARTVAL:%.*]], [[N_VEC]] ; NO-VP-NEXT: [[TMP7:%.*]] = trunc i64 [[N_VEC]] to i32 +; NO-VP-NEXT: [[TMP9:%.*]] = sub nuw nsw i64 [[TMP3]], 1 +; NO-VP-NEXT: [[TMP12:%.*]] = mul i64 [[TMP9]], -1 ; NO-VP-NEXT: br label [[FOR_BODY:%.*]] ; NO-VP: vector.body: ; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[FOR_BODY]] ] ; NO-VP-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[STARTVAL]], [[INDEX]] ; NO-VP-NEXT: [[TMP8:%.*]] = add i64 
[[OFFSET_IDX]], -1
-; NO-VP-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i64 [[TMP8]]
-; NO-VP-NEXT: [[TMP10:%.*]] = mul i64 0, [[TMP3]]
-; NO-VP-NEXT: [[TMP11:%.*]] = sub i64 [[TMP3]], 1
-; NO-VP-NEXT: [[TMP12:%.*]] = mul i64 -1, [[TMP11]]
-; NO-VP-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 [[TMP10]]
+; NO-VP-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i64 [[TMP8]]
 ; NO-VP-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i64 [[TMP12]]
 ; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP14]], align 4
 ; NO-VP-NEXT: [[REVERSE:%.*]] = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[WIDE_LOAD]])
 ; NO-VP-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[PTR2:%.*]], i64 [[TMP8]]
-; NO-VP-NEXT: [[TMP16:%.*]] = mul i64 0, [[TMP3]]
-; NO-VP-NEXT: [[TMP17:%.*]] = sub i64 [[TMP3]], 1
-; NO-VP-NEXT: [[TMP18:%.*]] = mul i64 -1, [[TMP17]]
-; NO-VP-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i64 [[TMP16]]
-; NO-VP-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i64 [[TMP18]]
+; NO-VP-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i64 [[TMP12]]
 ; NO-VP-NEXT: [[REVERSE1:%.*]] = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[REVERSE]])
 ; NO-VP-NEXT: store <vscale x 4 x i32> [[REVERSE1]], ptr [[TMP20]], align 4
 ; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
@@ -129,6 +117,9 @@ define void @reverse_load_store_masked(i64 %startval, ptr noalias %ptr, ptr noal
 ; IF-EVL-NEXT: entry:
 ; IF-EVL-NEXT: br label [[VECTOR_PH:%.*]]
 ; IF-EVL: vector.ph:
+; IF-EVL-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; IF-EVL-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
+; IF-EVL-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
 ; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
 ; IF-EVL: vector.body:
 ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -140,23 +131,14 @@ define void @reverse_load_store_masked(i64 %startval, ptr noalias %ptr, ptr noal
 ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i32 [[OFFSET_IDX3]]
 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
 ; IF-EVL-NEXT: [[TMP14:%.*]] = icmp slt <vscale x 4 x i32> [[VP_OP_LOAD]], splat (i32 100)
-; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[PTR1:%.*]], i64 [[TMP11]]
-; IF-EVL-NEXT: [[TMP26:%.*]] = zext i32 [[TMP5]] to i64
-; IF-EVL-NEXT: [[TMP17:%.*]] = mul i64 0, [[TMP26]]
-; IF-EVL-NEXT: [[TMP15:%.*]] = sub i64 [[TMP26]], 1
-; IF-EVL-NEXT: [[TMP18:%.*]] = mul i64 -1, [[TMP15]]
-; IF-EVL-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[TMP16]], i64 [[TMP17]]
+; IF-EVL-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[PTR1:%.*]], i64 [[TMP11]]
+; IF-EVL-NEXT: [[TMP18:%.*]] = mul i64 [[TMP2]], -1
 ; IF-EVL-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[TMP19]], i64 [[TMP18]]
 ; IF-EVL-NEXT: [[VP_REVERSE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.experimental.vp.reverse.nxv4i1(<vscale x 4 x i1> [[TMP14]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
 ; IF-EVL-NEXT: [[VP_OP_LOAD4:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP20]], <vscale x 4 x i1> [[VP_REVERSE_MASK]], i32 [[TMP5]])
 ; IF-EVL-NEXT: [[VP_REVERSE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD4]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
 ; IF-EVL-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[PTR2:%.*]], i64 [[TMP11]]
-; IF-EVL-NEXT: [[TMP27:%.*]] = zext i32 [[TMP5]] to i64
-; IF-EVL-NEXT: [[TMP22:%.*]] = mul i64 0, [[TMP27]]
-; IF-EVL-NEXT: [[TMP30:%.*]] = sub i64 [[TMP27]], 1
-; IF-EVL-NEXT: [[TMP23:%.*]] =
mul i64 -1, [[TMP30]] -; IF-EVL-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr [[TMP21]], i64 [[TMP22]] -; IF-EVL-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP24]], i64 [[TMP23]] +; IF-EVL-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP21]], i64 [[TMP18]] ; IF-EVL-NEXT: [[VP_REVERSE5:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[VP_REVERSE]], splat (i1 true), i32 [[TMP5]]) ; IF-EVL-NEXT: [[VP_REVERSE_MASK6:%.*]] = call @llvm.experimental.vp.reverse.nxv4i1( [[TMP14]], splat (i1 true), i32 [[TMP5]]) ; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[VP_REVERSE5]], ptr align 4 [[TMP25]], [[VP_REVERSE_MASK6]], i32 [[TMP5]]) @@ -183,6 +165,8 @@ define void @reverse_load_store_masked(i64 %startval, ptr noalias %ptr, ptr noal ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; NO-VP-NEXT: [[TMP6:%.*]] = sub i64 [[STARTVAL1:%.*]], [[N_VEC]] ; NO-VP-NEXT: [[TMP7:%.*]] = trunc i64 [[N_VEC]] to i32 +; NO-VP-NEXT: [[TMP11:%.*]] = sub i64 [[TMP3]], 1 +; NO-VP-NEXT: [[TMP14:%.*]] = mul i64 [[TMP11]], -1 ; NO-VP-NEXT: br label [[FOR_BODY:%.*]] ; NO-VP: vector.body: ; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[FOR_BODY]] ] @@ -192,21 +176,13 @@ define void @reverse_load_store_masked(i64 %startval, ptr noalias %ptr, ptr noal ; NO-VP-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i32 [[OFFSET_IDX1]] ; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP9]], align 4 ; NO-VP-NEXT: [[TMP10:%.*]] = icmp slt [[WIDE_LOAD]], splat (i32 100) -; NO-VP-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[PTR1:%.*]], i64 [[TMP8]] -; NO-VP-NEXT: [[TMP12:%.*]] = mul i64 0, [[TMP3]] -; NO-VP-NEXT: [[TMP13:%.*]] = sub i64 [[TMP3]], 1 -; NO-VP-NEXT: [[TMP14:%.*]] = mul i64 -1, [[TMP13]] -; NO-VP-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[TMP11]], i64 [[TMP12]] +; NO-VP-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[PTR1:%.*]], i64 [[TMP8]] ; NO-VP-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP15]], i64 [[TMP14]] ; NO-VP-NEXT: [[REVERSE:%.*]] = call @llvm.vector.reverse.nxv4i1( [[TMP10]]) ; NO-VP-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr align 4 [[TMP16]], [[REVERSE]], poison) ; NO-VP-NEXT: [[REVERSE2:%.*]] = call @llvm.vector.reverse.nxv4i32( [[WIDE_MASKED_LOAD]]) ; NO-VP-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[PTR2:%.*]], i64 [[TMP8]] -; NO-VP-NEXT: [[TMP18:%.*]] = mul i64 0, [[TMP3]] -; NO-VP-NEXT: [[TMP19:%.*]] = sub i64 [[TMP3]], 1 -; NO-VP-NEXT: [[TMP20:%.*]] = mul i64 -1, [[TMP19]] -; NO-VP-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[TMP17]], i64 [[TMP18]] -; NO-VP-NEXT: [[TMP22:%.*]] = getelementptr i32, ptr [[TMP21]], i64 [[TMP20]] +; NO-VP-NEXT: [[TMP22:%.*]] = getelementptr i32, ptr [[TMP17]], i64 [[TMP14]] ; NO-VP-NEXT: [[REVERSE3:%.*]] = call @llvm.vector.reverse.nxv4i1( [[TMP10]]) ; NO-VP-NEXT: [[REVERSE4:%.*]] = call @llvm.vector.reverse.nxv4i32( [[REVERSE2]]) ; NO-VP-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[REVERSE4]], ptr align 4 [[TMP22]], [[REVERSE3]]) @@ -277,39 +253,28 @@ define void @multiple_reverse_vector_pointer(ptr noalias %a, ptr noalias %b, ptr ; IF-EVL-NEXT: entry: ; IF-EVL-NEXT: br label [[VECTOR_PH:%.*]] ; IF-EVL: vector.ph: +; IF-EVL-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; IF-EVL-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16 +; IF-EVL-NEXT: [[TMP2:%.*]] = sub nuw nsw i64 [[TMP1]], 1 ; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] ; IF-EVL: vector.body: ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] 
] ; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ 1025, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 16, i1 true) ; IF-EVL-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1024, [[EVL_BASED_IV]] -; IF-EVL-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[OFFSET_IDX]] -; IF-EVL-NEXT: [[TMP9:%.*]] = zext i32 [[TMP6]] to i64 -; IF-EVL-NEXT: [[TMP10:%.*]] = mul i64 0, [[TMP9]] -; IF-EVL-NEXT: [[TMP29:%.*]] = sub i64 [[TMP9]], 1 -; IF-EVL-NEXT: [[TMP11:%.*]] = mul i64 -1, [[TMP29]] -; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[TMP8]], i64 [[TMP10]] +; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[OFFSET_IDX]] +; IF-EVL-NEXT: [[TMP11:%.*]] = mul i64 [[TMP2]], -1 ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[TMP12]], i64 [[TMP11]] ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP13]], splat (i1 true), i32 [[TMP6]]) ; IF-EVL-NEXT: [[VP_REVERSE:%.*]] = call @llvm.experimental.vp.reverse.nxv16i8( [[VP_OP_LOAD]], splat (i1 true), i32 [[TMP6]]) ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[B:%.*]], [[VP_REVERSE]] ; IF-EVL-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.vp.gather.nxv16i8.nxv16p0( align 1 [[TMP14]], splat (i1 true), i32 [[TMP6]]) ; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[C:%.*]], i64 [[OFFSET_IDX]] -; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP6]] to i64 -; IF-EVL-NEXT: [[TMP17:%.*]] = mul i64 0, [[TMP16]] -; IF-EVL-NEXT: [[TMP30:%.*]] = sub i64 [[TMP16]], 1 -; IF-EVL-NEXT: [[TMP18:%.*]] = mul i64 -1, [[TMP30]] -; IF-EVL-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[TMP15]], i64 [[TMP17]] -; IF-EVL-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[TMP19]], i64 [[TMP18]] +; IF-EVL-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[TMP15]], i64 [[TMP11]] ; IF-EVL-NEXT: [[VP_REVERSE1:%.*]] = call @llvm.experimental.vp.reverse.nxv16i8( [[WIDE_MASKED_GATHER]], splat (i1 true), i32 [[TMP6]]) ; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0( [[VP_REVERSE1]], ptr align 1 [[TMP20]], splat (i1 true), i32 [[TMP6]]) ; IF-EVL-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[D:%.*]], i64 [[OFFSET_IDX]] -; IF-EVL-NEXT: [[TMP22:%.*]] = zext i32 [[TMP6]] to i64 -; IF-EVL-NEXT: [[TMP23:%.*]] = mul i64 0, [[TMP22]] -; IF-EVL-NEXT: [[TMP31:%.*]] = sub i64 [[TMP22]], 1 -; IF-EVL-NEXT: [[TMP24:%.*]] = mul i64 -1, [[TMP31]] -; IF-EVL-NEXT: [[TMP25:%.*]] = getelementptr i8, ptr [[TMP21]], i64 [[TMP23]] -; IF-EVL-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr [[TMP25]], i64 [[TMP24]] +; IF-EVL-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr [[TMP21]], i64 [[TMP11]] ; IF-EVL-NEXT: [[VP_REVERSE2:%.*]] = call @llvm.experimental.vp.reverse.nxv16i8( [[WIDE_MASKED_GATHER]], splat (i1 true), i32 [[TMP6]]) ; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0( [[VP_REVERSE2]], ptr align 1 [[TMP26]], splat (i1 true), i32 [[TMP6]]) ; IF-EVL-NEXT: [[TMP27:%.*]] = zext i32 [[TMP6]] to i64 @@ -331,20 +296,17 @@ define void @multiple_reverse_vector_pointer(ptr noalias %a, ptr noalias %b, ptr ; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[LOOP]] ] ; NO-VP-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1024, [[INDEX]] ; NO-VP-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[OFFSET_IDX]] -; NO-VP-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i64 0 -; NO-VP-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i64 -15 +; NO-VP-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 -15 ; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = 
load <16 x i8>, ptr [[TMP2]], align 1
 ; NO-VP-NEXT: [[REVERSE:%.*]] = shufflevector <16 x i8> [[WIDE_LOAD]], <16 x i8> poison, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
 ; NO-VP-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[B:%.*]], <16 x i8> [[REVERSE]]
 ; NO-VP-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> align 1 [[TMP3]], <16 x i1> splat (i1 true), <16 x i8> poison)
 ; NO-VP-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[C:%.*]], i64 [[OFFSET_IDX]]
-; NO-VP-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP4]], i64 0
-; NO-VP-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[TMP5]], i64 -15
+; NO-VP-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[TMP4]], i64 -15
 ; NO-VP-NEXT: [[REVERSE1:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_GATHER]], <16 x i8> poison, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
 ; NO-VP-NEXT: store <16 x i8> [[REVERSE1]], ptr [[TMP6]], align 1
 ; NO-VP-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[D:%.*]], i64 [[OFFSET_IDX]]
-; NO-VP-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[TMP7]], i64 0
-; NO-VP-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP8]], i64 -15
+; NO-VP-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP7]], i64 -15
 ; NO-VP-NEXT: store <16 x i8> [[REVERSE1]], ptr [[TMP9]], align 1
 ; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
 ; NO-VP-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-uniform-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-uniform-store.ll
index f804329169fe0..3fafe16ba83d3 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-uniform-store.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-uniform-store.ll
@@ -15,6 +15,9 @@ define void @lshift_significand(i32 %n, ptr nocapture writeonly %dst) {
 ; CHECK-NEXT: [[TMP0:%.*]] = sub i64 3, [[SPEC_SELECT]]
 ; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
 ; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = sub nuw nsw i64 [[TMP2]], 1
 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
 ; CHECK: [[VECTOR_BODY]]:
 ; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -23,12 +26,8 @@ define void @lshift_significand(i32 %n, ptr nocapture writeonly %dst) {
 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[SPEC_SELECT]], [[EVL_BASED_IV]]
 ; CHECK-NEXT: [[TMP12:%.*]] = sub nuw nsw i64 1, [[OFFSET_IDX]]
 ; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP12]]
-; CHECK-NEXT: [[TMP15:%.*]] = zext i32 [[TMP11]] to i64
-; CHECK-NEXT: [[TMP16:%.*]] = mul i64 0, [[TMP15]]
-; CHECK-NEXT: [[TMP23:%.*]] = sub i64 [[TMP15]], 1
-; CHECK-NEXT: [[TMP17:%.*]] = mul i64 -1, [[TMP23]]
-; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i64, ptr [[ARRAYIDX13]], i64 [[TMP16]]
-; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i64, ptr [[TMP18]], i64 [[TMP17]]
+; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP3]], -1
+; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i64, ptr [[ARRAYIDX13]], i64 [[TMP7]]
 ; CHECK-NEXT: [[VP_REVERSE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vp.reverse.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> splat (i1 true), i32 [[TMP11]])
 ; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[VP_REVERSE]], ptr align 8 [[TMP19]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP11]])
 ; CHECK-NEXT: [[TMP20:%.*]] = zext i32 [[TMP11]] to i64
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-riscv-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-riscv-vector-reverse.ll
index
46695221c27db..dd63b34c69784 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-riscv-vector-reverse.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-riscv-vector-reverse.ll @@ -11,6 +11,7 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocapture noundef readonly %B, i32 noundef signext %n) { ; CHECK: VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF={1}' { +; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF ; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: vp<[[OTC:%.+]]> = original trip-count @@ -20,6 +21,12 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: Successor(s): scalar.ph, vector.ph ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: +; CHECK-NEXT: EMIT-SCALAR vp<[[VFPROM:%.+]]> = zext vp<[[VF]]> to i64 +; CHECK-NEXT: EMIT vp<[[SUB:%.+]]> = sub nuw nsw vp<[[VFPROM]]>, ir<1> +; CHECK-NEXT: EMIT vp<[[MUL:%.+]]> = mul vp<[[SUB]]>, ir<-1> +; CHECK-NEXT: EMIT-SCALAR vp<[[VFPROM2:%.+]]> = zext vp<[[VF]]> to i64 +; CHECK-NEXT: EMIT vp<[[SUB2:%.+]]> = sub nuw nsw vp<[[VFPROM2]]>, ir<1> +; CHECK-NEXT: EMIT vp<[[MUL2:%.+]]> = mul vp<[[SUB2]]>, ir<-1> ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { @@ -33,11 +40,17 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: CLONE ir<[[IDX:%.+]]> = add nsw vp<[[SCALAR_STEPS]]>, ir<-1> ; CHECK-NEXT: CLONE ir<[[IDX_PROM:%.+]]> = zext ir<[[IDX]]> ; CHECK-NEXT: CLONE ir<[[ARRAY_IDX_B:%.+]]> = getelementptr inbounds ir<[[B:%.+]]>, ir<[[IDX_PROM]]> -; CHECK-NEXT: vp<[[VEC_END_PTR_B:%.+]]> = vector-end-pointer ir<[[ARRAY_IDX_B]]>, vp<[[EVL]]> +; CHECK-NEXT: EMIT-SCALAR vp<[[EVLPROM:%.+]]> = zext vp<[[EVL]]> to i64 +; CHECK-NEXT: EMIT vp<[[SUB3:%.+]]> = sub nuw nsw vp<[[VFPROM]]>, ir<1> +; CHECK-NEXT: EMIT vp<[[MUL3:%.+]]> = mul vp<[[SUB]]>, ir<-1> +; CHECK-NEXT: vp<[[VEC_END_PTR_B:%.+]]> = vector-end-pointer ir<[[ARRAY_IDX_B]]>, vp<[[MUL3]]> ; CHECK-NEXT: WIDEN ir<[[VAL_B:%.+]]> = vp.load vp<[[VEC_END_PTR_B]]>, vp<[[EVL]]> ; CHECK-NEXT: WIDEN ir<[[ADD_RESULT:%.+]]> = add ir<[[VAL_B]]>, ir<1> ; CHECK-NEXT: CLONE ir<[[ARRAY_IDX_A:%.+]]> = getelementptr inbounds ir<[[A:%.+]]>, ir<[[IDX_PROM]]> -; CHECK-NEXT: vp<[[VEC_END_PTR_A:%.+]]> = vector-end-pointer ir<[[ARRAY_IDX_A]]>, vp<[[EVL]]> +; CHECK-NEXT: EMIT-SCALAR vp<[[EVLPROM2:%.+]]> = zext vp<[[EVL]]> to i64 +; CHECK-NEXT: EMIT vp<[[SUB4:%.+]]> = sub nuw nsw vp<[[VFPROM2]]>, ir<1> +; CHECK-NEXT: EMIT vp<[[MUL4:%.+]]> = mul vp<[[SUB2]]>, ir<-1> +; CHECK-NEXT: vp<[[VEC_END_PTR_A:%.+]]> = vector-end-pointer ir<[[ARRAY_IDX_A]]>, vp<[[MUL4]]> ; CHECK-NEXT: WIDEN vp.store vp<[[VEC_END_PTR_A]]>, ir<[[ADD_RESULT]]>, vp<[[EVL]]> ; CHECK-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[EVL]]>, vp<[[EVL_PHI]]> ; CHECK-NEXT: EMIT vp<[[AVL_NEXT]]> = sub nuw vp<[[AVL]]>, vp<[[EVL]]> diff --git a/llvm/test/Transforms/LoopVectorize/WebAssembly/induction-branch-cost.ll b/llvm/test/Transforms/LoopVectorize/WebAssembly/induction-branch-cost.ll index f0ea63c498a40..df802392a50f5 100644 --- a/llvm/test/Transforms/LoopVectorize/WebAssembly/induction-branch-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/WebAssembly/induction-branch-cost.ll @@ -27,8 +27,7 @@ define void @induction_phi_and_branch_cost(ptr %end, ptr %start.1, ptr %start.2) ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i32 [[INDEX]], -4 ; 
CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START_2]], i32 [[OFFSET_IDX]]
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i32 0
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP6]], i32 -3
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i32 -3
 ; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP7]], align 4
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/drop-inbounds-flags-for-reverse-vector-pointer.ll b/llvm/test/Transforms/LoopVectorize/X86/drop-inbounds-flags-for-reverse-vector-pointer.ll
index d19ae728cc913..dfe99286cfa25 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/drop-inbounds-flags-for-reverse-vector-pointer.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/drop-inbounds-flags-for-reverse-vector-pointer.ll
@@ -23,8 +23,7 @@ define i1 @fn(ptr %nno) #0 {
 ; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i64> [[VEC_IND]], splat (i64 1)
 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i64> [[TMP2]], zeroinitializer
 ; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw i32, ptr [[NNO]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[TMP23]], i64 0
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP4]], i64 -3
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP23]], i64 -3
 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr align 4 [[TMP6]], <4 x i1> [[REVERSE]], <4 x i32> poison)
 ; CHECK-NEXT: [[REVERSE1:%.*]] = shufflevector <4 x i32> [[WIDE_MASKED_LOAD]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
diff --git a/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll b/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll
index e4977ee642b09..e3c264d068a6e 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll
@@ -1117,68 +1117,56 @@ define void @foo6(ptr nocapture readonly %in, ptr nocapture %out, i32 %size, ptr
 ; AVX2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; AVX2-NEXT: [[OFFSET_IDX:%.*]] = sub i64 4095, [[INDEX]]
 ; AVX2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[OFFSET_IDX]]
-; AVX2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 0
-; AVX2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 -3
-; AVX2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 -4
-; AVX2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 -3
-; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 -8
-; AVX2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 -3
-; AVX2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 -12
-; AVX2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i64 -3
-; AVX2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4, !alias.scope [[META22:![0-9]+]]
+; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 -3
+; AVX2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 -7
+; AVX2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 -11
+; AVX2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 -15
+; AVX2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4, !alias.scope
[[META22:![0-9]+]] ; AVX2-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> -; AVX2-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4, !alias.scope [[META22]] +; AVX2-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4, !alias.scope [[META22]] ; AVX2-NEXT: [[REVERSE7:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD6]], <4 x i32> poison, <4 x i32> -; AVX2-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META22]] +; AVX2-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4, !alias.scope [[META22]] ; AVX2-NEXT: [[REVERSE9:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD8]], <4 x i32> poison, <4 x i32> -; AVX2-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x i32>, ptr [[TMP8]], align 4, !alias.scope [[META22]] +; AVX2-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4, !alias.scope [[META22]] ; AVX2-NEXT: [[REVERSE11:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD10]], <4 x i32> poison, <4 x i32> ; AVX2-NEXT: [[TMP10:%.*]] = icmp sgt <4 x i32> [[REVERSE]], zeroinitializer ; AVX2-NEXT: [[TMP11:%.*]] = icmp sgt <4 x i32> [[REVERSE7]], zeroinitializer ; AVX2-NEXT: [[TMP12:%.*]] = icmp sgt <4 x i32> [[REVERSE9]], zeroinitializer ; AVX2-NEXT: [[TMP13:%.*]] = icmp sgt <4 x i32> [[REVERSE11]], zeroinitializer ; AVX2-NEXT: [[TMP14:%.*]] = getelementptr double, ptr [[IN]], i64 [[OFFSET_IDX]] -; AVX2-NEXT: [[TMP22:%.*]] = getelementptr double, ptr [[TMP14]], i64 0 -; AVX2-NEXT: [[TMP15:%.*]] = getelementptr double, ptr [[TMP22]], i64 -3 -; AVX2-NEXT: [[TMP16:%.*]] = getelementptr double, ptr [[TMP14]], i64 -4 -; AVX2-NEXT: [[TMP17:%.*]] = getelementptr double, ptr [[TMP16]], i64 -3 -; AVX2-NEXT: [[TMP18:%.*]] = getelementptr double, ptr [[TMP14]], i64 -8 -; AVX2-NEXT: [[TMP19:%.*]] = getelementptr double, ptr [[TMP18]], i64 -3 -; AVX2-NEXT: [[TMP20:%.*]] = getelementptr double, ptr [[TMP14]], i64 -12 -; AVX2-NEXT: [[TMP21:%.*]] = getelementptr double, ptr [[TMP20]], i64 -3 +; AVX2-NEXT: [[TMP15:%.*]] = getelementptr double, ptr [[TMP14]], i64 -3 +; AVX2-NEXT: [[TMP16:%.*]] = getelementptr double, ptr [[TMP14]], i64 -7 +; AVX2-NEXT: [[TMP17:%.*]] = getelementptr double, ptr [[TMP14]], i64 -11 +; AVX2-NEXT: [[TMP18:%.*]] = getelementptr double, ptr [[TMP14]], i64 -15 ; AVX2-NEXT: [[REVERSE12:%.*]] = shufflevector <4 x i1> [[TMP10]], <4 x i1> poison, <4 x i32> ; AVX2-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr align 8 [[TMP15]], <4 x i1> [[REVERSE12]], <4 x double> poison), !alias.scope [[META25:![0-9]+]] ; AVX2-NEXT: [[REVERSE13:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD]], <4 x double> poison, <4 x i32> ; AVX2-NEXT: [[REVERSE14:%.*]] = shufflevector <4 x i1> [[TMP11]], <4 x i1> poison, <4 x i32> -; AVX2-NEXT: [[WIDE_MASKED_LOAD15:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr align 8 [[TMP17]], <4 x i1> [[REVERSE14]], <4 x double> poison), !alias.scope [[META25]] +; AVX2-NEXT: [[WIDE_MASKED_LOAD15:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr align 8 [[TMP16]], <4 x i1> [[REVERSE14]], <4 x double> poison), !alias.scope [[META25]] ; AVX2-NEXT: [[REVERSE16:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD15]], <4 x double> poison, <4 x i32> ; AVX2-NEXT: [[REVERSE17:%.*]] = shufflevector <4 x i1> [[TMP12]], <4 x i1> poison, <4 x i32> -; AVX2-NEXT: [[WIDE_MASKED_LOAD18:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr align 8 [[TMP19]], <4 x i1> [[REVERSE17]], <4 x double> poison), !alias.scope [[META25]] +; AVX2-NEXT: 
[[WIDE_MASKED_LOAD18:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr align 8 [[TMP17]], <4 x i1> [[REVERSE17]], <4 x double> poison), !alias.scope [[META25]] ; AVX2-NEXT: [[REVERSE19:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD18]], <4 x double> poison, <4 x i32> ; AVX2-NEXT: [[REVERSE20:%.*]] = shufflevector <4 x i1> [[TMP13]], <4 x i1> poison, <4 x i32> -; AVX2-NEXT: [[WIDE_MASKED_LOAD21:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr align 8 [[TMP21]], <4 x i1> [[REVERSE20]], <4 x double> poison), !alias.scope [[META25]] +; AVX2-NEXT: [[WIDE_MASKED_LOAD21:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr align 8 [[TMP18]], <4 x i1> [[REVERSE20]], <4 x double> poison), !alias.scope [[META25]] ; AVX2-NEXT: [[REVERSE22:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD21]], <4 x double> poison, <4 x i32> ; AVX2-NEXT: [[TMP23:%.*]] = fadd <4 x double> [[REVERSE13]], splat (double 5.000000e-01) ; AVX2-NEXT: [[TMP24:%.*]] = fadd <4 x double> [[REVERSE16]], splat (double 5.000000e-01) ; AVX2-NEXT: [[TMP25:%.*]] = fadd <4 x double> [[REVERSE19]], splat (double 5.000000e-01) ; AVX2-NEXT: [[TMP26:%.*]] = fadd <4 x double> [[REVERSE22]], splat (double 5.000000e-01) ; AVX2-NEXT: [[TMP27:%.*]] = getelementptr double, ptr [[OUT]], i64 [[OFFSET_IDX]] -; AVX2-NEXT: [[TMP35:%.*]] = getelementptr double, ptr [[TMP27]], i64 0 -; AVX2-NEXT: [[TMP28:%.*]] = getelementptr double, ptr [[TMP35]], i64 -3 -; AVX2-NEXT: [[TMP29:%.*]] = getelementptr double, ptr [[TMP27]], i64 -4 -; AVX2-NEXT: [[TMP30:%.*]] = getelementptr double, ptr [[TMP29]], i64 -3 -; AVX2-NEXT: [[TMP31:%.*]] = getelementptr double, ptr [[TMP27]], i64 -8 -; AVX2-NEXT: [[TMP32:%.*]] = getelementptr double, ptr [[TMP31]], i64 -3 -; AVX2-NEXT: [[TMP33:%.*]] = getelementptr double, ptr [[TMP27]], i64 -12 -; AVX2-NEXT: [[TMP34:%.*]] = getelementptr double, ptr [[TMP33]], i64 -3 +; AVX2-NEXT: [[TMP19:%.*]] = getelementptr double, ptr [[TMP27]], i64 -3 +; AVX2-NEXT: [[TMP20:%.*]] = getelementptr double, ptr [[TMP27]], i64 -7 +; AVX2-NEXT: [[TMP21:%.*]] = getelementptr double, ptr [[TMP27]], i64 -11 +; AVX2-NEXT: [[TMP22:%.*]] = getelementptr double, ptr [[TMP27]], i64 -15 ; AVX2-NEXT: [[REVERSE24:%.*]] = shufflevector <4 x double> [[TMP23]], <4 x double> poison, <4 x i32> -; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[REVERSE24]], ptr align 8 [[TMP28]], <4 x i1> [[REVERSE12]]), !alias.scope [[META27:![0-9]+]], !noalias [[META29:![0-9]+]] +; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[REVERSE24]], ptr align 8 [[TMP19]], <4 x i1> [[REVERSE12]]), !alias.scope [[META27:![0-9]+]], !noalias [[META29:![0-9]+]] ; AVX2-NEXT: [[REVERSE26:%.*]] = shufflevector <4 x double> [[TMP24]], <4 x double> poison, <4 x i32> -; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[REVERSE26]], ptr align 8 [[TMP30]], <4 x i1> [[REVERSE14]]), !alias.scope [[META27]], !noalias [[META29]] +; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[REVERSE26]], ptr align 8 [[TMP20]], <4 x i1> [[REVERSE14]]), !alias.scope [[META27]], !noalias [[META29]] ; AVX2-NEXT: [[REVERSE28:%.*]] = shufflevector <4 x double> [[TMP25]], <4 x double> poison, <4 x i32> -; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[REVERSE28]], ptr align 8 [[TMP32]], <4 x i1> [[REVERSE17]]), !alias.scope [[META27]], !noalias [[META29]] +; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[REVERSE28]], ptr align 8 [[TMP21]], <4 x i1> [[REVERSE17]]), !alias.scope [[META27]], !noalias 
[[META29]] ; AVX2-NEXT: [[REVERSE30:%.*]] = shufflevector <4 x double> [[TMP26]], <4 x double> poison, <4 x i32> -; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[REVERSE30]], ptr align 8 [[TMP34]], <4 x i1> [[REVERSE20]]), !alias.scope [[META27]], !noalias [[META29]] +; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[REVERSE30]], ptr align 8 [[TMP22]], <4 x i1> [[REVERSE20]]), !alias.scope [[META27]], !noalias [[META29]] ; AVX2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; AVX2-NEXT: [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096 ; AVX2-NEXT: br i1 [[TMP36]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] @@ -1208,68 +1196,56 @@ define void @foo6(ptr nocapture readonly %in, ptr nocapture %out, i32 %size, ptr ; AVX512-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; AVX512-NEXT: [[OFFSET_IDX:%.*]] = sub i64 4095, [[INDEX]] ; AVX512-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[OFFSET_IDX]] -; AVX512-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 0 -; AVX512-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 -7 -; AVX512-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 -8 -; AVX512-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 -7 -; AVX512-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 -16 -; AVX512-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 -7 -; AVX512-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 -24 -; AVX512-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i64 -7 -; AVX512-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP2]], align 4, !alias.scope [[META34:![0-9]+]] +; AVX512-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 -7 +; AVX512-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 -15 +; AVX512-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 -23 +; AVX512-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 -31 +; AVX512-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP5]], align 4, !alias.scope [[META34:![0-9]+]] ; AVX512-NEXT: [[REVERSE:%.*]] = shufflevector <8 x i32> [[WIDE_LOAD]], <8 x i32> poison, <8 x i32> -; AVX512-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i32>, ptr [[TMP4]], align 4, !alias.scope [[META34]] +; AVX512-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i32>, ptr [[TMP2]], align 4, !alias.scope [[META34]] ; AVX512-NEXT: [[REVERSE7:%.*]] = shufflevector <8 x i32> [[WIDE_LOAD6]], <8 x i32> poison, <8 x i32> -; AVX512-NEXT: [[WIDE_LOAD8:%.*]] = load <8 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META34]] +; AVX512-NEXT: [[WIDE_LOAD8:%.*]] = load <8 x i32>, ptr [[TMP3]], align 4, !alias.scope [[META34]] ; AVX512-NEXT: [[REVERSE9:%.*]] = shufflevector <8 x i32> [[WIDE_LOAD8]], <8 x i32> poison, <8 x i32> -; AVX512-NEXT: [[WIDE_LOAD10:%.*]] = load <8 x i32>, ptr [[TMP8]], align 4, !alias.scope [[META34]] +; AVX512-NEXT: [[WIDE_LOAD10:%.*]] = load <8 x i32>, ptr [[TMP4]], align 4, !alias.scope [[META34]] ; AVX512-NEXT: [[REVERSE11:%.*]] = shufflevector <8 x i32> [[WIDE_LOAD10]], <8 x i32> poison, <8 x i32> ; AVX512-NEXT: [[TMP10:%.*]] = icmp sgt <8 x i32> [[REVERSE]], zeroinitializer ; AVX512-NEXT: [[TMP11:%.*]] = icmp sgt <8 x i32> [[REVERSE7]], zeroinitializer ; AVX512-NEXT: [[TMP12:%.*]] = icmp sgt <8 x i32> [[REVERSE9]], zeroinitializer ; AVX512-NEXT: [[TMP13:%.*]] = icmp sgt <8 x i32> [[REVERSE11]], 
zeroinitializer ; AVX512-NEXT: [[TMP14:%.*]] = getelementptr double, ptr [[IN]], i64 [[OFFSET_IDX]] -; AVX512-NEXT: [[TMP22:%.*]] = getelementptr double, ptr [[TMP14]], i64 0 -; AVX512-NEXT: [[TMP15:%.*]] = getelementptr double, ptr [[TMP22]], i64 -7 -; AVX512-NEXT: [[TMP16:%.*]] = getelementptr double, ptr [[TMP14]], i64 -8 -; AVX512-NEXT: [[TMP17:%.*]] = getelementptr double, ptr [[TMP16]], i64 -7 -; AVX512-NEXT: [[TMP18:%.*]] = getelementptr double, ptr [[TMP14]], i64 -16 -; AVX512-NEXT: [[TMP19:%.*]] = getelementptr double, ptr [[TMP18]], i64 -7 -; AVX512-NEXT: [[TMP20:%.*]] = getelementptr double, ptr [[TMP14]], i64 -24 -; AVX512-NEXT: [[TMP21:%.*]] = getelementptr double, ptr [[TMP20]], i64 -7 +; AVX512-NEXT: [[TMP15:%.*]] = getelementptr double, ptr [[TMP14]], i64 -7 +; AVX512-NEXT: [[TMP16:%.*]] = getelementptr double, ptr [[TMP14]], i64 -15 +; AVX512-NEXT: [[TMP17:%.*]] = getelementptr double, ptr [[TMP14]], i64 -23 +; AVX512-NEXT: [[TMP18:%.*]] = getelementptr double, ptr [[TMP14]], i64 -31 ; AVX512-NEXT: [[REVERSE12:%.*]] = shufflevector <8 x i1> [[TMP10]], <8 x i1> poison, <8 x i32> ; AVX512-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr align 8 [[TMP15]], <8 x i1> [[REVERSE12]], <8 x double> poison), !alias.scope [[META37:![0-9]+]] ; AVX512-NEXT: [[REVERSE13:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD]], <8 x double> poison, <8 x i32> ; AVX512-NEXT: [[REVERSE14:%.*]] = shufflevector <8 x i1> [[TMP11]], <8 x i1> poison, <8 x i32> -; AVX512-NEXT: [[WIDE_MASKED_LOAD15:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr align 8 [[TMP17]], <8 x i1> [[REVERSE14]], <8 x double> poison), !alias.scope [[META37]] +; AVX512-NEXT: [[WIDE_MASKED_LOAD15:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr align 8 [[TMP16]], <8 x i1> [[REVERSE14]], <8 x double> poison), !alias.scope [[META37]] ; AVX512-NEXT: [[REVERSE16:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD15]], <8 x double> poison, <8 x i32> ; AVX512-NEXT: [[REVERSE17:%.*]] = shufflevector <8 x i1> [[TMP12]], <8 x i1> poison, <8 x i32> -; AVX512-NEXT: [[WIDE_MASKED_LOAD18:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr align 8 [[TMP19]], <8 x i1> [[REVERSE17]], <8 x double> poison), !alias.scope [[META37]] +; AVX512-NEXT: [[WIDE_MASKED_LOAD18:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr align 8 [[TMP17]], <8 x i1> [[REVERSE17]], <8 x double> poison), !alias.scope [[META37]] ; AVX512-NEXT: [[REVERSE19:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD18]], <8 x double> poison, <8 x i32> ; AVX512-NEXT: [[REVERSE20:%.*]] = shufflevector <8 x i1> [[TMP13]], <8 x i1> poison, <8 x i32> -; AVX512-NEXT: [[WIDE_MASKED_LOAD21:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr align 8 [[TMP21]], <8 x i1> [[REVERSE20]], <8 x double> poison), !alias.scope [[META37]] +; AVX512-NEXT: [[WIDE_MASKED_LOAD21:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr align 8 [[TMP18]], <8 x i1> [[REVERSE20]], <8 x double> poison), !alias.scope [[META37]] ; AVX512-NEXT: [[REVERSE22:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD21]], <8 x double> poison, <8 x i32> ; AVX512-NEXT: [[TMP23:%.*]] = fadd <8 x double> [[REVERSE13]], splat (double 5.000000e-01) ; AVX512-NEXT: [[TMP24:%.*]] = fadd <8 x double> [[REVERSE16]], splat (double 5.000000e-01) ; AVX512-NEXT: [[TMP25:%.*]] = fadd <8 x double> [[REVERSE19]], splat (double 5.000000e-01) ; AVX512-NEXT: [[TMP26:%.*]] = fadd <8 x double> [[REVERSE22]], splat (double 5.000000e-01) ; AVX512-NEXT: [[TMP27:%.*]] = 
getelementptr double, ptr [[OUT]], i64 [[OFFSET_IDX]] -; AVX512-NEXT: [[TMP35:%.*]] = getelementptr double, ptr [[TMP27]], i64 0 -; AVX512-NEXT: [[TMP28:%.*]] = getelementptr double, ptr [[TMP35]], i64 -7 -; AVX512-NEXT: [[TMP29:%.*]] = getelementptr double, ptr [[TMP27]], i64 -8 -; AVX512-NEXT: [[TMP30:%.*]] = getelementptr double, ptr [[TMP29]], i64 -7 -; AVX512-NEXT: [[TMP31:%.*]] = getelementptr double, ptr [[TMP27]], i64 -16 -; AVX512-NEXT: [[TMP32:%.*]] = getelementptr double, ptr [[TMP31]], i64 -7 -; AVX512-NEXT: [[TMP33:%.*]] = getelementptr double, ptr [[TMP27]], i64 -24 -; AVX512-NEXT: [[TMP34:%.*]] = getelementptr double, ptr [[TMP33]], i64 -7 +; AVX512-NEXT: [[TMP19:%.*]] = getelementptr double, ptr [[TMP27]], i64 -7 +; AVX512-NEXT: [[TMP20:%.*]] = getelementptr double, ptr [[TMP27]], i64 -15 +; AVX512-NEXT: [[TMP21:%.*]] = getelementptr double, ptr [[TMP27]], i64 -23 +; AVX512-NEXT: [[TMP22:%.*]] = getelementptr double, ptr [[TMP27]], i64 -31 ; AVX512-NEXT: [[REVERSE24:%.*]] = shufflevector <8 x double> [[TMP23]], <8 x double> poison, <8 x i32> -; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> [[REVERSE24]], ptr align 8 [[TMP28]], <8 x i1> [[REVERSE12]]), !alias.scope [[META39:![0-9]+]], !noalias [[META41:![0-9]+]] +; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> [[REVERSE24]], ptr align 8 [[TMP19]], <8 x i1> [[REVERSE12]]), !alias.scope [[META39:![0-9]+]], !noalias [[META41:![0-9]+]] ; AVX512-NEXT: [[REVERSE26:%.*]] = shufflevector <8 x double> [[TMP24]], <8 x double> poison, <8 x i32> -; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> [[REVERSE26]], ptr align 8 [[TMP30]], <8 x i1> [[REVERSE14]]), !alias.scope [[META39]], !noalias [[META41]] +; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> [[REVERSE26]], ptr align 8 [[TMP20]], <8 x i1> [[REVERSE14]]), !alias.scope [[META39]], !noalias [[META41]] ; AVX512-NEXT: [[REVERSE28:%.*]] = shufflevector <8 x double> [[TMP25]], <8 x double> poison, <8 x i32> -; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> [[REVERSE28]], ptr align 8 [[TMP32]], <8 x i1> [[REVERSE17]]), !alias.scope [[META39]], !noalias [[META41]] +; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> [[REVERSE28]], ptr align 8 [[TMP21]], <8 x i1> [[REVERSE17]]), !alias.scope [[META39]], !noalias [[META41]] ; AVX512-NEXT: [[REVERSE30:%.*]] = shufflevector <8 x double> [[TMP26]], <8 x double> poison, <8 x i32> -; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> [[REVERSE30]], ptr align 8 [[TMP34]], <8 x i1> [[REVERSE20]]), !alias.scope [[META39]], !noalias [[META41]] +; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> [[REVERSE30]], ptr align 8 [[TMP22]], <8 x i1> [[REVERSE20]]), !alias.scope [[META39]], !noalias [[META41]] ; AVX512-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 ; AVX512-NEXT: [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096 ; AVX512-NEXT: br i1 [[TMP36]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/multi-exit-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/multi-exit-cost.ll index 2809a77b36f1a..7e053ac554737 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/multi-exit-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/multi-exit-cost.ll @@ -30,8 +30,7 @@ define i64 @test_value_in_exit_compare_chain_used_outside(ptr %src, i64 %x, i64 ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <8 x i8> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP29:%.*]], 
%[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP18:%.*]] = and i64 [[TMP10]], 1
 ; CHECK-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP18]]
-; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[TMP26]], i64 0
-; CHECK-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[TMP12]], i64 -7
+; CHECK-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[TMP26]], i64 -7
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP28]], align 1
 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <8 x i8> [[WIDE_LOAD]], <8 x i8> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT: [[TMP29]] = xor <8 x i8> [[REVERSE]], [[VEC_PHI]]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll b/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll
index c756a54ec6d2b..272d9868c98d0 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll
@@ -30,8 +30,7 @@ define void @test(ptr noundef align 8 dereferenceable_or_null(16) %arr) #0 {
 ; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP3]], <4 x i1> zeroinitializer
 ; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 1
 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[ARR]], i64 [[TMP5]]
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[TMP6]], i64 0
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[TMP7]], i64 -3
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[TMP6]], i64 -3
 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i1> [[TMP4]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 1), ptr align 8 [[TMP8]], <4 x i1> [[REVERSE]])
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
diff --git a/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll b/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll
index 6c63b823b7666..456cb9f832f24 100644
--- a/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll
+++ b/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll
@@ -130,8 +130,7 @@ define i32 @consecutive_ptr_reverse(ptr %a, i64 %n) {
 ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[N]], [[INDEX]]
 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 0
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 -3
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 -3
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP4]], align 8
 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT: [[TMP5]] = add <4 x i32> [[VEC_PHI]], [[REVERSE]]
@@ -177,8 +176,7 @@ define i32 @consecutive_ptr_reverse(ptr %a, i64 %n) {
 ; INTER-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
 ; INTER-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[N]], [[INDEX]]
 ; INTER-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[OFFSET_IDX]]
-; INTER-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 0
-; INTER-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 -3
+; INTER-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 -3
 ; INTER-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP4]], align 8
 ; INTER-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; INTER-NEXT:
[[TMP5]] = add <4 x i32> [[VEC_PHI]], [[REVERSE]]
@@ -465,8 +463,7 @@ define i32 @interleaved_access_reverse(ptr %p, i64 %n) {
 ; INTER-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP10:%.*]], %[[VECTOR_BODY]] ]
 ; INTER-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[N]], [[INDEX]]
 ; INTER-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[PAIR:%.*]], ptr [[P]], i64 [[OFFSET_IDX]], i32 0
-; INTER-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 0
-; INTER-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 -6
+; INTER-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 -6
 ; INTER-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP4]], align 8
 ; INTER-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32>
 ; INTER-NEXT: [[VEC_PHI:%.*]] = shufflevector <4 x i32> [[STRIDED_VEC]], <4 x i32> poison, <4 x i32>
diff --git a/llvm/test/Transforms/LoopVectorize/if-reduction.ll b/llvm/test/Transforms/LoopVectorize/if-reduction.ll
index eab9df558f608..8122a6d903e9f 100644
--- a/llvm/test/Transforms/LoopVectorize/if-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/if-reduction.ll
@@ -1648,8 +1648,7 @@ define i32 @fcmp_0_sub_select1(ptr noalias %x, i32 %N) nounwind readonly {
 ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 0, [[INDEX]]
 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 0
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i64 -3
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 -3
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP4]], align 4
 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x float> [[WIDE_LOAD]], <4 x float> poison, <4 x i32>
 ; CHECK-NEXT: [[TMP5:%.*]] = fcmp ogt <4 x float> [[REVERSE]], zeroinitializer
diff --git a/llvm/test/Transforms/LoopVectorize/induction-wrapflags.ll b/llvm/test/Transforms/LoopVectorize/induction-wrapflags.ll
index dee377d61ba30..65242f6705a25 100644
--- a/llvm/test/Transforms/LoopVectorize/induction-wrapflags.ll
+++ b/llvm/test/Transforms/LoopVectorize/induction-wrapflags.ll
@@ -81,8 +81,7 @@ define i32 @induction_trunc_wrapflags(ptr %p) {
 ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i8> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 326, [[INDEX]]
 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[P]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i64 0
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i64 -3
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 -3
 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i8> [[VEC_IND]], <4 x i8> poison, <4 x i32>
 ; CHECK-NEXT: store <4 x i8> [[REVERSE]], ptr [[TMP2]], align 1
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
diff --git a/llvm/test/Transforms/LoopVectorize/interleave-with-i65-induction.ll b/llvm/test/Transforms/LoopVectorize/interleave-with-i65-induction.ll
index ffe9da09ca680..7088c5981da00 100644
--- a/llvm/test/Transforms/LoopVectorize/interleave-with-i65-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/interleave-with-i65-induction.ll
@@ -19,10 +19,8 @@ define void @i65_induction_with_negative_step(ptr %dst) {
 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> [[VEC_IND]], <4 x i32>
 ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i64> [[VEC_IND]], <4 x i64> [[STEP_ADD]], <4 x i32>
 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[TMP0]]
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i64 0
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i64 -3
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i64 -4
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i64 -3
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i64 -3
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i64 -7
 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i64> [[TMP2]], <4 x i64> poison, <4 x i32>
 ; CHECK-NEXT: store <4 x i64> [[REVERSE]], ptr [[TMP6]], align 8
 ; CHECK-NEXT: [[REVERSE1:%.*]] = shufflevector <4 x i64> [[TMP3]], <4 x i64> poison, <4 x i32>
diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-gep-nowrap-flags.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-gep-nowrap-flags.ll
index 309fcece3f7ee..6c57294add0b7 100644
--- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-gep-nowrap-flags.ll
+++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-gep-nowrap-flags.ll
@@ -15,8 +15,7 @@ define void @nusw_preservation(ptr noalias %A, ptr %B) {
 ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]]
 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr nusw [[STRUCT_I32_PAIR:%.*]], ptr [[A]], i64 [[OFFSET_IDX]], i32 0
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr nusw i32, ptr [[TMP0]], i64 0
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr nusw i32, ptr [[TMP1]], i64 -6
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr nusw i32, ptr [[TMP0]], i64 -6
 ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP2]], align 4
 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32>
 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[STRIDED_VEC]], <4 x i32> poison, <4 x i32>
@@ -25,8 +24,7 @@ define void @nusw_preservation(ptr noalias %A, ptr %B) {
 ; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[REVERSE]], [[VEC_IND]]
 ; CHECK-NEXT: [[TMP4:%.*]] = sub nsw <4 x i32> [[REVERSE2]], [[VEC_IND]]
 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr nusw [[STRUCT_I32_PAIR]], ptr [[B]], i64 [[OFFSET_IDX]], i32 0
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr nusw i32, ptr [[TMP5]], i64 0
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr nusw i32, ptr [[TMP6]], i64 -6
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr nusw i32, ptr [[TMP5]], i64 -6
 ; CHECK-NEXT: [[REVERSE3:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32>
 ; CHECK-NEXT: [[REVERSE4:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <4 x i32>
 ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[REVERSE3]], <4 x i32> [[REVERSE4]], <8 x i32>
@@ -77,8 +75,7 @@ define void @inbounds_preservation(ptr noalias %A, ptr %B) {
 ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]]
 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_I32_PAIR:%.*]], ptr [[A]], i64 [[OFFSET_IDX]], i32 0
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 0
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 -6
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 -6
 ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP2]], align 4
 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32>
 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[STRIDED_VEC]], <4 x i32> poison, <4 x i32>
@@ -87,8 +84,7 @@ define void @inbounds_preservation(ptr noalias %A, ptr %B) {
 ; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[REVERSE]], [[VEC_IND]]
 ; CHECK-NEXT: [[TMP4:%.*]] = sub nsw <4 x i32> [[REVERSE2]], [[VEC_IND]]
 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_I32_PAIR]], ptr [[B]], i64 [[OFFSET_IDX]], i32 0
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 0
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 -6
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 -6
 ; CHECK-NEXT: [[REVERSE3:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32>
 ; CHECK-NEXT: [[REVERSE4:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <4 x i32>
 ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[REVERSE3]], <4 x i32> [[REVERSE4]], <8 x i32>
@@ -139,8 +135,7 @@ define void @nuw_drop(ptr noalias %A, ptr %B) {
 ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]]
 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr nuw [[STRUCT_I32_PAIR:%.*]], ptr [[A]], i64 [[OFFSET_IDX]], i32 0
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[TMP0]], i64 0
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[TMP1]], i64 -6
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[TMP0]], i64 -6
 ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP2]], align 4
 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32>
 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[STRIDED_VEC]], <4 x i32> poison, <4 x i32>
@@ -149,8 +144,7 @@ define void @nuw_drop(ptr noalias %A, ptr %B) {
 ; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[REVERSE]], [[VEC_IND]]
 ; CHECK-NEXT: [[TMP4:%.*]] = sub nsw <4 x i32> [[REVERSE2]], [[VEC_IND]]
 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr nuw [[STRUCT_I32_PAIR]], ptr [[B]], i64 [[OFFSET_IDX]], i32 0
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP5]], i64 0
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP6]], i64 -6
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP5]], i64 -6
 ; CHECK-NEXT: [[REVERSE3:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32>
 ; CHECK-NEXT: [[REVERSE4:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <4 x i32>
 ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[REVERSE3]], <4 x i32> [[REVERSE4]], <8 x i32>
diff --git a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll
index 2200a7d0431d2..0d2897ae5e433 100644
--- a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll
+++ b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll
@@ -16,8 +16,7 @@ define i64 @select_decreasing_induction_icmp_const_start(ptr %a) {
 ; IC1VF4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 9223372036854775807), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ]
 ; IC1VF4-NEXT: [[OFFSET_IDX:%.*]] = sub i64 19999, [[INDEX]]
 ; IC1VF4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[OFFSET_IDX]]
-; IC1VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 0
-; IC1VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 -3
+; IC1VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 -3
 ; IC1VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
 ; IC1VF4-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD]], <4 x i64> poison, <4 x i32>
 ; IC1VF4-NEXT: [[TMP3:%.*]] = icmp sgt <4 x i64> [[REVERSE]], splat (i64 3)
@@ -52,14 +51,10 @@ define i64 @select_decreasing_induction_icmp_const_start(ptr %a) {
 ; IC4VF4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 -4)
 ; IC4VF4-NEXT: [[OFFSET_IDX:%.*]] = sub i64 19999, [[INDEX]]
 ; IC4VF4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[OFFSET_IDX]]
-; IC4VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 0
-; IC4VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 -3
-; IC4VF4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 -4
-; IC4VF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i64 -3
-; IC4VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 -8
-; IC4VF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i64 -3
-; IC4VF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 -12
-; IC4VF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i64 -3
+; IC4VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 -3
+; IC4VF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 -7
+; IC4VF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 -11
+; IC4VF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 -15
 ; IC4VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
 ; IC4VF4-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD]], <4 x i64> poison, <4 x i32>
 ; IC4VF4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8
@@ -172,8 +167,7 @@ define i16 @select_decreasing_induction_icmp_table_i16(i16 noundef %val) {
 ; IC1VF4-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16
 ; IC1VF4-NEXT: [[OFFSET_IDX:%.*]] = sub i16 12, [[DOTCAST]]
 ; IC1VF4-NEXT: [[TMP0:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[OFFSET_IDX]]
-; IC1VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[TMP0]], i64 0
-; IC1VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i64 -3
+; IC1VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[TMP0]], i64 -3
 ; IC1VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP2]], align 1
 ; IC1VF4-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i16> [[WIDE_LOAD]], <4 x i16> poison, <4 x i32>
 ; IC1VF4-NEXT: [[TMP3:%.*]] = icmp ugt <4 x i16> [[REVERSE]], [[BROADCAST_SPLAT]]
@@ -498,8 +492,7 @@ define i16 @select_decreasing_induction_icmp_table_half(half noundef %val) {
 ; IC1VF4-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16
 ; IC1VF4-NEXT: [[OFFSET_IDX:%.*]] = sub i16 12, [[DOTCAST]]
 ; IC1VF4-NEXT: [[TMP0:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[OFFSET_IDX]]
-; IC1VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds half, ptr [[TMP0]], i64 0
-; IC1VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds half, ptr [[TMP1]], i64 -3
+; IC1VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds half, ptr [[TMP0]], i64 -3
 ; IC1VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x half>, ptr [[TMP2]], align 1
 ; IC1VF4-NEXT: [[REVERSE:%.*]] = shufflevector <4 x half> [[WIDE_LOAD]], <4 x half> poison, <4 x i32>
 ; IC1VF4-NEXT: [[TMP3:%.*]] = fcmp ugt <4 x half> [[REVERSE]], [[BROADCAST_SPLAT]]
@@ -822,8 +815,7 @@ define i64 @select_decreasing_induction_icmp_iv_unsigned(ptr %a) {
 ; IC1VF4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -1), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ]
 ; IC1VF4-NEXT: [[OFFSET_IDX:%.*]] = sub i64 9223372036854775807, [[INDEX]]
 ; IC1VF4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[OFFSET_IDX]]
-; IC1VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 0
-; IC1VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 -3
+; IC1VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 -3
 ; IC1VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
 ; IC1VF4-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD]], <4 x i64> poison, <4 x i32>
 ; IC1VF4-NEXT: [[TMP3:%.*]] = icmp sgt <4 x i64> [[REVERSE]], splat (i64 3)
@@ -858,14 +850,10 @@ define i64 @select_decreasing_induction_icmp_iv_unsigned(ptr %a) {
 ; IC4VF4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 -4)
 ; IC4VF4-NEXT: [[OFFSET_IDX:%.*]] = sub i64 9223372036854775807, [[INDEX]]
 ; IC4VF4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[OFFSET_IDX]]
-; IC4VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 0
-; IC4VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 -3
-; IC4VF4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 -4
-; IC4VF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i64 -3
-; IC4VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 -8
-; IC4VF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i64 -3
-; IC4VF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 -12
-; IC4VF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i64 -3
+; IC4VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 -3
+; IC4VF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 -7
+; IC4VF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 -11
+; IC4VF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 -15
 ; IC4VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
 ; IC4VF4-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD]], <4 x i64> poison, <4 x i32>
 ; IC4VF4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8
diff --git a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll
index 8bf98f043580b..0694bae7440a8 100644
--- a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll
+++ b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll
@@ -1290,8 +1290,7 @@ define i64 @test_iv_increment_incremented(ptr %dst) {
 ; VEC-NEXT: br label %[[VECTOR_BODY:.*]]
 ; VEC: [[VECTOR_BODY]]:
 ; VEC-NEXT: [[TMP0:%.*]] = getelementptr i16, ptr [[DST]], i64 3
-; VEC-NEXT: [[TMP1:%.*]] = getelementptr i16, ptr [[TMP0]], i64 0
-; VEC-NEXT: [[TMP2:%.*]] = getelementptr i16, ptr [[TMP1]], i64 -1
+; VEC-NEXT: [[TMP2:%.*]] = getelementptr i16, ptr [[TMP0]], i64 -1
 ; VEC-NEXT: store <2 x i16> splat (i16 1), ptr [[TMP2]], align 2
 ; VEC-NEXT: [[TMP5:%.*]] = add i64 1, -1
 ; VEC-NEXT: [[IV_1_NEXT_LCSSA1:%.*]] = add i64 [[TMP5]], 1
diff --git a/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll b/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll
index 8d3d0ff7a6406..7be0fe4ebd3a4 100644
--- a/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll
+++ b/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll
@@ -296,14 +296,12 @@ define void @test_rev_loops_deref_loads(ptr nocapture noundef writeonly %dest) {
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE4:%.*]] ]
 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]]
 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_CMP]], i64 0, i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 0
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 -1
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 -1
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP3]], align 4
 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <2 x i32> [[WIDE_LOAD]], <2 x i32> poison, <2 x i32>
 ; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <2 x i32> [[REVERSE]], splat (i32 3)
 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr [1024 x i32], ptr [[LOCAL_SRC]], i64 0, i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP6]], i64 0
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i64 -1
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP6]], i64 -1
 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i32>, ptr [[TMP8]], align 4
 ; CHECK-NEXT: [[REVERSE2:%.*]] = shufflevector <2 x i32> [[WIDE_LOAD1]], <2 x i32> poison, <2 x i32>
 ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP5]], i32 0
@@ -388,8 +386,7 @@ define void @test_rev_loops_non_deref_loads(ptr nocapture noundef writeonly %des
 ; CHECK-NEXT: [[TMP0:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 -1)
 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i64> [[TMP0]], i32 0
 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_CMP]], i64 0, i64 [[TMP1]]
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 0
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 -1
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 -1
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP4]], align 4
 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <2 x i32> [[WIDE_LOAD]], <2 x i32> poison, <2 x i32>
 ; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <2 x i32> [[REVERSE]], splat (i32 3)
@@ -544,8 +541,7 @@ define void @test_rev_loops_strided_deref_loads(ptr nocapture noundef writeonly
 ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE2]] ]
 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 511, [[INDEX]]
 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_CMP]], i64 0, i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 0
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 -1
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 -1
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP3]], align 4
 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <2 x i32> [[WIDE_LOAD]], <2 x i32> poison, <2 x i32>
 ; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <2 x i32> [[REVERSE]], splat (i32 3)
diff --git a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll
index 1a1c05187590e..ba37a3442ad19 100644
--- a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll
+++ b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll
@@ -173,8 +173,7 @@ define dso_local signext i32 @f2(ptr noalias %A, ptr noalias %B, i32 signext %n)
 ; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[TMP10]], [[N]]
 ; CHECK-NEXT: [[TMP12:%.*]] = sext i32 [[TMP11]] to i64
 ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP12]]
-; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 0
-; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 -3
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 -3
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP15]], align 4
 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x float> [[WIDE_LOAD]], <4 x float> poison, <4 x i32>
 ; CHECK-NEXT: [[TMP16:%.*]] = fadd fast <4 x float> [[REVERSE]], splat (float 1.000000e+00)
@@ -203,8 +202,7 @@ define dso_local signext i32 @f2(ptr noalias %A, ptr noalias %B, i32 signext %n)
 ; CHECK-NEXT: [[TMP23:%.*]] = add i32 [[TMP22]], [[N]]
 ; CHECK-NEXT: [[TMP24:%.*]] = sext i32 [[TMP23]] to i64
 ; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP24]]
-; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds float, ptr [[TMP25]], i64 0
-; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i64 -3
+; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds float, ptr [[TMP25]], i64 -3
 ; CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x float>, ptr [[TMP27]], align 4
 ; CHECK-NEXT: [[REVERSE10:%.*]] = shufflevector <4 x float> [[WIDE_LOAD9]], <4 x float> poison, <4 x i32>
 ; CHECK-NEXT: [[TMP28:%.*]] = fadd fast <4 x float> [[REVERSE10]], splat (float 1.000000e+00)
diff --git a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
index d96134e8adf1d..70847a739b642 100644
--- a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
@@ -38,8 +38,7 @@ define void @a(ptr readnone %b) {
 ; CHECK-NEXT: [[TMP23:%.*]] = insertelement <4 x ptr> [[TMP22]], ptr [[NEXT_GEP3]], i32 2
 ; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x ptr> [[TMP23]], ptr [[NEXT_GEP4]], i32 3
 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP]], i64 -1
-; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 0
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP25]], i64 -3
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 -3
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP5]], align 1
 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD]], <4 x i8> poison, <4 x i32>
 ; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <4 x i8> [[REVERSE]], zeroinitializer
diff --git a/llvm/test/Transforms/LoopVectorize/pr37248.ll b/llvm/test/Transforms/LoopVectorize/pr37248.ll
index 33b3d263e634a..98da110a44e8c 100644
--- a/llvm/test/Transforms/LoopVectorize/pr37248.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr37248.ll
@@ -56,8 +56,7 @@ define void @f1(ptr noalias %b, i1 %c, i32 %start) {
 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE3]]
 ; CHECK: [[PRED_STORE_CONTINUE3]]:
 ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x i16], ptr @a, i16 0, i16 [[TMP12]]
-; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i16, ptr [[TMP15]], i64 0
-; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i16, ptr [[TMP16]], i64 -1
+; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i16, ptr [[TMP15]], i64 -1
 ; CHECK-NEXT: store <2 x i16> zeroinitializer, ptr [[TMP17]], align 1
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
 ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
@@ -119,8 +118,7 @@ define void @f2(ptr noalias %b, i1 %c, i32 %start) {
 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[START]], [[INDEX]]
 ; CHECK-NEXT: [[TMP11:%.*]] = trunc i32 [[OFFSET_IDX]] to i16
 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i16], ptr @a, i16 0, i16 [[TMP11]]
-; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i16, ptr [[TMP12]], i64 0
-; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i16, ptr [[TMP13]], i64 -1
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i16, ptr [[TMP12]], i64 -1
 ; CHECK-NEXT: store <2 x i16> zeroinitializer, ptr [[TMP14]], align 1
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
 ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
diff --git a/llvm/test/Transforms/LoopVectorize/reuse-lcssa-phi-scev-expansion.ll b/llvm/test/Transforms/LoopVectorize/reuse-lcssa-phi-scev-expansion.ll
index c3c467b69bb91..ffd4911a23442 100644
--- a/llvm/test/Transforms/LoopVectorize/reuse-lcssa-phi-scev-expansion.ll
+++ b/llvm/test/Transforms/LoopVectorize/reuse-lcssa-phi-scev-expansion.ll
@@ -36,8 +36,7 @@ define void @reuse_lcssa_phi_for_add_rec1(ptr %head) {
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[IV_LCSSA]], [[INDEX]]
 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr ptr, ptr [[SRC_2]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr ptr, ptr [[TMP5]], i64 0
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr ptr, ptr [[TMP6]], i64 -1
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr ptr, ptr [[TMP5]], i64 -1
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x ptr>, ptr [[TMP7]], align 8
 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <2 x ptr> [[WIDE_LOAD]], <2 x ptr> poison, <2 x i32>
 ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x ptr> [[REVERSE]], i32 0
diff --git a/llvm/test/Transforms/LoopVectorize/reverse-induction-gep-nowrap-flags.ll b/llvm/test/Transforms/LoopVectorize/reverse-induction-gep-nowrap-flags.ll
index 71c75e52d4050..51d2b825dc914 100644
--- a/llvm/test/Transforms/LoopVectorize/reverse-induction-gep-nowrap-flags.ll
+++ b/llvm/test/Transforms/LoopVectorize/reverse-induction-gep-nowrap-flags.ll
@@ -14,8 +14,7 @@ define i32 @preserve_inbounds(i64 %start, ptr %ptr) {
 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[START]], [[INDEX]]
 ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], -1
 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i64 [[TMP0]]
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 0
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 -3
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 -3
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4
 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32>
 ; CHECK-NEXT: [[TMP4]] = add <4 x i32> [[REVERSE]], [[VEC_PHI]]
@@ -60,8 +59,7 @@ define i32 @preserve_nusw(i64 %start, ptr %ptr) {
 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[START]], [[INDEX]]
 ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], -1
 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr nusw i32, ptr [[PTR]], i64 [[TMP0]]
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr nusw i32, ptr [[TMP1]], i64 0
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr nusw i32, ptr [[TMP2]], i64 -3
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr nusw i32, ptr [[TMP1]], i64 -3
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4
 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32>
 ; CHECK-NEXT: [[TMP4]] = add <4 x i32> [[REVERSE]], [[VEC_PHI]]
@@ -106,8 +104,7 @@ define i32 @drop_nuw(i64 %start, ptr %ptr) {
 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[START]], [[INDEX]]
 ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], -1
 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr nuw i32, ptr [[PTR]], i64 [[TMP0]]
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[TMP1]], i64 0
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[TMP2]], i64 -3
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[TMP1]], i64 -3
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4
 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32>
 ; CHECK-NEXT: [[TMP4]] = add <4 x i32> [[REVERSE]], [[VEC_PHI]]
diff --git a/llvm/test/Transforms/LoopVectorize/reverse_induction.ll b/llvm/test/Transforms/LoopVectorize/reverse_induction.ll
index d3e291e4f3ed2..4e91cf84ebdf4 100644
--- a/llvm/test/Transforms/LoopVectorize/reverse_induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/reverse_induction.ll
@@ -20,10 +20,8 @@ define i32 @reverse_induction_i64(i64 %startval, ptr %ptr) {
 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[STARTVAL]], [[INDEX]]
 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], -1
 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i64 [[TMP3]]
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 0
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 -3
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 -4
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 -3
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 -3
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 -7
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 4
 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32>
 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP9]], align 4
@@ -74,10 +72,8 @@ define i32 @reverse_induction_i128(i128 %startval, ptr %ptr) {
 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i128 [[STARTVAL]], [[INDEX]]
 ; CHECK-NEXT: [[TMP3:%.*]] = add i128 [[OFFSET_IDX]], -1
 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i128 [[TMP3]]
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 0
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 -3
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 -4
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 -3
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 -3
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 -7
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 4
 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32>
 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP9]], align 4
@@ -134,10 +130,8 @@ define i32 @reverse_induction_i16(i16 %startval, ptr %ptr) {
 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i16 [[STARTVAL]], [[DOTCAST]]
 ; CHECK-NEXT: [[TMP7:%.*]] = add i16 [[OFFSET_IDX]], -1
 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i16 [[TMP7]]
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 0
-; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 -3
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 -4
-; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 -3
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 -3
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 -7
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP11]], align 4
 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32>
 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP13]], align 4
@@ -221,10 +215,8 @@ define void @reverse_forward_induction_i64_i8() {
 ; CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i32>
 ; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP3]] to <4 x i32>
 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x i32], ptr @a, i64 0, i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 0
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 -3
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 -4
-; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i64 -3
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 -3
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 -7
 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <4 x i32>
 ; CHECK-NEXT: store <4 x i32> [[REVERSE]], ptr [[TMP9]], align 4
 ; CHECK-NEXT: [[REVERSE2:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <4 x i32>
@@ -274,10 +266,8 @@ define void @reverse_forward_induction_i64_i8_signed() {
 ; CHECK-NEXT: [[TMP4:%.*]] = sext <4 x i8> [[TMP2]] to <4 x i32>
 ; CHECK-NEXT: [[TMP5:%.*]] = sext <4 x i8> [[TMP3]] to <4 x i32>
 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x i32], ptr @a, i64 0, i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 0
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 -3
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 -4
-; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i64 -3
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 -3
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 -7
 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <4 x i32>
 ; CHECK-NEXT: store <4 x i32> [[REVERSE]], ptr [[TMP9]], align 4
 ; CHECK-NEXT: [[REVERSE2:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <4 x i32>
diff --git a/llvm/test/Transforms/LoopVectorize/runtime-check-known-true.ll b/llvm/test/Transforms/LoopVectorize/runtime-check-known-true.ll
index 5a1844ac450e7..28a643e05339a 100644
--- a/llvm/test/Transforms/LoopVectorize/runtime-check-known-true.ll
+++ b/llvm/test/Transforms/LoopVectorize/runtime-check-known-true.ll
@@ -39,12 +39,10 @@ define void @test_runtime_check_known_false_after_construction(ptr %start.1, ptr
 ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START_1]], i64 [[OFFSET_IDX]]
 ; CHECK-NEXT: [[OFFSET_IDX2:%.*]] = mul i64 [[INDEX]], -8
 ; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[START_2_DIFF]], i64 [[OFFSET_IDX2]]
-; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i64, ptr [[NEXT_GEP3]], i64 0
-; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i64, ptr [[TMP13]], i64 -3
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i64, ptr [[NEXT_GEP3]], i64 -3
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP14]], align 8
 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD]], <4 x i64> poison, <4 x i32>
-; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i64 0
-; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i64, ptr [[TMP15]], i64 -3
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i64 -3
 ; CHECK-NEXT: [[REVERSE4:%.*]] = shufflevector <4 x i64> [[REVERSE]], <4 x i64> poison, <4 x i32>
 ; CHECK-NEXT: store <4 x i64> [[REVERSE4]], ptr [[TMP16]], align 8
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
diff --git a/llvm/test/Transforms/LoopVectorize/runtime-check-needed-but-empty.ll b/llvm/test/Transforms/LoopVectorize/runtime-check-needed-but-empty.ll
index af272955abbd2..dd7a8a87a921b 100644
--- a/llvm/test/Transforms/LoopVectorize/runtime-check-needed-but-empty.ll
+++ b/llvm/test/Transforms/LoopVectorize/runtime-check-needed-but-empty.ll
@@ -99,8 +99,7 @@ define void @diff_memcheck_known_false_for_vf_4(ptr %B, ptr %A, ptr %end) {
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], -8
 ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i64 0
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[TMP7]], i64 -3
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i64 -3
 ; CHECK-NEXT: store <4 x i64> zeroinitializer, ptr [[TMP8]], align 8
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
diff --git a/llvm/test/Transforms/LoopVectorize/runtime-checks-hoist.ll b/llvm/test/Transforms/LoopVectorize/runtime-checks-hoist.ll
index 5be2b09a504c0..8ef9e872a530e 100644
--- a/llvm/test/Transforms/LoopVectorize/runtime-checks-hoist.ll
+++ b/llvm/test/Transforms/LoopVectorize/runtime-checks-hoist.ll
@@ -996,22 +996,20 @@ define void @decreasing_inner_iv(ptr nocapture noundef %dst, ptr nocapture nound
 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[TMP0]], [[INDEX]]
 ; CHECK-NEXT: [[TMP21:%.*]] = add nsw i64 [[OFFSET_IDX]], [[TMP16]]
 ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP21]]
-; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[TMP22]], i64 0
-; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i64 -3
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP24]], align 4, !alias.scope [[META46:![0-9]+]]
+; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[TMP22]], i64 -3
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP23]], align 4, !alias.scope [[META46:![0-9]+]]
 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32>
-; CHECK-NEXT: [[TMP25:%.*]] = add nsw i64 [[OFFSET_IDX]], [[TMP17]]
-; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP25]]
-; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i64 0
-; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr [[TMP27]], i64 -3
-; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP28]], align 4, !alias.scope [[META49:![0-9]+]], !noalias [[META46]]
+; CHECK-NEXT: [[TMP24:%.*]] = add nsw i64 [[OFFSET_IDX]], [[TMP17]]
+; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP24]]
+; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[TMP25]], i64 -3
+; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP26]], align 4, !alias.scope [[META49:![0-9]+]], !noalias [[META46]]
 ; CHECK-NEXT: [[REVERSE4:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD3]], <4 x i32> poison, <4 x i32>
-; CHECK-NEXT: [[TMP29:%.*]] = add nsw <4 x i32> [[REVERSE4]], [[REVERSE]]
-; CHECK-NEXT: [[REVERSE5:%.*]] = shufflevector <4 x i32> [[TMP29]], <4 x i32> poison, <4 x i32>
-; CHECK-NEXT: store <4 x i32> [[REVERSE5]], ptr [[TMP28]], align 4, !alias.scope [[META49]], !noalias [[META46]]
+; CHECK-NEXT: [[TMP27:%.*]] = add nsw <4 x i32> [[REVERSE4]], [[REVERSE]]
+; CHECK-NEXT: [[REVERSE5:%.*]] = shufflevector <4 x i32> [[TMP27]], <4 x i32> poison, <4 x i32>
+; CHECK-NEXT: store <4 x i32> [[REVERSE5]], ptr [[TMP26]], align 4, !alias.scope [[META49]], !noalias [[META46]]
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP51:![0-9]+]]
+; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP51:![0-9]+]]
 ; CHECK: middle.block:
 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP15]], [[N_VEC]]
 ; CHECK-NEXT: br i1 [[CMP_N]], label [[INNER_LOOP_EXIT]], label [[SCALAR_PH]]
@@ -1020,13 +1018,13 @@ define void @decreasing_inner_iv(ptr nocapture noundef %dst, ptr nocapture nound
 ; CHECK-NEXT: br label [[INNER_LOOP:%.*]]
 ; CHECK: inner.loop:
 ; CHECK-NEXT: [[INNER_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INNER_IV_NEXT:%.*]], [[INNER_LOOP]] ]
-; CHECK-NEXT: [[TMP31:%.*]] = add nsw i64 [[INNER_IV]], [[TMP16]]
-; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP31]]
-; CHECK-NEXT: [[TMP32:%.*]] = load i32, ptr [[ARRAYIDX_US]], align 4
-; CHECK-NEXT: [[TMP33:%.*]] = add nsw i64 [[INNER_IV]], [[TMP17]]
-; CHECK-NEXT: [[ARRAYIDX8_US:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP33]]
-; CHECK-NEXT: [[TMP34:%.*]] = load i32, ptr [[ARRAYIDX8_US]], align 4
-; CHECK-NEXT: [[ADD9_US:%.*]] = add nsw i32 [[TMP34]], [[TMP32]]
+; CHECK-NEXT: [[TMP29:%.*]] = add nsw i64 [[INNER_IV]], [[TMP16]]
+; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP29]]
+; CHECK-NEXT: [[TMP30:%.*]] = load i32, ptr [[ARRAYIDX_US]], align 4
+; CHECK-NEXT: [[TMP31:%.*]] = add nsw i64 [[INNER_IV]], [[TMP17]]
+; CHECK-NEXT: [[ARRAYIDX8_US:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP31]]
+; CHECK-NEXT: [[TMP32:%.*]] = load i32, ptr [[ARRAYIDX8_US]], align 4
+; CHECK-NEXT: [[ADD9_US:%.*]] = add nsw i32 [[TMP32]], [[TMP30]]
 ; CHECK-NEXT: store i32 [[ADD9_US]], ptr [[ARRAYIDX8_US]], align 4
 ; CHECK-NEXT: [[INNER_IV_NEXT]] = add nsw i64 [[INNER_IV]], -1
 ; CHECK-NEXT: [[CMP2_US:%.*]] = icmp sgt i64 [[INNER_IV]], 0
diff --git a/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave.ll b/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave.ll
index bdf73d6a52c22..a8dc7e3472fd1 100644
--- a/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave.ll
+++ b/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave.ll
@@ -569,14 +569,10 @@ define i64 @same_exit_block_pre_inc_use1_reverse() {
 ; VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; VF4IC4-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]]
 ; VF4IC4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[OFFSET_IDX]]
-; VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 0
-; VF4IC4-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 -3
-; VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 -4
-; VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 -3
-; VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 -8
-; VF4IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 -3
-; VF4IC4-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 -12
-; VF4IC4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i8, ptr [[TMP18]], i64 -3
+; VF4IC4-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 -3
+; VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 -7
+; VF4IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 -11
+; VF4IC4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 -15
 ; VF4IC4-NEXT: [[WIDE_LOAD13:%.*]] = load <4 x i8>, ptr [[TMP17]], align 1
 ; VF4IC4-NEXT: [[REVERSE14:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD13]], <4 x i8> poison, <4 x i32>
 ; VF4IC4-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1
@@ -586,14 +582,10 @@ define i64 @same_exit_block_pre_inc_use1_reverse() {
 ; VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i8>, ptr [[TMP27]], align 1
 ; VF4IC4-NEXT: [[REVERSE6:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD5]], <4 x i8> poison, <4 x i32>
 ; VF4IC4-NEXT: [[TMP25:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[OFFSET_IDX]]
-; VF4IC4-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[TMP25]], i64 0
-; VF4IC4-NEXT: [[TMP46:%.*]] = getelementptr inbounds i8, ptr [[TMP26]], i64 -3
-; VF4IC4-NEXT: [[TMP41:%.*]] = getelementptr inbounds i8, ptr [[TMP25]], i64 -4
-; VF4IC4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP41]], i64 -3
-; VF4IC4-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP25]], i64 -8
-; VF4IC4-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i64 -3
-; VF4IC4-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[TMP25]], i64 -12
-; VF4IC4-NEXT: [[TMP42:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i64 -3
+; VF4IC4-NEXT: [[TMP46:%.*]] = getelementptr inbounds i8, ptr [[TMP25]], i64 -3
+; VF4IC4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP25]], i64 -7
+; VF4IC4-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[TMP25]], i64 -11
+; VF4IC4-NEXT: [[TMP42:%.*]] = getelementptr inbounds i8, ptr [[TMP25]], i64 -15
 ; VF4IC4-NEXT: [[WIDE_LOAD14:%.*]] = load <4 x i8>, ptr [[TMP46]], align 1
 ; VF4IC4-NEXT: [[REVERSE15:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD14]], <4 x i8> poison, <4 x i32>
 ; VF4IC4-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x i8>, ptr [[TMP13]], align 1
@@ -807,14 +799,10 @@ define i8 @same_exit_block_reverse_use_loaded_value() {
 ; VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; VF4IC4-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]]
 ; VF4IC4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[OFFSET_IDX]]
-; VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 0
-; VF4IC4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 -3
-; VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 -4
-; VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 -3
-; VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 -8
-; VF4IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 -3
-; VF4IC4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 -12
-; VF4IC4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i64 -3
+; VF4IC4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 -3
+; VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 -7
+; VF4IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 -11
+; VF4IC4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 -15
 ; VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i8>, ptr [[TMP8]], align 1
 ; VF4IC4-NEXT: [[REVERSE6:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD5]], <4 x i8> poison, <4 x i32>
 ; VF4IC4-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1
@@ -824,14 +812,10 @@ define i8 @same_exit_block_reverse_use_loaded_value() {
 ; VF4IC4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i8>, ptr [[TMP11]], align 1
 ; VF4IC4-NEXT: [[REVERSE7:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD6]], <4 x i8> poison, <4 x i32>
 ; VF4IC4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[OFFSET_IDX]]
-; VF4IC4-NEXT: [[TMP38:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i64 0
-; VF4IC4-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP38]], i64 -3
-; VF4IC4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i64 -4
-; VF4IC4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i64 -3
-; VF4IC4-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i64 -8
-; VF4IC4-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i64 -3
-; VF4IC4-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i64 -12
-; VF4IC4-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i64 -3
+; VF4IC4-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i64 -3
+; VF4IC4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i64 -7
+; VF4IC4-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i64 -11
+; VF4IC4-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i64 -15
 ; VF4IC4-NEXT: [[WIDE_LOAD13:%.*]] = load <4 x i8>, ptr [[TMP17]], align 1
 ; VF4IC4-NEXT: [[REVERSE14:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD13]], <4 x i8> poison, <4 x i32>
 ; VF4IC4-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x i8>, ptr [[TMP13]], align 1
diff --git a/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll b/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll
index f76634d954dd3..2794065b0cca3 100644
--- a/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll
+++ b/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll
@@ -1573,13 +1573,11 @@ define i64 @same_exit_block_pre_inc_use1_reverse() {
 ; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT4:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX1]]
 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 0
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 -3
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 -3
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1
 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD]], <4 x i8> poison, <4 x i32>
 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 0
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 -3
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 -3
 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP6]], align 1
 ; CHECK-NEXT: [[REVERSE3:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD2]], <4 x i8> poison, <4 x i32>
 ; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <4 x i8> [[REVERSE]], [[REVERSE3]]
diff --git a/llvm/test/Transforms/LoopVectorize/skeleton-lcssa-crash.ll b/llvm/test/Transforms/LoopVectorize/skeleton-lcssa-crash.ll
index e10cb2794aadb..4a4675f71af9b 100644
--- a/llvm/test/Transforms/LoopVectorize/skeleton-lcssa-crash.ll
+++ b/llvm/test/Transforms/LoopVectorize/skeleton-lcssa-crash.ll
@@ -153,8 +153,7 @@ define void @test2(ptr %dst) {
 ; CHECK-NEXT: [[TMP7:%.*]] = add nsw i64 [[OFFSET_IDX]], -1
 ; CHECK-NEXT: [[TMP8:%.*]] = and i64 [[TMP7]], 4294967295
 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[TMP8]]
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 0
-; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 -1
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 -1
 ; CHECK-NEXT: store <2 x i32> zeroinitializer, ptr [[TMP11]], align 4
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll
index 88dead4418628..88fde69b4fe00 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll
@@ -1148,6 +1148,8 @@ define void @ptr_induction_remove_dead_recipe(ptr %start, ptr %end) {
 ; CHECK-EMPTY:
 ; CHECK-NEXT: vector.ph:
 ; CHECK-NEXT: vp<[[END:%.+]]> = DERIVED-IV ir<%start> + vp<[[VEC_TC]]> * ir<-1>
+; CHECK-NEXT: EMIT vp<[[SUB:%.+]]> = sub nuw nsw vp<[[VF]]>, ir<1>
+; CHECK-NEXT: EMIT vp<[[MUL:%.+]]> = mul vp<[[SUB]]>, ir<-1>
 ; CHECK-NEXT: Successor(s): vector loop
 ; CHECK-EMPTY:
 ; CHECK-NEXT: vector loop: {
@@ -1157,7 +1159,7 @@ define void @ptr_induction_remove_dead_recipe(ptr %start, ptr %end) {
 ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[DEV_IV]]>, ir<-1>
 ; CHECK-NEXT: EMIT vp<[[PTR_IV:%.+]]> = ptradd ir<%start>, vp<[[STEPS]]>
 ; CHECK-NEXT: CLONE ir<%ptr.iv.next> = getelementptr inbounds vp<[[PTR_IV]]>, ir<-1>
-; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-end-pointer inbounds ir<%ptr.iv.next>, vp<[[VF]]>
+; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-end-pointer inbounds ir<%ptr.iv.next>, vp<[[MUL]]>
 ; CHECK-NEXT: WIDEN ir<%l> = load vp<[[VEC_PTR]]>
 ; CHECK-NEXT: WIDEN ir<%c.1> = icmp ne ir<%l>, ir<0>
 ; CHECK-NEXT: Successor(s): pred.store
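Note on the test updates above: for a reverse (stride -1) access, the end pointer of unrolled part P is now emitted as a single GEP whose constant element offset folds the old per-part step (-VF * P) and the old end adjustment (-(VF - 1)) into one value. The following minimal standalone C++ sketch (illustration only, not code from this patch; names and fixed VF are assumptions) reproduces that arithmetic:

#include <cstdint>
#include <cstdio>

// Element offset of the end pointer for unrolled part P of a reverse
// (stride -1) access: the previous output used two GEPs, base + (-VF * P)
// followed by + -(VF - 1); the updated output folds both into one constant.
static int64_t reverseEndOffset(int64_t VF, int64_t P) {
  return -(VF * P) - (VF - 1);
}

int main() {
  // With VF = 4 and interleave count 4 this prints -3, -7, -11, -15,
  // matching the single folded GEP offsets in the IC4VF4/VF4IC4 checks above.
  for (int64_t P = 0; P < 4; ++P)
    std::printf("part %lld: offset %lld\n", (long long)P,
                (long long)reverseEndOffset(4, P));
  return 0;
}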