Skip to content

Commit 52f85fd

Browse files
committed
[AArch64] Enable RT and partial unrolling for loops with reductions.
Update unrolling preferences for Apple Silicon CPUs to enable partial unrolling and runtime unrolling for small loops with reductions. This builds on top of unroller changes to introduce parallel reduction phis, if possible: #149470.
1 parent 0645db8 commit 52f85fd

File tree

2 files changed

+139
-23
lines changed

2 files changed

+139
-23
lines changed

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#include "llvm/Support/Debug.h"
2626
#include "llvm/TargetParser/AArch64TargetParser.h"
2727
#include "llvm/Transforms/InstCombine/InstCombiner.h"
28+
#include "llvm/Transforms/Utils/UnrollLoop.h"
2829
#include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
2930
#include <algorithm>
3031
#include <optional>
@@ -4787,6 +4788,22 @@ getAppleRuntimeUnrollPreferences(Loop *L, ScalarEvolution &SE,
47874788
if (!L->getExitBlock())
47884789
return;
47894790

4791+
// Check if the loop contains any reductions that could be parallelized when
4792+
// unrolling. If so, enable partial unrolling, if the trip count is know to be
4793+
// a multiple of 2.
4794+
bool HasParellelizableReductions =
4795+
L->getNumBlocks() == 1 &&
4796+
any_of(L->getHeader()->phis(),
4797+
[&SE, L](PHINode &Phi) {
4798+
return canParallelizeReductionWhenUnrolling(Phi, L, &SE);
4799+
}) &&
4800+
isLoopSizeWithinBudget(L, TTI, 12, nullptr);
4801+
if (HasParellelizableReductions &&
4802+
SE.getSmallConstantTripMultiple(L, L->getExitingBlock()) % 2 == 0) {
4803+
UP.Partial = true;
4804+
UP.MaxCount = 4;
4805+
}
4806+
47904807
const SCEV *BTC = SE.getSymbolicMaxBackedgeTakenCount(L);
47914808
if (isa<SCEVConstant>(BTC) || isa<SCEVCouldNotCompute>(BTC) ||
47924809
(SE.getSmallConstantMaxTripCount(L) > 0 &&
@@ -4802,6 +4819,11 @@ getAppleRuntimeUnrollPreferences(Loop *L, ScalarEvolution &SE,
48024819
// Limit to loops with trip counts that are cheap to expand.
48034820
UP.SCEVExpansionBudget = 1;
48044821

4822+
if (HasParellelizableReductions) {
4823+
UP.Runtime = true;
4824+
UP.DefaultUnrollRuntimeCount = 4;
4825+
}
4826+
48054827
// Try to unroll small, single block loops, if they have load/store
48064828
// dependencies, to expose more parallel memory access streams.
48074829
BasicBlock *Header = L->getHeader();

llvm/test/Transforms/LoopUnroll/AArch64/apple-unrolling.ll

Lines changed: 117 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -585,16 +585,34 @@ define i32 @test_add_reduction_unroll_partial(ptr %a, i64 noundef %n) {
585585
; APPLE-NEXT: [[ENTRY:.*]]:
586586
; APPLE-NEXT: br label %[[LOOP:.*]]
587587
; APPLE: [[LOOP]]:
588-
; APPLE-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
588+
; APPLE-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ]
589+
; APPLE-NEXT: [[RDX_1:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_1:%.*]], %[[LOOP]] ]
590+
; APPLE-NEXT: [[RDX_2:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_2:%.*]], %[[LOOP]] ]
591+
; APPLE-NEXT: [[RDX_3:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_3:%.*]], %[[LOOP]] ]
589592
; APPLE-NEXT: [[RDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT:%.*]], %[[LOOP]] ]
590593
; APPLE-NEXT: [[GEP_A:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV]]
591594
; APPLE-NEXT: [[TMP0:%.*]] = load i32, ptr [[GEP_A]], align 2
592-
; APPLE-NEXT: [[RDX_NEXT]] = add nuw nsw i32 [[RDX]], [[TMP0]]
593-
; APPLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
594-
; APPLE-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
595-
; APPLE-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
595+
; APPLE-NEXT: [[RDX_NEXT]] = add i32 [[RDX]], [[TMP0]]
596+
; APPLE-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
597+
; APPLE-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT]]
598+
; APPLE-NEXT: [[TMP1:%.*]] = load i32, ptr [[GEP_A_1]], align 2
599+
; APPLE-NEXT: [[RDX_NEXT_1]] = add i32 [[RDX_1]], [[TMP1]]
600+
; APPLE-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2
601+
; APPLE-NEXT: [[GEP_A_2:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT_1]]
602+
; APPLE-NEXT: [[TMP2:%.*]] = load i32, ptr [[GEP_A_2]], align 2
603+
; APPLE-NEXT: [[RDX_NEXT_2]] = add i32 [[RDX_2]], [[TMP2]]
604+
; APPLE-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3
605+
; APPLE-NEXT: [[GEP_A_3:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT_2]]
606+
; APPLE-NEXT: [[TMP3:%.*]] = load i32, ptr [[GEP_A_3]], align 2
607+
; APPLE-NEXT: [[RDX_NEXT_3]] = add i32 [[RDX_3]], [[TMP3]]
608+
; APPLE-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4
609+
; APPLE-NEXT: [[EC_3:%.*]] = icmp eq i64 [[IV_NEXT_3]], 1024
610+
; APPLE-NEXT: br i1 [[EC_3]], label %[[EXIT:.*]], label %[[LOOP]]
596611
; APPLE: [[EXIT]]:
597-
; APPLE-NEXT: [[BIN_RDX2:%.*]] = phi i32 [ [[RDX_NEXT]], %[[LOOP]] ]
612+
; APPLE-NEXT: [[RES:%.*]] = phi i32 [ [[RDX_NEXT_3]], %[[LOOP]] ]
613+
; APPLE-NEXT: [[BIN_RDX:%.*]] = add i32 [[RDX_NEXT_1]], [[RDX_NEXT]]
614+
; APPLE-NEXT: [[BIN_RDX1:%.*]] = add i32 [[RDX_NEXT_2]], [[BIN_RDX]]
615+
; APPLE-NEXT: [[BIN_RDX2:%.*]] = add i32 [[RDX_NEXT_3]], [[BIN_RDX1]]
598616
; APPLE-NEXT: ret i32 [[BIN_RDX2]]
599617
;
600618
; OTHER-LABEL: define i32 @test_add_reduction_unroll_partial(
@@ -731,21 +749,42 @@ define i32 @test_add_and_mul_reduction_unroll_partial(ptr %a, i64 noundef %n) {
731749
; APPLE-NEXT: [[ENTRY:.*]]:
732750
; APPLE-NEXT: br label %[[LOOP:.*]]
733751
; APPLE: [[LOOP]]:
734-
; APPLE-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
735-
; APPLE-NEXT: [[RDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT:%.*]], %[[LOOP]] ]
736-
; APPLE-NEXT: [[RDX_2:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_2_NEXT:%.*]], %[[LOOP]] ]
752+
; APPLE-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ]
753+
; APPLE-NEXT: [[RDX_1:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[BIN_RDX3:%.*]], %[[LOOP]] ]
754+
; APPLE-NEXT: [[RDX_21:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_2:%.*]], %[[LOOP]] ]
755+
; APPLE-NEXT: [[RDX_3:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_3:%.*]], %[[LOOP]] ]
756+
; APPLE-NEXT: [[RDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RES_2:%.*]], %[[LOOP]] ]
757+
; APPLE-NEXT: [[RDX_2:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_2_NEXT_3:%.*]], %[[LOOP]] ]
737758
; APPLE-NEXT: [[GEP_A:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV]]
738759
; APPLE-NEXT: [[TMP0:%.*]] = load i32, ptr [[GEP_A]], align 2
739-
; APPLE-NEXT: [[RDX_NEXT]] = add nuw nsw i32 [[RDX]], [[TMP0]]
740-
; APPLE-NEXT: [[RDX_2_NEXT]] = mul i32 [[RDX_2]], [[TMP0]]
741-
; APPLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
742-
; APPLE-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
743-
; APPLE-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
760+
; APPLE-NEXT: [[RES_2]] = add i32 [[RDX]], [[TMP0]]
761+
; APPLE-NEXT: [[RDX_2_NEXT:%.*]] = mul i32 [[RDX_2]], [[TMP0]]
762+
; APPLE-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
763+
; APPLE-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT]]
764+
; APPLE-NEXT: [[TMP1:%.*]] = load i32, ptr [[GEP_A_1]], align 2
765+
; APPLE-NEXT: [[BIN_RDX3]] = add i32 [[RDX_1]], [[TMP1]]
766+
; APPLE-NEXT: [[RDX_2_NEXT_1:%.*]] = mul i32 [[RDX_2_NEXT]], [[TMP1]]
767+
; APPLE-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2
768+
; APPLE-NEXT: [[GEP_A_2:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT_1]]
769+
; APPLE-NEXT: [[TMP2:%.*]] = load i32, ptr [[GEP_A_2]], align 2
770+
; APPLE-NEXT: [[RDX_NEXT_2]] = add i32 [[RDX_21]], [[TMP2]]
771+
; APPLE-NEXT: [[RDX_2_NEXT_2:%.*]] = mul i32 [[RDX_2_NEXT_1]], [[TMP2]]
772+
; APPLE-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3
773+
; APPLE-NEXT: [[GEP_A_3:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT_2]]
774+
; APPLE-NEXT: [[TMP3:%.*]] = load i32, ptr [[GEP_A_3]], align 2
775+
; APPLE-NEXT: [[RDX_NEXT_3]] = add i32 [[RDX_3]], [[TMP3]]
776+
; APPLE-NEXT: [[RDX_2_NEXT_3]] = mul i32 [[RDX_2_NEXT_2]], [[TMP3]]
777+
; APPLE-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4
778+
; APPLE-NEXT: [[EC_3:%.*]] = icmp eq i64 [[IV_NEXT_3]], 1024
779+
; APPLE-NEXT: br i1 [[EC_3]], label %[[EXIT:.*]], label %[[LOOP]]
744780
; APPLE: [[EXIT]]:
745-
; APPLE-NEXT: [[BIN_RDX3:%.*]] = phi i32 [ [[RDX_NEXT]], %[[LOOP]] ]
746-
; APPLE-NEXT: [[RES_2:%.*]] = phi i32 [ [[RDX_2_NEXT]], %[[LOOP]] ]
781+
; APPLE-NEXT: [[RES_1:%.*]] = phi i32 [ [[RDX_NEXT_3]], %[[LOOP]] ]
782+
; APPLE-NEXT: [[RES_3:%.*]] = phi i32 [ [[RDX_2_NEXT_3]], %[[LOOP]] ]
747783
; APPLE-NEXT: [[SUM:%.*]] = add i32 [[BIN_RDX3]], [[RES_2]]
748-
; APPLE-NEXT: ret i32 [[SUM]]
784+
; APPLE-NEXT: [[BIN_RDX2:%.*]] = add i32 [[RDX_NEXT_2]], [[SUM]]
785+
; APPLE-NEXT: [[BIN_RDX4:%.*]] = add i32 [[RDX_NEXT_3]], [[BIN_RDX2]]
786+
; APPLE-NEXT: [[SUM1:%.*]] = add i32 [[BIN_RDX4]], [[RES_3]]
787+
; APPLE-NEXT: ret i32 [[SUM1]]
749788
;
750789
; OTHER-LABEL: define i32 @test_add_and_mul_reduction_unroll_partial(
751790
; OTHER-SAME: ptr [[A:%.*]], i64 noundef [[N:%.*]]) #[[ATTR0]] {
@@ -802,18 +841,72 @@ define i32 @test_add_reduction_runtime(ptr %a, i64 noundef %n) {
802841
; APPLE-LABEL: define i32 @test_add_reduction_runtime(
803842
; APPLE-SAME: ptr [[A:%.*]], i64 noundef [[N:%.*]]) #[[ATTR0]] {
804843
; APPLE-NEXT: [[ENTRY:.*]]:
844+
; APPLE-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1
845+
; APPLE-NEXT: [[XTRAITER:%.*]] = and i64 [[N]], 3
846+
; APPLE-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 3
847+
; APPLE-NEXT: br i1 [[TMP1]], label %[[EXIT_UNR_LCSSA:.*]], label %[[ENTRY_NEW:.*]]
848+
; APPLE: [[ENTRY_NEW]]:
849+
; APPLE-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]]
805850
; APPLE-NEXT: br label %[[LOOP:.*]]
806851
; APPLE: [[LOOP]]:
807-
; APPLE-NEXT: [[IV_EPIL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_EPIL:%.*]], %[[LOOP]] ]
808-
; APPLE-NEXT: [[RDX_EPIL:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_EPIL:%.*]], %[[LOOP]] ]
852+
; APPLE-NEXT: [[IV_EPIL:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ]
853+
; APPLE-NEXT: [[RDX_1:%.*]] = phi i32 [ 0, %[[ENTRY_NEW]] ], [ [[RDX_NEXT_1:%.*]], %[[LOOP]] ]
854+
; APPLE-NEXT: [[RDX_2:%.*]] = phi i32 [ 0, %[[ENTRY_NEW]] ], [ [[RDX_NEXT_2:%.*]], %[[LOOP]] ]
855+
; APPLE-NEXT: [[RDX_3:%.*]] = phi i32 [ 0, %[[ENTRY_NEW]] ], [ [[RDX_NEXT_3:%.*]], %[[LOOP]] ]
856+
; APPLE-NEXT: [[RDX:%.*]] = phi i32 [ 0, %[[ENTRY_NEW]] ], [ [[RDX_NEXT:%.*]], %[[LOOP]] ]
857+
; APPLE-NEXT: [[NITER:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[NITER_NEXT_3:%.*]], %[[LOOP]] ]
809858
; APPLE-NEXT: [[GEP_A_EPIL:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_EPIL]]
810859
; APPLE-NEXT: [[TMP6:%.*]] = load i32, ptr [[GEP_A_EPIL]], align 2
811-
; APPLE-NEXT: [[RDX_NEXT_EPIL]] = add nuw nsw i32 [[RDX_EPIL]], [[TMP6]]
812-
; APPLE-NEXT: [[IV_NEXT_EPIL]] = add nuw nsw i64 [[IV_EPIL]], 1
860+
; APPLE-NEXT: [[RDX_NEXT]] = add i32 [[RDX]], [[TMP6]]
861+
; APPLE-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV_EPIL]], 1
862+
; APPLE-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT]]
863+
; APPLE-NEXT: [[TMP3:%.*]] = load i32, ptr [[GEP_A_1]], align 2
864+
; APPLE-NEXT: [[RDX_NEXT_1]] = add i32 [[RDX_1]], [[TMP3]]
865+
; APPLE-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV_EPIL]], 2
866+
; APPLE-NEXT: [[GEP_A_2:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT_1]]
867+
; APPLE-NEXT: [[TMP4:%.*]] = load i32, ptr [[GEP_A_2]], align 2
868+
; APPLE-NEXT: [[RDX_NEXT_2]] = add i32 [[RDX_2]], [[TMP4]]
869+
; APPLE-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV_EPIL]], 3
870+
; APPLE-NEXT: [[GEP_A_3:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT_2]]
871+
; APPLE-NEXT: [[TMP5:%.*]] = load i32, ptr [[GEP_A_3]], align 2
872+
; APPLE-NEXT: [[RDX_NEXT_3]] = add i32 [[RDX_3]], [[TMP5]]
873+
; APPLE-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV_EPIL]], 4
874+
; APPLE-NEXT: [[NITER_NEXT_3]] = add nuw i64 [[NITER]], 4
875+
; APPLE-NEXT: [[NITER_NCMP_3:%.*]] = icmp eq i64 [[NITER_NEXT_3]], [[UNROLL_ITER]]
876+
; APPLE-NEXT: br i1 [[NITER_NCMP_3]], label %[[EXIT_UNR_LCSSA_LOOPEXIT:.*]], label %[[LOOP]]
877+
; APPLE: [[EXIT_UNR_LCSSA_LOOPEXIT]]:
878+
; APPLE-NEXT: [[RES_PH_PH:%.*]] = phi i32 [ [[RDX_NEXT_3]], %[[LOOP]] ]
879+
; APPLE-NEXT: [[IV_UNR_PH:%.*]] = phi i64 [ [[IV_NEXT_3]], %[[LOOP]] ]
880+
; APPLE-NEXT: [[RDX_UNR_PH:%.*]] = phi i32 [ [[RDX_NEXT_3]], %[[LOOP]] ]
881+
; APPLE-NEXT: [[BIN_RDX:%.*]] = add i32 [[RDX_NEXT_1]], [[RDX_NEXT]]
882+
; APPLE-NEXT: [[BIN_RDX2:%.*]] = add i32 [[RDX_NEXT_2]], [[BIN_RDX]]
883+
; APPLE-NEXT: [[BIN_RDX3:%.*]] = add i32 [[RDX_NEXT_3]], [[BIN_RDX2]]
884+
; APPLE-NEXT: br label %[[EXIT_UNR_LCSSA]]
885+
; APPLE: [[EXIT_UNR_LCSSA]]:
886+
; APPLE-NEXT: [[RES_PH:%.*]] = phi i32 [ poison, %[[ENTRY]] ], [ [[BIN_RDX3]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ]
887+
; APPLE-NEXT: [[IV_UNR:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ]
888+
; APPLE-NEXT: [[RDX_UNR:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[BIN_RDX3]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ]
889+
; APPLE-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
890+
; APPLE-NEXT: br i1 [[LCMP_MOD]], label %[[LOOP_EPIL_PREHEADER:.*]], label %[[EXIT:.*]]
891+
; APPLE: [[LOOP_EPIL_PREHEADER]]:
892+
; APPLE-NEXT: br label %[[LOOP_EPIL:.*]]
893+
; APPLE: [[LOOP_EPIL]]:
894+
; APPLE-NEXT: [[IV_EPIL1:%.*]] = phi i64 [ [[IV_UNR]], %[[LOOP_EPIL_PREHEADER]] ], [ [[IV_NEXT_EPIL:%.*]], %[[LOOP_EPIL]] ]
895+
; APPLE-NEXT: [[RDX_EPIL:%.*]] = phi i32 [ [[RDX_UNR]], %[[LOOP_EPIL_PREHEADER]] ], [ [[RDX_NEXT_EPIL:%.*]], %[[LOOP_EPIL]] ]
896+
; APPLE-NEXT: [[EPIL_ITER:%.*]] = phi i64 [ 0, %[[LOOP_EPIL_PREHEADER]] ], [ [[EPIL_ITER_NEXT:%.*]], %[[LOOP_EPIL]] ]
897+
; APPLE-NEXT: [[GEP_A_EPIL1:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_EPIL1]]
898+
; APPLE-NEXT: [[TMP7:%.*]] = load i32, ptr [[GEP_A_EPIL1]], align 2
899+
; APPLE-NEXT: [[RDX_NEXT_EPIL]] = add nuw nsw i32 [[RDX_EPIL]], [[TMP7]]
900+
; APPLE-NEXT: [[IV_NEXT_EPIL]] = add nuw nsw i64 [[IV_EPIL1]], 1
813901
; APPLE-NEXT: [[EC_EPIL:%.*]] = icmp eq i64 [[IV_NEXT_EPIL]], [[N]]
814-
; APPLE-NEXT: br i1 [[EC_EPIL]], label %[[EXIT:.*]], label %[[LOOP]]
902+
; APPLE-NEXT: [[EPIL_ITER_NEXT]] = add i64 [[EPIL_ITER]], 1
903+
; APPLE-NEXT: [[EPIL_ITER_CMP:%.*]] = icmp ne i64 [[EPIL_ITER_NEXT]], [[XTRAITER]]
904+
; APPLE-NEXT: br i1 [[EPIL_ITER_CMP]], label %[[LOOP_EPIL]], label %[[EXIT_EPILOG_LCSSA:.*]], !llvm.loop [[LOOP3:![0-9]+]]
905+
; APPLE: [[EXIT_EPILOG_LCSSA]]:
906+
; APPLE-NEXT: [[RES_PH1:%.*]] = phi i32 [ [[RDX_NEXT_EPIL]], %[[LOOP_EPIL]] ]
907+
; APPLE-NEXT: br label %[[EXIT]]
815908
; APPLE: [[EXIT]]:
816-
; APPLE-NEXT: [[RES:%.*]] = phi i32 [ [[RDX_NEXT_EPIL]], %[[LOOP]] ]
909+
; APPLE-NEXT: [[RES:%.*]] = phi i32 [ [[RES_PH]], %[[EXIT_UNR_LCSSA]] ], [ [[RES_PH1]], %[[EXIT_EPILOG_LCSSA]] ]
817910
; APPLE-NEXT: ret i32 [[RES]]
818911
;
819912
; OTHER-LABEL: define i32 @test_add_reduction_runtime(
@@ -908,6 +1001,7 @@ exit:
9081001
; APPLE: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]]}
9091002
; APPLE: [[META1]] = !{!"llvm.loop.unroll.disable"}
9101003
; APPLE: [[LOOP2]] = distinct !{[[LOOP2]], [[META1]]}
1004+
; APPLE: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]}
9111005
;.
9121006
; OTHER: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]]}
9131007
; OTHER: [[META1]] = !{!"llvm.loop.unroll.disable"}

0 commit comments

Comments
 (0)