diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp index e12caa2136962..1de5134b5d6d9 100644 --- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -4957,7 +4957,7 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() { // This is especially useful for unrolled loops. - for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) { + for (ssize_t LUIdx = Uses.size() - 1; LUIdx >= 0; --LUIdx) { LSRUse &LU = Uses[LUIdx]; for (const Formula &F : LU.Formulae) { if (F.BaseOffset.isZero() || (F.Scale != 0 && F.Scale != 1)) @@ -5002,8 +5002,6 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() { // Delete the old use. DeleteUse(LU, LUIdx); - --LUIdx; - --NumUses; break; } } diff --git a/llvm/test/CodeGen/ARM/loop-indexing.ll b/llvm/test/CodeGen/ARM/loop-indexing.ll index bb859b202bbc0..62fafc53e5e86 100644 --- a/llvm/test/CodeGen/ARM/loop-indexing.ll +++ b/llvm/test/CodeGen/ARM/loop-indexing.ll @@ -68,12 +68,11 @@ exit: } ; CHECK-LABEL: convolve_16bit -; TODO: Both arrays should use indexing ; CHECK-DEFAULT: ldr{{.*}}, #8]! -; CHECK-DEFAULT-NOT: ldr{{.*}}]! +; CHECK-DEFAULT: ldr{{.*}}, #8]! ; CHECK-COMPLEX: ldr{{.*}}, #8]! -; CHECK-COMPLEX-NOT: ldr{{.*}}]! +; CHECK-COMPLEX: ldr{{.*}}, #8]! ; DISABLED-NOT: ldr{{.*}}]! ; DISABLED-NOT: str{{.*}}]! 
diff --git a/llvm/test/CodeGen/PowerPC/dform-pair-load-store.ll b/llvm/test/CodeGen/PowerPC/dform-pair-load-store.ll index f5ae9a20a4ee0..030acb382bb5a 100644 --- a/llvm/test/CodeGen/PowerPC/dform-pair-load-store.ll +++ b/llvm/test/CodeGen/PowerPC/dform-pair-load-store.ll @@ -16,8 +16,8 @@ define void @foo(i32 zeroext %n, ptr %ptr, ptr %ptr2) { ; CHECK-NEXT: cmplwi r3, 0 ; CHECK-NEXT: beqlr cr0 ; CHECK-NEXT: # %bb.1: # %for.body.lr.ph -; CHECK-NEXT: addi r4, r4, 64 ; CHECK-NEXT: addi r5, r5, 64 +; CHECK-NEXT: addi r4, r4, 64 ; CHECK-NEXT: mtctr r3 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB0_2: # %for.body @@ -41,8 +41,8 @@ define void @foo(i32 zeroext %n, ptr %ptr, ptr %ptr2) { ; CHECK-BE-NEXT: cmplwi r3, 0 ; CHECK-BE-NEXT: beqlr cr0 ; CHECK-BE-NEXT: # %bb.1: # %for.body.lr.ph -; CHECK-BE-NEXT: addi r4, r4, 64 ; CHECK-BE-NEXT: addi r5, r5, 64 +; CHECK-BE-NEXT: addi r4, r4, 64 ; CHECK-BE-NEXT: mtctr r3 ; CHECK-BE-NEXT: .p2align 4 ; CHECK-BE-NEXT: .LBB0_2: # %for.body diff --git a/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll b/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll index 79f2ef3e3746a..7508ac12e9b46 100644 --- a/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll +++ b/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll @@ -8,31 +8,31 @@ define void @foo(ptr readonly %0, ptr %1, i64 %2, i64 %3, i64 %4, i64 %5, i64 %6 ; CHECK-NEXT: cmpd 5, 7 ; CHECK-NEXT: bgelr 0 ; CHECK-NEXT: # %bb.1: # %.preheader +; CHECK-NEXT: addi 12, 5, 3 ; CHECK-NEXT: std 27, -40(1) # 8-byte Folded Spill ; CHECK-NEXT: addi 27, 5, 2 +; CHECK-NEXT: std 29, -24(1) # 8-byte Folded Spill +; CHECK-NEXT: addi 29, 5, 1 +; CHECK-NEXT: addi 11, 3, 16 ; CHECK-NEXT: std 28, -32(1) # 8-byte Folded Spill -; CHECK-NEXT: addi 28, 5, 3 -; CHECK-NEXT: std 30, -16(1) # 8-byte Folded Spill -; CHECK-NEXT: addi 30, 5, 1 -; CHECK-NEXT: mulld 12, 8, 5 ; CHECK-NEXT: mulld 0, 9, 8 -; CHECK-NEXT: std 29, -24(1) # 8-byte Folded Spill -; CHECK-NEXT: addi 29, 3, 16 -; CHECK-NEXT: sldi 11, 10, 3 +; CHECK-NEXT: 
mulld 28, 8, 5 +; CHECK-NEXT: std 30, -16(1) # 8-byte Folded Spill +; CHECK-NEXT: sldi 30, 10, 3 ; CHECK-NEXT: std 22, -80(1) # 8-byte Folded Spill ; CHECK-NEXT: std 23, -72(1) # 8-byte Folded Spill ; CHECK-NEXT: std 24, -64(1) # 8-byte Folded Spill ; CHECK-NEXT: std 25, -56(1) # 8-byte Folded Spill ; CHECK-NEXT: std 26, -48(1) # 8-byte Folded Spill -; CHECK-NEXT: mulld 30, 8, 30 -; CHECK-NEXT: mulld 28, 8, 28 +; CHECK-NEXT: mulld 12, 8, 12 +; CHECK-NEXT: mulld 29, 8, 29 ; CHECK-NEXT: mulld 8, 8, 27 ; CHECK-NEXT: b .LBB0_3 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB0_2: ; CHECK-NEXT: add 5, 5, 9 ; CHECK-NEXT: add 12, 12, 0 -; CHECK-NEXT: add 30, 30, 0 +; CHECK-NEXT: add 29, 29, 0 ; CHECK-NEXT: add 28, 28, 0 ; CHECK-NEXT: add 8, 8, 0 ; CHECK-NEXT: cmpd 5, 7 @@ -43,24 +43,24 @@ define void @foo(ptr readonly %0, ptr %1, i64 %2, i64 %3, i64 %4, i64 %5, i64 %6 ; CHECK-NEXT: cmpd 6, 27 ; CHECK-NEXT: bge 0, .LBB0_2 ; CHECK-NEXT: # %bb.4: -; CHECK-NEXT: add 25, 6, 12 +; CHECK-NEXT: add 24, 6, 28 +; CHECK-NEXT: add 26, 6, 12 +; CHECK-NEXT: add 25, 6, 29 +; CHECK-NEXT: sldi 23, 24, 3 ; CHECK-NEXT: add 24, 6, 8 -; CHECK-NEXT: sldi 26, 6, 3 -; CHECK-NEXT: sldi 23, 25, 3 -; CHECK-NEXT: add 25, 6, 30 -; CHECK-NEXT: sldi 24, 24, 3 -; CHECK-NEXT: add 26, 4, 26 +; CHECK-NEXT: sldi 26, 26, 3 ; CHECK-NEXT: sldi 22, 25, 3 -; CHECK-NEXT: add 25, 6, 28 -; CHECK-NEXT: add 24, 29, 24 +; CHECK-NEXT: sldi 25, 6, 3 +; CHECK-NEXT: sldi 24, 24, 3 +; CHECK-NEXT: add 26, 11, 26 +; CHECK-NEXT: add 25, 4, 25 ; CHECK-NEXT: add 23, 3, 23 -; CHECK-NEXT: sldi 25, 25, 3 ; CHECK-NEXT: add 22, 3, 22 -; CHECK-NEXT: add 25, 29, 25 +; CHECK-NEXT: add 24, 11, 24 ; CHECK-NEXT: .p2align 5 ; CHECK-NEXT: .LBB0_5: # Parent Loop BB0_3 Depth=1 ; CHECK-NEXT: # => This Inner Loop Header: Depth=2 -; CHECK-NEXT: lfd 0, 0(26) +; CHECK-NEXT: lfd 0, 0(25) ; CHECK-NEXT: lfd 1, 0(23) ; CHECK-NEXT: add 6, 6, 10 ; CHECK-NEXT: cmpd 6, 27 @@ -70,7 +70,7 @@ define void @foo(ptr readonly %0, ptr %1, i64 %2, i64 %3, i64 %4, i64 
%5, i64 %6 ; CHECK-NEXT: lfd 1, 16(23) ; CHECK-NEXT: xsadddp 0, 0, 1 ; CHECK-NEXT: lfd 1, 24(23) -; CHECK-NEXT: add 23, 23, 11 +; CHECK-NEXT: add 23, 23, 30 ; CHECK-NEXT: xsadddp 0, 0, 1 ; CHECK-NEXT: lfd 1, 0(22) ; CHECK-NEXT: xsadddp 0, 0, 1 @@ -79,7 +79,7 @@ define void @foo(ptr readonly %0, ptr %1, i64 %2, i64 %3, i64 %4, i64 %5, i64 %6 ; CHECK-NEXT: lfd 1, 16(22) ; CHECK-NEXT: xsadddp 0, 0, 1 ; CHECK-NEXT: lfd 1, 24(22) -; CHECK-NEXT: add 22, 22, 11 +; CHECK-NEXT: add 22, 22, 30 ; CHECK-NEXT: xsadddp 0, 0, 1 ; CHECK-NEXT: lfd 1, -16(24) ; CHECK-NEXT: xsadddp 0, 0, 1 @@ -88,19 +88,19 @@ define void @foo(ptr readonly %0, ptr %1, i64 %2, i64 %3, i64 %4, i64 %5, i64 %6 ; CHECK-NEXT: lfd 1, 0(24) ; CHECK-NEXT: xsadddp 0, 0, 1 ; CHECK-NEXT: lfd 1, 8(24) -; CHECK-NEXT: add 24, 24, 11 +; CHECK-NEXT: add 24, 24, 30 ; CHECK-NEXT: xsadddp 0, 0, 1 -; CHECK-NEXT: lfd 1, -16(25) +; CHECK-NEXT: lfd 1, -16(26) ; CHECK-NEXT: xsadddp 0, 0, 1 -; CHECK-NEXT: lfd 1, -8(25) +; CHECK-NEXT: lfd 1, -8(26) ; CHECK-NEXT: xsadddp 0, 0, 1 -; CHECK-NEXT: lfd 1, 0(25) +; CHECK-NEXT: lfd 1, 0(26) ; CHECK-NEXT: xsadddp 0, 0, 1 -; CHECK-NEXT: lfd 1, 8(25) -; CHECK-NEXT: add 25, 25, 11 +; CHECK-NEXT: lfd 1, 8(26) +; CHECK-NEXT: add 26, 26, 30 ; CHECK-NEXT: xsadddp 0, 0, 1 -; CHECK-NEXT: stfd 0, 0(26) -; CHECK-NEXT: add 26, 26, 11 +; CHECK-NEXT: stfd 0, 0(25) +; CHECK-NEXT: add 25, 25, 30 ; CHECK-NEXT: blt 0, .LBB0_5 ; CHECK-NEXT: b .LBB0_2 ; CHECK-NEXT: .LBB0_6: diff --git a/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll b/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll index af0942e99182d..5c7eb283aa6f0 100644 --- a/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll +++ b/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll @@ -18,8 +18,8 @@ define void @foo(ptr %.m, ptr %.n, ptr %.a, ptr %.x, ptr %.l, ptr %.vy01, ptr %. 
; CHECK-NEXT: cmpwi 3, 1 ; CHECK-NEXT: bltlr 0 ; CHECK-NEXT: # %bb.2: # %_loop_1_do_.preheader -; CHECK-NEXT: stdu 1, -592(1) -; CHECK-NEXT: .cfi_def_cfa_offset 592 +; CHECK-NEXT: stdu 1, -608(1) +; CHECK-NEXT: .cfi_def_cfa_offset 608 ; CHECK-NEXT: .cfi_offset r14, -192 ; CHECK-NEXT: .cfi_offset r15, -184 ; CHECK-NEXT: .cfi_offset r16, -176 @@ -56,300 +56,293 @@ define void @foo(ptr %.m, ptr %.n, ptr %.a, ptr %.x, ptr %.l, ptr %.vy01, ptr %. ; CHECK-NEXT: .cfi_offset v29, -240 ; CHECK-NEXT: .cfi_offset v30, -224 ; CHECK-NEXT: .cfi_offset v31, -208 -; CHECK-NEXT: std 14, 400(1) # 8-byte Folded Spill -; CHECK-NEXT: std 15, 408(1) # 8-byte Folded Spill -; CHECK-NEXT: ld 2, 728(1) -; CHECK-NEXT: ld 14, 688(1) -; CHECK-NEXT: ld 11, 704(1) -; CHECK-NEXT: std 20, 448(1) # 8-byte Folded Spill -; CHECK-NEXT: std 21, 456(1) # 8-byte Folded Spill -; CHECK-NEXT: mr 21, 5 -; CHECK-NEXT: lwa 5, 0(7) -; CHECK-NEXT: ld 7, 720(1) -; CHECK-NEXT: std 22, 464(1) # 8-byte Folded Spill -; CHECK-NEXT: std 23, 472(1) # 8-byte Folded Spill -; CHECK-NEXT: mr 22, 6 -; CHECK-NEXT: ld 6, 848(1) +; CHECK-NEXT: std 28, 528(1) # 8-byte Folded Spill +; CHECK-NEXT: std 29, 536(1) # 8-byte Folded Spill +; CHECK-NEXT: mr 28, 5 +; CHECK-NEXT: ld 5, 864(1) ; CHECK-NEXT: addi 3, 3, 1 -; CHECK-NEXT: ld 15, 736(1) -; CHECK-NEXT: std 18, 432(1) # 8-byte Folded Spill -; CHECK-NEXT: std 19, 440(1) # 8-byte Folded Spill -; CHECK-NEXT: ld 19, 768(1) -; CHECK-NEXT: ld 18, 760(1) -; CHECK-NEXT: std 30, 528(1) # 8-byte Folded Spill -; CHECK-NEXT: std 31, 536(1) # 8-byte Folded Spill -; CHECK-NEXT: ld 12, 696(1) -; CHECK-NEXT: lxv 0, 0(9) -; CHECK-NEXT: std 9, 64(1) # 8-byte Folded Spill -; CHECK-NEXT: std 10, 72(1) # 8-byte Folded Spill -; CHECK-NEXT: lxv 1, 0(8) +; CHECK-NEXT: ld 2, 848(1) +; CHECK-NEXT: ld 12, 784(1) +; CHECK-NEXT: std 22, 480(1) # 8-byte Folded Spill +; CHECK-NEXT: std 23, 488(1) # 8-byte Folded Spill +; CHECK-NEXT: mr 22, 6 +; CHECK-NEXT: li 6, 9 +; CHECK-NEXT: ld 23, 800(1) +; CHECK-NEXT: ld 
29, 712(1) +; CHECK-NEXT: std 24, 496(1) # 8-byte Folded Spill +; CHECK-NEXT: std 25, 504(1) # 8-byte Folded Spill +; CHECK-NEXT: ld 25, 816(1) ; CHECK-NEXT: cmpldi 3, 9 -; CHECK-NEXT: ld 30, 824(1) -; CHECK-NEXT: std 28, 512(1) # 8-byte Folded Spill -; CHECK-NEXT: std 29, 520(1) # 8-byte Folded Spill -; CHECK-NEXT: ld 29, 840(1) -; CHECK-NEXT: ld 28, 832(1) -; CHECK-NEXT: std 16, 416(1) # 8-byte Folded Spill -; CHECK-NEXT: std 17, 424(1) # 8-byte Folded Spill -; CHECK-NEXT: ld 23, 784(1) -; CHECK-NEXT: ld 20, 776(1) -; CHECK-NEXT: std 24, 480(1) # 8-byte Folded Spill -; CHECK-NEXT: std 25, 488(1) # 8-byte Folded Spill -; CHECK-NEXT: ld 25, 800(1) -; CHECK-NEXT: ld 24, 792(1) -; CHECK-NEXT: std 26, 496(1) # 8-byte Folded Spill -; CHECK-NEXT: std 27, 504(1) # 8-byte Folded Spill -; CHECK-NEXT: ld 27, 816(1) -; CHECK-NEXT: ld 26, 808(1) -; CHECK-NEXT: stfd 26, 544(1) # 8-byte Folded Spill -; CHECK-NEXT: stfd 27, 552(1) # 8-byte Folded Spill -; CHECK-NEXT: ld 17, 752(1) -; CHECK-NEXT: extswsli 9, 5, 3 -; CHECK-NEXT: lxv 4, 0(14) -; CHECK-NEXT: std 14, 32(1) # 8-byte Folded Spill -; CHECK-NEXT: std 12, 40(1) # 8-byte Folded Spill -; CHECK-NEXT: mulli 0, 5, 40 -; CHECK-NEXT: sldi 14, 5, 5 -; CHECK-NEXT: mulli 31, 5, 24 -; CHECK-NEXT: lxv 38, 0(2) -; CHECK-NEXT: lxv 2, 0(11) -; CHECK-NEXT: std 2, 80(1) # 8-byte Folded Spill -; CHECK-NEXT: std 15, 88(1) # 8-byte Folded Spill -; CHECK-NEXT: mulli 2, 5, 48 -; CHECK-NEXT: sldi 5, 5, 4 -; CHECK-NEXT: ld 16, 744(1) -; CHECK-NEXT: lxv 5, 0(10) -; CHECK-NEXT: std 6, 200(1) # 8-byte Folded Spill -; CHECK-NEXT: std 29, 192(1) # 8-byte Folded Spill -; CHECK-NEXT: ld 6, 712(1) -; CHECK-NEXT: mr 10, 7 -; CHECK-NEXT: add 7, 14, 21 -; CHECK-NEXT: lxv 13, 0(19) -; CHECK-NEXT: std 8, 48(1) # 8-byte Folded Spill -; CHECK-NEXT: std 6, 56(1) # 8-byte Folded Spill -; CHECK-NEXT: mr 8, 11 -; CHECK-NEXT: li 11, 9 -; CHECK-NEXT: iselgt 3, 3, 11 +; CHECK-NEXT: ld 24, 808(1) +; CHECK-NEXT: std 26, 512(1) # 8-byte Folded Spill +; CHECK-NEXT: std 
27, 520(1) # 8-byte Folded Spill +; CHECK-NEXT: ld 26, 824(1) +; CHECK-NEXT: ld 27, 832(1) +; CHECK-NEXT: std 14, 416(1) # 8-byte Folded Spill +; CHECK-NEXT: std 15, 424(1) # 8-byte Folded Spill +; CHECK-NEXT: ld 15, 728(1) +; CHECK-NEXT: ld 14, 720(1) +; CHECK-NEXT: std 16, 432(1) # 8-byte Folded Spill +; CHECK-NEXT: std 17, 440(1) # 8-byte Folded Spill +; CHECK-NEXT: ld 17, 744(1) +; CHECK-NEXT: ld 16, 736(1) +; CHECK-NEXT: std 18, 448(1) # 8-byte Folded Spill +; CHECK-NEXT: std 19, 456(1) # 8-byte Folded Spill +; CHECK-NEXT: iselgt 3, 3, 6 +; CHECK-NEXT: ld 19, 760(1) +; CHECK-NEXT: ld 18, 752(1) +; CHECK-NEXT: std 20, 464(1) # 8-byte Folded Spill +; CHECK-NEXT: std 21, 472(1) # 8-byte Folded Spill +; CHECK-NEXT: ld 21, 776(1) +; CHECK-NEXT: ld 20, 768(1) +; CHECK-NEXT: std 30, 544(1) # 8-byte Folded Spill +; CHECK-NEXT: std 31, 552(1) # 8-byte Folded Spill +; CHECK-NEXT: ld 30, 840(1) +; CHECK-NEXT: ld 31, 792(1) +; CHECK-NEXT: std 8, 40(1) # 8-byte Folded Spill +; CHECK-NEXT: std 9, 48(1) # 8-byte Folded Spill +; CHECK-NEXT: ld 11, 704(1) +; CHECK-NEXT: lxv 39, 0(8) +; CHECK-NEXT: stfd 26, 560(1) # 8-byte Folded Spill +; CHECK-NEXT: stfd 27, 568(1) # 8-byte Folded Spill ; CHECK-NEXT: addi 3, 3, -2 -; CHECK-NEXT: rldicl 11, 3, 61, 3 -; CHECK-NEXT: lxv 3, 0(12) -; CHECK-NEXT: lxv 40, 0(6) -; CHECK-NEXT: std 18, 112(1) # 8-byte Folded Spill -; CHECK-NEXT: std 19, 120(1) # 8-byte Folded Spill -; CHECK-NEXT: add 19, 21, 5 -; CHECK-NEXT: ld 5, 200(1) # 8-byte Folded Reload -; CHECK-NEXT: lxv 39, 0(10) -; CHECK-NEXT: addi 3, 7, 32 -; CHECK-NEXT: add 12, 31, 21 -; CHECK-NEXT: std 20, 128(1) # 8-byte Folded Spill -; CHECK-NEXT: std 23, 136(1) # 8-byte Folded Spill -; CHECK-NEXT: lxv 33, 0(15) -; CHECK-NEXT: lxv 32, 0(16) -; CHECK-NEXT: std 26, 160(1) # 8-byte Folded Spill -; CHECK-NEXT: std 27, 168(1) # 8-byte Folded Spill -; CHECK-NEXT: lxv 37, 0(17) -; CHECK-NEXT: lxv 36, 0(18) -; CHECK-NEXT: std 30, 176(1) # 8-byte Folded Spill -; CHECK-NEXT: std 28, 184(1) # 8-byte 
Folded Spill -; CHECK-NEXT: lxv 12, 0(20) -; CHECK-NEXT: lxv 11, 0(23) -; CHECK-NEXT: add 20, 21, 9 -; CHECK-NEXT: stfd 28, 560(1) # 8-byte Folded Spill -; CHECK-NEXT: stfd 29, 568(1) # 8-byte Folded Spill -; CHECK-NEXT: lxv 10, 0(24) -; CHECK-NEXT: lxv 9, 0(25) -; CHECK-NEXT: stfd 30, 576(1) # 8-byte Folded Spill -; CHECK-NEXT: stfd 31, 584(1) # 8-byte Folded Spill -; CHECK-NEXT: lxv 8, 0(26) -; CHECK-NEXT: lxv 7, 0(27) -; CHECK-NEXT: addi 12, 12, 32 -; CHECK-NEXT: li 27, 0 -; CHECK-NEXT: mr 26, 21 -; CHECK-NEXT: stxv 52, 208(1) # 16-byte Folded Spill -; CHECK-NEXT: stxv 53, 224(1) # 16-byte Folded Spill -; CHECK-NEXT: lxv 6, 0(30) -; CHECK-NEXT: lxv 41, 0(28) -; CHECK-NEXT: addi 7, 11, 1 -; CHECK-NEXT: add 11, 0, 21 -; CHECK-NEXT: li 28, 1 -; CHECK-NEXT: stxv 54, 240(1) # 16-byte Folded Spill -; CHECK-NEXT: stxv 55, 256(1) # 16-byte Folded Spill -; CHECK-NEXT: lxv 43, 0(29) -; CHECK-NEXT: lxv 42, 0(5) -; CHECK-NEXT: stxv 56, 272(1) # 16-byte Folded Spill -; CHECK-NEXT: stxv 57, 288(1) # 16-byte Folded Spill -; CHECK-NEXT: addi 11, 11, 32 -; CHECK-NEXT: stxv 58, 304(1) # 16-byte Folded Spill -; CHECK-NEXT: stxv 59, 320(1) # 16-byte Folded Spill -; CHECK-NEXT: stxv 60, 336(1) # 16-byte Folded Spill -; CHECK-NEXT: stxv 61, 352(1) # 16-byte Folded Spill -; CHECK-NEXT: stxv 62, 368(1) # 16-byte Folded Spill -; CHECK-NEXT: stxv 63, 384(1) # 16-byte Folded Spill -; CHECK-NEXT: std 16, 96(1) # 8-byte Folded Spill -; CHECK-NEXT: std 17, 104(1) # 8-byte Folded Spill -; CHECK-NEXT: std 24, 144(1) # 8-byte Folded Spill -; CHECK-NEXT: std 25, 152(1) # 8-byte Folded Spill +; CHECK-NEXT: lxv 4, 0(23) +; CHECK-NEXT: lxv 1, 0(26) +; CHECK-NEXT: std 5, 216(1) # 8-byte Folded Spill +; CHECK-NEXT: std 23, 152(1) # 8-byte Folded Spill +; CHECK-NEXT: std 24, 160(1) # 8-byte Folded Spill +; CHECK-NEXT: ld 5, 856(1) +; CHECK-NEXT: lxv 3, 0(24) +; CHECK-NEXT: lxv 2, 0(25) +; CHECK-NEXT: std 25, 168(1) # 8-byte Folded Spill +; CHECK-NEXT: std 26, 176(1) # 8-byte Folded Spill +; 
CHECK-NEXT: lxv 38, 0(9) +; CHECK-NEXT: lxv 33, 0(10) +; CHECK-NEXT: std 12, 136(1) # 8-byte Folded Spill +; CHECK-NEXT: std 31, 144(1) # 8-byte Folded Spill +; CHECK-NEXT: rldicl 3, 3, 61, 3 +; CHECK-NEXT: lxv 32, 0(11) +; CHECK-NEXT: lxv 37, 0(29) +; CHECK-NEXT: mr 8, 11 +; CHECK-NEXT: std 27, 184(1) # 8-byte Folded Spill +; CHECK-NEXT: std 30, 192(1) # 8-byte Folded Spill +; CHECK-NEXT: lxv 36, 0(14) +; CHECK-NEXT: lxv 13, 0(15) +; CHECK-NEXT: stfd 28, 576(1) # 8-byte Folded Spill +; CHECK-NEXT: stfd 29, 584(1) # 8-byte Folded Spill +; CHECK-NEXT: lxv 12, 0(16) +; CHECK-NEXT: lxv 11, 0(17) +; CHECK-NEXT: stfd 30, 592(1) # 8-byte Folded Spill +; CHECK-NEXT: stfd 31, 600(1) # 8-byte Folded Spill +; CHECK-NEXT: lxv 10, 0(18) +; CHECK-NEXT: lxv 9, 0(19) +; CHECK-NEXT: stxv 52, 224(1) # 16-byte Folded Spill +; CHECK-NEXT: stxv 53, 240(1) # 16-byte Folded Spill +; CHECK-NEXT: lxv 8, 0(20) +; CHECK-NEXT: lxv 7, 0(21) +; CHECK-NEXT: stxv 54, 256(1) # 16-byte Folded Spill +; CHECK-NEXT: stxv 55, 272(1) # 16-byte Folded Spill +; CHECK-NEXT: lxv 6, 0(12) +; CHECK-NEXT: lxv 5, 0(31) +; CHECK-NEXT: stxv 56, 288(1) # 16-byte Folded Spill +; CHECK-NEXT: stxv 57, 304(1) # 16-byte Folded Spill +; CHECK-NEXT: lxv 0, 0(27) +; CHECK-NEXT: lxv 40, 0(30) +; CHECK-NEXT: li 30, 1 +; CHECK-NEXT: stxv 58, 320(1) # 16-byte Folded Spill +; CHECK-NEXT: stxv 59, 336(1) # 16-byte Folded Spill +; CHECK-NEXT: lxv 41, 0(2) +; CHECK-NEXT: std 5, 208(1) # 8-byte Folded Spill +; CHECK-NEXT: std 2, 200(1) # 8-byte Folded Spill +; CHECK-NEXT: lwa 5, 0(7) +; CHECK-NEXT: addi 7, 3, 1 +; CHECK-NEXT: mulli 3, 5, 40 +; CHECK-NEXT: extswsli 6, 5, 3 +; CHECK-NEXT: mulli 31, 5, 48 +; CHECK-NEXT: add 0, 28, 6 +; CHECK-NEXT: ld 6, 208(1) # 8-byte Folded Reload +; CHECK-NEXT: add 23, 28, 3 +; CHECK-NEXT: sldi 3, 5, 4 +; CHECK-NEXT: stxv 60, 352(1) # 16-byte Folded Spill +; CHECK-NEXT: stxv 61, 368(1) # 16-byte Folded Spill +; CHECK-NEXT: stxv 62, 384(1) # 16-byte Folded Spill +; CHECK-NEXT: add 26, 28, 3 +; 
CHECK-NEXT: sldi 3, 5, 5 +; CHECK-NEXT: stxv 63, 400(1) # 16-byte Folded Spill +; CHECK-NEXT: std 10, 56(1) # 8-byte Folded Spill +; CHECK-NEXT: std 29, 64(1) # 8-byte Folded Spill +; CHECK-NEXT: std 14, 72(1) # 8-byte Folded Spill +; CHECK-NEXT: std 15, 80(1) # 8-byte Folded Spill +; CHECK-NEXT: lxv 42, 0(6) +; CHECK-NEXT: std 16, 88(1) # 8-byte Folded Spill +; CHECK-NEXT: std 17, 96(1) # 8-byte Folded Spill +; CHECK-NEXT: add 24, 28, 3 +; CHECK-NEXT: mulli 3, 5, 24 +; CHECK-NEXT: std 18, 104(1) # 8-byte Folded Spill +; CHECK-NEXT: std 19, 112(1) # 8-byte Folded Spill +; CHECK-NEXT: add 25, 28, 3 +; CHECK-NEXT: ld 3, 216(1) # 8-byte Folded Reload +; CHECK-NEXT: std 20, 120(1) # 8-byte Folded Spill +; CHECK-NEXT: std 21, 128(1) # 8-byte Folded Spill +; CHECK-NEXT: lxv 43, 0(3) ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB0_3: # %_loop_2_do_.lr.ph ; CHECK-NEXT: # =>This Loop Header: Depth=1 ; CHECK-NEXT: # Child Loop BB0_4 Depth 2 -; CHECK-NEXT: maddld 5, 2, 27, 0 ; CHECK-NEXT: mr 6, 22 -; CHECK-NEXT: mr 30, 20 -; CHECK-NEXT: mr 29, 19 +; CHECK-NEXT: mr 5, 28 +; CHECK-NEXT: mr 27, 0 +; CHECK-NEXT: mr 11, 26 +; CHECK-NEXT: mr 2, 25 +; CHECK-NEXT: mr 12, 24 +; CHECK-NEXT: mr 3, 23 ; CHECK-NEXT: mtctr 7 -; CHECK-NEXT: add 25, 21, 5 -; CHECK-NEXT: maddld 5, 2, 27, 14 -; CHECK-NEXT: add 24, 21, 5 -; CHECK-NEXT: maddld 5, 2, 27, 31 -; CHECK-NEXT: add 23, 21, 5 -; CHECK-NEXT: mr 5, 26 ; CHECK-NEXT: .p2align 5 ; CHECK-NEXT: .LBB0_4: # %_loop_2_do_ ; CHECK-NEXT: # Parent Loop BB0_3 Depth=1 ; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ; CHECK-NEXT: lxvp 34, 0(6) ; CHECK-NEXT: lxvp 44, 0(5) -; CHECK-NEXT: xvmaddadp 1, 45, 35 -; CHECK-NEXT: lxvp 46, 0(30) -; CHECK-NEXT: xvmaddadp 0, 47, 35 -; CHECK-NEXT: lxvp 48, 0(29) -; CHECK-NEXT: lxvp 50, 0(23) -; CHECK-NEXT: lxvp 62, 0(24) -; CHECK-NEXT: lxvp 60, 0(25) +; CHECK-NEXT: xvmaddadp 39, 45, 35 +; CHECK-NEXT: lxvp 46, 0(27) +; CHECK-NEXT: xvmaddadp 38, 47, 35 +; CHECK-NEXT: lxvp 48, 0(11) +; CHECK-NEXT: lxvp 50, 0(2) +; 
CHECK-NEXT: lxvp 62, 0(12) +; CHECK-NEXT: lxvp 60, 0(3) ; CHECK-NEXT: lxvp 58, 32(6) ; CHECK-NEXT: lxvp 56, 32(5) -; CHECK-NEXT: lxvp 54, 32(30) -; CHECK-NEXT: lxvp 52, 32(29) -; CHECK-NEXT: lxvp 30, 32(23) -; CHECK-NEXT: lxvp 28, 32(24) -; CHECK-NEXT: lxvp 26, 32(25) -; CHECK-NEXT: xvmaddadp 5, 49, 35 -; CHECK-NEXT: xvmaddadp 4, 51, 35 -; CHECK-NEXT: xvmaddadp 3, 63, 35 -; CHECK-NEXT: xvmaddadp 2, 61, 35 -; CHECK-NEXT: xvmaddadp 40, 44, 34 -; CHECK-NEXT: xvmaddadp 39, 46, 34 -; CHECK-NEXT: xvmaddadp 38, 48, 34 -; CHECK-NEXT: xvmaddadp 33, 50, 34 -; CHECK-NEXT: xvmaddadp 32, 62, 34 -; CHECK-NEXT: xvmaddadp 37, 60, 34 -; CHECK-NEXT: xvmaddadp 36, 57, 59 -; CHECK-NEXT: xvmaddadp 13, 55, 59 -; CHECK-NEXT: xvmaddadp 12, 53, 59 -; CHECK-NEXT: xvmaddadp 11, 31, 59 -; CHECK-NEXT: xvmaddadp 10, 29, 59 -; CHECK-NEXT: xvmaddadp 9, 27, 59 -; CHECK-NEXT: xvmaddadp 8, 56, 58 -; CHECK-NEXT: xvmaddadp 7, 54, 58 -; CHECK-NEXT: xvmaddadp 6, 52, 58 +; CHECK-NEXT: lxvp 54, 32(27) +; CHECK-NEXT: lxvp 52, 32(11) +; CHECK-NEXT: lxvp 30, 32(2) +; CHECK-NEXT: lxvp 28, 32(12) +; CHECK-NEXT: lxvp 26, 32(3) +; CHECK-NEXT: xvmaddadp 33, 49, 35 +; CHECK-NEXT: xvmaddadp 32, 51, 35 +; CHECK-NEXT: xvmaddadp 37, 63, 35 +; CHECK-NEXT: xvmaddadp 36, 61, 35 +; CHECK-NEXT: xvmaddadp 13, 44, 34 +; CHECK-NEXT: xvmaddadp 12, 46, 34 +; CHECK-NEXT: xvmaddadp 11, 48, 34 +; CHECK-NEXT: xvmaddadp 10, 50, 34 +; CHECK-NEXT: xvmaddadp 9, 62, 34 +; CHECK-NEXT: xvmaddadp 8, 60, 34 +; CHECK-NEXT: xvmaddadp 7, 57, 59 +; CHECK-NEXT: xvmaddadp 6, 55, 59 +; CHECK-NEXT: xvmaddadp 5, 53, 59 +; CHECK-NEXT: xvmaddadp 4, 31, 59 +; CHECK-NEXT: xvmaddadp 3, 29, 59 +; CHECK-NEXT: xvmaddadp 2, 27, 59 +; CHECK-NEXT: xvmaddadp 1, 56, 58 +; CHECK-NEXT: xvmaddadp 0, 54, 58 +; CHECK-NEXT: xvmaddadp 40, 52, 58 ; CHECK-NEXT: xvmaddadp 41, 30, 58 -; CHECK-NEXT: xvmaddadp 43, 28, 58 -; CHECK-NEXT: xvmaddadp 42, 26, 58 +; CHECK-NEXT: xvmaddadp 42, 28, 58 +; CHECK-NEXT: xvmaddadp 43, 26, 58 ; CHECK-NEXT: addi 6, 6, 64 ; CHECK-NEXT: addi 
5, 5, 64 -; CHECK-NEXT: addi 30, 30, 64 -; CHECK-NEXT: addi 29, 29, 64 -; CHECK-NEXT: addi 23, 23, 64 -; CHECK-NEXT: addi 24, 24, 64 -; CHECK-NEXT: addi 25, 25, 64 +; CHECK-NEXT: addi 27, 27, 64 +; CHECK-NEXT: addi 11, 11, 64 +; CHECK-NEXT: addi 2, 2, 64 +; CHECK-NEXT: addi 12, 12, 64 +; CHECK-NEXT: addi 3, 3, 64 ; CHECK-NEXT: bdnz .LBB0_4 ; CHECK-NEXT: # %bb.5: # %_loop_2_endl_ ; CHECK-NEXT: # -; CHECK-NEXT: addi 28, 28, 6 -; CHECK-NEXT: add 26, 26, 2 -; CHECK-NEXT: add 20, 20, 2 -; CHECK-NEXT: add 11, 11, 2 -; CHECK-NEXT: add 19, 19, 2 -; CHECK-NEXT: add 3, 3, 2 -; CHECK-NEXT: add 12, 12, 2 -; CHECK-NEXT: addi 27, 27, 1 -; CHECK-NEXT: cmpld 28, 4 +; CHECK-NEXT: addi 30, 30, 6 +; CHECK-NEXT: add 0, 0, 31 +; CHECK-NEXT: add 23, 23, 31 +; CHECK-NEXT: add 26, 26, 31 +; CHECK-NEXT: add 24, 24, 31 +; CHECK-NEXT: add 28, 28, 31 +; CHECK-NEXT: add 25, 25, 31 +; CHECK-NEXT: cmpld 30, 4 ; CHECK-NEXT: ble 0, .LBB0_3 ; CHECK-NEXT: # %bb.6: # %_loop_1_loopHeader_._return_bb_crit_edge.loopexit +; CHECK-NEXT: ld 3, 40(1) # 8-byte Folded Reload +; CHECK-NEXT: lxv 63, 400(1) # 16-byte Folded Reload +; CHECK-NEXT: stxv 39, 0(3) ; CHECK-NEXT: ld 3, 48(1) # 8-byte Folded Reload -; CHECK-NEXT: lxv 63, 384(1) # 16-byte Folded Reload -; CHECK-NEXT: stxv 1, 0(3) +; CHECK-NEXT: lxv 62, 384(1) # 16-byte Folded Reload +; CHECK-NEXT: lxv 61, 368(1) # 16-byte Folded Reload +; CHECK-NEXT: lxv 60, 352(1) # 16-byte Folded Reload +; CHECK-NEXT: lxv 59, 336(1) # 16-byte Folded Reload +; CHECK-NEXT: lxv 58, 320(1) # 16-byte Folded Reload +; CHECK-NEXT: lxv 57, 304(1) # 16-byte Folded Reload +; CHECK-NEXT: lxv 56, 288(1) # 16-byte Folded Reload +; CHECK-NEXT: lxv 55, 272(1) # 16-byte Folded Reload +; CHECK-NEXT: lxv 54, 256(1) # 16-byte Folded Reload +; CHECK-NEXT: lxv 53, 240(1) # 16-byte Folded Reload +; CHECK-NEXT: lxv 52, 224(1) # 16-byte Folded Reload +; CHECK-NEXT: stxv 38, 0(3) +; CHECK-NEXT: ld 3, 56(1) # 8-byte Folded Reload +; CHECK-NEXT: lfd 31, 600(1) # 8-byte Folded Reload +; 
CHECK-NEXT: lfd 30, 592(1) # 8-byte Folded Reload +; CHECK-NEXT: lfd 29, 584(1) # 8-byte Folded Reload +; CHECK-NEXT: lfd 28, 576(1) # 8-byte Folded Reload +; CHECK-NEXT: lfd 27, 568(1) # 8-byte Folded Reload +; CHECK-NEXT: lfd 26, 560(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 31, 552(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 30, 544(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 29, 536(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 28, 528(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 27, 520(1) # 8-byte Folded Reload +; CHECK-NEXT: stxv 33, 0(3) ; CHECK-NEXT: ld 3, 64(1) # 8-byte Folded Reload -; CHECK-NEXT: lxv 62, 368(1) # 16-byte Folded Reload -; CHECK-NEXT: lxv 61, 352(1) # 16-byte Folded Reload -; CHECK-NEXT: lxv 60, 336(1) # 16-byte Folded Reload -; CHECK-NEXT: lxv 59, 320(1) # 16-byte Folded Reload -; CHECK-NEXT: lxv 58, 304(1) # 16-byte Folded Reload -; CHECK-NEXT: lxv 57, 288(1) # 16-byte Folded Reload -; CHECK-NEXT: lxv 56, 272(1) # 16-byte Folded Reload -; CHECK-NEXT: lxv 55, 256(1) # 16-byte Folded Reload -; CHECK-NEXT: lxv 54, 240(1) # 16-byte Folded Reload -; CHECK-NEXT: lxv 53, 224(1) # 16-byte Folded Reload -; CHECK-NEXT: lxv 52, 208(1) # 16-byte Folded Reload -; CHECK-NEXT: stxv 0, 0(3) +; CHECK-NEXT: stxv 32, 0(8) +; CHECK-NEXT: ld 26, 512(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 25, 504(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 24, 496(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 23, 488(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 22, 480(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 21, 472(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 20, 464(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 19, 456(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 18, 448(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 17, 440(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 16, 432(1) # 8-byte Folded Reload +; CHECK-NEXT: stxv 37, 0(3) ; CHECK-NEXT: ld 3, 72(1) # 8-byte Folded Reload -; CHECK-NEXT: lfd 31, 584(1) # 8-byte Folded Reload -; CHECK-NEXT: lfd 30, 576(1) # 8-byte Folded 
Reload -; CHECK-NEXT: lfd 29, 568(1) # 8-byte Folded Reload -; CHECK-NEXT: lfd 28, 560(1) # 8-byte Folded Reload -; CHECK-NEXT: lfd 27, 552(1) # 8-byte Folded Reload -; CHECK-NEXT: lfd 26, 544(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 31, 536(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 30, 528(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 29, 520(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 28, 512(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 27, 504(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 5, 0(3) -; CHECK-NEXT: ld 3, 32(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 26, 496(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 25, 488(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 24, 480(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 23, 472(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 22, 464(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 21, 456(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 20, 448(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 19, 440(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 18, 432(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 17, 424(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 16, 416(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 4, 0(3) -; CHECK-NEXT: ld 3, 40(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 15, 408(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 14, 400(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 3, 0(3) -; CHECK-NEXT: ld 3, 56(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 2, 0(8) -; CHECK-NEXT: stxv 40, 0(3) +; CHECK-NEXT: ld 15, 424(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 14, 416(1) # 8-byte Folded Reload +; CHECK-NEXT: stxv 36, 0(3) ; CHECK-NEXT: ld 3, 80(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 39, 0(10) -; CHECK-NEXT: stxv 38, 0(3) +; CHECK-NEXT: stxv 13, 0(3) ; CHECK-NEXT: ld 3, 88(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 33, 0(3) +; CHECK-NEXT: stxv 12, 0(3) ; CHECK-NEXT: ld 3, 96(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 32, 0(3) +; CHECK-NEXT: stxv 11, 0(3) ; CHECK-NEXT: ld 3, 104(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 
37, 0(3) +; CHECK-NEXT: stxv 10, 0(3) ; CHECK-NEXT: ld 3, 112(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 36, 0(3) +; CHECK-NEXT: stxv 9, 0(3) ; CHECK-NEXT: ld 3, 120(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 13, 0(3) +; CHECK-NEXT: stxv 8, 0(3) ; CHECK-NEXT: ld 3, 128(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 12, 0(3) +; CHECK-NEXT: stxv 7, 0(3) ; CHECK-NEXT: ld 3, 136(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 11, 0(3) +; CHECK-NEXT: stxv 6, 0(3) ; CHECK-NEXT: ld 3, 144(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 10, 0(3) +; CHECK-NEXT: stxv 5, 0(3) ; CHECK-NEXT: ld 3, 152(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 9, 0(3) +; CHECK-NEXT: stxv 4, 0(3) ; CHECK-NEXT: ld 3, 160(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 8, 0(3) +; CHECK-NEXT: stxv 3, 0(3) ; CHECK-NEXT: ld 3, 168(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 7, 0(3) +; CHECK-NEXT: stxv 2, 0(3) ; CHECK-NEXT: ld 3, 176(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 6, 0(3) +; CHECK-NEXT: stxv 1, 0(3) ; CHECK-NEXT: ld 3, 184(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 41, 0(3) +; CHECK-NEXT: stxv 0, 0(3) ; CHECK-NEXT: ld 3, 192(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 43, 0(3) +; CHECK-NEXT: stxv 40, 0(3) ; CHECK-NEXT: ld 3, 200(1) # 8-byte Folded Reload +; CHECK-NEXT: stxv 41, 0(3) +; CHECK-NEXT: ld 3, 208(1) # 8-byte Folded Reload ; CHECK-NEXT: stxv 42, 0(3) -; CHECK-NEXT: addi 1, 1, 592 +; CHECK-NEXT: ld 3, 216(1) # 8-byte Folded Reload +; CHECK-NEXT: stxv 43, 0(3) +; CHECK-NEXT: addi 1, 1, 608 ; CHECK-NEXT: blr entry: %_val_l_ = load i32, ptr %.l, align 4 diff --git a/llvm/test/CodeGen/PowerPC/prefer-dqform.ll b/llvm/test/CodeGen/PowerPC/prefer-dqform.ll index 912a74ba8df8f..242f28b4be9d3 100644 --- a/llvm/test/CodeGen/PowerPC/prefer-dqform.ll +++ b/llvm/test/CodeGen/PowerPC/prefer-dqform.ll @@ -41,24 +41,24 @@ define void @test(ptr dereferenceable(4) %.ial, ptr noalias dereferenceable(4) % ; CHECK-P9-NEXT: lxvdsx vs0, 0, r8 ; CHECK-P9-NEXT: rldicl r3, r3, 60, 4 ; 
CHECK-P9-NEXT: extswsli r5, r5, 3 +; CHECK-P9-NEXT: add r4, r9, r10 ; CHECK-P9-NEXT: addi r3, r3, 1 -; CHECK-P9-NEXT: sub r4, r10, r5 -; CHECK-P9-NEXT: add r5, r9, r10 +; CHECK-P9-NEXT: sub r5, r10, r5 ; CHECK-P9-NEXT: mtctr r3 -; CHECK-P9-NEXT: add r4, r6, r4 +; CHECK-P9-NEXT: add r5, r6, r5 ; CHECK-P9-NEXT: .p2align 4 ; CHECK-P9-NEXT: .LBB0_2: # %_loop_2_do_ ; CHECK-P9-NEXT: # -; CHECK-P9-NEXT: lxv vs1, -16(r5) -; CHECK-P9-NEXT: lxv vs2, 0(r5) -; CHECK-P9-NEXT: lxv vs3, -16(r4) -; CHECK-P9-NEXT: lxv vs4, 0(r4) -; CHECK-P9-NEXT: addi r4, r4, 128 +; CHECK-P9-NEXT: lxv vs1, -16(r4) +; CHECK-P9-NEXT: lxv vs2, 0(r4) +; CHECK-P9-NEXT: lxv vs3, -16(r5) +; CHECK-P9-NEXT: lxv vs4, 0(r5) +; CHECK-P9-NEXT: addi r5, r5, 128 ; CHECK-P9-NEXT: xvmaddadp vs1, vs3, vs1 -; CHECK-P9-NEXT: stxv vs1, -16(r5) +; CHECK-P9-NEXT: stxv vs1, -16(r4) ; CHECK-P9-NEXT: xvmaddadp vs2, vs4, vs0 -; CHECK-P9-NEXT: stxv vs2, 0(r5) -; CHECK-P9-NEXT: addi r5, r5, 128 +; CHECK-P9-NEXT: stxv vs2, 0(r4) +; CHECK-P9-NEXT: addi r4, r4, 128 ; CHECK-P9-NEXT: bdnz .LBB0_2 ; CHECK-P9-NEXT: # %bb.3: # %_return_bb ; CHECK-P9-NEXT: blr @@ -92,25 +92,25 @@ define void @test(ptr dereferenceable(4) %.ial, ptr noalias dereferenceable(4) % ; CHECK-P10-NEXT: sub r3, r7, r3 ; CHECK-P10-NEXT: lxvdsx vs0, 0, r8 ; CHECK-P10-NEXT: rldicl r3, r3, 60, 4 +; CHECK-P10-NEXT: add r4, r9, r10 ; CHECK-P10-NEXT: extswsli r5, r5, 3 ; CHECK-P10-NEXT: addi r3, r3, 1 -; CHECK-P10-NEXT: sub r4, r10, r5 -; CHECK-P10-NEXT: add r5, r9, r10 +; CHECK-P10-NEXT: sub r5, r10, r5 ; CHECK-P10-NEXT: mtctr r3 -; CHECK-P10-NEXT: add r4, r6, r4 +; CHECK-P10-NEXT: add r5, r6, r5 ; CHECK-P10-NEXT: .p2align 4 ; CHECK-P10-NEXT: .LBB0_2: # %_loop_2_do_ ; CHECK-P10-NEXT: # -; CHECK-P10-NEXT: lxv vs1, -16(r5) -; CHECK-P10-NEXT: lxv vs2, 0(r5) -; CHECK-P10-NEXT: lxv vs3, -16(r4) +; CHECK-P10-NEXT: lxv vs1, -16(r4) +; CHECK-P10-NEXT: lxv vs2, 0(r4) +; CHECK-P10-NEXT: lxv vs3, -16(r5) ; CHECK-P10-NEXT: xvmaddadp vs1, vs3, vs1 -; CHECK-P10-NEXT: lxv vs4, 0(r4) 
+; CHECK-P10-NEXT: lxv vs4, 0(r5) ; CHECK-P10-NEXT: xvmaddadp vs2, vs4, vs0 -; CHECK-P10-NEXT: addi r4, r4, 128 -; CHECK-P10-NEXT: stxv vs1, -16(r5) -; CHECK-P10-NEXT: stxv vs2, 0(r5) ; CHECK-P10-NEXT: addi r5, r5, 128 +; CHECK-P10-NEXT: stxv vs1, -16(r4) +; CHECK-P10-NEXT: stxv vs2, 0(r4) +; CHECK-P10-NEXT: addi r4, r4, 128 ; CHECK-P10-NEXT: bdnz .LBB0_2 ; CHECK-P10-NEXT: # %bb.3: # %_return_bb ; CHECK-P10-NEXT: blr diff --git a/llvm/test/CodeGen/PowerPC/swaps-le-1.ll b/llvm/test/CodeGen/PowerPC/swaps-le-1.ll index 5d5445f9f473a..f3e34101efa29 100644 --- a/llvm/test/CodeGen/PowerPC/swaps-le-1.ll +++ b/llvm/test/CodeGen/PowerPC/swaps-le-1.ll @@ -187,34 +187,34 @@ define void @foo() { ; CHECK-P9-NEXT: .p2align 4 ; CHECK-P9-NEXT: .LBB0_1: # %vector.body ; CHECK-P9-NEXT: # -; CHECK-P9-NEXT: lxv 2, -32(3) -; CHECK-P9-NEXT: lxv 3, -32(4) -; CHECK-P9-NEXT: lxv 4, -16(4) -; CHECK-P9-NEXT: vadduwm 2, 3, 2 +; CHECK-P9-NEXT: lxv 2, -32(6) ; CHECK-P9-NEXT: lxv 3, -32(5) +; CHECK-P9-NEXT: lxv 4, -16(5) +; CHECK-P9-NEXT: vadduwm 2, 3, 2 +; CHECK-P9-NEXT: lxv 3, -32(4) ; CHECK-P9-NEXT: vmuluwm 2, 2, 3 -; CHECK-P9-NEXT: lxv 3, -16(3) -; CHECK-P9-NEXT: vadduwm 3, 4, 3 -; CHECK-P9-NEXT: lxv 4, 0(4) -; CHECK-P9-NEXT: stxv 2, -32(6) -; CHECK-P9-NEXT: lxv 2, -16(5) -; CHECK-P9-NEXT: vmuluwm 2, 3, 2 -; CHECK-P9-NEXT: lxv 3, 0(3) +; CHECK-P9-NEXT: lxv 3, -16(6) ; CHECK-P9-NEXT: vadduwm 3, 4, 3 -; CHECK-P9-NEXT: lxv 4, 16(4) -; CHECK-P9-NEXT: addi 4, 4, 64 -; CHECK-P9-NEXT: stxv 2, -16(6) -; CHECK-P9-NEXT: lxv 2, 0(5) +; CHECK-P9-NEXT: lxv 4, 0(5) +; CHECK-P9-NEXT: stxv 2, -32(3) +; CHECK-P9-NEXT: lxv 2, -16(4) ; CHECK-P9-NEXT: vmuluwm 2, 3, 2 -; CHECK-P9-NEXT: lxv 3, 16(3) -; CHECK-P9-NEXT: addi 3, 3, 64 +; CHECK-P9-NEXT: lxv 3, 0(6) ; CHECK-P9-NEXT: vadduwm 3, 4, 3 -; CHECK-P9-NEXT: stxv 2, 0(6) -; CHECK-P9-NEXT: lxv 2, 16(5) +; CHECK-P9-NEXT: lxv 4, 16(5) ; CHECK-P9-NEXT: addi 5, 5, 64 +; CHECK-P9-NEXT: stxv 2, -16(3) +; CHECK-P9-NEXT: lxv 2, 0(4) ; CHECK-P9-NEXT: vmuluwm 2, 3, 2 -; 
CHECK-P9-NEXT: stxv 2, 16(6) +; CHECK-P9-NEXT: lxv 3, 16(6) ; CHECK-P9-NEXT: addi 6, 6, 64 +; CHECK-P9-NEXT: vadduwm 3, 4, 3 +; CHECK-P9-NEXT: stxv 2, 0(3) +; CHECK-P9-NEXT: lxv 2, 16(4) +; CHECK-P9-NEXT: addi 4, 4, 64 +; CHECK-P9-NEXT: vmuluwm 2, 3, 2 +; CHECK-P9-NEXT: stxv 2, 16(3) +; CHECK-P9-NEXT: addi 3, 3, 64 ; CHECK-P9-NEXT: bdnz .LBB0_1 ; CHECK-P9-NEXT: # %bb.2: # %for.end ; CHECK-P9-NEXT: blr diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll index 7c6c7e90413b1..5df68cc931b6d 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll @@ -1737,38 +1737,38 @@ define arm_aapcs_vfpcc float @half_short_mac(ptr nocapture readonly %a, ptr noca ; CHECK-NEXT: movs r3, #1 ; CHECK-NEXT: subs r2, #4 ; CHECK-NEXT: vldr s0, .LCPI11_0 -; CHECK-NEXT: adds r4, r0, #4 +; CHECK-NEXT: adds r4, r1, #4 ; CHECK-NEXT: add.w lr, r3, r2, lsr #2 -; CHECK-NEXT: adds r3, r1, #4 +; CHECK-NEXT: adds r3, r0, #4 ; CHECK-NEXT: movs r2, #0 ; CHECK-NEXT: .LBB11_5: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldrsh.w r5, [r3, #2] -; CHECK-NEXT: vldr.16 s2, [r4, #2] +; CHECK-NEXT: ldrsh.w r5, [r4, #2] +; CHECK-NEXT: vldr.16 s2, [r3, #2] ; CHECK-NEXT: adds r2, #4 ; CHECK-NEXT: vmov s4, r5 -; CHECK-NEXT: ldrsh r5, [r3], #8 +; CHECK-NEXT: ldrsh r5, [r4], #8 ; CHECK-NEXT: vcvt.f16.s32 s4, s4 -; CHECK-NEXT: ldrsh r6, [r3, #-10] +; CHECK-NEXT: ldrsh r6, [r4, #-10] ; CHECK-NEXT: vmul.f16 s2, s2, s4 ; CHECK-NEXT: vmov s6, r5 -; CHECK-NEXT: vldr.16 s4, [r4] +; CHECK-NEXT: vldr.16 s4, [r3] ; CHECK-NEXT: vcvt.f16.s32 s6, s6 -; CHECK-NEXT: ldrsh r5, [r3, #-12] +; CHECK-NEXT: ldrsh r5, [r4, #-12] ; CHECK-NEXT: vmul.f16 s4, s4, s6 ; CHECK-NEXT: vmov s8, r6 -; CHECK-NEXT: vldr.16 s6, [r4, #-2] +; CHECK-NEXT: vldr.16 s6, [r3, #-2] ; CHECK-NEXT: vcvt.f16.s32 s8, s8 ; CHECK-NEXT: vmov s10, r5 ; CHECK-NEXT: 
vcvtb.f32.f16 s4, s4 ; CHECK-NEXT: vmul.f16 s6, s6, s8 -; CHECK-NEXT: vldr.16 s8, [r4, #-4] +; CHECK-NEXT: vldr.16 s8, [r3, #-4] ; CHECK-NEXT: vcvt.f16.s32 s10, s10 ; CHECK-NEXT: vcvtb.f32.f16 s6, s6 ; CHECK-NEXT: vmul.f16 s8, s8, s10 ; CHECK-NEXT: vcvtb.f32.f16 s2, s2 ; CHECK-NEXT: vcvtb.f32.f16 s8, s8 -; CHECK-NEXT: adds r4, #8 +; CHECK-NEXT: adds r3, #8 ; CHECK-NEXT: vadd.f32 s0, s0, s8 ; CHECK-NEXT: vadd.f32 s0, s0, s6 ; CHECK-NEXT: vadd.f32 s0, s0, s4 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll index 8a5a15a57912c..1937e60ce95bb 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll @@ -369,31 +369,31 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_char(ptr nocapture readonly ; CHECK-NEXT: bic r7, r4, #3 ; CHECK-NEXT: movs r6, #1 ; CHECK-NEXT: subs r7, #4 -; CHECK-NEXT: add.w r5, r3, #8 +; CHECK-NEXT: adds r5, r0, #3 ; CHECK-NEXT: mov.w r8, #0 ; CHECK-NEXT: add.w lr, r6, r7, lsr #2 -; CHECK-NEXT: adds r6, r0, #3 -; CHECK-NEXT: adds r7, r1, #1 +; CHECK-NEXT: adds r6, r1, #3 +; CHECK-NEXT: add.w r7, r3, #8 ; CHECK-NEXT: .LBB5_7: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldrb r9, [r6, #-3] +; CHECK-NEXT: ldrb r9, [r5, #-3] ; CHECK-NEXT: add.w r8, r8, #4 -; CHECK-NEXT: ldrb r4, [r7, #-1] +; CHECK-NEXT: ldrb r4, [r6, #-3] ; CHECK-NEXT: smlabb r4, r4, r9, r2 -; CHECK-NEXT: str r4, [r5, #-8] -; CHECK-NEXT: ldrb r9, [r6, #-2] -; CHECK-NEXT: ldrb r4, [r7], #4 +; CHECK-NEXT: str r4, [r7, #-8] +; CHECK-NEXT: ldrb r9, [r5, #-2] +; CHECK-NEXT: ldrb r4, [r6, #-2] ; CHECK-NEXT: smlabb r4, r4, r9, r2 -; CHECK-NEXT: str r4, [r5, #-4] -; CHECK-NEXT: ldrb r9, [r6, #-1] -; CHECK-NEXT: ldrb r4, [r7, #-3] +; CHECK-NEXT: str r4, [r7, #-4] +; CHECK-NEXT: ldrb r9, [r5, #-1] +; CHECK-NEXT: ldrb r4, [r6, #-1] ; CHECK-NEXT: smlabb r4, r4, r9, r2 -; 
CHECK-NEXT: str r4, [r5] -; CHECK-NEXT: ldrb r9, [r6], #4 -; CHECK-NEXT: ldrb r4, [r7, #-2] +; CHECK-NEXT: str r4, [r7] +; CHECK-NEXT: ldrb r9, [r5], #4 +; CHECK-NEXT: ldrb r4, [r6], #4 ; CHECK-NEXT: smlabb r4, r4, r9, r2 -; CHECK-NEXT: str r4, [r5, #4] -; CHECK-NEXT: adds r5, #16 +; CHECK-NEXT: str r4, [r7, #4] +; CHECK-NEXT: adds r7, #16 ; CHECK-NEXT: le lr, .LBB5_7 ; CHECK-NEXT: .LBB5_8: @ %for.cond.cleanup.loopexit.unr-lcssa ; CHECK-NEXT: wls lr, r12, .LBB5_11 @@ -645,31 +645,31 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_uchar(ptr nocapture readonl ; CHECK-NEXT: bic r7, r4, #3 ; CHECK-NEXT: movs r6, #1 ; CHECK-NEXT: subs r7, #4 -; CHECK-NEXT: add.w r5, r3, #8 +; CHECK-NEXT: adds r5, r0, #3 ; CHECK-NEXT: mov.w r8, #0 ; CHECK-NEXT: add.w lr, r6, r7, lsr #2 -; CHECK-NEXT: adds r6, r0, #3 -; CHECK-NEXT: adds r7, r1, #1 +; CHECK-NEXT: adds r6, r1, #3 +; CHECK-NEXT: add.w r7, r3, #8 ; CHECK-NEXT: .LBB7_7: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldrb r9, [r6, #-3] +; CHECK-NEXT: ldrb r9, [r5, #-3] ; CHECK-NEXT: add.w r8, r8, #4 -; CHECK-NEXT: ldrb r4, [r7, #-1] +; CHECK-NEXT: ldrb r4, [r6, #-3] ; CHECK-NEXT: smlabb r4, r4, r9, r2 -; CHECK-NEXT: str r4, [r5, #-8] -; CHECK-NEXT: ldrb r9, [r6, #-2] -; CHECK-NEXT: ldrb r4, [r7], #4 +; CHECK-NEXT: str r4, [r7, #-8] +; CHECK-NEXT: ldrb r9, [r5, #-2] +; CHECK-NEXT: ldrb r4, [r6, #-2] ; CHECK-NEXT: smlabb r4, r4, r9, r2 -; CHECK-NEXT: str r4, [r5, #-4] -; CHECK-NEXT: ldrb r9, [r6, #-1] -; CHECK-NEXT: ldrb r4, [r7, #-3] +; CHECK-NEXT: str r4, [r7, #-4] +; CHECK-NEXT: ldrb r9, [r5, #-1] +; CHECK-NEXT: ldrb r4, [r6, #-1] ; CHECK-NEXT: smlabb r4, r4, r9, r2 -; CHECK-NEXT: str r4, [r5] -; CHECK-NEXT: ldrb r9, [r6], #4 -; CHECK-NEXT: ldrb r4, [r7, #-2] +; CHECK-NEXT: str r4, [r7] +; CHECK-NEXT: ldrb r9, [r5], #4 +; CHECK-NEXT: ldrb r4, [r6], #4 ; CHECK-NEXT: smlabb r4, r4, r9, r2 -; CHECK-NEXT: str r4, [r5, #4] -; CHECK-NEXT: adds r5, #16 +; CHECK-NEXT: str r4, [r7, #4] +; 
CHECK-NEXT: adds r7, #16 ; CHECK-NEXT: le lr, .LBB7_7 ; CHECK-NEXT: .LBB7_8: @ %for.cond.cleanup.loopexit.unr-lcssa ; CHECK-NEXT: wls lr, r12, .LBB7_11 @@ -921,33 +921,33 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_int(ptr nocapture readonly ; CHECK-NEXT: bic r7, r4, #3 ; CHECK-NEXT: movs r6, #1 ; CHECK-NEXT: subs r7, #4 -; CHECK-NEXT: add.w r5, r3, #8 +; CHECK-NEXT: add.w r5, r0, #8 ; CHECK-NEXT: mov.w r8, #0 ; CHECK-NEXT: add.w lr, r6, r7, lsr #2 -; CHECK-NEXT: add.w r6, r0, #8 -; CHECK-NEXT: add.w r7, r1, #8 +; CHECK-NEXT: add.w r6, r1, #8 +; CHECK-NEXT: add.w r7, r3, #8 ; CHECK-NEXT: .LBB9_7: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldr r9, [r6, #-8] +; CHECK-NEXT: ldr r9, [r5, #-8] ; CHECK-NEXT: add.w r8, r8, #4 -; CHECK-NEXT: ldr r4, [r7, #-8] +; CHECK-NEXT: ldr r4, [r6, #-8] ; CHECK-NEXT: mla r4, r4, r9, r2 -; CHECK-NEXT: str r4, [r5, #-8] -; CHECK-NEXT: ldr r9, [r6, #-4] -; CHECK-NEXT: ldr r4, [r7, #-4] +; CHECK-NEXT: str r4, [r7, #-8] +; CHECK-NEXT: ldr r9, [r5, #-4] +; CHECK-NEXT: ldr r4, [r6, #-4] ; CHECK-NEXT: mla r4, r4, r9, r2 -; CHECK-NEXT: str r4, [r5, #-4] -; CHECK-NEXT: ldr.w r9, [r6] -; CHECK-NEXT: ldr r4, [r7] +; CHECK-NEXT: str r4, [r7, #-4] +; CHECK-NEXT: ldr.w r9, [r5] +; CHECK-NEXT: ldr r4, [r6] ; CHECK-NEXT: mla r4, r4, r9, r2 -; CHECK-NEXT: str r4, [r5] -; CHECK-NEXT: ldr.w r9, [r6, #4] +; CHECK-NEXT: str r4, [r7] +; CHECK-NEXT: ldr.w r9, [r5, #4] +; CHECK-NEXT: adds r5, #16 +; CHECK-NEXT: ldr r4, [r6, #4] ; CHECK-NEXT: adds r6, #16 -; CHECK-NEXT: ldr r4, [r7, #4] -; CHECK-NEXT: adds r7, #16 ; CHECK-NEXT: mla r4, r4, r9, r2 -; CHECK-NEXT: str r4, [r5, #4] -; CHECK-NEXT: adds r5, #16 +; CHECK-NEXT: str r4, [r7, #4] +; CHECK-NEXT: adds r7, #16 ; CHECK-NEXT: le lr, .LBB9_7 ; CHECK-NEXT: .LBB9_8: @ %for.cond.cleanup.loopexit.unr-lcssa ; CHECK-NEXT: wls lr, r12, .LBB9_11 diff --git a/llvm/test/CodeGen/X86/dag-update-nodetomatch.ll b/llvm/test/CodeGen/X86/dag-update-nodetomatch.ll index 
71ad598abe683..53985c74440db 100644 --- a/llvm/test/CodeGen/X86/dag-update-nodetomatch.ll +++ b/llvm/test/CodeGen/X86/dag-update-nodetomatch.ll @@ -136,7 +136,7 @@ define void @_Z2x6v() local_unnamed_addr { ; CHECK-NEXT: movl (%r8), %r9d ; CHECK-NEXT: leal 8(,%rdx,8), %eax ; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-NEXT: leaq 32(%rsi), %rbx +; CHECK-NEXT: leaq 48(%rsi), %rbx ; CHECK-NEXT: leaq 8(,%rdx,8), %r14 ; CHECK-NEXT: xorl %r15d, %r15d ; CHECK-NEXT: movq x0@GOTPCREL(%rip), %r12 @@ -223,10 +223,10 @@ define void @_Z2x6v() local_unnamed_addr { ; CHECK-NEXT: .LBB1_12: # %vector.body ; CHECK-NEXT: # Parent Loop BB1_2 Depth=1 ; CHECK-NEXT: # => This Inner Loop Header: Depth=2 +; CHECK-NEXT: movdqu %xmm0, -48(%r11) ; CHECK-NEXT: movdqu %xmm0, -32(%r11) ; CHECK-NEXT: movdqu %xmm0, -16(%r11) ; CHECK-NEXT: movdqu %xmm0, (%r11) -; CHECK-NEXT: movdqu %xmm0, 16(%r11) ; CHECK-NEXT: addq $64, %r11 ; CHECK-NEXT: addq $8, %rax ; CHECK-NEXT: jne .LBB1_12 diff --git a/llvm/test/Transforms/HardwareLoops/ARM/structure.ll b/llvm/test/Transforms/HardwareLoops/ARM/structure.ll index 6993fd16dbad5..04a85fe1a66f6 100644 --- a/llvm/test/Transforms/HardwareLoops/ARM/structure.ll +++ b/llvm/test/Transforms/HardwareLoops/ARM/structure.ll @@ -393,6 +393,7 @@ for.body: ; CHECK-UNROLL-NEXT: [[PROLOGUE:.LBB[0-9_]+]]: ; CHECK-UNROLL: le lr, [[PROLOGUE]] ; CHECK-UNROLL-NEXT: [[PROLOGUE_EXIT:.LBB[0-9_]+]]: +; CHECK-UNROLL: [[PREHEADER:.LBB[0-9_]+]]: ; CHECK-UNROLL: [[BODY:.LBB[0-9_]+]]: ; CHECK-UNROLL: le lr, [[BODY]] ; CHECK-UNROLL-NOT: b diff --git a/llvm/test/Transforms/LoopStrengthReduce/AArch64/lsr-ldp.ll b/llvm/test/Transforms/LoopStrengthReduce/AArch64/lsr-ldp.ll index 2eb41cd5c2fc6..0bdc3d253ad85 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/AArch64/lsr-ldp.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/AArch64/lsr-ldp.ll @@ -5,25 +5,17 @@ define void @convolution(ptr %src0, ptr %src1, i64 %stride_xm, i64 %stride_xp, ptr %dst, i32 %w) { ; 
CHECK-LABEL: convolution: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov x8, xzr -; CHECK-NEXT: add x9, x1, x3 -; CHECK-NEXT: add x10, x1, x2 -; CHECK-NEXT: add x11, x0, x2 -; CHECK-NEXT: add x12, x0, x3 ; CHECK-NEXT: .LBB0_1: // %do.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: add x13, x1, x8 -; CHECK-NEXT: add x14, x0, x8 -; CHECK-NEXT: ldr q0, [x11, x8] -; CHECK-NEXT: ldp q2, q3, [x14] -; CHECK-NEXT: ldr q1, [x12, x8] -; CHECK-NEXT: ldp q6, q7, [x13] +; CHECK-NEXT: ldr q0, [x0, x2] +; CHECK-NEXT: ldr q1, [x0, x3] ; CHECK-NEXT: subs w5, w5, #1 -; CHECK-NEXT: ldr q4, [x10, x8] -; CHECK-NEXT: ldr q5, [x9, x8] +; CHECK-NEXT: ldp q2, q3, [x0], #32 +; CHECK-NEXT: ldr q4, [x1, x2] +; CHECK-NEXT: ldr q5, [x1, x3] ; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ldp q6, q7, [x1], #32 ; CHECK-NEXT: fadd v1.4s, v2.4s, v3.4s -; CHECK-NEXT: add x8, x8, #32 ; CHECK-NEXT: fadd v2.4s, v4.4s, v5.4s ; CHECK-NEXT: fadd v3.4s, v6.4s, v7.4s ; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s diff --git a/llvm/test/Transforms/LoopStrengthReduce/AArch64/unrolled-loop.ll b/llvm/test/Transforms/LoopStrengthReduce/AArch64/unrolled-loop.ll new file mode 100644 index 0000000000000..1ff6f59c94734 --- /dev/null +++ b/llvm/test/Transforms/LoopStrengthReduce/AArch64/unrolled-loop.ll @@ -0,0 +1,213 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -S -mtriple=aarch64-none-elf -loop-reduce < %s | FileCheck %s + +; An unrolled loop that's too complex, causing LSR to collapse the unrolled +; LSRUses into one. +; The last LSRUse should be the one that the others are collapsed into, and IV +; increment GEP should happen at the end of the loop. 
+ +define void @unrolled_loop(ptr %src, ptr %dst, i32 %low, i32 %high, i64 %n) { +; CHECK-LABEL: define void @unrolled_loop( +; CHECK-SAME: ptr [[SRC:%.*]], ptr [[DST:%.*]], i32 [[LOW:%.*]], i32 [[HIGH:%.*]], i64 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[SCEVGEP6:%.*]] = getelementptr i8, ptr [[DST]], i64 8 +; CHECK-NEXT: [[SCEVGEP15:%.*]] = getelementptr i8, ptr [[SRC]], i64 8 +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[LSR_IV14:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], %[[FOR_INC_3:.*]] ], [ [[N]], %[[ENTRY]] ] +; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[SCEVGEP16:%.*]], %[[FOR_INC_3]] ], [ [[SCEVGEP15]], %[[ENTRY]] ] +; CHECK-NEXT: [[LSR_IV7:%.*]] = phi ptr [ [[SCEVGEP8:%.*]], %[[FOR_INC_3]] ], [ [[SCEVGEP6]], %[[ENTRY]] ] +; CHECK-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[LSR_IV1]], i64 -8 +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[SCEVGEP3]], align 4 +; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[VAL]], [[HIGH]] +; CHECK-NEXT: br i1 [[CMP1]], label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]] +; CHECK: [[IF_THEN]]: +; CHECK-NEXT: [[SCEVGEP10:%.*]] = getelementptr i8, ptr [[LSR_IV7]], i64 -8 +; CHECK-NEXT: store i32 [[HIGH]], ptr [[SCEVGEP10]], align 4 +; CHECK-NEXT: br label %[[FOR_INC:.*]] +; CHECK: [[IF_ELSE]]: +; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[VAL]], [[LOW]] +; CHECK-NEXT: [[SCEVGEP9:%.*]] = getelementptr i8, ptr [[LSR_IV7]], i64 -8 +; CHECK-NEXT: br i1 [[CMP2]], label %[[IF_ELSE_THEN:.*]], label %[[IF_ELSE_ELSE:.*]] +; CHECK: [[IF_ELSE_THEN]]: +; CHECK-NEXT: store i32 [[LOW]], ptr [[SCEVGEP9]], align 4 +; CHECK-NEXT: br label %[[FOR_INC]] +; CHECK: [[IF_ELSE_ELSE]]: +; CHECK-NEXT: store i32 [[VAL]], ptr [[SCEVGEP9]], align 4 +; CHECK-NEXT: br label %[[FOR_INC]] +; CHECK: [[FOR_INC]]: +; CHECK-NEXT: [[SCEVGEP5:%.*]] = getelementptr i8, ptr [[LSR_IV1]], i64 -4 +; CHECK-NEXT: [[VAL_1:%.*]] = load i32, ptr [[SCEVGEP5]], align 4 +; CHECK-NEXT: [[CMP1_1:%.*]] = icmp sgt i32 [[VAL_1]], [[HIGH]] +; 
CHECK-NEXT: br i1 [[CMP1_1]], label %[[IF_THEN_1:.*]], label %[[IF_ELSE_1:.*]] +; CHECK: [[IF_ELSE_1]]: +; CHECK-NEXT: [[CMP2_1:%.*]] = icmp slt i32 [[VAL_1]], [[LOW]] +; CHECK-NEXT: [[SCEVGEP14:%.*]] = getelementptr i8, ptr [[LSR_IV7]], i64 -4 +; CHECK-NEXT: br i1 [[CMP2_1]], label %[[IF_ELSE_THEN_1:.*]], label %[[IF_ELSE_ELSE_1:.*]] +; CHECK: [[IF_ELSE_ELSE_1]]: +; CHECK-NEXT: store i32 [[VAL_1]], ptr [[SCEVGEP14]], align 4 +; CHECK-NEXT: br label %[[FOR_INC_1:.*]] +; CHECK: [[IF_ELSE_THEN_1]]: +; CHECK-NEXT: store i32 [[LOW]], ptr [[SCEVGEP14]], align 4 +; CHECK-NEXT: br label %[[FOR_INC_1]] +; CHECK: [[IF_THEN_1]]: +; CHECK-NEXT: [[SCEVGEP13:%.*]] = getelementptr i8, ptr [[LSR_IV7]], i64 -4 +; CHECK-NEXT: store i32 [[HIGH]], ptr [[SCEVGEP13]], align 4 +; CHECK-NEXT: br label %[[FOR_INC_1]] +; CHECK: [[FOR_INC_1]]: +; CHECK-NEXT: [[VAL_2:%.*]] = load i32, ptr [[LSR_IV1]], align 4 +; CHECK-NEXT: [[CMP1_2:%.*]] = icmp sgt i32 [[VAL_2]], [[HIGH]] +; CHECK-NEXT: br i1 [[CMP1_2]], label %[[IF_THEN_2:.*]], label %[[IF_ELSE_2:.*]] +; CHECK: [[IF_ELSE_2]]: +; CHECK-NEXT: [[CMP2_2:%.*]] = icmp slt i32 [[VAL_2]], [[LOW]] +; CHECK-NEXT: br i1 [[CMP2_2]], label %[[IF_ELSE_THEN_2:.*]], label %[[IF_ELSE_ELSE_2:.*]] +; CHECK: [[IF_ELSE_ELSE_2]]: +; CHECK-NEXT: store i32 [[VAL_2]], ptr [[LSR_IV7]], align 4 +; CHECK-NEXT: br label %[[FOR_INC_2:.*]] +; CHECK: [[IF_ELSE_THEN_2]]: +; CHECK-NEXT: store i32 [[LOW]], ptr [[LSR_IV7]], align 4 +; CHECK-NEXT: br label %[[FOR_INC_2]] +; CHECK: [[IF_THEN_2]]: +; CHECK-NEXT: store i32 [[HIGH]], ptr [[LSR_IV7]], align 4 +; CHECK-NEXT: br label %[[FOR_INC_2]] +; CHECK: [[FOR_INC_2]]: +; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr i8, ptr [[LSR_IV1]], i64 4 +; CHECK-NEXT: [[VAL_3:%.*]] = load i32, ptr [[SCEVGEP4]], align 4 +; CHECK-NEXT: [[CMP1_3:%.*]] = icmp sgt i32 [[VAL_3]], [[HIGH]] +; CHECK-NEXT: [[SCEVGEP16]] = getelementptr i8, ptr [[LSR_IV1]], i64 16 +; CHECK-NEXT: br i1 [[CMP1_3]], label %[[IF_THEN_3:.*]], label %[[IF_ELSE_3:.*]] +; 
CHECK: [[IF_ELSE_3]]: +; CHECK-NEXT: [[CMP2_3:%.*]] = icmp slt i32 [[VAL_3]], [[LOW]] +; CHECK-NEXT: [[SCEVGEP12:%.*]] = getelementptr i8, ptr [[LSR_IV7]], i64 4 +; CHECK-NEXT: br i1 [[CMP2_3]], label %[[IF_ELSE_THEN_3:.*]], label %[[IF_ELSE_ELSE_3:.*]] +; CHECK: [[IF_ELSE_ELSE_3]]: +; CHECK-NEXT: store i32 [[VAL_3]], ptr [[SCEVGEP12]], align 4 +; CHECK-NEXT: br label %[[FOR_INC_3]] +; CHECK: [[IF_ELSE_THEN_3]]: +; CHECK-NEXT: store i32 [[LOW]], ptr [[SCEVGEP12]], align 4 +; CHECK-NEXT: br label %[[FOR_INC_3]] +; CHECK: [[IF_THEN_3]]: +; CHECK-NEXT: [[SCEVGEP11:%.*]] = getelementptr i8, ptr [[LSR_IV7]], i64 4 +; CHECK-NEXT: store i32 [[HIGH]], ptr [[SCEVGEP11]], align 4 +; CHECK-NEXT: br label %[[FOR_INC_3]] +; CHECK: [[FOR_INC_3]]: +; CHECK-NEXT: [[SCEVGEP8]] = getelementptr i8, ptr [[LSR_IV7]], i64 16 +; CHECK-NEXT: [[LSR_IV_NEXT]] = add i64 [[LSR_IV14]], -4 +; CHECK-NEXT: [[NITER_NCMP:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0 +; CHECK-NEXT: br i1 [[NITER_NCMP]], label %[[EXIT:.*]], label %[[FOR_BODY]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + br label %for.body + +for.body: + %idx = phi i64 [ 0, %entry ], [ %idx.next, %for.inc.3 ] + %niter = phi i64 [ 0, %entry ], [ %niter.next, %for.inc.3 ] + %srcidx = getelementptr inbounds nuw i32, ptr %src, i64 %idx + %val = load i32, ptr %srcidx, align 4 + %cmp1 = icmp sgt i32 %val, %high + br i1 %cmp1, label %if.then, label %if.else + +if.then: + %dstidx1 = getelementptr inbounds nuw i32, ptr %dst, i64 %idx + store i32 %high, ptr %dstidx1, align 4 + br label %for.inc + +if.else: + %cmp2 = icmp slt i32 %val, %low + %dstidx2 = getelementptr inbounds nuw i32, ptr %dst, i64 %idx + br i1 %cmp2, label %if.else.then, label %if.else.else + +if.else.then: + store i32 %low, ptr %dstidx2, align 4 + br label %for.inc + +if.else.else: + store i32 %val, ptr %dstidx2, align 4 + br label %for.inc + +for.inc: + %inc = or disjoint i64 %idx, 1 + %srcidx.1 = getelementptr inbounds nuw i32, ptr %src, i64 %inc + %val.1 = load i32, 
ptr %srcidx.1, align 4 + %cmp1.1 = icmp sgt i32 %val.1, %high + br i1 %cmp1.1, label %if.then.1, label %if.else.1 + +if.else.1: + %cmp2.1 = icmp slt i32 %val.1, %low + %dstidx2.1 = getelementptr inbounds nuw i32, ptr %dst, i64 %inc + br i1 %cmp2.1, label %if.else.then.1, label %if.else.else.1 + +if.else.else.1: + store i32 %val.1, ptr %dstidx2.1, align 4 + br label %for.inc.1 + +if.else.then.1: + store i32 %low, ptr %dstidx2.1, align 4 + br label %for.inc.1 + +if.then.1: + %dstidx1.1 = getelementptr inbounds nuw i32, ptr %dst, i64 %inc + store i32 %high, ptr %dstidx1.1, align 4 + br label %for.inc.1 + +for.inc.1: + %inc.1 = or disjoint i64 %idx, 2 + %srcidx.2 = getelementptr inbounds nuw i32, ptr %src, i64 %inc.1 + %val.2 = load i32, ptr %srcidx.2, align 4 + %cmp1.2 = icmp sgt i32 %val.2, %high + br i1 %cmp1.2, label %if.then.2, label %if.else.2 + +if.else.2: + %cmp2.2 = icmp slt i32 %val.2, %low + %dstidx2.2 = getelementptr inbounds nuw i32, ptr %dst, i64 %inc.1 + br i1 %cmp2.2, label %if.else.then.2, label %if.else.else.2 + +if.else.else.2: + store i32 %val.2, ptr %dstidx2.2, align 4 + br label %for.inc.2 + +if.else.then.2: + store i32 %low, ptr %dstidx2.2, align 4 + br label %for.inc.2 + +if.then.2: + %dstidx1.2 = getelementptr inbounds nuw i32, ptr %dst, i64 %inc.1 + store i32 %high, ptr %dstidx1.2, align 4 + br label %for.inc.2 + +for.inc.2: + %inc.2 = or disjoint i64 %idx, 3 + %srcidx.3 = getelementptr inbounds nuw i32, ptr %src, i64 %inc.2 + %val.3 = load i32, ptr %srcidx.3, align 4 + %cmp1.3 = icmp sgt i32 %val.3, %high + br i1 %cmp1.3, label %if.then.3, label %if.else.3 + +if.else.3: + %cmp2.3 = icmp slt i32 %val.3, %low + %dstidx2.3 = getelementptr inbounds nuw i32, ptr %dst, i64 %inc.2 + br i1 %cmp2.3, label %if.else.then.3, label %if.else.else.3 + +if.else.else.3: + store i32 %val.3, ptr %dstidx2.3, align 4 + br label %for.inc.3 + +if.else.then.3: + store i32 %low, ptr %dstidx2.3, align 4 + br label %for.inc.3 + +if.then.3: + %dstidx1.3 = getelementptr 
inbounds nuw i32, ptr %dst, i64 %inc.2 + store i32 %high, ptr %dstidx1.3, align 4 + br label %for.inc.3 + +for.inc.3: + %idx.next = add nuw i64 %idx, 4 + %niter.next = add i64 %niter, 4 + %niter.ncmp = icmp eq i64 %niter.next, %n + br i1 %niter.ncmp, label %exit, label %for.body + +exit: + ret void +} diff --git a/llvm/test/Transforms/LoopStrengthReduce/ARM/complexity.ll b/llvm/test/Transforms/LoopStrengthReduce/ARM/complexity.ll index 1b64ade50f219..ab3585387aa5d 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/ARM/complexity.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/ARM/complexity.ll @@ -4,10 +4,9 @@ target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" ; RUN: opt -mtriple=thumbv7em %s -S -loop-reduce -lsr-complexity-limit=2147483647 -o - | FileCheck %s ; CHECK-LABEL: for.body12.us.us: -; CHECK: [[LSR_IV6:%[^ ]+]] = phi ptr [ [[SCEVGEP7:%[^ ]+]], %for.body12.us.us ], [ [[SCEVGEP5:%[^ ]+]], %for.cond9.preheader.us.us ] ; CHECK: phi i32 +; CHECK: [[LSR_IV6:%[^ ]+]] = phi ptr [ [[SCEVGEP7:%[^ ]+]], %for.body12.us.us ], [ [[SCEVGEP5:%[^ ]+]], %for.cond9.preheader.us.us ] ; CHECK: [[LSR_IV:%[^ ]+]] = phi ptr [ [[SCEVGEP1:%[^ ]+]], %for.body12.us.us ], [ [[SCEVGEP:%[^ ]+]], %for.cond9.preheader.us.us ] -; CHECK: phi i32 ; CHECK: [[SCEVGEP1]] = getelementptr i8, ptr [[LSR_IV]], i32 8 ; CHECK: [[SCEVGEP7]] = getelementptr i8, ptr [[LSR_IV6]], i32 8 diff --git a/llvm/test/Transforms/LoopStrengthReduce/RISCV/many-geps.ll b/llvm/test/Transforms/LoopStrengthReduce/RISCV/many-geps.ll index 4914bb72d8945..4f5f8ee16a704 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/RISCV/many-geps.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/RISCV/many-geps.ll @@ -20,17 +20,17 @@ define i32 @main() { ; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[CALL]], align 4 ; CHECK-NEXT: ret i32 0 ; CHECK: [[BB2]]: -; CHECK-NEXT: [[LSR_IV30:%.*]] = phi i64 [ [[LSR_IV_NEXT31:%.*]], %[[BB2]] ], [ 8, [[BB:%.*]] ] -; CHECK-NEXT: [[LSR_IV27:%.*]] = phi i64 [ 
[[LSR_IV_NEXT28:%.*]], %[[BB2]] ], [ 12, [[BB]] ] -; CHECK-NEXT: [[LSR_IV24:%.*]] = phi i64 [ [[LSR_IV_NEXT25:%.*]], %[[BB2]] ], [ 16, [[BB]] ] -; CHECK-NEXT: [[LSR_IV21:%.*]] = phi i64 [ [[LSR_IV_NEXT22:%.*]], %[[BB2]] ], [ 20, [[BB]] ] -; CHECK-NEXT: [[LSR_IV18:%.*]] = phi i64 [ [[LSR_IV_NEXT19:%.*]], %[[BB2]] ], [ 24, [[BB]] ] -; CHECK-NEXT: [[LSR_IV15:%.*]] = phi i64 [ [[LSR_IV_NEXT16:%.*]], %[[BB2]] ], [ 28, [[BB]] ] -; CHECK-NEXT: [[LSR_IV12:%.*]] = phi i64 [ [[LSR_IV_NEXT13:%.*]], %[[BB2]] ], [ 32, [[BB]] ] -; CHECK-NEXT: [[LSR_IV9:%.*]] = phi i64 [ [[LSR_IV_NEXT10:%.*]], %[[BB2]] ], [ 36, [[BB]] ] -; CHECK-NEXT: [[LSR_IV4:%.*]] = phi i64 [ [[LSR_IV_NEXT5:%.*]], %[[BB2]] ], [ 40, [[BB]] ] -; CHECK-NEXT: [[LSR_IV1:%.*]] = phi i64 [ [[LSR_IV_NEXT2:%.*]], %[[BB2]] ], [ 48, [[BB]] ] -; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], %[[BB2]] ], [ 72, [[BB]] ] +; CHECK-NEXT: [[LSR_IV30:%.*]] = phi i64 [ [[LSR_IV_NEXT31:%.*]], %[[BB2]] ], [ 8, [[TMP0:%.*]] ] +; CHECK-NEXT: [[LSR_IV27:%.*]] = phi i64 [ [[LSR_IV_NEXT28:%.*]], %[[BB2]] ], [ 12, [[TMP0]] ] +; CHECK-NEXT: [[LSR_IV24:%.*]] = phi i64 [ [[LSR_IV_NEXT25:%.*]], %[[BB2]] ], [ 16, [[TMP0]] ] +; CHECK-NEXT: [[LSR_IV21:%.*]] = phi i64 [ [[LSR_IV_NEXT22:%.*]], %[[BB2]] ], [ 20, [[TMP0]] ] +; CHECK-NEXT: [[LSR_IV18:%.*]] = phi i64 [ [[LSR_IV_NEXT19:%.*]], %[[BB2]] ], [ 24, [[TMP0]] ] +; CHECK-NEXT: [[LSR_IV15:%.*]] = phi i64 [ [[LSR_IV_NEXT16:%.*]], %[[BB2]] ], [ 28, [[TMP0]] ] +; CHECK-NEXT: [[LSR_IV12:%.*]] = phi i64 [ [[LSR_IV_NEXT13:%.*]], %[[BB2]] ], [ 32, [[TMP0]] ] +; CHECK-NEXT: [[LSR_IV9:%.*]] = phi i64 [ [[LSR_IV_NEXT10:%.*]], %[[BB2]] ], [ 36, [[TMP0]] ] +; CHECK-NEXT: [[LSR_IV4:%.*]] = phi i64 [ [[LSR_IV_NEXT5:%.*]], %[[BB2]] ], [ 48, [[TMP0]] ] +; CHECK-NEXT: [[LSR_IV1:%.*]] = phi i64 [ [[LSR_IV_NEXT2:%.*]], %[[BB2]] ], [ 72, [[TMP0]] ] +; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], %[[BB2]] ], [ 80, [[TMP0]] ] ; CHECK-NEXT: [[SCEVGEP32:%.*]] = getelementptr i8, ptr 
[[CALL]], i64 [[LSR_IV30]] ; CHECK-NEXT: store i32 0, ptr [[SCEVGEP32]], align 8 ; CHECK-NEXT: [[SCEVGEP29:%.*]] = getelementptr i8, ptr [[CALL]], i64 [[LSR_IV27]] @@ -47,14 +47,14 @@ define i32 @main() { ; CHECK-NEXT: store i32 0, ptr [[SCEVGEP14]], align 8 ; CHECK-NEXT: [[SCEVGEP11:%.*]] = getelementptr i8, ptr [[CALL]], i64 [[LSR_IV9]] ; CHECK-NEXT: store i32 0, ptr [[SCEVGEP11]], align 4 -; CHECK-NEXT: [[SCEVGEP6:%.*]] = getelementptr i8, ptr [[CALL]], i64 [[LSR_IV4]] +; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[CALL]], i64 [[LSR_IV]] +; CHECK-NEXT: [[SCEVGEP6:%.*]] = getelementptr i8, ptr [[SCEVGEP1]], i64 -40 ; CHECK-NEXT: store i64 0, ptr [[SCEVGEP6]], align 8 -; CHECK-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[CALL]], i64 [[LSR_IV1]] +; CHECK-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[CALL]], i64 [[LSR_IV4]] ; CHECK-NEXT: store i32 0, ptr [[SCEVGEP3]], align 8 -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[CALL]], i64 [[LSR_IV]] +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[CALL]], i64 [[LSR_IV1]] ; CHECK-NEXT: store i32 0, ptr [[SCEVGEP]], align 8 -; CHECK-NEXT: [[SCEVGEP7:%.*]] = getelementptr i8, ptr [[CALL]], i64 [[LSR_IV4]] -; CHECK-NEXT: [[SCEVGEP8:%.*]] = getelementptr i8, ptr [[SCEVGEP7]], i64 40 +; CHECK-NEXT: [[SCEVGEP8:%.*]] = getelementptr i8, ptr [[CALL]], i64 [[LSR_IV]] ; CHECK-NEXT: store i64 0, ptr [[SCEVGEP8]], align 8 ; CHECK-NEXT: [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], 88 ; CHECK-NEXT: [[LSR_IV_NEXT2]] = add i64 [[LSR_IV1]], 88