-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[SLP] Avoid -passes=instcombine stages in SLP tests #146257
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
Thank you for submitting a Pull Request (PR) to the LLVM Project! This PR will be automatically labeled and the relevant teams will be notified. If you wish to, you can add reviewers by using the "Reviewers" section on this page. If this is not working for you, it is probably because you do not have write permissions for the repository. In which case you can instead tag reviewers by name in a comment by using If you have received no comments on your PR for a week, you can request a review by "ping"ing the PR by adding a comment “Ping”. The common courtesy "ping" rate is once a week. Please remember that you are asking for valuable time from other developers. If you have further questions, they may be answered by the LLVM GitHub User Guide. You can also ask questions in a comment on this PR, on the LLVM Discord or on the forums. |
@llvm/pr-subscribers-llvm-transforms Author: Hanyang (Eric) Xu (ericxu233) ChangesAddresses: #145511 Note that there are still two instances of --passes=slp-vectorizer,instcombine left unchanged because it seems that the tests are meant to run in conjunction with instcombine and removing instcombine would invalidate their original objective: llvm/test/Transforms/SLPVectorizer/arith-div-undef.ll Patch is 538.74 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/146257.diff 38 Files Affected:
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-cost.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-cost.ll
index e460f558f4723..8dc88f7b96716 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-cost.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-cost.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -S -passes=slp-vectorizer,instcombine -pass-remarks-output=%t | FileCheck %s
+; RUN: opt < %s -S -passes=slp-vectorizer -pass-remarks-output=%t | FileCheck %s
; RUN: cat %t | FileCheck -check-prefix=REMARK %s
-; RUN: opt < %s -S -aa-pipeline=basic-aa -passes='slp-vectorizer,instcombine' -pass-remarks-output=%t | FileCheck %s
+; RUN: opt < %s -S -aa-pipeline=basic-aa -passes='slp-vectorizer' -pass-remarks-output=%t | FileCheck %s
; RUN: cat %t | FileCheck -check-prefix=REMARK %s
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
@@ -16,10 +16,10 @@ target triple = "aarch64--linux-gnu"
define internal i32 @gather_multiple_use(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: @gather_multiple_use(
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[C:%.*]], i64 0
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[A:%.*]], i64 1
-; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[B:%.*]], i64 2
-; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[D:%.*]], i64 3
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[C:%.*]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[A:%.*]], i32 1
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[B:%.*]], i32 2
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[D:%.*]], i32 3
; CHECK-NEXT: [[TMP5:%.*]] = lshr <4 x i32> [[TMP4]], splat (i32 15)
; CHECK-NEXT: [[TMP6:%.*]] = and <4 x i32> [[TMP5]], splat (i32 65537)
; CHECK-NEXT: [[TMP7:%.*]] = mul nuw <4 x i32> [[TMP6]], splat (i32 65535)
@@ -57,22 +57,26 @@ define internal i32 @gather_multiple_use(i32 %a, i32 %b, i32 %c, i32 %d) {
@data = global [6 x [258 x i8]] zeroinitializer, align 1
define void @gather_load(ptr noalias %ptr) {
; CHECK-LABEL: @gather_load(
-; CHECK-NEXT: [[ARRAYIDX182:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR:%.*]], i64 2
-; CHECK-NEXT: [[ARRAYIDX183:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR]], i64 4
-; CHECK-NEXT: [[ARRAYIDX184:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR]], i64 6
-; CHECK-NEXT: [[ARRAYIDX185:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR]], i64 8
-; CHECK-NEXT: [[L0:%.*]] = load i8, ptr getelementptr inbounds nuw (i8, ptr @data, i64 258), align 1
+; CHECK-NEXT: [[ARRAYIDX182:%.*]] = getelementptr inbounds i16, ptr [[PTR:%.*]], i64 1
+; CHECK-NEXT: [[ARRAYIDX183:%.*]] = getelementptr inbounds i16, ptr [[PTR]], i64 2
+; CHECK-NEXT: [[ARRAYIDX184:%.*]] = getelementptr inbounds i16, ptr [[PTR]], i64 3
+; CHECK-NEXT: [[ARRAYIDX185:%.*]] = getelementptr inbounds i16, ptr [[PTR]], i64 4
+; CHECK-NEXT: [[ARRAYIDX149:%.*]] = getelementptr inbounds [6 x [258 x i8]], ptr @data, i64 0, i64 1, i64 0
+; CHECK-NEXT: [[L0:%.*]] = load i8, ptr [[ARRAYIDX149]], align 1
; CHECK-NEXT: [[CONV150:%.*]] = zext i8 [[L0]] to i16
-; CHECK-NEXT: [[ADD152:%.*]] = add nuw nsw i16 [[CONV150]], 10
-; CHECK-NEXT: [[L1:%.*]] = load i8, ptr getelementptr inbounds nuw (i8, ptr @data, i64 517), align 1
+; CHECK-NEXT: [[ADD152:%.*]] = add i16 10, [[CONV150]]
+; CHECK-NEXT: [[ARRAYIDX155:%.*]] = getelementptr inbounds [6 x [258 x i8]], ptr @data, i64 0, i64 2, i64 1
+; CHECK-NEXT: [[L1:%.*]] = load i8, ptr [[ARRAYIDX155]], align 1
; CHECK-NEXT: [[CONV156:%.*]] = zext i8 [[L1]] to i16
-; CHECK-NEXT: [[ADD158:%.*]] = add nuw nsw i16 [[CONV156]], 20
-; CHECK-NEXT: [[L2:%.*]] = load i8, ptr getelementptr inbounds nuw (i8, ptr @data, i64 776), align 1
+; CHECK-NEXT: [[ADD158:%.*]] = add i16 20, [[CONV156]]
+; CHECK-NEXT: [[ARRAYIDX161:%.*]] = getelementptr inbounds [6 x [258 x i8]], ptr @data, i64 0, i64 3, i64 2
+; CHECK-NEXT: [[L2:%.*]] = load i8, ptr [[ARRAYIDX161]], align 1
; CHECK-NEXT: [[CONV162:%.*]] = zext i8 [[L2]] to i16
-; CHECK-NEXT: [[ADD164:%.*]] = add nuw nsw i16 [[CONV162]], 30
-; CHECK-NEXT: [[L3:%.*]] = load i8, ptr getelementptr inbounds nuw (i8, ptr @data, i64 1035), align 1
+; CHECK-NEXT: [[ADD164:%.*]] = add i16 30, [[CONV162]]
+; CHECK-NEXT: [[ARRAYIDX167:%.*]] = getelementptr inbounds [6 x [258 x i8]], ptr @data, i64 0, i64 4, i64 3
+; CHECK-NEXT: [[L3:%.*]] = load i8, ptr [[ARRAYIDX167]], align 1
; CHECK-NEXT: [[CONV168:%.*]] = zext i8 [[L3]] to i16
-; CHECK-NEXT: [[ADD170:%.*]] = add nuw nsw i16 [[CONV168]], 40
+; CHECK-NEXT: [[ADD170:%.*]] = add i16 40, [[CONV168]]
; CHECK-NEXT: store i16 [[ADD152]], ptr [[ARRAYIDX182]], align 2
; CHECK-NEXT: store i16 [[ADD158]], ptr [[ARRAYIDX183]], align 2
; CHECK-NEXT: store i16 [[ADD164]], ptr [[ARRAYIDX184]], align 2
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-reduce.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-reduce.ll
index 900d5f293b5b8..4f8fc339fb64b 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-reduce.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-reduce.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -passes=slp-vectorizer,dce,instcombine < %s | FileCheck %s --check-prefix=GENERIC
-; RUN: opt -S -mcpu=kryo -passes=slp-vectorizer,dce,instcombine < %s | FileCheck %s --check-prefix=KRYO
+; RUN: opt -S -passes=slp-vectorizer,dce < %s | FileCheck %s --check-prefix=GENERIC
+; RUN: opt -S -mcpu=kryo -passes=slp-vectorizer,dce < %s | FileCheck %s --check-prefix=KRYO
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
target triple = "aarch64--linux-gnu"
@@ -36,57 +36,49 @@ define i32 @gather_reduce_8x16_i32(ptr nocapture readonly %a, ptr nocapture read
; GENERIC-NEXT: [[I_0103:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; GENERIC-NEXT: [[SUM_0102:%.*]] = phi i32 [ [[ADD66]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; GENERIC-NEXT: [[A_ADDR_0101:%.*]] = phi ptr [ [[INCDEC_PTR58:%.*]], [[FOR_BODY]] ], [ [[A:%.*]], [[FOR_BODY_PREHEADER]] ]
-; GENERIC-NEXT: [[INCDEC_PTR58]] = getelementptr inbounds nuw i8, ptr [[A_ADDR_0101]], i64 16
+; GENERIC-NEXT: [[INCDEC_PTR58]] = getelementptr inbounds i16, ptr [[A_ADDR_0101]], i64 8
; GENERIC-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[A_ADDR_0101]], align 2
; GENERIC-NEXT: [[TMP1:%.*]] = zext <8 x i16> [[TMP0]] to <8 x i32>
; GENERIC-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr [[B:%.*]], align 2
; GENERIC-NEXT: [[TMP3:%.*]] = zext <8 x i16> [[TMP2]] to <8 x i32>
; GENERIC-NEXT: [[TMP4:%.*]] = sub nsw <8 x i32> [[TMP1]], [[TMP3]]
-; GENERIC-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[TMP4]], i64 0
-; GENERIC-NEXT: [[TMP6:%.*]] = sext i32 [[TMP5]] to i64
-; GENERIC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[G:%.*]], i64 [[TMP6]]
+; GENERIC-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[TMP4]], i32 0
+; GENERIC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[G:%.*]], i32 [[TMP5]]
; GENERIC-NEXT: [[TMP7:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
; GENERIC-NEXT: [[CONV3:%.*]] = zext i16 [[TMP7]] to i32
-; GENERIC-NEXT: [[ADD:%.*]] = add nsw i32 [[SUM_0102]], [[CONV3]]
-; GENERIC-NEXT: [[TMP8:%.*]] = extractelement <8 x i32> [[TMP4]], i64 1
-; GENERIC-NEXT: [[TMP9:%.*]] = sext i32 [[TMP8]] to i64
-; GENERIC-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP9]]
+; GENERIC-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV3]], [[SUM_0102]]
+; GENERIC-NEXT: [[TMP8:%.*]] = extractelement <8 x i32> [[TMP4]], i32 1
+; GENERIC-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i16, ptr [[G]], i32 [[TMP8]]
; GENERIC-NEXT: [[TMP10:%.*]] = load i16, ptr [[ARRAYIDX10]], align 2
; GENERIC-NEXT: [[CONV11:%.*]] = zext i16 [[TMP10]] to i32
; GENERIC-NEXT: [[ADD12:%.*]] = add nsw i32 [[ADD]], [[CONV11]]
-; GENERIC-NEXT: [[TMP11:%.*]] = extractelement <8 x i32> [[TMP4]], i64 2
-; GENERIC-NEXT: [[TMP12:%.*]] = sext i32 [[TMP11]] to i64
-; GENERIC-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP12]]
+; GENERIC-NEXT: [[TMP9:%.*]] = extractelement <8 x i32> [[TMP4]], i32 2
+; GENERIC-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds i16, ptr [[G]], i32 [[TMP9]]
; GENERIC-NEXT: [[TMP13:%.*]] = load i16, ptr [[ARRAYIDX19]], align 2
; GENERIC-NEXT: [[CONV20:%.*]] = zext i16 [[TMP13]] to i32
; GENERIC-NEXT: [[ADD21:%.*]] = add nsw i32 [[ADD12]], [[CONV20]]
-; GENERIC-NEXT: [[TMP14:%.*]] = extractelement <8 x i32> [[TMP4]], i64 3
-; GENERIC-NEXT: [[TMP15:%.*]] = sext i32 [[TMP14]] to i64
-; GENERIC-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP15]]
+; GENERIC-NEXT: [[TMP11:%.*]] = extractelement <8 x i32> [[TMP4]], i32 3
+; GENERIC-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds i16, ptr [[G]], i32 [[TMP11]]
; GENERIC-NEXT: [[TMP16:%.*]] = load i16, ptr [[ARRAYIDX28]], align 2
; GENERIC-NEXT: [[CONV29:%.*]] = zext i16 [[TMP16]] to i32
; GENERIC-NEXT: [[ADD30:%.*]] = add nsw i32 [[ADD21]], [[CONV29]]
-; GENERIC-NEXT: [[TMP17:%.*]] = extractelement <8 x i32> [[TMP4]], i64 4
-; GENERIC-NEXT: [[TMP18:%.*]] = sext i32 [[TMP17]] to i64
-; GENERIC-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP18]]
+; GENERIC-NEXT: [[TMP14:%.*]] = extractelement <8 x i32> [[TMP4]], i32 4
+; GENERIC-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds i16, ptr [[G]], i32 [[TMP14]]
; GENERIC-NEXT: [[TMP19:%.*]] = load i16, ptr [[ARRAYIDX37]], align 2
; GENERIC-NEXT: [[CONV38:%.*]] = zext i16 [[TMP19]] to i32
; GENERIC-NEXT: [[ADD39:%.*]] = add nsw i32 [[ADD30]], [[CONV38]]
-; GENERIC-NEXT: [[TMP20:%.*]] = extractelement <8 x i32> [[TMP4]], i64 5
-; GENERIC-NEXT: [[TMP21:%.*]] = sext i32 [[TMP20]] to i64
-; GENERIC-NEXT: [[ARRAYIDX46:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP21]]
+; GENERIC-NEXT: [[TMP15:%.*]] = extractelement <8 x i32> [[TMP4]], i32 5
+; GENERIC-NEXT: [[ARRAYIDX46:%.*]] = getelementptr inbounds i16, ptr [[G]], i32 [[TMP15]]
; GENERIC-NEXT: [[TMP22:%.*]] = load i16, ptr [[ARRAYIDX46]], align 2
; GENERIC-NEXT: [[CONV47:%.*]] = zext i16 [[TMP22]] to i32
; GENERIC-NEXT: [[ADD48:%.*]] = add nsw i32 [[ADD39]], [[CONV47]]
-; GENERIC-NEXT: [[TMP23:%.*]] = extractelement <8 x i32> [[TMP4]], i64 6
-; GENERIC-NEXT: [[TMP24:%.*]] = sext i32 [[TMP23]] to i64
-; GENERIC-NEXT: [[ARRAYIDX55:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP24]]
+; GENERIC-NEXT: [[TMP17:%.*]] = extractelement <8 x i32> [[TMP4]], i32 6
+; GENERIC-NEXT: [[ARRAYIDX55:%.*]] = getelementptr inbounds i16, ptr [[G]], i32 [[TMP17]]
; GENERIC-NEXT: [[TMP25:%.*]] = load i16, ptr [[ARRAYIDX55]], align 2
; GENERIC-NEXT: [[CONV56:%.*]] = zext i16 [[TMP25]] to i32
; GENERIC-NEXT: [[ADD57:%.*]] = add nsw i32 [[ADD48]], [[CONV56]]
-; GENERIC-NEXT: [[TMP26:%.*]] = extractelement <8 x i32> [[TMP4]], i64 7
-; GENERIC-NEXT: [[TMP27:%.*]] = sext i32 [[TMP26]] to i64
-; GENERIC-NEXT: [[ARRAYIDX64:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP27]]
+; GENERIC-NEXT: [[TMP20:%.*]] = extractelement <8 x i32> [[TMP4]], i32 7
+; GENERIC-NEXT: [[ARRAYIDX64:%.*]] = getelementptr inbounds i16, ptr [[G]], i32 [[TMP20]]
; GENERIC-NEXT: [[TMP28:%.*]] = load i16, ptr [[ARRAYIDX64]], align 2
; GENERIC-NEXT: [[CONV65:%.*]] = zext i16 [[TMP28]] to i32
; GENERIC-NEXT: [[ADD66]] = add nsw i32 [[ADD57]], [[CONV65]]
@@ -109,57 +101,49 @@ define i32 @gather_reduce_8x16_i32(ptr nocapture readonly %a, ptr nocapture read
; KRYO-NEXT: [[I_0103:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; KRYO-NEXT: [[SUM_0102:%.*]] = phi i32 [ [[ADD66]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; KRYO-NEXT: [[A_ADDR_0101:%.*]] = phi ptr [ [[INCDEC_PTR58:%.*]], [[FOR_BODY]] ], [ [[A:%.*]], [[FOR_BODY_PREHEADER]] ]
-; KRYO-NEXT: [[INCDEC_PTR58]] = getelementptr inbounds nuw i8, ptr [[A_ADDR_0101]], i64 16
+; KRYO-NEXT: [[INCDEC_PTR58]] = getelementptr inbounds i16, ptr [[A_ADDR_0101]], i64 8
; KRYO-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[A_ADDR_0101]], align 2
; KRYO-NEXT: [[TMP1:%.*]] = zext <8 x i16> [[TMP0]] to <8 x i32>
; KRYO-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr [[B:%.*]], align 2
; KRYO-NEXT: [[TMP3:%.*]] = zext <8 x i16> [[TMP2]] to <8 x i32>
; KRYO-NEXT: [[TMP4:%.*]] = sub nsw <8 x i32> [[TMP1]], [[TMP3]]
-; KRYO-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[TMP4]], i64 0
-; KRYO-NEXT: [[TMP6:%.*]] = sext i32 [[TMP5]] to i64
-; KRYO-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[G:%.*]], i64 [[TMP6]]
+; KRYO-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[TMP4]], i32 0
+; KRYO-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[G:%.*]], i32 [[TMP5]]
; KRYO-NEXT: [[TMP7:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
; KRYO-NEXT: [[CONV3:%.*]] = zext i16 [[TMP7]] to i32
-; KRYO-NEXT: [[ADD:%.*]] = add nsw i32 [[SUM_0102]], [[CONV3]]
-; KRYO-NEXT: [[TMP8:%.*]] = extractelement <8 x i32> [[TMP4]], i64 1
-; KRYO-NEXT: [[TMP9:%.*]] = sext i32 [[TMP8]] to i64
-; KRYO-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP9]]
+; KRYO-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV3]], [[SUM_0102]]
+; KRYO-NEXT: [[TMP8:%.*]] = extractelement <8 x i32> [[TMP4]], i32 1
+; KRYO-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i16, ptr [[G]], i32 [[TMP8]]
; KRYO-NEXT: [[TMP10:%.*]] = load i16, ptr [[ARRAYIDX10]], align 2
; KRYO-NEXT: [[CONV11:%.*]] = zext i16 [[TMP10]] to i32
; KRYO-NEXT: [[ADD12:%.*]] = add nsw i32 [[ADD]], [[CONV11]]
-; KRYO-NEXT: [[TMP11:%.*]] = extractelement <8 x i32> [[TMP4]], i64 2
-; KRYO-NEXT: [[TMP12:%.*]] = sext i32 [[TMP11]] to i64
-; KRYO-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP12]]
+; KRYO-NEXT: [[TMP9:%.*]] = extractelement <8 x i32> [[TMP4]], i32 2
+; KRYO-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds i16, ptr [[G]], i32 [[TMP9]]
; KRYO-NEXT: [[TMP13:%.*]] = load i16, ptr [[ARRAYIDX19]], align 2
; KRYO-NEXT: [[CONV20:%.*]] = zext i16 [[TMP13]] to i32
; KRYO-NEXT: [[ADD21:%.*]] = add nsw i32 [[ADD12]], [[CONV20]]
-; KRYO-NEXT: [[TMP14:%.*]] = extractelement <8 x i32> [[TMP4]], i64 3
-; KRYO-NEXT: [[TMP15:%.*]] = sext i32 [[TMP14]] to i64
-; KRYO-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP15]]
+; KRYO-NEXT: [[TMP11:%.*]] = extractelement <8 x i32> [[TMP4]], i32 3
+; KRYO-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds i16, ptr [[G]], i32 [[TMP11]]
; KRYO-NEXT: [[TMP16:%.*]] = load i16, ptr [[ARRAYIDX28]], align 2
; KRYO-NEXT: [[CONV29:%.*]] = zext i16 [[TMP16]] to i32
; KRYO-NEXT: [[ADD30:%.*]] = add nsw i32 [[ADD21]], [[CONV29]]
-; KRYO-NEXT: [[TMP17:%.*]] = extractelement <8 x i32> [[TMP4]], i64 4
-; KRYO-NEXT: [[TMP18:%.*]] = sext i32 [[TMP17]] to i64
-; KRYO-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP18]]
+; KRYO-NEXT: [[TMP14:%.*]] = extractelement <8 x i32> [[TMP4]], i32 4
+; KRYO-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds i16, ptr [[G]], i32 [[TMP14]]
; KRYO-NEXT: [[TMP19:%.*]] = load i16, ptr [[ARRAYIDX37]], align 2
; KRYO-NEXT: [[CONV38:%.*]] = zext i16 [[TMP19]] to i32
; KRYO-NEXT: [[ADD39:%.*]] = add nsw i32 [[ADD30]], [[CONV38]]
-; KRYO-NEXT: [[TMP20:%.*]] = extractelement <8 x i32> [[TMP4]], i64 5
-; KRYO-NEXT: [[TMP21:%.*]] = sext i32 [[TMP20]] to i64
-; KRYO-NEXT: [[ARRAYIDX46:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP21]]
+; KRYO-NEXT: [[TMP15:%.*]] = extractelement <8 x i32> [[TMP4]], i32 5
+; KRYO-NEXT: [[ARRAYIDX46:%.*]] = getelementptr inbounds i16, ptr [[G]], i32 [[TMP15]]
; KRYO-NEXT: [[TMP22:%.*]] = load i16, ptr [[ARRAYIDX46]], align 2
; KRYO-NEXT: [[CONV47:%.*]] = zext i16 [[TMP22]] to i32
; KRYO-NEXT: [[ADD48:%.*]] = add nsw i32 [[ADD39]], [[CONV47]]
-; KRYO-NEXT: [[TMP23:%.*]] = extractelement <8 x i32> [[TMP4]], i64 6
-; KRYO-NEXT: [[TMP24:%.*]] = sext i32 [[TMP23]] to i64
-; KRYO-NEXT: [[ARRAYIDX55:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP24]]
+; KRYO-NEXT: [[TMP17:%.*]] = extractelement <8 x i32> [[TMP4]], i32 6
+; KRYO-NEXT: [[ARRAYIDX55:%.*]] = getelementptr inbounds i16, ptr [[G]], i32 [[TMP17]]
; KRYO-NEXT: [[TMP25:%.*]] = load i16, ptr [[ARRAYIDX55]], align 2
; KRYO-NEXT: [[CONV56:%.*]] = zext i16 [[TMP25]] to i32
; KRYO-NEXT: [[ADD57:%.*]] = add nsw i32 [[ADD48]], [[CONV56]]
-; KRYO-NEXT: [[TMP26:%.*]] = extractelement <8 x i32> [[TMP4]], i64 7
-; KRYO-NEXT: [[TMP27:%.*]] = sext i32 [[TMP26]] to i64
-; KRYO-NEXT: [[ARRAYIDX64:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP27]]
+; KRYO-NEXT: [[TMP20:%.*]] = extractelement <8 x i32> [[TMP4]], i32 7
+; KRYO-NEXT: [[ARRAYIDX64:%.*]] = getelementptr inbounds i16, ptr [[G]], i32 [[TMP20]]
; KRYO-NEXT: [[TMP28:%.*]] = load i16, ptr [[ARRAYIDX64]], align 2
; KRYO-NEXT: [[CONV65:%.*]] = zext i16 [[TMP28]] to i32
; KRYO-NEXT: [[ADD66]] = add nsw i32 [[ADD57]], [[CONV65]]
@@ -293,55 +277,55 @@ define i32 @gather_reduce_8x16_i64(ptr nocapture readonly %a, ptr nocapture read
; GENERIC-NEXT: [[I_0103:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; GENERIC-NEXT: [[SUM_0102:%.*]] = phi i32 [ [[ADD66]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; GENERIC-NEXT: [[A_ADDR_0101:%.*]] = phi ptr [ [[INCDEC_PTR58:%.*]], [[FOR_BODY]] ], [ [[A:%.*]], [[FOR_BODY_PREHEADER]] ]
-; GENERIC-NEXT: [[INCDEC_PTR58]] = getelementptr inbounds nuw i8, ptr [[A_ADDR_0101]], i64 16
+; GENERIC-NEXT: [[INCDEC_PTR58]] = getelementptr inbounds i16, ptr [[A_ADDR_0101]], i64 8
; GENERIC-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[A_ADDR_0101]], align 2
; GENERIC-NEXT: [[TMP1:%.*]] = zext <8 x i16> [[TMP0]] to <8 x i32>
; GENERIC-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr [[B:%.*]], align 2
; GENERIC-NEXT: [[TMP3:%.*]] = zext <8 x i16> [[TMP2]] to <8 x i32>
-; GENERIC-NEXT: [[TMP4:%.*]] = sub nsw <8 x i32> [[TMP1]], [[TMP3]]
-; GENERIC-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[TMP4]], i64 0
+; GENERIC-NEXT: [[TMP4:%.*]] = sub <8 x i32> [[TMP1]], [[TMP3]]
+; GENERIC-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[TMP4]], i32 0
; GENERIC-NEXT: [[TMP6:%.*]] = sext i32 [[TMP5]] to i64
; GENERIC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[G:%.*]], i64 [[TMP6]]
; GENERIC-NEXT: [[TMP7:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
; GENERIC-NEXT: [[CONV3:%.*]] = zext i16 [[TMP7]] to i32
-; GENERIC-NEXT: [[ADD:%.*]] = add nsw i32 [[SUM_0102]], [[CONV3]]
-; GENERIC-NEXT: [[TMP8:%.*]] = extractelement <8 x i32> [[TMP4]], i64 1
+; GENERIC-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV3]], [[SUM_0102]]
+; GENERIC-NEXT: [[TMP8:%.*]] = extractelement <8 x i32> [[TMP4]], i32 1
; GENERIC-NEXT: [[TMP9:%.*]] = sext i32 [[TMP8]] to i64
; GENERIC-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP9]]
; GENERIC-NEXT: [[TMP10:%.*]] = load i16, ptr [[ARRAYIDX10]], align 2
; GENERIC-NEXT: [[CONV11:%.*]] = zext i16 [[TMP10]] to i32
; GENERIC-NEXT: [[ADD12:%.*]] = add nsw i32 [[ADD]], [[CONV11]]
-; GENERIC-NEXT: [[TMP11:%.*]] = extractelement <8 x i32> [[TMP4]], i64 2
+; GENERIC-NEXT: [[TMP11:%.*]] = extractelement <8 x i32> [[TMP4]], i32 2
; GENERIC-NEXT: [[TMP12:%.*]] = sext i32 [[TMP11]] to i64
; GENERIC-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP12]]
; GENERIC-NEXT: [[TMP13:%.*]] = load i16, ptr [[ARRAYIDX19]], align 2
; GENERIC-NEXT: [[CONV20:%.*]] = zext i16 [[TMP13]] to i32
; GENERIC-NEXT: [[ADD21:%.*]] = add nsw i32 [[ADD12]], [[CONV20]]
-; GENERIC-NEXT: [[TMP14:%.*]] = extractelement <8 x i32> [[TMP4]], i64 3
+; GENERIC-NEXT: [[TMP14:%.*]] = extractelement <8 x i32> [[TMP4]], i32 3
; GENERI...
[truncated]
|
@llvm/pr-subscribers-backend-amdgpu Author: Hanyang (Eric) Xu (ericxu233) ChangesAddresses: #145511 Note that there are still two instances of --passes=slp-vectorizer,instcombine left unchanged because it seems that the tests are meant to run in conjunction with instcombine and removing instcombine would invalidate their original objective: llvm/test/Transforms/SLPVectorizer/arith-div-undef.ll Patch is 538.74 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/146257.diff 38 Files Affected:
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-cost.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-cost.ll
index e460f558f4723..8dc88f7b96716 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-cost.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-cost.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -S -passes=slp-vectorizer,instcombine -pass-remarks-output=%t | FileCheck %s
+; RUN: opt < %s -S -passes=slp-vectorizer -pass-remarks-output=%t | FileCheck %s
; RUN: cat %t | FileCheck -check-prefix=REMARK %s
-; RUN: opt < %s -S -aa-pipeline=basic-aa -passes='slp-vectorizer,instcombine' -pass-remarks-output=%t | FileCheck %s
+; RUN: opt < %s -S -aa-pipeline=basic-aa -passes='slp-vectorizer' -pass-remarks-output=%t | FileCheck %s
; RUN: cat %t | FileCheck -check-prefix=REMARK %s
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
@@ -16,10 +16,10 @@ target triple = "aarch64--linux-gnu"
define internal i32 @gather_multiple_use(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: @gather_multiple_use(
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[C:%.*]], i64 0
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[A:%.*]], i64 1
-; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[B:%.*]], i64 2
-; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[D:%.*]], i64 3
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[C:%.*]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[A:%.*]], i32 1
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[B:%.*]], i32 2
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[D:%.*]], i32 3
; CHECK-NEXT: [[TMP5:%.*]] = lshr <4 x i32> [[TMP4]], splat (i32 15)
; CHECK-NEXT: [[TMP6:%.*]] = and <4 x i32> [[TMP5]], splat (i32 65537)
; CHECK-NEXT: [[TMP7:%.*]] = mul nuw <4 x i32> [[TMP6]], splat (i32 65535)
@@ -57,22 +57,26 @@ define internal i32 @gather_multiple_use(i32 %a, i32 %b, i32 %c, i32 %d) {
@data = global [6 x [258 x i8]] zeroinitializer, align 1
define void @gather_load(ptr noalias %ptr) {
; CHECK-LABEL: @gather_load(
-; CHECK-NEXT: [[ARRAYIDX182:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR:%.*]], i64 2
-; CHECK-NEXT: [[ARRAYIDX183:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR]], i64 4
-; CHECK-NEXT: [[ARRAYIDX184:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR]], i64 6
-; CHECK-NEXT: [[ARRAYIDX185:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR]], i64 8
-; CHECK-NEXT: [[L0:%.*]] = load i8, ptr getelementptr inbounds nuw (i8, ptr @data, i64 258), align 1
+; CHECK-NEXT: [[ARRAYIDX182:%.*]] = getelementptr inbounds i16, ptr [[PTR:%.*]], i64 1
+; CHECK-NEXT: [[ARRAYIDX183:%.*]] = getelementptr inbounds i16, ptr [[PTR]], i64 2
+; CHECK-NEXT: [[ARRAYIDX184:%.*]] = getelementptr inbounds i16, ptr [[PTR]], i64 3
+; CHECK-NEXT: [[ARRAYIDX185:%.*]] = getelementptr inbounds i16, ptr [[PTR]], i64 4
+; CHECK-NEXT: [[ARRAYIDX149:%.*]] = getelementptr inbounds [6 x [258 x i8]], ptr @data, i64 0, i64 1, i64 0
+; CHECK-NEXT: [[L0:%.*]] = load i8, ptr [[ARRAYIDX149]], align 1
; CHECK-NEXT: [[CONV150:%.*]] = zext i8 [[L0]] to i16
-; CHECK-NEXT: [[ADD152:%.*]] = add nuw nsw i16 [[CONV150]], 10
-; CHECK-NEXT: [[L1:%.*]] = load i8, ptr getelementptr inbounds nuw (i8, ptr @data, i64 517), align 1
+; CHECK-NEXT: [[ADD152:%.*]] = add i16 10, [[CONV150]]
+; CHECK-NEXT: [[ARRAYIDX155:%.*]] = getelementptr inbounds [6 x [258 x i8]], ptr @data, i64 0, i64 2, i64 1
+; CHECK-NEXT: [[L1:%.*]] = load i8, ptr [[ARRAYIDX155]], align 1
; CHECK-NEXT: [[CONV156:%.*]] = zext i8 [[L1]] to i16
-; CHECK-NEXT: [[ADD158:%.*]] = add nuw nsw i16 [[CONV156]], 20
-; CHECK-NEXT: [[L2:%.*]] = load i8, ptr getelementptr inbounds nuw (i8, ptr @data, i64 776), align 1
+; CHECK-NEXT: [[ADD158:%.*]] = add i16 20, [[CONV156]]
+; CHECK-NEXT: [[ARRAYIDX161:%.*]] = getelementptr inbounds [6 x [258 x i8]], ptr @data, i64 0, i64 3, i64 2
+; CHECK-NEXT: [[L2:%.*]] = load i8, ptr [[ARRAYIDX161]], align 1
; CHECK-NEXT: [[CONV162:%.*]] = zext i8 [[L2]] to i16
-; CHECK-NEXT: [[ADD164:%.*]] = add nuw nsw i16 [[CONV162]], 30
-; CHECK-NEXT: [[L3:%.*]] = load i8, ptr getelementptr inbounds nuw (i8, ptr @data, i64 1035), align 1
+; CHECK-NEXT: [[ADD164:%.*]] = add i16 30, [[CONV162]]
+; CHECK-NEXT: [[ARRAYIDX167:%.*]] = getelementptr inbounds [6 x [258 x i8]], ptr @data, i64 0, i64 4, i64 3
+; CHECK-NEXT: [[L3:%.*]] = load i8, ptr [[ARRAYIDX167]], align 1
; CHECK-NEXT: [[CONV168:%.*]] = zext i8 [[L3]] to i16
-; CHECK-NEXT: [[ADD170:%.*]] = add nuw nsw i16 [[CONV168]], 40
+; CHECK-NEXT: [[ADD170:%.*]] = add i16 40, [[CONV168]]
; CHECK-NEXT: store i16 [[ADD152]], ptr [[ARRAYIDX182]], align 2
; CHECK-NEXT: store i16 [[ADD158]], ptr [[ARRAYIDX183]], align 2
; CHECK-NEXT: store i16 [[ADD164]], ptr [[ARRAYIDX184]], align 2
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-reduce.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-reduce.ll
index 900d5f293b5b8..4f8fc339fb64b 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-reduce.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-reduce.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -passes=slp-vectorizer,dce,instcombine < %s | FileCheck %s --check-prefix=GENERIC
-; RUN: opt -S -mcpu=kryo -passes=slp-vectorizer,dce,instcombine < %s | FileCheck %s --check-prefix=KRYO
+; RUN: opt -S -passes=slp-vectorizer,dce < %s | FileCheck %s --check-prefix=GENERIC
+; RUN: opt -S -mcpu=kryo -passes=slp-vectorizer,dce < %s | FileCheck %s --check-prefix=KRYO
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
target triple = "aarch64--linux-gnu"
@@ -36,57 +36,49 @@ define i32 @gather_reduce_8x16_i32(ptr nocapture readonly %a, ptr nocapture read
; GENERIC-NEXT: [[I_0103:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; GENERIC-NEXT: [[SUM_0102:%.*]] = phi i32 [ [[ADD66]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; GENERIC-NEXT: [[A_ADDR_0101:%.*]] = phi ptr [ [[INCDEC_PTR58:%.*]], [[FOR_BODY]] ], [ [[A:%.*]], [[FOR_BODY_PREHEADER]] ]
-; GENERIC-NEXT: [[INCDEC_PTR58]] = getelementptr inbounds nuw i8, ptr [[A_ADDR_0101]], i64 16
+; GENERIC-NEXT: [[INCDEC_PTR58]] = getelementptr inbounds i16, ptr [[A_ADDR_0101]], i64 8
; GENERIC-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[A_ADDR_0101]], align 2
; GENERIC-NEXT: [[TMP1:%.*]] = zext <8 x i16> [[TMP0]] to <8 x i32>
; GENERIC-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr [[B:%.*]], align 2
; GENERIC-NEXT: [[TMP3:%.*]] = zext <8 x i16> [[TMP2]] to <8 x i32>
; GENERIC-NEXT: [[TMP4:%.*]] = sub nsw <8 x i32> [[TMP1]], [[TMP3]]
-; GENERIC-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[TMP4]], i64 0
-; GENERIC-NEXT: [[TMP6:%.*]] = sext i32 [[TMP5]] to i64
-; GENERIC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[G:%.*]], i64 [[TMP6]]
+; GENERIC-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[TMP4]], i32 0
+; GENERIC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[G:%.*]], i32 [[TMP5]]
; GENERIC-NEXT: [[TMP7:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
; GENERIC-NEXT: [[CONV3:%.*]] = zext i16 [[TMP7]] to i32
-; GENERIC-NEXT: [[ADD:%.*]] = add nsw i32 [[SUM_0102]], [[CONV3]]
-; GENERIC-NEXT: [[TMP8:%.*]] = extractelement <8 x i32> [[TMP4]], i64 1
-; GENERIC-NEXT: [[TMP9:%.*]] = sext i32 [[TMP8]] to i64
-; GENERIC-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP9]]
+; GENERIC-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV3]], [[SUM_0102]]
+; GENERIC-NEXT: [[TMP8:%.*]] = extractelement <8 x i32> [[TMP4]], i32 1
+; GENERIC-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i16, ptr [[G]], i32 [[TMP8]]
; GENERIC-NEXT: [[TMP10:%.*]] = load i16, ptr [[ARRAYIDX10]], align 2
; GENERIC-NEXT: [[CONV11:%.*]] = zext i16 [[TMP10]] to i32
; GENERIC-NEXT: [[ADD12:%.*]] = add nsw i32 [[ADD]], [[CONV11]]
-; GENERIC-NEXT: [[TMP11:%.*]] = extractelement <8 x i32> [[TMP4]], i64 2
-; GENERIC-NEXT: [[TMP12:%.*]] = sext i32 [[TMP11]] to i64
-; GENERIC-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP12]]
+; GENERIC-NEXT: [[TMP9:%.*]] = extractelement <8 x i32> [[TMP4]], i32 2
+; GENERIC-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds i16, ptr [[G]], i32 [[TMP9]]
; GENERIC-NEXT: [[TMP13:%.*]] = load i16, ptr [[ARRAYIDX19]], align 2
; GENERIC-NEXT: [[CONV20:%.*]] = zext i16 [[TMP13]] to i32
; GENERIC-NEXT: [[ADD21:%.*]] = add nsw i32 [[ADD12]], [[CONV20]]
-; GENERIC-NEXT: [[TMP14:%.*]] = extractelement <8 x i32> [[TMP4]], i64 3
-; GENERIC-NEXT: [[TMP15:%.*]] = sext i32 [[TMP14]] to i64
-; GENERIC-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP15]]
+; GENERIC-NEXT: [[TMP11:%.*]] = extractelement <8 x i32> [[TMP4]], i32 3
+; GENERIC-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds i16, ptr [[G]], i32 [[TMP11]]
; GENERIC-NEXT: [[TMP16:%.*]] = load i16, ptr [[ARRAYIDX28]], align 2
; GENERIC-NEXT: [[CONV29:%.*]] = zext i16 [[TMP16]] to i32
; GENERIC-NEXT: [[ADD30:%.*]] = add nsw i32 [[ADD21]], [[CONV29]]
-; GENERIC-NEXT: [[TMP17:%.*]] = extractelement <8 x i32> [[TMP4]], i64 4
-; GENERIC-NEXT: [[TMP18:%.*]] = sext i32 [[TMP17]] to i64
-; GENERIC-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP18]]
+; GENERIC-NEXT: [[TMP14:%.*]] = extractelement <8 x i32> [[TMP4]], i32 4
+; GENERIC-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds i16, ptr [[G]], i32 [[TMP14]]
; GENERIC-NEXT: [[TMP19:%.*]] = load i16, ptr [[ARRAYIDX37]], align 2
; GENERIC-NEXT: [[CONV38:%.*]] = zext i16 [[TMP19]] to i32
; GENERIC-NEXT: [[ADD39:%.*]] = add nsw i32 [[ADD30]], [[CONV38]]
-; GENERIC-NEXT: [[TMP20:%.*]] = extractelement <8 x i32> [[TMP4]], i64 5
-; GENERIC-NEXT: [[TMP21:%.*]] = sext i32 [[TMP20]] to i64
-; GENERIC-NEXT: [[ARRAYIDX46:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP21]]
+; GENERIC-NEXT: [[TMP15:%.*]] = extractelement <8 x i32> [[TMP4]], i32 5
+; GENERIC-NEXT: [[ARRAYIDX46:%.*]] = getelementptr inbounds i16, ptr [[G]], i32 [[TMP15]]
; GENERIC-NEXT: [[TMP22:%.*]] = load i16, ptr [[ARRAYIDX46]], align 2
; GENERIC-NEXT: [[CONV47:%.*]] = zext i16 [[TMP22]] to i32
; GENERIC-NEXT: [[ADD48:%.*]] = add nsw i32 [[ADD39]], [[CONV47]]
-; GENERIC-NEXT: [[TMP23:%.*]] = extractelement <8 x i32> [[TMP4]], i64 6
-; GENERIC-NEXT: [[TMP24:%.*]] = sext i32 [[TMP23]] to i64
-; GENERIC-NEXT: [[ARRAYIDX55:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP24]]
+; GENERIC-NEXT: [[TMP17:%.*]] = extractelement <8 x i32> [[TMP4]], i32 6
+; GENERIC-NEXT: [[ARRAYIDX55:%.*]] = getelementptr inbounds i16, ptr [[G]], i32 [[TMP17]]
; GENERIC-NEXT: [[TMP25:%.*]] = load i16, ptr [[ARRAYIDX55]], align 2
; GENERIC-NEXT: [[CONV56:%.*]] = zext i16 [[TMP25]] to i32
; GENERIC-NEXT: [[ADD57:%.*]] = add nsw i32 [[ADD48]], [[CONV56]]
-; GENERIC-NEXT: [[TMP26:%.*]] = extractelement <8 x i32> [[TMP4]], i64 7
-; GENERIC-NEXT: [[TMP27:%.*]] = sext i32 [[TMP26]] to i64
-; GENERIC-NEXT: [[ARRAYIDX64:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP27]]
+; GENERIC-NEXT: [[TMP20:%.*]] = extractelement <8 x i32> [[TMP4]], i32 7
+; GENERIC-NEXT: [[ARRAYIDX64:%.*]] = getelementptr inbounds i16, ptr [[G]], i32 [[TMP20]]
; GENERIC-NEXT: [[TMP28:%.*]] = load i16, ptr [[ARRAYIDX64]], align 2
; GENERIC-NEXT: [[CONV65:%.*]] = zext i16 [[TMP28]] to i32
; GENERIC-NEXT: [[ADD66]] = add nsw i32 [[ADD57]], [[CONV65]]
@@ -109,57 +101,49 @@ define i32 @gather_reduce_8x16_i32(ptr nocapture readonly %a, ptr nocapture read
; KRYO-NEXT: [[I_0103:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; KRYO-NEXT: [[SUM_0102:%.*]] = phi i32 [ [[ADD66]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; KRYO-NEXT: [[A_ADDR_0101:%.*]] = phi ptr [ [[INCDEC_PTR58:%.*]], [[FOR_BODY]] ], [ [[A:%.*]], [[FOR_BODY_PREHEADER]] ]
-; KRYO-NEXT: [[INCDEC_PTR58]] = getelementptr inbounds nuw i8, ptr [[A_ADDR_0101]], i64 16
+; KRYO-NEXT: [[INCDEC_PTR58]] = getelementptr inbounds i16, ptr [[A_ADDR_0101]], i64 8
; KRYO-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[A_ADDR_0101]], align 2
; KRYO-NEXT: [[TMP1:%.*]] = zext <8 x i16> [[TMP0]] to <8 x i32>
; KRYO-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr [[B:%.*]], align 2
; KRYO-NEXT: [[TMP3:%.*]] = zext <8 x i16> [[TMP2]] to <8 x i32>
; KRYO-NEXT: [[TMP4:%.*]] = sub nsw <8 x i32> [[TMP1]], [[TMP3]]
-; KRYO-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[TMP4]], i64 0
-; KRYO-NEXT: [[TMP6:%.*]] = sext i32 [[TMP5]] to i64
-; KRYO-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[G:%.*]], i64 [[TMP6]]
+; KRYO-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[TMP4]], i32 0
+; KRYO-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[G:%.*]], i32 [[TMP5]]
; KRYO-NEXT: [[TMP7:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
; KRYO-NEXT: [[CONV3:%.*]] = zext i16 [[TMP7]] to i32
-; KRYO-NEXT: [[ADD:%.*]] = add nsw i32 [[SUM_0102]], [[CONV3]]
-; KRYO-NEXT: [[TMP8:%.*]] = extractelement <8 x i32> [[TMP4]], i64 1
-; KRYO-NEXT: [[TMP9:%.*]] = sext i32 [[TMP8]] to i64
-; KRYO-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP9]]
+; KRYO-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV3]], [[SUM_0102]]
+; KRYO-NEXT: [[TMP8:%.*]] = extractelement <8 x i32> [[TMP4]], i32 1
+; KRYO-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i16, ptr [[G]], i32 [[TMP8]]
; KRYO-NEXT: [[TMP10:%.*]] = load i16, ptr [[ARRAYIDX10]], align 2
; KRYO-NEXT: [[CONV11:%.*]] = zext i16 [[TMP10]] to i32
; KRYO-NEXT: [[ADD12:%.*]] = add nsw i32 [[ADD]], [[CONV11]]
-; KRYO-NEXT: [[TMP11:%.*]] = extractelement <8 x i32> [[TMP4]], i64 2
-; KRYO-NEXT: [[TMP12:%.*]] = sext i32 [[TMP11]] to i64
-; KRYO-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP12]]
+; KRYO-NEXT: [[TMP9:%.*]] = extractelement <8 x i32> [[TMP4]], i32 2
+; KRYO-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds i16, ptr [[G]], i32 [[TMP9]]
; KRYO-NEXT: [[TMP13:%.*]] = load i16, ptr [[ARRAYIDX19]], align 2
; KRYO-NEXT: [[CONV20:%.*]] = zext i16 [[TMP13]] to i32
; KRYO-NEXT: [[ADD21:%.*]] = add nsw i32 [[ADD12]], [[CONV20]]
-; KRYO-NEXT: [[TMP14:%.*]] = extractelement <8 x i32> [[TMP4]], i64 3
-; KRYO-NEXT: [[TMP15:%.*]] = sext i32 [[TMP14]] to i64
-; KRYO-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP15]]
+; KRYO-NEXT: [[TMP11:%.*]] = extractelement <8 x i32> [[TMP4]], i32 3
+; KRYO-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds i16, ptr [[G]], i32 [[TMP11]]
; KRYO-NEXT: [[TMP16:%.*]] = load i16, ptr [[ARRAYIDX28]], align 2
; KRYO-NEXT: [[CONV29:%.*]] = zext i16 [[TMP16]] to i32
; KRYO-NEXT: [[ADD30:%.*]] = add nsw i32 [[ADD21]], [[CONV29]]
-; KRYO-NEXT: [[TMP17:%.*]] = extractelement <8 x i32> [[TMP4]], i64 4
-; KRYO-NEXT: [[TMP18:%.*]] = sext i32 [[TMP17]] to i64
-; KRYO-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP18]]
+; KRYO-NEXT: [[TMP14:%.*]] = extractelement <8 x i32> [[TMP4]], i32 4
+; KRYO-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds i16, ptr [[G]], i32 [[TMP14]]
; KRYO-NEXT: [[TMP19:%.*]] = load i16, ptr [[ARRAYIDX37]], align 2
; KRYO-NEXT: [[CONV38:%.*]] = zext i16 [[TMP19]] to i32
; KRYO-NEXT: [[ADD39:%.*]] = add nsw i32 [[ADD30]], [[CONV38]]
-; KRYO-NEXT: [[TMP20:%.*]] = extractelement <8 x i32> [[TMP4]], i64 5
-; KRYO-NEXT: [[TMP21:%.*]] = sext i32 [[TMP20]] to i64
-; KRYO-NEXT: [[ARRAYIDX46:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP21]]
+; KRYO-NEXT: [[TMP15:%.*]] = extractelement <8 x i32> [[TMP4]], i32 5
+; KRYO-NEXT: [[ARRAYIDX46:%.*]] = getelementptr inbounds i16, ptr [[G]], i32 [[TMP15]]
; KRYO-NEXT: [[TMP22:%.*]] = load i16, ptr [[ARRAYIDX46]], align 2
; KRYO-NEXT: [[CONV47:%.*]] = zext i16 [[TMP22]] to i32
; KRYO-NEXT: [[ADD48:%.*]] = add nsw i32 [[ADD39]], [[CONV47]]
-; KRYO-NEXT: [[TMP23:%.*]] = extractelement <8 x i32> [[TMP4]], i64 6
-; KRYO-NEXT: [[TMP24:%.*]] = sext i32 [[TMP23]] to i64
-; KRYO-NEXT: [[ARRAYIDX55:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP24]]
+; KRYO-NEXT: [[TMP17:%.*]] = extractelement <8 x i32> [[TMP4]], i32 6
+; KRYO-NEXT: [[ARRAYIDX55:%.*]] = getelementptr inbounds i16, ptr [[G]], i32 [[TMP17]]
; KRYO-NEXT: [[TMP25:%.*]] = load i16, ptr [[ARRAYIDX55]], align 2
; KRYO-NEXT: [[CONV56:%.*]] = zext i16 [[TMP25]] to i32
; KRYO-NEXT: [[ADD57:%.*]] = add nsw i32 [[ADD48]], [[CONV56]]
-; KRYO-NEXT: [[TMP26:%.*]] = extractelement <8 x i32> [[TMP4]], i64 7
-; KRYO-NEXT: [[TMP27:%.*]] = sext i32 [[TMP26]] to i64
-; KRYO-NEXT: [[ARRAYIDX64:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP27]]
+; KRYO-NEXT: [[TMP20:%.*]] = extractelement <8 x i32> [[TMP4]], i32 7
+; KRYO-NEXT: [[ARRAYIDX64:%.*]] = getelementptr inbounds i16, ptr [[G]], i32 [[TMP20]]
; KRYO-NEXT: [[TMP28:%.*]] = load i16, ptr [[ARRAYIDX64]], align 2
; KRYO-NEXT: [[CONV65:%.*]] = zext i16 [[TMP28]] to i32
; KRYO-NEXT: [[ADD66]] = add nsw i32 [[ADD57]], [[CONV65]]
@@ -293,55 +277,55 @@ define i32 @gather_reduce_8x16_i64(ptr nocapture readonly %a, ptr nocapture read
; GENERIC-NEXT: [[I_0103:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; GENERIC-NEXT: [[SUM_0102:%.*]] = phi i32 [ [[ADD66]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; GENERIC-NEXT: [[A_ADDR_0101:%.*]] = phi ptr [ [[INCDEC_PTR58:%.*]], [[FOR_BODY]] ], [ [[A:%.*]], [[FOR_BODY_PREHEADER]] ]
-; GENERIC-NEXT: [[INCDEC_PTR58]] = getelementptr inbounds nuw i8, ptr [[A_ADDR_0101]], i64 16
+; GENERIC-NEXT: [[INCDEC_PTR58]] = getelementptr inbounds i16, ptr [[A_ADDR_0101]], i64 8
; GENERIC-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[A_ADDR_0101]], align 2
; GENERIC-NEXT: [[TMP1:%.*]] = zext <8 x i16> [[TMP0]] to <8 x i32>
; GENERIC-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr [[B:%.*]], align 2
; GENERIC-NEXT: [[TMP3:%.*]] = zext <8 x i16> [[TMP2]] to <8 x i32>
-; GENERIC-NEXT: [[TMP4:%.*]] = sub nsw <8 x i32> [[TMP1]], [[TMP3]]
-; GENERIC-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[TMP4]], i64 0
+; GENERIC-NEXT: [[TMP4:%.*]] = sub <8 x i32> [[TMP1]], [[TMP3]]
+; GENERIC-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[TMP4]], i32 0
; GENERIC-NEXT: [[TMP6:%.*]] = sext i32 [[TMP5]] to i64
; GENERIC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[G:%.*]], i64 [[TMP6]]
; GENERIC-NEXT: [[TMP7:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
; GENERIC-NEXT: [[CONV3:%.*]] = zext i16 [[TMP7]] to i32
-; GENERIC-NEXT: [[ADD:%.*]] = add nsw i32 [[SUM_0102]], [[CONV3]]
-; GENERIC-NEXT: [[TMP8:%.*]] = extractelement <8 x i32> [[TMP4]], i64 1
+; GENERIC-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV3]], [[SUM_0102]]
+; GENERIC-NEXT: [[TMP8:%.*]] = extractelement <8 x i32> [[TMP4]], i32 1
; GENERIC-NEXT: [[TMP9:%.*]] = sext i32 [[TMP8]] to i64
; GENERIC-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP9]]
; GENERIC-NEXT: [[TMP10:%.*]] = load i16, ptr [[ARRAYIDX10]], align 2
; GENERIC-NEXT: [[CONV11:%.*]] = zext i16 [[TMP10]] to i32
; GENERIC-NEXT: [[ADD12:%.*]] = add nsw i32 [[ADD]], [[CONV11]]
-; GENERIC-NEXT: [[TMP11:%.*]] = extractelement <8 x i32> [[TMP4]], i64 2
+; GENERIC-NEXT: [[TMP11:%.*]] = extractelement <8 x i32> [[TMP4]], i32 2
; GENERIC-NEXT: [[TMP12:%.*]] = sext i32 [[TMP11]] to i64
; GENERIC-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP12]]
; GENERIC-NEXT: [[TMP13:%.*]] = load i16, ptr [[ARRAYIDX19]], align 2
; GENERIC-NEXT: [[CONV20:%.*]] = zext i16 [[TMP13]] to i32
; GENERIC-NEXT: [[ADD21:%.*]] = add nsw i32 [[ADD12]], [[CONV20]]
-; GENERIC-NEXT: [[TMP14:%.*]] = extractelement <8 x i32> [[TMP4]], i64 3
+; GENERIC-NEXT: [[TMP14:%.*]] = extractelement <8 x i32> [[TMP4]], i32 3
; GENERI...
[truncated]
|
@alexey-bataev @RKSimon Request for review please. |
; RUN: opt -S -mtriple=aarch64--linux-gnu -passes=slp-vectorizer,dce -slp-threshold=-7 -pass-remarks-output=%t < %s | FileCheck %s | ||
; RUN: cat %t | FileCheck -check-prefix=YAML %s | ||
; RUN: opt -S -mtriple=aarch64--linux-gnu -passes='slp-vectorizer,dce,instcombine' -slp-threshold=-7 -pass-remarks-output=%t < %s | FileCheck %s | ||
; RUN: opt -S -mtriple=aarch64--linux-gnu -passes='slp-vectorizer,dce' -slp-threshold=-7 -pass-remarks-output=%t < %s | FileCheck %s |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
try to remove dce too
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done!
; RUN: opt -S -mtriple=aarch64--linux-gnu -passes=slp-vectorizer,dce -slp-threshold=-5 -pass-remarks-output=%t < %s | FileCheck %s | ||
; RUN: cat %t | FileCheck -check-prefix=YAML %s | ||
; RUN: opt -S -mtriple=aarch64--linux-gnu -passes='slp-vectorizer,dce,instcombine' -slp-threshold=-5 -pass-remarks-output=%t < %s | FileCheck %s | ||
; RUN: opt -S -mtriple=aarch64--linux-gnu -passes='slp-vectorizer,dce' -slp-threshold=-5 -pass-remarks-output=%t < %s | FileCheck %s |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Same, try to remove dce
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done! Note that removing DCE doesn't change the def checks for both cases.
; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[IDX_EXT]] | ||
; CHECK-NEXT: [[ADD_PTR64:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[IDX_EXT63]] | ||
; CHECK-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds nuw i8, ptr [[ADD_PTR]], i64 4 | ||
; CHECK-NEXT: [[ARRAYIDX5_1:%.*]] = getelementptr inbounds nuw i8, ptr [[ADD_PTR64]], i64 4 | ||
; CHECK-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR]], i64 4 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Add a note that dead code must be removed
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sorry, I don't quite understand. By adding a note, do you mean commenting on the lit test that dead code must be removed in the future?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done! Please let me know if the TODO comment is okay and in the right place.
; CHECK-NEXT: [[DST4:%.*]] = getelementptr inbounds nuw i8, ptr [[DST0:%.*]], i64 16 | ||
; CHECK-NEXT: [[DST8:%.*]] = getelementptr inbounds nuw i8, ptr [[DST0]], i64 32 | ||
; CHECK-NEXT: [[DST12:%.*]] = getelementptr inbounds nuw i8, ptr [[DST0]], i64 48 | ||
; CHECK-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR]], i64 4 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Same, add a note that dead code must be removed
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done!
… removing dead code
Addresses: #145511
Note that there are still two instances of --passes=slp-vectorizer,instcombine left unchanged because it seems that the tests are meant to run in conjunction with instcombine and removing instcombine would invalidate their original objective:
llvm/test/Transforms/SLPVectorizer/arith-div-undef.ll
llvm/test/Transforms/SLPVectorizer/slp-hr-with-reuse.ll