-
Notifications
You must be signed in to change notification settings - Fork 12.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[X86] combinePTESTCC - fold PTESTC(PCMPEQ(X,0),-1) == PTESTZ(X,X) #123466
Conversation
Simplifies the hidden "all_of(X == 0)" pattern. Fixes llvm#123456.
@llvm/pr-subscribers-backend-x86 Author: Simon Pilgrim (RKSimon). Changes: Simplifies the hidden "all_of(X == 0)" pattern. Fixes #123456. Full diff: https://github.com/llvm/llvm-project/pull/123466.diff 2 Files Affected:
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 410b08912a5e24..33ddcb57e9b08b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -48054,6 +48054,18 @@ static SDValue combinePTESTCC(SDValue EFLAGS, X86::CondCode &CC,
DAG.getAllOnesConstant(DL, NotOp1.getValueType())));
}
}
+ // PTESTC(PCMPEQ(X,0),-1) == PTESTZ(X,X)
+ if (EFLAGS.getOpcode() == X86ISD::PTEST &&
+ ISD::isBuildVectorAllOnes(Op1.getNode())) {
+ SDValue BC0 = peekThroughBitcasts(Op0);
+ if (BC0.getOpcode() == X86ISD::PCMPEQ &&
+ ISD::isBuildVectorAllZeros(BC0.getOperand(1).getNode())) {
+ SDLoc DL(EFLAGS);
+ CC = (CC == X86::COND_B ? X86::COND_E : X86::COND_NE);
+ SDValue X = DAG.getBitcast(OpVT, BC0.getOperand(0));
+ return DAG.getNode(EFLAGS.getOpcode(), DL, VT, X, X);
+ }
+ }
}
if (CC == X86::COND_E || CC == X86::COND_NE) {
diff --git a/llvm/test/CodeGen/X86/combine-ptest.ll b/llvm/test/CodeGen/X86/combine-ptest.ll
index f5ab700caea467..fda14027e994e3 100644
--- a/llvm/test/CodeGen/X86/combine-ptest.ll
+++ b/llvm/test/CodeGen/X86/combine-ptest.ll
@@ -376,20 +376,14 @@ define i32 @ptestz_v2i64_concat(<4 x i64> %c, <4 x i64> %d, i32 %a, i32 %b) {
define i1 @ptestc_v4i32_eq0(<4 x i32> %a0) {
; SSE-LABEL: ptestc_v4i32_eq0:
; SSE: # %bb.0:
-; SSE-NEXT: pxor %xmm1, %xmm1
-; SSE-NEXT: pcmpeqd %xmm0, %xmm1
-; SSE-NEXT: pcmpeqd %xmm0, %xmm0
-; SSE-NEXT: ptest %xmm0, %xmm1
-; SSE-NEXT: setb %al
+; SSE-NEXT: ptest %xmm0, %xmm0
+; SSE-NEXT: sete %al
; SSE-NEXT: retq
;
; AVX-LABEL: ptestc_v4i32_eq0:
; AVX: # %bb.0:
-; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vptest %xmm1, %xmm0
-; AVX-NEXT: setb %al
+; AVX-NEXT: vptest %xmm0, %xmm0
+; AVX-NEXT: sete %al
; AVX-NEXT: retq
%icmp = icmp eq <4 x i32> %a0, zeroinitializer
%sext = sext <4 x i1> %icmp to <4 x i32>
@@ -403,22 +397,14 @@ define i1 @ptestc_v4i32_eq0(<4 x i32> %a0) {
define i1 @ptestc_v4i32_and_eq0(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: ptestc_v4i32_and_eq0:
; SSE: # %bb.0:
-; SSE-NEXT: pand %xmm1, %xmm0
-; SSE-NEXT: pxor %xmm1, %xmm1
-; SSE-NEXT: pcmpeqd %xmm0, %xmm1
-; SSE-NEXT: pcmpeqd %xmm0, %xmm0
; SSE-NEXT: ptest %xmm0, %xmm1
-; SSE-NEXT: setb %al
+; SSE-NEXT: sete %al
; SSE-NEXT: retq
;
; AVX-LABEL: ptestc_v4i32_and_eq0:
; AVX: # %bb.0:
-; AVX-NEXT: vpand %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vptest %xmm1, %xmm0
-; AVX-NEXT: setb %al
+; AVX-NEXT: vptest %xmm0, %xmm1
+; AVX-NEXT: sete %al
; AVX-NEXT: retq
%and = and <4 x i32> %a1, %a0
%icmp = icmp eq <4 x i32> %and, zeroinitializer
@@ -433,21 +419,13 @@ define i1 @ptestc_v4i32_and_eq0(<4 x i32> %a0, <4 x i32> %a1) {
define i1 @ptestc_v4i32_andnot_eq0(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: ptestc_v4i32_andnot_eq0:
; SSE: # %bb.0:
-; SSE-NEXT: pandn %xmm0, %xmm1
-; SSE-NEXT: pxor %xmm0, %xmm0
-; SSE-NEXT: pcmpeqd %xmm1, %xmm0
-; SSE-NEXT: pcmpeqd %xmm1, %xmm1
-; SSE-NEXT: ptest %xmm1, %xmm0
+; SSE-NEXT: ptest %xmm0, %xmm1
; SSE-NEXT: setae %al
; SSE-NEXT: retq
;
; AVX-LABEL: ptestc_v4i32_andnot_eq0:
; AVX: # %bb.0:
-; AVX-NEXT: vpandn %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vptest %xmm1, %xmm0
+; AVX-NEXT: vptest %xmm0, %xmm1
; AVX-NEXT: setae %al
; AVX-NEXT: retq
%not = xor <4 x i32> %a1, splat (i32 -1)
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/39/builds/4025 Here is the relevant piece of the build log for reference:
|
Simplifies the hidden "all_of(X == 0)" pattern
Fixes #123456