From c5941fe6d2a678b8863838629a9f69b5171ba4c4 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sat, 18 Jan 2025 16:15:57 +0000 Subject: [PATCH] [X86] combinePTESTCC - fold PTESTC(PCMPEQ(X,0),-1) == PTESTZ(X,X) Simplifies the hidden "all_of(X == 0)" pattern Fixes #123456 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 12 ++++++++ llvm/test/CodeGen/X86/combine-ptest.ll | 40 ++++++------------------- 2 files changed, 21 insertions(+), 31 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 410b08912a5e241..33ddcb57e9b08be 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -48054,6 +48054,18 @@ static SDValue combinePTESTCC(SDValue EFLAGS, X86::CondCode &CC, DAG.getAllOnesConstant(DL, NotOp1.getValueType()))); } } + // PTESTC(PCMPEQ(X,0),-1) == PTESTZ(X,X) + if (EFLAGS.getOpcode() == X86ISD::PTEST && + ISD::isBuildVectorAllOnes(Op1.getNode())) { + SDValue BC0 = peekThroughBitcasts(Op0); + if (BC0.getOpcode() == X86ISD::PCMPEQ && + ISD::isBuildVectorAllZeros(BC0.getOperand(1).getNode())) { + SDLoc DL(EFLAGS); + CC = (CC == X86::COND_B ? X86::COND_E : X86::COND_NE); + SDValue X = DAG.getBitcast(OpVT, BC0.getOperand(0)); + return DAG.getNode(EFLAGS.getOpcode(), DL, VT, X, X); + } + } } if (CC == X86::COND_E || CC == X86::COND_NE) { diff --git a/llvm/test/CodeGen/X86/combine-ptest.ll b/llvm/test/CodeGen/X86/combine-ptest.ll index f5ab700caea4672..fda14027e994e37 100644 --- a/llvm/test/CodeGen/X86/combine-ptest.ll +++ b/llvm/test/CodeGen/X86/combine-ptest.ll @@ -376,20 +376,14 @@ define i32 @ptestz_v2i64_concat(<4 x i64> %c, <4 x i64> %d, i32 %a, i32 %b) { define i1 @ptestc_v4i32_eq0(<4 x i32> %a0) { ; SSE-LABEL: ptestc_v4i32_eq0: ; SSE: # %bb.0: -; SSE-NEXT: pxor %xmm1, %xmm1 -; SSE-NEXT: pcmpeqd %xmm0, %xmm1 -; SSE-NEXT: pcmpeqd %xmm0, %xmm0 -; SSE-NEXT: ptest %xmm0, %xmm1 -; SSE-NEXT: setb %al +; SSE-NEXT: ptest %xmm0, %xmm0 +; SSE-NEXT: sete %al ; SSE-NEXT: retq ; ; AVX-LABEL: ptestc_v4i32_eq0: ; AVX: # %bb.0: -; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; AVX-NEXT: vptest %xmm1, %xmm0 -; AVX-NEXT: setb %al +; AVX-NEXT: vptest %xmm0, %xmm0 +; AVX-NEXT: sete %al ; AVX-NEXT: retq %icmp = icmp eq <4 x i32> %a0, zeroinitializer %sext = sext <4 x i1> %icmp to <4 x i32> @@ -403,22 +397,14 @@ define i1 @ptestc_v4i32_eq0(<4 x i32> %a0) { define i1 @ptestc_v4i32_and_eq0(<4 x i32> %a0, <4 x i32> %a1) { ; SSE-LABEL: ptestc_v4i32_and_eq0: ; SSE: # %bb.0: -; SSE-NEXT: pand %xmm1, %xmm0 -; SSE-NEXT: pxor %xmm1, %xmm1 -; SSE-NEXT: pcmpeqd %xmm0, %xmm1 -; SSE-NEXT: pcmpeqd %xmm0, %xmm0 ; SSE-NEXT: ptest %xmm0, %xmm1 -; SSE-NEXT: setb %al +; SSE-NEXT: sete %al ; SSE-NEXT: retq ; ; AVX-LABEL: ptestc_v4i32_and_eq0: ; AVX: # %bb.0: -; AVX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; AVX-NEXT: vptest %xmm1, %xmm0 -; AVX-NEXT: setb %al +; AVX-NEXT: vptest %xmm0, %xmm1 +; AVX-NEXT: sete %al ; AVX-NEXT: retq %and = and <4 x i32> %a1, %a0 %icmp = icmp eq <4 x i32> %and, zeroinitializer @@ -433,21 +419,13 @@ define i1 @ptestc_v4i32_and_eq0(<4 x i32> %a0, <4 x i32> %a1) { define i1 @ptestc_v4i32_andnot_eq0(<4 x i32> %a0, <4 x i32> %a1) { ; SSE-LABEL: ptestc_v4i32_andnot_eq0: ; SSE: # %bb.0: -; SSE-NEXT: pandn %xmm0, %xmm1 -; SSE-NEXT: pxor %xmm0, %xmm0 -; SSE-NEXT: pcmpeqd %xmm1, %xmm0 -; SSE-NEXT: pcmpeqd %xmm1, %xmm1 -; SSE-NEXT: ptest %xmm1, %xmm0 +; SSE-NEXT: ptest %xmm0, %xmm1 ; SSE-NEXT: setae %al ; SSE-NEXT: retq ; ; AVX-LABEL: ptestc_v4i32_andnot_eq0: ; AVX: # %bb.0: -; AVX-NEXT: vpandn %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; AVX-NEXT: vptest %xmm1, %xmm0 +; AVX-NEXT: vptest %xmm0, %xmm1 ; AVX-NEXT: setae %al ; AVX-NEXT: retq %not = xor <4 x i32> %a1, splat (i32 -1)