diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 53c0da45f2f66..e601c626123a4 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -57969,8 +57969,27 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG, EVT VT = N->getValueType(0); SDValue Op0 = N->getOperand(0); SDValue Op1 = N->getOperand(1); + unsigned int Opcode = N->getOpcode(); SDLoc DL(N); + // Use a 32-bit add+zext if upper 33 bits known zero. + if (VT == MVT::i64 && Subtarget.is64Bit()) { + APInt HiMask = APInt::getHighBitsSet(64, 33); + if (DAG.MaskedValueIsZero(Op0, HiMask) && + DAG.MaskedValueIsZero(Op1, HiMask)) { + SDValue LHS = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op0); + SDValue RHS = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1); + bool NSW = Op0->getFlags().hasNoSignedWrap(); + NSW = NSW & DAG.willNotOverflowAdd(true, LHS, RHS); + SDNodeFlags Flags; + // No unsigned wrap when upper 33 bits are zeros hence always true + Flags.setNoUnsignedWrap(true); + Flags.setNoSignedWrap(NSW); + SDValue Sum = DAG.getNode(Opcode, DL, MVT::i32, LHS, RHS, Flags); + return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Sum); + } + } + if (SDValue Select = pushAddIntoCmovOfConsts(N, DL, DAG, Subtarget)) return Select; diff --git a/llvm/test/CodeGen/X86/reduce-i64-add.ll b/llvm/test/CodeGen/X86/reduce-i64-add.ll new file mode 100644 index 0000000000000..faedfcc9ce421 --- /dev/null +++ b/llvm/test/CodeGen/X86/reduce-i64-add.ll @@ -0,0 +1,94 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X86 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64 + +define i64 @test1(i16 %a) nounwind { +; X86-LABEL: test1: +; X86: # %bb.0: +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: addl $42, %eax +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: retl +; +; X64-LABEL: test1: +; X64: # 
%bb.0: +; X64-NEXT: movzwl %di, %eax +; X64-NEXT: addl $42, %eax +; X64-NEXT: retq + %zext_a = zext i16 %a to i64 + %sum = add nuw nsw i64 %zext_a, 42 + ret i64 %sum +} + +; The upper 48 bits are all zero, so we can safely truncate to a 32-bit addition +define i64 @test2(i16 %a, i16 %b) nounwind { +; X86-LABEL: test2: +; X86: # %bb.0: +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: addl %ecx, %eax +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: retl +; +; X64-LABEL: test2: +; X64: # %bb.0: +; X64-NEXT: movzwl %si, %ecx +; X64-NEXT: movzwl %di, %eax +; X64-NEXT: addl %ecx, %eax +; X64-NEXT: retq + %zext_a = zext i16 %a to i64 + %zext_b = zext i16 %b to i64 + %sum = add nuw nsw i64 %zext_a, %zext_b + ret i64 %sum +} + +; Set bit 32 of %a to force a 64-bit addition; we do not truncate to a 32-bit addition in this case +define i64 @test3(i16 %a) nounwind { +; X86-LABEL: test3: +; X86: # %bb.0: +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: addl $42, %eax +; X86-NEXT: movl $1, %edx +; X86-NEXT: retl +; +; X64-LABEL: test3: +; X64: # %bb.0: +; X64-NEXT: movzwl %di, %ecx +; X64-NEXT: movabsq $4294967338, %rax # imm = 0x10000002A +; X64-NEXT: addq %rcx, %rax +; X64-NEXT: retq + %zext_a = zext i16 %a to i64 + %or_a = or i64 %zext_a, 4294967296 + %sum = add nuw nsw i64 %or_a, 42 + ret i64 %sum +} + +; We don't truncate to a 32-bit addition when the operands are sign-extended +define i64 @test4(i16 %a, i16 %b) nounwind { +; X86-LABEL: test4: +; X86: # %bb.0: +; X86-NEXT: pushl %esi +; X86-NEXT: movswl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, %esi +; X86-NEXT: sarl $31, %esi +; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, %edx +; X86-NEXT: sarl $31, %edx +; X86-NEXT: addl %ecx, %eax +; X86-NEXT: adcl %esi, %edx +; X86-NEXT: popl %esi +; X86-NEXT: retl +; +; X64-LABEL: test4: +; X64: # %bb.0: +; X64-NEXT: # kill: def $esi killed $esi def $rsi +; X64-NEXT: # kill: def $edi killed $edi def $rdi +; X64-NEXT: movswq %di, 
%rcx +; X64-NEXT: movswq %si, %rax +; X64-NEXT: addq %rcx, %rax +; X64-NEXT: retq + %sext_a = sext i16 %a to i64 + %sext_b = sext i16 %b to i64 + %sum = add nuw nsw i64 %sext_a, %sext_b + ret i64 %sum +}