Skip to content

Commit 947e824

Browse files
committed
[X86] Reduce i64 to i32 when high bits are zeros for add/sub/mul
1 parent 24d730b commit 947e824

File tree

4 files changed

+159
-0
lines changed

4 files changed

+159
-0
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49803,8 +49803,35 @@ static SDValue combineMul(SDNode *N, SelectionDAG &DAG,
4980349803
TargetLowering::DAGCombinerInfo &DCI,
4980449804
const X86Subtarget &Subtarget) {
4980549805
EVT VT = N->getValueType(0);
49806+
SDValue Op0 = N->getOperand(0);
49807+
SDValue Op1 = N->getOperand(1);
49808+
unsigned int Opcode = N->getOpcode();
4980649809
SDLoc DL(N);
4980749810

49811+
// If both operands of a 64-bit multiply are known to have their upper 48 bits
49812+
// zero, the result is guaranteed to fit in 32 bits. For example:
49813+
// (u16::MAX * u16::MAX) = 65535 * 65535 = 4294836225
49814+
// which fits within an unsigned 32-bit integer (u32::MAX = 4,294,967,295).
49815+
// In such cases, we can safely perform the multiplication as a 32-bit
49816+
// `mul` followed by a zero-extension to i64.
49817+
if (VT == MVT::i64 && Subtarget.is64Bit()) {
49818+
APInt HiMask = APInt::getHighBitsSet(64, 48);
49819+
if (DAG.MaskedValueIsZero(Op0, HiMask) &&
49820+
DAG.MaskedValueIsZero(Op1, HiMask)) {
49821+
SDValue LHS = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op0);
49822+
SDValue RHS = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1);
49823+
bool NSW = Op0->getFlags().hasNoSignedWrap();
49824+
bool NUW = Op0->getFlags().hasNoUnsignedWrap();
49825+
NSW = NSW & DAG.willNotOverflowMul(true, LHS, RHS);
49826+
NUW = NUW & DAG.willNotOverflowMul(false, LHS, RHS);
49827+
SDNodeFlags Flags;
49828+
Flags.setNoUnsignedWrap(NUW);
49829+
Flags.setNoSignedWrap(NSW);
49830+
SDValue Mul = DAG.getNode(Opcode, DL, MVT::i32, LHS, RHS, Flags);
49831+
return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Mul);
49832+
}
49833+
}
49834+
4980849835
if (SDValue V = combineMulToPMADDWD(N, DL, DAG, Subtarget))
4980949836
return V;
4981049837

@@ -58070,8 +58097,28 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG,
5807058097
EVT VT = N->getValueType(0);
5807158098
SDValue Op0 = N->getOperand(0);
5807258099
SDValue Op1 = N->getOperand(1);
58100+
unsigned int Opcode = N->getOpcode();
5807358101
SDLoc DL(N);
5807458102

58103+
// Use a 32-bit add+zext if the upper 33 bits of both operands are known zero:
// each operand is then below 2^31, so the sum fits in 32 unsigned bits.
58104+
if (VT == MVT::i64 && Subtarget.is64Bit()) {
58105+
APInt HiMask = APInt::getHighBitsSet(64, 33);
58106+
if (DAG.MaskedValueIsZero(Op0, HiMask) &&
58107+
DAG.MaskedValueIsZero(Op1, HiMask)) {
58108+
SDValue LHS = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op0);
58109+
SDValue RHS = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1);
58110+
bool NSW = Op0->getFlags().hasNoSignedWrap();
58111+
bool NUW = Op0->getFlags().hasNoUnsignedWrap();
58112+
NSW = NSW & DAG.willNotOverflowAdd(true, LHS, RHS);
58113+
NUW = NUW & DAG.willNotOverflowAdd(false, LHS, RHS);
58114+
SDNodeFlags Flags;
58115+
Flags.setNoUnsignedWrap(NUW);
58116+
Flags.setNoSignedWrap(NSW);
58117+
SDValue Sum = DAG.getNode(Opcode, DL, MVT::i32, LHS, RHS, Flags);
58118+
return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Sum);
58119+
}
58120+
}
58121+
5807558122
if (SDValue Select = pushAddIntoCmovOfConsts(N, DL, DAG, Subtarget))
5807658123
return Select;
5807758124

@@ -58297,8 +58344,28 @@ static SDValue combineSub(SDNode *N, SelectionDAG &DAG,
5829758344
EVT VT = N->getValueType(0);
5829858345
SDValue Op0 = N->getOperand(0);
5829958346
SDValue Op1 = N->getOperand(1);
58347+
unsigned int Opcode = N->getOpcode();
5830058348
SDLoc DL(N);
5830158349

58350+
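// Use a 32-bit sub+zext if the upper 33 bits of both operands are known zero.
// NOTE(review): when Op0 < Op1 the i64 difference wraps and sets the upper 32
// bits, which zext(i32 sub) clears, so the two forms differ in that case.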
58351+
if (VT == MVT::i64 && Subtarget.is64Bit()) {
58352+
APInt HiMask = APInt::getHighBitsSet(64, 33);
58353+
if (DAG.MaskedValueIsZero(Op0, HiMask) &&
58354+
DAG.MaskedValueIsZero(Op1, HiMask)) {
58355+
SDValue LHS = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op0);
58356+
SDValue RHS = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1);
58357+
bool NSW = Op0->getFlags().hasNoSignedWrap();
58358+
bool NUW = Op0->getFlags().hasNoUnsignedWrap();
58359+
NSW = NSW & DAG.willNotOverflowSub(true, LHS, RHS);
58360+
NUW = NUW & DAG.willNotOverflowSub(false, LHS, RHS);
58361+
SDNodeFlags Flags;
58362+
Flags.setNoUnsignedWrap(NUW);
58363+
Flags.setNoSignedWrap(NSW);
58364+
SDValue Sub = DAG.getNode(Opcode, DL, MVT::i32, LHS, RHS, Flags);
58365+
return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Sub);
58366+
}
58367+
}
58368+
5830258369
auto IsNonOpaqueConstant = [&](SDValue Op) {
5830358370
return DAG.isConstantIntBuildVectorOrConstantInt(Op,
5830458371
/*AllowOpaques*/ false);
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X86
3+
; RUN: llc < %s -mtriple=x86_64-linux -verify-machineinstrs | FileCheck %s --check-prefix=X64-LINUX
4+
; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s --check-prefix=X64-WIN32
5+
6+
define i64 @test1(i16 %a) {
7+
; X86-LABEL: test1:
8+
; X86: # %bb.0:
9+
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
10+
; X86-NEXT: addl $42, %eax
11+
; X86-NEXT: xorl %edx, %edx
12+
; X86-NEXT: retl
13+
;
14+
; X64-LINUX-LABEL: test1:
15+
; X64-LINUX: # %bb.0:
16+
; X64-LINUX-NEXT: movzwl %di, %eax
17+
; X64-LINUX-NEXT: addl $42, %eax
18+
; X64-LINUX-NEXT: retq
19+
;
20+
; X64-WIN32-LABEL: test1:
21+
; X64-WIN32: # %bb.0:
22+
; X64-WIN32-NEXT: movzwl %cx, %eax
23+
; X64-WIN32-NEXT: addl $42, %eax
24+
; X64-WIN32-NEXT: retq
25+
%zext_a = zext i16 %a to i64
26+
%sum = add i64 %zext_a, 42
27+
ret i64 %sum
28+
}
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X86
3+
; RUN: llc < %s -mtriple=x86_64-linux -verify-machineinstrs | FileCheck %s --check-prefix=X64-LINUX
4+
; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s --check-prefix=X64-WIN32
5+
6+
define i64 @test1(i16 %a) {
7+
; X86-LABEL: test1:
8+
; X86: # %bb.0:
9+
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
10+
; X86-NEXT: movl $42, %ecx
11+
; X86-NEXT: mull %ecx
12+
; X86-NEXT: retl
13+
;
14+
; X64-LINUX-LABEL: test1:
15+
; X64-LINUX: # %bb.0:
16+
; X64-LINUX-NEXT: movzwl %di, %eax
17+
; X64-LINUX-NEXT: imull $42, %eax, %eax
18+
; X64-LINUX-NEXT: retq
19+
;
20+
; X64-WIN32-LABEL: test1:
21+
; X64-WIN32: # %bb.0:
22+
; X64-WIN32-NEXT: movzwl %cx, %eax
23+
; X64-WIN32-NEXT: imull $42, %eax, %eax
24+
; X64-WIN32-NEXT: retq
25+
26+
%zext_a = zext i16 %a to i64
27+
%mul = mul i64 %zext_a, 42
28+
ret i64 %mul
29+
}
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple=x86_64-unknown-unknown -o - %s | FileCheck %s
3+
; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X86
4+
; RUN: llc < %s -mtriple=x86_64-linux -verify-machineinstrs | FileCheck %s --check-prefix=X64-LINUX
5+
; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s --check-prefix=X64-WIN32
6+
7+
define i64 @test1(i16 %a) nounwind {
8+
; CHECK-LABEL: test1:
9+
; CHECK: # %bb.0:
10+
; CHECK-NEXT: movzwl %di, %eax
11+
; CHECK-NEXT: addl $42, %eax
12+
; CHECK-NEXT: retq
13+
;
14+
; X86-LABEL: test1:
15+
; X86: # %bb.0:
16+
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
17+
; X86-NEXT: addl $42, %eax
18+
; X86-NEXT: xorl %edx, %edx
19+
; X86-NEXT: retl
20+
;
21+
; X64-LINUX-LABEL: test1:
22+
; X64-LINUX: # %bb.0:
23+
; X64-LINUX-NEXT: movzwl %di, %eax
24+
; X64-LINUX-NEXT: addl $42, %eax
25+
; X64-LINUX-NEXT: retq
26+
;
27+
; X64-WIN32-LABEL: test1:
28+
; X64-WIN32: # %bb.0:
29+
; X64-WIN32-NEXT: movzwl %cx, %eax
30+
; X64-WIN32-NEXT: addl $42, %eax
31+
; X64-WIN32-NEXT: retq
32+
%zext_a = zext i16 %a to i64
33+
%sub = sub i64 %zext_a, -42
34+
ret i64 %sub
35+
}

0 commit comments

Comments
 (0)