Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 13 additions & 2 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16544,12 +16544,23 @@ static SDValue expandMulToShlAddShlAdd(SDNode *N, SelectionDAG &DAG,
break;
}

// 2/4/8 * 3/5/9 + 1 -> (shXadd (shYadd X, X), X)
int ShX;
if (int ShY = isShifted359(MulAmt - 1, ShX)) {
assert(ShX != 0 && "MulAmt=4,6,10 handled before");
// 2/4/8 * 3/5/9 + 1 -> (shXadd (shYadd X, X), X)
if (ShX <= 3)
return getShlAddShlAdd(N, DAG, ShX, ShY, /*AddX=*/true, Shift);
// 2^N * 3/5/9 + 1 -> (add (shYadd (shl X, N), (shl X, N)), X)
if (Shift == 0) {
SDLoc DL(N);
EVT VT = N->getValueType(0);
SDValue X = N->getOperand(0);
SDValue Shl =
DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShX, DL, VT));
SDValue ShlAdd = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Shl,
DAG.getTargetConstant(ShY, DL, VT), Shl);
return DAG.getNode(ISD::ADD, DL, VT, ShlAdd, X);
}
}
return SDValue();
}
Expand Down Expand Up @@ -16610,7 +16621,7 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
DAG.getTargetConstant(Shift, DL, VT), Shift1);
}

// TODO: 2^(C1>3) * 3,5,9 +/- 1
// TODO: 2^(C1>3) * 3/5/9 - 1

// 2^n + 2/4/8 + 1 -> (add (shl X, C1), (shXadd X, X))
if (MulAmt > 2 && isPowerOf2_64((MulAmt - 1) & (MulAmt - 2))) {
Expand Down
100 changes: 100 additions & 0 deletions llvm/test/CodeGen/RISCV/rv64zba.ll
Original file line number Diff line number Diff line change
Expand Up @@ -944,6 +944,58 @@ define i64 @addmul146(i64 %a, i64 %b) {
ret i64 %d
}

define i64 @mul49(i64 %a) {
; RV64I-LABEL: mul49:
; RV64I: # %bb.0:
; RV64I-NEXT: li a1, 49
; RV64I-NEXT: mul a0, a0, a1
; RV64I-NEXT: ret
;
; RV64ZBA-LABEL: mul49:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: slli a1, a0, 4
; RV64ZBA-NEXT: sh1add a1, a1, a1
; RV64ZBA-NEXT: add a0, a1, a0
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If my CPU has a 2 cycle multiply latency like SiFiveP400/600/800, this seems worse. Should I consider adding a tuning flag to disable this for my CPUs?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This PR expands mul into 3 instructions, with a possible fold of zext into slli.uw.
Most of the transforms in expandMul expand into three instructions, so I assumed it's okay.
If not (for reasons other than hasMinSize() which is handled), I think we should handle that together with the other 3-instruction expansions.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, I agree with that.

Some of the transforms have ILP between 2 of the 3 instructions. This lowering is serial. There are probably other serial lowerings. On a core with more shift/add resources than multiply resources, a serial implementation like this could still be beneficial if it prevents the multiplier from being a bottleneck.

On a single-issue, in-order CPU with a pipelined multiplier, this may just be increasing the code size for no benefit. Freeing up the multiplier doesn't help if you can't issue any more instructions.

; RV64ZBA-NEXT: ret
;
; RV64XANDESPERF-LABEL: mul49:
; RV64XANDESPERF: # %bb.0:
; RV64XANDESPERF-NEXT: slli a1, a0, 4
; RV64XANDESPERF-NEXT: nds.lea.h a1, a1, a1
; RV64XANDESPERF-NEXT: add a0, a1, a0
; RV64XANDESPERF-NEXT: ret
%c = mul i64 %a, 49
ret i64 %c
}

; Multiply a zero-extended i32 by 49 (= 16 * 3 + 1). The RV64ZBA checks expect
; a shift-by-4 / sh1add / add sequence using the .uw forms (slli.uw, add.uw)
; with no separate zero-extension instruction; RV64I keeps a multiply (mulhu).
define i64 @zext_mul49(i32 signext %a) {
; RV64I-LABEL: zext_mul49:
; RV64I: # %bb.0:
; RV64I-NEXT: li a1, 49
; RV64I-NEXT: slli a1, a1, 32
; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: mulhu a0, a0, a1
; RV64I-NEXT: ret
;
; RV64ZBA-LABEL: zext_mul49:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: slli.uw a1, a0, 4
; RV64ZBA-NEXT: sh1add a1, a1, a1
; RV64ZBA-NEXT: add.uw a0, a0, a1
; RV64ZBA-NEXT: ret
;
; RV64XANDESPERF-LABEL: zext_mul49:
; RV64XANDESPERF: # %bb.0:
; RV64XANDESPERF-NEXT: slli a1, a0, 32
; RV64XANDESPERF-NEXT: srli a1, a1, 28
; RV64XANDESPERF-NEXT: nds.lea.h a1, a1, a1
; RV64XANDESPERF-NEXT: nds.lea.b.ze a0, a1, a0
; RV64XANDESPERF-NEXT: ret
%b = zext i32 %a to i64
%c = mul i64 %b, 49
ret i64 %c
}

define i64 @mul50(i64 %a) {
; RV64I-LABEL: mul50:
; RV64I: # %bb.0:
Expand Down Expand Up @@ -1044,6 +1096,54 @@ define i64 @addmul100(i64 %a, i64 %b) {
ret i64 %d
}

; Multiply by 145 (= 16 * 9 + 1). With Zba the checks expect the serial
; three-instruction form: slli by 4, sh3add of that value with itself (x9),
; then add of the original operand. RV64I keeps the plain mul.
define i64 @mul145(i64 %a) {
; RV64I-LABEL: mul145:
; RV64I: # %bb.0:
; RV64I-NEXT: li a1, 145
; RV64I-NEXT: mul a0, a0, a1
; RV64I-NEXT: ret
;
; RV64ZBA-LABEL: mul145:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: slli a1, a0, 4
; RV64ZBA-NEXT: sh3add a1, a1, a1
; RV64ZBA-NEXT: add a0, a1, a0
; RV64ZBA-NEXT: ret
;
; RV64XANDESPERF-LABEL: mul145:
; RV64XANDESPERF: # %bb.0:
; RV64XANDESPERF-NEXT: slli a1, a0, 4
; RV64XANDESPERF-NEXT: nds.lea.d a1, a1, a1
; RV64XANDESPERF-NEXT: add a0, a1, a0
; RV64XANDESPERF-NEXT: ret
%c = mul i64 %a, 145
ret i64 %c
}

; Multiply by 161 (= 32 * 5 + 1). With Zba the checks expect the serial
; three-instruction form: slli by 5, sh2add of that value with itself (x5),
; then add of the original operand. RV64I keeps the plain mul.
define i64 @mul161(i64 %a) {
; RV64I-LABEL: mul161:
; RV64I: # %bb.0:
; RV64I-NEXT: li a1, 161
; RV64I-NEXT: mul a0, a0, a1
; RV64I-NEXT: ret
;
; RV64ZBA-LABEL: mul161:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: slli a1, a0, 5
; RV64ZBA-NEXT: sh2add a1, a1, a1
; RV64ZBA-NEXT: add a0, a1, a0
; RV64ZBA-NEXT: ret
;
; RV64XANDESPERF-LABEL: mul161:
; RV64XANDESPERF: # %bb.0:
; RV64XANDESPERF-NEXT: slli a1, a0, 5
; RV64XANDESPERF-NEXT: nds.lea.w a1, a1, a1
; RV64XANDESPERF-NEXT: add a0, a1, a0
; RV64XANDESPERF-NEXT: ret
%c = mul i64 %a, 161
ret i64 %c
}

define i64 @mul162(i64 %a) {
; RV64I-LABEL: mul162:
; RV64I: # %bb.0:
Expand Down