Skip to content

Commit f0a123f

Browse files
pfusik authored and git-crd committed
[RISCV] Expand multiplication by 2^N * 3/5/9 + 1 with SHL_ADD (llvm#166933)
1 parent 6ed225f commit f0a123f

File tree

2 files changed

+113
-2
lines changed

2 files changed

+113
-2
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 13 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -16878,12 +16878,23 @@ static SDValue expandMulToShlAddShlAdd(SDNode *N, SelectionDAG &DAG,
1687816878
break;
1687916879
}
1688016880

16881-
// 2/4/8 * 3/5/9 + 1 -> (shXadd (shYadd X, X), X)
1688216881
int ShX;
1688316882
if (int ShY = isShifted359(MulAmt - 1, ShX)) {
1688416883
assert(ShX != 0 && "MulAmt=4,6,10 handled before");
16884+
// 2/4/8 * 3/5/9 + 1 -> (shXadd (shYadd X, X), X)
1688516885
if (ShX <= 3)
1688616886
return getShlAddShlAdd(N, DAG, ShX, ShY, /*AddX=*/true, Shift);
16887+
// 2^N * 3/5/9 + 1 -> (add (shYadd (shl X, N), (shl X, N)), X)
16888+
if (Shift == 0) {
16889+
SDLoc DL(N);
16890+
EVT VT = N->getValueType(0);
16891+
SDValue X = N->getOperand(0);
16892+
SDValue Shl =
16893+
DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShX, DL, VT));
16894+
SDValue ShlAdd = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Shl,
16895+
DAG.getTargetConstant(ShY, DL, VT), Shl);
16896+
return DAG.getNode(ISD::ADD, DL, VT, ShlAdd, X);
16897+
}
1688716898
}
1688816899
return SDValue();
1688916900
}
@@ -16944,7 +16955,7 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
1694416955
DAG.getTargetConstant(Shift, DL, VT), Shift1);
1694516956
}
1694616957

16947-
// TODO: 2^(C1>3) * 3,5,9 +/- 1
16958+
// TODO: 2^(C1>3) * 3/5/9 - 1
1694816959

1694916960
// 2^n + 2/4/8 + 1 -> (add (shl X, C1), (shXadd X, X))
1695016961
if (MulAmt > 2 && isPowerOf2_64((MulAmt - 1) & (MulAmt - 2))) {

llvm/test/CodeGen/RISCV/rv64zba.ll

Lines changed: 100 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -944,6 +944,58 @@ define i64 @addmul146(i64 %a, i64 %b) {
944944
ret i64 %d
945945
}
946946

947+
define i64 @mul49(i64 %a) {
948+
; RV64I-LABEL: mul49:
949+
; RV64I: # %bb.0:
950+
; RV64I-NEXT: li a1, 49
951+
; RV64I-NEXT: mul a0, a0, a1
952+
; RV64I-NEXT: ret
953+
;
954+
; RV64ZBA-LABEL: mul49:
955+
; RV64ZBA: # %bb.0:
956+
; RV64ZBA-NEXT: slli a1, a0, 4
957+
; RV64ZBA-NEXT: sh1add a1, a1, a1
958+
; RV64ZBA-NEXT: add a0, a1, a0
959+
; RV64ZBA-NEXT: ret
960+
;
961+
; RV64XANDESPERF-LABEL: mul49:
962+
; RV64XANDESPERF: # %bb.0:
963+
; RV64XANDESPERF-NEXT: slli a1, a0, 4
964+
; RV64XANDESPERF-NEXT: nds.lea.h a1, a1, a1
965+
; RV64XANDESPERF-NEXT: add a0, a1, a0
966+
; RV64XANDESPERF-NEXT: ret
967+
%c = mul i64 %a, 49
968+
ret i64 %c
969+
}
970+
971+
define i64 @zext_mul49(i32 signext %a) {
972+
; RV64I-LABEL: zext_mul49:
973+
; RV64I: # %bb.0:
974+
; RV64I-NEXT: li a1, 49
975+
; RV64I-NEXT: slli a1, a1, 32
976+
; RV64I-NEXT: slli a0, a0, 32
977+
; RV64I-NEXT: mulhu a0, a0, a1
978+
; RV64I-NEXT: ret
979+
;
980+
; RV64ZBA-LABEL: zext_mul49:
981+
; RV64ZBA: # %bb.0:
982+
; RV64ZBA-NEXT: slli.uw a1, a0, 4
983+
; RV64ZBA-NEXT: sh1add a1, a1, a1
984+
; RV64ZBA-NEXT: add.uw a0, a0, a1
985+
; RV64ZBA-NEXT: ret
986+
;
987+
; RV64XANDESPERF-LABEL: zext_mul49:
988+
; RV64XANDESPERF: # %bb.0:
989+
; RV64XANDESPERF-NEXT: slli a1, a0, 32
990+
; RV64XANDESPERF-NEXT: srli a1, a1, 28
991+
; RV64XANDESPERF-NEXT: nds.lea.h a1, a1, a1
992+
; RV64XANDESPERF-NEXT: nds.lea.b.ze a0, a1, a0
993+
; RV64XANDESPERF-NEXT: ret
994+
%b = zext i32 %a to i64
995+
%c = mul i64 %b, 49
996+
ret i64 %c
997+
}
998+
947999
define i64 @mul50(i64 %a) {
9481000
; RV64I-LABEL: mul50:
9491001
; RV64I: # %bb.0:
@@ -1044,6 +1096,54 @@ define i64 @addmul100(i64 %a, i64 %b) {
10441096
ret i64 %d
10451097
}
10461098

1099+
define i64 @mul145(i64 %a) {
1100+
; RV64I-LABEL: mul145:
1101+
; RV64I: # %bb.0:
1102+
; RV64I-NEXT: li a1, 145
1103+
; RV64I-NEXT: mul a0, a0, a1
1104+
; RV64I-NEXT: ret
1105+
;
1106+
; RV64ZBA-LABEL: mul145:
1107+
; RV64ZBA: # %bb.0:
1108+
; RV64ZBA-NEXT: slli a1, a0, 4
1109+
; RV64ZBA-NEXT: sh3add a1, a1, a1
1110+
; RV64ZBA-NEXT: add a0, a1, a0
1111+
; RV64ZBA-NEXT: ret
1112+
;
1113+
; RV64XANDESPERF-LABEL: mul145:
1114+
; RV64XANDESPERF: # %bb.0:
1115+
; RV64XANDESPERF-NEXT: slli a1, a0, 4
1116+
; RV64XANDESPERF-NEXT: nds.lea.d a1, a1, a1
1117+
; RV64XANDESPERF-NEXT: add a0, a1, a0
1118+
; RV64XANDESPERF-NEXT: ret
1119+
%c = mul i64 %a, 145
1120+
ret i64 %c
1121+
}
1122+
1123+
define i64 @mul161(i64 %a) {
1124+
; RV64I-LABEL: mul161:
1125+
; RV64I: # %bb.0:
1126+
; RV64I-NEXT: li a1, 161
1127+
; RV64I-NEXT: mul a0, a0, a1
1128+
; RV64I-NEXT: ret
1129+
;
1130+
; RV64ZBA-LABEL: mul161:
1131+
; RV64ZBA: # %bb.0:
1132+
; RV64ZBA-NEXT: slli a1, a0, 5
1133+
; RV64ZBA-NEXT: sh2add a1, a1, a1
1134+
; RV64ZBA-NEXT: add a0, a1, a0
1135+
; RV64ZBA-NEXT: ret
1136+
;
1137+
; RV64XANDESPERF-LABEL: mul161:
1138+
; RV64XANDESPERF: # %bb.0:
1139+
; RV64XANDESPERF-NEXT: slli a1, a0, 5
1140+
; RV64XANDESPERF-NEXT: nds.lea.w a1, a1, a1
1141+
; RV64XANDESPERF-NEXT: add a0, a1, a0
1142+
; RV64XANDESPERF-NEXT: ret
1143+
%c = mul i64 %a, 161
1144+
ret i64 %c
1145+
}
1146+
10471147
define i64 @mul162(i64 %a) {
10481148
; RV64I-LABEL: mul162:
10491149
; RV64I: # %bb.0:

0 commit comments

Comments
 (0)