Skip to content

Commit 4e675a0

Browse files
authored
[SelectionDAG] Lowering usub.sat(a, 1) to a - (a != 0) (#170076)
I recently observed that LLVM generates the following code for `usub.sat(a, 1)` on RISC-V:
```
addi a1, a0, -1
sltu a0, a0, a1
addi a0, a0, -1
and a0, a0, a1
ret
```
This could be optimized using the `snez` instruction instead.
1 parent 345d763 commit 4e675a0

File tree

4 files changed

+186
-10
lines changed

4 files changed

+186
-10
lines changed

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10895,6 +10895,18 @@ SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
1089510895
return DAG.getNode(ISD::SUB, dl, VT, Max, RHS);
1089610896
}
1089710897

10898+
// usub.sat(a, 1) -> sub(a, zext(a != 0))
10899+
if (Opcode == ISD::USUBSAT && isOneOrOneSplat(RHS)) {
10900+
LHS = DAG.getFreeze(LHS);
10901+
SDValue Zero = DAG.getConstant(0, dl, VT);
10902+
EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10903+
SDValue IsNonZero = DAG.getSetCC(dl, BoolVT, LHS, Zero, ISD::SETNE);
10904+
SDValue Subtrahend = DAG.getBoolExtOrTrunc(IsNonZero, dl, VT, BoolVT);
10905+
Subtrahend =
10906+
DAG.getNode(ISD::AND, dl, VT, Subtrahend, DAG.getConstant(1, dl, VT));
10907+
return DAG.getNode(ISD::SUB, dl, VT, LHS, Subtrahend);
10908+
}
10909+
1089810910
// uadd.sat(a, b) -> umin(a, ~b) + b
1089910911
if (Opcode == ISD::UADDSAT && isOperationLegal(ISD::UMIN, VT)) {
1090010912
SDValue InvRHS = DAG.getNOT(dl, RHS, VT);

llvm/test/CodeGen/AArch64/and-mask-removal.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -483,9 +483,9 @@ define i64 @pr58109(i8 signext %0) {
483483
; CHECK-SD-LABEL: pr58109:
484484
; CHECK-SD: ; %bb.0:
485485
; CHECK-SD-NEXT: add w8, w0, #1
486-
; CHECK-SD-NEXT: and w8, w8, #0xff
487-
; CHECK-SD-NEXT: subs w8, w8, #1
488-
; CHECK-SD-NEXT: csel w0, wzr, w8, lo
486+
; CHECK-SD-NEXT: ands w8, w8, #0xff
487+
; CHECK-SD-NEXT: cset w9, ne
488+
; CHECK-SD-NEXT: sub w0, w8, w9
489489
; CHECK-SD-NEXT: ret
490490
;
491491
; CHECK-GI-LABEL: pr58109:

llvm/test/CodeGen/RISCV/usub_sat.ll

Lines changed: 168 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc < %s -mtriple=riscv32 -mattr=+m | FileCheck %s --check-prefix=RV32I
3-
; RUN: llc < %s -mtriple=riscv64 -mattr=+m | FileCheck %s --check-prefix=RV64I
4-
; RUN: llc < %s -mtriple=riscv32 -mattr=+m,+zbb | FileCheck %s --check-prefix=RV32IZbb
5-
; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+zbb | FileCheck %s --check-prefix=RV64IZbb
2+
; RUN: llc < %s -mtriple=riscv32 -mattr=+m,+v | FileCheck %s --check-prefix=RV32I
3+
; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+v | FileCheck %s --check-prefix=RV64I
4+
; RUN: llc < %s -mtriple=riscv32 -mattr=+m,+zbb,+v | FileCheck %s --check-prefix=RV32IZbb
5+
; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+zbb,+v | FileCheck %s --check-prefix=RV64IZbb
66

77
define signext i32 @func(i32 signext %x, i32 signext %y) nounwind {
88
; RV32I-LABEL: func:
@@ -185,3 +185,167 @@ define zeroext i4 @func3(i4 zeroext %x, i4 zeroext %y) nounwind {
185185
%tmp = call i4 @llvm.usub.sat.i4(i4 %x, i4 %y);
186186
ret i4 %tmp;
187187
}
188+
189+
define signext i32 @fun9(i32 signext %x) nounwind {
190+
; RV32I-LABEL: fun9:
191+
; RV32I: # %bb.0:
192+
; RV32I-NEXT: snez a1, a0
193+
; RV32I-NEXT: sub a0, a0, a1
194+
; RV32I-NEXT: ret
195+
;
196+
; RV64I-LABEL: fun9:
197+
; RV64I: # %bb.0:
198+
; RV64I-NEXT: snez a1, a0
199+
; RV64I-NEXT: subw a0, a0, a1
200+
; RV64I-NEXT: ret
201+
;
202+
; RV32IZbb-LABEL: fun9:
203+
; RV32IZbb: # %bb.0:
204+
; RV32IZbb-NEXT: li a1, 1
205+
; RV32IZbb-NEXT: maxu a0, a0, a1
206+
; RV32IZbb-NEXT: addi a0, a0, -1
207+
; RV32IZbb-NEXT: ret
208+
;
209+
; RV64IZbb-LABEL: fun9:
210+
; RV64IZbb: # %bb.0:
211+
; RV64IZbb-NEXT: li a1, 1
212+
; RV64IZbb-NEXT: maxu a0, a0, a1
213+
; RV64IZbb-NEXT: addiw a0, a0, -1
214+
; RV64IZbb-NEXT: ret
215+
%tmp = call i32 @llvm.usub.sat.i32(i32 %x, i32 1)
216+
ret i32 %tmp
217+
}
218+
219+
define signext i32 @fun10(i32 signext %x) nounwind {
220+
; RV32I-LABEL: fun10:
221+
; RV32I: # %bb.0:
222+
; RV32I-NEXT: ret
223+
;
224+
; RV64I-LABEL: fun10:
225+
; RV64I: # %bb.0:
226+
; RV64I-NEXT: ret
227+
;
228+
; RV32IZbb-LABEL: fun10:
229+
; RV32IZbb: # %bb.0:
230+
; RV32IZbb-NEXT: ret
231+
;
232+
; RV64IZbb-LABEL: fun10:
233+
; RV64IZbb: # %bb.0:
234+
; RV64IZbb-NEXT: ret
235+
%tmp = call i32 @llvm.usub.sat.i32(i32 %x, i32 0)
236+
ret i32 %tmp
237+
}
238+
239+
define signext i32 @fun11(i32 signext %x) nounwind {
240+
; RV32I-LABEL: fun11:
241+
; RV32I: # %bb.0:
242+
; RV32I-NEXT: addi a1, a0, 1
243+
; RV32I-NEXT: sltu a0, a0, a1
244+
; RV32I-NEXT: addi a0, a0, -1
245+
; RV32I-NEXT: and a0, a0, a1
246+
; RV32I-NEXT: ret
247+
;
248+
; RV64I-LABEL: fun11:
249+
; RV64I: # %bb.0:
250+
; RV64I-NEXT: addiw a1, a0, 1
251+
; RV64I-NEXT: sltu a0, a0, a1
252+
; RV64I-NEXT: addi a0, a0, -1
253+
; RV64I-NEXT: and a0, a0, a1
254+
; RV64I-NEXT: ret
255+
;
256+
; RV32IZbb-LABEL: fun11:
257+
; RV32IZbb: # %bb.0:
258+
; RV32IZbb-NEXT: li a0, 0
259+
; RV32IZbb-NEXT: ret
260+
;
261+
; RV64IZbb-LABEL: fun11:
262+
; RV64IZbb: # %bb.0:
263+
; RV64IZbb-NEXT: li a0, 0
264+
; RV64IZbb-NEXT: ret
265+
%tmp = call i32 @llvm.usub.sat.i32(i32 %x, i32 -1)
266+
ret i32 %tmp
267+
}
268+
269+
define <4 x i32> @fun12(<4 x i32> %a0) nounwind {
270+
; RV32I-LABEL: fun12:
271+
; RV32I: # %bb.0:
272+
; RV32I-NEXT: li a0, 1
273+
; RV32I-NEXT: vsetivli zero, 4, e32, m1, ta, ma
274+
; RV32I-NEXT: vssubu.vx v8, v8, a0
275+
; RV32I-NEXT: ret
276+
;
277+
; RV64I-LABEL: fun12:
278+
; RV64I: # %bb.0:
279+
; RV64I-NEXT: li a0, 1
280+
; RV64I-NEXT: vsetivli zero, 4, e32, m1, ta, ma
281+
; RV64I-NEXT: vssubu.vx v8, v8, a0
282+
; RV64I-NEXT: ret
283+
;
284+
; RV32IZbb-LABEL: fun12:
285+
; RV32IZbb: # %bb.0:
286+
; RV32IZbb-NEXT: li a0, 1
287+
; RV32IZbb-NEXT: vsetivli zero, 4, e32, m1, ta, ma
288+
; RV32IZbb-NEXT: vssubu.vx v8, v8, a0
289+
; RV32IZbb-NEXT: ret
290+
;
291+
; RV64IZbb-LABEL: fun12:
292+
; RV64IZbb: # %bb.0:
293+
; RV64IZbb-NEXT: li a0, 1
294+
; RV64IZbb-NEXT: vsetivli zero, 4, e32, m1, ta, ma
295+
; RV64IZbb-NEXT: vssubu.vx v8, v8, a0
296+
; RV64IZbb-NEXT: ret
297+
%1 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %a0, <4 x i32> splat (i32 1))
298+
ret <4 x i32> %1
299+
}
300+
301+
define <4 x i32> @fun13(<4 x i32> %a0) nounwind {
302+
; RV32I-LABEL: fun13:
303+
; RV32I: # %bb.0:
304+
; RV32I-NEXT: li a0, -1
305+
; RV32I-NEXT: vsetivli zero, 4, e32, m1, ta, ma
306+
; RV32I-NEXT: vssubu.vx v8, v8, a0
307+
; RV32I-NEXT: ret
308+
;
309+
; RV64I-LABEL: fun13:
310+
; RV64I: # %bb.0:
311+
; RV64I-NEXT: li a0, -1
312+
; RV64I-NEXT: vsetivli zero, 4, e32, m1, ta, ma
313+
; RV64I-NEXT: vssubu.vx v8, v8, a0
314+
; RV64I-NEXT: ret
315+
;
316+
; RV32IZbb-LABEL: fun13:
317+
; RV32IZbb: # %bb.0:
318+
; RV32IZbb-NEXT: li a0, -1
319+
; RV32IZbb-NEXT: vsetivli zero, 4, e32, m1, ta, ma
320+
; RV32IZbb-NEXT: vssubu.vx v8, v8, a0
321+
; RV32IZbb-NEXT: ret
322+
;
323+
; RV64IZbb-LABEL: fun13:
324+
; RV64IZbb: # %bb.0:
325+
; RV64IZbb-NEXT: li a0, -1
326+
; RV64IZbb-NEXT: vsetivli zero, 4, e32, m1, ta, ma
327+
; RV64IZbb-NEXT: vssubu.vx v8, v8, a0
328+
; RV64IZbb-NEXT: ret
329+
%1 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %a0, <4 x i32> splat (i32 -1))
330+
ret <4 x i32> %1
331+
}
332+
333+
define <4 x i32> @fun14(<4 x i32> %a0) nounwind {
334+
; RV32I-LABEL: fun14:
335+
; RV32I: # %bb.0:
336+
; RV32I-NEXT: ret
337+
;
338+
; RV64I-LABEL: fun14:
339+
; RV64I: # %bb.0:
340+
; RV64I-NEXT: ret
341+
;
342+
; RV32IZbb-LABEL: fun14:
343+
; RV32IZbb: # %bb.0:
344+
; RV32IZbb-NEXT: ret
345+
;
346+
; RV64IZbb-LABEL: fun14:
347+
; RV64IZbb: # %bb.0:
348+
; RV64IZbb-NEXT: ret
349+
%1 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %a0, <4 x i32> splat (i32 0))
350+
ret <4 x i32> %1
351+
}

llvm/test/CodeGen/X86/combine-sub-usat.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -116,9 +116,9 @@ define <8 x i16> @combine_zero_v8i16(<8 x i16> %a0) {
116116
define i32 @combine_dec_i32(i32 %a0) {
117117
; CHECK-LABEL: combine_dec_i32:
118118
; CHECK: # %bb.0:
119-
; CHECK-NEXT: xorl %eax, %eax
120-
; CHECK-NEXT: subl $1, %edi
121-
; CHECK-NEXT: cmovael %edi, %eax
119+
; CHECK-NEXT: movl %edi, %eax
120+
; CHECK-NEXT: cmpl $1, %edi
121+
; CHECK-NEXT: adcl $-1, %eax
122122
; CHECK-NEXT: retq
123123
%1 = call i32 @llvm.usub.sat.i32(i32 %a0, i32 1)
124124
ret i32 %1

0 commit comments

Comments
 (0)