[GlobalISel] Allow expansion of urem by constant in prelegalizer #145914

jyli0116 · 2025-06-26T14:59:13Z

This patch allows urem by a constant to be expanded more efficiently to avoid the need for expensive udiv instructions. This is part of the resolution to issue #118090

llvmbot · 2025-06-26T14:59:49Z

@llvm/pr-subscribers-llvm-globalisel

@llvm/pr-subscribers-backend-aarch64

Author: None (jyli0116)

Changes

This patch allows urem by a constant to be expanded more efficiently to avoid the need for expensive udiv instructions.

Patch is 49.81 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/145914.diff

7 Files Affected:

(modified) llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h (+7-3)
(modified) llvm/include/llvm/Target/GlobalISel/Combine.td (+9-1)
(modified) llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp (+61-8)
(added) llvm/test/CodeGen/AArch64/GlobalISel/combine_urem.ll (+243)
(modified) llvm/test/CodeGen/AArch64/pr58431.ll (+5-3)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll (+27-77)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll (+89-506)

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index c15263e0b06f8..9139425658480 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -693,18 +693,22 @@ class CombinerHelper {
   /// feeding a G_AND instruction \p MI.
   bool matchNarrowBinopFeedingAnd(MachineInstr &MI, BuildFnTy &MatchInfo) const;
 
-  /// Given an G_UDIV \p MI expressing a divide by constant, return an
-  /// expression that implements it by multiplying by a magic number.
+  /// Given an G_UDIV \p MI or G_UREM \p MI expressing a divide by constant,
+  /// return an expression that implements it by multiplying by a magic number.
   /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
-  MachineInstr *buildUDivUsingMul(MachineInstr &MI) const;
+  MachineInstr *buildUDivorURemUsingMul(MachineInstr &MI) const;
   /// Combine G_UDIV by constant into a multiply by magic constant.
   bool matchUDivByConst(MachineInstr &MI) const;
   void applyUDivByConst(MachineInstr &MI) const;
+  /// Combine G_UREM by constant into a multiply by magic constant.
+  bool matchURemByConst(MachineInstr &MI) const;
+  void applyURemByConst(MachineInstr &MI) const;
 
   /// Given an G_SDIV \p MI expressing a signed divide by constant, return an
   /// expression that implements it by multiplying by a magic number.
   /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
   MachineInstr *buildSDivUsingMul(MachineInstr &MI) const;
+  /// Combine G_SDIV by constant into a multiply by magic constant.
   bool matchSDivByConst(MachineInstr &MI) const;
   void applySDivByConst(MachineInstr &MI) const;
 
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 4a92dc16c1bf4..52cbbf91849b6 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1156,6 +1156,14 @@ def udiv_by_pow2 : GICombineRule<
 def intdiv_combines : GICombineGroup<[udiv_by_const, sdiv_by_const,
                                       sdiv_by_pow2, udiv_by_pow2]>;
 
+def urem_by_const : GICombineRule<
+  (defs root:$root),
+  (match (wip_match_opcode G_UREM):$root,
+   [{ return Helper.matchURemByConst(*${root}); }]),
+  (apply [{ Helper.applyURemByConst(*${root}); }])>;
+
+def intrem_combines : GICombineGroup<[urem_by_const]>;
+
 def reassoc_ptradd : GICombineRule<
   (defs root:$root, build_fn_matchinfo:$matchinfo),
   (match (wip_match_opcode G_PTR_ADD):$root,
@@ -2048,7 +2056,7 @@ def all_combines : GICombineGroup<[integer_reassoc_combines, trivial_combines,
     constant_fold_cast_op, fabs_fneg_fold,
     intdiv_combines, mulh_combines, redundant_neg_operands,
     and_or_disjoint_mask, fma_combines, fold_binop_into_select,
-    sub_add_reg, select_to_minmax,
+    intrem_combines, sub_add_reg, select_to_minmax,
     fsub_to_fneg, commute_constant_to_rhs, match_ands, match_ors,
     simplify_neg_minmax, combine_concat_vector,
     sext_trunc, zext_trunc, prefer_sign_combines, shuffle_combines,
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index b1e851183de0d..c511f27a5e8ce 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -5288,12 +5288,13 @@ bool CombinerHelper::matchSubAddSameReg(MachineInstr &MI,
   return false;
 }
 
-MachineInstr *CombinerHelper::buildUDivUsingMul(MachineInstr &MI) const {
-  assert(MI.getOpcode() == TargetOpcode::G_UDIV);
-  auto &UDiv = cast<GenericMachineInstr>(MI);
-  Register Dst = UDiv.getReg(0);
-  Register LHS = UDiv.getReg(1);
-  Register RHS = UDiv.getReg(2);
+MachineInstr *CombinerHelper::buildUDivorURemUsingMul(MachineInstr &MI) const {
+  unsigned opcode = MI.getOpcode();
+  assert(opcode == TargetOpcode::G_UDIV || opcode == TargetOpcode::G_UREM);
+  auto &UDivorRem = cast<GenericMachineInstr>(MI);
+  Register Dst = UDivorRem.getReg(0);
+  Register LHS = UDivorRem.getReg(1);
+  Register RHS = UDivorRem.getReg(2);
   LLT Ty = MRI.getType(Dst);
   LLT ScalarTy = Ty.getScalarType();
   const unsigned EltBits = ScalarTy.getScalarSizeInBits();
@@ -5446,7 +5447,13 @@ MachineInstr *CombinerHelper::buildUDivUsingMul(MachineInstr &MI) const {
   auto IsOne = MIB.buildICmp(
       CmpInst::Predicate::ICMP_EQ,
       Ty.isScalar() ? LLT::scalar(1) : Ty.changeElementSize(1), RHS, One);
-  return MIB.buildSelect(Ty, IsOne, LHS, Q);
+  auto ret = MIB.buildSelect(Ty, IsOne, LHS, Q);
+
+  if (opcode == TargetOpcode::G_UREM) {
+    auto Prod = MIB.buildMul(Ty, ret, RHS);
+    return MIB.buildSub(Ty, LHS, Prod);
+  }
+  return ret;
 }
 
 bool CombinerHelper::matchUDivByConst(MachineInstr &MI) const {
@@ -5494,7 +5501,53 @@ bool CombinerHelper::matchUDivByConst(MachineInstr &MI) const {
 }
 
 void CombinerHelper::applyUDivByConst(MachineInstr &MI) const {
-  auto *NewMI = buildUDivUsingMul(MI);
+  auto *NewMI = buildUDivorURemUsingMul(MI);
+  replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
+}
+
+bool CombinerHelper::matchURemByConst(MachineInstr &MI) const {
+  assert(MI.getOpcode() == TargetOpcode::G_UREM);
+  Register Dst = MI.getOperand(0).getReg();
+  Register RHS = MI.getOperand(2).getReg();
+  LLT DstTy = MRI.getType(Dst);
+
+  auto &MF = *MI.getMF();
+  AttributeList Attr = MF.getFunction().getAttributes();
+  const auto &TLI = getTargetLowering();
+  LLVMContext &Ctx = MF.getFunction().getContext();
+  if (TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, Ctx), Attr))
+    return false;
+
+  // Don't do this for minsize because the instruction sequence is usually
+  // larger.
+  if (MF.getFunction().hasMinSize())
+    return false;
+
+  auto *RHSDef = MRI.getVRegDef(RHS);
+  if (!isConstantOrConstantVector(*RHSDef, MRI))
+    return false;
+
+  // Don't do this if the types are not going to be legal.
+  if (LI) {
+    if (!isLegalOrBeforeLegalizer({TargetOpcode::G_MUL, {DstTy, DstTy}}))
+      return false;
+    if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMULH, {DstTy}}))
+      return false;
+    if (!isLegalOrBeforeLegalizer(
+            {TargetOpcode::G_ICMP,
+             {DstTy.isVector() ? DstTy.changeElementSize(1) : LLT::scalar(1),
+              DstTy}}))
+      return false;
+    if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SUB, {DstTy, DstTy}}))
+        return false;
+  }
+
+  return matchUnaryPredicate(
+      MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
+}
+
+void CombinerHelper::applyURemByConst(MachineInstr &MI) const {
+  auto *NewMI = buildUDivorURemUsingMul(MI);
   replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
 }
 
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine_urem.ll b/llvm/test/CodeGen/AArch64/GlobalISel/combine_urem.ll
new file mode 100644
index 0000000000000..0cf827410c30c
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine_urem.ll
@@ -0,0 +1,243 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=aarch64-unknown-unknown | FileCheck %s --check-prefixes=CHECK-SD
+; RUN: llc < %s -mtriple=aarch64-unknown-unknown -global-isel | FileCheck %s --check-prefixes=CHECK-GI
+
+
+define i8 @test7s8(i8 %a) {
+; CHECK-SD-LABEL: test7s8:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    mov w8, #37 // =0x25
+; CHECK-SD-NEXT:    and w9, w0, #0xff
+; CHECK-SD-NEXT:    mul w8, w9, w8
+; CHECK-SD-NEXT:    lsr w8, w8, #8
+; CHECK-SD-NEXT:    sub w9, w0, w8
+; CHECK-SD-NEXT:    and w9, w9, #0xfe
+; CHECK-SD-NEXT:    add w8, w8, w9, lsr #1
+; CHECK-SD-NEXT:    lsr w8, w8, #2
+; CHECK-SD-NEXT:    sub w8, w8, w8, lsl #3
+; CHECK-SD-NEXT:    add w0, w0, w8
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test7s8:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    mov w8, #37 // =0x25
+; CHECK-GI-NEXT:    and w9, w0, #0xff
+; CHECK-GI-NEXT:    mul w8, w9, w8
+; CHECK-GI-NEXT:    lsr w8, w8, #8
+; CHECK-GI-NEXT:    sub w9, w0, w8
+; CHECK-GI-NEXT:    ubfx w9, w9, #1, #7
+; CHECK-GI-NEXT:    add w8, w9, w8
+; CHECK-GI-NEXT:    ubfx w8, w8, #2, #6
+; CHECK-GI-NEXT:    lsl w9, w8, #3
+; CHECK-GI-NEXT:    sub w8, w9, w8
+; CHECK-GI-NEXT:    sub w0, w0, w8
+; CHECK-GI-NEXT:    ret
+  %r = urem i8 %a, 7
+  ret i8 %r
+}
+
+define i8 @test100s8(i8 %a) {
+; CHECK-SD-LABEL: test100s8:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    mov w8, #41 // =0x29
+; CHECK-SD-NEXT:    and w9, w0, #0xff
+; CHECK-SD-NEXT:    mul w8, w9, w8
+; CHECK-SD-NEXT:    mov w9, #100 // =0x64
+; CHECK-SD-NEXT:    lsr w8, w8, #12
+; CHECK-SD-NEXT:    msub w0, w8, w9, w0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test100s8:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    mov w8, #41 // =0x29
+; CHECK-GI-NEXT:    and w9, w0, #0xff
+; CHECK-GI-NEXT:    mul w8, w9, w8
+; CHECK-GI-NEXT:    mov w9, #100 // =0x64
+; CHECK-GI-NEXT:    lsr w8, w8, #8
+; CHECK-GI-NEXT:    lsr w8, w8, #4
+; CHECK-GI-NEXT:    msub w0, w8, w9, w0
+; CHECK-GI-NEXT:    ret
+  %r = urem i8 %a, 100
+  ret i8 %r
+}
+
+define i32 @test7s32(i32 %a) {
+; CHECK-SD-LABEL: test7s32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    mov w8, #18725 // =0x4925
+; CHECK-SD-NEXT:    movk w8, #9362, lsl #16
+; CHECK-SD-NEXT:    umull x8, w0, w8
+; CHECK-SD-NEXT:    lsr x8, x8, #32
+; CHECK-SD-NEXT:    sub w9, w0, w8
+; CHECK-SD-NEXT:    add w8, w8, w9, lsr #1
+; CHECK-SD-NEXT:    lsr w8, w8, #2
+; CHECK-SD-NEXT:    sub w8, w8, w8, lsl #3
+; CHECK-SD-NEXT:    add w0, w0, w8
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test7s32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    mov w8, #18725 // =0x4925
+; CHECK-GI-NEXT:    movk w8, #9362, lsl #16
+; CHECK-GI-NEXT:    umull x8, w0, w8
+; CHECK-GI-NEXT:    lsr x8, x8, #32
+; CHECK-GI-NEXT:    sub w9, w0, w8
+; CHECK-GI-NEXT:    add w8, w8, w9, lsr #1
+; CHECK-GI-NEXT:    lsr w8, w8, #2
+; CHECK-GI-NEXT:    lsl w9, w8, #3
+; CHECK-GI-NEXT:    sub w8, w9, w8
+; CHECK-GI-NEXT:    sub w0, w0, w8
+; CHECK-GI-NEXT:    ret
+ %c = urem i32 %a, 7
+ ret i32 %c
+}
+
+define i32 @test100s32(i32 %a) {
+; CHECK-SD-LABEL: test100s32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    mov w8, #34079 // =0x851f
+; CHECK-SD-NEXT:    mov w9, #100 // =0x64
+; CHECK-SD-NEXT:    movk w8, #20971, lsl #16
+; CHECK-SD-NEXT:    umull x8, w0, w8
+; CHECK-SD-NEXT:    lsr x8, x8, #37
+; CHECK-SD-NEXT:    msub w0, w8, w9, w0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test100s32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    mov w8, #34079 // =0x851f
+; CHECK-GI-NEXT:    mov w9, #100 // =0x64
+; CHECK-GI-NEXT:    movk w8, #20971, lsl #16
+; CHECK-GI-NEXT:    umull x8, w0, w8
+; CHECK-GI-NEXT:    lsr x8, x8, #32
+; CHECK-GI-NEXT:    lsr w8, w8, #5
+; CHECK-GI-NEXT:    msub w0, w8, w9, w0
+; CHECK-GI-NEXT:    ret
+ %c = urem i32 %a, 100
+ ret i32 %c
+}
+
+define <8 x i16> @test7v8s16(<8 x i16> %a) {
+; CHECK-SD-LABEL: test7v8s16:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    mov w8, #9363 // =0x2493
+; CHECK-SD-NEXT:    dup v1.8h, w8
+; CHECK-SD-NEXT:    umull2 v2.4s, v0.8h, v1.8h
+; CHECK-SD-NEXT:    umull v1.4s, v0.4h, v1.4h
+; CHECK-SD-NEXT:    uzp2 v1.8h, v1.8h, v2.8h
+; CHECK-SD-NEXT:    sub v2.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT:    usra v1.8h, v2.8h, #1
+; CHECK-SD-NEXT:    movi v2.8h, #7
+; CHECK-SD-NEXT:    ushr v1.8h, v1.8h, #2
+; CHECK-SD-NEXT:    mls v0.8h, v1.8h, v2.8h
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test7v8s16:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    adrp x8, .LCPI4_0
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI4_0]
+; CHECK-GI-NEXT:    umull2 v2.4s, v0.8h, v1.8h
+; CHECK-GI-NEXT:    umull v1.4s, v0.4h, v1.4h
+; CHECK-GI-NEXT:    uzp2 v1.8h, v1.8h, v2.8h
+; CHECK-GI-NEXT:    sub v2.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    usra v1.8h, v2.8h, #1
+; CHECK-GI-NEXT:    movi v2.8h, #7
+; CHECK-GI-NEXT:    ushr v1.8h, v1.8h, #2
+; CHECK-GI-NEXT:    mls v0.8h, v1.8h, v2.8h
+; CHECK-GI-NEXT:    ret
+  %r = urem <8 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+  ret <8 x i16> %r
+}
+
+define <8 x i16> @test100v8s16(<8 x i16> %a) {
+; CHECK-SD-LABEL: test100v8s16:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    mov w8, #5243 // =0x147b
+; CHECK-SD-NEXT:    ushr v2.8h, v0.8h, #2
+; CHECK-SD-NEXT:    dup v1.8h, w8
+; CHECK-SD-NEXT:    umull2 v3.4s, v2.8h, v1.8h
+; CHECK-SD-NEXT:    umull v1.4s, v2.4h, v1.4h
+; CHECK-SD-NEXT:    movi v2.8h, #100
+; CHECK-SD-NEXT:    uzp2 v1.8h, v1.8h, v3.8h
+; CHECK-SD-NEXT:    ushr v1.8h, v1.8h, #1
+; CHECK-SD-NEXT:    mls v0.8h, v1.8h, v2.8h
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test100v8s16:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    adrp x8, .LCPI5_0
+; CHECK-GI-NEXT:    ushr v1.8h, v0.8h, #2
+; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI5_0]
+; CHECK-GI-NEXT:    umull2 v3.4s, v1.8h, v2.8h
+; CHECK-GI-NEXT:    umull v1.4s, v1.4h, v2.4h
+; CHECK-GI-NEXT:    movi v2.8h, #100
+; CHECK-GI-NEXT:    uzp2 v1.8h, v1.8h, v3.8h
+; CHECK-GI-NEXT:    ushr v1.8h, v1.8h, #1
+; CHECK-GI-NEXT:    mls v0.8h, v1.8h, v2.8h
+; CHECK-GI-NEXT:    ret
+  %r = urem <8 x i16> %a, <i16 100, i16 100, i16 100, i16 100, i16 100, i16 100, i16 100, i16 100>
+  ret <8 x i16> %r
+}
+
+define <4 x i32> @test7v4s32(<4 x i32> %a) {
+; CHECK-SD-LABEL: test7v4s32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    mov w8, #18725 // =0x4925
+; CHECK-SD-NEXT:    movk w8, #9362, lsl #16
+; CHECK-SD-NEXT:    dup v1.4s, w8
+; CHECK-SD-NEXT:    umull2 v2.2d, v0.4s, v1.4s
+; CHECK-SD-NEXT:    umull v1.2d, v0.2s, v1.2s
+; CHECK-SD-NEXT:    uzp2 v1.4s, v1.4s, v2.4s
+; CHECK-SD-NEXT:    sub v2.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT:    usra v1.4s, v2.4s, #1
+; CHECK-SD-NEXT:    movi v2.4s, #7
+; CHECK-SD-NEXT:    ushr v1.4s, v1.4s, #2
+; CHECK-SD-NEXT:    mls v0.4s, v1.4s, v2.4s
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test7v4s32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    adrp x8, .LCPI6_0
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI6_0]
+; CHECK-GI-NEXT:    umull2 v2.2d, v0.4s, v1.4s
+; CHECK-GI-NEXT:    umull v1.2d, v0.2s, v1.2s
+; CHECK-GI-NEXT:    uzp2 v1.4s, v1.4s, v2.4s
+; CHECK-GI-NEXT:    sub v2.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT:    usra v1.4s, v2.4s, #1
+; CHECK-GI-NEXT:    movi v2.4s, #7
+; CHECK-GI-NEXT:    ushr v1.4s, v1.4s, #2
+; CHECK-GI-NEXT:    mls v0.4s, v1.4s, v2.4s
+; CHECK-GI-NEXT:    ret
+  %r = urem <4 x i32> %a, <i32 7, i32 7, i32 7, i32 7>
+  ret <4 x i32> %r
+}
+
+define <4 x i32> @test100v4s32(<4 x i32> %a) {
+; CHECK-SD-LABEL: test100v4s32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    mov w8, #34079 // =0x851f
+; CHECK-SD-NEXT:    movk w8, #20971, lsl #16
+; CHECK-SD-NEXT:    dup v1.4s, w8
+; CHECK-SD-NEXT:    umull2 v2.2d, v0.4s, v1.4s
+; CHECK-SD-NEXT:    umull v1.2d, v0.2s, v1.2s
+; CHECK-SD-NEXT:    uzp2 v1.4s, v1.4s, v2.4s
+; CHECK-SD-NEXT:    movi v2.4s, #100
+; CHECK-SD-NEXT:    ushr v1.4s, v1.4s, #5
+; CHECK-SD-NEXT:    mls v0.4s, v1.4s, v2.4s
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test100v4s32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    adrp x8, .LCPI7_0
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI7_0]
+; CHECK-GI-NEXT:    umull2 v2.2d, v0.4s, v1.4s
+; CHECK-GI-NEXT:    umull v1.2d, v0.2s, v1.2s
+; CHECK-GI-NEXT:    uzp2 v1.4s, v1.4s, v2.4s
+; CHECK-GI-NEXT:    movi v2.4s, #100
+; CHECK-GI-NEXT:    ushr v1.4s, v1.4s, #5
+; CHECK-GI-NEXT:    mls v0.4s, v1.4s, v2.4s
+; CHECK-GI-NEXT:    ret
+  %r = urem <4 x i32> %a, <i32 100, i32 100, i32 100, i32 100>
+  ret <4 x i32> %r
+}
+
diff --git a/llvm/test/CodeGen/AArch64/pr58431.ll b/llvm/test/CodeGen/AArch64/pr58431.ll
index 88bab4af95d64..467ceb062f249 100644
--- a/llvm/test/CodeGen/AArch64/pr58431.ll
+++ b/llvm/test/CodeGen/AArch64/pr58431.ll
@@ -4,10 +4,12 @@
 define i32 @f(i64 %0) {
 ; CHECK-LABEL: f:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #10 // =0xa
+; CHECK-NEXT:    mov x8, #-7378697629483820647 // =0x9999999999999999
 ; CHECK-NEXT:    mov w9, w0
-; CHECK-NEXT:    udiv x10, x9, x8
-; CHECK-NEXT:    msub x0, x10, x8, x9
+; CHECK-NEXT:    mov w10, #10 // =0xa
+; CHECK-NEXT:    eor x8, x8, #0x8000000000000003
+; CHECK-NEXT:    umulh x8, x9, x8
+; CHECK-NEXT:    msub x0, x8, x10, x9
 ; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %2 = trunc i64 %0 to i32
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll
index 24ec4fa48f778..6ae2f56f6ae6d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll
@@ -211,91 +211,41 @@ define i32 @v_urem_i32_oddk_denom(i32 %num) {
 ; CHECK-LABEL: v_urem_i32_oddk_denom:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    v_rcp_iflag_f32_e32 v1, 0x4996c7d8
-; CHECK-NEXT:    v_mov_b32_e32 v2, 0xffed2705
-; CHECK-NEXT:    v_mov_b32_e32 v3, 0x12d8fb
-; CHECK-NEXT:    v_mul_f32_e32 v1, 0x4f7ffffe, v1
-; CHECK-NEXT:    v_cvt_u32_f32_e32 v1, v1
-; CHECK-NEXT:    v_mul_lo_u32 v2, v1, v2
-; CHECK-NEXT:    v_mul_hi_u32 v2, v1, v2
-; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v2
+; CHECK-NEXT:    v_mov_b32_e32 v1, 0xb2a50881
 ; CHECK-NEXT:    v_mul_hi_u32 v1, v0, v1
-; CHECK-NEXT:    v_mul_lo_u32 v1, v1, v3
+; CHECK-NEXT:    v_sub_i32_e32 v2, vcc, v0, v1
+; CHECK-NEXT:    v_lshrrev_b32_e32 v2, 1, v2
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v2, v1
+; CHECK-NEXT:    v_lshrrev_b32_e32 v1, 20, v1
+; CHECK-NEXT:    v_mov_b32_e32 v2, 0x12d8fb
+; CHECK-NEXT:    v_mul_lo_u32 v1, v1, v2
 ; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
-; CHECK-NEXT:    v_add_i32_e32 v1, vcc, 0xffed2705, v0
-; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v3
-; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v1, vcc, 0xffed2705, v0
-; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v3
-; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
 ; CHECK-NEXT:    s_setpc_b64 s[30:31]
   %result = urem i32 %num, 1235195
   ret i32 %result
 }
 
 define <2 x i32> @v_urem_v2i32_oddk_denom(<2 x i32> %num) {
-; GISEL-LABEL: v_urem_v2i32_oddk_denom:
-; GISEL:       ; %bb.0:
-; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-NEXT:    v_mov_b32_e32 v2, 0x12d8fb
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v3, 0x12d8fb
-; GISEL-NEXT:    v_mov_b32_e32 v4, 0xffed2705
-; GISEL-NEXT:    v_rcp_iflag_f32_e32 v3, v3
-; GISEL-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v3, v3
-; GISEL-NEXT:    v_mul_lo_u32 v5, v3, v4
-; GISEL-NEXT:    v_mul_hi_u32 v5, v3, v5
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
-; GISEL-NEXT:    v_mul_hi_u32 v5, v0, v3
-; GISEL-NEXT:    v_mul_hi_u32 v3, v1, v3
-; GISEL-NEXT:    v_mul_lo_u32 v5, v5, v2
-; GISEL-NEXT:    v_mul_lo_u32 v3, v3, v2
-; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v5
-; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v3
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v0, v4
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, 0xffed2705, v1
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
-; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v2
-; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v0, v4
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, 0xffed2705, v1
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
-; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v2
-; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
-; GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; CGP-LABEL: v_urem_v2i32_oddk_denom:
-; CGP:       ; %bb.0:
-; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CGP-NEXT:    v_rcp_iflag_f32_e32 v2, 0x4996c7d8
-; CGP-NEXT:    v_mov_b32_e32 v3, 0xffed2705
-; CGP-NEXT:    v_mov_b32_e32 v4, 0x12d8fb
-; CGP-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
-; CGP-NEXT:    v_cvt_u32_f32_e32 v2, v2
-; CGP-NEXT:    v_mul_lo_u32 v5, v2, v3
-; CGP-NEXT:    v_mul_hi_u32 v5, v2, v5
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
-; CGP-NEXT:    v_mul_hi_u32 v5, v0, v2
-; CGP-NEXT:    v_mul_hi_u32 v2, v1, v2
-; CGP-NEXT:    v_mul_lo_u32 v5, v5, v4
-; CGP-NEXT:    v_mul_lo_u32 v2, v2, v4
-; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v5
-; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v2
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, v0, v3
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, 0xffed2705, v1
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v4
-; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
-; CGP-NEXT:    v_cmp_g...
[truncated]

llvmbot · 2025-06-26T14:59:49Z

@llvm/pr-subscribers-backend-amdgpu

Author: None (jyli0116)

Changes

This patch allows urem by a constant to be expanded more efficiently to avoid the need for expensive udiv instructions.

Patch is 49.81 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/145914.diff

7 Files Affected:

(modified) llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h (+7-3)
(modified) llvm/include/llvm/Target/GlobalISel/Combine.td (+9-1)
(modified) llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp (+61-8)
(added) llvm/test/CodeGen/AArch64/GlobalISel/combine_urem.ll (+243)
(modified) llvm/test/CodeGen/AArch64/pr58431.ll (+5-3)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll (+27-77)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll (+89-506)

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index c15263e0b06f8..9139425658480 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -693,18 +693,22 @@ class CombinerHelper {
   /// feeding a G_AND instruction \p MI.
   bool matchNarrowBinopFeedingAnd(MachineInstr &MI, BuildFnTy &MatchInfo) const;
 
-  /// Given an G_UDIV \p MI expressing a divide by constant, return an
-  /// expression that implements it by multiplying by a magic number.
+  /// Given an G_UDIV \p MI or G_UREM \p MI expressing a divide by constant,
+  /// return an expression that implements it by multiplying by a magic number.
   /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
-  MachineInstr *buildUDivUsingMul(MachineInstr &MI) const;
+  MachineInstr *buildUDivorURemUsingMul(MachineInstr &MI) const;
   /// Combine G_UDIV by constant into a multiply by magic constant.
   bool matchUDivByConst(MachineInstr &MI) const;
   void applyUDivByConst(MachineInstr &MI) const;
+  /// Combine G_UREM by constant into a multiply by magic constant.
+  bool matchURemByConst(MachineInstr &MI) const;
+  void applyURemByConst(MachineInstr &MI) const;
 
   /// Given an G_SDIV \p MI expressing a signed divide by constant, return an
   /// expression that implements it by multiplying by a magic number.
   /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
   MachineInstr *buildSDivUsingMul(MachineInstr &MI) const;
+  /// Combine G_SDIV by constant into a multiply by magic constant.
   bool matchSDivByConst(MachineInstr &MI) const;
   void applySDivByConst(MachineInstr &MI) const;
 
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 4a92dc16c1bf4..52cbbf91849b6 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1156,6 +1156,14 @@ def udiv_by_pow2 : GICombineRule<
 def intdiv_combines : GICombineGroup<[udiv_by_const, sdiv_by_const,
                                       sdiv_by_pow2, udiv_by_pow2]>;
 
+def urem_by_const : GICombineRule<
+  (defs root:$root),
+  (match (wip_match_opcode G_UREM):$root,
+   [{ return Helper.matchURemByConst(*${root}); }]),
+  (apply [{ Helper.applyURemByConst(*${root}); }])>;
+
+def intrem_combines : GICombineGroup<[urem_by_const]>;
+
 def reassoc_ptradd : GICombineRule<
   (defs root:$root, build_fn_matchinfo:$matchinfo),
   (match (wip_match_opcode G_PTR_ADD):$root,
@@ -2048,7 +2056,7 @@ def all_combines : GICombineGroup<[integer_reassoc_combines, trivial_combines,
     constant_fold_cast_op, fabs_fneg_fold,
     intdiv_combines, mulh_combines, redundant_neg_operands,
     and_or_disjoint_mask, fma_combines, fold_binop_into_select,
-    sub_add_reg, select_to_minmax,
+    intrem_combines, sub_add_reg, select_to_minmax,
     fsub_to_fneg, commute_constant_to_rhs, match_ands, match_ors,
     simplify_neg_minmax, combine_concat_vector,
     sext_trunc, zext_trunc, prefer_sign_combines, shuffle_combines,
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index b1e851183de0d..c511f27a5e8ce 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -5288,12 +5288,13 @@ bool CombinerHelper::matchSubAddSameReg(MachineInstr &MI,
   return false;
 }
 
-MachineInstr *CombinerHelper::buildUDivUsingMul(MachineInstr &MI) const {
-  assert(MI.getOpcode() == TargetOpcode::G_UDIV);
-  auto &UDiv = cast<GenericMachineInstr>(MI);
-  Register Dst = UDiv.getReg(0);
-  Register LHS = UDiv.getReg(1);
-  Register RHS = UDiv.getReg(2);
+MachineInstr *CombinerHelper::buildUDivorURemUsingMul(MachineInstr &MI) const {
+  unsigned opcode = MI.getOpcode();
+  assert(opcode == TargetOpcode::G_UDIV || opcode == TargetOpcode::G_UREM);
+  auto &UDivorRem = cast<GenericMachineInstr>(MI);
+  Register Dst = UDivorRem.getReg(0);
+  Register LHS = UDivorRem.getReg(1);
+  Register RHS = UDivorRem.getReg(2);
   LLT Ty = MRI.getType(Dst);
   LLT ScalarTy = Ty.getScalarType();
   const unsigned EltBits = ScalarTy.getScalarSizeInBits();
@@ -5446,7 +5447,13 @@ MachineInstr *CombinerHelper::buildUDivUsingMul(MachineInstr &MI) const {
   auto IsOne = MIB.buildICmp(
       CmpInst::Predicate::ICMP_EQ,
       Ty.isScalar() ? LLT::scalar(1) : Ty.changeElementSize(1), RHS, One);
-  return MIB.buildSelect(Ty, IsOne, LHS, Q);
+  auto ret = MIB.buildSelect(Ty, IsOne, LHS, Q);
+
+  if (opcode == TargetOpcode::G_UREM) {
+    auto Prod = MIB.buildMul(Ty, ret, RHS);
+    return MIB.buildSub(Ty, LHS, Prod);
+  }
+  return ret;
 }
 
 bool CombinerHelper::matchUDivByConst(MachineInstr &MI) const {
@@ -5494,7 +5501,53 @@ bool CombinerHelper::matchUDivByConst(MachineInstr &MI) const {
 }
 
 void CombinerHelper::applyUDivByConst(MachineInstr &MI) const {
-  auto *NewMI = buildUDivUsingMul(MI);
+  auto *NewMI = buildUDivorURemUsingMul(MI);
+  replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
+}
+
+bool CombinerHelper::matchURemByConst(MachineInstr &MI) const {
+  assert(MI.getOpcode() == TargetOpcode::G_UREM);
+  Register Dst = MI.getOperand(0).getReg();
+  Register RHS = MI.getOperand(2).getReg();
+  LLT DstTy = MRI.getType(Dst);
+
+  auto &MF = *MI.getMF();
+  AttributeList Attr = MF.getFunction().getAttributes();
+  const auto &TLI = getTargetLowering();
+  LLVMContext &Ctx = MF.getFunction().getContext();
+  if (TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, Ctx), Attr))
+    return false;
+
+  // Don't do this for minsize because the instruction sequence is usually
+  // larger.
+  if (MF.getFunction().hasMinSize())
+    return false;
+
+  auto *RHSDef = MRI.getVRegDef(RHS);
+  if (!isConstantOrConstantVector(*RHSDef, MRI))
+    return false;
+
+  // Don't do this if the types are not going to be legal.
+  if (LI) {
+    if (!isLegalOrBeforeLegalizer({TargetOpcode::G_MUL, {DstTy, DstTy}}))
+      return false;
+    if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMULH, {DstTy}}))
+      return false;
+    if (!isLegalOrBeforeLegalizer(
+            {TargetOpcode::G_ICMP,
+             {DstTy.isVector() ? DstTy.changeElementSize(1) : LLT::scalar(1),
+              DstTy}}))
+      return false;
+    if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SUB, {DstTy, DstTy}}))
+        return false;
+  }
+
+  return matchUnaryPredicate(
+      MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
+}
+
+void CombinerHelper::applyURemByConst(MachineInstr &MI) const {
+  auto *NewMI = buildUDivorURemUsingMul(MI);
   replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
 }
 
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine_urem.ll b/llvm/test/CodeGen/AArch64/GlobalISel/combine_urem.ll
new file mode 100644
index 0000000000000..0cf827410c30c
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine_urem.ll
@@ -0,0 +1,243 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=aarch64-unknown-unknown | FileCheck %s --check-prefixes=CHECK-SD
+; RUN: llc < %s -mtriple=aarch64-unknown-unknown -global-isel | FileCheck %s --check-prefixes=CHECK-GI
+
+
+define i8 @test7s8(i8 %a) {
+; CHECK-SD-LABEL: test7s8:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    mov w8, #37 // =0x25
+; CHECK-SD-NEXT:    and w9, w0, #0xff
+; CHECK-SD-NEXT:    mul w8, w9, w8
+; CHECK-SD-NEXT:    lsr w8, w8, #8
+; CHECK-SD-NEXT:    sub w9, w0, w8
+; CHECK-SD-NEXT:    and w9, w9, #0xfe
+; CHECK-SD-NEXT:    add w8, w8, w9, lsr #1
+; CHECK-SD-NEXT:    lsr w8, w8, #2
+; CHECK-SD-NEXT:    sub w8, w8, w8, lsl #3
+; CHECK-SD-NEXT:    add w0, w0, w8
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test7s8:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    mov w8, #37 // =0x25
+; CHECK-GI-NEXT:    and w9, w0, #0xff
+; CHECK-GI-NEXT:    mul w8, w9, w8
+; CHECK-GI-NEXT:    lsr w8, w8, #8
+; CHECK-GI-NEXT:    sub w9, w0, w8
+; CHECK-GI-NEXT:    ubfx w9, w9, #1, #7
+; CHECK-GI-NEXT:    add w8, w9, w8
+; CHECK-GI-NEXT:    ubfx w8, w8, #2, #6
+; CHECK-GI-NEXT:    lsl w9, w8, #3
+; CHECK-GI-NEXT:    sub w8, w9, w8
+; CHECK-GI-NEXT:    sub w0, w0, w8
+; CHECK-GI-NEXT:    ret
+  %r = urem i8 %a, 7
+  ret i8 %r
+}
+
+define i8 @test100s8(i8 %a) {
+; CHECK-SD-LABEL: test100s8:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    mov w8, #41 // =0x29
+; CHECK-SD-NEXT:    and w9, w0, #0xff
+; CHECK-SD-NEXT:    mul w8, w9, w8
+; CHECK-SD-NEXT:    mov w9, #100 // =0x64
+; CHECK-SD-NEXT:    lsr w8, w8, #12
+; CHECK-SD-NEXT:    msub w0, w8, w9, w0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test100s8:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    mov w8, #41 // =0x29
+; CHECK-GI-NEXT:    and w9, w0, #0xff
+; CHECK-GI-NEXT:    mul w8, w9, w8
+; CHECK-GI-NEXT:    mov w9, #100 // =0x64
+; CHECK-GI-NEXT:    lsr w8, w8, #8
+; CHECK-GI-NEXT:    lsr w8, w8, #4
+; CHECK-GI-NEXT:    msub w0, w8, w9, w0
+; CHECK-GI-NEXT:    ret
+  %r = urem i8 %a, 100
+  ret i8 %r
+}
+
+define i32 @test7s32(i32 %a) {
+; CHECK-SD-LABEL: test7s32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    mov w8, #18725 // =0x4925
+; CHECK-SD-NEXT:    movk w8, #9362, lsl #16
+; CHECK-SD-NEXT:    umull x8, w0, w8
+; CHECK-SD-NEXT:    lsr x8, x8, #32
+; CHECK-SD-NEXT:    sub w9, w0, w8
+; CHECK-SD-NEXT:    add w8, w8, w9, lsr #1
+; CHECK-SD-NEXT:    lsr w8, w8, #2
+; CHECK-SD-NEXT:    sub w8, w8, w8, lsl #3
+; CHECK-SD-NEXT:    add w0, w0, w8
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test7s32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    mov w8, #18725 // =0x4925
+; CHECK-GI-NEXT:    movk w8, #9362, lsl #16
+; CHECK-GI-NEXT:    umull x8, w0, w8
+; CHECK-GI-NEXT:    lsr x8, x8, #32
+; CHECK-GI-NEXT:    sub w9, w0, w8
+; CHECK-GI-NEXT:    add w8, w8, w9, lsr #1
+; CHECK-GI-NEXT:    lsr w8, w8, #2
+; CHECK-GI-NEXT:    lsl w9, w8, #3
+; CHECK-GI-NEXT:    sub w8, w9, w8
+; CHECK-GI-NEXT:    sub w0, w0, w8
+; CHECK-GI-NEXT:    ret
+ %c = urem i32 %a, 7
+ ret i32 %c
+}
+
+define i32 @test100s32(i32 %a) {
+; CHECK-SD-LABEL: test100s32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    mov w8, #34079 // =0x851f
+; CHECK-SD-NEXT:    mov w9, #100 // =0x64
+; CHECK-SD-NEXT:    movk w8, #20971, lsl #16
+; CHECK-SD-NEXT:    umull x8, w0, w8
+; CHECK-SD-NEXT:    lsr x8, x8, #37
+; CHECK-SD-NEXT:    msub w0, w8, w9, w0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test100s32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    mov w8, #34079 // =0x851f
+; CHECK-GI-NEXT:    mov w9, #100 // =0x64
+; CHECK-GI-NEXT:    movk w8, #20971, lsl #16
+; CHECK-GI-NEXT:    umull x8, w0, w8
+; CHECK-GI-NEXT:    lsr x8, x8, #32
+; CHECK-GI-NEXT:    lsr w8, w8, #5
+; CHECK-GI-NEXT:    msub w0, w8, w9, w0
+; CHECK-GI-NEXT:    ret
+ %c = urem i32 %a, 100
+ ret i32 %c
+}
+
+define <8 x i16> @test7v8s16(<8 x i16> %a) {
+; CHECK-SD-LABEL: test7v8s16:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    mov w8, #9363 // =0x2493
+; CHECK-SD-NEXT:    dup v1.8h, w8
+; CHECK-SD-NEXT:    umull2 v2.4s, v0.8h, v1.8h
+; CHECK-SD-NEXT:    umull v1.4s, v0.4h, v1.4h
+; CHECK-SD-NEXT:    uzp2 v1.8h, v1.8h, v2.8h
+; CHECK-SD-NEXT:    sub v2.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT:    usra v1.8h, v2.8h, #1
+; CHECK-SD-NEXT:    movi v2.8h, #7
+; CHECK-SD-NEXT:    ushr v1.8h, v1.8h, #2
+; CHECK-SD-NEXT:    mls v0.8h, v1.8h, v2.8h
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test7v8s16:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    adrp x8, .LCPI4_0
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI4_0]
+; CHECK-GI-NEXT:    umull2 v2.4s, v0.8h, v1.8h
+; CHECK-GI-NEXT:    umull v1.4s, v0.4h, v1.4h
+; CHECK-GI-NEXT:    uzp2 v1.8h, v1.8h, v2.8h
+; CHECK-GI-NEXT:    sub v2.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    usra v1.8h, v2.8h, #1
+; CHECK-GI-NEXT:    movi v2.8h, #7
+; CHECK-GI-NEXT:    ushr v1.8h, v1.8h, #2
+; CHECK-GI-NEXT:    mls v0.8h, v1.8h, v2.8h
+; CHECK-GI-NEXT:    ret
+  %r = urem <8 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+  ret <8 x i16> %r
+}
+
+define <8 x i16> @test100v8s16(<8 x i16> %a) {
+; CHECK-SD-LABEL: test100v8s16:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    mov w8, #5243 // =0x147b
+; CHECK-SD-NEXT:    ushr v2.8h, v0.8h, #2
+; CHECK-SD-NEXT:    dup v1.8h, w8
+; CHECK-SD-NEXT:    umull2 v3.4s, v2.8h, v1.8h
+; CHECK-SD-NEXT:    umull v1.4s, v2.4h, v1.4h
+; CHECK-SD-NEXT:    movi v2.8h, #100
+; CHECK-SD-NEXT:    uzp2 v1.8h, v1.8h, v3.8h
+; CHECK-SD-NEXT:    ushr v1.8h, v1.8h, #1
+; CHECK-SD-NEXT:    mls v0.8h, v1.8h, v2.8h
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test100v8s16:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    adrp x8, .LCPI5_0
+; CHECK-GI-NEXT:    ushr v1.8h, v0.8h, #2
+; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI5_0]
+; CHECK-GI-NEXT:    umull2 v3.4s, v1.8h, v2.8h
+; CHECK-GI-NEXT:    umull v1.4s, v1.4h, v2.4h
+; CHECK-GI-NEXT:    movi v2.8h, #100
+; CHECK-GI-NEXT:    uzp2 v1.8h, v1.8h, v3.8h
+; CHECK-GI-NEXT:    ushr v1.8h, v1.8h, #1
+; CHECK-GI-NEXT:    mls v0.8h, v1.8h, v2.8h
+; CHECK-GI-NEXT:    ret
+  %r = urem <8 x i16> %a, <i16 100, i16 100, i16 100, i16 100, i16 100, i16 100, i16 100, i16 100>
+  ret <8 x i16> %r
+}
+
+define <4 x i32> @test7v4s32(<4 x i32> %a) {
+; CHECK-SD-LABEL: test7v4s32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    mov w8, #18725 // =0x4925
+; CHECK-SD-NEXT:    movk w8, #9362, lsl #16
+; CHECK-SD-NEXT:    dup v1.4s, w8
+; CHECK-SD-NEXT:    umull2 v2.2d, v0.4s, v1.4s
+; CHECK-SD-NEXT:    umull v1.2d, v0.2s, v1.2s
+; CHECK-SD-NEXT:    uzp2 v1.4s, v1.4s, v2.4s
+; CHECK-SD-NEXT:    sub v2.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT:    usra v1.4s, v2.4s, #1
+; CHECK-SD-NEXT:    movi v2.4s, #7
+; CHECK-SD-NEXT:    ushr v1.4s, v1.4s, #2
+; CHECK-SD-NEXT:    mls v0.4s, v1.4s, v2.4s
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test7v4s32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    adrp x8, .LCPI6_0
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI6_0]
+; CHECK-GI-NEXT:    umull2 v2.2d, v0.4s, v1.4s
+; CHECK-GI-NEXT:    umull v1.2d, v0.2s, v1.2s
+; CHECK-GI-NEXT:    uzp2 v1.4s, v1.4s, v2.4s
+; CHECK-GI-NEXT:    sub v2.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT:    usra v1.4s, v2.4s, #1
+; CHECK-GI-NEXT:    movi v2.4s, #7
+; CHECK-GI-NEXT:    ushr v1.4s, v1.4s, #2
+; CHECK-GI-NEXT:    mls v0.4s, v1.4s, v2.4s
+; CHECK-GI-NEXT:    ret
+  %r = urem <4 x i32> %a, <i32 7, i32 7, i32 7, i32 7>
+  ret <4 x i32> %r
+}
+
+define <4 x i32> @test100v4s32(<4 x i32> %a) {
+; CHECK-SD-LABEL: test100v4s32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    mov w8, #34079 // =0x851f
+; CHECK-SD-NEXT:    movk w8, #20971, lsl #16
+; CHECK-SD-NEXT:    dup v1.4s, w8
+; CHECK-SD-NEXT:    umull2 v2.2d, v0.4s, v1.4s
+; CHECK-SD-NEXT:    umull v1.2d, v0.2s, v1.2s
+; CHECK-SD-NEXT:    uzp2 v1.4s, v1.4s, v2.4s
+; CHECK-SD-NEXT:    movi v2.4s, #100
+; CHECK-SD-NEXT:    ushr v1.4s, v1.4s, #5
+; CHECK-SD-NEXT:    mls v0.4s, v1.4s, v2.4s
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test100v4s32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    adrp x8, .LCPI7_0
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI7_0]
+; CHECK-GI-NEXT:    umull2 v2.2d, v0.4s, v1.4s
+; CHECK-GI-NEXT:    umull v1.2d, v0.2s, v1.2s
+; CHECK-GI-NEXT:    uzp2 v1.4s, v1.4s, v2.4s
+; CHECK-GI-NEXT:    movi v2.4s, #100
+; CHECK-GI-NEXT:    ushr v1.4s, v1.4s, #5
+; CHECK-GI-NEXT:    mls v0.4s, v1.4s, v2.4s
+; CHECK-GI-NEXT:    ret
+  %r = urem <4 x i32> %a, <i32 100, i32 100, i32 100, i32 100>
+  ret <4 x i32> %r
+}
+
diff --git a/llvm/test/CodeGen/AArch64/pr58431.ll b/llvm/test/CodeGen/AArch64/pr58431.ll
index 88bab4af95d64..467ceb062f249 100644
--- a/llvm/test/CodeGen/AArch64/pr58431.ll
+++ b/llvm/test/CodeGen/AArch64/pr58431.ll
@@ -4,10 +4,12 @@
 define i32 @f(i64 %0) {
 ; CHECK-LABEL: f:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #10 // =0xa
+; CHECK-NEXT:    mov x8, #-7378697629483820647 // =0x9999999999999999
 ; CHECK-NEXT:    mov w9, w0
-; CHECK-NEXT:    udiv x10, x9, x8
-; CHECK-NEXT:    msub x0, x10, x8, x9
+; CHECK-NEXT:    mov w10, #10 // =0xa
+; CHECK-NEXT:    eor x8, x8, #0x8000000000000003
+; CHECK-NEXT:    umulh x8, x9, x8
+; CHECK-NEXT:    msub x0, x8, x10, x9
 ; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %2 = trunc i64 %0 to i32
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll
index 24ec4fa48f778..6ae2f56f6ae6d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll
@@ -211,91 +211,41 @@ define i32 @v_urem_i32_oddk_denom(i32 %num) {
 ; CHECK-LABEL: v_urem_i32_oddk_denom:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    v_rcp_iflag_f32_e32 v1, 0x4996c7d8
-; CHECK-NEXT:    v_mov_b32_e32 v2, 0xffed2705
-; CHECK-NEXT:    v_mov_b32_e32 v3, 0x12d8fb
-; CHECK-NEXT:    v_mul_f32_e32 v1, 0x4f7ffffe, v1
-; CHECK-NEXT:    v_cvt_u32_f32_e32 v1, v1
-; CHECK-NEXT:    v_mul_lo_u32 v2, v1, v2
-; CHECK-NEXT:    v_mul_hi_u32 v2, v1, v2
-; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v2
+; CHECK-NEXT:    v_mov_b32_e32 v1, 0xb2a50881
 ; CHECK-NEXT:    v_mul_hi_u32 v1, v0, v1
-; CHECK-NEXT:    v_mul_lo_u32 v1, v1, v3
+; CHECK-NEXT:    v_sub_i32_e32 v2, vcc, v0, v1
+; CHECK-NEXT:    v_lshrrev_b32_e32 v2, 1, v2
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v2, v1
+; CHECK-NEXT:    v_lshrrev_b32_e32 v1, 20, v1
+; CHECK-NEXT:    v_mov_b32_e32 v2, 0x12d8fb
+; CHECK-NEXT:    v_mul_lo_u32 v1, v1, v2
 ; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
-; CHECK-NEXT:    v_add_i32_e32 v1, vcc, 0xffed2705, v0
-; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v3
-; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v1, vcc, 0xffed2705, v0
-; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v3
-; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
 ; CHECK-NEXT:    s_setpc_b64 s[30:31]
   %result = urem i32 %num, 1235195
   ret i32 %result
 }
 
 define <2 x i32> @v_urem_v2i32_oddk_denom(<2 x i32> %num) {
-; GISEL-LABEL: v_urem_v2i32_oddk_denom:
-; GISEL:       ; %bb.0:
-; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-NEXT:    v_mov_b32_e32 v2, 0x12d8fb
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v3, 0x12d8fb
-; GISEL-NEXT:    v_mov_b32_e32 v4, 0xffed2705
-; GISEL-NEXT:    v_rcp_iflag_f32_e32 v3, v3
-; GISEL-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v3, v3
-; GISEL-NEXT:    v_mul_lo_u32 v5, v3, v4
-; GISEL-NEXT:    v_mul_hi_u32 v5, v3, v5
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
-; GISEL-NEXT:    v_mul_hi_u32 v5, v0, v3
-; GISEL-NEXT:    v_mul_hi_u32 v3, v1, v3
-; GISEL-NEXT:    v_mul_lo_u32 v5, v5, v2
-; GISEL-NEXT:    v_mul_lo_u32 v3, v3, v2
-; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v5
-; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v3
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v0, v4
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, 0xffed2705, v1
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
-; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v2
-; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v0, v4
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, 0xffed2705, v1
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
-; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v2
-; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
-; GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; CGP-LABEL: v_urem_v2i32_oddk_denom:
-; CGP:       ; %bb.0:
-; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CGP-NEXT:    v_rcp_iflag_f32_e32 v2, 0x4996c7d8
-; CGP-NEXT:    v_mov_b32_e32 v3, 0xffed2705
-; CGP-NEXT:    v_mov_b32_e32 v4, 0x12d8fb
-; CGP-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
-; CGP-NEXT:    v_cvt_u32_f32_e32 v2, v2
-; CGP-NEXT:    v_mul_lo_u32 v5, v2, v3
-; CGP-NEXT:    v_mul_hi_u32 v5, v2, v5
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
-; CGP-NEXT:    v_mul_hi_u32 v5, v0, v2
-; CGP-NEXT:    v_mul_hi_u32 v2, v1, v2
-; CGP-NEXT:    v_mul_lo_u32 v5, v5, v4
-; CGP-NEXT:    v_mul_lo_u32 v2, v2, v4
-; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v5
-; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v2
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, v0, v3
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, 0xffed2705, v1
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v4
-; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
-; CGP-NEXT:    v_cmp_g...
[truncated]

llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp

arsenm · 2025-06-27T05:48:48Z

llvm/test/CodeGen/AArch64/GlobalISel/combine_urem.ll

+; RUN: llc < %s -mtriple=aarch64-unknown-unknown | FileCheck %s --check-prefixes=CHECK-SD
+; RUN: llc < %s -mtriple=aarch64-unknown-unknown -global-isel | FileCheck %s --check-prefixes=CHECK-GI


If you're going to mix dag and globalisel run lines, should explicitly disable global-isel in the dag line. Also is there an existing test you can just add run lines to?

Not as far as I'm aware - there's a fairly complete set of unit tests in rem.ll for urem/srem between two registers, it would probably be nice to have the same for urem/srem by constant, so I'll be working on adding that.

llvm/include/llvm/Target/GlobalISel/Combine.td

Pierre-vh

Combiner bits LGTM

…ted code

…RUN line

davemgreen

LGTM other than a suggestion to trim the test a little. Thanks.

davemgreen · 2025-07-01T06:49:53Z

llvm/test/CodeGen/AArch64/rem-by-const.ll

+  ret <2 x i128> %s
+}
+
+define <3 x i128> @sv3i128_7(<3 x i128> %d, <3 x i128> %e) {


This file is fairly long - because of the way we legalize nodes we can avoid testing some of the large special cases I think, and remove the > 2 x i128 cases. They should just be the same as 2xi128 but larger and the actual legalization is tested elsewhere.

llvm-ci · 2025-07-02T12:50:43Z

LLVM Buildbot has detected a new failure on builder clang-hip-vega20 running on hip-vega20-0 while building llvm at step 3 "annotate".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/123/builds/22494

Here is the relevant piece of the build log for the reference

Step 3 (annotate) failure: '../llvm-zorg/zorg/buildbot/builders/annotated/hip-build.sh --jobs=' (failure)
...
[71/158] Linking CXX executable bin/llvm-size
[72/158] Building CXX object lib/Target/AMDGPU/CMakeFiles/LLVMAMDGPUCodeGen.dir/AMDGPUCombinerHelper.cpp.o
[73/158] Linking CXX executable bin/llvm-rtdyld
[74/158] Linking CXX executable bin/llvm-tli-checker
[75/158] Linking CXX executable bin/clang-installapi
[76/158] Linking CXX executable bin/llvm-xray
[77/158] Linking CXX executable bin/clang-move
[78/158] Linking CXX executable bin/clang-diff
[79/158] Linking CXX executable bin/llvm-symbolizer
[80/158] Building CXX object lib/Target/AMDGPU/CMakeFiles/LLVMAMDGPUCodeGen.dir/AMDGPUPreLegalizerCombiner.cpp.o
FAILED: lib/Target/AMDGPU/CMakeFiles/LLVMAMDGPUCodeGen.dir/AMDGPUPreLegalizerCombiner.cpp.o 
ccache /usr/bin/c++ -DGTEST_HAS_RTTI=0 -D_DEBUG -D_GLIBCXX_ASSERTIONS -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/home/botworker/bbot/clang-hip-vega20/botworker/clang-hip-vega20/llvm/lib/Target/AMDGPU -I/home/botworker/bbot/clang-hip-vega20/llvm-project/llvm/lib/Target/AMDGPU -I/home/botworker/bbot/clang-hip-vega20/botworker/clang-hip-vega20/llvm/include -I/home/botworker/bbot/clang-hip-vega20/llvm-project/llvm/include -fPIC -fno-semantic-interposition -fvisibility-inlines-hidden -Werror=date-time -Wall -Wextra -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wimplicit-fallthrough -Wno-uninitialized -Wno-nonnull -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wdelete-non-virtual-dtor -Wsuggest-override -Wno-comment -Wno-misleading-indentation -Wctad-maybe-unsupported -fdiagnostics-color -ffunction-sections -fdata-sections -O3 -DNDEBUG -fvisibility=hidden  -fno-exceptions -funwind-tables -fno-rtti -UNDEBUG -std=c++17 -MD -MT lib/Target/AMDGPU/CMakeFiles/LLVMAMDGPUCodeGen.dir/AMDGPUPreLegalizerCombiner.cpp.o -MF lib/Target/AMDGPU/CMakeFiles/LLVMAMDGPUCodeGen.dir/AMDGPUPreLegalizerCombiner.cpp.o.d -o lib/Target/AMDGPU/CMakeFiles/LLVMAMDGPUCodeGen.dir/AMDGPUPreLegalizerCombiner.cpp.o -c /home/botworker/bbot/clang-hip-vega20/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
In file included from /home/botworker/bbot/clang-hip-vega20/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp:86:
/home/botworker/bbot/clang-hip-vega20/botworker/clang-hip-vega20/llvm/lib/Target/AMDGPU/AMDGPUGenPreLegalizeGICombiner.inc: In lambda function:
/home/botworker/bbot/clang-hip-vega20/botworker/clang-hip-vega20/llvm/lib/Target/AMDGPU/AMDGPUGenPreLegalizeGICombiner.inc:3077: error: ‘const class llvm::AMDGPUCombinerHelper’ has no member named ‘matchUDivByConst’; did you mean ‘matchSDivByConst’?
 3077 |     if(![&](){return Helper.matchUDivByConst(*State.MIs[0]);}()) {
      | 
In file included from /home/botworker/bbot/clang-hip-vega20/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp:86:
/home/botworker/bbot/clang-hip-vega20/botworker/clang-hip-vega20/llvm/lib/Target/AMDGPU/AMDGPUGenPreLegalizeGICombiner.inc: In member function ‘virtual bool {anonymous}::AMDGPUPreLegalizerCombinerImpl::runCustomAction(unsigned int, const llvm::GIMatchTableExecutor::MatcherState&, llvm::GIMatchTableExecutor::NewMIVector&) const’:
/home/botworker/bbot/clang-hip-vega20/botworker/clang-hip-vega20/llvm/lib/Target/AMDGPU/AMDGPUGenPreLegalizeGICombiner.inc:3081: error: ‘const class llvm::AMDGPUCombinerHelper’ has no member named ‘applyUDivByConst’; did you mean ‘applySDivByConst’?
 3081 |     Helper.applyUDivByConst(*State.MIs[0]);
      | 
[81/158] Generating ../../bin/llvm-addr2line
[82/158] Linking CXX executable bin/obj2yaml
[83/158] Linking CXX executable bin/sanstats
[84/158] Linking CXX executable bin/clang-doc
[85/158] Linking CXX executable bin/clang-refactor
[86/158] Linking CXX executable bin/clang-include-cleaner
[87/158] Linking CXX executable bin/find-all-symbols
[88/158] Linking CXX executable bin/sancov
[89/158] Linking CXX executable bin/clang-change-namespace
[90/158] Linking CXX executable bin/pp-trace
[91/158] Linking CXX executable bin/clang-include-fixer
[92/158] Linking CXX executable bin/clang-reorder-fields
[93/158] Building CXX object lib/Target/AMDGPU/CMakeFiles/LLVMAMDGPUCodeGen.dir/AMDGPUPostLegalizerCombiner.cpp.o
FAILED: lib/Target/AMDGPU/CMakeFiles/LLVMAMDGPUCodeGen.dir/AMDGPUPostLegalizerCombiner.cpp.o 
ccache /usr/bin/c++ -DGTEST_HAS_RTTI=0 -D_DEBUG -D_GLIBCXX_ASSERTIONS -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/home/botworker/bbot/clang-hip-vega20/botworker/clang-hip-vega20/llvm/lib/Target/AMDGPU -I/home/botworker/bbot/clang-hip-vega20/llvm-project/llvm/lib/Target/AMDGPU -I/home/botworker/bbot/clang-hip-vega20/botworker/clang-hip-vega20/llvm/include -I/home/botworker/bbot/clang-hip-vega20/llvm-project/llvm/include -fPIC -fno-semantic-interposition -fvisibility-inlines-hidden -Werror=date-time -Wall -Wextra -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wimplicit-fallthrough -Wno-uninitialized -Wno-nonnull -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wdelete-non-virtual-dtor -Wsuggest-override -Wno-comment -Wno-misleading-indentation -Wctad-maybe-unsupported -fdiagnostics-color -ffunction-sections -fdata-sections -O3 -DNDEBUG -fvisibility=hidden  -fno-exceptions -funwind-tables -fno-rtti -UNDEBUG -std=c++17 -MD -MT lib/Target/AMDGPU/CMakeFiles/LLVMAMDGPUCodeGen.dir/AMDGPUPostLegalizerCombiner.cpp.o -MF lib/Target/AMDGPU/CMakeFiles/LLVMAMDGPUCodeGen.dir/AMDGPUPostLegalizerCombiner.cpp.o.d -o lib/Target/AMDGPU/CMakeFiles/LLVMAMDGPUCodeGen.dir/AMDGPUPostLegalizerCombiner.cpp.o -c /home/botworker/bbot/clang-hip-vega20/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
In file included from /home/botworker/bbot/clang-hip-vega20/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp:126:
/home/botworker/bbot/clang-hip-vega20/botworker/clang-hip-vega20/llvm/lib/Target/AMDGPU/AMDGPUGenPostLegalizeGICombiner.inc: In lambda function:
/home/botworker/bbot/clang-hip-vega20/botworker/clang-hip-vega20/llvm/lib/Target/AMDGPU/AMDGPUGenPostLegalizeGICombiner.inc:3146: error: ‘class llvm::AMDGPUCombinerHelper’ has no member named ‘matchUDivByConst’; did you mean ‘matchSDivByConst’?
 3146 |     if(![&](){return Helper.matchUDivByConst(*State.MIs[0]);}()) {
      | 
In file included from /home/botworker/bbot/clang-hip-vega20/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp:126:
/home/botworker/bbot/clang-hip-vega20/botworker/clang-hip-vega20/llvm/lib/Target/AMDGPU/AMDGPUGenPostLegalizeGICombiner.inc: In member function ‘virtual bool {anonymous}::AMDGPUPostLegalizerCombinerImpl::runCustomAction(unsigned int, const llvm::GIMatchTableExecutor::MatcherState&, llvm::GIMatchTableExecutor::NewMIVector&) const’:
/home/botworker/bbot/clang-hip-vega20/botworker/clang-hip-vega20/llvm/lib/Target/AMDGPU/AMDGPUGenPostLegalizeGICombiner.inc:3150: error: ‘class llvm::AMDGPUCombinerHelper’ has no member named ‘applyUDivByConst’; did you mean ‘applySDivByConst’?
 3150 |     Helper.applyUDivByConst(*State.MIs[0]);
      | 
[94/158] Linking CXX executable bin/tool-template
[95/158] Linking CXX executable bin/modularize
Step 7 (Building LLVM) failure: Building LLVM (failure)
...
[71/158] Linking CXX executable bin/llvm-size
[72/158] Building CXX object lib/Target/AMDGPU/CMakeFiles/LLVMAMDGPUCodeGen.dir/AMDGPUCombinerHelper.cpp.o
[73/158] Linking CXX executable bin/llvm-rtdyld
[74/158] Linking CXX executable bin/llvm-tli-checker
[75/158] Linking CXX executable bin/clang-installapi
[76/158] Linking CXX executable bin/llvm-xray
[77/158] Linking CXX executable bin/clang-move
[78/158] Linking CXX executable bin/clang-diff
[79/158] Linking CXX executable bin/llvm-symbolizer
[80/158] Building CXX object lib/Target/AMDGPU/CMakeFiles/LLVMAMDGPUCodeGen.dir/AMDGPUPreLegalizerCombiner.cpp.o
FAILED: lib/Target/AMDGPU/CMakeFiles/LLVMAMDGPUCodeGen.dir/AMDGPUPreLegalizerCombiner.cpp.o 
ccache /usr/bin/c++ -DGTEST_HAS_RTTI=0 -D_DEBUG -D_GLIBCXX_ASSERTIONS -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/home/botworker/bbot/clang-hip-vega20/botworker/clang-hip-vega20/llvm/lib/Target/AMDGPU -I/home/botworker/bbot/clang-hip-vega20/llvm-project/llvm/lib/Target/AMDGPU -I/home/botworker/bbot/clang-hip-vega20/botworker/clang-hip-vega20/llvm/include -I/home/botworker/bbot/clang-hip-vega20/llvm-project/llvm/include -fPIC -fno-semantic-interposition -fvisibility-inlines-hidden -Werror=date-time -Wall -Wextra -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wimplicit-fallthrough -Wno-uninitialized -Wno-nonnull -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wdelete-non-virtual-dtor -Wsuggest-override -Wno-comment -Wno-misleading-indentation -Wctad-maybe-unsupported -fdiagnostics-color -ffunction-sections -fdata-sections -O3 -DNDEBUG -fvisibility=hidden  -fno-exceptions -funwind-tables -fno-rtti -UNDEBUG -std=c++17 -MD -MT lib/Target/AMDGPU/CMakeFiles/LLVMAMDGPUCodeGen.dir/AMDGPUPreLegalizerCombiner.cpp.o -MF lib/Target/AMDGPU/CMakeFiles/LLVMAMDGPUCodeGen.dir/AMDGPUPreLegalizerCombiner.cpp.o.d -o lib/Target/AMDGPU/CMakeFiles/LLVMAMDGPUCodeGen.dir/AMDGPUPreLegalizerCombiner.cpp.o -c /home/botworker/bbot/clang-hip-vega20/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
In file included from /home/botworker/bbot/clang-hip-vega20/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp:86:
/home/botworker/bbot/clang-hip-vega20/botworker/clang-hip-vega20/llvm/lib/Target/AMDGPU/AMDGPUGenPreLegalizeGICombiner.inc: In lambda function:
/home/botworker/bbot/clang-hip-vega20/botworker/clang-hip-vega20/llvm/lib/Target/AMDGPU/AMDGPUGenPreLegalizeGICombiner.inc:3077: error: ‘const class llvm::AMDGPUCombinerHelper’ has no member named ‘matchUDivByConst’; did you mean ‘matchSDivByConst’?
 3077 |     if(![&](){return Helper.matchUDivByConst(*State.MIs[0]);}()) {
      | 
In file included from /home/botworker/bbot/clang-hip-vega20/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp:86:
/home/botworker/bbot/clang-hip-vega20/botworker/clang-hip-vega20/llvm/lib/Target/AMDGPU/AMDGPUGenPreLegalizeGICombiner.inc: In member function ‘virtual bool {anonymous}::AMDGPUPreLegalizerCombinerImpl::runCustomAction(unsigned int, const llvm::GIMatchTableExecutor::MatcherState&, llvm::GIMatchTableExecutor::NewMIVector&) const’:
/home/botworker/bbot/clang-hip-vega20/botworker/clang-hip-vega20/llvm/lib/Target/AMDGPU/AMDGPUGenPreLegalizeGICombiner.inc:3081: error: ‘const class llvm::AMDGPUCombinerHelper’ has no member named ‘applyUDivByConst’; did you mean ‘applySDivByConst’?
 3081 |     Helper.applyUDivByConst(*State.MIs[0]);
      | 
[81/158] Generating ../../bin/llvm-addr2line
[82/158] Linking CXX executable bin/obj2yaml
[83/158] Linking CXX executable bin/sanstats
[84/158] Linking CXX executable bin/clang-doc
[85/158] Linking CXX executable bin/clang-refactor
[86/158] Linking CXX executable bin/clang-include-cleaner
[87/158] Linking CXX executable bin/find-all-symbols
[88/158] Linking CXX executable bin/sancov
[89/158] Linking CXX executable bin/clang-change-namespace
[90/158] Linking CXX executable bin/pp-trace
[91/158] Linking CXX executable bin/clang-include-fixer
[92/158] Linking CXX executable bin/clang-reorder-fields
[93/158] Building CXX object lib/Target/AMDGPU/CMakeFiles/LLVMAMDGPUCodeGen.dir/AMDGPUPostLegalizerCombiner.cpp.o
FAILED: lib/Target/AMDGPU/CMakeFiles/LLVMAMDGPUCodeGen.dir/AMDGPUPostLegalizerCombiner.cpp.o 
ccache /usr/bin/c++ -DGTEST_HAS_RTTI=0 -D_DEBUG -D_GLIBCXX_ASSERTIONS -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/home/botworker/bbot/clang-hip-vega20/botworker/clang-hip-vega20/llvm/lib/Target/AMDGPU -I/home/botworker/bbot/clang-hip-vega20/llvm-project/llvm/lib/Target/AMDGPU -I/home/botworker/bbot/clang-hip-vega20/botworker/clang-hip-vega20/llvm/include -I/home/botworker/bbot/clang-hip-vega20/llvm-project/llvm/include -fPIC -fno-semantic-interposition -fvisibility-inlines-hidden -Werror=date-time -Wall -Wextra -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wimplicit-fallthrough -Wno-uninitialized -Wno-nonnull -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wdelete-non-virtual-dtor -Wsuggest-override -Wno-comment -Wno-misleading-indentation -Wctad-maybe-unsupported -fdiagnostics-color -ffunction-sections -fdata-sections -O3 -DNDEBUG -fvisibility=hidden  -fno-exceptions -funwind-tables -fno-rtti -UNDEBUG -std=c++17 -MD -MT lib/Target/AMDGPU/CMakeFiles/LLVMAMDGPUCodeGen.dir/AMDGPUPostLegalizerCombiner.cpp.o -MF lib/Target/AMDGPU/CMakeFiles/LLVMAMDGPUCodeGen.dir/AMDGPUPostLegalizerCombiner.cpp.o.d -o lib/Target/AMDGPU/CMakeFiles/LLVMAMDGPUCodeGen.dir/AMDGPUPostLegalizerCombiner.cpp.o -c /home/botworker/bbot/clang-hip-vega20/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
In file included from /home/botworker/bbot/clang-hip-vega20/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp:126:
/home/botworker/bbot/clang-hip-vega20/botworker/clang-hip-vega20/llvm/lib/Target/AMDGPU/AMDGPUGenPostLegalizeGICombiner.inc: In lambda function:
/home/botworker/bbot/clang-hip-vega20/botworker/clang-hip-vega20/llvm/lib/Target/AMDGPU/AMDGPUGenPostLegalizeGICombiner.inc:3146: error: ‘class llvm::AMDGPUCombinerHelper’ has no member named ‘matchUDivByConst’; did you mean ‘matchSDivByConst’?
 3146 |     if(![&](){return Helper.matchUDivByConst(*State.MIs[0]);}()) {
      | 
In file included from /home/botworker/bbot/clang-hip-vega20/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp:126:
/home/botworker/bbot/clang-hip-vega20/botworker/clang-hip-vega20/llvm/lib/Target/AMDGPU/AMDGPUGenPostLegalizeGICombiner.inc: In member function ‘virtual bool {anonymous}::AMDGPUPostLegalizerCombinerImpl::runCustomAction(unsigned int, const llvm::GIMatchTableExecutor::MatcherState&, llvm::GIMatchTableExecutor::NewMIVector&) const’:
/home/botworker/bbot/clang-hip-vega20/botworker/clang-hip-vega20/llvm/lib/Target/AMDGPU/AMDGPUGenPostLegalizeGICombiner.inc:3150: error: ‘class llvm::AMDGPUCombinerHelper’ has no member named ‘applyUDivByConst’; did you mean ‘applySDivByConst’?
 3150 |     Helper.applyUDivByConst(*State.MIs[0]);
      | 
[94/158] Linking CXX executable bin/tool-template
[95/158] Linking CXX executable bin/modularize

jyli0116 requested a review from davemgreen June 26, 2025 14:59

llvmbot added backend:AArch64 backend:AMDGPU llvm:globalisel labels Jun 26, 2025

jyli0116 changed the title ~~[GlobalIsel] Allow expansion of urem by constant in prelegalizer~~ [GlobalISel] Allow expansion of urem by constant in prelegalizer Jun 26, 2025

jyli0116 requested review from aemerson and arsenm June 26, 2025 16:02

davemgreen reviewed Jun 26, 2025

View reviewed changes

llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp Outdated Show resolved Hide resolved

arsenm reviewed Jun 27, 2025

View reviewed changes

arsenm requested a review from Pierre-vh June 27, 2025 05:49

Pierre-vh reviewed Jun 30, 2025

View reviewed changes

jyli0116 requested review from arsenm and davemgreen June 30, 2025 13:35

jyli0116 added 7 commits July 1, 2025 10:29

[GlobalIsel] Allow expansion of urem by constant in prelegalizer

0728d73

formatting

8ad3dcd

[GlobalISel] Combine matchUDiv/URem and applyUDiv/URem for less repea…

b28861f

…ted code

[GISel] removed wip_match_opcode, explicitly disabled global-isel in …

c62e874

…RUN line

[GISel] Added unit tests in AArch64 for UREM/SREM

caf4d4e

[GISel] Explicitly disabled global-isel on SDAG run line in tests

162594c

specify global isel flag on SDAG and GISel runlines on unit test

ef837a4

jyli0116 force-pushed the GISel_rembyconst branch from 30cdad3 to ef837a4 Compare July 1, 2025 10:30

davemgreen approved these changes Jul 1, 2025

View reviewed changes

reduce number of rem by const tests

37e8519

jyli0116 merged commit 9c0743f into llvm:main Jul 2, 2025
7 checks passed

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[GlobalISel] Allow expansion of urem by constant in prelegalizer #145914

[GlobalISel] Allow expansion of urem by constant in prelegalizer #145914

Uh oh!

jyli0116 commented Jun 26, 2025 •

edited

Loading

Uh oh!

llvmbot commented Jun 26, 2025 •

edited

Loading

Uh oh!

llvmbot commented Jun 26, 2025

Uh oh!

Uh oh!

arsenm Jun 27, 2025

Uh oh!

jyli0116 Jun 30, 2025

Uh oh!

Uh oh!

Pierre-vh left a comment

Uh oh!

davemgreen left a comment

Uh oh!

davemgreen Jul 1, 2025

Uh oh!

Uh oh!

llvm-ci commented Jul 2, 2025

Uh oh!

Uh oh!

		; RUN: llc < %s -mtriple=aarch64-unknown-unknown \| FileCheck %s --check-prefixes=CHECK-SD
		; RUN: llc < %s -mtriple=aarch64-unknown-unknown -global-isel \| FileCheck %s --check-prefixes=CHECK-GI

[GlobalISel] Allow expansion of urem by constant in prelegalizer #145914

[GlobalISel] Allow expansion of urem by constant in prelegalizer #145914

Uh oh!

Conversation

jyli0116 commented Jun 26, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented Jun 26, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented Jun 26, 2025

Uh oh!

Uh oh!

arsenm Jun 27, 2025

Choose a reason for hiding this comment

Uh oh!

jyli0116 Jun 30, 2025

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Pierre-vh left a comment

Choose a reason for hiding this comment

Uh oh!

davemgreen left a comment

Choose a reason for hiding this comment

Uh oh!

davemgreen Jul 1, 2025

Choose a reason for hiding this comment

Uh oh!

Uh oh!

llvm-ci commented Jul 2, 2025

Uh oh!

Uh oh!

jyli0116 commented Jun 26, 2025 •

edited

Loading

llvmbot commented Jun 26, 2025 •

edited

Loading