diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h index c15263e0b06f8..5ec82c30f268f 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h @@ -641,6 +641,8 @@ class CombinerHelper { /// KnownBits information. bool matchICmpToLHSKnownBits(MachineInstr &MI, BuildFnTy &MatchInfo) const; + bool combineMergedBFXCompare(MachineInstr &MI) const; + /// \returns true if (and (or x, c1), c2) can be replaced with (and x, c2) bool matchAndOrDisjointMask(MachineInstr &MI, BuildFnTy &MatchInfo) const; diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td index 4a92dc16c1bf4..cba46a5edf9ec 100644 --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -1085,6 +1085,14 @@ def double_icmp_zero_or_combine: GICombineRule< (G_ICMP $root, $p, $ordst, 0)) >; +// Transform ((X | (G_UBFX X, ...) | ...) == 0) (or != 0) +// into a compare of a extract/mask of X +def icmp_merged_bfx_combine: GICombineRule< + (defs root:$root), + (combine (G_ICMP $dst, $p, $src, 0):$root, + [{ return Helper.combineMergedBFXCompare(*${root}); }]) +>; + def and_or_disjoint_mask : GICombineRule< (defs root:$root, build_fn_matchinfo:$info), (match (wip_match_opcode G_AND):$root, @@ -2052,7 +2060,8 @@ def all_combines : GICombineGroup<[integer_reassoc_combines, trivial_combines, fsub_to_fneg, commute_constant_to_rhs, match_ands, match_ors, simplify_neg_minmax, combine_concat_vector, sext_trunc, zext_trunc, prefer_sign_combines, shuffle_combines, - combine_use_vector_truncate, merge_combines, overflow_combines]>; + combine_use_vector_truncate, merge_combines, overflow_combines, + icmp_merged_bfx_combine]>; // A combine group used to for prelegalizer combiners at -O0. 
The combines in // this group have been selected based on experiments to balance code size and diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp index fc40533cf3dc9..e1d43f37bac13 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp @@ -140,3 +140,92 @@ bool CombinerHelper::matchCanonicalizeFCmp(const MachineInstr &MI, return false; } + +bool CombinerHelper::combineMergedBFXCompare(MachineInstr &MI) const { + const GICmp *Cmp = cast<GICmp>(&MI); + + ICmpInst::Predicate CC = Cmp->getCond(); + if (CC != CmpInst::ICMP_EQ && CC != CmpInst::ICMP_NE) + return false; + + Register CmpLHS = Cmp->getLHSReg(); + Register CmpRHS = Cmp->getRHSReg(); + + LLT OpTy = MRI.getType(CmpLHS); + if (!OpTy.isScalar() || OpTy.isPointer()) + return false; + + assert(isZeroOrZeroSplat(CmpRHS, /*AllowUndefs=*/false)); + + Register Src; + const auto IsSrc = [&](Register R) { + if (!Src) { + Src = R; + return true; + } + + return Src == R; + }; + + MachineInstr *CmpLHSDef = MRI.getVRegDef(CmpLHS); + if (CmpLHSDef->getOpcode() != TargetOpcode::G_OR) + return false; + + APInt PartsMask(OpTy.getSizeInBits(), 0); + SmallVector<MachineInstr *> Worklist = {CmpLHSDef}; + while (!Worklist.empty()) { + MachineInstr *Cur = Worklist.pop_back_val(); + + Register Dst = Cur->getOperand(0).getReg(); + if (!MRI.hasOneUse(Dst) && Dst != Src) + return false; + + if (Cur->getOpcode() == TargetOpcode::G_OR) { + Worklist.push_back(MRI.getVRegDef(Cur->getOperand(1).getReg())); + Worklist.push_back(MRI.getVRegDef(Cur->getOperand(2).getReg())); + continue; + } + + if (Cur->getOpcode() == TargetOpcode::G_UBFX) { + Register Op = Cur->getOperand(1).getReg(); + Register Width = Cur->getOperand(2).getReg(); + Register Off = Cur->getOperand(3).getReg(); + + auto WidthCst = getIConstantVRegVal(Width, MRI); + auto OffCst = getIConstantVRegVal(Off, MRI); + if (!WidthCst || !OffCst || !IsSrc(Op)) + 
return false; + + unsigned Start = OffCst->getZExtValue(); + unsigned End = Start + WidthCst->getZExtValue(); + if (End > OpTy.getScalarSizeInBits()) + return false; + PartsMask.setBits(Start, End); + continue; + } + + if (Cur->getOpcode() == TargetOpcode::G_AND) { + Register LHS = Cur->getOperand(1).getReg(); + Register RHS = Cur->getOperand(2).getReg(); + + auto MaskCst = getIConstantVRegVal(RHS, MRI); + if (!MaskCst || !MaskCst->isMask() || !IsSrc(LHS)) + return false; + + PartsMask |= *MaskCst; + continue; + } + + return false; + } + + if (!PartsMask.isMask() || !Src) + return false; + + assert(OpTy == MRI.getType(Src) && "Ignored a type casting operation?"); + auto MaskedSrc = + Builder.buildAnd(OpTy, Src, Builder.buildConstant(OpTy, PartsMask)); + Builder.buildICmp(CC, Cmp->getReg(0), MaskedSrc, CmpRHS, Cmp->getFlags()); + MI.eraseFromParent(); + return true; +} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-cmp-merged-bfx.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-cmp-merged-bfx.mir new file mode 100644 index 0000000000000..b96a6772010ed --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-cmp-merged-bfx.mir @@ -0,0 +1,326 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass=amdgpu-postlegalizer-combiner %s -o - | FileCheck %s + +--- +name: basic_i64_2x5 +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: basic_i64_2x5 + ; CHECK: liveins: $vgpr0_vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %reg:_(s64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: %zero:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1023 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND %reg, [[C]] + ; CHECK-NEXT: %cmp:_(s1) = G_ICMP intpred(eq), [[AND]](s64), %zero + ; CHECK-NEXT: %res:_(s32) = G_ZEXT %cmp(s1) + ; CHECK-NEXT: $vgpr0 = COPY %res(s32) + %reg:_(s64) = COPY $vgpr0_vgpr1 + %mask:_(s64) = G_CONSTANT i64 31 + 
%reg_mask:_(s64) = G_AND %reg, %mask + %k:_(s64) = G_CONSTANT i64 5 + %bfx:_(s64) = G_UBFX %reg, %k, %k + %x:_(s64) = G_OR %reg_mask, %bfx + %zero:_(s64) = G_CONSTANT i64 0 + %cmp:_(s1) = G_ICMP intpred(eq), %x, %zero + %res:_(s32) = G_ZEXT %cmp + $vgpr0 = COPY %res +... + +--- +name: basic_i32_2x5 +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: basic_i32_2x5 + ; CHECK: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1023 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND %reg, [[C]] + ; CHECK-NEXT: %cmp:_(s1) = G_ICMP intpred(eq), [[AND]](s32), %zero + ; CHECK-NEXT: %res:_(s32) = G_ZEXT %cmp(s1) + ; CHECK-NEXT: $vgpr0 = COPY %res(s32) + %reg:_(s32) = COPY $vgpr0 + %mask:_(s32) = G_CONSTANT i32 31 + %reg_mask:_(s32) = G_AND %reg, %mask + %k:_(s32) = G_CONSTANT i32 5 + %bfx:_(s32) = G_UBFX %reg, %k, %k + %x:_(s32) = G_OR %reg_mask, %bfx + %zero:_(s32) = G_CONSTANT i32 0 + %cmp:_(s1) = G_ICMP intpred(eq), %x, %zero + %res:_(s32) = G_ZEXT %cmp + $vgpr0 = COPY %res +... 
+ +--- +name: basic_ne_i32_2x5 +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: basic_ne_i32_2x5 + ; CHECK: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1023 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND %reg, [[C]] + ; CHECK-NEXT: %cmp:_(s1) = G_ICMP intpred(ne), [[AND]](s32), %zero + ; CHECK-NEXT: %res:_(s32) = G_ZEXT %cmp(s1) + ; CHECK-NEXT: $vgpr0 = COPY %res(s32) + %reg:_(s32) = COPY $vgpr0 + %mask:_(s32) = G_CONSTANT i32 31 + %reg_mask:_(s32) = G_AND %reg, %mask + %k:_(s32) = G_CONSTANT i32 5 + %bfx:_(s32) = G_UBFX %reg, %k, %k + %x:_(s32) = G_OR %reg_mask, %bfx + %zero:_(s32) = G_CONSTANT i32 0 + %cmp:_(s1) = G_ICMP intpred(ne), %x, %zero + %res:_(s32) = G_ZEXT %cmp + $vgpr0 = COPY %res +... + +--- +name: basic_i32_5x5 +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: basic_i32_5x5 + ; CHECK: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 33554431 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND %reg, [[C]] + ; CHECK-NEXT: %cmp:_(s1) = G_ICMP intpred(eq), [[AND]](s32), %zero + ; CHECK-NEXT: %res:_(s32) = G_ZEXT %cmp(s1) + ; CHECK-NEXT: $vgpr0 = COPY %res(s32) + %reg:_(s32) = COPY $vgpr0 + %mask:_(s32) = G_CONSTANT i32 31 + %reg_mask:_(s32) = G_AND %reg, %mask + %five:_(s32) = G_CONSTANT i32 5 + %bfx1:_(s32) = G_UBFX %reg, %five, %five + %x1:_(s32) = G_OR %reg_mask, %bfx1 + %k2:_(s32) = G_CONSTANT i32 10 + %bfx2:_(s32) = G_UBFX %reg, %k2, %five + %x2:_(s32) = G_OR %x1, %bfx2 + %k3:_(s32) = G_CONSTANT i32 15 + %bfx3:_(s32) = G_UBFX %reg, %k3, %five + %x3:_(s32) = G_OR %x2, %bfx3 + %k4:_(s32) = G_CONSTANT i32 20 + %bfx4:_(s32) = G_UBFX %reg, %k4, %five + %x4:_(s32) = G_OR %x3, %bfx4 + %zero:_(s32) = G_CONSTANT i32 0 + 
%cmp:_(s1) = G_ICMP intpred(eq), %x4, %zero + %res:_(s32) = G_ZEXT %cmp + $vgpr0 = COPY %res +... + +--- +name: basic_i16_2x5 +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: basic_i16_2x5 + ; CHECK: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: %reg_trunc:_(s16) = G_TRUNC %reg(s32) + ; CHECK-NEXT: %zero:_(s16) = G_CONSTANT i16 0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1023 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND %reg_trunc, [[C]] + ; CHECK-NEXT: %cmp:_(s1) = G_ICMP intpred(eq), [[AND]](s16), %zero + ; CHECK-NEXT: %res:_(s32) = G_ZEXT %cmp(s1) + ; CHECK-NEXT: $vgpr0 = COPY %res(s32) + %reg:_(s32) = COPY $vgpr0 + %reg_trunc:_(s16) = G_TRUNC %reg + %mask:_(s16) = G_CONSTANT i16 31 + %reg_mask:_(s16) = G_AND %reg_trunc, %mask + %k:_(s16) = G_CONSTANT i16 5 + %bfx:_(s16) = G_UBFX %reg_trunc, %k, %k + %x:_(s16) = G_OR %reg_mask, %bfx + %zero:_(s16) = G_CONSTANT i16 0 + %cmp:_(s1) = G_ICMP intpred(eq), %x, %zero + %res:_(s32) = G_ZEXT %cmp + $vgpr0 = COPY %res +... 
+ +--- +name: unsupported_sbfx +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: unsupported_sbfx + ; CHECK: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: %mask:_(s32) = G_CONSTANT i32 31 + ; CHECK-NEXT: %reg_mask:_(s32) = G_AND %reg, %mask + ; CHECK-NEXT: %k:_(s32) = G_CONSTANT i32 5 + ; CHECK-NEXT: %bfx:_(s32) = G_SBFX %reg, %k(s32), %k + ; CHECK-NEXT: %x:_(s32) = G_OR %reg_mask, %bfx + ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: %cmp:_(s1) = G_ICMP intpred(eq), %x(s32), %zero + ; CHECK-NEXT: %res:_(s32) = G_ZEXT %cmp(s1) + ; CHECK-NEXT: $vgpr0 = COPY %res(s32) + %reg:_(s32) = COPY $vgpr0 + %mask:_(s32) = G_CONSTANT i32 31 + %reg_mask:_(s32) = G_AND %reg, %mask + %k:_(s32) = G_CONSTANT i32 5 + %bfx:_(s32) = G_SBFX %reg, %k, %k + %x:_(s32) = G_OR %reg_mask, %bfx + %zero:_(s32) = G_CONSTANT i32 0 + %cmp:_(s1) = G_ICMP intpred(eq), %x, %zero + %res:_(s32) = G_ZEXT %cmp + $vgpr0 = COPY %res +... 
+ +--- +name: unsupported_src_changes +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; CHECK-LABEL: name: unsupported_src_changes + ; CHECK: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: %reg2:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: %mask:_(s32) = G_CONSTANT i32 31 + ; CHECK-NEXT: %reg_mask:_(s32) = G_AND %reg2, %mask + ; CHECK-NEXT: %k:_(s32) = G_CONSTANT i32 5 + ; CHECK-NEXT: %bfx:_(s32) = G_SBFX %reg, %k(s32), %k + ; CHECK-NEXT: %x:_(s32) = G_OR %reg_mask, %bfx + ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: %cmp:_(s1) = G_ICMP intpred(eq), %x(s32), %zero + ; CHECK-NEXT: %res:_(s32) = G_ZEXT %cmp(s1) + ; CHECK-NEXT: $vgpr0 = COPY %res(s32) + %reg:_(s32) = COPY $vgpr0 + %reg2:_(s32) = COPY $vgpr1 + %mask:_(s32) = G_CONSTANT i32 31 + %reg_mask:_(s32) = G_AND %reg2, %mask + %k:_(s32) = G_CONSTANT i32 5 + %bfx:_(s32) = G_SBFX %reg, %k, %k + %x:_(s32) = G_OR %reg_mask, %bfx + %zero:_(s32) = G_CONSTANT i32 0 + %cmp:_(s1) = G_ICMP intpred(eq), %x, %zero + %res:_(s32) = G_ZEXT %cmp + $vgpr0 = COPY %res +... 
+ +--- +name: unsupported_holes_in_mask +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; CHECK-LABEL: name: unsupported_holes_in_mask + ; CHECK: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: %reg2:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: %mask:_(s32) = G_CONSTANT i32 31 + ; CHECK-NEXT: %reg_mask:_(s32) = G_AND %reg2, %mask + ; CHECK-NEXT: %k:_(s32) = G_CONSTANT i32 6 + ; CHECK-NEXT: %bfx:_(s32) = G_UBFX %reg, %k(s32), %k + ; CHECK-NEXT: %x:_(s32) = G_OR %reg_mask, %bfx + ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: %cmp:_(s1) = G_ICMP intpred(eq), %x(s32), %zero + ; CHECK-NEXT: %res:_(s32) = G_ZEXT %cmp(s1) + ; CHECK-NEXT: $vgpr0 = COPY %res(s32) + %reg:_(s32) = COPY $vgpr0 + %reg2:_(s32) = COPY $vgpr1 + %mask:_(s32) = G_CONSTANT i32 31 + %reg_mask:_(s32) = G_AND %reg2, %mask + %k:_(s32) = G_CONSTANT i32 6 + %bfx:_(s32) = G_UBFX %reg, %k, %k + %x:_(s32) = G_OR %reg_mask, %bfx + %zero:_(s32) = G_CONSTANT i32 0 + %cmp:_(s1) = G_ICMP intpred(eq), %x, %zero + %res:_(s32) = G_ZEXT %cmp + $vgpr0 = COPY %res +... 
+ +--- +name: unsupported_bfx_out_of_range +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; CHECK-LABEL: name: unsupported_bfx_out_of_range + ; CHECK: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: %reg2:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: %mask:_(s32) = G_CONSTANT i32 31 + ; CHECK-NEXT: %reg_mask:_(s32) = G_AND %reg2, %mask + ; CHECK-NEXT: %width:_(s32) = G_CONSTANT i32 12 + ; CHECK-NEXT: %off:_(s32) = G_CONSTANT i32 26 + ; CHECK-NEXT: %bfx:_(s32) = G_UBFX %reg, %off(s32), %width + ; CHECK-NEXT: %x:_(s32) = G_OR %reg_mask, %bfx + ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: %cmp:_(s1) = G_ICMP intpred(eq), %x(s32), %zero + ; CHECK-NEXT: %res:_(s32) = G_ZEXT %cmp(s1) + ; CHECK-NEXT: $vgpr0 = COPY %res(s32) + %reg:_(s32) = COPY $vgpr0 + %reg2:_(s32) = COPY $vgpr1 + %mask:_(s32) = G_CONSTANT i32 31 + %reg_mask:_(s32) = G_AND %reg2, %mask + %width:_(s32) = G_CONSTANT i32 12 + %off:_(s32) = G_CONSTANT i32 26 + %bfx:_(s32) = G_UBFX %reg, %off, %width + %x:_(s32) = G_OR %reg_mask, %bfx + %zero:_(s32) = G_CONSTANT i32 0 + %cmp:_(s1) = G_ICMP intpred(eq), %x, %zero + %res:_(s32) = G_ZEXT %cmp + $vgpr0 = COPY %res +... 
+ +--- +name: unsupported_cc +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: unsupported_cc + ; CHECK: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: %mask:_(s32) = G_CONSTANT i32 31 + ; CHECK-NEXT: %reg_mask:_(s32) = G_AND %reg, %mask + ; CHECK-NEXT: %k:_(s32) = G_CONSTANT i32 5 + ; CHECK-NEXT: %bfx:_(s32) = G_UBFX %reg, %k(s32), %k + ; CHECK-NEXT: %x:_(s32) = G_OR %reg_mask, %bfx + ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: %cmp:_(s1) = G_ICMP intpred(ule), %x(s32), %zero + ; CHECK-NEXT: %res:_(s32) = G_ZEXT %cmp(s1) + ; CHECK-NEXT: $vgpr0 = COPY %res(s32) + %reg:_(s32) = COPY $vgpr0 + %mask:_(s32) = G_CONSTANT i32 31 + %reg_mask:_(s32) = G_AND %reg, %mask + %k:_(s32) = G_CONSTANT i32 5 + %bfx:_(s32) = G_UBFX %reg, %k, %k + %x:_(s32) = G_OR %reg_mask, %bfx + %zero:_(s32) = G_CONSTANT i32 0 + %cmp:_(s1) = G_ICMP intpred(ule), %x, %zero + %res:_(s32) = G_ZEXT %cmp + $vgpr0 = COPY %res +... 
diff --git a/llvm/test/CodeGen/AMDGPU/workitem-intrinsic-opts.ll b/llvm/test/CodeGen/AMDGPU/workitem-intrinsic-opts.ll index 64d055bc40e98..487504b4e494d 100644 --- a/llvm/test/CodeGen/AMDGPU/workitem-intrinsic-opts.ll +++ b/llvm/test/CodeGen/AMDGPU/workitem-intrinsic-opts.ll @@ -9,77 +9,36 @@ ; (workitem_id_x | workitem_id_y | workitem_id_z) == 0 define i1 @workitem_zero() { -; DAGISEL-GFX8-LABEL: workitem_zero: -; DAGISEL-GFX8: ; %bb.0: ; %entry -; DAGISEL-GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; DAGISEL-GFX8-NEXT: v_and_b32_e32 v0, 0x3fffffff, v31 -; DAGISEL-GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; DAGISEL-GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; DAGISEL-GFX8-NEXT: s_setpc_b64 s[30:31] -; -; DAGISEL-GFX942-LABEL: workitem_zero: -; DAGISEL-GFX942: ; %bb.0: ; %entry -; DAGISEL-GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; DAGISEL-GFX942-NEXT: v_and_b32_e32 v0, 0x3fffffff, v31 -; DAGISEL-GFX942-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; DAGISEL-GFX942-NEXT: s_nop 1 -; DAGISEL-GFX942-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; DAGISEL-GFX942-NEXT: s_setpc_b64 s[30:31] -; -; DAGISEL-GFX12-LABEL: workitem_zero: -; DAGISEL-GFX12: ; %bb.0: ; %entry -; DAGISEL-GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 -; DAGISEL-GFX12-NEXT: s_wait_expcnt 0x0 -; DAGISEL-GFX12-NEXT: s_wait_samplecnt 0x0 -; DAGISEL-GFX12-NEXT: s_wait_bvhcnt 0x0 -; DAGISEL-GFX12-NEXT: s_wait_kmcnt 0x0 -; DAGISEL-GFX12-NEXT: v_and_b32_e32 v0, 0x3fffffff, v31 -; DAGISEL-GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) -; DAGISEL-GFX12-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; DAGISEL-GFX12-NEXT: s_wait_alu 0xfffd -; DAGISEL-GFX12-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo -; DAGISEL-GFX12-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-GFX8-LABEL: workitem_zero: -; GISEL-GFX8: ; %bb.0: ; %entry -; GISEL-GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-GFX8-NEXT: v_and_b32_e32 v0, 0x3ff, v31 -; GISEL-GFX8-NEXT: v_bfe_u32 v1, v31, 10, 10 -; GISEL-GFX8-NEXT: v_or_b32_e32 v0, v0, v1 -; GISEL-GFX8-NEXT: 
v_bfe_u32 v1, v31, 20, 10 -; GISEL-GFX8-NEXT: v_or_b32_e32 v0, v0, v1 -; GISEL-GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GISEL-GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; GISEL-GFX8-NEXT: s_setpc_b64 s[30:31] +; GFX8-LABEL: workitem_zero: +; GFX8: ; %bb.0: ; %entry +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_and_b32_e32 v0, 0x3fffffff, v31 +; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GISEL-GFX942-LABEL: workitem_zero: -; GISEL-GFX942: ; %bb.0: ; %entry -; GISEL-GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-GFX942-NEXT: v_and_b32_e32 v0, 0x3ff, v31 -; GISEL-GFX942-NEXT: v_bfe_u32 v1, v31, 10, 10 -; GISEL-GFX942-NEXT: v_bfe_u32 v2, v31, 20, 10 -; GISEL-GFX942-NEXT: v_or3_b32 v0, v0, v1, v2 -; GISEL-GFX942-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GISEL-GFX942-NEXT: s_nop 1 -; GISEL-GFX942-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; GISEL-GFX942-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: workitem_zero: +; GFX942: ; %bb.0: ; %entry +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_and_b32_e32 v0, 0x3fffffff, v31 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX942-NEXT: s_setpc_b64 s[30:31] ; -; GISEL-GFX12-LABEL: workitem_zero: -; GISEL-GFX12: ; %bb.0: ; %entry -; GISEL-GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 -; GISEL-GFX12-NEXT: s_wait_expcnt 0x0 -; GISEL-GFX12-NEXT: s_wait_samplecnt 0x0 -; GISEL-GFX12-NEXT: s_wait_bvhcnt 0x0 -; GISEL-GFX12-NEXT: s_wait_kmcnt 0x0 -; GISEL-GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v31 -; GISEL-GFX12-NEXT: v_bfe_u32 v1, v31, 10, 10 -; GISEL-GFX12-NEXT: v_bfe_u32 v2, v31, 20, 10 -; GISEL-GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GISEL-GFX12-NEXT: v_or3_b32 v0, v0, v1, v2 -; GISEL-GFX12-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GISEL-GFX12-NEXT: s_wait_alu 0xfffd -; GISEL-GFX12-NEXT: 
v_cndmask_b32_e64 v0, 0, 1, vcc_lo -; GISEL-GFX12-NEXT: s_setpc_b64 s[30:31] +; GFX12-LABEL: workitem_zero: +; GFX12: ; %bb.0: ; %entry +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_and_b32_e32 v0, 0x3fffffff, v31 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX12-NEXT: s_wait_alu 0xfffd +; GFX12-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo +; GFX12-NEXT: s_setpc_b64 s[30:31] entry: %0 = tail call i32 @llvm.amdgcn.workitem.id.x() %1 = tail call i32 @llvm.amdgcn.workitem.id.y() @@ -92,77 +51,36 @@ entry: ; (workitem_id_x | workitem_id_y | workitem_id_z) != 0 define i1 @workitem_nonzero() { -; DAGISEL-GFX8-LABEL: workitem_nonzero: -; DAGISEL-GFX8: ; %bb.0: ; %entry -; DAGISEL-GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; DAGISEL-GFX8-NEXT: v_and_b32_e32 v0, 0x3fffffff, v31 -; DAGISEL-GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 -; DAGISEL-GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; DAGISEL-GFX8-NEXT: s_setpc_b64 s[30:31] -; -; DAGISEL-GFX942-LABEL: workitem_nonzero: -; DAGISEL-GFX942: ; %bb.0: ; %entry -; DAGISEL-GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; DAGISEL-GFX942-NEXT: v_and_b32_e32 v0, 0x3fffffff, v31 -; DAGISEL-GFX942-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 -; DAGISEL-GFX942-NEXT: s_nop 1 -; DAGISEL-GFX942-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; DAGISEL-GFX942-NEXT: s_setpc_b64 s[30:31] -; -; DAGISEL-GFX12-LABEL: workitem_nonzero: -; DAGISEL-GFX12: ; %bb.0: ; %entry -; DAGISEL-GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 -; DAGISEL-GFX12-NEXT: s_wait_expcnt 0x0 -; DAGISEL-GFX12-NEXT: s_wait_samplecnt 0x0 -; DAGISEL-GFX12-NEXT: s_wait_bvhcnt 0x0 -; DAGISEL-GFX12-NEXT: s_wait_kmcnt 0x0 -; DAGISEL-GFX12-NEXT: v_and_b32_e32 v0, 0x3fffffff, v31 -; DAGISEL-GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) -; DAGISEL-GFX12-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 -; 
DAGISEL-GFX12-NEXT: s_wait_alu 0xfffd -; DAGISEL-GFX12-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo -; DAGISEL-GFX12-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-GFX8-LABEL: workitem_nonzero: -; GISEL-GFX8: ; %bb.0: ; %entry -; GISEL-GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-GFX8-NEXT: v_and_b32_e32 v0, 0x3ff, v31 -; GISEL-GFX8-NEXT: v_bfe_u32 v1, v31, 10, 10 -; GISEL-GFX8-NEXT: v_or_b32_e32 v0, v0, v1 -; GISEL-GFX8-NEXT: v_bfe_u32 v1, v31, 20, 10 -; GISEL-GFX8-NEXT: v_or_b32_e32 v0, v0, v1 -; GISEL-GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 -; GISEL-GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; GISEL-GFX8-NEXT: s_setpc_b64 s[30:31] +; GFX8-LABEL: workitem_nonzero: +; GFX8: ; %bb.0: ; %entry +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_and_b32_e32 v0, 0x3fffffff, v31 +; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GISEL-GFX942-LABEL: workitem_nonzero: -; GISEL-GFX942: ; %bb.0: ; %entry -; GISEL-GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-GFX942-NEXT: v_and_b32_e32 v0, 0x3ff, v31 -; GISEL-GFX942-NEXT: v_bfe_u32 v1, v31, 10, 10 -; GISEL-GFX942-NEXT: v_bfe_u32 v2, v31, 20, 10 -; GISEL-GFX942-NEXT: v_or3_b32 v0, v0, v1, v2 -; GISEL-GFX942-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 -; GISEL-GFX942-NEXT: s_nop 1 -; GISEL-GFX942-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; GISEL-GFX942-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: workitem_nonzero: +; GFX942: ; %bb.0: ; %entry +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_and_b32_e32 v0, 0x3fffffff, v31 +; GFX942-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX942-NEXT: s_setpc_b64 s[30:31] ; -; GISEL-GFX12-LABEL: workitem_nonzero: -; GISEL-GFX12: ; %bb.0: ; %entry -; GISEL-GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 -; GISEL-GFX12-NEXT: s_wait_expcnt 0x0 -; GISEL-GFX12-NEXT: s_wait_samplecnt 0x0 -; GISEL-GFX12-NEXT: 
s_wait_bvhcnt 0x0 -; GISEL-GFX12-NEXT: s_wait_kmcnt 0x0 -; GISEL-GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v31 -; GISEL-GFX12-NEXT: v_bfe_u32 v1, v31, 10, 10 -; GISEL-GFX12-NEXT: v_bfe_u32 v2, v31, 20, 10 -; GISEL-GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GISEL-GFX12-NEXT: v_or3_b32 v0, v0, v1, v2 -; GISEL-GFX12-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 -; GISEL-GFX12-NEXT: s_wait_alu 0xfffd -; GISEL-GFX12-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo -; GISEL-GFX12-NEXT: s_setpc_b64 s[30:31] +; GFX12-LABEL: workitem_nonzero: +; GFX12: ; %bb.0: ; %entry +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_and_b32_e32 v0, 0x3fffffff, v31 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 +; GFX12-NEXT: s_wait_alu 0xfffd +; GFX12-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo +; GFX12-NEXT: s_setpc_b64 s[30:31] entry: %0 = tail call i32 @llvm.amdgcn.workitem.id.x() %1 = tail call i32 @llvm.amdgcn.workitem.id.y()