-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[AMDGPU] Add tests for workgroup/workitem intrinsic optimizations #146053
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: users/pierre-vh/fold-assertzext-id-intrinsics
Are you sure you want to change the base?
[AMDGPU] Add tests for workgroup/workitem intrinsic optimizations #146053
Conversation
Warning This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack on Graphite.
This stack of pull requests is managed by Graphite. Learn more about stacking. |
@llvm/pr-subscribers-backend-amdgpu Author: Pierre van Houtryve (Pierre-vh). Changes: Patch is 24.13 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/146053.diff. 1 file affected:
diff --git a/llvm/test/CodeGen/AMDGPU/workitems-intrinsics-opts.ll b/llvm/test/CodeGen/AMDGPU/workitems-intrinsics-opts.ll
new file mode 100644
index 0000000000000..14120680216fc
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/workitems-intrinsics-opts.ll
@@ -0,0 +1,553 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -O3 -mtriple=amdgcn -mcpu=fiji %s -o - | FileCheck %s --check-prefixes=GFX8,DAGISEL-GFX9
+; RUN: llc -O3 -mtriple=amdgcn -mcpu=gfx942 %s -o - | FileCheck %s --check-prefixes=GFX942,DAGISEL-GFX942
+; RUN: llc -O3 -mtriple=amdgcn -mcpu=gfx1200 %s -o - | FileCheck %s --check-prefixes=GFX12,DAGISEL-GFX12
+
+; RUN: llc -O3 -global-isel -mtriple=amdgcn -mcpu=fiji %s -o - | FileCheck %s --check-prefixes=GFX8,GISEL-GFX8
+; RUN: llc -O3 -global-isel -mtriple=amdgcn -mcpu=gfx942 %s -o - | FileCheck %s --check-prefixes=GFX942,GISEL-GFX942
+; RUN: llc -O3 -global-isel -mtriple=amdgcn -mcpu=gfx1200 %s -o - | FileCheck %s --check-prefixes=GFX12,GISEL-GFX12
+
+; (workitem_id_x | workitem_id_y | workitem_id_z) == 0
+;
+; Returns true iff the OR of the three workitem ID intrinsics is zero.
+; In every expected sequence below the IDs are read out of v31 as 10-bit
+; fields at bit offsets 0, 10 and 20 (0x3ff mask, shifts / v_bfe_u32 by 10
+; and 20).
+; The DAGISEL-* sequences OR the shifted copies of v31 first and apply the
+; 0x3ff mask once at the end; the GISEL-* sequences extract each field
+; before ORing them together.
+; NOTE(review): the DAGISEL-GFX9 prefix is the one the -mcpu=fiji
+; SelectionDAG run passes to FileCheck, while the GlobalISel fiji run uses
+; GISEL-GFX8; the "GFX9" in that name looks like a misnomer - confirm before
+; renaming, since every assertion using it would have to change with it.
+define i1 @workitem_zero() {
+; DAGISEL-GFX9-LABEL: workitem_zero:
+; DAGISEL-GFX9: ; %bb.0: ; %entry
+; DAGISEL-GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; DAGISEL-GFX9-NEXT: v_lshrrev_b32_e32 v1, 10, v31
+; DAGISEL-GFX9-NEXT: v_lshrrev_b32_e32 v0, 20, v31
+; DAGISEL-GFX9-NEXT: v_or_b32_e32 v1, v31, v1
+; DAGISEL-GFX9-NEXT: v_or_b32_e32 v0, v1, v0
+; DAGISEL-GFX9-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; DAGISEL-GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; DAGISEL-GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; DAGISEL-GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; DAGISEL-GFX942-LABEL: workitem_zero:
+; DAGISEL-GFX942: ; %bb.0: ; %entry
+; DAGISEL-GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; DAGISEL-GFX942-NEXT: v_lshrrev_b32_e32 v0, 20, v31
+; DAGISEL-GFX942-NEXT: v_lshrrev_b32_e32 v1, 10, v31
+; DAGISEL-GFX942-NEXT: v_or3_b32 v0, v31, v1, v0
+; DAGISEL-GFX942-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; DAGISEL-GFX942-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; DAGISEL-GFX942-NEXT: s_nop 1
+; DAGISEL-GFX942-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; DAGISEL-GFX942-NEXT: s_setpc_b64 s[30:31]
+;
+; DAGISEL-GFX12-LABEL: workitem_zero:
+; DAGISEL-GFX12: ; %bb.0: ; %entry
+; DAGISEL-GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; DAGISEL-GFX12-NEXT: s_wait_expcnt 0x0
+; DAGISEL-GFX12-NEXT: s_wait_samplecnt 0x0
+; DAGISEL-GFX12-NEXT: s_wait_bvhcnt 0x0
+; DAGISEL-GFX12-NEXT: s_wait_kmcnt 0x0
+; DAGISEL-GFX12-NEXT: v_lshrrev_b32_e32 v0, 20, v31
+; DAGISEL-GFX12-NEXT: v_lshrrev_b32_e32 v1, 10, v31
+; DAGISEL-GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; DAGISEL-GFX12-NEXT: v_or3_b32 v0, v31, v1, v0
+; DAGISEL-GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; DAGISEL-GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; DAGISEL-GFX12-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; DAGISEL-GFX12-NEXT: s_wait_alu 0xfffd
+; DAGISEL-GFX12-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; DAGISEL-GFX12-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-GFX8-LABEL: workitem_zero:
+; GISEL-GFX8: ; %bb.0: ; %entry
+; GISEL-GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX8-NEXT: v_and_b32_e32 v0, 0x3ff, v31
+; GISEL-GFX8-NEXT: v_bfe_u32 v1, v31, 10, 10
+; GISEL-GFX8-NEXT: v_or_b32_e32 v0, v0, v1
+; GISEL-GFX8-NEXT: v_bfe_u32 v1, v31, 20, 10
+; GISEL-GFX8-NEXT: v_or_b32_e32 v0, v0, v1
+; GISEL-GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GISEL-GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GISEL-GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-GFX942-LABEL: workitem_zero:
+; GISEL-GFX942: ; %bb.0: ; %entry
+; GISEL-GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX942-NEXT: v_and_b32_e32 v0, 0x3ff, v31
+; GISEL-GFX942-NEXT: v_bfe_u32 v1, v31, 10, 10
+; GISEL-GFX942-NEXT: v_bfe_u32 v2, v31, 20, 10
+; GISEL-GFX942-NEXT: v_or3_b32 v0, v0, v1, v2
+; GISEL-GFX942-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GISEL-GFX942-NEXT: s_nop 1
+; GISEL-GFX942-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GISEL-GFX942-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-GFX12-LABEL: workitem_zero:
+; GISEL-GFX12: ; %bb.0: ; %entry
+; GISEL-GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GISEL-GFX12-NEXT: s_wait_expcnt 0x0
+; GISEL-GFX12-NEXT: s_wait_samplecnt 0x0
+; GISEL-GFX12-NEXT: s_wait_bvhcnt 0x0
+; GISEL-GFX12-NEXT: s_wait_kmcnt 0x0
+; GISEL-GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v31
+; GISEL-GFX12-NEXT: v_bfe_u32 v1, v31, 10, 10
+; GISEL-GFX12-NEXT: v_bfe_u32 v2, v31, 20, 10
+; GISEL-GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GISEL-GFX12-NEXT: v_or3_b32 v0, v0, v1, v2
+; GISEL-GFX12-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GISEL-GFX12-NEXT: s_wait_alu 0xfffd
+; GISEL-GFX12-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GISEL-GFX12-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %0 = tail call i32 @llvm.amdgcn.workitem.id.x()
+ %1 = tail call i32 @llvm.amdgcn.workitem.id.y()
+ %or = or i32 %0, %1
+ %2 = tail call i32 @llvm.amdgcn.workitem.id.z()
+ %or1 = or i32 %or, %2
+ %cmp = icmp eq i32 %or1, 0
+ ret i1 %cmp
+}
+
+; (workitem_id_x | workitem_id_y | workitem_id_z) != 0
+;
+; Returns true iff the OR of the three workitem ID intrinsics is non-zero.
+; The IR matches workitem_zero except that the final compare is icmp ne;
+; accordingly the expected sequences below use v_cmp_ne_u32 where
+; workitem_zero expects v_cmp_eq_u32, and are otherwise the same
+; instruction-for-instruction.
+define i1 @workitem_nonzero() {
+; DAGISEL-GFX9-LABEL: workitem_nonzero:
+; DAGISEL-GFX9: ; %bb.0: ; %entry
+; DAGISEL-GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; DAGISEL-GFX9-NEXT: v_lshrrev_b32_e32 v1, 10, v31
+; DAGISEL-GFX9-NEXT: v_lshrrev_b32_e32 v0, 20, v31
+; DAGISEL-GFX9-NEXT: v_or_b32_e32 v1, v31, v1
+; DAGISEL-GFX9-NEXT: v_or_b32_e32 v0, v1, v0
+; DAGISEL-GFX9-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; DAGISEL-GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
+; DAGISEL-GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; DAGISEL-GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; DAGISEL-GFX942-LABEL: workitem_nonzero:
+; DAGISEL-GFX942: ; %bb.0: ; %entry
+; DAGISEL-GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; DAGISEL-GFX942-NEXT: v_lshrrev_b32_e32 v0, 20, v31
+; DAGISEL-GFX942-NEXT: v_lshrrev_b32_e32 v1, 10, v31
+; DAGISEL-GFX942-NEXT: v_or3_b32 v0, v31, v1, v0
+; DAGISEL-GFX942-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; DAGISEL-GFX942-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
+; DAGISEL-GFX942-NEXT: s_nop 1
+; DAGISEL-GFX942-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; DAGISEL-GFX942-NEXT: s_setpc_b64 s[30:31]
+;
+; DAGISEL-GFX12-LABEL: workitem_nonzero:
+; DAGISEL-GFX12: ; %bb.0: ; %entry
+; DAGISEL-GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; DAGISEL-GFX12-NEXT: s_wait_expcnt 0x0
+; DAGISEL-GFX12-NEXT: s_wait_samplecnt 0x0
+; DAGISEL-GFX12-NEXT: s_wait_bvhcnt 0x0
+; DAGISEL-GFX12-NEXT: s_wait_kmcnt 0x0
+; DAGISEL-GFX12-NEXT: v_lshrrev_b32_e32 v0, 20, v31
+; DAGISEL-GFX12-NEXT: v_lshrrev_b32_e32 v1, 10, v31
+; DAGISEL-GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; DAGISEL-GFX12-NEXT: v_or3_b32 v0, v31, v1, v0
+; DAGISEL-GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; DAGISEL-GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; DAGISEL-GFX12-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; DAGISEL-GFX12-NEXT: s_wait_alu 0xfffd
+; DAGISEL-GFX12-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; DAGISEL-GFX12-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-GFX8-LABEL: workitem_nonzero:
+; GISEL-GFX8: ; %bb.0: ; %entry
+; GISEL-GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX8-NEXT: v_and_b32_e32 v0, 0x3ff, v31
+; GISEL-GFX8-NEXT: v_bfe_u32 v1, v31, 10, 10
+; GISEL-GFX8-NEXT: v_or_b32_e32 v0, v0, v1
+; GISEL-GFX8-NEXT: v_bfe_u32 v1, v31, 20, 10
+; GISEL-GFX8-NEXT: v_or_b32_e32 v0, v0, v1
+; GISEL-GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
+; GISEL-GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GISEL-GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-GFX942-LABEL: workitem_nonzero:
+; GISEL-GFX942: ; %bb.0: ; %entry
+; GISEL-GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX942-NEXT: v_and_b32_e32 v0, 0x3ff, v31
+; GISEL-GFX942-NEXT: v_bfe_u32 v1, v31, 10, 10
+; GISEL-GFX942-NEXT: v_bfe_u32 v2, v31, 20, 10
+; GISEL-GFX942-NEXT: v_or3_b32 v0, v0, v1, v2
+; GISEL-GFX942-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
+; GISEL-GFX942-NEXT: s_nop 1
+; GISEL-GFX942-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GISEL-GFX942-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-GFX12-LABEL: workitem_nonzero:
+; GISEL-GFX12: ; %bb.0: ; %entry
+; GISEL-GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GISEL-GFX12-NEXT: s_wait_expcnt 0x0
+; GISEL-GFX12-NEXT: s_wait_samplecnt 0x0
+; GISEL-GFX12-NEXT: s_wait_bvhcnt 0x0
+; GISEL-GFX12-NEXT: s_wait_kmcnt 0x0
+; GISEL-GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v31
+; GISEL-GFX12-NEXT: v_bfe_u32 v1, v31, 10, 10
+; GISEL-GFX12-NEXT: v_bfe_u32 v2, v31, 20, 10
+; GISEL-GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GISEL-GFX12-NEXT: v_or3_b32 v0, v0, v1, v2
+; GISEL-GFX12-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GISEL-GFX12-NEXT: s_wait_alu 0xfffd
+; GISEL-GFX12-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GISEL-GFX12-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %0 = tail call i32 @llvm.amdgcn.workitem.id.x()
+ %1 = tail call i32 @llvm.amdgcn.workitem.id.y()
+ %or = or i32 %0, %1
+ %2 = tail call i32 @llvm.amdgcn.workitem.id.z()
+ %or1 = or i32 %or, %2
+ %cmp = icmp ne i32 %or1, 0
+ ret i1 %cmp
+}
+
+; (workgroup_id_x | workgroup_id_y | workgroup_id_z) == 0
+;
+; Returns true iff the OR of the three workgroup ID intrinsics is zero.
+; The expected fiji and gfx942 sequences OR s12, s13 and s14 together; the
+; gfx1200 sequences OR ttmp9 with the low 16 bits of ttmp7 (s_and_b32 with
+; 0xffff) and the high 16 bits of ttmp7 (s_lshr_b32 by 16).
+; NOTE(review): presumably those are the x, y and z IDs in that order -
+; confirm against the AMDGPU calling convention documentation.
+; The OR and compare stay in scalar instructions in every sequence. The
+; selectors differ only in how the i1 result reaches v0: the DAGISEL-*
+; sequences use s_cselect -1/0 followed by v_cndmask_b32, the GISEL-*
+; sequences use s_cselect_b32 1/0 followed by v_mov_b32.
+define i1 @workgroup_zero() {
+; DAGISEL-GFX9-LABEL: workgroup_zero:
+; DAGISEL-GFX9: ; %bb.0: ; %entry
+; DAGISEL-GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; DAGISEL-GFX9-NEXT: s_or_b32 s4, s12, s13
+; DAGISEL-GFX9-NEXT: s_or_b32 s4, s4, s14
+; DAGISEL-GFX9-NEXT: s_cmp_eq_u32 s4, 0
+; DAGISEL-GFX9-NEXT: s_cselect_b64 s[4:5], -1, 0
+; DAGISEL-GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; DAGISEL-GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; DAGISEL-GFX942-LABEL: workgroup_zero:
+; DAGISEL-GFX942: ; %bb.0: ; %entry
+; DAGISEL-GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; DAGISEL-GFX942-NEXT: s_or_b32 s0, s12, s13
+; DAGISEL-GFX942-NEXT: s_or_b32 s0, s0, s14
+; DAGISEL-GFX942-NEXT: s_cmp_eq_u32 s0, 0
+; DAGISEL-GFX942-NEXT: s_cselect_b64 s[0:1], -1, 0
+; DAGISEL-GFX942-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
+; DAGISEL-GFX942-NEXT: s_setpc_b64 s[30:31]
+;
+; DAGISEL-GFX12-LABEL: workgroup_zero:
+; DAGISEL-GFX12: ; %bb.0: ; %entry
+; DAGISEL-GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; DAGISEL-GFX12-NEXT: s_wait_expcnt 0x0
+; DAGISEL-GFX12-NEXT: s_wait_samplecnt 0x0
+; DAGISEL-GFX12-NEXT: s_wait_bvhcnt 0x0
+; DAGISEL-GFX12-NEXT: s_wait_kmcnt 0x0
+; DAGISEL-GFX12-NEXT: s_and_b32 s0, ttmp7, 0xffff
+; DAGISEL-GFX12-NEXT: s_wait_alu 0xfffe
+; DAGISEL-GFX12-NEXT: s_lshr_b32 s1, ttmp7, 16
+; DAGISEL-GFX12-NEXT: s_or_b32 s0, ttmp9, s0
+; DAGISEL-GFX12-NEXT: s_wait_alu 0xfffe
+; DAGISEL-GFX12-NEXT: s_or_b32 s0, s0, s1
+; DAGISEL-GFX12-NEXT: s_wait_alu 0xfffe
+; DAGISEL-GFX12-NEXT: s_cmp_eq_u32 s0, 0
+; DAGISEL-GFX12-NEXT: s_cselect_b32 s0, -1, 0
+; DAGISEL-GFX12-NEXT: s_wait_alu 0xfffe
+; DAGISEL-GFX12-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; DAGISEL-GFX12-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-GFX8-LABEL: workgroup_zero:
+; GISEL-GFX8: ; %bb.0: ; %entry
+; GISEL-GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX8-NEXT: s_or_b32 s4, s12, s13
+; GISEL-GFX8-NEXT: s_or_b32 s4, s4, s14
+; GISEL-GFX8-NEXT: s_cmp_eq_u32 s4, 0
+; GISEL-GFX8-NEXT: s_cselect_b32 s4, 1, 0
+; GISEL-GFX8-NEXT: v_mov_b32_e32 v0, s4
+; GISEL-GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-GFX942-LABEL: workgroup_zero:
+; GISEL-GFX942: ; %bb.0: ; %entry
+; GISEL-GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX942-NEXT: s_or_b32 s0, s12, s13
+; GISEL-GFX942-NEXT: s_or_b32 s0, s0, s14
+; GISEL-GFX942-NEXT: s_cmp_eq_u32 s0, 0
+; GISEL-GFX942-NEXT: s_cselect_b32 s0, 1, 0
+; GISEL-GFX942-NEXT: v_mov_b32_e32 v0, s0
+; GISEL-GFX942-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-GFX12-LABEL: workgroup_zero:
+; GISEL-GFX12: ; %bb.0: ; %entry
+; GISEL-GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GISEL-GFX12-NEXT: s_wait_expcnt 0x0
+; GISEL-GFX12-NEXT: s_wait_samplecnt 0x0
+; GISEL-GFX12-NEXT: s_wait_bvhcnt 0x0
+; GISEL-GFX12-NEXT: s_wait_kmcnt 0x0
+; GISEL-GFX12-NEXT: s_and_b32 s0, ttmp7, 0xffff
+; GISEL-GFX12-NEXT: s_wait_alu 0xfffe
+; GISEL-GFX12-NEXT: s_lshr_b32 s1, ttmp7, 16
+; GISEL-GFX12-NEXT: s_or_b32 s0, ttmp9, s0
+; GISEL-GFX12-NEXT: s_wait_alu 0xfffe
+; GISEL-GFX12-NEXT: s_or_b32 s0, s0, s1
+; GISEL-GFX12-NEXT: s_wait_alu 0xfffe
+; GISEL-GFX12-NEXT: s_cmp_eq_u32 s0, 0
+; GISEL-GFX12-NEXT: s_cselect_b32 s0, 1, 0
+; GISEL-GFX12-NEXT: s_wait_alu 0xfffe
+; GISEL-GFX12-NEXT: v_mov_b32_e32 v0, s0
+; GISEL-GFX12-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %0 = tail call i32 @llvm.amdgcn.workgroup.id.x()
+ %1 = tail call i32 @llvm.amdgcn.workgroup.id.y()
+ %or = or i32 %0, %1
+ %2 = tail call i32 @llvm.amdgcn.workgroup.id.z()
+ %or1 = or i32 %or, %2
+ %cmp = icmp eq i32 %or1, 0
+ ret i1 %cmp
+}
+
+; (workgroup_id_x | workgroup_id_y | workgroup_id_z) != 0
+;
+; Returns true iff the OR of the three workgroup ID intrinsics is non-zero.
+; The IR matches workgroup_zero except that the final compare is icmp ne;
+; accordingly the expected sequences below use s_cmp_lg_u32 where
+; workgroup_zero expects s_cmp_eq_u32, and are otherwise the same
+; instruction-for-instruction.
+define i1 @workgroup_nonzero() {
+; DAGISEL-GFX9-LABEL: workgroup_nonzero:
+; DAGISEL-GFX9: ; %bb.0: ; %entry
+; DAGISEL-GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; DAGISEL-GFX9-NEXT: s_or_b32 s4, s12, s13
+; DAGISEL-GFX9-NEXT: s_or_b32 s4, s4, s14
+; DAGISEL-GFX9-NEXT: s_cmp_lg_u32 s4, 0
+; DAGISEL-GFX9-NEXT: s_cselect_b64 s[4:5], -1, 0
+; DAGISEL-GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; DAGISEL-GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; DAGISEL-GFX942-LABEL: workgroup_nonzero:
+; DAGISEL-GFX942: ; %bb.0: ; %entry
+; DAGISEL-GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; DAGISEL-GFX942-NEXT: s_or_b32 s0, s12, s13
+; DAGISEL-GFX942-NEXT: s_or_b32 s0, s0, s14
+; DAGISEL-GFX942-NEXT: s_cmp_lg_u32 s0, 0
+; DAGISEL-GFX942-NEXT: s_cselect_b64 s[0:1], -1, 0
+; DAGISEL-GFX942-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
+; DAGISEL-GFX942-NEXT: s_setpc_b64 s[30:31]
+;
+; DAGISEL-GFX12-LABEL: workgroup_nonzero:
+; DAGISEL-GFX12: ; %bb.0: ; %entry
+; DAGISEL-GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; DAGISEL-GFX12-NEXT: s_wait_expcnt 0x0
+; DAGISEL-GFX12-NEXT: s_wait_samplecnt 0x0
+; DAGISEL-GFX12-NEXT: s_wait_bvhcnt 0x0
+; DAGISEL-GFX12-NEXT: s_wait_kmcnt 0x0
+; DAGISEL-GFX12-NEXT: s_and_b32 s0, ttmp7, 0xffff
+; DAGISEL-GFX12-NEXT: s_wait_alu 0xfffe
+; DAGISEL-GFX12-NEXT: s_lshr_b32 s1, ttmp7, 16
+; DAGISEL-GFX12-NEXT: s_or_b32 s0, ttmp9, s0
+; DAGISEL-GFX12-NEXT: s_wait_alu 0xfffe
+; DAGISEL-GFX12-NEXT: s_or_b32 s0, s0, s1
+; DAGISEL-GFX12-NEXT: s_wait_alu 0xfffe
+; DAGISEL-GFX12-NEXT: s_cmp_lg_u32 s0, 0
+; DAGISEL-GFX12-NEXT: s_cselect_b32 s0, -1, 0
+; DAGISEL-GFX12-NEXT: s_wait_alu 0xfffe
+; DAGISEL-GFX12-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; DAGISEL-GFX12-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-GFX8-LABEL: workgroup_nonzero:
+; GISEL-GFX8: ; %bb.0: ; %entry
+; GISEL-GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX8-NEXT: s_or_b32 s4, s12, s13
+; GISEL-GFX8-NEXT: s_or_b32 s4, s4, s14
+; GISEL-GFX8-NEXT: s_cmp_lg_u32 s4, 0
+; GISEL-GFX8-NEXT: s_cselect_b32 s4, 1, 0
+; GISEL-GFX8-NEXT: v_mov_b32_e32 v0, s4
+; GISEL-GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-GFX942-LABEL: workgroup_nonzero:
+; GISEL-GFX942: ; %bb.0: ; %entry
+; GISEL-GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX942-NEXT: s_or_b32 s0, s12, s13
+; GISEL-GFX942-NEXT: s_or_b32 s0, s0, s14
+; GISEL-GFX942-NEXT: s_cmp_lg_u32 s0, 0
+; GISEL-GFX942-NEXT: s_cselect_b32 s0, 1, 0
+; GISEL-GFX942-NEXT: v_mov_b32_e32 v0, s0
+; GISEL-GFX942-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-GFX12-LABEL: workgroup_nonzero:
+; GISEL-GFX12: ; %bb.0: ; %entry
+; GISEL-GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GISEL-GFX12-NEXT: s_wait_expcnt 0x0
+; GISEL-GFX12-NEXT: s_wait_samplecnt 0x0
+; GISEL-GFX12-NEXT: s_wait_bvhcnt 0x0
+; GISEL-GFX12-NEXT: s_wait_kmcnt 0x0
+; GISEL-GFX12-NEXT: s_and_b32 s0, ttmp7, 0xffff
+; GISEL-GFX12-NEXT: s_wait_alu 0xfffe
+; GISEL-GFX12-NEXT: s_lshr_b32 s1, ttmp7, 16
+; GISEL-GFX12-NEXT: s_or_b32 s0, ttmp9, s0
+; GISEL-GFX12-NEXT: s_wait_alu 0xfffe
+; GISEL-GFX12-NEXT: s_or_b32 s0, s0, s1
+; GISEL-GFX12-NEXT: s_wait_alu 0xfffe
+; GISEL-GFX12-NEXT: s_cmp_lg_u32 s0, 0
+; GISEL-GFX12-NEXT: s_cselect_b32 s0, 1, 0
+; GISEL-GFX12-NEXT: s_wait_alu 0xfffe
+; GISEL-GFX12-NEXT: v_mov_b32_e32 v0, s0
+; GISEL-GFX12-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %0 = tail call i32 @llvm.amdgcn.workgroup.id.x()
+ %1 = tail call i32 @llvm.amdgcn.workgroup.id.y()
+ %or = or i32 %0, %1
+ %2 = tail call i32 @llvm.amdgcn.workgroup.id.z()
+ %or1 = or i32 %or, %2
+ %cmp = icmp ne i32 %or1, 0
+ ret i1 %cmp
+}
+
+; (workitem_id_x | workitem_id_y | workitem_id_z | workgroup_id_x | workgroup_id_y | workgroup_id_z) == 0
+;
+; Intended to return true iff the OR of all three workgroup IDs and all
+; three workitem IDs is zero. The assertions use the GFX8 / GFX942 / GFX12
+; prefixes, which each appear on both the SelectionDAG and the GlobalISel
+; run for that target.
+;
+; FIXME: the compare at the end of this function reads %or3, not %or4, so
+; %or4 (the OR that folds in workitem.id.z) is unused and the function does
+; not test the expression written above. The expected sequences below
+; reflect that: none of them extracts the field at bit offset 20 of v31
+; (there is no "v_bfe_u32 v1, v31, 20, 10"), whereas the GFX8 sequence of
+; workitem_workgroup_nonzero does. The compare should presumably use %or4;
+; after changing it, regenerate these assertions with
+; utils/update_llc_test_checks.py rather than editing them by hand.
+define i1 @workitem_workgroup_zero() {
+; GFX8-LABEL: workitem_workgroup_zero:
+; GFX8: ; %bb.0: ; %entry
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: s_or_b32 s4, s12, s13
+; GFX8-NEXT: s_or_b32 s4, s4, s14
+; GFX8-NEXT: v_and_b32_e32 v0, 0x3ff, v31
+; GFX8-NEXT: v_or_b32_e32 v0, s4, v0
+; GFX8-NEXT: v_bfe_u32 v1, v31, 10, 10
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: workitem_workgroup_zero:
+; GFX942: ; %bb.0: ; %entry
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: s_or_b32 s0, s12, s13
+; GFX942-NEXT: s_or_b32 s0, s0, s14
+; GFX942-NEXT: v_and_b32_e32 v0, 0x3ff, v31
+; GFX942-NEXT: v_bfe_u32 v1, v31, 10, 10
+; GFX942-NEXT: v_or3_b32 v0, s0, v0, v1
+; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: workitem_workgroup_zero:
+; GFX12: ; %bb.0: ; %entry
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: s_and_b32 s0, ttmp7, 0xffff
+; GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v31
+; GFX12-NEXT: v_bfe_u32 v1, v31, 10, 10
+; GFX12-NEXT: s_wait_alu 0xfffe
+; GFX12-NEXT: s_lshr_b32 s1, ttmp7, 16
+; GFX12-NEXT: s_or_b32 s0, ttmp9, s0
+; GFX12-NEXT: s_wait_alu 0xfffe
+; GFX12-NEXT: s_or_b32 s0, s0, s1
+; GFX12-NEXT: s_wait_alu 0xfffe
+; GFX12-NEXT: v_or3_b32 v0, s0, v0, v1
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX12-NEXT: s_wait_alu 0xfffd
+; GFX12-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %0 = tail call i32 @llvm.amdgcn.workgroup.id.x()
+ %1 = tail call i32 @llvm.amdgcn.workgroup.id.y()
+ %or = or i32 %0, %1
+ %2 = tail call i32 @llvm.amdgcn.workgroup.id.z()
+ %or1 = or i32 %or, %2
+ %3 = tail call i32 @llvm.amdgcn.workitem.id.x()
+ %or2 = or i32 %or1, %3
+ %4 = tail call i32 @llvm.amdgcn.workitem.id.y()
+ %or3 = or i32 %or2, %4
+ %5 = tail call i32 @llvm.amdgcn.workitem.id.z()
+ %or4 = or i32 %or3, %5
+ ; FIXME: compares %or3, leaving %or4 (and workitem.id.z) dead; see the
+ ; note above the function.
+ %cmp = icmp eq i32 %or3, 0
+ ret i1 %cmp
+}
+
+; (workitem_id_x | workitem_id_y | workitem_id_z | workgroup_id_x | workgroup_id_y | workgroup_id_z) != 0
+define i1 @workitem_workgroup_nonzero() {
+; GFX8-LABEL: workitem_workgroup_nonzero:
+; GFX8: ; %bb.0: ; %entry
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: s_or_b32 s4, s12, s13
+; GFX8-NEXT: s_or_b32 s4, s4, s14
+; GFX8-NEXT: v_and_b32_e32 v0, 0x3ff, v31
+; GFX8-NEXT: v_or_b32_e32 v0, s4, v0
+; GFX8-NEXT: v_bfe_u32 v1, v31, 10, 10
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX8-NEXT: v_bfe_u32 v1, v31, 20, 10
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
+; GFX8-NEXT: v_cndmask_b32_e64 v0...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM with nits
4eee684
to
2b9f0b5
Compare
9ad4aa6
to
444c470
Compare
No description provided.