Skip to content

Commit 9ad4aa6

Browse files
committed
[AMDGPU][DAG] Remove AssertZext before some intrinsics
1 parent 249f074 commit 9ad4aa6

11 files changed

+91
-79
lines changed

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3985,6 +3985,24 @@ SDValue AMDGPUTargetLowering::performAssertSZExtCombine(SDNode *N,
39853985
}
39863986
}
39873987

3988+
// AssertZext in front of these intrinsics is not necessary; the lowering of
3989+
// the intrinsics into a register read will insert one if it is needed.
3990+
if (N->getOpcode() == ISD::AssertZext &&
3991+
N0.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
3992+
unsigned IID = N0.getConstantOperandVal(0);
3993+
switch (IID) {
3994+
case Intrinsic::amdgcn_workitem_id_x:
3995+
case Intrinsic::amdgcn_workitem_id_y:
3996+
case Intrinsic::amdgcn_workitem_id_z:
3997+
case Intrinsic::amdgcn_workgroup_id_x:
3998+
case Intrinsic::amdgcn_workgroup_id_y:
3999+
case Intrinsic::amdgcn_workgroup_id_z:
4000+
return N0;
4001+
default:
4002+
break;
4003+
}
4004+
}
4005+
39884006
return SDValue();
39894007
}
39904008

llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -442,9 +442,9 @@ define amdgpu_kernel void @add_x_shl_neg_to_sub_multi_use() #1 {
442442
;
443443
; GFX11-LABEL: add_x_shl_neg_to_sub_multi_use:
444444
; GFX11: ; %bb.0:
445-
; GFX11-NEXT: v_dual_mov_b32 v1, 13 :: v_dual_and_b32 v0, 0x3ff, v0
445+
; GFX11-NEXT: v_dual_mov_b32 v1, 13 :: v_dual_lshlrev_b32 v0, 2, v0
446446
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
447-
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
447+
; GFX11-NEXT: v_and_b32_e32 v0, 0xffc, v0
448448
; GFX11-NEXT: v_sub_nc_u32_e32 v0, 0, v0
449449
; GFX11-NEXT: ds_store_b32 v0, v1 offset:123
450450
; GFX11-NEXT: ds_store_b32 v0, v1 offset:456

llvm/test/CodeGen/AMDGPU/flat-scratch.ll

Lines changed: 27 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -714,10 +714,10 @@ define amdgpu_kernel void @store_load_vindex_kernel(i32 %n) {
714714
; GFX11-LABEL: store_load_vindex_kernel:
715715
; GFX11: ; %bb.0: ; %bb
716716
; GFX11-NEXT: s_load_b32 s0, s[4:5], 0x24
717-
; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
717+
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
718718
; GFX11-NEXT: v_mov_b32_e32 v2, 15
719719
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
720-
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
720+
; GFX11-NEXT: v_and_b32_e32 v0, 0xffc, v0
721721
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
722722
; GFX11-NEXT: s_lshl_b32 s0, s0, 7
723723
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
@@ -732,9 +732,9 @@ define amdgpu_kernel void @store_load_vindex_kernel(i32 %n) {
732732
; GFX12-LABEL: store_load_vindex_kernel:
733733
; GFX12: ; %bb.0: ; %bb
734734
; GFX12-NEXT: s_load_b32 s0, s[4:5], 0x24
735-
; GFX12-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_and_b32 v0, 0x3ff, v0
735+
; GFX12-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_lshlrev_b32 v0, 2, v0
736736
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
737-
; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0
737+
; GFX12-NEXT: v_and_b32_e32 v0, 0xffc, v0
738738
; GFX12-NEXT: s_wait_kmcnt 0x0
739739
; GFX12-NEXT: s_lshl_b32 s0, s0, 7
740740
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
@@ -769,8 +769,8 @@ define amdgpu_kernel void @store_load_vindex_kernel(i32 %n) {
769769
; GFX942-LABEL: store_load_vindex_kernel:
770770
; GFX942: ; %bb.0: ; %bb
771771
; GFX942-NEXT: s_load_dword s0, s[4:5], 0x24
772-
; GFX942-NEXT: v_and_b32_e32 v0, 0x3ff, v0
773772
; GFX942-NEXT: v_lshlrev_b32_e32 v0, 2, v0
773+
; GFX942-NEXT: v_and_b32_e32 v0, 0xffc, v0
774774
; GFX942-NEXT: v_mov_b32_e32 v1, 15
775775
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
776776
; GFX942-NEXT: s_lshl_b32 s0, s0, 7
@@ -809,10 +809,10 @@ define amdgpu_kernel void @store_load_vindex_kernel(i32 %n) {
809809
; GFX11-PAL-LABEL: store_load_vindex_kernel:
810810
; GFX11-PAL: ; %bb.0: ; %bb
811811
; GFX11-PAL-NEXT: s_load_b32 s0, s[4:5], 0x0
812-
; GFX11-PAL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
812+
; GFX11-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
813813
; GFX11-PAL-NEXT: v_mov_b32_e32 v2, 15
814814
; GFX11-PAL-NEXT: s_delay_alu instid0(VALU_DEP_2)
815-
; GFX11-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
815+
; GFX11-PAL-NEXT: v_and_b32_e32 v0, 0xffc, v0
816816
; GFX11-PAL-NEXT: s_waitcnt lgkmcnt(0)
817817
; GFX11-PAL-NEXT: s_lshl_b32 s0, s0, 7
818818
; GFX11-PAL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
@@ -827,9 +827,9 @@ define amdgpu_kernel void @store_load_vindex_kernel(i32 %n) {
827827
; GFX12-PAL-LABEL: store_load_vindex_kernel:
828828
; GFX12-PAL: ; %bb.0: ; %bb
829829
; GFX12-PAL-NEXT: s_load_b32 s0, s[4:5], 0x0
830-
; GFX12-PAL-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_and_b32 v0, 0x3ff, v0
830+
; GFX12-PAL-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_lshlrev_b32 v0, 2, v0
831831
; GFX12-PAL-NEXT: s_delay_alu instid0(VALU_DEP_1)
832-
; GFX12-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
832+
; GFX12-PAL-NEXT: v_and_b32_e32 v0, 0xffc, v0
833833
; GFX12-PAL-NEXT: s_wait_kmcnt 0x0
834834
; GFX12-PAL-NEXT: s_lshl_b32 s0, s0, 7
835835
; GFX12-PAL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
@@ -1958,10 +1958,10 @@ define amdgpu_kernel void @store_load_vindex_small_offset_kernel(i32 %n) {
19581958
; GFX11-LABEL: store_load_vindex_small_offset_kernel:
19591959
; GFX11: ; %bb.0: ; %bb
19601960
; GFX11-NEXT: s_load_b32 s0, s[4:5], 0x24
1961-
; GFX11-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_and_b32 v0, 0x3ff, v0
1961+
; GFX11-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_lshlrev_b32 v0, 2, v0
19621962
; GFX11-NEXT: scratch_load_b32 v3, off, off glc dlc
19631963
; GFX11-NEXT: s_waitcnt vmcnt(0)
1964-
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1964+
; GFX11-NEXT: v_and_b32_e32 v0, 0xffc, v0
19651965
; GFX11-NEXT: scratch_store_b32 v0, v1, off offset:384 dlc
19661966
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
19671967
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
@@ -1976,10 +1976,10 @@ define amdgpu_kernel void @store_load_vindex_small_offset_kernel(i32 %n) {
19761976
; GFX12-LABEL: store_load_vindex_small_offset_kernel:
19771977
; GFX12: ; %bb.0: ; %bb
19781978
; GFX12-NEXT: s_load_b32 s0, s[4:5], 0x24
1979-
; GFX12-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_and_b32 v0, 0x3ff, v0
1979+
; GFX12-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_lshlrev_b32 v0, 2, v0
19801980
; GFX12-NEXT: scratch_load_b32 v3, off, off scope:SCOPE_SYS
19811981
; GFX12-NEXT: s_wait_loadcnt 0x0
1982-
; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1982+
; GFX12-NEXT: v_and_b32_e32 v0, 0xffc, v0
19831983
; GFX12-NEXT: s_wait_kmcnt 0x0
19841984
; GFX12-NEXT: scratch_store_b32 v0, v1, off offset:384 scope:SCOPE_SYS
19851985
; GFX12-NEXT: s_wait_storecnt 0x0
@@ -2021,8 +2021,8 @@ define amdgpu_kernel void @store_load_vindex_small_offset_kernel(i32 %n) {
20212021
; GFX942-NEXT: s_load_dword s0, s[4:5], 0x24
20222022
; GFX942-NEXT: scratch_load_dword v1, off, off sc0 sc1
20232023
; GFX942-NEXT: s_waitcnt vmcnt(0)
2024-
; GFX942-NEXT: v_and_b32_e32 v0, 0x3ff, v0
20252024
; GFX942-NEXT: v_lshlrev_b32_e32 v0, 2, v0
2025+
; GFX942-NEXT: v_and_b32_e32 v0, 0xffc, v0
20262026
; GFX942-NEXT: v_mov_b32_e32 v1, 15
20272027
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
20282028
; GFX942-NEXT: s_lshl_b32 s0, s0, 7
@@ -2092,10 +2092,10 @@ define amdgpu_kernel void @store_load_vindex_small_offset_kernel(i32 %n) {
20922092
; GFX11-PAL-LABEL: store_load_vindex_small_offset_kernel:
20932093
; GFX11-PAL: ; %bb.0: ; %bb
20942094
; GFX11-PAL-NEXT: s_load_b32 s0, s[4:5], 0x0
2095-
; GFX11-PAL-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_and_b32 v0, 0x3ff, v0
2095+
; GFX11-PAL-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_lshlrev_b32 v0, 2, v0
20962096
; GFX11-PAL-NEXT: scratch_load_b32 v3, off, off glc dlc
20972097
; GFX11-PAL-NEXT: s_waitcnt vmcnt(0)
2098-
; GFX11-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
2098+
; GFX11-PAL-NEXT: v_and_b32_e32 v0, 0xffc, v0
20992099
; GFX11-PAL-NEXT: scratch_store_b32 v0, v1, off offset:384 dlc
21002100
; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0
21012101
; GFX11-PAL-NEXT: s_waitcnt lgkmcnt(0)
@@ -2110,10 +2110,10 @@ define amdgpu_kernel void @store_load_vindex_small_offset_kernel(i32 %n) {
21102110
; GFX12-PAL-LABEL: store_load_vindex_small_offset_kernel:
21112111
; GFX12-PAL: ; %bb.0: ; %bb
21122112
; GFX12-PAL-NEXT: s_load_b32 s0, s[4:5], 0x0
2113-
; GFX12-PAL-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_and_b32 v0, 0x3ff, v0
2113+
; GFX12-PAL-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_lshlrev_b32 v0, 2, v0
21142114
; GFX12-PAL-NEXT: scratch_load_b32 v3, off, off scope:SCOPE_SYS
21152115
; GFX12-PAL-NEXT: s_wait_loadcnt 0x0
2116-
; GFX12-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
2116+
; GFX12-PAL-NEXT: v_and_b32_e32 v0, 0xffc, v0
21172117
; GFX12-PAL-NEXT: s_wait_kmcnt 0x0
21182118
; GFX12-PAL-NEXT: scratch_store_b32 v0, v1, off offset:384 scope:SCOPE_SYS
21192119
; GFX12-PAL-NEXT: s_wait_storecnt 0x0
@@ -3254,10 +3254,10 @@ define amdgpu_kernel void @store_load_vindex_large_offset_kernel(i32 %n) {
32543254
; GFX11-LABEL: store_load_vindex_large_offset_kernel:
32553255
; GFX11: ; %bb.0: ; %bb
32563256
; GFX11-NEXT: s_load_b32 s0, s[4:5], 0x24
3257-
; GFX11-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_and_b32 v0, 0x3ff, v0
3257+
; GFX11-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_lshlrev_b32 v0, 2, v0
32583258
; GFX11-NEXT: scratch_load_b32 v3, off, off offset:4 glc dlc
32593259
; GFX11-NEXT: s_waitcnt vmcnt(0)
3260-
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
3260+
; GFX11-NEXT: v_and_b32_e32 v0, 0xffc, v0
32613261
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
32623262
; GFX11-NEXT: s_lshl_b32 s0, s0, 7
32633263
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
@@ -3274,10 +3274,10 @@ define amdgpu_kernel void @store_load_vindex_large_offset_kernel(i32 %n) {
32743274
; GFX12-LABEL: store_load_vindex_large_offset_kernel:
32753275
; GFX12: ; %bb.0: ; %bb
32763276
; GFX12-NEXT: s_load_b32 s0, s[4:5], 0x24
3277-
; GFX12-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_and_b32 v0, 0x3ff, v0
3277+
; GFX12-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_lshlrev_b32 v0, 2, v0
32783278
; GFX12-NEXT: scratch_load_b32 v3, off, off scope:SCOPE_SYS
32793279
; GFX12-NEXT: s_wait_loadcnt 0x0
3280-
; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0
3280+
; GFX12-NEXT: v_and_b32_e32 v0, 0xffc, v0
32813281
; GFX12-NEXT: s_wait_kmcnt 0x0
32823282
; GFX12-NEXT: scratch_store_b32 v0, v1, off offset:16512 scope:SCOPE_SYS
32833283
; GFX12-NEXT: s_wait_storecnt 0x0
@@ -3319,8 +3319,8 @@ define amdgpu_kernel void @store_load_vindex_large_offset_kernel(i32 %n) {
33193319
; GFX942-NEXT: s_load_dword s0, s[4:5], 0x24
33203320
; GFX942-NEXT: scratch_load_dword v1, off, off offset:4 sc0 sc1
33213321
; GFX942-NEXT: s_waitcnt vmcnt(0)
3322-
; GFX942-NEXT: v_and_b32_e32 v0, 0x3ff, v0
33233322
; GFX942-NEXT: v_lshlrev_b32_e32 v0, 2, v0
3323+
; GFX942-NEXT: v_and_b32_e32 v0, 0xffc, v0
33243324
; GFX942-NEXT: v_mov_b32_e32 v1, 15
33253325
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
33263326
; GFX942-NEXT: s_lshl_b32 s0, s0, 7
@@ -3391,10 +3391,10 @@ define amdgpu_kernel void @store_load_vindex_large_offset_kernel(i32 %n) {
33913391
; GFX11-PAL-LABEL: store_load_vindex_large_offset_kernel:
33923392
; GFX11-PAL: ; %bb.0: ; %bb
33933393
; GFX11-PAL-NEXT: s_load_b32 s0, s[4:5], 0x0
3394-
; GFX11-PAL-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_and_b32 v0, 0x3ff, v0
3394+
; GFX11-PAL-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_lshlrev_b32 v0, 2, v0
33953395
; GFX11-PAL-NEXT: scratch_load_b32 v3, off, off offset:4 glc dlc
33963396
; GFX11-PAL-NEXT: s_waitcnt vmcnt(0)
3397-
; GFX11-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
3397+
; GFX11-PAL-NEXT: v_and_b32_e32 v0, 0xffc, v0
33983398
; GFX11-PAL-NEXT: s_waitcnt lgkmcnt(0)
33993399
; GFX11-PAL-NEXT: s_lshl_b32 s0, s0, 7
34003400
; GFX11-PAL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
@@ -3411,10 +3411,10 @@ define amdgpu_kernel void @store_load_vindex_large_offset_kernel(i32 %n) {
34113411
; GFX12-PAL-LABEL: store_load_vindex_large_offset_kernel:
34123412
; GFX12-PAL: ; %bb.0: ; %bb
34133413
; GFX12-PAL-NEXT: s_load_b32 s0, s[4:5], 0x0
3414-
; GFX12-PAL-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_and_b32 v0, 0x3ff, v0
3414+
; GFX12-PAL-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_lshlrev_b32 v0, 2, v0
34153415
; GFX12-PAL-NEXT: scratch_load_b32 v3, off, off scope:SCOPE_SYS
34163416
; GFX12-PAL-NEXT: s_wait_loadcnt 0x0
3417-
; GFX12-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
3417+
; GFX12-PAL-NEXT: v_and_b32_e32 v0, 0xffc, v0
34183418
; GFX12-PAL-NEXT: s_wait_kmcnt 0x0
34193419
; GFX12-PAL-NEXT: scratch_store_b32 v0, v1, off offset:16512 scope:SCOPE_SYS
34203420
; GFX12-PAL-NEXT: s_wait_storecnt 0x0

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@ define amdgpu_kernel void @test_iglp_opt_mfma_gemm(ptr addrspace(3) noalias %in,
1515
; GCN-LABEL: test_iglp_opt_mfma_gemm:
1616
; GCN: ; %bb.0: ; %entry
1717
; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
18-
; GCN-NEXT: v_and_b32_e32 v0, 0x3ff, v0
1918
; GCN-NEXT: v_lshlrev_b32_e32 v0, 7, v0
19+
; GCN-NEXT: v_and_b32_e32 v0, 0x1ff80, v0
2020
; GCN-NEXT: v_mov_b32_e32 v3, 2.0
2121
; GCN-NEXT: ; iglp_opt mask(0x00000000)
2222
; GCN-NEXT: s_waitcnt lgkmcnt(0)
@@ -153,8 +153,8 @@ define amdgpu_kernel void @test_iglp_opt_rev_mfma_gemm(ptr addrspace(3) noalias
153153
; GCN-LABEL: test_iglp_opt_rev_mfma_gemm:
154154
; GCN: ; %bb.0: ; %entry
155155
; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
156-
; GCN-NEXT: v_and_b32_e32 v0, 0x3ff, v0
157156
; GCN-NEXT: v_lshlrev_b32_e32 v0, 7, v0
157+
; GCN-NEXT: v_and_b32_e32 v0, 0x1ff80, v0
158158
; GCN-NEXT: v_mov_b32_e32 v2, 1.0
159159
; GCN-NEXT: v_mov_b32_e32 v3, 2.0
160160
; GCN-NEXT: s_waitcnt lgkmcnt(0)
@@ -289,8 +289,8 @@ define amdgpu_kernel void @test_iglp_opt_asm_sideeffect(ptr addrspace(3) noalias
289289
; GCN-LABEL: test_iglp_opt_asm_sideeffect:
290290
; GCN: ; %bb.0: ; %entry
291291
; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
292-
; GCN-NEXT: v_and_b32_e32 v0, 0x3ff, v0
293292
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
293+
; GCN-NEXT: v_and_b32_e32 v0, 0xffc, v0
294294
; GCN-NEXT: ; iglp_opt mask(0x00000000)
295295
; GCN-NEXT: s_waitcnt lgkmcnt(0)
296296
; GCN-NEXT: v_add_u32_e32 v1, s0, v0

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sched.group.barrier.gfx11.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,9 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_WMMA_cluster(ptr ad
66
; GCN-LABEL: test_sched_group_barrier_pipeline_WMMA_cluster:
77
; GCN: ; %bb.0: ; %entry
88
; GCN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
9-
; GCN-NEXT: v_and_b32_e32 v0, 0x3ff, v0
9+
; GCN-NEXT: v_lshlrev_b32_e32 v0, 5, v0
1010
; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
11-
; GCN-NEXT: v_lshlrev_b32_e32 v40, 5, v0
11+
; GCN-NEXT: v_and_b32_e32 v40, 0x7fe0, v0
1212
; GCN-NEXT: s_waitcnt lgkmcnt(0)
1313
; GCN-NEXT: v_add_nc_u32_e32 v32, s0, v40
1414
; GCN-NEXT: v_dual_mov_b32 v81, s1 :: v_dual_add_nc_u32 v80, s1, v40
@@ -74,9 +74,9 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_WMMA_cluster(ptr ad
7474
; EXACTCUTOFF-LABEL: test_sched_group_barrier_pipeline_WMMA_cluster:
7575
; EXACTCUTOFF: ; %bb.0: ; %entry
7676
; EXACTCUTOFF-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
77-
; EXACTCUTOFF-NEXT: v_and_b32_e32 v0, 0x3ff, v0
77+
; EXACTCUTOFF-NEXT: v_lshlrev_b32_e32 v0, 5, v0
7878
; EXACTCUTOFF-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
79-
; EXACTCUTOFF-NEXT: v_lshlrev_b32_e32 v40, 5, v0
79+
; EXACTCUTOFF-NEXT: v_and_b32_e32 v40, 0x7fe0, v0
8080
; EXACTCUTOFF-NEXT: s_waitcnt lgkmcnt(0)
8181
; EXACTCUTOFF-NEXT: v_add_nc_u32_e32 v32, s0, v40
8282
; EXACTCUTOFF-NEXT: v_dual_mov_b32 v81, s1 :: v_dual_add_nc_u32 v80, s1, v40
@@ -178,9 +178,9 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_WMMA_interleave(ptr
178178
; GCN-LABEL: test_sched_group_barrier_pipeline_WMMA_interleave:
179179
; GCN: ; %bb.0: ; %entry
180180
; GCN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
181-
; GCN-NEXT: v_and_b32_e32 v0, 0x3ff, v0
181+
; GCN-NEXT: v_lshlrev_b32_e32 v0, 5, v0
182182
; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
183-
; GCN-NEXT: v_lshlrev_b32_e32 v16, 5, v0
183+
; GCN-NEXT: v_and_b32_e32 v16, 0x7fe0, v0
184184
; GCN-NEXT: s_waitcnt lgkmcnt(0)
185185
; GCN-NEXT: v_add_nc_u32_e32 v17, s0, v16
186186
; GCN-NEXT: v_add_nc_u32_e32 v16, s1, v16
@@ -260,9 +260,9 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_WMMA_interleave(ptr
260260
; EXACTCUTOFF-LABEL: test_sched_group_barrier_pipeline_WMMA_interleave:
261261
; EXACTCUTOFF: ; %bb.0: ; %entry
262262
; EXACTCUTOFF-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
263-
; EXACTCUTOFF-NEXT: v_and_b32_e32 v0, 0x3ff, v0
263+
; EXACTCUTOFF-NEXT: v_lshlrev_b32_e32 v0, 5, v0
264264
; EXACTCUTOFF-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
265-
; EXACTCUTOFF-NEXT: v_lshlrev_b32_e32 v16, 5, v0
265+
; EXACTCUTOFF-NEXT: v_and_b32_e32 v16, 0x7fe0, v0
266266
; EXACTCUTOFF-NEXT: s_waitcnt lgkmcnt(0)
267267
; EXACTCUTOFF-NEXT: v_add_nc_u32_e32 v17, s0, v16
268268
; EXACTCUTOFF-NEXT: v_add_nc_u32_e32 v16, s1, v16

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sched.group.barrier.gfx12.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,10 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_SWMMAC_cluster(ptr
88
; GCN-LABEL: test_sched_group_barrier_pipeline_SWMMAC_cluster:
99
; GCN: ; %bb.0: ; %entry
1010
; GCN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
11-
; GCN-NEXT: v_and_b32_e32 v0, 0x3ff, v0
11+
; GCN-NEXT: v_lshlrev_b32_e32 v0, 4, v0
1212
; GCN-NEXT: v_mov_b32_e32 v48, 0
1313
; GCN-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
14-
; GCN-NEXT: v_lshlrev_b32_e32 v28, 4, v0
14+
; GCN-NEXT: v_and_b32_e32 v28, 0x3ff0, v0
1515
; GCN-NEXT: s_wait_kmcnt 0x0
1616
; GCN-NEXT: v_add_nc_u32_e32 v0, s0, v28
1717
; GCN-NEXT: v_dual_mov_b32 v50, s1 :: v_dual_add_nc_u32 v49, s1, v28
@@ -60,10 +60,10 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_SWMMAC_cluster(ptr
6060
; EXACTCUTOFF-LABEL: test_sched_group_barrier_pipeline_SWMMAC_cluster:
6161
; EXACTCUTOFF: ; %bb.0: ; %entry
6262
; EXACTCUTOFF-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
63-
; EXACTCUTOFF-NEXT: v_and_b32_e32 v0, 0x3ff, v0
63+
; EXACTCUTOFF-NEXT: v_lshlrev_b32_e32 v0, 4, v0
6464
; EXACTCUTOFF-NEXT: v_mov_b32_e32 v48, 0
6565
; EXACTCUTOFF-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
66-
; EXACTCUTOFF-NEXT: v_lshlrev_b32_e32 v28, 4, v0
66+
; EXACTCUTOFF-NEXT: v_and_b32_e32 v28, 0x3ff0, v0
6767
; EXACTCUTOFF-NEXT: s_wait_kmcnt 0x0
6868
; EXACTCUTOFF-NEXT: v_add_nc_u32_e32 v0, s0, v28
6969
; EXACTCUTOFF-NEXT: v_dual_mov_b32 v50, s1 :: v_dual_add_nc_u32 v49, s1, v28

0 commit comments

Comments
 (0)