@@ -714,10 +714,10 @@ define amdgpu_kernel void @store_load_vindex_kernel(i32 %n) {
714
714
; GFX11-LABEL: store_load_vindex_kernel:
715
715
; GFX11: ; %bb.0: ; %bb
716
716
; GFX11-NEXT: s_load_b32 s0, s[4:5], 0x24
717
- ; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff , v0
717
+ ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2 , v0
718
718
; GFX11-NEXT: v_mov_b32_e32 v2, 15
719
719
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
720
- ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2 , v0
720
+ ; GFX11-NEXT: v_and_b32_e32 v0, 0xffc , v0
721
721
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
722
722
; GFX11-NEXT: s_lshl_b32 s0, s0, 7
723
723
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
@@ -732,9 +732,9 @@ define amdgpu_kernel void @store_load_vindex_kernel(i32 %n) {
732
732
; GFX12-LABEL: store_load_vindex_kernel:
733
733
; GFX12: ; %bb.0: ; %bb
734
734
; GFX12-NEXT: s_load_b32 s0, s[4:5], 0x24
735
- ; GFX12-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_and_b32 v0, 0x3ff , v0
735
+ ; GFX12-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_lshlrev_b32 v0, 2 , v0
736
736
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
737
- ; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2 , v0
737
+ ; GFX12-NEXT: v_and_b32_e32 v0, 0xffc , v0
738
738
; GFX12-NEXT: s_wait_kmcnt 0x0
739
739
; GFX12-NEXT: s_lshl_b32 s0, s0, 7
740
740
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
@@ -769,8 +769,8 @@ define amdgpu_kernel void @store_load_vindex_kernel(i32 %n) {
769
769
; GFX942-LABEL: store_load_vindex_kernel:
770
770
; GFX942: ; %bb.0: ; %bb
771
771
; GFX942-NEXT: s_load_dword s0, s[4:5], 0x24
772
- ; GFX942-NEXT: v_and_b32_e32 v0, 0x3ff, v0
773
772
; GFX942-NEXT: v_lshlrev_b32_e32 v0, 2, v0
773
+ ; GFX942-NEXT: v_and_b32_e32 v0, 0xffc, v0
774
774
; GFX942-NEXT: v_mov_b32_e32 v1, 15
775
775
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
776
776
; GFX942-NEXT: s_lshl_b32 s0, s0, 7
@@ -809,10 +809,10 @@ define amdgpu_kernel void @store_load_vindex_kernel(i32 %n) {
809
809
; GFX11-PAL-LABEL: store_load_vindex_kernel:
810
810
; GFX11-PAL: ; %bb.0: ; %bb
811
811
; GFX11-PAL-NEXT: s_load_b32 s0, s[4:5], 0x0
812
- ; GFX11-PAL-NEXT: v_and_b32_e32 v0, 0x3ff , v0
812
+ ; GFX11-PAL-NEXT: v_lshlrev_b32_e32 v0, 2 , v0
813
813
; GFX11-PAL-NEXT: v_mov_b32_e32 v2, 15
814
814
; GFX11-PAL-NEXT: s_delay_alu instid0(VALU_DEP_2)
815
- ; GFX11-PAL-NEXT: v_lshlrev_b32_e32 v0, 2 , v0
815
+ ; GFX11-PAL-NEXT: v_and_b32_e32 v0, 0xffc , v0
816
816
; GFX11-PAL-NEXT: s_waitcnt lgkmcnt(0)
817
817
; GFX11-PAL-NEXT: s_lshl_b32 s0, s0, 7
818
818
; GFX11-PAL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
@@ -827,9 +827,9 @@ define amdgpu_kernel void @store_load_vindex_kernel(i32 %n) {
827
827
; GFX12-PAL-LABEL: store_load_vindex_kernel:
828
828
; GFX12-PAL: ; %bb.0: ; %bb
829
829
; GFX12-PAL-NEXT: s_load_b32 s0, s[4:5], 0x0
830
- ; GFX12-PAL-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_and_b32 v0, 0x3ff , v0
830
+ ; GFX12-PAL-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_lshlrev_b32 v0, 2 , v0
831
831
; GFX12-PAL-NEXT: s_delay_alu instid0(VALU_DEP_1)
832
- ; GFX12-PAL-NEXT: v_lshlrev_b32_e32 v0, 2 , v0
832
+ ; GFX12-PAL-NEXT: v_and_b32_e32 v0, 0xffc , v0
833
833
; GFX12-PAL-NEXT: s_wait_kmcnt 0x0
834
834
; GFX12-PAL-NEXT: s_lshl_b32 s0, s0, 7
835
835
; GFX12-PAL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
@@ -1958,10 +1958,10 @@ define amdgpu_kernel void @store_load_vindex_small_offset_kernel(i32 %n) {
1958
1958
; GFX11-LABEL: store_load_vindex_small_offset_kernel:
1959
1959
; GFX11: ; %bb.0: ; %bb
1960
1960
; GFX11-NEXT: s_load_b32 s0, s[4:5], 0x24
1961
- ; GFX11-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_and_b32 v0, 0x3ff , v0
1961
+ ; GFX11-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_lshlrev_b32 v0, 2 , v0
1962
1962
; GFX11-NEXT: scratch_load_b32 v3, off, off glc dlc
1963
1963
; GFX11-NEXT: s_waitcnt vmcnt(0)
1964
- ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2 , v0
1964
+ ; GFX11-NEXT: v_and_b32_e32 v0, 0xffc , v0
1965
1965
; GFX11-NEXT: scratch_store_b32 v0, v1, off offset:384 dlc
1966
1966
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1967
1967
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
@@ -1976,10 +1976,10 @@ define amdgpu_kernel void @store_load_vindex_small_offset_kernel(i32 %n) {
1976
1976
; GFX12-LABEL: store_load_vindex_small_offset_kernel:
1977
1977
; GFX12: ; %bb.0: ; %bb
1978
1978
; GFX12-NEXT: s_load_b32 s0, s[4:5], 0x24
1979
- ; GFX12-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_and_b32 v0, 0x3ff , v0
1979
+ ; GFX12-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_lshlrev_b32 v0, 2 , v0
1980
1980
; GFX12-NEXT: scratch_load_b32 v3, off, off scope:SCOPE_SYS
1981
1981
; GFX12-NEXT: s_wait_loadcnt 0x0
1982
- ; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2 , v0
1982
+ ; GFX12-NEXT: v_and_b32_e32 v0, 0xffc , v0
1983
1983
; GFX12-NEXT: s_wait_kmcnt 0x0
1984
1984
; GFX12-NEXT: scratch_store_b32 v0, v1, off offset:384 scope:SCOPE_SYS
1985
1985
; GFX12-NEXT: s_wait_storecnt 0x0
@@ -2021,8 +2021,8 @@ define amdgpu_kernel void @store_load_vindex_small_offset_kernel(i32 %n) {
2021
2021
; GFX942-NEXT: s_load_dword s0, s[4:5], 0x24
2022
2022
; GFX942-NEXT: scratch_load_dword v1, off, off sc0 sc1
2023
2023
; GFX942-NEXT: s_waitcnt vmcnt(0)
2024
- ; GFX942-NEXT: v_and_b32_e32 v0, 0x3ff, v0
2025
2024
; GFX942-NEXT: v_lshlrev_b32_e32 v0, 2, v0
2025
+ ; GFX942-NEXT: v_and_b32_e32 v0, 0xffc, v0
2026
2026
; GFX942-NEXT: v_mov_b32_e32 v1, 15
2027
2027
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
2028
2028
; GFX942-NEXT: s_lshl_b32 s0, s0, 7
@@ -2092,10 +2092,10 @@ define amdgpu_kernel void @store_load_vindex_small_offset_kernel(i32 %n) {
2092
2092
; GFX11-PAL-LABEL: store_load_vindex_small_offset_kernel:
2093
2093
; GFX11-PAL: ; %bb.0: ; %bb
2094
2094
; GFX11-PAL-NEXT: s_load_b32 s0, s[4:5], 0x0
2095
- ; GFX11-PAL-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_and_b32 v0, 0x3ff , v0
2095
+ ; GFX11-PAL-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_lshlrev_b32 v0, 2 , v0
2096
2096
; GFX11-PAL-NEXT: scratch_load_b32 v3, off, off glc dlc
2097
2097
; GFX11-PAL-NEXT: s_waitcnt vmcnt(0)
2098
- ; GFX11-PAL-NEXT: v_lshlrev_b32_e32 v0, 2 , v0
2098
+ ; GFX11-PAL-NEXT: v_and_b32_e32 v0, 0xffc , v0
2099
2099
; GFX11-PAL-NEXT: scratch_store_b32 v0, v1, off offset:384 dlc
2100
2100
; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0
2101
2101
; GFX11-PAL-NEXT: s_waitcnt lgkmcnt(0)
@@ -2110,10 +2110,10 @@ define amdgpu_kernel void @store_load_vindex_small_offset_kernel(i32 %n) {
2110
2110
; GFX12-PAL-LABEL: store_load_vindex_small_offset_kernel:
2111
2111
; GFX12-PAL: ; %bb.0: ; %bb
2112
2112
; GFX12-PAL-NEXT: s_load_b32 s0, s[4:5], 0x0
2113
- ; GFX12-PAL-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_and_b32 v0, 0x3ff , v0
2113
+ ; GFX12-PAL-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_lshlrev_b32 v0, 2 , v0
2114
2114
; GFX12-PAL-NEXT: scratch_load_b32 v3, off, off scope:SCOPE_SYS
2115
2115
; GFX12-PAL-NEXT: s_wait_loadcnt 0x0
2116
- ; GFX12-PAL-NEXT: v_lshlrev_b32_e32 v0, 2 , v0
2116
+ ; GFX12-PAL-NEXT: v_and_b32_e32 v0, 0xffc , v0
2117
2117
; GFX12-PAL-NEXT: s_wait_kmcnt 0x0
2118
2118
; GFX12-PAL-NEXT: scratch_store_b32 v0, v1, off offset:384 scope:SCOPE_SYS
2119
2119
; GFX12-PAL-NEXT: s_wait_storecnt 0x0
@@ -3254,10 +3254,10 @@ define amdgpu_kernel void @store_load_vindex_large_offset_kernel(i32 %n) {
3254
3254
; GFX11-LABEL: store_load_vindex_large_offset_kernel:
3255
3255
; GFX11: ; %bb.0: ; %bb
3256
3256
; GFX11-NEXT: s_load_b32 s0, s[4:5], 0x24
3257
- ; GFX11-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_and_b32 v0, 0x3ff , v0
3257
+ ; GFX11-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_lshlrev_b32 v0, 2 , v0
3258
3258
; GFX11-NEXT: scratch_load_b32 v3, off, off offset:4 glc dlc
3259
3259
; GFX11-NEXT: s_waitcnt vmcnt(0)
3260
- ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2 , v0
3260
+ ; GFX11-NEXT: v_and_b32_e32 v0, 0xffc , v0
3261
3261
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
3262
3262
; GFX11-NEXT: s_lshl_b32 s0, s0, 7
3263
3263
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
@@ -3274,10 +3274,10 @@ define amdgpu_kernel void @store_load_vindex_large_offset_kernel(i32 %n) {
3274
3274
; GFX12-LABEL: store_load_vindex_large_offset_kernel:
3275
3275
; GFX12: ; %bb.0: ; %bb
3276
3276
; GFX12-NEXT: s_load_b32 s0, s[4:5], 0x24
3277
- ; GFX12-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_and_b32 v0, 0x3ff , v0
3277
+ ; GFX12-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_lshlrev_b32 v0, 2 , v0
3278
3278
; GFX12-NEXT: scratch_load_b32 v3, off, off scope:SCOPE_SYS
3279
3279
; GFX12-NEXT: s_wait_loadcnt 0x0
3280
- ; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2 , v0
3280
+ ; GFX12-NEXT: v_and_b32_e32 v0, 0xffc , v0
3281
3281
; GFX12-NEXT: s_wait_kmcnt 0x0
3282
3282
; GFX12-NEXT: scratch_store_b32 v0, v1, off offset:16512 scope:SCOPE_SYS
3283
3283
; GFX12-NEXT: s_wait_storecnt 0x0
@@ -3319,8 +3319,8 @@ define amdgpu_kernel void @store_load_vindex_large_offset_kernel(i32 %n) {
3319
3319
; GFX942-NEXT: s_load_dword s0, s[4:5], 0x24
3320
3320
; GFX942-NEXT: scratch_load_dword v1, off, off offset:4 sc0 sc1
3321
3321
; GFX942-NEXT: s_waitcnt vmcnt(0)
3322
- ; GFX942-NEXT: v_and_b32_e32 v0, 0x3ff, v0
3323
3322
; GFX942-NEXT: v_lshlrev_b32_e32 v0, 2, v0
3323
+ ; GFX942-NEXT: v_and_b32_e32 v0, 0xffc, v0
3324
3324
; GFX942-NEXT: v_mov_b32_e32 v1, 15
3325
3325
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
3326
3326
; GFX942-NEXT: s_lshl_b32 s0, s0, 7
@@ -3391,10 +3391,10 @@ define amdgpu_kernel void @store_load_vindex_large_offset_kernel(i32 %n) {
3391
3391
; GFX11-PAL-LABEL: store_load_vindex_large_offset_kernel:
3392
3392
; GFX11-PAL: ; %bb.0: ; %bb
3393
3393
; GFX11-PAL-NEXT: s_load_b32 s0, s[4:5], 0x0
3394
- ; GFX11-PAL-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_and_b32 v0, 0x3ff , v0
3394
+ ; GFX11-PAL-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_lshlrev_b32 v0, 2 , v0
3395
3395
; GFX11-PAL-NEXT: scratch_load_b32 v3, off, off offset:4 glc dlc
3396
3396
; GFX11-PAL-NEXT: s_waitcnt vmcnt(0)
3397
- ; GFX11-PAL-NEXT: v_lshlrev_b32_e32 v0, 2 , v0
3397
+ ; GFX11-PAL-NEXT: v_and_b32_e32 v0, 0xffc , v0
3398
3398
; GFX11-PAL-NEXT: s_waitcnt lgkmcnt(0)
3399
3399
; GFX11-PAL-NEXT: s_lshl_b32 s0, s0, 7
3400
3400
; GFX11-PAL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
@@ -3411,10 +3411,10 @@ define amdgpu_kernel void @store_load_vindex_large_offset_kernel(i32 %n) {
3411
3411
; GFX12-PAL-LABEL: store_load_vindex_large_offset_kernel:
3412
3412
; GFX12-PAL: ; %bb.0: ; %bb
3413
3413
; GFX12-PAL-NEXT: s_load_b32 s0, s[4:5], 0x0
3414
- ; GFX12-PAL-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_and_b32 v0, 0x3ff , v0
3414
+ ; GFX12-PAL-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_lshlrev_b32 v0, 2 , v0
3415
3415
; GFX12-PAL-NEXT: scratch_load_b32 v3, off, off scope:SCOPE_SYS
3416
3416
; GFX12-PAL-NEXT: s_wait_loadcnt 0x0
3417
- ; GFX12-PAL-NEXT: v_lshlrev_b32_e32 v0, 2 , v0
3417
+ ; GFX12-PAL-NEXT: v_and_b32_e32 v0, 0xffc , v0
3418
3418
; GFX12-PAL-NEXT: s_wait_kmcnt 0x0
3419
3419
; GFX12-PAL-NEXT: scratch_store_b32 v0, v1, off offset:16512 scope:SCOPE_SYS
3420
3420
; GFX12-PAL-NEXT: s_wait_storecnt 0x0
0 commit comments