Skip to content

Commit 4adb8ff

Browse files
shiltian and rampitec committed
[NFC][AMDGPU] Add an IR test for v_cvt_f16_bf8
This test was left out during upstreaming. Co-authored-by: Mekhanoshin, Stanislav <[email protected]>
1 parent 3be44e2 commit 4adb8ff

File tree

1 file changed

+166
-0
lines changed

1 file changed

+166
-0
lines changed

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.f16.fp8.ll

Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,172 @@ declare half @llvm.amdgcn.cvt.f16.fp8(i32, i32)
99
declare <2 x half> @llvm.amdgcn.cvt.pk.f16.bf8(i16)
1010
declare <2 x half> @llvm.amdgcn.cvt.pk.f16.fp8(i16)
1111

12+
; Calls @llvm.amdgcn.cvt.f16.bf8 on %a with a second operand of 0 and extends
; the half result to float. All four check prefixes expect the e32 form of
; v_cvt_f16_bf8 with no byte_sel modifier, followed by v_cvt_f32_f16; the
; REAL16 variants address the 16-bit result as v0.l, the FAKE16 variants as v0.
define amdgpu_ps float @test_cvt_f16_bf8_byte0(i32 %a) {
13+
; GFX1250-SDAG-REAL16-LABEL: test_cvt_f16_bf8_byte0:
14+
; GFX1250-SDAG-REAL16: ; %bb.0:
15+
; GFX1250-SDAG-REAL16-NEXT: v_cvt_f16_bf8_e32 v0.l, v0
16+
; GFX1250-SDAG-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1)
17+
; GFX1250-SDAG-REAL16-NEXT: v_cvt_f32_f16_e32 v0, v0.l
18+
; GFX1250-SDAG-REAL16-NEXT: ; return to shader part epilog
19+
;
20+
; GFX1250-SDAG-FAKE16-LABEL: test_cvt_f16_bf8_byte0:
21+
; GFX1250-SDAG-FAKE16: ; %bb.0:
22+
; GFX1250-SDAG-FAKE16-NEXT: v_cvt_f16_bf8_e32 v0, v0
23+
; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
24+
; GFX1250-SDAG-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, v0
25+
; GFX1250-SDAG-FAKE16-NEXT: ; return to shader part epilog
26+
;
27+
; GFX1250-GISEL-REAL16-LABEL: test_cvt_f16_bf8_byte0:
28+
; GFX1250-GISEL-REAL16: ; %bb.0:
29+
; GFX1250-GISEL-REAL16-NEXT: v_cvt_f16_bf8_e32 v0.l, v0
30+
; GFX1250-GISEL-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1)
31+
; GFX1250-GISEL-REAL16-NEXT: v_cvt_f32_f16_e32 v0, v0.l
32+
; GFX1250-GISEL-REAL16-NEXT: ; return to shader part epilog
33+
;
34+
; GFX1250-GISEL-FAKE16-LABEL: test_cvt_f16_bf8_byte0:
35+
; GFX1250-GISEL-FAKE16: ; %bb.0:
36+
; GFX1250-GISEL-FAKE16-NEXT: v_cvt_f16_bf8_e32 v0, v0
37+
; GFX1250-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
38+
; GFX1250-GISEL-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, v0
39+
; GFX1250-GISEL-FAKE16-NEXT: ; return to shader part epilog
40+
%cvt = tail call half @llvm.amdgcn.cvt.f16.bf8(i32 %a, i32 0)
41+
%ret = fpext half %cvt to float
42+
ret float %ret
43+
}
44+
45+
; Calls @llvm.amdgcn.cvt.f16.bf8 on %a with a second operand of 1 and extends
; the half result to float. All four check prefixes expect the e64 form of
; v_cvt_f16_bf8 carrying byte_sel:1, followed by v_cvt_f32_f16; the REAL16
; variants address the 16-bit result as v0.l, the FAKE16 variants as v0.
define amdgpu_ps float @test_cvt_f16_bf8_byte1(i32 %a) {
46+
; GFX1250-SDAG-REAL16-LABEL: test_cvt_f16_bf8_byte1:
47+
; GFX1250-SDAG-REAL16: ; %bb.0:
48+
; GFX1250-SDAG-REAL16-NEXT: v_cvt_f16_bf8_e64 v0.l, v0 byte_sel:1
49+
; GFX1250-SDAG-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1)
50+
; GFX1250-SDAG-REAL16-NEXT: v_cvt_f32_f16_e32 v0, v0.l
51+
; GFX1250-SDAG-REAL16-NEXT: ; return to shader part epilog
52+
;
53+
; GFX1250-SDAG-FAKE16-LABEL: test_cvt_f16_bf8_byte1:
54+
; GFX1250-SDAG-FAKE16: ; %bb.0:
55+
; GFX1250-SDAG-FAKE16-NEXT: v_cvt_f16_bf8_e64 v0, v0 byte_sel:1
56+
; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
57+
; GFX1250-SDAG-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, v0
58+
; GFX1250-SDAG-FAKE16-NEXT: ; return to shader part epilog
59+
;
60+
; GFX1250-GISEL-REAL16-LABEL: test_cvt_f16_bf8_byte1:
61+
; GFX1250-GISEL-REAL16: ; %bb.0:
62+
; GFX1250-GISEL-REAL16-NEXT: v_cvt_f16_bf8_e64 v0.l, v0 byte_sel:1
63+
; GFX1250-GISEL-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1)
64+
; GFX1250-GISEL-REAL16-NEXT: v_cvt_f32_f16_e32 v0, v0.l
65+
; GFX1250-GISEL-REAL16-NEXT: ; return to shader part epilog
66+
;
67+
; GFX1250-GISEL-FAKE16-LABEL: test_cvt_f16_bf8_byte1:
68+
; GFX1250-GISEL-FAKE16: ; %bb.0:
69+
; GFX1250-GISEL-FAKE16-NEXT: v_cvt_f16_bf8_e64 v0, v0 byte_sel:1
70+
; GFX1250-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
71+
; GFX1250-GISEL-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, v0
72+
; GFX1250-GISEL-FAKE16-NEXT: ; return to shader part epilog
73+
%cvt = tail call half @llvm.amdgcn.cvt.f16.bf8(i32 %a, i32 1)
74+
%ret = fpext half %cvt to float
75+
ret float %ret
76+
}
77+
78+
; Calls @llvm.amdgcn.cvt.f16.bf8 on %a with a second operand of 2 and extends
; the half result to float. All four check prefixes expect the e64 form of
; v_cvt_f16_bf8 carrying byte_sel:2, followed by v_cvt_f32_f16; the REAL16
; variants address the 16-bit result as v0.l, the FAKE16 variants as v0.
define amdgpu_ps float @test_cvt_f16_bf8_byte2(i32 %a) {
79+
; GFX1250-SDAG-REAL16-LABEL: test_cvt_f16_bf8_byte2:
80+
; GFX1250-SDAG-REAL16: ; %bb.0:
81+
; GFX1250-SDAG-REAL16-NEXT: v_cvt_f16_bf8_e64 v0.l, v0 byte_sel:2
82+
; GFX1250-SDAG-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1)
83+
; GFX1250-SDAG-REAL16-NEXT: v_cvt_f32_f16_e32 v0, v0.l
84+
; GFX1250-SDAG-REAL16-NEXT: ; return to shader part epilog
85+
;
86+
; GFX1250-SDAG-FAKE16-LABEL: test_cvt_f16_bf8_byte2:
87+
; GFX1250-SDAG-FAKE16: ; %bb.0:
88+
; GFX1250-SDAG-FAKE16-NEXT: v_cvt_f16_bf8_e64 v0, v0 byte_sel:2
89+
; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
90+
; GFX1250-SDAG-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, v0
91+
; GFX1250-SDAG-FAKE16-NEXT: ; return to shader part epilog
92+
;
93+
; GFX1250-GISEL-REAL16-LABEL: test_cvt_f16_bf8_byte2:
94+
; GFX1250-GISEL-REAL16: ; %bb.0:
95+
; GFX1250-GISEL-REAL16-NEXT: v_cvt_f16_bf8_e64 v0.l, v0 byte_sel:2
96+
; GFX1250-GISEL-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1)
97+
; GFX1250-GISEL-REAL16-NEXT: v_cvt_f32_f16_e32 v0, v0.l
98+
; GFX1250-GISEL-REAL16-NEXT: ; return to shader part epilog
99+
;
100+
; GFX1250-GISEL-FAKE16-LABEL: test_cvt_f16_bf8_byte2:
101+
; GFX1250-GISEL-FAKE16: ; %bb.0:
102+
; GFX1250-GISEL-FAKE16-NEXT: v_cvt_f16_bf8_e64 v0, v0 byte_sel:2
103+
; GFX1250-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
104+
; GFX1250-GISEL-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, v0
105+
; GFX1250-GISEL-FAKE16-NEXT: ; return to shader part epilog
106+
%cvt = tail call half @llvm.amdgcn.cvt.f16.bf8(i32 %a, i32 2)
107+
%ret = fpext half %cvt to float
108+
ret float %ret
109+
}
110+
111+
; Calls @llvm.amdgcn.cvt.f16.bf8 on %a with a second operand of 3 and extends
; the half result to float. All four check prefixes expect the e64 form of
; v_cvt_f16_bf8 carrying byte_sel:3, followed by v_cvt_f32_f16; the REAL16
; variants address the 16-bit result as v0.l, the FAKE16 variants as v0.
define amdgpu_ps float @test_cvt_f16_bf8_byte3(i32 %a) {
112+
; GFX1250-SDAG-REAL16-LABEL: test_cvt_f16_bf8_byte3:
113+
; GFX1250-SDAG-REAL16: ; %bb.0:
114+
; GFX1250-SDAG-REAL16-NEXT: v_cvt_f16_bf8_e64 v0.l, v0 byte_sel:3
115+
; GFX1250-SDAG-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1)
116+
; GFX1250-SDAG-REAL16-NEXT: v_cvt_f32_f16_e32 v0, v0.l
117+
; GFX1250-SDAG-REAL16-NEXT: ; return to shader part epilog
118+
;
119+
; GFX1250-SDAG-FAKE16-LABEL: test_cvt_f16_bf8_byte3:
120+
; GFX1250-SDAG-FAKE16: ; %bb.0:
121+
; GFX1250-SDAG-FAKE16-NEXT: v_cvt_f16_bf8_e64 v0, v0 byte_sel:3
122+
; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
123+
; GFX1250-SDAG-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, v0
124+
; GFX1250-SDAG-FAKE16-NEXT: ; return to shader part epilog
125+
;
126+
; GFX1250-GISEL-REAL16-LABEL: test_cvt_f16_bf8_byte3:
127+
; GFX1250-GISEL-REAL16: ; %bb.0:
128+
; GFX1250-GISEL-REAL16-NEXT: v_cvt_f16_bf8_e64 v0.l, v0 byte_sel:3
129+
; GFX1250-GISEL-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1)
130+
; GFX1250-GISEL-REAL16-NEXT: v_cvt_f32_f16_e32 v0, v0.l
131+
; GFX1250-GISEL-REAL16-NEXT: ; return to shader part epilog
132+
;
133+
; GFX1250-GISEL-FAKE16-LABEL: test_cvt_f16_bf8_byte3:
134+
; GFX1250-GISEL-FAKE16: ; %bb.0:
135+
; GFX1250-GISEL-FAKE16-NEXT: v_cvt_f16_bf8_e64 v0, v0 byte_sel:3
136+
; GFX1250-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
137+
; GFX1250-GISEL-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, v0
138+
; GFX1250-GISEL-FAKE16-NEXT: ; return to shader part epilog
139+
%cvt = tail call half @llvm.amdgcn.cvt.f16.bf8(i32 %a, i32 3)
140+
%ret = fpext half %cvt to float
141+
ret float %ret
142+
}
143+
144+
; Calls @llvm.amdgcn.cvt.f16.bf8 on %a with a second operand of 3 and places
; the half result in element 1 of a <2 x half> whose element 0 is 0.0; the
; vector is then bitcast to float and returned. The SDAG-REAL16 checks expect
; the conversion to write v0.h directly with v0.l zeroed by v_mov_b16; the
; other prefixes expect the conversion followed by v_perm_b32 (SDAG-FAKE16) or
; v_lshl_or_b32 (both GISEL variants).
; Both elements of the vector are written below, so the base vector is
; `poison` rather than the deprecated `undef`.
define amdgpu_ps float @test_cvt_f16_bf8_byte3_hi(i32 %a) {
145+
; GFX1250-SDAG-REAL16-LABEL: test_cvt_f16_bf8_byte3_hi:
146+
; GFX1250-SDAG-REAL16: ; %bb.0:
147+
; GFX1250-SDAG-REAL16-NEXT: v_cvt_f16_bf8_e64 v0.h, v0 byte_sel:3
148+
; GFX1250-SDAG-REAL16-NEXT: v_mov_b16_e32 v0.l, 0
149+
; GFX1250-SDAG-REAL16-NEXT: ; return to shader part epilog
150+
;
151+
; GFX1250-SDAG-FAKE16-LABEL: test_cvt_f16_bf8_byte3_hi:
152+
; GFX1250-SDAG-FAKE16: ; %bb.0:
153+
; GFX1250-SDAG-FAKE16-NEXT: v_cvt_f16_bf8_e64 v0, v0 byte_sel:3
154+
; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
155+
; GFX1250-SDAG-FAKE16-NEXT: v_perm_b32 v0, v0, 0, 0x5040100
156+
; GFX1250-SDAG-FAKE16-NEXT: ; return to shader part epilog
157+
;
158+
; GFX1250-GISEL-REAL16-LABEL: test_cvt_f16_bf8_byte3_hi:
159+
; GFX1250-GISEL-REAL16: ; %bb.0:
160+
; GFX1250-GISEL-REAL16-NEXT: v_cvt_f16_bf8_e64 v0.l, v0 byte_sel:3
161+
; GFX1250-GISEL-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1)
162+
; GFX1250-GISEL-REAL16-NEXT: v_lshl_or_b32 v0, v0, 16, 0
163+
; GFX1250-GISEL-REAL16-NEXT: ; return to shader part epilog
164+
;
165+
; GFX1250-GISEL-FAKE16-LABEL: test_cvt_f16_bf8_byte3_hi:
166+
; GFX1250-GISEL-FAKE16: ; %bb.0:
167+
; GFX1250-GISEL-FAKE16-NEXT: v_cvt_f16_bf8_e64 v0, v0 byte_sel:3
168+
; GFX1250-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
169+
; GFX1250-GISEL-FAKE16-NEXT: v_lshl_or_b32 v0, v0, 16, 0
170+
; GFX1250-GISEL-FAKE16-NEXT: ; return to shader part epilog
171+
%cvt = tail call half @llvm.amdgcn.cvt.f16.bf8(i32 %a, i32 3)
172+
%ins.0 = insertelement <2 x half> poison, half 0.0, i32 0
173+
%ins.1 = insertelement <2 x half> %ins.0, half %cvt, i32 1
174+
%ret = bitcast <2 x half> %ins.1 to float
175+
ret float %ret
176+
}
177+
12178
define amdgpu_ps float @test_cvt_f16_fp8_byte0(i32 %a) {
13179
; GFX1250-SDAG-REAL16-LABEL: test_cvt_f16_fp8_byte0:
14180
; GFX1250-SDAG-REAL16: ; %bb.0:

0 commit comments

Comments
 (0)