@@ -9,6 +9,172 @@ declare half @llvm.amdgcn.cvt.f16.fp8(i32, i32)
9
9
declare <2 x half > @llvm.amdgcn.cvt.pk.f16.bf8 (i16 )
10
10
declare <2 x half > @llvm.amdgcn.cvt.pk.f16.fp8 (i16 )
11
11
12
; Calls llvm.amdgcn.cvt.f16.bf8 with an immediate second operand of 0 and
; widens the half result to float for the return value. Every expected
; sequence below uses the e32 form of v_cvt_f16_bf8 with no byte_sel modifier.
define amdgpu_ps float @test_cvt_f16_bf8_byte0(i32 %a) {
; GFX1250-SDAG-REAL16-LABEL: test_cvt_f16_bf8_byte0:
; GFX1250-SDAG-REAL16:       ; %bb.0:
; GFX1250-SDAG-REAL16-NEXT:    v_cvt_f16_bf8_e32 v0.l, v0
; GFX1250-SDAG-REAL16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-REAL16-NEXT:    v_cvt_f32_f16_e32 v0, v0.l
; GFX1250-SDAG-REAL16-NEXT:    ; return to shader part epilog
;
; GFX1250-SDAG-FAKE16-LABEL: test_cvt_f16_bf8_byte0:
; GFX1250-SDAG-FAKE16:       ; %bb.0:
; GFX1250-SDAG-FAKE16-NEXT:    v_cvt_f16_bf8_e32 v0, v0
; GFX1250-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-FAKE16-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX1250-SDAG-FAKE16-NEXT:    ; return to shader part epilog
;
; GFX1250-GISEL-REAL16-LABEL: test_cvt_f16_bf8_byte0:
; GFX1250-GISEL-REAL16:       ; %bb.0:
; GFX1250-GISEL-REAL16-NEXT:    v_cvt_f16_bf8_e32 v0.l, v0
; GFX1250-GISEL-REAL16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX1250-GISEL-REAL16-NEXT:    v_cvt_f32_f16_e32 v0, v0.l
; GFX1250-GISEL-REAL16-NEXT:    ; return to shader part epilog
;
; GFX1250-GISEL-FAKE16-LABEL: test_cvt_f16_bf8_byte0:
; GFX1250-GISEL-FAKE16:       ; %bb.0:
; GFX1250-GISEL-FAKE16-NEXT:    v_cvt_f16_bf8_e32 v0, v0
; GFX1250-GISEL-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX1250-GISEL-FAKE16-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX1250-GISEL-FAKE16-NEXT:    ; return to shader part epilog
  %f16 = tail call half @llvm.amdgcn.cvt.f16.bf8(i32 %a, i32 0)
  %f32 = fpext half %f16 to float
  ret float %f32
}
44
+
45
; Calls llvm.amdgcn.cvt.f16.bf8 with an immediate second operand of 1 and
; widens the half result to float for the return value. Every expected
; sequence below uses the e64 form of v_cvt_f16_bf8 carrying byte_sel 1.
define amdgpu_ps float @test_cvt_f16_bf8_byte1(i32 %a) {
; GFX1250-SDAG-REAL16-LABEL: test_cvt_f16_bf8_byte1:
; GFX1250-SDAG-REAL16:       ; %bb.0:
; GFX1250-SDAG-REAL16-NEXT:    v_cvt_f16_bf8_e64 v0.l, v0 byte_sel:1
; GFX1250-SDAG-REAL16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-REAL16-NEXT:    v_cvt_f32_f16_e32 v0, v0.l
; GFX1250-SDAG-REAL16-NEXT:    ; return to shader part epilog
;
; GFX1250-SDAG-FAKE16-LABEL: test_cvt_f16_bf8_byte1:
; GFX1250-SDAG-FAKE16:       ; %bb.0:
; GFX1250-SDAG-FAKE16-NEXT:    v_cvt_f16_bf8_e64 v0, v0 byte_sel:1
; GFX1250-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-FAKE16-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX1250-SDAG-FAKE16-NEXT:    ; return to shader part epilog
;
; GFX1250-GISEL-REAL16-LABEL: test_cvt_f16_bf8_byte1:
; GFX1250-GISEL-REAL16:       ; %bb.0:
; GFX1250-GISEL-REAL16-NEXT:    v_cvt_f16_bf8_e64 v0.l, v0 byte_sel:1
; GFX1250-GISEL-REAL16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX1250-GISEL-REAL16-NEXT:    v_cvt_f32_f16_e32 v0, v0.l
; GFX1250-GISEL-REAL16-NEXT:    ; return to shader part epilog
;
; GFX1250-GISEL-FAKE16-LABEL: test_cvt_f16_bf8_byte1:
; GFX1250-GISEL-FAKE16:       ; %bb.0:
; GFX1250-GISEL-FAKE16-NEXT:    v_cvt_f16_bf8_e64 v0, v0 byte_sel:1
; GFX1250-GISEL-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX1250-GISEL-FAKE16-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX1250-GISEL-FAKE16-NEXT:    ; return to shader part epilog
  %f16 = tail call half @llvm.amdgcn.cvt.f16.bf8(i32 %a, i32 1)
  %f32 = fpext half %f16 to float
  ret float %f32
}
77
+
78
; Calls llvm.amdgcn.cvt.f16.bf8 with an immediate second operand of 2 and
; widens the half result to float for the return value. Every expected
; sequence below uses the e64 form of v_cvt_f16_bf8 carrying byte_sel 2.
define amdgpu_ps float @test_cvt_f16_bf8_byte2(i32 %a) {
; GFX1250-SDAG-REAL16-LABEL: test_cvt_f16_bf8_byte2:
; GFX1250-SDAG-REAL16:       ; %bb.0:
; GFX1250-SDAG-REAL16-NEXT:    v_cvt_f16_bf8_e64 v0.l, v0 byte_sel:2
; GFX1250-SDAG-REAL16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-REAL16-NEXT:    v_cvt_f32_f16_e32 v0, v0.l
; GFX1250-SDAG-REAL16-NEXT:    ; return to shader part epilog
;
; GFX1250-SDAG-FAKE16-LABEL: test_cvt_f16_bf8_byte2:
; GFX1250-SDAG-FAKE16:       ; %bb.0:
; GFX1250-SDAG-FAKE16-NEXT:    v_cvt_f16_bf8_e64 v0, v0 byte_sel:2
; GFX1250-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-FAKE16-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX1250-SDAG-FAKE16-NEXT:    ; return to shader part epilog
;
; GFX1250-GISEL-REAL16-LABEL: test_cvt_f16_bf8_byte2:
; GFX1250-GISEL-REAL16:       ; %bb.0:
; GFX1250-GISEL-REAL16-NEXT:    v_cvt_f16_bf8_e64 v0.l, v0 byte_sel:2
; GFX1250-GISEL-REAL16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX1250-GISEL-REAL16-NEXT:    v_cvt_f32_f16_e32 v0, v0.l
; GFX1250-GISEL-REAL16-NEXT:    ; return to shader part epilog
;
; GFX1250-GISEL-FAKE16-LABEL: test_cvt_f16_bf8_byte2:
; GFX1250-GISEL-FAKE16:       ; %bb.0:
; GFX1250-GISEL-FAKE16-NEXT:    v_cvt_f16_bf8_e64 v0, v0 byte_sel:2
; GFX1250-GISEL-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX1250-GISEL-FAKE16-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX1250-GISEL-FAKE16-NEXT:    ; return to shader part epilog
  %f16 = tail call half @llvm.amdgcn.cvt.f16.bf8(i32 %a, i32 2)
  %f32 = fpext half %f16 to float
  ret float %f32
}
110
+
111
; Calls llvm.amdgcn.cvt.f16.bf8 with an immediate second operand of 3 and
; widens the half result to float for the return value. Every expected
; sequence below uses the e64 form of v_cvt_f16_bf8 carrying byte_sel 3.
define amdgpu_ps float @test_cvt_f16_bf8_byte3(i32 %a) {
; GFX1250-SDAG-REAL16-LABEL: test_cvt_f16_bf8_byte3:
; GFX1250-SDAG-REAL16:       ; %bb.0:
; GFX1250-SDAG-REAL16-NEXT:    v_cvt_f16_bf8_e64 v0.l, v0 byte_sel:3
; GFX1250-SDAG-REAL16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-REAL16-NEXT:    v_cvt_f32_f16_e32 v0, v0.l
; GFX1250-SDAG-REAL16-NEXT:    ; return to shader part epilog
;
; GFX1250-SDAG-FAKE16-LABEL: test_cvt_f16_bf8_byte3:
; GFX1250-SDAG-FAKE16:       ; %bb.0:
; GFX1250-SDAG-FAKE16-NEXT:    v_cvt_f16_bf8_e64 v0, v0 byte_sel:3
; GFX1250-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-FAKE16-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX1250-SDAG-FAKE16-NEXT:    ; return to shader part epilog
;
; GFX1250-GISEL-REAL16-LABEL: test_cvt_f16_bf8_byte3:
; GFX1250-GISEL-REAL16:       ; %bb.0:
; GFX1250-GISEL-REAL16-NEXT:    v_cvt_f16_bf8_e64 v0.l, v0 byte_sel:3
; GFX1250-GISEL-REAL16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX1250-GISEL-REAL16-NEXT:    v_cvt_f32_f16_e32 v0, v0.l
; GFX1250-GISEL-REAL16-NEXT:    ; return to shader part epilog
;
; GFX1250-GISEL-FAKE16-LABEL: test_cvt_f16_bf8_byte3:
; GFX1250-GISEL-FAKE16:       ; %bb.0:
; GFX1250-GISEL-FAKE16-NEXT:    v_cvt_f16_bf8_e64 v0, v0 byte_sel:3
; GFX1250-GISEL-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX1250-GISEL-FAKE16-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX1250-GISEL-FAKE16-NEXT:    ; return to shader part epilog
  %f16 = tail call half @llvm.amdgcn.cvt.f16.bf8(i32 %a, i32 3)
  %f32 = fpext half %f16 to float
  ret float %f32
}
143
+
144
; Calls llvm.amdgcn.cvt.f16.bf8 with an immediate second operand of 3, then
; builds a <2 x half> with 0.0 in element 0 and the converted value in
; element 1, and returns that vector bitcast to float. The SelectionDAG
; true16 expectation has the conversion write v0.h directly; the other
; configurations convert into a low/full register and then pack the result
; with v_perm_b32 or v_lshl_or_b32.
define amdgpu_ps float @test_cvt_f16_bf8_byte3_hi(i32 %a) {
; GFX1250-SDAG-REAL16-LABEL: test_cvt_f16_bf8_byte3_hi:
; GFX1250-SDAG-REAL16:       ; %bb.0:
; GFX1250-SDAG-REAL16-NEXT:    v_cvt_f16_bf8_e64 v0.h, v0 byte_sel:3
; GFX1250-SDAG-REAL16-NEXT:    v_mov_b16_e32 v0.l, 0
; GFX1250-SDAG-REAL16-NEXT:    ; return to shader part epilog
;
; GFX1250-SDAG-FAKE16-LABEL: test_cvt_f16_bf8_byte3_hi:
; GFX1250-SDAG-FAKE16:       ; %bb.0:
; GFX1250-SDAG-FAKE16-NEXT:    v_cvt_f16_bf8_e64 v0, v0 byte_sel:3
; GFX1250-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-FAKE16-NEXT:    v_perm_b32 v0, v0, 0, 0x5040100
; GFX1250-SDAG-FAKE16-NEXT:    ; return to shader part epilog
;
; GFX1250-GISEL-REAL16-LABEL: test_cvt_f16_bf8_byte3_hi:
; GFX1250-GISEL-REAL16:       ; %bb.0:
; GFX1250-GISEL-REAL16-NEXT:    v_cvt_f16_bf8_e64 v0.l, v0 byte_sel:3
; GFX1250-GISEL-REAL16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX1250-GISEL-REAL16-NEXT:    v_lshl_or_b32 v0, v0, 16, 0
; GFX1250-GISEL-REAL16-NEXT:    ; return to shader part epilog
;
; GFX1250-GISEL-FAKE16-LABEL: test_cvt_f16_bf8_byte3_hi:
; GFX1250-GISEL-FAKE16:       ; %bb.0:
; GFX1250-GISEL-FAKE16-NEXT:    v_cvt_f16_bf8_e64 v0, v0 byte_sel:3
; GFX1250-GISEL-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX1250-GISEL-FAKE16-NEXT:    v_lshl_or_b32 v0, v0, 16, 0
; GFX1250-GISEL-FAKE16-NEXT:    ; return to shader part epilog
  %cvt = tail call half @llvm.amdgcn.cvt.f16.bf8(i32 %a, i32 3)
  ; Seed the vector with poison rather than the deprecated undef; both lanes
  ; are overwritten by the two inserts below before the value is used.
  %ins.0 = insertelement <2 x half> poison, half 0.0, i32 0
  %ins.1 = insertelement <2 x half> %ins.0, half %cvt, i32 1
  %ret = bitcast <2 x half> %ins.1 to float
  ret float %ret
}
177
+
12
178
define amdgpu_ps float @test_cvt_f16_fp8_byte0 (i32 %a ) {
13
179
; GFX1250-SDAG-REAL16-LABEL: test_cvt_f16_fp8_byte0:
14
180
; GFX1250-SDAG-REAL16: ; %bb.0:
0 commit comments