; Commit 345d763
; [X86] Add tests showing failure to concat matching SITOFP/UITOFP vector ops (#172852)
;
; Tests have to perform an additional FADD to prevent combineConcatVectorOfCasts
; from performing the fold - we're trying to show when this fails to occur during
; a combineConcatVectorOps recursion. Interestingly, due to uitofp expansion,
; AVX1/2 is often managing to concat where AVX512 can't.
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s --check-prefixes=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge | FileCheck %s --check-prefixes=AVX,AVX1OR2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=AVX,AVX1OR2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=AVX,AVX512

; Two v2i32->v2f64 sitofp results, each consumed by an fadd (the fadd blocks the
; early combineConcatVectorOfCasts fold), then concatenated to v4f64. The CHECK
; lines (autogenerated) show the two cvtdq2pd instructions are NOT merged into a
; single wider convert during the concat combine.
define <4 x double> @concat_sitofp_v4f64_v2i32(<2 x i32> %a0, <2 x i32> %a1, <2 x double> %b0, <2 x double> %b1) {
; SSE-LABEL: concat_sitofp_v4f64_v2i32:
; SSE: # %bb.0:
; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
; SSE-NEXT: cvtdq2pd %xmm1, %xmm1
; SSE-NEXT: addpd %xmm2, %xmm0
; SSE-NEXT: addpd %xmm3, %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: concat_sitofp_v4f64_v2i32:
; AVX: # %bb.0:
; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
; AVX-NEXT: vcvtdq2pd %xmm1, %xmm1
; AVX-NEXT: vaddpd %xmm0, %xmm2, %xmm0
; AVX-NEXT: vaddpd %xmm1, %xmm3, %xmm1
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT: retq
  %c0 = sitofp <2 x i32> %a0 to <2 x double>
  %c1 = sitofp <2 x i32> %a1 to <2 x double>
  %v0 = fadd <2 x double> %b0, %c0
  %v1 = fadd <2 x double> %b1, %c1
  %res = shufflevector <2 x double> %v0, <2 x double> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x double> %res
}

; Two v4i32->v4f32 sitofp results, each consumed by an fadd (blocks the early
; combineConcatVectorOfCasts fold), then concatenated to v8f32. The autogenerated
; CHECK lines show the two cvtdq2ps are NOT merged into one wider convert.
define <8 x float> @concat_sitofp_v8f32_v4i32(<4 x i32> %a0, <4 x i32> %a1, <4 x float> %b0, <4 x float> %b1) {
; SSE-LABEL: concat_sitofp_v8f32_v4i32:
; SSE: # %bb.0:
; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
; SSE-NEXT: cvtdq2ps %xmm1, %xmm1
; SSE-NEXT: addps %xmm2, %xmm0
; SSE-NEXT: addps %xmm3, %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: concat_sitofp_v8f32_v4i32:
; AVX: # %bb.0:
; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
; AVX-NEXT: vcvtdq2ps %xmm1, %xmm1
; AVX-NEXT: vaddps %xmm0, %xmm2, %xmm0
; AVX-NEXT: vaddps %xmm1, %xmm3, %xmm1
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT: retq
  %c0 = sitofp <4 x i32> %a0 to <4 x float>
  %c1 = sitofp <4 x i32> %a1 to <4 x float>
  %v0 = fadd <4 x float> %b0, %c0
  %v1 = fadd <4 x float> %b1, %c1
  %res = shufflevector <4 x float> %v0, <4 x float> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x float> %res
}

; Four v2i32->v2f64 sitofp results, each consumed by an fadd (blocks the early
; combineConcatVectorOfCasts fold), concatenated pairwise and then to v8f64.
; The autogenerated CHECK lines show four separate cvtdq2pd on every target;
; AVX512 still builds the zmm result from xmm pieces instead of using a single
; wider convert.
define <8 x double> @concat_sitofp_v8f64_v2i32(<2 x i32> %a0, <2 x i32> %a1, <2 x i32> %a2, <2 x i32> %a3, <2 x double> %b0, <2 x double> %b1, <2 x double> %b2, <2 x double> %b3) {
; SSE-LABEL: concat_sitofp_v8f64_v2i32:
; SSE: # %bb.0:
; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
; SSE-NEXT: cvtdq2pd %xmm1, %xmm1
; SSE-NEXT: cvtdq2pd %xmm2, %xmm2
; SSE-NEXT: cvtdq2pd %xmm3, %xmm3
; SSE-NEXT: addpd %xmm4, %xmm0
; SSE-NEXT: addpd %xmm5, %xmm1
; SSE-NEXT: addpd %xmm6, %xmm2
; SSE-NEXT: addpd %xmm7, %xmm3
; SSE-NEXT: retq
;
; AVX1OR2-LABEL: concat_sitofp_v8f64_v2i32:
; AVX1OR2: # %bb.0:
; AVX1OR2-NEXT: vcvtdq2pd %xmm0, %xmm0
; AVX1OR2-NEXT: vcvtdq2pd %xmm1, %xmm1
; AVX1OR2-NEXT: vcvtdq2pd %xmm2, %xmm2
; AVX1OR2-NEXT: vcvtdq2pd %xmm3, %xmm3
; AVX1OR2-NEXT: vaddpd %xmm0, %xmm4, %xmm0
; AVX1OR2-NEXT: vaddpd %xmm1, %xmm5, %xmm1
; AVX1OR2-NEXT: vaddpd %xmm2, %xmm6, %xmm2
; AVX1OR2-NEXT: vaddpd %xmm3, %xmm7, %xmm3
; AVX1OR2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1OR2-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm1
; AVX1OR2-NEXT: retq
;
; AVX512-LABEL: concat_sitofp_v8f64_v2i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vcvtdq2pd %xmm0, %xmm0
; AVX512-NEXT: vcvtdq2pd %xmm1, %xmm1
; AVX512-NEXT: vcvtdq2pd %xmm2, %xmm2
; AVX512-NEXT: vcvtdq2pd %xmm3, %xmm3
; AVX512-NEXT: vaddpd %xmm0, %xmm4, %xmm0
; AVX512-NEXT: vaddpd %xmm1, %xmm5, %xmm1
; AVX512-NEXT: vaddpd %xmm2, %xmm6, %xmm2
; AVX512-NEXT: vaddpd %xmm3, %xmm7, %xmm3
; AVX512-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; AVX512-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX512-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512-NEXT: retq
  %c0 = sitofp <2 x i32> %a0 to <2 x double>
  %c1 = sitofp <2 x i32> %a1 to <2 x double>
  %c2 = sitofp <2 x i32> %a2 to <2 x double>
  %c3 = sitofp <2 x i32> %a3 to <2 x double>
  %v0 = fadd <2 x double> %b0, %c0
  %v1 = fadd <2 x double> %b1, %c1
  %v2 = fadd <2 x double> %b2, %c2
  %v3 = fadd <2 x double> %b3, %c3
  %r01 = shufflevector <2 x double> %v0, <2 x double> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %r23 = shufflevector <2 x double> %v2, <2 x double> %v3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = shufflevector <4 x double> %r01, <4 x double> %r23, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x double> %res
}

; Four v4i32->v4f32 sitofp results, each consumed by an fadd (blocks the early
; combineConcatVectorOfCasts fold), concatenated pairwise and then to v16f32.
; The autogenerated CHECK lines show four separate cvtdq2ps on every target;
; AVX512 still assembles the zmm result from xmm pieces.
define <16 x float> @concat_sitofp_v16f32_v4i32(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2, <4 x i32> %a3, <4 x float> %b0, <4 x float> %b1, <4 x float> %b2, <4 x float> %b3) {
; SSE-LABEL: concat_sitofp_v16f32_v4i32:
; SSE: # %bb.0:
; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
; SSE-NEXT: cvtdq2ps %xmm1, %xmm1
; SSE-NEXT: cvtdq2ps %xmm2, %xmm2
; SSE-NEXT: cvtdq2ps %xmm3, %xmm3
; SSE-NEXT: addps %xmm4, %xmm0
; SSE-NEXT: addps %xmm5, %xmm1
; SSE-NEXT: addps %xmm6, %xmm2
; SSE-NEXT: addps %xmm7, %xmm3
; SSE-NEXT: retq
;
; AVX1OR2-LABEL: concat_sitofp_v16f32_v4i32:
; AVX1OR2: # %bb.0:
; AVX1OR2-NEXT: vcvtdq2ps %xmm0, %xmm0
; AVX1OR2-NEXT: vcvtdq2ps %xmm1, %xmm1
; AVX1OR2-NEXT: vcvtdq2ps %xmm2, %xmm2
; AVX1OR2-NEXT: vcvtdq2ps %xmm3, %xmm3
; AVX1OR2-NEXT: vaddps %xmm0, %xmm4, %xmm0
; AVX1OR2-NEXT: vaddps %xmm1, %xmm5, %xmm1
; AVX1OR2-NEXT: vaddps %xmm2, %xmm6, %xmm2
; AVX1OR2-NEXT: vaddps %xmm3, %xmm7, %xmm3
; AVX1OR2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1OR2-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm1
; AVX1OR2-NEXT: retq
;
; AVX512-LABEL: concat_sitofp_v16f32_v4i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vcvtdq2ps %xmm0, %xmm0
; AVX512-NEXT: vcvtdq2ps %xmm1, %xmm1
; AVX512-NEXT: vcvtdq2ps %xmm2, %xmm2
; AVX512-NEXT: vcvtdq2ps %xmm3, %xmm3
; AVX512-NEXT: vaddps %xmm0, %xmm4, %xmm0
; AVX512-NEXT: vaddps %xmm1, %xmm5, %xmm1
; AVX512-NEXT: vaddps %xmm2, %xmm6, %xmm2
; AVX512-NEXT: vaddps %xmm3, %xmm7, %xmm3
; AVX512-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; AVX512-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX512-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512-NEXT: retq
  %c0 = sitofp <4 x i32> %a0 to <4 x float>
  %c1 = sitofp <4 x i32> %a1 to <4 x float>
  %c2 = sitofp <4 x i32> %a2 to <4 x float>
  %c3 = sitofp <4 x i32> %a3 to <4 x float>
  %v0 = fadd <4 x float> %b0, %c0
  %v1 = fadd <4 x float> %b1, %c1
  %v2 = fadd <4 x float> %b2, %c2
  %v3 = fadd <4 x float> %b3, %c3
  %r01 = shufflevector <4 x float> %v0, <4 x float> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %r23 = shufflevector <4 x float> %v2, <4 x float> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %res = shufflevector <8 x float> %r01, <8 x float> %r23, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x float> %res
}

; Widening case: two v4i32->v4f64 sitofp results, each consumed by an fadd
; (blocks the early combineConcatVectorOfCasts fold), concatenated to v8f64.
; The autogenerated CHECK lines show AVX targets use xmm->ymm cvtdq2pd but
; still keep two converts rather than one zmm-wide convert on AVX512.
define <8 x double> @concat_sitofp_v8f64_v4i32(<4 x i32> %a0, <4 x i32> %a1, <4 x double> %b0, <4 x double> %b1) {
; SSE-LABEL: concat_sitofp_v8f64_v4i32:
; SSE: # %bb.0:
; SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm0[2,3,2,3]
; SSE-NEXT: cvtdq2pd %xmm6, %xmm6
; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm1[2,3,2,3]
; SSE-NEXT: cvtdq2pd %xmm7, %xmm7
; SSE-NEXT: cvtdq2pd %xmm1, %xmm8
; SSE-NEXT: addpd %xmm2, %xmm0
; SSE-NEXT: addpd %xmm3, %xmm6
; SSE-NEXT: addpd %xmm4, %xmm8
; SSE-NEXT: addpd %xmm5, %xmm7
; SSE-NEXT: movapd %xmm6, %xmm1
; SSE-NEXT: movapd %xmm8, %xmm2
; SSE-NEXT: movapd %xmm7, %xmm3
; SSE-NEXT: retq
;
; AVX1OR2-LABEL: concat_sitofp_v8f64_v4i32:
; AVX1OR2: # %bb.0:
; AVX1OR2-NEXT: vcvtdq2pd %xmm0, %ymm0
; AVX1OR2-NEXT: vcvtdq2pd %xmm1, %ymm1
; AVX1OR2-NEXT: vaddpd %ymm0, %ymm2, %ymm0
; AVX1OR2-NEXT: vaddpd %ymm1, %ymm3, %ymm1
; AVX1OR2-NEXT: retq
;
; AVX512-LABEL: concat_sitofp_v8f64_v4i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vcvtdq2pd %xmm0, %ymm0
; AVX512-NEXT: vcvtdq2pd %xmm1, %ymm1
; AVX512-NEXT: vaddpd %ymm0, %ymm2, %ymm0
; AVX512-NEXT: vaddpd %ymm1, %ymm3, %ymm1
; AVX512-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512-NEXT: retq
  %c0 = sitofp <4 x i32> %a0 to <4 x double>
  %c1 = sitofp <4 x i32> %a1 to <4 x double>
  %v0 = fadd <4 x double> %b0, %c0
  %v1 = fadd <4 x double> %b1, %c1
  %res = shufflevector <4 x double> %v0, <4 x double> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x double> %res
}

; 256-bit inputs: two v8i32->v8f32 sitofp results, each consumed by an fadd
; (blocks the early combineConcatVectorOfCasts fold), concatenated to v16f32.
; The autogenerated CHECK lines show two ymm cvtdq2ps on AVX targets rather
; than a single zmm-wide convert on AVX512.
define <16 x float> @concat_sitofp_v16f32_v8i32(<8 x i32> %a0, <8 x i32> %a1, <8 x float> %b0, <8 x float> %b1) {
; SSE-LABEL: concat_sitofp_v16f32_v8i32:
; SSE: # %bb.0:
; SSE-NEXT: cvtdq2ps %xmm1, %xmm1
; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
; SSE-NEXT: cvtdq2ps %xmm3, %xmm3
; SSE-NEXT: cvtdq2ps %xmm2, %xmm2
; SSE-NEXT: addps %xmm4, %xmm0
; SSE-NEXT: addps %xmm5, %xmm1
; SSE-NEXT: addps %xmm6, %xmm2
; SSE-NEXT: addps %xmm7, %xmm3
; SSE-NEXT: retq
;
; AVX1OR2-LABEL: concat_sitofp_v16f32_v8i32:
; AVX1OR2: # %bb.0:
; AVX1OR2-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX1OR2-NEXT: vcvtdq2ps %ymm1, %ymm1
; AVX1OR2-NEXT: vaddps %ymm0, %ymm2, %ymm0
; AVX1OR2-NEXT: vaddps %ymm1, %ymm3, %ymm1
; AVX1OR2-NEXT: retq
;
; AVX512-LABEL: concat_sitofp_v16f32_v8i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX512-NEXT: vcvtdq2ps %ymm1, %ymm1
; AVX512-NEXT: vaddps %ymm0, %ymm2, %ymm0
; AVX512-NEXT: vaddps %ymm1, %ymm3, %ymm1
; AVX512-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512-NEXT: retq
  %c0 = sitofp <8 x i32> %a0 to <8 x float>
  %c1 = sitofp <8 x i32> %a1 to <8 x float>
  %v0 = fadd <8 x float> %b0, %c0
  %v1 = fadd <8 x float> %b1, %c1
  %res = shufflevector <8 x float> %v0, <8 x float> %v1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x float> %res
}

0 commit comments

Comments
 (0)