
Commit 217304a

[X86] Use X86FixupInstTunings to select between (V)MOVSS/D and (V)BLENDPS/D (#143895)
Fixes #142588. Following @RKSimon's suggestion, the transformation applies only when the blend mask is exactly 1, indicating that the instruction behaves like a move. Additionally, the conversion is performed only when optimizing for size or when the target prefers MOVSS/D over BLENDPS/D for performance reasons. The switch-case instructions were identified with GPT O.O.

Co-authored-by: Simon Pilgrim <[email protected]>
1 parent 1a4cf1d commit 217304a
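
To see why a blend immediate of exactly 1 is the one case that degenerates into a move, here is a small standalone C++ model of the lane semantics (an illustration using the Intel src1/src2 operand convention, not part of the patch):

#include <array>
#include <cassert>

using Vec4 = std::array<float, 4>;

// VBLENDPS dst, a, b, imm: lane i comes from b when imm bit i is set,
// otherwise from a.
static Vec4 blendps(const Vec4 &a, const Vec4 &b, unsigned imm) {
  Vec4 r;
  for (int i = 0; i < 4; ++i)
    r[i] = ((imm >> i) & 1) ? b[i] : a[i];
  return r;
}

// Register-to-register VMOVSS dst, a, b: lane 0 from b, lanes 1-3 from a.
static Vec4 movss(const Vec4 &a, const Vec4 &b) {
  return {b[0], a[1], a[2], a[3]};
}

int main() {
  Vec4 a = {1, 2, 3, 4}, b = {5, 6, 7, 8};
  // A mask of exactly 1 selects b[0],a[1,2,3] -- identical to the move.
  assert(blendps(a, b, 1) == movss(a, b));
  // Any other mask blends more than lane 0 and must remain a blend.
  assert(!(blendps(a, b, 3) == movss(a, b)));
  return 0;
}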


53 files changed: +272 -229 lines changed

llvm/lib/Target/X86/X86FixupInstTuning.cpp

Lines changed: 15 additions & 0 deletions
@@ -222,7 +222,22 @@ bool X86FixupInstTuningPass::processInstruction(
     return ProcessUNPCKToIntDomain(NewOpc);
   };
 
+  auto ProcessBLENDToMOV = [&](unsigned MovOpc) -> bool {
+    if (MI.getOperand(NumOperands - 1).getImm() != 1)
+      return false;
+    bool Force = MF.getFunction().hasOptSize();
+    if (!Force && !NewOpcPreferable(MovOpc))
+      return false;
+    MI.setDesc(TII->get(MovOpc));
+    MI.removeOperand(NumOperands - 1);
+    return true;
+  };
+
   switch (Opc) {
+  case X86::VBLENDPSrri:
+    return ProcessBLENDToMOV(X86::VMOVSSrr);
+  case X86::VBLENDPDrri:
+    return ProcessBLENDToMOV(X86::VMOVSDrr);
   case X86::VPERMILPDri:
     return ProcessVPERMILPDri(X86::VSHUFPDrri);
   case X86::VPERMILPDYri:
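
The new lambda gates the rewrite on two independent conditions. Restated in isolation as a minimal sketch (the helper and parameter names below are made up for exposition, not the pass's real API):

#include <cstdint>

// Illustrative restatement of the guard in ProcessBLENDToMOV above.
bool shouldConvertBlendToMove(int64_t BlendImm, bool HasOptSize,
                              bool MovePreferredByTuning) {
  // Only a mask of exactly 1 makes the blend a pure move of lane 0.
  if (BlendImm != 1)
    return false;
  // Rewrite when minimizing code size (the move encoding is shorter), or when
  // the target's scheduling model rates the move at least as good as the blend.
  return HasOptSize || MovePreferredByTuning;
}

Both sides of the second condition show up in the test updates below: the AVX1 and AVX2 run lines of combine-or-shuffle.ll switch to vmovss, while its AVX512 run lines keep vblendps even though the immediate is the same.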

llvm/test/CodeGen/X86/2012-01-12-extract-sv.ll

Lines changed: 2 additions & 2 deletions
@@ -11,7 +11,7 @@ define void @endless_loop() {
 ; AVX1-NEXT: vxorps %xmm2, %xmm2, %xmm2
 ; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3,4,5,6],ymm1[7]
 ; AVX1-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
+; AVX1-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
 ; AVX1-NEXT: vmovaps %ymm0, (%eax)
 ; AVX1-NEXT: vmovaps %ymm1, (%eax)
 ; AVX1-NEXT: vzeroupper
@@ -21,7 +21,7 @@ define void @endless_loop() {
 ; AVX2: # %bb.0: # %entry
 ; AVX2-NEXT: vbroadcastss (%eax), %xmm0
 ; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vblendps {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
+; AVX2-NEXT: vmovss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
 ; AVX2-NEXT: vbroadcastss %xmm0, %ymm0
 ; AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2
 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm2[0,1,2,3,4,5,6],ymm0[7]

llvm/test/CodeGen/X86/avx-insertelt.ll

Lines changed: 3 additions & 3 deletions
@@ -94,7 +94,7 @@ define <8 x float> @insert_f32_firstelt_of_high_subvector(<8 x float> %x, float
 ; AVX-LABEL: insert_f32_firstelt_of_high_subvector:
 ; AVX: # %bb.0:
 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX-NEXT: vblendps {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3]
+; AVX-NEXT: vmovss {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3]
 ; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX-NEXT: retq
 ;
@@ -202,9 +202,9 @@ define <4 x i64> @insert_i64_firstelt_of_high_subvector(<4 x i64> %x, i64 %s) {
 define <8 x float> @insert_f32_firstelts(<8 x float> %x, float %s) {
 ; AVX-LABEL: insert_f32_firstelts:
 ; AVX: # %bb.0:
-; AVX-NEXT: vblendps {{.*#+}} xmm2 = xmm1[0],xmm0[1,2,3]
+; AVX-NEXT: vmovss {{.*#+}} xmm2 = xmm1[0],xmm0[1,2,3]
 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
 ; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
 ; AVX-NEXT: retq
 ;

llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll

Lines changed: 2 additions & 2 deletions
@@ -1843,7 +1843,7 @@ define <2 x double> @test_mm_cvtu64_sd(<2 x double> %__A, i64 %__B) {
 ; X86-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
 ; X86-NEXT: vshufpd {{.*#+}} xmm2 = xmm1[1,0]
 ; X86-NEXT: vaddsd %xmm1, %xmm2, %xmm1
-; X86-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; X86-NEXT: vmovsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: test_mm_cvtu64_sd:
@@ -1891,7 +1891,7 @@ define <4 x float> @test_mm_cvtu64_ss(<4 x float> %__A, i64 %__B) {
 ; X86-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
 ; X86-NEXT: fstps {{[0-9]+}}(%esp)
 ; X86-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X86-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; X86-NEXT: vmovss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
 ; X86-NEXT: movl %ebp, %esp
 ; X86-NEXT: popl %ebp
 ; X86-NEXT: .cfi_def_cfa %esp, 4

llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll

Lines changed: 1 addition & 1 deletion
@@ -10483,7 +10483,7 @@ define <4 x float>@test_int_x86_avx512_maskz_vfmadd_ss_rm(<4 x float> %x0, <4 x
 ; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_ss_rm:
 ; CHECK: ## %bb.0:
 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x57,0xc9]
-; CHECK-NEXT: vblendps $1, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x01]
+; CHECK-NEXT: vmovss %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x10,0xc1]
 ; CHECK-NEXT: ## xmm0 = xmm1[0],xmm0[1,2,3]
 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
 %q = load float, ptr %ptr_b
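
The encodings above also show the code-size motivation directly: the blend needs a three-byte VEX prefix (it lives in the 0F3A opcode map) plus an immediate byte, while the move takes a two-byte VEX prefix and no immediate. A compile-time check over the bytes copied from the CHECK lines (illustration only):

// Byte sequences copied from the CHECK lines above.
constexpr unsigned char BlendEnc[] = {0xc4, 0xe3, 0x79, 0x0c, 0xc1, 0x01}; // vblendps $1, %xmm1, %xmm0, %xmm0
constexpr unsigned char MovssEnc[] = {0xc5, 0xfa, 0x10, 0xc1};             // vmovss %xmm1, %xmm0, %xmm0

// Two bytes saved per instruction; this is why hasOptSize() forces the
// conversion regardless of the scheduling model.
static_assert(sizeof(BlendEnc) == 6 && sizeof(MovssEnc) == 4,
              "blend is 6 bytes, move is 4");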

llvm/test/CodeGen/X86/avx512-intrinsics.ll

Lines changed: 1 addition & 1 deletion
@@ -6505,7 +6505,7 @@ define <4 x float>@test_int_x86_avx512_maskz_vfmadd_ss_rm(<4 x float> %x0, <4 x
 ; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_ss_rm:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; CHECK-NEXT: vmovss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
 ; CHECK-NEXT: ret{{[l|q]}}
 %q = load float, ptr %ptr_b
 %vecinit.i = insertelement <4 x float> undef, float %q, i32 0

llvm/test/CodeGen/X86/avx512copy-intrinsics.ll

Lines changed: 1 addition & 1 deletion
@@ -11,7 +11,7 @@ define <4 x i32> @test_mm_move_epi32(<4 x i32> %a0) nounwind {
 ; NOAVX512MOVZXC-LABEL: test_mm_move_epi32:
 ; NOAVX512MOVZXC: # %bb.0:
 ; NOAVX512MOVZXC-NEXT: vxorps %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf0,0x57,0xc9]
-; NOAVX512MOVZXC-NEXT: vblendps $1, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x0c,0xc0,0x01]
+; NOAVX512MOVZXC-NEXT: vmovss %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf2,0x10,0xc0]
 ; NOAVX512MOVZXC-NEXT: # xmm0 = xmm0[0],xmm1[1,2,3]
 ; NOAVX512MOVZXC-NEXT: retq # encoding: [0xc3]
 %res = shufflevector <4 x i32> %a0, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 4, i32 4>

llvm/test/CodeGen/X86/build-vector-512.ll

Lines changed: 3 additions & 3 deletions
@@ -578,7 +578,7 @@ define <16 x float> @test_buildvector_16f32_2_var(float %a0, float %a1) {
 ; AVX-32-NEXT: vpmovsxbd {{.*#+}} xmm1 = [0,17,0,0]
 ; AVX-32-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
 ; AVX-32-NEXT: vpermi2ps %zmm0, %zmm2, %zmm1
-; AVX-32-NEXT: vblendps {{.*#+}} xmm3 = xmm2[0],xmm0[1,2,3]
+; AVX-32-NEXT: vmovss {{.*#+}} xmm3 = xmm2[0],xmm0[1,2,3]
 ; AVX-32-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
 ; AVX-32-NEXT: vinsertps {{.*#+}} xmm3 = xmm0[0,1,2],xmm2[0]
 ; AVX-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[2,3]
@@ -626,7 +626,7 @@ define <16 x float> @test_buildvector_16f32_2_load(ptr %p0, ptr %p1) {
 ; AVX-32-NEXT: vbroadcastss (%ecx), %xmm1
 ; AVX-32-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
 ; AVX-32-NEXT: vpermi2ps %zmm1, %zmm2, %zmm0
-; AVX-32-NEXT: vblendps {{.*#+}} xmm3 = xmm2[0],xmm1[1,2,3]
+; AVX-32-NEXT: vmovss {{.*#+}} xmm3 = xmm2[0],xmm1[1,2,3]
 ; AVX-32-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
 ; AVX-32-NEXT: vinsertps {{.*#+}} xmm3 = xmm1[0,1,2],xmm2[0]
 ; AVX-32-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
@@ -640,7 +640,7 @@ define <16 x float> @test_buildvector_16f32_2_load(ptr %p0, ptr %p1) {
 ; AVX-64-NEXT: vbroadcastss (%rdi), %xmm1
 ; AVX-64-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
 ; AVX-64-NEXT: vpermi2ps %zmm1, %zmm2, %zmm0
-; AVX-64-NEXT: vblendps {{.*#+}} xmm3 = xmm2[0],xmm1[1,2,3]
+; AVX-64-NEXT: vmovss {{.*#+}} xmm3 = xmm2[0],xmm1[1,2,3]
 ; AVX-64-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
 ; AVX-64-NEXT: vinsertps {{.*#+}} xmm3 = xmm1[0,1,2],xmm2[0]
 ; AVX-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]

llvm/test/CodeGen/X86/buildvec-extract.ll

Lines changed: 3 additions & 3 deletions
@@ -42,7 +42,7 @@ define <2 x i64> @extract0_i32_zext_insert0_i64_zero(<4 x i32> %x) {
 ; AVX-LABEL: extract0_i32_zext_insert0_i64_zero:
 ; AVX: # %bb.0:
 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
 ; AVX-NEXT: retq
 %e = extractelement <4 x i32> %x, i32 0
 %z = zext i32 %e to i64
@@ -85,7 +85,7 @@ define <2 x i64> @extract1_i32_zext_insert0_i64_zero(<4 x i32> %x) {
 ; AVX: # %bb.0:
 ; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
 ; AVX-NEXT: retq
 %e = extractelement <4 x i32> %x, i32 1
 %z = zext i32 %e to i64
@@ -130,7 +130,7 @@ define <2 x i64> @extract2_i32_zext_insert0_i64_zero(<4 x i32> %x) {
 ; AVX: # %bb.0:
 ; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,3,2,3]
 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
 ; AVX-NEXT: retq
 %e = extractelement <4 x i32> %x, i32 2
 %z = zext i32 %e to i64

llvm/test/CodeGen/X86/canonicalize-vars-f16-type.ll

Lines changed: 4 additions & 4 deletions
@@ -51,7 +51,7 @@ define void @v_test_canonicalize__half(half addrspace(1)* %out) nounwind {
 ; AVX512-NEXT: vcvtph2ps %xmm1, %xmm1
 ; AVX512-NEXT: vmulss %xmm0, %xmm1, %xmm0
 ; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX512-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
 ; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
 ; AVX512-NEXT: vpextrw $0, %xmm0, (%rdi)
 ; AVX512-NEXT: retq
@@ -149,7 +149,7 @@ define half @complex_canonicalize_fmul_half(half %a, half %b) nounwind {
 ; AVX512-NEXT: vcvtph2ps %xmm2, %xmm2
 ; AVX512-NEXT: vmulss %xmm2, %xmm0, %xmm0
 ; AVX512-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; AVX512-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
+; AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
 ; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
 ; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
 ; AVX512-NEXT: vsubss %xmm1, %xmm0, %xmm0
@@ -235,12 +235,12 @@ define void @v_test_canonicalize_v2half(<2 x half> addrspace(1)* %out) nounwind
 ; AVX512-NEXT: vcvtph2ps %xmm2, %xmm2
 ; AVX512-NEXT: vmulss %xmm1, %xmm2, %xmm2
 ; AVX512-NEXT: vxorps %xmm3, %xmm3, %xmm3
-; AVX512-NEXT: vblendps {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3]
+; AVX512-NEXT: vmovss {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3]
 ; AVX512-NEXT: vcvtps2ph $4, %xmm2, %xmm2
 ; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
 ; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
 ; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm3[1,2,3]
+; AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm3[1,2,3]
 ; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
 ; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
 ; AVX512-NEXT: vmovd %xmm0, (%rdi)

llvm/test/CodeGen/X86/coalesce_commute_movsd.ll

Lines changed: 2 additions & 2 deletions
@@ -44,12 +44,12 @@ define <4 x float> @insert_f32(float %a0, <4 x float> %a1) {
 ;
 ; AVX-LABEL: insert_f32:
 ; AVX: # %bb.0:
-; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
 ; AVX-NEXT: retq
 ;
 ; AVX512-LABEL: insert_f32:
 ; AVX512: # %bb.0:
-; AVX512-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
 ; AVX512-NEXT: retq
 %1 = insertelement <4 x float> %a1, float %a0, i32 0
 ret <4 x float> %1

llvm/test/CodeGen/X86/combine-and.ll

Lines changed: 2 additions & 2 deletions
@@ -37,7 +37,7 @@ define <4 x i32> @test1(<4 x i32> %A) {
 ; AVX-LABEL: test1:
 ; AVX: # %bb.0:
 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
 ; AVX-NEXT: retq
 %1 = and <4 x i32> %A, <i32 -1, i32 0, i32 0, i32 0>
 ret <4 x i32> %1
@@ -195,7 +195,7 @@ define <4 x i32> @test11(<4 x i32> %A) {
 ; AVX-LABEL: test11:
 ; AVX: # %bb.0:
 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
 ; AVX-NEXT: retq
 %1 = and <4 x i32> %A, <i32 0, i32 -1, i32 -1, i32 -1>
 ret <4 x i32> %1

llvm/test/CodeGen/X86/combine-or-shuffle.ll

Lines changed: 60 additions & 20 deletions
@@ -86,10 +86,20 @@ define <4 x i32> @test4(<4 x i32> %a, <4 x i32> %b) {
 ; SSE4-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
 ; SSE4-NEXT: retq
 ;
-; AVX-LABEL: test4:
-; AVX: # %bb.0:
-; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
-; AVX-NEXT: retq
+; AVX1-LABEL: test4:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test4:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: test4:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX512-NEXT: retq
 %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 4, i32 4, i32 4>
 %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 1, i32 2, i32 3>
 %or = or <4 x i32> %shuf1, %shuf2
@@ -108,10 +118,20 @@ define <4 x i32> @test5(<4 x i32> %a, <4 x i32> %b) {
 ; SSE4-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
 ; SSE4-NEXT: retq
 ;
-; AVX-LABEL: test5:
-; AVX: # %bb.0:
-; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
-; AVX-NEXT: retq
+; AVX1-LABEL: test5:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vmovss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test5:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vmovss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: test5:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; AVX512-NEXT: retq
 %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 1, i32 2, i32 3>
 %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 4, i32 4, i32 4>
 %or = or <4 x i32> %shuf1, %shuf2
@@ -241,10 +261,20 @@ define <4 x i32> @test11(<4 x i32> %a, <4 x i32> %b) {
 ; SSE4-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
 ; SSE4-NEXT: retq
 ;
-; AVX-LABEL: test11:
-; AVX: # %bb.0:
-; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
-; AVX-NEXT: retq
+; AVX1-LABEL: test11:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test11:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: test11:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX512-NEXT: retq
 %and1 = and <4 x i32> %a, <i32 -1, i32 0, i32 0, i32 0>
 %and2 = and <4 x i32> %b, <i32 0, i32 -1, i32 -1, i32 -1>
 %or = or <4 x i32> %and1, %and2
@@ -263,10 +293,20 @@ define <4 x i32> @test12(<4 x i32> %a, <4 x i32> %b) {
 ; SSE4-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
 ; SSE4-NEXT: retq
 ;
-; AVX-LABEL: test12:
-; AVX: # %bb.0:
-; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
-; AVX-NEXT: retq
+; AVX1-LABEL: test12:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vmovss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test12:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vmovss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: test12:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; AVX512-NEXT: retq
 %and1 = and <4 x i32> %a, <i32 0, i32 -1, i32 -1, i32 -1>
 %and2 = and <4 x i32> %b, <i32 -1, i32 0, i32 0, i32 0>
 %or = or <4 x i32> %and1, %and2
@@ -395,18 +435,18 @@ define <4 x i32> @test18(<4 x i32> %a, <4 x i32> %b) {
 ; AVX1-LABEL: test18:
 ; AVX1: # %bb.0:
 ; AVX1-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
+; AVX1-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
 ; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,0,1,1]
-; AVX1-NEXT: vblendps {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3]
+; AVX1-NEXT: vmovss {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3]
 ; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: test18:
 ; AVX2: # %bb.0:
 ; AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
+; AVX2-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
 ; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,0,1,1]
-; AVX2-NEXT: vblendps {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3]
+; AVX2-NEXT: vmovss {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3]
 ; AVX2-NEXT: vorps %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT: retq
 ;

llvm/test/CodeGen/X86/fminimumnum-fmaximumnum.ll

Lines changed: 1 addition & 1 deletion
@@ -1343,7 +1343,7 @@ define <2 x double> @test_fminimumnum_vector_nan(<2 x double> %x) {
 ; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1
 ; AVX-NEXT: vmovhpd {{.*#+}} xmm2 = xmm1[0],mem[0]
 ; AVX-NEXT: vminpd %xmm0, %xmm2, %xmm0
-; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
 ; AVX-NEXT: retq
 ;
 ; AVX10_2-LABEL: test_fminimumnum_vector_nan:

llvm/test/CodeGen/X86/fmsubadd-combine.ll

Lines changed: 2 additions & 2 deletions
@@ -12,7 +12,7 @@ define <2 x double> @mul_subadd_pd128(<2 x double> %A, <2 x double> %B, <2 x dou
 ; NOFMA-NEXT: vmulpd %xmm1, %xmm0, %xmm0
 ; NOFMA-NEXT: vsubpd %xmm2, %xmm0, %xmm1
 ; NOFMA-NEXT: vaddpd %xmm2, %xmm0, %xmm0
-; NOFMA-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; NOFMA-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
 ; NOFMA-NEXT: retq
 ;
 ; FMA3-LABEL: mul_subadd_pd128:
@@ -191,7 +191,7 @@ define <2 x double> @mul_subadd_bad_commute(<2 x double> %A, <2 x double> %B, <2
 ; CHECK-NEXT: vmulpd %xmm1, %xmm0, %xmm0
 ; CHECK-NEXT: vsubpd %xmm0, %xmm2, %xmm1
 ; CHECK-NEXT: vaddpd %xmm2, %xmm0, %xmm0
-; CHECK-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
 ; CHECK-NEXT: retq
 entry:
 %AB = fmul <2 x double> %A, %B
