@@ -1592,3 +1592,91 @@ entry:
15921592 %1 = bitcast <8 x i8 > %0 to i64
15931593 ret i64 %1
15941594}
1595+
; Concatenate the two <4 x i64> arguments into a single <8 x i64> value,
; truncate it to <8 x i32> (the trunc carries the nsw flag) and store the
; result to %p with 16-byte alignment. The function attributes set both
; "min-legal-vector-width" and "prefer-vector-width" to 256.
;
; Per the assertions below, the configurations checked under the AVX512F and
; AVX512BW prefixes still merge both inputs into one zmm register and emit a
; single vpmovqd store, whereas the AVX512VL and AVX512BWVL configurations
; emit one ymm vpmovqd store per input.
define void @foo(<4 x i64> %a, <4 x i64> %b, ptr %p) "min-legal-vector-width"="256" "prefer-vector-width"="256" {
; SSE-LABEL: foo:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
; SSE-NEXT:    movaps %xmm2, 16(%rdi)
; SSE-NEXT:    movaps %xmm0, (%rdi)
; SSE-NEXT:    retq
;
; AVX1-LABEL: foo:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2]
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vshufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2]
; AVX1-NEXT:    vmovaps %xmm1, 16(%rdi)
; AVX1-NEXT:    vmovaps %xmm0, (%rdi)
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-SLOW-LABEL: foo:
; AVX2-SLOW:       # %bb.0: # %entry
; AVX2-SLOW-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX2-SLOW-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2]
; AVX2-SLOW-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX2-SLOW-NEXT:    vshufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2]
; AVX2-SLOW-NEXT:    vmovaps %xmm1, 16(%rdi)
; AVX2-SLOW-NEXT:    vmovaps %xmm0, (%rdi)
; AVX2-SLOW-NEXT:    vzeroupper
; AVX2-SLOW-NEXT:    retq
;
; AVX2-FAST-ALL-LABEL: foo:
; AVX2-FAST-ALL:       # %bb.0: # %entry
; AVX2-FAST-ALL-NEXT:    vmovaps {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7]
; AVX2-FAST-ALL-NEXT:    vpermps %ymm0, %ymm2, %ymm0
; AVX2-FAST-ALL-NEXT:    vpermps %ymm1, %ymm2, %ymm1
; AVX2-FAST-ALL-NEXT:    vmovaps %xmm1, 16(%rdi)
; AVX2-FAST-ALL-NEXT:    vmovaps %xmm0, (%rdi)
; AVX2-FAST-ALL-NEXT:    vzeroupper
; AVX2-FAST-ALL-NEXT:    retq
;
; AVX2-FAST-PERLANE-LABEL: foo:
; AVX2-FAST-PERLANE:       # %bb.0: # %entry
; AVX2-FAST-PERLANE-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX2-FAST-PERLANE-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2]
; AVX2-FAST-PERLANE-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX2-FAST-PERLANE-NEXT:    vshufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2]
; AVX2-FAST-PERLANE-NEXT:    vmovaps %xmm1, 16(%rdi)
; AVX2-FAST-PERLANE-NEXT:    vmovaps %xmm0, (%rdi)
; AVX2-FAST-PERLANE-NEXT:    vzeroupper
; AVX2-FAST-PERLANE-NEXT:    retq
;
; AVX512F-LABEL: foo:
; AVX512F:       # %bb.0: # %entry
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512F-NEXT:    vpmovqd %zmm0, (%rdi)
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: foo:
; AVX512VL:       # %bb.0: # %entry
; AVX512VL-NEXT:    vpmovqd %ymm1, 16(%rdi)
; AVX512VL-NEXT:    vpmovqd %ymm0, (%rdi)
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: foo:
; AVX512BW:       # %bb.0: # %entry
; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vpmovqd %zmm0, (%rdi)
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512BWVL-LABEL: foo:
; AVX512BWVL:       # %bb.0: # %entry
; AVX512BWVL-NEXT:    vpmovqd %ymm1, 16(%rdi)
; AVX512BWVL-NEXT:    vpmovqd %ymm0, (%rdi)
; AVX512BWVL-NEXT:    vzeroupper
; AVX512BWVL-NEXT:    retq
entry:
  ; Identity mask over both operands, i.e. a plain concatenation of %a and %b.
  %concat = shufflevector <4 x i64> %a, <4 x i64> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %narrow = trunc nsw <8 x i64> %concat to <8 x i32>
  ; Only 16-byte alignment is given for the 32-byte result.
  store <8 x i32> %narrow, ptr %p, align 16
  ret void
}
0 commit comments