Skip to content

Commit 6c4639c

Browse files
sse2 version of xsimd::swizzle on [u]int16_t
... and test it, as well as the other versions.
1 parent 297dd76 commit 6c4639c

File tree

2 files changed

+36
-0
lines changed

2 files changed

+36
-0
lines changed

include/xsimd/arch/xsimd_sse2.hpp

+25
Original file line numberDiff line numberDiff line change
@@ -1640,6 +1640,31 @@ namespace xsimd
16401640
return bitwise_cast<int32_t>(swizzle(bitwise_cast<uint32_t>(self), mask, sse2 {}));
16411641
}
16421642

1643+
template <class A, uint16_t V0, uint16_t V1, uint16_t V2, uint16_t V3, uint16_t V4, uint16_t V5, uint16_t V6, uint16_t V7>
1644+
XSIMD_INLINE batch<uint16_t, A> swizzle(batch<uint16_t, A> const& self, batch_constant<uint16_t, A, V0, V1, V2, V3, V4, V5, V6, V7>, requires_arch<sse2>) noexcept
1645+
{
1646+
// permute within each lane
1647+
constexpr auto mask_lo = detail::mod_shuffle(V0, V1, V2, V3);
1648+
constexpr auto mask_hi = detail::mod_shuffle(V4, V5, V6, V7);
1649+
__m128i lo = _mm_shufflelo_epi16(self, mask_lo);
1650+
__m128i hi = _mm_shufflehi_epi16(self, mask_hi);
1651+
1652+
__m128i lo_lo = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(lo), _mm_castsi128_pd(lo), _MM_SHUFFLE2(0, 0)));
1653+
__m128i hi_hi = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(hi), _mm_castsi128_pd(hi), _MM_SHUFFLE2(1, 1)));
1654+
1655+
// mask to choose the right lane
1656+
batch_bool_constant<uint16_t, A, (V0 < 4), (V1 < 4), (V2 < 4), (V3 < 4), (V4 < 4), (V5 < 4), (V6 < 4), (V7 < 4)> blend_mask;
1657+
1658+
// blend the two permutes
1659+
return select(blend_mask, batch<uint16_t, A>(lo_lo), batch<uint16_t, A>(hi_hi));
1660+
}
1661+
1662+
template <class A, uint16_t V0, uint16_t V1, uint16_t V2, uint16_t V3, uint16_t V4, uint16_t V5, uint16_t V6, uint16_t V7>
1663+
XSIMD_INLINE batch<int16_t, A> swizzle(batch<int16_t, A> const& self, batch_constant<uint16_t, A, V0, V1, V2, V3, V4, V5, V6, V7> mask, requires_arch<sse2>) noexcept
1664+
{
1665+
return bitwise_cast<int16_t>(swizzle(bitwise_cast<uint16_t>(self), mask, sse2 {}));
1666+
}
1667+
16431668
// transpose
16441669
template <class A>
16451670
XSIMD_INLINE void transpose(batch<float, A>* matrix_begin, batch<float, A>* matrix_end, requires_arch<sse2>) noexcept

test/test_shuffle.cpp

+11
Original file line numberDiff line numberDiff line change
@@ -732,4 +732,15 @@ TEST_CASE_TEMPLATE("[small integer transpose]", B, xsimd::batch<uint16_t>, xsimd
732732
}
733733
}
734734

735+
#if XSIMD_WITH_NEON64 || (XSIMD_WITH_SSE2 && !XSIMD_WITH_AVX)
// Swizzle check for 16-bit integer batches; only compiled when NEON64 is
// enabled, or when SSE2 is enabled without AVX.
TEST_CASE_TEMPLATE("[small integer swizzle]", B, xsimd::batch<uint16_t>, xsimd::batch<int16_t>)
{
    shuffle_test<B> fixture;
    SUBCASE("swizzle") { fixture.swizzle(); }
}
#endif
745+
735746
#endif

0 commit comments

Comments
 (0)