Skip to content

Commit e392354

Browse files
DiamonDinoia and serge-sans-paille
authored and committed
NEW API: Fixed shift, Rotation
1. Adds the API bitwise_[l|r]shift<N>(...) and rot[l|r]<N>(...). 2. Updates the tests to use the new API. 3. Updates the documentation.
1 parent 0882550 commit e392354

File tree

10 files changed

+452
-18
lines changed

10 files changed

+452
-18
lines changed

docs/source/api/bitwise_operators_index.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,9 @@ Bitwise operators
4040
+---------------------------------------+----------------------------------------------------+
4141
| :cpp:func:`bitwise_andnot` | per slot bitwise and not |
4242
+---------------------------------------+----------------------------------------------------+
43-
| :cpp:func:`bitwise_lshift` | per slot bitwise and |
43+
| :cpp:func:`bitwise_lshift` | per slot bitwise left shift |
4444
+---------------------------------------+----------------------------------------------------+
45-
| :cpp:func:`bitwise_rshift` | per slot bitwise and not |
45+
| :cpp:func:`bitwise_rshift` | per slot bitwise right shift |
4646
+---------------------------------------+----------------------------------------------------+
4747
| :cpp:func:`rotr` | per slot rotate right |
4848
+---------------------------------------+----------------------------------------------------+

include/xsimd/arch/common/xsimd_common_arithmetic.hpp

Lines changed: 32 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,13 @@ namespace xsimd
3434
{ return x << y; },
3535
self, other);
3636
}
37+
template <size_t shift, class A, class T, class /*=typename std::enable_if<std::is_integral<T>::value, void>::type*/>
38+
XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& self, requires_arch<common>) noexcept
39+
{
40+
constexpr auto bits = std::numeric_limits<T>::digits + std::numeric_limits<T>::is_signed;
41+
static_assert(shift < bits, "Shift must be less than the number of bits in T");
42+
return bitwise_lshift(self, shift, A {});
43+
}
3744

3845
// bitwise_rshift
3946
template <class A, class T, class /*=typename std::enable_if<std::is_integral<T>::value, void>::type*/>
@@ -43,6 +50,13 @@ namespace xsimd
4350
{ return x >> y; },
4451
self, other);
4552
}
53+
template <size_t shift, class A, class T, class /*=typename std::enable_if<std::is_integral<T>::value, void>::type*/>
54+
XSIMD_INLINE batch<T, A> bitwise_rshift(batch<T, A> const& self, requires_arch<common>) noexcept
55+
{
56+
constexpr auto bits = std::numeric_limits<T>::digits + std::numeric_limits<T>::is_signed;
57+
static_assert(shift < bits, "Shift must be less than the number of bits in T");
58+
return bitwise_rshift(self, shift, A {});
59+
}
4660

4761
// decr
4862
template <class A, class T>
@@ -166,16 +180,30 @@ namespace xsimd
166180
template <class A, class T, class STy>
167181
XSIMD_INLINE batch<T, A> rotl(batch<T, A> const& self, STy other, requires_arch<common>) noexcept
168182
{
169-
constexpr auto N = std::numeric_limits<T>::digits;
170-
return (self << other) | (self >> (N - other));
183+
constexpr auto bits = std::numeric_limits<T>::digits + std::numeric_limits<T>::is_signed;
184+
return (self << other) | (self >> (bits - other));
185+
}
186+
template <size_t count, class A, class T>
187+
XSIMD_INLINE batch<T, A> rotl(batch<T, A> const& self, requires_arch<common>) noexcept
188+
{
189+
constexpr auto bits = std::numeric_limits<T>::digits + std::numeric_limits<T>::is_signed;
190+
static_assert(count < bits, "Count amount must be less than the number of bits in T");
191+
return bitwise_lshift<count>(self) | bitwise_rshift<bits - count>(self);
171192
}
172193

173194
// rotr
174195
template <class A, class T, class STy>
175196
XSIMD_INLINE batch<T, A> rotr(batch<T, A> const& self, STy other, requires_arch<common>) noexcept
176197
{
177-
constexpr auto N = std::numeric_limits<T>::digits;
178-
return (self >> other) | (self << (N - other));
198+
constexpr auto bits = std::numeric_limits<T>::digits + std::numeric_limits<T>::is_signed;
199+
return (self >> other) | (self << (bits - other));
200+
}
201+
template <size_t count, class A, class T>
202+
XSIMD_INLINE batch<T, A> rotr(batch<T, A> const& self, requires_arch<common>) noexcept
203+
{
204+
constexpr auto bits = std::numeric_limits<T>::digits + std::numeric_limits<T>::is_signed;
205+
static_assert(count < bits, "Count must be less than the number of bits in T");
206+
return bitwise_rshift<count>(self) | bitwise_lshift<bits - count>(self);
179207
}
180208

181209
// sadd

include/xsimd/arch/xsimd_avx2.hpp

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717

1818
#include "../types/xsimd_avx2_register.hpp"
1919

20+
#include <limits>
21+
2022
namespace xsimd
2123
{
2224

@@ -172,6 +174,29 @@ namespace xsimd
172174
}
173175
}
174176

177+
template <size_t shift, class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
178+
XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& self, requires_arch<avx2>) noexcept
179+
{
180+
constexpr auto bits = std::numeric_limits<T>::digits + std::numeric_limits<T>::is_signed;
181+
static_assert(shift < bits, "Shift must be less than the number of bits in T");
182+
XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
183+
{
184+
return _mm256_slli_epi16(self, shift);
185+
}
186+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
187+
{
188+
return _mm256_slli_epi32(self, shift);
189+
}
190+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
191+
{
192+
return _mm256_slli_epi64(self, shift);
193+
}
194+
else
195+
{
196+
return bitwise_lshift<shift>(self, avx {});
197+
}
198+
}
199+
175200
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
176201
XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx2>) noexcept
177202
{
@@ -252,6 +277,65 @@ namespace xsimd
252277
}
253278
}
254279

280+
template <size_t shift, class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
281+
XSIMD_INLINE batch<T, A> bitwise_rshift(batch<T, A> const& self, requires_arch<avx2>) noexcept
282+
{
283+
constexpr auto bits = std::numeric_limits<T>::digits + std::numeric_limits<T>::is_signed;
284+
static_assert(shift < bits, "Shift amount must be less than the number of bits in T");
285+
if (std::is_signed<T>::value)
286+
{
287+
XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
288+
{
289+
__m256i sign_mask = _mm256_set1_epi16((0xFF00 >> shift) & 0x00FF);
290+
__m256i cmp_is_negative = _mm256_cmpgt_epi8(_mm256_setzero_si256(), self);
291+
__m256i res = _mm256_srai_epi16(self, shift);
292+
return _mm256_or_si256(
293+
detail::fwd_to_sse([](__m128i s, __m128i o) noexcept
294+
{ return bitwise_and(batch<T, sse4_2>(s), batch<T, sse4_2>(o), sse4_2 {}); },
295+
sign_mask, cmp_is_negative),
296+
_mm256_andnot_si256(sign_mask, res));
297+
}
298+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
299+
{
300+
return _mm256_srai_epi16(self, shift);
301+
}
302+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
303+
{
304+
return _mm256_srai_epi32(self, shift);
305+
}
306+
else
307+
{
308+
return bitwise_rshift<shift>(self, avx {});
309+
}
310+
}
311+
else
312+
{
313+
XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
314+
{
315+
const __m256i byte_mask = _mm256_set1_epi16(0x00FF);
316+
__m256i u16 = _mm256_and_si256(self, byte_mask);
317+
__m256i r16 = _mm256_srli_epi16(u16, shift);
318+
return _mm256_and_si256(r16, byte_mask);
319+
}
320+
XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
321+
{
322+
return _mm256_srli_epi16(self, shift);
323+
}
324+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
325+
{
326+
return _mm256_srli_epi32(self, shift);
327+
}
328+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
329+
{
330+
return _mm256_srli_epi64(self, shift);
331+
}
332+
else
333+
{
334+
return bitwise_rshift<shift>(self, avx {});
335+
}
336+
}
337+
}
338+
255339
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
256340
XSIMD_INLINE batch<T, A> bitwise_rshift(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx2>) noexcept
257341
{

include/xsimd/arch/xsimd_avx512f.hpp

Lines changed: 94 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -564,6 +564,100 @@ namespace xsimd
564564
}
565565
}
566566

567+
// rotl
568+
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
569+
XSIMD_INLINE batch<T, A> rotl(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx512f>) noexcept
570+
{
571+
XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
572+
{
573+
return _mm512_rolv_epi32(self, other);
574+
}
575+
XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
576+
{
577+
return _mm512_rolv_epi64(self, other);
578+
}
579+
return detail::fwd_to_avx([](__m256i s, __m256i o) noexcept
580+
{ return rotl(batch<T, avx2>(s), batch<T, avx2>(o), avx2 {}); },
581+
self, other);
582+
}
583+
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
584+
XSIMD_INLINE batch<T, A> rotl(batch<T, A> const& self, int32_t other, requires_arch<avx512f>) noexcept
585+
{
586+
return rotl(self, batch<T, A>(other), A {});
587+
}
588+
template <size_t count, class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
589+
XSIMD_INLINE batch<T, A> rotl(batch<T, A> const& self, requires_arch<avx512f>) noexcept
590+
{
591+
constexpr auto bits = std::numeric_limits<T>::digits + std::numeric_limits<T>::is_signed;
592+
static_assert(count < bits, "Count must be less than the number of bits in T");
593+
XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
594+
{
595+
return _mm512_rol_epi32(self, count);
596+
}
597+
XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
598+
{
599+
return _mm512_rol_epi64(self, count);
600+
}
601+
602+
return detail::fwd_to_avx([](__m256i s) noexcept
603+
{ return rotl<count>(batch<T, avx2>(s), avx2 {}); },
604+
self);
605+
}
606+
607+
// rotr
608+
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
609+
XSIMD_INLINE batch<T, A> rotr(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx512f>) noexcept
610+
{
611+
XSIMD_IF_CONSTEXPR(sizeof(T) < 4)
612+
{
613+
return detail::fwd_to_avx([](__m256i s, __m256i o) noexcept
614+
{ return rotr(batch<T, avx2>(s), batch<T, avx2>(o), avx2 {}); },
615+
self, other);
616+
}
617+
XSIMD_IF_CONSTEXPR(std::is_unsigned<T>::value)
618+
{
619+
XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
620+
{
621+
return _mm512_rorv_epi32(self, other);
622+
}
623+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
624+
{
625+
return _mm512_rorv_epi64(self, other);
626+
}
627+
}
628+
return rotr(self, other, common {});
629+
}
630+
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
631+
XSIMD_INLINE batch<T, A> rotr(batch<T, A> const& self, int32_t other, requires_arch<avx512f>) noexcept
632+
{
633+
return rotr(self, batch<T, A>(other), A {});
634+
}
635+
636+
template <size_t count, class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
637+
XSIMD_INLINE batch<T, A> rotr(batch<T, A> const& self, requires_arch<avx512f>) noexcept
638+
{
639+
constexpr auto bits = std::numeric_limits<T>::digits + std::numeric_limits<T>::is_signed;
640+
static_assert(count < bits, "Count must be less than the number of bits in T");
641+
XSIMD_IF_CONSTEXPR(sizeof(T) < 4)
642+
{
643+
return detail::fwd_to_avx([](__m256i s) noexcept
644+
{ return rotr<count>(batch<T, avx2>(s), avx2 {}); },
645+
self);
646+
}
647+
XSIMD_IF_CONSTEXPR(std::is_unsigned<T>::value)
648+
{
649+
XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
650+
{
651+
return _mm512_ror_epi32(self, count);
652+
}
653+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
654+
{
655+
return _mm512_ror_epi64(self, count);
656+
}
657+
}
658+
return rotr<count>(self, common {});
659+
}
660+
567661
// bitwise_xor
568662
template <class A>
569663
XSIMD_INLINE batch<float, A> bitwise_xor(batch<float, A> const& self, batch<float, A> const& other, requires_arch<avx512f>) noexcept
@@ -2551,7 +2645,6 @@ namespace xsimd
25512645
}
25522646

25532647
}
2554-
25552648
}
25562649

25572650
#endif

include/xsimd/arch/xsimd_avx512vbmi2.hpp

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,64 @@ namespace xsimd
6767
{
6868
return _mm512_maskz_expand_epi8(mask.mask(), self);
6969
}
70+
71+
// rotl
72+
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
73+
XSIMD_INLINE batch<T, A> rotl(batch<T, A> const& self, int32_t other, requires_arch<avx512vbmi2>) noexcept
74+
{
75+
XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
76+
{
77+
return _mm512_shldv_epi16(self, self, _mm512_set1_epi16(static_cast<uint16_t>(other)));
78+
}
79+
else
80+
{
81+
return rotl(self, other, avx512bw {});
82+
}
83+
}
84+
85+
template <size_t count, class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
86+
XSIMD_INLINE batch<T, A> rotl(batch<T, A> const& self, requires_arch<avx512vbmi2>) noexcept
87+
{
88+
constexpr auto bits = std::numeric_limits<T>::digits + std::numeric_limits<T>::is_signed;
89+
static_assert(count < bits, "Count must be less than the number of bits in T");
90+
XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
91+
{
92+
return _mm512_shldi_epi16(self, self, count);
93+
}
94+
else
95+
{
96+
return rotl<count>(self, avx512bw {});
97+
}
98+
}
99+
100+
// rotr
101+
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
102+
XSIMD_INLINE batch<T, A> rotr(batch<T, A> const& self, int32_t other, requires_arch<avx512vbmi2>) noexcept
103+
{
104+
XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
105+
{
106+
return _mm512_shrdv_epi16(self, self, _mm512_set1_epi16(static_cast<uint16_t>(other)));
107+
}
108+
else
109+
{
110+
return rotr(self, other, avx512bw {});
111+
}
112+
}
113+
114+
template <size_t count, class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
115+
XSIMD_INLINE batch<T, A> rotr(batch<T, A> const& self, requires_arch<avx512vbmi2>) noexcept
116+
{
117+
constexpr auto bits = std::numeric_limits<T>::digits + std::numeric_limits<T>::is_signed;
118+
static_assert(count < bits, "count must be less than the number of bits in T");
119+
XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
120+
{
121+
return _mm512_shrdi_epi16(self, self, count);
122+
}
123+
else
124+
{
125+
return rotr<count>(self, avx512bw {});
126+
}
127+
}
70128
}
71129
}
72130

include/xsimd/arch/xsimd_common_fwd.hpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,12 @@ namespace xsimd
2626
XSIMD_INLINE batch<T, A> abs(batch<T, A> const& self, requires_arch<common>) noexcept;
2727
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
2828
XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& self, batch<T, A> const& other, requires_arch<common>) noexcept;
29+
template <size_t shift, class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
30+
XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& self, requires_arch<common>) noexcept;
2931
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
3032
XSIMD_INLINE batch<T, A> bitwise_rshift(batch<T, A> const& self, batch<T, A> const& other, requires_arch<common>) noexcept;
33+
template <size_t shift, class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
34+
XSIMD_INLINE batch<T, A> bitwise_rshift(batch<T, A> const& self, requires_arch<common>) noexcept;
3135
template <class A, class T>
3236
XSIMD_INLINE batch_bool<T, A> gt(batch<T, A> const& self, batch<T, A> const& other, requires_arch<common>) noexcept;
3337
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
@@ -40,6 +44,14 @@ namespace xsimd
4044
XSIMD_INLINE T reduce_add(batch<T, A> const& self, requires_arch<common>) noexcept;
4145
template <class A, class T, class = typename std::enable_if<std::is_scalar<T>::value, void>::type>
4246
XSIMD_INLINE T reduce_mul(batch<T, A> const& self, requires_arch<common>) noexcept;
47+
template <class A, class T, class STy>
48+
XSIMD_INLINE batch<T, A> rotl(batch<T, A> const& self, STy other, requires_arch<common>) noexcept;
49+
template <size_t count, class A, class T>
50+
XSIMD_INLINE batch<T, A> rotl(batch<T, A> const& self, requires_arch<common>) noexcept;
51+
template <class A, class T, class STy>
52+
XSIMD_INLINE batch<T, A> rotr(batch<T, A> const& self, STy other, requires_arch<common>) noexcept;
53+
template <size_t count, class A, class T>
54+
XSIMD_INLINE batch<T, A> rotr(batch<T, A> const& self, requires_arch<common>) noexcept;
4355
// Forward declarations for pack-level helpers
4456
namespace detail
4557
{

0 commit comments

Comments
 (0)