2424//  Wrap intrinsics so we can pass them as function pointers
2525//  - OP: intrinsics name prefix, e.g., vorrq
2626//  - RT: (pre-change macros only) type trait used to deduce the intrinsic's return type
//  WRAP_BINARY_INT_EXCLUDING_64: defines, in namespace wrap, one inline forwarding function per
//  8/16/32-bit integer vector type (_OP_u8, _OP_s8, _OP_u16, _OP_s16, _OP_u32, _OP_s32); each calls
//  the global intrinsic ::OP_<suffix>(a, b) so the intrinsic can be passed as a function pointer.
//  This span is a diff hunk: '-' lines are the pre-change macro (return type RT<vector type>),
//  '+' lines are the post-change macro (return type is the argument vector type, RT parameter dropped).
//  NOTE(review): this macro is also instantiated for comparison intrinsics (vceqq, vcltq, vcleq,
//  vcgtq, vcgeq). The removed comp_return_type trait mapped signed inputs to unsigned result vectors,
//  so the post-change signed wrappers presumably rely on an implicit vector conversion -- confirm
//  this builds on every supported toolchain.
27- #define  WRAP_BINARY_INT_EXCLUDING_64 (OP, RT  )                                                       \
27+ #define  WRAP_BINARY_INT_EXCLUDING_64 (OP )                                                       \
2828    namespace  wrap  {                                                                               \
29-         inline  RT< uint8x16_t >  _##OP##_u8 (uint8x16_t  a, uint8x16_t  b) { return  ::OP##_u8  (a, b); } \
30-         inline  RT< int8x16_t >   _##OP##_s8 (int8x16_t   a, int8x16_t   b) { return  ::OP##_s8  (a, b); } \
31-         inline  RT< uint16x8_t >  _##OP##_u16(uint16x8_t  a, uint16x8_t  b) { return  ::OP##_u16 (a, b); } \
32-         inline  RT< int16x8_t >   _##OP##_s16(int16x8_t   a, int16x8_t   b) { return  ::OP##_s16 (a, b); } \
33-         inline  RT< uint32x4_t >  _##OP##_u32(uint32x4_t  a, uint32x4_t  b) { return  ::OP##_u32 (a, b); } \
34-         inline  RT< int32x4_t >   _##OP##_s32(int32x4_t   a, int32x4_t   b) { return  ::OP##_s32 (a, b); } \
29+         inline  uint8x16_t  _##OP##_u8 (uint8x16_t  a, uint8x16_t  b) { return  ::OP##_u8  (a, b); } \
30+         inline  int8x16_t   _##OP##_s8 (int8x16_t   a, int8x16_t   b) { return  ::OP##_s8  (a, b); } \
31+         inline  uint16x8_t  _##OP##_u16(uint16x8_t  a, uint16x8_t  b) { return  ::OP##_u16 (a, b); } \
32+         inline  int16x8_t   _##OP##_s16(int16x8_t   a, int16x8_t   b) { return  ::OP##_s16 (a, b); } \
33+         inline  uint32x4_t  _##OP##_u32(uint32x4_t  a, uint32x4_t  b) { return  ::OP##_u32 (a, b); } \
34+         inline  int32x4_t   _##OP##_s32(int32x4_t   a, int32x4_t   b) { return  ::OP##_s32 (a, b); } \
3535    }
3636
//  WRAP_BINARY_INT: expands WRAP_BINARY_INT_EXCLUDING_64 for the same OP and additionally defines,
//  in namespace wrap, the 64-bit wrappers _OP_u64 and _OP_s64, which forward to ::OP_u64 / ::OP_s64.
//  This span is a diff hunk: '-' lines are the pre-change form taking an RT return-type trait,
//  '+' lines are the post-change form whose wrappers return their argument vector type.
37- #define  WRAP_BINARY_INT (OP, RT  )                                                                    \
38-     WRAP_BINARY_INT_EXCLUDING_64 (OP, RT )                                                           \
37+ #define  WRAP_BINARY_INT (OP )                                                                    \
38+     WRAP_BINARY_INT_EXCLUDING_64 (OP)                                                           \
3939    namespace wrap {                                                                               \
40-         inline  RT< uint64x2_t >  _##OP##_u64 (uint64x2_t  a, uint64x2_t  b) { return  ::OP##_u64 (a, b); } \
41-         inline  RT< int64x2_t >   _##OP##_s64 (int64x2_t   a, int64x2_t   b) { return  ::OP##_s64 (a, b); } \
40+         inline  uint64x2_t  _##OP##_u64 (uint64x2_t  a, uint64x2_t  b) { return  ::OP##_u64 (a, b); } \
41+         inline  int64x2_t   _##OP##_s64 (int64x2_t   a, int64x2_t   b) { return  ::OP##_s64 (a, b); } \
4242    }
4343
//  WRAP_BINARY_FLOAT: defines wrap::_OP_f32, an inline function forwarding two float32x4_t operands
//  to the global intrinsic ::OP_f32. This span is a diff hunk: the '-' lines are the pre-change form
//  (return type RT<float32x4_t>), the '+' lines the post-change form (return type float32x4_t).
//  NOTE(review): this macro is also instantiated for comparison intrinsics (vceqq, vcltq, vcleq,
//  vcgtq, vcgeq), for which the removed comp_return_type trait specified uint32x4_t -- confirm that
//  returning float32x4_t from those wrappers is intended and compiles on every supported toolchain.
44- #define  WRAP_BINARY_FLOAT (OP, RT  )                                                                     \
44+ #define  WRAP_BINARY_FLOAT (OP )                                                                     \
4545    namespace  wrap  {                                                                                  \
46-         inline  RT< float32x4_t >  _##OP##_f32(float32x4_t  a, float32x4_t  b) { return  ::OP##_f32 (a, b); } \
46+         inline  float32x4_t  _##OP##_f32(float32x4_t  a, float32x4_t  b) { return  ::OP##_f32 (a, b); } \
4747    }
4848
4949#define  WRAP_UNARY_INT_EXCLUDING_64 (OP )                                       \
@@ -87,139 +87,6 @@ namespace xsimd
8787
8888        namespace  detail 
8989        {
90-             template  <template  <class > class  return_type , class ... T>
91-             struct  neon_dispatcher_base 
92-             {
93-                 struct  unary 
94-                 {
95-                     using  container_type = std::tuple<return_type<T> (*)(T)...>;
96-                     const  container_type m_func;
97- 
98-                     template  <class  U >
99-                     return_type<U> apply (U rhs) const 
100-                     {
101-                         using  func_type = return_type<U> (*)(U);
102-                         auto  func = xsimd::detail::get<func_type>(m_func);
103-                         return  func (rhs);
104-                     }
105-                 };
106- 
107-                 struct  binary 
108-                 {
109-                     using  container_type = std::tuple<return_type<T> (*)(T, T) ...>;
110-                     const  container_type m_func;
111- 
112-                     template  <class  U >
113-                     return_type<U> apply (U lhs, U rhs) const 
114-                     {
115-                         using  func_type = return_type<U> (*)(U, U);
116-                         auto  func = xsimd::detail::get<func_type>(m_func);
117-                         return  func (lhs, rhs);
118-                     }
119-                 };
120-             };
121- 
122-             /* **************************
123-              *  arithmetic dispatchers * 
124-              ***************************/  
125- 
126-             template  <class  T >
127-             using  identity_return_type = T;
128-             
129-             template  <class ... T>
130-             struct  neon_dispatcher_impl  : neon_dispatcher_base<identity_return_type, T...>
131-             {
132-             };
133- 
134- 
135-             using  neon_dispatcher = neon_dispatcher_impl<uint8x16_t , int8x16_t ,
136-                                                        uint16x8_t , int16x8_t ,
137-                                                        uint32x4_t , int32x4_t ,
138-                                                        uint64x2_t , int64x2_t ,
139-                                                        float32x4_t >;
140- 
141-             using  excluding_int64_dispatcher = neon_dispatcher_impl<uint8x16_t , int8x16_t ,
142-                                                                    uint16x8_t , int16x8_t ,
143-                                                                    uint32x4_t , int32x4_t ,
144-                                                                    float32x4_t >;
145- 
146-             /* *************************
147-              * comparison dispatchers * 
148-              **************************/  
149- 
150-             template  <class  T >
151-             struct  comp_return_type_impl ;
152- 
153-             template  <>
154-             struct  comp_return_type_impl <uint8x16_t >
155-             {
156-                 using  type = uint8x16_t ;
157-             };
158- 
159- //  MSVC uses same underlying type for all vector variants which would cause C++ function overload ambiguity
160- #if  !defined(_WIN32) || (defined(__clang__))
161-             template  <>
162-             struct  comp_return_type_impl <int8x16_t >
163-             {
164-                 using  type = uint8x16_t ;
165-             };
166- 
167-             template  <>
168-             struct  comp_return_type_impl <uint16x8_t >
169-             {
170-                 using  type = uint16x8_t ;
171-             };
172- 
173-             template  <>
174-             struct  comp_return_type_impl <int16x8_t >
175-             {
176-                 using  type = uint16x8_t ;
177-             };
178- 
179-             template  <>
180-             struct  comp_return_type_impl <uint32x4_t >
181-             {
182-                 using  type = uint32x4_t ;
183-             };
184- 
185-             template  <>
186-             struct  comp_return_type_impl <int32x4_t >
187-             {
188-                 using  type = uint32x4_t ;
189-             };
190- 
191-             template  <>
192-             struct  comp_return_type_impl <uint64x2_t >
193-             {
194-                 using  type = uint64x2_t ;
195-             };
196- 
197-             template  <>
198-             struct  comp_return_type_impl <int64x2_t >
199-             {
200-                 using  type = uint64x2_t ;
201-             };
202-             
203-             template  <>
204-             struct  comp_return_type_impl <float32x4_t >
205-             {
206-                 using  type = uint32x4_t ;
207-             };
208- #endif 
209- 
210-             template  <class  T >
211-             using  comp_return_type = typename  comp_return_type_impl<T>::type;
212- 
213-             template  <class ... T>
214-             struct  neon_comp_dispatcher_impl  : neon_dispatcher_base<comp_return_type, T...>
215-             {
216-             };
217- 
218-             using  excluding_int64_comp_dispatcher = neon_comp_dispatcher_impl<uint8x16_t , int8x16_t ,
219-                                                                              uint16x8_t , int16x8_t ,
220-                                                                              uint32x4_t , int32x4_t ,
221-                                                                              float32x4_t >;
222- 
22390            /* *************************************
22491             * enabling / disabling metafunctions * 
22592             **************************************/  
@@ -627,8 +494,8 @@ namespace xsimd
627494         * add * 
628495         *******/  
629496
630-         WRAP_BINARY_INT (vaddq, detail::identity_return_type )
631-         WRAP_BINARY_FLOAT (vaddq, detail::identity_return_type )
497+         WRAP_BINARY_INT (vaddq)
498+         WRAP_BINARY_FLOAT (vaddq)
632499
633500        template  <class  A , class  T , detail::enable_neon_type_t <T> = 0 >
634501        batch<T, A> add (batch<T, A> const & lhs, batch<T, A> const & rhs, requires_arch<neon>)
@@ -645,7 +512,7 @@ namespace xsimd
645512         * sadd * 
646513         ********/  
647514
648-         WRAP_BINARY_INT (vqaddq, detail::identity_return_type )
515+         WRAP_BINARY_INT (vqaddq)
649516
650517        template  <class  A , class  T , detail::enable_neon_type_t <T> = 0 >
651518        batch<T, A> sadd (batch<T, A> const & lhs, batch<T, A> const & rhs, requires_arch<neon>)
@@ -662,8 +529,8 @@ namespace xsimd
662529         * sub * 
663530         *******/  
664531
665-         WRAP_BINARY_INT (vsubq, detail::identity_return_type )
666-         WRAP_BINARY_FLOAT (vsubq, detail::identity_return_type )
532+         WRAP_BINARY_INT (vsubq)
533+         WRAP_BINARY_FLOAT (vsubq)
667534
668535        template  <class  A , class  T , detail::enable_neon_type_t <T> = 0 >
669536        batch<T, A> sub (batch<T, A> const & lhs, batch<T, A> const & rhs, requires_arch<neon>)
@@ -680,7 +547,7 @@ namespace xsimd
680547         * ssub * 
681548         ********/  
682549
683-         WRAP_BINARY_INT (vqsubq, detail::identity_return_type )
550+         WRAP_BINARY_INT (vqsubq)
684551
685552        template  <class  A , class  T , detail::enable_neon_type_t <T> = 0 >
686553        batch<T, A> ssub (batch<T, A> const & lhs, batch<T, A> const & rhs, requires_arch<neon>)
@@ -698,8 +565,8 @@ namespace xsimd
698565         * mul * 
699566         *******/  
700567
701-         WRAP_BINARY_INT_EXCLUDING_64 (vmulq, detail::identity_return_type )
702-         WRAP_BINARY_FLOAT (vmulq, detail::identity_return_type )
568+         WRAP_BINARY_INT_EXCLUDING_64 (vmulq)
569+         WRAP_BINARY_FLOAT (vmulq)
703570
704571        template  <class  A , class  T , detail::exclude_int64_neon_t <T> = 0 >
705572        batch<T, A> mul (batch<T, A> const & lhs, batch<T, A> const & rhs, requires_arch<neon>)
@@ -751,8 +618,8 @@ namespace xsimd
751618         * eq * 
752619         ******/  
753620
754-         WRAP_BINARY_INT_EXCLUDING_64 (vceqq, detail::comp_return_type )
755-         WRAP_BINARY_FLOAT (vceqq, detail::comp_return_type )
621+         WRAP_BINARY_INT_EXCLUDING_64 (vceqq)
622+         WRAP_BINARY_FLOAT (vceqq)
756623
757624        template  <class  A , class  T , detail::exclude_int64_neon_t <T> = 0 >
758625        batch_bool<T, A> eq (batch<T, A> const & lhs, batch<T, A> const & rhs, requires_arch<neon>)
@@ -797,8 +664,8 @@ namespace xsimd
797664         * lt * 
798665         ******/  
799666
800-         WRAP_BINARY_INT_EXCLUDING_64 (vcltq, detail::comp_return_type )
801-         WRAP_BINARY_FLOAT (vcltq, detail::comp_return_type )
667+         WRAP_BINARY_INT_EXCLUDING_64 (vcltq)
668+         WRAP_BINARY_FLOAT (vcltq)
802669
803670        template  <class  A , class  T , detail::exclude_int64_neon_t <T> = 0 >
804671        batch_bool<T, A> lt (batch<T, A> const & lhs, batch<T, A> const & rhs, requires_arch<neon>)
@@ -821,8 +688,8 @@ namespace xsimd
821688         * le * 
822689         ******/  
823690
824-         WRAP_BINARY_INT_EXCLUDING_64 (vcleq, detail::comp_return_type )
825-         WRAP_BINARY_FLOAT (vcleq, detail::comp_return_type )
691+         WRAP_BINARY_INT_EXCLUDING_64 (vcleq)
692+         WRAP_BINARY_FLOAT (vcleq)
826693
827694        template  <class  A , class  T , detail::exclude_int64_neon_t <T> = 0 >
828695        batch_bool<T, A> le (batch<T, A> const & lhs, batch<T, A> const & rhs, requires_arch<neon>)
@@ -845,8 +712,8 @@ namespace xsimd
845712         * gt * 
846713         ******/  
847714
848-         WRAP_BINARY_INT_EXCLUDING_64 (vcgtq, detail::comp_return_type )
849-         WRAP_BINARY_FLOAT (vcgtq, detail::comp_return_type )
715+         WRAP_BINARY_INT_EXCLUDING_64 (vcgtq)
716+         WRAP_BINARY_FLOAT (vcgtq)
850717
851718        template  <class  A , class  T , detail::exclude_int64_neon_t <T> = 0 >
852719        batch_bool<T, A> gt (batch<T, A> const & lhs, batch<T, A> const & rhs, requires_arch<neon>)
@@ -869,8 +736,8 @@ namespace xsimd
869736         * ge * 
870737         ******/  
871738
872-         WRAP_BINARY_INT_EXCLUDING_64 (vcgeq, detail::comp_return_type )
873-         WRAP_BINARY_FLOAT (vcgeq, detail::comp_return_type )
739+         WRAP_BINARY_INT_EXCLUDING_64 (vcgeq)
740+         WRAP_BINARY_FLOAT (vcgeq)
874741
875742        template  <class  A , class  T , detail::exclude_int64_neon_t <T> = 0 >
876743        batch_bool<T, A> get (batch<T, A> const & lhs, batch<T, A> const & rhs, requires_arch<neon>)
@@ -893,7 +760,7 @@ namespace xsimd
893760         * bitwise_and * 
894761         ***************/  
895762
896-         WRAP_BINARY_INT (vandq, detail::identity_return_type )
763+         WRAP_BINARY_INT (vandq)
897764
898765        namespace  detail 
899766        {
@@ -930,7 +797,7 @@ namespace xsimd
930797         * bitwise_or * 
931798         **************/  
932799
933-         WRAP_BINARY_INT (vorrq, detail::identity_return_type )
800+         WRAP_BINARY_INT (vorrq)
934801
935802        namespace  detail 
936803        {
@@ -967,7 +834,7 @@ namespace xsimd
967834         * bitwise_xor * 
968835         ***************/  
969836
970-         WRAP_BINARY_INT (veorq, detail::identity_return_type )
837+         WRAP_BINARY_INT (veorq)
971838
972839        namespace  detail 
973840        {
@@ -1085,7 +952,7 @@ namespace xsimd
1085952         * bitwise_andnot * 
1086953         ******************/  
1087954
1088-         WRAP_BINARY_INT (vbicq, detail::identity_return_type )
955+         WRAP_BINARY_INT (vbicq)
1089956
1090957        namespace  detail 
1091958        {
@@ -1121,8 +988,8 @@ namespace xsimd
1121988         * min * 
1122989         *******/  
1123990
1124-         WRAP_BINARY_INT_EXCLUDING_64 (vminq, detail::identity_return_type )
1125-         WRAP_BINARY_FLOAT (vminq, detail::identity_return_type )
991+         WRAP_BINARY_INT_EXCLUDING_64 (vminq)
992+         WRAP_BINARY_FLOAT (vminq)
1126993
1127994        template  <class  A , class  T , detail::exclude_int64_neon_t <T> = 0 >
1128995        batch<T, A> min (batch<T, A> const & lhs, batch<T, A> const & rhs, requires_arch<neon>)
@@ -1145,8 +1012,8 @@ namespace xsimd
11451012         * max * 
11461013         *******/  
11471014
1148-         WRAP_BINARY_INT_EXCLUDING_64 (vmaxq, detail::identity_return_type )
1149-         WRAP_BINARY_FLOAT (vmaxq, detail::identity_return_type )
1015+         WRAP_BINARY_INT_EXCLUDING_64 (vmaxq)
1016+         WRAP_BINARY_FLOAT (vmaxq)
11501017
11511018        template  <class  A , class  T , detail::exclude_int64_neon_t <T> = 0 >
11521019        batch<T, A> max (batch<T, A> const & lhs, batch<T, A> const & rhs, requires_arch<neon>)
@@ -1355,30 +1222,6 @@ namespace xsimd
13551222            inline  float32x4_t  _vbslq_f32 (uint32x4_t  a, float32x4_t  b, float32x4_t  c) { return  ::vbslq_f32 (a, b, c); }
13561223        }
13571224
1358-         namespace  detail 
1359-         {
1360-             template  <class ... T>
1361-             struct  neon_select_dispatcher_impl 
1362-             {
1363-                 using  container_type = std::tuple<T (*)(comp_return_type<T>, T, T)...>;
1364-                 const  container_type m_func;
1365- 
1366-                 template  <class  U >
1367-                 U apply (comp_return_type<U> cond, U lhs, U rhs) const 
1368-                 {
1369-                     using  func_type = U  (*)(comp_return_type<U>, U, U);
1370-                     auto  func = xsimd::detail::get<func_type>(m_func);
1371-                     return  func (cond, lhs, rhs);
1372-                 }
1373-             };
1374- 
1375-             using  neon_select_dispatcher = neon_select_dispatcher_impl<uint8x16_t , int8x16_t ,
1376-                                                                      uint16x8_t , int16x8_t ,
1377-                                                                      uint32x4_t , int32x4_t ,
1378-                                                                      uint64x2_t , int64x2_t ,
1379-                                                                      float32x4_t >;
1380-         }
1381- 
13821225        template  <class  A , class  T , detail::enable_neon_type_t <T> = 0 >
13831226        batch<T, A> select (batch_bool<T, A> const & cond, batch<T, A> const & a, batch<T, A> const & b, requires_arch<neon>)
13841227        {
0 commit comments