@@ -50,142 +50,227 @@ pub const fn u8(n: u8) -> u8 {
5050 U8_ISQRT_WITH_REMAINDER [ n as usize ] . 0
5151}
5252
53- /// Returns the [integer square root][1] and remainder of any [`u8`](prim@u8)
54- /// input.
55- ///
56- /// For example, `u8_with_remainder(17) == (4, 1)` because the integer square
57- /// root of 17 is 4 and because 17 is 1 higher than 4 squared.
58- ///
59- /// [1]: <https://en.wikipedia.org/wiki/Integer_square_root>
60- /// "Wikipedia contributors. Integer square root. Wikipedia, The Free
61- /// Encyclopedia."
62- #[ must_use = "this returns the result of the operation, \
63- without modifying the original"]
64- // `#[inline(always)]` because this is just a memory access.
65- #[ inline( always) ]
66- const fn u8_with_remainder ( n : u8 ) -> ( u8 , u8 ) {
67- U8_ISQRT_WITH_REMAINDER [ n as usize ]
53+ /*macro_rules! unsigned_fn {
54+ ($unsigned_type:ident, $stages:block) => {
55+ pub const fn $unsigned_type(mut n: u16) -> u16 {
56+ if n == 0 {
57+ return 0;
58+ }
59+ const EVEN_BITMASK: u32 = u32::MAX & !1;
60+ let precondition_shift = n.leading_zeros() & EVEN_BITMASK;
61+ n <<= precondition_shift;
62+
63+ let s = $stages;
64+
65+ let result_shift = precondition_shift >> 1;
66+ s >> result_shift
67+ }
68+ };
69+ }*/
70+
/// First stage of the staged square root: looks up the integer square root
/// and remainder of the most-significant byte of `$n`.
///
/// * `$original_bits` - bit width of `$n`'s type, written as a literal.
/// * `$n` - identifier of the input value.
///
/// The expansion shifts `$n` right until only its top 8 bits remain and uses
/// them as an index into `U8_ISQRT_WITH_REMAINDER`, evaluating to that table
/// entry (a `(root, remainder)` pair).
macro_rules! first_stage {
    ($original_bits:literal, $n:ident) => {{
        // Distance between the top byte of `$n` and bit 0.
        const N_SHIFT: u32 = $original_bits - 8;
        let n = $n >> N_SHIFT;

        U8_ISQRT_WITH_REMAINDER[n as usize]
    }};
}
6979
70- /// Returns the [integer square root][1] of any [`usize`](prim@usize) input.
71- ///
72- /// [1]: <https://en.wikipedia.org/wiki/Integer_square_root>
73- /// "Wikipedia contributors. Integer square root. Wikipedia, The Free
74- /// Encyclopedia."
75- #[ must_use = "this returns the result of the operation, \
76- without modifying the original"]
77- // `#[inline(always)]` because the programmer-accessible functions will use
78- // this internally and the contents of this should be inlined there.
79- #[ inline( always) ]
80- pub const fn usize ( n : usize ) -> usize {
81- #[ cfg( target_pointer_width = "16" ) ]
82- {
83- u16 ( n as u16 ) as usize
/// Intermediate stage of the staged square root: one Karatsuba square root
/// step that doubles the number of input bits covered by the running root.
///
/// * `$original_bits` - bit width of `$n`'s type, written as a literal.
/// * `$ty` - unsigned integer type this stage computes in.
/// * `$n` - identifier of the full input value.
/// * `$s`, `$r` - identifiers holding the root and remainder of the top
///   `<$ty>::BITS / 2` bits of `$n`, as produced by the previous stage.
///
/// Evaluates to `(s, r)`, both of type `$ty`: the integer square root and
/// remainder of the top `<$ty>::BITS` bits of `$n`.
///
/// `$s` must be nonzero because the step divides by `2 * $s`; the callers in
/// this file shift the input left before the first stage so that its top byte
/// is never small.
macro_rules! middle_stage {
    ($original_bits:literal, $ty:ty, $n:ident, $s:ident, $r:ident) => {{
        // Keep only the top `<$ty>::BITS` bits of the input for this stage.
        const N_SHIFT: u32 = $original_bits - <$ty>::BITS;
        let n = ($n >> N_SHIFT) as $ty;

        const HALF_BITS: u32 = <$ty>::BITS >> 1;
        const QUARTER_BITS: u32 = <$ty>::BITS >> 2;
        const LOWER_HALF_1_BITS: $ty = (1 << HALF_BITS) - 1;
        const LOWEST_QUARTER_1_BITS: $ty = (1 << QUARTER_BITS) - 1;

        // The upper half of `n` is already accounted for by `$s` and `$r`;
        // only the lower half still has to be folded in.
        let lo = n & LOWER_HALF_1_BITS;
        let numerator = (($r as $ty) << QUARTER_BITS) | (lo >> QUARTER_BITS);
        let denominator = ($s as $ty) << 1;
        let q = numerator / denominator;
        let u = numerator % denominator;

        // Widen before shifting so the shift can never drop high bits of the
        // narrower incoming root.
        let mut s = (($s as $ty) << QUARTER_BITS) + q;
        let (mut r, overflow) =
            ((u << QUARTER_BITS) | (lo & LOWEST_QUARTER_1_BITS)).overflowing_sub(q * q);
        if overflow {
            // The remainder went negative, so the root estimate is one too
            // high: step down to `s - 1` and give the remainder back
            // `s^2 - (s - 1)^2 = 2s - 1`.
            r = r.wrapping_add(2 * s - 1);
            s -= 1;
        }
        (s, r)
    }};
}
105+
/// Final stage of the staged square root: one Karatsuba square root step
/// that yields only the root, skipping the remainder computation.
///
/// * `$ty` - unsigned integer type of `$n` and of the result.
/// * `$n` - identifier of the full input value.
/// * `$s`, `$r` - identifiers holding the root and remainder of the top half
///   of `$n`'s bits, as produced by the previous stage.
///
/// Evaluates to the integer square root of `$n` as a `$ty`.
///
/// `$s` must be nonzero because the step divides by `2 * $s`; the callers in
/// this file shift the input left before the first stage so that its top byte
/// is never small.
macro_rules! last_stage {
    ($ty:ty, $n:ident, $s:ident, $r:ident) => {{
        const HALF_BITS: u32 = <$ty>::BITS >> 1;
        const QUARTER_BITS: u32 = <$ty>::BITS >> 2;
        const LOWER_HALF_1_BITS: $ty = (1 << HALF_BITS) - 1;

        // The upper half of `$n` is already accounted for by `$s` and `$r`.
        let lo = $n & LOWER_HALF_1_BITS;
        let numerator = (($r as $ty) << QUARTER_BITS) | (lo >> QUARTER_BITS);
        let denominator = ($s as $ty) << 1;
        let q = numerator / denominator;

        // Widen before shifting so the shift can never drop high bits of the
        // narrower incoming root.
        let mut s = (($s as $ty) << QUARTER_BITS) + q;

        // The estimate is checked by squaring it instead of tracking a
        // remainder. A square that overflows `$ty` is necessarily larger than
        // `$n`, so it is treated the same as an ordinary overshoot.
        let (s_squared, overflow) = s.overflowing_mul(s);
        if overflow || s_squared > $n {
            s -= 1;
        }
        s
    }};
}
124+
125+ /*unsigned_fn!(u16, {
126+ let (s, r) = first_stage!(16, n);
127+ last_stage!(u16, n, s, r)
128+ });
129+
130+ unsigned_fn!(u32, {
131+ let (s, r) = first_stage!(32, n);
132+ let (s, r) = middle_stage!(32, u16, n, s, r);
133+ last_stage!(u32, n, s, r)
134+ })*/
135+
136+ pub const fn u16 ( mut n : u16 ) -> u16 {
137+ if n == 0 {
138+ return 0 ;
139+ }
140+ const EVEN_BITMASK : u32 = u32:: MAX & !1 ;
141+ let precondition_shift = n. leading_zeros ( ) & EVEN_BITMASK ;
142+ n <<= precondition_shift;
143+
144+ let ( s, r) = first_stage ! ( 16 , n) ;
145+ let s = last_stage ! ( u16 , n, s, r) ;
146+
147+ let result_shift = precondition_shift >> 1 ;
148+ s >> result_shift
149+ }
150+
151+ pub const fn u32 ( mut n : u32 ) -> u32 {
152+ if n == 0 {
153+ return 0 ;
84154 }
155+ const EVEN_BITMASK : u32 = u32:: MAX & !1 ;
156+ let precondition_shift = n. leading_zeros ( ) & EVEN_BITMASK ;
157+ n <<= precondition_shift;
158+
159+ let ( s, r) = first_stage ! ( 32 , n) ;
160+ let ( s, r) = middle_stage ! ( 32 , u16 , n, s, r) ;
161+ let s = last_stage ! ( u32 , n, s, r) ;
162+
163+ let result_shift = precondition_shift >> 1 ;
164+ s >> result_shift
165+ }
85166
86- # [ cfg ( target_pointer_width = "32" ) ]
87- {
88- u32 ( n as u32 ) as usize
167+ pub const fn u64 ( mut n : u64 ) -> u64 {
168+ if n == 0 {
169+ return 0 ;
89170 }
171+ const EVEN_BITMASK : u32 = u32:: MAX & !1 ;
172+ let precondition_shift = n. leading_zeros ( ) & EVEN_BITMASK ;
173+ n <<= precondition_shift;
174+
175+ let ( s, r) = first_stage ! ( 64 , n) ;
176+ let ( s, r) = middle_stage ! ( 64 , u16 , n, s, r) ;
177+ let ( s, r) = middle_stage ! ( 64 , u32 , n, s, r) ;
178+ let s = last_stage ! ( u64 , n, s, r) ;
179+
180+ let result_shift = precondition_shift >> 1 ;
181+ s >> result_shift
182+ }
90183
91- # [ cfg ( target_pointer_width = "64" ) ]
92- {
93- u64 ( n as u64 ) as usize
184+ pub const fn u128 ( mut n : u128 ) -> u128 {
185+ if n == 0 {
186+ return 0 ;
94187 }
188+ const EVEN_BITMASK : u32 = u32:: MAX & !1 ;
189+ let precondition_shift = n. leading_zeros ( ) & EVEN_BITMASK ;
190+ n <<= precondition_shift;
191+
192+ let ( s, r) = first_stage ! ( 128 , n) ;
193+ let ( s, r) = middle_stage ! ( 128 , u16 , n, s, r) ;
194+ let ( s, r) = middle_stage ! ( 128 , u32 , n, s, r) ;
195+ let ( s, r) = middle_stage ! ( 128 , u64 , n, s, r) ;
196+ let s = last_stage ! ( u128 , n, s, r) ;
197+
198+ let result_shift = precondition_shift >> 1 ;
199+ s >> result_shift
95200}
96201
97- /// Generates a `u*_with_remainder` function that returns the [integer square
98- /// root][1] and remainder of any input of a specific unsigned integer type.
202+ /// Returns the [integer square root][1] of any [`usize`](prim@usize) input.
99203///
100204/// [1]: <https://en.wikipedia.org/wiki/Integer_square_root>
101205/// "Wikipedia contributors. Integer square root. Wikipedia, The Free
102206/// Encyclopedia."
103- macro_rules! unsigned_with_remainder_fn {
104- ( $FullBitsT: ty, $full_bits_with_remainder_fn: ident, $HalfBitsT: ty, $half_bits_with_remainder_fn: ident) => {
105- /// Returns the [integer square root][1] and remainder of any
106- #[ doc = concat!( "[`" , stringify!( $FullBitsT) , "`](prim@" , stringify!( $FullBitsT) , ")" ) ]
107- /// input.
108- ///
109- /// For example,
110- #[ doc = concat!( "`" , stringify!( $full_bits_with_remainder_fn) , "(17) == (4, 1)`" ) ]
111- /// because the integer square root of 17 is 4 and because 17 is 1
112- /// higher than 4 squared.
113- ///
114- /// [1]: <https://en.wikipedia.org/wiki/Integer_square_root>
115- /// "Wikipedia contributors. Integer square root. Wikipedia, The Free
116- /// Encyclopedia."
117- #[ must_use = "this returns the result of the operation, \
118- without modifying the original"]
119- const fn $full_bits_with_remainder_fn( mut n: $FullBitsT) -> ( $FullBitsT, $FullBitsT) {
120- // Performs a Karatsuba square root.
121- // https://web.archive.org/web/20230511212802/https://inria.hal.science/inria-00072854v1/file/RR-3805.pdf
122-
123- const HALF_BITS : u32 = <$FullBitsT>:: BITS >> 1 ;
124- const QUARTER_BITS : u32 = <$FullBitsT>:: BITS >> 2 ;
207+ #[ must_use = "this returns the result of the operation, \
208+ without modifying the original"]
209+ // `#[inline(always)]` because the programmer-accessible functions will use
210+ // this internally and the contents of this should be inlined there.
211+ #[ inline( always) ]
212+ pub const fn usize ( mut n : usize ) -> usize {
213+ if n == 0 {
214+ return 0 ;
215+ }
216+ const EVEN_BITMASK : u32 = u32:: MAX & !1 ;
217+ let precondition_shift = n. leading_zeros ( ) & EVEN_BITMASK ;
218+ n <<= precondition_shift;
219+
220+ let s = {
221+ #[ cfg( target_pointer_width = "16" ) ]
222+ {
223+ let ( s, r) = first_stage ! ( 16 , n) ;
224+ last_stage ! ( usize , n, s, r)
225+ }
125226
126- const LOWER_HALF_1_BITS : $FullBitsT = ( 1 << HALF_BITS ) - 1 ;
127- const LOWEST_QUARTER_1_BITS : $FullBitsT = ( 1 << QUARTER_BITS ) - 1 ;
227+ #[ cfg( target_pointer_width = "32" ) ]
228+ {
229+ let ( s, r) = first_stage ! ( 32 , n) ;
230+ let ( s, r) = middle_stage ! ( 32 , u16 , n, s, r) ;
231+ last_stage ! ( usize , n, s, r)
232+ }
128233
129- let leading_zeros = n. leading_zeros( ) ;
130- if leading_zeros >= HALF_BITS {
131- let ( s, r) = $half_bits_with_remainder_fn( n as $HalfBitsT) ;
132- ( s as $FullBitsT, r as $FullBitsT)
133- } else {
134- // If we've arrived here, there is at least one 1 bit in the
135- // upper half of the bits. What we want to do is to shift left
136- // an even number of bits so that the most-significant 1 bit is
137- // as far left as it can get.
138- //
139- // Either the most-significant bit or its neighbor must be a one, so we shift left to make that happen.
140- const EVEN_BITMASK : u32 = u32 :: MAX & !1 ;
141- let precondition_shift = leading_zeros & EVEN_BITMASK ;
142- n <<= precondition_shift;
234+ #[ cfg( target_pointer_width = "64" ) ]
235+ {
236+ let ( s, r) = first_stage ! ( 64 , n) ;
237+ let ( s, r) = middle_stage ! ( 64 , u16 , n, s, r) ;
238+ let ( s, r) = middle_stage ! ( 64 , u32 , n, s, r) ;
239+ last_stage ! ( usize , n, s, r)
240+ }
241+ } ;
143242
144- let hi = ( n >> HALF_BITS ) as $HalfBitsT;
145- let lo = n & LOWER_HALF_1_BITS ;
243+ let result_shift = precondition_shift >> 1 ;
244+ s >> result_shift
245+ }
146246
147- let ( s_prime, r_prime) = $half_bits_with_remainder_fn( hi) ;
247+ pub const unsafe fn i8 ( n : i8 ) -> i8 {
248+ u8 ( n as u8 ) as i8
249+ }
148250
149- let numerator = ( ( r_prime as $FullBitsT) << QUARTER_BITS ) | ( lo >> QUARTER_BITS ) ;
150- let denominator = ( s_prime as $FullBitsT) << 1 ;
251+ pub const unsafe fn i16 ( n : i16 ) -> i16 {
252+ u16 ( n as u16 ) as i16
253+ }
151254
152- // Integer type divided by nonzero version of that type is not a `const fn` yet.
153- // let denominator =
154- // unsafe { crate::num::NonZero::<$FullBitsT>::new_unchecked(denominator) };
155- //
156- // let q = numerator / denominator;
157- // let u = numerator % denominator;
158- let ( q, u) = unsafe {
159- (
160- crate :: intrinsics:: unchecked_div( numerator, denominator) ,
161- crate :: intrinsics:: unchecked_rem( numerator, denominator) ,
162- )
163- } ;
255+ pub const unsafe fn i32 ( n : i32 ) -> i32 {
256+ u32 ( n as u32 ) as i32
257+ }
164258
165- let mut s = ( s_prime << QUARTER_BITS ) as $FullBitsT + q;
166- let ( mut r, overflow) =
167- ( ( u << QUARTER_BITS ) | ( lo & LOWEST_QUARTER_1_BITS ) ) . overflowing_sub( q * q) ;
168- if overflow {
169- r = r. wrapping_add( ( s << 1 ) - 1 ) ;
170- s -= 1 ;
171- }
172- let result_shift = precondition_shift >> 1 ;
173- ( s >> result_shift, r >> result_shift)
174- }
175- }
176- } ;
259+ pub const unsafe fn i64 ( n : i64 ) -> i64 {
260+ u64 ( n as u64 ) as i64
177261}
178262
179- unsigned_with_remainder_fn ! ( u16 , u16_with_remainder , u8 , u8_with_remainder ) ;
180- unsigned_with_remainder_fn ! ( u32 , u32_with_remainder , u16 , u16_with_remainder ) ;
181- unsigned_with_remainder_fn ! ( u64 , u64_with_remainder , u32 , u32_with_remainder ) ;
263+ pub const unsafe fn i128 ( n : i128 ) -> i128 {
264+ u128 ( n as u128 ) as i128
265+ }
182266
183- /// Generates a `u*` function that returns the [integer square root][1] of any
267+ /*
268+ /// Generates a `u*` function that returns the [integer square root][1] of any
184269/// input of a specific unsigned integer type.
185270///
186271/// [1]: <https://en.wikipedia.org/wiki/Integer_square_root>
187272/// "Wikipedia contributors. Integer square root. Wikipedia, The Free
188- /// Encyclopedia."
273+ /// Encyclopedia."
189274macro_rules! unsigned_fn {
190275 ($FullBitsT:ty, $full_bits_fn:ident, $HalfBitsT:ty, $half_bits_fn:ident, $half_bits_with_remainder_fn:ident) => {
191276 /// Returns the [integer square root][1] of any
@@ -263,49 +348,7 @@ macro_rules! unsigned_fn {
263348 }
264349 }
265350 };
266- }
267-
268- unsigned_fn ! ( u16 , u16 , u8 , u8 , u8_with_remainder) ;
269- unsigned_fn ! ( u32 , u32 , u16 , u16 , u16_with_remainder) ;
270- unsigned_fn ! ( u64 , u64 , u32 , u32 , u32_with_remainder) ;
271- unsigned_fn ! ( u128 , u128 , u64 , u64 , u64_with_remainder) ;
272-
273- /// Generates an `i*` function that returns the [integer square root][1] of any
274- /// **nonnegative** input of a specific signed integer type.
275- ///
276- /// [1]: <https://en.wikipedia.org/wiki/Integer_square_root>
277- /// "Wikipedia contributors. Integer square root. Wikipedia, The Free
278- /// Encyclopedia."
279- macro_rules! signed_fn {
280- ( $SignedT: ty, $signed_fn: ident, $UnsignedT: ty, $unsigned_fn: ident) => {
281- /// Returns the [integer square root][1] of any **nonnegative**
282- #[ doc = concat!( "[`" , stringify!( $SignedT) , "`](prim@" , stringify!( $SignedT) , ")" ) ]
283- /// input.
284- ///
285- /// # Safety
286- ///
287- /// This results in undefined behavior when the input is negative.
288- ///
289- /// [1]: <https://en.wikipedia.org/wiki/Integer_square_root>
290- /// "Wikipedia contributors. Integer square root. Wikipedia, The Free
291- /// Encyclopedia."
292- #[ must_use = "this returns the result of the operation, \
293- without modifying the original"]
294- // `#[inline(always)]` because the programmer-accessible functions will
295- // use this internally and the contents of this should be inlined
296- // there.
297- #[ inline( always) ]
298- pub const unsafe fn $signed_fn( n: $SignedT) -> $SignedT {
299- $unsigned_fn( n as $UnsignedT) as $SignedT
300- }
301- } ;
302- }
303-
304- signed_fn ! ( i8 , i8 , u8 , u8 ) ;
305- signed_fn ! ( i16 , i16 , u16 , u16 ) ;
306- signed_fn ! ( i32 , i32 , u32 , u32 ) ;
307- signed_fn ! ( i64 , i64 , u64 , u64 ) ;
308- signed_fn ! ( i128 , i128 , u128 , u128 ) ;
351+ }*/
309352
310353/// Instantiate this panic logic once, rather than for all the isqrt methods
311354/// on every single primitive type.
0 commit comments