aaaa

ChaiTRex · ChaiTRex · commit 82389f4f719b · 2024-07-29T21:48:01.000-04:00
diff --git a/library/core/src/num/int_sqrt.rs b/library/core/src/num/int_sqrt.rs
@@ -50,142 +50,227 @@ pub const fn u8(n: u8) -> u8 {
     U8_ISQRT_WITH_REMAINDER[n as usize].0
 }
 
-/// Returns the [integer square root][1] and remainder of any [`u8`](prim@u8)
-/// input.
-///
-/// For example, `u8_with_remainder(17) == (4, 1)` because the integer square
-/// root of 17 is 4 and because 17 is 1 higher than 4 squared.
-///
-/// [1]: <https://en.wikipedia.org/wiki/Integer_square_root>
-/// "Wikipedia contributors. Integer square root. Wikipedia, The Free
-/// Encyclopedia."
-#[must_use = "this returns the result of the operation, \
-              without modifying the original"]
-// `#[inline(always)]` because this is just a memory access.
-#[inline(always)]
-const fn u8_with_remainder(n: u8) -> (u8, u8) {
-    U8_ISQRT_WITH_REMAINDER[n as usize]
+/*macro_rules! unsigned_fn {
+    ($unsigned_type:ident, $stages:block) => {
+        pub const fn $unsigned_type(mut n: u16) -> u16 {
+            if n == 0 {
+                return 0;
+            }
+            const EVEN_BITMASK: u32 = u32::MAX & !1;
+            let precondition_shift = n.leading_zeros() & EVEN_BITMASK;
+            n <<= precondition_shift;
+
+            let s = $stages;
+
+            let result_shift = precondition_shift >> 1;
+            s >> result_shift
+        }
+    };
+}*/
+
+/// Runs the first stage of the staged integer square root: a table lookup on
+/// the most-significant 8 bits of `$n`.
+///
+/// `$original_bits` is the bit width of `$n`. The macro evaluates to the
+/// `U8_ISQRT_WITH_REMAINDER` entry selected by those 8 bits, which the callers
+/// bind as an `(s, r)` pair.
+macro_rules! first_stage {
+    ($original_bits:literal, $n:ident) => {{
+        // Number of low bits to discard so that only the top byte remains.
+        const TOP_BYTE_SHIFT: u32 = $original_bits - 8;
+        let table_index = ($n >> TOP_BYTE_SHIFT) as usize;
+
+        U8_ISQRT_WITH_REMAINDER[table_index]
+    }};
 }
 
-/// Returns the [integer square root][1] of any [`usize`](prim@usize) input.
-///
-/// [1]: <https://en.wikipedia.org/wiki/Integer_square_root>
-/// "Wikipedia contributors. Integer square root. Wikipedia, The Free
-/// Encyclopedia."
-#[must_use = "this returns the result of the operation, \
-              without modifying the original"]
-// `#[inline(always)]` because the programmer-accessible functions will use
-// this internally and the contents of this should be inlined there.
-#[inline(always)]
-pub const fn usize(n: usize) -> usize {
-    #[cfg(target_pointer_width = "16")]
-    {
-        u16(n as u16) as usize
+/// Runs one intermediate refinement stage of the staged integer square root.
+///
+/// `$n` is the `$original_bits`-wide input, and `$s`/`$r` are the root and
+/// remainder produced by the previous stage. Only the most-significant
+/// `<$ty>::BITS` bits of `$n` are examined; the macro evaluates to a new
+/// `(s, r)` pair of type `$ty` for those bits.
+///
+/// The arithmetic is a Karatsuba square root step: divide by twice the
+/// previous root, then correct the quotient downward by one if it was too
+/// large.
+macro_rules! middle_stage {
+    ($original_bits:literal, $ty:ty, $n:ident, $s:ident, $r:ident) => {{
+        // Keep only the top `<$ty>::BITS` bits of the input for this stage.
+        const N_SHIFT: u32 = $original_bits - <$ty>::BITS;
+        let n = ($n >> N_SHIFT) as $ty;
+
+        const HALF_BITS: u32 = <$ty>::BITS >> 1;
+        const QUARTER_BITS: u32 = <$ty>::BITS >> 2;
+        const LOWER_HALF_1_BITS: $ty = (1 << HALF_BITS) - 1;
+        const LOWEST_QUARTER_1_BITS: $ty = (1 << QUARTER_BITS) - 1;
+
+        // Lower half of the bits examined by this stage.
+        let lo = n & LOWER_HALF_1_BITS;
+        // Previous remainder followed by the upper quarter-width chunk of `lo`.
+        let numerator = (($r as $ty) << QUARTER_BITS) | (lo >> QUARTER_BITS);
+        // Twice the previous root.
+        let denominator = ($s as $ty) << 1;
+        let q = numerator / denominator;
+        let u = numerator % denominator;
+        // Tentative root: the previous root followed by the quotient.
+        // NOTE(review): `$s << QUARTER_BITS` shifts in `$s`'s own type before
+        // the cast to `$ty`; presumably the callers' normalization leaves
+        // enough headroom for this not to lose bits -- confirm.
+        let mut s = ($s << QUARTER_BITS) as $ty + q;
+        // Tentative remainder: the division remainder followed by the lowest
+        // quarter-width chunk of `lo`, minus `q * q`.
+        let (mut r, overflow) =
+            ((u << QUARTER_BITS) | (lo & LOWEST_QUARTER_1_BITS)).overflowing_sub(q * q);
+        // A borrow out of the subtraction means the tentative root was too
+        // large: add back `2 * s - 1` (the gap between `s * s` and
+        // `(s - 1) * (s - 1)`) and step the root down by one.
+        if overflow {
+            r = r.wrapping_add(2 * s - 1);
+            s -= 1;
+        }
+        (s, r)
+    }};
+}
+
+/// Runs the final stage of the staged integer square root.
+///
+/// `$n` is the full-width input of type `$ty`, and `$s`/`$r` are the root and
+/// remainder produced by the previous stage. The macro evaluates to the root
+/// only (type `$ty`); unlike `middle_stage!`, no remainder is produced.
+macro_rules! last_stage {
+    ($ty:ty, $n:ident, $s:ident, $r:ident) => {{
+        const HALF_BITS: u32 = <$ty>::BITS >> 1;
+        const QUARTER_BITS: u32 = <$ty>::BITS >> 2;
+        const LOWER_HALF_1_BITS: $ty = (1 << HALF_BITS) - 1;
+
+        // Lower half of the input bits.
+        let lo = $n & LOWER_HALF_1_BITS;
+        // Previous remainder followed by the upper quarter-width chunk of `lo`.
+        let numerator = (($r as $ty) << QUARTER_BITS) | (lo >> QUARTER_BITS);
+        // Twice the previous root.
+        let denominator = ($s as $ty) << 1;
+        let q = numerator / denominator;
+        // Tentative root: the previous root followed by the quotient.
+        // NOTE(review): `$s << QUARTER_BITS` shifts in `$s`'s own type before
+        // the cast to `$ty`; presumably the callers' normalization leaves
+        // enough headroom for this not to lose bits -- confirm.
+        let mut s = ($s << QUARTER_BITS) as $ty + q;
+        // Check the tentative root directly by squaring it. If the square does
+        // not fit in `$ty` or exceeds the input, the root is too large and is
+        // decremented once.
+        let (s_squared, overflow) = s.overflowing_mul(s);
+        if overflow || s_squared > $n {
+            s -= 1;
+        }
+        s
+    }};
+}
+
+/*unsigned_fn!(u16, {
+    let (s, r) = first_stage!(16, n);
+    last_stage!(u16, n, s, r)
+});
+
+unsigned_fn!(u32, {
+    let (s, r) = first_stage!(32, n);
+    let (s, r) = middle_stage!(32, u16, n, s, r);
+    last_stage!(u32, n, s, r)
+})*/
+
+/// Returns the [integer square root][1] of any [`u16`](prim@u16) input.
+///
+/// [1]: <https://en.wikipedia.org/wiki/Integer_square_root>
+/// "Wikipedia contributors. Integer square root. Wikipedia, The Free
+/// Encyclopedia."
+#[must_use = "this returns the result of the operation, \
+              without modifying the original"]
+pub const fn u16(mut n: u16) -> u16 {
+    // Zero has no 1 bit to normalize around, so answer it directly.
+    if n == 0 {
+        return 0;
+    }
+
+    // Shift left by an even number of bits so that the most-significant 1 bit
+    // lands in one of the top two bit positions.
+    const EVEN_BITMASK: u32 = u32::MAX & !1;
+    let precondition_shift = n.leading_zeros() & EVEN_BITMASK;
+    n <<= precondition_shift;
+
+    let (s, r) = first_stage!(16, n);
+    let s = last_stage!(u16, n, s, r);
+
+    // Shifting the input left by `2 * k` bits shifts its square root left by
+    // `k` bits, so undo half of the normalization shift on the result.
+    let result_shift = precondition_shift >> 1;
+    s >> result_shift
+}
+
+/// Returns the [integer square root][1] of any [`u32`](prim@u32) input.
+///
+/// [1]: <https://en.wikipedia.org/wiki/Integer_square_root>
+/// "Wikipedia contributors. Integer square root. Wikipedia, The Free
+/// Encyclopedia."
+#[must_use = "this returns the result of the operation, \
+              without modifying the original"]
+pub const fn u32(mut n: u32) -> u32 {
+    // Zero has no 1 bit to normalize around, so answer it directly.
+    if n == 0 {
+        return 0;
     }
+
+    // Shift left by an even number of bits so that the most-significant 1 bit
+    // lands in one of the top two bit positions.
+    const EVEN_BITMASK: u32 = u32::MAX & !1;
+    let precondition_shift = n.leading_zeros() & EVEN_BITMASK;
+    n <<= precondition_shift;
+
+    let (s, r) = first_stage!(32, n);
+    let (s, r) = middle_stage!(32, u16, n, s, r);
+    let s = last_stage!(u32, n, s, r);
+
+    // Shifting the input left by `2 * k` bits shifts its square root left by
+    // `k` bits, so undo half of the normalization shift on the result.
+    let result_shift = precondition_shift >> 1;
+    s >> result_shift
+}
 
-    #[cfg(target_pointer_width = "32")]
-    {
-        u32(n as u32) as usize
+/// Returns the [integer square root][1] of any [`u64`](prim@u64) input.
+///
+/// [1]: <https://en.wikipedia.org/wiki/Integer_square_root>
+/// "Wikipedia contributors. Integer square root. Wikipedia, The Free
+/// Encyclopedia."
+#[must_use = "this returns the result of the operation, \
+              without modifying the original"]
+pub const fn u64(mut n: u64) -> u64 {
+    // Zero has no 1 bit to normalize around, so answer it directly.
+    if n == 0 {
+        return 0;
     }
+
+    // Shift left by an even number of bits so that the most-significant 1 bit
+    // lands in one of the top two bit positions.
+    const EVEN_BITMASK: u32 = u32::MAX & !1;
+    let precondition_shift = n.leading_zeros() & EVEN_BITMASK;
+    n <<= precondition_shift;
+
+    let (s, r) = first_stage!(64, n);
+    let (s, r) = middle_stage!(64, u16, n, s, r);
+    let (s, r) = middle_stage!(64, u32, n, s, r);
+    let s = last_stage!(u64, n, s, r);
+
+    // Shifting the input left by `2 * k` bits shifts its square root left by
+    // `k` bits, so undo half of the normalization shift on the result.
+    let result_shift = precondition_shift >> 1;
+    s >> result_shift
+}
 
-    #[cfg(target_pointer_width = "64")]
-    {
-        u64(n as u64) as usize
+/// Returns the [integer square root][1] of any [`u128`](prim@u128) input.
+///
+/// [1]: <https://en.wikipedia.org/wiki/Integer_square_root>
+/// "Wikipedia contributors. Integer square root. Wikipedia, The Free
+/// Encyclopedia."
+#[must_use = "this returns the result of the operation, \
+              without modifying the original"]
+pub const fn u128(mut n: u128) -> u128 {
+    // Zero has no 1 bit to normalize around, so answer it directly.
+    if n == 0 {
+        return 0;
     }
+
+    // Shift left by an even number of bits so that the most-significant 1 bit
+    // lands in one of the top two bit positions.
+    const EVEN_BITMASK: u32 = u32::MAX & !1;
+    let precondition_shift = n.leading_zeros() & EVEN_BITMASK;
+    n <<= precondition_shift;
+
+    let (s, r) = first_stage!(128, n);
+    let (s, r) = middle_stage!(128, u16, n, s, r);
+    let (s, r) = middle_stage!(128, u32, n, s, r);
+    let (s, r) = middle_stage!(128, u64, n, s, r);
+    let s = last_stage!(u128, n, s, r);
+
+    // Shifting the input left by `2 * k` bits shifts its square root left by
+    // `k` bits, so undo half of the normalization shift on the result.
+    let result_shift = precondition_shift >> 1;
+    s >> result_shift
 }
 
-/// Generates a `u*_with_remainder` function that returns the [integer square
-/// root][1] and remainder of any input of a specific unsigned integer type.
+/// Returns the [integer square root][1] of any [`usize`](prim@usize) input.
 ///
 /// [1]: <https://en.wikipedia.org/wiki/Integer_square_root>
 /// "Wikipedia contributors. Integer square root. Wikipedia, The Free
 /// Encyclopedia."
-macro_rules! unsigned_with_remainder_fn {
-    ($FullBitsT:ty, $full_bits_with_remainder_fn:ident, $HalfBitsT:ty, $half_bits_with_remainder_fn:ident) => {
-        /// Returns the [integer square root][1] and remainder of any
-        #[doc = concat!("[`", stringify!($FullBitsT), "`](prim@", stringify!($FullBitsT), ")")]
-        /// input.
-        ///
-        /// For example,
-        #[doc = concat!("`", stringify!($full_bits_with_remainder_fn), "(17) == (4, 1)`")]
-        /// because the integer square root of 17 is 4 and because 17 is 1
-        /// higher than 4 squared.
-        ///
-        /// [1]: <https://en.wikipedia.org/wiki/Integer_square_root>
-        /// "Wikipedia contributors. Integer square root. Wikipedia, The Free
-        /// Encyclopedia."
-        #[must_use = "this returns the result of the operation, \
-                      without modifying the original"]
-        const fn $full_bits_with_remainder_fn(mut n: $FullBitsT) -> ($FullBitsT, $FullBitsT) {
-            // Performs a Karatsuba square root.
-            // https://web.archive.org/web/20230511212802/https://inria.hal.science/inria-00072854v1/file/RR-3805.pdf
-
-            const HALF_BITS: u32 = <$FullBitsT>::BITS >> 1;
-            const QUARTER_BITS: u32 = <$FullBitsT>::BITS >> 2;
+#[must_use = "this returns the result of the operation, \
+              without modifying the original"]
+// `#[inline(always)]` because the programmer-accessible functions will use
+// this internally and the contents of this should be inlined there.
+#[inline(always)]
+pub const fn usize(mut n: usize) -> usize {
+    // Zero has no 1 bit to normalize around, so answer it directly.
+    if n == 0 {
+        return 0;
+    }
+    // Shift left by an even number of bits so that the most-significant 1 bit
+    // lands in one of the top two bit positions.
+    const EVEN_BITMASK: u32 = u32::MAX & !1;
+    let precondition_shift = n.leading_zeros() & EVEN_BITMASK;
+    n <<= precondition_shift;
+
+    // Exactly one of the following blocks is compiled in, chosen by the
+    // target's pointer width; each runs the stage sequence for that width.
+    let s = {
+        #[cfg(target_pointer_width = "16")]
+        {
+            let (s, r) = first_stage!(16, n);
+            last_stage!(usize, n, s, r)
+        }
 
-            const LOWER_HALF_1_BITS: $FullBitsT = (1 << HALF_BITS) - 1;
-            const LOWEST_QUARTER_1_BITS: $FullBitsT = (1 << QUARTER_BITS) - 1;
+        #[cfg(target_pointer_width = "32")]
+        {
+            let (s, r) = first_stage!(32, n);
+            let (s, r) = middle_stage!(32, u16, n, s, r);
+            last_stage!(usize, n, s, r)
+        }
 
-            let leading_zeros = n.leading_zeros();
-            if leading_zeros >= HALF_BITS {
-                let (s, r) = $half_bits_with_remainder_fn(n as $HalfBitsT);
-                (s as $FullBitsT, r as $FullBitsT)
-            } else {
-                // If we've arrived here, there is at least one 1 bit in the
-                // upper half of the bits. What we want to do is to shift left
-                // an even number of bits so that the most-significant 1 bit is
-                // as far left as it can get.
-                //
-                // Either the most-significant bit or its neighbor must be a one, so we shift left to make that happen.
-                const EVEN_BITMASK: u32 = u32::MAX & !1;
-                let precondition_shift = leading_zeros & EVEN_BITMASK;
-                n <<= precondition_shift;
+        #[cfg(target_pointer_width = "64")]
+        {
+            let (s, r) = first_stage!(64, n);
+            let (s, r) = middle_stage!(64, u16, n, s, r);
+            let (s, r) = middle_stage!(64, u32, n, s, r);
+            last_stage!(usize, n, s, r)
+        }
+    };
 
-                let hi = (n >> HALF_BITS) as $HalfBitsT;
-                let lo = n & LOWER_HALF_1_BITS;
+    // Shifting the input left by `2 * k` bits shifts its square root left by
+    // `k` bits, so undo half of the normalization shift on the result.
+    let result_shift = precondition_shift >> 1;
+    s >> result_shift
+}
 
-                let (s_prime, r_prime) = $half_bits_with_remainder_fn(hi);
+/// Returns the [integer square root][1] of any **nonnegative**
+/// [`i8`](prim@i8) input.
+///
+/// # Safety
+///
+/// This results in undefined behavior when the input is negative.
+///
+/// [1]: <https://en.wikipedia.org/wiki/Integer_square_root>
+/// "Wikipedia contributors. Integer square root. Wikipedia, The Free
+/// Encyclopedia."
+#[must_use = "this returns the result of the operation, \
+              without modifying the original"]
+// `#[inline(always)]` because the programmer-accessible functions will use
+// this internally and the contents of this should be inlined there.
+#[inline(always)]
+pub const unsafe fn i8(n: i8) -> i8 {
+    // A nonnegative input keeps its value when reinterpreted as unsigned, and
+    // its root is never larger than the input, so it fits back in `i8`.
+    u8(n as u8) as i8
+}
 
-                let numerator = ((r_prime as $FullBitsT) << QUARTER_BITS) | (lo >> QUARTER_BITS);
-                let denominator = (s_prime as $FullBitsT) << 1;
+/// Returns the [integer square root][1] of any **nonnegative**
+/// [`i16`](prim@i16) input.
+///
+/// # Safety
+///
+/// This results in undefined behavior when the input is negative.
+///
+/// [1]: <https://en.wikipedia.org/wiki/Integer_square_root>
+/// "Wikipedia contributors. Integer square root. Wikipedia, The Free
+/// Encyclopedia."
+#[must_use = "this returns the result of the operation, \
+              without modifying the original"]
+// `#[inline(always)]` because the programmer-accessible functions will use
+// this internally and the contents of this should be inlined there.
+#[inline(always)]
+pub const unsafe fn i16(n: i16) -> i16 {
+    // A nonnegative input keeps its value when reinterpreted as unsigned, and
+    // its root is never larger than the input, so it fits back in `i16`.
+    u16(n as u16) as i16
+}
 
-                // Integer type divided by nonzero version of that type is not a `const fn` yet.
-                // let denominator =
-                //     unsafe { crate::num::NonZero::<$FullBitsT>::new_unchecked(denominator) };
-                //
-                // let q = numerator / denominator;
-                // let u = numerator % denominator;
-                let (q, u) = unsafe {
-                    (
-                        crate::intrinsics::unchecked_div(numerator, denominator),
-                        crate::intrinsics::unchecked_rem(numerator, denominator),
-                    )
-                };
+/// Returns the [integer square root][1] of any **nonnegative**
+/// [`i32`](prim@i32) input.
+///
+/// # Safety
+///
+/// This results in undefined behavior when the input is negative.
+///
+/// [1]: <https://en.wikipedia.org/wiki/Integer_square_root>
+/// "Wikipedia contributors. Integer square root. Wikipedia, The Free
+/// Encyclopedia."
+#[must_use = "this returns the result of the operation, \
+              without modifying the original"]
+// `#[inline(always)]` because the programmer-accessible functions will use
+// this internally and the contents of this should be inlined there.
+#[inline(always)]
+pub const unsafe fn i32(n: i32) -> i32 {
+    // A nonnegative input keeps its value when reinterpreted as unsigned, and
+    // its root is never larger than the input, so it fits back in `i32`.
+    u32(n as u32) as i32
+}
 
-                let mut s = (s_prime << QUARTER_BITS) as $FullBitsT + q;
-                let (mut r, overflow) =
-                    ((u << QUARTER_BITS) | (lo & LOWEST_QUARTER_1_BITS)).overflowing_sub(q * q);
-                if overflow {
-                    r = r.wrapping_add((s << 1) - 1);
-                    s -= 1;
-                }
-                let result_shift = precondition_shift >> 1;
-                (s >> result_shift, r >> result_shift)
-            }
-        }
-    };
+/// Returns the [integer square root][1] of any **nonnegative**
+/// [`i64`](prim@i64) input.
+///
+/// # Safety
+///
+/// This results in undefined behavior when the input is negative.
+///
+/// [1]: <https://en.wikipedia.org/wiki/Integer_square_root>
+/// "Wikipedia contributors. Integer square root. Wikipedia, The Free
+/// Encyclopedia."
+#[must_use = "this returns the result of the operation, \
+              without modifying the original"]
+// `#[inline(always)]` because the programmer-accessible functions will use
+// this internally and the contents of this should be inlined there.
+#[inline(always)]
+pub const unsafe fn i64(n: i64) -> i64 {
+    // A nonnegative input keeps its value when reinterpreted as unsigned, and
+    // its root is never larger than the input, so it fits back in `i64`.
+    u64(n as u64) as i64
 }
 
-unsigned_with_remainder_fn!(u16, u16_with_remainder, u8, u8_with_remainder);
-unsigned_with_remainder_fn!(u32, u32_with_remainder, u16, u16_with_remainder);
-unsigned_with_remainder_fn!(u64, u64_with_remainder, u32, u32_with_remainder);
+/// Returns the [integer square root][1] of any **nonnegative**
+/// [`i128`](prim@i128) input.
+///
+/// # Safety
+///
+/// This results in undefined behavior when the input is negative.
+///
+/// [1]: <https://en.wikipedia.org/wiki/Integer_square_root>
+/// "Wikipedia contributors. Integer square root. Wikipedia, The Free
+/// Encyclopedia."
+#[must_use = "this returns the result of the operation, \
+              without modifying the original"]
+// `#[inline(always)]` because the programmer-accessible functions will use
+// this internally and the contents of this should be inlined there.
+#[inline(always)]
+pub const unsafe fn i128(n: i128) -> i128 {
+    // A nonnegative input keeps its value when reinterpreted as unsigned, and
+    // its root is never larger than the input, so it fits back in `i128`.
+    u128(n as u128) as i128
+}
 
-/// Generates a `u*` function that returns the [integer square root][1] of any
+/*
+    /// Generates a `u*` function that returns the [integer square root][1] of any
 /// input of a specific unsigned integer type.
 ///
 /// [1]: <https://en.wikipedia.org/wiki/Integer_square_root>
 /// "Wikipedia contributors. Integer square root. Wikipedia, The Free
-/// Encyclopedia."
+/// Encyclopedia."
 macro_rules! unsigned_fn {
     ($FullBitsT:ty, $full_bits_fn:ident, $HalfBitsT:ty, $half_bits_fn:ident, $half_bits_with_remainder_fn:ident) => {
         /// Returns the [integer square root][1] of any
@@ -263,49 +348,7 @@ macro_rules! unsigned_fn {
             }
         }
     };
-}
-
-unsigned_fn!(u16, u16, u8, u8, u8_with_remainder);
-unsigned_fn!(u32, u32, u16, u16, u16_with_remainder);
-unsigned_fn!(u64, u64, u32, u32, u32_with_remainder);
-unsigned_fn!(u128, u128, u64, u64, u64_with_remainder);
-
-/// Generates an `i*` function that returns the [integer square root][1] of any
-/// **nonnegative** input of a specific signed integer type.
-///
-/// [1]: <https://en.wikipedia.org/wiki/Integer_square_root>
-/// "Wikipedia contributors. Integer square root. Wikipedia, The Free
-/// Encyclopedia."
-macro_rules! signed_fn {
-    ($SignedT:ty, $signed_fn:ident, $UnsignedT:ty, $unsigned_fn:ident) => {
-        /// Returns the [integer square root][1] of any **nonnegative**
-        #[doc = concat!("[`", stringify!($SignedT), "`](prim@", stringify!($SignedT), ")")]
-        /// input.
-        ///
-        /// # Safety
-        ///
-        /// This results in undefined behavior when the input is negative.
-        ///
-        /// [1]: <https://en.wikipedia.org/wiki/Integer_square_root>
-        /// "Wikipedia contributors. Integer square root. Wikipedia, The Free
-        /// Encyclopedia."
-        #[must_use = "this returns the result of the operation, \
-                      without modifying the original"]
-        // `#[inline(always)]` because the programmer-accessible functions will
-        // use this internally and the contents of this should be inlined
-        // there.
-        #[inline(always)]
-        pub const unsafe fn $signed_fn(n: $SignedT) -> $SignedT {
-            $unsigned_fn(n as $UnsignedT) as $SignedT
-        }
-    };
-}
-
-signed_fn!(i8, i8, u8, u8);
-signed_fn!(i16, i16, u16, u16);
-signed_fn!(i32, i32, u32, u32);
-signed_fn!(i64, i64, u64, u64);
-signed_fn!(i128, i128, u128, u128);
+    }*/
 
 /// Instantiate this panic logic once, rather than for all the isqrt methods
 /// on every single primitive type.