|  | 
//! These functions compute the integer square root of their type, assuming
//! that someone has already checked that the value is nonnegative.
|  | 3 | +
 | 
// Lookup table indexed by an 8-bit value `n`. Entry `n` is the pair `(s, r)`
// where `s` is the integer square root of `n` and `r = n - s * s`.
const ISQRT_AND_REMAINDER_8_BIT: [(u8, u8); 256] = {
    let mut table = [(0, 0); 256];

    // `root` and `rem` always describe the entry that is about to be written.
    // Moving to the next index bumps the remainder by one until it reaches
    // `2 * root` (the last value before the next perfect square), at which
    // point the root advances and the remainder starts over at zero.
    let mut root: u8 = 0;
    let mut rem: u8 = 0;
    let mut n: usize = 0;
    while n < table.len() {
        table[n] = (root, rem);
        n += 1;

        if rem == 2 * root {
            root += 1;
            rem = 0;
        } else {
            rem += 1;
        }
    }

    table
};
|  | 24 | + | 
|  | 25 | +// `#[inline(always)]` because the programmer-accessible functions will use | 
|  | 26 | +// this internally and the contents of this should be inlined there. | 
|  | 27 | +#[inline(always)] | 
|  | 28 | +pub const fn u8(n: u8) -> u8 { | 
|  | 29 | +    ISQRT_AND_REMAINDER_8_BIT[n as usize].0 | 
|  | 30 | +} | 
|  | 31 | + | 
|  | 32 | +#[inline(always)] | 
|  | 33 | +const fn intermediate_u8(n: u8) -> (u8, u8) { | 
|  | 34 | +    ISQRT_AND_REMAINDER_8_BIT[n as usize] | 
|  | 35 | +} | 
|  | 36 | + | 
|  | 37 | +macro_rules! karatsuba_isqrt { | 
|  | 38 | +    ($FullBitsT:ty, $fn:ident, $intermediate_fn:ident, $HalfBitsT:ty, $half_fn:ident, $intermediate_half_fn:ident) => { | 
|  | 39 | +        // `#[inline(always)]` because the programmer-accessible functions will | 
|  | 40 | +        // use this internally and the contents of this should be inlined | 
|  | 41 | +        // there. | 
|  | 42 | +        #[inline(always)] | 
|  | 43 | +        pub const fn $fn(mut n: $FullBitsT) -> $FullBitsT { | 
|  | 44 | +            // Performs a Karatsuba square root. | 
|  | 45 | +            // https://web.archive.org/web/20230511212802/https://inria.hal.science/inria-00072854v1/file/RR-3805.pdf | 
|  | 46 | + | 
|  | 47 | +            const HALF_BITS: u32 = <$FullBitsT>::BITS >> 1; | 
|  | 48 | +            const QUARTER_BITS: u32 = <$FullBitsT>::BITS >> 2; | 
|  | 49 | + | 
|  | 50 | +            let leading_zeros = n.leading_zeros(); | 
|  | 51 | +            let result = if leading_zeros >= HALF_BITS { | 
|  | 52 | +                $half_fn(n as $HalfBitsT) as $FullBitsT | 
|  | 53 | +            } else { | 
|  | 54 | +                // Either the most-significant bit or its neighbor must be a one, so we shift left to make that happen. | 
|  | 55 | +                let precondition_shift = leading_zeros & (HALF_BITS - 2); | 
|  | 56 | +                n <<= precondition_shift; | 
|  | 57 | + | 
|  | 58 | +                let hi = (n >> HALF_BITS) as $HalfBitsT; | 
|  | 59 | +                let lo = n & (<$HalfBitsT>::MAX as $FullBitsT); | 
|  | 60 | + | 
|  | 61 | +                let (s_prime, r_prime) = $intermediate_half_fn(hi); | 
|  | 62 | + | 
|  | 63 | +                let numerator = ((r_prime as $FullBitsT) << QUARTER_BITS) | (lo >> QUARTER_BITS); | 
|  | 64 | +                let denominator = (s_prime as $FullBitsT) << 1; | 
|  | 65 | + | 
|  | 66 | +                let q = numerator / denominator; | 
|  | 67 | +                let u = numerator % denominator; | 
|  | 68 | + | 
|  | 69 | +                let mut s = (s_prime << QUARTER_BITS) as $FullBitsT + q; | 
|  | 70 | +                if ((u << QUARTER_BITS) | (lo & ((1 << QUARTER_BITS) - 1))) < q * q { | 
|  | 71 | +                    s -= 1; | 
|  | 72 | +                } | 
|  | 73 | +                s >> (precondition_shift >> 1) | 
|  | 74 | +            }; | 
|  | 75 | + | 
|  | 76 | +            result | 
|  | 77 | +        } | 
|  | 78 | + | 
|  | 79 | +        const fn $intermediate_fn(mut n: $FullBitsT) -> ($FullBitsT, $FullBitsT) { | 
|  | 80 | +            // Performs a Karatsuba square root. | 
|  | 81 | +            // https://web.archive.org/web/20230511212802/https://inria.hal.science/inria-00072854v1/file/RR-3805.pdf | 
|  | 82 | + | 
|  | 83 | +            const HALF_BITS: u32 = <$FullBitsT>::BITS >> 1; | 
|  | 84 | +            const QUARTER_BITS: u32 = <$FullBitsT>::BITS >> 2; | 
|  | 85 | + | 
|  | 86 | +            let leading_zeros = n.leading_zeros(); | 
|  | 87 | +            let result = if leading_zeros >= HALF_BITS { | 
|  | 88 | +                let (s, r) = $intermediate_half_fn(n as $HalfBitsT); | 
|  | 89 | +                (s as $FullBitsT, r as $FullBitsT) | 
|  | 90 | +            } else { | 
|  | 91 | +                // Either the most-significant bit or its neighbor must be a one, so we shift left to make that happen. | 
|  | 92 | +                let precondition_shift = leading_zeros & (HALF_BITS - 2); | 
|  | 93 | +                n <<= precondition_shift; | 
|  | 94 | + | 
|  | 95 | +                let hi = (n >> HALF_BITS) as $HalfBitsT; | 
|  | 96 | +                let lo = n & (<$HalfBitsT>::MAX as $FullBitsT); | 
|  | 97 | + | 
|  | 98 | +                let (s_prime, r_prime) = $intermediate_half_fn(hi); | 
|  | 99 | + | 
|  | 100 | +                let numerator = ((r_prime as $FullBitsT) << QUARTER_BITS) | (lo >> QUARTER_BITS); | 
|  | 101 | +                let denominator = (s_prime as $FullBitsT) << 1; | 
|  | 102 | + | 
|  | 103 | +                let q = numerator / denominator; | 
|  | 104 | +                let u = numerator % denominator; | 
|  | 105 | + | 
|  | 106 | +                let mut s = (s_prime << QUARTER_BITS) as $FullBitsT + q; | 
|  | 107 | +                let (mut r, overflow) = | 
|  | 108 | +                    ((u << QUARTER_BITS) | (lo & ((1 << QUARTER_BITS) - 1))).overflowing_sub(q * q); | 
|  | 109 | +                if overflow { | 
|  | 110 | +                    r = r.wrapping_add((s << 1) - 1); | 
|  | 111 | +                    s -= 1; | 
|  | 112 | +                } | 
|  | 113 | +                (s >> (precondition_shift >> 1), r >> (precondition_shift >> 1)) | 
|  | 114 | +            }; | 
|  | 115 | + | 
|  | 116 | +            result | 
|  | 117 | +        } | 
|  | 118 | +    }; | 
|  | 119 | +} | 
|  | 120 | + | 
|  | 121 | +karatsuba_isqrt!(u16, u16, intermediate_u16, u8, u8, intermediate_u8); | 
|  | 122 | +karatsuba_isqrt!(u32, u32, intermediate_u32, u16, u16, intermediate_u16); | 
|  | 123 | +karatsuba_isqrt!(u64, u64, intermediate_u64, u32, u32, intermediate_u32); | 
|  | 124 | +karatsuba_isqrt!(u128, u128, _intermediate_u128, u64, u64, intermediate_u64); | 
|  | 125 | + | 
|  | 126 | +#[cfg(target_pointer_width = "16")] | 
|  | 127 | +#[inline(always)] | 
|  | 128 | +pub const fn usize(n: usize) -> usize { | 
|  | 129 | +    u16(n as u16) as usize | 
|  | 130 | +} | 
|  | 131 | + | 
|  | 132 | +#[cfg(target_pointer_width = "32")] | 
|  | 133 | +#[inline(always)] | 
|  | 134 | +pub const fn usize(n: usize) -> usize { | 
|  | 135 | +    u32(n as u32) as usize | 
|  | 136 | +} | 
|  | 137 | + | 
|  | 138 | +#[cfg(target_pointer_width = "64")] | 
|  | 139 | +#[inline(always)] | 
|  | 140 | +pub const fn usize(n: usize) -> usize { | 
|  | 141 | +    u64(n as u64) as usize | 
|  | 142 | +} | 
|  | 143 | + | 
|  | 144 | +// 0 <= val <= i8::MAX | 
|  | 145 | +#[inline(always)] | 
|  | 146 | +pub const fn i8(n: i8) -> i8 { | 
|  | 147 | +    u8(n as u8) as i8 | 
|  | 148 | +} | 
|  | 149 | + | 
|  | 150 | +// 0 <= val <= i16::MAX | 
|  | 151 | +#[inline(always)] | 
|  | 152 | +pub const fn i16(n: i16) -> i16 { | 
|  | 153 | +    u16(n as u16) as i16 | 
|  | 154 | +} | 
|  | 155 | + | 
|  | 156 | +// 0 <= val <= i32::MAX | 
|  | 157 | +#[inline(always)] | 
|  | 158 | +pub const fn i32(n: i32) -> i32 { | 
|  | 159 | +    u32(n as u32) as i32 | 
|  | 160 | +} | 
|  | 161 | + | 
|  | 162 | +// 0 <= val <= i64::MAX | 
|  | 163 | +#[inline(always)] | 
|  | 164 | +pub const fn i64(n: i64) -> i64 { | 
|  | 165 | +    u64(n as u64) as i64 | 
|  | 166 | +} | 
|  | 167 | + | 
|  | 168 | +// 0 <= val <= i128::MAX | 
|  | 169 | +#[inline(always)] | 
|  | 170 | +pub const fn i128(n: i128) -> i128 { | 
|  | 171 | +    u128(n as u128) as i128 | 
|  | 172 | +} | 
|  | 173 | + | 
|  | 174 | +/* | 
|  | 175 | +This function is not used. | 
|  | 176 | +
 | 
|  | 177 | +// 0 <= val <= isize::MAX | 
|  | 178 | +#[inline(always)] | 
|  | 179 | +pub const fn isize(n: isize) -> isize { | 
|  | 180 | +    usize(n as usize) as isize | 
|  | 181 | +} | 
|  | 182 | +*/ | 
|  | 183 | + | 
/// Shared panic path for an integer square root requested on a negative
/// value.
///
/// Keeping the panic in this single function means it is instantiated once,
/// rather than separately in the isqrt method of every primitive type.
#[cold]
#[track_caller]
pub const fn panic_for_negative_argument() -> ! {
    panic!("argument of integer square root cannot be negative")
}
0 commit comments