11/* SPDX-License-Identifier: MIT OR Apache-2.0 */ 
2- use  crate :: support:: { CastFrom ,  Float ,  Int ,  MinInt } ; 
2+ use  crate :: support:: { CastFrom ,  CastInto ,   Float ,  HInt ,   Int ,  MinInt ,   NarrowingDiv } ; 
33
44#[ inline]  
5- pub  fn  fmod < F :  Float > ( x :  F ,  y :  F )  -> F  { 
5+ pub  fn  fmod < F :  Float > ( x :  F ,  y :  F )  -> F 
6+ where 
7+     F :: Int :  HInt , 
8+     <F :: Int  as  HInt >:: D :  NarrowingDiv , 
9+ { 
610    let  _1 = F :: Int :: ONE ; 
711    let  sx = x. to_bits ( )  &  F :: SIGN_MASK ; 
812    let  ux = x. to_bits ( )  &  !F :: SIGN_MASK ; 
@@ -29,7 +33,7 @@ pub fn fmod<F: Float>(x: F, y: F) -> F {
2933
3034    // To compute `(num << ex) % (div << ey)`, first 
3135    // evaluate `rem = (num << (ex - ey)) % div` ... 
32-     let  rem = reduction ( num,  ex - ey,  div) ; 
36+     let  rem = reduction :: < F > ( num,  ex - ey,  div) ; 
3337    // ... so the result will be `rem << ey` 
3438
3539    if  rem. is_zero ( )  { 
@@ -58,11 +62,55 @@ fn into_sig_exp<F: Float>(mut bits: F::Int) -> (F::Int, u32) {
5862} 
5963
6064/// Compute the remainder `(x * 2.pow(e)) % y` without overflow. 
61- fn  reduction < I :  Int > ( mut  x :  I ,  e :  u32 ,  y :  I )  -> I  { 
62-     x %= y; 
63-     for  _ in  0 ..e { 
64-         x <<= 1 ; 
65-         x = x. checked_sub ( y) . unwrap_or ( x) ; 
65+ fn  reduction < F > ( mut  x :  F :: Int ,  e :  u32 ,  y :  F :: Int )  -> F :: Int 
66+ where 
67+     F :  Float , 
68+     F :: Int :  HInt , 
69+     <<F  as  Float >:: Int  as  HInt >:: D :  NarrowingDiv , 
70+ { 
71+     // `f16` only has 5 exponent bits, so even `f16::MAX = 65504.0` is only 
72+     // a 40-bit integer multiple of the smallest subnormal. 
73+     if  F :: BITS  == 16  { 
74+         debug_assert ! ( F :: EXP_MAX  - F :: EXP_MIN  == 29 ) ; 
75+         debug_assert ! ( e <= 29 ) ; 
76+         let  u:  u16  = x. cast ( ) ; 
77+         let  v:  u16  = y. cast ( ) ; 
78+         let  u = ( u as  u64 )  << e; 
79+         let  v = v as  u64 ; 
80+         return  F :: Int :: cast_from ( ( u % v)  as  u16 ) ; 
6681    } 
67-     x
82+ 
83+     // Ensure `x < 2y` for later steps 
84+     if  x >= ( y << 1 )  { 
85+         // This case is only reached with subnormal divisors, 
86+         // but it might be better to just normalize all significands 
87+         // to make this unnecessary. The further calls could potentially 
88+         // benefit from assuming a specific fixed leading bit position. 
89+         x %= y; 
90+     } 
91+ 
92+     // The simple implementation seems to be fastest for a short reduction 
93+     // at this size. The limit here was chosen empirically on an Intel Nehalem. 
94+     // Less old CPUs that have faster `u64 * u64 -> u128` might not benefit, 
95+     // and 32-bit systems or architectures without hardware multipliers might 
96+     // want to do this in more cases. 
97+     if  F :: BITS  == 64  && e < 32  { 
98+         // Assumes `x < 2y` 
99+         for  _ in  0 ..e { 
100+             x = x. checked_sub ( y) . unwrap_or ( x) ; 
101+             x <<= 1 ; 
102+         } 
103+         return  x. checked_sub ( y) . unwrap_or ( x) ; 
104+     } 
105+ 
106+     // Fast path for short reductions 
107+     if  e < F :: BITS  { 
108+         let  w = x. widen ( )  << e; 
109+         if  let  Some ( ( _,  r) )  = w. checked_narrowing_div_rem ( y)  { 
110+             return  r; 
111+         } 
112+     } 
113+ 
114+     // Assumes `x < 2y` 
115+     crate :: support:: linear_mul_reduction ( x,  e,  y) 
68116} 
0 commit comments