@@ -4,11 +4,13 @@ use crate::fmt::{Debug, LowerExp};
4
4
use crate :: num:: FpCategory ;
5
5
use crate :: ops:: { Add , Div , Mul , Neg } ;
6
6
7
- /// A helper trait to avoid duplicating basically all the conversion code for `f32` and `f64`.
7
+ use core:: f64;
8
+
9
+ /// A helper trait to avoid duplicating basically all the conversion code for IEEE floats.
8
10
///
9
11
/// See the parent module's doc comment for why this is necessary.
10
12
///
11
- /// Should **never ever** be implemented for other types or be used outside the dec2flt module.
13
+ /// Should **never ever** be implemented for other types or be used outside the `dec2flt` module.
12
14
#[ doc( hidden) ]
13
15
pub trait RawFloat :
14
16
Sized
@@ -24,62 +26,86 @@ pub trait RawFloat:
24
26
+ Copy
25
27
+ Debug
26
28
{
29
+ /* general constants */
30
+
27
31
const INFINITY : Self ;
28
32
const NEG_INFINITY : Self ;
29
33
const NAN : Self ;
30
34
const NEG_NAN : Self ;
31
35
36
+ /// Bit width of the float
37
+ const BITS : u32 ;
38
+
39
+ /// Mantissa digits including the hidden bit (provided by core)
40
+ const MANTISSA_BITS : u32 ;
41
+
32
42
/// The number of bits in the significand, *excluding* the hidden bit.
33
- const MANTISSA_EXPLICIT_BITS : usize ;
34
-
35
- // Round-to-even only happens for negative values of q
36
- // when q ≥ −4 in the 64-bit case and when q ≥ −17 in
37
- // the 32-bitcase.
38
- //
39
- // When q ≥ 0,we have that 5^q ≤ 2m+1. In the 64-bit case,we
40
- // have 5^q ≤ 2m+1 ≤ 2^54 or q ≤ 23. In the 32-bit case,we have
41
- // 5^q ≤ 2m+1 ≤ 2^25 or q ≤ 10.
42
- //
43
- // When q < 0, we have w ≥ (2m+1)×5^−q. We must have that w < 2^64
44
- // so (2m+1)×5^−q < 2^64. We have that 2m+1 > 2^53 (64-bit case)
45
- // or 2m+1 > 2^24 (32-bit case). Hence,we must have 2^53×5^−q < 2^64
46
- // (64-bit) and 2^24×5^−q < 2^64 (32-bit). Hence we have 5^−q < 2^11
47
- // or q ≥ −4 (64-bit case) and 5^−q < 2^40 or q ≥ −17 (32-bitcase).
48
- //
49
- // Thus we have that we only need to round ties to even when
50
- // we have that q ∈ [−4,23](in the 64-bit case) or q∈[−17,10]
51
- // (in the 32-bit case). In both cases,the power of five(5^|q|)
52
- // fits in a 64-bit word.
43
+ const MANTISSA_EXPLICIT_BITS : u32 = Self :: MANTISSA_BITS - 1 ;
44
+
45
+ /// Bits for the exponent
46
+ const EXPONENT_BITS : u32 = Self :: BITS - Self :: MANTISSA_EXPLICIT_BITS - 1 ;
47
+
48
+ /// Maximum exponent for a fast path case, or `⌊(MANTISSA_EXPLICIT_BITS+1)/log2(5)⌋`
49
+ // assuming FLT_EVAL_METHOD = 0
50
+ const MAX_EXPONENT_FAST_PATH : i64 =
51
+ ( ( Self :: MANTISSA_BITS as f64 ) / ( f64:: consts:: LOG2_10 - 1.0 ) ) as i64 ;
52
+
53
+ /// Minimum exponent for a fast path case, or `-⌊(MANTISSA_EXPLICIT_BITS+1)/log2(5)⌋`
54
+ const MIN_EXPONENT_FAST_PATH : i64 = -Self :: MAX_EXPONENT_FAST_PATH ;
55
+
56
+ /// Round-to-even only happens for negative values of q
57
+ /// when q ≥ −4 in the 64-bit case and when q ≥ −17 in
58
+ /// the 32-bit case.
59
+ ///
60
+ /// When q ≥ 0, we have that 5^q ≤ 2m+1. In the 64-bit case, we
61
+ /// have 5^q ≤ 2m+1 ≤ 2^54 or q ≤ 23. In the 32-bit case, we have
62
+ /// 5^q ≤ 2m+1 ≤ 2^25 or q ≤ 10.
63
+ ///
64
+ /// When q < 0, we have w ≥ (2m+1)×5^−q. We must have that w < 2^64
65
+ /// so (2m+1)×5^−q < 2^64. We have that 2m+1 > 2^53 (64-bit case)
66
+ /// or 2m+1 > 2^24 (32-bit case). Hence, we must have 2^53×5^−q < 2^64
67
+ /// (64-bit) and 2^24×5^−q < 2^64 (32-bit). Hence we have 5^−q < 2^11
68
+ /// or q ≥ −4 (64-bit case) and 5^−q < 2^40 or q ≥ −17 (32-bit case).
69
+ ///
70
+ /// Thus we have that we only need to round ties to even when
71
+ /// we have that q ∈ [−4, 23] (in the 64-bit case) or q ∈ [−17, 10]
72
+ /// (in the 32-bit case). In both cases, the power of five (5^|q|)
73
+ /// fits in a 64-bit word.
53
74
const MIN_EXPONENT_ROUND_TO_EVEN : i32 ;
54
75
const MAX_EXPONENT_ROUND_TO_EVEN : i32 ;
55
76
56
- // Minimum exponent that for a fast path case, or `-⌊(MANTISSA_EXPLICIT_BITS+1)/log2(5)⌋`
57
- const MIN_EXPONENT_FAST_PATH : i64 ;
77
+ /// Minimum exponent value `-(1 << (EXP_BITS - 1)) + 1`.
78
+ const MINIMUM_EXPONENT : i32 = - ( 1 << ( Self :: EXPONENT_BITS - 1 ) ) + 1 ;
58
79
59
- // Maximum exponent that for a fast path case, or `⌊(MANTISSA_EXPLICIT_BITS+1)/log2(5)⌋`
60
- const MAX_EXPONENT_FAST_PATH : i64 ;
80
+ /// Maximum exponent without overflowing to infinity
81
+ const MAXIMUM_EXPONENT : u32 = ( 1 << Self :: EXPONENT_BITS ) - 1 ;
61
82
62
- // Maximum exponent that can be represented for a disguised-fast path case.
63
- // This is `MAX_EXPONENT_FAST_PATH + ⌊(MANTISSA_EXPLICIT_BITS+1)/log2(10)⌋`
64
- const MAX_EXPONENT_DISGUISED_FAST_PATH : i64 ;
83
+ /// The exponent bias value
84
+ const EXPONENT_BIAS : u32 = Self :: MAXIMUM_EXPONENT >> 1 ;
65
85
66
- // Minimum exponent value `- (1 << ( EXP_BITS - 1)) + 1`.
67
- const MINIMUM_EXPONENT : i32 ;
86
+ /// Largest exponent value `(1 << EXP_BITS) - 1`.
87
+ const INFINITE_POWER : i32 = ( 1 << Self :: EXPONENT_BITS ) - 1 ;
68
88
69
- // Largest exponent value `(1 << EXP_BITS) - 1`.
70
- const INFINITE_POWER : i32 ;
89
+ /// Largest decimal exponent for a non-infinite value.
90
+ ///
91
+ /// This is the max exponent in binary converted to the max exponent in decimal. Allows fast
92
+ /// pathing anything larger than `10^LARGEST_POWER_OF_TEN`, which will round to infinity.
93
+ // const LARGEST_POWER_OF_TEN: i32;
94
+ const LARGEST_POWER_OF_TEN : i32 = ( Self :: EXPONENT_BIAS as f64 / f64:: consts:: LOG2_10 ) as i32 ;
71
95
72
- // Index (in bits) of the sign.
73
- const SIGN_INDEX : usize ;
74
-
75
- // Smallest decimal exponent for a non-zero value.
96
+ /// Smallest decimal exponent for a non-zero value. This allows for fast pathing anything
97
+ /// smaller than `10^SMALLEST_POWER_OF_TEN`.
76
98
const SMALLEST_POWER_OF_TEN : i32 ;
99
+ // const SMALLEST_POWER_OF_TEN: i32 =
100
+ // -(((Self::EXPONENT_BIAS + Self::MANTISSA_BITS) as f64) / f64::consts::LOG2_10) as i32 - 2;
77
101
78
- // Largest decimal exponent for a non-infinite value.
79
- const LARGEST_POWER_OF_TEN : i32 ;
102
+ /// Maximum exponent that can be represented for a disguised-fast path case.
103
+ /// This is `MAX_EXPONENT_FAST_PATH + ⌊(MANTISSA_EXPLICIT_BITS+1)/log2(10)⌋`
104
+ const MAX_EXPONENT_DISGUISED_FAST_PATH : i64 =
105
+ Self :: MAX_EXPONENT_FAST_PATH + ( Self :: MANTISSA_BITS as f64 / f64:: consts:: LOG2_10 ) as i64 ;
80
106
81
- // Maximum mantissa for the fast-path (`1 << 53` for f64).
82
- const MAX_MANTISSA_FAST_PATH : u64 = 2_u64 << Self :: MANTISSA_EXPLICIT_BITS ;
107
+ /// Maximum mantissa for the fast-path (`1 << 53` for f64).
108
+ const MAX_MANTISSA_FAST_PATH : u64 = 1 << Self :: MANTISSA_BITS ;
83
109
84
110
/// Convert integer into float through an as cast.
85
111
/// This is only called in the fast-path algorithm, and therefore
@@ -100,23 +126,80 @@ pub trait RawFloat:
100
126
fn integer_decode ( self ) -> ( u64 , i16 , i8 ) ;
101
127
}
102
128
129
+ // #[cfg(not(bootstrap))]
130
+ // impl RawFloat for f16 {
131
+ // const INFINITY: Self = Self::INFINITY;
132
+ // const NEG_INFINITY: Self = Self::NEG_INFINITY;
133
+ // const NAN: Self = Self::NAN;
134
+ // const NEG_NAN: Self = -Self::NAN;
135
+
136
+ // const BITS: u32 = 16;
137
+ // const MANTISSA_DIGITS: u32 = Self::MANTISSA_DIGITS;
138
+
139
+ // const MIN_EXPONENT_FAST_PATH: i64 = -4; // assuming FLT_EVAL_METHOD = 0
140
+ // const MAX_EXPONENT_FAST_PATH: i64 = 4;
141
+
142
+ // const MIN_EXPONENT_ROUND_TO_EVEN: i32 = -17;
143
+ // const MAX_EXPONENT_ROUND_TO_EVEN: i32 = 10;
144
+ // const MAX_EXPONENT_DISGUISED_FAST_PATH: i64 = 17;
145
+ // const SMALLEST_POWER_OF_TEN: i32 = -65;
146
+ // const LARGEST_POWER_OF_TEN: i32 = Self::MAX_10_EXP;
147
+
148
+ // #[inline]
149
+ // fn from_u64(v: u64) -> Self {
150
+ // debug_assert!(v <= Self::MAX_MANTISSA_FAST_PATH);
151
+ // v as _
152
+ // }
153
+
154
+ // #[inline]
155
+ // fn from_u64_bits(v: u64) -> Self {
156
+ // Self::from_bits((v & 0xFFFF) as u16)
157
+ // }
158
+
159
+ // fn pow10_fast_path(exponent: usize) -> Self {
160
+ // #[allow(clippy::use_self)]
161
+ // const TABLE: [f32; 16] =
162
+ // [1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 0., 0., 0., 0., 0.];
163
+ // TABLE[exponent & 15]
164
+ // }
165
+
166
+ // /// Returns the mantissa, exponent and sign as integers.
167
+ // fn integer_decode(self) -> (u64, i16, i8) {
168
+ // let bits = self.to_bits();
169
+ // let sign: i8 = if bits >> 31 == 0 { 1 } else { -1 };
170
+ // let mut exponent: i16 = ((bits >> 23) & 0xff) as i16;
171
+ // let mantissa =
172
+ // if exponent == 0 { (bits & 0x7fffff) << 1 } else { (bits & 0x7fffff) | 0x800000 };
173
+ // // Exponent bias + mantissa shift
174
+ // exponent -= 127 + 23;
175
+ // (mantissa as u64, exponent, sign)
176
+ // }
177
+
178
+ // fn classify(self) -> FpCategory {
179
+ // self.classify()
180
+ // }
181
+ // }
182
+
103
183
impl RawFloat for f32 {
104
184
const INFINITY : Self = f32:: INFINITY ;
105
185
const NEG_INFINITY : Self = f32:: NEG_INFINITY ;
106
186
const NAN : Self = f32:: NAN ;
107
187
const NEG_NAN : Self = -f32:: NAN ;
108
188
109
- const MANTISSA_EXPLICIT_BITS : usize = 23 ;
189
+ const BITS : u32 = 32 ;
190
+ const MANTISSA_BITS : u32 = Self :: MANTISSA_DIGITS ;
191
+
192
+ // const MANTISSA_EXPLICIT_BITS: u32 = 23;
110
193
const MIN_EXPONENT_ROUND_TO_EVEN : i32 = -17 ;
111
194
const MAX_EXPONENT_ROUND_TO_EVEN : i32 = 10 ;
112
- const MIN_EXPONENT_FAST_PATH : i64 = -10 ; // assuming FLT_EVAL_METHOD = 0
113
- const MAX_EXPONENT_FAST_PATH : i64 = 10 ;
114
- const MAX_EXPONENT_DISGUISED_FAST_PATH : i64 = 17 ;
115
- const MINIMUM_EXPONENT : i32 = -127 ;
116
- const INFINITE_POWER : i32 = 0xFF ;
117
- const SIGN_INDEX : usize = 31 ;
195
+ // const MIN_EXPONENT_FAST_PATH: i64 = -10; // assuming FLT_EVAL_METHOD = 0
196
+ // const MAX_EXPONENT_FAST_PATH: i64 = 10;
197
+ // const MAX_EXPONENT_DISGUISED_FAST_PATH: i64 = 17;
198
+ // const MINIMUM_EXPONENT: i32 = -127;
199
+ // const INFINITE_POWER: i32 = 0xFF;
200
+ // const SIGN_INDEX: u32 = 31;
118
201
const SMALLEST_POWER_OF_TEN : i32 = -65 ;
119
- const LARGEST_POWER_OF_TEN : i32 = 38 ;
202
+ // const LARGEST_POWER_OF_TEN: i32 = 38;
120
203
121
204
#[ inline]
122
205
fn from_u64 ( v : u64 ) -> Self {
@@ -154,22 +237,25 @@ impl RawFloat for f32 {
154
237
}
155
238
156
239
impl RawFloat for f64 {
157
- const INFINITY : Self = f64:: INFINITY ;
158
- const NEG_INFINITY : Self = f64:: NEG_INFINITY ;
159
- const NAN : Self = f64:: NAN ;
160
- const NEG_NAN : Self = -f64:: NAN ;
240
+ const INFINITY : Self = Self :: INFINITY ;
241
+ const NEG_INFINITY : Self = Self :: NEG_INFINITY ;
242
+ const NAN : Self = Self :: NAN ;
243
+ const NEG_NAN : Self = -Self :: NAN ;
244
+
245
+ const BITS : u32 = 64 ;
246
+ const MANTISSA_BITS : u32 = Self :: MANTISSA_DIGITS ;
161
247
162
- const MANTISSA_EXPLICIT_BITS : usize = 52 ;
248
+ // const MANTISSA_EXPLICIT_BITS: u32 = 52;
163
249
const MIN_EXPONENT_ROUND_TO_EVEN : i32 = -4 ;
164
250
const MAX_EXPONENT_ROUND_TO_EVEN : i32 = 23 ;
165
- const MIN_EXPONENT_FAST_PATH : i64 = -22 ; // assuming FLT_EVAL_METHOD = 0
166
- const MAX_EXPONENT_FAST_PATH : i64 = 22 ;
167
- const MAX_EXPONENT_DISGUISED_FAST_PATH : i64 = 37 ;
168
- const MINIMUM_EXPONENT : i32 = -1023 ;
169
- const INFINITE_POWER : i32 = 0x7FF ;
170
- const SIGN_INDEX : usize = 63 ;
251
+ // const MIN_EXPONENT_FAST_PATH: i64 = -22; // assuming FLT_EVAL_METHOD = 0
252
+ // const MAX_EXPONENT_FAST_PATH: i64 = 22;
253
+ // const MAX_EXPONENT_DISGUISED_FAST_PATH: i64 = 37;
254
+ // const MINIMUM_EXPONENT: i32 = -1023;
255
+ // const INFINITE_POWER: i32 = 0x7FF;
256
+ // const SIGN_INDEX: u32 = 63;
171
257
const SMALLEST_POWER_OF_TEN : i32 = -342 ;
172
- const LARGEST_POWER_OF_TEN : i32 = 308 ;
258
+ // const LARGEST_POWER_OF_TEN: i32 = 308;
173
259
174
260
#[ inline]
175
261
fn from_u64 ( v : u64 ) -> Self {
0 commit comments