Try to win perf back

tgross35 · tgross35 · commit 5cfb02857b7f · 2025-01-22T07:08:24.000Z
experiment

try just using exp_unbiased

experiment

Fix experiment

Update
diff --git a/crates/libm-test/benches/icount.rs b/crates/libm-test/benches/icount.rs
@@ -69,6 +69,8 @@ main!(
     icount_bench_cbrt_group,
     icount_bench_cbrtf_group,
     icount_bench_ceil_group,
+    icount_bench_ceilf128_group,
+    icount_bench_ceilf16_group,
     icount_bench_ceilf_group,
     icount_bench_copysign_group,
     icount_bench_copysignf128_group,
diff --git a/src/math/generic/ceil.rs b/src/math/generic/ceil.rs
@@ -1,54 +1,65 @@
-/* SPDX-License-Identifier: MIT
- * origin: musl src/math/ceil.c */
+/* SPDX-License-Identifier: MIT */
+/* origin: musl src/math/ceil.c */
 
-use super::super::{CastInto, Float};
+//! Generic `ceil` algorithm.
+//!
+//! Note that this uses the algorithm from musl's `ceilf` rather than `ceil` or `ceill`, because
+//! performance seems to be better (based on icount) and it does not seem to experience rounding
+//! errors on i386.
+
+use super::super::{Float, Int, IntTy, MinInt};
 
 pub fn ceil<F: Float>(x: F) -> F {
-    let toint = F::ONE / F::EPSILON;
+    let zero = IntTy::<F>::ZERO;
 
-    // NB: using `exp` here and comparing to values adjusted by `EXP_BIAS` has better
-    // perf than using `exp_unbiased` here.
-    let e = x.exp();
-    let y: F;
+    let mut ix = x.to_bits();
+    let e = x.exp_unbiased();
 
     // If the represented value has no fractional part, no truncation is needed.
-    if e >= (F::SIG_BITS + F::EXP_BIAS).cast() || x == F::ZERO {
+    if e >= F::SIG_BITS as i32 {
         return x;
     }
 
-    let neg = x.is_sign_negative();
+    if e >= 0 {
+        // |x| >= 1.0
+
+        let m = F::SIG_MASK >> e.unsigned();
+        if (ix & m) == zero {
+            // Portion to be masked is already zero; no adjustment needed.
+            return x;
+        }
 
-    // y = int(x) - x, where int(x) is an integer neighbor of x.
-    // The `x - t + t - x` method is a way to expose non-round-to-even modes.
-    y = if neg { x - toint + toint - x } else { x + toint - toint - x };
+        // Otherwise, raise an inexact exception.
+        force_eval!(x + F::MAX);
+        if x.is_sign_positive() {
+            ix += m;
+        }
+        ix &= !m;
+    } else {
+        // |x| < 1.0, raise an inexact exception since truncation will happen (unless x == 0).
+        force_eval!(x + F::MAX);
 
-    // Exp < 0;  special case because of non-nearest rounding modes
-    if e < F::EXP_BIAS.cast() {
-        // Raise `FE_INEXACT`
-        force_eval!(y);
-        return if neg { F::NEG_ZERO } else { F::ONE };
+        if x.is_sign_negative() {
+            // -1.0 < x <= -0.0; rounding up goes toward -0.0.
+            return F::NEG_ZERO;
+        } else if ix << 1 != zero {
+            // 0.0 < x < 1.0; rounding up goes toward +1.0.
+            return F::ONE;
+        }
     }
 
-    if y < F::ZERO { x + y + F::ONE } else { x + y }
+    F::from_bits(ix)
 }
 
 #[cfg(test)]
 mod tests {
     use super::*;
 
-    #[test]
-    fn sanity_check_f64() {
-        assert_eq!(ceil(1.1f64), 2.0);
-        assert_eq!(ceil(2.9f64), 3.0);
-    }
-
-    /// The spec: https://en.cppreference.com/w/cpp/numeric/math/ceil
-    #[test]
-    fn spec_tests_f64() {
+    /// Test against https://en.cppreference.com/w/cpp/numeric/math/ceil
+    fn spec_test<F: Float>() {
         // Not Asserted: that the current rounding mode has no effect.
-        assert!(ceil(f64::NAN).is_nan());
-        for f in [0.0, -0.0, f64::INFINITY, f64::NEG_INFINITY].iter().copied() {
-            assert_eq!(ceil(f), f);
+        for f in [F::ZERO, F::NEG_ZERO, F::INFINITY, F::NEG_INFINITY].iter().copied() {
+            assert_biteq!(ceil(f), f);
         }
     }
 
@@ -58,13 +69,19 @@ mod tests {
         assert_eq!(ceil(2.9f32), 3.0);
     }
 
-    /// The spec: https://en.cppreference.com/w/cpp/numeric/math/ceil
     #[test]
     fn spec_tests_f32() {
-        // Not Asserted: that the current rounding mode has no effect.
-        assert!(ceil(f32::NAN).is_nan());
-        for f in [0.0, -0.0, f32::INFINITY, f32::NEG_INFINITY].iter().copied() {
-            assert_eq!(ceil(f), f);
-        }
+        spec_test::<f32>();
+    }
+
+    #[test]
+    fn sanity_check_f64() {
+        assert_eq!(ceil(1.1f64), 2.0);
+        assert_eq!(ceil(2.9f64), 3.0);
+    }
+
+    #[test]
+    fn spec_tests_f64() {
+        spec_test::<f64>();
     }
 }
diff --git a/src/math/generic/sqrt.rs b/src/math/generic/sqrt.rs
@@ -96,7 +96,7 @@ where
         ix = scaled.to_bits();
         match top {
             Exp::Shifted(ref mut v) => {
-                *v = scaled.exp().unsigned();
+                *v = scaled.exp();
                 *v = (*v).wrapping_sub(F::SIG_BITS);
             }
             Exp::NoShift(()) => {
diff --git a/src/math/support/float_traits.rs b/src/math/support/float_traits.rs
@@ -108,13 +108,13 @@ pub trait Float:
     }
 
     /// Returns the exponent, not adjusting for bias, not accounting for subnormals or zero.
-    fn exp(self) -> i32 {
-        (u32::cast_from(self.to_bits() >> Self::SIG_BITS) & Self::EXP_MAX).signed()
+    fn exp(self) -> u32 {
+        u32::cast_from(self.to_bits() >> Self::SIG_BITS) & Self::EXP_MAX
     }
 
     /// Extract the exponent and adjust it for bias, not accounting for subnormals or zero.
     fn exp_unbiased(self) -> i32 {
-        self.exp() - (Self::EXP_BIAS as i32)
+        self.exp().signed() - (Self::EXP_BIAS as i32)
     }
 
     /// Returns the significand with no implicit bit (or the "fractional" part)

Original file line number	Diff line number	Diff line change
`@@ -96,7 +96,7 @@ where`
`96`	`96`	`ix = scaled.to_bits();`
`97`	`97`	`match top {`
`98`	`98`	`Exp::Shifted(ref mut v) => {`
`99`		`- *v = scaled.exp().unsigned();`
	`99`	`+ *v = scaled.exp();`
`100`	`100`	`v = (v).wrapping_sub(F::SIG_BITS);`
`101`	`101`	`}`
`102`	`102`	`Exp::NoShift(()) => {`
Original file line number	Diff line number	Diff line change
`@@ -108,13 +108,13 @@ pub trait Float:`
`108`	`108`	`}`
`109`	`109`
`110`	`110`	`/// Returns the exponent, not adjusting for bias, not accounting for subnormals or zero.`
`111`		`- fn exp(self) -> i32 {`
`112`		`- (u32::cast_from(self.to_bits() >> Self::SIG_BITS) & Self::EXP_MAX).signed()`
	`111`	`+ fn exp(self) -> u32 {`
	`112`	`+ u32::cast_from(self.to_bits() >> Self::SIG_BITS) & Self::EXP_MAX`
`113`	`113`	`}`
`114`	`114`
`115`	`115`	`/// Extract the exponent and adjust it for bias, not accounting for subnormals or zero.`
`116`	`116`	`fn exp_unbiased(self) -> i32 {`
`117`		`- self.exp() - (Self::EXP_BIAS as i32)`
	`117`	`+ self.exp().signed() - (Self::EXP_BIAS as i32)`
`118`	`118`	`}`
`119`	`119`
`120`	`120`	`/// Returns the significand with no implicit bit (or the "fractional" part)`