diff --git a/cpp/src/arrow/util/float16.h b/cpp/src/arrow/util/float16.h
index b52145cdc0c..293daa21cfe 100644
--- a/cpp/src/arrow/util/float16.h
+++ b/cpp/src/arrow/util/float16.h
@@ -185,7 +185,6 @@ static_assert(sizeof(Float16) == sizeof(uint16_t));
 }  // namespace util
 }  // namespace arrow
 
-// TODO: Not complete
 template <>
 class std::numeric_limits<arrow::util::Float16> {
   using T = arrow::util::Float16;
@@ -193,14 +192,47 @@ class std::numeric_limits<arrow::util::Float16> {
  public:
   static constexpr bool is_specialized = true;
   static constexpr bool is_signed = true;
+  static constexpr bool is_integer = false;
+  static constexpr bool is_exact = false;
   static constexpr bool has_infinity = true;
   static constexpr bool has_quiet_NaN = true;
+  static constexpr bool has_signaling_NaN = true;
+  static constexpr std::float_denorm_style has_denorm = std::denorm_present;
+  static constexpr bool has_denorm_loss = false;
+  static constexpr bool is_iec559 = true;
+  static constexpr bool is_bounded = true;
+  static constexpr bool is_modulo = false;
+  static constexpr int radix = 2;
+
+  // Float16 has 10 explicit mantissa bits + 1 implicit bit = 11 bits precision
+  static constexpr int digits = 11;
+  // Decimal digits that survive a round trip: floor((digits - 1) * log10(2))
+  static constexpr int digits10 = 3;
+  // Decimal digits needed to round-trip any value: ceil(digits * log10(2) + 1)
+  static constexpr int max_digits10 = 5;
+
+  // Smallest normal value is 2^-14; min_exponent is one more than that exponent
+  static constexpr int min_exponent = -13;
+  // Smallest power of ten that is a normal value: 10^-4 (2^-14 is about 6.1e-5)
+  static constexpr int min_exponent10 = -4;
+  // Largest finite exponent is 15 (field value 30 - bias 15); max_exponent is one more
+  static constexpr int max_exponent = 16;
+  // Largest power of ten that is finite: 10^4 (max() is 65504)
+  static constexpr int max_exponent10 = 4;
+
+  static constexpr bool traps = false;
+  static constexpr bool tinyness_before = false;
+  static constexpr std::float_round_style round_style = std::round_to_nearest;
 
   static constexpr T min() { return T::FromBits(0b0000010000000000); }
   static constexpr T max() { return T::FromBits(0b0111101111111111); }
   static constexpr T lowest() { return -max(); }
+  static constexpr T epsilon() { return T::FromBits(0b0001010000000000); }  // 2^-10
+  static constexpr T round_error() { return T::FromBits(0b0011100000000000); }  // 0.5
+  static constexpr T denorm_min() { return T::FromBits(0b0000000000000001); }
 
   static constexpr T infinity() { return T::FromBits(0b0111110000000000); }
 
   static constexpr T quiet_NaN() { return T::FromBits(0b0111111111111111); }
+  static constexpr T signaling_NaN() { return T::FromBits(0b0111110000000001); }
 };
diff --git a/cpp/src/arrow/util/float16_test.cc b/cpp/src/arrow/util/float16_test.cc
index 5918381a269..45e48d1bd4c 100644
--- a/cpp/src/arrow/util/float16_test.cc
+++ b/cpp/src/arrow/util/float16_test.cc
@@ -372,5 +372,69 @@ TEST(Float16Test, FromBytes) {
   ASSERT_EQ(Float16::FromBigEndian(bytes.data()), Float16::FromBits(0x1cd0));
 }
 
+TEST(Float16Test, NumericLimits) {
+  using F16 = std::numeric_limits<Float16>;
+  using F32 = std::numeric_limits<float>;
+
+  // Boolean traits - should match standard float
+  ASSERT_EQ(F16::is_specialized, F32::is_specialized);
+  ASSERT_EQ(F16::is_signed, F32::is_signed);
+  ASSERT_EQ(F16::is_integer, F32::is_integer);
+  ASSERT_EQ(F16::is_exact, F32::is_exact);
+  ASSERT_EQ(F16::has_infinity, F32::has_infinity);
+  ASSERT_EQ(F16::has_quiet_NaN, F32::has_quiet_NaN);
+  ASSERT_EQ(F16::has_signaling_NaN, F32::has_signaling_NaN);
+  ASSERT_EQ(F16::has_denorm, F32::has_denorm);
+  ASSERT_EQ(F16::has_denorm_loss, F32::has_denorm_loss);
+  ASSERT_EQ(F16::is_iec559, F32::is_iec559);
+  ASSERT_EQ(F16::is_bounded, F32::is_bounded);
+  ASSERT_EQ(F16::is_modulo, F32::is_modulo);
+  ASSERT_EQ(F16::radix, F32::radix);
+
+  // Check that the IEEE 754 binary16 parameters are reported correctly.
+  // Precision and exponent range
+  ASSERT_EQ(F16::digits, 11);
+  ASSERT_EQ(F16::digits10, 3);
+  ASSERT_EQ(F16::max_digits10, 5);
+  ASSERT_EQ(F16::min_exponent, -13);
+  ASSERT_EQ(F16::max_exponent, 16);
+  ASSERT_EQ(F16::min_exponent10, -4);
+  ASSERT_EQ(F16::max_exponent10, 4);
+
+  // Special values
+  ASSERT_FLOAT_EQ(F16::max().ToFloat(), 65504.0f);            // Largest finite value
+  ASSERT_EQ(F16::lowest(), -F16::max());                      // Most negative = -max
+  ASSERT_FLOAT_EQ(F16::epsilon().ToFloat(), 0.0009765625f);   // 2^-10
+  ASSERT_FLOAT_EQ(F16::round_error().ToFloat(), 0.5f);        // Round-to-nearest
+  ASSERT_TRUE(F16::infinity().is_infinity());
+  ASSERT_FALSE(F16::infinity().signbit());
+  ASSERT_TRUE((-F16::infinity()).is_infinity());
+  ASSERT_TRUE((-F16::infinity()).signbit());
+  ASSERT_TRUE(F16::quiet_NaN().is_nan());
+  ASSERT_TRUE(F16::signaling_NaN().is_nan());
+
+  // min() is smallest positive normal, denorm_min() is smallest subnormal
+  ASSERT_TRUE(F16::min().is_finite());
+  ASSERT_FALSE(F16::min().signbit());
+  ASSERT_TRUE(F16::denorm_min().is_finite());
+  ASSERT_FALSE(F16::denorm_min().signbit());
+
+  // Verify the exact values and ordering of min() and denorm_min()
+  ASSERT_FLOAT_EQ(F16::min().ToFloat(), 6.103515625e-05f);                // 2^-14
+  ASSERT_FLOAT_EQ(F16::denorm_min().ToFloat(), 5.9604644775390625e-08f);  // 2^-24
+  ASSERT_TRUE(F16::min() > Float16::FromBits(0));
+  ASSERT_TRUE(F16::denorm_min() > Float16::FromBits(0));
+  ASSERT_TRUE(F16::denorm_min() < F16::min());
+
+  // Verify epsilon: 1 + epsilon != 1
+  auto one = Float16(1.0f);
+  auto one_plus_epsilon = Float16(one.ToFloat() + F16::epsilon().ToFloat());
+  ASSERT_NE(one, one_plus_epsilon);
+
+  // Verify round_error is 0.5
+  ASSERT_FLOAT_EQ(F16::round_error().ToFloat(), 0.5f);
+  ASSERT_FLOAT_EQ(F32::round_error(), 0.5f);
+}
+
 }  // namespace
 }  // namespace arrow::util