GH-49003: [C++] Don't consider out_of_range an error in float parsing (#49095)

Alvaro-Kothe · web-flow · commit c0d5a596f300 · 2026-02-03T18:59:44.000+01:00
### Rationale for this change This PR restores the floating-point parsing behavior from before version 23 for inputs that overflow or underflow. In version `3.10.1`, `fast_float` did not report an error code in these cases; it produced `±Inf` on overflow and `0.0` on underflow (a magnitude too small to represent). With the update to version `8.1`, it started to report `std::errc::result_out_of_range` in such cases. ### What changes are included in this PR? Ignores `std::errc::result_out_of_range` and produces `±Inf` / `0.0` as appropriate instead of failing the conversion. ### Are these changes tested? Yes. Added tests for overflow with positive and negative mantissas, and for underflow to zero, all of them for binary{16,32,64}. ### Are there any user-facing changes? Yes, this is a user-facing change. The CSV reader in `libarrow==23` read such values as strings, whereas earlier versions parsed them as `0` or `±inf`. With this patch, the CSV reader in PyArrow outputs: ```python >>> import pyarrow >>> import pyarrow.csv >>> import io >>> table = pyarrow.csv.read_csv(io.BytesIO(f"data\n10E-617\n10E617\n-10E617".encode())) >>> print(table) pyarrow.Table data: double ---- data: [[0,inf,-inf]] ``` Closes #49003 * GitHub Issue: #49003 Authored-by: Alvaro-Kothe <kothe65@gmail.com> Signed-off-by: Antoine Pitrou <antoine@python.org>
diff --git a/cpp/src/arrow/util/value_parsing.cc b/cpp/src/arrow/util/value_parsing.cc
@@ -35,15 +35,21 @@ bool StringToFloat(const char* s, size_t length, char decimal_point, float* out)
       ::arrow_vendored::fast_float::chars_format::general, decimal_point};
   const auto res =
       ::arrow_vendored::fast_float::from_chars_advanced(s, s + length, *out, options);
-  return res.ec == std::errc() && res.ptr == s + length;
+  const bool is_valid_number =
+      res.ec == std::errc() || res.ec == std::errc::result_out_of_range;
+  const bool consumed_entire_string = res.ptr == s + length;
+  return is_valid_number && consumed_entire_string;
 }
 
 bool StringToFloat(const char* s, size_t length, char decimal_point, double* out) {
   ::arrow_vendored::fast_float::parse_options options{
       ::arrow_vendored::fast_float::chars_format::general, decimal_point};
   const auto res =
       ::arrow_vendored::fast_float::from_chars_advanced(s, s + length, *out, options);
-  return res.ec == std::errc() && res.ptr == s + length;
+  const bool is_valid_number =
+      res.ec == std::errc() || res.ec == std::errc::result_out_of_range;
+  const bool consumed_entire_string = res.ptr == s + length;
+  return is_valid_number && consumed_entire_string;
 }
 
 // Half float
@@ -53,7 +59,10 @@ bool StringToFloat(const char* s, size_t length, char decimal_point, Float16* ou
   float temp_out;
   const auto res =
       ::arrow_vendored::fast_float::from_chars_advanced(s, s + length, temp_out, options);
-  const bool ok = res.ec == std::errc() && res.ptr == s + length;
+  const bool is_valid_number =
+      res.ec == std::errc() || res.ec == std::errc::result_out_of_range;
+  const bool consumed_entire_string = res.ptr == s + length;
+  const bool ok = is_valid_number && consumed_entire_string;
   if (ok) {
     *out = Float16::FromFloat(temp_out);
   }
diff --git a/cpp/src/arrow/util/value_parsing_test.cc b/cpp/src/arrow/util/value_parsing_test.cc
@@ -141,6 +141,10 @@ TEST(StringConversion, ToFloat) {
   AssertConversion<FloatType>("0", 0.0f);
   AssertConversion<FloatType>("-0.0", -0.0f);
   AssertConversion<FloatType>("-1e20", -1e20f);
+  AssertConversion<FloatType>("4e38", std::numeric_limits<float>::infinity());
+  AssertConversion<FloatType>("-4e38", -std::numeric_limits<float>::infinity());
+  AssertConversion<FloatType>("1e-46", 0.0f);
+  AssertConversion<FloatType>("-1e-46", -0.0f);
   AssertConversion<FloatType>("+Infinity", std::numeric_limits<float>::infinity());
   AssertConversion<FloatType>("-Infinity", -std::numeric_limits<float>::infinity());
   AssertConversion<FloatType>("Infinity", std::numeric_limits<float>::infinity());
@@ -166,6 +170,10 @@ TEST(StringConversion, ToDouble) {
   AssertConversion<DoubleType>("0", 0);
   AssertConversion<DoubleType>("-0.0", -0.0);
   AssertConversion<DoubleType>("-1e100", -1e100);
+  AssertConversion<DoubleType>("2e308", std::numeric_limits<double>::infinity());
+  AssertConversion<DoubleType>("-2e308", -std::numeric_limits<double>::infinity());
+  AssertConversion<DoubleType>("1e-325", 0.0);
+  AssertConversion<DoubleType>("-1e-325", -0.0);
   AssertConversion<DoubleType>("+Infinity", std::numeric_limits<double>::infinity());
   AssertConversion<DoubleType>("-Infinity", -std::numeric_limits<double>::infinity());
   AssertConversion<DoubleType>("Infinity", std::numeric_limits<double>::infinity());
@@ -185,6 +193,10 @@ TEST(StringConversion, ToHalfFloat) {
   AssertConversion<HalfFloatType>("0", Float16(0.0f));
   AssertConversion<HalfFloatType>("-0.0", Float16(-0.0f));
   AssertConversion<HalfFloatType>("-1e15", Float16(-1e15));
+  AssertConversion<HalfFloatType>("7e4", Float16::FromBits(0x7c00));
+  AssertConversion<HalfFloatType>("-7e4", Float16::FromBits(0xfc00));
+  AssertConversion<HalfFloatType>("1e-9", Float16(0.0f));
+  AssertConversion<HalfFloatType>("-1e-9", Float16(-0.0f));
   AssertConversion<HalfFloatType>("+Infinity", Float16::FromBits(0x7c00));
   AssertConversion<HalfFloatType>("-Infinity", Float16::FromBits(0xfc00));
   AssertConversion<HalfFloatType>("Infinity", Float16::FromBits(0x7c00));