Skip to content

Commit c0d5a59

Browse files
authored
GH-49003: [C++] Don't consider out_of_range an error in float parsing (#49095)
### Rationale for this change

This PR restores the behavior from before version 23 for floating-point parsing on overflow and subnormal input. In version `3.10.1`, `fast_float` did not assign an error code in these cases: it produced `±Inf` on overflow and `0.0` on subnormal. Since the update to version `8.1`, it assigns `std::errc::result_out_of_range` in such cases.

### What changes are included in this PR?

Ignores `std::errc::result_out_of_range` and produces `±Inf` / `0.0` as appropriate instead of failing the conversion.

### Are these changes tested?

Yes. Added tests for overflow with positive and negative signed mantissas, and tests for subnormal input, all of them for binary{16,32,64}.

### Are there any user-facing changes?

Yes, this is a user-facing change. In `libarrow==23`, the CSV reader was reading such values as strings, whereas earlier versions parsed them as `0` or `±inf`. With this patch, the CSV reader in PyArrow outputs:

```python
>>> import pyarrow
>>> import pyarrow.csv
>>> import io
>>> table = pyarrow.csv.read_csv(io.BytesIO(f"data\n10E-617\n10E617\n-10E617".encode()))
>>> print(table)
pyarrow.Table
data: double
----
data: [[0,inf,-inf]]
```

Closes #49003

* GitHub Issue: #49003

Authored-by: Alvaro-Kothe <kothe65@gmail.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
1 parent 7532327 commit c0d5a59

2 files changed

Lines changed: 24 additions & 3 deletions

File tree

cpp/src/arrow/util/value_parsing.cc

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,15 +35,21 @@ bool StringToFloat(const char* s, size_t length, char decimal_point, float* out)
3535
::arrow_vendored::fast_float::chars_format::general, decimal_point};
3636
const auto res =
3737
::arrow_vendored::fast_float::from_chars_advanced(s, s + length, *out, options);
38-
return res.ec == std::errc() && res.ptr == s + length;
38+
const bool is_valid_number =
39+
res.ec == std::errc() || res.ec == std::errc::result_out_of_range;
40+
const bool consumed_entire_string = res.ptr == s + length;
41+
return is_valid_number && consumed_entire_string;
3942
}
4043

4144
bool StringToFloat(const char* s, size_t length, char decimal_point, double* out) {
4245
::arrow_vendored::fast_float::parse_options options{
4346
::arrow_vendored::fast_float::chars_format::general, decimal_point};
4447
const auto res =
4548
::arrow_vendored::fast_float::from_chars_advanced(s, s + length, *out, options);
46-
return res.ec == std::errc() && res.ptr == s + length;
49+
const bool is_valid_number =
50+
res.ec == std::errc() || res.ec == std::errc::result_out_of_range;
51+
const bool consumed_entire_string = res.ptr == s + length;
52+
return is_valid_number && consumed_entire_string;
4753
}
4854

4955
// Half float
@@ -53,7 +59,10 @@ bool StringToFloat(const char* s, size_t length, char decimal_point, Float16* ou
5359
float temp_out;
5460
const auto res =
5561
::arrow_vendored::fast_float::from_chars_advanced(s, s + length, temp_out, options);
56-
const bool ok = res.ec == std::errc() && res.ptr == s + length;
62+
const bool is_valid_number =
63+
res.ec == std::errc() || res.ec == std::errc::result_out_of_range;
64+
const bool consumed_entire_string = res.ptr == s + length;
65+
const bool ok = is_valid_number && consumed_entire_string;
5766
if (ok) {
5867
*out = Float16::FromFloat(temp_out);
5968
}

cpp/src/arrow/util/value_parsing_test.cc

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,10 @@ TEST(StringConversion, ToFloat) {
141141
AssertConversion<FloatType>("0", 0.0f);
142142
AssertConversion<FloatType>("-0.0", -0.0f);
143143
AssertConversion<FloatType>("-1e20", -1e20f);
144+
AssertConversion<FloatType>("4e38", std::numeric_limits<float>::infinity());
145+
AssertConversion<FloatType>("-4e38", -std::numeric_limits<float>::infinity());
146+
AssertConversion<FloatType>("1e-46", 0.0f);
147+
AssertConversion<FloatType>("-1e-46", -0.0f);
144148
AssertConversion<FloatType>("+Infinity", std::numeric_limits<float>::infinity());
145149
AssertConversion<FloatType>("-Infinity", -std::numeric_limits<float>::infinity());
146150
AssertConversion<FloatType>("Infinity", std::numeric_limits<float>::infinity());
@@ -166,6 +170,10 @@ TEST(StringConversion, ToDouble) {
166170
AssertConversion<DoubleType>("0", 0);
167171
AssertConversion<DoubleType>("-0.0", -0.0);
168172
AssertConversion<DoubleType>("-1e100", -1e100);
173+
AssertConversion<DoubleType>("2e308", std::numeric_limits<double>::infinity());
174+
AssertConversion<DoubleType>("-2e308", -std::numeric_limits<double>::infinity());
175+
AssertConversion<DoubleType>("1e-325", 0.0);
176+
AssertConversion<DoubleType>("-1e-325", -0.0);
169177
AssertConversion<DoubleType>("+Infinity", std::numeric_limits<double>::infinity());
170178
AssertConversion<DoubleType>("-Infinity", -std::numeric_limits<double>::infinity());
171179
AssertConversion<DoubleType>("Infinity", std::numeric_limits<double>::infinity());
@@ -185,6 +193,10 @@ TEST(StringConversion, ToHalfFloat) {
185193
AssertConversion<HalfFloatType>("0", Float16(0.0f));
186194
AssertConversion<HalfFloatType>("-0.0", Float16(-0.0f));
187195
AssertConversion<HalfFloatType>("-1e15", Float16(-1e15));
196+
AssertConversion<HalfFloatType>("7e4", Float16::FromBits(0x7c00));
197+
AssertConversion<HalfFloatType>("-7e4", Float16::FromBits(0xfc00));
198+
AssertConversion<HalfFloatType>("1e-9", Float16(0.0f));
199+
AssertConversion<HalfFloatType>("-1e-9", Float16(-0.0f));
188200
AssertConversion<HalfFloatType>("+Infinity", Float16::FromBits(0x7c00));
189201
AssertConversion<HalfFloatType>("-Infinity", Float16::FromBits(0xfc00));
190202
AssertConversion<HalfFloatType>("Infinity", Float16::FromBits(0x7c00));

0 commit comments

Comments
 (0)