-
-
Notifications
You must be signed in to change notification settings - Fork 18.6k
feature #49580: support new-style float_format string in to_csv #61650
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
1983866
7eccc89
c81352a
cbc096f
fec8e4a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -455,7 +455,7 @@ def __init__( | |||||
self.na_rep = na_rep | ||||||
self.formatters = self._initialize_formatters(formatters) | ||||||
self.justify = self._initialize_justify(justify) | ||||||
self.float_format = float_format | ||||||
self.float_format = self._validate_float_format(float_format) | ||||||
self.sparsify = self._initialize_sparsify(sparsify) | ||||||
self.show_index_names = index_names | ||||||
self.decimal = decimal | ||||||
|
@@ -850,6 +850,34 @@ def _get_column_name_list(self) -> list[Hashable]: | |||||
names.append("" if columns.name is None else columns.name) | ||||||
return names | ||||||
|
||||||
def _validate_float_format( | ||||||
self, fmt: FloatFormatType | None | ||||||
) -> FloatFormatType | None: | ||||||
""" | ||||||
Validates and processes the float_format argument. | ||||||
Converts new-style format strings to callables. | ||||||
""" | ||||||
|
||||||
if fmt is None: | ||||||
return None | ||||||
|
||||||
if callable(fmt): | ||||||
return fmt | ||||||
|
||||||
if isinstance(fmt, str): | ||||||
if "%" in fmt: | ||||||
# Keeps old-style format strings as they are (C code handles them) | ||||||
return fmt | ||||||
else: | ||||||
try: | ||||||
_ = fmt.format(1.0) # Test with an arbitrary float | ||||||
return lambda x: fmt.format(x) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
except (ValueError, KeyError, IndexError) as e: | ||||||
raise ValueError(f"Invalid new-style format string {fmt!r}") from e | ||||||
|
||||||
# If fmt is neither None, nor callable, nor a successfully processed string, | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
raise ValueError("float_format must be a string or callable") | ||||||
|
||||||
|
||||||
class DataFrameRenderer: | ||||||
"""Class for creating dataframe output in multiple formats. | ||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,7 @@ | ||
import io | ||
import os | ||
import sys | ||
import warnings | ||
from zipfile import ZipFile | ||
|
||
from _csv import Error | ||
|
@@ -741,3 +742,143 @@ def test_to_csv_iterative_compression_buffer(compression): | |
pd.read_csv(buffer, compression=compression, index_col=0), df | ||
) | ||
assert not buffer.closed | ||
|
||
|
||
def test_new_style_float_format_basic():
    """A two-decimal ``str.format`` template is applied to each float."""
    frame = DataFrame({"A": [1234.56789, 9876.54321]})
    csv_text = frame.to_csv(float_format="{:.2f}", lineterminator="\n")
    assert csv_text == ",A\n0,1234.57\n1,9876.54\n"
|
||
|
||
def test_new_style_float_format_thousands():
    """Values formatted with a thousands separator are quoted in the CSV."""
    frame = DataFrame({"A": [1234.56789, 9876.54321]})
    csv_text = frame.to_csv(float_format="{:,.2f}", lineterminator="\n")
    # The formatted values contain the delimiter, hence the quotes.
    assert csv_text == ',A\n0,"1,234.57"\n1,"9,876.54"\n'
|
||
|
||
def test_new_style_scientific_format():
    """A new-style exponent template renders floats in scientific notation."""
    frame = DataFrame({"A": [0.000123, 0.000456]})
    csv_text = frame.to_csv(float_format="{:.2e}", lineterminator="\n")
    assert csv_text == ",A\n0,1.23e-04\n1,4.56e-04\n"
|
||
|
||
def test_new_style_with_nan():
    """Missing values are written as ``na_rep``, not passed to the template."""
    frame = DataFrame({"A": [1.23, np.nan, 4.56]})
    csv_text = frame.to_csv(
        float_format="{:.2f}", na_rep="NA", lineterminator="\n"
    )
    assert csv_text == ",A\n0,1.23\n1,NA\n2,4.56\n"
|
||
|
||
def test_new_style_with_mixed_types():
    """String columns are written unchanged next to a formatted float column."""
    frame = DataFrame({"A": [1.23, 4.56], "B": ["x", "y"]})
    csv_text = frame.to_csv(float_format="{:.2f}", lineterminator="\n")
    assert csv_text == ",A,B\n0,1.23,x\n1,4.56,y\n"
|
||
|
||
def test_new_style_with_mixed_types_in_column():
    """A column mixing floats and strings is written with the values as-is."""
    frame = DataFrame({"A": [1.23, "text", 4.56]})
    # Any warning raised while writing is recorded rather than propagated.
    with warnings.catch_warnings(record=True):
        warnings.simplefilter("always")
        csv_text = frame.to_csv(float_format="{:.2f}", lineterminator="\n")

    assert csv_text == ",A\n0,1.23\n1,text\n2,4.56\n"
|
||
|
||
def test_invalid_new_style_format_missing_brace():
    """An unterminated replacement field is rejected with a ValueError."""
    import re

    df = DataFrame({"A": [1.23]})
    # ``match`` is a regular expression: escape the brace and dot so the
    # message is compared literally instead of as a pattern.
    msg = re.escape("Invalid new-style format string '{:.2f")
    with pytest.raises(ValueError, match=msg):
        df.to_csv(float_format="{:.2f")
|
||
|
||
def test_invalid_new_style_format_specifier():
    """An unknown format code in the template is rejected with a ValueError."""
    import re

    df = DataFrame({"A": [1.23]})
    # ``match`` is a regular expression: escape the braces and dot so the
    # message is compared literally instead of as a pattern.
    msg = re.escape("Invalid new-style format string '{:.2z}'")
    with pytest.raises(ValueError, match=msg):
        df.to_csv(float_format="{:.2z}")
|
||
|
||
def test_old_style_format_compatibility():
    """Old-style ``%`` templates keep producing the same output."""
    frame = DataFrame({"A": [1234.56789, 9876.54321]})
    csv_text = frame.to_csv(float_format="%.2f", lineterminator="\n")
    assert csv_text == ",A\n0,1234.57\n1,9876.54\n"
|
||
|
||
def test_callable_float_format_compatibility():
    """A callable ``float_format`` is applied to each float value."""

    def with_thousands(x):
        return f"{x:,.2f}"

    frame = DataFrame({"A": [1234.56789, 9876.54321]})
    csv_text = frame.to_csv(float_format=with_thousands, lineterminator="\n")
    # The formatted values contain the delimiter, hence the quotes.
    assert csv_text == ',A\n0,"1,234.57"\n1,"9,876.54"\n'
|
||
|
||
def test_no_float_format():
    """Passing ``float_format=None`` writes floats with default formatting."""
    frame = DataFrame({"A": [1.23, 4.56]})
    csv_text = frame.to_csv(float_format=None, lineterminator="\n")
    assert csv_text == ",A\n0,1.23\n1,4.56\n"
|
||
|
||
def test_large_numbers():
    """Very large and overflowing values are rendered by the template."""
    # NOTE: the literal 2e308 is beyond the largest finite double, so the
    # second value is infinity; the expected output reflects that.
    frame = DataFrame({"A": [1e308, 2e308]})
    csv_text = frame.to_csv(float_format="{:.2e}", lineterminator="\n")
    assert csv_text == ",A\n0,1.00e+308\n1,inf\n"
|
||
|
||
def test_zero_and_negative():
    """A sign-forcing template prefixes zero with ``+`` and keeps ``-``."""
    frame = DataFrame({"A": [0.0, -1.23456]})
    csv_text = frame.to_csv(float_format="{:+.2f}", lineterminator="\n")
    assert csv_text == ",A\n0,+0.00\n1,-1.23\n"
|
||
|
||
def test_unicode_format():
    """Non-ASCII literal text in the template is kept in the output."""
    frame = DataFrame({"A": [1.23, 4.56]})
    csv_text = frame.to_csv(
        float_format="{:.2f}€", encoding="utf-8", lineterminator="\n"
    )
    assert csv_text == ",A\n0,1.23€\n1,4.56€\n"
|
||
|
||
def test_empty_dataframe():
    """With no rows, only the header line is written."""
    frame = DataFrame({"A": []})
    csv_text = frame.to_csv(float_format="{:.2f}", lineterminator="\n")
    assert csv_text == ",A\n"
|
||
|
||
def test_multi_column_float():
    """The template is applied to every float column."""
    frame = DataFrame({"A": [1.23, 4.56], "B": [7.89, 0.12]})
    csv_text = frame.to_csv(float_format="{:.2f}", lineterminator="\n")
    assert csv_text == ",A,B\n0,1.23,7.89\n1,4.56,0.12\n"
|
||
|
||
def test_invalid_float_format_type():
    """A ``float_format`` that is neither a string nor callable is rejected."""
    frame = DataFrame({"A": [1.23]})
    msg = "float_format must be a string or callable"
    with pytest.raises(ValueError, match=msg):
        frame.to_csv(float_format=123)
|
||
|
||
def test_new_style_with_inf():
    """Infinite values are written as ``inf``/``-inf`` and not as ``na_rep``."""
    frame = DataFrame({"A": [1.23, np.inf, -np.inf]})
    csv_text = frame.to_csv(
        float_format="{:.2f}", na_rep="NA", lineterminator="\n"
    )
    assert csv_text == ",A\n0,1.23\n1,inf\n2,-inf\n"
|
||
|
||
def test_new_style_with_precision_edge():
    """A precision wider than the value's digits pads with trailing zeros."""
    frame = DataFrame({"A": [1.23456789]})
    csv_text = frame.to_csv(float_format="{:.10f}", lineterminator="\n")
    assert csv_text == ",A\n0,1.2345678900\n"
|
||
|
||
def test_new_style_with_template():
    """Literal text around the replacement field is kept in the output."""
    frame = DataFrame({"A": [1234.56789]})
    csv_text = frame.to_csv(float_format="Value: {:,.2f}", lineterminator="\n")
    # The formatted value contains the delimiter, hence the quotes.
    assert csv_text == ',A\n0,"Value: 1,234.57"\n'
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.