Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -844,6 +844,7 @@ Reshaping
- Bug in :meth:`DataFrame.merge` when merging two :class:`DataFrame` on ``intc`` or ``uintc`` types on Windows (:issue:`60091`, :issue:`58713`)
- Bug in :meth:`DataFrame.pivot_table` incorrectly subaggregating results when called without an ``index`` argument (:issue:`58722`)
- Bug in :meth:`DataFrame.pivot_table` incorrectly ignoring the ``values`` argument when also supplied to the ``index`` or ``columns`` parameters (:issue:`57876`, :issue:`61292`)
- Bug in :meth:`DataFrame.pivot_table` where ``margins=True`` did not correctly include groups with ``NaN`` values in the index or columns when ``dropna=False`` was explicitly passed (:issue:`61509`)
- Bug in :meth:`DataFrame.stack` with the new implementation where ``ValueError`` is raised when ``level=[]`` (:issue:`60740`)
- Bug in :meth:`DataFrame.unstack` producing incorrect results when manipulating empty :class:`DataFrame` with an :class:`ExtensionDtype` (:issue:`59123`)
- Bug in :meth:`concat` where concatenating DataFrame and Series with ``ignore_index = True`` drops the series name (:issue:`60723`, :issue:`56257`)
Expand Down
23 changes: 16 additions & 7 deletions pandas/core/reshape/pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -396,6 +396,7 @@ def __internal_pivot_table(
observed=dropna,
margins_name=margins_name,
fill_value=fill_value,
dropna=dropna,
)

# discard the top level
Expand All @@ -422,6 +423,7 @@ def _add_margins(
observed: bool,
margins_name: Hashable = "All",
fill_value=None,
dropna: bool = True,
):
if not isinstance(margins_name, str):
raise ValueError("margins_name argument must be a string")
Expand Down Expand Up @@ -461,6 +463,7 @@ def _add_margins(
kwargs,
observed,
margins_name,
dropna,
)
if not isinstance(marginal_result_set, tuple):
return marginal_result_set
Expand All @@ -469,7 +472,7 @@ def _add_margins(
# no values, and table is a DataFrame
assert isinstance(table, ABCDataFrame)
marginal_result_set = _generate_marginal_results_without_values(
table, data, rows, cols, aggfunc, kwargs, observed, margins_name
table, data, rows, cols, aggfunc, kwargs, observed, margins_name, dropna
)
if not isinstance(marginal_result_set, tuple):
return marginal_result_set
Expand Down Expand Up @@ -538,6 +541,7 @@ def _generate_marginal_results(
kwargs,
observed: bool,
margins_name: Hashable = "All",
dropna: bool = True,
):
margin_keys: list | Index
if len(cols) > 0:
Expand All @@ -551,7 +555,7 @@ def _all_key(key):
if len(rows) > 0:
margin = (
data[rows + values]
.groupby(rows, observed=observed)
.groupby(rows, observed=observed, dropna=dropna)
.agg(aggfunc, **kwargs)
)
cat_axis = 1
Expand All @@ -567,7 +571,7 @@ def _all_key(key):
else:
margin = (
data[cols[:1] + values]
.groupby(cols[:1], observed=observed)
.groupby(cols[:1], observed=observed, dropna=dropna)
.agg(aggfunc, **kwargs)
.T
)
Expand Down Expand Up @@ -610,7 +614,9 @@ def _all_key(key):

if len(cols) > 0:
row_margin = (
data[cols + values].groupby(cols, observed=observed).agg(aggfunc, **kwargs)
data[cols + values]
.groupby(cols, observed=observed, dropna=dropna)
.agg(aggfunc, **kwargs)
)
row_margin = row_margin.stack()

Expand All @@ -633,6 +639,7 @@ def _generate_marginal_results_without_values(
kwargs,
observed: bool,
margins_name: Hashable = "All",
dropna: bool = True,
):
margin_keys: list | Index
if len(cols) > 0:
Expand All @@ -645,7 +652,7 @@ def _all_key():
return (margins_name,) + ("",) * (len(cols) - 1)

if len(rows) > 0:
margin = data.groupby(rows, observed=observed)[rows].apply(
margin = data.groupby(rows, observed=observed, dropna=dropna)[rows].apply(
aggfunc, **kwargs
)
all_key = _all_key()
Expand All @@ -654,7 +661,9 @@ def _all_key():
margin_keys.append(all_key)

else:
margin = data.groupby(level=0, observed=observed).apply(aggfunc, **kwargs)
margin = data.groupby(level=0, observed=observed, dropna=dropna).apply(
aggfunc, **kwargs
)
all_key = _all_key()
table[all_key] = margin
result = table
Expand All @@ -665,7 +674,7 @@ def _all_key():
margin_keys = table.columns

if len(cols):
row_margin = data.groupby(cols, observed=observed)[cols].apply(
row_margin = data.groupby(cols, observed=observed, dropna=dropna)[cols].apply(
aggfunc, **kwargs
)
else:
Expand Down
20 changes: 10 additions & 10 deletions pandas/tests/reshape/test_crosstab.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,7 @@ def test_margin_dropna4(self):
# GH: 10772: Keep np.nan in result with dropna=False
df = DataFrame({"a": [1, 2, 2, 2, 2, np.nan], "b": [3, 3, 4, 4, 4, 4]})
actual = crosstab(df.a, df.b, margins=True, dropna=False)
expected = DataFrame([[1, 0, 1.0], [1, 3, 4.0], [0, 1, np.nan], [2, 4, 6.0]])
expected = DataFrame([[1, 0, 1], [1, 3, 4], [0, 1, 1], [2, 4, 6]])
expected.index = Index([1.0, 2.0, np.nan, "All"], name="a")
expected.columns = Index([3, 4, "All"], name="b")
tm.assert_frame_equal(actual, expected)
Expand All @@ -301,11 +301,11 @@ def test_margin_dropna5(self):
)
actual = crosstab(df.a, df.b, margins=True, dropna=False)
expected = DataFrame(
[[1, 0, 0, 1.0], [0, 1, 0, 1.0], [0, 3, 1, np.nan], [1, 4, 0, 6.0]]
[[1, 0, 0, 1.0], [0, 1, 0, 1.0], [0, 3, 1, 4.0], [1, 4, 1, 6.0]]
)
expected.index = Index([1.0, 2.0, np.nan, "All"], name="a")
expected.columns = Index([3.0, 4.0, np.nan, "All"], name="b")
tm.assert_frame_equal(actual, expected)
tm.assert_frame_equal(actual, expected, check_dtype=False)

def test_margin_dropna6(self):
# GH: 10772: Keep np.nan in result with dropna=False
Expand All @@ -326,7 +326,7 @@ def test_margin_dropna6(self):
names=["b", "c"],
)
expected = DataFrame(
[[1, 0, 1, 0, 0, 0, 2], [2, 0, 1, 1, 0, 1, 5], [3, 0, 2, 1, 0, 0, 7]],
[[1, 0, 1, 0, 0, 0, 2], [2, 0, 1, 1, 0, 1, 5], [3, 0, 2, 1, 0, 1, 7]],
columns=m,
)
expected.index = Index(["bar", "foo", "All"], name="a")
Expand All @@ -344,13 +344,13 @@ def test_margin_dropna6(self):
)
expected = DataFrame(
[
[1, 0, 1.0],
[1, 0, 1.0],
[1, 0, 1],
[1, 0, 1],
[0, 0, np.nan],
[2, 0, 2.0],
[1, 1, 2.0],
[0, 1, np.nan],
[5, 2, 7.0],
[2, 0, 2],
[1, 1, 2],
[0, 1, 1],
[5, 2, 7],
],
index=m,
)
Expand Down
30 changes: 30 additions & 0 deletions pandas/tests/reshape/test_pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -2594,6 +2594,36 @@ def test_pivot_table_values_as_two_params(
expected = DataFrame(data=e_data, index=e_index, columns=e_cols)
tm.assert_frame_equal(result, expected)

def test_pivot_table_margins_include_nan_groups(self):
    """Check that margins count NaN column groups when ``dropna=False``.

    Two of the three input rows have a missing ``g2`` value; the expected
    "All" row and "All" column must include them (GH#61509).
    """
    # GH#61509
    frame = DataFrame(
        {
            "i": [1, 2, 3],
            "g1": ["a", "b", "b"],
            "g2": ["x", None, None],
        }
    )

    pivot_kwargs = {
        "index": "g1",
        "columns": "g2",
        "values": "i",
        "aggfunc": "count",
        "dropna": False,
        "margins": True,
    }
    result = frame.pivot_table(**pivot_kwargs)

    # Expected counts per output column, listed in row order a, b, All.
    row_labels = ("a", "b", "All")
    counts_by_column = {
        "x": (1.0, np.nan, 1.0),
        np.nan: (np.nan, 2.0, 2.0),
        "All": (1.0, 2.0, 3.0),
    }
    expected = DataFrame(
        {
            column: dict(zip(row_labels, counts))
            for column, counts in counts_by_column.items()
        }
    )
    expected.index.name = "g1"
    expected.columns.name = "g2"

    # Only the values and labels matter here, not the exact numeric dtype.
    tm.assert_frame_equal(result, expected, check_dtype=False)


class TestPivot:
def test_pivot(self):
Expand Down
Loading