Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

itertuples #842

Merged
merged 8 commits into from
Dec 26, 2023
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion pandas-stubs/core/frame.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,7 @@ class DataFrame(NDFrame, OpsMixin):
def iterrows(self) -> Iterable[tuple[Hashable, Series]]: ...
def itertuples(
self, index: _bool = ..., name: _str | None = ...
) -> Iterable[tuple[Any, ...]]: ...
) -> Iterable[_PandasNamedTuple]: ...
def __len__(self) -> int: ...
@overload
def dot(self, other: DataFrame | ArrayLike) -> DataFrame: ...
Expand Down Expand Up @@ -2279,3 +2279,6 @@ class DataFrame(NDFrame, OpsMixin):
) -> Self: ...
def __truediv__(self, other: float | DataFrame | Series | Sequence) -> Self: ...
def __rtruediv__(self, other: float | DataFrame | Series | Sequence) -> Self: ...

# Row type yielded by ``DataFrame.itertuples`` in these stubs: a tuple of
# arbitrary element types that also allows attribute-style field access.
class _PandasNamedTuple(tuple[Any, ...]):
    # Any attribute name is accepted and typed as ``Any``.
    # NOTE(review): presumably because the field (column) names are not known
    # to the type checker — confirm.
    def __getattr__(self, field: str) -> Any: ...
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm thinking we should return `Scalar` here, because we also return `Scalar` when someone does `df.loc[3, "a"]`.

While it's true that some non-scalar value could be an individual element of a DataFrame, I've taken the philosophy of limiting the types to what is "normal" usage, and if you put a funky type in a DataFrame or Series, then you can do a cast to fix it. I've done that in some of our application code when we have lists or other objects inside a Series or DataFrame.

39 changes: 35 additions & 4 deletions tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,10 @@
from pandas.core.resample import Resampler # noqa: F401
from pandas.core.series import Series
import pytest
from typing_extensions import assert_type
from typing_extensions import (
TypeAlias,
assert_type,
)
import xarray as xr

from pandas._typing import Scalar
Expand All @@ -47,6 +50,11 @@
from pandas.io.formats.style import Styler
from pandas.io.parsers import TextFileReader

# ``_PandasNamedTuple`` is imported only while type checking; at runtime the
# name is bound to plain ``tuple`` so it can still be used as a value in the
# tests further down.
# NOTE(review): presumably the class exists only in the stub file
# (pandas-stubs/core/frame.pyi) and not in the runtime pandas module — confirm.
if TYPE_CHECKING:
    from pandas.core.frame import _PandasNamedTuple
else:
    _PandasNamedTuple: TypeAlias = tuple

# Module-level two-column example frame.
DF = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]})


Expand Down Expand Up @@ -439,9 +447,23 @@ def test_types_iterrows() -> None:

def test_types_itertuples() -> None:
    """Exercise ``DataFrame.itertuples`` with default and keyword arguments."""
    df = pd.DataFrame(data={"col1": [2, 1], "col2": [3, 4]})
    # NOTE(review): these three annotated assignments look like the lines the
    # diff removes (hunk header -439,9 +447,23) — confirm against the merged file.
    res1: Iterable[tuple[Any, ...]] = df.itertuples()
    res2: Iterable[tuple[Any, ...]] = df.itertuples(index=False, name="Foobar")
    res3: Iterable[tuple[Any, ...]] = df.itertuples(index=False, name=None)
    # Default arguments.
    check(
        assert_type(df.itertuples(), Iterable[_PandasNamedTuple]),
        Iterable,
        _PandasNamedTuple,
    )
    # index=False with an explicit tuple name.
    check(
        assert_type(
            df.itertuples(index=False, name="Foobar"), Iterable[_PandasNamedTuple]
        ),
        Iterable,
        _PandasNamedTuple,
    )
    # index=False with name=None; the declared return type is the same.
    check(
        assert_type(df.itertuples(index=False, name=None), Iterable[_PandasNamedTuple]),
        Iterable,
        _PandasNamedTuple,
    )


def test_types_sum() -> None:
Expand Down Expand Up @@ -2962,3 +2984,12 @@ def test_frame_setitem_na() -> None:
df["x"] = df["y"] + pd.Timedelta(days=3)
df.loc[ind, :] = pd.NaT
df.iloc[[0, 2], :] = pd.NaT


def test_itertuples() -> None:
    """Rows from ``itertuples`` are ``_PandasNamedTuple``; field access is ``Any``."""
    # GH 822
    frame = pd.DataFrame({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})

    for row in frame.itertuples():
        # The row itself is checked both statically and through ``check``.
        check(assert_type(row, _PandasNamedTuple), tuple)
        # Attribute access on a row is only asserted statically.
        assert_type(row.a, Any)