From 579db3cdd0da36b4436f07992f6ad0ba60ca0354 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Thu, 29 May 2025 15:30:17 +0200 Subject: [PATCH 01/14] default Series to Series[Any] --- pandas-stubs/_typing.pyi | 3 ++- pandas-stubs/core/frame.pyi | 14 +++++++------- pandas-stubs/core/groupby/generic.pyi | 12 ++++++------ 3 files changed, 15 insertions(+), 14 deletions(-) diff --git a/pandas-stubs/_typing.pyi b/pandas-stubs/_typing.pyi index 28ee2fff..2d0f5104 100644 --- a/pandas-stubs/_typing.pyi +++ b/pandas-stubs/_typing.pyi @@ -18,7 +18,6 @@ from typing import ( Protocol, SupportsIndex, TypedDict, - TypeVar, overload, ) @@ -35,6 +34,7 @@ from pandas.core.tools.datetimes import FulldatetimeDict from typing_extensions import ( ParamSpec, TypeAlias, + TypeVar, ) from pandas._libs.interval import Interval @@ -842,6 +842,7 @@ S1 = TypeVar( | CategoricalDtype | BaseOffset | list[str], + default=Any, ) S2 = TypeVar( diff --git a/pandas-stubs/core/frame.pyi b/pandas-stubs/core/frame.pyi index f872ad2f..421a9749 100644 --- a/pandas-stubs/core/frame.pyi +++ b/pandas-stubs/core/frame.pyi @@ -75,7 +75,7 @@ from pandas._libs.tslibs import BaseOffset from pandas._libs.tslibs.nattype import NaTType from pandas._libs.tslibs.offsets import DateOffset from pandas._typing import ( - S1, + S2, AggFuncTypeBase, AggFuncTypeDictFrame, AggFuncTypeDictSeries, @@ -1392,13 +1392,13 @@ class DataFrame(NDFrame, OpsMixin, _GetItemHack): @overload def apply( self, - f: Callable[..., S1 | NAType], + f: Callable[..., S2 | NAType], axis: AxisIndex = ..., raw: _bool = ..., result_type: None = ..., args: Any = ..., **kwargs: Any, - ) -> Series[S1]: ... + ) -> Series[S2]: ... # Since non-scalar type T is not supported in Series[T], # we separate this overload from the above one @overload @@ -1416,14 +1416,14 @@ class DataFrame(NDFrame, OpsMixin, _GetItemHack): @overload def apply( self, - f: Callable[..., S1 | NAType], + f: Callable[..., S2 | NAType], axis: Axis = ..., raw: _bool = ..., args: Any = ..., *, result_type: Literal["expand", "reduce"], **kwargs: Any, - ) -> Series[S1]: ... + ) -> Series[S2]: ... @overload def apply( self, @@ -1477,14 +1477,14 @@ class DataFrame(NDFrame, OpsMixin, _GetItemHack): @overload def apply( self, - f: Callable[..., S1 | NAType], + f: Callable[..., S2 | NAType], raw: _bool = ..., result_type: None = ..., args: Any = ..., *, axis: AxisColumn, **kwargs: Any, - ) -> Series[S1]: ... + ) -> Series[S2]: ... @overload def apply( self, diff --git a/pandas-stubs/core/groupby/generic.pyi b/pandas-stubs/core/groupby/generic.pyi index f618a592..3f9b269a 100644 --- a/pandas-stubs/core/groupby/generic.pyi +++ b/pandas-stubs/core/groupby/generic.pyi @@ -30,7 +30,7 @@ from typing_extensions import ( from pandas._libs.tslibs.timestamps import Timestamp from pandas._typing import ( - S1, + S2, AggFuncTypeBase, AggFuncTypeFrame, ByT, @@ -52,7 +52,7 @@ class NamedAgg(NamedTuple): column: str aggfunc: AggScalar -class SeriesGroupBy(GroupBy[Series[S1]], Generic[S1, ByT]): +class SeriesGroupBy(GroupBy[Series[S2]], Generic[S2, ByT]): @overload def aggregate( self, @@ -114,7 +114,7 @@ class SeriesGroupBy(GroupBy[Series[S1]], Generic[S1, ByT]): self, indices: TakeIndexer, **kwargs, - ) -> Series[S1]: ... + ) -> Series[S2]: ... def skew( self, skipna: bool = ..., @@ -125,10 +125,10 @@ class SeriesGroupBy(GroupBy[Series[S1]], Generic[S1, ByT]): def plot(self) -> GroupByPlot[Self]: ... def nlargest( self, n: int = ..., keep: NsmallestNlargestKeep = ... - ) -> Series[S1]: ... + ) -> Series[S2]: ... def nsmallest( self, n: int = ..., keep: NsmallestNlargestKeep = ... - ) -> Series[S1]: ... + ) -> Series[S2]: ... def idxmin(self, skipna: bool = ...) -> Series: ... def idxmax(self, skipna: bool = ...) -> Series: ... def corr( @@ -166,7 +166,7 @@ class SeriesGroupBy(GroupBy[Series[S1]], Generic[S1, ByT]): @final # type: ignore[misc] def __iter__( # pyright: ignore[reportIncompatibleMethodOverride] self, - ) -> Iterator[tuple[ByT, Series[S1]]]: ... + ) -> Iterator[tuple[ByT, Series[S2]]]: ... _TT = TypeVar("_TT", bound=Literal[True, False]) From cb1a5c633ad66eed6e8006653bec95d2acc7814b Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Thu, 29 May 2025 15:30:48 +0200 Subject: [PATCH 02/14] remove UnknownSeries --- pandas-stubs/core/frame.pyi | 5 +- pandas-stubs/core/generic.pyi | 4 +- pandas-stubs/core/series.pyi | 96 ++++++++++++++--------------------- 3 files changed, 42 insertions(+), 63 deletions(-) diff --git a/pandas-stubs/core/frame.pyi b/pandas-stubs/core/frame.pyi index 421a9749..c0873412 100644 --- a/pandas-stubs/core/frame.pyi +++ b/pandas-stubs/core/frame.pyi @@ -52,7 +52,6 @@ from pandas.core.reshape.pivot import ( ) from pandas.core.series import ( Series, - UnknownSeries, ) from pandas.core.window import ( Expanding, @@ -2011,7 +2010,7 @@ class DataFrame(NDFrame, OpsMixin, _GetItemHack): | Callable[[DataFrame], DataFrame] | Callable[[Any], _bool] ), - other: Scalar | UnknownSeries | DataFrame | Callable | NAType | None = ..., + other: Scalar | Series | DataFrame | Callable | NAType | None = ..., *, inplace: Literal[True], axis: Axis | None = ..., @@ -2027,7 +2026,7 @@ class DataFrame(NDFrame, OpsMixin, _GetItemHack): | Callable[[DataFrame], DataFrame] | Callable[[Any], _bool] ), - other: Scalar | UnknownSeries | DataFrame | Callable | NAType | None = ..., + other: Scalar | Series | DataFrame | Callable | NAType | None = ..., *, inplace: Literal[False] = ..., axis: Axis | None = ..., diff --git a/pandas-stubs/core/generic.pyi b/pandas-stubs/core/generic.pyi index 9d521e74..da2e668d 100644 --- a/pandas-stubs/core/generic.pyi +++ b/pandas-stubs/core/generic.pyi @@ -20,7 +20,7 @@ from pandas import Index import pandas.core.indexing as indexing from pandas.core.resample import DatetimeIndexResampler from pandas.core.series import ( - UnknownSeries, + Series, ) import sqlalchemy.engine from typing_extensions import ( @@ -82,7 +82,7 @@ class NDFrame(indexing.IndexingMixin): def ndim(self) -> int: ... @property def size(self) -> int: ... - def equals(self, other: UnknownSeries) -> _bool: ... + def equals(self, other: Series) -> _bool: ... def __neg__(self) -> Self: ... def __pos__(self) -> Self: ... def __nonzero__(self) -> None: ... diff --git a/pandas-stubs/core/series.pyi b/pandas-stubs/core/series.pyi index 9552d407..497c293d 100644 --- a/pandas-stubs/core/series.pyi +++ b/pandas-stubs/core/series.pyi @@ -265,7 +265,7 @@ class Series(IndexOpsMixin[S1], NDFrame): dtype: Dtype = ..., name: Hashable = ..., copy: bool = ..., - ) -> UnknownSeries: ... + ) -> Series: ... @overload def __new__( cls, @@ -419,7 +419,7 @@ class Series(IndexOpsMixin[S1], NDFrame): dtype: Dtype = ..., name: Hashable = ..., copy: bool = ..., - ) -> UnknownSeries: ... + ) -> Series: ... @property def hasnans(self) -> bool: ... def div( @@ -732,7 +732,7 @@ class Series(IndexOpsMixin[S1], NDFrame): @overload def groupby( self, - by: CategoricalIndex | UnknownIndex | UnknownSeries, + by: CategoricalIndex | UnknownIndex | Series, axis: AxisIndex = ..., level: IndexLabel | None = ..., as_index: _bool = ..., @@ -815,15 +815,15 @@ class Series(IndexOpsMixin[S1], NDFrame): self, other: ArrayLike | dict[_str, np.ndarray] | Sequence[S1] | Index[S1] ) -> np.ndarray: ... @overload - def __matmul__(self, other: UnknownSeries) -> Scalar: ... + def __matmul__(self, other: Series) -> Scalar: ... @overload - def __matmul__(self, other: DataFrame) -> UnknownSeries: ... + def __matmul__(self, other: DataFrame) -> Series: ... @overload def __matmul__(self, other: np.ndarray) -> np.ndarray: ... @overload - def __rmatmul__(self, other: UnknownSeries) -> Scalar: ... + def __rmatmul__(self, other: Series) -> Scalar: ... @overload - def __rmatmul__(self, other: DataFrame) -> UnknownSeries: ... + def __rmatmul__(self, other: DataFrame) -> Series: ... @overload def __rmatmul__(self, other: np.ndarray) -> np.ndarray: ... @overload @@ -843,16 +843,16 @@ class Series(IndexOpsMixin[S1], NDFrame): @overload def compare( self, - other: UnknownSeries, + other: Series, align_axis: AxisIndex, keep_shape: bool = ..., keep_equal: bool = ..., result_names: Suffixes = ..., - ) -> UnknownSeries: ... + ) -> Series: ... @overload def compare( self, - other: UnknownSeries, + other: Series, align_axis: AxisColumn = ..., keep_shape: bool = ..., keep_equal: bool = ..., @@ -952,9 +952,9 @@ class Series(IndexOpsMixin[S1], NDFrame): @overload def map( self, - arg: Callable[[Any], Any] | Mapping[Any, Any] | UnknownSeries, + arg: Callable[[Any], Any] | Mapping[Any, Any] | Series, na_action: Literal["ignore"] | None = ..., - ) -> UnknownSeries: ... + ) -> Series: ... @overload def aggregate( self: Series[int], @@ -978,7 +978,7 @@ class Series(IndexOpsMixin[S1], NDFrame): axis: AxisIndex = ..., *args: Any, **kwargs: Any, - ) -> UnknownSeries: ... + ) -> Series: ... agg = aggregate @overload def transform( @@ -1005,7 +1005,7 @@ class Series(IndexOpsMixin[S1], NDFrame): convertDType: _bool = ..., args: tuple = ..., **kwargs: Any, - ) -> UnknownSeries: ... + ) -> Series: ... @overload def apply( self, @@ -1017,20 +1017,20 @@ class Series(IndexOpsMixin[S1], NDFrame): @overload def apply( self, - func: Callable[..., UnknownSeries], + func: Callable[..., Series], convertDType: _bool = ..., args: tuple = ..., **kwargs: Any, ) -> DataFrame: ... def align( self, - other: DataFrame | UnknownSeries, + other: DataFrame | Series, join: JoinHow = ..., axis: Axis | None = ..., level: Level | None = ..., copy: _bool = ..., fill_value: Scalar | NAType | None = ..., - ) -> tuple[UnknownSeries, UnknownSeries]: ... + ) -> tuple[Series, Series]: ... @overload def rename( self, @@ -1125,7 +1125,7 @@ class Series(IndexOpsMixin[S1], NDFrame): freq: DateOffset | timedelta | _str | None = ..., axis: Axis = ..., fill_value: Scalar | NAType | None = ..., - ) -> UnknownSeries: ... + ) -> Series: ... def info( self, verbose: bool | None = ..., @@ -1508,15 +1508,11 @@ class Series(IndexOpsMixin[S1], NDFrame): tuple[ Sequence[bool] | Series[bool] - | Callable[ - [UnknownSeries], UnknownSeries | np.ndarray | Sequence[bool] - ], - ListLikeU - | Scalar - | Callable[[UnknownSeries], UnknownSeries | np.ndarray], + | Callable[[Series], Series | np.ndarray | Sequence[bool]], + ListLikeU | Scalar | Callable[[Series], Series | np.ndarray], ], ], - ) -> UnknownSeries: ... + ) -> Series: ... def truncate( self, before: date | _str | int | None = ..., @@ -1587,16 +1583,8 @@ class Series(IndexOpsMixin[S1], NDFrame): @overload def __add__( self, - other: ( - num - | _str - | timedelta - | Timedelta - | _ListLike - | UnknownSeries - | np.timedelta64 - ), - ) -> UnknownSeries: ... + other: num | _str | timedelta | Timedelta | _ListLike | Series | np.timedelta64, + ) -> Series: ... # ignore needed for mypy as we want different results based on the arguments @overload # type: ignore[override] def __and__( # pyright: ignore[reportOverlappingOverload] @@ -1625,7 +1613,7 @@ class Series(IndexOpsMixin[S1], NDFrame): self, other: timedelta | Timedelta | TimedeltaSeries | np.timedelta64 ) -> TimedeltaSeries: ... @overload - def __mul__(self, other: num | _ListLike | UnknownSeries) -> UnknownSeries: ... + def __mul__(self, other: num | _ListLike | Series) -> Series: ... def __mod__(self, other: num | _ListLike | Series[S1]) -> Series[S1]: ... def __ne__(self, other: object) -> Series[_bool]: ... # type: ignore[override] # pyright: ignore[reportIncompatibleMethodOverride] def __pow__(self, other: num | _ListLike | Series[S1]) -> Series[S1]: ... @@ -1639,9 +1627,7 @@ class Series(IndexOpsMixin[S1], NDFrame): @overload def __radd__(self, other: S1 | Series[S1]) -> Self: ... @overload - def __radd__( - self, other: num | _str | _ListLike | UnknownSeries - ) -> UnknownSeries: ... + def __radd__(self, other: num | _str | _ListLike | Series) -> Series: ... # ignore needed for mypy as we want different results based on the arguments @overload # type: ignore[override] def __rand__( # pyright: ignore[reportOverlappingOverload] @@ -1658,7 +1644,7 @@ class Series(IndexOpsMixin[S1], NDFrame): self, other: timedelta | Timedelta | TimedeltaSeries | np.timedelta64 ) -> TimedeltaSeries: ... @overload - def __rmul__(self, other: num | _ListLike | UnknownSeries) -> UnknownSeries: ... + def __rmul__(self, other: num | _ListLike | Series) -> Series: ... def __rnatmul__(self, other: num | _ListLike | Series[S1]) -> Series[S1]: ... def __rpow__(self, other: num | _ListLike | Series[S1]) -> Series[S1]: ... # ignore needed for mypy as we want different results based on the arguments @@ -1668,10 +1654,8 @@ class Series(IndexOpsMixin[S1], NDFrame): ) -> Series[bool]: ... @overload def __ror__(self, other: int | np_ndarray_anyint | Series[int]) -> Series[int]: ... - def __rsub__(self, other: num | _ListLike | Series[S1]) -> UnknownSeries: ... - def __rtruediv__( - self, other: num | _ListLike | Series[S1] | Path - ) -> UnknownSeries: ... + def __rsub__(self, other: num | _ListLike | Series[S1]) -> Series: ... + def __rtruediv__(self, other: num | _ListLike | Series[S1] | Path) -> Series: ... # ignore needed for mypy as we want different results based on the arguments @overload # type: ignore[override] def __rxor__( # pyright: ignore[reportOverlappingOverload] @@ -1694,10 +1678,8 @@ class Series(IndexOpsMixin[S1], NDFrame): self, other: Timestamp | datetime | TimestampSeries ) -> TimedeltaSeries: ... @overload - def __sub__(self, other: num | _ListLike | UnknownSeries) -> UnknownSeries: ... - def __truediv__( - self, other: num | _ListLike | Series[S1] | Path - ) -> UnknownSeries: ... + def __sub__(self, other: num | _ListLike | Series) -> Series: ... + def __truediv__(self, other: num | _ListLike | Series[S1] | Path) -> Series: ... # ignore needed for mypy as we want different results based on the arguments @overload # type: ignore[override] def __xor__( # pyright: ignore[reportOverlappingOverload] @@ -1800,12 +1782,12 @@ class Series(IndexOpsMixin[S1], NDFrame): min_periods: int = ..., adjust: _bool = ..., ignore_na: _bool = ..., - ) -> ExponentialMovingWindow[UnknownSeries]: ... + ) -> ExponentialMovingWindow[Series]: ... def expanding( self, min_periods: int = ..., method: CalculationMethod = ..., - ) -> Expanding[UnknownSeries]: ... + ) -> Expanding[Series]: ... def floordiv( self, other: num | _ListLike | Series[S1], @@ -1908,11 +1890,11 @@ class Series(IndexOpsMixin[S1], NDFrame): @overload def mul( self, - other: num | _ListLike | UnknownSeries, + other: num | _ListLike | Series, level: Level | None = ..., fill_value: float | None = ..., axis: AxisIndex | None = ..., - ) -> UnknownSeries: ... + ) -> Series: ... def multiply( self, other: num | _ListLike | Series[S1], @@ -1992,11 +1974,11 @@ class Series(IndexOpsMixin[S1], NDFrame): @overload def rmul( self, - other: num | _ListLike | UnknownSeries, + other: num | _ListLike | Series, level: Level | None = ..., fill_value: float | None = ..., axis: AxisIndex = ..., - ) -> UnknownSeries: ... + ) -> Series: ... @overload def rolling( self, @@ -2009,7 +1991,7 @@ class Series(IndexOpsMixin[S1], NDFrame): method: CalculationMethod = ..., *, win_type: _str, - ) -> Window[UnknownSeries]: ... + ) -> Window[Series]: ... @overload def rolling( self, @@ -2022,7 +2004,7 @@ class Series(IndexOpsMixin[S1], NDFrame): method: CalculationMethod = ..., *, win_type: None = ..., - ) -> Rolling[UnknownSeries]: ... + ) -> Rolling[Series]: ... def rpow( self, other: Series[S1] | Scalar, @@ -2353,5 +2335,3 @@ class IntervalSeries(Series[Interval[_OrderableT]], Generic[_OrderableT]): @property def array(self) -> IntervalArray: ... def diff(self, periods: int = ...) -> Never: ... - -UnknownSeries: TypeAlias = Series[Any] From 53f13f16838785bbf288a0a9e003124d1436a1d3 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Thu, 29 May 2025 15:45:02 +0200 Subject: [PATCH 03/14] replace one UnknownIndex --- pandas-stubs/core/series.pyi | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas-stubs/core/series.pyi b/pandas-stubs/core/series.pyi index 497c293d..d9f6b179 100644 --- a/pandas-stubs/core/series.pyi +++ b/pandas-stubs/core/series.pyi @@ -59,7 +59,6 @@ from pandas.core.indexes.accessors import ( TimedeltaProperties, TimestampProperties, ) -from pandas.core.indexes.base import UnknownIndex from pandas.core.indexes.category import CategoricalIndex from pandas.core.indexes.datetimes import DatetimeIndex from pandas.core.indexes.interval import IntervalIndex @@ -732,7 +731,7 @@ class Series(IndexOpsMixin[S1], NDFrame): @overload def groupby( self, - by: CategoricalIndex | UnknownIndex | Series, + by: CategoricalIndex | Index | Series, axis: AxisIndex = ..., level: IndexLabel | None = ..., as_index: _bool = ..., From e67aaa90e97c76e0d942183379d38b340ed14983 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Thu, 29 May 2025 15:49:37 +0200 Subject: [PATCH 04/14] replace one more UnknownIndex --- pandas-stubs/_libs/tslibs/timestamps.pyi | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas-stubs/_libs/tslibs/timestamps.pyi b/pandas-stubs/_libs/tslibs/timestamps.pyi index 99de9663..795f98a9 100644 --- a/pandas-stubs/_libs/tslibs/timestamps.pyi +++ b/pandas-stubs/_libs/tslibs/timestamps.pyi @@ -19,9 +19,9 @@ from typing import ( import numpy as np from pandas import ( DatetimeIndex, + Index, TimedeltaIndex, ) -from pandas.core.indexes.base import UnknownIndex from pandas.core.series import ( Series, TimedeltaSeries, @@ -236,7 +236,7 @@ class Timestamp(datetime, SupportsIndex): @overload def __eq__(self, other: TimestampSeries) -> Series[bool]: ... # type: ignore[overload-overlap] @overload - def __eq__(self, other: npt.NDArray[np.datetime64] | UnknownIndex) -> np_ndarray_bool: ... # type: ignore[overload-overlap] + def __eq__(self, other: npt.NDArray[np.datetime64] | Index) -> np_ndarray_bool: ... # type: ignore[overload-overlap] @overload def __eq__(self, other: object) -> Literal[False]: ... @overload @@ -244,7 +244,7 @@ class Timestamp(datetime, SupportsIndex): @overload def __ne__(self, other: TimestampSeries) -> Series[bool]: ... # type: ignore[overload-overlap] @overload - def __ne__(self, other: npt.NDArray[np.datetime64] | UnknownIndex) -> np_ndarray_bool: ... # type: ignore[overload-overlap] + def __ne__(self, other: npt.NDArray[np.datetime64] | Index) -> np_ndarray_bool: ... # type: ignore[overload-overlap] @overload def __ne__(self, other: object) -> Literal[True]: ... def __hash__(self) -> int: ... From 8cb79b0b55b9af2a00bae5ddfe6c7f750523a559 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Thu, 29 May 2025 16:03:14 +0200 Subject: [PATCH 05/14] replace Series[Any] with Series --- README.md | 2 +- docs/philosophy.md | 2 +- pandas-stubs/_typing.pyi | 4 +-- pandas-stubs/core/dtypes/missing.pyi | 4 +-- pandas-stubs/core/frame.pyi | 26 +++++++++---------- pandas-stubs/core/reshape/concat.pyi | 17 +++++-------- pandas-stubs/core/series.pyi | 2 +- tests/test_frame.py | 10 ++++---- tests/test_series.py | 38 ++++++++++++++-------------- 9 files changed, 51 insertions(+), 54 deletions(-) diff --git a/README.md b/README.md index 66b281bc..6fb46d53 100644 --- a/README.md +++ b/README.md @@ -45,7 +45,7 @@ mypy round.py we get the following error message: ```text -round.py:6: error: Argument "decimals" to "round" of "DataFrame" has incompatible type "DataFrame"; expected "Union[int, Dict[Any, Any], Series[Any]]" [arg-type] +round.py:6: error: Argument "decimals" to "round" of "DataFrame" has incompatible type "DataFrame"; expected "Union[int, Dict[Any, Any], Series]" [arg-type] Found 1 error in 1 file (checked 1 source file) ``` diff --git a/docs/philosophy.md b/docs/philosophy.md index bc910631..ca6fb35c 100644 --- a/docs/philosophy.md +++ b/docs/philosophy.md @@ -29,7 +29,7 @@ lt = s < 3 In the pandas source, `lt` is a `Series` with a `dtype` of `bool`. In the pandas-stubs, the type of `lt` is `Series[bool]`. This allows further type checking to occur in other -pandas methods. Note that in the above example, `s` is typed as `Series[Any]` because +pandas methods. Note that in the above example, `s` is typed as `Series` because its type cannot be statically inferred. This also allows type checking for operations on series that contain date/time data. Consider diff --git a/pandas-stubs/_typing.pyi b/pandas-stubs/_typing.pyi index 2d0f5104..7aa56581 100644 --- a/pandas-stubs/_typing.pyi +++ b/pandas-stubs/_typing.pyi @@ -65,7 +65,7 @@ HashableT5 = TypeVar("HashableT5", bound=Hashable) # array-like ArrayLike: TypeAlias = ExtensionArray | np.ndarray -AnyArrayLike: TypeAlias = ArrayLike | Index[Any] | Series[Any] +AnyArrayLike: TypeAlias = ArrayLike | Index[Any] | Series # list-like @@ -950,7 +950,7 @@ ReplaceValue: TypeAlias = ( | NAType | Sequence[Scalar | Pattern] | Mapping[HashableT, ScalarT] - | Series[Any] + | Series | None ) diff --git a/pandas-stubs/core/dtypes/missing.pyi b/pandas-stubs/core/dtypes/missing.pyi index 11319304..fde2e535 100644 --- a/pandas-stubs/core/dtypes/missing.pyi +++ b/pandas-stubs/core/dtypes/missing.pyi @@ -26,7 +26,7 @@ isneginf_scalar = ... @overload def isna(obj: DataFrame) -> DataFrame: ... @overload -def isna(obj: Series[Any]) -> Series[bool]: ... +def isna(obj: Series) -> Series[bool]: ... @overload def isna(obj: Index[Any] | list[Any] | ArrayLike) -> npt.NDArray[np.bool_]: ... @overload @@ -39,7 +39,7 @@ isnull = isna @overload def notna(obj: DataFrame) -> DataFrame: ... @overload -def notna(obj: Series[Any]) -> Series[bool]: ... +def notna(obj: Series) -> Series[bool]: ... @overload def notna(obj: Index[Any] | list[Any] | ArrayLike) -> npt.NDArray[np.bool_]: ... @overload diff --git a/pandas-stubs/core/frame.pyi b/pandas-stubs/core/frame.pyi index c0873412..c1409e3a 100644 --- a/pandas-stubs/core/frame.pyi +++ b/pandas-stubs/core/frame.pyi @@ -1317,11 +1317,11 @@ class DataFrame(NDFrame, OpsMixin, _GetItemHack): @overload def stack( self, level: Level | list[Level] = ..., dropna: _bool = ..., sort: _bool = ... - ) -> Self | Series[Any]: ... + ) -> Self | Series: ... @overload def stack( self, level: Level | list[Level] = ..., future_stack: _bool = ... - ) -> Self | Series[Any]: ... + ) -> Self | Series: ... def explode( self, column: Sequence[Hashable], ignore_index: _bool = ... ) -> Self: ... @@ -1381,7 +1381,7 @@ class DataFrame(NDFrame, OpsMixin, _GetItemHack): @overload def apply( self, - f: Callable[..., ListLikeExceptSeriesAndStr | Series[Any]], + f: Callable[..., ListLikeExceptSeriesAndStr | Series], axis: AxisIndex = ..., raw: _bool = ..., result_type: None = ..., @@ -1409,7 +1409,7 @@ class DataFrame(NDFrame, OpsMixin, _GetItemHack): result_type: None = ..., args: Any = ..., **kwargs: Any, - ) -> Series[Any]: ... + ) -> Series: ... # apply() overloads with keyword result_type, and axis does not matter @overload @@ -1426,7 +1426,7 @@ class DataFrame(NDFrame, OpsMixin, _GetItemHack): @overload def apply( self, - f: Callable[..., ListLikeExceptSeriesAndStr | Series[Any] | Mapping[Any, Any]], + f: Callable[..., ListLikeExceptSeriesAndStr | Series | Mapping[Any, Any]], axis: Axis = ..., raw: _bool = ..., args: Any = ..., @@ -1444,12 +1444,12 @@ class DataFrame(NDFrame, OpsMixin, _GetItemHack): *, result_type: Literal["reduce"], **kwargs: Any, - ) -> Series[Any]: ... + ) -> Series: ... @overload def apply( self, f: Callable[ - ..., ListLikeExceptSeriesAndStr | Series[Any] | Scalar | Mapping[Any, Any] + ..., ListLikeExceptSeriesAndStr | Series | Scalar | Mapping[Any, Any] ], axis: Axis = ..., raw: _bool = ..., @@ -1463,14 +1463,14 @@ class DataFrame(NDFrame, OpsMixin, _GetItemHack): @overload def apply( self, - f: Callable[..., Series[Any]], + f: Callable[..., Series], axis: AxisIndex = ..., raw: _bool = ..., args: Any = ..., *, result_type: Literal["reduce"], **kwargs: Any, - ) -> Series[Any]: ... + ) -> Series: ... # apply() overloads with default result_type of None, and keyword axis=1 matters @overload @@ -1494,11 +1494,11 @@ class DataFrame(NDFrame, OpsMixin, _GetItemHack): *, axis: AxisColumn, **kwargs: Any, - ) -> Series[Any]: ... + ) -> Series: ... @overload def apply( self, - f: Callable[..., Series[Any]], + f: Callable[..., Series], raw: _bool = ..., result_type: None = ..., args: Any = ..., @@ -1511,7 +1511,7 @@ class DataFrame(NDFrame, OpsMixin, _GetItemHack): @overload def apply( self, - f: Callable[..., Series[Any]], + f: Callable[..., Series], raw: _bool = ..., args: Any = ..., *, @@ -1536,7 +1536,7 @@ class DataFrame(NDFrame, OpsMixin, _GetItemHack): ) -> Self: ... def merge( self, - right: DataFrame | Series[Any], + right: DataFrame | Series, how: MergeHow = ..., on: IndexLabel | AnyArrayLike | None = ..., left_on: IndexLabel | AnyArrayLike | None = ..., diff --git a/pandas-stubs/core/reshape/concat.pyi b/pandas-stubs/core/reshape/concat.pyi index 5ea794ce..7ca896dc 100644 --- a/pandas-stubs/core/reshape/concat.pyi +++ b/pandas-stubs/core/reshape/concat.pyi @@ -4,7 +4,6 @@ from collections.abc import ( Sequence, ) from typing import ( - Any, Literal, overload, ) @@ -40,7 +39,7 @@ def concat( # type: ignore[overload-overlap] ) -> DataFrame: ... @overload def concat( # type: ignore[overload-overlap] # pyright: ignore[reportOverlappingOverload] - objs: Iterable[Series[Any]] | Mapping[HashableT1, Series[Any]], + objs: Iterable[Series] | Mapping[HashableT1, Series], *, axis: AxisIndex = ..., join: Literal["inner", "outer"] = ..., @@ -51,12 +50,10 @@ def concat( # type: ignore[overload-overlap] # pyright: ignore[reportOverlappin verify_integrity: bool = ..., sort: bool = ..., copy: bool = ..., -) -> Series[Any]: ... +) -> Series: ... @overload def concat( # type: ignore[overload-overlap] # pyright: ignore[reportOverlappingOverload] - objs: ( - Iterable[Series[Any] | DataFrame] | Mapping[HashableT1, Series[Any] | DataFrame] - ), + objs: Iterable[Series | DataFrame] | Mapping[HashableT1, Series | DataFrame], *, axis: Axis = ..., join: Literal["inner", "outer"] = ..., @@ -98,7 +95,7 @@ def concat( # type: ignore[overload-overlap] ) -> DataFrame: ... @overload def concat( # type: ignore[overload-overlap] - objs: Iterable[Series[Any] | None] | Mapping[HashableT1, Series[Any] | None], + objs: Iterable[Series | None] | Mapping[HashableT1, Series | None], *, axis: AxisIndex = ..., join: Literal["inner", "outer"] = ..., @@ -109,12 +106,12 @@ def concat( # type: ignore[overload-overlap] verify_integrity: bool = ..., sort: bool = ..., copy: bool = ..., -) -> Series[Any]: ... +) -> Series: ... @overload def concat( objs: ( - Iterable[Series[Any] | DataFrame | None] - | Mapping[HashableT1, Series[Any] | DataFrame | None] + Iterable[Series | DataFrame | None] + | Mapping[HashableT1, Series | DataFrame | None] ), *, axis: Axis = ..., diff --git a/pandas-stubs/core/series.pyi b/pandas-stubs/core/series.pyi index d9f6b179..5e3bd9f6 100644 --- a/pandas-stubs/core/series.pyi +++ b/pandas-stubs/core/series.pyi @@ -1308,7 +1308,7 @@ class Series(IndexOpsMixin[S1], NDFrame): dtype: ObjectDtypeArg | VoidDtypeArg | ExtensionDtype | DtypeObj, copy: _bool = ..., errors: IgnoreRaise = ..., - ) -> Series[Any]: ... + ) -> Series: ... def copy(self, deep: _bool = ...) -> Series[S1]: ... def infer_objects(self) -> Series[S1]: ... @overload diff --git a/tests/test_frame.py b/tests/test_frame.py index e133a046..9eda636b 100644 --- a/tests/test_frame.py +++ b/tests/test_frame.py @@ -317,7 +317,7 @@ def test_assign() -> None: def my_named_func_1(df: pd.DataFrame) -> pd.Series[str]: return df["a"] - def my_named_func_2(df: pd.DataFrame) -> pd.Series[Any]: + def my_named_func_2(df: pd.DataFrame) -> pd.Series: return df["a"] check(assert_type(df.assign(c=lambda df: df["a"] * 2), pd.DataFrame), pd.DataFrame) @@ -3105,14 +3105,14 @@ def test_frame_stack() -> None: ): check( assert_type( - df_multi_level_cols2.stack(0), Union[pd.DataFrame, "pd.Series[Any]"] + df_multi_level_cols2.stack(0), Union[pd.DataFrame, "pd.Series"] ), pd.DataFrame, ) check( assert_type( df_multi_level_cols2.stack([0, 1]), - Union[pd.DataFrame, "pd.Series[Any]"], + Union[pd.DataFrame, "pd.Series"], ), pd.Series, ) @@ -3120,14 +3120,14 @@ def test_frame_stack() -> None: check( assert_type( df_multi_level_cols2.stack(0, future_stack=False), - Union[pd.DataFrame, "pd.Series[Any]"], + Union[pd.DataFrame, "pd.Series"], ), pd.DataFrame, ) check( assert_type( df_multi_level_cols2.stack(0, dropna=True, sort=True), - Union[pd.DataFrame, "pd.Series[Any]"], + Union[pd.DataFrame, "pd.Series"], ), pd.DataFrame, ) diff --git a/tests/test_series.py b/tests/test_series.py index 57051997..c1ab9a3c 100644 --- a/tests/test_series.py +++ b/tests/test_series.py @@ -2872,13 +2872,13 @@ def test_astype_object(cast_arg: ObjectDtypeArg, target_type: type) -> None: if TYPE_CHECKING: # python object - assert_type(s.astype(object), "pd.Series[Any]") - assert_type(s.astype("object"), "pd.Series[Any]") + assert_type(s.astype(object), "pd.Series") + assert_type(s.astype("object"), "pd.Series") # numpy object - assert_type(s.astype(np.object_), "pd.Series[Any]") - # assert_type(s.astype("object_"), "pd.Series[Any]") # NOTE: not assigned - # assert_type(s.astype("object0"), "pd.Series[Any]") # NOTE: not assigned - assert_type(s.astype("O"), "pd.Series[Any]") + assert_type(s.astype(np.object_), "pd.Series") + # assert_type(s.astype("object_"), "pd.Series") # NOTE: not assigned + # assert_type(s.astype("object0"), "pd.Series") # NOTE: not assigned + assert_type(s.astype("O"), "pd.Series") @pytest.mark.parametrize("cast_arg, target_type", ASTYPE_VOID_ARGS, ids=repr) @@ -2888,9 +2888,9 @@ def test_astype_void(cast_arg: VoidDtypeArg, target_type: type) -> None: if TYPE_CHECKING: # numpy void - assert_type(s.astype(np.void), "pd.Series[Any]") - assert_type(s.astype("void"), "pd.Series[Any]") - assert_type(s.astype("V"), "pd.Series[Any]") + assert_type(s.astype(np.void), "pd.Series") + assert_type(s.astype("void"), "pd.Series") + assert_type(s.astype("V"), "pd.Series") def test_astype_other() -> None: @@ -2902,7 +2902,7 @@ def test_astype_other() -> None: # Test self-consistent with s.dtype (#747) # NOTE: https://github.com/python/typing/issues/801#issuecomment-1646171898 - check(assert_type(s.astype(s.dtype), "pd.Series[Any]"), pd.Series, np.integer) + check(assert_type(s.astype(s.dtype), "pd.Series"), pd.Series, np.integer) # test DecimalDtype orseries = pd.Series([Decimal(x) for x in [1, 2, 3]]) @@ -2917,7 +2917,7 @@ def test_astype_other() -> None: # Test non-literal string # NOTE: currently unsupported! Enable in future. # string: str = "int" # not Literal! - # check(assert_type(s.astype(string), "pd.Series[Any]"), pd.Series, np.integer) + # check(assert_type(s.astype(string), "pd.Series"), pd.Series, np.integer) def test_all_astype_args_tested() -> None: @@ -3224,7 +3224,7 @@ def test_get() -> None: def test_series_new_empty() -> None: # GH 826 - check(assert_type(pd.Series(), "pd.Series[Any]"), pd.Series) + check(assert_type(pd.Series(), "pd.Series"), pd.Series) def test_series_mapping() -> None: @@ -3406,10 +3406,10 @@ def first_arg_not_series(argument_1: int, ser: pd.Series) -> pd.Series: def test_series_apply() -> None: s = pd.Series(["A", "B", "AB"]) - check(assert_type(s.apply(tuple), "pd.Series[Any]"), pd.Series) - check(assert_type(s.apply(list), "pd.Series[Any]"), pd.Series) - check(assert_type(s.apply(set), "pd.Series[Any]"), pd.Series) - check(assert_type(s.apply(frozenset), "pd.Series[Any]"), pd.Series) + check(assert_type(s.apply(tuple), "pd.Series"), pd.Series) + check(assert_type(s.apply(list), "pd.Series"), pd.Series) + check(assert_type(s.apply(set), "pd.Series"), pd.Series) + check(assert_type(s.apply(frozenset), "pd.Series"), pd.Series) def test_diff() -> None: @@ -3671,10 +3671,10 @@ class MyDict(TypedDict): def test_series_empty_dtype() -> None: - """Test for the creation of a Series from an empty list GH571 to map to a Series[Any].""" + """Test for the creation of a Series from an empty list GH571 to map to a Series.""" new_tab: Sequence[Never] = [] # need to be typehinted to please mypy - check(assert_type(pd.Series(new_tab), "pd.Series[Any]"), pd.Series) - check(assert_type(pd.Series([]), "pd.Series[Any]"), pd.Series) + check(assert_type(pd.Series(new_tab), "pd.Series"), pd.Series) + check(assert_type(pd.Series([]), "pd.Series"), pd.Series) # ensure that an empty string does not get matched to Sequence[Never] check(assert_type(pd.Series(""), "pd.Series[str]"), pd.Series) From d5d942819454c37bbda1ce8f593202ab7c144749 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Thu, 29 May 2025 16:09:18 +0200 Subject: [PATCH 06/14] replace Index[Any] with Index --- pandas-stubs/_typing.pyi | 4 ++-- pandas-stubs/core/dtypes/missing.pyi | 4 ++-- pandas-stubs/core/generic.pyi | 12 ++++++------ pandas-stubs/core/indexes/multi.pyi | 3 +-- pandas-stubs/io/formats/style_render.pyi | 4 ++-- 5 files changed, 13 insertions(+), 14 deletions(-) diff --git a/pandas-stubs/_typing.pyi b/pandas-stubs/_typing.pyi index 7aa56581..b335802e 100644 --- a/pandas-stubs/_typing.pyi +++ b/pandas-stubs/_typing.pyi @@ -65,7 +65,7 @@ HashableT5 = TypeVar("HashableT5", bound=Hashable) # array-like ArrayLike: TypeAlias = ExtensionArray | np.ndarray -AnyArrayLike: TypeAlias = ArrayLike | Index[Any] | Series +AnyArrayLike: TypeAlias = ArrayLike | Index | Series # list-like @@ -801,7 +801,7 @@ DtypeNp = TypeVar("DtypeNp", bound=np.dtype[np.generic]) KeysArgType: TypeAlias = Any ListLikeT = TypeVar("ListLikeT", bound=ListLike) ListLikeExceptSeriesAndStr: TypeAlias = ( - MutableSequence[Any] | np.ndarray | tuple[Any, ...] | Index[Any] + MutableSequence[Any] | np.ndarray | tuple[Any, ...] | Index ) ListLikeU: TypeAlias = Sequence | np.ndarray | Series | Index ListLikeHashable: TypeAlias = ( diff --git a/pandas-stubs/core/dtypes/missing.pyi b/pandas-stubs/core/dtypes/missing.pyi index fde2e535..e36496cf 100644 --- a/pandas-stubs/core/dtypes/missing.pyi +++ b/pandas-stubs/core/dtypes/missing.pyi @@ -28,7 +28,7 @@ def isna(obj: DataFrame) -> DataFrame: ... @overload def isna(obj: Series) -> Series[bool]: ... @overload -def isna(obj: Index[Any] | list[Any] | ArrayLike) -> npt.NDArray[np.bool_]: ... +def isna(obj: Index | list[Any] | ArrayLike) -> npt.NDArray[np.bool_]: ... @overload def isna( obj: Scalar | NaTType | NAType | None, @@ -41,7 +41,7 @@ def notna(obj: DataFrame) -> DataFrame: ... @overload def notna(obj: Series) -> Series[bool]: ... @overload -def notna(obj: Index[Any] | list[Any] | ArrayLike) -> npt.NDArray[np.bool_]: ... +def notna(obj: Index | list[Any] | ArrayLike) -> npt.NDArray[np.bool_]: ... @overload def notna(obj: ScalarT | NaTType | NAType | None) -> TypeIs[ScalarT]: ... diff --git a/pandas-stubs/core/generic.pyi b/pandas-stubs/core/generic.pyi index da2e668d..f1db220a 100644 --- a/pandas-stubs/core/generic.pyi +++ b/pandas-stubs/core/generic.pyi @@ -306,7 +306,7 @@ class NDFrame(indexing.IndexingMixin): labels: None = ..., *, axis: Axis = ..., - index: Hashable | Sequence[Hashable] | Index[Any] = ..., + index: Hashable | Sequence[Hashable] | Index = ..., columns: Hashable | Iterable[Hashable], level: Level | None = ..., inplace: Literal[True], @@ -318,7 +318,7 @@ class NDFrame(indexing.IndexingMixin): labels: None = ..., *, axis: Axis = ..., - index: Hashable | Sequence[Hashable] | Index[Any], + index: Hashable | Sequence[Hashable] | Index, columns: Hashable | Iterable[Hashable] = ..., level: Level | None = ..., inplace: Literal[True], @@ -327,7 +327,7 @@ class NDFrame(indexing.IndexingMixin): @overload def drop( self, - labels: Hashable | Sequence[Hashable] | Index[Any], + labels: Hashable | Sequence[Hashable] | Index, *, axis: Axis = ..., index: None = ..., @@ -342,7 +342,7 @@ class NDFrame(indexing.IndexingMixin): labels: None = ..., *, axis: Axis = ..., - index: Hashable | Sequence[Hashable] | Index[Any] = ..., + index: Hashable | Sequence[Hashable] | Index = ..., columns: Hashable | Iterable[Hashable], level: Level | None = ..., inplace: Literal[False] = ..., @@ -354,7 +354,7 @@ class NDFrame(indexing.IndexingMixin): labels: None = ..., *, axis: Axis = ..., - index: Hashable | Sequence[Hashable] | Index[Any], + index: Hashable | Sequence[Hashable] | Index, columns: Hashable | Iterable[Hashable] = ..., level: Level | None = ..., inplace: Literal[False] = ..., @@ -363,7 +363,7 @@ class NDFrame(indexing.IndexingMixin): @overload def drop( self, - labels: Hashable | Sequence[Hashable] | Index[Any], + labels: Hashable | Sequence[Hashable] | Index, *, axis: Axis = ..., index: None = ..., diff --git a/pandas-stubs/core/indexes/multi.pyi b/pandas-stubs/core/indexes/multi.pyi index 50652020..9c9197d0 100644 --- a/pandas-stubs/core/indexes/multi.pyi +++ b/pandas-stubs/core/indexes/multi.pyi @@ -5,7 +5,6 @@ from collections.abc import ( Sequence, ) from typing import ( - Any, overload, ) @@ -27,7 +26,7 @@ from pandas._typing import ( np_ndarray_bool, ) -class MultiIndex(Index[Any]): +class MultiIndex(Index): def __new__( cls, levels: Sequence[SequenceNotStr[Hashable]] = ..., diff --git a/pandas-stubs/io/formats/style_render.pyi b/pandas-stubs/io/formats/style_render.pyi index a46ee60b..624ca0e6 100644 --- a/pandas-stubs/io/formats/style_render.pyi +++ b/pandas-stubs/io/formats/style_render.pyi @@ -82,6 +82,6 @@ class StylerRenderer: level: Level | list[Level] | None = ..., ) -> Self: ... @property - def columns(self) -> Index[Any]: ... + def columns(self) -> Index: ... @property - def index(self) -> Index[Any]: ... + def index(self) -> Index: ... From 76ee8bb8e6b09f3d0f97a84bf31d17c85c8dcbf8 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Thu, 29 May 2025 16:31:43 +0200 Subject: [PATCH 07/14] go back to S1 in groupby, set defaults for ByT and _TT --- pandas-stubs/_typing.pyi | 1 + pandas-stubs/core/groupby/generic.pyi | 16 ++++++++-------- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/pandas-stubs/_typing.pyi b/pandas-stubs/_typing.pyi index b335802e..2d106f20 100644 --- a/pandas-stubs/_typing.pyi +++ b/pandas-stubs/_typing.pyi @@ -892,6 +892,7 @@ ByT = TypeVar( | Period | Interval[int | float | Timestamp | Timedelta] | tuple, + default=Any, ) # Use a distinct SeriesByT when using groupby with Series of known dtype. # Essentially, an intersection between Series S1 TypeVar, and ByT TypeVar diff --git a/pandas-stubs/core/groupby/generic.pyi b/pandas-stubs/core/groupby/generic.pyi index 3f9b269a..962db038 100644 --- a/pandas-stubs/core/groupby/generic.pyi +++ b/pandas-stubs/core/groupby/generic.pyi @@ -30,7 +30,7 @@ from typing_extensions import ( from pandas._libs.tslibs.timestamps import Timestamp from pandas._typing import ( - S2, + S1, AggFuncTypeBase, AggFuncTypeFrame, ByT, @@ -52,7 +52,7 @@ class NamedAgg(NamedTuple): column: str aggfunc: AggScalar -class SeriesGroupBy(GroupBy[Series[S2]], Generic[S2, ByT]): +class SeriesGroupBy(GroupBy[Series[S1]], Generic[S1, ByT]): @overload def aggregate( self, @@ -114,7 +114,7 @@ class SeriesGroupBy(GroupBy[Series[S2]], Generic[S2, ByT]): self, indices: TakeIndexer, **kwargs, - ) -> Series[S2]: ... + ) -> Series[S1]: ... def skew( self, skipna: bool = ..., @@ -125,10 +125,10 @@ class SeriesGroupBy(GroupBy[Series[S2]], Generic[S2, ByT]): def plot(self) -> GroupByPlot[Self]: ... def nlargest( self, n: int = ..., keep: NsmallestNlargestKeep = ... - ) -> Series[S2]: ... + ) -> Series[S1]: ... def nsmallest( self, n: int = ..., keep: NsmallestNlargestKeep = ... - ) -> Series[S2]: ... + ) -> Series[S1]: ... def idxmin(self, skipna: bool = ...) -> Series: ... def idxmax(self, skipna: bool = ...) -> Series: ... def corr( @@ -166,9 +166,9 @@ class SeriesGroupBy(GroupBy[Series[S2]], Generic[S2, ByT]): @final # type: ignore[misc] def __iter__( # pyright: ignore[reportIncompatibleMethodOverride] self, - ) -> Iterator[tuple[ByT, Series[S2]]]: ... + ) -> Iterator[tuple[ByT, Series[S1]]]: ... -_TT = TypeVar("_TT", bound=Literal[True, False]) +_TT = TypeVar("_TT", bound=Literal[True, False], default=Literal[True]) class DataFrameGroupBy(GroupBy[DataFrame], Generic[ByT, _TT]): # error: Overload 3 for "apply" will never be used because its parameters overlap overload 1 @@ -217,7 +217,7 @@ class DataFrameGroupBy(GroupBy[DataFrame], Generic[ByT, _TT]): def filter( self, func: Callable, dropna: bool = ..., *args, **kwargs ) -> DataFrame: ... - @overload + @overload # type: ignore[override] def __getitem__(self, key: Scalar) -> SeriesGroupBy[Any, ByT]: ... # type: ignore[overload-overlap] # pyright: ignore[reportOverlappingOverload] @overload def __getitem__( # pyright: ignore[reportIncompatibleMethodOverride] From 1c0f35142f446baa865be7bb3bf6077af052510e Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Mon, 9 Jun 2025 11:49:22 +0100 Subject: [PATCH 08/14] use S2 https://github.com/python/mypy/issues/19182 --- pandas-stubs/_typing.pyi | 31 ++++++---------------------- pandas-stubs/core/frame.pyi | 40 ++++++++++++++++++++++++++++++++++--- 2 files changed, 43 insertions(+), 28 deletions(-) diff --git a/pandas-stubs/_typing.pyi b/pandas-stubs/_typing.pyi index 2d106f20..9efec5cd 100644 --- a/pandas-stubs/_typing.pyi +++ b/pandas-stubs/_typing.pyi @@ -824,30 +824,8 @@ MaskType: TypeAlias = Series[bool] | np_ndarray_bool | list[bool] # Scratch types for generics -S1 = TypeVar( - "S1", - bound=str - | bytes - | datetime.date - | datetime.time - | bool - | int - | float - | complex - | Dtype - | datetime.datetime # includes pd.Timestamp - | datetime.timedelta # includes pd.Timedelta - | Period - | Interval - | CategoricalDtype - | BaseOffset - | list[str], - default=Any, -) - -S2 = TypeVar( - "S2", - bound=str +SeriesDType: TypeAlias = ( + str | bytes | datetime.date | datetime.time @@ -862,8 +840,11 @@ S2 = TypeVar( | Interval | CategoricalDtype | BaseOffset - | list[str], + | list[str] ) +S1 = TypeVar("S1", bound=SeriesDType, default=Any) +# Like S1, but without `default=Any`. +S2 = TypeVar("S2", bound=SeriesDType) IndexingInt: TypeAlias = ( int | np.int_ | np.integer | np.unsignedinteger | np.signedinteger | np.int8 diff --git a/pandas-stubs/core/frame.pyi b/pandas-stubs/core/frame.pyi index c1409e3a..e903e12f 100644 --- a/pandas-stubs/core/frame.pyi +++ b/pandas-stubs/core/frame.pyi @@ -136,6 +136,7 @@ from pandas._typing import ( ScalarT, SequenceNotStr, SeriesByT, + SeriesDType, SortKind, StataDateFormat, StorageOptions, @@ -1391,13 +1392,23 @@ class DataFrame(NDFrame, OpsMixin, _GetItemHack): @overload def apply( self, - f: Callable[..., S2 | NAType], + f: Callable[..., S2], axis: AxisIndex = ..., raw: _bool = ..., result_type: None = ..., args: Any = ..., **kwargs: Any, ) -> Series[S2]: ... + @overload + def apply( + self, + f: Callable[..., SeriesDType | NAType], + axis: AxisIndex = ..., + raw: _bool = ..., + result_type: None = ..., + args: Any = ..., + **kwargs: Any, + ) -> Series: ... # Since non-scalar type T is not supported in Series[T], # we separate this overload from the above one @overload @@ -1415,7 +1426,7 @@ class DataFrame(NDFrame, OpsMixin, _GetItemHack): @overload def apply( self, - f: Callable[..., S2 | NAType], + f: Callable[..., S2], axis: Axis = ..., raw: _bool = ..., args: Any = ..., @@ -1424,6 +1435,17 @@ class DataFrame(NDFrame, OpsMixin, _GetItemHack): **kwargs: Any, ) -> Series[S2]: ... @overload + def apply( + self, + f: Callable[..., SeriesDType | NAType], + axis: Axis = ..., + raw: _bool = ..., + args: Any = ..., + *, + result_type: Literal["expand", "reduce"], + **kwargs: Any, + ) -> Series: ... + @overload def apply( self, f: Callable[..., ListLikeExceptSeriesAndStr | Series | Mapping[Any, Any]], @@ -1473,10 +1495,11 @@ class DataFrame(NDFrame, OpsMixin, _GetItemHack): ) -> Series: ... # apply() overloads with default result_type of None, and keyword axis=1 matters + # Use S2 (TypeVar without the `Any` default) to avoid MyPy issue https://github.com/python/mypy/issues/19182. @overload def apply( self, - f: Callable[..., S2 | NAType], + f: Callable[..., S2], raw: _bool = ..., result_type: None = ..., args: Any = ..., @@ -1485,6 +1508,17 @@ class DataFrame(NDFrame, OpsMixin, _GetItemHack): **kwargs: Any, ) -> Series[S2]: ... @overload + def apply( + self, + f: Callable[..., SeriesDType | NAType], + raw: _bool = ..., + result_type: None = ..., + args: Any = ..., + *, + axis: AxisColumn, + **kwargs: Any, + ) -> Series: ... + @overload def apply( self, f: Callable[..., ListLikeExceptSeriesAndStr | Mapping[Any, Any]], From a3ff79ea618caba208984b25fcfd688175a18950 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Mon, 9 Jun 2025 11:53:04 +0100 Subject: [PATCH 09/14] Revert "use S2 https://github.com/python/mypy/issues/19182" This reverts commit 1c0f35142f446baa865be7bb3bf6077af052510e. --- pandas-stubs/_typing.pyi | 31 ++++++++++++++++++++++------ pandas-stubs/core/frame.pyi | 40 +++---------------------------------- 2 files changed, 28 insertions(+), 43 deletions(-) diff --git a/pandas-stubs/_typing.pyi b/pandas-stubs/_typing.pyi index 9efec5cd..2d106f20 100644 --- a/pandas-stubs/_typing.pyi +++ b/pandas-stubs/_typing.pyi @@ -824,8 +824,30 @@ MaskType: TypeAlias = Series[bool] | np_ndarray_bool | list[bool] # Scratch types for generics -SeriesDType: TypeAlias = ( - str +S1 = TypeVar( + "S1", + bound=str + | bytes + | datetime.date + | datetime.time + | bool + | int + | float + | complex + | Dtype + | datetime.datetime # includes pd.Timestamp + | datetime.timedelta # includes pd.Timedelta + | Period + | Interval + | CategoricalDtype + | BaseOffset + | list[str], + default=Any, +) + +S2 = TypeVar( + "S2", + bound=str | bytes | datetime.date | datetime.time @@ -840,11 +862,8 @@ SeriesDType: TypeAlias = ( | Interval | CategoricalDtype | BaseOffset - | list[str] + | list[str], ) -S1 = TypeVar("S1", bound=SeriesDType, default=Any) -# Like S1, but without `default=Any`. -S2 = TypeVar("S2", bound=SeriesDType) IndexingInt: TypeAlias = ( int | np.int_ | np.integer | np.unsignedinteger | np.signedinteger | np.int8 diff --git a/pandas-stubs/core/frame.pyi b/pandas-stubs/core/frame.pyi index e903e12f..c1409e3a 100644 --- a/pandas-stubs/core/frame.pyi +++ b/pandas-stubs/core/frame.pyi @@ -136,7 +136,6 @@ from pandas._typing import ( ScalarT, SequenceNotStr, SeriesByT, - SeriesDType, SortKind, StataDateFormat, StorageOptions, @@ -1392,23 +1391,13 @@ class DataFrame(NDFrame, OpsMixin, _GetItemHack): @overload def apply( self, - f: Callable[..., S2], + f: Callable[..., S2 | NAType], axis: AxisIndex = ..., raw: _bool = ..., result_type: None = ..., args: Any = ..., **kwargs: Any, ) -> Series[S2]: ... - @overload - def apply( - self, - f: Callable[..., SeriesDType | NAType], - axis: AxisIndex = ..., - raw: _bool = ..., - result_type: None = ..., - args: Any = ..., - **kwargs: Any, - ) -> Series: ... # Since non-scalar type T is not supported in Series[T], # we separate this overload from the above one @overload @@ -1426,7 +1415,7 @@ class DataFrame(NDFrame, OpsMixin, _GetItemHack): @overload def apply( self, - f: Callable[..., S2], + f: Callable[..., S2 | NAType], axis: Axis = ..., raw: _bool = ..., args: Any = ..., @@ -1435,17 +1424,6 @@ class DataFrame(NDFrame, OpsMixin, _GetItemHack): **kwargs: Any, ) -> Series[S2]: ... @overload - def apply( - self, - f: Callable[..., SeriesDType | NAType], - axis: Axis = ..., - raw: _bool = ..., - args: Any = ..., - *, - result_type: Literal["expand", "reduce"], - **kwargs: Any, - ) -> Series: ... - @overload def apply( self, f: Callable[..., ListLikeExceptSeriesAndStr | Series | Mapping[Any, Any]], @@ -1495,11 +1473,10 @@ class DataFrame(NDFrame, OpsMixin, _GetItemHack): ) -> Series: ... # apply() overloads with default result_type of None, and keyword axis=1 matters - # Use S2 (TypeVar without the `Any` default) to avoid MyPy issue https://github.com/python/mypy/issues/19182. @overload def apply( self, - f: Callable[..., S2], + f: Callable[..., S2 | NAType], raw: _bool = ..., result_type: None = ..., args: Any = ..., @@ -1508,17 +1485,6 @@ class DataFrame(NDFrame, OpsMixin, _GetItemHack): **kwargs: Any, ) -> Series[S2]: ... @overload - def apply( - self, - f: Callable[..., SeriesDType | NAType], - raw: _bool = ..., - result_type: None = ..., - args: Any = ..., - *, - axis: AxisColumn, - **kwargs: Any, - ) -> Series: ... - @overload def apply( self, f: Callable[..., ListLikeExceptSeriesAndStr | Mapping[Any, Any]], From ddfc56e8c9421c927a058ec2c3f945564333a7ee Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Mon, 9 Jun 2025 11:53:17 +0100 Subject: [PATCH 10/14] Revert "go back to S1 in groupby, set defaults for ByT and _TT" This reverts commit 76ee8bb8e6b09f3d0f97a84bf31d17c85c8dcbf8. --- pandas-stubs/_typing.pyi | 1 - pandas-stubs/core/groupby/generic.pyi | 16 ++++++++-------- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/pandas-stubs/_typing.pyi b/pandas-stubs/_typing.pyi index 2d106f20..b335802e 100644 --- a/pandas-stubs/_typing.pyi +++ b/pandas-stubs/_typing.pyi @@ -892,7 +892,6 @@ ByT = TypeVar( | Period | Interval[int | float | Timestamp | Timedelta] | tuple, - default=Any, ) # Use a distinct SeriesByT when using groupby with Series of known dtype. # Essentially, an intersection between Series S1 TypeVar, and ByT TypeVar diff --git a/pandas-stubs/core/groupby/generic.pyi b/pandas-stubs/core/groupby/generic.pyi index 962db038..3f9b269a 100644 --- a/pandas-stubs/core/groupby/generic.pyi +++ b/pandas-stubs/core/groupby/generic.pyi @@ -30,7 +30,7 @@ from typing_extensions import ( from pandas._libs.tslibs.timestamps import Timestamp from pandas._typing import ( - S1, + S2, AggFuncTypeBase, AggFuncTypeFrame, ByT, @@ -52,7 +52,7 @@ class NamedAgg(NamedTuple): column: str aggfunc: AggScalar -class SeriesGroupBy(GroupBy[Series[S1]], Generic[S1, ByT]): +class SeriesGroupBy(GroupBy[Series[S2]], Generic[S2, ByT]): @overload def aggregate( self, @@ -114,7 +114,7 @@ class SeriesGroupBy(GroupBy[Series[S1]], Generic[S1, ByT]): self, indices: TakeIndexer, **kwargs, - ) -> Series[S1]: ... + ) -> Series[S2]: ... def skew( self, skipna: bool = ..., @@ -125,10 +125,10 @@ class SeriesGroupBy(GroupBy[Series[S1]], Generic[S1, ByT]): def plot(self) -> GroupByPlot[Self]: ... def nlargest( self, n: int = ..., keep: NsmallestNlargestKeep = ... - ) -> Series[S1]: ... + ) -> Series[S2]: ... def nsmallest( self, n: int = ..., keep: NsmallestNlargestKeep = ... - ) -> Series[S1]: ... + ) -> Series[S2]: ... def idxmin(self, skipna: bool = ...) -> Series: ... def idxmax(self, skipna: bool = ...) -> Series: ... def corr( @@ -166,9 +166,9 @@ class SeriesGroupBy(GroupBy[Series[S1]], Generic[S1, ByT]): @final # type: ignore[misc] def __iter__( # pyright: ignore[reportIncompatibleMethodOverride] self, - ) -> Iterator[tuple[ByT, Series[S1]]]: ... + ) -> Iterator[tuple[ByT, Series[S2]]]: ... -_TT = TypeVar("_TT", bound=Literal[True, False], default=Literal[True]) +_TT = TypeVar("_TT", bound=Literal[True, False]) class DataFrameGroupBy(GroupBy[DataFrame], Generic[ByT, _TT]): # error: Overload 3 for "apply" will never be used because its parameters overlap overload 1 @@ -217,7 +217,7 @@ class DataFrameGroupBy(GroupBy[DataFrame], Generic[ByT, _TT]): def filter( self, func: Callable, dropna: bool = ..., *args, **kwargs ) -> DataFrame: ... - @overload # type: ignore[override] + @overload def __getitem__(self, key: Scalar) -> SeriesGroupBy[Any, ByT]: ... # type: ignore[overload-overlap] # pyright: ignore[reportOverlappingOverload] @overload def __getitem__( # pyright: ignore[reportIncompatibleMethodOverride] From 3a95013cb72cc6ae50d83148bb6d7465aaf1a7b9 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Mon, 9 Jun 2025 12:08:10 +0100 Subject: [PATCH 11/14] simplify --- pandas-stubs/_typing.pyi | 31 ++++++------------------------- pandas-stubs/core/frame.pyi | 3 +++ 2 files changed, 9 insertions(+), 25 deletions(-) diff --git a/pandas-stubs/_typing.pyi b/pandas-stubs/_typing.pyi index b335802e..0949a72b 100644 --- a/pandas-stubs/_typing.pyi +++ b/pandas-stubs/_typing.pyi @@ -824,30 +824,8 @@ MaskType: TypeAlias = Series[bool] | np_ndarray_bool | list[bool] # Scratch types for generics -S1 = TypeVar( - "S1", - bound=str - | bytes - | datetime.date - | datetime.time - | bool - | int - | float - | complex - | Dtype - | datetime.datetime # includes pd.Timestamp - | datetime.timedelta # includes pd.Timedelta - | Period - | Interval - | CategoricalDtype - | BaseOffset - | list[str], - default=Any, -) - -S2 = TypeVar( - "S2", - bound=str +SeriesDType: TypeAlias = ( + str | bytes | datetime.date | datetime.time @@ -862,8 +840,11 @@ S2 = TypeVar( | Interval | CategoricalDtype | BaseOffset - | list[str], + | list[str] ) +S1 = TypeVar("S1", bound=SeriesDType, default=Any) +# Like S1, but without `default=Any`. +S2 = TypeVar("S2", bound=SeriesDType) IndexingInt: TypeAlias = ( int | np.int_ | np.integer | np.unsignedinteger | np.signedinteger | np.int8 diff --git a/pandas-stubs/core/frame.pyi b/pandas-stubs/core/frame.pyi index c1409e3a..0c503d14 100644 --- a/pandas-stubs/core/frame.pyi +++ b/pandas-stubs/core/frame.pyi @@ -1391,6 +1391,7 @@ class DataFrame(NDFrame, OpsMixin, _GetItemHack): @overload def apply( self, + # Use S2 (TypeVar without `default=Any`) instead of S1 due to https://github.com/python/mypy/issues/19182. f: Callable[..., S2 | NAType], axis: AxisIndex = ..., raw: _bool = ..., @@ -1415,6 +1416,7 @@ class DataFrame(NDFrame, OpsMixin, _GetItemHack): @overload def apply( self, + # Use S2 (TypeVar without `default=Any`) instead of S1 due to https://github.com/python/mypy/issues/19182. f: Callable[..., S2 | NAType], axis: Axis = ..., raw: _bool = ..., @@ -1476,6 +1478,7 @@ class DataFrame(NDFrame, OpsMixin, _GetItemHack): @overload def apply( self, + # Use S2 (TypeVar without `default=Any`) instead of S1 due to https://github.com/python/mypy/issues/19182. f: Callable[..., S2 | NAType], raw: _bool = ..., result_type: None = ..., From ea854e8d739bdcbf3eb3b6a027496c9a8885df4a Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Tue, 10 Jun 2025 09:30:31 +0100 Subject: [PATCH 12/14] unquote pd.Series in tests --- pandas-stubs/core/reshape/concat.pyi | 2 +- tests/test_frame.py | 14 +++++----- tests/test_series.py | 38 ++++++++++++++-------------- 3 files changed, 26 insertions(+), 28 deletions(-) diff --git a/pandas-stubs/core/reshape/concat.pyi b/pandas-stubs/core/reshape/concat.pyi index 7ca896dc..9b8624f5 100644 --- a/pandas-stubs/core/reshape/concat.pyi +++ b/pandas-stubs/core/reshape/concat.pyi @@ -126,7 +126,7 @@ def concat( ) -> DataFrame: ... # Including either of the next 2 overloads causes mypy to complain about -# test_pandas.py:test_types_concat() in assert_type(pd.concat([s, s2]), "pd.Series") +# test_pandas.py:test_types_concat() in assert_type(pd.concat([s, s2]), pd.Series) # It thinks that pd.concat([s, s2]) is Any . May be due to Series being # Generic, or the axis argument being unspecified, and then there is partial # overlap with the first 2 overloads. diff --git a/tests/test_frame.py b/tests/test_frame.py index 0517761b..5edb680a 100644 --- a/tests/test_frame.py +++ b/tests/test_frame.py @@ -682,8 +682,8 @@ def test_frame_iterator() -> None: """Test iterator methods for a dataframe GH1217.""" df = pd.DataFrame(data={"col1": [2, 1], "col2": [3, 4]}) - check(assert_type(next(df.items()), tuple[Hashable, "pd.Series"]), tuple) - check(assert_type(next(df.iterrows()), tuple[Hashable, "pd.Series"]), tuple) + check(assert_type(next(df.items()), tuple[Hashable, pd.Series]), tuple) + check(assert_type(next(df.iterrows()), tuple[Hashable, pd.Series]), tuple) check(assert_type(next(df.itertuples()), _PandasNamedTuple), _PandasNamedTuple) @@ -3101,15 +3101,13 @@ def test_frame_stack() -> None: upper="2.2.99", ): check( - assert_type( - df_multi_level_cols2.stack(0), Union[pd.DataFrame, "pd.Series"] - ), + assert_type(df_multi_level_cols2.stack(0), Union[pd.DataFrame, pd.Series]), pd.DataFrame, ) check( assert_type( df_multi_level_cols2.stack([0, 1]), - Union[pd.DataFrame, "pd.Series"], + Union[pd.DataFrame, pd.Series], ), pd.Series, ) @@ -3117,14 +3115,14 @@ def test_frame_stack() -> None: check( assert_type( df_multi_level_cols2.stack(0, future_stack=False), - Union[pd.DataFrame, "pd.Series"], + Union[pd.DataFrame, pd.Series], ), pd.DataFrame, ) check( assert_type( df_multi_level_cols2.stack(0, dropna=True, sort=True), - Union[pd.DataFrame, "pd.Series"], + Union[pd.DataFrame, pd.Series], ), pd.DataFrame, ) diff --git a/tests/test_series.py b/tests/test_series.py index 27f4753f..b3457746 100644 --- a/tests/test_series.py +++ b/tests/test_series.py @@ -2872,13 +2872,13 @@ def test_astype_object(cast_arg: ObjectDtypeArg, target_type: type) -> None: if TYPE_CHECKING: # python object - assert_type(s.astype(object), "pd.Series") - assert_type(s.astype("object"), "pd.Series") + assert_type(s.astype(object), pd.Series) + assert_type(s.astype("object"), pd.Series) # numpy object - assert_type(s.astype(np.object_), "pd.Series") - # assert_type(s.astype("object_"), "pd.Series") # NOTE: not assigned - # assert_type(s.astype("object0"), "pd.Series") # NOTE: not assigned - assert_type(s.astype("O"), "pd.Series") + assert_type(s.astype(np.object_), pd.Series) + # assert_type(s.astype("object_"), pd.Series) # NOTE: not assigned + # assert_type(s.astype("object0"), pd.Series) # NOTE: not assigned + assert_type(s.astype("O"), pd.Series) @pytest.mark.parametrize("cast_arg, target_type", ASTYPE_VOID_ARGS, ids=repr) @@ -2888,9 +2888,9 @@ def test_astype_void(cast_arg: VoidDtypeArg, target_type: type) -> None: if TYPE_CHECKING: # numpy void - assert_type(s.astype(np.void), "pd.Series") - assert_type(s.astype("void"), "pd.Series") - assert_type(s.astype("V"), "pd.Series") + assert_type(s.astype(np.void), pd.Series) + assert_type(s.astype("void"), pd.Series) + assert_type(s.astype("V"), pd.Series) def test_astype_other() -> None: @@ -2902,7 +2902,7 @@ def test_astype_other() -> None: # Test self-consistent with s.dtype (#747) # NOTE: https://github.com/python/typing/issues/801#issuecomment-1646171898 - check(assert_type(s.astype(s.dtype), "pd.Series"), pd.Series, np.integer) + check(assert_type(s.astype(s.dtype), pd.Series), pd.Series, np.integer) # test DecimalDtype orseries = pd.Series([Decimal(x) for x in [1, 2, 3]]) @@ -2917,7 +2917,7 @@ def test_astype_other() -> None: # Test non-literal string # NOTE: currently unsupported! Enable in future. # string: str = "int" # not Literal! - # check(assert_type(s.astype(string), "pd.Series"), pd.Series, np.integer) + # check(assert_type(s.astype(string), pd.Series), pd.Series, np.integer) def test_all_astype_args_tested() -> None: @@ -3224,7 +3224,7 @@ def test_get() -> None: def test_series_new_empty() -> None: # GH 826 - check(assert_type(pd.Series(), "pd.Series"), pd.Series) + check(assert_type(pd.Series(), pd.Series), pd.Series) def test_series_mapping() -> None: @@ -3406,10 +3406,10 @@ def first_arg_not_series(argument_1: int, ser: pd.Series) -> pd.Series: def test_series_apply() -> None: s = pd.Series(["A", "B", "AB"]) - check(assert_type(s.apply(tuple), "pd.Series"), pd.Series) - check(assert_type(s.apply(list), "pd.Series"), pd.Series) - check(assert_type(s.apply(set), "pd.Series"), pd.Series) - check(assert_type(s.apply(frozenset), "pd.Series"), pd.Series) + check(assert_type(s.apply(tuple), pd.Series), pd.Series) + check(assert_type(s.apply(list), pd.Series), pd.Series) + check(assert_type(s.apply(set), pd.Series), pd.Series) + check(assert_type(s.apply(frozenset), pd.Series), pd.Series) def test_diff() -> None: @@ -3571,7 +3571,7 @@ def callable(x: int) -> str: unknown_series = pd.Series([1, 0, None]) check( - assert_type(unknown_series.map({1: True, 0: False, None: None}), "pd.Series"), + assert_type(unknown_series.map({1: True, 0: False, None: None}), pd.Series), pd.Series, ) @@ -3673,8 +3673,8 @@ class MyDict(TypedDict): def test_series_empty_dtype() -> None: """Test for the creation of a Series from an empty list GH571 to map to a Series.""" new_tab: Sequence[Never] = [] # need to be typehinted to please mypy - check(assert_type(pd.Series(new_tab), "pd.Series"), pd.Series) - check(assert_type(pd.Series([]), "pd.Series"), pd.Series) + check(assert_type(pd.Series(new_tab), pd.Series), pd.Series) + check(assert_type(pd.Series([]), pd.Series), pd.Series) # ensure that an empty string does not get matched to Sequence[Never] check(assert_type(pd.Series(""), "pd.Series[str]"), pd.Series) From 375f630bef050933d060fb0fcdbbd92d12ba95df Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Tue, 10 Jun 2025 09:34:34 +0100 Subject: [PATCH 13/14] update philosophy --- docs/philosophy.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/philosophy.md b/docs/philosophy.md index ca6fb35c..853c516e 100644 --- a/docs/philosophy.md +++ b/docs/philosophy.md @@ -29,8 +29,8 @@ lt = s < 3 In the pandas source, `lt` is a `Series` with a `dtype` of `bool`. In the pandas-stubs, the type of `lt` is `Series[bool]`. This allows further type checking to occur in other -pandas methods. Note that in the above example, `s` is typed as `Series` because -its type cannot be statically inferred. +pandas methods. Note that in the above example, `s` is just typed as `Series` (which +defaults to `Series[Any]`) because its type cannot be statically inferred. This also allows type checking for operations on series that contain date/time data. Consider the following example that creates two series of datetimes with corresponding arithmetic. From e415282903054828127baa054e89ad985cfda3e4 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Tue, 10 Jun 2025 09:48:28 +0100 Subject: [PATCH 14/14] add comment about why one UnknownIndex needs to stay (!) --- pandas-stubs/core/frame.pyi | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas-stubs/core/frame.pyi b/pandas-stubs/core/frame.pyi index 2724508e..882694cb 100644 --- a/pandas-stubs/core/frame.pyi +++ b/pandas-stubs/core/frame.pyi @@ -1686,6 +1686,8 @@ class DataFrame(NDFrame, OpsMixin, _GetItemHack): @property def iloc(self) -> _iLocIndexerFrame[Self]: ... @property + # mypy complains if we use Index[Any] instead of UnknownIndex here, even though + # the latter is aliased to the former ¯\_(ツ)_/¯. def index(self) -> UnknownIndex: ... @index.setter def index(self, idx: Index) -> None: ...