-
-
Notifications
You must be signed in to change notification settings - Fork 1.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
(fix): extension array indexers #9671
base: main
Are you sure you want to change the base?
Changes from all commits
7b5f323
8784f33
b45ab23
39086ef
df49a40
adb8ca3
266b1ed
6d5f13b
5d68bfe
07bba69
6e7f0bb
b4a49bb
2e1ff4f
d5a7da0
821b68d
d066edf
ed22da1
8bf23f4
c3a2b39
3c44aed
48be73a
7ac9983
d86ad04
b5d0795
60324f0
c2bc4df
84569bc
90e390d
7c32bd0
795ecf6
8eca6e9
fb91812
a06f2b1
14027e8
a47a96f
6f2861a
1f07500
798b444
f487599
20d6c9d
7391948
308091c
5f40b4e
2a65d8d
43f7d61
59934b9
a01f9f3
8b91128
0e351ca
07a8e9c
2be5739
b9d0a8e
08afc3b
edc55e1
bffe919
150b982
7113ceb
7f47f0b
512808d
63c83f4
0efbbeb
2910250
57d8d72
5333240
6f35c81
d0c17a4
b2b6bb1
5dbc8a7
be0d3e0
f95408a
609e15c
ec7f165
1f1cf1c
14b1a88
05627dd
e7cbf3a
fc87e04
9ae645e
6e3ca57
277d1c6
81a9d94
be8642f
f3f62e5
c07df41
ae49850
e8f5aa8
9653a01
a405f03
503b313
c8ab8f3
66e5b06
f9fde3a
8a3e834
f5822fd
66c0b9f
ba51274
3ba3e3f
1ab43eb
45ba9d3
091a90d
ab3c9ed
53fe43a
a16a890
4283f8a
0ba848d
6cb8702
5de8d0d
fc985d9
799b750
a2d8e69
d6fe956
bd6a5d1
6557ef9
759fb72
2118191
262295a
941c4b5
6e41425
adebafa
6cd81e5
1ae9a22
1cec644
60aed87
797dc85
225c5b3
33a1563
4efe8b0
21a0ec6
48dea20
1145f4b
a9990cf
5fa630f
43c85d1
a4702d6
9bd292a
25b797e
3bd8cf4
8b9c85a
2555d89
3b2d861
66e181c
e380968
305938c
b32b02c
a2c46b1
fa2c4b6
c65c9af
21dffc1
8eeeb78
7ad2183
9e4cab6
5964a9e
308391d
0e886d6
80dc10b
d494fe0
ef6f722
4ea5241
151e9cd
8ecda4e
2bbf0ff
0308672
b043020
7182ce2
470235e
e619a4c
700e78d
4525ea1
0b93dbd
b38cd7e
a2d1c96
c4b2af3
45a0d56
3ef79cd
ac719e8
5292569
ecd603b
f6716dc
0556376
ffc1828
eaf3c73
1e6ba18
85a340b
9d77885
db69b63
b120917
f7cda22
4d70fd1
666d279
3623dd5
da0ee36
b53ba64
2068b8c
13fc8fe
067f8f2
459f56b
0aa5862
4a73535
9b4ce62
1fee9d4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -10,10 +10,11 @@ | |
from dataclasses import dataclass, field | ||
from datetime import timedelta | ||
from html import escape | ||
from typing import TYPE_CHECKING, Any, overload | ||
from typing import TYPE_CHECKING, Any, cast, overload | ||
|
||
import numpy as np | ||
import pandas as pd | ||
from numpy.typing import DTypeLike | ||
from packaging.version import Version | ||
|
||
from xarray.core import duck_array_ops | ||
|
@@ -33,8 +34,6 @@ | |
from xarray.namedarray.pycompat import array_type, integer_types, is_chunked_array | ||
|
||
if TYPE_CHECKING: | ||
from numpy.typing import DTypeLike | ||
|
||
from xarray.core.indexes import Index | ||
from xarray.core.types import Self | ||
from xarray.core.variable import Variable | ||
|
@@ -1707,27 +1706,44 @@ class PandasIndexingAdapter(ExplicitlyIndexedNDArrayMixin): | |
__slots__ = ("_dtype", "array") | ||
|
||
array: pd.Index | ||
_dtype: np.dtype | ||
_dtype: np.dtype | pd.api.extensions.ExtensionDtype | ||
|
||
def __init__(self, array: pd.Index, dtype: DTypeLike = None): | ||
def __init__( | ||
self, | ||
array: pd.Index, | ||
dtype: DTypeLike | pd.api.extensions.ExtensionDtype | None = None, | ||
): | ||
from xarray.core.indexes import safe_cast_to_index | ||
|
||
self.array = safe_cast_to_index(array) | ||
|
||
if dtype is None: | ||
self._dtype = get_valid_numpy_dtype(array) | ||
if pd.api.types.is_extension_array_dtype(array.dtype): | ||
cast(pd.api.extensions.ExtensionDtype, array.dtype) | ||
self._dtype = array.dtype | ||
else: | ||
self._dtype = get_valid_numpy_dtype(array) | ||
elif pd.api.types.is_extension_array_dtype(dtype): | ||
self._dtype = cast(pd.api.extensions.ExtensionDtype, dtype) | ||
else: | ||
self._dtype = np.dtype(dtype) | ||
self._dtype = np.dtype(cast(DTypeLike, dtype)) | ||
|
||
@property | ||
def dtype(self) -> np.dtype: | ||
def dtype(self) -> np.dtype | pd.api.extensions.ExtensionDtype: # type: ignore[override] | ||
return self._dtype | ||
|
||
def __array__( | ||
self, dtype: np.typing.DTypeLike = None, /, *, copy: bool | None = None | ||
self, | ||
dtype: np.typing.DTypeLike | pd.api.extensions.ExtensionDtype | None = None, | ||
/, | ||
*, | ||
copy: bool | None = None, | ||
) -> np.ndarray: | ||
if dtype is None: | ||
dtype = self.dtype | ||
if pd.api.types.is_extension_array_dtype(dtype): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same here. Why is this needed? |
||
dtype = get_valid_numpy_dtype(self.array) | ||
dtype = cast(np.dtype, dtype) | ||
array = self.array | ||
if isinstance(array, pd.PeriodIndex): | ||
with suppress(AttributeError): | ||
|
@@ -1746,7 +1762,7 @@ def get_duck_array(self) -> np.ndarray: | |
def shape(self) -> _Shape: | ||
return (len(self.array),) | ||
|
||
def _convert_scalar(self, item): | ||
def _convert_scalar(self, item) -> np.ndarray: | ||
if item is pd.NaT: | ||
# work around the impossibility of casting NaT with asarray | ||
# note: it probably would be better in general to return | ||
|
@@ -1762,7 +1778,10 @@ def _convert_scalar(self, item): | |
# numpy fails to convert pd.Timestamp to np.datetime64[ns] | ||
item = np.asarray(item.to_datetime64()) | ||
elif self.dtype != object: | ||
item = np.asarray(item, dtype=self.dtype) | ||
dtype = self.dtype | ||
if pd.api.types.is_extension_array_dtype(dtype): | ||
dtype = get_valid_numpy_dtype(self.array) | ||
item = np.asarray(item, dtype=cast(np.dtype, dtype)) | ||
|
||
# as for numpy.ndarray indexing, we always want the result to be | ||
# a NumPy array. | ||
|
@@ -1877,23 +1896,28 @@ class PandasMultiIndexingAdapter(PandasIndexingAdapter): | |
__slots__ = ("_dtype", "adapter", "array", "level") | ||
|
||
array: pd.MultiIndex | ||
_dtype: np.dtype | ||
_dtype: np.dtype | pd.api.extensions.ExtensionDtype | ||
level: str | None | ||
|
||
def __init__( | ||
self, | ||
array: pd.MultiIndex, | ||
dtype: DTypeLike = None, | ||
dtype: DTypeLike | pd.api.extensions.ExtensionDtype | None = None, | ||
level: str | None = None, | ||
): | ||
super().__init__(array, dtype) | ||
self.level = level | ||
|
||
def __array__( | ||
self, dtype: np.typing.DTypeLike = None, /, *, copy: bool | None = None | ||
self, | ||
dtype: DTypeLike | pd.api.extensions.ExtensionDtype | None = None, | ||
/, | ||
*, | ||
copy: bool | None = None, | ||
) -> np.ndarray: | ||
if dtype is None: | ||
dtype = self.dtype | ||
dtype = cast(np.dtype, dtype) | ||
if self.level is not None: | ||
return np.asarray( | ||
self.array.get_level_values(self.level).values, dtype=dtype | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,7 +13,6 @@ | |
import numpy as np | ||
import pandas as pd | ||
from numpy.typing import ArrayLike | ||
from pandas.api.types import is_extension_array_dtype | ||
|
||
import xarray as xr # only for Dataset and DataArray | ||
from xarray.core import common, dtypes, duck_array_ops, indexing, nputils, ops, utils | ||
|
@@ -412,6 +411,10 @@ def data(self): | |
if is_duck_array(self._data): | ||
return self._data | ||
elif isinstance(self._data, indexing.ExplicitlyIndexed): | ||
if pd.api.types.is_extension_array_dtype(self._data) and isinstance( | ||
self._data, PandasIndexingAdapter | ||
): | ||
return self._data.array | ||
return self._data.get_duck_array() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Right, at the moment
which means we probably want to conserve the pandas array typing. But, maybe the implementation of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For now I've just added a few ignores. I think this issue is a bit separate since it doesn't actually affect the runtime behavior |
||
else: | ||
return self.values | ||
|
@@ -2592,11 +2595,6 @@ def chunk( # type: ignore[override] | |
dask.array.from_array | ||
""" | ||
|
||
if is_extension_array_dtype(self): | ||
raise ValueError( | ||
f"{self} was found to be a Pandas ExtensionArray. Please convert to numpy first." | ||
) | ||
|
||
if from_array_kwargs is None: | ||
from_array_kwargs = {} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -524,7 +524,7 @@ def line( | |
assert hueplt is not None | ||
ax.legend(handles=primitive, labels=list(hueplt.to_numpy()), title=hue_label) | ||
|
||
if np.issubdtype(xplt.dtype, np.datetime64): | ||
if isinstance(xplt.dtype, np.dtype) and np.issubdtype(xplt.dtype, np.datetime64): # type: ignore[redundant-expr] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why was this necessary? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. And using There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Right, it complains about LHS not being a numpy dtype |
||
_set_concise_date(ax, axis="x") | ||
|
||
_update_axes(ax, xincrease, yincrease, xscale, yscale, xticks, yticks, xlim, ylim) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this is amazing, it enables IntervalIndex indexing now.
cc @benbovy