Skip to content

enforced return types of groupby sequence arguments #10271

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ Deprecations
Bug fixes
~~~~~~~~~

- Ensured that ``groupby`` called with a sequence of groupers always yields tuple group labels, even when the sequence has length 1.
By `Nick Papior <https://github.com/zerothi>`_.
- :py:meth:`~xarray.Dataset.to_stacked_array` now uses dimensions in order of appearance.
This fixes the issue where using :py:meth:`~xarray.Dataset.transpose` before :py:meth:`~xarray.Dataset.to_stacked_array`
had no effect. (Mentioned in :issue:`9921`)
Expand Down
61 changes: 37 additions & 24 deletions xarray/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -383,7 +383,7 @@ def _parse_group_and_groupers(
groupers: dict[str, Grouper],
*,
eagerly_compute_group: bool,
) -> tuple[ResolvedGrouper, ...]:
) -> ResolvedGrouper | tuple[ResolvedGrouper, ...]:
from xarray.core.dataarray import DataArray
from xarray.core.variable import Variable
from xarray.groupers import UniqueGrouper
Expand All @@ -407,6 +407,7 @@ def _parse_group_and_groupers(

rgroupers: tuple[ResolvedGrouper, ...]
if isinstance(group, DataArray | Variable):
# TODO add test for this, see gh-10246
rgroupers = (
ResolvedGrouper(
UniqueGrouper(), group, obj, eagerly_compute_group=eagerly_compute_group
Expand All @@ -429,6 +430,8 @@ def _parse_group_and_groupers(
)
for group, grouper in grouper_mapping.items()
)
if isinstance(group, str):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

instead I'd return a flag squeeze_group_label_on_iter that is True if group is str

rgroupers = rgroupers[0]
return rgroupers


Expand All @@ -453,7 +456,6 @@ def _resolve_group(
"match the length of this variable along its "
"dimensions"
)

newgroup: T_Group
if isinstance(group, DataArray):
try:
Expand Down Expand Up @@ -602,7 +604,7 @@ class GroupBy(Generic[T_Xarray]):
"groupers",
)
_obj: T_Xarray
groupers: tuple[ResolvedGrouper, ...]
groupers: ResolvedGrouper | tuple[ResolvedGrouper, ...]
_restore_coord_dims: bool

_original_obj: T_Xarray
Expand All @@ -626,7 +628,7 @@ class GroupBy(Generic[T_Xarray]):
def __init__(
self,
obj: T_Xarray,
groupers: tuple[ResolvedGrouper, ...],
groupers: ResolvedGrouper | tuple[ResolvedGrouper, ...],
restore_coord_dims: bool = True,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
restore_coord_dims: bool = True,
squeeze_group_label_on_iter: bool
restore_coord_dims: bool = True,

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

and then I'd handle this inside __iter__ where you'll need

for label, group in zip(self.unique_coord.data, self._iter_grouped(), strict=True):
    # handle promotion to tuple
    yield label, group

) -> None:
"""Create a GroupBy object
Expand All @@ -635,7 +637,7 @@ def __init__(
----------
obj : Dataset or DataArray
Object to group.
grouper : Grouper
groupers : ResolvedGrouper or tuple[ResolvedGrouper, ...]
Grouper object
restore_coord_dims : bool, default: True
If True, also restore the dimension order of multi-dimensional
Expand All @@ -645,9 +647,8 @@ def __init__(
self._restore_coord_dims = restore_coord_dims
self.groupers = groupers

if len(groupers) == 1:
(grouper,) = groupers
self.encoded = grouper.encoded
if isinstance(groupers, ResolvedGrouper):
self.encoded = groupers.encoded
else:
if any(
isinstance(obj._indexes.get(grouper.name, None), PandasMultiIndex)
Expand Down Expand Up @@ -699,6 +700,12 @@ def sizes(self) -> Mapping[Hashable, int]:
self._sizes = self._obj.isel({self._group_dim: index}).sizes
return self._sizes

@property
def _groupers_tuple(self) -> tuple[ResolvedGrouper, ...]:
    """Return ``self.groupers`` normalized to a tuple.

    ``self.groupers`` holds either a bare ``ResolvedGrouper`` (single
    group argument) or a tuple of them (sequence of groupers); internal
    iteration code uses this property to always see a tuple.
    """
    groupers = self.groupers
    if not isinstance(groupers, ResolvedGrouper):
        return groupers
    return (groupers,)

def shuffle_to_chunks(self, chunks: T_Chunks = None) -> T_Xarray:
"""
Sort or "shuffle" the underlying object.
Expand Down Expand Up @@ -756,7 +763,8 @@ def _shuffle_obj(self, chunks: T_Chunks) -> T_Xarray:
was_array = isinstance(self._obj, DataArray)
as_dataset = self._obj._to_temp_dataset() if was_array else self._obj

for grouper in self.groupers:
groupers = self._groupers_tuple
for grouper in groupers:
if grouper.name not in as_dataset._variables:
as_dataset.coords[grouper.name] = grouper.group

Expand Down Expand Up @@ -801,7 +809,9 @@ def _raise_if_by_is_chunked(self):
)

def _raise_if_not_single_group(self):
    """Raise unless this object was built from exactly one grouper.

    Guards methods that cannot yet handle sequence-style grouping,
    e.g. ``groupby_bins(..., ["x"])``.
    TODO: lift this restriction once multi-grouper support lands.
    """
    if isinstance(self.groupers, ResolvedGrouper):
        return
    raise NotImplementedError(
        "This method is not supported for grouping by multiple variables yet."
    )
Expand Down Expand Up @@ -836,12 +846,13 @@ def __iter__(self) -> Iterator[tuple[GroupKey, T_Xarray]]:
return zip(self.encoded.unique_coord.data, self._iter_grouped(), strict=True)

def __repr__(self) -> str:
groupers = self._groupers_tuple
text = (
f"<{self.__class__.__name__}, "
f"grouped over {len(self.groupers)} grouper(s),"
f"grouped over {len(groupers)} grouper(s),"
f" {self._len} groups in total:"
)
for grouper in self.groupers:
for grouper in groupers:
coord = grouper.unique_coord
labels = ", ".join(format_array_flat(coord, 30).split())
text += f"\n {grouper.name!r}: {coord.size}/{grouper.full_index.size} groups present with labels {labels}"
Expand Down Expand Up @@ -871,7 +882,7 @@ def _binary_op(self, other, f, reflexive=False):
g = f if not reflexive else lambda x, y: f(y, x)

self._raise_if_not_single_group()
(grouper,) = self.groupers
(grouper,) = self._groupers_tuple
obj = self._original_obj
name = grouper.name
group = grouper.group
Expand Down Expand Up @@ -971,11 +982,12 @@ def _maybe_reindex(self, combined):
self.encoded.unique_coord.size != self.encoded.full_index.size
)
indexers = {}
for grouper in self.groupers:
groupers = self._groupers_tuple
for grouper in groupers:
index = combined._indexes.get(grouper.name, None)
if has_missing_groups and index is not None:
indexers[grouper.name] = grouper.full_index
elif len(self.groupers) > 1:
elif len(groupers) > 1:
if not isinstance(
grouper.full_index, pd.RangeIndex
) and not index.index.equals(grouper.full_index):
Expand All @@ -989,6 +1001,7 @@ def _maybe_unstack(self, obj):
multidimensional group."""
from xarray.groupers import UniqueGrouper

groupers = self._groupers_tuple
stacked_dim = self._stacked_dim
if stacked_dim is not None and stacked_dim in obj.dims:
inserted_dims = self._inserted_dims
Expand All @@ -997,7 +1010,7 @@ def _maybe_unstack(self, obj):
if dim in obj.coords:
del obj.coords[dim]
obj._indexes = filter_indexes_from_coords(obj._indexes, set(obj.coords))
elif len(self.groupers) > 1:
elif len(groupers) > 1:
# TODO: we could clean this up by setting the appropriate `stacked_dim`
# and `inserted_dims`
# if multiple groupers all share the same single dimension, then
Expand All @@ -1007,7 +1020,7 @@ def _maybe_unstack(self, obj):
obj = obj.unstack(*dims_to_unstack)
to_drop = [
grouper.name
for grouper in self.groupers
for grouper in groupers
if isinstance(grouper.group, _DummyGroup)
and isinstance(grouper.grouper, UniqueGrouper)
]
Expand Down Expand Up @@ -1044,7 +1057,7 @@ def _flox_reduce(
kwargs.setdefault("method", "cohorts")

midx_grouping_vars: tuple[Hashable, ...] = ()
for grouper in self.groupers:
for grouper in self._groupers_tuple:
name = grouper.name
maybe_midx = obj._indexes.get(name, None)
if isinstance(maybe_midx, PandasMultiIndex):
Expand Down Expand Up @@ -1082,7 +1095,7 @@ def _flox_reduce(
parsed_dim_list = list()
# preserve order
for dim_ in itertools.chain(
*(grouper.group.dims for grouper in self.groupers)
*(grouper.group.dims for grouper in self._groupers_tuple)
):
if dim_ not in parsed_dim_list:
parsed_dim_list.append(dim_)
Expand All @@ -1094,7 +1107,7 @@ def _flox_reduce(

# Do this so we raise the same error message whether flox is present or not.
# Better to control it here than in flox.
for grouper in self.groupers:
for grouper in self._groupers_tuple:
if any(
d not in grouper.group.dims and d not in obj.dims for d in parsed_dim
):
Expand All @@ -1117,10 +1130,10 @@ def _flox_reduce(

# pass RangeIndex as a hint to flox that `by` is already factorized
expected_groups = tuple(
pd.RangeIndex(len(grouper)) for grouper in self.groupers
pd.RangeIndex(len(grouper)) for grouper in self._groupers_tuple
)

codes = tuple(g.codes for g in self.groupers)
codes = tuple(g.codes for g in self._groupers_tuple)
result = xarray_reduce(
obj.drop_vars(non_numeric.keys()),
*codes,
Expand All @@ -1137,7 +1150,7 @@ def _flox_reduce(
new_coords = []
to_drop = []
if group_dims & set(parsed_dim):
for grouper in self.groupers:
for grouper in self._groupers_tuple:
output_index = grouper.full_index
if isinstance(output_index, pd.RangeIndex):
# flox always assigns an index so we must drop it here if we don't need it.
Expand Down Expand Up @@ -1529,7 +1542,7 @@ def _concat_shortcut(self, applied, dim, positions=None):

def _restore_dim_order(self, stacked: DataArray) -> DataArray:
def lookup_order(dimension):
for grouper in self.groupers:
for grouper in self._groupers_tuple:
if dimension == grouper.name and grouper.group.ndim == 1:
(dimension,) = grouper.group.dims
if dimension in self._obj.dims:
Expand Down
27 changes: 27 additions & 0 deletions xarray/tests/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -424,7 +424,7 @@
coords={"month": np.arange(1, 13), "quantile": 0},
dims="month",
)
assert_identical(expected, actual)

Check failure on line 427 in xarray/tests/test_groupby.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.10 min-all-deps

test_da_groupby_quantile[True] AssertionError: Left and right DataArray objects are not identical Differing dimensions: (month: 12) != (stacked_month: 12) Differing coordinates: L * month (month) int64 96B 1 2 3 4 5 6 7 8 9 10 11 12 R * month (stacked_month) int64 96B 1 2 3 4 5 6 7 8 9 10 11 12 Coordinates only on the right object: * stacked_month (stacked_month) object 96B MultiIndex

actual = g.quantile(0, dim="time")[:2]
expected = xr.DataArray(
Expand Down Expand Up @@ -910,7 +910,7 @@
gb = data.groupby(by_func("x"))
with xr.set_options(use_flox=use_flox):
actual = gb.mean(...)
assert_allclose(expected, actual)

Check failure on line 913 in xarray/tests/test_groupby.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.10

test_groupby_dataset_reduce_ellipsis[True-group-by-unique-grouper-False] AssertionError: Left and right Dataset objects are not close Differing dimensions: (x: 3) != (stacked_x: 3) Coordinates only on the right object: * stacked_x (stacked_x) object 24B MultiIndex * x (stacked_x) int64 24B 0 1 2

Check failure on line 913 in xarray/tests/test_groupby.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.10

test_groupby_dataset_reduce_ellipsis[False-group-by-unique-grouper-False] AssertionError: Left and right Dataset objects are not close Differing dimensions: (x: 3) != (stacked_x: 3) Coordinates only on the right object: * stacked_x (stacked_x) object 24B MultiIndex * x (stacked_x) int64 24B 0 1 2

Check failure on line 913 in xarray/tests/test_groupby.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.13

test_groupby_dataset_reduce_ellipsis[True-group-by-unique-grouper-False] AssertionError: Left and right Dataset objects are not close Differing dimensions: (x: 3) != (stacked_x: 3) Coordinates only on the right object: * stacked_x (stacked_x) object 24B MultiIndex * x (stacked_x) int64 24B 0 1 2

Check failure on line 913 in xarray/tests/test_groupby.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.13

test_groupby_dataset_reduce_ellipsis[False-group-by-unique-grouper-False] AssertionError: Left and right Dataset objects are not close Differing dimensions: (x: 3) != (stacked_x: 3) Coordinates only on the right object: * stacked_x (stacked_x) object 24B MultiIndex * x (stacked_x) int64 24B 0 1 2

Check failure on line 913 in xarray/tests/test_groupby.py

View workflow job for this annotation

GitHub Actions / macos-latest py3.10

test_groupby_dataset_reduce_ellipsis[True-group-by-unique-grouper-False] AssertionError: Left and right Dataset objects are not close Differing dimensions: (x: 3) != (stacked_x: 3) Coordinates only on the right object: * x (stacked_x) int64 24B 0 1 2 * stacked_x (stacked_x) object 24B MultiIndex

Check failure on line 913 in xarray/tests/test_groupby.py

View workflow job for this annotation

GitHub Actions / macos-latest py3.10

test_groupby_dataset_reduce_ellipsis[False-group-by-unique-grouper-False] AssertionError: Left and right Dataset objects are not close Differing dimensions: (x: 3) != (stacked_x: 3) Coordinates only on the right object: * x (stacked_x) int64 24B 0 1 2 * stacked_x (stacked_x) object 24B MultiIndex

Check failure on line 913 in xarray/tests/test_groupby.py

View workflow job for this annotation

GitHub Actions / macos-latest py3.13

test_groupby_dataset_reduce_ellipsis[True-group-by-unique-grouper-False] AssertionError: Left and right Dataset objects are not close Differing dimensions: (x: 3) != (stacked_x: 3) Coordinates only on the right object: * x (stacked_x) int64 24B 0 1 2 * stacked_x (stacked_x) object 24B MultiIndex

Check failure on line 913 in xarray/tests/test_groupby.py

View workflow job for this annotation

GitHub Actions / macos-latest py3.13

test_groupby_dataset_reduce_ellipsis[False-group-by-unique-grouper-False] AssertionError: Left and right Dataset objects are not close Differing dimensions: (x: 3) != (stacked_x: 3) Coordinates only on the right object: * x (stacked_x) int64 24B 0 1 2 * stacked_x (stacked_x) object 24B MultiIndex

Check failure on line 913 in xarray/tests/test_groupby.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.13 all-but-numba

test_groupby_dataset_reduce_ellipsis[True-group-by-unique-grouper-False] AssertionError: Left and right Dataset objects are not close Differing dimensions: (x: 3) != (stacked_x: 3) Coordinates only on the right object: * x (stacked_x) int64 24B 0 1 2 * stacked_x (stacked_x) object 24B MultiIndex

Check failure on line 913 in xarray/tests/test_groupby.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.13 all-but-numba

test_groupby_dataset_reduce_ellipsis[False-group-by-unique-grouper-False] AssertionError: Left and right Dataset objects are not close Differing dimensions: (x: 3) != (stacked_x: 3) Coordinates only on the right object: * x (stacked_x) int64 24B 0 1 2 * stacked_x (stacked_x) object 24B MultiIndex

Check failure on line 913 in xarray/tests/test_groupby.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.12 all-but-dask

test_groupby_dataset_reduce_ellipsis[True-group-by-unique-grouper-False] AssertionError: Left and right Dataset objects are not close Differing dimensions: (x: 3) != (stacked_x: 3) Coordinates only on the right object: * x (stacked_x) int64 24B 0 1 2 * stacked_x (stacked_x) object 24B MultiIndex

with xr.set_options(use_flox=use_flox):
actual = gb.mean("y")
Expand Down Expand Up @@ -1054,7 +1054,7 @@
"x_bins": ("x_bins", pd.IntervalIndex.from_breaks(x_bins, closed="left"))
},
)
assert_identical(expected, actual)

Check failure on line 1057 in xarray/tests/test_groupby.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.10

test_groupby_bins_cut_kwargs[False] AssertionError: Left and right DataArray objects are not identical Differing dimensions: (x_bins: 3, y: 2) != (y: 2, stacked_x_bins: 3) Differing values: L array([[ 1., 2.], [ 5., 6.], [ 9., 10.]]) R array([[ 1., 5., 9.], [ 2., 6., 10.]]) Differing coordinates: L * x_bins (x_bins) interval[int64, left] 48B [0, 2) [2, 4) [4, 6) R * x_bins (stacked_x_bins) object 24B [0, 2) [2, 4) [4, 6) Coordinates only on the right object: * stacked_x_bins (stacked_x_bins) object 24B MultiIndex

Check failure on line 1057 in xarray/tests/test_groupby.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.13

test_groupby_bins_cut_kwargs[False] AssertionError: Left and right DataArray objects are not identical Differing dimensions: (x_bins: 3, y: 2) != (y: 2, stacked_x_bins: 3) Differing values: L array([[ 1., 2.], [ 5., 6.], [ 9., 10.]]) R array([[ 1., 5., 9.], [ 2., 6., 10.]]) Differing coordinates: L * x_bins (x_bins) interval[int64, left] 48B [0, 2) [2, 4) [4, 6) R * x_bins (stacked_x_bins) object 24B [0, 2) [2, 4) [4, 6) Coordinates only on the right object: * stacked_x_bins (stacked_x_bins) object 24B MultiIndex

Check failure on line 1057 in xarray/tests/test_groupby.py

View workflow job for this annotation

GitHub Actions / macos-latest py3.10

test_groupby_bins_cut_kwargs[False] AssertionError: Left and right DataArray objects are not identical Differing dimensions: (x_bins: 3, y: 2) != (y: 2, stacked_x_bins: 3) Differing values: L array([[ 1., 2.], [ 5., 6.], [ 9., 10.]]) R array([[ 1., 5., 9.], [ 2., 6., 10.]]) Differing coordinates: L * x_bins (x_bins) interval[int64, left] 48B [0, 2) [2, 4) [4, 6) R * x_bins (stacked_x_bins) object 24B [0, 2) [2, 4) [4, 6) Coordinates only on the right object: * stacked_x_bins (stacked_x_bins) object 24B MultiIndex

Check failure on line 1057 in xarray/tests/test_groupby.py

View workflow job for this annotation

GitHub Actions / macos-latest py3.13

test_groupby_bins_cut_kwargs[False] AssertionError: Left and right DataArray objects are not identical Differing dimensions: (x_bins: 3, y: 2) != (y: 2, stacked_x_bins: 3) Differing values: L array([[ 1., 2.], [ 5., 6.], [ 9., 10.]]) R array([[ 1., 5., 9.], [ 2., 6., 10.]]) Differing coordinates: L * x_bins (x_bins) interval[int64, left] 48B [0, 2) [2, 4) [4, 6) R * x_bins (stacked_x_bins) object 24B [0, 2) [2, 4) [4, 6) Coordinates only on the right object: * stacked_x_bins (stacked_x_bins) object 24B MultiIndex

Check failure on line 1057 in xarray/tests/test_groupby.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.13 all-but-numba

test_groupby_bins_cut_kwargs[False] AssertionError: Left and right DataArray objects are not identical Differing dimensions: (x_bins: 3, y: 2) != (y: 2, stacked_x_bins: 3) Differing values: L array([[ 1., 2.], [ 5., 6.], [ 9., 10.]]) R array([[ 1., 5., 9.], [ 2., 6., 10.]]) Differing coordinates: L * x_bins (x_bins) interval[int64, left] 48B [0, 2) [2, 4) [4, 6) R * x_bins (stacked_x_bins) object 24B [0, 2) [2, 4) [4, 6) Coordinates only on the right object: * stacked_x_bins (stacked_x_bins) object 24B MultiIndex

with xr.set_options(use_flox=use_flox):
actual = da.groupby(
Expand Down Expand Up @@ -1899,7 +1899,7 @@

rs = array.resample(time=resample_freq)
shuffled = rs.shuffle_to_chunks().resample(time=resample_freq)
actual = rs.mean()

Check failure on line 1902 in xarray/tests/test_groupby.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.10 bare-minimum

TestDataArrayResample.test_resample[False-1234567890us-True] xarray.structure.alignment.AlignmentError: cannot align objects on coordinate '__resample_dim__' because of conflicting indexes first index: PandasIndex(DatetimeIndex([ '2000-01-01 00:00:00', '2000-01-01 00:20:34.567890', '2000-01-01 00:41:09.135780', '2000-01-01 01:01:43.703670', '2000-01-01 01:22:18.271560', '2000-01-01 01:42:52.839450', '2000-01-01 02:03:27.407340', '2000-01-01 02:24:01.975230', '2000-01-01 02:44:36.543120', '2000-01-01 03:05:11.111010', ... '2000-01-03 02:45:16.047720', '2000-01-03 03:05:50.615610', '2000-01-03 03:26:25.183500', '2000-01-03 03:46:59.751390', '2000-01-03 04:07:34.319280', '2000-01-03 04:28:08.887170', '2000-01-03 04:48:43.455060', '2000-01-03 05:09:18.022950', '2000-01-03 05:29:52.590840', '2000-01-03 05:50:27.158730'], dtype='datetime64[ns]', name='__resample_dim__', length=158, freq='1234567890U')) second index: PandasIndex(MultiIndex([( '2000-01-01 00:00:00',), ('2000-01-01 05:49:47.654130',), ('2000-01-01 11:39:35.308260',), ('2000-01-01 17:49:57.530280',), ('2000-01-01 23:39:45.184410',), ('2000-01-02 05:50:07.406430',), ('2000-01-02 11:39:55.060560',), ('2000-01-02 17:50:17.282580',), ('2000-01-02 23:40:04.936710',), ('2000-01-03 05:50:27.158730',)], name='stacked___resample_dim__')) first variable: <xarray.IndexVariable '__resample_dim__' (__resample_dim__: 158)> Size: 1kB array(['2000-01-01T00:00:00.000000000', '2000-01-01T00:20:34.567890000', '2000-01-01T00:41:09.135780000', '2000-01-01T01:01:43.703670000', '2000-01-01T01:22:18.271560000', '2000-01-01T01:42:52.839450000', '2000-01-01T02:03:27.407340000', '2000-01-01T02:24:01.975230000', '2000-01-01T02:44:36.543120000', '2000-01-01T03:05:11.111010000', '2000-01-01T03:25:45.678900000', '2000-01-01T03:46:20.246790000', '2000-01-01T04:06:54.814680000', '2000-01-01T04:27:29.382570000', '2000-01-01T04:48:03.950460000', '2000-01-01T05:08:38.518350000', '2000-01-01T05:29:13.086240000', '2000-01-01T05:49:47.654130000', 
'2000-01-01T06:10:22.222020000', '2000-01-01T06:30:56.789910000', '2000-01-01T06:51:31.357800000', '2000-01-01T07:12:05.925690000', '2000-01-01T07:32:40.493580000', '2000-01-01T07:53:15.061470000', '2000-01-01T08:13:49.629360000', '2000-01-01T08:34:24.197250000', '2000-01-01T08:54:58.765140000', '2000-01-01T09:15:33.333030000', '2000-01-01T09:36:07.900920000', '2000-01-01T09:56:42.468810000', '2000-01-01T10:17:17.036700000', '2000-01-01T10:37:51.604590000', '2000-01-01T10:58:26.172480000', '2000-01-01T11:19:00.740370000', '2000-01-01T11:39:35.308260000', '2000-01-01T12:00:09.876150000', '2000-01-01T12:20:44.444040000', '2000-01-01T12:41:19.011930000', '2000-01-01T13:01:53.579820000', '2000-01-01T13:22:28.147710000', '2000-01-01T13:43:02.715600000', '2000-01-01T14:03:37.283490000', '2000-01-01T14:24:11.851380000', '2000-01-01T14:44:46.419270000', '2000-01-01T15:05:20.987160000', '2000-01-01T15:25:55.555050000', '2000-01-01T15:46:30.122940000', '2000-01-01T16:07:04.690830000', '2000-01-01T16:27:39.258720000', '2000-01-01T16:48:13.826610000', '2000-01-01T17:08:48.394500000', '2000-01-01T17:29:22.962390000', '2000-01-01T17:49:57.530280000', '2000-01-01T18:10:32.098170000', '2000-01-01T18:31:06.666060000', '2000-01-01T18:51:41.233950000', '2000-01-01T19:12:15.801840000', '2000-01-01T19:32:50.369730000', '2000-01-01T19:53:24.937620000', '2000-01-01T20:13:59.505510000', '2000-01-01T20:34:34.073400000', '2000-01-01T20:55:08.641290000', '2000-01-01T21:15:43.209180000', '2000-01-01T21:36:17.777070000', '2000-01-01T21:56:52.344960000', '2000-01-01T22:17:26.9128

Check failure on line 1902 in xarray/tests/test_groupby.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.10 bare-minimum

TestDataArrayResample.test_resample[False-1234567890us-False] xarray.structure.alignment.AlignmentError: cannot align objects on coordinate '__resample_dim__' because of conflicting indexes first index: PandasIndex(DatetimeIndex([ '2000-01-01 00:00:00', '2000-01-01 00:20:34.567890', '2000-01-01 00:41:09.135780', '2000-01-01 01:01:43.703670', '2000-01-01 01:22:18.271560', '2000-01-01 01:42:52.839450', '2000-01-01 02:03:27.407340', '2000-01-01 02:24:01.975230', '2000-01-01 02:44:36.543120', '2000-01-01 03:05:11.111010', ... '2000-01-03 02:45:16.047720', '2000-01-03 03:05:50.615610', '2000-01-03 03:26:25.183500', '2000-01-03 03:46:59.751390', '2000-01-03 04:07:34.319280', '2000-01-03 04:28:08.887170', '2000-01-03 04:48:43.455060', '2000-01-03 05:09:18.022950', '2000-01-03 05:29:52.590840', '2000-01-03 05:50:27.158730'], dtype='datetime64[ns]', name='__resample_dim__', length=158, freq='1234567890U')) second index: PandasIndex(MultiIndex([( '2000-01-01 00:00:00',), ('2000-01-01 05:49:47.654130',), ('2000-01-01 11:39:35.308260',), ('2000-01-01 17:49:57.530280',), ('2000-01-01 23:39:45.184410',), ('2000-01-02 05:50:07.406430',), ('2000-01-02 11:39:55.060560',), ('2000-01-02 17:50:17.282580',), ('2000-01-02 23:40:04.936710',), ('2000-01-03 05:50:27.158730',)], name='stacked___resample_dim__')) first variable: <xarray.IndexVariable '__resample_dim__' (__resample_dim__: 158)> Size: 1kB array(['2000-01-01T00:00:00.000000000', '2000-01-01T00:20:34.567890000', '2000-01-01T00:41:09.135780000', '2000-01-01T01:01:43.703670000', '2000-01-01T01:22:18.271560000', '2000-01-01T01:42:52.839450000', '2000-01-01T02:03:27.407340000', '2000-01-01T02:24:01.975230000', '2000-01-01T02:44:36.543120000', '2000-01-01T03:05:11.111010000', '2000-01-01T03:25:45.678900000', '2000-01-01T03:46:20.246790000', '2000-01-01T04:06:54.814680000', '2000-01-01T04:27:29.382570000', '2000-01-01T04:48:03.950460000', '2000-01-01T05:08:38.518350000', '2000-01-01T05:29:13.086240000', 
'2000-01-01T05:49:47.654130000', '2000-01-01T06:10:22.222020000', '2000-01-01T06:30:56.789910000', '2000-01-01T06:51:31.357800000', '2000-01-01T07:12:05.925690000', '2000-01-01T07:32:40.493580000', '2000-01-01T07:53:15.061470000', '2000-01-01T08:13:49.629360000', '2000-01-01T08:34:24.197250000', '2000-01-01T08:54:58.765140000', '2000-01-01T09:15:33.333030000', '2000-01-01T09:36:07.900920000', '2000-01-01T09:56:42.468810000', '2000-01-01T10:17:17.036700000', '2000-01-01T10:37:51.604590000', '2000-01-01T10:58:26.172480000', '2000-01-01T11:19:00.740370000', '2000-01-01T11:39:35.308260000', '2000-01-01T12:00:09.876150000', '2000-01-01T12:20:44.444040000', '2000-01-01T12:41:19.011930000', '2000-01-01T13:01:53.579820000', '2000-01-01T13:22:28.147710000', '2000-01-01T13:43:02.715600000', '2000-01-01T14:03:37.283490000', '2000-01-01T14:24:11.851380000', '2000-01-01T14:44:46.419270000', '2000-01-01T15:05:20.987160000', '2000-01-01T15:25:55.555050000', '2000-01-01T15:46:30.122940000', '2000-01-01T16:07:04.690830000', '2000-01-01T16:27:39.258720000', '2000-01-01T16:48:13.826610000', '2000-01-01T17:08:48.394500000', '2000-01-01T17:29:22.962390000', '2000-01-01T17:49:57.530280000', '2000-01-01T18:10:32.098170000', '2000-01-01T18:31:06.666060000', '2000-01-01T18:51:41.233950000', '2000-01-01T19:12:15.801840000', '2000-01-01T19:32:50.369730000', '2000-01-01T19:53:24.937620000', '2000-01-01T20:13:59.505510000', '2000-01-01T20:34:34.073400000', '2000-01-01T20:55:08.641290000', '2000-01-01T21:15:43.209180000', '2000-01-01T21:36:17.777070000', '2000-01-01T21:56:52.344960000', '2000-01-01T22:17:26.912

Check failure on line 1902 in xarray/tests/test_groupby.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.10 bare-minimum

TestDataArrayResample.test_resample[False-resample_freq3-True] xarray.structure.alignment.AlignmentError: cannot align objects on coordinate '__resample_dim__' because of conflicting indexes first index: PandasIndex(DatetimeIndex(['2000-01-01 00:00:00', '2000-01-01 02:00:00', '2000-01-01 04:00:00', '2000-01-01 06:00:00', '2000-01-01 08:00:00', '2000-01-01 10:00:00', '2000-01-01 12:00:00', '2000-01-01 14:00:00', '2000-01-01 16:00:00', '2000-01-01 18:00:00', '2000-01-01 20:00:00', '2000-01-01 22:00:00', '2000-01-02 00:00:00', '2000-01-02 02:00:00', '2000-01-02 04:00:00', '2000-01-02 06:00:00', '2000-01-02 08:00:00', '2000-01-02 10:00:00', '2000-01-02 12:00:00', '2000-01-02 14:00:00', '2000-01-02 16:00:00', '2000-01-02 18:00:00', '2000-01-02 20:00:00', '2000-01-02 22:00:00', '2000-01-03 00:00:00', '2000-01-03 02:00:00', '2000-01-03 04:00:00', '2000-01-03 06:00:00'], dtype='datetime64[ns]', name='__resample_dim__', freq='2H')) second index: PandasIndex(MultiIndex([('2000-01-01 00:00:00',), ('2000-01-01 06:00:00',), ('2000-01-01 12:00:00',), ('2000-01-01 18:00:00',), ('2000-01-02 00:00:00',), ('2000-01-02 06:00:00',), ('2000-01-02 12:00:00',), ('2000-01-02 18:00:00',), ('2000-01-03 00:00:00',), ('2000-01-03 06:00:00',)], name='stacked___resample_dim__')) first variable: <xarray.IndexVariable '__resample_dim__' (__resample_dim__: 28)> Size: 224B array(['2000-01-01T00:00:00.000000000', '2000-01-01T02:00:00.000000000', '2000-01-01T04:00:00.000000000', '2000-01-01T06:00:00.000000000', '2000-01-01T08:00:00.000000000', '2000-01-01T10:00:00.000000000', '2000-01-01T12:00:00.000000000', '2000-01-01T14:00:00.000000000', '2000-01-01T16:00:00.000000000', '2000-01-01T18:00:00.000000000', '2000-01-01T20:00:00.000000000', '2000-01-01T22:00:00.000000000', '2000-01-02T00:00:00.000000000', '2000-01-02T02:00:00.000000000', '2000-01-02T04:00:00.000000000', '2000-01-02T06:00:00.000000000', '2000-01-02T08:00:00.000000000', '2000-01-02T10:00:00.000000000', '2000-01-02T12:00:00.000000000', 
'2000-01-02T14:00:00.000000000', '2000-01-02T16:00:00.000000000', '2000-01-02T18:00:00.000000000', '2000-01-02T20:00:00.000000000', '2000-01-02T22:00:00.000000000', '2000-01-03T00:00:00.000000000', '2000-01-03T02:00:00.000000000', '2000-01-03T04:00:00.000000000', '2000-01-03T06:00:00.000000000'], dtype='datetime64[ns]') second variable: <xarray.IndexVariable 'stacked___resample_dim__' (stacked___resample_dim__: 10)> Size: 80B array(['2000-01-01T00:00:00.000000000', '2000-01-01T06:00:00.000000000', '2000-01-01T12:00:00.000000000', '2000-01-01T18:00:00.000000000', '2000-01-02T00:00:00.000000000', '2000-01-02T06:00:00.000000000', '2000-01-02T12:00:00.000000000', '2000-01-02T18:00:00.000000000', '2000-01-03T00:00:00.000000000', '2000-01-03T06:00:00.000000000'], dtype='datetime64[ns]')
expected = resample_as_pandas(array, resample_freq)
assert_identical(expected, actual)
assert_identical(expected, shuffled.mean())
Expand Down Expand Up @@ -3308,6 +3308,33 @@
ds.groupby_bins("x", bins=[1, 2, 3], eagerly_compute_group=False)


def test_groupby_return_group_dataset_type(dataset):
    # Regression test for GH10246: group labels are plain values when
    # grouping by a string, but tuples when grouping by a sequence —
    # even a length-1 sequence.
    def first_label(gb):
        label, _group = next(iter(gb))
        return label

    assert isinstance(first_label(dataset.groupby("baz")), str)
    sequence_label = first_label(dataset.groupby(["baz"]))
    assert isinstance(sequence_label, tuple)
    assert isinstance(sequence_label[0], str)


def test_groupby_return_group_dataarray_type(array):
    # Regression test for GH10246: same label-type contract as the
    # Dataset case, exercised through a DataArray.
    def first_label(gb):
        label, _group = next(iter(gb))
        return label

    assert isinstance(first_label(array.groupby("x")), str)
    sequence_label = first_label(array.groupby(["x"]))
    assert isinstance(sequence_label, tuple)
    assert isinstance(sequence_label[0], str)


def test_groupby_return_group_type_raise(dataset):
    # ``groupby_bins`` does not accept a sequence as the group
    # argument; it must reject it with a clear TypeError.
    bins = [0, 1]
    with pytest.raises(TypeError, match="xarray variable or dimension"):
        dataset.groupby_bins(["y"], bins)


# TODO: Possible property tests to add to this module
# 1. lambda x: x
# 2. grouped-reduce on unique coords is identical to array
Expand Down
Loading