diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index b884b246f47..76f4f0d5e7e 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -54,6 +54,9 @@ jobs: - env: "bare-minimum" python-version: "3.10" os: ubuntu-latest + - env: "bare-min-and-scipy" + python-version: "3.10" + os: ubuntu-latest - env: "min-all-deps" python-version: "3.10" os: ubuntu-latest diff --git a/ci/requirements/all-but-dask.yml b/ci/requirements/all-but-dask.yml index ca4943bddb1..5f5db4a0f18 100644 --- a/ci/requirements/all-but-dask.yml +++ b/ci/requirements/all-but-dask.yml @@ -4,7 +4,7 @@ channels: - nodefaults dependencies: - aiobotocore - - array-api-strict + - array-api-strict<2.4 - boto3 - bottleneck - cartopy diff --git a/ci/requirements/all-but-numba.yml b/ci/requirements/all-but-numba.yml index fa7ad81f198..7c492aec704 100644 --- a/ci/requirements/all-but-numba.yml +++ b/ci/requirements/all-but-numba.yml @@ -6,7 +6,7 @@ dependencies: # Pin a "very new numpy" (updated Sept 24, 2024) - numpy>=2.1.1 - aiobotocore - - array-api-strict + - array-api-strict<2.4 - boto3 - bottleneck - cartopy diff --git a/ci/requirements/bare-min-and-scipy.yml b/ci/requirements/bare-min-and-scipy.yml new file mode 100644 index 00000000000..5e5522faaea --- /dev/null +++ b/ci/requirements/bare-min-and-scipy.yml @@ -0,0 +1,18 @@ +name: xarray-tests +channels: + - conda-forge + - nodefaults +dependencies: + - python=3.10 + - coveralls + - pip + - pytest + - pytest-cov + - pytest-env + - pytest-mypy-plugins + - pytest-timeout + - pytest-xdist + - numpy=1.24 + - packaging=23.1 + - pandas=2.1 + - scipy=1.11 diff --git a/ci/requirements/environment-3.14.yml b/ci/requirements/environment-3.14.yml index 1e6ee7ff5f9..06c4df82663 100644 --- a/ci/requirements/environment-3.14.yml +++ b/ci/requirements/environment-3.14.yml @@ -4,7 +4,7 @@ channels: - nodefaults dependencies: - aiobotocore - - array-api-strict + - array-api-strict<2.4 - boto3 - bottleneck - cartopy diff --git 
a/ci/requirements/environment-windows-3.14.yml b/ci/requirements/environment-windows-3.14.yml index 4eb2049f2e6..dd48add6b73 100644 --- a/ci/requirements/environment-windows-3.14.yml +++ b/ci/requirements/environment-windows-3.14.yml @@ -2,7 +2,7 @@ name: xarray-tests channels: - conda-forge dependencies: - - array-api-strict + - array-api-strict<2.4 - boto3 - bottleneck - cartopy diff --git a/ci/requirements/environment-windows.yml b/ci/requirements/environment-windows.yml index 45cbebd38db..3213ef687d3 100644 --- a/ci/requirements/environment-windows.yml +++ b/ci/requirements/environment-windows.yml @@ -2,7 +2,7 @@ name: xarray-tests channels: - conda-forge dependencies: - - array-api-strict + - array-api-strict<2.4 - boto3 - bottleneck - cartopy diff --git a/ci/requirements/environment.yml b/ci/requirements/environment.yml index a9499694e15..fc54b6600fe 100644 --- a/ci/requirements/environment.yml +++ b/ci/requirements/environment.yml @@ -4,7 +4,7 @@ channels: - nodefaults dependencies: - aiobotocore - - array-api-strict + - array-api-strict<2.4 - boto3 - bottleneck - cartopy diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 618fc72763d..ceb79a3e173 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -26,6 +26,8 @@ Bug fixes ~~~~~~~~~ - Fix Pydap test_cmp_local_file for numpy 2.3.0 changes, 1. do always return arrays for all versions and 2. skip astype(str) for numpy >= 2.3.0 for expected data. (:pull:`10421`) By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_. +- Fix the SciPy backend for netCDF3 files. (:issue:`8909`, :pull:`10376`) + By `Deepak Cherian <https://github.com/dcherian>`_. 
Documentation diff --git a/xarray/backends/memory.py b/xarray/backends/memory.py index aba767ab731..22cb47d85f2 100644 --- a/xarray/backends/memory.py +++ b/xarray/backends/memory.py @@ -5,6 +5,7 @@ import numpy as np from xarray.backends.common import AbstractWritableDataStore +from xarray.core import indexing from xarray.core.variable import Variable @@ -24,7 +25,12 @@ def get_attrs(self): return self._attributes def get_variables(self): - return self._variables + res = {} + for k, v in self._variables.items(): + v = v.copy(deep=True) + res[k] = v + v._data = indexing.LazilyIndexedArray(v._data) + return res def get_dimensions(self): return {d: s for v in self._variables.values() for d, s in v.dims.items()} diff --git a/xarray/backends/scipy_.py b/xarray/backends/scipy_.py index 93d0e40a6e1..16fb4528f55 100644 --- a/xarray/backends/scipy_.py +++ b/xarray/backends/scipy_.py @@ -190,7 +190,7 @@ def ds(self): def open_store_variable(self, name, var): return Variable( var.dimensions, - ScipyArrayWrapper(name, self), + indexing.LazilyIndexedArray(ScipyArrayWrapper(name, self)), _decode_attrs(var._attributes), ) diff --git a/xarray/coding/common.py b/xarray/coding/common.py index 1b455009668..0e8d7e1955e 100644 --- a/xarray/coding/common.py +++ b/xarray/coding/common.py @@ -63,6 +63,10 @@ def __init__(self, array, func: Callable, dtype: np.typing.DTypeLike): def dtype(self) -> np.dtype: return np.dtype(self._dtype) + def transpose(self, order): + # For elementwise functions, we can compose transpose and function application + return type(self)(self.array.transpose(order), self.func, self.dtype) + def _oindex_get(self, key): return type(self)(self.array.oindex[key], self.func, self.dtype) diff --git a/xarray/coding/strings.py b/xarray/coding/strings.py index 4ca6a3f0a46..ea2f58274b6 100644 --- a/xarray/coding/strings.py +++ b/xarray/coding/strings.py @@ -221,7 +221,7 @@ class StackedBytesArray(indexing.ExplicitlyIndexedNDArrayMixin): values, when accessed, are 
automatically stacked along the last dimension. >>> indexer = indexing.BasicIndexer((slice(None),)) - >>> StackedBytesArray(np.array(["a", "b", "c"], dtype="S1"))[indexer] + >>> np.array(StackedBytesArray(np.array(["a", "b", "c"], dtype="S1"))[indexer]) array(b'abc', dtype='|S3') """ @@ -250,14 +250,17 @@ def __repr__(self): return f"{type(self).__name__}({self.array!r})" def _vindex_get(self, key): - return _numpy_char_to_bytes(self.array.vindex[key]) + return type(self)(self.array.vindex[key]) def _oindex_get(self, key): - return _numpy_char_to_bytes(self.array.oindex[key]) + return type(self)(self.array.oindex[key]) def __getitem__(self, key): # require slicing the last dimension completely key = type(key)(indexing.expanded_indexer(key.tuple, self.array.ndim)) if key.tuple[-1] != slice(None): raise IndexError("too many indices") - return _numpy_char_to_bytes(self.array[key]) + return type(self)(self.array[key]) + + def get_duck_array(self): + return _numpy_char_to_bytes(self.array.get_duck_array()) diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index 662fec4b2c4..3b7be898ccf 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -21,6 +21,7 @@ ) from xarray.coding.times import CFDatetimeCoder, CFTimedeltaCoder from xarray.core import dtypes, duck_array_ops, indexing +from xarray.core.types import Self from xarray.core.variable import Variable if TYPE_CHECKING: @@ -58,13 +59,16 @@ def dtype(self) -> np.dtype: return np.dtype(self.array.dtype.kind + str(self.array.dtype.itemsize)) def _oindex_get(self, key): - return np.asarray(self.array.oindex[key], dtype=self.dtype) + return type(self)(self.array.oindex[key]) def _vindex_get(self, key): - return np.asarray(self.array.vindex[key], dtype=self.dtype) + return type(self)(self.array.vindex[key]) - def __getitem__(self, key) -> np.ndarray: - return np.asarray(self.array[key], dtype=self.dtype) + def __getitem__(self, key) -> Self: + return type(self)(self.array[key]) + + def 
get_duck_array(self): + return duck_array_ops.astype(self.array.get_duck_array(), dtype=self.dtype) class BoolTypeArray(indexing.ExplicitlyIndexedNDArrayMixin): @@ -96,13 +100,16 @@ def dtype(self) -> np.dtype: return np.dtype("bool") def _oindex_get(self, key): - return np.asarray(self.array.oindex[key], dtype=self.dtype) + return type(self)(self.array.oindex[key]) def _vindex_get(self, key): - return np.asarray(self.array.vindex[key], dtype=self.dtype) + return type(self)(self.array.vindex[key]) + + def __getitem__(self, key) -> Self: + return type(self)(self.array[key]) - def __getitem__(self, key) -> np.ndarray: - return np.asarray(self.array[key], dtype=self.dtype) + def get_duck_array(self): + return duck_array_ops.astype(self.array.get_duck_array(), dtype=self.dtype) def _apply_mask( diff --git a/xarray/conventions.py b/xarray/conventions.py index c9cd2a5dcdc..5ae40ea57d8 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -18,7 +18,7 @@ ) from xarray.core.utils import emit_user_level_warning from xarray.core.variable import IndexVariable, Variable -from xarray.namedarray.utils import is_duck_dask_array +from xarray.namedarray.utils import is_duck_array CF_RELATED_DATA = ( "bounds", @@ -248,7 +248,15 @@ def decode_cf_variable( encoding.setdefault("dtype", original_dtype) - if not is_duck_dask_array(data): + if ( + # we don't need to lazily index duck arrays + not is_duck_array(data) + # These arrays already support lazy indexing + # OR for IndexingAdapters, it makes no sense to wrap them + and not isinstance(data, indexing.ExplicitlyIndexedNDArrayMixin) + ): + # this path applies to bare BackendArray objects. 
+ # It is not hit for any internal Xarray backend data = indexing.LazilyIndexedArray(data) return Variable(dimensions, data, attributes, encoding=encoding, fastpath=True) diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py index ed71865060a..552e743e1f1 100644 --- a/xarray/core/indexes.py +++ b/xarray/core/indexes.py @@ -717,7 +717,7 @@ def from_variables( # preserve wrapped pd.Index (if any) # accessing `.data` can load data from disk, so we only access if needed - data = var._data.array if hasattr(var._data, "array") else var.data + data = var._data if isinstance(var._data, PandasIndexingAdapter) else var.data # type: ignore[redundant-expr] # multi-index level variable: get level index if isinstance(var._data, PandasMultiIndexingAdapter): level = var._data.level diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 68ff9233080..6a42e135587 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -1427,6 +1427,25 @@ def test_string_object_warning(self) -> None: with self.roundtrip(original) as actual: assert_identical(original, actual) + @pytest.mark.parametrize( + "indexer", + ( + {"y": [1]}, + {"y": slice(2)}, + {"y": 1}, + {"x": [1], "y": [1]}, + {"x": ("x0", [0, 1]), "y": ("x0", [0, 1])}, + ), + ) + def test_indexing_roundtrip(self, indexer) -> None: + # regression test for GH8909 + ds = xr.Dataset() + ds["A"] = xr.DataArray([[1, "a"], [2, "b"]], dims=["x", "y"]) + with self.roundtrip(ds) as ds2: + expected = ds2.sel(indexer) + with self.roundtrip(expected) as actual: + assert_identical(actual, expected) + class NetCDFBase(CFEncodedBase): """Tests for all netCDF3 and netCDF4 backends."""