From 5c56acf671b3e46833e4632f57fbe85f0f026e3a Mon Sep 17 00:00:00 2001 From: Julia Signell Date: Fri, 14 Feb 2025 13:40:16 -0500 Subject: [PATCH 01/20] Remove default values in private functions --- xarray/backends/api.py | 3 +- xarray/core/combine.py | 57 ++++++++++++++++++++---------------- xarray/core/concat.py | 26 ++++++++-------- xarray/tests/test_combine.py | 17 ++++++++++- 4 files changed, 63 insertions(+), 40 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 950a5d16273..ba048750bce 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -34,7 +34,7 @@ ) from xarray.backends.locks import _get_scheduler from xarray.coders import CFDatetimeCoder, CFTimedeltaCoder -from xarray.core import indexing +from xarray.core import dtypes, indexing from xarray.core.combine import ( _infer_concat_order_from_positions, _nested_combine, @@ -1654,6 +1654,7 @@ def open_mfdataset( ids=ids, join=join, combine_attrs=combine_attrs, + fill_value=dtypes.NA, ) elif combine == "by_coords": # Redo ordering from coordinates, ignoring how they were ordered diff --git a/xarray/core/combine.py b/xarray/core/combine.py index f02d046fff6..33f477a28ce 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -200,12 +200,12 @@ def _check_shape_tile_ids(combined_tile_ids): def _combine_nd( combined_ids, concat_dims, - data_vars="all", - coords="different", - compat: CompatOptions = "no_conflicts", - fill_value=dtypes.NA, - join: JoinOptions = "outer", - combine_attrs: CombineAttrsOptions = "drop", + data_vars, + coords, + compat: CompatOptions, + fill_value, + join: JoinOptions, + combine_attrs: CombineAttrsOptions, ): """ Combines an N-dimensional structure of datasets into one by applying a @@ -263,9 +263,9 @@ def _combine_all_along_first_dim( data_vars, coords, compat: CompatOptions, - fill_value=dtypes.NA, - join: JoinOptions = "outer", - combine_attrs: CombineAttrsOptions = "drop", + fill_value, + join: JoinOptions, + combine_attrs: 
CombineAttrsOptions, ): # Group into lines of datasets which must be combined along dim grouped = groupby_defaultdict(list(combined_ids.items()), key=_new_tile_id) @@ -276,7 +276,14 @@ def _combine_all_along_first_dim( combined_ids = dict(sorted(group)) datasets = combined_ids.values() new_combined_ids[new_id] = _combine_1d( - datasets, dim, compat, data_vars, coords, fill_value, join, combine_attrs + datasets, + concat_dim=dim, + compat=compat, + data_vars=data_vars, + coords=coords, + fill_value=fill_value, + join=join, + combine_attrs=combine_attrs, ) return new_combined_ids @@ -284,12 +291,12 @@ def _combine_all_along_first_dim( def _combine_1d( datasets, concat_dim, - compat: CompatOptions = "no_conflicts", - data_vars="all", - coords="different", - fill_value=dtypes.NA, - join: JoinOptions = "outer", - combine_attrs: CombineAttrsOptions = "drop", + compat: CompatOptions, + data_vars, + coords, + fill_value, + join: JoinOptions, + combine_attrs: CombineAttrsOptions, ): """ Applies either concat or merge to 1D list of datasets depending on value @@ -343,9 +350,9 @@ def _nested_combine( data_vars, coords, ids, - fill_value=dtypes.NA, - join: JoinOptions = "outer", - combine_attrs: CombineAttrsOptions = "drop", + fill_value, + join: JoinOptions, + combine_attrs: CombineAttrsOptions, ): if len(datasets) == 0: return Dataset() @@ -619,12 +626,12 @@ def groupby_defaultdict( def _combine_single_variable_hypercube( datasets, - fill_value=dtypes.NA, - data_vars="all", - coords="different", - compat: CompatOptions = "no_conflicts", - join: JoinOptions = "outer", - combine_attrs: CombineAttrsOptions = "no_conflicts", + fill_value, + data_vars, + coords, + compat: CompatOptions, + join: JoinOptions, + combine_attrs: CombineAttrsOptions, ): """ Attempt to combine a list of Datasets into a hypercube using their diff --git a/xarray/core/concat.py b/xarray/core/concat.py index b824aabbb23..a0ea72a7142 100644 --- a/xarray/core/concat.py +++ b/xarray/core/concat.py @@ -483,10 
+483,10 @@ def _dataset_concat( coords: str | list[str], compat: CompatOptions, positions: Iterable[Iterable[int]] | None, - fill_value: Any = dtypes.NA, - join: JoinOptions = "outer", - combine_attrs: CombineAttrsOptions = "override", - create_index_for_new_dim: bool = True, + fill_value: Any, + join: JoinOptions, + combine_attrs: CombineAttrsOptions, + create_index_for_new_dim: bool, ) -> T_Dataset: """ Concatenate a sequence of datasets along a new or existing dimension @@ -722,10 +722,10 @@ def _dataarray_concat( coords: str | list[str], compat: CompatOptions, positions: Iterable[Iterable[int]] | None, - fill_value: object = dtypes.NA, - join: JoinOptions = "outer", - combine_attrs: CombineAttrsOptions = "override", - create_index_for_new_dim: bool = True, + fill_value: object, + join: JoinOptions, + combine_attrs: CombineAttrsOptions, + create_index_for_new_dim: bool, ) -> T_DataArray: from xarray.core.dataarray import DataArray @@ -754,11 +754,11 @@ def _dataarray_concat( ds = _dataset_concat( datasets, - dim, - data_vars, - coords, - compat, - positions, + dim=dim, + data_vars=data_vars, + coords=coords, + compat=compat, + positions=positions, fill_value=fill_value, join=join, combine_attrs=combine_attrs, diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py index cc20ab414ee..956bac350a2 100644 --- a/xarray/tests/test_combine.py +++ b/xarray/tests/test_combine.py @@ -290,6 +290,9 @@ def test_concat_once(self, create_combined_ids, concat_dim): data_vars="all", coords="different", compat="no_conflicts", + fill_value=dtypes.NA, + join="outer", + combine_attrs="drop", ) expected_ds = concat([ds(0), ds(1)], dim=concat_dim) @@ -304,6 +307,9 @@ def test_concat_only_first_dim(self, create_combined_ids): data_vars="all", coords="different", compat="no_conflicts", + fill_value=dtypes.NA, + join="outer", + combine_attrs="drop", ) ds = create_test_data @@ -319,7 +325,16 @@ def test_concat_only_first_dim(self, create_combined_ids): def 
test_concat_twice(self, create_combined_ids, concat_dim): shape = (2, 3) combined_ids = create_combined_ids(shape) - result = _combine_nd(combined_ids, concat_dims=["dim1", concat_dim]) + result = _combine_nd( + combined_ids, + concat_dims=["dim1", concat_dim], + data_vars="all", + coords="different", + compat="no_conflicts", + fill_value=dtypes.NA, + join="outer", + combine_attrs="drop", + ) ds = create_test_data partway1 = concat([ds(0), ds(3)], dim="dim1") From 5461a9ff27bc9ece7d13e3d45af66130473073af Mon Sep 17 00:00:00 2001 From: Julia Signell Date: Mon, 24 Feb 2025 15:04:16 -0500 Subject: [PATCH 02/20] Use sentinel value to change default with warnings --- xarray/backends/api.py | 22 +++-- xarray/core/alignment.py | 30 ++++++- xarray/core/combine.py | 51 ++++++----- xarray/core/concat.py | 140 +++++++++++++++++++++++------ xarray/core/dataset.py | 22 ++++- xarray/core/groupby.py | 18 +++- xarray/core/merge.py | 67 ++++++++++---- xarray/core/options.py | 6 ++ xarray/plot/dataarray_plot.py | 9 +- xarray/util/deprecation_helpers.py | 61 ++++++++++++- 10 files changed, 344 insertions(+), 82 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index ba048750bce..1da89ff9a82 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -49,6 +49,13 @@ from xarray.core.utils import is_remote_uri from xarray.namedarray.daskmanager import DaskManager from xarray.namedarray.parallelcompat import guess_chunkmanager +from xarray.util.deprecation_helpers import ( + _COMPAT_DEFAULT, + _COORDS_DEFAULT, + _DATA_VARS_DEFAULT, + _JOIN_DEFAULT, + CombineKwargDefault, +) if TYPE_CHECKING: try: @@ -1402,14 +1409,16 @@ def open_mfdataset( | Sequence[Index] | None ) = None, - compat: CompatOptions = "no_conflicts", + compat: CompatOptions | CombineKwargDefault = _COMPAT_DEFAULT, preprocess: Callable[[Dataset], Dataset] | None = None, engine: T_Engine | None = None, - data_vars: Literal["all", "minimal", "different"] | list[str] = "all", - 
coords="different", + data_vars: Literal["all", "minimal", "different"] + | list[str] + | CombineKwargDefault = _DATA_VARS_DEFAULT, + coords=_COORDS_DEFAULT, combine: Literal["by_coords", "nested"] = "by_coords", parallel: bool = False, - join: JoinOptions = "outer", + join: JoinOptions | CombineKwargDefault = _JOIN_DEFAULT, attrs_file: str | os.PathLike | None = None, combine_attrs: CombineAttrsOptions = "override", **kwargs, @@ -1596,9 +1605,6 @@ def open_mfdataset( paths1d: list[str | ReadBuffer] if combine == "nested": - if isinstance(concat_dim, str | DataArray) or concat_dim is None: - concat_dim = [concat_dim] # type: ignore[assignment] - # This creates a flat list which is easier to iterate over, whilst # encoding the originally-supplied structure as "ids". # The "ids" are not used at all if combine='by_coords`. @@ -1647,7 +1653,7 @@ def open_mfdataset( # along each dimension, using structure given by "ids" combined = _nested_combine( datasets, - concat_dims=concat_dim, + concat_dim=concat_dim, compat=compat, data_vars=data_vars, coords=coords, diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py index d6cdd45bb49..b2114155cbd 100644 --- a/xarray/core/alignment.py +++ b/xarray/core/alignment.py @@ -2,6 +2,7 @@ import functools import operator +import warnings from collections import defaultdict from collections.abc import Callable, Hashable, Iterable, Mapping from contextlib import suppress @@ -22,6 +23,7 @@ from xarray.core.types import T_Alignable from xarray.core.utils import is_dict_like, is_full_slice from xarray.core.variable import Variable, as_compatible_data, calculate_dimensions +from xarray.util.deprecation_helpers import CombineKwargDefault if TYPE_CHECKING: from xarray.core.dataarray import DataArray @@ -418,12 +420,38 @@ def align_indexes(self) -> None: else: need_reindex = False if need_reindex: + if ( + isinstance(self.join, CombineKwargDefault) + and self.join != "exact" + ): + warnings.warn( + self.join.warning_message( + "This 
change will result in the following ValueError:" + "cannot be aligned with join='exact' because " + "index/labels/sizes are not equal along " + "these coordinates (dimensions): " + + ", ".join( + f"{name!r} {dims!r}" for name, dims in key[0] + ), + recommend_set_options=False, + ), + category=FutureWarning, + stacklevel=2, + ) if self.join == "exact": + new_default_warning = ( + " Failure might be related to new default (join='exact'). " + "Previously the default was join='outer'. " + "The recommendation is to set join explicitly for this case." + ) raise ValueError( "cannot align objects with join='exact' where " "index/labels/sizes are not equal along " "these coordinates (dimensions): " + ", ".join(f"{name!r} {dims!r}" for name, dims in key[0]) + + new_default_warning + if isinstance(self.join, CombineKwargDefault) + else "" ) joiner = self._get_index_joiner(index_cls) joined_index = joiner(matching_indexes) @@ -886,7 +914,7 @@ def align( def deep_align( objects: Iterable[Any], - join: JoinOptions = "inner", + join: JoinOptions | CombineKwargDefault = "inner", copy: bool = True, indexes=None, exclude: str | Iterable[Hashable] = frozenset(), diff --git a/xarray/core/combine.py b/xarray/core/combine.py index 33f477a28ce..b9a0d9f614a 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -12,6 +12,13 @@ from xarray.core.dataset import Dataset from xarray.core.merge import merge from xarray.core.utils import iterate_nested +from xarray.util.deprecation_helpers import ( + _COMPAT_DEFAULT, + _COORDS_DEFAULT, + _DATA_VARS_DEFAULT, + _JOIN_DEFAULT, + CombineKwargDefault, +) if TYPE_CHECKING: from xarray.core.types import ( @@ -202,9 +209,9 @@ def _combine_nd( concat_dims, data_vars, coords, - compat: CompatOptions, + compat: CompatOptions | CombineKwargDefault, fill_value, - join: JoinOptions, + join: JoinOptions | CombineKwargDefault, combine_attrs: CombineAttrsOptions, ): """ @@ -264,7 +271,7 @@ def _combine_all_along_first_dim( coords, compat: 
CompatOptions, fill_value, - join: JoinOptions, + join: JoinOptions | CombineKwargDefault, combine_attrs: CombineAttrsOptions, ): # Group into lines of datasets which must be combined along dim @@ -295,7 +302,7 @@ def _combine_1d( data_vars, coords, fill_value, - join: JoinOptions, + join: JoinOptions | CombineKwargDefault, combine_attrs: CombineAttrsOptions, ): """ @@ -345,18 +352,21 @@ def _new_tile_id(single_id_ds_pair): def _nested_combine( datasets, - concat_dims, + concat_dim, compat, data_vars, coords, ids, fill_value, - join: JoinOptions, + join: JoinOptions | CombineKwargDefault, combine_attrs: CombineAttrsOptions, ): if len(datasets) == 0: return Dataset() + if isinstance(concat_dim, str | DataArray) or concat_dim is None: + concat_dim = [concat_dim] # type: ignore[assignment] + # Arrange datasets for concatenation # Use information from the shape of the user input if not ids: @@ -373,7 +383,7 @@ def _nested_combine( # Apply series of concatenate or merge operations along each dimension combined = _combine_nd( combined_ids, - concat_dims, + concat_dims=concat_dim, compat=compat, data_vars=data_vars, coords=coords, @@ -391,11 +401,11 @@ def _nested_combine( def combine_nested( datasets: DATASET_HYPERCUBE, concat_dim: str | DataArray | None | Sequence[str | DataArray | pd.Index | None], - compat: str = "no_conflicts", - data_vars: str = "all", - coords: str = "different", + compat: str | CombineKwargDefault = _COMPAT_DEFAULT, + data_vars: str | CombineKwargDefault = _DATA_VARS_DEFAULT, + coords: str | CombineKwargDefault = _COORDS_DEFAULT, fill_value: object = dtypes.NA, - join: JoinOptions = "outer", + join: JoinOptions | CombineKwargDefault = _JOIN_DEFAULT, combine_attrs: CombineAttrsOptions = "drop", ) -> Dataset: """ @@ -588,13 +598,10 @@ def combine_nested( if mixed_datasets_and_arrays: raise ValueError("Can't combine datasets with unnamed arrays.") - if isinstance(concat_dim, str | DataArray) or concat_dim is None: - concat_dim = [concat_dim] - # The 
IDs argument tells _nested_combine that datasets aren't yet sorted return _nested_combine( datasets, - concat_dims=concat_dim, + concat_dim=concat_dim, compat=compat, data_vars=data_vars, coords=coords, @@ -629,8 +636,8 @@ def _combine_single_variable_hypercube( fill_value, data_vars, coords, - compat: CompatOptions, - join: JoinOptions, + compat: CompatOptions | CombineKwargDefault, + join: JoinOptions | CombineKwargDefault, combine_attrs: CombineAttrsOptions, ): """ @@ -685,11 +692,13 @@ def _combine_single_variable_hypercube( def combine_by_coords( data_objects: Iterable[Dataset | DataArray] = [], - compat: CompatOptions = "no_conflicts", - data_vars: Literal["all", "minimal", "different"] | list[str] = "all", - coords: str = "different", + compat: CompatOptions | CombineKwargDefault = _COMPAT_DEFAULT, + data_vars: Literal["all", "minimal", "different"] + | list[str] + | CombineKwargDefault = _DATA_VARS_DEFAULT, + coords: str | CombineKwargDefault = _COORDS_DEFAULT, fill_value: object = dtypes.NA, - join: JoinOptions = "outer", + join: JoinOptions | CombineKwargDefault = _JOIN_DEFAULT, combine_attrs: CombineAttrsOptions = "no_conflicts", ) -> Dataset | DataArray: """ diff --git a/xarray/core/concat.py b/xarray/core/concat.py index a0ea72a7142..c8776baa934 100644 --- a/xarray/core/concat.py +++ b/xarray/core/concat.py @@ -1,5 +1,6 @@ from __future__ import annotations +import warnings from collections.abc import Hashable, Iterable from typing import TYPE_CHECKING, Any, Union, overload @@ -20,6 +21,13 @@ from xarray.core.types import T_DataArray, T_Dataset, T_Variable from xarray.core.variable import Variable from xarray.core.variable import concat as concat_vars +from xarray.util.deprecation_helpers import ( + _COMPAT_CONCAT_DEFAULT, + _COORDS_DEFAULT, + _DATA_VARS_DEFAULT, + _JOIN_DEFAULT, + CombineKwargDefault, +) if TYPE_CHECKING: from xarray.core.types import ( @@ -37,12 +45,12 @@ def concat( objs: Iterable[T_Dataset], dim: Hashable | T_Variable | T_DataArray 
| pd.Index | Any, - data_vars: T_DataVars = "all", - coords: ConcatOptions | list[Hashable] = "different", - compat: CompatOptions = "equals", + data_vars: T_DataVars | CombineKwargDefault = _DATA_VARS_DEFAULT, + coords: ConcatOptions | list[Hashable] | CombineKwargDefault = _COORDS_DEFAULT, + compat: CompatOptions | CombineKwargDefault = _COMPAT_CONCAT_DEFAULT, positions: Iterable[Iterable[int]] | None = None, fill_value: object = dtypes.NA, - join: JoinOptions = "outer", + join: JoinOptions | CombineKwargDefault = _JOIN_DEFAULT, combine_attrs: CombineAttrsOptions = "override", create_index_for_new_dim: bool = True, ) -> T_Dataset: ... @@ -52,12 +60,12 @@ def concat( def concat( objs: Iterable[T_DataArray], dim: Hashable | T_Variable | T_DataArray | pd.Index | Any, - data_vars: T_DataVars = "all", - coords: ConcatOptions | list[Hashable] = "different", - compat: CompatOptions = "equals", + data_vars: T_DataVars | CombineKwargDefault = _DATA_VARS_DEFAULT, + coords: ConcatOptions | list[Hashable] | CombineKwargDefault = _COORDS_DEFAULT, + compat: CompatOptions | CombineKwargDefault = _COMPAT_CONCAT_DEFAULT, positions: Iterable[Iterable[int]] | None = None, fill_value: object = dtypes.NA, - join: JoinOptions = "outer", + join: JoinOptions | None = None, combine_attrs: CombineAttrsOptions = "override", create_index_for_new_dim: bool = True, ) -> T_DataArray: ... 
@@ -66,12 +74,12 @@ def concat( def concat( objs, dim, - data_vars: T_DataVars = "all", - coords="different", - compat: CompatOptions = "equals", + data_vars: T_DataVars | CombineKwargDefault = _DATA_VARS_DEFAULT, + coords: ConcatOptions | list[Hashable] | CombineKwargDefault = _COORDS_DEFAULT, + compat: CompatOptions | CombineKwargDefault = _COMPAT_CONCAT_DEFAULT, positions=None, fill_value=dtypes.NA, - join: JoinOptions = "outer", + join: JoinOptions | CombineKwargDefault = _JOIN_DEFAULT, combine_attrs: CombineAttrsOptions = "override", create_index_for_new_dim: bool = True, ): @@ -255,7 +263,9 @@ def concat( except StopIteration as err: raise ValueError("must supply at least one object to concatenate") from err - if compat not in set(_VALID_COMPAT) - {"minimal"}: + if not isinstance(compat, CombineKwargDefault) and compat not in set( + _VALID_COMPAT + ) - {"minimal"}: raise ValueError( f"compat={compat!r} invalid: must be 'broadcast_equals', 'equals', 'identical', 'no_conflicts' or 'override'" ) @@ -320,7 +330,14 @@ def _calc_concat_dim_index( return dim, index -def _calc_concat_over(datasets, dim, dim_names, data_vars: T_DataVars, coords, compat): +def _calc_concat_over( + datasets, + dim, + dim_names, + data_vars: T_DataVars, + coords, + compat, +): """ Determine which dataset variables need to be concatenated in the result, """ @@ -344,11 +361,37 @@ def _calc_concat_over(datasets, dim, dim_names, data_vars: T_DataVars, coords, c concat_dim_lengths.append(ds.sizes.get(dim, 1)) def process_subset_opt(opt, subset): - if isinstance(opt, str): + original = set(concat_over) + compat_str = ( + compat._value if isinstance(compat, CombineKwargDefault) else compat + ) + if isinstance(opt, str | CombineKwargDefault): if opt == "different": + if isinstance(compat, CombineKwargDefault) and compat != "override": + if subset == "data_vars" or not isinstance( + opt, CombineKwargDefault + ): + warnings.warn( + compat.warning_message( + "This change will result in the 
following ValueError:" + f"Cannot specify both {subset}='different' and compat='override'.", + recommend_set_options=False, + ), + category=FutureWarning, + stacklevel=2, + ) + if compat == "override": + new_default_warning = ( + " Failure might be related to new default (compat='override'). " + "Previously the default was compat='equals' or compat='no_conflicts'. " + "The recommendation is to set compat explicitly for this case." + ) raise ValueError( f"Cannot specify both {subset}='different' and compat='override'." + + new_default_warning + if isinstance(compat, CombineKwargDefault) + else "" ) # all nonindexes that are not the same in each dataset for k in getattr(datasets[0], subset): @@ -372,7 +415,7 @@ def process_subset_opt(opt, subset): # first check without comparing values i.e. no computes for var in variables[1:]: - equals[k] = getattr(variables[0], compat)( + equals[k] = getattr(variables[0], compat_str)( var, equiv=lazy_array_equiv ) if equals[k] is not True: @@ -395,7 +438,7 @@ def process_subset_opt(opt, subset): for ds_rhs in datasets[1:]: v_rhs = ds_rhs.variables[k].compute() computed.append(v_rhs) - if not getattr(v_lhs, compat)(v_rhs): + if not getattr(v_lhs, compat_str)(v_rhs): concat_over.add(k) equals[k] = False # computed variables are not to be re-computed @@ -418,6 +461,20 @@ def process_subset_opt(opt, subset): pass else: raise ValueError(f"unexpected value for {subset}: {opt}") + + if ( + isinstance(opt, CombineKwargDefault) + and opt != "minimal" + and original != concat_over + ): + warnings.warn( + opt.warning_message( + "This is likely to lead to different results when multiple datasets" + "have matching variables with overlapping values.", + ), + category=FutureWarning, + stacklevel=2, + ) else: valid_vars = tuple(getattr(datasets[0], subset)) invalid_vars = [k for k in opt if k not in valid_vars] @@ -479,14 +536,15 @@ def _parse_datasets( def _dataset_concat( datasets: Iterable[T_Dataset], dim: str | T_Variable | T_DataArray | 
pd.Index, - data_vars: T_DataVars, - coords: str | list[str], - compat: CompatOptions, + data_vars: T_DataVars | CombineKwargDefault, + coords: str | list[str] | CombineKwargDefault, + compat: CompatOptions | CombineKwargDefault, positions: Iterable[Iterable[int]] | None, fill_value: Any, - join: JoinOptions, + join: JoinOptions | CombineKwargDefault, combine_attrs: CombineAttrsOptions, create_index_for_new_dim: bool, + warn_about_data_vars: bool = True, ) -> T_Dataset: """ Concatenate a sequence of datasets along a new or existing dimension @@ -501,6 +559,35 @@ def _dataset_concat( "The elements in the input list need to be either all 'Dataset's or all 'DataArray's" ) + if not isinstance(compat, CombineKwargDefault) and compat not in set( + _VALID_COMPAT + ) - {"minimal"}: + raise ValueError( + f"compat={compat!r} invalid: must be 'broadcast_equals', 'equals', 'identical', 'no_conflicts' or 'override'" + ) + + if ( + warn_about_data_vars + and isinstance(data_vars, CombineKwargDefault) + and data_vars == "all" + ): + if not isinstance(dim, str): + warnings.warn( + data_vars.warning_message( + "This is likely to lead to different results when using an object as the concat_dim.", + ), + category=FutureWarning, + stacklevel=2, + ) + elif dim is not None and all(dim not in ds for ds in datasets): + warnings.warn( + data_vars.warning_message( + "This is likely to lead to different results when constructing a new dimension.", + ), + category=FutureWarning, + stacklevel=2, + ) + if isinstance(dim, DataArray): dim_var = dim.variable elif isinstance(dim, Variable): @@ -718,12 +805,12 @@ def get_indexes(name): def _dataarray_concat( arrays: Iterable[T_DataArray], dim: str | T_Variable | T_DataArray | pd.Index, - data_vars: T_DataVars, - coords: str | list[str], - compat: CompatOptions, + data_vars: T_DataVars | CombineKwargDefault, + coords: str | list[str] | CombineKwargDefault, + compat: CompatOptions | CombineKwargDefault, positions: Iterable[Iterable[int]] | None, 
fill_value: object, - join: JoinOptions, + join: JoinOptions | CombineKwargDefault, combine_attrs: CombineAttrsOptions, create_index_for_new_dim: bool, ) -> T_DataArray: @@ -736,7 +823,7 @@ def _dataarray_concat( "The elements in the input list need to be either all 'Dataset's or all 'DataArray's" ) - if data_vars != "all": + if not isinstance(data_vars, CombineKwargDefault) and data_vars != "all": raise ValueError( "data_vars is not a valid argument when concatenating DataArray objects" ) @@ -763,6 +850,7 @@ def _dataarray_concat( join=join, combine_attrs=combine_attrs, create_index_for_new_dim=create_index_for_new_dim, + warn_about_data_vars=False, ) merged_attrs = merge_attrs([da.attrs for da in arrays], combine_attrs) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 449f502c43a..af37b1bb3f2 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -132,7 +132,13 @@ from xarray.namedarray.parallelcompat import get_chunked_array_type, guess_chunkmanager from xarray.namedarray.pycompat import array_type, is_chunked_array, to_numpy from xarray.plot.accessor import DatasetPlotAccessor -from xarray.util.deprecation_helpers import _deprecate_positional_args, deprecate_dims +from xarray.util.deprecation_helpers import ( + _COMPAT_DEFAULT, + _JOIN_DEFAULT, + CombineKwargDefault, + _deprecate_positional_args, + deprecate_dims, +) if TYPE_CHECKING: from dask.dataframe import DataFrame as DaskDataFrame @@ -413,6 +419,7 @@ def merge_data_and_coords(data_vars: DataVars, coords) -> _MergeResult: [data_vars, coords], compat="broadcast_equals", join="outer", + combine_attrs="override", explicit_coords=tuple(coords), indexes=coords.xindexes, priority_arg=1, @@ -5506,7 +5513,14 @@ def stack_dataarray(da): # concatenate the arrays stackable_vars = [stack_dataarray(da) for da in self.data_vars.values()] - data_array = concat(stackable_vars, dim=new_dim) + data_array = concat( + stackable_vars, + dim=new_dim, + data_vars="all", + coords="different", + 
compat="equals", + join="outer", + ) if name is not None: data_array.name = name @@ -5750,8 +5764,8 @@ def merge( self, other: CoercibleMapping | DataArray, overwrite_vars: Hashable | Iterable[Hashable] = frozenset(), - compat: CompatOptions = "no_conflicts", - join: JoinOptions = "outer", + compat: CompatOptions | CombineKwargDefault = _COMPAT_DEFAULT, + join: JoinOptions | CombineKwargDefault = _JOIN_DEFAULT, fill_value: Any = xrdtypes.NA, combine_attrs: CombineAttrsOptions = "override", ) -> Self: diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index b28ba390a9f..9a1827c4eb8 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -1526,7 +1526,14 @@ def _combine(self, applied, shortcut=False): if shortcut: combined = self._concat_shortcut(applied, dim, positions) else: - combined = concat(applied, dim) + combined = concat( + applied, + dim, + data_vars="all", + coords="different", + compat="equals", + join="outer", + ) combined = _maybe_reorder(combined, dim, positions, N=self.group1d.size) if isinstance(combined, type(self._obj)): @@ -1686,7 +1693,14 @@ def _combine(self, applied): """Recombine the applied objects like the original.""" applied_example, applied = peek_at(applied) dim, positions = self._infer_concat_args(applied_example) - combined = concat(applied, dim) + combined = concat( + applied, + dim, + data_vars="all", + coords="different", + compat="equals", + join="outer", + ) combined = _maybe_reorder(combined, dim, positions, N=self.group1d.size) # assign coord when the applied function does not return that coord if dim not in applied_example.dims: diff --git a/xarray/core/merge.py b/xarray/core/merge.py index 6426f741750..4a4100cde13 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -1,5 +1,6 @@ from __future__ import annotations +import warnings from collections import defaultdict from collections.abc import Hashable, Iterable, Mapping, Sequence, Set from typing import TYPE_CHECKING, Any, NamedTuple, Union @@ 
-17,6 +18,11 @@ ) from xarray.core.utils import Frozen, compat_dict_union, dict_equiv, equivalent from xarray.core.variable import Variable, as_variable, calculate_dimensions +from xarray.util.deprecation_helpers import ( + _COMPAT_DEFAULT, + _JOIN_DEFAULT, + CombineKwargDefault, +) if TYPE_CHECKING: from xarray.core.coordinates import Coordinates @@ -83,7 +89,7 @@ class MergeError(ValueError): def unique_variable( name: Hashable, variables: list[Variable], - compat: CompatOptions = "broadcast_equals", + compat: CompatOptions | CombineKwargDefault = "broadcast_equals", equals: bool | None = None, ) -> Variable: """Return the unique variable from a list of variables or raise MergeError. @@ -126,9 +132,12 @@ def unique_variable( combine_method = "fillna" if equals is None: + compat_str = ( + compat._value if isinstance(compat, CombineKwargDefault) else compat + ) # first check without comparing values i.e. no computes for var in variables[1:]: - equals = getattr(out, compat)(var, equiv=lazy_array_equiv) + equals = getattr(out, compat_str)(var, equiv=lazy_array_equiv) if equals is not True: break @@ -136,7 +145,7 @@ def unique_variable( # now compare values with minimum number of computes out = out.compute() for var in variables[1:]: - equals = getattr(out, compat)(var) + equals = getattr(out, compat_str)(var) if not equals: break @@ -154,7 +163,7 @@ def unique_variable( def _assert_compat_valid(compat): - if compat not in _VALID_COMPAT: + if not isinstance(compat, CombineKwargDefault) and compat not in _VALID_COMPAT: raise ValueError(f"compat={compat!r} invalid: must be {set(_VALID_COMPAT)}") @@ -196,7 +205,7 @@ def _assert_prioritized_valid( def merge_collected( grouped: dict[Any, list[MergeElement]], prioritized: Mapping[Any, MergeElement] | None = None, - compat: CompatOptions = "minimal", + compat: CompatOptions | CombineKwargDefault = "minimal", combine_attrs: CombineAttrsOptions = "override", equals: dict[Any, bool] | None = None, ) -> tuple[dict[Hashable, 
Variable], dict[Hashable, Index]]: @@ -290,6 +299,21 @@ def merge_collected( merged_vars[name] = unique_variable( name, variables, compat, equals.get(name, None) ) + # This is very likely to result in false positives, but there is no way + # to tell if the output will change without computing. + if ( + isinstance(compat, CombineKwargDefault) + and compat == "no_conflicts" + and len(variables) > 1 + ): + warnings.warn( + compat.warning_message( + "This is likely to lead to different results when" + "combining overlapping variables with the same name.", + ), + category=FutureWarning, + stacklevel=2, + ) except MergeError: if compat != "minimal": # we need more than "minimal" compatibility (for which @@ -626,8 +650,8 @@ class _MergeResult(NamedTuple): def merge_core( objects: Iterable[CoercibleMapping], - compat: CompatOptions = "broadcast_equals", - join: JoinOptions = "outer", + compat: CompatOptions | CombineKwargDefault, + join: JoinOptions | CombineKwargDefault, combine_attrs: CombineAttrsOptions = "override", priority_arg: int | None = None, explicit_coords: Iterable[Hashable] | None = None, @@ -690,7 +714,11 @@ def merge_core( coerced = coerce_pandas_values(objects) aligned = deep_align( - coerced, join=join, copy=False, indexes=indexes, fill_value=fill_value + coerced, + join=join, + copy=False, + indexes=indexes, + fill_value=fill_value, ) for pos, obj in skip_align_objs: @@ -699,7 +727,10 @@ def merge_core( collected = collect_variables_and_indexes(aligned, indexes=indexes) prioritized = _get_priority_vars_and_indexes(aligned, priority_arg, compat=compat) variables, out_indexes = merge_collected( - collected, prioritized, compat=compat, combine_attrs=combine_attrs + collected, + prioritized, + compat=compat, + combine_attrs=combine_attrs, ) dims = calculate_dimensions(variables) @@ -730,8 +761,8 @@ def merge_core( def merge( objects: Iterable[DataArray | CoercibleMapping], - compat: CompatOptions = "no_conflicts", - join: JoinOptions = "outer", + compat: 
CompatOptions | CombineKwargDefault = _COMPAT_DEFAULT, + join: JoinOptions | CombineKwargDefault = _JOIN_DEFAULT, fill_value: object = dtypes.NA, combine_attrs: CombineAttrsOptions = "override", ) -> Dataset: @@ -975,8 +1006,8 @@ def merge( merge_result = merge_core( dict_like_objects, - compat, - join, + compat=compat, + join=join, combine_attrs=combine_attrs, fill_value=fill_value, ) @@ -987,8 +1018,8 @@ def dataset_merge_method( dataset: Dataset, other: CoercibleMapping, overwrite_vars: Hashable | Iterable[Hashable], - compat: CompatOptions, - join: JoinOptions, + compat: CompatOptions | CombineKwargDefault, + join: JoinOptions | CombineKwargDefault, fill_value: Any, combine_attrs: CombineAttrsOptions, ) -> _MergeResult: @@ -1021,8 +1052,8 @@ def dataset_merge_method( return merge_core( objs, - compat, - join, + compat=compat, + join=join, priority_arg=priority_arg, fill_value=fill_value, combine_attrs=combine_attrs, @@ -1054,6 +1085,8 @@ def dataset_update_method(dataset: Dataset, other: CoercibleMapping) -> _MergeRe return merge_core( [dataset, other], + compat="broadcast_equals", + join="outer", priority_arg=1, indexes=dataset.xindexes, combine_attrs="override", diff --git a/xarray/core/options.py b/xarray/core/options.py index 2d69e4b6584..f17cd8ab9d0 100644 --- a/xarray/core/options.py +++ b/xarray/core/options.py @@ -29,6 +29,7 @@ "keep_attrs", "warn_for_unclosed_files", "use_bottleneck", + "use_new_combine_kwarg_defaults", "use_numbagg", "use_opt_einsum", "use_flox", @@ -57,6 +58,7 @@ class T_Options(TypedDict): warn_for_unclosed_files: bool use_bottleneck: bool use_flox: bool + use_new_combine_kwarg_defaults: bool use_numbagg: bool use_opt_einsum: bool @@ -84,6 +86,7 @@ class T_Options(TypedDict): "warn_for_unclosed_files": False, "use_bottleneck": True, "use_flox": True, + "use_new_combine_kwarg_defaults": False, "use_numbagg": True, "use_opt_einsum": True, } @@ -113,6 +116,7 @@ def _positive_integer(value: Any) -> bool: "file_cache_maxsize": 
_positive_integer, "keep_attrs": lambda choice: choice in [True, False, "default"], "use_bottleneck": lambda value: isinstance(value, bool), + "use_new_combine_kwarg_defaults": lambda value: isinstance(value, bool), "use_numbagg": lambda value: isinstance(value, bool), "use_opt_einsum": lambda value: isinstance(value, bool), "use_flox": lambda value: isinstance(value, bool), @@ -250,6 +254,8 @@ class set_options: use_flox : bool, default: True Whether to use ``numpy_groupies`` and `flox`` to accelerate groupby and resampling reductions. + use_new_combine_kwarg_defaults : bool, default False + Whether to use new default kwarg values for open_mfdataset. use_numbagg : bool, default: True Whether to use ``numbagg`` to accelerate reductions. Takes precedence over ``use_bottleneck`` when both are True. diff --git a/xarray/plot/dataarray_plot.py b/xarray/plot/dataarray_plot.py index cca9fe4f561..9663303276e 100644 --- a/xarray/plot/dataarray_plot.py +++ b/xarray/plot/dataarray_plot.py @@ -196,7 +196,14 @@ def _prepare_plot1d_data( dim = coords_to_plot.get(v, None) if (dim is not None) and (dim in darray.dims): darray_nan = np.nan * darray.isel({dim: -1}) - darray = concat([darray, darray_nan], dim=dim) + darray = concat( + [darray, darray_nan], + dim=dim, + data_vars="all", + coords="different", + compat="equals", + join="outer", + ) dims_T.append(coords_to_plot[v]) # Lines should never connect to the same coordinate when stacked, diff --git a/xarray/util/deprecation_helpers.py b/xarray/util/deprecation_helpers.py index 1064082872d..44c13560736 100644 --- a/xarray/util/deprecation_helpers.py +++ b/xarray/util/deprecation_helpers.py @@ -35,9 +35,10 @@ import warnings from collections.abc import Callable from functools import wraps -from typing import TypeVar +from typing import Any, TypeVar -from xarray.core.utils import emit_user_level_warning +from xarray.core.options import OPTIONS +from xarray.core.utils import ReprObject, emit_user_level_warning T = TypeVar("T", 
bound=Callable) @@ -145,3 +146,59 @@ def wrapper(*args, **kwargs): # We're quite confident we're just returning `T` from this function, so it's fine to ignore typing # within the function. return wrapper # type: ignore[return-value] + + +class CombineKwargDefault(ReprObject): + """Object that handles deprecation cycle for kwarg default values.""" + + _old: str + _new: str + _name: str + + def __init__(self, *, name: str, old: str, new: str): + self._name = name + self._old = old + self._new = new + + def __eq__(self, other: ReprObject | Any) -> bool: + # TODO: What type can other be? ArrayLike? + return ( + self._value == other._value + if isinstance(other, ReprObject) + else self._value == other + ) + + @property + def _value(self): + return self._new if OPTIONS["use_new_combine_kwarg_defaults"] else self._old + + def __hash__(self) -> int: + return hash(self._value) + + def warning_message(self, message: str, recommend_set_options: bool = True): + if recommend_set_options: + recommendation = ( + " To opt in to new defaults and get rid of these warnings now " + "use `set_options(use_new_combine_kwarg_defaults=True) or " + f"set {self._name} explicitly." + ) + else: + recommendation = ( + f" The recommendation is to set {self._name} explicitly for this case." + ) + + return ( + f"In a future version of xarray the default value for {self._name} will " + + f"change from {self._name}={self._old!r} to {self._name}={self._new!r}. 
" + + message + + recommendation + ) + + +_DATA_VARS_DEFAULT = CombineKwargDefault(name="data_vars", old="all", new="minimal") +_COORDS_DEFAULT = CombineKwargDefault(name="coords", old="different", new="minimal") +_COMPAT_CONCAT_DEFAULT = CombineKwargDefault( + name="compat", old="equals", new="override" +) +_COMPAT_DEFAULT = CombineKwargDefault(name="compat", old="no_conflicts", new="override") +_JOIN_DEFAULT = CombineKwargDefault(name="join", old="outer", new="exact") From e16834f7e903ce7f89b1f6be51c6cec1084a2770 Mon Sep 17 00:00:00 2001 From: Julia Signell Date: Mon, 24 Feb 2025 15:51:39 -0500 Subject: [PATCH 03/20] Remove unnecessary warnings --- xarray/core/concat.py | 24 ------------------------ 1 file changed, 24 deletions(-) diff --git a/xarray/core/concat.py b/xarray/core/concat.py index c8776baa934..c8230651aa6 100644 --- a/xarray/core/concat.py +++ b/xarray/core/concat.py @@ -544,7 +544,6 @@ def _dataset_concat( join: JoinOptions | CombineKwargDefault, combine_attrs: CombineAttrsOptions, create_index_for_new_dim: bool, - warn_about_data_vars: bool = True, ) -> T_Dataset: """ Concatenate a sequence of datasets along a new or existing dimension @@ -566,28 +565,6 @@ def _dataset_concat( f"compat={compat!r} invalid: must be 'broadcast_equals', 'equals', 'identical', 'no_conflicts' or 'override'" ) - if ( - warn_about_data_vars - and isinstance(data_vars, CombineKwargDefault) - and data_vars == "all" - ): - if not isinstance(dim, str): - warnings.warn( - data_vars.warning_message( - "This is likely to lead to different results when using an object as the concat_dim.", - ), - category=FutureWarning, - stacklevel=2, - ) - elif dim is not None and all(dim not in ds for ds in datasets): - warnings.warn( - data_vars.warning_message( - "This is likely to lead to different results when constructing a new dimension.", - ), - category=FutureWarning, - stacklevel=2, - ) - if isinstance(dim, DataArray): dim_var = dim.variable elif isinstance(dim, Variable): @@ -850,7 
+827,6 @@ def _dataarray_concat( join=join, combine_attrs=combine_attrs, create_index_for_new_dim=create_index_for_new_dim, - warn_about_data_vars=False, ) merged_attrs = merge_attrs([da.attrs for da in arrays], combine_attrs) From 9c50125ab69d2fc4356e377fc8a3c89a66834e3c Mon Sep 17 00:00:00 2001 From: Julia Signell Date: Tue, 25 Feb 2025 15:18:21 -0500 Subject: [PATCH 04/20] Use old kwarg values within map_blocks, concat dataarray --- xarray/core/alignment.py | 13 +++++-------- xarray/core/concat.py | 28 +++++++++++++--------------- xarray/core/merge.py | 2 +- xarray/core/options.py | 11 +++++++++-- xarray/core/parallel.py | 12 +++++++++--- xarray/util/deprecation_helpers.py | 7 +++++++ 6 files changed, 44 insertions(+), 29 deletions(-) diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py index b2114155cbd..dd7edbd88c2 100644 --- a/xarray/core/alignment.py +++ b/xarray/core/alignment.py @@ -439,19 +439,16 @@ def align_indexes(self) -> None: stacklevel=2, ) if self.join == "exact": - new_default_warning = ( - " Failure might be related to new default (join='exact'). " - "Previously the default was join='outer'. " - "The recommendation is to set join explicitly for this case." 
- ) raise ValueError( "cannot align objects with join='exact' where " "index/labels/sizes are not equal along " "these coordinates (dimensions): " + ", ".join(f"{name!r} {dims!r}" for name, dims in key[0]) - + new_default_warning - if isinstance(self.join, CombineKwargDefault) - else "" + + ( + self.join.error_message() + if isinstance(self.join, CombineKwargDefault) + else "" + ) ) joiner = self._get_index_joiner(index_cls) joined_index = joiner(matching_indexes) diff --git a/xarray/core/concat.py b/xarray/core/concat.py index c8230651aa6..846f52bae17 100644 --- a/xarray/core/concat.py +++ b/xarray/core/concat.py @@ -368,12 +368,10 @@ def process_subset_opt(opt, subset): if isinstance(opt, str | CombineKwargDefault): if opt == "different": if isinstance(compat, CombineKwargDefault) and compat != "override": - if subset == "data_vars" or not isinstance( - opt, CombineKwargDefault - ): + if not isinstance(opt, CombineKwargDefault): warnings.warn( compat.warning_message( - "This change will result in the following ValueError:" + "This change will result in the following ValueError: " f"Cannot specify both {subset}='different' and compat='override'.", recommend_set_options=False, ), @@ -382,16 +380,13 @@ def process_subset_opt(opt, subset): ) if compat == "override": - new_default_warning = ( - " Failure might be related to new default (compat='override'). " - "Previously the default was compat='equals' or compat='no_conflicts'. " - "The recommendation is to set compat explicitly for this case." - ) raise ValueError( f"Cannot specify both {subset}='different' and compat='override'." 
- + new_default_warning - if isinstance(compat, CombineKwargDefault) - else "" + + ( + compat.error_message() + if isinstance(compat, CombineKwargDefault) + else "" + ) ) # all nonindexes that are not the same in each dataset for k in getattr(datasets[0], subset): @@ -469,7 +464,7 @@ def process_subset_opt(opt, subset): ): warnings.warn( opt.warning_message( - "This is likely to lead to different results when multiple datasets" + "This is likely to lead to different results when multiple datasets " "have matching variables with overlapping values.", ), category=FutureWarning, @@ -800,7 +795,10 @@ def _dataarray_concat( "The elements in the input list need to be either all 'Dataset's or all 'DataArray's" ) - if not isinstance(data_vars, CombineKwargDefault) and data_vars != "all": + if not isinstance(data_vars, CombineKwargDefault) and data_vars not in [ + "all", + "minimal", + ]: raise ValueError( "data_vars is not a valid argument when concatenating DataArray objects" ) @@ -819,7 +817,7 @@ def _dataarray_concat( ds = _dataset_concat( datasets, dim=dim, - data_vars=data_vars, + data_vars="all", coords=coords, compat=compat, positions=positions, diff --git a/xarray/core/merge.py b/xarray/core/merge.py index 4a4100cde13..8c14582982b 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -308,7 +308,7 @@ def merge_collected( ): warnings.warn( compat.warning_message( - "This is likely to lead to different results when" + "This is likely to lead to different results when " "combining overlapping variables with the same name.", ), category=FutureWarning, diff --git a/xarray/core/options.py b/xarray/core/options.py index f17cd8ab9d0..f5eb72f37b4 100644 --- a/xarray/core/options.py +++ b/xarray/core/options.py @@ -86,7 +86,7 @@ class T_Options(TypedDict): "warn_for_unclosed_files": False, "use_bottleneck": True, "use_flox": True, - "use_new_combine_kwarg_defaults": False, + "use_new_combine_kwarg_defaults": True, "use_numbagg": True, "use_opt_einsum": True, } @@ 
-255,7 +255,14 @@ class set_options: Whether to use ``numpy_groupies`` and `flox`` to accelerate groupby and resampling reductions. use_new_combine_kwarg_defaults : bool, default False - Whether to use new default kwarg values for open_mfdataset. + Whether to use new kwarg default values for combine functions: + :py:func:`~xarray.concat`, :py:func:`~xarray.merge`, + :py:func:`~xarray.open_mfdataset`. New values are: + + * ``data_vars``: "minimal" + * ``coords``: "minimal" + * ``compat``: "override" + * ``join``: "exact" use_numbagg : bool, default: True Whether to use ``numbagg`` to accelerate reductions. Takes precedence over ``use_bottleneck`` when both are True. diff --git a/xarray/core/parallel.py b/xarray/core/parallel.py index 6d6a6672470..d70a3b8b516 100644 --- a/xarray/core/parallel.py +++ b/xarray/core/parallel.py @@ -351,7 +351,9 @@ def _wrapper( result = func(*converted_args, **kwargs) merged_coordinates = merge( - [arg.coords for arg in args if isinstance(arg, Dataset | DataArray)] + [arg.coords for arg in args if isinstance(arg, Dataset | DataArray)], + join="outer", + compat="no_conflicts", ).coords # check all dims are present @@ -439,7 +441,9 @@ def _wrapper( # rechunk any numpy variables appropriately xarray_objs = tuple(arg.chunk(arg.chunksizes) for arg in xarray_objs) - merged_coordinates = merge([arg.coords for arg in aligned]).coords + merged_coordinates = merge( + [arg.coords for arg in aligned], join="outer", compat="no_conflicts" + ).coords _, npargs = unzip( sorted( @@ -472,7 +476,9 @@ def _wrapper( ) coordinates = merge( - (preserved_coords, template.coords.to_dataset()[new_coord_vars]) + (preserved_coords, template.coords.to_dataset()[new_coord_vars]), + join="outer", + compat="no_conflicts", ).coords output_chunks: Mapping[Hashable, tuple[int, ...]] = { dim: input_chunks[dim] for dim in template.dims if dim in input_chunks diff --git a/xarray/util/deprecation_helpers.py b/xarray/util/deprecation_helpers.py index 44c13560736..ddae2cdf9f3 
100644 --- a/xarray/util/deprecation_helpers.py +++ b/xarray/util/deprecation_helpers.py @@ -194,6 +194,13 @@ def warning_message(self, message: str, recommend_set_options: bool = True): + recommendation ) + def error_message(self): + return ( + f" Error might be related to new default ({self._name}={self._new!r}). " + f"Previously the default was {self._name}={self._old!r}. " + f"The recommendation is to set {self._name} explicitly for this case." + ) + _DATA_VARS_DEFAULT = CombineKwargDefault(name="data_vars", old="all", new="minimal") _COORDS_DEFAULT = CombineKwargDefault(name="coords", old="different", new="minimal") From 0026ee845ec7b3442898c6f34aad27464b5794e6 Mon Sep 17 00:00:00 2001 From: Julia Signell Date: Wed, 26 Feb 2025 14:20:18 -0500 Subject: [PATCH 05/20] Switch options back to old defaults --- xarray/core/options.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/options.py b/xarray/core/options.py index f5eb72f37b4..df4bd94d074 100644 --- a/xarray/core/options.py +++ b/xarray/core/options.py @@ -86,7 +86,7 @@ class T_Options(TypedDict): "warn_for_unclosed_files": False, "use_bottleneck": True, "use_flox": True, - "use_new_combine_kwarg_defaults": True, + "use_new_combine_kwarg_defaults": False, "use_numbagg": True, "use_opt_einsum": True, } From 4d4deda0f3fc24190ba4e7bfc77b34f2515f2a79 Mon Sep 17 00:00:00 2001 From: Julia Signell Date: Wed, 26 Feb 2025 14:20:43 -0500 Subject: [PATCH 06/20] Update tests and add new ones to exercise options --- xarray/tests/test_backends.py | 277 +++++++++++++++++------ xarray/tests/test_combine.py | 238 ++++++++++++++++--- xarray/tests/test_dask.py | 26 ++- xarray/tests/test_dataarray.py | 15 +- xarray/tests/test_dataset.py | 6 +- xarray/tests/test_duck_array_wrapping.py | 2 +- xarray/tests/test_groupby.py | 1 + xarray/tests/test_merge.py | 160 ++++++++++--- 8 files changed, 581 insertions(+), 144 deletions(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py 
index 83d5afa6a09..e95f710c43c 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -14,7 +14,7 @@ import uuid import warnings from collections.abc import Generator, Iterator, Mapping -from contextlib import ExitStack +from contextlib import ExitStack, nullcontext from io import BytesIO from os import listdir from pathlib import Path @@ -4511,13 +4511,14 @@ def setup_files_and_datasets(self, fuzz=0): # to test join='exact' ds1["x"] = ds1.x + fuzz - with create_tmp_file() as tmpfile1: - with create_tmp_file() as tmpfile2: - # save data to the temporary files - ds1.to_netcdf(tmpfile1) - ds2.to_netcdf(tmpfile2) + with set_options(use_new_combine_kwarg_defaults=True): + with create_tmp_file() as tmpfile1: + with create_tmp_file() as tmpfile2: + # save data to the temporary files + ds1.to_netcdf(tmpfile1) + ds2.to_netcdf(tmpfile2) - yield [tmpfile1, tmpfile2], [ds1, ds2] + yield [tmpfile1, tmpfile2], [ds1, ds2] def gen_datasets_with_common_coord_and_time(self): # create coordinate data @@ -4554,11 +4555,19 @@ def test_open_mfdataset_does_same_as_concat( if combine == "by_coords": files.reverse() with open_mfdataset( - files, data_vars=opt, combine=combine, concat_dim=concat_dim, join=join + files, + data_vars=opt, + combine=combine, + concat_dim=concat_dim, + join=join, + compat="no_conflicts", ) as ds: - ds_expect = xr.concat([ds1, ds2], data_vars=opt, dim="t", join=join) + ds_expect = xr.concat( + [ds1, ds2], data_vars=opt, dim="t", join=join, compat="equals" + ) assert_identical(ds, ds_expect) + @pytest.mark.parametrize("use_new_combine_kwarg_defaults", [True, False]) @pytest.mark.parametrize( ["combine_attrs", "attrs", "expected", "expect_error"], ( @@ -4586,7 +4595,12 @@ def test_open_mfdataset_does_same_as_concat( ), ) def test_open_mfdataset_dataset_combine_attrs( - self, combine_attrs, attrs, expected, expect_error + self, + use_new_combine_kwarg_defaults, + combine_attrs, + attrs, + expected, + expect_error, ): with 
self.setup_files_and_datasets() as (files, [ds1, ds2]): # Give the files an inconsistent attribute @@ -4596,22 +4610,24 @@ def test_open_mfdataset_dataset_combine_attrs( ds.close() ds.to_netcdf(f) - if expect_error: - with pytest.raises(xr.MergeError): - xr.open_mfdataset( - files, - combine="nested", - concat_dim="t", - combine_attrs=combine_attrs, - ) - else: - with xr.open_mfdataset( - files, - combine="nested", - concat_dim="t", - combine_attrs=combine_attrs, - ) as ds: - assert ds.attrs == expected + with set_options( + use_new_combine_kwarg_defaults=use_new_combine_kwarg_defaults + ): + warning = ( + pytest.warns(FutureWarning) + if not use_new_combine_kwarg_defaults + else nullcontext() + ) + error = pytest.raises(xr.MergeError) if expect_error else nullcontext() + with warning: + with error: + with xr.open_mfdataset( + files, + combine="nested", + concat_dim="t", + combine_attrs=combine_attrs, + ) as ds: + assert ds.attrs == expected def test_open_mfdataset_dataset_attr_by_coords(self) -> None: """ @@ -4640,30 +4656,65 @@ def test_open_mfdataset_dataarray_attr_by_coords(self) -> None: ds.close() ds.to_netcdf(f) - with xr.open_mfdataset(files, combine="nested", concat_dim="t") as ds: + with xr.open_mfdataset( + files, data_vars="minimal", combine="nested", concat_dim="t" + ) as ds: assert ds["v1"].test_dataarray_attr == 0 @pytest.mark.parametrize( "combine, concat_dim", [("nested", "t"), ("by_coords", None)] ) - @pytest.mark.parametrize("opt", ["all", "minimal", "different"]) + @pytest.mark.parametrize( + "kwargs", + [ + {"data_vars": "all"}, + {"data_vars": "minimal"}, + { + "data_vars": "all", + "coords": "different", + "compat": "no_conflicts", + }, # old defaults + { + "data_vars": "minimal", + "coords": "minimal", + "compat": "override", + }, # new defaults + {"data_vars": "different", "compat": "no_conflicts"}, + {}, + ], + ) def test_open_mfdataset_exact_join_raises_error( - self, combine, concat_dim, opt + self, combine, concat_dim, kwargs ) -> None: 
- with self.setup_files_and_datasets(fuzz=0.1) as (files, [ds1, ds2]): + with self.setup_files_and_datasets(fuzz=0.1) as (files, _): if combine == "by_coords": files.reverse() with pytest.raises( - ValueError, match=r"cannot align objects.*join.*exact.*" + ValueError, match="cannot align objects with join='exact'" ): open_mfdataset( files, - data_vars=opt, + **kwargs, combine=combine, concat_dim=concat_dim, join="exact", ) + def test_open_mfdataset_defaults_with_exact_join_warns_as_well_as_raising( + self, + ) -> None: + with self.setup_files_and_datasets(fuzz=0.1) as (files, _): + with set_options(use_new_combine_kwarg_defaults=False): + files.reverse() + with pytest.warns( + FutureWarning, + match="will change from data_vars='all' to data_vars='minimal'", + ): + with pytest.raises( + ValueError, match="cannot align objects with join='exact'" + ): + open_mfdataset(files, combine="by_coords", join="exact") + def test_common_coord_when_datavars_all(self) -> None: opt: Final = "all" @@ -4711,6 +4762,50 @@ def test_invalid_data_vars_value_should_fail(self) -> None: with open_mfdataset(files, coords="minimum", combine="by_coords"): pass + @pytest.mark.parametrize( + "combine, concat_dim", [("nested", "t"), ("by_coords", None)] + ) + @pytest.mark.parametrize( + "kwargs", [{"data_vars": "different"}, {"coords": "different"}] + ) + def test_open_mfdataset_warns_when_kwargs_set_to_different( + self, combine, concat_dim, kwargs + ) -> None: + with self.setup_files_and_datasets() as (files, [ds1, ds2]): + if combine == "by_coords": + files.reverse() + with pytest.raises( + ValueError, match="Previously the default was compat='no_conflicts'" + ): + open_mfdataset(files, combine=combine, concat_dim=concat_dim, **kwargs) + with pytest.raises( + ValueError, match="Previously the default was compat='equals'" + ): + xr.concat([ds1, ds2], dim="t", **kwargs) + + with set_options(use_new_combine_kwarg_defaults=False): + if "data_vars" not in kwargs: + expectation = pytest.warns( + 
FutureWarning, + match="will change from data_vars='all'", + ) + else: + expectation = nullcontext() + with pytest.warns( + FutureWarning, + match="will change from compat='equals'", + ): + with expectation: + ds_expect = xr.concat([ds1, ds2], dim="t", **kwargs) + with pytest.warns( + FutureWarning, match="will change from compat='no_conflicts'" + ): + with expectation: + with open_mfdataset( + files, combine=combine, concat_dim=concat_dim, **kwargs + ) as ds: + assert_identical(ds, ds_expect) + @requires_dask @requires_scipy @@ -4966,11 +5061,58 @@ def test_encoding_mfdataset(self) -> None: ds2.t.encoding["units"] = "days since 2000-01-01" ds1.to_netcdf(tmp1) ds2.to_netcdf(tmp2) - with open_mfdataset([tmp1, tmp2], combine="nested") as actual: + with open_mfdataset( + [tmp1, tmp2], combine="nested", compat="no_conflicts", join="outer" + ) as actual: assert actual.t.encoding["units"] == original.t.encoding["units"] assert actual.t.encoding["units"] == ds1.t.encoding["units"] assert actual.t.encoding["units"] != ds2.t.encoding["units"] + def test_encoding_mfdataset_new_defaults(self) -> None: + original = Dataset( + { + "foo": ("t", np.random.randn(10)), + "t": ("t", pd.date_range(start="2010-01-01", periods=10, freq="1D")), + } + ) + original.t.encoding["units"] = "days since 2010-01-01" + + with create_tmp_file() as tmp1: + with create_tmp_file() as tmp2: + ds1 = original.isel(t=slice(5)) + ds2 = original.isel(t=slice(5, 10)) + ds1.t.encoding["units"] = "days since 2010-01-01" + ds2.t.encoding["units"] = "days since 2000-01-01" + ds1.to_netcdf(tmp1) + ds2.to_netcdf(tmp2) + + with set_options(use_new_combine_kwarg_defaults=False): + with pytest.warns( + FutureWarning, + match="will change from join='outer' to join='exact'", + ): + with pytest.warns( + FutureWarning, + match="will change from compat='no_conflicts' to compat='override'", + ): + with open_mfdataset([tmp1, tmp2], combine="nested") as old: + assert ( + old.t.encoding["units"] + == 
original.t.encoding["units"] + ) + assert ( + old.t.encoding["units"] == ds1.t.encoding["units"] + ) + assert ( + old.t.encoding["units"] != ds2.t.encoding["units"] + ) + + with set_options(use_new_combine_kwarg_defaults=True): + with pytest.raises( + ValueError, match="Error might be related to new default" + ): + open_mfdataset([tmp1, tmp2], combine="nested") + def test_preprocess_mfdataset(self) -> None: original = Dataset({"foo": ("x", np.random.randn(10))}) with create_tmp_file() as tmp: @@ -5053,25 +5195,21 @@ def test_open_and_do_math(self) -> None: actual = 1.0 * ds assert_allclose(original, actual, decode_bytes=False) - def test_open_mfdataset_concat_dim_none(self) -> None: - with create_tmp_file() as tmp1: - with create_tmp_file() as tmp2: - data = Dataset({"x": 0}) - data.to_netcdf(tmp1) - Dataset({"x": np.nan}).to_netcdf(tmp2) - with open_mfdataset( - [tmp1, tmp2], concat_dim=None, combine="nested" - ) as actual: - assert_identical(data, actual) - - def test_open_mfdataset_concat_dim_default_none(self) -> None: - with create_tmp_file() as tmp1: - with create_tmp_file() as tmp2: - data = Dataset({"x": 0}) - data.to_netcdf(tmp1) - Dataset({"x": np.nan}).to_netcdf(tmp2) - with open_mfdataset([tmp1, tmp2], combine="nested") as actual: - assert_identical(data, actual) + @pytest.mark.parametrize( + "kwargs", + [pytest.param({"concat_dim": None}, id="none"), pytest.param({}, id="default")], + ) + def test_open_mfdataset_concat_dim(self, kwargs) -> None: + with set_options(use_new_combine_kwarg_defaults=True): + with create_tmp_file() as tmp1: + with create_tmp_file() as tmp2: + data = Dataset({"x": 0}) + data.to_netcdf(tmp1) + Dataset({"x": np.nan}).to_netcdf(tmp2) + with open_mfdataset( + [tmp1, tmp2], **kwargs, combine="nested" + ) as actual: + assert_identical(data, actual) def test_open_dataset(self) -> None: original = Dataset({"foo": ("x", np.random.randn(10))}) @@ -5098,7 +5236,9 @@ def test_open_single_dataset(self) -> None: ) with create_tmp_file() as 
tmp: original.to_netcdf(tmp) - with open_mfdataset([tmp], concat_dim=dim, combine="nested") as actual: + with open_mfdataset( + [tmp], concat_dim=dim, data_vars="all", combine="nested" + ) as actual: assert_identical(expected, actual) def test_open_multi_dataset(self) -> None: @@ -5122,7 +5262,7 @@ def test_open_multi_dataset(self) -> None: original.to_netcdf(tmp1) original.to_netcdf(tmp2) with open_mfdataset( - [tmp1, tmp2], concat_dim=dim, combine="nested" + [tmp1, tmp2], concat_dim=dim, data_vars="all", combine="nested" ) as actual: assert_identical(expected, actual) @@ -6579,19 +6719,20 @@ def test_zarr_safe_chunk_region(self, mode: Literal["r+", "a"]): @requires_h5netcdf @requires_fsspec def test_h5netcdf_storage_options() -> None: - with create_tmp_files(2, allow_cleanup_failure=ON_WINDOWS) as (f1, f2): - ds1 = create_test_data() - ds1.to_netcdf(f1, engine="h5netcdf") + with set_options(use_new_combine_kwarg_defaults=True): + with create_tmp_files(2, allow_cleanup_failure=ON_WINDOWS) as (f1, f2): + ds1 = create_test_data() + ds1.to_netcdf(f1, engine="h5netcdf") - ds2 = create_test_data() - ds2.to_netcdf(f2, engine="h5netcdf") + ds2 = create_test_data() + ds2.to_netcdf(f2, engine="h5netcdf") - files = [f"file://{f}" for f in [f1, f2]] - ds = xr.open_mfdataset( - files, - engine="h5netcdf", - concat_dim="time", - combine="nested", - storage_options={"skip_instance_cache": False}, - ) - assert_identical(xr.concat([ds1, ds2], dim="time"), ds) + files = [f"file://{f}" for f in [f1, f2]] + ds = xr.open_mfdataset( + files, + engine="h5netcdf", + concat_dim="time", + combine="nested", + storage_options={"skip_instance_cache": False}, + ) + assert_identical(xr.concat([ds1, ds2], dim="time"), ds) diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py index 956bac350a2..c1d61a6f424 100644 --- a/xarray/tests/test_combine.py +++ b/xarray/tests/test_combine.py @@ -13,6 +13,7 @@ combine_nested, concat, merge, + set_options, ) from xarray.core import dtypes 
from xarray.core.combine import ( @@ -295,7 +296,7 @@ def test_concat_once(self, create_combined_ids, concat_dim): combine_attrs="drop", ) - expected_ds = concat([ds(0), ds(1)], dim=concat_dim) + expected_ds = concat([ds(0), ds(1)], data_vars="all", dim=concat_dim) assert_combined_tile_ids_equal(result, {(): expected_ds}) def test_concat_only_first_dim(self, create_combined_ids): @@ -340,7 +341,9 @@ def test_concat_twice(self, create_combined_ids, concat_dim): partway1 = concat([ds(0), ds(3)], dim="dim1") partway2 = concat([ds(1), ds(4)], dim="dim1") partway3 = concat([ds(2), ds(5)], dim="dim1") - expected = concat([partway1, partway2, partway3], dim=concat_dim) + expected = concat( + [partway1, partway2, partway3], data_vars="all", dim=concat_dim + ) assert_equal(result, expected) @@ -432,7 +435,7 @@ def test_nested_concat_along_new_dim(self): Dataset({"a": ("x", [20]), "x": [0]}), ] expected = Dataset({"a": (("t", "x"), [[10], [20]]), "x": [0]}) - actual = combine_nested(objs, concat_dim="t") + actual = combine_nested(objs, data_vars="all", concat_dim="t") assert_identical(expected, actual) # Same but with a DataArray as new dim, see GH #1988 and #2647 @@ -440,42 +443,51 @@ def test_nested_concat_along_new_dim(self): expected = Dataset( {"a": (("baz", "x"), [[10], [20]]), "x": [0], "baz": [100, 150]} ) - actual = combine_nested(objs, concat_dim=dim) + actual = combine_nested(objs, data_vars="all", concat_dim=dim) assert_identical(expected, actual) - def test_nested_merge(self): + def test_nested_merge_with_self(self): data = Dataset({"x": 0}) - actual = combine_nested([data, data, data], concat_dim=None) + actual = combine_nested( + [data, data, data], compat="no_conflicts", concat_dim=None + ) assert_identical(data, actual) + def test_nested_merge_with_overlapping_values(self): ds1 = Dataset({"a": ("x", [1, 2]), "x": [0, 1]}) ds2 = Dataset({"a": ("x", [2, 3]), "x": [1, 2]}) expected = Dataset({"a": ("x", [1, 2, 3]), "x": [0, 1, 2]}) - actual = 
combine_nested([ds1, ds2], concat_dim=None) + actual = combine_nested( + [ds1, ds2], join="outer", compat="no_conflicts", concat_dim=None + ) assert_identical(expected, actual) - actual = combine_nested([ds1, ds2], concat_dim=[None]) + actual = combine_nested( + [ds1, ds2], join="outer", compat="no_conflicts", concat_dim=[None] + ) assert_identical(expected, actual) + def test_nested_merge_with_nan(self): tmp1 = Dataset({"x": 0}) tmp2 = Dataset({"x": np.nan}) - actual = combine_nested([tmp1, tmp2], concat_dim=None) + actual = combine_nested([tmp1, tmp2], compat="no_conflicts", concat_dim=None) assert_identical(tmp1, actual) - actual = combine_nested([tmp1, tmp2], concat_dim=[None]) + actual = combine_nested([tmp1, tmp2], compat="no_conflicts", concat_dim=[None]) assert_identical(tmp1, actual) - # Single object, with a concat_dim explicitly provided + def test_nested_merge_with_concat_dim_explicitly_provided(self): # Test the issue reported in GH #1988 objs = [Dataset({"x": 0, "y": 1})] dim = DataArray([100], name="baz", dims="baz") - actual = combine_nested(objs, concat_dim=[dim]) + actual = combine_nested(objs, concat_dim=[dim], data_vars="all") expected = Dataset({"x": ("baz", [0]), "y": ("baz", [1])}, {"baz": [100]}) assert_identical(expected, actual) + def test_nested_merge_with_non_scalars(self): # Just making sure that auto_combine is doing what is # expected for non-scalar values, too. 
objs = [Dataset({"x": ("z", [0, 1]), "y": ("z", [1, 2])})] dim = DataArray([100], name="baz", dims="baz") - actual = combine_nested(objs, concat_dim=[dim]) + actual = combine_nested(objs, concat_dim=[dim], data_vars="all") expected = Dataset( {"x": (("baz", "z"), [[0, 1]]), "y": (("baz", "z"), [[1, 2]])}, {"baz": [100]}, @@ -525,10 +537,15 @@ def test_auto_combine_2d(self): partway1 = concat([ds(0), ds(3)], dim="dim1") partway2 = concat([ds(1), ds(4)], dim="dim1") partway3 = concat([ds(2), ds(5)], dim="dim1") - expected = concat([partway1, partway2, partway3], dim="dim2") + expected = concat([partway1, partway2, partway3], data_vars="all", dim="dim2") datasets = [[ds(0), ds(1), ds(2)], [ds(3), ds(4), ds(5)]] - result = combine_nested(datasets, concat_dim=["dim1", "dim2"]) + result = combine_nested( + datasets, + data_vars="all", + compat="no_conflicts", + concat_dim=["dim1", "dim2"], + ) assert_equal(result, expected) def test_auto_combine_2d_combine_attrs_kwarg(self): @@ -537,7 +554,7 @@ def test_auto_combine_2d_combine_attrs_kwarg(self): partway1 = concat([ds(0), ds(3)], dim="dim1") partway2 = concat([ds(1), ds(4)], dim="dim1") partway3 = concat([ds(2), ds(5)], dim="dim1") - expected = concat([partway1, partway2, partway3], dim="dim2") + expected = concat([partway1, partway2, partway3], data_vars="all", dim="dim2") expected_dict = {} expected_dict["drop"] = expected.copy(deep=True) @@ -568,12 +585,20 @@ def test_auto_combine_2d_combine_attrs_kwarg(self): with pytest.raises(ValueError, match=r"combine_attrs='identical'"): result = combine_nested( - datasets, concat_dim=["dim1", "dim2"], combine_attrs="identical" + datasets, + concat_dim=["dim1", "dim2"], + data_vars="all", + compat="no_conflicts", + combine_attrs="identical", ) for combine_attrs in expected_dict: result = combine_nested( - datasets, concat_dim=["dim1", "dim2"], combine_attrs=combine_attrs + datasets, + concat_dim=["dim1", "dim2"], + data_vars="all", + compat="no_conflicts", + 
combine_attrs=combine_attrs, ) assert_identical(result, expected_dict[combine_attrs]) @@ -587,7 +612,7 @@ def test_combine_nested_missing_data_new_dim(self): expected = Dataset( {"a": (("t", "x"), [[np.nan, 2, 3], [1, 2, np.nan]])}, {"x": [0, 1, 2]} ) - actual = combine_nested(datasets, concat_dim="t") + actual = combine_nested(datasets, data_vars="all", join="outer", concat_dim="t") assert_identical(expected, actual) def test_invalid_hypercube_input(self): @@ -665,7 +690,13 @@ def test_combine_nested_fill_value(self, fill_value): }, {"x": [0, 1, 2]}, ) - actual = combine_nested(datasets, concat_dim="t", fill_value=fill_value) + actual = combine_nested( + datasets, + concat_dim="t", + data_vars="all", + join="outer", + fill_value=fill_value, + ) assert_identical(expected, actual) def test_combine_nested_unnamed_data_arrays(self): @@ -725,26 +756,30 @@ def test_combine_by_coords(self): expected = Dataset({"x": [0, 1, 2]}) assert_identical(expected, actual) + def test_combine_by_coords_handles_non_sorted_variables(self): # ensure auto_combine handles non-sorted variables objs = [ Dataset({"x": ("a", [0]), "y": ("a", [0]), "a": [0]}), Dataset({"x": ("a", [1]), "y": ("a", [1]), "a": [1]}), ] - actual = combine_by_coords(objs) + actual = combine_by_coords(objs, join="outer") expected = Dataset({"x": ("a", [0, 1]), "y": ("a", [0, 1]), "a": [0, 1]}) assert_identical(expected, actual) + def test_combine_by_coords_multiple_variables(self): objs = [Dataset({"x": [0], "y": [0]}), Dataset({"y": [1], "x": [1]})] - actual = combine_by_coords(objs) + actual = combine_by_coords(objs, join="outer") expected = Dataset({"x": [0, 1], "y": [0, 1]}) assert_equal(actual, expected) + def test_combine_by_coords_for_scalar_variables(self): objs = [Dataset({"x": 0}), Dataset({"x": 1})] with pytest.raises( ValueError, match=r"Could not find any dimension coordinates" ): combine_by_coords(objs) + def test_combine_by_coords_requires_coord_or_index(self): objs = [Dataset({"x": [0], "y": [0]}), 
Dataset({"x": [0]})] with pytest.raises( ValueError, @@ -960,7 +995,9 @@ def test_combine_by_coords_combine_attrs_variables( with pytest.raises(MergeError, match="combine_attrs"): combine_by_coords([data1, data2], combine_attrs=combine_attrs) else: - actual = combine_by_coords([data1, data2], combine_attrs=combine_attrs) + actual = combine_by_coords( + [data1, data2], data_vars="all", combine_attrs=combine_attrs + ) expected = Dataset( { "x": ("a", [0, 1], expected_attrs), @@ -974,7 +1011,7 @@ def test_combine_by_coords_combine_attrs_variables( def test_infer_order_from_coords(self): data = create_test_data() objs = [data.isel(dim2=slice(4, 9)), data.isel(dim2=slice(4))] - actual = combine_by_coords(objs) + actual = combine_by_coords(objs, data_vars="all", compat="no_conflicts") expected = data assert expected.broadcast_equals(actual) @@ -1012,7 +1049,7 @@ def test_combine_by_coords_previously_failed(self): Dataset({"a": ("x", [1]), "x": [1]}), ] expected = Dataset({"a": ("x", [0, 1]), "b": ("x", [0, np.nan])}, {"x": [0, 1]}) - actual = combine_by_coords(datasets) + actual = combine_by_coords(datasets, join="outer") assert_identical(expected, actual) def test_combine_by_coords_still_fails(self): @@ -1029,7 +1066,7 @@ def test_combine_by_coords_no_concat(self): assert_identical(expected, actual) objs = [Dataset({"x": 0, "y": 1}), Dataset({"y": np.nan, "z": 2})] - actual = combine_by_coords(objs) + actual = combine_by_coords(objs, compat="no_conflicts") expected = Dataset({"x": 0, "y": 1, "z": 2}) assert_identical(expected, actual) @@ -1047,7 +1084,7 @@ def test_combine_by_coords_incomplete_hypercube(self): x1 = Dataset({"a": (("y", "x"), [[1]])}, coords={"y": [0], "x": [0]}) x2 = Dataset({"a": (("y", "x"), [[1]])}, coords={"y": [1], "x": [0]}) x3 = Dataset({"a": (("y", "x"), [[1]])}, coords={"y": [0], "x": [1]}) - actual = combine_by_coords([x1, x2, x3]) + actual = combine_by_coords([x1, x2, x3], join="outer") expected = Dataset( {"a": (("y", "x"), [[1, 1], [1, 
np.nan]])}, coords={"y": [0, 1], "x": [0, 1]}, @@ -1055,8 +1092,10 @@ def test_combine_by_coords_incomplete_hypercube(self): assert_identical(expected, actual) # test that this fails if fill_value is None - with pytest.raises(ValueError): - combine_by_coords([x1, x2, x3], fill_value=None) + with pytest.raises( + ValueError, match="supplied objects do not form a hypercube" + ): + combine_by_coords([x1, x2, x3], join="outer", fill_value=None) def test_combine_by_coords_override_order(self) -> None: # regression test for https://github.com/pydata/xarray/issues/8828 @@ -1126,7 +1165,7 @@ def test_combine_by_coords_all_named_dataarrays(self): named_da1 = DataArray(name="a", data=[1.0, 2.0], coords={"x": [0, 1]}, dims="x") named_da2 = DataArray(name="b", data=[3.0, 4.0], coords={"x": [2, 3]}, dims="x") - actual = combine_by_coords([named_da1, named_da2]) + actual = combine_by_coords([named_da1, named_da2], join="outer") expected = Dataset( { "a": DataArray(data=[1.0, 2.0], coords={"x": [0, 1]}, dims="x"), @@ -1139,11 +1178,146 @@ def test_combine_by_coords_all_dataarrays_with_the_same_name(self): named_da1 = DataArray(name="a", data=[1.0, 2.0], coords={"x": [0, 1]}, dims="x") named_da2 = DataArray(name="a", data=[3.0, 4.0], coords={"x": [2, 3]}, dims="x") - actual = combine_by_coords([named_da1, named_da2]) - expected = merge([named_da1, named_da2]) + actual = combine_by_coords( + [named_da1, named_da2], compat="no_conflicts", join="outer" + ) + expected = merge([named_da1, named_da2], compat="no_conflicts", join="outer") assert_identical(expected, actual) +class TestNewDefaults: + def test_concat_along_existing_dim(self): + concat_dim = "dim1" + ds = create_test_data + with set_options(use_new_combine_kwarg_defaults=False): + old = concat([ds(0), ds(1)], dim=concat_dim) + with set_options(use_new_combine_kwarg_defaults=True): + new = concat([ds(0), ds(1)], dim=concat_dim) + + assert_identical(old, new) + + def test_concat_along_new_dim(self): + concat_dim = "new_dim" + 
ds = create_test_data + with set_options(use_new_combine_kwarg_defaults=False): + with pytest.warns( + FutureWarning, + match="will change from data_vars='all' to data_vars='minimal'", + ): + old = concat([ds(0), ds(1)], dim=concat_dim) + with set_options(use_new_combine_kwarg_defaults=True): + new = concat([ds(0), ds(1)], dim=concat_dim) + + with pytest.raises(AssertionError): + assert_identical(old, new) + + def test_nested_merge_with_overlapping_values(self): + ds1 = Dataset({"a": ("x", [1, 2]), "x": [0, 1]}) + ds2 = Dataset({"a": ("x", [2, 3]), "x": [1, 2]}) + expected = Dataset({"a": ("x", [1, 2, 3]), "x": [0, 1, 2]}) + with set_options(use_new_combine_kwarg_defaults=False): + with pytest.warns( + FutureWarning, match="will change from join='outer' to join='exact'" + ): + with pytest.warns( + FutureWarning, + match="will change from compat='no_conflicts' to compat='override'", + ): + old = combine_nested([ds1, ds2], concat_dim=None) + with set_options(use_new_combine_kwarg_defaults=True): + with pytest.raises(ValueError, match="might be related to new default"): + combine_nested([ds1, ds2], concat_dim=None) + + assert_identical(old, expected) + + def test_nested_merge_with_nan_order_matters(self): + ds1 = Dataset({"x": 0}) + ds2 = Dataset({"x": np.nan}) + with set_options(use_new_combine_kwarg_defaults=False): + with pytest.warns( + FutureWarning, + match="will change from compat='no_conflicts' to compat='override'", + ): + old = combine_nested([ds1, ds2], concat_dim=None) + with set_options(use_new_combine_kwarg_defaults=True): + new = combine_nested([ds1, ds2], concat_dim=None) + + assert_identical(ds1, old) + assert_identical(old, new) + + with set_options(use_new_combine_kwarg_defaults=False): + with pytest.warns( + FutureWarning, + match="will change from compat='no_conflicts' to compat='override'", + ): + old = combine_nested([ds2, ds1], concat_dim=None) + with set_options(use_new_combine_kwarg_defaults=True): + new = combine_nested([ds2, ds1], 
concat_dim=None) + + assert_identical(ds1, old) + with pytest.raises(AssertionError): + assert_identical(old, new) + + def test_nested_merge_with_concat_dim_explicitly_provided(self): + # Test the issue reported in GH #1988 + objs = [Dataset({"x": 0, "y": 1})] + dim = DataArray([100], name="baz", dims="baz") + expected = Dataset({"x": ("baz", [0]), "y": ("baz", [1])}, {"baz": [100]}) + + with set_options(use_new_combine_kwarg_defaults=False): + with pytest.warns( + FutureWarning, + match="will change from data_vars='all' to data_vars='minimal'", + ): + old = combine_nested(objs, concat_dim=dim) + with set_options(use_new_combine_kwarg_defaults=True): + new = combine_nested(objs, concat_dim=dim) + + assert_identical(expected, old) + with pytest.raises(AssertionError): + assert_identical(old, new) + + def test_combine_nested_missing_data_new_dim(self): + # Your data includes "time" and "station" dimensions, and each year's + # data has a different set of stations. + datasets = [ + Dataset({"a": ("x", [2, 3]), "x": [1, 2]}), + Dataset({"a": ("x", [1, 2]), "x": [0, 1]}), + ] + expected = Dataset( + {"a": (("t", "x"), [[np.nan, 2, 3], [1, 2, np.nan]])}, {"x": [0, 1, 2]} + ) + with set_options(use_new_combine_kwarg_defaults=False): + with pytest.warns( + FutureWarning, match="will change from join='outer' to join='exact'" + ): + with pytest.warns( + FutureWarning, + match="will change from data_vars='all' to data_vars='minimal'", + ): + old = combine_nested(datasets, concat_dim="t") + with set_options(use_new_combine_kwarg_defaults=True): + with pytest.raises(ValueError, match="might be related to new default"): + combine_nested(datasets, concat_dim="t") + + assert_identical(expected, old) + + def test_combine_by_coords_multiple_variables(self): + objs = [Dataset({"x": [0], "y": [0]}), Dataset({"y": [1], "x": [1]})] + expected = Dataset({"x": [0, 1], "y": [0, 1]}) + + with set_options(use_new_combine_kwarg_defaults=False): + with pytest.warns( + FutureWarning, 
match="will change from join='outer' to join='exact'" + ): + old = combine_by_coords(objs) + with set_options(use_new_combine_kwarg_defaults=True): + with pytest.raises(ValueError, match="might be related to new default"): + combine_by_coords(objs) + + assert_identical(old, expected) + + @requires_cftime def test_combine_by_coords_distant_cftime_dates(): # Regression test for https://github.com/pydata/xarray/issues/3535 diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py index b970781fe28..34ad36a1e12 100644 --- a/xarray/tests/test_dask.py +++ b/xarray/tests/test_dask.py @@ -446,7 +446,11 @@ def test_concat_loads_variables(self): assert kernel_call_count == 0 out = xr.concat( - [ds1, ds2, ds3], dim="n", data_vars="different", coords="different" + [ds1, ds2, ds3], + dim="n", + data_vars="different", + coords="different", + compat="equals", ) # each kernel is computed exactly once assert kernel_call_count == 6 @@ -488,7 +492,11 @@ def test_concat_loads_variables(self): # stop computing variables as it would not have any benefit ds4 = Dataset(data_vars={"d": ("x", [2.0])}, coords={"c": ("x", [2.0])}) out = xr.concat( - [ds1, ds2, ds4, ds3], dim="n", data_vars="different", coords="different" + [ds1, ds2, ds4, ds3], + dim="n", + data_vars="different", + coords="different", + compat="equals", ) # the variables of ds1 and ds2 were computed, but those of ds3 didn't assert kernel_call_count == 22 @@ -509,7 +517,11 @@ def test_concat_loads_variables(self): # now check that concat() is correctly using dask name equality to skip loads out = xr.concat( - [ds1, ds1, ds1], dim="n", data_vars="different", coords="different" + [ds1, ds1, ds1], + dim="n", + data_vars="different", + coords="different", + compat="equals", ) assert kernel_call_count == 24 # variables are not loaded in the output @@ -1375,7 +1387,9 @@ def test_map_blocks_ds_transformations(func, map_ds): def test_map_blocks_da_ds_with_template(obj): func = lambda x: x.isel(x=[1]) # a simple .isel(x=[1, 5, 
9]) puts all those in a single chunk. - template = xr.concat([obj.isel(x=[i]) for i in [1, 5, 9]], dim="x") + template = xr.concat( + [obj.isel(x=[i]) for i in [1, 5, 9]], data_vars="minimal", dim="x" + ) with raise_if_dask_computes(): actual = xr.map_blocks(func, obj, template=template) assert_identical(actual, template) @@ -1448,7 +1462,9 @@ def test_map_blocks_errors_bad_template(obj): xr.map_blocks( lambda a: a.isel(x=[1]).assign_coords(x=[120]), # assign bad index values obj, - template=xr.concat([obj.isel(x=[i]) for i in [1, 5, 9]], dim="x"), + template=xr.concat( + [obj.isel(x=[i]) for i in [1, 5, 9]], data_vars="minimal", dim="x" + ), ).compute() diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 75d6d919e19..66546283d4b 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -1412,12 +1412,25 @@ def test_selection_multiindex_from_level(self) -> None: # GH: 3512 da = DataArray([0, 1], dims=["x"], coords={"x": [0, 1], "y": "a"}) db = DataArray([2, 3], dims=["x"], coords={"x": [0, 1], "y": "b"}) - data = xr.concat([da, db], dim="x").set_index(xy=["x", "y"]) + data = xr.concat( + [da, db], dim="x", coords="different", compat="equals" + ).set_index(xy=["x", "y"]) assert data.dims == ("xy",) actual = data.sel(y="a") expected = data.isel(xy=[0, 1]).unstack("xy").squeeze("y") assert_equal(actual, expected) + def test_concat_with_default_coords_warns(self) -> None: + da = DataArray([0, 1], dims=["x"], coords={"x": [0, 1], "y": "a"}) + db = DataArray([2, 3], dims=["x"], coords={"x": [0, 1], "y": "b"}) + + with pytest.warns(FutureWarning): + original = xr.concat([da, db], dim="x") + with set_options(use_new_combine_kwarg_defaults=True): + new = xr.concat([da, db], dim="x") + + assert original.y.shape != new.y.shape + def test_virtual_default_coords(self) -> None: array = DataArray(np.zeros((5,)), dims="x") expected = DataArray(range(5), dims="x", name="x") diff --git a/xarray/tests/test_dataset.py 
b/xarray/tests/test_dataset.py index 7be2d13f9dd..28f932c8716 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -6085,7 +6085,7 @@ def test_dataset_math_auto_align(self) -> None: assert_equal(actual, expected) actual = ds + ds[["bar"]] - expected = (2 * ds[["bar"]]).merge(ds.coords) + expected = (2 * ds[["bar"]]).merge(ds.coords, compat="override") assert_identical(expected, actual) assert_identical(ds + Dataset(), ds.coords.to_dataset()) @@ -6521,12 +6521,12 @@ def test_combine_first(self) -> None: coords={"x": ["a", "b", "c"]}, ) assert_equal(actual, expected) - assert_equal(actual, xr.merge([dsx0, dsx1])) + assert_equal(actual, xr.merge([dsx0, dsx1], join="outer")) # works just like xr.merge([self, other]) dsy2 = DataArray([2, 2, 2], [("x", ["b", "c", "d"])]).to_dataset(name="dsy2") actual = dsx0.combine_first(dsy2) - expected = xr.merge([dsy2, dsx0]) + expected = xr.merge([dsy2, dsx0], join="outer") assert_equal(actual, expected) def test_sortby(self) -> None: diff --git a/xarray/tests/test_duck_array_wrapping.py b/xarray/tests/test_duck_array_wrapping.py index 59928dce370..b0c9d40a8cc 100644 --- a/xarray/tests/test_duck_array_wrapping.py +++ b/xarray/tests/test_duck_array_wrapping.py @@ -155,7 +155,7 @@ def test_concat(self): assert isinstance(result.data, self.Array) def test_merge(self): - result = xr.merge([self.x1, self.x2], compat="override") + result = xr.merge([self.x1, self.x2], compat="override", join="outer") assert isinstance(result.foo.data, self.Array) def test_where(self): diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index d42f86f5ea6..be5ec0b28af 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -2402,6 +2402,7 @@ def test_resample_min_count(self) -> None: for i in range(3) ], dim=actual["time"], + data_vars="all", ) assert_allclose(expected, actual) diff --git a/xarray/tests/test_merge.py b/xarray/tests/test_merge.py index 52935e9714e..7d346994d6b 100644 --- 
a/xarray/tests/test_merge.py +++ b/xarray/tests/test_merge.py @@ -6,6 +6,7 @@ import xarray as xr from xarray.core import dtypes, merge from xarray.core.merge import MergeError +from xarray.core.options import set_options from xarray.testing import assert_equal, assert_identical from xarray.tests.test_dataset import create_test_data @@ -36,15 +37,17 @@ def test_merge_arrays(self): expected = data[["var1", "var2"]] assert_identical(actual, expected) - def test_merge_datasets(self): - data = create_test_data(add_attrs=False, use_extension_array=True) + @pytest.mark.parametrize("use_new_combine_kwarg_defaults", [True, False]) + def test_merge_datasets(self, use_new_combine_kwarg_defaults): + with set_options(use_new_combine_kwarg_defaults=use_new_combine_kwarg_defaults): + data = create_test_data(add_attrs=False, use_extension_array=True) - actual = xr.merge([data[["var1"]], data[["var2"]]]) - expected = data[["var1", "var2"]] - assert_identical(actual, expected) + actual = xr.merge([data[["var1"]], data[["var2"]]]) + expected = data[["var1", "var2"]] + assert_identical(actual, expected) - actual = xr.merge([data, data]) - assert_identical(actual, data) + actual = xr.merge([data, data], compat="no_conflicts") + assert_identical(actual, data) def test_merge_dataarray_unnamed(self): data = xr.DataArray([1, 2], dims="x") @@ -191,9 +194,13 @@ def test_merge_arrays_attrs_variables( if expect_exception: with pytest.raises(MergeError, match="combine_attrs"): - actual = xr.merge([data1, data2], combine_attrs=combine_attrs) + actual = xr.merge( + [data1, data2], compat="no_conflicts", combine_attrs=combine_attrs + ) else: - actual = xr.merge([data1, data2], combine_attrs=combine_attrs) + actual = xr.merge( + [data1, data2], compat="no_conflicts", combine_attrs=combine_attrs + ) expected = xr.Dataset( {"var1": ("dim1", [], expected_attrs)}, coords={"dim1": ("dim1", [], expected_attrs)}, @@ -266,8 +273,12 @@ def test_merge_no_conflicts_single_var(self): ds1 = xr.Dataset({"a": 
("x", [1, 2]), "x": [0, 1]}) ds2 = xr.Dataset({"a": ("x", [2, 3]), "x": [1, 2]}) expected = xr.Dataset({"a": ("x", [1, 2, 3]), "x": [0, 1, 2]}) - assert expected.identical(xr.merge([ds1, ds2], compat="no_conflicts")) - assert expected.identical(xr.merge([ds2, ds1], compat="no_conflicts")) + assert expected.identical( + xr.merge([ds1, ds2], compat="no_conflicts", join="outer") + ) + assert expected.identical( + xr.merge([ds2, ds1], compat="no_conflicts", join="outer") + ) assert ds1.identical(xr.merge([ds1, ds2], compat="no_conflicts", join="left")) assert ds2.identical(xr.merge([ds1, ds2], compat="no_conflicts", join="right")) expected = xr.Dataset({"a": ("x", [2]), "x": [1]}) @@ -277,11 +288,11 @@ def test_merge_no_conflicts_single_var(self): with pytest.raises(xr.MergeError): ds3 = xr.Dataset({"a": ("x", [99, 3]), "x": [1, 2]}) - xr.merge([ds1, ds3], compat="no_conflicts") + xr.merge([ds1, ds3], compat="no_conflicts", join="outer") with pytest.raises(xr.MergeError): ds3 = xr.Dataset({"a": ("y", [2, 3]), "y": [1, 2]}) - xr.merge([ds1, ds3], compat="no_conflicts") + xr.merge([ds1, ds3], compat="no_conflicts", join="outer") def test_merge_no_conflicts_multi_var(self): data = create_test_data(add_attrs=False) @@ -303,17 +314,19 @@ def test_merge_no_conflicts_multi_var(self): def test_merge_no_conflicts_preserve_attrs(self): data = xr.Dataset({"x": ([], 0, {"foo": "bar"})}) - actual = xr.merge([data, data], combine_attrs="no_conflicts") + actual = xr.merge( + [data, data], compat="no_conflicts", combine_attrs="no_conflicts" + ) assert_identical(data, actual) def test_merge_no_conflicts_broadcast(self): datasets = [xr.Dataset({"x": ("y", [0])}), xr.Dataset({"x": np.nan})] - actual = xr.merge(datasets) + actual = xr.merge(datasets, compat="no_conflicts") expected = xr.Dataset({"x": ("y", [0])}) assert_identical(expected, actual) datasets = [xr.Dataset({"x": ("y", [np.nan])}), xr.Dataset({"x": 0})] - actual = xr.merge(datasets) + actual = xr.merge(datasets, 
compat="no_conflicts") assert_identical(expected, actual) @@ -329,27 +342,27 @@ def test_merge(self): actual = ds2.merge(ds1) assert_identical(expected, actual) - actual = data.merge(data) + actual = data.merge(data, compat="no_conflicts") assert_identical(data, actual) - actual = data.reset_coords(drop=True).merge(data) + actual = data.reset_coords(drop=True).merge(data, compat="no_conflicts") assert_identical(data, actual) - actual = data.merge(data.reset_coords(drop=True)) + actual = data.merge(data.reset_coords(drop=True), compat="no_conflicts") assert_identical(data, actual) - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="conflicting values for variable"): ds1.merge(ds2.rename({"var3": "var1"})) with pytest.raises(ValueError, match=r"should be coordinates or not"): - data.reset_coords().merge(data) + data.reset_coords().merge(data, compat="no_conflicts") with pytest.raises(ValueError, match=r"should be coordinates or not"): - data.merge(data.reset_coords()) + data.merge(data.reset_coords(), compat="no_conflicts") def test_merge_broadcast_equals(self): ds1 = xr.Dataset({"x": 0}) ds2 = xr.Dataset({"x": ("y", [0, 0])}) - actual = ds1.merge(ds2) + actual = ds1.merge(ds2, compat="no_conflicts") assert_identical(ds2, actual) - actual = ds2.merge(ds1) + actual = ds2.merge(ds1, compat="override") assert_identical(ds2, actual) actual = ds1.copy() @@ -358,7 +371,7 @@ def test_merge_broadcast_equals(self): ds1 = xr.Dataset({"x": np.nan}) ds2 = xr.Dataset({"x": ("y", [np.nan, np.nan])}) - actual = ds1.merge(ds2) + actual = ds1.merge(ds2, compat="no_conflicts") assert_identical(ds2, actual) def test_merge_compat(self): @@ -398,8 +411,8 @@ def test_merge_auto_align(self): expected = xr.Dataset( {"a": ("x", [1, 2, np.nan]), "b": ("x", [np.nan, 3, 4])}, {"x": [0, 1, 2]} ) - assert expected.identical(ds1.merge(ds2)) - assert expected.identical(ds2.merge(ds1)) + assert expected.identical(ds1.merge(ds2, join="outer")) + assert 
expected.identical(ds2.merge(ds1, join="outer")) expected = expected.isel(x=slice(2)) assert expected.identical(ds1.merge(ds2, join="left")) @@ -427,17 +440,19 @@ def test_merge_fill_value(self, fill_value): {"a": ("x", [1, 2, fill_value_a]), "b": ("x", [fill_value_b, 3, 4])}, {"x": [0, 1, 2]}, ) - assert expected.identical(ds1.merge(ds2, fill_value=fill_value)) - assert expected.identical(ds2.merge(ds1, fill_value=fill_value)) - assert expected.identical(xr.merge([ds1, ds2], fill_value=fill_value)) + assert expected.identical(ds1.merge(ds2, join="outer", fill_value=fill_value)) + assert expected.identical(ds2.merge(ds1, join="outer", fill_value=fill_value)) + assert expected.identical( + xr.merge([ds1, ds2], join="outer", fill_value=fill_value) + ) def test_merge_no_conflicts(self): ds1 = xr.Dataset({"a": ("x", [1, 2]), "x": [0, 1]}) ds2 = xr.Dataset({"a": ("x", [2, 3]), "x": [1, 2]}) expected = xr.Dataset({"a": ("x", [1, 2, 3]), "x": [0, 1, 2]}) - assert expected.identical(ds1.merge(ds2, compat="no_conflicts")) - assert expected.identical(ds2.merge(ds1, compat="no_conflicts")) + assert expected.identical(ds1.merge(ds2, compat="no_conflicts", join="outer")) + assert expected.identical(ds2.merge(ds1, compat="no_conflicts", join="outer")) assert ds1.identical(ds1.merge(ds2, compat="no_conflicts", join="left")) @@ -448,11 +463,11 @@ def test_merge_no_conflicts(self): with pytest.raises(xr.MergeError): ds3 = xr.Dataset({"a": ("x", [99, 3]), "x": [1, 2]}) - ds1.merge(ds3, compat="no_conflicts") + ds1.merge(ds3, compat="no_conflicts", join="outer") with pytest.raises(xr.MergeError): ds3 = xr.Dataset({"a": ("y", [2, 3]), "y": [1, 2]}) - ds1.merge(ds3, compat="no_conflicts") + ds1.merge(ds3, compat="no_conflicts", join="outer") def test_merge_dataarray(self): ds = xr.Dataset({"a": 0}) @@ -490,3 +505,80 @@ def test_merge_combine_attrs( actual = ds1.merge(ds2, combine_attrs=combine_attrs) expected = xr.Dataset(attrs=expected_attrs) assert_identical(actual, expected) + + 
+class TestNewDefaults: + def test_merge_datasets_false_warning(self): + data = create_test_data(add_attrs=False, use_extension_array=True) + + with set_options(use_new_combine_kwarg_defaults=False): + with pytest.warns( + FutureWarning, + match="will change from compat='no_conflicts' to compat='override'", + ): + old = xr.merge([data, data]) + + with set_options(use_new_combine_kwarg_defaults=True): + new = xr.merge([data, data]) + + assert_identical(old, new) + + def test_merge(self): + data = create_test_data() + ds1 = data[["var1"]] + ds2 = data[["var3"]] + expected = data[["var1", "var3"]] + with set_options(use_new_combine_kwarg_defaults=True): + actual = ds1.merge(ds2) + assert_identical(expected, actual) + + actual = ds2.merge(ds1) + assert_identical(expected, actual) + + actual = data.merge(data) + assert_identical(data, actual) + + ds1.merge(ds2.rename({"var3": "var1"})) + + with pytest.raises(ValueError, match=r"should be coordinates or not"): + data.reset_coords().merge(data) + with pytest.raises(ValueError, match=r"should be coordinates or not"): + data.merge(data.reset_coords()) + + def test_merge_broadcast_equals(self): + ds1 = xr.Dataset({"x": 0}) + ds2 = xr.Dataset({"x": ("y", [0, 0])}) + + with set_options(use_new_combine_kwarg_defaults=False): + with pytest.warns( + FutureWarning, + match="will change from compat='no_conflicts' to compat='override'", + ): + old = ds1.merge(ds2) + + with set_options(use_new_combine_kwarg_defaults=True): + new = ds1.merge(ds2) + + assert_identical(ds2, old) + with pytest.raises(AssertionError): + assert_identical(old, new) + + def test_merge_auto_align(self): + ds1 = xr.Dataset({"a": ("x", [1, 2]), "x": [0, 1]}) + ds2 = xr.Dataset({"b": ("x", [3, 4]), "x": [1, 2]}) + expected = xr.Dataset( + {"a": ("x", [1, 2, np.nan]), "b": ("x", [np.nan, 3, 4])}, {"x": [0, 1, 2]} + ) + with set_options(use_new_combine_kwarg_defaults=False): + with pytest.warns( + FutureWarning, match="will change from join='outer' to 
join='exact'" + ): + assert expected.identical(ds1.merge(ds2)) + with pytest.warns( + FutureWarning, match="will change from join='outer' to join='exact'" + ): + assert expected.identical(ds2.merge(ds1)) + + with set_options(use_new_combine_kwarg_defaults=True): + with pytest.raises(ValueError, match="might be related to new default"): + expected.identical(ds2.merge(ds1)) From 912638b790d80a6fcf41a1ad858c893e5225172f Mon Sep 17 00:00:00 2001 From: Julia Signell Date: Tue, 4 Mar 2025 09:26:59 -0500 Subject: [PATCH 07/20] Use `emit_user_level_warning` rather than `warnings.warn` --- xarray/core/alignment.py | 8 +++----- xarray/core/concat.py | 12 +++++------- xarray/core/merge.py | 14 +++++++++----- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py index dd7edbd88c2..54673c69434 100644 --- a/xarray/core/alignment.py +++ b/xarray/core/alignment.py @@ -2,7 +2,6 @@ import functools import operator -import warnings from collections import defaultdict from collections.abc import Callable, Hashable, Iterable, Mapping from contextlib import suppress @@ -21,7 +20,7 @@ safe_cast_to_index, ) from xarray.core.types import T_Alignable -from xarray.core.utils import is_dict_like, is_full_slice +from xarray.core.utils import emit_user_level_warning, is_dict_like, is_full_slice from xarray.core.variable import Variable, as_compatible_data, calculate_dimensions from xarray.util.deprecation_helpers import CombineKwargDefault @@ -424,7 +423,7 @@ def align_indexes(self) -> None: isinstance(self.join, CombineKwargDefault) and self.join != "exact" ): - warnings.warn( + emit_user_level_warning( self.join.warning_message( "This change will result in the following ValueError:" "cannot be aligned with join='exact' because " @@ -435,8 +434,7 @@ def align_indexes(self) -> None: ), recommend_set_options=False, ), - category=FutureWarning, - stacklevel=2, + FutureWarning, ) if self.join == "exact": raise ValueError( diff --git 
a/xarray/core/concat.py b/xarray/core/concat.py index 846f52bae17..ef2bba8e720 100644 --- a/xarray/core/concat.py +++ b/xarray/core/concat.py @@ -1,6 +1,5 @@ from __future__ import annotations -import warnings from collections.abc import Hashable, Iterable from typing import TYPE_CHECKING, Any, Union, overload @@ -19,6 +18,7 @@ merge_collected, ) from xarray.core.types import T_DataArray, T_Dataset, T_Variable +from xarray.core.utils import emit_user_level_warning from xarray.core.variable import Variable from xarray.core.variable import concat as concat_vars from xarray.util.deprecation_helpers import ( @@ -369,14 +369,13 @@ def process_subset_opt(opt, subset): if opt == "different": if isinstance(compat, CombineKwargDefault) and compat != "override": if not isinstance(opt, CombineKwargDefault): - warnings.warn( + emit_user_level_warning( compat.warning_message( "This change will result in the following ValueError: " f"Cannot specify both {subset}='different' and compat='override'.", recommend_set_options=False, ), - category=FutureWarning, - stacklevel=2, + FutureWarning, ) if compat == "override": @@ -462,13 +461,12 @@ def process_subset_opt(opt, subset): and opt != "minimal" and original != concat_over ): - warnings.warn( + emit_user_level_warning( opt.warning_message( "This is likely to lead to different results when multiple datasets " "have matching variables with overlapping values.", ), - category=FutureWarning, - stacklevel=2, + FutureWarning, ) else: valid_vars = tuple(getattr(datasets[0], subset)) diff --git a/xarray/core/merge.py b/xarray/core/merge.py index 8c14582982b..b7d7d86f9d9 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -1,6 +1,5 @@ from __future__ import annotations -import warnings from collections import defaultdict from collections.abc import Hashable, Iterable, Mapping, Sequence, Set from typing import TYPE_CHECKING, Any, NamedTuple, Union @@ -16,7 +15,13 @@ filter_indexes_from_coords, indexes_equal, ) -from 
xarray.core.utils import Frozen, compat_dict_union, dict_equiv, equivalent +from xarray.core.utils import ( + Frozen, + compat_dict_union, + dict_equiv, + emit_user_level_warning, + equivalent, +) from xarray.core.variable import Variable, as_variable, calculate_dimensions from xarray.util.deprecation_helpers import ( _COMPAT_DEFAULT, @@ -306,13 +311,12 @@ def merge_collected( and compat == "no_conflicts" and len(variables) > 1 ): - warnings.warn( + emit_user_level_warning( compat.warning_message( "This is likely to lead to different results when " "combining overlapping variables with the same name.", ), - category=FutureWarning, - stacklevel=2, + FutureWarning, ) except MergeError: if compat != "minimal": From 67fd4ffb64693992556bd51c91897166b64854c9 Mon Sep 17 00:00:00 2001 From: Julia Signell Date: Tue, 4 Mar 2025 11:11:05 -0500 Subject: [PATCH 08/20] Change hardcoded defaults --- xarray/core/parallel.py | 10 ++++++---- xarray/plot/dataarray_plot.py | 7 +++---- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/xarray/core/parallel.py b/xarray/core/parallel.py index a3248987c2b..a1255a2c95e 100644 --- a/xarray/core/parallel.py +++ b/xarray/core/parallel.py @@ -352,8 +352,8 @@ def _wrapper( merged_coordinates = merge( [arg.coords for arg in args if isinstance(arg, Dataset | DataArray)], - join="outer", - compat="no_conflicts", + join="exact", + compat="override", ).coords # check all dims are present @@ -442,7 +442,9 @@ def _wrapper( xarray_objs = tuple(arg.chunk(arg.chunksizes) for arg in xarray_objs) merged_coordinates = merge( - [arg.coords for arg in aligned], join="outer", compat="no_conflicts" + [arg.coords for arg in aligned], + join="exact", + compat="override", ).coords _, npargs = unzip( @@ -478,7 +480,7 @@ def _wrapper( coordinates = merge( (preserved_coords, template.coords.to_dataset()[new_coord_vars]), join="outer", - compat="no_conflicts", + compat="override", ).coords output_chunks: Mapping[Hashable, tuple[int, ...]] = { dim: 
input_chunks[dim] for dim in template.dims if dim in input_chunks diff --git a/xarray/plot/dataarray_plot.py b/xarray/plot/dataarray_plot.py index 9663303276e..71557f8077c 100644 --- a/xarray/plot/dataarray_plot.py +++ b/xarray/plot/dataarray_plot.py @@ -199,10 +199,9 @@ def _prepare_plot1d_data( darray = concat( [darray, darray_nan], dim=dim, - data_vars="all", - coords="different", - compat="equals", - join="outer", + coords="minimal", + compat="override", + join="exact", ) dims_T.append(coords_to_plot[v]) From 4f38292c7d8df02fc39c40a39cae7ecdad42940e Mon Sep 17 00:00:00 2001 From: Julia Signell Date: Tue, 4 Mar 2025 15:04:06 -0500 Subject: [PATCH 09/20] Fix up test_concat --- xarray/core/concat.py | 10 +- xarray/tests/test_concat.py | 257 ++++++++++++++++++++++++++++-------- 2 files changed, 206 insertions(+), 61 deletions(-) diff --git a/xarray/core/concat.py b/xarray/core/concat.py index ef2bba8e720..644f15f1fb5 100644 --- a/xarray/core/concat.py +++ b/xarray/core/concat.py @@ -461,12 +461,11 @@ def process_subset_opt(opt, subset): and opt != "minimal" and original != concat_over ): - emit_user_level_warning( + warnings.append( opt.warning_message( "This is likely to lead to different results when multiple datasets " "have matching variables with overlapping values.", - ), - FutureWarning, + ) ) else: valid_vars = tuple(getattr(datasets[0], subset)) @@ -486,8 +485,13 @@ def process_subset_opt(opt, subset): ) concat_over.update(opt) + warnings = [] process_subset_opt(data_vars, "data_vars") process_subset_opt(coords, "coords") + + for warning in warnings: + emit_user_level_warning(warning, FutureWarning) + return concat_over, equals, concat_dim_lengths diff --git a/xarray/tests/test_concat.py b/xarray/tests/test_concat.py index 9e8e06fc1ee..6b4d67158ca 100644 --- a/xarray/tests/test_concat.py +++ b/xarray/tests/test_concat.py @@ -1,6 +1,7 @@ from __future__ import annotations from collections.abc import Callable +from contextlib import nullcontext from copy 
import deepcopy from typing import TYPE_CHECKING, Any, Literal @@ -8,7 +9,7 @@ import pandas as pd import pytest -from xarray import DataArray, Dataset, Variable, concat +from xarray import DataArray, Dataset, Variable, concat, set_options from xarray.core import dtypes, merge from xarray.core.coordinates import Coordinates from xarray.core.indexes import PandasIndex @@ -132,9 +133,9 @@ def test_concat_compat() -> None: for var in ["has_x", "no_x_y"]: assert "y" not in result[var].dims and "y" not in result[var].coords with pytest.raises(ValueError, match=r"'q' not present in all datasets"): - concat([ds1, ds2], dim="q") + concat([ds1, ds2], dim="q", data_vars="all", join="outer") with pytest.raises(ValueError, match=r"'q' not present in all datasets"): - concat([ds2, ds1], dim="q") + concat([ds2, ds1], dim="q", data_vars="all", join="outer") def test_concat_missing_var() -> None: @@ -213,8 +214,12 @@ def test_concat_second_empty() -> None: actual = concat([ds1, ds2], dim="y", coords="all") assert_identical(actual, expected) + +def test_concat_second_empty_with_scalar_data_var_only_on_first() -> None: # Check concatenating scalar data_var only present in ds1 - ds1["b"] = 0.1 + ds1 = Dataset(data_vars={"a": ("y", [0.1]), "b": 0.1}, coords={"x": 0.1}) + ds2 = Dataset(coords={"x": 0.1}) + expected = Dataset( data_vars={"a": ("y", [0.1, np.nan]), "b": ("y", [0.1, np.nan])}, coords={"x": ("y", [0.1, 0.1])}, @@ -225,7 +230,9 @@ def test_concat_second_empty() -> None: expected = Dataset( data_vars={"a": ("y", [0.1, np.nan]), "b": 0.1}, coords={"x": 0.1} ) - actual = concat([ds1, ds2], dim="y", coords="different", data_vars="different") + actual = concat( + [ds1, ds2], dim="y", coords="different", data_vars="different", compat="equals" + ) assert_identical(actual, expected) @@ -265,7 +272,7 @@ def test_concat_multiple_datasets_missing_vars(include_day: bool) -> None: datasets = create_concat_datasets( len(vars_to_drop), seed=123, include_day=include_day ) - expected = 
concat(datasets, dim="day") + expected = concat(datasets, dim="day", data_vars="all") for i, name in enumerate(vars_to_drop): if include_day: @@ -279,7 +286,7 @@ def test_concat_multiple_datasets_missing_vars(include_day: bool) -> None: for ds, varname in zip(datasets, vars_to_drop, strict=True) ] - actual = concat(datasets, dim="day") + actual = concat(datasets, dim="day", data_vars="all") assert list(actual.data_vars.keys()) == [ "pressure", @@ -490,7 +497,7 @@ def rectify_dim_order(self, data: Dataset, dataset) -> Dataset: ) def test_concat_simple(self, data: Dataset, dim, coords) -> None: datasets = [g for _, g in data.groupby(dim, squeeze=False)] - assert_identical(data, concat(datasets, dim, coords=coords)) + assert_identical(data, concat(datasets, dim, coords=coords, compat="equals")) def test_concat_merge_variables_present_in_some_datasets( self, data: Dataset @@ -511,7 +518,7 @@ def test_concat_merge_variables_present_in_some_datasets( assert_identical(expected, actual) # expand foo - actual = concat([data0, data1], "dim1") + actual = concat([data0, data1], "dim1", data_vars="all") foo = np.ones((8, 10), dtype=data1.foo.dtype) * np.nan foo[3:] = data1.foo.values[None, ...] 
expected = data.copy().assign(foo=(["dim1", "bar"], foo)) @@ -535,7 +542,9 @@ def test_concat_coords_kwarg( data.coords["extra"] = ("dim4", np.arange(3)) datasets = [g.squeeze() for _, g in data.groupby(dim, squeeze=False)] - actual = concat(datasets, data[dim], coords=coords) + actual = concat( + datasets, data[dim], coords=coords, data_vars="all", compat="equals" + ) if coords == "all": expected = np.array([data["extra"].values for _ in range(data.sizes[dim])]) assert_array_equal(actual["extra"].values, expected) @@ -567,41 +576,52 @@ def test_concat_data_vars_typing(self) -> None: actual = concat(objs, dim="x", data_vars="minimal") assert_identical(data, actual) - def test_concat_data_vars(self) -> None: + @pytest.mark.parametrize("data_vars", ["minimal", "different", "all", [], ["foo"]]) + def test_concat_data_vars(self, data_vars) -> None: data = Dataset({"foo": ("x", np.random.randn(10))}) objs: list[Dataset] = [data.isel(x=slice(5)), data.isel(x=slice(5, None))] - for data_vars in ["minimal", "different", "all", [], ["foo"]]: - actual = concat(objs, dim="x", data_vars=data_vars) - assert_identical(data, actual) + actual = concat(objs, dim="x", data_vars=data_vars, compat="equals") + assert_identical(data, actual) - def test_concat_coords(self): - # TODO: annotating this func fails + @pytest.mark.parametrize("coords", ["different", "all", ["c"]]) + def test_concat_coords(self, coords) -> None: data = Dataset({"foo": ("x", np.random.randn(10))}) expected = data.assign_coords(c=("x", [0] * 5 + [1] * 5)) objs = [ data.isel(x=slice(5)).assign_coords(c=0), data.isel(x=slice(5, None)).assign_coords(c=1), ] - for coords in ["different", "all", ["c"]]: - actual = concat(objs, dim="x", coords=coords) - assert_identical(expected, actual) - for coords in ["minimal", []]: - with pytest.raises(merge.MergeError, match="conflicting values"): - concat(objs, dim="x", coords=coords) + extra_kwargs = dict(compat="equals") if coords == "different" else {} + actual = concat(objs, 
dim="x", coords=coords, **extra_kwargs) + assert_identical(expected, actual) - def test_concat_constant_index(self): - # TODO: annotating this func fails + @pytest.mark.parametrize("coords", ["minimal", []]) + def test_concat_coords_raises_merge_error(self, coords) -> None: + data = Dataset({"foo": ("x", np.random.randn(10))}) + objs = [ + data.isel(x=slice(5)).assign_coords(c=0), + data.isel(x=slice(5, None)).assign_coords(c=1), + ] + with pytest.raises(merge.MergeError, match="conflicting values"): + concat(objs, dim="x", coords=coords, compat="equals") + + @pytest.mark.parametrize("data_vars", ["different", "all", ["foo"]]) + def test_concat_constant_index(self, data_vars) -> None: # GH425 ds1 = Dataset({"foo": 1.5}, {"y": 1}) ds2 = Dataset({"foo": 2.5}, {"y": 1}) expected = Dataset({"foo": ("y", [1.5, 2.5]), "y": [1, 1]}) - for mode in ["different", "all", ["foo"]]: - actual = concat([ds1, ds2], "y", data_vars=mode) - assert_identical(expected, actual) + extra_kwargs = dict(compat="equals") if data_vars == "different" else {} + actual = concat([ds1, ds2], "y", data_vars=data_vars, **extra_kwargs) + assert_identical(expected, actual) + + def test_concat_constant_index_minimal_raises_merge_error(self) -> None: + ds1 = Dataset({"foo": 1.5}, {"y": 1}) + ds2 = Dataset({"foo": 2.5}, {"y": 1}) with pytest.raises(merge.MergeError, match="conflicting values"): # previously dim="y", and raised error which makes no sense. # "foo" has dimension "y" so minimal should concatenate it? 
- concat([ds1, ds2], "new_dim", data_vars="minimal") + concat([ds1, ds2], "new_dim", data_vars="minimal", compat="equals") def test_concat_size0(self) -> None: data = create_test_data() @@ -615,7 +635,7 @@ def test_concat_size0(self) -> None: def test_concat_autoalign(self) -> None: ds1 = Dataset({"foo": DataArray([1, 2], coords=[("x", [1, 2])])}) ds2 = Dataset({"foo": DataArray([1, 2], coords=[("x", [1, 3])])}) - actual = concat([ds1, ds2], "y") + actual = concat([ds1, ds2], "y", data_vars="all", join="outer") expected = Dataset( { "foo": DataArray( @@ -627,8 +647,7 @@ def test_concat_autoalign(self) -> None: ) assert_identical(expected, actual) - def test_concat_errors(self): - # TODO: annotating this func fails + def test_concat_errors(self) -> None: data = create_test_data() split_data = [data.isel(dim1=slice(3)), data.isel(dim1=slice(3, None))] @@ -843,8 +862,7 @@ def test_concat_combine_attrs_kwarg_variables( assert_identical(actual, expected) - def test_concat_promote_shape(self) -> None: - # mixed dims within variables + def test_concat_promote_shape_with_mixed_dims_within_variables(self) -> None: objs = [Dataset({}, {"x": 0}), Dataset({"x": [1]})] actual = concat(objs, "x") expected = Dataset({"x": [0, 1]}) @@ -854,25 +872,28 @@ def test_concat_promote_shape(self) -> None: actual = concat(objs, "x") assert_identical(actual, expected) - # mixed dims between variables + def test_concat_promote_shape_with_mixed_dims_between_variables(self) -> None: objs = [Dataset({"x": [2], "y": 3}), Dataset({"x": [4], "y": 5})] - actual = concat(objs, "x") + actual = concat(objs, "x", data_vars="all") expected = Dataset({"x": [2, 4], "y": ("x", [3, 5])}) assert_identical(actual, expected) - # mixed dims in coord variable + def test_concat_promote_shape_with_mixed_dims_in_coord_variable(self) -> None: objs = [Dataset({"x": [0]}, {"y": -1}), Dataset({"x": [1]}, {"y": ("x", [-2])})] actual = concat(objs, "x") expected = Dataset({"x": [0, 1]}, {"y": ("x", [-1, -2])}) 
assert_identical(actual, expected) - # scalars with mixed lengths along concat dim -- values should repeat + def test_concat_promote_shape_for_scalars_with_mixed_lengths_along_concat_dim( + self, + ) -> None: + # values should repeat objs = [Dataset({"x": [0]}, {"y": -1}), Dataset({"x": [1, 2]}, {"y": -2})] - actual = concat(objs, "x") + actual = concat(objs, "x", coords="different", compat="equals") expected = Dataset({"x": [0, 1, 2]}, {"y": ("x", [-1, -2, -2])}) assert_identical(actual, expected) - # broadcast 1d x 1d -> 2d + def test_concat_promote_shape_broadcast_1d_x_1d_goes_to_2d(self) -> None: objs = [ Dataset({"z": ("x", [-1])}, {"x": [0], "y": [0]}), Dataset({"z": ("y", [1])}, {"x": [1], "y": [0]}), @@ -881,6 +902,7 @@ def test_concat_promote_shape(self) -> None: expected = Dataset({"z": (("x", "y"), [[-1], [1]])}, {"x": [0, 1], "y": [0]}) assert_identical(actual, expected) + def test_concat_promote_shape_with_scalar_coordinates(self) -> None: # regression GH6384 objs = [ Dataset({}, {"x": pd.Interval(-1, 0, closed="right")}), @@ -897,6 +919,7 @@ def test_concat_promote_shape(self) -> None: ) assert_identical(actual, expected) + def test_concat_promote_shape_with_coordinates_of_particular_dtypes(self) -> None: # regression GH6416 (coord dtype) and GH6434 time_data1 = np.array(["2022-01-01", "2022-02-01"], dtype="datetime64[ns]") time_data2 = np.array("2022-03-01", dtype="datetime64[ns]") @@ -930,14 +953,14 @@ def test_concat_dim_is_variable(self) -> None: objs = [Dataset({"x": 0}), Dataset({"x": 1})] coord = Variable("y", [3, 4], attrs={"foo": "bar"}) expected = Dataset({"x": ("y", [0, 1]), "y": coord}) - actual = concat(objs, coord) + actual = concat(objs, coord, data_vars="all") assert_identical(actual, expected) def test_concat_dim_is_dataarray(self) -> None: objs = [Dataset({"x": 0}), Dataset({"x": 1})] coord = DataArray([3, 4], dims="y", attrs={"foo": "bar"}) expected = Dataset({"x": ("y", [0, 1]), "y": coord}) - actual = concat(objs, coord) + actual 
= concat(objs, coord, data_vars="all") assert_identical(actual, expected) def test_concat_multiindex(self) -> None: @@ -983,7 +1006,9 @@ def test_concat_fill_value(self, fill_value) -> None: }, {"x": [0, 1, 2]}, ) - actual = concat(datasets, dim="t", fill_value=fill_value) + actual = concat( + datasets, dim="t", fill_value=fill_value, data_vars="all", join="outer" + ) assert_identical(actual, expected) @pytest.mark.parametrize("dtype", [str, bytes]) @@ -1005,7 +1030,7 @@ def test_concat_str_dtype(self, dtype, dim) -> None: "x2": np.array(["c", "d"], dtype=dtype), } ) - actual = concat([da1, da2], dim=dim) + actual = concat([da1, da2], dim=dim, join="outer") assert np.issubdtype(actual.x2.dtype, dtype) @@ -1030,7 +1055,7 @@ def test_concat_avoids_index_auto_creation(self) -> None: assert combined.indexes == {} # should not raise on stack - combined = concat(datasets, dim="z") + combined = concat(datasets, dim="z", data_vars="all") assert combined["a"].shape == (2, 3, 3) assert combined["a"].dims == ("z", "x", "y") @@ -1095,11 +1120,13 @@ def test_concat(self) -> None: stacked = concat(grouped, pd.Index(ds["x"], name="x")) assert_identical(foo, stacked) - actual2 = concat([foo[0], foo[1]], pd.Index([0, 1])).reset_coords(drop=True) + actual2 = concat([foo[0], foo[1]], pd.Index([0, 1]), coords="all").reset_coords( + drop=True + ) expected = foo[:2].rename({"x": "concat_dim"}) assert_identical(expected, actual2) - actual3 = concat([foo[0], foo[1]], [0, 1]).reset_coords(drop=True) + actual3 = concat([foo[0], foo[1]], [0, 1], coords="all").reset_coords(drop=True) expected = foo[:2].rename({"x": "concat_dim"}) assert_identical(expected, actual3) @@ -1107,7 +1134,7 @@ def test_concat(self) -> None: concat([foo, bar], dim="w", compat="identical") with pytest.raises(ValueError, match=r"not a valid argument"): - concat([foo, bar], dim="w", data_vars="minimal") + concat([foo, bar], dim="w", data_vars="different") def test_concat_encoding(self) -> None: # Regression test for 
GH1297 @@ -1161,7 +1188,7 @@ def test_concat_avoids_index_auto_creation(self) -> None: assert combined.indexes == {} # should not raise on stack - combined = concat(arrays, dim="z") + combined = concat(arrays, dim="z", coords="different", compat="equals") assert combined.shape == (2, 3, 3) assert combined.dims == ("z", "x", "y") @@ -1181,7 +1208,7 @@ def test_concat_fill_value(self, fill_value) -> None: dims=["y", "x"], coords={"x": [1, 2, 3]}, ) - actual = concat((foo, bar), dim="y", fill_value=fill_value) + actual = concat((foo, bar), dim="y", fill_value=fill_value, join="outer") assert_identical(actual, expected) def test_concat_join_kwarg(self) -> None: @@ -1259,7 +1286,7 @@ def test_concat_str_dtype(self, dtype, dim) -> None: dims=["x1", "x2"], coords={"x1": np.array([1, 2]), "x2": np.array(["c", "d"], dtype=dtype)}, ) - actual = concat([da1, da2], dim=dim) + actual = concat([da1, da2], dim=dim, join="outer") assert np.issubdtype(actual.x2.dtype, dtype) @@ -1284,16 +1311,17 @@ def test_concat_attrs_first_variable(attr1, attr2) -> None: assert concat_attrs == attr1 -def test_concat_merge_single_non_dim_coord(): - # TODO: annotating this func fails +def test_concat_merge_single_non_dim_coord() -> None: da1 = DataArray([1, 2, 3], dims="x", coords={"x": [1, 2, 3], "y": 1}) da2 = DataArray([4, 5, 6], dims="x", coords={"x": [4, 5, 6]}) expected = DataArray(range(1, 7), dims="x", coords={"x": range(1, 7), "y": 1}) - for coords in ["different", "minimal"]: - actual = concat([da1, da2], "x", coords=coords) - assert_identical(actual, expected) + actual = concat([da1, da2], "x", coords="minimal", compat="override") + assert_identical(actual, expected) + + actual = concat([da1, da2], "x", coords="different", compat="equals") + assert_identical(actual, expected) with pytest.raises(ValueError, match=r"'y' not present in all datasets."): concat([da1, da2], dim="x", coords="all") @@ -1301,9 +1329,12 @@ def test_concat_merge_single_non_dim_coord(): da1 = DataArray([1, 2, 3], 
dims="x", coords={"x": [1, 2, 3], "y": 1}) da2 = DataArray([4, 5, 6], dims="x", coords={"x": [4, 5, 6]}) da3 = DataArray([7, 8, 9], dims="x", coords={"x": [7, 8, 9], "y": 1}) - for coords in ["different", "all"]: - with pytest.raises(ValueError, match=r"'y' not present in all datasets"): - concat([da1, da2, da3], dim="x", coords=coords) + + with pytest.raises(ValueError, match=r"'y' not present in all datasets"): + concat([da1, da2, da3], dim="x", coords="all") + + with pytest.raises(ValueError, match=r"'y' not present in all datasets"): + concat([da1, da2, da3], dim="x", coords="different", compat="equals") def test_concat_preserve_coordinate_order() -> None: @@ -1378,3 +1409,113 @@ def test_concat_index_not_same_dim() -> None: match=r"Cannot concatenate along dimension 'x' indexes with dimensions.*", ): concat([ds1, ds2], dim="x") + + +class TestNewDefaults: + def test_concat_second_empty_with_scalar_data_var_only_on_first(self) -> None: + ds1 = Dataset(data_vars={"a": ("y", [0.1]), "b": 0.1}, coords={"x": 0.1}) + ds2 = Dataset(coords={"x": 0.1}) + + expected = Dataset( + data_vars={"a": ("y", [0.1, np.nan]), "b": 0.1}, coords={"x": 0.1} + ) + with set_options(use_new_combine_kwarg_defaults=False): + with pytest.warns( + FutureWarning, + match="will change from compat='equals' to compat='override'", + ): + actual = concat( + [ds1, ds2], dim="y", coords="different", data_vars="different" + ) + assert_identical(actual, expected) + with set_options(use_new_combine_kwarg_defaults=True): + with pytest.raises(ValueError, match="might be related to new default"): + concat([ds1, ds2], dim="y", coords="different", data_vars="different") + + def test_concat_multiple_datasets_missing_vars(self) -> None: + vars_to_drop = [ + "temperature", + "pressure", + "humidity", + "precipitation", + "cloud_cover", + ] + + datasets = create_concat_datasets( + len(vars_to_drop), seed=123, include_day=False + ) + # set up the test data + datasets = [ + ds.drop_vars(varname) + for ds, 
varname in zip(datasets, vars_to_drop, strict=True) + ] + with set_options(use_new_combine_kwarg_defaults=False): + with pytest.warns( + FutureWarning, + match="will change from data_vars='all' to data_vars='minimal'", + ): + old = concat(datasets, dim="day") + with set_options(use_new_combine_kwarg_defaults=True): + new = concat(datasets, dim="day") + + with pytest.raises(AssertionError): + assert_identical(old, new) + + @pytest.mark.parametrize("coords", ["different", "minimal", "all"]) + def test_concat_coords_kwarg( + self, coords: Literal["all", "minimal", "different"] + ) -> None: + data = create_test_data().drop_dims("dim3") + + # make sure the coords argument behaves as expected + data.coords["extra"] = ("dim4", np.arange(3)) + datasets = [g.squeeze() for _, g in data.groupby("dim1", squeeze=False)] + + with set_options(use_new_combine_kwarg_defaults=False): + with pytest.warns( + FutureWarning, + match="will change from data_vars='all' to data_vars='minimal'", + ): + if coords == "different": + expectation = pytest.warns( + FutureWarning, + match="will change from compat='equals' to compat='override'", + ) + else: + expectation = nullcontext() + with expectation: + old = concat(datasets, data["dim1"], coords=coords) + + with set_options(use_new_combine_kwarg_defaults=True): + if coords == "different": + with pytest.raises(ValueError): + concat(datasets, data["dim1"], coords=coords) + else: + new = concat(datasets, data["dim1"], coords=coords) + with pytest.raises(AssertionError): + assert_identical(old, new) + + def test_concat_promote_shape_for_scalars_with_mixed_lengths_along_concat_dim( + self, + ) -> None: + # values should repeat + objs = [Dataset({"x": [0]}, {"y": -1}), Dataset({"x": [1, 2]}, {"y": -2})] + expected = Dataset({"x": [0, 1, 2]}, {"y": ("x", [-1, -2, -2])}) + with set_options(use_new_combine_kwarg_defaults=False): + with pytest.warns( + FutureWarning, + match="will change from coords='different' to coords='minimal'", + ): + old = 
concat(objs, "x")
+        assert_identical(old, expected)
+        with set_options(use_new_combine_kwarg_defaults=True):
+            new = concat(objs, "x")
+            with pytest.raises(AssertionError):
+                assert_identical(new, old)
+            with pytest.raises(ValueError, match="might be related to new default"):
+                concat(objs, "x", coords="different")
+            with pytest.raises(merge.MergeError, match="conflicting values"):
+                concat(objs, "x", compat="equals")
+
+            new = concat(objs, "x", coords="different", compat="equals")
+            assert_identical(old, new)

From 51ccc898322c1e3832372031793baa4c05222b27 Mon Sep 17 00:00:00 2001
From: Julia Signell
Date: Tue, 4 Mar 2025 15:07:26 -0500
Subject: [PATCH 10/20] Add comment about why we allow data_vars='minimal' for
 concat over dataarrays

---
 xarray/core/concat.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/xarray/core/concat.py b/xarray/core/concat.py
index 644f15f1fb5..e0caef9bc10 100644
--- a/xarray/core/concat.py
+++ b/xarray/core/concat.py
@@ -797,6 +797,10 @@ def _dataarray_concat(
             "The elements in the input list need to be either all 'Dataset's or all 'DataArray's"
         )
 
+    # Allow passing old or new default even though we always use `data_vars="all"`
+    # when passing off to `_dataset_concat`. This allows people to explicitly
+    # set the data_vars value to the new default without worrying about whether
+    # they have datasets or dataarrays.
if not isinstance(data_vars, CombineKwargDefault) and data_vars not in [ "all", "minimal", From aa3180e8318dd1e73deba13cfb958bf8cebe72ac Mon Sep 17 00:00:00 2001 From: Julia Signell Date: Tue, 4 Mar 2025 15:34:15 -0500 Subject: [PATCH 11/20] Tidy up tests based on review --- xarray/tests/test_merge.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/xarray/tests/test_merge.py b/xarray/tests/test_merge.py index 7d346994d6b..64a7e0dd87e 100644 --- a/xarray/tests/test_merge.py +++ b/xarray/tests/test_merge.py @@ -341,8 +341,8 @@ def test_merge(self): actual = ds2.merge(ds1) assert_identical(expected, actual) - - actual = data.merge(data, compat="no_conflicts") + with pytest.warns(FutureWarning): # this is a false alarm + actual = data.merge(data) assert_identical(data, actual) actual = data.reset_coords(drop=True).merge(data, compat="no_conflicts") assert_identical(data, actual) @@ -356,13 +356,13 @@ def test_merge(self): with pytest.raises(ValueError, match=r"should be coordinates or not"): data.merge(data.reset_coords(), compat="no_conflicts") - def test_merge_broadcast_equals(self): + def test_merge_compat_broadcast_equals(self): ds1 = xr.Dataset({"x": 0}) ds2 = xr.Dataset({"x": ("y", [0, 0])}) - actual = ds1.merge(ds2, compat="no_conflicts") + actual = ds1.merge(ds2, compat="broadcast_equals") assert_identical(ds2, actual) - actual = ds2.merge(ds1, compat="override") + actual = ds2.merge(ds1, compat="broadcast_equals") assert_identical(ds2, actual) actual = ds1.copy() @@ -371,7 +371,7 @@ def test_merge_broadcast_equals(self): ds1 = xr.Dataset({"x": np.nan}) ds2 = xr.Dataset({"x": ("y", [np.nan, np.nan])}) - actual = ds1.merge(ds2, compat="no_conflicts") + actual = ds1.merge(ds2, compat="broadcast_equals") assert_identical(ds2, actual) def test_merge_compat(self): @@ -405,7 +405,7 @@ def test_merge_compat_minimal(self) -> None: expected = xr.Dataset(coords={"foo": [1, 2, 3]}) assert_identical(actual, expected) - def 
test_merge_auto_align(self): + def test_merge_join(self): ds1 = xr.Dataset({"a": ("x", [1, 2]), "x": [0, 1]}) ds2 = xr.Dataset({"b": ("x", [3, 4]), "x": [1, 2]}) expected = xr.Dataset( From e517dcc6c4aff70cfb9d1cddad49c02b1e3248a6 Mon Sep 17 00:00:00 2001 From: Julia Signell Date: Mon, 10 Mar 2025 12:58:53 -0400 Subject: [PATCH 12/20] Trying to resolve mypy issues --- xarray/core/alignment.py | 27 +++++++++++++++++---------- xarray/core/combine.py | 6 +++--- xarray/core/concat.py | 10 +++++----- xarray/core/merge.py | 2 +- xarray/tests/test_backends.py | 22 ++++++++++++++-------- xarray/tests/test_concat.py | 11 ++++++----- xarray/util/deprecation_helpers.py | 22 ++++++++++++++++------ 7 files changed, 62 insertions(+), 38 deletions(-) diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py index f83df55358c..c2da139759f 100644 --- a/xarray/core/alignment.py +++ b/xarray/core/alignment.py @@ -113,7 +113,7 @@ class Aligner(Generic[T_Alignable]): objects: tuple[T_Alignable, ...] results: tuple[T_Alignable, ...] objects_matching_indexes: tuple[dict[MatchingIndexKey, Index], ...] 
- join: str + join: str | CombineKwargDefault exclude_dims: frozenset[Hashable] exclude_vars: frozenset[Hashable] copy: bool @@ -133,7 +133,7 @@ class Aligner(Generic[T_Alignable]): def __init__( self, objects: Iterable[T_Alignable], - join: str = "inner", + join: str | CombineKwargDefault = "inner", indexes: Mapping[Any, Any] | None = None, exclude_dims: str | Iterable[Hashable] = frozenset(), exclude_vars: Iterable[Hashable] = frozenset(), @@ -146,7 +146,14 @@ def __init__( self.objects = tuple(objects) self.objects_matching_indexes = () - if join not in ["inner", "outer", "override", "exact", "left", "right"]: + if not isinstance(join, CombineKwargDefault) and join not in [ + "inner", + "outer", + "override", + "exact", + "left", + "right", + ]: raise ValueError(f"invalid value for join: {join}") self.join = join @@ -618,7 +625,7 @@ def align( obj1: T_Obj1, /, *, - join: JoinOptions = "inner", + join: JoinOptions | CombineKwargDefault = "inner", copy: bool = True, indexes=None, exclude: str | Iterable[Hashable] = frozenset(), @@ -632,7 +639,7 @@ def align( obj2: T_Obj2, /, *, - join: JoinOptions = "inner", + join: JoinOptions | CombineKwargDefault = "inner", copy: bool = True, indexes=None, exclude: str | Iterable[Hashable] = frozenset(), @@ -647,7 +654,7 @@ def align( obj3: T_Obj3, /, *, - join: JoinOptions = "inner", + join: JoinOptions | CombineKwargDefault = "inner", copy: bool = True, indexes=None, exclude: str | Iterable[Hashable] = frozenset(), @@ -663,7 +670,7 @@ def align( obj4: T_Obj4, /, *, - join: JoinOptions = "inner", + join: JoinOptions | CombineKwargDefault = "inner", copy: bool = True, indexes=None, exclude: str | Iterable[Hashable] = frozenset(), @@ -680,7 +687,7 @@ def align( obj5: T_Obj5, /, *, - join: JoinOptions = "inner", + join: JoinOptions | CombineKwargDefault = "inner", copy: bool = True, indexes=None, exclude: str | Iterable[Hashable] = frozenset(), @@ -691,7 +698,7 @@ def align( @overload def align( *objects: T_Alignable, - join: 
JoinOptions = "inner", + join: JoinOptions | CombineKwargDefault = "inner", copy: bool = True, indexes=None, exclude: str | Iterable[Hashable] = frozenset(), @@ -701,7 +708,7 @@ def align( def align( *objects: T_Alignable, - join: JoinOptions = "inner", + join: JoinOptions | CombineKwargDefault = "inner", copy: bool = True, indexes=None, exclude: str | Iterable[Hashable] = frozenset(), diff --git a/xarray/core/combine.py b/xarray/core/combine.py index b9a0d9f614a..ef03802bbb7 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -269,7 +269,7 @@ def _combine_all_along_first_dim( dim, data_vars, coords, - compat: CompatOptions, + compat: CompatOptions | CombineKwargDefault, fill_value, join: JoinOptions | CombineKwargDefault, combine_attrs: CombineAttrsOptions, @@ -298,7 +298,7 @@ def _combine_all_along_first_dim( def _combine_1d( datasets, concat_dim, - compat: CompatOptions, + compat: CompatOptions | CombineKwargDefault, data_vars, coords, fill_value, @@ -365,7 +365,7 @@ def _nested_combine( return Dataset() if isinstance(concat_dim, str | DataArray) or concat_dim is None: - concat_dim = [concat_dim] # type: ignore[assignment] + concat_dim = [concat_dim] # Arrange datasets for concatenation # Use information from the shape of the user input diff --git a/xarray/core/concat.py b/xarray/core/concat.py index e0caef9bc10..9d5bf477c94 100644 --- a/xarray/core/concat.py +++ b/xarray/core/concat.py @@ -65,7 +65,7 @@ def concat( compat: CompatOptions | CombineKwargDefault = _COMPAT_CONCAT_DEFAULT, positions: Iterable[Iterable[int]] | None = None, fill_value: object = dtypes.NA, - join: JoinOptions | None = None, + join: JoinOptions | CombineKwargDefault = _JOIN_DEFAULT, combine_attrs: CombineAttrsOptions = "override", create_index_for_new_dim: bool = True, ) -> T_DataArray: ... 
@@ -334,7 +334,7 @@ def _calc_concat_over( datasets, dim, dim_names, - data_vars: T_DataVars, + data_vars: T_DataVars | CombineKwargDefault, coords, compat, ): @@ -485,7 +485,7 @@ def process_subset_opt(opt, subset): ) concat_over.update(opt) - warnings = [] + warnings: list[str] = [] process_subset_opt(data_vars, "data_vars") process_subset_opt(coords, "coords") @@ -534,7 +534,7 @@ def _dataset_concat( datasets: Iterable[T_Dataset], dim: str | T_Variable | T_DataArray | pd.Index, data_vars: T_DataVars | CombineKwargDefault, - coords: str | list[str] | CombineKwargDefault, + coords: str | list[Hashable] | CombineKwargDefault, compat: CompatOptions | CombineKwargDefault, positions: Iterable[Iterable[int]] | None, fill_value: Any, @@ -780,7 +780,7 @@ def _dataarray_concat( arrays: Iterable[T_DataArray], dim: str | T_Variable | T_DataArray | pd.Index, data_vars: T_DataVars | CombineKwargDefault, - coords: str | list[str] | CombineKwargDefault, + coords: str | list[Hashable] | CombineKwargDefault, compat: CompatOptions | CombineKwargDefault, positions: Iterable[Iterable[int]] | None, fill_value: object, diff --git a/xarray/core/merge.py b/xarray/core/merge.py index 41ef8680ca6..373da86d5e0 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -527,7 +527,7 @@ def coerce_pandas_values(objects: Iterable[CoercibleMapping]) -> list[DatasetLik def _get_priority_vars_and_indexes( objects: Sequence[DatasetLike], priority_arg: int | None, - compat: CompatOptions = "equals", + compat: CompatOptions | CombineKwargDefault = "equals", ) -> dict[Hashable, MergeElement]: """Extract the priority variable from a list of mappings. 
diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 7dd4fe6824e..26cd481bd80 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -14,7 +14,7 @@ import uuid import warnings from collections.abc import Generator, Iterator, Mapping -from contextlib import ExitStack, nullcontext +from contextlib import ExitStack from io import BytesIO from os import listdir from pathlib import Path @@ -4614,12 +4614,16 @@ def test_open_mfdataset_dataset_combine_attrs( with set_options( use_new_combine_kwarg_defaults=use_new_combine_kwarg_defaults ): - warning = ( + warning: contextlib.AbstractContextManager = ( pytest.warns(FutureWarning) if not use_new_combine_kwarg_defaults - else nullcontext() + else contextlib.nullcontext() + ) + error: contextlib.AbstractContextManager = ( + pytest.raises(xr.MergeError) + if expect_error + else contextlib.nullcontext() ) - error = pytest.raises(xr.MergeError) if expect_error else nullcontext() with warning: with error: with xr.open_mfdataset( @@ -4785,13 +4789,15 @@ def test_open_mfdataset_warns_when_kwargs_set_to_different( xr.concat([ds1, ds2], dim="t", **kwargs) with set_options(use_new_combine_kwarg_defaults=False): - if "data_vars" not in kwargs: - expectation = pytest.warns( + expectation: contextlib.AbstractContextManager = ( + pytest.warns( FutureWarning, match="will change from data_vars='all'", ) - else: - expectation = nullcontext() + if "data_vars" not in kwargs + else contextlib.nullcontext() + ) + with pytest.warns( FutureWarning, match="will change from compat='equals'", diff --git a/xarray/tests/test_concat.py b/xarray/tests/test_concat.py index 6b4d67158ca..84b8a792eff 100644 --- a/xarray/tests/test_concat.py +++ b/xarray/tests/test_concat.py @@ -1,7 +1,7 @@ from __future__ import annotations from collections.abc import Callable -from contextlib import nullcontext +from contextlib import AbstractContextManager, nullcontext from copy import deepcopy from typing import 
TYPE_CHECKING, Any, Literal @@ -1476,13 +1476,14 @@ def test_concat_coords_kwarg( FutureWarning, match="will change from data_vars='all' to data_vars='minimal'", ): - if coords == "different": - expectation = pytest.warns( + expectation: AbstractContextManager = ( + pytest.warns( FutureWarning, match="will change from compat='equals' to compat='override'", ) - else: - expectation = nullcontext() + if coords == "different" + else nullcontext() + ) with expectation: old = concat(datasets, data["dim1"], coords=coords) diff --git a/xarray/util/deprecation_helpers.py b/xarray/util/deprecation_helpers.py index ddae2cdf9f3..dd4c044ec7f 100644 --- a/xarray/util/deprecation_helpers.py +++ b/xarray/util/deprecation_helpers.py @@ -148,8 +148,11 @@ def wrapper(*args, **kwargs): return wrapper # type: ignore[return-value] -class CombineKwargDefault(ReprObject): - """Object that handles deprecation cycle for kwarg default values.""" +class CombineKwargDefault: + """Object that handles deprecation cycle for kwarg default values. + + Similar to ReprObject + """ _old: str _new: str @@ -160,8 +163,10 @@ def __init__(self, *, name: str, old: str, new: str): self._old = old self._new = new + def __repr__(self) -> str: + return self._value + def __eq__(self, other: ReprObject | Any) -> bool: - # TODO: What type can other be? ArrayLike? 
return ( self._value == other._value if isinstance(other, ReprObject) @@ -169,13 +174,18 @@ def __eq__(self, other: ReprObject | Any) -> bool: ) @property - def _value(self): + def _value(self) -> str: return self._new if OPTIONS["use_new_combine_kwarg_defaults"] else self._old def __hash__(self) -> int: return hash(self._value) - def warning_message(self, message: str, recommend_set_options: bool = True): + def __dask_tokenize__(self) -> object: + from dask.base import normalize_token + + return normalize_token((type(self), self._value)) + + def warning_message(self, message: str, recommend_set_options: bool = True) -> str: if recommend_set_options: recommendation = ( " To opt in to new defaults and get rid of these warnings now " @@ -194,7 +204,7 @@ def warning_message(self, message: str, recommend_set_options: bool = True): + recommendation ) - def error_message(self): + def error_message(self) -> str: return ( f" Error might be related to new default ({self._name}={self._new!r}). " f"Previously the default was {self._name}={self._old!r}. 
" From 0e678e5d6f6271c5a7262f44f2370551a1a23d45 Mon Sep 17 00:00:00 2001 From: Julia Signell Date: Mon, 10 Mar 2025 13:36:38 -0400 Subject: [PATCH 13/20] Fix mypy in tests --- xarray/tests/test_concat.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/xarray/tests/test_concat.py b/xarray/tests/test_concat.py index 84b8a792eff..e7058fc396e 100644 --- a/xarray/tests/test_concat.py +++ b/xarray/tests/test_concat.py @@ -591,8 +591,10 @@ def test_concat_coords(self, coords) -> None: data.isel(x=slice(5)).assign_coords(c=0), data.isel(x=slice(5, None)).assign_coords(c=1), ] - extra_kwargs = dict(compat="equals") if coords == "different" else {} - actual = concat(objs, dim="x", coords=coords, **extra_kwargs) + if coords == "different": + actual = concat(objs, dim="x", coords=coords, compat="equals") + else: + actual = concat(objs, dim="x", coords=coords) assert_identical(expected, actual) @pytest.mark.parametrize("coords", ["minimal", []]) @@ -611,8 +613,10 @@ def test_concat_constant_index(self, data_vars) -> None: ds1 = Dataset({"foo": 1.5}, {"y": 1}) ds2 = Dataset({"foo": 2.5}, {"y": 1}) expected = Dataset({"foo": ("y", [1.5, 2.5]), "y": [1, 1]}) - extra_kwargs = dict(compat="equals") if data_vars == "different" else {} - actual = concat([ds1, ds2], "y", data_vars=data_vars, **extra_kwargs) + if data_vars == "different": + actual = concat([ds1, ds2], "y", data_vars=data_vars, compat="equals") + else: + actual = concat([ds1, ds2], "y", data_vars=data_vars) assert_identical(expected, actual) def test_concat_constant_index_minimal_raises_merge_error(self) -> None: @@ -677,13 +681,13 @@ def test_concat_errors(self) -> None: assert_identical(data, concat([data0, data1], "dim1", compat="equals")) with pytest.raises(ValueError, match=r"compat.* invalid"): - concat(split_data, "dim1", compat="foobar") + concat(split_data, "dim1", compat="foobar") # type: ignore[call-overload] with pytest.raises(ValueError, match=r"compat.* invalid"): 
concat(split_data, "dim1", compat="minimal") with pytest.raises(ValueError, match=r"unexpected value for"): - concat([data, data], "new_dim", coords="foobar") + concat([data, data], "new_dim", coords="foobar") # type: ignore[call-overload] with pytest.raises( ValueError, match=r"coordinate in some datasets but not others" From 37f0147f0d1fa9f02bb66b9b51492c775b7e5c09 Mon Sep 17 00:00:00 2001 From: Julia Signell Date: Mon, 10 Mar 2025 13:52:12 -0400 Subject: [PATCH 14/20] Fix doctests --- xarray/core/combine.py | 4 ++-- xarray/core/concat.py | 10 +++++++--- xarray/core/merge.py | 8 ++++---- 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/xarray/core/combine.py b/xarray/core/combine.py index ef03802bbb7..33752efeb7b 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -878,7 +878,7 @@ def combine_by_coords( temperature (y, x) float64 96B 10.98 14.3 12.06 ... 1.743 0.4044 16.65 precipitation (y, x) float64 96B 0.4376 0.8918 0.9637 ... 0.4615 0.7805 - >>> xr.combine_by_coords([x3, x1]) + >>> xr.combine_by_coords([x3, x1], join="outer") Size: 464B Dimensions: (y: 4, x: 6) Coordinates: @@ -898,7 +898,7 @@ def combine_by_coords( temperature (y, x) float64 96B 10.98 14.3 12.06 ... 18.89 10.44 8.293 precipitation (y, x) float64 96B 0.4376 0.8918 0.9637 ... 
0.01879 0.6176 - >>> xr.combine_by_coords([x1, x2, x3]) + >>> xr.combine_by_coords([x1, x2, x3], join="outer") Size: 464B Dimensions: (y: 4, x: 6) Coordinates: diff --git a/xarray/core/concat.py b/xarray/core/concat.py index 9d5bf477c94..b04870569c5 100644 --- a/xarray/core/concat.py +++ b/xarray/core/concat.py @@ -207,7 +207,7 @@ def concat( * x (x) >> xr.concat([da.isel(x=0), da.isel(x=1)], "x") + >>> xr.concat([da.isel(x=0), da.isel(x=1)], "x", coords="minimal") Size: 48B array([[0, 1, 2], [3, 4, 5]]) @@ -215,7 +215,7 @@ def concat( * x (x) >> xr.concat([da.isel(x=0), da.isel(x=1)], "new_dim") + >>> xr.concat([da.isel(x=0), da.isel(x=1)], "new_dim", coords="all") Size: 48B array([[0, 1, 2], [3, 4, 5]]) @@ -224,7 +224,11 @@ def concat( * y (y) int64 24B 10 20 30 Dimensions without coordinates: new_dim - >>> xr.concat([da.isel(x=0), da.isel(x=1)], pd.Index([-90, -100], name="new_dim")) + >>> xr.concat( + ... [da.isel(x=0), da.isel(x=1)], + ... pd.Index([-90, -100], name="new_dim"), + ... coords="all", + ... 
) Size: 48B array([[0, 1, 2], [3, 4, 5]]) diff --git a/xarray/core/merge.py b/xarray/core/merge.py index 373da86d5e0..f4305e66805 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -882,7 +882,7 @@ def merge( * time (time) float64 16B 30.0 60.0 * lon (lon) float64 16B 100.0 150.0 - >>> xr.merge([x, y, z]) + >>> xr.merge([x, y, z], join="outer") Size: 256B Dimensions: (lat: 3, lon: 3, time: 2) Coordinates: @@ -894,7 +894,7 @@ def merge( var2 (lat, lon) float64 72B 5.0 nan 6.0 nan nan nan 7.0 nan 8.0 var3 (time, lon) float64 48B 0.0 nan 3.0 4.0 nan 9.0 - >>> xr.merge([x, y, z], compat="identical") + >>> xr.merge([x, y, z], compat="identical", join="outer") Size: 256B Dimensions: (lat: 3, lon: 3, time: 2) Coordinates: @@ -906,7 +906,7 @@ def merge( var2 (lat, lon) float64 72B 5.0 nan 6.0 nan nan nan 7.0 nan 8.0 var3 (time, lon) float64 48B 0.0 nan 3.0 4.0 nan 9.0 - >>> xr.merge([x, y, z], compat="equals") + >>> xr.merge([x, y, z], compat="equals", join="outer") Size: 256B Dimensions: (lat: 3, lon: 3, time: 2) Coordinates: @@ -918,7 +918,7 @@ def merge( var2 (lat, lon) float64 72B 5.0 nan 6.0 nan nan nan 7.0 nan 8.0 var3 (time, lon) float64 48B 0.0 nan 3.0 4.0 nan 9.0 - >>> xr.merge([x, y, z], compat="equals", fill_value=-999.0) + >>> xr.merge([x, y, z], compat="equals", join="outer", fill_value=-999.0) Size: 256B Dimensions: (lat: 3, lon: 3, time: 2) Coordinates: From dac337ca9394b62dcca16f4a6fd00f8b2753991f Mon Sep 17 00:00:00 2001 From: Julia Signell Date: Mon, 10 Mar 2025 14:40:26 -0400 Subject: [PATCH 15/20] Ignore warnings on error tests --- xarray/tests/test_units.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/xarray/tests/test_units.py b/xarray/tests/test_units.py index 3999ae1a57c..da3dccb47ca 100644 --- a/xarray/tests/test_units.py +++ b/xarray/tests/test_units.py @@ -734,6 +734,9 @@ def test_broadcast_dataset(dtype): "coords", ), ) +@pytest.mark.filterwarnings( + "ignore:.*the default value for coords will 
change:FutureWarning" +) def test_combine_by_coords(variant, unit, error, dtype): original_unit = unit_registry.m @@ -811,6 +814,12 @@ def test_combine_by_coords(variant, unit, error, dtype): "coords", ), ) +@pytest.mark.filterwarnings( + "ignore:.*the default value for join will change:FutureWarning" +) +@pytest.mark.filterwarnings( + "ignore:.*the default value for compat will change:FutureWarning" +) def test_combine_nested(variant, unit, error, dtype): original_unit = unit_registry.m @@ -1051,6 +1060,12 @@ def test_concat_dataset(variant, unit, error, dtype): "coords", ), ) +@pytest.mark.filterwarnings( + "ignore:.*the default value for join will change:FutureWarning" +) +@pytest.mark.filterwarnings( + "ignore:.*the default value for compat will change:FutureWarning" +) def test_merge_dataarray(variant, unit, error, dtype): original_unit = unit_registry.m @@ -1155,6 +1170,12 @@ def test_merge_dataarray(variant, unit, error, dtype): "coords", ), ) +@pytest.mark.filterwarnings( + "ignore:.*the default value for join will change:FutureWarning" +) +@pytest.mark.filterwarnings( + "ignore:.*the default value for compat will change:FutureWarning" +) def test_merge_dataset(variant, unit, error, dtype): original_unit = unit_registry.m @@ -5573,6 +5594,12 @@ def test_content_manipulation(self, func, variant, dtype): "coords", ), ) + @pytest.mark.filterwarnings( + "ignore:.*the default value for join will change:FutureWarning" + ) + @pytest.mark.filterwarnings( + "ignore:.*the default value for compat will change:FutureWarning" + ) def test_merge(self, variant, unit, error, dtype): left_variants = { "data": (unit_registry.m, 1, 1), From 4eb275c6aeb6d964b69a6c8775237e6dcb88919d Mon Sep 17 00:00:00 2001 From: Julia Signell Date: Thu, 13 Mar 2025 11:37:33 -0400 Subject: [PATCH 16/20] Use typing.get_args when possible --- xarray/core/alignment.py | 19 +++++++------------ xarray/core/concat.py | 40 +++++++++++++++++----------------------- 2 files changed, 24 insertions(+), 35 
deletions(-) diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py index c2da139759f..0b0e804afeb 100644 --- a/xarray/core/alignment.py +++ b/xarray/core/alignment.py @@ -5,7 +5,7 @@ from collections import defaultdict from collections.abc import Callable, Hashable, Iterable, Mapping from contextlib import suppress -from typing import TYPE_CHECKING, Any, Final, Generic, TypeVar, cast, overload +from typing import TYPE_CHECKING, Any, Final, Generic, TypeVar, cast, get_args, overload import numpy as np import pandas as pd @@ -19,7 +19,7 @@ indexes_all_equal, safe_cast_to_index, ) -from xarray.core.types import T_Alignable +from xarray.core.types import JoinOptions, T_Alignable from xarray.core.utils import emit_user_level_warning, is_dict_like, is_full_slice from xarray.core.variable import Variable, as_compatible_data, calculate_dimensions from xarray.util.deprecation_helpers import CombineKwargDefault @@ -113,7 +113,7 @@ class Aligner(Generic[T_Alignable]): objects: tuple[T_Alignable, ...] results: tuple[T_Alignable, ...] objects_matching_indexes: tuple[dict[MatchingIndexKey, Index], ...] 
- join: str | CombineKwargDefault + join: JoinOptions | CombineKwargDefault exclude_dims: frozenset[Hashable] exclude_vars: frozenset[Hashable] copy: bool @@ -133,7 +133,7 @@ class Aligner(Generic[T_Alignable]): def __init__( self, objects: Iterable[T_Alignable], - join: str | CombineKwargDefault = "inner", + join: JoinOptions | CombineKwargDefault = "inner", indexes: Mapping[Any, Any] | None = None, exclude_dims: str | Iterable[Hashable] = frozenset(), exclude_vars: Iterable[Hashable] = frozenset(), @@ -146,14 +146,9 @@ def __init__( self.objects = tuple(objects) self.objects_matching_indexes = () - if not isinstance(join, CombineKwargDefault) and join not in [ - "inner", - "outer", - "override", - "exact", - "left", - "right", - ]: + if not isinstance(join, CombineKwargDefault) and join not in get_args( + JoinOptions + ): raise ValueError(f"invalid value for join: {join}") self.join = join diff --git a/xarray/core/concat.py b/xarray/core/concat.py index b04870569c5..8e957786d1d 100644 --- a/xarray/core/concat.py +++ b/xarray/core/concat.py @@ -1,7 +1,7 @@ from __future__ import annotations from collections.abc import Hashable, Iterable -from typing import TYPE_CHECKING, Any, Union, overload +from typing import Any, Literal, get_args, overload import numpy as np import pandas as pd @@ -12,12 +12,18 @@ from xarray.core.duck_array_ops import lazy_array_equiv from xarray.core.indexes import Index, PandasIndex from xarray.core.merge import ( - _VALID_COMPAT, collect_variables_and_indexes, merge_attrs, merge_collected, ) -from xarray.core.types import T_DataArray, T_Dataset, T_Variable +from xarray.core.types import ( + CombineAttrsOptions, + ConcatOptions, + JoinOptions, + T_DataArray, + T_Dataset, + T_Variable, +) from xarray.core.utils import emit_user_level_warning from xarray.core.variable import Variable from xarray.core.variable import concat as concat_vars @@ -29,15 +35,10 @@ CombineKwargDefault, ) -if TYPE_CHECKING: - from xarray.core.types import ( - 
CombineAttrsOptions, - CompatOptions, - ConcatOptions, - JoinOptions, - ) - - T_DataVars = Union[ConcatOptions, Iterable[Hashable]] +T_DataVars = ConcatOptions | Iterable[Hashable] +CompatOptions = Literal[ + "identical", "equals", "broadcast_equals", "no_conflicts", "override" +] # TODO: replace dim: Any by 1D array_likes @@ -267,11 +268,11 @@ def concat( except StopIteration as err: raise ValueError("must supply at least one object to concatenate") from err - if not isinstance(compat, CombineKwargDefault) and compat not in set( - _VALID_COMPAT - ) - {"minimal"}: + if not isinstance(compat, CombineKwargDefault) and compat not in get_args( + CompatOptions + ): raise ValueError( - f"compat={compat!r} invalid: must be 'broadcast_equals', 'equals', 'identical', 'no_conflicts' or 'override'" + f"compat={compat!r} invalid: valid options: {get_args(CompatOptions)}" ) if isinstance(first_obj, DataArray): @@ -559,13 +560,6 @@ def _dataset_concat( "The elements in the input list need to be either all 'Dataset's or all 'DataArray's" ) - if not isinstance(compat, CombineKwargDefault) and compat not in set( - _VALID_COMPAT - ) - {"minimal"}: - raise ValueError( - f"compat={compat!r} invalid: must be 'broadcast_equals', 'equals', 'identical', 'no_conflicts' or 'override'" - ) - if isinstance(dim, DataArray): dim_var = dim.variable elif isinstance(dim, Variable): From 03f15028f17c426c4b4546fd182d5386553b4ad9 Mon Sep 17 00:00:00 2001 From: Julia Signell Date: Thu, 13 Mar 2025 11:46:54 -0400 Subject: [PATCH 17/20] Allow `minimal` in concat options at the type level --- xarray/core/concat.py | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/xarray/core/concat.py b/xarray/core/concat.py index 8e957786d1d..2bfc2a2a658 100644 --- a/xarray/core/concat.py +++ b/xarray/core/concat.py @@ -1,7 +1,7 @@ from __future__ import annotations from collections.abc import Hashable, Iterable -from typing import Any, Literal, get_args, overload 
+from typing import TYPE_CHECKING, Any, Union, overload import numpy as np import pandas as pd @@ -12,18 +12,12 @@ from xarray.core.duck_array_ops import lazy_array_equiv from xarray.core.indexes import Index, PandasIndex from xarray.core.merge import ( + _VALID_COMPAT, collect_variables_and_indexes, merge_attrs, merge_collected, ) -from xarray.core.types import ( - CombineAttrsOptions, - ConcatOptions, - JoinOptions, - T_DataArray, - T_Dataset, - T_Variable, -) +from xarray.core.types import T_DataArray, T_Dataset, T_Variable from xarray.core.utils import emit_user_level_warning from xarray.core.variable import Variable from xarray.core.variable import concat as concat_vars @@ -35,10 +29,15 @@ CombineKwargDefault, ) -T_DataVars = ConcatOptions | Iterable[Hashable] -CompatOptions = Literal[ - "identical", "equals", "broadcast_equals", "no_conflicts", "override" -] +if TYPE_CHECKING: + from xarray.core.types import ( + CombineAttrsOptions, + CompatOptions, + ConcatOptions, + JoinOptions, + ) + + T_DataVars = Union[ConcatOptions, Iterable[Hashable]] # TODO: replace dim: Any by 1D array_likes @@ -268,11 +267,11 @@ def concat( except StopIteration as err: raise ValueError("must supply at least one object to concatenate") from err - if not isinstance(compat, CombineKwargDefault) and compat not in get_args( - CompatOptions - ): + if not isinstance(compat, CombineKwargDefault) and compat not in set( + _VALID_COMPAT + ) - {"minimal"}: raise ValueError( - f"compat={compat!r} invalid: valid options: {get_args(CompatOptions)}" + f"compat={compat!r} invalid: must be 'broadcast_equals', 'equals', 'identical', 'no_conflicts' or 'override'" ) if isinstance(first_obj, DataArray): From 7dbdd4aada827e6aa09659e301ffff02f6086aae Mon Sep 17 00:00:00 2001 From: Julia Signell Date: Thu, 13 Mar 2025 17:26:20 -0400 Subject: [PATCH 18/20] Minimal docs update --- doc/user-guide/combining.rst | 44 ++++++++++++++++++++++++++++------ doc/user-guide/terminology.rst | 2 +- doc/whats-new.rst | 14 
+++++++---- 3 files changed, 47 insertions(+), 13 deletions(-) diff --git a/doc/user-guide/combining.rst b/doc/user-guide/combining.rst index 1dad2009665..039711fe31b 100644 --- a/doc/user-guide/combining.rst +++ b/doc/user-guide/combining.rst @@ -43,7 +43,6 @@ new dimension by stacking lower dimensional arrays together: .. ipython:: python - da.sel(x="a") xr.concat([da.isel(x=0), da.isel(x=1)], "x") If the second argument to ``concat`` is a new dimension name, the arrays will @@ -52,7 +51,10 @@ dimension: .. ipython:: python - xr.concat([da.isel(x=0), da.isel(x=1)], "new_dim") + da0 = da.isel(x=0).drop_vars("x") + da1 = da.isel(x=1).drop_vars("x") + + xr.concat([da0, da1], "new_dim") The second argument to ``concat`` can also be an :py:class:`~pandas.Index` or :py:class:`~xarray.DataArray` object as well as a string, in which case it is @@ -60,7 +62,7 @@ used to label the values along the new dimension: .. ipython:: python - xr.concat([da.isel(x=0), da.isel(x=1)], pd.Index([-90, -100], name="new_dim")) + xr.concat([da0, da1], pd.Index([-90, -100], name="new_dim")) Of course, ``concat`` also works on ``Dataset`` objects: @@ -75,6 +77,12 @@ between datasets. With the default parameters, xarray will load some coordinate variables into memory to compare them between datasets. This may be prohibitively expensive if you are manipulating your dataset lazily using :ref:`dask`. +.. note:: + + The default values for many of these options will be changing in a future + version of xarray. You can opt into the new default values early using + ``xr.set_options(use_new_combine_kwarg_defaults=True)``. + .. _merge: Merge @@ -94,10 +102,18 @@ If you merge another dataset (or a dictionary including data array objects), by default the resulting dataset will be aligned on the **union** of all index coordinates: +.. note:: + + The default value for ``join`` and ``compat`` will be changing in a future + version of xarray. 
This change will mean that the resulting dataset will be + not be aligned. You can opt into the new default values early using + ``xr.set_options(use_new_combine_kwarg_defaults=True)``. Or explicitly set + ``join='outer'`` to preserve old behavior. + .. ipython:: python other = xr.Dataset({"bar": ("x", [1, 2, 3, 4]), "x": list("abcd")}) - xr.merge([ds, other]) + xr.merge([ds, other], join="outer") This ensures that ``merge`` is non-destructive. ``xarray.MergeError`` is raised if you attempt to merge two variables with the same name but different values: @@ -114,6 +130,16 @@ if you attempt to merge two variables with the same name but different values: array([[ 1.4691123 , 0.71713666, -0.5090585 ], [-0.13563237, 2.21211203, 0.82678535]]) +.. note:: + + In the future the default value for ``compat`` will change from + ``compat='no_conflicts'`` to ``compat='override'``. In this scenario the + values in the first object override all the values in other objects. + + .. ipython:: python + + xr.merge([ds, ds + 1], compat="override") + The same non-destructive merging between ``DataArray`` index coordinates is used in the :py:class:`~xarray.Dataset` constructor: @@ -144,6 +170,11 @@ For datasets, ``ds0.combine_first(ds1)`` works similarly to there are conflicting values in variables to be merged, whereas ``.combine_first`` defaults to the calling object's values. +.. note:: + + In a future version of xarray the default options for ``xr.merge`` will change + such that the behavior matches ``combine_first``. + .. 
_update: Update @@ -236,7 +267,7 @@ coordinates as long as any non-missing values agree or are disjoint: ds1 = xr.Dataset({"a": ("x", [10, 20, 30, np.nan])}, {"x": [1, 2, 3, 4]}) ds2 = xr.Dataset({"a": ("x", [np.nan, 30, 40, 50])}, {"x": [2, 3, 4, 5]}) - xr.merge([ds1, ds2], compat="no_conflicts") + xr.merge([ds1, ds2], join="outer", compat="no_conflicts") Note that due to the underlying representation of missing values as floating point numbers (``NaN``), variable data type is not always preserved when merging @@ -295,13 +326,12 @@ they are concatenated in order based on the values in their dimension coordinates, not on their position in the list passed to ``combine_by_coords``. .. ipython:: python - :okwarning: x1 = xr.DataArray(name="foo", data=np.random.randn(3), coords=[("x", [0, 1, 2])]) x2 = xr.DataArray(name="foo", data=np.random.randn(3), coords=[("x", [3, 4, 5])]) xr.combine_by_coords([x2, x1]) -These functions can be used by :py:func:`~xarray.open_mfdataset` to open many +These functions are used by :py:func:`~xarray.open_mfdataset` to open many files as one dataset. The particular function used is specified by setting the argument ``'combine'`` to ``'by_coords'`` or ``'nested'``. This is useful for situations where your data is split across many files in multiple locations, diff --git a/doc/user-guide/terminology.rst b/doc/user-guide/terminology.rst index c581fcb374d..295fbcd8b51 100644 --- a/doc/user-guide/terminology.rst +++ b/doc/user-guide/terminology.rst @@ -217,7 +217,7 @@ complete examples, please consult the relevant documentation.* ) # combine the datasets - combined_ds = xr.combine_by_coords([ds1, ds2]) + combined_ds = xr.combine_by_coords([ds1, ds2], join="outer") combined_ds lazy diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 994fc70339c..05b648ba53f 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -7925,13 +7925,17 @@ Backwards incompatible changes Now, the default always concatenates data variables: .. 
ipython:: python - :suppress: - - ds = xray.Dataset({"x": 0}) + :verbatim: - .. ipython:: python + In [1]: ds = xray.Dataset({"x": 0}) - xray.concat([ds, ds], dim="y") + In [2]: xray.concat([ds, ds], dim="y") + Out[2]: + Size: 16B + Dimensions: (y: 2) + Dimensions without coordinates: y + Data variables: + x (y) int64 16B 0 0 To obtain the old behavior, supply the argument ``concat_over=[]``. From c6a557beb41ad0a5e0990aa1d5de233d7a9c68f5 Mon Sep 17 00:00:00 2001 From: Julia Signell Date: Thu, 13 Mar 2025 17:41:45 -0400 Subject: [PATCH 19/20] Tighten up language --- doc/user-guide/combining.rst | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/doc/user-guide/combining.rst b/doc/user-guide/combining.rst index 039711fe31b..3b8efad8432 100644 --- a/doc/user-guide/combining.rst +++ b/doc/user-guide/combining.rst @@ -79,8 +79,8 @@ expensive if you are manipulating your dataset lazily using :ref:`dask`. .. note:: - The default values for many of these options will be changing in a future - version of xarray. You can opt into the new default values early using + In a future version of xarray the default values for many of these options + will change. You can opt into the new default values early using ``xr.set_options(use_new_combine_kwarg_defaults=True)``. .. _merge: @@ -104,11 +104,11 @@ coordinates: .. note:: - The default value for ``join`` and ``compat`` will be changing in a future - version of xarray. This change will mean that the resulting dataset will be - not be aligned. You can opt into the new default values early using - ``xr.set_options(use_new_combine_kwarg_defaults=True)``. Or explicitly set - ``join='outer'`` to preserve old behavior. + In a future version of xarray the default value for ``join`` and ``compat`` + will change. This change will mean that xarray will no longer attempt + to align the indices of the merged dataset. 
You can opt into the new default + values early using ``xr.set_options(use_new_combine_kwarg_defaults=True)``. + Or explicitly set ``join='outer'`` to preserve old behavior. .. ipython:: python @@ -132,9 +132,9 @@ if you attempt to merge two variables with the same name but different values: .. note:: - In the future the default value for ``compat`` will change from - ``compat='no_conflicts'`` to ``compat='override'``. In this scenario the - values in the first object override all the values in other objects. + In a future version of xarray the default value for ``compat`` will change + from ``compat='no_conflicts'`` to ``compat='override'``. In this scenario + the values in the first object override all the values in other objects. .. ipython:: python From ba45599a5f9b719cfb30386abb5e58d1a95a5f57 Mon Sep 17 00:00:00 2001 From: Julia Signell Date: Fri, 18 Apr 2025 09:58:25 -0400 Subject: [PATCH 20/20] Add to deprecated section of whats new --- doc/whats-new.rst | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 2cd5426cc16..8a1bbc733fb 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -32,6 +32,15 @@ Breaking changes Deprecations ~~~~~~~~~~~~ +- Start deprecation cycle for changing the default keyword arguments to ``concat``, ``merge``, ``combine``, ``open_mfdataset``. + Emits a ``FutureWarning`` when using old defaults and new defaults would result in different behavior. + Adds an option: ``use_new_combine_kwarg_defaults`` to opt in to new defaults immediately. + New values are: + - ``data_vars``: "minimal" + - ``coords``: "minimal" + - ``compat``: "override" + - ``join``: "exact" + By `Julia Signell `_. Bug fixes ~~~~~~~~~