diff --git a/lib/iris/tests/unit/coords/test_AuxCoord.py b/lib/iris/tests/unit/coords/test_AuxCoord.py
index 300238b3c1..20fd16377b 100644
--- a/lib/iris/tests/unit/coords/test_AuxCoord.py
+++ b/lib/iris/tests/unit/coords/test_AuxCoord.py
@@ -774,3 +774,30 @@ def test_nanpoints_eq_copy(self):
     def test_nanbounds_eq_self(self):
         co1 = AuxCoord([15.0, 25.0], bounds=[[14.0, 16.0], [24.0, np.nan]])
         assert co1 == co1
+
+    @pytest.mark.parametrize("bothlazy", [True, False], ids=["bothlazy", "onelazy"])
+    def test_lazy_compares_via_hash(self, bothlazy):
+        """Check that comparing lazy points caches a hash on each lazy array."""
+        co1 = AuxCoord([15.0, 25.0])
+        co2 = AuxCoord([15.0, 25.001])
+
+        co1.points = co1.lazy_points()
+        if bothlazy:
+            co2.points = co2.lazy_points()
+        assert co1.has_lazy_points()
+        assert co2.has_lazy_points() == bothlazy
+
+        # No hash has been cached before the first comparison.
+        assert not hasattr(co1.core_points(), "_iris_array_hash")
+        if bothlazy:
+            assert not hasattr(co2.core_points(), "_iris_array_hash")
+
+        eq = co1 == co2
+        assert not eq
+
+        # The comparison leaves lazy points lazy, and caches their hashes.
+        assert co1.has_lazy_points()
+        assert hasattr(co1.core_points(), "_iris_array_hash")
+        if bothlazy:
+            assert co2.has_lazy_points()
+            assert hasattr(co2.core_points(), "_iris_array_hash")
diff --git a/lib/iris/util.py b/lib/iris/util.py
index b3ce7941c5..dc6dc58586 100644
--- a/lib/iris/util.py
+++ b/lib/iris/util.py
@@ -509,19 +509,26 @@ def normalise_array(array):
     eq = array1.shape == array2.shape
     if eq:
         if is_lazy_data(array1) or is_lazy_data(array2):
-            # Use a separate map and reduce operation to avoid running out of memory.
-            ndim = array1.ndim
-            indices = tuple(range(ndim))
-            eq = da.blockwise(
-                _masked_array_equal,
-                indices,
-                array1,
-                indices,
-                array2,
-                indices,
-                dtype=bool,
-                meta=np.empty((0,) * ndim, dtype=bool),
-                equal_nan=withnans,
-            ).all()
+            # Compare lazy arrays by their hashes, caching each hash on the Dask
+            # array it belongs to so that it can be reused by later comparisons.
+            # NOTE(review): the element-wise comparison this replaces was given
+            # `equal_nan=withnans`; the hash comparison does not use `withnans`
+            # at all - confirm that NaN handling is still as intended.
+            def array_hash(array):
+                """Return the hash of `array`, taken from its cache if present."""
+                if hasattr(array, "_iris_array_hash"):
+                    return array._iris_array_hash
+
+                # Imported at call time rather than at module level.
+                # NOTE(review): presumably avoids a circular import - confirm.
+                from iris._concatenate import _hash_array
+
+                hash_value = _hash_array(array)
+                if isinstance(array, da.Array):
+                    # Can't save hashes on a numpy array, but CAN on a Dask array
+                    array._iris_array_hash = hash_value
+                return hash_value
+
+            eq = array_hash(array1) == array_hash(array2)
         else:
             eq = _masked_array_equal(array1, array2, equal_nan=withnans).all()