diff --git a/docs/src/whatsnew/latest.rst b/docs/src/whatsnew/latest.rst index 5bc24b08dc..9b3fad7a4b 100644 --- a/docs/src/whatsnew/latest.rst +++ b/docs/src/whatsnew/latest.rst @@ -38,6 +38,9 @@ This document explains the changes made to Iris for this release #. N/A +#. `@rcomer`_ enabled partial collapse of multi-dimensional string coordinates, + fixing :issue:`3653`. (:pull:`5955`) + 💣 Incompatible Changes ======================= diff --git a/lib/iris/coords.py b/lib/iris/coords.py index d2f5b05f89..8afe9dad41 100644 --- a/lib/iris/coords.py +++ b/lib/iris/coords.py @@ -2115,22 +2115,39 @@ def collapsed(self, dims_to_collapse=None): if np.issubdtype(self.dtype, np.str_): # Collapse the coordinate by serializing the points and # bounds as strings. - def serialize(x): - return "|".join([str(i) for i in x.flatten()]) + def serialize(x, axis): + if axis is None: + return "|".join(str(i) for i in x.flatten()) + + # np.apply_along_axis combined with str.join will truncate strings in + # some cases (https://github.com/numpy/numpy/issues/8352), so we need to + # loop through the array directly. First move (possibly multiple) axis + # of interest to trailing dim(s), then make a 2D array we can loop + # through. + work_array = np.moveaxis(x, axis, range(-len(axis), 0)) + out_shape = work_array.shape[: -len(axis)] + work_array = work_array.reshape(np.prod(out_shape, dtype=int), -1) + + joined = [] + for arr_slice in work_array: + joined.append(serialize(arr_slice, None)) + + return np.array(joined).reshape(out_shape) bounds = None if self.has_bounds(): - shape = self._bounds_dm.shape[1:] - bounds = [] - for index in np.ndindex(shape): - index_slice = (slice(None),) + tuple(index) - bounds.append(serialize(self.bounds[index_slice])) - dtype = np.dtype("U{}".format(max(map(len, bounds)))) - bounds = np.array(bounds, dtype=dtype).reshape((1,) + shape) - points = serialize(self.points) - dtype = np.dtype("U{}".format(len(points))) + # Express dims_to_collapse as non-negative integers. + if dims_to_collapse is None: + dims_to_collapse = range(self.ndim) + else: + dims_to_collapse = tuple( + dim % self.ndim for dim in dims_to_collapse + ) + bounds = serialize(self.bounds, dims_to_collapse) + + points = serialize(self.points, dims_to_collapse) # Create the new collapsed coordinate. - coord = self.copy(points=np.array(points, dtype=dtype), bounds=bounds) + coord = self.copy(points=np.array(points), bounds=bounds) else: # Collapse the coordinate by calculating the bounded extremes. if self.ndim > 1: diff --git a/lib/iris/tests/unit/coords/test_Coord.py b/lib/iris/tests/unit/coords/test_Coord.py index c63261f95c..97429f58f8 100644 --- a/lib/iris/tests/unit/coords/test_Coord.py +++ b/lib/iris/tests/unit/coords/test_Coord.py @@ -16,6 +16,7 @@ import cf_units import dask.array as da import numpy as np +import numpy.ma as ma import pytest import iris @@ -701,6 +702,112 @@ def test_lazy_3_bounds(self): self.assertArrayAlmostEqual(collapsed_coord.points, da.array([2.0])) self.assertArrayAlmostEqual(collapsed_coord.bounds, da.array([[0.0, 4.0]])) + def test_string_masked(self): + points = ma.array(["foo", "bar", "bing"], mask=[0, 1, 0], dtype=str) + coord = AuxCoord(points) + + collapsed_coord = coord.collapsed(0) + + expected = "foo|--|bing" + self.assertEqual(collapsed_coord.points, expected) + + def test_string_nd_first(self): + self.setupTestArrays((3, 4)) + coord = AuxCoord(self.pts_real.astype(str)) + + collapsed_coord = coord.collapsed(0) + expected = [ + "0.0|40.0|80.0", + "10.0|50.0|90.0", + "20.0|60.0|100.0", + "30.0|70.0|110.0", + ] + + self.assertArrayEqual(collapsed_coord.points, expected) + + def test_string_nd_second(self): + self.setupTestArrays((3, 4)) + coord = AuxCoord(self.pts_real.astype(str)) + + collapsed_coord = coord.collapsed(1) + expected = [ + "0.0|10.0|20.0|30.0", + "40.0|50.0|60.0|70.0", + "80.0|90.0|100.0|110.0", + ] + + self.assertArrayEqual(collapsed_coord.points, expected) + + def test_string_nd_both(self): + self.setupTestArrays((3, 4)) + coord = AuxCoord(self.pts_real.astype(str)) + + collapsed_coord = coord.collapsed() + expected = ["0.0|10.0|20.0|30.0|40.0|50.0|60.0|70.0|80.0|90.0|100.0|110.0"] + + self.assertArrayEqual(collapsed_coord.points, expected) + + def test_string_nd_bounds_first(self): + self.setupTestArrays((3, 4)) + coord = AuxCoord(self.pts_real.astype(str), bounds=self.bds_real.astype(str)) + + collapsed_coord = coord.collapsed(0) + + # Points handling is as for non bounded case. So just check bounds. + expected_lower = [ + "-2.0|38.0|78.0", + "8.0|48.0|88.0", + "18.0|58.0|98.0", + "28.0|68.0|108.0", + ] + + expected_upper = [ + "2.0|42.0|82.0", + "12.0|52.0|92.0", + "22.0|62.0|102.0", + "32.0|72.0|112.0", + ] + + self.assertArrayEqual(collapsed_coord.bounds[:, 0], expected_lower) + self.assertArrayEqual(collapsed_coord.bounds[:, 1], expected_upper) + + def test_string_nd_bounds_second(self): + self.setupTestArrays((3, 4)) + coord = AuxCoord(self.pts_real.astype(str), bounds=self.bds_real.astype(str)) + + collapsed_coord = coord.collapsed(1) + + # Points handling is as for non bounded case. So just check bounds. + expected_lower = [ + "-2.0|8.0|18.0|28.0", + "38.0|48.0|58.0|68.0", + "78.0|88.0|98.0|108.0", + ] + + expected_upper = [ + "2.0|12.0|22.0|32.0", + "42.0|52.0|62.0|72.0", + "82.0|92.0|102.0|112.0", + ] + + self.assertArrayEqual(collapsed_coord.bounds[:, 0], expected_lower) + self.assertArrayEqual(collapsed_coord.bounds[:, 1], expected_upper) + + def test_string_nd_bounds_both(self): + self.setupTestArrays((3, 4)) + coord = AuxCoord(self.pts_real.astype(str), bounds=self.bds_real.astype(str)) + + collapsed_coord = coord.collapsed() + + # Points handling is as for non bounded case. So just check bounds. + expected_lower = ["-2.0|8.0|18.0|28.0|38.0|48.0|58.0|68.0|78.0|88.0|98.0|108.0"] + expected_upper = [ + "2.0|12.0|22.0|32.0|42.0|52.0|62.0|72.0|82.0|92.0|102.0|112.0" + ] + + self.assertArrayEqual(collapsed_coord.bounds[:, 0], expected_lower) + self.assertArrayEqual(collapsed_coord.bounds[:, 1], expected_upper) + class Test_is_compatible(tests.IrisTest): def setUp(self):