From f2f9c88b9ac424b1a72177cb218b72489556f017 Mon Sep 17 00:00:00 2001 From: Ruth Comer Date: Thu, 19 Aug 2021 17:21:00 +0100 Subject: [PATCH 01/12] add failing tests --- lib/iris/tests/unit/coords/test_Coord.py | 27 ++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/lib/iris/tests/unit/coords/test_Coord.py b/lib/iris/tests/unit/coords/test_Coord.py index c63261f95c..3ab336220f 100644 --- a/lib/iris/tests/unit/coords/test_Coord.py +++ b/lib/iris/tests/unit/coords/test_Coord.py @@ -701,6 +701,33 @@ def test_lazy_3_bounds(self): self.assertArrayAlmostEqual(collapsed_coord.points, da.array([2.0])) self.assertArrayAlmostEqual(collapsed_coord.bounds, da.array([[0.0, 4.0]])) + def test_string_nd_first(self): + self.setupTestArrays((3, 4)) + coord = AuxCoord(self.pts_real.astype(str)) + + collapsed_coord = coord.collapsed(0) + expected = [ + "0.0|40.0|80.0", + "10.0|50.0|90.0", + "20.0|60.0|100.0", + "30.0|70.0|111.0", + ] + + self.assertArrayEqual(collapsed_coord.points, expected) + + def test_string_nd_second(self): + self.setupTestArrays((3, 4)) + coord = AuxCoord(self.pts_real.astype(str)) + + collapsed_coord = coord.collapsed(1) + expected = [ + "0.0|10.0|20.0|30.0", + "40.0|50.0|60.0|70.0", + "80.0|90.0|100.0|110.0", + ] + + self.assertArrayEqual(collapsed_coord.points, expected) + class Test_is_compatible(tests.IrisTest): def setUp(self): From 8507b943294164821d04aa212b906ee3c0682f4a Mon Sep 17 00:00:00 2001 From: Ruth Comer Date: Thu, 19 Aug 2021 18:23:33 +0100 Subject: [PATCH 02/12] pass tests --- lib/iris/coords.py | 32 +++++++++++++++++------- lib/iris/tests/unit/coords/test_Coord.py | 2 +- 2 files changed, 24 insertions(+), 10 deletions(-) diff --git a/lib/iris/coords.py b/lib/iris/coords.py index d2f5b05f89..14ba661d83 100644 --- a/lib/iris/coords.py +++ b/lib/iris/coords.py @@ -2115,22 +2115,36 @@ def collapsed(self, dims_to_collapse=None): if np.issubdtype(self.dtype, np.str_): # Collapse the coordinate by serializing the points and # bounds as strings. - def serialize(x): - return "|".join([str(i) for i in x.flatten()]) + def serialize(x, axis): + if axis is None: + return "|".join(x.flatten()) + # np.apply_along_axis does not work with str.join, so we + # need to loop through the array directly. First move (possibly + # multiple) axis of interest to trailing dims, then make a 2D + # array we can loop through. + work_array = np.moveaxis(x, axis, range(-len(axis), 0)) + out_shape = work_array.shape[: -len(axis)] + work_array = work_array.reshape(np.prod(out_shape), -1) + + joined = [] + for arr_slice in work_array: + joined.append("|".join(arr_slice)) + + return np.array(joined).reshape(out_shape) bounds = None if self.has_bounds(): - shape = self._bounds_dm.shape[1:] + shape = self._bounds_dm.shape[-1:] bounds = [] for index in np.ndindex(shape): index_slice = (slice(None),) + tuple(index) - bounds.append(serialize(self.bounds[index_slice])) - dtype = np.dtype("U{}".format(max(map(len, bounds)))) - bounds = np.array(bounds, dtype=dtype).reshape((1,) + shape) - points = serialize(self.points) - dtype = np.dtype("U{}".format(len(points))) + bounds.append( + serialize(self.bounds[index_slice], dims_to_collapse) + ) + bounds = np.array(bounds).reshape((1,) + shape) + points = serialize(self.points, dims_to_collapse) # Create the new collapsed coordinate. - coord = self.copy(points=np.array(points, dtype=dtype), bounds=bounds) + coord = self.copy(points=np.array(points), bounds=bounds) else: # Collapse the coordinate by calculating the bounded extremes. if self.ndim > 1: diff --git a/lib/iris/tests/unit/coords/test_Coord.py b/lib/iris/tests/unit/coords/test_Coord.py index 3ab336220f..e040e69719 100644 --- a/lib/iris/tests/unit/coords/test_Coord.py +++ b/lib/iris/tests/unit/coords/test_Coord.py @@ -710,7 +710,7 @@ def test_string_nd_first(self): "0.0|40.0|80.0", "10.0|50.0|90.0", "20.0|60.0|100.0", - "30.0|70.0|111.0", + "30.0|70.0|110.0", ] self.assertArrayEqual(collapsed_coord.points, expected) From f70f82db6330415977482f821c5797d29c1ae9fa Mon Sep 17 00:00:00 2001 From: Ruth Comer Date: Thu, 19 Aug 2021 19:51:09 +0100 Subject: [PATCH 03/12] add and pass bounded tests --- lib/iris/coords.py | 5 ++- lib/iris/tests/unit/coords/test_Coord.py | 50 ++++++++++++++++++++++++ 2 files changed, 53 insertions(+), 2 deletions(-) diff --git a/lib/iris/coords.py b/lib/iris/coords.py index 14ba661d83..325592452f 100644 --- a/lib/iris/coords.py +++ b/lib/iris/coords.py @@ -2137,11 +2137,12 @@ def serialize(x, axis): shape = self._bounds_dm.shape[-1:] bounds = [] for index in np.ndindex(shape): - index_slice = (slice(None),) + tuple(index) + index_slice = (slice(None),) * self.ndim + tuple(index) bounds.append( serialize(self.bounds[index_slice], dims_to_collapse) ) - bounds = np.array(bounds).reshape((1,) + shape) + # Make sure bounds dim comes last. + bounds = np.moveaxis(bounds, 0, -1) points = serialize(self.points, dims_to_collapse) # Create the new collapsed coordinate. coord = self.copy(points=np.array(points), bounds=bounds) diff --git a/lib/iris/tests/unit/coords/test_Coord.py b/lib/iris/tests/unit/coords/test_Coord.py index e040e69719..3d0529f4d5 100644 --- a/lib/iris/tests/unit/coords/test_Coord.py +++ b/lib/iris/tests/unit/coords/test_Coord.py @@ -728,6 +728,56 @@ def test_string_nd_second(self): self.assertArrayEqual(collapsed_coord.points, expected) + def test_string_nd_bounds_first(self): + self.setupTestArrays((3, 4)) + coord = AuxCoord( + self.pts_real.astype(str), bounds=self.bds_real.astype(str) + ) + + collapsed_coord = coord.collapsed(0) + + # Points handling is as for non bounded case. So just check bounds. + expected_lower = [ + "-2.0|38.0|78.0", + "8.0|48.0|88.0", + "18.0|58.0|98.0", + "28.0|68.0|108.0", + ] + + expected_upper = [ + "2.0|42.0|82.0", + "12.0|52.0|92.0", + "22.0|62.0|102.0", + "32.0|72.0|112.0", + ] + + self.assertArrayEqual(collapsed_coord.bounds[:, 0], expected_lower) + self.assertArrayEqual(collapsed_coord.bounds[:, 1], expected_upper) + + def test_string_nd_bounds_second(self): + self.setupTestArrays((3, 4)) + coord = AuxCoord( + self.pts_real.astype(str), bounds=self.bds_real.astype(str) + ) + + collapsed_coord = coord.collapsed(1) + + # Points handling is as for non bounded case. So just check bounds. + expected_lower = [ + "-2.0|8.0|18.0|28.0", + "38.0|48.0|58.0|68.0", + "78.0|88.0|98.0|108.0", + ] + + expected_upper = [ + "2.0|12.0|22.0|32.0", + "42.0|52.0|62.0|72.0", + "82.0|92.0|102.0|112.0", + ] + + self.assertArrayEqual(collapsed_coord.bounds[:, 0], expected_lower) + self.assertArrayEqual(collapsed_coord.bounds[:, 1], expected_upper) + class Test_is_compatible(tests.IrisTest): def setUp(self): From 019a403a2f1f870879329c48259430f9775230fd Mon Sep 17 00:00:00 2001 From: Ruth Comer Date: Thu, 19 Aug 2021 20:05:02 +0100 Subject: [PATCH 04/12] simpler bounds loop --- lib/iris/coords.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/lib/iris/coords.py b/lib/iris/coords.py index 325592452f..57f799e54d 100644 --- a/lib/iris/coords.py +++ b/lib/iris/coords.py @@ -2120,7 +2120,7 @@ def serialize(x, axis): return "|".join(x.flatten()) # np.apply_along_axis does not work with str.join, so we # need to loop through the array directly. First move (possibly - # multiple) axis of interest to trailing dims, then make a 2D + # multiple) axis of interest to trailing dim(s), then make a 2D # array we can loop through. work_array = np.moveaxis(x, axis, range(-len(axis), 0)) out_shape = work_array.shape[: -len(axis)] @@ -2134,13 +2134,11 @@ def serialize(x, axis): bounds = None if self.has_bounds(): - shape = self._bounds_dm.shape[-1:] bounds = [] - for index in np.ndindex(shape): - index_slice = (slice(None),) * self.ndim + tuple(index) - bounds.append( - serialize(self.bounds[index_slice], dims_to_collapse) - ) + for index in range(self.nbounds): + bounds_slice = np.take(self.bounds, index, axis=-1) + serialized = serialize(bounds_slice, dims_to_collapse) + bounds.append(serialized) # Make sure bounds dim comes last. bounds = np.moveaxis(bounds, 0, -1) points = serialize(self.points, dims_to_collapse) From 9f049663c12700e042b43c4183c08c23c8d0423d Mon Sep 17 00:00:00 2001 From: Ruth Comer Date: Tue, 24 Aug 2021 16:30:12 +0100 Subject: [PATCH 05/12] consider masked case --- lib/iris/coords.py | 4 ++-- lib/iris/tests/unit/coords/test_Coord.py | 10 ++++++++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/lib/iris/coords.py b/lib/iris/coords.py index 57f799e54d..ee82a8c62b 100644 --- a/lib/iris/coords.py +++ b/lib/iris/coords.py @@ -2117,7 +2117,7 @@ def collapsed(self, dims_to_collapse=None): # bounds as strings. def serialize(x, axis): if axis is None: - return "|".join(x.flatten()) + return "|".join(str(i) for i in x.flatten()) # np.apply_along_axis does not work with str.join, so we # need to loop through the array directly. First move (possibly # multiple) axis of interest to trailing dim(s), then make a 2D @@ -2128,7 +2128,7 @@ def serialize(x, axis): joined = [] for arr_slice in work_array: - joined.append("|".join(arr_slice)) + joined.append(serialize(arr_slice, None)) return np.array(joined).reshape(out_shape) diff --git a/lib/iris/tests/unit/coords/test_Coord.py b/lib/iris/tests/unit/coords/test_Coord.py index 3d0529f4d5..ca07487f98 100644 --- a/lib/iris/tests/unit/coords/test_Coord.py +++ b/lib/iris/tests/unit/coords/test_Coord.py @@ -16,6 +16,7 @@ import cf_units import dask.array as da import numpy as np +import numpy.ma as ma import pytest import iris @@ -715,6 +716,15 @@ def test_string_nd_first(self): self.assertArrayEqual(collapsed_coord.points, expected) + def test_string_masked(self): + points = ma.array(["foo", "bar", "bing"], mask=[0, 1, 0], dtype=str) + coord = AuxCoord(points) + + collapsed_coord = coord.collapsed() + + expected = "foo|--|bing" + self.assertEqual(collapsed_coord.points, expected) + def test_string_nd_second(self): self.setupTestArrays((3, 4)) coord = AuxCoord(self.pts_real.astype(str)) From d890199ffe85f8dcc622dd4e3b93beaa88ddea8c Mon Sep 17 00:00:00 2001 From: Ruth Comer Date: Tue, 24 Aug 2021 16:32:34 +0100 Subject: [PATCH 06/12] whatsnew --- docs/src/whatsnew/latest.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/src/whatsnew/latest.rst b/docs/src/whatsnew/latest.rst index efe9fc621c..28b21ccd87 100644 --- a/docs/src/whatsnew/latest.rst +++ b/docs/src/whatsnew/latest.rst @@ -38,6 +38,9 @@ This document explains the changes made to Iris for this release #. N/A +#. `@rcomer`_ enabled partial collapse of multi-dimensional string coordinates, + fixing :issue:`3653`. (:pull:`5955`) + 💣 Incompatible Changes ======================= From c2b3295091eca317c67071b6dd4df17306eb5f38 Mon Sep 17 00:00:00 2001 From: Ruth Comer Date: Tue, 24 Aug 2021 17:33:59 +0100 Subject: [PATCH 07/12] consider case when all axes chosen --- lib/iris/coords.py | 3 ++- lib/iris/tests/unit/coords/test_Coord.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/iris/coords.py b/lib/iris/coords.py index ee82a8c62b..55b4b5afcf 100644 --- a/lib/iris/coords.py +++ b/lib/iris/coords.py @@ -2116,8 +2116,9 @@ def collapsed(self, dims_to_collapse=None): # Collapse the coordinate by serializing the points and # bounds as strings. def serialize(x, axis): - if axis is None: + if axis is None or len(axis) == x.ndim: return "|".join(str(i) for i in x.flatten()) + # np.apply_along_axis does not work with str.join, so we # need to loop through the array directly. First move (possibly # multiple) axis of interest to trailing dim(s), then make a 2D diff --git a/lib/iris/tests/unit/coords/test_Coord.py b/lib/iris/tests/unit/coords/test_Coord.py index ca07487f98..18b68ce189 100644 --- a/lib/iris/tests/unit/coords/test_Coord.py +++ b/lib/iris/tests/unit/coords/test_Coord.py @@ -720,7 +720,7 @@ def test_string_masked(self): points = ma.array(["foo", "bar", "bing"], mask=[0, 1, 0], dtype=str) coord = AuxCoord(points) - collapsed_coord = coord.collapsed() + collapsed_coord = coord.collapsed(0) expected = "foo|--|bing" self.assertEqual(collapsed_coord.points, expected) From e2367792db0c7d245f22a06249a65664ee6302b2 Mon Sep 17 00:00:00 2001 From: Ruth Comer Date: Tue, 24 Aug 2021 19:37:16 +0100 Subject: [PATCH 08/12] slicing loops -= 1 --- lib/iris/coords.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/lib/iris/coords.py b/lib/iris/coords.py index 55b4b5afcf..eb5626c509 100644 --- a/lib/iris/coords.py +++ b/lib/iris/coords.py @@ -2135,13 +2135,15 @@ def serialize(x, axis): bounds = None if self.has_bounds(): - bounds = [] - for index in range(self.nbounds): - bounds_slice = np.take(self.bounds, index, axis=-1) - serialized = serialize(bounds_slice, dims_to_collapse) - bounds.append(serialized) - # Make sure bounds dim comes last. - bounds = np.moveaxis(bounds, 0, -1) + # Express dims_to_collapse as non-negative integers. + if dims_to_collapse is None: + dims_to_collapse = range(self.ndim) + else: + dims_to_collapse = tuple( + dim % self.ndim for dim in dims_to_collapse + ) + bounds = serialize(self.bounds, dims_to_collapse) + points = serialize(self.points, dims_to_collapse) # Create the new collapsed coordinate. coord = self.copy(points=np.array(points), bounds=bounds) From 9e9e34843b06076857abf9aded898a3ccb205f1e Mon Sep 17 00:00:00 2001 From: Ruth Comer Date: Tue, 31 Aug 2021 14:51:34 +0100 Subject: [PATCH 09/12] remove dubious check; reorder tests --- lib/iris/coords.py | 6 ++++-- lib/iris/tests/unit/coords/test_Coord.py | 18 +++++++++--------- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/lib/iris/coords.py b/lib/iris/coords.py index eb5626c509..6653bf39f9 100644 --- a/lib/iris/coords.py +++ b/lib/iris/coords.py @@ -2116,7 +2116,7 @@ def collapsed(self, dims_to_collapse=None): # Collapse the coordinate by serializing the points and # bounds as strings. def serialize(x, axis): - if axis is None or len(axis) == x.ndim: + if axis is None: return "|".join(str(i) for i in x.flatten()) # np.apply_along_axis does not work with str.join, so we @@ -2125,7 +2125,9 @@ def serialize(x, axis): # array we can loop through. work_array = np.moveaxis(x, axis, range(-len(axis), 0)) out_shape = work_array.shape[: -len(axis)] - work_array = work_array.reshape(np.prod(out_shape), -1) + work_array = work_array.reshape( + np.prod(out_shape, dtype=int), -1 + ) joined = [] for arr_slice in work_array: diff --git a/lib/iris/tests/unit/coords/test_Coord.py b/lib/iris/tests/unit/coords/test_Coord.py index 18b68ce189..4509277d20 100644 --- a/lib/iris/tests/unit/coords/test_Coord.py +++ b/lib/iris/tests/unit/coords/test_Coord.py @@ -702,6 +702,15 @@ def test_lazy_3_bounds(self): self.assertArrayAlmostEqual(collapsed_coord.points, da.array([2.0])) self.assertArrayAlmostEqual(collapsed_coord.bounds, da.array([[0.0, 4.0]])) + def test_string_masked(self): + points = ma.array(["foo", "bar", "bing"], mask=[0, 1, 0], dtype=str) + coord = AuxCoord(points) + + collapsed_coord = coord.collapsed(0) + + expected = "foo|--|bing" + self.assertEqual(collapsed_coord.points, expected) + def test_string_nd_first(self): self.setupTestArrays((3, 4)) coord = AuxCoord(self.pts_real.astype(str)) @@ -716,15 +725,6 @@ def test_string_nd_first(self): self.assertArrayEqual(collapsed_coord.points, expected) - def test_string_masked(self): - points = ma.array(["foo", "bar", "bing"], mask=[0, 1, 0], dtype=str) - coord = AuxCoord(points) - - collapsed_coord = coord.collapsed(0) - - expected = "foo|--|bing" - self.assertEqual(collapsed_coord.points, expected) - def test_string_nd_second(self): self.setupTestArrays((3, 4)) coord = AuxCoord(self.pts_real.astype(str)) From feb4d60336dd45dad2308fd47193d2ff77a32a3a Mon Sep 17 00:00:00 2001 From: Ruth Comer Date: Wed, 15 May 2024 10:39:18 +0100 Subject: [PATCH 10/12] ruff format --- lib/iris/coords.py | 4 +--- lib/iris/tests/unit/coords/test_Coord.py | 8 ++------ 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/lib/iris/coords.py b/lib/iris/coords.py index 6653bf39f9..4cb262cf75 100644 --- a/lib/iris/coords.py +++ b/lib/iris/coords.py @@ -2125,9 +2125,7 @@ def serialize(x, axis): # array we can loop through. work_array = np.moveaxis(x, axis, range(-len(axis), 0)) out_shape = work_array.shape[: -len(axis)] - work_array = work_array.reshape( - np.prod(out_shape, dtype=int), -1 - ) + work_array = work_array.reshape(np.prod(out_shape, dtype=int), -1) joined = [] for arr_slice in work_array: diff --git a/lib/iris/tests/unit/coords/test_Coord.py b/lib/iris/tests/unit/coords/test_Coord.py index 4509277d20..be3388a1f0 100644 --- a/lib/iris/tests/unit/coords/test_Coord.py +++ b/lib/iris/tests/unit/coords/test_Coord.py @@ -740,9 +740,7 @@ def test_string_nd_second(self): def test_string_nd_bounds_first(self): self.setupTestArrays((3, 4)) - coord = AuxCoord( - self.pts_real.astype(str), bounds=self.bds_real.astype(str) - ) + coord = AuxCoord(self.pts_real.astype(str), bounds=self.bds_real.astype(str)) collapsed_coord = coord.collapsed(0) @@ -766,9 +764,7 @@ def test_string_nd_bounds_first(self): def test_string_nd_bounds_second(self): self.setupTestArrays((3, 4)) - coord = AuxCoord( - self.pts_real.astype(str), bounds=self.bds_real.astype(str) - ) + coord = AuxCoord(self.pts_real.astype(str), bounds=self.bds_real.astype(str)) collapsed_coord = coord.collapsed(1) From 5d5d6aee8915d5fb2f51bd61bd0281e4beb6a7fe Mon Sep 17 00:00:00 2001 From: Ruth Comer Date: Fri, 30 Aug 2024 11:25:24 +0100 Subject: [PATCH 11/12] improve apply_along_axis comment --- lib/iris/coords.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/lib/iris/coords.py b/lib/iris/coords.py index 4cb262cf75..8afe9dad41 100644 --- a/lib/iris/coords.py +++ b/lib/iris/coords.py @@ -2119,10 +2119,11 @@ def serialize(x, axis): if axis is None: return "|".join(str(i) for i in x.flatten()) - # np.apply_along_axis does not work with str.join, so we - # need to loop through the array directly. First move (possibly - # multiple) axis of interest to trailing dim(s), then make a 2D - # array we can loop through. + # np.apply_along_axis combined with str.join will truncate strings in + # some cases (https://github.com/numpy/numpy/issues/8352), so we need to + # loop through the array directly. First move (possibly multiple) axis + # of interest to trailing dim(s), then make a 2D array we can loop + # through. work_array = np.moveaxis(x, axis, range(-len(axis), 0)) out_shape = work_array.shape[: -len(axis)] work_array = work_array.reshape(np.prod(out_shape, dtype=int), -1) From 638d4910ae05b4efa32c36ed4f58ffa2491a9517 Mon Sep 17 00:00:00 2001 From: Ruth Comer Date: Fri, 30 Aug 2024 11:42:18 +0100 Subject: [PATCH 12/12] add tests for collapsing 2 dimensions --- lib/iris/tests/unit/coords/test_Coord.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/lib/iris/tests/unit/coords/test_Coord.py b/lib/iris/tests/unit/coords/test_Coord.py index be3388a1f0..97429f58f8 100644 --- a/lib/iris/tests/unit/coords/test_Coord.py +++ b/lib/iris/tests/unit/coords/test_Coord.py @@ -738,6 +738,15 @@ def test_string_nd_second(self): self.assertArrayEqual(collapsed_coord.points, expected) + def test_string_nd_both(self): + self.setupTestArrays((3, 4)) + coord = AuxCoord(self.pts_real.astype(str)) + + collapsed_coord = coord.collapsed() + expected = ["0.0|10.0|20.0|30.0|40.0|50.0|60.0|70.0|80.0|90.0|100.0|110.0"] + + self.assertArrayEqual(collapsed_coord.points, expected) + def test_string_nd_bounds_first(self): self.setupTestArrays((3, 4)) coord = AuxCoord(self.pts_real.astype(str), bounds=self.bds_real.astype(str)) @@ -784,6 +793,21 @@ def test_string_nd_bounds_second(self): self.assertArrayEqual(collapsed_coord.bounds[:, 0], expected_lower) self.assertArrayEqual(collapsed_coord.bounds[:, 1], expected_upper) + def test_string_nd_bounds_both(self): + self.setupTestArrays((3, 4)) + coord = AuxCoord(self.pts_real.astype(str), bounds=self.bds_real.astype(str)) + + collapsed_coord = coord.collapsed() + + # Points handling is as for non bounded case. So just check bounds. + expected_lower = ["-2.0|8.0|18.0|28.0|38.0|48.0|58.0|68.0|78.0|88.0|98.0|108.0"] + expected_upper = [ + "2.0|12.0|22.0|32.0|42.0|52.0|62.0|72.0|82.0|92.0|102.0|112.0" + ] + + self.assertArrayEqual(collapsed_coord.bounds[:, 0], expected_lower) + self.assertArrayEqual(collapsed_coord.bounds[:, 1], expected_upper) + class Test_is_compatible(tests.IrisTest): def setUp(self):