Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cube.aggregated_by output bounds #4315

Merged
merged 7 commits into from
Dec 8, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions docs/src/whatsnew/latest.rst
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,10 @@ This document explains the changes made to Iris for this release
array laziness, allowing efficient comparisons even with larger-than-memory
objects. (:pull:`4439`)

#. `@rcomer`_ modified :meth:`~iris.cube.Cube.aggregated_by` to calculate new
coordinate bounds using minimum and maximum for unordered coordinates,
fixing :issue:`1528`. (:pull:`4315`)


💣 Incompatible Changes
=======================
Expand Down
83 changes: 44 additions & 39 deletions lib/iris/analysis/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2170,15 +2170,20 @@ def _compute_groupby_coords(self):
def _compute_shared_coords(self):
"""Create the new shared coordinates given the group slices."""

groupby_indices = []
groupby_bounds = []

# Iterate over the ordered dictionary in order to construct
# a list of tuple group boundary indexes.
# Iterate over the ordered dictionary in order to construct a list of
# tuple group indices, and a list of the respective bounds of those
# indices.
for key_slice in self._slices_by_key.values():
if isinstance(key_slice, tuple):
groupby_bounds.append((key_slice[0], key_slice[-1]))
indices = key_slice
else:
groupby_bounds.append((key_slice.start, key_slice.stop - 1))
indices = tuple(range(*key_slice.indices(self._stop)))

groupby_indices.append(indices)
groupby_bounds.append((indices[0], indices[-1]))

# Create new shared bounded coordinates.
for coord, dim in self._shared_coords:
Expand All @@ -2197,15 +2202,9 @@ def _compute_shared_coords(self):
new_shape += shape[:-1]
work_arr = work_arr.reshape(work_shape)

for key_slice in self._slices_by_key.values():
if isinstance(key_slice, slice):
indices = key_slice.indices(
coord.points.shape[dim]
)
key_slice = range(*indices)

for indices in groupby_indices:
for arr in work_arr:
new_points.append("|".join(arr.take(key_slice)))
new_points.append("|".join(arr.take(indices)))

# Reinstate flattened dimensions. Aggregated dim now leads.
new_points = np.array(new_points).reshape(new_shape)
Expand All @@ -2220,48 +2219,54 @@ def _compute_shared_coords(self):
raise ValueError(msg)
else:
new_bounds = []
if coord.has_bounds():
# Derive new coord's bounds from bounds.
item = coord.bounds
maxmin_axis = (dim, -1)
first_choices = coord.bounds.take(0, -1)
last_choices = coord.bounds.take(1, -1)
else:
# Derive new coord's bounds from points.
item = coord.points
maxmin_axis = dim
first_choices = last_choices = coord.points

# Check whether item is monotonic along the dimension of interest.
deltas = np.diff(item, 1, dim)
monotonic = np.all(deltas >= 0) or np.all(deltas <= 0)

# Construct list of coordinate group boundary pairs.
for start, stop in groupby_bounds:
if coord.has_bounds():
# Collapse group bounds into bounds.
if monotonic:
# Use first and last bound or point for new bounds.
for start, stop in groupby_bounds:
if (
getattr(coord, "circular", False)
and (stop + 1) == coord.shape[dim]
and (stop + 1) == self._stop
):
new_bounds.append(
[
coord.bounds.take(start, dim).take(0, -1),
coord.bounds.take(0, dim).take(0, -1)
+ coord.units.modulus,
]
)
else:
new_bounds.append(
[
coord.bounds.take(start, dim).take(0, -1),
coord.bounds.take(stop, dim).take(1, -1),
]
)
else:
# Collapse group points into bounds.
if getattr(coord, "circular", False) and (
stop + 1
) == len(coord.points):
new_bounds.append(
[
coord.points.take(start, dim),
coord.points.take(0, dim)
first_choices.take(start, dim),
first_choices.take(0, dim)
+ coord.units.modulus,
]
)
else:
new_bounds.append(
[
coord.points.take(start, dim),
coord.points.take(stop, dim),
first_choices.take(start, dim),
last_choices.take(stop, dim),
]
)
else:
# Use min and max bound or point for new bounds.
for indices in groupby_indices:
item_slice = item.take(indices, dim)
new_bounds.append(
[
item_slice.min(axis=maxmin_axis),
item_slice.max(axis=maxmin_axis),
]
)

# Bounds needs to be an array with the length 2 start-stop
# dimension last, and the aggregated dimension back in its
Expand Down
26 changes: 24 additions & 2 deletions lib/iris/tests/unit/cube/test_Cube.py
Original file line number Diff line number Diff line change
Expand Up @@ -757,7 +757,29 @@ def test_agg_by_label(self):
res_cube = self.cube.aggregated_by("label", self.mock_agg)
val_coord = AuxCoord(
np.array([1.0, 0.5, 1.0]),
bounds=np.array([[0, 2], [0, 1], [2, 0]]),
bounds=np.array([[0, 2], [0, 1], [0, 2]]),
lbdreyer marked this conversation as resolved.
Show resolved Hide resolved
long_name="val",
)
label_coord = AuxCoord(
np.array(["alpha", "beta", "gamma"]),
long_name="label",
units="no_unit",
)
self.assertEqual(res_cube.coord("val"), val_coord)
self.assertEqual(res_cube.coord("label"), label_coord)

def test_agg_by_label_bounded(self):
# Aggregate a cube on a string coordinate label where label
# and val entries are not in step; the resulting cube has a val
# coord of bounded cells and a label coord of single string entries.
val_points = self.cube.coord("val").points
self.cube.coord("val").bounds = np.array(
[val_points - 0.5, val_points + 0.5]
).T
res_cube = self.cube.aggregated_by("label", self.mock_agg)
val_coord = AuxCoord(
np.array([1.0, 0.5, 1.0]),
bounds=np.array([[-0.5, 2.5], [-0.5, 1.5], [-0.5, 2.5]]),
long_name="val",
)
label_coord = AuxCoord(
Expand Down Expand Up @@ -890,7 +912,7 @@ def test_agg_by_label__lazy(self):
res_cube = self.cube.aggregated_by("label", MEAN)
val_coord = AuxCoord(
np.array([1.0, 0.5, 1.0]),
bounds=np.array([[0, 2], [0, 1], [2, 0]]),
bounds=np.array([[0, 2], [0, 1], [0, 2]]),
long_name="val",
)
label_coord = AuxCoord(
Expand Down