From fdc78bead0f2829a8da873ce1a311023648b4d0a Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Wed, 11 Dec 2024 18:28:21 +0000 Subject: [PATCH 01/16] Initial equalise_cubes util. --- lib/iris/util.py | 124 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 124 insertions(+) diff --git a/lib/iris/util.py b/lib/iris/util.py index a808087fd8..22c0f4baff 100644 --- a/lib/iris/util.py +++ b/lib/iris/util.py @@ -2189,3 +2189,127 @@ def mask_cube_from_shapefile(cube, shape, minimum_weight=0.0, in_place=False): masked_cube = mask_cube(cube, shapefile_mask, in_place=in_place) if not in_place: return masked_cube + + +def equalise_cubes( + cubes, + apply_all=False, + unify_names=False, + equalise_attributes=False, + unify_time_units=False, +): + """Modify a set of cubes to assist merge/concatenate operations. + + Various different adjustments can be applied to the input cubes, to remove + differences which may prevent them from combining into larger cubes. The requested + adjustment operations are applied to each group of input cubes with matching + cube metadata (names, units, attributes and cell-methods). + + Parameters + ---------- + cubes : sequence of :class:`~iris.cube.Cube` + The input cubes, in a list or similar. + + apply_all : bool, default=False + Enable *all possible* equalisation operations : that is, all those which have a + simple boolean key, so require no additional context information. + + unify_names : bool, default=False + When True, remove any redundant ``var_name`` and ``long_name`` properties, + leaving only one ``standard_name``, ``long_name`` or ``var_name`` per cube. + In this case, the revised name properties are also used in selecting input + groups. + + equalise_attributes : bool, default=False + When ``True``, apply an :func:`equalise_attributes` operation to each input + group. In this case, all attributes are ignored when selecting input groups. + + unify_time_units : bool, default=False + When True, apply the :func:`unify_time_units` operation to each input group. + In this case, all time-reference type (i.e. date) units are treated as + equivalent when selecting input groups. + + Returns + ------- + :class:`~iris.cube.CubeList` + A CubeList containing one output cube for each input cube, ready for a merge or + concatenate operation. Each result cube is either the corresponding input + cube, modified or unmodified, or a new replacement derived from it. + + Notes + ----- + The various 'equalise' operations are not applied to the entire input, but to + each group of input cubes with the same ``cube.metadata``. + + Every 'equalise' operation operates in a similar fashion, in that it identifies and + removes one specific type of metadata difference so that a group of cubes can + potentially combine into a single result cube. + + In some cases, the identification of input cube groups *also* depends on the + equalisation operation(s) selected : Operations which work on cube metadata + elements (names, units, attributes and cell-methods) will prevent that element from + discriminating between different input groups. + + """ + from iris.common.metadata import CubeMetadata + from iris.cube import CubeList + + if unify_names or apply_all: + # Tidy all cube names + # Note: this option operates as a special case, independent of + # and *in advance of* the group selection + for cube in cubes: + if cube.standard_name: + cube.long_name = None + cube.var_name = None + elif cube.long_name: + cube.var_name = None + + # Snapshot the cube metadata elements which we use to identify input groups + # TODO: we might want to sanitise practically comparable types here ? + # (e.g. large object arrays ??) + cube_grouping_keys = [ + {key: getattr(cube.metadata, key) for key in CubeMetadata._fields} + for cube in cubes + ] + + # Collect the selected operations which we are going to apply. + equalisation_ops = [] + + if equalise_attributes or apply_all: + # get the function of the same name in this module + equalisation_ops.append(globals()["equalise_attributes"]) + # Prevent any attributes from distinguishing input groups + for cat in cube_grouping_keys: + cat.pop["attributes"] + + if unify_time_units or apply_all: + # get the function of the same name in this module + equalisation_ops.append(globals()["unify_time_units"]) + # Treat all time-reference units (only) as identical when identifying + # input groups + for cat in cube_grouping_keys: + if cat["units"].is_time_reference(): + cat["units"] = "" + + if not equalisation_ops: + # Nothing more to do. + # Note that, if 'unify-names' was done, we already modified cubes in-place. + result = cubes + else: + # Compute the cube groups + # TODO: might something nasty happen here if attributes contain weird stuff ?? + cube_group_keys = set(cube_grouping_keys) + + # Process each cube group + collect the results + cubes_and_keys = zip(cubes, cube_grouping_keys) + result = [] + for group_keys in cube_group_keys: + group_cubes = [cube for cube, keys in cubes_and_keys if keys == group_keys] + for op in equalisation_ops: + op(group_cubes) + result.extend(group_cubes) + + # Always return a CubeList result + result = CubeList(result) + return result From 0b71f5985459b4db8c82d081bf9d454f706a079d Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Thu, 12 Dec 2024 18:21:26 +0000 Subject: [PATCH 02/16] Initial something working. --- lib/iris/util.py | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/lib/iris/util.py b/lib/iris/util.py index 22c0f4baff..696a0cb4cc 100644 --- a/lib/iris/util.py +++ b/lib/iris/util.py @@ -8,6 +8,7 @@ from abc import ABCMeta, abstractmethod from collections.abc import Hashable, Iterable +from copy import deepcopy import functools import inspect import os @@ -2202,7 +2203,7 @@ def equalise_cubes( Various different adjustments can be applied to the input cubes, to remove differences which may prevent them from combining into larger cubes. The requested - adjustment operations are applied to each group of input cubes with matching + "equalisation" operations are applied to each group of input cubes with matching cube metadata (names, units, attributes and cell-methods). Parameters @@ -2269,7 +2270,7 @@ def equalise_cubes( # TODO: we might want to sanitise practically comparable types here ? # (e.g. large object arrays ??) cube_grouping_keys = [ - {key: getattr(cube.metadata, key) for key in CubeMetadata._fields} + {key: deepcopy(getattr(cube.metadata, key)) for key in CubeMetadata._fields} for cube in cubes ] @@ -2281,7 +2282,7 @@ def equalise_cubes( equalisation_ops.append(globals()["equalise_attributes"]) # Prevent any attributes from distinguishing input groups for cat in cube_grouping_keys: - cat.pop["attributes"] + cat.pop("attributes") if unify_time_units or apply_all: # get the function of the same name in this module @@ -2299,12 +2300,20 @@ def equalise_cubes( else: # Compute the cube groups # TODO: might something nasty happen here if attributes contain weird stuff ?? - cube_group_keys = set(cube_grouping_keys) + def find_uniques(inputs): + results = [] + while inputs: + candidate, inputs = inputs[0], inputs[1:] + if candidate not in results: + results.append(candidate) + return results + + input_group_keys = find_uniques(cube_grouping_keys) # Process each cube group + collect the results - cubes_and_keys = zip(cubes, cube_grouping_keys) + cubes_and_keys = list(zip(cubes, cube_grouping_keys)) result = [] - for group_keys in cube_group_keys: + for group_keys in input_group_keys: group_cubes = [cube for cube, keys in cubes_and_keys if keys == group_keys] for op in equalisation_ops: op(group_cubes) From f483d4b6043ca761e1605590136324ba134b0b01 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Mon, 16 Dec 2024 14:20:35 +0000 Subject: [PATCH 03/16] Tweaks, improvements, notes. --- lib/iris/util.py | 45 ++++++++++++++++++++++++--------------------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/lib/iris/util.py b/lib/iris/util.py index 696a0cb4cc..f576329d25 100644 --- a/lib/iris/util.py +++ b/lib/iris/util.py @@ -2212,53 +2212,52 @@ def equalise_cubes( The input cubes, in a list or similar. apply_all : bool, default=False - Enable *all possible* equalisation operations : that is, all those which have a - simple boolean key, so require no additional context information. + Enable *all* the equalisation operations. unify_names : bool, default=False When True, remove any redundant ``var_name`` and ``long_name`` properties, leaving only one ``standard_name``, ``long_name`` or ``var_name`` per cube. - In this case, the revised name properties are also used in selecting input - groups. + In this case, the adjusted names are also used when selecting input groups. equalise_attributes : bool, default=False When ``True``, apply an :func:`equalise_attributes` operation to each input - group. In this case, all attributes are ignored when selecting input groups. + group. In this case, attributes are ignored when selecting input groups. unify_time_units : bool, default=False When True, apply the :func:`unify_time_units` operation to each input group. In this case, all time-reference type (i.e. date) units are treated as - equivalent when selecting input groups. + identical when selecting input groups. Returns ------- :class:`~iris.cube.CubeList` - A CubeList containing one output cube for each input cube, ready for a merge or - concatenate operation. Each result cube is either the corresponding input - cube, modified or unmodified, or a new replacement derived from it. + A CubeList containing the original input cubes, ready for merge or concatenate + operations. The cubes are possibly modified (in-place), and possibly in a + different order. Notes ----- - The various 'equalise' operations are not applied to the entire input, but to - each group of input cubes with the same ``cube.metadata``. + All the 'equalise' operations operate in a similar fashion, in that they identify + and remove differences in a specific metadata element, altering metadata so that + a merge or concatenate can potentially combine a group of cubes into a single + result cube. - Every 'equalise' operation operates in a similar fashion, in that it identifies and - removes one specific type of metadata difference so that a group of cubes can - potentially combine into a single result cube. + The various 'equalise' operations are not applied to the entire input, but to + groups of input cubes with the same ``cube.metadata``. - In some cases, the identification of input cube groups *also* depends on the - equalisation operation(s) selected : Operations which work on cube metadata - elements (names, units, attributes and cell-methods) will prevent that element from - discriminating between different input groups. + The input cube groups also depend on the equalisation operation(s) selected : + Operations which equalise a specific cube metadata element (names, units, + attributes or cell-methods) exclude that element from the input grouping criteria. """ from iris.common.metadata import CubeMetadata from iris.cube import CubeList if unify_names or apply_all: - # Tidy all cube names + # Rationalise all the cube names # Note: this option operates as a special case, independent of # and *in advance of* the group selection + # (hence, it affects the groups which other operations are applied to) for cube in cubes: if cube.standard_name: cube.long_name = None @@ -2280,7 +2279,7 @@ def equalise_cubes( if equalise_attributes or apply_all: # get the function of the same name in this module equalisation_ops.append(globals()["equalise_attributes"]) - # Prevent any attributes from distinguishing input groups + # Prevent attributes from distinguishing input groups for cat in cube_grouping_keys: cat.pop("attributes") @@ -2295,11 +2294,15 @@ def equalise_cubes( if not equalisation_ops: # Nothing more to do. - # Note that, if 'unify-names' was done, we already modified cubes in-place. + # Note that, if 'unify-names' was done, we *already* modified cubes in-place. result = cubes else: # Compute the cube groups + # N.B. *can't* use sets, as contents not always hashable, e.g. array attributes + # I fear could be inefficient (repeated array compare), but maybe unavoidable # TODO: might something nasty happen here if attributes contain weird stuff ?? + + # TODO: this can be improved -- there is no need to re-scan for each group def find_uniques(inputs): results = [] while inputs: From cbf7514bc39b4bdbe173f801b310eb73f6b8001f Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Mon, 16 Dec 2024 14:35:33 +0000 Subject: [PATCH 04/16] Initial partial testing --- .../tests/unit/util/test_equalise_cubes.py | 157 ++++++++++++++++++ 1 file changed, 157 insertions(+) create mode 100644 lib/iris/tests/unit/util/test_equalise_cubes.py diff --git a/lib/iris/tests/unit/util/test_equalise_cubes.py b/lib/iris/tests/unit/util/test_equalise_cubes.py new file mode 100644 index 0000000000..d908cf72e1 --- /dev/null +++ b/lib/iris/tests/unit/util/test_equalise_cubes.py @@ -0,0 +1,157 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the BSD license. +# See LICENSE in the root of the repository for full licensing details. +"""Unit tests for the :func:`iris.util.equalise_cubes` function.""" + +import numpy as np +from numpy.random import Generator +import pytest + +from iris.cube import Cube +from iris.util import equalise_cubes + + +@pytest.fixture(params=["off", "on", "applyall", "scrambled"]) +def usage(request): + return request.param + + +_RNG = 95297 + + +def _scramble(inputs, rng=_RNG): + # Make a simple check that the input order does not affect the result + if not isinstance(rng, Generator): + rng = np.random.default_rng(rng) + n_inputs = len(inputs) + # NOTE: make object array of explicit shape + fill it, + # since np.array(inputs) *fails* specifically with a list of metadata objects + inputs_array = np.empty((n_inputs,), dtype=object) + inputs_array[:] = inputs + n_inputs = inputs_array.shape[0] + scramble_inds = rng.permutation(n_inputs) + inputs_array = inputs_array[scramble_inds] + # Modify input list **BUT N.B. IN PLACE** + inputs[0:] = inputs_array + return inputs + + +def _cube( + stdname=None, + varname=None, + longname=None, + units="unknown", + cell_methods=(), + **kwattributes, +): + # Construct a simple test-cube with given metadata properties + cube = Cube( + [1], + standard_name=stdname, + long_name=longname, + var_name=varname, + cell_methods=cell_methods, + units=units, + attributes=kwattributes, + ) + return cube + + +def _usage_common(usage, op_keyword_name, test_cubes): + kwargs = {} + if usage == "off": + pass + elif usage in ("on", "scrambled"): + kwargs[op_keyword_name] = True + if usage == "scrambled": + # reorder the input cubes, but in-place + _scramble(test_cubes) + elif usage == "applyall": + kwargs["apply_all"] = True + else: + raise ValueError(f"Unrecognised 'usage' option {usage!r}") + default_expected_metadatas = [cube.metadata for cube in test_cubes] + return kwargs, default_expected_metadatas + + +class TestUnifyNames: + def test_stdnames_simple(self, usage): + sn = "air_temperature" + stdnames = [sn, sn, sn] + longnames = [None, "long1", "long2"] + varnames = ["var1", None, "var2"] + test_cubes = [ + _cube(stdname=stdname, longname=longname, varname=varname) + for stdname, longname, varname in zip(stdnames, longnames, varnames) + ] + kwargs, expected_metadatas = _usage_common(usage, "unify_names", test_cubes) + + # Calculate expected results + if usage != "off": + # result cube metadata should all be the same, with no varname + meta = _cube(stdname=sn).metadata + expected_metadatas = [meta] * len(test_cubes) + + # Apply operation + results = equalise_cubes(test_cubes, **kwargs) + + # Assert result + assert [cube.metadata for cube in results] == expected_metadatas + + def test_stdnames_multi(self, usage): + # Show that two different standard-name groups are handled independently + sn1, sn2 = "air_temperature", "air_pressure" + stdnames = [sn1, sn1, sn1, sn2, sn2, sn2] + varnames = ["v1", None, "v2", "v3", None, None] + test_cubes = [ + _cube(stdname, varname) for stdname, varname in zip(stdnames, varnames) + ] + kwargs, expected_metadatas = _usage_common(usage, "unify_names", test_cubes) + + # Calculate expected results + if usage != "off": + # result cube metadata should be of only 2 types + meta1 = _cube(stdname=sn1).metadata + meta2 = _cube(stdname=sn2).metadata + # the result cubes should still correspond to the original input order, + # since all cube equalisation operations occur in-place + expected_metadatas = [ + meta1 if cube.standard_name == sn1 else meta2 for cube in test_cubes + ] + + # Apply operation + results = equalise_cubes(test_cubes, **kwargs) + + # Assert result + assert [cube.metadata for cube in results] == expected_metadatas + + def test_missing_names(self, usage): + # Show that two different standard-name groups are handled independently + sn = "air_temperature" + stdnames = [sn, None, None, None] + longnames = ["long1", "long2", None, None] + varnames = ["var1", "var2", "var3", None] + test_cubes = [ + _cube(stdname=stdname, longname=longname, varname=varname) + for stdname, longname, varname in zip(stdnames, longnames, varnames) + ] + kwargs, expected_metadatas = _usage_common(usage, "unify_names", test_cubes) + + # Calculate expected results + if usage != "off": + stdnames = [sn, None, None, None] + longnames = [None, "long2", None, None] + varnames = [None, None, "var3", None] + expected_metadatas = [ + _cube(stdname=stdname, longname=longname, varname=varname).metadata + for stdname, longname, varname in zip(stdnames, longnames, varnames) + ] + if usage == "scrambled": + expected_metadatas = _scramble(expected_metadatas) + + # Apply operation + results = equalise_cubes(test_cubes, **kwargs) + + # Assert result + assert [cube.metadata for cube in results] == expected_metadatas From 94a70a2968e9772668d5037378ffb0908858e470 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Tue, 17 Dec 2024 16:07:46 +0000 Subject: [PATCH 05/16] Small tweaks. --- lib/iris/tests/unit/util/test_equalise_cubes.py | 6 +++--- lib/iris/util.py | 11 +++++------ 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/lib/iris/tests/unit/util/test_equalise_cubes.py b/lib/iris/tests/unit/util/test_equalise_cubes.py index d908cf72e1..fbaa5cadab 100644 --- a/lib/iris/tests/unit/util/test_equalise_cubes.py +++ b/lib/iris/tests/unit/util/test_equalise_cubes.py @@ -39,11 +39,11 @@ def _scramble(inputs, rng=_RNG): def _cube( stdname=None, - varname=None, longname=None, + varname=None, units="unknown", cell_methods=(), - **kwattributes, + **attributes, ): # Construct a simple test-cube with given metadata properties cube = Cube( @@ -53,7 +53,7 @@ def _cube( var_name=varname, cell_methods=cell_methods, units=units, - attributes=kwattributes, + attributes=attributes, ) return cube diff --git a/lib/iris/util.py b/lib/iris/util.py index f576329d25..9de62ed7f5 100644 --- a/lib/iris/util.py +++ b/lib/iris/util.py @@ -2304,12 +2304,11 @@ def equalise_cubes( # TODO: this can be improved -- there is no need to re-scan for each group def find_uniques(inputs): - results = [] - while inputs: - candidate, inputs = inputs[0], inputs[1:] - if candidate not in results: - results.append(candidate) - return results + unique_inputs = [] + for candidate in inputs: + if candidate not in unique_inputs: + unique_inputs.append(candidate) + return unique_inputs input_group_keys = find_uniques(cube_grouping_keys) From dcf40d53d84e3d23cb88c6155a14e4e76b7fb19b Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Tue, 17 Dec 2024 21:40:28 +0000 Subject: [PATCH 06/16] Tidy a bit. Test 'unify_time_units'. NB time-units are on coords not cubes. --- .../tests/unit/util/test_equalise_cubes.py | 219 +++++++++++++----- lib/iris/util.py | 9 +- 2 files changed, 164 insertions(+), 64 deletions(-) diff --git a/lib/iris/tests/unit/util/test_equalise_cubes.py b/lib/iris/tests/unit/util/test_equalise_cubes.py index fbaa5cadab..16ad1d3c0d 100644 --- a/lib/iris/tests/unit/util/test_equalise_cubes.py +++ b/lib/iris/tests/unit/util/test_equalise_cubes.py @@ -4,24 +4,18 @@ # See LICENSE in the root of the repository for full licensing details. """Unit tests for the :func:`iris.util.equalise_cubes` function.""" +from cf_units import Unit import numpy as np from numpy.random import Generator import pytest +from iris.coords import DimCoord from iris.cube import Cube from iris.util import equalise_cubes -@pytest.fixture(params=["off", "on", "applyall", "scrambled"]) -def usage(request): - return request.param - - -_RNG = 95297 - - -def _scramble(inputs, rng=_RNG): - # Make a simple check that the input order does not affect the result +def _scramble(inputs, rng=95297): + # Reorder items to check that order does not affect the operation if not isinstance(rng, Generator): rng = np.random.default_rng(rng) n_inputs = len(inputs) @@ -37,6 +31,29 @@ def _scramble(inputs, rng=_RNG): return inputs +@pytest.fixture(params=["off", "on", "applyall", "scrambled"]) +def usage(request): + # Fixture to check different usage modes for a given operation control keyword + return request.param + + +def _usage_common(usage, op_keyword_name, test_cubes): + kwargs = {} + if usage == "off": + pass + elif usage in ("on", "scrambled"): + kwargs[op_keyword_name] = True + if usage == "scrambled": + # reorder the input cubes, but in-place + _scramble(test_cubes) + elif usage == "applyall": + kwargs["apply_all"] = True + else: + raise ValueError(f"Unrecognised 'usage' option {usage!r}") + default_expected_metadatas = [cube.metadata for cube in test_cubes] + return kwargs, default_expected_metadatas + + def _cube( stdname=None, longname=None, @@ -45,7 +62,7 @@ def _cube( cell_methods=(), **attributes, ): - # Construct a simple test-cube with given metadata properties + # Construct a simple test-cube with given metadata properties. cube = Cube( [1], standard_name=stdname, @@ -58,25 +75,9 @@ def _cube( return cube -def _usage_common(usage, op_keyword_name, test_cubes): - kwargs = {} - if usage == "off": - pass - elif usage in ("on", "scrambled"): - kwargs[op_keyword_name] = True - if usage == "scrambled": - # reorder the input cubes, but in-place - _scramble(test_cubes) - elif usage == "applyall": - kwargs["apply_all"] = True - else: - raise ValueError(f"Unrecognised 'usage' option {usage!r}") - default_expected_metadatas = [cube.metadata for cube in test_cubes] - return kwargs, default_expected_metadatas - - class TestUnifyNames: - def test_stdnames_simple(self, usage): + # Test the 'unify_names' operation. + def test_simple(self, usage): sn = "air_temperature" stdnames = [sn, sn, sn] longnames = [None, "long1", "long2"] @@ -91,7 +92,7 @@ def test_stdnames_simple(self, usage): if usage != "off": # result cube metadata should all be the same, with no varname meta = _cube(stdname=sn).metadata - expected_metadatas = [meta] * len(test_cubes) + expected_metadatas = [meta, meta, meta] # Apply operation results = equalise_cubes(test_cubes, **kwargs) @@ -99,26 +100,29 @@ def test_stdnames_simple(self, usage): # Assert result assert [cube.metadata for cube in results] == expected_metadatas - def test_stdnames_multi(self, usage): - # Show that two different standard-name groups are handled independently + def test_multi(self, usage): + # Show that different cases are resolved independently sn1, sn2 = "air_temperature", "air_pressure" - stdnames = [sn1, sn1, sn1, sn2, sn2, sn2] - varnames = ["v1", None, "v2", "v3", None, None] + stdnames = [sn1, None, None, None, sn2, None] + longnames = ["long1", "long2", None, None, "long3", None] + varnames = ["var1", None, "var3", "var4", None, None] test_cubes = [ - _cube(stdname, varname) for stdname, varname in zip(stdnames, varnames) + _cube(stdname=stdname, longname=longname, varname=varname) + for stdname, longname, varname in zip(stdnames, longnames, varnames) ] kwargs, expected_metadatas = _usage_common(usage, "unify_names", test_cubes) # Calculate expected results if usage != "off": - # result cube metadata should be of only 2 types - meta1 = _cube(stdname=sn1).metadata - meta2 = _cube(stdname=sn2).metadata - # the result cubes should still correspond to the original input order, - # since all cube equalisation operations occur in-place + stdnames = [sn1, None, None, None, sn2, None] + longnames = [None, "long2", None, None, None, None] + varnames = [None, None, "var3", "var4", None, None] expected_metadatas = [ - meta1 if cube.standard_name == sn1 else meta2 for cube in test_cubes + _cube(stdname=stdname, longname=longname, varname=varname).metadata + for stdname, longname, varname in zip(stdnames, longnames, varnames) ] + if usage == "scrambled": + expected_metadatas = _scramble(expected_metadatas) # Apply operation results = equalise_cubes(test_cubes, **kwargs) @@ -126,29 +130,130 @@ def test_stdnames_multi(self, usage): # Assert result assert [cube.metadata for cube in results] == expected_metadatas - def test_missing_names(self, usage): - # Show that two different standard-name groups are handled independently - sn = "air_temperature" - stdnames = [sn, None, None, None] - longnames = ["long1", "long2", None, None] - varnames = ["var1", "var2", "var3", None] + +class TestEqualiseAttributes: + # Test the 'equalise_attributes' operation. + def test_calling(self, usage, mocker): + patch = mocker.patch("iris.util.equalise_attributes") + test_cubes = [_cube()] + kwargs, expected_metadatas = _usage_common( + usage, "equalise_attributes", test_cubes + ) + + # Apply operation + equalise_cubes(test_cubes, **kwargs) + + expected_calls = 0 if usage == "off" else 1 + assert len(patch.call_args_list) == expected_calls + + def test_basic_function(self, usage): + test_cubes = [_cube(att_a=10, att_b=1), _cube(att_a=10, att_b=2)] + kwargs, expected_metadatas = _usage_common( + usage, "equalise_attributes", test_cubes + ) + + # Calculate expected results + if usage != "off": + # result cube metadata should all be the same, with no varname + meta = _cube(att_a=10).metadata + expected_metadatas = [meta, meta] + + # Apply operation + results = equalise_cubes(test_cubes, **kwargs) + + # Assert result + assert [cube.metadata for cube in results] == expected_metadatas + + def test_operation_in_groups(self, usage): + # Check that it acts independently within groups (as defined, here, by naming) test_cubes = [ - _cube(stdname=stdname, longname=longname, varname=varname) - for stdname, longname, varname in zip(stdnames, longnames, varnames) + _cube(longname="a", att_a=10, att_b=1), + _cube(longname="a", att_a=10, att_b=2), + _cube(longname="b", att_a=10, att_b=1), + _cube(longname="b", att_a=10, att_b=1), ] - kwargs, expected_metadatas = _usage_common(usage, "unify_names", test_cubes) + kwargs, expected_metadatas = _usage_common( + usage, "equalise_attributes", test_cubes + ) # Calculate expected results if usage != "off": - stdnames = [sn, None, None, None] - longnames = [None, "long2", None, None] - varnames = [None, None, "var3", None] + # result cube metadata should all be the same, with no varname expected_metadatas = [ - _cube(stdname=stdname, longname=longname, varname=varname).metadata - for stdname, longname, varname in zip(stdnames, longnames, varnames) + # the "a" cubes have lost att_b, but the "b" cubes retain it + _cube(longname="a", att_a=10).metadata, + _cube(longname="a", att_a=10).metadata, + _cube(longname="b", att_a=10, att_b=1).metadata, + _cube(longname="b", att_a=10, att_b=1).metadata, + ] + + # Apply operation + results = equalise_cubes(test_cubes, **kwargs) + + # Assert result + assert [cube.metadata for cube in results] == expected_metadatas + + +class TestUnifyTimeUnits: + # Test the 'unify_time_units' operation. + def test_calling(self, usage, mocker): + patch = mocker.patch("iris.util.unify_time_units") + test_cubes = [_cube()] + kwargs, expected_metadatas = _usage_common( + usage, "unify_time_units", test_cubes + ) + + # Apply operation + equalise_cubes(test_cubes, **kwargs) + + expected_calls = 0 if usage == "off" else 1 + assert len(patch.call_args_list) == expected_calls + + def _cube_timeunits(self, unit, **kwargs): + cube = _cube(**kwargs) + cube.add_dim_coord(DimCoord([0.0], standard_name="time", units=unit), 0) + return cube + + def test_basic_function(self, usage): + if usage == "scrambled": + pytest.skip("scrambled mode not supported") + tu1, tu2 = [Unit(name) for name in ("days since 1970", "days since 1971")] + cu1, cu2 = self._cube_timeunits(tu1), self._cube_timeunits(tu2) + test_cubes = [cu1, cu2] + kwargs, expected_metadatas = _usage_common( + usage, "unify_time_units", test_cubes + ) + + expected_units = [tu1, tu2 if usage == "off" else tu1] + + # Apply operation + results = equalise_cubes(test_cubes, **kwargs) + + # Assert result + assert [cube.coord("time").units for cube in results] == expected_units + + def test_operation_in_groups(self, usage): + # Check that it acts independently within groups (as defined, here, by naming) + test_cubes = [ + _cube(longname="a", att_a=10, att_b=1), + _cube(longname="a", att_a=10, att_b=2), + _cube(longname="b", att_a=10, att_b=1), + _cube(longname="b", att_a=10, att_b=1), + ] + kwargs, expected_metadatas = _usage_common( + usage, "equalise_attributes", test_cubes + ) + + # Calculate expected results + if usage != "off": + # result cube metadata should all be the same, with no varname + expected_metadatas = [ + # the "a" cubes have lost att_b, but the "b" cubes retain it + _cube(longname="a", att_a=10).metadata, + _cube(longname="a", att_a=10).metadata, + _cube(longname="b", att_a=10, att_b=1).metadata, + _cube(longname="b", att_a=10, att_b=1).metadata, ] - if usage == "scrambled": - expected_metadatas = _scramble(expected_metadatas) # Apply operation results = equalise_cubes(test_cubes, **kwargs) diff --git a/lib/iris/util.py b/lib/iris/util.py index 9de62ed7f5..db392efe4d 100644 --- a/lib/iris/util.py +++ b/lib/iris/util.py @@ -2225,8 +2225,8 @@ def equalise_cubes( unify_time_units : bool, default=False When True, apply the :func:`unify_time_units` operation to each input group. - In this case, all time-reference type (i.e. date) units are treated as - identical when selecting input groups. + Note : while this may convert units of time reference coordinates, it does + not affect the units of the cubes themselves. Returns ------- @@ -2286,11 +2286,6 @@ def equalise_cubes( if unify_time_units or apply_all: # get the function of the same name in this module equalisation_ops.append(globals()["unify_time_units"]) - # Treat all time-reference units (only) as identical when identifying - # input groups - for cat in cube_grouping_keys: - if cat["units"].is_time_reference(): - cat["units"] = "" if not equalisation_ops: # Nothing more to do. From 1265c26000c7fc460deff7de239a485f89fcc2c5 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Wed, 18 Dec 2024 00:42:34 +0000 Subject: [PATCH 07/16] Fix grouping efficiency. --- .../tests/unit/util/test_equalise_cubes.py | 4 ++ lib/iris/util.py | 45 +++++++++---------- 2 files changed, 24 insertions(+), 25 deletions(-) diff --git a/lib/iris/tests/unit/util/test_equalise_cubes.py b/lib/iris/tests/unit/util/test_equalise_cubes.py index 16ad1d3c0d..c20b43250f 100644 --- a/lib/iris/tests/unit/util/test_equalise_cubes.py +++ b/lib/iris/tests/unit/util/test_equalise_cubes.py @@ -186,6 +186,8 @@ def test_operation_in_groups(self, usage): _cube(longname="b", att_a=10, att_b=1).metadata, _cube(longname="b", att_a=10, att_b=1).metadata, ] + if usage == "scrambled": + _scramble(expected_metadatas) # Apply operation results = equalise_cubes(test_cubes, **kwargs) @@ -254,6 +256,8 @@ def test_operation_in_groups(self, usage): _cube(longname="b", att_a=10, att_b=1).metadata, _cube(longname="b", att_a=10, att_b=1).metadata, ] + if usage == "scrambled": + _scramble(expected_metadatas) # Apply operation results = equalise_cubes(test_cubes, **kwargs) diff --git a/lib/iris/util.py b/lib/iris/util.py index db392efe4d..0109fb609c 100644 --- a/lib/iris/util.py +++ b/lib/iris/util.py @@ -2287,35 +2287,30 @@ def equalise_cubes( # get the function of the same name in this module equalisation_ops.append(globals()["unify_time_units"]) - if not equalisation_ops: - # Nothing more to do. - # Note that, if 'unify-names' was done, we *already* modified cubes in-place. - result = cubes - else: + if equalisation_ops: + # NOTE: if no "equalisation_ops", nothing more to do. + # However, if 'unify-names' was done, we *already* modified cubes in-place. + # Compute the cube groups - # N.B. *can't* use sets, as contents not always hashable, e.g. array attributes - # I fear could be inefficient (repeated array compare), but maybe unavoidable + # N.B. *can't* use sets, or dictionary key checking, as our 'keys' are not + # always hashable -- e.g. especially, array attributes. + # I fear this can be inefficient (repeated array compare), but maybe unavoidable # TODO: might something nasty happen here if attributes contain weird stuff ?? + cube_group_keys = [] + cube_group_cubes = [] + for cube, cube_group_key in zip(cubes, cube_grouping_keys): + if cube_group_key not in cube_group_keys: + cube_group_keys.append(cube_group_key) + cube_group_cubes.append([cube]) + else: + i_at = cube_group_keys.index(cube_group_key) + cube_group_cubes[i_at].append(cube) - # TODO: this can be improved -- there is no need to re-scan for each group - def find_uniques(inputs): - unique_inputs = [] - for candidate in inputs: - if candidate not in unique_inputs: - unique_inputs.append(candidate) - return unique_inputs - - input_group_keys = find_uniques(cube_grouping_keys) - - # Process each cube group + collect the results - cubes_and_keys = list(zip(cubes, cube_grouping_keys)) - result = [] - for group_keys in input_group_keys: - group_cubes = [cube for cube, keys in cubes_and_keys if keys == group_keys] + # Apply operations to the groups : in-place modifications on the cubes + for group_cubes in cube_group_cubes: for op in equalisation_ops: op(group_cubes) - result.extend(group_cubes) - # Always return a CubeList result - result = CubeList(result) + # Return a CubeList result = the *original* cubes, as modified + result = CubeList(cubes) return result From f6150deac6ab04a7c7f737dd1bfc6d4fb8c4f8f4 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Thu, 19 Dec 2024 11:37:04 +0000 Subject: [PATCH 08/16] Review changes: rename vars. --- lib/iris/util.py | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/lib/iris/util.py b/lib/iris/util.py index 0109fb609c..ef5b9e2254 100644 --- a/lib/iris/util.py +++ b/lib/iris/util.py @@ -2268,8 +2268,11 @@ def equalise_cubes( # Snapshot the cube metadata elements which we use to identify input groups # TODO: we might want to sanitise practically comparable types here ? # (e.g. large object arrays ??) - cube_grouping_keys = [ - {key: deepcopy(getattr(cube.metadata, key)) for key in CubeMetadata._fields} + cube_grouping_values = [ + { + field: deepcopy(getattr(cube.metadata, field)) + for field in CubeMetadata._fields + } for cube in cubes ] @@ -2280,8 +2283,8 @@ def equalise_cubes( # get the function of the same name in this module equalisation_ops.append(globals()["equalise_attributes"]) # Prevent attributes from distinguishing input groups - for cat in cube_grouping_keys: - cat.pop("attributes") + for grouping_values in cube_grouping_values: + grouping_values.pop("attributes") if unify_time_units or apply_all: # get the function of the same name in this module @@ -2291,23 +2294,23 @@ def equalise_cubes( # NOTE: if no "equalisation_ops", nothing more to do. # However, if 'unify-names' was done, we *already* modified cubes in-place. - # Compute the cube groups - # N.B. *can't* use sets, or dictionary key checking, as our 'keys' are not + # Group the cubes into sets with the same 'grouping values'. + # N.B. we *can't* use sets, or dictionary key checking, as our 'values' are not # always hashable -- e.g. especially, array attributes. # I fear this can be inefficient (repeated array compare), but maybe unavoidable # TODO: might something nasty happen here if attributes contain weird stuff ?? - cube_group_keys = [] - cube_group_cubes = [] - for cube, cube_group_key in zip(cubes, cube_grouping_keys): - if cube_group_key not in cube_group_keys: - cube_group_keys.append(cube_group_key) - cube_group_cubes.append([cube]) + cubegroup_values = [] + cubegroup_cubes = [] + for cube, grouping_values in zip(cubes, cube_grouping_values): + if grouping_values not in cubegroup_values: + cubegroup_values.append(grouping_values) + cubegroup_cubes.append([cube]) else: - i_at = cube_group_keys.index(cube_group_key) - cube_group_cubes[i_at].append(cube) + i_at = cubegroup_values.index(grouping_values) + cubegroup_cubes[i_at].append(cube) # Apply operations to the groups : in-place modifications on the cubes - for group_cubes in cube_group_cubes: + for group_cubes in cubegroup_cubes: for op in equalisation_ops: op(group_cubes) From 844b18d99bda75e54031c341002c94944ba0c256 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Thu, 19 Dec 2024 11:39:32 +0000 Subject: [PATCH 09/16] Review changes: rename 'unify_names'. --- lib/iris/tests/unit/util/test_equalise_cubes.py | 6 +++--- lib/iris/util.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/lib/iris/tests/unit/util/test_equalise_cubes.py b/lib/iris/tests/unit/util/test_equalise_cubes.py index c20b43250f..a44ce76a1a 100644 --- a/lib/iris/tests/unit/util/test_equalise_cubes.py +++ b/lib/iris/tests/unit/util/test_equalise_cubes.py @@ -76,7 +76,7 @@ def _cube( class TestUnifyNames: - # Test the 'unify_names' operation. + # Test the 'normalise_names' operation. def test_simple(self, usage): sn = "air_temperature" stdnames = [sn, sn, sn] @@ -86,7 +86,7 @@ def test_simple(self, usage): _cube(stdname=stdname, longname=longname, varname=varname) for stdname, longname, varname in zip(stdnames, longnames, varnames) ] - kwargs, expected_metadatas = _usage_common(usage, "unify_names", test_cubes) + kwargs, expected_metadatas = _usage_common(usage, "normalise_names", test_cubes) # Calculate expected results if usage != "off": @@ -110,7 +110,7 @@ def test_multi(self, usage): _cube(stdname=stdname, longname=longname, varname=varname) for stdname, longname, varname in zip(stdnames, longnames, varnames) ] - kwargs, expected_metadatas = _usage_common(usage, "unify_names", test_cubes) + kwargs, expected_metadatas = _usage_common(usage, "normalise_names", test_cubes) # Calculate expected results if usage != "off": diff --git a/lib/iris/util.py b/lib/iris/util.py index ef5b9e2254..359ff84b58 100644 --- a/lib/iris/util.py +++ b/lib/iris/util.py @@ -2195,7 +2195,7 @@ def mask_cube_from_shapefile(cube, shape, minimum_weight=0.0, in_place=False): def equalise_cubes( cubes, apply_all=False, - unify_names=False, + normalise_names=False, equalise_attributes=False, unify_time_units=False, ): @@ -2214,7 +2214,7 @@ def equalise_cubes( apply_all : bool, default=False Enable *all* the equalisation operations. - unify_names : bool, default=False + normalise_names : bool, default=False When True, remove any redundant ``var_name`` and ``long_name`` properties, leaving only one ``standard_name``, ``long_name`` or ``var_name`` per cube. In this case, the adjusted names are also used when selecting input groups. @@ -2253,7 +2253,7 @@ def equalise_cubes( from iris.common.metadata import CubeMetadata from iris.cube import CubeList - if unify_names or apply_all: + if normalise_names or apply_all: # Rationalise all the cube names # Note: this option operates as a special case, independent of # and *in advance of* the group selection From 8a0080844894775fec91de75fbc34c32ca4d99b7 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Thu, 19 Dec 2024 12:55:21 +0000 Subject: [PATCH 10/16] Review changes: Add warning on null operation. --- .../tests/unit/util/test_equalise_cubes.py | 31 +++++++++++++++++-- lib/iris/util.py | 12 ++++++- 2 files changed, 39 insertions(+), 4 deletions(-) diff --git a/lib/iris/tests/unit/util/test_equalise_cubes.py b/lib/iris/tests/unit/util/test_equalise_cubes.py index a44ce76a1a..011583cec0 100644 --- a/lib/iris/tests/unit/util/test_equalise_cubes.py +++ b/lib/iris/tests/unit/util/test_equalise_cubes.py @@ -4,6 +4,8 @@ # See LICENSE in the root of the repository for full licensing details. """Unit tests for the :func:`iris.util.equalise_cubes` function.""" +import warnings + from cf_units import Unit import numpy as np from numpy.random import Generator @@ -12,6 +14,7 @@ from iris.coords import DimCoord from iris.cube import Cube from iris.util import equalise_cubes +from iris.warnings import IrisUserWarning def _scramble(inputs, rng=95297): @@ -75,7 +78,29 @@ def _cube( return cube -class TestUnifyNames: +_NO_OP_MESSAGE = "'equalise_cubes' call does nothing" + + +class TestNoOperation: + def test(self): + # A standalone test, that a call with no operations enabled raises a warning + with pytest.warns(IrisUserWarning, match=_NO_OP_MESSAGE): + equalise_cubes([]) + + +class WarnChecked: + @pytest.fixture(autouse=True) + def nowarn(self, usage): + if usage == "off": + with pytest.warns(IrisUserWarning, match=_NO_OP_MESSAGE): + yield + else: + with warnings.catch_warnings(): + warnings.simplefilter("error") + yield + + +class TestUnifyNames(WarnChecked): # Test the 'normalise_names' operation. def test_simple(self, usage): sn = "air_temperature" @@ -131,7 +156,7 @@ def test_multi(self, usage): assert [cube.metadata for cube in results] == expected_metadatas -class TestEqualiseAttributes: +class TestEqualiseAttributes(WarnChecked): # Test the 'equalise_attributes' operation. def test_calling(self, usage, mocker): patch = mocker.patch("iris.util.equalise_attributes") @@ -196,7 +221,7 @@ def test_operation_in_groups(self, usage): assert [cube.metadata for cube in results] == expected_metadatas -class TestUnifyTimeUnits: +class TestUnifyTimeUnits(WarnChecked): # Test the 'unify_time_units' operation. def test_calling(self, usage, mocker): patch = mocker.patch("iris.util.unify_time_units") diff --git a/lib/iris/util.py b/lib/iris/util.py index 359ff84b58..02dc1782a9 100644 --- a/lib/iris/util.py +++ b/lib/iris/util.py @@ -16,6 +16,7 @@ import sys import tempfile from typing import Literal +from warnings import warn import cf_units from dask import array as da @@ -28,6 +29,7 @@ from iris.common import SERVICES from iris.common.lenient import _lenient_client import iris.exceptions +import iris.warnings def broadcast_to_shape(array, shape, dim_map, chunks=None): @@ -2290,7 +2292,15 @@ def equalise_cubes( # get the function of the same name in this module equalisation_ops.append(globals()["unify_time_units"]) - if equalisation_ops: + if not equalisation_ops: + if not normalise_names: + msg = ( + "'equalise_cubes' call does nothing, as no equalisation operations " + "are enabled (neither `apply_all` nor any individual keywords set)." + ) + warn(msg, category=iris.warnings.IrisUserWarning) + + else: # NOTE: if no "equalisation_ops", nothing more to do. # However, if 'unify-names' was done, we *already* modified cubes in-place. From 8b56344b77090ebb31f7f9a86a536ce723bed66f Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Thu, 19 Dec 2024 13:05:44 +0000 Subject: [PATCH 11/16] Review changes: Remove mistaken docstring reference to re-ordering. --- lib/iris/util.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/lib/iris/util.py b/lib/iris/util.py index 02dc1782a9..9681ab1484 100644 --- a/lib/iris/util.py +++ b/lib/iris/util.py @@ -2233,9 +2233,8 @@ def equalise_cubes( Returns ------- :class:`~iris.cube.CubeList` - A CubeList containing the original input cubes, ready for merge or concatenate - operations. The cubes are possibly modified (in-place), and possibly in a - different order. + A CubeList containing the original input cubes, modified as required (in-place) + ready for merge or concatenate operations. Notes ----- From ae992ffd333afc9493627916ba5564d73b24d953 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Thu, 19 Dec 2024 13:08:08 +0000 Subject: [PATCH 12/16] Review changes: Simplify in-place replacement of list/array content. --- lib/iris/tests/unit/util/test_equalise_cubes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/iris/tests/unit/util/test_equalise_cubes.py b/lib/iris/tests/unit/util/test_equalise_cubes.py index 011583cec0..738933326e 100644 --- a/lib/iris/tests/unit/util/test_equalise_cubes.py +++ b/lib/iris/tests/unit/util/test_equalise_cubes.py @@ -30,7 +30,7 @@ def _scramble(inputs, rng=95297): scramble_inds = rng.permutation(n_inputs) inputs_array = inputs_array[scramble_inds] # Modify input list **BUT N.B. IN PLACE** - inputs[0:] = inputs_array + inputs[:] = inputs_array return inputs From 62285b58e1986145215a731402eedcef90268575 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Thu, 19 Dec 2024 13:49:51 +0000 Subject: [PATCH 13/16] Added whatsnew. --- docs/src/whatsnew/latest.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docs/src/whatsnew/latest.rst b/docs/src/whatsnew/latest.rst index ae13b8a883..7d99411fbd 100644 --- a/docs/src/whatsnew/latest.rst +++ b/docs/src/whatsnew/latest.rst @@ -38,6 +38,11 @@ This document explains the changes made to Iris for this release your code for new floating point problems if activating this (e.g. when using the :class:`~iris.Constraint` API). (:pull:`6260`) +#. `@pp-mo`_ added a new utility function :func:`~iris.util.equalise_cubes`, to help + with aligning cubes so they can merge / concatenate. + (:issue:`6248`, :pull:`6257`) + + 🐛 Bugs Fixed ============= From e841ec2ba695f7fcd345fbcfa945d5b937196b17 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Thu, 19 Dec 2024 17:36:58 +0000 Subject: [PATCH 14/16] Review changes: explain scrambling; in-place scramble doesn't return result --- lib/iris/tests/unit/util/test_equalise_cubes.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/lib/iris/tests/unit/util/test_equalise_cubes.py b/lib/iris/tests/unit/util/test_equalise_cubes.py index 738933326e..bca03bb0d0 100644 --- a/lib/iris/tests/unit/util/test_equalise_cubes.py +++ b/lib/iris/tests/unit/util/test_equalise_cubes.py @@ -18,7 +18,16 @@ def _scramble(inputs, rng=95297): - # Reorder items to check that order does not affect the operation + # Reorder items (IN PLACE) to check that order does not affect operation + # NOTE: the "magic" number is chosen because it happens to encode a permutation + # which is usefully non-trivial for small numbers + # examples: + # [0, 1] --> [1, 0] + # [0, 1, 2] --> [1, 2, 0] + # [0, 1, 2, 3] --> [1, 2, 3, 0] + # [0, 1, 2, 3, 4] --> [1, 2, 3, 0, 4] + # [0, 1, 2, 3, 4, 5] --> [1, 3, 2, 0, 5, 4] + # [0, 1, 2, 3, 4, 5, 6] --> [1, 5, 3, 2, 0, 6, 4] if not isinstance(rng, Generator): rng = np.random.default_rng(rng) n_inputs = len(inputs) @@ -31,7 +40,6 @@ def _scramble(inputs, rng=95297): inputs_array = inputs_array[scramble_inds] # Modify input list **BUT N.B. IN PLACE** inputs[:] = inputs_array - return inputs @pytest.fixture(params=["off", "on", "applyall", "scrambled"]) @@ -147,7 +155,7 @@ def test_multi(self, usage): for stdname, longname, varname in zip(stdnames, longnames, varnames) ] if usage == "scrambled": - expected_metadatas = _scramble(expected_metadatas) + _scramble(expected_metadatas) # Apply operation results = equalise_cubes(test_cubes, **kwargs) From e30e5d8708908c212089e6b223a7016eb1fd9c14 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Fri, 20 Dec 2024 11:34:58 +0000 Subject: [PATCH 15/16] Added specific test for array attribute handling. --- .../tests/unit/util/test_equalise_cubes.py | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/lib/iris/tests/unit/util/test_equalise_cubes.py b/lib/iris/tests/unit/util/test_equalise_cubes.py index bca03bb0d0..bb29b47314 100644 --- a/lib/iris/tests/unit/util/test_equalise_cubes.py +++ b/lib/iris/tests/unit/util/test_equalise_cubes.py @@ -228,6 +228,40 @@ def test_operation_in_groups(self, usage): # Assert result assert [cube.metadata for cube in results] == expected_metadatas + def test_array_attributes(self, usage): + # Array content is worth a special test because it breaks dictionary equality. + a1 = np.array([4.1, 5.2, 6.3]) + a2 = np.array([1, 2]) + a3 = np.array([1, 3]) + test_cubes = [ + _cube(longname="a", v1=a1, v2=a2), + _cube(longname="a", v1=a1, v2=a3), + _cube(longname="b", v1=a1, v2=a2), + _cube(longname="b", v1=a1, v2=a2), + ] + kwargs, expected_metadatas = _usage_common( + usage, "equalise_attributes", test_cubes + ) + + # Calculate expected results + if usage != "off": + # result cube metadata should all be the same, with no varname + result_cubes = [ + _cube(longname="a", v1=a1), + _cube(longname="a", v1=a1), + _cube(longname="b", v1=a1, v2=a2), + _cube(longname="b", v1=a1, v2=a2), + ] + expected_metadatas = [cube.metadata for cube in result_cubes] + if usage == "scrambled": + _scramble(expected_metadatas) + + # Apply operation + results = equalise_cubes(test_cubes, **kwargs) + + # Assert result + assert [cube.metadata for cube in results] == expected_metadatas + class TestUnifyTimeUnits(WarnChecked): # Test the 'unify_time_units' operation. From a19b4090fad6705d9d6ad89ce2c2987252e617cd Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Fri, 20 Dec 2024 12:52:04 +0000 Subject: [PATCH 16/16] Simplify 'scramble' operation. --- .../tests/unit/util/test_equalise_cubes.py | 28 +++---------------- 1 file changed, 4 insertions(+), 24 deletions(-) diff --git a/lib/iris/tests/unit/util/test_equalise_cubes.py b/lib/iris/tests/unit/util/test_equalise_cubes.py index bb29b47314..5aa0e28c2e 100644 --- a/lib/iris/tests/unit/util/test_equalise_cubes.py +++ b/lib/iris/tests/unit/util/test_equalise_cubes.py @@ -8,7 +8,6 @@ from cf_units import Unit import numpy as np -from numpy.random import Generator import pytest from iris.coords import DimCoord @@ -17,29 +16,10 @@ from iris.warnings import IrisUserWarning -def _scramble(inputs, rng=95297): - # Reorder items (IN PLACE) to check that order does not affect operation - # NOTE: the "magic" number is chosen because it happens to encode a permutation - # which is usefully non-trivial for small numbers - # examples: - # [0, 1] --> [1, 0] - # [0, 1, 2] --> [1, 2, 0] - # [0, 1, 2, 3] --> [1, 2, 3, 0] - # [0, 1, 2, 3, 4] --> [1, 2, 3, 0, 4] - # [0, 1, 2, 3, 4, 5] --> [1, 3, 2, 0, 5, 4] - # [0, 1, 2, 3, 4, 5, 6] --> [1, 5, 3, 2, 0, 6, 4] - if not isinstance(rng, Generator): - rng = np.random.default_rng(rng) - n_inputs = len(inputs) - # NOTE: make object array of explicit shape + fill it, - # since np.array(inputs) *fails* specifically with a list of metadata objects - inputs_array = np.empty((n_inputs,), dtype=object) - inputs_array[:] = inputs - n_inputs = inputs_array.shape[0] - scramble_inds = rng.permutation(n_inputs) - inputs_array = inputs_array[scramble_inds] - # Modify input list **BUT N.B. IN PLACE** - inputs[:] = inputs_array +def _scramble(inputs): + # Reorder items (IN PLACE) to check that order does not affect operation. + # Rather than anything more clever, we'll settle for just reversing the order. + inputs[:] = inputs[::-1] @pytest.fixture(params=["off", "on", "applyall", "scrambled"])