diff --git a/docs/iris/src/whatsnew/contributions_1.12/deprecate_2016-Dec-08_share_data.txt b/docs/iris/src/whatsnew/contributions_1.12/deprecate_2016-Dec-08_share_data.txt new file mode 100644 index 00000000000..e35dabf23ab --- /dev/null +++ b/docs/iris/src/whatsnew/contributions_1.12/deprecate_2016-Dec-08_share_data.txt @@ -0,0 +1,3 @@ +* Deprecated the data-copying behaviour of Cube indexing. + The `share_data` attribute of `iris.FUTURE` can be used to switch to + the new data-sharing behaviour See :class:`iris.Future`. diff --git a/lib/iris/__init__.py b/lib/iris/__init__.py index a76f4dc85b8..2bfb4a59654 100644 --- a/lib/iris/__init__.py +++ b/lib/iris/__init__.py @@ -1,4 +1,4 @@ -# (C) British Crown Copyright 2010 - 2016, Met Office +# (C) British Crown Copyright 2010 - 2017, Met Office # # This file is part of Iris. # @@ -145,7 +145,7 @@ class Future(threading.local): def __init__(self, cell_datetime_objects=False, netcdf_promote=False, strict_grib_load=False, netcdf_no_unlimited=False, - clip_latitudes=False): + clip_latitudes=False, share_data=False): """ A container for run-time options controls. @@ -200,20 +200,27 @@ def __init__(self, cell_datetime_objects=False, netcdf_promote=False, :meth:`iris.coords.Coord.guess_bounds()` method limits the guessed bounds to [-90, 90] for latitudes. + The option `share_data` controls whether indexing a Cube returns + a Cube whose data is a view onto the original Cube's data, as + opposed to a independent copy of the relevant data. Conversely, when + share_data=False (currently the default), an indexed partial cube + always contains fully independent, copied data arrays. + """ self.__dict__['cell_datetime_objects'] = cell_datetime_objects self.__dict__['netcdf_promote'] = netcdf_promote self.__dict__['strict_grib_load'] = strict_grib_load self.__dict__['netcdf_no_unlimited'] = netcdf_no_unlimited self.__dict__['clip_latitudes'] = clip_latitudes + self.__dict__['share_data'] = share_data def __repr__(self): msg = ('Future(cell_datetime_objects={}, netcdf_promote={}, ' 'strict_grib_load={}, netcdf_no_unlimited={}, ' - 'clip_latitudes={})') + 'clip_latitudes={}, share_data={})') return msg.format(self.cell_datetime_objects, self.netcdf_promote, self.strict_grib_load, self.netcdf_no_unlimited, - self.clip_latitudes) + self.clip_latitudes, self.share_data) deprecated_options = { 'strict_grib_load': ('This is because "iris.fileformats.grib" is now ' diff --git a/lib/iris/coords.py b/lib/iris/coords.py index 20d4058b6a3..701515858af 100644 --- a/lib/iris/coords.py +++ b/lib/iris/coords.py @@ -1,4 +1,4 @@ -# (C) British Crown Copyright 2010 - 2016, Met Office +# (C) British Crown Copyright 2010 - 2017, Met Office # # This file is part of Iris. # @@ -522,8 +522,13 @@ def copy(self, points=None, bounds=None): raise ValueError('If bounds are specified, points must also be ' 'specified') - new_coord = copy.deepcopy(self) if points is not None: + # We do not perform a deepcopy when we supply new points so as to + # not unnecessarily copy the old points. + new_coord = copy.copy(self) + new_coord.attributes = copy.deepcopy(self.attributes) + new_coord.coord_system = copy.deepcopy(self.coord_system) + # Explicitly not using the points property as we don't want the # shape the new points to be constrained by the shape of # self.points @@ -533,6 +538,8 @@ def copy(self, points=None, bounds=None): # points will result in new bounds, discarding those copied from # self. new_coord.bounds = bounds + else: + new_coord = copy.deepcopy(self) return new_coord @@ -1502,7 +1509,7 @@ def points(self): @points.setter def points(self, points): - points = np.array(points, ndmin=1) + points = np.array(points, ndmin=1, copy=False) # If points are already defined for this coordinate, if hasattr(self, '_points') and self._points is not None: # Check that setting these points wouldn't change self.shape @@ -1538,7 +1545,7 @@ def bounds(self): def bounds(self, bounds): if bounds is not None: # Ensure the bounds are a compatible shape. - bounds = np.array(bounds, ndmin=2) + bounds = np.array(bounds, ndmin=2, copy=False) if self.shape != bounds.shape[:-1]: raise ValueError( "The shape of the bounds array should be " diff --git a/lib/iris/cube.py b/lib/iris/cube.py index 79fb074ea01..756f9745d3f 100644 --- a/lib/iris/cube.py +++ b/lib/iris/cube.py @@ -1,4 +1,4 @@ -# (C) British Crown Copyright 2010 - 2016, Met Office +# (C) British Crown Copyright 2010 - 2017, Met Office # # This file is part of Iris. # @@ -2153,6 +2153,14 @@ def __getitem__(self, keys): requested must be applicable directly to the cube.data attribute. All metadata will be subsequently indexed appropriately. + .. deprecated:: 1.12 + In future, the `data` attribute of the indexing result may be a + view onto the original data array, to avoid unnecessary copying. + For the present, however, indexing always produces a full + independent copy. The `share_data` attribute of `iris.FUTURE` + should be set to True to enable this new data-sharing behaviour + (if not, a deprecation warning will be issued). + """ # turn the keys into a full slice spec (all dims) full_slice = iris.util._build_full_slice_given_keys(keys, @@ -2177,7 +2185,7 @@ def new_cell_measure_dims(cm_): try: first_slice = next(slice_gen) except StopIteration: - first_slice = None + first_slice = Ellipsis if iris.FUTURE.share_data else None if first_slice is not None: data = self._my_data[first_slice] @@ -2187,10 +2195,18 @@ def new_cell_measure_dims(cm_): for other_slice in slice_gen: data = data[other_slice] - # We don't want a view of the data, so take a copy of it if it's - # not already our own. - if isinstance(data, biggus.Array) or not data.flags['OWNDATA']: - data = copy.deepcopy(data) + if not iris.FUTURE.share_data: + # We don't want a view of the data, so take a copy of it if it's + # not already our own. + msg = ('The `data` attribute of the indexing result is currently ' + 'a copy of the original data array. This behaviour was ' + 'deprecated in v1.12.0 and will be replaced with views ' + 'onto the original data array where possible. Set ' + 'iris.FUTURE.share_data=True to switch to the new ' + 'behaviour.') + warn_deprecated(msg) + if isinstance(data, biggus.Array) or not data.flags['OWNDATA']: + data = copy.deepcopy(data) # We can turn a masked array into a normal array if it's full. if isinstance(data, ma.core.MaskedArray): @@ -3108,7 +3124,7 @@ def add_history(self, string): .. deprecated:: 1.6 Add/modify history metadata within - attr:`~iris.cube.Cube.attributes` as needed. + :attr:`~iris.cube.Cube.attributes` as needed. """ warn_deprecated("Cube.add_history() has been deprecated - " diff --git a/lib/iris/tests/test_coord_api.py b/lib/iris/tests/test_coord_api.py index 5e2811d6ad2..cc4448630da 100644 --- a/lib/iris/tests/test_coord_api.py +++ b/lib/iris/tests/test_coord_api.py @@ -1,4 +1,4 @@ -# (C) British Crown Copyright 2010 - 2016, Met Office +# (C) British Crown Copyright 2010 - 2017, Met Office # # This file is part of Iris. # @@ -639,30 +639,44 @@ def test_get_set_points_and_bounds(self): coord.bounds = [[123, 456], [234, 567], [345, 678]] self.assertEqual(coord.shape, (3, )) self.assertEqual(coord.bounds.shape, (3, 2)) - + class TestGuessBounds(tests.IrisTest): def test_guess_bounds(self): - coord = iris.coords.DimCoord(np.array([0, 10, 20, 30]), long_name="foo", units="1") + coord = iris.coords.DimCoord(np.array([0, 10, 20, 30]), + long_name="foo", units="1") coord.guess_bounds() - self.assertArrayEqual(coord.bounds, np.array([[-5, 5], [5, 15], [15, 25], [25, 35]])) - + self.assertArrayEqual(coord.bounds, np.array([[-5, 5], + [5, 15], + [15, 25], + [25, 35]])) + coord.bounds = None coord.guess_bounds(0.25) - self.assertArrayEqual(coord.bounds, np.array([[-5, 5], [5, 15], [15, 25], [25, 35]]) + 2.5) - + self.assertArrayEqual(coord.bounds, np.array([[-5, 5], + [5, 15], + [15, 25], + [25, 35]]) + 2.5) + coord.bounds = None coord.guess_bounds(0.75) - self.assertArrayEqual(coord.bounds, np.array([[-5, 5], [5, 15], [15, 25], [25, 35]]) - 2.5) + self.assertArrayEqual(coord.bounds, np.array([[-5, 5], + [5, 15], + [15, 25], + [25, 35]]) - 2.5) points = coord.points.copy() points[2] = 25 coord.points = points coord.bounds = None coord.guess_bounds() - self.assertArrayEqual(coord.bounds, np.array([[-5., 5.], [5., 17.5], [17.5, 27.5], [27.5, 32.5]])) - + self.assertArrayEqual(coord.bounds, np.array([[-5., 5.], + [5., 17.5], + [17.5, 27.5], + [27.5, 32.5]])) + # if the points are not monotonic, then guess_bounds should fail + points = coord.points.copy() points[2] = 32 coord = iris.coords.AuxCoord.from_coord(coord) coord.points = points diff --git a/lib/iris/tests/unit/analysis/cartography/test_project.py b/lib/iris/tests/unit/analysis/cartography/test_project.py index 4adf8241076..e627e9cfd1f 100644 --- a/lib/iris/tests/unit/analysis/cartography/test_project.py +++ b/lib/iris/tests/unit/analysis/cartography/test_project.py @@ -1,4 +1,4 @@ -# (C) British Crown Copyright 2014 - 2016, Met Office +# (C) British Crown Copyright 2014 - 2017, Met Office # # This file is part of Iris. # @@ -151,9 +151,9 @@ def test_no_coord_system(self): cube.coord('grid_latitude').coord_system = None with iris.tests.mock.patch('warnings.warn') as warn: _, _ = project(cube, ROBINSON) - warn.assert_called_once_with('Coordinate system of latitude and ' - 'longitude coordinates is not specified. ' - 'Assuming WGS84 Geodetic.') + msg = ('Coordinate system of latitude and longitude coordinates is ' + 'not specified. Assuming WGS84 Geodetic.') + self.assertIn(msg, warn.call_args_list[0][0][0]) if __name__ == '__main__': diff --git a/lib/iris/tests/unit/coords/test_AuxCoord.py b/lib/iris/tests/unit/coords/test_AuxCoord.py new file mode 100644 index 00000000000..89a51164548 --- /dev/null +++ b/lib/iris/tests/unit/coords/test_AuxCoord.py @@ -0,0 +1,120 @@ +# (C) British Crown Copyright 2017, Met Office +# +# This file is part of Iris. +# +# Iris is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# Iris is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with Iris. If not, see . +"""Unit tests for :class:`iris.coords.AuxCoord`.""" + +from __future__ import (absolute_import, division, print_function) +from six.moves import (filter, input, map, range, zip) # noqa + +# Import iris.tests first so that some things can be initialised before +# importing anything else. +import iris.tests as tests + +import numpy as np + +import biggus +from iris.coords import AuxCoord + + +class Test___init__(tests.IrisTest): + def test_writeable(self): + coord = AuxCoord([1, 2], bounds=[[1, 2], [2, 3]]) + self.assertTrue(coord.points.flags.writeable) + self.assertTrue(coord.bounds.flags.writeable) + + +def fetch_base(ndarray): + if ndarray.base is not None: + return fetch_base(ndarray.base) + return ndarray + + +class Test___getitem__(tests.IrisTest): + def test_share_data(self): + # Ensure that slicing a coordinate behaves like slicing a numpy array + # i.e. that the points and bounds are views of the original. + original = AuxCoord([1, 2], bounds=[[1, 2], [2, 3]], + attributes={'dummy1': None}, + coord_system=tests.mock.sentinel.coord_system) + sliced_coord = original[:] + self.assertIs(fetch_base(sliced_coord._points), + fetch_base(original._points)) + self.assertIs(fetch_base(sliced_coord._bounds), + fetch_base(original._bounds)) + self.assertIsNot(sliced_coord.coord_system, original.coord_system) + self.assertIsNot(sliced_coord.attributes, original.attributes) + + def test_lazy_data_realisation(self): + # Capture the fact that we realise the data when slicing. + points = np.array([1, 2]) + points = biggus.NumpyArrayAdapter(points) + + bounds = np.array([[1, 2], [2, 3]]) + bounds = biggus.NumpyArrayAdapter(bounds) + + original = AuxCoord(points, bounds=bounds, + attributes={'dummy1': None}, + coord_system=tests.mock.sentinel.coord_system) + sliced_coord = original[:] + # Returned coord is realised. + self.assertIsInstance(sliced_coord._points, biggus.NumpyArrayAdapter) + self.assertIsInstance(sliced_coord._bounds, biggus.NumpyArrayAdapter) + + # Original coord remains unrealised. + self.assertIsInstance(points, biggus.NumpyArrayAdapter) + self.assertIsInstance(bounds, biggus.NumpyArrayAdapter) + + +class Test_copy(tests.IrisTest): + def setUp(self): + self.original = AuxCoord([1, 2], bounds=[[1, 2], [2, 3]], + attributes={'dummy1': None}, + coord_system=tests.mock.sentinel.coord_system) + + def assert_data_no_share(self, coord_copy): + self.assertIsNot(fetch_base(coord_copy._points), + fetch_base(self.original._points)) + self.assertIsNot(fetch_base(coord_copy._bounds), + fetch_base(self.original._bounds)) + self.assertIsNot(coord_copy.coord_system, self.original.coord_system) + self.assertIsNot(coord_copy.attributes, self.original.attributes) + + def test_existing_points(self): + # Ensure that copying a coordinate does not return a view of its + # points or bounds. + coord_copy = self.original.copy() + self.assert_data_no_share(coord_copy) + + def test_existing_points_deepcopy_call(self): + # Ensure that the coordinate object itself is deepcopied called. + with tests.mock.patch('copy.deepcopy') as mock_copy: + self.original.copy() + mock_copy.assert_called_once_with(self.original) + + def test_new_points(self): + coord_copy = self.original.copy([1, 2], bounds=[[1, 2], [2, 3]]) + self.assert_data_no_share(coord_copy) + + def test_new_points_shallowcopy_call(self): + # Ensure that the coordinate object itself is shallow copied so that + # the points and bounds are not unnecessarily copied. + with tests.mock.patch('copy.copy') as mock_copy: + self.original.copy([1, 2], bounds=[[1, 2], [2, 3]]) + mock_copy.assert_called_once_with(self.original) + + +if __name__ == '__main__': + tests.main() diff --git a/lib/iris/tests/unit/coords/test_DimCoord.py b/lib/iris/tests/unit/coords/test_DimCoord.py new file mode 100644 index 00000000000..02eb43a20a8 --- /dev/null +++ b/lib/iris/tests/unit/coords/test_DimCoord.py @@ -0,0 +1,125 @@ +# (C) British Crown Copyright 2017, Met Office +# +# This file is part of Iris. +# +# Iris is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# Iris is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with Iris. If not, see . +"""Unit tests for :class:`iris.coords.DimCoord`.""" + +from __future__ import (absolute_import, division, print_function) +from six.moves import (filter, input, map, range, zip) # noqa + +# Import iris.tests first so that some things can be initialised before +# importing anything else. +import iris.tests as tests + +import copy + +import numpy as np + +import biggus +from iris.coords import DimCoord + + +class Test___init__(tests.IrisTest): + def test_writeable(self): + coord = DimCoord([1, 2], bounds=[[1, 2], [2, 3]]) + self.assertFalse(coord.points.flags.writeable) + self.assertFalse(coord.bounds.flags.writeable) + + +def fetch_base(ndarray): + if ndarray.base is not None: + return fetch_base(ndarray.base) + return ndarray + + +class Test___getitem__(tests.IrisTest): + def test_share_data(self): + # Ensure that slicing a coordinate behaves like slicing a numpy array + # i.e. that the points and bounds are views of the original. + original = DimCoord([1, 2], bounds=[[1, 2], [2, 3]], + attributes={'dummy1': None}, + coord_system=tests.mock.sentinel.coord_system) + sliced_coord = original[:] + self.assertIs(fetch_base(sliced_coord._points), + fetch_base(original._points)) + self.assertIs(fetch_base(sliced_coord._bounds), + fetch_base(original._bounds)) + self.assertIsNot(sliced_coord.coord_system, original.coord_system) + self.assertIsNot(sliced_coord.attributes, original.attributes) + + def test_lazy_data_realisation(self): + # Capture the fact that we realise the data when slicing. + points = np.array([1, 2]) + points = biggus.NumpyArrayAdapter(points) + + bounds = np.array([[1, 2], [2, 3]]) + bounds = biggus.NumpyArrayAdapter(bounds) + + original = DimCoord(points, bounds=bounds, + attributes={'dummy1': None}, + coord_system=tests.mock.sentinel.coord_system) + sliced_coord = original[:] + # Returned coord is realised. + self.assertIsInstance(sliced_coord._points, np.ndarray) + self.assertIsInstance(sliced_coord._bounds, np.ndarray) + + # Original coord remains unrealised. + self.assertIsInstance(points, biggus.NumpyArrayAdapter) + self.assertIsInstance(bounds, biggus.NumpyArrayAdapter) + + +class Test_copy(tests.IrisTest): + def setUp(self): + self.original = DimCoord([1, 2], bounds=[[1, 2], [2, 3]], + attributes={'dummy1': None}, + coord_system=tests.mock.sentinel.coord_system) + + def assert_data_no_share(self, coord_copy): + self.assertIsNot(fetch_base(coord_copy._points), + fetch_base(self.original._points)) + self.assertIsNot(fetch_base(coord_copy._bounds), + fetch_base(self.original._bounds)) + self.assertIsNot(coord_copy.coord_system, self.original.coord_system) + self.assertIsNot(coord_copy.attributes, self.original.attributes) + + def test_existing_points(self): + # Ensure that copying a coordinate does not return a view of its + # points or bounds. + coord_copy = self.original.copy() + self.assert_data_no_share(coord_copy) + + def test_existing_points_deepcopy_call(self): + # Ensure that the coordinate object itself is deepcopied called. + cp_orig = copy.deepcopy(self.original) + with tests.mock.patch('copy.deepcopy', return_value=cp_orig) as \ + mock_copy: + self.original.copy() + mock_copy.assert_called_once_with(self.original) + + def test_new_points(self): + coord_copy = self.original.copy([1, 2], bounds=[[1, 2], [2, 3]]) + self.assert_data_no_share(coord_copy) + + def test_new_points_shallowcopy_call(self): + # Ensure that the coordinate object itself is shallow copied so that + # the points and bounds are not unnecessarily copied. + cp_orig = copy.copy(self.original) + with tests.mock.patch('copy.copy', return_value=cp_orig) as mock_copy: + self.original.copy([1, 2], bounds=[[1, 2], [2, 3]]) + mock_copy.assert_called_once_with(self.original) + + +if __name__ == '__main__': + tests.main() diff --git a/lib/iris/tests/unit/cube/test_Cube.py b/lib/iris/tests/unit/cube/test_Cube.py index 1b421f3c330..06a959bbb5e 100644 --- a/lib/iris/tests/unit/cube/test_Cube.py +++ b/lib/iris/tests/unit/cube/test_Cube.py @@ -1,4 +1,4 @@ -# (C) British Crown Copyright 2013 - 2016, Met Office +# (C) British Crown Copyright 2013 - 2017, Met Office # # This file is part of Iris. # @@ -1329,6 +1329,54 @@ def test_remove_cell_measure(self): [[self.b_cell_measure, (0, 1)]]) +class Test___getitem__nofuture(tests.IrisTest): + def setUp(self): + patch = mock.patch('iris.FUTURE.share_data', new=False) + self.mock_fshare = patch.start() + self.addCleanup(patch.stop) + + def test_lazy_array(self): + cube = Cube(biggus.NumpyArrayAdapter(np.arange(6).reshape(2, 3))) + cube2 = cube[1:] + self.assertTrue(cube2.has_lazy_data()) + cube.data + self.assertTrue(cube2.has_lazy_data()) + + def test_ndarray(self): + cube = Cube(np.arange(6).reshape(2, 3)) + cube2 = cube[1:] + self.assertIsNot(cube.data.base, cube2.data.base) + + def test_deprecation_warning(self): + warn_call = mock.patch('iris.cube.warn_deprecated') + cube = Cube(np.arange(6).reshape(2, 3)) + with warn_call as mocked_warn_call: + cube[1:] + msg = ('The `data` attribute of the indexing result is currently a ' + 'copy of the original data array. This behaviour was ' + 'deprecated in v1.12.0') + self.assertIn(msg, mocked_warn_call.call_args[0][0]) + + +class Test___getitem__future(tests.IrisTest): + def setUp(self): + patch = mock.patch('iris.FUTURE.share_data', new=True) + self.mock_fshare = patch.start() + self.addCleanup(patch.stop) + + def test_lazy_array(self): + cube = Cube(biggus.NumpyArrayAdapter(np.arange(6).reshape(2, 3))) + cube2 = cube[1:] + self.assertTrue(cube2.has_lazy_data()) + cube.data + self.assertTrue(cube2.has_lazy_data()) + + def test_ndarray(self): + cube = Cube(np.arange(6).reshape(2, 3)) + cube2 = cube[1:] + self.assertIs(cube.data.base, cube2.data.base) + + class Test__getitem_CellMeasure(tests.IrisTest): def setUp(self): cube = Cube(np.arange(6).reshape(2, 3)) diff --git a/lib/iris/tests/unit/test_Future.py b/lib/iris/tests/unit/test_Future.py index d55a9ae58f7..ad6ee8a6723 100644 --- a/lib/iris/tests/unit/test_Future.py +++ b/lib/iris/tests/unit/test_Future.py @@ -1,4 +1,4 @@ -# (C) British Crown Copyright 2013 - 2016, Met Office +# (C) British Crown Copyright 2013 - 2017, Met Office # # This file is part of Iris. # @@ -51,6 +51,12 @@ def test_valid_clip_latitudes(self): future.clip_latitudes = new_value self.assertEqual(future.clip_latitudes, new_value) + def test_valid_share_data(self): + future = Future() + new_value = not future.share_data + future.share_data = new_value + self.assertEqual(future.share_data, new_value) + def test_invalid_attribute(self): future = Future() with self.assertRaises(AttributeError):