Skip to content

Commit

Permalink
ENH: Remove deepcopies when slicing cubes and copying coords
Browse files Browse the repository at this point in the history
- When indexing a cube when the data is reaslised (not lazy), a view
  of the original array is returned where possible (subject to the
  rules when slicing in numpy).
- When indexing a cube when the data is not reaslised (lazy),
  realising the data on one will still not realise the data on the
  other.
- Existing behaviour is that slicing coordinates returns views of the
  original points and bounds (where possible).  This was likely chosen
  behaviour on the basis that DimCoords at least are not writeable.
  This is not the same however for Auxiliary coordinates and likely
  raises the likely case for this being a bug (i.e. one can modify
  AuxCoord object points and bounds).
- DimCoord slicing will now return views of its data like AuxCoords.
  DimCoords will continue to realise data unlike AuxCoords due to the
  validation necessary for being monotonically increasing.
  • Loading branch information
Carwyn Pelley committed Jan 9, 2017
1 parent ff8cdfa commit e1add28
Show file tree
Hide file tree
Showing 10 changed files with 370 additions and 24 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
* Deprecated the data-copying behaviour of Cube indexing.
The `share_data` attribute of `iris.FUTURE` can be used to switch to
the new data-sharing behaviour See :class:`iris.Future`.
13 changes: 10 additions & 3 deletions lib/iris/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ class Future(threading.local):

def __init__(self, cell_datetime_objects=False, netcdf_promote=False,
strict_grib_load=False, netcdf_no_unlimited=False,
clip_latitudes=False):
clip_latitudes=False, share_data=False):
"""
A container for run-time options controls.
Expand Down Expand Up @@ -200,20 +200,27 @@ def __init__(self, cell_datetime_objects=False, netcdf_promote=False,
:meth:`iris.coords.Coord.guess_bounds()` method limits the
guessed bounds to [-90, 90] for latitudes.
The option `share_data` controls whether indexing a Cube returns
a Cube whose data is a view onto the original Cube's data, as
opposed to a independent copy of the relevant data. Conversely, when
share_data=False (currently the default), an indexed partial cube
always contains fully independent, copied data arrays.
"""
self.__dict__['cell_datetime_objects'] = cell_datetime_objects
self.__dict__['netcdf_promote'] = netcdf_promote
self.__dict__['strict_grib_load'] = strict_grib_load
self.__dict__['netcdf_no_unlimited'] = netcdf_no_unlimited
self.__dict__['clip_latitudes'] = clip_latitudes
self.__dict__['share_data'] = share_data

def __repr__(self):
msg = ('Future(cell_datetime_objects={}, netcdf_promote={}, '
'strict_grib_load={}, netcdf_no_unlimited={}, '
'clip_latitudes={})')
'clip_latitudes={}, share_data={})')
return msg.format(self.cell_datetime_objects, self.netcdf_promote,
self.strict_grib_load, self.netcdf_no_unlimited,
self.clip_latitudes)
self.clip_latitudes, self.share_data)

deprecated_options = {
'strict_grib_load': ('This is because "iris.fileformats.grib" is now '
Expand Down
13 changes: 10 additions & 3 deletions lib/iris/coords.py
Original file line number Diff line number Diff line change
Expand Up @@ -522,8 +522,13 @@ def copy(self, points=None, bounds=None):
raise ValueError('If bounds are specified, points must also be '
'specified')

new_coord = copy.deepcopy(self)
if points is not None:
# We do not perform a deepcopy when we supply new points so as to
# not unnecessarily copy the old points.
new_coord = copy.copy(self)
new_coord.attributes = copy.deepcopy(self.attributes)
new_coord.coord_system = copy.deepcopy(self.coord_system)

# Explicitly not using the points property as we don't want the
# shape the new points to be constrained by the shape of
# self.points
Expand All @@ -533,6 +538,8 @@ def copy(self, points=None, bounds=None):
# points will result in new bounds, discarding those copied from
# self.
new_coord.bounds = bounds
else:
new_coord = copy.deepcopy(self)

return new_coord

Expand Down Expand Up @@ -1502,7 +1509,7 @@ def points(self):

@points.setter
def points(self, points):
points = np.array(points, ndmin=1)
points = np.array(points, ndmin=1, copy=False)
# If points are already defined for this coordinate,
if hasattr(self, '_points') and self._points is not None:
# Check that setting these points wouldn't change self.shape
Expand Down Expand Up @@ -1538,7 +1545,7 @@ def bounds(self):
def bounds(self, bounds):
if bounds is not None:
# Ensure the bounds are a compatible shape.
bounds = np.array(bounds, ndmin=2)
bounds = np.array(bounds, ndmin=2, copy=False)
if self.shape != bounds.shape[:-1]:
raise ValueError(
"The shape of the bounds array should be "
Expand Down
28 changes: 22 additions & 6 deletions lib/iris/cube.py
Original file line number Diff line number Diff line change
Expand Up @@ -2153,6 +2153,14 @@ def __getitem__(self, keys):
requested must be applicable directly to the cube.data attribute. All
metadata will be subsequently indexed appropriately.
.. deprecated:: 1.12
In future, the `data` attribute of the indexing result may be a
view onto the original data array, to avoid unnecessary copying.
For the present, however, indexing always produces a full
independent copy. The `share_data` attribute of `iris.FUTURE`
should be set to True to enable this new data-sharing behaviour
(if not, a deprecation warning will be issued).
"""
# turn the keys into a full slice spec (all dims)
full_slice = iris.util._build_full_slice_given_keys(keys,
Expand All @@ -2177,7 +2185,7 @@ def new_cell_measure_dims(cm_):
try:
first_slice = next(slice_gen)
except StopIteration:
first_slice = None
first_slice = Ellipsis if iris.FUTURE.share_data else None

if first_slice is not None:
data = self._my_data[first_slice]
Expand All @@ -2187,10 +2195,18 @@ def new_cell_measure_dims(cm_):
for other_slice in slice_gen:
data = data[other_slice]

# We don't want a view of the data, so take a copy of it if it's
# not already our own.
if isinstance(data, biggus.Array) or not data.flags['OWNDATA']:
data = copy.deepcopy(data)
if not iris.FUTURE.share_data:
# We don't want a view of the data, so take a copy of it if it's
# not already our own.
msg = ('The `data` attribute of the indexing result is currently '
'a copy of the original data array. This behaviour was '
'deprecated in v1.12.0 and will be replaced with views '
'onto the original data array where possible. Set '
'iris.FUTURE.share_data=True to switch to the new '
'behaviour.')
warn_deprecated(msg)
if isinstance(data, biggus.Array) or not data.flags['OWNDATA']:
data = copy.deepcopy(data)

# We can turn a masked array into a normal array if it's full.
if isinstance(data, ma.core.MaskedArray):
Expand Down Expand Up @@ -3108,7 +3124,7 @@ def add_history(self, string):
.. deprecated:: 1.6
Add/modify history metadata within
attr:`~iris.cube.Cube.attributes` as needed.
:attr:`~iris.cube.Cube.attributes` as needed.
"""
warn_deprecated("Cube.add_history() has been deprecated - "
Expand Down
32 changes: 23 additions & 9 deletions lib/iris/tests/test_coord_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -639,30 +639,44 @@ def test_get_set_points_and_bounds(self):
coord.bounds = [[123, 456], [234, 567], [345, 678]]
self.assertEqual(coord.shape, (3, ))
self.assertEqual(coord.bounds.shape, (3, 2))


class TestGuessBounds(tests.IrisTest):
def test_guess_bounds(self):
coord = iris.coords.DimCoord(np.array([0, 10, 20, 30]), long_name="foo", units="1")
coord = iris.coords.DimCoord(np.array([0, 10, 20, 30]),
long_name="foo", units="1")
coord.guess_bounds()
self.assertArrayEqual(coord.bounds, np.array([[-5, 5], [5, 15], [15, 25], [25, 35]]))

self.assertArrayEqual(coord.bounds, np.array([[-5, 5],
[5, 15],
[15, 25],
[25, 35]]))

coord.bounds = None
coord.guess_bounds(0.25)
self.assertArrayEqual(coord.bounds, np.array([[-5, 5], [5, 15], [15, 25], [25, 35]]) + 2.5)

self.assertArrayEqual(coord.bounds, np.array([[-5, 5],
[5, 15],
[15, 25],
[25, 35]]) + 2.5)

coord.bounds = None
coord.guess_bounds(0.75)
self.assertArrayEqual(coord.bounds, np.array([[-5, 5], [5, 15], [15, 25], [25, 35]]) - 2.5)
self.assertArrayEqual(coord.bounds, np.array([[-5, 5],
[5, 15],
[15, 25],
[25, 35]]) - 2.5)

points = coord.points.copy()
points[2] = 25
coord.points = points
coord.bounds = None
coord.guess_bounds()
self.assertArrayEqual(coord.bounds, np.array([[-5., 5.], [5., 17.5], [17.5, 27.5], [27.5, 32.5]]))

self.assertArrayEqual(coord.bounds, np.array([[-5., 5.],
[5., 17.5],
[17.5, 27.5],
[27.5, 32.5]]))

# if the points are not monotonic, then guess_bounds should fail
points = coord.points.copy()
points[2] = 32
coord = iris.coords.AuxCoord.from_coord(coord)
coord.points = points
Expand Down
6 changes: 3 additions & 3 deletions lib/iris/tests/unit/analysis/cartography/test_project.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,9 +151,9 @@ def test_no_coord_system(self):
cube.coord('grid_latitude').coord_system = None
with iris.tests.mock.patch('warnings.warn') as warn:
_, _ = project(cube, ROBINSON)
warn.assert_called_once_with('Coordinate system of latitude and '
'longitude coordinates is not specified. '
'Assuming WGS84 Geodetic.')
msg = ('Coordinate system of latitude and longitude coordinates is '
'not specified. Assuming WGS84 Geodetic.')
self.assertIn(msg, warn.call_args_list[0][0][0])


if __name__ == '__main__':
Expand Down
120 changes: 120 additions & 0 deletions lib/iris/tests/unit/coords/test_AuxCoord.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
# (C) British Crown Copyright 2016, Met Office
#
# This file is part of Iris.
#
# Iris is free software: you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the
# Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Iris is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with Iris. If not, see <http://www.gnu.org/licenses/>.
"""Unit tests for :class:`iris.coords.AuxCoord`."""

from __future__ import (absolute_import, division, print_function)
from six.moves import (filter, input, map, range, zip) # noqa

# Import iris.tests first so that some things can be initialised before
# importing anything else.
import iris.tests as tests

import numpy as np

import biggus
from iris.coords import AuxCoord


class Test___init__(tests.IrisTest):
def test_writeable(self):
coord = AuxCoord([1, 2], bounds=[[1, 2], [2, 3]])
self.assertTrue(coord.points.flags.writeable)
self.assertTrue(coord.bounds.flags.writeable)


def fetch_base(ndarray):
if ndarray.base is not None:
return fetch_base(ndarray.base)
return ndarray


class Test___getitem__(tests.IrisTest):
def test_share_data(self):
# Ensure that slicing a coordinate behaves like slicing a numpy array
# i.e. that the points and bounds are views of the original.
original = AuxCoord([1, 2], bounds=[[1, 2], [2, 3]],
attributes={'dummy1': None},
coord_system=tests.mock.sentinel.coord_system)
sliced_coord = original[:]
self.assertIs(fetch_base(sliced_coord._points),
fetch_base(original._points))
self.assertIs(fetch_base(sliced_coord._bounds),
fetch_base(original._bounds))
self.assertIsNot(sliced_coord.coord_system, original.coord_system)
self.assertIsNot(sliced_coord.attributes, original.attributes)

def test_lazy_data_realisation(self):
# Capture the fact that we realise the data when slicing.
points = np.array([1, 2])
points = biggus.NumpyArrayAdapter(points)

bounds = np.array([[1, 2], [2, 3]])
bounds = biggus.NumpyArrayAdapter(bounds)

original = AuxCoord(points, bounds=bounds,
attributes={'dummy1': None},
coord_system=tests.mock.sentinel.coord_system)
sliced_coord = original[:]
# Returned coord is realised.
self.assertIsInstance(sliced_coord._points, biggus.NumpyArrayAdapter)
self.assertIsInstance(sliced_coord._bounds, biggus.NumpyArrayAdapter)

# Original coord remains unrealised.
self.assertIsInstance(points, biggus.NumpyArrayAdapter)
self.assertIsInstance(bounds, biggus.NumpyArrayAdapter)


class Test_copy(tests.IrisTest):
def setUp(self):
self.original = AuxCoord([1, 2], bounds=[[1, 2], [2, 3]],
attributes={'dummy1': None},
coord_system=tests.mock.sentinel.coord_system)

def assert_data_no_share(self, coord_copy):
self.assertIsNot(fetch_base(coord_copy._points),
fetch_base(self.original._points))
self.assertIsNot(fetch_base(coord_copy._bounds),
fetch_base(self.original._bounds))
self.assertIsNot(coord_copy.coord_system, self.original.coord_system)
self.assertIsNot(coord_copy.attributes, self.original.attributes)

def test_existing_points(self):
# Ensure that copying a coordinate does not return a view of its
# points or bounds.
coord_copy = self.original.copy()
self.assert_data_no_share(coord_copy)

def test_existing_points_deepcopy_call(self):
# Ensure that the coordinate object itself is deepcopied called.
with tests.mock.patch('copy.deepcopy') as mock_copy:
self.original.copy()
mock_copy.assert_called_once_with(self.original)

def test_new_points(self):
coord_copy = self.original.copy([1, 2], bounds=[[1, 2], [2, 3]])
self.assert_data_no_share(coord_copy)

def test_new_points_shallowcopy_call(self):
# Ensure that the coordinate object itself is shallow copied so that
# the points and bounds are not unnecessarily copied.
with tests.mock.patch('copy.copy') as mock_copy:
self.original.copy([1, 2], bounds=[[1, 2], [2, 3]])
mock_copy.assert_called_once_with(self.original)


if __name__ == '__main__':
tests.main()
Loading

0 comments on commit e1add28

Please sign in to comment.