Skip to content

Commit

Permalink
Lazy unit conversions for cube and coord data. (#2964)
Browse files Browse the repository at this point in the history
  • Loading branch information
pp-mo authored and pelson committed May 2, 2018
1 parent 98df0a9 commit a01036b
Show file tree
Hide file tree
Showing 7 changed files with 174 additions and 7 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
* The methods :meth:`iris.cube.Cube.convert_units` and
:meth:`iris.coords.Coord.convert_units` no longer forcibly realise the cube
data or coordinate points/bounds. The converted values are now lazy arrays
if the originals were.
33 changes: 33 additions & 0 deletions lib/iris/_lazy_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,3 +220,36 @@ def co_realise_cubes(*cubes):
results = _co_realise_lazy_arrays([cube.core_data() for cube in cubes])
for cube, result in zip(cubes, results):
cube.data = result


def lazy_elementwise(lazy_array, elementwise_op):
    """
    Map a (numpy-style) elementwise array operation over a lazy array.

    An elementwise operation performs an independent calculation at each
    point of its input, so the result has the same shape as the input.

    Args:

    * lazy_array:
        The lazy array object to operate on.
    * elementwise_op:
        The elementwise operation, a function operating on numpy arrays.

    Returns:
        The result of mapping the operation over the blocks of the input.

    .. note::

        The operation function is called once on a single-point "dummy"
        array, to discover the dtype of the result.
        That dtype must be the one the operation returns in real use (!)

    """
    # This is only a thin, Iris-specific wrapper around the Dask lazy
    # operation "map_blocks".

    # Work out the result dtype explicitly, by probing the operation with a
    # one-element array of the input dtype.
    # This suits unit conversions well: a Unit.convert call may or may not
    # cast to float, depending on unit equality, so it is much safer to let
    # udunits make that decision for us.
    probe = np.zeros(1, lazy_array.dtype)
    result_dtype = elementwise_op(probe).dtype

    return da.map_blocks(elementwise_op, lazy_array, dtype=result_dtype)
26 changes: 23 additions & 3 deletions lib/iris/coords.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@

from iris._data_manager import DataManager
from iris._deprecation import warn_deprecated
from iris._lazy_data import as_concrete_data, is_lazy_data, multidim_lazy_stack
from iris._lazy_data import (as_concrete_data, is_lazy_data,
multidim_lazy_stack, lazy_elementwise)
import iris.aux_factory
import iris.exceptions
import iris.time
Expand Down Expand Up @@ -908,9 +909,28 @@ def convert_units(self, unit):
raise iris.exceptions.UnitConversionError(
'Cannot convert from unknown units. '
'The "coord.units" attribute may be set directly.')
self.points = self.units.convert(self.points, unit)
if self.has_lazy_points() or self.has_lazy_bounds():
# Make fixed copies of old + new units for a delayed conversion.
old_unit = self.units
new_unit = unit

# Define a delayed conversion operation (i.e. a callback).
def pointwise_convert(values):
return old_unit.convert(values, new_unit)

if self.has_lazy_points():
new_points = lazy_elementwise(self.lazy_points(),
pointwise_convert)
else:
new_points = self.units.convert(self.points, unit)
self.points = new_points
if self.has_bounds():
self.bounds = self.units.convert(self.bounds, unit)
if self.has_lazy_bounds():
new_bounds = lazy_elementwise(self.lazy_bounds(),
pointwise_convert)
else:
new_bounds = self.units.convert(self.bounds, unit)
self.bounds = new_bounds
self.units = unit

def cells(self):
Expand Down
15 changes: 14 additions & 1 deletion lib/iris/cube.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
import iris._concatenate
import iris._constraints
from iris._data_manager import DataManager
from iris._lazy_data import lazy_elementwise

import iris._merge
import iris.analysis
Expand Down Expand Up @@ -873,7 +874,19 @@ def convert_units(self, unit):
raise iris.exceptions.UnitConversionError(
'Cannot convert from unknown units. '
'The "cube.units" attribute may be set directly.')
self.data = self.units.convert(self.data, unit)
if self.has_lazy_data():
# Make fixed copies of old + new units for a delayed conversion.
old_unit = self.units
new_unit = unit

# Define a delayed conversion operation (i.e. a callback).
def pointwise_convert(values):
return old_unit.convert(values, new_unit)

new_data = lazy_elementwise(self.lazy_data(), pointwise_convert)
else:
new_data = self.units.convert(self.data, unit)
self.data = new_data
self.units = unit

def add_cell_method(self, cell_method):
Expand Down
23 changes: 22 additions & 1 deletion lib/iris/tests/unit/coords/test_AuxCoord.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# (C) British Crown Copyright 2017, Met Office
# (C) British Crown Copyright 2017 - 2018, Met Office
#
# This file is part of Iris.
#
Expand Down Expand Up @@ -35,7 +35,9 @@
lazyness_string,
coords_all_dtypes_and_lazynesses)

from cf_units import Unit
from iris.coords import AuxCoord
from iris._lazy_data import as_lazy_data


class AuxCoordTestMixin(CoordTestMixin):
Expand Down Expand Up @@ -603,5 +605,24 @@ def test_set_bounds_with_lazy_points(self):
self.assertTrue(coord.has_lazy_points())


class Test_convert_units(tests.IrisTest):
    """Check unit conversion of a coordinate with lazy content."""

    def test_preserves_lazy(self):
        # Reference content, in metres.
        points_m = np.array([[11.1, 12.2, 13.3],
                             [21.4, 22.5, 23.6]])
        bounds_m = np.array([[[11.0, 12.0], [12.0, 13.0], [13.0, 14.0]],
                             [[21.0, 22.0], [22.0, 23.0], [23.0, 24.0]]])

        # Expected content, converted directly from the real arrays.
        metres = Unit('m')
        points_ft = metres.convert(points_m, 'ft')
        bounds_ft = metres.convert(bounds_m, 'ft')

        coord = AuxCoord(points=as_lazy_data(points_m),
                         bounds=as_lazy_data(bounds_m),
                         units='m')
        coord.convert_units('ft')

        # Check laziness first, before anything fetches the real values.
        self.assertTrue(coord.has_lazy_points())
        self.assertTrue(coord.has_lazy_bounds())

        self.assertArrayAllClose(coord.points, points_ft)
        self.assertArrayAllClose(coord.bounds, bounds_ft)


if __name__ == '__main__':
tests.main()
15 changes: 13 additions & 2 deletions lib/iris/tests/unit/cube/test_Cube.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# (C) British Crown Copyright 2013 - 2017, Met Office
# (C) British Crown Copyright 2013 - 2018, Met Office
#
# This file is part of Iris.
#
Expand Down Expand Up @@ -28,6 +28,8 @@
import numpy as np
import numpy.ma as ma

from cf_units import Unit

import iris.analysis
import iris.aux_factory
import iris.coords
Expand All @@ -39,9 +41,9 @@
from iris.coords import AuxCoord, DimCoord, CellMeasure
from iris.exceptions import (CoordinateNotFoundError, CellMeasureNotFoundError,
UnitConversionError)
from iris._lazy_data import as_lazy_data
from iris.tests import mock
import iris.tests.stock as stock
from iris._lazy_data import as_lazy_data


class Test___init___data(tests.IrisTest):
Expand Down Expand Up @@ -1710,6 +1712,15 @@ def test_convert_unknown_units(self):
with self.assertRaisesRegexp(UnitConversionError, emsg):
cube.convert_units('mm day-1')

def test_preserves_lazy(self):
    # Converting the units of a cube with lazy data should leave the data
    # lazy, while still producing the correctly converted values.
    data_m = np.arange(12.).reshape((3, 4))
    expected_ft = Unit('m').convert(data_m, 'ft')

    cube = iris.cube.Cube(as_lazy_data(data_m), units='m')
    cube.convert_units('ft')

    # Check laziness before 'cube.data' is accessed.
    self.assertTrue(cube.has_lazy_data())
    self.assertArrayAllClose(cube.data, expected_ft)


if __name__ == '__main__':
tests.main()
65 changes: 65 additions & 0 deletions lib/iris/tests/unit/lazy_data/test_lazy_elementwise.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# (C) British Crown Copyright 2018, Met Office
#
# This file is part of Iris.
#
# Iris is free software: you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the
# Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Iris is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with Iris. If not, see <http://www.gnu.org/licenses/>.
"""Test function :func:`iris._lazy_data.lazy_elementwise`."""

from __future__ import (absolute_import, division, print_function)
from six.moves import (filter, input, map, range, zip) # noqa

# Import iris.tests first so that some things can be initialised before
# importing anything else.
import iris.tests as tests

import numpy as np

from iris._lazy_data import as_lazy_data, is_lazy_data

from iris._lazy_data import lazy_elementwise


def _test_elementwise_op(array):
# Promotes the type of a bool argument, but not a float.
return array + 1


class Test_lazy_elementwise(tests.IrisTest):
    """Tests for wrapping an elementwise operation around a lazy array."""

    def test_basic(self):
        # The wrapped result is lazy, and computes to the same values as
        # applying the operation directly to the real array.
        concrete_array = np.arange(30).reshape((2, 5, 3))
        lazy_array = as_lazy_data(concrete_array)
        wrapped = lazy_elementwise(lazy_array, _test_elementwise_op)
        self.assertTrue(is_lazy_data(wrapped))
        self.assertArrayAllClose(wrapped.compute(),
                                 _test_elementwise_op(concrete_array))

    def test_dtype_same(self):
        # An operation which does not change the dtype : both the declared
        # lazy dtype and the computed dtype stay as the input dtype.
        concrete_array = np.array([3.], dtype=np.float16)
        lazy_array = as_lazy_data(concrete_array)
        wrapped = lazy_elementwise(lazy_array, _test_elementwise_op)
        self.assertTrue(is_lazy_data(wrapped))
        self.assertEqual(wrapped.dtype, np.float16)
        self.assertEqual(wrapped.compute().dtype, np.float16)

    def test_dtype_change(self):
        # An operation which promotes the dtype (bool --> int) : the declared
        # lazy dtype must agree with what is actually computed.
        concrete_array = np.array([True, False])
        lazy_array = as_lazy_data(concrete_array)
        wrapped = lazy_elementwise(lazy_array, _test_elementwise_op)
        self.assertTrue(is_lazy_data(wrapped))
        # Compare with the builtin 'int' : the old 'np.int' name was only an
        # alias of it, and no longer exists in current NumPy releases.
        self.assertEqual(wrapped.dtype, int)
        self.assertEqual(wrapped.compute().dtype, wrapped.dtype)


if __name__ == '__main__':
tests.main()

0 comments on commit a01036b

Please sign in to comment.