From 13015909c53d1a4ccefdccdb86d9a7e0a32d1671 Mon Sep 17 00:00:00 2001 From: Thomas Robitaille Date: Wed, 7 Mar 2018 11:32:58 +0000 Subject: [PATCH 1/6] Silently catch any exception inside Subset.__del__ --- glue/core/subset.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/glue/core/subset.py b/glue/core/subset.py index a29030d1d..4887676b7 100644 --- a/glue/core/subset.py +++ b/glue/core/subset.py @@ -357,7 +357,10 @@ def read_mask(self, file_name): self.subset_state = state def __del__(self): - self.delete() + try: + self.delete() + except Exception: + pass def __setattr__(self, attribute, value): object.__setattr__(self, attribute, value) From d5011ded160626cceb20aa2291b0f31eb62d13b6 Mon Sep 17 00:00:00 2001 From: Thomas Robitaille Date: Tue, 20 Mar 2018 12:06:03 +0000 Subject: [PATCH 2/6] Compute links with minimal un-broadcasted arrays --- CHANGES.md | 3 ++ glue/core/component_link.py | 68 ++++++++++++++++++++++++++----------- 2 files changed, 52 insertions(+), 19 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 67993635f..d8a061e06 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -81,6 +81,9 @@ v0.13.0 (unreleased) fixed performance issues when adding/removing links or loading data collections with many links. [#1531, #1533] +* Significantly improve performance of link computations when the + links depend only on pixel or world coordinate components. [#1585] + * Added the ability to customize the appearance of tick and axis labels in Matplotlib plots. [#1511] diff --git a/glue/core/component_link.py b/glue/core/component_link.py index 430b26a25..e70985135 100644 --- a/glue/core/component_link.py +++ b/glue/core/component_link.py @@ -1,7 +1,6 @@ from __future__ import absolute_import, division, print_function import numbers -import logging import operator import numpy as np @@ -10,7 +9,8 @@ from glue.core.contracts import contract, ContractsMeta from glue.core.subset import InequalitySubsetState from glue.core.util import join_component_view - +from glue.utils import unbroadcast, broadcast_to +from glue.logger import logger __all__ = ['ComponentLink', 'BinaryComponentLink', 'CoordinateComponentLink'] @@ -131,34 +131,64 @@ def __init__(self, comp_from, comp_to, using=None, inverse=None, inverse_compone @contract(data='isinstance(Data)', view='array_view') def compute(self, data, view=None): - """For a given data set, compute the component comp_to given - the data associated with each comp_from and the ``using`` - function - - :param data: The data set to use - :param view: Optional view (e.g. slice) through the data to use - - - *Returns*: - + """ + For a given data set, compute the component comp_to given the data + associated with each comp_from and the ``using`` function + + This raises an :class:`glue.core.exceptions.InvalidAttribute` if the + data set doesn't have all the ComponentIDs needed for the transformation + + Parameters + ---------- + data : `~glue.core.data.Data` + The data set to use + view : `None` or `slice` or `tuple` + Optional view (e.g. slice) through the data to use + + Returns + ------- + result The data associated with comp_to component - - *Raises*: - - InvalidAttribute, if the data set doesn't have all the - ComponentIDs needed for the transformation """ - logger = logging.getLogger(__name__) + + # First we get the values of all the 'from' components. args = [data[join_component_view(f, view)] for f in self._from] - logger.debug("shape of first argument: %s", args[0].shape) + + # We keep track of the original shape of the arguments + original_shape = args[0].shape + logger.debug("shape of first argument: %s", original_shape) + + # We now unbroadcast the arrays to only compute the link with the + # smallest number of values we can. This can help for cases where + # the link depends only on e.g. pixel components or world coordinates + # that themselves only depend on a subset of pixel components. + # Unbroadcasting is the act of returning the smallest array that + # contains all the information needed to be broadcasted back to its + # full value + args = [unbroadcast(arg) for arg in args] + + # We now broadcast these to the smallest common shape in case the + # linking functions don't know how to broadcast arrays with different + # shapes. + args = np.broadcast_arrays(*args) + + # We call the actual linking function result = self._using(*args) + # We call asarray since link functions may return Python scalars in some cases result = np.asarray(result) + + # In some cases, linking functions return ravelled arrays, so we + # fix this here. logger.debug("shape of result: %s", result.shape) if result.shape != args[0].shape: logger.debug("ComponentLink function %s changed shape. Fixing", self._using.__name__) result.shape = args[0].shape + + # Finally we broadcast the final result to desired shape + result = broadcast_to(result, original_shape) + return result def get_from_ids(self): From fd225a03b81825f752efcb1eface4ad6445b72dc Mon Sep 17 00:00:00 2001 From: Thomas Robitaille Date: Tue, 20 Mar 2018 12:23:09 +0000 Subject: [PATCH 3/6] Added unit test to make sure link calculation is more efficient when broadcasting is used. --- glue/core/tests/test_component_link.py | 28 ++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/glue/core/tests/test_component_link.py b/glue/core/tests/test_component_link.py index 6e0eca7e6..fdac243e6 100644 --- a/glue/core/tests/test_component_link.py +++ b/glue/core/tests/test_component_link.py @@ -6,11 +6,11 @@ import numpy as np from numpy.testing import assert_array_equal +from glue.utils import unbroadcast + from ..component import DerivedComponent from ..component_link import ComponentLink, BinaryComponentLink from ..data import ComponentID, Data, Component -from ..data_collection import DataCollection -from ..link_helpers import LinkSame from ..subset import InequalitySubsetState @@ -293,3 +293,27 @@ def test_link_str(): assert str(x + x + y) == ('((x + x) + y)') assert repr(x + y) == '' + + +def test_efficiency(): + + def add(x, y): + # Make sure we don't benefit from broadcasting here + return x.copy() + y.copy() + + data = Data(x=np.ones((2, 3, 4, 5)), y=np.ones((2, 3, 4, 5))) + + comp = ComponentLink([data.id['x'], data.id['y']], ComponentID('a'), using=add) + result = comp.compute(data) + assert result.shape == (2, 3, 4, 5) + assert unbroadcast(result).shape == (2, 3, 4, 5) + + comp = ComponentLink(data.world_component_ids[:2], ComponentID('b'), using=add) + result = comp.compute(data) + assert result.shape == (2, 3, 4, 5) + assert unbroadcast(result).shape == (2, 3, 1, 1) + + comp = ComponentLink(data.pixel_component_ids[:2], ComponentID('c'), using=add) + result = comp.compute(data) + assert result.shape == (2, 3, 4, 5) + assert unbroadcast(result).shape == (2, 3, 1, 1) From a4569ed20ffdf05a29eb6d1d1af68585c9a1fedd Mon Sep 17 00:00:00 2001 From: Thomas Robitaille Date: Tue, 20 Mar 2018 12:33:25 +0000 Subject: [PATCH 4/6] Improve performance of BinaryComponentLink for broadcasted arrays --- glue/core/component_link.py | 17 ++++++++++--- glue/core/tests/test_component_link.py | 33 +++++++++++++++----------- 2 files changed, 33 insertions(+), 17 deletions(-) diff --git a/glue/core/component_link.py b/glue/core/component_link.py index e70985135..1ea78385b 100644 --- a/glue/core/component_link.py +++ b/glue/core/component_link.py @@ -327,8 +327,6 @@ def __ge__(self, other): return InequalitySubsetState(self, other, operator.ge) - - class CoordinateComponentLink(ComponentLink): @contract(comp_from='list(isinstance(ComponentID))', @@ -429,13 +427,26 @@ def replace_ids(self, old, new): self._right.replace_ids(old, new) def compute(self, data, view=None): + left = self._left right = self._right + if not isinstance(self._left, numbers.Number): left = data[self._left, view] if not isinstance(self._right, numbers.Number): right = data[self._right, view] - return self._op(left, right) + + # As described in more detail in ComponentLink.compute, we can + # 'unbroadcast' the arrays to ensure a minimal operation, since _op + # is not guaranteed to be a function that recognizes broadcasted arrays + original_shape = left.shape + left = unbroadcast(left) + right = unbroadcast(right) + left, right = np.broadcast_arrays(left, right) + + result = self._op(left, right) + + return broadcast_to(result, original_shape) def __gluestate__(self, context): left = context.id(self._left) diff --git a/glue/core/tests/test_component_link.py b/glue/core/tests/test_component_link.py index fdac243e6..e61f6ae44 100644 --- a/glue/core/tests/test_component_link.py +++ b/glue/core/tests/test_component_link.py @@ -303,17 +303,22 @@ def add(x, y): data = Data(x=np.ones((2, 3, 4, 5)), y=np.ones((2, 3, 4, 5))) - comp = ComponentLink([data.id['x'], data.id['y']], ComponentID('a'), using=add) - result = comp.compute(data) - assert result.shape == (2, 3, 4, 5) - assert unbroadcast(result).shape == (2, 3, 4, 5) - - comp = ComponentLink(data.world_component_ids[:2], ComponentID('b'), using=add) - result = comp.compute(data) - assert result.shape == (2, 3, 4, 5) - assert unbroadcast(result).shape == (2, 3, 1, 1) - - comp = ComponentLink(data.pixel_component_ids[:2], ComponentID('c'), using=add) - result = comp.compute(data) - assert result.shape == (2, 3, 4, 5) - assert unbroadcast(result).shape == (2, 3, 1, 1) + for i, from_ids in enumerate(([data.id['x'], data.id['y']], + data.world_component_ids[:2], + data.pixel_component_ids[:2])): + + if i == 0: + expected_shape = (2, 3, 4, 5) + else: + expected_shape = (2, 3, 1, 1) + + for cls in [ComponentLink, BinaryComponentLink]: + + if cls is ComponentLink: + link = ComponentLink(from_ids, ComponentID('test'), using=add) + else: + link = BinaryComponentLink(from_ids[0], from_ids[1], add) + + result = link.compute(data) + assert result.shape == (2, 3, 4, 5) + assert unbroadcast(result).shape == expected_shape From 298c4476e4de1914a3a1902322ad412ba7abff90 Mon Sep 17 00:00:00 2001 From: Thomas Robitaille Date: Tue, 20 Mar 2018 12:43:58 +0000 Subject: [PATCH 5/6] Fix BinaryComponentLink when used with scalars --- glue/core/component_link.py | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/glue/core/component_link.py b/glue/core/component_link.py index 1ea78385b..9f2dfe553 100644 --- a/glue/core/component_link.py +++ b/glue/core/component_link.py @@ -437,16 +437,27 @@ def compute(self, data, view=None): right = data[self._right, view] # As described in more detail in ComponentLink.compute, we can - # 'unbroadcast' the arrays to ensure a minimal operation, since _op - # is not guaranteed to be a function that recognizes broadcasted arrays - original_shape = left.shape - left = unbroadcast(left) - right = unbroadcast(right) - left, right = np.broadcast_arrays(left, right) + # 'unbroadcast' the arrays to ensure a minimal operation + + original_shape = None + + if isinstance(left, np.ndarray): + original_shape = left.shape + left = unbroadcast(left) + + if isinstance(right, np.ndarray): + original_shape = right.shape + right = unbroadcast(right) + + if original_shape is not None: + left, right = np.broadcast_arrays(left, right) result = self._op(left, right) - return broadcast_to(result, original_shape) + if original_shape is None: + return result + else: + return broadcast_to(result, original_shape) def __gluestate__(self, context): left = context.id(self._left) From 69d04cda9fe966685ff6ade68deb88854cdcc654 Mon Sep 17 00:00:00 2001 From: Thomas Robitaille Date: Tue, 20 Mar 2018 17:45:03 +0000 Subject: [PATCH 6/6] Fix documentation build --- doc/developer_guide/api.rst | 3 +++ glue/core/component_link.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/doc/developer_guide/api.rst b/doc/developer_guide/api.rst index e18b503b8..336bdada0 100644 --- a/doc/developer_guide/api.rst +++ b/doc/developer_guide/api.rst @@ -47,6 +47,9 @@ Core Data .. automodapi:: glue.core.state_objects :no-inheritance-diagram: +.. automodapi:: glue.core.exceptions + :no-inheritance-diagram: + User Interface ============== diff --git a/glue/core/component_link.py b/glue/core/component_link.py index 9f2dfe553..8d8ed6838 100644 --- a/glue/core/component_link.py +++ b/glue/core/component_link.py @@ -135,7 +135,7 @@ def compute(self, data, view=None): For a given data set, compute the component comp_to given the data associated with each comp_from and the ``using`` function - This raises an :class:`glue.core.exceptions.InvalidAttribute` if the + This raises an :class:`glue.core.exceptions.IncompatibleAttribute` if the data set doesn't have all the ComponentIDs needed for the transformation Parameters