diff --git a/CHANGES.md b/CHANGES.md index 67993635f..d8a061e06 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -81,6 +81,9 @@ v0.13.0 (unreleased) fixed performance issues when adding/removing links or loading data collections with many links. [#1531, #1533] +* Significantly improve performance of link computations when the + links depend only on pixel or world coordinate components. [#1585] + * Added the ability to customize the appearance of tick and axis labels in Matplotlib plots. [#1511] diff --git a/doc/developer_guide/api.rst b/doc/developer_guide/api.rst index e18b503b8..336bdada0 100644 --- a/doc/developer_guide/api.rst +++ b/doc/developer_guide/api.rst @@ -47,6 +47,9 @@ Core Data .. automodapi:: glue.core.state_objects :no-inheritance-diagram: +.. automodapi:: glue.core.exceptions + :no-inheritance-diagram: + User Interface ============== diff --git a/glue/core/component_link.py b/glue/core/component_link.py index 430b26a25..8d8ed6838 100644 --- a/glue/core/component_link.py +++ b/glue/core/component_link.py @@ -1,7 +1,6 @@ from __future__ import absolute_import, division, print_function import numbers -import logging import operator import numpy as np @@ -10,7 +9,8 @@ from glue.core.contracts import contract, ContractsMeta from glue.core.subset import InequalitySubsetState from glue.core.util import join_component_view - +from glue.utils import unbroadcast, broadcast_to +from glue.logger import logger __all__ = ['ComponentLink', 'BinaryComponentLink', 'CoordinateComponentLink'] @@ -131,34 +131,64 @@ def __init__(self, comp_from, comp_to, using=None, inverse=None, inverse_compone @contract(data='isinstance(Data)', view='array_view') def compute(self, data, view=None): - """For a given data set, compute the component comp_to given - the data associated with each comp_from and the ``using`` - function - - :param data: The data set to use - :param view: Optional view (e.g. slice) through the data to use - - - *Returns*: - + """ + For a given data set, compute the component comp_to given the data + associated with each comp_from and the ``using`` function + + This raises an :class:`glue.core.exceptions.IncompatibleAttribute` if the + data set doesn't have all the ComponentIDs needed for the transformation + + Parameters + ---------- + data : `~glue.core.data.Data` + The data set to use + view : `None` or `slice` or `tuple` + Optional view (e.g. slice) through the data to use + + Returns + ------- + result The data associated with comp_to component - - *Raises*: - - InvalidAttribute, if the data set doesn't have all the - ComponentIDs needed for the transformation """ - logger = logging.getLogger(__name__) + + # First we get the values of all the 'from' components. args = [data[join_component_view(f, view)] for f in self._from] - logger.debug("shape of first argument: %s", args[0].shape) + + # We keep track of the original shape of the arguments + original_shape = args[0].shape + logger.debug("shape of first argument: %s", original_shape) + + # We now unbroadcast the arrays to only compute the link with the + # smallest number of values we can. This can help for cases where + # the link depends only on e.g. pixel components or world coordinates + # that themselves only depend on a subset of pixel components. + # Unbroadcasting is the act of returning the smallest array that + # contains all the information needed to be broadcasted back to its + # full value + args = [unbroadcast(arg) for arg in args] + + # We now broadcast these to the smallest common shape in case the + # linking functions don't know how to broadcast arrays with different + # shapes. + args = np.broadcast_arrays(*args) + + # We call the actual linking function result = self._using(*args) + # We call asarray since link functions may return Python scalars in some cases result = np.asarray(result) + + # In some cases, linking functions return ravelled arrays, so we + # fix this here. logger.debug("shape of result: %s", result.shape) if result.shape != args[0].shape: logger.debug("ComponentLink function %s changed shape. Fixing", self._using.__name__) result.shape = args[0].shape + + # Finally we broadcast the final result to desired shape + result = broadcast_to(result, original_shape) + return result def get_from_ids(self): @@ -297,8 +327,6 @@ def __ge__(self, other): return InequalitySubsetState(self, other, operator.ge) - - class CoordinateComponentLink(ComponentLink): @contract(comp_from='list(isinstance(ComponentID))', @@ -399,13 +427,37 @@ def replace_ids(self, old, new): self._right.replace_ids(old, new) def compute(self, data, view=None): + left = self._left right = self._right + if not isinstance(self._left, numbers.Number): left = data[self._left, view] if not isinstance(self._right, numbers.Number): right = data[self._right, view] - return self._op(left, right) + + # As described in more detail in ComponentLink.compute, we can + # 'unbroadcast' the arrays to ensure a minimal operation + + original_shape = None + + if isinstance(left, np.ndarray): + original_shape = left.shape + left = unbroadcast(left) + + if isinstance(right, np.ndarray): + original_shape = right.shape + right = unbroadcast(right) + + if original_shape is not None: + left, right = np.broadcast_arrays(left, right) + + result = self._op(left, right) + + if original_shape is None: + return result + else: + return broadcast_to(result, original_shape) def __gluestate__(self, context): left = context.id(self._left) diff --git a/glue/core/subset.py b/glue/core/subset.py index a29030d1d..4887676b7 100644 --- a/glue/core/subset.py +++ b/glue/core/subset.py @@ -357,7 +357,10 @@ def read_mask(self, file_name): self.subset_state = state def __del__(self): - self.delete() + try: + self.delete() + except Exception: + pass def __setattr__(self, attribute, value): object.__setattr__(self, attribute, value) diff --git a/glue/core/tests/test_component_link.py b/glue/core/tests/test_component_link.py index 6e0eca7e6..e61f6ae44 100644 --- a/glue/core/tests/test_component_link.py +++ b/glue/core/tests/test_component_link.py @@ -6,11 +6,11 @@ import numpy as np from numpy.testing import assert_array_equal +from glue.utils import unbroadcast + from ..component import DerivedComponent from ..component_link import ComponentLink, BinaryComponentLink from ..data import ComponentID, Data, Component -from ..data_collection import DataCollection -from ..link_helpers import LinkSame from ..subset import InequalitySubsetState @@ -293,3 +293,32 @@ def test_link_str(): assert str(x + x + y) == ('((x + x) + y)') assert repr(x + y) == '' + + +def test_efficiency(): + + def add(x, y): + # Make sure we don't benefit from broadcasting here + return x.copy() + y.copy() + + data = Data(x=np.ones((2, 3, 4, 5)), y=np.ones((2, 3, 4, 5))) + + for i, from_ids in enumerate(([data.id['x'], data.id['y']], + data.world_component_ids[:2], + data.pixel_component_ids[:2])): + + if i == 0: + expected_shape = (2, 3, 4, 5) + else: + expected_shape = (2, 3, 1, 1) + + for cls in [ComponentLink, BinaryComponentLink]: + + if cls is ComponentLink: + link = ComponentLink(from_ids, ComponentID('test'), using=add) + else: + link = BinaryComponentLink(from_ids[0], from_ids[1], add) + + result = link.compute(data) + assert result.shape == (2, 3, 4, 5) + assert unbroadcast(result).shape == expected_shape