Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve selection in cube slices with functional links #1585

Merged
merged 6 commits into from
Mar 20, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,9 @@ v0.13.0 (unreleased)
fixed performance issues when adding/removing links or loading
data collections with many links. [#1531, #1533]

* Significantly improve performance of link computations when the
links depend only on pixel or world coordinate components. [#1585]

* Added the ability to customize the appearance of tick and axis
labels in Matplotlib plots. [#1511]

Expand Down
3 changes: 3 additions & 0 deletions doc/developer_guide/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@ Core Data
.. automodapi:: glue.core.state_objects
:no-inheritance-diagram:

.. automodapi:: glue.core.exceptions
:no-inheritance-diagram:

User Interface
==============

Expand Down
96 changes: 74 additions & 22 deletions glue/core/component_link.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from __future__ import absolute_import, division, print_function

import numbers
import logging
import operator

import numpy as np
Expand All @@ -10,7 +9,8 @@
from glue.core.contracts import contract, ContractsMeta
from glue.core.subset import InequalitySubsetState
from glue.core.util import join_component_view

from glue.utils import unbroadcast, broadcast_to
from glue.logger import logger

__all__ = ['ComponentLink', 'BinaryComponentLink', 'CoordinateComponentLink']

Expand Down Expand Up @@ -131,34 +131,64 @@ def __init__(self, comp_from, comp_to, using=None, inverse=None, inverse_compone

@contract(data='isinstance(Data)', view='array_view')
def compute(self, data, view=None):
"""For a given data set, compute the component comp_to given
the data associated with each comp_from and the ``using``
function

:param data: The data set to use
:param view: Optional view (e.g. slice) through the data to use


*Returns*:

"""
For a given data set, compute the component comp_to given the data
associated with each comp_from and the ``using`` function

This raises an :class:`glue.core.exceptions.IncompatibleAttribute` if the
data set doesn't have all the ComponentIDs needed for the transformation

Parameters
----------
data : `~glue.core.data.Data`
The data set to use
view : `None` or `slice` or `tuple`
Optional view (e.g. slice) through the data to use

Returns
-------
result
The data associated with comp_to component

*Raises*:

InvalidAttribute, if the data set doesn't have all the
ComponentIDs needed for the transformation
"""
logger = logging.getLogger(__name__)

# First we get the values of all the 'from' components.
args = [data[join_component_view(f, view)] for f in self._from]
logger.debug("shape of first argument: %s", args[0].shape)

# We keep track of the original shape of the arguments
original_shape = args[0].shape
logger.debug("shape of first argument: %s", original_shape)

# We now unbroadcast the arrays to only compute the link with the
# smallest number of values we can. This can help for cases where
# the link depends only on e.g. pixel components or world coordinates
# that themselves only depend on a subset of pixel components.
# Unbroadcasting is the act of returning the smallest array that
# contains all the information needed to be broadcasted back to its
# full value
args = [unbroadcast(arg) for arg in args]

# We now broadcast these to the smallest common shape in case the
# linking functions don't know how to broadcast arrays with different
# shapes.
args = np.broadcast_arrays(*args)

# We call the actual linking function
result = self._using(*args)

# We call asarray since link functions may return Python scalars in some cases
result = np.asarray(result)

# In some cases, linking functions return ravelled arrays, so we
# fix this here.
logger.debug("shape of result: %s", result.shape)
if result.shape != args[0].shape:
logger.debug("ComponentLink function %s changed shape. Fixing",
self._using.__name__)
result.shape = args[0].shape

# Finally we broadcast the final result to desired shape
result = broadcast_to(result, original_shape)

return result

def get_from_ids(self):
Expand Down Expand Up @@ -297,8 +327,6 @@ def __ge__(self, other):
return InequalitySubsetState(self, other, operator.ge)




class CoordinateComponentLink(ComponentLink):

@contract(comp_from='list(isinstance(ComponentID))',
Expand Down Expand Up @@ -399,13 +427,37 @@ def replace_ids(self, old, new):
self._right.replace_ids(old, new)

def compute(self, data, view=None):

left = self._left
right = self._right

if not isinstance(self._left, numbers.Number):
left = data[self._left, view]
if not isinstance(self._right, numbers.Number):
right = data[self._right, view]
return self._op(left, right)

# As described in more detail in ComponentLink.compute, we can
# 'unbroadcast' the arrays to ensure a minimal operation

original_shape = None

if isinstance(left, np.ndarray):
original_shape = left.shape
left = unbroadcast(left)

if isinstance(right, np.ndarray):
original_shape = right.shape
right = unbroadcast(right)

if original_shape is not None:
left, right = np.broadcast_arrays(left, right)

result = self._op(left, right)

if original_shape is None:
return result
else:
return broadcast_to(result, original_shape)

def __gluestate__(self, context):
left = context.id(self._left)
Expand Down
5 changes: 4 additions & 1 deletion glue/core/subset.py
Original file line number Diff line number Diff line change
Expand Up @@ -357,7 +357,10 @@ def read_mask(self, file_name):
self.subset_state = state

def __del__(self):
self.delete()
try:
self.delete()
except Exception:
pass

def __setattr__(self, attribute, value):
object.__setattr__(self, attribute, value)
Expand Down
33 changes: 31 additions & 2 deletions glue/core/tests/test_component_link.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@
import numpy as np
from numpy.testing import assert_array_equal

from glue.utils import unbroadcast

from ..component import DerivedComponent
from ..component_link import ComponentLink, BinaryComponentLink
from ..data import ComponentID, Data, Component
from ..data_collection import DataCollection
from ..link_helpers import LinkSame
from ..subset import InequalitySubsetState


Expand Down Expand Up @@ -293,3 +293,32 @@ def test_link_str():
assert str(x + x + y) == ('((x + x) + y)')

assert repr(x + y) == '<BinaryComponentLink: (x + y)>'


def test_efficiency():

def add(x, y):
# Make sure we don't benefit from broadcasting here
return x.copy() + y.copy()

data = Data(x=np.ones((2, 3, 4, 5)), y=np.ones((2, 3, 4, 5)))

for i, from_ids in enumerate(([data.id['x'], data.id['y']],
data.world_component_ids[:2],
data.pixel_component_ids[:2])):

if i == 0:
expected_shape = (2, 3, 4, 5)
else:
expected_shape = (2, 3, 1, 1)

for cls in [ComponentLink, BinaryComponentLink]:

if cls is ComponentLink:
link = ComponentLink(from_ids, ComponentID('test'), using=add)
else:
link = BinaryComponentLink(from_ids[0], from_ids[1], add)

result = link.compute(data)
assert result.shape == (2, 3, 4, 5)
assert unbroadcast(result).shape == expected_shape