Skip to content

Commit

Permalink
Merge pull request #1029 from astrofrog/performance-selection
Browse files Browse the repository at this point in the history
Performance improvements for selection
  • Loading branch information
astrofrog committed Jul 6, 2016
1 parent 8dea207 commit d820bff
Show file tree
Hide file tree
Showing 12 changed files with 292 additions and 66 deletions.
15 changes: 15 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,21 @@ Full changelog
v0.8.2 (unreleased)
-------------------

* Ensure that failing data factory identifier functions are skipped. [#1029]

* The naming of pixel axes is now more consistent between data with 3 or
fewer dimensions, and data with more than 3 dimensions. The naming is now
always ``Pixel Axis ?`` where ``?`` is the index of the array, and for
datasets with 1 to 3 dimensions, we add a suffix e.g. ``[x]`` to indicate
the traditional axes. [#1029]

* Implemented a number of performance improvements, including for: the check
of whether points are inside a polygon (``points_inside_poly``), the selection
of polygonal regions in multi-dimensional cubes when the selections are along
pixel axes, and the selection of points in scatter plots with one or two
categorical components for rectangular, circular, and polygonal regions.
[#1029]

* Fix a bug that caused multiple custom viewer classes to not work properly
if the user did not override ``_custom_functions`` (which was private).
[#810]
Expand Down
78 changes: 49 additions & 29 deletions glue/core/component.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,11 @@

from glue.core.subset import (RoiSubsetState, RangeSubsetState,
CategoricalROISubsetState, AndState,
MultiRangeSubsetState, OrState)
from glue.core.roi import PolygonalROI, CategoricalROI, RangeROI
MultiRangeSubsetState,
CategoricalMultiRangeSubsetState,
CategoricalROISubsetState2D)
from glue.core.roi import (PolygonalROI, CategoricalROI, RangeROI, XRangeROI,
YRangeROI, RectangularROI)
from glue.core.util import row_lookup
from glue.utils import (unique, shape_to_string, coerce_numeric, check_sorted,
polygon_line_intersections)
Expand Down Expand Up @@ -127,6 +130,8 @@ def subset_from_roi(self, att, roi, other_comp=None, other_att=None, coord='x'):
if coord not in ('x', 'y'):
raise ValueError('coord should be one of x/y')

other_coord = 'y' if coord == 'x' else 'x'

if isinstance(roi, RangeROI):

# The selection is either an x range or a y range
Expand All @@ -140,7 +145,6 @@ def subset_from_roi(self, att, roi, other_comp=None, other_att=None, coord='x'):
else:

# The selection applies to the other component, so we delegate
other_coord = 'y' if coord == 'x' else 'x'
return other_comp.subset_from_roi(other_att, roi,
other_comp=self,
other_att=att,
Expand All @@ -154,11 +158,11 @@ def subset_from_roi(self, att, roi, other_comp=None, other_att=None, coord='x'):

if isinstance(other_comp, CategoricalComponent):

# Categorical components
return other_comp.subset_from_roi(other_att, roi,
other_comp=self,
other_att=att,
is_nested=True)
is_nested=True,
coord=other_coord)
else:

subset_state = RoiSubsetState()
Expand Down Expand Up @@ -471,6 +475,35 @@ def subset_from_roi(self, att, roi, other_comp=None, other_att=None,
other_att=att,
coord=other_coord)

elif isinstance(roi, RectangularROI):

# In this specific case, we can decompose the rectangular
# ROI into two RangeROIs that are combined with an 'and'
# logical operation.

other_coord = 'y' if coord == 'x' else 'x'

if coord == 'x':
range1 = XRangeROI(roi.xmin, roi.xmax)
range2 = YRangeROI(roi.ymin, roi.ymax)
else:
range2 = XRangeROI(roi.xmin, roi.xmax)
range1 = YRangeROI(roi.ymin, roi.ymax)

# We get the subset state for the current component
subset1 = self.subset_from_roi(att, range1,
other_comp=other_comp,
other_att=other_att,
coord=coord)

# We now get the subset state for the other component
subset2 = other_comp.subset_from_roi(other_att, range2,
other_comp=self,
other_att=att,
coord=other_coord)

return AndState(subset1, subset2)

elif isinstance(roi, CategoricalROI):

# The selection is categorical itself
Expand All @@ -481,16 +514,13 @@ def subset_from_roi(self, att, roi, other_comp=None, other_att=None,

# The selection is polygon-like, which requires special care.

# TODO: need to make this a public function
from glue.core.subset import combine_multiple

selection = []

if isinstance(other_comp, CategoricalComponent):

# For each category, we check which categories along the other
# axis fall inside the polygon:

selection = {}

for code, label in enumerate(self.categories):

# Determine the coordinates of the points to check
Expand All @@ -506,18 +536,10 @@ def subset_from_roi(self, att, roi, other_comp=None, other_att=None,
in_poly = roi.contains(x, y)
categories = other_comp.categories[in_poly]

# If any categories are in the polygon, we set up an
# AndState subset that includes only points for the current
# label and for all the categories that do fall inside the
# polygon.
if len(categories) > 0:
selection[label] = set(categories)

cat_roi_1 = CategoricalROI([label])
cat_subset_1 = CategoricalROISubsetState(att=att, roi=cat_roi_1)
cat_roi_2 = CategoricalROI(categories)
cat_subset_2 = CategoricalROISubsetState(att=other_att, roi=cat_roi_2)

selection.append(AndState(cat_subset_1, cat_subset_2))
return CategoricalROISubsetState2D(selection, att, other_att)

else:

Expand All @@ -538,22 +560,20 @@ def subset_from_roi(self, att, roi, other_comp=None, other_att=None,
# We loop over each category and for each one we find the
# numerical ranges

for code, label in zip(self.codes, self.labels):
selection = {}

for code, label in enumerate(self.categories):

# We determine all the numerical segments that represent the
# ensemble of points in y that fall in the polygon
# TODO: profile the following function
segments = polygon_line_intersections(x, y, xval=code)

# We make use of MultiRangeSubsetState to represent a
# discontinuous range, and then combine with the categorical
# component to create the selection.
cont_subset = MultiRangeSubsetState(segments, att=other_att)
cat_roi = CategoricalROI([label])
cat_subset = CategoricalROISubsetState(att=att, roi=cat_roi)
if len(segments) > 0:
selection[label] = segments

selection.append(AndState(cat_subset, cont_subset))
return CategoricalMultiRangeSubsetState(selection, att, other_att)

return combine_multiple(selection, operator.or_)

def to_series(self, **kwargs):
""" Convert into a pandas.Series object.
Expand Down
20 changes: 16 additions & 4 deletions glue/core/component_id.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from glue.core.subset import InequalitySubsetState


__all__ = ['ComponentID', 'ComponentIDDict']
__all__ = ['PixelComponentID', 'ComponentID', 'ComponentIDDict']

# access to ComponentIDs via .item[name]

Expand All @@ -28,14 +28,13 @@ def __getitem__(self, key):


class ComponentID(object):

""" References a :class:`glue.core.component.Component` object within a :class:`~glue.core.data.Data` object.
"""
References a :class:`glue.core.component.Component` object within a :class:`~glue.core.data.Data` object.
ComponentIDs behave as keys::
component_id = data.id[name]
data[component_id] -> numpy array
"""

def __init__(self, label, hidden=False):
Expand Down Expand Up @@ -131,3 +130,16 @@ def __pow__(self, other):

def __rpow__(self, other):
return BinaryComponentLink(other, self, operator.pow)


class PixelComponentID(ComponentID):
    """
    The ID of a component which is a pixel position in the data - this allows
    us to make assumptions in certain places. For example when a polygon
    selection is done in pixel space, it can easily be broadcast along
    dimensions.

    The constructor takes three arguments:

    * ``axis`` is stored unchanged on the instance as ``self.axis``
      (presumably the index of the data dimension this pixel coordinate
      runs along - confirm against the caller that creates these IDs).
    * ``label`` is forwarded to ``ComponentID.__init__``.
    * ``hidden`` is forwarded to ``ComponentID.__init__`` as a keyword
      argument, and defaults to ``False``.
    """

    def __init__(self, axis, label, hidden=False):
        # The axis is the only state added on top of ComponentID; it is set
        # before delegating the remaining arguments to the parent class.
        self.axis = axis
        super(PixelComponentID, self).__init__(label, hidden=hidden)
14 changes: 7 additions & 7 deletions glue/core/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
# they are here for backward-compatibility (the code used to live in this
# file)
from glue.core.component import Component, CoordinateComponent, DerivedComponent
from glue.core.component_id import ComponentID, ComponentIDDict
from glue.core.component_id import ComponentID, ComponentIDDict, PixelComponentID

__all__ = ['Data']

Expand Down Expand Up @@ -312,7 +312,8 @@ def _create_pixel_and_world_components(self):
for i in range(self.ndim):
comp = CoordinateComponent(self, i)
label = pixel_label(i, self.ndim)
cid = self.add_component(comp, "Pixel %s" % label, hidden=True)
cid = PixelComponentID(i, "Pixel Axis %s" % label, hidden=True)
self.add_component(comp, cid)
self._pixel_component_ids.append(cid)
if self.coords:
for i in range(self.ndim):
Expand Down Expand Up @@ -702,8 +703,7 @@ def update_components(self, mapping):

@contract(i=int, ndim=int)
def pixel_label(i, ndim):
if ndim == 2:
return ['y', 'x'][i]
if ndim == 3:
return ['z', 'y', 'x'][i]
return "Axis %s" % i
label = "{0}".format(i)
if 1 <= ndim <= 3:
label += " [{0}]".format('xyz'[ndim - 1 - i])
return label
2 changes: 2 additions & 0 deletions glue/core/data_factories/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,8 @@ def find_factory(filename, **kwargs):
is_format = df.identifier(filename, **kwargs)
except ImportError: # dependencies missing
continue
except Exception: # any other issue
continue

if is_format:
valid_formats.append(df)
Expand Down
2 changes: 1 addition & 1 deletion glue/core/qt/tests/test_data_combo_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def test_component_id_combo_helper():
helper.numeric = True

helper.visible = False
assert _items_as_string(combo) == "data1:Pixel Axis 0:World 0:x:y:data2:Pixel Axis 0:World 0:a:b"
assert _items_as_string(combo) == "data1:Pixel Axis 0 [x]:World 0:x:y:data2:Pixel Axis 0 [x]:World 0:a:b"
helper.visible = True

dc.remove(data2)
Expand Down
Loading

0 comments on commit d820bff

Please sign in to comment.