Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add dim-expression support in Dataset.select #3920

Merged
merged 6 commits into from
Sep 3, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 31 additions & 6 deletions holoviews/core/data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,7 +335,7 @@ def add_dimension(self, dimension, dim_pos, dim_val, vdim=False, **kwargs):
return self.clone(data, **dimensions)


def select(self, selection_specs=None, **selection):
def select(self, selection_expr=None, selection_specs=None, **selection):
"""Applies selection by dimension name

Applies a selection along the dimensions of the object using
Expand All @@ -360,7 +360,14 @@ def select(self, selection_specs=None, **selection):

ds.select(x=[0, 1, 2])

* predicate expression: A holoviews.dim expression, e.g.:

from holoviews import dim
ds.select(selection_expr=dim('x') % 2 == 0)

Args:
selection_expr: holoviews.dim predicate expression
specifying selection.
selection_specs: List of specs to match on
A list of types, functions, or type[.group][.label]
strings specifying which objects to apply the
Expand All @@ -373,15 +380,33 @@ def select(self, selection_specs=None, **selection):
Returns an Dimensioned object containing the selected data
or a scalar if a single value was selected
"""
from ...util.transform import dim
if selection_expr is not None and not isinstance(selection_expr, dim):
raise ValueError("""\
The first positional argument to the Dataset.select method is expected to be a
holoviews.util.transform.dim expression. Use the selection_specs keyword
argument to specify a selection specification""")

if selection_specs is not None and not isinstance(selection_specs, (list, tuple)):
selection_specs = [selection_specs]
selection = {dim: sel for dim, sel in selection.items()
if dim in self.dimensions()+['selection_mask']}
selection = {dim_name: sel for dim_name, sel in selection.items()
if dim_name in self.dimensions()+['selection_mask']}
if (selection_specs and not any(self.matches(sp) for sp in selection_specs)
or not selection):
or (not selection and not selection_expr)):
return self

data = self.interface.select(self, **selection)
# Handle selection dim expression
if selection_expr is not None:
mask = selection_expr.apply(self, compute=False, keep_index=True)
dataset = self[mask]
else:
dataset = self

# Handle selection kwargs
if selection:
data = dataset.interface.select(dataset, **selection)
else:
data = dataset.data

if np.isscalar(data):
return data
Expand Down Expand Up @@ -453,7 +478,7 @@ def __getitem__(self, slices):
object.
"""
slices = util.process_ellipses(self, slices, vdim_selection=True)
if isinstance(slices, np.ndarray) and slices.dtype.kind == 'b':
if getattr(getattr(slices, 'dtype', None), 'kind', None) == 'b':
if not len(slices) == len(self):
raise IndexError("Boolean index must match length of sliced object")
return self.clone(self.select(selection_mask=slices))
Expand Down
4 changes: 3 additions & 1 deletion holoviews/core/data/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,9 @@ def sort(cls, dataset, by=[], reverse=False):


@classmethod
def values(cls, dataset, dim, expanded=True, flat=True, compute=True):
def values(
cls, dataset, dim, expanded=True, flat=True, compute=True, keep_index=False
):
data = dataset.data
dim_idx = dataset.get_dimension_index(dim)
if data.ndim == 1:
Expand Down
15 changes: 13 additions & 2 deletions holoviews/core/data/dask.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,12 +90,23 @@ def sort(cls, dataset, by=[], reverse=False):
return dataset.data

@classmethod
def values(cls, dataset, dim, expanded=True, flat=True, compute=True):
def values(
cls,
dataset,
dim,
expanded=True,
flat=True,
compute=True,
keep_index=False,
):
dim = dataset.get_dimension(dim)
data = dataset.data[dim.name]
if not expanded:
data = data.unique()
return data.compute().values if compute else data.values
if keep_index:
return data.compute() if compute else data
else:
return data.compute().values if compute else data.values

@classmethod
def select_mask(cls, dataset, selection):
Expand Down
4 changes: 3 additions & 1 deletion holoviews/core/data/dictionary.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,9 @@ def range(cls, dataset, dimension):


@classmethod
def values(cls, dataset, dim, expanded=True, flat=True, compute=True):
def values(
cls, dataset, dim, expanded=True, flat=True, compute=True, keep_index=False
):
dim = dataset.get_dimension(dim).name
values = dataset.data.get(dim)
if isscalar(values):
Expand Down
4 changes: 3 additions & 1 deletion holoviews/core/data/grid.py
Original file line number Diff line number Diff line change
Expand Up @@ -338,7 +338,9 @@ def ndloc(cls, dataset, indices):


@classmethod
def values(cls, dataset, dim, expanded=True, flat=True, compute=True):
def values(
cls, dataset, dim, expanded=True, flat=True, compute=True, keep_index=False
):
dim = dataset.get_dimension(dim, strict=True)
if dim in dataset.vdims or dataset.data[dim.name].ndim > 1:
data = dataset.data[dim.name]
Expand Down
4 changes: 3 additions & 1 deletion holoviews/core/data/image.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,9 @@ def range(cls, obj, dim):


@classmethod
def values(cls, dataset, dim, expanded=True, flat=True, compute=True):
def values(
cls, dataset, dim, expanded=True, flat=True, compute=True, keep_index=False
):
"""
The set of samples available along a particular dimension.
"""
Expand Down
14 changes: 12 additions & 2 deletions holoviews/core/data/multipath.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,15 @@ def redim(cls, dataset, dimensions):
return new_data

@classmethod
def values(cls, dataset, dimension, expanded=True, flat=True, compute=True):
def values(
cls,
dataset,
dimension,
expanded=True,
flat=True,
compute=True,
keep_index=False,
):
"""
Returns a single concatenated array of all subpaths separated
by NaN values. If expanded keyword is False an array of arrays
Expand All @@ -293,7 +301,9 @@ def values(cls, dataset, dimension, expanded=True, flat=True, compute=True):
ds = cls._inner_dataset_template(dataset)
for d in dataset.data:
ds.data = d
dvals = ds.interface.values(ds, dimension, expanded, flat, compute)
dvals = ds.interface.values(
ds, dimension, expanded, flat, compute, keep_index
)
if not len(dvals):
continue
elif expanded:
Expand Down
19 changes: 16 additions & 3 deletions holoviews/core/data/pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,20 +266,33 @@ def select(cls, dataset, selection_mask=None, **selection):
df = dataset.data
if selection_mask is None:
selection_mask = cls.select_mask(dataset, selection)

indexed = cls.indexed(dataset, selection)
df = df.iloc[selection_mask]
if isinstance(selection_mask, pd.Series):
df = df[selection_mask]
else:
df = df.iloc[selection_mask]
if indexed and len(df) == 1 and len(dataset.vdims) == 1:
return df[dataset.vdims[0].name].iloc[0]
return df


@classmethod
def values(cls, dataset, dim, expanded=True, flat=True, compute=True):
def values(
cls,
dataset,
dim,
expanded=True,
flat=True,
compute=True,
keep_index=False,
):
dim = dataset.get_dimension(dim, strict=True)
data = dataset.data[dim.name]
if not expanded:
return data.unique()
return data.values

return data if keep_index else data.values


@classmethod
Expand Down
2 changes: 1 addition & 1 deletion holoviews/core/data/xarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,7 @@ def coords(cls, dataset, dimension, ordered=False, expanded=False, edges=False):


@classmethod
def values(cls, dataset, dim, expanded=True, flat=True, compute=True):
def values(cls, dataset, dim, expanded=True, flat=True, compute=True, keep_index=False):
dim = dataset.get_dimension(dim, strict=True)
data = dataset.data[dim.name].data
irregular = cls.irregular(dataset, dim) if dim in dataset.kdims else False
Expand Down
4 changes: 2 additions & 2 deletions holoviews/core/dimension.py
Original file line number Diff line number Diff line change
Expand Up @@ -1109,14 +1109,14 @@ def select(self, selection_specs=None, **kwargs):
# Apply the selection on the selected object of a different type
dimensions = selection.dimensions() + ['value']
if any(kw in dimensions for kw in kwargs):
selection = selection.select(selection_specs, **kwargs)
selection = selection.select(selection_specs=selection_specs, **kwargs)
elif isinstance(selection, Dimensioned) and selection._deep_indexable:
# Apply the deep selection on each item in local selection
items = []
for k, v in selection.items():
dimensions = v.dimensions() + ['value']
if any(kw in dimensions for kw in kwargs):
items.append((k, v.select(selection_specs, **kwargs)))
items.append((k, v.select(selection_specs=selection_specs, **kwargs)))
else:
items.append((k, v))
selection = selection.clone(items)
Expand Down
2 changes: 1 addition & 1 deletion holoviews/core/spaces.py
Original file line number Diff line number Diff line change
Expand Up @@ -1371,7 +1371,7 @@ def select(self, selection_specs=None, **kwargs):
"""
if selection_specs is not None and not isinstance(selection_specs, (list, tuple)):
selection_specs = [selection_specs]
selection = super(DynamicMap, self).select(selection_specs, **kwargs)
selection = super(DynamicMap, self).select(selection_specs=selection_specs, **kwargs)
def dynamic_select(obj, **dynkwargs):
if selection_specs is not None:
matches = any(obj.matches(spec) for spec in selection_specs)
Expand Down
2 changes: 1 addition & 1 deletion holoviews/core/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -505,7 +505,7 @@ def process_ellipses(obj, key, vdim_selection=False):
will be exactly one longer than the number of kdims). Note: this
flag should not be used for composite types.
"""
if isinstance(key, np.ndarray) and key.dtype.kind == 'b':
if getattr(getattr(key, 'dtype', None), 'kind', None) == 'b':
return key
wrapped_key = wrap_tuple(key)
if wrapped_key.count(Ellipsis)== 0:
Expand Down
4 changes: 2 additions & 2 deletions holoviews/element/path.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,11 +110,11 @@ def __getitem__(self, key):
return self.clone(extents=(xstart, ystart, xstop, ystop))


def select(self, selection_specs=None, **kwargs):
def select(self, *args, **kwargs):
"""
Bypasses selection on data and sets extents based on selection.
"""
return super(Element2D, self).select(selection_specs, **kwargs)
return super(Element2D, self).select(*args, **kwargs)


def split(self, start=None, end=None, datatype=None, **kwargs):
Expand Down
2 changes: 1 addition & 1 deletion holoviews/plotting/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -477,7 +477,7 @@ def initialize_unbounded(obj, dimensions, key):
"""
select = dict(zip([d.name for d in dimensions], key))
try:
obj.select([DynamicMap], **select)
obj.select(selection_specs=[DynamicMap], **select)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I count only four places where selection_specs had to be specified as a keyword instead of by position! If that is how often that positional argument was used in our own codebase, I am pretty certain users barely used it (if at all).

except KeyError:
pass

Expand Down
24 changes: 23 additions & 1 deletion holoviews/tests/core/data/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@
from holoviews.core.data import concat
from holoviews.core.data.interface import DataError
from holoviews.element import Scatter, Curve
from holoviews.element.comparison import ComparisonTestCase
from holoviews.element.comparison import ComparisonTestCase
from holoviews.util.transform import dim

from collections import OrderedDict

Expand Down Expand Up @@ -697,6 +698,13 @@ def test_dataset_select_rows_gender_male(self):
kdims=self.kdims, vdims=self.vdims)
self.assertEquals(row, indexed)

def test_dataset_select_rows_gender_male_expr(self):
row = self.table.select(selection_expr=dim('Gender') == 'M')
indexed = Dataset({'Gender': ['M', 'M'], 'Age': [10, 16],
'Weight': [15, 18], 'Height': [0.8,0.6]},
kdims=self.kdims, vdims=self.vdims)
self.assertEquals(row, indexed)

def test_dataset_select_rows_gender_male_alias(self):
row = self.alias_table.select(Gender='M')
alias_row = self.alias_table.select(gender='M')
Expand Down Expand Up @@ -859,10 +867,24 @@ def test_dataset_scalar_select(self):
ds = Dataset({'A': 1, 'B': np.arange(10)}, kdims=['A', 'B'])
self.assertEqual(ds.select(A=1).dimension_values('B'), np.arange(10))

def test_dataset_scalar_select_expr(self):
ds = Dataset({'A': 1, 'B': np.arange(10)}, kdims=['A', 'B'])
self.assertEqual(
ds.select(selection_expr=dim('A') == 1).dimension_values('B'),
np.arange(10)
)

def test_dataset_scalar_empty_select(self):
ds = Dataset({'A': 1, 'B': np.arange(10)}, kdims=['A', 'B'])
self.assertEqual(ds.select(A=0).dimension_values('B'), np.array([]))

def test_dataset_scalar_empty_select_expr(self):
ds = Dataset({'A': 1, 'B': np.arange(10)}, kdims=['A', 'B'])
self.assertEqual(
ds.select(selection_expr=dim('A') == 0).dimension_values('B'),
np.array([])
)

def test_dataset_scalar_sample(self):
ds = Dataset({'A': 1, 'B': np.arange(10)}, kdims=['A', 'B'])
self.assertEqual(ds.sample([(1,)]).dimension_values('B'), np.arange(10))
Expand Down
14 changes: 14 additions & 0 deletions holoviews/tests/core/data/testdaskinterface.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
raise SkipTest("Could not import dask, skipping DaskInterface tests.")

from holoviews.core.data import Dataset
from holoviews.util.transform import dim

from .testpandasinterface import PandasInterfaceTests

Expand Down Expand Up @@ -92,3 +93,16 @@ def test_dataset_range_categorical_dimension_empty(self):
ds_range = ds.range(0)
self.assertTrue(np.isnan(ds_range[0]))
self.assertTrue(np.isnan(ds_range[1]))

def test_select_expression_lazy(self):
df = pd.DataFrame({
'a': [1, 2, 3, 4, 5],
'b': [10, 10, 11, 11, 10],
})
ddf = dd.from_pandas(df, npartitions=2)
ds = Dataset(ddf)
new_ds = ds.select(selection_expr=dim('b') == 10)

# Make sure that selecting by expression didn't cause evaluation
self.assertIsInstance(new_ds.data, dd.DataFrame)
self.assertEqual(new_ds.data.compute(), df[df.b == 10])
15 changes: 13 additions & 2 deletions holoviews/tests/element/testelementselect.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,9 @@ def test_deep_layout_nesting_slice(self):
self.assertEqual(selection, hmap1 + hmap2)

def test_spec_duplicate_dim_select(self):
selection = self.duplicate_map.select((HoloMap,), x=(0, 1), y=(1, 3))
selection = self.duplicate_map.select(
selection_specs=(HoloMap,), x=(0, 1), y=(1, 3)
)
self.assertEqual(selection, self.duplicate_map[0:1, 1:3])

def test_duplicate_dim_select(self):
Expand All @@ -102,11 +104,20 @@ def test_datetime_select(self):
curve = self.datetime_fn()
overlay = curve * self.datetime_fn()
for el in [curve, overlay]:
self.assertEqual(el.select(time=(s, e)), el[s:e])
v = el.select(time=(s, e))
self.assertEqual(v, el[s:e])
self.assertEqual(el.select(time=
(dt.datetime(1999, 12, 31), dt.datetime(2000, 1, 2))), el[s:e]
)
if pd:
self.assertEqual(el.select(
time=(pd.Timestamp(s), pd.Timestamp(e))
), el[pd.Timestamp(s):pd.Timestamp(e)])

def test_selection_spec_positional_error_message(self):
s, e = '1999-12-31', '2000-1-2'
curve = self.datetime_fn()
with self.assertRaisesRegexp(
ValueError, "Use the selection_specs keyword"
):
curve.select((Curve,), time=(s, e))
Loading