diff --git a/holoviews/core/data/__init__.py b/holoviews/core/data/__init__.py index 879a3a17b2..ab3df168ff 100644 --- a/holoviews/core/data/__init__.py +++ b/holoviews/core/data/__init__.py @@ -335,7 +335,7 @@ def add_dimension(self, dimension, dim_pos, dim_val, vdim=False, **kwargs): return self.clone(data, **dimensions) - def select(self, selection_specs=None, **selection): + def select(self, selection_expr=None, selection_specs=None, **selection): """Applies selection by dimension name Applies a selection along the dimensions of the object using @@ -360,7 +360,14 @@ def select(self, selection_specs=None, **selection): ds.select(x=[0, 1, 2]) + * predicate expression: A holoviews.dim expression, e.g.: + + from holoviews import dim + ds.select(selection_expr=dim('x') % 2 == 0) + Args: + selection_expr: holoviews.dim predicate expression + specifying selection. selection_specs: List of specs to match on A list of types, functions, or type[.group][.label] strings specifying which objects to apply the @@ -373,15 +380,33 @@ def select(self, selection_specs=None, **selection): Returns an Dimensioned object containing the selected data or a scalar if a single value was selected """ + from ...util.transform import dim + if selection_expr is not None and not isinstance(selection_expr, dim): + raise ValueError("""\ +The first positional argument to the Dataset.select method is expected to be a +holoviews.util.transform.dim expression. Use the selection_specs keyword +argument to specify a selection specification""") + if selection_specs is not None and not isinstance(selection_specs, (list, tuple)): selection_specs = [selection_specs] - selection = {dim: sel for dim, sel in selection.items() - if dim in self.dimensions()+['selection_mask']} + selection = {dim_name: sel for dim_name, sel in selection.items() + if dim_name in self.dimensions()+['selection_mask']} if (selection_specs and not any(self.matches(sp) for sp in selection_specs) - or not selection): + or (not selection and not selection_expr)): return self - data = self.interface.select(self, **selection) + # Handle selection dim expression + if selection_expr is not None: + mask = selection_expr.apply(self, compute=False, keep_index=True) + dataset = self[mask] + else: + dataset = self + + # Handle selection kwargs + if selection: + data = dataset.interface.select(dataset, **selection) + else: + data = dataset.data if np.isscalar(data): return data @@ -453,7 +478,7 @@ def __getitem__(self, slices): object. """ slices = util.process_ellipses(self, slices, vdim_selection=True) - if isinstance(slices, np.ndarray) and slices.dtype.kind == 'b': + if getattr(getattr(slices, 'dtype', None), 'kind', None) == 'b': if not len(slices) == len(self): raise IndexError("Boolean index must match length of sliced object") return self.clone(self.select(selection_mask=slices)) diff --git a/holoviews/core/data/array.py b/holoviews/core/data/array.py index 997c4632fb..df9290e641 100644 --- a/holoviews/core/data/array.py +++ b/holoviews/core/data/array.py @@ -123,7 +123,9 @@ def sort(cls, dataset, by=[], reverse=False): @classmethod - def values(cls, dataset, dim, expanded=True, flat=True, compute=True): + def values( + cls, dataset, dim, expanded=True, flat=True, compute=True, keep_index=False + ): data = dataset.data dim_idx = dataset.get_dimension_index(dim) if data.ndim == 1: diff --git a/holoviews/core/data/dask.py b/holoviews/core/data/dask.py index a47e68a176..8229d66fa9 100644 --- a/holoviews/core/data/dask.py +++ b/holoviews/core/data/dask.py @@ -90,12 +90,23 @@ def sort(cls, dataset, by=[], reverse=False): return dataset.data @classmethod - def values(cls, dataset, dim, expanded=True, flat=True, compute=True): + def values( + cls, + dataset, + dim, + expanded=True, + flat=True, + compute=True, + keep_index=False, + ): dim = dataset.get_dimension(dim) data = dataset.data[dim.name] if not expanded: data = data.unique() - return data.compute().values if compute else data.values + if keep_index: + return data.compute() if compute else data + else: + return data.compute().values if compute else data.values @classmethod def select_mask(cls, dataset, selection): diff --git a/holoviews/core/data/dictionary.py b/holoviews/core/data/dictionary.py index 0a8ae1e648..b82e523a52 100644 --- a/holoviews/core/data/dictionary.py +++ b/holoviews/core/data/dictionary.py @@ -246,7 +246,9 @@ def range(cls, dataset, dimension): @classmethod - def values(cls, dataset, dim, expanded=True, flat=True, compute=True): + def values( + cls, dataset, dim, expanded=True, flat=True, compute=True, keep_index=False + ): dim = dataset.get_dimension(dim).name values = dataset.data.get(dim) if isscalar(values): diff --git a/holoviews/core/data/grid.py b/holoviews/core/data/grid.py index 983433dbb3..7b4a748bf1 100644 --- a/holoviews/core/data/grid.py +++ b/holoviews/core/data/grid.py @@ -338,7 +338,9 @@ def ndloc(cls, dataset, indices): @classmethod - def values(cls, dataset, dim, expanded=True, flat=True, compute=True): + def values( + cls, dataset, dim, expanded=True, flat=True, compute=True, keep_index=False + ): dim = dataset.get_dimension(dim, strict=True) if dim in dataset.vdims or dataset.data[dim.name].ndim > 1: data = dataset.data[dim.name] diff --git a/holoviews/core/data/image.py b/holoviews/core/data/image.py index 445c0a8804..f82230f141 100644 --- a/holoviews/core/data/image.py +++ b/holoviews/core/data/image.py @@ -156,7 +156,9 @@ def range(cls, obj, dim): @classmethod - def values(cls, dataset, dim, expanded=True, flat=True, compute=True): + def values( + cls, dataset, dim, expanded=True, flat=True, compute=True, keep_index=False + ): """ The set of samples available along a particular dimension. """ diff --git a/holoviews/core/data/multipath.py b/holoviews/core/data/multipath.py index 04e84a18d2..b0ff823a29 100644 --- a/holoviews/core/data/multipath.py +++ b/holoviews/core/data/multipath.py @@ -281,7 +281,15 @@ def redim(cls, dataset, dimensions): return new_data @classmethod - def values(cls, dataset, dimension, expanded=True, flat=True, compute=True): + def values( + cls, + dataset, + dimension, + expanded=True, + flat=True, + compute=True, + keep_index=False, + ): """ Returns a single concatenated array of all subpaths separated by NaN values. If expanded keyword is False an array of arrays @@ -293,7 +301,9 @@ def values(cls, dataset, dimension, expanded=True, flat=True, compute=True): ds = cls._inner_dataset_template(dataset) for d in dataset.data: ds.data = d - dvals = ds.interface.values(ds, dimension, expanded, flat, compute) + dvals = ds.interface.values( + ds, dimension, expanded, flat, compute, keep_index + ) if not len(dvals): continue elif expanded: diff --git a/holoviews/core/data/pandas.py b/holoviews/core/data/pandas.py index ed25cf143d..b90af8e573 100644 --- a/holoviews/core/data/pandas.py +++ b/holoviews/core/data/pandas.py @@ -266,20 +266,33 @@ def select(cls, dataset, selection_mask=None, **selection): df = dataset.data if selection_mask is None: selection_mask = cls.select_mask(dataset, selection) + indexed = cls.indexed(dataset, selection) - df = df.iloc[selection_mask] + if isinstance(selection_mask, pd.Series): + df = df[selection_mask] + else: + df = df.iloc[selection_mask] if indexed and len(df) == 1 and len(dataset.vdims) == 1: return df[dataset.vdims[0].name].iloc[0] return df @classmethod - def values(cls, dataset, dim, expanded=True, flat=True, compute=True): + def values( + cls, + dataset, + dim, + expanded=True, + flat=True, + compute=True, + keep_index=False, + ): dim = dataset.get_dimension(dim, strict=True) data = dataset.data[dim.name] if not expanded: return data.unique() - return data.values + + return data if keep_index else data.values @classmethod diff --git a/holoviews/core/data/xarray.py b/holoviews/core/data/xarray.py index 71ac1b4ae6..d6393f564a 100644 --- a/holoviews/core/data/xarray.py +++ b/holoviews/core/data/xarray.py @@ -303,7 +303,7 @@ def coords(cls, dataset, dimension, ordered=False, expanded=False, edges=False): @classmethod - def values(cls, dataset, dim, expanded=True, flat=True, compute=True): + def values(cls, dataset, dim, expanded=True, flat=True, compute=True, keep_index=False): dim = dataset.get_dimension(dim, strict=True) data = dataset.data[dim.name].data irregular = cls.irregular(dataset, dim) if dim in dataset.kdims else False diff --git a/holoviews/core/dimension.py b/holoviews/core/dimension.py index e02aa49fd8..f001fedfbd 100644 --- a/holoviews/core/dimension.py +++ b/holoviews/core/dimension.py @@ -1109,14 +1109,14 @@ def select(self, selection_specs=None, **kwargs): # Apply the selection on the selected object of a different type dimensions = selection.dimensions() + ['value'] if any(kw in dimensions for kw in kwargs): - selection = selection.select(selection_specs, **kwargs) + selection = selection.select(selection_specs=selection_specs, **kwargs) elif isinstance(selection, Dimensioned) and selection._deep_indexable: # Apply the deep selection on each item in local selection items = [] for k, v in selection.items(): dimensions = v.dimensions() + ['value'] if any(kw in dimensions for kw in kwargs): - items.append((k, v.select(selection_specs, **kwargs))) + items.append((k, v.select(selection_specs=selection_specs, **kwargs))) else: items.append((k, v)) selection = selection.clone(items) diff --git a/holoviews/core/spaces.py b/holoviews/core/spaces.py index cb780aa945..c584381556 100644 --- a/holoviews/core/spaces.py +++ b/holoviews/core/spaces.py @@ -1371,7 +1371,7 @@ def select(self, selection_specs=None, **kwargs): """ if selection_specs is not None and not isinstance(selection_specs, (list, tuple)): selection_specs = [selection_specs] - selection = super(DynamicMap, self).select(selection_specs, **kwargs) + selection = super(DynamicMap, self).select(selection_specs=selection_specs, **kwargs) def dynamic_select(obj, **dynkwargs): if selection_specs is not None: matches = any(obj.matches(spec) for spec in selection_specs) diff --git a/holoviews/core/util.py b/holoviews/core/util.py index 69b5976bba..27a04b6c90 100644 --- a/holoviews/core/util.py +++ b/holoviews/core/util.py @@ -505,7 +505,7 @@ def process_ellipses(obj, key, vdim_selection=False): will be exactly one longer than the number of kdims). Note: this flag should not be used for composite types. """ - if isinstance(key, np.ndarray) and key.dtype.kind == 'b': + if getattr(getattr(key, 'dtype', None), 'kind', None) == 'b': return key wrapped_key = wrap_tuple(key) if wrapped_key.count(Ellipsis)== 0: diff --git a/holoviews/element/path.py b/holoviews/element/path.py index 484474fcba..00905048f4 100644 --- a/holoviews/element/path.py +++ b/holoviews/element/path.py @@ -110,11 +110,11 @@ def __getitem__(self, key): return self.clone(extents=(xstart, ystart, xstop, ystop)) - def select(self, selection_specs=None, **kwargs): + def select(self, *args, **kwargs): """ Bypasses selection on data and sets extents based on selection. """ - return super(Element2D, self).select(selection_specs, **kwargs) + return super(Element2D, self).select(*args, **kwargs) def split(self, start=None, end=None, datatype=None, **kwargs): diff --git a/holoviews/plotting/util.py b/holoviews/plotting/util.py index 8a5f6c1764..f72ded0ac7 100644 --- a/holoviews/plotting/util.py +++ b/holoviews/plotting/util.py @@ -477,7 +477,7 @@ def initialize_unbounded(obj, dimensions, key): """ select = dict(zip([d.name for d in dimensions], key)) try: - obj.select([DynamicMap], **select) + obj.select(selection_specs=[DynamicMap], **select) except KeyError: pass diff --git a/holoviews/tests/core/data/base.py b/holoviews/tests/core/data/base.py index abf03edac9..b39b8ea1b0 100644 --- a/holoviews/tests/core/data/base.py +++ b/holoviews/tests/core/data/base.py @@ -11,7 +11,8 @@ from holoviews.core.data import concat from holoviews.core.data.interface import DataError from holoviews.element import Scatter, Curve -from holoviews.element.comparison import ComparisonTestCase +from holoviews.element.comparison import ComparisonTestCase +from holoviews.util.transform import dim from collections import OrderedDict @@ -697,6 +698,13 @@ def test_dataset_select_rows_gender_male(self): kdims=self.kdims, vdims=self.vdims) self.assertEquals(row, indexed) + def test_dataset_select_rows_gender_male_expr(self): + row = self.table.select(selection_expr=dim('Gender') == 'M') + indexed = Dataset({'Gender': ['M', 'M'], 'Age': [10, 16], + 'Weight': [15, 18], 'Height': [0.8,0.6]}, + kdims=self.kdims, vdims=self.vdims) + self.assertEquals(row, indexed) + def test_dataset_select_rows_gender_male_alias(self): row = self.alias_table.select(Gender='M') alias_row = self.alias_table.select(gender='M') @@ -859,10 +867,24 @@ def test_dataset_scalar_select(self): ds = Dataset({'A': 1, 'B': np.arange(10)}, kdims=['A', 'B']) self.assertEqual(ds.select(A=1).dimension_values('B'), np.arange(10)) + def test_dataset_scalar_select_expr(self): + ds = Dataset({'A': 1, 'B': np.arange(10)}, kdims=['A', 'B']) + self.assertEqual( + ds.select(selection_expr=dim('A') == 1).dimension_values('B'), + np.arange(10) + ) + def test_dataset_scalar_empty_select(self): ds = Dataset({'A': 1, 'B': np.arange(10)}, kdims=['A', 'B']) self.assertEqual(ds.select(A=0).dimension_values('B'), np.array([])) + def test_dataset_scalar_empty_select_expr(self): + ds = Dataset({'A': 1, 'B': np.arange(10)}, kdims=['A', 'B']) + self.assertEqual( + ds.select(selection_expr=dim('A') == 0).dimension_values('B'), + np.array([]) + ) + def test_dataset_scalar_sample(self): ds = Dataset({'A': 1, 'B': np.arange(10)}, kdims=['A', 'B']) self.assertEqual(ds.sample([(1,)]).dimension_values('B'), np.arange(10)) diff --git a/holoviews/tests/core/data/testdaskinterface.py b/holoviews/tests/core/data/testdaskinterface.py index 1645473189..c4a46ffb02 100644 --- a/holoviews/tests/core/data/testdaskinterface.py +++ b/holoviews/tests/core/data/testdaskinterface.py @@ -9,6 +9,7 @@ raise SkipTest("Could not import dask, skipping DaskInterface tests.") from holoviews.core.data import Dataset +from holoviews.util.transform import dim from .testpandasinterface import PandasInterfaceTests @@ -92,3 +93,16 @@ def test_dataset_range_categorical_dimension_empty(self): ds_range = ds.range(0) self.assertTrue(np.isnan(ds_range[0])) self.assertTrue(np.isnan(ds_range[1])) + + def test_select_expression_lazy(self): + df = pd.DataFrame({ + 'a': [1, 2, 3, 4, 5], + 'b': [10, 10, 11, 11, 10], + }) + ddf = dd.from_pandas(df, npartitions=2) + ds = Dataset(ddf) + new_ds = ds.select(selection_expr=dim('b') == 10) + + # Make sure that selecting by expression didn't cause evaluation + self.assertIsInstance(new_ds.data, dd.DataFrame) + self.assertEqual(new_ds.data.compute(), df[df.b == 10]) diff --git a/holoviews/tests/element/testelementselect.py b/holoviews/tests/element/testelementselect.py index 5331b393e1..1530d746b2 100644 --- a/holoviews/tests/element/testelementselect.py +++ b/holoviews/tests/element/testelementselect.py @@ -86,7 +86,9 @@ def test_deep_layout_nesting_slice(self): self.assertEqual(selection, hmap1 + hmap2) def test_spec_duplicate_dim_select(self): - selection = self.duplicate_map.select((HoloMap,), x=(0, 1), y=(1, 3)) + selection = self.duplicate_map.select( + selection_specs=(HoloMap,), x=(0, 1), y=(1, 3) + ) self.assertEqual(selection, self.duplicate_map[0:1, 1:3]) def test_duplicate_dim_select(self): @@ -102,7 +104,8 @@ def test_datetime_select(self): curve = self.datetime_fn() overlay = curve * self.datetime_fn() for el in [curve, overlay]: - self.assertEqual(el.select(time=(s, e)), el[s:e]) + v = el.select(time=(s, e)) + self.assertEqual(v, el[s:e]) self.assertEqual(el.select(time= (dt.datetime(1999, 12, 31), dt.datetime(2000, 1, 2))), el[s:e] ) @@ -110,3 +113,11 @@ def test_datetime_select(self): self.assertEqual(el.select( time=(pd.Timestamp(s), pd.Timestamp(e)) ), el[pd.Timestamp(s):pd.Timestamp(e)]) + + def test_selection_spec_positional_error_message(self): + s, e = '1999-12-31', '2000-1-2' + curve = self.datetime_fn() + with self.assertRaisesRegexp( + ValueError, "Use the selection_specs keyword" + ): + curve.select((Curve,), time=(s, e)) diff --git a/holoviews/util/transform.py b/holoviews/util/transform.py index 2beeee4133..c25e390527 100644 --- a/holoviews/util/transform.py +++ b/holoviews/util/transform.py @@ -322,7 +322,16 @@ def applies(self, dataset): applies &= arg.applies(dataset) return applies - def apply(self, dataset, flat=False, expanded=None, ranges={}, all_values=False): + def apply( + self, + dataset, + flat=False, + expanded=None, + ranges={}, + all_values=False, + keep_index=False, + compute=True, + ): """Evaluates the transform on the supplied dataset. Args: @@ -334,6 +343,10 @@ def apply(self, dataset, flat=False, expanded=None, ranges={}, all_values=False) Whether to evaluate on all available values, for some element types, such as Graphs, this may include values not included in the referenced column + keep_index: For data types that support indexes, whether the index + should be preserved in the result. + compute: For data types that support lazy evaluation, whether + the result should be computed before it is returned. Returns: values: NumPy array computed by evaluating the expression @@ -346,7 +359,15 @@ def apply(self, dataset, flat=False, expanded=None, ranges={}, all_values=False) if dimension in dataset.kdims and all_values: dimension = dataset.nodes.kdims[2] dataset = dataset if dimension in dataset else dataset.nodes - data = dataset.dimension_values(dimension, expanded=expanded, flat=flat) + + data = dataset.interface.values( + dataset, + dimension, + expanded=expanded, + flat=flat, + compute=compute, + keep_index=keep_index + ) for o in self.ops: args = o['args'] fn_args = [data]