diff --git a/examples/user_guide/Geometry_Data.ipynb b/examples/user_guide/Geometry_Data.ipynb index 64a3015d66..97ae8391ec 100644 --- a/examples/user_guide/Geometry_Data.ipynb +++ b/examples/user_guide/Geometry_Data.ipynb @@ -6,7 +6,7 @@ "source": [ "In addition to the two main types of data, namely tabular/columnar and gridded data HoloViews also provide extensible interfaces to represent path geometry data. Specifically it has three main element types used to representing different types of geometries. In this section we will cover the HoloViews data model for representing different kinds of geometries.\n", "\n", - "There are many different ways of representing path geometries but HoloViews' data model is oriented on GEOS geometry definitions and allows faithfully round-tripping data between its element types and GEOS geometry definitions such as ``LinearString``, ``Polygon``, ``MultiLineString`` and ``MultiPolygon`` geometries (even if this is not implemented in HoloViews itself). Since HoloViews interfaces are extensible many different formats for representing geometries could be supported (see [GeoViews](http://geoviews.org/user_guide/Geometries.html) for other representations) but here we will cover the native formats used by HoloViews to represent this data." + "There are many different ways of representing path geometries but HoloViews' data model is oriented on GEOS geometry definitions and allows faithfully round-tripping data between its element types and GEOS geometry definitions such as ``LineString``, ``Polygon``, ``MultiLineString`` and ``MultiPolygon`` geometries (even if this is not implemented in HoloViews itself). HoloViews defines a dictionary-based format for the geometries but also supports [spatialpandas](https://github.com/holoviz/spatialpandas), which is a highly optimized implementation similar to [geopandas](https://github.com/geopandas/geopandas/) but without the heavy geo-dependencies such as shapely and fiona. 
[GeoViews](https://geoviews.org/user_guide/Geometries.html) supports both geopandas and raw shapely geometries directly." ] }, { @@ -41,14 +41,16 @@ "metadata": {}, "outputs": [], "source": [ - "hv.Path({'x': [1, 2, 3, 4, 5], 'y': [0, 0, 1, 1, 2]})" + "hv.Path({'x': [1, 2, 3, 4, 5], 'y': [0, 0, 1, 1, 2]}, ['x', 'y'])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Here the dictionary of x- and y-coordinates could also be an NumPy array with two columns or a dataframe with 'x' and 'y' columns. To draw multiple paths the data-structures can be wrapped in a list. Additionally, it is also possible to associate a value with each path by declaring it as a value dimension:" + "Here the dictionary of x- and y-coordinates could also be an NumPy array with two columns or a dataframe with 'x' and 'y' columns.\n", + "\n", + "To draw multiple paths the data-structures can be wrapped in a list. Additionally, it is also possible to associate a value with each path by declaring it as a value dimension:" ] }, { @@ -57,8 +59,9 @@ "metadata": {}, "outputs": [], "source": [ - "hv.Path([{'x': [1, 2, 3, 4, 5], 'y': [0, 0, 1, 1, 2], 'value': 0},\n", - " {'x': [5, 4, 3, 2, 1], 'y': [2, 2, 1, 1, 0], 'value': 1}], vdims='value').opts(color='value')" + "p = hv.Path([{'x': [1, 2, 3, 4, 5], 'y': [0, 0, 1, 1, 2], 'value': 0},\n", + " {'x': [5, 4, 3, 2, 1], 'y': [2, 2, 1, 1, 0], 'value': 1}], vdims='value').opts(color='value')\n", + "p" ] }, { @@ -193,7 +196,8 @@ "outputs": [], "source": [ "hv.Polygons([{'x': xs, 'y': ys, 'holes': holes, 'value': 0},\n", - " {'x': [4, 6, 6], 'y': [0, 2, 1], 'value': 1}, {'x': [-3, -1, -6], 'y': [3, 2, 1], 'value': 3}], vdims='value')" + " {'x': [4, 6, 6], 'y': [0, 2, 1], 'value': 1},\n", + " {'x': [-3, -1, -6], 'y': [3, 2, 1], 'value': 3}], vdims='value')" ] }, { @@ -228,7 +232,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Using the ``datatype`` argument the data may instead be returned in the desired format, e.g. 
a list of arrays:" + "Using the ``datatype`` argument the data may instead be returned in the desired format, e.g. 'dictionary', 'array' or 'dataframe'. Here we return the 'dictionary' format:" ] }, { @@ -237,7 +241,7 @@ "metadata": {}, "outputs": [], "source": [ - "poly.split(datatype='array')" + "poly.split(datatype='dictionary')" ] }, { @@ -255,5 +259,5 @@ } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } diff --git a/holoviews/annotators.py b/holoviews/annotators.py index 8dc49f7d9a..f56ceaf63a 100644 --- a/holoviews/annotators.py +++ b/holoviews/annotators.py @@ -383,7 +383,7 @@ def _update_table(self): table = self.object for transform in self.table_transforms: table = transform(table) - table_data = {a: [d.dimension_values(a, expanded=False)[0] for d in table.split()] + table_data = {a: list(table.dimension_values(a, expanded=False)) for a in annotations} self._table = Table(table_data, annotations, [], label=name).opts( show_title=False, **self.table_opts) diff --git a/holoviews/core/data/__init__.py b/holoviews/core/data/__init__.py index 01d6d4468f..974d48e387 100644 --- a/holoviews/core/data/__init__.py +++ b/holoviews/core/data/__init__.py @@ -25,15 +25,16 @@ from .grid import GridInterface from .multipath import MultiInterface # noqa (API import) from .image import ImageInterface # noqa (API import) +from .spatialpandas import SpatialPandasInterface # noqa (API import) default_datatype = 'dictionary' -datatypes = ['dictionary', 'grid'] +datatypes = ['dictionary', 'grid', 'spatialpandas'] try: import pandas as pd # noqa (Availability import) from .pandas import PandasInterface default_datatype = 'dataframe' - datatypes = ['dataframe', 'dictionary', 'grid'] + datatypes = ['dataframe', 'dictionary', 'spatialpandas', 'grid'] DFColumns = PandasInterface except ImportError: pd = None @@ -331,7 +332,8 @@ def dataset(self): """ from . 
@classmethod
def holes(cls, dataset):
    """
    Returns the holes stored under the Polygons hole key.

    Each hole is converted to an array and, if its first and last
    vertex are not identical, the first vertex is appended so that
    the hole forms a closed ring.

    Args:
        dataset: Dataset whose ``data`` dictionary may contain the
            hole key declared by ``Polygons._hole_key``

    Returns:
        A single-item list wrapping a list (one entry per item stored
        under the hole key) of lists of hole arrays. If the hole key
        is absent the parent interface implementation is used.
    """
    from holoviews.element import Polygons
    key = Polygons._hole_key
    if key in dataset.data:
        holes = []
        for hs in dataset.data[key]:
            subholes = []
            for h in hs:
                hole = np.asarray(h)
                # A ring is open if ANY coordinate of the end points
                # differs; requiring all coordinates to differ would
                # leave e.g. (0, 0) -> (0, 1) unclosed.
                if len(hole) and (hole[0, :] != hole[-1, :]).any():
                    hole = np.concatenate([hole, hole[:1]])
                subholes.append(hole)
            holes.append(subholes)
        return [holes]
    else:
        return super(DictInterface, cls).holes(dataset)
@classmethod
def isunique(cls, dataset, dim, per_geom=False):
    """
    Compatibility shim introduced for v1.13.0 which smooths over the
    addition of the per_geom argument to the isscalar method.

    The three-argument form of isscalar is attempted first; if the
    interface raises a TypeError the call is repeated without
    per_geom.
    """
    try:
        result = cls.isscalar(dataset, dim, per_geom)
    except TypeError:
        result = cls.isscalar(dataset, dim)
    return result
""" types = () @@ -26,22 +25,47 @@ class MultiInterface(Interface): subtypes = ['dictionary', 'dataframe', 'array', 'dask'] + geom_types = ['Polygon', 'Ring', 'Line', 'Point'] + multi = True @classmethod def init(cls, eltype, data, kdims, vdims): + from ...element import Polygons, Path + new_data = [] dims = {'kdims': eltype.kdims, 'vdims': eltype.vdims} if kdims is not None: dims['kdims'] = kdims if vdims is not None: dims['vdims'] = vdims - if not isinstance(data, list): - raise ValueError('MultiInterface data must be a list tabular data types.') + + if (isinstance(data, list) and len(data) and + all(isinstance(d, tuple) and all(util.isscalar(v) for v in d) for d in data)): + data = [data] + elif not isinstance(data, list): + interface = [Interface.interfaces.get(st).applies(data) + for st in cls.subtypes if st in Interface.interfaces] + if (interface or isinstance(data, tuple)) and issubclass(eltype, Path): + data = [data] + else: + raise ValueError('MultiInterface data must be a list of tabular data types.') prev_interface, prev_dims = None, None for d in data: + datatype = cls.subtypes + if isinstance(d, dict): + if Polygons._hole_key in d: + datatype = [dt for dt in datatype + if hasattr(Interface.interfaces.get(dt), 'has_holes')] + geom_type = d.get('geom_type') + if geom_type is not None and geom_type not in cls.geom_types: + raise DataError("Geometry type '%s' not recognized, " + "must be one of %s." 
@classmethod
def isscalar(cls, dataset, dim, per_geom=False):
    """
    Tests if dimension is scalar in each subpath.

    Every subpath must hold a single unique value along the
    dimension. Unless per_geom is set and the geometry type is not
    'Point', that value must additionally be the same across all
    subpaths.
    """
    if not dataset.data:
        return True
    overall_type = cls.geom_type(dataset)
    template = cls._inner_dataset_template(dataset)
    # When only per-geometry uniqueness is requested there is no need
    # to compare values between different subpaths
    within_only = per_geom and overall_type != 'Point'
    seen = []
    for subpath in dataset.data:
        template.data = subpath
        distinct = list(util.unique_iterator(
            template.interface.values(template, dim)))
        if len(distinct) > 1:
            return False
        if within_only:
            continue
        value = distinct[0]
        if value in seen:
            continue
        if seen:
            # A second, different value was found in another subpath
            return False
        seen.append(value)
    return True
@classmethod
def sort(cls, dataset, by=[], reverse=False):
    """
    Sorts the subpaths of the dataset by the supplied dimensions.

    Args:
        dataset: Dataset whose ``data`` list of subpaths is sorted
        by: Dimensions to sort by; resolved to names through
            ``dataset.get_dimension``
        reverse: Whether to reverse the resulting order

    Returns:
        A new list containing the items of ``dataset.data`` in
        sorted order.
    """
    by = [dataset.get_dimension(d).name for d in by]
    if len(by) == 1:
        sorting = cls.values(dataset, by[0], False).argsort()
    else:
        arrays = [dataset.dimension_values(d, False) for d in by]
        sorting = util.arglexsort(arrays)
    if reverse:
        # Previously the reverse argument was accepted but ignored
        sorting = sorting[::-1]
    return [dataset.data[s] for s in sorting]
values: return np.array([]) - elif expanded: - return np.concatenate(values[:-1]) + elif expanded or (all_scalar and not is_geom): + if not is_points and expanded: + values = values[:-1] + return np.concatenate(values) if values else np.array([]) else: - return np.concatenate(values) + array = np.empty(len(values), dtype=object) + array[:] = [a[0] if s else a for s, a in zip(scalars, values)] + return array @classmethod def split(cls, dataset, start, end, datatype, **kwargs): @@ -328,22 +431,35 @@ def split(cls, dataset, start, end, datatype, **kwargs): objs = [] if datatype is None: for d in dataset.data[start: end]: - objs.append(dataset.clone(d, datatype=cls.subtypes)) + objs.append(dataset.clone([d])) return objs elif not dataset.data: return objs - ds = cls._inner_dataset_template(dataset) - for d in dataset.data: - ds.data = d + + geom_type = cls.geom_type(dataset) + ds = dataset.clone([]) + for d in dataset.data[start:end]: + ds.data = [d] if datatype == 'array': obj = ds.array(**kwargs) elif datatype == 'dataframe': obj = ds.dframe(**kwargs) - elif datatype == 'columns': - if ds.interface.datatype == 'dictionary': - obj = dict(ds.data) + elif datatype in ('columns', 'dictionary'): + if hasattr(ds.interface, 'geom_type'): + gt = ds.interface.geom_type(ds) + if gt is None: + gt = geom_type + if isinstance(ds.data[0], dict): + obj = dict(ds.data[0]) + xd, yd = ds.kdims + if (geom_type in ('Polygon', 'Ring') or + xd not in obj or yd not in obj): + obj[xd.name] = ds.interface.values(ds, xd) + obj[yd.name] = ds.interface.values(ds, yd) else: - obj = ds.columns(**kwargs) + obj = ds.columns() + if gt is not None: + obj['geom_type'] = gt else: raise ValueError("%s datatype not support" % datatype) objs.append(obj) @@ -371,6 +487,78 @@ def add_dimension(cls, dataset, dimension, dim_pos, values, vdim): new_data.append(ds.interface.add_dimension(ds, dimension, dim_pos, v, vdim)) return new_data + @classmethod + def iloc(cls, dataset, index): + rows, cols = index + 
def ensure_ring(geom, values=None):
    """Ensure the (multi-)geometry forms a ring.

    Checks the start- and end-point of each geometry to ensure they
    form a ring, if not the start point is inserted at the end point.
    If a values array is provided (which must match the geometry in
    length) then the insertion will occur on the values instead,
    ensuring that they will match the ring geometry.

    Sub-geometries are delimited by rows containing NaN. An empty
    geometry is returned unchanged.

    Args:
        geom: 2-D array of geometry coordinates
        values: Optional array of values

    Returns:
        Array where values have been inserted and ring closing indexes
    """
    if values is None:
        values = geom
    # Without any vertices there is no start or end point to compare
    if not len(geom):
        return values

    # Cast to float so that NaN separators can also be detected in
    # integer or object arrays, which np.isnan may not support directly
    breaks = np.where(np.isnan(geom.astype('float')).sum(axis=1))[0]
    starts = [0] + list(breaks+1)
    ends = list(breaks-1) + [len(geom)-1]
    zipped = zip(geom[starts], geom[ends], ends, values[starts])
    # Collect the value to insert and the insertion index (just after
    # the end point) for every sub-geometry which is not yet closed
    unpacked = tuple(zip(*[(v, i+1) for s, e, i, v in zipped
                           if (s != e).any()]))
    if not unpacked:
        return values
    inserts, inds = unpacked
    return np.insert(values, list(inds), list(inserts), axis=0)
@classmethod
def init(cls, eltype, data, kdims, vdims):
    """
    Coerces the supplied data to a spatialpandas GeoDataFrame.

    Accepts spatialpandas GeoSeries/GeoDataFrame objects, geopandas
    GeoSeries/GeoDataFrame objects (only checked if geopandas has
    already been imported) and lists, which are handed to the
    from_shapely and from_multi helpers.

    Returns:
        Tuple of the data, a dictionary declaring the 'kdims' and
        'vdims', and an empty dictionary.

    Raises:
        ValueError: If the data is neither a list nor ends up as a
            GeoDataFrame.
    """
    import pandas as pd
    from spatialpandas import GeoDataFrame, GeoSeries

    # Fall back to the dimensions declared on the element type
    if kdims is None:
        kdims = eltype.kdims

    if vdims is None:
        vdims = eltype.vdims

    if isinstance(data, GeoSeries):
        data = data.to_frame()

    # Only handle geopandas types if the user has imported geopandas,
    # avoiding an import of the optional dependency here
    if 'geopandas' in sys.modules:
        import geopandas as gpd
        if isinstance(data, gpd.GeoSeries):
            data = data.to_frame()
        if isinstance(data, gpd.GeoDataFrame):
            data = GeoDataFrame(data)
    if isinstance(data, list):
        # NOTE(review): from_shapely presumably returns the list
        # unchanged when it does not hold shapely geometries, since
        # the list check is repeated below - confirm against helper
        if 'shapely' in sys.modules:
            data = from_shapely(data)
        if isinstance(data, list):
            data = from_multi(eltype, data, kdims, vdims)
    elif not isinstance(data, GeoDataFrame):
        raise ValueError("SpatialPandasInterface only support spatialpandas DataFrames.")
    elif 'geometry' not in data:
        # Called for validation only: geo_column raises a ValueError
        # if no geometry column can be found
        cls.geo_column(data)

    index_names = data.index.names if isinstance(data, pd.DataFrame) else [data.index.name]
    if index_names == [None]:
        index_names = ['index']

    # If a declared dimension is not a column but matches the name of
    # an index level, turn the index into regular columns
    for kd in kdims+vdims:
        kd = dimension_name(kd)
        if kd in data.columns:
            continue
        if any(kd == ('index' if name is None else name)
               for name in index_names):
            data = data.reset_index()
            break

    return data, {'kdims': kdims, 'vdims': vdims}, {}
@classmethod
def select_mask(cls, dataset, selection):
    """
    Computes a boolean row mask for the supplied selection.

    Args:
        dataset: Dataset whose ``data`` holds one column per
            selected dimension
        selection: Mapping from column name to a selection
            specification, which may be a tuple or slice (start
            inclusive, stop exclusive), a set or list of values,
            a callable applied to the column values or a single
            value tested for equality

    Returns:
        Boolean array with one entry per row of ``dataset.data``
    """
    # The builtin bool is used since the np.bool alias was removed
    # from NumPy
    mask = np.ones(len(dataset.data), dtype=bool)
    for dim, k in selection.items():
        if isinstance(k, tuple):
            k = slice(*k)
        arr = dataset.data[dim].values
        if isinstance(k, slice):
            with warnings.catch_warnings():
                # Comparisons against NaN emit spurious warnings
                warnings.filterwarnings('ignore', r'invalid value encountered')
                if k.start is not None:
                    mask &= k.start <= arr
                if k.stop is not None:
                    mask &= arr < k.stop
        elif isinstance(k, (set, list)):
            iter_slcs = []
            for ik in k:
                with warnings.catch_warnings():
                    warnings.filterwarnings('ignore', r'invalid value encountered')
                    iter_slcs.append(arr == ik)
            mask &= np.logical_or.reduce(iter_slcs)
        elif callable(k):
            mask &= k(arr)
        else:
            index_mask = arr == k
            if dataset.ndims == 1 and np.sum(index_mask) == 0:
                # No exact match on a one-dimensional dataset, so
                # select the nearest value instead. The mask is sized
                # by the column it indexes into.
                data_index = np.argmin(np.abs(arr - k))
                mask = np.zeros(len(arr), dtype=bool)
                mask[data_index] = True
            else:
                mask &= index_mask
    return mask
@classmethod
def range(cls, dataset, dim):
    """
    Computes the range along the supplied dimension.

    For a geometry dimension the range is read from the
    total_bounds of the geometry column: elements 0 and 2 for the
    first geometry dimension, elements 1 and 3 otherwise. All other
    dimensions are delegated to Interface.range.
    """
    dimension = dataset.get_dimension(dim)
    geometry_dims = cls.geom_dims(dataset)
    if dimension not in geometry_dims:
        return Interface.range(dataset, dimension)
    column = cls.geo_column(dataset.data)
    position = geometry_dims.index(dimension)
    extent = dataset.data[column].total_bounds
    offset = 0 if position == 0 else 1
    return (extent[offset], extent[offset + 2])
geom_type != 'Point': + return PandasInterface.length(dataset) + length = 0 + for i, geom in enumerate(column): + if isinstance(geom, Point): + length += 1 + else: + length += (len(geom.buffer_values)//2) + return length + + @classmethod + def nonzero(cls, dataset): + return bool(cls.length(dataset)) + + @classmethod + def redim(cls, dataset, dimensions): + return PandasInterface.redim(dataset, dimensions) + + @classmethod + def add_dimension(cls, dataset, dimension, dim_pos, values, vdim): + data = dataset.data.copy() + geom_col = cls.geo_column(dataset.data) + if dim_pos >= list(data.columns).index(geom_col): + dim_pos -= 1 + if dimension.name not in data: + data.insert(dim_pos, dimension.name, values) + return data + + @classmethod + def iloc(cls, dataset, index): + from spatialpandas import GeoSeries + from spatialpandas.geometry import MultiPointDtype + rows, cols = index + geom_dims = cls.geom_dims(dataset) + geom_col = cls.geo_column(dataset.data) + scalar = False + columns = list(dataset.data.columns) + if isinstance(cols, slice): + cols = [d.name for d in dataset.dimensions()][cols] + elif np.isscalar(cols): + scalar = np.isscalar(rows) + cols = [dataset.get_dimension(cols).name] + else: + cols = [dataset.get_dimension(d).name for d in index[1]] + if not all(d in cols for d in geom_dims): + raise DataError("Cannot index a dimension which is part of the " + "geometry column of a spatialpandas DataFrame.", cls) + cols = list(unique_iterator([ + columns.index(geom_col) if c in geom_dims else columns.index(c) for c in cols + ])) + + if not isinstance(dataset.data[geom_col].dtype, MultiPointDtype): + if scalar: + return dataset.data.iloc[rows[0], cols[0]] + elif isscalar(rows): + rows = [rows] + return dataset.data.iloc[rows, cols] + + geoms = dataset.data[geom_col] + count = 0 + new_geoms, indexes = [], [] + for i, geom in enumerate(geoms): + length = int(len(geom.buffer_values)/2) + if np.isscalar(rows): + if count <= rows < (count+length): + idx = 
(rows-count)*2 + data = geom.buffer_values[idx:idx+2] + new_geoms.append(type(geom)(data)) + indexes.append(i) + break + elif isinstance(rows, slice): + if rows.start is not None and rows.start > (count+length): + continue + elif rows.stop is not None and rows.stop < count: + break + start = None if rows.start is None else max(rows.start - count, 0)*2 + stop = None if rows.stop is None else min(rows.stop - count, length)*2 + if rows.step is not None: + dataset.param.warning(".iloc step slicing currently not supported for" + "the multi-tabular data format.") + sliced = geom.buffer_values[start:stop] + if len(sliced): + indexes.append(i) + new_geoms.append(type(geom)(sliced)) + else: + sub_rows = [v for r in rows for v in ((r-count)*2, (r-count)*2+1) + if count <= r < (count+length)] + if sub_rows: + indexes.append(i) + idxs = np.array(sub_rows, dtype=int) + new_geoms.append(type(geom)(geom.buffer_values[idxs])) + count += length + + new = dataset.data.iloc[indexes].copy() + new[geom_col] = GeoSeries(new_geoms) + return new + + @classmethod + def values(cls, dataset, dimension, expanded=True, flat=True, compute=True, keep_index=False): + dimension = dataset.get_dimension(dimension) + geom_dims = dataset.interface.geom_dims(dataset) + data = dataset.data + isgeom = (dimension in geom_dims) + geom_col = cls.geo_column(dataset.data) + is_points = cls.geom_type(dataset) == 'Point' + if isgeom and keep_index: + return data[geom_col] + elif not isgeom: + return get_value_array(data, dimension, expanded, keep_index, geom_col, is_points) + elif not len(data): + return np.array([]) + + geom_type = cls.geom_type(dataset) + index = geom_dims.index(dimension) + return geom_array_to_array(data[geom_col].values, index, expanded, geom_type) + + @classmethod + def split(cls, dataset, start, end, datatype, **kwargs): + from spatialpandas import GeoDataFrame, GeoSeries + from ...element import Polygons + + objs = [] + if not len(dataset.data): + return [] + xdim, ydim = 
cls.geom_dims(dataset) + value_dims = [dim for dim in dataset.kdims+dataset.vdims + if dim not in (xdim, ydim)] + row = dataset.data.iloc[0] + col = cls.geo_column(dataset.data) + geom_type = cls.geom_type(dataset) + if datatype is not None: + arr = geom_to_array(row[col], geom_type=geom_type) + d = {(xdim.name, ydim.name): arr} + d.update({dim.name: row[dim.name] for dim in value_dims}) + ds = dataset.clone(d, datatype=['dictionary']) + + holes = cls.holes(dataset) if cls.has_holes(dataset) else None + for i, row in dataset.data.iterrows(): + if datatype is None: + gdf = GeoDataFrame({c: GeoSeries([row[c]]) if c == 'geometry' else [row[c]] + for c in dataset.data.columns}) + objs.append(dataset.clone(gdf)) + continue + + geom = row[col] + gt = geom_type or get_geom_type(dataset.data, col) + arr = geom_to_array(geom, geom_type=gt) + d = {xdim.name: arr[:, 0], ydim.name: arr[:, 1]} + d.update({dim.name: row[dim.name] for dim in value_dims}) + if datatype in ('dictionary', 'columns'): + if holes is not None: + d[Polygons._hole_key] = holes[i] + d['geom_type'] = gt + objs.append(d) + continue + + ds.data = d + if datatype == 'array': + obj = ds.array(**kwargs) + elif datatype == 'dataframe': + obj = ds.dframe(**kwargs) + else: + raise ValueError("%s datatype not support" % datatype) + objs.append(obj) + return objs + + + +def get_geom_type(gdf, col): + """Return the HoloViews geometry type string for the geometry column. 
+ + Args: + gdf: The GeoDataFrame to get the geometry from + col: The geometry column + + Returns: + A string representing the type of geometry + """ + from spatialpandas.geometry import ( + PointDtype, MultiPointDtype, LineDtype, MultiLineDtype, + PolygonDtype, MultiPolygonDtype, RingDtype + ) + + column = gdf[col] + if isinstance(column.dtype, (PointDtype, MultiPointDtype)): + return 'Point' + elif isinstance(column.dtype, (LineDtype, MultiLineDtype)): + return 'Line' + elif isinstance(column.dtype, (PolygonDtype, MultiPolygonDtype)): + return 'Polygon' + elif isinstance(column.dtype, RingDtype): + return 'Ring' + + +def geom_to_array(geom, index=None, multi=False, geom_type=None): + """Converts spatialpandas geometry to an array. + + Args: + geom: spatialpandas geometry + index: The column index to return + multi: Whether to concatenate multiple arrays or not + + Returns: + Array or list of arrays. + """ + from spatialpandas.geometry import ( + Point, Polygon, Line, Ring, MultiPolygon, MultiPoint + ) + if isinstance(geom, Point): + if index is None: + return np.array([[geom.x, geom.y]]) + arrays = [np.array([geom.y if index else geom.x])] + elif isinstance(geom, (Polygon, Line, Ring)): + exterior = geom.data[0] if isinstance(geom, Polygon) else geom.data + arr = np.array(exterior.as_py()).reshape(-1, 2) + if isinstance(geom, (Polygon, Ring)): + arr = ensure_ring(arr) + arrays = [arr if index is None else arr[:, index]] + elif isinstance(geom, MultiPoint): + if index is None: + arrays = [np.array(geom.buffer_values).reshape(-1, 2)] + else: + arrays = [np.array(geom.buffer_values[index::2])] + else: + arrays = [] + for g in geom.data: + exterior = g[0] if isinstance(geom, MultiPolygon) else g + arr = np.array(exterior.as_py()).reshape(-1, 2) + if isinstance(geom, MultiPolygon): + arr = ensure_ring(arr) + arrays.append(arr if index is None else arr[:, index]) + if geom_type != 'Point': + arrays.append([[np.nan, np.nan]] if index is None else [np.nan]) + if 
geom_type != 'Point': + arrays = arrays[:-1] + if multi: + return arrays + elif len(arrays) == 1: + return arrays[0] + else: + return np.concatenate(arrays) + + +def geom_array_to_array(geom_array, index, expand=False, geom_type=None): + """Converts spatialpandas extension arrays to a flattened array. + + Args: + geom: spatialpandas geometry + index: The column index to return + + Returns: + Flattened array + """ + from spatialpandas.geometry import PointArray, MultiPointArray + if isinstance(geom_array, PointArray): + return geom_array.y if index else geom_array.x + arrays = [] + multi_point = isinstance(geom_array, MultiPointArray) or geom_type == 'Point' + for geom in geom_array: + array = geom_to_array(geom, index, multi=expand, geom_type=geom_type) + if expand: + arrays.extend(array) + if not multi_point: + arrays.append([np.nan]) + else: + arrays.append(array) + if expand: + if not multi_point: + arrays = arrays[:-1] + return np.concatenate(arrays) if arrays else np.array([]) + else: + array = np.empty(len(arrays), dtype=object) + array[:] = arrays + return array + + +def geom_length(geom): + from spatialpandas.geometry import Polygon, Ring, MultiPolygon, MultiLine + if isinstance(geom, Polygon): + offset = 0 + exterior = geom.data[0] + if exterior[0] != exterior[-2] or exterior[1] != exterior[-1]: + offset = 1 + return len(exterior)//2 + offset + elif isinstance(geom, (MultiPolygon, MultiLine)): + length = 0 + for g in geom.data: + offset = 0 + if isinstance(geom, MultiLine): + exterior = g + else: + exterior = g[0] + if exterior[0] != exterior[-2] or exterior[1] != exterior[-1]: + offset = 1 + length += (len(exterior)//2 + 1) + offset + return length-1 if length else 0 + else: + offset = 0 + exterior = geom.buffer_values + if isinstance(geom, Ring) and (exterior[0] != exterior[-2] or exterior[1] != exterior[-1]): + offset = 1 + return len(exterior)//2 + + +def get_value_array(data, dimension, expanded, keep_index, geom_col, + is_points, 
geom_length=geom_length): + """Returns an array of values from a GeoDataFrame. + + Args: + data: GeoDataFrame + dimension: The dimension to get the values from + expanded: Whether to expand the value array + keep_index: Whether to return a Series + geom_col: The column in the data that contains the geometries + is_points: Whether the geometries are points + geom_length: The function used to compute the length of each geometry + + Returns: + An array containing the values along a dimension + """ + column = data[dimension.name] + if keep_index: + return column + all_scalar = True + arrays, scalars = [], [] + for i, geom in enumerate(data[geom_col]): + length = geom_length(geom) + val = column.iloc[i] + scalar = isscalar(val) + if scalar: + val = np.array([val]) + if not scalar and len(unique_array(val)) == 1: + val = val[:1] + scalar = True + all_scalar &= scalar + scalars.append(scalar) + if not expanded or not scalar: + arrays.append(val) + elif scalar: + arrays.append(np.full(length, val)) + if expanded and not is_points and not i == (len(data[geom_col])-1): + arrays.append(np.array([np.NaN])) + + if not len(data): + return np.array([]) + if expanded: + return np.concatenate(arrays) if len(arrays) > 1 else arrays[0] + elif (all_scalar and arrays): + return np.array([a[0] for a in arrays]) + else: + array = np.empty(len(arrays), dtype=object) + array[:] = [a[0] if s else a for s, a in zip(scalars, arrays)] + return array + + +def geom_to_holes(geom): + """Extracts holes from spatialpandas Polygon geometries. 
+ + Args: + geom: spatialpandas geometry + + Returns: + List of arrays representing holes + """ + from spatialpandas.geometry import Polygon, MultiPolygon + if isinstance(geom, Polygon): + holes = [] + for i, hole in enumerate(geom.data): + if i == 0: + continue + hole = ensure_ring(np.array(hole.as_py()).reshape(-1, 2)) + holes.append(hole) + return [holes] + elif isinstance(geom, MultiPolygon): + holes = [] + for poly in geom.data: + poly_holes = [] + for i, hole in enumerate(poly): + if i == 0: + continue + arr = ensure_ring(np.array(hole.as_py()).reshape(-1, 2)) + poly_holes.append(arr) + holes.append(poly_holes) + return holes + elif 'Multi' in type(geom).__name__: + return [[]]*len(geom) + else: + return [[]] + + +def to_spatialpandas(data, xdim, ydim, columns=[], geom='point'): + """Converts list of dictionary format geometries to spatialpandas line geometries. + + Args: + data: List of dictionaries representing individual geometries + xdim: Name of x-coordinates column + ydim: Name of y-coordinates column + columns: List of columns to add + geom: The type of geometry + + Returns: + A spatialpandas.GeoDataFrame version of the data + """ + from spatialpandas import GeoSeries, GeoDataFrame + from spatialpandas.geometry import ( + Point, Line, Polygon, Ring, LineArray, PolygonArray, PointArray, + MultiLineArray, MultiPolygonArray, MultiPointArray, RingArray + ) + from ...element import Polygons + poly = any(Polygons._hole_key in d for d in data) or geom == 'Polygon' + if poly: + geom_type = Polygon + single_array, multi_array = PolygonArray, MultiPolygonArray + elif geom == 'Line': + geom_type = Line + single_array, multi_array = LineArray, MultiLineArray + elif geom == 'Ring': + geom_type = Ring + single_array, multi_array = RingArray, MultiLineArray + else: + geom_type = Point + single_array, multi_array = PointArray, MultiPointArray + + array_type = None + hole_arrays, geom_arrays = [], [] + for geom in data: + geom = dict(geom) + if xdim not in geom or ydim 
not in geom: + raise ValueError('Could not find geometry dimensions') + xs, ys = geom.pop(xdim), geom.pop(ydim) + xscalar, yscalar = isscalar(xs), isscalar(ys) + if xscalar and yscalar: + xs, ys = np.array([xs]), np.array([ys]) + elif xscalar: + xs = np.full_like(ys, xs) + elif yscalar: + ys = np.full_like(xs, ys) + geom_array = np.column_stack([xs, ys]) + + if geom_type in (Polygon, Ring): + geom_array = ensure_ring(geom_array) + + splits = np.where(np.isnan(geom_array[:, :2].astype('float')).sum(axis=1))[0] + split_geoms = np.split(geom_array, splits+1) if len(splits) else [geom_array] + split_holes = geom.pop(Polygons._hole_key, None) + if split_holes is not None: + if len(split_holes) != len(split_geoms): + raise DataError('Polygons with holes containing multi-geometries ' + 'must declare a list of holes for each geometry.', + SpatialPandasInterface) + else: + split_holes = [[ensure_ring(np.asarray(h)) for h in hs] for hs in split_holes] + + geom_arrays.append(split_geoms) + hole_arrays.append(split_holes) + if geom_type is Point: + if len(splits) > 1 or any(len(g) > 1 for g in split_geoms): + array_type = multi_array + elif array_type is None: + array_type = single_array + elif len(splits): + array_type = multi_array + elif array_type is None: + array_type = single_array + + converted = defaultdict(list) + for geom, arrays, holes in zip(data, geom_arrays, hole_arrays): + parts = [] + for i, g in enumerate(arrays): + if i != (len(arrays)-1): + g = g[:-1] + if len(g) < (3 if poly else 2) and geom_type is not Point: + continue + if poly: + parts.append([]) + subparts = parts[-1] + else: + subparts = parts + subparts.append(g[:, :2]) + if poly and holes is not None: + subparts += [np.array(h) for h in holes[i]] + + for c, v in geom.items(): + converted[c].append(v) + + if array_type is PointArray: + parts = parts[0].flatten() + elif array_type is MultiPointArray: + parts = np.concatenate([sp.flatten() for sp in parts]) + elif array_type is multi_array: + parts = 
[[ssp.flatten() for ssp in sp] if poly else sp.flatten() for sp in parts] + else: + parts = [np.asarray(sp).flatten() for sp in parts[0]] if poly else parts[0].flatten() + converted['geometry'].append(parts) + + if converted: + geometries = converted['geometry'] + if array_type is PointArray: + geometries = np.concatenate(geometries) + geom_array = array_type(geometries) + if poly: + geom_array = geom_array.oriented() + converted['geometry'] = GeoSeries(geom_array) + else: + converted['geometry'] = GeoSeries(single_array([])) + return GeoDataFrame(converted, columns=['geometry']+columns) + + +def to_geom_dict(eltype, data, kdims, vdims, interface=None): + """Converts data from any list format to a dictionary based format. + + Args: + eltype: Element type to convert + data: The original data + kdims: The declared key dimensions + vdims: The declared value dimensions + + Returns: + A list of dictionaries containing geometry coordinates and values. + """ + from . import Dataset + + xname, yname = (kd.name for kd in kdims[:2]) + if isinstance(data, dict): + data = {k: v if isscalar(v) else np.asarray(v) for k, v in data.items()} + return data + new_el = Dataset(data, kdims, vdims) + if new_el.interface is interface: + return new_el.data + new_dict = {} + for d in new_el.dimensions(): + if d in (xname, yname): + scalar = False + else: + scalar = new_el.interface.isscalar(new_el, d) + vals = new_el.dimension_values(d, not scalar) + new_dict[d.name] = vals[0] if scalar else vals + return new_dict + + +def from_multi(eltype, data, kdims, vdims): + """Converts list formats into spatialpandas.GeoDataFrame. + + Args: + eltype: Element type to convert + data: The original data + kdims: The declared key dimensions + vdims: The declared value dimensions + + Returns: + A GeoDataFrame containing in the list based format. 
+ """ + from spatialpandas import GeoDataFrame + + xname, yname = (kd.name for kd in kdims[:2]) + + new_data, types, geom_types = [], [], [] + for d in data: + types.append(type(d)) + new_dict = to_geom_dict(eltype, d, kdims, vdims, SpatialPandasInterface) + if 'geom_type' in new_dict and new_dict['geom_type'] not in geom_types: + geom_types.append(new_dict['geom_type']) + new_data.append(new_dict) + if not isinstance(new_data[-1], dict): + types[-1] = type(new_data[-1]) + if len(set(types)) > 1: + raise DataError('Mixed types not supported') + if new_data and types[0] is GeoDataFrame: + data = pd.concat(new_data) + else: + columns = [d.name for d in kdims+vdims if d not in (xname, yname)] + if len(geom_types) == 1: + geom = geom_types[0] + else: + geom = SpatialPandasInterface.geom_type(eltype) + data = to_spatialpandas(new_data, xname, yname, columns, geom) + return data + + +def from_shapely(data): + """Converts shapely based data formats to spatialpandas.GeoDataFrame. + + Args: + data: A list of shapely objects or dictionaries containing + shapely objects + + Returns: + A GeoDataFrame containing the shapely geometry data. 
+ """ + + from spatialpandas import GeoDataFrame, GeoSeries + from shapely.geometry.base import BaseGeometry + + if not data: + pass + elif all(isinstance(d, BaseGeometry) for d in data): + data = GeoSeries(data).to_frame() + elif all(isinstance(d, dict) and 'geometry' in d and isinstance(d['geometry'], BaseGeometry) + for d in data): + new_data = {col: [] for col in data[0]} + for d in data: + for col, val in d.items(): + new_data[col].append(val if isscalar(val) or isinstance(val, BaseGeometry) else np.asarray(val)) + new_data['geometry'] = GeoSeries(new_data['geometry']) + data = GeoDataFrame(new_data) + return data + + +Interface.register(SpatialPandasInterface) diff --git a/holoviews/element/comparison.py b/holoviews/element/comparison.py index 07dd26ff10..24ac78c9e6 100644 --- a/holoviews/element/comparison.py +++ b/holoviews/element/comparison.py @@ -518,9 +518,16 @@ def compare_bounds(cls, el1, el2, msg='Bounds'): @classmethod def compare_dataset(cls, el1, el2, msg='Dataset'): cls.compare_dimensioned(el1, el2) + tabular = not (el1.interface.gridded and el2.interface.gridded) + dimension_data = [(d, el1.dimension_values(d, expanded=tabular), + el2.dimension_values(d, expanded=tabular)) + for d in el1.kdims] + dimension_data += [(d, el1.dimension_values(d, flat=tabular), + el2.dimension_values(d, flat=tabular)) + for d in el1.vdims] if el1.shape[0] != el2.shape[0]: - raise AssertionError("%s not of matching length." % msg) - dimension_data = [(d, el1[d], el2[d]) for d in el1.dimensions()] + raise AssertionError("%s not of matching length, %d vs. %d." + % (msg, el1.shape[0], el2.shape[0])) for dim, d1, d2 in dimension_data: if d1.dtype != d2.dtype: cls.failureException("%s %s columns have different type." 
% (msg, dim.pprint_label) diff --git a/holoviews/element/graphs.py b/holoviews/element/graphs.py index 0424a63031..20e60d7345 100644 --- a/holoviews/element/graphs.py +++ b/holoviews/element/graphs.py @@ -577,8 +577,8 @@ def edgepaths(self): pad_width=((0, 0), (0, 1), (0, 0)), mode='constant', constant_values=np.nan).reshape(-1, 2)[:-1] - edgepaths = self.edge_type([paths], - kdims=self.nodes.kdims[:2]) + edgepaths = self.edge_type([paths], kdims=self.nodes.kdims[:2], + datatype=['multitabular']) self._edgepaths = edgepaths return edgepaths diff --git a/holoviews/element/path.py b/holoviews/element/path.py index e8f6b7bbfe..bbf260d82c 100644 --- a/holoviews/element/path.py +++ b/holoviews/element/path.py @@ -8,10 +8,10 @@ import numpy as np import param -from ..core import Element2D, Dataset +from ..core import Dataset from ..core.data import MultiInterface from ..core.dimension import Dimension, asdim -from ..core.util import OrderedDict, disable_constant, isscalar +from ..core.util import OrderedDict, disable_constant from .geom import Geometry @@ -55,8 +55,7 @@ class Path(Geometry): group = param.String(default="Path", constant=True) - datatype = param.ObjectSelector(default=[ - 'multitabular', 'dataframe', 'dictionary', 'dask', 'array']) + datatype = param.ObjectSelector(default=['multitabular', 'spatialpandas']) def __init__(self, data, kdims=None, vdims=None, **params): if isinstance(data, tuple) and len(data) == 2: @@ -80,20 +79,7 @@ def __init__(self, data, kdims=None, vdims=None, **params): paths.append(path.data) data = paths - datatype = params.pop('datatype', self.datatype) - - # Ensure that a list of tuples of scalars and any other non-list - # type is interpreted as a single path - if (not isinstance(data, (list, Dataset)) or - (isinstance(data, list) and not len(data) == 0 and all( - isinstance(d, tuple) and all(isscalar(v) for v in d) - for d in data))): - datatype = [dt for dt in datatype if dt != 'multitabular'] - elif isinstance(data, list) and 
'multitabular' not in datatype: - datatype = datatype + ['multitabular'] - - super(Path, self).__init__(data, kdims=kdims, vdims=vdims, - datatype=datatype, **params) + super(Path, self).__init__(data, kdims=kdims, vdims=vdims, **params) def __getitem__(self, key): @@ -110,12 +96,61 @@ def __getitem__(self, key): return self.clone(extents=(xstart, ystart, xstop, ystop)) - def select(self, *args, **kwargs): - """ - Bypasses selection on data and sets extents based on selection. - """ - return super(Element2D, self).select(*args, **kwargs) + def select(self, selection_expr=None, selection_specs=None, **selection): + """Applies selection by dimension name + + Applies a selection along the dimensions of the object using + keyword arguments. The selection may be narrowed to certain + objects using selection_specs. For container objects the + selection will be applied to all children as well. + + Selections may select a specific value, slice or set of values: + + * value: Scalar values will select rows along with an exact + match, e.g.: + + ds.select(x=3) + + * slice: Slices may be declared as tuples of the upper and + lower bound, e.g.: + ds.select(x=(0, 3)) + + * values: A list of values may be selected using a list or + set, e.g.: + + ds.select(x=[0, 1, 2]) + + * predicate expression: A holoviews.dim expression, e.g.: + + from holoviews import dim + ds.select(selection_expr=dim('x') % 2 == 0) + + Args: + selection_expr: holoviews.dim predicate expression + specifying selection. + selection_specs: List of specs to match on + A list of types, functions, or type[.group][.label] + strings specifying which objects to apply the + selection on. 
+ **selection: Dictionary declaring selections by dimension + Selections can be scalar values, tuple ranges, lists + of discrete values and boolean arrays + + Returns: + Returns an Dimensioned object containing the selected data + or a scalar if a single value was selected + """ + xdim, ydim = self.kdims[:2] + x_range = selection.pop(xdim.name, None) + y_range = selection.pop(ydim.name, None) + sel = super(Path, self).select(selection_expr, selection_specs, + **selection) + if x_range is None and y_range is None: + return sel + x_range = x_range if isinstance(x_range, slice) else slice(None) + y_range = y_range if isinstance(y_range, slice) else slice(None) + return sel[x_range, y_range] def split(self, start=None, end=None, datatype=None, **kwargs): """ @@ -130,10 +165,10 @@ def split(self, start=None, end=None, datatype=None, **kwargs): obj = self.array(**kwargs) elif datatype == 'dataframe': obj = self.dframe(**kwargs) - elif datatype == 'columns': + elif datatype in ('columns', 'dictionary'): obj = self.columns(**kwargs) elif datatype is None: - obj = self + obj = self.clone([self.data]) else: raise ValueError("%s datatype not support" % datatype) return [obj] @@ -225,10 +260,6 @@ def __init__(self, data, kdims=None, vdims=None, **params): if params.get('level') is not None: with disable_constant(self): self.vdims = [asdim(d) for d in vdims] - else: - all_scalar = all(self.interface.isscalar(self, vdim) for vdim in self.vdims) - if not all_scalar and not (not self.interface.multi and len(self) == 0): - raise ValueError("All value dimensions on a Contours element must be scalar") def dimension_values(self, dim, expanded=True, flat=True): dimension = self.get_dimension(dim, strict=True) diff --git a/holoviews/operation/datashader.py b/holoviews/operation/datashader.py index aaf1883a3f..724cc171cb 100644 --- a/holoviews/operation/datashader.py +++ b/holoviews/operation/datashader.py @@ -28,7 +28,7 @@ datetime_types, dt_to_int, get_param_values, max_range) from 
..element import (Image, Path, Curve, RGB, Graph, TriMesh, QuadMesh, Contours, Spikes, Area, Spread, - Segments, Scatter, Points) + Segments, Scatter, Points, Polygons) from ..streams import RangeXY, PlotSize ds_version = LooseVersion(ds.__version__) @@ -1255,6 +1255,61 @@ def _process(self, element, key=None): +class geometry_rasterize(AggregationOperation): + """ + Rasterizes geometries by converting them to spatialpandas. + """ + + aggregator = param.ClassSelector(default=ds.mean(), + class_=(ds.reductions.Reduction, basestring)) + + def _get_aggregator(self, element, add_field=True): + agg = self.p.aggregator + if not element.vdims and agg.column is None and not isinstance(agg, (rd.count, rd.any)): + return ds.count() + return super(geometry_rasterize, self)._get_aggregator(element, add_field) + + def _process(self, element, key=None): + agg_fn = self._get_aggregator(element) + xdim, ydim = element.kdims + info = self._get_sampling(element, xdim, ydim) + (x_range, y_range), (xs, ys), (width, height), (xtype, ytype) = info + x0, x1 = x_range + y0, y1 = y_range + + params = self._get_agg_params(element, xdim, ydim, agg_fn, (x0, y0, x1, y1)) + + if width == 0 or height == 0: + return self._empty_agg(element, xdim, ydim, width, height, xs, ys, agg_fn, **params) + + cvs = ds.Canvas(plot_width=width, plot_height=height, + x_range=x_range, y_range=y_range) + + if element.interface.datatype != 'spatialpandas': + element = element.clone(datatype=['spatialpandas']) + data = element.data + if isinstance(agg_fn, ds.count_cat): + data[agg_fn.column] = data[agg_fn.column].astype('category') + col = element.interface.geo_column(element.data) + + if isinstance(element, Polygons): + agg = cvs.polygons(data, geometry=col, agg=agg_fn) + elif isinstance(element, Path): + agg = cvs.line(data, geometry=col, agg=agg_fn) + elif isinstance(element, Points): + agg = cvs.points(data, geometry=col, agg=agg_fn) + + if agg.ndim == 2: + return self.p.element_type(agg, **params) + else: + 
layers = {} + for c in agg.coords[agg_fn.column].data: + cagg = agg.sel(**{agg_fn.column: c}) + layers[c] = self.p.element_type(cagg, **params) + return NdOverlay(layers, kdims=[element.get_dimension(agg_fn.column)]) + + + class rasterize(AggregationOperation): """ Rasterize is a high-level operation that will rasterize any @@ -1288,6 +1343,10 @@ class rasterize(AggregationOperation): of each other.""") _transforms = [(Image, regrid), + (Polygons, geometry_rasterize), + (lambda x: (isinstance(x, Path) and + x.interface.datatype == 'spatialpandas'), + geometry_rasterize), (TriMesh, trimesh_rasterize), (QuadMesh, quadmesh_rasterize), (lambda x: (isinstance(x, NdOverlay) and diff --git a/holoviews/plotting/bokeh/callbacks.py b/holoviews/plotting/bokeh/callbacks.py index 1f7580c970..477b1f8ec3 100644 --- a/holoviews/plotting/bokeh/callbacks.py +++ b/holoviews/plotting/bokeh/callbacks.py @@ -1061,8 +1061,10 @@ def _update_cds_vdims(self): # ensuring the element can be reconstituted in entirety element = self.plot.current_frame cds = self.plot.handles['cds'] + interface = element.interface + scalar_kwargs = {'per_geom': True} if interface.multi else {} for d in element.vdims: - scalar = element.interface.isscalar(element, d) + scalar = element.interface.isunique(element, d, **scalar_kwargs) dim = dimension_sanitizer(d.name) if dim not in cds.data: if scalar: diff --git a/holoviews/plotting/bokeh/element.py b/holoviews/plotting/bokeh/element.py index 3a77e4ce7d..630e083193 100644 --- a/holoviews/plotting/bokeh/element.py +++ b/holoviews/plotting/bokeh/element.py @@ -983,7 +983,7 @@ def _apply_transforms(self, element, data, ranges, style, group=None): list(self.overlay_dims)) val = v.apply(ds, ranges=ranges, flat=True)[0] elif isinstance(element, Path) and not isinstance(element, Contours): - val = np.concatenate([v.apply(el, ranges=ranges, flat=True)[:-1] + val = np.concatenate([v.apply(el, ranges=ranges, flat=True) for el in element.split()]) else: val = 
v.apply(element, ranges=ranges, flat=True) @@ -1006,6 +1006,8 @@ def _apply_transforms(self, element, data, ranges, style, group=None): elif data and len(val) != len(list(data.values())[0]): if isinstance(element, VectorField): val = np.tile(val, 3) + elif isinstance(element, Path) and not isinstance(element, Contours): + val = val[:-1] else: continue @@ -1089,6 +1091,7 @@ def _apply_transforms(self, element, data, ranges, style, group=None): if ((line_style is not None and (validate(s, line_style) and not hover)) or (line_style is None and not supports_fill)): new_style[line_key] = val + return new_style diff --git a/holoviews/plotting/bokeh/path.py b/holoviews/plotting/bokeh/path.py index c7b4e4188f..72c1f66688 100644 --- a/holoviews/plotting/bokeh/path.py +++ b/holoviews/plotting/bokeh/path.py @@ -58,15 +58,19 @@ def _get_hover_data(self, data, element): def get_data(self, element, ranges, style): color = style.get('color', None) cdim = None - if isinstance(color, util.basestring) and validate('color', color) == False: + if isinstance(color, util.basestring) and not validate('color', color): cdim = element.get_dimension(color) elif self.color_index is not None: cdim = element.get_dimension(self.color_index) - style_mapping = any( - s for s, v in style.items() if (s not in self._nonvectorized_styles) and - (isinstance(v, util.basestring) and v in element) or isinstance(v, dim)) + + scalar = element.interface.isunique(element, cdim, per_geom=True) if cdim else False + style_mapping = { + (s, v) for s, v in style.items() if (s not in self._nonvectorized_styles) and + ((isinstance(v, util.basestring) and v in element) or isinstance(v, dim)) and + not (v == color and s == 'color')} mapping = dict(self._mapping) - if not cdim and not style_mapping and 'hover' not in self.handles: + + if (not cdim or scalar) and not style_mapping and 'hover' not in self.handles: if self.static_source: data = {} else: @@ -81,7 +85,7 @@ def get_data(self, element, ranges, style): vals = 
defaultdict(list) if hover: vals.update({util.dimension_sanitizer(vd.name): [] for vd in element.vdims}) - if cdim: + if cdim and self.color_index is not None: dim_name = util.dimension_sanitizer(cdim.name) cmapper = self._get_colormapper(cdim, element, ranges, style) mapping['line_color'] = {'field': dim_name, 'transform': cmapper} @@ -89,8 +93,9 @@ def get_data(self, element, ranges, style): xpaths, ypaths = [], [] for path in element.split(): - if cdim: - cvals = path.dimension_values(cdim) + if cdim and self.color_index is not None: + scalar = path.interface.isunique(path, cdim, per_geom=True) + cvals = path.dimension_values(cdim, not scalar) vals[dim_name].append(cvals[:-1]) cols = path.columns(path.kdims) xs, ys = (cols[kd.name] for kd in element.kdims) @@ -105,7 +110,7 @@ def get_data(self, element, ranges, style): values = path.dimension_values(vd)[:-1] vd_name = util.dimension_sanitizer(vd.name) vals[vd_name].append(values) - if values.dtype.kind == 'M': + if values.dtype.kind == 'M' or (len(values) and isinstance(values[0], util.datetime_types)): vals[vd_name+'_dt_strings'].append([vd.pprint_value(v) for v in values]) values = {d: np.concatenate(vs) if len(vs) else [] for d, vs in vals.items()} if self.invert_axes: @@ -175,18 +180,22 @@ def _get_hover_data(self, data, element): if 'hover' not in self.handles or self.static_source: return + interface = element.interface + scalar_kwargs = {'per_geom': True} if interface.multi else {} npath = len([vs for vs in data.values()][0]) for d in element.vdims: dim = util.dimension_sanitizer(d.name) if dim not in data: if element.level is not None: data[dim] = np.full(npath, element.level) - elif element.interface.isscalar(element, d): + elif interface.isunique(element, d, **scalar_kwargs): data[dim] = element.dimension_values(d, expanded=False) else: data[dim] = element.split(datatype='array', dimensions=[d]) - elif isinstance(data[dim], np.ndarray) and data[dim].dtype.kind == 'M': - data[dim+'_dt_strings'] = 
[d.pprint_value(v) for v in data[dim]] + values = data[dim] + if ((isinstance(values, np.ndarray) and values.dtype.kind == 'M') or + (len(values) and isinstance(values[0], util.datetime_types))): + data[dim+'_dt_strings'] = [d.pprint_value(v) for v in values] for k, v in self.overlay_dims.items(): dim = util.dimension_sanitizer(k.name) @@ -202,15 +211,17 @@ def get_data(self, element, ranges, style): else: has_holes = self._has_holes + if not element.interface.multi: + element = element.clone([element.data], datatype=type(element).datatype) + if self.static_source: data = dict() xs = self.handles['cds'].data['xs'] else: - if has_holes and bokeh_version >= '1.0': + if has_holes: xs, ys = multi_polygons_data(element) else: - paths = element.split(datatype='array', dimensions=element.kdims) - xs, ys = ([path[:, idx] for path in paths] for idx in (0, 1)) + xs, ys = (element.dimension_values(kd, expanded=False) for kd in element.kdims) if self.invert_axes: xs, ys = ys, xs data = dict(xs=xs, ys=ys) @@ -240,7 +251,7 @@ def get_data(self, element, ranges, style): if cdim.name in ranges and 'factors' in ranges[cdim.name]: factors = ranges[cdim.name]['factors'] else: - factors = util.unique_array(values) if values.dtype.kind in 'SUO' else None + factors = util.unique_array(np.concatenate(values)) if values.dtype.kind in 'SUO' else None cmapper = self._get_colormapper(cdim, element, ranges, style, factors) mapping[self._color_style] = {'field': dim_name, 'transform': cmapper} if self.show_legend: diff --git a/holoviews/plotting/bokeh/util.py b/holoviews/plotting/bokeh/util.py index cf98ad911d..d40e895112 100644 --- a/holoviews/plotting/bokeh/util.py +++ b/holoviews/plotting/bokeh/util.py @@ -863,8 +863,7 @@ def multi_polygons_data(element): representation. Multi-polygons split by nans are expanded and the correct list of holes is assigned to each sub-polygon. 
""" - paths = element.split(datatype='array', dimensions=element.kdims) - xs, ys = ([path[:, idx] for path in paths] for idx in (0, 1)) + xs, ys = (element.dimension_values(kd, expanded=False) for kd in element.kdims) holes = element.holes() xsh, ysh = [], [] for x, y, multi_hole in zip(xs, ys, holes): diff --git a/holoviews/plotting/mpl/element.py b/holoviews/plotting/mpl/element.py index c7c1754d5e..bfa6f48656 100644 --- a/holoviews/plotting/mpl/element.py +++ b/holoviews/plotting/mpl/element.py @@ -572,7 +572,7 @@ def _apply_transforms(self, element, ranges, style): list(self.overlay_dims)) val = v.apply(ds, ranges=ranges, flat=True)[0] elif type(element) is Path: - val = np.concatenate([v.apply(el, ranges=ranges, flat=True)[:-1] + val = np.concatenate([v.apply(el, ranges=ranges, flat=True) for el in element.split()]) else: val = v.apply(element, ranges) @@ -830,7 +830,7 @@ def _norm_kwargs(self, element, ranges, opts, vdim, values=None, prefix=''): isinstance(element, Dataset) and element.interface.multi and (getattr(element, 'level', None) is not None or - element.interface.isscalar(element, vdim.name)) + element.interface.isunique(element, vdim.name, True)) ) values = np.asarray(element.dimension_values(vdim, expanded=expanded)) diff --git a/holoviews/plotting/mpl/path.py b/holoviews/plotting/mpl/path.py index 88184d4a01..ba75335dcd 100644 --- a/holoviews/plotting/mpl/path.py +++ b/holoviews/plotting/mpl/path.py @@ -30,11 +30,14 @@ class PathPlot(ColorbarPlot): style_opts = ['alpha', 'color', 'linestyle', 'linewidth', 'visible', 'cmap'] def get_data(self, element, ranges, style): + cdim = element.get_dimension(self.color_index or style.get('color')) + with abbreviated_exception(): style = self._apply_transforms(element, ranges, style) - cdim = element.get_dimension(self.color_index) - style_mapping = any(True for v in style.values() if isinstance(v, util.arraylike_types)) + scalar = element.interface.isunique(element, cdim, per_geom=True) if cdim else False + 
style_mapping = any(isinstance(v, util.arraylike_types) and not (k == 'c' and scalar) + for k, v in style.items()) dims = element.kdims xdim, ydim = dims generic_dt_format = Dimension.type_formatters[np.datetime64] @@ -50,7 +53,7 @@ def get_data(self, element, ranges, style): yarr = date2num(yarr) dims[1] = ydim(value_format=DateFormatter(dt_format)) arr = np.column_stack([xarr, yarr]) - if not (cdim or style_mapping): + if not (self.color_index is not None or style_mapping): paths.append(arr) continue length = len(xarr) @@ -60,7 +63,10 @@ def get_data(self, element, ranges, style): paths.append(arr[s1:s2+1]) if self.invert_axes: paths = [p[::-1] for p in paths] - if not (cdim or style_mapping): + if not (self.color_index or style_mapping): + if cdim: + style['array'] = style.pop('c') + style['clim'] = style.pop('vmin', None), style.pop('vmax', None) return (paths,), style, {'dimensions': dims} if cdim: self._norm_kwargs(element, ranges, style, cdim) diff --git a/holoviews/plotting/plot.py b/holoviews/plotting/plot.py index e08fefba5a..6c0975afe6 100644 --- a/holoviews/plotting/plot.py +++ b/holoviews/plotting/plot.py @@ -704,6 +704,9 @@ def _compute_group_range(cls, group, elements, ranges): values = el.dimension_values(el_dim, expanded=False) elif isinstance(el, Graph) and el_dim in el.nodes: values = el.nodes.dimension_values(el_dim, expanded=False) + if (isinstance(values, np.ndarray) and values.dtype.kind == 'O' and + all(isinstance(v, (np.ndarray)) for v in values)): + values = np.concatenate(values) factors = util.unique_array(values) group_ranges[el_dim.name]['factors'].append(factors) diff --git a/holoviews/tests/core/data/testgridinterface.py b/holoviews/tests/core/data/testgridinterface.py index 2ed298ebda..7b5758e092 100644 --- a/holoviews/tests/core/data/testgridinterface.py +++ b/holoviews/tests/core/data/testgridinterface.py @@ -33,19 +33,13 @@ class GridInterfaceTests(GriddedInterfaceTests, HomogeneousColumnTests, Interfac @pd_skip def 
test_dataset_dataframe_init_hm(self): - "Tests support for homogeneous DataFrames" - exception = "None of the available storage backends "\ - "were able to support the supplied data format." - with self.assertRaisesRegexp(Exception, exception): + with self.assertRaises(Exception): Dataset(pd.DataFrame({'x':self.xs, 'x2':self.xs_2}), kdims=['x'], vdims=['x2']) @pd_skip def test_dataset_dataframe_init_hm_alias(self): - "Tests support for homogeneous DataFrames" - exception = "None of the available storage backends "\ - "were able to support the supplied data format." - with self.assertRaisesRegexp(Exception, exception): + with self.assertRaises(Exception): Dataset(pd.DataFrame({'x':self.xs, 'x2':self.xs_2}), kdims=['x'], vdims=['x2']) diff --git a/holoviews/tests/core/data/testmultiinterface.py b/holoviews/tests/core/data/testmultiinterface.py index 7516b545c3..d7f231b162 100644 --- a/holoviews/tests/core/data/testmultiinterface.py +++ b/holoviews/tests/core/data/testmultiinterface.py @@ -4,11 +4,14 @@ from unittest import SkipTest +import logging + import numpy as np -from holoviews.core.data import Dataset -from holoviews.core.data.interface import DataError -from holoviews.element import Path + +from holoviews.core.data import Dataset, MultiInterface +from holoviews.element import Path, Points, Polygons from holoviews.element.comparison import ComparisonTestCase +from param import get_logger try: import pandas as pd @@ -20,166 +23,504 @@ except: dd = None -class MultiInterfaceTest(ComparisonTestCase): + +class GeomTests(ComparisonTestCase): """ Test of the MultiInterface. 
""" - def test_multi_array_dataset(self): - arrays = [np.column_stack([np.arange(i, i+2), np.arange(i, i+2)]) for i in range(2)] - mds = Path(arrays, kdims=['x', 'y'], datatype=['multitabular']) - for i, ds in enumerate(mds.split()): - self.assertEqual(ds, Path(arrays[i], kdims=['x', 'y'], datatype=['array'])) + datatype = None - def test_multi_dict_dataset(self): - arrays = [{'x': np.arange(i, i+2), 'y': np.arange(i, i+2)} for i in range(2)] - mds = Path(arrays, kdims=['x', 'y'], datatype=['multitabular']) - for i, ds in enumerate(mds.split()): - self.assertEqual(ds, Path(arrays[i], kdims=['x', 'y'], datatype=['dictionary'])) + interface = None - def test_multi_df_dataset(self): + __test__ = False + + def test_array_dataset(self): + arrays = [np.column_stack([np.arange(i, i+2), np.arange(i, i+2)]) for i in range(2)] + mds = Path(arrays, kdims=['x', 'y'], datatype=[self.datatype]) + self.assertIs(mds.interface, self.interface) + for i, array in enumerate(mds.split(datatype='array')): + self.assertEqual(array, arrays[i]) + + def test_dict_dataset(self): + dicts = [{'x': np.arange(i, i+2), 'y': np.arange(i, i+2)} for i in range(2)] + mds = Path(dicts, kdims=['x', 'y'], datatype=[self.datatype]) + self.assertIs(mds.interface, self.interface) + for i, cols in enumerate(mds.split(datatype='columns')): + self.assertEqual(dict(cols), dict(dicts[i], geom_type='Line')) + + def test_df_dataset(self): if not pd: raise SkipTest('Pandas not available') - arrays = [pd.DataFrame(np.column_stack([np.arange(i, i+2), np.arange(i, i+2)]), columns=['x', 'y']) + dfs = [pd.DataFrame(np.column_stack([np.arange(i, i+2), np.arange(i, i+2)]), columns=['x', 'y']) for i in range(2)] - mds = Path(arrays, kdims=['x', 'y'], datatype=['multitabular']) - for i, ds in enumerate(mds.split()): - self.assertEqual(ds, Path(arrays[i], kdims=['x', 'y'], datatype=['dataframe'])) - - def test_multi_dask_df_dataset(self): - if not dd: - raise SkipTest('Dask not available') - arrays = 
[dd.from_pandas(pd.DataFrame(np.column_stack([np.arange(i, i+2), np.arange(i, i+2)]), - columns=['x', 'y']), npartitions=2) - for i in range(2)] - mds = Path(arrays, kdims=['x', 'y'], datatype=['multitabular']) - for i, ds in enumerate(mds.split()): - self.assertEqual(ds, Path(arrays[i], kdims=['x', 'y'], datatype=['dask'])) + mds = Path(dfs, kdims=['x', 'y'], datatype=[self.datatype]) + self.assertIs(mds.interface, self.interface) + for i, ds in enumerate(mds.split(datatype='dataframe')): + self.assertEqual(ds, dfs[i]) - def test_multi_array_dataset_add_dimension_scalar(self): + def test_array_dataset_add_dimension_scalar(self): arrays = [np.column_stack([np.arange(i, i+2), np.arange(i, i+2)]) for i in range(2)] - mds = Path(arrays, kdims=['x', 'y'], datatype=['multitabular']).add_dimension('A', 0, 'Scalar', True) - for i, ds in enumerate(mds.split()): - self.assertEqual(ds, Path({('x', 'y'): arrays[i], 'A': 'Scalar'}, ['x', 'y'], - 'A', datatype=['dictionary'])) - - def test_multi_dict_dataset_add_dimension_scalar(self): + mds = Path(arrays, kdims=['x', 'y'], datatype=[self.datatype]).add_dimension('A', 0, 'Scalar', True) + self.assertIs(mds.interface, self.interface) + self.assertEqual(mds, Path([{('x', 'y'): arrays[i], 'A': 'Scalar'} for i in range(2)], + ['x', 'y'], 'A')) + + def test_dict_dataset_add_dimension_scalar(self): arrays = [{'x': np.arange(i, i+2), 'y': np.arange(i, i+2)} for i in range(2)] - mds = Path(arrays, kdims=['x', 'y'], datatype=['multitabular']).add_dimension('A', 0, 'Scalar', True) - for i, ds in enumerate(mds.split()): - self.assertEqual(ds, Path(dict(arrays[i], A='Scalar'), ['x', 'y'], - 'A', datatype=['dictionary'])) + mds = Path(arrays, kdims=['x', 'y'], datatype=[self.datatype]).add_dimension('A', 0, 'Scalar', True) + self.assertIs(mds.interface, self.interface) + self.assertEqual(mds, Path([dict(arrays[i], A='Scalar') for i in range(2)], ['x', 'y'], + 'A', datatype=['multitabular'])) - def 
test_multi_dict_dataset_add_dimension_values(self): + def test_dict_dataset_add_dimension_values(self): arrays = [{'x': np.arange(i, i+2), 'y': np.arange(i, i+2)} for i in range(2)] - mds = Path(arrays, kdims=['x', 'y'], datatype=['multitabular']).add_dimension('A', 0, [0,1], True) - for i, ds in enumerate(mds.split()): - self.assertEqual(ds, Path(dict(arrays[i], A=i), ['x', 'y'], - 'A', datatype=['dictionary'])) + mds = Path(arrays, kdims=['x', 'y'], datatype=[self.datatype]).add_dimension('A', 0, [0,1], True) + self.assertIs(mds.interface, self.interface) + self.assertEqual(mds, Path([dict(arrays[i], A=i) for i in range(2)], ['x', 'y'], + 'A', datatype=['multitabular'])) - def test_multi_array_length(self): + def test_array_length(self): arrays = [np.column_stack([np.arange(i, i+2), np.arange(i, i+2)]) for i in range(2)] - mds = Path(arrays, kdims=['x', 'y'], datatype=['multitabular']) - self.assertEqual(len(mds), 5) + mds = Path(arrays, kdims=['x', 'y'], datatype=[self.datatype]) + self.assertIs(mds.interface, self.interface) + self.assertEqual(len(mds), 2) - def test_multi_empty_length(self): - mds = Path([], kdims=['x', 'y'], datatype=['multitabular']) + def test_array_length_points(self): + arrays = [np.column_stack([np.arange(i, i+2), np.arange(i, i+2)]) for i in range(2)] + mds = Points(arrays, kdims=['x', 'y'], datatype=[self.datatype]) + self.assertIs(mds.interface, self.interface) + self.assertEqual(len(mds), 4) + + def test_empty_length(self): + mds = Path([], kdims=['x', 'y'], datatype=[self.datatype]) + self.assertIs(mds.interface, self.interface) self.assertEqual(len(mds), 0) - def test_multi_array_range(self): + def test_empty_range(self): + mds = Path([], kdims=['x', 'y'], datatype=[self.datatype]) + self.assertIs(mds.interface, self.interface) + x0, x1 = mds.range(0) + self.assertFalse(np.isfinite(x0)) + self.assertFalse(np.isfinite(x0)) + y0, y1 = mds.range(1) + self.assertFalse(np.isfinite(y0)) + self.assertFalse(np.isfinite(y1)) + + def 
test_array_range(self): arrays = [np.column_stack([np.arange(i, i+2), np.arange(i, i+2)]) for i in range(2)] - mds = Path(arrays, kdims=['x', 'y'], datatype=['multitabular']) + mds = Path(arrays, kdims=['x', 'y'], datatype=[self.datatype]) + self.assertIs(mds.interface, self.interface) self.assertEqual(mds.range(0), (0, 2)) - def test_multi_empty_range(self): - mds = Path([], kdims=['x', 'y'], datatype=['multitabular']) - low, high = mds.range(0) - self.assertFalse(np.isfinite(np.NaN)) - self.assertFalse(np.isfinite(np.NaN)) + def test_array_shape(self): + arrays = [np.column_stack([np.arange(i, i+2), np.arange(i, i+2)]) for i in range(2)] + mds = Path(arrays, kdims=['x', 'y'], datatype=[self.datatype]) + self.assertIs(mds.interface, self.interface) + self.assertEqual(mds.shape, (2, 2)) - def test_multi_array_shape(self): + def test_array_shape_points(self): arrays = [np.column_stack([np.arange(i, i+2), np.arange(i, i+2)]) for i in range(2)] - mds = Path(arrays, kdims=['x', 'y'], datatype=['multitabular']) - self.assertEqual(mds.shape, (5, 2)) + mds = Points(arrays, kdims=['x', 'y'], datatype=[self.datatype]) + self.assertIs(mds.interface, self.interface) + self.assertEqual(mds.shape, (4, 2)) - def test_multi_empty_shape(self): - mds = Path([], kdims=['x', 'y'], datatype=['multitabular']) + def test_empty_shape(self): + mds = Path([], kdims=['x', 'y'], datatype=[self.datatype]) + self.assertIs(mds.interface, self.interface) self.assertEqual(mds.shape, (0, 2)) - def test_multi_array_values(self): + def test_array_values(self): arrays = [np.column_stack([np.arange(i, i+2), np.arange(i, i+2)]) for i in range(2)] - mds = Path(arrays, kdims=['x', 'y'], datatype=['multitabular']) + mds = Path(arrays, kdims=['x', 'y'], datatype=[self.datatype]) + self.assertIs(mds.interface, self.interface) self.assertEqual(mds.dimension_values(0), np.array([0., 1, np.NaN, 1, 2])) - def test_multi_empty_array_values(self): - mds = Path([], kdims=['x', 'y'], datatype=['multitabular']) + 
def test_empty_array_values(self): + mds = Path([], kdims=['x', 'y'], datatype=[self.datatype]) + self.assertIs(mds.interface, self.interface) self.assertEqual(mds.dimension_values(0), np.array([])) - def test_multi_array_values_coordinates_nonexpanded(self): + def test_array_values_coordinates_nonexpanded(self): arrays = [np.column_stack([np.arange(i, i+2), np.arange(i, i+2)]) for i in range(2)] - mds = Path(arrays, kdims=['x', 'y'], datatype=['multitabular']) - self.assertEqual(mds.dimension_values(0, expanded=False), np.array([0., 1, 1, 2])) + mds = Path(arrays, kdims=['x', 'y'], datatype=[self.datatype]) + self.assertIs(mds.interface, self.interface) + values = mds.dimension_values(0, expanded=False) + self.assertEqual(values[0], np.array([0., 1])) + self.assertEqual(values[1], np.array([1, 2])) - def test_multi_array_values_coordinates_nonexpanded_constant_kdim(self): + def test_array_values_coordinates_nonexpanded_constant_kdim(self): arrays = [np.column_stack([np.arange(i, i+2), np.arange(i, i+2), np.ones(2)*i]) for i in range(2)] - mds = Path(arrays, kdims=['x', 'y'], vdims=['z'], datatype=['multitabular']) + mds = Path(arrays, kdims=['x', 'y'], vdims=['z'], datatype=[self.datatype]) + self.assertIs(mds.interface, self.interface) self.assertEqual(mds.dimension_values(2, expanded=False), np.array([0, 1])) - def test_multi_array_redim(self): + def test_scalar_value_isscalar_per_geom(self): + path = Path([{'x': [1, 2, 3, 4, 5], 'y': [0, 0, 1, 1, 2], 'value': 0}, + {'x': [5, 4, 3, 2, 1], 'y': [2, 2, 1, 1, 0], 'value': 1}], + vdims='value', datatype=[self.datatype]) + self.assertIs(path.interface, self.interface) + self.assertTrue(path.interface.isscalar(path, 'value', per_geom=True)) + + def test_unique_values_isscalar_per_geom(self): + path = Path([{'x': [1, 2, 3, 4, 5], 'y': [0, 0, 1, 1, 2], 'value': np.full(5, 0)}, + {'x': [5, 4, 3, 2, 1], 'y': [2, 2, 1, 1, 0], 'value': np.full(5, 1)}], + vdims='value', datatype=[self.datatype]) + 
self.assertIs(path.interface, self.interface) + self.assertTrue(path.interface.isscalar(path, 'value', per_geom=True)) + + def test_scalar_and_unique_values_isscalar_per_geom(self): + path = Path([{'x': [1, 2, 3, 4, 5], 'y': [0, 0, 1, 1, 2], 'value': 0}, + {'x': [5, 4, 3, 2, 1], 'y': [2, 2, 1, 1, 0], 'value': np.full(5, 1)}], + vdims='value', datatype=[self.datatype]) + self.assertIs(path.interface, self.interface) + self.assertTrue(path.interface.isscalar(path, 'value', per_geom=True)) + + def test_varying_values_not_isscalar_per_geom(self): + path = Path([{'x': [1, 2, 3, 4, 5], 'y': [0, 0, 1, 1, 2], 'value': np.arange(5)}, + {'x': [5, 4, 3, 2, 1], 'y': [2, 2, 1, 1, 0], 'value': np.full(5, 1)}], + vdims='value', datatype=[self.datatype]) + self.assertIs(path.interface, self.interface) + self.assertFalse(path.interface.isscalar(path, 'value', per_geom=True)) + + def test_varying_values_and_scalar_not_isscalar_per_geom(self): + path = Path([{'x': [1, 2, 3, 4, 5], 'y': [0, 0, 1, 1, 2], 'value': np.arange(5)}, + {'x': [5, 4, 3, 2, 1], 'y': [2, 2, 1, 1, 0], 'value': 1}], + vdims='value', datatype=[self.datatype]) + self.assertIs(path.interface, self.interface) + self.assertFalse(path.interface.isscalar(path, 'value', per_geom=True)) + + def test_scalar_value_dimension_values_expanded(self): + path = Path([{'x': [1, 2, 3, 4, 5], 'y': [0, 0, 1, 1, 2], 'value': 0}, + {'x': [5, 4, 3, 2, 1], 'y': [2, 2, 1, 1, 0], 'value': 1}], + vdims='value', datatype=[self.datatype]) + self.assertIs(path.interface, self.interface) + self.assertEqual(path.dimension_values('value'), np.array([0, 0, 0, 0, 0, np.nan, 1, 1, 1, 1, 1])) + + def test_scalar_value_dimension_values_not_expanded(self): + path = Path([{'x': [1, 2, 3, 4, 5], 'y': [0, 0, 1, 1, 2], 'value': 0}, + {'x': [5, 4, 3, 2, 1], 'y': [2, 2, 1, 1, 0], 'value': 1}], + vdims='value', datatype=[self.datatype]) + self.assertIs(path.interface, self.interface) + self.assertEqual(path.dimension_values('value', expanded=False), + 
np.array([0, 1])) + + def test_unique_value_dimension_values_expanded(self): + path = Path([{'x': [1, 2, 3, 4, 5], 'y': [0, 0, 1, 1, 2], 'value': np.full(5, 0)}, + {'x': [5, 4, 3, 2, 1], 'y': [2, 2, 1, 1, 0], 'value': np.full(5, 1)}], + vdims='value', datatype=[self.datatype]) + self.assertIs(path.interface, self.interface) + self.assertEqual(path.dimension_values('value'), np.array([0, 0, 0, 0, 0, np.nan, 1, 1, 1, 1, 1])) + + def test_unique_value_dimension_values_not_expanded(self): + path = Path([{'x': [1, 2, 3, 4, 5], 'y': [0, 0, 1, 1, 2], 'value': np.full(5, 0)}, + {'x': [5, 4, 3, 2, 1], 'y': [2, 2, 1, 1, 0], 'value': np.full(5, 1)}], + vdims='value', datatype=[self.datatype]) + self.assertIs(path.interface, self.interface) + self.assertEqual(path.dimension_values('value', expanded=False), + np.array([0, 1])) + + def test_varying_value_dimension_values_expanded(self): + path = Path([{'x': [1, 2, 3, 4, 5], 'y': [0, 0, 1, 1, 2], 'value': np.arange(5)}, + {'x': [5, 4, 3, 2, 1], 'y': [2, 2, 1, 1, 0], 'value': np.full(5, 1)}], + vdims='value', datatype=[self.datatype]) + self.assertIs(path.interface, self.interface) + self.assertEqual(path.dimension_values('value'), np.array([0, 1, 2, 3, 4, np.nan, 1, 1, 1, 1, 1])) + + def test_varying_value_dimension_values_not_expanded(self): + path = Path([{'x': [1, 2, 3, 4, 5], 'y': [0, 0, 1, 1, 2], 'value': np.arange(5)}, + {'x': [5, 4, 3, 2, 1], 'y': [2, 2, 1, 1, 0], 'value': np.full(5, 1)}], + vdims='value', datatype=[self.datatype]) + self.assertIs(path.interface, self.interface) + values = path.dimension_values('value', expanded=False) + self.assertEqual(values[0], np.array([0, 1, 2, 3, 4])) + self.assertEqual(values[1], 1) + self.assertIsInstance(values[1], np.int_) + + def test_array_redim(self): arrays = [np.column_stack([np.arange(i, i+2), np.arange(i, i+2)]) for i in range(2)] - mds = Path(arrays, kdims=['x', 'y'], datatype=['multitabular']).redim(x='x2') - for i, ds in enumerate(mds.split()): - self.assertEqual(ds, 
Path(arrays[i], kdims=['x2', 'y'])) - - def test_multi_mixed_interface_raises(self): - arrays = [np.random.rand(10, 2) if j else {'x': range(10), 'y': range(10)} - for i in range(2) for j in range(2)] - with self.assertRaises(DataError): - Path(arrays, kdims=['x', 'y'], datatype=['multitabular']) + mds = Path(arrays, kdims=['x', 'y'], datatype=[self.datatype]).redim(x='x2') + self.assertIs(mds.interface, self.interface) + self.assertEqual(mds, Path([arrays[i] for i in range(2)], ['x2', 'y'])) - def test_multi_mixed_dims_raises(self): + def test_mixed_dims_raises(self): arrays = [{'x': range(10), 'y' if j else 'z': range(10)} for i in range(2) for j in range(2)] - with self.assertRaises(DataError): - Path(arrays, kdims=['x', 'y'], datatype=['multitabular']) + with self.assertRaises(ValueError): + Path(arrays, kdims=['x', 'y'], datatype=[self.datatype]) - def test_multi_split(self): + def test_split_into_arrays(self): arrays = [np.column_stack([np.arange(i, i+2), np.arange(i, i+2)]) for i in range(2)] - mds = Path(arrays, kdims=['x', 'y'], datatype=['multitabular']) + mds = Path(arrays, kdims=['x', 'y'], datatype=[self.datatype]) + self.assertIs(mds.interface, self.interface) for arr1, arr2 in zip(mds.split(datatype='array'), arrays): self.assertEqual(arr1, arr2) - def test_multi_split_empty(self): - mds = Path([], kdims=['x', 'y'], datatype=['multitabular']) + def test_split_empty(self): + mds = Path([], kdims=['x', 'y'], datatype=[self.datatype]) + self.assertIs(mds.interface, self.interface) self.assertEqual(len(mds.split()), 0) - def test_multi_values_empty(self): - mds = Path([], kdims=['x', 'y'], datatype=['multitabular']) + def test_values_empty(self): + mds = Path([], kdims=['x', 'y'], datatype=[self.datatype]) + self.assertIs(mds.interface, self.interface) self.assertEqual(mds.dimension_values(0), np.array([])) - def test_multi_dict_groupby(self): + def test_dict_groupby_non_scalar(self): arrays = [{'x': np.arange(i, i+2), 'y': i} for i in range(2)] - mds = 
Dataset(arrays, kdims=['x', 'y'], datatype=['multitabular']) - for i, (k, ds) in enumerate(mds.groupby('y').items()): - self.assertEqual(k, arrays[i]['y']) - self.assertEqual(ds, Dataset([arrays[i]], kdims=['x'])) + mds = Dataset(arrays, kdims=['x', 'y'], datatype=[self.datatype]) + self.assertIs(mds.interface, self.interface) + with self.assertRaises(ValueError): + mds.groupby('x') - def test_multi_dict_groupby_non_scalar(self): - arrays = [{'x': np.arange(i, i+2), 'y': i} for i in range(2)] - mds = Dataset(arrays, kdims=['x', 'y'], datatype=['multitabular']) + def test_array_groupby_non_scalar(self): + arrays = [np.array([(1+i, i), (2+i, i), (3+i, i)]) for i in range(2)] + mds = Dataset(arrays, kdims=['x', 'y'], datatype=[self.datatype]) + self.assertIs(mds.interface, self.interface) with self.assertRaises(ValueError): mds.groupby('x') - def test_multi_array_groupby(self): + def test_array_points_iloc_index_row(self): arrays = [np.array([(1+i, i), (2+i, i), (3+i, i)]) for i in range(2)] - mds = Dataset(arrays, kdims=['x', 'y'], datatype=['multitabular']) - for i, (k, ds) in enumerate(mds.groupby('y').items()): - self.assertEqual(k, arrays[i][0, 1]) - self.assertEqual(ds, Dataset([arrays[i]], kdims=['x'])) + mds = Points(arrays, kdims=['x', 'y'], datatype=[self.datatype]) + self.assertIs(mds.interface, self.interface) + self.assertEqual(mds.iloc[1], Points([(2, 0)], ['x', 'y'])) - def test_multi_array_groupby_non_scalar(self): + def test_array_points_iloc_slice_rows(self): arrays = [np.array([(1+i, i), (2+i, i), (3+i, i)]) for i in range(2)] - mds = Dataset(arrays, kdims=['x', 'y'], datatype=['multitabular']) - with self.assertRaises(ValueError): - mds.groupby('x') + mds = Points(arrays, kdims=['x', 'y'], datatype=[self.datatype]) + self.assertIs(mds.interface, self.interface) + self.assertEqual(mds.iloc[2:4], Points([(3, 0), (2, 1)], ['x', 'y'])) + + def test_array_points_iloc_slice_rows_no_start(self): + arrays = [np.array([(1+i, i), (2+i, i), (3+i, i)]) for i 
in range(2)] + mds = Points(arrays, kdims=['x', 'y'], datatype=[self.datatype]) + self.assertIs(mds.interface, self.interface) + self.assertEqual(mds.iloc[:4], Points([(1, 0), (2, 0), (3, 0), (2, 1)], ['x', 'y'])) + + def test_array_points_iloc_slice_rows_no_stop(self): + arrays = [np.array([(1+i, i), (2+i, i), (3+i, i)]) for i in range(2)] + mds = Points(arrays, kdims=['x', 'y'], datatype=[self.datatype]) + self.assertIs(mds.interface, self.interface) + self.assertEqual(mds.iloc[2:], Points([(3, 0), (2, 1), (3, 1), (4, 1)], ['x', 'y'])) + + def test_array_points_iloc_index_rows(self): + arrays = [np.array([(1+i, i), (2+i, i), (3+i, i)]) for i in range(2)] + mds = Points(arrays, kdims=['x', 'y'], datatype=[self.datatype]) + self.assertIs(mds.interface, self.interface) + self.assertEqual(mds.iloc[[1, 3, 4]], Points([(2, 0), (2, 1), (3, 1)], ['x', 'y'])) + + def test_array_points_iloc_index_rows_index_cols(self): + arrays = [np.array([(1+i, i), (2+i, i), (3+i, i)]) for i in range(2)] + mds = Points(arrays, kdims=['x', 'y'], datatype=[self.datatype]) + self.assertIs(mds.interface, self.interface) + self.assertEqual(mds.iloc[3, 0], 2) + self.assertEqual(mds.iloc[3, 1], 1) + + def test_multi_polygon_iloc_index_row(self): + xs = [1, 2, 3, np.nan, 6, 7, 3] + ys = [2, 0, 7, np.nan, 7, 5, 2] + holes = [ + [[(1.5, 2), (2, 3), (1.6, 1.6)], [(2.1, 4.5), (2.5, 5), (2.3, 3.5)]], + [] + ] + poly = Polygons([{'x': xs, 'y': ys, 'holes': holes, 'z': 1}, + {'x': xs[::-1], 'y': ys[::-1], 'z': 2}], + ['x', 'y'], 'z', datatype=[self.datatype]) + expected = Polygons([{'x': xs, 'y': ys, 'holes': holes, 'z': 1}], + ['x', 'y'], 'z', datatype=[self.datatype]) + self.assertIs(poly.interface, self.interface) + self.assertEqual(poly.iloc[0], expected) + + def test_multi_polygon_iloc_index_rows(self): + xs = [1, 2, 3, np.nan, 6, 7, 3] + ys = [2, 0, 7, np.nan, 7, 5, 2] + holes = [ + [[(1.5, 2), (2, 3), (1.6, 1.6)], [(2.1, 4.5), (2.5, 5), (2.3, 3.5)]], + [] + ] + poly = Polygons([{'x': xs, 'y': 
ys, 'holes': holes, 'z': 1}, + {'x': xs[::-1], 'y': ys[::-1], 'z': 2}, + {'x': xs, 'y': ys, 'z': 3}], + ['x', 'y'], 'z', datatype=[self.datatype]) + expected = Polygons([{'x': xs, 'y': ys, 'holes': holes, 'z': 1}, + {'x': xs, 'y': ys, 'holes': holes, 'z': 3}], + ['x', 'y'], 'z', datatype=[self.datatype]) + self.assertIs(poly.interface, self.interface) + self.assertEqual(poly.iloc[[0, 2]], expected) + + def test_multi_polygon_iloc_slice_rows(self): + xs = [1, 2, 3, np.nan, 6, 7, 3] + ys = [2, 0, 7, np.nan, 7, 5, 2] + holes = [ + [[(1.5, 2), (2, 3), (1.6, 1.6)], [(2.1, 4.5), (2.5, 5), (2.3, 3.5)]], + [] + ] + poly = Polygons([{'x': xs, 'y': ys, 'holes': holes, 'z': 1}, + {'x': xs[::-1], 'y': ys[::-1], 'z': 2}, + {'x': xs, 'y': ys, 'z': 3}], + ['x', 'y'], 'z', datatype=[self.datatype]) + expected = Polygons([{'x': xs[::-1], 'y': ys[::-1], 'z': 2}, + {'x': xs, 'y': ys, 'holes': holes, 'z': 3}], + ['x', 'y'], 'z', datatype=[self.datatype]) + self.assertIs(poly.interface, self.interface) + self.assertEqual(poly.iloc[1:3], expected) + + def test_polygon_expanded_values(self): + xs = [1, 2, 3] + ys = [2, 0, 7] + poly = Polygons([{'x': xs, 'y': ys, 'z': 1}], + ['x', 'y'], 'z', datatype=[self.datatype]) + self.assertEqual(poly.dimension_values(0), np.array([1, 2, 3, 1])) + self.assertEqual(poly.dimension_values(1), np.array([2, 0, 7, 2])) + self.assertEqual(poly.dimension_values(2), np.array([1, 1, 1, 1])) + + def test_polygons_expanded_values(self): + xs = [1, 2, 3] + ys = [2, 0, 7] + poly = Polygons([{'x': xs, 'y': ys, 'z': 1}, + {'x': xs, 'y': ys, 'z': 2}], + ['x', 'y'], 'z', datatype=[self.datatype]) + self.assertEqual(poly.dimension_values(0), np.array([1, 2, 3, 1, np.nan, 1, 2, 3, 1])) + self.assertEqual(poly.dimension_values(1), np.array([2, 0, 7, 2, np.nan, 2, 0, 7, 2])) + self.assertEqual(poly.dimension_values(2), np.array([1, 1, 1, 1, np.nan, 2, 2, 2, 2])) + + def test_multi_polygon_expanded_values(self): + xs = [1, 2, 3, np.nan, 1, 2, 3] + ys = [2, 0, 7, np.nan, 
2, 0, 7] + poly = Polygons([{'x': xs, 'y': ys, 'z': 1}], + ['x', 'y'], 'z', datatype=[self.datatype]) + self.assertEqual(poly.dimension_values(0), np.array([1, 2, 3, 1, np.nan, 1, 2, 3, 1])) + self.assertEqual(poly.dimension_values(1), np.array([2, 0, 7, 2, np.nan, 2, 0, 7, 2])) + self.assertEqual(poly.dimension_values(2), np.array([1, 1, 1, 1, 1, 1, 1, 1, 1])) + + def test_polygon_get_holes(self): + xs = [1, 2, 3] + ys = [2, 0, 7] + holes = [ + [[(1.5, 2), (2, 3), (1.6, 1.6)], [(2.1, 4.5), (2.5, 5), (2.3, 3.5), (2.1, 4.5)]] + ] + poly = Polygons([{'x': xs, 'y': ys, 'holes': holes, 'z': 1}, + {'x': xs[::-1], 'y': ys[::-1], 'z': 2}], + ['x', 'y'], 'z', datatype=[self.datatype]) + holes = [ + [[np.array([(1.5, 2), (2, 3), (1.6, 1.6), (1.5, 2)]), np.array(holes[0][1])]], + [[]] + ] + self.assertIs(poly.interface, self.interface) + self.assertEqual(poly.holes(), holes) + + def test_multi_polygon_get_holes(self): + xs = [1, 2, 3, np.nan, 6, 7, 3] + ys = [2, 0, 7, np.nan, 7, 5, 2] + holes = [ + [[(1.5, 2), (2, 3), (1.6, 1.6), (1.5, 2)], [(2.1, 4.5), (2.5, 5), (2.3, 3.5)]], + [] + ] + poly = Polygons([{'x': xs, 'y': ys, 'holes': holes, 'z': 1}, + {'x': xs[::-1], 'y': ys[::-1], 'z': 2}], + ['x', 'y'], 'z', datatype=[self.datatype]) + holes = [ + [[np.array(holes[0][0]), np.array([(2.1, 4.5), (2.5, 5), (2.3, 3.5), (2.1, 4.5)])], []], + [[], []] + ] + self.assertIs(poly.interface, self.interface) + self.assertEqual(poly.holes(), holes) + + def test_polygon_dtype(self): + poly = Polygons([{'x': [1, 2, 3], 'y': [2, 0, 7]}], datatype=[self.datatype]) + self.assertIs(poly.interface, self.interface) + self.assertEqual(poly.interface.dtype(poly, 'x'), + 'int64') + + def test_select_from_multi_polygons_with_scalar(self): + xs = [1, 2, 3, np.nan, 6, 7, 3] + ys = [2, 0, 7, np.nan, 7, 5, 2] + holes = [ + [[(1.5, 2), (2, 3), (1.6, 1.6)], [(2.1, 4.5), (2.5, 5), (2.3, 3.5)]], + [] + ] + poly = Polygons([{'x': xs, 'y': ys, 'holes': holes, 'z': 1}, + {'x': xs[::-1], 'y': ys[::-1], 'z': 
2}], + ['x', 'y'], 'z', datatype=[self.datatype]) + expected = Polygons([{'x': xs[::-1], 'y': ys[::-1], 'z': 2}], + ['x', 'y'], 'z', datatype=[self.datatype]) + self.assertIs(poly.interface, self.interface) + self.assertEqual(poly.select(z=2), expected) + + def test_select_from_multi_polygons_with_slice(self): + xs = [1, 2, 3, np.nan, 6, 7, 3] + ys = [2, 0, 7, np.nan, 7, 5, 2] + holes = [ + [[(1.5, 2), (2, 3), (1.6, 1.6)], [(2.1, 4.5), (2.5, 5), (2.3, 3.5)]], + [] + ] + poly = Polygons([{'x': xs, 'y': ys, 'holes': holes, 'z': 1}, + {'x': xs[::-1], 'y': ys[::-1], 'z': 2}, + {'x': xs[:3], 'y': ys[:3], 'z': 3}], + ['x', 'y'], 'z', datatype=[self.datatype]) + expected = Polygons([{'x': xs[::-1], 'y': ys[::-1], 'z': 2}, + {'x': xs[:3], 'y': ys[:3], 'z': 3}], + ['x', 'y'], 'z', datatype=[self.datatype]) + self.assertIs(poly.interface, self.interface) + self.assertEqual(poly.select(z=(2, 4)), expected) + + def test_select_from_multi_polygons_with_list(self): + xs = [1, 2, 3, np.nan, 6, 7, 3] + ys = [2, 0, 7, np.nan, 7, 5, 2] + holes = [ + [[(1.5, 2), (2, 3), (1.6, 1.6)], [(2.1, 4.5), (2.5, 5), (2.3, 3.5)]], + [] + ] + poly = Polygons([{'x': xs, 'y': ys, 'holes': holes, 'z': 1}, + {'x': xs[::-1], 'y': ys[::-1], 'z': 2}, + {'x': xs[:3], 'y': ys[:3], 'z': 3}], + ['x', 'y'], 'z', datatype=[self.datatype]) + expected = Polygons([{'x': xs, 'y': ys, 'holes': holes, 'z': 1}, + {'x': xs[:3], 'y': ys[:3], 'z': 3}], + ['x', 'y'], 'z', datatype=[self.datatype]) + self.assertIs(poly.interface, self.interface) + self.assertEqual(poly.select(z=[1, 3]), expected) + + def test_sort_by_value(self): + path = Path([{'x': [1, 2, 3, 4, 5], 'y': [0, 0, 1, 1, 2], 'value': 1}, + {'x': [5, 4, 3, 2, 1], 'y': [2, 2, 1, 1, 0], 'value': 0}], + vdims='value', datatype=[self.datatype]) + self.assertIs(path.interface, self.interface) + sorted = Path([{'x': [5, 4, 3, 2, 1], 'y': [2, 2, 1, 1, 0], 'value': 0}, + {'x': [1, 2, 3, 4, 5], 'y': [0, 0, 1, 1, 2], 'value': 1}], vdims='value') + 
self.assertEqual(path.sort('value'), sorted) + + +class MultiBaseInterfaceTest(GeomTests): + + datatype = 'multitabular' + interface = MultiInterface + subtype = None + + __test__ = False + + def setUp(self): + logger = get_logger() + self._log_level = logger.level + get_logger().setLevel(logging.ERROR) + self._subtypes = MultiInterface.subtypes + MultiInterface.subtypes = [self.subtype] + super(MultiBaseInterfaceTest, self).setUp() + + def tearDown(self): + MultiInterface.subtypes = self._subtypes + get_logger().setLevel(self._log_level) + super(MultiBaseInterfaceTest, self).tearDown() + + +class MultiDictInterfaceTest(MultiBaseInterfaceTest): + """ + Test of the MultiInterface. + """ + + datatype = 'multitabular' + interface = MultiInterface + subtype = 'dictionary' + __test__ = True diff --git a/holoviews/tests/core/data/testspatialpandas.py b/holoviews/tests/core/data/testspatialpandas.py new file mode 100644 index 0000000000..f251b63226 --- /dev/null +++ b/holoviews/tests/core/data/testspatialpandas.py @@ -0,0 +1,219 @@ +""" +Tests for the spatialpandas interface. 
+""" +from __future__ import absolute_import + +from unittest import SkipTest + +import numpy as np + +try: + import spatialpandas + from spatialpandas.geometry import ( + LineDtype, PointDtype, PolygonDtype, + MultiLineDtype, MultiPointDtype, MultiPolygonDtype + ) +except: + spatialpandas = None + +from holoviews.core.data import Dataset, SpatialPandasInterface +from holoviews.core.data.interface import DataError +from holoviews.element import Path, Points, Polygons +from holoviews.element.comparison import ComparisonTestCase + +from .testmultiinterface import GeomTests + + +class RoundTripTests(ComparisonTestCase): + + datatype = None + + interface = None + + __test__ = False + + def test_point_roundtrip(self): + points = Points([{'x': 0, 'y': 1, 'z': 0}, + {'x': 1, 'y': 0, 'z': 1}], ['x', 'y'], + 'z', datatype=[self.datatype]) + self.assertIsInstance(points.data.geometry.dtype, PointDtype) + roundtrip = points.clone(datatype=['multitabular']) + self.assertEqual(roundtrip.interface.datatype, 'multitabular') + expected = Points([{'x': 0, 'y': 1, 'z': 0}, + {'x': 1, 'y': 0, 'z': 1}], ['x', 'y'], + 'z', datatype=['multitabular']) + self.assertEqual(roundtrip, expected) + + def test_multi_point_roundtrip(self): + xs = [1, 2, 3, 2] + ys = [2, 0, 7, 4] + points = Points([{'x': xs, 'y': ys, 'z': 0}, + {'x': xs[::-1], 'y': ys[::-1], 'z': 1}], + ['x', 'y'], 'z', datatype=[self.datatype]) + self.assertIsInstance(points.data.geometry.dtype, MultiPointDtype) + roundtrip = points.clone(datatype=['multitabular']) + self.assertEqual(roundtrip.interface.datatype, 'multitabular') + expected = Points([{'x': xs, 'y': ys, 'z': 0}, + {'x': xs[::-1], 'y': ys[::-1], 'z': 1}], + ['x', 'y'], 'z', datatype=['multitabular']) + self.assertEqual(roundtrip, expected) + + def test_line_roundtrip(self): + xs = [1, 2, 3] + ys = [2, 0, 7] + path = Path([{'x': xs, 'y': ys, 'z': 1}, + {'x': xs[::-1], 'y': ys[::-1], 'z': 2}], + ['x', 'y'], 'z', datatype=[self.datatype]) + 
self.assertIsInstance(path.data.geometry.dtype, LineDtype) + roundtrip = path.clone(datatype=['multitabular']) + self.assertEqual(roundtrip.interface.datatype, 'multitabular') + expected = Path([{'x': xs, 'y': ys, 'z': 1}, + {'x': xs[::-1], 'y': ys[::-1], 'z': 2}], + ['x', 'y'], 'z', datatype=['multitabular']) + self.assertEqual(roundtrip, expected) + + def test_multi_line_roundtrip(self): + xs = [1, 2, 3, np.nan, 6, 7, 3] + ys = [2, 0, 7, np.nan, 7, 5, 2] + path = Path([{'x': xs, 'y': ys, 'z': 0}, + {'x': xs[::-1], 'y': ys[::-1], 'z': 1}], + ['x', 'y'], 'z', datatype=[self.datatype]) + self.assertIsInstance(path.data.geometry.dtype, MultiLineDtype) + roundtrip = path.clone(datatype=['multitabular']) + self.assertEqual(roundtrip.interface.datatype, 'multitabular') + expected = Path([{'x': xs, 'y': ys, 'z': 0}, + {'x': xs[::-1], 'y': ys[::-1], 'z': 1}], + ['x', 'y'], 'z', datatype=['multitabular']) + self.assertEqual(roundtrip, expected) + + def test_polygon_roundtrip(self): + xs = [1, 2, 3] + ys = [2, 0, 7] + poly = Polygons([{'x': xs, 'y': ys, 'z': 0}, + {'x': xs[::-1], 'y': ys[::-1], 'z': 1}], + ['x', 'y'], 'z', datatype=[self.datatype]) + self.assertIsInstance(poly.data.geometry.dtype, PolygonDtype) + roundtrip = poly.clone(datatype=['multitabular']) + self.assertEqual(roundtrip.interface.datatype, 'multitabular') + expected = Polygons([{'x': xs+[1], 'y': ys+[2], 'z': 0}, + {'x': [3]+xs, 'y': [7]+ys, 'z': 1}], + ['x', 'y'], 'z', datatype=['multitabular']) + self.assertEqual(roundtrip, expected) + + def test_multi_polygon_roundtrip(self): + xs = [1, 2, 3, np.nan, 6, 7, 3] + ys = [2, 0, 7, np.nan, 7, 5, 2] + holes = [ + [[(1.5, 2), (2, 3), (1.6, 1.6)], [(2.1, 4.5), (2.5, 5), (2.3, 3.5)]], + [] + ] + poly = Polygons([{'x': xs, 'y': ys, 'holes': holes, 'z': 1}, + {'x': xs[::-1], 'y': ys[::-1], 'z': 2}], + ['x', 'y'], 'z', datatype=[self.datatype]) + self.assertIsInstance(poly.data.geometry.dtype, MultiPolygonDtype) + roundtrip = poly.clone(datatype=['multitabular']) 
+ self.assertEqual(roundtrip.interface.datatype, 'multitabular') + expected = Polygons([{'x': [1, 2, 3, 1, np.nan, 6, 3, 7, 6], + 'y': [2, 0, 7, 2, np.nan, 7, 2, 5, 7], 'holes': holes, 'z': 1}, + {'x': [3, 7, 6, 3, np.nan, 3, 1, 2, 3], + 'y': [2, 5, 7, 2, np.nan, 7, 2, 0, 7], 'z': 2}], + ['x', 'y'], 'z', datatype=['multitabular']) + self.assertEqual(roundtrip, expected) + + + +class SpatialPandasTest(GeomTests, RoundTripTests): + """ + Test of the SpatialPandasInterface. + """ + + datatype = 'spatialpandas' + + interface = SpatialPandasInterface + + __test__ = True + + def setUp(self): + if spatialpandas is None: + raise SkipTest('SpatialPandasInterface requires spatialpandas, skipping tests') + super(GeomTests, self).setUp() + + def test_array_points_iloc_index_rows_index_cols(self): + arrays = [np.array([(1+i, i), (2+i, i), (3+i, i)]) for i in range(2)] + mds = Dataset(arrays, kdims=['x', 'y'], datatype=[self.datatype]) + self.assertIs(mds.interface, self.interface) + with self.assertRaises(DataError): + mds.iloc[3, 0] + + def test_point_constructor(self): + points = Points([{'x': 0, 'y': 1}, {'x': 1, 'y': 0}], ['x', 'y'], + datatype=[self.datatype]) + self.assertIsInstance(points.data.geometry.dtype, PointDtype) + self.assertEqual(points.data.iloc[0, 0].flat_values, np.array([0, 1])) + self.assertEqual(points.data.iloc[1, 0].flat_values, np.array([1, 0])) + + def test_multi_point_constructor(self): + xs = [1, 2, 3, 2] + ys = [2, 0, 7, 4] + points = Points([{'x': xs, 'y': ys}, {'x': xs[::-1], 'y': ys[::-1]}], ['x', 'y'], + datatype=[self.datatype]) + self.assertIsInstance(points.data.geometry.dtype, MultiPointDtype) + self.assertEqual(points.data.iloc[0, 0].buffer_values, + np.array([1, 2, 2, 0, 3, 7, 2, 4])) + self.assertEqual(points.data.iloc[1, 0].buffer_values, + np.array([2, 4, 3, 7, 2, 0, 1, 2])) + + def test_line_constructor(self): + xs = [1, 2, 3] + ys = [2, 0, 7] + path = Path([{'x': xs, 'y': ys}, {'x': xs[::-1], 'y': ys[::-1]}], + ['x', 'y'], 
datatype=[self.datatype]) + self.assertIsInstance(path.data.geometry.dtype, LineDtype) + self.assertEqual(path.data.iloc[0, 0].buffer_values, + np.array([1, 2, 2, 0, 3, 7])) + self.assertEqual(path.data.iloc[1, 0].buffer_values, + np.array([3, 7, 2, 0, 1, 2])) + + def test_multi_line_constructor(self): + xs = [1, 2, 3, np.nan, 6, 7, 3] + ys = [2, 0, 7, np.nan, 7, 5, 2] + path = Path([{'x': xs, 'y': ys}, {'x': xs[::-1], 'y': ys[::-1]}], + ['x', 'y'], datatype=[self.datatype]) + self.assertIsInstance(path.data.geometry.dtype, MultiLineDtype) + self.assertEqual(path.data.iloc[0, 0].buffer_values, + np.array([1, 2, 2, 0, 3, 7, 6, 7, 7, 5, 3, 2])) + self.assertEqual(path.data.iloc[1, 0].buffer_values, + np.array([3, 2, 7, 5, 6, 7, 3, 7, 2, 0, 1, 2])) + + def test_polygon_constructor(self): + xs = [1, 2, 3] + ys = [2, 0, 7] + holes = [ + [[(1.5, 2), (2, 3), (1.6, 1.6)], [(2.1, 4.5), (2.5, 5), (2.3, 3.5)]] + ] + path = Polygons([{'x': xs, 'y': ys, 'holes': holes}, {'x': xs[::-1], 'y': ys[::-1]}], + ['x', 'y'], datatype=[self.datatype]) + self.assertIsInstance(path.data.geometry.dtype, PolygonDtype) + self.assertEqual(path.data.iloc[0, 0].buffer_values, + np.array([1., 2., 2., 0., 3., 7., 1., 2., 1.5, 2., 2., 3., + 1.6, 1.6, 1.5, 2., 2.1, 4.5, 2.5, 5., 2.3, 3.5, 2.1, 4.5])) + self.assertEqual(path.data.iloc[1, 0].buffer_values, + np.array([3, 7, 1, 2, 2, 0, 3, 7])) + + def test_multi_polygon_constructor(self): + xs = [1, 2, 3, np.nan, 6, 7, 3] + ys = [2, 0, 7, np.nan, 7, 5, 2] + holes = [ + [[(1.5, 2), (2, 3), (1.6, 1.6)], [(2.1, 4.5), (2.5, 5), (2.3, 3.5)]], + [] + ] + path = Polygons([{'x': xs, 'y': ys, 'holes': holes}, + {'x': xs[::-1], 'y': ys[::-1]}], + ['x', 'y'], datatype=[self.datatype]) + self.assertIsInstance(path.data.geometry.dtype, MultiPolygonDtype) + self.assertEqual(path.data.iloc[0, 0].buffer_values, + np.array([1., 2., 2., 0., 3., 7., 1., 2., 1.5, 2., 2., 3., 1.6, 1.6, + 1.5, 2., 2.1, 4.5, 2.5, 5., 2.3, 3.5, 2.1, 4.5, 6., 7., 3., + 2., 7., 5., 6., 7. 
])) + self.assertEqual(path.data.iloc[1, 0].buffer_values, + np.array([3, 2, 7, 5, 6, 7, 3, 2, 3, 7, 1, 2, 2, 0, 3, 7])) diff --git a/holoviews/tests/element/testcomparisonchart.py b/holoviews/tests/element/testcomparisonchart.py index 2513f91519..822ea065e2 100644 --- a/holoviews/tests/element/testcomparisonchart.py +++ b/holoviews/tests/element/testcomparisonchart.py @@ -22,7 +22,7 @@ def test_curves_unequal(self): try: self.assertEqual(self.curve1, self.curve2) except AssertionError as e: - if not str(e).startswith("Curve not of matching length."): + if not str(e).startswith("Curve not of matching length, 100 vs. 101"): raise self.failureException("Curve mismatch error not raised.") @@ -124,7 +124,7 @@ def test_scatter_unequal_data_shape(self): try: self.assertEqual(self.scatter1, self.scatter2) except AssertionError as e: - if not str(e).startswith("Scatter not of matching length."): + if not str(e).startswith("Scatter not of matching length, 20 vs. 21."): raise self.failureException("Scatter data mismatch error not raised.") def test_scatter_unequal_data_values(self): @@ -159,7 +159,7 @@ def test_points_unequal_data_shape(self): try: self.assertEqual(self.points1, self.points2) except AssertionError as e: - if not str(e).startswith("Points not of matching length."): + if not str(e).startswith("Points not of matching length, 20 vs. 
21."): raise self.failureException("Points count mismatch error not raised.") def test_points_unequal_data_values(self): diff --git a/holoviews/tests/element/testpaths.py b/holoviews/tests/element/testpaths.py index 0c01108ae7..48340ccfd3 100644 --- a/holoviews/tests/element/testpaths.py +++ b/holoviews/tests/element/testpaths.py @@ -53,7 +53,6 @@ def test_multi_path_unpack_multi_paths(self): def test_single_path_list_constructor(self): path = Path([(0, 1), (1, 2), (2, 3), (3, 4)]) - self.assertFalse(path.interface.multi) self.assertEqual(path.dimension_values(0), np.array([ 0, 1, 2, 3])) self.assertEqual(path.dimension_values(1), np.array([ @@ -61,7 +60,6 @@ def test_single_path_list_constructor(self): def test_single_path_tuple_constructor(self): path = Path(([0, 1, 2, 3], [1, 2, 3, 4])) - self.assertFalse(path.interface.multi) self.assertEqual(path.dimension_values(0), np.array([ 0, 1, 2, 3])) self.assertEqual(path.dimension_values(1), np.array([ diff --git a/holoviews/tests/operation/testdatashader.py b/holoviews/tests/operation/testdatashader.py index 84fc5a8e24..de3f9946f4 100644 --- a/holoviews/tests/operation/testdatashader.py +++ b/holoviews/tests/operation/testdatashader.py @@ -1,11 +1,11 @@ import datetime as dt -from unittest import SkipTest +from unittest import SkipTest, skipIf import numpy as np from holoviews import (Dimension, Curve, Points, Image, Dataset, RGB, Path, Graph, TriMesh, QuadMesh, NdOverlay, Contours, Spikes, - Spread, Area, Segments) + Spread, Area, Segments, Polygons) from holoviews.element.comparison import ComparisonTestCase try: @@ -19,6 +19,14 @@ except: raise SkipTest('Datashader not available') +try: + import spatialpandas +except: + spatialpandas = None + +spatialpandas_skip = skipIf(spatialpandas is None, "SpatialPandas not available") + + class DatashaderAggregateTests(ComparisonTestCase): """ @@ -35,8 +43,10 @@ def test_aggregate_points(self): def test_aggregate_zero_range_points(self): p = Points([(0, 0), (1, 1)]) - agg = 
rasterize(p, x_range=(0, 0), y_range=(0, 1), expand=False, dynamic=False) - img = Image(([], [0.25, 0.75], np.zeros((2, 0))), bounds=(0, 0, 0, 1), xdensity=1, vdims=['Count']) + agg = rasterize(p, x_range=(0, 0), y_range=(0, 1), expand=False, dynamic=False, + width=2, height=2) + img = Image(([], [0.25, 0.75], np.zeros((2, 0))), bounds=(0, 0, 0, 1), + xdensity=1, vdims=['Count']) self.assertEqual(agg, img) def test_aggregate_points_target(self): @@ -68,7 +78,7 @@ def test_aggregate_points_categorical(self): def test_aggregate_points_categorical_zero_range(self): points = Points([(0.2, 0.3, 'A'), (0.4, 0.7, 'B'), (0, 0.99, 'C')], vdims='z') img = aggregate(points, dynamic=False, x_range=(0, 0), y_range=(0, 1), - aggregator=ds.count_cat('z')) + aggregator=ds.count_cat('z'), height=2) xs, ys = [], [0.25, 0.75] params = dict(bounds=(0, 0, 0, 1), xdensity=1) expected = NdOverlay({'A': Image((xs, ys, np.zeros((2, 0))), vdims='z Count', **params), @@ -149,7 +159,7 @@ def test_aggregate_ndoverlay_count_cat_datetimes_microsecond_timebase(self): def test_aggregate_dt_xaxis_constant_yaxis(self): df = pd.DataFrame({'y': np.ones(100)}, index=pd.date_range('1980-01-01', periods=100, freq='1T')) - img = rasterize(Curve(df), dynamic=False) + img = rasterize(Curve(df), dynamic=False, width=3) xs = np.array(['1980-01-01T00:16:30.000000', '1980-01-01T00:49:30.000000', '1980-01-01T01:22:30.000000'], dtype='datetime64[us]') ys = np.array([]) @@ -452,6 +462,107 @@ def test_spread_aggregate_assymmetric_count(self): expected = Image((xs, ys, arr), vdims='count') self.assertEqual(agg, expected) + @spatialpandas_skip + def test_line_rasterize(self): + path = Path([[(0, 0), (1, 1), (2, 0)], [(0, 0), (0, 1)]], datatype=['spatialpandas']) + agg = rasterize(path, width=4, height=4, dynamic=False) + xs = [0.25, 0.75, 1.25, 1.75] + ys = [0.125, 0.375, 0.625, 0.875] + arr = np.array([ + [2, 0, 0, 1], + [1, 1, 0, 1], + [1, 1, 1, 0], + [1, 0, 1, 0] + ]) + expected = Image((xs, ys, arr), 
vdims='Count') + self.assertEqual(agg, expected) + + @spatialpandas_skip + def test_multi_line_rasterize(self): + path = Path([{'x': [0, 1, 2, np.nan, 0, 0], 'y': [0, 1, 0, np.nan, 0, 1]}], + datatype=['spatialpandas']) + agg = rasterize(path, width=4, height=4, dynamic=False) + xs = [0.25, 0.75, 1.25, 1.75] + ys = [0.125, 0.375, 0.625, 0.875] + arr = np.array([ + [2, 0, 0, 1], + [1, 1, 0, 1], + [1, 1, 1, 0], + [1, 0, 1, 0] + ]) + expected = Image((xs, ys, arr), vdims='Count') + self.assertEqual(agg, expected) + + @spatialpandas_skip + def test_ring_rasterize(self): + path = Path([{'x': [0, 1, 2], 'y': [0, 1, 0], 'geom_type': 'Ring'}], datatype=['spatialpandas']) + agg = rasterize(path, width=4, height=4, dynamic=False) + xs = [0.25, 0.75, 1.25, 1.75] + ys = [0.125, 0.375, 0.625, 0.875] + arr = np.array([ + [2, 1, 1, 1], + [0, 1, 0, 1], + [0, 1, 1, 0], + [0, 0, 1, 0] + ]) + expected = Image((xs, ys, arr), vdims='Count') + self.assertEqual(agg, expected) + + @spatialpandas_skip + def test_polygon_rasterize(self): + poly = Polygons([ + {'x': [0, 1, 2], 'y': [0, 1, 0], + 'holes': [[[(1.6, 0.2), (1, 0.8), (0.4, 0.2)]]]} + ]) + agg = rasterize(poly, width=6, height=6, dynamic=False) + xs = [0.166667, 0.5, 0.833333, 1.166667, 1.5, 1.833333] + ys = [0.083333, 0.25, 0.416667, 0.583333, 0.75, 0.916667] + arr = np.array([ + [1, 1, 1, 1, 1, 1], + [0, 0, 0, 0, 0, 0], + [0, 1, 0, 0, 1, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 1, 1, 0, 0], + [0, 0, 0, 0, 0, 0] + ]) + expected = Image((xs, ys, arr), vdims='Count') + self.assertEqual(agg, expected) + + @spatialpandas_skip + def test_polygon_rasterize_mean_agg(self): + poly = Polygons([ + {'x': [0, 1, 2], 'y': [0, 1, 0], 'z': 2.4}, + {'x': [0, 0, 1], 'y': [0, 1, 1], 'z': 3.6} + ], vdims='z') + agg = rasterize(poly, width=4, height=4, dynamic=False, aggregator='mean') + xs = [0.25, 0.75, 1.25, 1.75] + ys = [0.125, 0.375, 0.625, 0.875] + arr = np.array([ + [ 2.4, 2.4, 2.4, 2.4], + [ 3.6, 2.4, 2.4, np.nan], + [ 3.6, 2.4, 2.4, np.nan], + [ 
3.6, 3.6, np.nan, np.nan]]) + expected = Image((xs, ys, arr), vdims='z') + self.assertEqual(agg, expected) + + @spatialpandas_skip + def test_multi_poly_rasterize(self): + poly = Polygons([{'x': [0, 1, 2, np.nan, 0, 0, 1], + 'y': [0, 1, 0, np.nan, 0, 1, 1]}], + datatype=['spatialpandas']) + agg = rasterize(poly, width=4, height=4, dynamic=False) + xs = [0.25, 0.75, 1.25, 1.75] + ys = [0.125, 0.375, 0.625, 0.875] + arr = np.array([ + [1, 1, 1, 1], + [1, 1, 1, 0], + [1, 1, 1, 0], + [1, 1, 0, 0] + ]) + expected = Image((xs, ys, arr), vdims='Count') + self.assertEqual(agg, expected) + + class DatashaderShadeTests(ComparisonTestCase): @@ -488,7 +599,7 @@ def test_shade_categorical_images_grid(self): def test_shade_dt_xaxis_constant_yaxis(self): df = pd.DataFrame({'y': np.ones(100)}, index=pd.date_range('1980-01-01', periods=100, freq='1T')) - rgb = shade(rasterize(Curve(df), dynamic=False)) + rgb = shade(rasterize(Curve(df), dynamic=False, width=3)) xs = np.array(['1980-01-01T00:16:30.000000', '1980-01-01T00:49:30.000000', '1980-01-01T01:22:30.000000'], dtype='datetime64[us]') ys = np.array([]) diff --git a/holoviews/tests/plotting/bokeh/testlinks.py b/holoviews/tests/plotting/bokeh/testlinks.py index 13a6929009..52645814a8 100644 --- a/holoviews/tests/plotting/bokeh/testlinks.py +++ b/holoviews/tests/plotting/bokeh/testlinks.py @@ -72,8 +72,10 @@ def test_data_link_poly_table(self): plot = bokeh_renderer.get_plot(layout) cds = list(plot.state.select({'type': ColumnDataSource})) self.assertEqual(len(cds), 1) - merged_data = {'xs': [[[arr1[:, 0]]], [[arr2[:, 0]]]], - 'ys': [[[arr1[:, 1]]], [[arr2[:, 1]]]], + merged_data = {'xs': [[[np.concatenate([arr1[:, 0], arr1[:1, 0]])]], + [[np.concatenate([arr2[:, 0], arr2[:1, 0]])]]], + 'ys': [[[np.concatenate([arr1[:, 1], arr1[:1, 1]])]], + [[np.concatenate([arr2[:, 1], arr2[:1, 1]])]]], 'A': np.array(['A', 'B']), 'B': np.array([1, 2])} for k, v in cds[0].data.items(): self.assertEqual(v, merged_data[k]) diff --git 
a/holoviews/tests/plotting/bokeh/testpathplot.py b/holoviews/tests/plotting/bokeh/testpathplot.py index 761691d637..6aa78cd8fa 100644 --- a/holoviews/tests/plotting/bokeh/testpathplot.py +++ b/holoviews/tests/plotting/bokeh/testpathplot.py @@ -1,11 +1,9 @@ import datetime as dt -from unittest import SkipTest import numpy as np from holoviews.core import NdOverlay, HoloMap from holoviews.core.options import Cycle from holoviews.element import Path, Polygons, Contours -from holoviews.plotting.bokeh.util import bokeh_version from holoviews.streams import PolyDraw from .testplot import TestBokehPlot, bokeh_renderer @@ -58,14 +56,6 @@ def test_path_overlay_hover(self): obj = obj.opts(plot=opts) self._test_hover_info(obj, [('Test', '@{Test}')]) - def test_empty_path_plot(self): - path = Path([], vdims=['Intensity']).opts(plot=dict(color_index=2)) - plot = bokeh_renderer.get_plot(path) - source = plot.handles['source'] - self.assertEqual(len(source.data['xs']), 0) - self.assertEqual(len(source.data['ys']), 0) - self.assertEqual(len(source.data['Intensity']), 0) - def test_path_colored_and_split_with_extra_vdims(self): xs = [1, 2, 3, 4] ys = xs[::-1] @@ -81,23 +71,10 @@ def test_path_colored_and_split_with_extra_vdims(self): self.assertEqual(source.data['other'], np.array(['A', 'B', 'C'])) self.assertEqual(source.data['color'], np.array([0, 0.25, 0.5])) - def test_path_colored_and_split_on_single_value(self): - xs = [1, 2, 3, 4] - ys = xs[::-1] - color = [1, 1, 1, 1] - data = {'x': xs, 'y': ys, 'color': color} - path = Path([data], vdims=['color']).options(color_index='color') - plot = bokeh_renderer.get_plot(path) - source = plot.handles['source'] - - self.assertEqual(source.data['xs'], [np.array([1, 2]), np.array([2, 3]), np.array([3, 4])]) - self.assertEqual(source.data['ys'], [np.array([4, 3]), np.array([3, 2]), np.array([2, 1])]) - self.assertEqual(source.data['color'], np.array([1, 1, 1])) - def test_path_colored_by_levels_single_value(self): xs = [1, 2, 3, 4] ys = 
xs[::-1] - color = [998, 998, 998, 998] + color = [998, 999, 998, 998] date = np.datetime64(dt.datetime(2018, 8, 1)) data = {'x': xs, 'y': ys, 'color': color, 'date': date} levels = [0, 38, 73, 95, 110, 130, 156, 999] @@ -110,12 +87,10 @@ def test_path_colored_by_levels_single_value(self): self.assertEqual(source.data['xs'], [np.array([1, 2]), np.array([2, 3]), np.array([3, 4])]) self.assertEqual(source.data['ys'], [np.array([4, 3]), np.array([3, 2]), np.array([2, 1])]) - self.assertEqual(source.data['color'], np.array([998, 998, 998])) - self.assertEqual(source.data['date'], - np.array([1533081600000000000, 1533081600000000000, 1533081600000000000])) + self.assertEqual(source.data['color'], np.array([998, 999, 998])) self.assertEqual(source.data['date_dt_strings'], np.array(['2018-08-01 00:00:00', '2018-08-01 00:00:00', '2018-08-01 00:00:00'])) - self.assertEqual(cmapper.low, 156) + self.assertEqual(cmapper.low, 998) self.assertEqual(cmapper.high, 999) self.assertEqual(cmapper.palette, colors[-1:]) @@ -136,8 +111,6 @@ def test_path_continuously_varying_color_op(self): self.assertEqual(source.data['xs'], [np.array([1, 2]), np.array([2, 3]), np.array([3, 4])]) self.assertEqual(source.data['ys'], [np.array([4, 3]), np.array([3, 2]), np.array([2, 1])]) self.assertEqual(source.data['color'], np.array([998, 999, 998])) - self.assertEqual(source.data['date'], - np.array([1533081600000000000, 1533081600000000000, 1533081600000000000])) self.assertEqual(source.data['date_dt_strings'], np.array(['2018-08-01 00:00:00', '2018-08-01 00:00:00', '2018-08-01 00:00:00'])) self.assertEqual(cmapper.low, 994) @@ -240,24 +213,20 @@ def test_empty_polygons_plot(self): self.assertEqual(len(source.data['Intensity']), 0) def test_polygon_with_hole_plot(self): - if bokeh_version < '1.0': - raise SkipTest('Plotting Polygons with holes requires bokeh >= 1.0') xs = [1, 2, 3] ys = [2, 0, 7] holes = [[[(1.5, 2), (2, 3), (1.6, 1.6)], [(2.1, 4.5), (2.5, 5), (2.3, 3.5)]]] poly = Polygons([{'x': 
xs, 'y': ys, 'holes': holes}]) plot = bokeh_renderer.get_plot(poly) source = plot.handles['source'] - self.assertEqual(source.data['xs'], [[[np.array([1, 2, 3]), np.array([1.5, 2, 1.6]), - np.array([2.1, 2.5, 2.3])]]]) - self.assertEqual(source.data['ys'], [[[np.array([2, 0, 7]), np.array([2, 3, 1.6]), - np.array([4.5, 5, 3.5])]]]) + self.assertEqual(source.data['xs'], [[[np.array([1, 2, 3, 1]), np.array([1.5, 2, 1.6, 1.5]), + np.array([2.1, 2.5, 2.3, 2.1])]]]) + self.assertEqual(source.data['ys'], [[[np.array([2, 0, 7, 2]), np.array([2, 3, 1.6, 2]), + np.array([4.5, 5, 3.5, 4.5])]]]) def test_multi_polygon_hole_plot(self): - if bokeh_version < '1.0': - raise SkipTest('Plotting Polygons with holes requires bokeh >= 1.0') - xs = [1, 2, 3, np.nan, 6, 7, 3] - ys = [2, 0, 7, np.nan, 7, 5, 2] + xs = [1, 2, 3, np.nan, 3, 7, 6] + ys = [2, 0, 7, np.nan, 2, 5, 7] holes = [ [[(1.5, 2), (2, 3), (1.6, 1.6)], [(2.1, 4.5), (2.5, 5), (2.3, 3.5)]], [] @@ -265,10 +234,10 @@ def test_multi_polygon_hole_plot(self): poly = Polygons([{'x': xs, 'y': ys, 'holes': holes}]) plot = bokeh_renderer.get_plot(poly) source = plot.handles['source'] - self.assertEqual(source.data['xs'], [[[np.array([1, 2, 3]), np.array([1.5, 2, 1.6]), - np.array([2.1, 2.5, 2.3])], [np.array([6, 7, 3])]]]) - self.assertEqual(source.data['ys'], [[[np.array([2, 0, 7]), np.array([2, 3, 1.6]), - np.array([4.5, 5, 3.5])], [np.array([7, 5, 2])]]]) + self.assertEqual(source.data['xs'], [[[np.array([1, 2, 3, 1]), np.array([1.5, 2, 1.6, 1.5]), + np.array([2.1, 2.5, 2.3, 2.1])], [np.array([3, 7, 6, 3])]]]) + self.assertEqual(source.data['ys'], [[[np.array([2, 0, 7, 2]), np.array([2, 3, 1.6, 2]), + np.array([4.5, 5, 3.5, 4.5])], [np.array([2, 5, 7, 2])]]]) def test_polygons_hover_color_op(self): polygons = Polygons([ @@ -350,8 +319,6 @@ def test_polygons_line_width_op(self): self.assertEqual(cds.data['line_width'], np.array([7, 3])) def test_polygons_holes_initialize(self): - if bokeh_version < '1.0': - raise 
SkipTest('Plotting Polygons with holes requires bokeh >= 1.0') from bokeh.models import MultiPolygons xs = [1, 2, 3, np.nan, 6, 7, 3] ys = [2, 0, 7, np.nan, 7, 5, 2] @@ -367,8 +334,6 @@ def test_polygons_holes_initialize(self): self.assertIsInstance(glyph, MultiPolygons) def test_polygons_no_holes_with_draw_tool(self): - if bokeh_version < '1.0': - raise SkipTest('Plotting Polygons with holes requires bokeh >= 1.0') from bokeh.models import Patches xs = [1, 2, 3, np.nan, 6, 7, 3] ys = [2, 0, 7, np.nan, 7, 5, 2] diff --git a/holoviews/tests/plotting/matplotlib/testpathplot.py b/holoviews/tests/plotting/matplotlib/testpathplot.py index 50b85e2365..825ca3871a 100644 --- a/holoviews/tests/plotting/matplotlib/testpathplot.py +++ b/holoviews/tests/plotting/matplotlib/testpathplot.py @@ -20,7 +20,7 @@ def test_path_continuously_varying_color_op(self): color='color', color_levels=levels, cmap=colors) plot = mpl_renderer.get_plot(path) artist = plot.handles['artist'] - self.assertEqual(artist.get_array(), np.array([998, 999, 998])) + self.assertEqual(artist.get_array(), np.array(color)) self.assertEqual(artist.get_clim(), (994, 999)) def test_path_continuously_varying_alpha_op(self): @@ -40,7 +40,7 @@ def test_path_continuously_varying_line_width_op(self): path = Path([data], vdims='line_width').options(linewidth='line_width') plot = mpl_renderer.get_plot(path) artist = plot.handles['artist'] - self.assertEqual(artist.get_linewidths(), [1, 7, 3]) + self.assertEqual(artist.get_linewidths(), line_width) def test_path_continuously_varying_line_width_op_update(self): xs = [1, 2, 3, 4] @@ -51,9 +51,9 @@ def test_path_continuously_varying_line_width_op_update(self): }).options(linewidth='line_width') plot = mpl_renderer.get_plot(path) artist = plot.handles['artist'] - self.assertEqual(artist.get_linewidths(), [1, 7, 3]) + self.assertEqual(artist.get_linewidths(), [1, 7, 3, 2]) plot.update((1,)) - self.assertEqual(artist.get_linewidths(), [3, 8, 2]) + 
self.assertEqual(artist.get_linewidths(), [3, 8, 2, 3]) class TestPolygonPlot(TestMPLPlot): @@ -84,8 +84,8 @@ def test_polygon_with_hole_plot(self): self.assertEqual(path.codes, np.array([1, 2, 2, 79, 1, 2, 2, 79, 1, 2, 2, 79])) def test_multi_polygon_hole_plot(self): - xs = [1, 2, 3, np.nan, 6, 7, 3] - ys = [2, 0, 7, np.nan, 7, 5, 2] + xs = [1, 2, 3, np.nan, 3, 7, 6] + ys = [2, 0, 7, np.nan, 2, 5, 7] holes = [ [[(1.5, 2), (2, 3), (1.6, 1.6)], [(2.1, 4.5), (2.5, 5), (2.3, 3.5)]], [] @@ -103,7 +103,7 @@ def test_multi_polygon_hole_plot(self): ) self.assertEqual(path.codes, np.array([1, 2, 2, 79, 1, 2, 2, 79, 1, 2, 2, 79])) path2 = paths[1] - self.assertEqual(path2.vertices, np.array([(6, 7), (7, 5), (3, 2), (6, 7)])) + self.assertEqual(path2.vertices, np.array([(3, 2), (7, 5), (6, 7), (3, 2)])) self.assertEqual(path2.codes, np.array([1, 2, 2, 79])) def test_polygons_color_op(self): diff --git a/holoviews/tests/plotting/matplotlib/testutils.py b/holoviews/tests/plotting/matplotlib/testutils.py index 0b08334df5..312d87d1c7 100644 --- a/holoviews/tests/plotting/matplotlib/testutils.py +++ b/holoviews/tests/plotting/matplotlib/testutils.py @@ -11,8 +11,8 @@ class TestUtils(TestMPLPlot): def test_polygon_to_path_patches(self): - xs = [1, 2, 3, np.nan, 6, 7, 3, np.nan, 0, 0, 0] - ys = [2, 0, 7, np.nan, 7, 5, 2, np.nan, 0, 1, 0] + xs = [1, 2, 3, np.nan, 3, 7, 6, np.nan, 0, 0, 0] + ys = [2, 0, 7, np.nan, 2, 5, 7, np.nan, 0, 1, 0] holes = [ [[(1.5, 2), (2, 3), (1.6, 1.6)], [(2.1, 4.5), (2.5, 5), (2.3, 3.5)]], @@ -30,7 +30,7 @@ def test_polygon_to_path_patches(self): (1.5, 2), (2, 3), (1.6, 1.6), (1.5, 2), (2.1, 4.5), (2.5, 5), (2.3, 3.5), (2.1, 4.5)])) self.assertEqual(paths[0][0].get_path().codes, np.array([1, 2, 2, 79, 1, 2, 2, 79, 1, 2, 2, 79], dtype='uint8')) - self.assertEqual(paths[0][1].get_path().vertices, np.array([(6, 7), (7, 5), (3, 2), (6, 7)])) + self.assertEqual(paths[0][1].get_path().vertices, np.array([(3, 2), (7, 5), (6, 7), (3, 2),])) 
self.assertEqual(paths[0][1].get_path().codes, np.array([1, 2, 2, 79], dtype='uint8')) self.assertEqual(paths[0][2].get_path().vertices, np.array([(0, 0), (0, 1), (0, 0)])) self.assertEqual(paths[0][1].get_path().codes, np.array([1, 2, 2, 79], dtype='uint8')) diff --git a/holoviews/util/transform.py b/holoviews/util/transform.py index a72115a76d..6ae2140f24 100644 --- a/holoviews/util/transform.py +++ b/holoviews/util/transform.py @@ -399,7 +399,8 @@ def apply( dimension = self.dimension if expanded is None: expanded = not ((dataset.interface.gridded and dimension in dataset.kdims) or - (dataset.interface.multi and dataset.interface.isscalar(dataset, dimension))) + (dataset.interface.multi and dataset.interface.isunique(dataset, dimension, True))) + if isinstance(dataset, Graph): if dimension in dataset.kdims and all_values: dimension = dataset.nodes.kdims[2] diff --git a/setup.py b/setup.py index 44e1443542..3bb53cac31 100644 --- a/setup.py +++ b/setup.py @@ -27,6 +27,9 @@ 'datashader', 'selenium', 'phantomjs', 'ffmpeg', 'streamz>=0.5.0', 'cftime', 'netcdf4', 'bzip2', 'dask', 'scipy'] +if sys.version_info.major > 2: + extras_require['examples'].append('spatialpandas') + # Extra third-party libraries extras_require['extras'] = extras_require['examples']+[ 'cyordereddict', 'pscript==0.7.1']