From 4d3857342139dde2ec274706a46460b87989be9d Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sat, 21 Sep 2019 12:41:24 +0200 Subject: [PATCH 01/23] Add cuDFInterface to work with cuDF GPU dataframes --- holoviews/core/data/__init__.py | 6 + holoviews/core/data/cudf.py | 283 ++++++++++++++++++++++++++++++++ 2 files changed, 289 insertions(+) create mode 100644 holoviews/core/data/cudf.py diff --git a/holoviews/core/data/__init__.py b/holoviews/core/data/__init__.py index 75c23bb98f..949b9b4369 100644 --- a/holoviews/core/data/__init__.py +++ b/holoviews/core/data/__init__.py @@ -55,6 +55,12 @@ except ImportError: pass +try: + from .cudf import cuDFInterface # noqa (Conditional API import) + datatypes.append('cuDF') +except ImportError: + pass + if 'array' not in datatypes: datatypes.append('array') if 'multitabular' not in datatypes: diff --git a/holoviews/core/data/cudf.py b/holoviews/core/data/cudf.py new file mode 100644 index 0000000000..42e20a0db8 --- /dev/null +++ b/holoviews/core/data/cudf.py @@ -0,0 +1,283 @@ +from __future__ import absolute_import + +import sys +import warnings + +try: + import itertools.izip as zip +except ImportError: + pass + +import numpy as np + +from .. import util +from ...dimension import dimension_name +from ..element import Element +from ...ndmapping import NdMapping, item_check, sorted_context +from .interface import DataError, Interface +from .pandas import PandasInterface + + +class cuDFInterface(PandasInterface): + """ + The cuDFInterface allows a Dataset objects to wrap a cuDF + DataFrame object. Using cuDF allows working with columnar + data on a GPU. Most operation leave the data in GPU memory, + however to plot the data it has to be loaded into memory. + + The cuDFInterface covers almost the complete API exposed + by the PandasInterface with two notable exceptions: + + 1) Sorting is not supported and any attempt at sorting will + be ignored with an warning. + 2) cuDF does not easily support adding a new column to an existing + dataframe unless it is a scalar, add_dimension will therefore + error when supplied a non-scalar value. + 3) Not all functions can be easily applied to a dask dataframe so + some functions applied with aggregate and reduce will not work. 
+ """ + + datatype = 'cuDF' + + types = () + + @classmethod + def loaded(cls): + return 'cudf' in sys.modules + + @classmethod + def applies(cls, obj): + if not cls.loaded(): + return False + import cudf + return isinstance(obj, (cudf.DataFrame, cudf.Series)) + + @classmethod + def init(cls, eltype, data, kdims, vdims): + import cudf + + element_params = eltype.param.objects() + kdim_param = element_params['kdims'] + vdim_param = element_params['vdims'] + ncols = len(data.columns) + + if isinstance(data, cudf.Series): + data = data.to_frame() + + index_names = [data.index.name] + if index_names == [None]: + index_names = ['index'] + if eltype._auto_indexable_1d and ncols == 1 and kdims is None: + kdims = list(index_names) + + if isinstance(kdim_param.bounds[1], int): + ndim = min([kdim_param.bounds[1], len(kdim_param.default)]) + else: + ndim = None + nvdim = vdim_param.bounds[1] if isinstance(vdim_param.bounds[1], int) else None + if kdims and vdims is None: + vdims = [c for c in data.columns if c not in kdims] + elif vdims and kdims is None: + kdims = [c for c in data.columns if c not in vdims][:ndim] + elif kdims is None: + kdims = list(data.columns[:ndim]) + if vdims is None: + vdims = [d for d in data.columns[ndim:((ndim+nvdim) if nvdim else None)] + if d not in kdims] + elif kdims == [] and vdims is None: + vdims = list(data.columns[:nvdim if nvdim else None]) + + # Handle reset of index if kdims reference index by name + for kd in kdims: + kd = dimension_name(kd) + if kd in data.columns: + continue + if any(kd == ('index' if name is None else name) + for name in index_names): + data = data.reset_index() + break + if any(isinstance(d, (np.int64, int)) for d in kdims+vdims): + raise DataError("cudf DataFrame column names used as dimensions " + "must be strings not integers.", cls) + + if kdims: + kdim = dimension_name(kdims[0]) + if eltype._auto_indexable_1d and ncols == 1 and kdim not in data.columns: + data = data.copy() + data.insert(0, kdim, np.arange(len(data))) + + for d in kdims+vdims: + d = dimension_name(d) + if len([c for c in data.columns if c == d]) > 1: + raise DataError('Dimensions may not reference duplicated DataFrame ' + 'columns (found duplicate %r columns). If you want to plot ' + 'a column against itself simply declare two dimensions ' + 'with the same name. 
'% d, cls) + return data, {'kdims':kdims, 'vdims':vdims}, {} + + + @classmethod + def range(cls, dataset, dimension): + column = dataset.data[dataset.get_dimension(dimension, strict=True).name] + if column.dtype.kind == 'O': + return np.NaN, np.NaN + else: + return (column.min(), column.max()) + + + @classmethod + def values( + cls, + dataset, + dim, + expanded=True, + flat=True, + compute=True, + keep_index=False, + ): + dim = dataset.get_dimension(dim, strict=True) + data = dataset.data[dim.name] + if not expanded: + data = data.unique() + return data.to_array() if compute else data + elif keep_index: + return data + elif compute: + return data.to_array() + return data + + + @classmethod + def groupby(cls, dataset, dimensions, container_type, group_type, **kwargs): + # Get dimensions information + dimensions = [dataset.get_dimension(d).name for d in dimensions] + kdims = [kdim for kdim in dataset.kdims if kdim not in dimensions] + + # Update the kwargs appropriately for Element group types + group_kwargs = {} + group_type = dict if group_type == 'raw' else group_type + if issubclass(group_type, Element): + group_kwargs.update(util.get_param_values(dataset)) + group_kwargs['kdims'] = kdims + group_kwargs.update(kwargs) + + # Find all the keys along supplied dimensions + indices = [dataset.get_dimension_index(d) for d in dimensions] + keys = (tuple(dataset.iloc[i, d] for d in indices) + for i in range(len(dataset))) + + # Iterate over the unique entries applying selection masks + grouped_data = [] + for unique_key in util.unique_iterator(keys): + group_data = dataset.select(**dict(zip(dimensions, unique_key))) + group_data = group_type(group_data, **group_kwargs) + grouped_data.append((unique_key, group_data)) + + if issubclass(container_type, NdMapping): + with item_check(False), sorted_context(False): + return container_type(grouped_data, kdims=dimensions) + else: + return container_type(grouped_data) + + + @classmethod + def select_mask(cls, dataset, selection): + """ + Given a Dataset object and a dictionary with dimension keys and + selection keys (i.e tuple ranges, slices, sets, lists or literals) + return a boolean mask over the rows in the Dataset object that + have been selected. 
+ """ + mask = None + for dim, sel in selection.items(): + if isinstance(sel, tuple): + sel = slice(*sel) + arr = cls.values(dataset, dim, compute=False) + if util.isdatetime(arr) and util.pd: + try: + sel = util.parse_datetime_selection(sel) + except: + pass + + new_masks = [] + if isinstance(sel, slice): + with warnings.catch_warnings(): + warnings.filterwarnings('ignore', r'invalid value encountered') + if sel.start is not None: + new_masks.append(sel.start <= arr) + if sel.stop is not None: + new_masks.append(arr < sel.stop) + new_mask = new_masks[0] + for imask in new_masks[1:]: + new_mask &= imask + elif isinstance(sel, (set, list)): + for v in sel: + new_masks.append(arr==v) + new_mask = new_masks[0] + for imask in new_masks[1:]: + new_mask |= imask + elif callable(sel): + new_mask = sel(arr) + else: + new_mask = arr == sel + + if mask is None: + mask = new_mask + else: + mask &= new_mask + return mask + + + @classmethod + def select(cls, dataset, selection_mask=None, **selection): + df = dataset.data + if selection_mask is None: + selection_mask = cls.select_mask(dataset, selection) + + indexed = cls.indexed(dataset, selection) + df = df[selection_mask] + if indexed and len(df) == 1 and len(dataset.vdims) == 1: + return df[dataset.vdims[0].name].iloc[0] + return df + + + @classmethod + def aggregate(cls, dataset, dimensions, function, **kwargs): + data = dataset.data + cols = [d.name for d in dataset.kdims if d in dimensions] + vdims = dataset.dimensions('value', label='name') + reindexed = data[cols+vdims] + agg = function.__name__ + if agg in ('amin', 'amax'): + agg = agg[1:] + if not hasattr(data, agg): + raise ValueError('%s aggregation is not supported on cudf DataFrame.' % agg) + if len(dimensions): + grouped = reindexed.groupby(cols, sort=False) + df = getattr(grouped, agg)().reset_index() + else: + agg = getattr(reindexed, agg)() + data = dict(((col, [v]) for col, v in zip(agg.index, agg.to_array()))) + df = util.pd.DataFrame(data, columns=list(agg.index)) + + dropped = [] + for vd in vdims: + if vd not in df.columns: + dropped.append(vd) + return df, dropped + + + @classmethod + def sort(cls, dataset, by=[], reverse=False): + raise NotImplementedError('Sorting is not supported by cudf DataFrames.') + + + @classmethod + def dframe(cls, dataset, dimensions): + if dimensions: + return dataset.data[dimensions].to_pandas() + else: + return dataset.data.to_pandas() + + +Interface.register(cuDFInterface) From 8f8aa360ca88c7be2c019e9d175f547658d3a482 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sat, 21 Sep 2019 14:02:13 +0200 Subject: [PATCH 02/23] Various fixes --- holoviews/core/data/cudf.py | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/holoviews/core/data/cudf.py b/holoviews/core/data/cudf.py index 42e20a0db8..d0269a5e57 100644 --- a/holoviews/core/data/cudf.py +++ b/holoviews/core/data/cudf.py @@ -11,9 +11,9 @@ import numpy as np from .. 
import util -from ...dimension import dimension_name +from ..dimension import dimension_name from ..element import Element -from ...ndmapping import NdMapping, item_check, sorted_context +from ..ndmapping import NdMapping, item_check, sorted_context from .interface import DataError, Interface from .pandas import PandasInterface @@ -126,15 +126,8 @@ def range(cls, dataset, dimension): @classmethod - def values( - cls, - dataset, - dim, - expanded=True, - flat=True, - compute=True, - keep_index=False, - ): + def values(cls, dataset, dim, expanded=True, flat=True, compute=True, + keep_index=False): dim = dataset.get_dimension(dim, strict=True) data = dataset.data[dim.name] if not expanded: From 1335e553defc3dd06ec8a74eae9e4017ee6a9b31 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sun, 8 Dec 2019 17:29:27 +0100 Subject: [PATCH 03/23] Enabled datashader support --- holoviews/core/data/xarray.py | 10 ++++++++++ holoviews/operation/datashader.py | 8 ++++---- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/holoviews/core/data/xarray.py b/holoviews/core/data/xarray.py index c7b77f823f..51b125a006 100644 --- a/holoviews/core/data/xarray.py +++ b/holoviews/core/data/xarray.py @@ -13,6 +13,13 @@ from .interface import Interface, DataError, dask_array_module +def is_cupy(array): + if 'cupy' not in sys.modules: + return False + from cupy import ndarray + return isinstance(array, ndarray) + + class XArrayInterface(GridInterface): types = () @@ -359,6 +366,9 @@ def values(cls, dataset, dim, expanded=True, flat=True, compute=True, keep_index da = dask_array_module() if compute and da and isinstance(data, da.Array): data = data.compute() + if is_cupy(data): + import cupy + data = cupy.asnumpy(data) data = cls.canonicalize(dataset, data, data_coords=data_coords, virtual_coords=virtual_coords) return data.T.flatten() if flat else data diff --git a/holoviews/operation/datashader.py b/holoviews/operation/datashader.py index 1fee7a0dc5..2adcb00879 100644 --- a/holoviews/operation/datashader.py +++ b/holoviews/operation/datashader.py @@ -22,7 +22,7 @@ from ..core import (Operation, Element, Dimension, NdOverlay, CompositeOverlay, Dataset, Overlay, OrderedDict) -from ..core.data import PandasInterface, XArrayInterface, DaskInterface +from ..core.data import PandasInterface, XArrayInterface, DaskInterface, cuDFInterface from ..core.util import ( Iterable, LooseVersion, basestring, cftime_types, cftime_to_timestamp, datetime_types, dt_to_int, isfinite, get_param_values, max_range) @@ -387,14 +387,14 @@ def get_agg_data(cls, obj, category=None): if category and df[category].dtype.name != 'category': df[category] = df[category].astype('category') - is_dask = isinstance(df, dd.DataFrame) - if any((not is_dask and len(df[d.name]) and isinstance(df[d.name].values[0], cftime_types)) or + is_custom = isinstance(df, dd.DataFrame) or cuDFInterface.applies(df) + if any((not is_custom and len(df[d.name]) and isinstance(df[d.name].values[0], cftime_types)) or df[d.name].dtype.kind == 'M' for d in (x, y)): df = df.copy() for d in (x, y): vals = df[d.name] - if not is_dask and len(vals) and isinstance(vals.values[0], cftime_types): + if not is_custom and len(vals) and isinstance(vals.values[0], cftime_types): vals = cftime_to_timestamp(vals, 'ns') elif df[d.name].dtype.kind == 'M': vals = vals.astype('datetime64[ns]') From 3a842110a50bde77f826cd533bb9d368474c5465 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sun, 8 Dec 2019 18:25:10 +0100 Subject: [PATCH 04/23] Improved groupby --- 
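Roughly the grouping strategy this patch moves to: candidate group keys are enumerated as the cross product of each grouping dimension's unique values, each key is turned into a selection, and empty groups are dropped. A minimal standalone sketch of that idea (pandas is used here purely so the illustration runs without a GPU; the DataFrame contents are made up for the example):

from itertools import product

import pandas as pd

df = pd.DataFrame({'Gender': ['M', 'M', 'F'],
                   'Age': [10, 16, 12],
                   'Weight': [15, 18, 10]})
dimensions = ['Gender']

# Cross product of per-dimension unique values gives the candidate keys
keys = product(*(df[d].unique() for d in dimensions))

grouped = []
for key in keys:
    # Build a boolean mask selecting the rows matching this key
    mask = pd.Series(True, index=df.index)
    for d, k in zip(dimensions, key):
        mask &= df[d] == k
    group = df[mask]
    if not len(group):
        continue  # value combinations that never co-occur are skipped
    grouped.append((key, group))

print([key for key, _ in grouped])  # [('M',), ('F',)]
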
holoviews/core/data/cudf.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/holoviews/core/data/cudf.py b/holoviews/core/data/cudf.py index d0269a5e57..c5eafde16b 100644 --- a/holoviews/core/data/cudf.py +++ b/holoviews/core/data/cudf.py @@ -8,6 +8,8 @@ except ImportError: pass +from itertools import product + import numpy as np from .. import util @@ -155,14 +157,14 @@ def groupby(cls, dataset, dimensions, container_type, group_type, **kwargs): group_kwargs.update(kwargs) # Find all the keys along supplied dimensions - indices = [dataset.get_dimension_index(d) for d in dimensions] - keys = (tuple(dataset.iloc[i, d] for d in indices) - for i in range(len(dataset))) + keys = product(*(dataset.data[dimensions[0]].unique() for d in dimensions)) # Iterate over the unique entries applying selection masks grouped_data = [] for unique_key in util.unique_iterator(keys): group_data = dataset.select(**dict(zip(dimensions, unique_key))) + if not len(group_data): + continue group_data = group_type(group_data, **group_kwargs) grouped_data.append((unique_key, group_data)) From 51da1435badfdefb384de6c91f1510b0d2951170 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sun, 8 Dec 2019 18:33:38 +0100 Subject: [PATCH 05/23] Made cuDF sort warn --- holoviews/core/data/cudf.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/holoviews/core/data/cudf.py b/holoviews/core/data/cudf.py index c5eafde16b..3527147930 100644 --- a/holoviews/core/data/cudf.py +++ b/holoviews/core/data/cudf.py @@ -264,8 +264,8 @@ def aggregate(cls, dataset, dimensions, function, **kwargs): @classmethod def sort(cls, dataset, by=[], reverse=False): - raise NotImplementedError('Sorting is not supported by cudf DataFrames.') - + dataset.param.warning("cuDF DataFrames do not yet support sorting.") + return dataset.data @classmethod def dframe(cls, dataset, dimensions): From e818413a087d062df73ca7e7d32e28fc887d2653 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sun, 8 Dec 2019 18:40:36 +0100 Subject: [PATCH 06/23] Implement cuDF concat --- holoviews/core/data/cudf.py | 7 +++++++ holoviews/core/data/dask.py | 11 ++--------- holoviews/core/data/pandas.py | 10 ++++++++-- 3 files changed, 17 insertions(+), 11 deletions(-) diff --git a/holoviews/core/data/cudf.py b/holoviews/core/data/cudf.py index 3527147930..94477cd4cf 100644 --- a/holoviews/core/data/cudf.py +++ b/holoviews/core/data/cudf.py @@ -117,6 +117,7 @@ def init(cls, eltype, data, kdims, vdims): 'with the same name. 
'% d, cls) return data, {'kdims':kdims, 'vdims':vdims}, {} + @classmethod def range(cls, dataset, dimension): @@ -236,6 +237,12 @@ def select(cls, dataset, selection_mask=None, **selection): return df + @classmethod + def concat_fn(cls, dataframes, **kwargs): + import cudf + return cudf.concat(dataframes, **kwargs) + + @classmethod def aggregate(cls, dataset, dimensions, function, **kwargs): data = dataset.data diff --git a/holoviews/core/data/dask.py b/holoviews/core/data/dask.py index 8dca6c321e..3dcb1d6e33 100644 --- a/holoviews/core/data/dask.py +++ b/holoviews/core/data/dask.py @@ -270,15 +270,8 @@ def add_dimension(cls, dataset, dimension, dim_pos, values, vdim): return data @classmethod - def concat(cls, datasets, dimensions, vdims): - import dask.dataframe as dd - dataframes = [] - for key, ds in datasets: - data = ds.data.copy() - for d, k in zip(dimensions, key): - data[d.name] = k - dataframes.append(data) - return dd.concat(dataframes) + def concat_fn(cls, dataframe, **kwargs): + return dd.concat(dataframes, **kwargs) @classmethod def dframe(cls, dataset, dimensions): diff --git a/holoviews/core/data/pandas.py b/holoviews/core/data/pandas.py index 692452d820..2c7dd33532 100644 --- a/holoviews/core/data/pandas.py +++ b/holoviews/core/data/pandas.py @@ -173,6 +173,13 @@ def range(cls, dataset, dimension): return (column.min(), column.max()) + @classmethod + def concat_fn(cls, dataframes, **kwargs): + if util.pandas_version >= '0.23.0': + kwargs['sort'] = False + return pd.concat(dataframes, **kwargs) + + @classmethod def concat(cls, datasets, dimensions, vdims): dataframes = [] @@ -181,8 +188,7 @@ def concat(cls, datasets, dimensions, vdims): for d, k in zip(dimensions, key): data[d.name] = k dataframes.append(data) - kwargs = dict(sort=False) if util.pandas_version >= '0.23.0' else {} - return pd.concat(dataframes, **kwargs) + return cls.concat_fn(dataframes) @classmethod From 3b9c5f7ea532c471d5d2748911460212dadce4fa Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sun, 8 Dec 2019 18:57:36 +0100 Subject: [PATCH 07/23] Fixed flakes --- holoviews/core/data/dask.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/holoviews/core/data/dask.py b/holoviews/core/data/dask.py index 3dcb1d6e33..9b7755093d 100644 --- a/holoviews/core/data/dask.py +++ b/holoviews/core/data/dask.py @@ -270,7 +270,8 @@ def add_dimension(cls, dataset, dimension, dim_pos, values, vdim): return data @classmethod - def concat_fn(cls, dataframe, **kwargs): + def concat_fn(cls, dataframes, **kwargs): + import dask.dataframe as dd return dd.concat(dataframes, **kwargs) @classmethod From a27feeb4ee6a8d4eea811e96d8463842cca6d70d Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sun, 8 Dec 2019 19:41:00 +0100 Subject: [PATCH 08/23] Improved PandasInterface.sample to work with cuDF --- holoviews/core/data/pandas.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/holoviews/core/data/pandas.py b/holoviews/core/data/pandas.py index 2c7dd33532..b8934d7de2 100644 --- a/holoviews/core/data/pandas.py +++ b/holoviews/core/data/pandas.py @@ -321,13 +321,20 @@ def values( @classmethod def sample(cls, dataset, samples=[]): data = dataset.data - mask = False + mask = None for sample in samples: - sample_mask = True + sample_mask = None if np.isscalar(sample): sample = [sample] for i, v in enumerate(sample): - sample_mask = np.logical_and(sample_mask, data.iloc[:, i]==v) - mask |= sample_mask + submask = data.iloc[:, i]==v + if sample_mask is None: + sample_mask = submask 
+ else: + sample_mask &= submask + if mask is None: + mask = sample_mask + else: + mask |= sample_mask return data[mask] From 53c902ebe8a17cc96d7e6b7f5e399fd4d0d1f00e Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sun, 8 Dec 2019 19:41:43 +0100 Subject: [PATCH 09/23] Fix add_dimension, iloc, select and init --- holoviews/core/data/cudf.py | 58 ++++++++++++++++++++++++++++++++++--- 1 file changed, 54 insertions(+), 4 deletions(-) diff --git a/holoviews/core/data/cudf.py b/holoviews/core/data/cudf.py index 94477cd4cf..797dea4fd1 100644 --- a/holoviews/core/data/cudf.py +++ b/holoviews/core/data/cudf.py @@ -57,15 +57,20 @@ def applies(cls, obj): @classmethod def init(cls, eltype, data, kdims, vdims): import cudf + import pandas as pd element_params = eltype.param.objects() kdim_param = element_params['kdims'] vdim_param = element_params['vdims'] - ncols = len(data.columns) - if isinstance(data, cudf.Series): + if isinstance(data, (cudf.Series, pd.Series)): data = data.to_frame() + if not isinstance(data, cudf.DataFrame): + data, _, _ = PandasInterface.init(eltype, data, kdims, vdims) + data = cudf.from_pandas(data) + + ncols = len(data.columns) index_names = [data.index.name] if index_names == [None]: index_names = ['index'] @@ -117,7 +122,7 @@ def init(cls, eltype, data, kdims, vdims): 'with the same name. '% d, cls) return data, {'kdims':kdims, 'vdims':vdims}, {} - + @classmethod def range(cls, dataset, dimension): @@ -171,7 +176,8 @@ def groupby(cls, dataset, dimensions, container_type, group_type, **kwargs): if issubclass(container_type, NdMapping): with item_check(False), sorted_context(False): - return container_type(grouped_data, kdims=dimensions) + kdims = [dataset.get_dimension(d) for d in dimensions] + return container_type(grouped_data, kdims=kdims) else: return container_type(grouped_data) @@ -203,12 +209,16 @@ def select_mask(cls, dataset, selection): new_masks.append(sel.start <= arr) if sel.stop is not None: new_masks.append(arr < sel.stop) + if not new_masks: + continue new_mask = new_masks[0] for imask in new_masks[1:]: new_mask &= imask elif isinstance(sel, (set, list)): for v in sel: new_masks.append(arr==v) + if not new_masks: + continue new_mask = new_masks[0] for imask in new_masks[1:]: new_mask |= imask @@ -243,6 +253,14 @@ def concat_fn(cls, dataframes, **kwargs): return cudf.concat(dataframes, **kwargs) + @classmethod + def add_dimension(cls, dataset, dimension, dim_pos, values, vdim): + data = dataset.data.copy() + if dimension.name not in data: + data[dimension.name] = values + return data + + @classmethod def aggregate(cls, dataset, dimensions, function, **kwargs): data = dataset.data @@ -269,6 +287,38 @@ def aggregate(cls, dataset, dimensions, function, **kwargs): return df, dropped + @classmethod + def iloc(cls, dataset, index): + import cudf + + rows, cols = index + scalar = False + columns = list(dataset.data.columns) + if isinstance(cols, slice): + cols = [d.name for d in dataset.dimensions()][cols] + elif np.isscalar(cols): + scalar = np.isscalar(rows) + cols = [dataset.get_dimension(cols).name] + else: + cols = [dataset.get_dimension(d).name for d in index[1]] + col_index = [columns.index(c) for c in cols] + if np.isscalar(rows): + rows = [rows] + + if scalar: + return dataset.data[cols[0]].iloc[rows[0]] + result = dataset.data.iloc[rows, col_index] + + # cuDF does not handle single rows and cols indexing correctly + # as of cudf=0.10.0 so we have to convert Series back to DataFrame + if isinstance(result, cudf.Series): + if len(cols) == 1: + result = 
result.to_frame(cols[0]) + else: + result = result.to_frame().T + return result + + @classmethod def sort(cls, dataset, by=[], reverse=False): dataset.param.warning("cuDF DataFrames do not yet support sorting.") From e168726282470f36d19bd50c47790c88bdc9dacf Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sun, 8 Dec 2019 19:43:14 +0100 Subject: [PATCH 10/23] Update cuDF issue description --- holoviews/core/data/cudf.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/holoviews/core/data/cudf.py b/holoviews/core/data/cudf.py index 797dea4fd1..068ab20194 100644 --- a/holoviews/core/data/cudf.py +++ b/holoviews/core/data/cudf.py @@ -32,9 +32,7 @@ class cuDFInterface(PandasInterface): 1) Sorting is not supported and any attempt at sorting will be ignored with an warning. - 2) cuDF does not easily support adding a new column to an existing - dataframe unless it is a scalar, add_dimension will therefore - error when supplied a non-scalar value. + 2) Aggregation and groupby do not have a consistent sort order. 3) Not all functions can be easily applied to a dask dataframe so some functions applied with aggregate and reduce will not work. """ From 1057edb9c9523fb54ab177efa49ebf7a01770fba Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sun, 8 Dec 2019 19:43:45 +0100 Subject: [PATCH 11/23] Add cuDF tests --- .../tests/core/data/testcudfinterface.py | 44 +++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 holoviews/tests/core/data/testcudfinterface.py diff --git a/holoviews/tests/core/data/testcudfinterface.py b/holoviews/tests/core/data/testcudfinterface.py new file mode 100644 index 0000000000..6e6ff4008f --- /dev/null +++ b/holoviews/tests/core/data/testcudfinterface.py @@ -0,0 +1,44 @@ +from unittest import SkipTest + +try: + import cudf +except: + raise SkipTest("Could not import cuDF, skipping cuDFInterface tests.") + +from .base import HeterogeneousColumnTests, InterfaceTests + +import logging + + + +class cuDFInterfaceTests(HeterogeneousColumnTests, InterfaceTests): + """ + Tests for the cuDFInterface. 
+ """ + + datatype = 'cuDF' + data_type = cudf.DataFrame + + def setUp(self): + super(cuDFInterfaceTests, self).setUp() + logging.getLogger('numba.cuda.cudadrv.driver').setLevel(30) + + def test_dataset_sort_hm(self): + raise SkipTest("Not supported") + + def test_dataset_sort_reverse_hm(self): + raise SkipTest("Not supported") + + def test_dataset_sort_vdim_ht(self): + raise SkipTest("Not supported") + + def test_dataset_sort_vdim_hm(self): + raise SkipTest("Not supported") + + def test_dataset_sort_vdim_hm_alias(self): + raise SkipTest("Not supported") + + def test_dataset_sort_string_ht(self): + raise SkipTest("Not supported") + + From e73cf8139cebc777394e5f487a9fb15794354884 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sun, 8 Dec 2019 20:00:45 +0100 Subject: [PATCH 12/23] Further test fixes --- holoviews/core/data/cudf.py | 16 ++++++++++------ holoviews/tests/core/data/testcudfinterface.py | 3 ++- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/holoviews/core/data/cudf.py b/holoviews/core/data/cudf.py index 068ab20194..409201a449 100644 --- a/holoviews/core/data/cudf.py +++ b/holoviews/core/data/cudf.py @@ -121,7 +121,6 @@ def init(cls, eltype, data, kdims, vdims): return data, {'kdims':kdims, 'vdims':vdims}, {} - @classmethod def range(cls, dataset, dimension): column = dataset.data[dataset.get_dimension(dimension, strict=True).name] @@ -239,7 +238,8 @@ def select(cls, dataset, selection_mask=None, **selection): selection_mask = cls.select_mask(dataset, selection) indexed = cls.indexed(dataset, selection) - df = df[selection_mask] + if selection_mask is not None: + df = df[selection_mask] if indexed and len(df) == 1 and len(dataset.vdims) == 1: return df[dataset.vdims[0].name].iloc[0] return df @@ -266,14 +266,18 @@ def aggregate(cls, dataset, dimensions, function, **kwargs): vdims = dataset.dimensions('value', label='name') reindexed = data[cols+vdims] agg = function.__name__ - if agg in ('amin', 'amax'): - agg = agg[1:] - if not hasattr(data, agg): - raise ValueError('%s aggregation is not supported on cudf DataFrame.' % agg) if len(dimensions): + agg_map = {'amin': 'min', 'amax': 'max'} + agg = agg_map.get(agg, agg) grouped = reindexed.groupby(cols, sort=False) + if not hasattr(grouped, agg): + raise ValueError('%s aggregation is not supported on cudf DataFrame.' % agg) df = getattr(grouped, agg)().reset_index() else: + agg_map = {'amin': 'min', 'amax': 'max', 'size': 'count'} + agg = agg_map.get(agg, agg) + if not hasattr(reindexed, agg): + raise ValueError('%s aggregation is not supported on cudf DataFrame.' 
% agg) agg = getattr(reindexed, agg)() data = dict(((col, [v]) for col, v in zip(agg.index, agg.to_array()))) df = util.pd.DataFrame(data, columns=list(agg.index)) diff --git a/holoviews/tests/core/data/testcudfinterface.py b/holoviews/tests/core/data/testcudfinterface.py index 6e6ff4008f..72a0d6590a 100644 --- a/holoviews/tests/core/data/testcudfinterface.py +++ b/holoviews/tests/core/data/testcudfinterface.py @@ -41,4 +41,5 @@ def test_dataset_sort_vdim_hm_alias(self): def test_dataset_sort_string_ht(self): raise SkipTest("Not supported") - + def test_dataset_2D_aggregate_spread_fn_with_duplicates(self): + raise SkipTest("cuDF does not support variance aggregation") From a9ef585509581a1eb7b14f8cccebf04c04e3825e Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Mon, 9 Dec 2019 00:03:32 +0100 Subject: [PATCH 13/23] Add dataset.dataset to cuDF groupby --- holoviews/core/data/cudf.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/holoviews/core/data/cudf.py b/holoviews/core/data/cudf.py index 409201a449..ffca018708 100644 --- a/holoviews/core/data/cudf.py +++ b/holoviews/core/data/cudf.py @@ -159,6 +159,9 @@ def groupby(cls, dataset, dimensions, container_type, group_type, **kwargs): group_kwargs['kdims'] = kdims group_kwargs.update(kwargs) + # Propagate dataset + group_kwargs['dataset'] = dataset.dataset + # Find all the keys along supplied dimensions keys = product(*(dataset.data[dimensions[0]].unique() for d in dimensions)) From dd32306f00ec24fe29ca9d3fc3c551da3fe2bb07 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Wed, 4 Mar 2020 15:40:55 +0100 Subject: [PATCH 14/23] Implement cuDF sort --- holoviews/core/data/cudf.py | 5 ++-- .../tests/core/data/testcudfinterface.py | 23 ++++--------------- 2 files changed, 8 insertions(+), 20 deletions(-) diff --git a/holoviews/core/data/cudf.py b/holoviews/core/data/cudf.py index ffca018708..9758ff9c90 100644 --- a/holoviews/core/data/cudf.py +++ b/holoviews/core/data/cudf.py @@ -326,8 +326,9 @@ def iloc(cls, dataset, index): @classmethod def sort(cls, dataset, by=[], reverse=False): - dataset.param.warning("cuDF DataFrames do not yet support sorting.") - return dataset.data + cols = [dataset.get_dimension(d, strict=True).name for d in by] + return dataset.data.sort_values(by=cols, ascending=not reverse) + @classmethod def dframe(cls, dataset, dimensions): diff --git a/holoviews/tests/core/data/testcudfinterface.py b/holoviews/tests/core/data/testcudfinterface.py index 72a0d6590a..3eda53a532 100644 --- a/holoviews/tests/core/data/testcudfinterface.py +++ b/holoviews/tests/core/data/testcudfinterface.py @@ -23,23 +23,10 @@ def setUp(self): super(cuDFInterfaceTests, self).setUp() logging.getLogger('numba.cuda.cudadrv.driver').setLevel(30) - def test_dataset_sort_hm(self): - raise SkipTest("Not supported") - - def test_dataset_sort_reverse_hm(self): - raise SkipTest("Not supported") - - def test_dataset_sort_vdim_ht(self): - raise SkipTest("Not supported") - - def test_dataset_sort_vdim_hm(self): - raise SkipTest("Not supported") - - def test_dataset_sort_vdim_hm_alias(self): - raise SkipTest("Not supported") - - def test_dataset_sort_string_ht(self): - raise SkipTest("Not supported") - def test_dataset_2D_aggregate_spread_fn_with_duplicates(self): raise SkipTest("cuDF does not support variance aggregation") + + def test_dataset_reduce_ht(self): + reduced = Dataset({'Age':self.age, 'Weight':self.weight, 'Height':self.height}, + kdims=self.kdims[1:], vdims=self.vdims) + self.assertEqual(self.table.reduce(['Gender'], np.mean), reduced) From 
ef8170d2f7d82eba851ab54fc64a0ab8890e1f27 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Wed, 4 Mar 2020 15:44:21 +0100 Subject: [PATCH 15/23] Enabled cudf tests --- holoviews/tests/core/data/testcudfinterface.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/holoviews/tests/core/data/testcudfinterface.py b/holoviews/tests/core/data/testcudfinterface.py index 3eda53a532..07f55252f1 100644 --- a/holoviews/tests/core/data/testcudfinterface.py +++ b/holoviews/tests/core/data/testcudfinterface.py @@ -19,6 +19,8 @@ class cuDFInterfaceTests(HeterogeneousColumnTests, InterfaceTests): datatype = 'cuDF' data_type = cudf.DataFrame + __test__ = True + def setUp(self): super(cuDFInterfaceTests, self).setUp() logging.getLogger('numba.cuda.cudadrv.driver').setLevel(30) From e4868fb7c5edcb32f0f273f5b5580c1d35feec74 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Wed, 4 Mar 2020 15:56:55 +0100 Subject: [PATCH 16/23] Updated various tests --- .../tests/core/data/testcudfinterface.py | 54 +++++++++++++++++-- 1 file changed, 51 insertions(+), 3 deletions(-) diff --git a/holoviews/tests/core/data/testcudfinterface.py b/holoviews/tests/core/data/testcudfinterface.py index 07f55252f1..55017c9320 100644 --- a/holoviews/tests/core/data/testcudfinterface.py +++ b/holoviews/tests/core/data/testcudfinterface.py @@ -1,3 +1,5 @@ +import logging + from unittest import SkipTest try: @@ -5,10 +7,9 @@ except: raise SkipTest("Could not import cuDF, skipping cuDFInterface tests.") -from .base import HeterogeneousColumnTests, InterfaceTests - -import logging +from holoviews.core.data import Dataset +from .base import HeterogeneousColumnTests, InterfaceTests class cuDFInterfaceTests(HeterogeneousColumnTests, InterfaceTests): @@ -32,3 +33,50 @@ def test_dataset_reduce_ht(self): reduced = Dataset({'Age':self.age, 'Weight':self.weight, 'Height':self.height}, kdims=self.kdims[1:], vdims=self.vdims) self.assertEqual(self.table.reduce(['Gender'], np.mean), reduced) + + def test_dataset_mixed_type_range(self): + ds = Dataset((['A', 'B', 'C', None],), 'A') + self.assertEqual(ds.range(0), (np.nan, np.nan)) + + def test_dataset_groupby(self): + group1 = {'Age':[10,16], 'Weight':[15,18], 'Height':[0.8,0.6]} + group2 = {'Age':[12], 'Weight':[10], 'Height':[0.8]} + grouped = HoloMap([('M', Dataset(group1, kdims=['Age'], vdims=self.vdims)), + ('F', Dataset(group2, kdims=['Age'], vdims=self.vdims))], + kdims=['Gender'], sort=False) + self.assertEqual(self.table.groupby(['Gender']).apply('sort'), grouped.apply('sort')) + + def test_dataset_groupby_alias(self): + group1 = {'age':[10,16], 'weight':[15,18], 'height':[0.8,0.6]} + group2 = {'age':[12], 'weight':[10], 'height':[0.8]} + grouped = HoloMap([('M', Dataset(group1, kdims=[('age', 'Age')], + vdims=self.alias_vdims)), + ('F', Dataset(group2, kdims=[('age', 'Age')], + vdims=self.alias_vdims))], + kdims=[('gender', 'Gender')], sort=False) + self.assertEqual(self.alias_table.groupby('Gender').apply('sort'), + grouped.apply('sort')) + + def test_dataset_aggregate_ht(self): + aggregated = Dataset({'Gender':['M', 'F'], 'Weight':[16.5, 10], 'Height':[0.7, 0.8]}, + kdims=self.kdims[:1], vdims=self.vdims) + self.compare_dataset(self.table.aggregate(['Gender'], np.mean).sort(), aggregated.sort()) + + def test_dataset_aggregate_ht_alias(self): + aggregated = Dataset({'gender':['M', 'F'], 'weight':[16.5, 10], 'height':[0.7, 0.8]}, + kdims=self.alias_kdims[:1], vdims=self.alias_vdims) + self.compare_dataset(self.alias_table.aggregate('Gender', np.mean).sort(), aggregated.sort()) + + def 
test_dataset_2D_partial_reduce_ht(self): + dataset = Dataset({'x':self.xs, 'y':self.ys, 'z':self.zs}, + kdims=['x', 'y'], vdims=['z']) + reduced = Dataset({'x':self.xs, 'z':self.zs}, + kdims=['x'], vdims=['z']) + self.assertEqual(dataset.reduce(['y'], np.mean).sort(), reduced.sort()) + + def test_dataset_2D_aggregate_partial_ht(self): + dataset = Dataset({'x':self.xs, 'y':self.ys, 'z':self.zs}, + kdims=['x', 'y'], vdims=['z']) + reduced = Dataset({'x':self.xs, 'z':self.zs}, + kdims=['x'], vdims=['z']) + self.assertEqual(dataset.aggregate(['x'], np.mean).sort(), reduced.sort()) From 359ce09e43b45ada6522e3680750f8d2b8599a5f Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Wed, 4 Mar 2020 15:58:31 +0100 Subject: [PATCH 17/23] Fixed flakes --- holoviews/tests/core/data/testcudfinterface.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/holoviews/tests/core/data/testcudfinterface.py b/holoviews/tests/core/data/testcudfinterface.py index 55017c9320..dfc1d4640b 100644 --- a/holoviews/tests/core/data/testcudfinterface.py +++ b/holoviews/tests/core/data/testcudfinterface.py @@ -2,12 +2,15 @@ from unittest import SkipTest +import numpy as np + try: import cudf except: raise SkipTest("Could not import cuDF, skipping cuDFInterface tests.") from holoviews.core.data import Dataset +from holoviews.core.spaces import HoloMap from .base import HeterogeneousColumnTests, InterfaceTests From 304b712e234c682c5bb21e40b945d5b46734e354 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Wed, 4 Mar 2020 17:45:19 +0100 Subject: [PATCH 18/23] Updated tests --- holoviews/tests/core/data/testcudfinterface.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/holoviews/tests/core/data/testcudfinterface.py b/holoviews/tests/core/data/testcudfinterface.py index dfc1d4640b..c52a8b178a 100644 --- a/holoviews/tests/core/data/testcudfinterface.py +++ b/holoviews/tests/core/data/testcudfinterface.py @@ -46,7 +46,7 @@ def test_dataset_groupby(self): group2 = {'Age':[12], 'Weight':[10], 'Height':[0.8]} grouped = HoloMap([('M', Dataset(group1, kdims=['Age'], vdims=self.vdims)), ('F', Dataset(group2, kdims=['Age'], vdims=self.vdims))], - kdims=['Gender'], sort=False) + kdims=['Gender']) self.assertEqual(self.table.groupby(['Gender']).apply('sort'), grouped.apply('sort')) def test_dataset_groupby_alias(self): @@ -56,9 +56,9 @@ def test_dataset_groupby_alias(self): vdims=self.alias_vdims)), ('F', Dataset(group2, kdims=[('age', 'Age')], vdims=self.alias_vdims))], - kdims=[('gender', 'Gender')], sort=False) + kdims=[('gender', 'Gender')]) self.assertEqual(self.alias_table.groupby('Gender').apply('sort'), - grouped.apply('sort')) + grouped) def test_dataset_aggregate_ht(self): aggregated = Dataset({'Gender':['M', 'F'], 'Weight':[16.5, 10], 'Height':[0.7, 0.8]}, @@ -83,3 +83,15 @@ def test_dataset_2D_aggregate_partial_ht(self): reduced = Dataset({'x':self.xs, 'z':self.zs}, kdims=['x'], vdims=['z']) self.assertEqual(dataset.aggregate(['x'], np.mean).sort(), reduced.sort()) + + def test_dataset_2D_aggregate_partial_hm(self): + z_ints = [el**2 for el in self.y_ints] + dataset = Dataset({'x':self.xs, 'y':self.y_ints, 'z':z_ints}, + kdims=['x', 'y'], vdims=['z']) + self.assertEqual(dataset.aggregate(['x'], np.mean).sort(), + Dataset({'x':self.xs, 'z':z_ints}, kdims=['x'], vdims=['z']).sort()) + + def test_dataset_reduce_ht(self): + reduced = Dataset({'Age':self.age, 'Weight':self.weight, 'Height':self.height}, + kdims=self.kdims[1:], vdims=self.vdims) + 
self.assertEqual(self.table.reduce(['Gender'], np.mean).sort(), reduced.sort()) From 909c9b6c27094f4fe2cf7c1552bc309afde1e621 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Wed, 4 Mar 2020 18:07:33 +0100 Subject: [PATCH 19/23] Further test fixes --- holoviews/tests/core/data/testcudfinterface.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/holoviews/tests/core/data/testcudfinterface.py b/holoviews/tests/core/data/testcudfinterface.py index c52a8b178a..cd3bca8f7f 100644 --- a/holoviews/tests/core/data/testcudfinterface.py +++ b/holoviews/tests/core/data/testcudfinterface.py @@ -39,7 +39,9 @@ def test_dataset_reduce_ht(self): def test_dataset_mixed_type_range(self): ds = Dataset((['A', 'B', 'C', None],), 'A') - self.assertEqual(ds.range(0), (np.nan, np.nan)) + vmin, vmax = ds.range(0) + self.assertTrue(np.isnan(vmin)) + self.assertTrue(np.isnan(vmax)) def test_dataset_groupby(self): group1 = {'Age':[10,16], 'Weight':[15,18], 'Height':[0.8,0.6]} @@ -95,3 +97,16 @@ def test_dataset_reduce_ht(self): reduced = Dataset({'Age':self.age, 'Weight':self.weight, 'Height':self.height}, kdims=self.kdims[1:], vdims=self.vdims) self.assertEqual(self.table.reduce(['Gender'], np.mean).sort(), reduced.sort()) + + def test_dataset_groupby_second_dim(self): + group1 = {'Gender':['M'], 'Weight':[15], 'Height':[0.8]} + group2 = {'Gender':['M'], 'Weight':[18], 'Height':[0.6]} + group3 = {'Gender':['F'], 'Weight':[10], 'Height':[0.8]} + grouped = HoloMap([(10, Dataset(group1, kdims=['Gender'], vdims=self.vdims)), + (16, Dataset(group2, kdims=['Gender'], vdims=self.vdims)), + (12, Dataset(group3, kdims=['Gender'], vdims=self.vdims))], + kdims=['Age']) + self.assertEqual(self.table.groupby(['Age']).apply('sort'), grouped) + + def test_dataset_aggregate_string_types_size(self): + raise SkipTest("cuDF does not support variance aggregation") From 75997d214b8aab1f23a558931cf0da98e2e5a4de Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Wed, 4 Mar 2020 18:17:13 +0100 Subject: [PATCH 20/23] Update docstring --- holoviews/core/data/cudf.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/holoviews/core/data/cudf.py b/holoviews/core/data/cudf.py index 9758ff9c90..0b31801476 100644 --- a/holoviews/core/data/cudf.py +++ b/holoviews/core/data/cudf.py @@ -30,10 +30,9 @@ class cuDFInterface(PandasInterface): The cuDFInterface covers almost the complete API exposed by the PandasInterface with two notable exceptions: - 1) Sorting is not supported and any attempt at sorting will - be ignored with an warning. - 2) Aggregation and groupby do not have a consistent sort order. - 3) Not all functions can be easily applied to a dask dataframe so + 1) Aggregation and groupby do not have a consistent sort order + (see https://github.com/rapidsai/cudf/issues/4237) + 3) Not all functions can be easily applied to a cuDF so some functions applied with aggregate and reduce will not work. 
""" From 3fd757c0b4d598a37a89959cc1925a6712ba94c9 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Wed, 4 Mar 2020 18:29:44 +0100 Subject: [PATCH 21/23] Add datashader cuDF test --- holoviews/tests/operation/testdatashader.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/holoviews/tests/operation/testdatashader.py b/holoviews/tests/operation/testdatashader.py index 2f5d4d3b4a..47d7779c89 100644 --- a/holoviews/tests/operation/testdatashader.py +++ b/holoviews/tests/operation/testdatashader.py @@ -20,13 +20,19 @@ except: raise SkipTest('Datashader not available') +try: + import cudf + import cupy +except: + cudf = None + try: import spatialpandas except: spatialpandas = None spatialpandas_skip = skipIf(spatialpandas is None, "SpatialPandas not available") - +cudf_skip = skipIf(cudf is None, "cuDF not available") class DatashaderAggregateTests(ComparisonTestCase): @@ -42,6 +48,17 @@ def test_aggregate_points(self): vdims=['Count']) self.assertEqual(img, expected) + @cudf_skip + def test_aggregate_points_cudf(self): + points = Points([(0.2, 0.3), (0.4, 0.7), (0, 0.99)], datatype=['cuDF']) + self.assertIsInstance(points.data, cudf.DataFrame) + img = aggregate(points, dynamic=False, x_range=(0, 1), y_range=(0, 1), + width=2, height=2) + expected = Image(([0.25, 0.75], [0.25, 0.75], [[1, 0], [2, 0]]), + vdims=['Count']) + self.assertIsInstance(img.data.Count.data, cupy.ndarray) + self.assertEqual(img, expected) + def test_aggregate_zero_range_points(self): p = Points([(0, 0), (1, 1)]) agg = rasterize(p, x_range=(0, 0), y_range=(0, 1), expand=False, dynamic=False, From 8b6e5d6448652070a7dfe4bdb61edef03e438210 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Wed, 4 Mar 2020 18:50:42 +0100 Subject: [PATCH 22/23] Fixed flake --- holoviews/tests/core/data/testcudfinterface.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/holoviews/tests/core/data/testcudfinterface.py b/holoviews/tests/core/data/testcudfinterface.py index cd3bca8f7f..89c0b4c3c4 100644 --- a/holoviews/tests/core/data/testcudfinterface.py +++ b/holoviews/tests/core/data/testcudfinterface.py @@ -32,11 +32,6 @@ def setUp(self): def test_dataset_2D_aggregate_spread_fn_with_duplicates(self): raise SkipTest("cuDF does not support variance aggregation") - def test_dataset_reduce_ht(self): - reduced = Dataset({'Age':self.age, 'Weight':self.weight, 'Height':self.height}, - kdims=self.kdims[1:], vdims=self.vdims) - self.assertEqual(self.table.reduce(['Gender'], np.mean), reduced) - def test_dataset_mixed_type_range(self): ds = Dataset((['A', 'B', 'C', None],), 'A') vmin, vmax = ds.range(0) From e1e9bd0378bb067927085df43f793bc606e85b36 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Wed, 4 Mar 2020 19:31:48 +0100 Subject: [PATCH 23/23] Update holoviews/core/data/cudf.py Co-Authored-By: James A. Bednar --- holoviews/core/data/cudf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/holoviews/core/data/cudf.py b/holoviews/core/data/cudf.py index 0b31801476..ea0f62b5a3 100644 --- a/holoviews/core/data/cudf.py +++ b/holoviews/core/data/cudf.py @@ -24,7 +24,7 @@ class cuDFInterface(PandasInterface): """ The cuDFInterface allows a Dataset objects to wrap a cuDF DataFrame object. Using cuDF allows working with columnar - data on a GPU. Most operation leave the data in GPU memory, + data on a GPU. Most operations leave the data in GPU memory, however to plot the data it has to be loaded into memory. The cuDFInterface covers almost the complete API exposed