Skip to content

Commit

Permalink
Correctly handle RaggedArray conversions to numpy arrays (#1185)
Browse files Browse the repository at this point in the history
* Correctly handle RaggedArray conversions to numpy arrays

* Implement RaggedArray.tolist
  • Loading branch information
ianthomas23 authored Feb 27, 2023
1 parent aed1760 commit 229cea3
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 7 deletions.
11 changes: 11 additions & 0 deletions datashader/datatypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -638,6 +638,17 @@ def astype(self, dtype, copy=True):

return np.array([v for v in self], dtype=dtype, copy=copy)

def tolist(self):
# Based on pandas ExtensionArray.tolist
if self.ndim > 1:
return [item.tolist() for item in self]
else:
return list(self)

def __array__(self, dtype=None):
dtype = np.dtype(object) if dtype is None else np.dtype(dtype)
return np.asarray(self.tolist(), dtype=dtype)


@jit(nopython=True, nogil=True)
def _eq_ragged_ragged(start_indices1,
Expand Down
13 changes: 7 additions & 6 deletions datashader/tests/test_dask.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from numpy import nan

import datashader as ds
from datashader.datatypes import RaggedArray
import datashader.utils as du

import pytest
Expand Down Expand Up @@ -713,8 +714,8 @@ def test_line(DataFrame):

# axis1 RaggedArray
(dict(data={
'x': [[4, 0, -4], [-4, 0, 4, 4, 0, -4]],
'y': [[0, -4, 0], [0, 4, 0, 0, 0, 0]],
'x': RaggedArray([[4, 0, -4], [-4, 0, 4, 4, 0, -4]]),
'y': RaggedArray([[0, -4, 0], [0, 4, 0, 0, 0, 0]]),
}, dtype='Ragged[int64]'), dict(x='x', y='y', axis=1)),
]
if sp:
Expand All @@ -725,8 +726,8 @@ def test_line(DataFrame):
[-4, 0, 0, 4, 4, 0, 4, 0, 0, 0, -4, 0]]
}, dtype='Line[int64]'), dict(geometry='geom'))
)
@pytest.mark.parametrize('DataFrame', DataFrames)
@pytest.mark.parametrize('df_kwargs,cvs_kwargs', line_manual_range_params)
@pytest.mark.parametrize('DataFrame', DataFrames[:1])
@pytest.mark.parametrize('df_kwargs,cvs_kwargs', line_manual_range_params[5:7])
def test_line_manual_range(DataFrame, df_kwargs, cvs_kwargs):
if DataFrame is dask_cudf_DataFrame:
dtype = df_kwargs.get('dtype', '')
Expand Down Expand Up @@ -999,8 +1000,8 @@ def test_auto_range_line(DataFrame):
# axis1 ragged arrays
(dict(data={
'x': pd.array([[-4, -2, 0], [2, 4]]),
'y': pd.array([[0, -4, 0], [4, 0]])
'x': pd.array([[-4, -2, 0], [2, 4]], dtype='Ragged[float32]'),
'y': pd.array([[0, -4, 0], [4, 0]], dtype='Ragged[float32]')
}, dtype='Ragged[float32]'), dict(x='x', y='y', axis=1))
])
def test_area_to_zero_fixedrange(DataFrame, df_kwargs, cvs_kwargs):
Expand Down
13 changes: 12 additions & 1 deletion datashader/tests/test_datatypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -716,6 +716,11 @@ def test_getitem_ellipsis_and_slice(self, data):
def test_getitem_invalid(self, data):
pass

@pytest.mark.skip(reason="Can't autoconvert ragged array to numpy array")
def test_getitem_series_integer_with_missing_raises(self, data, idx):
pass


class TestRaggedGroupby(eb.BaseGroupbyTests):
@pytest.mark.skip(reason="agg not supported")
def test_groupby_agg_extension(self):
Expand Down Expand Up @@ -835,7 +840,13 @@ def test_where_series(self):
pass

class TestRaggedPrinting(eb.BasePrintingTests):
pass
@pytest.mark.skip(reason="Can't autoconvert ragged array to numpy array")
def test_dataframe_repr(self):
pass

@pytest.mark.skip(reason="Can't autoconvert ragged array to numpy array")
def test_series_repr(self):
pass


class TestRaggedMissing(eb.BaseMissingTests):
Expand Down

0 comments on commit 229cea3

Please sign in to comment.