Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP/POC: Allow (N, 1) and (1,N) EAs #26914

Closed
wants to merge 30 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
bb1ea4a
Implement ReshapeMixin to allow datetimelike arrays to be 2D
jbrockmendel Jun 14, 2019
65dc544
Make DatetimeTZBlock.values have same ndim as the block
jbrockmendel Jun 14, 2019
ccc49b4
checkpoint with only 18 failing tests
jbrockmendel Jun 15, 2019
92a6ec7
Checkpoint with 13 failing
jbrockmendel Jun 16, 2019
923cd7d
passing
jbrockmendel Jun 16, 2019
ab05bcd
Merge branch 'master' of https://github.com/pandas-dev/pandas into d2arr
jbrockmendel Jun 16, 2019
7fc8021
passing
jbrockmendel Jun 16, 2019
03817c4
cleanup
jbrockmendel Jun 16, 2019
929d87b
nothing broken
jbrockmendel Jun 16, 2019
6f8eae3
cleanup
jbrockmendel Jun 16, 2019
729abbb
Merge branch 'master' of https://github.com/pandas-dev/pandas into d2arr
jbrockmendel Jun 16, 2019
401a35e
remove assertion
jbrockmendel Jun 16, 2019
1d6b812
remove unreachable
jbrockmendel Jun 16, 2019
2fe0c48
docstrings, remove need for swapaxes
jbrockmendel Jun 16, 2019
9f525fc
Merge branch 'master' of https://github.com/pandas-dev/pandas into d2arr
jbrockmendel Jun 16, 2019
dd42b06
separate tests, use Block.interpolate
jbrockmendel Jun 17, 2019
88fbd6a
Merge branch 'master' of https://github.com/pandas-dev/pandas into d2arr
jbrockmendel Jun 17, 2019
089af0a
repr tests, _unstack
jbrockmendel Jun 17, 2019
84ebc36
cleanup
jbrockmendel Jun 17, 2019
f789d02
cleanup
jbrockmendel Jun 17, 2019
b37e347
cleanup
jbrockmendel Jun 18, 2019
a4b8505
Cleanup
jbrockmendel Jun 18, 2019
99931d4
fix sql tests
jbrockmendel Jun 18, 2019
3ff3380
Merge branch 'master' of https://github.com/pandas-dev/pandas into d2arr
jbrockmendel Jun 18, 2019
3331260
Merge branch 'master' of https://github.com/pandas-dev/pandas into d2arr
jbrockmendel Jun 18, 2019
849ee62
Merge branch 'master' of https://github.com/pandas-dev/pandas into d2arr
jbrockmendel Jun 19, 2019
bbd098a
Merge branch 'master' of https://github.com/pandas-dev/pandas into d2arr
jbrockmendel Jun 20, 2019
ce9b464
Merge branch 'master' of https://github.com/pandas-dev/pandas into d2arr
jbrockmendel Jun 20, 2019
9979c60
simplifications
jbrockmendel Jun 21, 2019
06c4544
Merge branch 'master' of https://github.com/pandas-dev/pandas into d2arr
jbrockmendel Jun 21, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 67 additions & 0 deletions pandas/core/arrays/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1119,3 +1119,70 @@ def _create_arithmetic_method(cls, op):
@classmethod
def _create_comparison_method(cls, op):
return cls._create_method(op, coerce_to_dtype=False)


class ReshapeMixin:
    """
    Mixin for ExtensionArray subclasses that secretly define `reshape`
    and related methods.

    Subclass must implement _wrap_data property.

    Every reshaping method delegates to the same-named method of
    ``self._wrap_data`` and re-wraps the result with
    ``type(self)(data, dtype=self.dtype)``.

    Notes
    -----
    - We assume that the constructor will accept:
        type(self)(self._wrap_data.reshape(shape), dtype=self.dtype)
      If not, then the methods below will need to be overridden.
    - We assume that the only 2D shapes taken will be (N, 1) and (1, N).
      This ensures that we can reshape, transpose, and ravel without worrying
      about column-order/row-order.
    """

    @property
    def _wrap_data(self):
        """
        The underlying reshape-able array that we are wrapping.

        Raises
        ------
        AbstractMethodError
            Always, unless overridden by the subclass.
        """
        raise AbstractMethodError(self)

    # --------------------------------------------------
    # Shape Attributes

    @property
    def shape(self) -> Tuple[int, ...]:
        """
        Return a tuple of the array dimensions.
        """
        return self._wrap_data.shape

    def __len__(self) -> int:
        """
        Return the size of the first dimension, i.e. ``self.shape[0]``.
        """
        return self.shape[0]

    @property
    def ndim(self) -> int:
        """
        Return the number of dimensions, i.e. ``len(self.shape)``.
        """
        return len(self.shape)

    # --------------------------------------------------
    # Reshape Methods

    def reshape(self, *shape):
        """
        Return a new instance wrapping ``self._wrap_data.reshape(*shape)``.

        Parameters
        ----------
        *shape
            Forwarded unchanged to the wrapped array's ``reshape``.
        """
        # numpy accepts either a single tuple or an expanded tuple
        data = self._wrap_data.reshape(*shape)
        return type(self)(data, dtype=self.dtype)

    def transpose(self, axes=None):
        """
        Return a new instance wrapping ``self._wrap_data.transpose(axes)``.

        Parameters
        ----------
        axes : optional, default None
            Forwarded unchanged to the wrapped array's ``transpose``.
            For a numpy array, None reverses the order of the axes, so
            ``obj.transpose()`` matches ``obj.T``.
        """
        data = self._wrap_data.transpose(axes)
        return type(self)(data, dtype=self.dtype)

    @property
    def T(self):
        """
        Return a new instance wrapping ``self._wrap_data.T``.
        """
        data = self._wrap_data.T
        return type(self)(data, dtype=self.dtype)

    def ravel(self, order=None):
        """
        Return a new instance wrapping ``self._wrap_data.ravel(order=order)``.

        Parameters
        ----------
        order : optional, default None
            Forwarded unchanged to the wrapped array's ``ravel``.
        """
        data = self._wrap_data.ravel(order=order)
        return type(self)(data, dtype=self.dtype)

    def swapaxes(self, *axes):
        """
        Return a new instance wrapping ``self._wrap_data.swapaxes(*axes)``.

        Parameters
        ----------
        *axes
            Forwarded unchanged to the wrapped array's ``swapaxes``.
        """
        data = self._wrap_data.swapaxes(*axes)
        return type(self)(data, dtype=self.dtype)
53 changes: 45 additions & 8 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
from pandas.tseries import frequencies
from pandas.tseries.offsets import DateOffset, Tick

from .base import ExtensionArray, ExtensionOpsMixin
from .base import ExtensionArray, ExtensionOpsMixin, ReshapeMixin


class AttributesMixin:
Expand Down Expand Up @@ -324,7 +324,7 @@ def ceil(self, freq, ambiguous='raise', nonexistent='raise'):
return self._round(freq, RoundTo.PLUS_INFTY, ambiguous, nonexistent)


class DatetimeLikeArrayMixin(ExtensionOpsMixin,
class DatetimeLikeArrayMixin(ReshapeMixin, ExtensionOpsMixin,
AttributesMixin,
ExtensionArray):
"""
Expand All @@ -338,6 +338,10 @@ class DatetimeLikeArrayMixin(ExtensionOpsMixin,
_generate_range
"""

@property
def _wrap_data(self) -> np.ndarray:
    """
    The array that the ReshapeMixin methods operate on: ``self._data``.
    """
    return self._data

@property
def _box_func(self):
"""
Expand All @@ -349,7 +353,8 @@ def _box_values(self, values):
"""
apply box func to passed values
"""
return lib.map_infer(values, self._box_func)
vals1d = values.ravel()
return lib.map_infer(vals1d, self._box_func).reshape(values.shape)

def __iter__(self):
return (self._box_func(v) for v in self.asi8)
Expand Down Expand Up @@ -388,6 +393,21 @@ def _formatter(self, boxed=False):
# TODO: Remove Datetime & DatetimeTZ formatters.
return "'{}'".format

def __repr__(self):
    """
    Return the repr, emulating nested brackets for a 2D array.

    A 1D array uses the parent class's repr unchanged.  A 2D array is
    rendered by taking the repr of ``self.ravel()`` and rewriting the
    brackets in the part that precedes ``', dtype: '``.

    Raises
    ------
    NotImplementedError
        If ``self.ndim`` is neither 1 nor 2.
    """
    # 2D compat
    if self.ndim == 1:
        return super().__repr__()
    elif self.ndim == 2:
        out = repr(self.ravel())
        # The two-name unpacking requires ', dtype: ' to occur exactly
        # once in the flattened repr.
        head, tail = out.split(', dtype: ')
        # Double every bracket in the head so the values look nested.
        head = head.replace('[', '[[').replace(']', ']]')
        if self.shape[0] != 1:
            # More than one row: turn each separator into a closing and
            # an opening bracket, so each element gets its own pair.
            head = head.replace(', ', '], [')
            head = head.replace(',\n ', '],\n [')
        return head + ', dtype: ' + tail

    raise NotImplementedError

# ----------------------------------------------------------------
# Array-Like / EA-Interface Methods

Expand All @@ -406,9 +426,6 @@ def size(self) -> int:
"""The number of elements in this array."""
return np.prod(self.shape)

def __len__(self):
return len(self._data)

def __getitem__(self, key):
"""
This getitem defers to the underlying array, which by-definition can
Expand All @@ -422,6 +439,17 @@ def __getitem__(self, key):
"arrays are valid indices")

getitem = self._data.__getitem__

if self.ndim == 2:
# Because we are only "faking" allowing 2D DatetimeArray,
# we only support a limited selection of indexers for 2D case
res = getitem(key)
if lib.is_scalar(res):
return self._box_func(res)

# Note: we drop `freq` attributes for all 2D cases
return type(self)(res, dtype=self.dtype)

if is_int:
val = getitem(key)
return self._box_func(val)
Expand Down Expand Up @@ -597,12 +625,21 @@ def take(self, indices, allow_fill=False, fill_value=None):
return type(self)(new_values, dtype=self.dtype)

@classmethod
def _concat_same_type(cls, to_concat):
def _concat_same_type(cls, to_concat, axis=0):
if axis != 0:
# ravel() below assumes we are always either 1-D or column-like
raise NotImplementedError

# FIXME: Fails on pandas/tests/frame/test_combine_concat.py
# test_concat_tz_NaT, test_concat_tz_not_aligned
# assert all(x.ndim == to_concat[0].ndim for x in to_concat)

dtypes = {x.dtype for x in to_concat}
assert len(dtypes) == 1
dtype = list(dtypes)[0]

values = np.concatenate([x.asi8 for x in to_concat])
# FIXME: I don't like the ravel here
values = np.concatenate([x.asi8.ravel() for x in to_concat])
return cls(values, dtype=dtype)

def copy(self, deep=False):
Expand Down
9 changes: 7 additions & 2 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -597,6 +597,10 @@ def __iter__(self):
------
tstamp : Timestamp
"""
if self.ndim > 1:
for i in range(len(self)):
yield self[i]
return

# convert in chunks of 10k for efficiency
data = self.asi8
Expand Down Expand Up @@ -663,7 +667,7 @@ def _format_native_types(self, na_rep='NaT', date_format=None, **kwargs):
from pandas.io.formats.format import _get_format_datetime64_from_values
fmt = _get_format_datetime64_from_values(self, date_format)

return tslib.format_array_from_datetime(self.asi8,
return tslib.format_array_from_datetime(self.asi8.ravel(),
tz=self.tz,
format=fmt,
na_rep=na_rep)
Expand Down Expand Up @@ -1066,7 +1070,8 @@ def to_pydatetime(self):
-------
datetimes : ndarray
"""
return tslib.ints_to_pydatetime(self.asi8, tz=self.tz)
i8vals = self.asi8.ravel()
return tslib.ints_to_pydatetime(i8vals, tz=self.tz).reshape(self.shape)

def normalize(self):
"""
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/arrays/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,8 +196,8 @@ def __init__(self, values, dtype=_TD_DTYPE, freq=None, copy=False):
"ndarray, or Series or Index containing one of those."
)
raise ValueError(msg.format(type(values).__name__))
if values.ndim != 1:
raise ValueError("Only 1-dimensional input arrays are supported.")
if values.ndim == 0:
raise ValueError("zero-dimensional arrays are not supported.")

if values.dtype == 'i8':
# for compat with datetime/timedelta/period shared methods,
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/dtypes/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,9 +197,10 @@ def _concat_categorical(to_concat, axis=0):
return union_categoricals(categoricals)

# extract the categoricals & coerce to object if needed
# NB: ravel() assumes we will never have consolidated datetimetz
to_concat = [x.get_values() if is_categorical_dtype(x.dtype)
else np.asarray(x).ravel() if not is_datetime64tz_dtype(x)
else np.asarray(x.astype(object)) for x in to_concat]
else np.asarray(x.astype(object)).ravel() for x in to_concat]
result = _concat_compat(to_concat)
if axis == 1:
result = result.reshape(1, len(result))
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/groupby/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -475,7 +475,8 @@ def _cython_operation(self, kind, values, how, axis, min_count=-1,
else:
if axis > 0:
swapped = True
values = values.swapaxes(0, axis)
assert axis == 1, axis
values = values.T
if arity > 1:
raise NotImplementedError("arity of more than 1 is not "
"supported for the 'how' argument")
Expand Down
Loading