Skip to content

Commit f452c40

Browse files
jbrockmendel authored and JustinZhengBC committed
Implement _most_ of the EA interface for DTA/TDA (#23643)
1 parent d0adfb0 commit f452c40

File tree

9 files changed

+272
-77
lines changed

9 files changed

+272
-77
lines changed

doc/source/whatsnew/v0.24.0.txt

+2
Original file line number | Diff line number | Diff line change
@@ -1313,6 +1313,8 @@ Notice how we now instead output ``np.nan`` itself instead of a stringified form
13131313
- :func:`read_sas()` will correctly parse sas7bdat files with data page types having also bit 7 set (so page type is 128 + 256 = 384) (:issue:`16615`)
13141314
- Bug in :meth:`detect_client_encoding` where potential ``IOError`` goes unhandled when importing in a mod_wsgi process due to restricted access to stdout. (:issue:`21552`)
13151315
- Bug in :func:`to_string()` that broke column alignment when ``index=False`` and width of first column's values is greater than the width of first column's header (:issue:`16839`, :issue:`13032`)
1316+
- Bug in :func:`DataFrame.to_string()` that broke column alignment when ``index=False`` and width of first column's values is greater than the width of first column's header (:issue:`16839`, :issue:`13032`)
1317+
- Bug in :func:`DataFrame.to_string()` that caused representations of :class:`DataFrame` to not take up the whole window (:issue:`22984`)
13161318
- Bug in :func:`DataFrame.to_csv` where a single level MultiIndex incorrectly wrote a tuple. Now just the value of the index is written (:issue:`19589`).
13171319
- Bug in :meth:`HDFStore.append` when appending a :class:`DataFrame` with an empty string column and ``min_itemsize`` < 8 (:issue:`12242`)
13181320
- Bug in :func:`to_string()` that caused representations of :class:`DataFrame` to not take up the whole window (:issue:`22984`)

pandas/core/arrays/datetimelike.py

+62-1
Original file line number | Diff line number | Diff line change
@@ -39,7 +39,7 @@
3939
from pandas.core.dtypes.missing import isna
4040

4141
import pandas.core.common as com
42-
from pandas.core.algorithms import checked_add_with_arr
42+
from pandas.core.algorithms import checked_add_with_arr, take, unique1d
4343

4444
from .base import ExtensionOpsMixin
4545
from pandas.util._decorators import deprecate_kwarg
@@ -196,6 +196,67 @@ def astype(self, dtype, copy=True):
196196
return self._box_values(self.asi8)
197197
return super(DatetimeLikeArrayMixin, self).astype(dtype, copy)
198198

199+
# ------------------------------------------------------------------
200+
# ExtensionArray Interface
201+
# TODO:
202+
# * _from_sequence
203+
# * argsort / _values_for_argsort
204+
# * _reduce
205+
206+
def unique(self):
207+
result = unique1d(self.asi8)
208+
return type(self)(result, dtype=self.dtype)
209+
210+
def _validate_fill_value(self, fill_value):
211+
"""
212+
If a fill_value is passed to `take` convert it to an i8 representation,
213+
raising ValueError if this is not possible.
214+
215+
Parameters
216+
----------
217+
fill_value : object
218+
219+
Returns
220+
-------
221+
fill_value : np.int64
222+
223+
Raises
224+
------
225+
ValueError
226+
"""
227+
raise AbstractMethodError(self)
228+
229+
def take(self, indices, allow_fill=False, fill_value=None):
230+
if allow_fill:
231+
fill_value = self._validate_fill_value(fill_value)
232+
233+
new_values = take(self.asi8,
234+
indices,
235+
allow_fill=allow_fill,
236+
fill_value=fill_value)
237+
238+
return type(self)(new_values, dtype=self.dtype)
239+
240+
@classmethod
241+
def _concat_same_type(cls, to_concat):
242+
dtypes = {x.dtype for x in to_concat}
243+
assert len(dtypes) == 1
244+
dtype = list(dtypes)[0]
245+
246+
values = np.concatenate([x.asi8 for x in to_concat])
247+
return cls(values, dtype=dtype)
248+
249+
def copy(self, deep=False):
250+
values = self.asi8.copy()
251+
return type(self)(values, dtype=self.dtype, freq=self.freq)
252+
253+
def _values_for_factorize(self):
254+
return self.asi8, iNaT
255+
256+
@classmethod
257+
def _from_factorized(cls, values, original):
258+
return cls(values, dtype=original.dtype)
259+
199260
# ------------------------------------------------------------------
200261
# Null Handling
201262

pandas/core/arrays/datetimes.py

+23-5
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@
1212
conversion, fields, timezones,
1313
resolution as libresolution)
1414

15-
from pandas.util._decorators import cache_readonly
15+
from pandas.util._decorators import cache_readonly, Appender
1616
from pandas.errors import PerformanceWarning
1717
from pandas import compat
1818

@@ -21,8 +21,7 @@
2121
is_object_dtype,
2222
is_int64_dtype,
2323
is_datetime64tz_dtype,
24-
is_datetime64_dtype,
25-
ensure_int64)
24+
is_datetime64_dtype)
2625
from pandas.core.dtypes.dtypes import DatetimeTZDtype
2726
from pandas.core.dtypes.missing import isna
2827
from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries
@@ -294,7 +293,7 @@ def _generate_range(cls, start, end, periods, freq, tz=None,
294293

295294
if tz is not None and index.tz is None:
296295
arr = conversion.tz_localize_to_utc(
297-
ensure_int64(index.values),
296+
index.asi8,
298297
tz, ambiguous=ambiguous)
299298

300299
index = cls(arr)
@@ -317,7 +316,7 @@ def _generate_range(cls, start, end, periods, freq, tz=None,
317316
if not right_closed and len(index) and index[-1] == end:
318317
index = index[:-1]
319318

320-
return cls._simple_new(index.values, freq=freq, tz=tz)
319+
return cls._simple_new(index.asi8, freq=freq, tz=tz)
321320

322321
# -----------------------------------------------------------------
323322
# Descriptive Properties
@@ -419,6 +418,25 @@ def __iter__(self):
419418
for v in converted:
420419
yield v
421420

421+
# ----------------------------------------------------------------
422+
# ExtensionArray Interface
423+
424+
@property
425+
def _ndarray_values(self):
426+
return self._data
427+
428+
@Appender(dtl.DatetimeLikeArrayMixin._validate_fill_value.__doc__)
429+
def _validate_fill_value(self, fill_value):
430+
if isna(fill_value):
431+
fill_value = iNaT
432+
elif isinstance(fill_value, (datetime, np.datetime64)):
433+
self._assert_tzawareness_compat(fill_value)
434+
fill_value = Timestamp(fill_value).value
435+
else:
436+
raise ValueError("'fill_value' should be a Timestamp. "
437+
"Got '{got}'.".format(got=fill_value))
438+
return fill_value
439+
422440
# -----------------------------------------------------------------
423441
# Comparison Methods
424442

pandas/core/arrays/period.py

+14-42
Original file line number | Diff line number | Diff line change
@@ -216,14 +216,6 @@ def _from_sequence(cls, scalars, dtype=None, copy=False):
216216
ordinals = libperiod.extract_ordinals(periods, freq)
217217
return cls(ordinals, freq=freq)
218218

219-
def _values_for_factorize(self):
220-
return self.asi8, iNaT
221-
222-
@classmethod
223-
def _from_factorized(cls, values, original):
224-
# type: (Sequence[Optional[Period]], PeriodArray) -> PeriodArray
225-
return cls(values, freq=original.freq)
226-
227219
@classmethod
228220
def _from_datetime64(cls, data, freq, tz=None):
229221
"""Construct a PeriodArray from a datetime64 array
@@ -262,14 +254,6 @@ def _generate_range(cls, start, end, periods, freq, fields):
262254

263255
return subarr, freq
264256

265-
@classmethod
266-
def _concat_same_type(cls, to_concat):
267-
freq = {x.freq for x in to_concat}
268-
assert len(freq) == 1
269-
freq = list(freq)[0]
270-
values = np.concatenate([x._data for x in to_concat])
271-
return cls(values, freq=freq)
272-
273257
# --------------------------------------------------------------------
274258
# Data / Attributes
275259

@@ -415,29 +399,20 @@ def __setitem__(
415399
raise TypeError(msg)
416400
self._data[key] = value
417401

418-
def take(self, indices, allow_fill=False, fill_value=None):
419-
if allow_fill:
420-
if isna(fill_value):
421-
fill_value = iNaT
422-
elif isinstance(fill_value, Period):
423-
if self.freq != fill_value.freq:
424-
msg = DIFFERENT_FREQ_INDEX.format(
425-
self.freq.freqstr,
426-
fill_value.freqstr
427-
)
428-
raise IncompatibleFrequency(msg)
429-
430-
fill_value = fill_value.ordinal
431-
else:
432-
msg = "'fill_value' should be a Period. Got '{}'."
433-
raise ValueError(msg.format(fill_value))
434-
435-
new_values = algos.take(self._data,
436-
indices,
437-
allow_fill=allow_fill,
438-
fill_value=fill_value)
439-
440-
return type(self)(new_values, self.freq)
402+
@Appender(dtl.DatetimeLikeArrayMixin._validate_fill_value.__doc__)
403+
def _validate_fill_value(self, fill_value):
404+
if isna(fill_value):
405+
fill_value = iNaT
406+
elif isinstance(fill_value, Period):
407+
if fill_value.freq != self.freq:
408+
msg = DIFFERENT_FREQ_INDEX.format(self.freq.freqstr,
409+
fill_value.freqstr)
410+
raise IncompatibleFrequency(msg)
411+
fill_value = fill_value.ordinal
412+
else:
413+
raise ValueError("'fill_value' should be a Period. "
414+
"Got '{got}'.".format(got=fill_value))
415+
return fill_value
441416

442417
def fillna(self, value=None, method=None, limit=None):
443418
# TODO(#20300)
@@ -474,9 +449,6 @@ def fillna(self, value=None, method=None, limit=None):
474449
new_values = self.copy()
475450
return new_values
476451

477-
def copy(self, deep=False):
478-
return type(self)(self._data.copy(), freq=self.freq)
479-
480452
def value_counts(self, dropna=False):
481453
from pandas import Series, PeriodIndex
482454

pandas/core/arrays/timedeltas.py

+13-1
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@
99
from pandas._libs.tslibs.fields import get_timedelta_field
1010
from pandas._libs.tslibs.timedeltas import (
1111
array_to_timedelta64, parse_timedelta_unit)
12+
from pandas.util._decorators import Appender
1213

1314
from pandas import compat
1415

@@ -139,7 +140,7 @@ def _simple_new(cls, values, freq=None, dtype=_TD_DTYPE):
139140
result._freq = freq
140141
return result
141142

142-
def __new__(cls, values, freq=None):
143+
def __new__(cls, values, freq=None, dtype=_TD_DTYPE):
143144

144145
freq, freq_infer = dtl.maybe_infer_freq(freq)
145146

@@ -193,6 +194,17 @@ def _generate_range(cls, start, end, periods, freq, closed=None):
193194
# ----------------------------------------------------------------
194195
# Array-Like / EA-Interface Methods
195196

197+
@Appender(dtl.DatetimeLikeArrayMixin._validate_fill_value.__doc__)
198+
def _validate_fill_value(self, fill_value):
199+
if isna(fill_value):
200+
fill_value = iNaT
201+
elif isinstance(fill_value, (timedelta, np.timedelta64, Tick)):
202+
fill_value = Timedelta(fill_value).value
203+
else:
204+
raise ValueError("'fill_value' should be a Timedelta. "
205+
"Got '{got}'.".format(got=fill_value))
206+
return fill_value
207+
196208
# ----------------------------------------------------------------
197209
# Arithmetic Methods
198210

pandas/core/dtypes/concat.py

+1-7
Original file line number | Diff line number | Diff line change
@@ -476,13 +476,7 @@ def _concat_datetimetz(to_concat, name=None):
476476
all inputs must be DatetimeIndex
477477
it is used in DatetimeIndex.append also
478478
"""
479-
# do not pass tz to set because tzlocal cannot be hashed
480-
if len({str(x.dtype) for x in to_concat}) != 1:
481-
raise ValueError('to_concat must have the same tz')
482-
tz = to_concat[0].tz
483-
# no need to localize because internal repr will not be changed
484-
new_values = np.concatenate([x.asi8 for x in to_concat])
485-
return to_concat[0]._simple_new(new_values, tz=tz, name=name)
479+
return to_concat[0]._concat_same_dtype(to_concat, name=name)
486480

487481

488482
def _concat_index_same_dtype(indexes, klass=None):

pandas/core/indexes/datetimelike.py

+15-8
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,6 @@
1818
is_datetime_or_timedelta_dtype, is_dtype_equal, is_float, is_float_dtype,
1919
is_integer, is_integer_dtype, is_list_like, is_object_dtype,
2020
is_period_dtype, is_scalar, is_string_dtype)
21-
import pandas.core.dtypes.concat as _concat
2221
from pandas.core.dtypes.generic import ABCIndex, ABCIndexClass, ABCSeries
2322
from pandas.core.dtypes.missing import isna
2423

@@ -215,6 +214,11 @@ def ceil(self, freq, ambiguous='raise', nonexistent='raise'):
215214
class DatetimeIndexOpsMixin(DatetimeLikeArrayMixin):
216215
""" common ops mixin to support a unified interface datetimelike Index """
217216

217+
# override DatetimeLikeArrayMixin method
218+
copy = Index.copy
219+
unique = Index.unique
220+
take = Index.take
221+
218222
# DatetimeLikeArrayMixin assumes subclasses are mutable, so these are
219223
# properties there. They can be made into cache_readonly for Index
220224
# subclasses bc they are immutable
@@ -685,17 +689,21 @@ def _concat_same_dtype(self, to_concat, name):
685689
"""
686690
attribs = self._get_attributes_dict()
687691
attribs['name'] = name
692+
# do not pass tz to set because tzlocal cannot be hashed
693+
if len({str(x.dtype) for x in to_concat}) != 1:
694+
raise ValueError('to_concat must have the same tz')
688695

689696
if not is_period_dtype(self):
690697
# reset freq
691698
attribs['freq'] = None
692-
693-
if getattr(self, 'tz', None) is not None:
694-
return _concat._concat_datetimetz(to_concat, name)
699+
# TODO(DatetimeArray)
700+
# - remove the .asi8 here
701+
# - remove the _maybe_box_as_values
702+
# - combine with the `else` block
703+
new_data = self._concat_same_type(to_concat).asi8
695704
else:
696-
new_data = np.concatenate([c.asi8 for c in to_concat])
705+
new_data = type(self._values)._concat_same_type(to_concat)
697706

698-
new_data = self._maybe_box_as_values(new_data, **attribs)
699707
return self._simple_new(new_data, **attribs)
700708

701709
def _maybe_box_as_values(self, values, **attribs):
@@ -704,7 +712,6 @@ def _maybe_box_as_values(self, values, **attribs):
704712
# but others are not. When everyone is an ExtensionArray, this can
705713
# be removed. Currently used in
706714
# - sort_values
707-
# - _concat_same_dtype
708715
return values
709716

710717
def astype(self, dtype, copy=True):
@@ -761,7 +768,7 @@ def _ensure_datetimelike_to_i8(other, to_utc=False):
761768
try:
762769
return np.array(other, copy=False).view('i8')
763770
except TypeError:
764-
# period array cannot be coerces to int
771+
# period array cannot be coerced to int
765772
other = Index(other)
766773
return other.asi8
767774

pandas/core/indexes/datetimes.py

+7-10
Original file line number | Diff line number | Diff line change
@@ -551,16 +551,13 @@ def snap(self, freq='S'):
551551
# TODO: what about self.name? if so, use shallow_copy?
552552

553553
def unique(self, level=None):
554-
# Override here since IndexOpsMixin.unique uses self._values.unique
555-
# For DatetimeIndex with TZ, that's a DatetimeIndex -> recursion error
556-
# So we extract the tz-naive DatetimeIndex, unique that, and wrap the
557-
# result with out TZ.
558-
if self.tz is not None:
559-
naive = type(self)(self._ndarray_values, copy=False)
560-
else:
561-
naive = self
562-
result = super(DatetimeIndex, naive).unique(level=level)
563-
return self._shallow_copy(result.values)
554+
if level is not None:
555+
self._validate_index_level(level)
556+
557+
# TODO(DatetimeArray): change dispatch once inheritance is removed
558+
# call DatetimeArray method
559+
result = DatetimeArray.unique(self)
560+
return self._shallow_copy(result._data)
564561

565562
def union(self, other):
566563
"""

0 commit comments

Comments
 (0)