From 6389872bd849efa0b6510e34da9537a05c522dec Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 16 Jan 2020 18:44:56 -0800 Subject: [PATCH 1/8] REF: require DTA in DTI._simple_new --- pandas/core/indexes/datetimelike.py | 39 ++++++++++++---------- pandas/core/indexes/datetimes.py | 17 ++++++---- pandas/core/tools/datetimes.py | 9 +++-- pandas/io/pytables.py | 8 +++-- pandas/tests/arrays/test_datetimelike.py | 16 +++++---- pandas/tests/indexes/datetimes/test_ops.py | 2 +- pandas/tseries/offsets.py | 14 ++++++++ 7 files changed, 67 insertions(+), 38 deletions(-) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index bf1272b223f70..5209453d5aaa5 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -193,20 +193,19 @@ def sort_values(self, return_indexer=False, ascending=True): # because the treatment of NaT has been changed to put NaT last # instead of first. sorted_values = np.sort(self.asi8) - attribs = self._get_attributes_dict() - freq = attribs["freq"] + freq = self.freq if freq is not None and not is_period_dtype(self): if freq.n > 0 and not ascending: freq = freq * -1 elif freq.n < 0 and ascending: freq = freq * -1 - attribs["freq"] = freq if not ascending: sorted_values = sorted_values[::-1] - return self._simple_new(sorted_values, **attribs) + arr = type(self._data)._simple_new(sorted_values, dtype=self.dtype, freq=freq) + return self._simple_new(arr, name=self.name) @Appender(_index_shared_docs["take"] % _index_doc_kwargs) def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs): @@ -503,22 +502,21 @@ def _concat_same_dtype(self, to_concat, name): """ Concatenate to_concat which has the same class. """ - attribs = self._get_attributes_dict() - attribs["name"] = name # do not pass tz to set because tzlocal cannot be hashed if len({str(x.dtype) for x in to_concat}) != 1: raise ValueError("to_concat must have the same tz") - new_data = type(self._values)._concat_same_type(to_concat).asi8 + new_data = type(self._values)._concat_same_type(to_concat) - # GH 3232: If the concat result is evenly spaced, we can retain the - # original frequency - is_diff_evenly_spaced = len(unique_deltas(new_data)) == 1 - if not is_period_dtype(self) and not is_diff_evenly_spaced: - # reset freq - attribs["freq"] = None + if not is_period_dtype(self.dtype): + # GH 3232: If the concat result is evenly spaced, we can retain the + # original frequency + is_diff_evenly_spaced = len(unique_deltas(new_data.asi8)) == 1 + if is_diff_evenly_spaced: + # FIXME: do this isnide _concat_same_type + new_data._freq = self.freq - return self._simple_new(new_data, **attribs) + return self._simple_new(new_data, name=name) def shift(self, periods=1, freq=None): """ @@ -614,6 +612,8 @@ def _shallow_copy(self, values=None, **kwargs): values = self._data if isinstance(values, type(self)): values = values._data + if isinstance(values, np.ndarray): + values = type(self._data)._simple_new(values, dtype=self.dtype, freq=kwargs.get("freq")) attributes = self._get_attributes_dict() @@ -767,7 +767,7 @@ def _fast_union(self, other, sort=None): loc = right.searchsorted(left_start, side="left") right_chunk = right.values[:loc] dates = concat_compat((left.values, right_chunk)) - return self._shallow_copy(dates) + return self._shallow_copy(dates, freq=self.freq) # TODO: is self.freq right here? 
else: left, right = other, self @@ -779,7 +779,7 @@ def _fast_union(self, other, sort=None): loc = right.searchsorted(left_end, side="right") right_chunk = right.values[loc:] dates = concat_compat((left.values, right_chunk)) - return self._shallow_copy(dates) + return self._shallow_copy(dates, freq=self.freq) # TODO: is self.freq right here? else: return left @@ -880,8 +880,11 @@ def _wrap_joined_index(self, joined, other): else: kwargs = {} if hasattr(self, "tz"): - kwargs["tz"] = getattr(other, "tz", None) - return self._simple_new(joined, name, **kwargs) + #kwargs["tz"] = getattr(other, "tz", None) + kwargs["dtype"] = other.dtype + + arr = type(self._data)._simple_new(joined, **kwargs) + return type(self)._simple_new(arr, name) class DatetimelikeDelegateMixin(PandasDelegate): diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 942b51eda7d0b..ce8181a225f4f 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -42,7 +42,13 @@ def _new_DatetimeIndex(cls, d): if "data" in d and not isinstance(d["data"], DatetimeIndex): # Avoid need to verify integrity by calling simple_new directly data = d.pop("data") - result = cls._simple_new(data, **d) + if not isinstance(data, DatetimeArray): + tz = d.pop("tz") + freq = d.pop("freq") + dta = DatetimeArray._simple_new(data, dtype=tz_to_dtype(tz), freq=freq) + else: + dta = data + result = cls._simple_new(dta, **d) else: with warnings.catch_warnings(): # TODO: If we knew what was going in to **d, we might be able to @@ -262,6 +268,7 @@ def _simple_new(cls, values, name=None, freq=None, tz=None, dtype=None): We require the we have a dtype compat for the values if we are passed a non-dtype compat, then coerce using the constructor """ + assert isinstance(values, DatetimeArray), type(values) if isinstance(values, DatetimeArray): if tz: tz = validate_tz_from_dtype(dtype, tz) @@ -274,9 +281,6 @@ def _simple_new(cls, values, name=None, freq=None, tz=None, dtype=None): freq = values.freq values = values._data - # DatetimeArray._simple_new will accept either i8 or M8[ns] dtypes - if isinstance(values, DatetimeIndex): - values = values._data dtype = tz_to_dtype(tz) dtarr = DatetimeArray._simple_new(values, freq=freq, dtype=dtype) @@ -485,7 +489,8 @@ def snap(self, freq="S"): snapped[i] = s # we know it conforms; skip check - return DatetimeIndex._simple_new(snapped, name=self.name, tz=self.tz, freq=freq) + dtarr = DatetimeArray._simple_new(snapped, dtype=self.dtype, freq=freq) + return DatetimeIndex._simple_new(dtarr, name=self.name) def _parsed_string_to_bounds(self, reso, parsed): """ @@ -1163,7 +1168,7 @@ def date_range( closed=closed, **kwargs, ) - return DatetimeIndex._simple_new(dtarr, tz=dtarr.tz, freq=dtarr.freq, name=name) + return DatetimeIndex._simple_new(dtarr, name=name) def bdate_range( diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 84c17748c503c..26eb48ff35431 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -37,7 +37,8 @@ ) from pandas.core.dtypes.missing import notna -from pandas.arrays import IntegerArray +from pandas.core.arrays.datetimes import tz_to_dtype +from pandas.arrays import IntegerArray, DatetimeArray from pandas.core import algorithms from pandas.core.algorithms import unique @@ -427,7 +428,8 @@ def _convert_listlike_datetimes( # datetime objects are found without passing `utc=True` try: values, tz = conversion.datetime_to_datetime64(arg) - return DatetimeIndex._simple_new(values, name=name, 
tz=tz) + dta = DatetimeArray._simple_new(values, dtype=tz_to_dtype(tz)) + return DatetimeIndex._simple_new(dta, name=name) except (ValueError, TypeError): raise e @@ -447,7 +449,8 @@ def _convert_listlike_datetimes( if tz_parsed is not None: # We can take a shortcut since the datetime64 numpy array # is in UTC - return DatetimeIndex._simple_new(result, name=name, tz=tz_parsed) + dta = DatetimeArray._simple_new(result, dtype=tz_to_dtype(tz_parsed)) + return DatetimeIndex._simple_new(dta, name=name) utc = tz == "utc" return _box_as_indexlike(result, utc=utc, name=name) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 9e8d8a2e89f20..1fc95e8447c50 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -58,7 +58,7 @@ concat, isna, ) -from pandas.core.arrays.categorical import Categorical +from pandas.core.arrays import DatetimeArray, PeriodArray, Categorical import pandas.core.common as com from pandas.core.computation.pytables import PyTablesExpr, maybe_expression from pandas.core.indexes.api import ensure_index @@ -2656,7 +2656,8 @@ def _get_index_factory(self, klass): def f(values, freq=None, tz=None): # data are already in UTC, localize and convert if tz present - result = DatetimeIndex._simple_new(values.values, name=None, freq=freq) + arr = DatetimeArray._simple_new(values.values, freq=freq) + result = DatetimeIndex._simple_new(arr, name=None) if tz is not None: result = result.tz_localize("UTC").tz_convert(tz) return result @@ -2665,7 +2666,8 @@ def f(values, freq=None, tz=None): elif klass == PeriodIndex: def f(values, freq=None, tz=None): - return PeriodIndex._simple_new(values, name=None, freq=freq) + arr = PeriodArray._simple_new(values, freq=freq) + return PeriodIndex._simple_new(arr, name=None) return f diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index fa45db93c6102..87ac36d47e968 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -65,8 +65,8 @@ def test_compare_len1_raises(self): # to the case where one has length-1, which numpy would broadcast data = np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9 - idx = self.index_cls._simple_new(data, freq="D") - arr = self.array_cls(idx) + arr = self.array_cls._simple_new(data, freq="D") + idx = self.index_cls(arr) with pytest.raises(ValueError, match="Lengths must match"): arr == arr[:1] @@ -79,8 +79,8 @@ def test_take(self): data = np.arange(100, dtype="i8") * 24 * 3600 * 10 ** 9 np.random.shuffle(data) - idx = self.index_cls._simple_new(data, freq="D") - arr = self.array_cls(idx) + arr = self.array_cls._simple_new(data, freq="D") + idx = self.index_cls(arr) takers = [1, 4, 94] result = arr.take(takers) @@ -97,8 +97,8 @@ def test_take(self): def test_take_fill(self): data = np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9 - idx = self.index_cls._simple_new(data, freq="D") - arr = self.array_cls(idx) + arr = self.array_cls._simple_new(data, freq="D") + idx = self.index_cls(arr) result = arr.take([-1, 1], allow_fill=True, fill_value=None) assert result[0] is pd.NaT @@ -121,7 +121,9 @@ def test_take_fill(self): def test_concat_same_type(self): data = np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9 - idx = self.index_cls._simple_new(data, freq="D").insert(0, pd.NaT) + arr = self.array_cls._simple_new(data, freq="D") + idx = self.index_cls(arr) + idx = idx.insert(0, pd.NaT) arr = self.array_cls(idx) result = arr._concat_same_type([arr[:-1], arr[1:], arr]) diff --git a/pandas/tests/indexes/datetimes/test_ops.py 
b/pandas/tests/indexes/datetimes/test_ops.py index ecd4ace705e9e..1d0995291eae8 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -384,7 +384,7 @@ def test_equals(self): assert not idx.equals(pd.Series(idx2)) # same internal, different tz - idx3 = pd.DatetimeIndex._simple_new(idx.asi8, tz="US/Pacific") + idx3 = pd.DatetimeIndex(idx.asi8, tz="US/Pacific") tm.assert_numpy_array_equal(idx.asi8, idx3.asi8) assert not idx.equals(idx3) assert not idx.equals(idx3.copy()) diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index d31c23c7ccf1d..de95298949399 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -1157,6 +1157,11 @@ def apply_index(self, i): shifted = liboffsets.shift_months(i.asi8, self.n, self._day_opt) # TODO: going through __new__ raises on call to _validate_frequency; # are we passing incorrect freq? + if hasattr(i._data, "_data"): + # DTA + arr = type(i._data)._simple_new(shifted, dtype=i.dtype, freq=i.freq) + return type(i)._simple_new(arr) # TODO: retain name? + return type(i)._simple_new(shifted, freq=i.freq, dtype=i.dtype) @@ -1887,6 +1892,11 @@ def apply_index(self, dtindex): ) # TODO: going through __new__ raises on call to _validate_frequency; # are we passing incorrect freq? + if hasattr(dtindex._data, "_data"): + # DTA + arr = type(dtindex._data)._simple_new(shifted, dtype=dtindex.dtype, freq=dtindex.freq) + return type(dtindex)._simple_new(arr) # TODO: retain name? + return type(dtindex)._simple_new( shifted, freq=dtindex.freq, dtype=dtindex.dtype ) @@ -1973,6 +1983,10 @@ def apply_index(self, dtindex): ) # TODO: going through __new__ raises on call to _validate_frequency; # are we passing incorrect freq? + if hasattr(dtindex._data, "_data"): + # DTA + arr = type(dtindex._data)._simple_new(shifted, dtype=dtindex.dtype, freq=dtindex.freq) + return type(dtindex)._simple_new(arr) # TODO: retain name? 
return type(dtindex)._simple_new( shifted, freq=dtindex.freq, dtype=dtindex.dtype ) From 9f220f543b7cbf4357f267f2a7f3802e15644454 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 29 Jan 2020 11:38:13 -0800 Subject: [PATCH 2/8] stricter --- pandas/core/indexes/datetimelike.py | 27 ++++++++++++++++++++++----- pandas/core/indexes/datetimes.py | 20 +++++--------------- 2 files changed, 27 insertions(+), 20 deletions(-) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 58e28b52b8254..d3ab181502cfd 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -571,7 +571,8 @@ def delete(self, loc): if loc.start in (0, None) or loc.stop in (len(self), None): freq = self.freq - return self._shallow_copy(new_i8s, freq=freq) + arr = type(self._data)._simple_new(new_i8s, dtype=self.dtype, freq=freq) + return self._shallow_copy(arr, name=self.name) class DatetimeTimedeltaMixin(DatetimeIndexOpsMixin, Int64Index): @@ -620,12 +621,21 @@ def _shallow_copy(self, values=None, **kwargs): raise ValueError(kwargs) values = type(self._data)._simple_new(values, dtype=self.dtype) + if kwargs.get("freq") is not None and values.freq != kwargs["freq"]: + # Try setting the new freq; the setter does validation + values = type(values)._simple_new(values._data) + values.freq = kwargs["freq"] + attributes = self._get_attributes_dict() if "freq" not in kwargs and self.freq is not None: if isinstance(values, (DatetimeArray, TimedeltaArray)): if values.freq is None: del attributes["freq"] + if "tz" in attributes: + # FIXME: kludge + tz = attributes.pop("tz") + assert tz == values.tz, (tz, values.tz) attributes.update(kwargs) return self._simple_new(values, **attributes) @@ -772,7 +782,10 @@ def _fast_union(self, other, sort=None): loc = right.searchsorted(left_start, side="left") right_chunk = right.values[:loc] dates = concat_compat((left.values, right_chunk)) - return self._shallow_copy(dates, freq=self.freq) # TODO: is self.freq right here? + result = self._shallow_copy(dates) + result._set_freq("infer") + # TODO: can we infer that it has self.freq? + return result else: left, right = other, self @@ -784,7 +797,10 @@ def _fast_union(self, other, sort=None): loc = right.searchsorted(left_end, side="right") right_chunk = right.values[loc:] dates = concat_compat((left.values, right_chunk)) - return self._shallow_copy(dates, freq=self.freq) # TODO: is self.freq right here? + result = self._shallow_copy(dates) + result._set_freq("infer") + # TODO: can we infer that it has self.freq? + return result else: return left @@ -885,7 +901,7 @@ def _wrap_joined_index(self, joined, other): else: kwargs = {} if hasattr(self, "tz"): - #kwargs["tz"] = getattr(other, "tz", None) + # TODO: Do we need to do this? 
kwargs["dtype"] = other.dtype arr = type(self._data)._simple_new(joined, **kwargs) @@ -935,7 +951,8 @@ def insert(self, loc, item): new_i8s = np.concatenate( (self[:loc].asi8, [item.view(np.int64)], self[loc:].asi8) ) - return self._shallow_copy(new_i8s, freq=freq) + arr = type(self._data)._simple_new(new_i8s, dtype=self.dtype, freq=freq) + return self._shallow_copy(arr, name=self.name) except (AttributeError, TypeError): # fall back to object index diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index d94a2702e0313..69274389bfaa7 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -275,21 +275,11 @@ def _simple_new(cls, values, name=None, freq=None, tz=None, dtype=None): if we are passed a non-dtype compat, then coerce using the constructor """ assert isinstance(values, DatetimeArray), type(values) - if isinstance(values, DatetimeArray): - if tz: - tz = validate_tz_from_dtype(dtype, tz) - dtype = DatetimeTZDtype(tz=tz) - elif dtype is None: - dtype = _NS_DTYPE - - values = DatetimeArray(values, freq=freq, dtype=dtype) - tz = values.tz - freq = values.freq - values = values._data - - dtype = tz_to_dtype(tz) - dtarr = DatetimeArray._simple_new(values, freq=freq, dtype=dtype) - assert isinstance(dtarr, DatetimeArray) + assert tz is None or tz == values.tz, (tz, values.tz) + assert dtype is None or dtype == values.dtype, (dtype, values.dtype) + assert freq is None or freq == values.freq, (freq, values.freq) + + dtarr = values result = object.__new__(cls) result._data = dtarr From 0e7ffe1e850285ac1b3720e0941f2b925d46928e Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 26 Feb 2020 10:54:16 -0800 Subject: [PATCH 3/8] blackify --- pandas/tseries/offsets.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 7ebac30f4d922..6dbfa8333e058 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -1892,7 +1892,9 @@ def apply_index(self, dtindex): # are we passing incorrect freq? if hasattr(dtindex._data, "_data"): # DTA - arr = type(dtindex._data)._simple_new(shifted, dtype=dtindex.dtype, freq=dtindex.freq) + arr = type(dtindex._data)._simple_new( + shifted, dtype=dtindex.dtype, freq=dtindex.freq + ) return type(dtindex)._simple_new(arr) # TODO: retain name? return type(dtindex)._simple_new( @@ -1983,7 +1985,9 @@ def apply_index(self, dtindex): # are we passing incorrect freq? if hasattr(dtindex._data, "_data"): # DTA - arr = type(dtindex._data)._simple_new(shifted, dtype=dtindex.dtype, freq=dtindex.freq) + arr = type(dtindex._data)._simple_new( + shifted, dtype=dtindex.dtype, freq=dtindex.freq + ) return type(dtindex)._simple_new(arr) # TODO: retain name? 
return type(dtindex)._simple_new( shifted, freq=dtindex.freq, dtype=dtindex.dtype From 5cba722eee78675bbd3b63159a818b8f1237714b Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 26 Feb 2020 11:42:47 -0800 Subject: [PATCH 4/8] remove attrs --- pandas/core/indexes/datetimelike.py | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 50fe7b33fd324..0d6fe447facd1 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -648,25 +648,11 @@ def _shallow_copy(self, values=None, name: Label = lib.no_default): if values is None: values = self._data - if isinstance(values, type(self)): - values = values._data if isinstance(values, np.ndarray): # TODO: We would rather not get here values = type(self._data)(values, dtype=self.dtype) - attributes = self._get_attributes_dict() - - if self.freq is not None: - if isinstance(values, (DatetimeArray, TimedeltaArray)): - if values.freq is None: - del attributes["freq"] - if "tz" in attributes: - # FIXME: kludge - tz = attributes.pop("tz") - assert tz == values.tz, (tz, values.tz) - - attributes["name"] = name - return type(self)._simple_new(values, **attributes) + return type(self)._simple_new(values, name=name) # -------------------------------------------------------------------- # Set Operation Methods From 6efd06531be016366b0b6a86a6b783799ffb9c3d Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 26 Feb 2020 13:06:45 -0800 Subject: [PATCH 5/8] CLN: DTI/TDI _simple_new --- pandas/core/indexes/base.py | 4 +--- pandas/core/indexes/datetimes.py | 22 ++++++++-------------- pandas/core/indexes/timedeltas.py | 9 +++------ 3 files changed, 12 insertions(+), 23 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index c215fdb475ed8..e1638b80913ec 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3304,13 +3304,11 @@ def reindex(self, target, method=None, level=None, limit=None, tolerance=None): target = _ensure_has_len(target) # target may be an iterator if not isinstance(target, Index) and len(target) == 0: - attrs = self._get_attributes_dict() - attrs.pop("freq", None) # don't preserve freq if isinstance(self, ABCRangeIndex): values = range(0) else: values = self._data[:0] # appropriately-dtyped empty array - target = self._simple_new(values, **attrs) + target = self._simple_new(values, name=self.name) else: target = ensure_index(target) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 61235b074c9e5..b958665003f8d 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -7,17 +7,13 @@ from pandas._libs import NaT, Period, Timestamp, index as libindex, lib, tslib as libts from pandas._libs.tslibs import fields, parsing, timezones +from pandas._typing import Label from pandas.util._decorators import cache_readonly from pandas.core.dtypes.common import _NS_DTYPE, is_float, is_integer, is_scalar -from pandas.core.dtypes.dtypes import DatetimeTZDtype from pandas.core.dtypes.missing import is_valid_nat_for_dtype -from pandas.core.arrays.datetimes import ( - DatetimeArray, - tz_to_dtype, - validate_tz_from_dtype, -) +from pandas.core.arrays.datetimes import DatetimeArray, tz_to_dtype import pandas.core.common as com from pandas.core.indexes.base import Index, InvalidIndexError, maybe_extract_name from pandas.core.indexes.datetimelike import DatetimeTimedeltaMixin @@ -42,6 +38,9 @@ def 
_new_DatetimeIndex(cls, d): dta = DatetimeArray._simple_new(data, dtype=tz_to_dtype(tz), freq=freq) else: dta = data + for key in ["tz", "freq"]: + if key in d: + assert d.pop(key) == getattr(dta, key) result = cls._simple_new(dta, **d) else: with warnings.catch_warnings(): @@ -241,24 +240,19 @@ def __new__( return subarr @classmethod - def _simple_new(cls, values, name=None, freq=None, tz=None, dtype=None): + def _simple_new(cls, values: DatetimeArray, name: Label = None): """ We require the we have a dtype compat for the values if we are passed a non-dtype compat, then coerce using the constructor """ assert isinstance(values, DatetimeArray), type(values) - assert tz is None or tz == values.tz, (tz, values.tz) - assert dtype is None or dtype == values.dtype, (dtype, values.dtype) - assert freq is None or freq == values.freq, (freq, values.freq) - - dtarr = values result = object.__new__(cls) - result._data = dtarr + result._data = values result.name = name result._no_setting_name = False # For groupby perf. See note in indexes/base about _index_data - result._index_data = dtarr._data + result._index_data = values._data result._reset_identity() return result diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index b3b2bc46f6659..86e84e57a122f 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -1,6 +1,7 @@ """ implement the TimedeltaIndex """ from pandas._libs import NaT, Timedelta, index as libindex +from pandas._typing import Label from pandas.util._decorators import Appender from pandas.core.dtypes.common import ( @@ -154,7 +155,7 @@ def __new__( if isinstance(data, TimedeltaArray) and freq is None: if copy: data = data.copy() - return cls._simple_new(data, name=name, freq=freq) + return cls._simple_new(data, name=name) if isinstance(data, TimedeltaIndex) and freq is None and name is None: if copy: @@ -170,12 +171,8 @@ def __new__( return cls._simple_new(tdarr, name=name) @classmethod - def _simple_new(cls, values, name=None, freq=None, dtype=_TD_DTYPE): - # `dtype` is passed by _shallow_copy in corner cases, should always - # be timedelta64[ns] if present - assert dtype == _TD_DTYPE, dtype + def _simple_new(cls, values: TimedeltaArray, name: Label = None): assert isinstance(values, TimedeltaArray) - assert freq is None or values.freq == freq result = object.__new__(cls) result._data = values From d659a2361c17eb13e139abec841465b86b78e4d7 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 26 Feb 2020 16:33:31 -0800 Subject: [PATCH 6/8] simplify apply_index treatment --- pandas/tseries/offsets.py | 32 ++++++++++++-------------------- 1 file changed, 12 insertions(+), 20 deletions(-) diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 6dbfa8333e058..b6969d2c755da 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -1155,12 +1155,12 @@ def apply_index(self, i): shifted = liboffsets.shift_months(i.asi8, self.n, self._day_opt) # TODO: going through __new__ raises on call to _validate_frequency; # are we passing incorrect freq? - if hasattr(i._data, "_data"): + dta = i + if not isinstance(i._data, np.ndarray): # DTA - arr = type(i._data)._simple_new(shifted, dtype=i.dtype, freq=i.freq) - return type(i)._simple_new(arr) # TODO: retain name? 
+ dta = i._data - return type(i)._simple_new(shifted, freq=i.freq, dtype=i.dtype) + return type(dta)._simple_new(shifted, freq=dta.freq, dtype=dta.dtype) class MonthEnd(MonthOffset): @@ -1890,16 +1890,12 @@ def apply_index(self, dtindex): ) # TODO: going through __new__ raises on call to _validate_frequency; # are we passing incorrect freq? - if hasattr(dtindex._data, "_data"): + dta = dtindex + if not isinstance(dtindex._data, np.ndarray): # DTA - arr = type(dtindex._data)._simple_new( - shifted, dtype=dtindex.dtype, freq=dtindex.freq - ) - return type(dtindex)._simple_new(arr) # TODO: retain name? + dta = dtindex._data - return type(dtindex)._simple_new( - shifted, freq=dtindex.freq, dtype=dtindex.dtype - ) + return type(dta)._simple_new(shifted, freq=dta.freq, dtype=dta.dtype) class BQuarterEnd(QuarterOffset): @@ -1983,15 +1979,11 @@ def apply_index(self, dtindex): ) # TODO: going through __new__ raises on call to _validate_frequency; # are we passing incorrect freq? - if hasattr(dtindex._data, "_data"): + dta = dtindex + if not isinstance(dtindex._data, np.ndarray): # DTA - arr = type(dtindex._data)._simple_new( - shifted, dtype=dtindex.dtype, freq=dtindex.freq - ) - return type(dtindex)._simple_new(arr) # TODO: retain name? - return type(dtindex)._simple_new( - shifted, freq=dtindex.freq, dtype=dtindex.dtype - ) + dta = dtindex._data + return type(dta)._simple_new(shifted, freq=dta.freq, dtype=dta.dtype) def is_on_offset(self, dt: datetime) -> bool: if self.normalize and not _is_normalized(dt): From 82c56457db11f352584db9eb4d468b380f9a90fe Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 27 Feb 2020 12:09:58 -0800 Subject: [PATCH 7/8] remove outdated docstring --- pandas/core/indexes/datetimes.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index b958665003f8d..ea441e3461ba4 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -241,10 +241,6 @@ def __new__( @classmethod def _simple_new(cls, values: DatetimeArray, name: Label = None): - """ - We require the we have a dtype compat for the values - if we are passed a non-dtype compat, then coerce using the constructor - """ assert isinstance(values, DatetimeArray), type(values) result = object.__new__(cls) From e74ebc02ff822fceb9ff44b2405795e5da483789 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 13 Mar 2020 14:08:29 -0700 Subject: [PATCH 8/8] Comments, collect offsets wrapping in apply_index_wrap --- pandas/_libs/tslibs/offsets.pyx | 13 ++++++++++- pandas/core/indexes/datetimes.py | 4 ++++ pandas/tseries/offsets.py | 39 ++++++-------------------------- 3 files changed, 23 insertions(+), 33 deletions(-) diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 48a3886c20a3a..da59c635b5a18 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -114,7 +114,18 @@ def apply_index_wraps(func): # Note: normally we would use `@functools.wraps(func)`, but this does # not play nicely with cython class methods def wrapper(self, other): - result = func(self, other) + + is_index = getattr(other, "_typ", "") == "datetimeindex" + + # operate on DatetimeArray + arr = other._data if is_index else other + + result = func(self, arr) + + if is_index: + # Wrap DatetimeArray result back to DatetimeIndex + result = type(other)._simple_new(result, name=other.name) + if self.normalize: result = result.to_period('D').to_timestamp() return result diff --git 
a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 41c97abf306aa..e791133220dbf 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -33,12 +33,16 @@ def _new_DatetimeIndex(cls, d): # Avoid need to verify integrity by calling simple_new directly data = d.pop("data") if not isinstance(data, DatetimeArray): + # For backward compat with older pickles, we may need to construct + # a DatetimeArray to adapt to the newer _simple_new signature tz = d.pop("tz") freq = d.pop("freq") dta = DatetimeArray._simple_new(data, dtype=tz_to_dtype(tz), freq=freq) else: dta = data for key in ["tz", "freq"]: + # These are already stored in our DatetimeArray; if they are + # also in the pickle and don't match, we have a problem. if key in d: assert d.pop(key) == getattr(dta, key) result = cls._simple_new(dta, **d) diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index b6969d2c755da..bc20d784c8dee 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -337,9 +337,6 @@ def apply_index(self, i): # integer addition on PeriodIndex is deprecated, # so we directly use _time_shift instead asper = i.to_period("W") - if not isinstance(asper._data, np.ndarray): - # unwrap PeriodIndex --> PeriodArray - asper = asper._data shifted = asper._time_shift(weeks) i = shifted.to_timestamp() + i.to_perioddelta("W") @@ -629,9 +626,6 @@ def apply_index(self, i): # to_period rolls forward to next BDay; track and # reduce n where it does when rolling forward asper = i.to_period("B") - if not isinstance(asper._data, np.ndarray): - # unwrap PeriodIndex --> PeriodArray - asper = asper._data if self.n > 0: shifted = (i.to_perioddelta("B") - time).asi8 != 0 @@ -1155,12 +1149,7 @@ def apply_index(self, i): shifted = liboffsets.shift_months(i.asi8, self.n, self._day_opt) # TODO: going through __new__ raises on call to _validate_frequency; # are we passing incorrect freq? - dta = i - if not isinstance(i._data, np.ndarray): - # DTA - dta = i._data - - return type(dta)._simple_new(shifted, freq=dta.freq, dtype=dta.dtype) + return type(i)._simple_new(shifted, freq=i.freq, dtype=i.dtype) class MonthEnd(MonthOffset): @@ -1389,9 +1378,6 @@ def apply_index(self, i): # integer-array addition on PeriodIndex is deprecated, # so we use _addsub_int_array directly asper = i.to_period("M") - if not isinstance(asper._data, np.ndarray): - # unwrap PeriodIndex --> PeriodArray - asper = asper._data shifted = asper._addsub_int_array(roll // 2, operator.add) i = type(dti)(shifted.to_timestamp()) @@ -1587,9 +1573,6 @@ def apply_index(self, i): # integer addition on PeriodIndex is deprecated, # so we use _time_shift directly asper = i.to_period("W") - if not isinstance(asper._data, np.ndarray): - # unwrap PeriodIndex --> PeriodArray - asper = asper._data shifted = asper._time_shift(self.n) return shifted.to_timestamp() + i.to_perioddelta("W") @@ -1613,9 +1596,6 @@ def _end_apply_index(self, dtindex): base, mult = libfrequencies.get_freq_code(self.freqstr) base_period = dtindex.to_period(base) - if not isinstance(base_period._data, np.ndarray): - # unwrap PeriodIndex --> PeriodArray - base_period = base_period._data if self.n > 0: # when adding, dates on end roll to next @@ -1890,12 +1870,9 @@ def apply_index(self, dtindex): ) # TODO: going through __new__ raises on call to _validate_frequency; # are we passing incorrect freq? 
- dta = dtindex - if not isinstance(dtindex._data, np.ndarray): - # DTA - dta = dtindex._data - - return type(dta)._simple_new(shifted, freq=dta.freq, dtype=dta.dtype) + return type(dtindex)._simple_new( + shifted, freq=dtindex.freq, dtype=dtindex.dtype + ) class BQuarterEnd(QuarterOffset): @@ -1979,11 +1956,9 @@ def apply_index(self, dtindex): ) # TODO: going through __new__ raises on call to _validate_frequency; # are we passing incorrect freq? - dta = dtindex - if not isinstance(dtindex._data, np.ndarray): - # DTA - dta = dtindex._data - return type(dta)._simple_new(shifted, freq=dta.freq, dtype=dta.dtype) + return type(dtindex)._simple_new( + shifted, freq=dtindex.freq, dtype=dtindex.dtype + ) def is_on_offset(self, dt: datetime) -> bool: if self.normalize and not _is_normalized(dt):
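
Illustration of the resulting calling convention (editorial sketch, not part of the patch series): after these patches, DatetimeIndex._simple_new and TimedeltaIndex._simple_new accept only an already-built DatetimeArray/TimedeltaArray plus a name, and tz/freq/dtype travel through the array rather than through the index constructor. The sketch below reuses only internal helpers that already appear in the diffs (DatetimeArray._simple_new, tz_to_dtype); these are private APIs, so the exact signatures are assumptions tied to this revision, not a stable interface.

import numpy as np

from pandas import DatetimeIndex
from pandas.core.arrays import DatetimeArray
from pandas.core.arrays.datetimes import tz_to_dtype

# daily-spaced i8 nanosecond values, as used in the test changes above
data = np.arange(5, dtype="i8") * 24 * 3600 * 10 ** 9

# tz-naive: build the array first, then hand it to the index.
# DatetimeIndex._simple_new(data, tz=..., freq=...) is rejected by the assert added in PATCH 1/8.
dta = DatetimeArray._simple_new(data, freq="D")
dti = DatetimeIndex._simple_new(dta, name="example")

# tz-aware: the timezone now travels through the array's dtype, never through the index
dta_utc = DatetimeArray._simple_new(data, dtype=tz_to_dtype("UTC"), freq="D")
dti_utc = DatetimeIndex._simple_new(dta_utc, name="example")

This mirrors the change to date_range, which now returns DatetimeIndex._simple_new(dtarr, name=name), and the pickle-compat path in _new_DatetimeIndex, which rebuilds a DatetimeArray via tz_to_dtype(tz) before calling _simple_new.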