From f13cc5855828dc13a7f06768818e0a27c9975ef3 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 13 Oct 2018 13:16:59 -0700 Subject: [PATCH 01/17] Avoid non-public constructors --- pandas/io/pytables.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index de193db846c50..cb33ee0a38fe9 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -2476,15 +2476,14 @@ def _get_index_factory(self, klass): if klass == DatetimeIndex: def f(values, freq=None, tz=None): # data are already in UTC, localize and convert if tz present - result = DatetimeIndex._simple_new(values.values, name=None, - freq=freq) + result = DatetimeIndex(values.values, name=None, freq=freq) if tz is not None: result = result.tz_localize('UTC').tz_convert(tz) return result return f elif klass == PeriodIndex: def f(values, freq=None, tz=None): - return PeriodIndex._simple_new(values, name=None, freq=freq) + return PeriodIndex(values, name=None, freq=freq) return f return klass From 4188ec725aebc0140f6b8be34b66e5ab8e1a94f9 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 13 Oct 2018 13:17:40 -0700 Subject: [PATCH 02/17] simplify and de-duplicate _generate_range --- pandas/core/arrays/datetimelike.py | 10 ++++++++-- pandas/core/arrays/period.py | 2 +- pandas/core/arrays/timedeltas.py | 6 ++---- pandas/core/indexes/datetimelike.py | 6 ++++++ pandas/core/indexes/datetimes.py | 19 +++++-------------- pandas/core/indexes/timedeltas.py | 20 ++++---------------- 6 files changed, 26 insertions(+), 37 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 37fc451ba2a2b..4353c13889f37 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -332,6 +332,9 @@ def _validate_frequency(cls, index, freq, **kwargs): # Frequency validation is not meaningful for Period Array/Index return None + # DatetimeArray may pass `ambiguous`, nothing else allowed + assert all(key == 'ambiguous' for key in kwargs) + inferred = index.inferred_freq if index.size == 0 or inferred == freq.freqstr: return None @@ -590,9 +593,12 @@ def _time_shift(self, periods, freq=None): start = self[0] + periods * self.freq end = self[-1] + periods * self.freq - attribs = self._get_attributes_dict() + + # Note: in the DatetimeTZ case, _generate_range will infer the + # appropriate timezone from `start` and `end`, so tz does not need + # to be passed explicitly. return self._generate_range(start=start, end=end, periods=None, - **attribs) + freq=self.freq) @classmethod def _add_datetimelike_methods(cls): diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index d32ff76c0819b..4f7bab55dc824 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -179,7 +179,7 @@ def _generate_range(cls, start, end, periods, freq, fields): freq = Period._maybe_convert_freq(freq) field_count = len(fields) - if com.count_not_none(start, end) > 0: + if start is not None or end is not None: if field_count > 0: raise ValueError('Can either instantiate from fields ' 'or endpoints, but not both') diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 4904a90ab7b2b..fe2d219f376f1 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -140,8 +140,7 @@ def __new__(cls, values, freq=None, start=None, end=None, periods=None, return result @classmethod - def _generate_range(cls, start, end, periods, freq, closed=None, **kwargs): - # **kwargs are for compat with TimedeltaIndex, which includes `name` + def _generate_range(cls, start, end, periods, freq, closed=None): periods = dtl.validate_periods(periods) if freq is None and any(x is None for x in [periods, start, end]): @@ -167,10 +166,9 @@ def _generate_range(cls, start, end, periods, freq, closed=None, **kwargs): if freq is not None: index = _generate_regular_range(start, end, periods, freq) - index = cls._simple_new(index, freq=freq, **kwargs) + index = cls._simple_new(index, freq=freq) else: index = np.linspace(start.value, end.value, periods).astype('i8') - # TODO: shouldn't we pass `name` here? (via **kwargs) index = cls._simple_new(index, freq=freq) if not left_closed: diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 8e919ba3599fc..eb8341f379f56 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -699,6 +699,12 @@ def astype(self, dtype, copy=True): raise TypeError(msg.format(name=type(self).__name__, dtype=dtype)) return super(DatetimeIndexOpsMixin, self).astype(dtype, copy=copy) + @Appender(DatetimeLikeArrayMixin._time_shift.__doc__) + def _time_shift(self, periods, freq=None): + result = DatetimeLikeArrayMixin._time_shift(self, periods, freq=freq) + result.name = self.name + return result + def _ensure_datetimelike_to_i8(other, to_utc=False): """ diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 70140d2d9a432..e0c1ef0489e8f 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -241,9 +241,11 @@ def __new__(cls, data=None, if data is None: # TODO: Remove this block and associated kwargs; GH#20535 - return cls._generate_range(start, end, periods, name, freq, - tz=tz, normalize=normalize, - closed=closed, ambiguous=ambiguous) + out = cls._generate_range(start, end, periods, + freq=freq, tz=tz, normalize=normalize, + closed=closed, ambiguous=ambiguous) + out.name = name + return out if not isinstance(data, (np.ndarray, Index, ABCSeries, DatetimeArrayMixin)): @@ -315,17 +317,6 @@ def __new__(cls, data=None, return subarr._deepcopy_if_needed(ref_to_data, copy) - @classmethod - @Appender(DatetimeArrayMixin._generate_range.__doc__) - def _generate_range(cls, start, end, periods, name=None, freq=None, - tz=None, normalize=False, ambiguous='raise', - closed=None): - out = super(DatetimeIndex, cls)._generate_range( - start, end, periods, freq, - tz=tz, normalize=normalize, ambiguous=ambiguous, closed=closed) - out.name = name - return out - @classmethod def _use_cached_range(cls, freq, _normalized, start, end): # Note: This always returns False diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 56b6dc7051d9f..78fffb0f92958 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -147,12 +147,10 @@ def __new__(cls, data=None, unit=None, freq=None, start=None, end=None, if data is None: # TODO: Remove this block and associated kwargs; GH#20535 - if freq is None and com._any_none(periods, start, end): - raise ValueError('Must provide freq argument if no data is ' - 'supplied') - periods = dtl.validate_periods(periods) - return cls._generate_range(start, end, periods, name, freq, - closed=closed) + out = cls._generate_range(start, end, periods, + freq=freq, closed=closed) + out.name = name + return out if unit is not None: data = to_timedelta(data, unit=unit, box=False) @@ -181,16 +179,6 @@ def __new__(cls, data=None, unit=None, freq=None, start=None, end=None, return subarr - @classmethod - def _generate_range(cls, start, end, periods, - name=None, freq=None, closed=None): - # TimedeltaArray gets `name` via **kwargs, so we need to explicitly - # override it if name is passed as a positional argument - return super(TimedeltaIndex, cls)._generate_range(start, end, - periods, freq, - name=name, - closed=closed) - @classmethod def _simple_new(cls, values, name=None, freq=None, **kwargs): result = super(TimedeltaIndex, cls)._simple_new(values, freq, **kwargs) From 7804f1beb1e030e2f90bd9073995fe01f5b1cdd1 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 13 Oct 2018 13:34:23 -0700 Subject: [PATCH 03/17] Check for invalid axis kwarg --- pandas/core/indexes/datetimelike.py | 16 ++++++++++++++++ pandas/tests/indexes/datetimelike.py | 8 ++++++++ 2 files changed, 24 insertions(+) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index eb8341f379f56..f746cfa9b9410 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -430,6 +430,10 @@ def min(self, axis=None, *args, **kwargs): -------- numpy.ndarray.min """ + if axis is not None and axis >= self.ndim: + raise ValueError("`axis` must be fewer than the number of " + "dimensions ({ndim})".format(ndim=self.ndim)) + nv.validate_min(args, kwargs) try: @@ -458,6 +462,10 @@ def argmin(self, axis=None, *args, **kwargs): -------- numpy.ndarray.argmin """ + if axis is not None and axis >= self.ndim: + raise ValueError("`axis` must be fewer than the number of " + "dimensions ({ndim})".format(ndim=self.ndim)) + nv.validate_argmin(args, kwargs) i8 = self.asi8 @@ -478,6 +486,10 @@ def max(self, axis=None, *args, **kwargs): -------- numpy.ndarray.max """ + if axis is not None and axis >= self.ndim: + raise ValueError("`axis` must be fewer than the number of " + "dimensions ({ndim})".format(ndim=self.ndim)) + nv.validate_max(args, kwargs) try: @@ -506,6 +518,10 @@ def argmax(self, axis=None, *args, **kwargs): -------- numpy.ndarray.argmax """ + if axis is not None and axis >= self.ndim: + raise ValueError("`axis` must be fewer than the number of " + "dimensions ({ndim})".format(ndim=self.ndim)) + nv.validate_argmax(args, kwargs) i8 = self.asi8 diff --git a/pandas/tests/indexes/datetimelike.py b/pandas/tests/indexes/datetimelike.py index e32e18ea0ec4a..f71f5258bb477 100644 --- a/pandas/tests/indexes/datetimelike.py +++ b/pandas/tests/indexes/datetimelike.py @@ -8,6 +8,14 @@ class DatetimeLike(Base): + def test_argmax_axis_invalid(self): + # GH#23081 + rng = self.create_index() + with pytest.raises(ValueError): + rng.argmax(axis=1) + with pytest.raises(ValueError): + rng.argmin(axis=2) + def test_can_hold_identifiers(self): idx = self.create_index() key = idx[0] From a4775f484249eab4945f64c6be5b89a2425300cb Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 13 Oct 2018 13:35:26 -0700 Subject: [PATCH 04/17] Move some EA properties up to mixins --- pandas/core/arrays/datetimelike.py | 14 +++++++++++++- pandas/core/indexes/datetimes.py | 21 --------------------- pandas/core/indexes/period.py | 10 ---------- 3 files changed, 13 insertions(+), 32 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 4353c13889f37..4b0e9c665543f 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -11,6 +11,7 @@ from pandas._libs.tslibs.period import ( Period, DIFFERENT_FREQ_INDEX, IncompatibleFrequency) +from pandas.util._decorators import deprecate_kwarg from pandas.errors import NullFrequencyError, PerformanceWarning from pandas import compat @@ -39,7 +40,6 @@ from pandas.core.algorithms import checked_add_with_arr from .base import ExtensionOpsMixin -from pandas.util._decorators import deprecate_kwarg def _make_comparison_op(cls, op): @@ -143,6 +143,10 @@ def asi8(self): # ------------------------------------------------------------------ # Array-like Methods + @property + def ndim(self): + return len(self.shape) + @property def shape(self): return (len(self),) @@ -151,6 +155,10 @@ def shape(self): def size(self): return np.prod(self.shape) + @property + def nbytes(self): + return self._ndarray_values.nbytes + def __len__(self): return len(self._data) @@ -211,6 +219,10 @@ def astype(self, dtype, copy=True): # ------------------------------------------------------------------ # Null Handling + def isna(self): + # EA Interface + return self._isnan + @property # NB: override with cache_readonly in immutable subclasses def _isnan(self): """ return if each value is nan""" diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index e0c1ef0489e8f..ba7e19b289e0d 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -380,27 +380,6 @@ def tz(self, value): raise AttributeError("Cannot directly set timezone. Use tz_localize() " "or tz_convert() as appropriate") - @property - def size(self): - # TODO: Remove this when we have a DatetimeTZArray - # Necessary to avoid recursion error since DTI._values is a DTI - # for TZ-aware - return self._ndarray_values.size - - @property - def shape(self): - # TODO: Remove this when we have a DatetimeTZArray - # Necessary to avoid recursion error since DTI._values is a DTI - # for TZ-aware - return self._ndarray_values.shape - - @property - def nbytes(self): - # TODO: Remove this when we have a DatetimeTZArray - # Necessary to avoid recursion error since DTI._values is a DTI - # for TZ-aware - return self._ndarray_values.nbytes - @classmethod def _cached_range(cls, start=None, end=None, periods=None, freq=None, name=None): diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index f452a57e82725..05f784c87dc64 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -314,16 +314,6 @@ def __array_wrap__(self, result, context=None): # cannot pass _simple_new as it is return self._shallow_copy(result, freq=self.freq, name=self.name) - @property - def size(self): - # Avoid materializing self._values - return self._ndarray_values.size - - @property - def shape(self): - # Avoid materializing self._values - return self._ndarray_values.shape - @property def _formatter_func(self): return lambda x: "'%s'" % x From 8ee34fa54439f56a438c32e465033ce13520e394 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 13 Oct 2018 13:35:41 -0700 Subject: [PATCH 05/17] implement basic TimedeltaArray tests --- pandas/tests/arrays/test_datetimelike.py | 44 +++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index 6bb4241451b3f..4aef1d5dceb8a 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -34,7 +34,7 @@ def datetime_index(request): A fixture to provide DatetimeIndex objects with different frequencies. Most DatetimeArray behavior is already tested in DatetimeIndex tests, - so here we just test that the DatetimeIndex behavior matches + so here we just test that the DatetimeArray behavior matches the DatetimeIndex behavior. """ freqstr = request.param @@ -45,6 +45,20 @@ def datetime_index(request): return pi +@pytest.fixture +def timedelta_index(request): + """ + A fixture to provide TimedeltaIndex objects with different frequencies. + + Most TimedeltaArray behavior is already tested in TimedeltaIndex tests, + so here we just test that the TimedeltaArray behavior matches + the TimedeltaIndex behavior. + """ + # TODO: flesh this out + return pd.TimedeltaIndex(['1 Day', '3 Hours', 'NaT']) + + + class TestDatetimeArray(object): def test_from_dti(self, tz_naive_fixture): @@ -122,6 +136,34 @@ def test_astype_object(self): assert asobj.dtype == 'O' assert list(asobj) == list(tdi) + def test_to_pytimedelta(self, timedelta_index): + tdi = timedelta_index + arr = TimedeltaArrayMixin(tdi) + + expected = tdi.to_pytimedelta() + result = arr.to_pytimedelta() + + tm.assert_numpy_array_equal(result, expected) + + def test_total_seconds(self, timedelta_index): + tdi = timedelta_index + arr = TimedeltaArrayMixin(tdi) + + expected = tdi.total_seconds() + result = arr.total_seconds() + + tm.assert_numpy_array_equal(result, expected.values) + + @pytest.mark.parametrize('propname', pd.TimedeltaIndex._field_ops) + def test_int_properties(self, timedelta_index, propname): + tdi = timedelta_index + arr = TimedeltaArrayMixin(tdi) + + result = getattr(arr, propname) + expected = np.array(getattr(tdi, propname), dtype=result.dtype) + + tm.assert_numpy_array_equal(result, expected) + class TestPeriodArray(object): From 78943c17e2dcab4f9cf1098c18980d06c9dbfe9e Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 13 Oct 2018 14:55:46 -0700 Subject: [PATCH 06/17] clean up PeriodArray constructor, with tests --- pandas/core/arrays/period.py | 15 +++++++++++---- pandas/core/indexes/period.py | 3 +-- pandas/tests/arrays/test_datetimelike.py | 14 ++++++++++++++ 3 files changed, 26 insertions(+), 6 deletions(-) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 4f7bab55dc824..50769aaa68a96 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -18,6 +18,7 @@ from pandas.core.dtypes.common import ( is_integer_dtype, is_float_dtype, is_period_dtype, + is_object_dtype, is_datetime64_dtype) from pandas.core.dtypes.dtypes import PeriodDtype from pandas.core.dtypes.generic import ABCSeries @@ -124,15 +125,19 @@ def freq(self, value): def __new__(cls, values, freq=None, **kwargs): if is_period_dtype(values): # PeriodArray, PeriodIndex - if freq is not None and values.freq != freq: - raise IncompatibleFrequency(freq, values.freq) - freq = values.freq + freq = dtl.validate_dtype_freq(values.dtype, freq) values = values.asi8 elif is_datetime64_dtype(values): - # TODO: what if it has tz? values = dt64arr_to_periodarr(values, freq) + elif is_object_dtype(values) or isinstance(values, (list, tuple)): + # e.g. array([Period(...), Period(...), NaT]) + values = np.array(values) + if freq is None: + freq = libperiod.extract_freq(values) + values = libperiod.extract_ordinals(values, freq) + return cls._simple_new(values, freq=freq, **kwargs) @classmethod @@ -175,6 +180,8 @@ def _from_ordinals(cls, values, freq=None, **kwargs): @classmethod def _generate_range(cls, start, end, periods, freq, fields): + periods = dtl.validate_periods(periods) + if freq is not None: freq = Period._maybe_convert_freq(freq) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 05f784c87dc64..d6ad04d916e3a 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -165,8 +165,6 @@ def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None, raise TypeError('__new__() got an unexpected keyword argument {}'. format(list(set(fields) - valid_field_set)[0])) - periods = dtl.validate_periods(periods) - if name is None and hasattr(data, 'name'): name = data.name @@ -178,6 +176,7 @@ def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None, freq = Period._maybe_convert_freq(freq) if data is None: + # TODO: Remove this block and associated kwargs; GH#20535 if ordinal is not None: data = np.asarray(ordinal, dtype=np.int64) else: diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index 4aef1d5dceb8a..f3a44be467410 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -167,6 +167,20 @@ def test_int_properties(self, timedelta_index, propname): class TestPeriodArray(object): + def test_from_object_dtype(self, period_index): + pi = period_index + arr = PeriodArrayMixin(pd.Index(pi, dtype=object)) + assert list(arr) == list(pi) + + arr = PeriodArrayMixin(np.array(pi, dtype=object)) + assert list(arr) == list(pi) + + arr = PeriodArrayMixin(list(pi)) + assert list(arr) == list(pi) + + arr = PeriodArrayMixin(tuple(pi)) + assert list(arr) == list(pi) + def test_from_pi(self, period_index): pi = period_index arr = PeriodArrayMixin(pi) From aa7138342b0bdf69e017ab397df34c38c7ab3fe8 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 13 Oct 2018 15:03:01 -0700 Subject: [PATCH 07/17] make PeriodArray.__new__ more grown-up --- pandas/core/arrays/period.py | 10 +++++++++- pandas/core/indexes/period.py | 1 - 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 50769aaa68a96..794ec39c1fabb 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -122,7 +122,15 @@ def freq(self, value): _attributes = ["freq"] - def __new__(cls, values, freq=None, **kwargs): + def __new__(cls, values, freq=None, dtype=None, **kwargs): + + if freq is not None: + # coerce freq to freq object, otherwise it can be coerced + # elementwise, which is slow + freq = Period._maybe_convert_freq(freq) + + freq = dtl.validate_dtype_freq(dtype, freq) + if is_period_dtype(values): # PeriodArray, PeriodIndex freq = dtl.validate_dtype_freq(values.dtype, freq) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index d6ad04d916e3a..3bac746c332a3 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -176,7 +176,6 @@ def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None, freq = Period._maybe_convert_freq(freq) if data is None: - # TODO: Remove this block and associated kwargs; GH#20535 if ordinal is not None: data = np.asarray(ordinal, dtype=np.int64) else: From eae838968abaadbade999d437f07f4876d356468 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 13 Oct 2018 15:09:11 -0700 Subject: [PATCH 08/17] Remove unused kwargs from TimedeltaArray.__new__ --- pandas/core/arrays/timedeltas.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index fe2d219f376f1..eb7dabdc03b0b 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -126,8 +126,7 @@ def _simple_new(cls, values, freq=None, **kwargs): result._freq = freq return result - def __new__(cls, values, freq=None, start=None, end=None, periods=None, - closed=None): + def __new__(cls, values, freq=None): freq, freq_infer = dtl.maybe_infer_freq(freq) From e8717337eb326e55ff403c532e1ba897c4a63d09 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 13 Oct 2018 16:35:37 -0700 Subject: [PATCH 09/17] revert change that broke tests --- pandas/io/pytables.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index cb33ee0a38fe9..0a2d34fec9496 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -2483,7 +2483,7 @@ def f(values, freq=None, tz=None): return f elif klass == PeriodIndex: def f(values, freq=None, tz=None): - return PeriodIndex(values, name=None, freq=freq) + return PeriodIndex._simple_new(values, name=None, freq=freq) return f return klass From 7840f91346598e0cf24b018169a96afeefe38d1d Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 13 Oct 2018 17:51:37 -0700 Subject: [PATCH 10/17] Fixup whitespace --- pandas/tests/arrays/test_datetimelike.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index f3a44be467410..93cab19297fc2 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -58,7 +58,6 @@ def timedelta_index(request): return pd.TimedeltaIndex(['1 Day', '3 Hours', 'NaT']) - class TestDatetimeArray(object): def test_from_dti(self, tz_naive_fixture): From ec50b0bd7b98ab9ddf40a8ed121f4629be3d033f Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 14 Oct 2018 08:16:39 -0700 Subject: [PATCH 11/17] helper function for axis validation --- pandas/core/indexes/datetimelike.py | 39 +++++++++++++++++------------ pandas/core/indexes/datetimes.py | 10 ++++---- 2 files changed, 28 insertions(+), 21 deletions(-) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index f746cfa9b9410..a2c2f7ef41ec0 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -430,10 +430,7 @@ def min(self, axis=None, *args, **kwargs): -------- numpy.ndarray.min """ - if axis is not None and axis >= self.ndim: - raise ValueError("`axis` must be fewer than the number of " - "dimensions ({ndim})".format(ndim=self.ndim)) - + _validate_minmax_axis(axis) nv.validate_min(args, kwargs) try: @@ -462,10 +459,7 @@ def argmin(self, axis=None, *args, **kwargs): -------- numpy.ndarray.argmin """ - if axis is not None and axis >= self.ndim: - raise ValueError("`axis` must be fewer than the number of " - "dimensions ({ndim})".format(ndim=self.ndim)) - + _validate_minmax_axis(axis) nv.validate_argmin(args, kwargs) i8 = self.asi8 @@ -486,10 +480,7 @@ def max(self, axis=None, *args, **kwargs): -------- numpy.ndarray.max """ - if axis is not None and axis >= self.ndim: - raise ValueError("`axis` must be fewer than the number of " - "dimensions ({ndim})".format(ndim=self.ndim)) - + _validate_minmax_axis(axis) nv.validate_max(args, kwargs) try: @@ -518,10 +509,7 @@ def argmax(self, axis=None, *args, **kwargs): -------- numpy.ndarray.argmax """ - if axis is not None and axis >= self.ndim: - raise ValueError("`axis` must be fewer than the number of " - "dimensions ({ndim})".format(ndim=self.ndim)) - + _validate_minmax_axis(axis) nv.validate_argmax(args, kwargs) i8 = self.asi8 @@ -722,6 +710,25 @@ def _time_shift(self, periods, freq=None): return result +def _validate_minmax_axis(axis): + """ + Ensure that the axis argument passed to min, max, argmin, or argmax is + zero or None, as otherwise it will be incorrectly ignored. + + Parameters + ---------- + axis : int or None + + Raises + ------ + ValueError + """ + ndim = 1 # hard-coded for Index + if axis is not None and axis >= ndim: + raise ValueError("`axis` must be fewer than the number of " + "dimensions ({ndim})".format(ndim=ndim)) + + def _ensure_datetimelike_to_i8(other, to_utc=False): """ helper for coercing an input scalar or array to i8 diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index ba7e19b289e0d..b45016e39dff6 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -241,11 +241,11 @@ def __new__(cls, data=None, if data is None: # TODO: Remove this block and associated kwargs; GH#20535 - out = cls._generate_range(start, end, periods, - freq=freq, tz=tz, normalize=normalize, - closed=closed, ambiguous=ambiguous) - out.name = name - return out + result = cls._generate_range(start, end, periods, + freq=freq, tz=tz, normalize=normalize, + closed=closed, ambiguous=ambiguous) + result.name = name + return result if not isinstance(data, (np.ndarray, Index, ABCSeries, DatetimeArrayMixin)): From eb7a6b633e25dec4f4de7de43892f5a3d6211b12 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 14 Oct 2018 08:22:16 -0700 Subject: [PATCH 12/17] suggested clarifications --- pandas/core/arrays/datetimelike.py | 3 ++- pandas/core/arrays/period.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 4b0e9c665543f..4355da3e5fdad 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -344,7 +344,8 @@ def _validate_frequency(cls, index, freq, **kwargs): # Frequency validation is not meaningful for Period Array/Index return None - # DatetimeArray may pass `ambiguous`, nothing else allowed + # DatetimeArray may pass `ambiguous`, nothing else will be accepted + # by cls._generate_range below assert all(key == 'ambiguous' for key in kwargs) inferred = index.inferred_freq diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 794ec39c1fabb..503e296fb2547 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -141,7 +141,7 @@ def __new__(cls, values, freq=None, dtype=None, **kwargs): elif is_object_dtype(values) or isinstance(values, (list, tuple)): # e.g. array([Period(...), Period(...), NaT]) - values = np.array(values) + values = np.array(values, dtype=object) if freq is None: freq = libperiod.extract_freq(values) values = libperiod.extract_ordinals(values, freq) From b97ec964d97c9681b3a66116a0d70e0221443d79 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 14 Oct 2018 11:23:28 -0700 Subject: [PATCH 13/17] move axis validation to nv --- pandas/compat/numpy/function.py | 19 +++++++++++++++++++ pandas/core/indexes/datetimelike.py | 27 ++++----------------------- 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/pandas/compat/numpy/function.py b/pandas/compat/numpy/function.py index d42be56963569..b8276493643f2 100644 --- a/pandas/compat/numpy/function.py +++ b/pandas/compat/numpy/function.py @@ -360,3 +360,22 @@ def validate_resampler_func(method, args, kwargs): "{func}() instead".format(func=method))) else: raise TypeError("too many arguments passed in") + + +def validate_minmax_axis(axis): + """ + Ensure that the axis argument passed to min, max, argmin, or argmax is + zero or None, as otherwise it will be incorrectly ignored. + + Parameters + ---------- + axis : int or None + + Raises + ------ + ValueError + """ + ndim = 1 # hard-coded for Index + if axis is not None and axis >= ndim: + raise ValueError("`axis` must be fewer than the number of " + "dimensions ({ndim})".format(ndim=ndim)) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index a2c2f7ef41ec0..a73b1941e353a 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -430,7 +430,7 @@ def min(self, axis=None, *args, **kwargs): -------- numpy.ndarray.min """ - _validate_minmax_axis(axis) + nv.validate_minmax_axis(axis) nv.validate_min(args, kwargs) try: @@ -459,7 +459,7 @@ def argmin(self, axis=None, *args, **kwargs): -------- numpy.ndarray.argmin """ - _validate_minmax_axis(axis) + nv.validate_minmax_axis(axis) nv.validate_argmin(args, kwargs) i8 = self.asi8 @@ -480,7 +480,7 @@ def max(self, axis=None, *args, **kwargs): -------- numpy.ndarray.max """ - _validate_minmax_axis(axis) + nv.validate_minmax_axis(axis) nv.validate_max(args, kwargs) try: @@ -509,7 +509,7 @@ def argmax(self, axis=None, *args, **kwargs): -------- numpy.ndarray.argmax """ - _validate_minmax_axis(axis) + nv.validate_minmax_axis(axis) nv.validate_argmax(args, kwargs) i8 = self.asi8 @@ -710,25 +710,6 @@ def _time_shift(self, periods, freq=None): return result -def _validate_minmax_axis(axis): - """ - Ensure that the axis argument passed to min, max, argmin, or argmax is - zero or None, as otherwise it will be incorrectly ignored. - - Parameters - ---------- - axis : int or None - - Raises - ------ - ValueError - """ - ndim = 1 # hard-coded for Index - if axis is not None and axis >= ndim: - raise ValueError("`axis` must be fewer than the number of " - "dimensions ({ndim})".format(ndim=ndim)) - - def _ensure_datetimelike_to_i8(other, to_utc=False): """ helper for coercing an input scalar or array to i8 From 147de57a4fa16ecb5bdd0ce7a6c25dc4280017c9 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 14 Oct 2018 15:16:01 -0700 Subject: [PATCH 14/17] revert some removals --- pandas/core/indexes/datetimes.py | 21 +++++++++++++++++++++ pandas/core/indexes/period.py | 10 ++++++++++ 2 files changed, 31 insertions(+) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 2835238b57275..c9107d6509848 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -373,6 +373,27 @@ def tz(self, value): raise AttributeError("Cannot directly set timezone. Use tz_localize() " "or tz_convert() as appropriate") + @property + def size(self): + # TODO: Remove this when we have a DatetimeTZArray + # Necessary to avoid recursion error since DTI._values is a DTI + # for TZ-aware + return self._ndarray_values.size + + @property + def shape(self): + # TODO: Remove this when we have a DatetimeTZArray + # Necessary to avoid recursion error since DTI._values is a DTI + # for TZ-aware + return self._ndarray_values.shape + + @property + def nbytes(self): + # TODO: Remove this when we have a DatetimeTZArray + # Necessary to avoid recursion error since DTI._values is a DTI + # for TZ-aware + return self._ndarray_values.nbytes + def _mpl_repr(self): # how to represent ourselves to matplotlib return libts.ints_to_pydatetime(self.asi8, self.tz) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 3bac746c332a3..ef88ef9ccb624 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -312,6 +312,16 @@ def __array_wrap__(self, result, context=None): # cannot pass _simple_new as it is return self._shallow_copy(result, freq=self.freq, name=self.name) + @property + def size(self): + # Avoid materializing self._values + return self._ndarray_values.size + + @property + def shape(self): + # Avoid materializing self._values + return self._ndarray_values.shape + @property def _formatter_func(self): return lambda x: "'%s'" % x From b90f421df97ff6637ef48f646964c00a7d572121 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 15 Oct 2018 08:23:31 -0700 Subject: [PATCH 15/17] catch too-negative values --- pandas/compat/numpy/function.py | 4 +++- pandas/tests/indexes/datetimelike.py | 4 ++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/pandas/compat/numpy/function.py b/pandas/compat/numpy/function.py index b8276493643f2..ff7c6c0886ed3 100644 --- a/pandas/compat/numpy/function.py +++ b/pandas/compat/numpy/function.py @@ -376,6 +376,8 @@ def validate_minmax_axis(axis): ValueError """ ndim = 1 # hard-coded for Index - if axis is not None and axis >= ndim: + if axis is None: + return + if axis >= ndim or (axis < 0 and ndim - axis < 0): raise ValueError("`axis` must be fewer than the number of " "dimensions ({ndim})".format(ndim=ndim)) diff --git a/pandas/tests/indexes/datetimelike.py b/pandas/tests/indexes/datetimelike.py index f71f5258bb477..7af8b259fa137 100644 --- a/pandas/tests/indexes/datetimelike.py +++ b/pandas/tests/indexes/datetimelike.py @@ -15,6 +15,10 @@ def test_argmax_axis_invalid(self): rng.argmax(axis=1) with pytest.raises(ValueError): rng.argmin(axis=2) + with pytest.raises(ValueError): + rng.min(axis=-2) + with pytest.raises(ValueError): + rng.max(axis=-3) def test_can_hold_identifiers(self): idx = self.create_index() From dc4f474e92fc1ca25419ccf2c98dc48ada5c5e41 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 15 Oct 2018 08:27:51 -0700 Subject: [PATCH 16/17] Roll validate_minmax_axis into existing validate functions --- pandas/compat/numpy/function.py | 36 ++++++++++++++++++++++------- pandas/core/indexes/datetimelike.py | 12 ++++------ 2 files changed, 32 insertions(+), 16 deletions(-) diff --git a/pandas/compat/numpy/function.py b/pandas/compat/numpy/function.py index ff7c6c0886ed3..becb1b7231203 100644 --- a/pandas/compat/numpy/function.py +++ b/pandas/compat/numpy/function.py @@ -58,10 +58,30 @@ def __call__(self, args, kwargs, fname=None, ARGMINMAX_DEFAULTS = dict(out=None) -validate_argmin = CompatValidator(ARGMINMAX_DEFAULTS, fname='argmin', - method='both', max_fname_arg_count=1) -validate_argmax = CompatValidator(ARGMINMAX_DEFAULTS, fname='argmax', - method='both', max_fname_arg_count=1) +_validate_argmin = CompatValidator(ARGMINMAX_DEFAULTS, fname='argmin', + method='both', max_fname_arg_count=1) +_validate_argmax = CompatValidator(ARGMINMAX_DEFAULTS, fname='argmax', + method='both', max_fname_arg_count=1) + + +def validate_argmin(args, kwargs, axis=None): + _validate_argmin(args, kwargs) + validate_minmax_axis(axis) + + +def validate_argmax(args, kwargs, axis=None): + _validate_argmax(args, kwargs) + validate_minmax_axis(axis) + + +def validate_min(args, kwargs, axis=None): + _validate_min(args, kwargs) + validate_minmax_axis(axis) + + +def validate_max(args, kwargs, axis=None): + _validate_max(args, kwargs) + validate_minmax_axis(axis) def process_skipna(skipna, args): @@ -196,10 +216,10 @@ def validate_cum_func_with_skipna(skipna, args, kwargs, name): validate_logical_func = CompatValidator(LOGICAL_FUNC_DEFAULTS, method='kwargs') MINMAX_DEFAULTS = dict(out=None) -validate_min = CompatValidator(MINMAX_DEFAULTS, fname='min', - method='both', max_fname_arg_count=1) -validate_max = CompatValidator(MINMAX_DEFAULTS, fname='max', - method='both', max_fname_arg_count=1) +_validate_min = CompatValidator(MINMAX_DEFAULTS, fname='min', + method='both', max_fname_arg_count=1) +_validate_max = CompatValidator(MINMAX_DEFAULTS, fname='max', + method='both', max_fname_arg_count=1) RESHAPE_DEFAULTS = dict(order='C') validate_reshape = CompatValidator(RESHAPE_DEFAULTS, fname='reshape', diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index a73b1941e353a..b6cd3067f226b 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -430,8 +430,7 @@ def min(self, axis=None, *args, **kwargs): -------- numpy.ndarray.min """ - nv.validate_minmax_axis(axis) - nv.validate_min(args, kwargs) + nv.validate_min(args, kwargs, axis=axis) try: i8 = self.asi8 @@ -459,8 +458,7 @@ def argmin(self, axis=None, *args, **kwargs): -------- numpy.ndarray.argmin """ - nv.validate_minmax_axis(axis) - nv.validate_argmin(args, kwargs) + nv.validate_argmin(args, kwargs, axis=axis) i8 = self.asi8 if self.hasnans: @@ -480,8 +478,7 @@ def max(self, axis=None, *args, **kwargs): -------- numpy.ndarray.max """ - nv.validate_minmax_axis(axis) - nv.validate_max(args, kwargs) + nv.validate_max(args, kwargs, axis=axis) try: i8 = self.asi8 @@ -509,8 +506,7 @@ def argmax(self, axis=None, *args, **kwargs): -------- numpy.ndarray.argmax """ - nv.validate_minmax_axis(axis) - nv.validate_argmax(args, kwargs) + nv.validate_argmax(args, kwargs, axis=axis) i8 = self.asi8 if self.hasnans: From 46d5e647bf4e0f351fc4601482834bd80662b44a Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 15 Oct 2018 09:52:16 -0700 Subject: [PATCH 17/17] fixup typo --- pandas/compat/numpy/function.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/compat/numpy/function.py b/pandas/compat/numpy/function.py index becb1b7231203..e9499cd43b267 100644 --- a/pandas/compat/numpy/function.py +++ b/pandas/compat/numpy/function.py @@ -398,6 +398,6 @@ def validate_minmax_axis(axis): ndim = 1 # hard-coded for Index if axis is None: return - if axis >= ndim or (axis < 0 and ndim - axis < 0): + if axis >= ndim or (axis < 0 and ndim + axis < 0): raise ValueError("`axis` must be fewer than the number of " "dimensions ({ndim})".format(ndim=ndim))