diff --git a/doc/source/v0.15.0.txt b/doc/source/v0.15.0.txt index 7e0931ca1b745..0b7287ed69c56 100644 --- a/doc/source/v0.15.0.txt +++ b/doc/source/v0.15.0.txt @@ -274,6 +274,8 @@ Bug Fixes - Bug in ``is_superperiod`` and ``is_subperiod`` cannot handle higher frequencies than ``S`` (:issue:`7760`, :issue:`7772`, :issue:`7803`) +- Bug in ``PeriodIndex.unique`` returns int64 ``np.ndarray`` (:issue:`7540`) + - Bug in ``DataFrame.reset_index`` which has ``MultiIndex`` contains ``PeriodIndex`` or ``DatetimeIndex`` with tz raises ``ValueError`` (:issue:`7746`, :issue:`7793`) diff --git a/pandas/core/base.py b/pandas/core/base.py index d55196b56c784..beffbfb2923db 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -552,3 +552,17 @@ def __sub__(self, other): def _add_delta(self, other): return NotImplemented + + def unique(self): + """ + Index.unique with handling for DatetimeIndex/PeriodIndex metadata + + Returns + ------- + result : DatetimeIndex or PeriodIndex + """ + from pandas.core.index import Int64Index + result = Int64Index.unique(self) + return self._simple_new(result, name=self.name, freq=self.freq, + tz=getattr(self, 'tz', None)) + diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index 494c0ee6b2bec..9acb1804a7ef0 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -250,11 +250,13 @@ def test_value_counts_unique_nunique(self): expected_s = Series(range(10, 0, -1), index=values[::-1], dtype='int64') tm.assert_series_equal(o.value_counts(), expected_s) - if isinstance(o, DatetimeIndex): - # DatetimeIndex.unique returns DatetimeIndex - self.assertTrue(o.unique().equals(klass(values))) - else: - self.assert_numpy_array_equal(o.unique(), values) + result = o.unique() + if isinstance(o, (DatetimeIndex, PeriodIndex)): + self.assertTrue(isinstance(result, o.__class__)) + self.assertEqual(result.name, o.name) + self.assertEqual(result.freq, o.freq) + + self.assert_numpy_array_equal(result, values) self.assertEqual(o.nunique(), len(np.unique(o.values))) @@ -263,17 +265,13 @@ def test_value_counts_unique_nunique(self): klass = type(o) values = o.values - if o.values.dtype == 'int64': - # skips int64 because it doesn't allow to include nan or None - continue - if ((isinstance(o, Int64Index) and not isinstance(o, (DatetimeIndex, PeriodIndex)))): # skips int64 because it doesn't allow to include nan or None continue # special assign to the numpy array - if o.values.dtype == 'datetime64[ns]': + if o.values.dtype == 'datetime64[ns]' or isinstance(o, PeriodIndex): values[0:2] = pd.tslib.iNaT else: values[0:2] = null_obj @@ -294,8 +292,8 @@ def test_value_counts_unique_nunique(self): result = o.unique() self.assert_numpy_array_equal(result[1:], values[2:]) - if isinstance(o, DatetimeIndex): - self.assertTrue(result[0] is pd.NaT) + if isinstance(o, (DatetimeIndex, PeriodIndex)): + self.assertTrue(result.asi8[0] == pd.tslib.iNaT) else: self.assertTrue(pd.isnull(result[0])) @@ -706,7 +704,7 @@ def test_sub_isub(self): rng -= 1 tm.assert_index_equal(rng, expected) - def test_value_counts(self): + def test_value_counts_unique(self): # GH 7735 for tz in [None, 'UTC', 'Asia/Tokyo', 'US/Eastern']: idx = pd.date_range('2011-01-01 09:00', freq='H', periods=10) @@ -717,6 +715,9 @@ def test_value_counts(self): expected = Series(range(10, 0, -1), index=exp_idx, dtype='int64') tm.assert_series_equal(idx.value_counts(), expected) + expected = pd.date_range('2011-01-01 09:00', freq='H', periods=10, tz=tz) + tm.assert_index_equal(idx.unique(), expected) + idx = DatetimeIndex(['2013-01-01 09:00', '2013-01-01 09:00', '2013-01-01 09:00', '2013-01-01 08:00', '2013-01-01 08:00', pd.NaT], tz=tz) @@ -728,6 +729,8 @@ def test_value_counts(self): expected = Series([3, 2, 1], index=exp_idx) tm.assert_series_equal(idx.value_counts(dropna=False), expected) + tm.assert_index_equal(idx.unique(), exp_idx) + class TestPeriodIndexOps(Ops): _allowed = '_allow_period_index_ops' @@ -987,7 +990,7 @@ def test_sub_isub(self): rng -= 1 tm.assert_index_equal(rng, expected) - def test_value_counts(self): + def test_value_counts_unique(self): # GH 7735 idx = pd.period_range('2011-01-01 09:00', freq='H', periods=10) # create repeated values, 'n'th element is repeated by n+1 times @@ -1000,6 +1003,9 @@ def test_value_counts(self): expected = Series(range(10, 0, -1), index=exp_idx, dtype='int64') tm.assert_series_equal(idx.value_counts(), expected) + expected = pd.period_range('2011-01-01 09:00', freq='H', periods=10) + tm.assert_index_equal(idx.unique(), expected) + idx = PeriodIndex(['2013-01-01 09:00', '2013-01-01 09:00', '2013-01-01 09:00', '2013-01-01 08:00', '2013-01-01 08:00', pd.NaT], freq='H') @@ -1011,6 +1017,8 @@ def test_value_counts(self): expected = Series([3, 2, 1], index=exp_idx) tm.assert_series_equal(idx.value_counts(dropna=False), expected) + tm.assert_index_equal(idx.unique(), exp_idx) + if __name__ == '__main__': import nose diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 2a3c53135a644..4aa424ea08031 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -848,18 +848,6 @@ def take(self, indices, axis=0): return self[maybe_slice] return super(DatetimeIndex, self).take(indices, axis) - def unique(self): - """ - Index.unique with handling for DatetimeIndex metadata - - Returns - ------- - result : DatetimeIndex - """ - result = Int64Index.unique(self) - return DatetimeIndex._simple_new(result, tz=self.tz, - name=self.name) - def union(self, other): """ Specialized union for DatetimeIndex objects. If combine