From 8980295a42ff8f1ae1889e36c69970ab25338f13 Mon Sep 17 00:00:00 2001 From: immerrr Date: Sat, 4 Oct 2014 19:44:28 +0400 Subject: [PATCH] BUG: fix Index.reindex to preserve type when target is empty list/ndarray TST: check index/columns types when doing empty loc/ix tests CLN: don't _ensure_index in NDFrame._reindex_axes, it is done in Index.reindex --- doc/source/v0.15.0.txt | 1 + pandas/core/generic.py | 6 +---- pandas/core/index.py | 31 ++++++++++++++++++++-- pandas/tests/test_index.py | 50 +++++++++++++++++++++++++++++++++++ pandas/tests/test_indexing.py | 36 ++++++++++++++++++------- 5 files changed, 108 insertions(+), 16 deletions(-) diff --git a/doc/source/v0.15.0.txt b/doc/source/v0.15.0.txt index 68f313f321fc8..f163efe45dd86 100644 --- a/doc/source/v0.15.0.txt +++ b/doc/source/v0.15.0.txt @@ -1025,3 +1025,4 @@ Bug Fixes - Bug in NDFrame.equals gives false negatives with dtype=object (:issue:`8437`) - Bug in assignment with indexer where type diversity would break alignment (:issue:`8258`) - Bug in ``NDFrame.loc`` indexing when row/column names were lost when target was a list/ndarray (:issue:`6552`) +- Regression in ``NDFrame.loc`` indexing when rows/columns were converted to Float64Index if target was an empty list/ndarray (:issue:`7774`) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index f2ff44bb5214c..ffedeb9ade355 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1707,15 +1707,11 @@ def _reindex_axes(self, axes, level, limit, method, fill_value, copy): if labels is None: continue - # convert to an index if we are not a multi-selection ax = self._get_axis(a) - if level is None: - labels = _ensure_index(labels) - - axis = self._get_axis_number(a) new_index, indexer = ax.reindex( labels, level=level, limit=limit, method=method) + axis = self._get_axis_number(a) obj = obj._reindex_with_indexers( {axis: [new_index, indexer]}, method=method, fill_value=fill_value, limit=limit, copy=copy, diff --git a/pandas/core/index.py b/pandas/core/index.py index 2048081573308..e10f4b2009817 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -1594,7 +1594,14 @@ def reindex(self, target, method=None, level=None, limit=None): # (i.e. neither Index nor Series). preserve_names = not hasattr(target, 'name') - target = _ensure_index(target) + # GH7774: preserve dtype/tz if target is empty and not an Index. + target = _ensure_has_len(target) # target may be an iterator + if not isinstance(target, Index) and len(target) == 0: + attrs = self._get_attributes_dict() + attrs.pop('freq', None) # don't preserve freq + target = self._simple_new(np.empty(0, dtype=self.dtype), **attrs) + else: + target = _ensure_index(target) if level is not None: if method is not None: raise TypeError('Fill method not supported if level passed') @@ -3706,7 +3713,17 @@ def reindex(self, target, method=None, level=None, limit=None): if level is not None: if method is not None: raise TypeError('Fill method not supported if level passed') - target = _ensure_index(target) + + # GH7774: preserve dtype/tz if target is empty and not an Index. + target = _ensure_has_len(target) # target may be an iterator + if len(target) == 0 and not isinstance(target, Index): + idx = self.levels[level] + attrs = idx._get_attributes_dict() + attrs.pop('freq', None) # don't preserve freq + target = type(idx)._simple_new(np.empty(0, dtype=idx.dtype), + **attrs) + else: + target = _ensure_index(target) target, indexer, _ = self._join_level(target, level, how='right', return_indexers=True) else: @@ -4566,3 +4583,13 @@ def _get_na_rep(dtype): def _get_na_value(dtype): return {np.datetime64: tslib.NaT, np.timedelta64: tslib.NaT}.get(dtype, np.nan) + + +def _ensure_has_len(seq): + """If seq is an iterator, put its values into a list.""" + try: + len(seq) + except TypeError: + return list(seq) + else: + return seq diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py index ec9193d67151b..53a5bd4ae5d49 100644 --- a/pandas/tests/test_index.py +++ b/pandas/tests/test_index.py @@ -1049,6 +1049,34 @@ def test_reindex_preserves_name_if_target_is_list_or_ndarray(self): self.assertEqual(idx.reindex(dt_idx.values)[0].name, 'foobar') self.assertEqual(idx.reindex(dt_idx.tolist())[0].name, 'foobar') + def test_reindex_preserves_type_if_target_is_empty_list_or_array(self): + # GH7774 + idx = pd.Index(list('abc')) + def get_reindex_type(target): + return idx.reindex(target)[0].dtype.type + + self.assertEqual(get_reindex_type([]), np.object_) + self.assertEqual(get_reindex_type(np.array([])), np.object_) + self.assertEqual(get_reindex_type(np.array([], dtype=np.int64)), + np.object_) + + def test_reindex_doesnt_preserve_type_if_target_is_empty_index(self): + # GH7774 + idx = pd.Index(list('abc')) + def get_reindex_type(target): + return idx.reindex(target)[0].dtype.type + + self.assertEqual(get_reindex_type(pd.Int64Index([])), np.int_) + self.assertEqual(get_reindex_type(pd.Float64Index([])), np.float_) + self.assertEqual(get_reindex_type(pd.DatetimeIndex([])), np.datetime64) + + reindexed = idx.reindex(pd.MultiIndex([pd.Int64Index([]), + pd.Float64Index([])], + [[], []]))[0] + self.assertEqual(reindexed.levels[0].dtype.type, np.int64) + self.assertEqual(reindexed.levels[1].dtype.type, np.float64) + + class Numeric(Base): @@ -1699,6 +1727,13 @@ def test_roundtrip_pickle_with_tz(self): unpickled = self.round_trip_pickle(index) self.assertTrue(index.equals(unpickled)) + def test_reindex_preserves_tz_if_target_is_empty_list_or_array(self): + # GH7774 + index = date_range('20130101', periods=3, tz='US/Eastern') + self.assertEqual(str(index.reindex([])[0].tz), 'US/Eastern') + self.assertEqual(str(index.reindex(np.array([]))[0].tz), 'US/Eastern') + + class TestPeriodIndex(Base, tm.TestCase): _holder = PeriodIndex _multiprocess_can_split_ = True @@ -3321,6 +3356,21 @@ def test_reindex_preserves_names_when_target_is_list_or_ndarray(self): self.assertEqual(idx.reindex(other_dtype.tolist())[0].names, ['foo', 'bar']) self.assertEqual(idx.reindex(other_dtype.values)[0].names, ['foo', 'bar']) + def test_reindex_lvl_preserves_names_when_target_is_list_or_array(self): + # GH7774 + idx = pd.MultiIndex.from_product([[0, 1], ['a', 'b']], + names=['foo', 'bar']) + self.assertEqual(idx.reindex([], level=0)[0].names, ['foo', 'bar']) + self.assertEqual(idx.reindex([], level=1)[0].names, ['foo', 'bar']) + + def test_reindex_lvl_preserves_type_if_target_is_empty_list_or_array(self): + # GH7774 + idx = pd.MultiIndex.from_product([[0, 1], ['a', 'b']]) + self.assertEqual(idx.reindex([], level=0)[0].levels[0].dtype.type, + np.int_) + self.assertEqual(idx.reindex([], level=1)[0].levels[1].dtype.type, + np.object_) + def test_get_combined_index(): from pandas.core.index import _get_combined_index diff --git a/pandas/tests/test_indexing.py b/pandas/tests/test_indexing.py index 97ebca39aae5a..79e4b89889916 100644 --- a/pandas/tests/test_indexing.py +++ b/pandas/tests/test_indexing.py @@ -3832,23 +3832,41 @@ def test_set_ix_out_of_bounds_axis_1(self): def test_iloc_empty_list_indexer_is_ok(self): from pandas.util.testing import makeCustomDataframe as mkdf df = mkdf(5, 2) - assert_frame_equal(df.iloc[:,[]], df.iloc[:, :0]) # vertical empty - assert_frame_equal(df.iloc[[],:], df.iloc[:0, :]) # horizontal empty - assert_frame_equal(df.iloc[[]], df.iloc[:0, :]) # horizontal empty + # vertical empty + assert_frame_equal(df.iloc[:, []], df.iloc[:, :0], + check_index_type=True, check_column_type=True) + # horizontal empty + assert_frame_equal(df.iloc[[], :], df.iloc[:0, :], + check_index_type=True, check_column_type=True) + # horizontal empty + assert_frame_equal(df.iloc[[]], df.iloc[:0, :], + check_index_type=True, check_column_type=True) def test_loc_empty_list_indexer_is_ok(self): from pandas.util.testing import makeCustomDataframe as mkdf df = mkdf(5, 2) - assert_frame_equal(df.loc[:,[]], df.iloc[:, :0]) # vertical empty - assert_frame_equal(df.loc[[],:], df.iloc[:0, :]) # horizontal empty - assert_frame_equal(df.loc[[]], df.iloc[:0, :]) # horizontal empty + # vertical empty + assert_frame_equal(df.loc[:, []], df.iloc[:, :0], + check_index_type=True, check_column_type=True) + # horizontal empty + assert_frame_equal(df.loc[[], :], df.iloc[:0, :], + check_index_type=True, check_column_type=True) + # horizontal empty + assert_frame_equal(df.loc[[]], df.iloc[:0, :], + check_index_type=True, check_column_type=True) def test_ix_empty_list_indexer_is_ok(self): from pandas.util.testing import makeCustomDataframe as mkdf df = mkdf(5, 2) - assert_frame_equal(df.ix[:,[]], df.iloc[:, :0]) # vertical empty - assert_frame_equal(df.ix[[],:], df.iloc[:0, :]) # horizontal empty - assert_frame_equal(df.ix[[]], df.iloc[:0, :]) # horizontal empty + # vertical empty + assert_frame_equal(df.ix[:, []], df.iloc[:, :0], + check_index_type=True, check_column_type=True) + # horizontal empty + assert_frame_equal(df.ix[[], :], df.iloc[:0, :], + check_index_type=True, check_column_type=True) + # horizontal empty + assert_frame_equal(df.ix[[]], df.iloc[:0, :], + check_index_type=True, check_column_type=True) def test_deprecate_float_indexers(self):