Skip to content

BUG: fix Index.reindex to preserve type when target is empty list/ndarray #8462

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/v0.15.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1025,3 +1025,4 @@ Bug Fixes
- Bug in NDFrame.equals gives false negatives with dtype=object (:issue:`8437`)
- Bug in assignment with indexer where type diversity would break alignment (:issue:`8258`)
- Bug in ``NDFrame.loc`` indexing where row/column names were lost when the target was a list/ndarray (:issue:`6552`)
- Regression in ``NDFrame.loc`` indexing where rows/columns were converted to ``Float64Index`` when the target was an empty list/ndarray (:issue:`7774`)
6 changes: 1 addition & 5 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1707,15 +1707,11 @@ def _reindex_axes(self, axes, level, limit, method, fill_value, copy):
if labels is None:
continue

# convert to an index if we are not a multi-selection
ax = self._get_axis(a)
if level is None:
labels = _ensure_index(labels)

axis = self._get_axis_number(a)
new_index, indexer = ax.reindex(
labels, level=level, limit=limit, method=method)

axis = self._get_axis_number(a)
obj = obj._reindex_with_indexers(
{axis: [new_index, indexer]}, method=method,
fill_value=fill_value, limit=limit, copy=copy,
Expand Down
31 changes: 29 additions & 2 deletions pandas/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -1594,7 +1594,14 @@ def reindex(self, target, method=None, level=None, limit=None):
# (i.e. neither Index nor Series).
preserve_names = not hasattr(target, 'name')

target = _ensure_index(target)
# GH7774: preserve dtype/tz if target is empty and not an Index.
target = _ensure_has_len(target) # target may be an iterator
if not isinstance(target, Index) and len(target) == 0:
attrs = self._get_attributes_dict()
attrs.pop('freq', None) # don't preserve freq
target = self._simple_new(np.empty(0, dtype=self.dtype), **attrs)
else:
target = _ensure_index(target)
if level is not None:
if method is not None:
raise TypeError('Fill method not supported if level passed')
Expand Down Expand Up @@ -3706,7 +3713,17 @@ def reindex(self, target, method=None, level=None, limit=None):
if level is not None:
if method is not None:
raise TypeError('Fill method not supported if level passed')
target = _ensure_index(target)

# GH7774: preserve dtype/tz if target is empty and not an Index.
target = _ensure_has_len(target) # target may be an iterator
if len(target) == 0 and not isinstance(target, Index):
idx = self.levels[level]
attrs = idx._get_attributes_dict()
attrs.pop('freq', None) # don't preserve freq
target = type(idx)._simple_new(np.empty(0, dtype=idx.dtype),
**attrs)
else:
target = _ensure_index(target)
target, indexer, _ = self._join_level(target, level, how='right',
return_indexers=True)
else:
Expand Down Expand Up @@ -4566,3 +4583,13 @@ def _get_na_rep(dtype):
def _get_na_value(dtype):
    """Return the missing-value marker associated with ``dtype``.

    ``np.datetime64`` and ``np.timedelta64`` map to ``tslib.NaT``; any
    other key falls back to ``np.nan``.  The match is a dictionary key
    lookup on ``dtype`` itself.
    """
    nat = tslib.NaT
    na_by_type = {np.datetime64: nat, np.timedelta64: nat}
    return na_by_type.get(dtype, np.nan)


def _ensure_has_len(seq):
"""If seq is an iterator, put its values into a list."""
try:
len(seq)
except TypeError:
return list(seq)
else:
return seq
50 changes: 50 additions & 0 deletions pandas/tests/test_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -1049,6 +1049,34 @@ def test_reindex_preserves_name_if_target_is_list_or_ndarray(self):
self.assertEqual(idx.reindex(dt_idx.values)[0].name, 'foobar')
self.assertEqual(idx.reindex(dt_idx.tolist())[0].name, 'foobar')

def test_reindex_preserves_type_if_target_is_empty_list_or_array(self):
    # GH7774: reindexing an object-dtype Index against an empty list or
    # ndarray (of any dtype) must still produce an object-dtype result.
    idx = pd.Index(list('abc'))

    empty_target_factories = (
        lambda: [],
        lambda: np.array([]),
        lambda: np.array([], dtype=np.int64),
    )
    for make_target in empty_target_factories:
        new_index = idx.reindex(make_target())[0]
        self.assertEqual(new_index.dtype.type, np.object_)

def test_reindex_doesnt_preserve_type_if_target_is_empty_index(self):
    # GH7774: when the empty target is itself an Index, the result takes
    # the target's dtype rather than the dtype of the reindexed Index.
    idx = pd.Index(list('abc'))

    # (index class, expected dtype type); each index is built right
    # before its check so the checks run in the listed order.
    cases = (
        (pd.Int64Index, np.int_),
        (pd.Float64Index, np.float_),
        (pd.DatetimeIndex, np.datetime64),
    )
    for index_cls, expected_type in cases:
        result = idx.reindex(index_cls([]))[0]
        self.assertEqual(result.dtype.type, expected_type)

    # An empty MultiIndex target keeps the dtype of each of its levels.
    empty_levels = [pd.Int64Index([]), pd.Float64Index([])]
    empty_multi = pd.MultiIndex(empty_levels, [[], []])
    reindexed = idx.reindex(empty_multi)[0]
    self.assertEqual(reindexed.levels[0].dtype.type, np.int64)
    self.assertEqual(reindexed.levels[1].dtype.type, np.float64)



class Numeric(Base):

Expand Down Expand Up @@ -1699,6 +1727,13 @@ def test_roundtrip_pickle_with_tz(self):
unpickled = self.round_trip_pickle(index)
self.assertTrue(index.equals(unpickled))

def test_reindex_preserves_tz_if_target_is_empty_list_or_array(self):
    # GH7774: the timezone must survive reindexing against an empty
    # list or an empty ndarray.
    tz_index = date_range('20130101', periods=3, tz='US/Eastern')

    from_list = tz_index.reindex([])[0]
    self.assertEqual(str(from_list.tz), 'US/Eastern')

    from_array = tz_index.reindex(np.array([]))[0]
    self.assertEqual(str(from_array.tz), 'US/Eastern')


class TestPeriodIndex(Base, tm.TestCase):
_holder = PeriodIndex
_multiprocess_can_split_ = True
Expand Down Expand Up @@ -3321,6 +3356,21 @@ def test_reindex_preserves_names_when_target_is_list_or_ndarray(self):
self.assertEqual(idx.reindex(other_dtype.tolist())[0].names, ['foo', 'bar'])
self.assertEqual(idx.reindex(other_dtype.values)[0].names, ['foo', 'bar'])

def test_reindex_lvl_preserves_names_when_target_is_list_or_array(self):
    # GH7774: level-wise reindexing against an empty list keeps the
    # MultiIndex names, whichever level is targeted.
    idx = pd.MultiIndex.from_product([[0, 1], ['a', 'b']],
                                     names=['foo', 'bar'])
    for lvl in (0, 1):
        new_index = idx.reindex([], level=lvl)[0]
        self.assertEqual(new_index.names, ['foo', 'bar'])

def test_reindex_lvl_preserves_type_if_target_is_empty_list_or_array(self):
    # GH7774: level-wise reindexing against an empty list keeps the
    # dtype of the targeted level.
    idx = pd.MultiIndex.from_product([[0, 1], ['a', 'b']])

    for lvl, expected_type in ((0, np.int_), (1, np.object_)):
        new_index = idx.reindex([], level=lvl)[0]
        self.assertEqual(new_index.levels[lvl].dtype.type, expected_type)


def test_get_combined_index():
from pandas.core.index import _get_combined_index
Expand Down
36 changes: 27 additions & 9 deletions pandas/tests/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -3832,23 +3832,41 @@ def test_set_ix_out_of_bounds_axis_1(self):
def test_iloc_empty_list_indexer_is_ok(self):
    # An empty list passed to ``.iloc`` must give the same frame as the
    # equivalent empty positional slice, including the index and column
    # types.  Only the strict comparisons are kept: with
    # check_index_type/check_column_type enabled they cover everything
    # the plain ``assert_frame_equal`` calls did.
    from pandas.util.testing import makeCustomDataframe as mkdf
    df = mkdf(5, 2)
    # no columns selected
    assert_frame_equal(df.iloc[:, []], df.iloc[:, :0],
                       check_index_type=True, check_column_type=True)
    # no rows selected (explicit column slice)
    assert_frame_equal(df.iloc[[], :], df.iloc[:0, :],
                       check_index_type=True, check_column_type=True)
    # no rows selected (row indexer only)
    assert_frame_equal(df.iloc[[]], df.iloc[:0, :],
                       check_index_type=True, check_column_type=True)

def test_loc_empty_list_indexer_is_ok(self):
    # An empty list passed to ``.loc`` must give the same frame as the
    # equivalent empty positional slice, including the index and column
    # types.  Only the strict comparisons are kept: with
    # check_index_type/check_column_type enabled they cover everything
    # the plain ``assert_frame_equal`` calls did.
    from pandas.util.testing import makeCustomDataframe as mkdf
    df = mkdf(5, 2)
    # no columns selected
    assert_frame_equal(df.loc[:, []], df.iloc[:, :0],
                       check_index_type=True, check_column_type=True)
    # no rows selected (explicit column slice)
    assert_frame_equal(df.loc[[], :], df.iloc[:0, :],
                       check_index_type=True, check_column_type=True)
    # no rows selected (row indexer only)
    assert_frame_equal(df.loc[[]], df.iloc[:0, :],
                       check_index_type=True, check_column_type=True)

def test_ix_empty_list_indexer_is_ok(self):
    # An empty list passed to ``.ix`` must give the same frame as the
    # equivalent empty positional slice, including the index and column
    # types.  Only the strict comparisons are kept: with
    # check_index_type/check_column_type enabled they cover everything
    # the plain ``assert_frame_equal`` calls did.
    from pandas.util.testing import makeCustomDataframe as mkdf
    df = mkdf(5, 2)
    # no columns selected
    assert_frame_equal(df.ix[:, []], df.iloc[:, :0],
                       check_index_type=True, check_column_type=True)
    # no rows selected (explicit column slice)
    assert_frame_equal(df.ix[[], :], df.iloc[:0, :],
                       check_index_type=True, check_column_type=True)
    # no rows selected (row indexer only)
    assert_frame_equal(df.ix[[]], df.iloc[:0, :],
                       check_index_type=True, check_column_type=True)

def test_deprecate_float_indexers(self):

Expand Down