Skip to content

Commit 83798b4

Browse files
committed
BUG: Respect dups in reindexing CategoricalIndex
Even when the indexer is identical to the elements of the index, we should still return duplicates if the indexer contains duplicates. Closes gh-17323.
1 parent 473a7f3 commit 83798b4

File tree

3 files changed

+19
-6
lines changed

3 files changed

+19
-6
lines changed

doc/source/whatsnew/v0.21.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -358,6 +358,7 @@ Indexing
358358
- Allow unicode empty strings as placeholders in multilevel columns in Python 2 (:issue:`17099`)
359359
- Bug in ``.iloc`` when used with inplace addition or assignment and an int indexer on a ``MultiIndex`` causing the wrong indexes to be read from and written to (:issue:`17148`)
360360
- Bug in ``.isin()`` in which checking membership in empty ``Series`` objects raised an error (:issue:`16991`)
361+
- Bug in ``CategoricalIndex`` reindexing in which specified indices containing duplicates were not being respected (:issue:`17323`)
361362

362363
I/O
363364
^^^

pandas/core/indexes/category.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -487,7 +487,7 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None):
487487
method = missing.clean_reindex_fill_method(method)
488488
target = ibase._ensure_index(target)
489489

490-
if self.equals(target):
490+
if self.is_unique and self.equals(target):
491491
return np.arange(len(self), dtype='intp')
492492

493493
if method == 'pad' or method == 'backfill':

pandas/tests/indexes/test_category.py

+17-5
Original file line numberDiff line numberDiff line change
@@ -365,18 +365,18 @@ def test_astype(self):
365365
tm.assert_index_equal(result, expected)
366366

367367
def test_reindex_base(self):
368-
369-
# determined by cat ordering
370-
idx = self.create_index()
368+
# Determined by cat ordering.
369+
idx = CategoricalIndex(list("cab"), categories=list("cab"))
371370
expected = np.arange(len(idx), dtype=np.intp)
372371

373372
actual = idx.get_indexer(idx)
374373
tm.assert_numpy_array_equal(expected, actual)
375374

376-
with tm.assert_raises_regex(ValueError, 'Invalid fill method'):
377-
idx.get_indexer(idx, method='invalid')
375+
with tm.assert_raises_regex(ValueError, "Invalid fill method"):
376+
idx.get_indexer(idx, method="invalid")
378377

379378
def test_reindexing(self):
379+
np.random.seed(123456789)
380380

381381
ci = self.create_index()
382382
oidx = Index(np.array(ci))
@@ -388,6 +388,18 @@ def test_reindexing(self):
388388
actual = ci.get_indexer(finder)
389389
tm.assert_numpy_array_equal(expected, actual)
390390

391+
# see gh-17323
392+
#
393+
# Even when indexer is equal to the
394+
# members in the index, we should
395+
# respect duplicates instead of taking
396+
# the fast-track path.
397+
for finder in [list("aabbca"), list("aababca")]:
398+
expected = oidx.get_indexer_non_unique(finder)[0]
399+
400+
actual = ci.get_indexer(finder)
401+
tm.assert_numpy_array_equal(expected, actual)
402+
391403
def test_reindex_dtype(self):
392404
c = CategoricalIndex(['a', 'b', 'c', 'a'])
393405
res, indexer = c.reindex(['a', 'c'])

0 commit comments

Comments
 (0)