Skip to content

Commit 841decf

Browse files
thequackdaddy authored and TomAugspurger committed
BUG: Load data from a CategoricalIndex for dtype comparison, closes #16627 (#16738)
* BUG: Load data from a CategoricalIndex for dtype comparison, closes #16627
* Enable is_dtype_equal on CategoricalIndex, fixed some doc typos, added ordered CategoricalIndex test
* Flake8 windows suggestion
* Fixed some documentation/formatting issues, clarified the purpose of the test case.

(cherry picked from commit 5b88d2f)
1 parent c61978e commit 841decf

File tree

5 files changed

+53
-3
lines changed

5 files changed

+53
-3
lines changed

.github/PULL_REQUEST_TEMPLATE.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
- [ ] closes #xxxx
22
- [ ] tests added / passed
3-
- [ ] passes ``git diff upstream/master --name-only -- '*.py' | flake8 --diff``
3+
- [ ] passes ``git diff upstream/master --name-only -- '*.py' | flake8 --diff`` (On Windows, ``git diff upstream/master -u -- "*.py" | flake8 --diff`` might work as an alternative.)
44
- [ ] whatsnew entry

doc/source/contributing.rst

+6
Original file line numberDiff line numberDiff line change
@@ -525,6 +525,12 @@ run this slightly modified command::
525525

526526
git diff master --name-only -- '*.py' | grep 'pandas/' | xargs flake8
527527

528+
Note that on Windows, ``grep``, ``xargs``, and other tools are likely
529+
unavailable. However, this has been shown to work on smaller commits in the
530+
standard Windows command line::
531+
532+
git diff master -u -- "*.py" | flake8 --diff
533+
528534
Backwards Compatibility
529535
~~~~~~~~~~~~~~~~~~~~~~~
530536

doc/source/whatsnew/v0.20.3.txt

+1
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ Sparse
7878
Reshaping
7979
^^^^^^^^^
8080

81+
- Bug in joining on a ``MultiIndex`` with a ``category`` dtype for a level (:issue:`16627`).
8182

8283

8384
Numeric

pandas/core/indexes/category.py

+3
Original file line numberDiff line numberDiff line change
@@ -560,6 +560,9 @@ def take(self, indices, axis=0, allow_fill=True,
560560
na_value=-1)
561561
return self._create_from_codes(taken)
562562

563+
def is_dtype_equal(self, other):
    """
    Compare this index's dtype with that of ``other``.

    The comparison is delegated to the object stored in ``self._data``,
    so the index itself adds no logic of its own.

    Parameters
    ----------
    other : object
        The object to compare against; passed through unchanged.

    Returns
    -------
    The result of ``self._data.is_dtype_equal(other)``.
    """
    return self._data.is_dtype_equal(other)
565+
563566
take_nd = take
564567

565568
def map(self, mapper):

pandas/tests/test_join.py

+42-2
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
# -*- coding: utf-8 -*-
22

33
import numpy as np
4-
from pandas import Index
4+
from pandas import Index, DataFrame, Categorical, merge
55

66
from pandas._libs import join as _join
77
import pandas.util.testing as tm
8-
from pandas.util.testing import assert_almost_equal
8+
from pandas.util.testing import assert_almost_equal, assert_frame_equal
99

1010

1111
class TestIndexer(object):
@@ -192,3 +192,43 @@ def test_inner_join_indexer2():
192192

193193
exp_ridx = np.array([0, 1, 2, 3], dtype=np.int64)
194194
assert_almost_equal(ridx, exp_ridx)
195+
196+
197+
def test_merge_join_categorical_multiindex():
    """
    ``DataFrame.join`` on a MultiIndex with a categorical level must give
    the same frame as the equivalent left ``merge``.

    Regression test for issue 16627. The scenario is run twice: once with
    an unordered categorical, and once with an ordered categorical whose
    category order differs from the sorted order.
    """
    # (categories, ordered) for the categorical key column on both sides.
    cases = [
        (['a', 'b', 'c'], False),
        (['b', 'a', 'c'], True),
    ]

    for categories, ordered in cases:
        # Left frame: the join keys are ordinary columns.
        a = {'Cat1': Categorical(['a', 'b', 'a', 'c', 'a', 'b'],
                                 categories=categories,
                                 ordered=ordered),
             'Int1': [0, 1, 0, 1, 0, 0]}
        a = DataFrame(a)

        # Right side: a Series indexed by a (categorical, int) MultiIndex.
        b = {'Cat': Categorical(['a', 'b', 'c', 'a', 'b', 'c'],
                                categories=categories,
                                ordered=ordered),
             'Int': [0, 0, 0, 1, 1, 1],
             'Factor': [1.1, 1.2, 1.3, 1.4, 1.5, 1.6]}
        b = DataFrame(b).set_index(['Cat', 'Int'])['Factor']

        # Reference result: a column-based left merge, minus the right-hand
        # key columns that ``join`` does not carry over.
        expected = merge(a, b.reset_index(), left_on=['Cat1', 'Int1'],
                         right_on=['Cat', 'Int'], how='left')
        expected = expected.drop(['Cat', 'Int'], axis=1)

        result = a.join(b, on=['Cat1', 'Int1'])
        assert_frame_equal(expected, result)

0 commit comments

Comments
 (0)