From 11cb1f8b81dc7d986831edd2bc151d57b2060c54 Mon Sep 17 00:00:00 2001 From: gabriellm1 Date: Thu, 29 Aug 2019 11:51:00 -0300 Subject: [PATCH 1/7] cython fix --- pandas/_libs/join.pyx | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/_libs/join.pyx b/pandas/_libs/join.pyx index 238bfd0be0aa7..397d9e19d30c5 100644 --- a/pandas/_libs/join.pyx +++ b/pandas/_libs/join.pyx @@ -242,6 +242,7 @@ ctypedef fused join_t: float64_t float32_t object + int8_t int32_t int64_t uint64_t @@ -410,6 +411,7 @@ def left_join_indexer(ndarray[join_t] left, ndarray[join_t] right): left_join_indexer_float64 = left_join_indexer["float64_t"] left_join_indexer_float32 = left_join_indexer["float32_t"] left_join_indexer_object = left_join_indexer["object"] +left_join_indexer_int8 = left_join_indexer["int8_t"] left_join_indexer_int32 = left_join_indexer["int32_t"] left_join_indexer_int64 = left_join_indexer["int64_t"] left_join_indexer_uint64 = left_join_indexer["uint64_t"] From 95c5cf8b815b5a8ce2c5c76fa9dbee5aaba111e5 Mon Sep 17 00:00:00 2001 From: guipleite Date: Thu, 5 Sep 2019 10:05:46 -0300 Subject: [PATCH 2/7] added unit test --- pandas/tests/test_join.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/pandas/tests/test_join.py b/pandas/tests/test_join.py index e750193abb71a..ce2cdfd5c86f7 100644 --- a/pandas/tests/test_join.py +++ b/pandas/tests/test_join.py @@ -348,3 +348,25 @@ def test_merge_join_categorical_multiindex(): result = a.join(b, on=["Cat1", "Int1"]) expected = expected.drop(["Cat", "Int"], axis=1) assert_frame_equal(expected, result) + + +def test_left_index_and_right_index_true(): + # From issue 28189 + + pdf = DataFrame({ + "idx": Categorical(["1"] * 4), + "value": [1, 2, 3, 4] + }) + pdf = pdf.set_index("idx") + agg = pdf.groupby("idx").agg(np.sum)["value"] + + result = merge(pdf, agg, how="left", left_index=True, right_index=True) + expected = merge(pdf, agg, how="left", on="idx") + + result = result.reset_index(drop=True) + expected = 
expected.reset_index(drop=True) + + assert_frame_equal(expected, result) + + + From fd1d3f196a05ced6e21e639b648eb3dc61e9ebbd Mon Sep 17 00:00:00 2001 From: gabriellm1 Date: Thu, 29 Aug 2019 11:51:00 -0300 Subject: [PATCH 3/7] cython fix --- doc/source/whatsnew/v1.0.0.rst | 2 +- pandas/_libs/join.pyx | 2 ++ pandas/tests/test_join.py | 16 ++++++++++++++++ 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 050a26cc86d42..3b7ebbf419375 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -89,7 +89,7 @@ Categorical ^^^^^^^^^^^ - Added test to assert the :func:`fillna` raises the correct ValueError message when the value isn't a value from categories (:issue:`13628`) -- +- Bug in merge on CategoricalIndex fails if left_index=True & right_index=True, but not if on={index} (:issue:`28189`) - diff --git a/pandas/_libs/join.pyx b/pandas/_libs/join.pyx index 238bfd0be0aa7..397d9e19d30c5 100644 --- a/pandas/_libs/join.pyx +++ b/pandas/_libs/join.pyx @@ -242,6 +242,7 @@ ctypedef fused join_t: float64_t float32_t object + int8_t int32_t int64_t uint64_t @@ -410,6 +411,7 @@ def left_join_indexer(ndarray[join_t] left, ndarray[join_t] right): left_join_indexer_float64 = left_join_indexer["float64_t"] left_join_indexer_float32 = left_join_indexer["float32_t"] left_join_indexer_object = left_join_indexer["object"] +left_join_indexer_int8 = left_join_indexer["int8_t"] left_join_indexer_int32 = left_join_indexer["int32_t"] left_join_indexer_int64 = left_join_indexer["int64_t"] left_join_indexer_uint64 = left_join_indexer["uint64_t"] diff --git a/pandas/tests/test_join.py b/pandas/tests/test_join.py index e750193abb71a..5213b473eb0ae 100644 --- a/pandas/tests/test_join.py +++ b/pandas/tests/test_join.py @@ -348,3 +348,19 @@ def test_merge_join_categorical_multiindex(): result = a.join(b, on=["Cat1", "Int1"]) expected = expected.drop(["Cat", "Int"], axis=1) 
 assert_frame_equal(expected, result) + + +def test_left_index_and_right_index_true(): + # From issue 28189 + + pdf = DataFrame({"idx": Categorical(["1"] * 4), "value": [1, 2, 3, 4]}) + pdf = pdf.set_index("idx") + agg = pdf.groupby("idx").agg(np.sum)["value"] + + result = merge(pdf, agg, how="left", left_index=True, right_index=True) + expected = merge(pdf, agg, how="left", on="idx") + + result = result.reset_index(drop=True) + expected = expected.reset_index(drop=True) + + assert_frame_equal(expected, result) From 832107543fd4fb45dfb3a106f7ca66efd1b4161d Mon Sep 17 00:00:00 2001 From: guipleite Date: Thu, 5 Sep 2019 14:15:23 -0300 Subject: [PATCH 4/7] Fixing reviewed unit test --- pandas/tests/test_join.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/pandas/tests/test_join.py b/pandas/tests/test_join.py index 5213b473eb0ae..b9051c4887292 100644 --- a/pandas/tests/test_join.py +++ b/pandas/tests/test_join.py @@ -353,14 +353,15 @@ def test_merge_join_categorical_multiindex(): def test_left_index_and_right_index_true(): # From issue 28189 - pdf = DataFrame({"idx": Categorical(["1"] * 4), "value": [1, 2, 3, 4]}) - pdf = pdf.set_index("idx") + pdf = DataFrame( + range(4), columns=["value"], index=Index(Categorical(["1"] * 4), name="idx") + ) agg = pdf.groupby("idx").agg(np.sum)["value"] result = merge(pdf, agg, how="left", left_index=True, right_index=True) - expected = merge(pdf, agg, how="left", on="idx") - result = result.reset_index(drop=True) - expected = expected.reset_index(drop=True) + expected = DataFrame( + np.array([[0, 6], [1, 6], [2, 6], [3, 6]]), columns=["value_x", "value_y"] + ) assert_frame_equal(expected, result) From d938b1b1c6dc0e3838c62e95cdee17f85c830a8a Mon Sep 17 00:00:00 2001 From: guipleite Date: Thu, 5 Sep 2019 15:30:35 -0300 Subject: [PATCH 5/7] Further fixing reviewed unit test --- pandas/tests/test_join.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/tests/test_join.py 
b/pandas/tests/test_join.py index b9051c4887292..8f7bc02f468ef 100644 --- a/pandas/tests/test_join.py +++ b/pandas/tests/test_join.py @@ -353,12 +353,14 @@ def test_merge_join_categorical_multiindex(): def test_left_index_and_right_index_true(): # From issue 28189 - pdf = DataFrame( + df = DataFrame( range(4), columns=["value"], index=Index(Categorical(["1"] * 4), name="idx") ) - agg = pdf.groupby("idx").agg(np.sum)["value"] + df2 = DataFrame( + [[6]], columns=["value"], index=Index(Categorical(["1"]), name="idx") + ) - result = merge(pdf, agg, how="left", left_index=True, right_index=True) + result = merge(df, df2, how="left", left_index=True, right_index=True) result = result.reset_index(drop=True) expected = DataFrame( np.array([[0, 6], [1, 6], [2, 6], [3, 6]]), columns=["value_x", "value_y"] From 2bbc4911ff44b35d413794a7342076bddb4f09bb Mon Sep 17 00:00:00 2001 From: Hugo Carl <30904672+hugoecarl@users.noreply.github.com> Date: Thu, 5 Sep 2019 15:17:37 -0700 Subject: [PATCH 6/7] int16fix --- pandas/_libs/join.pyx | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/_libs/join.pyx b/pandas/_libs/join.pyx index 397d9e19d30c5..8f2af65f23429 100644 --- a/pandas/_libs/join.pyx +++ b/pandas/_libs/join.pyx @@ -243,6 +243,7 @@ ctypedef fused join_t: float32_t object int8_t + int16_t int32_t int64_t uint64_t @@ -412,6 +413,7 @@ left_join_indexer_float64 = left_join_indexer["float64_t"] left_join_indexer_float32 = left_join_indexer["float32_t"] left_join_indexer_object = left_join_indexer["object"] left_join_indexer_int8 = left_join_indexer["int8_t"] +left_join_indexer_int16 = left_join_indexer["int16_t"] left_join_indexer_int32 = left_join_indexer["int32_t"] left_join_indexer_int64 = left_join_indexer["int64_t"] left_join_indexer_uint64 = left_join_indexer["uint64_t"] From 4e8bf1f62a82e6fec532c91297b0b5a908351c3d Mon Sep 17 00:00:00 2001 From: Hugo Carl <30904672+hugoecarl@users.noreply.github.com> Date: Fri, 6 Sep 2019 13:25:13 -0300 Subject: [PATCH 7/7] whatsnew 
format --- doc/source/whatsnew/v1.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 3b7ebbf419375..2bd7d1b223617 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -89,7 +89,7 @@ Categorical ^^^^^^^^^^^ - Added test to assert the :func:`fillna` raises the correct ValueError message when the value isn't a value from categories (:issue:`13628`) -- Bug in merge on CategoricalIndex fails if left_index=True & right_index=True, but not if on={index} (:issue:`28189`) +- Bug in merge on CategoricalIndex fails if ``left_index=True`` & ``right_index=True``, but not if ``on={index}`` (:issue:`28189`) -