From 0dfd01e2c536fcfa713bf36b2952b50638f22a41 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 4 Jun 2018 10:33:37 +0200 Subject: [PATCH 1/4] REGR: allow merging on object boolean columns --- pandas/core/reshape/merge.py | 7 +++++-- pandas/tests/reshape/merge/test_merge.py | 11 +++++++++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 4d8897fb7c811..cf07a8645b3af 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -28,6 +28,7 @@ is_int_or_datetime_dtype, is_dtype_equal, is_bool, + is_bool_dtype, is_list_like, is_datetimelike, _ensure_int64, @@ -974,9 +975,11 @@ def _maybe_coerce_merge_keys(self): # Check if we are trying to merge on obviously # incompatible dtypes GH 9780, GH 15800 - elif is_numeric_dtype(lk) and not is_numeric_dtype(rk): + elif ((is_numeric_dtype(lk) and not is_bool_dtype(lk)) + and not is_numeric_dtype(rk)): raise ValueError(msg) - elif not is_numeric_dtype(lk) and is_numeric_dtype(rk): + elif (not is_numeric_dtype(lk) + and (is_numeric_dtype(rk) and not is_bool_dtype(rk))): raise ValueError(msg) elif is_datetimelike(lk) and not is_datetimelike(rk): raise ValueError(msg) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 8e639edd34b18..782d95b069b81 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -1526,6 +1526,17 @@ def test_merge_on_ints_floats_warning(self): result = B.merge(A, left_on='Y', right_on='X') assert_frame_equal(result, expected[['Y', 'X']]) + def test_merge_incompat_infer_object(self): + # GH21119 + df1 = DataFrame({'key': Series([True, False], dtype=object)}) + df2 = DataFrame({'key': [True, False]}) + + expected = DataFrame({'key': [True, False]}, dtype=object) + result = pd.merge(df1, df2, on='key') + assert_frame_equal(result, expected) + result = pd.merge(df2, df1, on='key') + 
assert_frame_equal(result, expected) + @pytest.mark.parametrize('df1_vals, df2_vals', [ ([0, 1, 2], ["0", "1", "2"]), ([0.0, 1.0, 2.0], ["0", "1", "2"]), From 163007249a801417343f57be600fe2aa954be1f4 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 8 Jun 2018 18:12:14 +0200 Subject: [PATCH 2/4] add more test cases --- pandas/tests/reshape/merge/test_merge.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 782d95b069b81..037bd9cc7cd18 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -1526,8 +1526,8 @@ def test_merge_on_ints_floats_warning(self): result = B.merge(A, left_on='Y', right_on='X') assert_frame_equal(result, expected[['Y', 'X']]) - def test_merge_incompat_infer_object(self): - # GH21119 + def test_merge_incompat_infer_boolean_object(self): + # GH21119: bool + object bool merge OK df1 = DataFrame({'key': Series([True, False], dtype=object)}) df2 = DataFrame({'key': [True, False]}) @@ -1537,6 +1537,16 @@ def test_merge_incompat_infer_object(self): result = pd.merge(df2, df1, on='key') assert_frame_equal(result, expected) + # with missing value + df1 = DataFrame({'key': Series([True, False, np.nan], dtype=object)}) + df2 = DataFrame({'key': [True, False]}) + + expected = DataFrame({'key': [True, False]}, dtype=object) + result = pd.merge(df1, df2, on='key') + assert_frame_equal(result, expected) + result = pd.merge(df2, df1, on='key') + assert_frame_equal(result, expected) + @pytest.mark.parametrize('df1_vals, df2_vals', [ ([0, 1, 2], ["0", "1", "2"]), ([0.0, 1.0, 2.0], ["0", "1", "2"]), @@ -1549,6 +1559,8 @@ def test_merge_incompat_infer_object(self): pd.date_range('20130101', periods=3, tz='US/Eastern')), ([0, 1, 2], Series(['a', 'b', 'a']).astype('category')), ([0.0, 1.0, 2.0], Series(['a', 'b', 'a']).astype('category')), + # TODO ([0, 1], pd.Series([False, True], 
dtype=bool)), + ([0, 1], pd.Series([False, True], dtype=object)) ]) def test_merge_incompat_dtypes(self, df1_vals, df2_vals): # GH 9780, GH 15800 From fe45b89c740ffaec6993a524289d65929f9e49ee Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 8 Jun 2018 18:14:07 +0200 Subject: [PATCH 3/4] add comment --- pandas/core/reshape/merge.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index cf07a8645b3af..d69d79ca9b098 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -975,6 +975,9 @@ def _maybe_coerce_merge_keys(self): # Check if we are trying to merge on obviously # incompatible dtypes GH 9780, GH 15800 + + # boolean values are considered as numeric, but are still allowed + # to be merged on object boolean values elif ((is_numeric_dtype(lk) and not is_bool_dtype(lk)) and not is_numeric_dtype(rk)): raise ValueError(msg) From 58b72c55bfad933f9438b5a4c7615e5f6793bb9a Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 8 Jun 2018 18:15:13 +0200 Subject: [PATCH 4/4] add whatsnew --- doc/source/whatsnew/v0.23.1.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.23.1.txt b/doc/source/whatsnew/v0.23.1.txt index 5a1bcce9b5970..d4a332c65187c 100644 --- a/doc/source/whatsnew/v0.23.1.txt +++ b/doc/source/whatsnew/v0.23.1.txt @@ -31,7 +31,7 @@ Fixed Regressions - Bug in :meth:`Categorical.fillna` incorrectly raising a ``TypeError`` when `value` the individual categories are iterable and `value` is an iterable (:issue:`21097`, :issue:`19788`) - Regression in :func:`pivot_table` where an ordered ``Categorical`` with missing values for the pivot's ``index`` would give a mis-aligned result (:issue:`21133`) - +- Fixed regression in :func:`merge` raising a ``ValueError`` when merging on boolean index/columns where one side has ``bool`` dtype and the other has ``object`` dtype (:issue:`21119`). .. _whatsnew_0231.performance: