diff --git a/doc/source/whatsnew/v0.23.1.txt b/doc/source/whatsnew/v0.23.1.txt index 1a8b1603daaaa..07120e26b4ecd 100644 --- a/doc/source/whatsnew/v0.23.1.txt +++ b/doc/source/whatsnew/v0.23.1.txt @@ -65,15 +65,14 @@ In addition, ordering comparisons will raise a ``TypeError`` in the future. a tz-aware time instead of tz-naive (:issue:`21267`) and :attr:`DatetimeIndex.date` returned incorrect date when the input date has a non-UTC timezone (:issue:`21230`). - Fixed regression in :meth:`pandas.io.json.json_normalize` when called with ``None`` values - in nested levels in JSON (:issue:`21158`). + in nested levels in JSON, and to not drop keys with value as `None` (:issue:`21158`, :issue:`21356`). - Bug in :meth:`~DataFrame.to_csv` causes encoding error when compression and encoding are specified (:issue:`21241`, :issue:`21118`) - Bug preventing pandas from being importable with -OO optimization (:issue:`21071`) - Bug in :meth:`Categorical.fillna` incorrectly raising a ``TypeError`` when `value` the individual categories are iterable and `value` is an iterable (:issue:`21097`, :issue:`19788`) - Fixed regression in constructors coercing NA values like ``None`` to strings when passing ``dtype=str`` (:issue:`21083`) - Regression in :func:`pivot_table` where an ordered ``Categorical`` with missing values for the pivot's ``index`` would give a mis-aligned result (:issue:`21133`) -- Fixed Regression in :func:`nested_to_record` which now flattens list of dictionaries and doesnot drop keys with value as `None` (:issue:`21356`) - +- Fixed regression in merging on boolean index/columns (:issue:`21119`). .. _whatsnew_0231.performance: diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 73aba4d4e044b..e38c069b3c3fb 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -28,6 +28,7 @@ is_int_or_datetime_dtype, is_dtype_equal, is_bool, + is_bool_dtype, is_list_like, is_datetimelike, _ensure_int64, @@ -974,9 +975,14 @@ def _maybe_coerce_merge_keys(self): # Check if we are trying to merge on obviously # incompatible dtypes GH 9780, GH 15800 - elif is_numeric_dtype(lk) and not is_numeric_dtype(rk): + + # boolean values are considered as numeric, but are still allowed + # to be merged on object boolean values + elif ((is_numeric_dtype(lk) and not is_bool_dtype(lk)) + and not is_numeric_dtype(rk)): raise ValueError(msg) - elif not is_numeric_dtype(lk) and is_numeric_dtype(rk): + elif (not is_numeric_dtype(lk) + and (is_numeric_dtype(rk) and not is_bool_dtype(rk))): raise ValueError(msg) elif is_datetimelike(lk) and not is_datetimelike(rk): raise ValueError(msg) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 8e639edd34b18..037bd9cc7cd18 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -1526,6 +1526,27 @@ def test_merge_on_ints_floats_warning(self): result = B.merge(A, left_on='Y', right_on='X') assert_frame_equal(result, expected[['Y', 'X']]) + def test_merge_incompat_infer_boolean_object(self): + # GH21119: bool + object bool merge OK + df1 = DataFrame({'key': Series([True, False], dtype=object)}) + df2 = DataFrame({'key': [True, False]}) + + expected = DataFrame({'key': [True, False]}, dtype=object) + result = pd.merge(df1, df2, on='key') + assert_frame_equal(result, expected) + result = pd.merge(df2, df1, on='key') + assert_frame_equal(result, expected) + + # with missing value + df1 = DataFrame({'key': Series([True, False, np.nan], dtype=object)}) + df2 = DataFrame({'key': [True, False]}) + + expected = DataFrame({'key': [True, False]}, dtype=object) + result = pd.merge(df1, df2, on='key') + assert_frame_equal(result, expected) + result = pd.merge(df2, df1, on='key') + assert_frame_equal(result, expected) + @pytest.mark.parametrize('df1_vals, df2_vals', [ ([0, 1, 2], ["0", "1", "2"]), ([0.0, 1.0, 2.0], ["0", "1", "2"]), @@ -1538,6 +1559,8 @@ def test_merge_on_ints_floats_warning(self): pd.date_range('20130101', periods=3, tz='US/Eastern')), ([0, 1, 2], Series(['a', 'b', 'a']).astype('category')), ([0.0, 1.0, 2.0], Series(['a', 'b', 'a']).astype('category')), + # TODO ([0, 1], pd.Series([False, True], dtype=bool)), + ([0, 1], pd.Series([False, True], dtype=object)) ]) def test_merge_incompat_dtypes(self, df1_vals, df2_vals): # GH 9780, GH 15800