From 0e7ce964f38c54fe7de169d5acbcfe7e8450d589 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Thu, 7 Mar 2024 08:36:56 +0000 Subject: [PATCH 1/2] BUG: DataFrame Interchange Protocol errors on Boolean columns [skip-ci] --- doc/source/whatsnew/v2.2.2.rst | 1 + pandas/core/interchange/utils.py | 3 +++ pandas/tests/interchange/test_impl.py | 2 ++ 3 files changed, 6 insertions(+) diff --git a/doc/source/whatsnew/v2.2.2.rst b/doc/source/whatsnew/v2.2.2.rst index 54084abab7817..ce15b5d742a9d 100644 --- a/doc/source/whatsnew/v2.2.2.rst +++ b/doc/source/whatsnew/v2.2.2.rst @@ -24,6 +24,7 @@ Bug fixes ~~~~~~~~~ - :meth:`DataFrame.__dataframe__` was showing bytemask instead of bitmask for ``'string[pyarrow]'`` validity buffer (:issue:`57762`) - :meth:`DataFrame.__dataframe__` was showing non-null validity buffer (instead of ``None``) ``'string[pyarrow]'`` without missing values (:issue:`57761`) +- :meth:`DataFrame.__dataframe__` was producing incorrect data buffers when the column's type was nullable boolean (:issue:`55332`) .. --------------------------------------------------------------------------- .. _whatsnew_222.other: diff --git a/pandas/core/interchange/utils.py b/pandas/core/interchange/utils.py index 2a19dd5046aa3..fd1c7c9639242 100644 --- a/pandas/core/interchange/utils.py +++ b/pandas/core/interchange/utils.py @@ -144,6 +144,9 @@ def dtype_to_arrow_c_fmt(dtype: DtypeObj) -> str: elif isinstance(dtype, DatetimeTZDtype): return ArrowCTypes.TIMESTAMP.format(resolution=dtype.unit[0], tz=dtype.tz) + elif isinstance(dtype, pd.BooleanDtype): + return ArrowCTypes.BOOL + raise NotImplementedError( f"Conversion of {dtype} to Arrow C format string is not implemented." ) diff --git a/pandas/tests/interchange/test_impl.py b/pandas/tests/interchange/test_impl.py index 83574e8630d6f..60e05c2c65124 100644 --- a/pandas/tests/interchange/test_impl.py +++ b/pandas/tests/interchange/test_impl.py @@ -459,6 +459,7 @@ def test_non_str_names_w_duplicates(): ), ([1.0, 2.25, None], "Float32", "float32"), ([1.0, 2.25, None], "Float32[pyarrow]", "float32"), + ([True, False, None], "boolean", "bool"), ([True, False, None], "boolean[pyarrow]", "bool"), (["much ado", "about", None], "string[pyarrow_numpy]", "large_string"), (["much ado", "about", None], "string[pyarrow]", "large_string"), @@ -521,6 +522,7 @@ def test_pandas_nullable_with_missing_values( ), ([1.0, 2.25, 5.0], "Float32", "float32"), ([1.0, 2.25, 5.0], "Float32[pyarrow]", "float32"), + ([True, False, False], "boolean", "bool"), ([True, False, False], "boolean[pyarrow]", "bool"), (["much ado", "about", "nothing"], "string[pyarrow_numpy]", "large_string"), (["much ado", "about", "nothing"], "string[pyarrow]", "large_string"), From 30c4fbafae19e32e13adc3c6c8dbe7c617ce7c49 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Fri, 22 Mar 2024 18:54:17 +0000 Subject: [PATCH 2/2] whatsnew --- doc/source/whatsnew/v2.2.2.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.2.2.rst b/doc/source/whatsnew/v2.2.2.rst index ce15b5d742a9d..2a48403d9a318 100644 --- a/doc/source/whatsnew/v2.2.2.rst +++ b/doc/source/whatsnew/v2.2.2.rst @@ -22,9 +22,9 @@ Fixed regressions Bug fixes ~~~~~~~~~ +- :meth:`DataFrame.__dataframe__` was producing incorrect data buffers when the column's type was nullable boolean (:issue:`55332`) - :meth:`DataFrame.__dataframe__` was showing bytemask instead of bitmask for ``'string[pyarrow]'`` validity buffer (:issue:`57762`) - :meth:`DataFrame.__dataframe__` was showing non-null validity buffer (instead of ``None``) ``'string[pyarrow]'`` without missing values (:issue:`57761`) -- :meth:`DataFrame.__dataframe__` was producing incorrect data buffers when the column's type was nullable boolean (:issue:`55332`) .. --------------------------------------------------------------------------- .. _whatsnew_222.other: