Skip to content

Commit 6e7efb4

Browse files
Backport PR #52577 on branch 2.0.x (BUG: describe not respecting ArrowDtype in include/exclude) (#52879)
Backport PR #52577: BUG: describe not respecting ArrowDtype in include/exclude. Co-authored-by: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
1 parent 22f9e93 commit 6e7efb4

File tree

4 files changed

+27
-0
lines changed

4 files changed

+27
-0
lines changed

doc/source/whatsnew/v2.0.1.rst

+1
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ Bug fixes
3838
- Bug in :func:`to_datetime` and :func:`to_timedelta` when trying to convert numeric data with an :class:`ArrowDtype` (:issue:`52425`)
3939
- Bug in :func:`to_numeric` with ``errors='coerce'`` and ``dtype_backend='pyarrow'`` with :class:`ArrowDtype` data (:issue:`52588`)
4040
- Bug in :meth:`ArrowDtype.__from_arrow__` not respecting if dtype is explicitly given (:issue:`52533`)
41+
- Bug in :meth:`DataFrame.describe` not respecting ``ArrowDtype`` in ``include`` and ``exclude`` (:issue:`52570`)
4142
- Bug in :meth:`DataFrame.max` and related methods always casting different :class:`Timestamp` resolutions to nanoseconds (:issue:`52524`)
4243
- Bug in :meth:`Series.describe` not returning :class:`ArrowDtype` with ``pyarrow.float64`` type with numeric data (:issue:`52427`)
4344
- Bug in :meth:`Series.dt.tz_localize` incorrectly localizing timestamps with :class:`ArrowDtype` (:issue:`52677`)

pandas/core/dtypes/common.py

+4
Original file line numberDiff line numberDiff line change
@@ -1565,6 +1565,10 @@ def infer_dtype_from_object(dtype) -> type:
15651565
except TypeError:
15661566
# Should still pass if we don't have a date-like
15671567
pass
1568+
if hasattr(dtype, "numpy_dtype"):
1569+
# TODO: Implement this properly
1570+
# https://github.com/pandas-dev/pandas/issues/52576
1571+
return dtype.numpy_dtype.type
15681572
return dtype.type
15691573

15701574
try:

pandas/core/frame.py

+2
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,7 @@
171171
PeriodArray,
172172
TimedeltaArray,
173173
)
174+
from pandas.core.arrays.arrow import ArrowDtype
174175
from pandas.core.arrays.sparse import SparseFrameAccessor
175176
from pandas.core.construction import (
176177
ensure_wrapped_if_datetimelike,
@@ -4695,6 +4696,7 @@ def check_int_infer_dtype(dtypes):
46954696

46964697
def dtype_predicate(dtype: DtypeObj, dtypes_set) -> bool:
46974698
# GH 46870: BooleanDtype._is_numeric == True but should be excluded
4699+
dtype = dtype if not isinstance(dtype, ArrowDtype) else dtype.numpy_dtype
46984700
return issubclass(dtype.type, tuple(dtypes_set)) or (
46994701
np.number in dtypes_set
47004702
and getattr(dtype, "_is_numeric", False)

pandas/tests/frame/methods/test_describe.py

+20
Original file line numberDiff line numberDiff line change
@@ -395,3 +395,23 @@ def test_ea_with_na(self, any_numeric_ea_dtype):
395395
dtype="Float64",
396396
)
397397
tm.assert_frame_equal(result, expected)
398+
399+
def test_describe_exclude_pa_dtype(self):
400+
# GH#52570
401+
pa = pytest.importorskip("pyarrow")
402+
df = DataFrame(
403+
{
404+
"a": Series([1, 2, 3], dtype=pd.ArrowDtype(pa.int8())),
405+
"b": Series([1, 2, 3], dtype=pd.ArrowDtype(pa.int16())),
406+
"c": Series([1, 2, 3], dtype=pd.ArrowDtype(pa.int32())),
407+
}
408+
)
409+
result = df.describe(
410+
include=pd.ArrowDtype(pa.int8()), exclude=pd.ArrowDtype(pa.int32())
411+
)
412+
expected = DataFrame(
413+
{"a": [3, 2, 1, 1, 1.5, 2, 2.5, 3]},
414+
index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"],
415+
dtype=pd.ArrowDtype(pa.float64()),
416+
)
417+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)