diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 6f2b9b4f946c7..3dd8fd4a38d7e 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -445,6 +445,7 @@ Groupby/resample/rolling - Bug in :meth:`DataFrameGroupby.transform` produces incorrect result with transformation functions (:issue:`30918`) - Bug in :meth:`GroupBy.count` causes segmentation fault when grouped-by column contains NaNs (:issue:`32841`) - Bug in :meth:`DataFrame.groupby` and :meth:`Series.groupby` produces inconsistent type when aggregating Boolean series (:issue:`32894`) +- Bug in :meth:`SeriesGroupBy.quantile` raising on nullable integers (:issue:`33136`) - Bug in :meth:`SeriesGroupBy.first`, :meth:`SeriesGroupBy.last`, :meth:`SeriesGroupBy.min`, and :meth:`SeriesGroupBy.max` returning floats when applied to nullable Booleans (:issue:`33071`) - Bug in :meth:`DataFrameGroupBy.agg` with dictionary input losing ``ExtensionArray`` dtypes (:issue:`32194`) - Bug in :meth:`DataFrame.resample` where an ``AmbiguousTimeError`` would be raised when the resulting timezone aware :class:`DatetimeIndex` had a DST transition at midnight (:issue:`25758`) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 1474e173b4f8c..7a7ac58b9d11b 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -44,7 +44,9 @@ class providing the base-class of operations. from pandas.core.dtypes.cast import maybe_cast_result from pandas.core.dtypes.common import ( ensure_float, + is_bool_dtype, is_datetime64_dtype, + is_extension_array_dtype, is_integer_dtype, is_numeric_dtype, is_object_dtype, @@ -1867,9 +1869,13 @@ def pre_processor(vals: np.ndarray) -> Tuple[np.ndarray, Optional[Type]]: ) inference = None - if is_integer_dtype(vals): + if is_integer_dtype(vals.dtype): + if is_extension_array_dtype(vals.dtype): + vals = vals.to_numpy(dtype=float, na_value=np.nan) inference = np.int64 - elif is_datetime64_dtype(vals): + elif is_bool_dtype(vals.dtype) and is_extension_array_dtype(vals.dtype): + vals = vals.to_numpy(dtype=float, na_value=np.nan) + elif is_datetime64_dtype(vals.dtype): inference = "datetime64[ns]" vals = np.asarray(vals).astype(np.float) diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index 9c33843cdcecc..346de55f551df 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -1519,6 +1519,30 @@ def test_quantile_missing_group_values_correct_results(): tm.assert_frame_equal(result, expected) +@pytest.mark.parametrize( + "values", + [ + pd.array([1, 0, None] * 2, dtype="Int64"), + pd.array([True, False, None] * 2, dtype="boolean"), + ], +) +@pytest.mark.parametrize("q", [0.5, [0.0, 0.5, 1.0]]) +def test_groupby_quantile_nullable_array(values, q): + # https://github.com/pandas-dev/pandas/issues/33136 + df = pd.DataFrame({"a": ["x"] * 3 + ["y"] * 3, "b": values}) + result = df.groupby("a")["b"].quantile(q) + + if isinstance(q, list): + idx = pd.MultiIndex.from_product((["x", "y"], q), names=["a", None]) + true_quantiles = [0.0, 0.5, 1.0] + else: + idx = pd.Index(["x", "y"], name="a") + true_quantiles = [0.5] + + expected = pd.Series(true_quantiles * 2, index=idx, name="b") + tm.assert_series_equal(result, expected) + + # pipe # --------------------------------