Skip to content
Merged
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.25.1.rst
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ Plotting
Groupby/resample/rolling
^^^^^^^^^^^^^^^^^^^^^^^^

- Fixed regression in :meth:`pandas.core.groupby.DataFrameGroupBy.quantile` raising when a list-like of quantiles is given (:issue:`27526`)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Typo: pandas

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Multiple -> list-like

- Bug in :meth:`pandas.core.groupby.DataFrameGroupBy.transform` where applying a timezone conversion lambda function would drop timezone information (:issue:`27496`)
- Bug in windowing over read-only arrays (:issue:`27766`)
- Fixed segfault in :meth:`pandas.core.groupby.DataFrameGroupBy.quantile` when an invalid quantile was passed (:issue:`27470`)
Expand Down
64 changes: 52 additions & 12 deletions pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -1870,6 +1870,7 @@ def quantile(self, q=0.5, interpolation="linear"):
a 2.0
b 3.0
"""
from pandas import concat

def pre_processor(vals: np.ndarray) -> Tuple[np.ndarray, Optional[Type]]:
if is_object_dtype(vals):
Expand Down Expand Up @@ -1897,18 +1898,57 @@ def post_processor(vals: np.ndarray, inference: Optional[Type]) -> np.ndarray:

return vals

return self._get_cythonized_result(
"group_quantile",
self.grouper,
aggregate=True,
needs_values=True,
needs_mask=True,
cython_dtype=np.float64,
pre_processing=pre_processor,
post_processing=post_processor,
q=q,
interpolation=interpolation,
)
if is_scalar(q):
return self._get_cythonized_result(
"group_quantile",
self.grouper,
aggregate=True,
needs_values=True,
needs_mask=True,
cython_dtype=np.float64,
pre_processing=pre_processor,
post_processing=post_processor,
q=q,
interpolation=interpolation,
)
else:
results = [
self._get_cythonized_result(
"group_quantile",
self.grouper,
aggregate=True,
needs_values=True,
needs_mask=True,
cython_dtype=np.float64,
pre_processing=pre_processor,
post_processing=post_processor,
q=qi,
interpolation=interpolation,
)
for qi in q
]
result = concat(results, axis=0, keys=q)
# fix levels to place quantiles on the inside
# TODO(GH-10710): Ideally, we could write this as
# >>> result.stack(0).loc[pd.IndexSlice[:, ..., q], :]
# but this hits https://github.com/pandas-dev/pandas/issues/10710
# which doesn't reorder the list-like `q` on the inner level.
order = np.roll(list(range(result.index.nlevels)), -1)
result = result.reorder_levels(order)
result = result.reindex(q, level=-1)

# fix order.
hi = len(q) * self.ngroups
arr = np.arange(0, hi, self.ngroups)
arrays = []

for i in range(self.ngroups):
arr = arr + i
arrays.append(arr)

indices = np.concatenate(arrays)
assert len(indices) == len(result)
return result.take(indices)

@Substitution(name="groupby")
def ngroup(self, ascending=True):
Expand Down
51 changes: 51 additions & 0 deletions pandas/tests/groupby/test_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -1238,6 +1238,57 @@ def test_quantile(interpolation, a_vals, b_vals, q):
tm.assert_frame_equal(result, expected)


def test_quantile_array():
    # Regression test for list-like ``q`` in groupby quantile:
    # https://github.com/pandas-dev/pandas/issues/27526

    # Case 1: one column, a single quantile wrapped in a list.
    single_col = pd.DataFrame({"A": [0, 1, 2, 3, 4]})
    result = single_col.groupby([0, 0, 1, 1, 1]).quantile([0.25])

    expected = pd.DataFrame(
        {"A": [0.25, 2.50]},
        index=pd.MultiIndex.from_product([[0, 1], [0.25]]),
    )
    tm.assert_frame_equal(result, expected)

    # Case 2: two columns, two quantiles; the quantile becomes the
    # innermost index level of the expected frame.
    two_cols = pd.DataFrame({"A": [0, 1, 2, 3], "B": [4, 5, 6, 7]})
    expected_index = pd.MultiIndex.from_product([[0, 1], [0.25, 0.75]])

    result = two_cols.groupby([0, 0, 1, 1]).quantile([0.25, 0.75])
    expected = pd.DataFrame(
        {
            "A": [0.25, 0.75, 2.25, 2.75],
            "B": [4.25, 4.75, 6.25, 6.75],
        },
        index=expected_index,
    )
    tm.assert_frame_equal(result, expected)


def test_quantile_array_no_sort():
    # With ``sort=False`` the expected outer level lists the keys as
    # 1 then 0, and the inner level follows the order in which the
    # quantiles were passed.
    frame = pd.DataFrame({"A": [0, 1, 2], "B": [3, 4, 5]})
    keys = [1, 0, 1]

    # Quantiles passed in ascending order.
    ascending_q = [0.25, 0.5, 0.75]
    result = frame.groupby(keys, sort=False).quantile(ascending_q)
    expected_index = pd.MultiIndex.from_product([[1, 0], [0.25, 0.5, 0.75]])
    expected = pd.DataFrame(
        {
            "A": [0.5, 1.0, 1.5, 1.0, 1.0, 1.0],
            "B": [3.5, 4.0, 4.5, 4.0, 4.0, 4.0],
        },
        index=expected_index,
    )
    tm.assert_frame_equal(result, expected)

    # Quantiles passed in descending order; that order must be kept.
    descending_q = [0.75, 0.25]
    result = frame.groupby(keys, sort=False).quantile(descending_q)
    expected_index = pd.MultiIndex.from_product([[1, 0], [0.75, 0.25]])
    expected = pd.DataFrame(
        {"A": [1.5, 0.5, 1.0, 1.0], "B": [4.5, 3.5, 4.0, 4.0]},
        index=expected_index,
    )
    tm.assert_frame_equal(result, expected)


def test_quantile_array_multiple_levels():
    # Grouping by two columns with a list-like ``q``: the expected index
    # has the group keys ("c", "d") first and an unnamed quantile level last.
    frame = pd.DataFrame(
        {
            "A": [0, 1, 2],
            "B": [3, 4, 5],
            "c": ["a", "a", "a"],
            "d": ["a", "a", "b"],
        }
    )
    quantiles = [0.25, 0.75]

    result = frame.groupby(["c", "d"]).quantile(quantiles)

    # ("a", "a", 0.25), ("a", "a", 0.75), ("a", "b", 0.25), ("a", "b", 0.75)
    expected_tuples = [("a", d_key, q) for d_key in ("a", "b") for q in quantiles]
    expected_index = pd.MultiIndex.from_tuples(
        expected_tuples, names=["c", "d", None]
    )
    expected = pd.DataFrame(
        {"A": [0.25, 0.75, 2.0, 2.0], "B": [3.25, 3.75, 5.0, 5.0]},
        index=expected_index,
    )
    tm.assert_frame_equal(result, expected)


def test_quantile_raises():
df = pd.DataFrame(
[["foo", "a"], ["foo", "b"], ["foo", "c"]], columns=["key", "val"]
Expand Down