Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v1.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -895,6 +895,8 @@ Groupby/resample/rolling
- Bug in :meth:`SeriesGroupBy.agg` failing to retain ordered :class:`CategoricalDtype` on order-preserving aggregations (:issue:`41147`)
- Bug in :meth:`DataFrameGroupBy.min` and :meth:`DataFrameGroupBy.max` with multiple object-dtype columns and ``numeric_only=False`` incorrectly raising ``ValueError`` (:issue:`41111`)
- Bug in :meth:`DataFrameGroupBy.rank` with the GroupBy object's ``axis=0`` and the ``rank`` method's keyword ``axis=1`` (:issue:`41320`)
- Bug in :meth:`DataFrameGroupBy.__getitem__` with non-unique columns incorrectly returning a malformed :class:`SeriesGroupBy` instead of :class:`DataFrameGroupBy` (:issue:`41427`)
- Bug in :meth:`DataFrameGroupBy.transform` with non-unique columns incorrectly raising ``AttributeError`` (:issue:`41427`)

Reshaping
^^^^^^^^^
Expand Down
6 changes: 4 additions & 2 deletions pandas/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ def ndim(self) -> int:
@cache_readonly
def _obj_with_exclusions(self):
if self._selection is not None and isinstance(self.obj, ABCDataFrame):
return self.obj.reindex(columns=self._selection_list)
return self.obj[self._selection_list]

if len(self.exclusions) > 0:
return self.obj.drop(self.exclusions, axis=1)
Expand All @@ -239,7 +239,9 @@ def __getitem__(self, key):
else:
if key not in self.obj:
raise KeyError(f"Column not found: {key}")
return self._gotitem(key, ndim=1)
subset = self.obj[key]
ndim = subset.ndim
return self._gotitem(key, ndim=ndim, subset=subset)

def _gotitem(self, key, ndim: int, subset=None):
"""
Expand Down
17 changes: 13 additions & 4 deletions pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1417,12 +1417,19 @@ def _choose_path(self, fast_path: Callable, slow_path: Callable, group: DataFram
return path, res

def _transform_item_by_item(self, obj: DataFrame, wrapper) -> DataFrame:
# iterate through columns
# iterate through columns, see test_transform_exclude_nuisance
output = {}
inds = []
for i, col in enumerate(obj):
subset = obj.iloc[:, i]
sgb = SeriesGroupBy(
subset,
selection=col,
grouper=self.grouper,
exclusions=self.exclusions,
)
try:
output[col] = self[col].transform(wrapper)
output[i] = sgb.transform(wrapper)
except TypeError:
# e.g. trying to call nanmean with string values
pass
Expand All @@ -1434,7 +1441,9 @@ def _transform_item_by_item(self, obj: DataFrame, wrapper) -> DataFrame:

columns = obj.columns.take(inds)

return self.obj._constructor(output, index=obj.index, columns=columns)
result = self.obj._constructor(output, index=obj.index)
result.columns = columns
return result

def filter(self, func, dropna=True, *args, **kwargs):
"""
Expand Down Expand Up @@ -1504,7 +1513,7 @@ def filter(self, func, dropna=True, *args, **kwargs):

return self._apply_filter(indices, dropna)

def __getitem__(self, key):
def __getitem__(self, key) -> DataFrameGroupBy | SeriesGroupBy:
if self.axis == 1:
# GH 37725
raise ValueError("Cannot subset columns when using axis=1")
Expand Down
26 changes: 24 additions & 2 deletions pandas/tests/groupby/transform/test_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@
date_range,
)
import pandas._testing as tm
from pandas.core.groupby.generic import (
DataFrameGroupBy,
SeriesGroupBy,
)
from pandas.core.groupby.groupby import DataError


Expand Down Expand Up @@ -391,13 +395,31 @@ def test_transform_select_columns(df):
tm.assert_frame_equal(result, expected)


def test_transform_exclude_nuisance(df):
@pytest.mark.parametrize("duplicates", [True, False])
def test_transform_exclude_nuisance(df, duplicates):
# case that goes through _transform_item_by_item

if duplicates:
# make sure we work with duplicate columns GH#41427
df.columns = ["A", "C", "C", "D"]

# this also tests orderings in transform between
# series/frame to make sure it's consistent
expected = {}
grouped = df.groupby("A")
expected["C"] = grouped["C"].transform(np.mean)

gbc = grouped["C"]
expected["C"] = gbc.transform(np.mean)
if duplicates:
# squeeze 1-column DataFrame down to Series
expected["C"] = expected["C"]["C"]

assert isinstance(gbc.obj, DataFrame)
assert isinstance(gbc, DataFrameGroupBy)
else:
assert isinstance(gbc, SeriesGroupBy)
assert isinstance(gbc.obj, Series)

expected["D"] = grouped["D"].transform(np.mean)
expected = DataFrame(expected)
result = df.groupby("A").transform(np.mean)
Expand Down