diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index ea6a832d25058..4efcbb08580db 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -955,6 +955,7 @@ Groupby/resample/rolling - Bug in :meth:`DataFrame.groupby` would not include a :class:`.Grouper` specified by ``key`` in the result when ``as_index=False`` (:issue:`50413`) - Bug in :meth:`.DataFrameGrouBy.value_counts` would raise when used with a :class:`.TimeGrouper` (:issue:`50486`) - Bug in :meth:`Resampler.size` caused a wide :class:`DataFrame` to be returned instead of a :class:`Series` with :class:`MultiIndex` (:issue:`46826`) +- Bug in :meth:`.DataFrameGroupBy.transform` and :meth:`.SeriesGroupBy.transform` would raise incorrectly when grouper had ``axis=1`` for ``"idxmin"`` and ``"idxmax"`` arguments (:issue:`45986`) - Reshaping diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 955f65585963d..8cd1a2543e23a 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -95,7 +95,6 @@ ) from pandas.core.indexes.category import CategoricalIndex from pandas.core.series import Series -from pandas.core.shared_docs import _shared_docs from pandas.core.util.numba_ import maybe_use_numba from pandas.plotting import boxplot_frame_groupby @@ -1848,17 +1847,82 @@ def nunique(self, dropna: bool = True) -> DataFrame: return results - @doc( - _shared_docs["idxmax"], - numeric_only_default="False", - ) def idxmax( self, - axis: Axis = 0, + axis: Axis | None = None, skipna: bool = True, numeric_only: bool = False, ) -> DataFrame: - axis = DataFrame._get_axis_number(axis) + """ + Return index of first occurrence of maximum over requested axis. + + NA/null values are excluded. + + Parameters + ---------- + axis : {{0 or 'index', 1 or 'columns'}}, default None + The axis to use. 0 or 'index' for row-wise, 1 or 'columns' for column-wise. + If axis is not provided, grouper's axis is used. + + .. versionchanged:: 2.0.0 + + skipna : bool, default True + Exclude NA/null values. If an entire row/column is NA, the result + will be NA. + numeric_only : bool, default False + Include only `float`, `int` or `boolean` data. + + .. versionadded:: 1.5.0 + + Returns + ------- + Series + Indexes of maxima along the specified axis. + + Raises + ------ + ValueError + * If the row/column is empty + + See Also + -------- + Series.idxmax : Return index of the maximum element. + + Notes + ----- + This method is the DataFrame version of ``ndarray.argmax``. + + Examples + -------- + Consider a dataset containing food consumption in Argentina. + + >>> df = pd.DataFrame({'consumption': [10.51, 103.11, 55.48], + ... 'co2_emissions': [37.2, 19.66, 1712]}, + ... index=['Pork', 'Wheat Products', 'Beef']) + + >>> df + consumption co2_emissions + Pork 10.51 37.20 + Wheat Products 103.11 19.66 + Beef 55.48 1712.00 + + By default, it returns the index for the maximum value in each column. + + >>> df.idxmax() + consumption Wheat Products + co2_emissions Beef + dtype: object + + To return the index for the maximum value in each row, use ``axis="columns"``. + + >>> df.idxmax(axis="columns") + Pork co2_emissions + Wheat Products consumption + Beef co2_emissions + dtype: object + """ + if axis is None: + axis = self.axis def func(df): res = df._reduce( @@ -1879,17 +1943,82 @@ def func(df): ) return result - @doc( - _shared_docs["idxmin"], - numeric_only_default="False", - ) def idxmin( self, - axis: Axis = 0, + axis: Axis | None = None, skipna: bool = True, numeric_only: bool = False, ) -> DataFrame: - axis = DataFrame._get_axis_number(axis) + """ + Return index of first occurrence of minimum over requested axis. + + NA/null values are excluded. + + Parameters + ---------- + axis : {{0 or 'index', 1 or 'columns'}}, default None + The axis to use. 0 or 'index' for row-wise, 1 or 'columns' for column-wise. + If axis is not provided, grouper's axis is used. + + .. versionchanged:: 2.0.0 + + skipna : bool, default True + Exclude NA/null values. If an entire row/column is NA, the result + will be NA. + numeric_only : bool, default False + Include only `float`, `int` or `boolean` data. + + .. versionadded:: 1.5.0 + + Returns + ------- + Series + Indexes of minima along the specified axis. + + Raises + ------ + ValueError + * If the row/column is empty + + See Also + -------- + Series.idxmin : Return index of the minimum element. + + Notes + ----- + This method is the DataFrame version of ``ndarray.argmin``. + + Examples + -------- + Consider a dataset containing food consumption in Argentina. + + >>> df = pd.DataFrame({'consumption': [10.51, 103.11, 55.48], + ... 'co2_emissions': [37.2, 19.66, 1712]}, + ... index=['Pork', 'Wheat Products', 'Beef']) + + >>> df + consumption co2_emissions + Pork 10.51 37.20 + Wheat Products 103.11 19.66 + Beef 55.48 1712.00 + + By default, it returns the index for the minimum value in each column. + + >>> df.idxmin() + consumption Pork + co2_emissions Wheat Products + dtype: object + + To return the index for the minimum value in each row, use ``axis="columns"``. + + >>> df.idxmin(axis="columns") + Pork consumption + Wheat Products co2_emissions + Beef consumption + dtype: object + """ + if axis is None: + axis = self.axis def func(df): res = df._reduce( diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index 4c6f172b00a58..8f1d52c2ea03d 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -185,8 +185,6 @@ def test_transform_axis_1_reducer(request, reduction_func): # GH#45715 if reduction_func in ( "corrwith", - "idxmax", - "idxmin", "ngroup", "nth", ):