From 3799b995f127c926b114cf90e974173af5c9dbc8 Mon Sep 17 00:00:00 2001 From: Jordan Murphy <35613487+jordan-d-murphy@users.noreply.github.com> Date: Mon, 29 Jan 2024 23:11:03 -0600 Subject: [PATCH 1/5] DOC: fix PR02 errors in docstrings - pandas.core.groupby.SeriesGroupBy.idxmax, pandas.core.groupby.SeriesGroupBy.idxmin --- ci/code_checks.sh | 2 -- pandas/core/groupby/generic.py | 12 ++++++++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index f3023bea3743e..dd9cd9e1cf174 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -175,8 +175,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then pandas.core.groupby.DataFrameGroupBy.transform\ pandas.core.groupby.DataFrameGroupBy.nth\ pandas.core.groupby.DataFrameGroupBy.rolling\ - pandas.core.groupby.SeriesGroupBy.idxmax\ - pandas.core.groupby.SeriesGroupBy.idxmin\ pandas.core.groupby.SeriesGroupBy.nth\ pandas.core.groupby.SeriesGroupBy.rolling\ pandas.core.groupby.DataFrameGroupBy.hist\ diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 9598bc0db02cc..c558179d001b2 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1178,13 +1178,21 @@ def nsmallest( @doc(Series.idxmin.__doc__) def idxmin( - self, axis: Axis | lib.NoDefault = lib.no_default, skipna: bool = True + self, + axis: Axis | lib.NoDefault = lib.no_default, + skipna: bool = True, + *args, + **kwargs, ) -> Series: return self._idxmax_idxmin("idxmin", axis=axis, skipna=skipna) @doc(Series.idxmax.__doc__) def idxmax( - self, axis: Axis | lib.NoDefault = lib.no_default, skipna: bool = True + self, + axis: Axis | lib.NoDefault = lib.no_default, + skipna: bool = True, + *args, + **kwargs, ) -> Series: return self._idxmax_idxmin("idxmax", axis=axis, skipna=skipna) From 637f8ee3d73431bf9129bbf83b5f1c23d6d9b8d9 Mon Sep 17 00:00:00 2001 From: Jordan Murphy <35613487+jordan-d-murphy@users.noreply.github.com> Date: Tue, 30 Jan 2024 01:57:52 
-0600 Subject: [PATCH 2/5] Revert original change that caused some CI tests to fail and push a new fix --- pandas/core/groupby/generic.py | 133 ++++++++++++++++++++++++++++++--- 1 file changed, 121 insertions(+), 12 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index c558179d001b2..4d76deca5691c 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1176,24 +1176,133 @@ def nsmallest( result = self._python_apply_general(f, data, not_indexed_same=True) return result - @doc(Series.idxmin.__doc__) def idxmin( - self, - axis: Axis | lib.NoDefault = lib.no_default, - skipna: bool = True, - *args, - **kwargs, + self, axis: Axis | lib.NoDefault = lib.no_default, skipna: bool = True ) -> Series: + """ + Return the row label of the minimum value. + + If multiple values equal the minimum, the first row label with that + value is returned. + + Parameters + ---------- + axis : {0 or 'index'} + Unused. Parameter needed for compatibility with DataFrame. + skipna : bool, default True + Exclude NA/null values. If the entire Series is NA, the result + will be NA. + + Returns + ------- + Index + Label of the minimum value. + + Raises + ------ + ValueError + If the Series is empty. + + See Also + -------- + numpy.argmin : Return indices of the minimum values + along the given axis. + DataFrame.idxmin : Return index of first occurrence of minimum + over requested axis. + Series.idxmax : Return index *label* of the first occurrence + of maximum of values. + + Notes + ----- + This method is the Series version of ``ndarray.argmin``. This method + returns the label of the minimum, while ``ndarray.argmin`` returns + the position. To get the position, use ``series.values.argmin()``. + + Examples + -------- + >>> s = pd.Series(data=[1, None, 4, 1], + ... 
index=['A', 'B', 'C', 'D']) + >>> s + A 1.0 + B NaN + C 4.0 + D 1.0 + dtype: float64 + + >>> s.idxmin() + 'A' + + If `skipna` is False and there is an NA value in the data, + the function returns ``nan``. + + >>> s.idxmin(skipna=False) + nan + """ return self._idxmax_idxmin("idxmin", axis=axis, skipna=skipna) - @doc(Series.idxmax.__doc__) def idxmax( - self, - axis: Axis | lib.NoDefault = lib.no_default, - skipna: bool = True, - *args, - **kwargs, + self, axis: Axis | lib.NoDefault = lib.no_default, skipna: bool = True ) -> Series: + """ + Return the row label of the maximum value. + + If multiple values equal the maximum, the first row label with that + value is returned. + + Parameters + ---------- + axis : {0 or 'index'} + Unused. Parameter needed for compatibility with DataFrame. + skipna : bool, default True + Exclude NA/null values. If the entire Series is NA, the result + will be NA. + + Returns + ------- + Index + Label of the maximum value. + + Raises + ------ + ValueError + If the Series is empty. + + See Also + -------- + numpy.argmax : Return indices of the maximum values + along the given axis. + DataFrame.idxmax : Return index of first occurrence of maximum + over requested axis. + Series.idxmin : Return index *label* of the first occurrence + of minimum of values. + + Notes + ----- + This method is the Series version of ``ndarray.argmax``. This method + returns the label of the maximum, while ``ndarray.argmax`` returns + the position. To get the position, use ``series.values.argmax()``. + + Examples + -------- + >>> s = pd.Series(data=[1, None, 4, 3, 4], + ... index=['A', 'B', 'C', 'D', 'E']) + >>> s + A 1.0 + B NaN + C 4.0 + D 3.0 + E 4.0 + dtype: float64 + + >>> s.idxmax() + 'C' + + If `skipna` is False and there is an NA value in the data, + the function returns ``nan``. 
+ + >>> s.idxmax(skipna=False) + nan + """ return self._idxmax_idxmin("idxmax", axis=axis, skipna=skipna) @doc(Series.corr.__doc__) From ff3a705cf94348e3928b51ee2ea8fa23550f4547 Mon Sep 17 00:00:00 2001 From: Jordan Murphy <35613487+jordan-d-murphy@users.noreply.github.com> Date: Wed, 31 Jan 2024 04:15:09 -0600 Subject: [PATCH 3/5] Added examples for groupby.idxmin, groupby.idxmax --- pandas/core/groupby/generic.py | 57 ++++++++++++++-------------------- 1 file changed, 24 insertions(+), 33 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 4d76deca5691c..494a4590f4e82 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1220,23 +1220,19 @@ def idxmin( Examples -------- - >>> s = pd.Series(data=[1, None, 4, 1], - ... index=['A', 'B', 'C', 'D']) - >>> s - A 1.0 - B NaN - C 4.0 - D 1.0 - dtype: float64 - - >>> s.idxmin() - 'A' - - If `skipna` is False and there is an NA value in the data, - the function returns ``nan``. + >>> ser = pd.Series([1, 2, 3, 4], index=pd.DatetimeIndex( + ... ['2023-01-01', '2023-01-15', '2023-02-01', '2023-02-15'])) + >>> ser + 2023-01-01 1 + 2023-01-15 2 + 2023-02-01 3 + 2023-02-15 4 + dtype: int64 - >>> s.idxmin(skipna=False) - nan + >>> ser.groupby(['a', 'a', 'b', 'b']).idxmin() + a 2023-01-01 + b 2023-02-01 + dtype: datetime64[ns] """ return self._idxmax_idxmin("idxmin", axis=axis, skipna=skipna) @@ -1284,24 +1280,19 @@ def idxmax( Examples -------- - >>> s = pd.Series(data=[1, None, 4, 3, 4], - ... index=['A', 'B', 'C', 'D', 'E']) - >>> s - A 1.0 - B NaN - C 4.0 - D 3.0 - E 4.0 - dtype: float64 - - >>> s.idxmax() - 'C' - - If `skipna` is False and there is an NA value in the data, - the function returns ``nan``. + >>> ser = pd.Series([1, 2, 3, 4], index=pd.DatetimeIndex( + ... 
['2023-01-01', '2023-01-15', '2023-02-01', '2023-02-15'])) + >>> ser + 2023-01-01 1 + 2023-01-15 2 + 2023-02-01 3 + 2023-02-15 4 + dtype: int64 - >>> s.idxmax(skipna=False) - nan + >>> ser.groupby(['a', 'a', 'b', 'b']).idxmax() + a 2023-01-15 + b 2023-02-15 + dtype: datetime64[ns] """ return self._idxmax_idxmin("idxmax", axis=axis, skipna=skipna) From 514d8fc07e7b4a4cd138505823c7ff04cb9c1042 Mon Sep 17 00:00:00 2001 From: Jordan Murphy <35613487+jordan-d-murphy@users.noreply.github.com> Date: Wed, 31 Jan 2024 20:48:42 -0700 Subject: [PATCH 4/5] remove Axis parameters from docstring --- pandas/core/groupby/generic.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 4d12d6a5829f3..034b9f1096f90 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1051,8 +1051,6 @@ def idxmin(self, skipna: bool = True) -> Series: Parameters ---------- - axis : {0 or 'index'} - Unused. Parameter needed for compatibility with DataFrame. skipna : bool, default True Exclude NA/null values. If the entire Series is NA, the result will be NA. @@ -1109,8 +1107,6 @@ def idxmax(self, skipna: bool = True) -> Series: Parameters ---------- - axis : {0 or 'index'} - Unused. Parameter needed for compatibility with DataFrame. skipna : bool, default True Exclude NA/null values. If the entire Series is NA, the result will be NA. 
From 420a33606799baf7e10eee0308dc87ea73062c6a Mon Sep 17 00:00:00 2001 From: Jordan Murphy <35613487+jordan-d-murphy@users.noreply.github.com> Date: Wed, 31 Jan 2024 21:11:18 -0700 Subject: [PATCH 5/5] removed notes section that was not applicable --- pandas/core/groupby/generic.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 034b9f1096f90..d37acd8312483 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1074,12 +1074,6 @@ def idxmin(self, skipna: bool = True) -> Series: Series.idxmax : Return index *label* of the first occurrence of maximum of values. - Notes - ----- - This method is the Series version of ``ndarray.argmin``. This method - returns the label of the minimum, while ``ndarray.argmin`` returns - the position. To get the position, use ``series.values.argmin()``. - Examples -------- >>> ser = pd.Series([1, 2, 3, 4], index=pd.DatetimeIndex( @@ -1130,12 +1124,6 @@ def idxmax(self, skipna: bool = True) -> Series: Series.idxmin : Return index *label* of the first occurrence of minimum of values. - Notes - ----- - This method is the Series version of ``ndarray.argmax``. This method - returns the label of the maximum, while ``ndarray.argmax`` returns - the position. To get the position, use ``series.values.argmax()``. - Examples -------- >>> ser = pd.Series([1, 2, 3, 4], index=pd.DatetimeIndex(