From 31e6441d349cc4b365833dbbdb61e95683e2e683 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dea=20Mar=C3=ADa=20L=C3=A9on?= Date: Thu, 8 Jun 2023 16:16:03 +0200 Subject: [PATCH 1/4] Example for pct_change --- pandas/core/groupby/groupby.py | 42 ++++++++++++++++++++++++++++++++-- 1 file changed, 40 insertions(+), 2 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 6ea5fc437f5a2..45cf0d86f2597 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -3933,7 +3933,7 @@ def diff( @final @Substitution(name="groupby") - @Appender(_common_see_also) + @Substitution(see_also=_common_see_also) def pct_change( self, periods: int = 1, @@ -3949,7 +3949,45 @@ def pct_change( ------- Series or DataFrame Percentage changes within each group. - """ + %(see_also)s + Examples + -------- + + For SeriesGroupBy: + + >>> lst = ['a', 'a', 'b', 'b'] + >>> ser = pd.Series([1, 2, 3, 4], index=lst) + >>> ser + a 1 + a 2 + b 3 + b 4 + dtype: int64 + >>> ser.groupby(level=0).pct_change() + a NaN + a 1.000000 + b NaN + b 0.333333 + dtype: float64 + + For DataFrameGroupBy: + + >>> data = [[1, 2, 3], [1, 5, 6], [2, 5, 8], [2, 6, 9]] + >>> df = pd.DataFrame(data, columns=["a", "b", "c"], + ... index=["tuna", "salmon", "catfish", "goldfish"]) + >>> df + a b c + tuna 1 2 3 + salmon 1 5 6 + catfish 2 5 8 + goldfish 2 6 9 + >>> df.groupby("a").pct_change() + b c + tuna NaN NaN + salmon 1.5 1.000 + catfish NaN NaN + goldfish 0.2 0.125""" + if axis is not lib.no_default: axis = self.obj._get_axis_number(axis) self._deprecate_axis(axis, "pct_change") From acd7aad3151a4376d55b967b525bb7222b52a701 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dea=20Mar=C3=ADa=20L=C3=A9on?= Date: Thu, 8 Jun 2023 18:29:57 +0200 Subject: [PATCH 2/4] Added examples for groupby sem, shift, size --- pandas/core/groupby/groupby.py | 108 ++++++++++++++++++++++++++++++++- 1 file changed, 106 insertions(+), 2 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 45cf0d86f2597..9d5c618bedb10 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -2279,6 +2279,40 @@ def sem(self, ddof: int = 1, numeric_only: bool = False): ------- Series or DataFrame Standard error of the mean of values within each group. + + Examples + -------- + For SeriesGroupBy: + + >>> lst = ['a', 'a', 'b', 'b'] + >>> ser = pd.Series([5, 10, 8, 14], index=lst) + >>> ser + a 5 + a 10 + b 8 + b 14 + dtype: int64 + >>> ser.groupby(level=0).sem() + a 2.5 + b 3.0 + dtype: float64 + + For DataFrameGroupBy: + + >>> data = [[1, 12, 11], [1, 15, 2], [2, 5, 8], [2, 6, 12]] + >>> df = pd.DataFrame(data, columns=["a", "b", "c"], + ... index=["tuna", "salmon", "catfish", "goldfish"]) + >>> df + a b c + tuna 1 12 11 + salmon 1 15 2 + catfish 2 5 8 + goldfish 2 6 12 + >>> df.groupby("a").sem() + b c + a + 1 1.5 4.5 + 2 0.5 2.0 """ if numeric_only and self.obj.ndim == 1 and not is_numeric_dtype(self.obj.dtype): raise TypeError( @@ -2294,7 +2328,7 @@ def sem(self, ddof: int = 1, numeric_only: bool = False): @final @Substitution(name="groupby") - @Appender(_common_see_also) + @Substitution(see_also=_common_see_also) def size(self) -> DataFrame | Series: """ Compute group sizes. @@ -2304,6 +2338,37 @@ def size(self) -> DataFrame | Series: DataFrame or Series Number of rows in each group as a Series if as_index is True or a DataFrame if as_index is False. + %(see_also)s + Examples + -------- + + For SeriesGroupBy: + + >>> lst = ['a', 'b', 'c'] + >>> ser = pd.Series([1, 2, 3], index=lst) + >>> ser + a 1 + b 2 + c 3 + dtype: int64 + >>> ser.groupby(level=0).size() + a 2 + b 1 + dtype: float64 + + >>> data = [[1, 2, 3], [1, 5, 6], [7, 8, 9]] + >>> df = pd.DataFrame(data, columns=["a", "b", "c"], + ... index=["owl", "toucan", "eagle"]) + >>> df + a b c + owl 1 2 3 + toucan 1 5 6 + eagle 7 8 9 + >>> df.groupby("a").size() + a + 1 2 + 7 1 + dtype: int64 """ result = self.grouper.size() @@ -3853,6 +3918,44 @@ def shift( See Also -------- Index.shift : Shift values of Index. + + Examples + -------- + + For SeriesGroupBy: + + >>> lst = ['a', 'a', 'b', 'b'] + >>> ser = pd.Series([1, 2, 3, 4], index=lst) + >>> ser + a 1 + a 2 + b 3 + b 4 + dtype: int64 + >>> ser.groupby(level=0).shift(1) + a NaN + a 1.0 + b NaN + b 3.0 + dtype: float64 + + For DataFrameGroupBy: + + >>> data = [[1, 2, 3], [1, 5, 6], [2, 5, 8], [2, 6, 9]] + >>> df = pd.DataFrame(data, columns=["a", "b", "c"], + ... index=["tuna", "salmon", "catfish", "goldfish"]) + >>> df + a b c + tuna 1 2 3 + salmon 1 5 6 + catfish 2 5 8 + goldfish 2 6 9 + >>> df.groupby("a").shift(1) + b c + tuna NaN NaN + salmon 2.0 3.0 + catfish NaN NaN + goldfish 5.0 8.0 """ if axis is not lib.no_default: axis = self.obj._get_axis_number(axis) @@ -3986,7 +4089,8 @@ def pct_change( tuna NaN NaN salmon 1.5 1.000 catfish NaN NaN - goldfish 0.2 0.125""" + goldfish 0.2 0.125 + """ if axis is not lib.no_default: axis = self.obj._get_axis_number(axis) From b8d239268cbc590758657c98402eaa5b6c9d107b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dea=20Mar=C3=ADa=20L=C3=A9on?= Date: Thu, 8 Jun 2023 19:16:39 +0200 Subject: [PATCH 3/4] Updated code_checks --- ci/code_checks.sh | 8 -------- 1 file changed, 8 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index f2d6a2b222a3c..191e0d03b3a1a 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -267,10 +267,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then pandas.core.groupby.DataFrameGroupBy.ffill \ pandas.core.groupby.DataFrameGroupBy.median \ pandas.core.groupby.DataFrameGroupBy.ohlc \ - pandas.core.groupby.DataFrameGroupBy.pct_change \ - pandas.core.groupby.DataFrameGroupBy.sem \ - pandas.core.groupby.DataFrameGroupBy.shift \ - pandas.core.groupby.DataFrameGroupBy.size \ pandas.core.groupby.DataFrameGroupBy.skew \ pandas.core.groupby.DataFrameGroupBy.std \ pandas.core.groupby.DataFrameGroupBy.var \ @@ -280,10 +276,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then pandas.core.groupby.SeriesGroupBy.median \ pandas.core.groupby.SeriesGroupBy.nunique \ pandas.core.groupby.SeriesGroupBy.ohlc \ - pandas.core.groupby.SeriesGroupBy.pct_change \ - pandas.core.groupby.SeriesGroupBy.sem \ - pandas.core.groupby.SeriesGroupBy.shift \ - pandas.core.groupby.SeriesGroupBy.size \ pandas.core.groupby.SeriesGroupBy.skew \ pandas.core.groupby.SeriesGroupBy.std \ pandas.core.groupby.SeriesGroupBy.var \ From 9ba068dc475beaaf1910e05c782ac01f4ac6cc7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dea=20Mar=C3=ADa=20L=C3=A9on?= Date: Thu, 8 Jun 2023 20:38:52 +0200 Subject: [PATCH 4/4] Corrected error on groupby size --- pandas/core/groupby/groupby.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index e04c53be512f2..c372235481614 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -2574,17 +2574,17 @@ def size(self) -> DataFrame | Series: For SeriesGroupBy: - >>> lst = ['a', 'b', 'c'] + >>> lst = ['a', 'a', 'b'] >>> ser = pd.Series([1, 2, 3], index=lst) >>> ser a 1 - b 2 - c 3 + a 2 + b 3 dtype: int64 >>> ser.groupby(level=0).size() a 2 b 1 - dtype: float64 + dtype: int64 >>> data = [[1, 2, 3], [1, 5, 6], [7, 8, 9]] >>> df = pd.DataFrame(data, columns=["a", "b", "c"],