From 9fbe88433b4a70cf4232c433304309f8f9897934 Mon Sep 17 00:00:00 2001 From: Edoardo Abati Date: Fri, 15 Feb 2019 00:34:26 +0000 Subject: [PATCH 1/4] DOC: Improvement docstring of DataFrame.rank() --- pandas/core/generic.py | 87 +++++++++++++++++++++++++++++++++--------- 1 file changed, 69 insertions(+), 18 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index e2308836d982a..06b5eaf65b049 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -8286,34 +8286,85 @@ def last(self, offset): def rank(self, axis=0, method='average', numeric_only=None, na_option='keep', ascending=True, pct=False): """ - Compute numerical data ranks (1 through n) along axis. Equal values are - assigned a rank that is the average of the ranks of those values. + Compute numerical data ranks (1 through n) along axis. + + By default, equal values are assigned a rank that is the average of the + ranks of those values. Parameters ---------- axis : {0 or 'index', 1 or 'columns'}, default 0 - index to direct ranking - method : {'average', 'min', 'max', 'first', 'dense'} - * average: average rank of group - * min: lowest rank in group - * max: highest rank in group + Index to direct ranking. + method : {'average', 'min', 'max', 'first', 'dense'}, default 'average' + How to rank the group of records that have the same value + (i.e. ties): + + * average: average rank of the group + * min: lowest rank in the group + * max: highest rank in the group * first: ranks assigned in order they appear in the array * dense: like 'min', but rank always increases by 1 between groups - numeric_only : boolean, default None - Include only float, int, boolean data. 
Valid only for DataFrame or - Panel objects - na_option : {'keep', 'top', 'bottom'} - * keep: leave NA values where they are - * top: smallest rank if ascending - * bottom: smallest rank if descending - ascending : boolean, default True - False for ranks by high (1) to low (N) - pct : boolean, default False - Computes percentage rank of data + numeric_only : bool, optional + For DataFrame objects, rank only numeric columns if set to True. + na_option : {'keep', 'top', 'bottom'}, default 'keep' + How to rank NaN values: + + * keep: assign NaN rank to NaN values + * top: assign smallest rank to NaN values if ascending + * bottom: assign highest rank to NaN values if ascending + ascending : bool, default True + Whether or not the elements should be ranked in ascending order. + pct : bool, default False + Whether or not to display the returned rankings in percentile + form. Returns ------- ranks : same type as caller + Return a Series or DataFrame with data ranks as values. + + See Also + -------- + core.groupby.GroupBy.rank : Rank of values within each group. + + Examples + -------- + + >>> df = pd.DataFrame(data={'Animal': ['cat', 'penguin', 'dog', + ... 'spider', 'snake'], + ... 'Number_legs': [4, 2, 4, 8, np.nan]}) + >>> df + Animal Number_legs + 0 cat 4.0 + 1 penguin 2.0 + 2 dog 4.0 + 3 spider 8.0 + 4 snake NaN + + The following example shows how the method behaves with the above + parameters: + + * default_rank: this is the default behaviour obtained without using + any parameter. + * max_rank: setting ``method = 'max'`` the records that have the + same values are ranked using the highest rank (e.g.: since 'cat' + and 'dog' are both in the 2nd and 3rd position, rank 3 is assigned.) + * NA_bottom: choosing ``na_option = 'bottom'``, if there are records + with NaN values they are placed at the bottom of the ranking. + * pct_rank: when setting ``pct = True``, the ranking is expressed as + percentile rank. 
+ + >>> df['default_rank'] = df['Number_legs'].rank() + >>> df['max_rank'] = df['Number_legs'].rank(method='max') + >>> df['NA_bottom'] = df['Number_legs'].rank(na_option='bottom') + >>> df['pct_rank'] = df['Number_legs'].rank(pct=True) + >>> df + Animal Number_legs default_rank max_rank NA_bottom pct_rank + 0 cat 4.0 2.5 3.0 2.5 0.625 + 1 penguin 2.0 1.0 1.0 1.0 0.250 + 2 dog 4.0 2.5 3.0 2.5 0.625 + 3 spider 8.0 4.0 4.0 4.0 1.000 + 4 snake NaN NaN NaN 5.0 NaN """ axis = self._get_axis_number(axis) From e85c65e485c0ea56fbd3fbcf0cec951f171b091a Mon Sep 17 00:00:00 2001 From: Edoardo Abati Date: Tue, 19 Feb 2019 20:02:46 +0000 Subject: [PATCH 2/4] Solved error in bulleted list --- pandas/core/generic.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 06b5eaf65b049..eaaec3c772b6a 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -8299,19 +8299,19 @@ def rank(self, axis=0, method='average', numeric_only=None, How to rank the group of records that have the same value (i.e. ties): - * average: average rank of the group - * min: lowest rank in the group - * max: highest rank in the group - * first: ranks assigned in order they appear in the array - * dense: like 'min', but rank always increases by 1 between groups + * average: average rank of the group. + * min: lowest rank in the group. + * max: highest rank in the group. + * first: ranks assigned in order they appear in the array. + * dense: like 'min', but rank always increases by 1 between groups. numeric_only : bool, optional For DataFrame objects, rank only numeric columns if set to True. na_option : {'keep', 'top', 'bottom'}, default 'keep' How to rank NaN values: - * keep: assign NaN rank to NaN values - * top: assign smallest rank to NaN values if ascending - * bottom: assign highest rank to NaN values if ascending + * keep: assign NaN rank to NaN values. 
+ * top: assign smallest rank to NaN values if ascending. + * bottom: assign highest rank to NaN values if ascending. ascending : bool, default True Whether or not the elements should be ranked in ascending order. pct : bool, default False From 70f3381ad13f57612227914fe878ff670d1c6061 Mon Sep 17 00:00:00 2001 From: Edoardo Abati Date: Tue, 19 Feb 2019 20:49:40 +0000 Subject: [PATCH 3/4] Solved error in Returns section --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index eaaec3c772b6a..20205d7891def 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -8320,7 +8320,7 @@ def rank(self, axis=0, method='average', numeric_only=None, Returns ------- - ranks : same type as caller + same type as caller Return a Series or DataFrame with data ranks as values. See Also From 7de320e33fdaa5c8d9b51ef64a1e3b1617ddc04c Mon Sep 17 00:00:00 2001 From: Edoardo Abati Date: Mon, 6 May 2019 10:41:20 +0100 Subject: [PATCH 4/4] Removed trailing periods from bulleted lists in parameter descriptions --- pandas/core/generic.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 20205d7891def..2e534d25bcfca 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -8299,19 +8299,19 @@ def rank(self, axis=0, method='average', numeric_only=None, How to rank the group of records that have the same value (i.e. ties): - * average: average rank of the group. - * min: lowest rank in the group. - * max: highest rank in the group. - * first: ranks assigned in order they appear in the array. - * dense: like 'min', but rank always increases by 1 between groups. 
+ * average: average rank of the group + * min: lowest rank in the group + * max: highest rank in the group + * first: ranks assigned in order they appear in the array + * dense: like 'min', but rank always increases by 1 between groups numeric_only : bool, optional For DataFrame objects, rank only numeric columns if set to True. na_option : {'keep', 'top', 'bottom'}, default 'keep' How to rank NaN values: - * keep: assign NaN rank to NaN values. - * top: assign smallest rank to NaN values if ascending. - * bottom: assign highest rank to NaN values if ascending. + * keep: assign NaN rank to NaN values + * top: assign smallest rank to NaN values if ascending + * bottom: assign highest rank to NaN values if ascending ascending : bool, default True Whether or not the elements should be ranked in ascending order. pct : bool, default False