diff --git a/pandas/core/generic.py b/pandas/core/generic.py index e2308836d982a..2e534d25bcfca 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -8286,34 +8286,85 @@ def last(self, offset): def rank(self, axis=0, method='average', numeric_only=None, na_option='keep', ascending=True, pct=False): """ - Compute numerical data ranks (1 through n) along axis. Equal values are - assigned a rank that is the average of the ranks of those values. + Compute numerical data ranks (1 through n) along axis. + + By default, equal values are assigned a rank that is the average of the + ranks of those values. Parameters ---------- axis : {0 or 'index', 1 or 'columns'}, default 0 - index to direct ranking - method : {'average', 'min', 'max', 'first', 'dense'} - * average: average rank of group - * min: lowest rank in group - * max: highest rank in group + Index to direct ranking. + method : {'average', 'min', 'max', 'first', 'dense'}, default 'average' + How to rank the group of records that have the same value + (i.e. ties): + + * average: average rank of the group + * min: lowest rank in the group + * max: highest rank in the group * first: ranks assigned in order they appear in the array * dense: like 'min', but rank always increases by 1 between groups - numeric_only : boolean, default None - Include only float, int, boolean data. Valid only for DataFrame or - Panel objects - na_option : {'keep', 'top', 'bottom'} - * keep: leave NA values where they are - * top: smallest rank if ascending - * bottom: smallest rank if descending - ascending : boolean, default True - False for ranks by high (1) to low (N) - pct : boolean, default False - Computes percentage rank of data + numeric_only : bool, optional + For DataFrame objects, rank only numeric columns if set to True. + na_option : {'keep', 'top', 'bottom'}, default 'keep' + How to rank NaN values: + + * keep: assign NaN rank to NaN values + * top: assign smallest rank to NaN values if ascending + * bottom: assign highest rank to NaN values if ascending + ascending : bool, default True + Whether or not the elements should be ranked in ascending order. + pct : bool, default False + Whether or not to display the returned rankings in percentile + form. Returns ------- - ranks : same type as caller + same type as caller + Return a Series or DataFrame with data ranks as values. + + See Also + -------- + core.groupby.GroupBy.rank : Rank of values within each group. + + Examples + -------- + + >>> df = pd.DataFrame(data={'Animal': ['cat', 'penguin', 'dog', + ... 'spider', 'snake'], + ... 'Number_legs': [4, 2, 4, 8, np.nan]}) + >>> df + Animal Number_legs + 0 cat 4.0 + 1 penguin 2.0 + 2 dog 4.0 + 3 spider 8.0 + 4 snake NaN + + The following example shows how the method behaves with the above + parameters: + + * default_rank: this is the default behaviour obtained without using + any parameter. + * max_rank: setting ``method = 'max'`` the records that have the + same values are ranked using the highest rank (e.g.: since 'cat' + and 'dog' are both in the 2nd and 3rd position, rank 3 is assigned.) + * NA_bottom: choosing ``na_option = 'bottom'``, if there are records + with NaN values they are placed at the bottom of the ranking. + * pct_rank: when setting ``pct = True``, the ranking is expressed as + percentile rank. + + >>> df['default_rank'] = df['Number_legs'].rank() + >>> df['max_rank'] = df['Number_legs'].rank(method='max') + >>> df['NA_bottom'] = df['Number_legs'].rank(na_option='bottom') + >>> df['pct_rank'] = df['Number_legs'].rank(pct=True) + >>> df + Animal Number_legs default_rank max_rank NA_bottom pct_rank + 0 cat 4.0 2.5 3.0 2.5 0.625 + 1 penguin 2.0 1.0 1.0 1.0 0.250 + 2 dog 4.0 2.5 3.0 2.5 0.625 + 3 spider 8.0 4.0 4.0 4.0 1.000 + 4 snake NaN NaN NaN 5.0 NaN """ axis = self._get_axis_number(axis)