@@ -4559,11 +4559,15 @@ def nlargest(self, n, columns, keep='first'):
45594559 Number of rows to return.
45604560 columns : label or list of labels
45614561 Column label(s) to order by.
4562- keep : {'first', 'last'}, default 'first'
4562+ keep : {'first', 'last', 'all'}, default 'first'
45634563 Where there are duplicate values:
45644564
45654565 - `first` : prioritize the first occurrence(s)
45664566 - `last` : prioritize the last occurrence(s)
4567+ - ``all`` : do not drop any duplicates, even if it means
4568+ selecting more than `n` items.
4569+
4570+ .. versionadded:: 0.24.0
45674571
45684572 Returns
45694573 -------
@@ -4586,47 +4590,58 @@ def nlargest(self, n, columns, keep='first'):
45864590
45874591 Examples
45884592 --------
4589- >>> df = pd.DataFrame({'a': [1, 10, 8, 10, -1 ],
4590- ... 'b': list('abdce '),
4591- ... 'c': [1.0, 2.0, np.nan, 3.0, 4.0]})
4593+ >>> df = pd.DataFrame({'a': [1, 10, 8, 11, 8, 2],
4594+ ... 'b': list('abdcef'),
4595+ ... 'c': [1.0, 2.0, np.nan, 3.0, 4.0, 9.0]})
45924596 >>> df
45934597 a b c
45944598 0 1 a 1.0
45954599 1 10 b 2.0
45964600 2 8 d NaN
4597- 3 10 c 3.0
4598- 4 -1 e 4.0
4601+ 3 11 c 3.0
4602+ 4 8 e 4.0
4603+ 5 2 f 9.0
45994604
46004605 In the following example, we will use ``nlargest`` to select the three
46014606 rows having the largest values in column "a".
46024607
46034608 >>> df.nlargest(3, 'a')
46044609 a b c
4610+ 3 11 c 3.0
46054611 1 10 b 2.0
4606- 3 10 c 3.0
46074612 2 8 d NaN
46084613
46094614 When using ``keep='last'``, ties are resolved in reverse order:
46104615
46114616 >>> df.nlargest(3, 'a', keep='last')
46124617 a b c
4613- 3 10 c 3.0
4618+ 3 11 c 3.0
4619+ 1 10 b 2.0
4620+ 4 8 e 4.0
4621+
4622+ When using ``keep='all'``, all duplicate items are maintained:
4623+
4624+ >>> df.nlargest(3, 'a', keep='all')
4625+ a b c
4626+ 3 11 c 3.0
46144627 1 10 b 2.0
46154628 2 8 d NaN
4629+ 4 8 e 4.0
46164630
46174631 To order by the largest values in column "a" and then "c", we can
46184632 specify multiple columns like in the next example.
46194633
46204634 >>> df.nlargest(3, ['a', 'c'])
46214635 a b c
4622- 3 10 c 3.0
4636+ 4 8 e 4.0
4637+ 3 11 c 3.0
46234638 1 10 b 2.0
4624- 2 8 d NaN
46254639
46264640 Attempting to use ``nlargest`` on non-numeric dtypes will raise a
46274641 ``TypeError``:
46284642
46294643 >>> df.nlargest(3, 'b')
46304645 Traceback (most recent call last):
46314646 TypeError: Column 'b' has dtype object, cannot use method 'nlargest'
46324647 """
@@ -4645,25 +4660,75 @@ def nsmallest(self, n, columns, keep='first'):
46454660 Number of items to retrieve
46464661 columns : list or str
46474662 Column name or names to order by
4648- keep : {'first', 'last'}, default 'first'
4663+ keep : {'first', 'last', 'all'}, default 'first'
46494664 Where there are duplicate values:
46504665 - ``first`` : take the first occurrence.
46514666 - ``last`` : take the last occurrence.
4667+ - ``all`` : do not drop any duplicates, even if it means
4668+ selecting more than `n` items.
4669+
4670+ .. versionadded:: 0.24.0
46524671
46534672 Returns
46544673 -------
46554674 DataFrame
46564675
46574676 Examples
46584677 --------
4659- >>> df = pd.DataFrame({'a': [1, 10, 8, 11, -1],
4660- ... 'b': list('abdce'),
4661- ... 'c': [1.0, 2.0, np.nan, 3.0, 4.0]})
4678+ >>> df = pd.DataFrame({'a': [1, 10, 8, 11, 8, 2],
4679+ ... 'b': list('abdcef'),
4680+ ... 'c': [1.0, 2.0, np.nan, 3.0, 4.0, 9.0]})
4681+ >>> df
4682+ a b c
4683+ 0 1 a 1.0
4684+ 1 10 b 2.0
4685+ 2 8 d NaN
4686+ 3 11 c 3.0
4687+ 4 8 e 4.0
4688+ 5 2 f 9.0
4689+
4690+ In the following example, we will use ``nsmallest`` to select the
4691+ three rows having the smallest values in column "a".
4692+
46624693 >>> df.nsmallest(3, 'a')
4663- a b c
4664- 4 -1 e 4
4665- 0 1 a 1
4666- 2 8 d NaN
4694+ a b c
4695+ 0 1 a 1.0
4696+ 5 2 f 9.0
4697+ 2 8 d NaN
4698+
4699+ When using ``keep='last'``, ties are resolved in reverse order:
4700+
4701+ >>> df.nsmallest(3, 'a', keep='last')
4702+ a b c
4703+ 0 1 a 1.0
4704+ 5 2 f 9.0
4705+ 4 8 e 4.0
4706+
4707+ When using ``keep='all'``, all duplicate items are maintained:
4708+
4709+ >>> df.nsmallest(3, 'a', keep='all')
4710+ a b c
4711+ 0 1 a 1.0
4712+ 5 2 f 9.0
4713+ 2 8 d NaN
4714+ 4 8 e 4.0
4715+
4716+ To order by the smallest values in column "a" and then "c", we can
4717+ specify multiple columns like in the next example.
4718+
4719+ >>> df.nsmallest(3, ['a', 'c'])
4720+ a b c
4721+ 0 1 a 1.0
4722+ 5 2 f 9.0
4723+ 4 8 e 4.0
4724+
4725+ Attempting to use ``nsmallest`` on non-numeric dtypes will raise a
4726+ ``TypeError``:
4727+
4728+ >>> df.nsmallest(3, 'b')
4730+ Traceback (most recent call last):
4731+ TypeError: Column 'b' has dtype object, cannot use method 'nsmallest'
46674732 """
46684733 return algorithms .SelectNFrame (self ,
46694734 n = n ,
0 commit comments