@@ -4559,11 +4559,15 @@ def nlargest(self, n, columns, keep='first'):
45594559 Number of rows to return.
45604560 columns : label or list of labels
45614561 Column label(s) to order by.
4562- keep : {'first', 'last'}, default 'first'
4562+ keep : {'first', 'last', 'all' }, default 'first'
45634563 Where there are duplicate values:
45644564
45654565 - `first` : prioritize the first occurrence(s)
45664566 - `last` : prioritize the last occurrence(s)
4567+ - ``all`` : do not drop any duplicates, even if it means
4568+ selecting more than `n` items.
4569+
4570+ .. versionadded:: 0.24.0
45674571
45684572 Returns
45694573 -------
@@ -4586,42 +4590,51 @@ def nlargest(self, n, columns, keep='first'):
45864590
45874591 Examples
45884592 --------
4589- >>> df = pd.DataFrame({'a': [1, 10, 8, 10, -1 ],
4590- ... 'b': list('abdce '),
4591- ... 'c': [1.0, 2.0, np.nan, 3.0, 4.0]})
4593+ >>> df = pd.DataFrame({'a': [1, 10, 8, 11, 8, 2 ],
4594+ ... 'b': list('abdcef '),
4595+ ... 'c': [1.0, 2.0, np.nan, 3.0, 4.0, 9.0 ]})
45924596 >>> df
45934597 a b c
45944598 0 1 a 1.0
45954599 1 10 b 2.0
45964600 2 8 d NaN
4597- 3 10 c 3.0
4598- 4 -1 e 4.0
4601+ 3 11 c 3.0
4602+ 4 8 e 4.0
4603+ 5 2 f 9.0
45994604
46004605 In the following example, we will use ``nlargest`` to select the three
46014606 rows having the largest values in column "a".
46024607
46034608 >>> df.nlargest(3, 'a')
46044609 a b c
4610+ 3 11 c 3.0
46054611 1 10 b 2.0
4606- 3 10 c 3.0
46074612 2 8 d NaN
46084613
46094614 When using ``keep='last'``, ties are resolved in reverse order:
46104615
46114616 >>> df.nlargest(3, 'a', keep='last')
46124617 a b c
4613- 3 10 c 3.0
4618+ 3 11 c 3.0
4619+ 1 10 b 2.0
4620+ 4 8 e 4.0
4621+
4622+ When using ``keep='all'``, all duplicate items are maintained:
4623+ >>> df.nlargest(3, 'a', keep='all')
4624+ a b c
4625+ 3 11 c 3.0
46144626 1 10 b 2.0
46154627 2 8 d NaN
4628+ 4 8 e 4.0
46164629
46174630 To order by the largest values in column "a" and then "c", we can
46184631 specify multiple columns like in the next example.
46194632
46204633 >>> df.nlargest(3, ['a', 'c'])
46214634 a b c
4622- 3 10 c 3.0
4635+ 4 8 e 4.0
4636+ 3 11 c 3.0
46234637 1 10 b 2.0
4624- 2 8 d NaN
46254638
46264639 Attempting to use ``nlargest`` on non-numeric dtypes will raise a
46274640 ``TypeError``:
@@ -4645,25 +4658,73 @@ def nsmallest(self, n, columns, keep='first'):
46454658 Number of items to retrieve
46464659 columns : list or str
46474660 Column name or names to order by
4648- keep : {'first', 'last'}, default 'first'
4661+ keep : {'first', 'last', 'all' }, default 'first'
46494662 Where there are duplicate values:
46504663 - ``first`` : take the first occurrence.
46514664 - ``last`` : take the last occurrence.
4665+ - ``all`` : do not drop any duplicates, even if it means
4666+ selecting more than `n` items.
4667+
4668+ .. versionadded:: 0.24.0
46524669
46534670 Returns
46544671 -------
46554672 DataFrame
46564673
46574674 Examples
46584675 --------
4659- >>> df = pd.DataFrame({'a': [1, 10, 8, 11, -1],
4660- ... 'b': list('abdce'),
4661- ... 'c': [1.0, 2.0, np.nan, 3.0, 4.0]})
4676+ >>> df = pd.DataFrame({'a': [1, 10, 8, 11, 8, 2],
4677+ ... 'b': list('abdcef'),
4678+ ... 'c': [1.0, 2.0, np.nan, 3.0, 4.0, 9.0]})
4679+ >>> df
4680+ a b c
4681+ 0 1 a 1.0
4682+ 1 10 b 2.0
4683+ 2 8 d NaN
4684+ 3 11 c 3.0
4685+ 4 8 e 4.0
4686+ 5 2 f 9.0
4687+
4688+ In the following example, we will use ``nsmallest`` to select the
4689+ three rows having the smallest values in column "a".
4690+
46624691 >>> df.nsmallest(3, 'a')
4663- a b c
4664- 4 -1 e 4
4665- 0 1 a 1
4666- 2 8 d NaN
4692+ a b c
4693+ 0 1 a 1.0
4694+ 5 2 f 9.0
4695+ 2 8 d NaN
4696+
4697+ When using ``keep='last'``, ties are resolved in reverse order:
4698+
4699+ >>> df.nsmallest(3, 'a', keep='last')
4700+ a b c
4701+ 0 1 a 1.0
4702+ 5 2 f 9.0
4703+ 4 8 e 4.0
4704+
4705+ When using ``keep='all'``, all duplicate items are maintained:
4706+ >>> df.nsmallest(3, 'a', keep='all')
4707+ a b c
4708+ 0 1 a 1.0
4709+ 5 2 f 9.0
4710+ 2 8 d NaN
4711+ 4 8 e 4.0
4712+
4713+ To order by the smallest values in column "a" and then "c", we can
4714+ specify multiple columns like in the next example.
4715+
4716+ >>> df.nsmallest(3, ['a', 'c'])
4717+ a b c
4718+ 0 1 a 1.0
4719+ 5 2 f 9.0
4720+ 4 8 e 4.0
4721+
4722+ Attempting to use ``nsmallest`` on non-numeric dtypes will raise a
4723+ ``TypeError``:
4724+
4725+ >>> df.nsmallest(3, 'b')
4726+ Traceback (most recent call last):
4727+ TypeError: Column 'b' has dtype object, cannot use method 'nsmallest'
46674728 """
46684729 return algorithms .SelectNFrame (self ,
46694730 n = n ,
0 commit comments