Skip to content

Commit 8a98f5e

Browse files
toobaz authored and jreback committed
BUG: do not raise UnsortedIndexError if sorting is not required
Closes #16734. Author: Pietro Battiston <me@pietrobattiston.it>. This patch had conflicts when merged, resolved by Committer: Jeff Reback <jeff.reback@twosigma.com>. Closes #16736 from toobaz/index_what_you_can and squashes the following commits: f77e2b3 [Pietro Battiston] BUG: do not raise UnsortedIndexError if sorting is not required
1 parent 25e0576 commit 8a98f5e

File tree

5 files changed

+35
-18
lines changed

5 files changed

+35
-18
lines changed

doc/source/whatsnew/v0.21.0.txt

+1
Original file line number | Diff line number | Diff line change
@@ -99,6 +99,7 @@ Indexing
9999
^^^^^^^^
100100

101101
- When called with a null slice (e.g. ``df.iloc[:]``), the ``.iloc`` and ``.loc`` indexers return a shallow copy of the original object. Previously they returned the original object. (:issue:`13873`).
102+
- When called on an unsorted ``MultiIndex``, the ``loc`` indexer now will raise ``UnsortedIndexError`` only if proper slicing is used on non-sorted levels (:issue:`16734`).
102103

103104

104105
I/O

pandas/core/common.py

+7
Original file line number | Diff line number | Diff line change
@@ -411,6 +411,13 @@ def is_null_slice(obj):
411411
obj.stop is None and obj.step is None)
412412

413413

414+
def is_true_slices(l):
415+
"""
416+
Find non-trivial slices in "l": return a list of booleans with same length.
417+
"""
418+
return [isinstance(k, slice) and not is_null_slice(k) for k in l]
419+
420+
414421
def is_full_slice(obj, l):
415422
""" we have a full length slice """
416423
return (isinstance(obj, slice) and obj.start == 0 and obj.stop == l and

pandas/core/indexes/multi.py

+8-13
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,8 @@
2323
from pandas.errors import PerformanceWarning, UnsortedIndexError
2424
from pandas.core.common import (_values_from_object,
2525
is_bool_indexer,
26-
is_null_slice)
26+
is_null_slice,
27+
is_true_slices)
2728

2829
import pandas.core.base as base
2930
from pandas.util._decorators import (Appender, cache_readonly,
@@ -1035,12 +1036,6 @@ def is_lexsorted(self):
10351036
"""
10361037
return self.lexsort_depth == self.nlevels
10371038

1038-
def is_lexsorted_for_tuple(self, tup):
1039-
"""
1040-
Return True if we are correctly lexsorted given the passed tuple
1041-
"""
1042-
return len(tup) <= self.lexsort_depth
1043-
10441039
@cache_readonly
10451040
def lexsort_depth(self):
10461041
if self.sortorder is not None:
@@ -2262,12 +2257,12 @@ def get_locs(self, tup):
22622257
"""
22632258

22642259
# must be lexsorted to at least as many levels
2265-
if not self.is_lexsorted_for_tuple(tup):
2266-
raise UnsortedIndexError('MultiIndex Slicing requires the index '
2267-
'to be fully lexsorted tuple len ({0}), '
2268-
'lexsort depth ({1})'
2269-
.format(len(tup), self.lexsort_depth))
2270-
2260+
true_slices = [i for (i, s) in enumerate(is_true_slices(tup)) if s]
2261+
if true_slices and true_slices[-1] >= self.lexsort_depth:
2262+
raise UnsortedIndexError('MultiIndex slicing requires the index '
2263+
'to be lexsorted: slicing on levels {0}, '
2264+
'lexsort depth {1}'
2265+
.format(true_slices, self.lexsort_depth))
22712266
# indexer
22722267
# this is the list of all values that we want to select
22732268
n = len(self)

pandas/tests/indexes/test_multi.py

+6-1
Original file line number | Diff line number | Diff line change
@@ -2826,8 +2826,13 @@ def test_unsortedindex(self):
28262826
df = pd.DataFrame([[i, 10 * i] for i in lrange(6)], index=mi,
28272827
columns=['one', 'two'])
28282828

2829+
# GH 16734: not sorted, but no real slicing
2830+
result = df.loc(axis=0)['z', 'a']
2831+
expected = df.iloc[0]
2832+
tm.assert_series_equal(result, expected)
2833+
28292834
with pytest.raises(UnsortedIndexError):
2830-
df.loc(axis=0)['z', :]
2835+
df.loc(axis=0)['z', slice('a')]
28312836
df.sort_index(inplace=True)
28322837
assert len(df.loc(axis=0)['z', :]) == 2
28332838

pandas/tests/indexing/test_multiindex.py

+13-4
Original file line number | Diff line number | Diff line change
@@ -817,9 +817,13 @@ def f():
817817
assert df.index.lexsort_depth == 0
818818
with tm.assert_raises_regex(
819819
UnsortedIndexError,
820-
'MultiIndex Slicing requires the index to be fully '
821-
r'lexsorted tuple len \(2\), lexsort depth \(0\)'):
822-
df.loc[(slice(None), df.loc[:, ('a', 'bar')] > 5), :]
820+
'MultiIndex slicing requires the index to be '
821+
r'lexsorted: slicing on levels \[1\], lexsort depth 0'):
822+
df.loc[(slice(None), slice('bar')), :]
823+
824+
# GH 16734: not sorted, but no real slicing
825+
result = df.loc[(slice(None), df.loc[:, ('a', 'bar')] > 5), :]
826+
tm.assert_frame_equal(result, df.iloc[[1, 3], :])
823827

824828
def test_multiindex_slicers_non_unique(self):
825829

@@ -1001,9 +1005,14 @@ def test_per_axis_per_level_doc_examples(self):
10011005

10021006
# not sorted
10031007
def f():
1004-
df.loc['A1', (slice(None), 'foo')]
1008+
df.loc['A1', ('a', slice('foo'))]
10051009

10061010
pytest.raises(UnsortedIndexError, f)
1011+
1012+
# GH 16734: not sorted, but no real slicing
1013+
tm.assert_frame_equal(df.loc['A1', (slice(None), 'foo')],
1014+
df.loc['A1'].iloc[:, [0, 2]])
1015+
10071016
df = df.sort_index(axis=1)
10081017

10091018
# slicing

0 commit comments

Comments
 (0)