API: allow negative steps for label-based indexing #8753

Merged 1 commit on Nov 19, 2014
23 changes: 23 additions & 0 deletions doc/source/whatsnew/v0.15.2.txt
@@ -70,7 +70,30 @@ Bug Fixes
- ``Timedelta`` kwargs may now be numpy ints and floats (:issue:`8757`).
- ``sql_schema`` now generates dialect appropriate ``CREATE TABLE`` statements (:issue:`8697`)
- ``slice`` string method now takes step into account (:issue:`8754`)
- Fix negative step support for label-based slices (:issue:`8753`)

Old behavior:

.. code-block:: python

In [1]: s = pd.Series(np.arange(3), ['a', 'b', 'c'])

In [2]: s
Out[2]:
a 0
b 1
c 2
dtype: int64

In [3]: s.loc['c':'a':-1]
Out[3]:
c 2
dtype: int64

New behavior:

.. ipython:: python

s = pd.Series(np.arange(3), ['a', 'b', 'c'])
s.loc['c':'a':-1]
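As an editorial aside (not part of the diff), a minimal sketch of how the new label-based behaviour differs from positional slicing, assuming pandas 0.15.2 or later as described in this entry:

```python
import numpy as np
import pandas as pd

s = pd.Series(np.arange(3), index=['a', 'b', 'c'])

# Label-based slices are inclusive of both endpoints, so walking
# backwards from 'c' to 'a' keeps all three rows.
s.loc['c':'a':-1]   # c  2, b  1, a  0

# Positional slices keep the usual half-open semantics: the stop
# position (0) is excluded.
s.iloc[2:0:-1]      # c  2, b  1
```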



202 changes: 136 additions & 66 deletions pandas/core/index.py
@@ -1959,23 +1959,99 @@ def slice_indexer(self, start=None, end=None, step=None):
-----
This function assumes that the data is sorted, so use at your own peril
"""
start_slice, end_slice = self.slice_locs(start, end)
start_slice, end_slice = self.slice_locs(start, end, step=step)

# return a slice
if np.isscalar(start_slice) and np.isscalar(end_slice):
if not lib.isscalar(start_slice):
raise AssertionError("Start slice bound is non-scalar")
if not lib.isscalar(end_slice):
raise AssertionError("End slice bound is non-scalar")

# degenerate cases
if start is None and end is None:
return slice(None, None, step)
return slice(start_slice, end_slice, step)

return slice(start_slice, end_slice, step)
def _maybe_cast_slice_bound(self, label, side):
"""
This function should be overloaded in subclasses that allow non-trivial
casting on label-slice bounds, e.g. datetime-like indices allowing
strings containing formatted datetimes.

# loc indexers
return (Index(start_slice) & Index(end_slice)).values
Parameters
----------
label : object
side : {'left', 'right'}

Notes
-----
Value of `side` parameter should be validated in caller.

def slice_locs(self, start=None, end=None):
"""
For an ordered Index, compute the slice locations for input labels
return label
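A toy illustration (not pandas code; the function name is made up for the example) of the resolution-dependent casting this hook is meant to enable for datetime-like indices, mirroring the left/right bound semantics described above:

```python
from datetime import datetime

def cast_year_bound(label, side):
    # Hypothetical example: the string '2014' means "start of 2014"
    # when used as a left bound and "just past the end of 2014" when
    # used as a right bound.
    year = int(label)
    if side == 'left':
        return datetime(year, 1, 1)
    return datetime(year + 1, 1, 1)

cast_year_bound('2014', 'left')    # datetime(2014, 1, 1)
cast_year_bound('2014', 'right')   # datetime(2015, 1, 1)
```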

def get_slice_bound(self, label, side):
"""
Calculate slice bound that corresponds to given label.

Returns leftmost (one-past-the-rightmost if ``side=='right'``) position
of given label.

Parameters
----------
label : object
side : {'left', 'right'}

"""
if side not in ('left', 'right'):
raise ValueError(
"Invalid value for side kwarg,"
" must be either 'left' or 'right': %s" % (side,))

original_label = label
# For datetime indices label may be a string that has to be converted
# to datetime boundary according to its resolution.
label = self._maybe_cast_slice_bound(label, side)

try:
slc = self.get_loc(label)
except KeyError:
if self.is_monotonic_increasing:
return self.searchsorted(label, side=side)

Member:
Is this not what we are discussing in #8613? (so it seems this is explicitly and intentionally implemented?)

Member:
The blocking of out-of-bounds indexing is actually done in pandas.core.indexing._LocIndexer. So the functionality here is unchanged (though I agree the checks do make more sense here than in the indexing module).

Member:
Yes, I see, and the IXIndexer does not implement those checks, so ix can hit this codepath.

Contributor Author:
Moving the checks here should be trivial: add a third side value, side=='exact' or side=='strict', that disables this branch of execution (and probably rename the function and parameter so they make sense with that third value).

Contributor Author:
But I was really hoping that we'll come to dropping those checks altogether.

Member:
@immerrr agreed, but I do think we'll want at least a type check (not in this PR), e.g., to ensure pd.Index([1, 2, 3]).slice_indexer('a') raises on Python 2.

Contributor Author:
This one is easy: you just need to add something that throws on invalid type into _maybe_cast_slice_bound. The inverse is tricky (e.g. pd.Index(list('abc')).slice_indexer(1)), but should be doable once StringIndex lands (either by adding another index type or by adding a string-object numpy dtype).
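One possible shape of the type check discussed here, as a hedged sketch only (the method body below is hypothetical and not part of this PR):

```python
def _maybe_cast_slice_bound(self, label, side):
    # Hypothetical guard for an integer index: reject string bounds
    # outright instead of letting them fall through to searchsorted,
    # which on Python 2 would silently compare str and int.
    if isinstance(label, str):
        raise TypeError("cannot use a string %r as a slice bound "
                        "on an integer index" % label)
    return label
```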

elif self.is_monotonic_decreasing:
# np.searchsorted expects ascending sort order, have to reverse
# everything for it to work (element ordering, search side and
# resulting value).
pos = self[::-1].searchsorted(
label, side='right' if side == 'left' else 'left')
return len(self) - pos

# In all other cases, just re-raise the KeyError
raise

if isinstance(slc, np.ndarray):
# get_loc may return a boolean array or an array of indices, which
# is OK as long as they are representable by a slice.
if com.is_bool_dtype(slc):
slc = lib.maybe_booleans_to_slice(slc.view('u1'))
else:
slc = lib.maybe_indices_to_slice(slc.astype('i8'))
if isinstance(slc, np.ndarray):
raise KeyError(
"Cannot get %s slice bound for non-unique label:"
" %r" % (side, original_label))

Member:
This is a nice addition.

Member:
Looks like it needs tests?

if isinstance(slc, slice):
if side == 'left':
return slc.start
else:
return slc.stop
else:
if side == 'right':
return slc + 1
else:
return slc
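For reference, a short usage sketch of get_slice_bound as defined above; the return values are worked out from the logic in this diff, so treat them as illustrative assumptions rather than documented output:

```python
import pandas as pd

idx = pd.Index(list('abcde'))

idx.get_slice_bound('c', side='left')    # 2: leftmost position of 'c'
idx.get_slice_bound('c', side='right')   # 3: one past the rightmost 'c'

# An absent label on a monotonically increasing index falls back to
# searchsorted, so 'bb' slots in between 'b' and 'c'.
idx.get_slice_bound('bb', side='left')   # 2

# On a monotonically decreasing index the search runs on the reversed
# index and the position is flipped back.
pd.Index(list('edcba')).get_slice_bound('bb', side='right')   # 3
```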

def slice_locs(self, start=None, end=None, step=None):
"""
Compute slice locations for input labels.

Parameters
----------
@@ -1986,51 +2062,51 @@ def slice_locs(self, start=None, end=None):

Returns
-------
(start, end) : (int, int)
start, end : int

Notes
-----
This function assumes that the data is sorted, so use at your own peril
"""
inc = (step is None or step >= 0)

is_unique = self.is_unique

def _get_slice(starting_value, offset, search_side, slice_property,
search_value):
if search_value is None:
return starting_value
if not inc:
# If it's a reverse slice, temporarily swap bounds.
start, end = end, start

try:
slc = self.get_loc(search_value)

if not is_unique:

# get_loc will return a boolean array for non_uniques
# if we are not monotonic
if isinstance(slc, (np.ndarray, Index)):
raise KeyError("cannot peform a slice operation "
"on a non-unique non-monotonic index")

if isinstance(slc, slice):
slc = getattr(slc, slice_property)
else:
slc += offset
start_slice = None
if start is not None:
start_slice = self.get_slice_bound(start, 'left')
if start_slice is None:
start_slice = 0

except KeyError:
if self.is_monotonic_increasing:
slc = self.searchsorted(search_value, side=search_side)
elif self.is_monotonic_decreasing:
search_side = 'right' if search_side == 'left' else 'left'
slc = len(self) - self[::-1].searchsorted(search_value,
side=search_side)
else:
raise
return slc
end_slice = None
if end is not None:
end_slice = self.get_slice_bound(end, 'right')
if end_slice is None:
end_slice = len(self)

start_slice = _get_slice(0, offset=0, search_side='left',
slice_property='start', search_value=start)
end_slice = _get_slice(len(self), offset=1, search_side='right',
slice_property='stop', search_value=end)
if not inc:
# Bounds at this moment are swapped, swap them back and shift by 1.
#
# slice_locs('B', 'A', step=-1): s='B', e='A'
#
# s='A' e='B'
# AFTER SWAP: | |
# v ------------------> V
# -----------------------------------
# | | |A|A|A|A| | | | | |B|B| | | | |
# -----------------------------------
# ^ <------------------ ^
# SHOULD BE: | |
# end=s-1 start=e-1
#
end_slice, start_slice = start_slice - 1, end_slice - 1

# i == -1 triggers ``len(self) + i`` selection that points to the
# last element, not before-the-first one, subtracting len(self)
# compensates that.
if end_slice == -1:
end_slice -= len(self)
if start_slice == -1:
start_slice -= len(self)

return start_slice, end_slice
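A worked sketch of what the negative-step bound computation above yields on a small index (values derived by tracing the code in this hunk, so they are illustrative rather than authoritative):

```python
import pandas as pd

idx = pd.Index(list('abcde'))

# Bounds are computed on the swapped (ascending) slice and shifted back
# by one, so both endpoints stay included:
idx.slice_locs('d', 'b', step=-1)   # (3, 0)  -> idx[3:0:-1] selects 'd', 'c', 'b'

# When the shifted end lands on -1, len(idx) is subtracted so the stop
# does not wrap around to the last element:
idx.slice_locs('c', 'a', step=-1)   # (2, -6) -> idx[2:-6:-1] selects 'c', 'b', 'a'
```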

@@ -3887,7 +3963,12 @@ def _tuple_index(self):
"""
return Index(self.values)

def slice_locs(self, start=None, end=None, strict=False):
def get_slice_bound(self, label, side):
if not isinstance(label, tuple):
label = label,
return self._partial_tup_index(label, side=side)

def slice_locs(self, start=None, end=None, step=None):
"""
For an ordered MultiIndex, compute the slice locations for input
labels. They can be tuples representing partial levels, e.g. for a
@@ -3900,7 +3981,8 @@ def slice_locs(self, start=None, end=None, strict=False):
If None, defaults to the beginning
end : label or tuple
If None, defaults to the end
strict : boolean,
step : int or None
Slice step

Returns
-------
@@ -3910,21 +3992,9 @@
-----
This function assumes that the data is sorted by the first level
"""
if start is None:
start_slice = 0
else:
if not isinstance(start, tuple):
start = start,
start_slice = self._partial_tup_index(start, side='left')

if end is None:
end_slice = len(self)
else:
if not isinstance(end, tuple):
end = end,
end_slice = self._partial_tup_index(end, side='right')

return start_slice, end_slice
# This function adds nothing to its parent implementation (the magic
# happens in get_slice_bound method), but it adds meaningful doc.
return super(MultiIndex, self).slice_locs(start, end, step)

def _partial_tup_index(self, tup, side='left'):
if len(tup) > self.lexsort_depth:
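Mirroring the MultiIndex tests further down, a brief usage sketch of partial-tuple bounds with a negative step, assuming the behaviour added by this PR:

```python
import numpy as np
import pandas as pd

s = pd.Series(np.arange(20),
              index=pd.MultiIndex.from_product([list('abcde'), np.arange(4)]))

# A scalar or partial tuple covers the whole 'd' block, so the reverse
# slice starts at the last row of that block, ('d', 3).
s.loc[('d',)::-1].head()

# Complete tuples pin each bound to a single row.
s.loc[('e', 0):('c', 2):-1]
```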
30 changes: 28 additions & 2 deletions pandas/tests/test_index.py
@@ -910,8 +910,34 @@ def test_slice_locs_na(self):
self.assertEqual(idx.slice_locs(1), (1, 3))
self.assertEqual(idx.slice_locs(np.nan), (0, 3))

idx = Index([np.nan, np.nan, 1, 2])
self.assertRaises(KeyError, idx.slice_locs, np.nan)
idx = Index([0, np.nan, np.nan, 1, 2])
self.assertEqual(idx.slice_locs(np.nan), (1, 5))

def test_slice_locs_negative_step(self):
idx = Index(list('bcdxy'))

SLC = pd.IndexSlice

def check_slice(in_slice, expected):
s_start, s_stop = idx.slice_locs(in_slice.start, in_slice.stop,
in_slice.step)
result = idx[s_start:s_stop:in_slice.step]
expected = pd.Index(list(expected))
self.assertTrue(result.equals(expected))

for in_slice, expected in [
(SLC[::-1], 'yxdcb'), (SLC['b':'y':-1], ''),
(SLC['b'::-1], 'b'), (SLC[:'b':-1], 'yxdcb'),
(SLC[:'y':-1], 'y'), (SLC['y'::-1], 'yxdcb'),
(SLC['y'::-4], 'yb'),
# absent labels
(SLC[:'a':-1], 'yxdcb'), (SLC[:'a':-2], 'ydb'),
(SLC['z'::-1], 'yxdcb'), (SLC['z'::-3], 'yc'),
(SLC['m'::-1], 'dcb'), (SLC[:'m':-1], 'yx'),
(SLC['a':'a':-1], ''), (SLC['z':'z':-1], ''),
(SLC['m':'m':-1], '')
]:
check_slice(in_slice, expected)

def test_drop(self):
n = len(self.strIndex)
58 changes: 58 additions & 0 deletions pandas/tests/test_indexing.py
@@ -4141,6 +4141,64 @@ def run_tests(df, rhs, right):

run_tests(df, rhs, right)

def test_str_label_slicing_with_negative_step(self):
SLC = pd.IndexSlice

def assert_slices_equivalent(l_slc, i_slc):
assert_series_equal(s.loc[l_slc], s.iloc[i_slc])

if not idx.is_integer():
# For integer indices, ix and plain getitem are position-based.
assert_series_equal(s[l_slc], s.iloc[i_slc])
assert_series_equal(s.ix[l_slc], s.iloc[i_slc])

for idx in [_mklbl('A', 20), np.arange(20) + 100,
np.linspace(100, 150, 20)]:
idx = Index(idx)
s = Series(np.arange(20), index=idx)
assert_slices_equivalent(SLC[idx[9]::-1], SLC[9::-1])
assert_slices_equivalent(SLC[:idx[9]:-1], SLC[:8:-1])
assert_slices_equivalent(SLC[idx[13]:idx[9]:-1], SLC[13:8:-1])
assert_slices_equivalent(SLC[idx[9]:idx[13]:-1], SLC[:0])

def test_multiindex_label_slicing_with_negative_step(self):
s = Series(np.arange(20),
MultiIndex.from_product([list('abcde'), np.arange(4)]))
SLC = pd.IndexSlice

def assert_slices_equivalent(l_slc, i_slc):
assert_series_equal(s.loc[l_slc], s.iloc[i_slc])
assert_series_equal(s[l_slc], s.iloc[i_slc])
assert_series_equal(s.ix[l_slc], s.iloc[i_slc])

assert_slices_equivalent(SLC[::-1], SLC[::-1])

assert_slices_equivalent(SLC['d'::-1], SLC[15::-1])
assert_slices_equivalent(SLC[('d',)::-1], SLC[15::-1])

assert_slices_equivalent(SLC[:'d':-1], SLC[:11:-1])
assert_slices_equivalent(SLC[:('d',):-1], SLC[:11:-1])

assert_slices_equivalent(SLC['d':'b':-1], SLC[15:3:-1])
assert_slices_equivalent(SLC[('d',):'b':-1], SLC[15:3:-1])
assert_slices_equivalent(SLC['d':('b',):-1], SLC[15:3:-1])
assert_slices_equivalent(SLC[('d',):('b',):-1], SLC[15:3:-1])
assert_slices_equivalent(SLC['b':'d':-1], SLC[:0])

assert_slices_equivalent(SLC[('c', 2)::-1], SLC[10::-1])
assert_slices_equivalent(SLC[:('c', 2):-1], SLC[:9:-1])
assert_slices_equivalent(SLC[('e', 0):('c', 2):-1], SLC[16:9:-1])

def test_slice_with_zero_step_raises(self):
s = Series(np.arange(20), index=_mklbl('A', 20))
self.assertRaisesRegexp(ValueError, 'slice step cannot be zero',
lambda: s[::0])
self.assertRaisesRegexp(ValueError, 'slice step cannot be zero',
lambda: s.loc[::0])
self.assertRaisesRegexp(ValueError, 'slice step cannot be zero',
lambda: s.ix[::0])


class TestSeriesNoneCoercion(tm.TestCase):
EXPECTED_RESULTS = [
# For numeric series, we should coerce to NaN.