From b533c19882c27673772e971aacaa5847a40dcbfd Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Wed, 3 Jul 2019 15:56:33 +0100 Subject: [PATCH 01/10] POC: add closed argument to IndexSlice --- pandas/core/indexes/base.py | 29 +++++++++++++++++--------- pandas/core/indexes/datetimes.py | 16 +++++++++++---- pandas/core/indexes/interval.py | 2 +- pandas/core/indexes/multi.py | 5 +++-- pandas/core/indexes/numeric.py | 2 +- pandas/core/indexes/period.py | 2 +- pandas/core/indexes/timedeltas.py | 2 +- pandas/core/indexing.py | 34 +++++++++++++++++++++++++------ 8 files changed, 66 insertions(+), 26 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index a0bd13f1e4f9e..e8fd21082e2c9 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4781,7 +4781,8 @@ def _get_string_slice(self, key, use_lhs=True, use_rhs=True): # overridden in DatetimeIndex, TimedeltaIndex and PeriodIndex raise NotImplementedError - def slice_indexer(self, start=None, end=None, step=None, kind=None): + def slice_indexer( + self, start=None, end=None, step=None, kind=None, closed=None): """ For an ordered or unique index, compute the slice indexer for input labels and step. @@ -4821,7 +4822,7 @@ def slice_indexer(self, start=None, end=None, step=None, kind=None): slice(1, 3) """ start_slice, end_slice = self.slice_locs(start, end, step=step, - kind=kind) + kind=kind, closed=closed) # return a slice if not is_scalar(start_slice): @@ -4883,7 +4884,7 @@ def _validate_indexer(self, form, key, kind): """ @Appender(_index_shared_docs['_maybe_cast_slice_bound']) - def _maybe_cast_slice_bound(self, label, side, kind): + def _maybe_cast_slice_bound(self, label, side, kind, closed=None): assert kind in ['ix', 'loc', 'getitem', None] # We are a plain index here (sub-class override this method if they @@ -4922,7 +4923,7 @@ def _get_loc_only_exact_matches(self, key): """ return self.get_loc(key) - def get_slice_bound(self, label, side, kind): + def get_slice_bound(self, label, side, kind, closed=None): """ Calculate slice bound that corresponds to given label. @@ -4951,7 +4952,7 @@ def get_slice_bound(self, label, side, kind): # For datetime indices label may be a string that has to be converted # to datetime boundary according to its resolution. - label = self._maybe_cast_slice_bound(label, side, kind) + label = self._maybe_cast_slice_bound(label, side, kind, closed=closed) # we need to look up the label try: @@ -4981,11 +4982,18 @@ def get_slice_bound(self, label, side, kind): return slc.stop else: if side == 'right': - return slc + 1 + if closed in ['right', 'both']: + return slc + else: + return slc + 1 else: - return slc + if closed in ['left', 'both']: + return slc + 1 + else: + return slc - def slice_locs(self, start=None, end=None, step=None, kind=None): + def slice_locs( + self, start=None, end=None, step=None, kind=None, closed=None): """ Compute slice locations for input labels. @@ -5039,13 +5047,14 @@ def slice_locs(self, start=None, end=None, step=None, kind=None): start_slice = None if start is not None: - start_slice = self.get_slice_bound(start, 'left', kind) + start_slice = self.get_slice_bound( + start, 'left', kind, closed=closed) if start_slice is None: start_slice = 0 end_slice = None if end is not None: - end_slice = self.get_slice_bound(end, 'right', kind) + end_slice = self.get_slice_bound(end, 'right', kind, closed=closed) if end_slice is None: end_slice = len(self) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index e2658b66f83ba..4f7c84f137bb2 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -981,7 +981,7 @@ def get_loc(self, key, method=None, tolerance=None): raise e raise KeyError(key) - def _maybe_cast_slice_bound(self, label, side, kind): + def _maybe_cast_slice_bound(self, label, side, kind, closed=None): """ If label is a string, cast it to datetime according to resolution. @@ -1018,7 +1018,13 @@ def _maybe_cast_slice_bound(self, label, side, kind): # and length 1 index) if self._is_strictly_monotonic_decreasing and len(self) > 1: return upper if side == 'left' else lower - return lower if side == 'left' else upper + if side == 'left': + return lower + else: + if closed in ['right', 'both']: + return lower + else: + return upper else: return label @@ -1030,7 +1036,8 @@ def _get_string_slice(self, key, use_lhs=True, use_rhs=True): use_rhs=use_rhs) return loc - def slice_indexer(self, start=None, end=None, step=None, kind=None): + def slice_indexer( + self, start=None, end=None, step=None, kind=None, closed=None): """ Return indexer for specified label slice. Index.slice_indexer, customized to handle time slicing. @@ -1056,7 +1063,8 @@ def slice_indexer(self, start=None, end=None, step=None, kind=None): raise KeyError('Cannot mix time and non-time slice keys') try: - return Index.slice_indexer(self, start, end, step, kind=kind) + return Index.slice_indexer( + self, start, end, step, kind=kind, closed=closed) except KeyError: # For historical reasons DatetimeIndex by default supports # value-based partial (aka string) slices on non-monotonic arrays, diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 9f9ebcf67cee6..8ca083ab73271 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -564,7 +564,7 @@ def _convert_scalar_indexer(self, key, kind=None): return super()._convert_scalar_indexer(key, kind=kind) return key - def _maybe_cast_slice_bound(self, label, side, kind): + def _maybe_cast_slice_bound(self, label, side, kind, closed=None): return getattr(self, side)._maybe_cast_slice_bound(label, side, kind) @Appender(_index_shared_docs['_convert_list_indexer']) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index fd64f18c50b34..cb3d09b6b1be8 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2361,13 +2361,14 @@ def reindex(self, target, method=None, level=None, limit=None, return target, indexer - def get_slice_bound(self, label, side, kind): + def get_slice_bound(self, label, side, kind, closed=None): if not isinstance(label, tuple): label = label, return self._partial_tup_index(label, side=side) - def slice_locs(self, start=None, end=None, step=None, kind=None): + def slice_locs( + self, start=None, end=None, step=None, kind=None, closed=None): """ For an ordered MultiIndex, compute the slice locations for input labels. diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 5f9c1f22887cc..1763f5956d4eb 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -59,7 +59,7 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, return cls._simple_new(subarr, name=name) @Appender(_index_shared_docs['_maybe_cast_slice_bound']) - def _maybe_cast_slice_bound(self, label, side, kind): + def _maybe_cast_slice_bound(self, label, side, kind, closed=None): assert kind in ['ix', 'loc', 'getitem', None] # we will try to coerce to integers diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index f61b2e679f0c8..1af8c631a706d 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -689,7 +689,7 @@ def get_loc(self, key, method=None, tolerance=None): except KeyError: raise KeyError(key) - def _maybe_cast_slice_bound(self, label, side, kind): + def _maybe_cast_slice_bound(self, label, side, kind, closed=None): """ If label is a string or a datetime, cast it to Period.ordinal according to resolution. diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index ba5507fa71e8c..529e6f1caf6f6 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -531,7 +531,7 @@ def get_loc(self, key, method=None, tolerance=None): except (KeyError, ValueError): raise KeyError(key) - def _maybe_cast_slice_bound(self, label, side, kind): + def _maybe_cast_slice_bound(self, label, side, kind, closed=None): """ If label is a string, cast it to timedelta according to resolution. diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 7e199c6c9f66b..ba495c4da4475 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -17,6 +17,8 @@ import pandas.core.common as com from pandas.core.index import Index, MultiIndex +_VALID_CLOSED = {'left', 'right', 'both', 'neither'} + # the supported indexers def get_indexers_list(): @@ -76,8 +78,21 @@ class _IndexSlice: B1 10 11 """ + def __init__(self, closed=None): + if closed is not None and closed not in _VALID_CLOSED: + msg = "invalid option for 'closed': {closed}".format(closed=closed) + raise ValueError(msg) + self.closed = closed + + def __call__(self, closed=None): + return _IndexSlice(closed=closed) + def __getitem__(self, arg): - return arg + if self.closed is None: + return arg + else: + self.arg = arg + return self IndexSlice = _IndexSlice() @@ -1425,8 +1440,9 @@ def __getitem__(self, key): # we by definition only have the 0th axis axis = self.axis or 0 - maybe_callable = com.apply_if_callable(key, self.obj) - return self._getitem_axis(maybe_callable, axis=axis) + if not isinstance(key, _IndexSlice): + key = com.apply_if_callable(key, self.obj) + return self._getitem_axis(key, axis=axis) def _is_scalar_access(self, key): raise NotImplementedError() @@ -1452,6 +1468,11 @@ def _get_slice_axis(self, slice_obj, axis=None): """ this is pretty simple as we just have to deal with labels """ if axis is None: axis = self.axis or 0 + if isinstance(slice_obj, _IndexSlice): + closed = slice_obj.closed + slice_obj = slice_obj.arg + else: + closed = None obj = self.obj if not need_slice(slice_obj): @@ -1459,7 +1480,8 @@ def _get_slice_axis(self, slice_obj, axis=None): labels = obj._get_axis(axis) indexer = labels.slice_indexer(slice_obj.start, slice_obj.stop, - slice_obj.step, kind=self.name) + slice_obj.step, kind=self.name, + closed=closed) if isinstance(indexer, slice): return self._slice(indexer, axis=axis, kind='iloc') @@ -1718,7 +1740,7 @@ def _validate_key(self, key, axis): # slice of integers (only if in the labels) # boolean - if isinstance(key, slice): + if isinstance(key, (slice, _IndexSlice)): return if com.is_bool_indexer(key): @@ -1791,7 +1813,7 @@ def _getitem_axis(self, key, axis=None): labels = self.obj._get_axis(axis) key = self._get_partial_string_timestamp_match_key(key, labels) - if isinstance(key, slice): + if isinstance(key, (slice, _IndexSlice)): self._validate_key(key, axis) return self._get_slice_axis(key, axis=axis) elif com.is_bool_indexer(key): From 3a4d38ebb0197010b31e678929280903b5972fc9 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Thu, 4 Jul 2019 21:11:44 +0100 Subject: [PATCH 02/10] apply black code style to changes --- pandas/core/indexes/base.py | 26 ++++++++++++-------------- pandas/core/indexes/datetimes.py | 12 +++++------- pandas/core/indexes/multi.py | 3 +-- pandas/core/indexes/numeric.py | 4 ++-- pandas/core/indexing.py | 12 ++++++++---- 5 files changed, 28 insertions(+), 29 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 537fc6a39a8ab..ecd6d6f4a4a17 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4990,8 +4990,7 @@ def _get_string_slice(self, key, use_lhs=True, use_rhs=True): # overridden in DatetimeIndex, TimedeltaIndex and PeriodIndex raise NotImplementedError - def slice_indexer( - self, start=None, end=None, step=None, kind=None, closed=None): + def slice_indexer(self, start=None, end=None, step=None, kind=None, closed=None): """ For an ordered or unique index, compute the slice indexer for input labels and step. @@ -5030,8 +5029,9 @@ def slice_indexer( >>> idx.slice_indexer(start='b', end=('c', 'g')) slice(1, 3) """ - start_slice, end_slice = self.slice_locs(start, end, step=step, - kind=kind, closed=closed) + start_slice, end_slice = self.slice_locs( + start, end, step=step, kind=kind, closed=closed + ) # return a slice if not is_scalar(start_slice): @@ -5094,9 +5094,9 @@ def _validate_indexer(self, form, key, kind): """ - @Appender(_index_shared_docs['_maybe_cast_slice_bound']) + @Appender(_index_shared_docs["_maybe_cast_slice_bound"]) def _maybe_cast_slice_bound(self, label, side, kind, closed=None): - assert kind in ['ix', 'loc', 'getitem', None] + assert kind in ["ix", "loc", "getitem", None] # We are a plain index here (sub-class override this method if they # wish to have special treatment for floats/ints, e.g. Float64Index and @@ -5195,19 +5195,18 @@ def get_slice_bound(self, label, side, kind, closed=None): else: return slc.stop else: - if side == 'right': - if closed in ['right', 'both']: + if side == "right": + if closed in ["right", "both"]: return slc else: return slc + 1 else: - if closed in ['left', 'both']: + if closed in ["left", "both"]: return slc + 1 else: return slc - def slice_locs( - self, start=None, end=None, step=None, kind=None, closed=None): + def slice_locs(self, start=None, end=None, step=None, kind=None, closed=None): """ Compute slice locations for input labels. @@ -5259,14 +5258,13 @@ def slice_locs( start_slice = None if start is not None: - start_slice = self.get_slice_bound( - start, 'left', kind, closed=closed) + start_slice = self.get_slice_bound(start, "left", kind, closed=closed) if start_slice is None: start_slice = 0 end_slice = None if end is not None: - end_slice = self.get_slice_bound(end, 'right', kind, closed=closed) + end_slice = self.get_slice_bound(end, "right", kind, closed=closed) if end_slice is None: end_slice = len(self) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index c3d68890a27f5..6f5f1601379c6 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -1110,11 +1110,11 @@ def _maybe_cast_slice_bound(self, label, side, kind, closed=None): # length > 1 (is_monotonic_decreasing gives True for empty # and length 1 index) if self._is_strictly_monotonic_decreasing and len(self) > 1: - return upper if side == 'left' else lower - if side == 'left': + return upper if side == "left" else lower + if side == "left": return lower else: - if closed in ['right', 'both']: + if closed in ["right", "both"]: return lower else: return upper @@ -1127,8 +1127,7 @@ def _get_string_slice(self, key, use_lhs=True, use_rhs=True): loc = self._partial_date_slice(reso, parsed, use_lhs=use_lhs, use_rhs=use_rhs) return loc - def slice_indexer( - self, start=None, end=None, step=None, kind=None, closed=None): + def slice_indexer(self, start=None, end=None, step=None, kind=None, closed=None): """ Return indexer for specified label slice. Index.slice_indexer, customized to handle time slicing. @@ -1154,8 +1153,7 @@ def slice_indexer( raise KeyError("Cannot mix time and non-time slice keys") try: - return Index.slice_indexer( - self, start, end, step, kind=kind, closed=closed) + return Index.slice_indexer(self, start, end, step, kind=kind, closed=closed) except KeyError: # For historical reasons DatetimeIndex by default supports # value-based partial (aka string) slices on non-monotonic arrays, diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 07f8aad51b1a6..0695f90e0770f 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2514,8 +2514,7 @@ def get_slice_bound(self, label, side, kind, closed=None): label = (label,) return self._partial_tup_index(label, side=side) - def slice_locs( - self, start=None, end=None, step=None, kind=None, closed=None): + def slice_locs(self, start=None, end=None, step=None, kind=None, closed=None): """ For an ordered MultiIndex, compute the slice locations for input labels. diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 5ec0d77fccf76..0f2eaca013b19 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -72,9 +72,9 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, fastpath=None): name = data.name return cls._simple_new(subarr, name=name) - @Appender(_index_shared_docs['_maybe_cast_slice_bound']) + @Appender(_index_shared_docs["_maybe_cast_slice_bound"]) def _maybe_cast_slice_bound(self, label, side, kind, closed=None): - assert kind in ['ix', 'loc', 'getitem', None] + assert kind in ["ix", "loc", "getitem", None] # we will try to coerce to integers return self._maybe_cast_indexer(label) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 7ed75273c99d6..fe2771aeb340e 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -26,7 +26,7 @@ import pandas.core.common as com from pandas.core.index import Index, MultiIndex -_VALID_CLOSED = {'left', 'right', 'both', 'neither'} +_VALID_CLOSED = {"left", "right", "both", "neither"} # the supported indexers @@ -1509,9 +1509,13 @@ def _get_slice_axis(self, slice_obj, axis=None): return obj.copy(deep=False) labels = obj._get_axis(axis) - indexer = labels.slice_indexer(slice_obj.start, slice_obj.stop, - slice_obj.step, kind=self.name, - closed=closed) + indexer = labels.slice_indexer( + slice_obj.start, + slice_obj.stop, + slice_obj.step, + kind=self.name, + closed=closed, + ) if isinstance(indexer, slice): return self._slice(indexer, axis=axis, kind="iloc") From 12efbd1d2cc8c3825707d1fc42c2afc483481532 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Thu, 4 Jul 2019 21:17:43 +0100 Subject: [PATCH 03/10] fix previous merge error --- pandas/core/indexes/base.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index ecd6d6f4a4a17..14b5e676ec529 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -5127,13 +5127,6 @@ def _searchsorted_monotonic(self, label, side="left"): raise ValueError("index must be monotonic increasing or decreasing") - def _get_loc_only_exact_matches(self, key): - """ - This is overridden on subclasses (namely, IntervalIndex) to control - get_slice_bound. - """ - return self.get_loc(key) - def get_slice_bound(self, label, side, kind, closed=None): """ Calculate slice bound that corresponds to given label. From 590590e5383404fa7cd48f36d1431a5bf6e91f6d Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Thu, 4 Jul 2019 21:36:32 +0100 Subject: [PATCH 04/10] fix swapped open/closed --- pandas/core/indexes/base.py | 4 ++-- pandas/core/indexes/datetimes.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 14b5e676ec529..5d5a6c3d979f0 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -5189,12 +5189,12 @@ def get_slice_bound(self, label, side, kind, closed=None): return slc.stop else: if side == "right": - if closed in ["right", "both"]: + if closed in ["left", "neither"]: return slc else: return slc + 1 else: - if closed in ["left", "both"]: + if closed in ["right", "neither"]: return slc + 1 else: return slc diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 6f5f1601379c6..d4bf8e7e2a0cb 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -1114,7 +1114,7 @@ def _maybe_cast_slice_bound(self, label, side, kind, closed=None): if side == "left": return lower else: - if closed in ["right", "both"]: + if closed in ["left", "neither"]: return lower else: return upper From 6c0059b2e720f2e5653c4930f41fdce8f4a6ad57 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Fri, 5 Jul 2019 00:22:53 +0100 Subject: [PATCH 05/10] add basic tests --- pandas/tests/indexing/test_slice.py | 49 +++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 pandas/tests/indexing/test_slice.py diff --git a/pandas/tests/indexing/test_slice.py b/pandas/tests/indexing/test_slice.py new file mode 100644 index 0000000000000..b9a1876c1f9f6 --- /dev/null +++ b/pandas/tests/indexing/test_slice.py @@ -0,0 +1,49 @@ +import pytest + +from pandas import DatetimeIndex, IndexSlice, Series, Timestamp +from pandas.util.testing import assert_series_equal + + +def test_indexslice_bad_kwarg_raises(): + with pytest.raises(ValueError, match="invalid option for 'closed': foo"): + IndexSlice(closed="foo") + + +@pytest.mark.parametrize( + "closed, expected_slice", + [ + ("left", slice(0, 1)), + ("right", slice(1, 2)), + ("both", slice(0, 2)), + ("neither", slice(0, 0)), + ], +) +@pytest.mark.parametrize("left", [Timestamp("2001-01-01 23:50"), "2001-01-01"]) +@pytest.mark.parametrize("right", [Timestamp("2001-01-02 00:00"), "2001-01-02"]) +def test_series_getitem_closed_kwarg_dates(closed, left, right, expected_slice): + # gh-27209 + dates = ["2001-01-01 23:50", "2001-01-02 00:00", "2001-01-03 00:08"] + ser = Series(range(3), DatetimeIndex(dates)) + expected = ser.iloc[expected_slice] + idx = IndexSlice(closed=closed) + result = ser.loc[idx[left:right]] + assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "closed, expected_slice", + [ + ("left", slice(0, 1)), + ("right", slice(1, 2)), + ("both", slice(0, 2)), + ("neither", slice(0, 0)), + ], +) +def test_series_getitem_closed_kwarg_int_labels(closed, expected_slice): + # gh-27209 + int_labels = [50, 70, 80] + ser = Series(range(3), index=int_labels) + expected = ser.iloc[expected_slice] + idx = IndexSlice(closed=closed) + result = ser.loc[idx[50:70]] + assert_series_equal(result, expected) From 5a94d1635a832c065e7588f66fc52cf3b15a34bc Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Fri, 5 Jul 2019 00:23:15 +0100 Subject: [PATCH 06/10] fix failing tests --- pandas/core/indexes/datetimes.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index d4bf8e7e2a0cb..f18d06b0685c2 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -1112,7 +1112,10 @@ def _maybe_cast_slice_bound(self, label, side, kind, closed=None): if self._is_strictly_monotonic_decreasing and len(self) > 1: return upper if side == "left" else lower if side == "left": - return lower + if closed in ["right", "neither"]: + return upper + else: + return lower else: if closed in ["left", "neither"]: return lower From 2137080da93d4f714d3c557adf94c6c3e8d1f92a Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Fri, 5 Jul 2019 00:40:22 +0100 Subject: [PATCH 07/10] refactor --- pandas/core/indexes/base.py | 10 ++-------- pandas/core/indexes/datetimes.py | 10 ++-------- 2 files changed, 4 insertions(+), 16 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 5d5a6c3d979f0..743716696ed24 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -5189,15 +5189,9 @@ def get_slice_bound(self, label, side, kind, closed=None): return slc.stop else: if side == "right": - if closed in ["left", "neither"]: - return slc - else: - return slc + 1 + return slc + 1 if closed not in ["left", "neither"] else slc else: - if closed in ["right", "neither"]: - return slc + 1 - else: - return slc + return slc if closed not in ["right", "neither"] else slc + 1 def slice_locs(self, start=None, end=None, step=None, kind=None, closed=None): """ diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index f18d06b0685c2..6d6324e9d834e 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -1112,15 +1112,9 @@ def _maybe_cast_slice_bound(self, label, side, kind, closed=None): if self._is_strictly_monotonic_decreasing and len(self) > 1: return upper if side == "left" else lower if side == "left": - if closed in ["right", "neither"]: - return upper - else: - return lower + return lower if closed not in ["right", "neither"] else upper else: - if closed in ["left", "neither"]: - return lower - else: - return upper + return upper if closed not in ["left", "neither"] else lower else: return label From d13fd2e3679a90533000a086382fcb2b2eae0ba6 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Fri, 5 Jul 2019 06:20:14 +0100 Subject: [PATCH 08/10] add __getitem__ test for DatetimeIndex --- pandas/tests/indexing/test_slice.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/pandas/tests/indexing/test_slice.py b/pandas/tests/indexing/test_slice.py index b9a1876c1f9f6..8f20e75bb50aa 100644 --- a/pandas/tests/indexing/test_slice.py +++ b/pandas/tests/indexing/test_slice.py @@ -12,6 +12,7 @@ def test_indexslice_bad_kwarg_raises(): @pytest.mark.parametrize( "closed, expected_slice", [ + (None, slice(0, 2)), # default ("left", slice(0, 1)), ("right", slice(1, 2)), ("both", slice(0, 2)), @@ -20,13 +21,18 @@ def test_indexslice_bad_kwarg_raises(): ) @pytest.mark.parametrize("left", [Timestamp("2001-01-01 23:50"), "2001-01-01"]) @pytest.mark.parametrize("right", [Timestamp("2001-01-02 00:00"), "2001-01-02"]) -def test_series_getitem_closed_kwarg_dates(closed, left, right, expected_slice): +@pytest.mark.parametrize( + "indexer", [(lambda x: x), (lambda x: x.loc)], ids=["getitem", "loc"] +) +def test_series_getitem_closed_kwarg_dates( + indexer, closed, left, right, expected_slice +): # gh-27209 dates = ["2001-01-01 23:50", "2001-01-02 00:00", "2001-01-03 00:08"] ser = Series(range(3), DatetimeIndex(dates)) expected = ser.iloc[expected_slice] idx = IndexSlice(closed=closed) - result = ser.loc[idx[left:right]] + result = indexer(ser)[idx[left:right]] assert_series_equal(result, expected) From da9b49e1cbe97a9047b289598b3d1845d9303c34 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Fri, 5 Jul 2019 06:20:29 +0100 Subject: [PATCH 09/10] fix failing tests --- pandas/core/dtypes/generic.py | 1 + pandas/core/indexes/base.py | 15 +++++++++++++-- pandas/core/indexing.py | 12 +++++++----- pandas/core/series.py | 6 ++++-- 4 files changed, 25 insertions(+), 9 deletions(-) diff --git a/pandas/core/dtypes/generic.py b/pandas/core/dtypes/generic.py index de41644f09b66..6b8d40c0d05dc 100644 --- a/pandas/core/dtypes/generic.py +++ b/pandas/core/dtypes/generic.py @@ -76,6 +76,7 @@ def _check(cls, inst): ("extension", "categorical", "periodarray", "datetimearray", "timedeltaarray"), ) ABCPandasArray = create_pandas_abc_type("ABCPandasArray", "_typ", ("npy_extension",)) +ABCIndexSlice = create_pandas_abc_type("ABCIndexSlc", "_typ", ("indexslice",)) class _ABCGeneric(type): diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 743716696ed24..1ffb645374e9e 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -51,6 +51,7 @@ ABCDateOffset, ABCDatetimeArray, ABCIndexClass, + ABCIndexSlice, ABCMultiIndex, ABCPandasArray, ABCPeriodIndex, @@ -3154,9 +3155,15 @@ def _convert_slice_indexer(self, key, kind=None): assert kind in ["ix", "loc", "getitem", "iloc", None] # if we are not a slice, then we are done - if not isinstance(key, slice): + if not isinstance(key, (slice, ABCIndexSlice)): return key + if isinstance(key, ABCIndexSlice): + closed = key.closed + key = key.arg + else: + closed = None + # validate iloc if kind == "iloc": return slice( @@ -3209,7 +3216,9 @@ def is_int(v): indexer = key else: try: - indexer = self.slice_indexer(start, stop, step, kind=kind) + indexer = self.slice_indexer( + start, stop, step, kind=kind, closed=closed + ) except Exception: if is_index_slice: if self.is_integer(): @@ -4718,6 +4727,8 @@ def get_value(self, series, key): raise elif is_integer(key): return s[key] + elif isinstance(key, ABCIndexSlice): + raise InvalidIndexError(key) s = com.values_from_object(series) k = com.values_from_object(key) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index fe2771aeb340e..da995608c49e2 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -20,7 +20,7 @@ is_sequence, is_sparse, ) -from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries +from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexSlice, ABCSeries from pandas.core.dtypes.missing import _infer_fill_value, isna import pandas.core.common as com @@ -87,6 +87,8 @@ class _IndexSlice: B1 10 11 """ + _typ = "indexslice" + def __init__(self, closed=None): if closed is not None and closed not in _VALID_CLOSED: msg = "invalid option for 'closed': {closed}".format(closed=closed) @@ -1470,7 +1472,7 @@ def __getitem__(self, key): # we by definition only have the 0th axis axis = self.axis or 0 - if not isinstance(key, _IndexSlice): + if not isinstance(key, ABCIndexSlice): key = com.apply_if_callable(key, self.obj) return self._getitem_axis(key, axis=axis) @@ -1498,7 +1500,7 @@ def _get_slice_axis(self, slice_obj, axis=None): """ this is pretty simple as we just have to deal with labels """ if axis is None: axis = self.axis or 0 - if isinstance(slice_obj, _IndexSlice): + if isinstance(slice_obj, ABCIndexSlice): closed = slice_obj.closed slice_obj = slice_obj.arg else: @@ -1776,7 +1778,7 @@ def _validate_key(self, key, axis): # slice of integers (only if in the labels) # boolean - if isinstance(key, (slice, _IndexSlice)): + if isinstance(key, (slice, ABCIndexSlice)): return if com.is_bool_indexer(key): @@ -1848,7 +1850,7 @@ def _getitem_axis(self, key, axis=None): labels = self.obj._get_axis(axis) key = self._get_partial_string_timestamp_match_key(key, labels) - if isinstance(key, (slice, _IndexSlice)): + if isinstance(key, (slice, ABCIndexSlice)): self._validate_key(key, axis) return self._get_slice_axis(key, axis=axis) elif com.is_bool_indexer(key): diff --git a/pandas/core/series.py b/pandas/core/series.py index b3a7f38aef8ef..799ffd8bc01b2 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -41,6 +41,7 @@ ABCDataFrame, ABCDatetimeArray, ABCDatetimeIndex, + ABCIndexSlice, ABCSeries, ABCSparseArray, ABCSparseSeries, @@ -1069,7 +1070,8 @@ def _slice(self, slobj, axis=0, kind=None): return self._get_values(slobj) def __getitem__(self, key): - key = com.apply_if_callable(key, self) + if not isinstance(key, ABCIndexSlice): + key = com.apply_if_callable(key, self) try: result = self.index.get_value(self, key) @@ -1117,7 +1119,7 @@ def __getitem__(self, key): def _get_with(self, key): # other: fancy integer or otherwise - if isinstance(key, slice): + if isinstance(key, (slice, ABCIndexSlice)): indexer = self.index._convert_slice_indexer(key, kind="getitem") return self._get_values(indexer) elif isinstance(key, ABCDataFrame): From e62ba195dde4d3e1f1660164db250c44216980cd Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Fri, 5 Jul 2019 06:57:46 +0100 Subject: [PATCH 10/10] refactor --- pandas/core/indexes/base.py | 4 +--- pandas/core/indexing.py | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 1ffb645374e9e..29f8cd25484c6 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3151,7 +3151,7 @@ def _convert_scalar_indexer(self, key, kind=None): """ @Appender(_index_shared_docs["_convert_slice_indexer"]) - def _convert_slice_indexer(self, key, kind=None): + def _convert_slice_indexer(self, key, kind=None, closed=None): assert kind in ["ix", "loc", "getitem", "iloc", None] # if we are not a slice, then we are done @@ -3161,8 +3161,6 @@ def _convert_slice_indexer(self, key, kind=None): if isinstance(key, ABCIndexSlice): closed = key.closed key = key.arg - else: - closed = None # validate iloc if kind == "iloc": diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index da995608c49e2..530e2cedc927c 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1496,15 +1496,13 @@ def _getbool_axis(self, key, axis=None): except Exception as detail: raise self._exception(detail) - def _get_slice_axis(self, slice_obj, axis=None): + def _get_slice_axis(self, slice_obj, axis=None, closed=None): """ this is pretty simple as we just have to deal with labels """ if axis is None: axis = self.axis or 0 if isinstance(slice_obj, ABCIndexSlice): closed = slice_obj.closed slice_obj = slice_obj.arg - else: - closed = None obj = self.obj if not need_slice(slice_obj):