diff --git a/pandas/core/dtypes/generic.py b/pandas/core/dtypes/generic.py
index de41644f09b66..6b8d40c0d05dc 100644
--- a/pandas/core/dtypes/generic.py
+++ b/pandas/core/dtypes/generic.py
@@ -76,6 +76,7 @@ def _check(cls, inst):
     ("extension", "categorical", "periodarray", "datetimearray", "timedeltaarray"),
 )
 ABCPandasArray = create_pandas_abc_type("ABCPandasArray", "_typ", ("npy_extension",))
+ABCIndexSlice = create_pandas_abc_type("ABCIndexSlice", "_typ", ("indexslice",))
 
 
 class _ABCGeneric(type):
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 973a022cfc3f1..29f8cd25484c6 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -51,6 +51,7 @@
     ABCDateOffset,
     ABCDatetimeArray,
     ABCIndexClass,
+    ABCIndexSlice,
     ABCMultiIndex,
     ABCPandasArray,
     ABCPeriodIndex,
@@ -3150,13 +3151,17 @@ def _convert_scalar_indexer(self, key, kind=None):
     """
 
     @Appender(_index_shared_docs["_convert_slice_indexer"])
-    def _convert_slice_indexer(self, key, kind=None):
+    def _convert_slice_indexer(self, key, kind=None, closed=None):
         assert kind in ["ix", "loc", "getitem", "iloc", None]
 
         # if we are not a slice, then we are done
-        if not isinstance(key, slice):
+        if not isinstance(key, (slice, ABCIndexSlice)):
             return key
 
+        if isinstance(key, ABCIndexSlice):
+            closed = key.closed
+            key = key.arg
+
         # validate iloc
         if kind == "iloc":
             return slice(
@@ -3209,7 +3214,9 @@ def is_int(v):
             indexer = key
         else:
             try:
-                indexer = self.slice_indexer(start, stop, step, kind=kind)
+                indexer = self.slice_indexer(
+                    start, stop, step, kind=kind, closed=closed
+                )
             except Exception:
                 if is_index_slice:
                     if self.is_integer():
@@ -4718,6 +4725,8 @@ def get_value(self, series, key):
                     raise
                 elif is_integer(key):
                     return s[key]
+        elif isinstance(key, ABCIndexSlice):
+            raise InvalidIndexError(key)
 
         s = com.values_from_object(series)
         k = com.values_from_object(key)
@@ -4990,7 +4999,7 @@ def _get_string_slice(self, key, use_lhs=True, use_rhs=True):
         # overridden in DatetimeIndex, TimedeltaIndex and PeriodIndex
         raise NotImplementedError
 
-    def slice_indexer(self, start=None, end=None, step=None, kind=None):
+    def slice_indexer(self, start=None, end=None, step=None, kind=None, closed=None):
         """
         For an ordered or unique index, compute the slice indexer for input
         labels and step.
@@ -5029,7 +5038,9 @@ def slice_indexer(self, start=None, end=None, step=None, kind=None):
         >>> idx.slice_indexer(start='b', end=('c', 'g'))
         slice(1, 3)
         """
-        start_slice, end_slice = self.slice_locs(start, end, step=step, kind=kind)
+        start_slice, end_slice = self.slice_locs(
+            start, end, step=step, kind=kind, closed=closed
+        )
 
         # return a slice
         if not is_scalar(start_slice):
@@ -5093,7 +5104,7 @@ def _validate_indexer(self, form, key, kind):
     """
 
     @Appender(_index_shared_docs["_maybe_cast_slice_bound"])
-    def _maybe_cast_slice_bound(self, label, side, kind):
+    def _maybe_cast_slice_bound(self, label, side, kind, closed=None):
         assert kind in ["ix", "loc", "getitem", None]
 
         # We are a plain index here (sub-class override this method if they
@@ -5125,7 +5136,7 @@ def _searchsorted_monotonic(self, label, side="left"):
 
         raise ValueError("index must be monotonic increasing or decreasing")
 
-    def get_slice_bound(self, label, side, kind):
+    def get_slice_bound(self, label, side, kind, closed=None):
         """
         Calculate slice bound that corresponds to given label.
 
@@ -5155,7 +5166,7 @@ def get_slice_bound(self, label, side, kind):
 
         # For datetime indices label may be a string that has to be converted
         # to datetime boundary according to its resolution.
-        label = self._maybe_cast_slice_bound(label, side, kind)
+        label = self._maybe_cast_slice_bound(label, side, kind, closed=closed)
 
         # we need to look up the label
         try:
@@ -5187,11 +5198,11 @@ def get_slice_bound(self, label, side, kind):
                 return slc.stop
         else:
             if side == "right":
-                return slc + 1
+                return slc + 1 if closed not in ["left", "neither"] else slc
             else:
-                return slc
+                return slc if closed not in ["right", "neither"] else slc + 1
 
-    def slice_locs(self, start=None, end=None, step=None, kind=None):
+    def slice_locs(self, start=None, end=None, step=None, kind=None, closed=None):
         """
         Compute slice locations for input labels.
 
@@ -5243,13 +5254,13 @@ def slice_locs(self, start=None, end=None, step=None, kind=None):
 
         start_slice = None
         if start is not None:
-            start_slice = self.get_slice_bound(start, "left", kind)
+            start_slice = self.get_slice_bound(start, "left", kind, closed=closed)
         if start_slice is None:
             start_slice = 0
 
         end_slice = None
         if end is not None:
-            end_slice = self.get_slice_bound(end, "right", kind)
+            end_slice = self.get_slice_bound(end, "right", kind, closed=closed)
         if end_slice is None:
             end_slice = len(self)
 
diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py
index 5024eebe03bb4..6d6324e9d834e 100644
--- a/pandas/core/indexes/datetimes.py
+++ b/pandas/core/indexes/datetimes.py
@@ -1075,7 +1075,7 @@ def get_loc(self, key, method=None, tolerance=None):
                     raise e
                 raise KeyError(key)
 
-    def _maybe_cast_slice_bound(self, label, side, kind):
+    def _maybe_cast_slice_bound(self, label, side, kind, closed=None):
         """
         If label is a string, cast it to datetime according to resolution.
 
@@ -1111,7 +1111,10 @@ def _maybe_cast_slice_bound(self, label, side, kind):
             # and length 1 index)
             if self._is_strictly_monotonic_decreasing and len(self) > 1:
                 return upper if side == "left" else lower
-            return lower if side == "left" else upper
+            if side == "left":
+                return lower if closed not in ["right", "neither"] else upper
+            else:
+                return upper if closed not in ["left", "neither"] else lower
         else:
             return label
 
@@ -1121,7 +1124,7 @@ def _get_string_slice(self, key, use_lhs=True, use_rhs=True):
         loc = self._partial_date_slice(reso, parsed, use_lhs=use_lhs, use_rhs=use_rhs)
         return loc
 
-    def slice_indexer(self, start=None, end=None, step=None, kind=None):
+    def slice_indexer(self, start=None, end=None, step=None, kind=None, closed=None):
         """
         Return indexer for specified label slice.
         Index.slice_indexer, customized to handle time slicing.
@@ -1147,7 +1150,7 @@ def slice_indexer(self, start=None, end=None, step=None, kind=None):
             raise KeyError("Cannot mix time and non-time slice keys")
 
         try:
-            return Index.slice_indexer(self, start, end, step, kind=kind)
+            return Index.slice_indexer(self, start, end, step, kind=kind, closed=closed)
         except KeyError:
             # For historical reasons DatetimeIndex by default supports
             # value-based partial (aka string) slices on non-monotonic arrays,
diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py
index b14cff8cc6ade..eeb790d837c2e 100644
--- a/pandas/core/indexes/interval.py
+++ b/pandas/core/indexes/interval.py
@@ -610,7 +610,7 @@ def _convert_scalar_indexer(self, key, kind=None):
             return super()._convert_scalar_indexer(key, kind=kind)
         return key
 
-    def _maybe_cast_slice_bound(self, label, side, kind):
+    def _maybe_cast_slice_bound(self, label, side, kind, closed=None):
         return getattr(self, side)._maybe_cast_slice_bound(label, side, kind)
 
     @Appender(_index_shared_docs["_convert_list_indexer"])
diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
index 71b551adaf3ef..0695f90e0770f 100644
--- a/pandas/core/indexes/multi.py
+++ b/pandas/core/indexes/multi.py
@@ -2508,13 +2508,13 @@ def reindex(self, target, method=None, level=None, limit=None, tolerance=None):
 
         return target, indexer
 
-    def get_slice_bound(self, label, side, kind):
+    def get_slice_bound(self, label, side, kind, closed=None):
 
         if not isinstance(label, tuple):
             label = (label,)
         return self._partial_tup_index(label, side=side)
 
-    def slice_locs(self, start=None, end=None, step=None, kind=None):
+    def slice_locs(self, start=None, end=None, step=None, kind=None, closed=None):
         """
         For an ordered MultiIndex, compute the slice locations for input
         labels.
diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py
index daf26d53aa6e2..0f2eaca013b19 100644
--- a/pandas/core/indexes/numeric.py
+++ b/pandas/core/indexes/numeric.py
@@ -73,7 +73,7 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, fastpath=None):
         return cls._simple_new(subarr, name=name)
 
     @Appender(_index_shared_docs["_maybe_cast_slice_bound"])
-    def _maybe_cast_slice_bound(self, label, side, kind):
+    def _maybe_cast_slice_bound(self, label, side, kind, closed=None):
         assert kind in ["ix", "loc", "getitem", None]
 
         # we will try to coerce to integers
diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py
index 0013df44614e8..654752e2e9f90 100644
--- a/pandas/core/indexes/period.py
+++ b/pandas/core/indexes/period.py
@@ -708,7 +708,7 @@ def get_loc(self, key, method=None, tolerance=None):
             except KeyError:
                 raise KeyError(key)
 
-    def _maybe_cast_slice_bound(self, label, side, kind):
+    def _maybe_cast_slice_bound(self, label, side, kind, closed=None):
         """
         If label is a string or a datetime, cast it to Period.ordinal according
         to resolution.
diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py
index 29ed3c6b97318..a8989a5d481e0 100644
--- a/pandas/core/indexes/timedeltas.py
+++ b/pandas/core/indexes/timedeltas.py
@@ -569,7 +569,7 @@ def get_loc(self, key, method=None, tolerance=None):
             except (KeyError, ValueError):
                 raise KeyError(key)
 
-    def _maybe_cast_slice_bound(self, label, side, kind):
+    def _maybe_cast_slice_bound(self, label, side, kind, closed=None):
         """
         If label is a string, cast it to timedelta according to resolution.
 
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index ccc3a027af70d..530e2cedc927c 100755
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -20,12 +20,14 @@
     is_sequence,
     is_sparse,
 )
-from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries
+from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexSlice, ABCSeries
 from pandas.core.dtypes.missing import _infer_fill_value, isna
 
 import pandas.core.common as com
 from pandas.core.index import Index, MultiIndex
 
+_VALID_CLOSED = {"left", "right", "both", "neither"}
+
 
 # the supported indexers
 def get_indexers_list():
@@ -85,8 +87,25 @@ class _IndexSlice:
            B1   10   11
     """
 
+    _typ = "indexslice"
+
+    def __init__(self, closed=None):
+        if closed is not None and closed not in _VALID_CLOSED:
+            msg = "invalid option for 'closed': {closed}".format(closed=closed)
+            raise ValueError(msg)
+        self.closed = closed
+
+    def __call__(self, closed=None):
+        return _IndexSlice(closed=closed)
+
     def __getitem__(self, arg):
-        return arg
+        if self.closed is None:
+            return arg
+        # Build a fresh key instead of mutating self: one IndexSlice can be
+        # subscripted several times and each key must keep its own ``arg``.
+        key = _IndexSlice(closed=self.closed)
+        key.arg = arg
+        return key
 
 
 IndexSlice = _IndexSlice()
@@ -1455,8 +1474,9 @@ def __getitem__(self, key):
             # we by definition only have the 0th axis
             axis = self.axis or 0
 
-            maybe_callable = com.apply_if_callable(key, self.obj)
-            return self._getitem_axis(maybe_callable, axis=axis)
+            if not isinstance(key, ABCIndexSlice):
+                key = com.apply_if_callable(key, self.obj)
+            return self._getitem_axis(key, axis=axis)
 
     def _is_scalar_access(self, key):
         raise NotImplementedError()
@@ -1478,10 +1498,13 @@ def _getbool_axis(self, key, axis=None):
         except Exception as detail:
             raise self._exception(detail)
 
-    def _get_slice_axis(self, slice_obj, axis=None):
+    def _get_slice_axis(self, slice_obj, axis=None, closed=None):
         """ this is pretty simple as we just have to deal with labels """
         if axis is None:
             axis = self.axis or 0
+        if isinstance(slice_obj, ABCIndexSlice):
+            closed = slice_obj.closed
+            slice_obj = slice_obj.arg
 
         obj = self.obj
         if not need_slice(slice_obj):
@@ -1489,7 +1512,11 @@ def _get_slice_axis(self, slice_obj, axis=None):
 
         labels = obj._get_axis(axis)
         indexer = labels.slice_indexer(
-            slice_obj.start, slice_obj.stop, slice_obj.step, kind=self.name
+            slice_obj.start,
+            slice_obj.stop,
+            slice_obj.step,
+            kind=self.name,
+            closed=closed,
         )
 
         if isinstance(indexer, slice):
@@ -1751,7 +1778,7 @@ def _validate_key(self, key, axis):
         # slice of integers (only if in the labels)
         # boolean
 
-        if isinstance(key, slice):
+        if isinstance(key, (slice, ABCIndexSlice)):
             return
 
         if com.is_bool_indexer(key):
@@ -1823,7 +1850,7 @@ def _getitem_axis(self, key, axis=None):
         labels = self.obj._get_axis(axis)
         key = self._get_partial_string_timestamp_match_key(key, labels)
 
-        if isinstance(key, slice):
+        if isinstance(key, (slice, ABCIndexSlice)):
             self._validate_key(key, axis)
             return self._get_slice_axis(key, axis=axis)
         elif com.is_bool_indexer(key):
diff --git a/pandas/core/series.py b/pandas/core/series.py
index b3a7f38aef8ef..799ffd8bc01b2 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -41,6 +41,7 @@
     ABCDataFrame,
     ABCDatetimeArray,
     ABCDatetimeIndex,
+    ABCIndexSlice,
     ABCSeries,
     ABCSparseArray,
     ABCSparseSeries,
@@ -1069,7 +1070,8 @@ def _slice(self, slobj, axis=0, kind=None):
         return self._get_values(slobj)
 
     def __getitem__(self, key):
-        key = com.apply_if_callable(key, self)
+        if not isinstance(key, ABCIndexSlice):
+            key = com.apply_if_callable(key, self)
         try:
             result = self.index.get_value(self, key)
 
@@ -1117,7 +1119,7 @@ def __getitem__(self, key):
 
     def _get_with(self, key):
         # other: fancy integer or otherwise
-        if isinstance(key, slice):
+        if isinstance(key, (slice, ABCIndexSlice)):
             indexer = self.index._convert_slice_indexer(key, kind="getitem")
             return self._get_values(indexer)
         elif isinstance(key, ABCDataFrame):
diff --git a/pandas/tests/indexing/test_slice.py b/pandas/tests/indexing/test_slice.py
new file mode 100644
index 0000000000000..8f20e75bb50aa
--- /dev/null
+++ b/pandas/tests/indexing/test_slice.py
@@ -0,0 +1,65 @@
+import pytest
+
+from pandas import DatetimeIndex, IndexSlice, Series, Timestamp
+from pandas.util.testing import assert_series_equal
+
+
+def test_indexslice_bad_kwarg_raises():
+    with pytest.raises(ValueError, match="invalid option for 'closed': foo"):
+        IndexSlice(closed="foo")
+
+
+@pytest.mark.parametrize(
+    "closed, expected_slice",
+    [
+        (None, slice(0, 2)),  # default
+        ("left", slice(0, 1)),
+        ("right", slice(1, 2)),
+        ("both", slice(0, 2)),
+        ("neither", slice(0, 0)),
+    ],
+)
+@pytest.mark.parametrize("left", [Timestamp("2001-01-01 23:50"), "2001-01-01"])
+@pytest.mark.parametrize("right", [Timestamp("2001-01-02 00:00"), "2001-01-02"])
+@pytest.mark.parametrize(
+    "indexer", [(lambda x: x), (lambda x: x.loc)], ids=["getitem", "loc"]
+)
+def test_series_getitem_closed_kwarg_dates(
+    indexer, closed, left, right, expected_slice
+):
+    # gh-27209
+    dates = ["2001-01-01 23:50", "2001-01-02 00:00", "2001-01-03 00:08"]
+    ser = Series(range(3), DatetimeIndex(dates))
+    expected = ser.iloc[expected_slice]
+    idx = IndexSlice(closed=closed)
+    result = indexer(ser)[idx[left:right]]
+    assert_series_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    "closed, expected_slice",
+    [
+        ("left", slice(0, 1)),
+        ("right", slice(1, 2)),
+        ("both", slice(0, 2)),
+        ("neither", slice(0, 0)),
+    ],
+)
+def test_series_getitem_closed_kwarg_int_labels(closed, expected_slice):
+    # gh-27209
+    int_labels = [50, 70, 80]
+    ser = Series(range(3), index=int_labels)
+    expected = ser.iloc[expected_slice]
+    idx = IndexSlice(closed=closed)
+    result = ser.loc[idx[50:70]]
+    assert_series_equal(result, expected)
+
+
+def test_indexslice_getitem_returns_independent_keys():
+    # reusing one IndexSlice must not overwrite a previously built key
+    idx = IndexSlice(closed="left")
+    first = idx[50:70]
+    second = idx[70:80]
+    assert first.arg == slice(50, 70)
+    assert second.arg == slice(70, 80)
+    assert first.closed == second.closed == "left"