diff --git a/doc/source/user_guide/timeseries.rst b/doc/source/user_guide/timeseries.rst index 08b2ae0a4a837..3fdab0fd26643 100644 --- a/doc/source/user_guide/timeseries.rst +++ b/doc/source/user_guide/timeseries.rst @@ -1951,6 +1951,10 @@ The ``period`` dtype can be used in ``.astype(...)``. It allows one to change th PeriodIndex partial string indexing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +PeriodIndex now supports partial string slicing with non-monotonic indexes. + +.. versionadded:: 1.1.0 + You can pass in dates and strings to ``Series`` and ``DataFrame`` with ``PeriodIndex``, in the same manner as ``DatetimeIndex``. For details, refer to :ref:`DatetimeIndex Partial String Indexing <timeseries.partialindexing>`. .. ipython:: python @@ -1981,6 +1985,7 @@ As with ``DatetimeIndex``, the endpoints will be included in the result. The exa dfp['2013-01-01 10H':'2013-01-01 11H'] + Frequency conversion and resampling with PeriodIndex ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The frequency of ``Period`` and ``PeriodIndex`` can be converted via the ``asfreq`` diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index a04ba157ce0ae..3245a4020d2ef 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -13,6 +13,28 @@ including other versions of pandas. Enhancements ~~~~~~~~~~~~ +.. _whatsnew_110.period_index_partial_string_slicing: + +Nonmonotonic PeriodIndex Partial String Slicing +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +:class:`PeriodIndex` now supports partial string slicing for non-monotonic indexes, mirroring :class:`DatetimeIndex` behavior (:issue:`31096`). + +For example: + +.. ipython:: python + + dti = pd.date_range("2014-01-01", periods=30, freq="30D") + pi = dti.to_period("D") + ser_monotonic = pd.Series(np.arange(30), index=pi) + shuffler = list(range(0, 30, 2)) + list(range(1, 31, 2)) + ser = ser_monotonic[shuffler] + ser + +.. ipython:: python + + ser["2014"] + ser.loc["May 2015"] + .. 
_whatsnew_110.enhancements.other: Other enhancements diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index b3386f6104032..2a40f4a6f6239 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -567,6 +567,11 @@ def get_loc(self, key, method=None, tolerance=None): """ if isinstance(key, str): + try: + return self._get_string_slice(key) + except (TypeError, KeyError, ValueError, OverflowError): + pass + try: asdt, reso = parse_time_string(key, self.freq) key = asdt @@ -648,10 +653,6 @@ def _parsed_string_to_bounds(self, reso: str, parsed: datetime): def _get_string_slice(self, key: str, use_lhs: bool = True, use_rhs: bool = True): # TODO: Check for non-True use_lhs/use_rhs - raw = key - if not self.is_monotonic: - raise ValueError("Partial indexing only valid for ordered time series") - parsed, reso = parse_time_string(key, self.freq) grp = resolution.Resolution.get_freq_group(reso) freqn = resolution.get_freq_group(self.freq) @@ -660,18 +661,35 @@ def _get_string_slice(self, key: str, use_lhs: bool = True, use_rhs: bool = True # TODO: we used to also check for # reso in ["day", "hour", "minute", "second"] # why is that check not needed? - raise TypeError(key) + raise ValueError(key) t1, t2 = self._parsed_string_to_bounds(reso, parsed) - if len(self): - if t2 < self.min() or t1 > self.max(): - raise KeyError(raw) - - # Use asi8 searchsorted to avoid overhead of re-validating inputs - return slice( - self.asi8.searchsorted(t1.ordinal, side="left"), - self.asi8.searchsorted(t2.ordinal, side="right"), - ) + i8vals = self.asi8 + + if self.is_monotonic: + + # we are out of range + if len(self) and ( + (use_lhs and t1 < self[0] and t2 < self[0]) + or ((use_rhs and t1 > self[-1] and t2 > self[-1])) + ): + raise KeyError(key) + + # TODO: does this depend on being monotonic _increasing_? + # If so, DTI will also be affected. 
+ + # a monotonic (sorted) series can be sliced + # Use asi8.searchsorted to avoid re-validating Periods + left = i8vals.searchsorted(t1.ordinal, side="left") if use_lhs else None + right = i8vals.searchsorted(t2.ordinal, side="right") if use_rhs else None + return slice(left, right) + + else: + lhs_mask = (i8vals >= t1.ordinal) if use_lhs else True + rhs_mask = (i8vals <= t2.ordinal) if use_rhs else True + + # try to find the dates + return (lhs_mask & rhs_mask).nonzero()[0] def _convert_tolerance(self, tolerance, target): tolerance = DatetimeIndexOpsMixin._convert_tolerance(self, tolerance, target) diff --git a/pandas/tests/indexes/period/test_partial_slicing.py b/pandas/tests/indexes/period/test_partial_slicing.py index 9ca2dd169416f..833901ea7ba22 100644 --- a/pandas/tests/indexes/period/test_partial_slicing.py +++ b/pandas/tests/indexes/period/test_partial_slicing.py @@ -7,9 +7,6 @@ class TestPeriodIndex: - def setup_method(self, method): - pass - def test_slice_with_negative_step(self): ts = Series(np.arange(20), period_range("2014-01", periods=20, freq="M")) SLC = pd.IndexSlice @@ -133,3 +130,53 @@ def test_range_slice_outofbounds(self): tm.assert_frame_equal(df["2013/10/15":"2013/10/17"], empty) tm.assert_frame_equal(df["2013-06":"2013-09"], empty) tm.assert_frame_equal(df["2013-11":"2013-12"], empty) + + def test_partial_slice_doesnt_require_monotonicity(self): + # See also: DatetimeIndex test of the same name + dti = pd.date_range("2014-01-01", periods=30, freq="30D") + pi = dti.to_period("D") + + ser_montonic = pd.Series(np.arange(30), index=pi) + + shuffler = list(range(0, 30, 2)) + list(range(1, 31, 2)) + ser = ser_montonic[shuffler] + nidx = ser.index + + # Manually identified locations of year==2014 + indexer_2014 = np.array( + [0, 1, 2, 3, 4, 5, 6, 15, 16, 17, 18, 19, 20], dtype=np.intp + ) + assert (nidx[indexer_2014].year == 2014).all() + assert not (nidx[~indexer_2014].year == 2014).any() + + result = nidx.get_loc("2014") + 
tm.assert_numpy_array_equal(result, indexer_2014) + + expected = ser[indexer_2014] + + result = nidx.get_value(ser, "2014") + tm.assert_series_equal(result, expected) + + result = ser.loc["2014"] + tm.assert_series_equal(result, expected) + + result = ser["2014"] + tm.assert_series_equal(result, expected) + + # Manually identified locations where ser.index is within May 2015 + indexer_may2015 = np.array([23], dtype=np.intp) + assert nidx[23].year == 2015 and nidx[23].month == 5 + + result = nidx.get_loc("May 2015") + tm.assert_numpy_array_equal(result, indexer_may2015) + + expected = ser[indexer_may2015] + + result = nidx.get_value(ser, "May 2015") + tm.assert_series_equal(result, expected) + + result = ser.loc["May 2015"] + tm.assert_series_equal(result, expected) + + result = ser["May 2015"] + tm.assert_series_equal(result, expected)