diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index d86c1b7911528..4202c01dfe551 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -156,7 +156,7 @@ Categorical Datetimelike ^^^^^^^^^^^^ - Bug in :class:`DataFrame` and :class:`Series` constructors sometimes dropping nanoseconds from :class:`Timestamp` (resp. :class:`Timedelta`) ``data``, with ``dtype=datetime64[ns]`` (resp. ``timedelta64[ns]``) (:issue:`38032`) -- +- Bug in :meth:`DataFrame.first` and :meth:`Series.first` returning two months for offset one month when first day is last calendar day (:issue:`29623`) - Timedelta diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 41cb76d88957e..e0b5df7e2cf0c 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -8420,7 +8420,12 @@ def first(self: FrameOrSeries, offset) -> FrameOrSeries: return self offset = to_offset(offset) - end_date = end = self.index[0] + offset + if not isinstance(offset, Tick) and offset.is_on_offset(self.index[0]): + # GH#29623 if first value is end of period, remove offset with n = 1 + # before adding the real offset + end_date = end = self.index[0] - offset.base + offset + else: + end_date = end = self.index[0] + offset # Tick-like, e.g. 3 weeks if isinstance(offset, Tick): diff --git a/pandas/tests/frame/methods/test_first_and_last.py b/pandas/tests/frame/methods/test_first_and_last.py index d21e1eee54e16..43469a093f827 100644 --- a/pandas/tests/frame/methods/test_first_and_last.py +++ b/pandas/tests/frame/methods/test_first_and_last.py @@ -3,7 +3,7 @@ """ import pytest -from pandas import DataFrame +from pandas import DataFrame, bdate_range import pandas._testing as tm @@ -69,3 +69,22 @@ def test_last_subset(self, frame_or_series): result = ts[:0].last("3M") tm.assert_equal(result, ts[:0]) + + @pytest.mark.parametrize("start, periods", [("2010-03-31", 1), ("2010-03-30", 2)]) + def test_first_with_first_day_last_of_month(self, frame_or_series, start, periods): + # GH#29623 + x = frame_or_series([1] * 100, index=bdate_range(start, periods=100)) + result = x.first("1M") + expected = frame_or_series( + [1] * periods, index=bdate_range(start, periods=periods) + ) + tm.assert_equal(result, expected) + + def test_first_with_first_day_end_of_frq_n_greater_one(self, frame_or_series): + # GH#29623 + x = frame_or_series([1] * 100, index=bdate_range("2010-03-31", periods=100)) + result = x.first("2M") + expected = frame_or_series( + [1] * 23, index=bdate_range("2010-03-31", "2010-04-30") + ) + tm.assert_equal(result, expected)