Skip to content

Commit

Permalink
allow negative freq strings (pydata#8651)
Browse files Browse the repository at this point in the history
* allow negative freq strings

* update docstring

* fix date_range_like

* whats-new entry

* Apply suggestions from code review

Co-authored-by: Spencer Clark <spencerkclark@gmail.com>

* only test standard calendar

* merge tests

---------

Co-authored-by: Spencer Clark <spencerkclark@gmail.com>
  • Loading branch information
mathause and spencerkclark authored Feb 1, 2024
1 parent 4de10d4 commit c9ba2be
Show file tree
Hide file tree
Showing 3 changed files with 103 additions and 24 deletions.
3 changes: 3 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ v2024.02.0 (unreleased)
New Features
~~~~~~~~~~~~

- Allow negative frequency strings (e.g. ``"-1YE"``). These strings can be used, for
example, in :py:func:`date_range` and :py:func:`cftime_range` (:pull:`8651`).
By `Mathias Hauser <https://github.com/mathause>`_.
- Add :py:meth:`NamedArray.expand_dims`, :py:meth:`NamedArray.permute_dims` and :py:meth:`NamedArray.broadcast_to`
(:pull:`8380`) By `Anderson Banihirwe <https://github.com/andersy005>`_.

Expand Down
21 changes: 12 additions & 9 deletions xarray/coding/cftime_offsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -700,7 +700,7 @@ def _generate_anchored_offsets(base_freq, offset):


_FREQUENCY_CONDITION = "|".join(_FREQUENCIES.keys())
_PATTERN = rf"^((?P<multiple>\d+)|())(?P<freq>({_FREQUENCY_CONDITION}))$"
_PATTERN = rf"^((?P<multiple>[+-]?\d+)|())(?P<freq>({_FREQUENCY_CONDITION}))$"


# pandas defines these offsets as "Tick" objects, which for instance have
Expand Down Expand Up @@ -825,7 +825,8 @@ def _generate_range(start, end, periods, offset):
"""Generate a regular range of cftime.datetime objects with a
given time offset.
Adapted from pandas.tseries.offsets.generate_range.
Adapted from pandas.tseries.offsets.generate_range (now at
pandas.core.arrays.datetimes._generate_range).
Parameters
----------
Expand All @@ -845,10 +846,7 @@ def _generate_range(start, end, periods, offset):
if start:
start = offset.rollforward(start)

if end:
end = offset.rollback(end)

if periods is None and end < start:
if periods is None and end < start and offset.n >= 0:
end = None
periods = 0

Expand Down Expand Up @@ -933,7 +931,7 @@ def cftime_range(
periods : int, optional
Number of periods to generate.
freq : str or None, default: "D"
Frequency strings can have multiples, e.g. "5h".
Frequency strings can have multiples, e.g. "5h", and negative values, e.g. "-1D".
normalize : bool, default: False
Normalize start/end dates to midnight before generating date range.
name : str, default: None
Expand Down Expand Up @@ -1176,7 +1174,7 @@ def date_range(
periods : int, optional
Number of periods to generate.
freq : str or None, default: "D"
Frequency strings can have multiples, e.g. "5h".
Frequency strings can have multiples, e.g. "5h", and negative values, e.g. "-1D".
tz : str or tzinfo, optional
Time zone name for returning localized DatetimeIndex, for example
'Asia/Hong_Kong'. By default, the resulting DatetimeIndex is
Expand Down Expand Up @@ -1322,6 +1320,11 @@ def date_range_like(source, calendar, use_cftime=None):

source_start = source.values.min()
source_end = source.values.max()

freq_as_offset = to_offset(freq)
if freq_as_offset.n < 0:
source_start, source_end = source_end, source_start

if is_np_datetime_like(source.dtype):
# We want to use datetime fields (datetime64 object don't have them)
source_calendar = "standard"
Expand All @@ -1344,7 +1347,7 @@ def date_range_like(source, calendar, use_cftime=None):

# For the cases where the source ends on the end of the month, we expect the same in the new calendar.
if source_end.day == source_end.daysinmonth and isinstance(
to_offset(freq), (YearEnd, QuarterEnd, MonthEnd, Day)
freq_as_offset, (YearEnd, QuarterEnd, MonthEnd, Day)
):
end = end.replace(day=end.daysinmonth)

Expand Down
103 changes: 88 additions & 15 deletions xarray/tests/test_cftime_offsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,20 @@ def test_to_offset_offset_input(offset):
("2U", Microsecond(n=2)),
("us", Microsecond(n=1)),
("2us", Microsecond(n=2)),
# negative
("-2M", MonthEnd(n=-2)),
("-2ME", MonthEnd(n=-2)),
("-2MS", MonthBegin(n=-2)),
("-2D", Day(n=-2)),
("-2H", Hour(n=-2)),
("-2h", Hour(n=-2)),
("-2T", Minute(n=-2)),
("-2min", Minute(n=-2)),
("-2S", Second(n=-2)),
("-2L", Millisecond(n=-2)),
("-2ms", Millisecond(n=-2)),
("-2U", Microsecond(n=-2)),
("-2us", Microsecond(n=-2)),
],
ids=_id_func,
)
Expand All @@ -239,7 +253,7 @@ def test_to_offset_sub_annual(freq, expected):
@pytest.mark.parametrize(
("month_int", "month_label"), list(_MONTH_ABBREVIATIONS.items()) + [(0, "")]
)
@pytest.mark.parametrize("multiple", [None, 2])
@pytest.mark.parametrize("multiple", [None, 2, -1])
@pytest.mark.parametrize("offset_str", ["AS", "A", "YS", "Y"])
@pytest.mark.filterwarnings("ignore::FutureWarning") # Deprecation of "A" etc.
def test_to_offset_annual(month_label, month_int, multiple, offset_str):
Expand Down Expand Up @@ -268,7 +282,7 @@ def test_to_offset_annual(month_label, month_int, multiple, offset_str):
@pytest.mark.parametrize(
("month_int", "month_label"), list(_MONTH_ABBREVIATIONS.items()) + [(0, "")]
)
@pytest.mark.parametrize("multiple", [None, 2])
@pytest.mark.parametrize("multiple", [None, 2, -1])
@pytest.mark.parametrize("offset_str", ["QS", "Q", "QE"])
@pytest.mark.filterwarnings("ignore::FutureWarning") # Deprecation of "Q" etc.
def test_to_offset_quarter(month_label, month_int, multiple, offset_str):
Expand Down Expand Up @@ -403,6 +417,7 @@ def test_eq(a, b):

_MUL_TESTS = [
(BaseCFTimeOffset(), 3, BaseCFTimeOffset(n=3)),
(BaseCFTimeOffset(), -3, BaseCFTimeOffset(n=-3)),
(YearEnd(), 3, YearEnd(n=3)),
(YearBegin(), 3, YearBegin(n=3)),
(QuarterEnd(), 3, QuarterEnd(n=3)),
Expand All @@ -418,6 +433,7 @@ def test_eq(a, b):
(Microsecond(), 3, Microsecond(n=3)),
(Day(), 0.5, Hour(n=12)),
(Hour(), 0.5, Minute(n=30)),
(Hour(), -0.5, Minute(n=-30)),
(Minute(), 0.5, Second(n=30)),
(Second(), 0.5, Millisecond(n=500)),
(Millisecond(), 0.5, Microsecond(n=500)),
Expand Down Expand Up @@ -1162,6 +1178,15 @@ def test_rollback(calendar, offset, initial_date_args, partial_expected_date_arg
False,
[(10, 1, 1), (8, 1, 1), (6, 1, 1), (4, 1, 1)],
),
(
"0010",
None,
4,
"-2YS",
"both",
False,
[(10, 1, 1), (8, 1, 1), (6, 1, 1), (4, 1, 1)],
),
(
"0001-01-01",
"0001-01-04",
Expand All @@ -1180,6 +1205,24 @@ def test_rollback(calendar, offset, initial_date_args, partial_expected_date_arg
False,
[(1, 6, 1), (2, 3, 1), (2, 12, 1), (3, 9, 1)],
),
(
"0001-06-01",
None,
4,
"-1MS",
"both",
False,
[(1, 6, 1), (1, 5, 1), (1, 4, 1), (1, 3, 1)],
),
(
"0001-01-30",
None,
4,
"-1D",
"both",
False,
[(1, 1, 30), (1, 1, 29), (1, 1, 28), (1, 1, 27)],
),
]


Expand Down Expand Up @@ -1263,32 +1306,52 @@ def test_invalid_cftime_arg() -> None:


_CALENDAR_SPECIFIC_MONTH_END_TESTS = [
("2ME", "noleap", [(2, 28), (4, 30), (6, 30), (8, 31), (10, 31), (12, 31)]),
("2ME", "all_leap", [(2, 29), (4, 30), (6, 30), (8, 31), (10, 31), (12, 31)]),
("2ME", "360_day", [(2, 30), (4, 30), (6, 30), (8, 30), (10, 30), (12, 30)]),
("2ME", "standard", [(2, 29), (4, 30), (6, 30), (8, 31), (10, 31), (12, 31)]),
("2ME", "gregorian", [(2, 29), (4, 30), (6, 30), (8, 31), (10, 31), (12, 31)]),
("2ME", "julian", [(2, 29), (4, 30), (6, 30), (8, 31), (10, 31), (12, 31)]),
("noleap", [(2, 28), (4, 30), (6, 30), (8, 31), (10, 31), (12, 31)]),
("all_leap", [(2, 29), (4, 30), (6, 30), (8, 31), (10, 31), (12, 31)]),
("360_day", [(2, 30), (4, 30), (6, 30), (8, 30), (10, 30), (12, 30)]),
("standard", [(2, 29), (4, 30), (6, 30), (8, 31), (10, 31), (12, 31)]),
("gregorian", [(2, 29), (4, 30), (6, 30), (8, 31), (10, 31), (12, 31)]),
("julian", [(2, 29), (4, 30), (6, 30), (8, 31), (10, 31), (12, 31)]),
]


@pytest.mark.parametrize(
("freq", "calendar", "expected_month_day"),
("calendar", "expected_month_day"),
_CALENDAR_SPECIFIC_MONTH_END_TESTS,
ids=_id_func,
)
def test_calendar_specific_month_end(
freq: str, calendar: str, expected_month_day: list[tuple[int, int]]
calendar: str, expected_month_day: list[tuple[int, int]]
) -> None:
year = 2000 # Use a leap-year to highlight calendar differences
result = cftime_range(
start="2000-02", end="2001", freq=freq, calendar=calendar
start="2000-02", end="2001", freq="2ME", calendar=calendar
).values
date_type = get_date_type(calendar)
expected = [date_type(year, *args) for args in expected_month_day]
np.testing.assert_equal(result, expected)


@pytest.mark.parametrize(
    ("calendar", "expected_month_day"),
    _CALENDAR_SPECIFIC_MONTH_END_TESTS,
    ids=_id_func,
)
def test_calendar_specific_month_end_negative_freq(
    calendar: str, expected_month_day: list[tuple[int, int]]
) -> None:
    """Check ``cftime_range`` with a negative month-end frequency.

    The range is requested from "2000-12" back to "2000" with ``freq="-2ME"``
    and must equal the parametrized (month, day) pairs for ``calendar`` in
    reverse order.
    """
    # A leap year is used so that calendar differences show up.
    leap_year = 2000
    make_date = get_date_type(calendar)

    # The parametrized pairs are listed in ascending order; the comparison
    # below uses them reversed.
    expected = [
        make_date(leap_year, month, day)
        for month, day in reversed(expected_month_day)
    ]

    descending_range = cftime_range(
        start="2000-12", end="2000", freq="-2ME", calendar=calendar
    )
    np.testing.assert_equal(descending_range.values, expected)


@pytest.mark.parametrize(
("calendar", "start", "end", "expected_number_of_days"),
[
Expand Down Expand Up @@ -1376,10 +1439,6 @@ def test_date_range_errors() -> None:
)


@requires_cftime
@pytest.mark.xfail(
reason="https://github.com/pydata/xarray/pull/8636#issuecomment-1902775153"
)
@pytest.mark.parametrize(
"start,freq,cal_src,cal_tgt,use_cftime,exp0,exp_pd",
[
Expand All @@ -1388,9 +1447,11 @@ def test_date_range_errors() -> None:
("2020-02-01", "QE-DEC", "noleap", "gregorian", True, "2020-03-31", True),
("2020-02-01", "YS-FEB", "noleap", "gregorian", True, "2020-02-01", True),
("2020-02-01", "YE-FEB", "noleap", "gregorian", True, "2020-02-29", True),
("2020-02-01", "-1YE-FEB", "noleap", "gregorian", True, "2020-02-29", True),
("2020-02-28", "3h", "all_leap", "gregorian", False, "2020-02-28", True),
("2020-03-30", "ME", "360_day", "gregorian", False, "2020-03-31", True),
("2020-03-31", "ME", "gregorian", "360_day", None, "2020-03-30", False),
("2020-03-31", "-1ME", "gregorian", "360_day", None, "2020-03-30", False),
],
)
def test_date_range_like(start, freq, cal_src, cal_tgt, use_cftime, exp0, exp_pd):
Expand Down Expand Up @@ -1541,3 +1602,15 @@ def test_to_offset_deprecation_warning(freq):
# Test for deprecations outlined in GitHub issue #8394
with pytest.warns(FutureWarning, match="is deprecated"):
to_offset(freq)


@pytest.mark.filterwarnings("ignore:Converting a CFTimeIndex with:")
@pytest.mark.parametrize("start", ("2000", "2001"))
@pytest.mark.parametrize("end", ("2000", "2001"))
@pytest.mark.parametrize("freq", ("MS", "-1MS", "YS", "-1YS", "M", "-1M", "Y", "-1Y"))
def test_cftime_range_same_as_pandas(start, end, freq):
    """Check that ``date_range`` gives equal results with and without cftime.

    The same ``start``, ``end`` and ``freq`` are passed once with
    ``use_cftime=True`` (standard calendar) and once with ``use_cftime=False``;
    the cftime result is converted with ``to_datetimeindex`` before comparing.
    """
    cftime_index = date_range(
        start, end, freq=freq, calendar="standard", use_cftime=True
    )
    converted = cftime_index.to_datetimeindex()

    reference = date_range(start, end, freq=freq, use_cftime=False)

    np.testing.assert_array_equal(converted, reference)

0 comments on commit c9ba2be

Please sign in to comment.