Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

allow negative freq strings #8651

Merged
merged 13 commits into from
Feb 1, 2024
3 changes: 3 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ v2024.02.0 (unreleased)
New Features
~~~~~~~~~~~~

- Allow negative frequency strings (e.g. ``"-1YE"``). These strings are used, for example,
in :py:func:`date_range` and :py:func:`cftime_range` (:pull:`8651`).
By `Mathias Hauser <https://github.com/mathause>`_.
- Add :py:meth:`NamedArray.expand_dims`, :py:meth:`NamedArray.permute_dims` and :py:meth:`NamedArray.broadcast_to`
(:pull:`8380`) By `Anderson Banihirwe <https://github.com/andersy005>`_.

Expand Down
21 changes: 12 additions & 9 deletions xarray/coding/cftime_offsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -700,7 +700,7 @@ def _generate_anchored_offsets(base_freq, offset):


_FREQUENCY_CONDITION = "|".join(_FREQUENCIES.keys())
_PATTERN = rf"^((?P<multiple>\d+)|())(?P<freq>({_FREQUENCY_CONDITION}))$"
_PATTERN = rf"^((?P<multiple>[+-]?\d+)|())(?P<freq>({_FREQUENCY_CONDITION}))$"


# pandas defines these offsets as "Tick" objects, which for instance have
Expand Down Expand Up @@ -825,7 +825,8 @@ def _generate_range(start, end, periods, offset):
"""Generate a regular range of cftime.datetime objects with a
given time offset.

Adapted from pandas.tseries.offsets.generate_range.
Adapted from pandas.tseries.offsets.generate_range (now at
pandas.core.arrays.datetimes._generate_range).

Parameters
----------
Expand All @@ -845,10 +846,7 @@ def _generate_range(start, end, periods, offset):
if start:
start = offset.rollforward(start)

if end:
end = offset.rollback(end)

if periods is None and end < start:
if periods is None and end < start and offset.n >= 0:
end = None
periods = 0

Expand Down Expand Up @@ -933,7 +931,7 @@ def cftime_range(
periods : int, optional
Number of periods to generate.
freq : str or None, default: "D"
Frequency strings can have multiples, e.g. "5h".
Frequency strings can have multiples, e.g. "5h", and negative values, e.g. "-1D".
normalize : bool, default: False
Normalize start/end dates to midnight before generating date range.
name : str, default: None
Expand Down Expand Up @@ -1176,7 +1174,7 @@ def date_range(
periods : int, optional
Number of periods to generate.
freq : str or None, default: "D"
Frequency strings can have multiples, e.g. "5h".
Frequency strings can have multiples, e.g. "5h", and negative values, e.g. "-1D".
tz : str or tzinfo, optional
Time zone name for returning localized DatetimeIndex, for example
'Asia/Hong_Kong'. By default, the resulting DatetimeIndex is
Expand Down Expand Up @@ -1322,6 +1320,11 @@ def date_range_like(source, calendar, use_cftime=None):

source_start = source.values.min()
source_end = source.values.max()

freq_as_offset = to_offset(freq)
if freq_as_offset.n < 0:
source_start, source_end = source_end, source_start

if is_np_datetime_like(source.dtype):
# We want to use datetime fields (datetime64 object don't have them)
source_calendar = "standard"
Expand All @@ -1344,7 +1347,7 @@ def date_range_like(source, calendar, use_cftime=None):

# For the cases where the source ends on the end of the month, we expect the same in the new calendar.
if source_end.day == source_end.daysinmonth and isinstance(
to_offset(freq), (YearEnd, QuarterEnd, MonthEnd, Day)
freq_as_offset, (YearEnd, QuarterEnd, MonthEnd, Day)
):
end = end.replace(day=end.daysinmonth)

Expand Down
114 changes: 99 additions & 15 deletions xarray/tests/test_cftime_offsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,20 @@ def test_to_offset_offset_input(offset):
("2U", Microsecond(n=2)),
("us", Microsecond(n=1)),
("2us", Microsecond(n=2)),
# negative
("-2M", MonthEnd(n=-2)),
("-2ME", MonthEnd(n=-2)),
("-2MS", MonthBegin(n=-2)),
("-2D", Day(n=-2)),
("-2H", Hour(n=-2)),
("-2h", Hour(n=-2)),
("-2T", Minute(n=-2)),
("-2min", Minute(n=-2)),
("-2S", Second(n=-2)),
("-2L", Millisecond(n=-2)),
("-2ms", Millisecond(n=-2)),
("-2U", Microsecond(n=-2)),
("-2us", Microsecond(n=-2)),
],
ids=_id_func,
)
Expand All @@ -239,7 +253,7 @@ def test_to_offset_sub_annual(freq, expected):
@pytest.mark.parametrize(
("month_int", "month_label"), list(_MONTH_ABBREVIATIONS.items()) + [(0, "")]
)
@pytest.mark.parametrize("multiple", [None, 2])
@pytest.mark.parametrize("multiple", [None, 2, -1])
@pytest.mark.parametrize("offset_str", ["AS", "A", "YS", "Y"])
@pytest.mark.filterwarnings("ignore::FutureWarning") # Deprecation of "A" etc.
def test_to_offset_annual(month_label, month_int, multiple, offset_str):
Expand Down Expand Up @@ -268,7 +282,7 @@ def test_to_offset_annual(month_label, month_int, multiple, offset_str):
@pytest.mark.parametrize(
("month_int", "month_label"), list(_MONTH_ABBREVIATIONS.items()) + [(0, "")]
)
@pytest.mark.parametrize("multiple", [None, 2])
@pytest.mark.parametrize("multiple", [None, 2, -1])
@pytest.mark.parametrize("offset_str", ["QS", "Q", "QE"])
@pytest.mark.filterwarnings("ignore::FutureWarning") # Deprecation of "Q" etc.
def test_to_offset_quarter(month_label, month_int, multiple, offset_str):
Expand Down Expand Up @@ -403,6 +417,7 @@ def test_eq(a, b):

_MUL_TESTS = [
(BaseCFTimeOffset(), 3, BaseCFTimeOffset(n=3)),
(BaseCFTimeOffset(), -3, BaseCFTimeOffset(n=-3)),
(YearEnd(), 3, YearEnd(n=3)),
(YearBegin(), 3, YearBegin(n=3)),
(QuarterEnd(), 3, QuarterEnd(n=3)),
Expand All @@ -418,6 +433,7 @@ def test_eq(a, b):
(Microsecond(), 3, Microsecond(n=3)),
(Day(), 0.5, Hour(n=12)),
(Hour(), 0.5, Minute(n=30)),
(Hour(), -0.5, Minute(n=-30)),
(Minute(), 0.5, Second(n=30)),
(Second(), 0.5, Millisecond(n=500)),
(Millisecond(), 0.5, Microsecond(n=500)),
Expand Down Expand Up @@ -1162,6 +1178,15 @@ def test_rollback(calendar, offset, initial_date_args, partial_expected_date_arg
False,
[(10, 1, 1), (8, 1, 1), (6, 1, 1), (4, 1, 1)],
),
(
"0010",
None,
4,
"-2YS",
"both",
False,
[(10, 1, 1), (8, 1, 1), (6, 1, 1), (4, 1, 1)],
),
(
"0001-01-01",
"0001-01-04",
Expand All @@ -1180,6 +1205,24 @@ def test_rollback(calendar, offset, initial_date_args, partial_expected_date_arg
False,
[(1, 6, 1), (2, 3, 1), (2, 12, 1), (3, 9, 1)],
),
(
"0001-06-01",
None,
4,
"-1MS",
"both",
False,
[(1, 6, 1), (1, 5, 1), (1, 4, 1), (1, 3, 1)],
),
(
"0001-01-30",
None,
4,
"-1D",
"both",
False,
[(1, 1, 30), (1, 1, 29), (1, 1, 28), (1, 1, 27)],
),
]


Expand Down Expand Up @@ -1263,32 +1306,52 @@ def test_invalid_cftime_arg() -> None:


_CALENDAR_SPECIFIC_MONTH_END_TESTS = [
("2ME", "noleap", [(2, 28), (4, 30), (6, 30), (8, 31), (10, 31), (12, 31)]),
("2ME", "all_leap", [(2, 29), (4, 30), (6, 30), (8, 31), (10, 31), (12, 31)]),
("2ME", "360_day", [(2, 30), (4, 30), (6, 30), (8, 30), (10, 30), (12, 30)]),
("2ME", "standard", [(2, 29), (4, 30), (6, 30), (8, 31), (10, 31), (12, 31)]),
("2ME", "gregorian", [(2, 29), (4, 30), (6, 30), (8, 31), (10, 31), (12, 31)]),
("2ME", "julian", [(2, 29), (4, 30), (6, 30), (8, 31), (10, 31), (12, 31)]),
("noleap", [(2, 28), (4, 30), (6, 30), (8, 31), (10, 31), (12, 31)]),
("all_leap", [(2, 29), (4, 30), (6, 30), (8, 31), (10, 31), (12, 31)]),
("360_day", [(2, 30), (4, 30), (6, 30), (8, 30), (10, 30), (12, 30)]),
("standard", [(2, 29), (4, 30), (6, 30), (8, 31), (10, 31), (12, 31)]),
("gregorian", [(2, 29), (4, 30), (6, 30), (8, 31), (10, 31), (12, 31)]),
("julian", [(2, 29), (4, 30), (6, 30), (8, 31), (10, 31), (12, 31)]),
]


@pytest.mark.parametrize(
("freq", "calendar", "expected_month_day"),
("calendar", "expected_month_day"),
_CALENDAR_SPECIFIC_MONTH_END_TESTS,
ids=_id_func,
)
def test_calendar_specific_month_end(
freq: str, calendar: str, expected_month_day: list[tuple[int, int]]
calendar: str, expected_month_day: list[tuple[int, int]]
) -> None:
year = 2000 # Use a leap-year to highlight calendar differences
result = cftime_range(
start="2000-02", end="2001", freq=freq, calendar=calendar
start="2000-02", end="2001", freq="2ME", calendar=calendar
).values
date_type = get_date_type(calendar)
expected = [date_type(year, *args) for args in expected_month_day]
np.testing.assert_equal(result, expected)


@pytest.mark.parametrize(
    ("calendar", "expected_month_day"),
    _CALENDAR_SPECIFIC_MONTH_END_TESTS,
    ids=_id_func,
)
def test_calendar_specific_month_end_negative_freq(
    calendar: str, expected_month_day: list[tuple[int, int]]
) -> None:
    """Check month-end dates produced by a negative two-month frequency.

    The range runs backwards from "2000-12" to "2000", so the expected dates
    are the parametrized (month, day) pairs in reverse order.
    """
    leap_year = 2000  # Use a leap-year to highlight calendar differences
    index = cftime_range(
        start="2000-12",
        end="2000",
        freq="-2ME",
        calendar=calendar,
    )
    make_date = get_date_type(calendar)
    expected = [
        make_date(leap_year, month, day)
        for month, day in reversed(expected_month_day)
    ]
    np.testing.assert_equal(index.values, expected)


@pytest.mark.parametrize(
("calendar", "start", "end", "expected_number_of_days"),
[
Expand Down Expand Up @@ -1376,10 +1439,6 @@ def test_date_range_errors() -> None:
)


@requires_cftime
@pytest.mark.xfail(
reason="https://github.com/pydata/xarray/pull/8636#issuecomment-1902775153"
)
@pytest.mark.parametrize(
"start,freq,cal_src,cal_tgt,use_cftime,exp0,exp_pd",
[
Expand All @@ -1388,9 +1447,11 @@ def test_date_range_errors() -> None:
("2020-02-01", "QE-DEC", "noleap", "gregorian", True, "2020-03-31", True),
("2020-02-01", "YS-FEB", "noleap", "gregorian", True, "2020-02-01", True),
("2020-02-01", "YE-FEB", "noleap", "gregorian", True, "2020-02-29", True),
("2020-02-01", "-1YE-FEB", "noleap", "gregorian", True, "2020-02-29", True),
("2020-02-28", "3h", "all_leap", "gregorian", False, "2020-02-28", True),
("2020-03-30", "ME", "360_day", "gregorian", False, "2020-03-31", True),
("2020-03-31", "ME", "gregorian", "360_day", None, "2020-03-30", False),
("2020-03-31", "-1ME", "gregorian", "360_day", None, "2020-03-30", False),
],
)
def test_date_range_like(start, freq, cal_src, cal_tgt, use_cftime, exp0, exp_pd):
Expand Down Expand Up @@ -1541,3 +1602,26 @@ def test_to_offset_deprecation_warning(freq):
# Test for deprecations outlined in GitHub issue #8394
with pytest.warns(FutureWarning, match="is deprecated"):
to_offset(freq)


@pytest.mark.filterwarnings("ignore:Converting a CFTimeIndex with:")
@pytest.mark.parametrize("start", ("2000", "2001"))
@pytest.mark.parametrize("end", ("2000", "2001"))
@pytest.mark.parametrize("freq", ("MS", "-1MS", "YS", "-1YS"))
def test_cftime_range_same_as_pandas(start, end, freq):
    """Compare the cftime-backed range with the pandas-backed range.

    Both ranges are built by ``date_range`` from the same start, end and
    frequency; the cftime result is converted to a DatetimeIndex before the
    element-wise comparison.
    """
    cftime_index = date_range(
        start, end, freq=freq, calendar="standard", use_cftime=True
    )
    pandas_index = date_range(start, end, freq=freq, use_cftime=False)

    np.testing.assert_array_equal(cftime_index.to_datetimeindex(), pandas_index)


@pytest.mark.filterwarnings("ignore:Converting a CFTimeIndex with:")
@pytest.mark.parametrize("start", ("2000", "2001"))
@pytest.mark.parametrize("end", ("2000", "2001"))
@pytest.mark.parametrize("freq", ("M", "-1M", "Y", "-1Y"))
def test_cftime_range_end_same_len_pandas(start, end, freq):
    """Compare month-end and year-end cftime ranges with the pandas ranges.

    The cftime-backed result is converted to a DatetimeIndex and compared
    element-wise with the pandas-backed result, rather than only by size, so
    that a range of the right length but with wrong dates is also caught.
    """
    result = date_range(start, end, freq=freq, calendar="standard", use_cftime=True)
    result = result.to_datetimeindex()
    expected = date_range(start, end, freq=freq, use_cftime=False)

    np.testing.assert_array_equal(result, expected)
spencerkclark marked this conversation as resolved.
Show resolved Hide resolved
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we can check for equality now, similar to the test above:

Suggested change
result = date_range(start, end, freq=freq, calendar="standard", use_cftime=True)
expected = date_range(start, end, freq=freq, use_cftime=False)
# end of month/ year is not convertable to datetimeindex
assert result.size == expected.size
result = date_range(start, end, freq=freq, calendar="standard", use_cftime=True)
result = result.to_datetimeindex()
expected = date_range(start, end, freq=freq, use_cftime=False)
np.testing.assert_array_equal(result, expected)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ultimately I suppose this test could now be merged with the one above—i.e. just merge the frequency strings.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

of course - thanks

Loading