From 51185aca1d98f6b0a468b70bdeebad2c00d7496b Mon Sep 17 00:00:00 2001 From: Oliver Higgs Date: Thu, 29 Aug 2024 17:57:46 +1000 Subject: [PATCH 01/16] Support additional dtypes to resample pandas.BaseOffset, pandas.Timedelta, datetime.timedelta, and BaseCFTimeOffset are now all supported datatypes for resampling. --- xarray/core/common.py | 19 +++++++++++++++---- xarray/groupers.py | 19 ++++++++++++++++--- 2 files changed, 31 insertions(+), 7 deletions(-) diff --git a/xarray/core/common.py b/xarray/core/common.py index 74c03f9baf5..c9c07be8c8e 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -1,5 +1,6 @@ from __future__ import annotations +import datetime import warnings from collections.abc import Callable, Hashable, Iterable, Iterator, Mapping from contextlib import suppress @@ -32,8 +33,6 @@ if TYPE_CHECKING: - import datetime - from numpy.typing import DTypeLike from xarray.core.dataarray import DataArray @@ -1060,6 +1059,7 @@ def _resample( """ # TODO support non-string indexer after removing the old API. + from xarray.coding.cftime_offsets import BaseCFTimeOffset from xarray.core.dataarray import DataArray from xarray.core.groupby import ResolvedGrouper from xarray.core.resample import RESAMPLE_DIM @@ -1078,14 +1078,25 @@ def _resample( ) grouper: Resampler - if isinstance(freq, str): + if isinstance( + freq, + str + | datetime.timedelta + | pd.Timedelta + | pd.offsets.BaseOffset + | BaseCFTimeOffset, + ): grouper = TimeResampler( freq=freq, closed=closed, label=label, origin=origin, offset=offset ) elif isinstance(freq, Resampler): grouper = freq else: - raise ValueError("freq must be a str or a Resampler object") + raise ValueError( + "freq must be an object of type 'str', 'datetime.timedelta', " + "'pandas.Timedelta', 'pandas.offsets.BaseOffset', 'BaseCFTimeOffset', " + f" or 'TimeResampler'. Received {type(freq)} instead." + ) rgrouper = ResolvedGrouper(grouper, group, self) diff --git a/xarray/groupers.py b/xarray/groupers.py index f70cad655e8..d8a862f1265 100644 --- a/xarray/groupers.py +++ b/xarray/groupers.py @@ -14,7 +14,7 @@ import numpy as np import pandas as pd -from xarray.coding.cftime_offsets import _new_to_legacy_freq +from xarray.coding.cftime_offsets import BaseCFTimeOffset, _new_to_legacy_freq from xarray.core import duck_array_ops from xarray.core.coordinates import Coordinates from xarray.core.dataarray import DataArray @@ -336,7 +336,7 @@ class TimeResampler(Resampler): Attributes ---------- - freq : str + freq : str, pandas.Timestamp, pandas.BaseOffset, datetime.timedelta, BaseCFTimeOffset Frequency to resample to. See `Pandas frequency aliases `_ for a list of possible values. @@ -358,7 +358,7 @@ class TimeResampler(Resampler): An offset timedelta added to the origin. """ - freq: str + freq: str | pd.Timedelta | pd.offsets.BaseOffset | datetime.timedelta closed: SideOptions | None = field(default=None) label: SideOptions | None = field(default=None) origin: str | DatetimeLike = field(default="start_day") @@ -380,6 +380,13 @@ def _init_properties(self, group: T_Group) -> None: if isinstance(group_as_index, CFTimeIndex): from xarray.core.resample_cftime import CFTimeGrouper + if not isinstance(self.freq, str | BaseCFTimeOffset): + raise ValueError( + "Resample frequency must be a string or 'BaseCFTimeOffset' " + "object when resampling a 'CFTimeIndex'. Received " + f"{type(self.freq)} instead." + ) + self.index_grouper = CFTimeGrouper( freq=self.freq, closed=self.closed, @@ -388,6 +395,12 @@ def _init_properties(self, group: T_Group) -> None: offset=offset, ) else: + if isinstance(self.freq, BaseCFTimeOffset): + raise ValueError( + "'BaseCFTimeOffset' resample frequencies are only supported " + "when resampling a 'CFTimeIndex'" + ) + self.index_grouper = pd.Grouper( # TODO remove once requiring pandas >= 2.2 freq=_new_to_legacy_freq(self.freq), From d63678430fe440db22051c3a5761b2ef5982f095 Mon Sep 17 00:00:00 2001 From: Oliver Higgs Date: Thu, 29 Aug 2024 18:04:47 +1000 Subject: [PATCH 02/16] Update whats-new --- doc/whats-new.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 712ad68aeb3..9ec3f870793 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -60,6 +60,10 @@ Bug fixes - Fix deprecation warning that was raised when calling ``np.array`` on an ``xr.DataArray`` in NumPy 2.0 (:issue:`9312`, :pull:`9393`) By `Andrew Scherer `_. +- Fix support for using ``pandas.BaseOffset``, ``pandas.Timedelta``, and + ``datetime.timedelta`` objects as ``resample`` frequencies + (:issue:`9408`, :pull:`9413`). + By `Oliver Higgs `_. Documentation ~~~~~~~~~~~~~ From 0d37fddd21750107fe3175c0f90d7c8acb876cd3 Mon Sep 17 00:00:00 2001 From: Oliver Higgs Date: Sun, 1 Sep 2024 12:10:38 +1000 Subject: [PATCH 03/16] Fix types --- xarray/core/common.py | 14 +++----------- xarray/groupers.py | 4 ++-- 2 files changed, 5 insertions(+), 13 deletions(-) diff --git a/xarray/core/common.py b/xarray/core/common.py index c9c07be8c8e..af115a8ef09 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -1059,7 +1059,6 @@ def _resample( """ # TODO support non-string indexer after removing the old API. - from xarray.coding.cftime_offsets import BaseCFTimeOffset from xarray.core.dataarray import DataArray from xarray.core.groupby import ResolvedGrouper from xarray.core.resample import RESAMPLE_DIM @@ -1078,14 +1077,7 @@ def _resample( ) grouper: Resampler - if isinstance( - freq, - str - | datetime.timedelta - | pd.Timedelta - | pd.offsets.BaseOffset - | BaseCFTimeOffset, - ): + if isinstance(freq, str | datetime.timedelta | pd.Timedelta | pd.DateOffset): grouper = TimeResampler( freq=freq, closed=closed, label=label, origin=origin, offset=offset ) @@ -1094,8 +1086,8 @@ def _resample( else: raise ValueError( "freq must be an object of type 'str', 'datetime.timedelta', " - "'pandas.Timedelta', 'pandas.offsets.BaseOffset', 'BaseCFTimeOffset', " - f" or 'TimeResampler'. Received {type(freq)} instead." + "'pandas.Timedelta', 'pandas.DateOffset', or 'TimeResampler'. " + f"Received {type(freq)} instead." ) rgrouper = ResolvedGrouper(grouper, group, self) diff --git a/xarray/groupers.py b/xarray/groupers.py index d8a862f1265..ba215c247f7 100644 --- a/xarray/groupers.py +++ b/xarray/groupers.py @@ -336,7 +336,7 @@ class TimeResampler(Resampler): Attributes ---------- - freq : str, pandas.Timestamp, pandas.BaseOffset, datetime.timedelta, BaseCFTimeOffset + freq : str, datetime.timedelta, pandas.Timestamp, pandas.DateOffset Frequency to resample to. See `Pandas frequency aliases `_ for a list of possible values. @@ -358,7 +358,7 @@ class TimeResampler(Resampler): An offset timedelta added to the origin. """ - freq: str | pd.Timedelta | pd.offsets.BaseOffset | datetime.timedelta + freq: str | datetime.timedelta | pd.Timedelta | pd.DateOffset closed: SideOptions | None = field(default=None) label: SideOptions | None = field(default=None) origin: str | DatetimeLike = field(default="start_day") From 6801b79488785ef2bfda951f4d11174162161f15 Mon Sep 17 00:00:00 2001 From: Oliver Higgs Date: Tue, 3 Sep 2024 15:43:59 +1000 Subject: [PATCH 04/16] Add unit test --- xarray/tests/test_groupby.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index fc04b49fabc..ec1723cfd3f 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -1,5 +1,6 @@ from __future__ import annotations +import datetime import operator import warnings from unittest import mock @@ -1813,6 +1814,30 @@ def resample_as_pandas(array, *args, **kwargs): with pytest.raises(ValueError): reverse.resample(time="1D").mean() + @pytest.mark.parametrize("use_cftime", [True, False]) + def test_resample_dtype(self, use_cftime: bool) -> None: + if use_cftime and not has_cftime: + pytest.skip() + array = DataArray( + np.arange(10), + [ + ( + "time", + xr.date_range( + "2000-01-01", freq="6h", periods=10, use_cftime=use_cftime + ), + ) + ], + ) + test_resample_freqs = [ + "10min", + pd.Timedelta(hours=2), + pd.offsets.MonthBegin(), + datetime.timedelta(days=1, hours=6), + ] + for freq in test_resample_freqs: + array.resample(time=freq) + @pytest.mark.parametrize("use_cftime", [True, False]) def test_resample_doctest(self, use_cftime: bool) -> None: # run the doctest example here so we are not surprised From 36b8a634b2504b9b707caa72c6a0088b1a0aed77 Mon Sep 17 00:00:00 2001 From: Oliver Higgs Date: Wed, 4 Sep 2024 00:09:08 +1000 Subject: [PATCH 05/16] Fix test --- xarray/tests/test_groupby.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index ec1723cfd3f..d7efd56560b 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -1829,12 +1829,13 @@ def test_resample_dtype(self, use_cftime: bool) -> None: ) ], ) - test_resample_freqs = [ - "10min", - pd.Timedelta(hours=2), - pd.offsets.MonthBegin(), - datetime.timedelta(days=1, hours=6), - ] + test_resample_freqs = ["10min"] + if not use_cftime: + test_resample_freqs += [ + pd.Timedelta(hours=2), + pd.offsets.MonthBegin(), + datetime.timedelta(days=1, hours=6), + ] for freq in test_resample_freqs: array.resample(time=freq) From 977c7a4ee51f1f0a0292a928248a6750b7da8b95 Mon Sep 17 00:00:00 2001 From: Oliver Higgs Date: Wed, 4 Sep 2024 13:37:02 +1000 Subject: [PATCH 06/16] Support more dtypes for CFTimeIndex resampling --- xarray/coding/cftime_offsets.py | 37 ++++++++++++++++++++++++++++++++- xarray/core/dataarray.py | 6 +++++- xarray/core/dataset.py | 6 +++++- xarray/core/resample_cftime.py | 6 +++++- xarray/groupers.py | 7 ------- xarray/tests/test_groupby.py | 37 ++++++++++++++++++++++++++------- 6 files changed, 80 insertions(+), 19 deletions(-) diff --git a/xarray/coding/cftime_offsets.py b/xarray/coding/cftime_offsets.py index f7bed2c13ef..20ef9805d6f 100644 --- a/xarray/coding/cftime_offsets.py +++ b/xarray/coding/cftime_offsets.py @@ -772,11 +772,18 @@ def _emit_freq_deprecation_warning(deprecated_freq): emit_user_level_warning(message, FutureWarning) -def to_offset(freq: BaseCFTimeOffset | str, warn: bool = True) -> BaseCFTimeOffset: +def to_offset( + freq: BaseCFTimeOffset | str | timedelta | pd.Timedelta | pd.DateOffset, + warn: bool = True, +) -> BaseCFTimeOffset: """Convert a frequency string to the appropriate subclass of BaseCFTimeOffset.""" if isinstance(freq, BaseCFTimeOffset): return freq + if isinstance(freq, timedelta | pd.Timedelta): + return delta_to_tick(freq) + if isinstance(freq, pd.DateOffset): + freq = freq.freqstr match = re.match(_PATTERN, freq) if match is None: @@ -791,6 +798,34 @@ def to_offset(freq: BaseCFTimeOffset | str, warn: bool = True) -> BaseCFTimeOffs return _FREQUENCIES[freq](n=multiples) +def delta_to_tick(delta: timedelta | pd.Timedelta) -> Tick: + """Adapted from pandas.tslib.delta_to_tick""" + if isinstance(delta, pd.Timedelta) and delta.nanoseconds != 0: + # pandas.Timedelta has nanoseconds, but these are not supported + raise ValueError( + "Unable to convert 'pandas.Timedelta' object with non-zero " + "nanoseconds to 'CFTimeOffset' object" + ) + if delta.microseconds == 0: + if delta.seconds == 0: + return Day(n=delta.days) + else: + seconds = delta.days * 86400 + delta.seconds + if seconds % 3600 == 0: + return Hour(n=seconds // 3600) + elif seconds % 60 == 0: + return Minute(n=seconds // 60) + else: + return Second(n=seconds) + else: + # Regardless of the days and seconds this will always be a Millsecond + # or Microsecond object + if delta.microseconds % 1_000 == 0: + return Millisecond(n=delta.microseconds // 1_000) + else: + return Microsecond(n=delta.microseconds) + + def to_cftime_datetime(date_str_or_date, calendar=None): if cftime is None: raise ModuleNotFoundError("No module named 'cftime'") diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index f17bd057c03..4a48cb2b3e9 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -7252,7 +7252,11 @@ def resample( offset: pd.Timedelta | datetime.timedelta | str | None = None, origin: str | DatetimeLike = "start_day", restore_coord_dims: bool | None = None, - **indexer_kwargs: str | Resampler, + **indexer_kwargs: str + | datetime.timedelta + | pd.Timedelta + | pd.DateOffset + | Resampler, ) -> DataArrayResample: """Returns a Resample object for performing resampling operations. diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index ae387da7e8e..63c53ed5768 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -10693,7 +10693,11 @@ def resample( offset: pd.Timedelta | datetime.timedelta | str | None = None, origin: str | DatetimeLike = "start_day", restore_coord_dims: bool | None = None, - **indexer_kwargs: str | Resampler, + **indexer_kwargs: str + | datetime.timedelta + | pd.Timedelta + | pd.DateOffset + | Resampler, ) -> DatasetResample: """Returns a Resample object for performing resampling operations. diff --git a/xarray/core/resample_cftime.py b/xarray/core/resample_cftime.py index 2149a62dfb5..2c9b13151ab 100644 --- a/xarray/core/resample_cftime.py +++ b/xarray/core/resample_cftime.py @@ -75,7 +75,11 @@ class CFTimeGrouper: def __init__( self, - freq: str | BaseCFTimeOffset, + freq: str + | datetime.timedelta + | pd.Timedelta + | pd.DateOffset + | BaseCFTimeOffset, closed: SideOptions | None = None, label: SideOptions | None = None, origin: str | CFTimeDatetime = "start_day", diff --git a/xarray/groupers.py b/xarray/groupers.py index ba215c247f7..b1a58714942 100644 --- a/xarray/groupers.py +++ b/xarray/groupers.py @@ -380,13 +380,6 @@ def _init_properties(self, group: T_Group) -> None: if isinstance(group_as_index, CFTimeIndex): from xarray.core.resample_cftime import CFTimeGrouper - if not isinstance(self.freq, str | BaseCFTimeOffset): - raise ValueError( - "Resample frequency must be a string or 'BaseCFTimeOffset' " - "object when resampling a 'CFTimeIndex'. Received " - f"{type(self.freq)} instead." - ) - self.index_grouper = CFTimeGrouper( freq=self.freq, closed=self.closed, diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index d7efd56560b..164d7fb0f49 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -758,7 +758,6 @@ def test_groupby_none_group_name() -> None: def test_groupby_getitem(dataset) -> None: - assert_identical(dataset.sel(x=["a"]), dataset.groupby("x")["a"]) assert_identical(dataset.sel(z=[1]), dataset.groupby("z")[1]) assert_identical(dataset.foo.sel(x=["a"]), dataset.foo.groupby("x")["a"]) @@ -1829,13 +1828,12 @@ def test_resample_dtype(self, use_cftime: bool) -> None: ) ], ) - test_resample_freqs = ["10min"] - if not use_cftime: - test_resample_freqs += [ - pd.Timedelta(hours=2), - pd.offsets.MonthBegin(), - datetime.timedelta(days=1, hours=6), - ] + test_resample_freqs = ( + "10min", + pd.Timedelta(hours=2), + pd.offsets.MonthBegin(), + datetime.timedelta(days=1, hours=6), + ) for freq in test_resample_freqs: array.resample(time=freq) @@ -2258,6 +2256,29 @@ def test_resample_and_first(self) -> None: result = actual.reduce(method) assert_equal(expected, result) + @pytest.mark.parametrize("use_cftime", [True, False]) + def test_resample_dtype(self, use_cftime: bool) -> None: + if use_cftime and not has_cftime: + pytest.skip() + times = xr.date_range( + "2000-01-01", freq="6h", periods=10, use_cftime=use_cftime + ) + ds = Dataset( + { + "foo": (["time", "x", "y"], np.random.randn(10, 5, 3)), + "bar": ("time", np.random.randn(10), {"meta": "data"}), + "time": times, + } + ) + test_resample_freqs = [ + "10min", + pd.Timedelta(hours=2), + pd.offsets.MonthBegin(), + datetime.timedelta(days=1, hours=6), + ] + for freq in test_resample_freqs: + ds.resample(time=freq) + def test_resample_min_count(self) -> None: times = pd.date_range("2000-01-01", freq="6h", periods=10) ds = Dataset( From 769e8505b91c97ef14c3084a09765dac48e791fd Mon Sep 17 00:00:00 2001 From: Oliver Higgs Date: Wed, 4 Sep 2024 13:56:02 +1000 Subject: [PATCH 07/16] Tidy resample type hints --- xarray/core/common.py | 7 +++++-- xarray/core/dataarray.py | 13 +++++-------- xarray/core/dataset.py | 13 +++++-------- xarray/core/resample_cftime.py | 8 ++------ xarray/core/types.py | 2 ++ xarray/groupers.py | 12 +++++++++--- 6 files changed, 28 insertions(+), 27 deletions(-) diff --git a/xarray/core/common.py b/xarray/core/common.py index af115a8ef09..f9c203d3249 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -28,6 +28,9 @@ except ImportError: cftime = None +if TYPE_CHECKING: + from xarray.core.types import ResampleCompatible + # Used as a sentinel value to indicate a all dimensions ALL_DIMS = ... @@ -890,7 +893,7 @@ def rolling_exp( def _resample( self, resample_cls: type[T_Resample], - indexer: Mapping[Hashable, str | Resampler] | None, + indexer: Mapping[Hashable, ResampleCompatible | Resampler] | None, skipna: bool | None, closed: SideOptions | None, label: SideOptions | None, @@ -1077,7 +1080,7 @@ def _resample( ) grouper: Resampler - if isinstance(freq, str | datetime.timedelta | pd.Timedelta | pd.DateOffset): + if isinstance(freq, ResampleCompatible): grouper = TimeResampler( freq=freq, closed=closed, label=label, origin=origin, offset=offset ) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 4a48cb2b3e9..643e50c6307 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -110,6 +110,7 @@ QueryEngineOptions, QueryParserOptions, ReindexMethodOptions, + ResampleCompatible, Self, SideOptions, T_ChunkDimFreq, @@ -7244,7 +7245,7 @@ def coarsen( @_deprecate_positional_args("v2024.07.0") def resample( self, - indexer: Mapping[Hashable, str | Resampler] | None = None, + indexer: Mapping[Hashable, ResampleCompatible | Resampler] | None = None, *, skipna: bool | None = None, closed: SideOptions | None = None, @@ -7252,11 +7253,7 @@ def resample( offset: pd.Timedelta | datetime.timedelta | str | None = None, origin: str | DatetimeLike = "start_day", restore_coord_dims: bool | None = None, - **indexer_kwargs: str - | datetime.timedelta - | pd.Timedelta - | pd.DateOffset - | Resampler, + **indexer_kwargs: ResampleCompatible | Resampler, ) -> DataArrayResample: """Returns a Resample object for performing resampling operations. @@ -7267,7 +7264,7 @@ def resample( Parameters ---------- - indexer : Mapping of Hashable to str, optional + indexer : Mapping of Hashable to str, datetime.timedelta, pd.Timedelta, pd.DateOffset, or Resampler, optional Mapping from the dimension name to resample frequency [1]_. The dimension must be datetime-like. skipna : bool, optional @@ -7291,7 +7288,7 @@ def resample( restore_coord_dims : bool, optional If True, also restore the dimension order of multi-dimensional coordinates. - **indexer_kwargs : str + **indexer_kwargs : str, datetime.timedelta, pd.Timedelta, pd.DateOffset, or Resampler The keyword arguments form of ``indexer``. One of indexer or indexer_kwargs must be provided. diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 63c53ed5768..a5e6f97ddfc 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -162,6 +162,7 @@ QueryEngineOptions, QueryParserOptions, ReindexMethodOptions, + ResampleCompatible, SideOptions, T_ChunkDimFreq, T_DatasetPadConstantValues, @@ -10685,7 +10686,7 @@ def coarsen( @_deprecate_positional_args("v2024.07.0") def resample( self, - indexer: Mapping[Any, str | Resampler] | None = None, + indexer: Mapping[Any, ResampleCompatible | Resampler] | None = None, *, skipna: bool | None = None, closed: SideOptions | None = None, @@ -10693,11 +10694,7 @@ def resample( offset: pd.Timedelta | datetime.timedelta | str | None = None, origin: str | DatetimeLike = "start_day", restore_coord_dims: bool | None = None, - **indexer_kwargs: str - | datetime.timedelta - | pd.Timedelta - | pd.DateOffset - | Resampler, + **indexer_kwargs: ResampleCompatible | Resampler, ) -> DatasetResample: """Returns a Resample object for performing resampling operations. @@ -10708,7 +10705,7 @@ def resample( Parameters ---------- - indexer : Mapping of Hashable to str, optional + indexer : Mapping of Hashable to str, datetime.timedelta, pd.Timedelta, pd.DateOffset, or Resampler, optional Mapping from the dimension name to resample frequency [1]_. The dimension must be datetime-like. skipna : bool, optional @@ -10732,7 +10729,7 @@ def resample( restore_coord_dims : bool, optional If True, also restore the dimension order of multi-dimensional coordinates. - **indexer_kwargs : str + **indexer_kwargs : str, datetime.timedelta, pd.Timedelta, pd.DateOffset, or Resampler The keyword arguments form of ``indexer``. One of indexer or indexer_kwargs must be provided. diff --git a/xarray/core/resample_cftime.py b/xarray/core/resample_cftime.py index 2c9b13151ab..c084640e763 100644 --- a/xarray/core/resample_cftime.py +++ b/xarray/core/resample_cftime.py @@ -58,7 +58,7 @@ from xarray.core.types import SideOptions if typing.TYPE_CHECKING: - from xarray.core.types import CFTimeDatetime + from xarray.core.types import CFTimeDatetime, ResampleCompatible class CFTimeGrouper: @@ -75,11 +75,7 @@ class CFTimeGrouper: def __init__( self, - freq: str - | datetime.timedelta - | pd.Timedelta - | pd.DateOffset - | BaseCFTimeOffset, + freq: ResampleCompatible | BaseCFTimeOffset, closed: SideOptions | None = None, label: SideOptions | None = None, origin: str | CFTimeDatetime = "start_day", diff --git a/xarray/core/types.py b/xarray/core/types.py index d3a8e7a9f4c..d008014dd97 100644 --- a/xarray/core/types.py +++ b/xarray/core/types.py @@ -309,3 +309,5 @@ def copy( Bins = Union[ int, Sequence[int], Sequence[float], Sequence[pd.Timestamp], np.ndarray, pd.Index ] + +ResampleCompatible = Union[str, datetime.timedelta, pd.Timedelta, pd.DateOffset] diff --git a/xarray/groupers.py b/xarray/groupers.py index b1a58714942..3acdeb21167 100644 --- a/xarray/groupers.py +++ b/xarray/groupers.py @@ -21,7 +21,13 @@ from xarray.core.groupby import T_Group, _DummyGroup from xarray.core.indexes import safe_cast_to_index from xarray.core.resample_cftime import CFTimeGrouper -from xarray.core.types import Bins, DatetimeLike, GroupIndices, SideOptions +from xarray.core.types import ( + Bins, + DatetimeLike, + GroupIndices, + ResampleCompatible, + SideOptions, +) from xarray.core.variable import Variable __all__ = [ @@ -336,7 +342,7 @@ class TimeResampler(Resampler): Attributes ---------- - freq : str, datetime.timedelta, pandas.Timestamp, pandas.DateOffset + freq : str, datetime.timedelta, pandas.Timestamp, or pandas.DateOffset Frequency to resample to. See `Pandas frequency aliases `_ for a list of possible values. @@ -358,7 +364,7 @@ class TimeResampler(Resampler): An offset timedelta added to the origin. """ - freq: str | datetime.timedelta | pd.Timedelta | pd.DateOffset + freq: ResampleCompatible closed: SideOptions | None = field(default=None) label: SideOptions | None = field(default=None) origin: str | DatetimeLike = field(default="start_day") From ab5e8f4a876c7d6234737e43fd3dab14eb3028cd Mon Sep 17 00:00:00 2001 From: Oliver Higgs Date: Wed, 4 Sep 2024 14:22:15 +1000 Subject: [PATCH 08/16] Fix some mypy bugs --- xarray/core/common.py | 2 +- xarray/core/types.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/core/common.py b/xarray/core/common.py index f9c203d3249..3c6e75e0e26 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -900,7 +900,7 @@ def _resample( offset: pd.Timedelta | datetime.timedelta | str | None, origin: str | DatetimeLike, restore_coord_dims: bool | None, - **indexer_kwargs: str | Resampler, + **indexer_kwargs: ResampleCompatible | Resampler, ) -> T_Resample: """Returns a Resample object for performing resampling operations. diff --git a/xarray/core/types.py b/xarray/core/types.py index d008014dd97..b3064c4dd18 100644 --- a/xarray/core/types.py +++ b/xarray/core/types.py @@ -310,4 +310,4 @@ def copy( int, Sequence[int], Sequence[float], Sequence[pd.Timestamp], np.ndarray, pd.Index ] -ResampleCompatible = Union[str, datetime.timedelta, pd.Timedelta, pd.DateOffset] +ResampleCompatible: TypeAlias = str | datetime.timedelta | pd.Timedelta | pd.DateOffset From 0a6a5905f3931a9f31ed8793e4ef72228cce63c9 Mon Sep 17 00:00:00 2001 From: Oliver Higgs Date: Wed, 4 Sep 2024 15:09:00 +1000 Subject: [PATCH 09/16] Fixes --- xarray/core/common.py | 4 +--- xarray/tests/test_groupby.py | 4 ++-- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/xarray/core/common.py b/xarray/core/common.py index 3c6e75e0e26..1ed1398746f 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -14,6 +14,7 @@ from xarray.core import dtypes, duck_array_ops, formatting, formatting_html, ops from xarray.core.indexing import BasicIndexer, ExplicitlyIndexed from xarray.core.options import OPTIONS, _get_keep_attrs +from xarray.core.types import ResampleCompatible from xarray.core.utils import ( Frozen, either_dict_or_kwargs, @@ -28,9 +29,6 @@ except ImportError: cftime = None -if TYPE_CHECKING: - from xarray.core.types import ResampleCompatible - # Used as a sentinel value to indicate a all dimensions ALL_DIMS = ... diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index 164d7fb0f49..d1511d8a27d 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -2270,12 +2270,12 @@ def test_resample_dtype(self, use_cftime: bool) -> None: "time": times, } ) - test_resample_freqs = [ + test_resample_freqs = ( "10min", pd.Timedelta(hours=2), pd.offsets.MonthBegin(), datetime.timedelta(days=1, hours=6), - ] + ) for freq in test_resample_freqs: ds.resample(time=freq) From cb6795eb48776a48e2d6d9329d5081c18e6412e7 Mon Sep 17 00:00:00 2001 From: Oliver Higgs Date: Wed, 4 Sep 2024 23:19:37 +1000 Subject: [PATCH 10/16] Fix tests --- xarray/tests/test_groupby.py | 147 +++++++++++++++++++++-------------- 1 file changed, 90 insertions(+), 57 deletions(-) diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index d1511d8a27d..2bf908909b5 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -3,6 +3,7 @@ import datetime import operator import warnings +from itertools import product from unittest import mock import numpy as np @@ -14,7 +15,7 @@ from xarray import DataArray, Dataset, Variable from xarray.core.alignment import broadcast from xarray.core.groupby import _consolidate_slices -from xarray.core.types import InterpOptions +from xarray.core.types import InterpOptions, ResampleCompatible from xarray.groupers import ( BinGrouper, EncodedGroups, @@ -1772,8 +1773,24 @@ def test_groupby_fastpath_for_monotonic(self, use_flox: bool) -> None: class TestDataArrayResample: - @pytest.mark.parametrize("use_cftime", [True, False]) - def test_resample(self, use_cftime: bool) -> None: + @pytest.mark.parametrize( + "use_cftime,resample_freq", + product( + [True, False], + [ + "24h", + "123456s", + "1234567890us", + pd.Timedelta(hours=2), + pd.offsets.MonthBegin(), + pd.offsets.Second(123456), + datetime.timedelta(days=1, hours=6), + ], + ), + ) + def test_resample( + self, use_cftime: bool, resample_freq: ResampleCompatible + ) -> None: if use_cftime and not has_cftime: pytest.skip() times = xr.date_range( @@ -1795,47 +1812,23 @@ def resample_as_pandas(array, *args, **kwargs): array = DataArray(np.arange(10), [("time", times)]) - actual = array.resample(time="24h").mean() - expected = resample_as_pandas(array, "24h") + actual = array.resample(time=resample_freq).mean() + expected = resample_as_pandas(array, resample_freq) assert_identical(expected, actual) - actual = array.resample(time="24h").reduce(np.mean) + actual = array.resample(time=resample_freq).reduce(np.mean) assert_identical(expected, actual) - actual = array.resample(time="24h", closed="right").mean() - expected = resample_as_pandas(array, "24h", closed="right") + actual = array.resample(time=resample_freq, closed="right").mean() + expected = resample_as_pandas(array, resample_freq, closed="right") assert_identical(expected, actual) with pytest.raises(ValueError, match=r"Index must be monotonic"): - array[[2, 0, 1]].resample(time="1D") + array[[2, 0, 1]].resample(time=resample_freq) reverse = array.isel(time=slice(-1, None, -1)) with pytest.raises(ValueError): - reverse.resample(time="1D").mean() - - @pytest.mark.parametrize("use_cftime", [True, False]) - def test_resample_dtype(self, use_cftime: bool) -> None: - if use_cftime and not has_cftime: - pytest.skip() - array = DataArray( - np.arange(10), - [ - ( - "time", - xr.date_range( - "2000-01-01", freq="6h", periods=10, use_cftime=use_cftime - ), - ) - ], - ) - test_resample_freqs = ( - "10min", - pd.Timedelta(hours=2), - pd.offsets.MonthBegin(), - datetime.timedelta(days=1, hours=6), - ) - for freq in test_resample_freqs: - array.resample(time=freq) + reverse.resample(time=resample_freq).mean() @pytest.mark.parametrize("use_cftime", [True, False]) def test_resample_doctest(self, use_cftime: bool) -> None: @@ -2230,6 +2223,69 @@ def test_resample_origin(self) -> None: class TestDatasetResample: + @pytest.mark.parametrize( + "use_cftime,resample_freq", + product( + [True, False], + [ + "24h", + "123456s", + "1234567890us", + pd.Timedelta(hours=2), + pd.offsets.MonthBegin(), + pd.offsets.Second(123456), + datetime.timedelta(days=1, hours=6), + ], + ), + ) + def test_resample( + self, use_cftime: bool, resample_freq: ResampleCompatible + ) -> None: + if use_cftime and not has_cftime: + pytest.skip() + times = xr.date_range( + "2000-01-01", freq="6h", periods=10, use_cftime=use_cftime + ) + + def resample_as_pandas(ds, *args, **kwargs): + ds_ = ds.copy(deep=True) + if use_cftime: + ds_["time"] = times.to_datetimeindex() + result = Dataset.from_dataframe( + ds_.to_dataframe().resample(*args, **kwargs).mean() + ) + if use_cftime: + result = result.convert_calendar( + calendar="standard", use_cftime=use_cftime + ) + return result + + ds = Dataset( + { + "foo": ("time", np.random.randint(1, 1000, 10)), + "bar": ("time", np.random.randint(1, 1000, 10)), + "time": times, + } + ) + + actual = ds.resample(time=resample_freq).mean() + expected = resample_as_pandas(ds, resample_freq) + assert_identical(expected, actual) + + actual = ds.resample(time=resample_freq).reduce(np.mean) + assert_identical(expected, actual) + + actual = ds.resample(time=resample_freq, closed="right").mean() + expected = resample_as_pandas(ds, resample_freq, closed="right") + assert_identical(expected, actual) + + with pytest.raises(ValueError, match=r"Index must be monotonic"): + ds.isel(time=[2, 0, 1]).resample(time=resample_freq) + + reverse = ds.isel(time=slice(-1, None, -1)) + with pytest.raises(ValueError): + reverse.resample(time=resample_freq).mean() + def test_resample_and_first(self) -> None: times = pd.date_range("2000-01-01", freq="6h", periods=10) ds = Dataset( @@ -2256,29 +2312,6 @@ def test_resample_and_first(self) -> None: result = actual.reduce(method) assert_equal(expected, result) - @pytest.mark.parametrize("use_cftime", [True, False]) - def test_resample_dtype(self, use_cftime: bool) -> None: - if use_cftime and not has_cftime: - pytest.skip() - times = xr.date_range( - "2000-01-01", freq="6h", periods=10, use_cftime=use_cftime - ) - ds = Dataset( - { - "foo": (["time", "x", "y"], np.random.randn(10, 5, 3)), - "bar": ("time", np.random.randn(10), {"meta": "data"}), - "time": times, - } - ) - test_resample_freqs = ( - "10min", - pd.Timedelta(hours=2), - pd.offsets.MonthBegin(), - datetime.timedelta(days=1, hours=6), - ) - for freq in test_resample_freqs: - ds.resample(time=freq) - def test_resample_min_count(self) -> None: times = pd.date_range("2000-01-01", freq="6h", periods=10) ds = Dataset( From 818c10e6257c744defadd9104de8326448bae838 Mon Sep 17 00:00:00 2001 From: Oliver Higgs Date: Wed, 4 Sep 2024 23:57:20 +1000 Subject: [PATCH 11/16] WIP --- xarray/tests/test_groupby.py | 48 +++++++++++++++++------------------- 1 file changed, 22 insertions(+), 26 deletions(-) diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index 2bf908909b5..2cb247ff1eb 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -1773,20 +1773,18 @@ def test_groupby_fastpath_for_monotonic(self, use_flox: bool) -> None: class TestDataArrayResample: + @pytest.mark.parametrize("use_cftime", [True, False]) @pytest.mark.parametrize( - "use_cftime,resample_freq", - product( - [True, False], - [ - "24h", - "123456s", - "1234567890us", - pd.Timedelta(hours=2), - pd.offsets.MonthBegin(), - pd.offsets.Second(123456), - datetime.timedelta(days=1, hours=6), - ], - ), + "resample_freq", + [ + "24h", + "123456s", + "1234567890us", + pd.Timedelta(hours=2), + pd.offsets.MonthBegin(), + pd.offsets.Second(123456), + datetime.timedelta(days=1, hours=6), + ], ) def test_resample( self, use_cftime: bool, resample_freq: ResampleCompatible @@ -2223,20 +2221,18 @@ def test_resample_origin(self) -> None: class TestDatasetResample: + @pytest.mark.parametrize("use_cftime", [True, False]) @pytest.mark.parametrize( - "use_cftime,resample_freq", - product( - [True, False], - [ - "24h", - "123456s", - "1234567890us", - pd.Timedelta(hours=2), - pd.offsets.MonthBegin(), - pd.offsets.Second(123456), - datetime.timedelta(days=1, hours=6), - ], - ), + "resample_freq", + [ + "24h", + "123456s", + "1234567890us", + pd.Timedelta(hours=2), + pd.offsets.MonthBegin(), + pd.offsets.Second(123456), + datetime.timedelta(days=1, hours=6), + ], ) def test_resample( self, use_cftime: bool, resample_freq: ResampleCompatible From a89561dd2f5e8c0fa2c3c5491a3c6900e9a43e66 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 4 Sep 2024 13:57:59 +0000 Subject: [PATCH 12/16] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/tests/test_groupby.py | 1 - 1 file changed, 1 deletion(-) diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index 2cb247ff1eb..9d7302017ce 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -3,7 +3,6 @@ import datetime import operator import warnings -from itertools import product from unittest import mock import numpy as np From 88c74597f25ffbbcaecc48ae67dd22d7ccd62876 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Thu, 5 Sep 2024 08:14:12 -0600 Subject: [PATCH 13/16] Update doc/whats-new.rst --- doc/whats-new.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 9ec3f870793..cd8cb9a04b6 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -60,7 +60,7 @@ Bug fixes - Fix deprecation warning that was raised when calling ``np.array`` on an ``xr.DataArray`` in NumPy 2.0 (:issue:`9312`, :pull:`9393`) By `Andrew Scherer `_. -- Fix support for using ``pandas.BaseOffset``, ``pandas.Timedelta``, and +- Fix support for using ``pandas.DateOffset``, ``pandas.Timedelta``, and ``datetime.timedelta`` objects as ``resample`` frequencies (:issue:`9408`, :pull:`9413`). By `Oliver Higgs `_. From 7649cb4d745903a67c6107691d3a2a9545dafcff Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Sat, 7 Sep 2024 07:17:41 -0600 Subject: [PATCH 14/16] Apply suggestions from code review Co-authored-by: Spencer Clark --- xarray/coding/cftime_offsets.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/coding/cftime_offsets.py b/xarray/coding/cftime_offsets.py index 20ef9805d6f..d91236c5885 100644 --- a/xarray/coding/cftime_offsets.py +++ b/xarray/coding/cftime_offsets.py @@ -783,7 +783,7 @@ def to_offset( if isinstance(freq, timedelta | pd.Timedelta): return delta_to_tick(freq) if isinstance(freq, pd.DateOffset): - freq = freq.freqstr + freq = _legacy_to_new_freq(freq.freqstr) match = re.match(_PATTERN, freq) if match is None: @@ -818,7 +818,7 @@ def delta_to_tick(delta: timedelta | pd.Timedelta) -> Tick: else: return Second(n=seconds) else: - # Regardless of the days and seconds this will always be a Millsecond + # Regardless of the days and seconds this will always be a Millisecond # or Microsecond object if delta.microseconds % 1_000 == 0: return Millisecond(n=delta.microseconds // 1_000) From 50b69d98e061f61640d9a462042108446d05ba58 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sat, 7 Sep 2024 12:41:17 -0400 Subject: [PATCH 15/16] Fix mypy error --- xarray/coding/cftime_offsets.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/xarray/coding/cftime_offsets.py b/xarray/coding/cftime_offsets.py index d91236c5885..79158dac40e 100644 --- a/xarray/coding/cftime_offsets.py +++ b/xarray/coding/cftime_offsets.py @@ -47,7 +47,7 @@ from collections.abc import Mapping from datetime import datetime, timedelta from functools import partial -from typing import TYPE_CHECKING, ClassVar, Literal +from typing import TYPE_CHECKING, ClassVar, Literal, TypeVar import numpy as np import pandas as pd @@ -80,6 +80,7 @@ DayOption: TypeAlias = Literal["start", "end"] +T_FreqStr = TypeVar("T_FreqStr", str, None) def _nanosecond_precision_timestamp(*args, **kwargs): @@ -783,7 +784,7 @@ def to_offset( if isinstance(freq, timedelta | pd.Timedelta): return delta_to_tick(freq) if isinstance(freq, pd.DateOffset): - freq = _legacy_to_new_freq(freq.freqstr) + freq = _new_freq(freq.freqstr) match = re.match(_PATTERN, freq) if match is None: @@ -1367,7 +1368,7 @@ def _new_to_legacy_freq(freq): return freq -def _legacy_to_new_freq(freq): +def _legacy_to_new_freq(freq: T_FreqStr) -> T_FreqStr: # to avoid internal deprecation warnings when freq is determined using pandas < 2.2 # TODO: remove once requiring pandas >= 2.2 From 564ae213976fbbf55a87906ee1180eb4840148e5 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sat, 7 Sep 2024 12:43:12 -0400 Subject: [PATCH 16/16] Fix bad edit --- xarray/coding/cftime_offsets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/coding/cftime_offsets.py b/xarray/coding/cftime_offsets.py index 79158dac40e..0167119e98e 100644 --- a/xarray/coding/cftime_offsets.py +++ b/xarray/coding/cftime_offsets.py @@ -784,7 +784,7 @@ def to_offset( if isinstance(freq, timedelta | pd.Timedelta): return delta_to_tick(freq) if isinstance(freq, pd.DateOffset): - freq = _new_freq(freq.freqstr) + freq = _legacy_to_new_freq(freq.freqstr) match = re.match(_PATTERN, freq) if match is None: