Delete base and loffset parameters to resample (#9233)

* Remove `base`, `loffset` in Resampler * resample: Remove how/dim checks * lint * cleanups * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update test_cftimeindex_resample.py * cleanup --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
pydata · Jul 19, 2024 · 39d5b39 · 39d5b39
1 parent 3013fb4
commit 39d5b39
Show file tree

Hide file tree

Showing 9 changed files with 34 additions and 391 deletions.
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
@@ -42,6 +42,11 @@ New Features
 
 Breaking changes
 ~~~~~~~~~~~~~~~~
+- The ``base`` and ``loffset`` parameters to :py:meth:`Dataset.resample` and :py:meth:`DataArray.resample`
+  are now removed. These parameters have been deprecated since v2023.03.0. Using the
+  ``origin`` or ``offset`` parameters is recommended as a replacement for using
+  the ``base`` parameter and using time offset arithmetic is recommended as a
+  replacement for using the ``loffset`` parameter.
 
 
 Deprecations

diff --git a/xarray/core/common.py b/xarray/core/common.py
@@ -881,10 +881,8 @@ def _resample(
         skipna: bool | None,
         closed: SideOptions | None,
         label: SideOptions | None,
-        base: int | None,
         offset: pd.Timedelta | datetime.timedelta | str | None,
         origin: str | DatetimeLike,
-        loffset: datetime.timedelta | str | None,
         restore_coord_dims: bool | None,
         **indexer_kwargs: str | Resampler,
     ) -> T_Resample:
@@ -906,16 +904,6 @@ def _resample(
             Side of each interval to treat as closed.
         label : {"left", "right"}, optional
             Side of each interval to use for labeling.
-        base : int, optional
-            For frequencies that evenly subdivide 1 day, the "origin" of the
-            aggregated intervals. For example, for "24H" frequency, base could
-            range from 0 through 23.
-
-            .. deprecated:: 2023.03.0
-                Following pandas, the ``base`` parameter is deprecated in favor
-                of the ``origin`` and ``offset`` parameters, and will be removed
-                in a future version of xarray.
-
         origin : {'epoch', 'start', 'start_day', 'end', 'end_day'}, pd.Timestamp, datetime.datetime, np.datetime64, or cftime.datetime, default 'start_day'
             The datetime on which to adjust the grouping. The timezone of origin
             must match the timezone of the index.
@@ -928,15 +916,6 @@ def _resample(
             - 'end_day': `origin` is the ceiling midnight of the last day
         offset : pd.Timedelta, datetime.timedelta, or str, default is None
             An offset timedelta added to the origin.
-        loffset : timedelta or str, optional
-            Offset used to adjust the resampled time labels. Some pandas date
-            offset strings are supported.
-
-            .. deprecated:: 2023.03.0
-                Following pandas, the ``loffset`` parameter is deprecated in favor
-                of using time offset arithmetic, and will be removed in a future
-                version of xarray.
-
         restore_coord_dims : bool, optional
             If True, also restore the dimension order of multi-dimensional
             coordinates.
@@ -1072,18 +1051,6 @@ def _resample(
         from xarray.core.groupers import Resampler, TimeResampler
         from xarray.core.resample import RESAMPLE_DIM
 
-        # note: the second argument (now 'skipna') use to be 'dim'
-        if (
-            (skipna is not None and not isinstance(skipna, bool))
-            or ("how" in indexer_kwargs and "how" not in self.dims)
-            or ("dim" in indexer_kwargs and "dim" not in self.dims)
-        ):
-            raise TypeError(
-                "resample() no longer supports the `how` or "
-                "`dim` arguments. Instead call methods on resample "
-                "objects, e.g., data.resample(time='1D').mean()"
-            )
-
         indexer = either_dict_or_kwargs(indexer, indexer_kwargs, "resample")
         if len(indexer) != 1:
             raise ValueError("Resampling only supported along single dimensions.")
@@ -1093,22 +1060,13 @@ def _resample(
         dim_coord = self[dim]
 
         group = DataArray(
-            dim_coord,
-            coords=dim_coord.coords,
-            dims=dim_coord.dims,
-            name=RESAMPLE_DIM,
+            dim_coord, coords=dim_coord.coords, dims=dim_coord.dims, name=RESAMPLE_DIM
         )
 
         grouper: Resampler
         if isinstance(freq, str):
             grouper = TimeResampler(
-                freq=freq,
-                closed=closed,
-                label=label,
-                origin=origin,
-                offset=offset,
-                loffset=loffset,
-                base=base,
+                freq=freq, closed=closed, label=label, origin=origin, offset=offset
             )
         elif isinstance(freq, Resampler):
             grouper = freq

diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
@@ -7245,10 +7245,8 @@ def resample(
         skipna: bool | None = None,
         closed: SideOptions | None = None,
         label: SideOptions | None = None,
-        base: int | None = None,
         offset: pd.Timedelta | datetime.timedelta | str | None = None,
         origin: str | DatetimeLike = "start_day",
-        loffset: datetime.timedelta | str | None = None,
         restore_coord_dims: bool | None = None,
         **indexer_kwargs: str | Resampler,
     ) -> DataArrayResample:
@@ -7270,10 +7268,6 @@ def resample(
             Side of each interval to treat as closed.
         label : {"left", "right"}, optional
             Side of each interval to use for labeling.
-        base : int, optional
-            For frequencies that evenly subdivide 1 day, the "origin" of the
-            aggregated intervals. For example, for "24H" frequency, base could
-            range from 0 through 23.
         origin : {'epoch', 'start', 'start_day', 'end', 'end_day'}, pd.Timestamp, datetime.datetime, np.datetime64, or cftime.datetime, default 'start_day'
             The datetime on which to adjust the grouping. The timezone of origin
             must match the timezone of the index.
@@ -7286,15 +7280,6 @@ def resample(
             - 'end_day': `origin` is the ceiling midnight of the last day
         offset : pd.Timedelta, datetime.timedelta, or str, default is None
             An offset timedelta added to the origin.
-        loffset : timedelta or str, optional
-            Offset used to adjust the resampled time labels. Some pandas date
-            offset strings are supported.
-
-            .. deprecated:: 2023.03.0
-                Following pandas, the ``loffset`` parameter is deprecated in favor
-                of using time offset arithmetic, and will be removed in a future
-                version of xarray.
-
         restore_coord_dims : bool, optional
             If True, also restore the dimension order of multi-dimensional
             coordinates.
@@ -7399,10 +7384,8 @@ def resample(
             skipna=skipna,
             closed=closed,
             label=label,
-            base=base,
             offset=offset,
             origin=origin,
-            loffset=loffset,
             restore_coord_dims=restore_coord_dims,
             **indexer_kwargs,
         )

diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
@@ -10631,10 +10631,8 @@ def resample(
         skipna: bool | None = None,
         closed: SideOptions | None = None,
         label: SideOptions | None = None,
-        base: int | None = None,
         offset: pd.Timedelta | datetime.timedelta | str | None = None,
         origin: str | DatetimeLike = "start_day",
-        loffset: datetime.timedelta | str | None = None,
         restore_coord_dims: bool | None = None,
         **indexer_kwargs: str | Resampler,
     ) -> DatasetResample:
@@ -10656,10 +10654,6 @@ def resample(
             Side of each interval to treat as closed.
         label : {"left", "right"}, optional
             Side of each interval to use for labeling.
-        base : int, optional
-            For frequencies that evenly subdivide 1 day, the "origin" of the
-            aggregated intervals. For example, for "24H" frequency, base could
-            range from 0 through 23.
         origin : {'epoch', 'start', 'start_day', 'end', 'end_day'}, pd.Timestamp, datetime.datetime, np.datetime64, or cftime.datetime, default 'start_day'
             The datetime on which to adjust the grouping. The timezone of origin
             must match the timezone of the index.
@@ -10672,15 +10666,6 @@ def resample(
             - 'end_day': `origin` is the ceiling midnight of the last day
         offset : pd.Timedelta, datetime.timedelta, or str, default is None
             An offset timedelta added to the origin.
-        loffset : timedelta or str, optional
-            Offset used to adjust the resampled time labels. Some pandas date
-            offset strings are supported.
-
-            .. deprecated:: 2023.03.0
-                Following pandas, the ``loffset`` parameter is deprecated in favor
-                of using time offset arithmetic, and will be removed in a future
-                version of xarray.
-
         restore_coord_dims : bool, optional
             If True, also restore the dimension order of multi-dimensional
             coordinates.
@@ -10713,10 +10698,8 @@ def resample(
             skipna=skipna,
             closed=closed,
             label=label,
-            base=base,
             offset=offset,
             origin=origin,
-            loffset=loffset,
             restore_coord_dims=restore_coord_dims,
             **indexer_kwargs,
         )

diff --git a/xarray/core/groupers.py b/xarray/core/groupers.py
@@ -9,7 +9,7 @@
 import datetime
 from abc import ABC, abstractmethod
 from dataclasses import dataclass, field
-from typing import TYPE_CHECKING, Any, Literal, cast
+from typing import Any, Literal, cast
 
 import numpy as np
 import pandas as pd
@@ -21,12 +21,8 @@
 from xarray.core.indexes import safe_cast_to_index
 from xarray.core.resample_cftime import CFTimeGrouper
 from xarray.core.types import Bins, DatetimeLike, GroupIndices, SideOptions
-from xarray.core.utils import emit_user_level_warning
 from xarray.core.variable import Variable
 
-if TYPE_CHECKING:
-    pass
-
 __all__ = [
     "EncodedGroups",
     "Grouper",
@@ -299,17 +295,7 @@ class TimeResampler(Resampler):
         Side of each interval to treat as closed.
     label : {"left", "right"}, optional
         Side of each interval to use for labeling.
-    base : int, optional
-        For frequencies that evenly subdivide 1 day, the "origin" of the
-        aggregated intervals. For example, for "24H" frequency, base could
-        range from 0 through 23.
-
-        .. deprecated:: 2023.03.0
-            Following pandas, the ``base`` parameter is deprecated in favor
-            of the ``origin`` and ``offset`` parameters, and will be removed
-            in a future version of xarray.
-
-    origin : {"epoch", "start", "start_day", "end", "end_day"}, pandas.Timestamp, datetime.datetime, numpy.datetime64, or cftime.datetime, default: "start_day"
+    origin : {'epoch', 'start', 'start_day', 'end', 'end_day'}, pandas.Timestamp, datetime.datetime, numpy.datetime64, or cftime.datetime, default 'start_day'
         The datetime on which to adjust the grouping. The timezone of origin
         must match the timezone of the index.
 
@@ -321,60 +307,22 @@ class TimeResampler(Resampler):
         - 'end_day': `origin` is the ceiling midnight of the last day
     offset : pd.Timedelta, datetime.timedelta, or str, default is None
         An offset timedelta added to the origin.
-    loffset : timedelta or str, optional
-        Offset used to adjust the resampled time labels. Some pandas date
-        offset strings are supported.
-
-        .. deprecated:: 2023.03.0
-            Following pandas, the ``loffset`` parameter is deprecated in favor
-            of using time offset arithmetic, and will be removed in a future
-            version of xarray.
-
     """
 
     freq: str
     closed: SideOptions | None = field(default=None)
     label: SideOptions | None = field(default=None)
     origin: str | DatetimeLike = field(default="start_day")
     offset: pd.Timedelta | datetime.timedelta | str | None = field(default=None)
-    loffset: datetime.timedelta | str | None = field(default=None)
-    base: int | None = field(default=None)
 
     index_grouper: CFTimeGrouper | pd.Grouper = field(init=False, repr=False)
     group_as_index: pd.Index = field(init=False, repr=False)
 
-    def __post_init__(self):
-        if self.loffset is not None:
-            emit_user_level_warning(
-                "Following pandas, the `loffset` parameter to resample is deprecated.  "
-                "Switch to updating the resampled dataset time coordinate using "
-                "time offset arithmetic.  For example:\n"
-                "    >>> offset = pd.tseries.frequencies.to_offset(freq) / 2\n"
-                '    >>> resampled_ds["time"] = resampled_ds.get_index("time") + offset',
-                FutureWarning,
-            )
-
-        if self.base is not None:
-            emit_user_level_warning(
-                "Following pandas, the `base` parameter to resample will be deprecated in "
-                "a future version of xarray.  Switch to using `origin` or `offset` instead.",
-                FutureWarning,
-            )
-
-        if self.base is not None and self.offset is not None:
-            raise ValueError("base and offset cannot be present at the same time")
-
     def _init_properties(self, group: T_Group) -> None:
         from xarray import CFTimeIndex
-        from xarray.core.pdcompat import _convert_base_to_offset
 
         group_as_index = safe_cast_to_index(group)
-
-        if self.base is not None:
-            # grouper constructor verifies that grouper.offset is None at this point
-            offset = _convert_base_to_offset(self.base, self.freq, group_as_index)
-        else:
-            offset = self.offset
+        offset = self.offset
 
         if not group_as_index.is_monotonic_increasing:
             # TODO: sort instead of raising an error
@@ -389,7 +337,6 @@ def _init_properties(self, group: T_Group) -> None:
                 label=self.label,
                 origin=self.origin,
                 offset=offset,
-                loffset=self.loffset,
             )
         else:
             self.index_grouper = pd.Grouper(
@@ -419,18 +366,16 @@ def first_items(self) -> tuple[pd.Series, np.ndarray]:
             return self.index_grouper.first_items(
                 cast(CFTimeIndex, self.group_as_index)
             )
-
-        s = pd.Series(np.arange(self.group_as_index.size), self.group_as_index)
-        grouped = s.groupby(self.index_grouper)
-        first_items = grouped.first()
-        counts = grouped.count()
-        # This way we generate codes for the final output index: full_index.
-        # So for _flox_reduce we avoid one reindex and copy by avoiding
-        # _maybe_restore_empty_groups
-        codes = np.repeat(np.arange(len(first_items)), counts)
-        if self.loffset is not None:
-            _apply_loffset(self.loffset, first_items)
-        return first_items, codes
+        else:
+            s = pd.Series(np.arange(self.group_as_index.size), self.group_as_index)
+            grouped = s.groupby(self.index_grouper)
+            first_items = grouped.first()
+            counts = grouped.count()
+            # This way we generate codes for the final output index: full_index.
+            # So for _flox_reduce we avoid one reindex and copy by avoiding
+            # _maybe_restore_empty_groups
+            codes = np.repeat(np.arange(len(first_items)), counts)
+            return first_items, codes
 
     def factorize(self, group: T_Group) -> EncodedGroups:
         self._init_properties(group)
@@ -454,43 +399,6 @@ def factorize(self, group: T_Group) -> EncodedGroups:
         )
 
 
-def _apply_loffset(
-    loffset: str | pd.DateOffset | datetime.timedelta | pd.Timedelta,
-    result: pd.Series | pd.DataFrame,
-):
-    """
-    (copied from pandas)
-    if loffset is set, offset the result index
-
-    This is NOT an idempotent routine, it will be applied
-    exactly once to the result.
-
-    Parameters
-    ----------
-    result : Series or DataFrame
-        the result of resample
-    """
-    # pd.Timedelta is a subclass of datetime.timedelta so we do not need to
-    # include it in instance checks.
-    if not isinstance(loffset, (str, pd.DateOffset, datetime.timedelta)):
-        raise ValueError(
-            f"`loffset` must be a str, pd.DateOffset, datetime.timedelta, or pandas.Timedelta object. "
-            f"Got {loffset}."
-        )
-
-    if isinstance(loffset, str):
-        loffset = pd.tseries.frequencies.to_offset(loffset)  # type: ignore[assignment]
-
-    needs_offset = (
-        isinstance(loffset, (pd.DateOffset, datetime.timedelta))
-        and isinstance(result.index, pd.DatetimeIndex)
-        and len(result.index) > 0
-    )
-
-    if needs_offset:
-        result.index = result.index + loffset
-
-
 def unique_value_groups(
     ar, sort: bool = True
 ) -> tuple[np.ndarray | pd.Index, np.ndarray]: