Skip to content

Commit

Permalink
Delete base and loffset parameters to resample (#9233)
Browse files Browse the repository at this point in the history
* Remove `base`, `loffset` in Resampler

* resample: Remove how/dim checks

* lint

* cleanups

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Update test_cftimeindex_resample.py

* cleanup

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
dcherian and pre-commit-ci[bot] authored Jul 19, 2024
1 parent 3013fb4 commit 39d5b39
Show file tree
Hide file tree
Showing 9 changed files with 34 additions and 391 deletions.
5 changes: 5 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,11 @@ New Features

Breaking changes
~~~~~~~~~~~~~~~~
- The ``base`` and ``loffset`` parameters to :py:meth:`Dataset.resample` and :py:meth:`DataArray.resample`
are now removed. These parameters have been deprecated since v2023.03.0. Using the
``origin`` or ``offset`` parameters is recommended as a replacement for using
the ``base`` parameter and using time offset arithmetic is recommended as a
replacement for using the ``loffset`` parameter.


Deprecations
Expand Down
46 changes: 2 additions & 44 deletions xarray/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -881,10 +881,8 @@ def _resample(
skipna: bool | None,
closed: SideOptions | None,
label: SideOptions | None,
base: int | None,
offset: pd.Timedelta | datetime.timedelta | str | None,
origin: str | DatetimeLike,
loffset: datetime.timedelta | str | None,
restore_coord_dims: bool | None,
**indexer_kwargs: str | Resampler,
) -> T_Resample:
Expand All @@ -906,16 +904,6 @@ def _resample(
Side of each interval to treat as closed.
label : {"left", "right"}, optional
Side of each interval to use for labeling.
base : int, optional
For frequencies that evenly subdivide 1 day, the "origin" of the
aggregated intervals. For example, for "24H" frequency, base could
range from 0 through 23.
.. deprecated:: 2023.03.0
Following pandas, the ``base`` parameter is deprecated in favor
of the ``origin`` and ``offset`` parameters, and will be removed
in a future version of xarray.
origin : {'epoch', 'start', 'start_day', 'end', 'end_day'}, pd.Timestamp, datetime.datetime, np.datetime64, or cftime.datetime, default 'start_day'
The datetime on which to adjust the grouping. The timezone of origin
must match the timezone of the index.
Expand All @@ -928,15 +916,6 @@ def _resample(
- 'end_day': `origin` is the ceiling midnight of the last day
offset : pd.Timedelta, datetime.timedelta, or str, default is None
An offset timedelta added to the origin.
loffset : timedelta or str, optional
Offset used to adjust the resampled time labels. Some pandas date
offset strings are supported.
.. deprecated:: 2023.03.0
Following pandas, the ``loffset`` parameter is deprecated in favor
of using time offset arithmetic, and will be removed in a future
version of xarray.
restore_coord_dims : bool, optional
If True, also restore the dimension order of multi-dimensional
coordinates.
Expand Down Expand Up @@ -1072,18 +1051,6 @@ def _resample(
from xarray.core.groupers import Resampler, TimeResampler
from xarray.core.resample import RESAMPLE_DIM

# note: the second argument (now 'skipna') use to be 'dim'
if (
(skipna is not None and not isinstance(skipna, bool))
or ("how" in indexer_kwargs and "how" not in self.dims)
or ("dim" in indexer_kwargs and "dim" not in self.dims)
):
raise TypeError(
"resample() no longer supports the `how` or "
"`dim` arguments. Instead call methods on resample "
"objects, e.g., data.resample(time='1D').mean()"
)

indexer = either_dict_or_kwargs(indexer, indexer_kwargs, "resample")
if len(indexer) != 1:
raise ValueError("Resampling only supported along single dimensions.")
Expand All @@ -1093,22 +1060,13 @@ def _resample(
dim_coord = self[dim]

group = DataArray(
dim_coord,
coords=dim_coord.coords,
dims=dim_coord.dims,
name=RESAMPLE_DIM,
dim_coord, coords=dim_coord.coords, dims=dim_coord.dims, name=RESAMPLE_DIM
)

grouper: Resampler
if isinstance(freq, str):
grouper = TimeResampler(
freq=freq,
closed=closed,
label=label,
origin=origin,
offset=offset,
loffset=loffset,
base=base,
freq=freq, closed=closed, label=label, origin=origin, offset=offset
)
elif isinstance(freq, Resampler):
grouper = freq
Expand Down
17 changes: 0 additions & 17 deletions xarray/core/dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -7245,10 +7245,8 @@ def resample(
skipna: bool | None = None,
closed: SideOptions | None = None,
label: SideOptions | None = None,
base: int | None = None,
offset: pd.Timedelta | datetime.timedelta | str | None = None,
origin: str | DatetimeLike = "start_day",
loffset: datetime.timedelta | str | None = None,
restore_coord_dims: bool | None = None,
**indexer_kwargs: str | Resampler,
) -> DataArrayResample:
Expand All @@ -7270,10 +7268,6 @@ def resample(
Side of each interval to treat as closed.
label : {"left", "right"}, optional
Side of each interval to use for labeling.
base : int, optional
For frequencies that evenly subdivide 1 day, the "origin" of the
aggregated intervals. For example, for "24H" frequency, base could
range from 0 through 23.
origin : {'epoch', 'start', 'start_day', 'end', 'end_day'}, pd.Timestamp, datetime.datetime, np.datetime64, or cftime.datetime, default 'start_day'
The datetime on which to adjust the grouping. The timezone of origin
must match the timezone of the index.
Expand All @@ -7286,15 +7280,6 @@ def resample(
- 'end_day': `origin` is the ceiling midnight of the last day
offset : pd.Timedelta, datetime.timedelta, or str, default is None
An offset timedelta added to the origin.
loffset : timedelta or str, optional
Offset used to adjust the resampled time labels. Some pandas date
offset strings are supported.
.. deprecated:: 2023.03.0
Following pandas, the ``loffset`` parameter is deprecated in favor
of using time offset arithmetic, and will be removed in a future
version of xarray.
restore_coord_dims : bool, optional
If True, also restore the dimension order of multi-dimensional
coordinates.
Expand Down Expand Up @@ -7399,10 +7384,8 @@ def resample(
skipna=skipna,
closed=closed,
label=label,
base=base,
offset=offset,
origin=origin,
loffset=loffset,
restore_coord_dims=restore_coord_dims,
**indexer_kwargs,
)
Expand Down
17 changes: 0 additions & 17 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -10631,10 +10631,8 @@ def resample(
skipna: bool | None = None,
closed: SideOptions | None = None,
label: SideOptions | None = None,
base: int | None = None,
offset: pd.Timedelta | datetime.timedelta | str | None = None,
origin: str | DatetimeLike = "start_day",
loffset: datetime.timedelta | str | None = None,
restore_coord_dims: bool | None = None,
**indexer_kwargs: str | Resampler,
) -> DatasetResample:
Expand All @@ -10656,10 +10654,6 @@ def resample(
Side of each interval to treat as closed.
label : {"left", "right"}, optional
Side of each interval to use for labeling.
base : int, optional
For frequencies that evenly subdivide 1 day, the "origin" of the
aggregated intervals. For example, for "24H" frequency, base could
range from 0 through 23.
origin : {'epoch', 'start', 'start_day', 'end', 'end_day'}, pd.Timestamp, datetime.datetime, np.datetime64, or cftime.datetime, default 'start_day'
The datetime on which to adjust the grouping. The timezone of origin
must match the timezone of the index.
Expand All @@ -10672,15 +10666,6 @@ def resample(
- 'end_day': `origin` is the ceiling midnight of the last day
offset : pd.Timedelta, datetime.timedelta, or str, default is None
An offset timedelta added to the origin.
loffset : timedelta or str, optional
Offset used to adjust the resampled time labels. Some pandas date
offset strings are supported.
.. deprecated:: 2023.03.0
Following pandas, the ``loffset`` parameter is deprecated in favor
of using time offset arithmetic, and will be removed in a future
version of xarray.
restore_coord_dims : bool, optional
If True, also restore the dimension order of multi-dimensional
coordinates.
Expand Down Expand Up @@ -10713,10 +10698,8 @@ def resample(
skipna=skipna,
closed=closed,
label=label,
base=base,
offset=offset,
origin=origin,
loffset=loffset,
restore_coord_dims=restore_coord_dims,
**indexer_kwargs,
)
Expand Down
118 changes: 13 additions & 105 deletions xarray/core/groupers.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import datetime
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from typing import TYPE_CHECKING, Any, Literal, cast
from typing import Any, Literal, cast

import numpy as np
import pandas as pd
Expand All @@ -21,12 +21,8 @@
from xarray.core.indexes import safe_cast_to_index
from xarray.core.resample_cftime import CFTimeGrouper
from xarray.core.types import Bins, DatetimeLike, GroupIndices, SideOptions
from xarray.core.utils import emit_user_level_warning
from xarray.core.variable import Variable

if TYPE_CHECKING:
pass

__all__ = [
"EncodedGroups",
"Grouper",
Expand Down Expand Up @@ -299,17 +295,7 @@ class TimeResampler(Resampler):
Side of each interval to treat as closed.
label : {"left", "right"}, optional
Side of each interval to use for labeling.
base : int, optional
For frequencies that evenly subdivide 1 day, the "origin" of the
aggregated intervals. For example, for "24H" frequency, base could
range from 0 through 23.
.. deprecated:: 2023.03.0
Following pandas, the ``base`` parameter is deprecated in favor
of the ``origin`` and ``offset`` parameters, and will be removed
in a future version of xarray.
origin : {"epoch", "start", "start_day", "end", "end_day"}, pandas.Timestamp, datetime.datetime, numpy.datetime64, or cftime.datetime, default: "start_day"
origin : {'epoch', 'start', 'start_day', 'end', 'end_day'}, pandas.Timestamp, datetime.datetime, numpy.datetime64, or cftime.datetime, default 'start_day'
The datetime on which to adjust the grouping. The timezone of origin
must match the timezone of the index.
Expand All @@ -321,60 +307,22 @@ class TimeResampler(Resampler):
- 'end_day': `origin` is the ceiling midnight of the last day
offset : pd.Timedelta, datetime.timedelta, or str, default is None
An offset timedelta added to the origin.
loffset : timedelta or str, optional
Offset used to adjust the resampled time labels. Some pandas date
offset strings are supported.
.. deprecated:: 2023.03.0
Following pandas, the ``loffset`` parameter is deprecated in favor
of using time offset arithmetic, and will be removed in a future
version of xarray.
"""

freq: str
closed: SideOptions | None = field(default=None)
label: SideOptions | None = field(default=None)
origin: str | DatetimeLike = field(default="start_day")
offset: pd.Timedelta | datetime.timedelta | str | None = field(default=None)
loffset: datetime.timedelta | str | None = field(default=None)
base: int | None = field(default=None)

index_grouper: CFTimeGrouper | pd.Grouper = field(init=False, repr=False)
group_as_index: pd.Index = field(init=False, repr=False)

def __post_init__(self):
if self.loffset is not None:
emit_user_level_warning(
"Following pandas, the `loffset` parameter to resample is deprecated. "
"Switch to updating the resampled dataset time coordinate using "
"time offset arithmetic. For example:\n"
" >>> offset = pd.tseries.frequencies.to_offset(freq) / 2\n"
' >>> resampled_ds["time"] = resampled_ds.get_index("time") + offset',
FutureWarning,
)

if self.base is not None:
emit_user_level_warning(
"Following pandas, the `base` parameter to resample will be deprecated in "
"a future version of xarray. Switch to using `origin` or `offset` instead.",
FutureWarning,
)

if self.base is not None and self.offset is not None:
raise ValueError("base and offset cannot be present at the same time")

def _init_properties(self, group: T_Group) -> None:
from xarray import CFTimeIndex
from xarray.core.pdcompat import _convert_base_to_offset

group_as_index = safe_cast_to_index(group)

if self.base is not None:
# grouper constructor verifies that grouper.offset is None at this point
offset = _convert_base_to_offset(self.base, self.freq, group_as_index)
else:
offset = self.offset
offset = self.offset

if not group_as_index.is_monotonic_increasing:
# TODO: sort instead of raising an error
Expand All @@ -389,7 +337,6 @@ def _init_properties(self, group: T_Group) -> None:
label=self.label,
origin=self.origin,
offset=offset,
loffset=self.loffset,
)
else:
self.index_grouper = pd.Grouper(
Expand Down Expand Up @@ -419,18 +366,16 @@ def first_items(self) -> tuple[pd.Series, np.ndarray]:
return self.index_grouper.first_items(
cast(CFTimeIndex, self.group_as_index)
)

s = pd.Series(np.arange(self.group_as_index.size), self.group_as_index)
grouped = s.groupby(self.index_grouper)
first_items = grouped.first()
counts = grouped.count()
# This way we generate codes for the final output index: full_index.
# So for _flox_reduce we avoid one reindex and copy by avoiding
# _maybe_restore_empty_groups
codes = np.repeat(np.arange(len(first_items)), counts)
if self.loffset is not None:
_apply_loffset(self.loffset, first_items)
return first_items, codes
else:
s = pd.Series(np.arange(self.group_as_index.size), self.group_as_index)
grouped = s.groupby(self.index_grouper)
first_items = grouped.first()
counts = grouped.count()
# This way we generate codes for the final output index: full_index.
# So for _flox_reduce we avoid one reindex and copy by avoiding
# _maybe_restore_empty_groups
codes = np.repeat(np.arange(len(first_items)), counts)
return first_items, codes

def factorize(self, group: T_Group) -> EncodedGroups:
self._init_properties(group)
Expand All @@ -454,43 +399,6 @@ def factorize(self, group: T_Group) -> EncodedGroups:
)


def _apply_loffset(
loffset: str | pd.DateOffset | datetime.timedelta | pd.Timedelta,
result: pd.Series | pd.DataFrame,
):
"""
(copied from pandas)
if loffset is set, offset the result index
This is NOT an idempotent routine, it will be applied
exactly once to the result.
Parameters
----------
result : Series or DataFrame
the result of resample
"""
# pd.Timedelta is a subclass of datetime.timedelta so we do not need to
# include it in instance checks.
if not isinstance(loffset, (str, pd.DateOffset, datetime.timedelta)):
raise ValueError(
f"`loffset` must be a str, pd.DateOffset, datetime.timedelta, or pandas.Timedelta object. "
f"Got {loffset}."
)

if isinstance(loffset, str):
loffset = pd.tseries.frequencies.to_offset(loffset) # type: ignore[assignment]

needs_offset = (
isinstance(loffset, (pd.DateOffset, datetime.timedelta))
and isinstance(result.index, pd.DatetimeIndex)
and len(result.index) > 0
)

if needs_offset:
result.index = result.index + loffset


def unique_value_groups(
ar, sort: bool = True
) -> tuple[np.ndarray | pd.Index, np.ndarray]:
Expand Down
Loading

0 comments on commit 39d5b39

Please sign in to comment.