Skip to content
forked from pydata/xarray

Commit

Permalink
Introduce Grouper objects.
Browse files: browse the repository at this point in the history.
  • Loading branch information
dcherian committed Mar 18, 2023
1 parent 511415f commit 5979a05
Show file tree
Hide file tree
Showing 7 changed files with 398 additions and 301 deletions.
13 changes: 7 additions & 6 deletions xarray/core/common.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -996,11 +996,16 @@ def _resample(
if base is not None and offset is not None:
raise ValueError("base and offset cannot be present at the same time")

index = self._indexes[dim_name].to_pandas_index()
if base is not None:
index = self._indexes[dim_name].to_pandas_index()
offset = _convert_base_to_offset(base, freq, index)

group = DataArray(
dim_coord, coords=dim_coord.coords, dims=dim_coord.dims, name=RESAMPLE_DIM
)

grouper = TimeResampleGrouper(
group=group,
freq=freq,
closed=closed,
label=label,
Expand All @@ -1009,14 +1014,10 @@ def _resample(
loffset=loffset,
)

group = DataArray(
dim_coord, coords=dim_coord.coords, dims=dim_coord.dims, name=RESAMPLE_DIM
)
return resample_cls(
self,
group=group,
dim=dim_name,
grouper=grouper,
dim=dim_name,
resample_dim=RESAMPLE_DIM,
restore_coord_dims=restore_coord_dims,
)
Expand Down
7 changes: 4 additions & 3 deletions xarray/core/computation.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -524,15 +524,16 @@ def apply_groupby_func(func, *args):
groupbys = [arg for arg in args if isinstance(arg, GroupBy)]
assert groupbys, "must have at least one groupby to iterate over"
first_groupby = groupbys[0]
if any(not first_groupby._group.equals(gb._group) for gb in groupbys[1:]):
(grouper,) = first_groupby.groupers
if any(not grouper.group.equals(gb.groupers[0].group) for gb in groupbys[1:]):
raise ValueError(
"apply_ufunc can only perform operations over "
"multiple GroupBy objects at once if they are all "
"grouped the same way"
)

grouped_dim = first_groupby._group.name
unique_values = first_groupby._unique_coord.values
grouped_dim = grouper.name
unique_values = grouper.unique_coord.values

iterators = []
for arg in args:
Expand Down
32 changes: 24 additions & 8 deletions xarray/core/dataarray.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -6256,7 +6256,7 @@ def groupby(
core.groupby.DataArrayGroupBy
pandas.DataFrame.groupby
"""
from xarray.core.groupby import DataArrayGroupBy
from xarray.core.groupby import DataArrayGroupBy, UniqueGrouper

# While we don't generally check the type of every arg, passing
# multiple dimensions as multiple arguments is common enough, and the
Expand All @@ -6269,8 +6269,9 @@ def groupby(
f"`squeeze` must be True or False, but {squeeze} was supplied"
)

grouper = UniqueGrouper(group)
return DataArrayGroupBy(
self, group, squeeze=squeeze, restore_coord_dims=restore_coord_dims
self, grouper, squeeze=squeeze, restore_coord_dims=restore_coord_dims
)

def groupby_bins(
Expand Down Expand Up @@ -6341,14 +6342,22 @@ def groupby_bins(
----------
.. [1] http://pandas.pydata.org/pandas-docs/stable/generated/pandas.cut.html
"""
from xarray.core.groupby import DataArrayGroupBy
from xarray.core.groupby import BinGrouper, DataArrayGroupBy

return DataArrayGroupBy(
self,
group,
squeeze=squeeze,
# While we don't generally check the type of every arg, passing
# multiple dimensions as multiple arguments is common enough, and the
# consequences hidden enough (strings evaluate as true) to warrant
# checking here.
# A future version could make squeeze kwarg only, but would face
# backward-compat issues.
if not isinstance(squeeze, bool):
raise TypeError(
f"`squeeze` must be True or False, but {squeeze} was supplied"
)

grouper = BinGrouper(
group=group,
bins=bins,
restore_coord_dims=restore_coord_dims,
cut_kwargs={
"right": right,
"labels": labels,
Expand All @@ -6357,6 +6366,13 @@ def groupby_bins(
},
)

return DataArrayGroupBy(
self,
grouper,
squeeze=squeeze,
restore_coord_dims=restore_coord_dims,
)

def weighted(self, weights: DataArray) -> DataArrayWeighted:
"""
Weighted DataArray operations.
Expand Down
19 changes: 11 additions & 8 deletions xarray/core/dataset.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -8943,7 +8943,7 @@ def groupby(
Dataset.resample
DataArray.resample
"""
from xarray.core.groupby import DatasetGroupBy
from xarray.core.groupby import DatasetGroupBy, UniqueGrouper

# While we don't generally check the type of every arg, passing
# multiple dimensions as multiple arguments is common enough, and the
Expand All @@ -8956,8 +8956,10 @@ def groupby(
f"`squeeze` must be True or False, but {squeeze} was supplied"
)

grouper = UniqueGrouper(group)

return DatasetGroupBy(
self, group, squeeze=squeeze, restore_coord_dims=restore_coord_dims
self, grouper, squeeze=squeeze, restore_coord_dims=restore_coord_dims
)

def groupby_bins(
Expand Down Expand Up @@ -9028,14 +9030,11 @@ def groupby_bins(
----------
.. [1] http://pandas.pydata.org/pandas-docs/stable/generated/pandas.cut.html
"""
from xarray.core.groupby import DatasetGroupBy
from xarray.core.groupby import BinGrouper, DatasetGroupBy

return DatasetGroupBy(
self,
group,
squeeze=squeeze,
grouper = BinGrouper(
group=group,
bins=bins,
restore_coord_dims=restore_coord_dims,
cut_kwargs={
"right": right,
"labels": labels,
Expand All @@ -9044,6 +9043,10 @@ def groupby_bins(
},
)

return DatasetGroupBy(
self, grouper, squeeze=squeeze, restore_coord_dims=restore_coord_dims
)

def weighted(self, weights: DataArray) -> DatasetWeighted:
"""
Weighted Dataset operations.
Expand Down
Loading

0 comments on commit 5979a05

Please sign in to comment.