Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Introduce Grouper objects internally #7561

Merged
merged 39 commits into main from grouper-objects
May 4, 2023
Merged
Show file tree
Hide file tree
Changes from 30 commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
71f5e10
Introduce Grouper objects.
dcherian Feb 26, 2023
b9500ce
Remove a copy after stacking for a groupby.
dcherian Mar 10, 2023
44f1325
Fix typing
dcherian Mar 10, 2023
1168ab7
[WIP] typing
dcherian Mar 16, 2023
c905b74
Cleanup
dcherian Mar 18, 2023
22ad7fa
[WIP]
dcherian Mar 19, 2023
22ac6de
group as Variable?
dcherian Mar 20, 2023
912e5c5
Revert "group as Variable?"
dcherian Mar 20, 2023
60abafe
Small cleanup
dcherian Mar 30, 2023
c6bfdaa
De-duplicate alignment check
dcherian Mar 30, 2023
a2290aa
Fix resampling
dcherian Mar 30, 2023
e863045
Bugfix
dcherian Mar 30, 2023
0d0b2cd
Partial reverts commit 22ad7fa7607cb83832935533a55df1f73c65811d.
dcherian Mar 30, 2023
c5daa47
fix tests
dcherian Mar 31, 2023
dda40f5
small cleanup
dcherian Mar 31, 2023
eb43043
more cleanup
dcherian Mar 31, 2023
8347313
Apply suggestions from code review
Illviljan Mar 31, 2023
a89ec14
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 31, 2023
fe4e0a7
Add ResolvedGrouper class
dcherian Mar 31, 2023
0ffc0ad
GroupBy only handles ResolvedGrouper objects.
dcherian Apr 2, 2023
3e9479d
review feedback
dcherian Apr 2, 2023
81319e2
minimize diff
dcherian Apr 2, 2023
f271d1b
dataclass
dcherian Apr 2, 2023
e07ae31
moar dataclass
dcherian Apr 2, 2023
d557418
Add typing
Illviljan Apr 3, 2023
2188a17
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 3, 2023
5a0ba46
Merge branch 'main' into grouper-objects
dcherian Apr 18, 2023
fe0e421
Ignore type checking error.
dcherian Apr 18, 2023
142bc69
Merge branch 'main' into grouper-objects
dcherian Apr 25, 2023
0cc1ba3
Update groupby.py
Illviljan Apr 26, 2023
2e10d3f
Move factorize to _factorize
Illviljan Apr 27, 2023
d06bdeb
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 27, 2023
867629f
Update groupby.py
Illviljan Apr 27, 2023
89ab508
Update xarray/core/groupby.py
dcherian Apr 28, 2023
8d7e6b8
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 28, 2023
afe41db
Merge branch 'main' into grouper-objects
dcherian May 2, 2023
dde8866
Calculate group_indices only when necessary
dcherian Apr 25, 2023
b719976
Revert "Calculate group_indices only when necessary"
dcherian May 2, 2023
265f1dd
Fix regression from deep copy
dcherian May 2, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions xarray/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -949,7 +949,7 @@ def _resample(
# TODO support non-string indexer after removing the old API.

from xarray.core.dataarray import DataArray
from xarray.core.groupby import TimeResampleGrouper
from xarray.core.groupby import ResolvedTimeResampleGrouper, TimeResampleGrouper
from xarray.core.resample import RESAMPLE_DIM

if keep_attrs is not None:
Expand Down Expand Up @@ -1012,11 +1012,13 @@ def _resample(
group = DataArray(
dim_coord, coords=dim_coord.coords, dims=dim_coord.dims, name=RESAMPLE_DIM
)

rgrouper = ResolvedTimeResampleGrouper(grouper, group, self)

return resample_cls(
self,
group=group,
(rgrouper,),
dim=dim_name,
grouper=grouper,
resample_dim=RESAMPLE_DIM,
restore_coord_dims=restore_coord_dims,
)
Expand Down
7 changes: 4 additions & 3 deletions xarray/core/computation.py
Original file line number Diff line number Diff line change
Expand Up @@ -515,15 +515,16 @@ def apply_groupby_func(func, *args):
groupbys = [arg for arg in args if isinstance(arg, GroupBy)]
assert groupbys, "must have at least one groupby to iterate over"
first_groupby = groupbys[0]
if any(not first_groupby._group.equals(gb._group) for gb in groupbys[1:]):
(grouper,) = first_groupby.groupers
if any(not grouper.group.equals(gb.groupers[0].group) for gb in groupbys[1:]):
raise ValueError(
"apply_ufunc can only perform operations over "
"multiple GroupBy objects at once if they are all "
"grouped the same way"
)

grouped_dim = first_groupby._group.name
unique_values = first_groupby._unique_coord.values
grouped_dim = grouper.name
unique_values = grouper.unique_coord.values

iterators = []
for arg in args:
Expand Down
47 changes: 28 additions & 19 deletions xarray/core/dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -6467,21 +6467,20 @@ def groupby(
core.groupby.DataArrayGroupBy
pandas.DataFrame.groupby
"""
from xarray.core.groupby import DataArrayGroupBy

# While we don't generally check the type of every arg, passing
# multiple dimensions as multiple arguments is common enough, and the
# consequences hidden enough (strings evaluate as true) to warrant
# checking here.
# A future version could make squeeze kwarg only, but would face
# backward-compat issues.
if not isinstance(squeeze, bool):
raise TypeError(
f"`squeeze` must be True or False, but {squeeze} was supplied"
)
from xarray.core.groupby import (
DataArrayGroupBy,
ResolvedUniqueGrouper,
UniqueGrouper,
_validate_groupby_squeeze,
)

_validate_groupby_squeeze(squeeze)
rgrouper = ResolvedUniqueGrouper(UniqueGrouper(), group, self)
return DataArrayGroupBy(
self, group, squeeze=squeeze, restore_coord_dims=restore_coord_dims
self,
(rgrouper,),
squeeze=squeeze,
restore_coord_dims=restore_coord_dims,
)

def groupby_bins(
Expand Down Expand Up @@ -6552,21 +6551,31 @@ def groupby_bins(
----------
.. [1] http://pandas.pydata.org/pandas-docs/stable/generated/pandas.cut.html
"""
from xarray.core.groupby import DataArrayGroupBy
from xarray.core.groupby import (
BinGrouper,
DataArrayGroupBy,
ResolvedBinGrouper,
_validate_groupby_squeeze,
)

return DataArrayGroupBy(
self,
group,
squeeze=squeeze,
_validate_groupby_squeeze(squeeze)
grouper = BinGrouper(
bins=bins,
restore_coord_dims=restore_coord_dims,
cut_kwargs={
"right": right,
"labels": labels,
"precision": precision,
"include_lowest": include_lowest,
},
)
rgrouper = ResolvedBinGrouper(grouper, group, self)

return DataArrayGroupBy(
self,
(rgrouper,),
squeeze=squeeze,
restore_coord_dims=restore_coord_dims,
)

def weighted(self, weights: DataArray) -> DataArrayWeighted:
"""
Expand Down
48 changes: 29 additions & 19 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -8950,21 +8950,21 @@ def groupby(
Dataset.resample
DataArray.resample
"""
from xarray.core.groupby import DatasetGroupBy

# While we don't generally check the type of every arg, passing
# multiple dimensions as multiple arguments is common enough, and the
# consequences hidden enough (strings evaluate as true) to warrant
# checking here.
# A future version could make squeeze kwarg only, but would face
# backward-compat issues.
if not isinstance(squeeze, bool):
raise TypeError(
f"`squeeze` must be True or False, but {squeeze} was supplied"
)
from xarray.core.groupby import (
dcherian marked this conversation as resolved.
Show resolved Hide resolved
DatasetGroupBy,
ResolvedUniqueGrouper,
UniqueGrouper,
_validate_groupby_squeeze,
)

_validate_groupby_squeeze(squeeze)
rgrouper = ResolvedUniqueGrouper(UniqueGrouper(), group, self)

return DatasetGroupBy(
self, group, squeeze=squeeze, restore_coord_dims=restore_coord_dims
self,
(rgrouper,),
squeeze=squeeze,
restore_coord_dims=restore_coord_dims,
)

def groupby_bins(
Expand Down Expand Up @@ -9035,21 +9035,31 @@ def groupby_bins(
----------
.. [1] http://pandas.pydata.org/pandas-docs/stable/generated/pandas.cut.html
"""
from xarray.core.groupby import DatasetGroupBy
from xarray.core.groupby import (
BinGrouper,
DatasetGroupBy,
ResolvedBinGrouper,
_validate_groupby_squeeze,
)

return DatasetGroupBy(
self,
group,
squeeze=squeeze,
_validate_groupby_squeeze(squeeze)
grouper = BinGrouper(
bins=bins,
restore_coord_dims=restore_coord_dims,
cut_kwargs={
"right": right,
"labels": labels,
"precision": precision,
"include_lowest": include_lowest,
},
)
rgrouper = ResolvedBinGrouper(grouper, group, self)

return DatasetGroupBy(
self,
(rgrouper,),
squeeze=squeeze,
restore_coord_dims=restore_coord_dims,
)

def weighted(self, weights: DataArray) -> DatasetWeighted:
"""
Expand Down
Loading