Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

REF: Compute complete result_index upfront in groupby #55738

Merged
merged 45 commits into pandas-dev:main from rhshadrach:gb_observed_pre
Feb 7, 2024
Merged
Show file tree
Hide file tree
Changes from 39 commits
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
e32b789
REF: Compute correct result_index upfront in groupby
rhshadrach Aug 16, 2023
31a7c92
Refinements
rhshadrach Oct 30, 2023
5ecfbeb
Merge branch 'main' of https://github.com/pandas-dev/pandas into gb_o…
rhshadrach Oct 30, 2023
8ce08d1
Refinements
rhshadrach Nov 1, 2023
6296f4a
Refinements
rhshadrach Nov 1, 2023
68f2aeb
Merge branch 'main' of https://github.com/pandas-dev/pandas into gb_o…
rhshadrach Nov 5, 2023
7141425
Restore inferring index dtype
rhshadrach Nov 5, 2023
7f74812
Merge branch 'gb_observed_pre' of https://github.com/rhshadrach/panda…
rhshadrach Nov 5, 2023
e39cbc8
Test fixups
rhshadrach Nov 5, 2023
c82bd65
Refinements
rhshadrach Nov 5, 2023
3a9892d
Refinements
rhshadrach Nov 5, 2023
25770be
fixup
rhshadrach Nov 5, 2023
a338efc
fixup
rhshadrach Nov 5, 2023
dbdec9f
fixup
rhshadrach Nov 5, 2023
0ae70b7
Fix sorting and non-sorting
rhshadrach Nov 12, 2023
99d2beb
Cleanup
rhshadrach Nov 12, 2023
a477dc0
Call ensure_platform_int last
rhshadrach Nov 13, 2023
7fb7ca6
fixup
rhshadrach Nov 14, 2023
b79cc85
fixup
rhshadrach Nov 14, 2023
da9169d
REF: Compute correct result_index upfront in groupby
rhshadrach Aug 16, 2023
d2eee13
Merge branch 'main' of https://github.com/pandas-dev/pandas into gb_o…
rhshadrach Nov 17, 2023
b247544
Merge branch 'gb_observed_pre' of https://github.com/rhshadrach/panda…
rhshadrach Nov 17, 2023
700f40f
Add test
rhshadrach Nov 17, 2023
efd20c7
Remove test
rhshadrach Nov 18, 2023
08d02b0
Merge branch 'main' of https://github.com/pandas-dev/pandas into gb_o…
rhshadrach Nov 18, 2023
9dac297
Move unobserved to the end
rhshadrach Nov 18, 2023
2c30d63
Merge branch 'main' of https://github.com/pandas-dev/pandas into gb_o…
rhshadrach Nov 18, 2023
26da0b8
cleanup
rhshadrach Nov 18, 2023
0a6b63a
cleanup
rhshadrach Nov 18, 2023
001881f
cleanup
rhshadrach Nov 18, 2023
e285742
Merge branch 'main' of https://github.com/pandas-dev/pandas into gb_o…
rhshadrach Nov 18, 2023
4aaa1d2
Merge branch 'gb_observed_pre' of https://github.com/rhshadrach/panda…
rhshadrach Nov 22, 2023
e5d5c92
Merge remote-tracking branch 'upstream/main' into gb_observed_pre
rhshadrach Dec 8, 2023
d5b37a4
Merge fixup
rhshadrach Dec 8, 2023
c2c3859
Merge remote-tracking branch 'upstream/main' into gb_observed_pre
rhshadrach Feb 1, 2024
4f284ce
fixup
rhshadrach Feb 2, 2024
4374cdb
Merge remote-tracking branch 'upstream/main' into gb_observed_pre
rhshadrach Feb 2, 2024
c7e6a89
Merge remote-tracking branch 'upstream/main' into gb_observed_pre
rhshadrach Feb 2, 2024
dce05da
fixup
rhshadrach Feb 2, 2024
72209a8
Fixup and test
rhshadrach Feb 3, 2024
b58b69d
whatsnew
rhshadrach Feb 3, 2024
fe99dc5
type ignore
rhshadrach Feb 3, 2024
766c229
Refactor & type annotations
rhshadrach Feb 4, 2024
8f592ad
Merge remote-tracking branch 'upstream/main' into gb_observed_pre
rhshadrach Feb 4, 2024
a05ff18
Better bikeshed
rhshadrach Feb 4, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 17 additions & 9 deletions pandas/core/groupby/generic.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -411,7 +411,6 @@ def _wrap_applied_output(
# GH #823 #24880
index = self._grouper.result_index
res_df = self.obj._constructor_expanddim(values, index=index)
res_df = self._reindex_output(res_df)
# if self.observed is False,
# keep all-NaN rows created while re-indexing
res_ser = res_df.stack(future_stack=True)
Expand All @@ -437,7 +436,7 @@ def _wrap_applied_output(
if not self.as_index:
result = self._insert_inaxis_grouper(result)
result.index = default_index(len(result))
return self._reindex_output(result)
return result

def _aggregate_named(self, func, *args, **kwargs):
# Note: this is very similar to _aggregate_series_pure_python,
Expand Down Expand Up @@ -658,7 +657,7 @@ def nunique(self, dropna: bool = True) -> Series | DataFrame:
2023-02-01 1
Freq: MS, dtype: int64
"""
ids, _, ngroups = self._grouper.group_info
ids, ngroups = self._grouper.group_info
val = self.obj._values
codes, uniques = algorithms.factorize(val, use_na_sentinel=dropna, sort=False)

Expand Down Expand Up @@ -691,7 +690,7 @@ def nunique(self, dropna: bool = True) -> Series | DataFrame:
if not self.as_index:
result = self._insert_inaxis_grouper(result)
result.index = default_index(len(result))
return self._reindex_output(result, fill_value=0)
return result

@doc(Series.describe)
def describe(self, percentiles=None, include=None, exclude=None) -> Series:
Expand Down Expand Up @@ -719,7 +718,7 @@ def value_counts(
from pandas.core.reshape.merge import get_join_indexers
from pandas.core.reshape.tile import cut

ids, _, _ = self._grouper.group_info
ids, _ = self._grouper.group_info
val = self.obj._values

index_names = self._grouper.names + [self.obj.name]
Expand Down Expand Up @@ -789,9 +788,18 @@ def value_counts(
rep = partial(np.repeat, repeats=np.add.reduceat(inc, idx))

# multi-index components
codes = self._grouper.reconstructed_codes
if isinstance(self._grouper.result_index, MultiIndex):
codes = list(self._grouper.result_index.codes)
else:
codes = [
algorithms.factorize(
self._grouper.result_index,
sort=self._grouper._sort,
use_na_sentinel=self._grouper.dropna,
)[0]
]
codes = [rep(level_codes) for level_codes in codes] + [llab(lab, inc)]
levels = [ping._group_index for ping in self._grouper.groupings] + [lev]
levels = self._grouper.levels + [lev]

if dropna:
mask = codes[-1] != -1
Expand Down Expand Up @@ -834,7 +842,7 @@ def value_counts(
# ndarray[Any, Any]], Index, Series]]
_, idx = get_join_indexers(
left, # type: ignore[arg-type]
right, # type: ignore[arg-type]
right,
sort=False,
how="left",
)
Expand Down Expand Up @@ -1605,7 +1613,7 @@ def _wrap_applied_output_series(
if not self.as_index:
result = self._insert_inaxis_grouper(result)

return self._reindex_output(result)
return result

def _cython_transform(
self,
Expand Down
Loading
Loading