Skip to content

feat: support dict param for dataframe.agg() #1772

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
May 28, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 17 additions & 2 deletions bigframes/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -2925,9 +2925,23 @@ def nunique(self) -> bigframes.series.Series:
return bigframes.series.Series(block)

def agg(
self, func: str | typing.Sequence[str]
self,
func: str
| typing.Sequence[str]
| typing.Mapping[blocks.Label, typing.Sequence[str] | str],
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[nit] use typing.Union

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this should be fine given that we already have `from __future__ import annotations` at the top of the file.

) -> DataFrame | bigframes.series.Series:
if utils.is_list_like(func):
if utils.is_dict_like(func):
# Must check dict-like first because dictionaries are list-like
# according to Pandas.
agg_cols = []
for col_label, agg_func in func.items():
agg_cols.append(self[col_label].agg(agg_func))

from bigframes.core.reshape import api as reshape

return reshape.concat(agg_cols, axis=1)

elif utils.is_list_like(func):
aggregations = [agg_ops.lookup_agg_func(f) for f in func]

for dtype, agg in itertools.product(self.dtypes, aggregations):
Expand All @@ -2941,6 +2955,7 @@ def agg(
aggregations,
)
)

else:
return bigframes.series.Series(
self._block.aggregate_all_and_stack(
Expand Down
26 changes: 26 additions & 0 deletions tests/system/small/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -5652,3 +5652,29 @@ def test_astype_invalid_type_fail(scalars_dfs):

with pytest.raises(TypeError, match=r".*Share your usecase with.*"):
bf_df.astype(123)


def test_agg_with_dict(scalars_dfs):
    """Compare dict-style ``agg`` on both frames from ``scalars_dfs``.

    The same column-label -> aggregation-names mapping is applied to the
    first frame (whose result is converted with ``to_pandas()``) and to the
    second frame, and the two results must be equal as frames. Dtype and
    index-type differences are ignored by the comparison.
    """
    bf_df, pd_df = scalars_dfs

    # One entry per column: the label, then the aggregations to run on it.
    per_column_aggs = {
        "int64_too": ["min", "max"],
        "int64_col": ["min", "count"],
    }

    actual = bf_df.agg(per_column_aggs).to_pandas()
    expected = pd_df.agg(per_column_aggs)

    pd.testing.assert_frame_equal(
        actual,
        expected,
        check_dtype=False,
        check_index_type=False,
    )


def test_agg_with_dict_containing_non_existing_col_raise_key_error(scalars_dfs):
    """Dict-style ``agg`` must raise ``KeyError`` for an unknown column label.

    Only the first frame from ``scalars_dfs`` is exercised; the mapping mixes
    one label used elsewhere in this test module with one that is expected
    not to exist.
    """
    bf_df, _unused = scalars_dfs

    aggs_with_missing_column = {
        "int64_too": ["min", "max"],
        "nonexisting_col": ["count"],
    }

    # The call itself is expected to fail, so no result is captured.
    with pytest.raises(KeyError):
        bf_df.agg(aggs_with_missing_column)