Skip to content

Commit

Permalink
fix: Fix group first value after group-by slice (#18603)
Browse files: browse the repository at this point in the history
  • Loading branch information
ritchie46 authored Sep 7, 2024
1 parent ac4b114 commit 6037ca5
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 2 deletions.
11 changes: 9 additions & 2 deletions crates/polars-expr/src/expressions/slice.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,16 @@ fn check_argument(arg: &Series, groups: &GroupsProxy, name: &str, expr: &Expr) -
Ok(())
}

// NOTE(review): this is a diff view whose +/- markers were stripped. The file summary
// reports 9 additions & 2 deletions, which matches the signature directly below and the
// `first + offset` tuple further down being the two removed (pre-fix) lines.
fn slice_groups_idx(offset: i64, length: usize, first: IdxSize, idx: &[IdxSize]) -> IdxItem {
/// Slices a single index-based group and returns the sliced group's
/// `(first, indices)` pair.
///
/// `offset` and `length` are resolved against `idx.len()` by `slice_offsets`
/// (presumably normalising negative offsets and clamping to the group length —
/// confirm against its definition). The returned indices are
/// `idx[offset..offset + len]`; the returned `first` is `idx[offset]` when that
/// position exists, otherwise the incoming `first` is passed through unchanged.
fn slice_groups_idx(offset: i64, length: usize, mut first: IdxSize, idx: &[IdxSize]) -> IdxItem {
let (offset, len) = slice_offsets(offset, length, idx.len());
// Pre-fix return: derived the new `first` arithmetically as `first + offset`.
// NOTE(review): that only equals `idx[offset]` when the group's indices are
// consecutive — presumably the bug exercised by the `sort_by(...).slice(...).first()`
// regression test added in the same commit.
(first + offset as IdxSize, idx[offset..offset + len].into())

// If slice isn't out of bounds, we replace first.
// If slice is oob, the `idx` vec will be empty and `first` will be ignored
if let Some(f) = idx.get(offset) {
first = *f;
}
// This is a clone of the vec, which is unfortunate. Maybe we have a `sliceable` unitvec one day.
(first, idx[offset..offset + len].into())
}

fn slice_groups_slice(offset: i64, length: usize, first: IdxSize, len: IdxSize) -> [IdxSize; 2] {
Expand Down
15 changes: 15 additions & 0 deletions py-polars/tests/unit/operations/test_slice.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,3 +273,18 @@ def test_group_by_slice_all_keys() -> None:

gb = df.group_by(["a", "b", "c"], maintain_order=True)
assert_frame_equal(gb.tail(1), gb.head(1))


def test_slice_first_in_agg_18551() -> None:
    """Check that `first()` after a slice inside a group-by aggregation uses the sliced rows.

    Each `id` group has its `name` values ordered by `value`; slicing that
    ordered column and taking the first element must return the element at the
    slice offset, or null when the slice falls outside the group.
    """
    frame = pl.DataFrame(
        {
            "id": [1, 1, 2],
            "name": ["A", "B", "C"],
            "value": [31, 21, 32],
        }
    )
    # The same sorted-by-value expression feeds all three aggregations.
    names_by_value = pl.col("name").sort_by("value")

    aggregated = frame.group_by("id", maintain_order=True).agg(
        sort_by=names_by_value,
        x=names_by_value.slice(0, 1).first(),
        y=names_by_value.slice(1, 1).first(),
    )

    expected = {
        "id": [1, 2],
        "sort_by": [["B", "A"], ["C"]],
        "x": ["B", "C"],
        # Group 2 has a single row, so a slice starting at offset 1 is empty.
        "y": ["A", None],
    }
    assert aggregated.to_dict(as_series=False) == expected

0 comments on commit 6037ca5

Please sign in to comment.