diff --git a/crates/polars-lazy/src/frame/mod.rs b/crates/polars-lazy/src/frame/mod.rs index eec69162c9e0..8850250e733c 100644 --- a/crates/polars-lazy/src/frame/mod.rs +++ b/crates/polars-lazy/src/frame/mod.rs @@ -1513,13 +1513,25 @@ impl LazyFrame { /// Apply explode operation. [See eager explode](polars_core::frame::DataFrame::explode). pub fn explode, IE: Into + Clone>(self, columns: E) -> LazyFrame { + self.explode_impl(columns, false) + } + + /// Apply explode operation. [See eager explode](polars_core::frame::DataFrame::explode). + fn explode_impl, IE: Into + Clone>( + self, + columns: E, + allow_empty: bool, + ) -> LazyFrame { let columns = columns .as_ref() .iter() .map(|e| e.clone().into()) .collect::>(); let opt_state = self.get_opt_state(); - let lp = self.get_plan_builder().explode(columns).build(); + let lp = self + .get_plan_builder() + .explode(columns, allow_empty) + .build(); Self::from_logical_plan(lp, opt_state) } @@ -1877,7 +1889,7 @@ impl LazyGroupBy { .collect::>(); self.agg([col("*").exclude(&keys).head(n)]) - .explode([col("*").exclude(&keys)]) + .explode_impl([col("*").exclude(&keys)], true) } /// Return last n rows of each group @@ -1889,7 +1901,7 @@ impl LazyGroupBy { .collect::>(); self.agg([col("*").exclude(&keys).tail(n)]) - .explode([col("*").exclude(&keys)]) + .explode_impl([col("*").exclude(&keys)], true) } /// Apply a function over the groups as a new DataFrame. diff --git a/crates/polars-plan/src/plans/builder_dsl.rs b/crates/polars-plan/src/plans/builder_dsl.rs index 96d61cf62bce..849be4a6e4ea 100644 --- a/crates/polars-plan/src/plans/builder_dsl.rs +++ b/crates/polars-plan/src/plans/builder_dsl.rs @@ -346,10 +346,13 @@ impl DslBuilder { .into() } - pub fn explode(self, columns: Vec) -> Self { + pub fn explode(self, columns: Vec, allow_empty: bool) -> Self { DslPlan::MapFunction { input: Arc::new(self.0), - function: DslFunction::Explode { columns }, + function: DslFunction::Explode { + columns, + allow_empty, + }, } .into() } diff --git a/crates/polars-plan/src/plans/conversion/dsl_to_ir.rs b/crates/polars-plan/src/plans/conversion/dsl_to_ir.rs index 8dba0f49f3c4..08b3a8b66d1e 100644 --- a/crates/polars-plan/src/plans/conversion/dsl_to_ir.rs +++ b/crates/polars-plan/src/plans/conversion/dsl_to_ir.rs @@ -636,6 +636,23 @@ pub fn to_alp_impl( let input_schema = lp_arena.get(input).schema(lp_arena); match function { + DslFunction::Explode { + columns, + allow_empty, + } => { + let columns = expand_selectors(columns, &input_schema, &[])?; + validate_columns_in_input(&columns, &input_schema, "explode")?; + polars_ensure!(!columns.is_empty() || allow_empty, InvalidOperation: "no columns provided in explode"); + if columns.is_empty() { + return Ok(input); + } + let function = FunctionIR::Explode { + columns, + schema: Default::default(), + }; + let ir = IR::MapFunction { input, function }; + return Ok(lp_arena.add(ir)); + }, DslFunction::FillNan(fill_value) => { let exprs = input_schema .iter() diff --git a/crates/polars-plan/src/plans/functions/dsl.rs b/crates/polars-plan/src/plans/functions/dsl.rs index 76c7dc9d3211..458c7c6d8e28 100644 --- a/crates/polars-plan/src/plans/functions/dsl.rs +++ b/crates/polars-plan/src/plans/functions/dsl.rs @@ -29,6 +29,7 @@ pub enum DslFunction { OpaquePython(OpaquePythonUdf), Explode { columns: Vec, + allow_empty: bool, }, #[cfg(feature = "pivot")] Unpivot { @@ -79,7 +80,7 @@ pub enum StatsFunction { Max, } -fn validate_columns>( +pub(crate) fn validate_columns_in_input>( columns: &[S], input_schema: &Schema, operation_name: &str, @@ -93,20 +94,12 @@ fn validate_columns>( impl DslFunction { pub(crate) fn into_function_ir(self, input_schema: &Schema) -> PolarsResult { let function = match self { - DslFunction::Explode { columns } => { - let columns = expand_selectors(columns, input_schema, &[])?; - validate_columns(columns.as_ref(), input_schema, "explode")?; - FunctionIR::Explode { - columns, - schema: Default::default(), - } - }, #[cfg(feature = "pivot")] DslFunction::Unpivot { args } => { let on = expand_selectors(args.on, input_schema, &[])?; let index = expand_selectors(args.index, input_schema, &[])?; - validate_columns(on.as_ref(), input_schema, "unpivot")?; - validate_columns(index.as_ref(), input_schema, "unpivot")?; + validate_columns_in_input(on.as_ref(), input_schema, "unpivot")?; + validate_columns_in_input(index.as_ref(), input_schema, "unpivot")?; let args = UnpivotArgsIR { on: on.iter().map(|s| s.as_ref().into()).collect(), @@ -128,7 +121,7 @@ impl DslFunction { }, DslFunction::Rename { existing, new } => { let swapping = new.iter().any(|name| input_schema.get(name).is_some()); - validate_columns(existing.as_ref(), input_schema, "rename")?; + validate_columns_in_input(existing.as_ref(), input_schema, "rename")?; FunctionIR::Rename { existing, @@ -139,12 +132,15 @@ impl DslFunction { }, DslFunction::Unnest(selectors) => { let columns = expand_selectors(selectors, input_schema, &[])?; - validate_columns(columns.as_ref(), input_schema, "explode")?; + validate_columns_in_input(columns.as_ref(), input_schema, "explode")?; FunctionIR::Unnest { columns } }, #[cfg(feature = "python")] DslFunction::OpaquePython(inner) => FunctionIR::OpaquePython(inner), - DslFunction::Stats(_) | DslFunction::FillNan(_) | DslFunction::Drop(_) => { + DslFunction::Stats(_) + | DslFunction::FillNan(_) + | DslFunction::Drop(_) + | DslFunction::Explode { .. } => { // We should not reach this. panic!("impl error") }, diff --git a/py-polars/tests/unit/operations/test_slice.py b/py-polars/tests/unit/operations/test_slice.py index 6f8526a53061..692fcb5634dc 100644 --- a/py-polars/tests/unit/operations/test_slice.py +++ b/py-polars/tests/unit/operations/test_slice.py @@ -253,3 +253,23 @@ def test_slice_pushdown_simple_projection_18288() -> None: "col": [0], "literal": [None], } + + +def test_group_by_slice_all_keys() -> None: + df = pl.DataFrame( + { + "a": ["Tom", "Nick", "Marry", "Krish", "Jack", None], + "b": [ + "2020-01-01", + "2020-01-02", + "2020-01-03", + "2020-01-04", + "2020-01-05", + None, + ], + "c": [5, 6, 6, 7, 8, 5], + } + ) + + gb = df.group_by(["a", "b", "c"], maintain_order=True) + assert_frame_equal(gb.tail(1), gb.head(1))