diff --git a/polars/polars-lazy/src/dsl/mod.rs b/polars/polars-lazy/src/dsl/mod.rs
index 4c79058689ca..32db0f4cd44e 100644
--- a/polars/polars-lazy/src/dsl/mod.rs
+++ b/polars/polars-lazy/src/dsl/mod.rs
@@ -46,6 +46,7 @@ use polars_arrow::array::default_arrays::FromData;
 #[cfg(feature = "diff")]
 use polars_core::series::ops::NullBehavior;
 use polars_core::utils::{get_supertype, NoNull};
+use polars_ops::prelude::SeriesOps;
 
 pub fn binary_expr(l: Expr, op: Operator, r: Expr) -> Expr {
     Expr::BinaryExpr {
@@ -421,6 +422,34 @@ impl Expr {
         }
     }
 
+    /// Append expressions. This is done by adding the chunks of `other` to this [`Series`].
+    ///
+    /// If `upcast` is `true`, both sides are cast to their common supertype before
+    /// appending; otherwise the output type is declared via `GetOutput::same_type()`.
+    pub fn append<E: Into<Expr>>(self, other: E, upcast: bool) -> Self {
+        let output_type = if upcast {
+            GetOutput::super_type()
+        } else {
+            GetOutput::same_type()
+        };
+
+        apply_binary(
+            self,
+            other.into(),
+            move |mut a, mut b| {
+                if upcast {
+                    // Bring both sides to one dtype so their chunks can be combined.
+                    let dtype = get_supertype(a.dtype(), b.dtype())?;
+                    a = a.cast(&dtype)?;
+                    b = b.cast(&dtype)?;
+                }
+                a.append(&b)?;
+                Ok(a)
+            },
+            output_type,
+        )
+    }
+
     /// Get the first `n` elements of the Expr result
     pub fn head(self, length: Option<usize>) -> Self {
         self.slice(lit(0), lit(length.unwrap_or(10) as u64))
diff --git a/py-polars/docs/source/reference/expression.rst b/py-polars/docs/source/reference/expression.rst
index 2f3892d75b35..4a06325be0d5 100644
--- a/py-polars/docs/source/reference/expression.rst
+++ b/py-polars/docs/source/reference/expression.rst
@@ -177,6 +177,7 @@ Manipulation/ selection
 
     Expr.inspect
     Expr.slice
+    Expr.append
     Expr.explode
     Expr.flatten
     Expr.take_every
@@ -204,6 +205,7 @@ Manipulation/ selection
     Expr.reinterpret
     Expr.drop_nulls
     Expr.drop_nans
+    Expr.rechunk
     Expr.interpolate
     Expr.arg_sort
     Expr.clip
diff --git a/py-polars/polars/internals/expr.py b/py-polars/polars/internals/expr.py
index 57c1d5b4470b..c4da5ed62eba 100644
--- a/py-polars/polars/internals/expr.py
+++ b/py-polars/polars/internals/expr.py
@@ -774,6 +774,26 @@ def slice(self, offset: Union[int, "Expr"], length: Union[int, "Expr"]) -> "Expr
             length = pli.lit(length)
         return wrap_expr(self._pyexpr.slice(offset._pyexpr, length._pyexpr))
 
+    def append(self, other: "Expr", upcast: bool = True) -> "Expr":
+        """
+        Append expressions. This is done by adding the chunks of `other` to this `Series`.
+
+        Parameters
+        ----------
+        other
+            Expression to append
+        upcast
+            Cast both `Series` to the same supertype before appending
+        """
+        other = expr_to_lit_or_expr(other)
+        return wrap_expr(self._pyexpr.append(other._pyexpr, upcast))
+
+    def rechunk(self) -> "Expr":
+        """
+        Create a single chunk of memory for this Series, e.g. after `append`.
+        """
+        return wrap_expr(self._pyexpr.rechunk())
+
     def drop_nulls(self) -> "Expr":
         """
         Drop null values.
diff --git a/py-polars/polars/internals/functions.py b/py-polars/polars/internals/functions.py
index da406f592bb1..c6eb81ffcb2d 100644
--- a/py-polars/polars/internals/functions.py
+++ b/py-polars/polars/internals/functions.py
@@ -60,13 +60,25 @@ def concat(
     ...
 
 
+@overload
+def concat(
+    items: Sequence["pli.Expr"],
+    rechunk: bool = True,
+    how: str = "vertical",
+) -> "pli.Expr":
+    ...
+
+
 def concat(
     items: Union[
-        Sequence["pli.DataFrame"], Sequence["pli.Series"], Sequence["pli.LazyFrame"]
+        Sequence["pli.DataFrame"],
+        Sequence["pli.Series"],
+        Sequence["pli.LazyFrame"],
+        Sequence["pli.Expr"],
     ],
     rechunk: bool = True,
     how: str = "vertical",
-) -> Union["pli.DataFrame", "pli.Series", "pli.LazyFrame"]:
+) -> Union["pli.DataFrame", "pli.Series", "pli.LazyFrame", "pli.Expr"]:
     """
     Aggregate all the Dataframes/Series in a List of DataFrames/Series to a single DataFrame/Series.
 
@@ -105,8 +117,9 @@ def concat(
     if not len(items) > 0:
         raise ValueError("cannot concat empty list")
 
-    out: Union["pli.Series", "pli.DataFrame", "pli.LazyFrame"]
-    if isinstance(items[0], pli.DataFrame):
+    out: Union["pli.Series", "pli.DataFrame", "pli.LazyFrame", "pli.Expr"]
+    first = items[0]
+    if isinstance(first, pli.DataFrame):
         if how == "vertical":
             out = pli.wrap_df(_concat_df(items))
         elif how == "diagonal":
@@ -117,10 +130,16 @@ def concat(
             raise ValueError(
                 f"how should be one of {'vertical', 'diagonal'}, got {how}"
             )
-    elif isinstance(items[0], pli.LazyFrame):
+    elif isinstance(first, pli.LazyFrame):
         return pli.wrap_ldf(_concat_lf(items, rechunk))
-    else:
+    elif isinstance(first, pli.Series):
         out = pli.wrap_s(_concat_series(items))
+    elif isinstance(first, pli.Expr):
+        out = first
+        for e in items[1:]:
+            out = out.append(e)  # type: ignore
+    else:
+        raise ValueError(f"did not expect type: {type(first)} in 'pl.concat'.")
 
     if rechunk:
         return out.rechunk()
diff --git a/py-polars/src/lazy/dsl.rs b/py-polars/src/lazy/dsl.rs
index c28e87179049..55913227b4ae 100644
--- a/py-polars/src/lazy/dsl.rs
+++ b/py-polars/src/lazy/dsl.rs
@@ -327,6 +327,17 @@ impl PyExpr {
         self.inner.clone().slice(offset.inner, length.inner).into()
     }
 
+    pub fn append(&self, other: PyExpr, upcast: bool) -> PyExpr {
+        self.inner.clone().append(other.inner, upcast).into()
+    }
+
+    pub fn rechunk(&self) -> PyExpr {
+        self.inner
+            .clone()
+            .map(|s| Ok(s.rechunk()), GetOutput::same_type())
+            .into()
+    }
+
     pub fn round(&self, decimals: u32) -> PyExpr {
         self.clone().inner.round(decimals).into()
     }
diff --git a/py-polars/tests/test_exprs.py b/py-polars/tests/test_exprs.py
index c5712a28f3aa..f208ea35232f 100644
--- a/py-polars/tests/test_exprs.py
+++ b/py-polars/tests/test_exprs.py
@@ -235,3 +235,16 @@ def test_power_by_expression() -> None:
         None,
         46656.0,
     ]
+
+
+def test_expression_appends() -> None:
+    df = pl.DataFrame({"a": [1, 1, 2]})
+
+    assert df.select(pl.repeat(None, 3).append(pl.col("a"))).n_chunks() == 2
+
+    assert df.select(pl.repeat(None, 3).append(pl.col("a")).rechunk()).n_chunks() == 1
+
+    out = df.select(pl.concat([pl.repeat(None, 3), pl.col("a")]))
+
+    assert out.n_chunks() == 1
+    assert out.to_series().to_list() == [None, None, None, 1, 1, 2]