diff --git a/Cargo.toml b/Cargo.toml index 75c022e..0572d08 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,8 +8,8 @@ members = [ ] [workspace.dependencies] -polars = { version = "0.40.0", default-features = false } -polars-core = { version = "0.40.0", default-features = false } -polars-ffi = { version = "0.40.0", default-features = false } -polars-plan = { version = "0.40.0", default-feautres = false } -polars-lazy = { version = "0.40.0", default-features = false } +polars = { version = "0.41.0", default-features = false } +polars-core = { version = "0.41.0", default-features = false } +polars-ffi = { version = "0.41.0", default-features = false } +polars-plan = { version = "0.41.0", default-features = false } +polars-lazy = { version = "0.41.0", default-features = false } diff --git a/README.md b/README.md index 6c53bf3..fb42f31 100644 --- a/README.md +++ b/README.md @@ -8,11 +8,10 @@ Documentation for this functionality may also be found in the [Polars User Guide This is new functionality and should be preferred over `2.` as this will circumvent the GIL and will be the way we want to support extending polars. - Parallelism and optimizations are managed by the default polars runtime. That runtime will call into the plugin function. The plugin functions are compiled separately. -We can therefore keep polars more lean and maybe add support for a `polars-distance`, `polars-geo`, `polars-ml`, etc. +We can therefore keep polars more lean and maybe add support for a `polars-distance`, `polars-geo`, `polars-ml`, etc. Those can then have specialized expressions and don't have to worry as much for code bloat as they can be optionally installed. The idea is that you define an expression in another Rust crate with a proc_macro `polars_expr`. 
@@ -75,6 +74,7 @@ def pig_latinnify(expr: IntoExpr, capitalize: bool = False) -> pl.Expr: kwargs={"capitalize": capitalize}, ) ``` + Compile/ship and then it is ready to use: ```python @@ -90,7 +90,9 @@ out = df.with_columns( pig_latin = language.pig_latinnify("names") ) ``` + Alternatively, you can [register a custom namespace](https://docs.pola.rs/py-polars/html/reference/api/polars.api.register_expr_namespace.html#polars.api.register_expr_namespace), which enables you to write: + ```python out = df.with_columns( pig_latin = pl.col("names").language.pig_latinnify() diff --git a/example/derive_expression/expression_lib/Cargo.toml b/example/derive_expression/expression_lib/Cargo.toml index 8057b19..8c01ac8 100644 --- a/example/derive_expression/expression_lib/Cargo.toml +++ b/example/derive_expression/expression_lib/Cargo.toml @@ -12,8 +12,8 @@ crate-type = ["cdylib"] polars = { workspace = true, features = ["fmt", "dtype-date", "timezones"], default-features = false } pyo3 = { version = "0.21", features = ["abi3-py38"] } pyo3-polars = { version = "*", path = "../../../pyo3-polars", features = ["derive"] } -serde = { version = "1", features = ["derive"] } rayon = "1.7.0" +serde = { version = "1", features = ["derive"] } [target.'cfg(target_os = "linux")'.dependencies] jemallocator = { version = "0.5", features = ["disable_initial_exec_tls"] } diff --git a/example/derive_expression/expression_lib/expression_lib/date_util.py b/example/derive_expression/expression_lib/expression_lib/date_util.py index b461902..928d7b8 100644 --- a/example/derive_expression/expression_lib/expression_lib/date_util.py +++ b/example/derive_expression/expression_lib/expression_lib/date_util.py @@ -1,17 +1,17 @@ import polars as pl from polars.type_aliases import IntoExpr -from polars.utils.udfs import _get_shared_lib_location +from polars.plugins import register_plugin_function +from pathlib import Path from expression_lib.utils import parse_into_expr -lib = 
_get_shared_lib_location(__file__) - def is_leap_year(expr: IntoExpr) -> pl.Expr: expr = parse_into_expr(expr) - return expr.register_plugin( - lib=lib, - symbol="is_leap_year", + return register_plugin_function( + plugin_path=Path(__file__).parent, + args=[expr], + function_name="is_leap_year", is_elementwise=True, ) @@ -20,6 +20,8 @@ def is_leap_year(expr: IntoExpr) -> pl.Expr: # purposes. def change_time_zone(expr: IntoExpr, tz: str = "Europe/Amsterdam") -> pl.Expr: expr = parse_into_expr(expr) - return expr.register_plugin( - lib=lib, symbol="change_time_zone", is_elementwise=True, kwargs={"tz": tz} + return register_plugin_function( + plugin_path=Path(__file__).parent, + args=[expr], + function_name="change_time_zone", is_elementwise=True, kwargs={"tz": tz} ) diff --git a/example/derive_expression/expression_lib/expression_lib/dist.py b/example/derive_expression/expression_lib/expression_lib/dist.py index 933d4a2..e9274e6 100644 --- a/example/derive_expression/expression_lib/expression_lib/dist.py +++ b/example/derive_expression/expression_lib/expression_lib/dist.py @@ -1,28 +1,27 @@ import polars as pl from polars.type_aliases import IntoExpr -from polars.utils.udfs import _get_shared_lib_location +from polars.plugins import register_plugin_function +from pathlib import Path -from expression_lib.utils import parse_into_expr - -lib = _get_shared_lib_location(__file__) +from expression_lib.utils import parse_into_expr def hamming_distance(expr: IntoExpr, other: IntoExpr) -> pl.Expr: expr = parse_into_expr(expr) - return expr.register_plugin( - lib=lib, - args=[other], - symbol="hamming_distance", + return register_plugin_function( + plugin_path=Path(__file__).parent, + args=[expr, other], + function_name="hamming_distance", is_elementwise=True, ) def jaccard_similarity(expr: IntoExpr, other: IntoExpr) -> pl.Expr: expr = parse_into_expr(expr) - return expr.register_plugin( - lib=lib, - args=[other], - symbol="jaccard_similarity", + return 
register_plugin_function( + plugin_path=Path(__file__).parent, + args=[expr, other], + function_name="jaccard_similarity", is_elementwise=True, ) @@ -34,10 +33,10 @@ def haversine( end_long: IntoExpr, ) -> pl.Expr: start_lat = parse_into_expr(start_lat) - return start_lat.register_plugin( - lib=lib, - args=[start_long, end_lat, end_long], - symbol="haversine", + return register_plugin_function( + plugin_path=Path(__file__).parent, + args=[start_lat, start_long, end_lat, end_long], + function_name="haversine", is_elementwise=True, - cast_to_supertypes=True, + cast_to_supertype=True, ) diff --git a/example/derive_expression/expression_lib/expression_lib/language.py b/example/derive_expression/expression_lib/expression_lib/language.py index 20dcb66..0713f0d 100644 --- a/example/derive_expression/expression_lib/expression_lib/language.py +++ b/example/derive_expression/expression_lib/expression_lib/language.py @@ -1,17 +1,18 @@ import polars as pl from polars.type_aliases import IntoExpr -from polars.utils.udfs import _get_shared_lib_location +from polars.plugins import register_plugin_function +from pathlib import Path from expression_lib.utils import parse_into_expr -lib = _get_shared_lib_location(__file__) def pig_latinnify(expr: IntoExpr, capitalize: bool = False) -> pl.Expr: expr = parse_into_expr(expr) - return expr.register_plugin( - lib=lib, - symbol="pig_latinnify", + return register_plugin_function( + plugin_path=Path(__file__).parent, + args=[expr], + function_name="pig_latinnify", is_elementwise=True, kwargs={"capitalize": capitalize}, ) @@ -28,15 +29,15 @@ def append_args( This example shows how arguments other than `Series` can be used. 
""" expr = parse_into_expr(expr) - return expr.register_plugin( - lib=lib, - args=[], + return register_plugin_function( + plugin_path=Path(__file__).parent, + args=[expr], kwargs={ "float_arg": float_arg, "integer_arg": integer_arg, "string_arg": string_arg, "boolean_arg": boolean_arg, }, - symbol="append_kwargs", + function_name="append_kwargs", is_elementwise=True, ) diff --git a/example/derive_expression/expression_lib/expression_lib/panic.py b/example/derive_expression/expression_lib/expression_lib/panic.py index eff27e0..6338a76 100644 --- a/example/derive_expression/expression_lib/expression_lib/panic.py +++ b/example/derive_expression/expression_lib/expression_lib/panic.py @@ -1,15 +1,15 @@ import polars as pl from polars.type_aliases import IntoExpr -from polars.utils.udfs import _get_shared_lib_location +from polars.plugins import register_plugin_function from expression_lib.utils import parse_into_expr - -lib = _get_shared_lib_location(__file__) +from pathlib import Path def panic(expr: IntoExpr) -> pl.Expr: expr = parse_into_expr(expr) - return expr.register_plugin( - lib=lib, - symbol="panic", + return register_plugin_function( + plugin_path=Path(__file__).parent, + args=[expr], + function_name="panic", ) diff --git a/example/derive_expression/run.py b/example/derive_expression/run.py index 22094eb..e89b959 100644 --- a/example/derive_expression/run.py +++ b/example/derive_expression/run.py @@ -71,13 +71,13 @@ string_arg="example", ) ) -except pl.ComputeError as e: +except pl.exceptions.ComputeError as e: assert "the plugin failed with message" in str(e) try: out.with_columns(pl.col("names").panic.panic()) -except pl.ComputeError as e: +except pl.exceptions.ComputeError as e: assert "the plugin panicked" in str(e) print("finished") diff --git a/pyo3-polars-derive/Cargo.toml b/pyo3-polars-derive/Cargo.toml index 10aa318..2a4c413 100644 --- a/pyo3-polars-derive/Cargo.toml +++ b/pyo3-polars-derive/Cargo.toml @@ -1,6 +1,6 @@ [package] name = 
"pyo3-polars-derive" -version = "0.8.0" +version = "0.9.0" edition = "2021" license = "MIT" readme = "README.md" diff --git a/pyo3-polars/Cargo.toml b/pyo3-polars/Cargo.toml index 3c1fda2..6a9476a 100644 --- a/pyo3-polars/Cargo.toml +++ b/pyo3-polars/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "pyo3-polars" -version = "0.14.0" +version = "0.15.0" edition = "2021" license = "MIT" readme = "../README.md" @@ -17,7 +17,7 @@ polars-ffi = { workspace = true, optional = true } polars-lazy = { workspace = true, optional = true } polars-plan = { workspace = true, optional = true } pyo3 = "0.21.0" -pyo3-polars-derive = { version = "0.8.0", path = "../pyo3-polars-derive", optional = true } +pyo3-polars-derive = { version = "0.9.0", path = "../pyo3-polars-derive", optional = true } serde = { version = "1", optional = true } serde-pickle = { version = "1", optional = true } thiserror = "1" diff --git a/pyo3-polars/src/error.rs b/pyo3-polars/src/error.rs index d93b22d..de46c4c 100644 --- a/pyo3-polars/src/error.rs +++ b/pyo3-polars/src/error.rs @@ -36,6 +36,12 @@ impl std::convert::From for PyErr { PolarsError::StringCacheMismatch(err) => { StringCacheMismatchError::new_err(err.to_string()) } + PolarsError::SQLInterface(err) => { + SQLInterface::new_err(err.to_string()) + }, + PolarsError::SQLSyntax(err) => { + SQLSyntax::new_err(err.to_string()) + } PolarsError::Context { error, .. 
} => convert(error), } } @@ -67,3 +73,5 @@ create_exception!(exceptions, ShapeError, PyException); create_exception!(exceptions, SchemaError, PyException); create_exception!(exceptions, DuplicateError, PyException); create_exception!(exceptions, StringCacheMismatchError, PyException); +create_exception!(exceptions, SQLInterface, PyException); +create_exception!(exceptions, SQLSyntax, PyException); diff --git a/pyo3-polars/src/lib.rs b/pyo3-polars/src/lib.rs index a0427b9..f41f3cb 100644 --- a/pyo3-polars/src/lib.rs +++ b/pyo3-polars/src/lib.rs @@ -56,7 +56,7 @@ use pyo3::ffi::Py_uintptr_t; use pyo3::prelude::*; #[cfg(feature = "lazy")] -use {polars_lazy::frame::LazyFrame, polars_plan::logical_plan::DslPlan}; +use {polars_lazy::frame::LazyFrame, polars_plan::plans::DslPlan}; #[repr(transparent)] #[derive(Debug, Clone)]