Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

update readme example #42

Merged
merged 1 commit into from
Oct 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 22 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,21 +13,35 @@ The idea is that you define an expression in another Rust crate with a proc_macr
That macro can have the following attributes:

- `output_type` -> to define the output type of that expression
- `type_func` -> to define a function that computes the output type based on input types.
- `output_type_func` -> to define a function that computes the output type based on input types.

Here is an example of a `String` conversion expression that converts any string to [pig latin](https://en.wikipedia.org/wiki/Pig_Latin):

```rust
fn pig_latin_str(value: &str, output: &mut String) {
fn pig_latin_str(value: &str, capitalize: bool, output: &mut String) {
if let Some(first_char) = value.chars().next() {
write!(output, "{}{}ay", &value[1..], first_char).unwrap()
if capitalize {
for c in value.chars().skip(1).map(|char| char.to_uppercase()) {
write!(output, "{c}").unwrap()
}
write!(output, "AY").unwrap()
} else {
let offset = first_char.len_utf8();
write!(output, "{}{}ay", &value[offset..], first_char).unwrap()
}
}
}

#[derive(Deserialize)]
struct PigLatinKwargs {
capitalize: bool,
}

#[polars_expr(output_type=Utf8)]
fn pig_latinnify(inputs: &[Series]) -> PolarsResult<Series> {
fn pig_latinnify(inputs: &[Series], kwargs: PigLatinKwargs) -> PolarsResult<Series> {
let ca = inputs[0].utf8()?;
let out: Utf8Chunked = ca.apply_to_buffer(pig_latin_str);
let out: Utf8Chunked =
ca.apply_to_buffer(|value, output| pig_latin_str(value, kwargs.capitalize, output));
Ok(out.into_series())
}
```
Expand All @@ -46,19 +60,20 @@ class Language:
def __init__(self, expr: pl.Expr):
self._expr = expr

def pig_latinnify(self) -> pl.Expr:
def pig_latinnify(self, capitalize: bool = False) -> pl.Expr:
return self._expr._register_plugin(
lib=lib,
symbol="pig_latinnify",
is_elementwise=True,
kwargs={"capitalize": capitalize}
)
```

Compile/ship and then it is ready to use:

```python
import polars as pl
from expression_lib import Language
import expression_lib

df = pl.DataFrame({
"names": ["Richard", "Alice", "Bob"],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,12 @@ class Language:
def __init__(self, expr: pl.Expr):
self._expr = expr

def pig_latinnify(self) -> pl.Expr:
def pig_latinnify(self, capitalize: bool = False) -> pl.Expr:
return self._expr._register_plugin(
lib=lib,
symbol="pig_latinnify",
is_elementwise=True,
kwargs={"capitalize": capitalize},
)

def append_args(
Expand Down Expand Up @@ -77,12 +78,12 @@ def haversine(
cast_to_supertypes=True,
)


@pl.api.register_expr_namespace("date_util")
class DateUtil:
def __init__(self, expr: pl.Expr):
self._expr = expr


def is_leap_year(self) -> pl.Expr:
return self._expr._register_plugin(
lib=lib,
Expand Down
22 changes: 18 additions & 4 deletions example/derive_expression/expression_lib/src/expressions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,30 @@ use pyo3_polars::derive::polars_expr;
use serde::Deserialize;
use std::fmt::Write;

fn pig_latin_str(value: &str, output: &mut String) {
#[derive(Deserialize)]
struct PigLatinKwargs {
capitalize: bool,
}

fn pig_latin_str(value: &str, capitalize: bool, output: &mut String) {
if let Some(first_char) = value.chars().next() {
write!(output, "{}{}ay", &value[1..], first_char).unwrap()
if capitalize {
for c in value.chars().skip(1).map(|char| char.to_uppercase()) {
write!(output, "{c}").unwrap()
}
write!(output, "AY").unwrap()
} else {
let offset = first_char.len_utf8();
write!(output, "{}{}ay", &value[offset..], first_char).unwrap()
}
}
}

#[polars_expr(output_type=Utf8)]
fn pig_latinnify(inputs: &[Series]) -> PolarsResult<Series> {
fn pig_latinnify(inputs: &[Series], kwargs: PigLatinKwargs) -> PolarsResult<Series> {
let ca = inputs[0].utf8()?;
let out: Utf8Chunked = ca.apply_to_buffer(pig_latin_str);
let out: Utf8Chunked =
ca.apply_to_buffer(|value, output| pig_latin_str(value, kwargs.capitalize, output));
Ok(out.into_series())
}

Expand Down
18 changes: 10 additions & 8 deletions example/derive_expression/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

out = df.with_columns(
pig_latin=pl.col("names").language.pig_latinnify(),
pig_latin_cap=pl.col("names").language.pig_latinnify(capitalize=True),
).with_columns(
hamming_dist=pl.col("names").dist.hamming_distance("pig_latin"),
jaccard_sim=pl.col("dist_a").dist.jaccard_similarity("dist_b"),
Expand All @@ -26,7 +27,7 @@
integer_arg=93,
boolean_arg=False,
string_arg="example",
)
),
)

print(out)
Expand All @@ -35,11 +36,12 @@
# Tests we can return errors from FFI by passing wrong types.
try:
out.with_columns(
appended_args=pl.col("names").language.append_args(
float_arg=True,
integer_arg=True,
boolean_arg=True,
string_arg="example",
))
appended_args=pl.col("names").language.append_args(
float_arg=True,
integer_arg=True,
boolean_arg=True,
string_arg="example",
)
)
except pl.ComputeError as e:
assert "the plugin failed with message" in str(e)
assert "the plugin failed with message" in str(e)
Loading