-
-
Notifications
You must be signed in to change notification settings - Fork 2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: Add `cat.len_chars` and `cat.len_bytes`
#20211
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,13 +5,21 @@ use crate::map; | |
#[derive(Clone, PartialEq, Debug, Eq, Hash)] | ||
pub enum CategoricalFunction { | ||
GetCategories, | ||
#[cfg(feature = "strings")] | ||
LenBytes, | ||
#[cfg(feature = "strings")] | ||
LenChars, | ||
} | ||
|
||
impl CategoricalFunction { | ||
pub(super) fn get_field(&self, mapper: FieldsMapper) -> PolarsResult<Field> { | ||
use CategoricalFunction::*; | ||
match self { | ||
GetCategories => mapper.with_dtype(DataType::String), | ||
#[cfg(feature = "strings")] | ||
LenBytes => mapper.with_dtype(DataType::UInt32), | ||
#[cfg(feature = "strings")] | ||
LenChars => mapper.with_dtype(DataType::UInt32), | ||
} | ||
} | ||
} | ||
|
@@ -21,6 +29,10 @@ impl Display for CategoricalFunction { | |
use CategoricalFunction::*; | ||
let s = match self { | ||
GetCategories => "get_categories", | ||
#[cfg(feature = "strings")] | ||
LenBytes => "len_bytes", | ||
#[cfg(feature = "strings")] | ||
LenChars => "len_chars", | ||
}; | ||
write!(f, "cat.{s}") | ||
} | ||
|
@@ -31,6 +43,10 @@ impl From<CategoricalFunction> for SpecialEq<Arc<dyn ColumnsUdf>> { | |
use CategoricalFunction::*; | ||
match func { | ||
GetCategories => map!(get_categories), | ||
#[cfg(feature = "strings")] | ||
LenBytes => map!(len_bytes), | ||
#[cfg(feature = "strings")] | ||
LenChars => map!(len_chars), | ||
} | ||
} | ||
} | ||
|
@@ -48,3 +64,42 @@ fn get_categories(s: &Column) -> PolarsResult<Column> { | |
let arr = rev_map.get_categories().clone().boxed(); | ||
Series::try_from((ca.name().clone(), arr)).map(Column::from) | ||
} | ||
|
||
/// Apply a function to the categories of a categorical column and re-broadcast the result back to | ||
/// the array. | ||
fn apply_to_cats<F, T>(s: &Column, mut op: F) -> PolarsResult<Column> | ||
where | ||
F: FnMut(&StringChunked) -> ChunkedArray<T>, | ||
ChunkedArray<T>: IntoSeries, | ||
T: PolarsDataType, | ||
{ | ||
let ca = s.categorical()?; | ||
let (categories, phys) = match &**ca.get_rev_map() { | ||
RevMapping::Local(c, _) => (c, ca.physical().cast(&IDX_DTYPE)?), | ||
RevMapping::Global(physical_map, c, _) => { | ||
// Map physical to its local representation for use with take() later. | ||
let phys = ca | ||
.physical() | ||
.apply(|opt_v| opt_v.map(|v| *physical_map.get(&v).unwrap())); | ||
let out = phys.cast(&IDX_DTYPE)?; | ||
(c, out) | ||
}, | ||
}; | ||
|
||
// Apply function to categories | ||
let categories = StringChunked::with_chunk(PlSmallStr::EMPTY, categories.clone()); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This should take the name of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
let result = op(&categories).into_series(); | ||
|
||
let out = result.take(phys.idx()?)?; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This can do a |
||
Ok(out.into_column()) | ||
} | ||
|
||
#[cfg(feature = "strings")] | ||
fn len_bytes(s: &Column) -> PolarsResult<Column> { | ||
apply_to_cats(s, |s| s.str_len_bytes()) | ||
} | ||
|
||
#[cfg(feature = "strings")] | ||
fn len_chars(s: &Column) -> PolarsResult<Column> { | ||
apply_to_cats(s, |s| s.str_len_chars()) | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,7 +8,7 @@ use polars_ops::series::InterpolationMethod; | |
use polars_ops::series::SearchSortedSide; | ||
use polars_plan::dsl::function_expr::rolling::RollingFunction; | ||
use polars_plan::dsl::function_expr::rolling_by::RollingFunctionBy; | ||
use polars_plan::dsl::{BooleanFunction, StringFunction, TemporalFunction}; | ||
use polars_plan::dsl::{BooleanFunction, CategoricalFunction, StringFunction, TemporalFunction}; | ||
use polars_plan::prelude::{ | ||
AExpr, FunctionExpr, GroupbyOptions, IRAggExpr, LiteralValue, Operator, PowFunction, | ||
WindowMapping, WindowType, | ||
|
@@ -171,6 +171,21 @@ impl PyStringFunction { | |
} | ||
} | ||
|
||
#[pyclass(name = "CategoricalFunction", eq)] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you remove these. I don't think we need those for now. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Reverted the whole file, 0a83dbe |
||
#[derive(Copy, Clone, PartialEq)] | ||
pub enum PyCategoricalFunction { | ||
GetCategories, | ||
LenBytes, | ||
LenChars, | ||
} | ||
|
||
#[pymethods] | ||
impl PyCategoricalFunction { | ||
fn __hash__(&self) -> isize { | ||
*self as isize | ||
} | ||
} | ||
|
||
#[pyclass(name = "BooleanFunction", eq)] | ||
#[derive(Copy, Clone, PartialEq)] | ||
pub enum PyBooleanFunction { | ||
|
@@ -793,8 +808,16 @@ pub(crate) fn into_py(py: Python<'_>, expr: &AExpr) -> PyResult<PyObject> { | |
FunctionExpr::BinaryExpr(_) => { | ||
return Err(PyNotImplementedError::new_err("binary expr")) | ||
}, | ||
FunctionExpr::Categorical(_) => { | ||
return Err(PyNotImplementedError::new_err("categorical expr")) | ||
FunctionExpr::Categorical(catfun) => match catfun { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This can remain NotImplemented. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Reverted the whole file, 0a83dbe There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes. 👍 |
||
CategoricalFunction::GetCategories => { | ||
(PyCategoricalFunction::GetCategories.into_py(py),).to_object(py) | ||
}, | ||
CategoricalFunction::LenBytes => { | ||
(PyCategoricalFunction::LenBytes.into_py(py),).to_object(py) | ||
}, | ||
CategoricalFunction::LenChars => { | ||
(PyCategoricalFunction::LenChars.into_py(py),).to_object(py) | ||
}, | ||
mcrumiller marked this conversation as resolved.
Show resolved
Hide resolved
|
||
}, | ||
FunctionExpr::ListExpr(_) => { | ||
return Err(PyNotImplementedError::new_err("list expr")) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can this move into its own function? That saves monomorphization bloat.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does this resolve it? 2b9edc3
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, great!