From 04a7efca254b565115a884d329ee53ecfd37dbbc Mon Sep 17 00:00:00 2001 From: Rui He <118280419+ruihe774@users.noreply.github.com> Date: Thu, 25 Jul 2024 21:22:44 +0800 Subject: [PATCH] perf: set compat_level when calling to_arrow (#85) --- Cargo.toml | 7 +++++++ pyo3-polars-derive/src/lib.rs | 4 ++-- pyo3-polars/src/error.rs | 8 ++------ pyo3-polars/src/lib.rs | 28 +++++++++++++++++++++++----- 4 files changed, 34 insertions(+), 13 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 0572d08..4e401e8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,3 +13,10 @@ polars-core = { version = "0.41.0", default-features = false } polars-ffi = { version = "0.41.0", default-features = false } polars-plan = { version = "0.41.0", default-feautres = false } polars-lazy = { version = "0.41.0", default-features = false } + +[patch.crates-io] +polars = { git = "https://github.com/pola-rs/polars.git" } +polars-core = { git = "https://github.com/pola-rs/polars.git" } +polars-ffi = { git = "https://github.com/pola-rs/polars.git" } +polars-plan = { git = "https://github.com/pola-rs/polars.git" } +polars-lazy = { git = "https://github.com/pola-rs/polars.git" } diff --git a/pyo3-polars-derive/src/lib.rs b/pyo3-polars-derive/src/lib.rs index 166708c..097196e 100644 --- a/pyo3-polars-derive/src/lib.rs +++ b/pyo3-polars-derive/src/lib.rs @@ -241,7 +241,7 @@ fn create_field_function( match result { Ok(out) => { - let out = polars_core::export::arrow::ffi::export_field_to_c(&out.to_arrow(true)); + let out = polars_core::export::arrow::ffi::export_field_to_c(&out.to_arrow(CompatLevel::newest())); *return_value = out; }, Err(err) => { @@ -278,7 +278,7 @@ fn create_field_function_from_with_dtype( let mapper = polars_plan::dsl::FieldsMapper::new(&inputs); let dtype = polars_core::datatypes::DataType::#dtype; let out = mapper.with_dtype(dtype).unwrap(); - let out = polars_core::export::arrow::ffi::export_field_to_c(&out.to_arrow(true)); + let out = polars_core::export::arrow::ffi::export_field_to_c(&out.to_arrow(CompatLevel::newest())); *return_value = out; } ) diff --git a/pyo3-polars/src/error.rs b/pyo3-polars/src/error.rs index de46c4c..9a55bc6 100644 --- a/pyo3-polars/src/error.rs +++ b/pyo3-polars/src/error.rs @@ -36,12 +36,8 @@ impl std::convert::From for PyErr { PolarsError::StringCacheMismatch(err) => { StringCacheMismatchError::new_err(err.to_string()) } - PolarsError::SQLInterface(err) => { - SQLInterface::new_err(err.to_string()) - }, - PolarsError::SQLSyntax(err) => { - SQLSyntax::new_err(err.to_string()) - } + PolarsError::SQLInterface(err) => SQLInterface::new_err(err.to_string()), + PolarsError::SQLSyntax(err) => SQLSyntax::new_err(err.to_string()), PolarsError::Context { error, .. } => convert(error), } } diff --git a/pyo3-polars/src/lib.rs b/pyo3-polars/src/lib.rs index bea64c7..206fd90 100644 --- a/pyo3-polars/src/lib.rs +++ b/pyo3-polars/src/lib.rs @@ -54,7 +54,7 @@ use polars::export::arrow; use polars::prelude::*; use pyo3::ffi::Py_uintptr_t; use pyo3::prelude::*; - +use pyo3::types::PyDict; #[cfg(feature = "lazy")] use {polars_lazy::frame::LazyFrame, polars_plan::plans::DslPlan}; @@ -126,7 +126,14 @@ impl<'a> FromPyObject<'a> for PySeries { let py_name = name.str()?; let name = py_name.to_cow()?; - let arr = ob.call_method0("to_arrow")?; + let kwargs = PyDict::new_bound(ob.py()); + if let Ok(compat_level) = ob.call_method0("_newest_compat_level") { + let compat_level = compat_level.extract().unwrap(); + let compat_level = + CompatLevel::with_level(compat_level).unwrap_or(CompatLevel::newest()); + kwargs.set_item("compat_level", compat_level.get_level())?; + } + let arr = ob.call_method("to_arrow", (), Some(&kwargs))?; let arr = ffi::to_rust::array_to_rust(&arr)?; Ok(PySeries( Series::try_from((&*name, arr)).map_err(PyPolarsErr::from)?, @@ -165,13 +172,24 @@ impl IntoPy for PySeries { fn into_py(self, py: Python<'_>) -> PyObject { let polars = py.import_bound("polars").expect("polars not installed"); let s = polars.getattr("Series").unwrap(); - match s.getattr("_import_arrow_from_c") { + match s + .getattr("_import_arrow_from_c") + .or_else(|_| s.getattr("_import_from_c")) + { // Go via polars Ok(import_arrow_from_c) => { + // Get supported compatibility level + let compat_level = CompatLevel::with_level( + s.getattr("_newest_compat_level") + .map_or(1, |newest_compat_level| { + newest_compat_level.call0().unwrap().extract().unwrap() + }), + ) + .unwrap_or(CompatLevel::newest()); // Prepare pointers on the heap. let mut chunk_ptrs = Vec::with_capacity(self.0.n_chunks()); for i in 0..self.0.n_chunks() { - let array = self.0.to_arrow(i, true); + let array = self.0.to_arrow(i, compat_level); let schema = Box::leak(Box::new(arrow::ffi::export_field_to_c( &ArrowField::new("", array.data_type().clone(), true), ))); @@ -208,7 +226,7 @@ impl IntoPy for PySeries { Err(_) => { let s = self.0.rechunk(); let name = s.name(); - let arr = s.to_arrow(0, false); + let arr = s.to_arrow(0, CompatLevel::oldest()); let pyarrow = py.import_bound("pyarrow").expect("pyarrow not installed"); let arg = to_py_array(arr, py, pyarrow).unwrap();