Skip to content

Commit

Permalink
perf(python): remove pySequence downcast (#6803)
Browse files (browse the repository at this point in the history)
  • Loading branch information
ritchie46 authored Feb 11, 2023
1 parent 0a4a2c1 commit 8204c88
Show file tree
Hide file tree
Showing 5 changed files with 25 additions and 38 deletions.
2 changes: 1 addition & 1 deletion py-polars/polars/internals/series/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -563,7 +563,7 @@ def __floordiv__(self, other: Any) -> Series:
raise ValueError("first cast to integer before dividing datelike dtypes")
if not isinstance(other, pli.Expr):
other = pli.lit(other)
return self.to_frame().select(pli.lit(self) // other).to_series()
return self.to_frame().select(pli.col(self.name) // other).to_series()

def __invert__(self) -> Series:
if self.dtype == Boolean:
Expand Down
20 changes: 7 additions & 13 deletions py-polars/src/conversion.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,12 +65,6 @@ impl<T> From<T> for Wrap<T> {
}
}

/// Converts `obj` to a `PySequence` and returns it together with its length.
///
/// # Errors
///
/// Propagates the error if the `PySequence` conversion fails or if querying
/// the sequence length fails.
pub(crate) fn get_pyseq(obj: &PyAny) -> PyResult<(&PySequence, usize)> {
let seq = <PySequence as PyTryFrom>::try_from(obj)?;
let len = seq.len()?;
Ok((seq, len))
}

// extract a Rust DataFrame from a python DataFrame, that is DataFrame<PyDataFrame<RustDataFrame>>
pub(crate) fn get_df(obj: &PyAny) -> PyResult<DataFrame> {
let pydf = obj.getattr("_df")?;
Expand All @@ -93,10 +87,10 @@ where
T::Native: FromPyObject<'a>,
{
fn extract(obj: &'a PyAny) -> PyResult<Self> {
let (seq, len) = get_pyseq(obj)?;
let len = obj.len()?;
let mut builder = PrimitiveChunkedBuilder::new("", len);

for res in seq.iter()? {
for res in obj.iter()? {
let item = res?;
match item.extract::<T::Native>() {
Ok(val) => builder.append_value(val),
Expand All @@ -109,10 +103,10 @@ where

impl<'a> FromPyObject<'a> for Wrap<BooleanChunked> {
fn extract(obj: &'a PyAny) -> PyResult<Self> {
let (seq, len) = get_pyseq(obj)?;
let len = obj.len()?;
let mut builder = BooleanChunkedBuilder::new("", len);

for res in seq.iter()? {
for res in obj.iter()? {
let item = res?;
match item.extract::<bool>() {
Ok(val) => builder.append_value(val),
Expand Down Expand Up @@ -777,14 +771,14 @@ pub(crate) fn dicts_to_rows(
schema_columns: PlIndexSet<String>,
) -> PyResult<(Vec<Row>, Vec<String>)> {
let infer_schema_len = infer_schema_len.map(|n| std::cmp::max(1, n));
let (dicts, len) = get_pyseq(records)?;
let len = records.len()?;

let key_names = {
if !schema_columns.is_empty() {
schema_columns
} else {
let mut inferred_keys = PlIndexSet::new();
for d in dicts.iter()?.take(infer_schema_len.unwrap_or(usize::MAX)) {
for d in records.iter()?.take(infer_schema_len.unwrap_or(usize::MAX)) {
let d = d?;
let d = d.downcast::<PyDict>()?;
let keys = d.keys();
Expand All @@ -798,7 +792,7 @@ pub(crate) fn dicts_to_rows(
};
let mut rows = Vec::with_capacity(len);

for d in dicts.iter()? {
for d in records.iter()? {
let d = d?;
let d = d.downcast::<PyDict>()?;

Expand Down
21 changes: 8 additions & 13 deletions py-polars/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ use pyo3::prelude::*;
use pyo3::types::{PyBool, PyDict, PyFloat, PyInt, PyString};
use pyo3::wrap_pyfunction;

use crate::conversion::{get_df, get_lf, get_pyseq, get_series, Wrap};
use crate::conversion::{get_df, get_lf, get_series, Wrap};
use crate::dataframe::PyDataFrame;
use crate::error::{
ArrowErrorException, ColumnNotFoundError, ComputeError, DuplicateError, InvalidOperationError,
Expand Down Expand Up @@ -305,8 +305,7 @@ fn concat_df(dfs: &PyAny, py: Python) -> PyResult<PyDataFrame> {
use polars_core::error::PolarsResult;
use polars_core::utils::rayon::prelude::*;

let (seq, _len) = get_pyseq(dfs)?;
let mut iter = seq.iter()?;
let mut iter = dfs.iter()?;
let first = iter.next().unwrap()?;

let first_rdf = get_df(first)?;
Expand Down Expand Up @@ -343,8 +342,8 @@ fn concat_df(dfs: &PyAny, py: Python) -> PyResult<PyDataFrame> {
}

#[pyfunction]
fn concat_lf(lfs: &PyAny, rechunk: bool, parallel: bool) -> PyResult<PyLazyFrame> {
let (seq, len) = get_pyseq(lfs)?;
fn concat_lf(seq: &PyAny, rechunk: bool, parallel: bool) -> PyResult<PyLazyFrame> {
let len = seq.len()?;
let mut lfs = Vec::with_capacity(len);

for res in seq.iter()? {
Expand All @@ -359,8 +358,7 @@ fn concat_lf(lfs: &PyAny, rechunk: bool, parallel: bool) -> PyResult<PyLazyFrame

#[pyfunction]
fn py_diag_concat_df(dfs: &PyAny) -> PyResult<PyDataFrame> {
let (seq, _len) = get_pyseq(dfs)?;
let iter = seq.iter()?;
let iter = dfs.iter()?;

let dfs = iter
.map(|item| {
Expand All @@ -375,8 +373,7 @@ fn py_diag_concat_df(dfs: &PyAny) -> PyResult<PyDataFrame> {

#[pyfunction]
fn py_diag_concat_lf(lfs: &PyAny, rechunk: bool, parallel: bool) -> PyResult<PyLazyFrame> {
let (seq, _len) = get_pyseq(lfs)?;
let iter = seq.iter()?;
let iter = lfs.iter()?;

let lfs = iter
.map(|item| {
Expand All @@ -392,8 +389,7 @@ fn py_diag_concat_lf(lfs: &PyAny, rechunk: bool, parallel: bool) -> PyResult<PyL

#[pyfunction]
fn py_hor_concat_df(dfs: &PyAny) -> PyResult<PyDataFrame> {
let (seq, _len) = get_pyseq(dfs)?;
let iter = seq.iter()?;
let iter = dfs.iter()?;

let dfs = iter
.map(|item| {
Expand All @@ -408,8 +404,7 @@ fn py_hor_concat_df(dfs: &PyAny) -> PyResult<PyDataFrame> {

#[pyfunction]
fn concat_series(series: &PyAny) -> PyResult<PySeries> {
let (seq, _len) = get_pyseq(series)?;
let mut iter = seq.iter()?;
let mut iter = series.iter()?;
let first = iter.next().unwrap()?;

let mut s = get_series(first)?;
Expand Down
12 changes: 5 additions & 7 deletions py-polars/src/list_construction.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,15 @@ use polars::prelude::*;
use polars_core::utils::CustomIterTools;
use pyo3::{PyAny, PyResult};

use crate::conversion::get_pyseq;

pub fn py_seq_to_list(name: &str, seq: &PyAny, dtype: &DataType) -> PyResult<Series> {
let (seq, len) = get_pyseq(seq)?;
let len = seq.len()?;
let s = match dtype {
DataType::Int64 => {
let mut builder =
ListPrimitiveChunkedBuilder::<Int64Type>::new(name, len, len * 5, DataType::Int64);
for sub_seq in seq.iter()? {
let sub_seq = sub_seq?;
let (sub_seq, len) = get_pyseq(sub_seq)?;
let len = sub_seq.len()?;

// safety: we know the iterators len
let iter = unsafe {
Expand Down Expand Up @@ -41,7 +39,7 @@ pub fn py_seq_to_list(name: &str, seq: &PyAny, dtype: &DataType) -> PyResult<Ser
);
for sub_seq in seq.iter()? {
let sub_seq = sub_seq?;
let (sub_seq, len) = get_pyseq(sub_seq)?;
let len = sub_seq.len()?;
// safety: we know the iterators len
let iter = unsafe {
sub_seq
Expand All @@ -64,7 +62,7 @@ pub fn py_seq_to_list(name: &str, seq: &PyAny, dtype: &DataType) -> PyResult<Ser
let mut builder = ListBooleanChunkedBuilder::new(name, len, len * 5);
for sub_seq in seq.iter()? {
let sub_seq = sub_seq?;
let (sub_seq, len) = get_pyseq(sub_seq)?;
let len = sub_seq.len()?;
// safety: we know the iterators len
let iter = unsafe {
sub_seq
Expand All @@ -87,7 +85,7 @@ pub fn py_seq_to_list(name: &str, seq: &PyAny, dtype: &DataType) -> PyResult<Ser
let mut builder = ListUtf8ChunkedBuilder::new(name, len, len * 5);
for sub_seq in seq.iter()? {
let sub_seq = sub_seq?;
let (sub_seq, len) = get_pyseq(sub_seq)?;
let len = sub_seq.len()?;
// safety: we know the iterators len
let iter = unsafe {
sub_seq
Expand Down
8 changes: 4 additions & 4 deletions py-polars/src/series.rs
Original file line number Diff line number Diff line change
Expand Up @@ -128,10 +128,10 @@ impl PySeries {

#[staticmethod]
pub fn new_opt_bool(name: &str, obj: &PyAny, strict: bool) -> PyResult<PySeries> {
let (seq, len) = get_pyseq(obj)?;
let len = obj.len()?;
let mut builder = BooleanChunkedBuilder::new(name, len);

for res in seq.iter()? {
for res in obj.iter()? {
let item = res?;
if item.is_none() {
builder.append_null()
Expand Down Expand Up @@ -160,10 +160,10 @@ where
ChunkedArray<T>: IntoSeries,
T::Native: FromPyObject<'a>,
{
let (seq, len) = get_pyseq(obj)?;
let len = obj.len()?;
let mut builder = PrimitiveChunkedBuilder::<T>::new(name, len);

for res in seq.iter()? {
for res in obj.iter()? {
let item = res?;

if item.is_none() {
Expand Down

0 comments on commit 8204c88

Please sign in to comment.