From 34a2f2daca2d6e5349ab5276fe2688ec551b0879 Mon Sep 17 00:00:00 2001 From: Ritchie Vink Date: Sun, 17 Jul 2022 10:25:24 +0200 Subject: [PATCH] parquet: stop reading when slice is reached (#4046) --- polars/polars-core/src/frame/mod.rs | 6 ++++++ polars/polars-io/src/parquet/read_impl.rs | 11 ++++++++--- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/polars/polars-core/src/frame/mod.rs b/polars/polars-core/src/frame/mod.rs index e92a709c2c6..ff49fc1fea7 100644 --- a/polars/polars-core/src/frame/mod.rs +++ b/polars/polars-core/src/frame/mod.rs @@ -2170,6 +2170,9 @@ impl DataFrame { /// ``` #[must_use] pub fn slice(&self, offset: i64, length: usize) -> Self { + if offset == 0 && length == self.height() { + return self.clone(); + } let col = self .columns .iter() @@ -2180,6 +2183,9 @@ impl DataFrame { #[must_use] pub fn slice_par(&self, offset: i64, length: usize) -> Self { + if offset == 0 && length == self.height() { + return self.clone(); + } let col = POOL.install(|| { self.columns .par_iter() diff --git a/polars/polars-io/src/parquet/read_impl.rs b/polars/polars-io/src/parquet/read_impl.rs index 5d716fca144..0433e141f69 100644 --- a/polars/polars-io/src/parquet/read_impl.rs +++ b/polars/polars-io/src/parquet/read_impl.rs @@ -140,7 +140,11 @@ fn rg_to_dfs( apply_aggregations(&mut df, aggregate)?; previous_row_count += current_row_count; - dfs.push(df) + dfs.push(df); + + if remaining_rows == 0 { + break; + } } Ok(dfs) } @@ -178,7 +182,8 @@ fn rg_to_dfs_par( let dfs = row_groups .into_par_iter() .map(|(md, local_limit, row_count_start)| { - if !read_this_row_group(predicate.as_ref(), file_metadata, schema)? { + if local_limit == 0 || !read_this_row_group(predicate.as_ref(), file_metadata, schema)? + { return Ok(None); } // test we don't read the parquet file if this env var is set @@ -282,6 +287,6 @@ pub fn read_parquet( } else { let mut df = accumulate_dataframes_vertical(dfs.into_iter())?; apply_aggregations(&mut df, aggregate)?; - Ok(df.slice(0, limit)) + Ok(df.slice_par(0, limit)) } }