From 6ae8d604ecc736b40f90e77ffb61e96006d59d87 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Tue, 16 Aug 2022 17:44:59 +0100 Subject: [PATCH 1/2] Make filter APIs public (#1792) --- parquet/src/arrow/arrow_reader/mod.rs | 26 ++++++++------------- parquet/src/arrow/arrow_reader/selection.rs | 4 ++-- 2 files changed, 12 insertions(+), 18 deletions(-) diff --git a/parquet/src/arrow/arrow_reader/mod.rs b/parquet/src/arrow/arrow_reader/mod.rs index 052ef40ee841..8944770a09a5 100644 --- a/parquet/src/arrow/arrow_reader/mod.rs +++ b/parquet/src/arrow/arrow_reader/mod.rs @@ -39,16 +39,11 @@ use crate::file::reader::{ChunkReader, FileReader, SerializedFileReader}; use crate::file::serialized_reader::ReadOptionsBuilder; use crate::schema::types::SchemaDescriptor; -#[allow(unused)] mod filter; -#[allow(unused)] mod selection; -// TODO: Make these public once stable (#1792) -#[allow(unused_imports)] -pub(crate) use filter::{ArrowPredicate, ArrowPredicateFn, RowFilter}; -#[allow(unused_imports)] -pub(crate) use selection::{RowSelection, RowSelector}; +pub use filter::{ArrowPredicate, ArrowPredicateFn, RowFilter}; +pub use selection::{RowSelection, RowSelector}; /// A generic builder for constructing sync or async arrow parquet readers. This is not intended /// to be used directly, instead you should use the specialization for the type of reader @@ -141,14 +136,16 @@ impl ArrowReaderBuilder { } /// Provide a [`RowSelection] to filter out rows, and avoid fetching their - /// data into memory + /// data into memory. 
/// - /// Row group filtering is applied prior to this, and rows from skipped + /// Row group filtering is applied prior to this, and therefore rows from skipped /// row groups should not be included in the [`RowSelection`] /// - /// TODO: Make public once stable (#1792) - #[allow(unused)] - pub(crate) fn with_row_selection(self, selection: RowSelection) -> Self { + /// An example use case of this would be applying a selection determined by + /// evaluating predicates against the [`Index`] + /// + /// [`Index`]: crate::file::page_index::index::Index + pub fn with_row_selection(self, selection: RowSelection) -> Self { Self { selection: Some(selection), ..self @@ -158,10 +155,7 @@ impl ArrowReaderBuilder { /// Provide a [`RowFilter`] to skip decoding rows /// /// Row filters are applied after row group selection and row selection - /// - /// TODO: Make public once stable (#1792) - #[allow(unused)] - pub(crate) fn with_row_filter(self, filter: RowFilter) -> Self { + pub fn with_row_filter(self, filter: RowFilter) -> Self { Self { filter: Some(filter), ..self diff --git a/parquet/src/arrow/arrow_reader/selection.rs b/parquet/src/arrow/arrow_reader/selection.rs index 8e129f5667ec..ef674ba1774e 100644 --- a/parquet/src/arrow/arrow_reader/selection.rs +++ b/parquet/src/arrow/arrow_reader/selection.rs @@ -398,11 +398,11 @@ mod tests { let mut rand = thread_rng(); for _ in 0..100 { let a_len = rand.gen_range(10..100); - let a_bools: Vec<_> = (0..a_len).map(|x| rand.gen_bool(0.2)).collect(); + let a_bools: Vec<_> = (0..a_len).map(|_| rand.gen_bool(0.2)).collect(); let a = RowSelection::from_filters(&[BooleanArray::from(a_bools.clone())]); let b_len: usize = a_bools.iter().map(|x| *x as usize).sum(); - let b_bools: Vec<_> = (0..b_len).map(|x| rand.gen_bool(0.8)).collect(); + let b_bools: Vec<_> = (0..b_len).map(|_| rand.gen_bool(0.8)).collect(); let b = RowSelection::from_filters(&[BooleanArray::from(b_bools.clone())]); let mut expected_bools = vec![false; a_len]; From 
23d954558c9804bc52a85676ea11a90e52cc0ee5 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies <1781103+tustvold@users.noreply.github.com> Date: Wed, 17 Aug 2022 16:41:47 +0100 Subject: [PATCH 2/2] Update parquet/src/arrow/arrow_reader/mod.rs Co-authored-by: Liang-Chi Hsieh --- parquet/src/arrow/arrow_reader/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parquet/src/arrow/arrow_reader/mod.rs b/parquet/src/arrow/arrow_reader/mod.rs index 8944770a09a5..1247e4399e62 100644 --- a/parquet/src/arrow/arrow_reader/mod.rs +++ b/parquet/src/arrow/arrow_reader/mod.rs @@ -135,7 +135,7 @@ impl ArrowReaderBuilder { } } - /// Provide a [`RowSelection] to filter out rows, and avoid fetching their + /// Provide a [`RowSelection`] to filter out rows, and avoid fetching their /// data into memory. /// /// Row group filtering is applied prior to this, and therefore rows from skipped