From e84fe2050fb6c898f0c963230da22876ca3c018f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20Heres?= Date: Fri, 30 Jul 2021 21:30:33 +0200 Subject: [PATCH] Speed up filter_record_batch with one array (#637) * Speed up filter_record_batch with one array * Don't into() --- arrow/benches/filter_kernels.rs | 19 +++++++++++++++++-- arrow/src/compute/kernels/filter.rs | 21 +++++++++++++++------ 2 files changed, 32 insertions(+), 8 deletions(-) diff --git a/arrow/benches/filter_kernels.rs b/arrow/benches/filter_kernels.rs index ca0b09740d3e..d5ff09c040b8 100644 --- a/arrow/benches/filter_kernels.rs +++ b/arrow/benches/filter_kernels.rs @@ -16,12 +16,15 @@ // under the License. extern crate arrow; -use arrow::compute::Filter; +use std::sync::Arc; + +use arrow::compute::{filter_record_batch, Filter}; +use arrow::record_batch::RecordBatch; use arrow::util::bench_util::*; use arrow::array::*; use arrow::compute::{build_filter, filter}; -use arrow::datatypes::{Float32Type, UInt8Type}; +use arrow::datatypes::{Field, Float32Type, Schema, UInt8Type}; use criterion::{criterion_group, criterion_main, Criterion}; @@ -100,6 +103,18 @@ fn add_benchmark(c: &mut Criterion) { c.bench_function("filter context string low selectivity", |b| { b.iter(|| bench_built_filter(&sparse_filter, &data_array)) }); + + let data_array = create_primitive_array::<Float32Type>(size, 0.0); + + let field = Field::new("c1", data_array.data_type().clone(), true); + let schema = Schema::new(vec![field]); + + let batch = + RecordBatch::try_new(Arc::new(schema), vec![Arc::new(data_array)]).unwrap(); + + c.bench_function("filter single record batch", |b| { + b.iter(|| filter_record_batch(&batch, &filter_array)) + }); } criterion_group!(benches, add_benchmark); diff --git a/arrow/src/compute/kernels/filter.rs b/arrow/src/compute/kernels/filter.rs index 075943c2b2c5..55b1cd1ceee7 100644 --- a/arrow/src/compute/kernels/filter.rs +++ b/arrow/src/compute/kernels/filter.rs @@ -288,12 +288,21 @@ pub fn filter_record_batch( 
return filter_record_batch(record_batch, &predicate); } - let filter = build_filter(predicate)?; - let filtered_arrays = record_batch - .columns() - .iter() - .map(|a| make_array(filter(a.data()))) - .collect(); + let num_colums = record_batch.columns().len(); + + let filtered_arrays = match num_colums { + 1 => { + vec![filter(record_batch.columns()[0].as_ref(), predicate)?] + } + _ => { + let filter = build_filter(predicate)?; + record_batch + .columns() + .iter() + .map(|a| make_array(filter(a.data()))) + .collect() + } + }; RecordBatch::try_new(record_batch.schema(), filtered_arrays) }