-
Notifications
You must be signed in to change notification settings - Fork 804
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
ARROW-9523 [Rust] Improve filter kernel performance
The filter kernel located here https://github.com/apache/arrow/blob/master/rust/arrow/src/compute/kernels/filter.rs currently has the following performance: filter old u8 low selectivity time: [1.7782 ms 1.7790 ms 1.7801 ms] filter old u8 high selectivity time: [815.58 us 816.58 us 817.57 us] filter old u8 w NULLs low selectivity time: [1.8131 ms 1.8231 ms 1.8336 ms] filter old u8 w NULLs high selectivity time: [817.41 us 820.01 us 823.05 us] I have been working on a new implementation which performs between approximately 17 and 550 times faster depending mostly on filter selectivity. Here are the benchmark results: filter u8 low selectivity time: [107.26 us 108.24 us 109.58 us] filter u8 high selectivity time: [4.7854 us 4.8050 us 4.8276 us] filter context u8 low selectivity time: [102.59 us 102.93 us 103.38 us] filter context u8 high selectivity time: [1.4709 us 1.4760 us 1.4823 us] filter context u8 w NULLs low selectivity time: [130.48 us 131.00 us 131.65 us] filter context u8 w NULLs high selectivity time: [2.0520 us 2.0818 us 2.1137 us] filter context f32 low selectivity time: [117.26 us 118.58 us 120.13 us] filter context f32 high selectivity time: [1.7895 us 1.7919 us 1.7942 us] This new implementation is based on a few key ideas: (1) if the data array being filtered doesn't have a null bitmap, no time should be wasted to copy or create a null bitmap in the resulting filtered data array - this is implemented using the CopyNullBit trait which has a no-op implementation and an actual implementation (2) when the filter is highly selective, e.g. only a small number of values from the data array are selected, the filter implementation should efficiently skip entire batches of 0s in the filter array - this is implemented by transmuting the filter array to u64 which allows to quickly check and skip entire batches of 64 bits (3) when an entire record batch is filtered, any computation which only depends on the filter array is done once and then shared for filtering all the data arrays in the record batch - this is implemented using the FilterContext struct This pull request also implements support for filtering dictionary arrays. @paddyhoran @andygrove Closes #7798 from yordan-pavlov/improve_filter_kernel_perf Lead-authored-by: Yordan Pavlov <yordan.pavlov@outlook.com> Co-authored-by: Yordan Pavlov <64363766+yordan-pavlov@users.noreply.github.com> Signed-off-by: Andy Grove <andygrove@nvidia.com>
- Loading branch information
Showing
6 changed files
with
758 additions
and
113 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,152 @@ | ||
// Licensed to the Apache Software Foundation (ASF) under one | ||
// or more contributor license agreements. See the NOTICE file | ||
// distributed with this work for additional information | ||
// regarding copyright ownership. The ASF licenses this file | ||
// to you under the Apache License, Version 2.0 (the | ||
// "License"); you may not use this file except in compliance | ||
// with the License. You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, | ||
// software distributed under the License is distributed on an | ||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
// KIND, either express or implied. See the License for the | ||
// specific language governing permissions and limitations | ||
// under the License. | ||
|
||
use arrow::array::*; | ||
use arrow::compute::{filter, FilterContext}; | ||
use arrow::datatypes::ArrowNumericType; | ||
use criterion::{criterion_group, criterion_main, Criterion}; | ||
|
||
fn create_primitive_array<T, F>(size: usize, value_fn: F) -> PrimitiveArray<T> | ||
where | ||
T: ArrowNumericType, | ||
F: Fn(usize) -> T::Native, | ||
{ | ||
let mut builder = PrimitiveArray::<T>::builder(size); | ||
for i in 0..size { | ||
builder.append_value(value_fn(i)).unwrap(); | ||
} | ||
builder.finish() | ||
} | ||
|
||
fn create_u8_array_with_nulls(size: usize) -> UInt8Array { | ||
let mut builder = UInt8Builder::new(size); | ||
for i in 0..size { | ||
if i % 2 == 0 { | ||
builder.append_value(1).unwrap(); | ||
} else { | ||
builder.append_null().unwrap(); | ||
} | ||
} | ||
builder.finish() | ||
} | ||
|
||
fn create_bool_array<F>(size: usize, value_fn: F) -> BooleanArray | ||
where | ||
F: Fn(usize) -> bool, | ||
{ | ||
let mut builder = BooleanBuilder::new(size); | ||
for i in 0..size { | ||
builder.append_value(value_fn(i)).unwrap(); | ||
} | ||
builder.finish() | ||
} | ||
|
||
fn bench_filter_u8(data_array: &UInt8Array, filter_array: &BooleanArray) { | ||
filter( | ||
criterion::black_box(data_array), | ||
criterion::black_box(filter_array), | ||
) | ||
.unwrap(); | ||
} | ||
|
||
// fn bench_filter_f32(data_array: &Float32Array, filter_array: &BooleanArray) { | ||
// filter(criterion::black_box(data_array), criterion::black_box(filter_array)).unwrap(); | ||
// } | ||
|
||
fn bench_filter_context_u8(data_array: &UInt8Array, filter_context: &FilterContext) { | ||
filter_context | ||
.filter(criterion::black_box(data_array)) | ||
.unwrap(); | ||
} | ||
|
||
fn bench_filter_context_f32(data_array: &Float32Array, filter_context: &FilterContext) { | ||
filter_context | ||
.filter(criterion::black_box(data_array)) | ||
.unwrap(); | ||
} | ||
|
||
fn add_benchmark(c: &mut Criterion) { | ||
let size = 65536; | ||
let filter_array = create_bool_array(size, |i| match i % 2 { | ||
0 => true, | ||
_ => false, | ||
}); | ||
let sparse_filter_array = create_bool_array(size, |i| match i % 8000 { | ||
0 => true, | ||
_ => false, | ||
}); | ||
let dense_filter_array = create_bool_array(size, |i| match i % 8000 { | ||
0 => false, | ||
_ => true, | ||
}); | ||
|
||
let filter_context = FilterContext::new(&filter_array).unwrap(); | ||
let sparse_filter_context = FilterContext::new(&sparse_filter_array).unwrap(); | ||
let dense_filter_context = FilterContext::new(&dense_filter_array).unwrap(); | ||
|
||
let data_array = create_primitive_array(size, |i| match i % 2 { | ||
0 => 1, | ||
_ => 0, | ||
}); | ||
c.bench_function("filter u8 low selectivity", |b| { | ||
b.iter(|| bench_filter_u8(&data_array, &filter_array)) | ||
}); | ||
c.bench_function("filter u8 high selectivity", |b| { | ||
b.iter(|| bench_filter_u8(&data_array, &sparse_filter_array)) | ||
}); | ||
c.bench_function("filter u8 very low selectivity", |b| { | ||
b.iter(|| bench_filter_u8(&data_array, &dense_filter_array)) | ||
}); | ||
|
||
c.bench_function("filter context u8 low selectivity", |b| { | ||
b.iter(|| bench_filter_context_u8(&data_array, &filter_context)) | ||
}); | ||
c.bench_function("filter context u8 high selectivity", |b| { | ||
b.iter(|| bench_filter_context_u8(&data_array, &sparse_filter_context)) | ||
}); | ||
c.bench_function("filter context u8 very low selectivity", |b| { | ||
b.iter(|| bench_filter_context_u8(&data_array, &dense_filter_context)) | ||
}); | ||
|
||
let data_array = create_u8_array_with_nulls(size); | ||
c.bench_function("filter context u8 w NULLs low selectivity", |b| { | ||
b.iter(|| bench_filter_context_u8(&data_array, &filter_context)) | ||
}); | ||
c.bench_function("filter context u8 w NULLs high selectivity", |b| { | ||
b.iter(|| bench_filter_context_u8(&data_array, &sparse_filter_context)) | ||
}); | ||
c.bench_function("filter context u8 w NULLs very low selectivity", |b| { | ||
b.iter(|| bench_filter_context_u8(&data_array, &dense_filter_context)) | ||
}); | ||
|
||
let data_array = create_primitive_array(size, |i| match i % 2 { | ||
0 => 1.0, | ||
_ => 0.0, | ||
}); | ||
c.bench_function("filter context f32 low selectivity", |b| { | ||
b.iter(|| bench_filter_context_f32(&data_array, &filter_context)) | ||
}); | ||
c.bench_function("filter context f32 high selectivity", |b| { | ||
b.iter(|| bench_filter_context_f32(&data_array, &sparse_filter_context)) | ||
}); | ||
c.bench_function("filter context f32 very low selectivity", |b| { | ||
b.iter(|| bench_filter_context_f32(&data_array, &dense_filter_context)) | ||
}); | ||
} | ||
|
||
criterion_group!(benches, add_benchmark); | ||
criterion_main!(benches); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.