From ba5b771159eb6f8faf81151b0a751362a0c8c9a5 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Fri, 14 Oct 2022 16:00:07 +1300 Subject: [PATCH 1/3] Filter DecimalArray as PrimitiveArray (#2637) --- arrow/src/compute/kernels/filter.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arrow/src/compute/kernels/filter.rs b/arrow/src/compute/kernels/filter.rs index d1e2ad17593d..150253b1c0de 100644 --- a/arrow/src/compute/kernels/filter.rs +++ b/arrow/src/compute/kernels/filter.rs @@ -338,6 +338,16 @@ fn filter_array(values: &dyn Array, predicate: &FilterPredicate) -> Result downcast_primitive_array! { values => Ok(Arc::new(filter_primitive(values, predicate))), + DataType::Decimal128(p, s) => { + let values = values.as_any().downcast_ref::<Decimal128Array>().unwrap(); + let filtered = filter_primitive(values, predicate); + Ok(Arc::new(filtered.with_precision_and_scale(*p, *s).unwrap())) + } + DataType::Decimal256(p, s) => { + let values = values.as_any().downcast_ref::<Decimal256Array>().unwrap(); + let filtered = filter_primitive(values, predicate); + Ok(Arc::new(filtered.with_precision_and_scale(*p, *s).unwrap())) + } DataType::Boolean => { let values = values.as_any().downcast_ref::<BooleanArray>().unwrap(); Ok(Arc::new(filter_boolean(values, predicate))) From 5010ade2cc57af5d546124ca3e462ba9a05d6388 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Sun, 16 Oct 2022 08:20:48 +1300 Subject: [PATCH 2/3] Add decimal filter benches --- arrow/benches/filter_kernels.rs | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/arrow/benches/filter_kernels.rs b/arrow/benches/filter_kernels.rs index bd6129946630..a9f1fb8ce4fc 100644 --- a/arrow/benches/filter_kernels.rs +++ b/arrow/benches/filter_kernels.rs @@ -27,6 +27,7 @@ use arrow::compute::filter; use arrow::datatypes::{Field, Float32Type, Int32Type, Schema, UInt8Type}; use criterion::{criterion_group, criterion_main, Criterion}; +use arrow_array::types::Decimal128Type; fn bench_filter(data_array: &dyn Array, 
filter_array: &BooleanArray) { criterion::black_box(filter(data_array, filter_array).unwrap()); @@ -143,6 +144,27 @@ fn add_benchmark(c: &mut Criterion) { b.iter(|| bench_built_filter(&sparse_filter, &data_array)) }); + let data_array = create_primitive_array::<Decimal128Type>(size, 0.0); + c.bench_function("filter decimal128 (kept 1/2)", |b| { + b.iter(|| bench_filter(&data_array, &filter_array)) + }); + c.bench_function("filter decimal128 high selectivity (kept 1023/1024)", |b| { + b.iter(|| bench_filter(&data_array, &dense_filter_array)) + }); + c.bench_function("filter decimal128 low selectivity (kept 1/1024)", |b| { + b.iter(|| bench_filter(&data_array, &sparse_filter_array)) + }); + + c.bench_function("filter context decimal128 (kept 1/2)", |b| { + b.iter(|| bench_built_filter(&filter, &data_array)) + }); + c.bench_function("filter context decimal128 high selectivity (kept 1023/1024)", |b| { + b.iter(|| bench_built_filter(&dense_filter, &data_array)) + }); + c.bench_function("filter context decimal128 low selectivity (kept 1/1024)", |b| { + b.iter(|| bench_built_filter(&sparse_filter, &data_array)) + }); + let data_array = create_string_array::<i32>(size, 0.5); c.bench_function("filter context string (kept 1/2)", |b| { b.iter(|| bench_built_filter(&filter, &data_array)) From 4f1bbfc9b1ee85d036f2f465a8722b403baf9c34 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Sun, 16 Oct 2022 08:50:26 +1300 Subject: [PATCH 3/3] Format --- arrow/benches/filter_kernels.rs | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/arrow/benches/filter_kernels.rs b/arrow/benches/filter_kernels.rs index a9f1fb8ce4fc..9dd3e7ebba09 100644 --- a/arrow/benches/filter_kernels.rs +++ b/arrow/benches/filter_kernels.rs @@ -26,8 +26,8 @@ use arrow::array::*; use arrow::compute::filter; use arrow::datatypes::{Field, Float32Type, Int32Type, Schema, UInt8Type}; -use criterion::{criterion_group, criterion_main, Criterion}; use arrow_array::types::Decimal128Type; +use 
criterion::{criterion_group, criterion_main, Criterion}; fn bench_filter(data_array: &dyn Array, filter_array: &BooleanArray) { criterion::black_box(filter(data_array, filter_array).unwrap()); @@ -158,12 +158,14 @@ fn add_benchmark(c: &mut Criterion) { c.bench_function("filter context decimal128 (kept 1/2)", |b| { b.iter(|| bench_built_filter(&filter, &data_array)) }); - c.bench_function("filter context decimal128 high selectivity (kept 1023/1024)", |b| { - b.iter(|| bench_built_filter(&dense_filter, &data_array)) - }); - c.bench_function("filter context decimal128 low selectivity (kept 1/1024)", |b| { - b.iter(|| bench_built_filter(&sparse_filter, &data_array)) - }); + c.bench_function( + "filter context decimal128 high selectivity (kept 1023/1024)", + |b| b.iter(|| bench_built_filter(&dense_filter, &data_array)), + ); + c.bench_function( + "filter context decimal128 low selectivity (kept 1/1024)", + |b| b.iter(|| bench_built_filter(&sparse_filter, &data_array)), + ); let data_array = create_string_array::<i32>(size, 0.5); c.bench_function("filter context string (kept 1/2)", |b| {