From e1d9ef804ac242f3c6c46fbeed298d844a8dad9c Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 8 Apr 2025 07:00:22 -0400 Subject: [PATCH] Improve binary_op benchmark --- datafusion/physical-expr/benches/binary_op.rs | 80 +------------------ 1 file changed, 4 insertions(+), 76 deletions(-) diff --git a/datafusion/physical-expr/benches/binary_op.rs b/datafusion/physical-expr/benches/binary_op.rs index 7ac5c0485203..59a602df053c 100644 --- a/datafusion/physical-expr/benches/binary_op.rs +++ b/datafusion/physical-expr/benches/binary_op.rs @@ -17,7 +17,6 @@ use arrow::{ array::BooleanArray, - compute::{bool_and, bool_or}, datatypes::{DataType, Field, Schema}, }; use arrow::{array::StringArray, record_batch::RecordBatch}; @@ -28,7 +27,7 @@ use datafusion_physical_expr::{ planner::logical2physical, PhysicalExpr, }; -use std::sync::{Arc, LazyLock}; +use std::sync::Arc; /// Generates BooleanArrays with different true/false distributions for benchmarking. /// @@ -130,75 +129,6 @@ fn generate_boolean_cases<const TEST_ALL_FALSE: bool>( cases } -/// Benchmarks boolean operations `false_count/bool_or` and `true_count/bool_and` on [`BooleanArray`] -/// You can run this benchmark with: -/// ```sh -/// # test true_count/false_count -/// TEST_BOOL_COUNT=1 cargo bench --bench binary_op -- boolean_ops -/// # test bool_or/bool_and -/// cargo bench --bench binary_op -- boolean_ops -/// ``` -fn benchmark_boolean_ops(c: &mut Criterion) { - let len = 1_000_000; // Use one million elements for clear performance differentiation - static TEST_BOOL_COUNT: LazyLock<bool> = - LazyLock::new(|| match std::env::var("TEST_BOOL_COUNT") { - Ok(_) => { - println!("TEST_BOOL_COUNT=ON"); - true - } - Err(_) => { - println!("TEST_BOOL_COUNT=OFF"); - false - } - }); - - // Determine the test function to be executed based on the ENV `TEST_BOOL_COUNT` - fn test_func<const TEST_ALL_FALSE: bool>(array: &BooleanArray) -> bool { - // Use false_count for all false and true_count for all true - if *TEST_BOOL_COUNT { - if TEST_ALL_FALSE { - array.false_count() == 
array.len() - } else { - array.true_count() == array.len() - } - } - // Use bool_or for all false and bool_and for all true - else if TEST_ALL_FALSE { - match bool_or(array) { - Some(v) => !v, - None => false, - } - } else { - bool_and(array).unwrap_or(false) - } - } - - // Test cases for false_count and bool_or - { - let test_cases = generate_boolean_cases::<true>(len); - for (scenario, array) in test_cases { - let arr_ref = Arc::new(array); - - // Benchmark test_func across different scenarios - c.bench_function(&format!("boolean_ops/or/{}", scenario), |b| { - b.iter(|| test_func::<true>(black_box(&arr_ref))) - }); - } - } - // Test cases for true_count and bool_and - { - let test_cases = generate_boolean_cases::<false>(len); - for (scenario, array) in test_cases { - let arr_ref = Arc::new(array); - - // Benchmark test_func across different scenarios - c.bench_function(&format!("boolean_ops/and/{}", scenario), |b| { - b.iter(|| test_func::<false>(black_box(&arr_ref))) - }); - } - } -} - /// Benchmarks AND/OR operator short-circuiting by evaluating complex regex conditions. /// /// Creates 6 test scenarios per operator: @@ -257,12 +187,14 @@ fn benchmark_binary_op_in_short_circuit(c: &mut Criterion) { ); // Create physical binary expressions + // a AND ((b ~ regex) AND (c ~ regex)) let expr_and = BinaryExpr::new( Arc::new(Column::new("a", 0)), Operator::And, logical2physical(&right_condition_and, &schema), ); + // a OR ((b ~ regex) OR (c ~ regex)) let expr_or = BinaryExpr::new( Arc::new(Column::new("a", 0)), Operator::Or, @@ -364,10 +296,6 @@ fn create_record_batch( Ok(rbs) } -criterion_group!( - benches, - benchmark_boolean_ops, - benchmark_binary_op_in_short_circuit -); +criterion_group!(benches, benchmark_binary_op_in_short_circuit); criterion_main!(benches);