From f41fde5bb1fdd789840cccdbb1fbee5050938773 Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Wed, 4 Feb 2026 23:11:35 -0500 Subject: [PATCH 01/14] feat: Add selectivity-tracking wrapper for dynamic filters Add `SelectivityAwareFilterExpr`, a wrapper `PhysicalExpr` that tracks filter selectivity at runtime and automatically disables filters that aren't pruning enough rows. This addresses the issue where dynamic filters from `HashJoinExec` can be expensive to evaluate for little benefit when the build side covers most of the probe side values. Key features: - Selectivity threshold: Filter disabled when rows_passed/rows_total >= threshold - Minimum rows: Statistics collected for min_rows before making a decision - Generation-aware reset: Resets when inner filter updates (e.g., hash table built) - Sticky disable: Once disabled, stays disabled until the inner filter's generation changes (which resets tracking) - Disabled behavior: Returns all-true array to bypass filter evaluation New configuration options in OptimizerOptions: - enable_dynamic_filter_selectivity_tracking (default: false) - dynamic_filter_selectivity_threshold (default: 0.95) - dynamic_filter_min_rows_for_selectivity (default: 10000) Co-Authored-By: Claude Opus 4.5 --- datafusion/common/src/config.rs | 28 + .../physical-expr/src/expressions/mod.rs | 2 + .../expressions/selectivity_aware_filter.rs | 486 ++++++++++++++++++ .../physical-plan/src/joins/hash_join/exec.rs | 43 +- .../dynamic_filter_pushdown_config.slt | 101 ++++ 5 files changed, 656 insertions(+), 4 deletions(-) create mode 100644 datafusion/physical-expr/src/expressions/selectivity_aware_filter.rs diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index dad12c1c6bc91..7c17a88137ca3 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -1115,6 +1115,34 @@ config_namespace! 
{ /// See: pub hash_join_inlist_pushdown_max_distinct_values: usize, default = 150 + /// Enable selectivity-based disabling of dynamic filters from joins. + /// + /// When enabled, join dynamic filters that pass most rows (above the threshold) + /// will be automatically disabled to avoid evaluation overhead. This is useful + /// when the build side of a join covers most of the probe side values, making + /// the filter expensive to evaluate for little benefit. + /// + /// The selectivity tracking resets when the dynamic filter is updated (e.g., when + /// the hash table is built), allowing the filter to be re-evaluated with new data. + pub enable_dynamic_filter_selectivity_tracking: bool, default = false + + /// Selectivity threshold for disabling join dynamic filters. + /// + /// If the filter passes this fraction or more of rows, it will be disabled. + /// Value should be between 0.0 and 1.0. + /// + /// For example, 0.95 means if 95% or more of rows pass the filter, it will be disabled. + /// Only used when `enable_dynamic_filter_selectivity_tracking` is true. + pub dynamic_filter_selectivity_threshold: f64, default = 0.95 + + /// Minimum number of rows to process before making a selectivity decision + /// for join dynamic filters. + /// + /// The filter will remain in a tracking state until this many rows have been + /// processed. This ensures statistical stability before making the disable decision. + /// Only used when `enable_dynamic_filter_selectivity_tracking` is true. + pub dynamic_filter_min_rows_for_selectivity: usize, default = 10_000 + /// The default filter selectivity used by Filter Statistics /// when an exact selectivity cannot be determined. Valid values are /// between 0 (no selectivity) and 100 (all rows are selected). 
diff --git a/datafusion/physical-expr/src/expressions/mod.rs b/datafusion/physical-expr/src/expressions/mod.rs index c9e02708d6c28..5f991899e376c 100644 --- a/datafusion/physical-expr/src/expressions/mod.rs +++ b/datafusion/physical-expr/src/expressions/mod.rs @@ -32,6 +32,7 @@ mod literal; mod negative; mod no_op; mod not; +mod selectivity_aware_filter; mod try_cast; mod unknown_column; @@ -54,5 +55,6 @@ pub use literal::{Literal, lit}; pub use negative::{NegativeExpr, negative}; pub use no_op::NoOp; pub use not::{NotExpr, not}; +pub use selectivity_aware_filter::{SelectivityAwareFilterExpr, SelectivityConfig}; pub use try_cast::{TryCastExpr, try_cast}; pub use unknown_column::UnKnownColumn; diff --git a/datafusion/physical-expr/src/expressions/selectivity_aware_filter.rs b/datafusion/physical-expr/src/expressions/selectivity_aware_filter.rs new file mode 100644 index 0000000000000..4ad85d6d6149e --- /dev/null +++ b/datafusion/physical-expr/src/expressions/selectivity_aware_filter.rs @@ -0,0 +1,486 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! A wrapper [`PhysicalExpr`] that tracks filter selectivity at runtime and +//! automatically disables filters that aren't pruning enough rows. 
+ +use std::any::Any; +use std::fmt::Display; +use std::hash::Hash; +use std::sync::Arc; +use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering}; + +use arrow::array::{ArrayRef, BooleanArray}; +use arrow::datatypes::{DataType, Schema}; +use arrow::record_batch::RecordBatch; +use datafusion_common::Result; +use datafusion_expr::ColumnarValue; +use datafusion_physical_expr_common::physical_expr::DynHash; +use parking_lot::RwLock; + +use crate::PhysicalExpr; + +/// Configuration for selectivity-based filter disabling. +#[derive(Debug, Clone)] +pub struct SelectivityConfig { + /// Threshold above which the filter is disabled (e.g., 0.95 = 95% selectivity). + /// If the filter passes this fraction or more of rows, it will be disabled. + pub threshold: f64, + /// Minimum rows to process before making a selectivity decision. + pub min_rows: usize, +} + +impl Default for SelectivityConfig { + fn default() -> Self { + Self { + threshold: 0.95, + min_rows: 10_000, + } + } +} + +/// State machine for selectivity tracking. +#[derive(Debug)] +enum SelectivityState { + /// Collecting statistics, not yet enough data. + Tracking { + rows_passed: AtomicUsize, + rows_total: AtomicUsize, + }, + /// Filter is sufficiently selective, keep active. + Active, + /// Filter has been disabled due to poor selectivity. + Disabled, +} + +impl SelectivityState { + fn new_tracking() -> Self { + Self::Tracking { + rows_passed: AtomicUsize::new(0), + rows_total: AtomicUsize::new(0), + } + } +} + +/// A wrapper [`PhysicalExpr`] that tracks selectivity and can disable filters +/// that pass too many rows. +/// +/// This wrapper is designed to be used with dynamic filters from joins. +/// It monitors how many rows pass through the filter, and if the filter +/// is found to be ineffective (passes most rows), it automatically disables +/// itself to avoid evaluation overhead. 
+/// +/// The wrapper resets its statistics when the inner filter's generation changes, +/// which happens when the dynamic filter is updated (e.g., when the hash table +/// is built in a hash join). +#[derive(Debug)] +pub struct SelectivityAwareFilterExpr { + /// The inner filter expression (typically DynamicFilterPhysicalExpr). + inner: Arc, + /// Selectivity tracking state. + state: RwLock, + /// The generation of the inner filter when we started tracking. + /// If this changes, we need to reset our state. + tracked_generation: AtomicU64, + /// Configuration for selectivity tracking. + config: SelectivityConfig, +} + +impl SelectivityAwareFilterExpr { + /// Create a new `SelectivityAwareFilterExpr` wrapping the given inner expression. + pub fn new(inner: Arc, config: SelectivityConfig) -> Self { + let current_generation = inner.snapshot_generation(); + Self { + inner, + state: RwLock::new(SelectivityState::new_tracking()), + tracked_generation: AtomicU64::new(current_generation), + config, + } + } + + /// Get the current selectivity information for observability. + /// + /// Returns `(rows_passed, rows_total, is_disabled)`. + pub fn selectivity_info(&self) -> (usize, usize, bool) { + let state = self.state.read(); + match &*state { + SelectivityState::Tracking { + rows_passed, + rows_total, + } => { + let passed = rows_passed.load(Ordering::Relaxed); + let total = rows_total.load(Ordering::Relaxed); + (passed, total, false) + } + SelectivityState::Active => (0, 0, false), + SelectivityState::Disabled => (0, 0, true), + } + } + + /// Check if the filter is disabled. + pub fn is_disabled(&self) -> bool { + matches!(*self.state.read(), SelectivityState::Disabled) + } + + /// Get the inner expression. + pub fn inner(&self) -> &Arc { + &self.inner + } + + /// Check if the inner generation has changed and reset state if needed. 
+ fn check_and_reset_if_needed(&self) { + let current_generation = self.inner.snapshot_generation(); + let tracked = self.tracked_generation.load(Ordering::Relaxed); + + if current_generation != tracked { + // Generation changed - reset to tracking state + let mut state = self.state.write(); + *state = SelectivityState::new_tracking(); + self.tracked_generation + .store(current_generation, Ordering::Relaxed); + } + } + + /// Count the number of true values in a boolean array. + fn count_true_values(array: &BooleanArray) -> usize { + array.true_count() + } + + /// Process the result and update selectivity statistics. + fn process_result(&self, result: &ColumnarValue) -> Result<()> { + // Only track selectivity for array results + let (true_count, total_count) = match result { + ColumnarValue::Array(array) => { + let bool_array = array + .as_any() + .downcast_ref::() + .expect("Filter expression should return BooleanArray"); + (Self::count_true_values(bool_array), array.len()) + } + ColumnarValue::Scalar(scalar) => { + // Scalar result - we can't track selectivity meaningfully + // Just skip tracking for this batch + if let datafusion_common::ScalarValue::Boolean(Some(v)) = scalar { + if *v { (1, 1) } else { (0, 1) } + } else { + return Ok(()); + } + } + }; + + let state = self.state.read(); + if let SelectivityState::Tracking { + rows_passed, + rows_total, + } = &*state + { + rows_passed.fetch_add(true_count, Ordering::Relaxed); + let new_total = + rows_total.fetch_add(total_count, Ordering::Relaxed) + total_count; + let passed = rows_passed.load(Ordering::Relaxed); + + // Check if we've seen enough rows to make a decision + if new_total >= self.config.min_rows { + // Calculate selectivity + let selectivity = passed as f64 / new_total as f64; + drop(state); + + // Decide whether to disable or keep active + let mut state = self.state.write(); + // Re-check in case another thread already updated + if matches!(*state, SelectivityState::Tracking { .. 
}) { + if selectivity >= self.config.threshold { + *state = SelectivityState::Disabled; + } else { + *state = SelectivityState::Active; + } + } + } + } + + Ok(()) + } + + /// Create an all-true boolean array of the given length. + fn all_true_array(len: usize) -> ArrayRef { + Arc::new(BooleanArray::from(vec![true; len])) + } +} + +impl Display for SelectivityAwareFilterExpr { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let (passed, total, disabled) = self.selectivity_info(); + if disabled { + write!(f, "SelectivityAware(DISABLED) [ {} ]", self.inner) + } else if total > 0 { + let selectivity = passed as f64 / total as f64; + write!( + f, + "SelectivityAware({:.1}%, {}/{}) [ {} ]", + selectivity * 100.0, + passed, + total, + self.inner + ) + } else { + write!(f, "SelectivityAware [ {} ]", self.inner) + } + } +} + +impl Hash for SelectivityAwareFilterExpr { + fn hash(&self, state: &mut H) { + // Hash based on the inner expression + self.inner.dyn_hash(state); + } +} + +impl PartialEq for SelectivityAwareFilterExpr { + fn eq(&self, other: &Self) -> bool { + self.inner.eq(&other.inner) + } +} + +impl Eq for SelectivityAwareFilterExpr {} + +impl PhysicalExpr for SelectivityAwareFilterExpr { + fn as_any(&self) -> &dyn Any { + self + } + + fn children(&self) -> Vec<&Arc> { + vec![&self.inner] + } + + fn with_new_children( + self: Arc, + children: Vec>, + ) -> Result> { + if children.len() != 1 { + return Err(datafusion_common::DataFusionError::Internal( + "SelectivityAwareFilterExpr expects exactly one child".to_string(), + )); + } + Ok(Arc::new(Self::new( + Arc::clone(&children[0]), + self.config.clone(), + ))) + } + + fn data_type(&self, input_schema: &Schema) -> Result { + self.inner.data_type(input_schema) + } + + fn nullable(&self, input_schema: &Schema) -> Result { + self.inner.nullable(input_schema) + } + + fn evaluate(&self, batch: &RecordBatch) -> Result { + // Check if the inner generation has changed + 
self.check_and_reset_if_needed(); + + // Check if we're disabled + { + let state = self.state.read(); + if matches!(*state, SelectivityState::Disabled) { + // Return all-true to bypass the filter + return Ok(ColumnarValue::Array(Self::all_true_array(batch.num_rows()))); + } + } + + // Evaluate the inner expression + let result = self.inner.evaluate(batch)?; + + // Update selectivity statistics + self.process_result(&result)?; + + Ok(result) + } + + fn fmt_sql(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.inner.fmt_sql(f) + } + + fn snapshot(&self) -> Result>> { + // Return the inner's snapshot + self.inner.snapshot() + } + + fn snapshot_generation(&self) -> u64 { + // Return the inner's generation + self.inner.snapshot_generation() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::expressions::{BinaryExpr, col, lit}; + use arrow::array::Int32Array; + use arrow::datatypes::Field; + use datafusion_expr::Operator; + + fn create_batch(values: Vec) -> RecordBatch { + let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)])); + RecordBatch::try_new(schema, vec![Arc::new(Int32Array::from(values)) as ArrayRef]) + .unwrap() + } + + fn create_filter_expr(threshold: i32) -> Arc { + // Create a filter: a < threshold + let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)])); + Arc::new(BinaryExpr::new( + col("a", &schema).unwrap(), + Operator::Lt, + lit(threshold), + )) + } + + #[test] + fn test_high_selectivity_filter_gets_disabled() { + // Create a filter that passes 95%+ of rows: a < 100 (all values pass) + let filter = create_filter_expr(100); + let config = SelectivityConfig { + threshold: 0.95, + min_rows: 100, + }; + let wrapper = SelectivityAwareFilterExpr::new(filter, config); + + // Create batches where all rows pass the filter + let batch = create_batch((0..100).collect()); + + // Evaluate - should process and track + let result = wrapper.evaluate(&batch).unwrap(); + let 
ColumnarValue::Array(arr) = result else { + panic!("Expected array result"); + }; + assert_eq!(arr.len(), 100); + + // After enough rows, the filter should be disabled + let (passed, total, disabled) = wrapper.selectivity_info(); + assert_eq!(passed, 0); // Moved to Disabled state, counters reset conceptually + assert_eq!(total, 0); + assert!(disabled, "Filter should be disabled after high selectivity"); + } + + #[test] + fn test_low_selectivity_filter_stays_active() { + // Create a filter that passes ~50% of rows: a < 50 + let filter = create_filter_expr(50); + let config = SelectivityConfig { + threshold: 0.95, + min_rows: 100, + }; + let wrapper = SelectivityAwareFilterExpr::new(filter, config); + + // Create batch where ~50% pass + let batch = create_batch((0..100).collect()); + + // Evaluate + let _result = wrapper.evaluate(&batch).unwrap(); + + // Filter should stay active (not disabled) + let (_, _, disabled) = wrapper.selectivity_info(); + assert!(!disabled, "Low selectivity filter should stay active"); + } + + #[test] + fn test_disabled_filter_returns_all_true() { + // Create a filter that will be disabled + let filter = create_filter_expr(100); // All pass + let config = SelectivityConfig { + threshold: 0.95, + min_rows: 10, + }; + let wrapper = SelectivityAwareFilterExpr::new(filter, config); + + // First batch - get it disabled + let batch = create_batch((0..100).collect()); + let _ = wrapper.evaluate(&batch).unwrap(); + + assert!(wrapper.is_disabled(), "Filter should be disabled"); + + // Now create a batch where the original filter would return some false + // But since we're disabled, we should get all true + let batch2 = create_batch(vec![200, 201, 202]); // These would fail a < 100 + let result = wrapper.evaluate(&batch2).unwrap(); + + let ColumnarValue::Array(arr) = result else { + panic!("Expected array result"); + }; + let bool_arr = arr.as_any().downcast_ref::().unwrap(); + + // All values should be true because the filter is disabled + 
assert_eq!(bool_arr.true_count(), 3); + } + + #[test] + fn test_min_rows_threshold_respected() { + let filter = create_filter_expr(100); // All pass + let config = SelectivityConfig { + threshold: 0.95, + min_rows: 1000, // High threshold + }; + let wrapper = SelectivityAwareFilterExpr::new(filter, config); + + // Process less than min_rows + let batch = create_batch((0..100).collect()); + let _ = wrapper.evaluate(&batch).unwrap(); + + // Should still be tracking, not yet disabled + let (passed, total, disabled) = wrapper.selectivity_info(); + assert_eq!(passed, 100); + assert_eq!(total, 100); + assert!( + !disabled, + "Should still be tracking under min_rows threshold" + ); + } + + #[test] + fn test_display() { + let filter = create_filter_expr(50); + let config = SelectivityConfig::default(); + let wrapper = SelectivityAwareFilterExpr::new(filter, config); + + let display = format!("{wrapper}"); + assert!( + display.contains("SelectivityAware"), + "Display should show wrapper name" + ); + } + + #[test] + fn test_with_new_children() { + let filter = create_filter_expr(50); + let config = SelectivityConfig { + threshold: 0.80, + min_rows: 5000, + }; + let wrapper = Arc::new(SelectivityAwareFilterExpr::new(filter, config)); + + let new_filter = create_filter_expr(75); + let new_wrapper = wrapper.with_new_children(vec![new_filter]).unwrap(); + + // Should create a new wrapper with the new child + let new_wrapper = new_wrapper + .as_any() + .downcast_ref::() + .unwrap(); + assert!(!new_wrapper.is_disabled()); + } +} diff --git a/datafusion/physical-plan/src/joins/hash_join/exec.rs b/datafusion/physical-plan/src/joins/hash_join/exec.rs index eb2e841791cd5..078f7d1e027f8 100644 --- a/datafusion/physical-plan/src/joins/hash_join/exec.rs +++ b/datafusion/physical-plan/src/joins/hash_join/exec.rs @@ -80,6 +80,9 @@ use datafusion_functions_aggregate_common::min_max::{MaxAccumulator, MinAccumula use datafusion_physical_expr::equivalence::{ ProjectionMapping, 
join_equivalence_properties, }; +use datafusion_physical_expr::expressions::{ + DynamicFilterPhysicalExpr, SelectivityAwareFilterExpr, SelectivityConfig, lit, +}; use datafusion_physical_expr::expressions::{DynamicFilterPhysicalExpr, lit}; use datafusion_physical_expr::projection::{ProjectionRef, combine_projections}; use datafusion_physical_expr::{PhysicalExpr, PhysicalExprRef}; @@ -1466,7 +1469,25 @@ impl ExecutionPlan for HashJoinExec { { // Add actual dynamic filter to right side (probe side) let dynamic_filter = Self::create_dynamic_filter(&self.on); - right_child = right_child.with_self_filter(dynamic_filter); + + // Optionally wrap with selectivity tracking + let filter_expr: Arc = if config + .optimizer + .enable_dynamic_filter_selectivity_tracking + { + let selectivity_config = SelectivityConfig { + threshold: config.optimizer.dynamic_filter_selectivity_threshold, + min_rows: config.optimizer.dynamic_filter_min_rows_for_selectivity, + }; + Arc::new(SelectivityAwareFilterExpr::new( + dynamic_filter, + selectivity_config, + )) + } else { + dynamic_filter + }; + + right_child = right_child.with_self_filter(filter_expr); } Ok(FilterDescription::new() @@ -1501,9 +1522,23 @@ impl ExecutionPlan for HashJoinExec { // Note that we don't check PushdDownPredicate::discrimnant because even if nothing said // "yes, I can fully evaluate this filter" things might still use it for statistics -> it's worth updating let predicate = Arc::clone(&filter.predicate); - if let Ok(dynamic_filter) = - Arc::downcast::(predicate) - { + + // Try to extract the DynamicFilterPhysicalExpr, either directly or from a SelectivityAwareFilterExpr wrapper + let maybe_dynamic_filter: Option> = + // First, try direct downcast to DynamicFilterPhysicalExpr + if let Ok(df) = Arc::downcast::(predicate.clone()) { + Some(df) + } else if let Some(wrapper) = predicate + .as_any() + .downcast_ref::() + { + // Try to get it from a SelectivityAwareFilterExpr wrapper + 
Arc::downcast::(wrapper.inner().clone()).ok() + } else { + None + }; + + if let Some(dynamic_filter) = maybe_dynamic_filter { // We successfully pushed down our self filter - we need to make a new node with the dynamic filter let new_node = Arc::new(HashJoinExec { left: Arc::clone(&self.left), diff --git a/datafusion/sqllogictest/test_files/dynamic_filter_pushdown_config.slt b/datafusion/sqllogictest/test_files/dynamic_filter_pushdown_config.slt index 1b037ee2b83af..bfede1d339b97 100644 --- a/datafusion/sqllogictest/test_files/dynamic_filter_pushdown_config.slt +++ b/datafusion/sqllogictest/test_files/dynamic_filter_pushdown_config.slt @@ -476,3 +476,104 @@ SET datafusion.optimizer.enable_aggregate_dynamic_filter_pushdown = true; statement ok SET datafusion.optimizer.enable_dynamic_filter_pushdown = true; + +# Test 6: Selectivity-based dynamic filter disabling configuration +# These options control automatic disabling of dynamic filters that pass most rows + +statement ok +set datafusion.catalog.information_schema = true + +# Verify default values for selectivity tracking config options +query T +SELECT value FROM information_schema.df_settings +WHERE name = 'datafusion.optimizer.enable_dynamic_filter_selectivity_tracking'; +---- +false + +query T +SELECT value FROM information_schema.df_settings +WHERE name = 'datafusion.optimizer.dynamic_filter_selectivity_threshold'; +---- +0.95 + +query T +SELECT value FROM information_schema.df_settings +WHERE name = 'datafusion.optimizer.dynamic_filter_min_rows_for_selectivity'; +---- +10000 + +# Enable selectivity tracking +statement ok +SET datafusion.optimizer.enable_dynamic_filter_selectivity_tracking = true; + +# Verify it's enabled +query T +SELECT value FROM information_schema.df_settings +WHERE name = 'datafusion.optimizer.enable_dynamic_filter_selectivity_tracking'; +---- +true + +# Set custom threshold and min_rows +statement ok +SET datafusion.optimizer.dynamic_filter_selectivity_threshold = 0.80; + +statement ok 
+SET datafusion.optimizer.dynamic_filter_min_rows_for_selectivity = 5000; + +# Verify custom values +query T +SELECT value FROM information_schema.df_settings +WHERE name = 'datafusion.optimizer.dynamic_filter_selectivity_threshold'; +---- +0.8 + +query T +SELECT value FROM information_schema.df_settings +WHERE name = 'datafusion.optimizer.dynamic_filter_min_rows_for_selectivity'; +---- +5000 + +statement ok +set datafusion.catalog.information_schema = false + +# Test that join queries still work correctly with selectivity tracking enabled +statement ok +CREATE TABLE sel_left(id INT, data VARCHAR) AS VALUES +(1, 'left1'), +(2, 'left2'), +(3, 'left3'), +(4, 'left4'), +(5, 'left5'); + +statement ok +CREATE TABLE sel_right(id INT, info VARCHAR) AS VALUES +(1, 'right1'), +(3, 'right3'), +(5, 'right5'); + +# Verify join returns correct results with selectivity tracking enabled +query ITT rowsort +SELECT l.id, l.data, r.info +FROM sel_left l +INNER JOIN sel_right r ON l.id = r.id; +---- +1 left1 right1 +3 left3 right3 +5 left5 right5 + +# Cleanup selectivity test tables +statement ok +DROP TABLE sel_left; + +statement ok +DROP TABLE sel_right; + +# Reset selectivity tracking configs to defaults +statement ok +SET datafusion.optimizer.enable_dynamic_filter_selectivity_tracking = false; + +statement ok +SET datafusion.optimizer.dynamic_filter_selectivity_threshold = 0.95; + +statement ok +SET datafusion.optimizer.dynamic_filter_min_rows_for_selectivity = 10000; From 9f506cfafecfd9561230b33702b7996db22a6e7e Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Thu, 5 Feb 2026 07:13:38 -0500 Subject: [PATCH 02/14] fix: Remove duplicate import and update information_schema tests - Remove duplicate import of DynamicFilterPhysicalExpr and lit - Add new selectivity tracking config options to information_schema.slt Co-Authored-By: Claude Opus 4.5 --- datafusion/physical-plan/src/joins/hash_join/exec.rs | 1 - 
datafusion/sqllogictest/test_files/information_schema.slt | 6 ++++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/datafusion/physical-plan/src/joins/hash_join/exec.rs b/datafusion/physical-plan/src/joins/hash_join/exec.rs index 078f7d1e027f8..4ce1b2c743f3a 100644 --- a/datafusion/physical-plan/src/joins/hash_join/exec.rs +++ b/datafusion/physical-plan/src/joins/hash_join/exec.rs @@ -83,7 +83,6 @@ use datafusion_physical_expr::equivalence::{ use datafusion_physical_expr::expressions::{ DynamicFilterPhysicalExpr, SelectivityAwareFilterExpr, SelectivityConfig, lit, }; -use datafusion_physical_expr::expressions::{DynamicFilterPhysicalExpr, lit}; use datafusion_physical_expr::projection::{ProjectionRef, combine_projections}; use datafusion_physical_expr::{PhysicalExpr, PhysicalExprRef}; diff --git a/datafusion/sqllogictest/test_files/information_schema.slt b/datafusion/sqllogictest/test_files/information_schema.slt index b61ceecb24fc0..62430432c3404 100644 --- a/datafusion/sqllogictest/test_files/information_schema.slt +++ b/datafusion/sqllogictest/test_files/information_schema.slt @@ -293,9 +293,12 @@ datafusion.format.timestamp_tz_format NULL datafusion.format.types_info false datafusion.optimizer.allow_symmetric_joins_without_pruning true datafusion.optimizer.default_filter_selectivity 20 +datafusion.optimizer.dynamic_filter_min_rows_for_selectivity 10000 +datafusion.optimizer.dynamic_filter_selectivity_threshold 0.95 datafusion.optimizer.enable_aggregate_dynamic_filter_pushdown true datafusion.optimizer.enable_distinct_aggregation_soft_limit true datafusion.optimizer.enable_dynamic_filter_pushdown true +datafusion.optimizer.enable_dynamic_filter_selectivity_tracking false datafusion.optimizer.enable_join_dynamic_filter_pushdown true datafusion.optimizer.enable_leaf_expression_pushdown true datafusion.optimizer.enable_piecewise_merge_join false @@ -431,9 +434,12 @@ datafusion.format.timestamp_tz_format NULL Timestamp format for timestamp with t 
datafusion.format.types_info false Show types in visual representation batches datafusion.optimizer.allow_symmetric_joins_without_pruning true Should DataFusion allow symmetric hash joins for unbounded data sources even when its inputs do not have any ordering or filtering If the flag is not enabled, the SymmetricHashJoin operator will be unable to prune its internal buffers, resulting in certain join types - such as Full, Left, LeftAnti, LeftSemi, Right, RightAnti, and RightSemi - being produced only at the end of the execution. This is not typical in stream processing. Additionally, without proper design for long runner execution, all types of joins may encounter out-of-memory errors. datafusion.optimizer.default_filter_selectivity 20 The default filter selectivity used by Filter Statistics when an exact selectivity cannot be determined. Valid values are between 0 (no selectivity) and 100 (all rows are selected). +datafusion.optimizer.dynamic_filter_min_rows_for_selectivity 10000 Minimum number of rows to process before making a selectivity decision for join dynamic filters. The filter will remain in a tracking state until this many rows have been processed. This ensures statistical stability before making the disable decision. Only used when `enable_dynamic_filter_selectivity_tracking` is true. +datafusion.optimizer.dynamic_filter_selectivity_threshold 0.95 Selectivity threshold for disabling join dynamic filters. If the filter passes this fraction or more of rows, it will be disabled. Value should be between 0.0 and 1.0. For example, 0.95 means if 95% or more of rows pass the filter, it will be disabled. Only used when `enable_dynamic_filter_selectivity_tracking` is true. datafusion.optimizer.enable_aggregate_dynamic_filter_pushdown true When set to true, the optimizer will attempt to push down Aggregate dynamic filters into the file scan phase. 
datafusion.optimizer.enable_distinct_aggregation_soft_limit true When set to true, the optimizer will push a limit operation into grouped aggregations which have no aggregate expressions, as a soft limit, emitting groups once the limit is reached, before all rows in the group are read. datafusion.optimizer.enable_dynamic_filter_pushdown true When set to true attempts to push down dynamic filters generated by operators (TopK, Join & Aggregate) into the file scan phase. For example, for a query such as `SELECT * FROM t ORDER BY timestamp DESC LIMIT 10`, the optimizer will attempt to push down the current top 10 timestamps that the TopK operator references into the file scans. This means that if we already have 10 timestamps in the year 2025 any files that only have timestamps in the year 2024 can be skipped / pruned at various stages in the scan. The config will suppress `enable_join_dynamic_filter_pushdown`, `enable_topk_dynamic_filter_pushdown` & `enable_aggregate_dynamic_filter_pushdown` So if you disable `enable_topk_dynamic_filter_pushdown`, then enable `enable_dynamic_filter_pushdown`, the `enable_topk_dynamic_filter_pushdown` will be overridden. +datafusion.optimizer.enable_dynamic_filter_selectivity_tracking false Enable selectivity-based disabling of dynamic filters from joins. When enabled, join dynamic filters that pass most rows (above the threshold) will be automatically disabled to avoid evaluation overhead. This is useful when the build side of a join covers most of the probe side values, making the filter expensive to evaluate for little benefit. The selectivity tracking resets when the dynamic filter is updated (e.g., when the hash table is built), allowing the filter to be re-evaluated with new data. datafusion.optimizer.enable_join_dynamic_filter_pushdown true When set to true, the optimizer will attempt to push down Join dynamic filters into the file scan phase. 
datafusion.optimizer.enable_leaf_expression_pushdown true When set to true, the optimizer will extract leaf expressions (such as `get_field`) from filter/sort/join nodes into projections closer to the leaf table scans, and push those projections down towards the leaf nodes. datafusion.optimizer.enable_piecewise_merge_join false When set to true, piecewise merge join is enabled. PiecewiseMergeJoin is currently experimental. Physical planner will opt for PiecewiseMergeJoin when there is only one range filter. From b08820ff4b30cef242e6e11d06003711cb0e8cd3 Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Thu, 5 Feb 2026 08:17:08 -0500 Subject: [PATCH 03/14] refactor: Rename dynamic_filter_* configs to adaptive_filter_* Rename configuration options and the wrapper expression for clarity: - dynamic_filter_min_rows_for_selectivity -> adaptive_filter_min_rows_for_selectivity - dynamic_filter_selectivity_threshold -> adaptive_filter_selectivity_threshold - enable_dynamic_filter_selectivity_tracking -> enable_adaptive_filter_selectivity_tracking - SelectivityAwareFilterExpr -> AdaptiveSelectivityFilterExpr - selectivity_aware_filter.rs -> adaptive_selectivity_filter.rs Co-Authored-By: Claude Opus 4.5 --- datafusion/common/src/config.rs | 36 +++++++++---------- ...lter.rs => adaptive_selectivity_filter.rs} | 32 ++++++++--------- .../physical-expr/src/expressions/mod.rs | 4 +-- .../physical-plan/src/joins/hash_join/exec.rs | 26 ++++++++------ .../dynamic_filter_pushdown_config.slt | 36 +++++++++---------- .../test_files/information_schema.slt | 12 +++---- 6 files changed, 75 insertions(+), 71 deletions(-) rename datafusion/physical-expr/src/expressions/{selectivity_aware_filter.rs => adaptive_selectivity_filter.rs} (93%) diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index 7c17a88137ca3..e067a6b435a1b 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ 
-1115,33 +1115,33 @@ config_namespace! { /// See: pub hash_join_inlist_pushdown_max_distinct_values: usize, default = 150 - /// Enable selectivity-based disabling of dynamic filters from joins. - /// - /// When enabled, join dynamic filters that pass most rows (above the threshold) - /// will be automatically disabled to avoid evaluation overhead. This is useful - /// when the build side of a join covers most of the probe side values, making - /// the filter expensive to evaluate for little benefit. + /// Minimum number of rows to process before making a selectivity decision + /// for adaptive filtering of join dynamic filters. /// - /// The selectivity tracking resets when the dynamic filter is updated (e.g., when - /// the hash table is built), allowing the filter to be re-evaluated with new data. - pub enable_dynamic_filter_selectivity_tracking: bool, default = false + /// The filter will remain in a tracking state until this many rows have been + /// processed. This ensures statistical stability before making the disable decision. + /// Only used when `enable_adaptive_filter_selectivity_tracking` is true. + pub adaptive_filter_min_rows_for_selectivity: usize, default = 10_000 - /// Selectivity threshold for disabling join dynamic filters. + /// Selectivity threshold for adaptive disabling of join dynamic filters. /// /// If the filter passes this fraction or more of rows, it will be disabled. /// Value should be between 0.0 and 1.0. /// /// For example, 0.95 means if 95% or more of rows pass the filter, it will be disabled. - /// Only used when `enable_dynamic_filter_selectivity_tracking` is true. - pub dynamic_filter_selectivity_threshold: f64, default = 0.95 + /// Only used when `enable_adaptive_filter_selectivity_tracking` is true. + pub adaptive_filter_selectivity_threshold: f64, default = 0.95 - /// Minimum number of rows to process before making a selectivity decision - /// for join dynamic filters. 
+ /// Enable selectivity-based disabling of dynamic filters from joins. /// - /// The filter will remain in a tracking state until this many rows have been - /// processed. This ensures statistical stability before making the disable decision. - /// Only used when `enable_dynamic_filter_selectivity_tracking` is true. - pub dynamic_filter_min_rows_for_selectivity: usize, default = 10_000 + /// When enabled, join dynamic filters that pass most rows (above the threshold) + /// will be automatically disabled to avoid evaluation overhead. This is useful + /// when the build side of a join covers most of the probe side values, making + /// the filter expensive to evaluate for little benefit. + /// + /// The selectivity tracking resets when the dynamic filter is updated (e.g., when + /// the hash table is built), allowing the filter to be re-evaluated with new data. + pub enable_adaptive_filter_selectivity_tracking: bool, default = false /// The default filter selectivity used by Filter Statistics /// when an exact selectivity cannot be determined. Valid values are diff --git a/datafusion/physical-expr/src/expressions/selectivity_aware_filter.rs b/datafusion/physical-expr/src/expressions/adaptive_selectivity_filter.rs similarity index 93% rename from datafusion/physical-expr/src/expressions/selectivity_aware_filter.rs rename to datafusion/physical-expr/src/expressions/adaptive_selectivity_filter.rs index 4ad85d6d6149e..959d831728c97 100644 --- a/datafusion/physical-expr/src/expressions/selectivity_aware_filter.rs +++ b/datafusion/physical-expr/src/expressions/adaptive_selectivity_filter.rs @@ -88,7 +88,7 @@ impl SelectivityState { /// which happens when the dynamic filter is updated (e.g., when the hash table /// is built in a hash join). #[derive(Debug)] -pub struct SelectivityAwareFilterExpr { +pub struct AdaptiveSelectivityFilterExpr { /// The inner filter expression (typically DynamicFilterPhysicalExpr). inner: Arc, /// Selectivity tracking state. 
@@ -100,8 +100,8 @@ pub struct SelectivityAwareFilterExpr { config: SelectivityConfig, } -impl SelectivityAwareFilterExpr { - /// Create a new `SelectivityAwareFilterExpr` wrapping the given inner expression. +impl AdaptiveSelectivityFilterExpr { + /// Create a new `AdaptiveSelectivityFilterExpr` wrapping the given inner expression. pub fn new(inner: Arc, config: SelectivityConfig) -> Self { let current_generation = inner.snapshot_generation(); Self { @@ -221,7 +221,7 @@ impl SelectivityAwareFilterExpr { } } -impl Display for SelectivityAwareFilterExpr { +impl Display for AdaptiveSelectivityFilterExpr { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let (passed, total, disabled) = self.selectivity_info(); if disabled { @@ -242,22 +242,22 @@ impl Display for SelectivityAwareFilterExpr { } } -impl Hash for SelectivityAwareFilterExpr { +impl Hash for AdaptiveSelectivityFilterExpr { fn hash(&self, state: &mut H) { // Hash based on the inner expression self.inner.dyn_hash(state); } } -impl PartialEq for SelectivityAwareFilterExpr { +impl PartialEq for AdaptiveSelectivityFilterExpr { fn eq(&self, other: &Self) -> bool { self.inner.eq(&other.inner) } } -impl Eq for SelectivityAwareFilterExpr {} +impl Eq for AdaptiveSelectivityFilterExpr {} -impl PhysicalExpr for SelectivityAwareFilterExpr { +impl PhysicalExpr for AdaptiveSelectivityFilterExpr { fn as_any(&self) -> &dyn Any { self } @@ -272,7 +272,7 @@ impl PhysicalExpr for SelectivityAwareFilterExpr { ) -> Result> { if children.len() != 1 { return Err(datafusion_common::DataFusionError::Internal( - "SelectivityAwareFilterExpr expects exactly one child".to_string(), + "AdaptiveSelectivityFilterExpr expects exactly one child".to_string(), )); } Ok(Arc::new(Self::new( @@ -358,7 +358,7 @@ mod tests { threshold: 0.95, min_rows: 100, }; - let wrapper = SelectivityAwareFilterExpr::new(filter, config); + let wrapper = AdaptiveSelectivityFilterExpr::new(filter, config); // Create batches where all rows pass 
the filter let batch = create_batch((0..100).collect()); @@ -385,7 +385,7 @@ mod tests { threshold: 0.95, min_rows: 100, }; - let wrapper = SelectivityAwareFilterExpr::new(filter, config); + let wrapper = AdaptiveSelectivityFilterExpr::new(filter, config); // Create batch where ~50% pass let batch = create_batch((0..100).collect()); @@ -406,7 +406,7 @@ mod tests { threshold: 0.95, min_rows: 10, }; - let wrapper = SelectivityAwareFilterExpr::new(filter, config); + let wrapper = AdaptiveSelectivityFilterExpr::new(filter, config); // First batch - get it disabled let batch = create_batch((0..100).collect()); @@ -435,7 +435,7 @@ mod tests { threshold: 0.95, min_rows: 1000, // High threshold }; - let wrapper = SelectivityAwareFilterExpr::new(filter, config); + let wrapper = AdaptiveSelectivityFilterExpr::new(filter, config); // Process less than min_rows let batch = create_batch((0..100).collect()); @@ -455,7 +455,7 @@ mod tests { fn test_display() { let filter = create_filter_expr(50); let config = SelectivityConfig::default(); - let wrapper = SelectivityAwareFilterExpr::new(filter, config); + let wrapper = AdaptiveSelectivityFilterExpr::new(filter, config); let display = format!("{wrapper}"); assert!( @@ -471,7 +471,7 @@ mod tests { threshold: 0.80, min_rows: 5000, }; - let wrapper = Arc::new(SelectivityAwareFilterExpr::new(filter, config)); + let wrapper = Arc::new(AdaptiveSelectivityFilterExpr::new(filter, config)); let new_filter = create_filter_expr(75); let new_wrapper = wrapper.with_new_children(vec![new_filter]).unwrap(); @@ -479,7 +479,7 @@ mod tests { // Should create a new wrapper with the new child let new_wrapper = new_wrapper .as_any() - .downcast_ref::() + .downcast_ref::() .unwrap(); assert!(!new_wrapper.is_disabled()); } diff --git a/datafusion/physical-expr/src/expressions/mod.rs b/datafusion/physical-expr/src/expressions/mod.rs index 5f991899e376c..3ff668a56869c 100644 --- a/datafusion/physical-expr/src/expressions/mod.rs +++ 
b/datafusion/physical-expr/src/expressions/mod.rs @@ -19,6 +19,7 @@ #[macro_use] mod binary; +mod adaptive_selectivity_filter; mod case; mod cast; mod cast_column; @@ -32,7 +33,6 @@ mod literal; mod negative; mod no_op; mod not; -mod selectivity_aware_filter; mod try_cast; mod unknown_column; @@ -40,6 +40,7 @@ pub use crate::PhysicalSortExpr; /// Module with some convenient methods used in expression building pub use crate::aggregate::stats::StatsType; +pub use adaptive_selectivity_filter::{AdaptiveSelectivityFilterExpr, SelectivityConfig}; pub use binary::{BinaryExpr, binary, similar_to}; pub use case::{CaseExpr, case}; pub use cast::{CastExpr, cast}; @@ -55,6 +56,5 @@ pub use literal::{Literal, lit}; pub use negative::{NegativeExpr, negative}; pub use no_op::NoOp; pub use not::{NotExpr, not}; -pub use selectivity_aware_filter::{SelectivityAwareFilterExpr, SelectivityConfig}; pub use try_cast::{TryCastExpr, try_cast}; pub use unknown_column::UnKnownColumn; diff --git a/datafusion/physical-plan/src/joins/hash_join/exec.rs b/datafusion/physical-plan/src/joins/hash_join/exec.rs index 4ce1b2c743f3a..508628bfd9e0b 100644 --- a/datafusion/physical-plan/src/joins/hash_join/exec.rs +++ b/datafusion/physical-plan/src/joins/hash_join/exec.rs @@ -81,7 +81,7 @@ use datafusion_physical_expr::equivalence::{ ProjectionMapping, join_equivalence_properties, }; use datafusion_physical_expr::expressions::{ - DynamicFilterPhysicalExpr, SelectivityAwareFilterExpr, SelectivityConfig, lit, + AdaptiveSelectivityFilterExpr, DynamicFilterPhysicalExpr, SelectivityConfig, lit, }; use datafusion_physical_expr::projection::{ProjectionRef, combine_projections}; use datafusion_physical_expr::{PhysicalExpr, PhysicalExprRef}; @@ -1472,13 +1472,13 @@ impl ExecutionPlan for HashJoinExec { // Optionally wrap with selectivity tracking let filter_expr: Arc = if config .optimizer - .enable_dynamic_filter_selectivity_tracking + .enable_adaptive_filter_selectivity_tracking { let selectivity_config = 
SelectivityConfig { - threshold: config.optimizer.dynamic_filter_selectivity_threshold, - min_rows: config.optimizer.dynamic_filter_min_rows_for_selectivity, + threshold: config.optimizer.adaptive_filter_selectivity_threshold, + min_rows: config.optimizer.adaptive_filter_min_rows_for_selectivity, }; - Arc::new(SelectivityAwareFilterExpr::new( + Arc::new(AdaptiveSelectivityFilterExpr::new( dynamic_filter, selectivity_config, )) @@ -1520,22 +1520,26 @@ impl ExecutionPlan for HashJoinExec { if let Some(filter) = right_child_self_filters.first() { // Note that we don't check PushdDownPredicate::discrimnant because even if nothing said // "yes, I can fully evaluate this filter" things might still use it for statistics -> it's worth updating - let predicate = Arc::clone(&filter.predicate); - // Try to extract the DynamicFilterPhysicalExpr, either directly or from a SelectivityAwareFilterExpr wrapper - let maybe_dynamic_filter: Option> = + // Try to extract the DynamicFilterPhysicalExpr, either directly or from a AdaptiveSelectivityFilterExpr wrapper + let maybe_dynamic_filter: Option> = { + let predicate = Arc::clone(&filter.predicate); // First, try direct downcast to DynamicFilterPhysicalExpr + // Using .clone() instead of Arc::clone because it enables implicit coercion to Arc + #[expect(clippy::clone_on_ref_ptr)] if let Ok(df) = Arc::downcast::(predicate.clone()) { Some(df) } else if let Some(wrapper) = predicate .as_any() - .downcast_ref::() + .downcast_ref::() { - // Try to get it from a SelectivityAwareFilterExpr wrapper + // Try to get it from a AdaptiveSelectivityFilterExpr wrapper + #[expect(clippy::clone_on_ref_ptr)] Arc::downcast::(wrapper.inner().clone()).ok() } else { None - }; + } + }; if let Some(dynamic_filter) = maybe_dynamic_filter { // We successfully pushed down our self filter - we need to make a new node with the dynamic filter diff --git a/datafusion/sqllogictest/test_files/dynamic_filter_pushdown_config.slt 
b/datafusion/sqllogictest/test_files/dynamic_filter_pushdown_config.slt index bfede1d339b97..55fc825422146 100644 --- a/datafusion/sqllogictest/test_files/dynamic_filter_pushdown_config.slt +++ b/datafusion/sqllogictest/test_files/dynamic_filter_pushdown_config.slt @@ -477,66 +477,66 @@ SET datafusion.optimizer.enable_aggregate_dynamic_filter_pushdown = true; statement ok SET datafusion.optimizer.enable_dynamic_filter_pushdown = true; -# Test 6: Selectivity-based dynamic filter disabling configuration +# Test 6: Adaptive selectivity-based dynamic filter disabling configuration # These options control automatic disabling of dynamic filters that pass most rows statement ok set datafusion.catalog.information_schema = true -# Verify default values for selectivity tracking config options +# Verify default values for adaptive selectivity tracking config options query T SELECT value FROM information_schema.df_settings -WHERE name = 'datafusion.optimizer.enable_dynamic_filter_selectivity_tracking'; +WHERE name = 'datafusion.optimizer.enable_adaptive_filter_selectivity_tracking'; ---- false query T SELECT value FROM information_schema.df_settings -WHERE name = 'datafusion.optimizer.dynamic_filter_selectivity_threshold'; +WHERE name = 'datafusion.optimizer.adaptive_filter_selectivity_threshold'; ---- 0.95 query T SELECT value FROM information_schema.df_settings -WHERE name = 'datafusion.optimizer.dynamic_filter_min_rows_for_selectivity'; +WHERE name = 'datafusion.optimizer.adaptive_filter_min_rows_for_selectivity'; ---- 10000 -# Enable selectivity tracking +# Enable adaptive selectivity tracking statement ok -SET datafusion.optimizer.enable_dynamic_filter_selectivity_tracking = true; +SET datafusion.optimizer.enable_adaptive_filter_selectivity_tracking = true; # Verify it's enabled query T SELECT value FROM information_schema.df_settings -WHERE name = 'datafusion.optimizer.enable_dynamic_filter_selectivity_tracking'; +WHERE name = 
'datafusion.optimizer.enable_adaptive_filter_selectivity_tracking'; ---- true # Set custom threshold and min_rows statement ok -SET datafusion.optimizer.dynamic_filter_selectivity_threshold = 0.80; +SET datafusion.optimizer.adaptive_filter_selectivity_threshold = 0.80; statement ok -SET datafusion.optimizer.dynamic_filter_min_rows_for_selectivity = 5000; +SET datafusion.optimizer.adaptive_filter_min_rows_for_selectivity = 5000; # Verify custom values query T SELECT value FROM information_schema.df_settings -WHERE name = 'datafusion.optimizer.dynamic_filter_selectivity_threshold'; +WHERE name = 'datafusion.optimizer.adaptive_filter_selectivity_threshold'; ---- 0.8 query T SELECT value FROM information_schema.df_settings -WHERE name = 'datafusion.optimizer.dynamic_filter_min_rows_for_selectivity'; +WHERE name = 'datafusion.optimizer.adaptive_filter_min_rows_for_selectivity'; ---- 5000 statement ok set datafusion.catalog.information_schema = false -# Test that join queries still work correctly with selectivity tracking enabled +# Test that join queries still work correctly with adaptive selectivity tracking enabled statement ok CREATE TABLE sel_left(id INT, data VARCHAR) AS VALUES (1, 'left1'), @@ -551,7 +551,7 @@ CREATE TABLE sel_right(id INT, info VARCHAR) AS VALUES (3, 'right3'), (5, 'right5'); -# Verify join returns correct results with selectivity tracking enabled +# Verify join returns correct results with adaptive selectivity tracking enabled query ITT rowsort SELECT l.id, l.data, r.info FROM sel_left l @@ -568,12 +568,12 @@ DROP TABLE sel_left; statement ok DROP TABLE sel_right; -# Reset selectivity tracking configs to defaults +# Reset adaptive selectivity tracking configs to defaults statement ok -SET datafusion.optimizer.enable_dynamic_filter_selectivity_tracking = false; +SET datafusion.optimizer.enable_adaptive_filter_selectivity_tracking = false; statement ok -SET datafusion.optimizer.dynamic_filter_selectivity_threshold = 0.95; +SET 
datafusion.optimizer.adaptive_filter_selectivity_threshold = 0.95; statement ok -SET datafusion.optimizer.dynamic_filter_min_rows_for_selectivity = 10000; +SET datafusion.optimizer.adaptive_filter_min_rows_for_selectivity = 10000; diff --git a/datafusion/sqllogictest/test_files/information_schema.slt b/datafusion/sqllogictest/test_files/information_schema.slt index 62430432c3404..b2d63b3a2197d 100644 --- a/datafusion/sqllogictest/test_files/information_schema.slt +++ b/datafusion/sqllogictest/test_files/information_schema.slt @@ -291,14 +291,14 @@ datafusion.format.time_format %H:%M:%S%.f datafusion.format.timestamp_format %Y-%m-%dT%H:%M:%S%.f datafusion.format.timestamp_tz_format NULL datafusion.format.types_info false +datafusion.optimizer.adaptive_filter_min_rows_for_selectivity 10000 +datafusion.optimizer.adaptive_filter_selectivity_threshold 0.95 datafusion.optimizer.allow_symmetric_joins_without_pruning true datafusion.optimizer.default_filter_selectivity 20 -datafusion.optimizer.dynamic_filter_min_rows_for_selectivity 10000 -datafusion.optimizer.dynamic_filter_selectivity_threshold 0.95 +datafusion.optimizer.enable_adaptive_filter_selectivity_tracking false datafusion.optimizer.enable_aggregate_dynamic_filter_pushdown true datafusion.optimizer.enable_distinct_aggregation_soft_limit true datafusion.optimizer.enable_dynamic_filter_pushdown true -datafusion.optimizer.enable_dynamic_filter_selectivity_tracking false datafusion.optimizer.enable_join_dynamic_filter_pushdown true datafusion.optimizer.enable_leaf_expression_pushdown true datafusion.optimizer.enable_piecewise_merge_join false @@ -432,14 +432,14 @@ datafusion.format.time_format %H:%M:%S%.f Time format for time arrays datafusion.format.timestamp_format %Y-%m-%dT%H:%M:%S%.f Timestamp format for timestamp arrays datafusion.format.timestamp_tz_format NULL Timestamp format for timestamp with timezone arrays. When `None`, ISO 8601 format is used. 
datafusion.format.types_info false Show types in visual representation batches +datafusion.optimizer.adaptive_filter_min_rows_for_selectivity 10000 Minimum number of rows to process before making a selectivity decision for adaptive filtering of join dynamic filters. The filter will remain in a tracking state until this many rows have been processed. This ensures statistical stability before making the disable decision. Only used when `enable_adaptive_filter_selectivity_tracking` is true. +datafusion.optimizer.adaptive_filter_selectivity_threshold 0.95 Selectivity threshold for adaptive disabling of join dynamic filters. If the filter passes this fraction or more of rows, it will be disabled. Value should be between 0.0 and 1.0. For example, 0.95 means if 95% or more of rows pass the filter, it will be disabled. Only used when `enable_adaptive_filter_selectivity_tracking` is true. datafusion.optimizer.allow_symmetric_joins_without_pruning true Should DataFusion allow symmetric hash joins for unbounded data sources even when its inputs do not have any ordering or filtering If the flag is not enabled, the SymmetricHashJoin operator will be unable to prune its internal buffers, resulting in certain join types - such as Full, Left, LeftAnti, LeftSemi, Right, RightAnti, and RightSemi - being produced only at the end of the execution. This is not typical in stream processing. Additionally, without proper design for long runner execution, all types of joins may encounter out-of-memory errors. datafusion.optimizer.default_filter_selectivity 20 The default filter selectivity used by Filter Statistics when an exact selectivity cannot be determined. Valid values are between 0 (no selectivity) and 100 (all rows are selected). -datafusion.optimizer.dynamic_filter_min_rows_for_selectivity 10000 Minimum number of rows to process before making a selectivity decision for join dynamic filters. The filter will remain in a tracking state until this many rows have been processed. 
This ensures statistical stability before making the disable decision. Only used when `enable_dynamic_filter_selectivity_tracking` is true. -datafusion.optimizer.dynamic_filter_selectivity_threshold 0.95 Selectivity threshold for disabling join dynamic filters. If the filter passes this fraction or more of rows, it will be disabled. Value should be between 0.0 and 1.0. For example, 0.95 means if 95% or more of rows pass the filter, it will be disabled. Only used when `enable_dynamic_filter_selectivity_tracking` is true. +datafusion.optimizer.enable_adaptive_filter_selectivity_tracking false Enable selectivity-based disabling of dynamic filters from joins. When enabled, join dynamic filters that pass most rows (above the threshold) will be automatically disabled to avoid evaluation overhead. This is useful when the build side of a join covers most of the probe side values, making the filter expensive to evaluate for little benefit. The selectivity tracking resets when the dynamic filter is updated (e.g., when the hash table is built), allowing the filter to be re-evaluated with new data. datafusion.optimizer.enable_aggregate_dynamic_filter_pushdown true When set to true, the optimizer will attempt to push down Aggregate dynamic filters into the file scan phase. datafusion.optimizer.enable_distinct_aggregation_soft_limit true When set to true, the optimizer will push a limit operation into grouped aggregations which have no aggregate expressions, as a soft limit, emitting groups once the limit is reached, before all rows in the group are read. datafusion.optimizer.enable_dynamic_filter_pushdown true When set to true attempts to push down dynamic filters generated by operators (TopK, Join & Aggregate) into the file scan phase. For example, for a query such as `SELECT * FROM t ORDER BY timestamp DESC LIMIT 10`, the optimizer will attempt to push down the current top 10 timestamps that the TopK operator references into the file scans. 
This means that if we already have 10 timestamps in the year 2025 any files that only have timestamps in the year 2024 can be skipped / pruned at various stages in the scan. The config will suppress `enable_join_dynamic_filter_pushdown`, `enable_topk_dynamic_filter_pushdown` & `enable_aggregate_dynamic_filter_pushdown` So if you disable `enable_topk_dynamic_filter_pushdown`, then enable `enable_dynamic_filter_pushdown`, the `enable_topk_dynamic_filter_pushdown` will be overridden. -datafusion.optimizer.enable_dynamic_filter_selectivity_tracking false Enable selectivity-based disabling of dynamic filters from joins. When enabled, join dynamic filters that pass most rows (above the threshold) will be automatically disabled to avoid evaluation overhead. This is useful when the build side of a join covers most of the probe side values, making the filter expensive to evaluate for little benefit. The selectivity tracking resets when the dynamic filter is updated (e.g., when the hash table is built), allowing the filter to be re-evaluated with new data. datafusion.optimizer.enable_join_dynamic_filter_pushdown true When set to true, the optimizer will attempt to push down Join dynamic filters into the file scan phase. datafusion.optimizer.enable_leaf_expression_pushdown true When set to true, the optimizer will extract leaf expressions (such as `get_field`) from filter/sort/join nodes into projections closer to the leaf table scans, and push those projections down towards the leaf nodes. datafusion.optimizer.enable_piecewise_merge_join false When set to true, piecewise merge join is enabled. PiecewiseMergeJoin is currently experimental. Physical planner will opt for PiecewiseMergeJoin when there is only one range filter. 
From c4b5b666d16e325c3a89505d7e9fefc6d970da88 Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Thu, 5 Feb 2026 08:43:58 -0500 Subject: [PATCH 04/14] lint --- datafusion/physical-plan/src/joins/hash_join/exec.rs | 11 +++++++---- docs/source/user-guide/configs.md | 3 +++ 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/datafusion/physical-plan/src/joins/hash_join/exec.rs b/datafusion/physical-plan/src/joins/hash_join/exec.rs index 508628bfd9e0b..afd1873a38cf9 100644 --- a/datafusion/physical-plan/src/joins/hash_join/exec.rs +++ b/datafusion/physical-plan/src/joins/hash_join/exec.rs @@ -1527,15 +1527,18 @@ impl ExecutionPlan for HashJoinExec { // First, try direct downcast to DynamicFilterPhysicalExpr // Using .clone() instead of Arc::clone because it enables implicit coercion to Arc #[expect(clippy::clone_on_ref_ptr)] - if let Ok(df) = Arc::downcast::(predicate.clone()) { + if let Ok(df) = + Arc::downcast::(predicate.clone()) + { Some(df) } else if let Some(wrapper) = predicate .as_any() - .downcast_ref::() - { + .downcast_ref::( + ) { // Try to get it from a AdaptiveSelectivityFilterExpr wrapper #[expect(clippy::clone_on_ref_ptr)] - Arc::downcast::(wrapper.inner().clone()).ok() + Arc::downcast::(wrapper.inner().clone()) + .ok() } else { None } diff --git a/docs/source/user-guide/configs.md b/docs/source/user-guide/configs.md index e48f0a7c92276..cc2b6d0654b47 100644 --- a/docs/source/user-guide/configs.md +++ b/docs/source/user-guide/configs.md @@ -161,6 +161,9 @@ The following configuration settings are available: | datafusion.optimizer.hash_join_single_partition_threshold_rows | 131072 | The maximum estimated size in rows for one input side of a HashJoin will be collected into a single partition | | datafusion.optimizer.hash_join_inlist_pushdown_max_size | 131072 | Maximum size in bytes for the build side of a hash join to be pushed down as an InList expression for dynamic filtering. 
Build sides larger than this will use hash table lookups instead. Set to 0 to always use hash table lookups. InList pushdown can be more efficient for small build sides because it can result in better statistics pruning as well as use any bloom filters present on the scan side. InList expressions are also more transparent and easier to serialize over the network in distributed uses of DataFusion. On the other hand InList pushdown requires making a copy of the data and thus adds some overhead to the build side and uses more memory. This setting is per-partition, so we may end up using `hash_join_inlist_pushdown_max_size` \* `target_partitions` memory. The default is 128kB per partition. This should allow point lookup joins (e.g. joining on a unique primary key) to use InList pushdown in most cases but avoids excessive memory usage or overhead for larger joins. | | datafusion.optimizer.hash_join_inlist_pushdown_max_distinct_values | 150 | Maximum number of distinct values (rows) in the build side of a hash join to be pushed down as an InList expression for dynamic filtering. Build sides with more rows than this will use hash table lookups instead. Set to 0 to always use hash table lookups. This provides an additional limit beyond `hash_join_inlist_pushdown_max_size` to prevent very large IN lists that might not provide much benefit over hash table lookups. This uses the deduplicated row count once the build side has been evaluated. The default is 150 values per partition. This is inspired by Trino's `max-filter-keys-per-column` setting. See: | +| datafusion.optimizer.adaptive_filter_min_rows_for_selectivity | 10000 | Minimum number of rows to process before making a selectivity decision for adaptive filtering of join dynamic filters. The filter will remain in a tracking state until this many rows have been processed. This ensures statistical stability before making the disable decision. Only used when `enable_adaptive_filter_selectivity_tracking` is true. 
| +| datafusion.optimizer.adaptive_filter_selectivity_threshold | 0.95 | Selectivity threshold for adaptive disabling of join dynamic filters. If the filter passes this fraction or more of rows, it will be disabled. Value should be between 0.0 and 1.0. For example, 0.95 means if 95% or more of rows pass the filter, it will be disabled. Only used when `enable_adaptive_filter_selectivity_tracking` is true. | +| datafusion.optimizer.enable_adaptive_filter_selectivity_tracking | false | Enable selectivity-based disabling of dynamic filters from joins. When enabled, join dynamic filters that pass most rows (above the threshold) will be automatically disabled to avoid evaluation overhead. This is useful when the build side of a join covers most of the probe side values, making the filter expensive to evaluate for little benefit. The selectivity tracking resets when the dynamic filter is updated (e.g., when the hash table is built), allowing the filter to be re-evaluated with new data. | | datafusion.optimizer.default_filter_selectivity | 20 | The default filter selectivity used by Filter Statistics when an exact selectivity cannot be determined. Valid values are between 0 (no selectivity) and 100 (all rows are selected). | | datafusion.optimizer.prefer_existing_union | false | When set to true, the optimizer will not attempt to convert Union to Interleave | | datafusion.optimizer.expand_views_at_output | false | When set to true, if the returned type is a view type then the output will be coerced to a non-view. Coerces `Utf8View` to `LargeUtf8`, and `BinaryView` to `LargeBinary`. 
| From 916b46fdeb7a39ce4195be3b12dae962f6dce9dd Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Thu, 5 Feb 2026 09:29:26 -0500 Subject: [PATCH 05/14] enable --- datafusion/common/src/config.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index e067a6b435a1b..05e06da2f397a 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -1141,7 +1141,7 @@ config_namespace! { /// /// The selectivity tracking resets when the dynamic filter is updated (e.g., when /// the hash table is built), allowing the filter to be re-evaluated with new data. - pub enable_adaptive_filter_selectivity_tracking: bool, default = false + pub enable_adaptive_filter_selectivity_tracking: bool, default = true /// The default filter selectivity used by Filter Statistics /// when an exact selectivity cannot be determined. Valid values are From 61cb0bfbfefc70f81604a41c9dc0d203f2943573 Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Thu, 5 Feb 2026 10:12:20 -0500 Subject: [PATCH 06/14] perf: Optimize AdaptiveSelectivityFilterExpr to avoid locks Remove RwLock and optimize the hot path: - Replace RwLock with simple atomics - Fast path for ACTIVE state: single atomic load, no tracking - Fast path for DISABLED state: single atomic load, return all-true - Only call snapshot_generation() in TRACKING state - No counter updates in ACTIVE state This eliminates the performance overhead that was causing slowdowns even with threshold=1.0, since the overhead came from lock acquisition and generation checks on every evaluate() call. 
Co-Authored-By: Claude Opus 4.5 --- .../adaptive_selectivity_filter.rs | 272 ++++++++++-------- 1 file changed, 156 insertions(+), 116 deletions(-) diff --git a/datafusion/physical-expr/src/expressions/adaptive_selectivity_filter.rs b/datafusion/physical-expr/src/expressions/adaptive_selectivity_filter.rs index 959d831728c97..3e6b0934450d1 100644 --- a/datafusion/physical-expr/src/expressions/adaptive_selectivity_filter.rs +++ b/datafusion/physical-expr/src/expressions/adaptive_selectivity_filter.rs @@ -30,7 +30,6 @@ use arrow::record_batch::RecordBatch; use datafusion_common::Result; use datafusion_expr::ColumnarValue; use datafusion_physical_expr_common::physical_expr::DynHash; -use parking_lot::RwLock; use crate::PhysicalExpr; @@ -53,28 +52,10 @@ impl Default for SelectivityConfig { } } -/// State machine for selectivity tracking. -#[derive(Debug)] -enum SelectivityState { - /// Collecting statistics, not yet enough data. - Tracking { - rows_passed: AtomicUsize, - rows_total: AtomicUsize, - }, - /// Filter is sufficiently selective, keep active. - Active, - /// Filter has been disabled due to poor selectivity. - Disabled, -} - -impl SelectivityState { - fn new_tracking() -> Self { - Self::Tracking { - rows_passed: AtomicUsize::new(0), - rows_total: AtomicUsize::new(0), - } - } -} +// State values for the atomic state machine +const STATE_TRACKING: u8 = 0; +const STATE_ACTIVE: u8 = 1; +const STATE_DISABLED: u8 = 2; /// A wrapper [`PhysicalExpr`] that tracks selectivity and can disable filters /// that pass too many rows. @@ -91,8 +72,13 @@ impl SelectivityState { pub struct AdaptiveSelectivityFilterExpr { /// The inner filter expression (typically DynamicFilterPhysicalExpr). inner: Arc, - /// Selectivity tracking state. - state: RwLock, + /// Simple atomic state: 0 = Tracking, 1 = Active, 2 = Disabled + /// This allows the hot path to be a single atomic load with no locks. + state: AtomicUsize, + /// Rows that passed the filter (only used in Tracking state). 
+ rows_passed: AtomicUsize, + /// Total rows processed (only used in Tracking state). + rows_total: AtomicUsize, /// The generation of the inner filter when we started tracking. /// If this changes, we need to reset our state. tracked_generation: AtomicU64, @@ -106,7 +92,9 @@ impl AdaptiveSelectivityFilterExpr { let current_generation = inner.snapshot_generation(); Self { inner, - state: RwLock::new(SelectivityState::new_tracking()), + state: AtomicUsize::new(STATE_TRACKING as usize), + rows_passed: AtomicUsize::new(0), + rows_total: AtomicUsize::new(0), tracked_generation: AtomicU64::new(current_generation), config, } @@ -116,24 +104,23 @@ impl AdaptiveSelectivityFilterExpr { /// /// Returns `(rows_passed, rows_total, is_disabled)`. pub fn selectivity_info(&self) -> (usize, usize, bool) { - let state = self.state.read(); - match &*state { - SelectivityState::Tracking { - rows_passed, - rows_total, - } => { - let passed = rows_passed.load(Ordering::Relaxed); - let total = rows_total.load(Ordering::Relaxed); + let state = self.state.load(Ordering::Relaxed) as u8; + match state { + STATE_TRACKING => { + let passed = self.rows_passed.load(Ordering::Relaxed); + let total = self.rows_total.load(Ordering::Relaxed); (passed, total, false) } - SelectivityState::Active => (0, 0, false), - SelectivityState::Disabled => (0, 0, true), + STATE_ACTIVE => (0, 0, false), + STATE_DISABLED => (0, 0, true), + _ => (0, 0, false), } } /// Check if the filter is disabled. + #[inline] pub fn is_disabled(&self) -> bool { - matches!(*self.state.read(), SelectivityState::Disabled) + self.state.load(Ordering::Relaxed) as u8 == STATE_DISABLED } /// Get the inner expression. @@ -141,83 +128,97 @@ impl AdaptiveSelectivityFilterExpr { &self.inner } - /// Check if the inner generation has changed and reset state if needed. 
- fn check_and_reset_if_needed(&self) { - let current_generation = self.inner.snapshot_generation(); - let tracked = self.tracked_generation.load(Ordering::Relaxed); - - if current_generation != tracked { - // Generation changed - reset to tracking state - let mut state = self.state.write(); - *state = SelectivityState::new_tracking(); - self.tracked_generation - .store(current_generation, Ordering::Relaxed); - } + /// Create an all-true boolean array of the given length. + #[inline] + fn all_true_array(len: usize) -> ArrayRef { + Arc::new(BooleanArray::from(vec![true; len])) } - /// Count the number of true values in a boolean array. - fn count_true_values(array: &BooleanArray) -> usize { - array.true_count() + /// Fast path evaluation - just check the atomic state. + /// Returns Some(result) if we can short-circuit, None if we need to do full evaluation. + #[inline] + fn try_fast_path(&self, batch: &RecordBatch) -> Option { + let state = self.state.load(Ordering::Relaxed) as u8; + match state { + STATE_DISABLED => { + // Fast path: filter is disabled, return all-true + Some(ColumnarValue::Array(Self::all_true_array(batch.num_rows()))) + } + STATE_ACTIVE => { + // Fast path: filter is active and we've finished tracking + // Just evaluate the inner expression, no tracking overhead + None + } + STATE_TRACKING => { + // Need to do tracking - check generation first + let current_gen = self.inner.snapshot_generation(); + let tracked_gen = self.tracked_generation.load(Ordering::Relaxed); + if current_gen != tracked_gen { + // Generation changed - reset tracking + self.rows_passed.store(0, Ordering::Relaxed); + self.rows_total.store(0, Ordering::Relaxed); + self.tracked_generation + .store(current_gen, Ordering::Relaxed); + self.state.store(STATE_TRACKING as usize, Ordering::Relaxed); + } + None + } + _ => None, + } } - /// Process the result and update selectivity statistics. 
- fn process_result(&self, result: &ColumnarValue) -> Result<()> { - // Only track selectivity for array results + /// Update tracking statistics after evaluating a batch. + /// Only called when in TRACKING state. + #[inline] + fn update_tracking(&self, result: &ColumnarValue) { + // Only update if still in tracking state + if self.state.load(Ordering::Relaxed) as u8 != STATE_TRACKING { + return; + } + let (true_count, total_count) = match result { ColumnarValue::Array(array) => { let bool_array = array .as_any() .downcast_ref::() .expect("Filter expression should return BooleanArray"); - (Self::count_true_values(bool_array), array.len()) + (bool_array.true_count(), array.len()) } ColumnarValue::Scalar(scalar) => { - // Scalar result - we can't track selectivity meaningfully - // Just skip tracking for this batch if let datafusion_common::ScalarValue::Boolean(Some(v)) = scalar { if *v { (1, 1) } else { (0, 1) } } else { - return Ok(()); + return; } } }; - let state = self.state.read(); - if let SelectivityState::Tracking { - rows_passed, - rows_total, - } = &*state - { - rows_passed.fetch_add(true_count, Ordering::Relaxed); - let new_total = - rows_total.fetch_add(total_count, Ordering::Relaxed) + total_count; - let passed = rows_passed.load(Ordering::Relaxed); - - // Check if we've seen enough rows to make a decision - if new_total >= self.config.min_rows { - // Calculate selectivity - let selectivity = passed as f64 / new_total as f64; - drop(state); - - // Decide whether to disable or keep active - let mut state = self.state.write(); - // Re-check in case another thread already updated - if matches!(*state, SelectivityState::Tracking { .. 
}) { - if selectivity >= self.config.threshold { - *state = SelectivityState::Disabled; - } else { - *state = SelectivityState::Active; - } - } - } + // Update counters + self.rows_passed.fetch_add(true_count, Ordering::Relaxed); + let new_total = + self.rows_total.fetch_add(total_count, Ordering::Relaxed) + total_count; + + // Check if we've seen enough rows to make a decision + if new_total >= self.config.min_rows { + let passed = self.rows_passed.load(Ordering::Relaxed); + let selectivity = passed as f64 / new_total as f64; + + // Use compare_exchange to ensure only one thread makes the transition + let new_state = if selectivity >= self.config.threshold { + STATE_DISABLED + } else { + STATE_ACTIVE + }; + + // Try to transition from TRACKING to the new state + // If this fails, another thread already did the transition, which is fine + let _ = self.state.compare_exchange( + STATE_TRACKING as usize, + new_state as usize, + Ordering::Relaxed, + Ordering::Relaxed, + ); } - - Ok(()) - } - - /// Create an all-true boolean array of the given length. 
- fn all_true_array(len: usize) -> ArrayRef { - Arc::new(BooleanArray::from(vec![true; len])) } } @@ -225,19 +226,19 @@ impl Display for AdaptiveSelectivityFilterExpr { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let (passed, total, disabled) = self.selectivity_info(); if disabled { - write!(f, "SelectivityAware(DISABLED) [ {} ]", self.inner) + write!(f, "AdaptiveSelectivity(DISABLED) [ {} ]", self.inner) } else if total > 0 { let selectivity = passed as f64 / total as f64; write!( f, - "SelectivityAware({:.1}%, {}/{}) [ {} ]", + "AdaptiveSelectivity({:.1}%, {}/{}) [ {} ]", selectivity * 100.0, passed, total, self.inner ) } else { - write!(f, "SelectivityAware [ {} ]", self.inner) + write!(f, "AdaptiveSelectivity [ {} ]", self.inner) } } } @@ -289,24 +290,20 @@ impl PhysicalExpr for AdaptiveSelectivityFilterExpr { self.inner.nullable(input_schema) } + #[inline] fn evaluate(&self, batch: &RecordBatch) -> Result { - // Check if the inner generation has changed - self.check_and_reset_if_needed(); - - // Check if we're disabled - { - let state = self.state.read(); - if matches!(*state, SelectivityState::Disabled) { - // Return all-true to bypass the filter - return Ok(ColumnarValue::Array(Self::all_true_array(batch.num_rows()))); - } + // Fast path: single atomic load to check state + if let Some(result) = self.try_fast_path(batch) { + return Ok(result); } // Evaluate the inner expression let result = self.inner.evaluate(batch)?; - // Update selectivity statistics - self.process_result(&result)?; + // Update tracking if in tracking state (cheap check + possible update) + if self.state.load(Ordering::Relaxed) as u8 == STATE_TRACKING { + self.update_tracking(&result); + } Ok(result) } @@ -371,10 +368,10 @@ mod tests { assert_eq!(arr.len(), 100); // After enough rows, the filter should be disabled - let (passed, total, disabled) = wrapper.selectivity_info(); - assert_eq!(passed, 0); // Moved to Disabled state, counters reset conceptually - 
assert_eq!(total, 0); - assert!(disabled, "Filter should be disabled after high selectivity"); + assert!( + wrapper.is_disabled(), + "Filter should be disabled after high selectivity" + ); } #[test] @@ -394,8 +391,10 @@ mod tests { let _result = wrapper.evaluate(&batch).unwrap(); // Filter should stay active (not disabled) - let (_, _, disabled) = wrapper.selectivity_info(); - assert!(!disabled, "Low selectivity filter should stay active"); + assert!( + !wrapper.is_disabled(), + "Low selectivity filter should stay active" + ); } #[test] @@ -459,7 +458,7 @@ mod tests { let display = format!("{wrapper}"); assert!( - display.contains("SelectivityAware"), + display.contains("AdaptiveSelectivity"), "Display should show wrapper name" ); } @@ -483,4 +482,45 @@ mod tests { .unwrap(); assert!(!new_wrapper.is_disabled()); } + + #[test] + fn test_active_state_no_tracking_overhead() { + // Test that once in Active state, there's minimal overhead + let filter = create_filter_expr(50); // ~50% pass rate + let config = SelectivityConfig { + threshold: 0.95, + min_rows: 100, + }; + let wrapper = AdaptiveSelectivityFilterExpr::new(filter, config); + + // Process enough rows to transition to Active + let batch = create_batch((0..100).collect()); + let _ = wrapper.evaluate(&batch).unwrap(); + + // Should be in Active state now + assert!(!wrapper.is_disabled()); + let state = wrapper.state.load(Ordering::Relaxed) as u8; + assert_eq!(state, STATE_ACTIVE, "Should be in Active state"); + + // Further evaluations should not update tracking counters + let initial_passed = wrapper.rows_passed.load(Ordering::Relaxed); + let initial_total = wrapper.rows_total.load(Ordering::Relaxed); + + // Evaluate more batches + for _ in 0..10 { + let _ = wrapper.evaluate(&batch).unwrap(); + } + + // Counters should NOT have changed (no tracking in Active state) + assert_eq!( + wrapper.rows_passed.load(Ordering::Relaxed), + initial_passed, + "Counters should not change in Active state" + ); + assert_eq!( + 
wrapper.rows_total.load(Ordering::Relaxed), + initial_total, + "Counters should not change in Active state" + ); + } } From 61a99795a3d8f5e550fde33c876e3cfc76c6609f Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Thu, 5 Feb 2026 12:54:46 -0500 Subject: [PATCH 07/14] fix: Fix AdaptiveSelectivityFilterExpr breaking row group pruning The snapshot() method was returning None, causing the wrapper to be preserved during snapshotting. Since PruningPredicate doesn't recognize the wrapper type, it fell back to lit(true) which disabled all row group and file pruning. Changed snapshot() to return the inner expression directly, stripping the wrapper during snapshotting so pruning predicates work correctly. Also set enable_adaptive_filter_selectivity_tracking default to false since this is an experimental feature. Benchmarks show no slowdowns after this fix. Co-Authored-By: Claude Opus 4.5 --- datafusion/common/src/config.rs | 2 +- .../adaptive_selectivity_filter.rs | 21 +++++++++++++------ 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index 05e06da2f397a..e067a6b435a1b 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -1141,7 +1141,7 @@ config_namespace! { /// /// The selectivity tracking resets when the dynamic filter is updated (e.g., when /// the hash table is built), allowing the filter to be re-evaluated with new data. - pub enable_adaptive_filter_selectivity_tracking: bool, default = true + pub enable_adaptive_filter_selectivity_tracking: bool, default = false /// The default filter selectivity used by Filter Statistics /// when an exact selectivity cannot be determined. 
Valid values are diff --git a/datafusion/physical-expr/src/expressions/adaptive_selectivity_filter.rs b/datafusion/physical-expr/src/expressions/adaptive_selectivity_filter.rs index 3e6b0934450d1..8d19cfaf01cfa 100644 --- a/datafusion/physical-expr/src/expressions/adaptive_selectivity_filter.rs +++ b/datafusion/physical-expr/src/expressions/adaptive_selectivity_filter.rs @@ -292,16 +292,17 @@ impl PhysicalExpr for AdaptiveSelectivityFilterExpr { #[inline] fn evaluate(&self, batch: &RecordBatch) -> Result { - // Fast path: single atomic load to check state + // Fast path: check state first if let Some(result) = self.try_fast_path(batch) { return Ok(result); } - // Evaluate the inner expression + // Evaluate inner expression let result = self.inner.evaluate(batch)?; - // Update tracking if in tracking state (cheap check + possible update) - if self.state.load(Ordering::Relaxed) as u8 == STATE_TRACKING { + // Update tracking if still in tracking state + let state = self.state.load(Ordering::Relaxed) as u8; + if state == STATE_TRACKING { self.update_tracking(&result); } @@ -313,8 +314,16 @@ impl PhysicalExpr for AdaptiveSelectivityFilterExpr { } fn snapshot(&self) -> Result>> { - // Return the inner's snapshot - self.inner.snapshot() + // Return the inner expression directly to strip the wrapper during snapshotting. + // This is important for PruningPredicate which needs to pattern-match on the + // underlying expression types (BinaryExpr, InListExpr, etc.) to build pruning + // predicates. If we return None, the wrapper would be preserved and + // PruningPredicate wouldn't recognize it, falling back to lit(true) which + // disables pruning entirely. + // + // Note: at this point in tree transformation, the inner has already been + // snapshotted via with_new_children, so self.inner is the snapshotted expression. 
+ Ok(Some(Arc::clone(&self.inner))) } fn snapshot_generation(&self) -> u64 { From a31892f0cb81fb35f57fba2d39e8486b06661dae Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Thu, 5 Feb 2026 14:46:44 -0500 Subject: [PATCH 08/14] update tests, make fast paths fast --- datafusion/common/src/config.rs | 4 +- .../physical_optimizer/filter_pushdown.rs | 32 ++-- .../physical-expr-common/src/physical_expr.rs | 1 - .../adaptive_selectivity_filter.rs | 141 +++++++++--------- 4 files changed, 85 insertions(+), 93 deletions(-) diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index e067a6b435a1b..959e29cf9073e 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -1121,7 +1121,7 @@ config_namespace! { /// The filter will remain in a tracking state until this many rows have been /// processed. This ensures statistical stability before making the disable decision. /// Only used when `enable_adaptive_filter_selectivity_tracking` is true. - pub adaptive_filter_min_rows_for_selectivity: usize, default = 10_000 + pub adaptive_filter_min_rows_for_selectivity: usize, default = 50_000 /// Selectivity threshold for adaptive disabling of join dynamic filters. /// @@ -1141,7 +1141,7 @@ config_namespace! { /// /// The selectivity tracking resets when the dynamic filter is updated (e.g., when /// the hash table is built), allowing the filter to be re-evaluated with new data. - pub enable_adaptive_filter_selectivity_tracking: bool, default = false + pub enable_adaptive_filter_selectivity_tracking: bool, default = true /// The default filter selectivity used by Filter Statistics /// when an exact selectivity cannot be determined. 
Valid values are diff --git a/datafusion/core/tests/physical_optimizer/filter_pushdown.rs b/datafusion/core/tests/physical_optimizer/filter_pushdown.rs index b3ed8d9653fe1..cdd29e053c45c 100644 --- a/datafusion/core/tests/physical_optimizer/filter_pushdown.rs +++ b/datafusion/core/tests/physical_optimizer/filter_pushdown.rs @@ -263,7 +263,7 @@ async fn test_dynamic_filter_pushdown_through_hash_join_with_topk() { - SortExec: TopK(fetch=2), expr=[e@4 ASC], preserve_partitioning=[false] - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(a@0, d@0)] - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[d, e, f], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ empty ] AND DynamicFilter [ empty ] + - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[d, e, f], file_type=test, pushdown_supported=true, predicate=AdaptiveSelectivity [ DynamicFilter [ empty ] ] AND DynamicFilter [ empty ] " ); @@ -287,7 +287,7 @@ async fn test_dynamic_filter_pushdown_through_hash_join_with_topk() { - SortExec: TopK(fetch=2), expr=[e@4 ASC], preserve_partitioning=[false], filter=[e@4 IS NULL OR e@4 < bb] - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(a@0, d@0)] - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[d, e, f], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ d@0 >= aa AND d@0 <= ab AND d@0 IN (SET) ([aa, ab]) ] AND DynamicFilter [ e@1 IS NULL OR e@1 < bb ] + - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[d, e, f], file_type=test, pushdown_supported=true, predicate=AdaptiveSelectivity(50.0%, 2/4) [ DynamicFilter [ d@0 >= aa AND d@0 <= ab AND d@0 IN (SET) ([aa, ab]) ] ] AND DynamicFilter [ e@1 IS NULL OR e@1 < bb ] 
" ); } @@ -1003,7 +1003,7 @@ async fn test_hashjoin_dynamic_filter_pushdown() { Ok: - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(a@0, a@0), (b@1, b@1)] - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, e], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ empty ] + - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, e], file_type=test, pushdown_supported=true, predicate=AdaptiveSelectivity [ DynamicFilter [ empty ] ] ", ); @@ -1037,7 +1037,7 @@ async fn test_hashjoin_dynamic_filter_pushdown() { @r" - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(a@0, a@0), (b@1, b@1)] - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, e], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ a@0 >= aa AND a@0 <= ab AND b@1 >= ba AND b@1 <= bb AND struct(a@0, b@1) IN (SET) ([{c0:aa,c1:ba}, {c0:ab,c1:bb}]) ] + - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, e], file_type=test, pushdown_supported=true, predicate=AdaptiveSelectivity(50.0%, 2/4) [ DynamicFilter [ a@0 >= aa AND a@0 <= ab AND b@1 >= ba AND b@1 <= bb AND struct(a@0, b@1) IN (SET) ([{c0:aa,c1:ba}, {c0:ab,c1:bb}]) ] ] " ); } @@ -1213,7 +1213,7 @@ async fn test_hashjoin_dynamic_filter_pushdown_partitioned() { - RepartitionExec: partitioning=Hash([a@0, b@1], 12), input_partitions=1 - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true - RepartitionExec: partitioning=Hash([a@0, b@1], 12), input_partitions=1 - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, e], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ empty ] + - 
DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, e], file_type=test, pushdown_supported=true, predicate=AdaptiveSelectivity [ DynamicFilter [ empty ] ] " ); @@ -1265,7 +1265,7 @@ async fn test_hashjoin_dynamic_filter_pushdown_partitioned() { - RepartitionExec: partitioning=Hash([a@0, b@1], 12), input_partitions=1 - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true - RepartitionExec: partitioning=Hash([a@0, b@1], 12), input_partitions=1 - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, e], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ a@0 >= aa AND a@0 <= ab AND b@1 >= ba AND b@1 <= bb AND struct(a@0, b@1) IN (SET) ([{c0:aa,c1:ba}, {c0:ab,c1:bb}]) ] + - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, e], file_type=test, pushdown_supported=true, predicate=AdaptiveSelectivity(50.0%, 2/4) [ DynamicFilter [ a@0 >= aa AND a@0 <= ab AND b@1 >= ba AND b@1 <= bb AND struct(a@0, b@1) IN (SET) ([{c0:aa,c1:ba}, {c0:ab,c1:bb}]) ] ] " ); @@ -1405,7 +1405,7 @@ async fn test_hashjoin_dynamic_filter_pushdown_collect_left() { - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(a@0, a@0), (b@1, b@1)] - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true - RepartitionExec: partitioning=Hash([a@0, b@1], 12), input_partitions=1 - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, e], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ empty ] + - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, e], file_type=test, pushdown_supported=true, predicate=AdaptiveSelectivity [ DynamicFilter [ empty ] ] " ); @@ -1437,7 +1437,7 @@ async fn test_hashjoin_dynamic_filter_pushdown_collect_left() { - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(a@0, a@0), (b@1, b@1)] - DataSourceExec: 
file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true - RepartitionExec: partitioning=Hash([a@0, b@1], 12), input_partitions=1 - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, e], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ a@0 >= aa AND a@0 <= ab AND b@1 >= ba AND b@1 <= bb AND struct(a@0, b@1) IN (SET) ([{c0:aa,c1:ba}, {c0:ab,c1:bb}]) ] + - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, e], file_type=test, pushdown_supported=true, predicate=AdaptiveSelectivity(50.0%, 2/4) [ DynamicFilter [ a@0 >= aa AND a@0 <= ab AND b@1 >= ba AND b@1 <= bb AND struct(a@0, b@1) IN (SET) ([{c0:aa,c1:ba}, {c0:ab,c1:bb}]) ] ] " ); @@ -1579,8 +1579,8 @@ async fn test_nested_hashjoin_dynamic_filter_pushdown() { - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(a@0, b@0)] - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, x], file_type=test, pushdown_supported=true - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c@1, d@0)] - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[b, c, y], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ empty ] - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[d, z], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ empty ] + - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[b, c, y], file_type=test, pushdown_supported=true, predicate=AdaptiveSelectivity [ DynamicFilter [ empty ] ] + - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[d, z], file_type=test, pushdown_supported=true, predicate=AdaptiveSelectivity [ DynamicFilter [ empty ] ] ", ); @@ -1610,8 +1610,8 @@ async fn test_nested_hashjoin_dynamic_filter_pushdown() { - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(a@0, b@0)] - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, x], 
file_type=test, pushdown_supported=true - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c@1, d@0)] - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[b, c, y], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ b@0 >= aa AND b@0 <= ab AND b@0 IN (SET) ([aa, ab]) ] - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[d, z], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ d@0 >= ca AND d@0 <= cb AND d@0 IN (SET) ([ca, cb]) ] + - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[b, c, y], file_type=test, pushdown_supported=true, predicate=AdaptiveSelectivity(40.0%, 2/5) [ DynamicFilter [ b@0 >= aa AND b@0 <= ab AND b@0 IN (SET) ([aa, ab]) ] ] + - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[d, z], file_type=test, pushdown_supported=true, predicate=AdaptiveSelectivity(25.0%, 2/8) [ DynamicFilter [ d@0 >= ca AND d@0 <= cb AND d@0 IN (SET) ([ca, cb]) ] ] " ); } @@ -3090,7 +3090,7 @@ async fn test_hashjoin_dynamic_filter_all_partitions_empty() { - RepartitionExec: partitioning=Hash([a@0, b@1], 4), input_partitions=1 - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b], file_type=test, pushdown_supported=true - RepartitionExec: partitioning=Hash([a@0, b@1], 4), input_partitions=1 - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ empty ] + - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b], file_type=test, pushdown_supported=true, predicate=AdaptiveSelectivity [ DynamicFilter [ empty ] ] " ); @@ -3115,7 +3115,7 @@ async fn test_hashjoin_dynamic_filter_all_partitions_empty() { - RepartitionExec: partitioning=Hash([a@0, b@1], 4), input_partitions=1 - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b], file_type=test, pushdown_supported=true - RepartitionExec: partitioning=Hash([a@0, 
b@1], 4), input_partitions=1 - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ false ] + - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b], file_type=test, pushdown_supported=true, predicate=AdaptiveSelectivity(0.0%, 0/1) [ DynamicFilter [ false ] ] " ); } @@ -3217,7 +3217,7 @@ async fn test_hashjoin_dynamic_filter_with_nulls() { @r" - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(a@0, a@0), (b@1, b@1)] - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b], file_type=test, pushdown_supported=true - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ empty ] + - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true, predicate=AdaptiveSelectivity [ DynamicFilter [ empty ] ] " ); @@ -3240,7 +3240,7 @@ async fn test_hashjoin_dynamic_filter_with_nulls() { @r" - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(a@0, a@0), (b@1, b@1)] - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b], file_type=test, pushdown_supported=true - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ a@0 >= aa AND a@0 <= ab AND b@1 >= 1 AND b@1 <= 2 AND struct(a@0, b@1) IN (SET) ([{c0:aa,c1:1}, {c0:,c1:2}, {c0:ab,c1:}]) ] + - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true, predicate=AdaptiveSelectivity(25.0%, 1/4) [ DynamicFilter [ a@0 >= aa AND a@0 <= ab AND b@1 >= 1 AND b@1 <= 2 AND struct(a@0, b@1) IN (SET) ([{c0:aa,c1:1}, {c0:,c1:2}, {c0:ab,c1:}]) ] ] " ); diff --git a/datafusion/physical-expr-common/src/physical_expr.rs b/datafusion/physical-expr-common/src/physical_expr.rs index 
7107b0a9004d3..ab21be7903835 100644 --- a/datafusion/physical-expr-common/src/physical_expr.rs +++ b/datafusion/physical-expr-common/src/physical_expr.rs @@ -595,7 +595,6 @@ pub fn snapshot_physical_expr( /// Take a snapshot of the given `PhysicalExpr` if it is dynamic. /// -/// Take a snapshot of this `PhysicalExpr` if it is dynamic. /// This is used to capture the current state of `PhysicalExpr`s that may contain /// dynamic references to other operators in order to serialize it over the wire /// or treat it via downcast matching. diff --git a/datafusion/physical-expr/src/expressions/adaptive_selectivity_filter.rs b/datafusion/physical-expr/src/expressions/adaptive_selectivity_filter.rs index 8d19cfaf01cfa..987335e3a6cd2 100644 --- a/datafusion/physical-expr/src/expressions/adaptive_selectivity_filter.rs +++ b/datafusion/physical-expr/src/expressions/adaptive_selectivity_filter.rs @@ -24,14 +24,15 @@ use std::hash::Hash; use std::sync::Arc; use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering}; -use arrow::array::{ArrayRef, BooleanArray}; +use arrow::array::BooleanArray; use arrow::datatypes::{DataType, Schema}; use arrow::record_batch::RecordBatch; -use datafusion_common::Result; +use datafusion_common::{Result, ScalarValue}; use datafusion_expr::ColumnarValue; use datafusion_physical_expr_common::physical_expr::DynHash; use crate::PhysicalExpr; +use crate::expressions::lit; /// Configuration for selectivity-based filter disabling. #[derive(Debug, Clone)] @@ -103,7 +104,7 @@ impl AdaptiveSelectivityFilterExpr { /// Get the current selectivity information for observability. /// /// Returns `(rows_passed, rows_total, is_disabled)`. - pub fn selectivity_info(&self) -> (usize, usize, bool) { + fn selectivity_info(&self) -> (usize, usize, bool) { let state = self.state.load(Ordering::Relaxed) as u8; match state { STATE_TRACKING => { @@ -118,8 +119,8 @@ impl AdaptiveSelectivityFilterExpr { } /// Check if the filter is disabled. 
- #[inline] - pub fn is_disabled(&self) -> bool { + #[cfg(test)] + fn is_disabled(&self) -> bool { self.state.load(Ordering::Relaxed) as u8 == STATE_DISABLED } @@ -128,79 +129,37 @@ impl AdaptiveSelectivityFilterExpr { &self.inner } - /// Create an all-true boolean array of the given length. - #[inline] - fn all_true_array(len: usize) -> ArrayRef { - Arc::new(BooleanArray::from(vec![true; len])) - } - - /// Fast path evaluation - just check the atomic state. - /// Returns Some(result) if we can short-circuit, None if we need to do full evaluation. - #[inline] - fn try_fast_path(&self, batch: &RecordBatch) -> Option { - let state = self.state.load(Ordering::Relaxed) as u8; - match state { - STATE_DISABLED => { - // Fast path: filter is disabled, return all-true - Some(ColumnarValue::Array(Self::all_true_array(batch.num_rows()))) - } - STATE_ACTIVE => { - // Fast path: filter is active and we've finished tracking - // Just evaluate the inner expression, no tracking overhead - None - } - STATE_TRACKING => { - // Need to do tracking - check generation first - let current_gen = self.inner.snapshot_generation(); - let tracked_gen = self.tracked_generation.load(Ordering::Relaxed); - if current_gen != tracked_gen { - // Generation changed - reset tracking - self.rows_passed.store(0, Ordering::Relaxed); - self.rows_total.store(0, Ordering::Relaxed); - self.tracked_generation - .store(current_gen, Ordering::Relaxed); - self.state.store(STATE_TRACKING as usize, Ordering::Relaxed); - } - None - } - _ => None, - } - } - /// Update tracking statistics after evaluating a batch. /// Only called when in TRACKING state. 
- #[inline] fn update_tracking(&self, result: &ColumnarValue) { - // Only update if still in tracking state - if self.state.load(Ordering::Relaxed) as u8 != STATE_TRACKING { - return; - } - let (true_count, total_count) = match result { ColumnarValue::Array(array) => { - let bool_array = array - .as_any() - .downcast_ref::() - .expect("Filter expression should return BooleanArray"); + let Some(bool_array) = array.as_any().downcast_ref::() + else { + // TODO: should this handle / propagate errors instead? + // Can this be a dictionary array or other wrapper type? + return; + }; (bool_array.true_count(), array.len()) } ColumnarValue::Scalar(scalar) => { - if let datafusion_common::ScalarValue::Boolean(Some(v)) = scalar { + if let ScalarValue::Boolean(Some(v)) = scalar { if *v { (1, 1) } else { (0, 1) } } else { + // Similarly, should this error? return; } } }; // Update counters - self.rows_passed.fetch_add(true_count, Ordering::Relaxed); + let passed = + self.rows_passed.fetch_add(true_count, Ordering::Relaxed) + true_count; let new_total = self.rows_total.fetch_add(total_count, Ordering::Relaxed) + total_count; // Check if we've seen enough rows to make a decision if new_total >= self.config.min_rows { - let passed = self.rows_passed.load(Ordering::Relaxed); let selectivity = passed as f64 / new_total as f64; // Use compare_exchange to ensure only one thread makes the transition @@ -290,18 +249,34 @@ impl PhysicalExpr for AdaptiveSelectivityFilterExpr { self.inner.nullable(input_schema) } - #[inline] fn evaluate(&self, batch: &RecordBatch) -> Result { // Fast path: check state first - if let Some(result) = self.try_fast_path(batch) { - return Ok(result); + let state = self.state.load(Ordering::Relaxed) as u8; + match state { + STATE_DISABLED => { + // Fast path: filter is disabled, return all-true + return Ok(ColumnarValue::Scalar(ScalarValue::Boolean(Some(true)))); + } + STATE_TRACKING => { + // Need to do tracking - check generation first + let current_gen = 
self.inner.snapshot_generation(); + let tracked_gen = self.tracked_generation.load(Ordering::Relaxed); + if current_gen != tracked_gen { + // Generation changed - reset tracking + self.rows_passed.store(0, Ordering::Relaxed); + self.rows_total.store(0, Ordering::Relaxed); + self.tracked_generation + .store(current_gen, Ordering::Relaxed); + self.state.store(STATE_TRACKING as usize, Ordering::Relaxed); + } + } + _ => {} } // Evaluate inner expression let result = self.inner.evaluate(batch)?; // Update tracking if still in tracking state - let state = self.state.load(Ordering::Relaxed) as u8; if state == STATE_TRACKING { self.update_tracking(&result); } @@ -314,21 +289,39 @@ impl PhysicalExpr for AdaptiveSelectivityFilterExpr { } fn snapshot(&self) -> Result>> { - // Return the inner expression directly to strip the wrapper during snapshotting. - // This is important for PruningPredicate which needs to pattern-match on the - // underlying expression types (BinaryExpr, InListExpr, etc.) to build pruning - // predicates. If we return None, the wrapper would be preserved and - // PruningPredicate wouldn't recognize it, falling back to lit(true) which - // disables pruning entirely. - // - // Note: at this point in tree transformation, the inner has already been - // snapshotted via with_new_children, so self.inner is the snapshotted expression. - Ok(Some(Arc::clone(&self.inner))) + match self.state.load(Ordering::Relaxed) as u8 { + STATE_DISABLED => { + // If disabled, we can return a literal true expression instead + return Ok(Some(lit(true))); + } + _ => { + // Return the inner expression directly to strip the wrapper during snapshotting. + // This is important for PruningPredicate which needs to pattern-match on the + // underlying expression types (BinaryExpr, InListExpr, etc.) to build pruning + // predicates. 
If we return None, the wrapper would be preserved and + // PruningPredicate wouldn't recognize it, falling back to lit(true) which + // disables pruning entirely. + // Note: at this point in tree transformation, the inner has already been + // snapshotted via with_new_children, so self.inner is the snapshotted expression. + Ok(Some(Arc::clone(&self.inner))) + } + } } fn snapshot_generation(&self) -> u64 { - // Return the inner's generation - self.inner.snapshot_generation() + let state = self.state.load(Ordering::Relaxed) as u8; + match state { + STATE_TRACKING => { + let inner = self.inner.snapshot_generation(); + // Update our tracked generation to match inner + self.tracked_generation.store(inner, Ordering::Relaxed); + inner + } + // Defer to inner expression's generation since we are basically a pass-through now + STATE_ACTIVE => self.inner.snapshot_generation(), + // Add 1 to distinguish from active state, we evaluate to all-true now + _disabled => self.tracked_generation.load(Ordering::Relaxed) + 1, + } } } @@ -336,7 +329,7 @@ impl PhysicalExpr for AdaptiveSelectivityFilterExpr { mod tests { use super::*; use crate::expressions::{BinaryExpr, col, lit}; - use arrow::array::Int32Array; + use arrow::array::{ArrayRef, Int32Array}; use arrow::datatypes::Field; use datafusion_expr::Operator; From 6d5d9660fe1f5869ec893dbf5f917fef4aca61e2 Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Thu, 5 Feb 2026 14:53:02 -0500 Subject: [PATCH 09/14] refactor: Remove generation tracking from AdaptiveSelectivityFilterExpr Simplifies the implementation by removing generation-aware reset logic: - No longer tracks inner filter's generation - snapshot_generation() returns inner's generation, or 0 when disabled - Faster evaluate() path without generation checks The generation tracking was unnecessary for hash join filters and added overhead to the hot path. 
Co-Authored-By: Claude Opus 4.5 --- .../adaptive_selectivity_filter.rs | 61 +++++-------------- 1 file changed, 14 insertions(+), 47 deletions(-) diff --git a/datafusion/physical-expr/src/expressions/adaptive_selectivity_filter.rs b/datafusion/physical-expr/src/expressions/adaptive_selectivity_filter.rs index 987335e3a6cd2..510db2c67f727 100644 --- a/datafusion/physical-expr/src/expressions/adaptive_selectivity_filter.rs +++ b/datafusion/physical-expr/src/expressions/adaptive_selectivity_filter.rs @@ -22,7 +22,7 @@ use std::any::Any; use std::fmt::Display; use std::hash::Hash; use std::sync::Arc; -use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering}; +use std::sync::atomic::{AtomicUsize, Ordering}; use arrow::array::BooleanArray; use arrow::datatypes::{DataType, Schema}; @@ -65,10 +65,6 @@ const STATE_DISABLED: u8 = 2; /// It monitors how many rows pass through the filter, and if the filter /// is found to be ineffective (passes most rows), it automatically disables /// itself to avoid evaluation overhead. -/// -/// The wrapper resets its statistics when the inner filter's generation changes, -/// which happens when the dynamic filter is updated (e.g., when the hash table -/// is built in a hash join). #[derive(Debug)] pub struct AdaptiveSelectivityFilterExpr { /// The inner filter expression (typically DynamicFilterPhysicalExpr). @@ -80,9 +76,6 @@ pub struct AdaptiveSelectivityFilterExpr { rows_passed: AtomicUsize, /// Total rows processed (only used in Tracking state). rows_total: AtomicUsize, - /// The generation of the inner filter when we started tracking. - /// If this changes, we need to reset our state. - tracked_generation: AtomicU64, /// Configuration for selectivity tracking. config: SelectivityConfig, } @@ -90,13 +83,11 @@ pub struct AdaptiveSelectivityFilterExpr { impl AdaptiveSelectivityFilterExpr { /// Create a new `AdaptiveSelectivityFilterExpr` wrapping the given inner expression. 
pub fn new(inner: Arc, config: SelectivityConfig) -> Self { - let current_generation = inner.snapshot_generation(); Self { inner, state: AtomicUsize::new(STATE_TRACKING as usize), rows_passed: AtomicUsize::new(0), rows_total: AtomicUsize::new(0), - tracked_generation: AtomicU64::new(current_generation), config, } } @@ -252,25 +243,9 @@ impl PhysicalExpr for AdaptiveSelectivityFilterExpr { fn evaluate(&self, batch: &RecordBatch) -> Result { // Fast path: check state first let state = self.state.load(Ordering::Relaxed) as u8; - match state { - STATE_DISABLED => { - // Fast path: filter is disabled, return all-true - return Ok(ColumnarValue::Scalar(ScalarValue::Boolean(Some(true)))); - } - STATE_TRACKING => { - // Need to do tracking - check generation first - let current_gen = self.inner.snapshot_generation(); - let tracked_gen = self.tracked_generation.load(Ordering::Relaxed); - if current_gen != tracked_gen { - // Generation changed - reset tracking - self.rows_passed.store(0, Ordering::Relaxed); - self.rows_total.store(0, Ordering::Relaxed); - self.tracked_generation - .store(current_gen, Ordering::Relaxed); - self.state.store(STATE_TRACKING as usize, Ordering::Relaxed); - } - } - _ => {} + if state == STATE_DISABLED { + // Fast path: filter is disabled, return all-true + return Ok(ColumnarValue::Scalar(ScalarValue::Boolean(Some(true)))); } // Evaluate inner expression @@ -310,17 +285,12 @@ impl PhysicalExpr for AdaptiveSelectivityFilterExpr { fn snapshot_generation(&self) -> u64 { let state = self.state.load(Ordering::Relaxed) as u8; - match state { - STATE_TRACKING => { - let inner = self.inner.snapshot_generation(); - // Update our tracked generation to match inner - self.tracked_generation.store(inner, Ordering::Relaxed); - inner - } - // Defer to inner expression's generation since we are basically a pass-through now - STATE_ACTIVE => self.inner.snapshot_generation(), - // Add 1 to distinguish from active state, we evaluate to all-true now - _disabled => 
self.tracked_generation.load(Ordering::Relaxed) + 1, + if state == STATE_DISABLED { + // When disabled, return 0 to indicate static behavior + 0 + } else { + // Pass through to inner expression's generation + self.inner.snapshot_generation() } } } @@ -416,17 +386,14 @@ mod tests { assert!(wrapper.is_disabled(), "Filter should be disabled"); // Now create a batch where the original filter would return some false - // But since we're disabled, we should get all true + // But since we're disabled, we should get scalar true (efficient bypass) let batch2 = create_batch(vec![200, 201, 202]); // These would fail a < 100 let result = wrapper.evaluate(&batch2).unwrap(); - let ColumnarValue::Array(arr) = result else { - panic!("Expected array result"); + // Should return scalar true when disabled + let ColumnarValue::Scalar(ScalarValue::Boolean(Some(true))) = result else { + panic!("Expected scalar true result when disabled, got: {:?}", result); }; - let bool_arr = arr.as_any().downcast_ref::().unwrap(); - - // All values should be true because the filter is disabled - assert_eq!(bool_arr.true_count(), 3); } #[test] From d35a17c4154e96adf68f98bf7f6e80ad2960b80e Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Thu, 5 Feb 2026 15:31:46 -0500 Subject: [PATCH 10/14] update defaults, update tests --- datafusion/common/src/config.rs | 2 +- .../physical_optimizer/filter_pushdown.rs | 18 +++++++-------- .../adaptive_selectivity_filter.rs | 23 +++++-------------- 3 files changed, 16 insertions(+), 27 deletions(-) diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index 959e29cf9073e..3eb752eda1d1b 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -1130,7 +1130,7 @@ config_namespace! { /// /// For example, 0.95 means if 95% or more of rows pass the filter, it will be disabled. /// Only used when `enable_adaptive_filter_selectivity_tracking` is true. 
- pub adaptive_filter_selectivity_threshold: f64, default = 0.95 + pub adaptive_filter_selectivity_threshold: f64, default = 0.85 /// Enable selectivity-based disabling of dynamic filters from joins. /// diff --git a/datafusion/core/tests/physical_optimizer/filter_pushdown.rs b/datafusion/core/tests/physical_optimizer/filter_pushdown.rs index cdd29e053c45c..fe0d3872f8907 100644 --- a/datafusion/core/tests/physical_optimizer/filter_pushdown.rs +++ b/datafusion/core/tests/physical_optimizer/filter_pushdown.rs @@ -287,7 +287,7 @@ async fn test_dynamic_filter_pushdown_through_hash_join_with_topk() { - SortExec: TopK(fetch=2), expr=[e@4 ASC], preserve_partitioning=[false], filter=[e@4 IS NULL OR e@4 < bb] - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(a@0, d@0)] - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[d, e, f], file_type=test, pushdown_supported=true, predicate=AdaptiveSelectivity(50.0%, 2/4) [ DynamicFilter [ d@0 >= aa AND d@0 <= ab AND d@0 IN (SET) ([aa, ab]) ] ] AND DynamicFilter [ e@1 IS NULL OR e@1 < bb ] + - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[d, e, f], file_type=test, pushdown_supported=true, predicate=AdaptiveSelectivity [ DynamicFilter [ d@0 >= aa AND d@0 <= ab AND d@0 IN (SET) ([aa, ab]) ] ] AND DynamicFilter [ e@1 IS NULL OR e@1 < bb ] " ); } @@ -1037,7 +1037,7 @@ async fn test_hashjoin_dynamic_filter_pushdown() { @r" - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(a@0, a@0), (b@1, b@1)] - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, e], file_type=test, pushdown_supported=true, predicate=AdaptiveSelectivity(50.0%, 2/4) [ DynamicFilter [ a@0 >= aa AND a@0 <= ab AND b@1 >= ba AND b@1 <= bb AND struct(a@0, b@1) 
IN (SET) ([{c0:aa,c1:ba}, {c0:ab,c1:bb}]) ] ] + - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, e], file_type=test, pushdown_supported=true, predicate=AdaptiveSelectivity [ DynamicFilter [ a@0 >= aa AND a@0 <= ab AND b@1 >= ba AND b@1 <= bb AND struct(a@0, b@1) IN (SET) ([{c0:aa,c1:ba}, {c0:ab,c1:bb}]) ] ] " ); } @@ -1247,7 +1247,7 @@ async fn test_hashjoin_dynamic_filter_pushdown_partitioned() { - RepartitionExec: partitioning=Hash([a@0, b@1], 12), input_partitions=1 - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true - RepartitionExec: partitioning=Hash([a@0, b@1], 12), input_partitions=1 - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, e], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ CASE hash_repartition % 12 WHEN 2 THEN a@0 >= ab AND a@0 <= ab AND b@1 >= bb AND b@1 <= bb AND struct(a@0, b@1) IN (SET) ([{c0:ab,c1:bb}]) WHEN 4 THEN a@0 >= aa AND a@0 <= aa AND b@1 >= ba AND b@1 <= ba AND struct(a@0, b@1) IN (SET) ([{c0:aa,c1:ba}]) ELSE false END ] + - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, e], file_type=test, pushdown_supported=true, predicate=AdaptiveSelectivity [ DynamicFilter [ CASE hash_repartition % 12 WHEN 2 THEN a@0 >= ab AND a@0 <= ab AND b@1 >= bb AND b@1 <= bb AND struct(a@0, b@1) IN (SET) ([{c0:ab,c1:bb}]) WHEN 4 THEN a@0 >= aa AND a@0 <= aa AND b@1 >= ba AND b@1 <= ba AND struct(a@0, b@1) IN (SET) ([{c0:aa,c1:ba}]) ELSE false END ] ] " ); @@ -1265,7 +1265,7 @@ async fn test_hashjoin_dynamic_filter_pushdown_partitioned() { - RepartitionExec: partitioning=Hash([a@0, b@1], 12), input_partitions=1 - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true - RepartitionExec: partitioning=Hash([a@0, b@1], 12), input_partitions=1 - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, e], 
file_type=test, pushdown_supported=true, predicate=AdaptiveSelectivity(50.0%, 2/4) [ DynamicFilter [ a@0 >= aa AND a@0 <= ab AND b@1 >= ba AND b@1 <= bb AND struct(a@0, b@1) IN (SET) ([{c0:aa,c1:ba}, {c0:ab,c1:bb}]) ] ] + - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, e], file_type=test, pushdown_supported=true, predicate=AdaptiveSelectivity [ DynamicFilter [ a@0 >= aa AND a@0 <= ab AND b@1 >= ba AND b@1 <= bb AND struct(a@0, b@1) IN (SET) ([{c0:aa,c1:ba}, {c0:ab,c1:bb}]) ] ] " ); @@ -1437,7 +1437,7 @@ async fn test_hashjoin_dynamic_filter_pushdown_collect_left() { - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(a@0, a@0), (b@1, b@1)] - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true - RepartitionExec: partitioning=Hash([a@0, b@1], 12), input_partitions=1 - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, e], file_type=test, pushdown_supported=true, predicate=AdaptiveSelectivity(50.0%, 2/4) [ DynamicFilter [ a@0 >= aa AND a@0 <= ab AND b@1 >= ba AND b@1 <= bb AND struct(a@0, b@1) IN (SET) ([{c0:aa,c1:ba}, {c0:ab,c1:bb}]) ] ] + - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, e], file_type=test, pushdown_supported=true, predicate=AdaptiveSelectivity [ DynamicFilter [ a@0 >= aa AND a@0 <= ab AND b@1 >= ba AND b@1 <= bb AND struct(a@0, b@1) IN (SET) ([{c0:aa,c1:ba}, {c0:ab,c1:bb}]) ] ] " ); @@ -1610,8 +1610,8 @@ async fn test_nested_hashjoin_dynamic_filter_pushdown() { - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(a@0, b@0)] - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, x], file_type=test, pushdown_supported=true - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c@1, d@0)] - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[b, c, y], file_type=test, pushdown_supported=true, predicate=AdaptiveSelectivity(40.0%, 2/5) [ DynamicFilter [ b@0 >= aa 
AND b@0 <= ab AND b@0 IN (SET) ([aa, ab]) ] ] - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[d, z], file_type=test, pushdown_supported=true, predicate=AdaptiveSelectivity(25.0%, 2/8) [ DynamicFilter [ d@0 >= ca AND d@0 <= cb AND d@0 IN (SET) ([ca, cb]) ] ] + - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[b, c, y], file_type=test, pushdown_supported=true, predicate=AdaptiveSelectivity [ DynamicFilter [ b@0 >= aa AND b@0 <= ab AND b@0 IN (SET) ([aa, ab]) ] ] + - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[d, z], file_type=test, pushdown_supported=true, predicate=AdaptiveSelectivity [ DynamicFilter [ d@0 >= ca AND d@0 <= cb AND d@0 IN (SET) ([ca, cb]) ] ] " ); } @@ -3115,7 +3115,7 @@ async fn test_hashjoin_dynamic_filter_all_partitions_empty() { - RepartitionExec: partitioning=Hash([a@0, b@1], 4), input_partitions=1 - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b], file_type=test, pushdown_supported=true - RepartitionExec: partitioning=Hash([a@0, b@1], 4), input_partitions=1 - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b], file_type=test, pushdown_supported=true, predicate=AdaptiveSelectivity(0.0%, 0/1) [ DynamicFilter [ false ] ] + - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b], file_type=test, pushdown_supported=true, predicate=AdaptiveSelectivity [ DynamicFilter [ false ] ] " ); } @@ -3240,7 +3240,7 @@ async fn test_hashjoin_dynamic_filter_with_nulls() { @r" - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(a@0, a@0), (b@1, b@1)] - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b], file_type=test, pushdown_supported=true - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true, predicate=AdaptiveSelectivity(25.0%, 1/4) [ DynamicFilter [ a@0 >= aa AND a@0 <= ab AND b@1 >= 1 AND b@1 <= 2 AND struct(a@0, b@1) IN 
(SET) ([{c0:aa,c1:1}, {c0:,c1:2}, {c0:ab,c1:}]) ] ] + - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true, predicate=AdaptiveSelectivity [ DynamicFilter [ a@0 >= aa AND a@0 <= ab AND b@1 >= 1 AND b@1 <= 2 AND struct(a@0, b@1) IN (SET) ([{c0:aa,c1:1}, {c0:,c1:2}, {c0:ab,c1:}]) ] ] " ); diff --git a/datafusion/physical-expr/src/expressions/adaptive_selectivity_filter.rs b/datafusion/physical-expr/src/expressions/adaptive_selectivity_filter.rs index 510db2c67f727..4e12849d5166a 100644 --- a/datafusion/physical-expr/src/expressions/adaptive_selectivity_filter.rs +++ b/datafusion/physical-expr/src/expressions/adaptive_selectivity_filter.rs @@ -95,6 +95,7 @@ impl AdaptiveSelectivityFilterExpr { /// Get the current selectivity information for observability. /// /// Returns `(rows_passed, rows_total, is_disabled)`. + #[cfg(test)] fn selectivity_info(&self) -> (usize, usize, bool) { let state = self.state.load(Ordering::Relaxed) as u8; match state { @@ -174,22 +175,7 @@ impl AdaptiveSelectivityFilterExpr { impl Display for AdaptiveSelectivityFilterExpr { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let (passed, total, disabled) = self.selectivity_info(); - if disabled { - write!(f, "AdaptiveSelectivity(DISABLED) [ {} ]", self.inner) - } else if total > 0 { - let selectivity = passed as f64 / total as f64; - write!( - f, - "AdaptiveSelectivity({:.1}%, {}/{}) [ {} ]", - selectivity * 100.0, - passed, - total, - self.inner - ) - } else { - write!(f, "AdaptiveSelectivity [ {} ]", self.inner) - } + write!(f, "AdaptiveSelectivity [ {} ]", self.inner) } } @@ -392,7 +378,10 @@ mod tests { // Should return scalar true when disabled let ColumnarValue::Scalar(ScalarValue::Boolean(Some(true))) = result else { - panic!("Expected scalar true result when disabled, got: {:?}", result); + panic!( + "Expected scalar true result when disabled, got: {:?}", + result + ); }; } From 
919659cceedf81f397b7a6060e8ce324bfed9525 Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Thu, 5 Feb 2026 17:00:55 -0500 Subject: [PATCH 11/14] fix slts --- .../src/expressions/adaptive_selectivity_filter.rs | 2 +- .../test_files/dynamic_filter_pushdown_config.slt | 12 ++++++------ .../sqllogictest/test_files/information_schema.slt | 12 ++++++------ .../sqllogictest/test_files/projection_pushdown.slt | 8 ++++---- .../sqllogictest/test_files/push_down_filter.slt | 2 +- .../test_files/repartition_subset_satisfaction.slt | 4 ++-- docs/source/user-guide/configs.md | 6 +++--- 7 files changed, 23 insertions(+), 23 deletions(-) diff --git a/datafusion/physical-expr/src/expressions/adaptive_selectivity_filter.rs b/datafusion/physical-expr/src/expressions/adaptive_selectivity_filter.rs index 4e12849d5166a..e6495d0ab19b6 100644 --- a/datafusion/physical-expr/src/expressions/adaptive_selectivity_filter.rs +++ b/datafusion/physical-expr/src/expressions/adaptive_selectivity_filter.rs @@ -253,7 +253,7 @@ impl PhysicalExpr for AdaptiveSelectivityFilterExpr { match self.state.load(Ordering::Relaxed) as u8 { STATE_DISABLED => { // If disabled, we can return a literal true expression instead - return Ok(Some(lit(true))); + Ok(Some(lit(true) as Arc)) } _ => { // Return the inner expression directly to strip the wrapper during snapshotting. 
diff --git a/datafusion/sqllogictest/test_files/dynamic_filter_pushdown_config.slt b/datafusion/sqllogictest/test_files/dynamic_filter_pushdown_config.slt index 55fc825422146..2c7cce75e2982 100644 --- a/datafusion/sqllogictest/test_files/dynamic_filter_pushdown_config.slt +++ b/datafusion/sqllogictest/test_files/dynamic_filter_pushdown_config.slt @@ -158,7 +158,7 @@ physical_plan 01)ProjectionExec: expr=[id@1 as id, data@2 as data, info@0 as info] 02)--HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(id@0, id@0)], projection=[info@1, id@2, data@3] 03)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/dynamic_filter_pushdown_config/join_right.parquet]]}, projection=[id, info], file_type=parquet -04)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/dynamic_filter_pushdown_config/join_left.parquet]]}, projection=[id, data], file_type=parquet, predicate=DynamicFilter [ empty ] +04)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/dynamic_filter_pushdown_config/join_left.parquet]]}, projection=[id, data], file_type=parquet, predicate=AdaptiveSelectivity [ DynamicFilter [ empty ] ] # Disable Join dynamic filter pushdown statement ok @@ -213,7 +213,7 @@ physical_plan 01)ProjectionExec: expr=[id@1 as id, data@2 as data, info@0 as info] 02)--HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(id@0, id@0)], projection=[info@1, id@2, data@3] 03)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/dynamic_filter_pushdown_config/join_right.parquet]]}, projection=[id, info], file_type=parquet -04)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/dynamic_filter_pushdown_config/join_left.parquet]]}, projection=[id, data], file_type=parquet, predicate=DynamicFilter [ empty ] +04)----DataSourceExec: file_groups={1 group: 
[[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/dynamic_filter_pushdown_config/join_left.parquet]]}, projection=[id, data], file_type=parquet, predicate=AdaptiveSelectivity [ DynamicFilter [ empty ] ] # Enable TopK, disable Join statement ok @@ -436,7 +436,7 @@ physical_plan 01)ProjectionExec: expr=[id@1 as id, data@2 as data, info@0 as info] 02)--HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(id@0, id@0)], projection=[info@1, id@2, data@3] 03)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/dynamic_filter_pushdown_config/join_right.parquet]]}, projection=[id, info], file_type=parquet -04)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/dynamic_filter_pushdown_config/join_left.parquet]]}, projection=[id, data], file_type=parquet, predicate=DynamicFilter [ empty ] +04)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/dynamic_filter_pushdown_config/join_left.parquet]]}, projection=[id, data], file_type=parquet, predicate=AdaptiveSelectivity [ DynamicFilter [ empty ] ] # Cleanup @@ -488,19 +488,19 @@ query T SELECT value FROM information_schema.df_settings WHERE name = 'datafusion.optimizer.enable_adaptive_filter_selectivity_tracking'; ---- -false +true query T SELECT value FROM information_schema.df_settings WHERE name = 'datafusion.optimizer.adaptive_filter_selectivity_threshold'; ---- -0.95 +0.85 query T SELECT value FROM information_schema.df_settings WHERE name = 'datafusion.optimizer.adaptive_filter_min_rows_for_selectivity'; ---- -10000 +50000 # Enable adaptive selectivity tracking statement ok diff --git a/datafusion/sqllogictest/test_files/information_schema.slt b/datafusion/sqllogictest/test_files/information_schema.slt index b2d63b3a2197d..d39317407f723 100644 --- a/datafusion/sqllogictest/test_files/information_schema.slt +++ 
b/datafusion/sqllogictest/test_files/information_schema.slt @@ -291,11 +291,11 @@ datafusion.format.time_format %H:%M:%S%.f datafusion.format.timestamp_format %Y-%m-%dT%H:%M:%S%.f datafusion.format.timestamp_tz_format NULL datafusion.format.types_info false -datafusion.optimizer.adaptive_filter_min_rows_for_selectivity 10000 -datafusion.optimizer.adaptive_filter_selectivity_threshold 0.95 +datafusion.optimizer.adaptive_filter_min_rows_for_selectivity 50000 +datafusion.optimizer.adaptive_filter_selectivity_threshold 0.85 datafusion.optimizer.allow_symmetric_joins_without_pruning true datafusion.optimizer.default_filter_selectivity 20 -datafusion.optimizer.enable_adaptive_filter_selectivity_tracking false +datafusion.optimizer.enable_adaptive_filter_selectivity_tracking true datafusion.optimizer.enable_aggregate_dynamic_filter_pushdown true datafusion.optimizer.enable_distinct_aggregation_soft_limit true datafusion.optimizer.enable_dynamic_filter_pushdown true @@ -432,11 +432,11 @@ datafusion.format.time_format %H:%M:%S%.f Time format for time arrays datafusion.format.timestamp_format %Y-%m-%dT%H:%M:%S%.f Timestamp format for timestamp arrays datafusion.format.timestamp_tz_format NULL Timestamp format for timestamp with timezone arrays. When `None`, ISO 8601 format is used. datafusion.format.types_info false Show types in visual representation batches -datafusion.optimizer.adaptive_filter_min_rows_for_selectivity 10000 Minimum number of rows to process before making a selectivity decision for adaptive filtering of join dynamic filters. The filter will remain in a tracking state until this many rows have been processed. This ensures statistical stability before making the disable decision. Only used when `enable_adaptive_filter_selectivity_tracking` is true. -datafusion.optimizer.adaptive_filter_selectivity_threshold 0.95 Selectivity threshold for adaptive disabling of join dynamic filters. If the filter passes this fraction or more of rows, it will be disabled. 
Value should be between 0.0 and 1.0. For example, 0.95 means if 95% or more of rows pass the filter, it will be disabled. Only used when `enable_adaptive_filter_selectivity_tracking` is true. +datafusion.optimizer.adaptive_filter_min_rows_for_selectivity 50000 Minimum number of rows to process before making a selectivity decision for adaptive filtering of join dynamic filters. The filter will remain in a tracking state until this many rows have been processed. This ensures statistical stability before making the disable decision. Only used when `enable_adaptive_filter_selectivity_tracking` is true. +datafusion.optimizer.adaptive_filter_selectivity_threshold 0.85 Selectivity threshold for adaptive disabling of join dynamic filters. If the filter passes this fraction or more of rows, it will be disabled. Value should be between 0.0 and 1.0. For example, 0.95 means if 95% or more of rows pass the filter, it will be disabled. Only used when `enable_adaptive_filter_selectivity_tracking` is true. datafusion.optimizer.allow_symmetric_joins_without_pruning true Should DataFusion allow symmetric hash joins for unbounded data sources even when its inputs do not have any ordering or filtering If the flag is not enabled, the SymmetricHashJoin operator will be unable to prune its internal buffers, resulting in certain join types - such as Full, Left, LeftAnti, LeftSemi, Right, RightAnti, and RightSemi - being produced only at the end of the execution. This is not typical in stream processing. Additionally, without proper design for long runner execution, all types of joins may encounter out-of-memory errors. datafusion.optimizer.default_filter_selectivity 20 The default filter selectivity used by Filter Statistics when an exact selectivity cannot be determined. Valid values are between 0 (no selectivity) and 100 (all rows are selected). -datafusion.optimizer.enable_adaptive_filter_selectivity_tracking false Enable selectivity-based disabling of dynamic filters from joins. 
When enabled, join dynamic filters that pass most rows (above the threshold) will be automatically disabled to avoid evaluation overhead. This is useful when the build side of a join covers most of the probe side values, making the filter expensive to evaluate for little benefit. The selectivity tracking resets when the dynamic filter is updated (e.g., when the hash table is built), allowing the filter to be re-evaluated with new data. +datafusion.optimizer.enable_adaptive_filter_selectivity_tracking true Enable selectivity-based disabling of dynamic filters from joins. When enabled, join dynamic filters that pass most rows (above the threshold) will be automatically disabled to avoid evaluation overhead. This is useful when the build side of a join covers most of the probe side values, making the filter expensive to evaluate for little benefit. The selectivity tracking resets when the dynamic filter is updated (e.g., when the hash table is built), allowing the filter to be re-evaluated with new data. datafusion.optimizer.enable_aggregate_dynamic_filter_pushdown true When set to true, the optimizer will attempt to push down Aggregate dynamic filters into the file scan phase. datafusion.optimizer.enable_distinct_aggregation_soft_limit true When set to true, the optimizer will push a limit operation into grouped aggregations which have no aggregate expressions, as a soft limit, emitting groups once the limit is reached, before all rows in the group are read. datafusion.optimizer.enable_dynamic_filter_pushdown true When set to true attempts to push down dynamic filters generated by operators (TopK, Join & Aggregate) into the file scan phase. For example, for a query such as `SELECT * FROM t ORDER BY timestamp DESC LIMIT 10`, the optimizer will attempt to push down the current top 10 timestamps that the TopK operator references into the file scans. 
This means that if we already have 10 timestamps in the year 2025 any files that only have timestamps in the year 2024 can be skipped / pruned at various stages in the scan. The config will suppress `enable_join_dynamic_filter_pushdown`, `enable_topk_dynamic_filter_pushdown` & `enable_aggregate_dynamic_filter_pushdown` So if you disable `enable_topk_dynamic_filter_pushdown`, then enable `enable_dynamic_filter_pushdown`, the `enable_topk_dynamic_filter_pushdown` will be overridden. diff --git a/datafusion/sqllogictest/test_files/projection_pushdown.slt b/datafusion/sqllogictest/test_files/projection_pushdown.slt index c25b80a0d7f20..80aaedc55d903 100644 --- a/datafusion/sqllogictest/test_files/projection_pushdown.slt +++ b/datafusion/sqllogictest/test_files/projection_pushdown.slt @@ -1457,7 +1457,7 @@ physical_plan 01)HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(id@0, id@0)] 02)--FilterExec: __datafusion_extracted_1@0 > 150, projection=[id@1] 03)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/projection_pushdown/simple.parquet]]}, projection=[get_field(s@1, value) as __datafusion_extracted_1, id], file_type=parquet -04)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/projection_pushdown/join_right.parquet]]}, projection=[id], file_type=parquet, predicate=DynamicFilter [ empty ] +04)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/projection_pushdown/join_right.parquet]]}, projection=[id], file_type=parquet, predicate=AdaptiveSelectivity [ DynamicFilter [ empty ] ] # Verify correctness - id matches and value > 150 query II @@ -1497,7 +1497,7 @@ physical_plan 02)--FilterExec: __datafusion_extracted_1@0 > 100, projection=[id@1] 03)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/projection_pushdown/simple.parquet]]}, projection=[get_field(s@1, value) as 
__datafusion_extracted_1, id], file_type=parquet 04)--FilterExec: __datafusion_extracted_2@0 > 3, projection=[id@1] -05)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/projection_pushdown/join_right.parquet]]}, projection=[get_field(s@1, level) as __datafusion_extracted_2, id], file_type=parquet, predicate=DynamicFilter [ empty ] +05)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/projection_pushdown/join_right.parquet]]}, projection=[get_field(s@1, level) as __datafusion_extracted_2, id], file_type=parquet, predicate=AdaptiveSelectivity [ DynamicFilter [ empty ] ] # Verify correctness - id matches, value > 100, and level > 3 # Matching ids where value > 100: 2(200), 3(150), 4(300), 5(250) @@ -1533,7 +1533,7 @@ physical_plan 01)ProjectionExec: expr=[id@1 as id, __datafusion_extracted_1@0 as simple_struct.s[label], __datafusion_extracted_2@2 as join_right.s[role]] 02)--HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(id@1, id@1)], projection=[__datafusion_extracted_1@0, id@1, __datafusion_extracted_2@2] 03)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/projection_pushdown/simple.parquet]]}, projection=[get_field(s@1, label) as __datafusion_extracted_1, id], file_type=parquet -04)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/projection_pushdown/join_right.parquet]]}, projection=[get_field(s@1, role) as __datafusion_extracted_2, id], file_type=parquet, predicate=DynamicFilter [ empty ] +04)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/projection_pushdown/join_right.parquet]]}, projection=[get_field(s@1, role) as __datafusion_extracted_2, id], file_type=parquet, predicate=AdaptiveSelectivity [ DynamicFilter [ empty ] ] # Verify correctness query ITT @@ -1565,7 +1565,7 @@ logical_plan 
physical_plan 01)HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(id@0, id@0)] 02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/projection_pushdown/simple.parquet]]}, projection=[id], file_type=parquet -03)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/projection_pushdown/join_right.parquet]]}, projection=[id], file_type=parquet, predicate=DynamicFilter [ empty ] +03)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/projection_pushdown/join_right.parquet]]}, projection=[id], file_type=parquet, predicate=AdaptiveSelectivity [ DynamicFilter [ empty ] ] # Verify correctness query II diff --git a/datafusion/sqllogictest/test_files/push_down_filter.slt b/datafusion/sqllogictest/test_files/push_down_filter.slt index edafcfaa543f2..10f17f3d49c50 100644 --- a/datafusion/sqllogictest/test_files/push_down_filter.slt +++ b/datafusion/sqllogictest/test_files/push_down_filter.slt @@ -303,7 +303,7 @@ physical_plan 01)HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(k@0, k@0)] 02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter/small_table.parquet]]}, projection=[k], file_type=parquet 03)--RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -04)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter/large_table.parquet]]}, projection=[k, v], file_type=parquet, predicate=v@1 >= 50 AND DynamicFilter [ empty ], pruning_predicate=v_null_count@1 != row_count@2 AND v_max@0 >= 50, required_guarantees=[] +04)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter/large_table.parquet]]}, projection=[k, v], file_type=parquet, predicate=v@1 >= 50 AND AdaptiveSelectivity [ DynamicFilter [ empty ] ], 
pruning_predicate=v_null_count@1 != row_count@2 AND v_max@0 >= 50, required_guarantees=[] statement ok drop table small_table; diff --git a/datafusion/sqllogictest/test_files/repartition_subset_satisfaction.slt b/datafusion/sqllogictest/test_files/repartition_subset_satisfaction.slt index e2c9fa4237939..4fdcb8c8393d7 100644 --- a/datafusion/sqllogictest/test_files/repartition_subset_satisfaction.slt +++ b/datafusion/sqllogictest/test_files/repartition_subset_satisfaction.slt @@ -383,7 +383,7 @@ physical_plan 14)--------------------------CoalescePartitionsExec 15)----------------------------FilterExec: service@1 = log, projection=[env@0, d_dkey@2] 16)------------------------------DataSourceExec: file_groups={3 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_subset_satisfaction/dimension/d_dkey=A/data.parquet, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_subset_satisfaction/dimension/d_dkey=D/data.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_subset_satisfaction/dimension/d_dkey=B/data.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_subset_satisfaction/dimension/d_dkey=C/data.parquet]]}, projection=[env, service, d_dkey], file_type=parquet, predicate=service@1 = log, pruning_predicate=service_null_count@2 != row_count@3 AND service_min@0 <= log AND log <= service_max@1, required_guarantees=[service in (log)] -17)--------------------------DataSourceExec: file_groups={3 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_subset_satisfaction/fact/f_dkey=A/data.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_subset_satisfaction/fact/f_dkey=B/data.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_subset_satisfaction/fact/f_dkey=C/data.parquet]]}, projection=[timestamp, value, f_dkey], output_ordering=[f_dkey@2 ASC NULLS LAST, timestamp@0 ASC NULLS LAST], 
file_type=parquet, predicate=DynamicFilter [ empty ] +17)--------------------------DataSourceExec: file_groups={3 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_subset_satisfaction/fact/f_dkey=A/data.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_subset_satisfaction/fact/f_dkey=B/data.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_subset_satisfaction/fact/f_dkey=C/data.parquet]]}, projection=[timestamp, value, f_dkey], output_ordering=[f_dkey@2 ASC NULLS LAST, timestamp@0 ASC NULLS LAST], file_type=parquet, predicate=AdaptiveSelectivity [ DynamicFilter [ empty ] ] # Verify results without subset satisfaction query TPR rowsort @@ -479,7 +479,7 @@ physical_plan 11)--------------------CoalescePartitionsExec 12)----------------------FilterExec: service@1 = log, projection=[env@0, d_dkey@2] 13)------------------------DataSourceExec: file_groups={3 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_subset_satisfaction/dimension/d_dkey=A/data.parquet, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_subset_satisfaction/dimension/d_dkey=D/data.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_subset_satisfaction/dimension/d_dkey=B/data.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_subset_satisfaction/dimension/d_dkey=C/data.parquet]]}, projection=[env, service, d_dkey], file_type=parquet, predicate=service@1 = log, pruning_predicate=service_null_count@2 != row_count@3 AND service_min@0 <= log AND log <= service_max@1, required_guarantees=[service in (log)] -14)--------------------DataSourceExec: file_groups={3 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_subset_satisfaction/fact/f_dkey=A/data.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_subset_satisfaction/fact/f_dkey=B/data.parquet], 
[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_subset_satisfaction/fact/f_dkey=C/data.parquet]]}, projection=[timestamp, value, f_dkey], output_ordering=[f_dkey@2 ASC NULLS LAST, timestamp@0 ASC NULLS LAST], file_type=parquet, predicate=DynamicFilter [ empty ] +14)--------------------DataSourceExec: file_groups={3 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_subset_satisfaction/fact/f_dkey=A/data.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_subset_satisfaction/fact/f_dkey=B/data.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_subset_satisfaction/fact/f_dkey=C/data.parquet]]}, projection=[timestamp, value, f_dkey], output_ordering=[f_dkey@2 ASC NULLS LAST, timestamp@0 ASC NULLS LAST], file_type=parquet, predicate=AdaptiveSelectivity [ DynamicFilter [ empty ] ] # Verify results match with subset satisfaction query TPR rowsort diff --git a/docs/source/user-guide/configs.md b/docs/source/user-guide/configs.md index cc2b6d0654b47..0d7586c1231b8 100644 --- a/docs/source/user-guide/configs.md +++ b/docs/source/user-guide/configs.md @@ -161,9 +161,9 @@ The following configuration settings are available: | datafusion.optimizer.hash_join_single_partition_threshold_rows | 131072 | The maximum estimated size in rows for one input side of a HashJoin will be collected into a single partition | | datafusion.optimizer.hash_join_inlist_pushdown_max_size | 131072 | Maximum size in bytes for the build side of a hash join to be pushed down as an InList expression for dynamic filtering. Build sides larger than this will use hash table lookups instead. Set to 0 to always use hash table lookups. InList pushdown can be more efficient for small build sides because it can result in better statistics pruning as well as use any bloom filters present on the scan side. 
InList expressions are also more transparent and easier to serialize over the network in distributed uses of DataFusion. On the other hand InList pushdown requires making a copy of the data and thus adds some overhead to the build side and uses more memory. This setting is per-partition, so we may end up using `hash_join_inlist_pushdown_max_size` \* `target_partitions` memory. The default is 128kB per partition. This should allow point lookup joins (e.g. joining on a unique primary key) to use InList pushdown in most cases but avoids excessive memory usage or overhead for larger joins. | | datafusion.optimizer.hash_join_inlist_pushdown_max_distinct_values | 150 | Maximum number of distinct values (rows) in the build side of a hash join to be pushed down as an InList expression for dynamic filtering. Build sides with more rows than this will use hash table lookups instead. Set to 0 to always use hash table lookups. This provides an additional limit beyond `hash_join_inlist_pushdown_max_size` to prevent very large IN lists that might not provide much benefit over hash table lookups. This uses the deduplicated row count once the build side has been evaluated. The default is 150 values per partition. This is inspired by Trino's `max-filter-keys-per-column` setting. See: | -| datafusion.optimizer.adaptive_filter_min_rows_for_selectivity | 10000 | Minimum number of rows to process before making a selectivity decision for adaptive filtering of join dynamic filters. The filter will remain in a tracking state until this many rows have been processed. This ensures statistical stability before making the disable decision. Only used when `enable_adaptive_filter_selectivity_tracking` is true. | -| datafusion.optimizer.adaptive_filter_selectivity_threshold | 0.95 | Selectivity threshold for adaptive disabling of join dynamic filters. If the filter passes this fraction or more of rows, it will be disabled. Value should be between 0.0 and 1.0. 
For example, 0.95 means if 95% or more of rows pass the filter, it will be disabled. Only used when `enable_adaptive_filter_selectivity_tracking` is true. | -| datafusion.optimizer.enable_adaptive_filter_selectivity_tracking | false | Enable selectivity-based disabling of dynamic filters from joins. When enabled, join dynamic filters that pass most rows (above the threshold) will be automatically disabled to avoid evaluation overhead. This is useful when the build side of a join covers most of the probe side values, making the filter expensive to evaluate for little benefit. The selectivity tracking resets when the dynamic filter is updated (e.g., when the hash table is built), allowing the filter to be re-evaluated with new data. | +| datafusion.optimizer.adaptive_filter_min_rows_for_selectivity | 50000 | Minimum number of rows to process before making a selectivity decision for adaptive filtering of join dynamic filters. The filter will remain in a tracking state until this many rows have been processed. This ensures statistical stability before making the disable decision. Only used when `enable_adaptive_filter_selectivity_tracking` is true. | +| datafusion.optimizer.adaptive_filter_selectivity_threshold | 0.85 | Selectivity threshold for adaptive disabling of join dynamic filters. If the filter passes this fraction or more of rows, it will be disabled. Value should be between 0.0 and 1.0. For example, 0.95 means if 95% or more of rows pass the filter, it will be disabled. Only used when `enable_adaptive_filter_selectivity_tracking` is true. | +| datafusion.optimizer.enable_adaptive_filter_selectivity_tracking | true | Enable selectivity-based disabling of dynamic filters from joins. When enabled, join dynamic filters that pass most rows (above the threshold) will be automatically disabled to avoid evaluation overhead. This is useful when the build side of a join covers most of the probe side values, making the filter expensive to evaluate for little benefit. 
The selectivity tracking resets when the dynamic filter is updated (e.g., when the hash table is built), allowing the filter to be re-evaluated with new data. | | datafusion.optimizer.default_filter_selectivity | 20 | The default filter selectivity used by Filter Statistics when an exact selectivity cannot be determined. Valid values are between 0 (no selectivity) and 100 (all rows are selected). | | datafusion.optimizer.prefer_existing_union | false | When set to true, the optimizer will not attempt to convert Union to Interleave | | datafusion.optimizer.expand_views_at_output | false | When set to true, if the returned type is a view type then the output will be coerced to a non-view. Coerces `Utf8View` to `LargeUtf8`, and `BinaryView` to `LargeBinary`. | From ddb1c55caebf57013b5a49763e4face03ae30d34 Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Thu, 5 Feb 2026 17:40:22 -0500 Subject: [PATCH 12/14] tune adaptive filter defaults based on TPC-DS benchmarks Co-Authored-By: Claude Opus 4.6 --- datafusion/common/src/config.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index 3eb752eda1d1b..65c61c377fbca 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -1121,7 +1121,7 @@ config_namespace! { /// The filter will remain in a tracking state until this many rows have been /// processed. This ensures statistical stability before making the disable decision. /// Only used when `enable_adaptive_filter_selectivity_tracking` is true. - pub adaptive_filter_min_rows_for_selectivity: usize, default = 50_000 + pub adaptive_filter_min_rows_for_selectivity: usize, default = 100_000 /// Selectivity threshold for adaptive disabling of join dynamic filters. /// @@ -1130,7 +1130,7 @@ config_namespace! { /// /// For example, 0.95 means if 95% or more of rows pass the filter, it will be disabled. 
/// Only used when `enable_adaptive_filter_selectivity_tracking` is true. - pub adaptive_filter_selectivity_threshold: f64, default = 0.85 + pub adaptive_filter_selectivity_threshold: f64, default = 0.50 /// Enable selectivity-based disabling of dynamic filters from joins. /// From 4124aae8d3a731d642c6bf19f52cfb78fd780540 Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Fri, 6 Feb 2026 11:16:52 -0500 Subject: [PATCH 13/14] fix slts, clippy, docs --- .../src/expressions/adaptive_selectivity_filter.rs | 5 +---- .../test_files/dynamic_filter_pushdown_config.slt | 4 ++-- datafusion/sqllogictest/test_files/information_schema.slt | 8 ++++---- .../test_files/preserve_file_partitioning.slt | 4 ++-- docs/source/user-guide/configs.md | 4 ++-- 5 files changed, 11 insertions(+), 14 deletions(-) diff --git a/datafusion/physical-expr/src/expressions/adaptive_selectivity_filter.rs b/datafusion/physical-expr/src/expressions/adaptive_selectivity_filter.rs index e6495d0ab19b6..83e8ecb9eb71c 100644 --- a/datafusion/physical-expr/src/expressions/adaptive_selectivity_filter.rs +++ b/datafusion/physical-expr/src/expressions/adaptive_selectivity_filter.rs @@ -378,10 +378,7 @@ mod tests { // Should return scalar true when disabled let ColumnarValue::Scalar(ScalarValue::Boolean(Some(true))) = result else { - panic!( - "Expected scalar true result when disabled, got: {:?}", - result - ); + panic!("Expected scalar true result when disabled, got: {result:?}"); }; } diff --git a/datafusion/sqllogictest/test_files/dynamic_filter_pushdown_config.slt b/datafusion/sqllogictest/test_files/dynamic_filter_pushdown_config.slt index 2c7cce75e2982..cea1a3265e745 100644 --- a/datafusion/sqllogictest/test_files/dynamic_filter_pushdown_config.slt +++ b/datafusion/sqllogictest/test_files/dynamic_filter_pushdown_config.slt @@ -494,13 +494,13 @@ query T SELECT value FROM information_schema.df_settings WHERE name = 
'datafusion.optimizer.adaptive_filter_selectivity_threshold'; ---- -0.85 +0.5 query T SELECT value FROM information_schema.df_settings WHERE name = 'datafusion.optimizer.adaptive_filter_min_rows_for_selectivity'; ---- -50000 +100000 # Enable adaptive selectivity tracking statement ok diff --git a/datafusion/sqllogictest/test_files/information_schema.slt b/datafusion/sqllogictest/test_files/information_schema.slt index d39317407f723..b86b5c18f4aec 100644 --- a/datafusion/sqllogictest/test_files/information_schema.slt +++ b/datafusion/sqllogictest/test_files/information_schema.slt @@ -291,8 +291,8 @@ datafusion.format.time_format %H:%M:%S%.f datafusion.format.timestamp_format %Y-%m-%dT%H:%M:%S%.f datafusion.format.timestamp_tz_format NULL datafusion.format.types_info false -datafusion.optimizer.adaptive_filter_min_rows_for_selectivity 50000 -datafusion.optimizer.adaptive_filter_selectivity_threshold 0.85 +datafusion.optimizer.adaptive_filter_min_rows_for_selectivity 100000 +datafusion.optimizer.adaptive_filter_selectivity_threshold 0.5 datafusion.optimizer.allow_symmetric_joins_without_pruning true datafusion.optimizer.default_filter_selectivity 20 datafusion.optimizer.enable_adaptive_filter_selectivity_tracking true @@ -432,8 +432,8 @@ datafusion.format.time_format %H:%M:%S%.f Time format for time arrays datafusion.format.timestamp_format %Y-%m-%dT%H:%M:%S%.f Timestamp format for timestamp arrays datafusion.format.timestamp_tz_format NULL Timestamp format for timestamp with timezone arrays. When `None`, ISO 8601 format is used. datafusion.format.types_info false Show types in visual representation batches -datafusion.optimizer.adaptive_filter_min_rows_for_selectivity 50000 Minimum number of rows to process before making a selectivity decision for adaptive filtering of join dynamic filters. The filter will remain in a tracking state until this many rows have been processed. This ensures statistical stability before making the disable decision. 
Only used when `enable_adaptive_filter_selectivity_tracking` is true. -datafusion.optimizer.adaptive_filter_selectivity_threshold 0.85 Selectivity threshold for adaptive disabling of join dynamic filters. If the filter passes this fraction or more of rows, it will be disabled. Value should be between 0.0 and 1.0. For example, 0.95 means if 95% or more of rows pass the filter, it will be disabled. Only used when `enable_adaptive_filter_selectivity_tracking` is true. +datafusion.optimizer.adaptive_filter_min_rows_for_selectivity 100000 Minimum number of rows to process before making a selectivity decision for adaptive filtering of join dynamic filters. The filter will remain in a tracking state until this many rows have been processed. This ensures statistical stability before making the disable decision. Only used when `enable_adaptive_filter_selectivity_tracking` is true. +datafusion.optimizer.adaptive_filter_selectivity_threshold 0.5 Selectivity threshold for adaptive disabling of join dynamic filters. If the filter passes this fraction or more of rows, it will be disabled. Value should be between 0.0 and 1.0. For example, 0.95 means if 95% or more of rows pass the filter, it will be disabled. Only used when `enable_adaptive_filter_selectivity_tracking` is true. datafusion.optimizer.allow_symmetric_joins_without_pruning true Should DataFusion allow symmetric hash joins for unbounded data sources even when its inputs do not have any ordering or filtering If the flag is not enabled, the SymmetricHashJoin operator will be unable to prune its internal buffers, resulting in certain join types - such as Full, Left, LeftAnti, LeftSemi, Right, RightAnti, and RightSemi - being produced only at the end of the execution. This is not typical in stream processing. Additionally, without proper design for long runner execution, all types of joins may encounter out-of-memory errors. 
datafusion.optimizer.default_filter_selectivity 20 The default filter selectivity used by Filter Statistics when an exact selectivity cannot be determined. Valid values are between 0 (no selectivity) and 100 (all rows are selected). datafusion.optimizer.enable_adaptive_filter_selectivity_tracking true Enable selectivity-based disabling of dynamic filters from joins. When enabled, join dynamic filters that pass most rows (above the threshold) will be automatically disabled to avoid evaluation overhead. This is useful when the build side of a join covers most of the probe side values, making the filter expensive to evaluate for little benefit. The selectivity tracking resets when the dynamic filter is updated (e.g., when the hash table is built), allowing the filter to be re-evaluated with new data. diff --git a/datafusion/sqllogictest/test_files/preserve_file_partitioning.slt b/datafusion/sqllogictest/test_files/preserve_file_partitioning.slt index 297094fab16e7..8e1d8a37cd438 100644 --- a/datafusion/sqllogictest/test_files/preserve_file_partitioning.slt +++ b/datafusion/sqllogictest/test_files/preserve_file_partitioning.slt @@ -370,7 +370,7 @@ physical_plan 10)------------------FilterExec: service@2 = log 11)--------------------RepartitionExec: partitioning=RoundRobinBatch(3), input_partitions=1 12)----------------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/preserve_file_partitioning/dimension/data.parquet]]}, projection=[d_dkey, env, service], file_type=parquet, predicate=service@2 = log, pruning_predicate=service_null_count@2 != row_count@3 AND service_min@0 <= log AND log <= service_max@1, required_guarantees=[service in (log)] -13)----------------DataSourceExec: file_groups={3 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/preserve_file_partitioning/fact/f_dkey=A/data.parquet], 
[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/preserve_file_partitioning/fact/f_dkey=B/data.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/preserve_file_partitioning/fact/f_dkey=C/data.parquet]]}, projection=[value, f_dkey], output_ordering=[f_dkey@1 ASC NULLS LAST], file_type=parquet, predicate=DynamicFilter [ empty ] +13)----------------DataSourceExec: file_groups={3 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/preserve_file_partitioning/fact/f_dkey=A/data.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/preserve_file_partitioning/fact/f_dkey=B/data.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/preserve_file_partitioning/fact/f_dkey=C/data.parquet]]}, projection=[value, f_dkey], output_ordering=[f_dkey@1 ASC NULLS LAST], file_type=parquet, predicate=AdaptiveSelectivity [ DynamicFilter [ empty ] ] # Verify results without optimization query TTTIR rowsort @@ -422,7 +422,7 @@ physical_plan 07)------------FilterExec: service@2 = log 08)--------------RepartitionExec: partitioning=RoundRobinBatch(3), input_partitions=1 09)----------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/preserve_file_partitioning/dimension/data.parquet]]}, projection=[d_dkey, env, service], file_type=parquet, predicate=service@2 = log, pruning_predicate=service_null_count@2 != row_count@3 AND service_min@0 <= log AND log <= service_max@1, required_guarantees=[service in (log)] -10)----------DataSourceExec: file_groups={3 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/preserve_file_partitioning/fact/f_dkey=A/data.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/preserve_file_partitioning/fact/f_dkey=B/data.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/preserve_file_partitioning/fact/f_dkey=C/data.parquet]]}, projection=[value, f_dkey], output_ordering=[f_dkey@1 ASC NULLS LAST], 
file_type=parquet, predicate=DynamicFilter [ empty ] +10)----------DataSourceExec: file_groups={3 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/preserve_file_partitioning/fact/f_dkey=A/data.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/preserve_file_partitioning/fact/f_dkey=B/data.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/preserve_file_partitioning/fact/f_dkey=C/data.parquet]]}, projection=[value, f_dkey], output_ordering=[f_dkey@1 ASC NULLS LAST], file_type=parquet, predicate=AdaptiveSelectivity [ DynamicFilter [ empty ] ] query TTTIR rowsort SELECT f.f_dkey, MAX(d.env), MAX(d.service), count(*), sum(f.value) diff --git a/docs/source/user-guide/configs.md b/docs/source/user-guide/configs.md index 0d7586c1231b8..ef8bdfc59b054 100644 --- a/docs/source/user-guide/configs.md +++ b/docs/source/user-guide/configs.md @@ -161,8 +161,8 @@ The following configuration settings are available: | datafusion.optimizer.hash_join_single_partition_threshold_rows | 131072 | The maximum estimated size in rows for one input side of a HashJoin will be collected into a single partition | | datafusion.optimizer.hash_join_inlist_pushdown_max_size | 131072 | Maximum size in bytes for the build side of a hash join to be pushed down as an InList expression for dynamic filtering. Build sides larger than this will use hash table lookups instead. Set to 0 to always use hash table lookups. InList pushdown can be more efficient for small build sides because it can result in better statistics pruning as well as use any bloom filters present on the scan side. InList expressions are also more transparent and easier to serialize over the network in distributed uses of DataFusion. On the other hand InList pushdown requires making a copy of the data and thus adds some overhead to the build side and uses more memory. 
This setting is per-partition, so we may end up using `hash_join_inlist_pushdown_max_size` \* `target_partitions` memory. The default is 128kB per partition. This should allow point lookup joins (e.g. joining on a unique primary key) to use InList pushdown in most cases but avoids excessive memory usage or overhead for larger joins. | | datafusion.optimizer.hash_join_inlist_pushdown_max_distinct_values | 150 | Maximum number of distinct values (rows) in the build side of a hash join to be pushed down as an InList expression for dynamic filtering. Build sides with more rows than this will use hash table lookups instead. Set to 0 to always use hash table lookups. This provides an additional limit beyond `hash_join_inlist_pushdown_max_size` to prevent very large IN lists that might not provide much benefit over hash table lookups. This uses the deduplicated row count once the build side has been evaluated. The default is 150 values per partition. This is inspired by Trino's `max-filter-keys-per-column` setting. See: | -| datafusion.optimizer.adaptive_filter_min_rows_for_selectivity | 50000 | Minimum number of rows to process before making a selectivity decision for adaptive filtering of join dynamic filters. The filter will remain in a tracking state until this many rows have been processed. This ensures statistical stability before making the disable decision. Only used when `enable_adaptive_filter_selectivity_tracking` is true. | -| datafusion.optimizer.adaptive_filter_selectivity_threshold | 0.85 | Selectivity threshold for adaptive disabling of join dynamic filters. If the filter passes this fraction or more of rows, it will be disabled. Value should be between 0.0 and 1.0. For example, 0.95 means if 95% or more of rows pass the filter, it will be disabled. Only used when `enable_adaptive_filter_selectivity_tracking` is true. 
| +| datafusion.optimizer.adaptive_filter_min_rows_for_selectivity | 100000 | Minimum number of rows to process before making a selectivity decision for adaptive filtering of join dynamic filters. The filter will remain in a tracking state until this many rows have been processed. This ensures statistical stability before making the disable decision. Only used when `enable_adaptive_filter_selectivity_tracking` is true. | +| datafusion.optimizer.adaptive_filter_selectivity_threshold | 0.5 | Selectivity threshold for adaptive disabling of join dynamic filters. If the filter passes this fraction or more of rows, it will be disabled. Value should be between 0.0 and 1.0. For example, 0.95 means if 95% or more of rows pass the filter, it will be disabled. Only used when `enable_adaptive_filter_selectivity_tracking` is true. | | datafusion.optimizer.enable_adaptive_filter_selectivity_tracking | true | Enable selectivity-based disabling of dynamic filters from joins. When enabled, join dynamic filters that pass most rows (above the threshold) will be automatically disabled to avoid evaluation overhead. This is useful when the build side of a join covers most of the probe side values, making the filter expensive to evaluate for little benefit. The selectivity tracking resets when the dynamic filter is updated (e.g., when the hash table is built), allowing the filter to be re-evaluated with new data. | | datafusion.optimizer.default_filter_selectivity | 20 | The default filter selectivity used by Filter Statistics when an exact selectivity cannot be determined. Valid values are between 0 (no selectivity) and 100 (all rows are selected). 
| | datafusion.optimizer.prefer_existing_union | false | When set to true, the optimizer will not attempt to convert Union to Interleave | From 6b80e85272c6c020de4b0377d043418d27ca120c Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Sat, 14 Feb 2026 00:00:41 +0000 Subject: [PATCH 14/14] fix slts: update DynamicFilter to AdaptiveSelectivity in new upstream tests Co-Authored-By: Claude Opus 4.6 --- .../sqllogictest/test_files/preserve_file_partitioning.slt | 2 +- datafusion/sqllogictest/test_files/projection_pushdown.slt | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/datafusion/sqllogictest/test_files/preserve_file_partitioning.slt b/datafusion/sqllogictest/test_files/preserve_file_partitioning.slt index 8e1d8a37cd438..2a3f21f19a229 100644 --- a/datafusion/sqllogictest/test_files/preserve_file_partitioning.slt +++ b/datafusion/sqllogictest/test_files/preserve_file_partitioning.slt @@ -648,7 +648,7 @@ physical_plan 06)----------RepartitionExec: partitioning=Hash([d_dkey@1], 3), input_partitions=3 07)------------DataSourceExec: file_groups={3 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/preserve_file_partitioning/dimension_partitioned/d_dkey=A/data.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/preserve_file_partitioning/dimension_partitioned/d_dkey=B/data.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/preserve_file_partitioning/dimension_partitioned/d_dkey=C/data.parquet]]}, projection=[env, d_dkey], file_type=parquet 08)----------RepartitionExec: partitioning=Hash([f_dkey@1], 3), input_partitions=3 -09)------------DataSourceExec: file_groups={3 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/preserve_file_partitioning/fact/f_dkey=A/data.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/preserve_file_partitioning/fact/f_dkey=B/data.parquet], 
[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/preserve_file_partitioning/fact/f_dkey=C/data.parquet]]}, projection=[value, f_dkey], file_type=parquet, predicate=DynamicFilter [ empty ] +09)------------DataSourceExec: file_groups={3 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/preserve_file_partitioning/fact/f_dkey=A/data.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/preserve_file_partitioning/fact/f_dkey=B/data.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/preserve_file_partitioning/fact/f_dkey=C/data.parquet]]}, projection=[value, f_dkey], file_type=parquet, predicate=AdaptiveSelectivity [ DynamicFilter [ empty ] ] query TTR rowsort SELECT f.f_dkey, d.env, sum(f.value) diff --git a/datafusion/sqllogictest/test_files/projection_pushdown.slt b/datafusion/sqllogictest/test_files/projection_pushdown.slt index 80aaedc55d903..253b1b7a56ad6 100644 --- a/datafusion/sqllogictest/test_files/projection_pushdown.slt +++ b/datafusion/sqllogictest/test_files/projection_pushdown.slt @@ -1896,7 +1896,7 @@ physical_plan 01)ProjectionExec: expr=[__datafusion_extracted_3@1 as s.s[value], __datafusion_extracted_4@0 as j.s[role]] 02)--HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(id@2, id@2)], filter=__datafusion_extracted_1@1 > __datafusion_extracted_2@0, projection=[__datafusion_extracted_4@1, __datafusion_extracted_3@4] 03)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/projection_pushdown/join_right.parquet]]}, projection=[get_field(s@1, level) as __datafusion_extracted_2, get_field(s@1, role) as __datafusion_extracted_4, id], file_type=parquet -04)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/projection_pushdown/simple.parquet]]}, projection=[get_field(s@1, value) as __datafusion_extracted_1, get_field(s@1, value) as __datafusion_extracted_3, id], file_type=parquet, 
predicate=DynamicFilter [ empty ] +04)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/projection_pushdown/simple.parquet]]}, projection=[get_field(s@1, value) as __datafusion_extracted_1, get_field(s@1, value) as __datafusion_extracted_3, id], file_type=parquet, predicate=AdaptiveSelectivity [ DynamicFilter [ empty ] ] # Verify correctness - only admin roles match (ids 1 and 4) query II @@ -1932,7 +1932,7 @@ logical_plan physical_plan 01)HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(id@1, id@1)], filter=__datafusion_extracted_1@0 > __datafusion_extracted_2@1, projection=[id@1, id@3] 02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/projection_pushdown/simple.parquet]]}, projection=[get_field(s@1, value) as __datafusion_extracted_1, id], file_type=parquet -03)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/projection_pushdown/join_right.parquet]]}, projection=[get_field(s@1, level) as __datafusion_extracted_2, id], file_type=parquet, predicate=DynamicFilter [ empty ] +03)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/projection_pushdown/join_right.parquet]]}, projection=[get_field(s@1, level) as __datafusion_extracted_2, id], file_type=parquet, predicate=AdaptiveSelectivity [ DynamicFilter [ empty ] ] # Verify correctness - all rows match since value >> level for all ids # simple_struct: (1,100), (2,200), (3,150), (4,300), (5,250)