Skip to content

Commit

Permalink
Fix pruning on not equal predicate (#561)
Browse files Browse the repository at this point in the history
  • Loading branch information
alamb authored Jun 14, 2021
1 parent e46920a commit e3e7e29
Showing 1 changed file with 34 additions and 4 deletions.
38 changes: 34 additions & 4 deletions datafusion/src/physical_optimizer/pruning.rs
Original file line number Diff line number Diff line change
Expand Up @@ -553,12 +553,14 @@ fn build_predicate_expression(
let corrected_op = expr_builder.correct_operator(op);
let statistics_expr = match corrected_op {
Operator::NotEq => {
// column != literal => (min, max) = literal => min > literal || literal > max
// column != literal => (min, max) = literal =>
// !(min != literal && max != literal) ==>
// min != literal || literal != max
let min_column_expr = expr_builder.min_column_expr()?;
let max_column_expr = expr_builder.max_column_expr()?;
min_column_expr
.gt(expr_builder.scalar_expr().clone())
.or(expr_builder.scalar_expr().clone().gt(max_column_expr))
.not_eq(expr_builder.scalar_expr().clone())
.or(expr_builder.scalar_expr().clone().not_eq(max_column_expr))
}
Operator::Eq => {
// column = literal => (min, max) = literal => min <= literal && literal <= max
Expand Down Expand Up @@ -940,7 +942,7 @@ mod tests {
#[test]
fn row_group_predicate_not_eq() -> Result<()> {
let schema = Schema::new(vec![Field::new("c1", DataType::Int32, false)]);
let expected_expr = "#c1_min Gt Int32(1) Or Int32(1) Gt #c1_max";
let expected_expr = "#c1_min NotEq Int32(1) Or Int32(1) NotEq #c1_max";

// test column on the left
let expr = col("c1").not_eq(lit(1));
Expand Down Expand Up @@ -1190,6 +1192,34 @@ mod tests {
assert_eq!(result, expected);
}

#[test]
fn prune_not_eq_data() {
let schema = Arc::new(Schema::new(vec![Field::new("s1", DataType::Utf8, true)]));

// Prune using s2 != 'M'
let expr = col("s1").not_eq(lit("M"));

let statistics = TestStatistics::new().with(
"s1",
ContainerStats::new_utf8(
vec![Some("A"), Some("A"), Some("N"), Some("M"), None, Some("A")], // min
vec![Some("Z"), Some("L"), Some("Z"), Some("M"), None, None], // max
),
);

// s1 [A, Z] ==> might have values that pass predicate
// s1 [A, L] ==> all rows pass the predicate
// s1 [N, Z] ==> all rows pass the predicate
// s1 [M, M] ==> all rows do not pass the predicate
// No stats for s2 ==> some rows could pass
// s2 [3, None] (null max) ==> some rows could pass

let p = PruningPredicate::try_new(&expr, schema).unwrap();
let result = p.prune(&statistics).unwrap();
let expected = vec![true, true, true, false, true, true];
assert_eq!(result, expected);
}

/// Creates setup for boolean chunk pruning
///
/// For predicate "b1" (boolean expr)
Expand Down

0 comments on commit e3e7e29

Please sign in to comment.