Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix bug in subquery join filters referencing outer query #2416

Merged
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 46 additions & 4 deletions datafusion/core/src/sql/planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ use datafusion_expr::{window_function::WindowFunction, BuiltinScalarFunction};
use hashbrown::HashMap;

use datafusion_common::field_not_found;
use datafusion_expr::logical_plan::Subquery;
use datafusion_expr::logical_plan::{Filter, Subquery};
use sqlparser::ast::{
BinaryOperator, DataType as SQLDataType, DateTimeField, Expr as SQLExpr, FunctionArg,
FunctionArgExpr, Ident, Join, JoinConstraint, JoinOperator, ObjectName, Query,
Expand Down Expand Up @@ -889,9 +889,10 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {

// remove join expressions from filter
match remove_join_expressions(&filter_expr, &all_join_keys)? {
Some(filter_expr) => {
LogicalPlanBuilder::from(left).filter(filter_expr)?.build()
}
Some(filter_expr) => Ok(LogicalPlan::Filter(Filter {
predicate: filter_expr,
input: Arc::new(left),
})),
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the bug fix. We cannot delegate to the LogicalPlanBuilder to build this Filter expression because it is not subquery-aware.

_ => Ok(left),
}
}
Expand Down Expand Up @@ -4249,6 +4250,18 @@ mod tests {
Field::new("t_date32", DataType::Date32, false),
Field::new("t_date64", DataType::Date64, false),
])),
"j1" => Some(Schema::new(vec![
Field::new("j1_id", DataType::Int32, false),
Field::new("j1_string", DataType::Utf8, false),
])),
"j2" => Some(Schema::new(vec![
Field::new("j2_id", DataType::Int32, false),
Field::new("j2_string", DataType::Utf8, false),
])),
"j3" => Some(Schema::new(vec![
Field::new("j3_id", DataType::Int32, false),
Field::new("j3_string", DataType::Utf8, false),
])),
"person" => Some(Schema::new(vec![
Field::new("id", DataType::UInt32, false),
Field::new("first_name", DataType::Utf8, false),
Expand Down Expand Up @@ -4523,6 +4536,35 @@ mod tests {
quick_test(sql, &expected);
}

#[test]
fn scalar_subquery_reference_outer_field() {
let sql = "SELECT j1_string, j2_string \
FROM j1, j2 \
WHERE j1_id = j2_id - 1 \
AND j2_id < (SELECT count(*) \
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh so tricky 👍

FROM j1, j3 \
WHERE j2_id = j1_id \
AND j1_id = j3_id)";

let subquery = "Subquery: Projection: #COUNT(UInt8(1))\
\n Aggregate: groupBy=[[]], aggr=[[COUNT(UInt8(1))]]\
\n Filter: #j2_id = #j1_id\
\n Inner Join: #j1.j1_id = #j3.j3_id\
\n TableScan: j1 projection=None\
\n TableScan: j3 projection=None";

let expected = format!(
"Projection: #j1.j1_string, #j2.j2_string\
\n Filter: #j1_id = #j2_id - Int64(1) AND #j2_id < ({})\
\n CrossJoin:\
\n TableScan: j1 projection=None\
\n TableScan: j2 projection=None",
subquery
);

quick_test(sql, &expected);
}

#[tokio::test]
async fn subquery_references_cte() {
let sql = "WITH \
Expand Down