From 8a78b63854185626bea3175c5d942fcc9c9d68ff Mon Sep 17 00:00:00 2001 From: Bo Lin Date: Tue, 5 Nov 2024 23:29:55 -0500 Subject: [PATCH 1/2] Fix `replace_qualified_name` --- datafusion/optimizer/src/utils.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/datafusion/optimizer/src/utils.rs b/datafusion/optimizer/src/utils.rs index 05b1744d90c5..9261a30abee2 100644 --- a/datafusion/optimizer/src/utils.rs +++ b/datafusion/optimizer/src/utils.rs @@ -101,9 +101,7 @@ pub(crate) fn replace_qualified_name( ) -> Result { let alias_cols: Vec = cols .iter() - .map(|col| { - Column::from_qualified_name(format!("{}.{}", subquery_alias, col.name)) - }) + .map(|col| Column::new(Some(subquery_alias), col.name())) .collect(); let replace_map: HashMap<&Column, &Column> = cols.iter().zip(alias_cols.iter()).collect(); From ef9d1a09ca8691b19e419be5fa8532d93f3552e5 Mon Sep 17 00:00:00 2001 From: Jax Liu Date: Fri, 13 Sep 2024 00:22:21 +0800 Subject: [PATCH 2/2] Handle case-sensitive identifier when decorrelating predicate subquery (#12443) * fix replace_qualified_name to handle case-sensitive ident * add tests * fix clippy --- .../src/decorrelate_predicate_subquery.rs | 35 +++++++++++++++++-- datafusion/optimizer/src/utils.rs | 2 +- datafusion/sqllogictest/test_files/join.slt | 31 ++++++++++++++++ 3 files changed, 65 insertions(+), 3 deletions(-) diff --git a/datafusion/optimizer/src/decorrelate_predicate_subquery.rs b/datafusion/optimizer/src/decorrelate_predicate_subquery.rs index b6d49490d437..bcbb614d2257 100644 --- a/datafusion/optimizer/src/decorrelate_predicate_subquery.rs +++ b/datafusion/optimizer/src/decorrelate_predicate_subquery.rs @@ -369,8 +369,8 @@ mod tests { use super::*; use crate::test::*; - use arrow::datatypes::DataType; - use datafusion_expr::{and, binary_expr, col, lit, not, or, out_ref_col}; + use arrow::datatypes::{DataType, Field, Schema}; + use datafusion_expr::{and, binary_expr, col, lit, not, or, out_ref_col, table_scan}; fn assert_optimized_plan_equal(plan: LogicalPlan, expected: &str) -> Result<()> { assert_optimized_plan_eq_display_indent( @@ -1908,4 +1908,35 @@ mod tests { assert_optimized_plan_equal(plan, expected) } + + #[test] + fn upper_case_ident() -> Result<()> { + let fields = vec![ + Field::new("A", DataType::UInt32, false), + Field::new("B", DataType::UInt32, false), + ]; + + let schema = Schema::new(fields); + let table_scan_a = table_scan(Some("\"TEST_A\""), &schema, None)?.build()?; + let table_scan_b = table_scan(Some("\"TEST_B\""), &schema, None)?.build()?; + + let subquery = LogicalPlanBuilder::from(table_scan_b) + .filter(col("\"A\"").eq(out_ref_col(DataType::UInt32, "\"TEST_A\".\"A\"")))? + .project(vec![lit(1)])? + .build()?; + + let plan = LogicalPlanBuilder::from(table_scan_a) + .filter(exists(Arc::new(subquery)))? + .project(vec![col("\"TEST_A\".\"B\"")])? + .build()?; + + let expected = "Projection: TEST_A.B [B:UInt32]\ + \n LeftSemi Join: Filter: __correlated_sq_1.A = TEST_A.A [A:UInt32, B:UInt32]\ + \n TableScan: TEST_A [A:UInt32, B:UInt32]\ + \n SubqueryAlias: __correlated_sq_1 [Int32(1):Int32, A:UInt32]\ + \n Projection: Int32(1), TEST_B.A [Int32(1):Int32, A:UInt32]\ + \n TableScan: TEST_B [A:UInt32, B:UInt32]"; + + assert_optimized_plan_equal(plan, expected) + } } diff --git a/datafusion/optimizer/src/utils.rs b/datafusion/optimizer/src/utils.rs index 9261a30abee2..aa233e253499 100644 --- a/datafusion/optimizer/src/utils.rs +++ b/datafusion/optimizer/src/utils.rs @@ -101,7 +101,7 @@ pub(crate) fn replace_qualified_name( ) -> Result { let alias_cols: Vec = cols .iter() - .map(|col| Column::new(Some(subquery_alias), col.name())) + .map(|col| Column::new(Some(subquery_alias), &col.name)) .collect(); let replace_map: HashMap<&Column, &Column> = cols.iter().zip(alias_cols.iter()).collect(); diff --git a/datafusion/sqllogictest/test_files/join.slt b/datafusion/sqllogictest/test_files/join.slt index 21fea4ad1025..3e7a08981eac 100644 --- a/datafusion/sqllogictest/test_files/join.slt +++ b/datafusion/sqllogictest/test_files/join.slt @@ -1178,3 +1178,34 @@ drop table t1; statement ok drop table t0; + +# Test decorrelate query with the uppercase table name and column name +statement ok +create table "T1"("C1" int, "C2" int); + +statement ok +create table "T2"("C1" int, "C3" int); + +statement ok +select "C1" from "T1" where not exists (select 1 from "T2" where "T1"."C1" = "T2"."C1") + +statement ok +create table t1(c1 int, c2 int); + +statement ok +create table t2(c1 int, c3 int); + +statement ok +select "C1" from (select c1 as "C1", c2 as "C2" from t1) as "T1" where not exists (select 1 from (select c1 as "C1", c3 as "C3" from t2) as "T2" where "T1"."C1" = "T2"."C1") + +statement ok +drop table "T1"; + +statement ok +drop table "T2"; + +statement ok +drop table t1; + +statement ok +drop table t2;