From 68b0db268f81a941d32d6edba84a2f84c644a9f9 Mon Sep 17 00:00:00 2001 From: irenjj Date: Sat, 8 Mar 2025 11:07:37 +0800 Subject: [PATCH 1/5] Implement tree explain for `NestedLoopJoinExec`, `CrossJoinExec`, `SortMergeJoinExec` and `SymmetricHashJoinExec` --- .../physical-plan/src/joins/cross_join.rs | 8 +- .../src/joins/nested_loop_join.rs | 3 +- .../src/joins/sort_merge_join.rs | 11 +- .../src/joins/symmetric_hash_join.rs | 12 +- .../sqllogictest/test_files/explain_tree.slt | 138 ++++++++++++++++++ 5 files changed, 161 insertions(+), 11 deletions(-) diff --git a/datafusion/physical-plan/src/joins/cross_join.rs b/datafusion/physical-plan/src/joins/cross_join.rs index 48dc7c9df302..e0998862bdd8 100644 --- a/datafusion/physical-plan/src/joins/cross_join.rs +++ b/datafusion/physical-plan/src/joins/cross_join.rs @@ -237,13 +237,11 @@ impl DisplayAs for CrossJoinExec { f: &mut std::fmt::Formatter, ) -> std::fmt::Result { match t { - DisplayFormatType::Default | DisplayFormatType::Verbose => { + DisplayFormatType::Default + | DisplayFormatType::Verbose + | DisplayFormatType::TreeRender => { write!(f, "CrossJoinExec") } - DisplayFormatType::TreeRender => { - // TODO: collect info - write!(f, "") - } } } } diff --git a/datafusion/physical-plan/src/joins/nested_loop_join.rs b/datafusion/physical-plan/src/joins/nested_loop_join.rs index f680de6738e5..b7b81c9476f3 100644 --- a/datafusion/physical-plan/src/joins/nested_loop_join.rs +++ b/datafusion/physical-plan/src/joins/nested_loop_join.rs @@ -425,8 +425,7 @@ impl DisplayAs for NestedLoopJoinExec { ) } DisplayFormatType::TreeRender => { - // TODO: collect info - write!(f, "") + writeln!(f, "join_type={:?}", self.join_type) } } } diff --git a/datafusion/physical-plan/src/joins/sort_merge_join.rs b/datafusion/physical-plan/src/joins/sort_merge_join.rs index 20c0d76a9cdb..a2837c91bbe6 100644 --- a/datafusion/physical-plan/src/joins/sort_merge_join.rs +++ b/datafusion/physical-plan/src/joins/sort_merge_join.rs @@ -370,8 +370,15 
@@ impl DisplayAs for SortMergeJoinExec { ) } DisplayFormatType::TreeRender => { - // TODO: collect info - write!(f, "") + let on = self + .on + .iter() + .map(|(c1, c2)| format!("({}, {})", c1, c2)) + .collect::<Vec<String>>() + .join(", "); + + writeln!(f, "join_type={:?}", self.join_type)?; + writeln!(f, "on=[{}]", on) } } } diff --git a/datafusion/physical-plan/src/joins/symmetric_hash_join.rs b/datafusion/physical-plan/src/joins/symmetric_hash_join.rs index 03a68831438a..38e542d58b2f 100644 --- a/datafusion/physical-plan/src/joins/symmetric_hash_join.rs +++ b/datafusion/physical-plan/src/joins/symmetric_hash_join.rs @@ -381,8 +381,16 @@ impl DisplayAs for SymmetricHashJoinExec { ) } DisplayFormatType::TreeRender => { - // TODO: collect info - write!(f, "") + let on = self + .on + .iter() + .map(|(c1, c2)| format!("({}, {})", c1, c2)) + .collect::<Vec<String>>() + .join(", "); + + writeln!(f, "mode={:?}", self.mode)?; + writeln!(f, "join_type={:?}", self.join_type)?; + writeln!(f, "on={}", on) } } } diff --git a/datafusion/sqllogictest/test_files/explain_tree.slt b/datafusion/sqllogictest/test_files/explain_tree.slt index 9659bdae195d..65942e802e17 100644 --- a/datafusion/sqllogictest/test_files/explain_tree.slt +++ b/datafusion/sqllogictest/test_files/explain_tree.slt @@ -82,7 +82,43 @@ CREATE EXTERNAL TABLE table5 STORED AS ARROW LOCATION 'test_files/scratch/explain_tree/table5.arrow'; +statement ok +CREATE TABLE hashjoin_datatype_table_t1_source(c1 INT, c2 BIGINT, c3 DECIMAL(5,2), c4 VARCHAR) +AS VALUES +(1, 86400000, 1.23, 'abc'), +(2, 172800000, 456.00, 'def'), +(null, 259200000, 789.000, 'ghi'), +(3, null, -123.12, 'jkl') +; +statement ok +CREATE TABLE hashjoin_datatype_table_t1 +AS SELECT + arrow_cast(c1, 'Date32') as c1, + arrow_cast(c2, 'Date64') as c2, + c3, + arrow_cast(c4, 'Dictionary(Int32, Utf8)') as c4 +FROM + hashjoin_datatype_table_t1_source + +statement ok +CREATE TABLE hashjoin_datatype_table_t2_source(c1 INT, c2 BIGINT, c3 DECIMAL(10,2), c4 VARCHAR) +AS VALUES +(1,
86400000, -123.12, 'abc'), +(null, null, 100000.00, 'abcdefg'), +(null, 259200000, 0.00, 'qwerty'), +(3, null, 789.000, 'qwe') +; + +statement ok +CREATE TABLE hashjoin_datatype_table_t2 +AS SELECT + arrow_cast(c1, 'Date32') as c1, + arrow_cast(c2, 'Date64') as c2, + c3, + arrow_cast(c4, 'Dictionary(Int32, Utf8)') as c4 +FROM + hashjoin_datatype_table_t2_source ######## Begin Queries ######## @@ -519,6 +555,108 @@ physical_plan 17)│ format: arrow │ 18)└───────────────────────────┘ +# Query with nested loop join. +query TT +explain select int_col from table1 where exists (select count(*) from table2); +---- +logical_plan +01)LeftSemi Join: +02)--TableScan: table1 projection=[int_col], partial_filters=[Boolean(true)] +03)--SubqueryAlias: __correlated_sq_1 +04)----Projection: +05)------Aggregate: groupBy=[[]], aggr=[[count(Int64(1))]] +06)--------TableScan: table2 projection=[] +physical_plan +01)┌───────────────────────────┐ +02)│ NestedLoopJoinExec │ +03)│ -------------------- ├──────────────┐ +04)│ join_type: LeftSemi │ │ +05)└─────────────┬─────────────┘ │ +06)┌─────────────┴─────────────┐┌─────────────┴─────────────┐ +07)│ DataSourceExec ││ ProjectionExec │ +08)│ -------------------- ││ │ +09)│ files: 1 ││ │ +10)│ format: csv ││ │ +11)└───────────────────────────┘└─────────────┬─────────────┘ +12)-----------------------------┌─────────────┴─────────────┐ +13)-----------------------------│ AggregateExec │ +14)-----------------------------└─────────────┬─────────────┘ +15)-----------------------------┌─────────────┴─────────────┐ +16)-----------------------------│ CoalescePartitionsExec │ +17)-----------------------------└─────────────┬─────────────┘ +18)-----------------------------┌─────────────┴─────────────┐ +19)-----------------------------│ AggregateExec │ +20)-----------------------------└─────────────┬─────────────┘ +21)-----------------------------┌─────────────┴─────────────┐ +22)-----------------------------│ RepartitionExec │ 
+23)-----------------------------└─────────────┬─────────────┘ +24)-----------------------------┌─────────────┴─────────────┐ +25)-----------------------------│ DataSourceExec │ +26)-----------------------------│ -------------------- │ +27)-----------------------------│ files: 1 │ +28)-----------------------------│ format: parquet │ +29)-----------------------------└───────────────────────────┘ + +# Query with cross join. +query TT +explain select * from table1 cross join table2 ; +---- +logical_plan +01)Cross Join: +02)--TableScan: table1 projection=[int_col, string_col, bigint_col, date_col] +03)--TableScan: table2 projection=[int_col, string_col, bigint_col, date_col] +physical_plan +01)┌───────────────────────────┐ +02)│ CrossJoinExec ├──────────────┐ +03)└─────────────┬─────────────┘ │ +04)┌─────────────┴─────────────┐┌─────────────┴─────────────┐ +05)│ DataSourceExec ││ RepartitionExec │ +06)│ -------------------- ││ │ +07)│ files: 1 ││ │ +08)│ format: csv ││ │ +09)└───────────────────────────┘└─────────────┬─────────────┘ +10)-----------------------------┌─────────────┴─────────────┐ +11)-----------------------------│ DataSourceExec │ +12)-----------------------------│ -------------------- │ +13)-----------------------------│ files: 1 │ +14)-----------------------------│ format: parquet │ +15)-----------------------------└───────────────────────────┘ + + +# Query with sort merge join. 
+statement ok +set datafusion.optimizer.prefer_hash_join = false; + +query TT +explain select * from hashjoin_datatype_table_t1 t1 join hashjoin_datatype_table_t2 t2 on t1.c1 = t2.c1 +---- +logical_plan +01)Inner Join: t1.c1 = t2.c1 +02)--SubqueryAlias: t1 +03)----TableScan: hashjoin_datatype_table_t1 projection=[c1, c2, c3, c4] +04)--SubqueryAlias: t2 +05)----TableScan: hashjoin_datatype_table_t2 projection=[c1, c2, c3, c4] +physical_plan +01)┌───────────────────────────┐ +02)│ SortMergeJoinExec │ +03)│ -------------------- │ +04)│ join_type: Inner ├──────────────┐ +05)│ on: [(c1@0, c1@0)] │ │ +06)└─────────────┬─────────────┘ │ +07)┌─────────────┴─────────────┐┌─────────────┴─────────────┐ +08)│ SortExec ││ SortExec │ +09)└─────────────┬─────────────┘└─────────────┬─────────────┘ +10)┌─────────────┴─────────────┐┌─────────────┴─────────────┐ +11)│ DataSourceExec ││ DataSourceExec │ +12)│ -------------------- ││ -------------------- │ +13)│ bytes: 6040 ││ bytes: 6040 │ +14)│ format: memory ││ format: memory │ +15)│ rows: 1 ││ rows: 1 │ +16)└───────────────────────────┘└───────────────────────────┘ + +statement ok +set datafusion.optimizer.prefer_hash_join = true; + # cleanup statement ok drop table table1; From 77d83619969fa87b0176c44e658873fd61fca869 Mon Sep 17 00:00:00 2001 From: irenjj Date: Sat, 8 Mar 2025 13:22:19 +0800 Subject: [PATCH 2/5] fix issues --- datafusion/physical-plan/src/joins/sort_merge_join.rs | 2 +- datafusion/physical-plan/src/joins/symmetric_hash_join.rs | 2 +- datafusion/sqllogictest/test_files/explain_tree.slt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/datafusion/physical-plan/src/joins/sort_merge_join.rs b/datafusion/physical-plan/src/joins/sort_merge_join.rs index a2837c91bbe6..dcb95c77887e 100644 --- a/datafusion/physical-plan/src/joins/sort_merge_join.rs +++ b/datafusion/physical-plan/src/joins/sort_merge_join.rs @@ -373,7 +373,7 @@ impl DisplayAs for SortMergeJoinExec { let on = self .on .iter() - .map(|(c1, 
c2)| format!("({}, {})", c1, c2)) + .map(|(c1, c2)| format!("({} = {})", c1, c2)) .collect::<Vec<String>>() .join(", "); diff --git a/datafusion/physical-plan/src/joins/symmetric_hash_join.rs b/datafusion/physical-plan/src/joins/symmetric_hash_join.rs index 38e542d58b2f..a6c52252a21e 100644 --- a/datafusion/physical-plan/src/joins/symmetric_hash_join.rs +++ b/datafusion/physical-plan/src/joins/symmetric_hash_join.rs @@ -384,7 +384,7 @@ impl DisplayAs for SymmetricHashJoinExec { let on = self .on .iter() - .map(|(c1, c2)| format!("({}, {})", c1, c2)) + .map(|(c1, c2)| format!("({} = {})", c1, c2)) .collect::<Vec<String>>() .join(", "); diff --git a/datafusion/sqllogictest/test_files/explain_tree.slt b/datafusion/sqllogictest/test_files/explain_tree.slt index 65942e802e17..b4895675a4f3 100644 --- a/datafusion/sqllogictest/test_files/explain_tree.slt +++ b/datafusion/sqllogictest/test_files/explain_tree.slt @@ -641,7 +641,7 @@ physical_plan 02)│ SortMergeJoinExec │ 03)│ -------------------- │ 04)│ join_type: Inner ├──────────────┐ -05)│ on: [(c1@0, c1@0)] │ │ +05)│ on: [(c1@0 = c1@0)] │ │ 06)└─────────────┬─────────────┘ │ 07)┌─────────────┴─────────────┐┌─────────────┴─────────────┐ 08)│ SortExec ││ SortExec │ From 220cd71c65a8e9e80bd1536e4f936c3026d9b805 Mon Sep 17 00:00:00 2001 From: irenjj Date: Sat, 8 Mar 2025 20:32:35 +0800 Subject: [PATCH 3/5] fix issues --- .../src/joins/nested_loop_join.rs | 6 ++++- .../src/joins/sort_merge_join.rs | 6 +++-- .../src/joins/symmetric_hash_join.rs | 4 ++- .../sqllogictest/test_files/explain_tree.slt | 27 +++++++++---------- 4 files changed, 25 insertions(+), 18 deletions(-) diff --git a/datafusion/physical-plan/src/joins/nested_loop_join.rs b/datafusion/physical-plan/src/joins/nested_loop_join.rs index b7b81c9476f3..f6fa8878e033 100644 --- a/datafusion/physical-plan/src/joins/nested_loop_join.rs +++ b/datafusion/physical-plan/src/joins/nested_loop_join.rs @@ -425,7 +425,11 @@ impl DisplayAs for NestedLoopJoinExec { ) } DisplayFormatType::TreeRender =>
{ - writeln!(f, "join_type={:?}", self.join_type) + if *self.join_type() != JoinType::Inner { + writeln!(f, "join_type={:?}", self.join_type) + } else { + Ok(()) + } } } } diff --git a/datafusion/physical-plan/src/joins/sort_merge_join.rs b/datafusion/physical-plan/src/joins/sort_merge_join.rs index dcb95c77887e..d8446fb332b1 100644 --- a/datafusion/physical-plan/src/joins/sort_merge_join.rs +++ b/datafusion/physical-plan/src/joins/sort_merge_join.rs @@ -377,8 +377,10 @@ impl DisplayAs for SortMergeJoinExec { .collect::<Vec<String>>() .join(", "); - writeln!(f, "join_type={:?}", self.join_type)?; - writeln!(f, "on=[{}]", on) + if self.join_type() != JoinType::Inner { + writeln!(f, "join_type={:?}", self.join_type)?; + } + writeln!(f, "on={}", on) } } } diff --git a/datafusion/physical-plan/src/joins/symmetric_hash_join.rs b/datafusion/physical-plan/src/joins/symmetric_hash_join.rs index a6c52252a21e..63e95c7a3018 100644 --- a/datafusion/physical-plan/src/joins/symmetric_hash_join.rs +++ b/datafusion/physical-plan/src/joins/symmetric_hash_join.rs @@ -389,7 +389,9 @@ impl DisplayAs for SymmetricHashJoinExec { .join(", "); writeln!(f, "mode={:?}", self.mode)?; - writeln!(f, "join_type={:?}", self.join_type)?; + if *self.join_type() != JoinType::Inner { + writeln!(f, "join_type={:?}", self.join_type)?; + } writeln!(f, "on={}", on) } } } diff --git a/datafusion/sqllogictest/test_files/explain_tree.slt b/datafusion/sqllogictest/test_files/explain_tree.slt index b4895675a4f3..e3b9b9d6c5f0 100644 --- a/datafusion/sqllogictest/test_files/explain_tree.slt +++ b/datafusion/sqllogictest/test_files/explain_tree.slt @@ -639,20 +639,19 @@ logical_plan physical_plan 01)┌───────────────────────────┐ 02)│ SortMergeJoinExec │ -03)│ -------------------- │ -04)│ join_type: Inner ├──────────────┐ -05)│ on: [(c1@0 = c1@0)] │ │ -06)└─────────────┬─────────────┘ │ -07)┌─────────────┴─────────────┐┌─────────────┴─────────────┐ -08)│ SortExec ││ SortExec │
-09)└─────────────┬─────────────┘└─────────────┬─────────────┘ -10)┌─────────────┴─────────────┐┌─────────────┴─────────────┐ -11)│ DataSourceExec ││ DataSourceExec │ -12)│ -------------------- ││ -------------------- │ -13)│ bytes: 6040 ││ bytes: 6040 │ -14)│ format: memory ││ format: memory │ -15)│ rows: 1 ││ rows: 1 │ -16)└───────────────────────────┘└───────────────────────────┘ +03)│ -------------------- ├──────────────┐ +04)│ on: (c1@0 = c1@0) │ │ +05)└─────────────┬─────────────┘ │ +06)┌─────────────┴─────────────┐┌─────────────┴─────────────┐ +07)│ SortExec ││ SortExec │ +08)└─────────────┬─────────────┘└─────────────┬─────────────┘ +09)┌─────────────┴─────────────┐┌─────────────┴─────────────┐ +10)│ DataSourceExec ││ DataSourceExec │ +11)│ -------------------- ││ -------------------- │ +12)│ bytes: 6040 ││ bytes: 6040 │ +13)│ format: memory ││ format: memory │ +14)│ rows: 1 ││ rows: 1 │ +15)└───────────────────────────┘└───────────────────────────┘ statement ok set datafusion.optimizer.prefer_hash_join = true; From 7bad1b607c597e8e24751493f3b97f9f36254beb Mon Sep 17 00:00:00 2001 From: irenjj Date: Sun, 9 Mar 2025 21:52:07 +0800 Subject: [PATCH 4/5] fix --- .../sqllogictest/test_files/explain_tree.slt | 255 ++++++++++++++++++ 1 file changed, 255 insertions(+) diff --git a/datafusion/sqllogictest/test_files/explain_tree.slt b/datafusion/sqllogictest/test_files/explain_tree.slt index a71f6e07a719..08e3d27f2ee1 100644 --- a/datafusion/sqllogictest/test_files/explain_tree.slt +++ b/datafusion/sqllogictest/test_files/explain_tree.slt @@ -82,6 +82,19 @@ CREATE EXTERNAL TABLE table5 STORED AS ARROW LOCATION 'test_files/scratch/explain_tree/table5.arrow'; +statement ok +CREATE UNBOUNDED EXTERNAL TABLE annotated_data_infinite2 ( + a0 INTEGER, + a INTEGER, + b INTEGER, + c INTEGER, + d INTEGER +) +STORED AS CSV +WITH ORDER (a ASC, b ASC, c ASC) +LOCATION '../core/tests/data/window_2.csv' +OPTIONS ('format.has_header' 'true'); + statement ok CREATE TABLE 
hashjoin_datatype_table_t1_source(c1 INT, c2 BIGINT, c3 DECIMAL(5,2), c4 VARCHAR) AS VALUES @@ -564,6 +577,248 @@ physical_plan 17)│ format: arrow │ 18)└───────────────────────────┘ +# Query for sort. +query TT +explain SELECT * FROM table1 ORDER BY string_col; +---- +logical_plan +01)Sort: table1.string_col ASC NULLS LAST +02)--TableScan: table1 projection=[int_col, string_col, bigint_col, date_col] +physical_plan +01)┌───────────────────────────┐ +02)│ SortExec │ +03)│ -------------------- │ +04)│ sort keys: │ +05)│ [string_col@1 ASC NULLS │ +06)│ LAST] │ +07)└─────────────┬─────────────┘ +08)┌─────────────┴─────────────┐ +09)│ DataSourceExec │ +10)│ -------------------- │ +11)│ files: 1 │ +12)│ format: csv │ +13)└───────────────────────────┘ + +# Query for sort with limit. +query TT +explain SELECT * FROM table1 ORDER BY string_col LIMIT 1; +---- +logical_plan +01)Sort: table1.string_col ASC NULLS LAST, fetch=1 +02)--TableScan: table1 projection=[int_col, string_col, bigint_col, date_col] +physical_plan +01)┌───────────────────────────┐ +02)│ SortExec │ +03)│ -------------------- │ +04)│ limit: 1 │ +05)│ │ +06)│ sort keys: │ +07)│ [string_col@1 ASC NULLS │ +08)│ LAST] │ +09)└─────────────┬─────────────┘ +10)┌─────────────┴─────────────┐ +11)│ DataSourceExec │ +12)│ -------------------- │ +13)│ files: 1 │ +14)│ format: csv │ +15)└───────────────────────────┘ + +# Query with projection on csv +query TT +explain SELECT int_col, bigint_col, int_col+bigint_col AS sum_col FROM table1; +---- +logical_plan +01)Projection: table1.int_col, table1.bigint_col, CAST(table1.int_col AS Int64) + table1.bigint_col AS sum_col +02)--TableScan: table1 projection=[int_col, bigint_col] +physical_plan +01)┌───────────────────────────┐ +02)│ ProjectionExec │ +03)│ -------------------- │ +04)│ bigint_col: │ +05)│ bigint_col@1 │ +06)│ │ +07)│ int_col: int_col@0 │ +08)│ │ +09)│ sum_col: │ +10)│ CAST(int_col@0 AS Int64) +│ +11)│ bigint_col@1 │ +12)└─────────────┬─────────────┘ 
+13)┌─────────────┴─────────────┐ +14)│ RepartitionExec │ +15)└─────────────┬─────────────┘ +16)┌─────────────┴─────────────┐ +17)│ DataSourceExec │ +18)│ -------------------- │ +19)│ files: 1 │ +20)│ format: csv │ +21)└───────────────────────────┘ + + +# Query with projection on parquet +query TT +explain SELECT int_col, bigint_col, int_col+bigint_col AS sum_col FROM table2; +---- +logical_plan +01)Projection: table2.int_col, table2.bigint_col, CAST(table2.int_col AS Int64) + table2.bigint_col AS sum_col +02)--TableScan: table2 projection=[int_col, bigint_col] +physical_plan +01)┌───────────────────────────┐ +02)│ ProjectionExec │ +03)│ -------------------- │ +04)│ bigint_col: │ +05)│ bigint_col@1 │ +06)│ │ +07)│ int_col: int_col@0 │ +08)│ │ +09)│ sum_col: │ +10)│ CAST(int_col@0 AS Int64) +│ +11)│ bigint_col@1 │ +12)└─────────────┬─────────────┘ +13)┌─────────────┴─────────────┐ +14)│ RepartitionExec │ +15)└─────────────┬─────────────┘ +16)┌─────────────┴─────────────┐ +17)│ DataSourceExec │ +18)│ -------------------- │ +19)│ files: 1 │ +20)│ format: parquet │ +21)└───────────────────────────┘ + + +# Query with projection on memory +query TT +explain SELECT int_col, bigint_col, int_col+bigint_col AS sum_col FROM table3; +---- +logical_plan +01)Projection: table3.int_col, table3.bigint_col, CAST(table3.int_col AS Int64) + table3.bigint_col AS sum_col +02)--TableScan: table3 projection=[int_col, bigint_col] +physical_plan +01)┌───────────────────────────┐ +02)│ ProjectionExec │ +03)│ -------------------- │ +04)│ bigint_col: │ +05)│ bigint_col@1 │ +06)│ │ +07)│ int_col: int_col@0 │ +08)│ │ +09)│ sum_col: │ +10)│ CAST(int_col@0 AS Int64) +│ +11)│ bigint_col@1 │ +12)└─────────────┬─────────────┘ +13)┌─────────────┴─────────────┐ +14)│ DataSourceExec │ +15)│ -------------------- │ +16)│ bytes: 1560 │ +17)│ format: memory │ +18)│ rows: 1 │ +19)└───────────────────────────┘ + +# Query with projection on json +query TT +explain SELECT int_col, bigint_col, 
int_col+bigint_col AS sum_col FROM table4; +---- +logical_plan +01)Projection: table4.int_col, table4.bigint_col, table4.int_col + table4.bigint_col AS sum_col +02)--TableScan: table4 projection=[bigint_col, int_col] +physical_plan +01)┌───────────────────────────┐ +02)│ ProjectionExec │ +03)│ -------------------- │ +04)│ bigint_col: │ +05)│ bigint_col@0 │ +06)│ │ +07)│ int_col: int_col@1 │ +08)│ │ +09)│ sum_col: │ +10)│ int_col@1 + bigint_col@0 │ +11)└─────────────┬─────────────┘ +12)┌─────────────┴─────────────┐ +13)│ RepartitionExec │ +14)└─────────────┬─────────────┘ +15)┌─────────────┴─────────────┐ +16)│ DataSourceExec │ +17)│ -------------------- │ +18)│ files: 1 │ +19)│ format: json │ +20)└───────────────────────────┘ + + +# Query with projection on arrow +query TT +explain SELECT int_col, bigint_col, int_col+bigint_col AS sum_col FROM table5; +---- +logical_plan +01)Projection: table5.int_col, table5.bigint_col, CAST(table5.int_col AS Int64) + table5.bigint_col AS sum_col +02)--TableScan: table5 projection=[int_col, bigint_col] +physical_plan +01)┌───────────────────────────┐ +02)│ ProjectionExec │ +03)│ -------------------- │ +04)│ bigint_col: │ +05)│ bigint_col@1 │ +06)│ │ +07)│ int_col: int_col@0 │ +08)│ │ +09)│ sum_col: │ +10)│ CAST(int_col@0 AS Int64) +│ +11)│ bigint_col@1 │ +12)└─────────────┬─────────────┘ +13)┌─────────────┴─────────────┐ +14)│ RepartitionExec │ +15)└─────────────┬─────────────┘ +16)┌─────────────┴─────────────┐ +17)│ DataSourceExec │ +18)│ -------------------- │ +19)│ files: 1 │ +20)│ format: arrow │ +21)└───────────────────────────┘ + +# Query with PartialSortExec. 
+query TT +EXPLAIN SELECT * +FROM annotated_data_infinite2 +ORDER BY a, b, d; +---- +logical_plan +01)Sort: annotated_data_infinite2.a ASC NULLS LAST, annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.d ASC NULLS LAST +02)--TableScan: annotated_data_infinite2 projection=[a0, a, b, c, d] +physical_plan +01)┌───────────────────────────┐ +02)│ PartialSortExec │ +03)│ -------------------- │ +04)│ sort keys: │ +05)│ a@1 ASC NULLS LAST, b@2 │ +06)│ ASC NULLS LAST, d@4 │ +07)│ ASC NULLS LAST │ +08)└─────────────┬─────────────┘ +09)┌─────────────┴─────────────┐ +10)│ StreamingTableExec │ +11)└───────────────────────────┘ + +query TT +EXPLAIN SELECT * +FROM annotated_data_infinite2 +ORDER BY a, b, d +LIMIT 50; +---- +logical_plan +01)Sort: annotated_data_infinite2.a ASC NULLS LAST, annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.d ASC NULLS LAST, fetch=50 +02)--TableScan: annotated_data_infinite2 projection=[a0, a, b, c, d] +physical_plan +01)┌───────────────────────────┐ +02)│ PartialSortExec │ +03)│ -------------------- │ +04)│ limit: 50 │ +05)│ │ +06)│ sort keys: │ +07)│ a@1 ASC NULLS LAST, b@2 │ +08)│ ASC NULLS LAST, d@4 │ +09)│ ASC NULLS LAST │ +10)└─────────────┬─────────────┘ +11)┌─────────────┴─────────────┐ +12)│ StreamingTableExec │ +13)└───────────────────────────┘ + # Query with hash join. 
query TT explain select * from table1 inner join table2 on table1.int_col = table2.int_col and table1.string_col = table2.string_col; From d47d8876ee29edc4b8684cd8debd777af8906d82 Mon Sep 17 00:00:00 2001 From: irenjj Date: Sun, 9 Mar 2025 21:53:20 +0800 Subject: [PATCH 5/5] fix proj --- .../sqllogictest/test_files/explain_tree.slt | 98 ++++++++++++------- 1 file changed, 64 insertions(+), 34 deletions(-) diff --git a/datafusion/sqllogictest/test_files/explain_tree.slt b/datafusion/sqllogictest/test_files/explain_tree.slt index 08e3d27f2ee1..e0459b6da2ab 100644 --- a/datafusion/sqllogictest/test_files/explain_tree.slt +++ b/datafusion/sqllogictest/test_files/explain_tree.slt @@ -848,19 +848,33 @@ physical_plan 18)└─────────────┬─────────────┘└─────────────┬─────────────┘ 19)┌─────────────┴─────────────┐┌─────────────┴─────────────┐ 20)│ ProjectionExec ││ RepartitionExec │ -21)└─────────────┬─────────────┘└─────────────┬─────────────┘ -22)┌─────────────┴─────────────┐┌─────────────┴─────────────┐ -23)│ RepartitionExec ││ DataSourceExec │ -24)│ ││ -------------------- │ -25)│ ││ files: 1 │ -26)│ ││ format: parquet │ -27)└─────────────┬─────────────┘└───────────────────────────┘ -28)┌─────────────┴─────────────┐ -29)│ DataSourceExec │ -30)│ -------------------- │ -31)│ files: 1 │ -32)│ format: csv │ -33)└───────────────────────────┘ +21)│ -------------------- ││ │ +22)│ CAST(table1.string_col AS ││ │ +23)│ Utf8View): ││ │ +24)│ CAST(string_col@1 AS ││ │ +25)│ Utf8View) ││ │ +26)│ ││ │ +27)│ bigint_col: ││ │ +28)│ bigint_col@2 ││ │ +29)│ ││ │ +30)│ date_col: date_col@3 ││ │ +31)│ int_col: int_col@0 ││ │ +32)│ ││ │ +33)│ string_col: ││ │ +34)│ string_col@1 ││ │ +35)└─────────────┬─────────────┘└─────────────┬─────────────┘ +36)┌─────────────┴─────────────┐┌─────────────┴─────────────┐ +37)│ RepartitionExec ││ DataSourceExec │ +38)│ ││ -------------------- │ +39)│ ││ files: 1 │ +40)│ ││ format: parquet │ +41)└─────────────┬─────────────┘└───────────────────────────┘ 
+42)┌─────────────┴─────────────┐ +43)│ DataSourceExec │ +44)│ -------------------- │ +45)│ files: 1 │ +46)│ format: csv │ +47)└───────────────────────────┘ # Query with outer hash join. query TT @@ -893,19 +907,33 @@ physical_plan 20)└─────────────┬─────────────┘└─────────────┬─────────────┘ 21)┌─────────────┴─────────────┐┌─────────────┴─────────────┐ 22)│ ProjectionExec ││ RepartitionExec │ -23)└─────────────┬─────────────┘└─────────────┬─────────────┘ -24)┌─────────────┴─────────────┐┌─────────────┴─────────────┐ -25)│ RepartitionExec ││ DataSourceExec │ -26)│ ││ -------------------- │ -27)│ ││ files: 1 │ -28)│ ││ format: parquet │ -29)└─────────────┬─────────────┘└───────────────────────────┘ -30)┌─────────────┴─────────────┐ -31)│ DataSourceExec │ -32)│ -------------------- │ -33)│ files: 1 │ -34)│ format: csv │ -35)└───────────────────────────┘ +23)│ -------------------- ││ │ +24)│ CAST(table1.string_col AS ││ │ +25)│ Utf8View): ││ │ +26)│ CAST(string_col@1 AS ││ │ +27)│ Utf8View) ││ │ +28)│ ││ │ +29)│ bigint_col: ││ │ +30)│ bigint_col@2 ││ │ +31)│ ││ │ +32)│ date_col: date_col@3 ││ │ +33)│ int_col: int_col@0 ││ │ +34)│ ││ │ +35)│ string_col: ││ │ +36)│ string_col@1 ││ │ +37)└─────────────┬─────────────┘└─────────────┬─────────────┘ +38)┌─────────────┴─────────────┐┌─────────────┴─────────────┐ +39)│ RepartitionExec ││ DataSourceExec │ +40)│ ││ -------------------- │ +41)│ ││ files: 1 │ +42)│ ││ format: parquet │ +43)└─────────────┬─────────────┘└───────────────────────────┘ +44)┌─────────────┴─────────────┐ +45)│ DataSourceExec │ +46)│ -------------------- │ +47)│ files: 1 │ +48)│ format: csv │ +49)└───────────────────────────┘ # Query with nested loop join. 
query TT @@ -996,14 +1024,16 @@ physical_plan 05)└─────────────┬─────────────┘ │ 06)┌─────────────┴─────────────┐┌─────────────┴─────────────┐ 07)│ SortExec ││ SortExec │ -08)└─────────────┬─────────────┘└─────────────┬─────────────┘ -09)┌─────────────┴─────────────┐┌─────────────┴─────────────┐ -10)│ DataSourceExec ││ DataSourceExec │ -11)│ -------------------- ││ -------------------- │ -12)│ bytes: 6040 ││ bytes: 6040 │ -13)│ format: memory ││ format: memory │ -14)│ rows: 1 ││ rows: 1 │ -15)└───────────────────────────┘└───────────────────────────┘ +08)│ -------------------- ││ -------------------- │ +09)│ sort keys: [c1@0 ASC] ││ sort keys: [c1@0 ASC] │ +10)└─────────────┬─────────────┘└─────────────┬─────────────┘ +11)┌─────────────┴─────────────┐┌─────────────┴─────────────┐ +12)│ DataSourceExec ││ DataSourceExec │ +13)│ -------------------- ││ -------------------- │ +14)│ bytes: 6040 ││ bytes: 6040 │ +15)│ format: memory ││ format: memory │ +16)│ rows: 1 ││ rows: 1 │ +17)└───────────────────────────┘└───────────────────────────┘ statement ok set datafusion.optimizer.prefer_hash_join = true;