From 0072eac71d369bae7972714fd3440b6647d45411 Mon Sep 17 00:00:00 2001 From: shruti2522 Date: Fri, 14 Mar 2025 20:13:58 +0530 Subject: [PATCH 1/3] feat: implement tree explain for locallimitexec --- datafusion/physical-plan/src/limit.rs | 3 +- .../sqllogictest/test_files/explain_tree.slt | 72 +++++++++++++++++++ 2 files changed, 73 insertions(+), 2 deletions(-) diff --git a/datafusion/physical-plan/src/limit.rs b/datafusion/physical-plan/src/limit.rs index b9464e3a88fb..89cf47a6d650 100644 --- a/datafusion/physical-plan/src/limit.rs +++ b/datafusion/physical-plan/src/limit.rs @@ -268,8 +268,7 @@ impl DisplayAs for LocalLimitExec { write!(f, "LocalLimitExec: fetch={}", self.fetch) } DisplayFormatType::TreeRender => { - // TODO: collect info - write!(f, "") + write!(f, "limit={}", self.fetch) } } } diff --git a/datafusion/sqllogictest/test_files/explain_tree.slt b/datafusion/sqllogictest/test_files/explain_tree.slt index 9e047133fcfc..b4080354b728 100644 --- a/datafusion/sqllogictest/test_files/explain_tree.slt +++ b/datafusion/sqllogictest/test_files/explain_tree.slt @@ -133,6 +133,12 @@ AS SELECT FROM hashjoin_datatype_table_t2_source +statement ok +CREATE TABLE limit_table AS +SELECT * FROM table1 +UNION ALL SELECT * FROM table1 +UNION ALL SELECT * FROM table1; + ######## Begin Queries ######## # Filter @@ -246,6 +252,72 @@ physical_plan 11)│ format: csv │ 12)└───────────────────────────┘ +query TT +explain VERBOSE +SELECT /*+ REPARTITION(4) */ * +FROM limit_table +LIMIT 10; +---- +initial_physical_plan +01)GlobalLimitExec: skip=0, fetch=10 +02)--LocalLimitExec: fetch=10 +03)----DataSourceExec: partitions=3, partition_sizes=[1, 1, 1] +initial_physical_plan_with_stats +01)GlobalLimitExec: skip=0, fetch=10, statistics=[Rows=Exact(9), Bytes=Exact(4680), [(Col[0]: Null=Exact(0)),(Col[1]: Null=Exact(0)),(Col[2]: Null=Exact(0)),(Col[3]: Null=Exact(0))]] +02)--LocalLimitExec: fetch=10, statistics=[Rows=Exact(9), Bytes=Exact(4680), [(Col[0]: Null=Exact(0)),(Col[1]: Null=Exact(0)),(Col[2]: Null=Exact(0)),(Col[3]: Null=Exact(0))]] +03)----DataSourceExec: partitions=3, partition_sizes=[1, 1, 1], statistics=[Rows=Exact(9), Bytes=Exact(4680), [(Col[0]: Null=Exact(0)),(Col[1]: Null=Exact(0)),(Col[2]: Null=Exact(0)),(Col[3]: Null=Exact(0))]] +initial_physical_plan_with_schema +01)GlobalLimitExec: skip=0, fetch=10, schema=[int_col:Int32;N, string_col:Utf8;N, bigint_col:Int64;N, date_col:Date32;N] +02)--LocalLimitExec: fetch=10, schema=[int_col:Int32;N, string_col:Utf8;N, bigint_col:Int64;N, date_col:Date32;N] +03)----DataSourceExec: partitions=3, partition_sizes=[1, 1, 1], schema=[int_col:Int32;N, string_col:Utf8;N, bigint_col:Int64;N, date_col:Date32;N] +physical_plan after OutputRequirements +01)OutputRequirementExec +02)--GlobalLimitExec: skip=0, fetch=10 +03)----LocalLimitExec: fetch=10 +04)------DataSourceExec: partitions=3, partition_sizes=[1, 1, 1] +physical_plan after aggregate_statistics SAME TEXT AS ABOVE +physical_plan after join_selection SAME TEXT AS ABOVE +physical_plan after LimitedDistinctAggregation SAME TEXT AS ABOVE +physical_plan after EnforceDistribution +01)OutputRequirementExec +02)--GlobalLimitExec: skip=0, fetch=10 +03)----CoalescePartitionsExec +04)------LocalLimitExec: fetch=10 +05)--------DataSourceExec: partitions=3, partition_sizes=[1, 1, 1] +physical_plan after CombinePartialFinalAggregate SAME TEXT AS ABOVE +physical_plan after EnforceSorting SAME TEXT AS ABOVE +physical_plan after OptimizeAggregateOrder SAME TEXT AS ABOVE +physical_plan after ProjectionPushdown SAME TEXT AS ABOVE +physical_plan after coalesce_batches SAME TEXT AS ABOVE +physical_plan after OutputRequirements +01)GlobalLimitExec: skip=0, fetch=10 +02)--CoalescePartitionsExec +03)----LocalLimitExec: fetch=10 +04)------DataSourceExec: partitions=3, partition_sizes=[1, 1, 1] +physical_plan after LimitAggregation SAME TEXT AS ABOVE +physical_plan after LimitPushdown +01)CoalescePartitionsExec: fetch=10 +02)--DataSourceExec: partitions=3, partition_sizes=[1, 1, 1], fetch=10 +physical_plan after ProjectionPushdown SAME TEXT AS ABOVE +physical_plan after SanityCheckPlan SAME TEXT AS ABOVE +physical_plan +01)┌───────────────────────────┐ +02)│ CoalescePartitionsExec │ +03)└─────────────┬─────────────┘ +04)┌─────────────┴─────────────┐ +05)│ DataSourceExec │ +06)│ -------------------- │ +07)│ bytes: 4680 │ +08)│ format: memory │ +09)│ rows: 3 │ +10)└───────────────────────────┘ +physical_plan_with_stats +01)CoalescePartitionsExec: fetch=10, statistics=[Rows=Exact(9), Bytes=Exact(4680), [(Col[0]: Null=Exact(0)),(Col[1]: Null=Exact(0)),(Col[2]: Null=Exact(0)),(Col[3]: Null=Exact(0))]] +02)--DataSourceExec: partitions=3, partition_sizes=[1, 1, 1], fetch=10, statistics=[Rows=Exact(9), Bytes=Exact(4680), [(Col[0]: Null=Exact(0)),(Col[1]: Null=Exact(0)),(Col[2]: Null=Exact(0)),(Col[3]: Null=Exact(0))]] +physical_plan_with_schema +01)CoalescePartitionsExec: fetch=10, schema=[int_col:Int32;N, string_col:Utf8;N, bigint_col:Int64;N, date_col:Date32;N] +02)--DataSourceExec: partitions=3, partition_sizes=[1, 1, 1], fetch=10, schema=[int_col:Int32;N, string_col:Utf8;N, bigint_col:Int64;N, date_col:Date32;N] + # 2 Joins query TT explain SELECT table1.string_col, table2.date_col FROM table1 JOIN table2 ON table1.int_col = table2.int_col; From 17fb753d4508f81b2861597ae3fbbbefd6474d13 Mon Sep 17 00:00:00 2001 From: shruti2522 Date: Fri, 14 Mar 2025 22:11:34 +0530 Subject: [PATCH 2/3] remove hint --- .../sqllogictest/test_files/explain_tree.slt | 34 ++++++++----------- 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/datafusion/sqllogictest/test_files/explain_tree.slt b/datafusion/sqllogictest/test_files/explain_tree.slt index b4080354b728..bddf210849d1 100644 --- a/datafusion/sqllogictest/test_files/explain_tree.slt +++ b/datafusion/sqllogictest/test_files/explain_tree.slt @@ -137,7 +137,6 @@ statement ok CREATE TABLE limit_table AS SELECT * FROM table1 UNION ALL SELECT * FROM table1 -UNION ALL SELECT * FROM table1; ######## Begin Queries ######## @@ -253,28 +252,25 @@ physical_plan 12)└───────────────────────────┘ query TT -explain VERBOSE -SELECT /*+ REPARTITION(4) */ * -FROM limit_table -LIMIT 10; +explain VERBOSE SELECT * FROM limit_table LIMIT 10; ---- initial_physical_plan 01)GlobalLimitExec: skip=0, fetch=10 02)--LocalLimitExec: fetch=10 -03)----DataSourceExec: partitions=3, partition_sizes=[1, 1, 1] +03)----DataSourceExec: partitions=2, partition_sizes=[1, 1] initial_physical_plan_with_stats -01)GlobalLimitExec: skip=0, fetch=10, statistics=[Rows=Exact(9), Bytes=Exact(4680), [(Col[0]: Null=Exact(0)),(Col[1]: Null=Exact(0)),(Col[2]: Null=Exact(0)),(Col[3]: Null=Exact(0))]] -02)--LocalLimitExec: fetch=10, statistics=[Rows=Exact(9), Bytes=Exact(4680), [(Col[0]: Null=Exact(0)),(Col[1]: Null=Exact(0)),(Col[2]: Null=Exact(0)),(Col[3]: Null=Exact(0))]] -03)----DataSourceExec: partitions=3, partition_sizes=[1, 1, 1], statistics=[Rows=Exact(9), Bytes=Exact(4680), [(Col[0]: Null=Exact(0)),(Col[1]: Null=Exact(0)),(Col[2]: Null=Exact(0)),(Col[3]: Null=Exact(0))]] +01)GlobalLimitExec: skip=0, fetch=10, statistics=[Rows=Exact(6), Bytes=Exact(3120), [(Col[0]: Null=Exact(0)),(Col[1]: Null=Exact(0)),(Col[2]: Null=Exact(0)),(Col[3]: Null=Exact(0))]] +02)--LocalLimitExec: fetch=10, statistics=[Rows=Exact(6), Bytes=Exact(3120), [(Col[0]: Null=Exact(0)),(Col[1]: Null=Exact(0)),(Col[2]: Null=Exact(0)),(Col[3]: Null=Exact(0))]] +03)----DataSourceExec: partitions=2, partition_sizes=[1, 1], statistics=[Rows=Exact(6), Bytes=Exact(3120), [(Col[0]: Null=Exact(0)),(Col[1]: Null=Exact(0)),(Col[2]: Null=Exact(0)),(Col[3]: Null=Exact(0))]] initial_physical_plan_with_schema 01)GlobalLimitExec: skip=0, fetch=10, schema=[int_col:Int32;N, string_col:Utf8;N, bigint_col:Int64;N, date_col:Date32;N] 02)--LocalLimitExec: fetch=10, schema=[int_col:Int32;N, string_col:Utf8;N, bigint_col:Int64;N, date_col:Date32;N] -03)----DataSourceExec: partitions=3, partition_sizes=[1, 1, 1], schema=[int_col:Int32;N, string_col:Utf8;N, bigint_col:Int64;N, date_col:Date32;N] +03)----DataSourceExec: partitions=2, partition_sizes=[1, 1], schema=[int_col:Int32;N, string_col:Utf8;N, bigint_col:Int64;N, date_col:Date32;N] physical_plan after OutputRequirements 01)OutputRequirementExec 02)--GlobalLimitExec: skip=0, fetch=10 03)----LocalLimitExec: fetch=10 -04)------DataSourceExec: partitions=3, partition_sizes=[1, 1, 1] +04)------DataSourceExec: partitions=2, partition_sizes=[1, 1] physical_plan after aggregate_statistics SAME TEXT AS ABOVE physical_plan after join_selection SAME TEXT AS ABOVE physical_plan after LimitedDistinctAggregation SAME TEXT AS ABOVE @@ -283,7 +279,7 @@ physical_plan after EnforceDistribution 02)--GlobalLimitExec: skip=0, fetch=10 03)----CoalescePartitionsExec 04)------LocalLimitExec: fetch=10 -05)--------DataSourceExec: partitions=3, partition_sizes=[1, 1, 1] +05)--------DataSourceExec: partitions=2, partition_sizes=[1, 1] physical_plan after CombinePartialFinalAggregate SAME TEXT AS ABOVE physical_plan after EnforceSorting SAME TEXT AS ABOVE physical_plan after OptimizeAggregateOrder SAME TEXT AS ABOVE @@ -293,11 +289,11 @@ physical_plan after OutputRequirements 01)GlobalLimitExec: skip=0, fetch=10 02)--CoalescePartitionsExec 03)----LocalLimitExec: fetch=10 -04)------DataSourceExec: partitions=3, partition_sizes=[1, 1, 1] +04)------DataSourceExec: partitions=2, partition_sizes=[1, 1] physical_plan after LimitAggregation SAME TEXT AS ABOVE physical_plan after LimitPushdown 01)CoalescePartitionsExec: fetch=10 -02)--DataSourceExec: partitions=3, partition_sizes=[1, 1, 1], fetch=10 +02)--DataSourceExec: partitions=2, partition_sizes=[1, 1], fetch=10 physical_plan after ProjectionPushdown SAME TEXT AS ABOVE physical_plan after SanityCheckPlan SAME TEXT AS ABOVE physical_plan @@ -307,16 +303,16 @@ physical_plan 04)┌─────────────┴─────────────┐ 05)│ DataSourceExec │ 06)│ -------------------- │ -07)│ bytes: 4680 │ +07)│ bytes: 3120 │ 08)│ format: memory │ -09)│ rows: 3 │ +09)│ rows: 2 │ 10)└───────────────────────────┘ physical_plan_with_stats -01)CoalescePartitionsExec: fetch=10, statistics=[Rows=Exact(9), Bytes=Exact(4680), [(Col[0]: Null=Exact(0)),(Col[1]: Null=Exact(0)),(Col[2]: Null=Exact(0)),(Col[3]: Null=Exact(0))]] -02)--DataSourceExec: partitions=3, partition_sizes=[1, 1, 1], fetch=10, statistics=[Rows=Exact(9), Bytes=Exact(4680), [(Col[0]: Null=Exact(0)),(Col[1]: Null=Exact(0)),(Col[2]: Null=Exact(0)),(Col[3]: Null=Exact(0))]] +01)CoalescePartitionsExec: fetch=10, statistics=[Rows=Exact(6), Bytes=Exact(3120), [(Col[0]: Null=Exact(0)),(Col[1]: Null=Exact(0)),(Col[2]: Null=Exact(0)),(Col[3]: Null=Exact(0))]] +02)--DataSourceExec: partitions=2, partition_sizes=[1, 1], fetch=10, statistics=[Rows=Exact(6), Bytes=Exact(3120), [(Col[0]: Null=Exact(0)),(Col[1]: Null=Exact(0)),(Col[2]: Null=Exact(0)),(Col[3]: Null=Exact(0))]] physical_plan_with_schema 01)CoalescePartitionsExec: fetch=10, schema=[int_col:Int32;N, string_col:Utf8;N, bigint_col:Int64;N, date_col:Date32;N] -02)--DataSourceExec: partitions=3, partition_sizes=[1, 1, 1], fetch=10, schema=[int_col:Int32;N, string_col:Utf8;N, bigint_col:Int64;N, date_col:Date32;N] +02)--DataSourceExec: partitions=2, partition_sizes=[1, 1], fetch=10, schema=[int_col:Int32;N, string_col:Utf8;N, bigint_col:Int64;N, date_col:Date32;N] # 2 Joins query TT From 3d375f0f7262f644b919da8563ef31e8ace253f6 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 14 Mar 2025 15:14:43 -0400 Subject: [PATCH 3/3] Do not use VERBOSE --- .../sqllogictest/test_files/explain_tree.slt | 50 +------------------ 1 file changed, 1 insertion(+), 49 deletions(-) diff --git a/datafusion/sqllogictest/test_files/explain_tree.slt b/datafusion/sqllogictest/test_files/explain_tree.slt index bddf210849d1..1a742768d579 100644 --- a/datafusion/sqllogictest/test_files/explain_tree.slt +++ b/datafusion/sqllogictest/test_files/explain_tree.slt @@ -252,50 +252,8 @@ physical_plan 12)└───────────────────────────┘ query TT -explain VERBOSE SELECT * FROM limit_table LIMIT 10; +explain SELECT * FROM limit_table LIMIT 10; ---- -initial_physical_plan -01)GlobalLimitExec: skip=0, fetch=10 -02)--LocalLimitExec: fetch=10 -03)----DataSourceExec: partitions=2, partition_sizes=[1, 1] -initial_physical_plan_with_stats -01)GlobalLimitExec: skip=0, fetch=10, statistics=[Rows=Exact(6), Bytes=Exact(3120), [(Col[0]: Null=Exact(0)),(Col[1]: Null=Exact(0)),(Col[2]: Null=Exact(0)),(Col[3]: Null=Exact(0))]] -02)--LocalLimitExec: fetch=10, statistics=[Rows=Exact(6), Bytes=Exact(3120), [(Col[0]: Null=Exact(0)),(Col[1]: Null=Exact(0)),(Col[2]: Null=Exact(0)),(Col[3]: Null=Exact(0))]] -03)----DataSourceExec: partitions=2, partition_sizes=[1, 1], statistics=[Rows=Exact(6), Bytes=Exact(3120), [(Col[0]: Null=Exact(0)),(Col[1]: Null=Exact(0)),(Col[2]: Null=Exact(0)),(Col[3]: Null=Exact(0))]] -initial_physical_plan_with_schema -01)GlobalLimitExec: skip=0, fetch=10, schema=[int_col:Int32;N, string_col:Utf8;N, bigint_col:Int64;N, date_col:Date32;N] -02)--LocalLimitExec: fetch=10, schema=[int_col:Int32;N, string_col:Utf8;N, bigint_col:Int64;N, date_col:Date32;N] -03)----DataSourceExec: partitions=2, partition_sizes=[1, 1], schema=[int_col:Int32;N, string_col:Utf8;N, bigint_col:Int64;N, date_col:Date32;N] -physical_plan after OutputRequirements -01)OutputRequirementExec -02)--GlobalLimitExec: skip=0, fetch=10 -03)----LocalLimitExec: fetch=10 -04)------DataSourceExec: partitions=2, partition_sizes=[1, 1] -physical_plan after aggregate_statistics SAME TEXT AS ABOVE -physical_plan after join_selection SAME TEXT AS ABOVE -physical_plan after LimitedDistinctAggregation SAME TEXT AS ABOVE -physical_plan after EnforceDistribution -01)OutputRequirementExec -02)--GlobalLimitExec: skip=0, fetch=10 -03)----CoalescePartitionsExec -04)------LocalLimitExec: fetch=10 -05)--------DataSourceExec: partitions=2, partition_sizes=[1, 1] -physical_plan after CombinePartialFinalAggregate SAME TEXT AS ABOVE -physical_plan after EnforceSorting SAME TEXT AS ABOVE -physical_plan after OptimizeAggregateOrder SAME TEXT AS ABOVE -physical_plan after ProjectionPushdown SAME TEXT AS ABOVE -physical_plan after coalesce_batches SAME TEXT AS ABOVE -physical_plan after OutputRequirements -01)GlobalLimitExec: skip=0, fetch=10 -02)--CoalescePartitionsExec -03)----LocalLimitExec: fetch=10 -04)------DataSourceExec: partitions=2, partition_sizes=[1, 1] -physical_plan after LimitAggregation SAME TEXT AS ABOVE -physical_plan after LimitPushdown -01)CoalescePartitionsExec: fetch=10 -02)--DataSourceExec: partitions=2, partition_sizes=[1, 1], fetch=10 -physical_plan after ProjectionPushdown SAME TEXT AS ABOVE -physical_plan after SanityCheckPlan SAME TEXT AS ABOVE physical_plan 01)┌───────────────────────────┐ 02)│ CoalescePartitionsExec │ @@ -307,12 +265,6 @@ physical_plan 08)│ format: memory │ 09)│ rows: 2 │ 10)└───────────────────────────┘ -physical_plan_with_stats -01)CoalescePartitionsExec: fetch=10, statistics=[Rows=Exact(6), Bytes=Exact(3120), [(Col[0]: Null=Exact(0)),(Col[1]: Null=Exact(0)),(Col[2]: Null=Exact(0)),(Col[3]: Null=Exact(0))]] -02)--DataSourceExec: partitions=2, partition_sizes=[1, 1], fetch=10, statistics=[Rows=Exact(6), Bytes=Exact(3120), [(Col[0]: Null=Exact(0)),(Col[1]: Null=Exact(0)),(Col[2]: Null=Exact(0)),(Col[3]: Null=Exact(0))]] -physical_plan_with_schema -01)CoalescePartitionsExec: fetch=10, schema=[int_col:Int32;N, string_col:Utf8;N, bigint_col:Int64;N, date_col:Date32;N] -02)--DataSourceExec: partitions=2, partition_sizes=[1, 1], fetch=10, schema=[int_col:Int32;N, string_col:Utf8;N, bigint_col:Int64;N, date_col:Date32;N] # 2 Joins query TT