From 720fa46737446b36324682b4480f24cbb531c021 Mon Sep 17 00:00:00 2001 From: TCeason <33082201+TCeason@users.noreply.github.com> Date: Sun, 2 Apr 2023 13:30:42 +0800 Subject: [PATCH] fix(query): expr children with projectset not fold count (#10827) * fix(query): expr children with projectset not fold count * if expr contains ProjectSet, do not prune unused columns * set ProjectSet precise_cardinality to None * add count() from (unnest()) explain result * fix ci: unnest explain result read bytes result diff * modify explain test result diff --- .../heuristic/prune_unused_columns.rs | 9 ++ src/query/sql/src/planner/optimizer/util.rs | 15 +++ .../sql/src/planner/plans/project_set.rs | 2 + .../mode/standalone/explain/project_set.test | 91 +++++++++++++++---- .../query/02_function/02_0062_function_unnest | 27 ++++++ 5 files changed, 124 insertions(+), 20 deletions(-) diff --git a/src/query/sql/src/planner/optimizer/heuristic/prune_unused_columns.rs b/src/query/sql/src/planner/optimizer/heuristic/prune_unused_columns.rs index d324024971cb1..fe869d0466635 100644 --- a/src/query/sql/src/planner/optimizer/heuristic/prune_unused_columns.rs +++ b/src/query/sql/src/planner/optimizer/heuristic/prune_unused_columns.rs @@ -15,6 +15,7 @@ use common_exception::ErrorCode; use common_exception::Result; +use crate::optimizer::util::contaions_project_set; use crate::optimizer::ColumnSet; use crate::optimizer::SExpr; use crate::plans::Aggregate; @@ -99,6 +100,14 @@ impl UnusedColumnPruner { RelOperator::EvalScalar(p) => { let mut used = vec![]; + if contaions_project_set(expr) { + return Ok(SExpr::create_unary( + RelOperator::EvalScalar(EvalScalar { + items: p.items.clone(), + }), + expr.child(0)?.clone(), + )); + } // Only keep columns needed by parent plan. 
for s in p.items.iter() { if !required.contains(&s.index) { diff --git a/src/query/sql/src/planner/optimizer/util.rs b/src/query/sql/src/planner/optimizer/util.rs index a7ffc3d2d757a..154547e038d5f 100644 --- a/src/query/sql/src/planner/optimizer/util.rs +++ b/src/query/sql/src/planner/optimizer/util.rs @@ -13,6 +13,8 @@ // limitations under the License. use super::SExpr; +use crate::plans::Operator; +use crate::plans::RelOp; use crate::plans::RelOperator; use crate::MetadataRef; @@ -28,3 +30,16 @@ pub fn contains_local_table_scan(s_expr: &SExpr, metadata: &MetadataRef) -> bool false } } + +/// Check whether any operator below `s_expr` is a ProjectSet op. +/// +/// Every child subtree is searched, not only the first one, so a ProjectSet +/// under any input of an operator with several children is detected. +/// The root operator of `s_expr` itself is not inspected. +/// Returns `false` when `s_expr` has no children. +pub fn contaions_project_set(s_expr: &SExpr) -> bool { + s_expr.children().iter().any(|child| { + // A child counts if it is a ProjectSet or has one beneath it. + matches!(child.plan.rel_op(), RelOp::ProjectSet) || contaions_project_set(child) + }) +} diff --git a/src/query/sql/src/planner/plans/project_set.rs b/src/query/sql/src/planner/plans/project_set.rs index bb43fffa60167..99d233b918b49 100644 --- a/src/query/sql/src/planner/plans/project_set.rs +++ b/src/query/sql/src/planner/plans/project_set.rs @@ -57,6 +57,8 @@ impl Operator for ProjectSet { child_prop.output_columns.insert(srf.index); } + // ProjectSet runs set-returning functions, so precise_cardinality is reset to None + child_prop.statistics.precise_cardinality = None; Ok(child_prop) } diff --git a/tests/sqllogictests/suites/mode/standalone/explain/project_set.test b/tests/sqllogictests/suites/mode/standalone/explain/project_set.test index ffc7077b3ef90..a0a0f4a6222d0 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/project_set.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/project_set.test @@ -1,3 +1,48 @@ +statement ok +use default; + +statement ok +drop table if exists fold_count; + +statement ok +create table fold_count(id array(int), c1 string); + +statement ok +insert into fold_count values([1,2,3,4], 'x'); + +query T +explain select count() 
from (select unnest(id), c1 from fold_count) +---- +EvalScalar +├── expressions: [count() (#4)] +├── estimated rows: 1.00 +└── AggregateFinal + ├── group by: [] + ├── aggregate functions: [count()] + ├── estimated rows: 1.00 + └── AggregatePartial + ├── group by: [] + ├── aggregate functions: [count()] + ├── estimated rows: 1.00 + └── EvalScalar + ├── expressions: [get(unnest (#2))] + ├── estimated rows: 1.00 + └── ProjectSet + ├── estimated rows: 1.00 + ├── set returning functions: unnest(CAST(fold_count.id (#0) AS Array(Int32 NULL))) + └── TableScan + ├── table: default.default.fold_count + ├── read rows: 1 + ├── read bytes: 87 + ├── partitions total: 1 + ├── partitions scanned: 1 + ├── pruning stats: [segments: , blocks: ] + ├── push downs: [filters: [], limit: NONE] + └── estimated rows: 1.00 + +statement ok +drop table fold_count; + statement ok drop database if exists project_set @@ -10,32 +55,38 @@ use project_set query T explain select number from (select unnest([1,2,3]), number from numbers(10)) t ---- -ProjectSet +EvalScalar +├── expressions: [get(unnest (#1))] ├── estimated rows: 10.00 -├── set returning functions: unnest([1, 2, 3]) -└── TableScan - ├── table: default.system.numbers - ├── read rows: 10 - ├── read bytes: 80 - ├── partitions total: 1 - ├── partitions scanned: 1 - ├── push downs: [filters: [], limit: NONE] - └── estimated rows: 10.00 + └── ProjectSet + ├── estimated rows: 10.00 + ├── set returning functions: unnest([1, 2, 3]) + └── TableScan + ├── table: default.system.numbers + ├── read rows: 10 + ├── read bytes: 80 + ├── partitions total: 1 + ├── partitions scanned: 1 + ├── push downs: [filters: [], limit: NONE] + └── estimated rows: 10.00 query T explain select number from (select unnest([1,2,3,number]), number from numbers(10)) t ---- -ProjectSet +EvalScalar +├── expressions: [get(unnest (#1))] ├── estimated rows: 10.00 -├── set returning functions: unnest(CAST(array(1, 2, 3, numbers.number (#0)) AS Array(UInt64 NULL))) -└── TableScan - 
├── table: default.system.numbers - ├── read rows: 10 - ├── read bytes: 80 - ├── partitions total: 1 - ├── partitions scanned: 1 - ├── push downs: [filters: [], limit: NONE] - └── estimated rows: 10.00 +└── ProjectSet + ├── estimated rows: 10.00 + ├── set returning functions: unnest(CAST(array(1, 2, 3, numbers.number (#0)) AS Array(UInt64 NULL))) + └── TableScan + ├── table: default.system.numbers + ├── read rows: 10 + ├── read bytes: 80 + ├── partitions total: 1 + ├── partitions scanned: 1 + ├── push downs: [filters: [], limit: NONE] + └── estimated rows: 10.00 statement ok drop database project_set diff --git a/tests/sqllogictests/suites/query/02_function/02_0062_function_unnest b/tests/sqllogictests/suites/query/02_function/02_0062_function_unnest index 28ffec5c87563..880a27fcfd4f9 100644 --- a/tests/sqllogictests/suites/query/02_function/02_0062_function_unnest +++ b/tests/sqllogictests/suites/query/02_function/02_0062_function_unnest @@ -369,6 +369,33 @@ drop table t; statement ok drop table t2; +statement ok +drop table if exists fold_count; + +statement ok +create table fold_count(id array(int), c1 string); + +statement ok +insert into fold_count values([1,2,3,4], 'x') + +query T +select concat(c,'y') as b from (select concat(c::String, c1) as c from (select unnest(id) as c, c1 from fold_count)); +---- +1xy +2xy +3xy +4xy + +query I +select count(b) from (select concat(c,'y') as b from (select concat(c::String, c1) as c from (select unnest(id) as c, c1 from fold_count))); +---- +4 + +query I +select count() from (select unnest(id), c1 from fold_count) +---- +4 + statement ok DROP DATABASE db_02_0062;