From a656b44d36a7559fe741aa323d406132c973613e Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Tue, 16 Nov 2021 14:51:47 -0800 Subject: [PATCH 01/16] Avoid changing output column name before pushdown projection. --- datafusion/src/execution/context.rs | 2 +- datafusion/tests/sql.rs | 16 ++++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/datafusion/src/execution/context.rs b/datafusion/src/execution/context.rs index b79c4fa2946b..27c3ff143f1a 100644 --- a/datafusion/src/execution/context.rs +++ b/datafusion/src/execution/context.rs @@ -898,13 +898,13 @@ impl Default for ExecutionConfig { target_partitions: num_cpus::get(), batch_size: 8192, optimizers: vec![ - Arc::new(ConstantFolding::new()), Arc::new(CommonSubexprEliminate::new()), Arc::new(EliminateLimit::new()), Arc::new(ProjectionPushDown::new()), Arc::new(FilterPushDown::new()), Arc::new(SimplifyExpressions::new()), Arc::new(LimitPushDown::new()), + Arc::new(ConstantFolding::new()), ], physical_optimizers: vec![ Arc::new(AggregateStatistics::new()), diff --git a/datafusion/tests/sql.rs b/datafusion/tests/sql.rs index b06b1700feac..a78043279e93 100644 --- a/datafusion/tests/sql.rs +++ b/datafusion/tests/sql.rs @@ -1282,6 +1282,22 @@ async fn csv_query_approx_count() -> Result<()> { Ok(()) } +#[tokio::test] +async fn query_count_without_from() -> Result<()> { + let mut ctx = ExecutionContext::new(); + let sql = "SELECT count(1 + 1)"; + let actual = execute_to_batches(&mut ctx, sql).await; + let expected = vec![ + "+-----------------+", + "| COUNT(Int64(2)) |", + "+-----------------+", + "| 1 |", + "+-----------------+", + ]; + assert_batches_eq!(expected, &actual); + Ok(()) +} + #[tokio::test] async fn csv_query_array_agg() -> Result<()> { let mut ctx = ExecutionContext::new(); From fe8445de9c62b9d9d0253d128f22866b46e0b0e4 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Wed, 17 Nov 2021 00:45:12 -0800 Subject: [PATCH 02/16] Avoid early optimization which is duplicate. --- datafusion/src/execution/context.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/datafusion/src/execution/context.rs b/datafusion/src/execution/context.rs index 27c3ff143f1a..84f6ec6c7741 100644 --- a/datafusion/src/execution/context.rs +++ b/datafusion/src/execution/context.rs @@ -268,10 +268,7 @@ impl ExecutionContext { } } - plan => Ok(Arc::new(DataFrameImpl::new( - self.state.clone(), - &self.optimize(&plan)?, - ))), + plan => Ok(Arc::new(DataFrameImpl::new(self.state.clone(), &plan))), } } From ebf67d347de60d2ce550a35f1a6c561632ca6fba Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Wed, 17 Nov 2021 01:09:13 -0800 Subject: [PATCH 03/16] Modify test. --- datafusion/src/execution/context.rs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/datafusion/src/execution/context.rs b/datafusion/src/execution/context.rs index 84f6ec6c7741..7feb36408f03 100644 --- a/datafusion/src/execution/context.rs +++ b/datafusion/src/execution/context.rs @@ -3114,7 +3114,7 @@ mod tests { } #[tokio::test] - async fn ctx_sql_should_optimize_plan() -> Result<()> { + async fn ctx_sql_should_not_optimize_plan() -> Result<()> { let mut ctx = ExecutionContext::new(); let plan1 = ctx .create_logical_plan("SELECT * FROM (SELECT 1) AS one WHERE TRUE AND TRUE")?; @@ -3125,11 +3125,16 @@ mod tests { .sql("SELECT * FROM (SELECT 1) AS one WHERE TRUE AND TRUE") .await?; - assert_eq!( + assert_ne!( format!("{:?}", opt_plan1), format!("{:?}", plan2.to_logical_plan()) ); + assert_eq!( + format!("{:?}", opt_plan1), + format!("{:?}", ctx.optimize(&plan2.to_logical_plan())?) + ); + Ok(()) } From 8e52b9421729c7fed463ad8968f788c8542d2319 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Wed, 17 Nov 2021 10:05:52 -0800 Subject: [PATCH 04/16] Revert "Modify test." This reverts commit ebf67d347de60d2ce550a35f1a6c561632ca6fba. --- datafusion/src/execution/context.rs | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/datafusion/src/execution/context.rs b/datafusion/src/execution/context.rs index 7feb36408f03..84f6ec6c7741 100644 --- a/datafusion/src/execution/context.rs +++ b/datafusion/src/execution/context.rs @@ -3114,7 +3114,7 @@ mod tests { } #[tokio::test] - async fn ctx_sql_should_not_optimize_plan() -> Result<()> { + async fn ctx_sql_should_optimize_plan() -> Result<()> { let mut ctx = ExecutionContext::new(); let plan1 = ctx .create_logical_plan("SELECT * FROM (SELECT 1) AS one WHERE TRUE AND TRUE")?; @@ -3125,14 +3125,9 @@ mod tests { .sql("SELECT * FROM (SELECT 1) AS one WHERE TRUE AND TRUE") .await?; - assert_ne!( - format!("{:?}", opt_plan1), - format!("{:?}", plan2.to_logical_plan()) - ); - assert_eq!( format!("{:?}", opt_plan1), - format!("{:?}", ctx.optimize(&plan2.to_logical_plan())?) + format!("{:?}", plan2.to_logical_plan()) ); Ok(()) From 3191563339325a12f888a4fa617cb29d5843181f Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Wed, 17 Nov 2021 10:06:03 -0800 Subject: [PATCH 05/16] Revert "Avoid early optimization which is duplicate." This reverts commit fe8445de9c62b9d9d0253d128f22866b46e0b0e4. --- datafusion/src/execution/context.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/datafusion/src/execution/context.rs b/datafusion/src/execution/context.rs index 84f6ec6c7741..27c3ff143f1a 100644 --- a/datafusion/src/execution/context.rs +++ b/datafusion/src/execution/context.rs @@ -268,7 +268,10 @@ impl ExecutionContext { } } - plan => Ok(Arc::new(DataFrameImpl::new(self.state.clone(), &plan))), + plan => Ok(Arc::new(DataFrameImpl::new( + self.state.clone(), + &self.optimize(&plan)?, + ))), } } From 1cfbba088b8c8afc617674a5166959d0dde95ebd Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Wed, 17 Nov 2021 11:18:11 -0800 Subject: [PATCH 06/16] Add aliases during constant folding. --- datafusion/src/execution/context.rs | 2 +- datafusion/src/optimizer/constant_folding.rs | 7 ++++++- datafusion/src/physical_plan/planner.rs | 18 +++++++++++++----- datafusion/tests/sql.rs | 10 +++++----- 4 files changed, 25 insertions(+), 12 deletions(-) diff --git a/datafusion/src/execution/context.rs b/datafusion/src/execution/context.rs index 27c3ff143f1a..b79c4fa2946b 100644 --- a/datafusion/src/execution/context.rs +++ b/datafusion/src/execution/context.rs @@ -898,13 +898,13 @@ impl Default for ExecutionConfig { target_partitions: num_cpus::get(), batch_size: 8192, optimizers: vec![ + Arc::new(ConstantFolding::new()), Arc::new(CommonSubexprEliminate::new()), Arc::new(EliminateLimit::new()), Arc::new(ProjectionPushDown::new()), Arc::new(FilterPushDown::new()), Arc::new(SimplifyExpressions::new()), Arc::new(LimitPushDown::new()), - Arc::new(ConstantFolding::new()), ], physical_optimizers: vec![ Arc::new(AggregateStatistics::new()), diff --git a/datafusion/src/optimizer/constant_folding.rs b/datafusion/src/optimizer/constant_folding.rs index a0bc04a0caf5..4a5f2c475205 100644 --- a/datafusion/src/optimizer/constant_folding.rs +++ b/datafusion/src/optimizer/constant_folding.rs @@ -92,6 +92,10 @@ impl OptimizerRule for ConstantFolding { .expressions() .into_iter() .map(|e| { + // We need to keep original expression name, constant folding + // should not change expression name. + let name = &e.name(plan.schema())?; + // TODO iterate until no changes are made // during rewrite (evaluating constants can // enable new simplifications and @@ -101,7 +105,8 @@ impl OptimizerRule for ConstantFolding { // fold constants and then simplify .rewrite(&mut const_evaluator)? .rewrite(&mut simplifier)?; - Ok(new_e) + + Ok(new_e.alias(name)) }) .collect::>>()?; diff --git a/datafusion/src/physical_plan/planner.rs b/datafusion/src/physical_plan/planner.rs index dc7cf25934dc..4e04da7377f8 100644 --- a/datafusion/src/physical_plan/planner.rs +++ b/datafusion/src/physical_plan/planner.rs @@ -1342,6 +1342,17 @@ impl DefaultPhysicalPlanner { } } + fn remove_aliases<'a>(&self, expr: &'a Expr) -> Result<(String, &'a Expr)> { + let (name, e) = match expr { + Expr::Alias(sub_expr, alias) => match &**sub_expr { + Expr::Alias(nested_expr, _) => self.remove_aliases(&*nested_expr)?, + _ => (alias.clone(), sub_expr.as_ref()), + }, + _ => (physical_name(expr)?, expr), + }; + Ok((name, e)) + } + /// Create an aggregate expression from a logical expression or an alias pub fn create_aggregate_expr( &self, @@ -1350,11 +1361,8 @@ impl DefaultPhysicalPlanner { physical_input_schema: &Schema, ctx_state: &ExecutionContextState, ) -> Result> { - // unpack aliased logical expressions, e.g. "sum(col) as total" - let (name, e) = match e { - Expr::Alias(sub_expr, alias) => (alias.clone(), sub_expr.as_ref()), - _ => (physical_name(e)?, e), - }; + // unpack (nested) aliased logical expressions, e.g. "sum(col) as total" + let (name, e) = self.remove_aliases(e)?; self.create_aggregate_expr_with_name( e, diff --git a/datafusion/tests/sql.rs b/datafusion/tests/sql.rs index a78043279e93..cd30a98b8d64 100644 --- a/datafusion/tests/sql.rs +++ b/datafusion/tests/sql.rs @@ -1288,11 +1288,11 @@ async fn query_count_without_from() -> Result<()> { let sql = "SELECT count(1 + 1)"; let actual = execute_to_batches(&mut ctx, sql).await; let expected = vec![ - "+-----------------+", - "| COUNT(Int64(2)) |", - "+-----------------+", - "| 1 |", - "+-----------------+", + "+----------------------------+", + "| COUNT(Int64(1) + Int64(1)) |", + "+----------------------------+", + "| 1 |", + "+----------------------------+", ]; assert_batches_eq!(expected, &actual); Ok(()) From 46315a1cafbbd8453dff5e6dd11c607c9a86af26 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Wed, 17 Nov 2021 11:58:40 -0800 Subject: [PATCH 07/16] Some expressions don't support name. --- datafusion/src/optimizer/constant_folding.rs | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/datafusion/src/optimizer/constant_folding.rs b/datafusion/src/optimizer/constant_folding.rs index 4a5f2c475205..35546f8e3d5b 100644 --- a/datafusion/src/optimizer/constant_folding.rs +++ b/datafusion/src/optimizer/constant_folding.rs @@ -92,9 +92,9 @@ impl OptimizerRule for ConstantFolding { .expressions() .into_iter() .map(|e| { - // We need to keep original expression name, constant folding - // should not change expression name. - let name = &e.name(plan.schema())?; + // We need to keep original expression name, if any. + // Constant folding should not change expression name. + let name = &e.name(plan.schema()); // TODO iterate until no changes are made // during rewrite (evaluating constants can @@ -106,7 +106,11 @@ impl OptimizerRule for ConstantFolding { .rewrite(&mut const_evaluator)? .rewrite(&mut simplifier)?; - Ok(new_e.alias(name)) + if let Ok(expr_name) = name { + Ok(new_e.alias(expr_name)) + } else { + Ok(new_e) + } }) .collect::>>()?; From aa8cf15cc7ae13156346a1e5df0b4ac4571bd7e6 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Wed, 17 Nov 2021 15:01:00 -0800 Subject: [PATCH 08/16] Don't create redundant alias. --- datafusion/src/optimizer/constant_folding.rs | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/datafusion/src/optimizer/constant_folding.rs b/datafusion/src/optimizer/constant_folding.rs index 35546f8e3d5b..e1e8df0b0390 100644 --- a/datafusion/src/optimizer/constant_folding.rs +++ b/datafusion/src/optimizer/constant_folding.rs @@ -106,10 +106,15 @@ impl OptimizerRule for ConstantFolding { .rewrite(&mut const_evaluator)? .rewrite(&mut simplifier)?; - if let Ok(expr_name) = name { - Ok(new_e.alias(expr_name)) - } else { - Ok(new_e) + match &new_e { + Expr::Alias(_, _) | Expr::Column(_) => Ok(new_e), + _ => { + if let Ok(expr_name) = name { + Ok(new_e.alias(expr_name)) + } else { + Ok(new_e) + } + } } }) .collect::>>()?; From 7c22b5d12d0d7e629b9ff4d207d7b326be9c2100 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Wed, 17 Nov 2021 16:46:18 -0800 Subject: [PATCH 09/16] Only add alias for certain plans. --- datafusion/src/optimizer/constant_folding.rs | 31 ++++++++++++++------ 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/datafusion/src/optimizer/constant_folding.rs b/datafusion/src/optimizer/constant_folding.rs index e1e8df0b0390..eee58420e660 100644 --- a/datafusion/src/optimizer/constant_folding.rs +++ b/datafusion/src/optimizer/constant_folding.rs @@ -106,15 +106,28 @@ impl OptimizerRule for ConstantFolding { .rewrite(&mut const_evaluator)? .rewrite(&mut simplifier)?; - match &new_e { - Expr::Alias(_, _) | Expr::Column(_) => Ok(new_e), - _ => { - if let Ok(expr_name) = name { - Ok(new_e.alias(expr_name)) - } else { - Ok(new_e) - } + let new_name = &new_e.name(plan.schema()); + + // Some plans will be candidates in projection pushdown rule to + // trim expressions based on expression names. We need to keep + // expression name for them. + let is_plan_for_projection_pushdown = match plan { + LogicalPlan::Window { .. } + | LogicalPlan::Aggregate { .. } + | LogicalPlan::Union { .. } => true, + _ => false, + }; + + if let (Ok(expr_name), Ok(new_expr_name)) = (name, new_name) { + if expr_name != new_expr_name + && is_plan_for_projection_pushdown + { + Ok(new_e.alias(expr_name)) + } else { + Ok(new_e) } + } else { + Ok(new_e) } }) .collect::>>()?; @@ -747,7 +760,7 @@ mod tests { .build()?; let expected = "\ - Aggregate: groupBy=[[#test.a, #test.c]], aggr=[[MAX(#test.b), MIN(#test.b)]]\ + Aggregate: groupBy=[[#test.a, #test.c]], aggr=[[MAX(#test.b) AS MAX(test.b = Boolean(true)), MIN(#test.b)]]\ \n Projection: #test.a, #test.c, #test.b\ \n TableScan: test projection=None"; From 166b41a3d6b8588312afc749295884ec70d7f9bb Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Wed, 17 Nov 2021 17:09:27 -0800 Subject: [PATCH 10/16] Fix clippy. --- datafusion/src/optimizer/constant_folding.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/datafusion/src/optimizer/constant_folding.rs b/datafusion/src/optimizer/constant_folding.rs index eee58420e660..43adeef2fe79 100644 --- a/datafusion/src/optimizer/constant_folding.rs +++ b/datafusion/src/optimizer/constant_folding.rs @@ -111,12 +111,12 @@ impl OptimizerRule for ConstantFolding { // Some plans will be candidates in projection pushdown rule to // trim expressions based on expression names. We need to keep // expression name for them. - let is_plan_for_projection_pushdown = match plan { + let is_plan_for_projection_pushdown = matches!( + plan, LogicalPlan::Window { .. } - | LogicalPlan::Aggregate { .. } - | LogicalPlan::Union { .. } => true, - _ => false, - }; + | LogicalPlan::Aggregate { .. } + | LogicalPlan::Union { .. } + ); if let (Ok(expr_name), Ok(new_expr_name)) = (name, new_name) { if expr_name != new_expr_name From d767aebc1a4413485dd6d5117c481f5e37070cad Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Thu, 18 Nov 2021 18:52:55 -0800 Subject: [PATCH 11/16] Fix. --- datafusion/src/optimizer/constant_folding.rs | 45 +++++++++----------- 1 file changed, 21 insertions(+), 24 deletions(-) diff --git a/datafusion/src/optimizer/constant_folding.rs b/datafusion/src/optimizer/constant_folding.rs index 43adeef2fe79..b604519f17a3 100644 --- a/datafusion/src/optimizer/constant_folding.rs +++ b/datafusion/src/optimizer/constant_folding.rs @@ -92,9 +92,7 @@ impl OptimizerRule for ConstantFolding { .expressions() .into_iter() .map(|e| { - // We need to keep original expression name, if any. - // Constant folding should not change expression name. - let name = &e.name(plan.schema()); + let org_name = &e.name(plan.schema()); // TODO iterate until no changes are made // during rewrite (evaluating constants can @@ -106,28 +104,27 @@ impl OptimizerRule for ConstantFolding { .rewrite(&mut const_evaluator)? .rewrite(&mut simplifier)?; - let new_name = &new_e.name(plan.schema()); - - // Some plans will be candidates in projection pushdown rule to - // trim expressions based on expression names. We need to keep - // expression name for them. - let is_plan_for_projection_pushdown = matches!( - plan, - LogicalPlan::Window { .. } - | LogicalPlan::Aggregate { .. } - | LogicalPlan::Union { .. } - ); - - if let (Ok(expr_name), Ok(new_expr_name)) = (name, new_name) { - if expr_name != new_expr_name - && is_plan_for_projection_pushdown - { - Ok(new_e.alias(expr_name)) - } else { - Ok(new_e) + match plan { + LogicalPlan::Projection { .. } + | LogicalPlan::Window { .. } + | LogicalPlan::Aggregate { .. } => { + // We need to keep original expression name. + // Constant folding should not change expression name. + let new_name = &new_e.name(plan.schema()); + if let (Ok(expr_name), Ok(new_expr_name)) = + (org_name, new_name) + { + if expr_name != new_expr_name { + let new_alias_expr = new_e.alias(expr_name); + Ok(new_alias_expr) + } else { + Ok(new_e) + } + } else { + Ok(new_e) + } } - } else { - Ok(new_e) + _ => Ok(new_e), } }) .collect::>>()?; From 8d715de6987022bb3cbad1b6899fbc6f7c21076b Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Thu, 18 Nov 2021 19:32:09 -0800 Subject: [PATCH 12/16] Revert "Fix." This reverts commit d767aebc1a4413485dd6d5117c481f5e37070cad. --- datafusion/src/optimizer/constant_folding.rs | 45 +++++++++++--------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/datafusion/src/optimizer/constant_folding.rs b/datafusion/src/optimizer/constant_folding.rs index b604519f17a3..43adeef2fe79 100644 --- a/datafusion/src/optimizer/constant_folding.rs +++ b/datafusion/src/optimizer/constant_folding.rs @@ -92,7 +92,9 @@ impl OptimizerRule for ConstantFolding { .expressions() .into_iter() .map(|e| { - let org_name = &e.name(plan.schema()); + // We need to keep original expression name, if any. + // Constant folding should not change expression name. + let name = &e.name(plan.schema()); // TODO iterate until no changes are made // during rewrite (evaluating constants can @@ -104,27 +106,28 @@ impl OptimizerRule for ConstantFolding { .rewrite(&mut const_evaluator)? .rewrite(&mut simplifier)?; - match plan { - LogicalPlan::Projection { .. } - | LogicalPlan::Window { .. } - | LogicalPlan::Aggregate { .. } => { - // We need to keep original expression name. - // Constant folding should not change expression name. - let new_name = &new_e.name(plan.schema()); - if let (Ok(expr_name), Ok(new_expr_name)) = - (org_name, new_name) - { - if expr_name != new_expr_name { - let new_alias_expr = new_e.alias(expr_name); - Ok(new_alias_expr) - } else { - Ok(new_e) - } - } else { - Ok(new_e) - } + let new_name = &new_e.name(plan.schema()); + + // Some plans will be candidates in projection pushdown rule to + // trim expressions based on expression names. We need to keep + // expression name for them. + let is_plan_for_projection_pushdown = matches!( + plan, + LogicalPlan::Window { .. } + | LogicalPlan::Aggregate { .. } + | LogicalPlan::Union { .. } + ); + + if let (Ok(expr_name), Ok(new_expr_name)) = (name, new_name) { + if expr_name != new_expr_name + && is_plan_for_projection_pushdown + { + Ok(new_e.alias(expr_name)) + } else { + Ok(new_e) } - _ => Ok(new_e), + } else { + Ok(new_e) } }) .collect::>>()?; From eec7cbef69d49c2057916fb3480fc1d233ac951a Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Fri, 19 Nov 2021 23:22:06 -0800 Subject: [PATCH 13/16] Apply to all nodes and update tests. --- datafusion/src/optimizer/constant_folding.rs | 41 +++++++------------- 1 file changed, 15 insertions(+), 26 deletions(-) diff --git a/datafusion/src/optimizer/constant_folding.rs b/datafusion/src/optimizer/constant_folding.rs index 43adeef2fe79..a49db0b7ab6b 100644 --- a/datafusion/src/optimizer/constant_folding.rs +++ b/datafusion/src/optimizer/constant_folding.rs @@ -108,20 +108,8 @@ impl OptimizerRule for ConstantFolding { let new_name = &new_e.name(plan.schema()); - // Some plans will be candidates in projection pushdown rule to - // trim expressions based on expression names. We need to keep - // expression name for them. - let is_plan_for_projection_pushdown = matches!( - plan, - LogicalPlan::Window { .. } - | LogicalPlan::Aggregate { .. } - | LogicalPlan::Union { .. } - ); - if let (Ok(expr_name), Ok(new_expr_name)) = (name, new_name) { - if expr_name != new_expr_name - && is_plan_for_projection_pushdown - { + if expr_name != new_expr_name { Ok(new_e.alias(expr_name)) } else { Ok(new_e) @@ -653,8 +641,8 @@ mod tests { let expected = "\ Projection: #test.a\ - \n Filter: NOT #test.c\ - \n Filter: #test.b\ + \n Filter: NOT #test.c AS test.c = Boolean(false)\ + \n Filter: #test.b AS test.b = Boolean(true)\ \n TableScan: test projection=None"; assert_optimized_plan_eq(&plan, expected); @@ -674,8 +662,8 @@ mod tests { let expected = "\ Projection: #test.a\ \n Limit: 1\ - \n Filter: #test.c\ - \n Filter: NOT #test.b\ + \n Filter: #test.c AS test.c != Boolean(false)\ + \n Filter: NOT #test.b AS test.b != Boolean(true)\ \n TableScan: test projection=None"; assert_optimized_plan_eq(&plan, expected); @@ -692,7 +680,7 @@ mod tests { let expected = "\ Projection: #test.a\ - \n Filter: NOT #test.b AND #test.c\ + \n Filter: NOT #test.b AND #test.c AS test.b != Boolean(true) AND test.c = Boolean(true)\ \n TableScan: test projection=None"; assert_optimized_plan_eq(&plan, expected); @@ -709,7 +697,7 @@ mod tests { let expected = "\ Projection: #test.a\ - \n Filter: NOT #test.b OR NOT #test.c\ + \n Filter: NOT #test.b OR NOT #test.c AS test.b != Boolean(true) OR test.c = Boolean(false)\ \n TableScan: test projection=None"; assert_optimized_plan_eq(&plan, expected); @@ -726,7 +714,7 @@ mod tests { let expected = "\ Projection: #test.a\ - \n Filter: #test.b\ + \n Filter: #test.b AS NOT test.b = Boolean(false)\ \n TableScan: test projection=None"; assert_optimized_plan_eq(&plan, expected); @@ -741,7 +729,7 @@ mod tests { .build()?; let expected = "\ - Projection: #test.a, #test.d, NOT #test.b\ + Projection: #test.a, #test.d, NOT #test.b AS test.b = Boolean(false)\ \n TableScan: test projection=None"; assert_optimized_plan_eq(&plan, expected); @@ -816,7 +804,7 @@ mod tests { .build() .unwrap(); - let expected = "Projection: TimestampNanosecond(1599566400000000000)\ + let expected = "Projection: TimestampNanosecond(1599566400000000000) AS totimestamp(Utf8(\"2020-09-08T12:00:00+00:00\"))\ \n TableScan: test projection=None" .to_string(); let actual = get_optimized_plan_formatted(&plan, &Utc::now()); @@ -851,7 +839,7 @@ mod tests { .build() .unwrap(); - let expected = "Projection: Int32(0)\ + let expected = "Projection: Int32(0) AS CAST(Utf8(\"0\") AS Int32)\ \n TableScan: test projection=None"; let actual = get_optimized_plan_formatted(&plan, &Utc::now()); assert_eq!(expected, actual); @@ -900,7 +888,7 @@ mod tests { // expect the same timestamp appears in both exprs let actual = get_optimized_plan_formatted(&plan, &time); let expected = format!( - "Projection: TimestampNanosecond({}), TimestampNanosecond({}) AS t2\ + "Projection: TimestampNanosecond({}) AS now(), TimestampNanosecond({}) AS t2\ \n TableScan: test projection=None", time.timestamp_nanos(), time.timestamp_nanos() @@ -924,7 +912,8 @@ mod tests { .unwrap(); let actual = get_optimized_plan_formatted(&plan, &time); - let expected = "Projection: NOT #test.a\ + let expected = + "Projection: NOT #test.a AS Boolean(true) OR Boolean(false) != test.a\ \n TableScan: test projection=None"; assert_eq!(actual, expected); @@ -956,7 +945,7 @@ mod tests { // Note that constant folder runs and folds the entire // expression down to a single constant (true) - let expected = "Filter: Boolean(true)\ + let expected = "Filter: Boolean(true) AS CAST(now() AS Int64) < CAST(totimestamp(Utf8(\"2020-09-08T12:05:00+00:00\")) AS Int64) + Int32(50000)\ \n TableScan: test projection=None"; let actual = get_optimized_plan_formatted(&plan, &time); From 2a7652db68aa839de81d1f88d5944b827dade040 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Fri, 19 Nov 2021 23:55:34 -0800 Subject: [PATCH 14/16] Unalias when push donw to TableScan. --- datafusion/src/logical_plan/expr.rs | 9 +++++++++ datafusion/src/logical_plan/mod.rs | 2 +- datafusion/src/physical_plan/planner.rs | 5 +++-- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/datafusion/src/logical_plan/expr.rs b/datafusion/src/logical_plan/expr.rs index 04e95e73a297..e7801e35f039 100644 --- a/datafusion/src/logical_plan/expr.rs +++ b/datafusion/src/logical_plan/expr.rs @@ -1349,6 +1349,15 @@ pub fn unnormalize_cols(exprs: impl IntoIterator) -> Vec { exprs.into_iter().map(unnormalize_col).collect() } +/// Recursively un-alias an expressions +#[inline] +pub fn unalias(expr: Expr) -> Expr { + match expr { + Expr::Alias(sub_expr, _) => unalias(*sub_expr), + _ => expr, + } +} + /// Create an expression to represent the min() aggregate function pub fn min(expr: Expr) -> Expr { Expr::AggregateFunction { diff --git a/datafusion/src/logical_plan/mod.rs b/datafusion/src/logical_plan/mod.rs index 5db6a99672e6..05aa2992dc56 100644 --- a/datafusion/src/logical_plan/mod.rs +++ b/datafusion/src/logical_plan/mod.rs @@ -44,7 +44,7 @@ pub use expr::{ max, md5, min, normalize_col, normalize_cols, now, octet_length, or, random, regexp_match, regexp_replace, repeat, replace, replace_col, reverse, right, round, rpad, rtrim, sha224, sha256, sha384, sha512, signum, sin, split_part, sqrt, - starts_with, strpos, substr, sum, tan, to_hex, translate, trim, trunc, + starts_with, strpos, substr, sum, tan, to_hex, translate, trim, trunc, unalias, unnormalize_col, unnormalize_cols, upper, when, Column, Expr, ExprRewriter, ExpressionVisitor, Literal, Recursion, RewriteRecursion, }; diff --git a/datafusion/src/physical_plan/planner.rs b/datafusion/src/physical_plan/planner.rs index 4e04da7377f8..672c888e7687 100644 --- a/datafusion/src/physical_plan/planner.rs +++ b/datafusion/src/physical_plan/planner.rs @@ -25,7 +25,7 @@ use super::{ use crate::execution::context::ExecutionContextState; use crate::logical_plan::plan::TableScanPlan; use crate::logical_plan::{ - unnormalize_cols, DFSchema, Expr, LogicalPlan, Operator, + unalias, union_with_alias, unnormalize_cols, DFSchema, Expr, LogicalPlan, Operator, Partitioning as LogicalPartitioning, PlanType, ToStringifiedPlan, UserDefinedLogicalNode, }; @@ -345,7 +345,8 @@ impl DefaultPhysicalPlanner { // doesn't know (nor should care) how the relation was // referred to in the query let filters = unnormalize_cols(filters.iter().cloned()); - source.scan(projection, batch_size, &filters, *limit).await + let unaliased: Vec = filters.into_iter().map(unalias).collect(); + source.scan(projection, batch_size, &unaliased, *limit).await } LogicalPlan::Values { values, From 546d2a280fc1afbde8d18eaff96d3aee52b9c8fd Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Sat, 20 Nov 2021 01:09:16 -0800 Subject: [PATCH 15/16] Update more tests. --- datafusion/tests/sql.rs | 42 ++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/datafusion/tests/sql.rs b/datafusion/tests/sql.rs index c97328e71f59..91e49870c322 100644 --- a/datafusion/tests/sql.rs +++ b/datafusion/tests/sql.rs @@ -1569,12 +1569,12 @@ async fn csv_query_cast_literal() -> Result<()> { let actual = execute_to_batches(&mut ctx, sql).await; let expected = vec![ - "+--------------------+------------+", - "| c12 | Float64(1) |", - "+--------------------+------------+", - "| 0.9294097332465232 | 1 |", - "| 0.3114712539863804 | 1 |", - "+--------------------+------------+", + "+--------------------+---------------------------+", + "| c12 | CAST(Int64(1) AS Float64) |", + "+--------------------+---------------------------+", + "| 0.9294097332465232 | 1 |", + "| 0.3114712539863804 | 1 |", + "+--------------------+---------------------------+", ]; assert_batches_eq!(expected, &actual); @@ -4280,11 +4280,11 @@ async fn query_without_from() -> Result<()> { let sql = "SELECT 1+2, 3/4, cos(0)"; let actual = execute_to_batches(&mut ctx, sql).await; let expected = vec![ - "+----------+----------+------------+", - "| Int64(3) | Int64(0) | Float64(1) |", - "+----------+----------+------------+", - "| 3 | 0 | 1 |", - "+----------+----------+------------+", + "+---------------------+---------------------+---------------+", + "| Int64(1) + Int64(2) | Int64(3) / Int64(4) | cos(Int64(0)) |", + "+---------------------+---------------------+---------------+", + "| 3 | 0 | 1 |", + "+---------------------+---------------------+---------------+", ]; assert_batches_eq!(expected, &actual); @@ -5733,11 +5733,11 @@ async fn case_with_bool_type_result() -> Result<()> { let sql = "select case when 'cpu' != 'cpu' then true else false end"; let actual = execute_to_batches(&mut ctx, sql).await; let expected = vec![ - "+----------------+", - "| Boolean(false) |", - "+----------------+", - "| false |", - "+----------------+", + "+---------------------------------------------------------------------------------+", + "| CASE WHEN Utf8(\"cpu\") != Utf8(\"cpu\") THEN Boolean(true) ELSE Boolean(false) END |", + "+---------------------------------------------------------------------------------+", + "| false |", + "+---------------------------------------------------------------------------------+", ]; assert_batches_eq!(expected, &actual); Ok(()) @@ -5750,11 +5750,11 @@ async fn use_between_expression_in_select_query() -> Result<()> { let sql = "SELECT 1 NOT BETWEEN 3 AND 5"; let actual = execute_to_batches(&mut ctx, sql).await; let expected = vec![ - "+---------------+", - "| Boolean(true) |", - "+---------------+", - "| true |", - "+---------------+", + "+--------------------------------------------+", + "| Int64(1) NOT BETWEEN Int64(3) AND Int64(5) |", + "+--------------------------------------------+", + "| true |", + "+--------------------------------------------+", ]; assert_batches_eq!(expected, &actual); From 6e53b5388a5c70e3c6cfde2d158fe5032b0767c1 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Sat, 20 Nov 2021 10:00:46 -0800 Subject: [PATCH 16/16] Remove previous change. --- datafusion/src/physical_plan/planner.rs | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/datafusion/src/physical_plan/planner.rs b/datafusion/src/physical_plan/planner.rs index 4c2a7fd8ca9a..dfedbc23ab85 100644 --- a/datafusion/src/physical_plan/planner.rs +++ b/datafusion/src/physical_plan/planner.rs @@ -1340,17 +1340,6 @@ impl DefaultPhysicalPlanner { } } - fn remove_aliases<'a>(&self, expr: &'a Expr) -> Result<(String, &'a Expr)> { - let (name, e) = match expr { - Expr::Alias(sub_expr, alias) => match &**sub_expr { - Expr::Alias(nested_expr, _) => self.remove_aliases(&*nested_expr)?, - _ => (alias.clone(), sub_expr.as_ref()), - }, - _ => (physical_name(expr)?, expr), - }; - Ok((name, e)) - } - /// Create an aggregate expression from a logical expression or an alias pub fn create_aggregate_expr( &self, @@ -1360,7 +1349,10 @@ impl DefaultPhysicalPlanner { ctx_state: &ExecutionContextState, ) -> Result> { // unpack (nested) aliased logical expressions, e.g. "sum(col) as total" - let (name, e) = self.remove_aliases(e)?; + let (name, e) = match e { + Expr::Alias(sub_expr, alias) => (alias.clone(), sub_expr.as_ref()), + _ => (physical_name(e)?, e), + }; self.create_aggregate_expr_with_name( e,