From d5e5bd965a4fe6624577a6e4fed20edfd2ea5017 Mon Sep 17 00:00:00 2001 From: jychen7 Date: Sun, 6 Jun 2021 16:12:09 +0000 Subject: [PATCH 1/5] 110 support group by positions --- datafusion/src/sql/planner.rs | 17 ++++++++++++++++- datafusion/src/sql/utils.rs | 36 +++++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 1 deletion(-) diff --git a/datafusion/src/sql/planner.rs b/datafusion/src/sql/planner.rs index aa6b5a93f483..f52ecfc73d21 100644 --- a/datafusion/src/sql/planner.rs +++ b/datafusion/src/sql/planner.rs @@ -54,8 +54,9 @@ use super::{ parser::DFParser, utils::{ can_columns_satisfy_exprs, expand_wildcard, expr_as_column_expr, extract_aliases, - find_aggregate_exprs, find_column_exprs, find_window_exprs, + extract_positions, find_aggregate_exprs, find_column_exprs, find_window_exprs, group_window_expr_by_sort_keys, rebase_expr, resolve_aliases_to_exprs, + resolve_positions_to_exprs, }, }; @@ -591,6 +592,10 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { &group_by_expr, &extract_aliases(&select_exprs), )?; + let group_by_expr = resolve_positions_to_exprs( + &group_by_expr, + &extract_positions(&select_exprs), + )?; self.validate_schema_satisfies_exprs( plan.schema(), &[group_by_expr.clone()], @@ -2319,6 +2324,16 @@ mod tests { ); } + #[test] + fn select_simple_aggregate_with_groupby_can_use_positions() { + quick_test( + "SELECT state, age AS b, COUNT(1) FROM person GROUP BY 1, 2", + "Projection: #state, #age AS b, #COUNT(UInt8(1))\ + \n Aggregate: groupBy=[[#state, #age]], aggr=[[COUNT(UInt8(1))]]\ + \n TableScan: person projection=None", + ); + } + #[test] fn select_simple_aggregate_with_groupby_can_use_alias() { quick_test( diff --git a/datafusion/src/sql/utils.rs b/datafusion/src/sql/utils.rs index 80a25d04468f..38c6d44203e3 100644 --- a/datafusion/src/sql/utils.rs +++ b/datafusion/src/sql/utils.rs @@ -390,6 +390,42 @@ pub(crate) fn extract_aliases(exprs: &[Expr]) -> HashMap { .collect::>() } +/// Returns mapping of each position (`String`) to the expression (`Expr`) it is +/// aliasing. +pub(crate) fn extract_positions(exprs: &[Expr]) -> HashMap { + let mut position = 0; + exprs + .iter() + .filter_map(|expr| match expr { + // position starts with 1 + Expr::Alias(nested_expr, _alias_name) => { + position += 1; + Some((position.clone().to_string(), *nested_expr.clone())) + } + _ => { + position += 1; + Some((position.clone().to_string(), expr.clone())) + } + }) + .collect::>() +} + +pub(crate) fn resolve_positions_to_exprs( + expr: &Expr, + positions: &HashMap, +) -> Result { + match expr { + Expr::Literal(value) => { + if let Some(position_expr) = positions.get(&(value.to_string())) { + Ok(position_expr.clone()) + } else { + Ok(Expr::Literal(value.clone())) + } + } + default => Ok(default.clone()), + } +} + /// Rebuilds an `Expr` with columns that refer to aliases replaced by the /// alias' underlying `Expr`. pub(crate) fn resolve_aliases_to_exprs( From 8d175c759e17190980f270b5894348dc4cff9bbf Mon Sep 17 00:00:00 2001 From: jychen7 Date: Sun, 6 Jun 2021 22:13:56 +0000 Subject: [PATCH 2/5] try resolve positions via array, not map --- datafusion/src/sql/planner.rs | 14 ++++------- datafusion/src/sql/utils.rs | 45 +++++++++++++---------------------- 2 files changed, 22 insertions(+), 37 deletions(-) diff --git a/datafusion/src/sql/planner.rs b/datafusion/src/sql/planner.rs index f52ecfc73d21..f52318f1be3d 100644 --- a/datafusion/src/sql/planner.rs +++ b/datafusion/src/sql/planner.rs @@ -54,7 +54,7 @@ use super::{ parser::DFParser, utils::{ can_columns_satisfy_exprs, expand_wildcard, expr_as_column_expr, extract_aliases, - extract_positions, find_aggregate_exprs, find_column_exprs, find_window_exprs, + find_aggregate_exprs, find_column_exprs, find_window_exprs, group_window_expr_by_sort_keys, rebase_expr, resolve_aliases_to_exprs, resolve_positions_to_exprs, }, @@ -583,19 +583,15 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { // All of the aggregate expressions (deduplicated). let aggr_exprs = find_aggregate_exprs(&aggr_expr_haystack); + let alias_map = extract_aliases(&select_exprs); let group_by_exprs = select .group_by .iter() .map(|e| { let group_by_expr = self.sql_expr_to_logical_expr(e)?; - let group_by_expr = resolve_aliases_to_exprs( - &group_by_expr, - &extract_aliases(&select_exprs), - )?; - let group_by_expr = resolve_positions_to_exprs( - &group_by_expr, - &extract_positions(&select_exprs), - )?; + let group_by_expr = resolve_aliases_to_exprs(&group_by_expr, &alias_map)?; + let group_by_expr = + resolve_positions_to_exprs(&group_by_expr, &select_exprs)?; self.validate_schema_satisfies_exprs( plan.schema(), &[group_by_expr.clone()], diff --git a/datafusion/src/sql/utils.rs b/datafusion/src/sql/utils.rs index 38c6d44203e3..02dedaafb0f7 100644 --- a/datafusion/src/sql/utils.rs +++ b/datafusion/src/sql/utils.rs @@ -18,6 +18,7 @@ //! SQL Utility Functions use crate::logical_plan::{DFSchema, Expr, LogicalPlan}; +use crate::scalar::ScalarValue; use crate::{ error::{DataFusionError, Result}, logical_plan::{ExpressionVisitor, Recursion}, @@ -390,39 +391,27 @@ pub(crate) fn extract_aliases(exprs: &[Expr]) -> HashMap { .collect::>() } -/// Returns mapping of each position (`String`) to the expression (`Expr`) it is -/// aliasing. -pub(crate) fn extract_positions(exprs: &[Expr]) -> HashMap { - let mut position = 0; - exprs - .iter() - .filter_map(|expr| match expr { - // position starts with 1 - Expr::Alias(nested_expr, _alias_name) => { - position += 1; - Some((position.clone().to_string(), *nested_expr.clone())) - } - _ => { - position += 1; - Some((position.clone().to_string(), expr.clone())) - } - }) - .collect::>() -} - pub(crate) fn resolve_positions_to_exprs( expr: &Expr, - positions: &HashMap, + select_exprs: &[Expr], ) -> Result { match expr { - Expr::Literal(value) => { - if let Some(position_expr) = positions.get(&(value.to_string())) { - Ok(position_expr.clone()) - } else { - Ok(Expr::Literal(value.clone())) + Expr::Literal(position) => match position { + ScalarValue::Int64(Some(pos)) => { + let index = (pos - 1) as usize; + if index < select_exprs.len() { + let select_expr = &select_exprs[index]; + match select_expr { + Expr::Alias(nested_expr, _alias_name) => Ok(*nested_expr.clone()), + _ => Ok(select_expr.clone()), + } + } else { + Ok(expr.clone()) + } } - } - default => Ok(default.clone()), + _ => Ok(expr.clone()), + }, + _ => Ok(expr.clone()), } } From 6bcd83b9209eb5af4a0c7283fc78a4dad5391f2b Mon Sep 17 00:00:00 2001 From: jychen7 Date: Mon, 7 Jun 2021 00:44:06 +0000 Subject: [PATCH 3/5] Add comment for i64 and simplify the pattern match --- datafusion/src/sql/utils.rs | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/datafusion/src/sql/utils.rs b/datafusion/src/sql/utils.rs index 02dedaafb0f7..f0dd6ade3cb7 100644 --- a/datafusion/src/sql/utils.rs +++ b/datafusion/src/sql/utils.rs @@ -396,21 +396,20 @@ pub(crate) fn resolve_positions_to_exprs( select_exprs: &[Expr], ) -> Result { match expr { - Expr::Literal(position) => match position { - ScalarValue::Int64(Some(pos)) => { - let index = (pos - 1) as usize; - if index < select_exprs.len() { - let select_expr = &select_exprs[index]; - match select_expr { - Expr::Alias(nested_expr, _alias_name) => Ok(*nested_expr.clone()), - _ => Ok(select_expr.clone()), - } - } else { - Ok(expr.clone()) + // sql_expr_to_logical_expr maps number to i64 + // https://github.com/apache/arrow-datafusion/blob/8d175c759e17190980f270b5894348dc4cff9bbf/datafusion/src/sql/planner.rs#L882-L887 + Expr::Literal(ScalarValue::Int64(Some(pos))) => { + let index = (pos - 1) as usize; + if index < select_exprs.len() { + let select_expr = &select_exprs[index]; + match select_expr { + Expr::Alias(nested_expr, _alias_name) => Ok(*nested_expr.clone()), + _ => Ok(select_expr.clone()), } + } else { + Ok(expr.clone()) } - _ => Ok(expr.clone()), - }, + } _ => Ok(expr.clone()), } } From c86c061f0653b7af2ec4d5e1d1f2ae94935344dc Mon Sep 17 00:00:00 2001 From: jychen7 Date: Mon, 7 Jun 2021 23:16:53 +0000 Subject: [PATCH 4/5] combine match and if condition, add more test cases --- datafusion/src/sql/planner.rs | 23 +++++++++++++++++++++++ datafusion/src/sql/utils.rs | 18 ++++++++---------- 2 files changed, 31 insertions(+), 10 deletions(-) diff --git a/datafusion/src/sql/planner.rs b/datafusion/src/sql/planner.rs index f52318f1be3d..8e9365ce6706 100644 --- a/datafusion/src/sql/planner.rs +++ b/datafusion/src/sql/planner.rs @@ -2328,6 +2328,29 @@ mod tests { \n Aggregate: groupBy=[[#state, #age]], aggr=[[COUNT(UInt8(1))]]\ \n TableScan: person projection=None", ); + quick_test( + "SELECT state, age AS b, COUNT(1) FROM person GROUP BY 2, 1", + "Projection: #state, #age AS b, #COUNT(UInt8(1))\ + \n Aggregate: groupBy=[[#age, #state]], aggr=[[COUNT(UInt8(1))]]\ + \n TableScan: person projection=None", + ); + } + + #[test] + fn select_simple_aggregate_with_groupby_position_out_of_range() { + let sql = "SELECT state, MIN(age) FROM person GROUP BY 0"; + let err = logical_plan(sql).expect_err("query should have failed"); + assert_eq!( + "Plan(\"Projection references non-aggregate values\")", + format!("{:?}", err) + ); + + let sql2 = "SELECT state, MIN(age) FROM person GROUP BY 5"; + let err2 = logical_plan(sql2).expect_err("query should have failed"); + assert_eq!( + "Plan(\"Projection references non-aggregate values\")", + format!("{:?}", err2) + ); } #[test] diff --git a/datafusion/src/sql/utils.rs b/datafusion/src/sql/utils.rs index f0dd6ade3cb7..fa2f41842f48 100644 --- a/datafusion/src/sql/utils.rs +++ b/datafusion/src/sql/utils.rs @@ -398,16 +398,14 @@ pub(crate) fn resolve_positions_to_exprs( match expr { // sql_expr_to_logical_expr maps number to i64 // https://github.com/apache/arrow-datafusion/blob/8d175c759e17190980f270b5894348dc4cff9bbf/datafusion/src/sql/planner.rs#L882-L887 - Expr::Literal(ScalarValue::Int64(Some(pos))) => { - let index = (pos - 1) as usize; - if index < select_exprs.len() { - let select_expr = &select_exprs[index]; - match select_expr { - Expr::Alias(nested_expr, _alias_name) => Ok(*nested_expr.clone()), - _ => Ok(select_expr.clone()), - } - } else { - Ok(expr.clone()) + Expr::Literal(ScalarValue::Int64(Some(position))) + if position > &(0 as i64) && position <= &(select_exprs.len() as i64) => + { + let index = (position - 1) as usize; + let select_expr = &select_exprs[index]; + match select_expr { + Expr::Alias(nested_expr, _alias_name) => Ok(*nested_expr.clone()), + _ => Ok(select_expr.clone()), } } _ => Ok(expr.clone()), From 4de895e1a3c81d899b216c20ecea3b42f042a221 Mon Sep 17 00:00:00 2001 From: jychen7 Date: Mon, 7 Jun 2021 23:42:01 +0000 Subject: [PATCH 5/5] replace '0 as i64' with 0_i64 --- datafusion/src/sql/utils.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/src/sql/utils.rs b/datafusion/src/sql/utils.rs index fa2f41842f48..333c05d6c50c 100644 --- a/datafusion/src/sql/utils.rs +++ b/datafusion/src/sql/utils.rs @@ -399,7 +399,7 @@ pub(crate) fn resolve_positions_to_exprs( // sql_expr_to_logical_expr maps number to i64 // https://github.com/apache/arrow-datafusion/blob/8d175c759e17190980f270b5894348dc4cff9bbf/datafusion/src/sql/planner.rs#L882-L887 Expr::Literal(ScalarValue::Int64(Some(position))) - if position > &(0 as i64) && position <= &(select_exprs.len() as i64) => + if position > &0_i64 && position <= &(select_exprs.len() as i64) => { let index = (position - 1) as usize; let select_expr = &select_exprs[index];