diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 9f6855b69824..d8a6687aa8a5 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -490,6 +490,20 @@ impl Alias { } } + /// Create an alias with an optional schema/field qualifier. + pub fn new_boxed( + expr: Box, + relation: Option>, + name: impl Into, + ) -> Self { + Self { + expr, + relation: relation.map(|r| r.into()), + name: name.into(), + metadata: None, + } + } + pub fn with_metadata( mut self, metadata: Option>, @@ -1385,8 +1399,23 @@ impl Expr { } /// Return `self AS name` alias expression + /// + /// # Example + /// ``` + /// # use datafusion_expr::col; + /// let expr = col("foo").alias("bar"); + /// assert_eq!(expr.to_string(), "foo AS bar"); + /// + /// // when aliasing over the exising alias, the previous one is removed + /// let expr = col("foo").alias("bar").alias("baz"); + /// assert_eq!(expr.to_string(), "foo AS baz"); pub fn alias(self, name: impl Into) -> Expr { - Expr::Alias(Alias::new(self, None::<&str>, name.into())) + if let Expr::Alias(Alias { expr, .. }) = self { + // reuse the existing layer if possible + Expr::Alias(Alias::new_boxed(expr, None::<&str>, name.into())) + } else { + Expr::Alias(Alias::new(self, None::<&str>, name.into())) + } } /// Return `self AS name` alias expression with metadata @@ -1416,7 +1445,12 @@ impl Expr { relation: Option>, name: impl Into, ) -> Expr { - Expr::Alias(Alias::new(self, relation, name.into())) + if let Expr::Alias(Alias { expr, .. }) = self { + // reuse the existing layer if possible + Expr::Alias(Alias::new_boxed(expr, relation, name.into())) + } else { + Expr::Alias(Alias::new(self, relation, name.into())) + } } /// Return `self AS name` alias expression with a specific qualifier and metadata diff --git a/datafusion/optimizer/tests/optimizer_integration.rs b/datafusion/optimizer/tests/optimizer_integration.rs index 5e66c7ec0313..76549235e1f8 100644 --- a/datafusion/optimizer/tests/optimizer_integration.rs +++ b/datafusion/optimizer/tests/optimizer_integration.rs @@ -310,7 +310,7 @@ fn eliminate_redundant_null_check_on_count() { GROUP BY col_int32 HAVING c IS NOT NULL"; let plan = test_sql(sql).unwrap(); - let expected = "Projection: test.col_int32, count(Int64(1)) AS count(*) AS c\ + let expected = "Projection: test.col_int32, count(Int64(1)) AS c\ \n Aggregate: groupBy=[[test.col_int32]], aggr=[[count(Int64(1))]]\ \n TableScan: test projection=[col_int32]"; assert_eq!(expected, format!("{plan}")); diff --git a/datafusion/sql/src/unparser/plan.rs b/datafusion/sql/src/unparser/plan.rs index a6d89638ff41..5df701981964 100644 --- a/datafusion/sql/src/unparser/plan.rs +++ b/datafusion/sql/src/unparser/plan.rs @@ -371,29 +371,33 @@ impl Unparser<'_> { // Projection can be top-level plan for unnest relation // The projection generated by the `RecursiveUnnestRewriter` from a UNNEST relation will have // only one expression, which is the placeholder column generated by the rewriter. - let unnest_input_type = if p.expr.len() == 1 { + let unnest_params = if p.expr.len() == 1 { Self::check_unnest_placeholder_with_outer_ref(&p.expr[0]) } else { None }; - if self.dialect.unnest_as_table_factor() && unnest_input_type.is_some() { - if let LogicalPlan::Unnest(unnest) = &p.input.as_ref() { - if let Some(unnest_relation) = - self.try_unnest_to_table_factor_sql(unnest)? - { - relation.unnest(unnest_relation); - return self.select_to_sql_recursively( - p.input.as_ref(), - query, - select, - relation, - ); + if self.dialect.unnest_as_table_factor() { + if let Some((_, unnest_alias)) = &unnest_params { + if let LogicalPlan::Unnest(unnest) = &p.input.as_ref() { + if let Some(unnest_relation) = + self.try_unnest_to_table_factor_sql(unnest, unnest_alias)? + { + relation.unnest(unnest_relation); + return self.select_to_sql_recursively( + p.input.as_ref(), + query, + select, + relation, + ); + } } } } // Projection can be top-level plan for derived table if select.already_projected() { + let unnest_input_type = unnest_params.map(|(t, _)| t); + return self.derive_with_dialect_alias( "derived_projection", plan, @@ -906,15 +910,22 @@ impl Unparser<'_> { /// - If the column is not a placeholder column, return [None]. /// /// `outer_ref` is the display result of [Expr::OuterReferenceColumn] - fn check_unnest_placeholder_with_outer_ref(expr: &Expr) -> Option { - if let Expr::Alias(Alias { expr, .. }) = expr { + fn check_unnest_placeholder_with_outer_ref( + expr: &Expr, + ) -> Option<(UnnestInputType, &str)> { + if let Expr::Alias(Alias { + expr, + name: alias_name, + .. + }) = expr + { if let Expr::Column(Column { name, .. }) = expr.as_ref() { if let Some(prefix) = name.strip_prefix(UNNEST_PLACEHOLDER) { if prefix.starts_with(&format!("({}(", OUTER_REFERENCE_COLUMN_PREFIX)) { - return Some(UnnestInputType::OuterReference); + return Some((UnnestInputType::OuterReference, alias_name)); } - return Some(UnnestInputType::Scalar); + return Some((UnnestInputType::Scalar, alias_name)); } } } @@ -924,12 +935,15 @@ impl Unparser<'_> { fn try_unnest_to_table_factor_sql( &self, unnest: &Unnest, + alias: &str, ) -> Result> { let mut unnest_relation = UnnestRelationBuilder::default(); let LogicalPlan::Projection(projection) = unnest.input.as_ref() else { return Ok(None); }; + unnest_relation.alias(Some(self.new_table_alias(alias.to_string(), vec![]))); + if !matches!(projection.input.as_ref(), LogicalPlan::EmptyRelation(_)) { // It may be possible that UNNEST is used as a source for the query. // However, at this point, we don't yet know if it is just a single expression diff --git a/datafusion/sql/tests/cases/plan_to_sql.rs b/datafusion/sql/tests/cases/plan_to_sql.rs index 5c0e5df673d4..498d3e4098df 100644 --- a/datafusion/sql/tests/cases/plan_to_sql.rs +++ b/datafusion/sql/tests/cases/plan_to_sql.rs @@ -572,7 +572,7 @@ fn roundtrip_statement_with_dialect() -> Result<()> { }, TestStatementWithDialect { sql: "SELECT * FROM UNNEST([1,2,3])", - expected: r#"SELECT UNNEST(make_array(Int64(1),Int64(2),Int64(3))) FROM UNNEST([1, 2, 3])"#, + expected: r#"SELECT UNNEST(make_array(Int64(1),Int64(2),Int64(3))) FROM UNNEST([1, 2, 3]) AS UNNEST(make_array(Int64(1),Int64(2),Int64(3)))"#, parser_dialect: Box::new(GenericDialect {}), unparser_dialect: Box::new(CustomDialectBuilder::default().with_unnest_as_table_factor(true).build()), }, @@ -590,7 +590,7 @@ fn roundtrip_statement_with_dialect() -> Result<()> { }, TestStatementWithDialect { sql: "SELECT * FROM UNNEST([1,2,3]), j1", - expected: r#"SELECT UNNEST(make_array(Int64(1),Int64(2),Int64(3))), j1.j1_id, j1.j1_string FROM UNNEST([1, 2, 3]) CROSS JOIN j1"#, + expected: r#"SELECT UNNEST(make_array(Int64(1),Int64(2),Int64(3))), j1.j1_id, j1.j1_string FROM UNNEST([1, 2, 3]) AS UNNEST(make_array(Int64(1),Int64(2),Int64(3))) CROSS JOIN j1"#, parser_dialect: Box::new(GenericDialect {}), unparser_dialect: Box::new(CustomDialectBuilder::default().with_unnest_as_table_factor(true).build()), }, @@ -608,7 +608,7 @@ fn roundtrip_statement_with_dialect() -> Result<()> { }, TestStatementWithDialect { sql: "SELECT UNNEST([1,2,3])", - expected: r#"SELECT * FROM UNNEST([1, 2, 3])"#, + expected: r#"SELECT * FROM UNNEST([1, 2, 3]) AS UNNEST(make_array(Int64(1),Int64(2),Int64(3)))"#, parser_dialect: Box::new(GenericDialect {}), unparser_dialect: Box::new(CustomDialectBuilder::default().with_unnest_as_table_factor(true).build()), }, @@ -626,7 +626,7 @@ fn roundtrip_statement_with_dialect() -> Result<()> { }, TestStatementWithDialect { sql: "SELECT * FROM unnest_table u, UNNEST(u.array_col)", - expected: r#"SELECT u.array_col, u.struct_col, UNNEST(outer_ref(u.array_col)) FROM unnest_table AS u CROSS JOIN UNNEST(u.array_col)"#, + expected: r#"SELECT u.array_col, u.struct_col, UNNEST(outer_ref(u.array_col)) FROM unnest_table AS u CROSS JOIN UNNEST(u.array_col) AS UNNEST(outer_ref(u.array_col))"#, parser_dialect: Box::new(GenericDialect {}), unparser_dialect: Box::new(CustomDialectBuilder::default().with_unnest_as_table_factor(true).build()), }, @@ -638,7 +638,7 @@ fn roundtrip_statement_with_dialect() -> Result<()> { }, TestStatementWithDialect { sql: "SELECT unnest([1, 2, 3, 4]) from unnest([1, 2, 3]);", - expected: r#"SELECT UNNEST([1, 2, 3, 4]) AS UNNEST(make_array(Int64(1),Int64(2),Int64(3),Int64(4))) FROM UNNEST([1, 2, 3])"#, + expected: r#"SELECT UNNEST([1, 2, 3, 4]) AS UNNEST(make_array(Int64(1),Int64(2),Int64(3),Int64(4))) FROM UNNEST([1, 2, 3]) AS UNNEST(make_array(Int64(1),Int64(2),Int64(3)))"#, parser_dialect: Box::new(GenericDialect {}), unparser_dialect: Box::new(CustomDialectBuilder::default().with_unnest_as_table_factor(true).build()), }, diff --git a/datafusion/sqllogictest/test_files/count_star_rule.slt b/datafusion/sqllogictest/test_files/count_star_rule.slt index d38d3490fed4..c638107da531 100644 --- a/datafusion/sqllogictest/test_files/count_star_rule.slt +++ b/datafusion/sqllogictest/test_files/count_star_rule.slt @@ -59,7 +59,7 @@ query TT EXPLAIN SELECT t1.a, COUNT() AS cnt FROM t1 GROUP BY t1.a HAVING COUNT() > 0; ---- logical_plan -01)Projection: t1.a, count(Int64(1)) AS count() AS cnt +01)Projection: t1.a, count(Int64(1)) AS cnt 02)--Filter: count(Int64(1)) > Int64(0) 03)----Aggregate: groupBy=[[t1.a]], aggr=[[count(Int64(1))]] 04)------TableScan: t1 projection=[a] @@ -83,7 +83,7 @@ query TT EXPLAIN SELECT a, COUNT() OVER (PARTITION BY a) AS count_a FROM t1; ---- logical_plan -01)Projection: t1.a, count(Int64(1)) PARTITION BY [t1.a] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS count() PARTITION BY [t1.a] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS count_a +01)Projection: t1.a, count(Int64(1)) PARTITION BY [t1.a] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS count_a 02)--WindowAggr: windowExpr=[[count(Int64(1)) PARTITION BY [t1.a] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]] 03)----TableScan: t1 projection=[a] physical_plan diff --git a/datafusion/sqllogictest/test_files/group_by.slt b/datafusion/sqllogictest/test_files/group_by.slt index 75baba3efc4f..d1c557bdeb85 100644 --- a/datafusion/sqllogictest/test_files/group_by.slt +++ b/datafusion/sqllogictest/test_files/group_by.slt @@ -4188,7 +4188,7 @@ EXPLAIN SELECT SUM(DISTINCT CAST(x AS DOUBLE)), MAX(DISTINCT CAST(x AS DOUBLE)) logical_plan 01)Projection: sum(alias1) AS sum(DISTINCT t1.x), max(alias1) AS max(DISTINCT t1.x) 02)--Aggregate: groupBy=[[t1.y]], aggr=[[sum(alias1), max(alias1)]] -03)----Aggregate: groupBy=[[t1.y, __common_expr_1 AS t1.x AS alias1]], aggr=[[]] +03)----Aggregate: groupBy=[[t1.y, __common_expr_1 AS alias1]], aggr=[[]] 04)------Projection: CAST(t1.x AS Float64) AS __common_expr_1, t1.y 05)--------TableScan: t1 projection=[x, y] physical_plan diff --git a/datafusion/sqllogictest/test_files/insert.slt b/datafusion/sqllogictest/test_files/insert.slt index 8a9c01d36308..c2654ba9a815 100644 --- a/datafusion/sqllogictest/test_files/insert.slt +++ b/datafusion/sqllogictest/test_files/insert.slt @@ -171,7 +171,7 @@ logical_plan 01)Dml: op=[Insert Into] table=[table_without_values] 02)--Projection: a1 AS a1, a2 AS a2 03)----Sort: aggregate_test_100.c1 ASC NULLS LAST -04)------Projection: sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING AS a1, count(Int64(1)) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING AS count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING AS a2, aggregate_test_100.c1 +04)------Projection: sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING AS a1, count(Int64(1)) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING AS a2, aggregate_test_100.c1 05)--------WindowAggr: windowExpr=[[sum(CAST(aggregate_test_100.c4 AS Int64)) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING, count(Int64(1)) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING]] 06)----------TableScan: aggregate_test_100 projection=[c1, c4, c9] physical_plan diff --git a/datafusion/sqllogictest/test_files/union.slt b/datafusion/sqllogictest/test_files/union.slt index 9e9a40c510ef..b229d89f4165 100644 --- a/datafusion/sqllogictest/test_files/union.slt +++ b/datafusion/sqllogictest/test_files/union.slt @@ -491,7 +491,7 @@ EXPLAIN logical_plan 01)Limit: skip=0, fetch=3 02)--Union -03)----Projection: count(Int64(1)) AS count(*) AS cnt +03)----Projection: count(Int64(1)) AS cnt 04)------Limit: skip=0, fetch=3 05)--------Aggregate: groupBy=[[]], aggr=[[count(Int64(1))]] 06)----------SubqueryAlias: a @@ -650,7 +650,7 @@ select x, y from (select 1 as x , max(10) as y) b ---- logical_plan 01)Union -02)--Projection: count(Int64(1)) AS count(*) AS count, a.n +02)--Projection: count(Int64(1)) AS count, a.n 03)----Aggregate: groupBy=[[a.n]], aggr=[[count(Int64(1))]] 04)------SubqueryAlias: a 05)--------Projection: Int64(5) AS n diff --git a/datafusion/sqllogictest/test_files/unnest.slt b/datafusion/sqllogictest/test_files/unnest.slt index b9c13582952a..f9e741bf3ca3 100644 --- a/datafusion/sqllogictest/test_files/unnest.slt +++ b/datafusion/sqllogictest/test_files/unnest.slt @@ -595,7 +595,7 @@ explain select unnest(unnest(column3)), column3 from recursive_unnest_table; ---- logical_plan 01)Unnest: lists[] structs[__unnest_placeholder(UNNEST(recursive_unnest_table.column3))] -02)--Projection: __unnest_placeholder(recursive_unnest_table.column3,depth=1) AS UNNEST(recursive_unnest_table.column3) AS __unnest_placeholder(UNNEST(recursive_unnest_table.column3)), recursive_unnest_table.column3 +02)--Projection: __unnest_placeholder(recursive_unnest_table.column3,depth=1) AS __unnest_placeholder(UNNEST(recursive_unnest_table.column3)), recursive_unnest_table.column3 03)----Unnest: lists[__unnest_placeholder(recursive_unnest_table.column3)|depth=1] structs[] 04)------Projection: recursive_unnest_table.column3 AS __unnest_placeholder(recursive_unnest_table.column3), recursive_unnest_table.column3 05)--------TableScan: recursive_unnest_table projection=[column3] diff --git a/datafusion/sqllogictest/test_files/window.slt b/datafusion/sqllogictest/test_files/window.slt index fd623b67fe9f..d3c31e31ede2 100644 --- a/datafusion/sqllogictest/test_files/window.slt +++ b/datafusion/sqllogictest/test_files/window.slt @@ -1764,7 +1764,7 @@ EXPLAIN SELECT count(*) as global_count FROM ORDER BY c1 ) AS a ---- logical_plan -01)Projection: count(Int64(1)) AS count(*) AS global_count +01)Projection: count(Int64(1)) AS global_count 02)--Aggregate: groupBy=[[]], aggr=[[count(Int64(1))]] 03)----SubqueryAlias: a 04)------Projection: