diff --git a/datafusion/sql/src/unparser/dialect.rs b/datafusion/sql/src/unparser/dialect.rs
index 399f0df0a699..6cd00c2b0cc8 100644
--- a/datafusion/sql/src/unparser/dialect.rs
+++ b/datafusion/sql/src/unparser/dialect.rs
@@ -154,6 +154,13 @@ pub trait Dialect: Send + Sync {
         Ok(None)
     }
 
+    /// Allows the dialect to override column alias unparsing if the dialect has specific rules.
+    /// Returns None if the default unparsing should be used, or Some(String) if there is
+    /// a custom implementation for the alias.
+    fn col_alias_overrides(&self, _alias: &str) -> Result<Option<String>> {
+        Ok(None)
+    }
+
     /// Allows the dialect to choose to omit window frame in unparsing
     /// based on function name and window frame bound
     /// Returns false if specific function name / window frame bound indicates no window frame is needed in unparsing
@@ -253,7 +260,55 @@ impl Dialect for DefaultDialect {
         }
     }
 }
+/// Unparser dialect for BigQuery: quotes identifiers with backticks and rewrites
+/// column aliases containing characters BigQuery does not support in column names.
+#[derive(Default)]
+pub struct BigQueryDialect {}
+
+impl Dialect for BigQueryDialect {
+    fn identifier_quote_style(&self, _: &str) -> Option<char> {
+        Some('`')
+    }
+
+    /// Rewrites aliases containing characters BigQuery rejects in column names,
+    /// replacing each one with `_<decimal code point>` (e.g. `*` becomes `_42`).
+    /// Returns `None` when the alias needs no rewriting, so the default is used.
+    fn col_alias_overrides(&self, alias: &str) -> Result<Option<String>> {
+        // Special characters not supported by BigQuery col names
+        // https://cloud.google.com/bigquery/docs/schemas#flexible-column-names
+        let special_chars: [char; 20] = [
+            '!', '"', '$', '(', ')', '*', ',', '.', '/', ';', '?', '@', '[', '\\', ']',
+            '^', '`', '{', '}', '~',
+        ];
+
+        // Nothing to encode: let the caller fall back to the default alias unparsing.
+        if !alias.chars().any(|c| special_chars.contains(&c)) {
+            return Ok(None);
+        }
+
+        let mut encoded_name = String::with_capacity(alias.len());
+        for c in alias.chars() {
+            if special_chars.contains(&c) {
+                encoded_name.push_str(&format!("_{}", c as u32));
+            } else {
+                encoded_name.push(c);
+            }
+        }
+        Ok(Some(encoded_name))
+    }
+
+    fn unnest_as_table_factor(&self) -> bool {
+        true
+    }
+}
 
+impl BigQueryDialect {
+    /// Creates a new `BigQueryDialect`.
+    #[must_use]
+    pub fn new() -> Self {
+        Self {}
+    }
+}
 pub struct PostgreSqlDialect {}
 
 impl Dialect for PostgreSqlDialect {
diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs
index a48c077afd9b..8c3764aaedcb 100644
--- a/datafusion/sql/src/unparser/expr.rs
+++ b/datafusion/sql/src/unparser/expr.rs
@@ -675,21 +675,29 @@ impl Unparser<'_> {
     }
 
     fn col_to_sql(&self, col: &Column) -> Result<ast::Expr> {
+        // Replace the column name if the dialect has an override
+        let col_name = self
+            .dialect
+            .col_alias_overrides(&col.name)?
+            .unwrap_or_else(|| col.name.to_string());
+
         if let Some(table_ref) = &col.relation {
             let mut id = if self.dialect.full_qualified_col() {
                 table_ref.to_vec()
             } else {
                 vec![table_ref.table().to_string()]
             };
-            id.push(col.name.to_string());
+
+            id.push(col_name);
             return Ok(ast::Expr::CompoundIdentifier(
                 id.iter()
                     .map(|i| self.new_ident_quoted_if_needs(i.to_string()))
                     .collect(),
             ));
         }
+
         Ok(ast::Expr::Identifier(
-            self.new_ident_quoted_if_needs(col.name.to_string()),
+            self.new_ident_quoted_if_needs(col_name),
         ))
     }
 
diff --git a/datafusion/sql/src/unparser/plan.rs b/datafusion/sql/src/unparser/plan.rs
index 0fa203c60b7b..ed3235064fa5 100644
--- a/datafusion/sql/src/unparser/plan.rs
+++ b/datafusion/sql/src/unparser/plan.rs
@@ -1035,9 +1035,15 @@ impl Unparser<'_> {
             Expr::Alias(Alias { expr, name, .. }) => {
                 let inner = self.expr_to_sql(expr)?;
 
+                // Use the dialect's alias override when it provides one
+                let col_name = self
+                    .dialect
+                    .col_alias_overrides(name)?
+                    .unwrap_or_else(|| name.to_string());
+
                 Ok(ast::SelectItem::ExprWithAlias {
                     expr: inner,
-                    alias: self.new_ident_quoted_if_needs(name.to_string()),
+                    alias: self.new_ident_quoted_if_needs(col_name),
                 })
             }
             _ => {
diff --git a/datafusion/sql/tests/cases/plan_to_sql.rs b/datafusion/sql/tests/cases/plan_to_sql.rs
index 5af93a01e6c9..e670674496bd 100644
--- a/datafusion/sql/tests/cases/plan_to_sql.rs
+++ b/datafusion/sql/tests/cases/plan_to_sql.rs
@@ -31,8 +31,9 @@ use datafusion_functions_nested::map::map_udf;
 use datafusion_functions_window::rank::rank_udwf;
 use datafusion_sql::planner::{ContextProvider, PlannerContext, SqlToRel};
 use datafusion_sql::unparser::dialect::{
-    CustomDialectBuilder, DefaultDialect as UnparserDefaultDialect, DefaultDialect,
-    Dialect as UnparserDialect, MySqlDialect as UnparserMySqlDialect, SqliteDialect,
+    BigQueryDialect as UnparserBigqueryDialect, CustomDialectBuilder,
+    DefaultDialect as UnparserDefaultDialect, DefaultDialect, Dialect as UnparserDialect,
+    MySqlDialect as UnparserMySqlDialect, SqliteDialect,
 };
 use datafusion_sql::unparser::{expr_to_sql, plan_to_sql, Unparser};
 use sqlparser::ast::Statement;
@@ -54,7 +55,7 @@ use datafusion_sql::unparser::extension_unparser::{
     UnparseToStatementResult, UnparseWithinStatementResult,
     UserDefinedLogicalNodeUnparser,
 };
-use sqlparser::dialect::{Dialect, GenericDialect, MySqlDialect};
+use sqlparser::dialect::{Dialect, GenericDialect, MySqlDialect, PostgreSqlDialect};
 use sqlparser::parser::Parser;
 
 #[test]
@@ -536,6 +537,30 @@ fn roundtrip_statement_with_dialect() -> Result<()> {
             parser_dialect: Box::new(GenericDialect {}),
             unparser_dialect: Box::new(SqliteDialect {}),
         },
+        TestStatementWithDialect {
+            sql: "select min(*) as \"min(*)\" from (select 1 as a)",
+            expected: "SELECT min(*) AS `min_40_42_41` FROM (SELECT 1 AS `a`)",
+            parser_dialect: Box::new(PostgreSqlDialect {}),
+            unparser_dialect: Box::new(UnparserBigqueryDialect::new()),
+        },
+        TestStatementWithDialect {
+            sql: "select a as \"a*\", b as \"b@\" from (select 1 as a , 2 as b)",
+            expected: "SELECT `a` AS `a_42`, `b` AS `b_64` FROM (SELECT 1 AS `a`, 2 AS `b`)",
+            parser_dialect: Box::new(PostgreSqlDialect {}),
+            unparser_dialect: Box::new(UnparserBigqueryDialect::new()),
+        },
+        TestStatementWithDialect {
+            sql: "select a as \"a*\", b , c as \"c@\" from (select 1 as a , 2 as b, 3 as c)",
+            expected: "SELECT `a` AS `a_42`, `b`, `c` AS `c_64` FROM (SELECT 1 AS `a`, 2 AS `b`, 3 AS `c`)",
+            parser_dialect: Box::new(PostgreSqlDialect {}),
+            unparser_dialect: Box::new(UnparserBigqueryDialect::new()),
+        },
+        TestStatementWithDialect {
+            sql: "select * from (select a as \"a*\", b as \"b@\" from (select 1 as a , 2 as b)) where \"a*\" = 1",
+            expected: "SELECT * FROM (SELECT `a` AS `a_42`, `b` AS `b_64` FROM (SELECT 1 AS `a`, 2 AS `b`)) WHERE (`a_42` = 1)",
+            parser_dialect: Box::new(PostgreSqlDialect {}),
+            unparser_dialect: Box::new(UnparserBigqueryDialect::new()),
+        },
         TestStatementWithDialect {
             sql: "SELECT * FROM UNNEST([1,2,3])",
             expected: r#"SELECT * FROM (SELECT UNNEST([1, 2, 3]) AS "UNNEST(make_array(Int64(1),Int64(2),Int64(3)))")"#,