diff --git a/derive/src/lib.rs b/derive/src/lib.rs index d19696aa4..92b50315c 100644 --- a/derive/src/lib.rs +++ b/derive/src/lib.rs @@ -3,29 +3,34 @@ use quote::{format_ident, quote, quote_spanned, ToTokens}; use syn::spanned::Spanned; use syn::{ parse::{Parse, ParseStream}, - parse_macro_input, parse_quote, Attribute, Data, DeriveInput, - Fields, GenericParam, Generics, Ident, Index, LitStr, Meta, Token + parse_macro_input, parse_quote, Attribute, Data, DeriveInput, Fields, GenericParam, Generics, + Ident, Index, LitStr, Meta, Token, }; - /// Implementation of `[#derive(Visit)]` #[proc_macro_derive(VisitMut, attributes(visit))] pub fn derive_visit_mut(input: proc_macro::TokenStream) -> proc_macro::TokenStream { - derive_visit(input, &VisitType { - visit_trait: quote!(VisitMut), - visitor_trait: quote!(VisitorMut), - modifier: Some(quote!(mut)), - }) + derive_visit( + input, + &VisitType { + visit_trait: quote!(VisitMut), + visitor_trait: quote!(VisitorMut), + modifier: Some(quote!(mut)), + }, + ) } /// Implementation of `[#derive(Visit)]` #[proc_macro_derive(Visit, attributes(visit))] pub fn derive_visit_immutable(input: proc_macro::TokenStream) -> proc_macro::TokenStream { - derive_visit(input, &VisitType { - visit_trait: quote!(Visit), - visitor_trait: quote!(Visitor), - modifier: None, - }) + derive_visit( + input, + &VisitType { + visit_trait: quote!(Visit), + visitor_trait: quote!(Visitor), + modifier: None, + }, + ) } struct VisitType { @@ -34,15 +39,16 @@ struct VisitType { modifier: Option, } -fn derive_visit( - input: proc_macro::TokenStream, - visit_type: &VisitType, -) -> proc_macro::TokenStream { +fn derive_visit(input: proc_macro::TokenStream, visit_type: &VisitType) -> proc_macro::TokenStream { // Parse the input tokens into a syntax tree. let input = parse_macro_input!(input as DeriveInput); let name = input.ident; - let VisitType { visit_trait, visitor_trait, modifier } = visit_type; + let VisitType { + visit_trait, + visitor_trait, + modifier, + } = visit_type; let attributes = Attributes::parse(&input.attrs); // Add a bound `T: Visit` to every type parameter T. @@ -87,7 +93,10 @@ impl Parse for WithIdent { let mut result = WithIdent { with: None }; let ident = input.parse::()?; if ident != "with" { - return Err(syn::Error::new(ident.span(), "Expected identifier to be `with`")); + return Err(syn::Error::new( + ident.span(), + "Expected identifier to be `with`", + )); } input.parse::()?; let s = input.parse::()?; @@ -131,17 +140,26 @@ impl Attributes { } // Add a bound `T: Visit` to every type parameter T. -fn add_trait_bounds(mut generics: Generics, VisitType{visit_trait, ..}: &VisitType) -> Generics { +fn add_trait_bounds(mut generics: Generics, VisitType { visit_trait, .. }: &VisitType) -> Generics { for param in &mut generics.params { if let GenericParam::Type(ref mut type_param) = *param { - type_param.bounds.push(parse_quote!(sqlparser::ast::#visit_trait)); + type_param + .bounds + .push(parse_quote!(sqlparser::ast::#visit_trait)); } } generics } // Generate the body of the visit implementation for the given type -fn visit_children(data: &Data, VisitType{visit_trait, modifier, ..}: &VisitType) -> TokenStream { +fn visit_children( + data: &Data, + VisitType { + visit_trait, + modifier, + .. + }: &VisitType, +) -> TokenStream { match data { Data::Struct(data) => match &data.fields { Fields::Named(fields) => { diff --git a/src/ast/query.rs b/src/ast/query.rs index 64bda663a..8d61290b9 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -245,10 +245,80 @@ pub struct Select { pub named_window: Vec, /// QUALIFY (Snowflake) pub qualify: Option, + /// Returns true if `FROM` is before `SELECT` in the original SQL. + /// such as is the case for duckdb style `FROM t1 SELECT *` + pub from_before_select: bool, +} + +impl Select { + fn display_for_from(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "FROM {}", display_comma_separated(&self.from))?; + if !self.projection.is_empty() { + write!(f, " SELECT")?; + if let Some(ref distinct) = self.distinct { + write!(f, " {distinct}")?; + } + if let Some(ref top) = self.top { + write!(f, " {top}")?; + } + write!(f, " {}", display_comma_separated(&self.projection))?; + } + + if let Some(ref into) = self.into { + write!(f, " {into}")?; + } + + if !self.lateral_views.is_empty() { + for lv in &self.lateral_views { + write!(f, "{lv}")?; + } + } + if let Some(ref selection) = self.selection { + write!(f, " WHERE {selection}")?; + } + match &self.group_by { + GroupByExpr::All => write!(f, " GROUP BY ALL")?, + GroupByExpr::Expressions(exprs) => { + if !exprs.is_empty() { + write!(f, " GROUP BY {}", display_comma_separated(exprs))?; + } + } + } + if !self.cluster_by.is_empty() { + write!( + f, + " CLUSTER BY {}", + display_comma_separated(&self.cluster_by) + )?; + } + if !self.distribute_by.is_empty() { + write!( + f, + " DISTRIBUTE BY {}", + display_comma_separated(&self.distribute_by) + )?; + } + if !self.sort_by.is_empty() { + write!(f, " SORT BY {}", display_comma_separated(&self.sort_by))?; + } + if let Some(ref having) = self.having { + write!(f, " HAVING {having}")?; + } + if !self.named_window.is_empty() { + write!(f, " WINDOW {}", display_comma_separated(&self.named_window))?; + } + if let Some(ref qualify) = self.qualify { + write!(f, " QUALIFY {qualify}")?; + } + Ok(()) + } } impl fmt::Display for Select { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + if self.from_before_select { + return self.display_for_from(f); + } write!(f, "SELECT")?; if let Some(ref distinct) = self.distinct { write!(f, " {distinct}")?; diff --git a/src/parser/mod.rs b/src/parser/mod.rs index c2397c74a..98c59a45c 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -516,6 +516,12 @@ impl<'a> Parser<'a> { Keyword::MERGE => Ok(self.parse_merge()?), // `PRAGMA` is sqlite specific https://www.sqlite.org/pragma.html Keyword::PRAGMA => Ok(self.parse_pragma()?), + + // Duckdb style `FROM ` or `FROM tbl SELECT ...` + Keyword::FROM if dialect_of!(self is DuckDbDialect | GenericDialect) => { + self.prev_token(); + Ok(Statement::Query(Box::new(self.parse_query()?))) + } // `INSTALL` is duckdb specific https://duckdb.org/docs/extensions/overview Keyword::INSTALL if dialect_of!(self is DuckDbDialect | GenericDialect) => { Ok(self.parse_install()?) @@ -6458,10 +6464,8 @@ impl<'a> Parser<'a> { } else { None }; - if self.parse_keyword(Keyword::INSERT) { let insert = self.parse_insert()?; - Ok(Query { with, body: Box::new(SetExpr::Insert(insert)), @@ -6722,6 +6726,8 @@ impl<'a> Parser<'a> { SetExpr::Values(self.parse_values(is_mysql)?) } else if self.parse_keyword(Keyword::TABLE) { SetExpr::Table(Box::new(self.parse_as_table()?)) + } else if self.parse_keyword(Keyword::FROM) { + SetExpr::Select(Box::new(self.parse_from_select()?)) } else { return self.expected( "SELECT, VALUES, or a subquery in the query body", @@ -6936,6 +6942,162 @@ impl<'a> Parser<'a> { having, named_window: named_windows, qualify, + from_before_select: false, + }) + } + + /// Parse a duckdb style `FROM` statement without a select. + /// assuming the initial `FROM` was already consumed + pub fn parse_from_select(&mut self) -> Result { + let from = self.parse_comma_separated(Parser::parse_table_and_joins)?; + + let distinct = self.parse_all_or_distinct()?; + let top = if self.parse_keyword(Keyword::TOP) { + Some(self.parse_top()?) + } else { + None + }; + + // FROM SELECT ... + let (selection, projection) = if self.parse_keyword(Keyword::SELECT) { + let projection = self.parse_comma_separated(Parser::parse_select_item)?; + let selection = if self.parse_keyword(Keyword::WHERE) { + Some(self.parse_expr()?) + } else { + None + }; + (selection, projection) + // FROM WHERE + } else if self.parse_keyword(Keyword::WHERE) { + let selection = Some(self.parse_expr()?); + + let projection = self + .maybe_parse(|parser| parser.parse_projection()) + .unwrap_or_default(); + (selection, projection) + } else { + let selection = None; + let projection = self + .maybe_parse(|parser| parser.parse_projection()) + .unwrap_or_default(); + (selection, projection) + }; + + let into = if self.parse_keyword(Keyword::INTO) { + let temporary = self + .parse_one_of_keywords(&[Keyword::TEMP, Keyword::TEMPORARY]) + .is_some(); + let unlogged = self.parse_keyword(Keyword::UNLOGGED); + let table = self.parse_keyword(Keyword::TABLE); + let name = self.parse_object_name(false)?; + Some(SelectInto { + temporary, + unlogged, + table, + name, + }) + } else { + None + }; + + // Note that for keywords to be properly handled here, they need to be + // added to `RESERVED_FOR_COLUMN_ALIAS` / `RESERVED_FOR_TABLE_ALIAS`, + // otherwise they may be parsed as an alias as part of the `projection` + // or `from`. + + let mut lateral_views = vec![]; + loop { + if self.parse_keywords(&[Keyword::LATERAL, Keyword::VIEW]) { + let outer = self.parse_keyword(Keyword::OUTER); + let lateral_view = self.parse_expr()?; + let lateral_view_name = self.parse_object_name(false)?; + let lateral_col_alias = self + .parse_comma_separated(|parser| { + parser.parse_optional_alias(&[ + Keyword::WHERE, + Keyword::GROUP, + Keyword::CLUSTER, + Keyword::HAVING, + Keyword::LATERAL, + ]) // This couldn't possibly be a bad idea + })? + .into_iter() + .flatten() + .collect(); + + lateral_views.push(LateralView { + lateral_view, + lateral_view_name, + lateral_col_alias, + outer, + }); + } else { + break; + } + } + + let group_by = if self.parse_keywords(&[Keyword::GROUP, Keyword::BY]) { + if self.parse_keyword(Keyword::ALL) { + GroupByExpr::All + } else { + GroupByExpr::Expressions(self.parse_comma_separated(Parser::parse_group_by_expr)?) + } + } else { + GroupByExpr::Expressions(vec![]) + }; + + let cluster_by = if self.parse_keywords(&[Keyword::CLUSTER, Keyword::BY]) { + self.parse_comma_separated(Parser::parse_expr)? + } else { + vec![] + }; + + let distribute_by = if self.parse_keywords(&[Keyword::DISTRIBUTE, Keyword::BY]) { + self.parse_comma_separated(Parser::parse_expr)? + } else { + vec![] + }; + + let sort_by = if self.parse_keywords(&[Keyword::SORT, Keyword::BY]) { + self.parse_comma_separated(Parser::parse_expr)? + } else { + vec![] + }; + + let having = if self.parse_keyword(Keyword::HAVING) { + Some(self.parse_expr()?) + } else { + None + }; + + let named_windows = if self.parse_keyword(Keyword::WINDOW) { + self.parse_comma_separated(Parser::parse_named_window)? + } else { + vec![] + }; + + let qualify = if self.parse_keyword(Keyword::QUALIFY) { + Some(self.parse_expr()?) + } else { + None + }; + + Ok(Select { + distinct, + top, + projection, + into, + from, + lateral_views, + selection, + group_by, + cluster_by, + distribute_by, + sort_by, + having, + named_window: named_windows, + qualify, + from_before_select: true, }) } diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index e7c85c2a3..51a3f1af7 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -110,6 +110,7 @@ fn parse_map_access_expr() { having: None, named_window: vec![], qualify: None, + from_before_select: false }, select ); diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index bd95e3164..135b8dbc6 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -400,7 +400,8 @@ fn parse_update_set_from() { sort_by: vec![], having: None, named_window: vec![], - qualify: None + qualify: None, + from_before_select: false }))), order_by: vec![], limit: None, @@ -4212,6 +4213,7 @@ fn test_parse_named_window() { ), ], qualify: None, + from_before_select: false, }; assert_eq!(actual_select_only, expected); } @@ -4567,6 +4569,7 @@ fn parse_interval_and_or_xor() { having: None, named_window: vec![], qualify: None, + from_before_select: false, }))), order_by: vec![], limit: None, @@ -6550,6 +6553,7 @@ fn lateral_function() { having: None, named_window: vec![], qualify: None, + from_before_select: false, }; assert_eq!(actual_select_only, expected); } @@ -7193,6 +7197,7 @@ fn parse_merge() { having: None, named_window: vec![], qualify: None, + from_before_select: false }))), order_by: vec![], limit: None, diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs index 97bdd8e1c..85704b264 100644 --- a/tests/sqlparser_duckdb.rs +++ b/tests/sqlparser_duckdb.rs @@ -177,6 +177,7 @@ fn test_select_union_by_name() { having: None, named_window: vec![], qualify: None, + from_before_select: false, }))), right: Box::::new(SetExpr::Select(Box::new(Select { distinct: None, @@ -211,6 +212,7 @@ fn test_select_union_by_name() { having: None, named_window: vec![], qualify: None, + from_before_select: false, }))), }); assert_eq!(ast.body, expected); @@ -244,3 +246,188 @@ fn test_duckdb_load_extension() { stmt ); } + +#[test] +fn test_duckdb_from_statement() { + let stmt = duckdb().verified_only_select("FROM my_table"); + let expected = Select { + distinct: None, + top: None, + projection: vec![], + into: None, + from: vec![TableWithJoins { + relation: TableFactor::Table { + name: ObjectName(vec![Ident { + value: "my_table".to_string(), + quote_style: None, + }]), + alias: None, + args: None, + with_hints: vec![], + version: None, + partitions: vec![], + }, + joins: vec![], + }], + lateral_views: vec![], + selection: None, + group_by: GroupByExpr::Expressions(vec![]), + cluster_by: vec![], + distribute_by: vec![], + sort_by: vec![], + having: None, + named_window: vec![], + qualify: None, + from_before_select: true, + }; + assert_eq!(stmt, expected); +} + +#[test] +fn test_duckdb_from_statement_with_filter() { + let stmt = duckdb().verified_only_select("FROM t1 WHERE a = 1"); + println!("{:?}", stmt); + let expected = Select { + distinct: None, + top: None, + projection: vec![], + into: None, + from: vec![TableWithJoins { + relation: TableFactor::Table { + name: ObjectName(vec![Ident { + value: "t1".to_string(), + quote_style: None, + }]), + alias: None, + args: None, + with_hints: vec![], + version: None, + partitions: vec![], + }, + joins: vec![], + }], + lateral_views: vec![], + selection: Some(Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident { + value: "a".to_string(), + quote_style: None, + })), + op: BinaryOperator::Eq, + right: Box::new(Expr::Value(number("1"))), + }), + group_by: GroupByExpr::Expressions(vec![]), + cluster_by: vec![], + distribute_by: vec![], + sort_by: vec![], + having: None, + named_window: vec![], + qualify: None, + from_before_select: true, + }; + assert_eq!(stmt, expected); +} + +#[test] +fn test_duckdb_from_statement_with_filter_and_select() { + let stmt = duckdb().verified_only_select("FROM t1 SELECT b WHERE a = 1"); + println!("{:?}", stmt); + let expected = Select { + distinct: None, + top: None, + projection: vec![SelectItem::UnnamedExpr(Expr::Identifier(Ident { + value: "b".to_string(), + quote_style: None, + }))], + into: None, + from: vec![TableWithJoins { + relation: TableFactor::Table { + name: ObjectName(vec![Ident { + value: "t1".to_string(), + quote_style: None, + }]), + alias: None, + args: None, + with_hints: vec![], + version: None, + partitions: vec![], + }, + joins: vec![], + }], + lateral_views: vec![], + selection: Some(Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident { + value: "a".to_string(), + quote_style: None, + })), + op: BinaryOperator::Eq, + right: Box::new(Expr::Value(number("1"))), + }), + group_by: GroupByExpr::Expressions(vec![]), + cluster_by: vec![], + distribute_by: vec![], + sort_by: vec![], + having: None, + named_window: vec![], + qualify: None, + from_before_select: true, + }; + assert_eq!(stmt, expected); +} + +#[test] +fn test_from_with_copy() { + let stmt = duckdb().verified_stmt("COPY (FROM trek_facts) TO 'phaser_filled_facts.parquet'"); + let body = Box::new(SetExpr::Select(Box::new(Select { + distinct: None, + top: None, + projection: vec![], + into: None, + from: vec![TableWithJoins { + relation: TableFactor::Table { + name: ObjectName(vec![Ident { + value: "trek_facts".to_string(), + quote_style: None, + }]), + alias: None, + args: None, + with_hints: vec![], + version: None, + partitions: vec![], + }, + joins: vec![], + }], + lateral_views: vec![], + selection: None, + group_by: GroupByExpr::Expressions(vec![]), + cluster_by: vec![], + distribute_by: vec![], + sort_by: vec![], + having: None, + named_window: vec![], + qualify: None, + from_before_select: true, + }))); + let source = CopySource::Query(Box::new(Query { + with: None, + body, + order_by: vec![], + limit: None, + limit_by: vec![], + offset: None, + fetch: None, + locks: vec![], + for_clause: None, + })); + + let expected = Statement::Copy { + source, + to: true, + target: CopyTarget::File { + filename: "phaser_filled_facts.parquet".to_string(), + }, + options: vec![], + legacy_options: vec![], + values: vec![], + }; + assert_eq!(stmt, expected); +} diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index fc0d6394c..c346a3503 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -112,7 +112,8 @@ fn parse_create_procedure() { sort_by: vec![], having: None, named_window: vec![], - qualify: None + qualify: None, + from_before_select: false }))) }))], params: Some(vec![ @@ -595,7 +596,8 @@ fn parse_substring_in_select() { sort_by: vec![], having: None, named_window: vec![], - qualify: None + qualify: None, + from_before_select: false }))), order_by: vec![], limit: None, diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 103530928..e30028846 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -785,7 +785,8 @@ fn parse_escaped_quote_identifiers_with_escape() { sort_by: vec![], having: None, named_window: vec![], - qualify: None + qualify: None, + from_before_select: false }))), order_by: vec![], limit: None, @@ -829,7 +830,8 @@ fn parse_escaped_quote_identifiers_with_no_escape() { sort_by: vec![], having: None, named_window: vec![], - qualify: None + qualify: None, + from_before_select: false }))), order_by: vec![], limit: None, @@ -870,7 +872,8 @@ fn parse_escaped_backticks_with_escape() { sort_by: vec![], having: None, named_window: vec![], - qualify: None + qualify: None, + from_before_select: false }))), order_by: vec![], limit: None, @@ -911,7 +914,8 @@ fn parse_escaped_backticks_with_no_escape() { sort_by: vec![], having: None, named_window: vec![], - qualify: None + qualify: None, + from_before_select: false }))), order_by: vec![], limit: None, @@ -1581,6 +1585,7 @@ fn parse_select_with_numeric_prefix_column_name() { having: None, named_window: vec![], qualify: None, + from_before_select: false }))) ); } @@ -1631,6 +1636,7 @@ fn parse_select_with_concatenation_of_exp_number_and_numeric_prefix_column() { having: None, named_window: vec![], qualify: None, + from_before_select: false }))) ); } @@ -1841,7 +1847,8 @@ fn parse_substring_in_select() { sort_by: vec![], having: None, named_window: vec![], - qualify: None + qualify: None, + from_before_select: false }))), order_by: vec![], limit: None, @@ -2143,7 +2150,8 @@ fn parse_hex_string_introducer() { having: None, named_window: vec![], qualify: None, - into: None + into: None, + from_before_select: false }))), order_by: vec![], limit: None, diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index c987822b8..3a99e626a 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -1082,6 +1082,7 @@ fn parse_copy_to() { distribute_by: vec![], sort_by: vec![], qualify: None, + from_before_select: false }))), order_by: vec![], limit: None, @@ -2139,6 +2140,7 @@ fn parse_array_subquery_expr() { having: None, named_window: vec![], qualify: None, + from_before_select: false }))), right: Box::new(SetExpr::Select(Box::new(Select { distinct: None, @@ -2155,6 +2157,7 @@ fn parse_array_subquery_expr() { having: None, named_window: vec![], qualify: None, + from_before_select: false }))), }), order_by: vec![],