From 1b3198ed49a71ec6be0a7e0007fb5aab2c1082b0 Mon Sep 17 00:00:00 2001 From: Simon Vandel Sillesen Date: Sun, 9 Mar 2025 23:00:21 +0100 Subject: [PATCH 1/8] support some of pipe operators Part of https://github.com/apache/datafusion-sqlparser-rs/issues/1758 Still missing - join - union|intersect|except - call - tablesample - pivot - unpivot --- src/ast/mod.rs | 19 +++---- src/ast/query.rs | 98 ++++++++++++++++++++++++++++++++++ src/ast/spans.rs | 9 ++-- src/dialect/bigquery.rs | 4 ++ src/dialect/mod.rs | 14 +++++ src/keywords.rs | 2 + src/parser/mod.rs | 102 ++++++++++++++++++++++++++++++++++++ src/tokenizer.rs | 6 +++ tests/sqlparser_common.rs | 76 +++++++++++++++++++++++++++ tests/sqlparser_mssql.rs | 4 ++ tests/sqlparser_mysql.rs | 15 ++++++ tests/sqlparser_postgres.rs | 5 ++ 12 files changed, 341 insertions(+), 13 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index e5e4aef05..7817db7ed 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -68,15 +68,16 @@ pub use self::query::{ JsonTableColumn, JsonTableColumnErrorHandling, JsonTableNamedColumn, JsonTableNestedColumn, LateralView, LockClause, LockType, MatchRecognizePattern, MatchRecognizeSymbol, Measure, NamedWindowDefinition, NamedWindowExpr, NonBlock, Offset, OffsetRows, OpenJsonTableColumn, - OrderBy, OrderByExpr, OrderByKind, OrderByOptions, PivotValueSource, ProjectionSelect, Query, - RenameSelectItem, RepetitionQuantifier, ReplaceSelectElement, ReplaceSelectItem, RowsPerMatch, - Select, SelectFlavor, SelectInto, SelectItem, SelectItemQualifiedWildcardKind, SetExpr, - SetOperator, SetQuantifier, Setting, SymbolDefinition, Table, TableAlias, TableAliasColumnDef, - TableFactor, TableFunctionArgs, TableIndexHintForClause, TableIndexHintType, TableIndexHints, - TableIndexType, TableSample, TableSampleBucket, TableSampleKind, TableSampleMethod, - TableSampleModifier, TableSampleQuantity, TableSampleSeed, TableSampleSeedModifier, - TableSampleUnit, TableVersion, TableWithJoins, Top, TopQuantity, 
UpdateTableFromKind, - ValueTableMode, Values, WildcardAdditionalOptions, With, WithFill, + OrderBy, OrderByExpr, OrderByKind, OrderByOptions, PipeOperator, PivotValueSource, + ProjectionSelect, Query, RenameSelectItem, RepetitionQuantifier, ReplaceSelectElement, + ReplaceSelectItem, RowsPerMatch, Select, SelectFlavor, SelectInto, SelectItem, + SelectItemQualifiedWildcardKind, SetExpr, SetOperator, SetQuantifier, Setting, + SymbolDefinition, Table, TableAlias, TableAliasColumnDef, TableFactor, TableFunctionArgs, + TableIndexHintForClause, TableIndexHintType, TableIndexHints, TableIndexType, TableSample, + TableSampleBucket, TableSampleKind, TableSampleMethod, TableSampleModifier, + TableSampleQuantity, TableSampleSeed, TableSampleSeedModifier, TableSampleUnit, TableVersion, + TableWithJoins, Top, TopQuantity, UpdateTableFromKind, ValueTableMode, Values, + WildcardAdditionalOptions, With, WithFill, }; pub use self::trigger::{ diff --git a/src/ast/query.rs b/src/ast/query.rs index bed991114..cc8b9d40c 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -68,6 +68,9 @@ pub struct Query { /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/select/format) /// (ClickHouse-specific) pub format_clause: Option, + + /// Pipe operator + pub pipe_operators: Vec, } impl fmt::Display for Query { @@ -103,6 +106,9 @@ impl fmt::Display for Query { if let Some(ref format) = self.format_clause { write!(f, " {}", format)?; } + for pipe_operator in &self.pipe_operators { + write!(f, " |> {}", pipe_operator)?; + } Ok(()) } } @@ -2407,6 +2413,98 @@ impl fmt::Display for OffsetRows { } } +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum PipeOperator { + Limit { + expr: Expr, + offset: Option, + }, + Where { + expr: Expr, + }, + OrderBy { + exprs: Vec, + }, + Select { + exprs: Vec, + }, + Extend { + exprs: Vec, + }, + Set 
{ + assignments: Vec, + }, + Drop { + columns: Vec, + }, + Alias { + alias: Ident, + }, + Aggregate { + full_table_exprs: Vec, + group_by_exprs: Vec, + }, +} + +impl fmt::Display for PipeOperator { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + PipeOperator::Select { exprs } => { + write!(f, "SELECT {}", display_comma_separated(exprs.as_slice())) + } + PipeOperator::Extend { exprs } => { + write!(f, "EXTEND {}", display_comma_separated(exprs.as_slice())) + } + PipeOperator::Set { assignments } => { + write!(f, "SET {}", display_comma_separated(assignments.as_slice())) + } + PipeOperator::Drop { columns } => { + write!(f, "DROP {}", display_comma_separated(columns.as_slice())) + } + PipeOperator::Alias { alias } => { + write!(f, "AS {}", alias) + } + PipeOperator::Limit { expr, offset } => { + write!(f, "LIMIT {}", expr)?; + if let Some(offset) = offset { + write!(f, " OFFSET {}", offset)?; + } + Ok(()) + } + PipeOperator::Aggregate { + full_table_exprs, + group_by_exprs, + } => { + write!(f, "AGGREGATE")?; + if !full_table_exprs.is_empty() { + write!( + f, + " {}", + display_comma_separated(full_table_exprs.as_slice()) + )?; + } + if !group_by_exprs.is_empty() { + write!( + f, + " GROUP BY {}", + display_comma_separated(group_by_exprs.as_slice()) + )?; + } + Ok(()) + } + + PipeOperator::Where { expr } => { + write!(f, "WHERE {}", expr) + } + PipeOperator::OrderBy { exprs } => { + write!(f, "ORDER BY {}", display_comma_separated(exprs.as_slice())) + } + } + } +} + #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] diff --git a/src/ast/spans.rs b/src/ast/spans.rs index 0a64fb8ea..4f99451a0 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -98,10 +98,11 @@ impl Spanned for Query { limit_by, offset, fetch, - locks: _, // todo - for_clause: _, // todo, mssql specific - settings: _, // todo, clickhouse 
specific - format_clause: _, // todo, clickhouse specific + locks: _, // todo + for_clause: _, // todo, mssql specific + settings: _, // todo, clickhouse specific + format_clause: _, // todo, clickhouse specific + pipe_operators: _, // todo bigquery specific } = self; union_spans( diff --git a/src/dialect/bigquery.rs b/src/dialect/bigquery.rs index 49fb24f19..68ca1390a 100644 --- a/src/dialect/bigquery.rs +++ b/src/dialect/bigquery.rs @@ -136,6 +136,10 @@ impl Dialect for BigQueryDialect { fn is_column_alias(&self, kw: &Keyword, _parser: &mut Parser) -> bool { !RESERVED_FOR_COLUMN_ALIAS.contains(kw) } + + fn supports_pipe_operator(&self) -> bool { + true + } } impl BigQueryDialect { diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index aeb097cfd..244a79095 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -508,6 +508,20 @@ pub trait Dialect: Debug + Any { false } + /// Return true if the dialect supports pipe operator. + /// + /// Example: + /// ```sql + /// SELECT * + /// FROM table + /// |> limit 1 + /// ``` + /// + /// See "SQL Has Problems. We Can Fix Them: Pipe Syntax In SQL" https://research.google/pubs/sql-has-problems-we-can-fix-them-pipe-syntax-in-sql/ + fn supports_pipe_operator(&self) -> bool { + false + } + /// Does the dialect support MySQL-style `'user'@'host'` grantee syntax? 
fn supports_user_host_grantee(&self) -> bool { false diff --git a/src/keywords.rs b/src/keywords.rs index bda817df9..e2a1f5f41 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -83,6 +83,7 @@ define_keywords!( ADMIN, AFTER, AGAINST, + AGGREGATE, AGGREGATION, ALERT, ALGORITHM, @@ -335,6 +336,7 @@ define_keywords!( EXPLAIN, EXPLICIT, EXPORT, + EXTEND, EXTENDED, EXTENSION, EXTERNAL, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index b34415388..150b5dca1 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -10231,6 +10231,7 @@ impl<'a> Parser<'a> { for_clause: None, settings: None, format_clause: None, + pipe_operators: vec![], } .into()) } else if self.parse_keyword(Keyword::UPDATE) { @@ -10246,6 +10247,7 @@ impl<'a> Parser<'a> { for_clause: None, settings: None, format_clause: None, + pipe_operators: vec![], } .into()) } else { @@ -10319,6 +10321,104 @@ impl<'a> Parser<'a> { None }; + let mut pipe_operators = Vec::new(); + + // Syntax from "SQL Has Problems. We Can Fix Them: Pipe Syntax In SQL" + // https://storage.googleapis.com/gweb-research2023-media/pubtools/1004848.pdf + while self.consume_token(&Token::VerticalBarRightAngleBracket) { + let kw = self.expect_one_of_keywords(&[ + Keyword::SELECT, + Keyword::EXTEND, + Keyword::SET, + Keyword::DROP, + Keyword::AS, + Keyword::WHERE, + Keyword::LIMIT, + Keyword::AGGREGATE, + Keyword::ORDER, + ])?; + match kw { + // SELECT [[AS] alias], ... + Keyword::SELECT => { + let exprs = self.parse_comma_separated(Parser::parse_select_item)?; + pipe_operators.push(PipeOperator::Select { exprs }) + } + // EXTEND [[AS] alias], ... + Keyword::EXTEND => { + let exprs = self.parse_comma_separated(Parser::parse_select_item)?; + pipe_operators.push(PipeOperator::Extend { exprs }) + } + // SET = , ... + Keyword::SET => { + let assignments = self.parse_comma_separated(Parser::parse_assignment)?; + pipe_operators.push(PipeOperator::Set { assignments }) + } + // DROP , ... 
+ Keyword::DROP => { + let columns = self.parse_identifiers()?; + pipe_operators.push(PipeOperator::Drop { columns }) + } + // AS + Keyword::AS => { + let alias = self.parse_identifier()?; + pipe_operators.push(PipeOperator::Alias { alias }) + } + // WHERE + Keyword::WHERE => { + let expr = self.parse_expr()?; + pipe_operators.push(PipeOperator::Where { expr }) + } + // LIMIT [OFFSET ] + Keyword::LIMIT => { + let expr = self.parse_expr()?; + let offset = if self.parse_keyword(Keyword::OFFSET) { + Some(self.parse_expr()?) + } else { + None + }; + pipe_operators.push(PipeOperator::Limit { expr, offset }) + } + // AGGREGATE [[AS] alias], ... + // + // and + // + // AGGREGATE [ [[AS] alias], ...] + // GROUP BY [AS alias], ... + Keyword::AGGREGATE => { + let full_table_exprs = self.parse_comma_separated0( + |parser| { + let expr = parser.parse_expr()?; + let alias = parser.maybe_parse_select_item_alias()?; + Ok(ExprWithAlias { expr, alias }) + }, + Token::make_keyword(keywords::GROUP), + )?; + + let group_by_exprs = if self.parse_keywords(&[Keyword::GROUP, Keyword::BY]) + { + self.parse_comma_separated(|parser| { + let expr = parser.parse_expr()?; + let alias = parser.maybe_parse_select_item_alias()?; + Ok(ExprWithAlias { expr, alias }) + })? + } else { + vec![] + }; + pipe_operators.push(PipeOperator::Aggregate { + full_table_exprs, + group_by_exprs, + }) + } + // ORDER BY [ASC|DESC], ... 
+ Keyword::ORDER => { + self.expect_one_of_keywords(&[Keyword::BY])?; + let exprs = self.parse_comma_separated(Parser::parse_order_by_expr)?; + pipe_operators.push(PipeOperator::OrderBy { exprs }) + } + _ => {} + } + } + Ok(Query { with, body, @@ -10331,6 +10431,7 @@ impl<'a> Parser<'a> { for_clause, settings, format_clause, + pipe_operators, } .into()) } @@ -11688,6 +11789,7 @@ impl<'a> Parser<'a> { for_clause: None, settings: None, format_clause: None, + pipe_operators: vec![], }), alias, }) diff --git a/src/tokenizer.rs b/src/tokenizer.rs index d33a7d8af..bfbc1804b 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -246,6 +246,8 @@ pub enum Token { ShiftLeftVerticalBar, /// `|>> PostgreSQL/Redshift geometrical binary operator (Is strictly above?) VerticalBarShiftRight, + /// `|> BigQuery pipe operator + VerticalBarRightAngleBracket, /// `#>>`, extracts JSON sub-object at the specified path as text HashLongArrow, /// jsonb @> jsonb -> boolean: Test whether left json contains the right json @@ -359,6 +361,7 @@ impl fmt::Display for Token { Token::AmpersandRightAngleBracket => f.write_str("&>"), Token::AmpersandLeftAngleBracketVerticalBar => f.write_str("&<|"), Token::VerticalBarAmpersandRightAngleBracket => f.write_str("|&>"), + Token::VerticalBarRightAngleBracket => f.write_str("|>"), Token::TwoWayArrow => f.write_str("<->"), Token::LeftAngleBracketCaret => f.write_str("<^"), Token::RightAngleBracketCaret => f.write_str(">^"), @@ -1378,6 +1381,9 @@ impl<'a> Tokenizer<'a> { _ => self.start_binop_opt(chars, "|>", None), } } + Some('>') if self.dialect.supports_pipe_operator() => { + self.consume_for_binop(chars, "|>", Token::VerticalBarRightAngleBracket) + } // Bitshift '|' operator _ => self.start_binop(chars, "|", Token::Pipe), } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index a8ccd70a7..60ec8fa37 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -491,6 +491,7 @@ fn parse_update_set_from() { for_clause: None, 
settings: None, format_clause: None, + pipe_operators: vec![], }), alias: Some(TableAlias { name: Ident::new("t2"), @@ -4255,6 +4256,7 @@ fn parse_create_table_as_table() { for_clause: None, settings: None, format_clause: None, + pipe_operators: vec![], }); match verified_stmt(sql1) { @@ -4282,6 +4284,7 @@ fn parse_create_table_as_table() { for_clause: None, settings: None, format_clause: None, + pipe_operators: vec![], }); match verified_stmt(sql2) { @@ -6281,6 +6284,7 @@ fn parse_interval_and_or_xor() { for_clause: None, settings: None, format_clause: None, + pipe_operators: vec![], }))]; assert_eq!(actual_ast, expected_ast); @@ -9369,6 +9373,7 @@ fn parse_merge() { for_clause: None, settings: None, format_clause: None, + pipe_operators: vec![], }), alias: Some(TableAlias { name: Ident { @@ -11239,6 +11244,7 @@ fn parse_unload() { order_by: None, settings: None, format_clause: None, + pipe_operators: vec![], }), to: Ident { value: "s3://...".to_string(), @@ -12411,6 +12417,7 @@ fn test_extract_seconds_ok() { for_clause: None, settings: None, format_clause: None, + pipe_operators: vec![], }))]; assert_eq!(actual_ast, expected_ast); @@ -14276,6 +14283,7 @@ fn test_select_from_first() { for_clause: None, settings: None, format_clause: None, + pipe_operators: vec![], }; assert_eq!(expected, ast); assert_eq!(ast.to_string(), q); @@ -14654,3 +14662,71 @@ fn parse_set_names() { dialects.verified_stmt("SET NAMES 'utf8'"); dialects.verified_stmt("SET NAMES UTF8 COLLATE bogus"); } + +#[test] +fn parse_pipeline_operator() { + let dialects = all_dialects_where(|d| d.supports_pipe_operator()); + + // select pipe operator + dialects.verified_stmt("SELECT * FROM users |> SELECT id"); + dialects.verified_stmt("SELECT * FROM users |> SELECT id, name"); + dialects.verified_query_with_canonical( + "SELECT * FROM users |> SELECT id user_id", + "SELECT * FROM users |> SELECT id AS user_id", + ); + dialects.verified_stmt("SELECT * FROM users |> SELECT id AS user_id"); + + // extend 
pipe operator + dialects.verified_stmt("SELECT * FROM users |> EXTEND id + 1 AS new_id"); + dialects.verified_stmt("SELECT * FROM users |> EXTEND id AS new_id, name AS new_name"); + dialects.verified_query_with_canonical( + "SELECT * FROM users |> EXTEND id user_id", + "SELECT * FROM users |> EXTEND id AS user_id", + ); + + // set pipe operator + dialects.verified_stmt("SELECT * FROM users |> SET id = id + 1"); + dialects.verified_stmt("SELECT * FROM users |> SET id = id + 1, name = name + ' Doe'"); + + // drop pipe operator + dialects.verified_stmt("SELECT * FROM users |> DROP id"); + dialects.verified_stmt("SELECT * FROM users |> DROP id, name"); + + // as pipe operator + dialects.verified_stmt("SELECT * FROM users |> AS new_users"); + + // limit pipe operator + dialects.verified_stmt("SELECT * FROM users |> LIMIT 10"); + dialects.verified_stmt("SELECT * FROM users |> LIMIT 10 OFFSET 5"); + dialects.verified_stmt("SELECT * FROM users |> LIMIT 10 |> LIMIT 5"); + dialects.verified_stmt("SELECT * FROM users |> LIMIT 10 |> WHERE true"); + + // where pipe operator + dialects.verified_stmt("SELECT * FROM users |> WHERE id = 1"); + dialects.verified_stmt("SELECT * FROM users |> WHERE id = 1 AND name = 'John'"); + dialects.verified_stmt("SELECT * FROM users |> WHERE id = 1 OR name = 'John'"); + + // aggregate pipe operator full table + dialects.verified_stmt("SELECT * FROM users |> AGGREGATE COUNT(*)"); + dialects.verified_query_with_canonical( + "SELECT * FROM users |> AGGREGATE COUNT(*) total_users", + "SELECT * FROM users |> AGGREGATE COUNT(*) AS total_users", + ); + dialects.verified_stmt("SELECT * FROM users |> AGGREGATE COUNT(*) AS total_users"); + dialects.verified_stmt("SELECT * FROM users |> AGGREGATE COUNT(*), MIN(id)"); + + // aggregate pipe operator with grouping + dialects.verified_stmt( + "SELECT * FROM users |> AGGREGATE SUM(o_totalprice) AS price, COUNT(*) AS cnt GROUP BY EXTRACT(YEAR FROM o_orderdate) AS year", + ); + dialects.verified_stmt( + "SELECT * 
FROM users |> AGGREGATE GROUP BY EXTRACT(YEAR FROM o_orderdate) AS year", + ); + dialects + .verified_stmt("SELECT * FROM users |> AGGREGATE GROUP BY EXTRACT(YEAR FROM o_orderdate)"); + + // order by pipe operator + dialects.verified_stmt("SELECT * FROM users |> ORDER BY id ASC"); + dialects.verified_stmt("SELECT * FROM users |> ORDER BY id DESC"); + dialects.verified_stmt("SELECT * FROM users |> ORDER BY id DESC, name ASC"); +} diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index 3f313af4f..b6530bca5 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -116,6 +116,7 @@ fn parse_create_procedure() { order_by: None, settings: None, format_clause: None, + pipe_operators: vec![], body: Box::new(SetExpr::Select(Box::new(Select { select_token: AttachedToken::empty(), distinct: None, @@ -1169,6 +1170,7 @@ fn parse_substring_in_select() { for_clause: None, settings: None, format_clause: None, + pipe_operators: vec![], }), query ); @@ -1273,6 +1275,8 @@ fn parse_mssql_declare() { order_by: None, settings: None, format_clause: None, + pipe_operators: vec![], + body: Box::new(SetExpr::Select(Box::new(Select { select_token: AttachedToken::empty(), distinct: None, diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 8d89ce4eb..68a4b7b15 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -1115,6 +1115,7 @@ fn parse_escaped_quote_identifiers_with_escape() { for_clause: None, settings: None, format_clause: None, + pipe_operators: vec![], })) ); } @@ -1169,6 +1170,7 @@ fn parse_escaped_quote_identifiers_with_no_escape() { for_clause: None, settings: None, format_clause: None, + pipe_operators: vec![], })) ); } @@ -1217,6 +1219,7 @@ fn parse_escaped_backticks_with_escape() { for_clause: None, settings: None, format_clause: None, + pipe_operators: vec![], })) ); } @@ -1269,6 +1272,7 @@ fn parse_escaped_backticks_with_no_escape() { for_clause: None, settings: None, format_clause: None, + pipe_operators: 
vec![], })) ); } @@ -1446,6 +1450,7 @@ fn parse_simple_insert() { for_clause: None, settings: None, format_clause: None, + pipe_operators: vec![], })), source ); @@ -1496,6 +1501,7 @@ fn parse_ignore_insert() { for_clause: None, settings: None, format_clause: None, + pipe_operators: vec![], })), source ); @@ -1546,6 +1552,7 @@ fn parse_priority_insert() { for_clause: None, settings: None, format_clause: None, + pipe_operators: vec![], })), source ); @@ -1593,6 +1600,7 @@ fn parse_priority_insert() { for_clause: None, settings: None, format_clause: None, + pipe_operators: vec![], })), source ); @@ -1642,6 +1650,7 @@ fn parse_insert_as() { for_clause: None, settings: None, format_clause: None, + pipe_operators: vec![], })), source ); @@ -1706,6 +1715,7 @@ fn parse_insert_as() { for_clause: None, settings: None, format_clause: None, + pipe_operators: vec![], })), source ); @@ -1757,6 +1767,7 @@ fn parse_replace_insert() { for_clause: None, settings: None, format_clause: None, + pipe_operators: vec![], })), source ); @@ -1799,6 +1810,7 @@ fn parse_empty_row_insert() { for_clause: None, settings: None, format_clause: None, + pipe_operators: vec![], })), source ); @@ -1865,6 +1877,7 @@ fn parse_insert_with_on_duplicate_update() { for_clause: None, settings: None, format_clause: None, + pipe_operators: vec![], })), source ); @@ -2594,6 +2607,7 @@ fn parse_substring_in_select() { for_clause: None, settings: None, format_clause: None, + pipe_operators: vec![], }), query ); @@ -2901,6 +2915,7 @@ fn parse_hex_string_introducer() { for_clause: None, settings: None, format_clause: None, + pipe_operators: vec![], })) ) } diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 0dfcc24ea..236f945ed 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -1327,6 +1327,7 @@ fn parse_copy_to() { for_clause: None, settings: None, format_clause: None, + pipe_operators: vec![], })), to: true, target: CopyTarget::File { @@ -2967,6 +2968,7 @@ fn 
parse_array_subquery_expr() { for_clause: None, settings: None, format_clause: None, + pipe_operators: vec![], })), filter: None, null_treatment: None, @@ -4759,6 +4761,7 @@ fn test_simple_postgres_insert_with_alias() { for_clause: None, settings: None, format_clause: None, + pipe_operators: vec![], })), assignments: vec![], partitioned: None, @@ -4844,6 +4847,7 @@ fn test_simple_postgres_insert_with_alias() { insert_alias: None, settings: None, format_clause: None, + pipe_operators: vec![], }) ) } @@ -4903,6 +4907,7 @@ fn test_simple_insert_with_quoted_alias() { for_clause: None, settings: None, format_clause: None, + pipe_operators: vec![], })), assignments: vec![], partitioned: None, From 496131c0e9cdfd886f635d1994ade0f6b3d85b7c Mon Sep 17 00:00:00 2001 From: Simon Vandel Sillesen Date: Wed, 12 Mar 2025 21:57:26 +0100 Subject: [PATCH 2/8] remove comment --- src/parser/mod.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 150b5dca1..011a25b2d 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -10323,8 +10323,6 @@ impl<'a> Parser<'a> { let mut pipe_operators = Vec::new(); - // Syntax from "SQL Has Problems. 
We Can Fix Them: Pipe Syntax In SQL" - // https://storage.googleapis.com/gweb-research2023-media/pubtools/1004848.pdf while self.consume_token(&Token::VerticalBarRightAngleBracket) { let kw = self.expect_one_of_keywords(&[ Keyword::SELECT, From 1e3bea05325395f0115ca1dc4bb0783e0a921c61 Mon Sep 17 00:00:00 2001 From: Simon Vandel Sillesen Date: Wed, 12 Mar 2025 22:13:02 +0100 Subject: [PATCH 3/8] better variant docs, rename Alias -> As --- src/ast/query.rs | 93 ++++++++++++++++++++++++++++++++++------------- src/parser/mod.rs | 16 +------- 2 files changed, 68 insertions(+), 41 deletions(-) diff --git a/src/ast/query.rs b/src/ast/query.rs index cc8b9d40c..a633f2c57 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -2413,35 +2413,76 @@ impl fmt::Display for OffsetRows { } } +/// Pipe syntax, first introduced in Google BigQuery. +/// Example: +/// +/// ```sql +/// FROM Produce +/// |> WHERE sales > 0 +/// |> AGGREGATE SUM(sales) AS total_sales, COUNT(*) AS num_sales +/// GROUP BY item; +/// ``` +/// +/// See https://cloud.google.com/bigquery/docs/reference/standard-sql/pipe-syntax#pipe_syntax #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum PipeOperator { - Limit { - expr: Expr, - offset: Option, - }, - Where { - expr: Expr, - }, - OrderBy { - exprs: Vec, - }, - Select { - exprs: Vec, - }, - Extend { - exprs: Vec, - }, - Set { - assignments: Vec, - }, - Drop { - columns: Vec, - }, - Alias { - alias: Ident, - }, + /// Limits the number of rows to return in a query, with an optional OFFSET clause to skip over rows. + /// + /// Syntax: `|> LIMIT <n> [OFFSET <m>]` + /// + /// See more at https://cloud.google.com/bigquery/docs/reference/standard-sql/pipe-syntax#limit_pipe_operator + Limit { expr: Expr, offset: Option }, + /// Filters the results of the input table. 
+ /// + /// Syntax: `|> WHERE <condition>` + /// + /// See more at https://cloud.google.com/bigquery/docs/reference/standard-sql/pipe-syntax#where_pipe_operator + Where { expr: Expr }, + /// ORDER BY <expr> [ASC|DESC], ... + OrderBy { exprs: Vec }, + /// Produces a new table with the listed columns, similar to the outermost SELECT clause in a table subquery in standard syntax. + /// + /// Syntax: `|> SELECT <expr> [[AS] alias], ...` + /// + /// See more at https://cloud.google.com/bigquery/docs/reference/standard-sql/pipe-syntax#select_pipe_operator + Select { exprs: Vec }, + /// Propagates the existing table and adds computed columns, similar to SELECT *, new_column in standard syntax. + /// + /// Syntax: `|> EXTEND <expr> [[AS] alias], ...` + /// + /// See more at https://cloud.google.com/bigquery/docs/reference/standard-sql/pipe-syntax#extend_pipe_operator + Extend { exprs: Vec }, + /// Replaces the value of a column in the current table, similar to SELECT * REPLACE (expression AS column) in standard syntax. + /// + /// Syntax: `|> SET <column> = <expression>, ...` + /// + /// See more at https://cloud.google.com/bigquery/docs/reference/standard-sql/pipe-syntax#set_pipe_operator + Set { assignments: Vec }, + /// Removes listed columns from the current table, similar to SELECT * EXCEPT (column) in standard syntax. + /// + /// Syntax: `|> DROP <column>, ...` + /// + /// See more at https://cloud.google.com/bigquery/docs/reference/standard-sql/pipe-syntax#drop_pipe_operator + Drop { columns: Vec }, + /// Introduces a table alias for the input table, similar to applying the AS alias clause on a table subquery in standard syntax. + /// + /// Syntax: `|> AS <alias>` + /// + /// See more at https://cloud.google.com/bigquery/docs/reference/standard-sql/pipe-syntax#as_pipe_operator + As { alias: Ident }, + /// Performs aggregation on data across grouped rows or an entire table. + /// + /// Syntax: `|> AGGREGATE <agg_expr> [[AS] alias], ...` + /// + /// Syntax: + /// ```text + /// |> AGGREGATE [<agg_expr> [[AS] alias], ...] + /// GROUP BY <grouping_expr> [AS alias], ... 
+ /// ``` + /// + /// See more at https://cloud.google.com/bigquery/docs/reference/standard-sql/pipe-syntax#aggregate_pipe_operator Aggregate { full_table_exprs: Vec, group_by_exprs: Vec, @@ -2463,7 +2504,7 @@ impl fmt::Display for PipeOperator { PipeOperator::Drop { columns } => { write!(f, "DROP {}", display_comma_separated(columns.as_slice())) } - PipeOperator::Alias { alias } => { + PipeOperator::As { alias } => { write!(f, "AS {}", alias) } PipeOperator::Limit { expr, offset } => { diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 011a25b2d..4b0990e1f 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -10336,37 +10336,30 @@ impl<'a> Parser<'a> { Keyword::ORDER, ])?; match kw { - // SELECT [[AS] alias], ... Keyword::SELECT => { let exprs = self.parse_comma_separated(Parser::parse_select_item)?; pipe_operators.push(PipeOperator::Select { exprs }) } - // EXTEND [[AS] alias], ... Keyword::EXTEND => { let exprs = self.parse_comma_separated(Parser::parse_select_item)?; pipe_operators.push(PipeOperator::Extend { exprs }) } - // SET = , ... Keyword::SET => { let assignments = self.parse_comma_separated(Parser::parse_assignment)?; pipe_operators.push(PipeOperator::Set { assignments }) } - // DROP , ... Keyword::DROP => { let columns = self.parse_identifiers()?; pipe_operators.push(PipeOperator::Drop { columns }) } - // AS Keyword::AS => { let alias = self.parse_identifier()?; - pipe_operators.push(PipeOperator::Alias { alias }) + pipe_operators.push(PipeOperator::As { alias }) } - // WHERE Keyword::WHERE => { let expr = self.parse_expr()?; pipe_operators.push(PipeOperator::Where { expr }) } - // LIMIT [OFFSET ] Keyword::LIMIT => { let expr = self.parse_expr()?; let offset = if self.parse_keyword(Keyword::OFFSET) { @@ -10376,12 +10369,6 @@ impl<'a> Parser<'a> { }; pipe_operators.push(PipeOperator::Limit { expr, offset }) } - // AGGREGATE [[AS] alias], ... - // - // and - // - // AGGREGATE [ [[AS] alias], ...] - // GROUP BY [AS alias], ... 
Keyword::AGGREGATE => { let full_table_exprs = self.parse_comma_separated0( |parser| { @@ -10407,7 +10394,6 @@ impl<'a> Parser<'a> { group_by_exprs, }) } - // ORDER BY [ASC|DESC], ... Keyword::ORDER => { self.expect_one_of_keywords(&[Keyword::BY])?; let exprs = self.parse_comma_separated(Parser::parse_order_by_expr)?; From 841a59664885dafa5339624aa2d5754fc908ba45 Mon Sep 17 00:00:00 2001 From: Simon Vandel Sillesen Date: Wed, 12 Mar 2025 22:19:05 +0100 Subject: [PATCH 4/8] move parsing to dedicated function --- src/parser/mod.rs | 166 ++++++++++++++++++++++++---------------------- 1 file changed, 85 insertions(+), 81 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 4b0990e1f..51f116252 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -10321,87 +10321,7 @@ impl<'a> Parser<'a> { None }; - let mut pipe_operators = Vec::new(); - - while self.consume_token(&Token::VerticalBarRightAngleBracket) { - let kw = self.expect_one_of_keywords(&[ - Keyword::SELECT, - Keyword::EXTEND, - Keyword::SET, - Keyword::DROP, - Keyword::AS, - Keyword::WHERE, - Keyword::LIMIT, - Keyword::AGGREGATE, - Keyword::ORDER, - ])?; - match kw { - Keyword::SELECT => { - let exprs = self.parse_comma_separated(Parser::parse_select_item)?; - pipe_operators.push(PipeOperator::Select { exprs }) - } - Keyword::EXTEND => { - let exprs = self.parse_comma_separated(Parser::parse_select_item)?; - pipe_operators.push(PipeOperator::Extend { exprs }) - } - Keyword::SET => { - let assignments = self.parse_comma_separated(Parser::parse_assignment)?; - pipe_operators.push(PipeOperator::Set { assignments }) - } - Keyword::DROP => { - let columns = self.parse_identifiers()?; - pipe_operators.push(PipeOperator::Drop { columns }) - } - Keyword::AS => { - let alias = self.parse_identifier()?; - pipe_operators.push(PipeOperator::As { alias }) - } - Keyword::WHERE => { - let expr = self.parse_expr()?; - pipe_operators.push(PipeOperator::Where { expr }) - } - Keyword::LIMIT => { - let expr = 
self.parse_expr()?; - let offset = if self.parse_keyword(Keyword::OFFSET) { - Some(self.parse_expr()?) - } else { - None - }; - pipe_operators.push(PipeOperator::Limit { expr, offset }) - } - Keyword::AGGREGATE => { - let full_table_exprs = self.parse_comma_separated0( - |parser| { - let expr = parser.parse_expr()?; - let alias = parser.maybe_parse_select_item_alias()?; - Ok(ExprWithAlias { expr, alias }) - }, - Token::make_keyword(keywords::GROUP), - )?; - - let group_by_exprs = if self.parse_keywords(&[Keyword::GROUP, Keyword::BY]) - { - self.parse_comma_separated(|parser| { - let expr = parser.parse_expr()?; - let alias = parser.maybe_parse_select_item_alias()?; - Ok(ExprWithAlias { expr, alias }) - })? - } else { - vec![] - }; - pipe_operators.push(PipeOperator::Aggregate { - full_table_exprs, - group_by_exprs, - }) - } - Keyword::ORDER => { - self.expect_one_of_keywords(&[Keyword::BY])?; - let exprs = self.parse_comma_separated(Parser::parse_order_by_expr)?; - pipe_operators.push(PipeOperator::OrderBy { exprs }) - } - _ => {} - } - } + let pipe_operators = self.parse_pipe_operators()?; Ok(Query { with, @@ -10421,6 +10341,90 @@ impl<'a> Parser<'a> { } } + fn parse_pipe_operators(&mut self) -> Result, ParserError> { + let mut pipe_operators = Vec::new(); + + while self.consume_token(&Token::VerticalBarRightAngleBracket) { + let kw = self.expect_one_of_keywords(&[ + Keyword::SELECT, + Keyword::EXTEND, + Keyword::SET, + Keyword::DROP, + Keyword::AS, + Keyword::WHERE, + Keyword::LIMIT, + Keyword::AGGREGATE, + Keyword::ORDER, + ])?; + match kw { + Keyword::SELECT => { + let exprs = self.parse_comma_separated(Parser::parse_select_item)?; + pipe_operators.push(PipeOperator::Select { exprs }) + } + Keyword::EXTEND => { + let exprs = self.parse_comma_separated(Parser::parse_select_item)?; + pipe_operators.push(PipeOperator::Extend { exprs }) + } + Keyword::SET => { + let assignments = self.parse_comma_separated(Parser::parse_assignment)?; + 
pipe_operators.push(PipeOperator::Set { assignments }) + } + Keyword::DROP => { + let columns = self.parse_identifiers()?; + pipe_operators.push(PipeOperator::Drop { columns }) + } + Keyword::AS => { + let alias = self.parse_identifier()?; + pipe_operators.push(PipeOperator::As { alias }) + } + Keyword::WHERE => { + let expr = self.parse_expr()?; + pipe_operators.push(PipeOperator::Where { expr }) + } + Keyword::LIMIT => { + let expr = self.parse_expr()?; + let offset = if self.parse_keyword(Keyword::OFFSET) { + Some(self.parse_expr()?) + } else { + None + }; + pipe_operators.push(PipeOperator::Limit { expr, offset }) + } + Keyword::AGGREGATE => { + let full_table_exprs = self.parse_comma_separated0( + |parser| { + let expr = parser.parse_expr()?; + let alias = parser.maybe_parse_select_item_alias()?; + Ok(ExprWithAlias { expr, alias }) + }, + Token::make_keyword(keywords::GROUP), + )?; + + let group_by_exprs = if self.parse_keywords(&[Keyword::GROUP, Keyword::BY]) { + self.parse_comma_separated(|parser| { + let expr = parser.parse_expr()?; + let alias = parser.maybe_parse_select_item_alias()?; + Ok(ExprWithAlias { expr, alias }) + })? 
+ } else { + vec![] + }; + pipe_operators.push(PipeOperator::Aggregate { + full_table_exprs, + group_by_exprs, + }) + } + Keyword::ORDER => { + self.expect_one_of_keywords(&[Keyword::BY])?; + let exprs = self.parse_comma_separated(Parser::parse_order_by_expr)?; + pipe_operators.push(PipeOperator::OrderBy { exprs }) + } + _ => {} + } + } + Ok(pipe_operators) + } + fn parse_settings(&mut self) -> Result<Option<Vec<Setting>>, ParserError> { let settings = if dialect_of!(self is ClickHouseDialect|GenericDialect) && self.parse_keyword(Keyword::SETTINGS) From 215894891e121ea6de5d076cf4431b2effff8ea6 Mon Sep 17 00:00:00 2001 From: Simon Vandel Sillesen Date: Wed, 12 Mar 2025 22:19:52 +0100 Subject: [PATCH 5/8] use other source --- src/dialect/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 244a79095..81c203230 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -517,7 +517,7 @@ pub trait Dialect: Debug + Any { /// |> limit 1 /// ``` /// - /// See "SQL Has Problems.
We Can Fix Them: Pipe Syntax In SQL" https://research.google/pubs/sql-has-problems-we-can-fix-them-pipe-syntax-in-sql/ + /// See https://cloud.google.com/bigquery/docs/pipe-syntax-guide#basic_syntax fn supports_pipe_operator(&self) -> bool { false } From 202582c8bc4e7540105686e0d060ff50ef484710 Mon Sep 17 00:00:00 2001 From: Simon Vandel Sillesen Date: Wed, 12 Mar 2025 22:24:26 +0100 Subject: [PATCH 6/8] add unreachable --- src/parser/mod.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 51f116252..f080e861e 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -10419,7 +10419,9 @@ impl<'a> Parser<'a> { let exprs = self.parse_comma_separated(Parser::parse_order_by_expr)?; pipe_operators.push(PipeOperator::OrderBy { exprs }) } - _ => {} + unhandled => { + unreachable!("`expect_one_of_keywords` further up allowed unhandled keyword: {unhandled:?}"); + } } } Ok(pipe_operators) From e4ef3cfb9657c02083a34da1bfd362fd931c99ec Mon Sep 17 00:00:00 2001 From: Simon Vandel Sillesen Date: Wed, 16 Apr 2025 19:59:01 +0200 Subject: [PATCH 7/8] make tests pass --- src/ast/mod.rs | 32 ++++++++++++++--------------- src/ast/query.rs | 34 +++++++++++++++++++++++-------- src/parser/mod.rs | 43 +++++++++++++++++++++++++++------------ tests/sqlparser_common.rs | 8 ++++++++ 4 files changed, 79 insertions(+), 38 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 7817db7ed..91efc616c 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -62,22 +62,22 @@ pub use self::dml::{CreateIndex, CreateTable, Delete, IndexColumn, Insert}; pub use self::operator::{BinaryOperator, UnaryOperator}; pub use self::query::{ AfterMatchSkip, ConnectBy, Cte, CteAsMaterialized, Distinct, EmptyMatchesMode, - ExceptSelectItem, ExcludeSelectItem, ExprWithAlias, Fetch, ForClause, ForJson, ForXml, - FormatClause, GroupByExpr, GroupByWithModifier, IdentWithAlias, IlikeSelectItem, - InputFormatClause, Interpolate, InterpolateExpr, Join, 
JoinConstraint, JoinOperator, - JsonTableColumn, JsonTableColumnErrorHandling, JsonTableNamedColumn, JsonTableNestedColumn, - LateralView, LockClause, LockType, MatchRecognizePattern, MatchRecognizeSymbol, Measure, - NamedWindowDefinition, NamedWindowExpr, NonBlock, Offset, OffsetRows, OpenJsonTableColumn, - OrderBy, OrderByExpr, OrderByKind, OrderByOptions, PipeOperator, PivotValueSource, - ProjectionSelect, Query, RenameSelectItem, RepetitionQuantifier, ReplaceSelectElement, - ReplaceSelectItem, RowsPerMatch, Select, SelectFlavor, SelectInto, SelectItem, - SelectItemQualifiedWildcardKind, SetExpr, SetOperator, SetQuantifier, Setting, - SymbolDefinition, Table, TableAlias, TableAliasColumnDef, TableFactor, TableFunctionArgs, - TableIndexHintForClause, TableIndexHintType, TableIndexHints, TableIndexType, TableSample, - TableSampleBucket, TableSampleKind, TableSampleMethod, TableSampleModifier, - TableSampleQuantity, TableSampleSeed, TableSampleSeedModifier, TableSampleUnit, TableVersion, - TableWithJoins, Top, TopQuantity, UpdateTableFromKind, ValueTableMode, Values, - WildcardAdditionalOptions, With, WithFill, + ExceptSelectItem, ExcludeSelectItem, ExprWithAlias, ExprWithAliasAndOrderBy, Fetch, ForClause, + ForJson, ForXml, FormatClause, GroupByExpr, GroupByWithModifier, IdentWithAlias, + IlikeSelectItem, InputFormatClause, Interpolate, InterpolateExpr, Join, JoinConstraint, + JoinOperator, JsonTableColumn, JsonTableColumnErrorHandling, JsonTableNamedColumn, + JsonTableNestedColumn, LateralView, LockClause, LockType, MatchRecognizePattern, + MatchRecognizeSymbol, Measure, NamedWindowDefinition, NamedWindowExpr, NonBlock, Offset, + OffsetRows, OpenJsonTableColumn, OrderBy, OrderByExpr, OrderByKind, OrderByOptions, + PipeOperator, PivotValueSource, ProjectionSelect, Query, RenameSelectItem, + RepetitionQuantifier, ReplaceSelectElement, ReplaceSelectItem, RowsPerMatch, Select, + SelectFlavor, SelectInto, SelectItem, SelectItemQualifiedWildcardKind, SetExpr, 
SetOperator, + SetQuantifier, Setting, SymbolDefinition, Table, TableAlias, TableAliasColumnDef, TableFactor, + TableFunctionArgs, TableIndexHintForClause, TableIndexHintType, TableIndexHints, + TableIndexType, TableSample, TableSampleBucket, TableSampleKind, TableSampleMethod, + TableSampleModifier, TableSampleQuantity, TableSampleSeed, TableSampleSeedModifier, + TableSampleUnit, TableVersion, TableWithJoins, Top, TopQuantity, UpdateTableFromKind, + ValueTableMode, Values, WildcardAdditionalOptions, With, WithFill, }; pub use self::trigger::{ diff --git a/src/ast/query.rs b/src/ast/query.rs index a633f2c57..716ea30cc 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -1019,6 +1019,26 @@ impl fmt::Display for ExprWithAlias { } } +/// An expression optionally followed by an alias and order by options. +/// +/// Example: +/// ```sql +/// 42 AS myint ASC +/// ``` +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct ExprWithAliasAndOrderBy { + pub expr: ExprWithAlias, + pub order_by: OrderByOptions, +} + +impl fmt::Display for ExprWithAliasAndOrderBy { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}{}", self.expr, self.order_by) + } +} + /// Arguments to a table-valued function #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] @@ -2484,8 +2504,8 @@ pub enum PipeOperator { /// /// See more at https://cloud.google.com/bigquery/docs/reference/standard-sql/pipe-syntax#aggregate_pipe_operator Aggregate { - full_table_exprs: Vec<ExprWithAlias>, - group_by_exprs: Vec<ExprWithAlias>, + full_table_exprs: Vec<ExprWithAliasAndOrderBy>, + group_by_expr: Vec<ExprWithAliasAndOrderBy>, }, } @@ -2516,7 +2536,7 @@ impl fmt::Display for PipeOperator { } PipeOperator::Aggregate { full_table_exprs, - group_by_exprs, + group_by_expr, } => { write!(f, "AGGREGATE")?; if !full_table_exprs.is_empty() { @@ -2526,12 +2546,8 @@
impl fmt::Display for PipeOperator { display_comma_separated(full_table_exprs.as_slice()) )?; } - if !group_by_exprs.is_empty() { - write!( - f, - " GROUP BY {}", - display_comma_separated(group_by_exprs.as_slice()) - )?; + if !group_by_expr.is_empty() { + write!(f, " GROUP BY {}", display_comma_separated(group_by_expr))?; } Ok(()) } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index f080e861e..eb745496c 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1015,6 +1015,25 @@ impl<'a> Parser<'a> { self.parse_subexpr(self.dialect.prec_unknown()) } + pub fn parse_expr_with_alias_and_order_by( + &mut self, + ) -> Result<ExprWithAliasAndOrderBy, ParserError> { + let expr = self.parse_expr()?; + + fn validator(explicit: bool, kw: &Keyword, _parser: &mut Parser) -> bool { + explicit || !&[Keyword::ASC, Keyword::DESC, Keyword::GROUP].contains(kw) + } + let alias = self.parse_optional_alias_inner(None, validator)?; + let order_by = OrderByOptions { + asc: self.parse_asc_desc(), + nulls_first: None, + }; + Ok(ExprWithAliasAndOrderBy { + expr: ExprWithAlias { expr, alias }, + order_by, + }) + } + /// Parse tokens until the precedence changes. pub fn parse_subexpr(&mut self, precedence: u8) -> Result<Expr, ParserError> { let _guard = self.recursion_counter.try_decrease()?; @@ -10391,27 +10410,25 @@ impl<'a> Parser<'a> { pipe_operators.push(PipeOperator::Limit { expr, offset }) } Keyword::AGGREGATE => { - let full_table_exprs = self.parse_comma_separated0( - |parser| { - let expr = parser.parse_expr()?; - let alias = parser.maybe_parse_select_item_alias()?; - Ok(ExprWithAlias { expr, alias }) - }, - Token::make_keyword(keywords::GROUP), - )?; + let full_table_exprs = if self.peek_keyword(Keyword::GROUP) { + vec![] + } else { + self.parse_comma_separated(|parser| { + parser.parse_expr_with_alias_and_order_by() + })?
+ }; - let group_by_exprs = if self.parse_keywords(&[Keyword::GROUP, Keyword::BY]) { + let group_by_expr = if self.parse_keywords(&[Keyword::GROUP, Keyword::BY]) { self.parse_comma_separated(|parser| { - let expr = parser.parse_expr()?; - let alias = parser.maybe_parse_select_item_alias()?; - Ok(ExprWithAlias { expr, alias }) + parser.parse_expr_with_alias_and_order_by() })? } else { vec![] }; + pipe_operators.push(PipeOperator::Aggregate { full_table_exprs, - group_by_exprs, + group_by_expr, }) } Keyword::ORDER => { diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 60ec8fa37..7d387d5d2 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -14724,9 +14724,17 @@ fn parse_pipeline_operator() { ); dialects .verified_stmt("SELECT * FROM users |> AGGREGATE GROUP BY EXTRACT(YEAR FROM o_orderdate)"); + dialects.verified_stmt("SELECT * FROM users |> AGGREGATE GROUP BY a, b"); + dialects.verified_stmt("SELECT * FROM users |> AGGREGATE SUM(c) GROUP BY a, b"); + dialects.verified_stmt("SELECT * FROM users |> AGGREGATE SUM(c) ASC"); // order by pipe operator dialects.verified_stmt("SELECT * FROM users |> ORDER BY id ASC"); dialects.verified_stmt("SELECT * FROM users |> ORDER BY id DESC"); dialects.verified_stmt("SELECT * FROM users |> ORDER BY id DESC, name ASC"); + + // many pipes + dialects.verified_stmt( + "SELECT * FROM CustomerOrders |> AGGREGATE SUM(cost) AS total_cost GROUP BY customer_id, state, item_type |> EXTEND COUNT(*) OVER (PARTITION BY customer_id) AS num_orders |> WHERE num_orders > 1 |> AGGREGATE AVG(total_cost) AS average GROUP BY state DESC, item_type ASC", + ); } From f55f102c327f85b93d39afb68cbd59d43aa5fbe4 Mon Sep 17 00:00:00 2001 From: Simon Vandel Sillesen Date: Wed, 16 Apr 2025 20:07:07 +0200 Subject: [PATCH 8/8] fixes after merge --- src/ast/mod.rs | 47 ++++++++++++++++------------------------------- src/ast/query.rs | 2 +- src/parser/mod.rs | 1 + 3 files changed, 18 insertions(+), 32 deletions(-) diff --git 
a/src/ast/mod.rs b/src/ast/mod.rs index 570275038..473ab4fc0 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -66,37 +66,22 @@ pub use self::dml::{CreateIndex, CreateTable, Delete, IndexColumn, Insert}; pub use self::operator::{BinaryOperator, UnaryOperator}; pub use self::query::{ AfterMatchSkip, ConnectBy, Cte, CteAsMaterialized, Distinct, EmptyMatchesMode, - ExceptSelectItem, ExceptSelectItem, ExcludeSelectItem, ExcludeSelectItem, ExprWithAlias, - ExprWithAlias, ExprWithAliasAndOrderBy, Fetch, Fetch, ForClause, ForClause, ForJson, ForJson, - ForXml, ForXml, FormatClause, FormatClause, GroupByExpr, GroupByExpr, GroupByWithModifier, - GroupByWithModifier, IdentWithAlias, IdentWithAlias, IlikeSelectItem, IlikeSelectItem, - InputFormatClause, InputFormatClause, Interpolate, Interpolate, InterpolateExpr, - InterpolateExpr, Join, Join, JoinConstraint, JoinConstraint, JoinOperator, JoinOperator, - JsonTableColumn, JsonTableColumn, JsonTableColumnErrorHandling, JsonTableColumnErrorHandling, - JsonTableNamedColumn, JsonTableNamedColumn, JsonTableNestedColumn, JsonTableNestedColumn, - LateralView, LateralView, LimitClause, LockClause, LockClause, LockType, LockType, - MatchRecognizePattern, MatchRecognizePattern, MatchRecognizeSymbol, MatchRecognizeSymbol, - Measure, Measure, NamedWindowDefinition, NamedWindowDefinition, NamedWindowExpr, - NamedWindowExpr, NonBlock, NonBlock, Offset, Offset, OffsetRows, OffsetRows, - OpenJsonTableColumn, OpenJsonTableColumn, OrderBy, OrderBy, OrderByExpr, OrderByExpr, - OrderByKind, OrderByKind, OrderByOptions, OrderByOptions, PipeOperator, PivotValueSource, - PivotValueSource, ProjectionSelect, ProjectionSelect, Query, Query, RenameSelectItem, - RenameSelectItem, RepetitionQuantifier, RepetitionQuantifier, ReplaceSelectElement, - ReplaceSelectElement, ReplaceSelectItem, ReplaceSelectItem, RowsPerMatch, RowsPerMatch, Select, - Select, SelectFlavor, SelectFlavor, SelectInto, SelectInto, SelectItem, SelectItem, - 
SelectItemQualifiedWildcardKind, SelectItemQualifiedWildcardKind, SetExpr, SetExpr, - SetOperator, SetOperator, SetQuantifier, SetQuantifier, Setting, Setting, SymbolDefinition, - SymbolDefinition, Table, Table, TableAlias, TableAlias, TableAliasColumnDef, - TableAliasColumnDef, TableFactor, TableFactor, TableFunctionArgs, TableFunctionArgs, - TableIndexHintForClause, TableIndexHintForClause, TableIndexHintType, TableIndexHintType, - TableIndexHints, TableIndexHints, TableIndexType, TableIndexType, TableSample, TableSample, - TableSampleBucket, TableSampleBucket, TableSampleKind, TableSampleKind, TableSampleMethod, - TableSampleMethod, TableSampleModifier, TableSampleModifier, TableSampleQuantity, - TableSampleQuantity, TableSampleSeed, TableSampleSeed, TableSampleSeedModifier, - TableSampleSeedModifier, TableSampleUnit, TableSampleUnit, TableVersion, TableVersion, - TableWithJoins, TableWithJoins, Top, Top, TopQuantity, TopQuantity, UpdateTableFromKind, - UpdateTableFromKind, ValueTableMode, ValueTableMode, Values, Values, WildcardAdditionalOptions, - WildcardAdditionalOptions, With, With, WithFill, WithFill, + ExceptSelectItem, ExcludeSelectItem, ExprWithAlias, ExprWithAliasAndOrderBy, Fetch, ForClause, + ForJson, ForXml, FormatClause, GroupByExpr, GroupByWithModifier, IdentWithAlias, + IlikeSelectItem, InputFormatClause, Interpolate, InterpolateExpr, Join, JoinConstraint, + JoinOperator, JsonTableColumn, JsonTableColumnErrorHandling, JsonTableNamedColumn, + JsonTableNestedColumn, LateralView, LimitClause, LockClause, LockType, MatchRecognizePattern, + MatchRecognizeSymbol, Measure, NamedWindowDefinition, NamedWindowExpr, NonBlock, Offset, + OffsetRows, OpenJsonTableColumn, OrderBy, OrderByExpr, OrderByKind, OrderByOptions, + PipeOperator, PivotValueSource, ProjectionSelect, Query, RenameSelectItem, + RepetitionQuantifier, ReplaceSelectElement, ReplaceSelectItem, RowsPerMatch, Select, + SelectFlavor, SelectInto, SelectItem, SelectItemQualifiedWildcardKind, 
SetExpr, SetOperator, + SetQuantifier, Setting, SymbolDefinition, Table, TableAlias, TableAliasColumnDef, TableFactor, + TableFunctionArgs, TableIndexHintForClause, TableIndexHintType, TableIndexHints, + TableIndexType, TableSample, TableSampleBucket, TableSampleKind, TableSampleMethod, + TableSampleModifier, TableSampleQuantity, TableSampleSeed, TableSampleSeedModifier, + TableSampleUnit, TableVersion, TableWithJoins, Top, TopQuantity, UpdateTableFromKind, + ValueTableMode, Values, WildcardAdditionalOptions, With, WithFill, }; pub use self::trigger::{ diff --git a/src/ast/query.rs b/src/ast/query.rs index b728e05f2..127261f51 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -2547,7 +2547,7 @@ pub enum PipeOperator { /// Syntax: `|> AGGREGATE <agg_expr> [[AS] alias], ...` /// /// Syntax: - /// ``` + /// ```norust /// |> AGGREGATE [<agg_expr> [[AS] alias], ...] /// GROUP BY <grouping_expr> [AS alias], ... /// ``` diff --git a/src/parser/mod.rs b/src/parser/mod.rs index b82261c99..28a81d9f1 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -10531,6 +10531,7 @@ impl<'a> Parser<'a> { for_clause: None, settings: None, format_clause: None, + pipe_operators: vec![], } .into()) } else if self.parse_keyword(Keyword::DELETE) {