From 870b3de433e8c4c3196b3e4984d1cb101288c5c2 Mon Sep 17 00:00:00 2001 From: Mustafa Akur Date: Tue, 9 May 2023 16:15:24 +0300 Subject: [PATCH 1/3] Initial Commit --- src/ast/mod.rs | 86 ++++++++++++++++++++++++++++++++++ src/parser.rs | 98 +++++++++++++++++++++++++++++++++++++++ tests/sqlparser_common.rs | 25 ++++++++++ 3 files changed, 209 insertions(+) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 94b237b4f..3dde1e472 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -477,6 +477,10 @@ pub enum Expr { ListAgg(ListAgg), /// The `ARRAY_AGG` function `SELECT ARRAY_AGG(... ORDER BY ...)` ArrayAgg(ArrayAgg), + /// The `FIRST` function `SELECT FIRST(... ORDER BY ...)` + FIRST(FirstAgg), + /// The `LAST` function `SELECT LAST(... ORDER BY ...)` + LAST(LastAgg), /// The `GROUPING SETS` expr. GroupingSets(Vec>), /// The `CUBE` expr. @@ -738,6 +742,8 @@ impl fmt::Display for Expr { Expr::ArraySubquery(s) => write!(f, "ARRAY({s})"), Expr::ListAgg(listagg) => write!(f, "{listagg}"), Expr::ArrayAgg(arrayagg) => write!(f, "{arrayagg}"), + Expr::FIRST(first) => write!(f, "{first}"), + Expr::LAST(last) => write!(f, "{last}"), Expr::GroupingSets(sets) => { write!(f, "GROUPING SETS (")?; let mut sep = ""; @@ -3444,6 +3450,86 @@ impl fmt::Display for ArrayAgg { } } +/// A `FIRST` invocation `FIRST( [ DISTINCT ] [ORDER BY ] [LIMIT ] )` +/// Or `FIRST( [ DISTINCT ] ) [ WITHIN GROUP ( ORDER BY ) ]` +/// ORDER BY position is defined differently for BigQuery, Postgres and Snowflake. 
+#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct FirstAgg { + pub distinct: bool, + pub expr: Box, + pub order_by: Option>, + pub limit: Option>, + pub within_group: bool, // order by is used inside a within group or not +} + +impl fmt::Display for FirstAgg { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "FIRST({}{}", + if self.distinct { "DISTINCT " } else { "" }, + self.expr + )?; + if !self.within_group { + if let Some(order_by) = &self.order_by { + write!(f, " ORDER BY {order_by}")?; + } + if let Some(limit) = &self.limit { + write!(f, " LIMIT {limit}")?; + } + } + write!(f, ")")?; + if self.within_group { + if let Some(order_by) = &self.order_by { + write!(f, " WITHIN GROUP (ORDER BY {order_by})")?; + } + } + Ok(()) + } +} + +/// A `LAST` invocation `LAST( [ DISTINCT ] [ORDER BY ] [LIMIT ] )` +/// Or `LAST( [ DISTINCT ] ) [ WITHIN GROUP ( ORDER BY ) ]` +/// ORDER BY position is defined differently for BigQuery, Postgres and Snowflake. 
+#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct LastAgg { + pub distinct: bool, + pub expr: Box, + pub order_by: Option>, + pub limit: Option>, + pub within_group: bool, // order by is used inside a within group or not +} + +impl fmt::Display for LastAgg { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "LAST({}{}", + if self.distinct { "DISTINCT " } else { "" }, + self.expr + )?; + if !self.within_group { + if let Some(order_by) = &self.order_by { + write!(f, " ORDER BY {order_by}")?; + } + if let Some(limit) = &self.limit { + write!(f, " LIMIT {limit}")?; + } + } + write!(f, ")")?; + if self.within_group { + if let Some(order_by) = &self.order_by { + write!(f, " WITHIN GROUP (ORDER BY {order_by})")?; + } + } + Ok(()) + } +} + #[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] diff --git a/src/parser.rs b/src/parser.rs index b06e6bd25..0f88fe4dc 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -741,6 +741,8 @@ impl<'a> Parser<'a> { self.parse_array_subquery() } Keyword::ARRAY_AGG => self.parse_array_agg_expr(), + Keyword::FIRST => self.parse_first_agg_expr(), + Keyword::LAST => self.parse_last_agg_expr(), Keyword::NOT => self.parse_not(), Keyword::MATCH if dialect_of!(self is MySqlDialect | GenericDialect) => { self.parse_match_against() @@ -1410,6 +1412,102 @@ impl<'a> Parser<'a> { })) } + pub fn parse_first_agg_expr(&mut self) -> Result { + self.expect_token(&Token::LParen)?; + let distinct = self.parse_keyword(Keyword::DISTINCT); + let expr = Box::new(self.parse_expr()?); + // ANSI SQL and BigQuery define ORDER BY inside function. 
+ if !self.dialect.supports_within_after_array_aggregation() { + let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) { + let order_by_expr = self.parse_order_by_expr()?; + Some(Box::new(order_by_expr)) + } else { + None + }; + let limit = if self.parse_keyword(Keyword::LIMIT) { + self.parse_limit()?.map(Box::new) + } else { + None + }; + self.expect_token(&Token::RParen)?; + return Ok(Expr::FIRST(FirstAgg { + distinct, + expr, + order_by, + limit, + within_group: false, + })); + } + // Snowflake defines ORDER BY in within group instead of inside the function like + // ANSI SQL. + self.expect_token(&Token::RParen)?; + let within_group = if self.parse_keywords(&[Keyword::WITHIN, Keyword::GROUP]) { + self.expect_token(&Token::LParen)?; + self.expect_keywords(&[Keyword::ORDER, Keyword::BY])?; + let order_by_expr = self.parse_order_by_expr()?; + self.expect_token(&Token::RParen)?; + Some(Box::new(order_by_expr)) + } else { + None + }; + + Ok(Expr::FIRST(FirstAgg { + distinct, + expr, + order_by: within_group, + limit: None, + within_group: true, + })) + } + + pub fn parse_last_agg_expr(&mut self) -> Result { + self.expect_token(&Token::LParen)?; + let distinct = self.parse_keyword(Keyword::DISTINCT); + let expr = Box::new(self.parse_expr()?); + // ANSI SQL and BigQuery define ORDER BY inside function. + if !self.dialect.supports_within_after_array_aggregation() { + let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) { + let order_by_expr = self.parse_order_by_expr()?; + Some(Box::new(order_by_expr)) + } else { + None + }; + let limit = if self.parse_keyword(Keyword::LIMIT) { + self.parse_limit()?.map(Box::new) + } else { + None + }; + self.expect_token(&Token::RParen)?; + return Ok(Expr::LAST(LastAgg { + distinct, + expr, + order_by, + limit, + within_group: false, + })); + } + // Snowflake defines ORDER BY in within group instead of inside the function like + // ANSI SQL. 
+ self.expect_token(&Token::RParen)?; + let within_group = if self.parse_keywords(&[Keyword::WITHIN, Keyword::GROUP]) { + self.expect_token(&Token::LParen)?; + self.expect_keywords(&[Keyword::ORDER, Keyword::BY])?; + let order_by_expr = self.parse_order_by_expr()?; + self.expect_token(&Token::RParen)?; + Some(Box::new(order_by_expr)) + } else { + None + }; + + Ok(Expr::LAST(LastAgg { + distinct, + expr, + order_by: within_group, + limit: None, + within_group: true, + })) + } + // This function parses date/time fields for the EXTRACT function-like // operator, interval qualifiers, and the ceil/floor operations. // EXTRACT supports a wider set of date/time fields than interval qualifiers, diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 55350e5a6..b44bdf781 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -1952,6 +1952,31 @@ fn parse_array_agg_func() { } } +#[test] +fn parse_first_last_func() { + let supported_dialects = TestedDialects { + dialects: vec![ + Box::new(GenericDialect {}), + Box::new(PostgreSqlDialect {}), + Box::new(MsSqlDialect {}), + Box::new(AnsiDialect {}), + Box::new(HiveDialect {}), + ], + }; + + for sql in [ + "SELECT FIRST(x ORDER BY x) AS a FROM T", + "SELECT FIRST(x ORDER BY x LIMIT 2) FROM tbl", + "SELECT FIRST(DISTINCT x ORDER BY x LIMIT 2) FROM tbl", + "SELECT LAST(x ORDER BY x) AS a FROM T", + "SELECT LAST(x ORDER BY x LIMIT 2) FROM tbl", + "SELECT LAST(DISTINCT x ORDER BY x LIMIT 2) FROM tbl", + ] { + supported_dialects.verified_stmt(sql); + } +} + + #[test] fn parse_create_table() { let sql = "CREATE TABLE uk_cities (\ From 1c4c3b8e7380d640c3271a61c12de171f87c29bb Mon Sep 17 00:00:00 2001 From: Mustafa Akur Date: Wed, 10 May 2023 13:08:18 +0300 Subject: [PATCH 2/3] Remove distinct and limit from first and last --- src/ast/mod.rs | 24 ++---------------------- src/parser.rs | 20 -------------------- tests/sqlparser_common.rs | 5 ----- 3 files changed, 2 insertions(+), 47 deletions(-) diff 
--git a/src/ast/mod.rs b/src/ast/mod.rs index 3dde1e472..304fd585e 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -3457,28 +3457,18 @@ impl fmt::Display for ArrayAgg { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct FirstAgg { - pub distinct: bool, pub expr: Box, pub order_by: Option>, - pub limit: Option>, pub within_group: bool, // order by is used inside a within group or not } impl fmt::Display for FirstAgg { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!( - f, - "FIRST({}{}", - if self.distinct { "DISTINCT " } else { "" }, - self.expr - )?; + write!(f, "FIRST({}", self.expr)?; if !self.within_group { if let Some(order_by) = &self.order_by { write!(f, " ORDER BY {order_by}")?; } - if let Some(limit) = &self.limit { - write!(f, " LIMIT {limit}")?; - } } write!(f, ")")?; if self.within_group { @@ -3497,28 +3487,18 @@ impl fmt::Display for FirstAgg { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct LastAgg { - pub distinct: bool, pub expr: Box, pub order_by: Option>, - pub limit: Option>, pub within_group: bool, // order by is used inside a within group or not } impl fmt::Display for LastAgg { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!( - f, - "LAST({}{}", - if self.distinct { "DISTINCT " } else { "" }, - self.expr - )?; + write!(f, "LAST({}", self.expr)?; if !self.within_group { if let Some(order_by) = &self.order_by { write!(f, " ORDER BY {order_by}")?; } - if let Some(limit) = &self.limit { - write!(f, " LIMIT {limit}")?; - } } write!(f, ")")?; if self.within_group { diff --git a/src/parser.rs b/src/parser.rs index 0f88fe4dc..e0b8f38f3 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1414,7 +1414,6 @@ impl<'a> Parser<'a> { pub fn parse_first_agg_expr(&mut self) -> Result { self.expect_token(&Token::LParen)?; - let distinct = 
self.parse_keyword(Keyword::DISTINCT); let expr = Box::new(self.parse_expr()?); // ANSI SQL and BigQuery define ORDER BY inside function. if !self.dialect.supports_within_after_array_aggregation() { @@ -1424,17 +1423,10 @@ impl<'a> Parser<'a> { } else { None }; - let limit = if self.parse_keyword(Keyword::LIMIT) { - self.parse_limit()?.map(Box::new) - } else { - None - }; self.expect_token(&Token::RParen)?; return Ok(Expr::FIRST(FirstAgg { - distinct, expr, order_by, - limit, within_group: false, })); } @@ -1452,17 +1444,14 @@ impl<'a> Parser<'a> { }; Ok(Expr::FIRST(FirstAgg { - distinct, expr, order_by: within_group, - limit: None, within_group: true, })) } pub fn parse_last_agg_expr(&mut self) -> Result { self.expect_token(&Token::LParen)?; - let distinct = self.parse_keyword(Keyword::DISTINCT); let expr = Box::new(self.parse_expr()?); // ANSI SQL and BigQuery define ORDER BY inside function. if !self.dialect.supports_within_after_array_aggregation() { @@ -1472,17 +1461,10 @@ impl<'a> Parser<'a> { } else { None }; - let limit = if self.parse_keyword(Keyword::LIMIT) { - self.parse_limit()?.map(Box::new) - } else { - None - }; self.expect_token(&Token::RParen)?; return Ok(Expr::LAST(LastAgg { - distinct, expr, order_by, - limit, within_group: false, })); } @@ -1500,10 +1482,8 @@ impl<'a> Parser<'a> { }; Ok(Expr::LAST(LastAgg { - distinct, expr, order_by: within_group, - limit: None, within_group: true, })) } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index b44bdf781..cacc831a6 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -1966,17 +1966,12 @@ fn parse_first_last_func() { for sql in [ "SELECT FIRST(x ORDER BY x) AS a FROM T", - "SELECT FIRST(x ORDER BY x LIMIT 2) FROM tbl", - "SELECT FIRST(DISTINCT x ORDER BY x LIMIT 2) FROM tbl", "SELECT LAST(x ORDER BY x) AS a FROM T", - "SELECT LAST(x ORDER BY x LIMIT 2) FROM tbl", - "SELECT LAST(DISTINCT x ORDER BY x LIMIT 2) FROM tbl", ] { 
supported_dialects.verified_stmt(sql); } } - #[test] fn parse_create_table() { let sql = "CREATE TABLE uk_cities (\ From 4d8076a6b4efc208abdf854f7dc8c665fb323f1b Mon Sep 17 00:00:00 2001 From: Mustafa Akur Date: Wed, 10 May 2023 14:52:59 +0300 Subject: [PATCH 3/3] fix buggy test --- tests/sqlparser_common.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index e38d97c69..892feb90c 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -2080,6 +2080,7 @@ fn parse_first_last_func() { Box::new(AnsiDialect {}), Box::new(HiveDialect {}), ], + options: None, }; for sql in [