diff --git a/src/ast/value.rs b/src/ast/value.rs index 0adb2d5dc..154aafc76 100644 --- a/src/ast/value.rs +++ b/src/ast/value.rs @@ -41,6 +41,10 @@ pub enum Value { /// See [Postgres docs](https://www.postgresql.org/docs/8.3/sql-syntax-lexical.html#SQL-SYNTAX-STRINGS) /// for more details. EscapedStringLiteral(String), + /// B'string value' + SingleQuotedByteStringLiteral(String), + /// B"string value" + DoubleQuotedByteStringLiteral(String), /// N'string value' NationalStringLiteral(String), /// X'hex value' @@ -68,6 +72,8 @@ impl fmt::Display for Value { Value::NationalStringLiteral(v) => write!(f, "N'{v}'"), Value::HexStringLiteral(v) => write!(f, "X'{v}'"), Value::Boolean(v) => write!(f, "{v}"), + Value::SingleQuotedByteStringLiteral(v) => write!(f, "B'{v}'"), + Value::DoubleQuotedByteStringLiteral(v) => write!(f, "B\"{v}\""), Value::Null => write!(f, "NULL"), Value::Placeholder(v) => write!(f, "{v}"), Value::UnQuotedString(v) => write!(f, "{v}"), diff --git a/src/parser.rs b/src/parser.rs index f962f9db1..a1ecdfe96 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -790,6 +790,8 @@ impl<'a> Parser<'a> { | Token::SingleQuotedString(_) | Token::DoubleQuotedString(_) | Token::DollarQuotedString(_) + | Token::SingleQuotedByteStringLiteral(_) + | Token::DoubleQuotedByteStringLiteral(_) | Token::NationalStringLiteral(_) | Token::HexStringLiteral(_) => { self.prev_token(); @@ -4125,6 +4127,12 @@ impl<'a> Parser<'a> { Token::SingleQuotedString(ref s) => Ok(Value::SingleQuotedString(s.to_string())), Token::DoubleQuotedString(ref s) => Ok(Value::DoubleQuotedString(s.to_string())), Token::DollarQuotedString(ref s) => Ok(Value::DollarQuotedString(s.clone())), + Token::SingleQuotedByteStringLiteral(ref s) => { + Ok(Value::SingleQuotedByteStringLiteral(s.clone())) + } + Token::DoubleQuotedByteStringLiteral(ref s) => { + Ok(Value::DoubleQuotedByteStringLiteral(s.clone())) + } Token::NationalStringLiteral(ref s) => Ok(Value::NationalStringLiteral(s.to_string())), 
Token::EscapedStringLiteral(ref s) => Ok(Value::EscapedStringLiteral(s.to_string())), Token::HexStringLiteral(ref s) => Ok(Value::HexStringLiteral(s.to_string())), diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 9780de046..b05667c2b 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -35,7 +35,7 @@ use serde::{Deserialize, Serialize}; use sqlparser_derive::{Visit, VisitMut}; use crate::ast::DollarQuotedString; -use crate::dialect::SnowflakeDialect; +use crate::dialect::{BigQueryDialect, GenericDialect, SnowflakeDialect}; use crate::dialect::{Dialect, MySqlDialect}; use crate::keywords::{Keyword, ALL_KEYWORDS, ALL_KEYWORDS_INDEX}; @@ -58,6 +58,10 @@ pub enum Token { DoubleQuotedString(String), /// Dollar quoted string: i.e: $$string$$ or $tag_name$string$tag_name$ DollarQuotedString(DollarQuotedString), + /// Byte string literal: i.e: b'string' or B'string' + SingleQuotedByteStringLiteral(String), + /// Byte string literal: i.e: b"string" or B"string" + DoubleQuotedByteStringLiteral(String), /// "National" string literal: i.e: N'string' NationalStringLiteral(String), /// "escaped" string literal, which are an extension to the SQL standard: i.e: e'first \n second' or E 'first \n second' @@ -189,6 +193,8 @@ impl fmt::Display for Token { Token::NationalStringLiteral(ref s) => write!(f, "N'{s}'"), Token::EscapedStringLiteral(ref s) => write!(f, "E'{s}'"), Token::HexStringLiteral(ref s) => write!(f, "X'{s}'"), + Token::SingleQuotedByteStringLiteral(ref s) => write!(f, "B'{s}'"), + Token::DoubleQuotedByteStringLiteral(ref s) => write!(f, "B\"{s}\""), Token::Comma => f.write_str(","), Token::Whitespace(ws) => write!(f, "{ws}"), Token::DoubleEq => f.write_str("=="), @@ -493,6 +499,25 @@ impl<'a> Tokenizer<'a> { } Ok(Some(Token::Whitespace(Whitespace::Newline))) } + // BigQuery uses b or B for byte string literal + b @ 'B' | b @ 'b' if dialect_of!(self is BigQueryDialect | GenericDialect) => { + chars.next(); // consume + match chars.peek() { + Some('\'') => { + 
let s = self.tokenize_quoted_string(chars, '\'')?; + Ok(Some(Token::SingleQuotedByteStringLiteral(s))) + } + Some('\"') => { + let s = self.tokenize_quoted_string(chars, '\"')?; + Ok(Some(Token::DoubleQuotedByteStringLiteral(s))) + } + _ => { + // regular identifier starting with a "b" or "B" + let s = self.tokenize_word(b, chars); + Ok(Some(Token::make_word(&s, None))) + } + } + } // Redshift uses lower case n for national string literal n @ 'N' | n @ 'n' => { chars.next(); // consume, to check the next char diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index 4ed80df99..f4020436d 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -32,6 +32,24 @@ fn parse_literal_string() { ); } +#[test] +fn parse_byte_literal() { + let sql = r#"SELECT B'abc', B"abc""#; + let select = bigquery().verified_only_select(sql); + assert_eq!(2, select.projection.len()); + assert_eq!( + &Expr::Value(Value::SingleQuotedByteStringLiteral("abc".to_string())), + expr_from_projection(&select.projection[0]) + ); + assert_eq!( + &Expr::Value(Value::DoubleQuotedByteStringLiteral("abc".to_string())), + expr_from_projection(&select.projection[1]) + ); + + let sql = r#"SELECT b'abc', b"abc""#; + bigquery().one_statement_parses_to(sql, r#"SELECT B'abc', B"abc""#); +} + #[test] fn parse_table_identifiers() { fn test_table_ident(ident: &str, expected: Vec) {