Skip to content

Commit

Permalink
feat: support byte string literal in bq (#802)
Browse files Browse the repository at this point in the history
* rebase

* review

* lint
  • Loading branch information
togami2864 authored Feb 19, 2023
1 parent c35dcc9 commit 0c0d088
Show file tree
Hide file tree
Showing 4 changed files with 58 additions and 1 deletion.
6 changes: 6 additions & 0 deletions src/ast/value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,10 @@ pub enum Value {
/// See [Postgres docs](https://www.postgresql.org/docs/8.3/sql-syntax-lexical.html#SQL-SYNTAX-STRINGS)
/// for more details.
EscapedStringLiteral(String),
/// B'string value'
SingleQuotedByteStringLiteral(String),
/// B"string value"
DoubleQuotedByteStringLiteral(String),
/// N'string value'
NationalStringLiteral(String),
/// X'hex value'
Expand Down Expand Up @@ -68,6 +72,8 @@ impl fmt::Display for Value {
Value::NationalStringLiteral(v) => write!(f, "N'{v}'"),
Value::HexStringLiteral(v) => write!(f, "X'{v}'"),
Value::Boolean(v) => write!(f, "{v}"),
Value::SingleQuotedByteStringLiteral(v) => write!(f, "B'{v}'"),
Value::DoubleQuotedByteStringLiteral(v) => write!(f, "B\"{v}\""),
Value::Null => write!(f, "NULL"),
Value::Placeholder(v) => write!(f, "{v}"),
Value::UnQuotedString(v) => write!(f, "{v}"),
Expand Down
8 changes: 8 additions & 0 deletions src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -790,6 +790,8 @@ impl<'a> Parser<'a> {
| Token::SingleQuotedString(_)
| Token::DoubleQuotedString(_)
| Token::DollarQuotedString(_)
| Token::SingleQuotedByteStringLiteral(_)
| Token::DoubleQuotedByteStringLiteral(_)
| Token::NationalStringLiteral(_)
| Token::HexStringLiteral(_) => {
self.prev_token();
Expand Down Expand Up @@ -4125,6 +4127,12 @@ impl<'a> Parser<'a> {
Token::SingleQuotedString(ref s) => Ok(Value::SingleQuotedString(s.to_string())),
Token::DoubleQuotedString(ref s) => Ok(Value::DoubleQuotedString(s.to_string())),
Token::DollarQuotedString(ref s) => Ok(Value::DollarQuotedString(s.clone())),
Token::SingleQuotedByteStringLiteral(ref s) => {
Ok(Value::SingleQuotedByteStringLiteral(s.clone()))
}
Token::DoubleQuotedByteStringLiteral(ref s) => {
Ok(Value::DoubleQuotedByteStringLiteral(s.clone()))
}
Token::NationalStringLiteral(ref s) => Ok(Value::NationalStringLiteral(s.to_string())),
Token::EscapedStringLiteral(ref s) => Ok(Value::EscapedStringLiteral(s.to_string())),
Token::HexStringLiteral(ref s) => Ok(Value::HexStringLiteral(s.to_string())),
Expand Down
27 changes: 26 additions & 1 deletion src/tokenizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ use serde::{Deserialize, Serialize};
use sqlparser_derive::{Visit, VisitMut};

use crate::ast::DollarQuotedString;
use crate::dialect::SnowflakeDialect;
use crate::dialect::{BigQueryDialect, GenericDialect, SnowflakeDialect};
use crate::dialect::{Dialect, MySqlDialect};
use crate::keywords::{Keyword, ALL_KEYWORDS, ALL_KEYWORDS_INDEX};

Expand All @@ -58,6 +58,10 @@ pub enum Token {
DoubleQuotedString(String),
/// Dollar quoted string: i.e: $$string$$ or $tag_name$string$tag_name$
DollarQuotedString(DollarQuotedString),
/// Byte string literal: i.e: b'string' or B'string'
SingleQuotedByteStringLiteral(String),
/// Byte string literal: i.e: b"string" or B"string"
DoubleQuotedByteStringLiteral(String),
/// "National" string literal: i.e: N'string'
NationalStringLiteral(String),
/// "escaped" string literal, which are an extension to the SQL standard: i.e: e'first \n second' or E 'first \n second'
Expand Down Expand Up @@ -189,6 +193,8 @@ impl fmt::Display for Token {
Token::NationalStringLiteral(ref s) => write!(f, "N'{s}'"),
Token::EscapedStringLiteral(ref s) => write!(f, "E'{s}'"),
Token::HexStringLiteral(ref s) => write!(f, "X'{s}'"),
Token::SingleQuotedByteStringLiteral(ref s) => write!(f, "B'{s}'"),
Token::DoubleQuotedByteStringLiteral(ref s) => write!(f, "B\"{s}\""),
Token::Comma => f.write_str(","),
Token::Whitespace(ws) => write!(f, "{ws}"),
Token::DoubleEq => f.write_str("=="),
Expand Down Expand Up @@ -493,6 +499,25 @@ impl<'a> Tokenizer<'a> {
}
Ok(Some(Token::Whitespace(Whitespace::Newline)))
}
// BigQuery uses b or B for byte string literal
b @ 'B' | b @ 'b' if dialect_of!(self is BigQueryDialect | GenericDialect) => {
chars.next(); // consume
match chars.peek() {
Some('\'') => {
let s = self.tokenize_quoted_string(chars, '\'')?;
Ok(Some(Token::SingleQuotedByteStringLiteral(s)))
}
Some('\"') => {
let s = self.tokenize_quoted_string(chars, '\"')?;
Ok(Some(Token::DoubleQuotedByteStringLiteral(s)))
}
_ => {
// regular identifier starting with an "b" or "B"
let s = self.tokenize_word(b, chars);
Ok(Some(Token::make_word(&s, None)))
}
}
}
// Redshift uses lower case n for national string literal
n @ 'N' | n @ 'n' => {
chars.next(); // consume, to check the next char
Expand Down
18 changes: 18 additions & 0 deletions tests/sqlparser_bigquery.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,24 @@ fn parse_literal_string() {
);
}

#[test]
fn parse_byte_literal() {
let sql = r#"SELECT B'abc', B"abc""#;
let select = bigquery().verified_only_select(sql);
assert_eq!(2, select.projection.len());
assert_eq!(
&Expr::Value(Value::SingleQuotedByteStringLiteral("abc".to_string())),
expr_from_projection(&select.projection[0])
);
assert_eq!(
&Expr::Value(Value::DoubleQuotedByteStringLiteral("abc".to_string())),
expr_from_projection(&select.projection[1])
);

let sql = r#"SELECT b'abc', b"abc""#;
bigquery().one_statement_parses_to(sql, r#"SELECT B'abc', B"abc""#);
}

#[test]
fn parse_table_identifiers() {
fn test_table_ident(ident: &str, expected: Vec<Ident>) {
Expand Down

0 comments on commit 0c0d088

Please sign in to comment.