Skip to content

Commit

Permalink
feat(parser): parse dollar quoted string (#8218)
Browse files Browse the repository at this point in the history
support parsing dollar quoted string

Signed-off-by: Runji Wang <wangrunji0408@163.com>
  • Loading branch information
wangrunji0408 authored Feb 28, 2023
1 parent 108082e commit 9db3773
Show file tree
Hide file tree
Showing 5 changed files with 205 additions and 33 deletions.
2 changes: 1 addition & 1 deletion src/sqlparser/src/ast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ pub use self::query::{
With,
};
pub use self::statement::*;
pub use self::value::{DateTimeField, TrimWhereField, Value};
pub use self::value::{DateTimeField, DollarQuotedString, TrimWhereField, Value};
use crate::keywords::Keyword;
use crate::parser::{Parser, ParserError};

Expand Down
23 changes: 23 additions & 0 deletions src/sqlparser/src/ast/value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ pub enum Value {
Number(String),
/// 'string value'
SingleQuotedString(String),
/// `$<tag_name>$string value$<tag_name>$` (Postgres syntax)
DollarQuotedString(DollarQuotedString),
/// String Constants With C-Style Escapes
CstyleEscapesString(String),
/// N'string value'
Expand Down Expand Up @@ -63,6 +65,7 @@ impl fmt::Display for Value {
Value::Number(v) => write!(f, "{}", v),
Value::DoubleQuotedString(v) => write!(f, "\"{}\"", v),
Value::SingleQuotedString(v) => write!(f, "'{}'", escape_single_quote_string(v)),
Value::DollarQuotedString(v) => write!(f, "{}", v),
Value::NationalStringLiteral(v) => write!(f, "N'{}'", v),
Value::HexStringLiteral(v) => write!(f, "X'{}'", v),
Value::CstyleEscapesString(v) => write!(f, "E'{}'", v),
Expand Down Expand Up @@ -112,6 +115,26 @@ impl fmt::Display for Value {
}
}

/// A dollar-quoted string literal, written `$$value$$` or `$tag$value$tag$`.
///
/// Field order is significant: the derived `PartialOrd`/`Ord` compare `value`
/// first and `tag` second.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct DollarQuotedString {
/// The text enclosed between the opening and closing delimiters.
pub value: String,
/// The tag written between the two `$` of each delimiter; `None` renders as a bare `$$`.
pub tag: Option<String>,
}

impl fmt::Display for DollarQuotedString {
    /// Renders the literal back into its source form: `$tag$value$tag$`,
    /// or `$$value$$` when there is no tag.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // A missing tag is rendered exactly like an empty one, which yields
        // the bare `$$` delimiters on both sides of the value.
        let delimiter_tag = self.tag.as_deref().unwrap_or("");
        write!(f, "${}${}${}$", delimiter_tag, self.value, delimiter_tag)
    }
}

#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum DateTimeField {
Expand Down
26 changes: 12 additions & 14 deletions src/sqlparser/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -563,6 +563,7 @@ impl Parser {
}
Token::Number(_)
| Token::SingleQuotedString(_)
| Token::DollarQuotedString(_)
| Token::NationalStringLiteral(_)
| Token::HexStringLiteral(_)
| Token::CstyleEscapesString(_) => {
Expand Down Expand Up @@ -2483,6 +2484,7 @@ impl Parser {
},
Token::Number(ref n) => Ok(Value::Number(n.clone())),
Token::SingleQuotedString(ref s) => Ok(Value::SingleQuotedString(s.to_string())),
Token::DollarQuotedString(ref s) => Ok(Value::DollarQuotedString(s.clone())),
Token::CstyleEscapesString(ref s) => Ok(Value::CstyleEscapesString(s.to_string())),
Token::NationalStringLiteral(ref s) => Ok(Value::NationalStringLiteral(s.to_string())),
Token::HexStringLiteral(ref s) => Ok(Value::HexStringLiteral(s.to_string())),
Expand Down Expand Up @@ -2526,20 +2528,16 @@ impl Parser {
}

/// Parse the body of a function definition into a `FunctionDefinition`.
///
/// NOTE(review): this span comes from a rendered diff, so the lines the commit
/// removes (the unconditional `SingleQuotedDef` return and the commented-out
/// TODO) appear directly above the lines that replace them — confirm against
/// the real file before relying on the exact statement order.
///
/// In the replacement code, a peeked `Token::DollarQuotedString` is consumed
/// and yields `FunctionDefinition::DoubleDollarDef`; any other token is handed
/// to `parse_literal_string` and wrapped in `FunctionDefinition::SingleQuotedDef`.
pub fn parse_function_definition(&mut self) -> Result<FunctionDefinition, ParserError> {
Ok(FunctionDefinition::SingleQuotedDef(
self.parse_literal_string()?,
))
// TODO: support dollar quoted string
// let peek_token = self.peek_token();
// match peek_token {
// Token::DollarQuotedString(value) if dialect_of!(self is PostgreSqlDialect) => {
// self.next_token();
// Ok(FunctionDefinition::DoubleDollarDef(value.value))
// }
// _ => Ok(FunctionDefinition::SingleQuotedDef(
// self.parse_literal_string()?,
// )),
// }
// Look at the next token without consuming it so the fallback arm can still
// parse it through `parse_literal_string`.
let peek_token = self.peek_token();
match peek_token {
Token::DollarQuotedString(value) => {
// Consume the token that was only peeked above.
self.next_token();
// Only the string body is kept; the tag (if any) is dropped.
Ok(FunctionDefinition::DoubleDollarDef(value.value))
}
_ => Ok(FunctionDefinition::SingleQuotedDef(
self.parse_literal_string()?,
)),
}
}

/// Parse a literal string
Expand Down
109 changes: 91 additions & 18 deletions src/sqlparser/src/tokenizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ use core::str::Chars;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};

use crate::ast::DollarQuotedString;
use crate::keywords::{Keyword, ALL_KEYWORDS, ALL_KEYWORDS_INDEX};

/// SQL Token enumeration
Expand All @@ -47,6 +48,8 @@ pub enum Token {
Char(char),
/// Single quoted string: i.e: 'string'
SingleQuotedString(String),
/// Dollar quoted string: i.e: $$string$$ or $tag_name$string$tag_name$
DollarQuotedString(DollarQuotedString),
/// Single quoted string with c-style escapes: i.e: E'string'
CstyleEscapesString(String),
/// "National" string literal: i.e: N'string'
Expand Down Expand Up @@ -160,6 +163,7 @@ impl fmt::Display for Token {
Token::Number(ref n) => write!(f, "{}", n),
Token::Char(ref c) => write!(f, "{}", c),
Token::SingleQuotedString(ref s) => write!(f, "'{}'", s),
Token::DollarQuotedString(ref s) => write!(f, "{}", s),
Token::NationalStringLiteral(ref s) => write!(f, "N'{}'", s),
Token::HexStringLiteral(ref s) => write!(f, "X'{}'", s),
Token::CstyleEscapesString(ref s) => write!(f, "E'{}'", s),
Expand Down Expand Up @@ -613,13 +617,7 @@ impl<'a> Tokenizer<'a> {
_ => Ok(Some(Token::Colon)),
}
}
'$' => {
if let Some(parameter) = self.tokenize_parameter(chars) {
Ok(Some(parameter))
} else {
Ok(Some(Token::Char('$')))
}
}
'$' => Ok(Some(self.tokenize_dollar_preceded_value(chars)?)),
';' => self.consume_and_return(chars, Token::SemiColon),
'\\' => self.consume_and_return(chars, Token::Backslash),
'[' => self.consume_and_return(chars, Token::LBracket),
Expand Down Expand Up @@ -659,6 +657,92 @@ impl<'a> Tokenizer<'a> {
}
}

/// Tokenize a value introduced by `$`: either a placeholder (e.g. `$1`) or a
/// dollar-quoted string (`$$body$$` or `$tag$body$tag$`).
///
/// The next character of `chars` must be the leading `$`; it is consumed here.
/// The run of alphanumeric/`_` characters that follows is the tag. If the tag
/// is not immediately followed by a second `$`, that run is returned as a
/// [`Token::Parameter`]. Otherwise the string body extends up to the first
/// occurrence of the closing delimiter `$tag$` (`$$` when the tag is empty),
/// and the closing delimiter is consumed as well.
///
/// The body may itself contain `$` characters, including placeholders and
/// differently-tagged nested strings such as
/// `$fn$ SELECT $1 ~ $q$[a-z]$q$ $fn$`; only the exact closing delimiter
/// terminates it.
///
/// # Errors
///
/// Returns a [`TokenizerError`] when the input ends before the closing
/// delimiter is found.
fn tokenize_dollar_preceded_value(
    &self,
    chars: &mut Peekable<Chars<'_>>,
) -> Result<Token, TokenizerError> {
    // Consume the leading `$`.
    chars.next();

    // Empty for `$$...$$`, the tag name for `$tag$...$tag$`, and the
    // placeholder text for `$1`.
    let tag = peeking_take_while(chars, |ch| ch.is_alphanumeric() || ch == '_');

    // Without a second `$` this is a placeholder, not a string.
    if chars.peek() != Some(&'$') {
        return Ok(Token::Parameter(tag));
    }
    chars.next();

    let delimiter = format!("${}$", tag);
    let mut body = String::new();
    let mut is_terminated = false;

    // Accumulate characters until the body ends with the closing delimiter.
    // Matching the whole delimiter (rather than stopping at the first `$`)
    // is what allows `$` to appear inside a tagged string.
    for ch in chars.by_ref() {
        body.push(ch);
        // A delimiter always ends in `$`, so only check after pushing one.
        if ch == '$' && body.ends_with(delimiter.as_str()) {
            body.truncate(body.len() - delimiter.len());
            is_terminated = true;
            break;
        }
    }

    if !is_terminated {
        return if tag.is_empty() {
            self.tokenizer_error("Unterminated dollar-quoted string")
        } else {
            self.tokenizer_error(format!(
                "Unterminated dollar-quoted string at or near \"{}\"",
                tag
            ))
        };
    }

    Ok(Token::DollarQuotedString(DollarQuotedString {
        value: body,
        tag: if tag.is_empty() { None } else { Some(tag) },
    }))
}

fn tokenizer_error<R>(&self, message: impl Into<String>) -> Result<R, TokenizerError> {
Err(TokenizerError {
message: message.into(),
Expand Down Expand Up @@ -799,17 +883,6 @@ impl<'a> Tokenizer<'a> {
chars.next();
Ok(Some(t))
}

/// Tokenize a numbered placeholder such as `$1`.
///
/// Consumes the leading `$` and the run of ASCII digits after it. Returns
/// `None` when no digit follows; the `$` has still been consumed in that case.
fn tokenize_parameter(&self, chars: &mut Peekable<Chars<'_>>) -> Option<Token> {
    // Skip over the `$` that introduced the placeholder.
    chars.next();

    let digits = peeking_take_while(chars, |ch| ch.is_ascii_digit());
    match digits.is_empty() {
        true => None,
        false => Some(Token::Parameter(digits)),
    }
}
}

/// Read from `chars` until `predicate` returns `false` or EOF is hit.
Expand Down
78 changes: 78 additions & 0 deletions src/sqlparser/tests/sqlparser_postgres.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1099,3 +1099,81 @@ fn parse_param_symbol() {
assert_eq!(values.0[0][0], Expr::Parameter { index: 1 });
}
}

/// Dollar-quoted literals, tagged and untagged, parse into
/// `Value::DollarQuotedString` with the expected tag and body.
#[test]
fn parse_dollar_quoted_string() {
    let sql = "SELECT $$hello$$, $tag_name$world$tag_name$, $$Foo$Bar$$, $$Foo$Bar$$col_name, $$$$, $tag_name$$tag_name$";

    let statements = parse_sql_statements(sql).unwrap();

    // Drill down to the SELECT list of the first parsed statement.
    let query = match statements.get(0).unwrap() {
        Statement::Query(query) => query,
        _ => unreachable!(),
    };
    let projection = match &query.body {
        SetExpr::Select(select) => &select.projection,
        _ => unreachable!(),
    };

    // Builds the expression expected for one dollar-quoted literal.
    let literal = |tag: Option<&str>, value: &str| {
        Expr::Value(Value::DollarQuotedString(DollarQuotedString {
            tag: tag.map(str::to_string),
            value: value.to_string(),
        }))
    };

    // Untagged literal.
    assert_eq!(
        &literal(None, "hello"),
        expr_from_projection(&projection[0])
    );

    // Tagged literal.
    assert_eq!(
        &literal(Some("tag_name"), "world"),
        expr_from_projection(&projection[1])
    );

    // A single `$` inside an untagged literal belongs to the body.
    assert_eq!(
        &literal(None, "Foo$Bar"),
        expr_from_projection(&projection[2])
    );

    // An identifier directly after the closing `$$` is parsed as an alias.
    assert_eq!(
        projection[3],
        SelectItem::ExprWithAlias {
            expr: literal(None, "Foo$Bar"),
            alias: Ident::new_unchecked("col_name"),
        }
    );

    // Empty bodies, untagged and tagged.
    assert_eq!(expr_from_projection(&projection[4]), &literal(None, ""));
    assert_eq!(
        expr_from_projection(&projection[5]),
        &literal(Some("tag_name"), "")
    );
}

/// Malformed dollar-quoted literals must be rejected.
#[test]
fn parse_incorrect_dollar_quoted_string() {
    // In order: closing delimiter lacks the tag, opening tag never closed,
    // and an untagged literal missing its second closing `$`.
    let malformed = ["SELECT $x$hello$$", "SELECT $hello$$", "SELECT $$$"];

    for sql in malformed {
        assert!(parse_sql_statements(sql).is_err());
    }
}

0 comments on commit 9db3773

Please sign in to comment.