From 1f2ef9f84a2726da5ad20c8eb518d6faf34a8568 Mon Sep 17 00:00:00 2001 From: Hans Ott Date: Thu, 26 Dec 2024 14:43:29 +0100 Subject: [PATCH 1/3] SQLite: Allow dollar signs in placeholder names MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Relevant: https://github.com/apache/datafusion-sqlparser-rs/pull/1402 SQLite version 3.43.2 2023-10-10 13:08:14 Enter ".help" for usage hints. Connected to a transient in-memory database. Use ".open FILENAME" to reopen on a persistent database. sqlite> .mode box sqlite> .nullvalue NULL sqlite> SELECT $$, $$ABC$$, $ABC$, $ABC; ┌──────┬─────────┬───────┬──────┐ │ $$ │ $$ABC$$ │ $ABC$ │ $ABC │ ├──────┼─────────┼───────┼──────┤ │ NULL │ NULL │ NULL │ NULL │ └──────┴─────────┴───────┴──────┘ --- src/dialect/mod.rs | 4 ++++ src/dialect/sqlite.rs | 4 ++++ src/tokenizer.rs | 37 +++++++++++++++++++++++++++++++++---- tests/sqlparser_sqlite.rs | 10 ++++++++++ 4 files changed, 51 insertions(+), 4 deletions(-) diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index aee7b5994..af4e6cf9f 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -636,6 +636,10 @@ pub trait Dialect: Debug + Any { false } + fn supports_dollar_quoted_string(&self) -> bool { + true + } + /// Does the dialect support with clause in create index statement? /// e.g. `CREATE INDEX idx ON t WITH (key = value, key2)` fn supports_create_index_with_clause(&self) -> bool { diff --git a/src/dialect/sqlite.rs b/src/dialect/sqlite.rs index 95717f9fd..09df6036e 100644 --- a/src/dialect/sqlite.rs +++ b/src/dialect/sqlite.rs @@ -81,4 +81,8 @@ impl Dialect for SQLiteDialect { fn supports_asc_desc_in_column_definition(&self) -> bool { true } + + fn supports_dollar_quoted_string(&self) -> bool { + false + } } diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 3c2f70edf..412c594c9 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -1523,7 +1523,9 @@ impl<'a> Tokenizer<'a> { chars.next(); - if let Some('$') = chars.peek() { + // Check if the second character is a dollar sign + let next_is_dollar = matches!(chars.peek(), Some('$')); + if next_is_dollar && self.dialect.supports_dollar_quoted_string() { chars.next(); let mut is_terminated = false; @@ -1557,10 +1559,13 @@ impl<'a> Tokenizer<'a> { }; } else { value.push_str(&peeking_take_while(chars, |ch| { - ch.is_alphanumeric() || ch == '_' + ch.is_alphanumeric() + || ch == '_' + || matches!(ch, '$' if !self.dialect.supports_dollar_quoted_string()) })); - if let Some('$') = chars.peek() { + let next_is_dollar = matches!(chars.peek(), Some('$')); + if next_is_dollar && self.dialect.supports_dollar_quoted_string() { chars.next(); 'searching_for_end: loop { @@ -2151,7 +2156,7 @@ fn take_char_from_hex_digits( mod tests { use super::*; use crate::dialect::{ - BigQueryDialect, ClickHouseDialect, HiveDialect, MsSqlDialect, MySqlDialect, + BigQueryDialect, ClickHouseDialect, HiveDialect, MsSqlDialect, MySqlDialect, SQLiteDialect, }; use core::fmt::Debug; @@ -2604,6 +2609,30 @@ mod tests { ); } + #[test] + fn tokenize_dollar_placeholder_sqlite() { + let sql = String::from("SELECT $$, $$ABC$$, $ABC$, $ABC"); + let dialect = SQLiteDialect {}; + let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); + assert_eq!( + tokens, + vec![ + Token::make_keyword("SELECT"), + Token::Whitespace(Whitespace::Space), + Token::Placeholder("$$".into()), + Token::Comma, + Token::Whitespace(Whitespace::Space), + Token::Placeholder("$$ABC$$".into()), + Token::Comma, + Token::Whitespace(Whitespace::Space), + Token::Placeholder("$ABC$".into()), + Token::Comma, + Token::Whitespace(Whitespace::Space), + Token::Placeholder("$ABC".into()), + ] + ); + } + #[test] fn tokenize_dollar_quoted_string_untagged() { let sql = diff --git a/tests/sqlparser_sqlite.rs b/tests/sqlparser_sqlite.rs index ff0b54ef7..0adf7f755 100644 --- a/tests/sqlparser_sqlite.rs +++ b/tests/sqlparser_sqlite.rs @@ -561,6 +561,16 @@ fn test_dollar_identifier_as_placeholder() { } _ => unreachable!(), } + + // $$ is a valid placeholder in SQLite + match sqlite().verified_expr("id = $$") { + Expr::BinaryOp { op, left, right } => { + assert_eq!(op, BinaryOperator::Eq); + assert_eq!(left, Box::new(Expr::Identifier(Ident::new("id")))); + assert_eq!(right, Box::new(Expr::Value(Placeholder("$$".to_string())))); + } + _ => unreachable!(), + } } fn sqlite() -> TestedDialects { From 87461b17d2f3decc2e0ba260a0cad2850c072670 Mon Sep 17 00:00:00 2001 From: Hans Ott Date: Fri, 27 Dec 2024 10:04:35 +0100 Subject: [PATCH 2/3] Add comment for dialect option --- src/dialect/mod.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index af4e6cf9f..6ba9cc071 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -636,6 +636,8 @@ pub trait Dialect: Debug + Any { false } + /// Returns true if this dialect allows dollar quoted strings + /// e.g. `SELECT $$Hello, world!$$` or `SELECT $tag$Hello, world!$tag$` fn supports_dollar_quoted_string(&self) -> bool { true } From cfbac35863f305a3c71f0647c856c2d687192d60 Mon Sep 17 00:00:00 2001 From: Hans Ott Date: Fri, 27 Dec 2024 16:31:48 +0100 Subject: [PATCH 3/3] supports_dollar_quoted_string -> supports_dollar_placeholder --- src/dialect/mod.rs | 8 ++++---- src/dialect/sqlite.rs | 4 ++-- src/tokenizer.rs | 14 +++++++------- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 6ba9cc071..1343efca6 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -636,10 +636,10 @@ pub trait Dialect: Debug + Any { false } - /// Returns true if this dialect allows dollar quoted strings - /// e.g. `SELECT $$Hello, world!$$` or `SELECT $tag$Hello, world!$tag$` - fn supports_dollar_quoted_string(&self) -> bool { - true + /// Returns true if this dialect allows dollar placeholders + /// e.g. `SELECT $var` (SQLite) + fn supports_dollar_placeholder(&self) -> bool { + false } /// Does the dialect support with clause in create index statement? diff --git a/src/dialect/sqlite.rs b/src/dialect/sqlite.rs index 09df6036e..138c4692c 100644 --- a/src/dialect/sqlite.rs +++ b/src/dialect/sqlite.rs @@ -82,7 +82,7 @@ impl Dialect for SQLiteDialect { true } - fn supports_dollar_quoted_string(&self) -> bool { - false + fn supports_dollar_placeholder(&self) -> bool { + true } } diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 412c594c9..4938b642b 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -1523,9 +1523,8 @@ impl<'a> Tokenizer<'a> { chars.next(); - // Check if the second character is a dollar sign - let next_is_dollar = matches!(chars.peek(), Some('$')); - if next_is_dollar && self.dialect.supports_dollar_quoted_string() { + // If the dialect does not support dollar-quoted strings, then `$$` is rather a placeholder. + if matches!(chars.peek(), Some('$')) && !self.dialect.supports_dollar_placeholder() { chars.next(); let mut is_terminated = false; @@ -1561,11 +1560,12 @@ impl<'a> Tokenizer<'a> { value.push_str(&peeking_take_while(chars, |ch| { ch.is_alphanumeric() || ch == '_' - || matches!(ch, '$' if !self.dialect.supports_dollar_quoted_string()) + // Allow $ as a placeholder character if the dialect supports it + || matches!(ch, '$' if self.dialect.supports_dollar_placeholder()) })); - let next_is_dollar = matches!(chars.peek(), Some('$')); - if next_is_dollar && self.dialect.supports_dollar_quoted_string() { + // If the dialect does not support dollar-quoted strings, don't look for the end delimiter. + if matches!(chars.peek(), Some('$')) && !self.dialect.supports_dollar_placeholder() { chars.next(); 'searching_for_end: loop { @@ -2610,7 +2610,7 @@ mod tests { } #[test] - fn tokenize_dollar_placeholder_sqlite() { + fn tokenize_dollar_placeholder() { let sql = String::from("SELECT $$, $$ABC$$, $ABC$, $ABC"); let dialect = SQLiteDialect {}; let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();