Skip to content

Commit

Permalink
Fix: improve support for identifier delimiter escaping (tobymao#4288)
Browse files Browse the repository at this point in the history
  • Loading branch information
georgesittas authored Oct 25, 2024
1 parent bcdc1aa commit ee266ef
Show file tree
Hide file tree
Showing 4 changed files with 104 additions and 3 deletions.
1 change: 1 addition & 0 deletions sqlglot/dialects/clickhouse.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,7 @@ class ClickHouse(Dialect):
class Tokenizer(tokens.Tokenizer):
COMMENTS = ["--", "#", "#!", ("/*", "*/")]
IDENTIFIERS = ['"', "`"]
IDENTIFIER_ESCAPES = ["\\"]
STRING_ESCAPES = ["'", "\\"]
BIT_STRINGS = [("0b", "")]
HEX_STRINGS = [("0x", ""), ("0X", "")]
Expand Down
10 changes: 8 additions & 2 deletions sqlglot/tokens.py
Original file line number Diff line number Diff line change
Expand Up @@ -609,11 +609,15 @@ class Tokenizer(metaclass=_Tokenizer):
HEREDOC_STRINGS: t.List[str | t.Tuple[str, str]] = []
UNICODE_STRINGS: t.List[str | t.Tuple[str, str]] = []
IDENTIFIERS: t.List[str | t.Tuple[str, str]] = ['"']
IDENTIFIER_ESCAPES = ['"']
QUOTES: t.List[t.Tuple[str, str] | str] = ["'"]
STRING_ESCAPES = ["'"]
VAR_SINGLE_TOKENS: t.Set[str] = set()

# The strings in this list can always be used as escapes, regardless of the surrounding
# identifier delimiters. By default, the closing delimiter is also treated as an identifier
# escape, e.g. with double-quote delimiters a doubled double-quote escapes itself: "x"""
IDENTIFIER_ESCAPES: t.List[str] = []

# Whether the heredoc tags follow the same lexical rules as unquoted identifiers
HEREDOC_TAG_IS_IDENTIFIER = False

Expand Down Expand Up @@ -1363,7 +1367,9 @@ def _scan_string(self, start: str) -> bool:

def _scan_identifier(self, identifier_end: str) -> None:
# Reads the text of a delimited identifier up to `identifier_end` and adds it
# as a TokenType.IDENTIFIER token.
self._advance()
# NOTE(review): this view is a diff rendering whose +/- markers were lost; the
# single-line call below appears to be the pre-change line that the multi-line
# call after it replaces -- confirm against the actual file.
text = self._extract_string(identifier_end, escapes=self._IDENTIFIER_ESCAPES)
# The closing delimiter is unioned into the escape set, so it is always passed to
# _extract_string as an escape in addition to the configured identifier escapes.
text = self._extract_string(
identifier_end, escapes=self._IDENTIFIER_ESCAPES | {identifier_end}
)
self._add(TokenType.IDENTIFIER, text)

def _scan_var(self) -> None:
Expand Down
5 changes: 4 additions & 1 deletion sqlglotrs/src/tokenizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -609,8 +609,11 @@ impl<'a> TokenizerState<'a> {
let mut text = String::from("");

loop {
let mut new_identifier_escapes;
let escapes = if use_identifier_escapes {
&self.settings.identifier_escapes
new_identifier_escapes = self.settings.identifier_escapes.clone();
new_identifier_escapes.extend(delimiter.chars());
&new_identifier_escapes
} else {
&self.settings.string_escapes
};
Expand Down
91 changes: 91 additions & 0 deletions tests/dialects/test_dialect.py
Original file line number Diff line number Diff line change
Expand Up @@ -2893,3 +2893,94 @@ def test_uuid(self):
"snowflake": "UUID_STRING()",
},
)

def test_escaped_identifier_delimiter(self):
# Exercises identifiers whose name contains the quoting delimiter itself, across
# several dialects and delimiter styles (backticks, double quotes, square brackets).
# NOTE(review): validate_all is defined outside this view; presumably each `read`
# entry must transpile to the first argument and the first argument must be
# generated as each `write` entry -- confirm against the helper.

# Dialects where a doubled backtick inside a backtick-quoted name is expected
# both as input and as output; the first argument uses double quotes around x`.
for dialect in ("databricks", "hive", "mysql", "spark2", "spark"):
with self.subTest(f"Testing escaped backtick in identifier name for {dialect}"):
self.validate_all(
'SELECT 1 AS "x`"',
read={
dialect: "SELECT 1 AS `x```",
},
write={
dialect: "SELECT 1 AS `x```",
},
)

# Dialects where a doubled double-quote inside a double-quoted name is expected
# unchanged on both read and write. The empty string is the dialect name passed
# for the first case (presumably the default dialect -- confirm).
for dialect in (
"",
"clickhouse",
"duckdb",
"postgres",
"presto",
"trino",
"redshift",
"snowflake",
"sqlite",
):
with self.subTest(f"Testing escaped double-quote in identifier name for {dialect}"):
self.validate_all(
'SELECT 1 AS "x"""',
read={
dialect: 'SELECT 1 AS "x"""',
},
write={
dialect: 'SELECT 1 AS "x"""',
},
)

# These two dialects take the backtick-quoted input with a doubled backtick,
# but the expected output is double-quoted, where the backtick needs no escape.
for dialect in ("clickhouse", "sqlite"):
with self.subTest(f"Testing escaped backtick in identifier name for {dialect}"):
self.validate_all(
'SELECT 1 AS "x`"',
read={
dialect: "SELECT 1 AS `x```",
},
write={
dialect: 'SELECT 1 AS "x`"',
},
)

# ClickHouse input using a backslash before the backtick (the Python literal
# below contains a single backslash); expected output is the double-quoted form.
self.validate_all(
'SELECT 1 AS "x`"',
read={
"clickhouse": "SELECT 1 AS `x\\``",
},
write={
"clickhouse": 'SELECT 1 AS "x`"',
},
)
# Two ClickHouse spellings of a name containing a double quote: backslash-escaped
# inside double quotes, and unescaped inside backticks. Both expect "x""" as output.
for name in ('"x\\""', '`x"`'):
with self.subTest(f"Testing ClickHouse delimiter escaping: {name}"):
self.validate_all(
'SELECT 1 AS "x"""',
read={
"clickhouse": f"SELECT 1 AS {name}",
},
write={
"clickhouse": 'SELECT 1 AS "x"""',
},
)

# T-SQL: a bracket-quoted name with a doubled closing bracket and the same name in
# double quotes are both read; the expected output uses brackets with ] doubled.
for name in ("[[x]]]", '"[x]"'):
with self.subTest(f"Testing T-SQL delimiter escaping: {name}"):
self.validate_all(
'SELECT 1 AS "[x]"',
read={
"tsql": f"SELECT 1 AS {name}",
},
write={
"tsql": "SELECT 1 AS [[x]]]",
},
)
# T-SQL: a name containing a double quote, given in brackets (no escape) and in
# double quotes (doubled quote); the expected output is the bracketed form.
for name in ('[x"]', '"x"""'):
with self.subTest(f"Testing T-SQL delimiter escaping: {name}"):
self.validate_all(
'SELECT 1 AS "x"""',
read={
"tsql": f"SELECT 1 AS {name}",
},
write={
"tsql": 'SELECT 1 AS [x"]',
},
)

0 comments on commit ee266ef

Please sign in to comment.