Skip to content

Commit

Permalink
Support UESCAPE in PostgreSQL unicode strings & identifiers
Browse files Browse the repository at this point in the history
  • Loading branch information
nene committed Feb 11, 2024
1 parent f715a15 commit 6a8712a
Show file tree
Hide file tree
Showing 4 changed files with 63 additions and 2 deletions.
1 change: 1 addition & 0 deletions src/cst/Expr.ts
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@ type KeywordOperator =
| [Keyword<"MEMBER">, Keyword<"OF">] // MySQL
| [Keyword<"SOUNDS">, Keyword<"LIKE">] // MySQL
| Keyword<"ESCAPE"> // SQLite, MySQL, PostgreSQL
| Keyword<"UESCAPE"> // PostgreSQL
// Timezone
| [Keyword<"AT">, Keyword<"TIME">, Keyword<"ZONE">]; // PostgreSQL

Expand Down
18 changes: 16 additions & 2 deletions src/parser.pegjs
Original file line number Diff line number Diff line change
Expand Up @@ -6962,7 +6962,15 @@ quoted_ident
/ (&sqlite / &mysql) ident:backticks_quoted_ident_qq { return ident; }
/ &bigquery ident:(bigquery_quoted_member_expr / backticks_quoted_ident_bs) { return ident; }
/ (&sqlite / &postgres) str:string_literal_double_quoted_qq { return loc(createIdentifier(str.text, str.value)); }
/ &postgres str:string_literal_unicode_double_quoted_qq { return loc(createIdentifier(str.text, str.value)); }
/ &postgres ident:postgres_unicode_ident { return ident; }

// PostgreSQL unicode identifier, optionally followed by a `UESCAPE '<string>'`
// clause that names a custom unicode escape character.
//
// `tail` uses the bounded repetition `|0..1|`, so it is always an array holding
// either zero or one `[__, UESCAPE, __, string]` tuples (never null). That array
// is handed together with `head` to createBinaryExprChain.
// When the clause is present the tests show a `binary_expr` node with the
// identifier on the left, the UESCAPE keyword as operator and the string on the
// right. NOTE(review): with an empty `tail` the helper presumably returns `head`
// unchanged — confirm against createBinaryExprChain, which is defined elsewhere.
postgres_unicode_ident
= head:ident_unicode_double_quoted_qq tail:(__ UESCAPE __ string_literal_single_quoted_qq)|0..1| {
return createBinaryExprChain(head, tail);
}

// Re-packages a match of string_literal_unicode_double_quoted_qq as an identifier
// node: the matched node's `text` and `value` are passed, in that order, to
// createIdentifier, and the result is wrapped with loc().
// NOTE(review): loc() presumably attaches source-range information — it is
// defined outside this view.
ident_unicode_double_quoted_qq
= str:string_literal_unicode_double_quoted_qq { return loc(createIdentifier(str.text, str.value)); }

// Backtick-quoted identifier: an opening "`", one or more items that are either a
// single non-backtick character or an escaped_backtick_qq match, and a closing "`".
// The identifier node is built from the full matched source text (text()) and the
// collected items joined into one string.
// NOTE(review): the `_qq` suffix suggests the escape is a doubled backtick —
// confirm against escaped_backtick_qq, which is defined outside this view.
backticks_quoted_ident_qq
= "`" chars:([^`] / escaped_backtick_qq)+ "`" { return loc(createIdentifier(text(), chars.join(""))); }
Expand Down Expand Up @@ -7178,7 +7186,7 @@ string_literal_plain
string_literal_single_quoted_qq
/ string_literal_dollar_quoted
/ string_literal_e_single_quoted_bs
/ string_literal_unicode_single_quoted_qq) { return s; }
/ postgres_unicode_string) { return s; }

mysql_string_literal_chain
= head:mysql_string_literal_plain tail:(__ mysql_string_literal_plain)* {
Expand All @@ -7200,6 +7208,11 @@ charset_introducer
// Character-set name: matches ident_name_basic and returns the raw matched source
// text as a plain string (not a node), discarding whatever ident_name_basic
// itself produced.
charset_name
= ident_name_basic { return text(); }

// PostgreSQL unicode string literal, optionally followed by a
// `UESCAPE '<string>'` clause that names a custom unicode escape character.
//
// `tail` uses the bounded repetition `|0..1|`, so it is always an array holding
// either zero or one `[__, UESCAPE, __, string]` tuples (never null). That array
// is handed together with `head` to createBinaryExprChain.
// When the clause is present the tests show a `binary_expr` node with the unicode
// string on the left, the UESCAPE keyword as operator and the escape-character
// string on the right. NOTE(review): with an empty `tail` the helper presumably
// returns `head` unchanged — confirm against createBinaryExprChain.
// The right-hand string is not validated here (e.g. its length is not checked).
postgres_unicode_string
= head:string_literal_unicode_single_quoted_qq tail:(__ UESCAPE __ string_literal_single_quoted_qq)|0..1| {
return createBinaryExprChain(head, tail);
}

string_literal_single_quoted_qq_bs // with repeated quote or backslash for escaping
= "'" chars:([^'\\] / escaped_single_quote_qq / backslash_escape)* "'" {
return loc({
Expand Down Expand Up @@ -8235,6 +8248,7 @@ TRUE = kw:"TRUE"i !ident_part { return loc(createK
// Keyword rules. Each one follows the same pattern:
//   - `"WORD"i` matches the word case-insensitively and binds the matched text to `kw`;
//   - `!ident_part` is a negative lookahead, so the match is rejected when the word is
//     immediately followed by something ident_part accepts (presumably a character
//     that can continue an identifier, e.g. so that TYPE does not match inside TYPES);
//   - the action wraps the matched text with createKeyword() and loc().
TRUNCATE = kw:"TRUNCATE"i !ident_part { return loc(createKeyword(kw)); }
TUESDAY = kw:"TUESDAY"i !ident_part { return loc(createKeyword(kw)); }
TYPE = kw:"TYPE"i !ident_part { return loc(createKeyword(kw)); }
UESCAPE = kw:"UESCAPE"i !ident_part { return loc(createKeyword(kw)); }
UNBOUNDED = kw:"UNBOUNDED"i !ident_part { return loc(createKeyword(kw)); }
UNDEFINED = kw:"UNDEFINED"i !ident_part { return loc(createKeyword(kw)); }
UNION = kw:"UNION"i !ident_part { return loc(createKeyword(kw)); }
Expand Down
23 changes: 23 additions & 0 deletions test/identifier.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -216,5 +216,28 @@ describe("identifier", () => {
// A doubled double-quote ("") inside a U&"..." identifier must be accepted as part
// of the identifier rather than ending it.
// NOTE(review): testExpr is defined outside this view — presumably it parses the
// expression and checks the result; confirm against the test utilities.
it(`supports ""-escaping in unicode identifier`, () => {
testExpr(`U&"my "" why"`);
});

it("supports custom unicode escape character on unicode identifier", () => {
// A U&"..." identifier followed by UESCAPE '<char>' is expected to parse into a
// binary_expr: identifier on the left, the UESCAPE keyword as operator, and the
// escape-character string literal on the right.
// Note that the snapshot keeps the escape sequences undecoded: the identifier's
// `name` is the raw "!0441!043B!043E!043D", not the characters they stand for.
expect(parseExpr(`U&"!0441!043B!043E!043D" UESCAPE '!'`)).toMatchInlineSnapshot(`
{
"left": {
"name": "!0441!043B!043E!043D",
"text": "U&"!0441!043B!043E!043D"",
"type": "identifier",
},
"operator": {
"name": "UESCAPE",
"text": "UESCAPE",
"type": "keyword",
},
"right": {
"text": "'!'",
"type": "string_literal",
"value": "!",
},
"type": "binary_expr",
}
`);
});
});
});
23 changes: 23 additions & 0 deletions test/literal/string.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -340,5 +340,28 @@ describe("string literal", () => {
}
`);
});

it("supports custom unicode escape character on unicode strings", () => {
// A U&'...' string followed by UESCAPE '<char>' is expected to parse into a
// binary_expr: the unicode string literal on the left, the UESCAPE keyword as
// operator, and the escape-character string literal on the right.
// Note that the snapshot keeps the escape sequences undecoded: the left literal's
// `value` is the raw "!0441!043B!043E!043D", not the characters they stand for.
expect(parseExpr(`U&'!0441!043B!043E!043D' UESCAPE '!'`)).toMatchInlineSnapshot(`
{
"left": {
"text": "U&'!0441!043B!043E!043D'",
"type": "string_literal",
"value": "!0441!043B!043E!043D",
},
"operator": {
"name": "UESCAPE",
"text": "UESCAPE",
"type": "keyword",
},
"right": {
"text": "'!'",
"type": "string_literal",
"value": "!",
},
"type": "binary_expr",
}
`);
});
});
});

0 comments on commit 6a8712a

Please sign in to comment.