From f715a156648b2b1bd2a7c1879e812bd1a253751b Mon Sep 17 00:00:00 2001 From: Rene Saarsoo Date: Sun, 11 Feb 2024 10:12:28 +0200 Subject: [PATCH] BREAKING! Don't parse unicode escapes in PostgreSQL unicode strings and identifiers We can't really do that in the CST parser because the escape character can be defined outside of the string itself (via a trailing UESCAPE clause). We could pull it off with some trickery, but it's better to leave this out of scope for this parser. --- src/parser.pegjs | 5 ++--- src/utils/unicode.ts | 17 ----------------- test/identifier.test.ts | 2 +- test/literal/string.test.ts | 2 +- 4 files changed, 4 insertions(+), 22 deletions(-) delete mode 100644 src/utils/unicode.ts diff --git a/src/parser.pegjs b/src/parser.pegjs index d537b845..2f50790b 100644 --- a/src/parser.pegjs +++ b/src/parser.pegjs @@ -6,7 +6,6 @@ parseBitBlob, parseTextBlob, } from "./utils/blob"; - import { parseUnicodeEscapes } from "./utils/unicode"; import { createBinaryExprChain, createBinaryExpr, @@ -7225,7 +7224,7 @@ string_literal_unicode_single_quoted_qq return loc({ type: "string_literal", text: text(), - value: parseUnicodeEscapes(str.value), + value: str.value, }); } @@ -7234,7 +7233,7 @@ string_literal_unicode_double_quoted_qq return loc({ type: "string_literal", text: text(), - value: parseUnicodeEscapes(str.value), + value: str.value, }); } diff --git a/src/utils/unicode.ts b/src/utils/unicode.ts deleted file mode 100644 index 5dfecc41..00000000 --- a/src/utils/unicode.ts +++ /dev/null @@ -1,17 +0,0 @@ -// -// helpers for parsing unicode escape sequences -// - -/** - * Converts unicode escape sequences to actual characters. - * - * "\0061" --> "a" - * "\+000061" --> "a" - */ -export const parseUnicodeEscapes = (str: string): string => { - return str.replace( - /\\([0-9A-F]{4})|\\\+([0-9A-F]{6})/g, - (_?: any, h1?: string, h2?: string) => - String.fromCodePoint(parseInt((h1 ?? 
h2) as string, 16)) - ); -}; diff --git a/test/identifier.test.ts b/test/identifier.test.ts index 6f3cca5b..6e0ff551 100644 --- a/test/identifier.test.ts +++ b/test/identifier.test.ts @@ -206,7 +206,7 @@ describe("identifier", () => { it("parses unicode identifier", () => { expect(parseExpr(`U&"d\\0061t\\+000061"`)).toMatchInlineSnapshot(` { - "name": "data", + "name": "d\\0061t\\+000061", "text": "U&"d\\0061t\\+000061"", "type": "identifier", } diff --git a/test/literal/string.test.ts b/test/literal/string.test.ts index c3fc351c..ff069121 100644 --- a/test/literal/string.test.ts +++ b/test/literal/string.test.ts @@ -326,7 +326,7 @@ describe("string literal", () => { { "text": "U&'d\\0061t\\+000061'", "type": "string_literal", - "value": "data", + "value": "d\\0061t\\+000061", } `); });