From eec000805ae4625a11bc20ae04153f73194f390b Mon Sep 17 00:00:00 2001 From: pokutuna Date: Sat, 10 Jun 2023 03:49:52 +0000 Subject: [PATCH 1/5] Add test cases for triple-quoted string --- .../formatter/definitions/triple_quoted.sqlx | 25 +++++++++++++++++ tests/sqlx/format.spec.ts | 27 +++++++++++++++++++ 2 files changed, 52 insertions(+) create mode 100644 examples/formatter/definitions/triple_quoted.sqlx diff --git a/examples/formatter/definitions/triple_quoted.sqlx b/examples/formatter/definitions/triple_quoted.sqlx new file mode 100644 index 000000000..45f4c5f53 --- /dev/null +++ b/examples/formatter/definitions/triple_quoted.sqlx @@ -0,0 +1,25 @@ +config { type: "table" } + +SELECT + +'''1''' AS single_line, + +"""multi + line + string + with indent""" +AS multi_line, + +REGEXP_CONTAINS( + "\n abc\n ", + r''' +abc +''') AS multi_line_regex, + +""" +This project is ... + "${ + database()}"!! +""" AS with_js + +post_operations { select """1""" as inner_sql } diff --git a/tests/sqlx/format.spec.ts b/tests/sqlx/format.spec.ts index eb4d28086..dc0240d72 100644 --- a/tests/sqlx/format.spec.ts +++ b/tests/sqlx/format.spec.ts @@ -146,6 +146,33 @@ WHERE n < 8 QUALIFY MOD(ROW_NUMBER() OVER (), 2) = 0 +`); + }); + + test("format triple quoted string", async () => { + expect(await formatFile(path.resolve("examples/formatter/definitions/triple_quoted.sqlx"))) + .equal(`config { + type: "table" +} + +SELECT + '''1''' AS single_line, + """multi + line + string + with indent""" AS multi_line, + REGEXP_CONTAINS("\\n abc\\n ", r''' +abc +''') AS multi_line_regex, + """ +This project is ... + "\${database()}"!! 
+""" AS with_js + +post_operations { + select + """1""" as inner_sql +} `); }); }); From 392f42b8a3c4ada306007439136183c14b20ae2b Mon Sep 17 00:00:00 2001 From: pokutuna Date: Wed, 21 Jun 2023 00:49:56 +0000 Subject: [PATCH 2/5] Add a todo test case for formatting --- .../formatter/definitions/triple_quoted.sqlx | 3 +-- tests/sqlx/format.spec.ts | 24 ++++++++++++++++++- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/examples/formatter/definitions/triple_quoted.sqlx b/examples/formatter/definitions/triple_quoted.sqlx index 45f4c5f53..8f97b7bc9 100644 --- a/examples/formatter/definitions/triple_quoted.sqlx +++ b/examples/formatter/definitions/triple_quoted.sqlx @@ -18,8 +18,7 @@ abc """ This project is ... - "${ - database()}"!! + "${database()}"!! """ AS with_js post_operations { select """1""" as inner_sql } diff --git a/tests/sqlx/format.spec.ts b/tests/sqlx/format.spec.ts index dc0240d72..0a375cc3d 100644 --- a/tests/sqlx/format.spec.ts +++ b/tests/sqlx/format.spec.ts @@ -1,7 +1,7 @@ import { expect } from "chai"; import * as path from "path"; -import { formatFile } from "df/sqlx/format"; +import { formatFile, format } from "df/sqlx/format"; import { suite, test } from "df/testing"; suite("@dataform/sqlx", () => { @@ -176,4 +176,26 @@ post_operations { `); }); }); + + suite("formatter todos", () => { + test("TODO format tempalte string in a string", async () => { + const input = ` + config { + type: "view" + } + SELECT + "ok" AS \${ "here"+ "works" }, + "1 + 2 = \${ 1+2 }" AS TODO_in_string, + '''\${1 +2 }''' AS TODO_in_triple_quoted_string + `; + expect(format(input, 'sqlx')).eql(`config { + type: "view" +} + +SELECT + "ok" AS \${"here" + "works"}, + "1 + 2 = \${ 1+2 }" AS TODO_in_string, + '''\${1 +2 }''' AS TODO_in_triple_quoted_string +`)}); + }) }); From 3852bc62e972fb6cbbca91675587977fedf58779 Mon Sep 17 00:00:00 2001 From: pokutuna Date: Wed, 21 Jun 2023 01:34:51 +0000 Subject: [PATCH 3/5] Enhance lexer to handle triple-quoted string --- 
sqlx/format.ts | 10 ++++++ sqlx/lexer.ts | 98 +++++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 104 insertions(+), 4 deletions(-) diff --git a/sqlx/format.ts b/sqlx/format.ts index 239b5b109..97067897b 100644 --- a/sqlx/format.ts +++ b/sqlx/format.ts @@ -171,6 +171,7 @@ function stripUnformattableText( const placeholderId = generatePlaceholderId(); switch (part.type) { case SyntaxTreeNodeType.SQL_LITERAL_STRING: + case SyntaxTreeNodeType.SQL_LITERAL_MULTILINE_STRING: case SyntaxTreeNodeType.JAVASCRIPT_TEMPLATE_STRING_PLACEHOLDER: { placeholders[placeholderId] = part; return placeholderId; @@ -242,6 +243,7 @@ function formatPlaceholderInSqlx( const wholeLine = getWholeLineContainingPlaceholderId(placeholderId, sqlx); const indent = " ".repeat(wholeLine.length - wholeLine.trimLeft().length); const formattedPlaceholder = formatSqlQueryPlaceholder(placeholderSyntaxNode, indent); + // Replace the placeholder entirely if (a) it fits on one line and (b) it isn't a comment. // Otherwise, push the replacement onto its own line. if ( @@ -250,6 +252,12 @@ function formatPlaceholderInSqlx( ) { return sqlx.replace(placeholderId, () => formattedPlaceholder.trim()); } + + // Keep internal line breaks in multiline string. + if (placeholderSyntaxNode.type === SyntaxTreeNodeType.SQL_LITERAL_MULTILINE_STRING) { + return sqlx.replace(placeholderId, () => formattedPlaceholder.trim()); + } + // Push multi-line placeholders to their own lines, if they're not already on one. 
const [textBeforePlaceholder, textAfterPlaceholder] = wholeLine.split(placeholderId); const newLines: string[] = []; @@ -270,6 +278,8 @@ function formatSqlQueryPlaceholder(node: SyntaxTreeNode, jsIndent: string): stri case SyntaxTreeNodeType.SQL_LITERAL_STRING: case SyntaxTreeNodeType.SQL_COMMENT: return formatEveryLine(node.concatenate(), line => `${jsIndent}${line.trimLeft()}`); + case SyntaxTreeNodeType.SQL_LITERAL_MULTILINE_STRING: + return `${jsIndent}${node.concatenate().trimLeft()}`; default: throw new Error(`Unrecognized SyntaxTreeNodeType: ${node.type}`); } diff --git a/sqlx/lexer.ts b/sqlx/lexer.ts index 5791f4855..5ecc75684 100644 --- a/sqlx/lexer.ts +++ b/sqlx/lexer.ts @@ -6,7 +6,9 @@ const LEXER_STATE_NAMES = { JS_TEMPLATE_STRING: "jsTemplateString", INNER_SQL_BLOCK: "innerSqlBlock", SQL_SINGLE_QUOTE_STRING: "innerSingleQuote", - SQL_DOUBLE_QUOTE_STRING: "innerDoubleQuote" + SQL_DOUBLE_QUOTE_STRING: "innerDoubleQuote", + SQL_TRIPLE_SINGLE_QUOTE_STRING: "innerTripleSingleQuote", + SQL_TRIPLE_DOUBLE_QUOTE_STRING: "innerTripleDoubleQuote" }; const SQL_LEXER_TOKEN_NAMES = { @@ -21,8 +23,10 @@ const SQL_LEXER_TOKEN_NAMES = { MULTI_LINE_COMMENT: LEXER_STATE_NAMES.SQL + "_multiLineComment", START_JS_PLACEHOLDER: LEXER_STATE_NAMES.SQL + "_startJsPlaceholder", BACKTICK: LEXER_STATE_NAMES.SQL + "_backtick", - START_QUOTE_SINGLE: LEXER_STATE_NAMES.INNER_SQL_BLOCK + "_startQuoteSingle", - START_QUOTE_DOUBLE: LEXER_STATE_NAMES.INNER_SQL_BLOCK + "_startQuoteDouble", + START_QUOTE_SINGLE: LEXER_STATE_NAMES.SQL + "_startQuoteSingle", + START_QUOTE_DOUBLE: LEXER_STATE_NAMES.SQL + "_startQuoteDouble", + START_TRIPLE_QUOTE_SINGLE: LEXER_STATE_NAMES.SQL + "_startTripleQuoteSingle", + START_TRIPLE_QUOTE_DOUBLE: LEXER_STATE_NAMES.SQL + "_startTripleQuoteDouble", CAPTURE_EVERYTHING_ELSE: LEXER_STATE_NAMES.SQL + "_captureEverythingElse" }; @@ -54,6 +58,8 @@ const INNER_SQL_BLOCK_LEXER_TOKEN_NAMES = { BACKTICK: LEXER_STATE_NAMES.INNER_SQL_BLOCK + "_backtick", 
START_QUOTE_SINGLE: LEXER_STATE_NAMES.INNER_SQL_BLOCK + "_startQuoteSingle", START_QUOTE_DOUBLE: LEXER_STATE_NAMES.INNER_SQL_BLOCK + "_startQuoteDouble", + START_TRIPLE_QUOTE_SINGLE: LEXER_STATE_NAMES.INNER_SQL_BLOCK + "_startTripleQuoteSingle", + START_TRIPLE_QUOTE_DOUBLE: LEXER_STATE_NAMES.INNER_SQL_BLOCK + "_startTripleQuoteDouble", CAPTURE_EVERYTHING_ELSE: LEXER_STATE_NAMES.INNER_SQL_BLOCK + "_captureEverythingElse" }; @@ -73,6 +79,20 @@ const SQL_DOUBLE_QUOTE_STRING_LEXER_TOKEN_NAMES = { CAPTURE_EVERYTHING_ELSE: LEXER_STATE_NAMES.SQL_DOUBLE_QUOTE_STRING + "_captureEverythingElse" }; +const SQL_TRIPLE_SINGLE_QUOTE_STRING_LEXER_TOKEN_NAMES = { + ESCAPED_BACKSLASH: LEXER_STATE_NAMES.SQL_TRIPLE_SINGLE_QUOTE_STRING + "_escapedBackslash", + START_JS_PLACEHOLDER: LEXER_STATE_NAMES.SQL_TRIPLE_SINGLE_QUOTE_STRING + "_startJsPlaceholder", + CLOSE_QUOTE: LEXER_STATE_NAMES.SQL_TRIPLE_SINGLE_QUOTE_STRING + "_closeTripleQuoteSingle", + CAPTURE_EVERYTHING_ELSE: LEXER_STATE_NAMES.SQL_TRIPLE_SINGLE_QUOTE_STRING + "_captureEverythingElse" +}; + +const SQL_TRIPLE_DOUBLE_QUOTE_STRING_LEXER_TOKEN_NAMES = { + ESCAPED_BACKSLASH: LEXER_STATE_NAMES.SQL_TRIPLE_DOUBLE_QUOTE_STRING + "_escapedBackslash", + START_JS_PLACEHOLDER: LEXER_STATE_NAMES.SQL_TRIPLE_DOUBLE_QUOTE_STRING + "_startJsPlaceholder", + CLOSE_QUOTE: LEXER_STATE_NAMES.SQL_TRIPLE_DOUBLE_QUOTE_STRING + "_closeTripleQuoteDouble", + CAPTURE_EVERYTHING_ELSE: LEXER_STATE_NAMES.SQL_TRIPLE_DOUBLE_QUOTE_STRING + "_captureEverythingElse" +}; + const lexer = moo.states(buildSqlxLexer()); export enum SyntaxTreeNodeType { @@ -81,6 +101,7 @@ export enum SyntaxTreeNodeType { SQL, SQL_COMMENT, SQL_LITERAL_STRING, + SQL_LITERAL_MULTILINE_STRING, SQL_STATEMENT_SEPARATOR } @@ -97,6 +118,8 @@ const START_TOKEN_NODE_MAPPINGS = new Map([ [SQL_LEXER_TOKEN_NAMES.START_PRE_OPERATIONS, SyntaxTreeNodeType.SQL], [SQL_LEXER_TOKEN_NAMES.START_QUOTE_SINGLE, SyntaxTreeNodeType.SQL_LITERAL_STRING], [SQL_LEXER_TOKEN_NAMES.START_QUOTE_DOUBLE, 
SyntaxTreeNodeType.SQL_LITERAL_STRING], + [SQL_LEXER_TOKEN_NAMES.START_TRIPLE_QUOTE_SINGLE, SyntaxTreeNodeType.SQL_LITERAL_MULTILINE_STRING], + [SQL_LEXER_TOKEN_NAMES.START_TRIPLE_QUOTE_DOUBLE, SyntaxTreeNodeType.SQL_LITERAL_MULTILINE_STRING], [JS_BLOCK_LEXER_TOKEN_NAMES.START_JS_BLOCK, SyntaxTreeNodeType.JAVASCRIPT], @@ -108,6 +131,8 @@ const START_TOKEN_NODE_MAPPINGS = new Map([ ], [INNER_SQL_BLOCK_LEXER_TOKEN_NAMES.START_QUOTE_SINGLE, SyntaxTreeNodeType.SQL_LITERAL_STRING], [INNER_SQL_BLOCK_LEXER_TOKEN_NAMES.START_QUOTE_DOUBLE, SyntaxTreeNodeType.SQL_LITERAL_STRING], + [INNER_SQL_BLOCK_LEXER_TOKEN_NAMES.START_TRIPLE_QUOTE_SINGLE, SyntaxTreeNodeType.SQL_LITERAL_MULTILINE_STRING], + [INNER_SQL_BLOCK_LEXER_TOKEN_NAMES.START_TRIPLE_QUOTE_DOUBLE, SyntaxTreeNodeType.SQL_LITERAL_MULTILINE_STRING], [ SQL_SINGLE_QUOTE_STRING_LEXER_TOKEN_NAMES.START_JS_PLACEHOLDER, @@ -117,6 +142,16 @@ const START_TOKEN_NODE_MAPPINGS = new Map([ [ SQL_DOUBLE_QUOTE_STRING_LEXER_TOKEN_NAMES.START_JS_PLACEHOLDER, SyntaxTreeNodeType.JAVASCRIPT_TEMPLATE_STRING_PLACEHOLDER + ], + + [ + SQL_TRIPLE_SINGLE_QUOTE_STRING_LEXER_TOKEN_NAMES.START_JS_PLACEHOLDER, + SyntaxTreeNodeType.JAVASCRIPT_TEMPLATE_STRING_PLACEHOLDER + ], + + [ + SQL_TRIPLE_DOUBLE_QUOTE_STRING_LEXER_TOKEN_NAMES.START_JS_PLACEHOLDER, + SyntaxTreeNodeType.JAVASCRIPT_TEMPLATE_STRING_PLACEHOLDER ] ]); @@ -124,7 +159,9 @@ const CLOSE_TOKEN_TYPES = new Set([ JS_BLOCK_LEXER_TOKEN_NAMES.CLOSE_BLOCK, INNER_SQL_BLOCK_LEXER_TOKEN_NAMES.CLOSE_BLOCK, SQL_SINGLE_QUOTE_STRING_LEXER_TOKEN_NAMES.CLOSE_QUOTE, - SQL_DOUBLE_QUOTE_STRING_LEXER_TOKEN_NAMES.CLOSE_QUOTE + SQL_DOUBLE_QUOTE_STRING_LEXER_TOKEN_NAMES.CLOSE_QUOTE, + SQL_TRIPLE_SINGLE_QUOTE_STRING_LEXER_TOKEN_NAMES.CLOSE_QUOTE, + SQL_TRIPLE_DOUBLE_QUOTE_STRING_LEXER_TOKEN_NAMES.CLOSE_QUOTE ]); const WHOLE_TOKEN_NODE_MAPPINGS = new Map([ @@ -269,6 +306,19 @@ function buildSqlxLexer(): { [x: string]: moo.Rules } { push: LEXER_STATE_NAMES.JS_BLOCK }; sqlLexer[SQL_LEXER_TOKEN_NAMES.BACKTICK] = 
"`"; + + // Since quotes(' & ") are substring of triple-quotes(''' & """), the declarations of + // triple-quote tokens must be placed first. The parsing order by moo implicity depends + // on the order of property creation in rule object. + sqlLexer[SQL_LEXER_TOKEN_NAMES.START_TRIPLE_QUOTE_SINGLE] = { + match: "'''", + push: LEXER_STATE_NAMES.SQL_TRIPLE_SINGLE_QUOTE_STRING, + }; + sqlLexer[SQL_LEXER_TOKEN_NAMES.START_TRIPLE_QUOTE_DOUBLE] = { + match: '"""', + push: LEXER_STATE_NAMES.SQL_TRIPLE_DOUBLE_QUOTE_STRING, + }; + sqlLexer[SQL_LEXER_TOKEN_NAMES.START_QUOTE_SINGLE] = { match: "'", push: LEXER_STATE_NAMES.SQL_SINGLE_QUOTE_STRING @@ -329,6 +379,14 @@ function buildSqlxLexer(): { [x: string]: moo.Rules } { pop: 1 }; innerSqlBlockLexer[INNER_SQL_BLOCK_LEXER_TOKEN_NAMES.BACKTICK] = "`"; + innerSqlBlockLexer[INNER_SQL_BLOCK_LEXER_TOKEN_NAMES.START_TRIPLE_QUOTE_SINGLE] = { + match: "'''", + push: LEXER_STATE_NAMES.SQL_TRIPLE_SINGLE_QUOTE_STRING + }; + innerSqlBlockLexer[INNER_SQL_BLOCK_LEXER_TOKEN_NAMES.START_TRIPLE_QUOTE_DOUBLE] = { + match: '"""', + push: LEXER_STATE_NAMES.SQL_TRIPLE_DOUBLE_QUOTE_STRING + }; innerSqlBlockLexer[INNER_SQL_BLOCK_LEXER_TOKEN_NAMES.START_QUOTE_SINGLE] = { match: "'", push: LEXER_STATE_NAMES.SQL_SINGLE_QUOTE_STRING @@ -376,6 +434,36 @@ function buildSqlxLexer(): { [x: string]: moo.Rules } { lineBreaks: true }; + const innerTripleSingleQuoteLexer: moo.Rules = {}; + innerTripleSingleQuoteLexer[SQL_TRIPLE_SINGLE_QUOTE_STRING_LEXER_TOKEN_NAMES.ESCAPED_BACKSLASH] = "\\\\"; + innerTripleSingleQuoteLexer[SQL_TRIPLE_SINGLE_QUOTE_STRING_LEXER_TOKEN_NAMES.START_JS_PLACEHOLDER] = { + match: "${", + push: LEXER_STATE_NAMES.JS_BLOCK + }; + innerTripleSingleQuoteLexer[SQL_TRIPLE_SINGLE_QUOTE_STRING_LEXER_TOKEN_NAMES.CLOSE_QUOTE] = { + match: "'''", + pop: 1 + }; + innerTripleSingleQuoteLexer[SQL_TRIPLE_SINGLE_QUOTE_STRING_LEXER_TOKEN_NAMES.CAPTURE_EVERYTHING_ELSE] = { + match: /[\s\S]+?/, + lineBreaks: true + }; + + const 
innerTripleDoubleQuoteLexer: moo.Rules = {}; + innerTripleDoubleQuoteLexer[SQL_TRIPLE_DOUBLE_QUOTE_STRING_LEXER_TOKEN_NAMES.ESCAPED_BACKSLASH] = "\\\\"; + innerTripleDoubleQuoteLexer[SQL_TRIPLE_DOUBLE_QUOTE_STRING_LEXER_TOKEN_NAMES.START_JS_PLACEHOLDER] = { + match: "${", + push: LEXER_STATE_NAMES.JS_BLOCK + }; + innerTripleDoubleQuoteLexer[SQL_TRIPLE_DOUBLE_QUOTE_STRING_LEXER_TOKEN_NAMES.CLOSE_QUOTE] = { + match: '"""', + pop: 1 + }; + innerTripleDoubleQuoteLexer[SQL_TRIPLE_DOUBLE_QUOTE_STRING_LEXER_TOKEN_NAMES.CAPTURE_EVERYTHING_ELSE] = { + match: /[\s\S]+?/, + lineBreaks: true + }; + const lexerStates: { [x: string]: moo.Rules } = {}; lexerStates[LEXER_STATE_NAMES.SQL] = sqlLexer; lexerStates[LEXER_STATE_NAMES.JS_BLOCK] = jsBlockLexer; @@ -383,6 +471,8 @@ function buildSqlxLexer(): { [x: string]: moo.Rules } { lexerStates[LEXER_STATE_NAMES.INNER_SQL_BLOCK] = innerSqlBlockLexer; lexerStates[LEXER_STATE_NAMES.SQL_SINGLE_QUOTE_STRING] = innerSingleQuoteLexer; lexerStates[LEXER_STATE_NAMES.SQL_DOUBLE_QUOTE_STRING] = innerDoubleQuoteLexer; + lexerStates[LEXER_STATE_NAMES.SQL_TRIPLE_SINGLE_QUOTE_STRING] = innerTripleSingleQuoteLexer; + lexerStates[LEXER_STATE_NAMES.SQL_TRIPLE_DOUBLE_QUOTE_STRING] = innerTripleDoubleQuoteLexer; return lexerStates; } From d9095a1f09d7c6b88223b155bea5d5fcafa8d2c7 Mon Sep 17 00:00:00 2001 From: pokutuna Date: Wed, 21 Jun 2023 02:28:11 +0000 Subject: [PATCH 4/5] Fix tslint issues --- tests/sqlx/format.spec.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/sqlx/format.spec.ts b/tests/sqlx/format.spec.ts index 0a375cc3d..a44584dbf 100644 --- a/tests/sqlx/format.spec.ts +++ b/tests/sqlx/format.spec.ts @@ -1,7 +1,7 @@ import { expect } from "chai"; import * as path from "path"; -import { formatFile, format } from "df/sqlx/format"; +import { format, formatFile } from "df/sqlx/format"; import { suite, test } from "df/testing"; suite("@dataform/sqlx", () => { From d6743aec0e3ee8a1ceb0b243b28da8c86892ee9a Mon Sep 17 
00:00:00 2001 From: pokutuna Date: Wed, 28 Jun 2023 00:16:46 +0000 Subject: [PATCH 5/5] Fix typos --- sqlx/lexer.ts | 2 +- tests/sqlx/format.spec.ts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sqlx/lexer.ts b/sqlx/lexer.ts index 5ecc75684..f2e10d15e 100644 --- a/sqlx/lexer.ts +++ b/sqlx/lexer.ts @@ -308,7 +308,7 @@ function buildSqlxLexer(): { [x: string]: moo.Rules } { sqlLexer[SQL_LEXER_TOKEN_NAMES.BACKTICK] = "`"; // Since quotes(' & ") are substring of triple-quotes(''' & """), the declarations of - // triple-quote tokens must be placed first. The parsing order by moo implicity depends + // triple-quote tokens must be placed first. The parsing order by moo implicitly depends // on the order of property creation in rule object. sqlLexer[SQL_LEXER_TOKEN_NAMES.START_TRIPLE_QUOTE_SINGLE] = { match: "'''", diff --git a/tests/sqlx/format.spec.ts b/tests/sqlx/format.spec.ts index a44584dbf..24d4f3ba7 100644 --- a/tests/sqlx/format.spec.ts +++ b/tests/sqlx/format.spec.ts @@ -178,7 +178,7 @@ post_operations { }); suite("formatter todos", () => { - test("TODO format tempalte string in a string", async () => { + test("TODO format template string in a string", async () => { const input = ` config { type: "view"