diff --git a/examples/formatter/definitions/triple_quoted.sqlx b/examples/formatter/definitions/triple_quoted.sqlx new file mode 100644 index 000000000..8f97b7bc9 --- /dev/null +++ b/examples/formatter/definitions/triple_quoted.sqlx @@ -0,0 +1,24 @@ +config { type: "table" } + +SELECT + +'''1''' AS single_line, + +"""multi + line + string + with indent""" +AS multi_line, + +REGEXP_CONTAINS( + "\n abc\n ", + r''' +abc +''') AS multi_line_regex, + +""" +This project is ... + "${database()}"!! +""" AS with_js + +post_operations { select """1""" as inner_sql } diff --git a/sqlx/format.ts b/sqlx/format.ts index 239b5b109..97067897b 100644 --- a/sqlx/format.ts +++ b/sqlx/format.ts @@ -171,6 +171,7 @@ function stripUnformattableText( const placeholderId = generatePlaceholderId(); switch (part.type) { case SyntaxTreeNodeType.SQL_LITERAL_STRING: + case SyntaxTreeNodeType.SQL_LITERAL_MULTILINE_STRING: case SyntaxTreeNodeType.JAVASCRIPT_TEMPLATE_STRING_PLACEHOLDER: { placeholders[placeholderId] = part; return placeholderId; @@ -242,6 +243,7 @@ function formatPlaceholderInSqlx( const wholeLine = getWholeLineContainingPlaceholderId(placeholderId, sqlx); const indent = " ".repeat(wholeLine.length - wholeLine.trimLeft().length); const formattedPlaceholder = formatSqlQueryPlaceholder(placeholderSyntaxNode, indent); + // Replace the placeholder entirely if (a) it fits on one line and (b) it isn't a comment. // Otherwise, push the replacement onto its own line. if ( @@ -250,6 +252,12 @@ function formatPlaceholderInSqlx( ) { return sqlx.replace(placeholderId, () => formattedPlaceholder.trim()); } + + // Keep internal line breaks in multiline string. + if (placeholderSyntaxNode.type === SyntaxTreeNodeType.SQL_LITERAL_MULTILINE_STRING) { + return sqlx.replace(placeholderId, () => formattedPlaceholder.trim()); + } + // Push multi-line placeholders to their own lines, if they're not already on one. const [textBeforePlaceholder, textAfterPlaceholder] = wholeLine.split(placeholderId); const newLines: string[] = []; @@ -270,6 +278,8 @@ function formatSqlQueryPlaceholder(node: SyntaxTreeNode, jsIndent: string): stri case SyntaxTreeNodeType.SQL_LITERAL_STRING: case SyntaxTreeNodeType.SQL_COMMENT: return formatEveryLine(node.concatenate(), line => `${jsIndent}${line.trimLeft()}`); + case SyntaxTreeNodeType.SQL_LITERAL_MULTILINE_STRING: + return `${jsIndent}${node.concatenate().trimLeft()}`; default: throw new Error(`Unrecognized SyntaxTreeNodeType: ${node.type}`); } diff --git a/sqlx/lexer.ts b/sqlx/lexer.ts index 5791f4855..f2e10d15e 100644 --- a/sqlx/lexer.ts +++ b/sqlx/lexer.ts @@ -6,7 +6,9 @@ const LEXER_STATE_NAMES = { JS_TEMPLATE_STRING: "jsTemplateString", INNER_SQL_BLOCK: "innerSqlBlock", SQL_SINGLE_QUOTE_STRING: "innerSingleQuote", - SQL_DOUBLE_QUOTE_STRING: "innerDoubleQuote" + SQL_DOUBLE_QUOTE_STRING: "innerDoubleQuote", + SQL_TRIPLE_SINGLE_QUOTE_STRING: "innerTripleSingleQuote", + SQL_TRIPLE_DOUBLE_QUOTE_STRING: "innerTripleDoubleQuote" }; const SQL_LEXER_TOKEN_NAMES = { @@ -21,8 +23,10 @@ const SQL_LEXER_TOKEN_NAMES = { MULTI_LINE_COMMENT: LEXER_STATE_NAMES.SQL + "_multiLineComment", START_JS_PLACEHOLDER: LEXER_STATE_NAMES.SQL + "_startJsPlaceholder", BACKTICK: LEXER_STATE_NAMES.SQL + "_backtick", - START_QUOTE_SINGLE: LEXER_STATE_NAMES.INNER_SQL_BLOCK + "_startQuoteSingle", - START_QUOTE_DOUBLE: LEXER_STATE_NAMES.INNER_SQL_BLOCK + "_startQuoteDouble", + START_QUOTE_SINGLE: LEXER_STATE_NAMES.SQL + "_startQuoteSingle", + START_QUOTE_DOUBLE: LEXER_STATE_NAMES.SQL + "_startQuoteDouble", + START_TRIPLE_QUOTE_SINGLE: LEXER_STATE_NAMES.SQL + "_startTripleQuoteSingle", + START_TRIPLE_QUOTE_DOUBLE: LEXER_STATE_NAMES.SQL + "_startTripleQuoteDouble", CAPTURE_EVERYTHING_ELSE: LEXER_STATE_NAMES.SQL + "_captureEverythingElse" }; @@ -54,6 +58,8 @@ const INNER_SQL_BLOCK_LEXER_TOKEN_NAMES = { BACKTICK: LEXER_STATE_NAMES.INNER_SQL_BLOCK + "_backtick", START_QUOTE_SINGLE: LEXER_STATE_NAMES.INNER_SQL_BLOCK + "_startQuoteSingle", START_QUOTE_DOUBLE: LEXER_STATE_NAMES.INNER_SQL_BLOCK + "_startQuoteDouble", + START_TRIPLE_QUOTE_SINGLE: LEXER_STATE_NAMES.INNER_SQL_BLOCK + "_startTripleQuoteSingle", + START_TRIPLE_QUOTE_DOUBLE: LEXER_STATE_NAMES.INNER_SQL_BLOCK + "_startTripleQuoteDouble", CAPTURE_EVERYTHING_ELSE: LEXER_STATE_NAMES.INNER_SQL_BLOCK + "_captureEverythingElse" }; @@ -73,6 +79,20 @@ const SQL_DOUBLE_QUOTE_STRING_LEXER_TOKEN_NAMES = { CAPTURE_EVERYTHING_ELSE: LEXER_STATE_NAMES.SQL_DOUBLE_QUOTE_STRING + "_captureEverythingElse" }; +const SQL_TRIPLE_SINGLE_QUOTE_STRING_LEXER_TOKEN_NAMES = { + ESCAPED_BACKSLASH: LEXER_STATE_NAMES.SQL_TRIPLE_SINGLE_QUOTE_STRING + "_escapedBackslash", + START_JS_PLACEHOLDER: LEXER_STATE_NAMES.SQL_TRIPLE_SINGLE_QUOTE_STRING + "_startJsPlaceholder", + CLOSE_QUOTE: LEXER_STATE_NAMES.SQL_TRIPLE_SINGLE_QUOTE_STRING + "_closeTripleQuoteSingle", + CAPTURE_EVERYTHING_ELSE: LEXER_STATE_NAMES.SQL_TRIPLE_SINGLE_QUOTE_STRING + "_captureEverythingElse" +}; + +const SQL_TRIPLE_DOUBLE_QUOTE_STRING_LEXER_TOKEN_NAMES = { + ESCAPED_BACKSLASH: LEXER_STATE_NAMES.SQL_TRIPLE_DOUBLE_QUOTE_STRING + "_escapedBackslash", + START_JS_PLACEHOLDER: LEXER_STATE_NAMES.SQL_TRIPLE_DOUBLE_QUOTE_STRING + "_startJsPlaceholder", + CLOSE_QUOTE: LEXER_STATE_NAMES.SQL_TRIPLE_DOUBLE_QUOTE_STRING + "_closeTripleQuoteDouble", + CAPTURE_EVERYTHING_ELSE: LEXER_STATE_NAMES.SQL_TRIPLE_DOUBLE_QUOTE_STRING + "_captureEverythingElse" +}; + const lexer = moo.states(buildSqlxLexer()); export enum SyntaxTreeNodeType { @@ -81,6 +101,7 @@ export enum SyntaxTreeNodeType { SQL, SQL_COMMENT, SQL_LITERAL_STRING, + SQL_LITERAL_MULTILINE_STRING, SQL_STATEMENT_SEPARATOR } @@ -97,6 +118,8 @@ const START_TOKEN_NODE_MAPPINGS = new Map([ [SQL_LEXER_TOKEN_NAMES.START_PRE_OPERATIONS, SyntaxTreeNodeType.SQL], [SQL_LEXER_TOKEN_NAMES.START_QUOTE_SINGLE, SyntaxTreeNodeType.SQL_LITERAL_STRING], [SQL_LEXER_TOKEN_NAMES.START_QUOTE_DOUBLE, SyntaxTreeNodeType.SQL_LITERAL_STRING], + [SQL_LEXER_TOKEN_NAMES.START_TRIPLE_QUOTE_SINGLE, SyntaxTreeNodeType.SQL_LITERAL_MULTILINE_STRING], + [SQL_LEXER_TOKEN_NAMES.START_TRIPLE_QUOTE_DOUBLE, SyntaxTreeNodeType.SQL_LITERAL_MULTILINE_STRING], [JS_BLOCK_LEXER_TOKEN_NAMES.START_JS_BLOCK, SyntaxTreeNodeType.JAVASCRIPT], @@ -108,6 +131,8 @@ const START_TOKEN_NODE_MAPPINGS = new Map([ ], [INNER_SQL_BLOCK_LEXER_TOKEN_NAMES.START_QUOTE_SINGLE, SyntaxTreeNodeType.SQL_LITERAL_STRING], [INNER_SQL_BLOCK_LEXER_TOKEN_NAMES.START_QUOTE_DOUBLE, SyntaxTreeNodeType.SQL_LITERAL_STRING], + [INNER_SQL_BLOCK_LEXER_TOKEN_NAMES.START_TRIPLE_QUOTE_SINGLE, SyntaxTreeNodeType.SQL_LITERAL_MULTILINE_STRING], + [INNER_SQL_BLOCK_LEXER_TOKEN_NAMES.START_TRIPLE_QUOTE_DOUBLE, SyntaxTreeNodeType.SQL_LITERAL_MULTILINE_STRING], [ SQL_SINGLE_QUOTE_STRING_LEXER_TOKEN_NAMES.START_JS_PLACEHOLDER, @@ -117,6 +142,16 @@ const START_TOKEN_NODE_MAPPINGS = new Map([ [ SQL_DOUBLE_QUOTE_STRING_LEXER_TOKEN_NAMES.START_JS_PLACEHOLDER, SyntaxTreeNodeType.JAVASCRIPT_TEMPLATE_STRING_PLACEHOLDER + ], + + [ + SQL_TRIPLE_SINGLE_QUOTE_STRING_LEXER_TOKEN_NAMES.START_JS_PLACEHOLDER, + SyntaxTreeNodeType.JAVASCRIPT_TEMPLATE_STRING_PLACEHOLDER + ], + + [ + SQL_TRIPLE_DOUBLE_QUOTE_STRING_LEXER_TOKEN_NAMES.START_JS_PLACEHOLDER, + SyntaxTreeNodeType.JAVASCRIPT_TEMPLATE_STRING_PLACEHOLDER ] ]); @@ -124,7 +159,9 @@ const CLOSE_TOKEN_TYPES = new Set([ JS_BLOCK_LEXER_TOKEN_NAMES.CLOSE_BLOCK, INNER_SQL_BLOCK_LEXER_TOKEN_NAMES.CLOSE_BLOCK, SQL_SINGLE_QUOTE_STRING_LEXER_TOKEN_NAMES.CLOSE_QUOTE, - SQL_DOUBLE_QUOTE_STRING_LEXER_TOKEN_NAMES.CLOSE_QUOTE + SQL_DOUBLE_QUOTE_STRING_LEXER_TOKEN_NAMES.CLOSE_QUOTE, + SQL_TRIPLE_SINGLE_QUOTE_STRING_LEXER_TOKEN_NAMES.CLOSE_QUOTE, + SQL_TRIPLE_DOUBLE_QUOTE_STRING_LEXER_TOKEN_NAMES.CLOSE_QUOTE ]); const WHOLE_TOKEN_NODE_MAPPINGS = new Map([ @@ -269,6 +306,19 @@ function buildSqlxLexer(): { [x: string]: moo.Rules } { push: LEXER_STATE_NAMES.JS_BLOCK }; sqlLexer[SQL_LEXER_TOKEN_NAMES.BACKTICK] = "`"; + + // Since quotes(' & ") are substring of triple-quotes(''' & """), the declarations of + // triple-quote tokens must be placed first. The parsing order by moo implicitly depends + // on the order of property creation in rule object. + sqlLexer[SQL_LEXER_TOKEN_NAMES.START_TRIPLE_QUOTE_SINGLE] = { + match: "'''", + push: LEXER_STATE_NAMES.SQL_TRIPLE_SINGLE_QUOTE_STRING, + }; + sqlLexer[SQL_LEXER_TOKEN_NAMES.START_TRIPLE_QUOTE_DOUBLE] = { + match: '"""', + push: LEXER_STATE_NAMES.SQL_TRIPLE_DOUBLE_QUOTE_STRING, + }; + sqlLexer[SQL_LEXER_TOKEN_NAMES.START_QUOTE_SINGLE] = { match: "'", push: LEXER_STATE_NAMES.SQL_SINGLE_QUOTE_STRING @@ -329,6 +379,14 @@ function buildSqlxLexer(): { [x: string]: moo.Rules } { pop: 1 }; innerSqlBlockLexer[INNER_SQL_BLOCK_LEXER_TOKEN_NAMES.BACKTICK] = "`"; + innerSqlBlockLexer[INNER_SQL_BLOCK_LEXER_TOKEN_NAMES.START_TRIPLE_QUOTE_SINGLE] = { + match: "'''", + push: LEXER_STATE_NAMES.SQL_TRIPLE_SINGLE_QUOTE_STRING + }; + innerSqlBlockLexer[INNER_SQL_BLOCK_LEXER_TOKEN_NAMES.START_TRIPLE_QUOTE_DOUBLE] = { + match: '"""', + push: LEXER_STATE_NAMES.SQL_TRIPLE_DOUBLE_QUOTE_STRING + }; innerSqlBlockLexer[INNER_SQL_BLOCK_LEXER_TOKEN_NAMES.START_QUOTE_SINGLE] = { match: "'", push: LEXER_STATE_NAMES.SQL_SINGLE_QUOTE_STRING @@ -376,6 +434,36 @@ function buildSqlxLexer(): { [x: string]: moo.Rules } { lineBreaks: true }; + const innerTripleSingleQuoteLexer: moo.Rules = {}; + innerTripleSingleQuoteLexer[SQL_TRIPLE_SINGLE_QUOTE_STRING_LEXER_TOKEN_NAMES.ESCAPED_BACKSLASH] = "\\\\"; + innerTripleSingleQuoteLexer[SQL_TRIPLE_SINGLE_QUOTE_STRING_LEXER_TOKEN_NAMES.START_JS_PLACEHOLDER] = { + match: "${", + push: LEXER_STATE_NAMES.JS_BLOCK + }; + innerTripleSingleQuoteLexer[SQL_TRIPLE_SINGLE_QUOTE_STRING_LEXER_TOKEN_NAMES.CLOSE_QUOTE] = { + match: "'''", + pop: 1 + }; + innerTripleSingleQuoteLexer[SQL_TRIPLE_SINGLE_QUOTE_STRING_LEXER_TOKEN_NAMES.CAPTURE_EVERYTHING_ELSE] = { + match: /[\s\S]+?/, + lineBreaks: true + }; + + const innerTripleDoubleQuoteLexer: moo.Rules = {}; + innerTripleDoubleQuoteLexer[SQL_TRIPLE_DOUBLE_QUOTE_STRING_LEXER_TOKEN_NAMES.ESCAPED_BACKSLASH] = "\\\\"; + innerTripleDoubleQuoteLexer[SQL_TRIPLE_DOUBLE_QUOTE_STRING_LEXER_TOKEN_NAMES.START_JS_PLACEHOLDER] = { + match: "${", + push: LEXER_STATE_NAMES.JS_BLOCK + }; + innerTripleDoubleQuoteLexer[SQL_TRIPLE_DOUBLE_QUOTE_STRING_LEXER_TOKEN_NAMES.CLOSE_QUOTE] = { + match: '"""', + pop: 1 + }; + innerTripleDoubleQuoteLexer[SQL_TRIPLE_DOUBLE_QUOTE_STRING_LEXER_TOKEN_NAMES.CAPTURE_EVERYTHING_ELSE] = { + match: /[\s\S]+?/, + lineBreaks: true + }; + const lexerStates: { [x: string]: moo.Rules } = {}; lexerStates[LEXER_STATE_NAMES.SQL] = sqlLexer; lexerStates[LEXER_STATE_NAMES.JS_BLOCK] = jsBlockLexer; @@ -383,6 +471,8 @@ function buildSqlxLexer(): { [x: string]: moo.Rules } { lexerStates[LEXER_STATE_NAMES.INNER_SQL_BLOCK] = innerSqlBlockLexer; lexerStates[LEXER_STATE_NAMES.SQL_SINGLE_QUOTE_STRING] = innerSingleQuoteLexer; lexerStates[LEXER_STATE_NAMES.SQL_DOUBLE_QUOTE_STRING] = innerDoubleQuoteLexer; + lexerStates[LEXER_STATE_NAMES.SQL_TRIPLE_SINGLE_QUOTE_STRING] = innerTripleSingleQuoteLexer; + lexerStates[LEXER_STATE_NAMES.SQL_TRIPLE_DOUBLE_QUOTE_STRING] = innerTripleDoubleQuoteLexer; return lexerStates; } diff --git a/tests/sqlx/format.spec.ts b/tests/sqlx/format.spec.ts index eb4d28086..24d4f3ba7 100644 --- a/tests/sqlx/format.spec.ts +++ b/tests/sqlx/format.spec.ts @@ -1,7 +1,7 @@ import { expect } from "chai"; import * as path from "path"; -import { formatFile } from "df/sqlx/format"; +import { format, formatFile } from "df/sqlx/format"; import { suite, test } from "df/testing"; suite("@dataform/sqlx", () => { @@ -146,7 +146,56 @@ WHERE n < 8 QUALIFY MOD(ROW_NUMBER() OVER (), 2) = 0 +`); + }); + + test("format triple quoted string", async () => { + expect(await formatFile(path.resolve("examples/formatter/definitions/triple_quoted.sqlx"))) + .equal(`config { + type: "table" +} + +SELECT + '''1''' AS single_line, + """multi + line + string + with indent""" AS multi_line, + REGEXP_CONTAINS("\\n abc\\n ", r''' +abc +''') AS multi_line_regex, + """ +This project is ... + "\${database()}"!! +""" AS with_js + +post_operations { + select + """1""" as inner_sql +} `); }); }); + + suite("formatter todos", () => { + test("TODO format template string in a string", async () => { + const input = ` + config { + type: "view" + } + SELECT + "ok" AS \${ "here"+ "works" }, + "1 + 2 = \${ 1+2 }" AS TODO_in_string, + '''\${1 +2 }''' AS TODO_in_triple_quoted_string + `; + expect(format(input, 'sqlx')).eql(`config { + type: "view" +} + +SELECT + "ok" AS \${"here" + "works"}, + "1 + 2 = \${ 1+2 }" AS TODO_in_string, + '''\${1 +2 }''' AS TODO_in_triple_quoted_string +`)}); + }) });