Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions examples/formatter/definitions/triple_quoted.sqlx
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
config { type: "table" }

SELECT

'''1''' AS single_line,

"""multi
line
string
with indent"""
AS multi_line,

REGEXP_CONTAINS(
"\n abc\n ",
r'''
abc
''') AS multi_line_regex,

"""
This project is ...
"${database()}"!!
""" AS with_js

post_operations { select """1""" as inner_sql }
10 changes: 10 additions & 0 deletions sqlx/format.ts
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@ function stripUnformattableText(
const placeholderId = generatePlaceholderId();
switch (part.type) {
case SyntaxTreeNodeType.SQL_LITERAL_STRING:
case SyntaxTreeNodeType.SQL_LITERAL_MULTILINE_STRING:
case SyntaxTreeNodeType.JAVASCRIPT_TEMPLATE_STRING_PLACEHOLDER: {
placeholders[placeholderId] = part;
return placeholderId;
Expand Down Expand Up @@ -242,6 +243,7 @@ function formatPlaceholderInSqlx(
const wholeLine = getWholeLineContainingPlaceholderId(placeholderId, sqlx);
const indent = " ".repeat(wholeLine.length - wholeLine.trimLeft().length);
const formattedPlaceholder = formatSqlQueryPlaceholder(placeholderSyntaxNode, indent);

// Replace the placeholder entirely if (a) it fits on one line and (b) it isn't a comment.
// Otherwise, push the replacement onto its own line.
if (
Expand All @@ -250,6 +252,12 @@ function formatPlaceholderInSqlx(
) {
return sqlx.replace(placeholderId, () => formattedPlaceholder.trim());
}

// Keep internal line breaks in multiline string.
if (placeholderSyntaxNode.type === SyntaxTreeNodeType.SQL_LITERAL_MULTILINE_STRING) {
return sqlx.replace(placeholderId, () => formattedPlaceholder.trim());
}

// Push multi-line placeholders to their own lines, if they're not already on one.
const [textBeforePlaceholder, textAfterPlaceholder] = wholeLine.split(placeholderId);
const newLines: string[] = [];
Expand All @@ -270,6 +278,8 @@ function formatSqlQueryPlaceholder(node: SyntaxTreeNode, jsIndent: string): stri
case SyntaxTreeNodeType.SQL_LITERAL_STRING:
case SyntaxTreeNodeType.SQL_COMMENT:
return formatEveryLine(node.concatenate(), line => `${jsIndent}${line.trimLeft()}`);
case SyntaxTreeNodeType.SQL_LITERAL_MULTILINE_STRING:
return `${jsIndent}${node.concatenate().trimLeft()}`;
default:
throw new Error(`Unrecognized SyntaxTreeNodeType: ${node.type}`);
}
Expand Down
98 changes: 94 additions & 4 deletions sqlx/lexer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@ const LEXER_STATE_NAMES = {
JS_TEMPLATE_STRING: "jsTemplateString",
INNER_SQL_BLOCK: "innerSqlBlock",
SQL_SINGLE_QUOTE_STRING: "innerSingleQuote",
SQL_DOUBLE_QUOTE_STRING: "innerDoubleQuote"
SQL_DOUBLE_QUOTE_STRING: "innerDoubleQuote",
SQL_TRIPLE_SINGLE_QUOTE_STRING: "innerTripleSingleQuote",
SQL_TRIPLE_DOUBLE_QUOTE_STRING: "innerTripleDoubleQuote"
};

const SQL_LEXER_TOKEN_NAMES = {
Expand All @@ -21,8 +23,10 @@ const SQL_LEXER_TOKEN_NAMES = {
MULTI_LINE_COMMENT: LEXER_STATE_NAMES.SQL + "_multiLineComment",
START_JS_PLACEHOLDER: LEXER_STATE_NAMES.SQL + "_startJsPlaceholder",
BACKTICK: LEXER_STATE_NAMES.SQL + "_backtick",
START_QUOTE_SINGLE: LEXER_STATE_NAMES.INNER_SQL_BLOCK + "_startQuoteSingle",
START_QUOTE_DOUBLE: LEXER_STATE_NAMES.INNER_SQL_BLOCK + "_startQuoteDouble",
START_QUOTE_SINGLE: LEXER_STATE_NAMES.SQL + "_startQuoteSingle",
START_QUOTE_DOUBLE: LEXER_STATE_NAMES.SQL + "_startQuoteDouble",
START_TRIPLE_QUOTE_SINGLE: LEXER_STATE_NAMES.SQL + "_startTripleQuoteSingle",
START_TRIPLE_QUOTE_DOUBLE: LEXER_STATE_NAMES.SQL + "_startTripleQuoteDouble",
CAPTURE_EVERYTHING_ELSE: LEXER_STATE_NAMES.SQL + "_captureEverythingElse"
};

Expand Down Expand Up @@ -54,6 +58,8 @@ const INNER_SQL_BLOCK_LEXER_TOKEN_NAMES = {
BACKTICK: LEXER_STATE_NAMES.INNER_SQL_BLOCK + "_backtick",
START_QUOTE_SINGLE: LEXER_STATE_NAMES.INNER_SQL_BLOCK + "_startQuoteSingle",
START_QUOTE_DOUBLE: LEXER_STATE_NAMES.INNER_SQL_BLOCK + "_startQuoteDouble",
START_TRIPLE_QUOTE_SINGLE: LEXER_STATE_NAMES.INNER_SQL_BLOCK + "_startTripleQuoteSingle",
START_TRIPLE_QUOTE_DOUBLE: LEXER_STATE_NAMES.INNER_SQL_BLOCK + "_startTripleQuoteDouble",
CAPTURE_EVERYTHING_ELSE: LEXER_STATE_NAMES.INNER_SQL_BLOCK + "_captureEverythingElse"
};

Expand All @@ -73,6 +79,20 @@ const SQL_DOUBLE_QUOTE_STRING_LEXER_TOKEN_NAMES = {
CAPTURE_EVERYTHING_ELSE: LEXER_STATE_NAMES.SQL_DOUBLE_QUOTE_STRING + "_captureEverythingElse"
};

// Token names for the lexer state entered after an opening ''' (triple single quote).
// Every name is the state name followed by an underscore-prefixed suffix.
const SQL_TRIPLE_SINGLE_QUOTE_STRING_LEXER_TOKEN_NAMES = {
  ESCAPED_BACKSLASH: `${LEXER_STATE_NAMES.SQL_TRIPLE_SINGLE_QUOTE_STRING}_escapedBackslash`,
  START_JS_PLACEHOLDER: `${LEXER_STATE_NAMES.SQL_TRIPLE_SINGLE_QUOTE_STRING}_startJsPlaceholder`,
  CLOSE_QUOTE: `${LEXER_STATE_NAMES.SQL_TRIPLE_SINGLE_QUOTE_STRING}_closeTripleQuoteSingle`,
  CAPTURE_EVERYTHING_ELSE: `${LEXER_STATE_NAMES.SQL_TRIPLE_SINGLE_QUOTE_STRING}_captureEverythingElse`
};

// Token names for the lexer state entered after an opening """ (triple double quote).
// Every name is the state name followed by an underscore-prefixed suffix.
const SQL_TRIPLE_DOUBLE_QUOTE_STRING_LEXER_TOKEN_NAMES = {
  ESCAPED_BACKSLASH: `${LEXER_STATE_NAMES.SQL_TRIPLE_DOUBLE_QUOTE_STRING}_escapedBackslash`,
  START_JS_PLACEHOLDER: `${LEXER_STATE_NAMES.SQL_TRIPLE_DOUBLE_QUOTE_STRING}_startJsPlaceholder`,
  CLOSE_QUOTE: `${LEXER_STATE_NAMES.SQL_TRIPLE_DOUBLE_QUOTE_STRING}_closeTripleQuoteDouble`,
  CAPTURE_EVERYTHING_ELSE: `${LEXER_STATE_NAMES.SQL_TRIPLE_DOUBLE_QUOTE_STRING}_captureEverythingElse`
};

const lexer = moo.states(buildSqlxLexer());

export enum SyntaxTreeNodeType {
Expand All @@ -81,6 +101,7 @@ export enum SyntaxTreeNodeType {
SQL,
SQL_COMMENT,
SQL_LITERAL_STRING,
SQL_LITERAL_MULTILINE_STRING,
SQL_STATEMENT_SEPARATOR
}

Expand All @@ -97,6 +118,8 @@ const START_TOKEN_NODE_MAPPINGS = new Map<string, SyntaxTreeNodeType>([
[SQL_LEXER_TOKEN_NAMES.START_PRE_OPERATIONS, SyntaxTreeNodeType.SQL],
[SQL_LEXER_TOKEN_NAMES.START_QUOTE_SINGLE, SyntaxTreeNodeType.SQL_LITERAL_STRING],
[SQL_LEXER_TOKEN_NAMES.START_QUOTE_DOUBLE, SyntaxTreeNodeType.SQL_LITERAL_STRING],
[SQL_LEXER_TOKEN_NAMES.START_TRIPLE_QUOTE_SINGLE, SyntaxTreeNodeType.SQL_LITERAL_MULTILINE_STRING],
[SQL_LEXER_TOKEN_NAMES.START_TRIPLE_QUOTE_DOUBLE, SyntaxTreeNodeType.SQL_LITERAL_MULTILINE_STRING],

[JS_BLOCK_LEXER_TOKEN_NAMES.START_JS_BLOCK, SyntaxTreeNodeType.JAVASCRIPT],

Expand All @@ -108,6 +131,8 @@ const START_TOKEN_NODE_MAPPINGS = new Map<string, SyntaxTreeNodeType>([
],
[INNER_SQL_BLOCK_LEXER_TOKEN_NAMES.START_QUOTE_SINGLE, SyntaxTreeNodeType.SQL_LITERAL_STRING],
[INNER_SQL_BLOCK_LEXER_TOKEN_NAMES.START_QUOTE_DOUBLE, SyntaxTreeNodeType.SQL_LITERAL_STRING],
[INNER_SQL_BLOCK_LEXER_TOKEN_NAMES.START_TRIPLE_QUOTE_SINGLE, SyntaxTreeNodeType.SQL_LITERAL_MULTILINE_STRING],
[INNER_SQL_BLOCK_LEXER_TOKEN_NAMES.START_TRIPLE_QUOTE_DOUBLE, SyntaxTreeNodeType.SQL_LITERAL_MULTILINE_STRING],

[
SQL_SINGLE_QUOTE_STRING_LEXER_TOKEN_NAMES.START_JS_PLACEHOLDER,
Expand All @@ -117,14 +142,26 @@ const START_TOKEN_NODE_MAPPINGS = new Map<string, SyntaxTreeNodeType>([
[
SQL_DOUBLE_QUOTE_STRING_LEXER_TOKEN_NAMES.START_JS_PLACEHOLDER,
SyntaxTreeNodeType.JAVASCRIPT_TEMPLATE_STRING_PLACEHOLDER
],

[
SQL_TRIPLE_SINGLE_QUOTE_STRING_LEXER_TOKEN_NAMES.START_JS_PLACEHOLDER,
SyntaxTreeNodeType.JAVASCRIPT_TEMPLATE_STRING_PLACEHOLDER
],

[
SQL_TRIPLE_DOUBLE_QUOTE_STRING_LEXER_TOKEN_NAMES.START_JS_PLACEHOLDER,
SyntaxTreeNodeType.JAVASCRIPT_TEMPLATE_STRING_PLACEHOLDER
]
]);

const CLOSE_TOKEN_TYPES = new Set<string>([
JS_BLOCK_LEXER_TOKEN_NAMES.CLOSE_BLOCK,
INNER_SQL_BLOCK_LEXER_TOKEN_NAMES.CLOSE_BLOCK,
SQL_SINGLE_QUOTE_STRING_LEXER_TOKEN_NAMES.CLOSE_QUOTE,
SQL_DOUBLE_QUOTE_STRING_LEXER_TOKEN_NAMES.CLOSE_QUOTE
SQL_DOUBLE_QUOTE_STRING_LEXER_TOKEN_NAMES.CLOSE_QUOTE,
SQL_TRIPLE_SINGLE_QUOTE_STRING_LEXER_TOKEN_NAMES.CLOSE_QUOTE,
SQL_TRIPLE_DOUBLE_QUOTE_STRING_LEXER_TOKEN_NAMES.CLOSE_QUOTE
]);

const WHOLE_TOKEN_NODE_MAPPINGS = new Map<string, SyntaxTreeNodeType>([
Expand Down Expand Up @@ -269,6 +306,19 @@ function buildSqlxLexer(): { [x: string]: moo.Rules } {
push: LEXER_STATE_NAMES.JS_BLOCK
};
sqlLexer[SQL_LEXER_TOKEN_NAMES.BACKTICK] = "`";

// A single quote character (' or ") is a prefix of its triple-quote form (''' or """),
// so the triple-quote tokens must be declared before the single-quote tokens.
// moo tries rules in the order their properties were added to the rules object.
sqlLexer[SQL_LEXER_TOKEN_NAMES.START_TRIPLE_QUOTE_SINGLE] = {
match: "'''",
push: LEXER_STATE_NAMES.SQL_TRIPLE_SINGLE_QUOTE_STRING,
};
sqlLexer[SQL_LEXER_TOKEN_NAMES.START_TRIPLE_QUOTE_DOUBLE] = {
match: '"""',
push: LEXER_STATE_NAMES.SQL_TRIPLE_DOUBLE_QUOTE_STRING,
};

sqlLexer[SQL_LEXER_TOKEN_NAMES.START_QUOTE_SINGLE] = {
match: "'",
push: LEXER_STATE_NAMES.SQL_SINGLE_QUOTE_STRING
Expand Down Expand Up @@ -329,6 +379,14 @@ function buildSqlxLexer(): { [x: string]: moo.Rules } {
pop: 1
};
innerSqlBlockLexer[INNER_SQL_BLOCK_LEXER_TOKEN_NAMES.BACKTICK] = "`";
innerSqlBlockLexer[INNER_SQL_BLOCK_LEXER_TOKEN_NAMES.START_TRIPLE_QUOTE_SINGLE] = {
match: "'''",
push: LEXER_STATE_NAMES.SQL_TRIPLE_SINGLE_QUOTE_STRING
};
innerSqlBlockLexer[INNER_SQL_BLOCK_LEXER_TOKEN_NAMES.START_TRIPLE_QUOTE_DOUBLE] = {
match: '"""',
push: LEXER_STATE_NAMES.SQL_TRIPLE_DOUBLE_QUOTE_STRING
};
innerSqlBlockLexer[INNER_SQL_BLOCK_LEXER_TOKEN_NAMES.START_QUOTE_SINGLE] = {
match: "'",
push: LEXER_STATE_NAMES.SQL_SINGLE_QUOTE_STRING
Expand Down Expand Up @@ -376,13 +434,45 @@ function buildSqlxLexer(): { [x: string]: moo.Rules } {
lineBreaks: true
};

const innerTripleSingleQuoteLexer: moo.Rules = {};
innerTripleSingleQuoteLexer[SQL_TRIPLE_SINGLE_QUOTE_STRING_LEXER_TOKEN_NAMES.ESCAPED_BACKSLASH] = "\\\\";
innerTripleSingleQuoteLexer[SQL_TRIPLE_SINGLE_QUOTE_STRING_LEXER_TOKEN_NAMES.START_JS_PLACEHOLDER] = {
match: "${",
push: LEXER_STATE_NAMES.JS_BLOCK
};
innerTripleSingleQuoteLexer[SQL_TRIPLE_SINGLE_QUOTE_STRING_LEXER_TOKEN_NAMES.CLOSE_QUOTE] = {
match: "'''",
pop: 1
};
innerTripleSingleQuoteLexer[SQL_TRIPLE_SINGLE_QUOTE_STRING_LEXER_TOKEN_NAMES.CAPTURE_EVERYTHING_ELSE] = {
match: /[\s\S]+?/,
lineBreaks: true
};

const innerTripleDoubleQuoteLexer: moo.Rules = {};
innerTripleDoubleQuoteLexer[SQL_TRIPLE_DOUBLE_QUOTE_STRING_LEXER_TOKEN_NAMES.ESCAPED_BACKSLASH] = "\\\\";
innerTripleDoubleQuoteLexer[SQL_TRIPLE_DOUBLE_QUOTE_STRING_LEXER_TOKEN_NAMES.START_JS_PLACEHOLDER] = {
match: "${",
push: LEXER_STATE_NAMES.JS_BLOCK
};
innerTripleDoubleQuoteLexer[SQL_TRIPLE_DOUBLE_QUOTE_STRING_LEXER_TOKEN_NAMES.CLOSE_QUOTE] = {
match: '"""',
pop: 1
};
innerTripleDoubleQuoteLexer[SQL_TRIPLE_DOUBLE_QUOTE_STRING_LEXER_TOKEN_NAMES.CAPTURE_EVERYTHING_ELSE] = {
match: /[\s\S]+?/,
lineBreaks: true
};

const lexerStates: { [x: string]: moo.Rules } = {};
lexerStates[LEXER_STATE_NAMES.SQL] = sqlLexer;
lexerStates[LEXER_STATE_NAMES.JS_BLOCK] = jsBlockLexer;
lexerStates[LEXER_STATE_NAMES.JS_TEMPLATE_STRING] = jsTemplateStringLexer;
lexerStates[LEXER_STATE_NAMES.INNER_SQL_BLOCK] = innerSqlBlockLexer;
lexerStates[LEXER_STATE_NAMES.SQL_SINGLE_QUOTE_STRING] = innerSingleQuoteLexer;
lexerStates[LEXER_STATE_NAMES.SQL_DOUBLE_QUOTE_STRING] = innerDoubleQuoteLexer;
lexerStates[LEXER_STATE_NAMES.SQL_TRIPLE_SINGLE_QUOTE_STRING] = innerTripleSingleQuoteLexer;
lexerStates[LEXER_STATE_NAMES.SQL_TRIPLE_DOUBLE_QUOTE_STRING] = innerTripleDoubleQuoteLexer;

return lexerStates;
}
51 changes: 50 additions & 1 deletion tests/sqlx/format.spec.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { expect } from "chai";
import * as path from "path";

import { formatFile } from "df/sqlx/format";
import { format, formatFile } from "df/sqlx/format";
import { suite, test } from "df/testing";

suite("@dataform/sqlx", () => {
Expand Down Expand Up @@ -146,7 +146,56 @@ WHERE
n < 8
QUALIFY
MOD(ROW_NUMBER() OVER (), 2) = 0
`);
});

// Formats the triple_quoted.sqlx example and compares the result with the expected text below.
// The expected text keeps the line breaks inside every '''...''' and """...""" literal, and
// keeps the JS placeholder inside the """...""" literal as written.
// Within the template literal, `\\n` stands for a literal backslash-n and `\${` for a literal `${`.
test("format triple quoted string", async () => {
expect(await formatFile(path.resolve("examples/formatter/definitions/triple_quoted.sqlx")))
.equal(`config {
type: "table"
}

SELECT
'''1''' AS single_line,
"""multi
line
string
with indent""" AS multi_line,
REGEXP_CONTAINS("\\n abc\\n ", r'''
abc
''') AS multi_line_regex,
"""
This project is ...
"\${database()}"!!
""" AS with_js

post_operations {
select
"""1""" as inner_sql
}
`);
});
});

suite("formatter todos", () => {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I realized that template strings inside string literals have never been formatted, whether the literal uses single or triple quotes. Since there is no way to mark a test as a TODO, I wrote the test below to assert the current (unformatted) output instead.

// Records the formatter's current output for JS placeholders: the one outside a string
// literal is reformatted, while the ones inside "..." and '''...''' literals are left as written.
test("TODO format template string in a string", async () => {
  const unformattedSqlx = `
config {
type: "view"
}
SELECT
"ok" AS \${ "here"+ "works" },
"1 + 2 = \${ 1+2 }" AS TODO_in_string,
'''\${1 +2 }''' AS TODO_in_triple_quoted_string
`;
  const expectedSqlx = `config {
type: "view"
}

SELECT
"ok" AS \${"here" + "works"},
"1 + 2 = \${ 1+2 }" AS TODO_in_string,
'''\${1 +2 }''' AS TODO_in_triple_quoted_string
`;
  expect(format(unformattedSqlx, "sqlx")).eql(expectedSqlx);
});
})
});