diff --git a/cli/index.ts b/cli/index.ts index c4c94d211..c3754e453 100644 --- a/cli/index.ts +++ b/cli/index.ts @@ -33,7 +33,7 @@ import { } from "df/cli/credentials"; import { actuallyResolve, assertPathExists, compiledGraphHasErrors } from "df/cli/util"; import { createYargsCli, INamedOption } from "df/cli/yargswrapper"; -import { supportsCancel, WarehouseType } from "df/core/adapters"; +import { isWarehouseType, supportsCancel, WarehouseType } from "df/core/adapters"; import { targetAsReadableString } from "df/core/targets"; import { dataform } from "df/protos/ts"; import { formatFile } from "df/sqlx/format"; @@ -715,6 +715,22 @@ export function runCli() { positionalOptions: [projectDirMustExistOption], options: [trackOption], processFn: async argv => { + const readWarehouseConfig = (): WarehouseType => { + let wh: string; + try { + const dataformJson = fs.readFileSync(path.resolve(argv[projectDirMustExistOption.name], "dataform.json"), 'utf8'); + const projectConfig = JSON.parse(dataformJson); + wh = projectConfig.warehouse; + } catch (e) { + throw new Error(`Could not parse dataform.json: ${e.message}`); + } + if (!isWarehouseType(wh)) { + throw new Error("Unrecognized 'warehouse' setting in dataform.json"); + } + return wh; + }; + const warehouse = readWarehouseConfig(); + const filenames = glob.sync("{definitions,includes}/**/*.{js,sqlx}", { cwd: argv[projectDirMustExistOption.name] }); @@ -722,7 +738,8 @@ export function runCli() { filenames.map(async filename => { try { await formatFile(path.resolve(argv[projectDirMustExistOption.name], filename), { - overwriteFile: true + overwriteFile: true, + warehouse }); return { filename diff --git a/core/adapters/index.ts b/core/adapters/index.ts index 48b01593b..9cb96828b 100644 --- a/core/adapters/index.ts +++ b/core/adapters/index.ts @@ -41,6 +41,10 @@ export enum WarehouseType { SQLDATAWAREHOUSE = "sqldatawarehouse" } +export function isWarehouseType(input: any): input is WarehouseType { + return Object.values(WarehouseType).includes(input); +} + const CANCELLATION_SUPPORTED = [WarehouseType.BIGQUERY, WarehouseType.SQLDATAWAREHOUSE]; export function supportsCancel(warehouseType: WarehouseType) { diff --git a/examples/formatter/definitions/named_arguments.sqlx b/examples/formatter/definitions/named_arguments.sqlx new file mode 100644 index 000000000..7b28923f6 --- /dev/null +++ b/examples/formatter/definitions/named_arguments.sqlx @@ -0,0 +1,3 @@ +config { type: "operations" } + +SELECT MAKE_INTERVAL(1, day=>2, minute => 3) diff --git a/examples/formatter/definitions/qualify.sqlx b/examples/formatter/definitions/qualify.sqlx new file mode 100644 index 000000000..5e02dac9b --- /dev/null +++ b/examples/formatter/definitions/qualify.sqlx @@ -0,0 +1,5 @@ +config { type: "operations" } + +SELECT * FROM UNNEST([0,1,2,3,4,5,6,7,8,9]) AS n +WHERE n < 8 +QUALIFY MOD(ROW_NUMBER() OVER (), 2) = 0 diff --git a/package.json b/package.json index 48f9efcb5..f754eda77 100644 --- a/package.json +++ b/package.json @@ -120,7 +120,7 @@ "semver": "^7.3.8", "snowflake-sdk": "^1.6.12", "source-map-loader": "^0.2.0", - "sql-formatter": "^2.3.3", + "sql-formatter": "^12.2.1", "ssh2": "^1.4.0", "style-loader": "^1.0.0", "tarjan-graph": "^2.0.0", diff --git a/sqlx/format.ts b/sqlx/format.ts index 6de290c93..239b5b109 100644 --- a/sqlx/format.ts +++ b/sqlx/format.ts @@ -1,10 +1,10 @@ -import * as crypto from "crypto"; import * as fs from "fs"; import * as jsBeautify from "js-beautify"; import * as sqlFormatter from "sql-formatter"; import { promisify } from "util"; import { ErrorWithCause } from "df/common/errors/errors"; +import { WarehouseType } from "df/core/adapters"; import { SyntaxTreeNode, SyntaxTreeNodeType } from "df/sqlx/lexer"; import { v4 as uuidv4 } from "uuid"; @@ -16,11 +16,26 @@ const JS_BEAUTIFY_OPTIONS: JsBeautifyOptions = { const MAX_SQL_FORMAT_ATTEMPTS = 5; -export function format(text: string, fileExtension: string) { +const WAREHOUSE_LANGUAGE_MAP: Record = { + [WarehouseType.BIGQUERY]: "bigquery", + [WarehouseType.PRESTO]: "trino", + [WarehouseType.POSTGRES]: "postgresql", + [WarehouseType.REDSHIFT]: "redshift", + [WarehouseType.SNOWFLAKE]: "snowflake", + [WarehouseType.SQLDATAWAREHOUSE]: "transactsql" +}; + +const DEFAULT_WAREHOUSE_FOR_FORMATTING: WarehouseType = WarehouseType.BIGQUERY; + +export function format( + text: string, + fileExtension: string, + warehouse: WarehouseType = DEFAULT_WAREHOUSE_FOR_FORMATTING +) { try { switch (fileExtension) { case "sqlx": - return postProcessFormattedSqlx(formatSqlx(SyntaxTreeNode.create(text))); + return postProcessFormattedSqlx(formatSqlx(SyntaxTreeNode.create(text), "", warehouse)); case "js": return `${formatJavaScript(text).trim()}\n`; default: @@ -35,12 +50,14 @@ export async function formatFile( filename: string, options?: { overwriteFile?: boolean; + warehouse?: WarehouseType; } ) { const fileExtension = filename.split(".").slice(-1)[0]; const originalFileContent = await promisify(fs.readFile)(filename, "utf8"); - const formattedText = format(originalFileContent, fileExtension); - if (formattedText !== format(formattedText, fileExtension)) { + + const formattedText = format(originalFileContent, fileExtension, options?.warehouse); + if (formattedText !== format(formattedText, fileExtension, options?.warehouse)) { throw new Error("Formatter unable to determine final formatted form."); } @@ -57,10 +74,11 @@ export async function formatFile( return formattedText; } -function formatSqlx(node: SyntaxTreeNode, indent: string = "") { +function formatSqlx(node: SyntaxTreeNode, indent: string = "", warehouse: WarehouseType) { const { sqlxStatements, javascriptBlocks, innerSqlBlocks } = separateSqlxIntoParts( node.children() ); + const sqlLanguage = WAREHOUSE_LANGUAGE_MAP[warehouse]; // First, format the JS blocks (including the config block). const formattedJsCodeBlocks = javascriptBlocks.map(jsCodeBlock => @@ -73,7 +91,7 @@ function formatSqlx(node: SyntaxTreeNode, indent: string = "") { [placeholderId: string]: SyntaxTreeNode | string; } = {}; const unformattedPlaceholderSql = stripUnformattableText(sqlxStatement, placeholders).join(""); - const formattedPlaceholderSql = formatSql(unformattedPlaceholderSql); + const formattedPlaceholderSql = formatSql(unformattedPlaceholderSql, sqlLanguage); return formatEveryLine( replacePlaceholders(formattedPlaceholderSql, placeholders), line => `${indent}${line}` @@ -101,7 +119,7 @@ function formatSqlx(node: SyntaxTreeNode, indent: string = "") { ]); return `${upToFirstBrace} -${formatSqlx(sqlCodeBlockWithoutOuterBraces, " ")} +${formatSqlx(sqlCodeBlockWithoutOuterBraces, " ", warehouse)} ${lastBraceOnwards}`; }); @@ -175,7 +193,9 @@ function stripUnformattableText( } function generatePlaceholderId() { - return uuidv4() + // Add a leading character to ensure that the placeholder doesn't start with a number. + // Identifiers beginning with a number cause errors when formatting. + return '_' + uuidv4() .replace(/-/g, "") .substring(0, 16); } @@ -199,11 +219,11 @@ function formatJavaScript(text: string) { return jsBeautify.js(text, JS_BEAUTIFY_OPTIONS); } -function formatSql(text: string) { - let formatted = sqlFormatter.format(text) as string; +function formatSql(text: string, language: sqlFormatter.SqlLanguage) { + let formatted = sqlFormatter.format(text, { language }) as string; // Unfortunately sql-formatter does not always produce final formatted output (even on plain SQL) in a single pass. for (let attempts = 0; attempts < MAX_SQL_FORMAT_ATTEMPTS; attempts++) { - const newFormatted = sqlFormatter.format(formatted) as string; + const newFormatted = sqlFormatter.format(formatted, { language }) as string; if (newFormatted === formatted) { return newFormatted; } diff --git a/sqlx/sql_formatter.d.ts b/sqlx/sql_formatter.d.ts deleted file mode 100644 index 15e76f122..000000000 --- a/sqlx/sql_formatter.d.ts +++ /dev/null @@ -1 +0,0 @@ -declare module "sql-formatter"; diff --git a/tests/sqlx/format.spec.ts b/tests/sqlx/format.spec.ts index 6ee28a9d3..eb4d28086 100644 --- a/tests/sqlx/format.spec.ts +++ b/tests/sqlx/format.spec.ts @@ -43,13 +43,13 @@ drop something --- -alter table - \${tempTable} rename to \${finalTableName} +alter table \${tempTable} +rename to \${finalTableName} --- SELECT - SUM(IF (session_start_event, 1, 0)) AS session_index + SUM(IF(session_start_event, 1, 0)) AS session_index `); }); @@ -67,7 +67,7 @@ select CAST( REGEXP_EXTRACT("", r"^/([0-9]+)\\"\\'/.*") AS INT64 ) AS id2, - IFNULL ( + IFNULL( regexp_extract('', r'\\a?query=([^&]+)&*'), regexp_extract('', r'\\a?q=([^&]+)&*') ) AS id3, @@ -91,7 +91,7 @@ SELECT SELECT SUM(IF(track.event = "event_viewed_project_with_connection", 1, 0)) FROM - UNNEST(records) + UNNEST (records) ) ) > 0 as created_project, /* multi line @@ -118,6 +118,34 @@ input "something" { test("doesn't care about special string replacement characters", async () => { expect(await formatFile(path.resolve("examples/formatter/definitions/dollar_regex.sqlx"))) .equal(`'^.*(bot|crawl|slurp|spider|archiv|spinn|sniff|seo|audit|survey|pingdom|worm|capture|(browser|screen)shots|analyz|index|thumb|check|facebook|PhantomJS|a_archiver|facebookexternalhit|BingPreview|360User-agent|semalt).*$' +`); + }); + + test("formats named arguments", async () => { + expect(await formatFile(path.resolve("examples/formatter/definitions/named_arguments.sqlx"))) + .equal(`config { + type: "operations" +} + +SELECT + MAKE_INTERVAL(1, day => 2, minute => 3) +`); + }); + + test("formats QUALIFY clause", async () => { + expect(await formatFile(path.resolve("examples/formatter/definitions/qualify.sqlx"))) + .equal(`config { + type: "operations" +} + +SELECT + * +FROM + UNNEST ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) AS n +WHERE + n < 8 +QUALIFY + MOD(ROW_NUMBER() OVER (), 2) = 0 `); }); }); diff --git a/version.bzl b/version.bzl index bbce3877d..43196e790 100644 --- a/version.bzl +++ b/version.bzl @@ -1,3 +1,3 @@ # NOTE: If you change the format of this line, you must change the bash command # in /scripts/publish to extract the version string correctly. -DF_VERSION = "2.5.0" +DF_VERSION = "2.6.0" diff --git a/yarn.lock b/yarn.lock index 703512e61..0adeb2980 100644 --- a/yarn.lock +++ b/yarn.lock @@ -3244,6 +3244,11 @@ digest-header@^0.0.1: dependencies: utility "0.1.11" +discontinuous-range@1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/discontinuous-range/-/discontinuous-range-1.0.0.tgz#e38331f0844bba49b9a9cb71c771585aab1bc65a" + integrity sha512-c68LpLbO+7kP/b1Hr1qs8/BJ09F5khZGTxqxZuhzxpmwJKOgRFHJWIb9/KmqnqHhLdO55aOxFH/EGBvUQbL/RQ== + dns-packet@^1.3.2: version "1.3.4" resolved "https://registry.yarnpkg.com/dns-packet/-/dns-packet-1.3.4.tgz#e3455065824a2507ba886c55a89963bb107dec6f" @@ -5787,7 +5792,7 @@ lodash.uniq@^4.5.0: resolved "https://registry.yarnpkg.com/lodash.uniq/-/lodash.uniq-4.5.0.tgz#d0225373aeb652adc1bc82e4945339a842754773" integrity "sha1-0CJTc662Uq3BvILklFM5qEJ1R3M= sha512-xfBaXQd9ryd9dlSDvnvI0lvxfLJlYAZzXomUYzLKtUeOQvOP5piqAWuGtrhWeqaXK9hhoM/iyJc5AV+XfsX3HQ==" -lodash@^4.15.0, lodash@^4.16.0, lodash@^4.17.11, lodash@^4.17.14, lodash@^4.17.15, lodash@^4.17.20, lodash@^4.17.21, lodash@^4.17.4, lodash@^4.17.5: +lodash@^4.15.0, lodash@^4.17.11, lodash@^4.17.14, lodash@^4.17.15, lodash@^4.17.20, lodash@^4.17.21, lodash@^4.17.4, lodash@^4.17.5: version "4.17.21" resolved "https://registry.yarnpkg.com/lodash/-/lodash-4.17.21.tgz#679591c564c3bffaae8454cf0b3df370c3d6911c" integrity "sha1-Z5WRxWTDv/quhFTPCz3zcMPWkRw= sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==" @@ -6281,6 +6286,16 @@ ncp@~2.0.0: resolved "https://registry.yarnpkg.com/ncp/-/ncp-2.0.0.tgz#195a21d6c46e361d2fb1281ba38b91e9df7bdbb3" integrity "sha1-GVoh1sRuNh0vsSgbo4uR6d9727M= sha512-zIdGUrPRFTUELUvr3Gmc7KZ2Sw/h1PiVM0Af/oHB6zgnV1ikqSfRk+TOufi79aHYCW3NiOXmr1BP5nWbzojLaA==" +nearley@^2.20.1: + version "2.20.1" + resolved "https://registry.yarnpkg.com/nearley/-/nearley-2.20.1.tgz#246cd33eff0d012faf197ff6774d7ac78acdd474" + integrity sha512-+Mc8UaAebFzgV+KpI5n7DasuuQCHA89dmwm7JXw3TV43ukfNQ9DnBH3Mdb2g/I4Fdxc26pwimBWvjIw0UAILSQ== + dependencies: + commander "^2.19.0" + moo "^0.5.0" + railroad-diagrams "^1.0.0" + randexp "0.4.6" + negotiator@0.6.3: version "0.6.3" resolved "https://registry.yarnpkg.com/negotiator/-/negotiator-0.6.3.tgz#58e323a72fedc0d6f9cd4d31fe49f51479590ccd" @@ -7358,6 +7373,19 @@ raf@^3.1.0: dependencies: performance-now "^2.1.0" +railroad-diagrams@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/railroad-diagrams/-/railroad-diagrams-1.0.0.tgz#eb7e6267548ddedfb899c1b90e57374559cddb7e" + integrity sha512-cz93DjNeLY0idrCNOH6PviZGRN9GJhsdm9hpn1YCS879fj4W+x5IFJhhkRZcwVgMmFF7R82UA/7Oh+R8lLZg6A== + +randexp@0.4.6: + version "0.4.6" + resolved "https://registry.yarnpkg.com/randexp/-/randexp-0.4.6.tgz#e986ad5e5e31dae13ddd6f7b3019aa7c87f60ca3" + integrity sha512-80WNmd9DA0tmZrw9qQa62GPPWfuXJknrmVmLcxvq4uZBdYqb1wYoKTmnlGUchvVWe0XiLupYkBoXVOxz3C8DYQ== + dependencies: + discontinuous-range "1.0.0" + ret "~0.1.10" + randombytes@^2.1.0: version "2.1.0" resolved "https://registry.yarnpkg.com/randombytes/-/randombytes-2.1.0.tgz#df6f84372f0270dc65cdf6291349ab7a473d4f2a" @@ -8343,12 +8371,13 @@ sprintf-js@~1.0.2: resolved "https://registry.yarnpkg.com/sprintf-js/-/sprintf-js-1.0.3.tgz#04e6926f662895354f3dd015203633b857297e2c" integrity "sha1-BOaSb2YolTVPPdAVIDYzuFcpfiw= sha512-D9cPgkvLlV3t3IzL0D0YLvGA9Ahk4PcvVwUbN0dSGr1aP0Nrt4AEnTUbuGvquEC0mA64Gqt1fzirlRs5ibXx8g==" -sql-formatter@^2.3.3: - version "2.3.3" - resolved "https://registry.yarnpkg.com/sql-formatter/-/sql-formatter-2.3.3.tgz#910ef484fbb988a5e510bea4161157e3b80b2f62" - integrity "sha1-kQ70hPu5iKXlEL6kFhFX47gLL2I= sha512-m6pqVXwsm9GkCHC/+gdPvNowI7PNoVTT6OZMWKwXJoP2MvfntfhcfyliIf4/QX6t+DirSJ6XDSiSS70YvZ87Lw==" +sql-formatter@^12.2.1: + version "12.2.1" + resolved "https://registry.yarnpkg.com/sql-formatter/-/sql-formatter-12.2.1.tgz#6e048fea63c06f7f10371066502e4eb163967e17" + integrity sha512-rvNfY+g+mv3vxzqdDrsLVjhe/UZohYdCqUqyTK/JI8oXxnW4ObcldlclqUoL55SWQs8azEfFf66XfPBi6tkjfQ== dependencies: - lodash "^4.16.0" + argparse "^2.0.1" + nearley "^2.20.1" ssh2@^1.4.0: version "1.4.0"