From 8803f97782ce9aad5c807b52ef8e593a48bd7036 Mon Sep 17 00:00:00 2001
From: Camyll Harajli
Date: Wed, 22 Jan 2025 13:33:35 -0800
Subject: [PATCH] add string replacement to account for sql variables

---
 .lintrunner.toml                         |   4 +-
 tools/linter/adapters/sqlfluff_linter.py | 206 +++++++++++++++++++++++
 2 files changed, 208 insertions(+), 2 deletions(-)
 create mode 100644 tools/linter/adapters/sqlfluff_linter.py

diff --git a/.lintrunner.toml b/.lintrunner.toml
index a2455f0e6b..22b31d5042 100644
--- a/.lintrunner.toml
+++ b/.lintrunner.toml
@@ -339,13 +339,13 @@ init_command = [
 is_formatter = true
 
 [[linter]]
-code = 'CLICKHOUSE'
+code = 'SQLFLUFF'
 include_patterns = ['torchci/clickhouse_queries/**/*.sql']
 exclude_patterns = [
 ]
 command = [
     'python3',
-    'tools/linter/adapters/clickhouse_sql_linter.py',
+    'tools/linter/adapters/sqlfluff_linter.py',
     '@{{PATHSFILE}}',
 ]
 init_command = [
diff --git a/tools/linter/adapters/sqlfluff_linter.py b/tools/linter/adapters/sqlfluff_linter.py
new file mode 100644
index 0000000000..307b302ae7
--- /dev/null
+++ b/tools/linter/adapters/sqlfluff_linter.py
@@ -0,0 +1,206 @@
+"""lintrunner adapter that formats ClickHouse SQL queries with sqlfluff.
+
+ClickHouse query parameters are written as ``{name: Type}``.  Before running
+``sqlfluff format`` each file is rewritten with every brace wrapped in single
+quotes (presumably so sqlfluff parses the placeholder as a string literal),
+and the quotes are stripped again once sqlfluff has finished.
+"""
+
+import argparse
+import concurrent.futures
+import json
+import logging
+import os
+import re
+import subprocess
+import time
+from enum import Enum
+from typing import List, NamedTuple, Optional, Pattern
+
+
+LINTER_CODE = "SQLFLUFF"
+
+
+class LintSeverity(str, Enum):
+    """Severity levels carried by a ``LintMessage``."""
+
+    ERROR = "error"
+    WARNING = "warning"
+    ADVICE = "advice"
+    DISABLED = "disabled"
+
+
+class LintMessage(NamedTuple):
+    """One lint result; ``main`` prints each as a line of JSON."""
+
+    path: Optional[str]
+    line: Optional[int]
+    char: Optional[int]
+    code: str
+    severity: LintSeverity
+    name: str
+    original: Optional[str]
+    replacement: Optional[str]
+    description: Optional[str]
+
+
+# Matches output lines shaped like "file:line:char: message [code]".
+# Nothing in this module references it; the group names are a best guess.
+RESULTS_RE: Pattern[str] = re.compile(
+    r"""(?mx)
+    ^
+    (?P<file>.*?):
+    (?P<line>\d+):
+    (?P<char>\d+):
+    \s(?P<message>.*)
+    \s(?P<code>\[.*\])
+    $
+    """
+)
+
+
+def run_command(
+    args: List[str],
+) -> "subprocess.CompletedProcess[bytes]":
+    """Run ``args`` as a subprocess, capturing its stdout and stderr.
+
+    The command line and its wall-clock duration are logged at DEBUG level.
+    A non-zero exit status is not treated as an error here.
+    """
+    logging.debug("$ %s", " ".join(args))
+    start_time = time.monotonic()
+    try:
+        return subprocess.run(
+            args,
+            capture_output=True,
+            check=False,
+        )
+    finally:
+        end_time = time.monotonic()
+        logging.debug("took %dms", (end_time - start_time) * 1000)
+
+
+def _quote_placeholders(sql: str) -> str:
+    """Wrap every brace in single quotes, e.g. ``{x}`` becomes ``'{x}'``."""
+    return sql.replace("{", "'{").replace("}", "}'")
+
+
+def _unquote_placeholders(sql: str) -> str:
+    """Undo ``_quote_placeholders``, e.g. ``'{x}'`` becomes ``{x}``."""
+    return sql.replace("'{", "{").replace("}'", "}")
+
+
+def check_file(
+    filename: str,
+) -> List[LintMessage]:
+    """Format ``filename`` in place with ``sqlfluff format``.
+
+    Returns a one-element list: a WARNING message carrying sqlfluff's output,
+    or an ERROR message if the sqlfluff process could not be started.  Errors
+    while reading or writing ``filename`` itself propagate to the caller.
+    """
+    with open(filename, encoding="utf-8") as f:
+        original = f.read()
+    with open(filename, "w", encoding="utf-8") as f:
+        f.write(_quote_placeholders(original))
+
+    try:
+        proc = run_command(
+            [
+                "sqlfluff",
+                "format",
+                "--dialect",
+                "clickhouse",
+                filename,
+            ]
+        )
+    except OSError as err:
+        return [
+            LintMessage(
+                path=None,
+                line=None,
+                char=None,
+                code=LINTER_CODE,
+                severity=LintSeverity.ERROR,
+                name="command-failed",
+                original=None,
+                replacement=None,
+                description=(f"Failed due to {err.__class__.__name__}:\n{err}"),
+            )
+        ]
+    finally:
+        # Strip the temporary quotes on every exit path, including the
+        # early return above, so the query is never left in quoted form.
+        with open(filename, encoding="utf-8") as f:
+            formatted = f.read()
+        with open(filename, "w", encoding="utf-8") as f:
+            f.write(_unquote_placeholders(formatted))
+
+    description = proc.stdout.decode("utf-8", errors="replace")
+    if proc.returncode != 0 and proc.stderr:
+        # Captured stderr would otherwise be discarded on failure.
+        description += proc.stderr.decode("utf-8", errors="replace")
+
+    return [
+        LintMessage(
+            path=filename,
+            line=None,
+            char=None,
+            code=LINTER_CODE,
+            severity=LintSeverity.WARNING,
+            name="format",
+            original=None,
+            replacement=None,
+            description=description,
+        )
+    ]
+
+
+def main() -> None:
+    """Format every file named on the command line, in parallel.
+
+    Each resulting ``LintMessage`` is printed to stdout as one JSON object
+    per line.  Arguments may also be read from a file via ``@path``.
+    """
+    parser = argparse.ArgumentParser(
+        description="sqlfluff format linter for sql queries.",
+        fromfile_prefix_chars="@",
+    )
+    parser.add_argument(
+        "--verbose",
+        action="store_true",
+        help="verbose logging",
+    )
+    parser.add_argument(
+        "filenames",
+        nargs="+",
+        help="paths to lint",
+    )
+
+    args = parser.parse_args()
+
+    logging.basicConfig(
+        format="<%(threadName)s:%(levelname)s> %(message)s",
+        level=logging.DEBUG if args.verbose else logging.INFO,
+    )
+
+    with concurrent.futures.ThreadPoolExecutor(
+        max_workers=os.cpu_count(),
+        thread_name_prefix="Thread",
+    ) as executor:
+        # check_file rewrites files in place, so submit each path only once.
+        futures = {
+            executor.submit(check_file, filename): filename
+            for filename in dict.fromkeys(args.filenames)
+        }
+        for future in concurrent.futures.as_completed(futures):
+            try:
+                for lint_message in future.result():
+                    print(json.dumps(lint_message._asdict()), flush=True)
+            except Exception:
+                logging.critical('Failed at "%s".', futures[future])
+                raise
+
+
+if __name__ == "__main__":
+    main()