From e02e0c5c899b31599daaddf4510a91ac99839c34 Mon Sep 17 00:00:00 2001
From: facelessuser
Date: Fri, 4 Oct 2024 23:20:34 -0600
Subject: [PATCH] Add support for checking only files changed in git

Add the `--git-merge-base`/`-m` and `--git-binary`/`-G` command line
options and the matching `git_merge_base`/`git_binary` keyword arguments
on `spellcheck()` and `SpellingTask`.

When a merge base is given, a task's source patterns are no longer globbed
against the file system. Instead they are used to filter the list of files
that `git diff --name-only --cached --merge-base <target>` reports, so only
changed files are spell checked. `expect_match` is disabled in this mode
because a pattern legitimately matching no changed file is not an error.

The documents tox environment and the CI workflow use the new option with
the pull request base commit, falling back to `master`.
---
Review notes:
- walk_src: git is queried once per call instead of once per source
  pattern; the changed-file list does not depend on the pattern.
- get_file_diff: `--diff-filter=d` keeps deleted paths out of the list so
  files that no longer exist are not handed to the spell checker.
- __main__: fixed "the the" in the `--git-binary` help text.
- The `index` blob ids below are carried over unchanged and do not
  describe the revised hunks.
- build.yml: the removed and added `pip install` lines read identically
  here; presumably a whitespace-only change - confirm against the base file.

 .github/workflows/build.yml |  6 ++-
 pyproject.toml              |  5 ++-
 pyspelling/__init__.py      | 35 ++++++++++++-----
 pyspelling/__main__.py      | 16 ++++++++
 pyspelling/util/git.py      | 78 +++++++++++++++++++++++++++++++++++++
 5 files changed, 129 insertions(+), 11 deletions(-)
 create mode 100644 pyspelling/util/git.py

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index f42ec6e..1f7a061 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -107,6 +107,8 @@ jobs:
 
     steps:
     - uses: actions/checkout@v4
+      with:
+        fetch-depth: 2
     - name: Set up Python
       uses: actions/setup-python@v5
       with:
@@ -118,7 +120,9 @@ jobs:
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
-        python -m pip install --upgrade build setuptools tox
+        python -m pip install --upgrade build setuptools tox
     - name: ${{ matrix.tox-env }}
+      env:
+        TOX_MERGE_BASE: "${{ github.event.pull_request.base.sha }}"
       run: |
         python -m tox -e ${{ matrix.tox-env }}
diff --git a/pyproject.toml b/pyproject.toml
index 8529a66..c6d0b3d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -134,12 +134,15 @@ commands=
     {envpython} -m coverage report --show-missing
 
 [testenv:documents]
+passenv = TOX_MERGE_BASE
 deps=
     .
     -r requirements/docs.txt
 commands=
     {envpython} -m mkdocs build --clean --verbose --strict
-    {envpython} -m pyspelling
+    {envpython} -m pyspelling -n python -m {env:TOX_MERGE_BASE:master} -v
+    {envpython} -m pyspelling -n markdown -m {env:TOX_MERGE_BASE:master} -v
+    {envpython} -m pyspelling -n mkdocs -v
 
 [testenv:lint]
 deps=
diff --git a/pyspelling/__init__.py b/pyspelling/__init__.py
index de3f1c5..5d8da4b 100644
--- a/pyspelling/__init__.py
+++ b/pyspelling/__init__.py
@@ -5,6 +5,7 @@
 from .__meta__ import __version__, __version_info__  # noqa: F401
 from . import flow_control
 from . import filters
+from .util import git
 from wcmatch import glob
 import codecs
 from collections import namedtuple
@@ -578,7 +579,7 @@ class SpellingTask:
         "O": glob.O
     }
 
-    def __init__(self, checker, config, binary='', verbose=0, jobs=0, debug=False):
+    def __init__(self, checker, config, binary='', verbose=0, jobs=0, git_merge_base='', git_binary=None, debug=False):
         """Initialize."""
 
         if checker == "hunspell":  # pragma: no cover
@@ -594,6 +595,8 @@ def __init__(self, checker, config, binary='', verbose=0, jobs=0, debug=False):
         self.binary = checker if not binary else binary
         self.debug = debug
         self.jobs = jobs
+        self.git_merge_base = git_merge_base
+        self.git_binary = git_binary
 
     def log(self, text, level):
         """Log level."""
@@ -613,11 +616,18 @@ def _to_flags(self, text):
     def walk_src(self, targets, flags, limit):
         """Walk source and parse files."""
 
-        for target in targets:
-            # Glob using `S` for patterns with `|` and `O` to exclude directories.
-            kwargs = {"flags": flags | glob.S | glob.O}
-            kwargs['limit'] = limit
-            yield from glob.iglob(target, **kwargs)
+        # Glob using `S` for patterns with `|` and `O` to exclude directories.
+        kwargs = {"flags": flags | glob.S | glob.O}
+        kwargs['limit'] = limit
+
+        if self.git_merge_base:
+            # Ask git once; the changed-file list does not depend on the pattern being matched.
+            changed = git.get_file_diff(self.git_merge_base, git_binary=self.git_binary)
+            for target in targets:
+                yield from glob.globfilter(changed, target, **kwargs)
+        else:
+            for target in targets:
+                yield from glob.iglob(target, **kwargs)
 
     def get_checker(self):
         """Get a spell checker object."""
@@ -659,13 +669,18 @@ def run_task(self, task, source_patterns=None):
         glob_flags = self._to_flags(self.task.get('glob_flags', "N|B|G"))
         glob_limit = self.task.get('glob_pattern_limit', 1000)
 
+        if self.git_merge_base:
+            self.log("Searching: Only checking files that changed in git...", 1)
+        else:
+            self.log("Searching: Finding files to check...", 1)
+
         if not source_patterns:
             source_patterns = self.task.get('sources', [])
 
         # If jobs was not specified via command line, check the config for jobs settings
         jobs = max(1, self.config.get('jobs', 1) if self.jobs == 0 else self.jobs)
 
-        expect_match = self.task.get('expect_match', True)
+        expect_match = self.task.get('expect_match', True) and not self.git_merge_base
         if jobs > 1:
             # Use multi-processing to process files concurrently
             with ProcessPoolExecutor(max_workers=jobs) as pool:
@@ -696,7 +711,9 @@ def spellcheck(
     sources=None,
     verbose=0,
     debug=False,
-    jobs=0
+    jobs=0,
+    git_merge_base='',
+    git_binary=None
 ):
     """Spell check."""
 
@@ -737,7 +754,7 @@ def spellcheck(
 
         log('Using {} to spellcheck {}'.format(checker, task.get('name', '')), 1, verbose)
 
-        spelltask = SpellingTask(checker, config, binary, verbose, jobs, debug)
+        spelltask = SpellingTask(checker, config, binary, verbose, jobs, git_merge_base, git_binary, debug)
 
         for result in spelltask.run_task(task, source_patterns=sources):
             log('Context: %s' % result.context, 2, verbose)
diff --git a/pyspelling/__main__.py b/pyspelling/__main__.py
index ce1d832..7c2fb0f 100644
--- a/pyspelling/__main__.py
+++ b/pyspelling/__main__.py
@@ -16,6 +16,16 @@ def main():
     group.add_argument('--name', '-n', action='append', help="Specific spelling task by name to run.")
     group.add_argument('--group', '-g', action='append', help="Specific spelling task group to run.")
     parser.add_argument('--binary', '-b', action='store', default='', help="Provide path to spell checker's binary.")
+    parser.add_argument(
+        '--git-merge-base',
+        '-m',
+        help="Specify the git merge base for generating a modified file list."
+    )
+    parser.add_argument(
+        '--git-binary',
+        '-G',
+        help="Specify the path to the git binary if not on the system path."
+    )
     parser.add_argument(
         '--jobs', '-j',
         action='store',
@@ -44,6 +54,8 @@ def main():
         verbose=args.verbose,
         debug=args.debug,
         jobs=args.jobs,
+        git_merge_base=args.git_merge_base,
+        git_binary=args.git_binary
     )
 
 
@@ -58,6 +70,8 @@ def run(config, **kwargs):
     sources = kwargs.get('sources', [])
     debug = kwargs.get('debug', False)
     jobs = kwargs.get('jobs', 0)
+    git_merge_base = kwargs.get('git_merge_base', '')
+    git_binary = kwargs.get('git_binary', None)
 
     fail = False
     count = 0
@@ -71,6 +85,8 @@ def run(config, **kwargs):
         verbose=verbose,
         debug=debug,
         jobs=jobs,
+        git_merge_base=git_merge_base,
+        git_binary=git_binary
     ):
         count += 1
         if results.error:
diff --git a/pyspelling/util/git.py b/pyspelling/util/git.py
new file mode 100644
index 0000000..edd734b
--- /dev/null
+++ b/pyspelling/util/git.py
@@ -0,0 +1,78 @@
+"""Git support."""
+import subprocess
+import sys
+import os
+
+WIN = sys.platform.startswith('win')
+GIT_BINARY = "git.exe" if WIN else "git"
+
+
+def get_git_tree(target):
+    """Recursively get Git tree."""
+
+    is_file = os.path.isfile(target)
+    folder = os.path.dirname(target) if is_file else target
+    if os.path.exists(os.path.join(folder, ".git")):
+        return folder
+    else:
+        parent = os.path.dirname(folder)
+        if parent == folder:
+            return None
+        else:
+            return get_git_tree(parent)
+
+
+def get_git_dir(tree):
+    """Get Git directory from tree."""
+
+    return os.path.join(tree, ".git")
+
+
+def get_file_diff(target, git_binary=None):
+    """List non-deleted files that differ between the index and the merge base with target."""
+
+    args = ['--no-pager', 'diff', '--name-only', '--diff-filter=d', '--cached', '--merge-base', f'{target}']
+    return gitopen(
+        args,
+        git_binary=git_binary,
+        git_tree=get_git_tree(os.path.abspath('.'))
+    ).decode('utf-8').splitlines()
+
+
+def gitopen(args, git_binary=None, git_tree=None):
+    """Call Git with arguments."""
+
+    returncode = output = None
+
+    if git_binary is None or not git_binary:
+        git_binary = GIT_BINARY
+
+    if git_tree is not None:
+        cmd = [git_binary, f"--work-tree={git_tree}", f"--git-dir={get_git_dir(git_tree)}"] + args
+    else:
+        cmd = [git_binary] + args
+
+    if WIN:
+        startupinfo = subprocess.STARTUPINFO()
+        startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
+        process = subprocess.Popen(
+            cmd,
+            startupinfo=startupinfo,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            shell=False
+        )
+    else:
+        process = subprocess.Popen(
+            cmd,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            shell=False,
+        )
+    output = process.communicate()
+    returncode = process.returncode
+
+    if returncode != 0:
+        raise RuntimeError(output[1].decode('utf-8').rstrip())
+
+    return output[0]