Skip to content

Commit

Permalink
Add support for checking only files changed in git
Browse files Browse the repository at this point in the history
  • Loading branch information
facelessuser committed Oct 5, 2024
1 parent 63e4654 commit 6ca5f7f
Show file tree
Hide file tree
Showing 4 changed files with 125 additions and 10 deletions.
4 changes: 3 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,9 @@ deps=
-r requirements/docs.txt
commands=
{envpython} -m mkdocs build --clean --verbose --strict
{envpython} -m pyspelling
{envpython} -m pyspelling -n python -m master -v
{envpython} -m pyspelling -n markdown -m master -v
{envpython} -m pyspelling -n mkdocs -v
[testenv:lint]
deps=
Expand Down
37 changes: 28 additions & 9 deletions pyspelling/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from .__meta__ import __version__, __version_info__ # noqa: F401
from . import flow_control
from . import filters
from .util import git
from wcmatch import glob
import codecs
from collections import namedtuple
Expand Down Expand Up @@ -578,7 +579,7 @@ class SpellingTask:
"O": glob.O
}

def __init__(self, checker, config, binary='', verbose=0, jobs=0, debug=False):
def __init__(self, checker, config, binary='', verbose=0, jobs=0, git_merge_base='', git_binary=None, debug=False):
"""Initialize."""

if checker == "hunspell": # pragma: no cover
Expand All @@ -594,6 +595,8 @@ def __init__(self, checker, config, binary='', verbose=0, jobs=0, debug=False):
self.binary = checker if not binary else binary
self.debug = debug
self.jobs = jobs
self.git_merge_base = git_merge_base
self.git_binary = git_binary

def log(self, text, level):
"""Log level."""
Expand All @@ -613,11 +616,20 @@ def _to_flags(self, text):
def walk_src(self, targets, flags, limit):
"""Walk source and parse files."""

for target in targets:
# Glob using `S` for patterns with `|` and `O` to exclude directories.
kwargs = {"flags": flags | glob.S | glob.O}
kwargs['limit'] = limit
yield from glob.iglob(target, **kwargs)
# Glob using `S` for patterns with `|` and `O` to exclude directories.
kwargs = {"flags": flags | glob.S | glob.O}
kwargs['limit'] = limit

if self.git_merge_base:
for target in targets:
yield from glob.globfilter(
git.get_file_diff(self.git_merge_base, git_binary=self.git_binary),
target,
**kwargs
)
else:
for target in targets:
yield from glob.iglob(target, **kwargs)

def get_checker(self):
"""Get a spell checker object."""
Expand Down Expand Up @@ -659,13 +671,18 @@ def run_task(self, task, source_patterns=None):
glob_flags = self._to_flags(self.task.get('glob_flags', "N|B|G"))
glob_limit = self.task.get('glob_pattern_limit', 1000)

if self.git_merge_base:
self.log("Searching: Only checking files that changed in git...", 1)
else:
self.log("Searching: Finding files to check...", 1)

if not source_patterns:
source_patterns = self.task.get('sources', [])

# If jobs was not specified via command line, check the config for jobs settings
jobs = max(1, self.config.get('jobs', 1) if self.jobs == 0 else self.jobs)

expect_match = self.task.get('expect_match', True)
expect_match = self.task.get('expect_match', True) and not self.git_merge_base
if jobs > 1:
# Use multi-processing to process files concurrently
with ProcessPoolExecutor(max_workers=jobs) as pool:
Expand Down Expand Up @@ -696,7 +713,9 @@ def spellcheck(
sources=None,
verbose=0,
debug=False,
jobs=0
jobs=0,
git_merge_base='',
git_binary=None
):
"""Spell check."""

Expand Down Expand Up @@ -737,7 +756,7 @@ def spellcheck(

log('Using {} to spellcheck {}'.format(checker, task.get('name', '')), 1, verbose)

spelltask = SpellingTask(checker, config, binary, verbose, jobs, debug)
spelltask = SpellingTask(checker, config, binary, verbose, jobs, git_merge_base, git_binary, debug)

for result in spelltask.run_task(task, source_patterns=sources):
log('Context: %s' % result.context, 2, verbose)
Expand Down
16 changes: 16 additions & 0 deletions pyspelling/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,16 @@ def main():
group.add_argument('--name', '-n', action='append', help="Specific spelling task by name to run.")
group.add_argument('--group', '-g', action='append', help="Specific spelling task group to run.")
parser.add_argument('--binary', '-b', action='store', default='', help="Provide path to spell checker's binary.")
parser.add_argument(
'--git-merge-base',
'-m',
help="Specify the git merge base for generating a modified file list."
)
parser.add_argument(
'--git-binary',
'-G',
help="Specify the path to the git binary if not on the system path."
)
parser.add_argument(
'--jobs', '-j',
action='store',
Expand Down Expand Up @@ -44,6 +54,8 @@ def main():
verbose=args.verbose,
debug=args.debug,
jobs=args.jobs,
git_merge_base=args.git_merge_base,
git_binary=args.git_binary
)


Expand All @@ -58,6 +70,8 @@ def run(config, **kwargs):
sources = kwargs.get('sources', [])
debug = kwargs.get('debug', False)
jobs = kwargs.get('jobs', 0)
git_merge_base = kwargs.get('git_merge_base', '')
git_binary = kwargs.get('git_binary', None)

fail = False
count = 0
Expand All @@ -71,6 +85,8 @@ def run(config, **kwargs):
verbose=verbose,
debug=debug,
jobs=jobs,
git_merge_base=git_merge_base,
git_binary=git_binary
):
count += 1
if results.error:
Expand Down
78 changes: 78 additions & 0 deletions pyspelling/util/git.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
"""Git support."""
import subprocess
import sys
import os

# True when the interpreter reports a platform name beginning with "win".
WIN = sys.platform[:3] == 'win'
# Executable name used for Git when the caller does not supply one.
GIT_BINARY = "git" if not WIN else "git.exe"


def get_git_tree(target):
    """
    Find the closest directory at or above `target` that holds a `.git` entry.

    When `target` is an existing file, the search starts at its containing
    folder; otherwise it starts at `target` itself. Each ancestor is checked
    in turn.

    Returns the first directory containing a `.git` entry, or `None` when no
    further parent directory can be derived without one being found.
    """

    current = target if not os.path.isfile(target) else os.path.dirname(target)
    while True:
        if os.path.exists(os.path.join(current, ".git")):
            return current
        above = os.path.dirname(current)
        if above == current:
            # `dirname` no longer makes progress, so there is nothing left to check.
            return None
        current = above


def get_git_dir(tree):
    """Return the path of the `.git` entry directly inside the working tree `tree`."""

    git_dir = os.path.join(tree, ".git")
    return git_dir


def get_file_diff(target, git_binary=None):
    """
    Get the list of files that differ from the merge base of `target`.

    Runs `git diff --name-only --cached --merge-base <target>` against the Git
    tree found by searching upward from the current working directory, so the
    comparison is made with the index (`--cached`) rather than with `HEAD`.

    `target` is the revision whose merge base is used for the comparison.
    `git_binary` is an optional path to the Git executable; when empty, the
    default used by `gitopen` applies.

    Returns a list of path strings exactly as Git reports them. Any error
    raised by `gitopen` (for example a non-zero Git exit status) propagates.
    """

    # `-z` makes Git emit NUL-terminated, unquoted paths. Without it, paths with
    # unusual or non-ASCII characters are quoted and escaped and would never
    # match a file pattern. The trailing `--` marks `target` as a revision even
    # when a file of the same name exists.
    args = ['--no-pager', 'diff', '--name-only', '-z', '--cached', '--merge-base', f'{target}', '--']
    output = gitopen(
        args,
        git_binary=git_binary,
        git_tree=get_git_tree(os.path.abspath('.'))
    ).decode('utf-8')

    # Paths are NUL-terminated, so the final split element is empty; drop it.
    return [name for name in output.split('\0') if name]


def gitopen(args, git_binary=None, git_tree=None):
    """
    Run Git with `args` and return what it wrote to standard output.

    `args` is the list of arguments placed after the executable and any tree
    options. `git_binary` is the executable to run; `GIT_BINARY` is used when
    it is `None` or empty. When `git_tree` is not `None`, `--work-tree` and
    `--git-dir` options derived from it are inserted ahead of `args`.

    Returns the captured standard output as bytes.

    Raises `RuntimeError` carrying the decoded, right-stripped standard error
    text when Git exits with a non-zero status.
    """

    executable = git_binary if git_binary else GIT_BINARY

    prefix = [executable]
    if git_tree is not None:
        prefix.extend((f"--work-tree={git_tree}", f"--git-dir={get_git_dir(git_tree)}"))
    cmd = prefix + args

    popen_kwargs = {
        "stdout": subprocess.PIPE,
        "stderr": subprocess.PIPE,
        "shell": False
    }
    if WIN:
        # Presumably keeps a console window from popping up on Windows.
        startupinfo = subprocess.STARTUPINFO()
        startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
        popen_kwargs["startupinfo"] = startupinfo

    process = subprocess.Popen(cmd, **popen_kwargs)
    stdout, stderr = process.communicate()

    if process.returncode != 0:
        raise RuntimeError(stderr.decode('utf-8').rstrip())

    return stdout

0 comments on commit 6ca5f7f

Please sign in to comment.