akaihola · akaihola · Jan 14, 2023 · Jan 10, 2023 · Jan 10, 2023 · Jan 10, 2023
diff --git a/CHANGES.rst b/CHANGES.rst
@@ -12,6 +12,10 @@ Added
  ``--target-version`` command line option
 - In ``README.rst``, link to GitHub searches which find public repositories that
  use Darker.
+- Linters are now run twice: once for ``rev1`` to get a baseline, and another time for
+ ``rev2`` to get the current situation. Old linter messages which fall on unmodified
+ lines are hidden, so effectively the user gets new linter messages introduced by
+ latest changes, as well as persistent linter messages on modified lines.
 
 Fixed
 -----

diff --git a/README.rst b/README.rst
@@ -31,8 +31,13 @@ What?
 =====
 
 This utility reformats and checks Python source code files.
-However, when run in a Git repository, it only applies reformatting and reports errors
-in regions which have changed in the Git working tree since the last commit.
+However, when run in a Git repository, it compares an old revision of the source tree
+to a newer revision (or the working tree). It then
+
+- only applies reformatting in regions which have changed in the Git working tree
+ between the two revisions, and
+- only reports those linting messages which appeared after the modifications to the
+ source code files.
 
 The reformatters supported are:
 
@@ -726,7 +731,8 @@ It defaults to ``"--check --diff --color"``.
 You can e.g. add ``"--isort"`` to sort imports, or ``"--verbose"`` for debug logging.
 
 To run linters through Darker, you can provide a comma separated list of linters using
-the ``lint:`` option. Only ``flake8``, ``pylint`` and ``mypy`` are supported.
+the ``lint:`` option. Only ``flake8``, ``pylint`` and ``mypy`` are supported. Other
+linters may or may not work with Darker, depending on their message output format.
 Versions can be constrained using ``pip`` syntax, e.g. ``"flake8>=3.9.2"``.
 
 *New in version 1.1.0:*
@@ -745,7 +751,9 @@ The ``lint:`` option.
 Using linters
 =============
 
-One way to use Darker is to filter linter output to modified lines only.
+One way to use Darker is to filter linter output to only those linter messages
+which appeared after the modifications to source code files,
+as well as old messages which concern modified lines.
 Darker supports any linter with output in one of the following formats::
 
  <file>:<linenum>: <description>
@@ -892,7 +900,9 @@ are applied to the edited file.
 Also, in case the ``--isort`` option was specified,
 isort_ is run on each edited file before applying Black_.
 Similarly, each linter requested using the `--lint <command>` option is run,
-and only linting errors/warnings on modified lines are displayed.
+and only those linting messages are displayed which appeared after the modifications to
+the source code files,
+or which were there already before but now fall on modified lines.
 
 
 License

diff --git a/src/darker/__main__.py b/src/darker/__main__.py
@@ -350,11 +350,14 @@ def main( # pylint: disable=too-many-locals,too-many-branches,too-many-statemen
  8. verify that the resulting reformatted source code parses to an identical AST as
  the original edited to-file
  9. write the reformatted source back to the original file
- 10. run linter subprocesses for all edited files (10.-13. optional)
- 11. diff the given revision and worktree (after isort and Black reformatting) for
- each file reported by a linter
- 12. extract line numbers in each file reported by a linter for changed lines
- 13. print only linter error lines which fall on changed lines
+ 10. run linter subprocesses twice for all modified and unmodified files which are
+ mentioned on the command line: first establish a baseline by running against
+ ``rev1``, then get current linting status by running against the working tree
+ (steps 10.-12. are optional)
+ 11. create a mapping from line numbers of unmodified lines in the current versions
+ to corresponding line numbers in ``rev1``
+ 12. hide linter messages which appear in the current versions and identically on
+ corresponding lines in ``rev1``, and show all other linter messages
 
  :param argv: The command line arguments to the ``darker`` command
  :return: 1 if the ``--check`` argument was provided and at least one file was (or
@@ -423,38 +426,35 @@ def main( # pylint: disable=too-many-locals,too-many-branches,too-many-statemen
  f"Error: Path(s) {missing_reprs} do not exist in {rev2_repr}",
  )
 
- # These are absolute paths:
+ # These paths are relative to `root`:
  files_to_process = filter_python_files(paths, root, {})
  files_to_blacken = filter_python_files(paths, root, black_config)
+ # Now decide which files to reformat (Black & isort). Note that this doesn't apply
+ # to linting.
  if output_mode == OutputMode.CONTENT:
- # With `-d` / `--stdout`, process the file whether modified or not. Paths have
+ # With `-d` / `--stdout`, reformat the file whether modified or not. Paths have
  # previously been validated to contain exactly one existing file.
- changed_files_to_process = {
- p.resolve().relative_to(root) for p in files_to_process
- }
+ changed_files_to_reformat = files_to_process
  black_exclude = set()
  else:
- # In other modes, only process files which have been modified.
+ # In other modes, only reformat files which have been modified.
  if git_is_repository(root):
- changed_files_to_process = git_get_modified_python_files(
+ changed_files_to_reformat = git_get_modified_python_files(
  files_to_process, revrange, root
  )
  else:
- changed_files_to_process = {
- path.relative_to(root) for path in files_to_process
- }
+ changed_files_to_reformat = files_to_process
  black_exclude = {
  str(path)
- for path in changed_files_to_process
- if root / path not in files_to_blacken
+ for path in changed_files_to_reformat
+ if path not in files_to_blacken
  }
-
  use_color = should_use_color(config["color"])
  formatting_failures_on_modified_lines = False
  for path, old, new in sorted(
  format_edited_parts(
  root,
- changed_files_to_process,
+ changed_files_to_reformat,
  Exclusions(black=black_exclude, isort=set() if args.isort else {"**/*"}),
  revrange,
  black_config,
@@ -472,7 +472,8 @@ def main( # pylint: disable=too-many-locals,too-many-branches,too-many-statemen
  linter_failures_on_modified_lines = run_linters(
  args.lint,
  root,
- changed_files_to_process,
+ # paths to lint are not limited to modified files or just Python files:
+ {p.resolve().relative_to(root) for p in paths},
  revrange,
  use_color,
  )

diff --git a/src/darker/black_diff.py b/src/darker/black_diff.py
@@ -146,8 +146,8 @@ def filter_python_files(
  :param root: A common root directory for all ``paths``
  :param black_config: Black configuration which contains the exclude options read
  from Black's configuration files
- :return: Absolute paths of files which should be reformatted according to
- ``black_config``
+ :return: Paths of files which should be reformatted according to
+ ``black_config``, relative to ``root``.
 
  """
  sig = inspect.signature(gen_python_files)
@@ -172,7 +172,7 @@ def filter_python_files(
  **kwargs, # type: ignore[arg-type]
  )
  )
- return files_from_directories | files
+ return {p.resolve().relative_to(root) for p in files_from_directories | files}
 
 
 def run_black(src_contents: TextDocument, black_config: BlackConfig) -> TextDocument:

diff --git a/src/darker/diff.py b/src/darker/diff.py
@@ -66,7 +66,7 @@
 
 import logging
 from difflib import SequenceMatcher
-from typing import Generator, List, Sequence, Tuple
+from typing import Dict, Generator, List, Sequence, Tuple
 
 from darker.multiline_strings import find_overlap
 from darker.utils import DiffChunk, TextDocument
@@ -203,3 +203,36 @@ def diff_chunks(src: TextDocument, dst: TextDocument) -> List[DiffChunk]:
  """
  opcodes = diff_and_get_opcodes(src, dst)
  return list(opcodes_to_chunks(opcodes, src, dst))
+
+
+def map_unmodified_lines(src: TextDocument, dst: TextDocument) -> Dict[int, int]:
+ """Return a mapping of line numbers of unmodified lines between dst and src docs
+
+ After doing a diff between ``src`` and ``dst``, some identical chunks of lines may
+ be identified. For each such chunk, a mapping from every line number of the chunk in
+ ``dst`` to corresponding line number in ``src`` is added.
+
+ :param src: The original text document
+ :param dst: The modified text document
+ :return: A mapping from ``dst`` lines to corresponding unmodified ``src`` lines.
+ Line numbers are 1-based.
+
+ """
+ opcodes = diff_and_get_opcodes(src, dst)
+ _validate_opcodes(opcodes)
+ if not src.string and not dst.string:
+ # empty files may get linter messages on line 1
+ return {1: 1}
+ result = {}
+ for tag, src_start, src_end, dst_start, dst_end in opcodes:
+ if tag != "equal":
+ continue
+ for line_delta in range(dst_end - dst_start):
+ result[dst_start + line_delta + 1] = src_start + line_delta + 1
+ if line_delta != src_end - src_start - 1:
+ raise RuntimeError(
+ "Something is wrong, 'equal' diff blocks should have the same length."
+ f" src_start={src_start}, src_end={src_end},"
+ f" dst_start={dst_start}, dst_end={dst_end}"
+ )
+ return result
diff --git a/src/darker/git.py b/src/darker/git.py
@@ -362,7 +362,7 @@ def get_missing_at_revision(paths: Iterable[Path], rev2: str, cwd: Path) -> Set[
 
 
 def _git_diff_name_only(
- rev1: str, rev2: str, relative_paths: Set[Path], cwd: Path
+ rev1: str, rev2: str, relative_paths: Iterable[Path], cwd: Path
 ) -> Set[Path]:
  """Collect names of changed files between commits from Git
 
@@ -389,7 +389,7 @@ def _git_diff_name_only(
  return {Path(line) for line in lines}
 
 
-def _git_ls_files_others(relative_paths: Set[Path], cwd: Path) -> Set[Path]:
+def _git_ls_files_others(relative_paths: Iterable[Path], cwd: Path) -> Set[Path]:
  """Collect names of untracked non-excluded files from Git
 
  This will return those files in ``relative_paths`` which are untracked and not
@@ -419,21 +419,69 @@ def git_get_modified_python_files(
  - ``git diff --name-only --relative <rev> -- <path(s)>``
  - ``git ls-files --others --exclude-standard -- <path(s)>``
 
- :param paths: Paths to the files to diff
+ :param paths: Relative paths to the files to diff
  :param revrange: Git revision range to compare
  :param cwd: The Git repository root
  :return: File names relative to the Git repository root
 
  """
- relative_paths = {p.resolve().relative_to(cwd) for p in paths}
- changed_paths = _git_diff_name_only(
- revrange.rev1, revrange.rev2, relative_paths, cwd
- )
+ changed_paths = _git_diff_name_only(revrange.rev1, revrange.rev2, paths, cwd)
  if revrange.rev2 == WORKTREE:
- changed_paths.update(_git_ls_files_others(relative_paths, cwd))
+ changed_paths.update(_git_ls_files_others(paths, cwd))
  return {path for path in changed_paths if should_reformat_file(cwd / path)}
 
 
+def git_clone_local(source_repository: Path, revision: str, destination: Path) -> Path:
+ """Clone a local repository and check out the given revision
+
+ :param source_repository: Path to the root of the local repository checkout
+ :param revision: The revision to check out, or ``HEAD``
+ :param destination: Directory to create for the clone
+ :return: Path to the root of the new clone
+
+ """
+ clone_path = destination / source_repository.name
+ _ = _git_check_output(
+ [
+ "clone",
+ "--quiet",
+ str(source_repository),
+ str(clone_path),
+ ],
+ Path("."),
+ )
+ if revision != "HEAD":
+ _ = _git_check_output(["checkout", revision], clone_path)
+ return clone_path
+
+
+def git_get_root(path: Path) -> Optional[Path]:
+ """Get the root directory of a local Git repository clone based on a path inside it
+
+ :param path: A file or directory path inside the Git repository clone
+ :return: The root of the clone, or ``None`` if none could be found
+
+ """
+ try:
+ return Path(
+ _git_check_output(
+ ["rev-parse", "--show-toplevel"],
+ cwd=path if path.is_dir() else path.parent,
+ encoding="utf-8",
+ exit_on_error=False,
+ ).rstrip()
+ )
+ except CalledProcessError as exc_info:
+ if exc_info.returncode == 128 and exc_info.stderr.splitlines()[0].startswith(
+ "fatal: not a git repository (or any "
+ ):
+ # The error string differs a bit in different Git versions, but up to the
+ # point above it's identical in recent versions.
+ return None
+ sys.stderr.write(exc_info.stderr)
+ raise
+
+
 def _revision_vs_lines(
  root: Path, path_in_repo: Path, rev1: str, content: TextDocument, context_lines: int
 ) -> List[int]: