# Ordered (compiled regex, replacement template) pairs used by `pathex`.
# At each position in the pattern the first rule that matches wins, so the
# more specific rules must come before the more general ones.  Replacement
# templates are expanded with `Match.expand`, which is why a literal backslash
# in the output is written as `\\\\` here.
PATHEX_SUBS = [(re.compile(rx), sub) for rx, sub in [
    # Leading star(s) then a slash: any directory prefix, or nothing at all.
    (r"^\*+[/\\]", r"^(.*[/\\\\])?"),
    # A slash then star(s) at the end of the pattern: anything at all.
    # NOTE(review): the slash is consumed too, so "foo/*" becomes "foo.*" and
    # also matches "foobar" -- confirm this is intended.
    (r"[/\\]\*+$", r".*"),
    # Two or more stars, with an optional following slash: anything,
    # including separators.
    (r"\*\*+[/\\]?", r".*"),
    # Either kind of slash matches either kind of slash.
    (r"[/\\]", r"[/\\\\]"),
    # A single star: any run of non-separator characters.
    (r"\*", r"[^/\\\\]*"),
    # A question mark: exactly one non-separator character.
    (r"\?", r"[^/\\\\]"),
    # A bracketed character class is copied through unchanged.
    (r"\[.*?\]", r"\g<0>"),
    # Runs of plain word characters and dashes are copied through unchanged.
    (r"[a-zA-Z0-9_-]+", r"\g<0>"),
    # Any other single character (except newline) is backslash-escaped.
    (r".", r"\\\g<0>"),
]]


def pathex(pattern):
    """Convert a file-path pattern into a regex.

    The pattern is scanned left to right.  At each position the first rule in
    `PATHEX_SUBS` that matches is applied, and its expansion is appended to
    the result.

    A pattern containing no slash or backslash is first prefixed with "**/",
    so a bare name matches at any directory depth.

    Arguments:
        pattern: the file-path pattern string.

    Returns:
        The regex source as a string.  It is not compiled and has no end
        anchor.

    """
    if not re.search(r"[/\\]", pattern):
        pattern = "**/" + pattern
    pieces = []
    pos = 0
    while pos < len(pattern):
        for rx, sub in PATHEX_SUBS:
            m = rx.match(pattern, pos=pos)
            if m:
                pieces.append(m.expand(sub))
                pos = m.end()
                break
        else:
            # No rule matched.  The catch-all "." rule does not match a
            # newline, so without this branch `pos` would never advance and
            # the loop would never finish.  Escape the character literally.
            pieces.append(re.escape(pattern[pos]))
            pos += 1
    return "".join(pieces)


def fnmatches_to_regex(patterns, case_insensitive=False, partial=False):
    """Convert file-path patterns to a compiled regex that matches any of them.

    Each pattern is translated with `pathex`, so slashes in a pattern match
    either a slash or a backslash.  The per-pattern regexes are then combined
    with `join_regex`.

    Arguments:
        patterns: an iterable of file-path pattern strings.
        case_insensitive: if true, compile the regex with `re.IGNORECASE`.
        partial: if true, the regex is left without an end anchor, so it can
            match just the start of the target string.  Otherwise `\\Z` is
            appended and the whole target string must match.

    Returns:
        A compiled regex object.  Use its `.match` method to compare target
        strings.

    """
    flags = 0
    if case_insensitive:
        flags |= re.IGNORECASE
    rx = join_regex(map(pathex, patterns))
    if not partial:
        # Wrap in a group so the end anchor applies to every alternative.
        rx = rf"(?:{rx})\Z"
    return re.compile(rx, flags=flags)
+ gen_params( + ["abc", "xyz"], case_insensitive=True, + matches=["abc", "xyz", "Abc", "XYZ", "AbC"], + nomatches=["abcx", "xabc", "axyz", "xyza"], ), - ( - ["abc/hi.py"], True, False, - ["abc/hi.py", "ABC/hi.py", r"ABC\hi.py"], - ["abc_hi.py", "abc/hi.pyc"], + gen_params( + ["a?c", "x?z"], + matches=["abc", "xyz", "xYz", "azc", "xaz"], + nomatches=["ABC", "abcx", "xabc", "axyz", "xyza"], ), - ( - [r"abc\hi.py"], True, False, - [r"abc\hi.py", r"ABC\hi.py"], - ["abc/hi.py", "ABC/hi.py", "abc_hi.py", "abc/hi.pyc"], + gen_params( + ["a??d"], + matches=["abcd", "azcd", "a12d"], + nomatches=["ABCD", "abcx", "axyz", "abcde"], ), - ( - ["abc/*/hi.py"], True, False, - ["abc/foo/hi.py", "ABC/foo/bar/hi.py", r"ABC\foo/bar/hi.py"], - ["abc/hi.py", "abc/hi.pyc"], + gen_params( + ["abc/hi.py"], case_insensitive=True, + matches=["abc/hi.py", "ABC/hi.py", r"ABC\hi.py"], + nomatches=["abc_hi.py", "abc/hi.pyc"], ), - ( - ["abc/[a-f]*/hi.py"], True, False, - ["abc/foo/hi.py", "ABC/foo/bar/hi.py", r"ABC\foo/bar/hi.py"], - ["abc/zoo/hi.py", "abc/hi.py", "abc/hi.pyc"], + gen_params( + [r"abc\hi.py"], case_insensitive=True, + matches=[r"abc\hi.py", r"ABC\hi.py", "abc/hi.py", "ABC/hi.py"], + nomatches=["abc_hi.py", "abc/hi.pyc"], ), - ( - ["abc/"], True, True, - ["abc/foo/hi.py", "ABC/foo/bar/hi.py", r"ABC\foo/bar/hi.py"], - ["abcd/foo.py", "xabc/hi.py"], + gen_params( + ["abc/*/hi.py"], case_insensitive=True, + matches=["abc/foo/hi.py", r"ABC\foo/hi.py"], + nomatches=["abc/hi.py", "abc/hi.pyc", "ABC/foo/bar/hi.py", r"ABC\foo/bar/hi.py"], ), - ( - ["*/foo"], False, True, - ["abc/foo/hi.py", "foo/hi.py"], - ["abc/xfoo/hi.py"], + gen_params( + ["abc/**/hi.py"], case_insensitive=True, + matches=[ + "abc/foo/hi.py", r"ABC\foo/hi.py", "abc/hi.py", "ABC/foo/bar/hi.py", + r"ABC\foo/bar/hi.py", + ], + nomatches=["abc/hi.pyc"], ), - -]) -def test_fnmatches_to_regex(patterns, case_insensitive, partial, matches, nomatches): + gen_params( + ["abc/[a-f]*/hi.py"], case_insensitive=True, + 
matches=["abc/foo/hi.py", r"ABC\boo/hi.py"], + nomatches=[ + "abc/zoo/hi.py", "abc/hi.py", "abc/hi.pyc", "abc/foo/bar/hi.py", r"abc\foo/bar/hi.py", + ], + ), + gen_params( + ["abc/[a-f]/hi.py"], case_insensitive=True, + matches=["abc/f/hi.py", r"ABC\b/hi.py"], + nomatches=[ + "abc/foo/hi.py", "abc/zoo/hi.py", "abc/hi.py", "abc/hi.pyc", "abc/foo/bar/hi.py", + r"abc\foo/bar/hi.py", + ], + ), + gen_params( + ["abc/"], case_insensitive=True, partial=True, + matches=["abc/foo/hi.py", "ABC/foo/bar/hi.py", r"ABC\foo/bar/hi.py"], + nomatches=["abcd/foo.py", "xabc/hi.py"], + ), + gen_params( + ["*/foo"], case_insensitive=False, partial=True, + matches=["abc/foo/hi.py", "foo/hi.py"], + nomatches=["abc/xfoo/hi.py"], + ), + gen_params( + ["**/foo"], + matches=["foo", "hello/foo", "hi/there/foo"], + nomatches=["foob", "hello/foob", "hello/Foo"], + ), +]))) +def test_fnmatches_to_regex(patterns, case_insensitive, partial, text, result): regex = fnmatches_to_regex(patterns, case_insensitive=case_insensitive, partial=partial) - for s in matches: - assert regex.match(s) - for s in nomatches: - assert not regex.match(s) + assert bool(regex.match(text)) == result class MatcherTest(CoverageTest): @@ -235,6 +276,8 @@ def test_fnmatch_matcher_overload(self): self.assertMatches(fnm, "x007foo.txt", True) self.assertMatches(fnm, "x123foo.txt", True) self.assertMatches(fnm, "x798bar.txt", False) + self.assertMatches(fnm, "x499.txt", True) + self.assertMatches(fnm, "x500.txt", False) def test_fnmatch_windows_paths(self): # We should be able to match Windows paths even if we are running on @@ -309,9 +352,9 @@ def test_multiple_patterns(self, rel_yn): assert msgs == [ "Aliases (relative=True):", " Rule: '/home/*/src' -> './mysrc/' using regex " + - "'(?s:[\\\\\\\\/]home[\\\\\\\\/].*[\\\\\\\\/]src[\\\\\\\\/])'", + "'[/\\\\\\\\]home[/\\\\\\\\][^/\\\\\\\\]*[/\\\\\\\\]src[/\\\\\\\\]'", " Rule: '/lib/*/libsrc' -> './mylib/' using regex " + - 
"'(?s:[\\\\\\\\/]lib[\\\\\\\\/].*[\\\\\\\\/]libsrc[\\\\\\\\/])'", + "'[/\\\\\\\\]lib[/\\\\\\\\][^/\\\\\\\\]*[/\\\\\\\\]libsrc[/\\\\\\\\]'", "Matched path '/home/foo/src/a.py' to rule '/home/*/src' -> './mysrc/', " + "producing './mysrc/a.py'", "Matched path '/lib/foo/libsrc/a.py' to rule '/lib/*/libsrc' -> './mylib/', " + @@ -321,9 +364,9 @@ def test_multiple_patterns(self, rel_yn): assert msgs == [ "Aliases (relative=False):", " Rule: '/home/*/src' -> './mysrc/' using regex " + - "'(?s:[\\\\\\\\/]home[\\\\\\\\/].*[\\\\\\\\/]src[\\\\\\\\/])'", + "'[/\\\\\\\\]home[/\\\\\\\\][^/\\\\\\\\]*[/\\\\\\\\]src[/\\\\\\\\]'", " Rule: '/lib/*/libsrc' -> './mylib/' using regex " + - "'(?s:[\\\\\\\\/]lib[\\\\\\\\/].*[\\\\\\\\/]libsrc[\\\\\\\\/])'", + "'[/\\\\\\\\]lib[/\\\\\\\\][^/\\\\\\\\]*[/\\\\\\\\]libsrc[/\\\\\\\\]'", "Matched path '/home/foo/src/a.py' to rule '/home/*/src' -> './mysrc/', " + f"producing {files.canonical_filename('./mysrc/a.py')!r}", "Matched path '/lib/foo/libsrc/a.py' to rule '/lib/*/libsrc' -> './mylib/', " +