From 560a61b63401c8644686c54f94566d531d12fa68 Mon Sep 17 00:00:00 2001 From: Michael Hanselmann Date: Mon, 18 Jan 2016 15:27:37 +0100 Subject: [PATCH 1/9] Add myself to AUTHORS --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 31910fb680..077386e216 100644 --- a/AUTHORS +++ b/AUTHORS @@ -5,6 +5,7 @@ Borg Contributors ("The Borg Collective") - Antoine Beaupré - Radek Podgorny - Yuri D'Elia +- Michael Hanselmann Borg is a fork of Attic. From c1feb4b532bbc87321d0264899d699ecb0962891 Mon Sep 17 00:00:00 2001 From: Michael Hanselmann Date: Fri, 15 Jan 2016 22:11:47 +0100 Subject: [PATCH 2/9] Simplify pattern tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Stop using “adjust_patterns” and “exclude_path” as they're utility functions not relevant to testing pattern classes - Cover a few more cases, especially with more than one path separator and relative paths - At least one dedicated test function for each pattern style as opposed to a single, big test mixing styles - Use positive instead of negative matching (i.e. 
the expected list of resulting items is a list of items matching a pattern) --- borg/testsuite/helpers.py | 127 ++++++++++++++++++++++++-------------- 1 file changed, 81 insertions(+), 46 deletions(-) diff --git a/borg/testsuite/helpers.py b/borg/testsuite/helpers.py index 3da955d7fe..725dba9b27 100644 --- a/borg/testsuite/helpers.py +++ b/borg/testsuite/helpers.py @@ -9,7 +9,7 @@ import msgpack import msgpack.fallback -from ..helpers import adjust_patterns, exclude_path, Location, format_file_size, format_timedelta, PathPrefixPattern, FnmatchPattern, make_path_safe, \ +from ..helpers import exclude_path, Location, format_file_size, format_timedelta, PathPrefixPattern, FnmatchPattern, make_path_safe, \ prune_within, prune_split, get_cache_dir, Statistics, is_slow_msgpack, yes, RegexPattern, \ StableDict, int_to_bigint, bigint_to_int, parse_timestamp, CompressionSpec, ChunkerParams, \ ProgressIndicatorPercent, ProgressIndicatorEndless, load_excludes, parse_pattern @@ -160,70 +160,105 @@ def test(self): ) -def check_patterns(files, paths, excludes, expected): - """Utility for testing exclusion patterns. +def check_patterns(files, pattern, expected): + """Utility for testing patterns. """ - patterns = adjust_patterns(paths, excludes) - included = [path for path in files if not exclude_path(path, patterns)] + assert all([f == os.path.normpath(f) for f in files]), \ + "Pattern matchers expect normalized input paths" - assert included == (files if expected is None else expected) + matched = [f for f in files if pattern.match(f)] + assert matched == (files if expected is None else expected) -@pytest.mark.parametrize("paths, excludes, expected", [ - # "None" means all files, i.e. 
none excluded - ([], [], None), - (['/'], [], None), - (['/'], ['/h'], None), - (['/'], ['/home'], ['/etc/passwd', '/etc/hosts', '/var/log/messages', '/var/log/dmesg']), - (['/'], ['/home/'], ['/etc/passwd', '/etc/hosts', '/home', '/var/log/messages', '/var/log/dmesg']), - (['/home/u'], [], []), - (['/', '/home', '/etc/hosts'], ['/'], []), - (['/home/'], ['/home/user2'], ['/home', '/home/user/.profile', '/home/user/.bashrc']), - (['/'], ['*.profile', '/var/log'], - ['/etc/passwd', '/etc/hosts', '/home', '/home/user/.bashrc', '/home/user2/public_html/index.html']), - (['/'], ['/home/*/public_html', '*.profile', '*/log/*'], - ['/etc/passwd', '/etc/hosts', '/home', '/home/user/.bashrc']), - (['/etc/', '/var'], ['dmesg'], ['/etc/passwd', '/etc/hosts', '/var/log/messages', '/var/log/dmesg']), + +@pytest.mark.parametrize("pattern, expected", [ + # "None" means all files, i.e. all match the given pattern + ("/", None), + ("/./", None), + ("", []), + ("/home/u", []), + ("/home/user", ["/home/user/.profile", "/home/user/.bashrc"]), + ("/etc", ["/etc/server/config", "/etc/server/hosts"]), + ("///etc//////", ["/etc/server/config", "/etc/server/hosts"]), + ("/./home//..//home/user2", ["/home/user2/.profile", "/home/user2/public_html/index.html"]), + ("/srv", ["/srv/messages", "/srv/dmesg"]), ]) -def test_patterns(paths, excludes, expected): +def test_patterns_prefix(pattern, expected): files = [ - '/etc/passwd', '/etc/hosts', '/home', - '/home/user/.profile', '/home/user/.bashrc', - '/home/user2/.profile', '/home/user2/public_html/index.html', - '/var/log/messages', '/var/log/dmesg', + "/etc/server/config", "/etc/server/hosts", "/home", "/home/user/.profile", "/home/user/.bashrc", + "/home/user2/.profile", "/home/user2/public_html/index.html", "/srv/messages", "/srv/dmesg", ] - check_patterns(files, paths, [FnmatchPattern(p) for p in excludes], expected) + check_patterns(files, PathPrefixPattern(pattern), expected) -@pytest.mark.parametrize("paths, excludes, expected", [ - # 
"None" means all files, i.e. none excluded - ([], [], None), - (['/'], [], None), - (['/'], ['.*'], []), - (['/'], ['^/'], []), - (['/'], ['^abc$'], None), - (['/'], ['^(?!/home/)'], - ['/home/user/.profile', '/home/user/.bashrc', '/home/user2/.profile', - '/home/user2/public_html/index.html']), +@pytest.mark.parametrize("pattern, expected", [ + # "None" means all files, i.e. all match the given pattern + ("", []), + ("foo", []), + ("relative", ["relative/path1", "relative/two"]), + ("more", ["more/relative"]), + ]) +def test_patterns_prefix_relative(pattern, expected): + files = ["relative/path1", "relative/two", "more/relative"] + + check_patterns(files, PathPrefixPattern(pattern), expected) + + +@pytest.mark.parametrize("pattern, expected", [ + # "None" means all files, i.e. all match the given pattern + ("/*", None), + ("/./*", None), + ("*", None), + ("*/*", None), + ("*///*", None), + ("/home/u", []), + ("/home/*", + ["/home/user/.profile", "/home/user/.bashrc", "/home/user2/.profile", "/home/user2/public_html/index.html", + "/home/foo/.thumbnails", "/home/foo/bar/.thumbnails"]), + ("/home/user/*", ["/home/user/.profile", "/home/user/.bashrc"]), + ("/etc/*", ["/etc/server/config", "/etc/server/hosts"]), + ("*/.pr????e", ["/home/user/.profile", "/home/user2/.profile"]), + ("///etc//////*", ["/etc/server/config", "/etc/server/hosts"]), + ("/./home//..//home/user2/*", ["/home/user2/.profile", "/home/user2/public_html/index.html"]), + ("/srv*", ["/srv/messages", "/srv/dmesg"]), + ("/home/*/.thumbnails", ["/home/foo/.thumbnails", "/home/foo/bar/.thumbnails"]), ]) -def test_patterns_regex(paths, excludes, expected): +def test_patterns_fnmatch(pattern, expected): + files = [ + "/etc/server/config", "/etc/server/hosts", "/home", "/home/user/.profile", "/home/user/.bashrc", + "/home/user2/.profile", "/home/user2/public_html/index.html", "/srv/messages", "/srv/dmesg", + "/home/foo/.thumbnails", "/home/foo/bar/.thumbnails", + ] + + check_patterns(files, 
FnmatchPattern(pattern), expected) + + +@pytest.mark.parametrize("pattern, expected", [ + # "None" means all files, i.e. all match the given pattern + ("", None), + (".*", None), + ("^/", None), + ("^abc$", []), + ("^[^/]", []), + ("^(?!/srv|/foo|/opt)", + ["/home", "/home/user/.profile", "/home/user/.bashrc", "/home/user2/.profile", + "/home/user2/public_html/index.html", "/home/foo/.thumbnails", "/home/foo/bar/.thumbnails",]), + ]) +def test_patterns_regex(pattern, expected): files = [ '/srv/data', '/foo/bar', '/home', '/home/user/.profile', '/home/user/.bashrc', '/home/user2/.profile', '/home/user2/public_html/index.html', '/opt/log/messages.txt', '/opt/log/dmesg.txt', + "/home/foo/.thumbnails", "/home/foo/bar/.thumbnails", ] - patterns = [] - - for i in excludes: - pat = RegexPattern(i) - assert str(pat) == i - assert pat.pattern == i - patterns.append(pat) + obj = RegexPattern(pattern) + assert str(obj) == pattern + assert obj.pattern == pattern - check_patterns(files, paths, patterns, expected) + check_patterns(files, obj, expected) def test_regex_pattern(): From 9747755131fadf909069d7a84dc793267af2c5ff Mon Sep 17 00:00:00 2001 From: Michael Hanselmann Date: Mon, 18 Jan 2016 13:32:49 +0100 Subject: [PATCH 3/9] Add pattern matcher wrapper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The utility functions “adjust_patterns” and “exclude_path” produce and use, respectively, a standard list object containing pattern objects. With the forthcoming introduction of patterns for filtering files to be extracted it's better to move the logic of these functions into a single class. The wrapper allows adding any number of patterns to an internal list together with a value to be returned if a match function finds that one of the patterns matches. A fallback value is returned otherwise. 
--- borg/helpers.py | 21 +++++++++++++++++++++ borg/testsuite/helpers.py | 25 ++++++++++++++++++++++++- 2 files changed, 45 insertions(+), 1 deletion(-) diff --git a/borg/helpers.py b/borg/helpers.py index ce344e3f58..39aad5532e 100644 --- a/borg/helpers.py +++ b/borg/helpers.py @@ -274,6 +274,27 @@ def exclude_path(path, patterns): return False +class PatternMatcher: + def __init__(self, fallback=None): + self._items = [] + + # Value to return from match function when none of the patterns match. + self.fallback = fallback + + def add(self, patterns, value): + """Add list of patterns to internal list. The given value is returned from the match function when one of the + given patterns matches. + """ + self._items.extend((i, value) for i in patterns) + + def match(self, path): + for (pattern, value) in self._items: + if pattern.match(path): + return value + + return self.fallback + + def normalized(func): """ Decorator for the Pattern match methods, returning a wrapper that normalizes OSX paths to match the normalized pattern on OSX, and diff --git a/borg/testsuite/helpers.py b/borg/testsuite/helpers.py index 725dba9b27..ee8a9e91d3 100644 --- a/borg/testsuite/helpers.py +++ b/borg/testsuite/helpers.py @@ -12,7 +12,7 @@ from ..helpers import exclude_path, Location, format_file_size, format_timedelta, PathPrefixPattern, FnmatchPattern, make_path_safe, \ prune_within, prune_split, get_cache_dir, Statistics, is_slow_msgpack, yes, RegexPattern, \ StableDict, int_to_bigint, bigint_to_int, parse_timestamp, CompressionSpec, ChunkerParams, \ - ProgressIndicatorPercent, ProgressIndicatorEndless, load_excludes, parse_pattern + ProgressIndicatorPercent, ProgressIndicatorEndless, load_excludes, parse_pattern, PatternMatcher from . 
import BaseTestCase, environment_variable, FakeInputs @@ -374,6 +374,29 @@ def test_parse_pattern_error(pattern): parse_pattern(pattern) +def test_pattern_matcher(): + pm = PatternMatcher() + + assert pm.fallback is None + + for i in ["", "foo", "bar"]: + assert pm.match(i) is None + + pm.add([RegexPattern("^a")], "A") + pm.add([RegexPattern("^b"), RegexPattern("^z")], "B") + pm.add([RegexPattern("^$")], "Empty") + pm.fallback = "FileNotFound" + + assert pm.match("") == "Empty" + assert pm.match("aaa") == "A" + assert pm.match("bbb") == "B" + assert pm.match("ccc") == "FileNotFound" + assert pm.match("xyz") == "FileNotFound" + assert pm.match("z") == "B" + + assert PatternMatcher(fallback="hey!").fallback == "hey!" + + def test_compression_specs(): with pytest.raises(ValueError): CompressionSpec('') From 190107ada77156e3703d3b7589b911c2d6da6dff Mon Sep 17 00:00:00 2001 From: Michael Hanselmann Date: Mon, 18 Jan 2016 15:11:28 +0100 Subject: [PATCH 4/9] =?UTF-8?q?Replace=20use=20of=20=E2=80=9Cexclude=5Fpat?= =?UTF-8?q?h=E2=80=9D=20in=20tests?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The newly added pattern matcher class can replace the “exclude_path” function. The latter is going to be removed in a later change. 
--- borg/testsuite/helpers.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/borg/testsuite/helpers.py b/borg/testsuite/helpers.py index ee8a9e91d3..22bfe903e1 100644 --- a/borg/testsuite/helpers.py +++ b/borg/testsuite/helpers.py @@ -9,7 +9,7 @@ import msgpack import msgpack.fallback -from ..helpers import exclude_path, Location, format_file_size, format_timedelta, PathPrefixPattern, FnmatchPattern, make_path_safe, \ +from ..helpers import Location, format_file_size, format_timedelta, PathPrefixPattern, FnmatchPattern, make_path_safe, \ prune_within, prune_split, get_cache_dir, Statistics, is_slow_msgpack, yes, RegexPattern, \ StableDict, int_to_bigint, bigint_to_int, parse_timestamp, CompressionSpec, ChunkerParams, \ ProgressIndicatorPercent, ProgressIndicatorEndless, load_excludes, parse_pattern, PatternMatcher @@ -334,8 +334,9 @@ def test_patterns_from_file(tmpdir, lines, expected): ] def evaluate(filename): - patterns = load_excludes(open(filename, "rt")) - return [path for path in files if not exclude_path(path, patterns)] + matcher = PatternMatcher(fallback=True) + matcher.add(load_excludes(open(filename, "rt")), False) + return [path for path in files if matcher.match(path)] exclfile = tmpdir.join("exclude.txt") From 1fa4d2e516f8334f7bcf6f9bdc78fe6aafd45e65 Mon Sep 17 00:00:00 2001 From: Michael Hanselmann Date: Mon, 18 Jan 2016 13:05:48 +0100 Subject: [PATCH 5/9] Use constants for pattern style prefixes The prefix used for pattern styles should be kept together with the respective style implementation. --- borg/helpers.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/borg/helpers.py b/borg/helpers.py index 39aad5532e..eb3231deb5 100644 --- a/borg/helpers.py +++ b/borg/helpers.py @@ -316,6 +316,8 @@ def normalize_wrapper(self, path): class PatternBase: """Shared logic for inclusion/exclusion patterns. 
""" + PREFIX = NotImplemented + def __init__(self, pattern): self.pattern_orig = pattern self.match_count = 0 @@ -360,6 +362,8 @@ class PathPrefixPattern(PatternBase): If a directory is specified, all paths that start with that path match as well. A trailing slash makes no difference. """ + PREFIX = "pp" + def _prepare(self, pattern): self.pattern = os.path.normpath(pattern).rstrip(os.path.sep) + os.path.sep @@ -371,6 +375,8 @@ class FnmatchPattern(PatternBase): """Shell glob patterns to exclude. A trailing slash means to exclude the contents of a directory, but not the directory itself. """ + PREFIX = "fm" + def _prepare(self, pattern): if pattern.endswith(os.path.sep): pattern = os.path.normpath(pattern).rstrip(os.path.sep) + os.path.sep + '*' + os.path.sep @@ -390,6 +396,8 @@ def _match(self, path): class RegexPattern(PatternBase): """Regular expression to exclude. """ + PREFIX = "re" + def _prepare(self, pattern): self.pattern = pattern self.regex = re.compile(pattern) @@ -402,11 +410,12 @@ def _match(self, path): return (self.regex.search(path) is not None) -_DEFAULT_PATTERN_STYLE = "fm" -_PATTERN_STYLES = { - "fm": FnmatchPattern, - "re": RegexPattern, - } +_PATTERN_STYLES = set([ + FnmatchPattern, + RegexPattern, +]) + +_PATTERN_STYLE_BY_PREFIX = dict((i.PREFIX, i) for i in _PATTERN_STYLES) def parse_pattern(pattern): @@ -415,9 +424,9 @@ def parse_pattern(pattern): if len(pattern) > 2 and pattern[2] == ":" and pattern[:2].isalnum(): (style, pattern) = (pattern[:2], pattern[3:]) else: - style = _DEFAULT_PATTERN_STYLE + style = FnmatchPattern.PREFIX - cls = _PATTERN_STYLES.get(style, None) + cls = _PATTERN_STYLE_BY_PREFIX.get(style, None) if cls is None: raise ValueError("Unknown pattern style: {}".format(style)) From b6362b596390651d244086907702185745c4bc55 Mon Sep 17 00:00:00 2001 From: Michael Hanselmann Date: Mon, 18 Jan 2016 13:08:49 +0100 Subject: [PATCH 6/9] Flexible default pattern style MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit A function to parse pattern specifications was introduced in commit 2bafece. Since then it has had a hardcoded default style of “fm”, meaning fnmatch. With the forthcoming support for extracting files using patterns this default style must be more flexible. --- borg/helpers.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/borg/helpers.py b/borg/helpers.py index eb3231deb5..adb75fb4bf 100644 --- a/borg/helpers.py +++ b/borg/helpers.py @@ -418,18 +418,18 @@ def _match(self, path): _PATTERN_STYLE_BY_PREFIX = dict((i.PREFIX, i) for i in _PATTERN_STYLES) -def parse_pattern(pattern): +def parse_pattern(pattern, fallback=FnmatchPattern): """Read pattern from string and return an instance of the appropriate implementation class. """ if len(pattern) > 2 and pattern[2] == ":" and pattern[:2].isalnum(): (style, pattern) = (pattern[:2], pattern[3:]) - else: - style = FnmatchPattern.PREFIX - cls = _PATTERN_STYLE_BY_PREFIX.get(style, None) + cls = _PATTERN_STYLE_BY_PREFIX.get(style, None) - if cls is None: - raise ValueError("Unknown pattern style: {}".format(style)) + if cls is None: + raise ValueError("Unknown pattern style: {}".format(style)) + else: + cls = fallback return cls(pattern) From 848375e2fe35ecddfcdfd7bb30811fff87b549da Mon Sep 17 00:00:00 2001 From: Michael Hanselmann Date: Mon, 18 Jan 2016 13:09:08 +0100 Subject: [PATCH 7/9] Add and document path prefix as pattern style MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The “extract” command supports extracting all files underneath a given set of prefix paths. The forthcoming support for extracting files using a pattern (e.g. only files ending in “.zip”) requires the introduction of path prefixes as a third pattern style, making it also available for exclusions. 
--- borg/archiver.py | 16 ++++++++++------ borg/helpers.py | 1 + borg/testsuite/helpers.py | 10 ++++++++++ 3 files changed, 21 insertions(+), 6 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index 57d311bcfd..66298fe07a 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -611,12 +611,12 @@ def do_break_lock(self, args): helptext = {} helptext['patterns'] = textwrap.dedent(''' - Exclusion patterns support two separate styles, fnmatch and regular - expressions. If followed by a colon (':') the first two characters of - a pattern are used as a style selector. Explicit style selection is necessary - when regular expressions are desired or when the desired fnmatch pattern - starts with two alphanumeric characters followed by a colon (i.e. - `aa:something/*`). + Exclusion patterns support three separate styles, fnmatch, regular + expressions and path prefixes. If followed by a colon (':') the first two + characters of a pattern are used as a style selector. Explicit style + selection is necessary when a non-default style is desired or when the + desired pattern starts with two alphanumeric characters followed by a colon + (i.e. `aa:something/*`). `Fnmatch `_ patterns use a variant of shell pattern syntax, with '*' matching any number of @@ -640,6 +640,10 @@ def do_break_lock(self, args): documentation for the re module `_. + Prefix path patterns can be selected with the prefix `pp:`. This pattern + style is useful to match whole sub-directories. The pattern `pp:/data/bar` + matches `/data/bar` and everything therein. + Exclusions can be passed via the command line option `--exclude`. When used from within a shell the patterns should be quoted to protect them from expansion. 
diff --git a/borg/helpers.py b/borg/helpers.py index adb75fb4bf..8c1bb594be 100644 --- a/borg/helpers.py +++ b/borg/helpers.py @@ -412,6 +412,7 @@ def _match(self, path): _PATTERN_STYLES = set([ FnmatchPattern, + PathPrefixPattern, RegexPattern, ]) diff --git a/borg/testsuite/helpers.py b/borg/testsuite/helpers.py index 22bfe903e1..f31bd9840c 100644 --- a/borg/testsuite/helpers.py +++ b/borg/testsuite/helpers.py @@ -324,6 +324,10 @@ def test_invalid_unicode_pattern(pattern): ["/more/data"]), ([r" re:^\s "], ["/data/something00.txt", "/more/data", "/home", "/whitespace/end\t"]), ([r" re:\s$ "], ["/data/something00.txt", "/more/data", "/home", " #/wsfoobar", "\tstart/whitespace"]), + (["pp:./"], None), + (["pp:/"], [" #/wsfoobar", "\tstart/whitespace"]), + (["pp:aaabbb"], None), + (["pp:/data", "pp: #/", "pp:\tstart", "pp:/whitespace"], ["/more/data", "/home"]), ]) def test_patterns_from_file(tmpdir, lines, expected): files = [ @@ -364,6 +368,12 @@ def evaluate(filename): ("re:.*", RegexPattern), ("re:^/something/", RegexPattern), ("re:re:^/something/", RegexPattern), + + # Path prefix + ("pp:", PathPrefixPattern), + ("pp:/", PathPrefixPattern), + ("pp:/data/", PathPrefixPattern), + ("pp:pp:/data/", PathPrefixPattern), ]) def test_parse_pattern(pattern, cls): assert isinstance(parse_pattern(pattern), cls) From ceae4a9fa8fa030984c750fc8fcc795167ece639 Mon Sep 17 00:00:00 2001 From: Michael Hanselmann Date: Mon, 18 Jan 2016 16:45:42 +0100 Subject: [PATCH 8/9] Support patterns on extraction, fixes #361 This change implements the functionality requested in issue #361: extracting files with a given extension. It does so by permitting patterns to be used instead of plain prefix paths. The pattern styles supported are the same as for exclusions. 
--- borg/archiver.py | 22 +++++++++++++++++----- borg/testsuite/archiver.py | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 5 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index 66298fe07a..d5c1d19630 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -286,13 +286,25 @@ def do_extract(self, args): manifest, key = Manifest.load(repository) archive = Archive(repository, key, manifest, args.location.archive, numeric_owner=args.numeric_owner) - patterns = adjust_patterns(args.paths, args.excludes) + + matcher = PatternMatcher() + if args.excludes: + matcher.add(args.excludes, False) + + include_patterns = [] + + if args.paths: + include_patterns.extend(parse_pattern(i, PathPrefixPattern) for i in args.paths) + matcher.add(include_patterns, True) + + matcher.fallback = not include_patterns + dry_run = args.dry_run stdout = args.stdout sparse = args.sparse strip_components = args.strip_components dirs = [] - for item in archive.iter_items(lambda item: not exclude_path(item[b'path'], patterns), preload=True): + for item in archive.iter_items(lambda item: matcher.match(item[b'path']), preload=True): orig_path = item[b'path'] if strip_components: item[b'path'] = os.sep.join(orig_path.split(os.sep)[strip_components:]) @@ -317,8 +329,8 @@ def do_extract(self, args): if not args.dry_run: while dirs: archive.extract_item(dirs.pop(-1)) - for pattern in (patterns or []): - if isinstance(pattern, PathPrefixPattern) and pattern.match_count == 0: + for pattern in include_patterns: + if pattern.match_count == 0: self.print_warning("Include pattern '%s' never matched.", pattern) return self.exit_code @@ -965,7 +977,7 @@ def build_parser(self, args=None, prog=None): type=location_validator(archive=True), help='archive to extract') subparser.add_argument('paths', metavar='PATH', nargs='*', type=str, - help='paths to extract') + help='paths to extract; patterns are supported') rename_epilog = textwrap.dedent(""" This command renames an 
archive in the repository. diff --git a/borg/testsuite/archiver.py b/borg/testsuite/archiver.py index 0727932938..f75cc120c7 100644 --- a/borg/testsuite/archiver.py +++ b/borg/testsuite/archiver.py @@ -562,6 +562,39 @@ def test_extract_include_exclude_regex_from_file(self): self.cmd('extract', '--exclude-from=' + self.exclude_file_path, self.repository_location + '::test') self.assert_equal(sorted(os.listdir('output/input')), ['file3']) + def test_extract_with_pattern(self): + self.cmd("init", self.repository_location) + self.create_regular_file("file1", size=1024 * 80) + self.create_regular_file("file2", size=1024 * 80) + self.create_regular_file("file3", size=1024 * 80) + self.create_regular_file("file4", size=1024 * 80) + self.create_regular_file("file333", size=1024 * 80) + + self.cmd("create", self.repository_location + "::test", "input") + + # Extract everything with regular expression + with changedir("output"): + self.cmd("extract", self.repository_location + "::test", "re:.*") + self.assert_equal(sorted(os.listdir("output/input")), ["file1", "file2", "file3", "file333", "file4"]) + shutil.rmtree("output/input") + + # Extract with pattern while also excluding files + with changedir("output"): + self.cmd("extract", "--exclude=re:file[34]$", self.repository_location + "::test", r"re:file\d$") + self.assert_equal(sorted(os.listdir("output/input")), ["file1", "file2"]) + shutil.rmtree("output/input") + + # Combine --exclude with pattern for extraction + with changedir("output"): + self.cmd("extract", "--exclude=input/file1", self.repository_location + "::test", "re:file[12]$") + self.assert_equal(sorted(os.listdir("output/input")), ["file2"]) + shutil.rmtree("output/input") + + # Multiple pattern + with changedir("output"): + self.cmd("extract", self.repository_location + "::test", "fm:input/file1", "fm:*file33*", "input/file2") + self.assert_equal(sorted(os.listdir("output/input")), ["file1", "file2", "file333"]) + def test_exclude_caches(self): 
self.cmd('init', self.repository_location) self.create_regular_file('file1', size=1024 * 80) From dad0ba96619f76cd3f73499cf20d8e56c9212279 Mon Sep 17 00:00:00 2001 From: Michael Hanselmann Date: Mon, 18 Jan 2016 16:45:48 +0100 Subject: [PATCH 9/9] Remove old-style pattern handling functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the “adjust_patterns” and “exclude_path” functions and replace them with the recently introduced pattern matcher class. --- borg/archiver.py | 19 ++++++++++++------- borg/helpers.py | 17 ----------------- 2 files changed, 12 insertions(+), 24 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index d5c1d19630..62f00f503c 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -17,11 +17,11 @@ from . import __version__ from .helpers import Error, location_validator, format_time, format_file_size, \ - format_file_mode, parse_pattern, PathPrefixPattern, exclude_path, adjust_patterns, to_localtime, timestamp, \ + format_file_mode, parse_pattern, PathPrefixPattern, to_localtime, timestamp, \ get_cache_dir, get_keys_dir, prune_within, prune_split, unhexlify, \ Manifest, remove_surrogates, update_excludes, format_archive, check_extension_modules, Statistics, \ dir_is_tagged, bigint_to_int, ChunkerParams, CompressionSpec, is_slow_msgpack, yes, sysinfo, \ - EXIT_SUCCESS, EXIT_WARNING, EXIT_ERROR, log_multi + EXIT_SUCCESS, EXIT_WARNING, EXIT_ERROR, log_multi, PatternMatcher from .logger import create_logger, setup_logging logger = create_logger() from .compress import Compressor, COMPR_BUFFER @@ -129,6 +129,10 @@ def do_change_passphrase(self, args): def do_create(self, args): """Create new archive""" + matcher = PatternMatcher(fallback=True) + if args.excludes: + matcher.add(args.excludes, False) + def create_inner(archive, cache): # Add cache dir to inode_skip list skip_inodes = set() @@ -166,7 +170,7 @@ def create_inner(archive, cache): continue else: restrict_dev = None - 
self._process(archive, cache, args.excludes, args.exclude_caches, args.exclude_if_present, + self._process(archive, cache, matcher, args.exclude_caches, args.exclude_if_present, args.keep_tag_files, skip_inodes, path, restrict_dev, read_special=args.read_special, dry_run=dry_run) if not dry_run: @@ -202,11 +206,12 @@ def create_inner(archive, cache): create_inner(None, None) return self.exit_code - def _process(self, archive, cache, excludes, exclude_caches, exclude_if_present, + def _process(self, archive, cache, matcher, exclude_caches, exclude_if_present, keep_tag_files, skip_inodes, path, restrict_dev, read_special=False, dry_run=False): - if exclude_path(path, excludes): + if not matcher.match(path): return + try: st = os.lstat(path) except OSError as e: @@ -235,7 +240,7 @@ def _process(self, archive, cache, excludes, exclude_caches, exclude_if_present, if keep_tag_files and not dry_run: archive.process_dir(path, st) for tag_path in tag_paths: - self._process(archive, cache, excludes, exclude_caches, exclude_if_present, + self._process(archive, cache, matcher, exclude_caches, exclude_if_present, keep_tag_files, skip_inodes, tag_path, restrict_dev, read_special=read_special, dry_run=dry_run) return @@ -249,7 +254,7 @@ def _process(self, archive, cache, excludes, exclude_caches, exclude_if_present, else: for filename in sorted(entries): entry_path = os.path.normpath(os.path.join(path, filename)) - self._process(archive, cache, excludes, exclude_caches, exclude_if_present, + self._process(archive, cache, matcher, exclude_caches, exclude_if_present, keep_tag_files, skip_inodes, entry_path, restrict_dev, read_special=read_special, dry_run=dry_run) elif stat.S_ISLNK(st.st_mode): diff --git a/borg/helpers.py b/borg/helpers.py index 8c1bb594be..91c9e0434d 100644 --- a/borg/helpers.py +++ b/borg/helpers.py @@ -257,23 +257,6 @@ def update_excludes(args): file.close() -def adjust_patterns(paths, excludes): - if paths: - return (excludes or []) + [PathPrefixPattern(path) 
for path in paths] + [FnmatchPattern('*')] - else: - return excludes - - -def exclude_path(path, patterns): - """Used by create and extract sub-commands to determine - whether or not an item should be processed. - """ - for pattern in (patterns or []): - if pattern.match(path): - return isinstance(pattern, (FnmatchPattern, RegexPattern)) - return False - - class PatternMatcher: def __init__(self, fallback=None): self._items = []