Skip to content

Commit

Permalink
Merge pull request #575 from hansmi/extract-pattern-support
Browse files Browse the repository at this point in the history
Extract pattern support
  • Loading branch information
ThomasWaldmann committed Jan 18, 2016
2 parents 522c14c + dad0ba9 commit 1e1812c
Show file tree
Hide file tree
Showing 5 changed files with 229 additions and 91 deletions.
1 change: 1 addition & 0 deletions AUTHORS
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ Borg Contributors ("The Borg Collective")
- Antoine Beaupré <anarcat@debian.org>
- Radek Podgorny <radek@podgorny.cz>
- Yuri D'Elia
- Michael Hanselmann <public@hansmi.ch>

Borg is a fork of Attic.

Expand Down
57 changes: 39 additions & 18 deletions borg/archiver.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,11 @@

from . import __version__
from .helpers import Error, location_validator, format_time, format_file_size, \
format_file_mode, parse_pattern, PathPrefixPattern, exclude_path, adjust_patterns, to_localtime, timestamp, \
format_file_mode, parse_pattern, PathPrefixPattern, to_localtime, timestamp, \
get_cache_dir, get_keys_dir, prune_within, prune_split, unhexlify, \
Manifest, remove_surrogates, update_excludes, format_archive, check_extension_modules, Statistics, \
dir_is_tagged, bigint_to_int, ChunkerParams, CompressionSpec, is_slow_msgpack, yes, sysinfo, \
EXIT_SUCCESS, EXIT_WARNING, EXIT_ERROR, log_multi
EXIT_SUCCESS, EXIT_WARNING, EXIT_ERROR, log_multi, PatternMatcher
from .logger import create_logger, setup_logging
logger = create_logger()
from .compress import Compressor, COMPR_BUFFER
Expand Down Expand Up @@ -129,6 +129,10 @@ def do_change_passphrase(self, args):

def do_create(self, args):
"""Create new archive"""
matcher = PatternMatcher(fallback=True)
if args.excludes:
matcher.add(args.excludes, False)

def create_inner(archive, cache):
# Add cache dir to inode_skip list
skip_inodes = set()
Expand Down Expand Up @@ -166,7 +170,7 @@ def create_inner(archive, cache):
continue
else:
restrict_dev = None
self._process(archive, cache, args.excludes, args.exclude_caches, args.exclude_if_present,
self._process(archive, cache, matcher, args.exclude_caches, args.exclude_if_present,
args.keep_tag_files, skip_inodes, path, restrict_dev,
read_special=args.read_special, dry_run=dry_run)
if not dry_run:
Expand Down Expand Up @@ -202,11 +206,12 @@ def create_inner(archive, cache):
create_inner(None, None)
return self.exit_code

def _process(self, archive, cache, excludes, exclude_caches, exclude_if_present,
def _process(self, archive, cache, matcher, exclude_caches, exclude_if_present,
keep_tag_files, skip_inodes, path, restrict_dev,
read_special=False, dry_run=False):
if exclude_path(path, excludes):
if not matcher.match(path):
return

try:
st = os.lstat(path)
except OSError as e:
Expand Down Expand Up @@ -235,7 +240,7 @@ def _process(self, archive, cache, excludes, exclude_caches, exclude_if_present,
if keep_tag_files and not dry_run:
archive.process_dir(path, st)
for tag_path in tag_paths:
self._process(archive, cache, excludes, exclude_caches, exclude_if_present,
self._process(archive, cache, matcher, exclude_caches, exclude_if_present,
keep_tag_files, skip_inodes, tag_path, restrict_dev,
read_special=read_special, dry_run=dry_run)
return
Expand All @@ -249,7 +254,7 @@ def _process(self, archive, cache, excludes, exclude_caches, exclude_if_present,
else:
for filename in sorted(entries):
entry_path = os.path.normpath(os.path.join(path, filename))
self._process(archive, cache, excludes, exclude_caches, exclude_if_present,
self._process(archive, cache, matcher, exclude_caches, exclude_if_present,
keep_tag_files, skip_inodes, entry_path, restrict_dev,
read_special=read_special, dry_run=dry_run)
elif stat.S_ISLNK(st.st_mode):
Expand Down Expand Up @@ -286,13 +291,25 @@ def do_extract(self, args):
manifest, key = Manifest.load(repository)
archive = Archive(repository, key, manifest, args.location.archive,
numeric_owner=args.numeric_owner)
patterns = adjust_patterns(args.paths, args.excludes)

matcher = PatternMatcher()
if args.excludes:
matcher.add(args.excludes, False)

include_patterns = []

if args.paths:
include_patterns.extend(parse_pattern(i, PathPrefixPattern) for i in args.paths)
matcher.add(include_patterns, True)

matcher.fallback = not include_patterns

dry_run = args.dry_run
stdout = args.stdout
sparse = args.sparse
strip_components = args.strip_components
dirs = []
for item in archive.iter_items(lambda item: not exclude_path(item[b'path'], patterns), preload=True):
for item in archive.iter_items(lambda item: matcher.match(item[b'path']), preload=True):
orig_path = item[b'path']
if strip_components:
item[b'path'] = os.sep.join(orig_path.split(os.sep)[strip_components:])
Expand All @@ -317,8 +334,8 @@ def do_extract(self, args):
if not args.dry_run:
while dirs:
archive.extract_item(dirs.pop(-1))
for pattern in (patterns or []):
if isinstance(pattern, PathPrefixPattern) and pattern.match_count == 0:
for pattern in include_patterns:
if pattern.match_count == 0:
self.print_warning("Include pattern '%s' never matched.", pattern)
return self.exit_code

Expand Down Expand Up @@ -611,12 +628,12 @@ def do_break_lock(self, args):

helptext = {}
helptext['patterns'] = textwrap.dedent('''
Exclusion patterns support two separate styles, fnmatch and regular
expressions. If followed by a colon (':') the first two characters of
a pattern are used as a style selector. Explicit style selection is necessary
when regular expressions are desired or when the desired fnmatch pattern
starts with two alphanumeric characters followed by a colon (i.e.
`aa:something/*`).
Exclusion patterns support three separate styles, fnmatch, regular
expressions and path prefixes. If followed by a colon (':') the first two
characters of a pattern are used as a style selector. Explicit style
selection is necessary when a non-default style is desired or when the
desired pattern starts with two alphanumeric characters followed by a colon
(i.e. `aa:something/*`).
`Fnmatch <https://docs.python.org/3/library/fnmatch.html>`_ patterns use
a variant of shell pattern syntax, with '*' matching any number of
Expand All @@ -640,6 +657,10 @@ def do_break_lock(self, args):
documentation for the re module
<https://docs.python.org/3/library/re.html>`_.
Prefix path patterns can be selected with the prefix `pp:`. This pattern
style is useful to match whole sub-directories. The pattern `pp:/data/bar`
matches `/data/bar` and everything therein.
Exclusions can be passed via the command line option `--exclude`. When used
from within a shell the patterns should be quoted to protect them from
expansion.
Expand Down Expand Up @@ -961,7 +982,7 @@ def build_parser(self, args=None, prog=None):
type=location_validator(archive=True),
help='archive to extract')
subparser.add_argument('paths', metavar='PATH', nargs='*', type=str,
help='paths to extract')
help='paths to extract; patterns are supported')

rename_epilog = textwrap.dedent("""
This command renames an archive in the repository.
Expand Down
62 changes: 38 additions & 24 deletions borg/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,21 +257,25 @@ def update_excludes(args):
file.close()


def adjust_patterns(paths, excludes):
    """Combine exclude patterns with the paths requested on the command line.

    When no paths are given, ``excludes`` is returned unchanged (it may be
    ``None``).  Otherwise a new list is built: the excludes first, then one
    ``PathPrefixPattern`` per requested path, then a catch-all
    ``FnmatchPattern('*')`` as the final entry.
    """
    if not paths:
        return excludes

    requested = [PathPrefixPattern(path) for path in paths]
    return (excludes or []) + requested + [FnmatchPattern('*')]
class PatternMatcher:
    """Ordered collection of (pattern, value) pairs.

    ``match`` returns the value registered with the first pattern that
    matches a path, or ``fallback`` when no pattern matches.
    """

    def __init__(self, fallback=None):
        # Value to return from match function when none of the patterns match.
        self.fallback = fallback
        self._items = []

    def add(self, patterns, value):
        """Register every pattern in ``patterns`` with the same ``value``.

        Patterns are appended after the ones already present, so earlier
        additions take precedence in ``match``.
        """
        for pattern in patterns:
            self._items.append((pattern, value))

    def match(self, path):
        """Return the value of the first pattern matching ``path``.

        Patterns are tried in insertion order and evaluation stops at the
        first hit; ``self.fallback`` is returned when none matches.
        """
        hits = (value for pattern, value in self._items if pattern.match(path))
        return next(hits, self.fallback)


def exclude_path(path, patterns):
    """Used by create and extract sub-commands to determine
    whether or not an item should be processed.

    Only the first pattern that matches ``path`` is considered: the result
    is True when that pattern is an ``FnmatchPattern`` or ``RegexPattern``
    and False otherwise.  False is also returned when nothing matches or
    ``patterns`` is empty or None.
    """
    for candidate in (patterns or ()):
        if not candidate.match(path):
            continue
        return isinstance(candidate, (FnmatchPattern, RegexPattern))
    return False


def normalized(func):
Expand All @@ -295,6 +299,8 @@ def normalize_wrapper(self, path):
class PatternBase:
"""Shared logic for inclusion/exclusion patterns.
"""
PREFIX = NotImplemented

def __init__(self, pattern):
self.pattern_orig = pattern
self.match_count = 0
Expand Down Expand Up @@ -339,6 +345,8 @@ class PathPrefixPattern(PatternBase):
If a directory is specified, all paths that start with that
path match as well. A trailing slash makes no difference.
"""
PREFIX = "pp"

def _prepare(self, pattern):
self.pattern = os.path.normpath(pattern).rstrip(os.path.sep) + os.path.sep

Expand All @@ -350,6 +358,8 @@ class FnmatchPattern(PatternBase):
"""Shell glob patterns to exclude. A trailing slash means to
exclude the contents of a directory, but not the directory itself.
"""
PREFIX = "fm"

def _prepare(self, pattern):
if pattern.endswith(os.path.sep):
pattern = os.path.normpath(pattern).rstrip(os.path.sep) + os.path.sep + '*' + os.path.sep
Expand All @@ -369,6 +379,8 @@ def _match(self, path):
class RegexPattern(PatternBase):
"""Regular expression to exclude.
"""
PREFIX = "re"

def _prepare(self, pattern):
self.pattern = pattern
self.regex = re.compile(pattern)
Expand All @@ -381,25 +393,27 @@ def _match(self, path):
return (self.regex.search(path) is not None)


_DEFAULT_PATTERN_STYLE = "fm"
_PATTERN_STYLES = {
"fm": FnmatchPattern,
"re": RegexPattern,
}
# All selectable pattern implementations.  Each class declares its own
# two-character style selector in its PREFIX attribute.
_PATTERN_STYLES = {
    FnmatchPattern,
    PathPrefixPattern,
    RegexPattern,
}

# Lookup table from style selector (e.g. "fm") to the implementing class.
_PATTERN_STYLE_BY_PREFIX = {style.PREFIX: style for style in _PATTERN_STYLES}

def parse_pattern(pattern):

def parse_pattern(pattern, fallback=FnmatchPattern):
"""Read pattern from string and return an instance of the appropriate implementation class.
"""
if len(pattern) > 2 and pattern[2] == ":" and pattern[:2].isalnum():
(style, pattern) = (pattern[:2], pattern[3:])
else:
style = _DEFAULT_PATTERN_STYLE

cls = _PATTERN_STYLES.get(style, None)
cls = _PATTERN_STYLE_BY_PREFIX.get(style, None)

if cls is None:
raise ValueError("Unknown pattern style: {}".format(style))
if cls is None:
raise ValueError("Unknown pattern style: {}".format(style))
else:
cls = fallback

return cls(pattern)

Expand Down
33 changes: 33 additions & 0 deletions borg/testsuite/archiver.py
Original file line number Diff line number Diff line change
Expand Up @@ -562,6 +562,39 @@ def test_extract_include_exclude_regex_from_file(self):
self.cmd('extract', '--exclude-from=' + self.exclude_file_path, self.repository_location + '::test')
self.assert_equal(sorted(os.listdir('output/input')), ['file3'])

def test_extract_with_pattern(self):
    """Run ``extract`` with pattern-style PATH arguments and check the result.

    Five regular files are archived once; each scenario then extracts from
    that archive with a different mix of PATH patterns and ``--exclude``
    options and compares the listing of ``output/input`` with the expected
    file names.
    """
    self.cmd("init", self.repository_location)
    for name in ("file1", "file2", "file3", "file4", "file333"):
        self.create_regular_file(name, size=1024 * 80)

    archive = self.repository_location + "::test"
    self.cmd("create", archive, "input")

    scenarios = [
        # Extract everything with regular expression
        (("extract", archive, "re:.*"),
         ["file1", "file2", "file3", "file333", "file4"]),
        # Extract with pattern while also excluding files
        (("extract", "--exclude=re:file[34]$", archive, r"re:file\d$"),
         ["file1", "file2"]),
        # Combine --exclude with pattern for extraction
        (("extract", "--exclude=input/file1", archive, "re:file[12]$"),
         ["file2"]),
        # Multiple patterns
        (("extract", archive, "fm:input/file1", "fm:*file33*", "input/file2"),
         ["file1", "file2", "file333"]),
    ]

    final = len(scenarios) - 1
    for position, (arguments, expected) in enumerate(scenarios):
        with changedir("output"):
            self.cmd(*arguments)
        self.assert_equal(sorted(os.listdir("output/input")), expected)
        # Clear the extracted tree between scenarios; the last result is
        # left in place.
        if position != final:
            shutil.rmtree("output/input")

def test_exclude_caches(self):
self.cmd('init', self.repository_location)
self.create_regular_file('file1', size=1024 * 80)
Expand Down
Loading

0 comments on commit 1e1812c

Please sign in to comment.