jborg · rotty · Jun 17, 2015 · Jun 19, 2015 · Jun 28, 2015 · Jul 5, 2015
diff --git a/attic/archiver.py b/attic/archiver.py
@@ -18,7 +18,7 @@
     format_file_mode, ExcludePattern, exclude_path, adjust_patterns, to_localtime, \
     get_cache_dir, get_keys_dir, format_timedelta, prune_within, prune_split, \
     Manifest, remove_surrogates, update_excludes, format_archive, check_extension_modules, Statistics, \
-    is_cachedir, bigint_to_int
+    is_cachedir, bigint_to_int, iter_delim, FileType
 from attic.remote import RepositoryServer, RemoteRepository
 
 
@@ -116,6 +116,10 @@ def do_create(self, args):
                 skip_inodes.add((st.st_ino, st.st_dev))
             except IOError:
                 pass
+        for f in args.filelists:
+            self._process_filelist(archive, cache, skip_inodes, f)
+            if not (f is sys.stdin or f is getattr(sys.stdin, 'buffer', None)):
+                f.close()
         for path in args.paths:
             path = os.path.normpath(path)
             if args.dontcross:
@@ -142,7 +146,14 @@ def do_create(self, args):
             print('-' * 78)
         return self.exit_code
 
-    def _process(self, archive, cache, excludes, exclude_caches, skip_inodes, path, restrict_dev):
+    def _process_filelist(self, archive, cache, skip_inodes, filelist):
+        """Feed every path named in filelist to _process, with excludes and recursion disabled."""
+        separator = getattr(filelist, 'delim', b'\n')
+        for raw_name in iter_delim(filelist, delim=separator, delim_out=b''):
+            self._process(archive, cache, excludes=[], exclude_caches=False, skip_inodes=skip_inodes,
+                          path=os.fsdecode(raw_name), restrict_dev=False, recurse=False)
+
+    def _process(self, archive, cache, excludes, exclude_caches, skip_inodes, path, restrict_dev, recurse=True):
         if exclude_path(path, excludes):
             return
         try:
@@ -168,6 +179,8 @@ def _process(self, archive, cache, excludes, exclude_caches, skip_inodes, path,
             if exclude_caches and is_cachedir(path):
                 return
             archive.process_item(path, st)
+            if not recurse:
+                return
             try:
                 entries = os.listdir(path)
             except OSError as e:
@@ -544,6 +557,14 @@ def run(self, args=None):
         subparser.add_argument('--exclude-caches', dest='exclude_caches',
                                action='store_true', default=False,
                                help='exclude directories that contain a CACHEDIR.TAG file (http://www.brynosaurus.com/cachedir/spec.html)')
+        subparser.add_argument('--files-from', dest='filelists',
+                               type=FileType('rb'), action='append', default=[],
+                               metavar='FILELIST',
+                               help='read a list of files to backup from FILELIST, separated by newlines')
+        subparser.add_argument('--files-from0', dest='filelists',
+                               type=FileType('rb', delim=b'\0'), action='append', default=[],
+                               metavar='FILELIST',
+                               help='read a list of files to backup from FILELIST, separated by NUL characters')
         subparser.add_argument('-c', '--checkpoint-interval', dest='checkpoint_interval',
                                type=int, default=300, metavar='SECONDS',
                                help='write checkpoint every SECONDS seconds (Default: 300)')
@@ -556,7 +577,7 @@ def run(self, args=None):
         subparser.add_argument('archive', metavar='ARCHIVE',
                                type=location_validator(archive=True),
                                help='archive to create')
-        subparser.add_argument('paths', metavar='PATH', nargs='+', type=str,
+        subparser.add_argument('paths', metavar='PATH', nargs='*', type=str,
                                help='paths to archive')
 
         extract_epilog = textwrap.dedent("""

diff --git a/attic/helpers.py b/attic/helpers.py
@@ -528,6 +528,63 @@ def make_path_safe(path):
     """
     return _safe_re.sub('', path) or '.'
 
+def iter_delim(f, delim='\n', delim_out=None, read_size=4096):
+    """Yield the delim-separated records of file object f, read in chunks.
+
+    Every terminated record is yielded with its delimiter replaced by
+    delim_out (default: delim itself, i.e. the delimiter is retained);
+    trailing data without a final delimiter is yielded unchanged.
+    delim and delim_out must be str for text files and bytes for binary
+    files.  Raises ValueError if delim is empty.
+    """
+    if not delim:
+        raise ValueError('empty delimiter')
+    if delim_out is None:
+        delim_out = delim
+    empty = delim[:0]  # '' or b'', matching the delimiter type
+    keep = len(delim) - 1  # longest delimiter prefix a read can end with
+    bufs, tail = [], empty
+    while True:
+        data = f.read(read_size)
+        if not data:
+            break
+        # Re-scan the held-back tail: a delimiter may straddle two reads.
+        data = tail + data
+        start = 0
+        while True:
+            pos = data.find(delim, start)
+            if pos < 0:
+                break
+            yield empty.join(bufs) + data[start:pos] + delim_out
+            start = pos + len(delim)
+            bufs = []
+        split = max(start, len(data) - keep)
+        bufs.append(data[start:split])
+        tail = data[split:]
+    rest = empty.join(bufs) + tail
+    if rest:
+        yield rest
+
+class FileType(argparse.FileType):
+    """argparse.FileType that sets extra attributes on the files it returns.
+
+    Keyword arguments other than mode and bufsize are stored and set as
+    attributes on every file object produced by calling the instance.
+    In binary mode, sys.stdin/sys.stdout are replaced by their .buffer.
+    """
+    def __init__(self, mode='r', bufsize=-1, **kwargs):
+        super().__init__(mode=mode, bufsize=bufsize)
+        self._attrs = kwargs
+        self._binary = 'b' in mode
+
+    def __call__(self, string):
+        result = super().__call__(string)
+        # Work around http://bugs.python.org/issue14156 (binary modes only)
+        if self._binary and (result is sys.stdin or result is sys.stdout):
+            result = result.buffer
+        for key, value in self._attrs.items():
+            setattr(result, key, value)
+        return result
 
 def daemonize():
     """Detach process from controlling terminal and run in background

diff --git a/attic/testsuite/archiver.py b/attic/testsuite/archiver.py
@@ -9,6 +9,7 @@
 import tempfile
 import time
 import unittest
+import itertools
 from hashlib import sha256
 from attic import xattr
 from attic.archive import Archive, ChunkBuffer
@@ -59,6 +60,9 @@ def __exit__(self, *args, **kw):
             if v is not None:
                 os.environ[k] = v
 
+def listdir_recursive(dirname):  # flat iterator over the normalized paths of all non-directory entries
+    per_dir = [[os.path.normpath(os.path.join(root, name)) for name in names] for root, _, names in os.walk(dirname)]
+    return itertools.chain.from_iterable(per_dir)
 
 class ArchiverTestCaseBase(AtticTestCase):
 
@@ -262,6 +266,27 @@ def test_exclude_caches(self):
         self.assert_equal(sorted(os.listdir('output/input')), ['cache2', 'file1'])
         self.assert_equal(sorted(os.listdir('output/input/cache2')), ['CACHEDIR.TAG'])
 
+    def test_files_from(self):  # newline-separated file list
+        self._test_files_from_option(option='--files-from', delim=b'\n')
+
+    def test_files_from0(self):  # NUL-separated file list
+        self._test_files_from_option(option='--files-from0', delim=b'\0')
+
+    def _test_files_from_option(self, *, delim, option):
+        """Create an archive from a delim-separated file list and check that only listed files are extracted."""
+        self.attic('init', self.repository_location)
+        for name in ('file1', 'non-listed/file', 'listed/file'):
+            self.create_regular_file(name, size=1024 * 80)
+        listed_files = sorted(['file1', 'listed/file'])
+        encoded_paths = [os.path.join('input', f).encode('ascii') for f in listed_files]
+        self.create_regular_file('filelist', contents=delim.join(encoded_paths))
+        self.attic('create', option + '=input/filelist', self.repository_location + '::test')
+        with changedir('output'):
+            self.attic('extract', self.repository_location + '::test')
+        with changedir('output/input'):
+            present_files = sorted(listdir_recursive('.'))
+        self.assert_equal(present_files, listed_files)
+
     def test_path_normalization(self):
         self.attic('init', self.repository_location)
         self.create_regular_file('dir1/dir2/file', size=1024 * 80)