diff --git a/src/borg/archive.py b/src/borg/archive.py index 49edb816b2..36bf13aa51 100644 --- a/src/borg/archive.py +++ b/src/borg/archive.py @@ -34,7 +34,7 @@ from .platform import uid2user, user2uid, gid2group, group2gid from .helpers import parse_timestamp, to_localtime from .helpers import OutputTimestamp, format_timedelta, format_file_size, file_status, FileSize -from .helpers import safe_encode, safe_decode, make_path_safe, remove_surrogates +from .helpers import safe_encode, make_path_safe, remove_surrogates from .helpers import StableDict from .helpers import bin_to_hex from .helpers import safe_ns @@ -492,7 +492,6 @@ def _load_meta(self, id): def load(self, id): self.id = id self.metadata = self._load_meta(self.id) - self.metadata.cmdline = [safe_decode(arg) for arg in self.metadata.cmdline] self.name = self.metadata.name self.comment = self.metadata.get('comment', '') @@ -1550,7 +1549,7 @@ class RobustUnpacker: """ def __init__(self, validator, item_keys): super().__init__() - self.item_keys = [msgpack.packb(name.encode()) for name in item_keys] + self.item_keys = [msgpack.packb(name) for name in item_keys] self.validator = validator self._buffered_data = [] self._resync = False @@ -1754,13 +1753,10 @@ def rebuild_manifest(self): Iterates through all objects in the repository looking for archive metadata blocks. """ - required_archive_keys = frozenset(key.encode() for key in REQUIRED_ARCHIVE_KEYS) - def valid_archive(obj): if not isinstance(obj, dict): return False - keys = set(obj) - return required_archive_keys.issubset(keys) + return REQUIRED_ARCHIVE_KEYS.issubset(obj) logger.info('Rebuilding missing manifest, this might take some time...') # as we have lost the manifest, we do not know any more what valid item keys we had. @@ -1769,7 +1765,7 @@ def valid_archive(obj): # lost manifest on a older borg version than the most recent one that was ever used # within this repository (assuming that newer borg versions support more item keys). manifest = Manifest(self.key, self.repository) - archive_keys_serialized = [msgpack.packb(name.encode()) for name in ARCHIVE_KEYS] + archive_keys_serialized = [msgpack.packb(name) for name in ARCHIVE_KEYS] pi = ProgressIndicatorPercent(total=len(self.chunks), msg="Rebuilding manifest %6.2f%%", step=0.01, msgid='check.rebuild_manifest') for chunk_id, _ in self.chunks.iteritems(): @@ -1916,9 +1912,9 @@ def robust_iterator(archive): Missing item chunks will be skipped and the msgpack stream will be restarted """ - item_keys = frozenset(key.encode() for key in self.manifest.item_keys) - required_item_keys = frozenset(key.encode() for key in REQUIRED_ITEM_KEYS) - unpacker = RobustUnpacker(lambda item: isinstance(item, StableDict) and b'path' in item, + item_keys = self.manifest.item_keys + required_item_keys = REQUIRED_ITEM_KEYS + unpacker = RobustUnpacker(lambda item: isinstance(item, StableDict) and 'path' in item, self.manifest.item_keys) _state = 0 @@ -1940,9 +1936,9 @@ def list_keys_safe(keys): def valid_item(obj): if not isinstance(obj, StableDict): return False, 'not a dictionary' - # A bug in Attic up to and including release 0.13 added a (meaningless) b'acl' key to every item. + # A bug in Attic up to and including release 0.13 added a (meaningless) 'acl' key to every item. # We ignore it here, should it exist. See test_attic013_acl_bug for details. 
- obj.pop(b'acl', None) + obj.pop('acl', None) keys = set(obj) if not required_item_keys.issubset(keys): return False, 'missing required keys: ' + list_keys_safe(required_item_keys - keys) @@ -2029,7 +2025,6 @@ def valid_item(obj): archive = ArchiveItem(internal_dict=msgpack.unpackb(data)) if archive.version != 1: raise Exception('Unknown archive metadata version') - archive.cmdline = [safe_decode(arg) for arg in archive.cmdline] items_buffer = ChunkBuffer(self.key) items_buffer.write_chunk = add_callback for item in robust_iterator(archive): diff --git a/src/borg/archiver.py b/src/borg/archiver.py index de9489ff0a..1fdb0c1764 100644 --- a/src/borg/archiver.py +++ b/src/borg/archiver.py @@ -44,7 +44,7 @@ from .archive import has_link from .cache import Cache, assert_secure, SecurityManager from .constants import * # NOQA - from .compress import CompressionSpec + from .compress import CompressionSpec, ZLIB, ZLIB_legacy from .crypto.key import key_creator, key_argument_names, tam_required_file, tam_required from .crypto.key import RepoKey, KeyfileKey, Blake2RepoKey, Blake2KeyfileKey, FlexiKey from .crypto.keymanager import KeyManager @@ -54,7 +54,7 @@ from .helpers import PrefixSpec, GlobSpec, CommentSpec, SortBySpec, FilesCacheMode from .helpers import BaseFormatter, ItemFormatter, ArchiveFormatter from .helpers import format_timedelta, format_file_size, parse_file_size, format_archive - from .helpers import safe_encode, remove_surrogates, bin_to_hex, prepare_dump_dict, eval_escapes + from .helpers import remove_surrogates, bin_to_hex, prepare_dump_dict, eval_escapes from .helpers import interval, prune_within, prune_split, PRUNING_PATTERNS from .helpers import timestamp from .helpers import get_cache_dir, os_stat @@ -338,6 +338,101 @@ def do_serve(self, args): ).serve() return EXIT_SUCCESS + @with_other_repository(manifest=True, key=True, compatibility=(Manifest.Operation.READ,)) + @with_repository(exclusive=True, manifest=True, cache=True, compatibility=(Manifest.Operation.WRITE,)) + def do_transfer(self, args, *, + repository, manifest, key, cache, + other_repository=None, other_manifest=None, other_key=None): + """archives transfer from other repository""" + + def upgrade_item(item): + """upgrade item as needed, get rid of legacy crap""" + if item.get('hardlink_master', True) and 'source' not in item and hardlinkable(item.mode): + item._dict['hlid'] = hlid = hashlib.sha256(item._dict['path']) + hardlink_masters[hlid] = (item._dict.get('chunks'), item._dict.get('chunks_healthy')) + elif 'source' in item and hardlinkable(item.mode): + item._dict['hlid'] = hlid = hashlib.sha256(item._dict['source']) + chunks, chunks_healthy = hardlink_masters.get(hlid, (None, None)) + if chunks is not None: + item._dict['chunks'] = chunks + for chunk_id, _, _ in chunks: + cache.chunk_incref(chunk_id, archive.stats) + if chunks_healthy is not None: + item._dict['chunks_healthy'] = chunks + item._dict.pop('source') # not used for hardlinks any more, replaced by hlid + for attr in 'atime', 'ctime', 'mtime', 'birthtime': + if attr in item: + ns = getattr(item, attr) # decode (bigint or Timestamp) --> int ns + setattr(item, attr, ns) # encode int ns --> msgpack.Timestamp only, no bigint any more + item._dict.pop('hardlink_master', None) # not used for hardlinks any more, replaced by hlid + item._dict.pop('acl', None) # remove remnants of bug in attic <= 0.13 + item.get_size(memorize=True) # if not already present: compute+remember size for items with chunks + return item + + def upgrade_compressed_chunk(chunk): 
+ if ZLIB_legacy.detect(chunk): + chunk = ZLIB.ID + chunk # get rid of the attic legacy: prepend separate type bytes for zlib + return chunk + + dry_run = args.dry_run + + args.consider_checkpoints = True + archive_names = tuple(x.name for x in other_manifest.archives.list_considering(args)) + if not archive_names: + return EXIT_SUCCESS + + for name in archive_names: + transfer_size = 0 + present_size = 0 + if name in manifest.archives and not dry_run: + print(f"{name}: archive is already present in destination repo, skipping.") + else: + if not dry_run: + print(f"{name}: copying archive to destination repo...") + hardlink_masters = {} + other_archive = Archive(other_repository, other_key, other_manifest, name) + archive = Archive(repository, key, manifest, name, cache=cache, create=True) if not dry_run else None + for item in other_archive.iter_items(): + if 'chunks' in item: + chunks = [] + for chunk_id, size, _ in item.chunks: + refcount = cache.seen_chunk(chunk_id, size) + if refcount == 0: # target repo does not yet have this chunk + if not dry_run: + cdata = other_repository.get(chunk_id) + # keep compressed payload same, avoid decompression / recompression + data = other_key.decrypt(chunk_id, cdata, decompress=False) + data = upgrade_compressed_chunk(data) + chunk_entry = cache.add_chunk(chunk_id, data, archive.stats, wait=False, + compress=False, size=size) + cache.repository.async_response(wait=False) + chunks.append(chunk_entry) + transfer_size += size + else: + if not dry_run: + chunk_entry = cache.chunk_incref(chunk_id, archive.stats) + chunks.append(chunk_entry) + present_size += size + if not dry_run: + item.chunks = chunks # overwrite! IDs and sizes are same, csizes are likely different + archive.stats.nfiles += 1 + if not dry_run: + archive.add_item(upgrade_item(item)) + if not dry_run: + additional_metadata = {} + # keep all metadata except archive version and stats. also do not keep + # recreate_source_id, recreate_args, recreate_partial_chunks which were used only in 1.1.0b1 .. b2. + for attr in ('cmdline', 'hostname', 'username', 'time', 'time_end', 'comment', + 'chunker_params', 'recreate_cmdline'): + if hasattr(other_archive.metadata, attr): + additional_metadata[attr] = getattr(other_archive.metadata, attr) + archive.save(stats=archive.stats, additional_metadata=additional_metadata) + print(f"{name}: finished. 
transfer_size: {transfer_size} present_size: {present_size}") + else: + print(f"{name}: completed" if transfer_size == 0 else + f"{name}: incomplete, transfer_size: {transfer_size} present_size: {present_size}") + return EXIT_SUCCESS + @with_repository(create=True, exclusive=True, manifest=False) @with_other_repository(key=True, compatibility=(Manifest.Operation.READ, )) def do_init(self, args, repository, *, other_repository=None, other_key=None): @@ -1844,12 +1939,12 @@ def do_upgrade(self, args, repository, manifest=None, key=None): print('This repository is not encrypted, cannot enable TAM.') return EXIT_ERROR - if not manifest.tam_verified or not manifest.config.get(b'tam_required', False): + if not manifest.tam_verified or not manifest.config.get('tam_required', False): # The standard archive listing doesn't include the archive ID like in borg 1.1.x print('Manifest contents:') for archive_info in manifest.archives.list(sort_by=['ts']): print(format_archive(archive_info), '[%s]' % bin_to_hex(archive_info.id)) - manifest.config[b'tam_required'] = True + manifest.config['tam_required'] = True manifest.write() repository.commit(compact=False) if not key.tam_required: @@ -1872,7 +1967,7 @@ def do_upgrade(self, args, repository, manifest=None, key=None): print('Key updated') if hasattr(key, 'find_key'): print('Key location:', key.find_key()) - manifest.config[b'tam_required'] = False + manifest.config['tam_required'] = False manifest.write() repository.commit(compact=False) else: @@ -2205,7 +2300,7 @@ def do_debug_dump_archive(self, args, repository, manifest, key): """dump decoded archive metadata (not: data)""" try: - archive_meta_orig = manifest.archives.get_raw_dict()[safe_encode(args.location.archive)] + archive_meta_orig = manifest.archives.get_raw_dict()[args.location.archive] except KeyError: raise Archive.DoesNotExist(args.location.archive) @@ -2222,7 +2317,7 @@ def output(fd): fd.write(do_indent(prepare_dump_dict(archive_meta_orig))) fd.write(',\n') - data = key.decrypt(archive_meta_orig[b'id'], repository.get(archive_meta_orig[b'id'])) + data = key.decrypt(archive_meta_orig['id'], repository.get(archive_meta_orig['id'])) archive_org_dict = msgpack.unpackb(data, object_hook=StableDict) fd.write(' "_meta":\n') @@ -2232,7 +2327,7 @@ def output(fd): unpacker = msgpack.Unpacker(use_list=False, object_hook=StableDict) first = True - for item_id in archive_org_dict[b'items']: + for item_id in archive_org_dict['items']: data = key.decrypt(item_id, repository.get(item_id)) unpacker.feed(data) for item in unpacker: @@ -4083,6 +4178,43 @@ def define_borg_mount(parser): help='archives to delete') define_archive_filters_group(subparser) + # borg transfer + transfer_epilog = process_epilog(""" + This command transfers archives from one repository to another repository. + + Suggested use: + + # initialize DST_REPO reusing key material from SRC_REPO, so that + # chunking and chunk id generation will work in the same way as before. + borg init --other-location=SRC_REPO --encryption=DST_ENC DST_REPO + + # transfer archives from SRC_REPO to DST_REPO + borg transfer --dry-run SRC_REPO DST_REPO # check what it would do + borg transfer SRC_REPO DST_REPO # do it! + borg transfer --dry-run SRC_REPO DST_REPO # check! anything left? + + The default is to transfer all archives, including checkpoint archives. + + You could use the misc. archive filter options to limit which archives it will + transfer, e.g. using the --prefix option. 
This is recommended for big + repositories with multiple data sets to keep the runtime per invocation lower. + """) + subparser = subparsers.add_parser('transfer', parents=[common_parser], add_help=False, + description=self.do_transfer.__doc__, + epilog=transfer_epilog, + formatter_class=argparse.RawDescriptionHelpFormatter, + help='transfer of archives from another repository') + subparser.set_defaults(func=self.do_transfer) + subparser.add_argument('-n', '--dry-run', dest='dry_run', action='store_true', + help='do not change repository, just check') + subparser.add_argument('other_location', metavar='SRC_REPOSITORY', + type=location_validator(archive=False, other=True), + help='source repository') + subparser.add_argument('location', metavar='DST_REPOSITORY', + type=location_validator(archive=False, other=False), + help='destination repository') + define_archive_filters_group(subparser) + # borg diff diff_epilog = process_epilog(""" This command finds differences (file contents, user/group/mode) between archives. diff --git a/src/borg/cache.py b/src/borg/cache.py index 6fa74e692d..6cd6123590 100644 --- a/src/borg/cache.py +++ b/src/borg/cache.py @@ -19,7 +19,7 @@ from .helpers import Error from .helpers import Manifest from .helpers import get_cache_dir, get_security_dir -from .helpers import int_to_bigint, bigint_to_int, bin_to_hex, parse_stringified_list +from .helpers import bin_to_hex, parse_stringified_list from .helpers import format_file_size from .helpers import safe_ns from .helpers import yes @@ -28,6 +28,7 @@ from .helpers import set_ec, EXIT_WARNING from .helpers import safe_unlink from .helpers import msgpack +from .helpers.msgpack import int_to_timestamp, timestamp_to_int from .item import ArchiveItem, ChunkListEntry from .crypto.key import PlaintextKey from .crypto.file_integrity import IntegrityCheckedFile, DetachedIntegrityCheckedFile, FileIntegrityError @@ -623,7 +624,7 @@ def commit(self): # this is to avoid issues with filesystem snapshots and cmtime granularity. # Also keep files from older backups that have not reached BORG_FILES_CACHE_TTL yet. entry = FileCacheEntry(*msgpack.unpackb(item)) - if entry.age == 0 and bigint_to_int(entry.cmtime) < self._newest_cmtime or \ + if entry.age == 0 and timestamp_to_int(entry.cmtime) < self._newest_cmtime or \ entry.age > 0 and entry.age < ttl: msgpack.pack((path_hash, entry), fd) entry_count += 1 @@ -1018,10 +1019,10 @@ def file_known_and_unchanged(self, hashed_path, path_hash, st): if 'i' in cache_mode and entry.inode != st.st_ino: files_cache_logger.debug('KNOWN-CHANGED: file inode number has changed: %r', hashed_path) return True, None - if 'c' in cache_mode and bigint_to_int(entry.cmtime) != st.st_ctime_ns: + if 'c' in cache_mode and timestamp_to_int(entry.cmtime) != st.st_ctime_ns: files_cache_logger.debug('KNOWN-CHANGED: file ctime has changed: %r', hashed_path) return True, None - elif 'm' in cache_mode and bigint_to_int(entry.cmtime) != st.st_mtime_ns: + elif 'm' in cache_mode and timestamp_to_int(entry.cmtime) != st.st_mtime_ns: files_cache_logger.debug('KNOWN-CHANGED: file mtime has changed: %r', hashed_path) return True, None # we ignored the inode number in the comparison above or it is still same. 
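For illustration only (not part of the patch): with the switch from the bigint helpers to msgpack's native Timestamp, a files-cache entry round-trips its nanosecond cmtime as sketched below. The helper names int_to_timestamp() and timestamp_to_int() are the ones added to helpers/msgpack.py further down in this diff; the sample value is made up.

from msgpack import Timestamp

def int_to_timestamp(ns):                  # as defined in helpers/msgpack.py below
    return Timestamp.from_unix_nano(ns)

def timestamp_to_int(ts):                  # legacy bigint branch omitted in this sketch
    return ts.to_unix_nano() if isinstance(ts, Timestamp) else ts

st_mtime_ns = 1_650_000_000_123_456_789    # e.g. from os.stat(path).st_mtime_ns
cmtime = int_to_timestamp(st_mtime_ns)     # stored in FileCacheEntry.cmtime, packs as a msgpack Timestamp
assert timestamp_to_int(cmtime) == st_mtime_ns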
@@ -1049,7 +1050,7 @@ def memorize_file(self, hashed_path, path_hash, st, ids): elif 'm' in cache_mode: cmtime_type = 'mtime' cmtime_ns = safe_ns(st.st_mtime_ns) - entry = FileCacheEntry(age=0, inode=st.st_ino, size=st.st_size, cmtime=int_to_bigint(cmtime_ns), chunk_ids=ids) + entry = FileCacheEntry(age=0, inode=st.st_ino, size=st.st_size, cmtime=int_to_timestamp(cmtime_ns), chunk_ids=ids) self.files[path_hash] = msgpack.packb(entry) self._newest_cmtime = max(self._newest_cmtime or 0, cmtime_ns) files_cache_logger.debug('FILES-CACHE-UPDATE: put %r [has %s] <- %r', diff --git a/src/borg/cache_sync/unpack.h b/src/borg/cache_sync/unpack.h index f6dd9ca8e8..bba566419f 100644 --- a/src/borg/cache_sync/unpack.h +++ b/src/borg/cache_sync/unpack.h @@ -384,19 +384,11 @@ static inline int unpack_callback_map_end(unpack_user* u) static inline int unpack_callback_raw(unpack_user* u, const char* b, const char* p, unsigned int length) { - /* raw = what Borg uses for binary stuff and strings as well */ + /* raw = what Borg uses for text stuff */ /* Note: p points to an internal buffer which contains l bytes. */ (void)b; switch(u->expect) { - case expect_key: - if(length != 32) { - SET_LAST_ERROR("Incorrect key length"); - return -1; - } - memcpy(u->current.key, p, 32); - u->expect = expect_size; - break; case expect_map_key: if(length == 6 && !memcmp("chunks", p, 6)) { u->expect = expect_chunks_begin; @@ -409,19 +401,31 @@ static inline int unpack_callback_raw(unpack_user* u, const char* b, const char* u->expect = expect_map_item_end; } break; - default: - if(u->inside_chunks) { - SET_LAST_ERROR("Unexpected bytes in chunks structure"); - return -1; - } } return 0; } static inline int unpack_callback_bin(unpack_user* u, const char* b, const char* p, unsigned int length) { - (void)u; (void)b; (void)p; (void)length; - UNEXPECTED("bin"); + /* bin = what Borg uses for binary stuff */ + /* Note: p points to an internal buffer which contains l bytes. */ + (void)b; + + switch(u->expect) { + case expect_key: + if(length != 32) { + SET_LAST_ERROR("Incorrect key length"); + return -1; + } + memcpy(u->current.key, p, 32); + u->expect = expect_size; + break; + default: + if(u->inside_chunks) { + SET_LAST_ERROR("Unexpected bytes in chunks structure"); + return -1; + } + } return 0; } diff --git a/src/borg/compress.pyx b/src/borg/compress.pyx index 1900480025..bbc2eaaf8b 100644 --- a/src/borg/compress.pyx +++ b/src/borg/compress.pyx @@ -331,14 +331,52 @@ class ZSTD(DecidingCompressor): return dest[:osize] -class ZLIB(CompressorBase): +class ZLIB(DecidingCompressor): """ zlib compression / decompression (python stdlib) """ - ID = b'\x08\x00' # not used here, see detect() - # avoid all 0x.8.. IDs elsewhere! + ID = b'\x05\x00' name = 'zlib' + def __init__(self, level=6, **kwargs): + super().__init__(**kwargs) + self.level = level + + def _decide(self, data): + """ + Decides what to do with *data*. Returns (compressor, zlib_data). + + *zlib_data* is the ZLIB result if *compressor* is ZLIB as well, otherwise it is None. + """ + zlib_data = zlib.compress(data, self.level) + if len(zlib_data) < len(data): + return self, zlib_data + else: + return NONE_COMPRESSOR, None + + def decompress(self, data): + data = super().decompress(data) + try: + return zlib.decompress(data) + except zlib.error as e: + raise DecompressionError(str(e)) from None + + +class ZLIB_legacy(CompressorBase): + """ + zlib compression / decompression (python stdlib) + + Note: This is the legacy ZLIB support as used by borg < 1.3. 
+ It still suffers from attic *only* supporting zlib and not having separate + ID bytes to differentiate between differently compressed chunks. + This just works because zlib compressed stuff always starts with 0x.8.. bytes. + Newer borg uses the ZLIB class that has separate ID bytes (as all the other + compressors) and does not need this hack. + """ + ID = b'\x08\x00' # not used here, see detect() + # avoid all 0x.8.. IDs elsewhere! + name = 'zlib_legacy' + @classmethod def detect(cls, data): # matches misc. patterns 0x.8.. used by zlib @@ -502,13 +540,14 @@ COMPRESSOR_TABLE = { CNONE.name: CNONE, LZ4.name: LZ4, ZLIB.name: ZLIB, + ZLIB_legacy.name: ZLIB_legacy, LZMA.name: LZMA, Auto.name: Auto, ZSTD.name: ZSTD, ObfuscateSize.name: ObfuscateSize, } # List of possible compression types. Does not include Auto, since it is a meta-Compressor. -COMPRESSOR_LIST = [LZ4, ZSTD, CNONE, ZLIB, LZMA, ObfuscateSize, ] # check fast stuff first +COMPRESSOR_LIST = [LZ4, ZSTD, CNONE, ZLIB, ZLIB_legacy, LZMA, ObfuscateSize, ] # check fast stuff first def get_compressor(name, **kwargs): cls = COMPRESSOR_TABLE[name] @@ -554,7 +593,7 @@ class CompressionSpec: self.name = values[0] if self.name in ('none', 'lz4', ): return - elif self.name in ('zlib', 'lzma', ): + elif self.name in ('zlib', 'lzma', 'zlib_legacy'): # zlib_legacy just for testing if count < 2: level = 6 # default compression level in py stdlib elif count == 2: @@ -597,7 +636,7 @@ class CompressionSpec: def compressor(self): if self.name in ('none', 'lz4', ): return get_compressor(self.name) - elif self.name in ('zlib', 'lzma', 'zstd', ): + elif self.name in ('zlib', 'lzma', 'zstd', 'zlib_legacy'): return get_compressor(self.name, level=self.level) elif self.name == 'auto': return get_compressor(self.name, compressor=self.inner.compressor) diff --git a/src/borg/constants.py b/src/borg/constants.py index 0b2ef16a16..13eb8bd232 100644 --- a/src/borg/constants.py +++ b/src/borg/constants.py @@ -1,5 +1,5 @@ # this set must be kept complete, otherwise the RobustUnpacker might malfunction: -ITEM_KEYS = frozenset(['path', 'source', 'rdev', 'chunks', 'chunks_healthy', 'hardlink_master', +ITEM_KEYS = frozenset(['path', 'source', 'rdev', 'chunks', 'chunks_healthy', 'hardlink_master', 'hlid', 'mode', 'user', 'group', 'uid', 'gid', 'mtime', 'atime', 'ctime', 'birthtime', 'size', 'xattrs', 'bsdflags', 'acl_nfs4', 'acl_access', 'acl_default', 'acl_extended', 'part']) diff --git a/src/borg/crypto/key.py b/src/borg/crypto/key.py index 6ca6fbac02..15df53d001 100644 --- a/src/borg/crypto/key.py +++ b/src/borg/crypto/key.py @@ -232,24 +232,24 @@ def unpack_and_verify_manifest(self, data, force_tam_not_required=False): unpacker = get_limited_unpacker('manifest') unpacker.feed(data) unpacked = unpacker.unpack() - if b'tam' not in unpacked: + if 'tam' not in unpacked: if tam_required: raise TAMRequiredError(self.repository._location.canonical_path()) else: logger.debug('TAM not found and not required') return unpacked, False - tam = unpacked.pop(b'tam', None) + tam = unpacked.pop('tam', None) if not isinstance(tam, dict): raise TAMInvalid() - tam_type = tam.get(b'type', b'').decode('ascii', 'replace') + tam_type = tam.get('type', '') if tam_type != 'HKDF_HMAC_SHA512': if tam_required: raise TAMUnsupportedSuiteError(repr(tam_type)) else: logger.debug('Ignoring TAM made with unsupported suite, since TAM is not required: %r', tam_type) return unpacked, False - tam_hmac = tam.get(b'hmac') - tam_salt = tam.get(b'salt') + tam_hmac = tam.get('hmac') + tam_salt = 
tam.get('salt') if not isinstance(tam_salt, bytes) or not isinstance(tam_hmac, bytes): raise TAMInvalid() offset = data.index(tam_hmac) diff --git a/src/borg/helpers/manifest.py b/src/borg/helpers/manifest.py index 425feb4e68..1b9d91fb49 100644 --- a/src/borg/helpers/manifest.py +++ b/src/borg/helpers/manifest.py @@ -12,7 +12,7 @@ logger = create_logger() from .datastruct import StableDict -from .parseformat import bin_to_hex, safe_encode, safe_decode +from .parseformat import bin_to_hex from .time import parse_timestamp from .. import shellpattern from ..constants import * # NOQA @@ -39,39 +39,35 @@ class Archives(abc.MutableMapping): str timestamps or datetime timestamps. """ def __init__(self): - # key: encoded archive name, value: dict(b'id': bytes_id, b'time': bytes_iso_ts) + # key: str archive name, value: dict('id': bytes_id, 'time': str_iso_ts) self._archives = {} def __len__(self): return len(self._archives) def __iter__(self): - return iter(safe_decode(name) for name in self._archives) + return iter(self._archives) def __getitem__(self, name): assert isinstance(name, str) - _name = safe_encode(name) - values = self._archives.get(_name) + values = self._archives.get(name) if values is None: raise KeyError - ts = parse_timestamp(values[b'time'].decode()) - return ArchiveInfo(name=name, id=values[b'id'], ts=ts) + ts = parse_timestamp(values['time']) + return ArchiveInfo(name=name, id=values['id'], ts=ts) def __setitem__(self, name, info): assert isinstance(name, str) - name = safe_encode(name) assert isinstance(info, tuple) id, ts = info assert isinstance(id, bytes) if isinstance(ts, datetime): ts = ts.replace(tzinfo=None).strftime(ISO_FORMAT) assert isinstance(ts, str) - ts = ts.encode() - self._archives[name] = {b'id': id, b'time': ts} + self._archives[name] = {'id': id, 'time': ts} def __delitem__(self, name): assert isinstance(name, str) - name = safe_encode(name) del self._archives[name] def list(self, *, glob=None, match_end=r'\Z', sort_by=(), consider_checkpoints=True, first=None, last=None, reverse=False): @@ -116,8 +112,8 @@ def list_considering(self, args): def set_raw_dict(self, d): """set the dict we get from the msgpack unpacker""" for k, v in d.items(): - assert isinstance(k, bytes) - assert isinstance(v, dict) and b'id' in v and b'time' in v + assert isinstance(k, str) + assert isinstance(v, dict) and 'id' in v and 'time' in v self._archives[k] = v def get_raw_dict(self): @@ -196,10 +192,10 @@ def load(cls, repository, operations, key=None, force_tam_not_required=False): manifest.timestamp = m.get('timestamp') manifest.config = m.config # valid item keys are whatever is known in the repo or every key we know - manifest.item_keys = ITEM_KEYS | frozenset(key.decode() for key in m.get('item_keys', [])) + manifest.item_keys = ITEM_KEYS | frozenset(m.get('item_keys', [])) if manifest.tam_verified: - manifest_required = manifest.config.get(b'tam_required', False) + manifest_required = manifest.config.get('tam_required', False) security_required = tam_required(repository) if manifest_required and not security_required: logger.debug('Manifest is TAM verified and says TAM is required, updating security database...') @@ -214,32 +210,32 @@ def load(cls, repository, operations, key=None, force_tam_not_required=False): def check_repository_compatibility(self, operations): for operation in operations: assert isinstance(operation, self.Operation) - feature_flags = self.config.get(b'feature_flags', None) + feature_flags = self.config.get('feature_flags', None) if feature_flags is 
None: return - if operation.value.encode() not in feature_flags: + if operation.value not in feature_flags: continue - requirements = feature_flags[operation.value.encode()] - if b'mandatory' in requirements: - unsupported = set(requirements[b'mandatory']) - self.SUPPORTED_REPO_FEATURES + requirements = feature_flags[operation.value] + if 'mandatory' in requirements: + unsupported = set(requirements['mandatory']) - self.SUPPORTED_REPO_FEATURES if unsupported: - raise MandatoryFeatureUnsupported([f.decode() for f in unsupported]) + raise MandatoryFeatureUnsupported(list(unsupported)) def get_all_mandatory_features(self): result = {} - feature_flags = self.config.get(b'feature_flags', None) + feature_flags = self.config.get('feature_flags', None) if feature_flags is None: return result for operation, requirements in feature_flags.items(): - if b'mandatory' in requirements: - result[operation.decode()] = {feature.decode() for feature in requirements[b'mandatory']} + if 'mandatory' in requirements: + result[operation] = set(requirements['mandatory']) return result def write(self): from ..item import ManifestItem if self.key.tam_required: - self.config[b'tam_required'] = True + self.config['tam_required'] = True # self.timestamp needs to be strictly monotonically increasing. Clocks often are not set correctly if self.timestamp is None: self.timestamp = datetime.utcnow().strftime(ISO_FORMAT) diff --git a/src/borg/helpers/msgpack.py b/src/borg/helpers/msgpack.py index 2ace88feef..268ee30e75 100644 --- a/src/borg/helpers/msgpack.py +++ b/src/borg/helpers/msgpack.py @@ -1,21 +1,52 @@ +""" +wrapping msgpack +================ + +We wrap msgpack here the way we need it - to avoid having lots of clutter in the calling code. + +Packing +------- +- use_bin_type = True (used by borg since borg 1.3) + This is used to generate output according to new msgpack 2.0 spec. + This cleanly keeps bytes and str types apart. + +- use_bin_type = False (used by borg < 1.3) + This creates output according to the older msgpack spec. + BAD: str and bytes were packed into same "raw" representation. + +- unicode_errors = 'surrogateescape' + Guess backup applications are one of the rare cases when this needs to be used. + It is needed because borg also needs to deal with data that does not cleanly encode/decode using utf-8. + There's a lot of crap out there, e.g. in filenames and as a backup tool, we must keep them as good as possible. + +Unpacking +--------- +- raw = False (used by borg since borg 1.3) + We already can use this with borg 1.3 due to the want_bytes decoder. + This decoder can be removed in future, when we do not have to deal with data any more that was packed the old way. + It will then unpack according to the msgpack 2.0 spec format and directly output bytes or str. + +- raw = True (the old way, used by borg < 1.3) + +- unicode_errors = 'surrogateescape' -> see description above (will be used when raw is False). + +As of borg 1.3, we have fixed most of the msgpack str/bytes mess, #968. +Borg now still needs to **read** old repos, archives, keys, ... so we can not yet fix it completely. +But from now on, borg only **writes** new data according to the new msgpack 2.0 spec, +thus we can remove some legacy support in a later borg release (some places are marked with "legacy"). 
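As a minimal illustration of these settings (plain msgpack-python >= 1.0 calls, not borg code): packing with use_bin_type=True keeps str and bytes apart on the wire, and unpacking with raw=False plus unicode_errors='surrogateescape' gives both back unchanged, even for filenames that are not valid UTF-8.

import msgpack

path = b'caf\xe9.txt'.decode('utf-8', 'surrogateescape')    # undecodable byte kept as a surrogate
item = {'path': path, 'chunk_id': b'\x00' * 32}
packed = msgpack.packb(item, use_bin_type=True)              # str -> raw, bytes -> bin
unpacked = msgpack.unpackb(packed, raw=False, unicode_errors='surrogateescape')
assert isinstance(unpacked['path'], str) and isinstance(unpacked['chunk_id'], bytes)
assert unpacked['path'].encode('utf-8', 'surrogateescape') == b'caf\xe9.txt'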
+ +current way in msgpack terms +---------------------------- + +- pack with use_bin_type=True (according to msgpack 2.0 spec) +- packs str -> raw and bytes -> bin +- unpack with raw=False (according to msgpack 2.0 spec, using unicode_errors='surrogateescape') +- unpacks bin to bytes and raw to str (thus we need to re-encode manually if we want bytes from "raw") +""" + from .datastruct import StableDict from ..constants import * # NOQA -# wrapping msgpack --------------------------------------------------------------------------------------------------- -# -# due to the planned breaking api changes in upstream msgpack, we wrap it the way we need it - -# to avoid having lots of clutter in the calling code. see tickets #968 and #3632. -# -# Packing -# ------- -# use_bin_type = False is needed to generate the old msgpack format (not msgpack 2.0 spec) as borg always did. -# unicode_errors = None is needed because usage of it is deprecated -# -# Unpacking -# --------- -# raw = True is needed to unpack the old msgpack format to bytes (not str, about the decoding see item.pyx). -# unicode_errors = None is needed because usage of it is deprecated - from msgpack import Packer as mp_Packer from msgpack import packb as mp_packb from msgpack import pack as mp_pack @@ -24,12 +55,16 @@ from msgpack import unpack as mp_unpack from msgpack import version as mp_version -from msgpack import ExtType +from msgpack import ExtType, Timestamp from msgpack import OutOfData version = mp_version +USE_BIN_TYPE = True +RAW = False +UNICODE_ERRORS = 'surrogateescape' + class PackException(Exception): """Exception while msgpack packing""" @@ -40,10 +75,10 @@ class UnpackException(Exception): class Packer(mp_Packer): - def __init__(self, *, default=None, unicode_errors=None, - use_single_float=False, autoreset=True, use_bin_type=False, + def __init__(self, *, default=None, unicode_errors=UNICODE_ERRORS, + use_single_float=False, autoreset=True, use_bin_type=USE_BIN_TYPE, strict_types=False): - assert unicode_errors is None + assert unicode_errors == UNICODE_ERRORS super().__init__(default=default, unicode_errors=unicode_errors, use_single_float=use_single_float, autoreset=autoreset, use_bin_type=use_bin_type, strict_types=strict_types) @@ -55,16 +90,16 @@ def pack(self, obj): raise PackException(e) -def packb(o, *, use_bin_type=False, unicode_errors=None, **kwargs): - assert unicode_errors is None +def packb(o, *, use_bin_type=USE_BIN_TYPE, unicode_errors=UNICODE_ERRORS, **kwargs): + assert unicode_errors == UNICODE_ERRORS try: return mp_packb(o, use_bin_type=use_bin_type, unicode_errors=unicode_errors, **kwargs) except Exception as e: raise PackException(e) -def pack(o, stream, *, use_bin_type=False, unicode_errors=None, **kwargs): - assert unicode_errors is None +def pack(o, stream, *, use_bin_type=USE_BIN_TYPE, unicode_errors=UNICODE_ERRORS, **kwargs): + assert unicode_errors == UNICODE_ERRORS try: return mp_pack(o, stream, use_bin_type=use_bin_type, unicode_errors=unicode_errors, **kwargs) except Exception as e: @@ -72,13 +107,13 @@ def pack(o, stream, *, use_bin_type=False, unicode_errors=None, **kwargs): class Unpacker(mp_Unpacker): - def __init__(self, file_like=None, *, read_size=0, use_list=True, raw=True, + def __init__(self, file_like=None, *, read_size=0, use_list=True, raw=RAW, object_hook=None, object_pairs_hook=None, list_hook=None, - unicode_errors=None, max_buffer_size=0, + unicode_errors=UNICODE_ERRORS, max_buffer_size=0, ext_hook=ExtType, strict_map_key=False): - assert raw is True - assert 
unicode_errors is None + assert raw == RAW + assert unicode_errors == UNICODE_ERRORS kw = dict(file_like=file_like, read_size=read_size, use_list=use_list, raw=raw, object_hook=object_hook, object_pairs_hook=object_pairs_hook, list_hook=list_hook, unicode_errors=unicode_errors, max_buffer_size=max_buffer_size, @@ -105,10 +140,11 @@ def __next__(self): next = __next__ -def unpackb(packed, *, raw=True, unicode_errors=None, +def unpackb(packed, *, raw=RAW, unicode_errors=UNICODE_ERRORS, strict_map_key=False, **kwargs): - assert unicode_errors is None + assert raw == RAW + assert unicode_errors == UNICODE_ERRORS try: kw = dict(raw=raw, unicode_errors=unicode_errors, strict_map_key=strict_map_key) @@ -118,10 +154,11 @@ def unpackb(packed, *, raw=True, unicode_errors=None, raise UnpackException(e) -def unpack(stream, *, raw=True, unicode_errors=None, +def unpack(stream, *, raw=RAW, unicode_errors=UNICODE_ERRORS, strict_map_key=False, **kwargs): - assert unicode_errors is None + assert raw == RAW + assert unicode_errors == UNICODE_ERRORS try: kw = dict(raw=raw, unicode_errors=unicode_errors, strict_map_key=strict_map_key) @@ -164,7 +201,7 @@ def get_limited_unpacker(kind): return Unpacker(**args) -def bigint_to_int(mtime): +def bigint_to_int(mtime): # legacy """Convert bytearray to int """ if isinstance(mtime, bytes): @@ -172,7 +209,7 @@ def bigint_to_int(mtime): return mtime -def int_to_bigint(value): +def int_to_bigint(value): # legacy """Convert integers larger than 64 bits to bytearray Smaller integers are left alone @@ -180,3 +217,14 @@ def int_to_bigint(value): if value.bit_length() > 63: return value.to_bytes((value.bit_length() + 9) // 8, 'little', signed=True) return value + + +def int_to_timestamp(ns): + return Timestamp.from_unix_nano(ns) + + +def timestamp_to_int(ts): + if isinstance(ts, Timestamp): + return ts.to_unix_nano() + # legacy support note: we need to keep the bigint conversion for compatibility with borg < 1.3 archives. + return bigint_to_int(ts) diff --git a/src/borg/helpers/parseformat.py b/src/borg/helpers/parseformat.py index b681571283..99026f478d 100644 --- a/src/borg/helpers/parseformat.py +++ b/src/borg/helpers/parseformat.py @@ -19,6 +19,7 @@ from .errors import Error from .fs import get_keys_dir +from .msgpack import Timestamp from .time import OutputTimestamp, format_time, to_localtime, safe_timestamp, safe_s from .. import __version__ as borg_version from .. 
import __version_tuple__ as borg_version_tuple @@ -1050,6 +1051,8 @@ def decode(d): value = decode_tuple(value) elif isinstance(value, bytes): value = decode_bytes(value) + elif isinstance(value, Timestamp): + value = value.to_unix_nano() if isinstance(key, bytes): key = key.decode() res[key] = value diff --git a/src/borg/item.pyx b/src/borg/item.pyx index 48debf1839..128e317444 100644 --- a/src/borg/item.pyx +++ b/src/borg/item.pyx @@ -2,10 +2,9 @@ import stat from collections import namedtuple from .constants import ITEM_KEYS, ARCHIVE_KEYS -from .helpers import safe_encode, safe_decode -from .helpers import bigint_to_int, int_to_bigint from .helpers import StableDict from .helpers import format_file_size +from .helpers.msgpack import timestamp_to_int, int_to_timestamp cdef extern from "_item.c": @@ -16,6 +15,60 @@ cdef extern from "_item.c": API_VERSION = '1.2_01' +def fix_key(data, key): + """if k is a bytes-typed key, migrate key/value to a str-typed key in dict data""" + if isinstance(key, bytes): + value = data.pop(key) + key = key.decode() + data[key] = value + assert isinstance(key, str) + return key + + +def fix_str_value(data, key, errors='surrogateescape'): + """makes sure that data[key] is a str (decode if it is bytes)""" + assert isinstance(key, str) # fix_key must be called first + value = data[key] + if isinstance(value, bytes): + value = value.decode('utf-8', errors=errors) + data[key] = value + assert isinstance(value, str) + return value + + +def fix_list_of_str(t): + """make sure we have a list of str""" + assert isinstance(t, (tuple, list)) + l = [e.decode() if isinstance(e, bytes) else e for e in t] + assert all(isinstance(e, str) for e in l), repr(l) + return l + + +def fix_tuple_of_str(t): + """make sure we have a tuple of str""" + assert isinstance(t, (tuple, list)) + t = tuple(e.decode() if isinstance(e, bytes) else e for e in t) + assert all(isinstance(e, str) for e in t), repr(t) + return t + + +def fix_tuple_of_str_and_int(t): + """make sure we have a tuple of str""" + assert isinstance(t, (tuple, list)) + t = tuple(e.decode() if isinstance(e, bytes) else e for e in t) + assert all(isinstance(e, (str, int)) for e in t), repr(t) + return t + + +def want_bytes(v): + """we know that we want bytes and the value should be bytes""" + # legacy support: it being str can be caused by msgpack unpack decoding old data that was packed with use_bin_type=False + if isinstance(v, str): + v = v.encode('utf-8', errors='surrogateescape') + assert isinstance(v, bytes) + return v + + class PropDict: """ Manage a dictionary via properties. 
@@ -155,15 +208,15 @@ class Item(PropDict): # properties statically defined, so that IDEs can know their names: - path = PropDict._make_property('path', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) - source = PropDict._make_property('source', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) - user = PropDict._make_property('user', (str, type(None)), 'surrogate-escaped str or None', encode=safe_encode, decode=safe_decode) - group = PropDict._make_property('group', (str, type(None)), 'surrogate-escaped str or None', encode=safe_encode, decode=safe_decode) + path = PropDict._make_property('path', str, 'surrogate-escaped str') + source = PropDict._make_property('source', str, 'surrogate-escaped str') + user = PropDict._make_property('user', (str, type(None)), 'surrogate-escaped str or None') + group = PropDict._make_property('group', (str, type(None)), 'surrogate-escaped str or None') - acl_access = PropDict._make_property('acl_access', bytes) - acl_default = PropDict._make_property('acl_default', bytes) - acl_extended = PropDict._make_property('acl_extended', bytes) - acl_nfs4 = PropDict._make_property('acl_nfs4', bytes) + acl_access = PropDict._make_property('acl_access', bytes, decode=want_bytes) + acl_default = PropDict._make_property('acl_default', bytes, decode=want_bytes) + acl_extended = PropDict._make_property('acl_extended', bytes, decode=want_bytes) + acl_nfs4 = PropDict._make_property('acl_nfs4', bytes, decode=want_bytes) mode = PropDict._make_property('mode', int) uid = PropDict._make_property('uid', int) @@ -171,17 +224,17 @@ class Item(PropDict): rdev = PropDict._make_property('rdev', int) bsdflags = PropDict._make_property('bsdflags', int) - # note: we need to keep the bigint conversion for compatibility with borg 1.0 archives. - atime = PropDict._make_property('atime', int, 'bigint', encode=int_to_bigint, decode=bigint_to_int) - ctime = PropDict._make_property('ctime', int, 'bigint', encode=int_to_bigint, decode=bigint_to_int) - mtime = PropDict._make_property('mtime', int, 'bigint', encode=int_to_bigint, decode=bigint_to_int) - birthtime = PropDict._make_property('birthtime', int, 'bigint', encode=int_to_bigint, decode=bigint_to_int) + atime = PropDict._make_property('atime', int, 'int (ns)', encode=int_to_timestamp, decode=timestamp_to_int) + ctime = PropDict._make_property('ctime', int, 'int (ns)', encode=int_to_timestamp, decode=timestamp_to_int) + mtime = PropDict._make_property('mtime', int, 'int (ns)', encode=int_to_timestamp, decode=timestamp_to_int) + birthtime = PropDict._make_property('birthtime', int, 'int (ns)', encode=int_to_timestamp, decode=timestamp_to_int) # size is only present for items with a chunk list and then it is sum(chunk_sizes) # compatibility note: this is a new feature, in old archives size will be missing. size = PropDict._make_property('size', int) - hardlink_master = PropDict._make_property('hardlink_master', bool) + hlid = PropDict._make_property('hlid', bytes, decode=want_bytes) # hard link id: same value means same hard link. 
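A small sketch of how the new hlid ties hard links together (illustrative only; the sha256-of-path derivation mirrors upgrade_item() in archiver.do_transfer above, and the .digest() call is assumed here to obtain the raw 32-byte value, since hlid is typed as bytes): all legacy items that were hard links to the same master end up carrying the same hlid.

import hashlib

def derive_hlid(path):
    if isinstance(path, str):
        path = path.encode('utf-8', 'surrogateescape')
    return hashlib.sha256(path).digest()          # 32 bytes; equal paths -> equal hlid

former_master = derive_hlid(b'home/user/file')    # item that had hardlink_master=True
former_slave = derive_hlid(b'home/user/file')     # item whose 'source' pointed at that path
assert former_master == former_slave              # same hlid -> same hard link group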
+ hardlink_master = PropDict._make_property('hardlink_master', bool) # legacy chunks = PropDict._make_property('chunks', (list, type(None)), 'list or None') chunks_healthy = PropDict._make_property('chunks_healthy', (list, type(None)), 'list or None') @@ -291,6 +344,14 @@ class Item(PropDict): except AttributeError: return False + def update_internal(self, d): + # legacy support for migration (data from old msgpacks comes in as bytes always, but sometimes we want str) + for k, v in list(d.items()): + k = fix_key(d, k) + if k in ('path', 'source', 'user', 'group'): + v = fix_str_value(d, k) + self._dict[k] = v + class EncryptedKey(PropDict): """ @@ -310,15 +371,25 @@ class EncryptedKey(PropDict): __slots__ = ("_dict", ) # avoid setting attributes not supported by properties version = PropDict._make_property('version', int) - algorithm = PropDict._make_property('algorithm', str, encode=str.encode, decode=bytes.decode) + algorithm = PropDict._make_property('algorithm', str) iterations = PropDict._make_property('iterations', int) - salt = PropDict._make_property('salt', bytes) - hash = PropDict._make_property('hash', bytes) - data = PropDict._make_property('data', bytes) + salt = PropDict._make_property('salt', bytes, decode=want_bytes) + hash = PropDict._make_property('hash', bytes, decode=want_bytes) + data = PropDict._make_property('data', bytes, decode=want_bytes) argon2_time_cost = PropDict._make_property('argon2_time_cost', int) argon2_memory_cost = PropDict._make_property('argon2_memory_cost', int) argon2_parallelism = PropDict._make_property('argon2_parallelism', int) - argon2_type = PropDict._make_property('argon2_type', str, encode=str.encode, decode=bytes.decode) + argon2_type = PropDict._make_property('argon2_type', str) + + def update_internal(self, d): + # legacy support for migration (data from old msgpacks comes in as bytes always, but sometimes we want str) + for k, v in list(d.items()): + k = fix_key(d, k) + if k == 'version': + assert isinstance(v, int) + if k in ('algorithm', 'argon2_type'): + v = fix_str_value(d, k) + self._dict[k] = v class Key(PropDict): @@ -338,24 +409,20 @@ class Key(PropDict): __slots__ = ("_dict", ) # avoid setting attributes not supported by properties version = PropDict._make_property('version', int) - repository_id = PropDict._make_property('repository_id', bytes) - enc_key = PropDict._make_property('enc_key', bytes) - enc_hmac_key = PropDict._make_property('enc_hmac_key', bytes) - id_key = PropDict._make_property('id_key', bytes) + repository_id = PropDict._make_property('repository_id', bytes, decode=want_bytes) + enc_key = PropDict._make_property('enc_key', bytes, decode=want_bytes) + enc_hmac_key = PropDict._make_property('enc_hmac_key', bytes, decode=want_bytes) + id_key = PropDict._make_property('id_key', bytes, decode=want_bytes) chunk_seed = PropDict._make_property('chunk_seed', int) tam_required = PropDict._make_property('tam_required', bool) - -def tuple_encode(t): - """encode a tuple that might contain str items""" - # we have str, but want to give bytes to msgpack.pack - return tuple(safe_encode(e) if isinstance(e, str) else e for e in t) - - -def tuple_decode(t): - """decode a tuple that might contain bytes items""" - # we get bytes objects from msgpack.unpack, but want str - return tuple(safe_decode(e) if isinstance(e, bytes) else e for e in t) + def update_internal(self, d): + # legacy support for migration (data from old msgpacks comes in as bytes always, but sometimes we want str) + for k, v in list(d.items()): + k = fix_key(d, k) 
+ if k == 'version': + assert isinstance(v, int) + self._dict[k] = v class ArchiveItem(PropDict): @@ -375,18 +442,18 @@ class ArchiveItem(PropDict): __slots__ = ("_dict", ) # avoid setting attributes not supported by properties version = PropDict._make_property('version', int) - name = PropDict._make_property('name', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) + name = PropDict._make_property('name', str, 'surrogate-escaped str') items = PropDict._make_property('items', list) cmdline = PropDict._make_property('cmdline', list) # list of s-e-str - hostname = PropDict._make_property('hostname', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) - username = PropDict._make_property('username', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) - time = PropDict._make_property('time', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) - time_end = PropDict._make_property('time_end', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) - comment = PropDict._make_property('comment', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) - chunker_params = PropDict._make_property('chunker_params', tuple, 'chunker-params tuple', encode=tuple_encode, decode=tuple_decode) + hostname = PropDict._make_property('hostname', str, 'surrogate-escaped str') + username = PropDict._make_property('username', str, 'surrogate-escaped str') + time = PropDict._make_property('time', str) + time_end = PropDict._make_property('time_end', str) + comment = PropDict._make_property('comment', str, 'surrogate-escaped str') + chunker_params = PropDict._make_property('chunker_params', tuple) recreate_cmdline = PropDict._make_property('recreate_cmdline', list) # list of s-e-str # recreate_source_id, recreate_args, recreate_partial_chunks were used in 1.1.0b1 .. 
b2 - recreate_source_id = PropDict._make_property('recreate_source_id', bytes) + recreate_source_id = PropDict._make_property('recreate_source_id', bytes, decode=want_bytes) recreate_args = PropDict._make_property('recreate_args', list) # list of s-e-str recreate_partial_chunks = PropDict._make_property('recreate_partial_chunks', list) # list of tuples size = PropDict._make_property('size', int) @@ -396,6 +463,22 @@ class ArchiveItem(PropDict): csize_parts = PropDict._make_property('csize_parts', int) nfiles_parts = PropDict._make_property('nfiles_parts', int) + def update_internal(self, d): + # legacy support for migration (data from old msgpacks comes in as bytes always, but sometimes we want str) + for k, v in list(d.items()): + k = fix_key(d, k) + if k == 'version': + assert isinstance(v, int) + if k in ('name', 'hostname', 'username', 'comment'): + v = fix_str_value(d, k) + if k in ('time', 'time_end'): + v = fix_str_value(d, k, 'replace') + if k == 'chunker_params': + v = fix_tuple_of_str_and_int(v) + if k in ('cmdline', 'recreate_cmdline'): + v = fix_list_of_str(v) + self._dict[k] = v + class ManifestItem(PropDict): """ @@ -414,10 +497,52 @@ class ManifestItem(PropDict): __slots__ = ("_dict", ) # avoid setting attributes not supported by properties version = PropDict._make_property('version', int) - archives = PropDict._make_property('archives', dict) # name -> dict - timestamp = PropDict._make_property('timestamp', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) + archives = PropDict._make_property('archives', dict, 'dict of str -> dict') # name -> dict + timestamp = PropDict._make_property('timestamp', str) config = PropDict._make_property('config', dict) - item_keys = PropDict._make_property('item_keys', tuple) + item_keys = PropDict._make_property('item_keys', tuple, 'tuple of str') + + def update_internal(self, d): + # legacy support for migration (data from old msgpacks comes in as bytes always, but sometimes we want str) + for k, v in list(d.items()): + k = fix_key(d, k) + if k == 'version': + assert isinstance(v, int) + if k == 'archives': + ad = v + assert isinstance(ad, dict) + for ak, av in list(ad.items()): + ak = fix_key(ad, ak) + assert isinstance(av, dict) + for ik, iv in list(av.items()): + ik = fix_key(av, ik) + assert set(av) == {'id', 'time'} + assert isinstance(av['id'], bytes) + fix_str_value(av, 'time') + if k == 'timestamp': + v = fix_str_value(d, k, 'replace') + if k == 'config': + cd = v + assert isinstance(cd, dict) + for ck, cv in list(cd.items()): + ck = fix_key(cd, ck) + if ck == 'tam_required': + assert isinstance(cv, bool) + if ck == 'feature_flags': + assert isinstance(cv, dict) + ops = {'read', 'check', 'write', 'delete'} + for op, specs in list(cv.items()): + op = fix_key(cv, op) + assert op in ops + for speck, specv in list(specs.items()): + speck = fix_key(specs, speck) + if speck == 'mandatory': + specs[speck] = fix_tuple_of_str(specv) + assert set(cv).issubset(ops) + if k == 'item_keys': + v = fix_tuple_of_str(v) + self._dict[k] = v + class ItemDiff: """ diff --git a/src/borg/remote.py b/src/borg/remote.py index 2870d71d98..6ea51d3c3d 100644 --- a/src/borg/remote.py +++ b/src/borg/remote.py @@ -38,7 +38,7 @@ RPC_PROTOCOL_VERSION = 2 BORG_VERSION = parse_version(__version__) -MSGID, MSG, ARGS, RESULT = b'i', b'm', b'a', b'r' +MSGID, MSG, ARGS, RESULT = 'i', 'm', 'a', 'r' MAX_INFLIGHT = 100 @@ -138,10 +138,6 @@ def __init__(self, data): } -def decode_keys(d): - return {k.decode(): d[k] for k in d} - - class RepositoryServer: 
# pragma: no cover
     rpc_methods = (
         '__len__',
@@ -217,13 +213,12 @@ def serve(self):
                     if isinstance(unpacked, dict):
                         dictFormat = True
                         msgid = unpacked[MSGID]
-                        method = unpacked[MSG].decode()
-                        args = decode_keys(unpacked[ARGS])
+                        method = unpacked[MSG]
+                        args = unpacked[ARGS]
                     elif isinstance(unpacked, tuple) and len(unpacked) == 4:
                         dictFormat = False
                         # The first field 'type' was always 1 and has always been ignored
                         _, msgid, method, args = unpacked
-                        method = method.decode()
                         args = self.positional_to_named(method, args)
                     else:
                         if self.repository is not None:
@@ -256,21 +251,21 @@ def serve(self):
                         try:
                             msg = msgpack.packb({MSGID: msgid,
-                                                 b'exception_class': e.__class__.__name__,
-                                                 b'exception_args': e.args,
-                                                 b'exception_full': ex_full,
-                                                 b'exception_short': ex_short,
-                                                 b'exception_trace': ex_trace,
-                                                 b'sysinfo': sysinfo()})
+                                                 'exception_class': e.__class__.__name__,
+                                                 'exception_args': e.args,
+                                                 'exception_full': ex_full,
+                                                 'exception_short': ex_short,
+                                                 'exception_trace': ex_trace,
+                                                 'sysinfo': sysinfo()})
                         except TypeError:
                             msg = msgpack.packb({MSGID: msgid,
-                                                 b'exception_class': e.__class__.__name__,
-                                                 b'exception_args': [x if isinstance(x, (str, bytes, int)) else None
-                                                                     for x in e.args],
-                                                 b'exception_full': ex_full,
-                                                 b'exception_short': ex_short,
-                                                 b'exception_trace': ex_trace,
-                                                 b'sysinfo': sysinfo()})
+                                                 'exception_class': e.__class__.__name__,
+                                                 'exception_args': [x if isinstance(x, (str, bytes, int)) else None
+                                                                     for x in e.args],
+                                                 'exception_full': ex_full,
+                                                 'exception_short': ex_short,
+                                                 'exception_trace': ex_trace,
+                                                 'sysinfo': sysinfo()})

                         os_write(stdout_fd, msg)
                     else:
@@ -307,7 +302,7 @@ def negotiate(self, client_data):
         # clients since 1.1.0b3 use a dict as client_data
         # clients since 1.1.0b6 support json log format from server
         if isinstance(client_data, dict):
-            self.client_version = client_data[b'client_version']
+            self.client_version = client_data['client_version']
             level = logging.getLevelName(logging.getLogger('').level)
             setup_logging(is_serve=True, json=True, level=level)
             logger.debug('Initialized logging system for JSON-based protocol')
@@ -369,7 +364,6 @@ def open(self, path, create=False, lock_wait=None, lock=True, exclusive=None, ap
         return self.repository.id

     def inject_exception(self, kind):
-        kind = kind.decode()
         s1 = 'test string'
         s2 = 'test string2'
         if kind == 'DoesNotExist':
@@ -483,35 +477,35 @@ class RemoteRepository:
     class RPCError(Exception):
         def __init__(self, unpacked):
-            # for borg < 1.1: unpacked only has b'exception_class' as key
-            # for borg 1.1+: unpacked has keys: b'exception_args', b'exception_full', b'exception_short', b'sysinfo'
+            # for borg < 1.1: unpacked only has 'exception_class' as key
+            # for borg 1.1+: unpacked has keys: 'exception_args', 'exception_full', 'exception_short', 'sysinfo'
             self.unpacked = unpacked

         def get_message(self):
-            if b'exception_short' in self.unpacked:
-                return b'\n'.join(self.unpacked[b'exception_short']).decode()
+            if 'exception_short' in self.unpacked:
+                return '\n'.join(self.unpacked['exception_short'])
             else:
                 return self.exception_class

         @property
         def traceback(self):
-            return self.unpacked.get(b'exception_trace', True)
+            return self.unpacked.get('exception_trace', True)

         @property
         def exception_class(self):
-            return self.unpacked[b'exception_class'].decode()
+            return self.unpacked['exception_class']

         @property
         def exception_full(self):
-            if b'exception_full' in self.unpacked:
-                return b'\n'.join(self.unpacked[b'exception_full']).decode()
+            if 'exception_full' in self.unpacked:
+                return '\n'.join(self.unpacked['exception_full'])
             else:
                 return self.get_message() + '\nRemote Exception (see remote log for the traceback)'

         @property
         def sysinfo(self):
-            if b'sysinfo' in self.unpacked:
-                return self.unpacked[b'sysinfo'].decode()
+            if 'sysinfo' in self.unpacked:
+                return self.unpacked['sysinfo']
             else:
                 return ''
@@ -570,15 +564,15 @@ def __init__(self, location, create=False, exclusive=False, lock_wait=None, lock
         try:
             try:
                 version = self.call('negotiate', {'client_data': {
-                    b'client_version': BORG_VERSION,
+                    'client_version': BORG_VERSION,
                 }})
             except ConnectionClosed:
                 raise ConnectionClosedWithHint('Is borg working on the server?') from None
             if version == RPC_PROTOCOL_VERSION:
                 self.dictFormat = False
-            elif isinstance(version, dict) and b'server_version' in version:
+            elif isinstance(version, dict) and 'server_version' in version:
                 self.dictFormat = True
-                self.server_version = version[b'server_version']
+                self.server_version = version['server_version']
             else:
                 raise Exception('Server insisted on using unsupported protocol version %s' % version)
@@ -733,9 +727,9 @@ def pop_preload_msgid(chunkid):
             return msgid

         def handle_error(unpacked):
-            error = unpacked[b'exception_class'].decode()
-            old_server = b'exception_args' not in unpacked
-            args = unpacked.get(b'exception_args')
+            error = unpacked['exception_class']
+            old_server = 'exception_args' not in unpacked
+            args = unpacked.get('exception_args')
             if error == 'DoesNotExist':
                 raise Repository.DoesNotExist(self.location.processed)
@@ -747,29 +741,29 @@ def handle_error(unpacked):
                 if old_server:
                     raise IntegrityError('(not available)')
                 else:
-                    raise IntegrityError(args[0].decode())
+                    raise IntegrityError(args[0])
             elif error == 'AtticRepository':
                 if old_server:
                     raise Repository.AtticRepository('(not available)')
                 else:
-                    raise Repository.AtticRepository(args[0].decode())
+                    raise Repository.AtticRepository(args[0])
             elif error == 'PathNotAllowed':
                 if old_server:
                     raise PathNotAllowed('(unknown)')
                 else:
-                    raise PathNotAllowed(args[0].decode())
+                    raise PathNotAllowed(args[0])
             elif error == 'ParentPathDoesNotExist':
-                raise Repository.ParentPathDoesNotExist(args[0].decode())
+                raise Repository.ParentPathDoesNotExist(args[0])
             elif error == 'ObjectNotFound':
                 if old_server:
                     raise Repository.ObjectNotFound('(not available)', self.location.processed)
                 else:
-                    raise Repository.ObjectNotFound(args[0].decode(), self.location.processed)
+                    raise Repository.ObjectNotFound(args[0], self.location.processed)
             elif error == 'InvalidRPCMethod':
                 if old_server:
                     raise InvalidRPCMethod('(not available)')
                 else:
-                    raise InvalidRPCMethod(args[0].decode())
+                    raise InvalidRPCMethod(args[0])
             else:
                 raise self.RPCError(unpacked)
@@ -788,7 +782,7 @@ def handle_error(unpacked):
                 try:
                     unpacked = self.responses.pop(waiting_for[0])
                     waiting_for.pop(0)
-                    if b'exception_class' in unpacked:
+                    if 'exception_class' in unpacked:
                         handle_error(unpacked)
                     else:
                         yield unpacked[RESULT]
@@ -808,7 +802,7 @@ def handle_error(unpacked):
                     else:
                         return
                 else:
-                    if b'exception_class' in unpacked:
+                    if 'exception_class' in unpacked:
                         handle_error(unpacked)
                     else:
                         yield unpacked[RESULT]
@@ -834,7 +828,7 @@ def handle_error(unpacked):
                         _, msgid, error, res = unpacked
                         if error:
                             # ignore res, because it is only a fixed string anyway.
-                            unpacked = {MSGID: msgid, b'exception_class': error}
+                            unpacked = {MSGID: msgid, 'exception_class': error}
                         else:
                             unpacked = {MSGID: msgid, RESULT: res}
                     else:
@@ -842,7 +836,7 @@ def handle_error(unpacked):
                     if msgid in self.ignore_responses:
                         self.ignore_responses.remove(msgid)
                         # async methods never return values, but may raise exceptions.
-                        if b'exception_class' in unpacked:
+                        if 'exception_class' in unpacked:
                             self.async_responses[msgid] = unpacked
                         else:
                             # we currently do not have async result values except "None",
diff --git a/src/borg/repository.py b/src/borg/repository.py
index 9267fe0e6c..3fcc72aadd 100644
--- a/src/borg/repository.py
+++ b/src/borg/repository.py
@@ -516,16 +516,16 @@ def _read_integrity(self, transaction_id, key):
                 integrity = msgpack.unpack(fd)
         except FileNotFoundError:
             return
-        if integrity.get(b'version') != 2:
-            logger.warning('Unknown integrity data version %r in %s', integrity.get(b'version'), integrity_file)
+        if integrity.get('version') != 2:
+            logger.warning('Unknown integrity data version %r in %s', integrity.get('version'), integrity_file)
             return
-        return integrity[key].decode()
+        return integrity[key]

     def open_index(self, transaction_id, auto_recover=True):
         if transaction_id is None:
             return NSIndex()
         index_path = os.path.join(self.path, 'index.%d' % transaction_id)
-        integrity_data = self._read_integrity(transaction_id, b'index')
+        integrity_data = self._read_integrity(transaction_id, 'index')
         try:
             with IntegrityCheckedFile(index_path, write=False, integrity_data=integrity_data) as fd:
                 return NSIndex.read(fd)
@@ -575,7 +575,7 @@ def prepare_txn(self, transaction_id, do_cleanup=True):
                 self.io.cleanup(transaction_id)
             hints_path = os.path.join(self.path, 'hints.%d' % transaction_id)
             index_path = os.path.join(self.path, 'index.%d' % transaction_id)
-            integrity_data = self._read_integrity(transaction_id, b'hints')
+            integrity_data = self._read_integrity(transaction_id, 'hints')
             try:
                 with IntegrityCheckedFile(hints_path, write=False, integrity_data=integrity_data) as fd:
                     hints = msgpack.unpack(fd)
@@ -588,23 +588,23 @@ def prepare_txn(self, transaction_id, do_cleanup=True):
                 self.check_transaction()
                 self.prepare_txn(transaction_id)
                 return
-            if hints[b'version'] == 1:
+            if hints['version'] == 1:
                 logger.debug('Upgrading from v1 hints.%d', transaction_id)
-                self.segments = hints[b'segments']
+                self.segments = hints['segments']
                 self.compact = FreeSpace()
                 self.storage_quota_use = 0
                 self.shadow_index = {}
-                for segment in sorted(hints[b'compact']):
+                for segment in sorted(hints['compact']):
                     logger.debug('Rebuilding sparse info for segment %d', segment)
                     self._rebuild_sparse(segment)
                 logger.debug('Upgrade to v2 hints complete')
-            elif hints[b'version'] != 2:
-                raise ValueError('Unknown hints file version: %d' % hints[b'version'])
+            elif hints['version'] != 2:
+                raise ValueError('Unknown hints file version: %d' % hints['version'])
             else:
-                self.segments = hints[b'segments']
-                self.compact = FreeSpace(hints[b'compact'])
-                self.storage_quota_use = hints.get(b'storage_quota_use', 0)
-                self.shadow_index = hints.get(b'shadow_index', {})
+                self.segments = hints['segments']
+                self.compact = FreeSpace(hints['compact'])
+                self.storage_quota_use = hints.get('storage_quota_use', 0)
+                self.shadow_index = hints.get('shadow_index', {})
             self.log_storage_quota()
             # Drop uncommitted segments in the shadow index
             for key, shadowed_segments in self.shadow_index.items():
@@ -621,16 +621,16 @@ def rename_tmp(file):
             os.rename(file + '.tmp', file)

         hints = {
-            b'version': 2,
-            b'segments': self.segments,
-            b'compact': self.compact,
-            b'storage_quota_use': self.storage_quota_use,
-            b'shadow_index': self.shadow_index,
+            'version': 2,
+            'segments': self.segments,
+            'compact': self.compact,
+            'storage_quota_use': self.storage_quota_use,
+            'shadow_index': self.shadow_index,
         }
         integrity = {
             # Integrity version started at 2, the current hints version.
             # Thus, integrity version == hints version, for now.
-            b'version': 2,
+            'version': 2,
         }
         transaction_id = self.io.get_segments_transaction_id()
         assert transaction_id is not None
@@ -647,7 +647,7 @@ def rename_tmp(file):
         with IntegrityCheckedFile(hints_file + '.tmp', filename=hints_name, write=True) as fd:
             msgpack.pack(hints, fd)
             flush_and_sync(fd)
-        integrity[b'hints'] = fd.integrity_data
+        integrity['hints'] = fd.integrity_data

         # Write repository index
         index_name = 'index.%d' % transaction_id
@@ -656,7 +656,7 @@ def rename_tmp(file):
             # XXX: Consider using SyncFile for index write-outs.
             self.index.write(fd)
             flush_and_sync(fd)
-        integrity[b'index'] = fd.integrity_data
+        integrity['index'] = fd.integrity_data

         # Write integrity file, containing checksums of the hints and index files
         integrity_name = 'integrity.%d' % transaction_id
diff --git a/src/borg/testsuite/archive.py b/src/borg/testsuite/archive.py
index db9ed573b5..6fd766f9b9 100644
--- a/src/borg/testsuite/archive.py
+++ b/src/borg/testsuite/archive.py
@@ -142,7 +142,7 @@ def make_chunks(self, items):
         return b''.join(msgpack.packb({'path': item}) for item in items)

     def _validator(self, value):
-        return isinstance(value, dict) and value.get(b'path') in (b'foo', b'bar', b'boo', b'baz')
+        return isinstance(value, dict) and value.get('path') in ('foo', 'bar', 'boo', 'baz')

     def process(self, input):
         unpacker = RobustUnpacker(validator=self._validator, item_keys=ITEM_KEYS)
@@ -157,14 +157,14 @@ def process(self, input):
         return result

     def test_extra_garbage_no_sync(self):
-        chunks = [(False, [self.make_chunks([b'foo', b'bar'])]),
-                  (False, [b'garbage'] + [self.make_chunks([b'boo', b'baz'])])]
+        chunks = [(False, [self.make_chunks(['foo', 'bar'])]),
+                  (False, [b'garbage'] + [self.make_chunks(['boo', 'baz'])])]
         result = self.process(chunks)
         self.assert_equal(result, [
-            {b'path': b'foo'}, {b'path': b'bar'},
+            {'path': 'foo'}, {'path': 'bar'},
             103, 97, 114, 98, 97, 103, 101,
-            {b'path': b'boo'},
-            {b'path': b'baz'}])
+            {'path': 'boo'},
+            {'path': 'baz'}])

     def split(self, left, length):
         parts = []
@@ -174,22 +174,22 @@ def split(self, left, length):
         return parts

     def test_correct_stream(self):
-        chunks = self.split(self.make_chunks([b'foo', b'bar', b'boo', b'baz']), 2)
+        chunks = self.split(self.make_chunks(['foo', 'bar', 'boo', 'baz']), 2)
         input = [(False, chunks)]
         result = self.process(input)
-        self.assert_equal(result, [{b'path': b'foo'}, {b'path': b'bar'}, {b'path': b'boo'}, {b'path': b'baz'}])
+        self.assert_equal(result, [{'path': 'foo'}, {'path': 'bar'}, {'path': 'boo'}, {'path': 'baz'}])

     def test_missing_chunk(self):
-        chunks = self.split(self.make_chunks([b'foo', b'bar', b'boo', b'baz']), 4)
+        chunks = self.split(self.make_chunks(['foo', 'bar', 'boo', 'baz']), 4)
         input = [(False, chunks[:3]), (True, chunks[4:])]
         result = self.process(input)
-        self.assert_equal(result, [{b'path': b'foo'}, {b'path': b'boo'}, {b'path': b'baz'}])
+        self.assert_equal(result, [{'path': 'foo'}, {'path': 'boo'}, {'path': 'baz'}])

     def test_corrupt_chunk(self):
-        chunks = self.split(self.make_chunks([b'foo', b'bar', b'boo', b'baz']), 4)
+        chunks = self.split(self.make_chunks(['foo', 'bar', 'boo', 'baz']), 4)
         input = [(False, chunks[:3]), (True, [b'gar', b'bage'] + chunks[3:])]
         result = self.process(input)
-        self.assert_equal(result, [{b'path': b'foo'}, {b'path': b'boo'}, {b'path': b'baz'}])
+        self.assert_equal(result, [{'path': 'foo'}, {'path': 'boo'}, {'path': 'baz'}])


 @pytest.fixture
@@ -213,7 +213,7 @@ def test_invalid_msgpacked_item(packed, item_keys_serialized):

 @pytest.mark.parametrize('packed',
                          [msgpack.packb(o) for o in [
-                             {b'path': b'/a/b/c'},  # small (different msgpack mapping type!)
+                             {'path': b'/a/b/c'},  # small (different msgpack mapping type!)
                              OrderedDict((k, b'') for k in IK),  # as big (key count) as it gets
                              OrderedDict((k, b'x' * 1000) for k in IK),  # as big (key count and volume) as it gets
                          ]])
@@ -222,7 +222,7 @@ def test_valid_msgpacked_items(packed, item_keys_serialized):


 def test_key_length_msgpacked_items():
-    key = b'x' * 32  # 31 bytes is the limit for fixstr msgpack type
+    key = 'x' * 32  # 31 bytes is the limit for fixstr msgpack type
     data = {key: b''}
     item_keys_serialized = [msgpack.packb(key), ]
     assert valid_msgpacked_dict(msgpack.packb(data), item_keys_serialized)
diff --git a/src/borg/testsuite/archiver.py b/src/borg/testsuite/archiver.py
index 65fe2a446a..51bfa85640 100644
--- a/src/borg/testsuite/archiver.py
+++ b/src/borg/testsuite/archiver.py
@@ -1787,7 +1787,7 @@ def test_create_dry_run(self):
     def add_unknown_feature(self, operation):
         with Repository(self.repository_path, exclusive=True) as repository:
             manifest, key = Manifest.load(repository, Manifest.NO_OPERATION_CHECK)
-            manifest.config[b'feature_flags'] = {operation.value.encode(): {b'mandatory': [b'unknown-feature']}}
+            manifest.config['feature_flags'] = {operation.value: {'mandatory': ['unknown-feature']}}
             manifest.write()
             repository.commit(compact=False)
@@ -2442,7 +2442,7 @@ def test_compression_none_uncompressible(self):
     def test_compression_zlib_compressible(self):
         size, csize = self._get_sizes('zlib', compressible=True)
         assert csize < size * 0.1
-        assert csize == 35
+        assert csize == 37

     def test_compression_zlib_uncompressible(self):
         size, csize = self._get_sizes('zlib', compressible=False)
@@ -2451,7 +2451,7 @@ def test_compression_auto_compressible(self):
         size, csize = self._get_sizes('auto,zlib', compressible=True)
         assert csize < size * 0.1
-        assert csize == 35  # same as compression 'zlib'
+        assert csize == 37  # same as compression 'zlib'

     def test_compression_auto_uncompressible(self):
         size, csize = self._get_sizes('auto,zlib', compressible=False)
@@ -3600,14 +3600,14 @@ def test_init_defaults_to_argon2(self):
         self.cmd('init', '--encryption=repokey', self.repository_location)
         with Repository(self.repository_path) as repository:
             key = msgpack.unpackb(a2b_base64(repository.load_key()))
-        assert key[b'algorithm'] == b'argon2 chacha20-poly1305'
+        assert key['algorithm'] == 'argon2 chacha20-poly1305'

     def test_init_with_explicit_key_algorithm(self):
         """https://github.com/borgbackup/borg/issues/747#issuecomment-1076160401"""
         self.cmd('init', '--encryption=repokey', '--key-algorithm=pbkdf2', self.repository_location)
         with Repository(self.repository_path) as repository:
             key = msgpack.unpackb(a2b_base64(repository.load_key()))
-        assert key[b'algorithm'] == b'sha256'
+        assert key['algorithm'] == 'sha256'

     def verify_change_passphrase_does_not_change_algorithm(self, given_algorithm, expected_algorithm):
         self.cmd('init', '--encryption=repokey', '--key-algorithm', given_algorithm, self.repository_location)
@@ -3617,13 +3617,13 @@ def verify_change_passphrase_does_not_change_algorithm(self, given_algorithm, ex
         with Repository(self.repository_path) as repository:
             key = msgpack.unpackb(a2b_base64(repository.load_key()))
-        assert key[b'algorithm'] == expected_algorithm
+        assert key['algorithm'] == expected_algorithm

     def test_change_passphrase_does_not_change_algorithm_argon2(self):
-        self.verify_change_passphrase_does_not_change_algorithm('argon2', b'argon2 chacha20-poly1305')
+        self.verify_change_passphrase_does_not_change_algorithm('argon2', 'argon2 chacha20-poly1305')

     def test_change_passphrase_does_not_change_algorithm_pbkdf2(self):
-        self.verify_change_passphrase_does_not_change_algorithm('pbkdf2', b'sha256')
+        self.verify_change_passphrase_does_not_change_algorithm('pbkdf2', 'sha256')

     def verify_change_location_does_not_change_algorithm(self, given_algorithm, expected_algorithm):
         self.cmd('init', '--encryption=keyfile', '--key-algorithm', given_algorithm, self.repository_location)
@@ -3632,13 +3632,13 @@ def verify_change_location_does_not_change_algorithm(self, given_algorithm, expe
         with Repository(self.repository_path) as repository:
             key = msgpack.unpackb(a2b_base64(repository.load_key()))
-        assert key[b'algorithm'] == expected_algorithm
+        assert key['algorithm'] == expected_algorithm

     def test_change_location_does_not_change_algorithm_argon2(self):
-        self.verify_change_location_does_not_change_algorithm('argon2', b'argon2 chacha20-poly1305')
+        self.verify_change_location_does_not_change_algorithm('argon2', 'argon2 chacha20-poly1305')

     def test_change_location_does_not_change_algorithm_pbkdf2(self):
-        self.verify_change_location_does_not_change_algorithm('pbkdf2', b'sha256')
+        self.verify_change_location_does_not_change_algorithm('pbkdf2', 'sha256')

     def test_key_change_algorithm(self):
         self.cmd('init', '--encryption=repokey', '--key-algorithm=pbkdf2', self.repository_location)
@@ -3915,15 +3915,15 @@ class Attic013Item:
             def as_dict(self):
                 return {
                     # These are required
-                    b'path': '1234',
-                    b'mtime': 0,
-                    b'mode': 0,
-                    b'user': b'0',
-                    b'group': b'0',
-                    b'uid': 0,
-                    b'gid': 0,
+                    'path': '1234',
+                    'mtime': 0,
+                    'mode': 0,
+                    'user': '0',
+                    'group': '0',
+                    'uid': 0,
+                    'gid': 0,
                     # acl is the offending key.
-                    b'acl': None,
+                    'acl': None,
                 }

         archive, repository = self.open_archive('archive1')
@@ -3975,7 +3975,7 @@ def test_not_required(self):
             key.change_passphrase(key._passphrase)

            manifest = msgpack.unpackb(key.decrypt(Manifest.MANIFEST_ID, repository.get(Manifest.MANIFEST_ID)))
-            del manifest[b'tam']
+            del manifest['tam']
             repository.put(Manifest.MANIFEST_ID, key.encrypt(Manifest.MANIFEST_ID, msgpack.packb(manifest)))
             repository.commit(compact=False)
         output = self.cmd('list', '--debug', self.repository_location)
diff --git a/src/borg/testsuite/compress.py b/src/borg/testsuite/compress.py
index 3942c3537f..c93dd3bb67 100644
--- a/src/borg/testsuite/compress.py
+++ b/src/borg/testsuite/compress.py
@@ -88,11 +88,11 @@ def test_autodetect_invalid():
         Compressor(**params).decompress(b'\x08\x00notreallyzlib')


-def test_zlib_compat():
+def test_zlib_legacy_compat():
     # for compatibility reasons, we do not add an extra header for zlib,
     # nor do we expect one when decompressing / autodetecting
     for level in range(10):
-        c = get_compressor(name='zlib', level=level)
+        c = get_compressor(name='zlib_legacy', level=level)
         cdata1 = c.compress(data)
         cdata2 = zlib.compress(data, level)
         assert cdata1 == cdata2
diff --git a/src/borg/testsuite/item.py b/src/borg/testsuite/item.py
index aa40cc0660..94167e7ea9 100644
--- a/src/borg/testsuite/item.py
+++ b/src/borg/testsuite/item.py
@@ -3,6 +3,7 @@
 from ..cache import ChunkListEntry
 from ..item import Item
 from ..helpers import StableDict
+from ..helpers.msgpack import Timestamp


 def test_item_empty():
@@ -77,15 +78,15 @@ def test_item_int_property():
         item.mode = "invalid"


-def test_item_bigint_property():
+def test_item_mptimestamp_property():
     item = Item()
     small, big = 42, 2 ** 65
     item.atime = small
     assert item.atime == small
-    assert item.as_dict() == {'atime': small}
+    assert item.as_dict() == {'atime': Timestamp.from_unix_nano(small)}
     item.atime = big
     assert item.atime == big
-    assert item.as_dict() == {'atime': b'\0' * 8 + b'\x02'}
+    assert item.as_dict() == {'atime': Timestamp.from_unix_nano(big)}


 def test_item_user_group_none():
@@ -101,7 +102,7 @@ def test_item_se_str_property():
     item = Item()
     item.path = '/a/b/c'
     assert item.path == '/a/b/c'
-    assert item.as_dict() == {'path': b'/a/b/c'}
+    assert item.as_dict() == {'path': '/a/b/c'}
     del item.path
     assert item.as_dict() == {}
     with pytest.raises(TypeError):
@@ -110,11 +111,11 @@ def test_item_se_str_property():
     # non-utf-8 path, needing surrogate-escaping for latin-1 u-umlaut
     item = Item(internal_dict={'path': b'/a/\xfc/c'})
     assert item.path == '/a/\udcfc/c'  # getting a surrogate-escaped representation
-    assert item.as_dict() == {'path': b'/a/\xfc/c'}
+    assert item.as_dict() == {'path': '/a/\udcfc/c'}
     del item.path
     assert 'path' not in item
     item.path = '/a/\udcfc/c'  # setting using a surrogate-escaped representation
-    assert item.as_dict() == {'path': b'/a/\xfc/c'}
+    assert item.as_dict() == {'path': '/a/\udcfc/c'}


 def test_item_list_property():
diff --git a/src/borg/testsuite/key.py b/src/borg/testsuite/key.py
index e0be752fae..0ad5fd9e07 100644
--- a/src/borg/testsuite/key.py
+++ b/src/borg/testsuite/key.py
@@ -360,23 +360,23 @@ def test_round_trip(self, key):
         assert blob.startswith(b'\x82')

         unpacked = msgpack.unpackb(blob)
-        assert unpacked[b'tam'][b'type'] == b'HKDF_HMAC_SHA512'
+        assert unpacked['tam']['type'] == 'HKDF_HMAC_SHA512'

         unpacked, verified = key.unpack_and_verify_manifest(blob)
         assert verified
-        assert unpacked[b'foo'] == b'bar'
-        assert b'tam' not in unpacked
+        assert unpacked['foo'] == 'bar'
+        assert 'tam' not in unpacked

-    @pytest.mark.parametrize('which', (b'hmac', b'salt'))
+    @pytest.mark.parametrize('which', ('hmac', 'salt'))
     def test_tampered(self, key, which):
         data = {'foo': 'bar'}
         blob = key.pack_and_authenticate_metadata(data)
         assert blob.startswith(b'\x82')

         unpacked = msgpack.unpackb(blob, object_hook=StableDict)
-        assert len(unpacked[b'tam'][which]) == 64
-        unpacked[b'tam'][which] = unpacked[b'tam'][which][0:32] + bytes(32)
-        assert len(unpacked[b'tam'][which]) == 64
+        assert len(unpacked['tam'][which]) == 64
+        unpacked['tam'][which] = unpacked['tam'][which][0:32] + bytes(32)
+        assert len(unpacked['tam'][which]) == 64
         blob = msgpack.packb(unpacked)

         with pytest.raises(TAMInvalid):
@@ -421,4 +421,4 @@ def to_dict(key):
     load_me = RepoKey.detect(repository, manifest_data=None)
     assert to_dict(load_me) == to_dict(save_me)

-    assert msgpack.unpackb(a2b_base64(saved))[b'algorithm'] == expected_algorithm.encode()
+    assert msgpack.unpackb(a2b_base64(saved))['algorithm'] == expected_algorithm
diff --git a/src/borg/testsuite/repository.py b/src/borg/testsuite/repository.py
index 83cf728e4b..4b4d6487cb 100644
--- a/src/borg/testsuite/repository.py
+++ b/src/borg/testsuite/repository.py
@@ -655,8 +655,8 @@ def _subtly_corrupted_hints_setup(self):
             hints = msgpack.unpack(fd)
             fd.seek(0)
             # Corrupt segment refcount
-            assert hints[b'segments'][2] == 1
-            hints[b'segments'][2] = 0
+            assert hints['segments'][2] == 1
+            hints['segments'][2] = 0
             msgpack.pack(hints, fd)
             fd.truncate()
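
Aside (illustrative, not part of the patch): the hunks above all follow from msgpack round-tripping str keys and values as str once packing uses use_bin_type=True and unpacking uses raw=False, and from timestamps travelling as native msgpack Timestamp objects instead of the old bigint byte encoding. A minimal sketch against the upstream msgpack-python API (borg wraps this in borg.helpers.msgpack; the wrapper defaults are assumed here, not taken from the diff):

    import msgpack

    # str keys/values survive the round trip as str; bytes stay bytes,
    # which is why lookups like item[b'path'] become item['path'] throughout.
    packed = msgpack.packb({'path': '/a/b/c', 'chunk': b'\x00\x01'}, use_bin_type=True)
    item = msgpack.unpackb(packed, raw=False)
    assert item['path'] == '/a/b/c' and item['chunk'] == b'\x00\x01'

    # timestamps use the msgpack Timestamp ext type (compare the item.py test changes)
    ns = 1_000_000_001
    ts = msgpack.Timestamp.from_unix_nano(ns)
    assert msgpack.unpackb(msgpack.packb(ts)).to_unix_nano() == ns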