Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for --wrap option #129

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 54 additions & 7 deletions bagit.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import signal
import sys
import tempfile
import textwrap
import unicodedata
import warnings
from collections import defaultdict
Expand Down Expand Up @@ -137,7 +138,8 @@ def find_locale_dir():


def make_bag(
bag_dir, bag_info=None, processes=1, checksums=None, checksum=None, encoding="utf-8"
bag_dir, bag_info=None, processes=1, checksums=None, checksum=None,
encoding="utf-8", tag_wrap_column=0
):
"""
Convert a given directory into a bag. You can pass in arbitrary
Expand Down Expand Up @@ -256,7 +258,7 @@ def make_bag(
)

bag_info["Payload-Oxum"] = "%s.%s" % (total_bytes, total_files)
_make_tag_file("bag-info.txt", bag_info)
_make_tag_file("bag-info.txt", bag_info, tag_wrap_column)

for c in checksums:
_make_tagmanifest_file(c, bag_dir, encoding="utf-8")
Expand Down Expand Up @@ -450,7 +452,7 @@ def payload_entries(self):
if key.startswith("data" + os.sep)
)

def save(self, processes=1, manifests=False):
def save(self, processes=1, manifests=False, tag_wrap_column=0):
"""
save will persist any changes that have been made to the bag
metadata (self.info).
Expand All @@ -463,6 +465,11 @@ def save(self, processes=1, manifests=False):

If you want to control the number of processes that are used when
recalculating checksums use the processes parameter.

If you want long tag values to be wrapped by breaking long strings at
whitespace characters, set tag_wrap_column to a value greater than 0.
An integer value greater than 0 causes line-wrapping to be performed on
a best-effort basis to limit line lengths to the given value.
"""
# Error checking
if not self.path:
Expand Down Expand Up @@ -514,7 +521,7 @@ def save(self, processes=1, manifests=False):
LOGGER.info(_("Updating Payload-Oxum in %s"), self.tag_file_name)
self.info["Payload-Oxum"] = "%s.%s" % (total_bytes, total_files)

_make_tag_file(self.tag_file_name, self.info)
_make_tag_file(self.tag_file_name, self.info, tag_wrap_column)

# Update tag-manifest for changes to manifest & bag-info files
for alg in self.algorithms:
Expand Down Expand Up @@ -1219,19 +1226,47 @@ def _parse_tags(tag_file):
yield (tag_name, tag_value.strip())


def _make_tag_file(bag_info_path, bag_info):
def _make_tag_file(bag_info_path, bag_info, tag_wrap_column):
headers = sorted(bag_info.keys())
with open_text_file(bag_info_path, "w") as f:
for h in headers:
values = bag_info[h]
if not isinstance(values, list):
values = [values]
for txt in values:
# strip CR, LF and CRLF so they don't mess up the tag file
txt = re.sub(r"\n|\r|(\r\n)", "", force_unicode(txt))
txt = _format_tag_value(h, txt, tag_wrap_column)
f.write("%s: %s\n" % (h, txt))


def _format_tag_value(header, txt, tag_wrap_column):
txt = force_unicode(txt)
if tag_wrap_column > 1:
# Account for colon & space written after the property name.
prop_width = len(header) + 2
# If the wrap column falls in the middle of the first word of the
# value, start on the next line. This avoids a very long first line.
first_break = prop_width + len(txt.split(None, 1)[0])
if tag_wrap_column <= first_break:
# Start value on the next line.
txt = '\n'.join(textwrap.wrap(txt, initial_indent='\n ',
width=tag_wrap_column,
break_long_words=False,
break_on_hyphens=False,
subsequent_indent=' '))
else:
# Account for tag name by temporarily adding a leading space
# before calling wrap(), then removing the space later below.
txt = '\n'.join(textwrap.wrap(txt, initial_indent=' '*prop_width,
width=tag_wrap_column,
break_long_words=False,
break_on_hyphens=False,
subsequent_indent=' '))
txt = txt[prop_width:]
else:
txt = re.sub(r"\n|\r|(\r\n)", "", txt)
return txt


def make_manifests(data_dir, processes, algorithms=DEFAULT_CHECKSUMS, encoding="utf-8"):
LOGGER.info(
_("Using %(process_count)d processes to generate manifests: %(algorithms)s"),
Expand Down Expand Up @@ -1499,6 +1534,17 @@ def _make_parser():
" without performing checksum validation to detect corruption."
),
)
parser.add_argument(
"--wrap-tags",
type=int,
dest="tag_wrap_column",
default=0,
help=_(
"Limit line lengths in the tag file (bag-info.txt) by"
" wrapping long values and indenting subsequent lines with a"
" space character. (Default: don't.)"
),
)

checksum_args = parser.add_argument_group(
_("Checksum Algorithms"),
Expand Down Expand Up @@ -1596,6 +1642,7 @@ def main():
bag_info=args.bag_info,
processes=args.processes,
checksums=args.checksums,
tag_wrap_column=args.tag_wrap_column,
)
except Exception as exc:
LOGGER.error(
Expand Down
24 changes: 24 additions & 0 deletions test.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,30 @@ def test_validate_fast(self):
os.remove(j(self.tmpdir, "data", "loc", "2478433644_2839c5e8b8_o_d.jpg"))
self.assertRaises(bagit.BagValidationError, self.validate, bag, fast=True)

def test_validate_wrapped_tag_lines(self):
info = {"External-Description":
('BagIt is a set of hierarchical file layout conventions'
' designed to support storage and transfer of arbitrary'
' digital content. A bag consists of a directory containing'
' the payload files and other accompanying metadata files'
' known as "tag" files.')}
bag = bagit.make_bag(self.tmpdir, bag_info=info, tag_wrap_column=79)
self.assertEqual(self.validate(bag, fast=True), True)
os.remove(j(self.tmpdir, "data", "loc", "2478433644_2839c5e8b8_o_d.jpg"))
self.assertRaises(bagit.BagValidationError, self.validate, bag, fast=True)

def test_validate_wrapped_tag_lines_short(self):
info = {"External-Description":
('BagIt is a set of hierarchical file layout conventions'
' designed to support storage and transfer of arbitrary'
' digital content. A bag consists of a directory containing'
' the payload files and other accompanying metadata files'
' known as "tag" files.')}
bag = bagit.make_bag(self.tmpdir, bag_info=info, tag_wrap_column=18)
self.assertEqual(self.validate(bag, fast=True), True)
os.remove(j(self.tmpdir, "data", "loc", "2478433644_2839c5e8b8_o_d.jpg"))
self.assertRaises(bagit.BagValidationError, self.validate, bag, fast=True)

def test_validate_completeness(self):
bag = bagit.make_bag(self.tmpdir)
old_path = j(self.tmpdir, "data", "README")
Expand Down