Skip to content

Commit

Permalink
ocrd zip: remove Ocrd-Manifestation-Depth
Browse files Browse the repository at this point in the history
  • Loading branch information
kba committed Aug 14, 2022
1 parent e8aa49d commit 16d0933
Show file tree
Hide file tree
Showing 4 changed files with 16 additions and 40 deletions.
4 changes: 1 addition & 3 deletions ocrd/ocrd/cli/zip.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,13 +40,12 @@ def zip_cli():
show_default=True)
@click.option('-i', '--identifier', '--id', help="Ocrd-Identifier", required=True)
@click.option('-I', '--in-place', help="Replace workspace with bag (like bagit.py does)", is_flag=True)
@click.option('-D', '--manifestation-depth', help="Ocrd-Manifestation-Depth", type=click.Choice(['full', 'partial']), default='partial')
@click.option('-m', '--mets', help="location of mets.xml in the bag's data dir", default="mets.xml")
@click.option('-b', '--base-version-checksum', help="Ocrd-Base-Version-Checksum")
@click.option('-t', '--tag-file', help="Add a non-payload file to bag", type=click.Path(file_okay=True, dir_okay=False, readable=True, resolve_path=True), multiple=True)
@click.option('-Z', '--skip-zip', help="Create a directory but do not ZIP it", is_flag=True, default=False)
@click.option('-j', '--processes', help="Number of parallel processes", type=int, default=1)
def bag(directory, mets_basename, dest, identifier, in_place, manifestation_depth, mets, base_version_checksum, tag_file, skip_zip, processes):
def bag(directory, mets_basename, dest, identifier, in_place, mets, base_version_checksum, tag_file, skip_zip, processes):
"""
Bag workspace as OCRD-ZIP at DEST
"""
Expand All @@ -57,7 +56,6 @@ def bag(directory, mets_basename, dest, identifier, in_place, manifestation_dept
workspace,
dest=dest,
ocrd_identifier=identifier,
ocrd_manifestation_depth=manifestation_depth,
ocrd_mets=mets,
ocrd_base_version_checksum=base_version_checksum,
processes=processes,
Expand Down
20 changes: 5 additions & 15 deletions ocrd/ocrd/workspace_bagger.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def _log_or_raise(self, msg):
else:
log.info(msg)

def _bag_mets_files(self, workspace, bagdir, ocrd_manifestation_depth, ocrd_mets, processes):
def _bag_mets_files(self, workspace, bagdir, ocrd_mets, processes):
mets = workspace.mets
changed_urls = {}

Expand All @@ -67,13 +67,10 @@ def _bag_mets_files(self, workspace, bagdir, ocrd_manifestation_depth, ocrd_mets
with pushd_popd(workspace.directory):
# URLs of the files before changing
for f in mets.find_files():
log.info("Resolving %s (%s)", f.url, ocrd_manifestation_depth)
log.info("Resolving %s", f.url)
if is_local_filename(f.url):
# nothing to do then
pass
elif ocrd_manifestation_depth != 'full':
self._log_or_raise("Not fetching non-local files, skipping %s" % f.url)
continue
elif not f.url.startswith('http'):
self._log_or_raise("Not an http URL: %s" % f.url)
continue
Expand Down Expand Up @@ -118,8 +115,7 @@ def _bag_mets_files(self, workspace, bagdir, ocrd_manifestation_depth, ocrd_mets
log.info("New vs. old: %s" % changed_urls)
return total_bytes, total_files

def _set_bag_info(self, bag, total_bytes, total_files, ocrd_identifier,
ocrd_manifestation_depth, ocrd_base_version_checksum, ocrd_mets='mets.xml'):
def _set_bag_info(self, bag, total_bytes, total_files, ocrd_identifier, ocrd_base_version_checksum, ocrd_mets='mets.xml'):
bag.info['BagIt-Profile-Identifier'] = OCRD_BAGIT_PROFILE_URL
bag.info['Bag-Software-Agent'] = 'ocrd/core %s (bagit.py %s, bagit_profile %s) [cmdline: "%s"]' % (
VERSION, # TODO
Expand All @@ -128,7 +124,6 @@ def _set_bag_info(self, bag, total_bytes, total_files, ocrd_identifier,
' '.join(sys.argv))

bag.info['Ocrd-Identifier'] = ocrd_identifier
bag.info['Ocrd-Manifestation-Depth'] = ocrd_manifestation_depth
if ocrd_base_version_checksum:
bag.info['Ocrd-Base-Version-Checksum'] = ocrd_base_version_checksum
bag.info['Bagging-Date'] = str(datetime.now())
Expand All @@ -141,7 +136,6 @@ def bag(self,
ocrd_identifier,
dest=None,
ocrd_mets='mets.xml',
ocrd_manifestation_depth='full',
ocrd_base_version_checksum=None,
processes=1,
skip_zip=False,
Expand All @@ -158,15 +152,12 @@ def bag(self,
ocrd_identifier (string): Ocrd-Identifier in bag-info.txt
dest (string): Path of the generated OCRD-ZIP.
ocrd_mets (string): Ocrd-Mets in bag-info.txt
ord_manifestation_depth (string): Ocrd-Manifestation-Depth in bag-info.txt
ocrd_base_version_checksum (string): Ocrd-Base-Version-Checksum in bag-info.txt
processes (integer): Number of parallel processes checksumming
skip_zip (boolean): Whether to leave directory unzipped
in_place (boolean): Whether to **replace** the workspace with its BagIt variant
tag_files (list<string>): Path names of additional tag files to be bagged at the root of the bag
"""
if ocrd_manifestation_depth not in ('full', 'partial'):
raise Exception("manifestation_depth must be 'full' or 'partial'")
if in_place and (dest is not None):
raise Exception("Setting 'dest' and 'in_place' is a contradiction")
if in_place and not skip_zip:
Expand Down Expand Up @@ -197,12 +188,11 @@ def bag(self,
f.write(BAGIT_TXT.encode('utf-8'))

# create manifests
total_bytes, total_files = self._bag_mets_files(workspace, bagdir, ocrd_manifestation_depth, ocrd_mets, processes)
total_bytes, total_files = self._bag_mets_files(workspace, bagdir, ocrd_mets, processes)

# create bag-info.txt
bag = Bag(bagdir)
self._set_bag_info(bag, total_bytes, total_files, ocrd_identifier, ocrd_manifestation_depth,
ocrd_base_version_checksum, ocrd_mets=ocrd_mets)
self._set_bag_info(bag, total_bytes, total_files, ocrd_identifier, ocrd_base_version_checksum, ocrd_mets=ocrd_mets)

for tag_file in tag_files:
copyfile(tag_file, join(bagdir, basename(tag_file)))
Expand Down
2 changes: 1 addition & 1 deletion tests/validator/test_ocrd_zip_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def tearDown(self):
rmtree(self.tempdir)

def test_validation0(self):
ocrdzip = self.bagger.bag(self.workspace, 'SBB0000F29300010000', ocrd_manifestation_depth='partial')
ocrdzip = self.bagger.bag(self.workspace, 'SBB0000F29300010000')
report = OcrdZipValidator(self.resolver, ocrdzip).validate()
self.assertEqual(report.is_valid, True)

Expand Down
30 changes: 9 additions & 21 deletions tests/validator/test_workspace_bagger.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,6 @@ def setUp(self):
def tearDown(self):
rmtree(self.tempdir)

def test_bad_manifestation_depth(self):
with self.assertRaisesRegex(Exception, "manifestation_depth must be 'full' or 'partial'"):
self.bagger.bag(self.workspace, 'kant_aufklaerung_1784', ocrd_manifestation_depth='foo')

def test_bad_inplace_and_dest(self):
with self.assertRaisesRegex(Exception, "Setting 'dest' and 'in_place' is a contradiction"):
self.bagger.bag(self.workspace, 'kant_aufklaerung_1784', in_place=True, dest='/x/y/z')
Expand All @@ -46,7 +42,6 @@ def test_bag_inplace(self):
self.bagger.bag(
self.workspace,
'kant_aufklaerung_1784',
ocrd_manifestation_depth='partial',
skip_zip=True,
in_place=True,
ocrd_base_version_checksum='123',
Expand All @@ -58,41 +53,35 @@ def test_bag_inplace(self):
def test_bag_zip_and_spill(self):
self.workspace.mets.find_all_files(ID='INPUT_0017')[0].url = 'bad-scheme://foo'
self.workspace.mets.find_all_files(ID='INPUT_0020')[0].url = 'http://google.com'
self.bagger.bag(self.workspace, 'kant_aufklaerung_1784', ocrd_manifestation_depth='full', skip_zip=False, dest=join(self.tempdir, 'out.ocrd.zip'))
self.bagger.bag(self.workspace, 'kant_aufklaerung_1784', skip_zip=False, dest=join(self.tempdir, 'out.ocrd.zip'))
self.bagger.spill(join(self.tempdir, 'out.ocrd.zip'), join(self.tempdir, 'out'))

def test_bag_zip_and_spill_wo_dest(self):
self.bagger.bag(self.workspace, 'kant_aufklaerung_1784', ocrd_manifestation_depth='partial', in_place=False, skip_zip=False, dest=join(self.tempdir, 'out.ocrd.zip'))
self.bagger.bag(self.workspace, 'kant_aufklaerung_1784', in_place=False, skip_zip=False, dest=join(self.tempdir, 'out.ocrd.zip'))
self.bagger.spill(join(self.tempdir, 'out.ocrd.zip'), self.tempdir)

def test_bag_wo_dest(self):
makedirs(BACKUPDIR)
self.bagger.bag(self.workspace, 'kant_aufklaerung_1784', ocrd_manifestation_depth='partial', in_place=True, skip_zip=True)
self.bagger.bag(self.workspace, 'kant_aufklaerung_1784', in_place=True, skip_zip=True)

def test_bag_wo_dest_zip(self):
makedirs(BACKUPDIR)
self.bagger.bag(self.workspace, 'kant_aufklaerung_1784', ocrd_manifestation_depth='partial', in_place=False, skip_zip=True)
self.bagger.bag(self.workspace, 'kant_aufklaerung_1784', in_place=False, skip_zip=True)

def test_bag_partial_http_nostrict(self):
self.bagger.strict = False
makedirs(BACKUPDIR)
self.workspace.mets.find_all_files(ID='INPUT_0020')[0].url = 'http://google.com'
self.bagger.bag(self.workspace, 'kant_aufklaerung_1784', ocrd_manifestation_depth='partial', in_place=False)

def test_bag_partial_http_strict(self):
self.bagger.strict = True
makedirs(BACKUPDIR)
self.workspace.mets.find_all_files(ID='INPUT_0020')[0].url = 'http://google.com'
with self.assertRaisesRegex(Exception, "Not fetching non-local files"):
self.bagger.bag(self.workspace, 'kant_aufklaerung_1784', ocrd_manifestation_depth='partial', in_place=False)
self.bagger.bag(self.workspace, 'kant_aufklaerung_1784', in_place=False)

def test_bag_full(self):
self.bagger.strict = True
f = self.workspace.mets.find_all_files(ID='INPUT_0017')[0]
f.url = 'bad-scheme://foo'
f.local_filename = None
with self.assertRaisesRegex(Exception, "Not an http URL"):
self.bagger.bag(self.workspace, 'kant_aufklaerung_1784', ocrd_manifestation_depth='full', skip_zip=False)
self.bagger.bag(self.workspace, 'kant_aufklaerung_1784', skip_zip=False)
self.bagger.strict = False

def test_spill_dest_not_dir(self):
with self.assertRaisesRegex(Exception, "Not a directory: /dev/stdout"):
Expand All @@ -107,7 +96,7 @@ def test_spill_derived_dest_exists(self):
def test_spill_derived_dest(self):
bag_dest = join(self.bagdir, 'foo.ocrd.zip')
spill_dest = join(self.bagdir, 'foo')
self.bagger.bag(self.workspace, 'kant_aufklaerung_1784', ocrd_manifestation_depth='partial', in_place=False, skip_zip=False, dest=bag_dest)
self.bagger.bag(self.workspace, 'kant_aufklaerung_1784', in_place=False, skip_zip=False, dest=bag_dest)
self.bagger.spill(bag_dest, self.bagdir)
self.assertTrue(exists(spill_dest))

Expand All @@ -122,8 +111,7 @@ def test_bag_with_changed_metsname(self):
workspace = Workspace(self.resolver, directory=workspace_dir, mets_basename=new_metsname)

# act
self.bagger.bag(workspace, "changed-mets-test", ocrd_mets=new_metsname,
ocrd_manifestation_depth='partial', in_place=True, skip_zip=True)
self.bagger.bag(workspace, "changed-mets-test", ocrd_mets=new_metsname, in_place=True, skip_zip=True)

# assert
bag_metspath = join(workspace_dir, "data", new_metsname)
Expand Down

0 comments on commit 16d0933

Please sign in to comment.