From 16d0933178768548893226bb8851cd0fa99fa19c Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Sun, 14 Aug 2022 17:32:05 +0200 Subject: [PATCH] ocrd zip: remove Ocrd-Manifestation-Depth --- ocrd/ocrd/cli/zip.py | 4 +-- ocrd/ocrd/workspace_bagger.py | 20 ++++----------- tests/validator/test_ocrd_zip_validator.py | 2 +- tests/validator/test_workspace_bagger.py | 30 +++++++--------------- 4 files changed, 16 insertions(+), 40 deletions(-) diff --git a/ocrd/ocrd/cli/zip.py b/ocrd/ocrd/cli/zip.py index eebd49ac20..722b695a7a 100644 --- a/ocrd/ocrd/cli/zip.py +++ b/ocrd/ocrd/cli/zip.py @@ -40,13 +40,12 @@ def zip_cli(): show_default=True) @click.option('-i', '--identifier', '--id', help="Ocrd-Identifier", required=True) @click.option('-I', '--in-place', help="Replace workspace with bag (like bagit.py does)", is_flag=True) -@click.option('-D', '--manifestation-depth', help="Ocrd-Manifestation-Depth", type=click.Choice(['full', 'partial']), default='partial') @click.option('-m', '--mets', help="location of mets.xml in the bag's data dir", default="mets.xml") @click.option('-b', '--base-version-checksum', help="Ocrd-Base-Version-Checksum") @click.option('-t', '--tag-file', help="Add a non-payload file to bag", type=click.Path(file_okay=True, dir_okay=False, readable=True, resolve_path=True), multiple=True) @click.option('-Z', '--skip-zip', help="Create a directory but do not ZIP it", is_flag=True, default=False) @click.option('-j', '--processes', help="Number of parallel processes", type=int, default=1) -def bag(directory, mets_basename, dest, identifier, in_place, manifestation_depth, mets, base_version_checksum, tag_file, skip_zip, processes): +def bag(directory, mets_basename, dest, identifier, in_place, mets, base_version_checksum, tag_file, skip_zip, processes): """ Bag workspace as OCRD-ZIP at DEST """ @@ -57,7 +56,6 @@ def bag(directory, mets_basename, dest, identifier, in_place, manifestation_dept workspace, dest=dest, ocrd_identifier=identifier, - ocrd_manifestation_depth=manifestation_depth, ocrd_mets=mets, ocrd_base_version_checksum=base_version_checksum, processes=processes, diff --git a/ocrd/ocrd/workspace_bagger.py b/ocrd/ocrd/workspace_bagger.py index 98c797a4b7..fcbd014223 100644 --- a/ocrd/ocrd/workspace_bagger.py +++ b/ocrd/ocrd/workspace_bagger.py @@ -58,7 +58,7 @@ def _log_or_raise(self, msg): else: log.info(msg) - def _bag_mets_files(self, workspace, bagdir, ocrd_manifestation_depth, ocrd_mets, processes): + def _bag_mets_files(self, workspace, bagdir, ocrd_mets, processes): mets = workspace.mets changed_urls = {} @@ -67,13 +67,10 @@ def _bag_mets_files(self, workspace, bagdir, ocrd_manifestation_depth, ocrd_mets with pushd_popd(workspace.directory): # URLs of the files before changing for f in mets.find_files(): - log.info("Resolving %s (%s)", f.url, ocrd_manifestation_depth) + log.info("Resolving %s", f.url) if is_local_filename(f.url): # nothing to do then pass - elif ocrd_manifestation_depth != 'full': - self._log_or_raise("Not fetching non-local files, skipping %s" % f.url) - continue elif not f.url.startswith('http'): self._log_or_raise("Not an http URL: %s" % f.url) continue @@ -118,8 +115,7 @@ def _bag_mets_files(self, workspace, bagdir, ocrd_manifestation_depth, ocrd_mets log.info("New vs. old: %s" % changed_urls) return total_bytes, total_files - def _set_bag_info(self, bag, total_bytes, total_files, ocrd_identifier, - ocrd_manifestation_depth, ocrd_base_version_checksum, ocrd_mets='mets.xml'): + def _set_bag_info(self, bag, total_bytes, total_files, ocrd_identifier, ocrd_base_version_checksum, ocrd_mets='mets.xml'): bag.info['BagIt-Profile-Identifier'] = OCRD_BAGIT_PROFILE_URL bag.info['Bag-Software-Agent'] = 'ocrd/core %s (bagit.py %s, bagit_profile %s) [cmdline: "%s"]' % ( VERSION, # TODO @@ -128,7 +124,6 @@ def _set_bag_info(self, bag, total_bytes, total_files, ocrd_identifier, ' '.join(sys.argv)) bag.info['Ocrd-Identifier'] = ocrd_identifier - bag.info['Ocrd-Manifestation-Depth'] = ocrd_manifestation_depth if ocrd_base_version_checksum: bag.info['Ocrd-Base-Version-Checksum'] = ocrd_base_version_checksum bag.info['Bagging-Date'] = str(datetime.now()) @@ -141,7 +136,6 @@ def bag(self, ocrd_identifier, dest=None, ocrd_mets='mets.xml', - ocrd_manifestation_depth='full', ocrd_base_version_checksum=None, processes=1, skip_zip=False, @@ -158,15 +152,12 @@ def bag(self, ord_identifier (string): Ocrd-Identifier in bag-info.txt dest (string): Path of the generated OCRD-ZIP. ord_mets (string): Ocrd-Mets in bag-info.txt - ord_manifestation_depth (string): Ocrd-Manifestation-Depth in bag-info.txt ord_base_version_checksum (string): Ocrd-Base-Version-Checksum in bag-info.txt processes (integer): Number of parallel processes checksumming skip_zip (boolean): Whether to leave directory unzipped in_place (boolean): Whether to **replace** the workspace with its BagIt variant tag_files (list): Path names of additional tag files to be bagged at the root of the bag """ - if ocrd_manifestation_depth not in ('full', 'partial'): - raise Exception("manifestation_depth must be 'full' or 'partial'") if in_place and (dest is not None): raise Exception("Setting 'dest' and 'in_place' is a contradiction") if in_place and not skip_zip: @@ -197,12 +188,11 @@ def bag(self, f.write(BAGIT_TXT.encode('utf-8')) # create manifests - total_bytes, total_files = self._bag_mets_files(workspace, bagdir, ocrd_manifestation_depth, ocrd_mets, processes) + total_bytes, total_files = self._bag_mets_files(workspace, bagdir, ocrd_mets, processes) # create bag-info.txt bag = Bag(bagdir) - self._set_bag_info(bag, total_bytes, total_files, ocrd_identifier, ocrd_manifestation_depth, - ocrd_base_version_checksum, ocrd_mets=ocrd_mets) + self._set_bag_info(bag, total_bytes, total_files, ocrd_identifier, ocrd_base_version_checksum, ocrd_mets=ocrd_mets) for tag_file in tag_files: copyfile(tag_file, join(bagdir, basename(tag_file))) diff --git a/tests/validator/test_ocrd_zip_validator.py b/tests/validator/test_ocrd_zip_validator.py index 2ea022b34c..4c39c3ea33 100644 --- a/tests/validator/test_ocrd_zip_validator.py +++ b/tests/validator/test_ocrd_zip_validator.py @@ -27,7 +27,7 @@ def tearDown(self): rmtree(self.tempdir) def test_validation0(self): - ocrdzip = self.bagger.bag(self.workspace, 'SBB0000F29300010000', ocrd_manifestation_depth='partial') + ocrdzip = self.bagger.bag(self.workspace, 'SBB0000F29300010000') report = OcrdZipValidator(self.resolver, ocrdzip).validate() self.assertEqual(report.is_valid, True) diff --git a/tests/validator/test_workspace_bagger.py b/tests/validator/test_workspace_bagger.py index a1a729c7ee..86ec28a14e 100644 --- a/tests/validator/test_workspace_bagger.py +++ b/tests/validator/test_workspace_bagger.py @@ -30,10 +30,6 @@ def setUp(self): def tearDown(self): rmtree(self.tempdir) - def test_bad_manifestation_depth(self): - with self.assertRaisesRegex(Exception, "manifestation_depth must be 'full' or 'partial'"): - self.bagger.bag(self.workspace, 'kant_aufklaerung_1784', ocrd_manifestation_depth='foo') - def test_bad_inplace_and_dest(self): with self.assertRaisesRegex(Exception, "Setting 'dest' and 'in_place' is a contradiction"): self.bagger.bag(self.workspace, 'kant_aufklaerung_1784', in_place=True, dest='/x/y/z') @@ -46,7 +42,6 @@ def test_bag_inplace(self): self.bagger.bag( self.workspace, 'kant_aufklaerung_1784', - ocrd_manifestation_depth='partial', skip_zip=True, in_place=True, ocrd_base_version_checksum='123', @@ -58,33 +53,26 @@ def test_bag_inplace(self): def test_bag_zip_and_spill(self): self.workspace.mets.find_all_files(ID='INPUT_0017')[0].url = 'bad-scheme://foo' self.workspace.mets.find_all_files(ID='INPUT_0020')[0].url = 'http://google.com' - self.bagger.bag(self.workspace, 'kant_aufklaerung_1784', ocrd_manifestation_depth='full', skip_zip=False, dest=join(self.tempdir, 'out.ocrd.zip')) + self.bagger.bag(self.workspace, 'kant_aufklaerung_1784', skip_zip=False, dest=join(self.tempdir, 'out.ocrd.zip')) self.bagger.spill(join(self.tempdir, 'out.ocrd.zip'), join(self.tempdir, 'out')) def test_bag_zip_and_spill_wo_dest(self): - self.bagger.bag(self.workspace, 'kant_aufklaerung_1784', ocrd_manifestation_depth='partial', in_place=False, skip_zip=False, dest=join(self.tempdir, 'out.ocrd.zip')) + self.bagger.bag(self.workspace, 'kant_aufklaerung_1784', in_place=False, skip_zip=False, dest=join(self.tempdir, 'out.ocrd.zip')) self.bagger.spill(join(self.tempdir, 'out.ocrd.zip'), self.tempdir) def test_bag_wo_dest(self): makedirs(BACKUPDIR) - self.bagger.bag(self.workspace, 'kant_aufklaerung_1784', ocrd_manifestation_depth='partial', in_place=True, skip_zip=True) + self.bagger.bag(self.workspace, 'kant_aufklaerung_1784', in_place=True, skip_zip=True) def test_bag_wo_dest_zip(self): makedirs(BACKUPDIR) - self.bagger.bag(self.workspace, 'kant_aufklaerung_1784', ocrd_manifestation_depth='partial', in_place=False, skip_zip=True) + self.bagger.bag(self.workspace, 'kant_aufklaerung_1784', in_place=False, skip_zip=True) def test_bag_partial_http_nostrict(self): self.bagger.strict = False makedirs(BACKUPDIR) self.workspace.mets.find_all_files(ID='INPUT_0020')[0].url = 'http://google.com' - self.bagger.bag(self.workspace, 'kant_aufklaerung_1784', ocrd_manifestation_depth='partial', in_place=False) - - def test_bag_partial_http_strict(self): - self.bagger.strict = True - makedirs(BACKUPDIR) - self.workspace.mets.find_all_files(ID='INPUT_0020')[0].url = 'http://google.com' - with self.assertRaisesRegex(Exception, "Not fetching non-local files"): - self.bagger.bag(self.workspace, 'kant_aufklaerung_1784', ocrd_manifestation_depth='partial', in_place=False) + self.bagger.bag(self.workspace, 'kant_aufklaerung_1784', in_place=False) def test_bag_full(self): self.bagger.strict = True @@ -92,7 +80,8 @@ def test_bag_full(self): f.url = 'bad-scheme://foo' f.local_filename = None with self.assertRaisesRegex(Exception, "Not an http URL"): - self.bagger.bag(self.workspace, 'kant_aufklaerung_1784', ocrd_manifestation_depth='full', skip_zip=False) + self.bagger.bag(self.workspace, 'kant_aufklaerung_1784', skip_zip=False) + self.bagger.strict = False def test_spill_dest_not_dir(self): with self.assertRaisesRegex(Exception, "Not a directory: /dev/stdout"): @@ -107,7 +96,7 @@ def test_spill_derived_dest_exists(self): def test_spill_derived_dest(self): bag_dest = join(self.bagdir, 'foo.ocrd.zip') spill_dest = join(self.bagdir, 'foo') - self.bagger.bag(self.workspace, 'kant_aufklaerung_1784', ocrd_manifestation_depth='partial', in_place=False, skip_zip=False, dest=bag_dest) + self.bagger.bag(self.workspace, 'kant_aufklaerung_1784', in_place=False, skip_zip=False, dest=bag_dest) self.bagger.spill(bag_dest, self.bagdir) self.assertTrue(exists(spill_dest)) @@ -122,8 +111,7 @@ def test_bag_with_changed_metsname(self): workspace = Workspace(self.resolver, directory=workspace_dir, mets_basename=new_metsname) # act - self.bagger.bag(workspace, "changed-mets-test", ocrd_mets=new_metsname, - ocrd_manifestation_depth='partial', in_place=True, skip_zip=True) + self.bagger.bag(workspace, "changed-mets-test", ocrd_mets=new_metsname, in_place=True, skip_zip=True) # assert bag_metspath = join(workspace_dir, "data", new_metsname)