Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

defaults for mets_basename and mets_server_url #1156

Merged
merged 1 commit into from
Dec 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion ocrd/ocrd/cli/bashlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from ocrd.constants import BASHLIB_FILENAME
import ocrd.constants
import ocrd_utils.constants
from ocrd_utils.constants import DEFAULT_METS_BASENAME
import ocrd_models.constants
import ocrd_validators.constants
from ocrd.decorators import (
Expand Down Expand Up @@ -78,7 +79,7 @@ def bashlib_constants(name):
print(val)

@bashlib_cli.command('input-files')
@click.option('-m', '--mets', help="METS to process", default="mets.xml")
@click.option('-m', '--mets', help="METS to process", default=DEFAULT_METS_BASENAME)
@click.option('-w', '--working-dir', help="Working Directory")
@click.option('-I', '--input-file-grp', help='File group(s) used as input.', default='INPUT')
@click.option('-O', '--output-file-grp', help='File group(s) used as output.', default='OUTPUT')
Expand Down
4 changes: 2 additions & 2 deletions ocrd/ocrd/cli/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
"""
import click

from ocrd_utils import getLogger, initLogging
from ocrd_utils import getLogger, initLogging, DEFAULT_METS_BASENAME
from ocrd.task_sequence import run_tasks

from ..decorators import ocrd_loglevel
Expand All @@ -18,7 +18,7 @@
# ----------------------------------------------------------------------
@click.command('process')
@ocrd_loglevel
@click.option('-m', '--mets', help="METS to process", default="mets.xml")
@click.option('-m', '--mets', help="METS to process", default=DEFAULT_METS_BASENAME)
@click.option('-g', '--page-id', help="ID(s) of the pages to process")
@click.option('--overwrite', is_flag=True, default=False, help="Remove output pages/images if they already exist")
@click.argument('tasks', nargs=-1, required=True)
Expand Down
4 changes: 2 additions & 2 deletions ocrd/ocrd/cli/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from ocrd import Resolver, Workspace
from ocrd.task_sequence import ProcessorTask, validate_tasks

from ocrd_utils import initLogging, parse_json_string_or_file
from ocrd_utils import initLogging, parse_json_string_or_file, DEFAULT_METS_BASENAME
from ocrd_validators import (
OcrdToolValidator,
OcrdZipValidator,
Expand Down Expand Up @@ -101,7 +101,7 @@ def validate_page(page, **kwargs):

@validate_cli.command('tasks')
@click.option('--workspace', nargs=1, required=False, help='Workspace directory these tasks are to be run. If omitted, only validate syntax')
@click.option('-M', '--mets-basename', nargs=1, default='mets.xml', help='Basename of the METS file, used in conjunction with --workspace')
@click.option('-M', '--mets-basename', nargs=1, default=DEFAULT_METS_BASENAME, help='Basename of the METS file, used in conjunction with --workspace')
@click.option('--overwrite', is_flag=True, default=False, help='When checking against a concrete workspace, simulate overwriting output or page range.')
@click.option('-g', '--page-id', help="ID(s) of the pages to process")
@click.argument('tasks', nargs=-1, required=True)
Expand Down
4 changes: 2 additions & 2 deletions ocrd/ocrd/cli/workspace.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,14 @@

from ocrd import Resolver, Workspace, WorkspaceValidator, WorkspaceBackupManager
from ocrd.mets_server import OcrdMetsServer
from ocrd_utils import getLogger, initLogging, pushd_popd, EXT_TO_MIME, safe_filename, parse_json_string_or_file, partition_list
from ocrd_utils import getLogger, initLogging, pushd_popd, EXT_TO_MIME, safe_filename, parse_json_string_or_file, partition_list, DEFAULT_METS_BASENAME
from ocrd.decorators import mets_find_options
from . import command_with_replaced_help


class WorkspaceCtx():

def __init__(self, directory, mets_url, mets_basename, mets_server_url, automatic_backup):
def __init__(self, directory, mets_url, mets_basename=DEFAULT_METS_BASENAME, mets_server_url=None, automatic_backup=False):
self.log = getLogger('ocrd.cli.workspace')
if mets_basename:
self.log.warning(DeprecationWarning('--mets-basename is deprecated. Use --mets/--directory instead.'))
Expand Down
6 changes: 3 additions & 3 deletions ocrd/ocrd/cli/zip.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

import click

from ocrd_utils import initLogging
from ocrd_utils import initLogging, DEFAULT_METS_BASENAME
from ocrd_validators import OcrdZipValidator

from ..resolver import Resolver
Expand All @@ -35,13 +35,13 @@ def zip_cli():
help='Workspace folder location.',
show_default=True)
@click.option('-M', '--mets-basename',
default="mets.xml",
default=DEFAULT_METS_BASENAME,
help='Basename of the METS file.',
show_default=True)
@click.option('-q', '--include-file-grps', 'include_fileGrp', help="fileGrps to include", default=[], multiple=True)
@click.option('-Q', '--exclude-file-grps', 'exclude_fileGrp', help="fileGrps to exclude", default=[], multiple=True)
@click.option('-i', '--identifier', '--id', help="Ocrd-Identifier", required=True)
@click.option('-m', '--mets', help="location of mets.xml in the bag's data dir", default="mets.xml")
@click.option('-m', '--mets', help="location of mets.xml in the bag's data dir", default=DEFAULT_METS_BASENAME)
@click.option('-b', '--base-version-checksum', help="Ocrd-Base-Version-Checksum")
@click.option('-t', '--tag-file', help="Add a non-payload file to bag", type=click.Path(file_okay=True, dir_okay=False, readable=True, resolve_path=True), multiple=True)
@click.option('-Z', '--skip-zip', help="Create a directory but do not ZIP it", is_flag=True, default=False)
Expand Down
3 changes: 2 additions & 1 deletion ocrd/ocrd/decorators/ocrd_cli_options.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import click
from click import option, Path, group, command, argument
from ocrd_utils import DEFAULT_METS_BASENAME
from ocrd_network import NETWORK_AGENT_SERVER, NETWORK_AGENT_WORKER
from .parameter_option import parameter_option, parameter_override_option
from .loglevel_option import loglevel_option
Expand All @@ -25,7 +26,7 @@ def cli(mets_url):
"""
# XXX Note that the `--help` output is statically generated by generate_processor_help
params = [
option('-m', '--mets', help="METS to process", default="mets.xml"),
option('-m', '--mets', help="METS to process", default=DEFAULT_METS_BASENAME),
option('-w', '--working-dir', help="Working Directory"),
option('-U', '--mets-server-url', help="METS server URL. Starts with http:// then TCP, otherwise unix socket path"),
# TODO OCR-D/core#274
Expand Down
7 changes: 4 additions & 3 deletions ocrd/ocrd/resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from ocrd.constants import TMP_PREFIX
from ocrd_utils import (
config,
DEFAULT_METS_BASENAME,
getLogger,
is_local_filename,
get_local_filename,
Expand Down Expand Up @@ -224,7 +225,7 @@ def workspace_from_url(

return workspace

def workspace_from_nothing(self, directory, mets_basename='mets.xml', clobber_mets=False):
def workspace_from_nothing(self, directory, mets_basename=DEFAULT_METS_BASENAME, clobber_mets=False):
"""
Create an empty workspace.

Expand Down Expand Up @@ -252,7 +253,7 @@ def workspace_from_nothing(self, directory, mets_basename='mets.xml', clobber_me

return Workspace(self, directory, mets, mets_basename=mets_basename)

def resolve_mets_arguments(self, directory, mets_url, mets_basename, mets_server_url):
def resolve_mets_arguments(self, directory, mets_url, mets_basename=DEFAULT_METS_BASENAME, mets_server_url=None):
"""
Resolve the ``--mets``, ``--mets-basename``, ``--directory`` and
``--mets-server-url`` arguments into a coherent set of arguments
Expand All @@ -275,7 +276,7 @@ def resolve_mets_arguments(self, directory, mets_url, mets_basename, mets_server
if not mets_basename and mets_url:
mets_basename = Path(mets_url).name
elif not mets_basename and not mets_url:
mets_basename = 'mets.xml'
mets_basename = DEFAULT_METS_BASENAME
elif mets_basename and mets_url:
raise ValueError("Use either --mets or --mets-basename, not both")
else:
Expand Down
3 changes: 2 additions & 1 deletion ocrd/ocrd/workspace.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
pushd_popd,
is_local_filename,
deprecated_alias,
DEFAULT_METS_BASENAME,
MIME_TO_EXT,
MIME_TO_PIL,
MIMETYPE_PAGE,
Expand Down Expand Up @@ -70,7 +71,7 @@ class Workspace():
baseurl (string) : Base URL to prefix to relative URL.
"""

def __init__(self, resolver, directory, mets=None, mets_basename='mets.xml', automatic_backup=False, baseurl=None, mets_server_url=None):
def __init__(self, resolver, directory, mets=None, mets_basename=DEFAULT_METS_BASENAME, automatic_backup=False, baseurl=None, mets_server_url=None):
self.resolver = resolver
self.directory = directory
self.mets_target = str(Path(directory, mets_basename))
Expand Down
8 changes: 4 additions & 4 deletions ocrd/ocrd/workspace_backup.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import hashlib

from ocrd_models import OcrdMets
from ocrd_utils import getLogger, atomic_write
from ocrd_utils import getLogger, atomic_write, DEFAULT_METS_BASENAME

from .constants import BACKUP_DIR

Expand All @@ -17,7 +17,7 @@ class WorkspaceBackup():

@classmethod
def from_path(cls, d):
mets_file = join(d, 'mets.xml')
mets_file = join(d, DEFAULT_METS_BASENAME)
(chksum, lastmod) = basename(d).split('.', maxsplit=1)
size = getsize(mets_file)
mets_xml = OcrdMets(filename=mets_file)
Expand Down Expand Up @@ -61,7 +61,7 @@ def restore(self, chksum, choose_first=False):
bak = candidates[0]
self.add()
log.info("Restoring from %s/mets.xml" % bak)
src = join(bak, 'mets.xml')
src = join(bak, DEFAULT_METS_BASENAME)
dest = self.workspace.mets_target
log.debug('cp "%s" "%s"', src, dest)
copy(src, dest)
Expand All @@ -80,7 +80,7 @@ def add(self):
else:
timestamp = datetime.now().timestamp()
d = join(self.backup_directory, '%s.%s' % (chksum, timestamp))
mets_file = join(d, 'mets.xml')
mets_file = join(d, DEFAULT_METS_BASENAME)
log.info("Backing up to %s" % mets_file)
makedirs(d)
with atomic_write(mets_file) as f:
Expand Down
10 changes: 5 additions & 5 deletions ocrd/ocrd/workspace_bagger.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
getLogger,
MIME_TO_EXT,
unzip_file_to_dir,

DEFAULT_METS_BASENAME,
MIMETYPE_PAGE,
VERSION,
)
Expand Down Expand Up @@ -113,7 +113,7 @@ def _bag_mets_files(
log.info("New vs. old: %s" % changed_local_filenames)
return total_bytes, total_files

def _set_bag_info(self, bag, total_bytes, total_files, ocrd_identifier, ocrd_base_version_checksum, ocrd_mets='mets.xml'):
def _set_bag_info(self, bag, total_bytes, total_files, ocrd_identifier, ocrd_base_version_checksum, ocrd_mets=DEFAULT_METS_BASENAME):
bag.info['BagIt-Profile-Identifier'] = OCRD_BAGIT_PROFILE_URL
bag.info['Bag-Software-Agent'] = 'ocrd/core %s (bagit.py %s, bagit_profile %s) [cmdline: "%s"]' % (
VERSION, # TODO
Expand All @@ -126,14 +126,14 @@ def _set_bag_info(self, bag, total_bytes, total_files, ocrd_identifier, ocrd_bas
bag.info['Ocrd-Base-Version-Checksum'] = ocrd_base_version_checksum
bag.info['Bagging-Date'] = str(datetime.now())
bag.info['Payload-Oxum'] = '%s.%s' % (total_bytes, total_files)
if ocrd_mets != 'mets.xml':
if ocrd_mets != DEFAULT_METS_BASENAME:
bag.info['Ocrd-Mets'] = ocrd_mets

def bag(self,
workspace,
ocrd_identifier,
dest=None,
ocrd_mets='mets.xml',
ocrd_mets=DEFAULT_METS_BASENAME,
ocrd_base_version_checksum=None,
processes=1,
skip_zip=False,
Expand Down Expand Up @@ -245,7 +245,7 @@ def spill(self, src, dest):
rmtree(bagdir)

# Create workspace
mets_basename = bag_info.get("Ocrd-Mets", "mets.xml")
mets_basename = bag_info.get("Ocrd-Mets", DEFAULT_METS_BASENAME)
workspace = Workspace(self.resolver, directory=dest, mets_basename=mets_basename)

# TODO validate workspace
Expand Down
4 changes: 3 additions & 1 deletion ocrd_network/ocrd_network/cli/client.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import click
from typing import Optional

from ocrd_utils import DEFAULT_METS_BASENAME

from ocrd.decorators import (
parameter_option,
parameter_override_option
Expand Down Expand Up @@ -36,7 +38,7 @@ def processing_cli():
@processing_cli.command('processor')
@click.argument('processor_name', required=True, type=click.STRING)
@click.option('--address')
@click.option('-m', '--mets', required=True, default="mets.xml")
@click.option('-m', '--mets', required=True, default=DEFAULT_METS_BASENAME)
@click.option('-I', '--input-file-grp', default='OCR-D-INPUT')
@click.option('-O', '--output-file-grp', default='OCR-D-OUTPUT')
@click.option('-g', '--page-id')
Expand Down
1 change: 1 addition & 0 deletions ocrd_utils/ocrd_utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@
"""

from .constants import (
DEFAULT_METS_BASENAME,
EXT_TO_MIME,
MIMETYPE_PAGE,
MIME_TO_EXT,
Expand Down
2 changes: 2 additions & 0 deletions ocrd_utils/ocrd_utils/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,3 +108,5 @@
LOG_TIMEFMT = r'%H:%M:%S'

RESOURCE_LOCATIONS = ['data', 'cwd', 'system', 'module']

DEFAULT_METS_BASENAME = 'mets.xml'
4 changes: 2 additions & 2 deletions ocrd_validators/ocrd_validators/workspace_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from traceback import format_exc
from pathlib import Path

from ocrd_utils import getLogger, MIMETYPE_PAGE, pushd_popd, is_local_filename
from ocrd_utils import getLogger, MIMETYPE_PAGE, pushd_popd, is_local_filename, DEFAULT_METS_BASENAME
from ocrd_models import ValidationReport
from ocrd_modelfactory import page_from_file

Expand Down Expand Up @@ -91,7 +91,7 @@ def __init__(self, resolver, mets_url, src_dir=None, skip=None, download=False,
self.log.debug('resolver=%s mets_url=%s src_dir=%s', resolver, mets_url, src_dir)
self.resolver = resolver
if mets_url is None and src_dir is not None:
mets_url = '%s/mets.xml' % src_dir
mets_url = f'{src_dir}/{DEFAULT_METS_BASENAME}'
self.mets_url = mets_url
self.download = download
self.page_strictness = page_strictness
Expand Down