diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8fc87a822..9c930133b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,13 @@ Versioned according to [Semantic Versioning](http://semver.org/).
 
 ## Unreleased
 
+Added:
+
+  * `ocrd ocrd-tool`: wrap `list-resources` and `show-resource` from `Processor`
+  * bashlib `ocrd__parse_argv`: add `--list-resources` and `--show-resource`, #751
+  * `ocrd bashlib`: wrap `input-files` from `Processor` and `make_file_id`
+  * bashlib `ocrd__wrap`: offer `ocrd__files` and `ocrd__input_file`, #571
+
 ## [2.28.0] - 2021-11-30
 
 Added:
diff --git a/ocrd/bashlib/src/dumpjson.bash b/ocrd/bashlib/src/dumpjson.bash
index 34e06be6d..633be7f69 100644
--- a/ocrd/bashlib/src/dumpjson.bash
+++ b/ocrd/bashlib/src/dumpjson.bash
@@ -13,3 +13,16 @@ ocrd__dumpjson () {
     ocrd ocrd-tool "$OCRD_TOOL_JSON" tool "$OCRD_TOOL_NAME" dump
 }
 
+##
+## Output file resource content.
+##
+ocrd__show_resource () {
+    ocrd ocrd-tool "$OCRD_TOOL_JSON" tool "$OCRD_TOOL_NAME" show-resource "$1"
+}
+
+##
+## Output file resource names.
+##
+ocrd__list_resources () {
+    ocrd ocrd-tool "$OCRD_TOOL_JSON" tool "$OCRD_TOOL_NAME" list-resources
+}
diff --git a/ocrd/bashlib/src/parse_argv.bash b/ocrd/bashlib/src/parse_argv.bash
index 542372166..8d68a0717 100644
--- a/ocrd/bashlib/src/parse_argv.bash
+++ b/ocrd/bashlib/src/parse_argv.bash
@@ -34,6 +34,8 @@ ocrd__parse_argv () {
             -l|--log-level) ocrd__argv[log_level]=$2 ; shift ;;
             -h|--help|--usage) ocrd__usage; exit ;;
             -J|--dump-json) ocrd__dumpjson; exit ;;
+            -C|--show-resource) ocrd__show_resource "$2"; exit ;;
+            -L|--list-resources) ocrd__list_resources; exit ;;
             -p|--parameter) __parameters+=(-p "$2") ; shift ;;
             -P|--parameter-override) __parameter_overrides+=(-P "$2" "$3") ; shift ; shift ;;
             -g|--page-id) ocrd__argv[page_id]=$2 ; shift ;;
diff --git a/ocrd/bashlib/src/wrap.bash b/ocrd/bashlib/src/wrap.bash
index aaa2cee99..e46f5b90d 100644
--- a/ocrd/bashlib/src/wrap.bash
+++ b/ocrd/bashlib/src/wrap.bash
@@ -27,4 +27,24 @@ ocrd__wrap () {
 
     ocrd__parse_argv "$@"
 
+    # Read one associative-array initializer per input file (as printed by
+    # `ocrd bashlib input-files`) into ocrd__file0, ocrd__file1, ... and
+    # record those variable names in the indexed array ocrd__files.
+    local i=0 line
+    declare -ag ocrd__files
+    while read -r line; do
+        eval declare -Ag "ocrd__file$i=( $line )"
+        eval "ocrd__files[$i]=ocrd__file$i"
+        let ++i
+    done < <(ocrd bashlib input-files \
+        -m "${ocrd__argv[mets_file]}" \
+        -I "${ocrd__argv[input_file_grp]}" \
+        -O "${ocrd__argv[output_file_grp]}" \
+        ${ocrd__argv[page_id]:+-g "${ocrd__argv[page_id]}"})
+}
+
+# Print one field of the input file with the given index.
+# usage: pageId=$(ocrd__input_file 3 pageId)
+ocrd__input_file() {
+    eval "printf '%s\\n' \"\${${ocrd__files[$1]}[$2]}\""
 }
diff --git a/ocrd/ocrd/cli/bashlib.py b/ocrd/ocrd/cli/bashlib.py
index 92f5722c0..ed3d8c334 100644
--- a/ocrd/ocrd/cli/bashlib.py
+++ b/ocrd/ocrd/cli/bashlib.py
@@ -8,6 +8,8 @@
 """
 from __future__ import print_function
+import shlex
 import sys
+from os.path import isfile
 import click
 
 from ocrd.constants import BASHLIB_FILENAME
@@ -15,6 +17,19 @@
 import ocrd_utils.constants
 import ocrd_models.constants
 import ocrd_validators.constants
+from ocrd.decorators import (
+    parameter_option,
+    parameter_override_option,
+    ocrd_loglevel
+)
+from ocrd_utils import (
+    is_local_filename,
+    get_local_filename,
+    initLogging,
+    make_file_id
+)
+from ocrd.resolver import Resolver
+from ocrd.processor import Processor
 
 # ----------------------------------------------------------------------
 # ocrd bashlib
@@ -61,3 +76,47 @@ def bashlib_constants(name):
             print("[%s]=%s" % (key, val[key]), end=' ')
     else:
         print(val)
+
+@bashlib_cli.command('input-files')
+@click.option('-m', '--mets', help="METS to process", default="mets.xml")
+@click.option('-w', '--working-dir', help="Working Directory")
+@click.option('-I', '--input-file-grp', help='File group(s) used as input.', default='INPUT')
+@click.option('-O', '--output-file-grp', help='File group(s) used as output.', default='OUTPUT')
+# repeat some other processor options for convenience (will be ignored here)
+@click.option('-g', '--page-id', help="ID(s) of the pages to process")
+@click.option('--overwrite', is_flag=True, default=False, help="Remove output pages/images if they already exist")
+@parameter_option
+@parameter_override_option
+@ocrd_loglevel
+def bashlib_input_files(**kwargs):
+    """
+    List input files for processing
+
+    Instantiate a processor and workspace from the given processing options.
+    Then loop through the input files of the input fileGrp, and for each one,
+    print its `url`, `ID`, `mimetype` and `pageId`, as well as its recommended
+    `outputFileId` (from ``make_file_id``).
+
+    (The printing format is one associative array initializer per line,
+    with every value shell-quoted so that it can be safely passed to
+    ``eval`` by the bashlib wrapper.)
+    """
+    initLogging()
+    mets = kwargs.pop('mets')
+    working_dir = kwargs.pop('working_dir')
+    if is_local_filename(mets) and not isfile(get_local_filename(mets)):
+        msg = "File does not exist: %s" % mets
+        raise FileNotFoundError(msg)
+    resolver = Resolver()
+    workspace = resolver.workspace_from_url(mets, working_dir)
+    processor = Processor(workspace,
+                          ocrd_tool=None,
+                          page_id=kwargs['page_id'],
+                          input_file_grp=kwargs['input_file_grp'],
+                          output_file_grp=kwargs['output_file_grp'])
+    for input_file in processor.input_files:
+        for field in ['url', 'ID', 'mimetype', 'pageId']:
+            # make this bash-friendly (show initialization for associative array);
+            # quote the value, because the consumer `eval`s each line
+            print("[%s]=%s" % (field, shlex.quote(str(getattr(input_file, field)))), end=' ')
+        print("[outputFileId]=%s" % shlex.quote(make_file_id(input_file, kwargs['output_file_grp'])))
diff --git a/ocrd/ocrd/cli/ocrd_tool.py b/ocrd/ocrd/cli/ocrd_tool.py
index 8bf6bc3ae..8ac03d673 100644
--- a/ocrd/ocrd/cli/ocrd_tool.py
+++ b/ocrd/ocrd/cli/ocrd_tool.py
@@ -13,12 +13,12 @@
 import click
 
 from ocrd.decorators import parameter_option, parameter_override_option
-from ocrd.processor import generate_processor_help
+from ocrd.processor import Processor
 from ocrd_utils import (
-        set_json_key_value_overrides,
-        VERSION as OCRD_VERSION,
-        parse_json_string_with_comments as loads
-        )
+    set_json_key_value_overrides,
+    VERSION as OCRD_VERSION,
+    parse_json_string_with_comments as loads
+)
 from ocrd_validators import ParameterValidator, OcrdToolValidator
 
 class OcrdToolCtx():
@@ -93,10 +93,24 @@ def ocrd_tool_tool(ctx, tool_name):
 def ocrd_tool_tool_description(ctx):
     print(ctx.json['tools'][ctx.tool_name]['description'])
 
+@ocrd_tool_tool.command('list-resources', help="List tool's file resources")
+@pass_ocrd_tool
+def ocrd_tool_tool_list_resources(ctx):
+    Processor(None, ocrd_tool=ctx.json['tools'][ctx.tool_name],
+              list_resources=True)
+
+@ocrd_tool_tool.command('show-resource', help="Dump a tool's file resource")
+@click.argument('res_name')
+@pass_ocrd_tool
+def ocrd_tool_tool_show_resource(ctx, res_name):
+    Processor(None, ocrd_tool=ctx.json['tools'][ctx.tool_name],
+              show_resource=res_name)
+
 @ocrd_tool_tool.command('help', help="Generate help for processors")
 @pass_ocrd_tool
 def ocrd_tool_tool_params_help(ctx):
-    print(generate_processor_help(ctx.json['tools'][ctx.tool_name]))
+    Processor(None, ocrd_tool=ctx.json['tools'][ctx.tool_name],
+              show_help=True)
 
 # ----------------------------------------------------------------------
 # ocrd ocrd-tool tool categories
diff --git a/ocrd/ocrd/lib.bash b/ocrd/ocrd/lib.bash
index c8573b53b..8fbb37d9c 100644
--- a/ocrd/ocrd/lib.bash
+++ b/ocrd/ocrd/lib.bash
@@ -72,6 +72,20 @@ ocrd__dumpjson () {
     ocrd ocrd-tool "$OCRD_TOOL_JSON" tool "$OCRD_TOOL_NAME" dump
 }
 
+##
+## Output file resource content.
+##
+ocrd__show_resource () {
+    ocrd ocrd-tool "$OCRD_TOOL_JSON" tool "$OCRD_TOOL_NAME" show-resource "$1"
+}
+
+##
+## Output file resource names.
+##
+ocrd__list_resources () {
+    ocrd ocrd-tool "$OCRD_TOOL_JSON" tool "$OCRD_TOOL_NAME" list-resources
+}
+
 # END-INCLUDE 
 # BEGIN-INCLUDE ./src/usage.bash 
 ## ### `ocrd__usage`
@@ -122,6 +136,8 @@ ocrd__parse_argv () {
             -l|--log-level) ocrd__argv[log_level]=$2 ; shift ;;
             -h|--help|--usage) ocrd__usage; exit ;;
             -J|--dump-json) ocrd__dumpjson; exit ;;
+            -C|--show-resource) ocrd__show_resource "$2"; exit ;;
+            -L|--list-resources) ocrd__list_resources; exit ;;
             -p|--parameter) __parameters+=(-p "$2") ; shift ;;
             -P|--parameter-override) __parameter_overrides+=(-P "$2" "$3") ; shift ; shift ;;
             -g|--page-id) ocrd__argv[page_id]=$2 ; shift ;;
@@ -209,6 +225,26 @@ ocrd__wrap () {
 
     ocrd__parse_argv "$@"
 
+    # Read one associative-array initializer per input file (as printed by
+    # `ocrd bashlib input-files`) into ocrd__file0, ocrd__file1, ... and
+    # record those variable names in the indexed array ocrd__files.
+    local i=0 line
+    declare -ag ocrd__files
+    while read -r line; do
+        eval declare -Ag "ocrd__file$i=( $line )"
+        eval "ocrd__files[$i]=ocrd__file$i"
+        let ++i
+    done < <(ocrd bashlib input-files \
+        -m "${ocrd__argv[mets_file]}" \
+        -I "${ocrd__argv[input_file_grp]}" \
+        -O "${ocrd__argv[output_file_grp]}" \
+        ${ocrd__argv[page_id]:+-g "${ocrd__argv[page_id]}"})
+}
+
+# Print one field of the input file with the given index.
+# usage: pageId=$(ocrd__input_file 3 pageId)
+ocrd__input_file() {
+    eval "printf '%s\\n' \"\${${ocrd__files[$1]}[$2]}\""
 }
 
 # END-INCLUDE 
diff --git a/ocrd_utils/setup.py b/ocrd_utils/setup.py
index b398d6286..92c8de5cc 100644
--- a/ocrd_utils/setup.py
+++ b/ocrd_utils/setup.py
@@ -5,7 +5,7 @@
 
 setup(
     name='ocrd_utils',
-    version='2.28.0',
+    version='2.29.0',
     description='OCR-D framework - shared code, helpers, constants',
     long_description=open('README.md').read(),
     long_description_content_type='text/markdown',