Merge branch 'dev-processing-broker'
kba committed Apr 2, 2023
2 parents ca26b60 + bc9a29a commit a2bfd62
Showing 46 changed files with 2,799 additions and 81 deletions.
64 changes: 29 additions & 35 deletions .circleci/config.yml
@@ -16,76 +16,71 @@ jobs:
steps:
- checkout
- run: HOMEBREW_NO_AUTO_UPDATE=1 brew install imagemagick geos
- run: make install
- run: make deps-test test benchmark

test-python36:
docker:
- image: python:3.6.15
working_directory: ~/ocrd-core
steps:
- checkout
- run: apt-get -y update
- run: pip install -U pip
- run: make deps-ubuntu install
- run: make deps-test test benchmark
- run: make install deps-test
- run: make test benchmark

test-python37:
docker:
- image: python:3.7.16
- image: cimg/python:3.7
working_directory: ~/ocrd-core
steps:
- checkout
- run: apt-get -y update
- run: make deps-ubuntu install
- run: make deps-test test benchmark
- run: sudo apt-get -y update
- run: sudo make deps-ubuntu
- run: make install deps-test
- run: make test benchmark

test-python38:
docker:
- image: python:3.8.16
- image: cimg/python:3.8
working_directory: ~/ocrd-core
steps:
- checkout
- run: apt-get -y update
- run: make deps-ubuntu install
- run: make deps-test test benchmark
- run: sudo apt-get -y update
- run: sudo make deps-ubuntu
- run: make install deps-test
- run: make test benchmark

test-python39:
docker:
- image: python:3.9.16
- image: cimg/python:3.9
working_directory: ~/ocrd-core
steps:
- checkout
- run: apt-get -y update
- run: make deps-ubuntu install
- run: make deps-test test benchmark
- run: sudo apt-get -y update
- run: sudo make deps-ubuntu
- run: make install deps-test
- run: make test benchmark

test-python310:
docker:
- image: python:3.10.10
- image: cimg/python:3.10
working_directory: ~/ocrd-core
steps:
- checkout
- run: apt-get -y update
- run: make deps-ubuntu install
- run: make deps-test test benchmark
- run: sudo apt-get -y update
- run: sudo make deps-ubuntu
- run: make install deps-test
- run: make test benchmark

test-python311:
docker:
- image: python:3.11.2
- image: cimg/python:3.11
working_directory: ~/ocrd-core
steps:
- checkout
- run: apt-get -y update
- run: make deps-ubuntu install
- run: make deps-test test benchmark
- run: sudo apt-get -y update
- run: sudo make deps-ubuntu
- run: make install deps-test
- run: make test benchmark

deploy:
docker:
- image: circleci/buildpack-deps:stretch
steps:
- checkout
- setup_remote_docker # https://circleci.com/docs/2.0/building-docker-images/
- setup_remote_docker: # https://circleci.com/docs/2.0/building-docker-images/
docker_layer_caching: true
- run: make docker
- run: make docker-cuda
- run:
@@ -104,7 +99,6 @@ workflows:
only: master
test-pull-request:
jobs:
- test-python36
- test-python37
- test-python38
- test-python39
1 change: 1 addition & 0 deletions Dockerfile
@@ -15,6 +15,7 @@ COPY ocrd_models ./ocrd_models
COPY ocrd_utils ./ocrd_utils
RUN mv ./ocrd_utils/ocrd_logging.conf /etc
COPY ocrd_validators/ ./ocrd_validators
COPY ocrd_network/ ./ocrd_network
COPY Makefile .
COPY README.md .
COPY LICENSE .
2 changes: 1 addition & 1 deletion Makefile
@@ -9,7 +9,7 @@ TESTDIR = tests

SPHINX_APIDOC =

BUILD_ORDER = ocrd_utils ocrd_models ocrd_modelfactory ocrd_validators ocrd
BUILD_ORDER = ocrd_utils ocrd_models ocrd_modelfactory ocrd_validators ocrd_network ocrd

FIND_VERSION = grep version= ocrd_utils/setup.py|grep -Po "([0-9ab]+\.?)+"

7 changes: 7 additions & 0 deletions README.md
@@ -25,6 +25,7 @@
* [ocrd_models](#ocrd_models)
* [ocrd_modelfactory](#ocrd_modelfactory)
* [ocrd_validators](#ocrd_validators)
* [ocrd_network](#ocrd_network)
* [ocrd](#ocrd)
* [bash library](#bash-library)
* [bashlib API](#bashlib-api)
@@ -122,6 +123,12 @@ Schemas and routines for validating BagIt, `ocrd-tool.json`, workspaces, METS, p

See [README for `ocrd_validators`](./ocrd_validators/README.md) for further information.

### ocrd_network

Components related to the OCR-D Web API.

See [README for `ocrd_network`](./ocrd_network/README.md) for further information.

### ocrd

Depends on all of the above, also contains decorators and classes for creating OCR-D processors and CLIs.
5 changes: 5 additions & 0 deletions ocrd/ocrd/cli/__init__.py
@@ -31,6 +31,9 @@ def get_help(self, ctx):
from ocrd.decorators import ocrd_loglevel
from .zip import zip_cli
from .log import log_cli
from .processing_server import processing_server_cli
from .processing_worker import processing_worker_cli


@click.group()
@click.version_option()
@@ -48,3 +51,5 @@ def cli(**kwargs): # pylint: disable=unused-argument
cli.add_command(validate_cli)
cli.add_command(log_cli)
cli.add_command(resmgr_cli)
cli.add_command(processing_server_cli)
cli.add_command(processing_worker_cli)
41 changes: 41 additions & 0 deletions ocrd/ocrd/cli/processing_server.py
@@ -0,0 +1,41 @@
"""
OCR-D CLI: start the processing server
.. click:: ocrd.cli.processing_server:processing_server_cli
:prog: ocrd processing-server
:nested: full
"""
import click
import logging
from ocrd_utils import initLogging
from ocrd_network import (
ProcessingServer,
ProcessingServerParamType
)


@click.command('processing-server')
@click.argument('path_to_config', required=True, type=click.STRING)
@click.option('-a', '--address',
default="localhost:8080",
help='The URL of the Processing server, format: host:port',
type=ProcessingServerParamType(),
required=True)
def processing_server_cli(path_to_config, address: str):
"""
Start and manage processing workers with the processing server
PATH_TO_CONFIG is a yaml file to configure the server and the workers. See
https://github.com/OCR-D/spec/pull/222/files#diff-a71bf71cbc7d9ce94fded977f7544aba4df9e7bdb8fc0cf1014e14eb67a9b273
for further information (TODO: update path when spec is available/merged)
"""
initLogging()
# TODO: Remove before the release
logging.getLogger('paramiko.transport').setLevel(logging.INFO)
logging.getLogger('ocrd.network').setLevel(logging.DEBUG)

# Note, the address is already validated with the type field
host, port = address.split(':')
processing_server = ProcessingServer(path_to_config, host, port)
processing_server.start()
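
As an aside, the address passed to `--address` is validated by `ProcessingServerParamType` from `ocrd_network` before the `host, port` split above. The snippet below is only an illustrative sketch of such a click `ParamType` for `host:port` values; the class name and regex are assumptions, not the committed implementation.

import re
from click import ParamType

class HostPortParamType(ParamType):
    """Illustrative stand-in for a host:port validator such as ProcessingServerParamType."""
    name = 'host:port'

    def convert(self, value, param, ctx):
        # Accept only host:port with a numeric port in 1..65535
        match = re.fullmatch(r'(?P<host>[^:]+):(?P<port>\d+)', value)
        if not match or not 0 < int(match.group('port')) < 65536:
            self.fail(f'{value!r} is not a valid host:port address', param, ctx)
        return value

An instance of such a type is then passed as `type=` to the click option, exactly as the CLI above does with `ProcessingServerParamType()`.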
61 changes: 61 additions & 0 deletions ocrd/ocrd/cli/processing_worker.py
@@ -0,0 +1,61 @@
"""
OCR-D CLI: start the processing worker
.. click:: ocrd.cli.processing_worker:processing_worker_cli
:prog: ocrd processing-worker
:nested: full
"""
import click
import logging
from ocrd_utils import (
initLogging,
get_ocrd_tool_json
)
from ocrd_network import (
DatabaseParamType,
ProcessingWorker,
QueueServerParamType,
)


@click.command('processing-worker')
@click.argument('processor_name', required=True, type=click.STRING)
@click.option('-q', '--queue',
default="amqp://admin:admin@localhost:5672/",
help='The URL of the Queue Server, format: amqp://username:password@host:port/vhost',
type=QueueServerParamType())
@click.option('-d', '--database',
default="mongodb://localhost:27018",
help='The URL of the MongoDB, format: mongodb://host:port',
type=DatabaseParamType())
def processing_worker_cli(processor_name: str, queue: str, database: str):
"""
Start a processing worker (a specific ocr-d processor)
"""
initLogging()
# TODO: Remove before the release
logging.getLogger('ocrd.network').setLevel(logging.DEBUG)

# Get the ocrd_tool dictionary
# ocrd_tool = parse_json_string_with_comments(
# run([processor_name, '--dump-json'], stdout=PIPE, check=True, universal_newlines=True).stdout
# )

ocrd_tool = get_ocrd_tool_json(processor_name)
if not ocrd_tool:
raise Exception(f"The ocrd_tool is empty or missing")

try:
processing_worker = ProcessingWorker(
rabbitmq_addr=queue,
mongodb_addr=database,
processor_name=ocrd_tool['executable'],
ocrd_tool=ocrd_tool,
processor_class=None, # For readability purposes assigned here
)
# The RMQConsumer is initialized and a connection to the RabbitMQ is performed
processing_worker.connect_consumer()
# Start consuming from the queue with name `processor_name`
processing_worker.start_consuming()
except Exception as e:
raise Exception("Processing worker has failed with error") from e
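
As a usage sketch (not part of this commit), the new command can also be driven programmatically with click's test runner. This assumes a locally reachable RabbitMQ and MongoDB at the default URLs and an installed `ocrd-dummy` processor, and note that `start_consuming()` blocks until the worker is stopped.

from click.testing import CliRunner
from ocrd.cli.processing_worker import processing_worker_cli

runner = CliRunner()
# Start a worker for the (assumed installed) ocrd-dummy processor; this call
# blocks while the worker consumes jobs from the 'ocrd-dummy' queue.
result = runner.invoke(processing_worker_cli, [
    'ocrd-dummy',
    '--queue', 'amqp://admin:admin@localhost:5672/',
    '--database', 'mongodb://localhost:27018',
])
print(result.exit_code, result.output)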
37 changes: 36 additions & 1 deletion ocrd/ocrd/decorators/__init__.py
@@ -1,6 +1,8 @@
from os.path import isfile
from os import environ
import sys
from contextlib import redirect_stdout
from io import StringIO

import click

@@ -10,9 +12,11 @@
set_json_key_value_overrides,
)

from ocrd_utils import getLogger, initLogging
from ocrd_utils import getLogger, initLogging, parse_json_string_with_comments
from ocrd_validators import WorkspaceValidator

from ocrd_network import ProcessingWorker

from ..resolver import Resolver
from ..processor.base import run_processor

@@ -34,6 +38,8 @@ def ocrd_cli_wrap_processor(
overwrite=False,
show_resource=None,
list_resources=False,
queue=None,
database=None,
**kwargs
):
if not sys.argv[1:]:
@@ -50,6 +56,35 @@
list_resources=list_resources
)
sys.exit()
# If either of these two is provided but not both
if bool(queue) != bool(database):
raise Exception("Options --queue and --database require each other.")
# If both are provided, start a processing worker for processorClass instead of running the processor directly
if queue and database:
initLogging()
# TODO: Remove before the release
# Import the standard Python logging module here (not the ocrd logging helpers)
import logging
logging.getLogger('ocrd.network').setLevel(logging.DEBUG)

# Get the ocrd_tool dictionary
processor = processorClass(workspace=None, dump_json=True)
ocrd_tool = processor.ocrd_tool

try:
processing_worker = ProcessingWorker(
rabbitmq_addr=queue,
mongodb_addr=database,
processor_name=ocrd_tool['executable'],
ocrd_tool=ocrd_tool,
processor_class=processorClass,
)
# The RMQConsumer is initialized and a connection to the RabbitMQ is performed
processing_worker.connect_consumer()
# Start consuming from the queue with name `processor_name`
processing_worker.start_consuming()
except Exception as e:
raise Exception("Processing worker has failed with error") from e
else:
initLogging()
LOG = getLogger('ocrd_cli_wrap_processor')
39 changes: 22 additions & 17 deletions ocrd/ocrd/decorators/ocrd_cli_options.py
@@ -1,6 +1,8 @@
from click import option
from click import option, Path
from .parameter_option import parameter_option, parameter_override_option
from .loglevel_option import loglevel_option
from ocrd_network import QueueServerParamType, DatabaseParamType


def ocrd_cli_options(f):
"""
@@ -17,27 +19,30 @@ def cli(mets_url):
"""
# XXX Note that the `--help` output is statically generate_processor_help
params = [
option('-m', '--mets', help="METS to process", default="mets.xml"),
option('-w', '--working-dir', help="Working Directory"),
option('-m', '--mets', default="mets.xml"),
option('-w', '--working-dir'),
# TODO OCR-D/core#274
# option('-I', '--input-file-grp', help='File group(s) used as input. **required**'),
# option('-O', '--output-file-grp', help='File group(s) used as output. **required**'),
option('-I', '--input-file-grp', help='File group(s) used as input.', default='INPUT'),
option('-O', '--output-file-grp', help='File group(s) used as output.', default='OUTPUT'),
option('-g', '--page-id', help="ID(s) of the pages to process"),
option('--overwrite', help="Overwrite the output file group or a page range (--page-id)", is_flag=True, default=False),
option('-C', '--show-resource', help='Dump the content of processor resource RESNAME', metavar='RESNAME'),
option('-L', '--list-resources', is_flag=True, default=False, help='List names of processor resources'),
# option('-I', '--input-file-grp', required=True),
# option('-O', '--output-file-grp', required=True),
option('-I', '--input-file-grp', default='INPUT'),
option('-O', '--output-file-grp', default='OUTPUT'),
option('-g', '--page-id'),
option('--overwrite', is_flag=True, default=False),
option('--profile', is_flag=True, default=False),
option('--profile-file', type=Path(dir_okay=False, writable=True)),
parameter_option,
parameter_override_option,
option('-J', '--dump-json', help="Dump tool description as JSON and exit", is_flag=True, default=False),
option('-D', '--dump-module-dir', help="Print processor's 'moduledir' of resourcess", is_flag=True, default=False),
loglevel_option,
option('-V', '--version', help="Show version", is_flag=True, default=False),
option('-h', '--help', help="This help message", is_flag=True, default=False),
option('--profile', help="Enable profiling", is_flag=True, default=False),
option('--profile-file', help="Write cProfile stats to this file. Implies --profile"),
option('--queue', type=QueueServerParamType()),
option('--database', type=DatabaseParamType()),
option('-C', '--show-resource'),
option('-L', '--list-resources', is_flag=True, default=False),
option('-J', '--dump-json', is_flag=True, default=False),
option('-D', '--dump-module-dir', is_flag=True, default=False),
option('-h', '--help', is_flag=True, default=False),
option('-V', '--version', is_flag=True, default=False),
]
for param in params:
param(f)
return f
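
For context (not part of this diff), a typical processor CLI picks these options up through the decorator and hands them to `ocrd_cli_wrap_processor`, which starts a processing worker when both `--queue` and `--database` are given. `MyProcessor` below is a hypothetical Processor subclass standing in for a real implementation.

import click
from ocrd.decorators import ocrd_cli_options, ocrd_cli_wrap_processor
from my_package.processor import MyProcessor  # hypothetical Processor subclass

@click.command()
@ocrd_cli_options
def cli(*args, **kwargs):
    # Without --queue/--database this runs the processor once on a workspace;
    # with both options it starts a processing worker for MyProcessor instead.
    return ocrd_cli_wrap_processor(MyProcessor, *args, **kwargs)

if __name__ == '__main__':
    cli()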
