diff --git a/.pylintrc b/.pylintrc index 9958d1b0f8..b2125d824c 100644 --- a/.pylintrc +++ b/.pylintrc @@ -6,6 +6,7 @@ ignored-modules=cv2,tesserocr,ocrd.model ignore-patterns='.*generateds.*' disable = fixme, + E501, trailing-whitespace, logging-not-lazy, inconsistent-return-statements, diff --git a/Makefile b/Makefile index 16614622de..dd17b20264 100644 --- a/Makefile +++ b/Makefile @@ -251,3 +251,10 @@ cuda-ldconfig: /etc/ld.so.conf.d/cuda.conf pypi: uninstall install for mod in $(BUILD_ORDER);do (cd $$mod; $(PYTHON) setup.py sdist bdist_wheel);done version=`$(FIND_VERSION)`; twine upload ocrd*/dist/ocrd*$$version*{tar.gz,whl} + +# +# Testing METS server +# + +mets-server-start: + uvicorn ocrd.mets_server:app --host 0.0.0.0 --reload diff --git a/ocrd/ocrd/cli/workspace.py b/ocrd/ocrd/cli/workspace.py index 81be6aa979..ae010d6c33 100644 --- a/ocrd/ocrd/cli/workspace.py +++ b/ocrd/ocrd/cli/workspace.py @@ -16,6 +16,7 @@ import time import click +import uvicorn from ocrd import Resolver, Workspace, WorkspaceValidator, WorkspaceBackupManager from ocrd_utils import getLogger, initLogging, pushd_popd, EXT_TO_MIME, safe_filename, parse_json_string_or_file @@ -32,6 +33,8 @@ def __init__(self, directory, mets_url, mets_basename, automatic_backup): self.log.warning(DeprecationWarning('--mets-basename is deprecated. 
# ----------------------------------------------------------------------
# ocrd workspace server
# ----------------------------------------------------------------------

@workspace_cli.group('server')
@click.option('-h', '--hostname', help="Hostname for the server", default="localhost")
@click.option('-p', '--port', help="Port for the server", default=8899)
@click.pass_context
def workspace_serve_cli(ctx, hostname, port):
    """
    Control a METS server for this workspace
    """
    # ctx.obj is the WorkspaceCtx created by the parent ``workspace`` group;
    # stash the connection options there so the subcommands can read them.
    ctx.obj.server_options['hostname'] = hostname
    ctx.obj.server_options['port'] = port


@workspace_serve_cli.command('start')
@pass_workspace
def workspace_serve_start(ctx):
    """
    Start a METS server
    """
    # Imported lazily: ocrd.mets_server initializes logging and loads a
    # workspace at import time, which must not happen for unrelated
    # ``ocrd workspace`` subcommands.
    from ocrd.mets_server import OcrdWorkspaceServer  # pylint: disable=import-outside-toplevel

    workspace_server = OcrdWorkspaceServer(
        workspace=Workspace(ctx.resolver, directory=ctx.directory, mets_basename=ctx.mets_basename),
        hostname=ctx.server_options['hostname'],
        port=ctx.server_options['port'],
    )
    # Constructing the object alone does not serve anything; actually run it.
    workspace_server.startup()
class ClientSideOcrdFile:
    """
    Plain attribute container describing a ``mets:file`` on the client side.

    It only stores the values it is given; it is not backed by an XML element.
    """

    def __init__(self, el, mimetype=None, pageId=None, loctype='OTHER', local_filename=None, mets=None, url=None, ID=None, fileGrp=None):
        """
        Args:
            el (): ignored
        Keyword Args:
            mets (): ignored
            mimetype (string): ``@MIMETYPE`` of this ``mets:file``
            pageId (string): ``@ID`` of the physical ``mets:structMap`` entry corresponding to this ``mets:file``
            loctype (string): ``@LOCTYPE`` of this ``mets:file``
            local_filename (): ignored
            url (string): ``@xlink:href`` of this ``mets:file``
            ID (string): ``@ID`` of this ``mets:file``
            fileGrp (string): ``@USE`` of the ``mets:fileGrp`` this ``mets:file`` belongs to
        """
        self.ID = ID
        self.mimetype = mimetype
        self.url = url
        self.loctype = loctype
        self.pageId = pageId
        self.fileGrp = fileGrp


class OcrdWorkspaceClient():
    """
    HTTP client for the workspace server, addressed as ``http://{hostname}:{port}``.
    """

    def __init__(self, hostname, port):
        """
        Args:
            hostname (string): host name the server listens on
            port (int): port the server listens on
        """
        self.log = getLogger('ocrd.workspace_client')
        self.url = f'http://{hostname}:{port}'

    def find_files(self, **kwargs):
        """
        Query the server for files and yield one :py:class:`ClientSideOcrdFile` per match.

        Keyword Args:
            **kwargs: sent verbatim as query parameters of the ``GET`` request

        Raises:
            requests.HTTPError: if the server answers with an error status
        """
        r = request('GET', self.url, params=kwargs)
        # Fail with the HTTP error instead of a confusing lookup error on the error payload
        r.raise_for_status()
        # The JSON payload is made of plain dicts, so entries are read by key
        for f in r.json()['files']:
            yield ClientSideOcrdFile(
                None,
                ID=f['file_id'],
                pageId=f['page_id'],
                fileGrp=f['file_grp'],
                mimetype=f['mimetype'],
                url=f['url'])

    def find_all_files(self, *args, **kwargs):
        """
        Like :py:meth:`find_files`, but return the matches as a list.
        """
        return list(self.find_files(*args, **kwargs))

    @deprecated_alias(pageId="page_id")
    @deprecated_alias(ID="file_id")
    def add_file(self, file_grp, content=None, file_id=None, url=None, mimetype=None, page_id=None, **kwargs):
        """
        Upload a file to the server as a multipart ``POST`` request.

        Args:
            file_grp (string): file group of the new file
        Keyword Args:
            content (bytes): file content, sent as the ``data`` file part
            file_id (string): ID of the new file
            url (string): URL of the new file
            mimetype (string): media type of the new file
            page_id (string): ID of the page the new file belongs to
            **kwargs: accepted for signature compatibility, not sent

        Raises:
            pydantic.ValidationError: if the metadata does not validate as ``OcrdFileModel``
            requests.HTTPError: if the server answers with an error status
        """
        r = request(
            'POST',
            self.url,
            # Must be a mapping of form fields: requests refuses a string body
            # when ``files`` is passed as well.
            data=OcrdFileModel(
                file_id=file_id,
                file_grp=file_grp,
                page_id=page_id,
                mimetype=mimetype,
                url=url).dict(),
            # Must map the form field name to the content, not be a bare tuple
            files={'data': content}
        )
        r.raise_for_status()
app = FastAPI(
    title="OCR-D Workspace Server",
    description="Providing simultaneous write-access to mets.xml for OCR-D",
)


# NOTE: every handler needs its own function name. Registration happens in the
# decorator, but reusing one name rebinds it each time and trips linters.
# The first parameter is named ``_request`` because it is unused and must not
# shadow ``requests.request`` imported at module level.

@app.exception_handler(ValidationError)
async def exception_handler_validation_error(_request: Request, exc: ValidationError):
    """
    Answer a pydantic ``ValidationError`` with HTTP 400 and the list of errors.
    """
    return JSONResponse(status_code=400, content=exc.errors())


@app.exception_handler(FileExistsError)
async def exception_handler_file_exists(_request: Request, exc: FileExistsError):
    """
    Answer a ``FileExistsError`` with HTTP 400 and the error message.
    """
    return JSONResponse(status_code=400, content=str(exc))


@app.exception_handler(re.error)
async def exception_handler_invalid_regex(_request: Request, exc: re.error):
    """
    Answer an ``re.error`` with HTTP 400 and an ``invalid regex`` message.
    """
    return JSONResponse(status_code=400, content=f'invalid regex: {exc}')


@app.on_event("startup")
async def on_startup():
    """
    Log that the application is starting.
    """
    getLogger('ocrd.mets_server').info("Starting up")


@app.on_event("shutdown")
async def on_shutdown():
    """
    Log that the application is shutting down.
    """
    getLogger('ocrd.mets_server').info("Shutting down")


@app.get(
    "/",
    # response_model=OcrdFileListModel,
)
async def find_files(
    file_grp : Union[str, None] = None,
    file_id : Union[str, None] = None,
    page_id : Union[str, None] = None,
    mimetype : Union[str, None] = None,
):
    """
    Find files in the mets
    """
    # Translate the snake_case query parameters to the keyword names of the METS API
    found = workspace.mets.find_all_files(fileGrp=file_grp, ID=file_id, pageId=page_id, mimetype=mimetype)
    return OcrdFileListModel(
        files=[
            OcrdFileModel(
                file_grp=of.fileGrp,
                file_id=of.ID,
                mimetype=of.mimetype,
                page_id=of.pageId,
                url=of.url)
            for of in found
        ]
    )


@app.delete('/')
async def stop():
    """
    Stop the server
    """
    # TODO
class OcrdWorkspaceServer():
    """
    Runs the module-level FastAPI ``app`` with uvicorn on a given host and port.
    """

    def __init__(self, workspace=None, hostname='localhost', port=8899):
        """
        Keyword Args:
            workspace (): workspace this server is meant to serve
            hostname (string): host name or address to bind to
            port (int): port to bind to
        """
        # NOTE(review): the route handlers read the module-level ``workspace``,
        # not this attribute -- it is stored only, TODO wire it into the app.
        self.workspace = workspace
        self.hostname = hostname
        self.port = port
        self.log = getLogger('ocrd.workspace_server')

    def shutdown(self):
        """
        Stop the server (not implemented yet, does nothing).
        """

    def startup(self):
        """
        Serve ``app`` on ``self.hostname``:``self.port``.
        """
        # Imported here because this module has no top-level uvicorn import
        import uvicorn  # pylint: disable=import-outside-toplevel
        # uvicorn names the bind address ``host``
        uvicorn.run(app, host=self.hostname, port=self.port)