Skip to content

Commit

Permalink
Avoid workflow-script redundancy when uploading
Browse files Browse the repository at this point in the history
  • Loading branch information
joschrew committed Nov 23, 2023
1 parent d695f49 commit d14c787
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 0 deletions.
12 changes: 12 additions & 0 deletions ocrd_network/ocrd_network/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,3 +212,15 @@ async def db_get_workflow_script(workflow_id: str) -> DBWorkflowScript:
@call_sync
async def sync_db_get_workflow_script(workflow_id: str) -> DBWorkflowScript:
    """Fetch the workflow script stored under ``workflow_id``.

    Forwards to ``db_get_workflow_script`` and returns its result unchanged;
    any exception raised there propagates to the caller.

    NOTE(review): ``call_sync`` presumably makes this coroutine callable from
    synchronous code -- confirm against the decorator's definition.
    """
    workflow_script = await db_get_workflow_script(workflow_id)
    return workflow_script


async def db_find_first_workflow_script_by_content(content_hash: str) -> DBWorkflowScript:
    """Return a stored workflow script whose ``content_hash`` field matches.

    Args:
        content_hash: hash value compared against ``DBWorkflowScript.content_hash``.

    Returns:
        The document returned by ``DBWorkflowScript.find_one`` for that hash.

    Raises:
        ValueError: if the lookup yields no document.
    """
    match = await DBWorkflowScript.find_one(DBWorkflowScript.content_hash == content_hash)
    if match:
        return match
    raise ValueError(f'Workflow-script with content_hash "{content_hash}" not in the DB.')


@call_sync
async def sync_db_find_first_workflow_script_by_content(workflow_id: str) -> DBWorkflowScript:
    """Find a workflow script by the hash of its content.

    Counterpart of ``db_find_first_workflow_script_by_content`` decorated
    with ``call_sync``.

    Args:
        workflow_id: despite its name, this is the *content hash* to search
            for. The parameter name is kept as-is so existing keyword
            callers keep working.

    Returns:
        The matching ``DBWorkflowScript`` document.

    Raises:
        ValueError: if no workflow script with that content hash exists.
    """
    # Previously this delegated to db_get_workflow_script(), which looks up
    # by workflow id and therefore never matched on content hash.
    content_hash = workflow_id
    return await db_find_first_workflow_script_by_content(content_hash)
1 change: 1 addition & 0 deletions ocrd_network/ocrd_network/models/workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@ class DBWorkflowScript(Document):
"""
workflow_id: str
content: str
content_hash: str
14 changes: 14 additions & 0 deletions ocrd_network/ocrd_network/processing_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
db_update_processing_job,
db_update_workspace,
db_get_workflow_script,
db_find_first_workflow_script_by_content
)
from .deployer import Deployer
from .logging import get_processing_server_logging_file_path
Expand Down Expand Up @@ -68,6 +69,7 @@
validate_workflow,
)
from urllib.parse import urljoin
from hashlib import md5


class ProcessingServer(FastAPI):
Expand Down Expand Up @@ -869,9 +871,19 @@ async def upload_workflow(self, workflow: UploadFile) -> Dict:
raise HTTPException(status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
detail="Provided workflow script is invalid")

content_hash = md5(content.encode("utf-8")).hexdigest()
try:
db_workflow_script = await db_find_first_workflow_script_by_content(content_hash)
if db_workflow_script:
raise HTTPException(status_code=status.HTTP_409_CONFLICT, detail="The same workflow"
f"-script exists with id '{db_workflow_script.workflow_id}'")
except ValueError:
pass

db_workflow_script = DBWorkflowScript(
workflow_id=workflow_id,
content=content,
content_hash=content_hash,
)
await db_workflow_script.insert()
return {"workflow_id": workflow_id}
Expand All @@ -886,6 +898,8 @@ async def replace_workflow(self, workflow_id, workflow: UploadFile) -> str:
try:
db_workflow_script = await db_get_workflow_script(workflow_id)
db_workflow_script.content = content
content_hash = md5(content.encode("utf-8")).hexdigest()
db_workflow_script.content_hash = content_hash
except ValueError as e:
self.log.exception(f"Workflow with id '{workflow_id}' not existing, error: {e}")
raise HTTPException(
Expand Down

0 comments on commit d14c787

Please sign in to comment.