From 0425c8e1d0cc28e2ca5d963bbd63341674780b69 Mon Sep 17 00:00:00 2001
From: Gabo
Date: Tue, 16 Jul 2024 11:55:18 +0200
Subject: [PATCH] Make toc asynchronous

Move the body of the POST /toc handler out of src/app.py into a new
synchronous helper, toc.get_toc.get_toc(file, fast). The handler is
renamed to get_toc_endpoint and now awaits
run_in_threadpool(get_toc, file, fast) instead of reading the upload
and running the layout analysis inline in the coroutine.

The helper reads the uploaded file once and passes the bytes to
extract_table_of_contents together with the result of analyze_pdf_fast
(when fast is true) or analyze_pdf (otherwise).

NOTE(review): the helper calls analyze_pdf(file_content, "") with an
extra empty-string argument, while the removed handler called
analyze_pdf(file_content). Confirm the second argument against the
current analyze_pdf signature.

NOTE(review): run_in_threadpool is not imported in the hunks of this
patch; presumably src/app.py already imports it. Confirm.
---
 src/app.py         |  8 +++-----
 src/toc/get_toc.py | 12 ++++++++++++
 2 files changed, 15 insertions(+), 5 deletions(-)
 create mode 100644 src/toc/get_toc.py

diff --git a/src/app.py b/src/app.py
index d0db634..99d3176 100755
--- a/src/app.py
+++ b/src/app.py
@@ -10,6 +10,7 @@
 from pdf_layout_analysis.run_pdf_layout_analysis import analyze_pdf
 from pdf_layout_analysis.run_pdf_layout_analysis_fast import analyze_pdf_fast
 from toc.extract_table_of_contents import extract_table_of_contents
+from toc.get_toc import get_toc
 
 service_logger.info(f"Is PyTorch using GPU: {torch.cuda.is_available()}")
 
@@ -52,8 +53,5 @@ async def run_fast(file: UploadFile = File(...)):
 
 @app.post("/toc")
 @catch_exceptions
-async def get_toc(file: UploadFile = File(...), fast: bool = Form(False)):
-    file_content = file.file.read()
-    if fast:
-        return extract_table_of_contents(file_content, analyze_pdf_fast(file_content))
-    return extract_table_of_contents(file_content, analyze_pdf(file_content))
+async def get_toc_endpoint(file: UploadFile = File(...), fast: bool = Form(False)):
+    return await run_in_threadpool(get_toc, file, fast)
diff --git a/src/toc/get_toc.py b/src/toc/get_toc.py
new file mode 100644
index 0000000..71196bf
--- /dev/null
+++ b/src/toc/get_toc.py
@@ -0,0 +1,12 @@
+from fastapi import UploadFile
+
+from pdf_layout_analysis.run_pdf_layout_analysis import analyze_pdf
+from pdf_layout_analysis.run_pdf_layout_analysis_fast import analyze_pdf_fast
+from toc.extract_table_of_contents import extract_table_of_contents
+
+
+def get_toc(file: UploadFile, fast: bool):
+    file_content = file.file.read()
+    if fast:
+        return extract_table_of_contents(file_content, analyze_pdf_fast(file_content))
+    return extract_table_of_contents(file_content, analyze_pdf(file_content, ""))