diff --git a/server/app/routes/convert.py b/server/app/routes/convert.py index a896bb37..69ea151f 100644 --- a/server/app/routes/convert.py +++ b/server/app/routes/convert.py @@ -42,10 +42,10 @@ def process_document_with_azure(file_path: str, endpoint: str, key: str) -> str: return f"Error processing document: {str(e)}" @router.post("/api/convert-documents") -async def convert_documents(files: List[UploadFile] = File(...)): - # First try Modal endpoint if there are no txt files +async def convert_documents(files: List[UploadFile] = File(...), use_docetl_server: bool = False): + # Only try Modal endpoint if use_docetl_server is true and there are no txt files all_txt_files = all(file.filename.lower().endswith('.txt') or file.filename.lower().endswith('.md') for file in files) - if not all_txt_files: + if use_docetl_server and not all_txt_files: try: async with aiohttp.ClientSession() as session: # Prepare files for multipart upload @@ -63,9 +63,24 @@ async def convert_documents(files: List[UploadFile] = File(...)): except Exception as e: print(f"Modal endpoint failed: {str(e)}. Falling back to local processing...") - # If Modal fails, fall back to local processing - from docling.document_converter import DocumentConverter - doc_converter = DocumentConverter() + # Process locally if Modal wasn't used or failed + from docling.datamodel.base_models import InputFormat + from docling.document_converter import DocumentConverter, PdfFormatOption + from docling.datamodel.pipeline_options import PdfPipelineOptions + from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend + + pipeline_options = PdfPipelineOptions() + pipeline_options.do_ocr = False + pipeline_options.do_table_structure = True + pipeline_options.table_structure_options.do_cell_matching = True + + doc_converter = DocumentConverter( + format_options={ + InputFormat.PDF: PdfFormatOption( + pipeline_options=pipeline_options, backend=PyPdfiumDocumentBackend + ) + } + ) # Create a temporary directory to store uploaded files with tempfile.TemporaryDirectory() as temp_dir: diff --git a/website/package-lock.json b/website/package-lock.json index b56e3f00..b5ff245f 100644 --- a/website/package-lock.json +++ b/website/package-lock.json @@ -27,6 +27,7 @@ "@radix-ui/react-menubar": "^1.1.2", "@radix-ui/react-popover": "^1.0.7", "@radix-ui/react-progress": "^1.1.0", + "@radix-ui/react-radio-group": "^1.2.1", "@radix-ui/react-scroll-area": "^1.1.0", "@radix-ui/react-select": "^2.1.1", "@radix-ui/react-slot": "^1.1.0", @@ -3285,6 +3286,77 @@ } } }, + "node_modules/@radix-ui/react-radio-group": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/@radix-ui/react-radio-group/-/react-radio-group-1.2.1.tgz", + "integrity": "sha512-kdbv54g4vfRjja9DNWPMxKvXblzqbpEC8kspEkZ6dVP7kQksGCn+iZHkcCz2nb00+lPdRvxrqy4WrvvV1cNqrQ==", + "license": "MIT", + "dependencies": { + "@radix-ui/primitive": "1.1.0", + "@radix-ui/react-compose-refs": "1.1.0", + "@radix-ui/react-context": "1.1.1", + "@radix-ui/react-direction": "1.1.0", + "@radix-ui/react-presence": "1.1.1", + "@radix-ui/react-primitive": "2.0.0", + "@radix-ui/react-roving-focus": "1.1.0", + "@radix-ui/react-use-controllable-state": "1.1.0", + "@radix-ui/react-use-previous": "1.1.0", + "@radix-ui/react-use-size": "1.1.0" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-radio-group/node_modules/@radix-ui/react-context": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@radix-ui/react-context/-/react-context-1.1.1.tgz", + "integrity": "sha512-UASk9zi+crv9WteK/NU4PLvOoL3OuE6BWVKNF6hPRBtYBDXQ2u5iu3O59zUlJiTVvkyuycnqrztsHVJwcK9K+Q==", + "license": "MIT", + "peerDependencies": { + "@types/react": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-radio-group/node_modules/@radix-ui/react-presence": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@radix-ui/react-presence/-/react-presence-1.1.1.tgz", + "integrity": "sha512-IeFXVi4YS1K0wVZzXNrbaaUvIJ3qdY+/Ih4eHFhWA9SwGR9UDX7Ck8abvL57C4cv3wwMvUE0OG69Qc3NCcTe/A==", + "license": "MIT", + "dependencies": { + "@radix-ui/react-compose-refs": "1.1.0", + "@radix-ui/react-use-layout-effect": "1.1.0" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, "node_modules/@radix-ui/react-roving-focus": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/@radix-ui/react-roving-focus/-/react-roving-focus-1.1.0.tgz", diff --git a/website/package.json b/website/package.json index a0c4dc69..5b222600 100644 --- a/website/package.json +++ b/website/package.json @@ -28,6 +28,7 @@ "@radix-ui/react-menubar": "^1.1.2", "@radix-ui/react-popover": "^1.0.7", "@radix-ui/react-progress": "^1.1.0", + "@radix-ui/react-radio-group": "^1.2.1", "@radix-ui/react-scroll-area": "^1.1.0", "@radix-ui/react-select": "^2.1.1", "@radix-ui/react-slot": "^1.1.0", diff --git a/website/src/app/api/convertDocuments/route.ts b/website/src/app/api/convertDocuments/route.ts index da938e6f..cd1f9985 100644 --- a/website/src/app/api/convertDocuments/route.ts +++ b/website/src/app/api/convertDocuments/route.ts @@ -6,6 +6,7 @@ export async function POST(request: NextRequest) { try { const formData = await request.formData(); const files = formData.getAll("files"); + const conversionMethod = formData.get("conversion_method"); if (!files || files.length === 0) { return NextResponse.json({ error: "No files provided" }, { status: 400 }); @@ -17,6 +18,12 @@ export async function POST(request: NextRequest) { backendFormData.append("files", file); }); + // Add conversion method to form data + backendFormData.append( + "use_docetl_server", + conversionMethod === "docetl" ? "true" : "false" + ); + // Get Azure credentials from headers if they exist const azureEndpoint = request.headers.get("azure-endpoint"); const azureKey = request.headers.get("azure-key"); diff --git a/website/src/components/BookmarksPanel.tsx b/website/src/components/BookmarksPanel.tsx index 9304c859..52cec5db 100644 --- a/website/src/components/BookmarksPanel.tsx +++ b/website/src/components/BookmarksPanel.tsx @@ -93,7 +93,7 @@ const BookmarksPanel: React.FC = () => {