diff --git a/server/app/routes/convert.py b/server/app/routes/convert.py index a896bb37..69ea151f 100644 --- a/server/app/routes/convert.py +++ b/server/app/routes/convert.py @@ -42,10 +42,10 @@ def process_document_with_azure(file_path: str, endpoint: str, key: str) -> str: return f"Error processing document: {str(e)}" @router.post("/api/convert-documents") -async def convert_documents(files: List[UploadFile] = File(...)): - # First try Modal endpoint if there are no txt files +async def convert_documents(files: List[UploadFile] = File(...), use_docetl_server: bool = False): + # Only try Modal endpoint if use_docetl_server is true and at least one file is not .txt/.md all_txt_files = all(file.filename.lower().endswith('.txt') or file.filename.lower().endswith('.md') for file in files) - if not all_txt_files: + if use_docetl_server and not all_txt_files: try: async with aiohttp.ClientSession() as session: # Prepare files for multipart upload @@ -63,9 +63,24 @@ async def convert_documents(files: List[UploadFile] = File(...)): except Exception as e: print(f"Modal endpoint failed: {str(e)}. 
Falling back to local processing...") - # If Modal fails, fall back to local processing - from docling.document_converter import DocumentConverter - doc_converter = DocumentConverter() + # Process locally if Modal wasn't used or failed + from docling.datamodel.base_models import InputFormat + from docling.document_converter import DocumentConverter, PdfFormatOption + from docling.datamodel.pipeline_options import PdfPipelineOptions + from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend + + pipeline_options = PdfPipelineOptions() + pipeline_options.do_ocr = False + pipeline_options.do_table_structure = True + pipeline_options.table_structure_options.do_cell_matching = True + + doc_converter = DocumentConverter( + format_options={ + InputFormat.PDF: PdfFormatOption( + pipeline_options=pipeline_options, backend=PyPdfiumDocumentBackend + ) + } + ) # Create a temporary directory to store uploaded files with tempfile.TemporaryDirectory() as temp_dir: diff --git a/website/package-lock.json b/website/package-lock.json index b56e3f00..b5ff245f 100644 --- a/website/package-lock.json +++ b/website/package-lock.json @@ -27,6 +27,7 @@ "@radix-ui/react-menubar": "^1.1.2", "@radix-ui/react-popover": "^1.0.7", "@radix-ui/react-progress": "^1.1.0", + "@radix-ui/react-radio-group": "^1.2.1", "@radix-ui/react-scroll-area": "^1.1.0", "@radix-ui/react-select": "^2.1.1", "@radix-ui/react-slot": "^1.1.0", @@ -3285,6 +3286,77 @@ } } }, + "node_modules/@radix-ui/react-radio-group": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/@radix-ui/react-radio-group/-/react-radio-group-1.2.1.tgz", + "integrity": "sha512-kdbv54g4vfRjja9DNWPMxKvXblzqbpEC8kspEkZ6dVP7kQksGCn+iZHkcCz2nb00+lPdRvxrqy4WrvvV1cNqrQ==", + "license": "MIT", + "dependencies": { + "@radix-ui/primitive": "1.1.0", + "@radix-ui/react-compose-refs": "1.1.0", + "@radix-ui/react-context": "1.1.1", + "@radix-ui/react-direction": "1.1.0", + "@radix-ui/react-presence": "1.1.1", + 
"@radix-ui/react-primitive": "2.0.0", + "@radix-ui/react-roving-focus": "1.1.0", + "@radix-ui/react-use-controllable-state": "1.1.0", + "@radix-ui/react-use-previous": "1.1.0", + "@radix-ui/react-use-size": "1.1.0" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-radio-group/node_modules/@radix-ui/react-context": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@radix-ui/react-context/-/react-context-1.1.1.tgz", + "integrity": "sha512-UASk9zi+crv9WteK/NU4PLvOoL3OuE6BWVKNF6hPRBtYBDXQ2u5iu3O59zUlJiTVvkyuycnqrztsHVJwcK9K+Q==", + "license": "MIT", + "peerDependencies": { + "@types/react": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-radio-group/node_modules/@radix-ui/react-presence": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@radix-ui/react-presence/-/react-presence-1.1.1.tgz", + "integrity": "sha512-IeFXVi4YS1K0wVZzXNrbaaUvIJ3qdY+/Ih4eHFhWA9SwGR9UDX7Ck8abvL57C4cv3wwMvUE0OG69Qc3NCcTe/A==", + "license": "MIT", + "dependencies": { + "@radix-ui/react-compose-refs": "1.1.0", + "@radix-ui/react-use-layout-effect": "1.1.0" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, "node_modules/@radix-ui/react-roving-focus": { "version": "1.1.0", "resolved": 
"https://registry.npmjs.org/@radix-ui/react-roving-focus/-/react-roving-focus-1.1.0.tgz", diff --git a/website/package.json b/website/package.json index a0c4dc69..5b222600 100644 --- a/website/package.json +++ b/website/package.json @@ -28,6 +28,7 @@ "@radix-ui/react-menubar": "^1.1.2", "@radix-ui/react-popover": "^1.0.7", "@radix-ui/react-progress": "^1.1.0", + "@radix-ui/react-radio-group": "^1.2.1", "@radix-ui/react-scroll-area": "^1.1.0", "@radix-ui/react-select": "^2.1.1", "@radix-ui/react-slot": "^1.1.0", diff --git a/website/src/app/api/convertDocuments/route.ts b/website/src/app/api/convertDocuments/route.ts index da938e6f..cd1f9985 100644 --- a/website/src/app/api/convertDocuments/route.ts +++ b/website/src/app/api/convertDocuments/route.ts @@ -6,6 +6,7 @@ export async function POST(request: NextRequest) { try { const formData = await request.formData(); const files = formData.getAll("files"); + const conversionMethod = formData.get("conversion_method"); if (!files || files.length === 0) { return NextResponse.json({ error: "No files provided" }, { status: 400 }); @@ -17,6 +18,12 @@ export async function POST(request: NextRequest) { backendFormData.append("files", file); }); + // Add conversion method to form data + backendFormData.append( + "use_docetl_server", + conversionMethod === "docetl" ? "true" : "false" + ); + // Get Azure credentials from headers if they exist const azureEndpoint = request.headers.get("azure-endpoint"); const azureKey = request.headers.get("azure-key"); diff --git a/website/src/components/BookmarksPanel.tsx b/website/src/components/BookmarksPanel.tsx index 9304c859..52cec5db 100644 --- a/website/src/components/BookmarksPanel.tsx +++ b/website/src/components/BookmarksPanel.tsx @@ -93,7 +93,7 @@ const BookmarksPanel: React.FC = () => {

- + NOTES

diff --git a/website/src/components/FileExplorer.tsx b/website/src/components/FileExplorer.tsx index ff733029..6f1479ed 100644 --- a/website/src/components/FileExplorer.tsx +++ b/website/src/components/FileExplorer.tsx @@ -45,7 +45,6 @@ import { AlertDialogHeader, AlertDialogTitle, } from "@/components/ui/alert-dialog"; -import { Switch } from "@/components/ui/switch"; import { Label } from "@/components/ui/label"; import { Tooltip, @@ -53,6 +52,7 @@ import { TooltipProvider, TooltipTrigger, } from "./ui/tooltip"; +import { RadioGroup, RadioGroupItem } from "@/components/ui/radio-group"; interface FileExplorerProps { files: File[]; @@ -145,7 +145,7 @@ async function getAllFiles(entry: FileSystemEntry): Promise { return files; } -type ConversionMethod = "docling" | "azure"; +type ConversionMethod = "local" | "azure" | "docetl"; async function validateJsonDataset(file: Blob): Promise { const text = await file.text(); @@ -212,7 +212,7 @@ export const FileExplorer: React.FC = ({ const [folderToDelete, setFolderToDelete] = useState(null); const [uploadingFiles, setUploadingFiles] = useState>(new Set()); const [conversionMethod, setConversionMethod] = - useState("docling"); + useState("local"); const [azureEndpoint, setAzureEndpoint] = useState(""); const [azureKey, setAzureKey] = useState(""); @@ -381,6 +381,7 @@ export const FileExplorer: React.FC = ({ ); }); originalDocsFormData.append("namespace", namespace); + formData.append("conversion_method", conversionMethod); try { // First save the original documents @@ -544,13 +545,13 @@ export const FileExplorer: React.FC = ({

- + FILE EXPLORER

@@ -709,68 +710,132 @@ export const FileExplorer: React.FC = ({ Upload Documents -
-
-
- - setConversionMethod(checked ? "azure" : "docling") - } - /> - -
+
+
+ - {conversionMethod === "azure" && ( -
-
-