Skip to content

Commit

Permalink
feat: clean up upload dialog
Browse files (browse the repository at this point in the history)
  • Loading branch information
shreyashankar committed Dec 7, 2024
1 parent 0297d6a commit b6f7968
Show file tree
Hide file tree
Showing 7 changed files with 269 additions and 66 deletions.
27 changes: 21 additions & 6 deletions server/app/routes/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,10 @@ def process_document_with_azure(file_path: str, endpoint: str, key: str) -> str:
return f"Error processing document: {str(e)}"

@router.post("/api/convert-documents")
async def convert_documents(files: List[UploadFile] = File(...)):
# First try Modal endpoint if there are no txt files
async def convert_documents(files: List[UploadFile] = File(...), use_docetl_server: bool = False):
# Only try the Modal endpoint if use_docetl_server is true and at least one file is not a .txt/.md file
all_txt_files = all(file.filename.lower().endswith('.txt') or file.filename.lower().endswith('.md') for file in files)
if not all_txt_files:
if use_docetl_server and not all_txt_files:
try:
async with aiohttp.ClientSession() as session:
# Prepare files for multipart upload
Expand All @@ -63,9 +63,24 @@ async def convert_documents(files: List[UploadFile] = File(...)):
except Exception as e:
print(f"Modal endpoint failed: {str(e)}. Falling back to local processing...")

# If Modal fails, fall back to local processing
from docling.document_converter import DocumentConverter
doc_converter = DocumentConverter()
# Process locally if Modal wasn't used or failed
from docling.datamodel.base_models import InputFormat
from docling.document_converter import DocumentConverter, PdfFormatOption
from docling.datamodel.pipeline_options import PdfPipelineOptions
from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend

pipeline_options = PdfPipelineOptions()
pipeline_options.do_ocr = False
pipeline_options.do_table_structure = True
pipeline_options.table_structure_options.do_cell_matching = True

doc_converter = DocumentConverter(
format_options={
InputFormat.PDF: PdfFormatOption(
pipeline_options=pipeline_options, backend=PyPdfiumDocumentBackend
)
}
)

# Create a temporary directory to store uploaded files
with tempfile.TemporaryDirectory() as temp_dir:
Expand Down
72 changes: 72 additions & 0 deletions website/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions website/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
"@radix-ui/react-menubar": "^1.1.2",
"@radix-ui/react-popover": "^1.0.7",
"@radix-ui/react-progress": "^1.1.0",
"@radix-ui/react-radio-group": "^1.2.1",
"@radix-ui/react-scroll-area": "^1.1.0",
"@radix-ui/react-select": "^2.1.1",
"@radix-ui/react-slot": "^1.1.0",
Expand Down
7 changes: 7 additions & 0 deletions website/src/app/api/convertDocuments/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ export async function POST(request: NextRequest) {
try {
const formData = await request.formData();
const files = formData.getAll("files");
const conversionMethod = formData.get("conversion_method");

if (!files || files.length === 0) {
return NextResponse.json({ error: "No files provided" }, { status: 400 });
Expand All @@ -17,6 +18,12 @@ export async function POST(request: NextRequest) {
backendFormData.append("files", file);
});

// Add conversion method to form data
backendFormData.append(
"use_docetl_server",
conversionMethod === "docetl" ? "true" : "false"
);

// Get Azure credentials from headers if they exist
const azureEndpoint = request.headers.get("azure-endpoint");
const azureKey = request.headers.get("azure-key");
Expand Down
4 changes: 2 additions & 2 deletions website/src/components/BookmarksPanel.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ const BookmarksPanel: React.FC = () => {
<div className="h-full p-4 flex flex-col">
<div className="flex justify-between items-center mb-4 border-b pb-3">
<h2 className="text-base font-bold flex items-center">
<Bookmark className="mr-2" size={18} />
<Bookmark className="mr-2" size={14} />
NOTES
</h2>
<Button
Expand All @@ -102,7 +102,7 @@ const BookmarksPanel: React.FC = () => {
onClick={handleClearAll}
className="text-gray-500 hover:text-gray-700"
>
<X size={16} className="mr-1.5" />
<X size={14} className="mr-1.5" />
Clear All
</Button>
</div>
Expand Down
Loading

0 comments on commit b6f7968

Please sign in to comment.