Skip to content

Commit

Permalink
feat: change histograms to be bar charts for categorical columns (#204)
Browse files Browse the repository at this point in the history
  • Loading branch information
shreyashankar authored Nov 21, 2024
1 parent 3c7b03b commit d744d2e
Show file tree
Hide file tree
Showing 3 changed files with 215 additions and 70 deletions.
36 changes: 19 additions & 17 deletions server/app/routes/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,23 +43,25 @@ def process_document_with_azure(file_path: str, endpoint: str, key: str) -> str:

@router.post("/api/convert-documents")
async def convert_documents(files: List[UploadFile] = File(...)):
# First try Modal endpoint
try:
async with aiohttp.ClientSession() as session:
# Prepare files for multipart upload
data = aiohttp.FormData()
for file in files:
data.add_field('files',
await file.read(),
filename=file.filename,
content_type=file.content_type)
# First try the Modal endpoint, unless every uploaded file is plain text (.txt or .md)
all_txt_files = all(file.filename.lower().endswith('.txt') or file.filename.lower().endswith('.md') for file in files)
if not all_txt_files:
try:
async with aiohttp.ClientSession() as session:
# Prepare files for multipart upload
data = aiohttp.FormData()
for file in files:
data.add_field('files',
await file.read(),
filename=file.filename,
content_type=file.content_type)

async with session.post(MODAL_ENDPOINT, data=data, timeout=120) as response:
if response.status == 200:
return await response.json()
async with session.post(MODAL_ENDPOINT, data=data, timeout=120) as response:
if response.status == 200:
return await response.json()

except Exception as e:
print(f"Modal endpoint failed: {str(e)}. Falling back to local processing...")
except Exception as e:
print(f"Modal endpoint failed: {str(e)}. Falling back to local processing...")

# If Modal was skipped, failed, or returned a non-200 response, fall back to local processing
from docling.document_converter import DocumentConverter
Expand All @@ -70,7 +72,7 @@ async def convert_documents(files: List[UploadFile] = File(...)):
# Save uploaded files to temporary directory
file_paths = []
original_filenames = [] # Keep track of original filenames
txt_files = [] # Track which files are .txt
txt_files = [] # Track which files are .txt or markdown
for file in files:
# Reset file position since we might have read it in the Modal attempt
await file.seek(0)
Expand All @@ -82,7 +84,7 @@ async def convert_documents(files: List[UploadFile] = File(...)):
buffer.write(content)
file_paths.append(file_path)
original_filenames.append(file.filename)
txt_files.append(file.filename.lower().endswith('.txt'))
txt_files.append(file.filename.lower().endswith('.txt') or file.filename.lower().endswith('.md'))

# Convert all documents
results = []
Expand Down
4 changes: 2 additions & 2 deletions website/src/components/FileExplorer.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -778,7 +778,7 @@ export const FileExplorer: React.FC<FileExplorerProps> = ({
type="file"
multiple
className="hidden"
accept=".pdf,.docx,.doc,.txt,.html,.pptx"
accept=".pdf,.docx,.doc,.txt,.html,.pptx,.md"
onChange={(e) => {
if (e.target.files) {
handleFolderUpload(e.target.files);
Expand All @@ -790,7 +790,7 @@ export const FileExplorer: React.FC<FileExplorerProps> = ({
</span>
</label>
<p className="text-xs text-gray-500">
Supported formats: PDF, DOCX, DOC, TXT, HTML, PPTX
Supported formats: PDF, DOCX, DOC, TXT, HTML, PPTX, MD
</p>
<p className="text-xs text-gray-500">
Processing may take up to 2 minutes
Expand Down
Loading

0 comments on commit d744d2e

Please sign in to comment.