Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 7 additions & 11 deletions apps/sim/app/api/files/serve/[...path]/route.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import { readFile } from 'fs/promises'
import type { NextRequest } from 'next/server'
import { NextResponse } from 'next/server'
import { checkHybridAuth } from '@/lib/auth/hybrid'
import { createLogger } from '@/lib/logs/console/logger'
import { downloadFile, getStorageProvider, isUsingCloudStorage } from '@/lib/uploads'
import { S3_KB_CONFIG } from '@/lib/uploads/setup'
import '@/lib/uploads/setup.server'
import { getSession } from '@/lib/auth'
import {
createErrorResponse,
createFileResponse,
Expand All @@ -29,23 +29,19 @@ export async function GET(

logger.info('File serve request:', { path })

const session = await getSession()
if (!session?.user?.id) {
logger.warn('Unauthorized file access attempt', { path })
const authResult = await checkHybridAuth(request, { requireWorkflowId: false })

if (!authResult.success) {
logger.warn('Unauthorized file access attempt', { path, error: authResult.error })
return NextResponse.json({ error: 'Unauthorized' }, { status: 401 })
}

const userId = session.user.id
const userId = authResult.userId
const fullPath = path.join('/')
const isS3Path = path[0] === 's3'
const isBlobPath = path[0] === 'blob'
const isCloudPath = isS3Path || isBlobPath
const cloudKey = isCloudPath ? path.slice(1).join('/') : fullPath
const isExecutionFile = cloudKey.split('/').length >= 3 && !cloudKey.startsWith('kb/')

if (!isExecutionFile) {
logger.info('Authenticated file access granted', { userId, path: cloudKey })
}

if (isUsingCloudStorage() || isCloudPath) {
const bucketType = request.nextUrl.searchParams.get('bucket')
Expand All @@ -64,7 +60,7 @@ export async function GET(
}
}

async function handleLocalFile(filename: string, userId: string): Promise<NextResponse> {
async function handleLocalFile(filename: string, userId?: string): Promise<NextResponse> {
try {
const filePath = findLocalFile(filename)

Expand Down
3 changes: 1 addition & 2 deletions apps/sim/app/api/files/upload/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -123,8 +123,7 @@ export async function POST(request: NextRequest) {
}
}

// Create the serve path
const servePath = `/api/files/serve/${result.key}`
const servePath = result.path

const uploadResult = {
name: originalName,
Expand Down
18 changes: 17 additions & 1 deletion apps/sim/app/api/files/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,22 @@ function getSecureFileHeaders(filename: string, originalContentType: string) {
}
}

/**
 * Build the filename parameter(s) of a Content-Disposition header.
 *
 * - Pure-ASCII names yield a single quoted `filename="..."` parameter.
 * - Names containing non-ASCII characters yield an ASCII fallback
 *   (`filename="..."`, non-ASCII code units replaced by `_`) followed by an
 *   RFC 5987 extended parameter (`filename*=UTF-8''...`).
 *
 * The name is sanitized so it cannot break out of the header: control
 * characters (including CR/LF) become `_`, and `"` / `\` are backslash-escaped
 * inside the quoted-string.
 *
 * @param filename - Raw file name, possibly user supplied
 * @returns The `filename=...` (and optionally `filename*=...`) parameter text,
 *          without the leading disposition type
 */
function encodeFilenameForHeader(filename: string): string {
  // Control characters are never valid in a header value; CR/LF in particular
  // would allow header injection or make the Headers constructor throw.
  const withoutControls = filename.replace(/[\x00-\x1F\x7F]/g, '_')

  // ASCII-only fallback, with quoted-string specials escaped so a name such as
  // `a"b.txt` cannot terminate the parameter early.
  const asciiFallback = withoutControls
    .replace(/[^\x00-\x7F]/g, '_')
    .replace(/["\\]/g, '\\$&')
  const quotedParam = `filename="${asciiFallback}"`

  const hasNonAscii = /[^\x00-\x7F]/.test(withoutControls)
  if (!hasNonAscii) {
    return quotedParam
  }

  let encodedFilename: string
  try {
    // encodeURIComponent leaves ' ( ) * untouched, but none of them is an
    // RFC 5987 attr-char, so percent-encode them explicitly.
    encodedFilename = encodeURIComponent(withoutControls).replace(
      /['()*]/g,
      (ch) => `%${ch.charCodeAt(0).toString(16).toUpperCase()}`
    )
  } catch {
    // Lone surrogates cannot be UTF-8 encoded (URIError); serve the ASCII
    // fallback alone rather than failing the whole response.
    return quotedParam
  }

  return `${quotedParam}; filename*=UTF-8''${encodedFilename}`
}

/**
* Create a file response with appropriate security headers
*/
Expand All @@ -317,7 +333,7 @@ export function createFileResponse(file: FileResponse): NextResponse {
status: 200,
headers: {
'Content-Type': contentType,
'Content-Disposition': `${disposition}; filename="${file.filename}"`,
'Content-Disposition': `${disposition}; ${encodeFilenameForHeader(file.filename)}`,
'Cache-Control': 'public, max-age=31536000', // Cache for 1 year
'X-Content-Type-Options': 'nosniff',
'Content-Security-Policy': "default-src 'none'; style-src 'unsafe-inline'; sandbox;",
Expand Down
2 changes: 1 addition & 1 deletion apps/sim/background/knowledge-processing.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ export type DocumentProcessingPayload = {

export const processDocument = task({
id: 'knowledge-process-document',
maxDuration: env.KB_CONFIG_MAX_DURATION || 300,
maxDuration: env.KB_CONFIG_MAX_DURATION || 600,
retry: {
maxAttempts: env.KB_CONFIG_MAX_ATTEMPTS || 3,
factor: env.KB_CONFIG_RETRY_FACTOR || 2,
Expand Down
2 changes: 1 addition & 1 deletion apps/sim/lib/env.ts
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ export const env = createEnv({
RATE_LIMIT_ENTERPRISE_ASYNC: z.string().optional().default('1000'), // Enterprise tier async API executions per minute

// Knowledge Base Processing Configuration - Shared across all processing methods
KB_CONFIG_MAX_DURATION: z.number().optional().default(300), // Max processing duration in s
KB_CONFIG_MAX_DURATION: z.number().optional().default(600), // Max processing duration in seconds (10 minutes)
KB_CONFIG_MAX_ATTEMPTS: z.number().optional().default(3), // Max retry attempts
KB_CONFIG_RETRY_FACTOR: z.number().optional().default(2), // Retry backoff factor
KB_CONFIG_MIN_TIMEOUT: z.number().optional().default(1000), // Min timeout in ms
Expand Down
15 changes: 13 additions & 2 deletions apps/sim/lib/knowledge/documents/document-processor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,9 @@ async function parseDocument(
}

async function handleFileForOCR(fileUrl: string, filename: string, mimeType: string) {
if (fileUrl.startsWith('https://')) {
const isExternalHttps = fileUrl.startsWith('https://') && !fileUrl.includes('/api/files/serve/')

if (isExternalHttps) {
return { httpsUrl: fileUrl }
}

Expand All @@ -207,7 +209,16 @@ async function downloadFileWithTimeout(fileUrl: string): Promise<Buffer> {
const timeoutId = setTimeout(() => controller.abort(), TIMEOUTS.FILE_DOWNLOAD)

try {
const response = await fetch(fileUrl, { signal: controller.signal })
const isInternalFileServe = fileUrl.includes('/api/files/serve/')
const headers: HeadersInit = {}

if (isInternalFileServe) {
const { generateInternalToken } = await import('@/lib/auth/internal')
const token = await generateInternalToken()
headers.Authorization = `Bearer ${token}`
}

const response = await fetch(fileUrl, { signal: controller.signal, headers })
clearTimeout(timeoutId)

if (!response.ok) {
Expand Down
4 changes: 2 additions & 2 deletions apps/sim/lib/knowledge/documents/service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,14 @@ import type { DocumentSortField, SortOrder } from './types'
const logger = createLogger('DocumentService')

const TIMEOUTS = {
OVERALL_PROCESSING: (env.KB_CONFIG_MAX_DURATION || 600) * 1000, // Increased to 10 minutes to match Trigger's timeout
OVERALL_PROCESSING: (env.KB_CONFIG_MAX_DURATION || 600) * 1000, // Default 10 minutes for KB document processing
EMBEDDINGS_API: (env.KB_CONFIG_MAX_TIMEOUT || 10000) * 18,
} as const

// Configuration for handling large documents
const LARGE_DOC_CONFIG = {
MAX_CHUNKS_PER_BATCH: 500, // Insert embeddings in batches of 500
MAX_EMBEDDING_BATCH: 50, // Generate embeddings in batches of 50
MAX_EMBEDDING_BATCH: 500, // Generate embeddings in batches of 500
MAX_FILE_SIZE: 100 * 1024 * 1024, // 100MB max file size
MAX_CHUNKS_PER_DOCUMENT: 100000, // Maximum chunks allowed per document
}
Expand Down
Loading