From a6b42a44d66e2196a0d39db344a972ae41ee3b3d Mon Sep 17 00:00:00 2001 From: Adam Gough Date: Sat, 28 Jun 2025 17:18:42 -0700 Subject: [PATCH 1/6] improvement: added knowledge upload --- apps/docs/content/docs/tools/knowledge.mdx | 19 +++ apps/sim/blocks/blocks/knowledge.ts | 18 ++- apps/sim/tools/knowledge/index.ts | 3 +- apps/sim/tools/knowledge/types.ts | 21 +++ apps/sim/tools/knowledge/upload_document.ts | 149 ++++++++++++++++++++ apps/sim/tools/registry.ts | 7 +- 6 files changed, 213 insertions(+), 4 deletions(-) create mode 100644 apps/sim/tools/knowledge/upload_document.ts diff --git a/apps/docs/content/docs/tools/knowledge.mdx b/apps/docs/content/docs/tools/knowledge.mdx index 3424c62421..acbe3c7b04 100644 --- a/apps/docs/content/docs/tools/knowledge.mdx +++ b/apps/docs/content/docs/tools/knowledge.mdx @@ -100,6 +100,25 @@ Upload a new chunk to a document in a knowledge base | `createdAt` | string | | `updatedAt` | string | +### `knowledge_upload_document` + +Upload documents to a knowledge base + +#### Input + +| Parameter | Type | Required | Description | +| --------- | ---- | -------- | ----------- | +| `knowledgeBaseId` | string | Yes | ID of the knowledge base containing the document | +| `knowledgeBaseName` | string | Yes | Name of the knowledge base to upload the document to | +| `file` | file | Yes | Document\(s\) to upload | + +#### Output + +| Parameter | Type | +| --------- | ---- | +| `data` | string | +| `name` | string | + ## Block Configuration diff --git a/apps/sim/blocks/blocks/knowledge.ts b/apps/sim/blocks/blocks/knowledge.ts index d43cef05f9..9e90c2f2f6 100644 --- a/apps/sim/blocks/blocks/knowledge.ts +++ b/apps/sim/blocks/blocks/knowledge.ts @@ -12,7 +12,7 @@ export const KnowledgeBlock: BlockConfig = { category: 'blocks', docsLink: 'https://docs.simstudio.ai/blocks/knowledge', tools: { - access: ['knowledge_search', 'knowledge_upload_chunk'], + access: ['knowledge_search', 'knowledge_upload_chunk', 'knowledge_upload_document'], config: { tool: (params) => { switch (params.operation) { @@ -20,6 +20,8 @@ export const KnowledgeBlock: BlockConfig = { return 'knowledge_search' case 'upload_chunk': return 'knowledge_upload_chunk' + case 'upload_document': + return 'knowledge_upload_document' default: return 'knowledge_search' } @@ -53,6 +55,7 @@ export const KnowledgeBlock: BlockConfig = { options: [ { label: 'Search', id: 'search' }, { label: 'Upload Chunk', id: 'upload_chunk' }, + { label: 'Upload Document', id: 'upload_document' }, ], value: () => 'search', }, @@ -72,7 +75,7 @@ export const KnowledgeBlock: BlockConfig = { layout: 'full', placeholder: 'Select knowledge base', multiSelect: false, - condition: { field: 'operation', value: 'upload_chunk' }, + condition: { field: 'operation', value: ['upload_chunk', 'upload_document'] }, }, { id: 'query', @@ -98,6 +101,17 @@ export const KnowledgeBlock: BlockConfig = { placeholder: 'Select document', condition: { field: 'operation', value: 'upload_chunk' }, }, + { + id: 'file', + title: 'File', + type: 'file-upload', + layout: 'full', + placeholder: 'Select file to upload', + condition: { field: 'operation', value: 'upload_document' }, + acceptedTypes: '.pdf,.doc,.docx,.txt,.csv,.xls,.xlsx', + multiple: true, // Allow multiple document uploads + maxSize: 100, // 100MB max for knowledge document uploads + }, { id: 'content', title: 'Chunk Content', diff --git a/apps/sim/tools/knowledge/index.ts b/apps/sim/tools/knowledge/index.ts index a6596048b9..dae8228507 100644 --- a/apps/sim/tools/knowledge/index.ts +++ b/apps/sim/tools/knowledge/index.ts @@ -1,4 +1,5 @@ import { knowledgeSearchTool } from './search' import { knowledgeUploadChunkTool } from './upload_chunk' +import { knowledgeUploadDocumentTool } from './upload_document' -export { knowledgeSearchTool, knowledgeUploadChunkTool } +export { knowledgeSearchTool, knowledgeUploadChunkTool, knowledgeUploadDocumentTool } diff --git a/apps/sim/tools/knowledge/types.ts b/apps/sim/tools/knowledge/types.ts index 1ad6edc86b..738396c476 100644 --- a/apps/sim/tools/knowledge/types.ts +++ b/apps/sim/tools/knowledge/types.ts @@ -49,3 +49,24 @@ export interface KnowledgeUploadChunkParams { content: string enabled?: boolean } + +export interface KnowledgeUploadDocumentResult { + id: string + name: string + size: number + type: string + url: string + enabled: boolean + createdAt: string + updatedAt: string +} + +export interface KnowledgeUploadDocumentResponse { + success: boolean + output: { + data: KnowledgeUploadDocumentResult + message: string + documentId: string + } + error?: string +} diff --git a/apps/sim/tools/knowledge/upload_document.ts b/apps/sim/tools/knowledge/upload_document.ts new file mode 100644 index 0000000000..12965e2762 --- /dev/null +++ b/apps/sim/tools/knowledge/upload_document.ts @@ -0,0 +1,149 @@ +import type { ToolConfig } from '../types' +import type { KnowledgeUploadDocumentResponse } from './types' + +export const knowledgeUploadDocumentTool: ToolConfig = { + id: 'knowledge_upload_document', + name: 'Knowledge Upload Document', + description: 'Upload documents to a knowledge base', + version: '1.0.0', + params: { + knowledgeBaseId: { + type: 'string', + required: true, + description: 'ID of the knowledge base containing the document', + }, + knowledgeBaseName: { + type: 'string', + required: true, + description: 'Name of the knowledge base to upload the document to', + }, + file: { + type: 'file', + required: true, + description: 'Document(s) to upload', + }, + }, + request: { + url: (params) => `/api/knowledge/${params.knowledgeBaseId}/documents`, + method: 'POST', + headers: () => ({ + 'Content-Type': 'application/json', + }), + body: (params) => { + // Handle both single file and array of files from FileUpload component + const files = Array.isArray(params.file) ? params.file : [params.file] + + // Map files to the expected document format + const documents = files.map( + (fileData: { name: string; path: string; size: number; type: string }) => { + // Create file URL (handle both relative and absolute paths) + const fileUrl = fileData.path?.startsWith('http') + ? fileData.path + : `${typeof window !== 'undefined' ? window.location.origin : ''}${fileData.path}` + + return { + filename: fileData.name, + fileUrl: fileUrl, + fileSize: fileData.size, + mimeType: fileData.type, + } + } + ) + + // Use bulk upload format (required for processing) + const requestBody = { + documents: documents, + processingOptions: { + chunkSize: 1024, + minCharactersPerChunk: 100, + chunkOverlap: 200, + recipe: 'default', + lang: 'en', + }, + bulk: true, + } + + return requestBody + }, + isInternalRoute: true, + }, + transformResponse: async (response): Promise => { + try { + const result = await response.json() + + if (!response.ok) { + const errorMessage = result.error?.message || result.message || 'Failed to upload documents' + throw new Error(errorMessage) + } + + const data = result.data || result + const documentsCreated = data.documentsCreated || [] + + // Handle multiple documents response + const uploadCount = documentsCreated.length + const firstDocument = documentsCreated[0] + + return { + success: true, + output: { + data: { + id: firstDocument?.documentId || firstDocument?.id || '', + name: + uploadCount > 1 ? `${uploadCount} documents` : firstDocument?.filename || 'Unknown', + size: 0, // Size not returned in bulk response + type: 'document', + url: '', + createdAt: new Date().toISOString(), + updatedAt: new Date().toISOString(), + enabled: true, + }, + message: + uploadCount > 1 + ? `Successfully uploaded ${uploadCount} documents to knowledge base` + : `Successfully uploaded document to knowledge base`, + documentId: firstDocument?.documentId || firstDocument?.id || '', + }, + } + } catch (error: any) { + return { + success: false, + output: { + data: { + id: '', + name: '', + size: 0, + type: '', + url: '', + enabled: true, + createdAt: '', + updatedAt: '', + }, + message: `Failed to upload documents: ${error.message || 'Unknown error'}`, + documentId: '', + }, + error: `Failed to upload documents: ${error.message || 'Unknown error'}`, + } + } + }, + transformError: async (error): Promise => { + const errorMessage = `Failed to upload documents: ${error.message || 'Unknown error'}` + return { + success: false, + output: { + data: { + id: '', + name: '', + size: 0, + type: '', + url: '', + enabled: true, + createdAt: '', + updatedAt: '', + }, + message: errorMessage, + documentId: '', + }, + error: errorMessage, + } + }, +} diff --git a/apps/sim/tools/registry.ts b/apps/sim/tools/registry.ts index 201dee7b38..3433198514 100644 --- a/apps/sim/tools/registry.ts +++ b/apps/sim/tools/registry.ts @@ -53,7 +53,11 @@ import { contactsTool as hubspotContacts } from './hubspot/contacts' import { huggingfaceChatTool } from './huggingface' import { readUrlTool } from './jina' import { jiraBulkRetrieveTool, jiraRetrieveTool, jiraUpdateTool, jiraWriteTool } from './jira' -import { knowledgeSearchTool, knowledgeUploadChunkTool } from './knowledge' +import { + knowledgeSearchTool, + knowledgeUploadChunkTool, + knowledgeUploadDocumentTool, +} from './knowledge' import { linearCreateIssueTool, linearReadIssuesTool } from './linear' import { linkupSearchTool } from './linkup' import { mem0AddMemoriesTool, mem0GetMemoriesTool, mem0SearchMemoriesTool } from './mem0' @@ -191,6 +195,7 @@ export const tools: Record = { memory_delete: memoryDeleteTool, knowledge_search: knowledgeSearchTool, knowledge_upload_chunk: knowledgeUploadChunkTool, + knowledge_upload_document: knowledgeUploadDocumentTool, elevenlabs_tts: elevenLabsTtsTool, s3_get_object: s3GetObjectTool, telegram_message: telegramMessageTool, From 7be0aa941e03693b3c3305aaf920493082363e28 Mon Sep 17 00:00:00 2001 From: Adam Gough Date: Sat, 28 Jun 2025 17:32:53 -0700 Subject: [PATCH 2/6] improvement: added greptile comments (#579) --- apps/docs/content/docs/tools/knowledge.mdx | 1 - apps/sim/tools/knowledge/upload_document.ts | 5 ----- 2 files changed, 6 deletions(-) diff --git a/apps/docs/content/docs/tools/knowledge.mdx b/apps/docs/content/docs/tools/knowledge.mdx index acbe3c7b04..a59ad97543 100644 --- a/apps/docs/content/docs/tools/knowledge.mdx +++ b/apps/docs/content/docs/tools/knowledge.mdx @@ -109,7 +109,6 @@ Upload documents to a knowledge base | Parameter | Type | Required | Description | | --------- | ---- | -------- | ----------- | | `knowledgeBaseId` | string | Yes | ID of the knowledge base containing the document | -| `knowledgeBaseName` | string | Yes | Name of the knowledge base to upload the document to | | `file` | file | Yes | Document\(s\) to upload | #### Output diff --git a/apps/sim/tools/knowledge/upload_document.ts b/apps/sim/tools/knowledge/upload_document.ts index 12965e2762..1c97217b9c 100644 --- a/apps/sim/tools/knowledge/upload_document.ts +++ b/apps/sim/tools/knowledge/upload_document.ts @@ -12,11 +12,6 @@ export const knowledgeUploadDocumentTool: ToolConfig Date: Sat, 28 Jun 2025 19:33:46 -0700 Subject: [PATCH 3/6] improvement: changed to text to doc (#579) --- apps/docs/content/docs/tools/knowledge.mdx | 7 +- apps/sim/blocks/blocks/knowledge.ts | 38 ++++---- apps/sim/lib/documents/document-processor.ts | 37 +++++++- ...{upload_document.ts => create_document.ts} | 88 ++++++++++--------- apps/sim/tools/knowledge/index.ts | 4 +- apps/sim/tools/knowledge/types.ts | 8 +- apps/sim/tools/registry.ts | 4 +- 7 files changed, 114 insertions(+), 72 deletions(-) rename apps/sim/tools/knowledge/{upload_document.ts => create_document.ts} (55%) diff --git a/apps/docs/content/docs/tools/knowledge.mdx b/apps/docs/content/docs/tools/knowledge.mdx index a59ad97543..eb8ddef3bd 100644 --- a/apps/docs/content/docs/tools/knowledge.mdx +++ b/apps/docs/content/docs/tools/knowledge.mdx @@ -100,16 +100,17 @@ Upload a new chunk to a document in a knowledge base | `createdAt` | string | | `updatedAt` | string | -### `knowledge_upload_document` +### `knowledge_create_document` -Upload documents to a knowledge base +Create a new document in a knowledge base #### Input | Parameter | Type | Required | Description | | --------- | ---- | -------- | ----------- | | `knowledgeBaseId` | string | Yes | ID of the knowledge base containing the document | -| `file` | file | Yes | Document\(s\) to upload | +| `name` | string | Yes | Name of the document | +| `content` | string | Yes | Content of the document | #### Output diff --git a/apps/sim/blocks/blocks/knowledge.ts b/apps/sim/blocks/blocks/knowledge.ts index 9e90c2f2f6..b71f76bd7f 100644 --- a/apps/sim/blocks/blocks/knowledge.ts +++ b/apps/sim/blocks/blocks/knowledge.ts @@ -12,7 +12,7 @@ export const KnowledgeBlock: BlockConfig = { category: 'blocks', docsLink: 'https://docs.simstudio.ai/blocks/knowledge', tools: { - access: ['knowledge_search', 'knowledge_upload_chunk', 'knowledge_upload_document'], + access: ['knowledge_search', 'knowledge_upload_chunk', 'knowledge_create_document'], config: { tool: (params) => { switch (params.operation) { @@ -20,8 +20,8 @@ export const KnowledgeBlock: BlockConfig = { return 'knowledge_search' case 'upload_chunk': return 'knowledge_upload_chunk' - case 'upload_document': - return 'knowledge_upload_document' + case 'create_document': + return 'knowledge_create_document' default: return 'knowledge_search' } @@ -55,7 +55,7 @@ export const KnowledgeBlock: BlockConfig = { options: [ { label: 'Search', id: 'search' }, { label: 'Upload Chunk', id: 'upload_chunk' }, - { label: 'Upload Document', id: 'upload_document' }, + { label: 'Create Document', id: 'create_document' }, ], value: () => 'search', }, @@ -75,7 +75,7 @@ export const KnowledgeBlock: BlockConfig = { layout: 'full', placeholder: 'Select knowledge base', multiSelect: false, - condition: { field: 'operation', value: ['upload_chunk', 'upload_document'] }, + condition: { field: 'operation', value: ['upload_chunk', 'create_document'] }, }, { id: 'query', @@ -101,17 +101,6 @@ export const KnowledgeBlock: BlockConfig = { placeholder: 'Select document', condition: { field: 'operation', value: 'upload_chunk' }, }, - { - id: 'file', - title: 'File', - type: 'file-upload', - layout: 'full', - placeholder: 'Select file to upload', - condition: { field: 'operation', value: 'upload_document' }, - acceptedTypes: '.pdf,.doc,.docx,.txt,.csv,.xls,.xlsx', - multiple: true, // Allow multiple document uploads - maxSize: 100, // 100MB max for knowledge document uploads - }, { id: 'content', title: 'Chunk Content', @@ -121,5 +110,22 @@ export const KnowledgeBlock: BlockConfig = { rows: 6, condition: { field: 'operation', value: 'upload_chunk' }, }, + { + id: 'name', + title: 'Document Name', + type: 'short-input', + layout: 'full', + placeholder: 'Enter the document name to create', + condition: { field: 'operation', value: ['create_document'] }, + }, + { + id: 'content', + title: 'Document Content', + type: 'long-input', + layout: 'full', + placeholder: 'Enter the document content to create', + rows: 6, + condition: { field: 'operation', value: ['create_document'] }, + }, ], } diff --git a/apps/sim/lib/documents/document-processor.ts b/apps/sim/lib/documents/document-processor.ts index 70d0a23a88..b7c62ca224 100644 --- a/apps/sim/lib/documents/document-processor.ts +++ b/apps/sim/lib/documents/document-processor.ts @@ -339,7 +339,42 @@ async function parseWithFileParser( try { let content: string - if (fileUrl.startsWith('http://') || fileUrl.startsWith('https://')) { + if (fileUrl.startsWith('data:')) { + // Handle data URI (e.g., data:text/plain;base64,SGVsbG8gV29ybGQ=) + logger.info(`Processing data URI for: ${filename}`) + + try { + const [header, base64Data] = fileUrl.split(',') + if (!base64Data) { + throw new Error('Invalid data URI format') + } + + // Check if it's base64 encoded + if (header.includes('base64')) { + // Decode base64 content + const buffer = Buffer.from(base64Data, 'base64') + content = buffer.toString('utf8') + } else { + // Handle URL-encoded data URIs (though we primarily use base64) + content = decodeURIComponent(base64Data) + } + + // For text content, return it directly + if (mimeType === 'text/plain') { + logger.info(`Data URI processed successfully for text content: ${filename}`) + } else { + // For other file types, try to parse the buffer + const extension = filename.split('.').pop()?.toLowerCase() || 'txt' + const buffer = Buffer.from(base64Data, 'base64') + const result = await parseBuffer(buffer, extension) + content = result.content + } + } catch (error) { + throw new Error( + `Failed to process data URI: ${error instanceof Error ? error.message : 'Unknown error'}` + ) + } + } else if (fileUrl.startsWith('http://') || fileUrl.startsWith('https://')) { // Download and parse remote file with timeout const controller = new AbortController() const timeoutId = setTimeout(() => controller.abort(), TIMEOUTS.FILE_DOWNLOAD) diff --git a/apps/sim/tools/knowledge/upload_document.ts b/apps/sim/tools/knowledge/create_document.ts similarity index 55% rename from apps/sim/tools/knowledge/upload_document.ts rename to apps/sim/tools/knowledge/create_document.ts index 1c97217b9c..fd51c0f6cc 100644 --- a/apps/sim/tools/knowledge/upload_document.ts +++ b/apps/sim/tools/knowledge/create_document.ts @@ -1,10 +1,10 @@ import type { ToolConfig } from '../types' -import type { KnowledgeUploadDocumentResponse } from './types' +import type { KnowledgeCreateDocumentResponse } from './types' -export const knowledgeUploadDocumentTool: ToolConfig = { - id: 'knowledge_upload_document', - name: 'Knowledge Upload Document', - description: 'Upload documents to a knowledge base', +export const knowledgeCreateDocumentTool: ToolConfig = { + id: 'knowledge_create_document', + name: 'Knowledge Create Document', + description: 'Create a new document in a knowledge base', version: '1.0.0', params: { knowledgeBaseId: { @@ -12,10 +12,15 @@ export const knowledgeUploadDocumentTool: ToolConfig { - // Handle both single file and array of files from FileUpload component - const files = Array.isArray(params.file) ? params.file : [params.file] + // Create document from text content + const textContent = params.content + const documentName = params.name + + // Calculate content metrics + const contentBytes = new TextEncoder().encode(textContent).length - // Map files to the expected document format - const documents = files.map( - (fileData: { name: string; path: string; size: number; type: string }) => { - // Create file URL (handle both relative and absolute paths) - const fileUrl = fileData.path?.startsWith('http') - ? fileData.path - : `${typeof window !== 'undefined' ? window.location.origin : ''}${fileData.path}` + // Properly encode UTF-8 text to base64 + const utf8Bytes = new TextEncoder().encode(textContent) + const base64Content = + typeof Buffer !== 'undefined' + ? Buffer.from(textContent, 'utf8').toString('base64') + : btoa(String.fromCharCode(...utf8Bytes)) - return { - filename: fileData.name, - fileUrl: fileUrl, - fileSize: fileData.size, - mimeType: fileData.type, - } - } - ) + // Create data URI for text content + const dataUri = `data:text/plain;base64,${base64Content}` - // Use bulk upload format (required for processing) - const requestBody = { + const documents = [ + { + filename: documentName.endsWith('.txt') ? documentName : `${documentName}.txt`, + fileUrl: dataUri, // or handle as direct text content + fileSize: contentBytes, + mimeType: 'text/plain', + }, + ] + + return { documents: documents, processingOptions: { chunkSize: 1024, @@ -57,17 +67,15 @@ export const knowledgeUploadDocumentTool: ToolConfig => { + transformResponse: async (response): Promise => { try { const result = await response.json() if (!response.ok) { - const errorMessage = result.error?.message || result.message || 'Failed to upload documents' + const errorMessage = result.error?.message || result.message || 'Failed to create document' throw new Error(errorMessage) } @@ -85,17 +93,15 @@ export const knowledgeUploadDocumentTool: ToolConfig 1 ? `${uploadCount} documents` : firstDocument?.filename || 'Unknown', - size: 0, // Size not returned in bulk response type: 'document', - url: '', createdAt: new Date().toISOString(), updatedAt: new Date().toISOString(), enabled: true, }, message: uploadCount > 1 - ? `Successfully uploaded ${uploadCount} documents to knowledge base` - : `Successfully uploaded document to knowledge base`, + ? `Successfully created ${uploadCount} documents in knowledge base` + : `Successfully created document in knowledge base`, documentId: firstDocument?.documentId || firstDocument?.id || '', }, } @@ -106,31 +112,27 @@ export const knowledgeUploadDocumentTool: ToolConfig => { - const errorMessage = `Failed to upload documents: ${error.message || 'Unknown error'}` + transformError: async (error): Promise => { + const errorMessage = `Failed to create document: ${error.message || 'Unknown error'}` return { success: false, output: { data: { id: '', name: '', - size: 0, type: '', - url: '', enabled: true, createdAt: '', updatedAt: '', diff --git a/apps/sim/tools/knowledge/index.ts b/apps/sim/tools/knowledge/index.ts index dae8228507..45514650f1 100644 --- a/apps/sim/tools/knowledge/index.ts +++ b/apps/sim/tools/knowledge/index.ts @@ -1,5 +1,5 @@ +import { knowledgeCreateDocumentTool } from './create_document' import { knowledgeSearchTool } from './search' import { knowledgeUploadChunkTool } from './upload_chunk' -import { knowledgeUploadDocumentTool } from './upload_document' -export { knowledgeSearchTool, knowledgeUploadChunkTool, knowledgeUploadDocumentTool } +export { knowledgeSearchTool, knowledgeUploadChunkTool, knowledgeCreateDocumentTool } diff --git a/apps/sim/tools/knowledge/types.ts b/apps/sim/tools/knowledge/types.ts index 738396c476..ec3b686e2a 100644 --- a/apps/sim/tools/knowledge/types.ts +++ b/apps/sim/tools/knowledge/types.ts @@ -50,21 +50,19 @@ export interface KnowledgeUploadChunkParams { enabled?: boolean } -export interface KnowledgeUploadDocumentResult { +export interface KnowledgeCreateDocumentResult { id: string name: string - size: number type: string - url: string enabled: boolean createdAt: string updatedAt: string } -export interface KnowledgeUploadDocumentResponse { +export interface KnowledgeCreateDocumentResponse { success: boolean output: { - data: KnowledgeUploadDocumentResult + data: KnowledgeCreateDocumentResult message: string documentId: string } diff --git a/apps/sim/tools/registry.ts b/apps/sim/tools/registry.ts index 3433198514..fbaeb2dbb9 100644 --- a/apps/sim/tools/registry.ts +++ b/apps/sim/tools/registry.ts @@ -54,9 +54,9 @@ import { huggingfaceChatTool } from './huggingface' import { readUrlTool } from './jina' import { jiraBulkRetrieveTool, jiraRetrieveTool, jiraUpdateTool, jiraWriteTool } from './jira' import { + knowledgeCreateDocumentTool, knowledgeSearchTool, knowledgeUploadChunkTool, - knowledgeUploadDocumentTool, } from './knowledge' import { linearCreateIssueTool, linearReadIssuesTool } from './linear' import { linkupSearchTool } from './linkup' @@ -195,7 +195,7 @@ export const tools: Record = { memory_delete: memoryDeleteTool, knowledge_search: knowledgeSearchTool, knowledge_upload_chunk: knowledgeUploadChunkTool, - knowledge_upload_document: knowledgeUploadDocumentTool, + knowledge_create_document: knowledgeCreateDocumentTool, elevenlabs_tts: elevenLabsTtsTool, s3_get_object: s3GetObjectTool, telegram_message: telegramMessageTool, From 581eb67014f1218f5b861136f700f703665df56d Mon Sep 17 00:00:00 2001 From: Adam Gough Date: Sat, 28 Jun 2025 19:35:05 -0700 Subject: [PATCH 4/6] improvement: removed comment (#579) --- apps/sim/lib/documents/document-processor.ts | 8 -------- 1 file changed, 8 deletions(-) diff --git a/apps/sim/lib/documents/document-processor.ts b/apps/sim/lib/documents/document-processor.ts index b7c62ca224..1d3a5604ed 100644 --- a/apps/sim/lib/documents/document-processor.ts +++ b/apps/sim/lib/documents/document-processor.ts @@ -340,7 +340,6 @@ async function parseWithFileParser( let content: string if (fileUrl.startsWith('data:')) { - // Handle data URI (e.g., data:text/plain;base64,SGVsbG8gV29ybGQ=) logger.info(`Processing data URI for: ${filename}`) try { @@ -349,21 +348,16 @@ async function parseWithFileParser( throw new Error('Invalid data URI format') } - // Check if it's base64 encoded if (header.includes('base64')) { - // Decode base64 content const buffer = Buffer.from(base64Data, 'base64') content = buffer.toString('utf8') } else { - // Handle URL-encoded data URIs (though we primarily use base64) content = decodeURIComponent(base64Data) } - // For text content, return it directly if (mimeType === 'text/plain') { logger.info(`Data URI processed successfully for text content: ${filename}`) } else { - // For other file types, try to parse the buffer const extension = filename.split('.').pop()?.toLowerCase() || 'txt' const buffer = Buffer.from(base64Data, 'base64') const result = await parseBuffer(buffer, extension) @@ -375,7 +369,6 @@ async function parseWithFileParser( ) } } else if (fileUrl.startsWith('http://') || fileUrl.startsWith('https://')) { - // Download and parse remote file with timeout const controller = new AbortController() const timeoutId = setTimeout(() => controller.abort(), TIMEOUTS.FILE_DOWNLOAD) @@ -389,7 +382,6 @@ async function parseWithFileParser( const buffer = Buffer.from(await response.arrayBuffer()) - // Extract file extension from filename const extension = filename.split('.').pop()?.toLowerCase() || '' if (!extension) { throw new Error(`Could not determine file extension from filename: ${filename}`) From 578d2d9e51998e93eef3279b9e7e2d3e2f37103a Mon Sep 17 00:00:00 2001 From: Waleed Latif Date: Mon, 30 Jun 2025 18:33:59 -0700 Subject: [PATCH 5/6] added input validation, tested persistence of KB selector --- apps/sim/blocks/blocks/knowledge.ts | 6 +-- apps/sim/providers/utils.test.ts | 7 ++-- apps/sim/tools/knowledge/create_document.ts | 43 +++++++++++++++++---- 3 files changed, 41 insertions(+), 15 deletions(-) diff --git a/apps/sim/blocks/blocks/knowledge.ts b/apps/sim/blocks/blocks/knowledge.ts index b71f76bd7f..394f90d5f0 100644 --- a/apps/sim/blocks/blocks/knowledge.ts +++ b/apps/sim/blocks/blocks/knowledge.ts @@ -6,7 +6,7 @@ export const KnowledgeBlock: BlockConfig = { name: 'Knowledge', description: 'Use vector search', longDescription: - 'Perform semantic vector search across one or more knowledge bases or upload new chunks to documents. Uses advanced AI embeddings to understand meaning and context for search operations.', + 'Perform semantic vector search across knowledge bases, upload individual chunks to existing documents, or create new documents from text content. Uses advanced AI embeddings to understand meaning and context for search operations.', bgColor: '#00B0B0', icon: PackageSearchIcon, category: 'blocks', @@ -115,7 +115,7 @@ export const KnowledgeBlock: BlockConfig = { title: 'Document Name', type: 'short-input', layout: 'full', - placeholder: 'Enter the document name to create', + placeholder: 'Enter document name', condition: { field: 'operation', value: ['create_document'] }, }, { @@ -123,7 +123,7 @@ export const KnowledgeBlock: BlockConfig = { title: 'Document Content', type: 'long-input', layout: 'full', - placeholder: 'Enter the document content to create', + placeholder: 'Enter the document content', rows: 6, condition: { field: 'operation', value: ['create_document'] }, }, diff --git a/apps/sim/providers/utils.test.ts b/apps/sim/providers/utils.test.ts index 4672df358b..dbe29da822 100644 --- a/apps/sim/providers/utils.test.ts +++ b/apps/sim/providers/utils.test.ts @@ -110,6 +110,9 @@ describe('Model Capabilities', () => { it.concurrent('should return true for models that support temperature', () => { const supportedModels = [ 'gpt-4o', + 'gpt-4.1', + 'gpt-4.1-mini', + 'gpt-4.1-nano', 'gemini-2.5-flash', 'claude-sonnet-4-0', 'claude-opus-4-0', @@ -139,10 +142,6 @@ describe('Model Capabilities', () => { 'deepseek-r1', // Chat models that don't support temperature 'deepseek-chat', - // GPT-4.1 family models that don't support temperature - 'gpt-4.1', - 'gpt-4.1-nano', - 'gpt-4.1-mini', 'azure/gpt-4.1', 'azure/model-router', ] diff --git a/apps/sim/tools/knowledge/create_document.ts b/apps/sim/tools/knowledge/create_document.ts index fd51c0f6cc..be76737168 100644 --- a/apps/sim/tools/knowledge/create_document.ts +++ b/apps/sim/tools/knowledge/create_document.ts @@ -30,27 +30,39 @@ export const knowledgeCreateDocumentTool: ToolConfig { - // Create document from text content - const textContent = params.content - const documentName = params.name + const textContent = params.content?.trim() + const documentName = params.name?.trim() + + if (!documentName || documentName.length === 0) { + throw new Error('Document name is required') + } + if (documentName.length > 255) { + throw new Error('Document name must be 255 characters or less') + } + if (/[<>:"/\\|?*]/.test(documentName)) { + throw new Error('Document name contains invalid characters. Avoid: < > : " / \\ | ? *') + } + if (!textContent || textContent.length < 10) { + throw new Error('Document content must be at least 10 characters long') + } + if (textContent.length > 1000000) { + throw new Error('Document content exceeds maximum size of 1MB') + } - // Calculate content metrics const contentBytes = new TextEncoder().encode(textContent).length - // Properly encode UTF-8 text to base64 const utf8Bytes = new TextEncoder().encode(textContent) const base64Content = typeof Buffer !== 'undefined' ? Buffer.from(textContent, 'utf8').toString('base64') : btoa(String.fromCharCode(...utf8Bytes)) - // Create data URI for text content const dataUri = `data:text/plain;base64,${base64Content}` const documents = [ { filename: documentName.endsWith('.txt') ? documentName : `${documentName}.txt`, - fileUrl: dataUri, // or handle as direct text content + fileUrl: dataUri, fileSize: contentBytes, mimeType: 'text/plain', }, @@ -125,7 +137,22 @@ export const knowledgeCreateDocumentTool: ToolConfig => { - const errorMessage = `Failed to create document: ${error.message || 'Unknown error'}` + let errorMessage = 'Failed to create document' + + if (error.message) { + if (error.message.includes('Document name')) { + errorMessage = `Document name error: ${error.message}` + } else if (error.message.includes('Document content')) { + errorMessage = `Document content error: ${error.message}` + } else if (error.message.includes('invalid characters')) { + errorMessage = `${error.message}. Please use a valid filename.` + } else if (error.message.includes('maximum size')) { + errorMessage = `${error.message}. Consider breaking large content into smaller documents.` + } else { + errorMessage = `Failed to create document: ${error.message}` + } + } + return { success: false, output: { From 0514e7e98fce4d8b804054c05e153a3134fde965 Mon Sep 17 00:00:00 2001 From: Waleed Latif Date: Mon, 30 Jun 2025 18:40:00 -0700 Subject: [PATCH 6/6] update docs --- apps/docs/content/docs/tools/knowledge.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/docs/content/docs/tools/knowledge.mdx b/apps/docs/content/docs/tools/knowledge.mdx index eb8ddef3bd..b8f3dad064 100644 --- a/apps/docs/content/docs/tools/knowledge.mdx +++ b/apps/docs/content/docs/tools/knowledge.mdx @@ -49,7 +49,7 @@ In Sim Studio, the Knowledge Base block enables your agents to perform intellige ## Usage Instructions -Perform semantic vector search across one or more knowledge bases or upload new chunks to documents. Uses advanced AI embeddings to understand meaning and context for search operations. +Perform semantic vector search across knowledge bases, upload individual chunks to existing documents, or create new documents from text content. Uses advanced AI embeddings to understand meaning and context for search operations.