Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 20 additions & 1 deletion apps/docs/content/docs/tools/knowledge.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ In Sim Studio, the Knowledge Base block enables your agents to perform intellige

## Usage Instructions

Perform semantic vector search across one or more knowledge bases or upload new chunks to documents. Uses advanced AI embeddings to understand meaning and context for search operations.
Perform semantic vector search across knowledge bases, upload individual chunks to existing documents, or create new documents from text content. Uses advanced AI embeddings to understand meaning and context for search operations.



Expand Down Expand Up @@ -100,6 +100,25 @@ Upload a new chunk to a document in a knowledge base
| `createdAt` | string |
| `updatedAt` | string |

### `knowledge_create_document`

Create a new document in a knowledge base

#### Input

| Parameter | Type | Required | Description |
| --------- | ---- | -------- | ----------- |
| `knowledgeBaseId` | string | Yes | ID of the knowledge base in which to create the document |
| `name` | string | Yes | Name of the document |
| `content` | string | Yes | Content of the document |

#### Output

| Parameter | Type |
| --------- | ---- |
| `data` | string |
| `name` | string |



## Block Configuration
Expand Down
26 changes: 23 additions & 3 deletions apps/sim/blocks/blocks/knowledge.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,20 +6,22 @@ export const KnowledgeBlock: BlockConfig = {
name: 'Knowledge',
description: 'Use vector search',
longDescription:
'Perform semantic vector search across one or more knowledge bases or upload new chunks to documents. Uses advanced AI embeddings to understand meaning and context for search operations.',
'Perform semantic vector search across knowledge bases, upload individual chunks to existing documents, or create new documents from text content. Uses advanced AI embeddings to understand meaning and context for search operations.',
bgColor: '#00B0B0',
icon: PackageSearchIcon,
category: 'blocks',
docsLink: 'https://docs.simstudio.ai/blocks/knowledge',
tools: {
access: ['knowledge_search', 'knowledge_upload_chunk'],
access: ['knowledge_search', 'knowledge_upload_chunk', 'knowledge_create_document'],
config: {
tool: (params) => {
switch (params.operation) {
case 'search':
return 'knowledge_search'
case 'upload_chunk':
return 'knowledge_upload_chunk'
case 'create_document':
return 'knowledge_create_document'
default:
return 'knowledge_search'
}
Expand Down Expand Up @@ -53,6 +55,7 @@ export const KnowledgeBlock: BlockConfig = {
options: [
{ label: 'Search', id: 'search' },
{ label: 'Upload Chunk', id: 'upload_chunk' },
{ label: 'Create Document', id: 'create_document' },
],
value: () => 'search',
},
Expand All @@ -72,7 +75,7 @@ export const KnowledgeBlock: BlockConfig = {
layout: 'full',
placeholder: 'Select knowledge base',
multiSelect: false,
condition: { field: 'operation', value: 'upload_chunk' },
condition: { field: 'operation', value: ['upload_chunk', 'create_document'] },
},
{
id: 'query',
Expand Down Expand Up @@ -107,5 +110,22 @@ export const KnowledgeBlock: BlockConfig = {
rows: 6,
condition: { field: 'operation', value: 'upload_chunk' },
},
{
id: 'name',
title: 'Document Name',
type: 'short-input',
layout: 'full',
placeholder: 'Enter document name',
condition: { field: 'operation', value: ['create_document'] },
},
{
id: 'content',
title: 'Document Content',
type: 'long-input',
layout: 'full',
placeholder: 'Enter the document content',
rows: 6,
condition: { field: 'operation', value: ['create_document'] },
},
],
}
33 changes: 30 additions & 3 deletions apps/sim/lib/documents/document-processor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -339,8 +339,36 @@ async function parseWithFileParser(
try {
let content: string

if (fileUrl.startsWith('http://') || fileUrl.startsWith('https://')) {
// Download and parse remote file with timeout
if (fileUrl.startsWith('data:')) {
logger.info(`Processing data URI for: ${filename}`)

try {
const [header, base64Data] = fileUrl.split(',')
if (!base64Data) {
throw new Error('Invalid data URI format')
}

if (header.includes('base64')) {
const buffer = Buffer.from(base64Data, 'base64')
content = buffer.toString('utf8')
} else {
content = decodeURIComponent(base64Data)
}

if (mimeType === 'text/plain') {
logger.info(`Data URI processed successfully for text content: ${filename}`)
} else {
const extension = filename.split('.').pop()?.toLowerCase() || 'txt'
const buffer = Buffer.from(base64Data, 'base64')
const result = await parseBuffer(buffer, extension)
content = result.content
}
} catch (error) {
throw new Error(
`Failed to process data URI: ${error instanceof Error ? error.message : 'Unknown error'}`
)
}
} else if (fileUrl.startsWith('http://') || fileUrl.startsWith('https://')) {
const controller = new AbortController()
const timeoutId = setTimeout(() => controller.abort(), TIMEOUTS.FILE_DOWNLOAD)

Expand All @@ -354,7 +382,6 @@ async function parseWithFileParser(

const buffer = Buffer.from(await response.arrayBuffer())

// Extract file extension from filename
const extension = filename.split('.').pop()?.toLowerCase() || ''
if (!extension) {
throw new Error(`Could not determine file extension from filename: ${filename}`)
Expand Down
7 changes: 3 additions & 4 deletions apps/sim/providers/utils.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,9 @@ describe('Model Capabilities', () => {
it.concurrent('should return true for models that support temperature', () => {
const supportedModels = [
'gpt-4o',
'gpt-4.1',
'gpt-4.1-mini',
'gpt-4.1-nano',
'gemini-2.5-flash',
'claude-sonnet-4-0',
'claude-opus-4-0',
Expand Down Expand Up @@ -139,10 +142,6 @@ describe('Model Capabilities', () => {
'deepseek-r1',
// Chat models that don't support temperature
'deepseek-chat',
// GPT-4.1 family models that don't support temperature
'gpt-4.1',
'gpt-4.1-nano',
'gpt-4.1-mini',
'azure/gpt-4.1',
'azure/model-router',
]
Expand Down
173 changes: 173 additions & 0 deletions apps/sim/tools/knowledge/create_document.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
import type { ToolConfig } from '../types'
import type { KnowledgeCreateDocumentResponse } from './types'

/**
 * Number of bytes converted per `String.fromCharCode` call in the browser
 * base64 fallback. Spreading a whole (up to ~1MB) byte array into a single call
 * exceeds the engine's argument limit and throws a RangeError.
 */
const BASE64_CHUNK_SIZE = 0x8000

/**
 * Base64-encodes raw bytes.
 *
 * Uses `Buffer` when it exists (Node.js) and otherwise falls back to `btoa`,
 * building the intermediate binary string in bounded chunks.
 *
 * @param bytes - Bytes to encode (here: the UTF-8 encoding of the document text)
 * @returns The base64 representation of `bytes`
 */
function encodeBase64(bytes: Uint8Array): string {
  if (typeof Buffer !== 'undefined') {
    return Buffer.from(bytes).toString('base64')
  }

  let binary = ''
  for (let offset = 0; offset < bytes.length; offset += BASE64_CHUNK_SIZE) {
    binary += String.fromCharCode(...bytes.subarray(offset, offset + BASE64_CHUNK_SIZE))
  }
  return btoa(binary)
}

/**
 * Builds the failure response shared by every error path of the tool: an empty
 * document summary plus the same text in `output.message` and `error`.
 *
 * @param message - Human-readable failure description
 */
function createDocumentFailure(message: string): KnowledgeCreateDocumentResponse {
  return {
    success: false,
    output: {
      data: {
        id: '',
        name: '',
        type: '',
        enabled: true,
        createdAt: '',
        updatedAt: '',
      },
      message,
      documentId: '',
    },
    error: message,
  }
}

/**
 * Tool that creates a new plain-text document in a knowledge base.
 *
 * The text is validated, wrapped in a `data:text/plain;base64,...` URI and
 * POSTed to `/api/knowledge/{knowledgeBaseId}/documents` as a single-item bulk
 * upload with fixed processing options.
 */
export const knowledgeCreateDocumentTool: ToolConfig<any, KnowledgeCreateDocumentResponse> = {
  id: 'knowledge_create_document',
  name: 'Knowledge Create Document',
  description: 'Create a new document in a knowledge base',
  version: '1.0.0',
  params: {
    knowledgeBaseId: {
      type: 'string',
      required: true,
      description: 'ID of the knowledge base containing the document',
    },
    name: {
      type: 'string',
      required: true,
      description: 'Name of the document',
    },
    content: {
      type: 'string',
      required: true,
      description: 'Content of the document',
    },
  },
  request: {
    url: (params) => `/api/knowledge/${params.knowledgeBaseId}/documents`,
    method: 'POST',
    headers: () => ({
      'Content-Type': 'application/json',
    }),
    /**
     * Validates the name/content and builds the bulk-upload request body.
     *
     * @throws Error when the name is empty, longer than 255 characters or
     *   contains filename-reserved characters, or when the trimmed content is
     *   shorter than 10 or longer than 1,000,000 characters.
     */
    body: (params) => {
      const textContent = params.content?.trim()
      const documentName = params.name?.trim()

      if (!documentName) {
        throw new Error('Document name is required')
      }
      if (documentName.length > 255) {
        throw new Error('Document name must be 255 characters or less')
      }
      if (/[<>:"/\\|?*]/.test(documentName)) {
        throw new Error('Document name contains invalid characters. Avoid: < > : " / \\ | ? *')
      }
      if (!textContent || textContent.length < 10) {
        throw new Error('Document content must be at least 10 characters long')
      }
      if (textContent.length > 1000000) {
        throw new Error('Document content exceeds maximum size of 1MB')
      }

      // Encode once: the same bytes give both the reported size and the payload.
      const utf8Bytes = new TextEncoder().encode(textContent)
      const dataUri = `data:text/plain;base64,${encodeBase64(utf8Bytes)}`

      return {
        documents: [
          {
            // The document is always stored as a .txt file.
            filename: documentName.endsWith('.txt') ? documentName : `${documentName}.txt`,
            fileUrl: dataUri,
            fileSize: utf8Bytes.length,
            mimeType: 'text/plain',
          },
        ],
        processingOptions: {
          chunkSize: 1024,
          minCharactersPerChunk: 100,
          chunkOverlap: 200,
          recipe: 'default',
          lang: 'en',
        },
        bulk: true,
      }
    },
    isInternalRoute: true,
  },
  /**
   * Converts the API response into the tool result. Never rejects: HTTP errors
   * and unreadable bodies are reported as `success: false`.
   */
  transformResponse: async (response): Promise<KnowledgeCreateDocumentResponse> => {
    try {
      const result = await response.json()

      if (!response.ok) {
        // The error may arrive as a plain string or as an object with `message`.
        const apiMessage =
          (typeof result?.error === 'string' ? result.error : result?.error?.message) ||
          result?.message
        return createDocumentFailure(
          apiMessage ? `Failed to create document: ${apiMessage}` : 'Failed to create document'
        )
      }

      const data = result.data || result
      const documentsCreated = data.documentsCreated || []

      // Handle multiple documents response
      const uploadCount = documentsCreated.length
      const firstDocument = documentsCreated[0]
      const documentId = firstDocument?.documentId || firstDocument?.id || ''
      // Single timestamp so createdAt and updatedAt always agree.
      const timestamp = new Date().toISOString()

      return {
        success: true,
        output: {
          data: {
            id: documentId,
            name:
              uploadCount > 1 ? `${uploadCount} documents` : firstDocument?.filename || 'Unknown',
            type: 'document',
            createdAt: timestamp,
            updatedAt: timestamp,
            enabled: true,
          },
          message:
            uploadCount > 1
              ? `Successfully created ${uploadCount} documents in knowledge base`
              : `Successfully created document in knowledge base`,
          documentId,
        },
      }
    } catch (error: unknown) {
      const reason = error instanceof Error && error.message ? error.message : 'Unknown error'
      return createDocumentFailure(`Failed to create document: ${reason}`)
    }
  },
  /**
   * Maps a thrown error (including the validation errors raised while building
   * the request body) to a failure response with a more helpful message.
   */
  transformError: async (error): Promise<KnowledgeCreateDocumentResponse> => {
    const rawMessage = error?.message
    let errorMessage = 'Failed to create document'

    if (rawMessage) {
      // The specific cases come first: their messages also contain
      // "Document name" / "Document content" and would otherwise never match.
      if (rawMessage.includes('invalid characters')) {
        errorMessage = `${rawMessage}. Please use a valid filename.`
      } else if (rawMessage.includes('maximum size')) {
        errorMessage = `${rawMessage}. Consider breaking large content into smaller documents.`
      } else if (rawMessage.includes('Document name')) {
        errorMessage = `Document name error: ${rawMessage}`
      } else if (rawMessage.includes('Document content')) {
        errorMessage = `Document content error: ${rawMessage}`
      } else {
        errorMessage = `Failed to create document: ${rawMessage}`
      }
    }

    return createDocumentFailure(errorMessage)
  },
}
3 changes: 2 additions & 1 deletion apps/sim/tools/knowledge/index.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { knowledgeCreateDocumentTool } from './create_document'
import { knowledgeSearchTool } from './search'
import { knowledgeUploadChunkTool } from './upload_chunk'

export { knowledgeSearchTool, knowledgeUploadChunkTool }
export { knowledgeSearchTool, knowledgeUploadChunkTool, knowledgeCreateDocumentTool }
19 changes: 19 additions & 0 deletions apps/sim/tools/knowledge/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,22 @@ export interface KnowledgeUploadChunkParams {
content: string
enabled?: boolean
}

/**
 * Summary of a document as reported by the `knowledge_create_document` tool.
 *
 * On failure the tool fills every string field with `''`.
 */
export interface KnowledgeCreateDocumentResult {
/** Identifier of the created document. */
id: string
/** Stored filename of the document, or an "N documents" label when several were created. */
name: string
/** Kind of entity; the tool sets `'document'` on success. */
type: string
/** Enabled flag; the tool always reports `true`. */
enabled: boolean
/** ISO-8601 timestamp generated by the tool when it transforms the response. */
createdAt: string
/** ISO-8601 timestamp generated by the tool when it transforms the response. */
updatedAt: string
}

/**
 * Result returned by the `knowledge_create_document` tool.
 */
export interface KnowledgeCreateDocumentResponse {
/** Whether the document was created. */
success: boolean
output: {
/** Summary of the created document (blank fields on failure). */
data: KnowledgeCreateDocumentResult
/** Human-readable outcome: a success notice or the failure description. */
message: string
/** Identifier of the created document; `''` on failure. */
documentId: string
}
/** Failure description; the tool sets it only on its failure paths. */
error?: string
}
7 changes: 6 additions & 1 deletion apps/sim/tools/registry.ts
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,11 @@ import { contactsTool as hubspotContacts } from './hubspot/contacts'
import { huggingfaceChatTool } from './huggingface'
import { readUrlTool } from './jina'
import { jiraBulkRetrieveTool, jiraRetrieveTool, jiraUpdateTool, jiraWriteTool } from './jira'
import { knowledgeSearchTool, knowledgeUploadChunkTool } from './knowledge'
import {
knowledgeCreateDocumentTool,
knowledgeSearchTool,
knowledgeUploadChunkTool,
} from './knowledge'
import { linearCreateIssueTool, linearReadIssuesTool } from './linear'
import { linkupSearchTool } from './linkup'
import { mem0AddMemoriesTool, mem0GetMemoriesTool, mem0SearchMemoriesTool } from './mem0'
Expand Down Expand Up @@ -191,6 +195,7 @@ export const tools: Record<string, ToolConfig> = {
memory_delete: memoryDeleteTool,
knowledge_search: knowledgeSearchTool,
knowledge_upload_chunk: knowledgeUploadChunkTool,
knowledge_create_document: knowledgeCreateDocumentTool,
elevenlabs_tts: elevenLabsTtsTool,
s3_get_object: s3GetObjectTool,
telegram_message: telegramMessageTool,
Expand Down