diff --git a/packages/components/nodes/documentloaders/NotionDB/NotionDB.ts b/packages/components/nodes/documentloaders/Notion/NotionDB.ts similarity index 77% rename from packages/components/nodes/documentloaders/NotionDB/NotionDB.ts rename to packages/components/nodes/documentloaders/Notion/NotionDB.ts index 71e5e507a6a..111e371803f 100644 --- a/packages/components/nodes/documentloaders/NotionDB/NotionDB.ts +++ b/packages/components/nodes/documentloaders/Notion/NotionDB.ts @@ -1,6 +1,6 @@ import { INode, INodeData, INodeParams } from '../../../src/Interface' import { TextSplitter } from 'langchain/text_splitter' -import { NotionDBLoader, NotionDBLoaderParams } from 'langchain/document_loaders/web/notiondb' +import { NotionAPILoader, NotionAPILoaderOptions } from 'langchain/document_loaders/web/notionapi' class NotionDB_DocumentLoaders implements INode { label: string @@ -18,7 +18,7 @@ class NotionDB_DocumentLoaders implements INode { this.type = 'Document' this.icon = 'notion.png' this.category = 'Document Loaders' - this.description = 'Load data from Notion Database ID' + this.description = 'Load data from Notion Database (each row is a separate document with all properties as metadata)' this.baseClasses = [this.type] this.inputs = [ { @@ -27,13 +27,6 @@ class NotionDB_DocumentLoaders implements INode { type: 'TextSplitter', optional: true }, - { - label: 'Notion Database Id', - name: 'databaseId', - type: 'string', - description: - 'If your URL looks like - https://www.notion.so/?v=, then is the database ID' - }, { label: 'Notion Integration Token', name: 'notionIntegrationToken', @@ -42,10 +35,10 @@ class NotionDB_DocumentLoaders implements INode { 'You can find integration token here' }, { - label: 'Page Size Limit', - name: 'pageSizeLimit', - type: 'number', - default: 10 + label: 'Notion Database Id', + name: 'databaseId', + type: 'string', + description: 'If your URL looks like - https://www.notion.so/abcdefh?v=long_hash_2, then abcdefh is the database ID' 
}, { label: 'Metadata', @@ -60,16 +53,17 @@ class NotionDB_DocumentLoaders implements INode { async init(nodeData: INodeData): Promise { const textSplitter = nodeData.inputs?.textSplitter as TextSplitter const databaseId = nodeData.inputs?.databaseId as string - const notionIntegrationToken = nodeData.inputs?.notionIntegrationToken as string - const pageSizeLimit = nodeData.inputs?.pageSizeLimit as string const metadata = nodeData.inputs?.metadata + const notionIntegrationToken = nodeData.inputs?.notionIntegrationToken as string - const obj: NotionDBLoaderParams = { - pageSizeLimit: pageSizeLimit ? parseInt(pageSizeLimit, 10) : 10, - databaseId, - notionIntegrationToken + const obj: NotionAPILoaderOptions = { + clientOptions: { + auth: notionIntegrationToken + }, + id: databaseId, + type: 'database' } - const loader = new NotionDBLoader(obj) + const loader = new NotionAPILoader(obj) let docs = [] if (textSplitter) { diff --git a/packages/components/nodes/documentloaders/NotionFolder/NotionFolder.ts b/packages/components/nodes/documentloaders/Notion/NotionFolder.ts similarity index 100% rename from packages/components/nodes/documentloaders/NotionFolder/NotionFolder.ts rename to packages/components/nodes/documentloaders/Notion/NotionFolder.ts diff --git a/packages/components/nodes/documentloaders/Notion/NotionPage.ts b/packages/components/nodes/documentloaders/Notion/NotionPage.ts new file mode 100644 index 00000000000..37c43d5587d --- /dev/null +++ b/packages/components/nodes/documentloaders/Notion/NotionPage.ts @@ -0,0 +1,96 @@ +import { INode, INodeData, INodeParams } from '../../../src/Interface' +import { TextSplitter } from 'langchain/text_splitter' +import { NotionAPILoader, NotionAPILoaderOptions } from 'langchain/document_loaders/web/notionapi' + +class NotionPage_DocumentLoaders implements INode { + label: string + name: string + description: string + type: string + icon: string + category: string + baseClasses: string[] + inputs: INodeParams[] + + 
constructor() { + this.label = 'Notion Page' + this.name = 'notionPage' + this.type = 'Document' + this.icon = 'notion.png' + this.category = 'Document Loaders' + this.description = 'Load data from Notion Page (including child pages all as separate documents)' + this.baseClasses = [this.type] + this.inputs = [ + { + label: 'Text Splitter', + name: 'textSplitter', + type: 'TextSplitter', + optional: true + }, + { + label: 'Notion Integration Token', + name: 'notionIntegrationToken', + type: 'password', + description: + 'You can find integration token here' + }, + { + label: 'Notion Page Id', + name: 'pageId', + type: 'string', + description: + 'The last 32 char hex in the url path. For example: https://www.notion.so/skarard/LangChain-Notion-API-b34ca03f219c4420a6046fc4bdfdf7b4, b34ca03f219c4420a6046fc4bdfdf7b4 is the Page ID' + }, + { + label: 'Metadata', + name: 'metadata', + type: 'json', + optional: true, + additionalParams: true + } + ] + } + + async init(nodeData: INodeData): Promise { + const textSplitter = nodeData.inputs?.textSplitter as TextSplitter + const pageId = nodeData.inputs?.pageId as string + const metadata = nodeData.inputs?.metadata + const notionIntegrationToken = nodeData.inputs?.notionIntegrationToken as string + + const obj: NotionAPILoaderOptions = { + clientOptions: { + auth: notionIntegrationToken + }, + id: pageId, + type: 'page' + } + const loader = new NotionAPILoader(obj) + + let docs = [] + if (textSplitter) { + docs = await loader.loadAndSplit(textSplitter) + } else { + docs = await loader.load() + } + + if (metadata) { + const parsedMetadata = typeof metadata === 'object' ? 
metadata : JSON.parse(metadata) + let finaldocs = [] + for (const doc of docs) { + const newdoc = { + ...doc, + metadata: { + ...doc.metadata, + ...parsedMetadata + } + } + finaldocs.push(newdoc) + } + return finaldocs + } + + return docs + } +} + +module.exports = { nodeClass: NotionPage_DocumentLoaders } diff --git a/packages/components/nodes/documentloaders/NotionDB/notion.png b/packages/components/nodes/documentloaders/Notion/notion.png similarity index 100% rename from packages/components/nodes/documentloaders/NotionDB/notion.png rename to packages/components/nodes/documentloaders/Notion/notion.png diff --git a/packages/components/nodes/documentloaders/NotionFolder/notion.png b/packages/components/nodes/documentloaders/NotionFolder/notion.png deleted file mode 100644 index 391051679c8..00000000000 Binary files a/packages/components/nodes/documentloaders/NotionFolder/notion.png and /dev/null differ diff --git a/packages/components/package.json b/packages/components/package.json index 14bd207006f..2d13278922e 100644 --- a/packages/components/package.json +++ b/packages/components/package.json @@ -20,6 +20,7 @@ "@dqbd/tiktoken": "^1.0.7", "@getzep/zep-js": "^0.4.1", "@huggingface/inference": "^2.6.1", + "@notionhq/client": "^2.2.8", "@opensearch-project/opensearch": "^1.2.0", "@pinecone-database/pinecone": "^0.0.12", "@qdrant/js-client-rest": "^1.2.2", @@ -41,8 +42,10 @@ "linkifyjs": "^4.1.1", "mammoth": "^1.5.1", "moment": "^2.29.3", + "mysql2": "^3.5.1", "node-fetch": "^2.6.11", "node-html-markdown": "^1.3.0", + "notion-to-md": "^3.1.1", "pdf-parse": "^1.1.1", "pdfjs-dist": "^3.7.107", "playwright": "^1.35.0", @@ -52,8 +55,7 @@ "srt-parser-2": "^1.2.3", "vm2": "^3.9.19", "weaviate-ts-client": "^1.1.0", - "ws": "^8.9.0", - "mysql2": "^3.5.1" + "ws": "^8.9.0" }, "devDependencies": { "@types/gulp": "4.0.9",