Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/Update Notion Loader #591

Merged
merged 1 commit into from
Jul 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { INode, INodeData, INodeParams } from '../../../src/Interface'
import { TextSplitter } from 'langchain/text_splitter'
import { NotionDBLoader, NotionDBLoaderParams } from 'langchain/document_loaders/web/notiondb'
import { NotionAPILoader, NotionAPILoaderOptions } from 'langchain/document_loaders/web/notionapi'

class NotionDB_DocumentLoaders implements INode {
label: string
Expand All @@ -18,7 +18,7 @@ class NotionDB_DocumentLoaders implements INode {
this.type = 'Document'
this.icon = 'notion.png'
this.category = 'Document Loaders'
this.description = 'Load data from Notion Database ID'
this.description = 'Load data from Notion Database (each row is a separate document with all properties as metadata)'
this.baseClasses = [this.type]
this.inputs = [
{
Expand All @@ -27,13 +27,6 @@ class NotionDB_DocumentLoaders implements INode {
type: 'TextSplitter',
optional: true
},
{
label: 'Notion Database Id',
name: 'databaseId',
type: 'string',
description:
'If your URL looks like - https://www.notion.so/<long_hash_1>?v=<long_hash_2>, then <long_hash_1> is the database ID'
},
{
label: 'Notion Integration Token',
name: 'notionIntegrationToken',
Expand All @@ -42,10 +35,10 @@ class NotionDB_DocumentLoaders implements INode {
'You can find integration token <a target="_blank" href="https://developers.notion.com/docs/create-a-notion-integration#step-1-create-an-integration">here</a>'
},
{
label: 'Page Size Limit',
name: 'pageSizeLimit',
type: 'number',
default: 10
label: 'Notion Database Id',
name: 'databaseId',
type: 'string',
description: 'If your URL looks like - https://www.notion.so/abcdefh?v=long_hash_2, then abcdefh is the database ID'
},
{
label: 'Metadata',
Expand All @@ -60,16 +53,17 @@ class NotionDB_DocumentLoaders implements INode {
async init(nodeData: INodeData): Promise<any> {
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
const databaseId = nodeData.inputs?.databaseId as string
const notionIntegrationToken = nodeData.inputs?.notionIntegrationToken as string
const pageSizeLimit = nodeData.inputs?.pageSizeLimit as string
const metadata = nodeData.inputs?.metadata
const notionIntegrationToken = nodeData.inputs?.notionIntegrationToken as string

const obj: NotionDBLoaderParams = {
pageSizeLimit: pageSizeLimit ? parseInt(pageSizeLimit, 10) : 10,
databaseId,
notionIntegrationToken
const obj: NotionAPILoaderOptions = {
clientOptions: {
auth: notionIntegrationToken
},
id: databaseId,
type: 'database'
}
const loader = new NotionDBLoader(obj)
const loader = new NotionAPILoader(obj)

let docs = []
if (textSplitter) {
Expand Down
96 changes: 96 additions & 0 deletions packages/components/nodes/documentloaders/Notion/NotionPage.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
import { INode, INodeData, INodeParams } from '../../../src/Interface'
import { TextSplitter } from 'langchain/text_splitter'
import { NotionAPILoader, NotionAPILoaderOptions } from 'langchain/document_loaders/web/notionapi'

/**
 * Document-loader node that reads a Notion page through `NotionAPILoader`.
 *
 * The node exposes four inputs: an optional text splitter, the Notion
 * integration token, the page ID, and optional extra metadata that is merged
 * into every document produced by the loader.
 */
class NotionPage_DocumentLoaders implements INode {
    label: string
    name: string
    description: string
    type: string
    icon: string
    category: string
    baseClasses: string[]
    inputs: INodeParams[]

    /** Populates the static node descriptor (labels, icon, category and input definitions). */
    constructor() {
        this.label = 'Notion Page'
        this.name = 'notionPage'
        this.type = 'Document'
        this.icon = 'notion.png'
        this.category = 'Document Loaders'
        this.description = 'Load data from Notion Page (including child pages all as separate documents)'
        this.baseClasses = [this.type]
        this.inputs = [
            {
                label: 'Text Splitter',
                name: 'textSplitter',
                type: 'TextSplitter',
                optional: true
            },
            {
                label: 'Notion Integration Token',
                name: 'notionIntegrationToken',
                type: 'password',
                description:
                    'You can find integration token <a target="_blank" href="https://developers.notion.com/docs/create-a-notion-integration#step-1-create-an-integration">here</a>'
            },
            {
                label: 'Notion Page Id',
                name: 'pageId',
                type: 'string',
                description:
                    'The last 32 char hex in the url path. For example: https://www.notion.so/skarard/LangChain-Notion-API-b34ca03f219c4420a6046fc4bdfdf7b4, b34ca03f219c4420a6046fc4bdfdf7b4 is the Page ID'
            },
            {
                label: 'Metadata',
                name: 'metadata',
                type: 'json',
                optional: true,
                additionalParams: true
            }
        ]
    }

    /**
     * Loads the configured Notion page and returns the resulting documents.
     *
     * @param nodeData - Node configuration; `inputs` is read for `textSplitter`,
     *   `pageId`, `metadata` and `notionIntegrationToken`.
     * @returns The loaded documents. When a text splitter is supplied the
     *   documents are split with it; when `metadata` is supplied its keys are
     *   merged into (and override) each document's own metadata.
     * @throws SyntaxError if `metadata` is a string that is not valid JSON.
     */
    async init(nodeData: INodeData): Promise<any> {
        const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
        const pageId = nodeData.inputs?.pageId as string
        const metadata = nodeData.inputs?.metadata
        const notionIntegrationToken = nodeData.inputs?.notionIntegrationToken as string

        const obj: NotionAPILoaderOptions = {
            clientOptions: {
                auth: notionIntegrationToken
            },
            id: pageId,
            type: 'page'
        }
        const loader = new NotionAPILoader(obj)

        const docs = textSplitter ? await loader.loadAndSplit(textSplitter) : await loader.load()

        // No extra metadata requested: hand the loader output back untouched.
        if (!metadata) return docs

        // The metadata input may arrive already parsed (object) or as a raw JSON string.
        const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)

        // User-supplied keys are spread last so they win over loader-provided metadata.
        return docs.map((doc) => ({
            ...doc,
            metadata: {
                ...doc.metadata,
                ...parsedMetadata
            }
        }))
    }
}

module.exports = { nodeClass: NotionPage_DocumentLoaders }
Binary file not shown.
6 changes: 4 additions & 2 deletions packages/components/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
"@dqbd/tiktoken": "^1.0.7",
"@getzep/zep-js": "^0.4.1",
"@huggingface/inference": "^2.6.1",
"@notionhq/client": "^2.2.8",
"@opensearch-project/opensearch": "^1.2.0",
"@pinecone-database/pinecone": "^0.0.12",
"@qdrant/js-client-rest": "^1.2.2",
Expand All @@ -41,8 +42,10 @@
"linkifyjs": "^4.1.1",
"mammoth": "^1.5.1",
"moment": "^2.29.3",
"mysql2": "^3.5.1",
"node-fetch": "^2.6.11",
"node-html-markdown": "^1.3.0",
"notion-to-md": "^3.1.1",
"pdf-parse": "^1.1.1",
"pdfjs-dist": "^3.7.107",
"playwright": "^1.35.0",
Expand All @@ -52,8 +55,7 @@
"srt-parser-2": "^1.2.3",
"vm2": "^3.9.19",
"weaviate-ts-client": "^1.1.0",
"ws": "^8.9.0",
"mysql2": "^3.5.1"
"ws": "^8.9.0"
},
"devDependencies": {
"@types/gulp": "4.0.9",
Expand Down