Skip to content

Commit

Permalink
Feature/Add support for state-based metadata filter to Retriever Tool (
Browse files Browse the repository at this point in the history
…#3501)

* Added support for state-based metadata filter to Retriever Tool

* Update RetrieverTool.ts

---------

Co-authored-by: Henry Heng <henryheng@flowiseai.com>
  • Loading branch information
serhiy-matoffo and HenryHengZJ authored Nov 16, 2024
1 parent 38ddbd8 commit 16ceed1
Showing 1 changed file with 151 additions and 8 deletions.
159 changes: 151 additions & 8 deletions packages/components/nodes/tools/RetrieverTool/RetrieverTool.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,121 @@
import { z } from 'zod'
import { DynamicStructuredTool } from '@langchain/core/tools'
import { CallbackManagerForToolRun } from '@langchain/core/callbacks/manager'
import { DynamicTool } from '@langchain/core/tools'
import { CallbackManager, CallbackManagerForToolRun, Callbacks, parseCallbackConfigArg } from '@langchain/core/callbacks/manager'
import { BaseDynamicToolInput, DynamicTool, StructuredTool, ToolInputParsingException } from '@langchain/core/tools'
import { BaseRetriever } from '@langchain/core/retrievers'
import { INode, INodeData, INodeParams } from '../../../src/Interface'
import { ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface'
import { getBaseClasses } from '../../../src/utils'
import { SOURCE_DOCUMENTS_PREFIX } from '../../../src/agents'
import { RunnableConfig } from '@langchain/core/runnables'
import { customGet } from '../../sequentialagents/commonUtils'
import { VectorStoreRetriever } from '@langchain/core/vectorstores'

// Help text used as the hint value of the "Additional Metadata Filter" input.
// It lists the `$flow.*` placeholders a filter value may reference.
const howToUse = `Add additional filters to vector store. You can also filter with flow config, including the current "state":
- \`$flow.sessionId\`
- \`$flow.chatId\`
- \`$flow.chatflowId\`
- \`$flow.input\`
- \`$flow.state\`
`

/** Shorthand for a zod object schema with unconstrained type parameters. */
type ZodObjectAny = z.ZodObject<any, any, any, any>

/** Per-invocation flow values that a caller may forward to a tool call. */
type IFlowConfig = {
    sessionId?: string
    chatId?: string
    input?: string
    state?: ICommonObject
}

/**
 * Constructor options for the flow-aware structured tool.
 *
 * `func` is the handler executed by the tool; besides the parsed input and the
 * run manager it accepts an optional third argument carrying flow data.
 * `schema` is the zod object schema the tool input is validated against.
 */
interface DynamicStructuredToolInput<T extends ZodObjectAny = ZodObjectAny> extends BaseDynamicToolInput {
    func?: (input: z.infer<T>, runManager?: CallbackManagerForToolRun, flowConfig?: IFlowConfig) => Promise<string>
    schema: T
}

/**
 * Structured tool whose handler receives, in addition to the parsed input and
 * the run manager, an object describing the current flow.
 *
 * The object handed to `func` is built in `_call`: every key of the parsed
 * input is exposed as `$<key>`, and `$flow` holds the static flow object set
 * through `setFlowObject` merged with the per-call `flowConfig`.
 */
class DynamicStructuredTool<T extends z.ZodObject<any, any, any, any> = z.ZodObject<any, any, any, any>> extends StructuredTool<
    T extends ZodObjectAny ? T : ZodObjectAny
> {
    static lc_name() {
        return 'DynamicStructuredTool'
    }

    name: string

    description: string

    func: DynamicStructuredToolInput['func']

    // @ts-ignore
    schema: T

    // Static flow properties supplied via setFlowObject(); merged into `$flow` on every call.
    private flowObj: any

    /**
     * @param fields - tool name, description, handler, schema and optional `returnDirect` flag.
     */
    constructor(fields: DynamicStructuredToolInput<T>) {
        super(fields)
        this.name = fields.name
        this.description = fields.description
        this.func = fields.func
        this.returnDirect = fields.returnDirect ?? this.returnDirect
        this.schema = fields.schema
    }

    /**
     * Validates `arg` against the schema, runs the handler and reports
     * start/error/end to the configured callback manager.
     *
     * @param arg - raw tool input; must satisfy `this.schema`.
     * @param configArg - runnable config or callbacks; `runName` defaults to the tool name.
     * @param tags - tags used when `configArg` carries none.
     * @param flowConfig - per-call flow values forwarded to `_call`.
     * @returns the handler result; truthy non-string results are JSON-stringified.
     * @throws ToolInputParsingException when `arg` does not match the schema.
     */
    async call(arg: any, configArg?: RunnableConfig | Callbacks, tags?: string[], flowConfig?: IFlowConfig): Promise<string> {
        const config = parseCallbackConfigArg(configArg)
        if (config.runName === undefined) {
            config.runName = this.name
        }
        let parsed
        try {
            parsed = await this.schema.parseAsync(arg)
        } catch (e) {
            throw new ToolInputParsingException(`Received tool input did not match expected schema`, JSON.stringify(arg))
        }
        const callbackManager_ = await CallbackManager.configure(
            config.callbacks,
            this.callbacks,
            config.tags || tags,
            this.tags,
            config.metadata,
            this.metadata,
            { verbose: this.verbose }
        )
        const runManager = await callbackManager_?.handleToolStart(
            this.toJSON(),
            typeof parsed === 'string' ? parsed : JSON.stringify(parsed),
            undefined,
            undefined,
            undefined,
            undefined,
            config.runName
        )
        let result
        try {
            result = await this._call(parsed, runManager, flowConfig)
        } catch (e) {
            // Report the failure to the callbacks, then let the caller see the original error.
            await runManager?.handleToolError(e)
            throw e
        }
        if (result && typeof result !== 'string') {
            result = JSON.stringify(result)
        }
        await runManager?.handleToolEnd(result)
        return result
    }

    /**
     * Builds the flow configuration object and invokes the handler with it.
     *
     * @param arg - parsed tool input.
     * @param runManager - callback manager for this tool run.
     * @param flowConfig - per-call flow values; they override same-named keys of the static flow object.
     * @returns the promise returned by the handler.
     * @throws Error when the tool was constructed without a `func`.
     */
    // @ts-ignore
    protected _call(arg: any, runManager?: CallbackManagerForToolRun, flowConfig?: IFlowConfig): Promise<string> {
        const flowConfiguration: ICommonObject = {}

        // `typeof null` is 'object', so null is excluded explicitly before enumerating keys.
        if (arg !== null && typeof arg === 'object') {
            for (const [key, value] of Object.entries(arg)) {
                flowConfiguration[`$${key}`] = value
            }
        }

        // inject flow properties; a per-call flowConfig is honoured even when no static flow object was set
        if (this.flowObj || flowConfig) {
            flowConfiguration['$flow'] = { ...this.flowObj, ...flowConfig }
        }

        if (!this.func) {
            throw new Error(`Tool "${this.name}" was created without a func to execute`)
        }

        // NOTE: the third argument is typed as IFlowConfig on `func`, but what is
        // actually passed is the `{ $<key>, $flow }` object assembled above.
        return this.func(arg as any, runManager, flowConfiguration)
    }

    /**
     * Stores the static flow object that is merged into `$flow` on every call.
     *
     * @param flow - flow properties to expose to the handler.
     */
    setFlowObject(flow: any) {
        this.flowObj = flow
    }
}

class Retriever_Tools implements INode {
label: string
Expand All @@ -22,7 +132,7 @@ class Retriever_Tools implements INode {
constructor() {
this.label = 'Retriever Tool'
this.name = 'retrieverTool'
this.version = 2.0
this.version = 3.0
this.type = 'RetrieverTool'
this.icon = 'retrievertool.svg'
this.category = 'Tools'
Expand Down Expand Up @@ -53,23 +163,55 @@ class Retriever_Tools implements INode {
name: 'returnSourceDocuments',
type: 'boolean',
optional: true
},
{
label: 'Additional Metadata Filter',
name: 'retrieverToolMetadataFilter',
type: 'json',
description: 'Add additional metadata filter on top of the existing filter from vector store',
optional: true,
additionalParams: true,
hint: {
label: 'What can you filter?',
value: howToUse
}
}
]
}

async init(nodeData: INodeData): Promise<any> {
async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
const name = nodeData.inputs?.name as string
const description = nodeData.inputs?.description as string
const retriever = nodeData.inputs?.retriever as BaseRetriever
const returnSourceDocuments = nodeData.inputs?.returnSourceDocuments as boolean
const retrieverToolMetadataFilter = nodeData.inputs?.retrieverToolMetadataFilter

const input = {
name,
description
}

const func = async ({ input }: { input: string }, runManager?: CallbackManagerForToolRun) => {
const docs = await retriever.getRelevantDocuments(input, runManager?.getChild('retriever'))
const flow = { chatflowId: options.chatflowid }

const func = async ({ input }: { input: string }, _?: CallbackManagerForToolRun, flowConfig?: IFlowConfig) => {
if (retrieverToolMetadataFilter) {
const flowObj = flowConfig

const metadatafilter =
typeof retrieverToolMetadataFilter === 'object' ? retrieverToolMetadataFilter : JSON.parse(retrieverToolMetadataFilter)
const newMetadataFilter: any = {}
for (const key in metadatafilter) {
let value = metadatafilter[key]
if (value.startsWith('$flow')) {
value = customGet(flowObj, value)
}
newMetadataFilter[key] = value
}

const vectorStore = (retriever as VectorStoreRetriever<any>).vectorStore
vectorStore.filter = newMetadataFilter
}
const docs = await retriever.invoke(input)
const content = docs.map((doc) => doc.pageContent).join('\n\n')
const sourceDocuments = JSON.stringify(docs)
return returnSourceDocuments ? content + SOURCE_DOCUMENTS_PREFIX + sourceDocuments : content
Expand All @@ -80,6 +222,7 @@ class Retriever_Tools implements INode {
}) as any

const tool = new DynamicStructuredTool({ ...input, func, schema })
tool.setFlowObject(flow)
return tool
}
}
Expand Down

0 comments on commit 16ceed1

Please sign in to comment.