From 8e50897848d575bcdfdbe68d217efc0a52d27764 Mon Sep 17 00:00:00 2001 From: Thuc Pham <51660321+thucpn@users.noreply.github.com> Date: Fri, 30 Aug 2024 18:00:14 +0700 Subject: [PATCH 01/10] feat: implement filters for MongoDB --- examples/mongodb/2_load_and_index.ts | 3 +- examples/mongodb/3_query.ts | 13 +++- .../vectorStore/MongoDBAtlasVectorStore.ts | 59 ++++++++++++++----- 3 files changed, 57 insertions(+), 18 deletions(-) diff --git a/examples/mongodb/2_load_and_index.ts b/examples/mongodb/2_load_and_index.ts index d20d0279de..851e51deeb 100644 --- a/examples/mongodb/2_load_and_index.ts +++ b/examples/mongodb/2_load_and_index.ts @@ -34,6 +34,7 @@ async function loadAndIndex() { dbName: databaseName, collectionName: vectorCollectionName, // this is where your embeddings will be stored indexName: indexName, // this is the name of the index you will need to create + populatedMetadataFields: ["_node_type", "document_id"], // this is the field that will be used for the query }); // now create an index from all the Documents and store them in Atlas @@ -46,5 +47,3 @@ async function loadAndIndex() { } loadAndIndex().catch(console.error); - -// you can't query your index yet because you need to create a vector search index in mongodb's UI now diff --git a/examples/mongodb/3_query.ts b/examples/mongodb/3_query.ts index 1064b0036a..79bd241d9c 100644 --- a/examples/mongodb/3_query.ts +++ b/examples/mongodb/3_query.ts @@ -19,7 +19,18 @@ async function query() { const index = await VectorStoreIndex.fromVectorStore(store); const retriever = index.asRetriever({ similarityTopK: 20 }); - const queryEngine = index.asQueryEngine({ retriever }); + const queryEngine = index.asQueryEngine({ + retriever, + preFilters: { + filters: [ + { + key: "_node_type", + value: "TextNode", + operator: "==", + }, + ], + }, + }); const result = await queryEngine.query({ query: "What does author receive when he was 11 years old?", // Isaac Asimov's "Foundation" for Christmas }); diff --git a/packages/llamaindex/src/storage/vectorStore/MongoDBAtlasVectorStore.ts b/packages/llamaindex/src/storage/vectorStore/MongoDBAtlasVectorStore.ts index 06005660a4..86942e8177 100644 --- a/packages/llamaindex/src/storage/vectorStore/MongoDBAtlasVectorStore.ts +++ b/packages/llamaindex/src/storage/vectorStore/MongoDBAtlasVectorStore.ts @@ -13,6 +13,14 @@ import { } from "./types.js"; import { metadataDictToNode, nodeToMetadata } from "./utils.js"; +// define your Atlas Search index. See detail https://www.mongodb.com/docs/atlas/atlas-search/field-types/knn-vector/ +const DEFAULT_EMBEDDING_DEFINITION = { + type: "knnVector", + dimensions: 1536, + similarity: "cosine", +}; + +// TODO: Build filters based on the operator // Utility function to convert metadata filters to MongoDB filter function toMongoDBFilter( standardFilters: MetadataFilters, @@ -38,6 +46,8 @@ export class MongoDBAtlasVectorSearch dbName: string; collectionName: string; autoCreateIndex: boolean; + embeddingDefinition: Record; + populatedMetadataFields: string[]; /** * The used MongoClient. If not given, a new MongoClient is created based on the MONGODB_URI env variable. @@ -98,26 +108,14 @@ export class MongoDBAtlasVectorSearch numCandidates: (query: VectorStoreQuery) => number; private collection?: Collection; - // define your Atlas Search index. See detail https://www.mongodb.com/docs/atlas/atlas-search/field-types/knn-vector/ - readonly SEARCH_INDEX_DEFINITION = { - mappings: { - dynamic: true, - fields: { - embedding: { - type: "knnVector", - dimensions: 1536, - similarity: "cosine", - }, - }, - }, - }; - constructor( init: Partial & { dbName: string; collectionName: string; embedModel?: BaseEmbedding; autoCreateIndex?: boolean; + populatedMetadataFields?: string[]; + embeddingDefinition?: Record; }, ) { super(init.embedModel); @@ -136,6 +134,9 @@ export class MongoDBAtlasVectorSearch this.dbName = init.dbName ?? "default_db"; this.collectionName = init.collectionName ?? "default_collection"; this.autoCreateIndex = init.autoCreateIndex ?? true; + this.populatedMetadataFields = init.populatedMetadataFields ?? []; + this.embeddingDefinition = + init.embeddingDefinition ?? DEFAULT_EMBEDDING_DEFINITION; this.indexName = init.indexName ?? "default"; this.embeddingKey = init.embeddingKey ?? "embedding"; this.idKey = init.idKey ?? "id"; @@ -161,9 +162,30 @@ export class MongoDBAtlasVectorSearch (index) => index.name === this.indexName, ); if (!indexExists) { + const additionalDefinition: Record = {}; + this.populatedMetadataFields.forEach((field) => { + additionalDefinition[field] = { type: "token" }; + }); + console.log("createSearchIndex", { + mappings: { + dynamic: true, + fields: { + embedding: this.embeddingDefinition, + ...additionalDefinition, + }, + }, + }); await this.collection.createSearchIndex({ name: this.indexName, - definition: this.SEARCH_INDEX_DEFINITION, + definition: { + mappings: { + dynamic: true, + fields: { + embedding: this.embeddingDefinition, + ...additionalDefinition, + }, + }, + }, }); console.log("Created search index: ", this.indexName); } @@ -190,11 +212,18 @@ export class MongoDBAtlasVectorSearch this.flatMetadata, ); + // Include the specified metadata fields in the top level of the document (to help filter) + const populatedMetadata: Record = {}; + for (const field of this.populatedMetadataFields) { + populatedMetadata[field] = metadata[field]; + } + return { [this.idKey]: node.id_, [this.embeddingKey]: node.getEmbedding(), [this.textKey]: node.getContent(MetadataMode.NONE) || "", [this.metadataKey]: metadata, + ...populatedMetadata, }; }); From 6a1b133fa26bc21752fa0a4e4535c1f36c0a7eec Mon Sep 17 00:00:00 2001 From: Thuc Pham <51660321+thucpn@users.noreply.github.com> Date: Fri, 30 Aug 2024 18:02:08 +0700 Subject: [PATCH 02/10] remove log --- .../src/storage/vectorStore/MongoDBAtlasVectorStore.ts | 9 --------- 1 file changed, 9 deletions(-) diff --git a/packages/llamaindex/src/storage/vectorStore/MongoDBAtlasVectorStore.ts b/packages/llamaindex/src/storage/vectorStore/MongoDBAtlasVectorStore.ts index 86942e8177..2300b3e1c5 100644 --- a/packages/llamaindex/src/storage/vectorStore/MongoDBAtlasVectorStore.ts +++ b/packages/llamaindex/src/storage/vectorStore/MongoDBAtlasVectorStore.ts @@ -166,15 +166,6 @@ export class MongoDBAtlasVectorSearch this.populatedMetadataFields.forEach((field) => { additionalDefinition[field] = { type: "token" }; }); - console.log("createSearchIndex", { - mappings: { - dynamic: true, - fields: { - embedding: this.embeddingDefinition, - ...additionalDefinition, - }, - }, - }); await this.collection.createSearchIndex({ name: this.indexName, definition: { From abf699dbde127ef0d1f0ec82e74e471f50584e76 Mon Sep 17 00:00:00 2001 From: Thuc Pham <51660321+thucpn@users.noreply.github.com> Date: Fri, 30 Aug 2024 18:02:40 +0700 Subject: [PATCH 03/10] Create red-vans-taste.md --- .changeset/red-vans-taste.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .changeset/red-vans-taste.md diff --git a/.changeset/red-vans-taste.md b/.changeset/red-vans-taste.md new file mode 100644 index 0000000000..f59bef834d --- /dev/null +++ b/.changeset/red-vans-taste.md @@ -0,0 +1,5 @@ +--- +"llamaindex": patch +--- + +implement filters for MongoDBAtlasVectorSearch From f24cc74d8cd20bf364472a77ed30d6e60bd83797 Mon Sep 17 00:00:00 2001 From: Thuc Pham <51660321+thucpn@users.noreply.github.com> Date: Fri, 30 Aug 2024 18:13:13 +0700 Subject: [PATCH 04/10] update example --- examples/mongodb/3_query.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/mongodb/3_query.ts b/examples/mongodb/3_query.ts index 79bd241d9c..d57f2701ca 100644 --- a/examples/mongodb/3_query.ts +++ b/examples/mongodb/3_query.ts @@ -14,6 +14,7 @@ async function query() { dbName: process.env.MONGODB_DATABASE!, collectionName: process.env.MONGODB_VECTORS!, indexName: process.env.MONGODB_VECTOR_INDEX!, + populatedMetadataFields: ["_node_type", "document_id"], }); const index = await VectorStoreIndex.fromVectorStore(store); From a624546f6892ce2ca728275252b47b9f4c59e13a Mon Sep 17 00:00:00 2001 From: Thuc Pham <51660321+thucpn@users.noreply.github.com> Date: Thu, 5 Sep 2024 10:54:56 +0700 Subject: [PATCH 05/10] feat: update mongodb example --- examples/mongodb/2_load_and_index.ts | 12 +++++++++++- examples/mongodb/3_query.ts | 6 +++--- .../storage/vectorStore/MongoDBAtlasVectorStore.ts | 10 +++++----- 3 files changed, 19 insertions(+), 9 deletions(-) diff --git a/examples/mongodb/2_load_and_index.ts b/examples/mongodb/2_load_and_index.ts index 851e51deeb..3400132542 100644 --- a/examples/mongodb/2_load_and_index.ts +++ b/examples/mongodb/2_load_and_index.ts @@ -28,13 +28,23 @@ async function loadAndIndex() { "full_text", ]); + const FILTER_METADATA_FIELD = "content_type"; + + documents.forEach((document, index) => { + const contentType = ["tweet", "post", "story"][index % 3]; // assign a random content type to each document + document.metadata = { + ...document.metadata, + [FILTER_METADATA_FIELD]: contentType, + }; + }); + // create Atlas as a vector store const vectorStore = new MongoDBAtlasVectorSearch({ mongodbClient: client, dbName: databaseName, collectionName: vectorCollectionName, // this is where your embeddings will be stored indexName: indexName, // this is the name of the index you will need to create - populatedMetadataFields: ["_node_type", "document_id"], // this is the field that will be used for the query + indexedMetadataFields: [FILTER_METADATA_FIELD], // this is the field that will be used for the query }); // now create an index from all the Documents and store them in Atlas diff --git a/examples/mongodb/3_query.ts b/examples/mongodb/3_query.ts index d57f2701ca..f6bf62d1dd 100644 --- a/examples/mongodb/3_query.ts +++ b/examples/mongodb/3_query.ts @@ -14,7 +14,7 @@ async function query() { dbName: process.env.MONGODB_DATABASE!, collectionName: process.env.MONGODB_VECTORS!, indexName: process.env.MONGODB_VECTOR_INDEX!, - populatedMetadataFields: ["_node_type", "document_id"], + indexedMetadataFields: ["content_type"], }); const index = await VectorStoreIndex.fromVectorStore(store); @@ -25,8 +25,8 @@ async function query() { preFilters: { filters: [ { - key: "_node_type", - value: "TextNode", + key: "content_type", + value: "story", // try "tweet" or "post" to see the difference operator: "==", }, ], diff --git a/packages/llamaindex/src/storage/vectorStore/MongoDBAtlasVectorStore.ts b/packages/llamaindex/src/storage/vectorStore/MongoDBAtlasVectorStore.ts index 2300b3e1c5..2f66fe57b4 100644 --- a/packages/llamaindex/src/storage/vectorStore/MongoDBAtlasVectorStore.ts +++ b/packages/llamaindex/src/storage/vectorStore/MongoDBAtlasVectorStore.ts @@ -47,7 +47,7 @@ export class MongoDBAtlasVectorSearch collectionName: string; autoCreateIndex: boolean; embeddingDefinition: Record; - populatedMetadataFields: string[]; + indexedMetadataFields: string[]; /** * The used MongoClient. If not given, a new MongoClient is created based on the MONGODB_URI env variable. @@ -114,7 +114,7 @@ export class MongoDBAtlasVectorSearch collectionName: string; embedModel?: BaseEmbedding; autoCreateIndex?: boolean; - populatedMetadataFields?: string[]; + indexedMetadataFields?: string[]; embeddingDefinition?: Record; }, ) { @@ -134,7 +134,7 @@ export class MongoDBAtlasVectorSearch this.dbName = init.dbName ?? "default_db"; this.collectionName = init.collectionName ?? "default_collection"; this.autoCreateIndex = init.autoCreateIndex ?? true; - this.populatedMetadataFields = init.populatedMetadataFields ?? []; + this.indexedMetadataFields = init.indexedMetadataFields ?? []; this.embeddingDefinition = init.embeddingDefinition ?? DEFAULT_EMBEDDING_DEFINITION; this.indexName = init.indexName ?? "default"; @@ -163,7 +163,7 @@ export class MongoDBAtlasVectorSearch ); if (!indexExists) { const additionalDefinition: Record = {}; - this.populatedMetadataFields.forEach((field) => { + this.indexedMetadataFields.forEach((field) => { additionalDefinition[field] = { type: "token" }; }); await this.collection.createSearchIndex({ @@ -205,7 +205,7 @@ export class MongoDBAtlasVectorSearch // Include the specified metadata fields in the top level of the document (to help filter) const populatedMetadata: Record = {}; - for (const field of this.populatedMetadataFields) { + for (const field of this.indexedMetadataFields) { populatedMetadata[field] = metadata[field]; } From 6fa71a5020082e60e81dd271a0712387f35bf4ec Mon Sep 17 00:00:00 2001 From: Thuc Pham <51660321+thucpn@users.noreply.github.com> Date: Thu, 5 Sep 2024 11:35:04 +0700 Subject: [PATCH 06/10] feat: build filters by operator --- .../vectorStore/MongoDBAtlasVectorStore.ts | 51 +++++++++++++++---- 1 file changed, 42 insertions(+), 9 deletions(-) diff --git a/packages/llamaindex/src/storage/vectorStore/MongoDBAtlasVectorStore.ts b/packages/llamaindex/src/storage/vectorStore/MongoDBAtlasVectorStore.ts index 2f66fe57b4..905f2c8032 100644 --- a/packages/llamaindex/src/storage/vectorStore/MongoDBAtlasVectorStore.ts +++ b/packages/llamaindex/src/storage/vectorStore/MongoDBAtlasVectorStore.ts @@ -5,7 +5,10 @@ import { getEnv } from "@llamaindex/env"; import type { BulkWriteOptions, Collection } from "mongodb"; import { MongoClient } from "mongodb"; import { + FilterCondition, VectorStoreBase, + type FilterOperator, + type MetadataFilter, type MetadataFilters, type VectorStoreNoEmbedModel, type VectorStoreQuery, @@ -20,16 +23,45 @@ const DEFAULT_EMBEDDING_DEFINITION = { similarity: "cosine", }; -// TODO: Build filters based on the operator -// Utility function to convert metadata filters to MongoDB filter -function toMongoDBFilter( - standardFilters: MetadataFilters, -): Record { - const filters: Record = {}; - for (const filter of standardFilters?.filters ?? []) { - filters[filter.key] = filter.value; +function mapLcMqlFilterOperators(operator: string): string { + const operatorMap: { [key in FilterOperator]?: string } = { + "==": "$eq", + "<": "$lt", + "<=": "$lte", + ">": "$gt", + ">=": "$gte", + "!=": "$ne", + in: "$in", + nin: "$nin", + }; + const mqlOperator = operatorMap[operator as FilterOperator]; + if (!mqlOperator) throw new Error(`Unsupported operator: ${operator}`); + return mqlOperator; +} + +function toMongoDBFilter(filters?: MetadataFilters): Record { + if (!filters) return {}; + + const createFilterObject = (mf: MetadataFilter) => ({ + [mf.key]: { + [mapLcMqlFilterOperators(mf.operator)]: mf.value, + }, + }); + + if (filters.filters.length === 1) { + return createFilterObject(filters.filters[0]); } - return filters; + + if (filters.condition === FilterCondition.AND) { + return { $and: filters.filters.map(createFilterObject) }; + } + + if (filters.condition === FilterCondition.OR) { + return { $or: filters.filters.map(createFilterObject) }; + } + + console.debug("filters.condition not recognized. Returning empty object"); + return {}; } /** @@ -267,6 +299,7 @@ export class MongoDBAtlasVectorSearch }; if (query.filters) { + console.log("query.filters", toMongoDBFilter(query.filters)); params.filter = toMongoDBFilter(query.filters); } From 466939d4d37c7627240ba5983769b7bb989c70f4 Mon Sep 17 00:00:00 2001 From: Thuc Pham <51660321+thucpn@users.noreply.github.com> Date: Thu, 5 Sep 2024 13:15:43 +0700 Subject: [PATCH 07/10] fix: able to config embedding definition --- .../src/storage/vectorStore/MongoDBAtlasVectorStore.ts | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/packages/llamaindex/src/storage/vectorStore/MongoDBAtlasVectorStore.ts b/packages/llamaindex/src/storage/vectorStore/MongoDBAtlasVectorStore.ts index 905f2c8032..f1a1a4c2c4 100644 --- a/packages/llamaindex/src/storage/vectorStore/MongoDBAtlasVectorStore.ts +++ b/packages/llamaindex/src/storage/vectorStore/MongoDBAtlasVectorStore.ts @@ -167,8 +167,10 @@ export class MongoDBAtlasVectorSearch this.collectionName = init.collectionName ?? "default_collection"; this.autoCreateIndex = init.autoCreateIndex ?? true; this.indexedMetadataFields = init.indexedMetadataFields ?? []; - this.embeddingDefinition = - init.embeddingDefinition ?? DEFAULT_EMBEDDING_DEFINITION; + this.embeddingDefinition = { + ...DEFAULT_EMBEDDING_DEFINITION, + ...(init.embeddingDefinition ?? {}), + }; this.indexName = init.indexName ?? "default"; this.embeddingKey = init.embeddingKey ?? "embedding"; this.idKey = init.idKey ?? "id"; From 93791f6533eaf448776cc8ce5197574791bdf1c5 Mon Sep 17 00:00:00 2001 From: Thuc Pham <51660321+thucpn@users.noreply.github.com> Date: Thu, 5 Sep 2024 13:17:40 +0700 Subject: [PATCH 08/10] remove log --- .../src/storage/vectorStore/MongoDBAtlasVectorStore.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/packages/llamaindex/src/storage/vectorStore/MongoDBAtlasVectorStore.ts b/packages/llamaindex/src/storage/vectorStore/MongoDBAtlasVectorStore.ts index f1a1a4c2c4..a139742a1d 100644 --- a/packages/llamaindex/src/storage/vectorStore/MongoDBAtlasVectorStore.ts +++ b/packages/llamaindex/src/storage/vectorStore/MongoDBAtlasVectorStore.ts @@ -301,7 +301,6 @@ export class MongoDBAtlasVectorSearch }; if (query.filters) { - console.log("query.filters", toMongoDBFilter(query.filters)); params.filter = toMongoDBFilter(query.filters); } From 5cc749784649f16633da126635571352cb0ec732 Mon Sep 17 00:00:00 2001 From: Thuc Pham <51660321+thucpn@users.noreply.github.com> Date: Thu, 5 Sep 2024 13:32:59 +0700 Subject: [PATCH 09/10] throw error when filters condition not found --- .../src/storage/vectorStore/MongoDBAtlasVectorStore.ts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/packages/llamaindex/src/storage/vectorStore/MongoDBAtlasVectorStore.ts b/packages/llamaindex/src/storage/vectorStore/MongoDBAtlasVectorStore.ts index a139742a1d..626885bb90 100644 --- a/packages/llamaindex/src/storage/vectorStore/MongoDBAtlasVectorStore.ts +++ b/packages/llamaindex/src/storage/vectorStore/MongoDBAtlasVectorStore.ts @@ -60,8 +60,7 @@ function toMongoDBFilter(filters?: MetadataFilters): Record { return { $or: filters.filters.map(createFilterObject) }; } - console.debug("filters.condition not recognized. Returning empty object"); - return {}; + throw new Error("filters condition not recognized. Returning empty object"); } /** From d8c24ea820f308980552d3c500f59edadda325af Mon Sep 17 00:00:00 2001 From: Thuc Pham <51660321+thucpn@users.noreply.github.com> Date: Thu, 5 Sep 2024 14:03:56 +0700 Subject: [PATCH 10/10] fix: doc --- .../src/storage/vectorStore/MongoDBAtlasVectorStore.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/llamaindex/src/storage/vectorStore/MongoDBAtlasVectorStore.ts b/packages/llamaindex/src/storage/vectorStore/MongoDBAtlasVectorStore.ts index 626885bb90..01ea350b50 100644 --- a/packages/llamaindex/src/storage/vectorStore/MongoDBAtlasVectorStore.ts +++ b/packages/llamaindex/src/storage/vectorStore/MongoDBAtlasVectorStore.ts @@ -60,7 +60,7 @@ function toMongoDBFilter(filters?: MetadataFilters): Record { return { $or: filters.filters.map(createFilterObject) }; } - throw new Error("filters condition not recognized. Returning empty object"); + throw new Error("filters condition not recognized. Must be AND or OR"); } /**