From 026d068ddff3c78233ed4b46f027ec1b986d7572 Mon Sep 17 00:00:00 2001 From: Thuc Pham <51660321+thucpn@users.noreply.github.com> Date: Thu, 29 Feb 2024 15:34:08 +0700 Subject: [PATCH] feat: enhance pinecone usage (#586) --- .changeset/clean-camels-nail.md | 5 ++++ examples/pinecone-vector-store/README.md | 5 ++-- .../vectorStore/PineconeVectorStore.ts | 28 +++++++++++-------- packages/eslint-config-custom/index.js | 1 + 4 files changed, 26 insertions(+), 13 deletions(-) create mode 100644 .changeset/clean-camels-nail.md diff --git a/.changeset/clean-camels-nail.md b/.changeset/clean-camels-nail.md new file mode 100644 index 0000000000..b225c15c1b --- /dev/null +++ b/.changeset/clean-camels-nail.md @@ -0,0 +1,5 @@ +--- +"llamaindex": patch +--- + +feat: enhance pinecone usage diff --git a/examples/pinecone-vector-store/README.md b/examples/pinecone-vector-store/README.md index b0576bc26c..a2ade6cbf6 100644 --- a/examples/pinecone-vector-store/README.md +++ b/examples/pinecone-vector-store/README.md @@ -7,8 +7,9 @@ There are two scripts available here: load-docs.ts and query.ts You'll need a Pinecone account, project, and index. Pinecone does not allow automatic creation of indexes on the free plan, so this vector store does not check and create the index (unlike, e.g., the PGVectorStore) -Set the **PINECONE_API_KEY** and **PINECONE_ENVIRONMENT** environment variables to match your specific values. You will likely also need to set **PINECONE_INDEX_NAME**, unless your -index is the default value "llama". +Set the **PINECONE_API_KEY** and **PINECONE_ENVIRONMENT** environment variables to match your specific values. +You will likely also need to set **PINECONE_INDEX_NAME**, unless your index is the default value "llama". +By default, all operations take place inside the default namespace '', but you can set **PINECONE_NAMESPACE** to a different value if you need to. You'll also need a value for OPENAI_API_KEY in your environment. diff --git a/packages/core/src/storage/vectorStore/PineconeVectorStore.ts b/packages/core/src/storage/vectorStore/PineconeVectorStore.ts index 10e4c757aa..ae4f6923b7 100644 --- a/packages/core/src/storage/vectorStore/PineconeVectorStore.ts +++ b/packages/core/src/storage/vectorStore/PineconeVectorStore.ts @@ -12,13 +12,15 @@ import type { Index, ScoredPineconeRecord, } from "@pinecone-database/pinecone"; -import { Pinecone } from "@pinecone-database/pinecone"; +import { type Pinecone } from "@pinecone-database/pinecone"; import type { BaseNode, Metadata } from "../../Node.js"; import { metadataDictToNode, nodeToMetadata } from "./utils.js"; type PineconeParams = { indexName?: string; chunkSize?: number; + namespace?: string; + textKey?: string; }; /** @@ -37,18 +39,23 @@ export class PineconeVectorStore implements VectorStore { */ db?: Pinecone; indexName: string; + namespace: string; chunkSize: number; + textKey: string; constructor(params?: PineconeParams) { this.indexName = params?.indexName ?? process.env.PINECONE_INDEX_NAME ?? "llama"; + this.namespace = params?.namespace ?? process.env.PINECONE_NAMESPACE ?? ""; this.chunkSize = params?.chunkSize ?? Number.parseInt(process.env.PINECONE_CHUNK_SIZE ?? "100"); + this.textKey = params?.textKey ?? "text"; } private async getDb(): Promise { if (!this.db) { + const { Pinecone } = await import("@pinecone-database/pinecone"); this.db = await new Pinecone(); } @@ -148,24 +155,23 @@ export class PineconeVectorStore implements VectorStore { }; const idx = await this.index(); - const results = await idx.query(options); + const results = await idx.namespace(this.namespace).query(options); const idList = results.matches.map((row) => row.id); - const records: FetchResponse = await idx.fetch(idList); + const records: FetchResponse = await idx + .namespace(this.namespace) + .fetch(idList); const rows = Object.values(records.records); const nodes = rows.map((row) => { - const metadata = this.metaWithoutText(row.metadata); - const text = this.textFromResultRow(row); - const node = metadataDictToNode(metadata, { + const node = metadataDictToNode(row.metadata, { fallback: { id: row.id, - text, - metadata, + text: this.textFromResultRow(row), + metadata: this.metaWithoutText(row.metadata), embedding: row.values, }, }); - node.setContent(text); return node; }); @@ -199,12 +205,12 @@ export class PineconeVectorStore implements VectorStore { } textFromResultRow(row: ScoredPineconeRecord): string { - return row.metadata?.text ?? ""; + return row.metadata?.[this.textKey] ?? ""; } metaWithoutText(meta: Metadata): any { return Object.keys(meta) - .filter((key) => key != "text") + .filter((key) => key != this.textKey) .reduce((acc: any, key: string) => { acc[key] = meta[key]; return acc; diff --git a/packages/eslint-config-custom/index.js b/packages/eslint-config-custom/index.js index c2c6700597..56bad41be4 100644 --- a/packages/eslint-config-custom/index.js +++ b/packages/eslint-config-custom/index.js @@ -36,6 +36,7 @@ module.exports = { "PINECONE_INDEX_NAME", "PINECONE_CHUNK_SIZE", "PINECONE_INDEX_NAME", + "PINECONE_NAMESPACE", "AZURE_OPENAI_API_KEY", "AZURE_OPENAI_API_INSTANCE_NAME",