diff --git a/docs/_guides.yaml b/docs/_guides.yaml index fdc7db3077..d523a6c04f 100644 --- a/docs/_guides.yaml +++ b/docs/_guides.yaml @@ -65,8 +65,6 @@ toc: path: /docs/genkit/plugins/pinecone - title: pgvector (code template) path: /docs/genkit/templates/pgvector - - title: Firestore vector store (code template) - path: /docs/genkit/templates/firestore-vector - title: Firebase path: /docs/genkit/plugins/firebase - title: Google Cloud diff --git a/docs/index.md b/docs/index.md index 10276cda4a..c4382d9246 100644 --- a/docs/index.md +++ b/docs/index.md @@ -370,6 +370,7 @@ maintained by the Genkit team: firebase Cloud deployment: Cloud Functions, Firebase Authentication, App Check
+ Vector database: Cloud Firestore vector store
diff --git a/docs/plugins/firebase.md b/docs/plugins/firebase.md index a1772a4c3e..f5901a1b33 100644 --- a/docs/plugins/firebase.md +++ b/docs/plugins/firebase.md @@ -2,6 +2,7 @@ The Firebase plugin provides several integrations with Firebase services: +- Indexers and retrievers using Cloud Firestore vector store - Trace storage using Cloud Firestore - Flow deployment using Cloud Functions - Authorization policies for Firebase Authentication users @@ -58,7 +59,82 @@ Application Default Credentials. To specify your credentials: This plugin provides several integrations with Firebase services, which you can use together or individually. -### Cloud Firestore +### Cloud Firestore vector store + +You can use Cloud Firestore as a vector store for RAG indexing and retrieval. + +The `firebase` plugin provides a convenience function for defining Firestore +retrievers, `defineFirestoreRetriever()`: + +```js +import { defineFirestoreRetriever } from '@genkit-ai/firebase'; +import { initializeApp } from 'firebase-admin/app'; +import { getFirestore } from 'firebase-admin/firestore'; + +const app = initializeApp(); +const firestore = getFirestore(app); + +const yourRetrieverRef = defineFirestoreRetriever({ + name: 'yourRetriever', + firestore, + collection: 'yourCollection', + contentField: 'yourDataChunks', + vectorField: 'embedding', + embedder: textEmbeddingGecko, + distanceMeasure: 'COSINE', // 'EUCLIDEAN', 'DOT_PRODUCT', or 'COSINE' (default) +}); +``` + +To use it, pass it to the `retrieve()` function: + +```js +const docs = await retrieve({ + retriever: yourRetrieverRef, + query: 'look for something', + options: { limit: 5 }, +}); +``` + +For indexing, use an embedding generator along with the Admin SDK: + +```js +import { initializeApp } from 'firebase-admin/app'; +import { getFirestore, FieldValue } from 'firebase-admin/firestore'; +import { textEmbeddingGecko } from '@genkit-ai/vertexai'; +import { embed } from '@genkit-ai/ai/embedder'; + +const app = 
initializeApp(); +const firestore = getFirestore(app); + +const indexConfig = { + collection: 'yourCollection', + contentField: 'yourDataChunks', + vectorField: 'embedding', + embedder: textEmbeddingGecko, +}; + +async function indexToFirestore(content) { + const embedding = await embed({ + embedder: indexConfig.embedder, + content, + }); + await firestore.collection(indexConfig.collection).add({ + [indexConfig.vectorField]: FieldValue.vector(embedding), + [indexConfig.contentField]: content, + }); +} +``` + +Firestore depends on indexes to provide fast and efficient querying on +collections. Retrieving from the collection requires a vector index on the +`embedding` field. To create it, run a retrieval; Firestore will throw an error +with a command to create the index. Execute that command and your index should +be ready to use. + +See the [Retrieval-augmented generation](../rag.md) page for a general +discussion on indexers and retrievers. + +### Cloud Firestore trace storage You can use Cloud Firestore to store traces: diff --git a/docs/rag.md b/docs/rag.md index 97790a2389..b7425f602d 100644 --- a/docs/rag.md +++ b/docs/rag.md @@ -249,6 +249,7 @@ export const ragFlow = defineFlow( Genkit provides indexer and retriever support through its plugin system. 
The following plugins are officially supported: +- [Cloud Firestore vector store](plugins/firebase.md) - [Chroma DB](plugins/chroma.md) vector database - [Pinecone](plugins/pinecone.md) cloud vector database @@ -257,7 +258,6 @@ code templates, which you can customize for your database configuration and schema: - PostgreSQL with [`pgvector`](templates/pgvector.md) -- [Firestore vector store](templates/firestore-vector.md) Embedding model support is provided through the following plugins: diff --git a/docs/templates/firestore-vector.md b/docs/templates/firestore-vector.md deleted file mode 100644 index bf2166c81a..0000000000 --- a/docs/templates/firestore-vector.md +++ /dev/null @@ -1,95 +0,0 @@ -# Firestore vector store template - -You can use Firestore vector store in Firebase Genkit to power your RAG flows by -storing and retrieving embedding vectors. - -Here is a sample template which retrieves documents from Firestore. - -Use the following example as a starting point and modify it to work with your database layout. -This sample assumes that you already have a Firestore collection called `vectors` in which each document -has an `embedding` field that stores the embedding vector. - -Important: Vector support is available only in `@google-cloud/firestore` versions starting from `7.6.0`. You must update your dependecies to match this version. - -Firestore depends on indices to provide fast and efficient querying on collections. This sample requires the `embedding` field to be indexed to work. To do so, invoke the -flow and Firestore will throw an error with a command to create an index. Execute that command -and your index should be ready to use. 
- -```js -import { embed } from '@genkit-ai/ai/embedder'; -import { Document, defineRetriever } from '@genkit-ai/ai/retriever'; -import { textEmbeddingGecko } from '@genkit-ai/vertexai'; -import { - FieldValue, - VectorQuery, - VectorQuerySnapshot, -} from '@google-cloud/firestore'; -import { Firestore } from 'firebase-admin/firestore'; -import * as z from 'zod'; -import { augmentedPrompt } from './prompt'; - -const QueryOptions = z.object({ - k: z.number().optional(), -}); - -const firestoreArtifactsRetriever = defineRetriever( - { - name: 'firestore/artifacts', - configSchema: QueryOptions, - }, - async (input, options) => { - const embedding = await embed({ - embedder: textEmbeddingGecko, - content: input, - }); - - const db = new Firestore(); - const coll = db.collection('vectors' /* your collection name */); - - const vectorQuery: VectorQuery = coll.findNearest( - 'embedding' /* the name of the field that contains the vector */, - FieldValue.vector(embedding), - { - limit: options.k ?? 3, - distanceMeasure: 'COSINE', - } - ); - - const vectorQuerySnapshot: VectorQuerySnapshot = await vectorQuery.get(); - return { - documents: vectorQuerySnapshot.docs.map((doc) => - // doc.data() represents the Firestore document. You may process it as needed to generate - // a Genkit document object, depending on your storage format. - Document.fromText(doc.data().content.text) - ), - }; - } -); -``` - -And here's how to use the retriever in a flow: - -```js -// Simple flow to use the firestoreArtifactsRetriever -export const askQuestionsOnNewsArticles = defineFlow( - { - name: 'askQuestionsOnNewsArticles', - inputSchema: z.string(), - outputSchema: z.string(), - }, - async (inputQuestion) => { - const docs = await retrieve({ - retriever: firestoreArtifactsRetriever, - query: inputQuestion, - options: { - k: 5, - }, - }); - console.log(docs); - - // Continue with using retrieved docs - // in RAG prompts. - //... - } -); -```