@@ -121,12 +121,8 @@ Embedding model support is provided through the following plugins:
121
121
122
122
## Defining a RAG Flow
123
123
124
- The following examples show how you could ingest a collection of PDF documents
125
- into a vector database and retrieve them for use in a flow.
126
-
127
- It uses the local file-based vector similarity retriever
128
- that Genkit provides out-of-the box for simple testing and prototyping (_ do not
129
- use in production_ )
124
+ The following examples show how you could ingest a collection of restaurant menu PDF documents
125
+ into a vector database and retrieve them for use in a flow that determines what food items are available.
130
126
131
127
### Install dependencies for processing PDFs
132
128
@@ -153,7 +149,7 @@ configureGenkit({
153
149
// the local vector store requires an embedder to translate from text to vector
154
150
devLocalVectorstore ([
155
151
{
156
- indexName: ' bob-facts ' ,
152
+ indexName: ' menuQA ' ,
157
153
embedder: textEmbeddingGecko ,
158
154
},
159
155
]),
@@ -175,7 +171,7 @@ use in production_)
175
171
``` ts
176
172
import { devLocalIndexerRef } from ' @genkit-ai/dev-local-vectorstore' ;
177
173
178
- export const pdfIndexer = devLocalIndexerRef (' bob-facts ' );
174
+ export const menuPdfIndexer = devLocalIndexerRef (' menuQA ' );
179
175
```
180
176
181
177
#### Create chunking config
@@ -199,23 +195,22 @@ More chunking options for this library can be found in the [llm-chunk documentat
199
195
#### Define your indexer flow
200
196
201
197
``` ts
198
+ import { index } from ' @genkit-ai/ai' ;
199
+ import { Document } from ' @genkit-ai/ai/retriever' ;
200
+ import { defineFlow , run } from ' @genkit-ai/flow' ;
201
+ import { readFile } from ' fs/promises' ;
202
202
import { chunk } from ' llm-chunk' ;
203
203
import path from ' path' ;
204
204
import pdf from ' pdf-parse' ;
205
- import { readFile } from ' fs/promises' ;
206
- import z from ' zod' ;
207
-
208
- import { Document , index } from ' @genkit-ai/ai/retriever' ;
209
- import { defineFlow , run } from ' @genkit-ai/flow' ;
210
- import { devLocalVectorstore } from ' @genkit-ai/dev-local-vectorstore' ;
205
+ import * as z from ' zod' ;
211
206
212
- export const indexPdf = defineFlow (
207
+ export const indexMenu = defineFlow (
213
208
{
214
- name: ' indexPdf ' ,
215
- input : z .string ().describe (' PDF file path' ),
216
- output : z .void (),
209
+ name: ' indexMenu ' ,
210
+ inputSchema : z .string ().describe (' PDF file path' ),
211
+ outputSchema : z .void (),
217
212
},
218
- async (filePath ) => {
213
+ async (filePath : string ) => {
219
214
filePath = path .resolve (filePath );
220
215
221
216
// Read the pdf.
@@ -235,7 +230,7 @@ export const indexPdf = defineFlow(
235
230
236
231
// Add documents to the index.
237
232
await index ({
238
- indexer: pdfIndexer ,
233
+ indexer: menuPdfIndexer ,
239
234
documents ,
240
235
});
241
236
}
@@ -252,10 +247,10 @@ async function extractTextFromPdf(filePath: string) {
252
247
#### Run the indexer flow
253
248
254
249
``` posix-terminal
255
- genkit flow:run indexPdf "'../pdfs'"
250
+ genkit flow:run indexMenu "'../pdfs'"
256
251
```
257
252
258
- After running the ` indexPdf ` flow, the vector database will be seeded with documents and ready to be used in Genkit flows with retrieval steps.
253
+ After running the `indexMenu` flow, the vector database will be seeded with documents and ready to be used in Genkit flows with retrieval steps.
259
254
260
255
### Define a flow with retrieval
261
256
@@ -264,32 +259,39 @@ the indexer example, this example uses Genkit's file-based vector retriever,
264
259
which you should not use in production.
265
260
266
261
``` ts
267
- import { defineFlow } from ' @genkit-ai/flow' ;
268
- import { generate } from ' @genkit-ai/ai/generate' ;
262
+ import { generate } from ' @genkit-ai/ai' ;
269
263
import { retrieve } from ' @genkit-ai/ai/retriever' ;
270
- import {
271
- devLocalRetrieverRef ,
272
- devLocalVectorstore ,
273
- } from ' @genkit-ai/dev-local-vectorstore' ;
274
- import { geminiPro , textEmbeddingGecko , vertexAI } from ' @genkit-ai/vertexai' ;
264
+ import { devLocalRetrieverRef } from ' @genkit-ai/dev-local-vectorstore' ;
265
+ import { defineFlow } from ' @genkit-ai/flow' ;
266
+ import { geminiPro } from ' @genkit-ai/vertexai' ;
275
267
import * as z from ' zod' ;
276
268
277
269
// Define the retriever reference
278
- export const bobFactRetriever = devLocalRetrieverRef (' bob-facts ' );
270
+ export const menuRetriever = devLocalRetrieverRef (' menuQA ' );
279
271
280
- export const ragFlow = defineFlow (
281
- { name: ' ragFlow' , input: z .string (), output: z .string () },
282
- async (input ) => {
272
+ export const menuQAFlow = defineFlow (
273
+ { name: ' menuQA' , inputSchema: z .string (), outputSchema: z .string () },
274
+ async (input : string ) => {
275
+ // retrieve relevant documents
283
276
const docs = await retrieve ({
284
- retriever: bobFactRetriever ,
277
+ retriever: menuRetriever ,
285
278
query: input ,
286
279
options: { k: 3 },
287
280
});
288
281
289
282
// generate a response
290
283
const llmResponse = await generate ({
291
284
model: geminiPro ,
292
- prompt: ` Answer this question: ${input } ` ,
285
+ prompt: `
286
+ You are acting as a helpful AI assistant that can answer
287
+ questions about the food available on the menu at Genkit Grub Pub.
288
+
289
+ Use only the context provided to answer the question.
290
+ If you don't know, do not make up an answer.
291
+ Do not add or change items on the menu.
292
+
293
+ Question: ${input }
294
+ ` ,
293
295
context: docs ,
294
296
});
295
297
@@ -345,19 +347,21 @@ import {
345
347
} from ' @genkit-ai/ai/retriever' ;
346
348
import * as z from ' zod' ;
347
349
348
- const MyAdvancedOptionsSchema = CommonRetrieverOptionsSchema .extend ({
350
+ export const menuRetriever = devLocalRetrieverRef (' menuQA' );
351
+
352
+ const advancedMenuRetrieverOptionsSchema = CommonRetrieverOptionsSchema .extend ({
349
353
preRerankK: z .number ().max (1000 ),
350
354
});
351
355
352
- const advancedRetriever = defineRetriever (
356
+ const advancedMenuRetriever = defineRetriever (
353
357
{
354
- name: ` custom/myAdvancedRetriever ` ,
355
- configSchema: MyAdvancedOptionsSchema ,
358
+ name: ` custom/advancedMenuRetriever ` ,
359
+ configSchema: advancedMenuRetrieverOptionsSchema ,
356
360
},
357
361
async (input , options ) => {
358
362
const extendedPrompt = await extendPrompt (input);
359
363
const docs = await retrieve ({
360
- retriever: bobFactsRetriever ,
364
+ retriever: menuRetriever ,
361
365
query: extendedPrompt,
362
366
options: { k: options .preRerankK || 10 },
363
367
});
@@ -375,7 +379,7 @@ And then you can just swap out your retriever:
375
379
``` javascript
376
380
const docs = await retrieve ({
377
381
retriever: advancedMenuRetriever,
378
- query: ' Who is Bob? ' ,
382
+ query: input ,
379
383
options: { preRerankK: 7 , k: 3 },
380
384
});
381
385
```
0 commit comments