Skip to content

Commit c37df25

Browse files
committed
Add in a menu based rag example to use in docs.
1 parent 4eae7bc commit c37df25

File tree

8 files changed

+291
-0
lines changed

8 files changed

+291
-0
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ firebase-debug.log
2929
!js/samples/rag/package.json
3030
js/samples/rag/*.json
3131
js/samples/cat-eval/__db*.json
32+
js/samples/menu-example/rag/__db*.json
3233

3334
# Test files
3435
last_recording.mp4
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
# Menu question answering (RAG) example
2+
3+
## Build it
4+
5+
```
6+
pnpm build
7+
```
8+
9+
or if you need to, build everything:
10+
11+
```
12+
cd ../../../; pnpm build; pnpm pack:all; cd -
13+
```
14+
15+
## Run setup
16+
17+
This will add the GenkitGrubPub.pdf to your index
18+
19+
```
20+
genkit flow:run setup
21+
```
22+
23+
or add more pdfs to the index if you want:
24+
25+
```
26+
genkit flow:run setup '["./path/to/your/file.pdf"]'
27+
```
28+
29+
## Run the flow via cli
30+
31+
```
32+
genkit flow:run menuQA '"What burgers are on the menu?"'
33+
```
34+
35+
## Run the flow in the Developer UI
36+
37+
```
38+
genkit start
39+
```
40+
41+
Click on the menuQA flow in the left-hand navigation panel to try out the new flow.
340 KB
Binary file not shown.
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
{
2+
"main": "lib/index.js",
3+
"scripts": {
4+
"start": "node lib/index.js",
5+
"compile": "tsc",
6+
"build": "pnpm build:clean && pnpm compile",
7+
"build:clean": "rm -rf ./lib",
8+
"build:watch": "tsc --watch",
9+
"build-and-run": "pnpm build && node lib/index.js"
10+
},
11+
"name": "rag",
12+
"version": "1.0.0",
13+
"description": "",
14+
"keywords": [],
15+
"author": "",
16+
"license": "ISC",
17+
"dependencies": {
18+
"@genkit-ai/ai": "workspace:*",
19+
"@genkit-ai/core": "workspace:*",
20+
"@genkit-ai/dev-local-vectorstore": "workspace:*",
21+
"@genkit-ai/dotprompt": "workspace:*",
22+
"@genkit-ai/firebase": "workspace:*",
23+
"@genkit-ai/flow": "workspace:*",
24+
"@genkit-ai/vertexai": "workspace:*",
25+
"llm-chunk": "^0.0.1",
26+
"pdf-parse": "^1.1.1",
27+
"zod": "^3.22.4"
28+
},
29+
"devDependencies": {
30+
"@types/pdf-parse": "^1.1.4",
31+
"typescript": "^5.3.3"
32+
}
33+
}
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
/**
2+
* Copyright 2024 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
import { configureGenkit } from '@genkit-ai/core';
18+
import { devLocalVectorstore } from '@genkit-ai/dev-local-vectorstore';
19+
import { defineFlow, runFlow } from '@genkit-ai/flow';
20+
import { textEmbeddingGecko, vertexAI } from '@genkit-ai/vertexai';
21+
import * as z from 'zod';
22+
import { indexMenu } from './indexer';
23+
24+
// Local vector store index named 'menuQA', embedded with textEmbeddingGecko.
const menuIndexConfig = {
  indexName: 'menuQA',
  embedder: textEmbeddingGecko,
};

// Genkit configuration for the menu sample: Vertex AI plus a local dev
// vector store, with tracing/metrics enabled and debug logging.
// NOTE(review): the flow-state and trace stores are set to 'firebase', but no
// firebase plugin is registered in `plugins` here — confirm one is provided.
export default configureGenkit({
  plugins: [vertexAI(), devLocalVectorstore([menuIndexConfig])],
  flowStateStore: 'firebase',
  traceStore: 'firebase',
  enableTracingAndMetrics: true,
  logLevel: 'debug',
});
39+
40+
// Menu PDFs that the `setup` flow always indexes.
const menus = ['./docs/GenkitGrubPub.pdf'];

/**
 * Flow 'setup': indexes menu PDFs by running the `indexMenu` flow on each one.
 *
 * With no input, only the default menus listed in `menus` are indexed. When an
 * array of paths is supplied, those PDFs are indexed in addition to the
 * defaults; a path that appears more than once is indexed only once.
 *
 * Usage:
 *   genkit flow:run setup
 *   genkit flow:run setup '["your_awesome_pdf.pdf", "your_other_awesome_pdf.pdf"]'
 *
 * @param documentArr Optional extra PDF file paths to index.
 */
export const setup = defineFlow(
  {
    name: 'setup',
    inputSchema: z.array(z.string()).optional(),
  },
  async (documentArr?: string[]) => {
    // Build a new list instead of calling `concat` and dropping its result:
    // `Array.prototype.concat` never mutates the receiver, so the defaults
    // were previously lost whenever custom paths were passed.
    const toIndex = [...new Set([...(documentArr ?? []), ...menus])];

    await Promise.all(
      toIndex.map(async (document) => {
        await runFlow(indexMenu, document);
        // Log only after the indexing flow has actually completed.
        console.log(`Indexed ${document}`);
      })
    );
  }
);
64+
65+
export * from './indexer.js';
66+
export * from './menuQA.js';
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
/**
2+
* Copyright 2024 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
import { index } from '@genkit-ai/ai';
18+
import { Document } from '@genkit-ai/ai/retriever';
19+
import { devLocalIndexerRef } from '@genkit-ai/dev-local-vectorstore';
20+
import { defineFlow, run } from '@genkit-ai/flow';
21+
import { readFile } from 'fs/promises';
22+
import { chunk } from 'llm-chunk';
23+
import path from 'path';
24+
import pdf from 'pdf-parse';
25+
import * as z from 'zod';
26+
27+
// Reference to the dev-local-vectorstore indexer for the 'menuQA' index.
// It is passed as the `indexer` to `index()` in the indexMenu flow.
28+
export const menuPdfIndexer = devLocalIndexerRef('menuQA');
29+
30+
// Chunking options handed to llm-chunk's `chunk()` when the indexMenu flow
// splits the extracted menu PDF text into segments.
// NOTE(review): minLength/maxLength/overlap are presumably character counts —
// confirm against the llm-chunk documentation.
// NOTE(review): the `as any` cast disables type checking of these options;
// confirm whether a typed config (e.g. a literal-typed `splitter`) would work.
31+
// See full options in https://www.npmjs.com/package/llm-chunk
32+
const chunkingConfig = {
33+
minLength: 1000,
34+
maxLength: 2000,
35+
splitter: 'sentence',
36+
overlap: 100,
37+
delimiters: '',
38+
} as any;
39+
40+
/**
 * Flow 'indexMenu': adds one PDF to the local "vector store".
 *
 * The path is resolved to an absolute path, the PDF text is extracted, the
 * text is split into chunks, each chunk becomes a Document tagged with the
 * resolved file path, and the documents are handed to the menu indexer.
 *
 * Usage:
 *   genkit flow:run indexMenu '"./docs/GenkitGrubPub.pdf"'
 *
 * @param filePath PDF file path.
 */
export const indexMenu = defineFlow(
  {
    name: 'indexMenu',
    inputSchema: z.string().describe('PDF file path'),
    outputSchema: z.void(),
  },
  async (filePath: string) => {
    const absolutePath = path.resolve(filePath);

    // Step 1: pull the raw text out of the PDF.
    const pdfTxt = await run('extract-text', () =>
      extractTextFromPdf(absolutePath)
    );

    // Step 2: split the text into segments.
    const segments = await run('chunk-it', async () =>
      chunk(pdfTxt, chunkingConfig)
    );

    // Step 3: wrap every segment in a Document carrying its source path.
    const documents = segments.map((segment) =>
      Document.fromText(segment, { filePath: absolutePath })
    );

    // Step 4: store the documents in the index.
    await index({ indexer: menuPdfIndexer, documents });
  }
);
73+
74+
/**
 * Reads the PDF at `filePath` (resolved to an absolute path) and returns the
 * `text` field of the result produced by pdf-parse.
 */
async function extractTextFromPdf(filePath: string) {
  const absolutePath = path.resolve(filePath);
  const { text } = await pdf(await readFile(absolutePath));
  return text;
}
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
/**
2+
* Copyright 2024 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
import { generate } from '@genkit-ai/ai';
18+
import { retrieve } from '@genkit-ai/ai/retriever';
19+
import { devLocalRetrieverRef } from '@genkit-ai/dev-local-vectorstore';
20+
import { defineFlow } from '@genkit-ai/flow';
21+
import { geminiPro } from '@genkit-ai/vertexai';
22+
import * as z from 'zod';
23+
24+
// Reference to the dev-local-vectorstore retriever for the 'menuQA' index.
// It is passed as the `retriever` to `retrieve()` in the menuQA flow.
25+
export const menuRetriever = devLocalRetrieverRef('menuQA');
26+
27+
/**
 * Flow 'menuQA': takes a question string and returns an answer string.
 *
 * It first calls `retrieve` on `menuRetriever` with the question as the query,
 * then calls `generate` with the geminiPro model, a prompt that embeds the
 * question, and the retrieved documents as `context`. The text of the model
 * response is returned.
 */
export const menuQAFlow = defineFlow(
28+
{ name: 'menuQA', inputSchema: z.string(), outputSchema: z.string() },
29+
async (input: string) => {
30+
// Retrieve documents for the question from the 'menuQA' retriever.
// NOTE(review): `k: 3` presumably caps the number of documents returned — confirm.
31+
const docs = await retrieve({
32+
retriever: menuRetriever,
33+
query: input,
34+
options: { k: 3 },
35+
});
36+
37+
// Generate a response: the prompt tells the model to answer only from the
// supplied context, and the retrieved documents are passed as that context.
38+
const llmResponse = await generate({
39+
model: geminiPro,
40+
prompt: `
41+
You are acting as a helpful AI assistant that can answer
42+
questions about the food available on the menu at Genkit Grub Pub.
43+
44+
Use only the context provided to answer the question.
45+
If you don't know, do not make up an answer.
46+
Do not add or change items on the menu.
47+
48+
Question: ${input}
49+
`,
50+
context: docs,
51+
});
52+
53+
const output = llmResponse.text();
54+
return output;
55+
}
56+
);
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
{
2+
"compilerOptions": {
3+
"module": "NodeNext",
4+
"noImplicitReturns": true,
5+
"noUnusedLocals": false,
6+
"outDir": "lib",
7+
"sourceMap": true,
8+
"strict": true,
9+
"target": "es2017",
10+
"skipLibCheck": true,
11+
"esModuleInterop": true
12+
},
13+
"compileOnSave": true,
14+
"include": ["src"]
15+
}

0 commit comments

Comments
 (0)