diff --git a/.gitignore b/.gitignore index fe8ab0f0a..2a02ac573 100644 --- a/.gitignore +++ b/.gitignore @@ -26,6 +26,7 @@ firebase-debug.log !js/samples/rag/package.json js/samples/rag/*.json js/samples/cat-eval/__db*.json +js/samples/menu-example/rag/__db*.json # Test files last_recording.mp4 diff --git a/js/samples/menu-example/rag/README.md b/js/samples/menu-example/rag/README.md new file mode 100644 index 000000000..17a88d3be --- /dev/null +++ b/js/samples/menu-example/rag/README.md @@ -0,0 +1,41 @@ +# Evaluating menuQA flow + +## Build it + +``` +pnpm build +``` + +or if you need to, build everything: + +``` +cd ../../../; pnpm build; pnpm pack:all; cd - +``` + +## Run setup + +This will add the `GenkitGrubPub.pdf` to your index + +``` +genkit flow:run setup +``` + +or add more pdfs to the index if you want: + +``` +genkit flow:run setup '["./path/to/your/file.pdf"]' +``` + +## Run the flow via cli + +``` +genkit flow:run menuQA '"What burgers are on the menu?"' +``` + +## Run the flow in the Developer UI + +``` +genkit start +``` + +Click on the menuQA flow in the left-hand navigation panel to try out the new flow in the playground. 
diff --git a/js/samples/menu-example/rag/docs/GenkitGrubPub.pdf b/js/samples/menu-example/rag/docs/GenkitGrubPub.pdf new file mode 100644 index 000000000..1215e0775 Binary files /dev/null and b/js/samples/menu-example/rag/docs/GenkitGrubPub.pdf differ diff --git a/js/samples/menu-example/rag/package.json b/js/samples/menu-example/rag/package.json new file mode 100644 index 000000000..7c2319404 --- /dev/null +++ b/js/samples/menu-example/rag/package.json @@ -0,0 +1,33 @@ +{ + "main": "lib/index.js", + "scripts": { + "start": "node lib/index.js", + "compile": "tsc", + "build": "pnpm build:clean && pnpm compile", + "build:clean": "rm -rf ./lib", + "build:watch": "tsc --watch", + "build-and-run": "pnpm build && node lib/index.js" + }, + "name": "rag", + "version": "1.0.0", + "description": "", + "keywords": [], + "author": "", + "license": "ISC", + "dependencies": { + "@genkit-ai/ai": "workspace:*", + "@genkit-ai/core": "workspace:*", + "@genkit-ai/dev-local-vectorstore": "workspace:*", + "@genkit-ai/dotprompt": "workspace:*", + "@genkit-ai/firebase": "workspace:*", + "@genkit-ai/flow": "workspace:*", + "@genkit-ai/vertexai": "workspace:*", + "llm-chunk": "^0.0.1", + "pdf-parse": "^1.1.1", + "zod": "^3.22.4" + }, + "devDependencies": { + "@types/pdf-parse": "^1.1.4", + "typescript": "^5.3.3" + } +} diff --git a/js/samples/menu-example/rag/src/index.ts b/js/samples/menu-example/rag/src/index.ts new file mode 100644 index 000000000..cf3a56d08 --- /dev/null +++ b/js/samples/menu-example/rag/src/index.ts @@ -0,0 +1,66 @@ +/** + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
import { configureGenkit } from '@genkit-ai/core';
import { devLocalVectorstore } from '@genkit-ai/dev-local-vectorstore';
import { defineFlow, runFlow } from '@genkit-ai/flow';
import { textEmbeddingGecko, vertexAI } from '@genkit-ai/vertexai';
import * as z from 'zod';
import { indexMenu } from './indexer';

// Configure Genkit with the Vertex AI plugin and a dev-local vector store
// exposing a single index named 'menuQA' that embeds with textEmbeddingGecko.
// NOTE(review): flowStateStore/traceStore are set to 'firebase', but no
// firebase plugin is registered in `plugins` here — confirm this is intended.
configureGenkit({
  plugins: [
    vertexAI(),
    devLocalVectorstore([
      {
        indexName: 'menuQA',
        embedder: textEmbeddingGecko,
      },
    ]),
  ],
  enableTracingAndMetrics: true,
  flowStateStore: 'firebase',
  logLevel: 'debug',
  traceStore: 'firebase',
});

/** PDF menus that the `setup` flow always indexes. */
const menus = ['./docs/GenkitGrubPub.pdf'];

/**
 * Indexes the bundled menu PDF, plus any additional PDFs supplied by the
 * caller, by running the `indexMenu` flow once per file path.
 *
 * Usage:
 *   genkit flow:run setup
 *   genkit flow:run setup '["your_awesome_pdf.pdf", "your_other_awesome_pdf.pdf"]'
 *
 * @param documentArr Optional extra PDF paths to index alongside `menus`.
 */
export const setup = defineFlow(
  {
    name: 'setup',
    inputSchema: z.array(z.string()).optional(),
  },
  async (documentArr?: string[]) => {
    // `concat` returns a new array and leaves its receiver untouched, so the
    // result has to be kept; otherwise the default menus are silently dropped
    // whenever the caller passes their own paths.
    const requested = documentArr ? documentArr.concat(menus) : menus;

    // Drop exact duplicate paths so the same file is not indexed twice when
    // the caller also lists a default menu.
    const documents = [...new Set(requested)];

    await Promise.all(
      documents.map(async (document) => {
        await runFlow(indexMenu, document);
        // Report only once indexing for this document has actually finished.
        console.log(`Indexed ${document}`);
      })
    );
  }
);

export * from './indexer.js';
export * from './menuQA.js';
import { index } from '@genkit-ai/ai';
import { Document } from '@genkit-ai/ai/retriever';
import { devLocalIndexerRef } from '@genkit-ai/dev-local-vectorstore';
import { defineFlow, run } from '@genkit-ai/flow';
import { readFile } from 'fs/promises';
import { chunk } from 'llm-chunk';
import path from 'path';
import pdf from 'pdf-parse';
import * as z from 'zod';

/** Reference to the dev-local indexer registered under the 'menuQA' index. */
export const menuPdfIndexer = devLocalIndexerRef('menuQA');

// Options handed to llm-chunk when splitting the extracted menu text.
// Full option list: https://www.npmjs.com/package/llm-chunk
const chunkingConfig = {
  minLength: 1000,
  maxLength: 2000,
  splitter: 'sentence',
  overlap: 100,
  delimiters: '',
} as any;

/**
 * Flow that reads a PDF, splits its text into chunks and adds each chunk to
 * the 'menuQA' index as a document tagged with the resolved file path.
 *
 * Usage:
 *   genkit flow:run indexMenu '"./docs/GenkitGrubPub.pdf"'
 *
 * @param filePath Path to the PDF; resolved to an absolute path before use.
 */
export const indexMenu = defineFlow(
  {
    name: 'indexMenu',
    inputSchema: z.string().describe('PDF file path'),
    outputSchema: z.void(),
  },
  async (filePath: string) => {
    const absolutePath = path.resolve(filePath);

    // Step 1: pull the raw text out of the PDF.
    const rawText = await run('extract-text', () =>
      extractTextFromPdf(absolutePath)
    );

    // Step 2: cut the text into segments using the chunking options above.
    const segments = await run('chunk-it', async () =>
      chunk(rawText, chunkingConfig)
    );

    // Step 3: wrap every segment in a Document carrying the source path.
    const documents = segments.map((segment) =>
      Document.fromText(segment, { filePath: absolutePath })
    );

    // Step 4: hand the documents to the indexer.
    await index({
      indexer: menuPdfIndexer,
      documents,
    });
  }
);

/**
 * Reads the PDF at `filePath` and returns the text reported by pdf-parse.
 *
 * @param filePath Path to the PDF file; resolved before reading.
 * @returns The `text` field of the pdf-parse result.
 */
async function extractTextFromPdf(filePath: string) {
  const contents = await readFile(path.resolve(filePath));
  const { text } = await pdf(contents);
  return text;
}
import { generate } from '@genkit-ai/ai';
import { retrieve } from '@genkit-ai/ai/retriever';
import { devLocalRetrieverRef } from '@genkit-ai/dev-local-vectorstore';
import { defineFlow } from '@genkit-ai/flow';
import { geminiPro } from '@genkit-ai/vertexai';
import * as z from 'zod';

/** Reference to the dev-local retriever registered under the 'menuQA' index. */
export const menuRetriever = devLocalRetrieverRef('menuQA');

/**
 * Flow that answers a question about the menu: it retrieves documents for the
 * question from `menuRetriever`, passes them to `geminiPro` as context with
 * an instruction prompt, and returns the generated text.
 *
 * @param input The question to answer.
 * @returns The text of the model response.
 */
export const menuQAFlow = defineFlow(
  { name: 'menuQA', inputSchema: z.string(), outputSchema: z.string() },
  async (input: string) => {
    // Look up documents for the question; `k: 3` is passed as a retriever option.
    const contextDocs = await retrieve({
      retriever: menuRetriever,
      query: input,
      options: { k: 3 },
    });

    // Instruction prompt with the question appended at the end.
    const prompt = `
    You are acting as a helpful AI assistant that can answer
    questions about the food available on the menu at Genkit Grub Pub.

    Use only the context provided to answer the question.
    If you don't know, do not make up an answer.
    Do not add or change items on the menu.

    Question: ${input}
    `;

    // Generate the answer with the retrieved documents supplied as context.
    const answer = await generate({
      model: geminiPro,
      prompt,
      context: contextDocs,
    });

    return answer.text();
  }
);