Add in a menu based rag example to use in docs.

schnecle · schnecle · commit c37df2507366 · 2024-05-08T21:22:31.000Z
diff --git a/.gitignore b/.gitignore
@@ -29,6 +29,7 @@ firebase-debug.log
 !js/samples/rag/package.json
 js/samples/rag/*.json
 js/samples/cat-eval/__db*.json
+js/samples/menu-example/rag/__db*.json
 
 # Test files
 last_recording.mp4
diff --git a/js/samples/menu-example/rag/README.md b/js/samples/menu-example/rag/README.md
@@ -0,0 +1,41 @@
+# Menu question answering (RAG) with the Genkit Grub Pub menu
+
+## Build it
+
+```
+pnpm build
+```
+
+or if you need to, build everything:
+
+```
+cd ../../../; pnpm build; pnpm pack:all; cd -
+```
+
+## Run setup
+
+This will add the GenkitGrubPub.pdf to your index
+
+```
+genkit flow:run setup
+```
+
+or add more pdfs to the index if you want:
+
+```
+genkit flow:run setup '["./path/to/your/file.pdf"]'
+```
+
+## Run the flow via cli
+
+```
+genkit flow:run menuQA '"What burgers are on the menu?"'
+```
+
+## Run the flow in the Developer UI
+
+```
+genkit start
+```
+
+Click on the menuQA flow in the left-hand navigation panel to try out the new flow.
diff --git a/js/samples/menu-example/rag/docs/GenkitGrubPub.pdf b/js/samples/menu-example/rag/docs/GenkitGrubPub.pdf
diff --git a/js/samples/menu-example/rag/package.json b/js/samples/menu-example/rag/package.json
@@ -0,0 +1,33 @@
+{
+  "main": "lib/index.js",
+  "scripts": {
+    "start": "node lib/index.js",
+    "compile": "tsc",
+    "build": "pnpm build:clean && pnpm compile",
+    "build:clean": "rm -rf ./lib",
+    "build:watch": "tsc --watch",
+    "build-and-run": "pnpm build && node lib/index.js"
+  },
+  "name": "rag",
+  "version": "1.0.0",
+  "description": "",
+  "keywords": [],
+  "author": "",
+  "license": "ISC",
+  "dependencies": {
+    "@genkit-ai/ai": "workspace:*",
+    "@genkit-ai/core": "workspace:*",
+    "@genkit-ai/dev-local-vectorstore": "workspace:*",
+    "@genkit-ai/dotprompt": "workspace:*",
+    "@genkit-ai/firebase": "workspace:*",
+    "@genkit-ai/flow": "workspace:*",
+    "@genkit-ai/vertexai": "workspace:*",
+    "llm-chunk": "^0.0.1",
+    "pdf-parse": "^1.1.1",
+    "zod": "^3.22.4"
+  },
+  "devDependencies": {
+    "@types/pdf-parse": "^1.1.4",
+    "typescript": "^5.3.3"
+  }
+}
diff --git a/js/samples/menu-example/rag/src/index.ts b/js/samples/menu-example/rag/src/index.ts
@@ -0,0 +1,66 @@
+/**
+ * Copyright 2024 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import { configureGenkit } from '@genkit-ai/core';
+import { devLocalVectorstore } from '@genkit-ai/dev-local-vectorstore';
+import { defineFlow, runFlow } from '@genkit-ai/flow';
+import { textEmbeddingGecko, vertexAI } from '@genkit-ai/vertexai';
+import * as z from 'zod';
+import { indexMenu } from './indexer';
+
+// Configure Genkit for this sample: the Vertex AI plugin plus a dev-local
+// vector store exposing one index, 'menuQA', embedded with textEmbeddingGecko.
+// NOTE(review): flowStateStore/traceStore name 'firebase', but no firebase
+// plugin appears in `plugins` here — confirm these stores resolve as intended.
+export default configureGenkit({
+  plugins: [
+    vertexAI(),
+    devLocalVectorstore([
+      {
+        indexName: 'menuQA',
+        embedder: textEmbeddingGecko,
+      },
+    ]),
+  ],
+  enableTracingAndMetrics: true,
+  flowStateStore: 'firebase',
+  logLevel: 'debug',
+  traceStore: 'firebase',
+});
+
+// Default menu PDFs indexed by the `setup` flow.
+const menus = ['./docs/GenkitGrubPub.pdf'];
+
+// Indexes the default menu PDFs plus any extra PDF paths passed as input.
+// genkit flow:run setup
+// genkit flow:run setup '["your_awesome_pdf.pdf", "your_other_awesome_pdf.pdf"]'
+export const setup = defineFlow(
+  {
+    name: 'setup',
+    inputSchema: z.array(z.string()).optional(),
+  },
+  async (documentArr?: string[]) => {
+    // Array.prototype.concat returns a new array rather than mutating, so
+    // build the combined list explicitly instead of discarding the result.
+    const documents = [...(documentArr ?? []), ...menus];
+
+    await Promise.all(
+      documents.map(async (document) => {
+        await runFlow(indexMenu, document);
+        // Log only once the indexing flow has actually finished.
+        console.log(`Indexed ${document}`);
+      })
+    );
+  }
+);
+
+export * from './indexer.js';
+export * from './menuQA.js';
diff --git a/js/samples/menu-example/rag/src/indexer.ts b/js/samples/menu-example/rag/src/indexer.ts
@@ -0,0 +1,79 @@
+/**
+ * Copyright 2024 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import { index } from '@genkit-ai/ai';
+import { Document } from '@genkit-ai/ai/retriever';
+import { devLocalIndexerRef } from '@genkit-ai/dev-local-vectorstore';
+import { defineFlow, run } from '@genkit-ai/flow';
+import { readFile } from 'fs/promises';
+import { chunk } from 'llm-chunk';
+import path from 'path';
+import pdf from 'pdf-parse';
+import * as z from 'zod';
+
+// Reference to the dev-local indexer for the 'menuQA' index.
+export const menuPdfIndexer = devLocalIndexerRef('menuQA');
+
+// Chunking config used when splitting the text of a menu PDF for indexing.
+// See full options in https://www.npmjs.com/package/llm-chunk
+// NOTE(review): `as any` bypasses type checking of these options — presumably
+// to satisfy llm-chunk's option types; confirm whether a typed form works.
+const chunkingConfig = {
+  minLength: 1000,
+  maxLength: 2000,
+  splitter: 'sentence',
+  overlap: 100,
+  delimiters: '',
+} as any;
+
+// Flow that reads a PDF from disk, splits its text into chunks, and adds the
+// chunks to the index behind `menuPdfIndexer`.
+// genkit flow:run indexMenu '"./docs/GenkitGrubPub.pdf"'
+export const indexMenu = defineFlow(
+  {
+    name: 'indexMenu',
+    inputSchema: z.string().describe('PDF file path'),
+    outputSchema: z.void(),
+  },
+  async (filePath: string) => {
+    const absolutePath = path.resolve(filePath);
+
+    // Step 1: pull the raw text out of the PDF.
+    const menuText = await run('extract-text', () =>
+      extractTextFromPdf(absolutePath)
+    );
+
+    // Step 2: break the text into segments using the module's chunking config.
+    const segments = await run('chunk-it', async () =>
+      chunk(menuText, chunkingConfig)
+    );
+
+    // Step 3: wrap each segment in a Document tagged with its resolved path.
+    const menuDocs = segments.map((segment) =>
+      Document.fromText(segment, { filePath: absolutePath })
+    );
+
+    // Step 4: write the documents into the index.
+    await index({ indexer: menuPdfIndexer, documents: menuDocs });
+  }
+);
+
+// Reads the PDF at `filePath` and resolves with the text parsed out of it.
+async function extractTextFromPdf(filePath: string) {
+  const absolutePath = path.resolve(filePath);
+  const parsed = await pdf(await readFile(absolutePath));
+  return parsed.text;
+}
diff --git a/js/samples/menu-example/rag/src/menuQA.ts b/js/samples/menu-example/rag/src/menuQA.ts
@@ -0,0 +1,56 @@
+/**
+ * Copyright 2024 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import { generate } from '@genkit-ai/ai';
+import { retrieve } from '@genkit-ai/ai/retriever';
+import { devLocalRetrieverRef } from '@genkit-ai/dev-local-vectorstore';
+import { defineFlow } from '@genkit-ai/flow';
+import { geminiPro } from '@genkit-ai/vertexai';
+import * as z from 'zod';
+
+// Reference to the dev-local retriever for the 'menuQA' index.
+export const menuRetriever = devLocalRetrieverRef('menuQA');
+
+// Flow that answers a question about the menu: it retrieves documents for the
+// question (k: 3) and passes them to the geminiPro model as context.
+export const menuQAFlow = defineFlow(
+  { name: 'menuQA', inputSchema: z.string(), outputSchema: z.string() },
+  async (input: string) => {
+    // Look up indexed menu documents matching the question.
+    const menuContext = await retrieve({
+      retriever: menuRetriever,
+      query: input,
+      options: { k: 3 },
+    });
+
+    // The prompt text is whitespace-sensitive, so it is kept verbatim.
+    const prompt = `
+    You are acting as a helpful AI assistant that can answer 
+    questions about the food available on the menu at Genkit Grub Pub.
+    
+    Use only the context provided to answer the question.
+    If you don't know, do not make up an answer.
+    Do not add or change items on the menu.
+
+    Question: ${input}
+    `;
+
+    // Ask the model to answer with the retrieved documents as context.
+    const answer = await generate({
+      model: geminiPro,
+      prompt,
+      context: menuContext,
+    });
+
+    return answer.text();
+  }
+);
diff --git a/js/samples/menu-example/rag/tsconfig.json b/js/samples/menu-example/rag/tsconfig.json
@@ -0,0 +1,15 @@
+{
+  "compilerOptions": {
+    "module": "NodeNext",
+    "noImplicitReturns": true,
+    "noUnusedLocals": false,
+    "outDir": "lib",
+    "sourceMap": true,
+    "strict": true,
+    "target": "es2017",
+    "skipLibCheck": true,
+    "esModuleInterop": true
+  },
+  "compileOnSave": true,
+  "include": ["src"]
+}