diff --git a/app/api/chat/engine/generate.ts b/app/api/chat/engine/generate.ts index 280420d..5c3976e 100644 --- a/app/api/chat/engine/generate.ts +++ b/app/api/chat/engine/generate.ts @@ -1,8 +1,12 @@ import * as dotenv from "dotenv"; import { getDocuments } from "./loader"; import { initSettings } from "./settings"; -import { storageContextFromDefaults, VectorStoreIndex } from "llamaindex"; -import { STORAGE_CACHE_DIR } from "@/cl/app/api/chat/engine/shared"; +import { + storageContextFromDefaults, + VectorStoreIndex, + MilvusVectorStore, +} from "llamaindex"; +import { getMilvusClient } from "@/cl/app/api/chat/engine/shared"; // Load environment variables from local .env.development.local file dotenv.config({ path: ".env.development.local" }); @@ -24,14 +28,17 @@ async function generateDatasource() { console.log(`Generating storage context for datasource '${datasource}'...`); // Split documents, create embeddings and store them in the storage context const ms = await getRuntime(async () => { - const storageContext = await storageContextFromDefaults({ - persistDir: `${STORAGE_CACHE_DIR}/${datasource}`, - }); const documents = await getDocuments(datasource); // Set private=false to mark the document as public (required for filtering) documents.forEach((doc) => { doc.metadata["private"] = "false"; }); + const milvusClient = getMilvusClient(); + const vectorStore = new MilvusVectorStore({ + milvusClient, + collection: datasource, + }); + const storageContext = await storageContextFromDefaults({ vectorStore }); await VectorStoreIndex.fromDocuments(documents, { storageContext, }); diff --git a/app/api/chat/engine/index.ts b/app/api/chat/engine/index.ts index 8a41671..8d6ca3f 100644 --- a/app/api/chat/engine/index.ts +++ b/app/api/chat/engine/index.ts @@ -1,20 +1,29 @@ -import { SimpleDocumentStore, VectorStoreIndex } from "llamaindex"; -import { storageContextFromDefaults } from "llamaindex/storage/StorageContext"; -import { STORAGE_CACHE_DIR } from "@/cl/app/api/chat/engine/shared"; +import { VectorStoreIndex } from "llamaindex"; +import { MilvusVectorStore } from "llamaindex/storage/vectorStore/MilvusVectorStore"; +import { + checkRequiredEnvVars, + getMilvusClient, +} from "@/cl/app/api/chat/engine/shared"; + +const checkColllectionExist = async (collection: string) => { + const milvusClient = getMilvusClient(); + const isCollectionExist = await milvusClient.hasCollection({ + collection_name: collection, + }); + return isCollectionExist.value; +}; export async function getDataSource(datasource: string) { console.log(`Using datasource: ${datasource}`); - const storageContext = await storageContextFromDefaults({ - persistDir: `${STORAGE_CACHE_DIR}/${datasource}`, - }); + checkRequiredEnvVars({ checkCollectionEnv: false }); // Do not check for collection env var + const milvusClient = getMilvusClient(); - const numberOfDocs = Object.keys( - (storageContext.docStore as SimpleDocumentStore).toDict(), - ).length; - if (numberOfDocs === 0) { - return null; + // remove this code if you don't want to check collection existence before creating the index + // Milvus can automatically create the collection if it does not exist + if (!(await checkColllectionExist(datasource))) { + throw new Error(`Collection "${datasource}" does not exist`); } - return await VectorStoreIndex.init({ - storageContext, - }); + + const store = new MilvusVectorStore({ milvusClient, collection: datasource }); + return await VectorStoreIndex.fromVectorStore(store); } diff --git a/create-llama.sh b/create-llama.sh index c899c59..c00c632 100644 --- a/create-llama.sh +++ b/create-llama.sh @@ -20,7 +20,7 @@ npx -y create-llama@0.1.27 \ --post-install-action none \ --no-llama-parse \ --example-file \ - --vector-db none \ + --vector-db milvus \ --use-pnpm \ -- cl >/dev/null diff --git a/next.config.mjs b/next.config.mjs index 50d6e5e..d8d38c3 100644 --- a/next.config.mjs +++ b/next.config.mjs @@ -2,7 +2,7 @@ import { withSentryConfig } from "@sentry/nextjs"; /** @type {import('next').NextConfig} */ const nextConfig = { experimental: { - serverComponentsExternalPackages: ["pdf-parse"], + serverComponentsExternalPackages: ["pdf-parse", "@zilliz/milvus2-sdk-node"], outputFileTracingIncludes: { "/*": ["./cache/**/*"], "/api/**/*": ["node_modules/tiktoken/tiktoken_bg.wasm"] diff --git a/package.json b/package.json index 1df69cb..5ed4b71 100644 --- a/package.json +++ b/package.json @@ -80,7 +80,8 @@ "@apidevtools/swagger-parser": "^10.1.0", "got": "10.7.0", "ajv": "^8.12.0", - "tiktoken": "^1.0.15" + "tiktoken": "^1.0.15", + "@zilliz/milvus2-sdk-node": "^2.4.4" }, "devDependencies": { "@types/node": "^20.12.7", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 2aea173..c78ed39 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -77,6 +77,9 @@ importers: '@vercel/kv': specifier: ^0.2.4 version: 0.2.4 + '@zilliz/milvus2-sdk-node': + specifier: ^2.4.4 + version: 2.4.4 ai: specifier: ^3.0.21 version: 3.1.35(openai@4.54.0(encoding@0.1.13))(react@18.2.0)(solid-js@1.8.17)(svelte@4.2.18)(vue@3.4.27(typescript@5.1.6))(zod@3.23.8) @@ -7036,14 +7039,14 @@ snapshots: dependencies: '@jest/fake-timers': 29.7.0 '@jest/types': 29.6.3 - '@types/node': 20.12.7 + '@types/node': 20.14.13 jest-mock: 29.7.0 '@jest/fake-timers@29.7.0': dependencies: '@jest/types': 29.6.3 '@sinonjs/fake-timers': 10.3.0 - '@types/node': 20.12.7 + '@types/node': 20.14.13 jest-message-util: 29.7.0 jest-mock: 29.7.0 jest-util: 29.7.0 @@ -7057,7 +7060,7 @@ snapshots: '@jest/schemas': 29.6.3 '@types/istanbul-lib-coverage': 2.0.6 '@types/istanbul-reports': 3.0.4 - '@types/node': 20.12.7 + '@types/node': 20.14.13 '@types/yargs': 17.0.32 chalk: 4.1.2 @@ -8323,7 +8326,7 @@ snapshots: dependencies: '@types/http-cache-semantics': 4.0.4 '@types/keyv': 3.1.4 - '@types/node': 20.12.7 + '@types/node': 20.14.13 '@types/responselike': 1.0.3 '@types/d3-scale-chromatic@3.0.3': {} @@ -8362,7 +8365,7 @@ snapshots: '@types/jsdom@20.0.1': dependencies: - '@types/node': 20.12.7 + '@types/node': 20.14.13 '@types/tough-cookie': 4.0.5 parse5: 7.1.2 @@ -8374,7 +8377,7 @@ snapshots: '@types/keyv@3.1.4': dependencies: - '@types/node': 20.12.7 + '@types/node': 20.14.13 '@types/lodash-es@4.17.12': dependencies: @@ -8440,7 +8443,7 @@ snapshots: '@types/responselike@1.0.3': dependencies: - '@types/node': 20.12.7 + '@types/node': 20.14.13 '@types/stack-utils@2.0.3': {} @@ -10738,13 +10741,13 @@ snapshots: jest-mock@29.7.0: dependencies: '@jest/types': 29.6.3 - '@types/node': 20.12.7 + '@types/node': 20.14.13 jest-util: 29.7.0 jest-util@29.7.0: dependencies: '@jest/types': 29.6.3 - '@types/node': 20.12.7 + '@types/node': 20.14.13 chalk: 4.1.2 ci-info: 3.9.0 graceful-fs: 4.2.11