Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: use milvus #102

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 12 additions & 5 deletions app/api/chat/engine/generate.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
import * as dotenv from "dotenv";
import { getDocuments } from "./loader";
import { initSettings } from "./settings";
import { storageContextFromDefaults, VectorStoreIndex } from "llamaindex";
import { STORAGE_CACHE_DIR } from "@/cl/app/api/chat/engine/shared";
import {
storageContextFromDefaults,
VectorStoreIndex,
MilvusVectorStore,
} from "llamaindex";
import { getMilvusClient } from "@/cl/app/api/chat/engine/shared";

// Load environment variables from local .env.development.local file
dotenv.config({ path: ".env.development.local" });
Expand All @@ -24,14 +28,17 @@ async function generateDatasource() {
console.log(`Generating storage context for datasource '${datasource}'...`);
// Split documents, create embeddings and store them in the storage context
const ms = await getRuntime(async () => {
const storageContext = await storageContextFromDefaults({
persistDir: `${STORAGE_CACHE_DIR}/${datasource}`,
});
const documents = await getDocuments(datasource);
// Set private=false to mark the document as public (required for filtering)
documents.forEach((doc) => {
doc.metadata["private"] = "false";
});
const milvusClient = getMilvusClient();
const vectorStore = new MilvusVectorStore({
milvusClient,
collection: datasource,
});
const storageContext = await storageContextFromDefaults({ vectorStore });
await VectorStoreIndex.fromDocuments(documents, {
storageContext,
});
Expand Down
37 changes: 23 additions & 14 deletions app/api/chat/engine/index.ts
Original file line number Diff line number Diff line change
@@ -1,20 +1,29 @@
import { SimpleDocumentStore, VectorStoreIndex } from "llamaindex";
import { storageContextFromDefaults } from "llamaindex/storage/StorageContext";
import { STORAGE_CACHE_DIR } from "@/cl/app/api/chat/engine/shared";
import { VectorStoreIndex } from "llamaindex";
import { MilvusVectorStore } from "llamaindex/storage/vectorStore/MilvusVectorStore";
import {
checkRequiredEnvVars,
getMilvusClient,
} from "@/cl/app/api/chat/engine/shared";

/**
 * Asks the Milvus server whether a collection with the given name exists.
 *
 * Obtains a client via `getMilvusClient()`, issues a `hasCollection` request
 * for `collection`, and hands back the `value` field of the response.
 *
 * @param collection - Name of the Milvus collection to look up.
 * @returns The `value` field of the `hasCollection` response
 *   (presumably truthy when the collection exists — confirm against the SDK).
 */
const checkColllectionExist = async (collection: string) => {
  const { value } = await getMilvusClient().hasCollection({
    collection_name: collection,
  });
  return value;
};

export async function getDataSource(datasource: string) {
console.log(`Using datasource: ${datasource}`);
const storageContext = await storageContextFromDefaults({
persistDir: `${STORAGE_CACHE_DIR}/${datasource}`,
});
checkRequiredEnvVars({ checkCollectionEnv: false }); // Do not check for collection env var
const milvusClient = getMilvusClient();

const numberOfDocs = Object.keys(
(storageContext.docStore as SimpleDocumentStore).toDict(),
).length;
if (numberOfDocs === 0) {
return null;
// Remove this check if you don't need to verify that the collection exists before creating the index;
// Milvus can create the collection automatically if it does not exist.
if (!(await checkColllectionExist(datasource))) {
throw new Error(`Collection "${datasource}" does not exist`);
}
return await VectorStoreIndex.init({
storageContext,
});

const store = new MilvusVectorStore({ milvusClient, collection: datasource });
return await VectorStoreIndex.fromVectorStore(store);
}
2 changes: 1 addition & 1 deletion create-llama.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ npx -y create-llama@0.1.27 \
--post-install-action none \
--no-llama-parse \
--example-file \
--vector-db none \
--vector-db milvus \
--use-pnpm \
-- cl >/dev/null

Expand Down
2 changes: 1 addition & 1 deletion next.config.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import { withSentryConfig } from "@sentry/nextjs";
/** @type {import('next').NextConfig} */
const nextConfig = {
experimental: {
serverComponentsExternalPackages: ["pdf-parse"],
serverComponentsExternalPackages: ["pdf-parse", "@zilliz/milvus2-sdk-node"],
outputFileTracingIncludes: {
"/*": ["./cache/**/*"],
"/api/**/*": ["node_modules/tiktoken/tiktoken_bg.wasm"]
Expand Down
3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,8 @@
"@apidevtools/swagger-parser": "^10.1.0",
"got": "10.7.0",
"ajv": "^8.12.0",
"tiktoken": "^1.0.15"
"tiktoken": "^1.0.15",
"@zilliz/milvus2-sdk-node": "^2.4.4"
},
"devDependencies": {
"@types/node": "^20.12.7",
Expand Down
21 changes: 12 additions & 9 deletions pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.