Skip to content

Commit

Permalink
fix: vector store cleanup (run-llama#1175)
Browse files Browse the repository at this point in the history
  • Loading branch information
himself65 authored Sep 11, 2024
1 parent 4810364 commit d3bc663
Show file tree
Hide file tree
Showing 31 changed files with 213 additions and 98 deletions.
7 changes: 7 additions & 0 deletions .changeset/three-islands-brake.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
"llamaindex": patch
---

refactor: export vector stores at the top level only in the Node.js environment

If you see a missing-module error, change your vector-store imports to `llamaindex/vector-store`.
15 changes: 15 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,11 @@ jobs:
matrix:
node-version: [18.x, 20.x, 22.x]
name: E2E on Node.js ${{ matrix.node-version }}

# Job-level environment variables: GitHub Actions expects a mapping of
# `NAME: value` pairs here, not shell-style `NAME=value` lines.
env:
  POSTGRES_DB: vectordb
  POSTGRES_USER: testuser
  POSTGRES_PASSWORD: testpwd
  POSTGRES_HOST_AUTH_METHOD: trust
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
Expand All @@ -45,6 +50,16 @@ jobs:

steps:
- uses: actions/checkout@v4
- uses: ankane/setup-postgres@v1
with:
database: llamaindex_node_test
dev-files: true
- run: |
cd /tmp
git clone --branch v0.7.0 https://github.com/pgvector/pgvector.git
cd pgvector
make
sudo make install
- uses: pnpm/action-setup@v4
- name: Setup Node.js
uses: actions/setup-node@v4
Expand Down
14 changes: 11 additions & 3 deletions packages/llamaindex/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,6 @@
"openai": "^4.57.0",
"papaparse": "^5.4.1",
"pathe": "^1.1.2",
"pg": "^8.12.0",
"pgvector": "^0.2.0",
"portkey-ai": "0.1.16",
"rake-modified": "^1.0.8",
"string-strip-html": "^13.4.8",
Expand All @@ -72,11 +70,19 @@
"zod": "^3.23.8"
},
"peerDependencies": {
"@notionhq/client": "^2.2.15"
"@notionhq/client": "^2.2.15",
"pg": "^8.12.0",
"pgvector": "0.2.0"
},
"peerDependenciesMeta": {
"@notionhq/client": {
"optional": true
},
"pg": {
"optional": true
},
"pgvector": {
"optional": true
}
},
"devDependencies": {
Expand All @@ -85,6 +91,8 @@
"@swc/core": "^1.7.22",
"concurrently": "^8.2.2",
"glob": "^11.0.0",
"pg": "^8.12.0",
"pgvector": "0.2.0",
"typescript": "^5.5.4"
},
"engines": {
Expand Down
4 changes: 4 additions & 0 deletions packages/llamaindex/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,7 @@ export { GeminiVertexSession } from "./llm/gemini/vertex.js";
// Expose AzureDynamicSessionTool for node.js runtime only
export { JinaAIEmbedding } from "./embeddings/JinaAIEmbedding.js";
export { AzureDynamicSessionTool } from "./tools/AzureDynamicSessionTool.node.js";

// Export vector store modules at the top level for the Node.js runtime only,
// as we cannot guarantee that they will work in other environments
export * from "./vector-store.js";
10 changes: 5 additions & 5 deletions packages/llamaindex/src/indices/vectorStore/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,16 +29,16 @@ import {
import type { BaseNodePostprocessor } from "../../postprocessors/types.js";
import type { StorageContext } from "../../storage/StorageContext.js";
import { storageContextFromDefaults } from "../../storage/StorageContext.js";
import type { BaseIndexStore } from "../../storage/indexStore/types.js";
import type { BaseSynthesizer } from "../../synthesizers/types.js";
import type { QueryEngine } from "../../types.js";
import type {
MetadataFilters,
VectorStore,
VectorStoreByType,
VectorStoreQueryResult,
} from "../../storage/index.js";
import type { BaseIndexStore } from "../../storage/indexStore/types.js";
import { VectorStoreQueryMode } from "../../storage/vectorStore/types.js";
import type { BaseSynthesizer } from "../../synthesizers/types.js";
import type { QueryEngine } from "../../types.js";
} from "../../vector-store/index.js";
import { VectorStoreQueryMode } from "../../vector-store/types.js";
import type { BaseIndexInit } from "../BaseIndex.js";
import { BaseIndex } from "../BaseIndex.js";
import { IndexDict, IndexStructType } from "../json-to-index-struct.js";
Expand Down
5 changes: 1 addition & 4 deletions packages/llamaindex/src/ingestion/IngestionPipeline.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,7 @@ import {
type Metadata,
} from "@llamaindex/core/schema";
import type { BaseDocumentStore } from "../storage/docStore/types.js";
import type {
VectorStore,
VectorStoreByType,
} from "../storage/vectorStore/types.js";
import type { VectorStore, VectorStoreByType } from "../vector-store/types.js";
import { IngestionCache, getTransformationHash } from "./IngestionCache.js";
import {
DocStoreStrategy,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { BaseNode, TransformComponent } from "@llamaindex/core/schema";
import type { BaseDocumentStore } from "../../storage/docStore/types.js";
import type { VectorStore } from "../../storage/vectorStore/types.js";
import type { VectorStore } from "../../vector-store/types.js";
import { classify } from "./classify.js";

/**
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { BaseNode, TransformComponent } from "@llamaindex/core/schema";
import type { BaseDocumentStore } from "../../storage/docStore/types.js";
import type { VectorStore } from "../../storage/vectorStore/types.js";
import type { VectorStore } from "../../vector-store/types.js";
import { classify } from "./classify.js";

/**
Expand Down
2 changes: 1 addition & 1 deletion packages/llamaindex/src/ingestion/strategies/index.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { TransformComponent } from "@llamaindex/core/schema";
import type { BaseDocumentStore } from "../../storage/docStore/types.js";
import type { VectorStore } from "../../storage/vectorStore/types.js";
import type { VectorStore } from "../../vector-store/types.js";
import { DuplicatesStrategy } from "./DuplicatesStrategy.js";
import { UpsertsAndDeleteStrategy } from "./UpsertsAndDeleteStrategy.js";
import { UpsertsStrategy } from "./UpsertsStrategy.js";
Expand Down
4 changes: 2 additions & 2 deletions packages/llamaindex/src/storage/StorageContext.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@ import {
import { ModalityType, ObjectType } from "@llamaindex/core/schema";
import { path } from "@llamaindex/env";
import { getImageEmbedModel } from "../internal/settings/image-embed-model.js";
import { SimpleVectorStore } from "../vector-store/SimpleVectorStore.js";
import type { VectorStore, VectorStoreByType } from "../vector-store/types.js";
import { SimpleDocumentStore } from "./docStore/SimpleDocumentStore.js";
import type { BaseDocumentStore } from "./docStore/types.js";
import { SimpleIndexStore } from "./indexStore/SimpleIndexStore.js";
import type { BaseIndexStore } from "./indexStore/types.js";
import { SimpleVectorStore } from "./vectorStore/SimpleVectorStore.js";
import type { VectorStore, VectorStoreByType } from "./vectorStore/types.js";

export interface StorageContext {
docStore: BaseDocumentStore;
Expand Down
10 changes: 0 additions & 10 deletions packages/llamaindex/src/storage/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,3 @@ export * from "./indexStore/types.js";
export { SimpleKVStore } from "./kvStore/SimpleKVStore.js";
export * from "./kvStore/types.js";
export * from "./StorageContext.js";
export { AstraDBVectorStore } from "./vectorStore/AstraDBVectorStore.js";
export { ChromaVectorStore } from "./vectorStore/ChromaVectorStore.js";
export { MilvusVectorStore } from "./vectorStore/MilvusVectorStore.js";
export { MongoDBAtlasVectorSearch } from "./vectorStore/MongoDBAtlasVectorStore.js";
export { PGVectorStore } from "./vectorStore/PGVectorStore.js";
export { PineconeVectorStore } from "./vectorStore/PineconeVectorStore.js";
export { QdrantVectorStore } from "./vectorStore/QdrantVectorStore.js";
export { SimpleVectorStore } from "./vectorStore/SimpleVectorStore.js";
export * from "./vectorStore/types.js";
export { WeaviateVectorStore } from "./vectorStore/WeaviateVectorStore.js";
1 change: 1 addition & 0 deletions packages/llamaindex/src/vector-store.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
export * from "./vector-store/index.js";
Original file line number Diff line number Diff line change
Expand Up @@ -3,23 +3,32 @@ import type pg from "pg";
import {
FilterCondition,
FilterOperator,
VectorStoreBase,
type IEmbedModel,
type MetadataFilter,
type MetadataFilterValue,
VectorStoreBase,
type VectorStoreNoEmbedModel,
type VectorStoreQuery,
type VectorStoreQueryResult,
} from "./types.js";

import { escapeLikeString } from "./utils.js";

import type { BaseEmbedding } from "@llamaindex/core/embeddings";
import type { BaseNode, Metadata } from "@llamaindex/core/schema";
import { Document, MetadataMode } from "@llamaindex/core/schema";

/** Default Postgres schema name, used when no `schemaName` is configured. */
export const PGVECTOR_SCHEMA = "public";
/** Default table name, used when no `tableName` is configured. */
export const PGVECTOR_TABLE = "llamaindex_embedding";

/**
 * Configuration object accepted by the PGVectorStore constructor when it is
 * not given an existing client. Every field is optional.
 */
export type PGVectorStoreConfig = {
/** Schema name; the constructor falls back to PGVECTOR_SCHEMA when omitted. */
schemaName?: string | undefined;
/** Table name; the constructor falls back to PGVECTOR_TABLE when omitted. */
tableName?: string | undefined;
/** Passed as `database` to the pg Client that the store creates. */
database?: string | undefined;
/** Passed as `connectionString` to the pg Client that the store creates. */
connectionString?: string | undefined;
/** Dimensions of the embedding model; the constructor falls back to 1536 when omitted. */
dimensions?: number | undefined;
/** Embedding model forwarded to the base class constructor. */
embedModel?: BaseEmbedding | undefined;
};

/**
* Provides support for writing and querying vector data in Postgres.
* Note: Can't be used with data created using the Python version of the vector store (https://docs.llamaindex.ai/en/stable/examples/vector_stores/postgres.html)
Expand All @@ -33,10 +42,12 @@ export class PGVectorStore
private collection: string = "";
private schemaName: string = PGVECTOR_SCHEMA;
private tableName: string = PGVECTOR_TABLE;

private database: string | undefined = undefined;
private connectionString: string | undefined = undefined;
private dimensions: number = 1536;

private db?: pg.Client;
private db?: pg.ClientBase;

/**
* Constructs a new instance of the PGVectorStore
Expand All @@ -48,26 +59,27 @@ export class PGVectorStore
* PGPASSWORD=your database password
* PGDATABASE=your database name
* PGPORT=your database port
*
* @param {object} config - The configuration settings for the instance.
* @param {string} config.schemaName - The name of the schema (optional). Defaults to PGVECTOR_SCHEMA.
* @param {string} config.tableName - The name of the table (optional). Defaults to PGVECTOR_TABLE.
* @param {string} config.connectionString - The connection string (optional).
* @param {number} config.dimensions - The dimensions of the embedding model.
*/
constructor(
config?: {
schemaName?: string;
tableName?: string;
connectionString?: string;
dimensions?: number;
} & Partial<IEmbedModel>,
) {
super(config?.embedModel);
this.schemaName = config?.schemaName ?? PGVECTOR_SCHEMA;
this.tableName = config?.tableName ?? PGVECTOR_TABLE;
this.connectionString = config?.connectionString;
this.dimensions = config?.dimensions ?? 1536;
constructor(configOrClient?: PGVectorStoreConfig | pg.ClientBase) {
// We cannot import pg at the top level because it might have side effects,
// so we only check whether the argument has a `connect` function
if (
configOrClient &&
"connect" in configOrClient &&
typeof configOrClient.connect === "function"
) {
const db = configOrClient as pg.ClientBase;
super();
this.db = db;
} else {
const config = configOrClient as PGVectorStoreConfig;
super(config?.embedModel);
this.schemaName = config?.schemaName ?? PGVECTOR_SCHEMA;
this.tableName = config?.tableName ?? PGVECTOR_TABLE;
this.database = config?.database;
this.connectionString = config?.connectionString;
this.dimensions = config?.dimensions ?? 1536;
}
}

/**
Expand All @@ -92,7 +104,7 @@ export class PGVectorStore
return this.collection;
}

private async getDb(): Promise<pg.Client> {
private async getDb(): Promise<pg.ClientBase> {
if (!this.db) {
try {
const pg = await import("pg");
Expand All @@ -102,6 +114,7 @@ export class PGVectorStore
// Create DB connection
// Read connection params from env - see comment block above
const db = new Client({
database: this.database,
connectionString: this.connectionString,
});
await db.connect();
Expand All @@ -110,9 +123,6 @@ export class PGVectorStore
await db.query("CREATE EXTENSION IF NOT EXISTS vector");
await registerType(db);

// Check schema, table(s), index(es)
await this.checkSchema(db);

// All good? Keep the connection reference
this.db = db;
} catch (err) {
Expand All @@ -121,10 +131,15 @@ export class PGVectorStore
}
}

const db = this.db;

// Check schema, table(s), index(es)
await this.checkSchema(db);

return Promise.resolve(this.db);
}

private async checkSchema(db: pg.Client) {
private async checkSchema(db: pg.ClientBase) {
await db.query(`CREATE SCHEMA IF NOT EXISTS ${this.schemaName}`);

const tbl = `CREATE TABLE IF NOT EXISTS ${this.schemaName}.${this.tableName}(
Expand Down Expand Up @@ -171,26 +186,22 @@ export class PGVectorStore
}

private getDataToInsert(embeddingResults: BaseNode<Metadata>[]) {
const result = [];
for (let index = 0; index < embeddingResults.length; index++) {
const row = embeddingResults[index]!;

const id: any = row.id_.length ? row.id_ : null;
const meta = row.metadata || {};
meta.create_date = new Date();
return embeddingResults.map((node) => {
const id: any = node.id_.length ? node.id_ : null;
const meta = node.metadata || {};
if (!meta.create_date) {
meta.create_date = new Date();
}

const params = [
return [
id,
"",
this.collection,
row.getContent(MetadataMode.EMBED),
node.getContent(MetadataMode.NONE),
meta,
"[" + row.getEmbedding().join(",") + "]",
"[" + node.getEmbedding().join(",") + "]",
];

result.push(params);
}
return result;
});
}

/**
Expand All @@ -201,7 +212,7 @@ export class PGVectorStore
*/
async add(embeddingResults: BaseNode<Metadata>[]): Promise<string[]> {
if (embeddingResults.length === 0) {
console.debug("Empty list sent to PGVectorStore::add");
console.warn("Empty list sent to PGVectorStore::add");
return [];
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,8 @@ import type { BaseEmbedding } from "@llamaindex/core/embeddings";
import { DEFAULT_PERSIST_DIR } from "@llamaindex/core/global";
import type { BaseNode } from "@llamaindex/core/schema";
import { fs, path } from "@llamaindex/env";
import {
getTopKEmbeddings,
getTopKMMREmbeddings,
} from "../../internal/utils.js";
import { exists } from "../FileSystem.js";
import { getTopKEmbeddings, getTopKMMREmbeddings } from "../internal/utils.js";
import { exists } from "../storage/FileSystem.js";
import {
FilterOperator,
VectorStoreBase,
Expand Down
10 changes: 10 additions & 0 deletions packages/llamaindex/src/vector-store/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
export * from "./AstraDBVectorStore.js";
export * from "./ChromaVectorStore.js";
export * from "./MilvusVectorStore.js";
export * from "./MongoDBAtlasVectorStore.js";
export * from "./PGVectorStore.js";
export * from "./PineconeVectorStore.js";
export * from "./QdrantVectorStore.js";
export * from "./SimpleVectorStore.js";
export * from "./types.js";
export * from "./WeaviateVectorStore.js";
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import type { BaseEmbedding } from "@llamaindex/core/embeddings";
import type { BaseNode, ModalityType } from "@llamaindex/core/schema";
import { getEmbeddedModel } from "../../internal/settings/EmbedModel.js";
import { getEmbeddedModel } from "../internal/settings/EmbedModel.js";

export interface VectorStoreQueryResult {
nodes?: BaseNode[];
Expand Down
Loading

0 comments on commit d3bc663

Please sign in to comment.