Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: vector store cleanup #1175

Merged
merged 9 commits into from
Sep 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .changeset/three-islands-brake.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
"llamaindex": patch
---

refactor: export vector stores at the top level only in the Node.js environment

If you see a missing-module error, change your vector-store-related imports to `llamaindex/vector-store`.
15 changes: 15 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,11 @@ jobs:
matrix:
node-version: [18.x, 20.x, 22.x]
name: E2E on Node.js ${{ matrix.node-version }}

env: POSTGRES_DB=vectordb
POSTGRES_USER=testuser
POSTGRES_PASSWORD=testpwd
POSTGRES_HOST_AUTH_METHOD=trust
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
Expand All @@ -45,6 +50,16 @@ jobs:

steps:
- uses: actions/checkout@v4
- uses: ankane/setup-postgres@v1
with:
database: llamaindex_node_test
dev-files: true
- run: |
cd /tmp
git clone --branch v0.7.0 https://github.com/pgvector/pgvector.git
cd pgvector
make
sudo make install
- uses: pnpm/action-setup@v4
- name: Setup Node.js
uses: actions/setup-node@v4
Expand Down
14 changes: 11 additions & 3 deletions packages/llamaindex/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,6 @@
"openai": "^4.57.0",
"papaparse": "^5.4.1",
"pathe": "^1.1.2",
"pg": "^8.12.0",
"pgvector": "^0.2.0",
"portkey-ai": "0.1.16",
"rake-modified": "^1.0.8",
"string-strip-html": "^13.4.8",
Expand All @@ -72,11 +70,19 @@
"zod": "^3.23.8"
},
"peerDependencies": {
"@notionhq/client": "^2.2.15"
"@notionhq/client": "^2.2.15",
"pg": "^8.12.0",
"pgvector": "0.2.0"
},
"peerDependenciesMeta": {
"@notionhq/client": {
"optional": true
},
"pg": {
"optional": true
},
"pgvector": {
"optional": true
}
},
"devDependencies": {
Expand All @@ -85,6 +91,8 @@
"@swc/core": "^1.7.22",
"concurrently": "^8.2.2",
"glob": "^11.0.0",
"pg": "^8.12.0",
"pgvector": "0.2.0",
"typescript": "^5.5.4"
},
"engines": {
Expand Down
4 changes: 4 additions & 0 deletions packages/llamaindex/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,7 @@ export { GeminiVertexSession } from "./llm/gemini/vertex.js";
// Expose AzureDynamicSessionTool for node.js runtime only
export { JinaAIEmbedding } from "./embeddings/JinaAIEmbedding.js";
export { AzureDynamicSessionTool } from "./tools/AzureDynamicSessionTool.node.js";

// Export vector store modules at the top level for the node.js runtime only,
// as we cannot guarantee that they will work in other environments
export * from "./vector-store.js";
10 changes: 5 additions & 5 deletions packages/llamaindex/src/indices/vectorStore/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,16 +29,16 @@ import {
import type { BaseNodePostprocessor } from "../../postprocessors/types.js";
import type { StorageContext } from "../../storage/StorageContext.js";
import { storageContextFromDefaults } from "../../storage/StorageContext.js";
import type { BaseIndexStore } from "../../storage/indexStore/types.js";
import type { BaseSynthesizer } from "../../synthesizers/types.js";
import type { QueryEngine } from "../../types.js";
import type {
MetadataFilters,
VectorStore,
VectorStoreByType,
VectorStoreQueryResult,
} from "../../storage/index.js";
import type { BaseIndexStore } from "../../storage/indexStore/types.js";
import { VectorStoreQueryMode } from "../../storage/vectorStore/types.js";
import type { BaseSynthesizer } from "../../synthesizers/types.js";
import type { QueryEngine } from "../../types.js";
} from "../../vector-store/index.js";
import { VectorStoreQueryMode } from "../../vector-store/types.js";
import type { BaseIndexInit } from "../BaseIndex.js";
import { BaseIndex } from "../BaseIndex.js";
import { IndexDict, IndexStructType } from "../json-to-index-struct.js";
Expand Down
5 changes: 1 addition & 4 deletions packages/llamaindex/src/ingestion/IngestionPipeline.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,7 @@ import {
type Metadata,
} from "@llamaindex/core/schema";
import type { BaseDocumentStore } from "../storage/docStore/types.js";
import type {
VectorStore,
VectorStoreByType,
} from "../storage/vectorStore/types.js";
import type { VectorStore, VectorStoreByType } from "../vector-store/types.js";
import { IngestionCache, getTransformationHash } from "./IngestionCache.js";
import {
DocStoreStrategy,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { BaseNode, TransformComponent } from "@llamaindex/core/schema";
import type { BaseDocumentStore } from "../../storage/docStore/types.js";
import type { VectorStore } from "../../storage/vectorStore/types.js";
import type { VectorStore } from "../../vector-store/types.js";
import { classify } from "./classify.js";

/**
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { BaseNode, TransformComponent } from "@llamaindex/core/schema";
import type { BaseDocumentStore } from "../../storage/docStore/types.js";
import type { VectorStore } from "../../storage/vectorStore/types.js";
import type { VectorStore } from "../../vector-store/types.js";
import { classify } from "./classify.js";

/**
Expand Down
2 changes: 1 addition & 1 deletion packages/llamaindex/src/ingestion/strategies/index.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { TransformComponent } from "@llamaindex/core/schema";
import type { BaseDocumentStore } from "../../storage/docStore/types.js";
import type { VectorStore } from "../../storage/vectorStore/types.js";
import type { VectorStore } from "../../vector-store/types.js";
import { DuplicatesStrategy } from "./DuplicatesStrategy.js";
import { UpsertsAndDeleteStrategy } from "./UpsertsAndDeleteStrategy.js";
import { UpsertsStrategy } from "./UpsertsStrategy.js";
Expand Down
4 changes: 2 additions & 2 deletions packages/llamaindex/src/storage/StorageContext.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@ import {
import { ModalityType, ObjectType } from "@llamaindex/core/schema";
import { path } from "@llamaindex/env";
import { getImageEmbedModel } from "../internal/settings/image-embed-model.js";
import { SimpleVectorStore } from "../vector-store/SimpleVectorStore.js";
import type { VectorStore, VectorStoreByType } from "../vector-store/types.js";
import { SimpleDocumentStore } from "./docStore/SimpleDocumentStore.js";
import type { BaseDocumentStore } from "./docStore/types.js";
import { SimpleIndexStore } from "./indexStore/SimpleIndexStore.js";
import type { BaseIndexStore } from "./indexStore/types.js";
import { SimpleVectorStore } from "./vectorStore/SimpleVectorStore.js";
import type { VectorStore, VectorStoreByType } from "./vectorStore/types.js";

export interface StorageContext {
docStore: BaseDocumentStore;
Expand Down
10 changes: 0 additions & 10 deletions packages/llamaindex/src/storage/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,3 @@ export * from "./indexStore/types.js";
export { SimpleKVStore } from "./kvStore/SimpleKVStore.js";
export * from "./kvStore/types.js";
export * from "./StorageContext.js";
export { AstraDBVectorStore } from "./vectorStore/AstraDBVectorStore.js";
export { ChromaVectorStore } from "./vectorStore/ChromaVectorStore.js";
export { MilvusVectorStore } from "./vectorStore/MilvusVectorStore.js";
export { MongoDBAtlasVectorSearch } from "./vectorStore/MongoDBAtlasVectorStore.js";
export { PGVectorStore } from "./vectorStore/PGVectorStore.js";
export { PineconeVectorStore } from "./vectorStore/PineconeVectorStore.js";
export { QdrantVectorStore } from "./vectorStore/QdrantVectorStore.js";
export { SimpleVectorStore } from "./vectorStore/SimpleVectorStore.js";
export * from "./vectorStore/types.js";
export { WeaviateVectorStore } from "./vectorStore/WeaviateVectorStore.js";
1 change: 1 addition & 0 deletions packages/llamaindex/src/vector-store.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
export * from "./vector-store/index.js";
Original file line number Diff line number Diff line change
Expand Up @@ -3,23 +3,32 @@ import type pg from "pg";
import {
FilterCondition,
FilterOperator,
VectorStoreBase,
type IEmbedModel,
type MetadataFilter,
type MetadataFilterValue,
VectorStoreBase,
type VectorStoreNoEmbedModel,
type VectorStoreQuery,
type VectorStoreQueryResult,
} from "./types.js";

import { escapeLikeString } from "./utils.js";

import type { BaseEmbedding } from "@llamaindex/core/embeddings";
import type { BaseNode, Metadata } from "@llamaindex/core/schema";
import { Document, MetadataMode } from "@llamaindex/core/schema";

export const PGVECTOR_SCHEMA = "public";
export const PGVECTOR_TABLE = "llamaindex_embedding";

/**
 * Options accepted by the PGVectorStore constructor when it is given a
 * configuration object rather than an existing pg client. Every field is
 * optional.
 */
export type PGVectorStoreConfig = {
// Schema name used for the embedding table; the constructor falls back to PGVECTOR_SCHEMA.
schemaName?: string | undefined;
// Table name used for the embeddings; the constructor falls back to PGVECTOR_TABLE.
tableName?: string | undefined;
// Database name forwarded to the pg Client as its `database` option.
database?: string | undefined;
// Connection string forwarded to the pg Client as its `connectionString` option.
connectionString?: string | undefined;
// Embedding dimensionality; the constructor falls back to 1536.
dimensions?: number | undefined;
// Embedding model passed to the base-class constructor via super().
embedModel?: BaseEmbedding | undefined;
};

/**
* Provides support for writing and querying vector data in Postgres.
* Note: Can't be used with data created using the Python version of the vector store (https://docs.llamaindex.ai/en/stable/examples/vector_stores/postgres.html)
Expand All @@ -33,10 +42,12 @@ export class PGVectorStore
private collection: string = "";
private schemaName: string = PGVECTOR_SCHEMA;
private tableName: string = PGVECTOR_TABLE;

private database: string | undefined = undefined;
private connectionString: string | undefined = undefined;
private dimensions: number = 1536;

private db?: pg.Client;
private db?: pg.ClientBase;

/**
* Constructs a new instance of the PGVectorStore
Expand All @@ -48,26 +59,27 @@ export class PGVectorStore
* PGPASSWORD=your database password
* PGDATABASE=your database name
* PGPORT=your database port
*
* @param {object} config - The configuration settings for the instance.
* @param {string} config.schemaName - The name of the schema (optional). Defaults to PGVECTOR_SCHEMA.
* @param {string} config.tableName - The name of the table (optional). Defaults to PGVECTOR_TABLE.
* @param {string} config.connectionString - The connection string (optional).
* @param {number} config.dimensions - The dimensions of the embedding model.
*/
constructor(
config?: {
schemaName?: string;
tableName?: string;
connectionString?: string;
dimensions?: number;
} & Partial<IEmbedModel>,
) {
super(config?.embedModel);
this.schemaName = config?.schemaName ?? PGVECTOR_SCHEMA;
this.tableName = config?.tableName ?? PGVECTOR_TABLE;
this.connectionString = config?.connectionString;
this.dimensions = config?.dimensions ?? 1536;
constructor(configOrClient?: PGVectorStoreConfig | pg.ClientBase) {
// We cannot import pg from top level, it might have side effects
// so we only check if the config.connect function exists
if (
configOrClient &&
"connect" in configOrClient &&
typeof configOrClient.connect === "function"
) {
const db = configOrClient as pg.ClientBase;
super();
this.db = db;
} else {
const config = configOrClient as PGVectorStoreConfig;
super(config?.embedModel);
this.schemaName = config?.schemaName ?? PGVECTOR_SCHEMA;
this.tableName = config?.tableName ?? PGVECTOR_TABLE;
this.database = config?.database;
this.connectionString = config?.connectionString;
this.dimensions = config?.dimensions ?? 1536;
}
}

/**
Expand All @@ -92,7 +104,7 @@ export class PGVectorStore
return this.collection;
}

private async getDb(): Promise<pg.Client> {
private async getDb(): Promise<pg.ClientBase> {
if (!this.db) {
try {
const pg = await import("pg");
Expand All @@ -102,6 +114,7 @@ export class PGVectorStore
// Create DB connection
// Read connection params from env - see comment block above
const db = new Client({
database: this.database,
connectionString: this.connectionString,
});
await db.connect();
Expand All @@ -110,9 +123,6 @@ export class PGVectorStore
await db.query("CREATE EXTENSION IF NOT EXISTS vector");
await registerType(db);

// Check schema, table(s), index(es)
await this.checkSchema(db);

// All good? Keep the connection reference
this.db = db;
} catch (err) {
Expand All @@ -121,10 +131,15 @@ export class PGVectorStore
}
}

const db = this.db;

// Check schema, table(s), index(es)
await this.checkSchema(db);

return Promise.resolve(this.db);
}

private async checkSchema(db: pg.Client) {
private async checkSchema(db: pg.ClientBase) {
await db.query(`CREATE SCHEMA IF NOT EXISTS ${this.schemaName}`);

const tbl = `CREATE TABLE IF NOT EXISTS ${this.schemaName}.${this.tableName}(
Expand Down Expand Up @@ -171,26 +186,22 @@ export class PGVectorStore
}

private getDataToInsert(embeddingResults: BaseNode<Metadata>[]) {
const result = [];
for (let index = 0; index < embeddingResults.length; index++) {
const row = embeddingResults[index]!;

const id: any = row.id_.length ? row.id_ : null;
const meta = row.metadata || {};
meta.create_date = new Date();
return embeddingResults.map((node) => {
const id: any = node.id_.length ? node.id_ : null;
const meta = node.metadata || {};
if (!meta.create_date) {
meta.create_date = new Date();
}

const params = [
return [
id,
"",
this.collection,
row.getContent(MetadataMode.EMBED),
node.getContent(MetadataMode.NONE),
meta,
"[" + row.getEmbedding().join(",") + "]",
"[" + node.getEmbedding().join(",") + "]",
];

result.push(params);
}
return result;
});
}

/**
Expand All @@ -201,7 +212,7 @@ export class PGVectorStore
*/
async add(embeddingResults: BaseNode<Metadata>[]): Promise<string[]> {
if (embeddingResults.length === 0) {
console.debug("Empty list sent to PGVectorStore::add");
console.warn("Empty list sent to PGVectorStore::add");
return [];
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,8 @@ import type { BaseEmbedding } from "@llamaindex/core/embeddings";
import { DEFAULT_PERSIST_DIR } from "@llamaindex/core/global";
import type { BaseNode } from "@llamaindex/core/schema";
import { fs, path } from "@llamaindex/env";
import {
getTopKEmbeddings,
getTopKMMREmbeddings,
} from "../../internal/utils.js";
import { exists } from "../FileSystem.js";
import { getTopKEmbeddings, getTopKMMREmbeddings } from "../internal/utils.js";
import { exists } from "../storage/FileSystem.js";
import {
FilterOperator,
VectorStoreBase,
Expand Down
10 changes: 10 additions & 0 deletions packages/llamaindex/src/vector-store/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
export * from "./AstraDBVectorStore.js";
export * from "./ChromaVectorStore.js";
export * from "./MilvusVectorStore.js";
export * from "./MongoDBAtlasVectorStore.js";
export * from "./PGVectorStore.js";
export * from "./PineconeVectorStore.js";
export * from "./QdrantVectorStore.js";
export * from "./SimpleVectorStore.js";
export * from "./types.js";
export * from "./WeaviateVectorStore.js";
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import type { BaseEmbedding } from "@llamaindex/core/embeddings";
import type { BaseNode, ModalityType } from "@llamaindex/core/schema";
import { getEmbeddedModel } from "../../internal/settings/EmbedModel.js";
import { getEmbeddedModel } from "../internal/settings/EmbedModel.js";

export interface VectorStoreQueryResult {
nodes?: BaseNode[];
Expand Down
Loading