Skip to content

Commit

Permalink
feat: add sqlite vector store
Browse files Browse the repository at this point in the history
  • Loading branch information
himself65 committed Sep 11, 2024
1 parent ee17fb4 commit bec65d0
Show file tree
Hide file tree
Showing 5 changed files with 698 additions and 10 deletions.
4 changes: 4 additions & 0 deletions packages/llamaindex/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -89,10 +89,14 @@
"@notionhq/client": "^2.2.15",
"@swc/cli": "^0.4.0",
"@swc/core": "^1.7.22",
"@types/better-sqlite3": "^7.6.11",
"better-sqlite3": "^11.3.0",
"concurrently": "^8.2.2",
"glob": "^11.0.0",
"pg": "^8.12.0",
"pgvector": "0.2.0",
"sqlite-vec": "0.1.2-alpha.9",
"sqlite3": "^5.1.7",
"typescript": "^5.5.4"
},
"engines": {
Expand Down
178 changes: 178 additions & 0 deletions packages/llamaindex/src/vector-store/SQLiteVectorStore.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
import {
VectorStoreBase,
type VectorStoreNoEmbedModel,
type VectorStoreQuery,
type VectorStoreQueryResult
} from './types.js';

import type { BaseEmbedding } from '@llamaindex/core/embeddings';
import {
type BaseNode,
Document,
type Metadata,
MetadataMode
} from '@llamaindex/core/schema';
import {
type StatementResultingChanges,
type SupportedValueType
} from 'node:sqlite';

/**
 * Minimal prepared-statement surface this module relies on.
 *
 * The parameter and result types are borrowed from `node:sqlite`; other
 * drivers are adapted to this shape by a thin wrapper.
 */
interface Statement {
  /** Runs the statement with positional parameters and returns every result as an array. */
  all (...params: SupportedValueType[]): unknown[];

  /** Runs the statement with positional parameters and returns a single, untyped result. */
  get (...params: SupportedValueType[]): unknown;

  /** Runs the statement with positional parameters and reports the resulting changes. */
  run (...params: SupportedValueType[]): StatementResultingChanges;
}

/**
 * Driver-agnostic database handle used by the vector store.
 *
 * We use the node.js version as the standard shape; other drivers are wrapped
 * so that they expose exactly these four members.
 */
type Database = {
  /** Compiles `sql` into a reusable statement. */
  prepare: (sql: string) => Statement;
  /** Executes `sql` without returning anything. */
  exec (sql: string): void;
  /** Loads an extension from the given path. */
  loadExtension: (path: string) => void;
  /** Closes the underlying connection. */
  close (): void;
};

/**
 * Options accepted by the `SQLiteVectorStore` constructor.
 */
export type SQLiteVectorStoreConfig = {
  /** Database location; recorded on the store as `filename`. */
  filename: string;
  /** Embedding model forwarded to the base vector store. */
  embedModel?: BaseEmbedding | undefined;
  /** Table that holds the vectors; the store falls back to `vector_data`. */
  tableName?: string | undefined;
  /** Declared width of the embeddings column; the store falls back to 1536. */
  dimensions?: number | undefined;
};

/**
 * A row of the vector table as returned by the driver. The `metadata` and
 * `embeddings` columns hold JSON text written by `SQLiteVectorStore.add`.
 */
type SQLiteVectorRow = {
  id: number | bigint;
  document: string;
  metadata: string;
  embeddings: string;
};

/**
 * Provides support for writing and querying vector data in SQLite.
 *
 * Each node is stored as one row: its text, its metadata (JSON) and its
 * embedding (JSON array). Rows are identified by the table's auto-increment
 * `id`; that id (as a string) is what `add` returns, what `query` reports in
 * `ids`, and what `delete` expects.
 *
 * NOTE: `tableName` and `dimensions` are interpolated into SQL verbatim, so
 * they must come from trusted configuration, never from end-user input.
 */
export class SQLiteVectorStore
  extends VectorStoreBase
  implements VectorStoreNoEmbedModel {
  storesText: boolean = true;

  readonly tableName: string = 'vector_data';
  readonly dimensions: number = 1536;
  private db?: Database;

  public readonly filename: string;

  /**
   * Creates a store description. No connection is opened here; use a factory
   * such as {@link SQLiteVectorStore.fromBetterSqlite3} to obtain a usable store.
   *
   * @param config - Database location plus optional table name, embedding
   *   width and embedding model.
   */
  constructor (config: SQLiteVectorStoreConfig) {
    super(config.embedModel);
    this.tableName = config.tableName ?? this.tableName;
    this.dimensions = config.dimensions ?? this.dimensions;
    this.filename = config.filename;
  }

  /**
   * Opens `filename` with `better-sqlite3`, wraps the connection in the
   * driver-agnostic `Database` shape and makes sure the vector table exists.
   *
   * @param filename - Path handed to the `better-sqlite3` constructor.
   * @param config - Optional table name, embedding width and embedding model;
   *   omitted fields use the class defaults.
   * @returns A store with an open, initialized connection.
   * @throws Whatever the driver throws when opening the database or creating
   *   the table; the connection is closed before the error is re-thrown.
   */
  static async fromBetterSqlite3 (
    filename: string,
    config: Omit<SQLiteVectorStoreConfig, 'filename'> = {}
  ): Promise<SQLiteVectorStore> {
    const betterSqlite3 = await import('better-sqlite3');
    // Depending on module interop the constructor is either the default
    // export or the namespace object itself.
    const Database = 'default' in betterSqlite3
      ? betterSqlite3.default
      : betterSqlite3;
    const db = new Database(filename);
    const wrapper = {
      loadExtension (path: string) {
        db.loadExtension(path);
      },
      close () {
        db.close();
      },
      exec (sql: string) {
        db.exec(sql);
      },
      prepare (sql: string) {
        const statement = db.prepare(sql);
        return {
          run (...params: SupportedValueType[]) {
            return statement.run(...params);
          },
          get (...params: SupportedValueType[]) {
            return statement.get(...params);
          },
          all (...params: SupportedValueType[]) {
            return statement.all(...params);
          }
        };
      }
    };

    try {
      const vectorStore = new SQLiteVectorStore({ ...config, filename });
      vectorStore.db = wrapper;
      await vectorStore.initializeDatabase();
      return vectorStore;
    } catch (error) {
      // Do not leak the open handle when setup fails.
      db.close();
      throw error;
    }
  }

  /**
   * Returns the underlying database handle.
   *
   * @throws Error if no connection has been attached to this store.
   */
  client (): Database {
    if (!this.db) {
      throw new Error('Database connection is not initialized.');
    }
    return this.db;
  }

  /**
   * Creates the vector table if it does not exist yet.
   *
   * @throws Error if no connection has been attached to this store.
   */
  async initializeDatabase () {
    this.client().exec(
      `CREATE TABLE IF NOT EXISTS ${this.tableName} (id INTEGER PRIMARY KEY AUTOINCREMENT, document TEXT, metadata TEXT, embeddings float[${this.dimensions}])`
    );
  }

  /**
   * Inserts one row per node.
   *
   * @param nodes - Nodes to store; every node must already carry an embedding.
   * @returns The row ids assigned by SQLite, as strings, in input order.
   * @throws Error if the connection is missing or a node has no embedding.
   */
  async add (nodes: BaseNode<Metadata>[]): Promise<string[]> {
    // Prepare once and reuse the statement for every node.
    const insert = this.client().prepare(
      `INSERT INTO ${this.tableName} (document, metadata, embeddings) VALUES (?, ?, ?)`
    );

    const ids: string[] = [];
    for (const node of nodes) {
      if (node.embedding === undefined) {
        throw new Error('Cannot add a node without an embedding.');
      }
      const { lastInsertRowid } = insert.run(
        node.getContent(MetadataMode.NONE),
        JSON.stringify(node.metadata),
        JSON.stringify(node.embedding)
      );
      ids.push(String(lastInsertRowid));
    }

    return ids;
  }

  /**
   * Removes the row with the given id.
   *
   * @param id - A row id as returned by `add` or reported by `query`.
   * @throws Error if no connection has been attached to this store.
   */
  async delete (id: string): Promise<void> {
    this.client().prepare(`DELETE FROM ${this.tableName} WHERE id = ?`).run(id);
  }

  /**
   * Returns the `similarityTopK` stored nodes closest to `queryEmbedding`.
   *
   * Embeddings are stored as JSON text, which SQLite cannot do arithmetic on,
   * so all rows are read and ranked here by Euclidean distance. This is linear
   * in the table size.
   *
   * @param query - Must contain `queryEmbedding`; `similarityTopK` caps the
   *   number of results (a negative value means "no cap", as with SQL `LIMIT`).
   * @returns Matching nodes, nearest first, with `similarities[i]` equal to
   *   `1 / (1 + distance)` for `nodes[i]` and `ids[i]` equal to its row id.
   * @throws Error if the connection is missing or `queryEmbedding` is absent.
   */
  async query (query: VectorStoreQuery): Promise<VectorStoreQueryResult> {
    const db = this.client();

    const { queryEmbedding, similarityTopK } = query;
    if (!queryEmbedding) {
      throw new Error('queryEmbedding is required to query SQLiteVectorStore.');
    }

    const rows = db.prepare(
      `SELECT id, document, metadata, embeddings FROM ${this.tableName}`
    ).all() as SQLiteVectorRow[];

    const ranked = rows.map(row => {
      const parsed: unknown = JSON.parse(row.embeddings);
      const embedding = Array.isArray(parsed) ? parsed as number[] : undefined;
      const distance = embedding === undefined
        ? Number.POSITIVE_INFINITY
        : Math.sqrt(SQLiteVectorStore.squaredDistance(embedding, queryEmbedding));
      return { row, embedding, distance };
    });
    // Explicit comparisons instead of subtraction: Infinity - Infinity is NaN.
    // The sort is stable, so ties keep table order.
    ranked.sort((a, b) =>
      a.distance < b.distance ? -1 : a.distance > b.distance ? 1 : 0);

    const limit = similarityTopK < 0 ? ranked.length : similarityTopK;
    const top = ranked.slice(0, limit);

    const nodes = top.map(({ row, embedding }) => new Document({
      id_: row.id.toString(),
      text: row.document,
      metadata: JSON.parse(row.metadata),
      embedding
    }));

    return {
      nodes,
      similarities: top.map(({ distance }) => 1 / (1 + distance)),
      ids: nodes.map(node => node.id_)
    };
  }

  /**
   * No-op: nothing is buffered in memory, every write goes straight to SQLite.
   *
   * @param persistPath - Ignored.
   */
  persist (persistPath: string): Promise<void> {
    return Promise.resolve();
  }

  /**
   * Sum of squared component differences, or `Infinity` when the two vectors
   * have different lengths and are therefore not comparable.
   */
  private static squaredDistance (
    a: readonly number[],
    b: readonly number[]
  ): number {
    if (a.length !== b.length) {
      return Number.POSITIVE_INFINITY;
    }
    return a.reduce((sum, value, index) => {
      const diff = value - (b[index] ?? 0);
      return sum + diff * diff;
    }, 0);
  }
}
1 change: 1 addition & 0 deletions packages/llamaindex/src/vector-store/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@ export * from "./QdrantVectorStore.js";
export * from "./SimpleVectorStore.js";
export * from "./SQLiteVectorStore.js";
export * from "./types.js";
export * from "./WeaviateVectorStore.js";
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import { describe, expect, test } from 'vitest';
import {
SQLiteVectorStore,
VectorStoreQueryMode
} from 'llamaindex/vector-store';
import { Document } from '@llamaindex/core/schema';

// Every test opens its own in-memory database so that no file is left on disk
// and no rows leak from one test (or one run) into the next.
describe("better sqlite3", () => {
  test("init from better sqlite3", async () => {
    const vectorStore = await SQLiteVectorStore.fromBetterSqlite3(':memory:');
    const client = vectorStore.client();
    expect(client).toBeDefined();
    client.close();
  });

  test('add and query', async () => {
    const nodes = [
      new Document({
        text: 'hello world',
        embedding: [0.1, 0.2, 0.3],
      }),
      new Document({
        text: 'hello world 2',
        embedding: [0.2, 0.3, 0.4],
      }),
    ];
    const queryFor = {
      mode: VectorStoreQueryMode.DEFAULT,
      similarityTopK: 1,
      queryEmbedding: [0.1, 0.2, 0.3]
    };
    const vectorStore = await SQLiteVectorStore.fromBetterSqlite3(':memory:');
    try {
      await vectorStore.add(nodes);

      // Two rows stored, top-1 requested: exactly one hit.
      const first = await vectorStore.query(queryFor);
      expect(first.nodes).toHaveLength(1);
      expect(first.ids).toHaveLength(1);
      const firstId = first.ids[0]!;
      await vectorStore.delete(firstId);

      // One row is left, and it must not be the one just deleted.
      const second = await vectorStore.query(queryFor);
      expect(second.nodes).toHaveLength(1);
      expect(second.ids).toHaveLength(1);
      const secondId = second.ids[0]!;
      expect(secondId).not.toBe(firstId);
      await vectorStore.delete(secondId);

      // Both rows deleted: the table is empty.
      const third = await vectorStore.query(queryFor);
      expect(third.nodes).toHaveLength(0);
      expect(third.ids).toHaveLength(0);
    } finally {
      vectorStore.client().close();
    }
  });
});
Loading

0 comments on commit bec65d0

Please sign in to comment.