Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat/sqlite vec vector store #569

Open
wants to merge 13 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions melos.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ command:
cross_file: ^0.3.4+2
crypto: ^3.0.3
csv: ^6.0.0
drift: ^2.21.0
equatable: ^2.0.5
fetch_client: ^1.1.2
firebase_app_check: ^0.3.0
Expand All @@ -53,16 +54,20 @@ command:
math_expressions: ^2.6.0
meta: ^1.11.0
objectbox: ^4.0.1
path: ^1.9.0
pinecone: ^0.7.2
rxdart: ">=0.27.7 <0.29.0"
shared_preferences: ^2.3.0
shelf: ^1.4.2
shelf_router: ^1.1.4
sqlite3: ^2.4.6
supabase: ^2.2.7
uuid: ^4.5.1
web_socket_channel: ^3.0.1
dev_dependencies:
archive: ^3.6.1
build_runner: ^2.4.11
drift_dev: ^2.21.0
freezed: ^2.5.7
json_serializable: ^6.8.0
objectbox_generator: ^4.0.1
Expand Down
Binary file added packages/langchain_community/app.v5.db
Binary file not shown.
15 changes: 15 additions & 0 deletions packages/langchain_community/build.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# build_runner configuration for this package.
targets:
$default:
builders:
# ObjectBox code generator, enabled and given only the ObjectBox vector store
# sources as input.
objectbox_generator:generator:
enabled: true
generate_for:
- lib/src/vector_stores/objectbox/**.dart
# source_gen combining builder: the listed lint categories are passed as
# `ignore_for_file` options.
source_gen:combining_builder:
options:
ignore_for_file:
- type=lint
- subtype=lint
# NOTE(review): indentation is not visible here; this exclude presumably
# belongs to the combining builder so it skips the ObjectBox sources - confirm.
generate_for:
exclude:
- lib/src/vector_stores/objectbox/**.dart
Binary file not shown.
Binary file not shown.
Binary file not shown.
24 changes: 24 additions & 0 deletions packages/langchain_community/lib/dart_utils/debugging.dart
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
/// Prints [message] when the program is running in debug mode.
///
/// Nothing is printed when either the `dart.vm.profile` or the
/// `dart.vm.product` environment flag is set.
///
/// If [cond] is given, it is called with the string form of [message] and the
/// message is printed only when it returns `true`. [cond] is not called at all
/// outside debug mode.
void kDebugPrint(dynamic message, {bool Function(String str)? cond}) {
  const isDebug = !bool.fromEnvironment('dart.vm.profile') &&
      !bool.fromEnvironment('dart.vm.product');
  if (!isDebug) return;

  // [message] is dynamic while [cond] expects a String; interpolating avoids a
  // runtime TypeError for non-String messages and is a no-op for Strings.
  if (cond != null && !cond('$message')) return;

  // ignore: avoid_print
  print(message);
}

/// Whether the program is running in debug mode.
///
/// Debug mode means that neither the `dart.vm.profile` nor the
/// `dart.vm.product` environment flag is set.
bool get kDebugDartMode {
  const inProfile = bool.fromEnvironment('dart.vm.profile');
  const inProduct = bool.fromEnvironment('dart.vm.product');
  return !(inProfile || inProduct);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
import 'dart:convert';

import 'package:langchain_core/documents.dart';
import 'package:langchain_core/embeddings.dart';
import 'package:langchain_core/vector_stores.dart';

import 'src/database.dart';

/// SQLite with VEC extension as a vector database.
///
/// To use, you should have the `sqlite-vec` package installed.
///
/// Instances are obtained through [SQLiteVEC.create] or
/// [SQLiteVEC.fromTexts]; the constructor is private because the embedding
/// dimension is discovered asynchronously before the [Database] is built.
///
/// Example:
/// ```dart
/// import 'package:langchain_community/vector_stores/sqlite_vec.dart';
/// import 'package:langchain_community/embeddings/openai.dart';
///
/// // ... (rest of the example code)
/// ```
class SQLiteVEC extends VectorStore {
  /// Drift sqlite instance.
  final Database db;

  /// Table name.
  final String table;

  /// Database file path or `:memory:` for in-memory database.
  final String dbFile;

  /// Wires an already constructed [db] into the store.
  SQLiteVEC._internal({
    required super.embeddings,
    required this.table,
    required this.dbFile,
    required this.db,
  });

  /// Creates a new [SQLiteVEC] instance.
  ///
  /// A dummy query is embedded once with [embeddings] so that the length of
  /// the resulting vector can be passed to [Database] as the embedding
  /// dimension.
  static Future<SQLiteVEC> create({
    required Embeddings embeddings,
    required String table,
    required String dbFile,
  }) async {
    final dummyEmbedding = await embeddings.embedQuery('This is a dummy text');
    final embeddingDimension = dummyEmbedding.length;

    // NOTE(review): [table] is stored on the instance but is not forwarded to
    // [Database] here - confirm it is consumed elsewhere.
    final db = Database(embeddings, embeddingDimension, dbFile: dbFile);

    return SQLiteVEC._internal(
      embeddings: embeddings,
      table: table,
      dbFile: dbFile,
      db: db,
    );
  }

  /// Returns a [SQLiteVEC] initialized from [texts] and [embeddings].
  ///
  /// The store is built with [create] and then filled through [addTexts],
  /// pairing each text with the entry of [metadatas] at the same index.
  static Future<SQLiteVEC> fromTexts(
    List<String> texts,
    Embeddings embeddings, {
    List<Map<String, dynamic>>? metadatas,
    String table = 'langchain',
    String dbFile = 'vec',
  }) async {
    final vec = await SQLiteVEC.create(
      embeddings: embeddings,
      table: table,
      dbFile: dbFile,
    );
    await vec.addTexts(texts: texts, metadatas: metadatas);
    return vec;
  }

  /// Adds [texts] to the vector store and returns the ids of the stored
  /// chunks.
  ///
  /// Each text is inserted as a file entry together with its JSON-encoded
  /// metadata, then split with `chunkText`; every chunk is added to the
  /// database and linked to the file entry. A text without a matching entry
  /// in [metadatas] is stored with empty metadata.
  Future<List<int>> addTexts({
    required List<String> texts,
    List<Map<String, dynamic>>? metadatas,
  }) async {
    final ids = <int>[];
    for (var i = 0; i < texts.length; i++) {
      final metadata = (metadatas != null && i < metadatas.length)
          ? metadatas[i]
          : <String, dynamic>{};

      final fileId =
          await db.insertFile('in-memory', texts[i], jsonEncode(metadata));
      for (final chunk in chunkText(texts[i])) {
        final chunkId = await db.addChunk(chunk.$1);
        await db.insertFileEmbedding(fileId, chunkId, chunk.$2, chunk.$3);
        ids.add(chunkId);
      }
    }
    return ids;
  }

  /// Embeds [documents] with [embeddings] and stores them via [addVectors].
  @override
  Future<List<String>> addDocuments({
    required List<Document> documents,
  }) async {
    final vectors = await embeddings.embedDocuments(documents);
    return addVectors(
      vectors: vectors,
      documents: documents,
    );
  }

  /// Stores [vectors] alongside their [documents].
  ///
  /// The ids returned by the database are converted to strings.
  @override
  Future<List<String>> addVectors({
    required List<List<double>> vectors,
    required List<Document> documents,
  }) async {
    final ids = await db.addVectors(vectors: vectors, documents: documents);
    return ids.map((id) => id.toString()).toList();
  }

  /// Deletes the chunks identified by [ids].
  ///
  /// Every id is parsed with [int.parse], so a [FormatException] is thrown
  /// when an id is not an integer string.
  @override
  Future<void> delete({required List<String> ids}) {
    return db.deleteChunks(ids.map(int.parse).toList());
  }

  /// Runs a similarity search for [embedding], delegating to the database.
  @override
  Future<List<(Document, double)>> similaritySearchByVectorWithScores({
    required List<double> embedding,
    VectorStoreSimilaritySearch config = const VectorStoreSimilaritySearch(),
  }) async {
    return db.similaritySearchByVectorWithScores(
      embedding: embedding,
      config: config,
    );
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// We use a conditional export to expose the right connection factory depending
// on the platform.
export 'unsupported.dart'
if (dart.library.js) 'web.dart'
if (dart.library.ffi) 'native.dart';
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import 'dart:ffi';
import 'dart:io';

import 'package:drift/drift.dart';
import 'package:drift/native.dart';
import 'package:path/path.dart' as p;
import 'package:sqlite3/sqlite3.dart';

/// Resolves the file used to store the database called [name].
///
/// The returned [File] points at `<name>.db` inside the current working
/// directory. The file is only referenced here; nothing is created or opened.
Future<File> databaseFile(String name) async =>
    File(p.join(Directory.current.path, '$name.db'));

/// Creates the drift executor for platforms where `dart:ffi` is available.
///
/// Before any database is opened, the `sqlite3_vec_init` entry point of the
/// vector extension library is registered with sqlite3. The library requested
/// is `win_vec0` on Windows and `mac_vec0` everywhere else.
///
/// Passing `:memory:` as [name] yields an in-memory database; any other value
/// yields a delayed background connection to the file from [databaseFile].
QueryExecutor connect(String name) {
  // On Android and Linux, point SQLite's temporary directory at the system
  // temporary directory.
  final needsTempDirectory = Platform.isAndroid || Platform.isLinux;
  if (needsTempDirectory) {
    sqlite3.tempDirectory = Directory.systemTemp.path;
  }

  // NOTE(review): Linux and Android also request `mac_vec0` - confirm that is
  // the intended library name on those platforms.
  final libraryName = Platform.isWindows ? 'win_vec0' : 'mac_vec0';
  final vecExtension = SqliteExtension.inLibrary(
    _loadLibrary(libraryName),
    'sqlite3_vec_init',
  );
  sqlite3.ensureExtensionLoaded(vecExtension);

  if (name != ':memory:') {
    return DatabaseConnection.delayed(
      Future(
        () async => NativeDatabase.createBackgroundConnection(
          await databaseFile(name),
        ),
      ),
    );
  }
  return NativeDatabase.memory();
}

/// Opens the dynamic library called [name] using the platform's file suffix.
///
/// iOS and macOS open `<name>.dylib`, Android and Linux open `<name>.so`, and
/// Windows opens `<name>.dll` from the `extensions` folder inside the current
/// working directory. Any other platform raises an [UnsupportedError].
DynamicLibrary _loadLibrary(String name) {
  final String libraryPath;
  if (Platform.isIOS || Platform.isMacOS) {
    libraryPath = '$name.dylib';
  } else if (Platform.isAndroid || Platform.isLinux) {
    libraryPath = '$name.so';
  } else if (Platform.isWindows) {
    libraryPath = p.join(Directory.current.path, 'extensions', '$name.dll');
  } else {
    throw UnsupportedError('Unknown platform: ${Platform.operatingSystem}');
  }
  return DynamicLibrary.open(libraryPath);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import 'package:drift/drift.dart';

/// Always throws an [UnsupportedError]; shared by the stubs in this file.
Never _unsupported() => throw UnsupportedError(
      'No suitable database implementation was found on this platform.',
    );

/// Stub that is replaced by the `connect` implementation in native.dart or
/// web.dart, depending on the platform the app is compiled to.
///
/// Calling this stub always throws an [UnsupportedError].
DatabaseConnection connect(String name) => _unsupported();

/// Stub for schema validation on platforms without a database
/// implementation.
///
/// The returned future always completes with an [UnsupportedError].
Future<void> validateDatabaseSchema(GeneratedDatabase database) async =>
    _unsupported();
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import 'dart:async';

import 'package:drift/drift.dart';
import 'package:drift/wasm.dart';
import '../../../../../dart_utils/debugging.dart';

/// Opens the drift database called [name] for use on the web.
///
/// The connection is delayed: the database is opened with
/// [WasmDatabase.open], using `sqlite3_vec.wasm` and `drift_worker.js`. When
/// the open result reports missing browser features, the implementation that
/// was chosen instead is logged through [kDebugPrint].
DatabaseConnection connect(String name) => DatabaseConnection.delayed(
      Future(() async {
        final sqliteUri = Uri.parse('sqlite3_vec.wasm');
        final workerUri = Uri.parse('drift_worker.js');
        final result = await WasmDatabase.open(
          databaseName: name,
          sqlite3Uri: sqliteUri,
          driftWorkerUri: workerUri,
        );

        final missing = result.missingFeatures;
        if (missing.isNotEmpty) {
          kDebugPrint(
            'Using ${result.chosenImplementation} due to unsupported '
            'browser features: $missing',
          );
        }

        return result.resolvedExecutor;
      }),
    );

/// Web counterpart of the schema validation hook.
///
/// Validating database schemas only works on native platforms right now, so
/// this implementation performs no checks on [database].
Future<void> validateDatabaseSchema(GeneratedDatabase database) async {
  return;
}
Loading
Loading