From 5e599472bdcb8dc04e2212c391447692dc66b22a Mon Sep 17 00:00:00 2001
From: Thomas Hansen
Date: Mon, 26 Jun 2023 08:09:14 +0300
Subject: [PATCH 01/66] Adding back SqlStatement
---
src/sqlite-vss.cpp | 419 +++++++++++++++++++++++++--------------------
1 file changed, 231 insertions(+), 188 deletions(-)
diff --git a/src/sqlite-vss.cpp b/src/sqlite-vss.cpp
index b9f8c6a..6608eec 100644
--- a/src/sqlite-vss.cpp
+++ b/src/sqlite-vss.cpp
@@ -246,6 +246,102 @@ void delVssRangeSearchParams(void *p) {
delete self;
}
+struct SqlStatement {
+
+  SqlStatement(sqlite3 *db, const char * sql) : db(db), sql(sql), stmt(nullptr) {
+
+    // RAII holder for one SQL string plus the statement compiled from it.
+    // Takes ownership of `sql`: it must come from sqlite3_mprintf() (or be nullptr
+    // if that allocation failed), because finalize() releases it with sqlite3_free().
+  }
+
+  // Non-copyable: a copy would finalize `stmt` and free `sql` a second time.
+  SqlStatement(const SqlStatement &) = delete;
+  SqlStatement &operator=(const SqlStatement &) = delete;
+
+  ~SqlStatement() { finalize(); }
+
+  // Compiles `sql` into `stmt`; returns SQLITE_OK on success, SQLITE_ERROR otherwise.
+  int prepare() {
+    if (sql == nullptr) // The SQL text could not be allocated, nothing to compile.
+      return SQLITE_ERROR;
+    sqlite3_finalize(stmt); // No-op on nullptr; avoids leaking an earlier statement.
+    auto res = sqlite3_prepare_v2(db, sql, -1, &stmt, nullptr);
+    if (res != SQLITE_OK || stmt == nullptr) {
+      stmt = nullptr;
+      return SQLITE_ERROR;
+    }
+    return res;
+  }
+
+  // Binds a 64-bit integer to the 1-based parameter `colNo`.
+  int bind_int64(int colNo, sqlite3_int64 value) {
+    return sqlite3_bind_int64(stmt, colNo, value);
+  }
+
+  // Binds `size` bytes at `data` (copied by SQLite); the 64-bit size is passed through without narrowing.
+  int bind_blob64(int colNo, const void * data, sqlite3_uint64 size) {
+    return sqlite3_bind_blob64(stmt, colNo, data, size, SQLITE_TRANSIENT);
+  }
+
+  // Binds SQL NULL to the 1-based parameter `colNo`.
+  int bind_null(int colNo) {
+    return sqlite3_bind_null(stmt, colNo);
+  }
+
+  // Binds `ptr` as a pointer value tagged with type `name`; no destructor is registered.
+  int bind_pointer(int paramNo, void *ptr, const char * name) {
+    return sqlite3_bind_pointer(stmt, paramNo, ptr, name, nullptr);
+  }
+
+  // Advances the prepared statement and returns the sqlite3_step() result code.
+  int step() {
+    return sqlite3_step(stmt);
+  }
+
+  // Runs `sql` directly through sqlite3_exec(), without using `stmt`.
+  int exec() {
+    return sqlite3_exec(db, sql, nullptr, nullptr, nullptr);
+  }
+
+  // Passes `sql` to sqlite3_declare_vtab() as the virtual table's schema.
+  int declare_vtab() {
+    return sqlite3_declare_vtab(db, sql);
+  }
+
+  // Blob of result column `colNo` in the current row; the memory belongs to SQLite.
+  const void * column_blob(int colNo) {
+    return sqlite3_column_blob(stmt, colNo);
+  }
+
+  // Size in bytes of result column `colNo` in the current row.
+  int column_bytes(int colNo) {
+    return sqlite3_column_bytes(stmt, colNo);
+  }
+
+  // Integer value of result column `colNo`, returned as sqlite3_int64 so it is not truncated.
+  sqlite3_int64 column_int64(int colNo) {
+    return sqlite3_column_int64(stmt, colNo);
+  }
+
+  // Rowid of the most recent successful INSERT on `db`, at full 64-bit width.
+  sqlite3_int64 last_insert_rowid() {
+    return sqlite3_last_insert_rowid(db);
+  }
+
+  // Releases the statement and the SQL text; safe to call repeatedly.
+  void finalize() {
+    sqlite3_finalize(stmt); // Both SQLite calls are harmless no-ops on nullptr.
+    stmt = nullptr;
+    sqlite3_free((void *)sql);
+    sql = nullptr;
+  }
+
+  sqlite3 *db;
+  sqlite3_stmt *stmt;
+  const char * sql;
+};
+
#pragma endregion
#pragma region Vtab
@@ -288,95 +384,81 @@ static void vssRangeSearchParamsFunc(sqlite3_context *context, int argc,
sqlite3_result_pointer(context, params, "vss0_rangesearchparams", delVssRangeSearchParams);
}
-static int write_index_insert(faiss::Index *index,
+static int write_index_insert(faiss::VectorIOWriter &writer, // Serialized index bytes are read from writer.data.
sqlite3 *db,
char *schema,
char *name,
int rowId) {
- faiss::VectorIOWriter writer;
- faiss::write_index(index, &writer);
- sqlite3_int64 indexSize = writer.data.size();
-
- // First try to insert into xyz_index. If that fails with a rowid constraint
- // error, that means the index is already on disk, we just have to UPDATE
- // instead.
+ // Inserts the serialized index as row `rowId` of the "<name>_index" table. Returns SQLITE_OK when inserted, SQLITE_ERROR if prepare/bind fails, else the step() result; a rowid constraint failure means the row already exists.
+ SqlStatement insert(db,
+ sqlite3_mprintf("insert into \"%w\".\"%w_index\"(rowid, idx) values (?, ?)",
+ schema,
+ name));
- sqlite3_stmt *stmt;
- char *sql = sqlite3_mprintf(
- "insert into \"%w\".\"%w_index\"(rowid, idx) values (?, ?)",
- schema,
- name);
-
- int rc = sqlite3_prepare_v2(db, sql, -1, &stmt, 0);
- if (rc != SQLITE_OK || stmt == nullptr) {
- sqlite3_free(sql);
+ if (insert.prepare() != SQLITE_OK)
return SQLITE_ERROR;
- }
- rc = sqlite3_bind_int64(stmt, 1, rowId);
- if (rc != SQLITE_OK) {
- sqlite3_finalize(stmt);
- sqlite3_free(sql);
+ if (insert.bind_int64(1, rowId) != SQLITE_OK) // Parameter 1: target rowid.
return SQLITE_ERROR;
- }
- rc = sqlite3_bind_blob64(stmt, 2, writer.data.data(), indexSize, SQLITE_TRANSIENT);
- if (rc != SQLITE_OK) {
- sqlite3_finalize(stmt);
- sqlite3_free(sql);
+ if (insert.bind_blob64(2, writer.data.data(), writer.data.size()) != SQLITE_OK) // Parameter 2: index blob.
return SQLITE_ERROR;
- }
- int result = sqlite3_step(stmt);
- sqlite3_finalize(stmt);
- sqlite3_free(sql);
+ auto rc = insert.step();
+ if (rc == SQLITE_DONE)
+ return SQLITE_OK; // Index did not exist, and we successfully inserted it.
- if (result == SQLITE_DONE) {
+ return rc; // Hand the failing step() code back; write_index() inspects the extended error code.
+}
- // INSERT was success, index wasn't written yet, all good to exit
- return SQLITE_OK;
+static int write_index_update(faiss::VectorIOWriter &writer, // Serialized index bytes are read from writer.data.
+ sqlite3 *db,
+ char *schema,
+ char *name,
+ int rowId) {
- } else if (sqlite3_extended_errcode(db) != SQLITE_CONSTRAINT_ROWID) {
+ // Overwrites the `idx` blob of row `rowId` in the "<name>_index" table. Returns SQLITE_OK on SQLITE_DONE, SQLITE_ERROR if prepare/bind fails, else the step() result.
+ SqlStatement update(db,
+ sqlite3_mprintf("update \"%w\".\"%w_index\" set idx = ? where rowid = ?",
+ schema,
+ name));
- // INSERT failed for another unknown reason, bad, return error
+ if (update.prepare() != SQLITE_OK)
return SQLITE_ERROR;
- }
-
- // INSERT failed because index already is on disk, so we do an UPDATE instead
- sql = sqlite3_mprintf(
- "update \"%w\".\"%w_index\" set idx = ? where rowid = ?", schema, name);
-
- rc = sqlite3_prepare_v2(db, sql, -1, &stmt, 0);
- if (rc != SQLITE_OK || stmt == nullptr) {
- sqlite3_free(sql);
+ if (update.bind_blob64(1, writer.data.data(), writer.data.size()) != SQLITE_OK) // Parameter 1: index blob.
return SQLITE_ERROR;
- }
- rc = sqlite3_bind_blob64(stmt, 1, writer.data.data(), indexSize, SQLITE_TRANSIENT);
- if (rc != SQLITE_OK) {
- sqlite3_finalize(stmt);
- sqlite3_free(sql);
+ if (update.bind_int64(2, rowId) != SQLITE_OK) // Parameter 2: rowid to update.
return SQLITE_ERROR;
- }
- rc = sqlite3_bind_int64(stmt, 2, rowId);
- if (rc != SQLITE_OK) {
- sqlite3_finalize(stmt);
- sqlite3_free(sql);
- return SQLITE_ERROR;
- }
+ auto rc = update.step();
+ if (rc == SQLITE_DONE)
+ return SQLITE_OK; // We successfully updated existing index.
+ // NOTE(review): SQLITE_DONE alone does not show that a row matched `rowId` - confirm whether that matters here.
+ return rc;
+}
- result = sqlite3_step(stmt);
- sqlite3_finalize(stmt);
- sqlite3_free(sql);
+static int write_index(faiss::Index *index, // Index to serialize and store as row `rowId`.
+ sqlite3 *db,
+ char *schema,
+ char *name,
+ int rowId) {
- if (result == SQLITE_DONE) {
+ // Serialize the faiss index into an in-memory writer.
+ faiss::VectorIOWriter writer;
+ faiss::write_index(index, &writer);
+
+ // First try to insert the index; if that fails with a rowid constraint error, update the existing row instead.
+ if (write_index_insert(writer, db, schema, name, rowId) == SQLITE_OK)
return SQLITE_OK;
- }
- return result;
+ if (sqlite3_extended_errcode(db) != SQLITE_CONSTRAINT_ROWID)
+ return SQLITE_ERROR; // Insert failed for some other reason than an existing row.
+
+ // Insert failed because index already existed, updating existing index.
+ return write_index_update(writer, db, schema, name, rowId);
}
static int shadow_data_insert(sqlite3 *db,
@@ -385,50 +467,45 @@ static int shadow_data_insert(sqlite3 *db,
sqlite3_int64 *rowid,
sqlite3_int64 *retRowid) {
- sqlite3_stmt *stmt;
-
if (rowid == nullptr) {
- auto sql = sqlite3_mprintf(
- "insert into \"%w\".\"%w_data\"(x) values (?)", schema, name);
+ SqlStatement insert(db,
+ sqlite3_mprintf("insert into \"%w\".\"%w_data\"(x) values (?)",
+ schema,
+ name));
- int rc = sqlite3_prepare_v2(db, sql, -1, &stmt, 0);
- sqlite3_free(sql);
+ if (insert.prepare() != SQLITE_OK)
+ return SQLITE_ERROR;
- if (rc != SQLITE_OK || stmt == nullptr) {
+ if (insert.bind_null(1) != SQLITE_OK)
return SQLITE_ERROR;
- }
- sqlite3_bind_null(stmt, 1);
- if (sqlite3_step(stmt) != SQLITE_DONE) {
- sqlite3_finalize(stmt);
+ if (insert.step() != SQLITE_DONE)
return SQLITE_ERROR;
- }
} else {
- auto sql = sqlite3_mprintf(
- "insert into \"%w\".\"%w_data\"(rowid, x) values (?, ?);", schema,
- name);
+ SqlStatement insert(db,
+ sqlite3_mprintf("insert into \"%w\".\"%w_data\"(rowid, x) values (?, ?);",
+ schema,
+ name));
- int rc = sqlite3_prepare_v2(db, sql, -1, &stmt, 0);
- sqlite3_free(sql);
+ if (insert.prepare() != SQLITE_OK)
+ return SQLITE_ERROR;
- if (rc != SQLITE_OK || stmt == nullptr)
+ if (insert.bind_int64(1, *rowid) != SQLITE_OK)
return SQLITE_ERROR;
- sqlite3_bind_int64(stmt, 1, *rowid);
- sqlite3_bind_null(stmt, 2);
- if (sqlite3_step(stmt) != SQLITE_DONE) {
- sqlite3_finalize(stmt);
+ if (insert.bind_null(2) != SQLITE_OK)
+ return SQLITE_ERROR;
+
+ if (insert.step() != SQLITE_DONE)
return SQLITE_ERROR;
- }
if (retRowid != nullptr)
- *retRowid = sqlite3_last_insert_rowid(db);
+ *retRowid = insert.last_insert_rowid();
}
- sqlite3_finalize(stmt);
return SQLITE_OK;
}
@@ -436,62 +513,47 @@ static int shadow_data_delete(sqlite3 *db,
char *schema,
char *name,
sqlite3_int64 rowid) {
- sqlite3_stmt *stmt;
- // TODO: We should strive to use only one concept and idea while creating
- // SQL statements.
- auto query = sqlite3_str_new(0);
+ SqlStatement del(db,
+ sqlite3_mprintf("delete from \"%w\".\"%w_data\" where rowid = ?",
+ schema,
+ name));
- sqlite3_str_appendf(query, "delete from \"%w\".\"%w_data\" where rowid = ?",
- schema, name);
-
- auto sql = sqlite3_str_finish(query);
+ if (del.prepare() != SQLITE_OK)
+ return SQLITE_ERROR;
- int rc = sqlite3_prepare_v2(db, sql, -1, &stmt, 0);
- if (rc != SQLITE_OK || stmt == nullptr)
+ if (del.bind_int64(1, rowid) != SQLITE_OK)
return SQLITE_ERROR;
- sqlite3_bind_int64(stmt, 1, rowid);
- if (sqlite3_step(stmt) != SQLITE_DONE) {
- sqlite3_finalize(stmt);
+ if (del.step() != SQLITE_DONE)
return SQLITE_ERROR;
- }
- sqlite3_free(sql);
- sqlite3_finalize(stmt);
return SQLITE_OK;
}
static faiss::Index *read_index_select(sqlite3 *db, const char *name, int indexId) {
- sqlite3_stmt *stmt;
- auto sql = sqlite3_mprintf("select idx from \"%w_index\" where rowid = ?", name);
+ SqlStatement select(db, // Loads the index blob stored at rowid `indexId`; every failure path returns nullptr.
+ sqlite3_mprintf("select idx from \"%w_index\" where rowid = ?", // NOTE(review): not schema-qualified, unlike the write statements - confirm.
+ name));
- int rc = sqlite3_prepare_v2(db, sql, -1, &stmt, nullptr);
- if (rc != SQLITE_OK || stmt == nullptr) {
- sqlite3_finalize(stmt);
- sqlite3_free(sql);
+ if (select.prepare() != SQLITE_OK)
return nullptr;
- }
- sqlite3_bind_int64(stmt, 1, indexId);
- if (sqlite3_step(stmt) != SQLITE_ROW) {
- sqlite3_finalize(stmt);
- sqlite3_free(sql);
+ if (select.bind_int64(1, indexId) != SQLITE_OK)
return nullptr;
- }
- auto index_data = sqlite3_column_blob(stmt, 0);
- int64_t size = sqlite3_column_bytes(stmt, 0);
+ if (select.step() != SQLITE_ROW) // No row for this rowid (or step failed).
+ return nullptr;
+ // Copy the blob into the reader while `select` is still alive; the bytes are then parsed by faiss::read_index().
+ auto index_data = select.column_blob(0);
+ auto size = select.column_bytes(0);
faiss::VectorIOReader reader;
copy((const uint8_t *)index_data,
((const uint8_t *)index_data) + size,
back_inserter(reader.data));
- sqlite3_free(sql);
- sqlite3_finalize(stmt);
-
return faiss::read_index(&reader);
}
@@ -500,21 +562,27 @@ static int create_shadow_tables(sqlite3 *db,
const char *name,
int n) {
- auto sql = sqlite3_mprintf("create table \"%w\".\"%w_index\"(idx)",
- schema,
- name);
+ SqlStatement create1(db,
+ sqlite3_mprintf("create table \"%w\".\"%w_index\"(idx)",
+ schema,
+ name));
- auto rc = sqlite3_exec(db, sql, 0, 0, 0);
- sqlite3_free(sql);
+ auto rc = create1.exec();
if (rc != SQLITE_OK)
return rc;
- sql = sqlite3_mprintf("create table \"%w\".\"%w_data\"(x);",
- schema,
- name);
+ /*
+ * Notice, we explicitly finalize this object before creating the next
+ * statement; SQLite does not require this, it only releases resources early.
+ */
+ create1.finalize();
- rc = sqlite3_exec(db, sql, nullptr, nullptr, nullptr);
- sqlite3_free(sql);
+ SqlStatement create2(db,
+ sqlite3_mprintf("create table \"%w\".\"%w_data\"(x);",
+ schema,
+ name));
+
+ rc = create2.exec();
return rc;
}
@@ -525,29 +593,15 @@ static int drop_shadow_tables(sqlite3 *db, char *name) {
for (int i = 0; i < 2; i++) {
- auto curSql = drops[i];
-
- sqlite3_stmt *stmt;
-
- // TODO: Use of one construct to create SQL statements.
- sqlite3_str *query = sqlite3_str_new(0);
- sqlite3_str_appendf(query, curSql, name);
- char *sql = sqlite3_str_finish(query);
+ SqlStatement cur(db,
+ sqlite3_mprintf(drops[i],
+ name));
- int rc = sqlite3_prepare_v2(db, sql, -1, &stmt, 0);
- if (rc != SQLITE_OK || stmt == nullptr) {
- sqlite3_free(sql);
+ if (cur.prepare() != SQLITE_OK)
return SQLITE_ERROR;
- }
- if (sqlite3_step(stmt) != SQLITE_DONE) {
- sqlite3_free(sql);
- sqlite3_finalize(stmt);
+ if (cur.step() != SQLITE_DONE)
return SQLITE_ERROR;
- }
-
- sqlite3_free(sql);
- sqlite3_finalize(stmt);
}
return SQLITE_OK;
}
@@ -696,6 +750,10 @@ unique_ptr> parse_constructor(int argc,
return columns;
}
+#define VSS_INDEX_COLUMN_DISTANCE 0
+#define VSS_INDEX_COLUMN_OPERATION 1
+#define VSS_INDEX_COLUMN_VECTORS 2
+
static int init(sqlite3 *db,
void *pAux,
int argc,
@@ -705,31 +763,23 @@ static int init(sqlite3 *db,
bool isCreate) {
sqlite3_vtab_config(db, SQLITE_VTAB_CONSTRAINT_SUPPORT, 1);
- int rc;
-
- sqlite3_str *str = sqlite3_str_new(nullptr);
- sqlite3_str_appendall(str,
- "create table x(distance hidden, operation hidden");
auto columns = parse_constructor(argc, argv);
-
if (columns == nullptr) {
- *pzErr = sqlite3_mprintf("Error parsing constructor");
- return rc;
+ *pzErr = sqlite3_mprintf("Error parsing VSS index factory constructor");
+ return SQLITE_ERROR;
}
- for (auto column = columns->begin(); column != columns->end(); ++column) {
- sqlite3_str_appendf(str, ", \"%w\"", column->name.c_str());
+ sqlite3_str *str = sqlite3_str_new(nullptr); // Built with "%w" below so a '"' inside a column name is escaped.
+ sqlite3_str_appendall(str, "create table x(distance hidden, operation hidden");
+ for (auto colIter = columns->begin(); colIter != columns->end(); ++colIter) {
+ sqlite3_str_appendf(str, ", \"%w\"", colIter->name.c_str());
}
+ sqlite3_str_appendall(str, ")");
- sqlite3_str_appendall(str, ")");
- auto sql = sqlite3_str_finish(str);
- rc = sqlite3_declare_vtab(db, sql);
- sqlite3_free(sql);
+ SqlStatement create(db, sqlite3_str_finish(str)); // Column names are never used as a printf format; SqlStatement frees the finished string.
-#define VSS_INDEX_COLUMN_DISTANCE 0
-#define VSS_INDEX_COLUMN_OPERATION 1
-#define VSS_INDEX_COLUMN_VECTORS 2
+ auto rc = create.declare_vtab();
if (rc != SQLITE_OK)
return rc;
@@ -738,6 +788,7 @@ static int init(sqlite3 *db,
(vector0_api *)pAux,
sqlite3_mprintf("%s", argv[1]),
sqlite3_mprintf("%s", argv[2]));
+
*ppVtab = pTable;
if (isCreate) {
@@ -751,7 +802,7 @@ static int init(sqlite3 *db,
} catch (faiss::FaissException &e) {
- *pzErr = sqlite3_mprintf("Error building index factory for %s: %s",
+ *pzErr = sqlite3_mprintf("Error building index factory for %s, exception was: %s",
iter->name.c_str(),
e.msg.c_str());
@@ -771,7 +822,7 @@ static int init(sqlite3 *db,
try {
- int rc = write_index_insert((*iter)->index,
+ int rc = write_index((*iter)->index,
pTable->db,
pTable->schema,
pTable->name,
@@ -1232,7 +1283,7 @@ static int vssIndexSync(sqlite3_vtab *pVTab) {
int i = 0;
for (auto iter = pTable->indexes.begin(); iter != pTable->indexes.end(); ++iter, i++) {
- int rc = write_index_insert((*iter)->index,
+ int rc = write_index((*iter)->index,
pTable->db,
pTable->schema,
pTable->name,
@@ -1357,8 +1408,11 @@ static int vssIndexUpdate(sqlite3_vtab *pVTab,
if (!inserted_rowid) {
sqlite_int64 retrowid;
- auto rc = shadow_data_insert(pTable->db, pTable->schema, pTable->name,
- &rowid, &retrowid);
+ auto rc = shadow_data_insert(pTable->db,
+ pTable->schema,
+ pTable->name,
+ &rowid,
+ &retrowid);
if (rc != SQLITE_OK)
return rc;
@@ -1496,28 +1550,17 @@ static sqlite3_module vssIndexModule = {
vector0_api *vector0_api_from_db(sqlite3 *db) {
- vector0_api *pRet = nullptr;
- sqlite3_stmt *pStmt = nullptr;
-
- auto rc = sqlite3_prepare(db, "select vector0(?1)", -1, &pStmt, nullptr);
- if (rc != SQLITE_OK)
+ SqlStatement select(db, sqlite3_mprintf("select vector0(?1)")); // SqlStatement frees its SQL with sqlite3_free(), hence the heap copy of the literal.
+ if (select.prepare() != SQLITE_OK)
return nullptr;
- rc = sqlite3_bind_pointer(pStmt, 1, (void *)&pRet, "vector0_api_ptr", nullptr);
- if (rc != SQLITE_OK) {
-
- sqlite3_finalize(pStmt);
+ vector0_api *pRet = nullptr;
+ if (select.bind_pointer(1, (void *)&pRet, "vector0_api_ptr") != SQLITE_OK) // Binds the address of pRet; presumably vector0() writes the API pointer through it - confirm in sqlite-vector.
return nullptr;
- }
- rc = sqlite3_step(pStmt);
- if (rc != SQLITE_ROW) {
-
- sqlite3_finalize(pStmt);
+ if (select.step() != SQLITE_ROW) // Anything other than a result row is treated as failure.
return nullptr;
- }
- sqlite3_finalize(pStmt);
return pRet;
}
From 6e2e09a7b4bc91d141df86d210c45462a9416f60 Mon Sep 17 00:00:00 2001
From: Thomas Hansen
Date: Mon, 26 Jun 2023 08:09:43 +0300
Subject: [PATCH 02/66] Removing unused code path
---
src/sqlite-vss.cpp | 53 ++++++++++++----------------------------------
1 file changed, 14 insertions(+), 39 deletions(-)
diff --git a/src/sqlite-vss.cpp b/src/sqlite-vss.cpp
index 6608eec..3340924 100644
--- a/src/sqlite-vss.cpp
+++ b/src/sqlite-vss.cpp
@@ -464,47 +464,24 @@ static int write_index(faiss::Index *index,
static int shadow_data_insert(sqlite3 *db,
char *schema,
char *name,
- sqlite3_int64 *rowid,
- sqlite3_int64 *retRowid) {
-
- if (rowid == nullptr) {
-
- SqlStatement insert(db,
- sqlite3_mprintf("insert into \"%w\".\"%w_data\"(x) values (?)",
- schema,
- name));
-
- if (insert.prepare() != SQLITE_OK)
- return SQLITE_ERROR;
-
- if (insert.bind_null(1) != SQLITE_OK)
- return SQLITE_ERROR;
-
- if (insert.step() != SQLITE_DONE)
- return SQLITE_ERROR;
-
- } else {
-
- SqlStatement insert(db,
- sqlite3_mprintf("insert into \"%w\".\"%w_data\"(rowid, x) values (?, ?);",
- schema,
- name));
+ sqlite3_int64 rowid) { // Rowid is now mandatory and passed by value.
- if (insert.prepare() != SQLITE_OK)
- return SQLITE_ERROR;
+ SqlStatement insert(db, // Inserts a row (rowid, NULL) into the "<name>_data" table; any failure maps to SQLITE_ERROR.
+ sqlite3_mprintf("insert into \"%w\".\"%w_data\"(rowid, x) values (?, ?);",
+ schema,
+ name));
- if (insert.bind_int64(1, *rowid) != SQLITE_OK)
- return SQLITE_ERROR;
+ if (insert.prepare() != SQLITE_OK)
+ return SQLITE_ERROR;
- if (insert.bind_null(2) != SQLITE_OK)
- return SQLITE_ERROR;
+ if (insert.bind_int64(1, rowid) != SQLITE_OK) // Parameter 1: explicit rowid.
+ return SQLITE_ERROR;
- if (insert.step() != SQLITE_DONE)
- return SQLITE_ERROR;
+ if (insert.bind_null(2) != SQLITE_OK) // Parameter 2: column x is always stored as NULL.
+ return SQLITE_ERROR;
- if (retRowid != nullptr)
- *retRowid = insert.last_insert_rowid();
- }
+ if (insert.step() != SQLITE_DONE)
+ return SQLITE_ERROR;
return SQLITE_OK;
}
@@ -1407,12 +1384,10 @@ static int vssIndexUpdate(sqlite3_vtab *pVTab,
if (!inserted_rowid) {
- sqlite_int64 retrowid;
auto rc = shadow_data_insert(pTable->db,
pTable->schema,
pTable->name,
- &rowid,
- &retrowid);
+ rowid);
if (rc != SQLITE_OK)
return rc;
From f513a1d733346b9922474f7e85b320c10179b01f Mon Sep 17 00:00:00 2001
From: Thomas Hansen |
Date: Mon, 26 Jun 2023 08:14:05 +0300
Subject: [PATCH 03/66] Making sure we use nullptr instead of 0
---
src/sqlite-vector.cpp | 6 +++---
src/sqlite-vss.cpp | 20 ++++++++++----------
2 files changed, 13 insertions(+), 13 deletions(-)
diff --git a/src/sqlite-vector.cpp b/src/sqlite-vector.cpp
index a780fcf..67985b9 100644
--- a/src/sqlite-vector.cpp
+++ b/src/sqlite-vector.cpp
@@ -675,9 +675,9 @@ __declspec(dllexport)
aFunc[i].flags,
aFunc[i].pAux,
aFunc[i].xFunc,
- 0,
- 0,
- 0);
+ nullptr,
+ nullptr,
+ nullptr);
if (rc != SQLITE_OK) {
diff --git a/src/sqlite-vss.cpp b/src/sqlite-vss.cpp
index 3340924..16e1003 100644
--- a/src/sqlite-vss.cpp
+++ b/src/sqlite-vss.cpp
@@ -1588,42 +1588,42 @@ __declspec(dllexport)
SQLITE_UTF8 | SQLITE_DETERMINISTIC | SQLITE_INNOCUOUS,
vector_api,
vss_distance_l2,
- 0, 0, 0);
+ nullptr, nullptr, nullptr);
sqlite3_create_function_v2(db, "vss_distance_linf",
2,
SQLITE_UTF8 | SQLITE_DETERMINISTIC | SQLITE_INNOCUOUS,
vector_api,
vss_distance_linf,
- 0, 0, 0);
+ nullptr, nullptr, nullptr);
sqlite3_create_function_v2(db, "vss_inner_product",
2,
SQLITE_UTF8 | SQLITE_DETERMINISTIC | SQLITE_INNOCUOUS,
vector_api,
vss_inner_product,
- 0, 0, 0);
+ nullptr, nullptr, nullptr);
sqlite3_create_function_v2(db, "vss_fvec_add",
2,
SQLITE_UTF8 | SQLITE_DETERMINISTIC | SQLITE_INNOCUOUS,
vector_api,
vss_fvec_add,
- 0, 0, 0);
+ nullptr, nullptr, nullptr);
sqlite3_create_function_v2(db, "vss_fvec_sub",
2,
SQLITE_UTF8 | SQLITE_DETERMINISTIC | SQLITE_INNOCUOUS,
vector_api,
vss_fvec_sub,
- 0, 0, 0);
+ nullptr, nullptr, nullptr);
sqlite3_create_function_v2(db, "vss_search",
2,
SQLITE_UTF8 | SQLITE_DETERMINISTIC | SQLITE_INNOCUOUS,
vector_api,
vssSearchFunc,
- 0, 0, 0);
+ nullptr, nullptr, nullptr);
sqlite3_create_function_v2(db,
"vss_search_params",
@@ -1631,7 +1631,7 @@ __declspec(dllexport)
0,
vector_api,
vssSearchParamsFunc,
- 0, 0, 0);
+ nullptr, nullptr, nullptr);
sqlite3_create_function_v2(db,
"vss_range_search",
@@ -1639,7 +1639,7 @@ __declspec(dllexport)
SQLITE_UTF8 | SQLITE_DETERMINISTIC | SQLITE_INNOCUOUS,
vector_api,
vssRangeSearchFunc,
- 0, 0, 0);
+ nullptr, nullptr, nullptr);
sqlite3_create_function_v2(db,
"vss_range_search_params",
@@ -1647,7 +1647,7 @@ __declspec(dllexport)
0,
vector_api,
vssRangeSearchParamsFunc,
- 0, 0, 0);
+ nullptr, nullptr, nullptr);
sqlite3_create_function_v2(db,
"vss_memory_usage",
@@ -1655,7 +1655,7 @@ __declspec(dllexport)
0,
nullptr,
faissMemoryUsageFunc,
- 0, 0, 0);
+ nullptr, nullptr, nullptr);
auto rc = sqlite3_create_module_v2(db, "vss0", &vssIndexModule, vector_api, nullptr);
if (rc != SQLITE_OK) {
From 3f9d111c4ad1f479748e75cf638efa2286e3e41e Mon Sep 17 00:00:00 2001
From: Thomas Hansen |
Date: Mon, 26 Jun 2023 08:34:25 +0300
Subject: [PATCH 04/66] Update sqlite-vss.cpp
---
src/sqlite-vss.cpp | 17 +++++++++++------
1 file changed, 11 insertions(+), 6 deletions(-)
diff --git a/src/sqlite-vss.cpp b/src/sqlite-vss.cpp
index 16e1003..f9feb89 100644
--- a/src/sqlite-vss.cpp
+++ b/src/sqlite-vss.cpp
@@ -646,11 +646,14 @@ struct vss_index_cursor : public sqlite3_vtab_cursor {
explicit vss_index_cursor(vss_index_vtab *table)
: table(table),
sqlite3_vtab_cursor({0}),
- stmt(nullptr) { }
+ stmt(nullptr),
+ sql(nullptr) { }
~vss_index_cursor() {
if (stmt != nullptr)
sqlite3_finalize(stmt);
+ if (sql != nullptr)
+ sqlite3_free(sql);
}
vss_index_vtab *table;
@@ -670,6 +673,7 @@ struct vss_index_cursor : public sqlite3_vtab_cursor {
// For query_type == QueryType::fullscan
sqlite3_stmt *stmt;
+ char *sql;
int step_result;
};
@@ -1057,12 +1061,13 @@ static int vssIndexFilter(sqlite3_vtab_cursor *pVtabCursor,
} else if (strcmp(idxStr, "fullscan") == 0) {
pCursor->query_type = QueryType::fullscan;
- sqlite3_stmt *stmt;
+ pCursor->sql = sqlite3_mprintf("select rowid from \"%w_data\"", pCursor->table->name);
- int res = sqlite3_prepare_v2(
- pCursor->table->db,
- sqlite3_mprintf("select rowid from \"%w_data\"", pCursor->table->name),
- -1, &pCursor->stmt, nullptr);
+ int res = sqlite3_prepare_v2(pCursor->table->db,
+ pCursor->sql,
+ -1,
+ &pCursor->stmt,
+ nullptr);
if (res != SQLITE_OK)
return res;
From f83febe7b43fcda4ead0009e3cab4e0294f3be21 Mon Sep 17 00:00:00 2001
From: Thomas Hansen |
Date: Mon, 26 Jun 2023 08:59:09 +0300
Subject: [PATCH 05/66] Update sqlite-vss.cpp
---
src/sqlite-vss.cpp | 86 ++++++++++++++++++++++++----------------------
1 file changed, 45 insertions(+), 41 deletions(-)
diff --git a/src/sqlite-vss.cpp b/src/sqlite-vss.cpp
index f9feb89..90360a0 100644
--- a/src/sqlite-vss.cpp
+++ b/src/sqlite-vss.cpp
@@ -611,7 +611,10 @@ struct vss_index_vtab : public sqlite3_vtab {
: db(db),
vector_api(vector_api),
schema(schema),
- name(name) { }
+ name(name) {
+
+ this->zErrMsg = nullptr;
+ }
~vss_index_vtab() {
@@ -619,11 +622,20 @@ struct vss_index_vtab : public sqlite3_vtab {
sqlite3_free(name);
if (schema)
sqlite3_free(schema);
+ if (this->zErrMsg != nullptr)
+ sqlite3_free(this->zErrMsg); // zErrMsg holds sqlite3_mprintf() memory; releasing it with `delete` is undefined behaviour.
for (auto iter = indexes.begin(); iter != indexes.end(); ++iter) {
delete (*iter);
}
}
+ // Replaces the error message reported for this vtab. Takes ownership of `error`,
+ // which must be sqlite3_malloc() memory such as a sqlite3_mprintf() result.
+ void setError(char *error) {
+ sqlite3_free(this->zErrMsg); // Harmless no-op when no message is set.
+ this->zErrMsg = error;
+ }
+
sqlite3 *db;
vector0_api *vector_api;
@@ -975,31 +987,32 @@ static int vssIndexFilter(sqlite3_vtab_cursor *pVtabCursor,
} else if (sqlite3_libversion_number() < 3041000) {
// https://sqlite.org/forum/info/6b32f818ba1d97ef
- sqlite3_free(pVtabCursor->pVtab->zErrMsg);
- pVtabCursor->pVtab->zErrMsg = sqlite3_mprintf(
- "vss_search() only support vss_search_params() as a "
- "2nd parameter for SQLite versions below 3.41.0");
+ auto ptrVtab = static_cast(pCursor->pVtab);
+ ptrVtab->setError(
+ sqlite3_mprintf(
+ "vss_search() only support vss_search_params() as a "
+ "2nd parameter for SQLite versions below 3.41.0"));
return SQLITE_ERROR;
} else if ((query_vector = pCursor->table->vector_api->xValueAsVector(
argv[0])) != nullptr) {
if (argc > 1) {
+
pCursor->limit = sqlite3_value_int(argv[1]);
} else {
- sqlite3_free(pVtabCursor->pVtab->zErrMsg);
- pVtabCursor->pVtab->zErrMsg =
- sqlite3_mprintf("LIMIT required on vss_search() queries");
+
+ auto ptrVtab = static_cast(pCursor->pVtab);
+ ptrVtab->setError(sqlite3_mprintf("LIMIT required on vss_search() queries"));
+
return SQLITE_ERROR;
}
} else {
- if (pVtabCursor->pVtab->zErrMsg != nullptr)
- sqlite3_free(pVtabCursor->pVtab->zErrMsg);
+ auto ptrVtab = static_cast(pCursor->pVtab);
+ ptrVtab->setError(sqlite3_mprintf("2nd argument to vss_search() must be a vector"));
- pVtabCursor->pVtab->zErrMsg = sqlite3_mprintf(
- "2nd argument to vss_search() must be a vector");
return SQLITE_ERROR;
}
@@ -1008,22 +1021,22 @@ static int vssIndexFilter(sqlite3_vtab_cursor *pVtabCursor,
if (query_vector->size() != index->d) {
- // TODO: To support index that transforms vectors
- // (to conserve spage, eg?), we should probably
- // have some logic in place that transforms the vectors here?
- sqlite3_free(pVtabCursor->pVtab->zErrMsg);
- pVtabCursor->pVtab->zErrMsg = sqlite3_mprintf(
+ auto ptrVtab = static_cast(pCursor->pVtab);
+ ptrVtab->setError(sqlite3_mprintf(
"Input query size doesn't match index dimensions: %ld != %ld",
query_vector->size(),
- index->d);
+ index->d));
+
return SQLITE_ERROR;
}
if (pCursor->limit <= 0) {
- sqlite3_free(pVtabCursor->pVtab->zErrMsg);
- pVtabCursor->pVtab->zErrMsg = sqlite3_mprintf(
- "Limit must be greater than 0, got %ld", pCursor->limit);
+ auto ptrVtab = static_cast(pCursor->pVtab);
+ ptrVtab->setError(sqlite3_mprintf(
+ "Limit must be greater than 0, got %ld",
+ pCursor->limit));
+
return SQLITE_ERROR;
}
@@ -1076,11 +1089,10 @@ static int vssIndexFilter(sqlite3_vtab_cursor *pVtabCursor,
} else {
- if (pVtabCursor->pVtab->zErrMsg != 0)
- sqlite3_free(pVtabCursor->pVtab->zErrMsg);
+ auto ptrVtab = static_cast(pCursor->pVtab);
+ ptrVtab->setError(sqlite3_mprintf(
+ "%s %s", "vssIndexFilter error: unhandled idxStr", idxStr));
- pVtabCursor->pVtab->zErrMsg = sqlite3_mprintf(
- "%s %s", "vssIndexFilter error: unhandled idxStr", idxStr);
return SQLITE_ERROR;
}
@@ -1273,9 +1285,9 @@ static int vssIndexSync(sqlite3_vtab *pVTab) {
if (rc != SQLITE_OK) {
- sqlite3_free(pVTab->zErrMsg);
- pVTab->zErrMsg = sqlite3_mprintf("Error saving index (%d): %s",
- rc, sqlite3_errmsg(pTable->db));
+ pTable->setError(sqlite3_mprintf("Error saving index (%d): %s",
+ rc,
+ sqlite3_errmsg(pTable->db)));
return rc;
}
}
@@ -1285,10 +1297,8 @@ static int vssIndexSync(sqlite3_vtab *pVTab) {
} catch (faiss::FaissException &e) {
- sqlite3_free(pVTab->zErrMsg);
- pVTab->zErrMsg =
- sqlite3_mprintf("Error during synchroning index. Full error: %s",
- e.msg.c_str());
+ pTable->setError(sqlite3_mprintf("Error during synchroning index. Full error: %s",
+ e.msg.c_str()));
for (auto iter = pTable->indexes.begin(); iter != pTable->indexes.end(); ++iter) {
@@ -1378,11 +1388,9 @@ static int vssIndexUpdate(sqlite3_vtab *pVTab,
// Make sure the index is already trained, if it's needed
if (!(*iter)->index->is_trained) {
- sqlite3_free(pVTab->zErrMsg);
- pVTab->zErrMsg =
- sqlite3_mprintf("Index at i=%d requires training "
+ pTable->setError(sqlite3_mprintf("Index at i=%d requires training "
"before inserting data.",
- i);
+ i));
return SQLITE_ERROR;
}
@@ -1440,11 +1448,7 @@ static int vssIndexUpdate(sqlite3_vtab *pVTab,
} else {
// TODO: Implement - UPDATE operation
- sqlite3_free(pVTab->zErrMsg);
-
- pVTab->zErrMsg =
- sqlite3_mprintf("UPDATE statements on vss0 virtual tables not supported yet.");
-
+ pTable->setError(sqlite3_mprintf("UPDATE statements on vss0 virtual tables not supported yet."));
return SQLITE_ERROR;
}
From 98420ad046ca23953f3c68d5638d77df7d4d0133 Mon Sep 17 00:00:00 2001
From: Thomas Hansen
Date: Mon, 26 Jun 2023 09:40:03 +0300
Subject: [PATCH 06/66] Encapsulating members
---
src/sqlite-vss.cpp | 230 ++++++++++++++++++++++++++++-----------------
1 file changed, 146 insertions(+), 84 deletions(-)
diff --git a/src/sqlite-vss.cpp b/src/sqlite-vss.cpp
index 90360a0..ed2074d 100644
--- a/src/sqlite-vss.cpp
+++ b/src/sqlite-vss.cpp
@@ -246,7 +246,9 @@ void delVssRangeSearchParams(void *p) {
delete self;
}
-struct SqlStatement {
+class SqlStatement {
+
+public:
SqlStatement(sqlite3 *db, const char * sql) : db(db), sql(sql), stmt(nullptr) {
@@ -337,6 +339,8 @@ struct SqlStatement {
sql = nullptr;
}
+private:
+
sqlite3 *db;
sqlite3_stmt *stmt;
const char * sql;
@@ -588,7 +592,9 @@ static int drop_shadow_tables(sqlite3 *db, char *name) {
// Wrapper around a single faiss index, with training data, insert records, and
// delete records.
-struct vss_index {
+class vss_index {
+
+public:
explicit vss_index(faiss::Index *index) : index(index) {}
@@ -598,6 +604,33 @@ struct vss_index {
}
}
+ faiss::Index * getIndex() {
+ // Returns the wrapped faiss index pointer held by this object.
+ return index;
+ }
+
+ vector & getTrainings() {
+ // Mutable reference to `trainings`; vssIndexSync() feeds it to train() and then clears it.
+ return trainings;
+ }
+
+ vector & getInsert_data() {
+ // Mutable reference to `insert_data`, so callers can modify it in place.
+ return insert_data;
+ }
+
+ vector & getInsert_ids() {
+ // Mutable reference to `insert_ids`, so callers can modify it in place.
+ return insert_ids;
+ }
+
+ vector & getDelete_ids() {
+ // Mutable reference to `delete_ids`, so callers can modify it in place.
+ return delete_ids;
+ }
+
+private:
+
faiss::Index *index;
vector trainings;
vector insert_data;
@@ -605,7 +638,9 @@ struct vss_index {
vector delete_ids;
};
-struct vss_index_vtab : public sqlite3_vtab {
+class vss_index_vtab : public sqlite3_vtab {
+
+public:
vss_index_vtab(sqlite3 *db, vector0_api *vector_api, char *schema, char *name)
: db(db),
@@ -636,6 +671,33 @@ struct vss_index_vtab : public sqlite3_vtab {
this->zErrMsg = error;
}
+ sqlite3 * getDb() {
+ // Returns the database connection this vtab was constructed with.
+ return db;
+ }
+
+ vector0_api * getVector0_api() {
+ // Returns the vector0 API pointer this vtab was constructed with.
+ return vector_api;
+ }
+
+ vector & getIndexes() {
+ // Mutable reference to the index list; the destructor deletes every element.
+ return indexes;
+ }
+
+ char * getName() {
+ // Table name; the destructor releases it with sqlite3_free(), so callers must not free it.
+ return name;
+ }
+
+ char * getSchema() {
+ // Schema name; the destructor releases it with sqlite3_free(), so callers must not free it.
+ return schema;
+ }
+
+private:
+
sqlite3 *db;
vector0_api *vector_api;
@@ -791,7 +853,7 @@ static int init(sqlite3 *db,
try {
auto index = faiss::index_factory(iter->dimensions, iter->factory.c_str());
- pTable->indexes.push_back(new vss_index(index));
+ pTable->getIndexes().push_back(new vss_index(index));
} catch (faiss::FaissException &e) {
@@ -811,14 +873,14 @@ static int init(sqlite3 *db,
// After shadow tables are created, write the initial index state to
// shadow _index.
auto i = 0;
- for (auto iter = pTable->indexes.begin(); iter != pTable->indexes.end(); ++iter, i++) {
+ for (auto iter = pTable->getIndexes().begin(); iter != pTable->getIndexes().end(); ++iter, i++) {
try {
- int rc = write_index((*iter)->index,
- pTable->db,
- pTable->schema,
- pTable->name,
+ int rc = write_index((*iter)->getIndex(),
+ pTable->getDb(),
+ pTable->getSchema(),
+ pTable->getName(),
i);
if (rc != SQLITE_OK)
@@ -842,7 +904,7 @@ static int init(sqlite3 *db,
*pzErr = sqlite3_mprintf("Could not read index at position %d", i);
return SQLITE_ERROR;
}
- pTable->indexes.push_back(new vss_index(index));
+ pTable->getIndexes().push_back(new vss_index(index));
}
}
@@ -877,7 +939,7 @@ static int vssIndexDisconnect(sqlite3_vtab *pVtab) {
static int vssIndexDestroy(sqlite3_vtab *pVtab) {
auto pTable = static_cast(pVtab);
- drop_shadow_tables(pTable->db, pTable->name);
+ drop_shadow_tables(pTable->getDb(), pTable->getName());
vssIndexDisconnect(pVtab);
return SQLITE_OK;
}
@@ -994,7 +1056,7 @@ static int vssIndexFilter(sqlite3_vtab_cursor *pVtabCursor,
"2nd parameter for SQLite versions below 3.41.0"));
return SQLITE_ERROR;
- } else if ((query_vector = pCursor->table->vector_api->xValueAsVector(
+ } else if ((query_vector = pCursor->table->getVector0_api()->xValueAsVector(
argv[0])) != nullptr) {
if (argc > 1) {
@@ -1017,7 +1079,7 @@ static int vssIndexFilter(sqlite3_vtab_cursor *pVtabCursor,
}
int nq = 1;
- auto index = pCursor->table->indexes.at(idxNum)->index;
+ auto index = pCursor->table->getIndexes().at(idxNum)->getIndex();
if (query_vector->size() != index->d) {
@@ -1064,7 +1126,7 @@ static int vssIndexFilter(sqlite3_vtab_cursor *pVtabCursor,
vector nns(params->distance * nq);
pCursor->range_search_result = unique_ptr(new faiss::RangeSearchResult(nq, true));
- auto index = pCursor->table->indexes.at(idxNum)->index;
+ auto index = pCursor->table->getIndexes().at(idxNum)->getIndex();
index->range_search(nq,
params->vector->data(),
@@ -1074,9 +1136,9 @@ static int vssIndexFilter(sqlite3_vtab_cursor *pVtabCursor,
} else if (strcmp(idxStr, "fullscan") == 0) {
pCursor->query_type = QueryType::fullscan;
- pCursor->sql = sqlite3_mprintf("select rowid from \"%w_data\"", pCursor->table->name);
+ pCursor->sql = sqlite3_mprintf("select rowid from \"%w_data\"", pCursor->table->getName());
- int res = sqlite3_prepare_v2(pCursor->table->db,
+ int res = sqlite3_prepare_v2(pCursor->table->getDb(),
pCursor->sql,
-1,
&pCursor->stmt,
@@ -1186,7 +1248,7 @@ static int vssIndexColumn(sqlite3_vtab_cursor *cur,
} else if (i >= VSS_INDEX_COLUMN_VECTORS) {
auto index =
- pCursor->table->indexes.at(i - VSS_INDEX_COLUMN_VECTORS)->index;
+ pCursor->table->getIndexes().at(i - VSS_INDEX_COLUMN_VECTORS)->getIndex();
vector vec(index->d);
sqlite3_int64 rowId;
@@ -1207,7 +1269,7 @@ static int vssIndexColumn(sqlite3_vtab_cursor *cur,
sqlite3_free(errmsg);
return SQLITE_ERROR;
}
- pCursor->table->vector_api->xResultVector(ctx, &vec);
+ pCursor->table->getVector0_api()->xResultVector(ctx, &vec);
}
return SQLITE_OK;
}
@@ -1226,47 +1288,47 @@ static int vssIndexSync(sqlite3_vtab *pVTab) {
bool needsWriting = false;
auto idxCol = 0;
- for (auto iter = pTable->indexes.begin(); iter != pTable->indexes.end(); ++iter, idxCol++) {
+ for (auto iter = pTable->getIndexes().begin(); iter != pTable->getIndexes().end(); ++iter, idxCol++) {
// Checking if index needs training.
- if (!(*iter)->trainings.empty()) {
+ if (!(*iter)->getTrainings().empty()) {
- (*iter)->index->train(
- (*iter)->trainings.size() / (*iter)->index->d,
- (*iter)->trainings.data());
+ (*iter)->getIndex()->train(
+ (*iter)->getTrainings().size() / (*iter)->getIndex()->d,
+ (*iter)->getTrainings().data());
- (*iter)->trainings.clear();
- (*iter)->trainings.shrink_to_fit();
+ (*iter)->getTrainings().clear();
+ (*iter)->getTrainings().shrink_to_fit();
needsWriting = true;
}
// Checking if we're deleting records from the index.
- if (!(*iter)->delete_ids.empty()) {
+ if (!(*iter)->getDelete_ids().empty()) {
- faiss::IDSelectorBatch selector((*iter)->delete_ids.size(),
- (*iter)->delete_ids.data());
+ faiss::IDSelectorBatch selector((*iter)->getDelete_ids().size(),
+ (*iter)->getDelete_ids().data());
- (*iter)->index->remove_ids(selector);
- (*iter)->delete_ids.clear();
- (*iter)->delete_ids.shrink_to_fit();
+ (*iter)->getIndex()->remove_ids(selector);
+ (*iter)->getDelete_ids().clear();
+ (*iter)->getDelete_ids().shrink_to_fit();
needsWriting = true;
}
// Checking if we're inserting records to the index.
- if (!(*iter)->insert_data.empty()) {
+ if (!(*iter)->getInsert_data().empty()) {
- (*iter)->index->add_with_ids(
- (*iter)->insert_ids.size(),
- (*iter)->insert_data.data(),
- (faiss::idx_t *)(*iter)->insert_ids.data());
+ (*iter)->getIndex()->add_with_ids(
+ (*iter)->getInsert_ids().size(),
+ (*iter)->getInsert_data().data(),
+ (faiss::idx_t *)(*iter)->getInsert_ids().data());
- (*iter)->insert_ids.clear();
- (*iter)->insert_ids.shrink_to_fit();
+ (*iter)->getInsert_ids().clear();
+ (*iter)->getInsert_ids().shrink_to_fit();
- (*iter)->insert_data.clear();
- (*iter)->insert_data.shrink_to_fit();
+ (*iter)->getInsert_data().clear();
+ (*iter)->getInsert_data().shrink_to_fit();
needsWriting = true;
}
@@ -1275,19 +1337,19 @@ static int vssIndexSync(sqlite3_vtab *pVTab) {
if (needsWriting) {
int i = 0;
- for (auto iter = pTable->indexes.begin(); iter != pTable->indexes.end(); ++iter, i++) {
+ for (auto iter = pTable->getIndexes().begin(); iter != pTable->getIndexes().end(); ++iter, i++) {
- int rc = write_index((*iter)->index,
- pTable->db,
- pTable->schema,
- pTable->name,
+ int rc = write_index((*iter)->getIndex(),
+ pTable->getDb(),
+ pTable->getSchema(),
+ pTable->getName(),
i);
if (rc != SQLITE_OK) {
pTable->setError(sqlite3_mprintf("Error saving index (%d): %s",
rc,
- sqlite3_errmsg(pTable->db)));
+ sqlite3_errmsg(pTable->getDb())));
return rc;
}
}
@@ -1300,19 +1362,19 @@ static int vssIndexSync(sqlite3_vtab *pVTab) {
pTable->setError(sqlite3_mprintf("Error during synchroning index. Full error: %s",
e.msg.c_str()));
- for (auto iter = pTable->indexes.begin(); iter != pTable->indexes.end(); ++iter) {
+ for (auto iter = pTable->getIndexes().begin(); iter != pTable->getIndexes().end(); ++iter) {
- (*iter)->insert_ids.clear();
- (*iter)->insert_ids.shrink_to_fit();
+ (*iter)->getInsert_ids().clear();
+ (*iter)->getInsert_ids().shrink_to_fit();
- (*iter)->insert_data.clear();
- (*iter)->insert_data.shrink_to_fit();
+ (*iter)->getInsert_data().clear();
+ (*iter)->getInsert_data().shrink_to_fit();
- (*iter)->delete_ids.clear();
- (*iter)->delete_ids.shrink_to_fit();
+ (*iter)->getDelete_ids().clear();
+ (*iter)->getDelete_ids().shrink_to_fit();
- (*iter)->trainings.clear();
- (*iter)->trainings.shrink_to_fit();
+ (*iter)->getTrainings().clear();
+ (*iter)->getTrainings().shrink_to_fit();
}
return SQLITE_ERROR;
@@ -1325,19 +1387,19 @@ static int vssIndexRollback(sqlite3_vtab *pVTab) {
auto pTable = static_cast(pVTab);
- for (auto iter = pTable->indexes.begin(); iter != pTable->indexes.end(); ++iter) {
+ for (auto iter = pTable->getIndexes().begin(); iter != pTable->getIndexes().end(); ++iter) {
- (*iter)->trainings.clear();
- (*iter)->trainings.shrink_to_fit();
+ (*iter)->getTrainings().clear();
+ (*iter)->getTrainings().shrink_to_fit();
- (*iter)->insert_data.clear();
- (*iter)->insert_data.shrink_to_fit();
+ (*iter)->getInsert_data().clear();
+ (*iter)->getInsert_data().shrink_to_fit();
- (*iter)->insert_ids.clear();
- (*iter)->insert_ids.shrink_to_fit();
+ (*iter)->getInsert_ids().clear();
+ (*iter)->getInsert_ids().shrink_to_fit();
- (*iter)->delete_ids.clear();
- (*iter)->delete_ids.shrink_to_fit();
+ (*iter)->getDelete_ids().clear();
+ (*iter)->getDelete_ids().shrink_to_fit();
}
return SQLITE_OK;
}
@@ -1354,15 +1416,15 @@ static int vssIndexUpdate(sqlite3_vtab *pVTab,
// DELETE operation
sqlite3_int64 rowid_to_delete = sqlite3_value_int64(argv[0]);
- auto rc = shadow_data_delete(pTable->db,
- pTable->schema,
- pTable->name,
+ auto rc = shadow_data_delete(pTable->getDb(),
+ pTable->getSchema(),
+ pTable->getName(),
rowid_to_delete);
if (rc != SQLITE_OK)
return rc;
- for (auto iter = pTable->indexes.begin(); iter != pTable->indexes.end(); ++iter) {
- (*iter)->delete_ids.push_back(rowid_to_delete);
+ for (auto iter = pTable->getIndexes().begin(); iter != pTable->getIndexes().end(); ++iter) {
+ (*iter)->getDelete_ids().push_back(rowid_to_delete);
}
} else if (argc > 1 && sqlite3_value_type(argv[0]) == SQLITE_NULL) {
@@ -1380,13 +1442,13 @@ static int vssIndexUpdate(sqlite3_vtab *pVTab,
bool inserted_rowid = false;
auto i = 0;
- for (auto iter = pTable->indexes.begin(); iter != pTable->indexes.end(); ++iter, i++) {
+ for (auto iter = pTable->getIndexes().begin(); iter != pTable->getIndexes().end(); ++iter, i++) {
- if ((vec = pTable->vector_api->xValueAsVector(
+ if ((vec = pTable->getVector0_api()->xValueAsVector(
argv[2 + VSS_INDEX_COLUMN_VECTORS + i])) != nullptr) {
// Make sure the index is already trained, if it's needed
- if (!(*iter)->index->is_trained) {
+ if (!(*iter)->getIndex()->is_trained) {
pTable->setError(sqlite3_mprintf("Index at i=%d requires training "
"before inserting data.",
@@ -1397,9 +1459,9 @@ static int vssIndexUpdate(sqlite3_vtab *pVTab,
if (!inserted_rowid) {
- auto rc = shadow_data_insert(pTable->db,
- pTable->schema,
- pTable->name,
+ auto rc = shadow_data_insert(pTable->getDb(),
+ pTable->getSchema(),
+ pTable->getName(),
rowid);
if (rc != SQLITE_OK)
return rc;
@@ -1407,13 +1469,13 @@ static int vssIndexUpdate(sqlite3_vtab *pVTab,
inserted_rowid = true;
}
- (*iter)->insert_data.reserve((*iter)->insert_data.size() + vec->size());
- (*iter)->insert_data.insert(
- (*iter)->insert_data.end(),
+ (*iter)->getInsert_data().reserve((*iter)->getInsert_data().size() + vec->size());
+ (*iter)->getInsert_data().insert(
+ (*iter)->getInsert_data().end(),
vec->begin(),
vec->end());
- (*iter)->insert_ids.push_back(rowid);
+ (*iter)->getInsert_ids().push_back(rowid);
*pRowid = rowid;
}
@@ -1426,14 +1488,14 @@ static int vssIndexUpdate(sqlite3_vtab *pVTab,
if (operation.compare("training") == 0) {
auto i = 0;
- for (auto iter = pTable->indexes.begin(); iter != pTable->indexes.end(); ++iter, i++) {
+ for (auto iter = pTable->getIndexes().begin(); iter != pTable->getIndexes().end(); ++iter, i++) {
- vec_ptr vec = pTable->vector_api->xValueAsVector(argv[2 + VSS_INDEX_COLUMN_VECTORS + i]);
+ vec_ptr vec = pTable->getVector0_api()->xValueAsVector(argv[2 + VSS_INDEX_COLUMN_VECTORS + i]);
if (vec != nullptr) {
- (*iter)->trainings.reserve((*iter)->trainings.size() + vec->size());
- (*iter)->trainings.insert(
- (*iter)->trainings.end(),
+ (*iter)->getTrainings().reserve((*iter)->getTrainings().size() + vec->size());
+ (*iter)->getTrainings().insert(
+ (*iter)->getTrainings().end(),
vec->begin(),
vec->end());
}
From 53f6206dc821f13cecdcd2ea7452ca5c1a40fa8d Mon Sep 17 00:00:00 2001
From: Thomas Hansen
Date: Mon, 26 Jun 2023 09:49:18 +0300
Subject: [PATCH 07/66] Moving SqlStatement into separate header file
---
src/sql-statement.h | 108 ++++++++++++++++++++++++++++++++++++++++++++
src/sqlite-vss.cpp | 101 +----------------------------------------
2 files changed, 109 insertions(+), 100 deletions(-)
create mode 100644 src/sql-statement.h
diff --git a/src/sql-statement.h b/src/sql-statement.h
new file mode 100644
index 0000000..bac04ec
--- /dev/null
+++ b/src/sql-statement.h
@@ -0,0 +1,108 @@
+
+#ifndef SQL_STATEMENT_H
+#define SQL_STATEMENT_H
+
+#include "sqlite-vss.h"
+
+class SqlStatement {
+
+public:
+  // Wraps one SQL text and its prepared statement for a single connection; both are released on destruction.
+  // Takes ownership of `sql`: it is later passed to sqlite3_free(), so it must come from SQLite's allocator (e.g. sqlite3_mprintf).
+  SqlStatement(sqlite3 *db, const char * sql) : db(db), stmt(nullptr), sql(sql) { }
+  SqlStatement(const SqlStatement &) = delete;            // a copy would finalize/free the same handles twice
+  SqlStatement &operator=(const SqlStatement &) = delete;
+
+  ~SqlStatement() {
+    // Release whatever finalize() has not already released.
+    if (stmt != nullptr)
+      sqlite3_finalize(stmt);
+    if (sql != nullptr)
+      sqlite3_free((void *)sql);
+  }
+
+  int prepare() {
+    // Compile `sql` into `stmt`. SQLITE_OK with a null statement is also treated as failure.
+    auto res = sqlite3_prepare_v2(db, sql, -1, &stmt, nullptr);
+    if (res != SQLITE_OK || stmt == nullptr) {
+      // Callers only ever see SQLITE_ERROR here, not the specific SQLite code.
+      stmt = nullptr;
+      return SQLITE_ERROR;
+    }
+    return res;
+  }
+
+  int bind_int64(int colNo, sqlite3_int64 value) {
+    // Bind a 64-bit integer to parameter `colNo`; returns the SQLite result code.
+    return sqlite3_bind_int64(stmt, colNo, value);
+  }
+
+  int bind_blob64(int colNo, const void * data, int size) {
+    // SQLITE_TRANSIENT: SQLite copies `data`, so the caller's buffer may be released after this call.
+    return sqlite3_bind_blob64(stmt, colNo, data, size, SQLITE_TRANSIENT);
+  }
+
+  int bind_null(int colNo) {
+    // Bind SQL NULL to parameter `colNo`.
+    return sqlite3_bind_null(stmt, colNo);
+  }
+
+  int bind_pointer(int paramNo, void *ptr, const char * name) {
+    // No destructor is registered (last argument is nullptr), so `ptr` stays owned by the caller.
+    return sqlite3_bind_pointer(stmt, paramNo, ptr, name, nullptr);
+  }
+
+  int step() {
+    // Advance the prepared statement once; returns sqlite3_step()'s result code unchanged.
+    return sqlite3_step(stmt);
+  }
+
+  int exec() {
+    // Run the raw SQL text directly on the connection, without using the prepared statement.
+    return sqlite3_exec(db, sql, nullptr, nullptr, nullptr);
+  }
+
+  int declare_vtab() {
+    // Use the SQL text as the schema declaration of a virtual table.
+    return sqlite3_declare_vtab(db, sql);
+  }
+
+  const void * column_blob(int colNo) {
+    // Blob pointer for result column `colNo` of the current row.
+    return sqlite3_column_blob(stmt, colNo);
+  }
+
+  int column_bytes(int colNo) {
+    // Size in bytes of result column `colNo` of the current row.
+    return sqlite3_column_bytes(stmt, colNo);
+  }
+
+  sqlite3_int64 column_int64(int colNo) {
+    // Returns the full 64-bit value; the previous `int` return type truncated it.
+    return sqlite3_column_int64(stmt, colNo);
+  }
+
+  sqlite3_int64 last_insert_rowid() {
+    // Rowids are 64-bit; the previous `int` return type truncated large rowids.
+    return sqlite3_last_insert_rowid(db);
+  }
+
+  void finalize() {
+    // Early cleanup: both pointers are nulled, so the destructor or a second call does nothing.
+    if (stmt != nullptr)
+      sqlite3_finalize(stmt);
+    stmt = nullptr;
+    if (sql != nullptr)
+      sqlite3_free((void *)sql);
+    sql = nullptr;
+  }
+
+private:
+  // Declaration order matches the constructor's initializer list.
+  sqlite3 *db;
+  sqlite3_stmt *stmt;
+  const char * sql;
+};
+
+#endif // SQL_STATEMENT_H
+
diff --git a/src/sqlite-vss.cpp b/src/sqlite-vss.cpp
index ed2074d..52ce8ca 100644
--- a/src/sqlite-vss.cpp
+++ b/src/sqlite-vss.cpp
@@ -21,6 +21,7 @@ SQLITE_EXTENSION_INIT1
#include
#include "sqlite-vector.h"
+#include "sql-statement.h"
using namespace std;
@@ -246,106 +247,6 @@ void delVssRangeSearchParams(void *p) {
delete self;
}
-class SqlStatement {
-
-public:
-
- SqlStatement(sqlite3 *db, const char * sql) : db(db), sql(sql), stmt(nullptr) {
-
- this->sql = sql;
- }
-
- ~SqlStatement() {
-
- if (stmt != nullptr)
- sqlite3_finalize(stmt);
- if (sql != nullptr)
- sqlite3_free((void *)sql);
- }
-
- int prepare() {
-
- auto res = sqlite3_prepare_v2(db, sql, -1, &stmt, nullptr);
- if (res != SQLITE_OK || stmt == nullptr) {
-
- stmt = nullptr;
- return SQLITE_ERROR;
- }
- return res;
- }
-
- int bind_int64(int colNo, sqlite3_int64 value) {
-
- return sqlite3_bind_int64(stmt, colNo, value);
- }
-
- int bind_blob64(int colNo, const void * data, int size) {
-
- return sqlite3_bind_blob64(stmt, colNo, data, size, SQLITE_TRANSIENT);
- }
-
- int bind_null(int colNo) {
-
- return sqlite3_bind_null(stmt, colNo);
- }
-
- int bind_pointer(int paramNo, void *ptr, const char * name) {
-
- return sqlite3_bind_pointer(stmt, paramNo, ptr, name, nullptr);
- }
-
- int step() {
-
- return sqlite3_step(stmt);
- }
-
- int exec() {
-
- return sqlite3_exec(db, sql, nullptr, nullptr, nullptr);
- }
-
- int declare_vtab() {
-
- return sqlite3_declare_vtab(db, sql);
- }
-
- const void * column_blob(int colNo) {
-
- return sqlite3_column_blob(stmt, colNo);
- }
-
- int column_bytes(int colNo) {
-
- return sqlite3_column_bytes(stmt, colNo);
- }
-
- int column_int64(int colNo) {
-
- return sqlite3_column_int64(stmt, colNo);
- }
-
- int last_insert_rowid() {
-
- return sqlite3_last_insert_rowid(db);
- }
-
- void finalize() {
-
- if (stmt != nullptr)
- sqlite3_finalize(stmt);
- stmt = nullptr;
- if (sql != nullptr)
- sqlite3_free((void *)sql);
- sql = nullptr;
- }
-
-private:
-
- sqlite3 *db;
- sqlite3_stmt *stmt;
- const char * sql;
-};
-
#pragma endregion
#pragma region Vtab
From e5f412d216c839a692a55401a2bf67a991e90532 Mon Sep 17 00:00:00 2001
From: Thomas Hansen
Date: Mon, 26 Jun 2023 09:58:23 +0300
Subject: [PATCH 08/66] Better structure
Moving concepts into separate files
---
src/sqlite-vss.cpp | 200 +---------------------------------
src/vss/calculations.h | 177 ++++++++++++++++++++++++++++++
src/vss/meta-methods.h | 35 ++++++
src/{ => vss}/sql-statement.h | 4 +
4 files changed, 220 insertions(+), 196 deletions(-)
create mode 100644 src/vss/calculations.h
create mode 100644 src/vss/meta-methods.h
rename src/{ => vss}/sql-statement.h (92%)
diff --git a/src/sqlite-vss.cpp b/src/sqlite-vss.cpp
index 52ce8ca..d9f6912 100644
--- a/src/sqlite-vss.cpp
+++ b/src/sqlite-vss.cpp
@@ -20,206 +20,14 @@ SQLITE_EXTENSION_INIT1
#include
#include
-#include "sqlite-vector.h"
-#include "sql-statement.h"
-
using namespace std;
typedef unique_ptr> vec_ptr;
-#pragma region Meta
-
-static void vss_version(sqlite3_context *context, int argc,
- sqlite3_value **argv) {
-
- sqlite3_result_text(context, SQLITE_VSS_VERSION, -1, SQLITE_STATIC);
-}
-
-static void vss_debug(sqlite3_context *context,
- int argc,
- sqlite3_value **argv) {
-
- auto resTxt = sqlite3_mprintf(
- "version: %s\nfaiss version: %d.%d.%d\nfaiss compile options: %s",
- SQLITE_VSS_VERSION,
- FAISS_VERSION_MAJOR,
- FAISS_VERSION_MINOR,
- FAISS_VERSION_PATCH,
- faiss::get_compile_options().c_str());
-
- sqlite3_result_text(context, resTxt, -1, SQLITE_TRANSIENT);
- sqlite3_free(resTxt);
-}
-
-#pragma endregion
-
-#pragma region Distances
-
-static void vss_distance_l1(sqlite3_context *context,
- int argc,
- sqlite3_value **argv) {
-
- auto vector_api = (vector0_api *)sqlite3_user_data(context);
-
- vec_ptr lhs = vector_api->xValueAsVector(argv[0]);
- if (lhs == nullptr) {
- sqlite3_result_error(context, "LHS is not a vector", -1);
- return;
- }
-
- vec_ptr rhs = vector_api->xValueAsVector(argv[1]);
- if (rhs == nullptr) {
- sqlite3_result_error(context, "RHS is not a vector", -1);
- return;
- }
-
- if (lhs->size() != rhs->size()) {
- sqlite3_result_error(context, "LHS and RHS are not vectors of the same size",
- -1);
- return;
- }
-
- sqlite3_result_double(context, faiss::fvec_L1(lhs->data(), rhs->data(), lhs->size()));
-}
-
-static void vss_distance_l2(sqlite3_context *context, int argc,
- sqlite3_value **argv) {
-
- auto vector_api = (vector0_api *)sqlite3_user_data(context);
-
- vec_ptr lhs = vector_api->xValueAsVector(argv[0]);
- if (lhs == nullptr) {
- sqlite3_result_error(context, "LHS is not a vector", -1);
- return;
- }
-
- vec_ptr rhs = vector_api->xValueAsVector(argv[1]);
- if (rhs == nullptr) {
- sqlite3_result_error(context, "RHS is not a vector", -1);
- return;
- }
-
- if (lhs->size() != rhs->size()) {
- sqlite3_result_error(context, "LHS and RHS are not vectors of the same size",
- -1);
- return;
- }
-
- sqlite3_result_double(context, faiss::fvec_L2sqr(lhs->data(), rhs->data(), lhs->size()));
-}
-
-static void vss_distance_linf(sqlite3_context *context, int argc,
- sqlite3_value **argv) {
-
- auto vector_api = (vector0_api *)sqlite3_user_data(context);
-
- vec_ptr lhs = vector_api->xValueAsVector(argv[0]);
- if (lhs == nullptr) {
- sqlite3_result_error(context, "LHS is not a vector", -1);
- return;
- }
-
- vec_ptr rhs = vector_api->xValueAsVector(argv[1]);
- if (rhs == nullptr) {
- sqlite3_result_error(context, "RHS is not a vector", -1);
- return;
- }
-
- if (lhs->size() != rhs->size()) {
- sqlite3_result_error(context, "LHS and RHS are not vectors of the same size",
- -1);
- return;
- }
-
- sqlite3_result_double(context, faiss::fvec_Linf(lhs->data(), rhs->data(), lhs->size()));
-}
-
-static void vss_inner_product(sqlite3_context *context, int argc,
- sqlite3_value **argv) {
-
- auto vector_api = (vector0_api *)sqlite3_user_data(context);
-
- vec_ptr lhs = vector_api->xValueAsVector(argv[0]);
- if (lhs == nullptr) {
- sqlite3_result_error(context, "LHS is not a vector", -1);
- return;
- }
-
- vec_ptr rhs = vector_api->xValueAsVector(argv[1]);
- if (rhs == nullptr) {
- sqlite3_result_error(context, "RHS is not a vector", -1);
- return;
- }
-
- if (lhs->size() != rhs->size()) {
- sqlite3_result_error(context, "LHS and RHS are not vectors of the same size",
- -1);
- return;
- }
-
- sqlite3_result_double(context,
- faiss::fvec_inner_product(lhs->data(), rhs->data(), lhs->size()));
-}
-
-static void vss_fvec_add(sqlite3_context *context, int argc,
- sqlite3_value **argv) {
-
- auto vector_api = (vector0_api *)sqlite3_user_data(context);
-
- vec_ptr lhs = vector_api->xValueAsVector(argv[0]);
- if (lhs == nullptr) {
- sqlite3_result_error(context, "LHS is not a vector", -1);
- return;
- }
-
- vec_ptr rhs = vector_api->xValueAsVector(argv[1]);
- if (rhs == nullptr) {
- sqlite3_result_error(context, "RHS is not a vector", -1);
- return;
- }
-
- if (lhs->size() != rhs->size()) {
- sqlite3_result_error(context, "LHS and RHS are not vectors of the same size",
- -1);
- return;
- }
-
- auto size = lhs->size();
- vec_ptr c(new vector(size));
- faiss::fvec_add(size, lhs->data(), rhs->data(), c->data());
-
- vector_api->xResultVector(context, c.get());
-}
-
-static void vss_fvec_sub(sqlite3_context *context, int argc,
- sqlite3_value **argv) {
-
- auto vector_api = (vector0_api *)sqlite3_user_data(context);
-
- vec_ptr lhs = vector_api->xValueAsVector(argv[0]);
- if (lhs == nullptr) {
- sqlite3_result_error(context, "LHS is not a vector", -1);
- return;
- }
-
- vec_ptr rhs = vector_api->xValueAsVector(argv[1]);
- if (rhs == nullptr) {
- sqlite3_result_error(context, "RHS is not a vector", -1);
- return;
- }
-
- if (lhs->size() != rhs->size()) {
- sqlite3_result_error(context, "LHS and RHS are not vectors of the same size", -1);
- return;
- }
-
- int size = lhs->size();
- vec_ptr c = vec_ptr(new vector(size));
- faiss::fvec_sub(size, lhs->data(), rhs->data(), c->data());
- vector_api->xResultVector(context, c.get());
-}
-
-#pragma endregion
+#include "sqlite-vector.h"
+#include "vss/sql-statement.h"
+#include "vss/meta-methods.h"
+#include "vss/calculations.h"
#pragma region Structs and cleanup functions
diff --git a/src/vss/calculations.h b/src/vss/calculations.h
new file mode 100644
index 0000000..3cef0e7
--- /dev/null
+++ b/src/vss/calculations.h
@@ -0,0 +1,177 @@
+
+#ifndef VSS_CALCULATIONS_H
+#define VSS_CALCULATIONS_H
+
+#include "sqlite-vss.h"
+#include
+#include
+
+using namespace std;
+
+typedef unique_ptr> vec_ptr;
+
+static void vss_distance_l1(sqlite3_context *context,
+                            int argc,
+                            sqlite3_value **argv) {
+  // SQL function: result is faiss::fvec_L1 over the vectors in argv[0] and argv[1].
+  auto api = static_cast<vector0_api *>(sqlite3_user_data(context));
+
+  const vec_ptr left = api->xValueAsVector(argv[0]);
+  if (!left) {
+    sqlite3_result_error(context, "LHS is not a vector", -1);
+    return;
+  }
+
+  const vec_ptr right = api->xValueAsVector(argv[1]);
+  if (!right) {
+    sqlite3_result_error(context, "RHS is not a vector", -1);
+    return;
+  }
+
+  const auto dims = left->size();
+  if (dims != right->size()) {
+    sqlite3_result_error(context, "LHS and RHS are not vectors of the same size", -1);
+    return;
+  }
+
+  sqlite3_result_double(context, faiss::fvec_L1(left->data(), right->data(), dims));
+}
+
+static void vss_distance_l2(sqlite3_context *context, int argc,
+                            sqlite3_value **argv) {
+  // SQL function: result is faiss::fvec_L2sqr over the vectors in argv[0] and argv[1].
+  auto api = static_cast<vector0_api *>(sqlite3_user_data(context));
+
+  const vec_ptr left = api->xValueAsVector(argv[0]);
+  if (!left) {
+    sqlite3_result_error(context, "LHS is not a vector", -1);
+    return;
+  }
+
+  const vec_ptr right = api->xValueAsVector(argv[1]);
+  if (!right) {
+    sqlite3_result_error(context, "RHS is not a vector", -1);
+    return;
+  }
+
+  const auto dims = left->size();
+  if (dims != right->size()) {
+    sqlite3_result_error(context, "LHS and RHS are not vectors of the same size", -1);
+    return;
+  }
+
+  sqlite3_result_double(context, faiss::fvec_L2sqr(left->data(), right->data(), dims));
+}
+
+static void vss_distance_linf(sqlite3_context *context, int argc,
+                              sqlite3_value **argv) {
+  // SQL function: result is faiss::fvec_Linf over the vectors in argv[0] and argv[1].
+  auto api = static_cast<vector0_api *>(sqlite3_user_data(context));
+
+  const vec_ptr left = api->xValueAsVector(argv[0]);
+  if (!left) {
+    sqlite3_result_error(context, "LHS is not a vector", -1);
+    return;
+  }
+
+  const vec_ptr right = api->xValueAsVector(argv[1]);
+  if (!right) {
+    sqlite3_result_error(context, "RHS is not a vector", -1);
+    return;
+  }
+
+  const auto dims = left->size();
+  if (dims != right->size()) {
+    sqlite3_result_error(context, "LHS and RHS are not vectors of the same size", -1);
+    return;
+  }
+
+  sqlite3_result_double(context, faiss::fvec_Linf(left->data(), right->data(), dims));
+}
+
+static void vss_inner_product(sqlite3_context *context, int argc,
+                              sqlite3_value **argv) {
+  // SQL function: result is faiss::fvec_inner_product over the vectors in argv[0] and argv[1].
+  auto api = static_cast<vector0_api *>(sqlite3_user_data(context));
+
+  const vec_ptr left = api->xValueAsVector(argv[0]);
+  if (!left) {
+    sqlite3_result_error(context, "LHS is not a vector", -1);
+    return;
+  }
+
+  const vec_ptr right = api->xValueAsVector(argv[1]);
+  if (!right) {
+    sqlite3_result_error(context, "RHS is not a vector", -1);
+    return;
+  }
+
+  const auto dims = left->size();
+  if (dims != right->size()) {
+    sqlite3_result_error(context, "LHS and RHS are not vectors of the same size", -1);
+    return;
+  }
+
+  sqlite3_result_double(context,
+                        faiss::fvec_inner_product(left->data(), right->data(), dims));
+}
+
+static void vss_fvec_add(sqlite3_context *context, int argc,
+                         sqlite3_value **argv) {
+  // SQL function: element-wise sum of the vectors in argv[0] and argv[1], returned as a vector.
+  auto vector_api = (vector0_api *)sqlite3_user_data(context);
+
+  vec_ptr lhs = vector_api->xValueAsVector(argv[0]);
+  if (lhs == nullptr) {
+    sqlite3_result_error(context, "LHS is not a vector", -1);
+    return;
+  }
+
+  vec_ptr rhs = vector_api->xValueAsVector(argv[1]);
+  if (rhs == nullptr) {
+    sqlite3_result_error(context, "RHS is not a vector", -1);
+    return;
+  }
+
+  const auto size = lhs->size();
+  if (size != rhs->size()) {
+    sqlite3_result_error(context, "LHS and RHS are not vectors of the same size", -1);
+    return;
+  }
+
+  // Output buffer lives on the stack; the element type is float, matching the faiss fvec_* routines.
+  vector<float> sum(size);
+  faiss::fvec_add(size, lhs->data(), rhs->data(), sum.data());
+
+  vector_api->xResultVector(context, &sum);
+}
+
+static void vss_fvec_sub(sqlite3_context *context, int argc,
+                         sqlite3_value **argv) {
+  // SQL function: element-wise difference argv[0] - argv[1], returned as a vector.
+  auto vector_api = (vector0_api *)sqlite3_user_data(context);
+
+  vec_ptr lhs = vector_api->xValueAsVector(argv[0]);
+  if (lhs == nullptr) {
+    sqlite3_result_error(context, "LHS is not a vector", -1);
+    return;
+  }
+
+  vec_ptr rhs = vector_api->xValueAsVector(argv[1]);
+  if (rhs == nullptr) {
+    sqlite3_result_error(context, "RHS is not a vector", -1);
+    return;
+  }
+
+  const auto size = lhs->size();  // keep the container's size type; `int` narrowed it
+  if (size != rhs->size()) {
+    sqlite3_result_error(context, "LHS and RHS are not vectors of the same size", -1);
+    return;
+  }
+
+  vector<float> diff(size);
+  faiss::fvec_sub(size, lhs->data(), rhs->data(), diff.data());
+  vector_api->xResultVector(context, &diff);
+}
+
+#endif // VSS_CALCULATIONS_H
diff --git a/src/vss/meta-methods.h b/src/vss/meta-methods.h
new file mode 100644
index 0000000..2214d15
--- /dev/null
+++ b/src/vss/meta-methods.h
@@ -0,0 +1,35 @@
+
+#ifndef META_METHODS_H
+#define META_METHODS_H
+
+#include "sqlite-vss.h"
+#include
+#include
+
+
+static void vss_version(sqlite3_context *context,
+                        int argc,
+                        sqlite3_value **argv) {
+  // SQL function: returns SQLITE_VSS_VERSION as text; SQLITE_STATIC means SQLite does not copy the string.
+  sqlite3_result_text(context, SQLITE_VSS_VERSION, -1, SQLITE_STATIC);
+}
+
+static void vss_debug(sqlite3_context *context,
+                      int argc,
+                      sqlite3_value **argv) {
+  // SQL function: returns the extension version plus the faiss version and compile options as text.
+  char *report = sqlite3_mprintf(
+      "version: %s\nfaiss version: %d.%d.%d\nfaiss compile options: %s",
+      SQLITE_VSS_VERSION,
+      FAISS_VERSION_MAJOR,
+      FAISS_VERSION_MINOR,
+      FAISS_VERSION_PATCH,
+      faiss::get_compile_options().c_str());
+  // SQLITE_TRANSIENT makes SQLite take its own copy, so the buffer can be freed right away.
+  sqlite3_result_text(context, report, -1, SQLITE_TRANSIENT);
+  sqlite3_free(report);
+}
+
+
+#endif // META_METHODS_H
+
diff --git a/src/sql-statement.h b/src/vss/sql-statement.h
similarity index 92%
rename from src/sql-statement.h
rename to src/vss/sql-statement.h
index bac04ec..b1bdb29 100644
--- a/src/sql-statement.h
+++ b/src/vss/sql-statement.h
@@ -4,6 +4,10 @@
#include "sqlite-vss.h"
+/*
+ * Helper class wrapping a single SQLite SQL statement. Its destructor finalizes the prepared statement
+ * and frees the SQL text, so cleanup is automatic and deterministic.
+ */
class SqlStatement {
public:
From 8590e0b70bd02c5521d83bc2a3a79a23253d33a4 Mon Sep 17 00:00:00 2001
From: Thomas Hansen
Date: Mon, 26 Jun 2023 10:12:49 +0300
Subject: [PATCH 09/66] Moving more stuff into separate files
---
src/sqlite-vss.cpp | 200 ++-----------------------------------
src/vss/calculations.h | 8 +-
src/vss/inclusions.h | 35 +++++++
src/vss/meta-methods.h | 6 +-
src/vss/sql-statement.h | 3 +-
src/vss/vss-index-cursor.h | 43 ++++++++
src/vss/vss-index-vtab.h | 82 +++++++++++++++
src/vss/vss-index.h | 55 ++++++++++
8 files changed, 228 insertions(+), 204 deletions(-)
create mode 100644 src/vss/inclusions.h
create mode 100644 src/vss/vss-index-cursor.h
create mode 100644 src/vss/vss-index-vtab.h
create mode 100644 src/vss/vss-index.h
diff --git a/src/sqlite-vss.cpp b/src/sqlite-vss.cpp
index d9f6912..6f1a978 100644
--- a/src/sqlite-vss.cpp
+++ b/src/sqlite-vss.cpp
@@ -1,33 +1,14 @@
-#include "sqlite-vss.h"
-#include
-#include
-
-#include "sqlite3ext.h"
-SQLITE_EXTENSION_INIT1
-
-#include
-#include
-#include
-#include
-
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-
-using namespace std;
-typedef unique_ptr> vec_ptr;
+#include "sqlite-vss.h"
+#include "vss/inclusions.h"
#include "sqlite-vector.h"
#include "vss/sql-statement.h"
#include "vss/meta-methods.h"
#include "vss/calculations.h"
+#include "vss/vss-index.h"
+#include "vss/vss-index-vtab.h"
+#include "vss/vss-index-cursor.h"
#pragma region Structs and cleanup functions
@@ -78,7 +59,8 @@ static void vssSearchParamsFunc(sqlite3_context *context,
sqlite3_result_pointer(context, params, "vss0_searchparams", delVssSearchParams);
}
-static void vssRangeSearchParamsFunc(sqlite3_context *context, int argc,
+static void vssRangeSearchParamsFunc(sqlite3_context *context,
+ int argc,
sqlite3_value **argv) {
auto vector_api = (vector0_api *)sqlite3_user_data(context);
@@ -221,7 +203,9 @@ static int shadow_data_delete(sqlite3 *db,
return SQLITE_OK;
}
-static faiss::Index *read_index_select(sqlite3 *db, const char *name, int indexId) {
+static faiss::Index *read_index_select(sqlite3 *db,
+ const char *name,
+ int indexId) {
SqlStatement select(db,
sqlite3_mprintf("select idx from \"%w_index\" where rowid = ?",
@@ -296,170 +280,6 @@ static int drop_shadow_tables(sqlite3 *db, char *name) {
return SQLITE_OK;
}
-#define VSS_SEARCH_FUNCTION SQLITE_INDEX_CONSTRAINT_FUNCTION
-#define VSS_RANGE_SEARCH_FUNCTION SQLITE_INDEX_CONSTRAINT_FUNCTION + 1
-
-// Wrapper around a single faiss index, with training data, insert records, and
-// delete records.
-class vss_index {
-
-public:
-
- explicit vss_index(faiss::Index *index) : index(index) {}
-
- ~vss_index() {
- if (index != nullptr) {
- delete index;
- }
- }
-
- faiss::Index * getIndex() {
-
- return index;
- }
-
- vector & getTrainings() {
-
- return trainings;
- }
-
- vector & getInsert_data() {
-
- return insert_data;
- }
-
- vector & getInsert_ids() {
-
- return insert_ids;
- }
-
- vector & getDelete_ids() {
-
- return delete_ids;
- }
-
-private:
-
- faiss::Index *index;
- vector trainings;
- vector insert_data;
- vector insert_ids;
- vector delete_ids;
-};
-
-class vss_index_vtab : public sqlite3_vtab {
-
-public:
-
- vss_index_vtab(sqlite3 *db, vector0_api *vector_api, char *schema, char *name)
- : db(db),
- vector_api(vector_api),
- schema(schema),
- name(name) {
-
- this->zErrMsg = nullptr;
- }
-
- ~vss_index_vtab() {
-
- if (name)
- sqlite3_free(name);
- if (schema)
- sqlite3_free(schema);
- if (this->zErrMsg != nullptr)
- delete this->zErrMsg;
- for (auto iter = indexes.begin(); iter != indexes.end(); ++iter) {
- delete (*iter);
- }
- }
-
- void setError(char *error) {
- if (this->zErrMsg != nullptr) {
- delete this->zErrMsg;
- }
- this->zErrMsg = error;
- }
-
- sqlite3 * getDb() {
-
- return db;
- }
-
- vector0_api * getVector0_api() {
-
- return vector_api;
- }
-
- vector & getIndexes() {
-
- return indexes;
- }
-
- char * getName() {
-
- return name;
- }
-
- char * getSchema() {
-
- return schema;
- }
-
-private:
-
- sqlite3 *db;
- vector0_api *vector_api;
-
- // Name of the virtual table. Must be freed during disconnect
- char *name;
-
- // Name of the schema the virtual table exists in. Must be freed during
- // disconnect
- char *schema;
-
- // Vector holding all the faiss Indices the vtab uses, and their state,
- // implying which items are to be deleted and inserted.
- vector indexes;
-};
-
-enum QueryType { search, range_search, fullscan };
-
-struct vss_index_cursor : public sqlite3_vtab_cursor {
-
- explicit vss_index_cursor(vss_index_vtab *table)
- : table(table),
- sqlite3_vtab_cursor({0}),
- stmt(nullptr),
- sql(nullptr) { }
-
- ~vss_index_cursor() {
- if (stmt != nullptr)
- sqlite3_finalize(stmt);
- if (sql != nullptr)
- sqlite3_free(sql);
- }
-
- vss_index_vtab *table;
-
- sqlite3_int64 iCurrent;
- sqlite3_int64 iRowid;
-
- QueryType query_type;
-
- // For query_type == QueryType::search
- sqlite3_int64 limit;
- vector search_ids;
- vector search_distances;
-
- // For query_type == QueryType::range_search
- unique_ptr range_search_result;
-
- // For query_type == QueryType::fullscan
- sqlite3_stmt *stmt;
- char *sql;
- int step_result;
-};
-
struct VssIndexColumn {
string name;
diff --git a/src/vss/calculations.h b/src/vss/calculations.h
index 3cef0e7..8d55e95 100644
--- a/src/vss/calculations.h
+++ b/src/vss/calculations.h
@@ -2,13 +2,7 @@
#ifndef VSS_CALCULATIONS_H
#define VSS_CALCULATIONS_H
-#include "sqlite-vss.h"
-#include
-#include
-
-using namespace std;
-
-typedef unique_ptr> vec_ptr;
+#include "inclusions.h"
static void vss_distance_l1(sqlite3_context *context,
int argc,
diff --git a/src/vss/inclusions.h b/src/vss/inclusions.h
new file mode 100644
index 0000000..006d432
--- /dev/null
+++ b/src/vss/inclusions.h
@@ -0,0 +1,35 @@
+
+#ifndef VSS_INCLUSIONS_H
+#define VSS_INCLUSIONS_H
+
+#include
+#include
+
+#include "sqlite3ext.h"
+SQLITE_EXTENSION_INIT1
+
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+using namespace std;
+// Owning pointer to a float vector, the type returned by vector0_api::xValueAsVector.
+typedef unique_ptr<vector<float>> vec_ptr;
+// Kind of query a cursor is currently serving.
+enum QueryType { search, range_search, fullscan };
+// Constraint-operator codes for the vss search functions; parenthesised so the macro expands safely inside larger expressions.
+#define VSS_SEARCH_FUNCTION SQLITE_INDEX_CONSTRAINT_FUNCTION
+#define VSS_RANGE_SEARCH_FUNCTION (SQLITE_INDEX_CONSTRAINT_FUNCTION + 1)
+
+#endif // VSS_INCLUSIONS_H
diff --git a/src/vss/meta-methods.h b/src/vss/meta-methods.h
index 2214d15..05a70f7 100644
--- a/src/vss/meta-methods.h
+++ b/src/vss/meta-methods.h
@@ -2,10 +2,7 @@
#ifndef META_METHODS_H
#define META_METHODS_H
-#include "sqlite-vss.h"
-#include
-#include
-
+#include "inclusions.h"
static void vss_version(sqlite3_context *context,
int argc,
@@ -32,4 +29,3 @@ static void vss_debug(sqlite3_context *context,
#endif // META_METHODS_H
-
diff --git a/src/vss/sql-statement.h b/src/vss/sql-statement.h
index b1bdb29..cf4a076 100644
--- a/src/vss/sql-statement.h
+++ b/src/vss/sql-statement.h
@@ -2,7 +2,7 @@
#ifndef SQL_STATEMENT_H
#define SQL_STATEMENT_H
-#include "sqlite-vss.h"
+#include "inclusions.h"
/*
* Helper class encapsulating an SQL statement towards SQLite, with automatic and deterministic destruction
@@ -109,4 +109,3 @@ class SqlStatement {
};
#endif // SQL_STATEMENT_H
-
diff --git a/src/vss/vss-index-cursor.h b/src/vss/vss-index-cursor.h
new file mode 100644
index 0000000..21eaf0d
--- /dev/null
+++ b/src/vss/vss-index-cursor.h
@@ -0,0 +1,43 @@
+
+#ifndef VSS_INDEX_CURSOR_H
+#define VSS_INDEX_CURSOR_H
+
+#include "inclusions.h"
+
+struct vss_index_cursor : public sqlite3_vtab_cursor {
+ // Cursor over a vss_index_vtab; which members are in use depends on query_type.
+ explicit vss_index_cursor(vss_index_vtab *table)
+ : table(table),
+ sqlite3_vtab_cursor({0}),
+ stmt(nullptr),
+ sql(nullptr) { }
+ // Finalizes stmt and frees sql when they are set.
+ ~vss_index_cursor() {
+ if (stmt != nullptr)
+ sqlite3_finalize(stmt);
+ if (sql != nullptr)
+ sqlite3_free(sql);
+ }
+
+ vss_index_vtab *table;
+ // Not initialized by the constructor: iCurrent, iRowid, query_type, limit, step_result.
+ sqlite3_int64 iCurrent;
+ sqlite3_int64 iRowid;
+
+ QueryType query_type;
+
+ // For query_type == QueryType::search
+ sqlite3_int64 limit;
+ vector search_ids;
+ vector search_distances;
+
+ // For query_type == QueryType::range_search
+ unique_ptr range_search_result;
+
+ // For query_type == QueryType::fullscan
+ sqlite3_stmt *stmt;
+ char *sql;
+ int step_result;
+};
+
+#endif // VSS_INDEX_CURSOR_H
diff --git a/src/vss/vss-index-vtab.h b/src/vss/vss-index-vtab.h
new file mode 100644
index 0000000..5446927
--- /dev/null
+++ b/src/vss/vss-index-vtab.h
@@ -0,0 +1,82 @@
+
+#ifndef VSS_INDEX_VTAB_H
+#define VSS_INDEX_VTAB_H
+
+#include "inclusions.h"
+
+/*
+ * SQLite virtual table object backing a single vss0 table.
+ *
+ * Takes ownership of the schema and name strings (released with sqlite3_free)
+ * and of every vss_index pointer stored in indexes (released with delete).
+ */
+class vss_index_vtab : public sqlite3_vtab {
+
+public:
+
+    // Initializers follow member declaration order (db, vector_api, name, schema).
+    vss_index_vtab(sqlite3 *db, vector0_api *vector_api, char *schema, char *name)
+      : db(db),
+        vector_api(vector_api),
+        name(name),
+        schema(schema) {
+
+        this->zErrMsg = nullptr;
+    }
+
+    ~vss_index_vtab() {
+
+        // sqlite3_free() is a harmless no-op when handed a null pointer.
+        sqlite3_free(name);
+        sqlite3_free(schema);
+
+        // zErrMsg is handed to setError() from sqlite3_mprintf(), so it has to
+        // be released with sqlite3_free(), never with delete.
+        sqlite3_free(this->zErrMsg);
+
+        for (auto *idx : indexes) {
+            delete idx;
+        }
+    }
+
+    /*
+     * Replaces the pending error message, releasing any previous one.
+     * error must come from the sqlite3_malloc() family (e.g. sqlite3_mprintf).
+     */
+    void setError(char *error) {
+
+        sqlite3_free(this->zErrMsg);
+        this->zErrMsg = error;
+    }
+
+    // Database connection passed to the constructor.
+    sqlite3 * getDb() { return db; }
+
+    // vector0 API handle passed to the constructor.
+    vector0_api * getVector0_api() { return vector_api; }
+
+    // Mutable access to the index wrappers owned by this table.
+    vector<vss_index *> & getIndexes() { return indexes; }
+
+    // Name of the virtual table; the string stays owned by this object.
+    char * getName() { return name; }
+
+    // Name of the schema; the string stays owned by this object.
+    char * getSchema() { return schema; }
+
+private:
+
+    sqlite3 *db;
+    vector0_api *vector_api;
+
+    // Name of the virtual table. Must be freed during disconnect
+    char *name;
+
+    // Name of the schema the virtual table exists in. Must be freed during disconnect
+    char *schema;
+
+    // Wrappers for all faiss indexes the vtab uses, with their pending inserts/deletes.
+    vector<vss_index *> indexes;
+};
+
+#endif // VSS_INDEX_VTAB_H
diff --git a/src/vss/vss-index.h b/src/vss/vss-index.h
new file mode 100644
index 0000000..31dfd43
--- /dev/null
+++ b/src/vss/vss-index.h
@@ -0,0 +1,55 @@
+
+#ifndef VSS_INDEX_H
+#define VSS_INDEX_H
+
+#include "inclusions.h"
+
+// Wrapper around a single faiss index, with training data, insert records, and
+// delete records.
+class vss_index {
+
+public:
+ // Stores the given faiss index pointer; the destructor deletes it.
+ explicit vss_index(faiss::Index *index) : index(index) {}
+
+ ~vss_index() {
+ if (index != nullptr) {
+ delete index;
+ }
+ }
+ // Returns the wrapped faiss index pointer; the wrapper keeps ownership.
+ faiss::Index * getIndex() {
+
+ return index;
+ }
+ // Mutable access to the training data held by this wrapper.
+ vector & getTrainings() {
+
+ return trainings;
+ }
+ // Mutable access to the vector data held for insertion.
+ vector & getInsert_data() {
+
+ return insert_data;
+ }
+ // Mutable access to the ids held for insertion.
+ vector & getInsert_ids() {
+
+ return insert_ids;
+ }
+ // Mutable access to the ids held for deletion.
+ vector & getDelete_ids() {
+
+ return delete_ids;
+ }
+
+private:
+ // NOTE(review): the vector element types are missing in this text; confirm against the repository.
+ faiss::Index *index;
+ vector trainings;
+ vector insert_data;
+ vector insert_ids;
+ vector delete_ids;
+};
+
+#endif // VSS_INDEX_H
From d42db30c6dfad6eeeb7f9390ba822d0656a26928 Mon Sep 17 00:00:00 2001
From: Thomas Hansen
Date: Mon, 26 Jun 2023 10:34:48 +0300
Subject: [PATCH 10/66] Restructuring by encapsulating members
---
src/sqlite-vss.cpp | 79 ++++++++++++++--------------
src/vss/vss-index-cursor.h | 103 ++++++++++++++++++++++++++++++++++++-
2 files changed, 140 insertions(+), 42 deletions(-)
diff --git a/src/sqlite-vss.cpp b/src/sqlite-vss.cpp
index 6f1a978..31f857d 100644
--- a/src/sqlite-vss.cpp
+++ b/src/sqlite-vss.cpp
@@ -566,13 +566,13 @@ static int vssIndexFilter(sqlite3_vtab_cursor *pVtabCursor,
if (strcmp(idxStr, "search") == 0) {
- pCursor->query_type = QueryType::search;
+ pCursor->setQuery_type(QueryType::search);
vec_ptr query_vector;
auto params = static_cast(sqlite3_value_pointer(argv[0], "vss0_searchparams"));
if (params != nullptr) {
- pCursor->limit = params->k;
+ pCursor->setLimit(params->k);
query_vector = vec_ptr(new vector(*params->vector));
} else if (sqlite3_libversion_number() < 3041000) {
@@ -585,12 +585,12 @@ static int vssIndexFilter(sqlite3_vtab_cursor *pVtabCursor,
"2nd parameter for SQLite versions below 3.41.0"));
return SQLITE_ERROR;
- } else if ((query_vector = pCursor->table->getVector0_api()->xValueAsVector(
+ } else if ((query_vector = pCursor->getTable()->getVector0_api()->xValueAsVector(
argv[0])) != nullptr) {
if (argc > 1) {
- pCursor->limit = sqlite3_value_int(argv[1]);
+ pCursor->setLimit(sqlite3_value_int(argv[1]));
} else {
auto ptrVtab = static_cast(pCursor->pVtab);
@@ -608,7 +608,7 @@ static int vssIndexFilter(sqlite3_vtab_cursor *pVtabCursor,
}
int nq = 1;
- auto index = pCursor->table->getIndexes().at(idxNum)->getIndex();
+ auto index = pCursor->getTable()->getIndexes().at(idxNum)->getIndex();
if (query_vector->size() != index->d) {
@@ -621,31 +621,30 @@ static int vssIndexFilter(sqlite3_vtab_cursor *pVtabCursor,
return SQLITE_ERROR;
}
- if (pCursor->limit <= 0) {
+ if (pCursor->getLimit() <= 0) {
auto ptrVtab = static_cast(pCursor->pVtab);
ptrVtab->setError(sqlite3_mprintf(
"Limit must be greater than 0, got %ld",
- pCursor->limit));
+ pCursor->getLimit()));
return SQLITE_ERROR;
}
// To avoid trying to select more records than number of records in index.
- auto searchMax = min(static_cast(pCursor->limit) * nq, index->ntotal * nq);
+ auto searchMax = min(static_cast(pCursor->getLimit()) * nq, index->ntotal * nq);
- pCursor->search_distances = vector(searchMax, 0);
- pCursor->search_ids = vector(searchMax, 0);
+ pCursor->resetSearch(searchMax);
index->search(nq,
query_vector->data(),
searchMax,
- pCursor->search_distances.data(),
- pCursor->search_ids.data());
+ pCursor->getSearch_distances().data(),
+ pCursor->getSearch_ids().data());
} else if (strcmp(idxStr, "range_search") == 0) {
- pCursor->query_type = QueryType::range_search;
+ pCursor->setQuery_type(QueryType::range_search);
auto params = static_cast(
sqlite3_value_pointer(argv[0], "vss0_rangesearchparams"));
@@ -653,22 +652,22 @@ static int vssIndexFilter(sqlite3_vtab_cursor *pVtabCursor,
int nq = 1;
vector nns(params->distance * nq);
- pCursor->range_search_result = unique_ptr(new faiss::RangeSearchResult(nq, true));
+ pCursor->getRange_search_result() = unique_ptr(new faiss::RangeSearchResult(nq, true));
- auto index = pCursor->table->getIndexes().at(idxNum)->getIndex();
+ auto index = pCursor->getTable()->getIndexes().at(idxNum)->getIndex();
index->range_search(nq,
params->vector->data(),
params->distance,
- pCursor->range_search_result.get());
+ pCursor->getRange_search_result().get());
} else if (strcmp(idxStr, "fullscan") == 0) {
- pCursor->query_type = QueryType::fullscan;
- pCursor->sql = sqlite3_mprintf("select rowid from \"%w_data\"", pCursor->table->getName());
+ pCursor->setQuery_type(QueryType::fullscan);
+ pCursor->setSql(sqlite3_mprintf("select rowid from \"%w_data\"", pCursor->getTable()->getName()));
- int res = sqlite3_prepare_v2(pCursor->table->getDb(),
- pCursor->sql,
+ int res = sqlite3_prepare_v2(pCursor->getTable()->getDb(),
+ pCursor->getSql(),
-1,
&pCursor->stmt,
nullptr);
@@ -676,7 +675,7 @@ static int vssIndexFilter(sqlite3_vtab_cursor *pVtabCursor,
if (res != SQLITE_OK)
return res;
- pCursor->step_result = sqlite3_step(pCursor->stmt);
+ pCursor->setStep_result(sqlite3_step(pCursor->getStmt()));
} else {
@@ -687,7 +686,7 @@ static int vssIndexFilter(sqlite3_vtab_cursor *pVtabCursor,
return SQLITE_ERROR;
}
- pCursor->iCurrent = 0;
+ pCursor->setICurrent(0);
return SQLITE_OK;
}
@@ -695,15 +694,15 @@ static int vssIndexNext(sqlite3_vtab_cursor *cur) {
auto pCursor = static_cast(cur);
- switch (pCursor->query_type) {
+ switch (pCursor->getQuery_type()) {
case QueryType::search:
case QueryType::range_search:
- pCursor->iCurrent++;
+ pCursor->incrementICurrent();
break;
case QueryType::fullscan:
- pCursor->step_result = sqlite3_step(pCursor->stmt);
+ pCursor->setStep_result(sqlite3_step(pCursor->getStmt()));
}
return SQLITE_OK;
@@ -713,18 +712,18 @@ static int vssIndexRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid) {
auto pCursor = static_cast(cur);
- switch (pCursor->query_type) {
+ switch (pCursor->getQuery_type()) {
case QueryType::search:
- *pRowid = pCursor->search_ids.at(pCursor->iCurrent);
+ *pRowid = pCursor->getSearch_ids().at(pCursor->getICurrent());
break;
case QueryType::range_search:
- *pRowid = pCursor->range_search_result->labels[pCursor->iCurrent];
+ *pRowid = pCursor->getRange_search_result()->labels[pCursor->getICurrent()];
break;
case QueryType::fullscan:
- *pRowid = sqlite3_column_int64(pCursor->stmt, 0);
+ *pRowid = sqlite3_column_int64(pCursor->getStmt(), 0);
break;
}
return SQLITE_OK;
@@ -734,18 +733,18 @@ static int vssIndexEof(sqlite3_vtab_cursor *cur) {
auto pCursor = static_cast(cur);
- switch (pCursor->query_type) {
+ switch (pCursor->getQuery_type()) {
case QueryType::search:
- return pCursor->iCurrent >= pCursor->limit ||
- pCursor->iCurrent >= pCursor->search_ids.size()
- || (pCursor->search_ids.at(pCursor->iCurrent) == -1);
+ return pCursor->getICurrent() >= pCursor->getLimit() ||
+ pCursor->getICurrent() >= pCursor->getSearch_ids().size()
+ || (pCursor->getSearch_ids().at(pCursor->getICurrent()) == -1);
case QueryType::range_search:
- return pCursor->iCurrent >= pCursor->range_search_result->lims[1];
+ return pCursor->getICurrent() >= pCursor->getRange_search_result()->lims[1];
case QueryType::fullscan:
- return pCursor->step_result != SQLITE_ROW;
+ return pCursor->getStep_result() != SQLITE_ROW;
}
return 1;
}
@@ -758,16 +757,16 @@ static int vssIndexColumn(sqlite3_vtab_cursor *cur,
if (i == VSS_INDEX_COLUMN_DISTANCE) {
- switch (pCursor->query_type) {
+ switch (pCursor->getQuery_type()) {
case QueryType::search:
sqlite3_result_double(ctx,
- pCursor->search_distances.at(pCursor->iCurrent));
+ pCursor->getSearch_distances().at(pCursor->getICurrent()));
break;
case QueryType::range_search:
sqlite3_result_double(ctx,
- pCursor->range_search_result->distances[pCursor->iCurrent]);
+ pCursor->getRange_search_result()->distances[pCursor->getICurrent()]);
break;
case QueryType::fullscan:
@@ -777,7 +776,7 @@ static int vssIndexColumn(sqlite3_vtab_cursor *cur,
} else if (i >= VSS_INDEX_COLUMN_VECTORS) {
auto index =
- pCursor->table->getIndexes().at(i - VSS_INDEX_COLUMN_VECTORS)->getIndex();
+ pCursor->getTable()->getIndexes().at(i - VSS_INDEX_COLUMN_VECTORS)->getIndex();
vector vec(index->d);
sqlite3_int64 rowId;
@@ -798,7 +797,7 @@ static int vssIndexColumn(sqlite3_vtab_cursor *cur,
sqlite3_free(errmsg);
return SQLITE_ERROR;
}
- pCursor->table->getVector0_api()->xResultVector(ctx, &vec);
+ pCursor->getTable()->getVector0_api()->xResultVector(ctx, &vec);
}
return SQLITE_OK;
}
diff --git a/src/vss/vss-index-cursor.h b/src/vss/vss-index-cursor.h
index 21eaf0d..9581b83 100644
--- a/src/vss/vss-index-cursor.h
+++ b/src/vss/vss-index-cursor.h
@@ -4,7 +4,9 @@
#include "inclusions.h"
-struct vss_index_cursor : public sqlite3_vtab_cursor {
+class vss_index_cursor : public sqlite3_vtab_cursor {
+
+public:
explicit vss_index_cursor(vss_index_vtab *table)
: table(table),
@@ -19,6 +21,104 @@ struct vss_index_cursor : public sqlite3_vtab_cursor {
sqlite3_free(sql);
}
+ vss_index_vtab * getTable() {
+
+ return table;
+ }
+
+ sqlite3_int64 getICurrent() {
+
+ return iCurrent;
+ }
+
+ sqlite3_int64 getIRowid() {
+
+ return iRowid;
+ }
+
+ QueryType getQuery_type() {
+
+ return query_type;
+ }
+
+ sqlite3_int64 getLimit() {
+
+ return limit;
+ }
+
+ vector & getSearch_ids() {
+
+ return search_ids;
+ }
+
+ vector & getSearch_distances() {
+
+ return search_distances;
+ }
+
+ unique_ptr & getRange_search_result() {
+
+ return range_search_result;
+ }
+
+ sqlite3_stmt *getStmt() {
+
+ return stmt;
+ }
+
+ int getStep_result() {
+
+ return step_result;
+ }
+
+ void setStep_result(int value) {
+
+ step_result = value;
+ }
+
+ void incrementICurrent() {
+
+ iCurrent += 1;
+ }
+
+ void setICurrent(sqlite3_int64 value) {
+
+ iCurrent = value;
+ }
+
+ void resetSearch(long noItems) {
+
+ search_distances = vector(noItems, 0);
+ search_ids = vector(noItems, 0);
+ }
+
+ void setQuery_type(QueryType value) {
+
+ query_type = value;
+ }
+
+ void setSql(char * value) {
+
+ if (sql != nullptr)
+ sqlite3_free(sql);
+ sql = value;
+ }
+
+ char * getSql() {
+
+ return sql;
+ }
+
+ void setLimit(sqlite3_int64 value) {
+
+ limit = value;
+ }
+
+ // TODO: Parts of our logic requires the address to the pointer such that we can assign what it's pointing at
+ sqlite3_stmt *stmt;
+
+private:
+
vss_index_vtab *table;
sqlite3_int64 iCurrent;
@@ -35,7 +135,6 @@ struct vss_index_cursor : public sqlite3_vtab_cursor {
unique_ptr range_search_result;
// For query_type == QueryType::fullscan
- sqlite3_stmt *stmt;
char *sql;
int step_result;
};
From 765746ab6479a6bd66138698c9b36c5d3c59f84d Mon Sep 17 00:00:00 2001
From: Thomas Hansen
Date: Mon, 26 Jun 2023 10:48:52 +0300
Subject: [PATCH 11/66] Using nullptr + code formatting
---
src/sqlite-vss.cpp | 11 +++++++----
1 file changed, 7 insertions(+), 4 deletions(-)
diff --git a/src/sqlite-vss.cpp b/src/sqlite-vss.cpp
index 31f857d..90cd4ec 100644
--- a/src/sqlite-vss.cpp
+++ b/src/sqlite-vss.cpp
@@ -664,7 +664,9 @@ static int vssIndexFilter(sqlite3_vtab_cursor *pVtabCursor,
} else if (strcmp(idxStr, "fullscan") == 0) {
pCursor->setQuery_type(QueryType::fullscan);
- pCursor->setSql(sqlite3_mprintf("select rowid from \"%w_data\"", pCursor->getTable()->getName()));
+ pCursor->setSql(
+ sqlite3_mprintf("select rowid from \"%w_data\"",
+ pCursor->getTable()->getName()));
int res = sqlite3_prepare_v2(pCursor->getTable()->getDb(),
pCursor->getSql(),
@@ -783,6 +785,7 @@ static int vssIndexColumn(sqlite3_vtab_cursor *cur,
vssIndexRowid(cur, &rowId);
try {
+
index->reconstruct(rowId, vec.data());
} catch (faiss::FaissException &e) {
@@ -1164,7 +1167,7 @@ __declspec(dllexport)
SQLITE_UTF8 | SQLITE_DETERMINISTIC | SQLITE_INNOCUOUS,
0,
vss_version,
- 0, 0, 0);
+ nullptr, nullptr, nullptr);
sqlite3_create_function_v2(db,
"vss_debug",
@@ -1172,7 +1175,7 @@ __declspec(dllexport)
SQLITE_UTF8 | SQLITE_DETERMINISTIC | SQLITE_INNOCUOUS,
0,
vss_debug,
- 0, 0, 0);
+ nullptr, nullptr, nullptr);
sqlite3_create_function_v2(db,
"vss_distance_l1",
@@ -1180,7 +1183,7 @@ __declspec(dllexport)
SQLITE_UTF8 | SQLITE_DETERMINISTIC | SQLITE_INNOCUOUS,
vector_api,
vss_distance_l1,
- 0, 0, 0);
+ nullptr, nullptr, nullptr);
sqlite3_create_function_v2(db, "vss_distance_l2",
2,
From 600f67cb3d7f08905b4d0b8f7e654991d0c3cebe Mon Sep 17 00:00:00 2001
From: Thomas Hansen |
Date: Mon, 26 Jun 2023 10:51:47 +0300
Subject: [PATCH 12/66] Comment
---
src/sqlite-vss.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/sqlite-vss.cpp b/src/sqlite-vss.cpp
index 90cd4ec..835a589 100644
--- a/src/sqlite-vss.cpp
+++ b/src/sqlite-vss.cpp
@@ -1096,7 +1096,7 @@ static int vssIndexShadowName(const char *zName) {
}
static sqlite3_module vssIndexModule = {
- /* iVersion */ 3,
+ /* iVersion */ 3, // TODO: Shouldn't this be the same as the version for sqlite-vector.cpp?
/* xCreate */ vssIndexCreate,
/* xConnect */ vssIndexConnect,
/* xBestIndex */ vssIndexBestIndex,
From a33b38c05b1756611971db8c6fc2be39146cb97f Mon Sep 17 00:00:00 2001
From: Thomas Hansen |
Date: Mon, 26 Jun 2023 10:52:39 +0300
Subject: [PATCH 13/66] Using nullptr to be more semantic correct
---
src/sqlite-vector.cpp | 23 ++++++++++++-----------
src/sqlite-vss.cpp | 11 ++++++-----
2 files changed, 18 insertions(+), 16 deletions(-)
diff --git a/src/sqlite-vector.cpp b/src/sqlite-vector.cpp
index 67985b9..7f22370 100644
--- a/src/sqlite-vector.cpp
+++ b/src/sqlite-vector.cpp
@@ -584,17 +584,18 @@ static sqlite3_module fvecsEachModule = {
/* xEof */ fvecsEachEof,
/* xColumn */ fvecsEachColumn,
/* xRowid */ fvecsEachRowid,
- /* xUpdate */ 0,
- /* xBegin */ 0,
- /* xSync */ 0,
- /* xCommit */ 0,
- /* xRollback */ 0,
- /* xFindMethod */ 0,
- /* xRename */ 0,
- /* xSavepoint */ 0,
- /* xRelease */ 0,
- /* xRollbackTo */ 0,
- /* xShadowName */ 0};
+ /* xUpdate */ nullptr,
+ /* xBegin */ nullptr,
+ /* xSync */ nullptr,
+ /* xCommit */ nullptr,
+ /* xRollback */ nullptr,
+ /* xFindMethod */ nullptr,
+ /* xRename */ nullptr,
+ /* xSavepoint */ nullptr,
+ /* xRelease */ nullptr,
+ /* xRollbackTo */ nullptr,
+ /* xShadowName */ nullptr
+};
#pragma endregion
diff --git a/src/sqlite-vss.cpp b/src/sqlite-vss.cpp
index 835a589..e1b79ff 100644
--- a/src/sqlite-vss.cpp
+++ b/src/sqlite-vss.cpp
@@ -1115,11 +1115,12 @@ static sqlite3_module vssIndexModule = {
/* xCommit */ vssIndexCommit,
/* xRollback */ vssIndexRollback,
/* xFindMethod */ vssIndexFindFunction,
- /* xRename */ 0,
- /* xSavepoint */ 0,
- /* xRelease */ 0,
- /* xRollbackTo */ 0,
- /* xShadowName */ vssIndexShadowName};
+ /* xRename */ nullptr,
+ /* xSavepoint */ nullptr,
+ /* xRelease */ nullptr,
+ /* xRollbackTo */ nullptr,
+ /* xShadowName */ vssIndexShadowName
+};
#pragma endregion
From 87b058b5c28d2542a6bbf7c802b3d29b58316177 Mon Sep 17 00:00:00 2001
From: Thomas Hansen |
Date: Mon, 26 Jun 2023 11:14:38 +0300
Subject: [PATCH 14/66] Better encapsulation and cohesion
---
src/sqlite-vss.cpp | 62 +++++++--------------------------------------
src/vss/vss-index.h | 62 +++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 71 insertions(+), 53 deletions(-)
diff --git a/src/sqlite-vss.cpp b/src/sqlite-vss.cpp
index e1b79ff..1a7ebdb 100644
--- a/src/sqlite-vss.cpp
+++ b/src/sqlite-vss.cpp
@@ -818,51 +818,16 @@ static int vssIndexSync(sqlite3_vtab *pVTab) {
bool needsWriting = false;
- auto idxCol = 0;
- for (auto iter = pTable->getIndexes().begin(); iter != pTable->getIndexes().end(); ++iter, idxCol++) {
-
- // Checking if index needs training.
- if (!(*iter)->getTrainings().empty()) {
-
- (*iter)->getIndex()->train(
- (*iter)->getTrainings().size() / (*iter)->getIndex()->d,
- (*iter)->getTrainings().data());
-
- (*iter)->getTrainings().clear();
- (*iter)->getTrainings().shrink_to_fit();
-
- needsWriting = true;
- }
-
- // Checking if we're deleting records from the index.
- if (!(*iter)->getDelete_ids().empty()) {
-
- faiss::IDSelectorBatch selector((*iter)->getDelete_ids().size(),
- (*iter)->getDelete_ids().data());
-
- (*iter)->getIndex()->remove_ids(selector);
- (*iter)->getDelete_ids().clear();
- (*iter)->getDelete_ids().shrink_to_fit();
-
- needsWriting = true;
- }
-
- // Checking if we're inserting records to the index.
- if (!(*iter)->getInsert_data().empty()) {
-
- (*iter)->getIndex()->add_with_ids(
- (*iter)->getInsert_ids().size(),
- (*iter)->getInsert_data().data(),
- (faiss::idx_t *)(*iter)->getInsert_ids().data());
+ for (auto iter = pTable->getIndexes().begin(); iter != pTable->getIndexes().end(); ++iter) {
- (*iter)->getInsert_ids().clear();
- (*iter)->getInsert_ids().shrink_to_fit();
+ // Training index, notice no-op unless we've got training data.
+ needsWriting = (*iter)->tryTrain() || needsWriting;
- (*iter)->getInsert_data().clear();
- (*iter)->getInsert_data().shrink_to_fit();
+ // Deleting data from index, notice no-op unless there's something to actually delete.
+ needsWriting = (*iter)->tryDelete() || needsWriting;
- needsWriting = true;
- }
+ // Inserting data to index, notice no-op unless there's something to actually insert.
+ needsWriting = (*iter)->tryInsert() || needsWriting;
}
if (needsWriting) {
@@ -895,17 +860,8 @@ static int vssIndexSync(sqlite3_vtab *pVTab) {
for (auto iter = pTable->getIndexes().begin(); iter != pTable->getIndexes().end(); ++iter) {
- (*iter)->getInsert_ids().clear();
- (*iter)->getInsert_ids().shrink_to_fit();
-
- (*iter)->getInsert_data().clear();
- (*iter)->getInsert_data().shrink_to_fit();
-
- (*iter)->getDelete_ids().clear();
- (*iter)->getDelete_ids().shrink_to_fit();
-
- (*iter)->getTrainings().clear();
- (*iter)->getTrainings().shrink_to_fit();
+ // Cleanups in case we've got hanging data.
+ (*iter)->reset();
}
return SQLITE_ERROR;
diff --git a/src/vss/vss-index.h b/src/vss/vss-index.h
index 31dfd43..7dd3787 100644
--- a/src/vss/vss-index.h
+++ b/src/vss/vss-index.h
@@ -43,6 +43,68 @@ class vss_index {
return delete_ids;
}
+ bool tryTrain() {
+
+ if (trainings.empty())
+ return false;
+
+ index->train(trainings.size() / index->d, trainings.data());
+ trainings.clear();
+ trainings.shrink_to_fit();
+
+ return true;
+ }
+
+ bool tryDelete() {
+
+ if (delete_ids.empty())
+ return false;
+
+ faiss::IDSelectorBatch selector(delete_ids.size(),
+ delete_ids.data());
+
+ index->remove_ids(selector);
+ delete_ids.clear();
+ delete_ids.shrink_to_fit();
+
+ return true;
+ }
+
+ bool tryInsert() {
+
+ if (insert_ids.empty())
+ return false;
+
+ index->add_with_ids(
+ insert_ids.size(),
+ insert_data.data(),
+ (faiss::idx_t *)insert_ids.data());
+
+ insert_ids.clear();
+ insert_ids.shrink_to_fit();
+
+ insert_data.clear();
+ insert_data.shrink_to_fit();
+
+ return true;
+ }
+
+ void reset() {
+
+ trainings.clear();
+ trainings.shrink_to_fit();
+
+ insert_data.clear();
+ insert_data.shrink_to_fit();
+
+ insert_ids.clear();
+ insert_ids.shrink_to_fit();
+
+ delete_ids.clear();
+
+ delete_ids.shrink_to_fit();
+ }
+
private:
faiss::Index *index;
From d9d1c414a38a5fc70ffb8490aedbf831a34b3ecd Mon Sep 17 00:00:00 2001
From: Thomas Hansen |
Date: Mon, 26 Jun 2023 11:42:42 +0300
Subject: [PATCH 15/66] Better cohesion + encapsulation
---
src/sqlite-vss.cpp | 65 ++++++++++------------------------------
src/vss/vss-index.h | 72 +++++++++++++++++++++++++++------------------
2 files changed, 59 insertions(+), 78 deletions(-)
diff --git a/src/sqlite-vss.cpp b/src/sqlite-vss.cpp
index 1a7ebdb..c85ed45 100644
--- a/src/sqlite-vss.cpp
+++ b/src/sqlite-vss.cpp
@@ -820,14 +820,8 @@ static int vssIndexSync(sqlite3_vtab *pVTab) {
for (auto iter = pTable->getIndexes().begin(); iter != pTable->getIndexes().end(); ++iter) {
- // Training index, notice no-op unless we've got training data.
- needsWriting = (*iter)->tryTrain() || needsWriting;
-
- // Deleting data from index, notice no-op unless there's something to actually delete.
- needsWriting = (*iter)->tryDelete() || needsWriting;
-
- // Inserting data to index, notice no-op unless there's something to actually insert.
- needsWriting = (*iter)->tryInsert() || needsWriting;
+ // Synchronizing index, implying deleting, training, and inserting records according to needs.
+ needsWriting = (*iter)->synchronize();
}
if (needsWriting) {
@@ -876,17 +870,7 @@ static int vssIndexRollback(sqlite3_vtab *pVTab) {
for (auto iter = pTable->getIndexes().begin(); iter != pTable->getIndexes().end(); ++iter) {
- (*iter)->getTrainings().clear();
- (*iter)->getTrainings().shrink_to_fit();
-
- (*iter)->getInsert_data().clear();
- (*iter)->getInsert_data().shrink_to_fit();
-
- (*iter)->getInsert_ids().clear();
- (*iter)->getInsert_ids().shrink_to_fit();
-
- (*iter)->getDelete_ids().clear();
- (*iter)->getDelete_ids().shrink_to_fit();
+ (*iter)->reset();
}
return SQLITE_OK;
}
@@ -911,7 +895,7 @@ static int vssIndexUpdate(sqlite3_vtab *pVTab,
return rc;
for (auto iter = pTable->getIndexes().begin(); iter != pTable->getIndexes().end(); ++iter) {
- (*iter)->getDelete_ids().push_back(rowid_to_delete);
+ (*iter)->addDelete(rowid_to_delete);
}
} else if (argc > 1 && sqlite3_value_type(argv[0]) == SQLITE_NULL) {
@@ -926,7 +910,6 @@ static int vssIndexUpdate(sqlite3_vtab *pVTab,
vec_ptr vec;
sqlite3_int64 rowid = sqlite3_value_int64(argv[1]);
- bool inserted_rowid = false;
auto i = 0;
for (auto iter = pTable->getIndexes().begin(); iter != pTable->getIndexes().end(); ++iter, i++) {
@@ -938,32 +921,20 @@ static int vssIndexUpdate(sqlite3_vtab *pVTab,
if (!(*iter)->getIndex()->is_trained) {
pTable->setError(sqlite3_mprintf("Index at i=%d requires training "
- "before inserting data.",
- i));
+ "before inserting data.",
+ i));
return SQLITE_ERROR;
}
- if (!inserted_rowid) {
-
- auto rc = shadow_data_insert(pTable->getDb(),
- pTable->getSchema(),
- pTable->getName(),
- rowid);
- if (rc != SQLITE_OK)
- return rc;
-
- inserted_rowid = true;
- }
-
- (*iter)->getInsert_data().reserve((*iter)->getInsert_data().size() + vec->size());
- (*iter)->getInsert_data().insert(
- (*iter)->getInsert_data().end(),
- vec->begin(),
- vec->end());
-
- (*iter)->getInsert_ids().push_back(rowid);
+ auto rc = shadow_data_insert(pTable->getDb(),
+ pTable->getSchema(),
+ pTable->getName(),
+ rowid);
+ if (rc != SQLITE_OK)
+ return rc;
+ (*iter)->addInsertData(rowid, vec);
*pRowid = rowid;
}
}
@@ -978,14 +949,8 @@ static int vssIndexUpdate(sqlite3_vtab *pVTab,
for (auto iter = pTable->getIndexes().begin(); iter != pTable->getIndexes().end(); ++iter, i++) {
vec_ptr vec = pTable->getVector0_api()->xValueAsVector(argv[2 + VSS_INDEX_COLUMN_VECTORS + i]);
- if (vec != nullptr) {
-
- (*iter)->getTrainings().reserve((*iter)->getTrainings().size() + vec->size());
- (*iter)->getTrainings().insert(
- (*iter)->getTrainings().end(),
- vec->begin(),
- vec->end());
- }
+ if (vec != nullptr)
+ (*iter)->addTrainings(vec);
}
} else {
diff --git a/src/vss/vss-index.h b/src/vss/vss-index.h
index 7dd3787..1f74925 100644
--- a/src/vss/vss-index.h
+++ b/src/vss/vss-index.h
@@ -4,8 +4,13 @@
#include "inclusions.h"
-// Wrapper around a single faiss index, with training data, insert records, and
-// delete records.
+/*
+ * Wrapper around a single faiss index, with training data, insert records, and
+ * delete records.
+ *
+ * An attempt at encapsulating everything related to faiss::Index instances, such as
+ * training, inserting, deleting, etc.
+ */
class vss_index {
public:
@@ -13,6 +18,7 @@ class vss_index {
explicit vss_index(faiss::Index *index) : index(index) {}
~vss_index() {
+
if (index != nullptr) {
delete index;
}
@@ -23,26 +29,54 @@ class vss_index {
return index;
}
- vector & getTrainings() {
+ void addTrainings(vec_ptr & vec) {
+
+ trainings.reserve(trainings.size() + vec->size());
+ trainings.insert(trainings.end(), vec->begin(), vec->end());
+ }
+
+ void addInsertData(faiss::idx_t rowId, vec_ptr & vec) {
+
+ insert_data.reserve(insert_data.size() + vec->size());
+ insert_data.insert(insert_data.end(), vec->begin(), vec->end());
- return trainings;
+ insert_ids.push_back(rowId);
}
- vector & getInsert_data() {
+ void addDelete(faiss::idx_t rowid) {
- return insert_data;
+ delete_ids.push_back(rowid);
}
- vector & getInsert_ids() {
+ bool synchronize() {
- return insert_ids;
+ auto result = tryTrain();
+ result = tryDelete() || result;
+ result = tryInsert() || result;
+
+ // Now that we've updated our faiss::index we delete all temporary data.
+ reset();
+
+ return result;
}
- vector & getDelete_ids() {
+ void reset() {
+
+ trainings.clear();
+ trainings.shrink_to_fit();
+
+ insert_ids.clear();
+ insert_ids.shrink_to_fit();
+
+ insert_data.clear();
+ insert_data.shrink_to_fit();
- return delete_ids;
+ delete_ids.clear();
+ delete_ids.shrink_to_fit();
}
+private:
+
bool tryTrain() {
if (trainings.empty())
@@ -89,24 +123,6 @@ class vss_index {
return true;
}
- void reset() {
-
- trainings.clear();
- trainings.shrink_to_fit();
-
- insert_data.clear();
- insert_data.shrink_to_fit();
-
- insert_ids.clear();
- insert_ids.shrink_to_fit();
-
- delete_ids.clear();
-
- delete_ids.shrink_to_fit();
- }
-
-private:
-
faiss::Index *index;
vector trainings;
vector insert_data;
From 89d31380c8b1b9199f0fb313c8cae5663f11a595 Mon Sep 17 00:00:00 2001
From: Thomas Hansen
Date: Mon, 26 Jun 2023 11:46:49 +0300
Subject: [PATCH 16/66] Comments
---
src/vss/vss-index.h | 21 +++++++++++++++++++++
1 file changed, 21 insertions(+)
diff --git a/src/vss/vss-index.h b/src/vss/vss-index.h
index 1f74925..738af43 100644
--- a/src/vss/vss-index.h
+++ b/src/vss/vss-index.h
@@ -29,12 +29,22 @@ class vss_index {
return index;
}
+ /*
+ * Adds the specified vector to the index' training material.
+ *
+ * Notice, needs to invoke synchronize() later to actually perform training of index.
+ */
void addTrainings(vec_ptr & vec) {
trainings.reserve(trainings.size() + vec->size());
trainings.insert(trainings.end(), vec->begin(), vec->end());
}
+ /*
+ * Adds the specified vector to the index' temporary insert data.
+ *
+ * Notice, needs to invoke synchronize() later to actually add data to index.
+ */
void addInsertData(faiss::idx_t rowId, vec_ptr & vec) {
insert_data.reserve(insert_data.size() + vec->size());
@@ -43,11 +53,19 @@ class vss_index {
insert_ids.push_back(rowId);
}
+ /*
+ * Adds the specified rowid to the index' temporary delete data.
+ *
+ * Notice, needs to invoke synchronize() later to actually delete data from index.
+ */
void addDelete(faiss::idx_t rowid) {
delete_ids.push_back(rowid);
}
+ /*
+ * Synchronizes index by updating index according to trainings, inserts and deletes.
+ */
bool synchronize() {
auto result = tryTrain();
@@ -60,6 +78,9 @@ class vss_index {
return result;
}
+ /*
+ * Resets all temporary training data to free memory.
+ */
void reset() {
trainings.clear();
From 1eeda54837b44fc52c8d1d3df22df72032f7b14a Mon Sep 17 00:00:00 2001
From: Thomas Hansen |
Date: Mon, 26 Jun 2023 12:14:22 +0300
Subject: [PATCH 17/66] Bug fix
---
src/sqlite-vss.cpp | 14 +++++++-------
src/vss/vss-index.h | 41 +++++++++++++++++++----------------------
2 files changed, 26 insertions(+), 29 deletions(-)
diff --git a/src/sqlite-vss.cpp b/src/sqlite-vss.cpp
index c85ed45..0631e5d 100644
--- a/src/sqlite-vss.cpp
+++ b/src/sqlite-vss.cpp
@@ -830,10 +830,10 @@ static int vssIndexSync(sqlite3_vtab *pVTab) {
for (auto iter = pTable->getIndexes().begin(); iter != pTable->getIndexes().end(); ++iter, i++) {
int rc = write_index((*iter)->getIndex(),
- pTable->getDb(),
- pTable->getSchema(),
- pTable->getName(),
- i);
+ pTable->getDb(),
+ pTable->getSchema(),
+ pTable->getName(),
+ i);
if (rc != SQLITE_OK) {
@@ -928,9 +928,9 @@ static int vssIndexUpdate(sqlite3_vtab *pVTab,
}
auto rc = shadow_data_insert(pTable->getDb(),
- pTable->getSchema(),
- pTable->getName(),
- rowid);
+ pTable->getSchema(),
+ pTable->getName(),
+ rowid);
if (rc != SQLITE_OK)
return rc;
diff --git a/src/vss/vss-index.h b/src/vss/vss-index.h
index 738af43..cc7a287 100644
--- a/src/vss/vss-index.h
+++ b/src/vss/vss-index.h
@@ -72,9 +72,6 @@ class vss_index {
result = tryDelete() || result;
result = tryInsert() || result;
- // Now that we've updated our faiss::index we delete all temporary data.
- reset();
-
return result;
}
@@ -86,12 +83,12 @@ class vss_index {
trainings.clear();
trainings.shrink_to_fit();
- insert_ids.clear();
- insert_ids.shrink_to_fit();
-
insert_data.clear();
insert_data.shrink_to_fit();
+ insert_ids.clear();
+ insert_ids.shrink_to_fit();
+
delete_ids.clear();
delete_ids.shrink_to_fit();
}
@@ -110,21 +107,6 @@ class vss_index {
return true;
}
- bool tryDelete() {
-
- if (delete_ids.empty())
- return false;
-
- faiss::IDSelectorBatch selector(delete_ids.size(),
- delete_ids.data());
-
- index->remove_ids(selector);
- delete_ids.clear();
- delete_ids.shrink_to_fit();
-
- return true;
- }
-
bool tryInsert() {
if (insert_ids.empty())
@@ -133,7 +115,7 @@ class vss_index {
index->add_with_ids(
insert_ids.size(),
insert_data.data(),
- (faiss::idx_t *)insert_ids.data());
+ insert_ids.data());
insert_ids.clear();
insert_ids.shrink_to_fit();
@@ -144,6 +126,21 @@ class vss_index {
return true;
}
+ bool tryDelete() {
+
+ if (delete_ids.empty())
+ return false;
+
+ faiss::IDSelectorBatch selector(delete_ids.size(),
+ delete_ids.data());
+
+ index->remove_ids(selector);
+ delete_ids.clear();
+ delete_ids.shrink_to_fit();
+
+ return true;
+ }
+
faiss::Index *index;
vector<float> trainings;
vector<float> insert_data;
From 7ce8e0fc370c60242dc2390868d150aa18689f67 Mon Sep 17 00:00:00 2001
From: Thomas Hansen
Date: Mon, 26 Jun 2023 12:32:17 +0300
Subject: [PATCH 18/66] Trying to save build
---
src/vss/vss-index.h | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/src/vss/vss-index.h b/src/vss/vss-index.h
index cc7a287..4790993 100644
--- a/src/vss/vss-index.h
+++ b/src/vss/vss-index.h
@@ -69,8 +69,10 @@ class vss_index {
bool synchronize() {
auto result = tryTrain();
- result = tryDelete() || result;
- result = tryInsert() || result;
+ if (tryDelete())
+ result = true;
+ if (tryInsert())
+ result = true;
return result;
}
@@ -101,6 +103,7 @@ class vss_index {
return false;
index->train(trainings.size() / index->d, trainings.data());
+
trainings.clear();
trainings.shrink_to_fit();
@@ -135,6 +138,7 @@ class vss_index {
delete_ids.data());
index->remove_ids(selector);
+
delete_ids.clear();
delete_ids.shrink_to_fit();
From 7253c9f203faebd29314dec6046754696d7c5ae2 Mon Sep 17 00:00:00 2001
From: Thomas Hansen |
Date: Mon, 26 Jun 2023 12:39:30 +0300
Subject: [PATCH 19/66] Puuh, logical error fixed
---
src/sqlite-vss.cpp | 2 +-
src/vss/vss-index.h | 6 ++----
2 files changed, 3 insertions(+), 5 deletions(-)
diff --git a/src/sqlite-vss.cpp b/src/sqlite-vss.cpp
index 0631e5d..65415cf 100644
--- a/src/sqlite-vss.cpp
+++ b/src/sqlite-vss.cpp
@@ -821,7 +821,7 @@ static int vssIndexSync(sqlite3_vtab *pVTab) {
for (auto iter = pTable->getIndexes().begin(); iter != pTable->getIndexes().end(); ++iter) {
// Synchronizing index, implying deleting, training, and inserting records according to needs.
- needsWriting = (*iter)->synchronize();
+ needsWriting = (*iter)->synchronize() || needsWriting;
}
if (needsWriting) {
diff --git a/src/vss/vss-index.h b/src/vss/vss-index.h
index 4790993..897e184 100644
--- a/src/vss/vss-index.h
+++ b/src/vss/vss-index.h
@@ -69,10 +69,8 @@ class vss_index {
bool synchronize() {
auto result = tryTrain();
- if (tryDelete())
- result = true;
- if (tryInsert())
- result = true;
+ result = tryDelete() || result;
+ result = tryInsert() || result;
return result;
}
From fe7773320e462963c1a5e2d98ea3d5f4a7e49607 Mon Sep 17 00:00:00 2001
From: Thomas Hansen |
Date: Mon, 26 Jun 2023 12:48:09 +0300
Subject: [PATCH 20/66] Optimising and trying to get rid of build bug
---
src/sqlite-vss.cpp | 23 +++++++++++------------
1 file changed, 11 insertions(+), 12 deletions(-)
diff --git a/src/sqlite-vss.cpp b/src/sqlite-vss.cpp
index 65415cf..9c022ff 100644
--- a/src/sqlite-vss.cpp
+++ b/src/sqlite-vss.cpp
@@ -816,18 +816,11 @@ static int vssIndexSync(sqlite3_vtab *pVTab) {
try {
- bool needsWriting = false;
-
- for (auto iter = pTable->getIndexes().begin(); iter != pTable->getIndexes().end(); ++iter) {
+ auto i = 0;
+ for (auto iter = pTable->getIndexes().begin(); iter != pTable->getIndexes().end(); ++iter, i++) {
// Synchronizing index, implying deleting, training, and inserting records according to needs.
- needsWriting = (*iter)->synchronize() || needsWriting;
- }
-
- if (needsWriting) {
-
- int i = 0;
- for (auto iter = pTable->getIndexes().begin(); iter != pTable->getIndexes().end(); ++iter, i++) {
+ if ((*iter)->synchronize()) {
int rc = write_index((*iter)->getIndex(),
pTable->getDb(),
@@ -838,8 +831,14 @@ static int vssIndexSync(sqlite3_vtab *pVTab) {
if (rc != SQLITE_OK) {
pTable->setError(sqlite3_mprintf("Error saving index (%d): %s",
- rc,
- sqlite3_errmsg(pTable->getDb())));
+ rc,
+ sqlite3_errmsg(pTable->getDb())));
+
+ // Clearing all indexes to cleanup after ourselves.
+ for (auto iter2 = pTable->getIndexes().begin(); iter2 != pTable->getIndexes().end(); ++iter2) {
+
+ (*iter2)->reset();
+ }
return rc;
}
}
From feace327f4b960cccf41a686ecead3884a14aeef Mon Sep 17 00:00:00 2001
From: Thomas Hansen |
Date: Mon, 26 Jun 2023 13:13:25 +0300
Subject: [PATCH 21/66] Update sqlite-vss.cpp
---
src/sqlite-vss.cpp | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/src/sqlite-vss.cpp b/src/sqlite-vss.cpp
index 9c022ff..c9bdf09 100644
--- a/src/sqlite-vss.cpp
+++ b/src/sqlite-vss.cpp
@@ -822,6 +822,10 @@ static int vssIndexSync(sqlite3_vtab *pVTab) {
// Synchronizing index, implying deleting, training, and inserting records according to needs.
if ((*iter)->synchronize()) {
+ /*
+ * If the above invocation returned true, we've got updates to currently iterated index,
+ * hence writing to db.
+ */
int rc = write_index((*iter)->getIndex(),
pTable->getDb(),
pTable->getSchema(),
From bcef4f35261302ec68d6d57a0bc2eb6be1a3fb6c Mon Sep 17 00:00:00 2001
From: Thomas Hansen |
Date: Mon, 26 Jun 2023 13:26:04 +0300
Subject: [PATCH 22/66] Fixing bug
---
src/sqlite-vss.cpp | 91 ++++++++++++++++++++++++++++++++++-----------
src/vss/vss-index.h | 2 -
2 files changed, 69 insertions(+), 24 deletions(-)
diff --git a/src/sqlite-vss.cpp b/src/sqlite-vss.cpp
index c9bdf09..4ccfe97 100644
--- a/src/sqlite-vss.cpp
+++ b/src/sqlite-vss.cpp
@@ -816,33 +816,71 @@ static int vssIndexSync(sqlite3_vtab *pVTab) {
try {
- auto i = 0;
- for (auto iter = pTable->getIndexes().begin(); iter != pTable->getIndexes().end(); ++iter, i++) {
+ bool needsWriting = false;
+
+ auto idxCol = 0;
+ for (auto iter = pTable->getIndexes().begin(); iter != pTable->getIndexes().end(); ++iter, idxCol++) {
+
+ // Checking if index needs training.
+ if (!(*iter)->trainings.empty()) {
+
+ (*iter)->getIndex()->train(
+ (*iter)->trainings.size() / (*iter)->getIndex()->d,
+ (*iter)->trainings.data());
+
+ (*iter)->trainings.clear();
+ (*iter)->trainings.shrink_to_fit();
+
+ needsWriting = true;
+ }
+
+ // Checking if we're deleting records from the index.
+ if (!(*iter)->delete_ids.empty()) {
+
+ faiss::IDSelectorBatch selector((*iter)->delete_ids.size(),
+ (*iter)->delete_ids.data());
+
+ (*iter)->getIndex()->remove_ids(selector);
+ (*iter)->delete_ids.clear();
+ (*iter)->delete_ids.shrink_to_fit();
+
+ needsWriting = true;
+ }
+
+ // Checking if we're inserting records to the index.
+ if (!(*iter)->insert_data.empty()) {
+
+ (*iter)->getIndex()->add_with_ids(
+ (*iter)->insert_ids.size(),
+ (*iter)->insert_data.data(),
+ (faiss::idx_t *)(*iter)->insert_ids.data());
+
+ (*iter)->insert_ids.clear();
+ (*iter)->insert_ids.shrink_to_fit();
+
+ (*iter)->insert_data.clear();
+ (*iter)->insert_data.shrink_to_fit();
- // Synchronizing index, implying deleting, training, and inserting records according to needs.
- if ((*iter)->synchronize()) {
+ needsWriting = true;
+ }
+ }
+
+ if (needsWriting) {
+
+ int i = 0;
+ for (auto iter = pTable->getIndexes().begin(); iter != pTable->getIndexes().end(); ++iter, i++) {
- /*
- * If the above invocation returned true, we've got updates to currently iterated index,
- * hence writing to db.
- */
int rc = write_index((*iter)->getIndex(),
- pTable->getDb(),
- pTable->getSchema(),
- pTable->getName(),
- i);
+ pTable->getDb(),
+ pTable->getSchema(),
+ pTable->getName(),
+ i);
if (rc != SQLITE_OK) {
pTable->setError(sqlite3_mprintf("Error saving index (%d): %s",
- rc,
- sqlite3_errmsg(pTable->getDb())));
-
- // Clearing all indexes to cleanup after ourselves.
- for (auto iter2 = pTable->getIndexes().begin(); iter2 != pTable->getIndexes().end(); ++iter2) {
-
- (*iter2)->reset();
- }
+ rc,
+ sqlite3_errmsg(pTable->getDb())));
return rc;
}
}
@@ -857,8 +895,17 @@ static int vssIndexSync(sqlite3_vtab *pVTab) {
for (auto iter = pTable->getIndexes().begin(); iter != pTable->getIndexes().end(); ++iter) {
- // Cleanups in case we've got hanging data.
- (*iter)->reset();
+ (*iter)->insert_ids.clear();
+ (*iter)->insert_ids.shrink_to_fit();
+
+ (*iter)->insert_data.clear();
+ (*iter)->insert_data.shrink_to_fit();
+
+ (*iter)->delete_ids.clear();
+ (*iter)->delete_ids.shrink_to_fit();
+
+ (*iter)->trainings.clear();
+ (*iter)->trainings.shrink_to_fit();
}
return SQLITE_ERROR;
diff --git a/src/vss/vss-index.h b/src/vss/vss-index.h
index 897e184..e19b761 100644
--- a/src/vss/vss-index.h
+++ b/src/vss/vss-index.h
@@ -93,8 +93,6 @@ class vss_index {
delete_ids.shrink_to_fit();
}
-private:
-
bool tryTrain() {
if (trainings.empty())
From ca7763b812e44912063442d6c760e9f8c3eec5a5 Mon Sep 17 00:00:00 2001
From: Thomas Hansen |
Date: Mon, 26 Jun 2023 13:30:07 +0300
Subject: [PATCH 23/66] Revert "Fixing bug"
This reverts commit bcef4f35261302ec68d6d57a0bc2eb6be1a3fb6c.
---
src/sqlite-vss.cpp | 91 +++++++++++----------------------------------
src/vss/vss-index.h | 2 +
2 files changed, 24 insertions(+), 69 deletions(-)
diff --git a/src/sqlite-vss.cpp b/src/sqlite-vss.cpp
index 4ccfe97..c9bdf09 100644
--- a/src/sqlite-vss.cpp
+++ b/src/sqlite-vss.cpp
@@ -816,71 +816,33 @@ static int vssIndexSync(sqlite3_vtab *pVTab) {
try {
- bool needsWriting = false;
-
- auto idxCol = 0;
- for (auto iter = pTable->getIndexes().begin(); iter != pTable->getIndexes().end(); ++iter, idxCol++) {
-
- // Checking if index needs training.
- if (!(*iter)->trainings.empty()) {
-
- (*iter)->getIndex()->train(
- (*iter)->trainings.size() / (*iter)->getIndex()->d,
- (*iter)->trainings.data());
-
- (*iter)->trainings.clear();
- (*iter)->trainings.shrink_to_fit();
-
- needsWriting = true;
- }
-
- // Checking if we're deleting records from the index.
- if (!(*iter)->delete_ids.empty()) {
-
- faiss::IDSelectorBatch selector((*iter)->delete_ids.size(),
- (*iter)->delete_ids.data());
-
- (*iter)->getIndex()->remove_ids(selector);
- (*iter)->delete_ids.clear();
- (*iter)->delete_ids.shrink_to_fit();
-
- needsWriting = true;
- }
-
- // Checking if we're inserting records to the index.
- if (!(*iter)->insert_data.empty()) {
-
- (*iter)->getIndex()->add_with_ids(
- (*iter)->insert_ids.size(),
- (*iter)->insert_data.data(),
- (faiss::idx_t *)(*iter)->insert_ids.data());
-
- (*iter)->insert_ids.clear();
- (*iter)->insert_ids.shrink_to_fit();
-
- (*iter)->insert_data.clear();
- (*iter)->insert_data.shrink_to_fit();
-
- needsWriting = true;
- }
- }
-
- if (needsWriting) {
+ auto i = 0;
+ for (auto iter = pTable->getIndexes().begin(); iter != pTable->getIndexes().end(); ++iter, i++) {
- int i = 0;
- for (auto iter = pTable->getIndexes().begin(); iter != pTable->getIndexes().end(); ++iter, i++) {
+ // Synchronizing index, implying deleting, training, and inserting records according to needs.
+ if ((*iter)->synchronize()) {
+ /*
+ * If the above invocation returned true, we've got updates to currently iterated index,
+ * hence writing to db.
+ */
int rc = write_index((*iter)->getIndex(),
- pTable->getDb(),
- pTable->getSchema(),
- pTable->getName(),
- i);
+ pTable->getDb(),
+ pTable->getSchema(),
+ pTable->getName(),
+ i);
if (rc != SQLITE_OK) {
pTable->setError(sqlite3_mprintf("Error saving index (%d): %s",
- rc,
- sqlite3_errmsg(pTable->getDb())));
+ rc,
+ sqlite3_errmsg(pTable->getDb())));
+
+ // Clearing all indexes to cleanup after ourselves.
+ for (auto iter2 = pTable->getIndexes().begin(); iter2 != pTable->getIndexes().end(); ++iter2) {
+
+ (*iter2)->reset();
+ }
return rc;
}
}
@@ -895,17 +857,8 @@ static int vssIndexSync(sqlite3_vtab *pVTab) {
for (auto iter = pTable->getIndexes().begin(); iter != pTable->getIndexes().end(); ++iter) {
- (*iter)->insert_ids.clear();
- (*iter)->insert_ids.shrink_to_fit();
-
- (*iter)->insert_data.clear();
- (*iter)->insert_data.shrink_to_fit();
-
- (*iter)->delete_ids.clear();
- (*iter)->delete_ids.shrink_to_fit();
-
- (*iter)->trainings.clear();
- (*iter)->trainings.shrink_to_fit();
+ // Cleanups in case we've got hanging data.
+ (*iter)->reset();
}
return SQLITE_ERROR;
diff --git a/src/vss/vss-index.h b/src/vss/vss-index.h
index e19b761..897e184 100644
--- a/src/vss/vss-index.h
+++ b/src/vss/vss-index.h
@@ -93,6 +93,8 @@ class vss_index {
delete_ids.shrink_to_fit();
}
+private:
+
bool tryTrain() {
if (trainings.empty())
From 0d2b5147691f998924bf830060319ab591ab3406 Mon Sep 17 00:00:00 2001
From: Thomas Hansen |
Date: Mon, 26 Jun 2023 13:40:24 +0300
Subject: [PATCH 24/66] bug fix
---
src/sqlite-vss.cpp | 19 +++++++++++++------
1 file changed, 13 insertions(+), 6 deletions(-)
diff --git a/src/sqlite-vss.cpp b/src/sqlite-vss.cpp
index c9bdf09..800ced9 100644
--- a/src/sqlite-vss.cpp
+++ b/src/sqlite-vss.cpp
@@ -914,6 +914,7 @@ static int vssIndexUpdate(sqlite3_vtab *pVTab,
vec_ptr vec;
sqlite3_int64 rowid = sqlite3_value_int64(argv[1]);
+ bool inserted_rowid = false;
auto i = 0;
for (auto iter = pTable->getIndexes().begin(); iter != pTable->getIndexes().end(); ++iter, i++) {
@@ -930,12 +931,18 @@ static int vssIndexUpdate(sqlite3_vtab *pVTab,
return SQLITE_ERROR;
}
- auto rc = shadow_data_insert(pTable->getDb(),
- pTable->getSchema(),
- pTable->getName(),
- rowid);
- if (rc != SQLITE_OK)
- return rc;
+ if (!inserted_rowid) {
+
+ auto rc = shadow_data_insert(pTable->getDb(),
+ pTable->getSchema(),
+ pTable->getName(),
+ rowid);
+
+ if (rc != SQLITE_OK)
+ return rc;
+
+ inserted_rowid = true;
+ }
(*iter)->addInsertData(rowid, vec);
*pRowid = rowid;
From 8b1c9b0fe9442d1f51633804c2262efc1560d90c Mon Sep 17 00:00:00 2001
From: Thomas Hansen |
Date: Mon, 26 Jun 2023 13:50:38 +0300
Subject: [PATCH 25/66] Minor cleanups
---
src/sqlite-vss.cpp | 37 ++++++++++++++++++++-----------------
1 file changed, 20 insertions(+), 17 deletions(-)
diff --git a/src/sqlite-vss.cpp b/src/sqlite-vss.cpp
index 800ced9..eec2484 100644
--- a/src/sqlite-vss.cpp
+++ b/src/sqlite-vss.cpp
@@ -18,18 +18,25 @@ struct VssSearchParams {
sqlite3_int64 k;
};
-void delVssSearchParams(void *p) {
-
- VssSearchParams *self = (VssSearchParams *)p;
- delete self;
-}
-
struct VssRangeSearchParams {
vec_ptr vector;
float distance;
};
+struct VssIndexColumn {
+
+ string name;
+ sqlite3_int64 dimensions;
+ string factory;
+};
+
+void delVssSearchParams(void *p) {
+
+ VssSearchParams *self = (VssSearchParams *)p;
+ delete self;
+}
+
void delVssRangeSearchParams(void *p) {
auto self = (VssRangeSearchParams *)p;
@@ -38,7 +45,7 @@ void delVssRangeSearchParams(void *p) {
#pragma endregion
-#pragma region Vtab
+#pragma region Virtual table implementation
static void vssSearchParamsFunc(sqlite3_context *context,
int argc,
@@ -203,9 +210,9 @@ static int shadow_data_delete(sqlite3 *db,
return SQLITE_OK;
}
-static faiss::Index *read_index_select(sqlite3 *db,
- const char *name,
- int indexId) {
+static faiss::Index * read_index_select(sqlite3 *db,
+ const char *name,
+ int indexId) {
SqlStatement select(db,
sqlite3_mprintf("select idx from \"%w_index\" where rowid = ?",
@@ -262,6 +269,7 @@ static int create_shadow_tables(sqlite3 *db,
static int drop_shadow_tables(sqlite3 *db, char *name) {
+ // Dropping both x_index and x_data shadow tables.
const char *drops[2] = {"drop table \"%w_index\";",
"drop table \"%w_data\";"};
@@ -280,13 +288,6 @@ static int drop_shadow_tables(sqlite3 *db, char *name) {
return SQLITE_OK;
}
-struct VssIndexColumn {
-
- string name;
- sqlite3_int64 dimensions;
- string factory;
-};
-
unique_ptr<vector<VssIndexColumn>> parse_constructor(int argc,
const char *const *argv) {
@@ -914,7 +915,9 @@ static int vssIndexUpdate(sqlite3_vtab *pVTab,
vec_ptr vec;
sqlite3_int64 rowid = sqlite3_value_int64(argv[1]);
+ // Needed to make sure we insert null record into x_data table.
bool inserted_rowid = false;
+
auto i = 0;
for (auto iter = pTable->getIndexes().begin(); iter != pTable->getIndexes().end(); ++iter, i++) {
From 3f0551eca2cfe62d6e79c781eb39f8324e35c11e Mon Sep 17 00:00:00 2001
From: thomas-hansen-resolve
Date: Mon, 26 Jun 2023 15:30:49 +0300
Subject: [PATCH 26/66] Better encapsulation and cohesion
---
src/sqlite-vss.cpp | 95 ++++-----------------------------------------
src/vss/vss-index.h | 76 ++++++++++++++++++++++++++++++++++++
2 files changed, 84 insertions(+), 87 deletions(-)
diff --git a/src/sqlite-vss.cpp b/src/sqlite-vss.cpp
index eec2484..fad9760 100644
--- a/src/sqlite-vss.cpp
+++ b/src/sqlite-vss.cpp
@@ -86,83 +86,6 @@ static void vssRangeSearchParamsFunc(sqlite3_context *context,
sqlite3_result_pointer(context, params, "vss0_rangesearchparams", delVssRangeSearchParams);
}
-static int write_index_insert(faiss::VectorIOWriter &writer,
- sqlite3 *db,
- char *schema,
- char *name,
- int rowId) {
-
- // If inserts fails it means index already exists.
- SqlStatement insert(db,
- sqlite3_mprintf("insert into \"%w\".\"%w_index\"(rowid, idx) values (?, ?)",
- schema,
- name));
-
- if (insert.prepare() != SQLITE_OK)
- return SQLITE_ERROR;
-
- if (insert.bind_int64(1, rowId) != SQLITE_OK)
- return SQLITE_ERROR;
-
- if (insert.bind_blob64(2, writer.data.data(), writer.data.size()) != SQLITE_OK)
- return SQLITE_ERROR;
-
- auto rc = insert.step();
- if (rc == SQLITE_DONE)
- return SQLITE_OK; // Index did not exist, and we successfully inserted it.
-
- return rc;
-}
-
-static int write_index_update(faiss::VectorIOWriter &writer,
- sqlite3 *db,
- char *schema,
- char *name,
- int rowId) {
-
- // Updating existing index.
- SqlStatement update(db,
- sqlite3_mprintf("update \"%w\".\"%w_index\" set idx = ? where rowid = ?",
- schema,
- name));
-
- if (update.prepare() != SQLITE_OK)
- return SQLITE_ERROR;
-
- if (update.bind_blob64(1, writer.data.data(), writer.data.size()) != SQLITE_OK)
- return SQLITE_ERROR;
-
- if (update.bind_int64(2, rowId) != SQLITE_OK)
- return SQLITE_ERROR;
-
- auto rc = update.step();
- if (rc == SQLITE_DONE)
- return SQLITE_OK; // We successfully updated existing index.
-
- return rc;
-}
-
-static int write_index(faiss::Index *index,
- sqlite3 *db,
- char *schema,
- char *name,
- int rowId) {
-
- // Writing our index
- faiss::VectorIOWriter writer;
- faiss::write_index(index, &writer);
-
- // First trying to insert index, if that fails with ROW constraing error, we try to update existing index.
- if (write_index_insert(writer, db, schema, name, rowId) == SQLITE_OK)
- return SQLITE_OK;
-
- if (sqlite3_extended_errcode(db) != SQLITE_CONSTRAINT_ROWID)
- return SQLITE_ERROR; // Insert failed for unknown error
-
- // Insert failed because index already existed, updating existing index.
- return write_index_update(writer, db, schema, name, rowId);
-}
-
static int shadow_data_insert(sqlite3 *db,
char *schema,
char *name,
@@ -407,11 +330,10 @@ static int init(sqlite3 *db,
try {
- int rc = write_index((*iter)->getIndex(),
- pTable->getDb(),
- pTable->getSchema(),
- pTable->getName(),
- i);
+ int rc = (*iter)->write_index(pTable->getDb(),
+ pTable->getSchema(),
+ pTable->getName(),
+ i);
if (rc != SQLITE_OK)
return rc;
@@ -827,11 +749,10 @@ static int vssIndexSync(sqlite3_vtab *pVTab) {
* If the above invocation returned true, we've got updates to currently iterated index,
* hence writing to db.
*/
- int rc = write_index((*iter)->getIndex(),
- pTable->getDb(),
- pTable->getSchema(),
- pTable->getName(),
- i);
+ int rc = (*iter)->write_index(pTable->getDb(),
+ pTable->getSchema(),
+ pTable->getName(),
+ i);
if (rc != SQLITE_OK) {
diff --git a/src/vss/vss-index.h b/src/vss/vss-index.h
index 897e184..ef94d06 100644
--- a/src/vss/vss-index.h
+++ b/src/vss/vss-index.h
@@ -93,8 +93,84 @@ class vss_index {
delete_ids.shrink_to_fit();
}
+ int write_index(sqlite3 *db,
+ char *schema,
+ char *name,
+ int rowId) {
+
+ // Writing our index
+ faiss::VectorIOWriter writer;
+ faiss::write_index(index, &writer);
+
+ // First trying to insert index, if that fails with ROW constraing error, we try to update existing index.
+ if (write_index_insert(writer, db, schema, name, rowId) == SQLITE_OK)
+ return SQLITE_OK;
+
+ if (sqlite3_extended_errcode(db) != SQLITE_CONSTRAINT_ROWID)
+ return SQLITE_ERROR; // Insert failed for unknown error
+
+ // Insert failed because index already existed, updating existing index.
+ return write_index_update(writer, db, schema, name, rowId);
+ }
+
private:
+ int write_index_insert(faiss::VectorIOWriter &writer,
+ sqlite3 *db,
+ char *schema,
+ char *name,
+ int rowId) {
+
+ // If inserts fails it means index already exists.
+ SqlStatement insert(db,
+ sqlite3_mprintf("insert into \"%w\".\"%w_index\"(rowid, idx) values (?, ?)",
+ schema,
+ name));
+
+ if (insert.prepare() != SQLITE_OK)
+ return SQLITE_ERROR;
+
+ if (insert.bind_int64(1, rowId) != SQLITE_OK)
+ return SQLITE_ERROR;
+
+ if (insert.bind_blob64(2, writer.data.data(), writer.data.size()) != SQLITE_OK)
+ return SQLITE_ERROR;
+
+ auto rc = insert.step();
+ if (rc == SQLITE_DONE)
+ return SQLITE_OK; // Index did not exist, and we successfully inserted it.
+
+ return rc;
+ }
+
+ int write_index_update(faiss::VectorIOWriter &writer,
+ sqlite3 *db,
+ char *schema,
+ char *name,
+ int rowId) {
+
+ // Updating existing index.
+ SqlStatement update(db,
+ sqlite3_mprintf("update \"%w\".\"%w_index\" set idx = ? where rowid = ?",
+ schema,
+ name));
+
+ if (update.prepare() != SQLITE_OK)
+ return SQLITE_ERROR;
+
+ if (update.bind_blob64(1, writer.data.data(), writer.data.size()) != SQLITE_OK)
+ return SQLITE_ERROR;
+
+ if (update.bind_int64(2, rowId) != SQLITE_OK)
+ return SQLITE_ERROR;
+
+ auto rc = update.step();
+ if (rc == SQLITE_DONE)
+ return SQLITE_OK; // We successfully updated existing index.
+
+ return rc;
+ }
+
bool tryTrain() {
if (trainings.empty())
From 1ff26192c2d37cb362dbde679b57bef775cf995c Mon Sep 17 00:00:00 2001
From: thomas-hansen-resolve
Date: Mon, 26 Jun 2023 17:13:55 +0300
Subject: [PATCH 27/66] More encapsulation
Getting ready for applying read/write locks
---
src/sqlite-vss.cpp | 28 +++++++++++++-------------
src/vss/inclusions.h | 1 +
src/vss/vss-index.h | 48 +++++++++++++++++++++++++++++++++++++++++++-
3 files changed, 62 insertions(+), 15 deletions(-)
diff --git a/src/sqlite-vss.cpp b/src/sqlite-vss.cpp
index fad9760..91cabb2 100644
--- a/src/sqlite-vss.cpp
+++ b/src/sqlite-vss.cpp
@@ -531,15 +531,15 @@ static int vssIndexFilter(sqlite3_vtab_cursor *pVtabCursor,
}
int nq = 1;
- auto index = pCursor->getTable()->getIndexes().at(idxNum)->getIndex();
+ auto index = pCursor->getTable()->getIndexes().at(idxNum);
- if (query_vector->size() != index->d) {
+ if (!index->canQuery(query_vector)) {
auto ptrVtab = static_cast(pCursor->pVtab);
ptrVtab->setError(sqlite3_mprintf(
"Input query size doesn't match index dimensions: %ld != %ld",
query_vector->size(),
- index->d));
+ index->dimensions()));
return SQLITE_ERROR;
}
@@ -555,15 +555,15 @@ static int vssIndexFilter(sqlite3_vtab_cursor *pVtabCursor,
}
// To avoid trying to select more records than number of records in index.
- auto searchMax = min(static_cast(pCursor->getLimit()) * nq, index->ntotal * nq);
+ auto searchMax = min(static_cast(pCursor->getLimit()) * nq, index->size() * nq);
pCursor->resetSearch(searchMax);
index->search(nq,
- query_vector->data(),
+ query_vector,
searchMax,
- pCursor->getSearch_distances().data(),
- pCursor->getSearch_ids().data());
+ pCursor->getSearch_distances(),
+ pCursor->getSearch_ids());
} else if (strcmp(idxStr, "range_search") == 0) {
@@ -577,12 +577,12 @@ static int vssIndexFilter(sqlite3_vtab_cursor *pVtabCursor,
vector nns(params->distance * nq);
pCursor->getRange_search_result() = unique_ptr<faiss::RangeSearchResult>(new faiss::RangeSearchResult(nq, true));
- auto index = pCursor->getTable()->getIndexes().at(idxNum)->getIndex();
+ auto index = pCursor->getTable()->getIndexes().at(idxNum);
index->range_search(nq,
- params->vector->data(),
+ params->vector,
params->distance,
- pCursor->getRange_search_result().get());
+ pCursor->getRange_search_result());
} else if (strcmp(idxStr, "fullscan") == 0) {
@@ -701,15 +701,15 @@ static int vssIndexColumn(sqlite3_vtab_cursor *cur,
} else if (i >= VSS_INDEX_COLUMN_VECTORS) {
auto index =
- pCursor->getTable()->getIndexes().at(i - VSS_INDEX_COLUMN_VECTORS)->getIndex();
+ pCursor->getTable()->getIndexes().at(i - VSS_INDEX_COLUMN_VECTORS);
- vector<float> vec(index->d);
+ vector<float> vec(index->dimensions());
sqlite3_int64 rowId;
vssIndexRowid(cur, &rowId);
try {
- index->reconstruct(rowId, vec.data());
+ index->reconstruct(rowId, vec);
} catch (faiss::FaissException &e) {
@@ -846,7 +846,7 @@ static int vssIndexUpdate(sqlite3_vtab *pVTab,
argv[2 + VSS_INDEX_COLUMN_VECTORS + i])) != nullptr) {
// Make sure the index is already trained, if it's needed
- if (!(*iter)->getIndex()->is_trained) {
+ if (!(*iter)->isTrained()) {
pTable->setError(sqlite3_mprintf("Index at i=%d requires training "
"before inserting data.",
diff --git a/src/vss/inclusions.h b/src/vss/inclusions.h
index 006d432..bd0ab91 100644
--- a/src/vss/inclusions.h
+++ b/src/vss/inclusions.h
@@ -12,6 +12,7 @@ SQLITE_EXTENSION_INIT1
#include
#include
#include
+#include <shared_mutex>
#include
#include
diff --git a/src/vss/vss-index.h b/src/vss/vss-index.h
index ef94d06..cff825b 100644
--- a/src/vss/vss-index.h
+++ b/src/vss/vss-index.h
@@ -29,6 +29,52 @@ class vss_index {
return index;
}
+ // Returns false if index requires training before inserting items to it.
+ bool isTrained() {
+
+ return index->is_trained;
+ }
+
+ // Reconstructs the original vector, requires IDMap2 string in index factory to work.
+ void reconstruct(sqlite3_int64 rowid, vector<float> & vector) {
+
+ index->reconstruct(rowid, vector.data());
+ }
+
+ // Returns true if specified vector is allowed to query index.
+ bool canQuery(vec_ptr & vec) {
+
+ return vec->size() == index->d;
+ }
+
+ // Queries the index for matches matching the specified vector
+ void search(int nq,
+ vec_ptr & vec,
+ faiss::idx_t max,
+ vector<float> & distances,
+ vector<faiss::idx_t> & ids) {
+
+ index->search(nq, vec->data(), max, distances.data(), ids.data());
+ }
+
+ // Queries the index for a range of items.
+ void range_search(int nq, vec_ptr & vec, float distance, unique_ptr<faiss::RangeSearchResult> & result) {
+
+ index->range_search(nq, vec->data(), distance, result.get());
+ }
+
+ // Returns dimensions of index.
+ faiss::idx_t dimensions() {
+
+ return index->d;
+ }
+
+ // Returns the size of index.
+ faiss::idx_t size() {
+
+ return index->ntotal;
+ }
+
/*
* Adds the specified vector to the index' training material.
*
@@ -219,7 +265,7 @@ class vss_index {
return true;
}
- faiss::Index *index;
+ faiss::Index * index;
vector<float> trainings;
vector<float> insert_data;
vector<faiss::idx_t> insert_ids;
From dd6cfd114705b46a76692c833db3606d0dee9a2d Mon Sep 17 00:00:00 2001
From: thomas-hansen-resolve
Date: Mon, 26 Jun 2023 17:50:40 +0300
Subject: [PATCH 28/66] Bumping to C++ 17 - Required to have shared_mutex
---
CMakeLists.txt | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index c59d993..5100d5f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -12,7 +12,7 @@ endif()
configure_file(src/sqlite-vss.h.in sqlite-vss.h)
configure_file(src/sqlite-vector.h.in sqlite-vector.h)
-set(CMAKE_CXX_STANDARD 11)
+set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
option(FAISS_ENABLE_GPU "" OFF)
From 543384c3656c3eba8519285708ae5c2d4515022f Mon Sep 17 00:00:00 2001
From: thomas-hansen-resolve
Date: Mon, 26 Jun 2023 17:51:05 +0300
Subject: [PATCH 29/66] Further encapsulating getting ready for mutex
---
src/vss/inclusions.h | 1 -
src/vss/vss-index.h | 9 +++------
2 files changed, 3 insertions(+), 7 deletions(-)
diff --git a/src/vss/inclusions.h b/src/vss/inclusions.h
index bd0ab91..006d432 100644
--- a/src/vss/inclusions.h
+++ b/src/vss/inclusions.h
@@ -12,7 +12,6 @@ SQLITE_EXTENSION_INIT1
#include
#include
#include
-#include <shared_mutex>
#include
#include
diff --git a/src/vss/vss-index.h b/src/vss/vss-index.h
index cff825b..b4cb235 100644
--- a/src/vss/vss-index.h
+++ b/src/vss/vss-index.h
@@ -3,6 +3,7 @@
#define VSS_INDEX_H
#include "inclusions.h"
+#include <shared_mutex>
/*
* Wrapper around a single faiss index, with training data, insert records, and
@@ -15,7 +16,7 @@ class vss_index {
public:
- explicit vss_index(faiss::Index *index) : index(index) {}
+ explicit vss_index(faiss::Index *index) : index(index) { }
~vss_index() {
@@ -24,11 +25,6 @@ class vss_index {
}
}
- faiss::Index * getIndex() {
-
- return index;
- }
-
// Returns false if index requires training before inserting items to it.
bool isTrained() {
@@ -265,6 +261,7 @@ class vss_index {
return true;
}
+ std::shared_mutex lock;
faiss::Index * index;
vector<float> trainings;
vector<float> insert_data;
From 4b540be2d3a64fd5b1202c7e84dc8a6299fc7dd7 Mon Sep 17 00:00:00 2001
From: thomas-hansen-resolve
Date: Mon, 26 Jun 2023 18:05:49 +0300
Subject: [PATCH 30/66] Adding locks in crucial methods
---
src/vss/vss-index.h | 18 +++++++++++++++++-
1 file changed, 17 insertions(+), 1 deletion(-)
diff --git a/src/vss/vss-index.h b/src/vss/vss-index.h
index b4cb235..9f6019d 100644
--- a/src/vss/vss-index.h
+++ b/src/vss/vss-index.h
@@ -50,12 +50,16 @@ class vss_index {
vector<float> & distances,
vector<faiss::idx_t> & ids) {
+ shared_lock lock(_lock);
+
index->search(nq, vec->data(), max, distances.data(), ids.data());
}
// Queries the index for a range of items.
void range_search(int nq, vec_ptr & vec, float distance, unique_ptr<faiss::RangeSearchResult> & result) {
+ shared_lock lock(_lock);
+
index->range_search(nq, vec->data(), distance, result.get());
}
@@ -78,6 +82,8 @@ class vss_index {
*/
void addTrainings(vec_ptr & vec) {
+ unique_lock lock(_lock);
+
trainings.reserve(trainings.size() + vec->size());
trainings.insert(trainings.end(), vec->begin(), vec->end());
}
@@ -89,6 +95,8 @@ class vss_index {
*/
void addInsertData(faiss::idx_t rowId, vec_ptr & vec) {
+ unique_lock lock(_lock);
+
insert_data.reserve(insert_data.size() + vec->size());
insert_data.insert(insert_data.end(), vec->begin(), vec->end());
@@ -102,6 +110,8 @@ class vss_index {
*/
void addDelete(faiss::idx_t rowid) {
+ unique_lock lock(_lock);
+
delete_ids.push_back(rowid);
}
@@ -110,6 +120,8 @@ class vss_index {
*/
bool synchronize() {
+ unique_lock lock(_lock);
+
auto result = tryTrain();
result = tryDelete() || result;
result = tryInsert() || result;
@@ -122,6 +134,8 @@ class vss_index {
*/
void reset() {
+ unique_lock lock(_lock);
+
trainings.clear();
trainings.shrink_to_fit();
@@ -140,6 +154,8 @@ class vss_index {
char *name,
int rowId) {
+ unique_lock lock(_lock);
+
// Writing our index
faiss::VectorIOWriter writer;
faiss::write_index(index, &writer);
@@ -261,7 +277,7 @@ class vss_index {
return true;
}
- std::shared_mutex lock;
+ std::shared_mutex _lock;
faiss::Index * index;
vector<float> trainings;
vector<float> insert_data;
From 4d98e5561e9d6e10492068c4b71fd2a1a29808fd Mon Sep 17 00:00:00 2001
From: Thomas Hansen
Date: Mon, 26 Jun 2023 19:19:52 +0300
Subject: [PATCH 31/66] Creating factory constructor getting ready to cache
faiss Index *
---
src/sqlite-vss.cpp | 92 +++++++++++++--------------------------------
src/vss/vss-index.h | 53 +++++++++++++++++++++++++-
2 files changed, 78 insertions(+), 67 deletions(-)
diff --git a/src/sqlite-vss.cpp b/src/sqlite-vss.cpp
index 91cabb2..b19e96a 100644
--- a/src/sqlite-vss.cpp
+++ b/src/sqlite-vss.cpp
@@ -133,34 +133,6 @@ static int shadow_data_delete(sqlite3 *db,
return SQLITE_OK;
}
-static faiss::Index * read_index_select(sqlite3 *db,
- const char *name,
- int indexId) {
-
- SqlStatement select(db,
- sqlite3_mprintf("select idx from \"%w_index\" where rowid = ?",
- name));
-
- if (select.prepare() != SQLITE_OK)
- return nullptr;
-
- if (select.bind_int64(1, indexId) != SQLITE_OK)
- return nullptr;
-
- if (select.step() != SQLITE_ROW)
- return nullptr;
-
- auto index_data = select.column_blob(0);
- auto size = select.column_bytes(0);
-
- faiss::VectorIOReader reader;
- copy((const uint8_t *)index_data,
- ((const uint8_t *)index_data) + size,
- back_inserter(reader.data));
-
- return faiss::read_index(&reader);
-}
-
static int create_shadow_tables(sqlite3 *db,
const char *schema,
const char *name,
@@ -299,36 +271,31 @@ static int init(sqlite3 *db,
*ppVtab = pTable;
- if (isCreate) {
-
- for (auto iter = columns->begin(); iter != columns->end(); ++iter) {
-
- try {
+ try {
- auto index = faiss::index_factory(iter->dimensions, iter->factory.c_str());
- pTable->getIndexes().push_back(new vss_index(index));
+ if (isCreate) {
- } catch (faiss::FaissException &e) {
+ auto i = 0;
+ for (auto iter = columns->begin(); iter != columns->end(); ++iter, i++) {
- *pzErr = sqlite3_mprintf("Error building index factory for %s, exception was: %s",
- iter->name.c_str(),
- e.msg.c_str());
+ pTable->getIndexes().push_back(
+ vss_index::factory(db,
+ argv[2],
+ i,
+ &iter->factory,
+ iter->dimensions));
- return SQLITE_ERROR;
}
- }
-
- rc = create_shadow_tables(db, argv[1], argv[2], columns->size());
- if (rc != SQLITE_OK)
- return rc;
- // Shadow tables were successully created.
- // After shadow tables are created, write the initial index state to
- // shadow _index.
- auto i = 0;
- for (auto iter = pTable->getIndexes().begin(); iter != pTable->getIndexes().end(); ++iter, i++) {
+ rc = create_shadow_tables(db, argv[1], argv[2], columns->size());
+ if (rc != SQLITE_OK)
+ return rc;
- try {
+ // Shadow tables were successully created.
+ // After shadow tables are created, write the initial index state to
+ // shadow _index.
+ i = 0;
+ for (auto iter = pTable->getIndexes().begin(); iter != pTable->getIndexes().end(); ++iter, i++) {
int rc = (*iter)->write_index(pTable->getDb(),
pTable->getSchema(),
@@ -337,27 +304,22 @@ static int init(sqlite3 *db,
if (rc != SQLITE_OK)
return rc;
+ }
- } catch (faiss::FaissException &e) {
+ } else {
- return SQLITE_ERROR;
+ for (int i = 0; i < columns->size(); i++) {
+
+ pTable->getIndexes().push_back(vss_index::factory(db, argv[2], i, nullptr, -1));
}
}
- } else {
-
- for (int i = 0; i < columns->size(); i++) {
+ } catch (faiss::FaissException &e) {
- auto index = read_index_select(db, argv[2], i);
+ *pzErr = sqlite3_mprintf("Error building index factory, exception was: %s",
+ e.msg.c_str());
- // Index in shadow table should always be available, integrity check
- // to avoid null pointer
- if (index == nullptr) {
- *pzErr = sqlite3_mprintf("Could not read index at position %d", i);
- return SQLITE_ERROR;
- }
- pTable->getIndexes().push_back(new vss_index(index));
- }
+ return SQLITE_ERROR;
}
return SQLITE_OK;
diff --git a/src/vss/vss-index.h b/src/vss/vss-index.h
index 9f6019d..188dcf1 100644
--- a/src/vss/vss-index.h
+++ b/src/vss/vss-index.h
@@ -16,8 +16,6 @@ class vss_index {
public:
- explicit vss_index(faiss::Index *index) : index(index) { }
-
~vss_index() {
if (index != nullptr) {
@@ -171,8 +169,59 @@ class vss_index {
return write_index_update(writer, db, schema, name, rowId);
}
+ // Creates a new vss_index or returns a cached index to caller.
+ static vss_index * factory(sqlite3 *db,
+ const char *name,
+ int indexId,
+ string * factoryArgs,
+ int dimensions) {
+
+ string key = name;
+ key += indexId;
+ if (factoryArgs == nullptr) {
+
+ unique_ptr tmp(new vss_index(vss_index::read_index_select(db, name, indexId)));
+ return tmp.release();
+
+ } else {
+
+ unique_ptr tmp(new vss_index(faiss::index_factory(dimensions, factoryArgs->c_str())));
+ return tmp.release();
+ }
+ }
+
private:
+ explicit vss_index(faiss::Index *index) : index(index) { }
+
+ static faiss::Index * read_index_select(sqlite3 *db,
+ const char *name,
+ int indexId) {
+
+ SqlStatement select(db,
+ sqlite3_mprintf("select idx from \"%w_index\" where rowid = ?",
+ name));
+
+ if (select.prepare() != SQLITE_OK)
+ return nullptr;
+
+ if (select.bind_int64(1, indexId) != SQLITE_OK)
+ return nullptr;
+
+ if (select.step() != SQLITE_ROW)
+ return nullptr;
+
+ auto index_data = select.column_blob(0);
+ auto size = select.column_bytes(0);
+
+ faiss::VectorIOReader reader;
+ copy((const uint8_t *)index_data,
+ ((const uint8_t *)index_data) + size,
+ back_inserter(reader.data));
+
+ return faiss::read_index(&reader);
+ }
+
int write_index_insert(faiss::VectorIOWriter &writer,
sqlite3 *db,
char *schema,
From ebcc2913f5f270a7b0b3d2ae50e79def19622b12 Mon Sep 17 00:00:00 2001
From: Thomas Hansen
Date: Mon, 26 Jun 2023 19:34:40 +0300
Subject: [PATCH 32/66] Moving more stuff around to get ready for caching of
index
---
src/sqlite-vss.cpp | 56 +++++++++++++--------------------------------
src/vss/vss-index.h | 44 +++++++++++++++++++++++++++++++----
2 files changed, 56 insertions(+), 44 deletions(-)
diff --git a/src/sqlite-vss.cpp b/src/sqlite-vss.cpp
index b19e96a..7a3c550 100644
--- a/src/sqlite-vss.cpp
+++ b/src/sqlite-vss.cpp
@@ -133,35 +133,6 @@ static int shadow_data_delete(sqlite3 *db,
return SQLITE_OK;
}
-static int create_shadow_tables(sqlite3 *db,
- const char *schema,
- const char *name,
- int n) {
-
- SqlStatement create1(db,
- sqlite3_mprintf("create table \"%w\".\"%w_index\"(idx)",
- schema,
- name));
-
- auto rc = create1.exec();
- if (rc != SQLITE_OK)
- return rc;
-
- /*
- * Notice, we'll need to explicitly finalize this object since we can only
- * have one open statement at the same time to the same connetion.
- */
- create1.finalize();
-
- SqlStatement create2(db,
- sqlite3_mprintf("create table \"%w\".\"%w_data\"(x);",
- schema,
- name));
-
- rc = create2.exec();
- return rc;
-}
-
static int drop_shadow_tables(sqlite3 *db, char *name) {
// Dropping both x_index and x_data shadow tables.
@@ -280,17 +251,15 @@ static int init(sqlite3 *db,
pTable->getIndexes().push_back(
vss_index::factory(db,
- argv[2],
- i,
- &iter->factory,
- iter->dimensions));
+ argv[1],
+ argv[2],
+ i,
+ &iter->factory,
+ iter->dimensions,
+ columns->size()));
}
- rc = create_shadow_tables(db, argv[1], argv[2], columns->size());
- if (rc != SQLITE_OK)
- return rc;
-
// Shadow tables were successully created.
// After shadow tables are created, write the initial index state to
// shadow _index.
@@ -310,14 +279,21 @@ static int init(sqlite3 *db,
for (int i = 0; i < columns->size(); i++) {
- pTable->getIndexes().push_back(vss_index::factory(db, argv[2], i, nullptr, -1));
+ pTable->getIndexes().push_back(
+ vss_index::factory(db,
+ argv[1],
+ argv[2],
+ i,
+ nullptr,
+ -1,
+ -1));
}
}
- } catch (faiss::FaissException &e) {
+ } catch (exception & e) {
*pzErr = sqlite3_mprintf("Error building index factory, exception was: %s",
- e.msg.c_str());
+ e.what());
return SQLITE_ERROR;
}
diff --git a/src/vss/vss-index.h b/src/vss/vss-index.h
index 188dcf1..92ab92e 100644
--- a/src/vss/vss-index.h
+++ b/src/vss/vss-index.h
@@ -171,10 +171,12 @@ class vss_index {
// Creates a new vss_index or returns a cached index to caller.
static vss_index * factory(sqlite3 *db,
- const char *name,
- int indexId,
- string * factoryArgs,
- int dimensions) {
+ const char *schema,
+ const char *name,
+ int indexId,
+ string * factoryArgs,
+ int dimensions,
+ int colSize) {
string key = name;
key += indexId;
@@ -186,12 +188,46 @@ class vss_index {
} else {
unique_ptr tmp(new vss_index(faiss::index_factory(dimensions, factoryArgs->c_str())));
+
+ auto rc = create_shadow_tables(db, schema, name, colSize);
+ if (rc != SQLITE_OK)
+ throw domain_error("Couldn't create shadow tables");
+
return tmp.release();
}
}
private:
+ static int create_shadow_tables(sqlite3 *db,
+ const char *schema,
+ const char *name,
+ int n) {
+
+ SqlStatement create1(db,
+ sqlite3_mprintf("create table \"%w\".\"%w_index\"(idx)",
+ schema,
+ name));
+
+ auto rc = create1.exec();
+ if (rc != SQLITE_OK)
+ return rc;
+
+ /*
+ * Notice, we'll need to explicitly finalize this object since we can only
+ * have one open statement at the same time to the same connetion.
+ */
+ create1.finalize();
+
+ SqlStatement create2(db,
+ sqlite3_mprintf("create table \"%w\".\"%w_data\"(x);",
+ schema,
+ name));
+
+ rc = create2.exec();
+ return rc;
+ }
+
explicit vss_index(faiss::Index *index) : index(index) { }
static faiss::Index * read_index_select(sqlite3 *db,
From cbe056fb95a5b600b914b9079e6deab971299800 Mon Sep 17 00:00:00 2001
From: Thomas Hansen
Date: Mon, 26 Jun 2023 19:51:49 +0300
Subject: [PATCH 33/66] Oops, bug fix
---
src/sqlite-vss.cpp | 36 +++++++++++++++++++++++++---
src/vss/vss-index.h | 58 +++++++++++++--------------------------------
2 files changed, 49 insertions(+), 45 deletions(-)
diff --git a/src/sqlite-vss.cpp b/src/sqlite-vss.cpp
index 7a3c550..2c22c5f 100644
--- a/src/sqlite-vss.cpp
+++ b/src/sqlite-vss.cpp
@@ -201,6 +201,34 @@ unique_ptr> parse_constructor(int argc,
return columns;
}
+static int create_shadow_tables(sqlite3 *db,
+ const char *schema,
+ const char *name) {
+
+ SqlStatement create1(db,
+ sqlite3_mprintf("create table \"%w\".\"%w_index\"(idx)",
+ schema,
+ name));
+
+ auto rc = create1.exec();
+ if (rc != SQLITE_OK)
+ return rc;
+
+ /*
+ * Notice, we'll need to explicitly finalize this object since we can only
+ * have one open statement at the same time to the same connetion.
+ */
+ create1.finalize();
+
+ SqlStatement create2(db,
+ sqlite3_mprintf("create table \"%w\".\"%w_data\"(x);",
+ schema,
+ name));
+
+ rc = create2.exec();
+ return rc;
+}
+
#define VSS_INDEX_COLUMN_DISTANCE 0
#define VSS_INDEX_COLUMN_OPERATION 1
#define VSS_INDEX_COLUMN_VECTORS 2
@@ -255,11 +283,14 @@ static int init(sqlite3 *db,
argv[2],
i,
&iter->factory,
- iter->dimensions,
- columns->size()));
+ iter->dimensions));
}
+ rc = create_shadow_tables(db, argv[1], argv[2]);
+ if (rc != SQLITE_OK)
+ return rc;
+
// Shadow tables were successully created.
// After shadow tables are created, write the initial index state to
// shadow _index.
@@ -285,7 +316,6 @@ static int init(sqlite3 *db,
argv[2],
i,
nullptr,
- -1,
-1));
}
}
diff --git a/src/vss/vss-index.h b/src/vss/vss-index.h
index 92ab92e..cc71081 100644
--- a/src/vss/vss-index.h
+++ b/src/vss/vss-index.h
@@ -148,8 +148,8 @@ class vss_index {
}
int write_index(sqlite3 *db,
- char *schema,
- char *name,
+ const char *schema,
+ const char *name,
int rowId) {
unique_lock lock(_lock);
@@ -175,59 +175,33 @@ class vss_index {
const char *name,
int indexId,
string * factoryArgs,
- int dimensions,
- int colSize) {
+ int dimensions) {
string key = name;
key += indexId;
if (factoryArgs == nullptr) {
unique_ptr tmp(new vss_index(vss_index::read_index_select(db, name, indexId)));
+
+ int rc = tmp->write_index(db,
+ schema,
+ name,
+ indexId);
+
+ if (rc != SQLITE_OK)
+ throw domain_error("Couldn't write initial state of index");
+
return tmp.release();
} else {
unique_ptr tmp(new vss_index(faiss::index_factory(dimensions, factoryArgs->c_str())));
-
- auto rc = create_shadow_tables(db, schema, name, colSize);
- if (rc != SQLITE_OK)
- throw domain_error("Couldn't create shadow tables");
-
return tmp.release();
}
}
private:
- static int create_shadow_tables(sqlite3 *db,
- const char *schema,
- const char *name,
- int n) {
-
- SqlStatement create1(db,
- sqlite3_mprintf("create table \"%w\".\"%w_index\"(idx)",
- schema,
- name));
-
- auto rc = create1.exec();
- if (rc != SQLITE_OK)
- return rc;
-
- /*
- * Notice, we'll need to explicitly finalize this object since we can only
- * have one open statement at the same time to the same connetion.
- */
- create1.finalize();
-
- SqlStatement create2(db,
- sqlite3_mprintf("create table \"%w\".\"%w_data\"(x);",
- schema,
- name));
-
- rc = create2.exec();
- return rc;
- }
-
explicit vss_index(faiss::Index *index) : index(index) { }
static faiss::Index * read_index_select(sqlite3 *db,
@@ -260,8 +234,8 @@ class vss_index {
int write_index_insert(faiss::VectorIOWriter &writer,
sqlite3 *db,
- char *schema,
- char *name,
+ const char *schema,
+ const char *name,
int rowId) {
// If inserts fails it means index already exists.
@@ -288,8 +262,8 @@ class vss_index {
int write_index_update(faiss::VectorIOWriter &writer,
sqlite3 *db,
- char *schema,
- char *name,
+ const char *schema,
+ const char *name,
int rowId) {
// Updating existing index.
From 452566028efa65d3f5abe26242e7c1b3d34908b1 Mon Sep 17 00:00:00 2001
From: Thomas Hansen
Date: Mon, 26 Jun 2023 20:06:08 +0300
Subject: [PATCH 34/66] Improving structure further to get ready for caching
---
src/sqlite-vss.cpp | 27 +++++----------------------
src/vss/vss-index.h | 44 ++++++++++++++++++++++++++++++--------------
2 files changed, 35 insertions(+), 36 deletions(-)
diff --git a/src/sqlite-vss.cpp b/src/sqlite-vss.cpp
index 2c22c5f..98b6fe6 100644
--- a/src/sqlite-vss.cpp
+++ b/src/sqlite-vss.cpp
@@ -274,6 +274,10 @@ static int init(sqlite3 *db,
if (isCreate) {
+ rc = create_shadow_tables(db, argv[1], argv[2]);
+ if (rc != SQLITE_OK)
+ return rc;
+
auto i = 0;
for (auto iter = columns->begin(); iter != columns->end(); ++iter, i++) {
@@ -287,25 +291,6 @@ static int init(sqlite3 *db,
}
- rc = create_shadow_tables(db, argv[1], argv[2]);
- if (rc != SQLITE_OK)
- return rc;
-
- // Shadow tables were successully created.
- // After shadow tables are created, write the initial index state to
- // shadow _index.
- i = 0;
- for (auto iter = pTable->getIndexes().begin(); iter != pTable->getIndexes().end(); ++iter, i++) {
-
- int rc = (*iter)->write_index(pTable->getDb(),
- pTable->getSchema(),
- pTable->getName(),
- i);
-
- if (rc != SQLITE_OK)
- return rc;
- }
-
} else {
for (int i = 0; i < columns->size(); i++) {
@@ -314,9 +299,7 @@ static int init(sqlite3 *db,
vss_index::factory(db,
argv[1],
argv[2],
- i,
- nullptr,
- -1));
+ i));
}
}
diff --git a/src/vss/vss-index.h b/src/vss/vss-index.h
index cc71081..1cf4c47 100644
--- a/src/vss/vss-index.h
+++ b/src/vss/vss-index.h
@@ -169,7 +169,10 @@ class vss_index {
return write_index_update(writer, db, schema, name, rowId);
}
- // Creates a new vss_index or returns a cached index to caller.
+ /*
+ * Creates a new vss_index as a virtual table is being
+ * created using the VSS module.
+ */
static vss_index * factory(sqlite3 *db,
const char *schema,
const char *name,
@@ -177,27 +180,40 @@ class vss_index {
string * factoryArgs,
int dimensions) {
+ // Figuring out cache key to use to store index into cache.
string key = name;
key += indexId;
- if (factoryArgs == nullptr) {
- unique_ptr tmp(new vss_index(vss_index::read_index_select(db, name, indexId)));
+ // Creating a new index and storing in cache.
+ unique_ptr newIndex(new vss_index(faiss::index_factory(dimensions, factoryArgs->c_str())));
+
+ int rc = newIndex->write_index(db,
+ schema,
+ name,
+ indexId);
- int rc = tmp->write_index(db,
- schema,
- name,
- indexId);
+ // Returning index to caller.
+ return newIndex.release();
+ }
- if (rc != SQLITE_OK)
- throw domain_error("Couldn't write initial state of index");
+ /*
+ * Creates a new vss_index by reading existing data fromdb,
+ * or returns a cached index to caller.
+ */
+ static vss_index * factory(sqlite3 *db,
+ const char *schema,
+ const char *name,
+ int indexId) {
- return tmp.release();
+ // Figuring out cache key to use to lookup into cache to see if index already has been created and cached.
+ string key = name;
+ key += indexId;
- } else {
+ // Reading index from db.
+ unique_ptr tmp(new vss_index(read_index_select(db, name, indexId)));
- unique_ptr tmp(new vss_index(faiss::index_factory(dimensions, factoryArgs->c_str())));
- return tmp.release();
- }
+ // Returning index to caller.
+ return tmp.release();
}
private:
From d71031cce41bf63d9ded76d37304fc7bb20b7455 Mon Sep 17 00:00:00 2001
From: Thomas Hansen
Date: Mon, 26 Jun 2023 20:13:12 +0300
Subject: [PATCH 35/66] Re-structuring
---
src/sqlite-vss.cpp | 32 -------------------------------
src/vss/vss-index.h | 46 ++++++++++++++++++++++++++++++++++++++++-----
2 files changed, 41 insertions(+), 37 deletions(-)
diff --git a/src/sqlite-vss.cpp b/src/sqlite-vss.cpp
index 98b6fe6..db16394 100644
--- a/src/sqlite-vss.cpp
+++ b/src/sqlite-vss.cpp
@@ -201,34 +201,6 @@ unique_ptr> parse_constructor(int argc,
return columns;
}
-static int create_shadow_tables(sqlite3 *db,
- const char *schema,
- const char *name) {
-
- SqlStatement create1(db,
- sqlite3_mprintf("create table \"%w\".\"%w_index\"(idx)",
- schema,
- name));
-
- auto rc = create1.exec();
- if (rc != SQLITE_OK)
- return rc;
-
- /*
- * Notice, we'll need to explicitly finalize this object since we can only
- * have one open statement at the same time to the same connetion.
- */
- create1.finalize();
-
- SqlStatement create2(db,
- sqlite3_mprintf("create table \"%w\".\"%w_data\"(x);",
- schema,
- name));
-
- rc = create2.exec();
- return rc;
-}
-
#define VSS_INDEX_COLUMN_DISTANCE 0
#define VSS_INDEX_COLUMN_OPERATION 1
#define VSS_INDEX_COLUMN_VECTORS 2
@@ -274,10 +246,6 @@ static int init(sqlite3 *db,
if (isCreate) {
- rc = create_shadow_tables(db, argv[1], argv[2]);
- if (rc != SQLITE_OK)
- return rc;
-
auto i = 0;
for (auto iter = columns->begin(); iter != columns->end(); ++iter, i++) {
diff --git a/src/vss/vss-index.h b/src/vss/vss-index.h
index 1cf4c47..56567a6 100644
--- a/src/vss/vss-index.h
+++ b/src/vss/vss-index.h
@@ -170,8 +170,8 @@ class vss_index {
}
/*
- * Creates a new vss_index as a virtual table is being
- * created using the VSS module.
+ * Creates a new vss_index as a virtual table and stores
+ * its initial (empty) state.
*/
static vss_index * factory(sqlite3 *db,
const char *schema,
@@ -192,12 +192,20 @@ class vss_index {
name,
indexId);
+ // Checking if this is our first index, at which point we create our shadow tables.
+ if (indexId == 0) {
+
+ auto rc = create_shadow_tables(db, schema, name);
+ if (rc != SQLITE_OK)
+ throw domain_error("Couldn't create shadow tables");
+ }
+
// Returning index to caller.
return newIndex.release();
}
/*
- * Creates a new vss_index by reading existing data fromdb,
+ * Creates a new vss_index by reading existing data from db,
* or returns a cached index to caller.
*/
static vss_index * factory(sqlite3 *db,
@@ -210,14 +218,42 @@ class vss_index {
key += indexId;
// Reading index from db.
- unique_ptr tmp(new vss_index(read_index_select(db, name, indexId)));
+ unique_ptr newIndex(new vss_index(read_index_select(db, name, indexId)));
// Returning index to caller.
- return tmp.release();
+ return newIndex.release();
}
private:
+ static int create_shadow_tables(sqlite3 *db,
+ const char *schema,
+ const char *name) {
+
+ SqlStatement create1(db,
+ sqlite3_mprintf("create table \"%w\".\"%w_index\"(idx)",
+ schema,
+ name));
+
+ auto rc = create1.exec();
+ if (rc != SQLITE_OK)
+ return rc;
+
+ /*
+ * Notice, we'll need to explicitly finalize this object since we can only
+ * have one open statement at the same time to the same connetion.
+ */
+ create1.finalize();
+
+ SqlStatement create2(db,
+ sqlite3_mprintf("create table \"%w\".\"%w_data\"(x);",
+ schema,
+ name));
+
+ rc = create2.exec();
+ return rc;
+ }
+
explicit vss_index(faiss::Index *index) : index(index) { }
static faiss::Index * read_index_select(sqlite3 *db,
From 13479442025d5c748608ae33026e0e14dffa9679 Mon Sep 17 00:00:00 2001
From: Thomas Hansen
Date: Mon, 26 Jun 2023 20:15:00 +0300
Subject: [PATCH 36/66] Update vss-index.h
---
src/vss/vss-index.h | 13 +++++++------
1 file changed, 7 insertions(+), 6 deletions(-)
diff --git a/src/vss/vss-index.h b/src/vss/vss-index.h
index 56567a6..4f4aee6 100644
--- a/src/vss/vss-index.h
+++ b/src/vss/vss-index.h
@@ -187,12 +187,7 @@ class vss_index {
// Creating a new index and storing in cache.
unique_ptr newIndex(new vss_index(faiss::index_factory(dimensions, factoryArgs->c_str())));
- int rc = newIndex->write_index(db,
- schema,
- name,
- indexId);
-
- // Checking if this is our first index, at which point we create our shadow tables.
+ // Checking if this is our first index for table, at which point we create our shadow tables.
if (indexId == 0) {
auto rc = create_shadow_tables(db, schema, name);
@@ -200,6 +195,12 @@ class vss_index {
throw domain_error("Couldn't create shadow tables");
}
+ // Writing its initial (empty) state.
+ int rc = newIndex->write_index(db,
+ schema,
+ name,
+ indexId);
+
// Returning index to caller.
return newIndex.release();
}
From cbf854645f9e91aa0f803d77b6c6160eafce9653 Mon Sep 17 00:00:00 2001
From: Thomas Hansen
Date: Mon, 26 Jun 2023 20:32:18 +0300
Subject: [PATCH 37/66] Locking access to factory methods
---
src/sqlite-vss.cpp | 5 +++++
src/vss/vss-index.h | 8 +++++++-
2 files changed, 12 insertions(+), 1 deletion(-)
diff --git a/src/sqlite-vss.cpp b/src/sqlite-vss.cpp
index db16394..c6ea8fe 100644
--- a/src/sqlite-vss.cpp
+++ b/src/sqlite-vss.cpp
@@ -205,6 +205,8 @@ unique_ptr> parse_constructor(int argc,
#define VSS_INDEX_COLUMN_OPERATION 1
#define VSS_INDEX_COLUMN_VECTORS 2
+shared_mutex vss_index::_globalLock;
+
static int init(sqlite3 *db,
void *pAux,
int argc,
@@ -244,6 +246,9 @@ static int init(sqlite3 *db,
try {
+ // To avoid race conditions towards cache we lock creation of indexes.
+ unique_lock globalLock(*vss_index::getGlobalLock());
+
if (isCreate) {
auto i = 0;
diff --git a/src/vss/vss-index.h b/src/vss/vss-index.h
index 4f4aee6..3a7a5e2 100644
--- a/src/vss/vss-index.h
+++ b/src/vss/vss-index.h
@@ -225,6 +225,11 @@ class vss_index {
return newIndex.release();
}
+ static shared_mutex * getGlobalLock() {
+
+ return &_globalLock;
+ }
+
private:
static int create_shadow_tables(sqlite3 *db,
@@ -389,7 +394,8 @@ class vss_index {
return true;
}
- std::shared_mutex _lock;
+ static shared_mutex _globalLock;
+ shared_mutex _lock;
faiss::Index * index;
vector trainings;
vector insert_data;
From f773d25a37c30875cabfe5ae083019cd4aea3d90 Mon Sep 17 00:00:00 2001
From: Thomas Hansen
Date: Mon, 26 Jun 2023 21:09:59 +0300
Subject: [PATCH 38/66] Caching faiss indexes sharing between multiple
connections
Hopefully synchronising access correctly and cleaning up stuff ;)
---
src/sqlite-vss.cpp | 6 ++++
src/vss/vss-index-vtab.h | 3 --
src/vss/vss-index.h | 65 +++++++++++++++++++++++++++++++---------
3 files changed, 57 insertions(+), 17 deletions(-)
diff --git a/src/sqlite-vss.cpp b/src/sqlite-vss.cpp
index c6ea8fe..6d65b31 100644
--- a/src/sqlite-vss.cpp
+++ b/src/sqlite-vss.cpp
@@ -205,7 +205,9 @@ unique_ptr> parse_constructor(int argc,
#define VSS_INDEX_COLUMN_OPERATION 1
#define VSS_INDEX_COLUMN_VECTORS 2
+// Declaration of static objects required to do caching.
shared_mutex vss_index::_globalLock;
+map vss_index::_instances;
static int init(sqlite3 *db,
void *pAux,
@@ -316,6 +318,10 @@ static int vssIndexDestroy(sqlite3_vtab *pVtab) {
auto pTable = static_cast(pVtab);
drop_shadow_tables(pTable->getDb(), pTable->getName());
+
+ // Removing from cache.
+ vss_index::destroy(pTable->getSchema(), pTable->getName());
+
vssIndexDisconnect(pVtab);
return SQLITE_OK;
}
diff --git a/src/vss/vss-index-vtab.h b/src/vss/vss-index-vtab.h
index 5446927..5da2652 100644
--- a/src/vss/vss-index-vtab.h
+++ b/src/vss/vss-index-vtab.h
@@ -25,9 +25,6 @@ class vss_index_vtab : public sqlite3_vtab {
sqlite3_free(schema);
if (this->zErrMsg != nullptr)
delete this->zErrMsg;
- for (auto iter = indexes.begin(); iter != indexes.end(); ++iter) {
- delete (*iter);
- }
}
void setError(char *error) {
diff --git a/src/vss/vss-index.h b/src/vss/vss-index.h
index 3a7a5e2..0cc1a88 100644
--- a/src/vss/vss-index.h
+++ b/src/vss/vss-index.h
@@ -4,7 +4,7 @@
#include "inclusions.h"
#include
-
+#include | | | | | | | | | | | | | | | | | |