From 5e599472bdcb8dc04e2212c391447692dc66b22a Mon Sep 17 00:00:00 2001 From: Thomas Hansen Date: Mon, 26 Jun 2023 08:09:14 +0300 Subject: [PATCH 01/66] Adding back SqlStatement --- src/sqlite-vss.cpp | 419 +++++++++++++++++++++++++-------------------- 1 file changed, 231 insertions(+), 188 deletions(-) diff --git a/src/sqlite-vss.cpp b/src/sqlite-vss.cpp index b9f8c6a..6608eec 100644 --- a/src/sqlite-vss.cpp +++ b/src/sqlite-vss.cpp @@ -246,6 +246,102 @@ void delVssRangeSearchParams(void *p) { delete self; } +struct SqlStatement { + + SqlStatement(sqlite3 *db, const char * sql) : db(db), sql(sql), stmt(nullptr) { + + this->sql = sql; + } + + ~SqlStatement() { + + if (stmt != nullptr) + sqlite3_finalize(stmt); + if (sql != nullptr) + sqlite3_free((void *)sql); + } + + int prepare() { + + auto res = sqlite3_prepare_v2(db, sql, -1, &stmt, nullptr); + if (res != SQLITE_OK || stmt == nullptr) { + + stmt = nullptr; + return SQLITE_ERROR; + } + return res; + } + + int bind_int64(int colNo, sqlite3_int64 value) { + + return sqlite3_bind_int64(stmt, colNo, value); + } + + int bind_blob64(int colNo, const void * data, int size) { + + return sqlite3_bind_blob64(stmt, colNo, data, size, SQLITE_TRANSIENT); + } + + int bind_null(int colNo) { + + return sqlite3_bind_null(stmt, colNo); + } + + int bind_pointer(int paramNo, void *ptr, const char * name) { + + return sqlite3_bind_pointer(stmt, paramNo, ptr, name, nullptr); + } + + int step() { + + return sqlite3_step(stmt); + } + + int exec() { + + return sqlite3_exec(db, sql, nullptr, nullptr, nullptr); + } + + int declare_vtab() { + + return sqlite3_declare_vtab(db, sql); + } + + const void * column_blob(int colNo) { + + return sqlite3_column_blob(stmt, colNo); + } + + int column_bytes(int colNo) { + + return sqlite3_column_bytes(stmt, colNo); + } + + int column_int64(int colNo) { + + return sqlite3_column_int64(stmt, colNo); + } + + int last_insert_rowid() { + + return sqlite3_last_insert_rowid(db); + } + + void finalize() { + + if (stmt != nullptr) + sqlite3_finalize(stmt); + stmt = nullptr; + if (sql != nullptr) + sqlite3_free((void *)sql); + sql = nullptr; + } + + sqlite3 *db; + sqlite3_stmt *stmt; + const char * sql; +}; + #pragma endregion #pragma region Vtab @@ -288,95 +384,81 @@ static void vssRangeSearchParamsFunc(sqlite3_context *context, int argc, sqlite3_result_pointer(context, params, "vss0_rangesearchparams", delVssRangeSearchParams); } -static int write_index_insert(faiss::Index *index, +static int write_index_insert(faiss::VectorIOWriter &writer, sqlite3 *db, char *schema, char *name, int rowId) { - faiss::VectorIOWriter writer; - faiss::write_index(index, &writer); - sqlite3_int64 indexSize = writer.data.size(); - - // First try to insert into xyz_index. If that fails with a rowid constraint - // error, that means the index is already on disk, we just have to UPDATE - // instead. + // If inserts fails it means index already exists. + SqlStatement insert(db, + sqlite3_mprintf("insert into \"%w\".\"%w_index\"(rowid, idx) values (?, ?)", + schema, + name)); - sqlite3_stmt *stmt; - char *sql = sqlite3_mprintf( - "insert into \"%w\".\"%w_index\"(rowid, idx) values (?, ?)", - schema, - name); - - int rc = sqlite3_prepare_v2(db, sql, -1, &stmt, 0); - if (rc != SQLITE_OK || stmt == nullptr) { - sqlite3_free(sql); + if (insert.prepare() != SQLITE_OK) return SQLITE_ERROR; - } - rc = sqlite3_bind_int64(stmt, 1, rowId); - if (rc != SQLITE_OK) { - sqlite3_finalize(stmt); - sqlite3_free(sql); + if (insert.bind_int64(1, rowId) != SQLITE_OK) return SQLITE_ERROR; - } - rc = sqlite3_bind_blob64(stmt, 2, writer.data.data(), indexSize, SQLITE_TRANSIENT); - if (rc != SQLITE_OK) { - sqlite3_finalize(stmt); - sqlite3_free(sql); + if (insert.bind_blob64(2, writer.data.data(), writer.data.size()) != SQLITE_OK) return SQLITE_ERROR; - } - int result = sqlite3_step(stmt); - sqlite3_finalize(stmt); - sqlite3_free(sql); + auto rc = insert.step(); + if (rc == SQLITE_DONE) + return SQLITE_OK; // Index did not exist, and we successfully inserted it. - if (result == SQLITE_DONE) { + return rc; +} - // INSERT was success, index wasn't written yet, all good to exit - return SQLITE_OK; +static int write_index_update(faiss::VectorIOWriter &writer, + sqlite3 *db, + char *schema, + char *name, + int rowId) { - } else if (sqlite3_extended_errcode(db) != SQLITE_CONSTRAINT_ROWID) { + // Updating existing index. + SqlStatement update(db, + sqlite3_mprintf("update \"%w\".\"%w_index\" set idx = ? where rowid = ?", + schema, + name)); - // INSERT failed for another unknown reason, bad, return error + if (update.prepare() != SQLITE_OK) return SQLITE_ERROR; - } - - // INSERT failed because index already is on disk, so we do an UPDATE instead - sql = sqlite3_mprintf( - "update \"%w\".\"%w_index\" set idx = ? where rowid = ?", schema, name); - - rc = sqlite3_prepare_v2(db, sql, -1, &stmt, 0); - if (rc != SQLITE_OK || stmt == nullptr) { - sqlite3_free(sql); + if (update.bind_blob64(1, writer.data.data(), writer.data.size()) != SQLITE_OK) return SQLITE_ERROR; - } - rc = sqlite3_bind_blob64(stmt, 1, writer.data.data(), indexSize, SQLITE_TRANSIENT); - if (rc != SQLITE_OK) { - sqlite3_finalize(stmt); - sqlite3_free(sql); + if (update.bind_int64(2, rowId) != SQLITE_OK) return SQLITE_ERROR; - } - rc = sqlite3_bind_int64(stmt, 2, rowId); - if (rc != SQLITE_OK) { - sqlite3_finalize(stmt); - sqlite3_free(sql); - return SQLITE_ERROR; - } + auto rc = update.step(); + if (rc == SQLITE_DONE) + return SQLITE_OK; // We successfully updated existing index. + + return rc; +} - result = sqlite3_step(stmt); - sqlite3_finalize(stmt); - sqlite3_free(sql); +static int write_index(faiss::Index *index, + sqlite3 *db, + char *schema, + char *name, + int rowId) { - if (result == SQLITE_DONE) { + // Writing our index + faiss::VectorIOWriter writer; + faiss::write_index(index, &writer); + + // First trying to insert index, if that fails with ROW constraing error, we try to update existing index. + if (write_index_insert(writer, db, schema, name, rowId) == SQLITE_OK) return SQLITE_OK; - } - return result; + if (sqlite3_extended_errcode(db) != SQLITE_CONSTRAINT_ROWID) + return SQLITE_ERROR; // Insert failed for unknown error + + // Insert failed because index already existed, updating existing index. + return write_index_update(writer, db, schema, name, rowId); } static int shadow_data_insert(sqlite3 *db, @@ -385,50 +467,45 @@ static int shadow_data_insert(sqlite3 *db, sqlite3_int64 *rowid, sqlite3_int64 *retRowid) { - sqlite3_stmt *stmt; - if (rowid == nullptr) { - auto sql = sqlite3_mprintf( - "insert into \"%w\".\"%w_data\"(x) values (?)", schema, name); + SqlStatement insert(db, + sqlite3_mprintf("insert into \"%w\".\"%w_data\"(x) values (?)", + schema, + name)); - int rc = sqlite3_prepare_v2(db, sql, -1, &stmt, 0); - sqlite3_free(sql); + if (insert.prepare() != SQLITE_OK) + return SQLITE_ERROR; - if (rc != SQLITE_OK || stmt == nullptr) { + if (insert.bind_null(1) != SQLITE_OK) return SQLITE_ERROR; - } - sqlite3_bind_null(stmt, 1); - if (sqlite3_step(stmt) != SQLITE_DONE) { - sqlite3_finalize(stmt); + if (insert.step() != SQLITE_DONE) return SQLITE_ERROR; - } } else { - auto sql = sqlite3_mprintf( - "insert into \"%w\".\"%w_data\"(rowid, x) values (?, ?);", schema, - name); + SqlStatement insert(db, + sqlite3_mprintf("insert into \"%w\".\"%w_data\"(rowid, x) values (?, ?);", + schema, + name)); - int rc = sqlite3_prepare_v2(db, sql, -1, &stmt, 0); - sqlite3_free(sql); + if (insert.prepare() != SQLITE_OK) + return SQLITE_ERROR; - if (rc != SQLITE_OK || stmt == nullptr) + if (insert.bind_int64(1, *rowid) != SQLITE_OK) return SQLITE_ERROR; - sqlite3_bind_int64(stmt, 1, *rowid); - sqlite3_bind_null(stmt, 2); - if (sqlite3_step(stmt) != SQLITE_DONE) { - sqlite3_finalize(stmt); + if (insert.bind_null(2) != SQLITE_OK) + return SQLITE_ERROR; + + if (insert.step() != SQLITE_DONE) return SQLITE_ERROR; - } if (retRowid != nullptr) - *retRowid = sqlite3_last_insert_rowid(db); + *retRowid = insert.last_insert_rowid(); } - sqlite3_finalize(stmt); return SQLITE_OK; } @@ -436,62 +513,47 @@ static int shadow_data_delete(sqlite3 *db, char *schema, char *name, sqlite3_int64 rowid) { - sqlite3_stmt *stmt; - // TODO: We should strive to use only one concept and idea while creating - // SQL statements. - auto query = sqlite3_str_new(0); + SqlStatement del(db, + sqlite3_mprintf("delete from \"%w\".\"%w_data\" where rowid = ?", + schema, + name)); - sqlite3_str_appendf(query, "delete from \"%w\".\"%w_data\" where rowid = ?", - schema, name); - - auto sql = sqlite3_str_finish(query); + if (del.prepare() != SQLITE_OK) + return SQLITE_ERROR; - int rc = sqlite3_prepare_v2(db, sql, -1, &stmt, 0); - if (rc != SQLITE_OK || stmt == nullptr) + if (del.bind_int64(1, rowid) != SQLITE_OK) return SQLITE_ERROR; - sqlite3_bind_int64(stmt, 1, rowid); - if (sqlite3_step(stmt) != SQLITE_DONE) { - sqlite3_finalize(stmt); + if (del.step() != SQLITE_DONE) return SQLITE_ERROR; - } - sqlite3_free(sql); - sqlite3_finalize(stmt); return SQLITE_OK; } static faiss::Index *read_index_select(sqlite3 *db, const char *name, int indexId) { - sqlite3_stmt *stmt; - auto sql = sqlite3_mprintf("select idx from \"%w_index\" where rowid = ?", name); + SqlStatement select(db, + sqlite3_mprintf("select idx from \"%w_index\" where rowid = ?", + name)); - int rc = sqlite3_prepare_v2(db, sql, -1, &stmt, nullptr); - if (rc != SQLITE_OK || stmt == nullptr) { - sqlite3_finalize(stmt); - sqlite3_free(sql); + if (select.prepare() != SQLITE_OK) return nullptr; - } - sqlite3_bind_int64(stmt, 1, indexId); - if (sqlite3_step(stmt) != SQLITE_ROW) { - sqlite3_finalize(stmt); - sqlite3_free(sql); + if (select.bind_int64(1, indexId) != SQLITE_OK) return nullptr; - } - auto index_data = sqlite3_column_blob(stmt, 0); - int64_t size = sqlite3_column_bytes(stmt, 0); + if (select.step() != SQLITE_ROW) + return nullptr; + + auto index_data = select.column_blob(0); + auto size = select.column_bytes(0); faiss::VectorIOReader reader; copy((const uint8_t *)index_data, ((const uint8_t *)index_data) + size, back_inserter(reader.data)); - sqlite3_free(sql); - sqlite3_finalize(stmt); - return faiss::read_index(&reader); } @@ -500,21 +562,27 @@ static int create_shadow_tables(sqlite3 *db, const char *name, int n) { - auto sql = sqlite3_mprintf("create table \"%w\".\"%w_index\"(idx)", - schema, - name); + SqlStatement create1(db, + sqlite3_mprintf("create table \"%w\".\"%w_index\"(idx)", + schema, + name)); - auto rc = sqlite3_exec(db, sql, 0, 0, 0); - sqlite3_free(sql); + auto rc = create1.exec(); if (rc != SQLITE_OK) return rc; - sql = sqlite3_mprintf("create table \"%w\".\"%w_data\"(x);", - schema, - name); + /* + * Notice, we'll need to explicitly finalize this object since we can only + * have one open statement at the same time to the same connetion. + */ + create1.finalize(); - rc = sqlite3_exec(db, sql, nullptr, nullptr, nullptr); - sqlite3_free(sql); + SqlStatement create2(db, + sqlite3_mprintf("create table \"%w\".\"%w_data\"(x);", + schema, + name)); + + rc = create2.exec(); return rc; } @@ -525,29 +593,15 @@ static int drop_shadow_tables(sqlite3 *db, char *name) { for (int i = 0; i < 2; i++) { - auto curSql = drops[i]; - - sqlite3_stmt *stmt; - - // TODO: Use of one construct to create SQL statements. - sqlite3_str *query = sqlite3_str_new(0); - sqlite3_str_appendf(query, curSql, name); - char *sql = sqlite3_str_finish(query); + SqlStatement cur(db, + sqlite3_mprintf(drops[i], + name)); - int rc = sqlite3_prepare_v2(db, sql, -1, &stmt, 0); - if (rc != SQLITE_OK || stmt == nullptr) { - sqlite3_free(sql); + if (cur.prepare() != SQLITE_OK) return SQLITE_ERROR; - } - if (sqlite3_step(stmt) != SQLITE_DONE) { - sqlite3_free(sql); - sqlite3_finalize(stmt); + if (cur.step() != SQLITE_DONE) return SQLITE_ERROR; - } - - sqlite3_free(sql); - sqlite3_finalize(stmt); } return SQLITE_OK; } @@ -696,6 +750,10 @@ unique_ptr> parse_constructor(int argc, return columns; } +#define VSS_INDEX_COLUMN_DISTANCE 0 +#define VSS_INDEX_COLUMN_OPERATION 1 +#define VSS_INDEX_COLUMN_VECTORS 2 + static int init(sqlite3 *db, void *pAux, int argc, @@ -705,31 +763,23 @@ static int init(sqlite3 *db, bool isCreate) { sqlite3_vtab_config(db, SQLITE_VTAB_CONSTRAINT_SUPPORT, 1); - int rc; - - sqlite3_str *str = sqlite3_str_new(nullptr); - sqlite3_str_appendall(str, - "create table x(distance hidden, operation hidden"); auto columns = parse_constructor(argc, argv); - if (columns == nullptr) { - *pzErr = sqlite3_mprintf("Error parsing constructor"); - return rc; + *pzErr = sqlite3_mprintf("Error parsing VSS index factory constructor"); + return SQLITE_ERROR; } - for (auto column = columns->begin(); column != columns->end(); ++column) { - sqlite3_str_appendf(str, ", \"%w\"", column->name.c_str()); + string sql = "create table x(distance hidden, operation hidden"; + for (auto colIter = columns->begin(); colIter != columns->end(); ++colIter) { + sql += ", \"" + colIter->name + "\""; } + sql += ")"; - sqlite3_str_appendall(str, ")"); - auto sql = sqlite3_str_finish(str); - rc = sqlite3_declare_vtab(db, sql); - sqlite3_free(sql); + SqlStatement create(db, + sqlite3_mprintf(sql.c_str())); -#define VSS_INDEX_COLUMN_DISTANCE 0 -#define VSS_INDEX_COLUMN_OPERATION 1 -#define VSS_INDEX_COLUMN_VECTORS 2 + auto rc = create.declare_vtab(); if (rc != SQLITE_OK) return rc; @@ -738,6 +788,7 @@ static int init(sqlite3 *db, (vector0_api *)pAux, sqlite3_mprintf("%s", argv[1]), sqlite3_mprintf("%s", argv[2])); + *ppVtab = pTable; if (isCreate) { @@ -751,7 +802,7 @@ static int init(sqlite3 *db, } catch (faiss::FaissException &e) { - *pzErr = sqlite3_mprintf("Error building index factory for %s: %s", + *pzErr = sqlite3_mprintf("Error building index factory for %s, exception was: %s", iter->name.c_str(), e.msg.c_str()); @@ -771,7 +822,7 @@ static int init(sqlite3 *db, try { - int rc = write_index_insert((*iter)->index, + int rc = write_index((*iter)->index, pTable->db, pTable->schema, pTable->name, @@ -1232,7 +1283,7 @@ static int vssIndexSync(sqlite3_vtab *pVTab) { int i = 0; for (auto iter = pTable->indexes.begin(); iter != pTable->indexes.end(); ++iter, i++) { - int rc = write_index_insert((*iter)->index, + int rc = write_index((*iter)->index, pTable->db, pTable->schema, pTable->name, @@ -1357,8 +1408,11 @@ static int vssIndexUpdate(sqlite3_vtab *pVTab, if (!inserted_rowid) { sqlite_int64 retrowid; - auto rc = shadow_data_insert(pTable->db, pTable->schema, pTable->name, - &rowid, &retrowid); + auto rc = shadow_data_insert(pTable->db, + pTable->schema, + pTable->name, + &rowid, + &retrowid); if (rc != SQLITE_OK) return rc; @@ -1496,28 +1550,17 @@ static sqlite3_module vssIndexModule = { vector0_api *vector0_api_from_db(sqlite3 *db) { - vector0_api *pRet = nullptr; - sqlite3_stmt *pStmt = nullptr; - - auto rc = sqlite3_prepare(db, "select vector0(?1)", -1, &pStmt, nullptr); - if (rc != SQLITE_OK) + SqlStatement select(db, sqlite3_mprintf("select vector0(?1)")); + if (select.prepare() != SQLITE_OK) return nullptr; - rc = sqlite3_bind_pointer(pStmt, 1, (void *)&pRet, "vector0_api_ptr", nullptr); - if (rc != SQLITE_OK) { - - sqlite3_finalize(pStmt); + vector0_api *pRet = nullptr; + if (select.bind_pointer(1, (void *)&pRet, "vector0_api_ptr") != SQLITE_OK) return nullptr; - } - rc = sqlite3_step(pStmt); - if (rc != SQLITE_ROW) { - - sqlite3_finalize(pStmt); + if (select.step() != SQLITE_ROW) return nullptr; - } - sqlite3_finalize(pStmt); return pRet; } From 6e2e09a7b4bc91d141df86d210c45462a9416f60 Mon Sep 17 00:00:00 2001 From: Thomas Hansen Date: Mon, 26 Jun 2023 08:09:43 +0300 Subject: [PATCH 02/66] Removing unused code path --- src/sqlite-vss.cpp | 53 ++++++++++++---------------------------------- 1 file changed, 14 insertions(+), 39 deletions(-) diff --git a/src/sqlite-vss.cpp b/src/sqlite-vss.cpp index 6608eec..3340924 100644 --- a/src/sqlite-vss.cpp +++ b/src/sqlite-vss.cpp @@ -464,47 +464,24 @@ static int write_index(faiss::Index *index, static int shadow_data_insert(sqlite3 *db, char *schema, char *name, - sqlite3_int64 *rowid, - sqlite3_int64 *retRowid) { - - if (rowid == nullptr) { - - SqlStatement insert(db, - sqlite3_mprintf("insert into \"%w\".\"%w_data\"(x) values (?)", - schema, - name)); - - if (insert.prepare() != SQLITE_OK) - return SQLITE_ERROR; - - if (insert.bind_null(1) != SQLITE_OK) - return SQLITE_ERROR; - - if (insert.step() != SQLITE_DONE) - return SQLITE_ERROR; - - } else { - - SqlStatement insert(db, - sqlite3_mprintf("insert into \"%w\".\"%w_data\"(rowid, x) values (?, ?);", - schema, - name)); + sqlite3_int64 rowid) { - if (insert.prepare() != SQLITE_OK) - return SQLITE_ERROR; + SqlStatement insert(db, + sqlite3_mprintf("insert into \"%w\".\"%w_data\"(rowid, x) values (?, ?);", + schema, + name)); - if (insert.bind_int64(1, *rowid) != SQLITE_OK) - return SQLITE_ERROR; + if (insert.prepare() != SQLITE_OK) + return SQLITE_ERROR; - if (insert.bind_null(2) != SQLITE_OK) - return SQLITE_ERROR; + if (insert.bind_int64(1, rowid) != SQLITE_OK) + return SQLITE_ERROR; - if (insert.step() != SQLITE_DONE) - return SQLITE_ERROR; + if (insert.bind_null(2) != SQLITE_OK) + return SQLITE_ERROR; - if (retRowid != nullptr) - *retRowid = insert.last_insert_rowid(); - } + if (insert.step() != SQLITE_DONE) + return SQLITE_ERROR; return SQLITE_OK; } @@ -1407,12 +1384,10 @@ static int vssIndexUpdate(sqlite3_vtab *pVTab, if (!inserted_rowid) { - sqlite_int64 retrowid; auto rc = shadow_data_insert(pTable->db, pTable->schema, pTable->name, - &rowid, - &retrowid); + rowid); if (rc != SQLITE_OK) return rc; From f513a1d733346b9922474f7e85b320c10179b01f Mon Sep 17 00:00:00 2001 From: Thomas Hansen Date: Mon, 26 Jun 2023 08:14:05 +0300 Subject: [PATCH 03/66] Making sure we use nullptr instead of 0 --- src/sqlite-vector.cpp | 6 +++--- src/sqlite-vss.cpp | 20 ++++++++++---------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/sqlite-vector.cpp b/src/sqlite-vector.cpp index a780fcf..67985b9 100644 --- a/src/sqlite-vector.cpp +++ b/src/sqlite-vector.cpp @@ -675,9 +675,9 @@ __declspec(dllexport) aFunc[i].flags, aFunc[i].pAux, aFunc[i].xFunc, - 0, - 0, - 0); + nullptr, + nullptr, + nullptr); if (rc != SQLITE_OK) { diff --git a/src/sqlite-vss.cpp b/src/sqlite-vss.cpp index 3340924..16e1003 100644 --- a/src/sqlite-vss.cpp +++ b/src/sqlite-vss.cpp @@ -1588,42 +1588,42 @@ __declspec(dllexport) SQLITE_UTF8 | SQLITE_DETERMINISTIC | SQLITE_INNOCUOUS, vector_api, vss_distance_l2, - 0, 0, 0); + nullptr, nullptr, nullptr); sqlite3_create_function_v2(db, "vss_distance_linf", 2, SQLITE_UTF8 | SQLITE_DETERMINISTIC | SQLITE_INNOCUOUS, vector_api, vss_distance_linf, - 0, 0, 0); + nullptr, nullptr, nullptr); sqlite3_create_function_v2(db, "vss_inner_product", 2, SQLITE_UTF8 | SQLITE_DETERMINISTIC | SQLITE_INNOCUOUS, vector_api, vss_inner_product, - 0, 0, 0); + nullptr, nullptr, nullptr); sqlite3_create_function_v2(db, "vss_fvec_add", 2, SQLITE_UTF8 | SQLITE_DETERMINISTIC | SQLITE_INNOCUOUS, vector_api, vss_fvec_add, - 0, 0, 0); + nullptr, nullptr, nullptr); sqlite3_create_function_v2(db, "vss_fvec_sub", 2, SQLITE_UTF8 | SQLITE_DETERMINISTIC | SQLITE_INNOCUOUS, vector_api, vss_fvec_sub, - 0, 0, 0); + nullptr, nullptr, nullptr); sqlite3_create_function_v2(db, "vss_search", 2, SQLITE_UTF8 | SQLITE_DETERMINISTIC | SQLITE_INNOCUOUS, vector_api, vssSearchFunc, - 0, 0, 0); + nullptr, nullptr, nullptr); sqlite3_create_function_v2(db, "vss_search_params", @@ -1631,7 +1631,7 @@ __declspec(dllexport) 0, vector_api, vssSearchParamsFunc, - 0, 0, 0); + nullptr, nullptr, nullptr); sqlite3_create_function_v2(db, "vss_range_search", @@ -1639,7 +1639,7 @@ __declspec(dllexport) SQLITE_UTF8 | SQLITE_DETERMINISTIC | SQLITE_INNOCUOUS, vector_api, vssRangeSearchFunc, - 0, 0, 0); + nullptr, nullptr, nullptr); sqlite3_create_function_v2(db, "vss_range_search_params", @@ -1647,7 +1647,7 @@ __declspec(dllexport) 0, vector_api, vssRangeSearchParamsFunc, - 0, 0, 0); + nullptr, nullptr, nullptr); sqlite3_create_function_v2(db, "vss_memory_usage", @@ -1655,7 +1655,7 @@ __declspec(dllexport) 0, nullptr, faissMemoryUsageFunc, - 0, 0, 0); + nullptr, nullptr, nullptr); auto rc = sqlite3_create_module_v2(db, "vss0", &vssIndexModule, vector_api, nullptr); if (rc != SQLITE_OK) { From 3f9d111c4ad1f479748e75cf638efa2286e3e41e Mon Sep 17 00:00:00 2001 From: Thomas Hansen Date: Mon, 26 Jun 2023 08:34:25 +0300 Subject: [PATCH 04/66] Update sqlite-vss.cpp --- src/sqlite-vss.cpp | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/src/sqlite-vss.cpp b/src/sqlite-vss.cpp index 16e1003..f9feb89 100644 --- a/src/sqlite-vss.cpp +++ b/src/sqlite-vss.cpp @@ -646,11 +646,14 @@ struct vss_index_cursor : public sqlite3_vtab_cursor { explicit vss_index_cursor(vss_index_vtab *table) : table(table), sqlite3_vtab_cursor({0}), - stmt(nullptr) { } + stmt(nullptr), + sql(nullptr) { } ~vss_index_cursor() { if (stmt != nullptr) sqlite3_finalize(stmt); + if (sql != nullptr) + sqlite3_free(sql); } vss_index_vtab *table; @@ -670,6 +673,7 @@ struct vss_index_cursor : public sqlite3_vtab_cursor { // For query_type == QueryType::fullscan sqlite3_stmt *stmt; + char *sql; int step_result; }; @@ -1057,12 +1061,13 @@ static int vssIndexFilter(sqlite3_vtab_cursor *pVtabCursor, } else if (strcmp(idxStr, "fullscan") == 0) { pCursor->query_type = QueryType::fullscan; - sqlite3_stmt *stmt; + pCursor->sql = sqlite3_mprintf("select rowid from \"%w_data\"", pCursor->table->name); - int res = sqlite3_prepare_v2( - pCursor->table->db, - sqlite3_mprintf("select rowid from \"%w_data\"", pCursor->table->name), - -1, &pCursor->stmt, nullptr); + int res = sqlite3_prepare_v2(pCursor->table->db, + pCursor->sql, + -1, + &pCursor->stmt, + nullptr); if (res != SQLITE_OK) return res; From f83febe7b43fcda4ead0009e3cab4e0294f3be21 Mon Sep 17 00:00:00 2001 From: Thomas Hansen Date: Mon, 26 Jun 2023 08:59:09 +0300 Subject: [PATCH 05/66] Update sqlite-vss.cpp --- src/sqlite-vss.cpp | 86 ++++++++++++++++++++++++---------------------- 1 file changed, 45 insertions(+), 41 deletions(-) diff --git a/src/sqlite-vss.cpp b/src/sqlite-vss.cpp index f9feb89..90360a0 100644 --- a/src/sqlite-vss.cpp +++ b/src/sqlite-vss.cpp @@ -611,7 +611,10 @@ struct vss_index_vtab : public sqlite3_vtab { : db(db), vector_api(vector_api), schema(schema), - name(name) { } + name(name) { + + this->zErrMsg = nullptr; + } ~vss_index_vtab() { @@ -619,11 +622,20 @@ struct vss_index_vtab : public sqlite3_vtab { sqlite3_free(name); if (schema) sqlite3_free(schema); + if (this->zErrMsg != nullptr) + delete this->zErrMsg; for (auto iter = indexes.begin(); iter != indexes.end(); ++iter) { delete (*iter); } } + void setError(char *error) { + if (this->zErrMsg != nullptr) { + delete this->zErrMsg; + } + this->zErrMsg = error; + } + sqlite3 *db; vector0_api *vector_api; @@ -975,31 +987,32 @@ static int vssIndexFilter(sqlite3_vtab_cursor *pVtabCursor, } else if (sqlite3_libversion_number() < 3041000) { // https://sqlite.org/forum/info/6b32f818ba1d97ef - sqlite3_free(pVtabCursor->pVtab->zErrMsg); - pVtabCursor->pVtab->zErrMsg = sqlite3_mprintf( - "vss_search() only support vss_search_params() as a " - "2nd parameter for SQLite versions below 3.41.0"); + auto ptrVtab = static_cast(pCursor->pVtab); + ptrVtab->setError( + sqlite3_mprintf( + "vss_search() only support vss_search_params() as a " + "2nd parameter for SQLite versions below 3.41.0")); return SQLITE_ERROR; } else if ((query_vector = pCursor->table->vector_api->xValueAsVector( argv[0])) != nullptr) { if (argc > 1) { + pCursor->limit = sqlite3_value_int(argv[1]); } else { - sqlite3_free(pVtabCursor->pVtab->zErrMsg); - pVtabCursor->pVtab->zErrMsg = - sqlite3_mprintf("LIMIT required on vss_search() queries"); + + auto ptrVtab = static_cast(pCursor->pVtab); + ptrVtab->setError(sqlite3_mprintf("LIMIT required on vss_search() queries")); + return SQLITE_ERROR; } } else { - if (pVtabCursor->pVtab->zErrMsg != nullptr) - sqlite3_free(pVtabCursor->pVtab->zErrMsg); + auto ptrVtab = static_cast(pCursor->pVtab); + ptrVtab->setError(sqlite3_mprintf("2nd argument to vss_search() must be a vector")); - pVtabCursor->pVtab->zErrMsg = sqlite3_mprintf( - "2nd argument to vss_search() must be a vector"); return SQLITE_ERROR; } @@ -1008,22 +1021,22 @@ static int vssIndexFilter(sqlite3_vtab_cursor *pVtabCursor, if (query_vector->size() != index->d) { - // TODO: To support index that transforms vectors - // (to conserve spage, eg?), we should probably - // have some logic in place that transforms the vectors here? - sqlite3_free(pVtabCursor->pVtab->zErrMsg); - pVtabCursor->pVtab->zErrMsg = sqlite3_mprintf( + auto ptrVtab = static_cast(pCursor->pVtab); + ptrVtab->setError(sqlite3_mprintf( "Input query size doesn't match index dimensions: %ld != %ld", query_vector->size(), - index->d); + index->d)); + return SQLITE_ERROR; } if (pCursor->limit <= 0) { - sqlite3_free(pVtabCursor->pVtab->zErrMsg); - pVtabCursor->pVtab->zErrMsg = sqlite3_mprintf( - "Limit must be greater than 0, got %ld", pCursor->limit); + auto ptrVtab = static_cast(pCursor->pVtab); + ptrVtab->setError(sqlite3_mprintf( + "Limit must be greater than 0, got %ld", + pCursor->limit)); + return SQLITE_ERROR; } @@ -1076,11 +1089,10 @@ static int vssIndexFilter(sqlite3_vtab_cursor *pVtabCursor, } else { - if (pVtabCursor->pVtab->zErrMsg != 0) - sqlite3_free(pVtabCursor->pVtab->zErrMsg); + auto ptrVtab = static_cast(pCursor->pVtab); + ptrVtab->setError(sqlite3_mprintf( + "%s %s", "vssIndexFilter error: unhandled idxStr", idxStr)); - pVtabCursor->pVtab->zErrMsg = sqlite3_mprintf( - "%s %s", "vssIndexFilter error: unhandled idxStr", idxStr); return SQLITE_ERROR; } @@ -1273,9 +1285,9 @@ static int vssIndexSync(sqlite3_vtab *pVTab) { if (rc != SQLITE_OK) { - sqlite3_free(pVTab->zErrMsg); - pVTab->zErrMsg = sqlite3_mprintf("Error saving index (%d): %s", - rc, sqlite3_errmsg(pTable->db)); + pTable->setError(sqlite3_mprintf("Error saving index (%d): %s", + rc, + sqlite3_errmsg(pTable->db))); return rc; } } @@ -1285,10 +1297,8 @@ static int vssIndexSync(sqlite3_vtab *pVTab) { } catch (faiss::FaissException &e) { - sqlite3_free(pVTab->zErrMsg); - pVTab->zErrMsg = - sqlite3_mprintf("Error during synchroning index. Full error: %s", - e.msg.c_str()); + pTable->setError(sqlite3_mprintf("Error during synchroning index. Full error: %s", + e.msg.c_str())); for (auto iter = pTable->indexes.begin(); iter != pTable->indexes.end(); ++iter) { @@ -1378,11 +1388,9 @@ static int vssIndexUpdate(sqlite3_vtab *pVTab, // Make sure the index is already trained, if it's needed if (!(*iter)->index->is_trained) { - sqlite3_free(pVTab->zErrMsg); - pVTab->zErrMsg = - sqlite3_mprintf("Index at i=%d requires training " + pTable->setError(sqlite3_mprintf("Index at i=%d requires training " "before inserting data.", - i); + i)); return SQLITE_ERROR; } @@ -1440,11 +1448,7 @@ static int vssIndexUpdate(sqlite3_vtab *pVTab, } else { // TODO: Implement - UPDATE operation - sqlite3_free(pVTab->zErrMsg); - - pVTab->zErrMsg = - sqlite3_mprintf("UPDATE statements on vss0 virtual tables not supported yet."); - + pTable->setError(sqlite3_mprintf("UPDATE statements on vss0 virtual tables not supported yet.")); return SQLITE_ERROR; } From 98420ad046ca23953f3c68d5638d77df7d4d0133 Mon Sep 17 00:00:00 2001 From: Thomas Hansen Date: Mon, 26 Jun 2023 09:40:03 +0300 Subject: [PATCH 06/66] Encapsulating members --- src/sqlite-vss.cpp | 230 ++++++++++++++++++++++++++++----------------- 1 file changed, 146 insertions(+), 84 deletions(-) diff --git a/src/sqlite-vss.cpp b/src/sqlite-vss.cpp index 90360a0..ed2074d 100644 --- a/src/sqlite-vss.cpp +++ b/src/sqlite-vss.cpp @@ -246,7 +246,9 @@ void delVssRangeSearchParams(void *p) { delete self; } -struct SqlStatement { +class SqlStatement { + +public: SqlStatement(sqlite3 *db, const char * sql) : db(db), sql(sql), stmt(nullptr) { @@ -337,6 +339,8 @@ struct SqlStatement { sql = nullptr; } +private: + sqlite3 *db; sqlite3_stmt *stmt; const char * sql; @@ -588,7 +592,9 @@ static int drop_shadow_tables(sqlite3 *db, char *name) { // Wrapper around a single faiss index, with training data, insert records, and // delete records. -struct vss_index { +class vss_index { + +public: explicit vss_index(faiss::Index *index) : index(index) {} @@ -598,6 +604,33 @@ struct vss_index { } } + faiss::Index * getIndex() { + + return index; + } + + vector & getTrainings() { + + return trainings; + } + + vector & getInsert_data() { + + return insert_data; + } + + vector & getInsert_ids() { + + return insert_ids; + } + + vector & getDelete_ids() { + + return delete_ids; + } + +private: + faiss::Index *index; vector trainings; vector insert_data; @@ -605,7 +638,9 @@ struct vss_index { vector delete_ids; }; -struct vss_index_vtab : public sqlite3_vtab { +class vss_index_vtab : public sqlite3_vtab { + +public: vss_index_vtab(sqlite3 *db, vector0_api *vector_api, char *schema, char *name) : db(db), @@ -636,6 +671,33 @@ struct vss_index_vtab : public sqlite3_vtab { this->zErrMsg = error; } + sqlite3 * getDb() { + + return db; + } + + vector0_api * getVector0_api() { + + return vector_api; + } + + vector & getIndexes() { + + return indexes; + } + + char * getName() { + + return name; + } + + char * getSchema() { + + return schema; + } + +private: + sqlite3 *db; vector0_api *vector_api; @@ -791,7 +853,7 @@ static int init(sqlite3 *db, try { auto index = faiss::index_factory(iter->dimensions, iter->factory.c_str()); - pTable->indexes.push_back(new vss_index(index)); + pTable->getIndexes().push_back(new vss_index(index)); } catch (faiss::FaissException &e) { @@ -811,14 +873,14 @@ static int init(sqlite3 *db, // After shadow tables are created, write the initial index state to // shadow _index. auto i = 0; - for (auto iter = pTable->indexes.begin(); iter != pTable->indexes.end(); ++iter, i++) { + for (auto iter = pTable->getIndexes().begin(); iter != pTable->getIndexes().end(); ++iter, i++) { try { - int rc = write_index((*iter)->index, - pTable->db, - pTable->schema, - pTable->name, + int rc = write_index((*iter)->getIndex(), + pTable->getDb(), + pTable->getSchema(), + pTable->getName(), i); if (rc != SQLITE_OK) @@ -842,7 +904,7 @@ static int init(sqlite3 *db, *pzErr = sqlite3_mprintf("Could not read index at position %d", i); return SQLITE_ERROR; } - pTable->indexes.push_back(new vss_index(index)); + pTable->getIndexes().push_back(new vss_index(index)); } } @@ -877,7 +939,7 @@ static int vssIndexDisconnect(sqlite3_vtab *pVtab) { static int vssIndexDestroy(sqlite3_vtab *pVtab) { auto pTable = static_cast(pVtab); - drop_shadow_tables(pTable->db, pTable->name); + drop_shadow_tables(pTable->getDb(), pTable->getName()); vssIndexDisconnect(pVtab); return SQLITE_OK; } @@ -994,7 +1056,7 @@ static int vssIndexFilter(sqlite3_vtab_cursor *pVtabCursor, "2nd parameter for SQLite versions below 3.41.0")); return SQLITE_ERROR; - } else if ((query_vector = pCursor->table->vector_api->xValueAsVector( + } else if ((query_vector = pCursor->table->getVector0_api()->xValueAsVector( argv[0])) != nullptr) { if (argc > 1) { @@ -1017,7 +1079,7 @@ static int vssIndexFilter(sqlite3_vtab_cursor *pVtabCursor, } int nq = 1; - auto index = pCursor->table->indexes.at(idxNum)->index; + auto index = pCursor->table->getIndexes().at(idxNum)->getIndex(); if (query_vector->size() != index->d) { @@ -1064,7 +1126,7 @@ static int vssIndexFilter(sqlite3_vtab_cursor *pVtabCursor, vector nns(params->distance * nq); pCursor->range_search_result = unique_ptr(new faiss::RangeSearchResult(nq, true)); - auto index = pCursor->table->indexes.at(idxNum)->index; + auto index = pCursor->table->getIndexes().at(idxNum)->getIndex(); index->range_search(nq, params->vector->data(), @@ -1074,9 +1136,9 @@ static int vssIndexFilter(sqlite3_vtab_cursor *pVtabCursor, } else if (strcmp(idxStr, "fullscan") == 0) { pCursor->query_type = QueryType::fullscan; - pCursor->sql = sqlite3_mprintf("select rowid from \"%w_data\"", pCursor->table->name); + pCursor->sql = sqlite3_mprintf("select rowid from \"%w_data\"", pCursor->table->getName()); - int res = sqlite3_prepare_v2(pCursor->table->db, + int res = sqlite3_prepare_v2(pCursor->table->getDb(), pCursor->sql, -1, &pCursor->stmt, @@ -1186,7 +1248,7 @@ static int vssIndexColumn(sqlite3_vtab_cursor *cur, } else if (i >= VSS_INDEX_COLUMN_VECTORS) { auto index = - pCursor->table->indexes.at(i - VSS_INDEX_COLUMN_VECTORS)->index; + pCursor->table->getIndexes().at(i - VSS_INDEX_COLUMN_VECTORS)->getIndex(); vector vec(index->d); sqlite3_int64 rowId; @@ -1207,7 +1269,7 @@ static int vssIndexColumn(sqlite3_vtab_cursor *cur, sqlite3_free(errmsg); return SQLITE_ERROR; } - pCursor->table->vector_api->xResultVector(ctx, &vec); + pCursor->table->getVector0_api()->xResultVector(ctx, &vec); } return SQLITE_OK; } @@ -1226,47 +1288,47 @@ static int vssIndexSync(sqlite3_vtab *pVTab) { bool needsWriting = false; auto idxCol = 0; - for (auto iter = pTable->indexes.begin(); iter != pTable->indexes.end(); ++iter, idxCol++) { + for (auto iter = pTable->getIndexes().begin(); iter != pTable->getIndexes().end(); ++iter, idxCol++) { // Checking if index needs training. - if (!(*iter)->trainings.empty()) { + if (!(*iter)->getTrainings().empty()) { - (*iter)->index->train( - (*iter)->trainings.size() / (*iter)->index->d, - (*iter)->trainings.data()); + (*iter)->getIndex()->train( + (*iter)->getTrainings().size() / (*iter)->getIndex()->d, + (*iter)->getTrainings().data()); - (*iter)->trainings.clear(); - (*iter)->trainings.shrink_to_fit(); + (*iter)->getTrainings().clear(); + (*iter)->getTrainings().shrink_to_fit(); needsWriting = true; } // Checking if we're deleting records from the index. - if (!(*iter)->delete_ids.empty()) { + if (!(*iter)->getDelete_ids().empty()) { - faiss::IDSelectorBatch selector((*iter)->delete_ids.size(), - (*iter)->delete_ids.data()); + faiss::IDSelectorBatch selector((*iter)->getDelete_ids().size(), + (*iter)->getDelete_ids().data()); - (*iter)->index->remove_ids(selector); - (*iter)->delete_ids.clear(); - (*iter)->delete_ids.shrink_to_fit(); + (*iter)->getIndex()->remove_ids(selector); + (*iter)->getDelete_ids().clear(); + (*iter)->getDelete_ids().shrink_to_fit(); needsWriting = true; } // Checking if we're inserting records to the index. - if (!(*iter)->insert_data.empty()) { + if (!(*iter)->getInsert_data().empty()) { - (*iter)->index->add_with_ids( - (*iter)->insert_ids.size(), - (*iter)->insert_data.data(), - (faiss::idx_t *)(*iter)->insert_ids.data()); + (*iter)->getIndex()->add_with_ids( + (*iter)->getInsert_ids().size(), + (*iter)->getInsert_data().data(), + (faiss::idx_t *)(*iter)->getInsert_ids().data()); - (*iter)->insert_ids.clear(); - (*iter)->insert_ids.shrink_to_fit(); + (*iter)->getInsert_ids().clear(); + (*iter)->getInsert_ids().shrink_to_fit(); - (*iter)->insert_data.clear(); - (*iter)->insert_data.shrink_to_fit(); + (*iter)->getInsert_data().clear(); + (*iter)->getInsert_data().shrink_to_fit(); needsWriting = true; } @@ -1275,19 +1337,19 @@ static int vssIndexSync(sqlite3_vtab *pVTab) { if (needsWriting) { int i = 0; - for (auto iter = pTable->indexes.begin(); iter != pTable->indexes.end(); ++iter, i++) { + for (auto iter = pTable->getIndexes().begin(); iter != pTable->getIndexes().end(); ++iter, i++) { - int rc = write_index((*iter)->index, - pTable->db, - pTable->schema, - pTable->name, + int rc = write_index((*iter)->getIndex(), + pTable->getDb(), + pTable->getSchema(), + pTable->getName(), i); if (rc != SQLITE_OK) { pTable->setError(sqlite3_mprintf("Error saving index (%d): %s", rc, - sqlite3_errmsg(pTable->db))); + sqlite3_errmsg(pTable->getDb()))); return rc; } } @@ -1300,19 +1362,19 @@ static int vssIndexSync(sqlite3_vtab *pVTab) { pTable->setError(sqlite3_mprintf("Error during synchroning index. Full error: %s", e.msg.c_str())); - for (auto iter = pTable->indexes.begin(); iter != pTable->indexes.end(); ++iter) { + for (auto iter = pTable->getIndexes().begin(); iter != pTable->getIndexes().end(); ++iter) { - (*iter)->insert_ids.clear(); - (*iter)->insert_ids.shrink_to_fit(); + (*iter)->getInsert_ids().clear(); + (*iter)->getInsert_ids().shrink_to_fit(); - (*iter)->insert_data.clear(); - (*iter)->insert_data.shrink_to_fit(); + (*iter)->getInsert_data().clear(); + (*iter)->getInsert_data().shrink_to_fit(); - (*iter)->delete_ids.clear(); - (*iter)->delete_ids.shrink_to_fit(); + (*iter)->getDelete_ids().clear(); + (*iter)->getDelete_ids().shrink_to_fit(); - (*iter)->trainings.clear(); - (*iter)->trainings.shrink_to_fit(); + (*iter)->getTrainings().clear(); + (*iter)->getTrainings().shrink_to_fit(); } return SQLITE_ERROR; @@ -1325,19 +1387,19 @@ static int vssIndexRollback(sqlite3_vtab *pVTab) { auto pTable = static_cast(pVTab); - for (auto iter = pTable->indexes.begin(); iter != pTable->indexes.end(); ++iter) { + for (auto iter = pTable->getIndexes().begin(); iter != pTable->getIndexes().end(); ++iter) { - (*iter)->trainings.clear(); - (*iter)->trainings.shrink_to_fit(); + (*iter)->getTrainings().clear(); + (*iter)->getTrainings().shrink_to_fit(); - (*iter)->insert_data.clear(); - (*iter)->insert_data.shrink_to_fit(); + (*iter)->getInsert_data().clear(); + (*iter)->getInsert_data().shrink_to_fit(); - (*iter)->insert_ids.clear(); - (*iter)->insert_ids.shrink_to_fit(); + (*iter)->getInsert_ids().clear(); + (*iter)->getInsert_ids().shrink_to_fit(); - (*iter)->delete_ids.clear(); - (*iter)->delete_ids.shrink_to_fit(); + (*iter)->getDelete_ids().clear(); + (*iter)->getDelete_ids().shrink_to_fit(); } return SQLITE_OK; } @@ -1354,15 +1416,15 @@ static int vssIndexUpdate(sqlite3_vtab *pVTab, // DELETE operation sqlite3_int64 rowid_to_delete = sqlite3_value_int64(argv[0]); - auto rc = shadow_data_delete(pTable->db, - pTable->schema, - pTable->name, + auto rc = shadow_data_delete(pTable->getDb(), + pTable->getSchema(), + pTable->getName(), rowid_to_delete); if (rc != SQLITE_OK) return rc; - for (auto iter = pTable->indexes.begin(); iter != pTable->indexes.end(); ++iter) { - (*iter)->delete_ids.push_back(rowid_to_delete); + for (auto iter = pTable->getIndexes().begin(); iter != pTable->getIndexes().end(); ++iter) { + (*iter)->getDelete_ids().push_back(rowid_to_delete); } } else if (argc > 1 && sqlite3_value_type(argv[0]) == SQLITE_NULL) { @@ -1380,13 +1442,13 @@ static int vssIndexUpdate(sqlite3_vtab *pVTab, bool inserted_rowid = false; auto i = 0; - for (auto iter = pTable->indexes.begin(); iter != pTable->indexes.end(); ++iter, i++) { + for (auto iter = pTable->getIndexes().begin(); iter != pTable->getIndexes().end(); ++iter, i++) { - if ((vec = pTable->vector_api->xValueAsVector( + if ((vec = pTable->getVector0_api()->xValueAsVector( argv[2 + VSS_INDEX_COLUMN_VECTORS + i])) != nullptr) { // Make sure the index is already trained, if it's needed - if (!(*iter)->index->is_trained) { + if (!(*iter)->getIndex()->is_trained) { pTable->setError(sqlite3_mprintf("Index at i=%d requires training " "before inserting data.", @@ -1397,9 +1459,9 @@ static int vssIndexUpdate(sqlite3_vtab *pVTab, if (!inserted_rowid) { - auto rc = shadow_data_insert(pTable->db, - pTable->schema, - pTable->name, + auto rc = shadow_data_insert(pTable->getDb(), + pTable->getSchema(), + pTable->getName(), rowid); if (rc != SQLITE_OK) return rc; @@ -1407,13 +1469,13 @@ static int vssIndexUpdate(sqlite3_vtab *pVTab, inserted_rowid = true; } - (*iter)->insert_data.reserve((*iter)->insert_data.size() + vec->size()); - (*iter)->insert_data.insert( - (*iter)->insert_data.end(), + (*iter)->getInsert_data().reserve((*iter)->getInsert_data().size() + vec->size()); + (*iter)->getInsert_data().insert( + (*iter)->getInsert_data().end(), vec->begin(), vec->end()); - (*iter)->insert_ids.push_back(rowid); + (*iter)->getInsert_ids().push_back(rowid); *pRowid = rowid; } @@ -1426,14 +1488,14 @@ static int vssIndexUpdate(sqlite3_vtab *pVTab, if (operation.compare("training") == 0) { auto i = 0; - for (auto iter = pTable->indexes.begin(); iter != pTable->indexes.end(); ++iter, i++) { + for (auto iter = pTable->getIndexes().begin(); iter != pTable->getIndexes().end(); ++iter, i++) { - vec_ptr vec = pTable->vector_api->xValueAsVector(argv[2 + VSS_INDEX_COLUMN_VECTORS + i]); + vec_ptr vec = pTable->getVector0_api()->xValueAsVector(argv[2 + VSS_INDEX_COLUMN_VECTORS + i]); if (vec != nullptr) { - (*iter)->trainings.reserve((*iter)->trainings.size() + vec->size()); - (*iter)->trainings.insert( - (*iter)->trainings.end(), + (*iter)->getTrainings().reserve((*iter)->getTrainings().size() + vec->size()); + (*iter)->getTrainings().insert( + (*iter)->getTrainings().end(), vec->begin(), vec->end()); } From 53f6206dc821f13cecdcd2ea7452ca5c1a40fa8d Mon Sep 17 00:00:00 2001 From: Thomas Hansen Date: Mon, 26 Jun 2023 09:49:18 +0300 Subject: [PATCH 07/66] Moving SqlStatement into separate header file --- src/sql-statement.h | 108 ++++++++++++++++++++++++++++++++++++++++++++ src/sqlite-vss.cpp | 101 +---------------------------------------- 2 files changed, 109 insertions(+), 100 deletions(-) create mode 100644 src/sql-statement.h diff --git a/src/sql-statement.h b/src/sql-statement.h new file mode 100644 index 0000000..bac04ec --- /dev/null +++ b/src/sql-statement.h @@ -0,0 +1,108 @@ + +#ifndef SQL_STATEMENT_H +#define SQL_STATEMENT_H + +#include "sqlite-vss.h" + +class SqlStatement { + +public: + + SqlStatement(sqlite3 *db, const char * sql) : db(db), sql(sql), stmt(nullptr) { + + this->sql = sql; + } + + ~SqlStatement() { + + if (stmt != nullptr) + sqlite3_finalize(stmt); + if (sql != nullptr) + sqlite3_free((void *)sql); + } + + int prepare() { + + auto res = sqlite3_prepare_v2(db, sql, -1, &stmt, nullptr); + if (res != SQLITE_OK || stmt == nullptr) { + + stmt = nullptr; + return SQLITE_ERROR; + } + return res; + } + + int bind_int64(int colNo, sqlite3_int64 value) { + + return sqlite3_bind_int64(stmt, colNo, value); + } + + int bind_blob64(int colNo, const void * data, int size) { + + return sqlite3_bind_blob64(stmt, colNo, data, size, SQLITE_TRANSIENT); + } + + int bind_null(int colNo) { + + return sqlite3_bind_null(stmt, colNo); + } + + int bind_pointer(int paramNo, void *ptr, const char * name) { + + return sqlite3_bind_pointer(stmt, paramNo, ptr, name, nullptr); + } + + int step() { + + return sqlite3_step(stmt); + } + + int exec() { + + return sqlite3_exec(db, sql, nullptr, nullptr, nullptr); + } + + int declare_vtab() { + + return sqlite3_declare_vtab(db, sql); + } + + const void * column_blob(int colNo) { + + return sqlite3_column_blob(stmt, colNo); + } + + int column_bytes(int colNo) { + + return sqlite3_column_bytes(stmt, colNo); + } + + int column_int64(int colNo) { + + return sqlite3_column_int64(stmt, colNo); + } + + int last_insert_rowid() { + + return sqlite3_last_insert_rowid(db); + } + + void finalize() { + + if (stmt != nullptr) + sqlite3_finalize(stmt); + stmt = nullptr; + if (sql != nullptr) + sqlite3_free((void *)sql); + sql = nullptr; + } + +private: + + sqlite3 *db; + sqlite3_stmt *stmt; + const char * sql; +}; + +#endif // SQL_STATEMENT_H + diff --git a/src/sqlite-vss.cpp b/src/sqlite-vss.cpp index ed2074d..52ce8ca 100644 --- a/src/sqlite-vss.cpp +++ b/src/sqlite-vss.cpp @@ -21,6 +21,7 @@ SQLITE_EXTENSION_INIT1 #include #include "sqlite-vector.h" +#include "sql-statement.h" using namespace std; @@ -246,106 +247,6 @@ void delVssRangeSearchParams(void *p) { delete self; } -class SqlStatement { - -public: - - SqlStatement(sqlite3 *db, const char * sql) : db(db), sql(sql), stmt(nullptr) { - - this->sql = sql; - } - - ~SqlStatement() { - - if (stmt != nullptr) - sqlite3_finalize(stmt); - if (sql != nullptr) - sqlite3_free((void *)sql); - } - - int prepare() { - - auto res = sqlite3_prepare_v2(db, sql, -1, &stmt, nullptr); - if (res != SQLITE_OK || stmt == nullptr) { - - stmt = nullptr; - return SQLITE_ERROR; - } - return res; - } - - int bind_int64(int colNo, sqlite3_int64 value) { - - return sqlite3_bind_int64(stmt, colNo, value); - } - - int bind_blob64(int colNo, const void * data, int size) { - - return sqlite3_bind_blob64(stmt, colNo, data, size, SQLITE_TRANSIENT); - } - - int bind_null(int colNo) { - - return sqlite3_bind_null(stmt, colNo); - } - - int bind_pointer(int paramNo, void *ptr, const char * name) { - - return sqlite3_bind_pointer(stmt, paramNo, ptr, name, nullptr); - } - - int step() { - - return sqlite3_step(stmt); - } - - int exec() { - - return sqlite3_exec(db, sql, nullptr, nullptr, nullptr); - } - - int declare_vtab() { - - return sqlite3_declare_vtab(db, sql); - } - - const void * column_blob(int colNo) { - - return sqlite3_column_blob(stmt, colNo); - } - - int column_bytes(int colNo) { - - return sqlite3_column_bytes(stmt, colNo); - } - - int column_int64(int colNo) { - - return sqlite3_column_int64(stmt, colNo); - } - - int last_insert_rowid() { - - return sqlite3_last_insert_rowid(db); - } - - void finalize() { - - if (stmt != nullptr) - sqlite3_finalize(stmt); - stmt = nullptr; - if (sql != nullptr) - sqlite3_free((void *)sql); - sql = nullptr; - } - -private: - - sqlite3 *db; - sqlite3_stmt *stmt; - const char * sql; -}; - #pragma endregion #pragma region Vtab From e5f412d216c839a692a55401a2bf67a991e90532 Mon Sep 17 00:00:00 2001 From: Thomas Hansen Date: Mon, 26 Jun 2023 09:58:23 +0300 Subject: [PATCH 08/66] Better structure Moving concepts into separate files --- src/sqlite-vss.cpp | 200 +--------------------------------- src/vss/calculations.h | 177 ++++++++++++++++++++++++++++++ src/vss/meta-methods.h | 35 ++++++ src/{ => vss}/sql-statement.h | 4 + 4 files changed, 220 insertions(+), 196 deletions(-) create mode 100644 src/vss/calculations.h create mode 100644 src/vss/meta-methods.h rename src/{ => vss}/sql-statement.h (92%) diff --git a/src/sqlite-vss.cpp b/src/sqlite-vss.cpp index 52ce8ca..d9f6912 100644 --- a/src/sqlite-vss.cpp +++ b/src/sqlite-vss.cpp @@ -20,206 +20,14 @@ SQLITE_EXTENSION_INIT1 #include #include -#include "sqlite-vector.h" -#include "sql-statement.h" - using namespace std; typedef unique_ptr> vec_ptr; -#pragma region Meta - -static void vss_version(sqlite3_context *context, int argc, - sqlite3_value **argv) { - - sqlite3_result_text(context, SQLITE_VSS_VERSION, -1, SQLITE_STATIC); -} - -static void vss_debug(sqlite3_context *context, - int argc, - sqlite3_value **argv) { - - auto resTxt = sqlite3_mprintf( - "version: %s\nfaiss version: %d.%d.%d\nfaiss compile options: %s", - SQLITE_VSS_VERSION, - FAISS_VERSION_MAJOR, - FAISS_VERSION_MINOR, - FAISS_VERSION_PATCH, - faiss::get_compile_options().c_str()); - - sqlite3_result_text(context, resTxt, -1, SQLITE_TRANSIENT); - sqlite3_free(resTxt); -} - -#pragma endregion - -#pragma region Distances - -static void vss_distance_l1(sqlite3_context *context, - int argc, - sqlite3_value **argv) { - - auto vector_api = (vector0_api *)sqlite3_user_data(context); - - vec_ptr lhs = vector_api->xValueAsVector(argv[0]); - if (lhs == nullptr) { - sqlite3_result_error(context, "LHS is not a vector", -1); - return; - } - - vec_ptr rhs = vector_api->xValueAsVector(argv[1]); - if (rhs == nullptr) { - sqlite3_result_error(context, "RHS is not a vector", -1); - return; - } - - if (lhs->size() != rhs->size()) { - sqlite3_result_error(context, "LHS and RHS are not vectors of the same size", - -1); - return; - } - - sqlite3_result_double(context, faiss::fvec_L1(lhs->data(), rhs->data(), lhs->size())); -} - -static void vss_distance_l2(sqlite3_context *context, int argc, - sqlite3_value **argv) { - - auto vector_api = (vector0_api *)sqlite3_user_data(context); - - vec_ptr lhs = vector_api->xValueAsVector(argv[0]); - if (lhs == nullptr) { - sqlite3_result_error(context, "LHS is not a vector", -1); - return; - } - - vec_ptr rhs = vector_api->xValueAsVector(argv[1]); - if (rhs == nullptr) { - sqlite3_result_error(context, "RHS is not a vector", -1); - return; - } - - if (lhs->size() != rhs->size()) { - sqlite3_result_error(context, "LHS and RHS are not vectors of the same size", - -1); - return; - } - - sqlite3_result_double(context, faiss::fvec_L2sqr(lhs->data(), rhs->data(), lhs->size())); -} - -static void vss_distance_linf(sqlite3_context *context, int argc, - sqlite3_value **argv) { - - auto vector_api = (vector0_api *)sqlite3_user_data(context); - - vec_ptr lhs = vector_api->xValueAsVector(argv[0]); - if (lhs == nullptr) { - sqlite3_result_error(context, "LHS is not a vector", -1); - return; - } - - vec_ptr rhs = vector_api->xValueAsVector(argv[1]); - if (rhs == nullptr) { - sqlite3_result_error(context, "RHS is not a vector", -1); - return; - } - - if (lhs->size() != rhs->size()) { - sqlite3_result_error(context, "LHS and RHS are not vectors of the same size", - -1); - return; - } - - sqlite3_result_double(context, faiss::fvec_Linf(lhs->data(), rhs->data(), lhs->size())); -} - -static void vss_inner_product(sqlite3_context *context, int argc, - sqlite3_value **argv) { - - auto vector_api = (vector0_api *)sqlite3_user_data(context); - - vec_ptr lhs = vector_api->xValueAsVector(argv[0]); - if (lhs == nullptr) { - sqlite3_result_error(context, "LHS is not a vector", -1); - return; - } - - vec_ptr rhs = vector_api->xValueAsVector(argv[1]); - if (rhs == nullptr) { - sqlite3_result_error(context, "RHS is not a vector", -1); - return; - } - - if (lhs->size() != rhs->size()) { - sqlite3_result_error(context, "LHS and RHS are not vectors of the same size", - -1); - return; - } - - sqlite3_result_double(context, - faiss::fvec_inner_product(lhs->data(), rhs->data(), lhs->size())); -} - -static void vss_fvec_add(sqlite3_context *context, int argc, - sqlite3_value **argv) { - - auto vector_api = (vector0_api *)sqlite3_user_data(context); - - vec_ptr lhs = vector_api->xValueAsVector(argv[0]); - if (lhs == nullptr) { - sqlite3_result_error(context, "LHS is not a vector", -1); - return; - } - - vec_ptr rhs = vector_api->xValueAsVector(argv[1]); - if (rhs == nullptr) { - sqlite3_result_error(context, "RHS is not a vector", -1); - return; - } - - if (lhs->size() != rhs->size()) { - sqlite3_result_error(context, "LHS and RHS are not vectors of the same size", - -1); - return; - } - - auto size = lhs->size(); - vec_ptr c(new vector(size)); - faiss::fvec_add(size, lhs->data(), rhs->data(), c->data()); - - vector_api->xResultVector(context, c.get()); -} - -static void vss_fvec_sub(sqlite3_context *context, int argc, - sqlite3_value **argv) { - - auto vector_api = (vector0_api *)sqlite3_user_data(context); - - vec_ptr lhs = vector_api->xValueAsVector(argv[0]); - if (lhs == nullptr) { - sqlite3_result_error(context, "LHS is not a vector", -1); - return; - } - - vec_ptr rhs = vector_api->xValueAsVector(argv[1]); - if (rhs == nullptr) { - sqlite3_result_error(context, "RHS is not a vector", -1); - return; - } - - if (lhs->size() != rhs->size()) { - sqlite3_result_error(context, "LHS and RHS are not vectors of the same size", -1); - return; - } - - int size = lhs->size(); - vec_ptr c = vec_ptr(new vector(size)); - faiss::fvec_sub(size, lhs->data(), rhs->data(), c->data()); - vector_api->xResultVector(context, c.get()); -} - -#pragma endregion +#include "sqlite-vector.h" +#include "vss/sql-statement.h" +#include "vss/meta-methods.h" +#include "vss/calculations.h" #pragma region Structs and cleanup functions diff --git a/src/vss/calculations.h b/src/vss/calculations.h new file mode 100644 index 0000000..3cef0e7 --- /dev/null +++ b/src/vss/calculations.h @@ -0,0 +1,177 @@ + +#ifndef VSS_CALCULATIONS_H +#define VSS_CALCULATIONS_H + +#include "sqlite-vss.h" +#include +#include + +using namespace std; + +typedef unique_ptr> vec_ptr; + +static void vss_distance_l1(sqlite3_context *context, + int argc, + sqlite3_value **argv) { + + auto vector_api = (vector0_api *)sqlite3_user_data(context); + + vec_ptr lhs = vector_api->xValueAsVector(argv[0]); + if (lhs == nullptr) { + sqlite3_result_error(context, "LHS is not a vector", -1); + return; + } + + vec_ptr rhs = vector_api->xValueAsVector(argv[1]); + if (rhs == nullptr) { + sqlite3_result_error(context, "RHS is not a vector", -1); + return; + } + + if (lhs->size() != rhs->size()) { + sqlite3_result_error(context, "LHS and RHS are not vectors of the same size", + -1); + return; + } + + sqlite3_result_double(context, faiss::fvec_L1(lhs->data(), rhs->data(), lhs->size())); +} + +static void vss_distance_l2(sqlite3_context *context, int argc, + sqlite3_value **argv) { + + auto vector_api = (vector0_api *)sqlite3_user_data(context); + + vec_ptr lhs = vector_api->xValueAsVector(argv[0]); + if (lhs == nullptr) { + sqlite3_result_error(context, "LHS is not a vector", -1); + return; + } + + vec_ptr rhs = vector_api->xValueAsVector(argv[1]); + if (rhs == nullptr) { + sqlite3_result_error(context, "RHS is not a vector", -1); + return; + } + + if (lhs->size() != rhs->size()) { + sqlite3_result_error(context, "LHS and RHS are not vectors of the same size", + -1); + return; + } + + sqlite3_result_double(context, faiss::fvec_L2sqr(lhs->data(), rhs->data(), lhs->size())); +} + +static void vss_distance_linf(sqlite3_context *context, int argc, + sqlite3_value **argv) { + + auto vector_api = (vector0_api *)sqlite3_user_data(context); + + vec_ptr lhs = vector_api->xValueAsVector(argv[0]); + if (lhs == nullptr) { + sqlite3_result_error(context, "LHS is not a vector", -1); + return; + } + + vec_ptr rhs = vector_api->xValueAsVector(argv[1]); + if (rhs == nullptr) { + sqlite3_result_error(context, "RHS is not a vector", -1); + return; + } + + if (lhs->size() != rhs->size()) { + sqlite3_result_error(context, "LHS and RHS are not vectors of the same size", + -1); + return; + } + + sqlite3_result_double(context, faiss::fvec_Linf(lhs->data(), rhs->data(), lhs->size())); +} + +static void vss_inner_product(sqlite3_context *context, int argc, + sqlite3_value **argv) { + + auto vector_api = (vector0_api *)sqlite3_user_data(context); + + vec_ptr lhs = vector_api->xValueAsVector(argv[0]); + if (lhs == nullptr) { + sqlite3_result_error(context, "LHS is not a vector", -1); + return; + } + + vec_ptr rhs = vector_api->xValueAsVector(argv[1]); + if (rhs == nullptr) { + sqlite3_result_error(context, "RHS is not a vector", -1); + return; + } + + if (lhs->size() != rhs->size()) { + sqlite3_result_error(context, "LHS and RHS are not vectors of the same size", + -1); + return; + } + + sqlite3_result_double(context, + faiss::fvec_inner_product(lhs->data(), rhs->data(), lhs->size())); +} + +static void vss_fvec_add(sqlite3_context *context, int argc, + sqlite3_value **argv) { + + auto vector_api = (vector0_api *)sqlite3_user_data(context); + + vec_ptr lhs = vector_api->xValueAsVector(argv[0]); + if (lhs == nullptr) { + sqlite3_result_error(context, "LHS is not a vector", -1); + return; + } + + vec_ptr rhs = vector_api->xValueAsVector(argv[1]); + if (rhs == nullptr) { + sqlite3_result_error(context, "RHS is not a vector", -1); + return; + } + + if (lhs->size() != rhs->size()) { + sqlite3_result_error(context, "LHS and RHS are not vectors of the same size", + -1); + return; + } + + auto size = lhs->size(); + vec_ptr c(new vector(size)); + faiss::fvec_add(size, lhs->data(), rhs->data(), c->data()); + + vector_api->xResultVector(context, c.get()); +} + +static void vss_fvec_sub(sqlite3_context *context, int argc, + sqlite3_value **argv) { + + auto vector_api = (vector0_api *)sqlite3_user_data(context); + + vec_ptr lhs = vector_api->xValueAsVector(argv[0]); + if (lhs == nullptr) { + sqlite3_result_error(context, "LHS is not a vector", -1); + return; + } + + vec_ptr rhs = vector_api->xValueAsVector(argv[1]); + if (rhs == nullptr) { + sqlite3_result_error(context, "RHS is not a vector", -1); + return; + } + + if (lhs->size() != rhs->size()) { + sqlite3_result_error(context, "LHS and RHS are not vectors of the same size", -1); + return; + } + + int size = lhs->size(); + vec_ptr c = vec_ptr(new vector(size)); + faiss::fvec_sub(size, lhs->data(), rhs->data(), c->data()); + vector_api->xResultVector(context, c.get()); +} + +#endif // VSS_CALCULATIONS_H diff --git a/src/vss/meta-methods.h b/src/vss/meta-methods.h new file mode 100644 index 0000000..2214d15 --- /dev/null +++ b/src/vss/meta-methods.h @@ -0,0 +1,35 @@ + +#ifndef META_METHODS_H +#define META_METHODS_H + +#include "sqlite-vss.h" +#include +#include + + +static void vss_version(sqlite3_context *context, + int argc, + sqlite3_value **argv) { + + sqlite3_result_text(context, SQLITE_VSS_VERSION, -1, SQLITE_STATIC); +} + +static void vss_debug(sqlite3_context *context, + int argc, + sqlite3_value **argv) { + + auto resTxt = sqlite3_mprintf( + "version: %s\nfaiss version: %d.%d.%d\nfaiss compile options: %s", + SQLITE_VSS_VERSION, + FAISS_VERSION_MAJOR, + FAISS_VERSION_MINOR, + FAISS_VERSION_PATCH, + faiss::get_compile_options().c_str()); + + sqlite3_result_text(context, resTxt, -1, SQLITE_TRANSIENT); + sqlite3_free(resTxt); +} + + +#endif // META_METHODS_H + diff --git a/src/sql-statement.h b/src/vss/sql-statement.h similarity index 92% rename from src/sql-statement.h rename to src/vss/sql-statement.h index bac04ec..b1bdb29 100644 --- a/src/sql-statement.h +++ b/src/vss/sql-statement.h @@ -4,6 +4,10 @@ #include "sqlite-vss.h" +/* + * Helper class encapsulating an SQL statement towards SQLite, with automatic and deterministic destruction + * and cleanup of any heap memory, etc. + */ class SqlStatement { public: From 8590e0b70bd02c5521d83bc2a3a79a23253d33a4 Mon Sep 17 00:00:00 2001 From: Thomas Hansen Date: Mon, 26 Jun 2023 10:12:49 +0300 Subject: [PATCH 09/66] Moving more stuff into separate files --- src/sqlite-vss.cpp | 200 ++----------------------------------- src/vss/calculations.h | 8 +- src/vss/inclusions.h | 35 +++++++ src/vss/meta-methods.h | 6 +- src/vss/sql-statement.h | 3 +- src/vss/vss-index-cursor.h | 43 ++++++++ src/vss/vss-index-vtab.h | 82 +++++++++++++++ src/vss/vss-index.h | 55 ++++++++++ 8 files changed, 228 insertions(+), 204 deletions(-) create mode 100644 src/vss/inclusions.h create mode 100644 src/vss/vss-index-cursor.h create mode 100644 src/vss/vss-index-vtab.h create mode 100644 src/vss/vss-index.h diff --git a/src/sqlite-vss.cpp b/src/sqlite-vss.cpp index d9f6912..6f1a978 100644 --- a/src/sqlite-vss.cpp +++ b/src/sqlite-vss.cpp @@ -1,33 +1,14 @@ -#include "sqlite-vss.h" -#include -#include - -#include "sqlite3ext.h" -SQLITE_EXTENSION_INIT1 - -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -using namespace std; -typedef unique_ptr> vec_ptr; +#include "sqlite-vss.h" +#include "vss/inclusions.h" #include "sqlite-vector.h" #include "vss/sql-statement.h" #include "vss/meta-methods.h" #include "vss/calculations.h" +#include "vss/vss-index.h" +#include "vss/vss-index-vtab.h" +#include "vss/vss-index-cursor.h" #pragma region Structs and cleanup functions @@ -78,7 +59,8 @@ static void vssSearchParamsFunc(sqlite3_context *context, sqlite3_result_pointer(context, params, "vss0_searchparams", delVssSearchParams); } -static void vssRangeSearchParamsFunc(sqlite3_context *context, int argc, +static void vssRangeSearchParamsFunc(sqlite3_context *context, + int argc, sqlite3_value **argv) { auto vector_api = (vector0_api *)sqlite3_user_data(context); @@ -221,7 +203,9 @@ static int shadow_data_delete(sqlite3 *db, return SQLITE_OK; } -static faiss::Index *read_index_select(sqlite3 *db, const char *name, int indexId) { +static faiss::Index *read_index_select(sqlite3 *db, + const char *name, + int indexId) { SqlStatement select(db, sqlite3_mprintf("select idx from \"%w_index\" where rowid = ?", @@ -296,170 +280,6 @@ static int drop_shadow_tables(sqlite3 *db, char *name) { return SQLITE_OK; } -#define VSS_SEARCH_FUNCTION SQLITE_INDEX_CONSTRAINT_FUNCTION -#define VSS_RANGE_SEARCH_FUNCTION SQLITE_INDEX_CONSTRAINT_FUNCTION + 1 - -// Wrapper around a single faiss index, with training data, insert records, and -// delete records. -class vss_index { - -public: - - explicit vss_index(faiss::Index *index) : index(index) {} - - ~vss_index() { - if (index != nullptr) { - delete index; - } - } - - faiss::Index * getIndex() { - - return index; - } - - vector & getTrainings() { - - return trainings; - } - - vector & getInsert_data() { - - return insert_data; - } - - vector & getInsert_ids() { - - return insert_ids; - } - - vector & getDelete_ids() { - - return delete_ids; - } - -private: - - faiss::Index *index; - vector trainings; - vector insert_data; - vector insert_ids; - vector delete_ids; -}; - -class vss_index_vtab : public sqlite3_vtab { - -public: - - vss_index_vtab(sqlite3 *db, vector0_api *vector_api, char *schema, char *name) - : db(db), - vector_api(vector_api), - schema(schema), - name(name) { - - this->zErrMsg = nullptr; - } - - ~vss_index_vtab() { - - if (name) - sqlite3_free(name); - if (schema) - sqlite3_free(schema); - if (this->zErrMsg != nullptr) - delete this->zErrMsg; - for (auto iter = indexes.begin(); iter != indexes.end(); ++iter) { - delete (*iter); - } - } - - void setError(char *error) { - if (this->zErrMsg != nullptr) { - delete this->zErrMsg; - } - this->zErrMsg = error; - } - - sqlite3 * getDb() { - - return db; - } - - vector0_api * getVector0_api() { - - return vector_api; - } - - vector & getIndexes() { - - return indexes; - } - - char * getName() { - - return name; - } - - char * getSchema() { - - return schema; - } - -private: - - sqlite3 *db; - vector0_api *vector_api; - - // Name of the virtual table. Must be freed during disconnect - char *name; - - // Name of the schema the virtual table exists in. Must be freed during - // disconnect - char *schema; - - // Vector holding all the faiss Indices the vtab uses, and their state, - // implying which items are to be deleted and inserted. - vector indexes; -}; - -enum QueryType { search, range_search, fullscan }; - -struct vss_index_cursor : public sqlite3_vtab_cursor { - - explicit vss_index_cursor(vss_index_vtab *table) - : table(table), - sqlite3_vtab_cursor({0}), - stmt(nullptr), - sql(nullptr) { } - - ~vss_index_cursor() { - if (stmt != nullptr) - sqlite3_finalize(stmt); - if (sql != nullptr) - sqlite3_free(sql); - } - - vss_index_vtab *table; - - sqlite3_int64 iCurrent; - sqlite3_int64 iRowid; - - QueryType query_type; - - // For query_type == QueryType::search - sqlite3_int64 limit; - vector search_ids; - vector search_distances; - - // For query_type == QueryType::range_search - unique_ptr range_search_result; - - // For query_type == QueryType::fullscan - sqlite3_stmt *stmt; - char *sql; - int step_result; -}; - struct VssIndexColumn { string name; diff --git a/src/vss/calculations.h b/src/vss/calculations.h index 3cef0e7..8d55e95 100644 --- a/src/vss/calculations.h +++ b/src/vss/calculations.h @@ -2,13 +2,7 @@ #ifndef VSS_CALCULATIONS_H #define VSS_CALCULATIONS_H -#include "sqlite-vss.h" -#include -#include - -using namespace std; - -typedef unique_ptr> vec_ptr; +#include "inclusions.h" static void vss_distance_l1(sqlite3_context *context, int argc, diff --git a/src/vss/inclusions.h b/src/vss/inclusions.h new file mode 100644 index 0000000..006d432 --- /dev/null +++ b/src/vss/inclusions.h @@ -0,0 +1,35 @@ + +#ifndef VSS_INCLUSIONS_H +#define VSS_INCLUSIONS_H + +#include +#include + +#include "sqlite3ext.h" +SQLITE_EXTENSION_INIT1 + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace std; + +typedef unique_ptr> vec_ptr; + +enum QueryType { search, range_search, fullscan }; + +#define VSS_SEARCH_FUNCTION SQLITE_INDEX_CONSTRAINT_FUNCTION +#define VSS_RANGE_SEARCH_FUNCTION SQLITE_INDEX_CONSTRAINT_FUNCTION + 1 + +#endif // VSS_INCLUSIONS_H diff --git a/src/vss/meta-methods.h b/src/vss/meta-methods.h index 2214d15..05a70f7 100644 --- a/src/vss/meta-methods.h +++ b/src/vss/meta-methods.h @@ -2,10 +2,7 @@ #ifndef META_METHODS_H #define META_METHODS_H -#include "sqlite-vss.h" -#include -#include - +#include "inclusions.h" static void vss_version(sqlite3_context *context, int argc, @@ -32,4 +29,3 @@ static void vss_debug(sqlite3_context *context, #endif // META_METHODS_H - diff --git a/src/vss/sql-statement.h b/src/vss/sql-statement.h index b1bdb29..cf4a076 100644 --- a/src/vss/sql-statement.h +++ b/src/vss/sql-statement.h @@ -2,7 +2,7 @@ #ifndef SQL_STATEMENT_H #define SQL_STATEMENT_H -#include "sqlite-vss.h" +#include "inclusions.h" /* * Helper class encapsulating an SQL statement towards SQLite, with automatic and deterministic destruction @@ -109,4 +109,3 @@ class SqlStatement { }; #endif // SQL_STATEMENT_H - diff --git a/src/vss/vss-index-cursor.h b/src/vss/vss-index-cursor.h new file mode 100644 index 0000000..21eaf0d --- /dev/null +++ b/src/vss/vss-index-cursor.h @@ -0,0 +1,43 @@ + +#ifndef VSS_INDEX_CURSOR_H +#define VSS_INDEX_CURSOR_H + +#include "inclusions.h" + +struct vss_index_cursor : public sqlite3_vtab_cursor { + + explicit vss_index_cursor(vss_index_vtab *table) + : table(table), + sqlite3_vtab_cursor({0}), + stmt(nullptr), + sql(nullptr) { } + + ~vss_index_cursor() { + if (stmt != nullptr) + sqlite3_finalize(stmt); + if (sql != nullptr) + sqlite3_free(sql); + } + + vss_index_vtab *table; + + sqlite3_int64 iCurrent; + sqlite3_int64 iRowid; + + QueryType query_type; + + // For query_type == QueryType::search + sqlite3_int64 limit; + vector search_ids; + vector search_distances; + + // For query_type == QueryType::range_search + unique_ptr range_search_result; + + // For query_type == QueryType::fullscan + sqlite3_stmt *stmt; + char *sql; + int step_result; +}; + +#endif // VSS_INDEX_CURSOR_H diff --git a/src/vss/vss-index-vtab.h b/src/vss/vss-index-vtab.h new file mode 100644 index 0000000..5446927 --- /dev/null +++ b/src/vss/vss-index-vtab.h @@ -0,0 +1,82 @@ + +#ifndef VSS_INDEX_VTAB_H +#define VSS_INDEX_VTAB_H + +#include "inclusions.h" + +class vss_index_vtab : public sqlite3_vtab { + +public: + + vss_index_vtab(sqlite3 *db, vector0_api *vector_api, char *schema, char *name) + : db(db), + vector_api(vector_api), + schema(schema), + name(name) { + + this->zErrMsg = nullptr; + } + + ~vss_index_vtab() { + + if (name) + sqlite3_free(name); + if (schema) + sqlite3_free(schema); + if (this->zErrMsg != nullptr) + delete this->zErrMsg; + for (auto iter = indexes.begin(); iter != indexes.end(); ++iter) { + delete (*iter); + } + } + + void setError(char *error) { + if (this->zErrMsg != nullptr) { + delete this->zErrMsg; + } + this->zErrMsg = error; + } + + sqlite3 * getDb() { + + return db; + } + + vector0_api * getVector0_api() { + + return vector_api; + } + + vector & getIndexes() { + + return indexes; + } + + char * getName() { + + return name; + } + + char * getSchema() { + + return schema; + } + +private: + + sqlite3 *db; + vector0_api *vector_api; + + // Name of the virtual table. Must be freed during disconnect + char *name; + + // Name of the schema the virtual table exists in. Must be freed during + // disconnect + char *schema; + + // Vector holding all the faiss Indices the vtab uses, and their state, + // implying which items are to be deleted and inserted. + vector indexes; +}; + +#endif // VSS_INDEX_VTAB_H diff --git a/src/vss/vss-index.h b/src/vss/vss-index.h new file mode 100644 index 0000000..31dfd43 --- /dev/null +++ b/src/vss/vss-index.h @@ -0,0 +1,55 @@ + +#ifndef VSS_INDEX_H +#define VSS_INDEX_H + +#include "inclusions.h" + +// Wrapper around a single faiss index, with training data, insert records, and +// delete records. +class vss_index { + +public: + + explicit vss_index(faiss::Index *index) : index(index) {} + + ~vss_index() { + if (index != nullptr) { + delete index; + } + } + + faiss::Index * getIndex() { + + return index; + } + + vector & getTrainings() { + + return trainings; + } + + vector & getInsert_data() { + + return insert_data; + } + + vector & getInsert_ids() { + + return insert_ids; + } + + vector & getDelete_ids() { + + return delete_ids; + } + +private: + + faiss::Index *index; + vector trainings; + vector insert_data; + vector insert_ids; + vector delete_ids; +}; + +#endif // VSS_INDEX_H From d42db30c6dfad6eeeb7f9390ba822d0656a26928 Mon Sep 17 00:00:00 2001 From: Thomas Hansen Date: Mon, 26 Jun 2023 10:34:48 +0300 Subject: [PATCH 10/66] Restructuring by encapsulating members --- src/sqlite-vss.cpp | 79 ++++++++++++++-------------- src/vss/vss-index-cursor.h | 103 ++++++++++++++++++++++++++++++++++++- 2 files changed, 140 insertions(+), 42 deletions(-) diff --git a/src/sqlite-vss.cpp b/src/sqlite-vss.cpp index 6f1a978..31f857d 100644 --- a/src/sqlite-vss.cpp +++ b/src/sqlite-vss.cpp @@ -566,13 +566,13 @@ static int vssIndexFilter(sqlite3_vtab_cursor *pVtabCursor, if (strcmp(idxStr, "search") == 0) { - pCursor->query_type = QueryType::search; + pCursor->setQuery_type(QueryType::search); vec_ptr query_vector; auto params = static_cast(sqlite3_value_pointer(argv[0], "vss0_searchparams")); if (params != nullptr) { - pCursor->limit = params->k; + pCursor->setLimit(params->k); query_vector = vec_ptr(new vector(*params->vector)); } else if (sqlite3_libversion_number() < 3041000) { @@ -585,12 +585,12 @@ static int vssIndexFilter(sqlite3_vtab_cursor *pVtabCursor, "2nd parameter for SQLite versions below 3.41.0")); return SQLITE_ERROR; - } else if ((query_vector = pCursor->table->getVector0_api()->xValueAsVector( + } else if ((query_vector = pCursor->getTable()->getVector0_api()->xValueAsVector( argv[0])) != nullptr) { if (argc > 1) { - pCursor->limit = sqlite3_value_int(argv[1]); + pCursor->setLimit(sqlite3_value_int(argv[1])); } else { auto ptrVtab = static_cast(pCursor->pVtab); @@ -608,7 +608,7 @@ static int vssIndexFilter(sqlite3_vtab_cursor *pVtabCursor, } int nq = 1; - auto index = pCursor->table->getIndexes().at(idxNum)->getIndex(); + auto index = pCursor->getTable()->getIndexes().at(idxNum)->getIndex(); if (query_vector->size() != index->d) { @@ -621,31 +621,30 @@ static int vssIndexFilter(sqlite3_vtab_cursor *pVtabCursor, return SQLITE_ERROR; } - if (pCursor->limit <= 0) { + if (pCursor->getLimit() <= 0) { auto ptrVtab = static_cast(pCursor->pVtab); ptrVtab->setError(sqlite3_mprintf( "Limit must be greater than 0, got %ld", - pCursor->limit)); + pCursor->getLimit())); return SQLITE_ERROR; } // To avoid trying to select more records than number of records in index. - auto searchMax = min(static_cast(pCursor->limit) * nq, index->ntotal * nq); + auto searchMax = min(static_cast(pCursor->getLimit()) * nq, index->ntotal * nq); - pCursor->search_distances = vector(searchMax, 0); - pCursor->search_ids = vector(searchMax, 0); + pCursor->resetSearch(searchMax); index->search(nq, query_vector->data(), searchMax, - pCursor->search_distances.data(), - pCursor->search_ids.data()); + pCursor->getSearch_distances().data(), + pCursor->getSearch_ids().data()); } else if (strcmp(idxStr, "range_search") == 0) { - pCursor->query_type = QueryType::range_search; + pCursor->setQuery_type(QueryType::range_search); auto params = static_cast( sqlite3_value_pointer(argv[0], "vss0_rangesearchparams")); @@ -653,22 +652,22 @@ static int vssIndexFilter(sqlite3_vtab_cursor *pVtabCursor, int nq = 1; vector nns(params->distance * nq); - pCursor->range_search_result = unique_ptr(new faiss::RangeSearchResult(nq, true)); + pCursor->getRange_search_result() = unique_ptr(new faiss::RangeSearchResult(nq, true)); - auto index = pCursor->table->getIndexes().at(idxNum)->getIndex(); + auto index = pCursor->getTable()->getIndexes().at(idxNum)->getIndex(); index->range_search(nq, params->vector->data(), params->distance, - pCursor->range_search_result.get()); + pCursor->getRange_search_result().get()); } else if (strcmp(idxStr, "fullscan") == 0) { - pCursor->query_type = QueryType::fullscan; - pCursor->sql = sqlite3_mprintf("select rowid from \"%w_data\"", pCursor->table->getName()); + pCursor->setQuery_type(QueryType::fullscan); + pCursor->setSql(sqlite3_mprintf("select rowid from \"%w_data\"", pCursor->getTable()->getName())); - int res = sqlite3_prepare_v2(pCursor->table->getDb(), - pCursor->sql, + int res = sqlite3_prepare_v2(pCursor->getTable()->getDb(), + pCursor->getSql(), -1, &pCursor->stmt, nullptr); @@ -676,7 +675,7 @@ static int vssIndexFilter(sqlite3_vtab_cursor *pVtabCursor, if (res != SQLITE_OK) return res; - pCursor->step_result = sqlite3_step(pCursor->stmt); + pCursor->setStep_result(sqlite3_step(pCursor->getStmt())); } else { @@ -687,7 +686,7 @@ static int vssIndexFilter(sqlite3_vtab_cursor *pVtabCursor, return SQLITE_ERROR; } - pCursor->iCurrent = 0; + pCursor->setICurrent(0); return SQLITE_OK; } @@ -695,15 +694,15 @@ static int vssIndexNext(sqlite3_vtab_cursor *cur) { auto pCursor = static_cast(cur); - switch (pCursor->query_type) { + switch (pCursor->getQuery_type()) { case QueryType::search: case QueryType::range_search: - pCursor->iCurrent++; + pCursor->incrementICurrent(); break; case QueryType::fullscan: - pCursor->step_result = sqlite3_step(pCursor->stmt); + pCursor->setStep_result(sqlite3_step(pCursor->getStmt())); } return SQLITE_OK; @@ -713,18 +712,18 @@ static int vssIndexRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid) { auto pCursor = static_cast(cur); - switch (pCursor->query_type) { + switch (pCursor->getQuery_type()) { case QueryType::search: - *pRowid = pCursor->search_ids.at(pCursor->iCurrent); + *pRowid = pCursor->getSearch_ids().at(pCursor->getICurrent()); break; case QueryType::range_search: - *pRowid = pCursor->range_search_result->labels[pCursor->iCurrent]; + *pRowid = pCursor->getRange_search_result()->labels[pCursor->getICurrent()]; break; case QueryType::fullscan: - *pRowid = sqlite3_column_int64(pCursor->stmt, 0); + *pRowid = sqlite3_column_int64(pCursor->getStmt(), 0); break; } return SQLITE_OK; @@ -734,18 +733,18 @@ static int vssIndexEof(sqlite3_vtab_cursor *cur) { auto pCursor = static_cast(cur); - switch (pCursor->query_type) { + switch (pCursor->getQuery_type()) { case QueryType::search: - return pCursor->iCurrent >= pCursor->limit || - pCursor->iCurrent >= pCursor->search_ids.size() - || (pCursor->search_ids.at(pCursor->iCurrent) == -1); + return pCursor->getICurrent() >= pCursor->getLimit() || + pCursor->getICurrent() >= pCursor->getSearch_ids().size() + || (pCursor->getSearch_ids().at(pCursor->getICurrent()) == -1); case QueryType::range_search: - return pCursor->iCurrent >= pCursor->range_search_result->lims[1]; + return pCursor->getICurrent() >= pCursor->getRange_search_result()->lims[1]; case QueryType::fullscan: - return pCursor->step_result != SQLITE_ROW; + return pCursor->getStep_result() != SQLITE_ROW; } return 1; } @@ -758,16 +757,16 @@ static int vssIndexColumn(sqlite3_vtab_cursor *cur, if (i == VSS_INDEX_COLUMN_DISTANCE) { - switch (pCursor->query_type) { + switch (pCursor->getQuery_type()) { case QueryType::search: sqlite3_result_double(ctx, - pCursor->search_distances.at(pCursor->iCurrent)); + pCursor->getSearch_distances().at(pCursor->getICurrent())); break; case QueryType::range_search: sqlite3_result_double(ctx, - pCursor->range_search_result->distances[pCursor->iCurrent]); + pCursor->getRange_search_result()->distances[pCursor->getICurrent()]); break; case QueryType::fullscan: @@ -777,7 +776,7 @@ static int vssIndexColumn(sqlite3_vtab_cursor *cur, } else if (i >= VSS_INDEX_COLUMN_VECTORS) { auto index = - pCursor->table->getIndexes().at(i - VSS_INDEX_COLUMN_VECTORS)->getIndex(); + pCursor->getTable()->getIndexes().at(i - VSS_INDEX_COLUMN_VECTORS)->getIndex(); vector vec(index->d); sqlite3_int64 rowId; @@ -798,7 +797,7 @@ static int vssIndexColumn(sqlite3_vtab_cursor *cur, sqlite3_free(errmsg); return SQLITE_ERROR; } - pCursor->table->getVector0_api()->xResultVector(ctx, &vec); + pCursor->getTable()->getVector0_api()->xResultVector(ctx, &vec); } return SQLITE_OK; } diff --git a/src/vss/vss-index-cursor.h b/src/vss/vss-index-cursor.h index 21eaf0d..9581b83 100644 --- a/src/vss/vss-index-cursor.h +++ b/src/vss/vss-index-cursor.h @@ -4,7 +4,9 @@ #include "inclusions.h" -struct vss_index_cursor : public sqlite3_vtab_cursor { +class vss_index_cursor : public sqlite3_vtab_cursor { + +public: explicit vss_index_cursor(vss_index_vtab *table) : table(table), @@ -19,6 +21,104 @@ struct vss_index_cursor : public sqlite3_vtab_cursor { sqlite3_free(sql); } + vss_index_vtab * getTable() { + + return table; + } + + sqlite3_int64 getICurrent() { + + return iCurrent; + } + + sqlite3_int64 getIRowid() { + + return iRowid; + } + + QueryType getQuery_type() { + + return query_type; + } + + sqlite3_int64 getLimit() { + + return limit; + } + + vector & getSearch_ids() { + + return search_ids; + } + + vector & getSearch_distances() { + + return search_distances; + } + + unique_ptr & getRange_search_result() { + + return range_search_result; + } + + sqlite3_stmt *getStmt() { + + return stmt; + } + + int getStep_result() { + + return step_result; + } + + void setStep_result(int value) { + + step_result = value; + } + + void incrementICurrent() { + + iCurrent += 1; + } + + void setICurrent(sqlite3_int64 value) { + + iCurrent = value; + } + + void resetSearch(long noItems) { + + search_distances = vector(noItems, 0); + search_ids = vector(noItems, 0); + } + + void setQuery_type(QueryType value) { + + query_type = value; + } + + void setSql(char * value) { + + if (sql != nullptr) + sqlite3_free(sql); + sql = value; + } + + char * getSql() { + + return sql; + } + + void setLimit(sqlite3_int64 value) { + + limit = value; + } + + // TODO: Parts of our logic requires the address to the pointer such that we can assign what it's pointing at + sqlite3_stmt *stmt; + +private: + vss_index_vtab *table; sqlite3_int64 iCurrent; @@ -35,7 +135,6 @@ struct vss_index_cursor : public sqlite3_vtab_cursor { unique_ptr range_search_result; // For query_type == QueryType::fullscan - sqlite3_stmt *stmt; char *sql; int step_result; }; From 765746ab6479a6bd66138698c9b36c5d3c59f84d Mon Sep 17 00:00:00 2001 From: Thomas Hansen Date: Mon, 26 Jun 2023 10:48:52 +0300 Subject: [PATCH 11/66] Using nullptr + code formatting --- src/sqlite-vss.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/sqlite-vss.cpp b/src/sqlite-vss.cpp index 31f857d..90cd4ec 100644 --- a/src/sqlite-vss.cpp +++ b/src/sqlite-vss.cpp @@ -664,7 +664,9 @@ static int vssIndexFilter(sqlite3_vtab_cursor *pVtabCursor, } else if (strcmp(idxStr, "fullscan") == 0) { pCursor->setQuery_type(QueryType::fullscan); - pCursor->setSql(sqlite3_mprintf("select rowid from \"%w_data\"", pCursor->getTable()->getName())); + pCursor->setSql( + sqlite3_mprintf("select rowid from \"%w_data\"", + pCursor->getTable()->getName())); int res = sqlite3_prepare_v2(pCursor->getTable()->getDb(), pCursor->getSql(), @@ -783,6 +785,7 @@ static int vssIndexColumn(sqlite3_vtab_cursor *cur, vssIndexRowid(cur, &rowId); try { + index->reconstruct(rowId, vec.data()); } catch (faiss::FaissException &e) { @@ -1164,7 +1167,7 @@ __declspec(dllexport) SQLITE_UTF8 | SQLITE_DETERMINISTIC | SQLITE_INNOCUOUS, 0, vss_version, - 0, 0, 0); + nullptr, nullptr, nullptr); sqlite3_create_function_v2(db, "vss_debug", @@ -1172,7 +1175,7 @@ __declspec(dllexport) SQLITE_UTF8 | SQLITE_DETERMINISTIC | SQLITE_INNOCUOUS, 0, vss_debug, - 0, 0, 0); + nullptr, nullptr, nullptr); sqlite3_create_function_v2(db, "vss_distance_l1", @@ -1180,7 +1183,7 @@ __declspec(dllexport) SQLITE_UTF8 | SQLITE_DETERMINISTIC | SQLITE_INNOCUOUS, vector_api, vss_distance_l1, - 0, 0, 0); + nullptr, nullptr, nullptr); sqlite3_create_function_v2(db, "vss_distance_l2", 2, From 600f67cb3d7f08905b4d0b8f7e654991d0c3cebe Mon Sep 17 00:00:00 2001 From: Thomas Hansen Date: Mon, 26 Jun 2023 10:51:47 +0300 Subject: [PATCH 12/66] Comment --- src/sqlite-vss.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sqlite-vss.cpp b/src/sqlite-vss.cpp index 90cd4ec..835a589 100644 --- a/src/sqlite-vss.cpp +++ b/src/sqlite-vss.cpp @@ -1096,7 +1096,7 @@ static int vssIndexShadowName(const char *zName) { } static sqlite3_module vssIndexModule = { - /* iVersion */ 3, + /* iVersion */ 3, // TODO: Shouldn't this be the same as the version for sqlite-vector.cpp? /* xCreate */ vssIndexCreate, /* xConnect */ vssIndexConnect, /* xBestIndex */ vssIndexBestIndex, From a33b38c05b1756611971db8c6fc2be39146cb97f Mon Sep 17 00:00:00 2001 From: Thomas Hansen Date: Mon, 26 Jun 2023 10:52:39 +0300 Subject: [PATCH 13/66] Using nullptr to be more semantic correct --- src/sqlite-vector.cpp | 23 ++++++++++++----------- src/sqlite-vss.cpp | 11 ++++++----- 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/src/sqlite-vector.cpp b/src/sqlite-vector.cpp index 67985b9..7f22370 100644 --- a/src/sqlite-vector.cpp +++ b/src/sqlite-vector.cpp @@ -584,17 +584,18 @@ static sqlite3_module fvecsEachModule = { /* xEof */ fvecsEachEof, /* xColumn */ fvecsEachColumn, /* xRowid */ fvecsEachRowid, - /* xUpdate */ 0, - /* xBegin */ 0, - /* xSync */ 0, - /* xCommit */ 0, - /* xRollback */ 0, - /* xFindMethod */ 0, - /* xRename */ 0, - /* xSavepoint */ 0, - /* xRelease */ 0, - /* xRollbackTo */ 0, - /* xShadowName */ 0}; + /* xUpdate */ nullptr, + /* xBegin */ nullptr, + /* xSync */ nullptr, + /* xCommit */ nullptr, + /* xRollback */ nullptr, + /* xFindMethod */ nullptr, + /* xRename */ nullptr, + /* xSavepoint */ nullptr, + /* xRelease */ nullptr, + /* xRollbackTo */ nullptr, + /* xShadowName */ nullptr +}; #pragma endregion diff --git a/src/sqlite-vss.cpp b/src/sqlite-vss.cpp index 835a589..e1b79ff 100644 --- a/src/sqlite-vss.cpp +++ b/src/sqlite-vss.cpp @@ -1115,11 +1115,12 @@ static sqlite3_module vssIndexModule = { /* xCommit */ vssIndexCommit, /* xRollback */ vssIndexRollback, /* xFindMethod */ vssIndexFindFunction, - /* xRename */ 0, - /* xSavepoint */ 0, - /* xRelease */ 0, - /* xRollbackTo */ 0, - /* xShadowName */ vssIndexShadowName}; + /* xRename */ nullptr, + /* xSavepoint */ nullptr, + /* xRelease */ nullptr, + /* xRollbackTo */ nullptr, + /* xShadowName */ vssIndexShadowName +}; #pragma endregion From 87b058b5c28d2542a6bbf7c802b3d29b58316177 Mon Sep 17 00:00:00 2001 From: Thomas Hansen Date: Mon, 26 Jun 2023 11:14:38 +0300 Subject: [PATCH 14/66] Better encapsulation and cohesion --- src/sqlite-vss.cpp | 62 +++++++-------------------------------------- src/vss/vss-index.h | 62 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+), 53 deletions(-) diff --git a/src/sqlite-vss.cpp b/src/sqlite-vss.cpp index e1b79ff..1a7ebdb 100644 --- a/src/sqlite-vss.cpp +++ b/src/sqlite-vss.cpp @@ -818,51 +818,16 @@ static int vssIndexSync(sqlite3_vtab *pVTab) { bool needsWriting = false; - auto idxCol = 0; - for (auto iter = pTable->getIndexes().begin(); iter != pTable->getIndexes().end(); ++iter, idxCol++) { - - // Checking if index needs training. - if (!(*iter)->getTrainings().empty()) { - - (*iter)->getIndex()->train( - (*iter)->getTrainings().size() / (*iter)->getIndex()->d, - (*iter)->getTrainings().data()); - - (*iter)->getTrainings().clear(); - (*iter)->getTrainings().shrink_to_fit(); - - needsWriting = true; - } - - // Checking if we're deleting records from the index. - if (!(*iter)->getDelete_ids().empty()) { - - faiss::IDSelectorBatch selector((*iter)->getDelete_ids().size(), - (*iter)->getDelete_ids().data()); - - (*iter)->getIndex()->remove_ids(selector); - (*iter)->getDelete_ids().clear(); - (*iter)->getDelete_ids().shrink_to_fit(); - - needsWriting = true; - } - - // Checking if we're inserting records to the index. - if (!(*iter)->getInsert_data().empty()) { - - (*iter)->getIndex()->add_with_ids( - (*iter)->getInsert_ids().size(), - (*iter)->getInsert_data().data(), - (faiss::idx_t *)(*iter)->getInsert_ids().data()); + for (auto iter = pTable->getIndexes().begin(); iter != pTable->getIndexes().end(); ++iter) { - (*iter)->getInsert_ids().clear(); - (*iter)->getInsert_ids().shrink_to_fit(); + // Training index, notice no-op unless we've got training data. + needsWriting = (*iter)->tryTrain() || needsWriting; - (*iter)->getInsert_data().clear(); - (*iter)->getInsert_data().shrink_to_fit(); + // Deleting data from index, notice no-op unless there's something to actually delete. + needsWriting = (*iter)->tryDelete() || needsWriting; - needsWriting = true; - } + // Inserting data to index, notice no-op unless there's something to actually insert. + needsWriting = (*iter)->tryInsert() || needsWriting; } if (needsWriting) { @@ -895,17 +860,8 @@ static int vssIndexSync(sqlite3_vtab *pVTab) { for (auto iter = pTable->getIndexes().begin(); iter != pTable->getIndexes().end(); ++iter) { - (*iter)->getInsert_ids().clear(); - (*iter)->getInsert_ids().shrink_to_fit(); - - (*iter)->getInsert_data().clear(); - (*iter)->getInsert_data().shrink_to_fit(); - - (*iter)->getDelete_ids().clear(); - (*iter)->getDelete_ids().shrink_to_fit(); - - (*iter)->getTrainings().clear(); - (*iter)->getTrainings().shrink_to_fit(); + // Cleanups in case we've got hanging data. + (*iter)->reset(); } return SQLITE_ERROR; diff --git a/src/vss/vss-index.h b/src/vss/vss-index.h index 31dfd43..7dd3787 100644 --- a/src/vss/vss-index.h +++ b/src/vss/vss-index.h @@ -43,6 +43,68 @@ class vss_index { return delete_ids; } + bool tryTrain() { + + if (trainings.empty()) + return false; + + index->train(trainings.size() / index->d, trainings.data()); + trainings.clear(); + trainings.shrink_to_fit(); + + return true; + } + + bool tryDelete() { + + if (delete_ids.empty()) + return false; + + faiss::IDSelectorBatch selector(delete_ids.size(), + delete_ids.data()); + + index->remove_ids(selector); + delete_ids.clear(); + delete_ids.shrink_to_fit(); + + return true; + } + + bool tryInsert() { + + if (insert_ids.empty()) + return false; + + index->add_with_ids( + insert_ids.size(), + insert_data.data(), + (faiss::idx_t *)insert_ids.data()); + + insert_ids.clear(); + insert_ids.shrink_to_fit(); + + insert_data.clear(); + insert_data.shrink_to_fit(); + + return true; + } + + void reset() { + + trainings.clear(); + trainings.shrink_to_fit(); + + insert_data.clear(); + insert_data.shrink_to_fit(); + + insert_ids.clear(); + insert_ids.shrink_to_fit(); + + delete_ids.clear(); + + delete_ids.shrink_to_fit(); + } + private: faiss::Index *index; From d9d1c414a38a5fc70ffb8490aedbf831a34b3ecd Mon Sep 17 00:00:00 2001 From: Thomas Hansen Date: Mon, 26 Jun 2023 11:42:42 +0300 Subject: [PATCH 15/66] Better cohesion + encapsulation --- src/sqlite-vss.cpp | 65 ++++++++++------------------------------ src/vss/vss-index.h | 72 +++++++++++++++++++++++++++------------------ 2 files changed, 59 insertions(+), 78 deletions(-) diff --git a/src/sqlite-vss.cpp b/src/sqlite-vss.cpp index 1a7ebdb..c85ed45 100644 --- a/src/sqlite-vss.cpp +++ b/src/sqlite-vss.cpp @@ -820,14 +820,8 @@ static int vssIndexSync(sqlite3_vtab *pVTab) { for (auto iter = pTable->getIndexes().begin(); iter != pTable->getIndexes().end(); ++iter) { - // Training index, notice no-op unless we've got training data. - needsWriting = (*iter)->tryTrain() || needsWriting; - - // Deleting data from index, notice no-op unless there's something to actually delete. - needsWriting = (*iter)->tryDelete() || needsWriting; - - // Inserting data to index, notice no-op unless there's something to actually insert. - needsWriting = (*iter)->tryInsert() || needsWriting; + // Synchronizing index, implying deleting, training, and inserting records according to needs. + needsWriting = (*iter)->synchronize(); } if (needsWriting) { @@ -876,17 +870,7 @@ static int vssIndexRollback(sqlite3_vtab *pVTab) { for (auto iter = pTable->getIndexes().begin(); iter != pTable->getIndexes().end(); ++iter) { - (*iter)->getTrainings().clear(); - (*iter)->getTrainings().shrink_to_fit(); - - (*iter)->getInsert_data().clear(); - (*iter)->getInsert_data().shrink_to_fit(); - - (*iter)->getInsert_ids().clear(); - (*iter)->getInsert_ids().shrink_to_fit(); - - (*iter)->getDelete_ids().clear(); - (*iter)->getDelete_ids().shrink_to_fit(); + (*iter)->reset(); } return SQLITE_OK; } @@ -911,7 +895,7 @@ static int vssIndexUpdate(sqlite3_vtab *pVTab, return rc; for (auto iter = pTable->getIndexes().begin(); iter != pTable->getIndexes().end(); ++iter) { - (*iter)->getDelete_ids().push_back(rowid_to_delete); + (*iter)->addDelete(rowid_to_delete); } } else if (argc > 1 && sqlite3_value_type(argv[0]) == SQLITE_NULL) { @@ -926,7 +910,6 @@ static int vssIndexUpdate(sqlite3_vtab *pVTab, vec_ptr vec; sqlite3_int64 rowid = sqlite3_value_int64(argv[1]); - bool inserted_rowid = false; auto i = 0; for (auto iter = pTable->getIndexes().begin(); iter != pTable->getIndexes().end(); ++iter, i++) { @@ -938,32 +921,20 @@ static int vssIndexUpdate(sqlite3_vtab *pVTab, if (!(*iter)->getIndex()->is_trained) { pTable->setError(sqlite3_mprintf("Index at i=%d requires training " - "before inserting data.", - i)); + "before inserting data.", + i)); return SQLITE_ERROR; } - if (!inserted_rowid) { - - auto rc = shadow_data_insert(pTable->getDb(), - pTable->getSchema(), - pTable->getName(), - rowid); - if (rc != SQLITE_OK) - return rc; - - inserted_rowid = true; - } - - (*iter)->getInsert_data().reserve((*iter)->getInsert_data().size() + vec->size()); - (*iter)->getInsert_data().insert( - (*iter)->getInsert_data().end(), - vec->begin(), - vec->end()); - - (*iter)->getInsert_ids().push_back(rowid); + auto rc = shadow_data_insert(pTable->getDb(), + pTable->getSchema(), + pTable->getName(), + rowid); + if (rc != SQLITE_OK) + return rc; + (*iter)->addInsertData(rowid, vec); *pRowid = rowid; } } @@ -978,14 +949,8 @@ static int vssIndexUpdate(sqlite3_vtab *pVTab, for (auto iter = pTable->getIndexes().begin(); iter != pTable->getIndexes().end(); ++iter, i++) { vec_ptr vec = pTable->getVector0_api()->xValueAsVector(argv[2 + VSS_INDEX_COLUMN_VECTORS + i]); - if (vec != nullptr) { - - (*iter)->getTrainings().reserve((*iter)->getTrainings().size() + vec->size()); - (*iter)->getTrainings().insert( - (*iter)->getTrainings().end(), - vec->begin(), - vec->end()); - } + if (vec != nullptr) + (*iter)->addTrainings(vec); } } else { diff --git a/src/vss/vss-index.h b/src/vss/vss-index.h index 7dd3787..1f74925 100644 --- a/src/vss/vss-index.h +++ b/src/vss/vss-index.h @@ -4,8 +4,13 @@ #include "inclusions.h" -// Wrapper around a single faiss index, with training data, insert records, and -// delete records. +/* + * Wrapper around a single faiss index, with training data, insert records, and + * delete records. + * + * An attempt at encapsulating everything related to faiss::Index instances, such as + * training, inserting, deleting, etc. + */ class vss_index { public: @@ -13,6 +18,7 @@ class vss_index { explicit vss_index(faiss::Index *index) : index(index) {} ~vss_index() { + if (index != nullptr) { delete index; } @@ -23,26 +29,54 @@ class vss_index { return index; } - vector & getTrainings() { + void addTrainings(vec_ptr & vec) { + + trainings.reserve(trainings.size() + vec->size()); + trainings.insert(trainings.end(), vec->begin(), vec->end()); + } + + void addInsertData(faiss::idx_t rowId, vec_ptr & vec) { + + insert_data.reserve(insert_data.size() + vec->size()); + insert_data.insert(insert_data.end(), vec->begin(), vec->end()); - return trainings; + insert_ids.push_back(rowId); } - vector & getInsert_data() { + void addDelete(faiss::idx_t rowid) { - return insert_data; + delete_ids.push_back(rowid); } - vector & getInsert_ids() { + bool synchronize() { - return insert_ids; + auto result = tryTrain(); + result = tryDelete() || result; + result = tryInsert() || result; + + // Now that we've updated our faiss::index we delete all temporary data. + reset(); + + return result; } - vector & getDelete_ids() { + void reset() { + + trainings.clear(); + trainings.shrink_to_fit(); + + insert_ids.clear(); + insert_ids.shrink_to_fit(); + + insert_data.clear(); + insert_data.shrink_to_fit(); - return delete_ids; + delete_ids.clear(); + delete_ids.shrink_to_fit(); } +private: + bool tryTrain() { if (trainings.empty()) @@ -89,24 +123,6 @@ class vss_index { return true; } - void reset() { - - trainings.clear(); - trainings.shrink_to_fit(); - - insert_data.clear(); - insert_data.shrink_to_fit(); - - insert_ids.clear(); - insert_ids.shrink_to_fit(); - - delete_ids.clear(); - - delete_ids.shrink_to_fit(); - } - -private: - faiss::Index *index; vector trainings; vector insert_data; From 89d31380c8b1b9199f0fb313c8cae5663f11a595 Mon Sep 17 00:00:00 2001 From: Thomas Hansen Date: Mon, 26 Jun 2023 11:46:49 +0300 Subject: [PATCH 16/66] Comments --- src/vss/vss-index.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/src/vss/vss-index.h b/src/vss/vss-index.h index 1f74925..738af43 100644 --- a/src/vss/vss-index.h +++ b/src/vss/vss-index.h @@ -29,12 +29,22 @@ class vss_index { return index; } + /* + * Adds the specified vector to the index' training material. + * + * Notice, needs to invoke synchronize() later to actually perform training of index. + */ void addTrainings(vec_ptr & vec) { trainings.reserve(trainings.size() + vec->size()); trainings.insert(trainings.end(), vec->begin(), vec->end()); } + /* + * Adds the specified vector to the index' temporary insert data. + * + * Notice, needs to invoke synchronize() later to actually add data to index. + */ void addInsertData(faiss::idx_t rowId, vec_ptr & vec) { insert_data.reserve(insert_data.size() + vec->size()); @@ -43,11 +53,19 @@ class vss_index { insert_ids.push_back(rowId); } + /* + * Adds the specified rowid to the index' temporary delete data. + * + * Notice, needs to invoke synchronize() later to actually delete data from index. + */ void addDelete(faiss::idx_t rowid) { delete_ids.push_back(rowid); } + /* + * Synchronizes index by updating index according to trainings, inserts and deletes. + */ bool synchronize() { auto result = tryTrain(); @@ -60,6 +78,9 @@ class vss_index { return result; } + /* + * Resets all temporary training data to free memory. + */ void reset() { trainings.clear(); From 1eeda54837b44fc52c8d1d3df22df72032f7b14a Mon Sep 17 00:00:00 2001 From: Thomas Hansen Date: Mon, 26 Jun 2023 12:14:22 +0300 Subject: [PATCH 17/66] Bug fix --- src/sqlite-vss.cpp | 14 +++++++------- src/vss/vss-index.h | 41 +++++++++++++++++++---------------------- 2 files changed, 26 insertions(+), 29 deletions(-) diff --git a/src/sqlite-vss.cpp b/src/sqlite-vss.cpp index c85ed45..0631e5d 100644 --- a/src/sqlite-vss.cpp +++ b/src/sqlite-vss.cpp @@ -830,10 +830,10 @@ static int vssIndexSync(sqlite3_vtab *pVTab) { for (auto iter = pTable->getIndexes().begin(); iter != pTable->getIndexes().end(); ++iter, i++) { int rc = write_index((*iter)->getIndex(), - pTable->getDb(), - pTable->getSchema(), - pTable->getName(), - i); + pTable->getDb(), + pTable->getSchema(), + pTable->getName(), + i); if (rc != SQLITE_OK) { @@ -928,9 +928,9 @@ static int vssIndexUpdate(sqlite3_vtab *pVTab, } auto rc = shadow_data_insert(pTable->getDb(), - pTable->getSchema(), - pTable->getName(), - rowid); + pTable->getSchema(), + pTable->getName(), + rowid); if (rc != SQLITE_OK) return rc; diff --git a/src/vss/vss-index.h b/src/vss/vss-index.h index 738af43..cc7a287 100644 --- a/src/vss/vss-index.h +++ b/src/vss/vss-index.h @@ -72,9 +72,6 @@ class vss_index { result = tryDelete() || result; result = tryInsert() || result; - // Now that we've updated our faiss::index we delete all temporary data. - reset(); - return result; } @@ -86,12 +83,12 @@ class vss_index { trainings.clear(); trainings.shrink_to_fit(); - insert_ids.clear(); - insert_ids.shrink_to_fit(); - insert_data.clear(); insert_data.shrink_to_fit(); + insert_ids.clear(); + insert_ids.shrink_to_fit(); + delete_ids.clear(); delete_ids.shrink_to_fit(); } @@ -110,21 +107,6 @@ class vss_index { return true; } - bool tryDelete() { - - if (delete_ids.empty()) - return false; - - faiss::IDSelectorBatch selector(delete_ids.size(), - delete_ids.data()); - - index->remove_ids(selector); - delete_ids.clear(); - delete_ids.shrink_to_fit(); - - return true; - } - bool tryInsert() { if (insert_ids.empty()) @@ -133,7 +115,7 @@ class vss_index { index->add_with_ids( insert_ids.size(), insert_data.data(), - (faiss::idx_t *)insert_ids.data()); + insert_ids.data()); insert_ids.clear(); insert_ids.shrink_to_fit(); @@ -144,6 +126,21 @@ class vss_index { return true; } + bool tryDelete() { + + if (delete_ids.empty()) + return false; + + faiss::IDSelectorBatch selector(delete_ids.size(), + delete_ids.data()); + + index->remove_ids(selector); + delete_ids.clear(); + delete_ids.shrink_to_fit(); + + return true; + } + faiss::Index *index; vector trainings; vector insert_data; From 7ce8e0fc370c60242dc2390868d150aa18689f67 Mon Sep 17 00:00:00 2001 From: Thomas Hansen Date: Mon, 26 Jun 2023 12:32:17 +0300 Subject: [PATCH 18/66] Trying to save build --- src/vss/vss-index.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/vss/vss-index.h b/src/vss/vss-index.h index cc7a287..4790993 100644 --- a/src/vss/vss-index.h +++ b/src/vss/vss-index.h @@ -69,8 +69,10 @@ class vss_index { bool synchronize() { auto result = tryTrain(); - result = tryDelete() || result; - result = tryInsert() || result; + if (tryDelete()) + result = true; + if (tryInsert()) + result = true; return result; } @@ -101,6 +103,7 @@ class vss_index { return false; index->train(trainings.size() / index->d, trainings.data()); + trainings.clear(); trainings.shrink_to_fit(); @@ -135,6 +138,7 @@ class vss_index { delete_ids.data()); index->remove_ids(selector); + delete_ids.clear(); delete_ids.shrink_to_fit(); From 7253c9f203faebd29314dec6046754696d7c5ae2 Mon Sep 17 00:00:00 2001 From: Thomas Hansen Date: Mon, 26 Jun 2023 12:39:30 +0300 Subject: [PATCH 19/66] Puuh, logical error fixed --- src/sqlite-vss.cpp | 2 +- src/vss/vss-index.h | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/src/sqlite-vss.cpp b/src/sqlite-vss.cpp index 0631e5d..65415cf 100644 --- a/src/sqlite-vss.cpp +++ b/src/sqlite-vss.cpp @@ -821,7 +821,7 @@ static int vssIndexSync(sqlite3_vtab *pVTab) { for (auto iter = pTable->getIndexes().begin(); iter != pTable->getIndexes().end(); ++iter) { // Synchronizing index, implying deleting, training, and inserting records according to needs. - needsWriting = (*iter)->synchronize(); + needsWriting = (*iter)->synchronize() || needsWriting; } if (needsWriting) { diff --git a/src/vss/vss-index.h b/src/vss/vss-index.h index 4790993..897e184 100644 --- a/src/vss/vss-index.h +++ b/src/vss/vss-index.h @@ -69,10 +69,8 @@ class vss_index { bool synchronize() { auto result = tryTrain(); - if (tryDelete()) - result = true; - if (tryInsert()) - result = true; + result = tryDelete() || result; + result = tryInsert() || result; return result; } From fe7773320e462963c1a5e2d98ea3d5f4a7e49607 Mon Sep 17 00:00:00 2001 From: Thomas Hansen Date: Mon, 26 Jun 2023 12:48:09 +0300 Subject: [PATCH 20/66] Optimising and trying to get rid of build bug --- src/sqlite-vss.cpp | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/src/sqlite-vss.cpp b/src/sqlite-vss.cpp index 65415cf..9c022ff 100644 --- a/src/sqlite-vss.cpp +++ b/src/sqlite-vss.cpp @@ -816,18 +816,11 @@ static int vssIndexSync(sqlite3_vtab *pVTab) { try { - bool needsWriting = false; - - for (auto iter = pTable->getIndexes().begin(); iter != pTable->getIndexes().end(); ++iter) { + auto i = 0; + for (auto iter = pTable->getIndexes().begin(); iter != pTable->getIndexes().end(); ++iter, i++) { // Synchronizing index, implying deleting, training, and inserting records according to needs. - needsWriting = (*iter)->synchronize() || needsWriting; - } - - if (needsWriting) { - - int i = 0; - for (auto iter = pTable->getIndexes().begin(); iter != pTable->getIndexes().end(); ++iter, i++) { + if ((*iter)->synchronize()) { int rc = write_index((*iter)->getIndex(), pTable->getDb(), @@ -838,8 +831,14 @@ static int vssIndexSync(sqlite3_vtab *pVTab) { if (rc != SQLITE_OK) { pTable->setError(sqlite3_mprintf("Error saving index (%d): %s", - rc, - sqlite3_errmsg(pTable->getDb()))); + rc, + sqlite3_errmsg(pTable->getDb()))); + + // Clearing all indexes to cleanup after ourselves. + for (auto iter2 = pTable->getIndexes().begin(); iter2 != pTable->getIndexes().end(); ++iter2) { + + (*iter2)->reset(); + } return rc; } } From feace327f4b960cccf41a686ecead3884a14aeef Mon Sep 17 00:00:00 2001 From: Thomas Hansen Date: Mon, 26 Jun 2023 13:13:25 +0300 Subject: [PATCH 21/66] Update sqlite-vss.cpp --- src/sqlite-vss.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/sqlite-vss.cpp b/src/sqlite-vss.cpp index 9c022ff..c9bdf09 100644 --- a/src/sqlite-vss.cpp +++ b/src/sqlite-vss.cpp @@ -822,6 +822,10 @@ static int vssIndexSync(sqlite3_vtab *pVTab) { // Synchronizing index, implying deleting, training, and inserting records according to needs. if ((*iter)->synchronize()) { + /* + * If the above invocation returned true, we've got updates to currently iterated index, + * hence writing to db. + */ int rc = write_index((*iter)->getIndex(), pTable->getDb(), pTable->getSchema(), From bcef4f35261302ec68d6d57a0bc2eb6be1a3fb6c Mon Sep 17 00:00:00 2001 From: Thomas Hansen Date: Mon, 26 Jun 2023 13:26:04 +0300 Subject: [PATCH 22/66] Fixing bug --- src/sqlite-vss.cpp | 91 ++++++++++++++++++++++++++++++++++----------- src/vss/vss-index.h | 2 - 2 files changed, 69 insertions(+), 24 deletions(-) diff --git a/src/sqlite-vss.cpp b/src/sqlite-vss.cpp index c9bdf09..4ccfe97 100644 --- a/src/sqlite-vss.cpp +++ b/src/sqlite-vss.cpp @@ -816,33 +816,71 @@ static int vssIndexSync(sqlite3_vtab *pVTab) { try { - auto i = 0; - for (auto iter = pTable->getIndexes().begin(); iter != pTable->getIndexes().end(); ++iter, i++) { + bool needsWriting = false; + + auto idxCol = 0; + for (auto iter = pTable->getIndexes().begin(); iter != pTable->getIndexes().end(); ++iter, idxCol++) { + + // Checking if index needs training. + if (!(*iter)->trainings.empty()) { + + (*iter)->getIndex()->train( + (*iter)->trainings.size() / (*iter)->getIndex()->d, + (*iter)->trainings.data()); + + (*iter)->trainings.clear(); + (*iter)->trainings.shrink_to_fit(); + + needsWriting = true; + } + + // Checking if we're deleting records from the index. + if (!(*iter)->delete_ids.empty()) { + + faiss::IDSelectorBatch selector((*iter)->delete_ids.size(), + (*iter)->delete_ids.data()); + + (*iter)->getIndex()->remove_ids(selector); + (*iter)->delete_ids.clear(); + (*iter)->delete_ids.shrink_to_fit(); + + needsWriting = true; + } + + // Checking if we're inserting records to the index. + if (!(*iter)->insert_data.empty()) { + + (*iter)->getIndex()->add_with_ids( + (*iter)->insert_ids.size(), + (*iter)->insert_data.data(), + (faiss::idx_t *)(*iter)->insert_ids.data()); + + (*iter)->insert_ids.clear(); + (*iter)->insert_ids.shrink_to_fit(); + + (*iter)->insert_data.clear(); + (*iter)->insert_data.shrink_to_fit(); - // Synchronizing index, implying deleting, training, and inserting records according to needs. - if ((*iter)->synchronize()) { + needsWriting = true; + } + } + + if (needsWriting) { + + int i = 0; + for (auto iter = pTable->getIndexes().begin(); iter != pTable->getIndexes().end(); ++iter, i++) { - /* - * If the above invocation returned true, we've got updates to currently iterated index, - * hence writing to db. - */ int rc = write_index((*iter)->getIndex(), - pTable->getDb(), - pTable->getSchema(), - pTable->getName(), - i); + pTable->getDb(), + pTable->getSchema(), + pTable->getName(), + i); if (rc != SQLITE_OK) { pTable->setError(sqlite3_mprintf("Error saving index (%d): %s", - rc, - sqlite3_errmsg(pTable->getDb()))); - - // Clearing all indexes to cleanup after ourselves. - for (auto iter2 = pTable->getIndexes().begin(); iter2 != pTable->getIndexes().end(); ++iter2) { - - (*iter2)->reset(); - } + rc, + sqlite3_errmsg(pTable->getDb()))); return rc; } } @@ -857,8 +895,17 @@ static int vssIndexSync(sqlite3_vtab *pVTab) { for (auto iter = pTable->getIndexes().begin(); iter != pTable->getIndexes().end(); ++iter) { - // Cleanups in case we've got hanging data. - (*iter)->reset(); + (*iter)->insert_ids.clear(); + (*iter)->insert_ids.shrink_to_fit(); + + (*iter)->insert_data.clear(); + (*iter)->insert_data.shrink_to_fit(); + + (*iter)->delete_ids.clear(); + (*iter)->delete_ids.shrink_to_fit(); + + (*iter)->trainings.clear(); + (*iter)->trainings.shrink_to_fit(); } return SQLITE_ERROR; diff --git a/src/vss/vss-index.h b/src/vss/vss-index.h index 897e184..e19b761 100644 --- a/src/vss/vss-index.h +++ b/src/vss/vss-index.h @@ -93,8 +93,6 @@ class vss_index { delete_ids.shrink_to_fit(); } -private: - bool tryTrain() { if (trainings.empty()) From ca7763b812e44912063442d6c760e9f8c3eec5a5 Mon Sep 17 00:00:00 2001 From: Thomas Hansen Date: Mon, 26 Jun 2023 13:30:07 +0300 Subject: [PATCH 23/66] Revert "Fixing bug" This reverts commit bcef4f35261302ec68d6d57a0bc2eb6be1a3fb6c. --- src/sqlite-vss.cpp | 91 +++++++++++---------------------------------- src/vss/vss-index.h | 2 + 2 files changed, 24 insertions(+), 69 deletions(-) diff --git a/src/sqlite-vss.cpp b/src/sqlite-vss.cpp index 4ccfe97..c9bdf09 100644 --- a/src/sqlite-vss.cpp +++ b/src/sqlite-vss.cpp @@ -816,71 +816,33 @@ static int vssIndexSync(sqlite3_vtab *pVTab) { try { - bool needsWriting = false; - - auto idxCol = 0; - for (auto iter = pTable->getIndexes().begin(); iter != pTable->getIndexes().end(); ++iter, idxCol++) { - - // Checking if index needs training. - if (!(*iter)->trainings.empty()) { - - (*iter)->getIndex()->train( - (*iter)->trainings.size() / (*iter)->getIndex()->d, - (*iter)->trainings.data()); - - (*iter)->trainings.clear(); - (*iter)->trainings.shrink_to_fit(); - - needsWriting = true; - } - - // Checking if we're deleting records from the index. - if (!(*iter)->delete_ids.empty()) { - - faiss::IDSelectorBatch selector((*iter)->delete_ids.size(), - (*iter)->delete_ids.data()); - - (*iter)->getIndex()->remove_ids(selector); - (*iter)->delete_ids.clear(); - (*iter)->delete_ids.shrink_to_fit(); - - needsWriting = true; - } - - // Checking if we're inserting records to the index. - if (!(*iter)->insert_data.empty()) { - - (*iter)->getIndex()->add_with_ids( - (*iter)->insert_ids.size(), - (*iter)->insert_data.data(), - (faiss::idx_t *)(*iter)->insert_ids.data()); - - (*iter)->insert_ids.clear(); - (*iter)->insert_ids.shrink_to_fit(); - - (*iter)->insert_data.clear(); - (*iter)->insert_data.shrink_to_fit(); - - needsWriting = true; - } - } - - if (needsWriting) { + auto i = 0; + for (auto iter = pTable->getIndexes().begin(); iter != pTable->getIndexes().end(); ++iter, i++) { - int i = 0; - for (auto iter = pTable->getIndexes().begin(); iter != pTable->getIndexes().end(); ++iter, i++) { + // Synchronizing index, implying deleting, training, and inserting records according to needs. + if ((*iter)->synchronize()) { + /* + * If the above invocation returned true, we've got updates to currently iterated index, + * hence writing to db. + */ int rc = write_index((*iter)->getIndex(), - pTable->getDb(), - pTable->getSchema(), - pTable->getName(), - i); + pTable->getDb(), + pTable->getSchema(), + pTable->getName(), + i); if (rc != SQLITE_OK) { pTable->setError(sqlite3_mprintf("Error saving index (%d): %s", - rc, - sqlite3_errmsg(pTable->getDb()))); + rc, + sqlite3_errmsg(pTable->getDb()))); + + // Clearing all indexes to cleanup after ourselves. + for (auto iter2 = pTable->getIndexes().begin(); iter2 != pTable->getIndexes().end(); ++iter2) { + + (*iter2)->reset(); + } return rc; } } @@ -895,17 +857,8 @@ static int vssIndexSync(sqlite3_vtab *pVTab) { for (auto iter = pTable->getIndexes().begin(); iter != pTable->getIndexes().end(); ++iter) { - (*iter)->insert_ids.clear(); - (*iter)->insert_ids.shrink_to_fit(); - - (*iter)->insert_data.clear(); - (*iter)->insert_data.shrink_to_fit(); - - (*iter)->delete_ids.clear(); - (*iter)->delete_ids.shrink_to_fit(); - - (*iter)->trainings.clear(); - (*iter)->trainings.shrink_to_fit(); + // Cleanups in case we've got hanging data. + (*iter)->reset(); } return SQLITE_ERROR; diff --git a/src/vss/vss-index.h b/src/vss/vss-index.h index e19b761..897e184 100644 --- a/src/vss/vss-index.h +++ b/src/vss/vss-index.h @@ -93,6 +93,8 @@ class vss_index { delete_ids.shrink_to_fit(); } +private: + bool tryTrain() { if (trainings.empty()) From 0d2b5147691f998924bf830060319ab591ab3406 Mon Sep 17 00:00:00 2001 From: Thomas Hansen Date: Mon, 26 Jun 2023 13:40:24 +0300 Subject: [PATCH 24/66] bug fix --- src/sqlite-vss.cpp | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/src/sqlite-vss.cpp b/src/sqlite-vss.cpp index c9bdf09..800ced9 100644 --- a/src/sqlite-vss.cpp +++ b/src/sqlite-vss.cpp @@ -914,6 +914,7 @@ static int vssIndexUpdate(sqlite3_vtab *pVTab, vec_ptr vec; sqlite3_int64 rowid = sqlite3_value_int64(argv[1]); + bool inserted_rowid = false; auto i = 0; for (auto iter = pTable->getIndexes().begin(); iter != pTable->getIndexes().end(); ++iter, i++) { @@ -930,12 +931,18 @@ static int vssIndexUpdate(sqlite3_vtab *pVTab, return SQLITE_ERROR; } - auto rc = shadow_data_insert(pTable->getDb(), - pTable->getSchema(), - pTable->getName(), - rowid); - if (rc != SQLITE_OK) - return rc; + if (!inserted_rowid) { + + auto rc = shadow_data_insert(pTable->getDb(), + pTable->getSchema(), + pTable->getName(), + rowid); + + if (rc != SQLITE_OK) + return rc; + + inserted_rowid = true; + } (*iter)->addInsertData(rowid, vec); *pRowid = rowid; From 8b1c9b0fe9442d1f51633804c2262efc1560d90c Mon Sep 17 00:00:00 2001 From: Thomas Hansen Date: Mon, 26 Jun 2023 13:50:38 +0300 Subject: [PATCH 25/66] Minor cleanups --- src/sqlite-vss.cpp | 37 ++++++++++++++++++++----------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/src/sqlite-vss.cpp b/src/sqlite-vss.cpp index 800ced9..eec2484 100644 --- a/src/sqlite-vss.cpp +++ b/src/sqlite-vss.cpp @@ -18,18 +18,25 @@ struct VssSearchParams { sqlite3_int64 k; }; -void delVssSearchParams(void *p) { - - VssSearchParams *self = (VssSearchParams *)p; - delete self; -} - struct VssRangeSearchParams { vec_ptr vector; float distance; }; +struct VssIndexColumn { + + string name; + sqlite3_int64 dimensions; + string factory; +}; + +void delVssSearchParams(void *p) { + + VssSearchParams *self = (VssSearchParams *)p; + delete self; +} + void delVssRangeSearchParams(void *p) { auto self = (VssRangeSearchParams *)p; @@ -38,7 +45,7 @@ void delVssRangeSearchParams(void *p) { #pragma endregion -#pragma region Vtab +#pragma region Virtual table implementation static void vssSearchParamsFunc(sqlite3_context *context, int argc, @@ -203,9 +210,9 @@ static int shadow_data_delete(sqlite3 *db, return SQLITE_OK; } -static faiss::Index *read_index_select(sqlite3 *db, - const char *name, - int indexId) { +static faiss::Index * read_index_select(sqlite3 *db, + const char *name, + int indexId) { SqlStatement select(db, sqlite3_mprintf("select idx from \"%w_index\" where rowid = ?", @@ -262,6 +269,7 @@ static int create_shadow_tables(sqlite3 *db, static int drop_shadow_tables(sqlite3 *db, char *name) { + // Dropping both x_index and x_data shadow tables. const char *drops[2] = {"drop table \"%w_index\";", "drop table \"%w_data\";"}; @@ -280,13 +288,6 @@ static int drop_shadow_tables(sqlite3 *db, char *name) { return SQLITE_OK; } -struct VssIndexColumn { - - string name; - sqlite3_int64 dimensions; - string factory; -}; - unique_ptr> parse_constructor(int argc, const char *const *argv) { @@ -914,7 +915,9 @@ static int vssIndexUpdate(sqlite3_vtab *pVTab, vec_ptr vec; sqlite3_int64 rowid = sqlite3_value_int64(argv[1]); + // Needed to make sure we insert null record into x_data table. bool inserted_rowid = false; + auto i = 0; for (auto iter = pTable->getIndexes().begin(); iter != pTable->getIndexes().end(); ++iter, i++) { From 3f0551eca2cfe62d6e79c781eb39f8324e35c11e Mon Sep 17 00:00:00 2001 From: thomas-hansen-resolve Date: Mon, 26 Jun 2023 15:30:49 +0300 Subject: [PATCH 26/66] Better encapsulation and cohesion --- src/sqlite-vss.cpp | 95 ++++----------------------------------------- src/vss/vss-index.h | 76 ++++++++++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+), 87 deletions(-) diff --git a/src/sqlite-vss.cpp b/src/sqlite-vss.cpp index eec2484..fad9760 100644 --- a/src/sqlite-vss.cpp +++ b/src/sqlite-vss.cpp @@ -86,83 +86,6 @@ static void vssRangeSearchParamsFunc(sqlite3_context *context, sqlite3_result_pointer(context, params, "vss0_rangesearchparams", delVssRangeSearchParams); } -static int write_index_insert(faiss::VectorIOWriter &writer, - sqlite3 *db, - char *schema, - char *name, - int rowId) { - - // If inserts fails it means index already exists. - SqlStatement insert(db, - sqlite3_mprintf("insert into \"%w\".\"%w_index\"(rowid, idx) values (?, ?)", - schema, - name)); - - if (insert.prepare() != SQLITE_OK) - return SQLITE_ERROR; - - if (insert.bind_int64(1, rowId) != SQLITE_OK) - return SQLITE_ERROR; - - if (insert.bind_blob64(2, writer.data.data(), writer.data.size()) != SQLITE_OK) - return SQLITE_ERROR; - - auto rc = insert.step(); - if (rc == SQLITE_DONE) - return SQLITE_OK; // Index did not exist, and we successfully inserted it. - - return rc; -} - -static int write_index_update(faiss::VectorIOWriter &writer, - sqlite3 *db, - char *schema, - char *name, - int rowId) { - - // Updating existing index. - SqlStatement update(db, - sqlite3_mprintf("update \"%w\".\"%w_index\" set idx = ? where rowid = ?", - schema, - name)); - - if (update.prepare() != SQLITE_OK) - return SQLITE_ERROR; - - if (update.bind_blob64(1, writer.data.data(), writer.data.size()) != SQLITE_OK) - return SQLITE_ERROR; - - if (update.bind_int64(2, rowId) != SQLITE_OK) - return SQLITE_ERROR; - - auto rc = update.step(); - if (rc == SQLITE_DONE) - return SQLITE_OK; // We successfully updated existing index. - - return rc; -} - -static int write_index(faiss::Index *index, - sqlite3 *db, - char *schema, - char *name, - int rowId) { - - // Writing our index - faiss::VectorIOWriter writer; - faiss::write_index(index, &writer); - - // First trying to insert index, if that fails with ROW constraing error, we try to update existing index. - if (write_index_insert(writer, db, schema, name, rowId) == SQLITE_OK) - return SQLITE_OK; - - if (sqlite3_extended_errcode(db) != SQLITE_CONSTRAINT_ROWID) - return SQLITE_ERROR; // Insert failed for unknown error - - // Insert failed because index already existed, updating existing index. - return write_index_update(writer, db, schema, name, rowId); -} - static int shadow_data_insert(sqlite3 *db, char *schema, char *name, @@ -407,11 +330,10 @@ static int init(sqlite3 *db, try { - int rc = write_index((*iter)->getIndex(), - pTable->getDb(), - pTable->getSchema(), - pTable->getName(), - i); + int rc = (*iter)->write_index(pTable->getDb(), + pTable->getSchema(), + pTable->getName(), + i); if (rc != SQLITE_OK) return rc; @@ -827,11 +749,10 @@ static int vssIndexSync(sqlite3_vtab *pVTab) { * If the above invocation returned true, we've got updates to currently iterated index, * hence writing to db. */ - int rc = write_index((*iter)->getIndex(), - pTable->getDb(), - pTable->getSchema(), - pTable->getName(), - i); + int rc = (*iter)->write_index(pTable->getDb(), + pTable->getSchema(), + pTable->getName(), + i); if (rc != SQLITE_OK) { diff --git a/src/vss/vss-index.h b/src/vss/vss-index.h index 897e184..ef94d06 100644 --- a/src/vss/vss-index.h +++ b/src/vss/vss-index.h @@ -93,8 +93,84 @@ class vss_index { delete_ids.shrink_to_fit(); } + int write_index(sqlite3 *db, + char *schema, + char *name, + int rowId) { + + // Writing our index + faiss::VectorIOWriter writer; + faiss::write_index(index, &writer); + + // First trying to insert index, if that fails with ROW constraing error, we try to update existing index. + if (write_index_insert(writer, db, schema, name, rowId) == SQLITE_OK) + return SQLITE_OK; + + if (sqlite3_extended_errcode(db) != SQLITE_CONSTRAINT_ROWID) + return SQLITE_ERROR; // Insert failed for unknown error + + // Insert failed because index already existed, updating existing index. + return write_index_update(writer, db, schema, name, rowId); + } + private: + int write_index_insert(faiss::VectorIOWriter &writer, + sqlite3 *db, + char *schema, + char *name, + int rowId) { + + // If inserts fails it means index already exists. + SqlStatement insert(db, + sqlite3_mprintf("insert into \"%w\".\"%w_index\"(rowid, idx) values (?, ?)", + schema, + name)); + + if (insert.prepare() != SQLITE_OK) + return SQLITE_ERROR; + + if (insert.bind_int64(1, rowId) != SQLITE_OK) + return SQLITE_ERROR; + + if (insert.bind_blob64(2, writer.data.data(), writer.data.size()) != SQLITE_OK) + return SQLITE_ERROR; + + auto rc = insert.step(); + if (rc == SQLITE_DONE) + return SQLITE_OK; // Index did not exist, and we successfully inserted it. + + return rc; + } + + int write_index_update(faiss::VectorIOWriter &writer, + sqlite3 *db, + char *schema, + char *name, + int rowId) { + + // Updating existing index. + SqlStatement update(db, + sqlite3_mprintf("update \"%w\".\"%w_index\" set idx = ? where rowid = ?", + schema, + name)); + + if (update.prepare() != SQLITE_OK) + return SQLITE_ERROR; + + if (update.bind_blob64(1, writer.data.data(), writer.data.size()) != SQLITE_OK) + return SQLITE_ERROR; + + if (update.bind_int64(2, rowId) != SQLITE_OK) + return SQLITE_ERROR; + + auto rc = update.step(); + if (rc == SQLITE_DONE) + return SQLITE_OK; // We successfully updated existing index. + + return rc; + } + bool tryTrain() { if (trainings.empty()) From 1ff26192c2d37cb362dbde679b57bef775cf995c Mon Sep 17 00:00:00 2001 From: thomas-hansen-resolve Date: Mon, 26 Jun 2023 17:13:55 +0300 Subject: [PATCH 27/66] More encapsulation Getting ready for applying read/write locks --- src/sqlite-vss.cpp | 28 +++++++++++++------------- src/vss/inclusions.h | 1 + src/vss/vss-index.h | 48 +++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 62 insertions(+), 15 deletions(-) diff --git a/src/sqlite-vss.cpp b/src/sqlite-vss.cpp index fad9760..91cabb2 100644 --- a/src/sqlite-vss.cpp +++ b/src/sqlite-vss.cpp @@ -531,15 +531,15 @@ static int vssIndexFilter(sqlite3_vtab_cursor *pVtabCursor, } int nq = 1; - auto index = pCursor->getTable()->getIndexes().at(idxNum)->getIndex(); + auto index = pCursor->getTable()->getIndexes().at(idxNum); - if (query_vector->size() != index->d) { + if (!index->canQuery(query_vector)) { auto ptrVtab = static_cast(pCursor->pVtab); ptrVtab->setError(sqlite3_mprintf( "Input query size doesn't match index dimensions: %ld != %ld", query_vector->size(), - index->d)); + index->dimensions())); return SQLITE_ERROR; } @@ -555,15 +555,15 @@ static int vssIndexFilter(sqlite3_vtab_cursor *pVtabCursor, } // To avoid trying to select more records than number of records in index. - auto searchMax = min(static_cast(pCursor->getLimit()) * nq, index->ntotal * nq); + auto searchMax = min(static_cast(pCursor->getLimit()) * nq, index->size() * nq); pCursor->resetSearch(searchMax); index->search(nq, - query_vector->data(), + query_vector, searchMax, - pCursor->getSearch_distances().data(), - pCursor->getSearch_ids().data()); + pCursor->getSearch_distances(), + pCursor->getSearch_ids()); } else if (strcmp(idxStr, "range_search") == 0) { @@ -577,12 +577,12 @@ static int vssIndexFilter(sqlite3_vtab_cursor *pVtabCursor, vector nns(params->distance * nq); pCursor->getRange_search_result() = unique_ptr(new faiss::RangeSearchResult(nq, true)); - auto index = pCursor->getTable()->getIndexes().at(idxNum)->getIndex(); + auto index = pCursor->getTable()->getIndexes().at(idxNum); index->range_search(nq, - params->vector->data(), + params->vector, params->distance, - pCursor->getRange_search_result().get()); + pCursor->getRange_search_result()); } else if (strcmp(idxStr, "fullscan") == 0) { @@ -701,15 +701,15 @@ static int vssIndexColumn(sqlite3_vtab_cursor *cur, } else if (i >= VSS_INDEX_COLUMN_VECTORS) { auto index = - pCursor->getTable()->getIndexes().at(i - VSS_INDEX_COLUMN_VECTORS)->getIndex(); + pCursor->getTable()->getIndexes().at(i - VSS_INDEX_COLUMN_VECTORS); - vector vec(index->d); + vector vec(index->dimensions()); sqlite3_int64 rowId; vssIndexRowid(cur, &rowId); try { - index->reconstruct(rowId, vec.data()); + index->reconstruct(rowId, vec); } catch (faiss::FaissException &e) { @@ -846,7 +846,7 @@ static int vssIndexUpdate(sqlite3_vtab *pVTab, argv[2 + VSS_INDEX_COLUMN_VECTORS + i])) != nullptr) { // Make sure the index is already trained, if it's needed - if (!(*iter)->getIndex()->is_trained) { + if (!(*iter)->isTrained()) { pTable->setError(sqlite3_mprintf("Index at i=%d requires training " "before inserting data.", diff --git a/src/vss/inclusions.h b/src/vss/inclusions.h index 006d432..bd0ab91 100644 --- a/src/vss/inclusions.h +++ b/src/vss/inclusions.h @@ -12,6 +12,7 @@ SQLITE_EXTENSION_INIT1 #include #include #include +#include #include #include diff --git a/src/vss/vss-index.h b/src/vss/vss-index.h index ef94d06..cff825b 100644 --- a/src/vss/vss-index.h +++ b/src/vss/vss-index.h @@ -29,6 +29,52 @@ class vss_index { return index; } + // Returns false if index requires training before inserting items to it. + bool isTrained() { + + return index->is_trained; + } + + // Reconstructs the original vector, requires IDMap2 string in index factory to work. + void reconstruct(sqlite3_int64 rowid, vector & vector) { + + index->reconstruct(rowid, vector.data()); + } + + // Returns true if specified vector is allowed to query index. + bool canQuery(vec_ptr & vec) { + + return vec->size() == index->d; + } + + // Queries the index for matches matching the specified vector + void search(int nq, + vec_ptr & vec, + faiss::idx_t max, + vector & distances, + vector & ids) { + + index->search(nq, vec->data(), max, distances.data(), ids.data()); + } + + // Queries the index for a range of items. + void range_search(int nq, vec_ptr & vec, float distance, unique_ptr & result) { + + index->range_search(nq, vec->data(), distance, result.get()); + } + + // Returns dimensions of index. + faiss::idx_t dimensions() { + + return index->d; + } + + // Returns the size of index. + faiss::idx_t size() { + + return index->ntotal; + } + /* * Adds the specified vector to the index' training material. * @@ -219,7 +265,7 @@ class vss_index { return true; } - faiss::Index *index; + faiss::Index * index; vector trainings; vector insert_data; vector insert_ids; From dd6cfd114705b46a76692c833db3606d0dee9a2d Mon Sep 17 00:00:00 2001 From: thomas-hansen-resolve Date: Mon, 26 Jun 2023 17:50:40 +0300 Subject: [PATCH 28/66] Bumping to C++ 17 - Required to have shared_mutex --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c59d993..5100d5f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -12,7 +12,7 @@ endif() configure_file(src/sqlite-vss.h.in sqlite-vss.h) configure_file(src/sqlite-vector.h.in sqlite-vector.h) -set(CMAKE_CXX_STANDARD 11) +set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) option(FAISS_ENABLE_GPU "" OFF) From 543384c3656c3eba8519285708ae5c2d4515022f Mon Sep 17 00:00:00 2001 From: thomas-hansen-resolve Date: Mon, 26 Jun 2023 17:51:05 +0300 Subject: [PATCH 29/66] Further encapsulating getting ready for mutex --- src/vss/inclusions.h | 1 - src/vss/vss-index.h | 9 +++------ 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/src/vss/inclusions.h b/src/vss/inclusions.h index bd0ab91..006d432 100644 --- a/src/vss/inclusions.h +++ b/src/vss/inclusions.h @@ -12,7 +12,6 @@ SQLITE_EXTENSION_INIT1 #include #include #include -#include #include #include diff --git a/src/vss/vss-index.h b/src/vss/vss-index.h index cff825b..b4cb235 100644 --- a/src/vss/vss-index.h +++ b/src/vss/vss-index.h @@ -3,6 +3,7 @@ #define VSS_INDEX_H #include "inclusions.h" +#include /* * Wrapper around a single faiss index, with training data, insert records, and @@ -15,7 +16,7 @@ class vss_index { public: - explicit vss_index(faiss::Index *index) : index(index) {} + explicit vss_index(faiss::Index *index) : index(index) { } ~vss_index() { @@ -24,11 +25,6 @@ class vss_index { } } - faiss::Index * getIndex() { - - return index; - } - // Returns false if index requires training before inserting items to it. bool isTrained() { @@ -265,6 +261,7 @@ class vss_index { return true; } + std::shared_mutex lock; faiss::Index * index; vector trainings; vector insert_data; From 4b540be2d3a64fd5b1202c7e84dc8a6299fc7dd7 Mon Sep 17 00:00:00 2001 From: thomas-hansen-resolve Date: Mon, 26 Jun 2023 18:05:49 +0300 Subject: [PATCH 30/66] Adding locks in crucial methods --- src/vss/vss-index.h | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/src/vss/vss-index.h b/src/vss/vss-index.h index b4cb235..9f6019d 100644 --- a/src/vss/vss-index.h +++ b/src/vss/vss-index.h @@ -50,12 +50,16 @@ class vss_index { vector & distances, vector & ids) { + shared_lock lock(_lock); + index->search(nq, vec->data(), max, distances.data(), ids.data()); } // Queries the index for a range of items. void range_search(int nq, vec_ptr & vec, float distance, unique_ptr & result) { + shared_lock lock(_lock); + index->range_search(nq, vec->data(), distance, result.get()); } @@ -78,6 +82,8 @@ class vss_index { */ void addTrainings(vec_ptr & vec) { + unique_lock lock(_lock); + trainings.reserve(trainings.size() + vec->size()); trainings.insert(trainings.end(), vec->begin(), vec->end()); } @@ -89,6 +95,8 @@ class vss_index { */ void addInsertData(faiss::idx_t rowId, vec_ptr & vec) { + unique_lock lock(_lock); + insert_data.reserve(insert_data.size() + vec->size()); insert_data.insert(insert_data.end(), vec->begin(), vec->end()); @@ -102,6 +110,8 @@ class vss_index { */ void addDelete(faiss::idx_t rowid) { + unique_lock lock(_lock); + delete_ids.push_back(rowid); } @@ -110,6 +120,8 @@ class vss_index { */ bool synchronize() { + unique_lock lock(_lock); + auto result = tryTrain(); result = tryDelete() || result; result = tryInsert() || result; @@ -122,6 +134,8 @@ class vss_index { */ void reset() { + unique_lock lock(_lock); + trainings.clear(); trainings.shrink_to_fit(); @@ -140,6 +154,8 @@ class vss_index { char *name, int rowId) { + unique_lock lock(_lock); + // Writing our index faiss::VectorIOWriter writer; faiss::write_index(index, &writer); @@ -261,7 +277,7 @@ class vss_index { return true; } - std::shared_mutex lock; + std::shared_mutex _lock; faiss::Index * index; vector trainings; vector insert_data; From 4d98e5561e9d6e10492068c4b71fd2a1a29808fd Mon Sep 17 00:00:00 2001 From: Thomas Hansen Date: Mon, 26 Jun 2023 19:19:52 +0300 Subject: [PATCH 31/66] Creating factory constructor getting ready to cache faiss Index * --- src/sqlite-vss.cpp | 92 +++++++++++++-------------------------------- src/vss/vss-index.h | 53 +++++++++++++++++++++++++- 2 files changed, 78 insertions(+), 67 deletions(-) diff --git a/src/sqlite-vss.cpp b/src/sqlite-vss.cpp index 91cabb2..b19e96a 100644 --- a/src/sqlite-vss.cpp +++ b/src/sqlite-vss.cpp @@ -133,34 +133,6 @@ static int shadow_data_delete(sqlite3 *db, return SQLITE_OK; } -static faiss::Index * read_index_select(sqlite3 *db, - const char *name, - int indexId) { - - SqlStatement select(db, - sqlite3_mprintf("select idx from \"%w_index\" where rowid = ?", - name)); - - if (select.prepare() != SQLITE_OK) - return nullptr; - - if (select.bind_int64(1, indexId) != SQLITE_OK) - return nullptr; - - if (select.step() != SQLITE_ROW) - return nullptr; - - auto index_data = select.column_blob(0); - auto size = select.column_bytes(0); - - faiss::VectorIOReader reader; - copy((const uint8_t *)index_data, - ((const uint8_t *)index_data) + size, - back_inserter(reader.data)); - - return faiss::read_index(&reader); -} - static int create_shadow_tables(sqlite3 *db, const char *schema, const char *name, @@ -299,36 +271,31 @@ static int init(sqlite3 *db, *ppVtab = pTable; - if (isCreate) { - - for (auto iter = columns->begin(); iter != columns->end(); ++iter) { - - try { + try { - auto index = faiss::index_factory(iter->dimensions, iter->factory.c_str()); - pTable->getIndexes().push_back(new vss_index(index)); + if (isCreate) { - } catch (faiss::FaissException &e) { + auto i = 0; + for (auto iter = columns->begin(); iter != columns->end(); ++iter, i++) { - *pzErr = sqlite3_mprintf("Error building index factory for %s, exception was: %s", - iter->name.c_str(), - e.msg.c_str()); + pTable->getIndexes().push_back( + vss_index::factory(db, + argv[2], + i, + &iter->factory, + iter->dimensions)); - return SQLITE_ERROR; } - } - - rc = create_shadow_tables(db, argv[1], argv[2], columns->size()); - if (rc != SQLITE_OK) - return rc; - // Shadow tables were successully created. - // After shadow tables are created, write the initial index state to - // shadow _index. - auto i = 0; - for (auto iter = pTable->getIndexes().begin(); iter != pTable->getIndexes().end(); ++iter, i++) { + rc = create_shadow_tables(db, argv[1], argv[2], columns->size()); + if (rc != SQLITE_OK) + return rc; - try { + // Shadow tables were successully created. + // After shadow tables are created, write the initial index state to + // shadow _index. + i = 0; + for (auto iter = pTable->getIndexes().begin(); iter != pTable->getIndexes().end(); ++iter, i++) { int rc = (*iter)->write_index(pTable->getDb(), pTable->getSchema(), @@ -337,27 +304,22 @@ static int init(sqlite3 *db, if (rc != SQLITE_OK) return rc; + } - } catch (faiss::FaissException &e) { + } else { - return SQLITE_ERROR; + for (int i = 0; i < columns->size(); i++) { + + pTable->getIndexes().push_back(vss_index::factory(db, argv[2], i, nullptr, -1)); } } - } else { - - for (int i = 0; i < columns->size(); i++) { + } catch (faiss::FaissException &e) { - auto index = read_index_select(db, argv[2], i); + *pzErr = sqlite3_mprintf("Error building index factory, exception was: %s", + e.msg.c_str()); - // Index in shadow table should always be available, integrity check - // to avoid null pointer - if (index == nullptr) { - *pzErr = sqlite3_mprintf("Could not read index at position %d", i); - return SQLITE_ERROR; - } - pTable->getIndexes().push_back(new vss_index(index)); - } + return SQLITE_ERROR; } return SQLITE_OK; diff --git a/src/vss/vss-index.h b/src/vss/vss-index.h index 9f6019d..188dcf1 100644 --- a/src/vss/vss-index.h +++ b/src/vss/vss-index.h @@ -16,8 +16,6 @@ class vss_index { public: - explicit vss_index(faiss::Index *index) : index(index) { } - ~vss_index() { if (index != nullptr) { @@ -171,8 +169,59 @@ class vss_index { return write_index_update(writer, db, schema, name, rowId); } + // Creates a new vss_index or returns a cached index to caller. + static vss_index * factory(sqlite3 *db, + const char *name, + int indexId, + string * factoryArgs, + int dimensions) { + + string key = name; + key += indexId; + if (factoryArgs == nullptr) { + + unique_ptr tmp(new vss_index(vss_index::read_index_select(db, name, indexId))); + return tmp.release(); + + } else { + + unique_ptr tmp(new vss_index(faiss::index_factory(dimensions, factoryArgs->c_str()))); + return tmp.release(); + } + } + private: + explicit vss_index(faiss::Index *index) : index(index) { } + + static faiss::Index * read_index_select(sqlite3 *db, + const char *name, + int indexId) { + + SqlStatement select(db, + sqlite3_mprintf("select idx from \"%w_index\" where rowid = ?", + name)); + + if (select.prepare() != SQLITE_OK) + return nullptr; + + if (select.bind_int64(1, indexId) != SQLITE_OK) + return nullptr; + + if (select.step() != SQLITE_ROW) + return nullptr; + + auto index_data = select.column_blob(0); + auto size = select.column_bytes(0); + + faiss::VectorIOReader reader; + copy((const uint8_t *)index_data, + ((const uint8_t *)index_data) + size, + back_inserter(reader.data)); + + return faiss::read_index(&reader); + } + int write_index_insert(faiss::VectorIOWriter &writer, sqlite3 *db, char *schema, From ebcc2913f5f270a7b0b3d2ae50e79def19622b12 Mon Sep 17 00:00:00 2001 From: Thomas Hansen Date: Mon, 26 Jun 2023 19:34:40 +0300 Subject: [PATCH 32/66] Moving more stuff around to get ready for caching of index --- src/sqlite-vss.cpp | 56 +++++++++++++-------------------------------- src/vss/vss-index.h | 44 +++++++++++++++++++++++++++++++---- 2 files changed, 56 insertions(+), 44 deletions(-) diff --git a/src/sqlite-vss.cpp b/src/sqlite-vss.cpp index b19e96a..7a3c550 100644 --- a/src/sqlite-vss.cpp +++ b/src/sqlite-vss.cpp @@ -133,35 +133,6 @@ static int shadow_data_delete(sqlite3 *db, return SQLITE_OK; } -static int create_shadow_tables(sqlite3 *db, - const char *schema, - const char *name, - int n) { - - SqlStatement create1(db, - sqlite3_mprintf("create table \"%w\".\"%w_index\"(idx)", - schema, - name)); - - auto rc = create1.exec(); - if (rc != SQLITE_OK) - return rc; - - /* - * Notice, we'll need to explicitly finalize this object since we can only - * have one open statement at the same time to the same connetion. - */ - create1.finalize(); - - SqlStatement create2(db, - sqlite3_mprintf("create table \"%w\".\"%w_data\"(x);", - schema, - name)); - - rc = create2.exec(); - return rc; -} - static int drop_shadow_tables(sqlite3 *db, char *name) { // Dropping both x_index and x_data shadow tables. @@ -280,17 +251,15 @@ static int init(sqlite3 *db, pTable->getIndexes().push_back( vss_index::factory(db, - argv[2], - i, - &iter->factory, - iter->dimensions)); + argv[1], + argv[2], + i, + &iter->factory, + iter->dimensions, + columns->size())); } - rc = create_shadow_tables(db, argv[1], argv[2], columns->size()); - if (rc != SQLITE_OK) - return rc; - // Shadow tables were successully created. // After shadow tables are created, write the initial index state to // shadow _index. @@ -310,14 +279,21 @@ static int init(sqlite3 *db, for (int i = 0; i < columns->size(); i++) { - pTable->getIndexes().push_back(vss_index::factory(db, argv[2], i, nullptr, -1)); + pTable->getIndexes().push_back( + vss_index::factory(db, + argv[1], + argv[2], + i, + nullptr, + -1, + -1)); } } - } catch (faiss::FaissException &e) { + } catch (exception & e) { *pzErr = sqlite3_mprintf("Error building index factory, exception was: %s", - e.msg.c_str()); + e.what()); return SQLITE_ERROR; } diff --git a/src/vss/vss-index.h b/src/vss/vss-index.h index 188dcf1..92ab92e 100644 --- a/src/vss/vss-index.h +++ b/src/vss/vss-index.h @@ -171,10 +171,12 @@ class vss_index { // Creates a new vss_index or returns a cached index to caller. static vss_index * factory(sqlite3 *db, - const char *name, - int indexId, - string * factoryArgs, - int dimensions) { + const char *schema, + const char *name, + int indexId, + string * factoryArgs, + int dimensions, + int colSize) { string key = name; key += indexId; @@ -186,12 +188,46 @@ class vss_index { } else { unique_ptr tmp(new vss_index(faiss::index_factory(dimensions, factoryArgs->c_str()))); + + auto rc = create_shadow_tables(db, schema, name, colSize); + if (rc != SQLITE_OK) + throw domain_error("Couldn't create shadow tables"); + return tmp.release(); } } private: + static int create_shadow_tables(sqlite3 *db, + const char *schema, + const char *name, + int n) { + + SqlStatement create1(db, + sqlite3_mprintf("create table \"%w\".\"%w_index\"(idx)", + schema, + name)); + + auto rc = create1.exec(); + if (rc != SQLITE_OK) + return rc; + + /* + * Notice, we'll need to explicitly finalize this object since we can only + * have one open statement at the same time to the same connetion. + */ + create1.finalize(); + + SqlStatement create2(db, + sqlite3_mprintf("create table \"%w\".\"%w_data\"(x);", + schema, + name)); + + rc = create2.exec(); + return rc; + } + explicit vss_index(faiss::Index *index) : index(index) { } static faiss::Index * read_index_select(sqlite3 *db, From cbe056fb95a5b600b914b9079e6deab971299800 Mon Sep 17 00:00:00 2001 From: Thomas Hansen Date: Mon, 26 Jun 2023 19:51:49 +0300 Subject: [PATCH 33/66] Oops, bug fix --- src/sqlite-vss.cpp | 36 +++++++++++++++++++++++++--- src/vss/vss-index.h | 58 +++++++++++++-------------------------------- 2 files changed, 49 insertions(+), 45 deletions(-) diff --git a/src/sqlite-vss.cpp b/src/sqlite-vss.cpp index 7a3c550..2c22c5f 100644 --- a/src/sqlite-vss.cpp +++ b/src/sqlite-vss.cpp @@ -201,6 +201,34 @@ unique_ptr> parse_constructor(int argc, return columns; } +static int create_shadow_tables(sqlite3 *db, + const char *schema, + const char *name) { + + SqlStatement create1(db, + sqlite3_mprintf("create table \"%w\".\"%w_index\"(idx)", + schema, + name)); + + auto rc = create1.exec(); + if (rc != SQLITE_OK) + return rc; + + /* + * Notice, we'll need to explicitly finalize this object since we can only + * have one open statement at the same time to the same connetion. + */ + create1.finalize(); + + SqlStatement create2(db, + sqlite3_mprintf("create table \"%w\".\"%w_data\"(x);", + schema, + name)); + + rc = create2.exec(); + return rc; +} + #define VSS_INDEX_COLUMN_DISTANCE 0 #define VSS_INDEX_COLUMN_OPERATION 1 #define VSS_INDEX_COLUMN_VECTORS 2 @@ -255,11 +283,14 @@ static int init(sqlite3 *db, argv[2], i, &iter->factory, - iter->dimensions, - columns->size())); + iter->dimensions)); } + rc = create_shadow_tables(db, argv[1], argv[2]); + if (rc != SQLITE_OK) + return rc; + // Shadow tables were successully created. // After shadow tables are created, write the initial index state to // shadow _index. @@ -285,7 +316,6 @@ static int init(sqlite3 *db, argv[2], i, nullptr, - -1, -1)); } } diff --git a/src/vss/vss-index.h b/src/vss/vss-index.h index 92ab92e..cc71081 100644 --- a/src/vss/vss-index.h +++ b/src/vss/vss-index.h @@ -148,8 +148,8 @@ class vss_index { } int write_index(sqlite3 *db, - char *schema, - char *name, + const char *schema, + const char *name, int rowId) { unique_lock lock(_lock); @@ -175,59 +175,33 @@ class vss_index { const char *name, int indexId, string * factoryArgs, - int dimensions, - int colSize) { + int dimensions) { string key = name; key += indexId; if (factoryArgs == nullptr) { unique_ptr tmp(new vss_index(vss_index::read_index_select(db, name, indexId))); + + int rc = tmp->write_index(db, + schema, + name, + indexId); + + if (rc != SQLITE_OK) + throw domain_error("Couldn't write initial state of index"); + return tmp.release(); } else { unique_ptr tmp(new vss_index(faiss::index_factory(dimensions, factoryArgs->c_str()))); - - auto rc = create_shadow_tables(db, schema, name, colSize); - if (rc != SQLITE_OK) - throw domain_error("Couldn't create shadow tables"); - return tmp.release(); } } private: - static int create_shadow_tables(sqlite3 *db, - const char *schema, - const char *name, - int n) { - - SqlStatement create1(db, - sqlite3_mprintf("create table \"%w\".\"%w_index\"(idx)", - schema, - name)); - - auto rc = create1.exec(); - if (rc != SQLITE_OK) - return rc; - - /* - * Notice, we'll need to explicitly finalize this object since we can only - * have one open statement at the same time to the same connetion. - */ - create1.finalize(); - - SqlStatement create2(db, - sqlite3_mprintf("create table \"%w\".\"%w_data\"(x);", - schema, - name)); - - rc = create2.exec(); - return rc; - } - explicit vss_index(faiss::Index *index) : index(index) { } static faiss::Index * read_index_select(sqlite3 *db, @@ -260,8 +234,8 @@ class vss_index { int write_index_insert(faiss::VectorIOWriter &writer, sqlite3 *db, - char *schema, - char *name, + const char *schema, + const char *name, int rowId) { // If inserts fails it means index already exists. @@ -288,8 +262,8 @@ class vss_index { int write_index_update(faiss::VectorIOWriter &writer, sqlite3 *db, - char *schema, - char *name, + const char *schema, + const char *name, int rowId) { // Updating existing index. From 452566028efa65d3f5abe26242e7c1b3d34908b1 Mon Sep 17 00:00:00 2001 From: Thomas Hansen Date: Mon, 26 Jun 2023 20:06:08 +0300 Subject: [PATCH 34/66] Improving structure further to get ready for caching --- src/sqlite-vss.cpp | 27 +++++---------------------- src/vss/vss-index.h | 44 ++++++++++++++++++++++++++++++-------------- 2 files changed, 35 insertions(+), 36 deletions(-) diff --git a/src/sqlite-vss.cpp b/src/sqlite-vss.cpp index 2c22c5f..98b6fe6 100644 --- a/src/sqlite-vss.cpp +++ b/src/sqlite-vss.cpp @@ -274,6 +274,10 @@ static int init(sqlite3 *db, if (isCreate) { + rc = create_shadow_tables(db, argv[1], argv[2]); + if (rc != SQLITE_OK) + return rc; + auto i = 0; for (auto iter = columns->begin(); iter != columns->end(); ++iter, i++) { @@ -287,25 +291,6 @@ static int init(sqlite3 *db, } - rc = create_shadow_tables(db, argv[1], argv[2]); - if (rc != SQLITE_OK) - return rc; - - // Shadow tables were successully created. - // After shadow tables are created, write the initial index state to - // shadow _index. - i = 0; - for (auto iter = pTable->getIndexes().begin(); iter != pTable->getIndexes().end(); ++iter, i++) { - - int rc = (*iter)->write_index(pTable->getDb(), - pTable->getSchema(), - pTable->getName(), - i); - - if (rc != SQLITE_OK) - return rc; - } - } else { for (int i = 0; i < columns->size(); i++) { @@ -314,9 +299,7 @@ static int init(sqlite3 *db, vss_index::factory(db, argv[1], argv[2], - i, - nullptr, - -1)); + i)); } } diff --git a/src/vss/vss-index.h b/src/vss/vss-index.h index cc71081..1cf4c47 100644 --- a/src/vss/vss-index.h +++ b/src/vss/vss-index.h @@ -169,7 +169,10 @@ class vss_index { return write_index_update(writer, db, schema, name, rowId); } - // Creates a new vss_index or returns a cached index to caller. + /* + * Creates a new vss_index as a virtual table is being + * created using the VSS module. + */ static vss_index * factory(sqlite3 *db, const char *schema, const char *name, @@ -177,27 +180,40 @@ class vss_index { string * factoryArgs, int dimensions) { + // Figuring out cache key to use to store index into cache. string key = name; key += indexId; - if (factoryArgs == nullptr) { - unique_ptr tmp(new vss_index(vss_index::read_index_select(db, name, indexId))); + // Creating a new index and storing in cache. + unique_ptr newIndex(new vss_index(faiss::index_factory(dimensions, factoryArgs->c_str()))); + + int rc = newIndex->write_index(db, + schema, + name, + indexId); - int rc = tmp->write_index(db, - schema, - name, - indexId); + // Returning index to caller. + return newIndex.release(); + } - if (rc != SQLITE_OK) - throw domain_error("Couldn't write initial state of index"); + /* + * Creates a new vss_index by reading existing data fromdb, + * or returns a cached index to caller. + */ + static vss_index * factory(sqlite3 *db, + const char *schema, + const char *name, + int indexId) { - return tmp.release(); + // Figuring out cache key to use to lookup into cache to see if index already has been created and cached. + string key = name; + key += indexId; - } else { + // Reading index from db. + unique_ptr tmp(new vss_index(read_index_select(db, name, indexId))); - unique_ptr tmp(new vss_index(faiss::index_factory(dimensions, factoryArgs->c_str()))); - return tmp.release(); - } + // Returning index to caller. + return tmp.release(); } private: From d71031cce41bf63d9ded76d37304fc7bb20b7455 Mon Sep 17 00:00:00 2001 From: Thomas Hansen Date: Mon, 26 Jun 2023 20:13:12 +0300 Subject: [PATCH 35/66] Re-structuring --- src/sqlite-vss.cpp | 32 ------------------------------- src/vss/vss-index.h | 46 ++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 41 insertions(+), 37 deletions(-) diff --git a/src/sqlite-vss.cpp b/src/sqlite-vss.cpp index 98b6fe6..db16394 100644 --- a/src/sqlite-vss.cpp +++ b/src/sqlite-vss.cpp @@ -201,34 +201,6 @@ unique_ptr> parse_constructor(int argc, return columns; } -static int create_shadow_tables(sqlite3 *db, - const char *schema, - const char *name) { - - SqlStatement create1(db, - sqlite3_mprintf("create table \"%w\".\"%w_index\"(idx)", - schema, - name)); - - auto rc = create1.exec(); - if (rc != SQLITE_OK) - return rc; - - /* - * Notice, we'll need to explicitly finalize this object since we can only - * have one open statement at the same time to the same connetion. - */ - create1.finalize(); - - SqlStatement create2(db, - sqlite3_mprintf("create table \"%w\".\"%w_data\"(x);", - schema, - name)); - - rc = create2.exec(); - return rc; -} - #define VSS_INDEX_COLUMN_DISTANCE 0 #define VSS_INDEX_COLUMN_OPERATION 1 #define VSS_INDEX_COLUMN_VECTORS 2 @@ -274,10 +246,6 @@ static int init(sqlite3 *db, if (isCreate) { - rc = create_shadow_tables(db, argv[1], argv[2]); - if (rc != SQLITE_OK) - return rc; - auto i = 0; for (auto iter = columns->begin(); iter != columns->end(); ++iter, i++) { diff --git a/src/vss/vss-index.h b/src/vss/vss-index.h index 1cf4c47..56567a6 100644 --- a/src/vss/vss-index.h +++ b/src/vss/vss-index.h @@ -170,8 +170,8 @@ class vss_index { } /* - * Creates a new vss_index as a virtual table is being - * created using the VSS module. + * Creates a new vss_index as a virtual table and stores + * its initial (empty) state. */ static vss_index * factory(sqlite3 *db, const char *schema, @@ -192,12 +192,20 @@ class vss_index { name, indexId); + // Checking if this is our first index, at which point we create our shadow tables. + if (indexId == 0) { + + auto rc = create_shadow_tables(db, schema, name); + if (rc != SQLITE_OK) + throw domain_error("Couldn't create shadow tables"); + } + // Returning index to caller. return newIndex.release(); } /* - * Creates a new vss_index by reading existing data fromdb, + * Creates a new vss_index by reading existing data from db, * or returns a cached index to caller. */ static vss_index * factory(sqlite3 *db, @@ -210,14 +218,42 @@ class vss_index { key += indexId; // Reading index from db. - unique_ptr tmp(new vss_index(read_index_select(db, name, indexId))); + unique_ptr newIndex(new vss_index(read_index_select(db, name, indexId))); // Returning index to caller. - return tmp.release(); + return newIndex.release(); } private: + static int create_shadow_tables(sqlite3 *db, + const char *schema, + const char *name) { + + SqlStatement create1(db, + sqlite3_mprintf("create table \"%w\".\"%w_index\"(idx)", + schema, + name)); + + auto rc = create1.exec(); + if (rc != SQLITE_OK) + return rc; + + /* + * Notice, we'll need to explicitly finalize this object since we can only + * have one open statement at the same time to the same connetion. + */ + create1.finalize(); + + SqlStatement create2(db, + sqlite3_mprintf("create table \"%w\".\"%w_data\"(x);", + schema, + name)); + + rc = create2.exec(); + return rc; + } + explicit vss_index(faiss::Index *index) : index(index) { } static faiss::Index * read_index_select(sqlite3 *db, From 13479442025d5c748608ae33026e0e14dffa9679 Mon Sep 17 00:00:00 2001 From: Thomas Hansen Date: Mon, 26 Jun 2023 20:15:00 +0300 Subject: [PATCH 36/66] Update vss-index.h --- src/vss/vss-index.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/vss/vss-index.h b/src/vss/vss-index.h index 56567a6..4f4aee6 100644 --- a/src/vss/vss-index.h +++ b/src/vss/vss-index.h @@ -187,12 +187,7 @@ class vss_index { // Creating a new index and storing in cache. unique_ptr newIndex(new vss_index(faiss::index_factory(dimensions, factoryArgs->c_str()))); - int rc = newIndex->write_index(db, - schema, - name, - indexId); - - // Checking if this is our first index, at which point we create our shadow tables. + // Checking if this is our first index for table, at which point we create our shadow tables. if (indexId == 0) { auto rc = create_shadow_tables(db, schema, name); @@ -200,6 +195,12 @@ class vss_index { throw domain_error("Couldn't create shadow tables"); } + // Writing its initial (empty) state. + int rc = newIndex->write_index(db, + schema, + name, + indexId); + // Returning index to caller. return newIndex.release(); } From cbf854645f9e91aa0f803d77b6c6160eafce9653 Mon Sep 17 00:00:00 2001 From: Thomas Hansen Date: Mon, 26 Jun 2023 20:32:18 +0300 Subject: [PATCH 37/66] Locking access to factory methods --- src/sqlite-vss.cpp | 5 +++++ src/vss/vss-index.h | 8 +++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/src/sqlite-vss.cpp b/src/sqlite-vss.cpp index db16394..c6ea8fe 100644 --- a/src/sqlite-vss.cpp +++ b/src/sqlite-vss.cpp @@ -205,6 +205,8 @@ unique_ptr> parse_constructor(int argc, #define VSS_INDEX_COLUMN_OPERATION 1 #define VSS_INDEX_COLUMN_VECTORS 2 +shared_mutex vss_index::_globalLock; + static int init(sqlite3 *db, void *pAux, int argc, @@ -244,6 +246,9 @@ static int init(sqlite3 *db, try { + // To avoid race conditions towards cache we lock creation of indexes. + unique_lock globalLock(*vss_index::getGlobalLock()); + if (isCreate) { auto i = 0; diff --git a/src/vss/vss-index.h b/src/vss/vss-index.h index 4f4aee6..3a7a5e2 100644 --- a/src/vss/vss-index.h +++ b/src/vss/vss-index.h @@ -225,6 +225,11 @@ class vss_index { return newIndex.release(); } + static shared_mutex * getGlobalLock() { + + return &_globalLock; + } + private: static int create_shadow_tables(sqlite3 *db, @@ -389,7 +394,8 @@ class vss_index { return true; } - std::shared_mutex _lock; + static shared_mutex _globalLock; + shared_mutex _lock; faiss::Index * index; vector trainings; vector insert_data; From f773d25a37c30875cabfe5ae083019cd4aea3d90 Mon Sep 17 00:00:00 2001 From: Thomas Hansen Date: Mon, 26 Jun 2023 21:09:59 +0300 Subject: [PATCH 38/66] Caching faiss indexes sharing between multiple connections Hopefully synchronising access correctly and cleaning up stuff ;) --- src/sqlite-vss.cpp | 6 ++++ src/vss/vss-index-vtab.h | 3 -- src/vss/vss-index.h | 65 +++++++++++++++++++++++++++++++--------- 3 files changed, 57 insertions(+), 17 deletions(-) diff --git a/src/sqlite-vss.cpp b/src/sqlite-vss.cpp index c6ea8fe..6d65b31 100644 --- a/src/sqlite-vss.cpp +++ b/src/sqlite-vss.cpp @@ -205,7 +205,9 @@ unique_ptr> parse_constructor(int argc, #define VSS_INDEX_COLUMN_OPERATION 1 #define VSS_INDEX_COLUMN_VECTORS 2 +// Declaration of static objects required to do caching. shared_mutex vss_index::_globalLock; +map vss_index::_instances; static int init(sqlite3 *db, void *pAux, @@ -316,6 +318,10 @@ static int vssIndexDestroy(sqlite3_vtab *pVtab) { auto pTable = static_cast(pVtab); drop_shadow_tables(pTable->getDb(), pTable->getName()); + + // Removing from cache. + vss_index::destroy(pTable->getSchema(), pTable->getName()); + vssIndexDisconnect(pVtab); return SQLITE_OK; } diff --git a/src/vss/vss-index-vtab.h b/src/vss/vss-index-vtab.h index 5446927..5da2652 100644 --- a/src/vss/vss-index-vtab.h +++ b/src/vss/vss-index-vtab.h @@ -25,9 +25,6 @@ class vss_index_vtab : public sqlite3_vtab { sqlite3_free(schema); if (this->zErrMsg != nullptr) delete this->zErrMsg; - for (auto iter = indexes.begin(); iter != indexes.end(); ++iter) { - delete (*iter); - } } void setError(char *error) { diff --git a/src/vss/vss-index.h b/src/vss/vss-index.h index 3a7a5e2..0cc1a88 100644 --- a/src/vss/vss-index.h +++ b/src/vss/vss-index.h @@ -4,7 +4,7 @@ #include "inclusions.h" #include - +#include /* * Wrapper around a single faiss index, with training data, insert records, and * delete records. @@ -16,13 +16,6 @@ class vss_index { public: - ~vss_index() { - - if (index != nullptr) { - delete index; - } - } - // Returns false if index requires training before inserting items to it. bool isTrained() { @@ -184,8 +177,12 @@ class vss_index { string key = name; key += indexId; + auto cached = _instances.find(key); + if (cached != _instances.end()) + return cached->second; + // Creating a new index and storing in cache. - unique_ptr newIndex(new vss_index(faiss::index_factory(dimensions, factoryArgs->c_str()))); + auto newIndex = new vss_index(faiss::index_factory(dimensions, factoryArgs->c_str())); // Checking if this is our first index for table, at which point we create our shadow tables. if (indexId == 0) { @@ -201,8 +198,11 @@ class vss_index { name, indexId); + // Caching index. + _instances[key] = newIndex; + // Returning index to caller. - return newIndex.release(); + return newIndex; } /* @@ -218,11 +218,40 @@ class vss_index { string key = name; key += indexId; + auto cached = _instances.find(key); + if (cached != _instances.end()) + return cached->second; + // Reading index from db. - unique_ptr newIndex(new vss_index(read_index_select(db, name, indexId))); + auto newIndex = new vss_index(read_index_select(db, name, indexId)); + + // Caching index. + _instances[key] = newIndex; // Returning index to caller. - return newIndex.release(); + return newIndex; + } + + // Deletes all indexes associated with the specified schema and table. + static void destroy(const char * schema, const char * name) { + + // Synchronising access. + unique_lock lock(_globalLock); + + string filter = schema; + filter += name; + for (auto iter = _instances.begin(); iter != _instances.end();) { + + if (iter->first.compare(filter) == 0) { + + delete iter->second; + _instances.erase(iter++); + + } else { + + ++iter; + } + } } static shared_mutex * getGlobalLock() { @@ -232,6 +261,15 @@ class vss_index { private: + explicit vss_index(faiss::Index *index) : index(index) { } + + ~vss_index() { + + if (index != nullptr) { + delete index; + } + } + static int create_shadow_tables(sqlite3 *db, const char *schema, const char *name) { @@ -260,8 +298,6 @@ class vss_index { return rc; } - explicit vss_index(faiss::Index *index) : index(index) { } - static faiss::Index * read_index_select(sqlite3 *db, const char *name, int indexId) { @@ -394,6 +430,7 @@ class vss_index { return true; } + static map _instances; static shared_mutex _globalLock; shared_mutex _lock; faiss::Index * index; From b6c1aa4274970564d97fe6c9bc8390381d51c2c0 Mon Sep 17 00:00:00 2001 From: Thomas Hansen Date: Mon, 26 Jun 2023 21:23:24 +0300 Subject: [PATCH 39/66] Resetting index on close --- src/vss/vss-index-vtab.h | 5 +++++ src/vss/vss-index.h | 10 ++++++++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/src/vss/vss-index-vtab.h b/src/vss/vss-index-vtab.h index 5da2652..611f045 100644 --- a/src/vss/vss-index-vtab.h +++ b/src/vss/vss-index-vtab.h @@ -25,6 +25,11 @@ class vss_index_vtab : public sqlite3_vtab { sqlite3_free(schema); if (this->zErrMsg != nullptr) delete this->zErrMsg; + + // Resetting all indexes since we cannot delete them since they're reused and cached. + for (auto iter = indexes.begin(); iter != indexes.end(); ++iter) { + (*iter)->reset(); + } } void setError(char *error) { diff --git a/src/vss/vss-index.h b/src/vss/vss-index.h index 0cc1a88..cdb8c5c 100644 --- a/src/vss/vss-index.h +++ b/src/vss/vss-index.h @@ -178,8 +178,11 @@ class vss_index { key += indexId; auto cached = _instances.find(key); - if (cached != _instances.end()) + if (cached != _instances.end()) { + + cached->second->reset(); return cached->second; + } // Creating a new index and storing in cache. auto newIndex = new vss_index(faiss::index_factory(dimensions, factoryArgs->c_str())); @@ -219,8 +222,11 @@ class vss_index { key += indexId; auto cached = _instances.find(key); - if (cached != _instances.end()) + if (cached != _instances.end()) { + + cached->second->reset(); return cached->second; + } // Reading index from db. auto newIndex = new vss_index(read_index_select(db, name, indexId)); From 97dc2c773b09cf9d1bbe9dad2e4561751adb6cfe Mon Sep 17 00:00:00 2001 From: Thomas Hansen Date: Mon, 26 Jun 2023 21:49:36 +0300 Subject: [PATCH 40/66] Minor bugfix --- src/vss/vss-index.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/vss/vss-index.h b/src/vss/vss-index.h index cdb8c5c..b7ca05d 100644 --- a/src/vss/vss-index.h +++ b/src/vss/vss-index.h @@ -174,8 +174,9 @@ class vss_index { int dimensions) { // Figuring out cache key to use to store index into cache. - string key = name; - key += indexId; + string key = schema; + key += name; + key += to_string(indexId); auto cached = _instances.find(key); if (cached != _instances.end()) { @@ -218,8 +219,9 @@ class vss_index { int indexId) { // Figuring out cache key to use to lookup into cache to see if index already has been created and cached. - string key = name; - key += indexId; + string key = schema; + key += name; + key += to_string(indexId); auto cached = _instances.find(key); if (cached != _instances.end()) { From 247db3c2aae40633641dd9469f7e70631331fc8b Mon Sep 17 00:00:00 2001 From: Thomas Hansen Date: Mon, 26 Jun 2023 22:15:38 +0300 Subject: [PATCH 41/66] Formatting --- src/vss/vss-index-vtab.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/vss/vss-index-vtab.h b/src/vss/vss-index-vtab.h index 611f045..3cf4922 100644 --- a/src/vss/vss-index-vtab.h +++ b/src/vss/vss-index-vtab.h @@ -21,8 +21,10 @@ class vss_index_vtab : public sqlite3_vtab { if (name) sqlite3_free(name); + if (schema) sqlite3_free(schema); + if (this->zErrMsg != nullptr) delete this->zErrMsg; @@ -33,9 +35,11 @@ class vss_index_vtab : public sqlite3_vtab { } void setError(char *error) { + if (this->zErrMsg != nullptr) { delete this->zErrMsg; } + this->zErrMsg = error; } From aadc50b2d450d04e4447a0b246fad9fb42bf292a Mon Sep 17 00:00:00 2001 From: Thomas Hansen Date: Mon, 26 Jun 2023 22:15:45 +0300 Subject: [PATCH 42/66] Triggering build --- .github/workflows/release.yaml | 222 ------------------------ .github/workflows/upload-deno-assets.js | 8 - .github/workflows/upload.js | 10 -- 3 files changed, 240 deletions(-) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index cbdfcc8..c6235ab 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -8,11 +8,6 @@ permissions: env: ARTIFACT-LINUX-X86_64-EXTENSION: sqlite-vss-linux-x86_64 ARTIFACT-MACOS-X86_64-EXTENSION: sqlite-vss-macos-x86_64 - ARTIFACT-MACOS-AARCH64-EXTENSION: sqlite-vss-macos-aarch64 - ARTIFACT-WINDOWS-X86_64-EXTENSION: sqlite-vss-windows-x86_64 - ARTIFACT-LINUX-X86_64-WHEELS: sqlite-vss-linux-x86_64-wheels - ARTIFACT-MACOS-X86_64-WHEELS: sqlite-vss-macos-x86_64-wheels - ARTIFACT-MACOS-AARCH64-WHEELS: sqlite-vss-macos-aarch64-wheels jobs: build-linux-x86_64-extension: runs-on: ubuntu-20.04 @@ -38,23 +33,6 @@ jobs: with: name: ${{ env.ARTIFACT-LINUX-X86_64-EXTENSION }} path: dist/release/* - build-linux-x86_64-python: - runs-on: ubuntu-20.04 - needs: [build-linux-x86_64-extension] - steps: - - uses: actions/checkout@v3 - - uses: actions/download-artifact@v3 - with: - name: ${{ env.ARTIFACT-LINUX-X86_64-EXTENSION }} - path: dist/release/ - - uses: actions/setup-python@v3 - - run: pip install wheel - - run: make python-release - - run: make datasette-release - - uses: actions/upload-artifact@v3 - with: - name: ${{ env.ARTIFACT-LINUX-X86_64-WHEELS }} - path: dist/release/wheels/*.whl build-macos-x86_64-extension: runs-on: macos-latest steps: @@ -89,72 +67,10 @@ jobs: with: name: ${{ env.ARTIFACT-MACOS-X86_64-EXTENSION }} path: dist/release/* - build-macos-x86_64-python: - runs-on: macos-latest - needs: [build-macos-x86_64-extension] - steps: - - uses: actions/checkout@v3 - - uses: actions/download-artifact@v3 - with: - name: ${{ env.ARTIFACT-MACOS-X86_64-EXTENSION }} - path: dist/release/ - - uses: actions/setup-python@v3 - - run: pip install wheel - - run: make python-release - - run: make datasette-release - - uses: actions/upload-artifact@v3 - with: - name: ${{ env.ARTIFACT-MACOS-X86_64-WHEELS }} - path: dist/release/wheels/*.whl - build-macos-aarch64-extension: - runs-on: [self-hosted, mm1] - steps: - - uses: actions/checkout@v3 - with: - submodules: "recursive" - - id: cache-sqlite-build - uses: actions/cache@v3 - with: - path: vendor/sqlite - key: ${{ runner.os }}-${{ hashFiles('vendor/get_sqlite.sh') }} - - if: steps.cache-sqlite-build.outputs.cache-hit != 'true' - run: ./vendor/get_sqlite.sh - - if: steps.cache-sqlite-build.outputs.cache-hit != 'true' - working-directory: vendor/sqlite - run: ./configure && make - - run: make patch-openmp - - run: make loadable-release static-release - env: - # `brew info libomp` gives the correct one, with .a file for static openmp builds - CC: /opt/homebrew/opt/llvm/bin/clang - CXX: /opt/homebrew/opt/llvm/bin/clang++ - LDFLAGS: "-L/opt/homebrew/opt/libomp/lib" - CPPFLAGS: "-I/opt/homebrew/opt/libomp/include" - - uses: actions/upload-artifact@v3 - with: - name: ${{ env.ARTIFACT-MACOS-AARCH64-EXTENSION }} - path: dist/release/* - build-macos-aarch64-python: - runs-on: [self-hosted, mm1] - needs: [build-macos-aarch64-extension] - steps: - - uses: actions/checkout@v3 - - uses: actions/download-artifact@v3 - with: - name: ${{ env.ARTIFACT-MACOS-AARCH64-EXTENSION }} - path: dist/release/ - - run: pip3 install wheel - - run: make python-release IS_MACOS_ARM=1 - - run: make datasette-release - - uses: actions/upload-artifact@v3 - with: - name: ${{ env.ARTIFACT-MACOS-AARCH64-WHEELS }} - path: dist/release/wheels/*.whl upload-deno: needs: [ build-macos-x86_64-extension, - build-macos-aarch64-extension, build-linux-x86_64-extension, ] permissions: @@ -170,7 +86,6 @@ jobs: env: ARTIFACT-LINUX-X86_64-EXTENSION: ${{ env.ARTIFACT-LINUX-X86_64-EXTENSION }} ARTIFACT-MACOS-X86_64-EXTENSION: ${{ env.ARTIFACT-MACOS-X86_64-EXTENSION }} - ARTIFACT-MACOS-AARCH64-EXTENSION: ${{ env.ARTIFACT-MACOS-AARCH64-EXTENSION }} with: github-token: ${{ secrets.GITHUB_TOKEN }} result-encoding: string @@ -181,7 +96,6 @@ jobs: needs: [ build-macos-x86_64-extension, - build-macos-aarch64-extension, build-linux-x86_64-extension, ] permissions: @@ -199,7 +113,6 @@ jobs: github-token: ${{ secrets.GITHUB_TOKEN }} platforms: | macos-x86_64: ${{ env.ARTIFACT-MACOS-X86_64-EXTENSION }}/* - macos-aarch64: ${{ env.ARTIFACT-MACOS-AARCH64-EXTENSION }}/* linux-x86_64: ${{ env.ARTIFACT-LINUX-X86_64-EXTENSION }}/* upload-checksums: needs: [upload-extensions, upload-deno] @@ -227,138 +140,3 @@ jobs: name: "checksums.txt", data: process.env.CHECKSUMS, }); - upload-hex: - runs-on: ubuntu-latest - needs: [upload-extensions] - steps: - - uses: actions/checkout@v2 - - uses: erlef/setup-beam@v1 - with: - otp-version: "24" - rebar3-version: "3.16.1" - elixir-version: "1.14" - - run: ./scripts/elixir_generate_checksum.sh "${{ needs.upload-extensions.outputs.checksums }}" - - run: mix deps.get - working-directory: ./bindings/elixir - - run: mix compile --docs - working-directory: ./bindings/elixir - - run: mix hex.publish --yes - working-directory: ./bindings/elixir - env: - HEX_API_KEY: ${{ secrets.HEX_API_KEY }} - upload-npm: - needs: - [ - build-macos-x86_64-extension, - build-macos-aarch64-extension, - build-linux-x86_64-extension, - ] - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - uses: actions/download-artifact@v2 - - run: | - cp ${{ env.ARTIFACT-LINUX-X86_64-EXTENSION }}/*.so bindings/node/sqlite-vss-linux-x64/lib/ - cp ${{ env.ARTIFACT-MACOS-X86_64-EXTENSION }}/*.dylib bindings/node/sqlite-vss-darwin-x64/lib/ - cp ${{ env.ARTIFACT-MACOS-AARCH64-EXTENSION }}/*.dylib bindings/node/sqlite-vss-darwin-arm64/lib/ - - uses: actions/setup-node@v3 - with: - node-version: "16" - registry-url: "https://registry.npmjs.org" - - name: Publish NPM sqlite-vss-linux-x64 - working-directory: bindings/node/sqlite-vss-linux-x64 - run: npm publish --access public - env: - NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} - - name: Publish NPM sqlite-vss-darwin-x64 - working-directory: bindings/node/sqlite-vss-darwin-x64 - run: npm publish --access public - env: - NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} - - name: Publish NPM sqlite-vss-darwin-arm64 - working-directory: bindings/node/sqlite-vss-darwin-arm64 - run: npm publish --access public - env: - NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} - - name: Publish NPM sqlite-vss - working-directory: bindings/node/sqlite-vss - run: npm publish --access public - env: - NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} - upload-pypi: - needs: - [ - build-linux-x86_64-python, - build-macos-x86_64-python, - build-macos-aarch64-python, - ] - runs-on: ubuntu-latest - steps: - - uses: actions/download-artifact@v3 - with: - name: ${{ env.ARTIFACT-LINUX-X86_64-WHEELS }} - path: dist - - uses: actions/download-artifact@v3 - with: - name: ${{ env.ARTIFACT-MACOS-X86_64-WHEELS }} - path: dist - - uses: actions/download-artifact@v3 - with: - name: ${{ env.ARTIFACT-MACOS-AARCH64-WHEELS }} - path: dist - - uses: pypa/gh-action-pypi-publish@release/v1 - with: - password: ${{ secrets.PYPI_API_TOKEN }} - skip-existing: true - upload-gem: - needs: - [ - build-macos-x86_64-extension, - build-macos-aarch64-extension, - build-linux-x86_64-extension, - ] - permissions: - contents: write - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - uses: actions/download-artifact@v2 - - uses: ruby/setup-ruby@v1 - with: - ruby-version: 3.2 - - run: | - rm bindings/ruby/lib/*.{dylib,so,dll} || true - cp ${{ env.ARTIFACT-MACOS-X86_64-EXTENSION }}/*.dylib bindings/ruby/lib - gem -C bindings/ruby build -o x86_64-darwin.gem sqlite_vss.gemspec - env: - PLATFORM: x86_64-darwin - - run: | - rm bindings/ruby/lib/*.{dylib,so,dll} || true - cp ${{ env.ARTIFACT-MACOS-AARCH64-EXTENSION }}/*.dylib bindings/ruby/lib - gem -C bindings/ruby build -o arm64-darwin.gem sqlite_vss.gemspec - env: - PLATFORM: arm64-darwin - - run: | - rm bindings/ruby/lib/*.{dylib,so,dll} || true - cp ${{ env.ARTIFACT-LINUX-X86_64-EXTENSION }}/*.so bindings/ruby/lib - gem -C bindings/ruby build -o x86_64-linux.gem sqlite_vss.gemspec - env: - PLATFORM: x86_64-linux - - run: | - gem push bindings/ruby/x86_64-darwin.gem - gem push bindings/ruby/arm64-darwin.gem - gem push bindings/ruby/x86_64-linux.gem - env: - GEM_HOST_API_KEY: ${{ secrets.GEM_HOST_API_KEY }} - upload-crate: - runs-on: ubuntu-latest - needs: [upload-extensions] - steps: - - uses: actions/checkout@v2 - - uses: actions-rs/toolchain@v1 - with: - toolchain: stable - - run: cargo publish --no-verify - working-directory: ./bindings/rust - env: - CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }} \ No newline at end of file diff --git a/.github/workflows/upload-deno-assets.js b/.github/workflows/upload-deno-assets.js index dee6e23..beb4b48 100644 --- a/.github/workflows/upload-deno-assets.js +++ b/.github/workflows/upload-deno-assets.js @@ -14,14 +14,6 @@ module.exports = async ({ github, context }) => { path: `${process.env["ARTIFACT-MACOS-X86_64-EXTENSION"]}/vss0.dylib`, name: `sqlite-vss-${VERSION}-deno-darwin-x86_64.vss0.dylib`, }, - { - path: `${process.env["ARTIFACT-MACOS-AARCH64-EXTENSION"]}/vector0.dylib`, - name: `sqlite-vss-${VERSION}-deno-darwin-aarch64.vector0.dylib`, - }, - { - path: `${process.env["ARTIFACT-MACOS-AARCH64-EXTENSION"]}/vss0.dylib`, - name: `sqlite-vss-${VERSION}-deno-darwin-aarch64.vss0.dylib`, - }, { path: `${process.env["ARTIFACT-LINUX-X86_64-EXTENSION"]}/vector0.so`, name: `sqlite-vss-${VERSION}-deno-linux-x86_64.vector0.so`, diff --git a/.github/workflows/upload.js b/.github/workflows/upload.js index 6ac55d5..97f9acc 100644 --- a/.github/workflows/upload.js +++ b/.github/workflows/upload.js @@ -19,11 +19,6 @@ const vss = { os: "darwin", cpu: "x86_64", }, - { - path: `${process.env["ARTIFACT-MACOS-AARCH64-EXTENSION"]}/vss0.dylib`, - os: "darwin", - cpu: "aarch64", - }, ], }; const vector = { @@ -40,11 +35,6 @@ const vector = { os: "darwin", cpu: "x86_64", }, - { - path: `${process.env["ARTIFACT-MACOS-AARCH64-EXTENSION"]}/vector0.dylib`, - os: "darwin", - cpu: "aarch64", - }, ], }; From 1c3f9ff02f7b8ae801f8966d8031b5235546f29d Mon Sep 17 00:00:00 2001 From: Thomas Hansen Date: Tue, 27 Jun 2023 08:41:37 +0300 Subject: [PATCH 43/66] Cleanups --- src/sqlite-vector.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/sqlite-vector.cpp b/src/sqlite-vector.cpp index 7f22370..1be5068 100644 --- a/src/sqlite-vector.cpp +++ b/src/sqlite-vector.cpp @@ -407,6 +407,10 @@ struct fvecsEach_cursor : public sqlite3_vtab_cursor { vec_ptr pCurrentVector; }; +#define FVECS_EACH_DIMENSIONS 0 +#define FVECS_EACH_VECTOR 1 +#define FVECS_EACH_INPUT 2 + static int fvecsEachConnect(sqlite3 *db, void *pAux, int argc, @@ -418,10 +422,6 @@ static int fvecsEachConnect(sqlite3 *db, rc = sqlite3_declare_vtab(db, "create table x(dimensions, vector, input hidden)"); -#define FVECS_EACH_DIMENSIONS 0 -#define FVECS_EACH_VECTOR 1 -#define FVECS_EACH_INPUT 2 - if (rc == SQLITE_OK) { auto pNew = new fvecsEach_vtab(); From 1c73d9c3bfcc2d4a0da576c631386bb1db2ca8c8 Mon Sep 17 00:00:00 2001 From: Thomas Hansen Date: Tue, 27 Jun 2023 08:55:29 +0300 Subject: [PATCH 44/66] Started moving vector parts into separate files --- src/sqlite-vector.cpp | 406 +------------------------------------ src/vec/functions.h | 311 ++++++++++++++++++++++++++++ src/vec/fvecsEach_cursor.h | 56 +++++ src/vec/fvecsEach_vtab.h | 22 ++ 4 files changed, 399 insertions(+), 396 deletions(-) create mode 100644 src/vec/functions.h create mode 100644 src/vec/fvecsEach_cursor.h create mode 100644 src/vec/fvecsEach_vtab.h diff --git a/src/sqlite-vector.cpp b/src/sqlite-vector.cpp index 1be5068..b7743ac 100644 --- a/src/sqlite-vector.cpp +++ b/src/sqlite-vector.cpp @@ -14,399 +14,16 @@ SQLITE_EXTENSION_INIT1 using namespace std; - typedef unique_ptr> vec_ptr; -// https://github.com/sqlite/sqlite/blob/master/src/json.c#L88-L89 -#define JSON_SUBTYPE 74 /* Ascii for "J" */ - -#include -using json = nlohmann::json; - -char VECTOR_BLOB_HEADER_BYTE = 'v'; -char VECTOR_BLOB_HEADER_TYPE = 1; -const char *VECTOR_FLOAT_POINTER_NAME = "vectorf32v0"; - -#pragma endregion - -#pragma region Generic - -void delVectorFloat(void *p) { - - auto vx = static_cast(p); - sqlite3_free(vx->data); - delete vx; -} - -void resultVector(sqlite3_context *context, vector *vecIn) { - - auto vecRes = new VectorFloat(); - - vecRes->size = vecIn->size(); - vecRes->data = (float *)sqlite3_malloc(vecIn->size() * sizeof(float)); - - memcpy(vecRes->data, vecIn->data(), vecIn->size() * sizeof(float)); - - sqlite3_result_pointer(context, vecRes, VECTOR_FLOAT_POINTER_NAME, delVectorFloat); -} - -vec_ptr vectorFromBlobValue(sqlite3_value *value, const char **pzErrMsg) { - - int size = sqlite3_value_bytes(value); - char header; - char type; - - if (size < (2)) { - *pzErrMsg = "Vector blob size less than header length"; - return nullptr; - } - - const void *pBlob = sqlite3_value_blob(value); - memcpy(&header, ((char *)pBlob + 0), sizeof(char)); - memcpy(&type, ((char *)pBlob + 1), sizeof(char)); - - if (header != VECTOR_BLOB_HEADER_BYTE) { - *pzErrMsg = "Blob not well-formatted vector blob"; - return nullptr; - } - - if (type != VECTOR_BLOB_HEADER_TYPE) { - *pzErrMsg = "Blob type not right"; - return nullptr; - } - - int numElements = (size - 2) / sizeof(float); - float *vec = (float *)((char *)pBlob + 2); - return vec_ptr(new vector(vec, vec + numElements)); -} - -vec_ptr vectorFromRawBlobValue(sqlite3_value *value, const char **pzErrMsg) { - - int size = sqlite3_value_bytes(value); - - // Must be divisible by 4 - if (size % 4) { - *pzErrMsg = "Invalid raw blob length, blob must be divisible by 4"; - return nullptr; - } - const void *pBlob = sqlite3_value_blob(value); - - float *vec = (float *)((char *)pBlob); - return vec_ptr(new vector(vec, vec + (size / 4))); -} - -vec_ptr vectorFromTextValue(sqlite3_value *value) { - - try { - - json json = json::parse(sqlite3_value_text(value)); - vec_ptr pVec(new vector()); - json.get_to(*pVec); - return pVec; - - } catch (const json::exception &) { - return nullptr; - } - - return nullptr; -} - -static vec_ptr valueAsVector(sqlite3_value *value) { - - // Option 1: If the value is a "vectorf32v0" pointer, create vector from - // that - auto vec = (VectorFloat *)sqlite3_value_pointer(value, VECTOR_FLOAT_POINTER_NAME); - - if (vec != nullptr) - return vec_ptr(new vector(vec->data, vec->data + vec->size)); - - vec_ptr pVec; - - // Option 2: value is a blob in vector format - if (sqlite3_value_type(value) == SQLITE_BLOB) { - - const char *pzErrMsg = nullptr; - - if ((pVec = vectorFromBlobValue(value, &pzErrMsg)) != nullptr) - return pVec; - - if ((pVec = vectorFromRawBlobValue(value, &pzErrMsg)) != nullptr) - return pVec; - } - - // Option 3: if value is a JSON array coercible to float vector, use that - if (sqlite3_value_type(value) == SQLITE_TEXT) { - - if ((pVec = vectorFromTextValue(value)) != nullptr) - return pVec; - else - return nullptr; - } - - // Else, value isn't a vector - return nullptr; -} - -#pragma endregion - -#pragma region Meta - -static void vector_version(sqlite3_context *context, - int argc, - sqlite3_value **argv) { - - sqlite3_result_text(context, SQLITE_VSS_VERSION, -1, SQLITE_STATIC); -} - -static void vector_debug(sqlite3_context *context, - int argc, - sqlite3_value **argv) { - - vec_ptr pVec = valueAsVector(argv[0]); - - if (pVec == nullptr) { - - sqlite3_result_error(context, "Value not a vector", -1); - return; - } - - sqlite3_str *str = sqlite3_str_new(0); - sqlite3_str_appendf(str, "size: %lld [", pVec->size()); - - for (int i = 0; i < pVec->size(); i++) { - - if (i == 0) - sqlite3_str_appendf(str, "%f", pVec->at(i)); - else - sqlite3_str_appendf(str, ", %f", pVec->at(i)); - } - - sqlite3_str_appendchar(str, 1, ']'); - sqlite3_result_text(context, sqlite3_str_finish(str), -1, sqlite3_free); -} - -#pragma endregion - -#pragma region Vector generation - -// TODO should return fvec, ivec, or bvec depending on input. How do bvec, -// though? -static void vector_from(sqlite3_context *context, - int argc, - sqlite3_value **argv) { - - vector vec; - vec.reserve(argc); - for (int i = 0; i < argc; i++) { - vec.push_back(sqlite3_value_double(argv[i])); - } - - resultVector(context, &vec); -} - -#pragma endregion - -#pragma region Vector general - -static void vector_value_at(sqlite3_context *context, - int argc, - sqlite3_value **argv) { - - vec_ptr pVec = valueAsVector(argv[0]); - - if (pVec == nullptr) - return; - - int pos = sqlite3_value_int(argv[1]); - - try { - - float result = pVec->at(pos); - sqlite3_result_double(context, result); - - } catch (const out_of_range &oor) { - - char *errmsg = sqlite3_mprintf("%d out of range: %s", pos, oor.what()); - - if (errmsg != nullptr) { - sqlite3_result_error(context, errmsg, -1); - sqlite3_free(errmsg); - } else { - sqlite3_result_error_nomem(context); - } - } -} - -static void vector_length(sqlite3_context *context, - int argc, - sqlite3_value **argv) { - - auto pVec = (VectorFloat *)sqlite3_value_pointer(argv[0], VECTOR_FLOAT_POINTER_NAME); - if (pVec == nullptr) - return; - - sqlite3_result_int64(context, pVec->size); -} - -#pragma endregion - -#pragma region Json - -static void vector_to_json(sqlite3_context *context, - int argc, - sqlite3_value **argv) { - - vec_ptr pVec = valueAsVector(argv[0]); - if (pVec == nullptr) - return; - - json j = json(*pVec); - - sqlite3_result_text(context, j.dump().c_str(), -1, SQLITE_TRANSIENT); - sqlite3_result_subtype(context, JSON_SUBTYPE); -} - -static void vector_from_json(sqlite3_context *context, - int argc, - sqlite3_value **argv) { - - const char *text = (const char *)sqlite3_value_text(argv[0]); - vec_ptr pVec = vectorFromTextValue(argv[0]); - - if (pVec == nullptr) { - sqlite3_result_error( - context, "input not valid json, or contains non-float data", -1); - } else { - resultVector(context, pVec.get()); - } -} - -#pragma endregion - -#pragma region Blob - -/* - -|Offset | Size | Description -|-|-|- -|a|a|A -*/ -static void vector_to_blob(sqlite3_context *context, - int argc, - sqlite3_value **argv) { - - vec_ptr pVec = valueAsVector(argv[0]); - if (pVec == nullptr) - return; - - int size = pVec->size(); - int memSize = (sizeof(char)) + (sizeof(char)) + (size * 4); - void *pBlob = sqlite3_malloc(memSize); - memset(pBlob, 0, memSize); - - memcpy((void *)((char *)pBlob + 0), (void *)&VECTOR_BLOB_HEADER_BYTE, sizeof(char)); - memcpy((void *)((char *)pBlob + 1), (void *)&VECTOR_BLOB_HEADER_TYPE, sizeof(char)); - memcpy((void *)((char *)pBlob + 2), (void *)pVec->data(), size * 4); - - sqlite3_result_blob64(context, pBlob, memSize, sqlite3_free); -} - -static void vector_from_blob(sqlite3_context *context, - int argc, - sqlite3_value **argv) { - - const char *pzErrMsg; - - vec_ptr pVec = vectorFromBlobValue(argv[0], &pzErrMsg); - if (pVec == nullptr) - sqlite3_result_error(context, pzErrMsg, -1); - else - resultVector(context, pVec.get()); -} - -static void vector_to_raw(sqlite3_context *context, - int argc, - sqlite3_value **argv) { - - vec_ptr pVec = valueAsVector(argv[0]); - if (pVec == nullptr) - return; - - int size = pVec->size(); - int n = size * sizeof(float); - void *pBlob = sqlite3_malloc(n); - memset(pBlob, 0, n); - memcpy((void *)((char *)pBlob), (void *)pVec->data(), n); - sqlite3_result_blob64(context, pBlob, n, sqlite3_free); -} - -static void vector_from_raw(sqlite3_context *context, - int argc, - sqlite3_value **argv) { - - const char *pzErrMsg; // TODO: Shouldn't we have like error messages here? - - vec_ptr pVec = vectorFromRawBlobValue(argv[0], &pzErrMsg); - if (pVec == nullptr) - sqlite3_result_error(context, pzErrMsg, -1); - else - resultVector(context, pVec.get()); -} +#include "vec/functions.h" +#include "vec/fvecsEach_cursor.h" +#include "vec/fvecsEach_vtab.h" #pragma endregion #pragma region fvecs vtab -struct fvecsEach_vtab : public sqlite3_vtab { - - fvecsEach_vtab() { - - pModule = nullptr; - nRef = 0; - zErrMsg = nullptr; - } - - ~fvecsEach_vtab() { - - if (zErrMsg != nullptr) { - sqlite3_free(zErrMsg); - } - } -}; - -struct fvecsEach_cursor : public sqlite3_vtab_cursor { - - fvecsEach_cursor(sqlite3_vtab *pVtab) { - - this->pVtab = pVtab; - iRowid = 0; - pBlob = nullptr; - iBlobN = 0; - p = 0; - iCurrentD = 0; - } - - ~fvecsEach_cursor() { - if (pBlob != nullptr) - sqlite3_free(pBlob); - } - - sqlite3_int64 iRowid; - - // Copy of fvecs input blob - void *pBlob; - - // Total size of pBlob in bytes - sqlite3_int64 iBlobN; - sqlite3_int64 p; - - // Current dimensions - int iCurrentD; - - // Pointer to current vector being read in - vec_ptr pCurrentVector; -}; - #define FVECS_EACH_DIMENSIONS 0 #define FVECS_EACH_VECTOR 1 #define FVECS_EACH_INPUT 2 @@ -425,7 +42,7 @@ static int fvecsEachConnect(sqlite3 *db, if (rc == SQLITE_OK) { auto pNew = new fvecsEach_vtab(); - if (pNew == 0) + if (pNew == nullptr) return SQLITE_NOMEM; *ppVtab = pNew; @@ -491,16 +108,13 @@ static int fvecsEachFilter(sqlite3_vtab_cursor *pVtabCursor, int size = sqlite3_value_bytes(argv[0]); const void *blob = sqlite3_value_blob(argv[0]); - if (pCur->pBlob) - sqlite3_free(pCur->pBlob); - - pCur->pBlob = sqlite3_malloc(size); + pCur->setBlob(sqlite3_malloc(size)); pCur->iBlobN = size; pCur->iRowid = 1; - memcpy(pCur->pBlob, blob, size); + memcpy(pCur->getBlob(), blob, size); - memcpy(&pCur->iCurrentD, pCur->pBlob, sizeof(int)); - float *vecBegin = (float *)((char *)pCur->pBlob + sizeof(int)); + memcpy(&pCur->iCurrentD, pCur->getBlob(), sizeof(int)); + float *vecBegin = (float *)((char *)pCur->getBlob() + sizeof(int)); // TODO: Shouldn't this multiply by sizeof(float)? pCur->pCurrentVector = vec_ptr(new vector(vecBegin, vecBegin + pCur->iCurrentD)); @@ -515,8 +129,8 @@ static int fvecsEachNext(sqlite3_vtab_cursor *cur) { auto pCur = static_cast(cur); // TODO: Shouldn't this multiply by sizeof(float)? - memcpy(&pCur->iCurrentD, ((char *)pCur->pBlob + pCur->p), sizeof(int)); - float *vecBegin = (float *)(((char *)pCur->pBlob + pCur->p) + sizeof(int)); + memcpy(&pCur->iCurrentD, ((char *)pCur->getBlob() + pCur->p), sizeof(int)); + float *vecBegin = (float *)(((char *)pCur->getBlob() + pCur->p) + sizeof(int)); pCur->pCurrentVector->clear(); pCur->pCurrentVector->shrink_to_fit(); diff --git a/src/vec/functions.h b/src/vec/functions.h new file mode 100644 index 0000000..a96c403 --- /dev/null +++ b/src/vec/functions.h @@ -0,0 +1,311 @@ + +#ifndef TRANSFORMERS_H +#define TRANSFORMERS_H + +char VECTOR_BLOB_HEADER_BYTE = 'v'; +char VECTOR_BLOB_HEADER_TYPE = 1; +const char *VECTOR_FLOAT_POINTER_NAME = "vectorf32v0"; + +// https://github.com/sqlite/sqlite/blob/master/src/json.c#L88-L89 +#define JSON_SUBTYPE 74 /* Ascii for "J" */ + +#include +using json = nlohmann::json; + +void delVectorFloat(void *p) { + + auto vx = static_cast(p); + sqlite3_free(vx->data); + delete vx; +} + +void resultVector(sqlite3_context *context, vector *vecIn) { + + auto vecRes = new VectorFloat(); + + vecRes->size = vecIn->size(); + vecRes->data = (float *)sqlite3_malloc(vecIn->size() * sizeof(float)); + + memcpy(vecRes->data, vecIn->data(), vecIn->size() * sizeof(float)); + + sqlite3_result_pointer(context, vecRes, VECTOR_FLOAT_POINTER_NAME, delVectorFloat); +} + +vec_ptr vectorFromBlobValue(sqlite3_value *value, const char **pzErrMsg) { + + int size = sqlite3_value_bytes(value); + char header; + char type; + + if (size < (2)) { + *pzErrMsg = "Vector blob size less than header length"; + return nullptr; + } + + const void *pBlob = sqlite3_value_blob(value); + memcpy(&header, ((char *)pBlob + 0), sizeof(char)); + memcpy(&type, ((char *)pBlob + 1), sizeof(char)); + + if (header != VECTOR_BLOB_HEADER_BYTE) { + *pzErrMsg = "Blob not well-formatted vector blob"; + return nullptr; + } + + if (type != VECTOR_BLOB_HEADER_TYPE) { + *pzErrMsg = "Blob type not right"; + return nullptr; + } + + int numElements = (size - 2) / sizeof(float); + float *vec = (float *)((char *)pBlob + 2); + return vec_ptr(new vector(vec, vec + numElements)); +} + +vec_ptr vectorFromRawBlobValue(sqlite3_value *value, const char **pzErrMsg) { + + int size = sqlite3_value_bytes(value); + + // Must be divisible by 4 + if (size % 4) { + *pzErrMsg = "Invalid raw blob length, blob must be divisible by 4"; + return nullptr; + } + const void *pBlob = sqlite3_value_blob(value); + + float *vec = (float *)((char *)pBlob); + return vec_ptr(new vector(vec, vec + (size / 4))); +} + +vec_ptr vectorFromTextValue(sqlite3_value *value) { + + try { + + json json = json::parse(sqlite3_value_text(value)); + vec_ptr pVec(new vector()); + json.get_to(*pVec); + return pVec; + + } catch (const json::exception &) { + return nullptr; + } + + return nullptr; +} + +static vec_ptr valueAsVector(sqlite3_value *value) { + + // Option 1: If the value is a "vectorf32v0" pointer, create vector from + // that + auto vec = (VectorFloat *)sqlite3_value_pointer(value, VECTOR_FLOAT_POINTER_NAME); + + if (vec != nullptr) + return vec_ptr(new vector(vec->data, vec->data + vec->size)); + + vec_ptr pVec; + + // Option 2: value is a blob in vector format + if (sqlite3_value_type(value) == SQLITE_BLOB) { + + const char *pzErrMsg = nullptr; + + if ((pVec = vectorFromBlobValue(value, &pzErrMsg)) != nullptr) + return pVec; + + if ((pVec = vectorFromRawBlobValue(value, &pzErrMsg)) != nullptr) + return pVec; + } + + // Option 3: if value is a JSON array coercible to float vector, use that + if (sqlite3_value_type(value) == SQLITE_TEXT) { + + if ((pVec = vectorFromTextValue(value)) != nullptr) + return pVec; + else + return nullptr; + } + + // Else, value isn't a vector + return nullptr; +} + +// TODO should return fvec, ivec, or bvec depending on input. How do bvec, +// though? +static void vector_from(sqlite3_context *context, + int argc, + sqlite3_value **argv) { + + vector vec; + vec.reserve(argc); + for (int i = 0; i < argc; i++) { + vec.push_back(sqlite3_value_double(argv[i])); + } + + resultVector(context, &vec); +} + +static void vector_value_at(sqlite3_context *context, + int argc, + sqlite3_value **argv) { + + vec_ptr pVec = valueAsVector(argv[0]); + + if (pVec == nullptr) + return; + + int pos = sqlite3_value_int(argv[1]); + + try { + + float result = pVec->at(pos); + sqlite3_result_double(context, result); + + } catch (const out_of_range &oor) { + + char *errmsg = sqlite3_mprintf("%d out of range: %s", pos, oor.what()); + + if (errmsg != nullptr) { + sqlite3_result_error(context, errmsg, -1); + sqlite3_free(errmsg); + } else { + sqlite3_result_error_nomem(context); + } + } +} + +static void vector_length(sqlite3_context *context, + int argc, + sqlite3_value **argv) { + + auto pVec = (VectorFloat *)sqlite3_value_pointer(argv[0], VECTOR_FLOAT_POINTER_NAME); + if (pVec == nullptr) + return; + + sqlite3_result_int64(context, pVec->size); +} + +static void vector_to_json(sqlite3_context *context, + int argc, + sqlite3_value **argv) { + + vec_ptr pVec = valueAsVector(argv[0]); + if (pVec == nullptr) + return; + + json j = json(*pVec); + + sqlite3_result_text(context, j.dump().c_str(), -1, SQLITE_TRANSIENT); + sqlite3_result_subtype(context, JSON_SUBTYPE); +} + +static void vector_from_json(sqlite3_context *context, + int argc, + sqlite3_value **argv) { + + const char *text = (const char *)sqlite3_value_text(argv[0]); + vec_ptr pVec = vectorFromTextValue(argv[0]); + + if (pVec == nullptr) { + sqlite3_result_error( + context, "input not valid json, or contains non-float data", -1); + } else { + resultVector(context, pVec.get()); + } +} + +static void vector_to_blob(sqlite3_context *context, + int argc, + sqlite3_value **argv) { + + vec_ptr pVec = valueAsVector(argv[0]); + if (pVec == nullptr) + return; + + int size = pVec->size(); + int memSize = (sizeof(char)) + (sizeof(char)) + (size * 4); + void *pBlob = sqlite3_malloc(memSize); + memset(pBlob, 0, memSize); + + memcpy((void *)((char *)pBlob + 0), (void *)&VECTOR_BLOB_HEADER_BYTE, sizeof(char)); + memcpy((void *)((char *)pBlob + 1), (void *)&VECTOR_BLOB_HEADER_TYPE, sizeof(char)); + memcpy((void *)((char *)pBlob + 2), (void *)pVec->data(), size * 4); + + sqlite3_result_blob64(context, pBlob, memSize, sqlite3_free); +} + +static void vector_from_blob(sqlite3_context *context, + int argc, + sqlite3_value **argv) { + + const char *pzErrMsg; + + vec_ptr pVec = vectorFromBlobValue(argv[0], &pzErrMsg); + if (pVec == nullptr) + sqlite3_result_error(context, pzErrMsg, -1); + else + resultVector(context, pVec.get()); +} + +static void vector_to_raw(sqlite3_context *context, + int argc, + sqlite3_value **argv) { + + vec_ptr pVec = valueAsVector(argv[0]); + if (pVec == nullptr) + return; + + int size = pVec->size(); + int n = size * sizeof(float); + void *pBlob = sqlite3_malloc(n); + memset(pBlob, 0, n); + memcpy((void *)((char *)pBlob), (void *)pVec->data(), n); + sqlite3_result_blob64(context, pBlob, n, sqlite3_free); +} + +static void vector_from_raw(sqlite3_context *context, + int argc, + sqlite3_value **argv) { + + const char *pzErrMsg; // TODO: Shouldn't we have like error messages here? + + vec_ptr pVec = vectorFromRawBlobValue(argv[0], &pzErrMsg); + if (pVec == nullptr) + sqlite3_result_error(context, pzErrMsg, -1); + else + resultVector(context, pVec.get()); +} + +static void vector_version(sqlite3_context *context, + int argc, + sqlite3_value **argv) { + + sqlite3_result_text(context, SQLITE_VSS_VERSION, -1, SQLITE_STATIC); +} + +static void vector_debug(sqlite3_context *context, + int argc, + sqlite3_value **argv) { + + vec_ptr pVec = valueAsVector(argv[0]); + + if (pVec == nullptr) { + + sqlite3_result_error(context, "Value not a vector", -1); + return; + } + + sqlite3_str *str = sqlite3_str_new(0); + sqlite3_str_appendf(str, "size: %lld [", pVec->size()); + + for (int i = 0; i < pVec->size(); i++) { + + if (i == 0) + sqlite3_str_appendf(str, "%f", pVec->at(i)); + else + sqlite3_str_appendf(str, ", %f", pVec->at(i)); + } + + sqlite3_str_appendchar(str, 1, ']'); + sqlite3_result_text(context, sqlite3_str_finish(str), -1, sqlite3_free); +} + +#endif // TRANSFORMERS_H diff --git a/src/vec/fvecsEach_cursor.h b/src/vec/fvecsEach_cursor.h new file mode 100644 index 0000000..13ae749 --- /dev/null +++ b/src/vec/fvecsEach_cursor.h @@ -0,0 +1,56 @@ + +#ifndef FVECSEACH_CURSOR_H +#define FVECSEACH_CURSOR_H + +class fvecsEach_cursor : public sqlite3_vtab_cursor { + +public: + + fvecsEach_cursor(sqlite3_vtab *pVtab) { + + this->pVtab = pVtab; + iRowid = 0; + pBlob = nullptr; + iBlobN = 0; + p = 0; + iCurrentD = 0; + } + + ~fvecsEach_cursor() { + + if (pBlob != nullptr) + sqlite3_free(pBlob); + } + + void * getBlob() { + + return pBlob; + } + + void setBlob(void * blob) { + + if (pBlob != nullptr) + sqlite3_free(pBlob); + + pBlob = blob; + } + + sqlite3_int64 iRowid; + + // Total size of pBlob in bytes + sqlite3_int64 iBlobN; + sqlite3_int64 p; + + // Current dimensions + int iCurrentD; + + // Pointer to current vector being read in + vec_ptr pCurrentVector; + +private: + + // Copy of fvecs input blob + void *pBlob; +}; + +#endif // FVECSEACH_CURSOR_H diff --git a/src/vec/fvecsEach_vtab.h b/src/vec/fvecsEach_vtab.h new file mode 100644 index 0000000..009273c --- /dev/null +++ b/src/vec/fvecsEach_vtab.h @@ -0,0 +1,22 @@ + +#ifndef FVECSEACH_VTAB_H +#define FVECSEACH_VTAB_H + +struct fvecsEach_vtab : public sqlite3_vtab { + + fvecsEach_vtab() { + + pModule = nullptr; + nRef = 0; + zErrMsg = nullptr; + } + + ~fvecsEach_vtab() { + + if (zErrMsg != nullptr) { + sqlite3_free(zErrMsg); + } + } +}; + +#endif // FVECSEACH_VTAB_H From cf0a7db081a0f17b2fa7ca7211864fd83e45af44 Mon Sep 17 00:00:00 2001 From: Thomas Hansen Date: Tue, 27 Jun 2023 09:05:32 +0300 Subject: [PATCH 45/66] Cleanups --- src/sqlite-vector.cpp | 82 +++++++++++++++++++++---------------------- 1 file changed, 40 insertions(+), 42 deletions(-) diff --git a/src/sqlite-vector.cpp b/src/sqlite-vector.cpp index b7743ac..818c4b1 100644 --- a/src/sqlite-vector.cpp +++ b/src/sqlite-vector.cpp @@ -35,9 +35,7 @@ static int fvecsEachConnect(sqlite3 *db, sqlite3_vtab **ppVtab, char **pzErr) { - int rc; - - rc = sqlite3_declare_vtab(db, "create table x(dimensions, vector, input hidden)"); + auto rc = sqlite3_declare_vtab(db, "create table x(dimensions, vector, input hidden)"); if (rc == SQLITE_OK) { @@ -52,25 +50,25 @@ static int fvecsEachConnect(sqlite3 *db, static int fvecsEachDisconnect(sqlite3_vtab *pVtab) { - auto pTable = static_cast(pVtab); - delete pTable; + auto table = static_cast(pVtab); + delete table; return SQLITE_OK; } static int fvecsEachOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor) { - auto pCur = new fvecsEach_cursor(p); - if (pCur == nullptr) + auto cursor = new fvecsEach_cursor(p); + if (cursor == nullptr) return SQLITE_NOMEM; - *ppCursor = pCur; + *ppCursor = cursor; return SQLITE_OK; } -static int fvecsEachClose(sqlite3_vtab_cursor *cur) { +static int fvecsEachClose(sqlite3_vtab_cursor *pCursor) { - auto pCur = static_cast(cur); - delete pCur; + auto cursor = static_cast(pCursor); + delete cursor; return SQLITE_OK; } @@ -97,80 +95,80 @@ static int fvecsEachBestIndex(sqlite3_vtab *tab, sqlite3_index_info *pIdxInfo) { return SQLITE_OK; } -static int fvecsEachFilter(sqlite3_vtab_cursor *pVtabCursor, +static int fvecsEachFilter(sqlite3_vtab_cursor *pCursor, int idxNum, const char *idxStr, int argc, sqlite3_value **argv) { - auto pCur = static_cast(pVtabCursor); + auto cursor = static_cast(pCursor); int size = sqlite3_value_bytes(argv[0]); const void *blob = sqlite3_value_blob(argv[0]); - pCur->setBlob(sqlite3_malloc(size)); - pCur->iBlobN = size; - pCur->iRowid = 1; - memcpy(pCur->getBlob(), blob, size); + cursor->setBlob(sqlite3_malloc(size)); + cursor->iBlobN = size; + cursor->iRowid = 1; + memcpy(cursor->getBlob(), blob, size); - memcpy(&pCur->iCurrentD, pCur->getBlob(), sizeof(int)); - float *vecBegin = (float *)((char *)pCur->getBlob() + sizeof(int)); + memcpy(&cursor->iCurrentD, cursor->getBlob(), sizeof(int)); + float *vecBegin = (float *)((char *)cursor->getBlob() + sizeof(int)); // TODO: Shouldn't this multiply by sizeof(float)? - pCur->pCurrentVector = vec_ptr(new vector(vecBegin, vecBegin + pCur->iCurrentD)); + cursor->pCurrentVector = vec_ptr(new vector(vecBegin, vecBegin + cursor->iCurrentD)); - pCur->p = sizeof(int) + (pCur->iCurrentD * sizeof(float)); + cursor->p = sizeof(int) + (cursor->iCurrentD * sizeof(float)); return SQLITE_OK; } -static int fvecsEachNext(sqlite3_vtab_cursor *cur) { +static int fvecsEachNext(sqlite3_vtab_cursor *pCursor) { - auto pCur = static_cast(cur); + auto cursor = static_cast(pCursor); // TODO: Shouldn't this multiply by sizeof(float)? - memcpy(&pCur->iCurrentD, ((char *)pCur->getBlob() + pCur->p), sizeof(int)); - float *vecBegin = (float *)(((char *)pCur->getBlob() + pCur->p) + sizeof(int)); + memcpy(&cursor->iCurrentD, ((char *)cursor->getBlob() + cursor->p), sizeof(int)); + float *vecBegin = (float *)(((char *)cursor->getBlob() + cursor->p) + sizeof(int)); - pCur->pCurrentVector->clear(); - pCur->pCurrentVector->shrink_to_fit(); - pCur->pCurrentVector->reserve(pCur->iCurrentD); - pCur->pCurrentVector->insert(pCur->pCurrentVector->begin(), + cursor->pCurrentVector->clear(); + cursor->pCurrentVector->shrink_to_fit(); + cursor->pCurrentVector->reserve(cursor->iCurrentD); + cursor->pCurrentVector->insert(cursor->pCurrentVector->begin(), vecBegin, - vecBegin + pCur->iCurrentD); + vecBegin + cursor->iCurrentD); - pCur->p += (sizeof(int) + (pCur->iCurrentD * sizeof(float))); - pCur->iRowid++; + cursor->p += (sizeof(int) + (cursor->iCurrentD * sizeof(float))); + cursor->iRowid++; return SQLITE_OK; } -static int fvecsEachEof(sqlite3_vtab_cursor *cur) { +static int fvecsEachEof(sqlite3_vtab_cursor *pCursor) { - auto pCur = (fvecsEach_cursor *)cur; - return pCur->p > pCur->iBlobN; + auto cursor = (fvecsEach_cursor *)pCursor; + return cursor->p > cursor->iBlobN; } -static int fvecsEachRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid) { +static int fvecsEachRowid(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid) { - fvecsEach_cursor *pCur = (fvecsEach_cursor *)cur; - *pRowid = pCur->iRowid; + fvecsEach_cursor *cursor = (fvecsEach_cursor *)pCursor; + *pRowid = cursor->iRowid; return SQLITE_OK; } -static int fvecsEachColumn(sqlite3_vtab_cursor *cur, +static int fvecsEachColumn(sqlite3_vtab_cursor *pCursor, sqlite3_context *context, int i) { - auto pCur = static_cast(cur); + auto cursor = static_cast(pCursor); switch (i) { case FVECS_EACH_DIMENSIONS: - sqlite3_result_int(context, pCur->iCurrentD); + sqlite3_result_int(context, cursor->iCurrentD); break; case FVECS_EACH_VECTOR: - resultVector(context, pCur->pCurrentVector.get()); + resultVector(context, cursor->pCurrentVector.get()); break; case FVECS_EACH_INPUT: From 202367a4524da5f63741df0c07a5060efa68b0e7 Mon Sep 17 00:00:00 2001 From: Thomas Hansen Date: Tue, 27 Jun 2023 10:00:34 +0300 Subject: [PATCH 46/66] Minor changes --- src/sqlite-vss.cpp | 2 +- src/vss/vss-index-vtab.h | 2 +- src/vss/vss-index.h | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/sqlite-vss.cpp b/src/sqlite-vss.cpp index 6d65b31..6977a2b 100644 --- a/src/sqlite-vss.cpp +++ b/src/sqlite-vss.cpp @@ -261,7 +261,7 @@ static int init(sqlite3 *db, argv[1], argv[2], i, - &iter->factory, + iter->factory, iter->dimensions)); } diff --git a/src/vss/vss-index-vtab.h b/src/vss/vss-index-vtab.h index 3cf4922..31dc28a 100644 --- a/src/vss/vss-index-vtab.h +++ b/src/vss/vss-index-vtab.h @@ -53,7 +53,7 @@ class vss_index_vtab : public sqlite3_vtab { return vector_api; } - vector & getIndexes() { + vector & getIndexes() { return indexes; } diff --git a/src/vss/vss-index.h b/src/vss/vss-index.h index b7ca05d..c0413df 100644 --- a/src/vss/vss-index.h +++ b/src/vss/vss-index.h @@ -170,7 +170,7 @@ class vss_index { const char *schema, const char *name, int indexId, - string * factoryArgs, + string & factoryArgs, int dimensions) { // Figuring out cache key to use to store index into cache. @@ -186,7 +186,7 @@ class vss_index { } // Creating a new index and storing in cache. - auto newIndex = new vss_index(faiss::index_factory(dimensions, factoryArgs->c_str())); + auto newIndex = new vss_index(faiss::index_factory(dimensions, factoryArgs.c_str())); // Checking if this is our first index for table, at which point we create our shadow tables. if (indexId == 0) { From b84561648e6bf60f3168bdd2b4564ac92c733ee4 Mon Sep 17 00:00:00 2001 From: Thomas Hansen Date: Tue, 27 Jun 2023 11:43:33 +0300 Subject: [PATCH 47/66] Removing caching - Doesn't work Library is loaded for each connection, so static members doesn't last --- CMakeLists.txt | 2 +- src/sqlite-vss.cpp | 21 ++------ src/vss/vss-index-cursor.h | 2 + src/vss/vss-index-vtab.h | 4 +- src/vss/vss-index.h | 103 +++++-------------------------------- 5 files changed, 22 insertions(+), 110 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5100d5f..c59d993 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -12,7 +12,7 @@ endif() configure_file(src/sqlite-vss.h.in sqlite-vss.h) configure_file(src/sqlite-vector.h.in sqlite-vector.h) -set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD 11) set(CMAKE_CXX_STANDARD_REQUIRED ON) option(FAISS_ENABLE_GPU "" OFF) diff --git a/src/sqlite-vss.cpp b/src/sqlite-vss.cpp index 6977a2b..c00a0f7 100644 --- a/src/sqlite-vss.cpp +++ b/src/sqlite-vss.cpp @@ -205,10 +205,6 @@ unique_ptr> parse_constructor(int argc, #define VSS_INDEX_COLUMN_OPERATION 1 #define VSS_INDEX_COLUMN_VECTORS 2 -// Declaration of static objects required to do caching. -shared_mutex vss_index::_globalLock; -map vss_index::_instances; - static int init(sqlite3 *db, void *pAux, int argc, @@ -248,19 +244,16 @@ static int init(sqlite3 *db, try { - // To avoid race conditions towards cache we lock creation of indexes. - unique_lock globalLock(*vss_index::getGlobalLock()); - if (isCreate) { - auto i = 0; - for (auto iter = columns->begin(); iter != columns->end(); ++iter, i++) { + auto idxNo = 0; + for (auto iter = columns->begin(); iter != columns->end(); ++iter, idxNo++) { pTable->getIndexes().push_back( vss_index::factory(db, argv[1], argv[2], - i, + idxNo, iter->factory, iter->dimensions)); @@ -268,13 +261,12 @@ static int init(sqlite3 *db, } else { - for (int i = 0; i < columns->size(); i++) { + for (int idxNo = 0; idxNo < columns->size(); idxNo++) { pTable->getIndexes().push_back( vss_index::factory(db, - argv[1], argv[2], - i)); + idxNo)); } } @@ -319,9 +311,6 @@ static int vssIndexDestroy(sqlite3_vtab *pVtab) { auto pTable = static_cast(pVtab); drop_shadow_tables(pTable->getDb(), pTable->getName()); - // Removing from cache. - vss_index::destroy(pTable->getSchema(), pTable->getName()); - vssIndexDisconnect(pVtab); return SQLITE_OK; } diff --git a/src/vss/vss-index-cursor.h b/src/vss/vss-index-cursor.h index 9581b83..b4f58fd 100644 --- a/src/vss/vss-index-cursor.h +++ b/src/vss/vss-index-cursor.h @@ -15,8 +15,10 @@ class vss_index_cursor : public sqlite3_vtab_cursor { sql(nullptr) { } ~vss_index_cursor() { + if (stmt != nullptr) sqlite3_finalize(stmt); + if (sql != nullptr) sqlite3_free(sql); } diff --git a/src/vss/vss-index-vtab.h b/src/vss/vss-index-vtab.h index 31dc28a..8d179c3 100644 --- a/src/vss/vss-index-vtab.h +++ b/src/vss/vss-index-vtab.h @@ -28,9 +28,9 @@ class vss_index_vtab : public sqlite3_vtab { if (this->zErrMsg != nullptr) delete this->zErrMsg; - // Resetting all indexes since we cannot delete them since they're reused and cached. + // Deleting all indexes associated with table. for (auto iter = indexes.begin(); iter != indexes.end(); ++iter) { - (*iter)->reset(); + delete (*iter); } } diff --git a/src/vss/vss-index.h b/src/vss/vss-index.h index c0413df..d4636e4 100644 --- a/src/vss/vss-index.h +++ b/src/vss/vss-index.h @@ -3,8 +3,6 @@ #define VSS_INDEX_H #include "inclusions.h" -#include -#include /* * Wrapper around a single faiss index, with training data, insert records, and * delete records. @@ -16,6 +14,13 @@ class vss_index { public: + ~vss_index() { + + if (index != nullptr) { + delete index; + } + } + // Returns false if index requires training before inserting items to it. bool isTrained() { @@ -41,16 +46,12 @@ class vss_index { vector & distances, vector & ids) { - shared_lock lock(_lock); - index->search(nq, vec->data(), max, distances.data(), ids.data()); } // Queries the index for a range of items. void range_search(int nq, vec_ptr & vec, float distance, unique_ptr & result) { - shared_lock lock(_lock); - index->range_search(nq, vec->data(), distance, result.get()); } @@ -73,8 +74,6 @@ class vss_index { */ void addTrainings(vec_ptr & vec) { - unique_lock lock(_lock); - trainings.reserve(trainings.size() + vec->size()); trainings.insert(trainings.end(), vec->begin(), vec->end()); } @@ -86,8 +85,6 @@ class vss_index { */ void addInsertData(faiss::idx_t rowId, vec_ptr & vec) { - unique_lock lock(_lock); - insert_data.reserve(insert_data.size() + vec->size()); insert_data.insert(insert_data.end(), vec->begin(), vec->end()); @@ -101,8 +98,6 @@ class vss_index { */ void addDelete(faiss::idx_t rowid) { - unique_lock lock(_lock); - delete_ids.push_back(rowid); } @@ -111,8 +106,6 @@ class vss_index { */ bool synchronize() { - unique_lock lock(_lock); - auto result = tryTrain(); result = tryDelete() || result; result = tryInsert() || result; @@ -125,8 +118,6 @@ class vss_index { */ void reset() { - unique_lock lock(_lock); - trainings.clear(); trainings.shrink_to_fit(); @@ -145,8 +136,6 @@ class vss_index { const char *name, int rowId) { - unique_lock lock(_lock); - // Writing our index faiss::VectorIOWriter writer; faiss::write_index(index, &writer); @@ -169,27 +158,15 @@ class vss_index { static vss_index * factory(sqlite3 *db, const char *schema, const char *name, - int indexId, + bool indexNo, string & factoryArgs, int dimensions) { - // Figuring out cache key to use to store index into cache. - string key = schema; - key += name; - key += to_string(indexId); - - auto cached = _instances.find(key); - if (cached != _instances.end()) { - - cached->second->reset(); - return cached->second; - } - // Creating a new index and storing in cache. auto newIndex = new vss_index(faiss::index_factory(dimensions, factoryArgs.c_str())); // Checking if this is our first index for table, at which point we create our shadow tables. - if (indexId == 0) { + if (indexNo == 0) { auto rc = create_shadow_tables(db, schema, name); if (rc != SQLITE_OK) @@ -200,10 +177,7 @@ class vss_index { int rc = newIndex->write_index(db, schema, name, - indexId); - - // Caching index. - _instances[key] = newIndex; + indexNo); // Returning index to caller. return newIndex; @@ -214,70 +188,20 @@ class vss_index { * or returns a cached index to caller. */ static vss_index * factory(sqlite3 *db, - const char *schema, const char *name, - int indexId) { - - // Figuring out cache key to use to lookup into cache to see if index already has been created and cached. - string key = schema; - key += name; - key += to_string(indexId); - - auto cached = _instances.find(key); - if (cached != _instances.end()) { - - cached->second->reset(); - return cached->second; - } + int indexNo) { // Reading index from db. - auto newIndex = new vss_index(read_index_select(db, name, indexId)); - - // Caching index. - _instances[key] = newIndex; + auto newIndex = new vss_index(read_index_select(db, name, indexNo)); // Returning index to caller. return newIndex; } - // Deletes all indexes associated with the specified schema and table. - static void destroy(const char * schema, const char * name) { - - // Synchronising access. - unique_lock lock(_globalLock); - - string filter = schema; - filter += name; - for (auto iter = _instances.begin(); iter != _instances.end();) { - - if (iter->first.compare(filter) == 0) { - - delete iter->second; - _instances.erase(iter++); - - } else { - - ++iter; - } - } - } - - static shared_mutex * getGlobalLock() { - - return &_globalLock; - } - private: explicit vss_index(faiss::Index *index) : index(index) { } - ~vss_index() { - - if (index != nullptr) { - delete index; - } - } - static int create_shadow_tables(sqlite3 *db, const char *schema, const char *name) { @@ -438,9 +362,6 @@ class vss_index { return true; } - static map _instances; - static shared_mutex _globalLock; - shared_mutex _lock; faiss::Index * index; vector trainings; vector insert_data; From 85012194ad41d4ada1dfc36e5da8ea6b00df9f98 Mon Sep 17 00:00:00 2001 From: Thomas Hansen Date: Tue, 27 Jun 2023 14:38:19 +0300 Subject: [PATCH 48/66] Revert "Triggering build" This reverts commit aadc50b2d450d04e4447a0b246fad9fb42bf292a. --- .github/workflows/release.yaml | 222 ++++++++++++++++++++++++ .github/workflows/upload-deno-assets.js | 8 + .github/workflows/upload.js | 10 ++ 3 files changed, 240 insertions(+) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index c6235ab..cbdfcc8 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -8,6 +8,11 @@ permissions: env: ARTIFACT-LINUX-X86_64-EXTENSION: sqlite-vss-linux-x86_64 ARTIFACT-MACOS-X86_64-EXTENSION: sqlite-vss-macos-x86_64 + ARTIFACT-MACOS-AARCH64-EXTENSION: sqlite-vss-macos-aarch64 + ARTIFACT-WINDOWS-X86_64-EXTENSION: sqlite-vss-windows-x86_64 + ARTIFACT-LINUX-X86_64-WHEELS: sqlite-vss-linux-x86_64-wheels + ARTIFACT-MACOS-X86_64-WHEELS: sqlite-vss-macos-x86_64-wheels + ARTIFACT-MACOS-AARCH64-WHEELS: sqlite-vss-macos-aarch64-wheels jobs: build-linux-x86_64-extension: runs-on: ubuntu-20.04 @@ -33,6 +38,23 @@ jobs: with: name: ${{ env.ARTIFACT-LINUX-X86_64-EXTENSION }} path: dist/release/* + build-linux-x86_64-python: + runs-on: ubuntu-20.04 + needs: [build-linux-x86_64-extension] + steps: + - uses: actions/checkout@v3 + - uses: actions/download-artifact@v3 + with: + name: ${{ env.ARTIFACT-LINUX-X86_64-EXTENSION }} + path: dist/release/ + - uses: actions/setup-python@v3 + - run: pip install wheel + - run: make python-release + - run: make datasette-release + - uses: actions/upload-artifact@v3 + with: + name: ${{ env.ARTIFACT-LINUX-X86_64-WHEELS }} + path: dist/release/wheels/*.whl build-macos-x86_64-extension: runs-on: macos-latest steps: @@ -67,10 +89,72 @@ jobs: with: name: ${{ env.ARTIFACT-MACOS-X86_64-EXTENSION }} path: dist/release/* + build-macos-x86_64-python: + runs-on: macos-latest + needs: [build-macos-x86_64-extension] + steps: + - uses: actions/checkout@v3 + - uses: actions/download-artifact@v3 + with: + name: ${{ env.ARTIFACT-MACOS-X86_64-EXTENSION }} + path: dist/release/ + - uses: actions/setup-python@v3 + - run: pip install wheel + - run: make python-release + - run: make datasette-release + - uses: actions/upload-artifact@v3 + with: + name: ${{ env.ARTIFACT-MACOS-X86_64-WHEELS }} + path: dist/release/wheels/*.whl + build-macos-aarch64-extension: + runs-on: [self-hosted, mm1] + steps: + - uses: actions/checkout@v3 + with: + submodules: "recursive" + - id: cache-sqlite-build + uses: actions/cache@v3 + with: + path: vendor/sqlite + key: ${{ runner.os }}-${{ hashFiles('vendor/get_sqlite.sh') }} + - if: steps.cache-sqlite-build.outputs.cache-hit != 'true' + run: ./vendor/get_sqlite.sh + - if: steps.cache-sqlite-build.outputs.cache-hit != 'true' + working-directory: vendor/sqlite + run: ./configure && make + - run: make patch-openmp + - run: make loadable-release static-release + env: + # `brew info libomp` gives the correct one, with .a file for static openmp builds + CC: /opt/homebrew/opt/llvm/bin/clang + CXX: /opt/homebrew/opt/llvm/bin/clang++ + LDFLAGS: "-L/opt/homebrew/opt/libomp/lib" + CPPFLAGS: "-I/opt/homebrew/opt/libomp/include" + - uses: actions/upload-artifact@v3 + with: + name: ${{ env.ARTIFACT-MACOS-AARCH64-EXTENSION }} + path: dist/release/* + build-macos-aarch64-python: + runs-on: [self-hosted, mm1] + needs: [build-macos-aarch64-extension] + steps: + - uses: actions/checkout@v3 + - uses: actions/download-artifact@v3 + with: + name: ${{ env.ARTIFACT-MACOS-AARCH64-EXTENSION }} + path: dist/release/ + - run: pip3 install wheel + - run: make python-release IS_MACOS_ARM=1 + - run: make datasette-release + - uses: actions/upload-artifact@v3 + with: + name: ${{ env.ARTIFACT-MACOS-AARCH64-WHEELS }} + path: dist/release/wheels/*.whl upload-deno: needs: [ build-macos-x86_64-extension, + build-macos-aarch64-extension, build-linux-x86_64-extension, ] permissions: @@ -86,6 +170,7 @@ jobs: env: ARTIFACT-LINUX-X86_64-EXTENSION: ${{ env.ARTIFACT-LINUX-X86_64-EXTENSION }} ARTIFACT-MACOS-X86_64-EXTENSION: ${{ env.ARTIFACT-MACOS-X86_64-EXTENSION }} + ARTIFACT-MACOS-AARCH64-EXTENSION: ${{ env.ARTIFACT-MACOS-AARCH64-EXTENSION }} with: github-token: ${{ secrets.GITHUB_TOKEN }} result-encoding: string @@ -96,6 +181,7 @@ jobs: needs: [ build-macos-x86_64-extension, + build-macos-aarch64-extension, build-linux-x86_64-extension, ] permissions: @@ -113,6 +199,7 @@ jobs: github-token: ${{ secrets.GITHUB_TOKEN }} platforms: | macos-x86_64: ${{ env.ARTIFACT-MACOS-X86_64-EXTENSION }}/* + macos-aarch64: ${{ env.ARTIFACT-MACOS-AARCH64-EXTENSION }}/* linux-x86_64: ${{ env.ARTIFACT-LINUX-X86_64-EXTENSION }}/* upload-checksums: needs: [upload-extensions, upload-deno] @@ -140,3 +227,138 @@ jobs: name: "checksums.txt", data: process.env.CHECKSUMS, }); + upload-hex: + runs-on: ubuntu-latest + needs: [upload-extensions] + steps: + - uses: actions/checkout@v2 + - uses: erlef/setup-beam@v1 + with: + otp-version: "24" + rebar3-version: "3.16.1" + elixir-version: "1.14" + - run: ./scripts/elixir_generate_checksum.sh "${{ needs.upload-extensions.outputs.checksums }}" + - run: mix deps.get + working-directory: ./bindings/elixir + - run: mix compile --docs + working-directory: ./bindings/elixir + - run: mix hex.publish --yes + working-directory: ./bindings/elixir + env: + HEX_API_KEY: ${{ secrets.HEX_API_KEY }} + upload-npm: + needs: + [ + build-macos-x86_64-extension, + build-macos-aarch64-extension, + build-linux-x86_64-extension, + ] + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: actions/download-artifact@v2 + - run: | + cp ${{ env.ARTIFACT-LINUX-X86_64-EXTENSION }}/*.so bindings/node/sqlite-vss-linux-x64/lib/ + cp ${{ env.ARTIFACT-MACOS-X86_64-EXTENSION }}/*.dylib bindings/node/sqlite-vss-darwin-x64/lib/ + cp ${{ env.ARTIFACT-MACOS-AARCH64-EXTENSION }}/*.dylib bindings/node/sqlite-vss-darwin-arm64/lib/ + - uses: actions/setup-node@v3 + with: + node-version: "16" + registry-url: "https://registry.npmjs.org" + - name: Publish NPM sqlite-vss-linux-x64 + working-directory: bindings/node/sqlite-vss-linux-x64 + run: npm publish --access public + env: + NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} + - name: Publish NPM sqlite-vss-darwin-x64 + working-directory: bindings/node/sqlite-vss-darwin-x64 + run: npm publish --access public + env: + NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} + - name: Publish NPM sqlite-vss-darwin-arm64 + working-directory: bindings/node/sqlite-vss-darwin-arm64 + run: npm publish --access public + env: + NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} + - name: Publish NPM sqlite-vss + working-directory: bindings/node/sqlite-vss + run: npm publish --access public + env: + NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} + upload-pypi: + needs: + [ + build-linux-x86_64-python, + build-macos-x86_64-python, + build-macos-aarch64-python, + ] + runs-on: ubuntu-latest + steps: + - uses: actions/download-artifact@v3 + with: + name: ${{ env.ARTIFACT-LINUX-X86_64-WHEELS }} + path: dist + - uses: actions/download-artifact@v3 + with: + name: ${{ env.ARTIFACT-MACOS-X86_64-WHEELS }} + path: dist + - uses: actions/download-artifact@v3 + with: + name: ${{ env.ARTIFACT-MACOS-AARCH64-WHEELS }} + path: dist + - uses: pypa/gh-action-pypi-publish@release/v1 + with: + password: ${{ secrets.PYPI_API_TOKEN }} + skip-existing: true + upload-gem: + needs: + [ + build-macos-x86_64-extension, + build-macos-aarch64-extension, + build-linux-x86_64-extension, + ] + permissions: + contents: write + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions/download-artifact@v2 + - uses: ruby/setup-ruby@v1 + with: + ruby-version: 3.2 + - run: | + rm bindings/ruby/lib/*.{dylib,so,dll} || true + cp ${{ env.ARTIFACT-MACOS-X86_64-EXTENSION }}/*.dylib bindings/ruby/lib + gem -C bindings/ruby build -o x86_64-darwin.gem sqlite_vss.gemspec + env: + PLATFORM: x86_64-darwin + - run: | + rm bindings/ruby/lib/*.{dylib,so,dll} || true + cp ${{ env.ARTIFACT-MACOS-AARCH64-EXTENSION }}/*.dylib bindings/ruby/lib + gem -C bindings/ruby build -o arm64-darwin.gem sqlite_vss.gemspec + env: + PLATFORM: arm64-darwin + - run: | + rm bindings/ruby/lib/*.{dylib,so,dll} || true + cp ${{ env.ARTIFACT-LINUX-X86_64-EXTENSION }}/*.so bindings/ruby/lib + gem -C bindings/ruby build -o x86_64-linux.gem sqlite_vss.gemspec + env: + PLATFORM: x86_64-linux + - run: | + gem push bindings/ruby/x86_64-darwin.gem + gem push bindings/ruby/arm64-darwin.gem + gem push bindings/ruby/x86_64-linux.gem + env: + GEM_HOST_API_KEY: ${{ secrets.GEM_HOST_API_KEY }} + upload-crate: + runs-on: ubuntu-latest + needs: [upload-extensions] + steps: + - uses: actions/checkout@v2 + - uses: actions-rs/toolchain@v1 + with: + toolchain: stable + - run: cargo publish --no-verify + working-directory: ./bindings/rust + env: + CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }} \ No newline at end of file diff --git a/.github/workflows/upload-deno-assets.js b/.github/workflows/upload-deno-assets.js index beb4b48..dee6e23 100644 --- a/.github/workflows/upload-deno-assets.js +++ b/.github/workflows/upload-deno-assets.js @@ -14,6 +14,14 @@ module.exports = async ({ github, context }) => { path: `${process.env["ARTIFACT-MACOS-X86_64-EXTENSION"]}/vss0.dylib`, name: `sqlite-vss-${VERSION}-deno-darwin-x86_64.vss0.dylib`, }, + { + path: `${process.env["ARTIFACT-MACOS-AARCH64-EXTENSION"]}/vector0.dylib`, + name: `sqlite-vss-${VERSION}-deno-darwin-aarch64.vector0.dylib`, + }, + { + path: `${process.env["ARTIFACT-MACOS-AARCH64-EXTENSION"]}/vss0.dylib`, + name: `sqlite-vss-${VERSION}-deno-darwin-aarch64.vss0.dylib`, + }, { path: `${process.env["ARTIFACT-LINUX-X86_64-EXTENSION"]}/vector0.so`, name: `sqlite-vss-${VERSION}-deno-linux-x86_64.vector0.so`, diff --git a/.github/workflows/upload.js b/.github/workflows/upload.js index 97f9acc..6ac55d5 100644 --- a/.github/workflows/upload.js +++ b/.github/workflows/upload.js @@ -19,6 +19,11 @@ const vss = { os: "darwin", cpu: "x86_64", }, + { + path: `${process.env["ARTIFACT-MACOS-AARCH64-EXTENSION"]}/vss0.dylib`, + os: "darwin", + cpu: "aarch64", + }, ], }; const vector = { @@ -35,6 +40,11 @@ const vector = { os: "darwin", cpu: "x86_64", }, + { + path: `${process.env["ARTIFACT-MACOS-AARCH64-EXTENSION"]}/vector0.dylib`, + os: "darwin", + cpu: "aarch64", + }, ], }; From a42d4e3cf6c9f368ddbc2f777dd4bec1994790f0 Mon Sep 17 00:00:00 2001 From: Thomas Hansen Date: Mon, 4 Sep 2023 10:37:43 +0300 Subject: [PATCH 49/66] Removing limit requirement --- src/sqlite-vss.cpp | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/sqlite-vss.cpp b/src/sqlite-vss.cpp index c00a0f7..e16e477 100644 --- a/src/sqlite-vss.cpp +++ b/src/sqlite-vss.cpp @@ -433,12 +433,6 @@ static int vssIndexFilter(sqlite3_vtab_cursor *pVtabCursor, if (argc > 1) { pCursor->setLimit(sqlite3_value_int(argv[1])); - } else { - - auto ptrVtab = static_cast(pCursor->pVtab); - ptrVtab->setError(sqlite3_mprintf("LIMIT required on vss_search() queries")); - - return SQLITE_ERROR; } } else { From 400d4d54424359c740586bff5f59528b9d5e8580 Mon Sep 17 00:00:00 2001 From: Thomas Hansen Date: Mon, 4 Sep 2023 11:41:00 +0300 Subject: [PATCH 50/66] Update release.yaml --- .github/workflows/release.yaml | 227 --------------------------------- 1 file changed, 227 deletions(-) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index cbdfcc8..a0e3aa2 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -8,11 +8,7 @@ permissions: env: ARTIFACT-LINUX-X86_64-EXTENSION: sqlite-vss-linux-x86_64 ARTIFACT-MACOS-X86_64-EXTENSION: sqlite-vss-macos-x86_64 - ARTIFACT-MACOS-AARCH64-EXTENSION: sqlite-vss-macos-aarch64 ARTIFACT-WINDOWS-X86_64-EXTENSION: sqlite-vss-windows-x86_64 - ARTIFACT-LINUX-X86_64-WHEELS: sqlite-vss-linux-x86_64-wheels - ARTIFACT-MACOS-X86_64-WHEELS: sqlite-vss-macos-x86_64-wheels - ARTIFACT-MACOS-AARCH64-WHEELS: sqlite-vss-macos-aarch64-wheels jobs: build-linux-x86_64-extension: runs-on: ubuntu-20.04 @@ -38,23 +34,6 @@ jobs: with: name: ${{ env.ARTIFACT-LINUX-X86_64-EXTENSION }} path: dist/release/* - build-linux-x86_64-python: - runs-on: ubuntu-20.04 - needs: [build-linux-x86_64-extension] - steps: - - uses: actions/checkout@v3 - - uses: actions/download-artifact@v3 - with: - name: ${{ env.ARTIFACT-LINUX-X86_64-EXTENSION }} - path: dist/release/ - - uses: actions/setup-python@v3 - - run: pip install wheel - - run: make python-release - - run: make datasette-release - - uses: actions/upload-artifact@v3 - with: - name: ${{ env.ARTIFACT-LINUX-X86_64-WHEELS }} - path: dist/release/wheels/*.whl build-macos-x86_64-extension: runs-on: macos-latest steps: @@ -89,99 +68,10 @@ jobs: with: name: ${{ env.ARTIFACT-MACOS-X86_64-EXTENSION }} path: dist/release/* - build-macos-x86_64-python: - runs-on: macos-latest - needs: [build-macos-x86_64-extension] - steps: - - uses: actions/checkout@v3 - - uses: actions/download-artifact@v3 - with: - name: ${{ env.ARTIFACT-MACOS-X86_64-EXTENSION }} - path: dist/release/ - - uses: actions/setup-python@v3 - - run: pip install wheel - - run: make python-release - - run: make datasette-release - - uses: actions/upload-artifact@v3 - with: - name: ${{ env.ARTIFACT-MACOS-X86_64-WHEELS }} - path: dist/release/wheels/*.whl - build-macos-aarch64-extension: - runs-on: [self-hosted, mm1] - steps: - - uses: actions/checkout@v3 - with: - submodules: "recursive" - - id: cache-sqlite-build - uses: actions/cache@v3 - with: - path: vendor/sqlite - key: ${{ runner.os }}-${{ hashFiles('vendor/get_sqlite.sh') }} - - if: steps.cache-sqlite-build.outputs.cache-hit != 'true' - run: ./vendor/get_sqlite.sh - - if: steps.cache-sqlite-build.outputs.cache-hit != 'true' - working-directory: vendor/sqlite - run: ./configure && make - - run: make patch-openmp - - run: make loadable-release static-release - env: - # `brew info libomp` gives the correct one, with .a file for static openmp builds - CC: /opt/homebrew/opt/llvm/bin/clang - CXX: /opt/homebrew/opt/llvm/bin/clang++ - LDFLAGS: "-L/opt/homebrew/opt/libomp/lib" - CPPFLAGS: "-I/opt/homebrew/opt/libomp/include" - - uses: actions/upload-artifact@v3 - with: - name: ${{ env.ARTIFACT-MACOS-AARCH64-EXTENSION }} - path: dist/release/* - build-macos-aarch64-python: - runs-on: [self-hosted, mm1] - needs: [build-macos-aarch64-extension] - steps: - - uses: actions/checkout@v3 - - uses: actions/download-artifact@v3 - with: - name: ${{ env.ARTIFACT-MACOS-AARCH64-EXTENSION }} - path: dist/release/ - - run: pip3 install wheel - - run: make python-release IS_MACOS_ARM=1 - - run: make datasette-release - - uses: actions/upload-artifact@v3 - with: - name: ${{ env.ARTIFACT-MACOS-AARCH64-WHEELS }} - path: dist/release/wheels/*.whl - upload-deno: - needs: - [ - build-macos-x86_64-extension, - build-macos-aarch64-extension, - build-linux-x86_64-extension, - ] - permissions: - contents: write - runs-on: ubuntu-latest - outputs: - deno-checksums: ${{ steps.deno-assets.outputs.result }} - steps: - - uses: actions/checkout@v3 - - uses: actions/download-artifact@v2 - - id: deno-assets - uses: actions/github-script@v6 - env: - ARTIFACT-LINUX-X86_64-EXTENSION: ${{ env.ARTIFACT-LINUX-X86_64-EXTENSION }} - ARTIFACT-MACOS-X86_64-EXTENSION: ${{ env.ARTIFACT-MACOS-X86_64-EXTENSION }} - ARTIFACT-MACOS-AARCH64-EXTENSION: ${{ env.ARTIFACT-MACOS-AARCH64-EXTENSION }} - with: - github-token: ${{ secrets.GITHUB_TOKEN }} - result-encoding: string - script: | - const script = require('.github/workflows/upload-deno-assets.js') - return await script({github, context}) upload-extensions: needs: [ build-macos-x86_64-extension, - build-macos-aarch64-extension, build-linux-x86_64-extension, ] permissions: @@ -199,7 +89,6 @@ jobs: github-token: ${{ secrets.GITHUB_TOKEN }} platforms: | macos-x86_64: ${{ env.ARTIFACT-MACOS-X86_64-EXTENSION }}/* - macos-aarch64: ${{ env.ARTIFACT-MACOS-AARCH64-EXTENSION }}/* linux-x86_64: ${{ env.ARTIFACT-LINUX-X86_64-EXTENSION }}/* upload-checksums: needs: [upload-extensions, upload-deno] @@ -246,119 +135,3 @@ jobs: working-directory: ./bindings/elixir env: HEX_API_KEY: ${{ secrets.HEX_API_KEY }} - upload-npm: - needs: - [ - build-macos-x86_64-extension, - build-macos-aarch64-extension, - build-linux-x86_64-extension, - ] - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - uses: actions/download-artifact@v2 - - run: | - cp ${{ env.ARTIFACT-LINUX-X86_64-EXTENSION }}/*.so bindings/node/sqlite-vss-linux-x64/lib/ - cp ${{ env.ARTIFACT-MACOS-X86_64-EXTENSION }}/*.dylib bindings/node/sqlite-vss-darwin-x64/lib/ - cp ${{ env.ARTIFACT-MACOS-AARCH64-EXTENSION }}/*.dylib bindings/node/sqlite-vss-darwin-arm64/lib/ - - uses: actions/setup-node@v3 - with: - node-version: "16" - registry-url: "https://registry.npmjs.org" - - name: Publish NPM sqlite-vss-linux-x64 - working-directory: bindings/node/sqlite-vss-linux-x64 - run: npm publish --access public - env: - NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} - - name: Publish NPM sqlite-vss-darwin-x64 - working-directory: bindings/node/sqlite-vss-darwin-x64 - run: npm publish --access public - env: - NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} - - name: Publish NPM sqlite-vss-darwin-arm64 - working-directory: bindings/node/sqlite-vss-darwin-arm64 - run: npm publish --access public - env: - NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} - - name: Publish NPM sqlite-vss - working-directory: bindings/node/sqlite-vss - run: npm publish --access public - env: - NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} - upload-pypi: - needs: - [ - build-linux-x86_64-python, - build-macos-x86_64-python, - build-macos-aarch64-python, - ] - runs-on: ubuntu-latest - steps: - - uses: actions/download-artifact@v3 - with: - name: ${{ env.ARTIFACT-LINUX-X86_64-WHEELS }} - path: dist - - uses: actions/download-artifact@v3 - with: - name: ${{ env.ARTIFACT-MACOS-X86_64-WHEELS }} - path: dist - - uses: actions/download-artifact@v3 - with: - name: ${{ env.ARTIFACT-MACOS-AARCH64-WHEELS }} - path: dist - - uses: pypa/gh-action-pypi-publish@release/v1 - with: - password: ${{ secrets.PYPI_API_TOKEN }} - skip-existing: true - upload-gem: - needs: - [ - build-macos-x86_64-extension, - build-macos-aarch64-extension, - build-linux-x86_64-extension, - ] - permissions: - contents: write - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - uses: actions/download-artifact@v2 - - uses: ruby/setup-ruby@v1 - with: - ruby-version: 3.2 - - run: | - rm bindings/ruby/lib/*.{dylib,so,dll} || true - cp ${{ env.ARTIFACT-MACOS-X86_64-EXTENSION }}/*.dylib bindings/ruby/lib - gem -C bindings/ruby build -o x86_64-darwin.gem sqlite_vss.gemspec - env: - PLATFORM: x86_64-darwin - - run: | - rm bindings/ruby/lib/*.{dylib,so,dll} || true - cp ${{ env.ARTIFACT-MACOS-AARCH64-EXTENSION }}/*.dylib bindings/ruby/lib - gem -C bindings/ruby build -o arm64-darwin.gem sqlite_vss.gemspec - env: - PLATFORM: arm64-darwin - - run: | - rm bindings/ruby/lib/*.{dylib,so,dll} || true - cp ${{ env.ARTIFACT-LINUX-X86_64-EXTENSION }}/*.so bindings/ruby/lib - gem -C bindings/ruby build -o x86_64-linux.gem sqlite_vss.gemspec - env: - PLATFORM: x86_64-linux - - run: | - gem push bindings/ruby/x86_64-darwin.gem - gem push bindings/ruby/arm64-darwin.gem - gem push bindings/ruby/x86_64-linux.gem - env: - GEM_HOST_API_KEY: ${{ secrets.GEM_HOST_API_KEY }} - upload-crate: - runs-on: ubuntu-latest - needs: [upload-extensions] - steps: - - uses: actions/checkout@v2 - - uses: actions-rs/toolchain@v1 - with: - toolchain: stable - - run: cargo publish --no-verify - working-directory: ./bindings/rust - env: - CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }} \ No newline at end of file From 62e136372361a048929a8b2b34bd8ffa59e8490a Mon Sep 17 00:00:00 2001 From: Thomas Hansen Date: Mon, 4 Sep 2023 11:42:35 +0300 Subject: [PATCH 51/66] Update release.yaml --- .github/workflows/release.yaml | 45 ---------------------------------- 1 file changed, 45 deletions(-) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index a0e3aa2..2391c39 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -90,48 +90,3 @@ jobs: platforms: | macos-x86_64: ${{ env.ARTIFACT-MACOS-X86_64-EXTENSION }}/* linux-x86_64: ${{ env.ARTIFACT-LINUX-X86_64-EXTENSION }}/* - upload-checksums: - needs: [upload-extensions, upload-deno] - runs-on: ubuntu-latest - permissions: - contents: write - steps: - - uses: actions/github-script@v6 - env: - CHECKSUMS: "${{ needs.upload-extensions.outputs.checksums }}\n${{ needs.upload-deno.outputs.deno-checksums }}" - with: - github-token: ${{ secrets.GITHUB_TOKEN }} - script: | - const { owner, repo } = context.repo; - const release = await github.rest.repos.getReleaseByTag({ - owner, - repo, - tag: process.env.GITHUB_REF.replace("refs/tags/", ""), - }); - const release_id = release.data.id; - github.rest.repos.uploadReleaseAsset({ - owner, - repo, - release_id, - name: "checksums.txt", - data: process.env.CHECKSUMS, - }); - upload-hex: - runs-on: ubuntu-latest - needs: [upload-extensions] - steps: - - uses: actions/checkout@v2 - - uses: erlef/setup-beam@v1 - with: - otp-version: "24" - rebar3-version: "3.16.1" - elixir-version: "1.14" - - run: ./scripts/elixir_generate_checksum.sh "${{ needs.upload-extensions.outputs.checksums }}" - - run: mix deps.get - working-directory: ./bindings/elixir - - run: mix compile --docs - working-directory: ./bindings/elixir - - run: mix hex.publish --yes - working-directory: ./bindings/elixir - env: - HEX_API_KEY: ${{ secrets.HEX_API_KEY }} From 86161daceef0b37e4cd4806a8237ecb043f4e046 Mon Sep 17 00:00:00 2001 From: Thomas Hansen Date: Mon, 4 Sep 2023 12:22:14 +0300 Subject: [PATCH 52/66] Update sqlite-vss.cpp --- src/sqlite-vss.cpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/sqlite-vss.cpp b/src/sqlite-vss.cpp index e16e477..27b2343 100644 --- a/src/sqlite-vss.cpp +++ b/src/sqlite-vss.cpp @@ -411,6 +411,9 @@ static int vssIndexFilter(sqlite3_vtab_cursor *pVtabCursor, pCursor->setQuery_type(QueryType::search); vec_ptr query_vector; + int nq = 1; + auto index = pCursor->getTable()->getIndexes().at(idxNum); + auto params = static_cast(sqlite3_value_pointer(argv[0], "vss0_searchparams")); if (params != nullptr) { @@ -433,6 +436,10 @@ static int vssIndexFilter(sqlite3_vtab_cursor *pVtabCursor, if (argc > 1) { pCursor->setLimit(sqlite3_value_int(argv[1])); + } else { + + auto ptrVtab = static_cast(pCursor->pVtab); + pCursor->setLimit(index->size()); } } else { @@ -443,9 +450,6 @@ static int vssIndexFilter(sqlite3_vtab_cursor *pVtabCursor, return SQLITE_ERROR; } - int nq = 1; - auto index = pCursor->getTable()->getIndexes().at(idxNum); - if (!index->canQuery(query_vector)) { auto ptrVtab = static_cast(pCursor->pVtab); From 8180c32a5a2371900fc0aabb8cce585beb6d6b7f Mon Sep 17 00:00:00 2001 From: "AINIRO.IO" Date: Fri, 8 Mar 2024 16:30:16 +0200 Subject: [PATCH 53/66] Update release.yaml --- .github/workflows/release.yaml | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 2391c39..035f364 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -9,7 +9,36 @@ env: ARTIFACT-LINUX-X86_64-EXTENSION: sqlite-vss-linux-x86_64 ARTIFACT-MACOS-X86_64-EXTENSION: sqlite-vss-macos-x86_64 ARTIFACT-WINDOWS-X86_64-EXTENSION: sqlite-vss-windows-x86_64 + ARTIFACT-MACOS-AARCH64-EXTENSION: sqlite-vss-macos-aarch64 jobs: + build-macos-aarch64-extension: + runs-on: [self-hosted, mm1] + steps: + - uses: actions/checkout@v3 + with: + submodules: "recursive" + - id: cache-sqlite-build + uses: actions/cache@v3 + with: + path: vendor/sqlite + key: ${{ runner.os }}-${{ hashFiles('vendor/get_sqlite.sh') }} + - if: steps.cache-sqlite-build.outputs.cache-hit != 'true' + run: ./vendor/get_sqlite.sh + - if: steps.cache-sqlite-build.outputs.cache-hit != 'true' + working-directory: vendor/sqlite + run: ./configure && make + - run: make patch-openmp + - run: make loadable-release static-release + env: + # `brew info libomp` gives the correct one, with .a file for static openmp builds + CC: /opt/homebrew/opt/llvm/bin/clang + CXX: /opt/homebrew/opt/llvm/bin/clang++ + LDFLAGS: "-L/opt/homebrew/opt/libomp/lib" + CPPFLAGS: "-I/opt/homebrew/opt/libomp/include" + - uses: actions/upload-artifact@v3 + with: + name: ${{ env.ARTIFACT-MACOS-AARCH64-EXTENSION }} + path: dist/release/* build-linux-x86_64-extension: runs-on: ubuntu-20.04 steps: @@ -72,6 +101,7 @@ jobs: needs: [ build-macos-x86_64-extension, + build-macos-aarch64-extension, build-linux-x86_64-extension, ] permissions: @@ -89,4 +119,5 @@ jobs: github-token: ${{ secrets.GITHUB_TOKEN }} platforms: | macos-x86_64: ${{ env.ARTIFACT-MACOS-X86_64-EXTENSION }}/* + macos-aarch64: ${{ env.ARTIFACT-MACOS-AARCH64-EXTENSION }}/* linux-x86_64: ${{ env.ARTIFACT-LINUX-X86_64-EXTENSION }}/* From c8f5d23a91a686ab98ad214198089541f718555a Mon Sep 17 00:00:00 2001 From: "AINIRO.IO" Date: Fri, 8 Mar 2024 16:34:42 +0200 Subject: [PATCH 54/66] Update release.yaml --- .github/workflows/release.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 035f364..6624f14 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -12,7 +12,7 @@ env: ARTIFACT-MACOS-AARCH64-EXTENSION: sqlite-vss-macos-aarch64 jobs: build-macos-aarch64-extension: - runs-on: [self-hosted, mm1] + runs-on: macos-13-xlarge steps: - uses: actions/checkout@v3 with: From e2aef0e43763b75a9bfcc99f0ec4b243b8899032 Mon Sep 17 00:00:00 2001 From: "AINIRO.IO" Date: Fri, 8 Mar 2024 16:38:27 +0200 Subject: [PATCH 55/66] Update release.yaml --- .github/workflows/release.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 6624f14..035f364 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -12,7 +12,7 @@ env: ARTIFACT-MACOS-AARCH64-EXTENSION: sqlite-vss-macos-aarch64 jobs: build-macos-aarch64-extension: - runs-on: macos-13-xlarge + runs-on: [self-hosted, mm1] steps: - uses: actions/checkout@v3 with: From 7d378282cefbc6ab1e18d55836aec032f640d12f Mon Sep 17 00:00:00 2001 From: "AINIRO.IO" Date: Fri, 8 Mar 2024 17:18:20 +0200 Subject: [PATCH 56/66] Update release.yaml --- .github/workflows/release.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 035f364..7a264a0 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -12,7 +12,7 @@ env: ARTIFACT-MACOS-AARCH64-EXTENSION: sqlite-vss-macos-aarch64 jobs: build-macos-aarch64-extension: - runs-on: [self-hosted, mm1] + runs-on: flyci-macos-large-latest-m1 steps: - uses: actions/checkout@v3 with: From 5831d6128878dbbc170c5a65fe6e711645775f82 Mon Sep 17 00:00:00 2001 From: "AINIRO.IO" Date: Fri, 8 Mar 2024 17:26:26 +0200 Subject: [PATCH 57/66] Update release.yaml --- .github/workflows/release.yaml | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 7a264a0..b8a2bbc 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -13,10 +13,11 @@ env: jobs: build-macos-aarch64-extension: runs-on: flyci-macos-large-latest-m1 + steps: steps: - uses: actions/checkout@v3 with: - submodules: "recursive" + submodules: recursive - id: cache-sqlite-build uses: actions/cache@v3 with: @@ -27,17 +28,23 @@ jobs: - if: steps.cache-sqlite-build.outputs.cache-hit != 'true' working-directory: vendor/sqlite run: ./configure && make + - run: brew install llvm + - id: cache-cmake-build + uses: actions/cache@v3 + with: + path: build + key: ${{ runner.os }}-build - run: make patch-openmp - run: make loadable-release static-release env: # `brew info libomp` gives the correct one, with .a file for static openmp builds - CC: /opt/homebrew/opt/llvm/bin/clang - CXX: /opt/homebrew/opt/llvm/bin/clang++ - LDFLAGS: "-L/opt/homebrew/opt/libomp/lib" - CPPFLAGS: "-I/opt/homebrew/opt/libomp/include" + CC: /usr/local/opt/llvm/bin/clang + CXX: /usr/local/opt/llvm/bin/clang++ + LDFLAGS: "-L/usr/local/opt/libomp/lib/" + CPPFLAGS: "-I/usr/local/opt/libomp/include/" - uses: actions/upload-artifact@v3 with: - name: ${{ env.ARTIFACT-MACOS-AARCH64-EXTENSION }} + name: ${{ env.ARTIFACT-MACOS-X86_64-EXTENSION }} path: dist/release/* build-linux-x86_64-extension: runs-on: ubuntu-20.04 From 313210bd6b99e65d9c1f1da8217348937fa08dc5 Mon Sep 17 00:00:00 2001 From: "AINIRO.IO" Date: Fri, 8 Mar 2024 17:28:12 +0200 Subject: [PATCH 58/66] Update release.yaml --- .github/workflows/release.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index b8a2bbc..a042c2d 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -13,7 +13,6 @@ env: jobs: build-macos-aarch64-extension: runs-on: flyci-macos-large-latest-m1 - steps: steps: - uses: actions/checkout@v3 with: From 6eb557bb0bc245a5a8115f67e458e0d4d614ce0d Mon Sep 17 00:00:00 2001 From: "AINIRO.IO" Date: Fri, 8 Mar 2024 17:30:24 +0200 Subject: [PATCH 59/66] Update release.yaml --- .github/workflows/release.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index a042c2d..cf10a30 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -13,6 +13,7 @@ env: jobs: build-macos-aarch64-extension: runs-on: flyci-macos-large-latest-m1 + steps: steps: - uses: actions/checkout@v3 with: @@ -43,7 +44,7 @@ jobs: CPPFLAGS: "-I/usr/local/opt/libomp/include/" - uses: actions/upload-artifact@v3 with: - name: ${{ env.ARTIFACT-MACOS-X86_64-EXTENSION }} + name: ${{ env.ARTIFACT-MACOS-AARCH64-EXTENSION }} path: dist/release/* build-linux-x86_64-extension: runs-on: ubuntu-20.04 From 60482aa1027745c746742dbbbc48a5aa9e1c50a8 Mon Sep 17 00:00:00 2001 From: "AINIRO.IO" Date: Fri, 8 Mar 2024 17:31:41 +0200 Subject: [PATCH 60/66] Update release.yaml --- .github/workflows/release.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index cf10a30..f221b9a 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -13,7 +13,6 @@ env: jobs: build-macos-aarch64-extension: runs-on: flyci-macos-large-latest-m1 - steps: steps: - uses: actions/checkout@v3 with: From 93398a9624a3bca476112424e8b4b2798e8f438e Mon Sep 17 00:00:00 2001 From: "AINIRO.IO" Date: Fri, 8 Mar 2024 17:44:06 +0200 Subject: [PATCH 61/66] Update release.yaml --- .github/workflows/release.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index f221b9a..d48fddf 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -34,6 +34,7 @@ jobs: path: build key: ${{ runner.os }}-build - run: make patch-openmp + - run: brew install llvm - run: make loadable-release static-release env: # `brew info libomp` gives the correct one, with .a file for static openmp builds From 197a4850874c69de07046beb697ec039922503e2 Mon Sep 17 00:00:00 2001 From: "AINIRO.IO" Date: Fri, 8 Mar 2024 17:51:31 +0200 Subject: [PATCH 62/66] Update release.yaml --- .github/workflows/release.yaml | 7 ------- 1 file changed, 7 deletions(-) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index d48fddf..fb1211b 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -34,14 +34,7 @@ jobs: path: build key: ${{ runner.os }}-build - run: make patch-openmp - - run: brew install llvm - run: make loadable-release static-release - env: - # `brew info libomp` gives the correct one, with .a file for static openmp builds - CC: /usr/local/opt/llvm/bin/clang - CXX: /usr/local/opt/llvm/bin/clang++ - LDFLAGS: "-L/usr/local/opt/libomp/lib/" - CPPFLAGS: "-I/usr/local/opt/libomp/include/" - uses: actions/upload-artifact@v3 with: name: ${{ env.ARTIFACT-MACOS-AARCH64-EXTENSION }} From a787053efdc9902a02331acc98dcda3a2696e0c4 Mon Sep 17 00:00:00 2001 From: "AINIRO.IO" Date: Fri, 8 Mar 2024 17:56:36 +0200 Subject: [PATCH 63/66] Update release.yaml --- .github/workflows/release.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index fb1211b..efa4c95 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -35,6 +35,10 @@ jobs: key: ${{ runner.os }}-build - run: make patch-openmp - run: make loadable-release static-release + env: + # `brew info libomp` gives the correct one, with .a file for static openmp builds + LDFLAGS: "-L/usr/local/opt/libomp/lib/" + CPPFLAGS: "-I/usr/local/opt/libomp/include/" - uses: actions/upload-artifact@v3 with: name: ${{ env.ARTIFACT-MACOS-AARCH64-EXTENSION }} From ecdc4e4bed35278614a949cdacaa142f96532fad Mon Sep 17 00:00:00 2001 From: "AINIRO.IO" Date: Fri, 8 Mar 2024 18:04:20 +0200 Subject: [PATCH 64/66] Update release.yaml --- .github/workflows/release.yaml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index efa4c95..0528f05 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -37,8 +37,10 @@ jobs: - run: make loadable-release static-release env: # `brew info libomp` gives the correct one, with .a file for static openmp builds - LDFLAGS: "-L/usr/local/opt/libomp/lib/" - CPPFLAGS: "-I/usr/local/opt/libomp/include/" + CC: /opt/homebrew/opt/llvm/bin/clang + CXX: /opt/homebrew/opt/llvm/bin/clang++ + LDFLAGS: "-L/opt/homebrew/opt/libomp/lib" + CPPFLAGS: "-I/opt/homebrew/opt/libomp/include" - uses: actions/upload-artifact@v3 with: name: ${{ env.ARTIFACT-MACOS-AARCH64-EXTENSION }} From c86725041b2df1a446518365e1d96146b7c67dff Mon Sep 17 00:00:00 2001 From: "AINIRO.IO" Date: Sat, 9 Mar 2024 07:04:09 +0200 Subject: [PATCH 65/66] Update release.yaml --- .github/workflows/release.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 0528f05..f221b9a 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -37,10 +37,10 @@ jobs: - run: make loadable-release static-release env: # `brew info libomp` gives the correct one, with .a file for static openmp builds - CC: /opt/homebrew/opt/llvm/bin/clang - CXX: /opt/homebrew/opt/llvm/bin/clang++ - LDFLAGS: "-L/opt/homebrew/opt/libomp/lib" - CPPFLAGS: "-I/opt/homebrew/opt/libomp/include" + CC: /usr/local/opt/llvm/bin/clang + CXX: /usr/local/opt/llvm/bin/clang++ + LDFLAGS: "-L/usr/local/opt/libomp/lib/" + CPPFLAGS: "-I/usr/local/opt/libomp/include/" - uses: actions/upload-artifact@v3 with: name: ${{ env.ARTIFACT-MACOS-AARCH64-EXTENSION }} From aafe8ae2f8dd22fe437c745e53a2701f0b9e0d18 Mon Sep 17 00:00:00 2001 From: "AINIRO.IO" Date: Sat, 9 Mar 2024 07:07:55 +0200 Subject: [PATCH 66/66] Update release.yaml --- .github/workflows/release.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index f221b9a..9a6f2db 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -37,8 +37,8 @@ jobs: - run: make loadable-release static-release env: # `brew info libomp` gives the correct one, with .a file for static openmp builds - CC: /usr/local/opt/llvm/bin/clang - CXX: /usr/local/opt/llvm/bin/clang++ + CC: /opt/homebrew/opt/llvm/bin/clang + CXX: /opt/homebrew/opt/llvm/bin/clang++ LDFLAGS: "-L/usr/local/opt/libomp/lib/" CPPFLAGS: "-I/usr/local/opt/libomp/include/" - uses: actions/upload-artifact@v3