diff --git a/ydb/public/sdk/cpp/examples/vector_index/clusterizer.cpp b/ydb/public/sdk/cpp/examples/vector_index/clusterizer.cpp
new file mode 100644
index 000000000000..b72fded28dab
--- /dev/null
+++ b/ydb/public/sdk/cpp/examples/vector_index/clusterizer.cpp
@@ -0,0 +1,443 @@
+#include "clusterizer.h"
+
+#include "util/stream/output.h"
+#include "util/system/yassert.h"
+
+#include
+
+// Init() refuses to build fewer clusters than this.
+static constexpr ui64 kMinClusters = 4;
+// Next id FinalizeUpdate() gives to a cluster that has none yet.
+static TId gId = 1;
+
+// Views the bytes of `str`, without its last byte, as an array of T.
+// NOTE(review): the dropped byte is presumably a trailing type tag - confirm.
+// Returns an empty span when `str` is empty or when the remaining byte count
+// is not a multiple of sizeof(T).
+template
+static std::span GetArray(std::string_view str) {
+    if (Y_UNLIKELY(str.empty())) // `str.size() - 1` below would wrap around
+        return {};
+    const char* buf = str.data();
+    const size_t len = str.size() - 1;
+    if (Y_UNLIKELY(len % sizeof(T) != 0))
+        return {};
+
+    const auto count = len / sizeof(T);
+    return {reinterpret_cast(buf), count};
+}
+
+// Prints the start of a new phase and restarts the row counter and the timer.
+template
+void TClusterizer::TProgress::Reset(std::string_view operation, ui64 rows) {
+    // Y_UNUSED(rows, Rows, Curr, Last);
+    Cout << "Start " << operation << ": " << rows << Endl;
+    Curr = 0;
+    Rows = rows;
+    Last = std::chrono::steady_clock::now();
+}
+
+// Prints the share of rows read so far and the time since the previous report
+// (or since Reset), then restarts the timer.
+template
+void TClusterizer::TProgress::ForceReport() {
+    auto now = std::chrono::steady_clock::now();
+    // Rows is 0 for an empty dataset; print 0% instead of casting 0/0 (NaN).
+    Cout << "Already read\t" << static_cast(Rows > 0 ? Curr / Rows * 100.0 : 0.0)
+         << "% rows, time spent:\t" << std::chrono::duration{now - Last}.count()
+         << " sec, " << Curr << " / " << Rows << " rows" << Endl;
+    Last = now;
+}
+
+// Adds `read` rows to the counter. Before that, prints a report if at least
+// one second has passed since the previous one.
+template
+void TClusterizer::TProgress::Report(ui64 read) {
+    if (auto now = std::chrono::steady_clock::now(); (now - Last) >= std::chrono::seconds{1}) {
+        ForceReport();
+    }
+    Curr += read;
+}
+
+// Exchanges the contents of this batch with `other`.
+template
+void TClusterizer::TBatch::Swap(TBatch& other) {
+    std::swap(RawData, other.RawData);
+    IdData.swap(other.IdData);
+    RawDataStorage.swap(other.RawDataStorage);
+    Min.swap(other.Min);
+}
+
+// Drops all rows of the batch; RawData becomes an empty span.
+template
+void TClusterizer::TBatch::Clear() {
+    RawData = {};
+    IdData.clear();
+    RawDataStorage.clear();
+    Min.clear();
+}
+
+// A batch is empty when its RawData span has no rows.
+template
+bool TClusterizer::TBatch::Empty() const {
+    return RawData.empty();
+}
+
+// `it` is kept by reference. `tp` may be null; every use of ThreadPool in this
+// file is guarded by a null check, and ComputeBatch then runs inline.
+template
+TClusterizer::TClusterizer(TDatasetIterator& it, TDistance distance, TCreateParentChild create, NVectorIndex::TThreadPool* tp)
+    : It{it}
+    , Distance{std::move(distance)}
+    , Create{std::move(create)}
+    , ThreadPool{tp}
+{
+}
+
+// Returns the distance from `embedding` to the closest centroid in
+// Clusters.Coords together with that centroid's position. On ties the centroid
+// with the lowest position wins because the comparison is strict.
+template
+auto TClusterizer::Compute(TEmbedding embedding) -> TMin {
+    float minDistance = std::numeric_limits::max();
+    ui32 minPos = 0;
+    Y_ASSERT(!Clusters.Coords.empty());
+    for (ui32 pos = 0; auto& cluster : Clusters.Coords) {
+        auto distance = Distance(cluster, embedding);
+        if (distance < minDistance) {
+            minDistance = distance;
+            minPos = pos;
+        }
+        ++pos;
+    }
+    return {minDistance, minPos};
+}
+
+// Computes the closest centroid for every row of a batch, then calls `func`
+// on a batch whose Min entries are already computed.
+//
+// With a thread pool: waits until the previously submitted tasks are done,
+// swaps ToFill and ToCompute, and submits one task per pool thread, each
+// handling a contiguous slice of ToCompute. After the swap ToFill holds the
+// batch computed by the previous call, so `func` runs on it without waiting
+// for the tasks that were just submitted.
+// Without a thread pool: computes ToFill inline and calls `func` right away.
+template
+template
+void TClusterizer::ComputeBatch(Func&& func) {
+    if (ThreadPool) {
+        auto threads = ThreadPool->Size();
+        std::unique_lock lock{M};
+        while (Work != 0) { // tasks of the previous batch are still running
+            WaitWork.wait(lock);
+        }
+        ToCompute.Swap(ToFill);
+        if (!ToCompute.Empty()) {
+            Work = threads;
+            lock.unlock();
+            for (ui32 i = 0; i != threads; ++i) {
+                ThreadPool->Submit([this, i, threads] {
+                    auto size = ToCompute.RawData.size();
+                    auto batchSize = (size + threads - 1) / threads; // rows per task, rounded up
+                    auto start = i * batchSize;
+                    auto len = start < size ? std::min(size - start, batchSize) : 0;
+                    auto batch = ToCompute.RawData.subspan(start, len);
+                    for (const auto& rawEmbedding : batch) {
+                        auto embedding = GetArray(rawEmbedding);
+                        auto min = Compute(embedding);
+                        ToCompute.Min[start++] = min;
+                    }
+                    std::lock_guard lock{M};
+                    if (--Work == 0) { // the last task to finish wakes the waiter
+                        WaitWork.notify_one();
+                    }
+                });
+            }
+        }
+        Progress.Report(0);
+    } else {
+        for (size_t start = 0; const auto& rawEmbedding : ToFill.RawData) {
+            auto embedding = GetArray(rawEmbedding);
+            auto min = Compute(embedding);
+            ToFill.Min[start++] = min;
+        }
+    }
+    if (!ToFill.Empty()) {
+        func();
+        ToFill.Clear();
+        if (ThreadPool) {
+            Progress.Report(0);
+        }
+    }
+}
+
+// Adds `embedding` and its distance to the running sums of the cluster at
+// min.Pos and increments that cluster's row count.
+template
+void TClusterizer::Update(TMin min, TEmbedding embedding) {
+    Y_ASSERT(Clusters.Coords.size() == AggregatedClusters.size());
+    auto& cluster = AggregatedClusters[min.Pos];
+    cluster.Distance += min.Distance;
+    for (size_t pos = 0; auto& coord : cluster.Coords) {
+        coord += embedding[pos++];
+    }
+    cluster.Count++;
+}
+
+// Entry point. Caps the number of clusters at 1000, then either iterates Step
+// up to options.maxIterations times and calls Finalize, or, when Init rejects
+// the dataset, falls back to BadCluster. Returns the resulting clusters, moved
+// out of this object.
+template
+auto TClusterizer::Run(const TOptions& options) -> TClusters {
+    Y_ASSERT(!options.normalize); // normalize not supported
+    const ui64 clusters = std::min(options.maxK, 1000);
+    if (Init(clusters)) {
+        for (ui32 i = 1; i <= options.maxIterations; ++i) {
+            if (!Step(i, options.maxIterations, 1.1)) {
+                break;
+            }
+        }
+        Finalize();
+    } else {
+        BadCluster(options);
+    }
+    return std::move(Clusters);
+}
+
+// Fallback when Init fails: calls Create(options.parentId, ...) for every row
+// and stores the mean of all embeddings as the only centroid.
+template
+void TClusterizer::BadCluster(const TOptions& options) {
+    auto rows = It.Rows();
+    Cout << "Bad dataset {" << rows << "} for such clusterization { iterations: " << options.maxIterations << ", k: " << options.maxK << " }" << Endl;
+    if (ThreadPool) {
+        Progress.Reset("finalize bad dataset", It.Rows());
+    }
+    Clusters.Ids.emplace_back(options.parentId);
+    Clusters.Count.emplace_back(rows);
+
+    It.IterateId([&](TId id, TRawEmbedding rawEmbedding) {
+        auto embedding = GetArray(rawEmbedding);
+        if (AggregatedClusters.empty()) { // the first row fixes the dimension
+            auto& aggregated = AggregatedClusters.emplace_back();
+            aggregated.Coords.resize(embedding.size(), 0);
+        }
+        Update({}, embedding);
+        Create(options.parentId, id, std::move(rawEmbedding));
+    });
+    if (!AggregatedClusters.empty()) {
+        auto& aggregated = AggregatedClusters[0];
+        auto& coords = Clusters.Coords.emplace_back(aggregated.Coords.size());
+        auto coordsCount = static_cast(aggregated.Count);
+        for (size_t j = 0; auto& coord : aggregated.Coords) {
+            coords[j++] = static_cast(coord / coordsCount);
+            coord = 0;
+        }
+        aggregated.Count = 0;
+    }
+    if (ThreadPool) {
+        Progress.ForceReport();
+    }
+}
+
+// Asks the iterator's RandomK for k rows to use as the initial centroids and
+// sizes the per-cluster accumulators. Returns false when k is below
+// kMinClusters, when the dataset has no more than k * kMinClusterSize rows, or
+// when RandomK delivered fewer than k rows.
+template
+bool TClusterizer::Init(ui64 k) {
+    // TODO kmeans++, kmeans||?
+    if (k < kMinClusters || k * kMinClusterSize >= It.Rows()) {
+        return false;
+    }
+    Clusters.Coords.clear();
+    Clusters.Ids.clear();
+    Clusters.Ids.resize(k, 0);
+    Clusters.Count.resize(k, 0);
+    Clusters.Coords.reserve(k);
+    It.RandomK(k, [&](TRawEmbedding rawEmbedding) {
+        if (rawEmbedding.empty()) { // presumably a restart signal from RandomK - confirm
+            Clusters.Coords.clear();
+            return;
+        }
+        auto embedding = GetArray(rawEmbedding);
+        Clusters.Coords.emplace_back(embedding.begin(), embedding.end());
+    });
+    if (Clusters.Coords.size() < k) {
+        return false;
+    }
+    // TODO check distance between vectors in initial set
+    auto d = Clusters.Coords.front().size();
+    AggregatedClusters.resize(k);
+    for (auto& cluster : AggregatedClusters) {
+        cluster.Coords.resize(d, 0);
+    }
+    OldMean = std::numeric_limits::max();
+    BatchSize = (ui64{900'000} * ui64{ThreadPool ? ThreadPool->Size() : 1}) / (ui64{Clusters.Coords.size()} * ui64{Clusters.Coords.front().size()});
+    return true;
+}
+
+// Folds every row of ToFill into the accumulator of its closest cluster.
+template
+void TClusterizer::StepUpdate() {
+    for (size_t i = 0; i != ToFill.RawData.size(); ++i) {
+        auto embedding = GetArray(ToFill.RawData[i]);
+        Update(ToFill.Min[i], embedding);
+    }
+}
+
+// Flushes the rows still buffered in RawDataStorage at the end of a Step pass.
+template
+void TClusterizer::TriggerEmbeddings() {
+    ToFill.RawData = ToFill.RawDataStorage;
+    ComputeBatch([this] { StepUpdate(); }); // wait last and compute tail
+    ComputeBatch([this] { StepUpdate(); }); // wait tail
+}
+
+// Processes a ready-made span of rows as one batch; the rows are not copied.
+template
+void TClusterizer::Handle(std::span embeddings) {
+    if (ThreadPool) {
+        Progress.Report(embeddings.size());
+    }
+    ToFill.RawData = embeddings;
+    ToFill.Min.resize(embeddings.size());
+    ComputeBatch([this] { StepUpdate(); });
+}
+
+// Buffers one row; once BatchSize rows are buffered the batch is processed.
+template
+void TClusterizer::Handle(TRawEmbedding rawEmbedding) {
+    if (ThreadPool) {
+        Progress.Report(1);
+    }
+    ToFill.RawDataStorage.emplace_back(std::move(rawEmbedding));
+    ToFill.Min.emplace_back();
+    if (ToFill.RawDataStorage.size() >= BatchSize) {
+        ToFill.RawData = ToFill.RawDataStorage;
+        ComputeBatch([this] { StepUpdate(); });
+    }
+}
+
+// Runs one k-means iteration: assigns every row to its closest centroid,
+// re-seeds clusters that received no rows, and moves each centroid to the mean
+// of its rows. Returns false when the loop in Run should stop: either the sum
+// of per-cluster mean distances grew (centroids are then left untouched), or
+// it improved by less than the factor `neededDiff`.
+template
+bool TClusterizer::Step(ui32 iteration, ui32 maxIterations, float neededDiff) {
+    if (ThreadPool) {
+        Progress.Reset(std::format("step {} / {}", iteration, maxIterations), It.Rows());
+    }
+
+    It.IterateEmbedding(*this);
+    TriggerEmbeddings();
+
+    ui64 zeroCount = 0;
+    float maxDistance = std::numeric_limits::min();
+    float newMean = 0;
+    for (auto& cluster : AggregatedClusters) {
+        if (Y_UNLIKELY(cluster.Count == 0)) {
+            ++zeroCount;
+            continue;
+        }
+        cluster.Distance /= cluster.Count;
+        if (maxDistance < cluster.Distance) {
+            maxDistance = cluster.Distance;
+        }
+        newMean += cluster.Distance;
+    }
+    auto it = AggregatedClusters.begin();
+    It.RandomK(zeroCount, [&](TRawEmbedding rawEmbedding) {
+        if (rawEmbedding.empty()) { // presumably a restart signal from RandomK - confirm
+            it = AggregatedClusters.begin();
+            return;
+        }
+        for (; it != AggregatedClusters.end(); ++it) {
+            if (it->Count == 0) {
+                auto embedding = GetArray(rawEmbedding);
+                it->Coords.assign(embedding.begin(), embedding.end());
+                it->Count = 1;
+                break; // one sampled row seeds exactly one empty cluster
+            }
+        }
+    });
+    newMean += zeroCount * maxDistance; // empty clusters count as the worst one
+    if (ThreadPool) {
+        Progress.ForceReport();
+        Cout << "old mean: " << OldMean / AggregatedClusters.size()
+             << " new mean: " << newMean / AggregatedClusters.size() << Endl;
+    }
+    if (newMean > OldMean) {
+        return false;
+    }
+
+    for (size_t i = 0; auto& coords : Clusters.Coords) {
+        auto& aggregated = AggregatedClusters[i++];
+        auto coordsCount = static_cast(aggregated.Count);
+        for (size_t j = 0; auto& coord : aggregated.Coords) {
+            coords[j++] = static_cast(coord / coordsCount);
+            coord = 0;
+        }
+        aggregated.Count = 0;
+        aggregated.Distance = 0; // otherwise the next Step adds to this mean
+    }
+    bool stop = newMean * neededDiff >= OldMean;
+    OldMean = newMean;
+    return !stop;
+}
+
+// Assigns every buffered row to its closest cluster: bumps the cluster's row
+// count, gives the cluster an id on first use, and reports the pair via Create.
+template
+void TClusterizer::FinalizeUpdate() {
+    for (size_t i = 0; i != ToFill.RawDataStorage.size(); ++i) {
+        const auto minPos = ToFill.Min[i].Pos;
+        Clusters.Count[minPos]++;
+        auto& parentId = Clusters.Ids[minPos];
+        if (Y_UNLIKELY(!parentId))
+            parentId = gId++;
+        Create(parentId, ToFill.IdData[i], std::move(ToFill.RawDataStorage[i]));
+    }
+}
+
+// Flushes the rows still buffered at the end of the Finalize pass.
+template
+void TClusterizer::TriggerIds() {
+    ToFill.RawData = ToFill.RawDataStorage;
+    ComputeBatch([this] { FinalizeUpdate(); }); // wait last and compute tail
+    ComputeBatch([this] { FinalizeUpdate(); }); // wait tail
+}
+
+// Buffers one (id, row) pair; once BatchSize rows are buffered they are processed.
+template
+void TClusterizer::Handle(TId id, TRawEmbedding rawEmbedding) {
+    if (ThreadPool) {
+        Progress.Report(1);
+    }
+    ToFill.IdData.emplace_back(id);
+    ToFill.RawDataStorage.emplace_back(std::move(rawEmbedding));
+    ToFill.Min.emplace_back();
+    if (ToFill.RawDataStorage.size() >= BatchSize) {
+        ToFill.RawData = ToFill.RawDataStorage;
+        ComputeBatch([this] { FinalizeUpdate(); });
+    }
+}
+
+// Makes a last pass over the dataset that reports every row's cluster.
+template
+void TClusterizer::Finalize() {
+    if (ThreadPool) {
+        Progress.Reset("finalize", It.Rows());
+    }
+
+    It.IterateId(*this);
+    TriggerIds();
+    if (ThreadPool) {
+        Progress.ForceReport();
+    }
+}
+
+template class TClusterizer;
+template class TClusterizer; diff --git a/ydb/public/sdk/cpp/examples/vector_index/clusterizer.h b/ydb/public/sdk/cpp/examples/vector_index/clusterizer.h new file mode 100644 index 000000000000..959416b37e58 --- /dev/null +++ b/ydb/public/sdk/cpp/examples/vector_index/clusterizer.h @@ -0,0 +1,135 @@ +#include "thread_pool.h" + +#include "util/generic/fwd.h" +#include "util/generic/string.h" +#include "util/system/types.h" + +#include +#include +#include +#include + +inline constexpr ui64 kMinClusterSize = 8; + +using TId = ui32; +using TRawEmbedding = TString&&; + +class TReadCallback { +public: + virtual void Handle(std::span embeddings) = 0; + virtual void Handle(TRawEmbedding embedding) = 0; + virtual void Handle(TId id, TRawEmbedding embedding) = 0; + + virtual void TriggerEmbeddings() = 0; + virtual void TriggerIds() = 0; +}; + +class TDatasetIterator { +public: + virtual ui64 Rows() const = 0; + virtual void RandomK(ui64 k, std::function cb) = 0; + virtual void IterateEmbedding(TReadCallback& cb) = 0; + virtual void IterateId(TReadCallback& cb) = 0; + virtual void IterateId(std::function cb) = 0; +}; + +using TCreateParentChild = std::function; + +template +class TClusterizer final: TReadCallback { +public: + using TEmbedding = std::span; + using TDistance = std::function; + + TClusterizer(TDatasetIterator& it, TDistance distance, TCreateParentChild create, NVectorIndex::TThreadPool* tp = nullptr); + + struct TOptions { + TId parentId = 0; + ui32 maxIterations = 10; + ui32 maxK = 10; + bool normalize = false; + }; + + struct TClusters { + std::vector Count; + std::vector Ids; + std::vector> Coords; + }; + + TClusters Run(const TOptions& options); + +private: + void Handle(std::span embeddings) final; + void Handle(TRawEmbedding embedding) final; + void Handle(TId id, TRawEmbedding embedding) final; + + void TriggerEmbeddings() final; + void TriggerIds() final; + + void BadCluster(const TOptions& options); + bool Init(ui64 k); + void StepUpdate(); + bool 
Step(ui32 iteration, ui32 maxIterations, float neededDiff); + + void FinalizeUpdate(); + void Finalize(); + + struct TMin { + float Distance = 0; + ui32 Pos = 0; + }; + + TMin Compute(TEmbedding embedding); + void Update(TMin min, TEmbedding embedding); + + template + void ComputeBatch(Func&& func); + + TClusters Clusters; + + TDatasetIterator& It; + TDistance Distance; + TCreateParentChild Create; + + using TSum = std::conditional_t, int64_t, T>; + struct TAggregatedCluster { + float Distance = 0; + std::vector Coords; + i64 Count = 0; + }; + + std::vector AggregatedClusters; + float OldMean = std::numeric_limits::max(); + + struct TProgress { + void Reset(std::string_view operation, ui64 rows); + void Report(ui64 read); + void ForceReport(); + + private: + double Curr = 0; + double Rows = 0; + std::chrono::steady_clock::time_point Last{}; + }; + + TProgress Progress; + + struct TBatch { + std::span RawData; + std::vector IdData; + std::vector RawDataStorage; + std::vector Min; + + void Swap(TBatch& other); + void Clear(); + bool Empty() const; + }; + + NVectorIndex::TThreadPool* ThreadPool = nullptr; + TBatch ToCompute; + TBatch ToFill; + std::mutex M; + std::condition_variable WaitWork; + ui64 Work = 0; + ui64 BatchSize = 0; +}; diff --git a/ydb/public/sdk/cpp/examples/vector_index/main.cpp b/ydb/public/sdk/cpp/examples/vector_index/main.cpp new file mode 100644 index 000000000000..e7b0fd03fef9 --- /dev/null +++ b/ydb/public/sdk/cpp/examples/vector_index/main.cpp @@ -0,0 +1,71 @@ +#include "vector_index.h" + +#include +#include + +using namespace NLastGetopt; +using namespace NYdb; + +int main(int argc, char** argv) { + TString endpoint; + TString command; + TOptions options; + + TOpts opts = TOpts::Default(); + + opts.AddLongOption('e', "endpoint", "YDB endpoint").Required().RequiredArgument("HOST:PORT").StoreResult(&endpoint); + opts.AddLongOption('d', "database", "YDB database").Required().RequiredArgument("PATH").StoreResult(&options.Database); + 
opts.AddLongOption('c', "command", "execute command").Required().RequiredArgument("COMMAND").StoreResult(&command); + opts.AddLongOption("table", "table name").Required().RequiredArgument("TABLE").StoreResult(&options.Table); + opts.AddLongOption("index_type", "index type").Required().RequiredArgument("TYPE").StoreResult(&options.IndexType); + opts.AddLongOption("index_quantizer", "index quantizer").Required().RequiredArgument("QUANTIZER").StoreResult(&options.IndexQuantizer); + opts.AddLongOption("primary_key", "primary key column").Required().RequiredArgument("PK").StoreResult(&options.PrimaryKey); + opts.AddLongOption("embedding", "embedding column").Required().RequiredArgument("EMBEDDING").StoreResult(&options.Embedding); + opts.AddLongOption("distance", "distance function").Required().RequiredArgument("DISTANCE").StoreResult(&options.Distance); + opts.AddLongOption("rows", "count of rows in table").Required().RequiredArgument("ROWS").StoreResult(&options.Rows); + opts.AddLongOption("top_k", "count of top").Required().RequiredArgument("TOPK").StoreResult(&options.TopK); + opts.AddLongOption("data", "list of columns to read").Required().RequiredArgument("DATA").StoreResult(&options.Data); + + opts.SetFreeArgsMin(0); + TOptsParseResult result(&opts, argc, argv); + + ECommand cmd = Parse(command); + + if (cmd == ECommand::None) { + Cerr << "Unsupported command: " << command << Endl; + return 1; + } + + auto config = TDriverConfig() + .SetEndpoint(endpoint) + .SetDatabase(options.Database) + .SetAuthToken(GetEnv("YDB_TOKEN")); + + TDriver driver(config); + + try { + switch (cmd) { + case ECommand::DropIndex: + return DropIndex(driver, options); + case ECommand::CreateIndex: + return CreateIndex(driver, options); + case ECommand::UpdateIndex: + return UpdateIndex(driver, options); + case ECommand::RecreateIndex: + if (auto r = DropIndex(driver, options); r != 0) { + return r; + } + if (auto r = CreateIndex(driver, options); r != 0) { + return r; + } + return 
UpdateIndex(driver, options); + case ECommand::TopK: + return TopK(driver, options); + default: + break; + } + } catch (const std::exception& e) { + Cerr << "Execution failed: " << e.what() << Endl; + } + return 1; +} diff --git a/ydb/public/sdk/cpp/examples/vector_index/search.sql b/ydb/public/sdk/cpp/examples/vector_index/search.sql new file mode 100644 index 000000000000..2d8f8f9fbad3 --- /dev/null +++ b/ydb/public/sdk/cpp/examples/vector_index/search.sql @@ -0,0 +1,92 @@ +-- $LimitCentroid0 = 10; +-- $LimitCentroid1 = 10; +-- $LimitVectors = 10; +-- $Target = Knn::ToBinaryStringFloat(Cast([0.1961289,0.51426697,0.03864574,0.5552187,-0.041873194,0.24177523,0.46322846,-0.3476358,-0.0802049,0.44246107,-0.06727136,-0.04970105,-0.0012320493,0.29773152,-0.3771864,0.047693416,0.30664062,0.15911901,0.27795044,0.11875397,-0.056650203,0.33322853,-0.28901896,-0.43791273,-0.014167095,0.36109218,-0.16923136,0.29162315,-0.22875166,0.122518055,0.030670911,-0.13762642,-0.13884683,0.31455114,-0.21587017,0.32154146,-0.4452795,-0.058932953,0.07103838,0.4289945,-0.6023675,-0.14161813,0.11005565,0.19201005,0.2591869,-0.24074492,0.18088372,-0.16547637,0.08194011,0.10669302,-0.049760908,0.15548608,0.011035396,0.16121127,-0.4862669,0.5691393,-0.4885568,0.90131176,0.20769958,0.010636337,-0.2094356,-0.15292564,-0.2704138,-0.01326699,0.11226809,0.37113565,-0.018971693,0.86532146,0.28991342,0.004782651,-0.0024367527,-0.0861291,0.39704522,0.25665164,-0.45121723,-0.2728092,0.1441502,-0.5042585,0.3507123,-0.38818485,0.5468399,0.16378048,-0.11177127,0.5224827,-0.05927702,0.44906104,-0.036211397,-0.08465567,-0.33162776,0.25222498,-0.22274417,0.15050206,-0.012386843,0.23640677,-0.18704978,0.1139806,0.19379948,-0.2326912,0.36477265,-0.2544955,0.27143118,-0.095495716,-0.1727166,0.29109988,0.32738894,0.0016002139,0.052142758,0.37208632,0.034044757,0.17740013,0.16472393,-0.20134833,0.055949032,-0.06671674,0.04691583,0.13196157,-0.13174891,-0.17132106,-0.4257385,-1.1067779,0.55262613,0.37117195,-0.370
33138,-0.16229,-0.31594914,-0.87293816,0.62064904,-0.32178572,0.28461748,0.41640115,-0.050539408,0.009697271,0.3483608,0.4401717,-0.08273758,0.4873984,0.057845585,0.28128678,-0.43955156,-0.18790118,0.40001884,0.54413813,0.054571174,0.65416795,0.04503013,0.40744695,-0.048226677,0.4787822,0.09700139,0.07739511,0.6503141,0.39685145,-0.54047453,0.041596334,-0.22190939,0.25528133,0.17406437,-0.17308964,0.22076453,0.31207982,0.8434676,0.2086337,-0.014262581,0.05081182,-0.30908328,-0.35717097,0.17224313,0.5266846,0.58924395,-0.29272506,0.01910475,0.061457288,0.18099669,0.04807291,0.34706554,0.32477927,0.17174402,-0.070991516,0.5819317,0.71045977,0.07172716,0.32184732,0.19009985,0.04727492,0.3004647,0.26943457,0.61640364,0.1655051,-0.6033329,0.09797926,-0.20623252,0.10987298,1.016591,-0.29540864,0.25161317,0.19790122,0.14642714,0.5081536,-0.22128952,0.4286613,-0.029895071,0.23768105,-0.0023987228,0.086968,0.42884818,-0.33578634,-0.38033295,-0.16163215,-0.18072455,-0.5015756,0.28035417,-0.0066010267,0.67613393,-0.026721207,0.22796173,-0.008428602,-0.38017297,-0.33044866,0.4519961,-0.05542353,-0.2976922,0.37046987,0.23409955,-0.24246313,-0.12839256,-0.4206849,-0.049280513,-0.7651326,0.1649417,-0.2321146,0.106625736,-0.37506104,0.14470209,-0.114986554,-0.17738944,0.612335,0.25292027,-0.092776075,-0.3876576,-0.08905502,0.3793106,0.7376429,-0.3080258,-0.3869677,0.5239047,-0.41152182,0.22852719,0.42226496,-0.28244498,0.0651847,0.3525671,-0.5396397,-0.17514983,0.29470462,-0.47671098,0.43471992,0.38677526,0.054752454,0.2183725,0.06853758,-0.12792642,0.67841107,0.24607432,0.18936129,0.24056062,-0.30873874,0.62442464,0.5792256,0.20426203,0.54328054,0.56583667,-0.7724596,-0.08384111,-0.16767848,-0.21682987,0.05710991,-0.015403866,0.38889074,-0.6050326,0.4075437,0.40839496,0.2507789,-0.32695654,0.24276069,0.1271161,-0.010688765,-0.31864303,0.15747054,-0.4670915,-0.21059138,0.7470888,0.47273478,-0.119508654,-0.63659865,0.64500844,0.5370401,0.28596714,0.0046216915,0.12771192,-0.18660222,
0.47342712,-0.32039297,0.10946048,0.25172964,0.021965463,-0.12397459,-0.048939236,0.2881649,-0.61231786,-0.33459276,-0.29495123,-0.14027011,-0.23020774,0.73250633,0.71871173,0.78408533,0.4140183,0.1398299,0.7395877,0.06801048,-0.8895956,-0.64981127,-0.37226167,0.1905936,0.12819989,-0.47098637,-0.14334664,-0.933116,0.4597078,0.09895813,0.38114703,0.14368558,-0.42793563,-0.10805895,0.025374172,0.40162122,-0.1686769,0.5257471,-0.3540743,0.08181256,-0.34759146,0.0053078625,0.09163392,0.074487045,-0.14934056,0.034427803,0.19613744,-0.00032829077,0.27792764,0.09889235,-0.029708104,0.3528952,0.22679164,-0.27263018,0.6655268,-0.21362385,0.13035864,0.41666874,0.1253278,-0.22861275,0.105085365,0.09412938,0.03228179,0.11568338,0.23504587,-0.044100706,0.0104857525,-0.07461301,0.1034835,0.3078725,0.5257031,-0.015183647,-0.0060899477,-0.02852683,-0.39821762,-0.20495597,-0.14892153,0.44850922,0.40366673,-0.10324784,0.4095244,0.8356313,0.21190739,-0.12822983,0.06830399,0.036365107,0.044244137,0.26112562,0.033477627,-0.41074416,-0.009961431,0.23717403,0.12438699,-0.05255729,-0.18411024,-0.18563229,-0.16543737,-0.122300245,0.40962145,-0.4751102,0.5309857,0.04474563,0.103834346,0.14118321,4.2373734,0.45751426,0.21709882,0.6866778,0.14838168,-0.1831362,0.10963214,-0.33557487,-0.1084519,0.3299757,0.076113895,0.12850489,-0.07326015,-0.23770756,0.11080451,0.29712623,-0.13904962,0.25797644,-0.5074562,0.4018296,-0.23186816,0.24427155,0.39540753,0.015477164,0.14021018,0.273185,0.013538655,0.47227964,0.52339536,0.54428,0.16983595,0.5470162,-0.0042650895,0.21768,0.090606116,-0.13433483,0.5818122,-0.1384567,0.2354754,0.08440857,-0.2166868,0.48664945,-0.13175073,0.45613387,0.089229666,0.15436831,0.08720108,0.37597507,0.52855235,-0.019367872,0.544358,-0.327109,-0.20839518,-0.33598265,0.033363096,0.42312673,0.13452567,0.40526676,0.08402101,-0.19661862,-0.24802914,0.23069139,0.5153508,0.13562717,-0.23842931,-0.23257096,-0.009195984,0.41388315,0.56304437,-0.23492545,-0.2642354,0.3038204,-0.09548942,
-0.22467934,-0.2561862,-0.34057313,-0.19744347,0.0007430283,-0.12842518,-0.13980682,0.6849243,0.1795335,-0.5626032,-0.07626079,-0.062749654,0.6660117,-0.4479761,0.07978033,0.6269782,0.536793,0.6801336,-0.22563715,0.38902125,-0.09493616,0.21312712,0.17763247,0.1796997,-3.868085,0.08134122,0.10347531,-0.034904435,-0.2792477,-0.17850947,0.083218865,0.26535586,-0.25551575,0.28172702,0.1383222,0.10376686,-0.123248994,0.1985073,-0.40000066,0.44763976,0.028454497,0.37575415,0.071487874,-0.16965964,0.38927504,0.29088503,-0.011822928,-0.19522227,-0.1766321,0.1731763,0.49192554,0.44358602,-0.49064636,0.024170646,0.025736902,-0.17963372,0.38337404,0.07339889,0.042465065,0.5910191,0.07904464,-0.043729525,-0.16969916,0.4008944,-0.04921039,-0.3757768,0.6075314,-0.24661873,-0.1780646,0.60300773,-0.09518917,0.2213779,-0.46496615,-0.41421738,0.23309247,0.14687467,-0.36499617,0.04227981,0.88024706,0.57489127,0.21026954,-0.13666761,0.05710815,0.22095469,-0.033460964,0.13861561,0.22527887,0.1660716,-0.3286249,-0.060175333,-0.2971499,0.2454142,0.6536238,-0.22991207,0.046677545,-0.026631566,-0.04271381,-0.53681016,0.11866242,-0.24970472,-0.37882543,0.33650783,0.7634871,-0.2858582,0.029164914,0.28833458,-0.39263156,0.64842117,2.6358266,0.058920268,2.2507918,0.6809379,-0.41290292,0.36954543,-0.60793567,0.42561662,0.2498035,0.27133986,-0.005307673,0.32910514,-0.03169463,-0.02270061,-0.14702365,-0.25256258,0.54468036,-0.46112943,-0.07411629,-0.030253865,0.20578359,0.6495886,-0.11674013,0.029835526,0.019896187,-0.008101909,0.3706806,-0.26088533,-0.018712807,0.17228629,0.15223767,0.0675542,0.6338221,-0.15303946,0.02908536,0.27217266,-0.10829474,4.503505,-0.37745082,0.20543274,-0.087563366,-0.14404398,0.5562983,0.41639867,-0.38191214,-0.16266975,-0.46071815,0.51874137,0.36326376,0.027115177,-0.06804209,0.35159302,-0.41162485,0.30493516,0.18828706,0.63608,-0.04735176,0.13811842,0.09368063,0.037441075,-0.0012712433,-0.19929455,0.34804425,0.46975428,0.38857734,-0.061463855,0.122808196,0.37608445,5
.2436657,0.25659403,-0.19236223,-0.25611007,0.22265173,0.5898642,-0.28255892,-0.4123271,-0.4214137,0.09197922,-0.060595497,-0.13819462,-0.13570791,0.25433356,0.5907837,0.2548469,-0.39375016,-0.37651995,0.701745,-0.0359955,-0.048193086,0.4458719,0.088069156,-0.015497342,0.52568024,-0.4795603,-0.025876174,0.76476455,-0.32245165,-0.038828112,0.6325802,0.06385053,-0.26389623,0.2439906,-0.4231506,0.19213657,0.5828574,0.053197365,0.45217928,0.040650904,0.83714896,0.63782233,-0.737095,-0.41026706,0.23113042,0.19471557,-0.24410644,-0.35155243,0.20881484,-0.01721743,-0.29494065,-0.114185065,1.2226206,-0.16469914,0.083336286,0.63608664,0.41011855,-0.032080106,-0.08833447,-0.6261006,0.22665286,0.08313674,-0.16372047,0.5235312,0.39580458,0.0007253827,0.10186727,-0.15955615,0.54162663,0.32992217,-0.02491269,0.16312002,0.118171245,-0.029900813,0.038405042,0.31396118,0.45241603,-0.07010825,0.07611299,0.084779754,0.34168348,-0.60676336,0.054825004,-0.16054128,0.2525291,0.20532744,-0.1510394,0.4857572,0.32150552,0.35749313,0.4483151,0.0057622716,0.28705776,-0.018361313,0.08605509,-0.08649293,0.26918742,0.4806176,0.098294765,0.3284613,0.00010664656,0.43832678,-0.33351916,0.02354738,0.004953976,-0.14319824,-0.33351237,-0.7268964,0.56292313,0.1275613,0.4438945,0.7984555,-0.19372283,0.2940397,-0.11770557] AS List)); + +-- $Centroid0Ids = SELECT Knn::CosineDistance($Target, embedding) AS cosine, id +-- FROM wikipedia_kmeans_none +-- WHERE parent_id = 0 +-- ORDER BY cosine +-- LIMIT $LimitCentroid0; + +-- $Centroid1Ids = SELECT Knn::CosineDistance($Target, embedding) AS cosine, id +-- FROM wikipedia_kmeans_none +-- WHERE parent_id IN (SELECT id FROM $Centroid0Ids) +-- ORDER BY cosine +-- LIMIT $LimitCentroid1; + + +-- -- first option + +-- -- $VectorIds = SELECT Knn::CosineDistance($Target, embedding) AS cosine, id +-- -- FROM wikipedia_kmeans_none +-- -- WHERE parent_id IN (SELECT id FROM $Centroid1Ids) +-- -- ORDER BY cosine +-- -- LIMIT $LimitVectors; + +-- -- SELECT title +-- -- FROM 
wikipedia +-- -- WHERE id IN (SELECT id FROM $VectorIds) +-- -- LIMIT $LimitVectors; + +-- -- second option +-- $VectorIds = SELECT id +-- FROM wikipedia_kmeans_none +-- WHERE parent_id IN (SELECT id FROM $Centroid1Ids); + +-- SELECT Knn::CosineDistance($Target, embedding) AS cosine, title +-- FROM wikipedia +-- WHERE id IN (SELECT id FROM $VectorIds) +-- ORDER BY cosine +-- LIMIT $LimitVectors; + +$Target = Knn::ToBinaryStringFloat(Cast([-1.7363281,-1.7363281,0.4609375,-1.7363281,-0.41796875,0.021484375,0.4609375,-0.41796875,-1.296875,0.021484375,1.7792969,-0.41796875,-0.8574219,-0.41796875,-1.296875,0.4609375,0.9003906,-1.296875,0.021484375,0.021484375,0.9003906,0.4609375,1.3398438,0.4609375,0.4609375,-0.41796875,-0.8574219,1.3398438,-0.41796875,-0.41796875,-0.41796875,-0.8574219,-2.1757812,-2.1757812,0.4609375,0.4609375,2.21875,-1.7363281,-1.296875,0.9003906,-2.1757812,-1.7363281,-1.296875,-0.41796875,-0.8574219,2.21875,-0.8574219,0.021484375,0.021484375,0.4609375,0.021484375,0.9003906,-0.41796875,-0.41796875,1.3398438,-0.41796875,0.021484375,0.021484375,0.9003906,-0.41796875,-1.296875,0.021484375,0.4609375,-0.8574219,-2.1757812,0.021484375,0.4609375,0.021484375,-0.8574219,0.021484375,-1.296875,-0.8574219,-0.8574219,0.4609375,-0.8574219,-0.41796875,-0.8574219,0.021484375,-1.296875,0.4609375,1.3398438,0.021484375,0.4609375,0.021484375,0.4609375,-1.7363281,1.3398438,0.9003906,1.3398438,-0.8574219,0.021484375,0.9003906,-0.8574219,-0.41796875,-1.296875,-0.8574219,-0.8574219,-1.296875,0.4609375,0.4609375,2.6582031,0.4609375,0.9003906,0.9003906,-3.4941406,-0.8574219,-1.296875,-0.8574219,-0.8574219,-0.8574219,-2.1757812,-0.41796875,0.4609375,0.021484375,0.021484375,0.4609375,-0.41796875,-0.8574219,0.9003906,0.021484375,0.4609375,-0.8574219,0.021484375,0.9003906,-0.8574219,-0.41796875,0.4609375,-0.41796875,-2.1757812,0.4609375,0.021484375,0.4609375,0.021484375,-0.41796875,-1.7363281,-0.8574219,-1.7363281,0.4609375,-0.8574219,-0.41796875,-0.41796875,0.9003906,-1.7363281,0
.021484375,0.4609375,1.3398438,0.9003906,0.9003906,0.4609375,-1.296875,0.021484375,-0.41796875,2.21875,-0.8574219,-1.296875,1.3398438,-0.8574219,0.4609375,0.021484375,-0.8574219,0.4609375,-1.296875,-0.41796875,0.021484375,1.3398438,0.4609375,0.9003906,0.9003906,-2.6152344,-1.7363281,-1.296875,-0.41796875,0.021484375,-0.41796875,-0.8574219,-0.41796875,0.9003906,-1.296875,0.021484375,0.9003906,-0.8574219,-0.41796875,1.7792969,-1.296875,0.021484375,-0.8574219,-0.8574219,1.3398438,-0.8574219,-0.8574219,0.021484375,-1.296875,-2.1757812,-0.41796875,0.4609375,-0.41796875,-0.41796875,0.021484375,-0.8574219,-1.7363281,-0.41796875,0.9003906,-0.8574219,-1.296875,-1.296875,0.021484375,-1.296875,0.4609375,1.7792969,0.021484375,-0.41796875,0.4609375,0.4609375,0.021484375,0.9003906,1.3398438,0.021484375,-0.41796875,0.4609375,1.3398438,0.021484375,0.4609375,-1.7363281,-0.8574219,-1.7363281,-1.7363281,0.4609375,0.9003906,3.0976562,-0.8574219,0.4609375,0.9003906,-3.4941406,-0.41796875,-1.296875,-0.8574219,-0.8574219,-0.41796875,-2.1757812,-0.41796875,0.9003906,0.021484375,-0.41796875,0.021484375,0.4609375,-0.41796875,0.9003906,0.4609375,0.4609375,-1.7363281,0.021484375,-0.41796875,-0.41796875,0.4609375,0.021484375,-0.41796875,-2.1757812,0.9003906,-0.8574219,0.9003906,-0.41796875,-0.41796875,-1.7363281,-0.8574219,0.021484375,0.4609375,-0.41796875,-0.8574219,-0.41796875,-1.296875,-0.8574219,-0.8574219,0.9003906,0.9003906,-0.41796875,0.9003906,0.021484375,-0.41796875,0.4609375,2.21875,0.4609375,-0.8574219,-1.296875,0.021484375,-1.296875,1.7792969,-0.41796875,-0.8574219,-0.41796875,-1.296875,0.4609375,1.3398438,2.21875,-0.41796875,1.3398438,0.4609375,-2.6152344,-0.8574219,-1.296875,-1.296875,-1.296875,0.021484375,-0.8574219,-0.8574219,0.4609375,0.021484375,-0.41796875,1.3398438,0.021484375,0.021484375,1.3398438,-0.41796875,0.4609375,-1.7363281,0.021484375,-0.41796875,-0.8574219,0.4609375,-0.8574219,-0.8574219,-1.296875,-0.41796875,0.021484375,0.021484375,-0.8574219,-1.296875,-0.8574219,-
1.296875,0.021484375,1.3398438,-1.296875,-2.6152344,-0.8574219,-1.296875,-1.296875,0.021484375,0.9003906,-0.41796875,-0.41796875,0.9003906,0.4609375,0.021484375,1.3398438,1.3398438,0.4609375,0.021484375,0.4609375,1.3398438,-0.41796875,0.4609375,-1.296875,-0.8574219,-1.7363281,-1.7363281,0.9003906,0.9003906,1.3398438,-0.41796875,0.9003906,0.021484375,-3.0546875,0.021484375,-1.296875,-0.8574219,0.021484375,0.9003906,-1.7363281,-0.41796875,0.4609375,-0.41796875,-0.41796875,0.9003906,0.021484375,0.021484375,1.7792969,-0.41796875,0.9003906,-1.296875,1.3398438,-1.7363281,-0.8574219,0.9003906,0.4609375,-0.41796875,-0.41796875,-0.8574219,-0.41796875,0.4609375,-1.7363281,-1.7363281,-0.8574219,-1.296875,0.4609375,1.3398438,-1.7363281,-2.6152344,0.021484375,-2.1757812,-1.296875,0.021484375,0.4609375,-0.41796875,0.021484375,0.9003906,0.021484375,0.4609375,0.9003906,1.3398438,0.9003906,-0.41796875,0.021484375,1.3398438,-0.41796875,0.9003906,-0.8574219,-0.8574219,-0.8574219,-1.296875,0.4609375,0.9003906,0.4609375,-0.41796875,0.4609375,-0.8574219,-2.6152344,0.021484375,-0.8574219,-1.296875,0.021484375,1.3398438,-1.296875,-1.296875,0.4609375,0.021484375,-0.41796875,1.7792969,0.021484375,0.4609375,1.3398438,-0.8574219,0.4609375,-0.8574219,0.4609375,-1.296875,-0.8574219,0.9003906,0.021484375,0.021484375,1.3398438,-0.41796875,-1.296875,-0.8574219,1.3398438,0.021484375,0.4609375,0.9003906,-1.296875,0.4609375,-1.296875,-1.296875,0.021484375,-0.8574219,-0.8574219,-0.41796875,0.021484375,0.4609375,0.021484375,0.9003906,-0.41796875,0.021484375,0.021484375,0.9003906,0.9003906,-2.1757812,-1.7363281,0.021484375,-3.0546875,0.021484375,0.021484375,-1.7363281,1.3398438,-0.41796875,0.9003906,-0.8574219,-0.41796875,-2.6152344,0.4609375,-0.41796875,0.021484375,0.021484375,-2.1757812,0.021484375,0.9003906,0.021484375,1.3398438,-1.296875,0.9003906,-0.8574219,0.4609375,0.9003906,-0.41796875,0.9003906,0.4609375,0.021484375,0.4609375,-1.7363281,-1.296875,-0.8574219,0.9003906,-0.8574219,-1.7363281,-0.417
96875,0.9003906,0.021484375,0.4609375,0.021484375,0.021484375,-1.7363281,-0.8574219,-0.8574219,0.021484375,0.9003906,-1.296875,-2.1757812,-0.41796875,-0.8574219,-3.4941406,0.021484375,-0.41796875,0.4609375,0.4609375,0.4609375,0.4609375,0.4609375,0.021484375,-0.8574219,1.7792969,0.021484375,-1.7363281,1.7792969,-0.8574219,0.021484375,0.4609375,0.4609375,0.021484375,-1.7363281,0.021484375,-0.41796875,0.4609375,-0.41796875,0.9003906,0.021484375,-1.296875,-0.8574219,-1.296875,-0.8574219,0.4609375,0.021484375,-0.41796875,-1.296875,1.3398438,-1.7363281,0.021484375,0.4609375,-0.41796875,0.4609375,2.6582031,-1.7363281,0.021484375,-1.7363281,-0.41796875,0.4609375,0.021484375,-0.8574219,0.021484375,-0.8574219,0.4609375,-0.8574219,0.4609375,0.4609375,2.21875,-1.7363281,-0.8574219,-0.8574219,-1.296875,0.021484375,-0.41796875,-0.8574219,-1.296875,0.021484375,-2.6152344,-0.41796875,-0.41796875,0.9003906,0.021484375,0.9003906,0.021484375,-0.8574219,-0.8574219,-0.41796875,1.7792969,-0.41796875,-0.41796875,0.9003906,-0.8574219,-0.41796875,0.021484375,0.9003906,-0.41796875,-1.7363281,0.4609375,0.021484375,0.4609375,-2.1757812,-0.8574219,0.021484375,-0.8574219,0.4609375,-2.1757812,-1.296875,0.9003906,0.9003906,-1.296875,-0.41796875,0.021484375,-0.8574219,0.4609375,0.4609375,0.021484375,0.021484375,2.6582031,-0.41796875,-0.41796875,-1.7363281,0.4609375,-0.41796875,1.3398438,-0.41796875,0.4609375,-0.41796875,0.4609375,-0.8574219,0.4609375,0.4609375,2.21875,-1.7363281,-0.8574219,-0.8574219,-1.296875,0.021484375,-0.41796875,-0.8574219,-1.296875,0.021484375,-2.6152344,-0.41796875,-0.41796875,0.9003906,0.021484375,0.9003906,0.021484375,-0.8574219,-0.8574219,-0.41796875,1.7792969,-0.41796875,-0.41796875,0.9003906,-0.8574219,-0.41796875,0.021484375,0.9003906,-0.41796875,-1.7363281,0.4609375,0.021484375,0.4609375,-2.1757812,-0.8574219,0.021484375,-0.8574219,0.4609375,-2.1757812,-1.296875,0.9003906,0.9003906,-1.296875,-0.41796875,0.021484375,-0.8574219,0.4609375,0.4609375,0.021484375,0.02148437
5,2.6582031,-0.41796875,-0.41796875,-1.7363281,0.4609375,-0.41796875,1.3398438,-0.41796875,0.4609375,-0.41796875,0.4609375,-0.8574219,0.4609375,0.4609375,2.21875,-1.7363281,-0.8574219,-0.8574219,-1.296875,0.021484375,-0.41796875,-0.8574219,-1.296875,0.021484375,-2.6152344,-0.41796875,-0.41796875,0.9003906,0.021484375,0.9003906,0.021484375,-0.8574219,-0.8574219,-0.41796875,1.7792969,-0.41796875,-0.41796875,0.9003906,-0.8574219,-0.41796875,0.021484375,0.9003906,-0.41796875,-1.7363281,0.4609375,0.021484375,0.4609375,-2.1757812,-0.8574219,0.021484375,-0.8574219,0.4609375,-2.1757812,-1.296875,0.9003906,0.9003906,-1.296875,-0.41796875,0.021484375,-0.8574219,0.4609375,0.4609375,0.021484375,0.021484375,2.6582031,-0.41796875,-0.41796875,-1.7363281,0.4609375,-0.41796875,1.3398438,-0.41796875,0.4609375,-0.41796875,0.4609375,-0.8574219,0.4609375,0.4609375,2.21875,-2.1757812,-0.8574219,-0.8574219,-0.8574219,0.021484375,-0.41796875,-0.8574219,-1.296875,0.021484375,-2.6152344,-0.41796875,-0.41796875,0.9003906,0.021484375,0.9003906,0.021484375,-0.8574219,-0.8574219,-0.41796875,1.7792969,0.021484375,-0.41796875,0.9003906,-0.8574219,-0.41796875,0.021484375,0.9003906,-0.41796875,-1.7363281,0.4609375,0.021484375,0.4609375,-2.1757812,-0.8574219,0.021484375,-0.8574219,0.4609375,-2.1757812,-1.296875,0.9003906,0.9003906,-1.296875,-0.41796875,0.021484375,-0.8574219,0.4609375,0.4609375,0.021484375,0.021484375,2.6582031,-0.41796875,-0.41796875,-1.7363281,0.4609375,-0.41796875,1.3398438,-0.41796875,0.4609375,-0.41796875,0.9003906,0.021484375,0.021484375,0.9003906,-0.41796875,0.021484375,0.021484375,1.3398438,0.9003906,0.021484375,0.4609375,0.021484375,-0.41796875,-0.41796875,-0.8574219,1.3398438,0.4609375,-0.41796875,-0.41796875,0.021484375,0.4609375,0.9003906,0.9003906,-0.41796875,0.9003906,-2.1757812,0.021484375,-1.296875,-1.296875,-0.8574219,-0.41796875,0.021484375,0.021484375,-0.8574219,0.4609375,0.4609375,-0.8574219,-3.4941406,0.021484375,0.9003906,-1.296875,1.3398438,0.021484375,0.900390
6,-0.8574219,0.021484375,-0.8574219,-0.8574219,-1.296875,-1.296875,0.4609375,1.3398438,-1.7363281,0.021484375,-0.41796875,0.4609375,-0.8574219,-3.0546875,0.021484375,0.021484375,0.4609375,-2.1757812,-1.7363281,0.4609375,0.9003906,0.021484375,0.021484375,0.9003906,-0.41796875,0.021484375,0.021484375,1.3398438,0.9003906,0.021484375,0.4609375,0.021484375,-0.41796875,-0.41796875,-1.296875,1.3398438,0.4609375,-0.41796875,-0.8574219,0.4609375,0.9003906,0.9003906,0.9003906,-0.41796875,0.9003906,-2.1757812,0.021484375,-1.296875,-1.296875,-1.296875,-0.41796875,0.021484375,0.021484375,-0.8574219,0.4609375,0.4609375,-0.41796875,-3.4941406,-0.41796875,0.9003906,-1.296875,1.3398438,0.021484375,0.9003906,-0.8574219,-0.41796875,-0.8574219,-0.8574219,-1.296875,-1.7363281,0.4609375,1.3398438,-1.7363281,0.021484375,0.021484375,0.4609375,-0.8574219,-3.0546875,0.021484375,0.4609375,0.4609375,-2.1757812,-1.7363281,0.4609375,0.9003906,-0.41796875,0.021484375,0.9003906,-0.41796875,-0.41796875,0.021484375,1.3398438,0.9003906,0.021484375,0.4609375,0.4609375,-0.41796875,-0.41796875,-1.296875,1.3398438,0.4609375,-0.41796875,-0.8574219,0.021484375,0.4609375,0.9003906,0.9003906,-0.8574219,0.9003906,-2.1757812,0.021484375,-1.296875,-1.296875,-1.296875,-0.8574219,0.021484375,0.021484375,-0.8574219,0.021484375,0.021484375,-0.41796875,-3.4941406,-0.41796875,0.9003906,-1.296875,1.7792969,0.021484375,1.3398438,-0.41796875,-0.41796875,-0.8574219,-0.8574219,-1.296875,-1.7363281,0.4609375,1.3398438,-1.7363281,0.021484375,0.021484375,0.4609375,-0.41796875,-2.6152344,0.021484375,0.021484375,0.4609375,-2.1757812,-1.7363281,0.4609375] as List)); + +$LimitCentroid0 = 1; +$LimitCentroid1 = 1; +$LimitCentroid2 = 1; +$LimitVectors = 10; + +$Centroid0Ids = SELECT Knn::CosineDistance($Target, embedding) AS cosine, id + FROM alice_float_kmeans_none + WHERE parent_id = 0 + ORDER BY cosine + LIMIT $LimitCentroid0; + +$Centroid1Ids = SELECT Knn::CosineDistance($Target, embedding) AS cosine, id + FROM 
alice_float_kmeans_none + WHERE parent_id IN (SELECT id FROM $Centroid0Ids) + ORDER BY cosine + LIMIT $LimitCentroid1; + + +$Centroid2Ids = SELECT Knn::CosineDistance($Target, embedding) AS cosine, id + FROM alice_float_kmeans_none + WHERE parent_id IN (SELECT id FROM $Centroid1Ids) + ORDER BY cosine + LIMIT $LimitCentroid2; + +$VectorIds = SELECT id + FROM alice_float_kmeans_none + WHERE parent_id IN (SELECT id FROM $Centroid2Ids); + +-- select * FROM $Centroid0Ids; +-- select * FROM $Centroid1Ids; +-- select * FROM $Centroid2Ids; +-- select * FROM $VectorIds; + +SELECT Knn::CosineDistance($Target, embedding) AS cosine, text + FROM alice_float + WHERE id IN (SELECT id FROM $VectorIds) + ORDER BY cosine + LIMIT $LimitVectors; + +-- $Result = SELECT * +-- FROM ( +-- SELECT Knn::CosineDistance($Target, embedding) AS cosine, text +-- FROM alice_float WHERE id IN (SELECT id FROM $VectorIds) +-- ) +-- WHERE cosine < 0.05; + +-- select * FROM $Result ORDER BY cosine; +-- select COUNT(*) FROM $Result; \ No newline at end of file diff --git a/ydb/public/sdk/cpp/examples/vector_index/thread_pool.h b/ydb/public/sdk/cpp/examples/vector_index/thread_pool.h new file mode 100644 index 000000000000..d71ddb273cb4 --- /dev/null +++ b/ydb/public/sdk/cpp/examples/vector_index/thread_pool.h @@ -0,0 +1,146 @@ +#pragma once + +#include "vector_index.h" + +#include "util/stream/output.h" +#include "util/system/compiler.h" +#include "util/system/types.h" + +#include +#include +#include +#include + +namespace NVectorIndex { + +// naive thread pool: it's enough for our example +class TThreadPool { +public: + struct TTask { + virtual void Call() noexcept = 0; + }; + + template + struct TTaskImpl final: TFunc, TTask { + template + TTaskImpl(TArg&& func) + : TFunc{std::forward(func)} + { + } + + void Call() noexcept final { + try { + (*this)(); + } catch (const std::exception& e) { + Cerr << "Call failed: " << e.what() << Endl; + } + delete this; + } + }; + + TThreadPool(ui32 n = 
std::thread::hardware_concurrency()) noexcept { + n = std::max(1, n); + Threads.reserve(n); + for (ui32 i = 0; i != n; ++i) { + Threads.emplace_back([this] { Work(); }); + } + } + + ui32 Size() const noexcept { + return Threads.size(); + } + + template + void Submit(TFunc&& func) noexcept { + auto task = std::make_unique>>(std::forward(func)); + std::unique_lock lock{M}; + while (true) { + auto tasks = Tasks.size(); + auto threads = Threads.size(); + if (Y_LIKELY(tasks < threads * 2)) { + break; + } + lock.unlock(); + Cout << "ThreadPool overloaded " << tasks << " / " << threads << Endl; + std::this_thread::sleep_for(std::chrono::seconds{1}); + lock.lock(); + } + Tasks.emplace(task.release()); + lock.unlock(); + CV.notify_one(); + } + + void Join() noexcept { + { + std::lock_guard lock{M}; + Tasks.emplace(nullptr); + } + CV.notify_all(); + for (auto& thread : Threads) { + thread.join(); + } + Threads.clear(); + } + +private: + void Work() noexcept { + std::unique_lock lock{M}; + while (true) { + while (!Tasks.empty()) { + auto* task = Tasks.front(); + if (Y_UNLIKELY(!task)) + return; + Tasks.pop(); + lock.unlock(); + task->Call(); + lock.lock(); + } + CV.wait(lock); + } + } + + std::mutex M; + std::condition_variable CV; + std::queue Tasks; + std::vector Threads; +}; + +// naive wait group: it's enough for our example +struct TWaitGroup { + explicit TWaitGroup(size_t counter = 0) noexcept + : counter_{counter + 1} + { + } + + void Add(size_t counter = 1) noexcept { + counter_.fetch_add(counter, std::memory_order_relaxed); + } + + void Done(size_t counter = 1) noexcept { + if (counter_.fetch_sub(counter, std::memory_order_acq_rel) == counter) { + std::lock_guard lock{m_}; + cv_.notify_one(); + } + } + + void Wait(size_t counter = 0) noexcept { + if (counter_.fetch_sub(1, std::memory_order_acq_rel) != 1) { + std::unique_lock lock{m_}; + while (counter_.load(std::memory_order_acquire) != 0) { + cv_.wait(lock); + } + } + Reset(counter); + } + + void Reset(size_t 
counter) noexcept { + counter_.store(counter + 1, std::memory_order_relaxed); + } + +private: + std::atomic_size_t counter_; + std::condition_variable cv_; + std::mutex m_; +}; + +} diff --git a/ydb/public/sdk/cpp/examples/vector_index/vector_index.cpp b/ydb/public/sdk/cpp/examples/vector_index/vector_index.cpp new file mode 100644 index 000000000000..b138b0b4dc90 --- /dev/null +++ b/ydb/public/sdk/cpp/examples/vector_index/vector_index.cpp @@ -0,0 +1,1015 @@ +#include "clusterizer.h" +#include "thread_pool.h" +#include "vector_index.h" + +#include +#include + +#include + +template <> +struct std::formatter: std::formatter { + template + auto format(const TString& param, FormatContext& fc) const { + return std::formatter::format(std::string_view{param}, fc); + } +}; + +using namespace NLastGetopt; +using namespace NYdb; +using namespace NTable; + +static constexpr ui64 kBulkSize = 1000; +static constexpr ui64 kSmallClusterSize = 20'000; + +static constexpr std::string_view FlatIndex = "flat"; +static constexpr std::string_view KMeansIndex = "kmeans"; + +namespace NQuantizer { +static constexpr std::string_view None = "none"; +static constexpr std::string_view Int8 = "int8"; +// static constexpr std::string_view Uint8 = "uint8"; +static constexpr std::string_view Bit = "bit"; +} + +ECommand Parse(std::string_view command) { + if (command == "DropIndex") { + return ECommand::DropIndex; + } + if (command == "CreateIndex") { + return ECommand::CreateIndex; + } + if (command == "UpdateIndex") { + return ECommand::UpdateIndex; + } + if (command == "RecreateIndex") { + return ECommand::RecreateIndex; + } + if (command == "TopK") { + return ECommand::TopK; + } + return ECommand::None; +} + +static void PrintTop(TResultSetParser&& parser) { + while (parser.TryNextRow()) { + Y_ASSERT(parser.ColumnsCount() >= 2); + Cout // << parser.ColumnParser(0).GetUint64() << " \t" + << *parser.ColumnParser(1).GetOptionalFloat() << "\t"; + for (size_t i = 2; i < parser.ColumnsCount(); 
++i) { + // Print the column at the loop index so every trailing column is emitted once. + Cout << *parser.ColumnParser(i).GetOptionalUtf8() << "\t"; + } + Cout << "\n"; + } + Cout << Endl; +} + +static TString FullName(const TOptions& options, const TString& name) { + return TString::Join(options.Database, "/", name); +} + +static TString IndexName(const TOptions& options) { + return TString::Join(options.Table, "_", options.IndexType, "_", options.IndexQuantizer); +} + +static TString FullIndexName(const TOptions& options) { + return FullName(options, IndexName(options)); +} + +static void DropTable(TTableClient& client, const TString& table) { + auto r = client.RetryOperationSync([&](TSession session) { + TDropTableSettings settings; + return session.DropTable(table).ExtractValueSync(); + }); + if (!r.IsSuccess() && r.GetStatus() != EStatus::SCHEME_ERROR) { + ythrow TVectorException{r}; + } +} + +static void DropIndex(TTableClient& client, const TOptions& options) { + DropTable(client, FullIndexName(options)); +} + +static void CreateFlat(TTableClient& client, const TOptions& options) { + auto r = client.RetryOperationSync([&](TSession session) { + auto desc = TTableBuilder() + .AddNonNullableColumn(options.PrimaryKey, EPrimitiveType::Uint32) + .AddNullableColumn(options.Embedding, EPrimitiveType::String) + .SetPrimaryKeyColumn(options.PrimaryKey) + .Build(); + + return session.CreateTable(FullIndexName(options), std::move(desc)).ExtractValueSync(); + }); + if (!r.IsSuccess()) { + ythrow TVectorException{r}; + } +} + +static void CreateKMeans(TTableClient& client, const TOptions& options, std::string_view suffix = {}) { + auto parentPK = "parent_" + options.PrimaryKey; + auto r = client.RetryOperationSync([&](TSession session) { + auto desc = TTableBuilder() + .AddNonNullableColumn(parentPK, EPrimitiveType::Uint32) + .AddNonNullableColumn(options.PrimaryKey, EPrimitiveType::Uint32) + .AddNullableColumn(options.Embedding, EPrimitiveType::String) + .SetPrimaryKeyColumns({parentPK, options.PrimaryKey}) + .Build(); + + return
session.CreateTable(FullIndexName(options) + suffix, std::move(desc)).ExtractValueSync(); + }); + if (!r.IsSuccess()) { + ythrow TVectorException{r}; + } +} + +static void UpdateFlatBit(TTableClient& client, const TOptions& options) { + TString query = std::format(R"( + DECLARE $begin AS Uint64; + DECLARE $rows AS Uint64; + + UPSERT INTO {1} + SELECT {2}, Yql::Map(Knn::ToBinaryStringBit(Knn::FloatFromBinaryString({3})), ($e) -> Untag($e, "BitVector")) AS {3} + FROM {0} + WHERE $begin <= {2} AND {2} < $begin + $rows + )", + options.Table, + IndexName(options), + options.PrimaryKey, + options.Embedding); + for (ui64 i = 0; i < options.Rows; i += kBulkSize) { + TParamsBuilder paramsBuilder; + paramsBuilder.AddParam("$begin").Uint64(i).Build(); + paramsBuilder.AddParam("$rows").Uint64(kBulkSize).Build(); + auto r = client.RetryOperationSync([&](TSession session) { + return session.ExecuteDataQuery(query, + TTxControl::BeginTx(TTxSettings::SerializableRW()) + .CommitTx(), + paramsBuilder.Build()) + .ExtractValueSync(); + }); + if (!r.IsSuccess()) { + ythrow TVectorException{r}; + } + } +} + +class TTableIterator final: public TDatasetIterator { +public: + TTableIterator(const TOptions& options, TTableClient& client, NVectorIndex::TThreadPool& tp) + : Options{options} + , Client{client} + , ThreadPool{tp} + { + } + + void UseLevel(ui8 level, TId parentId, ui64 rows) { + ParentId = parentId; + RowsCount = rows; + + TString newTable; + if (level == 1) { + newTable = Options.Table; + } else { + newTable = IndexName(Options) + "_" + std::to_string(level % 2); + } + if (newTable != Table) { + Table = newTable; + } + Embeddings.clear(); + } + + ui64 Rows() const final { + return RowsCount; + } + + TString RandomKQuery(ui64 k) { + Y_ASSERT(kMinClusterSize * k < Rows()); + auto r = std::max(0.5 / k, static_cast(k * kMinClusterSize) / Rows()); + if (ParentId != 0 && !Options.ShuffleWithEmbeddings) { + TString query = std::format(R"( + $ids = SELECT id FROM {0} WHERE parent_id = 
{2} AND RANDOM(id) < {3} LIMIT {4}; + SELECT embedding FROM {1} WHERE id IN $ids LIMIT {4}; + )", Table, Options.Table, ParentId, r, k); + // Cout << query << Endl; + return query; + } + + TString query = std::format(R"(SELECT {0} FROM {1} WHERE)", + Options.Embedding, + Table); + if (ParentId != 0) { + query = std::format(R"({0} {1} = {2} AND)", query, "parent_" + Options.PrimaryKey, ParentId); + } + query = std::format(R"({0} RANDOM({1}) < {2} LIMIT {3})", query, Options.Embedding, r, k); + // Cout << query << Endl; + return query; + } + + void RandomK(ui64 k, std::function cb) final { + if (k == 0) { + return; + } + auto query = RandomKQuery(k); + auto r = Client.RetryOperationSync([&](TSession session) -> TStatus { + cb({}); + auto r = session.ExecuteDataQuery(query, TTxControl::BeginTx(TTxSettings::SerializableRW()) + .CommitTx()) + .ExtractValueSync(); + for (auto& part : r.GetResultSets()) { + TResultSetParser batch(part); + Y_ASSERT(batch.ColumnsCount() == 1); + auto embeddingIdx = batch.ColumnIndex(Options.Embedding); + auto& embedding = batch.ColumnParser(embeddingIdx); + while (batch.TryNextRow()) { + cb(*embedding.GetOptionalString()); + } + } + return r; + }); + if (!r.IsSuccess()) { + ythrow TVectorException{r}; + } + } + + void IterateEmbedding(TReadCallback& read) final { + if (Rows() > kSmallClusterSize) { + IterateImpl(&read, [&](TRawEmbedding rawEmbedding) { + read.Handle(std::move(rawEmbedding)); + }); + return; + } + if (Embeddings.empty()) { + Embeddings.reserve(Rows()); + IterateImpl(&read, [&](TRawEmbedding embedding) { + Embeddings.push_back(std::move(embedding)); + }); + } + read.Handle(Embeddings); + } + + void IterateId(TReadCallback& read) final { + Embeddings.clear(); + IterateImpl(&read, [&](TId id, TRawEmbedding rawEmbedding) { + read.Handle(id, std::move(rawEmbedding)); + }); + } + + void IterateId(std::function cb) { + Embeddings.clear(); + IterateImpl(nullptr, [&](TId id, TRawEmbedding rawEmbedding) { + cb(id, 
std::move(rawEmbedding)); + }); + } + +private: + template + void IterateImpl(TReadCallback* read, auto&& cb) { + ReadImpl(WithPK); + JoinImpl(WithPK); + ProcessImpl(read, cb); + } + + auto ReadSettings(bool withPK) { + TReadTableSettings settings; + settings.ReturnNotNullAsOptional(false); + if (ParentId != 0) { + TValueBuilder pk; + + pk.BeginTuple(); + pk.AddElement().Uint32(ParentId); + pk.AddElement().Uint32(0); + pk.EndTuple(); + settings.From(TKeyBound::Inclusive(pk.Build())); + + pk.BeginTuple(); + pk.AddElement().Uint32(ParentId + 1); + pk.AddElement().Uint32(0); + pk.EndTuple(); + settings.To(TKeyBound::Exclusive(pk.Build())); + } + if (ParentId == 0 || Options.ShuffleWithEmbeddings) { + if (withPK) { + settings.AppendColumns(Options.PrimaryKey); + } + settings.AppendColumns(Options.Embedding); + } else { + settings.AppendColumns(Options.PrimaryKey); + } + return settings; + } + + bool ReadPart(TTablePartIterator& it, ui64& rows) { + auto next = it.ReadNext(); + auto part = next.ExtractValueSync(); + if (part.EOS()) { + rows = std::numeric_limits::max(); + } else if (!part.IsSuccess() || part.GetPart().RowsCount() == 0) { + return true; + } else { + rows += part.GetPart().RowsCount(); + } + if (Y_UNLIKELY(rows > Rows())) { + Read.Stop(); + return false; + } + Read.Push(part.ExtractPart()); + return true; + } + + void ReadImpl(bool WithPK) { + auto settings = ReadSettings(WithPK); + auto r = Client.RetryOperationSync([&](TSession session) { + auto fit = session.ReadTable(FullName(Options, Table), settings); + auto r = fit.ExtractValueSync(); + if (!r.IsSuccess()) { + ythrow TVectorException{r}; + } + ThreadPool.Submit([this, it = std::move(r)]() mutable { + ui64 rows = 0; + while (ReadPart(it, rows)) { + } + }); + return TStatus{EStatus::SUCCESS, {}}; + }); + if (!r.IsSuccess()) { + ythrow TVectorException{r}; + } + } + + bool JoinPart(bool withPK, TResultSet&& r) { + TResultSetParser batch{r}; + if (!batch.TryNextRow()) { + return false; + } + 
Y_ASSERT(batch.ColumnsCount() == 1); + auto& builder = ParamsBuilder.AddParam("$ids").BeginList(); + auto primaryKeyIdx = batch.ColumnIndex(Options.PrimaryKey); + auto& primaryKey = batch.ColumnParser(primaryKeyIdx); + do { + builder.AddListItem().Uint32(primaryKey.GetUint32()); + } while (batch.TryNextRow()); + builder.EndList().Build(); + + TString query = std::format("embedding FROM {0} WHERE id IN $ids", Options.Table); + if (withPK) { + query = "SELECT id, " + query; + } else { + query = "SELECT " + query; + } + + auto fit = Client.StreamExecuteScanQuery(query, ParamsBuilder.Build()); + + auto it = fit.ExtractValueSync(); + if (!it.IsSuccess()) { + ythrow TVectorException{it}; + } + + while (true) { + auto next = it.ReadNext(); + auto part = next.ExtractValueSync(); + if (part.EOS()) { + break; + } else if (!part.IsSuccess() || part.GetResultSet().RowsCount() == 0) { + continue; + } + Join.Push(part.ExtractResultSet()); + } + return true; + } + + void JoinImpl(bool withPK) { + if (ParentId == 0 || Options.ShuffleWithEmbeddings) { + Process = &Read; + return; + } + Process = &Join; + ThreadPool.Submit([this, withPK]() mutable { + while (true) { + auto part = Read.Pop(); + if (!JoinPart(withPK, std::move(part))) { + Join.Stop(); + return; + } + } + }); + } + + template + bool ProcessPart(TResultSet&& r, auto&& cb) { + TResultSetParser batch{r}; + if (!batch.TryNextRow()) { + return false; + } + if constexpr (WithPK) { + Y_ASSERT(batch.ColumnsCount() == 2); + auto primaryKeyIdx = batch.ColumnIndex(Options.PrimaryKey); + auto embeddingIdx = batch.ColumnIndex(Options.Embedding); + auto& primaryKey = batch.ColumnParser(primaryKeyIdx); + auto& embedding = batch.ColumnParser(embeddingIdx); + do { + TId pk = 0; + if constexpr (TABLE == 64) { + if (ParentId == 0) { + pk = primaryKey.GetUint64(); + } else if (Options.ShuffleWithEmbeddings) { + pk = primaryKey.GetUint32(); + } else { + pk = primaryKey.GetUint64(); + } + } else { + pk = primaryKey.GetUint32(); + } + cb(pk, 
*embedding.GetOptionalString()); + } while (batch.TryNextRow()); + } else { + Y_ASSERT(batch.ColumnsCount() == 1); + auto embeddingIdx = batch.ColumnIndex(Options.Embedding); + auto& embedding = batch.ColumnParser(embeddingIdx); + do { + cb(*embedding.GetOptionalString()); + } while (batch.TryNextRow()); + } + return true; + } + + template + void ProcessImpl(TReadCallback* read, auto&& cb) { + while (true) { + std::unique_lock lock{Process->M}; + if (read && Process->Queue.empty()) { + lock.unlock(); + if constexpr (WithPK) { + read->TriggerIds(); + } else { + read->TriggerEmbeddings(); + } + lock.lock(); + } + auto part = Process->PopImpl(lock); + lock.unlock(); + if (!ProcessPart(std::move(part), cb)) { + return; + } + } + } + + const TOptions& Options; + TTableClient& Client; + NVectorIndex::TThreadPool& ThreadPool; + TString Table; + TId ParentId = 0; + ui64 RowsCount = 0; + TParamsBuilder ParamsBuilder; + + struct Stream { + static constexpr ui64 kPrefetch = 1; + inline static const TResultSet EmptyPart{Ydb::ResultSet{}}; + + std::mutex M; + std::condition_variable WasPush; + std::condition_variable WasPop; + std::queue Queue; + + void Push(TResultSet&& part) { + std::unique_lock lock{M}; + PushImpl(std::move(part)); + while (Queue.size() > kPrefetch) { + WasPop.wait(lock); + } + } + + void Stop() { + auto part = EmptyPart; + std::unique_lock lock{M}; + PushImpl(std::move(part)); + } + + TResultSet PopImpl(std::unique_lock& lock) { + while (Queue.empty()) { + WasPush.wait(lock); + } + auto part = std::move(Queue.front()); + Queue.pop(); + if (Queue.size() == kPrefetch) { + WasPop.notify_one(); + } + return part; + } + + TResultSet Pop() { + std::unique_lock lock{M}; + return PopImpl(lock); + } + + private: + void PushImpl(TResultSet&& part) { + Queue.emplace(std::move(part)); + if (Queue.size() == 1) { + WasPush.notify_one(); + } + } + }; + + // Read push to Read Stream + Stream Read; + // Join pop from Read Stream and push to Join Stream + Stream Join; // 
Join is optional step + // Process pop from Join Stream if it's present otherwise Read + Stream* Process = nullptr; + + std::vector Embeddings; +}; + +template +static float CosineDistance(std::span lhs, std::span rhs) { + Y_ASSERT(lhs.size() == rhs.size()); + auto* l = lhs.data(); + auto* r = rhs.data(); + if constexpr (std::is_same_v) { + auto res = TriWayDotProduct(l, r, lhs.size()); + float norm = std::sqrt(res.LL * res.RR); + return norm != 0 ? 1 - (res.LR / norm) : 1; + } else { + float ll = DotProduct(l, l, lhs.size()); + float rr = DotProduct(r, r, lhs.size()); + float lr = DotProduct(l, r, lhs.size()); + float norm = std::sqrt(ll * rr); + return norm != 0 ? 1 - (lr / norm) : 1; + } +} + +struct TBulkSender { + TBulkSender(const TOptions& options, TTableClient& client, ui32 cores) + : Options{options} + , Client{client} + { + RetrySettings + .MaxRetries(60) + .GetSessionClientTimeout(TDuration::Seconds(60)) + .Idempotent(true) + .RetryUndefined(true); + + ToSend.reserve(std::max(cores, 2)); + ToWait.reserve(std::max(cores, 2)); + } + + void CreateLevel(ui8 level) { + Count = 0; + Cout << "Start create level: " << (int)level << Endl; + if (level >= Options.Levels) { + return; + } + if (level % 2 == 0) { + CreateKMeans(Client, Options, "_1"); + } else { + CreateKMeans(Client, Options, "_0"); + } + } + + void DropLevel(ui8 level) { + Cout << "Finish create level: " << (int)level << Endl; + auto indexTable = FullIndexName(Options); + if (level % 2 == 0) { + DropTable(Client, indexTable + "_0"); + } else { + DropTable(Client, indexTable + "_1"); + } + } + + void Send(TString table, TValue&& value) { + std::lock_guard lock{M}; + if (ToSend.size() == ToSend.capacity()) { + WaitImpl(); + } + auto f = Client.RetryOperation([table = std::move(table), value = std::move(value)](TTableClient& client) { + auto r = value; + return client.BulkUpsert(table, std::move(r)).Apply([](TAsyncBulkUpsertResult result) -> TStatus { + return result.ExtractValueSync(); + }); + }, 
RetrySettings); + ToSend.emplace_back(std::move(f)); + } + + void Wait() { + std::lock_guard lock{M}; + WaitImpl(); + WaitImpl(); + } + + const TOptions& Options; + +private: + void WaitImpl() { + if (!ToWait.empty()) { + Count += ToWait.size(); + // Cout << "Wait for insertions " << Count << " / " << Options.Rows / kBulkSize << Endl; + for (auto& f : ToWait) { + auto r = f.ExtractValueSync(); + if (!r.IsSuccess()) { + ythrow TVectorException{r}; + } + } + ToWait.clear(); + } + ToWait.swap(ToSend); + } + + TRetryOperationSettings RetrySettings; + std::mutex M; + TTableClient& Client; + std::vector ToSend; + std::vector ToWait; + ui64 Count = 0; +}; + +enum EFormat: ui8 { + FloatVector = 1, // 4-byte per element + Uint8Vector = 2, // 1-byte per element, better than Int8 for positive-only Float + Int8Vector = 3, // 1-byte per element + BitVector = 10, // 1-bit per element +}; + +template +struct TTypeToFormat; + +template <> +struct TTypeToFormat { + static constexpr auto Format = EFormat::FloatVector; +}; + +template <> +struct TTypeToFormat { + static constexpr auto Format = EFormat::Int8Vector; +}; + +template +inline constexpr auto Format = TTypeToFormat::Format; + +struct TBulkWriter { + TBulkWriter(TBulkSender& sender) + : Sender{sender} + { + Rows.BeginList(); + } + + void UseLevel(ui8 level) { + auto indexTable = FullIndexName(Sender.Options); + Embeddings = Sender.Options.LastLevelEmbeddings; + if (level < Sender.Options.Levels) { + Embeddings = Sender.Options.ShuffleWithEmbeddings; + indexTable += "_" + std::to_string((level + 1) % 2); + } + if (indexTable != Table) { + Send(); + Table = indexTable; + } + } + + void WritePosting(TId parentId, TId id, TRawEmbedding rawEmbedding) { + Y_ASSERT(parentId <= std::numeric_limits::max()); + if (!Embeddings) { + rawEmbedding = {}; + } + Rows.AddListItem() + .BeginStruct() + .AddMember("parent_" + Sender.Options.PrimaryKey) + .Uint32(static_cast(parentId)) + .AddMember(Sender.Options.PrimaryKey) + 
.Uint32(static_cast(id)) + .AddMember(Sender.Options.Embedding) + .String(std::move(rawEmbedding)) + .EndStruct(); + if (++Count == kBulkSize) { + Send(); + } + } + + template + void WriteCluster(TId parentId, TId id, const std::vector& embedding) { + auto byteSize = embedding.size() * sizeof(T); + TString rawEmbedding(byteSize + 1, 0); + auto* data = const_cast(rawEmbedding.data()); + std::memcpy(data, embedding.data(), byteSize); + data[byteSize] = Format; + bool embeddings = std::exchange(Embeddings, true); + WritePosting(parentId, id, std::move(rawEmbedding)); + Embeddings = embeddings; + } + + void Send() { + if (Count == 0) { + return; + } + auto value = Rows.EndList().Build(); + Sender.Send(Table, std::move(value)); + Rows.BeginList(); + Count = 0; + } + +private: + TBulkSender& Sender; + TString Table; + NYdb::TValueBuilder Rows; + ui32 Count = 0; + bool Embeddings = true; +}; + +template +struct [[nodiscard]] Finally { + Finally(TFunc&& func) + : Func{std::move(func)} + { + } + + ~Finally() noexcept { + Func(); + } + +private: + [[no_unique_address]] TFunc Func; +}; + +template +static void UpdateKMeans(TTableClient& client, const TOptions& options) { + using TClusterizer = TClusterizer; + using TClusters = typename TClusterizer::TClusters; + const auto cores = std::thread::hardware_concurrency() / 2; + NVectorIndex::TThreadPool tpCompute{cores}; + NVectorIndex::TThreadPool tpIO{cores * (options.ShuffleWithEmbeddings ? 
2 : 1)}; + + auto makeClustersImpl = [&options](TTableIterator& reader, TBulkWriter& writer, TClusterizer& clusterizer, ui8 level, TId parentId, ui64 count) { + reader.UseLevel(level, parentId, count); + writer.UseLevel(level); + auto clusters = clusterizer.Run({ + .parentId = parentId, + .maxIterations = options.Iterations, + .maxK = options.Clusters, + }); + + writer.UseLevel(options.Levels); + for (size_t i = 0; auto id : clusters.Ids) { + if (id) { + writer.WriteCluster(parentId, id, clusters.Coords[i]); + } + ++i; + } + writer.Send(); + if (level >= options.Levels) { + return TClusters{}; + } + return clusters; + }; + + struct TMeta { + TId ParentId = 0; + ui64 Count = 0; + }; + std::vector next; + auto processCluster = [&](const TClusters& clusters) { + for (size_t i = 0; auto id : clusters.Ids) { + auto count = clusters.Count[i]; + if (count != 0) { + next.push_back({id, count}); + } + ++i; + } + }; + + NVectorIndex::TWaitGroup wg; + std::deque clusters; + ui64 countDoneClusters = 0; + ui64 countAllClusters = 0; + + auto processClusters = [&](ui8 level, ui32 count) { + if (clusters.size() < count) { + return; + } + countDoneClusters += clusters.size(); + Cout << "Wait for " << countDoneClusters << " / " << countAllClusters << " clusters on " << (int)level << " level" << Endl; + wg.Wait(); + for (auto& cluster : clusters) { + processCluster(cluster); + } + clusters.clear(); + }; + TTableIterator reader{options, client, tpIO}; + TBulkSender sender{options, client, cores}; + TBulkWriter writer{sender}; + TClusterizer clusterizer{reader, CosineDistance, + [&](TId parentId, TId id, TRawEmbedding embedding) { + writer.WritePosting(parentId, id, std::move(embedding)); + }, + &tpCompute}; + sender.DropLevel(options.Levels); + + auto makeClusters = [&](ui8 level, TId parentId, ui64 count) { + if (ui64(level - 1) * options.Clusters * 2 < cores) { + auto clusters = makeClustersImpl(reader, writer, clusterizer, level, parentId, count); + processCluster(clusters); + 
return; + } + processClusters(level, cores); + auto& p = clusters.emplace_back(); + wg.Add(); + tpCompute.Submit([&, level, parentId, count]() mutable { + Finally done = [&] { + wg.Done(); + }; + TTableIterator reader{options, client, tpIO}; + TBulkWriter writer{sender}; + TClusterizer clusterizer{reader, CosineDistance, + [&](TId parentId, TId id, TRawEmbedding embedding) { + writer.WritePosting(parentId, id, std::move(embedding)); + }}; + p = makeClustersImpl(reader, writer, clusterizer, level, parentId, count); + }); + }; + Finally join = [&] { + Cout << "Start join pools" << Endl; + tpCompute.Join(); + tpIO.Join(); + Cout << "Finish UpdateKMeans " << Endl; + }; + // TODO(mbkkt) start as bfs but continue as dfs? + next.push_back({0, options.Rows}); + for (ui8 level = 1; !next.empty(); ++level) { + sender.CreateLevel(level); + auto curr = std::move(next); + countDoneClusters = 0; + countAllClusters = curr.size(); + if (level < options.Levels) { + next.reserve(countAllClusters * options.Clusters); + } + for (auto& meta : curr) { + makeClusters(level, meta.ParentId, meta.Count); + } + curr = {}; + processClusters(level, 1); + sender.DropLevel(level); + sender.Wait(); + } +} + +// TODO use user defined target +static constexpr std::string_view kTargetQuery = R"($Target = 
Cast([0.1961289,0.51426697,0.03864574,0.5552187,-0.041873194,0.24177523,0.46322846,-0.3476358,-0.0802049,0.44246107,-0.06727136,-0.04970105,-0.0012320493,0.29773152,-0.3771864,0.047693416,0.30664062,0.15911901,0.27795044,0.11875397,-0.056650203,0.33322853,-0.28901896,-0.43791273,-0.014167095,0.36109218,-0.16923136,0.29162315,-0.22875166,0.122518055,0.030670911,-0.13762642,-0.13884683,0.31455114,-0.21587017,0.32154146,-0.4452795,-0.058932953,0.07103838,0.4289945,-0.6023675,-0.14161813,0.11005565,0.19201005,0.2591869,-0.24074492,0.18088372,-0.16547637,0.08194011,0.10669302,-0.049760908,0.15548608,0.011035396,0.16121127,-0.4862669,0.5691393,-0.4885568,0.90131176,0.20769958,0.010636337,-0.2094356,-0.15292564,-0.2704138,-0.01326699,0.11226809,0.37113565,-0.018971693,0.86532146,0.28991342,0.004782651,-0.0024367527,-0.0861291,0.39704522,0.25665164,-0.45121723,-0.2728092,0.1441502,-0.5042585,0.3507123,-0.38818485,0.5468399,0.16378048,-0.11177127,0.5224827,-0.05927702,0.44906104,-0.036211397,-0.08465567,-0.33162776,0.25222498,-0.22274417,0.15050206,-0.012386843,0.23640677,-0.18704978,0.1139806,0.19379948,-0.2326912,0.36477265,-0.2544955,0.27143118,-0.095495716,-0.1727166,0.29109988,0.32738894,0.0016002139,0.052142758,0.37208632,0.034044757,0.17740013,0.16472393,-0.20134833,0.055949032,-0.06671674,0.04691583,0.13196157,-0.13174891,-0.17132106,-0.4257385,-1.1067779,0.55262613,0.37117195,-0.37033138,-0.16229,-0.31594914,-0.87293816,0.62064904,-0.32178572,0.28461748,0.41640115,-0.050539408,0.009697271,0.3483608,0.4401717,-0.08273758,0.4873984,0.057845585,0.28128678,-0.43955156,-0.18790118,0.40001884,0.54413813,0.054571174,0.65416795,0.04503013,0.40744695,-0.048226677,0.4787822,0.09700139,0.07739511,0.6503141,0.39685145,-0.54047453,0.041596334,-0.22190939,0.25528133,0.17406437,-0.17308964,0.22076453,0.31207982,0.8434676,0.2086337,-0.014262581,0.05081182,-0.30908328,-0.35717097,0.17224313,0.5266846,0.58924395,-0.29272506,0.01910475,0.061457288,0.18099669,0.04807291,0.34706554,0.32
477927,0.17174402,-0.070991516,0.5819317,0.71045977,0.07172716,0.32184732,0.19009985,0.04727492,0.3004647,0.26943457,0.61640364,0.1655051,-0.6033329,0.09797926,-0.20623252,0.10987298,1.016591,-0.29540864,0.25161317,0.19790122,0.14642714,0.5081536,-0.22128952,0.4286613,-0.029895071,0.23768105,-0.0023987228,0.086968,0.42884818,-0.33578634,-0.38033295,-0.16163215,-0.18072455,-0.5015756,0.28035417,-0.0066010267,0.67613393,-0.026721207,0.22796173,-0.008428602,-0.38017297,-0.33044866,0.4519961,-0.05542353,-0.2976922,0.37046987,0.23409955,-0.24246313,-0.12839256,-0.4206849,-0.049280513,-0.7651326,0.1649417,-0.2321146,0.106625736,-0.37506104,0.14470209,-0.114986554,-0.17738944,0.612335,0.25292027,-0.092776075,-0.3876576,-0.08905502,0.3793106,0.7376429,-0.3080258,-0.3869677,0.5239047,-0.41152182,0.22852719,0.42226496,-0.28244498,0.0651847,0.3525671,-0.5396397,-0.17514983,0.29470462,-0.47671098,0.43471992,0.38677526,0.054752454,0.2183725,0.06853758,-0.12792642,0.67841107,0.24607432,0.18936129,0.24056062,-0.30873874,0.62442464,0.5792256,0.20426203,0.54328054,0.56583667,-0.7724596,-0.08384111,-0.16767848,-0.21682987,0.05710991,-0.015403866,0.38889074,-0.6050326,0.4075437,0.40839496,0.2507789,-0.32695654,0.24276069,0.1271161,-0.010688765,-0.31864303,0.15747054,-0.4670915,-0.21059138,0.7470888,0.47273478,-0.119508654,-0.63659865,0.64500844,0.5370401,0.28596714,0.0046216915,0.12771192,-0.18660222,0.47342712,-0.32039297,0.10946048,0.25172964,0.021965463,-0.12397459,-0.048939236,0.2881649,-0.61231786,-0.33459276,-0.29495123,-0.14027011,-0.23020774,0.73250633,0.71871173,0.78408533,0.4140183,0.1398299,0.7395877,0.06801048,-0.8895956,-0.64981127,-0.37226167,0.1905936,0.12819989,-0.47098637,-0.14334664,-0.933116,0.4597078,0.09895813,0.38114703,0.14368558,-0.42793563,-0.10805895,0.025374172,0.40162122,-0.1686769,0.5257471,-0.3540743,0.08181256,-0.34759146,0.0053078625,0.09163392,0.074487045,-0.14934056,0.034427803,0.19613744,-0.00032829077,0.27792764,0.09889235,-0.029708104,0.3528952,0.2
2679164,-0.27263018,0.6655268,-0.21362385,0.13035864,0.41666874,0.1253278,-0.22861275,0.105085365,0.09412938,0.03228179,0.11568338,0.23504587,-0.044100706,0.0104857525,-0.07461301,0.1034835,0.3078725,0.5257031,-0.015183647,-0.0060899477,-0.02852683,-0.39821762,-0.20495597,-0.14892153,0.44850922,0.40366673,-0.10324784,0.4095244,0.8356313,0.21190739,-0.12822983,0.06830399,0.036365107,0.044244137,0.26112562,0.033477627,-0.41074416,-0.009961431,0.23717403,0.12438699,-0.05255729,-0.18411024,-0.18563229,-0.16543737,-0.122300245,0.40962145,-0.4751102,0.5309857,0.04474563,0.103834346,0.14118321,4.2373734,0.45751426,0.21709882,0.6866778,0.14838168,-0.1831362,0.10963214,-0.33557487,-0.1084519,0.3299757,0.076113895,0.12850489,-0.07326015,-0.23770756,0.11080451,0.29712623,-0.13904962,0.25797644,-0.5074562,0.4018296,-0.23186816,0.24427155,0.39540753,0.015477164,0.14021018,0.273185,0.013538655,0.47227964,0.52339536,0.54428,0.16983595,0.5470162,-0.0042650895,0.21768,0.090606116,-0.13433483,0.5818122,-0.1384567,0.2354754,0.08440857,-0.2166868,0.48664945,-0.13175073,0.45613387,0.089229666,0.15436831,0.08720108,0.37597507,0.52855235,-0.019367872,0.544358,-0.327109,-0.20839518,-0.33598265,0.033363096,0.42312673,0.13452567,0.40526676,0.08402101,-0.19661862,-0.24802914,0.23069139,0.5153508,0.13562717,-0.23842931,-0.23257096,-0.009195984,0.41388315,0.56304437,-0.23492545,-0.2642354,0.3038204,-0.09548942,-0.22467934,-0.2561862,-0.34057313,-0.19744347,0.0007430283,-0.12842518,-0.13980682,0.6849243,0.1795335,-0.5626032,-0.07626079,-0.062749654,0.6660117,-0.4479761,0.07978033,0.6269782,0.536793,0.6801336,-0.22563715,0.38902125,-0.09493616,0.21312712,0.17763247,0.1796997,-3.868085,0.08134122,0.10347531,-0.034904435,-0.2792477,-0.17850947,0.083218865,0.26535586,-0.25551575,0.28172702,0.1383222,0.10376686,-0.123248994,0.1985073,-0.40000066,0.44763976,0.028454497,0.37575415,0.071487874,-0.16965964,0.38927504,0.29088503,-0.011822928,-0.19522227,-0.1766321,0.1731763,0.49192554,0.44358602,-0.490646
36,0.024170646,0.025736902,-0.17963372,0.38337404,0.07339889,0.042465065,0.5910191,0.07904464,-0.043729525,-0.16969916,0.4008944,-0.04921039,-0.3757768,0.6075314,-0.24661873,-0.1780646,0.60300773,-0.09518917,0.2213779,-0.46496615,-0.41421738,0.23309247,0.14687467,-0.36499617,0.04227981,0.88024706,0.57489127,0.21026954,-0.13666761,0.05710815,0.22095469,-0.033460964,0.13861561,0.22527887,0.1660716,-0.3286249,-0.060175333,-0.2971499,0.2454142,0.6536238,-0.22991207,0.046677545,-0.026631566,-0.04271381,-0.53681016,0.11866242,-0.24970472,-0.37882543,0.33650783,0.7634871,-0.2858582,0.029164914,0.28833458,-0.39263156,0.64842117,2.6358266,0.058920268,2.2507918,0.6809379,-0.41290292,0.36954543,-0.60793567,0.42561662,0.2498035,0.27133986,-0.005307673,0.32910514,-0.03169463,-0.02270061,-0.14702365,-0.25256258,0.54468036,-0.46112943,-0.07411629,-0.030253865,0.20578359,0.6495886,-0.11674013,0.029835526,0.019896187,-0.008101909,0.3706806,-0.26088533,-0.018712807,0.17228629,0.15223767,0.0675542,0.6338221,-0.15303946,0.02908536,0.27217266,-0.10829474,4.503505,-0.37745082,0.20543274,-0.087563366,-0.14404398,0.5562983,0.41639867,-0.38191214,-0.16266975,-0.46071815,0.51874137,0.36326376,0.027115177,-0.06804209,0.35159302,-0.41162485,0.30493516,0.18828706,0.63608,-0.04735176,0.13811842,0.09368063,0.037441075,-0.0012712433,-0.19929455,0.34804425,0.46975428,0.38857734,-0.061463855,0.122808196,0.37608445,5.2436657,0.25659403,-0.19236223,-0.25611007,0.22265173,0.5898642,-0.28255892,-0.4123271,-0.4214137,0.09197922,-0.060595497,-0.13819462,-0.13570791,0.25433356,0.5907837,0.2548469,-0.39375016,-0.37651995,0.701745,-0.0359955,-0.048193086,0.4458719,0.088069156,-0.015497342,0.52568024,-0.4795603,-0.025876174,0.76476455,-0.32245165,-0.038828112,0.6325802,0.06385053,-0.26389623,0.2439906,-0.4231506,0.19213657,0.5828574,0.053197365,0.45217928,0.040650904,0.83714896,0.63782233,-0.737095,-0.41026706,0.23113042,0.19471557,-0.24410644,-0.35155243,0.20881484,-0.01721743,-0.29494065,-0.114185065,1.2226
206,-0.16469914,0.083336286,0.63608664,0.41011855,-0.032080106,-0.08833447,-0.6261006,0.22665286,0.08313674,-0.16372047,0.5235312,0.39580458,0.0007253827,0.10186727,-0.15955615,0.54162663,0.32992217,-0.02491269,0.16312002,0.118171245,-0.029900813,0.038405042,0.31396118,0.45241603,-0.07010825,0.07611299,0.084779754,0.34168348,-0.60676336,0.054825004,-0.16054128,0.2525291,0.20532744,-0.1510394,0.4857572,0.32150552,0.35749313,0.4483151,0.0057622716,0.28705776,-0.018361313,0.08605509,-0.08649293,0.26918742,0.4806176,0.098294765,0.3284613,0.00010664656,0.43832678,-0.33351916,0.02354738,0.004953976,-0.14319824,-0.33351237,-0.7268964,0.56292313,0.1275613,0.4438945,0.7984555,-0.19372283,0.2940397,-0.11770557] AS List); + {0})"; + +static constexpr std::string_view kTopKFlatBitQuery = R"($TargetEmbeddingFloat = Knn::ToBinaryStringFloat($Target); + $TargetEmbeddingBit = Knn::ToBinaryStringBit($Target); + + $IndexIds = SELECT {1}, Knn::{0}({4}, $TargetEmbeddingBit) as distance + FROM {2} + ORDER BY distance + LIMIT {6} * 4; + + SELECT {1}, Knn::{0}({4}, $TargetEmbeddingFloat) as distance, {5} + FROM {3} + WHERE {1} IN (SELECT {1} FROM $IndexIds) + ORDER BY distance + LIMIT {6};)"; + +static constexpr std::string_view kTopKKMeansNoneQuery = R"($TargetEmbeddingFloat = Knn::ToBinaryStringFloat($Target); +$LimitCentroid0 = 1; +$LimitVectors = {6}; + +$Centroid0Ids = SELECT Knn::{0}({4}, $TargetEmbeddingFloat) AS distance, {1} + FROM {2} + WHERE parent_id = 0 + ORDER BY distance + LIMIT $LimitCentroid0; + +$VectorIds = SELECT Knn::{0}({4}, $TargetEmbeddingFloat) AS distance, {1} + FROM {2} + WHERE parent_id IN (SELECT {1} FROM $Centroid0Ids) + ORDER BY distance + LIMIT $LimitVectors; + +SELECT {1}, Knn::{0}({4}, $TargetEmbeddingFloat) as distance, {5} + FROM {3} + WHERE {1} IN (SELECT {1} FROM $VectorIds) + ORDER BY distance + LIMIT $LimitVectors; +)"; + +static void TopKFlatBit(TTableClient& client, const TOptions& options) { + TString query = std::format(kTopKFlatBitQuery, + 
options.Distance, + options.PrimaryKey, + IndexName(options), + options.Table, + options.Embedding, + options.Data, + options.TopK); /* the seven arguments above fill placeholders {0}..{6} of the query template, in this order */ + // Cout << query << Endl; + /* Wrap the text in kTargetQuery: its {0} placeholder receives the query built above. */ query = std::format(kTargetQuery, query); + auto r = client.RetryOperationSync([&](TSession session) -> TStatus { + auto prepareResult = session.PrepareDataQuery(query).ExtractValueSync(); + if (!prepareResult.IsSuccess()) { + return prepareResult; + } + + /* NOTE(review): this local shadows the outer TString named query. */ auto query = prepareResult.GetQuery(); + /* Begin a SerializableRW transaction and commit it in the same Execute call. */ auto result = query.Execute(TTxControl::BeginTx(TTxSettings::SerializableRW()).CommitTx()) + .ExtractValueSync(); + + if (result.IsSuccess()) { + PrintTop(result.GetResultSetParser(0)); + } + + return result; + }); + /* A non-successful status in r is turned into a TVectorException. */ if (!r.IsSuccess()) { + ythrow TVectorException{r}; + } +} + +/* Top-K query helper called by TopK for IndexType == KMeansIndex with quantizer None. Formats kTopKKMeansNoneQuery with, in placeholder order {0}..{6}: Distance, PrimaryKey, IndexName(options), Table, Embedding, Data, TopK; wraps the result in kTargetQuery; then prepares and executes it inside the client.RetryOperationSync callback and, on success, passes result set 0 to PrintTop. Throws TVectorException (via ythrow) when the status in r is not successful. */ static void TopKKMeansNone(TTableClient& client, const TOptions& options) { + TString query = std::format(kTopKKMeansNoneQuery, + options.Distance, + options.PrimaryKey, + IndexName(options), + options.Table, + options.Embedding, + options.Data, + options.TopK); + // Cout << query << Endl; + /* Same wrapping step as in TopKFlatBit: kTargetQuery's {0} placeholder receives the query built above. */ query = std::format(kTargetQuery, query); + auto r = client.RetryOperationSync([&](TSession session) -> TStatus { + auto prepareResult = session.PrepareDataQuery(query).ExtractValueSync(); + /* A failed prepare is returned as the callback's status. */ if (!prepareResult.IsSuccess()) { + return prepareResult; + } + + /* NOTE(review): this local shadows the outer TString named query. */ auto query = prepareResult.GetQuery(); + /* Begin a SerializableRW transaction and commit it in the same Execute call. */ auto result = query.Execute(TTxControl::BeginTx(TTxSettings::SerializableRW()).CommitTx()) + .ExtractValueSync(); + + /* Only the first result set (index 0) is printed. */ if (result.IsSuccess()) { + PrintTop(result.GetResultSetParser(0)); + } + + return result; + }); + /* A non-successful status in r is turned into a TVectorException. */ if (!r.IsSuccess()) { + ythrow TVectorException{r}; + } +} + +/* Driver-level entry point: creates a TTableClient for driver, calls DropIndex(client, options) and returns 0. */ int DropIndex(NYdb::TDriver& driver, const TOptions& options) { + TTableClient client(driver); + DropIndex(client, options); + return 0; +} + +/* Driver-level entry point: for IndexType == FlatIndex calls CreateFlat, for IndexType == KMeansIndex calls CreateKMeans, returning 0 in both cases; any other IndexType returns 1. */ int CreateIndex(NYdb::TDriver& driver, const TOptions& options) { + TTableClient client(driver); + if (options.IndexType == FlatIndex) { + CreateFlat(client, options); + return 0; + } else if (options.IndexType == KMeansIndex) { + 
CreateKMeans(client, options); + return 0; + } + /* unhandled IndexType */ return 1; +} + +/* Driver-level entry point for filling the index. Returns 0 when the IndexType / IndexQuantizer pair is handled and 1 otherwise. FlatIndex with quantizer None does nothing, FlatIndex with Bit calls UpdateFlatBit, KMeansIndex with None or Int8 calls UpdateKMeans. NOTE(review): the None and Int8 branches read identically here; presumably they differ in template arguments that are not visible in this text - confirm. */ int UpdateIndex(NYdb::TDriver& driver, const TOptions& options) { + TTableClient client(driver); + if (options.IndexType == FlatIndex) { + if (options.IndexQuantizer == NQuantizer::None) { + return 0; + } + if (options.IndexQuantizer == NQuantizer::Bit) { + UpdateFlatBit(client, options); + return 0; + } + } else if (options.IndexType == KMeansIndex) { + if (options.IndexQuantizer == NQuantizer::None) { + UpdateKMeans(client, options); + return 0; + } else if (options.IndexQuantizer == NQuantizer::Int8) { + UpdateKMeans(client, options); + return 0; + } + } + /* unhandled IndexType / IndexQuantizer pair */ return 1; +} + +/* Driver-level entry point for the top-K query. Returns 0 when the IndexType / IndexQuantizer pair is handled and 1 otherwise. FlatIndex with quantizer None returns 0 without running any query, FlatIndex with Bit calls TopKFlatBit, KMeansIndex with None calls TopKKMeansNone. */ int TopK(NYdb::TDriver& driver, const TOptions& options) { + TTableClient client(driver); + if (options.IndexType == FlatIndex) { + if (options.IndexQuantizer == NQuantizer::None) { + return 0; + } + if (options.IndexQuantizer == NQuantizer::Bit) { + TopKFlatBit(client, options); + return 0; + } + } else if (options.IndexType == KMeansIndex) { + if (options.IndexQuantizer == NQuantizer::None) { + TopKKMeansNone(client, options); + return 0; + } + } + /* unhandled IndexType / IndexQuantizer pair */ return 1; +} \ No newline at end of file diff --git a/ydb/public/sdk/cpp/examples/vector_index/vector_index.h b/ydb/public/sdk/cpp/examples/vector_index/vector_index.h new file mode 100644 index 000000000000..6ebe311eb6ee --- /dev/null +++ b/ydb/public/sdk/cpp/examples/vector_index/vector_index.h @@ -0,0 +1,62 @@ +#pragma once + +#include +#include + +#include +#include +#include +#include +#include +#include + +/* Command selector; Parse (declared below) returns one of these values. */ enum class ECommand { + DropIndex, + CreateIndex, + UpdateIndex, // fill/construct + RecreateIndex, // Drop, Create, Update + TopK, + None, +}; + +/* NOTE(review): presumably maps a command name to an ECommand value; the definition is not visible here - confirm. */ ECommand Parse(std::string_view command); + +#define TABLE 64 /* selects the default Levels / Clusters pair in TOptions: 64 gives 2 / 80, any other value gives 3 / 120 */ + +/* Settings passed by const reference to DropIndex, CreateIndex, UpdateIndex and TopK. */ struct TOptions { + TString Database; + TString Table; + TString IndexType; + TString IndexQuantizer; + TString PrimaryKey; + TString Embedding; + TString Distance; + TString Data; + ui64 Rows = 0; + ui64 TopK = 0; + ui16 Iterations = 5; +#if TABLE == 64 + ui16 Levels 
= 2; + ui16 Clusters = 80; +#else + ui16 Levels = 3; + ui16 Clusters = 120; +#endif + bool ShuffleWithEmbeddings = true; + bool LastLevelEmbeddings = false; +}; + +/* Driver-level entry points; each definition builds a TTableClient from driver. DropIndex always returns 0. CreateIndex returns 0 for a handled IndexType and 1 otherwise. UpdateIndex and TopK return 0 for a handled IndexType / IndexQuantizer pair and 1 otherwise. */ int DropIndex(NYdb::TDriver& driver, const TOptions& options); + +int CreateIndex(NYdb::TDriver& driver, const TOptions& options); + +int UpdateIndex(NYdb::TDriver& driver, const TOptions& options); + +int TopK(NYdb::TDriver& driver, const TOptions& options); + +/* Exception built from an NYdb::TStatus: the constructor streams the Status: prefix followed by the status into *this. NOTE(review): the one-argument constructor is not marked explicit, so a TStatus converts implicitly - confirm that this is intended. */ class TVectorException: public yexception { +public: + TVectorException(const NYdb::TStatus& status) { + *this << "Status:" << status; + } +}; diff --git a/ydb/public/sdk/cpp/examples/vector_index/ya.make b/ydb/public/sdk/cpp/examples/vector_index/ya.make new file mode 100644 index 000000000000..193576c4da48 --- /dev/null +++ b/ydb/public/sdk/cpp/examples/vector_index/ya.make @@ -0,0 +1,15 @@ +PROGRAM() + +SRCS( + main.cpp + vector_index.cpp + clusterizer.cpp +) + +PEERDIR( + library/cpp/getopt + library/cpp/dot_product + ydb/public/sdk/cpp/client/ydb_table +) + +END() diff --git a/ydb/public/sdk/cpp/examples/ya.make b/ydb/public/sdk/cpp/examples/ya.make index 2cb41f28d27b..14a498dfd83f 100644 --- a/ydb/public/sdk/cpp/examples/ya.make +++ b/ydb/public/sdk/cpp/examples/ya.make @@ -6,4 +6,5 @@ RECURSE( secondary_index_builtin topic_reader ttl + vector_index )