From 9a147350aa72cb8ef101734aa622364e0db442bd Mon Sep 17 00:00:00 2001 From: Valerii Mironov Date: Mon, 25 Nov 2024 15:17:07 +0000 Subject: [PATCH 1/5] Fix: vector index description wasn't persisted --- .../schemeshard__conditional_erase.cpp | 1 + ydb/core/tx/schemeshard/schemeshard__init.cpp | 14 +++++++---- ...hard__operation_consistent_copy_tables.cpp | 1 + .../schemeshard__operation_copy_table.cpp | 1 + .../schemeshard_build_index__create.cpp | 2 ++ ydb/core/tx/schemeshard/schemeshard_impl.cpp | 16 ++++++------ .../tx/schemeshard/schemeshard_info_types.h | 25 ++++++++++++++++--- ydb/core/tx/schemeshard/schemeshard_schema.h | 10 +++++--- .../tx/schemeshard/ut_helpers/helpers.cpp | 14 ++++++----- ydb/core/tx/schemeshard/ut_helpers/helpers.h | 4 +-- .../ut_index_build/ut_vector_index_build.cpp | 20 +++++++++------ 11 files changed, 72 insertions(+), 36 deletions(-) diff --git a/ydb/core/tx/schemeshard/schemeshard__conditional_erase.cpp b/ydb/core/tx/schemeshard/schemeshard__conditional_erase.cpp index 08b812a07dc8..ff7d803a25f3 100644 --- a/ydb/core/tx/schemeshard/schemeshard__conditional_erase.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__conditional_erase.cpp @@ -267,6 +267,7 @@ struct TSchemeShard::TTxRunConditionalErase: public TSchemeShard::TRwTxBase { } static TVector> MakeColumnIds(TTableInfo::TPtr mainTable, TTableIndexInfo::TPtr index, TTableInfo::TPtr indexImplTable) { + Y_ENSURE(index->Type != NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree); TVector> result; THashSet keys; diff --git a/ydb/core/tx/schemeshard/schemeshard__init.cpp b/ydb/core/tx/schemeshard/schemeshard__init.cpp index 6c8174f2fbc7..13c985b8fb41 100644 --- a/ydb/core/tx/schemeshard/schemeshard__init.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__init.cpp @@ -987,7 +987,7 @@ struct TSchemeShard::TTxInit : public TTransactionBase { return LoadBackupStatusesImpl(statuses, byShardBackupStatus, byMigratedShardBackupStatus, byTxShardStatus); } - typedef std::tuple TTableIndexRec; + typedef std::tuple TTableIndexRec; typedef TDeque TTableIndexRows; template @@ -995,7 +995,8 @@ struct TSchemeShard::TTxInit : public TTransactionBase { return std::make_tuple(pathId, rowSet.template GetValue(), rowSet.template GetValue(), - rowSet.template GetValue() + rowSet.template GetValue(), + TString{} ); } @@ -1007,7 +1008,8 @@ struct TSchemeShard::TTxInit : public TTransactionBase { } while (!rowSet.EndOfSet()) { const auto pathId = Self->MakeLocalId(TLocalPathId(rowSet.GetValue())); - tableIndexes.push_back(MakeTableIndexRec(pathId, rowSet)); + auto& back = tableIndexes.emplace_back(MakeTableIndexRec(pathId, rowSet)); + std::get<4>(back) = rowSet.GetValue(); if (!rowSet.Next()) { return false; @@ -2791,6 +2793,7 @@ struct TSchemeShard::TTxInit : public TTransactionBase { ui64 alterVersion = std::get<1>(rec); TTableIndexInfo::EType indexType = std::get<2>(rec); TTableIndexInfo::EState state = std::get<3>(rec); + auto description = std::get<4>(rec); Y_VERIFY_S(Self->PathsById.contains(pathId), "Path doesn't exist, pathId: " << pathId); TPathElement::TPtr path = Self->PathsById.at(pathId); @@ -2799,7 +2802,7 @@ struct TSchemeShard::TTxInit : public TTransactionBase { << ", path type: " << NKikimrSchemeOp::EPathType_Name(path->PathType)); Y_ABORT_UNLESS(!Self->Indexes.contains(pathId)); - Self->Indexes[pathId] = new TTableIndexInfo(alterVersion, indexType, state); + Self->Indexes[pathId] = new TTableIndexInfo(alterVersion, indexType, state, description); Self->IncrementPathDbRefCount(pathId); } @@ -2816,6 +2819,7 @@ struct TSchemeShard::TTxInit : public TTransactionBase { ui64 alterVersion = rowset.GetValue(); TTableIndexInfo::EType indexType = rowset.GetValue(); TTableIndexInfo::EState state = rowset.GetValue(); + auto description = rowset.GetValue(); Y_VERIFY_S(Self->PathsById.contains(pathId), "Path doesn't exist, pathId: " << pathId); TPathElement::TPtr path = Self->PathsById.at(pathId); @@ -2828,7 +2832,7 @@ struct TSchemeShard::TTxInit : public TTransactionBase { auto tableIndex = Self->Indexes.at(pathId); Y_ABORT_UNLESS(tableIndex->AlterData == nullptr); Y_ABORT_UNLESS(tableIndex->AlterVersion < alterVersion); - tableIndex->AlterData = new TTableIndexInfo(alterVersion, indexType, state); + tableIndex->AlterData = new TTableIndexInfo(alterVersion, indexType, state, description); Y_VERIFY_S(Self->PathsById.contains(path->ParentPathId), "Parent path is not found" << ", index pathId: " << pathId diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_consistent_copy_tables.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_consistent_copy_tables.cpp index a50c0e82eac9..fdc9dc4cddac 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_consistent_copy_tables.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_consistent_copy_tables.cpp @@ -35,6 +35,7 @@ NKikimrSchemeOp::TModifyScheme CreateIndexTask(NKikimr::NSchemeShard::TTableInde operation->SetName(dst.LeafName()); operation->SetType(indexInfo->Type); + Y_ENSURE(indexInfo->Type != NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree); for (const auto& keyName: indexInfo->IndexKeys) { *operation->MutableKeyColumnNames()->Add() = keyName; } diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_copy_table.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_copy_table.cpp index dd2c2f6095d4..983a6f940f78 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_copy_table.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_copy_table.cpp @@ -862,6 +862,7 @@ TVector CreateCopyTable(TOperationId nextId, const TTxTrans auto operation = schema.MutableCreateTableIndex(); operation->SetName(name); operation->SetType(indexInfo->Type); + Y_ENSURE(indexInfo->Type != NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree); for (const auto& keyName: indexInfo->IndexKeys) { *operation->MutableKeyColumnNames()->Add() = keyName; } diff --git a/ydb/core/tx/schemeshard/schemeshard_build_index__create.cpp b/ydb/core/tx/schemeshard/schemeshard_build_index__create.cpp index 9421b40a3f2a..bf63fe9ce054 100644 --- a/ydb/core/tx/schemeshard/schemeshard_build_index__create.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_build_index__create.cpp @@ -229,6 +229,8 @@ class TSchemeShard::TIndexBuilder::TTxCreate: public TSchemeShard::TIndexBuilder NKikimrSchemeOp::TVectorIndexKmeansTreeDescription vectorIndexKmeansTreeDescription; *vectorIndexKmeansTreeDescription.MutableSettings() = index.global_vector_kmeans_tree_index().vector_settings(); buildInfo.SpecializedIndexDescription = vectorIndexKmeansTreeDescription; + buildInfo.KMeans.K = std::max(2, vectorIndexKmeansTreeDescription.GetSettings().clusters()); + buildInfo.KMeans.Levels = std::max(1, vectorIndexKmeansTreeDescription.GetSettings().levels()); break; } case Ydb::Table::TableIndex::TypeCase::TYPE_NOT_SET: diff --git a/ydb/core/tx/schemeshard/schemeshard_impl.cpp b/ydb/core/tx/schemeshard/schemeshard_impl.cpp index f63eb65f8465..38896766d670 100644 --- a/ydb/core/tx/schemeshard/schemeshard_impl.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_impl.cpp @@ -1695,7 +1695,8 @@ void TSchemeShard::PersistTableIndex(NIceDb::TNiceDb& db, const TPathId& pathId) db.Table().Key(element->PathId.LocalPathId).Update( NIceDb::TUpdate(alterData->AlterVersion), NIceDb::TUpdate(alterData->Type), - NIceDb::TUpdate(alterData->State)); + NIceDb::TUpdate(alterData->State), + NIceDb::TUpdate(alterData->DescriptionToStr())); db.Table().Key(element->PathId.LocalPathId).Delete(); @@ -1730,7 +1731,8 @@ void TSchemeShard::PersistTableIndexAlterData(NIceDb::TNiceDb& db, const TPathId db.Table().Key(elem->PathId.LocalPathId).Update( NIceDb::TUpdate(alterData->AlterVersion), NIceDb::TUpdate(alterData->Type), - NIceDb::TUpdate(alterData->State)); + NIceDb::TUpdate(alterData->State), + NIceDb::TUpdate(alterData->DescriptionToStr())); for (ui32 keyIdx = 0; keyIdx < alterData->IndexKeys.size(); ++keyIdx) { db.Table().Key(elem->PathId.LocalPathId, keyIdx).Update( @@ -7477,7 +7479,7 @@ void TSchemeShard::ResolveSA() { StatisticsAggregatorId = subDomainInfo->GetTenantStatisticsAggregatorID(); LOG_DEBUG_S(TlsActivationContext->AsActorContext(), NKikimrServices::STATISTICS, "ResolveSA(), StatisticsAggregatorId=" << StatisticsAggregatorId - << ", at schemeshard: " << TabletID()); + << ", at schemeshard: " << TabletID()); ConnectToSA(); } } @@ -7485,11 +7487,11 @@ void TSchemeShard::ResolveSA() { void TSchemeShard::ConnectToSA() { if (!EnableStatistics) return; - + if (!StatisticsAggregatorId) { LOG_DEBUG_S(TlsActivationContext->AsActorContext(), NKikimrServices::STATISTICS, "ConnectToSA(), no StatisticsAggregatorId" - << ", at schemeshard: " << TabletID()); + << ", at schemeshard: " << TabletID()); return; } auto policy = NTabletPipe::TClientRetryPolicy::WithRetries(); @@ -7606,8 +7608,8 @@ TDuration TSchemeShard::SendBaseStatsToSA() { << ", path count: " << count << ", at schemeshard: " << TabletID()); - return TDuration::Seconds(SendStatsIntervalMinSeconds - + RandomNumber(SendStatsIntervalMaxSeconds - SendStatsIntervalMinSeconds)); + return TDuration::Seconds(SendStatsIntervalMinSeconds + + RandomNumber(SendStatsIntervalMaxSeconds - SendStatsIntervalMinSeconds)); } } // namespace NSchemeShard diff --git a/ydb/core/tx/schemeshard/schemeshard_info_types.h b/ydb/core/tx/schemeshard/schemeshard_info_types.h index 96adfcbc4205..1014e544e13b 100644 --- a/ydb/core/tx/schemeshard/schemeshard_info_types.h +++ b/ydb/core/tx/schemeshard/schemeshard_info_types.h @@ -2371,11 +2371,16 @@ struct TTableIndexInfo : public TSimpleRefCount { using EType = NKikimrSchemeOp::EIndexType; using EState = NKikimrSchemeOp::EIndexState; - TTableIndexInfo(ui64 version, EType type, EState state) + TTableIndexInfo(ui64 version, EType type, EState state, std::string_view description) : AlterVersion(version) , Type(type) , State(state) - {} + { + if (type == NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree) { + Y_ABORT_UNLESS(SpecializedIndexDescription.emplace() + .ParseFromString(description)); + } + } TTableIndexInfo(const TTableIndexInfo&) = default; @@ -2391,8 +2396,20 @@ struct TTableIndexInfo : public TSimpleRefCount { return result; } + TString DescriptionToStr() const { + return std::visit([](const auto& v) { + if constexpr (requires { v.SerializeAsString(); }) { + TString str{v.SerializeAsString()}; + Y_ENSURE(!str.empty()); + return str; + } else { + return TString{}; + } + }, SpecializedIndexDescription); + } + static TPtr NotExistedYet(EType type) { - return new TTableIndexInfo(0, type, EState::EIndexStateInvalid); + return new TTableIndexInfo(0, type, EState::EIndexStateInvalid, {}); } static TPtr Create(const NKikimrSchemeOp::TIndexCreationConfig& config, TString& errMsg) { @@ -2405,7 +2422,7 @@ struct TTableIndexInfo : public TSimpleRefCount { TPtr alterData = result->CreateNextVersion(); alterData->IndexKeys.assign(config.GetKeyColumnNames().begin(), config.GetKeyColumnNames().end()); - Y_ABORT_UNLESS(alterData->IndexKeys.size()); + Y_ABORT_UNLESS(!alterData->IndexKeys.empty()); alterData->IndexDataColumns.assign(config.GetDataColumnNames().begin(), config.GetDataColumnNames().end()); alterData->State = config.HasState() ? config.GetState() : EState::EIndexStateReady; diff --git a/ydb/core/tx/schemeshard/schemeshard_schema.h b/ydb/core/tx/schemeshard/schemeshard_schema.h index efcf6a89e5d5..c034b8a303ca 100644 --- a/ydb/core/tx/schemeshard/schemeshard_schema.h +++ b/ydb/core/tx/schemeshard/schemeshard_schema.h @@ -1022,9 +1022,10 @@ struct Schema : NIceDb::Schema { struct AlterVersion : Column<3, NScheme::NTypeIds::Uint64> {}; struct IndexType : Column<4, NScheme::NTypeIds::Uint32> { using Type = NKikimrSchemeOp::EIndexType; static constexpr Type Default = NKikimrSchemeOp::EIndexTypeInvalid; }; struct State : Column<5, NScheme::NTypeIds::Uint32> { using Type = NKikimrSchemeOp::EIndexState; static constexpr Type Default = NKikimrSchemeOp::EIndexStateInvalid; }; + struct Description : Column<6, NScheme::NTypeIds::String> {}; using TKey = TableKey; - using TColumns = TableColumns; + using TColumns = TableColumns; }; struct MigratedTableIndex : Table<67> { @@ -1043,9 +1044,10 @@ struct Schema : NIceDb::Schema { struct AlterVersion : Column<3, NScheme::NTypeIds::Uint64> {}; struct IndexType : Column<4, NScheme::NTypeIds::Uint32> { using Type = NKikimrSchemeOp::EIndexType; static constexpr Type Default = NKikimrSchemeOp::EIndexTypeInvalid; }; struct State : Column<5, NScheme::NTypeIds::Uint32> { using Type = NKikimrSchemeOp::EIndexState; static constexpr Type Default = NKikimrSchemeOp::EIndexStateInvalid; }; + struct Description : Column<6, NScheme::NTypeIds::String> {}; using TKey = TableKey; - using TColumns = TableColumns; + using TColumns = TableColumns; }; struct TableIndexKeys : Table<40> { @@ -1326,7 +1328,7 @@ struct Schema : NIceDb::Schema { struct AlterMainTableTxDone : Column<33, NScheme::NTypeIds::Bool> {}; // Serialized as string NKikimrSchemeOp::TIndexCreationConfig protobuf. - struct CreationConfig : Column<34, NScheme::NTypeIds::String> { using Type = TString; }; + struct CreationConfig : Column<34, NScheme::NTypeIds::String> {}; struct ReadRowsBilled : Column<35, NScheme::NTypeIds::Uint64> {}; struct ReadBytesBilled : Column<36, NScheme::NTypeIds::Uint64> {}; @@ -1440,7 +1442,7 @@ struct Schema : NIceDb::Schema { struct LocalShardIdx : Column<3, NScheme::NTypeIds::Uint64> { using Type = TLocalShardIdx; }; struct Range : Column<4, NScheme::NTypeIds::String> { using Type = NKikimrTx::TKeyRange; }; - struct LastKeyAck : Column<5, NScheme::NTypeIds::String> { using Type = TString; }; + struct LastKeyAck : Column<5, NScheme::NTypeIds::String> {}; struct Status : Column<6, NScheme::NTypeIds::Uint32> { using Type = NKikimrIndexBuilder::EBuildStatus; }; struct Message : Column<7, NScheme::NTypeIds::Utf8> {}; diff --git a/ydb/core/tx/schemeshard/ut_helpers/helpers.cpp b/ydb/core/tx/schemeshard/ut_helpers/helpers.cpp index 6676c0152b9b..91d4638dd1a7 100644 --- a/ydb/core/tx/schemeshard/ut_helpers/helpers.cpp +++ b/ydb/core/tx/schemeshard/ut_helpers/helpers.cpp @@ -1703,14 +1703,16 @@ namespace NSchemeShardUT_Private { case NKikimrSchemeOp::EIndexTypeGlobalVectorKmeansTree: { auto& settings = *index.mutable_global_vector_kmeans_tree_index(); - auto& vectorIndexSettings = *settings.mutable_vector_settings()->mutable_settings(); - if (cfg.VectorIndexSettings) { - cfg.VectorIndexSettings->SerializeTo(vectorIndexSettings); + auto& kmeansTreeSettings = *settings.mutable_vector_settings(); + if (cfg.KMeansTreeSettings) { + cfg.KMeansTreeSettings->SerializeTo(kmeansTreeSettings); } else { // some random valid settings - vectorIndexSettings.set_vector_type(Ydb::Table::VectorIndexSettings::VECTOR_TYPE_FLOAT); - vectorIndexSettings.set_vector_dimension(42); - vectorIndexSettings.set_metric(Ydb::Table::VectorIndexSettings::DISTANCE_COSINE); + kmeansTreeSettings.mutable_settings()->set_vector_type(Ydb::Table::VectorIndexSettings::VECTOR_TYPE_FLOAT); + kmeansTreeSettings.mutable_settings()->set_vector_dimension(42); + kmeansTreeSettings.mutable_settings()->set_metric(Ydb::Table::VectorIndexSettings::DISTANCE_COSINE); + kmeansTreeSettings.set_clusters(4); + kmeansTreeSettings.set_levels(5); } if (cfg.GlobalIndexSettings) { diff --git a/ydb/core/tx/schemeshard/ut_helpers/helpers.h b/ydb/core/tx/schemeshard/ut_helpers/helpers.h index d64de269f421..79161d0c0c8e 100644 --- a/ydb/core/tx/schemeshard/ut_helpers/helpers.h +++ b/ydb/core/tx/schemeshard/ut_helpers/helpers.h @@ -64,7 +64,7 @@ namespace NYdb::NTable { struct TGlobalIndexSettings; - struct TVectorIndexSettings; + struct TKMeansTreeSettings; } namespace NSchemeShardUT_Private { @@ -375,7 +375,7 @@ namespace NSchemeShardUT_Private { TVector DataColumns; TVector GlobalIndexSettings = {}; // implementation note: it was made a pointer, not optional, to enable forward declaration - std::unique_ptr VectorIndexSettings = {}; + std::unique_ptr KMeansTreeSettings = {}; }; std::unique_ptr CreateBuildColumnRequest(ui64 id, const TString& dbName, const TString& src, const TString& columnName, const Ydb::TypedValue& literal); diff --git a/ydb/core/tx/schemeshard/ut_index_build/ut_vector_index_build.cpp b/ydb/core/tx/schemeshard/ut_index_build/ut_vector_index_build.cpp index 47ce8e0b2225..1e28bb24a5ef 100644 --- a/ydb/core/tx/schemeshard/ut_index_build/ut_vector_index_build.cpp +++ b/ydb/core/tx/schemeshard/ut_index_build/ut_vector_index_build.cpp @@ -266,16 +266,20 @@ Y_UNIT_TEST_SUITE (VectorIndexBuildTest) { globalIndexSettings = NYdb::NTable::TGlobalIndexSettings::FromProto(proto); } - std::unique_ptr vectorIndexSettings; + std::unique_ptr kmeansTreeSettings; { - Ydb::Table::VectorIndexSettings proto; + Ydb::Table::KMeansTreeSettings proto; UNIT_ASSERT(google::protobuf::TextFormat::ParseFromString(R"( - metric: DISTANCE_COSINE, - vector_type: VECTOR_TYPE_FLOAT, - vector_dimension: 1024 + settings { + metric: DISTANCE_COSINE + vector_type: VECTOR_TYPE_FLOAT + vector_dimension: 1024 + } + levels: 5 + clusters: 4 )", &proto)); - using T = NYdb::NTable::TVectorIndexSettings; - vectorIndexSettings = std::make_unique(T::FromProto(proto)); + using T = NYdb::NTable::TKMeansTreeSettings; + kmeansTreeSettings = std::make_unique(T::FromProto(proto)); } TBlockEvents indexCreationBlocker(runtime, [](const auto& ev) { @@ -286,7 +290,7 @@ Y_UNIT_TEST_SUITE (VectorIndexBuildTest) { const ui64 buildIndexTx = ++txId; TestBuildIndex(runtime, buildIndexTx, TTestTxConfig::SchemeShard, "/MyRoot", "/MyRoot/vectors", TBuildIndexConfig{ "by_embedding", NKikimrSchemeOp::EIndexTypeGlobalVectorKmeansTree, { "embedding" }, { "covered" }, - { globalIndexSettings, globalIndexSettings }, std::move(vectorIndexSettings) + { globalIndexSettings, globalIndexSettings }, std::move(kmeansTreeSettings) }); RebootTablet(runtime, TTestTxConfig::SchemeShard, runtime.AllocateEdgeActor()); From 6964950d489293bae397d3c3361bea456eb8c2d4 Mon Sep 17 00:00:00 2001 From: Valerii Mironov Date: Mon, 25 Nov 2024 17:10:55 +0000 Subject: [PATCH 2/5] Vector index description persisted: add test --- .../tx/schemeshard/ut_helpers/ls_checks.cpp | 9 +++-- .../tx/schemeshard/ut_helpers/ls_checks.h | 8 +++-- .../schemeshard/ut_index/ut_vector_index.cpp | 24 +++++++------ .../ut_index_build/ut_vector_index_build.cpp | 34 ++++++++++++------- 4 files changed, 46 insertions(+), 29 deletions(-) diff --git a/ydb/core/tx/schemeshard/ut_helpers/ls_checks.cpp b/ydb/core/tx/schemeshard/ut_helpers/ls_checks.cpp index 2f440b74c04a..6cf7514dde9c 100644 --- a/ydb/core/tx/schemeshard/ut_helpers/ls_checks.cpp +++ b/ydb/core/tx/schemeshard/ut_helpers/ls_checks.cpp @@ -853,9 +853,11 @@ TCheckFunc IndexDataColumns(const TVector& dataColumnNames) { }; } -TCheckFunc VectorIndexDescription(Ydb::Table::VectorIndexSettings_Metric metric, +TCheckFunc KMeansTreeDescription(Ydb::Table::VectorIndexSettings_Metric metric, Ydb::Table::VectorIndexSettings_VectorType vectorType, - ui32 vectorDimension + ui32 vectorDimension, + ui32 clusters, + ui32 levels ) { return [=] (const NKikimrScheme::TEvDescribeSchemeResult& record) { if (record.GetPathDescription().GetTableIndex().HasVectorIndexKmeansTreeDescription()) { @@ -863,11 +865,12 @@ TCheckFunc VectorIndexDescription(Ydb::Table::VectorIndexSettings_Metric metric, UNIT_ASSERT_VALUES_EQUAL(settings.settings().metric(), metric); UNIT_ASSERT_VALUES_EQUAL(settings.settings().vector_type(), vectorType); UNIT_ASSERT_VALUES_EQUAL(settings.settings().vector_dimension(), vectorDimension); + UNIT_ASSERT_VALUES_EQUAL(settings.clusters(), clusters); + UNIT_ASSERT_VALUES_EQUAL(settings.levels(), levels); } else { UNIT_FAIL("oneof SpecializedIndexDescription should be set."); } }; - } diff --git a/ydb/core/tx/schemeshard/ut_helpers/ls_checks.h b/ydb/core/tx/schemeshard/ut_helpers/ls_checks.h index 59fd61bf4233..bcd1e2c0e163 100644 --- a/ydb/core/tx/schemeshard/ut_helpers/ls_checks.h +++ b/ydb/core/tx/schemeshard/ut_helpers/ls_checks.h @@ -146,9 +146,11 @@ namespace NLs { TCheckFunc IndexKeys(const TVector& keyNames); TCheckFunc IndexDataColumns(const TVector& dataColumnNames); - TCheckFunc VectorIndexDescription(Ydb::Table::VectorIndexSettings_Metric metric, - Ydb::Table::VectorIndexSettings_VectorType vectorType, - ui32 vectorDimension + TCheckFunc KMeansTreeDescription(Ydb::Table::VectorIndexSettings_Metric metric, + Ydb::Table::VectorIndexSettings_VectorType vectorType, + ui32 vectorDimension, + ui32 clusters, + ui32 levels ); TCheckFunc SequenceName(const TString& name); diff --git a/ydb/core/tx/schemeshard/ut_index/ut_vector_index.cpp b/ydb/core/tx/schemeshard/ut_index/ut_vector_index.cpp index 5c1690e29b33..dd16a3481af2 100644 --- a/ydb/core/tx/schemeshard/ut_index/ut_vector_index.cpp +++ b/ydb/core/tx/schemeshard/ut_index/ut_vector_index.cpp @@ -34,7 +34,7 @@ Y_UNIT_TEST_SUITE(TVectorIndexTests) { KeyColumnNames: ["embedding"] DataColumnNames: ["covered"] Type: EIndexTypeGlobalVectorKmeansTree - VectorIndexKmeansTreeDescription: { Settings: { settings: { metric: DISTANCE_COSINE, vector_type: VECTOR_TYPE_FLOAT, vector_dimension: 1024 } } } + VectorIndexKmeansTreeDescription: { Settings: { settings: { metric: DISTANCE_COSINE, vector_type: VECTOR_TYPE_FLOAT, vector_dimension: 1024 }, clusters: 4, levels: 5 } } } )"); env.TestWaitNotification(runtime, txId); @@ -45,9 +45,11 @@ Y_UNIT_TEST_SUITE(TVectorIndexTests) { NLs::IndexState(NKikimrSchemeOp::EIndexStateReady), NLs::IndexKeys({"embedding"}), NLs::IndexDataColumns({"covered"}), - NLs::VectorIndexDescription(Ydb::Table::VectorIndexSettings::DISTANCE_COSINE, + NLs::KMeansTreeDescription(Ydb::Table::VectorIndexSettings::DISTANCE_COSINE, Ydb::Table::VectorIndexSettings::VECTOR_TYPE_FLOAT, - 1024 + 1024, + 4, + 5 ), }); @@ -62,8 +64,8 @@ Y_UNIT_TEST_SUITE(TVectorIndexTests) { TVector dropTxIds; TestDropTable(runtime, dropTxIds.emplace_back(++txId), "/MyRoot", "vectors"); - env.TestWaitNotification(runtime, dropTxIds); - } + env.TestWaitNotification(runtime, dropTxIds); + } Y_UNIT_TEST(CreateTableCoveredEmbedding) { TTestBasicRuntime runtime; @@ -83,7 +85,7 @@ Y_UNIT_TEST_SUITE(TVectorIndexTests) { KeyColumnNames: ["embedding"] DataColumnNames: ["embedding"] Type: EIndexTypeGlobalVectorKmeansTree - VectorIndexKmeansTreeDescription: { Settings: { settings: { metric: DISTANCE_COSINE, vector_type: VECTOR_TYPE_FLOAT, vector_dimension: 1024 } } } + VectorIndexKmeansTreeDescription: { Settings: { settings: { metric: DISTANCE_COSINE, vector_type: VECTOR_TYPE_FLOAT, vector_dimension: 1024 }, clusters: 4, levels: 5 } } } )"); env.TestWaitNotification(runtime, txId); @@ -94,9 +96,11 @@ Y_UNIT_TEST_SUITE(TVectorIndexTests) { NLs::IndexState(NKikimrSchemeOp::EIndexStateReady), NLs::IndexKeys({"embedding"}), NLs::IndexDataColumns({"embedding"}), - NLs::VectorIndexDescription(Ydb::Table::VectorIndexSettings::DISTANCE_COSINE, + NLs::KMeansTreeDescription(Ydb::Table::VectorIndexSettings::DISTANCE_COSINE, Ydb::Table::VectorIndexSettings::VECTOR_TYPE_FLOAT, - 1024 + 1024, + 4, + 5 ), }); @@ -151,7 +155,7 @@ Y_UNIT_TEST_SUITE(TVectorIndexTests) { TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/vectors/idx_vector/indexImplPostingTable"), { NLs::PathExist, NLs::CheckColumns(PostingTable, {PostingTable_ParentColumn, "id1", "id2", "covered1", "covered2"}, {}, {PostingTable_ParentColumn, "id1", "id2"}, true) }); - } + } Y_UNIT_TEST(VectorKmeansTreePostingImplTable) { // partition @@ -218,5 +222,5 @@ Y_UNIT_TEST_SUITE(TVectorIndexTests) { VectorIndexKmeansTreeDescription: { Settings: { settings: { metric: DISTANCE_COSINE, vector_type: VECTOR_TYPE_FLOAT, vector_dimension: 1024 } } } } )", {NKikimrScheme::StatusInvalidParameter}); - } + } } diff --git a/ydb/core/tx/schemeshard/ut_index_build/ut_vector_index_build.cpp b/ydb/core/tx/schemeshard/ut_index_build/ut_vector_index_build.cpp index 1e28bb24a5ef..f636b5d54480 100644 --- a/ydb/core/tx/schemeshard/ut_index_build/ut_vector_index_build.cpp +++ b/ydb/core/tx/schemeshard/ut_index_build/ut_vector_index_build.cpp @@ -304,19 +304,6 @@ Y_UNIT_TEST_SUITE (VectorIndexBuildTest) { buildIndexOperation.DebugString() ); - TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/vectors/by_embedding"), { - NLs::PathExist, - NLs::IndexState(NKikimrSchemeOp::EIndexStateReady), - NLs::IndexType(NKikimrSchemeOp::EIndexTypeGlobalVectorKmeansTree), - NLs::IndexKeys({"embedding"}), - NLs::IndexDataColumns({"covered"}), - NLs::VectorIndexDescription( - Ydb::Table::VectorIndexSettings::DISTANCE_COSINE, - Ydb::Table::VectorIndexSettings::VECTOR_TYPE_FLOAT, - 1024 - ) - }); - using namespace NKikimr::NTableIndex::NTableVectorKmeansTreeIndex; TestDescribeResult(DescribePrivatePath(runtime, JoinFsPaths("/MyRoot/vectors/by_embedding", LevelTable), true, true), { NLs::IsTable, @@ -332,5 +319,26 @@ Y_UNIT_TEST_SUITE (VectorIndexBuildTest) { NLs::MaxPartitionsCountEqual(3), NLs::SplitBoundaries({12345, 54321}) }); + + for (size_t i = 0; i != 3; ++i) { + if (i != 0) { + // check that specialized index description persisted even after reboot + RebootTablet(runtime, TTestTxConfig::SchemeShard, runtime.AllocateEdgeActor()); + } + TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/vectors/by_embedding"), { + NLs::PathExist, + NLs::IndexState(NKikimrSchemeOp::EIndexStateReady), + NLs::IndexType(NKikimrSchemeOp::EIndexTypeGlobalVectorKmeansTree), + NLs::IndexKeys({"embedding"}), + NLs::IndexDataColumns({"covered"}), + NLs::KMeansTreeDescription( + Ydb::Table::VectorIndexSettings::DISTANCE_COSINE, + Ydb::Table::VectorIndexSettings::VECTOR_TYPE_FLOAT, + 1024, + 4, + 5 + ) + }); + } } } From f6eff60ee00d61e29e14932d671402f76b890bba Mon Sep 17 00:00:00 2001 From: Valerii Mironov Date: Mon, 25 Nov 2024 19:59:24 +0000 Subject: [PATCH 3/5] Canonize --- .../flat_schemeshard.schema | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/ydb/tests/functional/scheme_tests/canondata/tablet_scheme_tests.TestTabletSchemes.test_tablet_schemes_flat_schemeshard_/flat_schemeshard.schema b/ydb/tests/functional/scheme_tests/canondata/tablet_scheme_tests.TestTabletSchemes.test_tablet_schemes_flat_schemeshard_/flat_schemeshard.schema index a2b2fb1f61f7..109300a19c99 100644 --- a/ydb/tests/functional/scheme_tests/canondata/tablet_scheme_tests.TestTabletSchemes.test_tablet_schemes_flat_schemeshard_/flat_schemeshard.schema +++ b/ydb/tests/functional/scheme_tests/canondata/tablet_scheme_tests.TestTabletSchemes.test_tablet_schemes_flat_schemeshard_/flat_schemeshard.schema @@ -2572,6 +2572,11 @@ "ColumnId": 5, "ColumnName": "State", "ColumnType": "Uint32" + }, + { + "ColumnId": 6, + "ColumnName": "Description", + "ColumnType": "String" } ], "ColumnsDropped": [], @@ -2581,7 +2586,8 @@ 1, 3, 4, - 5 + 5, + 6 ], "RoomID": 0, "Codec": 0, @@ -2628,6 +2634,11 @@ "ColumnId": 5, "ColumnName": "State", "ColumnType": "Uint32" + }, + { + "ColumnId": 6, + "ColumnName": "Description", + "ColumnType": "String" } ], "ColumnsDropped": [], @@ -2637,7 +2648,8 @@ 1, 3, 4, - 5 + 5, + 6 ], "RoomID": 0, "Codec": 0, From 500d47fa56c80d743ee91abcffeb477269672a8c Mon Sep 17 00:00:00 2001 From: Valerii Mironov Date: Tue, 26 Nov 2024 19:16:26 +0000 Subject: [PATCH 4/5] Apply review suggestion --- ydb/core/tx/schemeshard/schemeshard__init.cpp | 27 ++++++++++--------- ydb/core/tx/schemeshard/schemeshard_impl.cpp | 4 +-- .../tx/schemeshard/schemeshard_info_types.h | 10 +++---- ydb/core/tx/schemeshard/schemeshard_schema.h | 2 ++ 4 files changed, 23 insertions(+), 20 deletions(-) diff --git a/ydb/core/tx/schemeshard/schemeshard__init.cpp b/ydb/core/tx/schemeshard/schemeshard__init.cpp index 13c985b8fb41..13784dd76987 100644 --- a/ydb/core/tx/schemeshard/schemeshard__init.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__init.cpp @@ -990,16 +990,6 @@ struct TSchemeShard::TTxInit : public TTransactionBase { typedef std::tuple TTableIndexRec; typedef TDeque TTableIndexRows; - template - static TTableIndexRec MakeTableIndexRec(const TPathId& pathId, TRowSet& rowSet) { - return std::make_tuple(pathId, - rowSet.template GetValue(), - rowSet.template GetValue(), - rowSet.template GetValue(), - TString{} - ); - } - bool LoadTableIndexes(NIceDb::TNiceDb& db, TTableIndexRows& tableIndexes) const { { auto rowSet = db.Table().Range().Select(); @@ -1008,8 +998,13 @@ struct TSchemeShard::TTxInit : public TTransactionBase { } while (!rowSet.EndOfSet()) { const auto pathId = Self->MakeLocalId(TLocalPathId(rowSet.GetValue())); - auto& back = tableIndexes.emplace_back(MakeTableIndexRec(pathId, rowSet)); - std::get<4>(back) = rowSet.GetValue(); + tableIndexes.emplace_back( + pathId, + rowSet.GetValue(), + rowSet.GetValue(), + rowSet.GetValue(), + rowSet.GetValue() + ); if (!rowSet.Next()) { return false; @@ -1026,7 +1021,13 @@ struct TSchemeShard::TTxInit : public TTransactionBase { TOwnerId(rowSet.GetValue()), TLocalPathId(rowSet.GetValue()) ); - tableIndexes.push_back(MakeTableIndexRec(pathId, rowSet)); + tableIndexes.emplace_back( + pathId, + rowSet.GetValue(), + rowSet.GetValue(), + rowSet.GetValue(), + TString{} + ); if (!rowSet.Next()) { return false; diff --git a/ydb/core/tx/schemeshard/schemeshard_impl.cpp b/ydb/core/tx/schemeshard/schemeshard_impl.cpp index 38896766d670..72342e0f17e4 100644 --- a/ydb/core/tx/schemeshard/schemeshard_impl.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_impl.cpp @@ -1696,7 +1696,7 @@ void TSchemeShard::PersistTableIndex(NIceDb::TNiceDb& db, const TPathId& pathId) NIceDb::TUpdate(alterData->AlterVersion), NIceDb::TUpdate(alterData->Type), NIceDb::TUpdate(alterData->State), - NIceDb::TUpdate(alterData->DescriptionToStr())); + NIceDb::TUpdate(alterData->SerializeDescription())); db.Table().Key(element->PathId.LocalPathId).Delete(); @@ -1732,7 +1732,7 @@ void TSchemeShard::PersistTableIndexAlterData(NIceDb::TNiceDb& db, const TPathId NIceDb::TUpdate(alterData->AlterVersion), NIceDb::TUpdate(alterData->Type), NIceDb::TUpdate(alterData->State), - NIceDb::TUpdate(alterData->DescriptionToStr())); + NIceDb::TUpdate(alterData->SerializeDescription())); for (ui32 keyIdx = 0; keyIdx < alterData->IndexKeys.size(); ++keyIdx) { db.Table().Key(elem->PathId.LocalPathId, keyIdx).Update( diff --git a/ydb/core/tx/schemeshard/schemeshard_info_types.h b/ydb/core/tx/schemeshard/schemeshard_info_types.h index 1014e544e13b..d8cdce946486 100644 --- a/ydb/core/tx/schemeshard/schemeshard_info_types.h +++ b/ydb/core/tx/schemeshard/schemeshard_info_types.h @@ -2396,14 +2396,14 @@ struct TTableIndexInfo : public TSimpleRefCount { return result; } - TString DescriptionToStr() const { - return std::visit([](const auto& v) { - if constexpr (requires { v.SerializeAsString(); }) { + TString SerializeDescription() const { + return std::visit([](const T& v) { + if constexpr (std::is_same_v) { + return TString{}; + } else { TString str{v.SerializeAsString()}; Y_ENSURE(!str.empty()); return str; - } else { - return TString{}; } }, SpecializedIndexDescription); } diff --git a/ydb/core/tx/schemeshard/schemeshard_schema.h b/ydb/core/tx/schemeshard/schemeshard_schema.h index c034b8a303ca..6f0e1c12a5a6 100644 --- a/ydb/core/tx/schemeshard/schemeshard_schema.h +++ b/ydb/core/tx/schemeshard/schemeshard_schema.h @@ -1022,6 +1022,7 @@ struct Schema : NIceDb::Schema { struct AlterVersion : Column<3, NScheme::NTypeIds::Uint64> {}; struct IndexType : Column<4, NScheme::NTypeIds::Uint32> { using Type = NKikimrSchemeOp::EIndexType; static constexpr Type Default = NKikimrSchemeOp::EIndexTypeInvalid; }; struct State : Column<5, NScheme::NTypeIds::Uint32> { using Type = NKikimrSchemeOp::EIndexState; static constexpr Type Default = NKikimrSchemeOp::EIndexStateInvalid; }; + // One of the SpecializedIndexDescription protobufs serialized as a string. struct Description : Column<6, NScheme::NTypeIds::String> {}; using TKey = TableKey; @@ -1044,6 +1045,7 @@ struct Schema : NIceDb::Schema { struct AlterVersion : Column<3, NScheme::NTypeIds::Uint64> {}; struct IndexType : Column<4, NScheme::NTypeIds::Uint32> { using Type = NKikimrSchemeOp::EIndexType; static constexpr Type Default = NKikimrSchemeOp::EIndexTypeInvalid; }; struct State : Column<5, NScheme::NTypeIds::Uint32> { using Type = NKikimrSchemeOp::EIndexState; static constexpr Type Default = NKikimrSchemeOp::EIndexStateInvalid; }; + // One of the SpecializedIndexDescription protobufs serialized as a string. struct Description : Column<6, NScheme::NTypeIds::String> {}; using TKey = TableKey; From 25bf47f91358f657a11e598cad0557535176774e Mon Sep 17 00:00:00 2001 From: Valerii Mironov Date: Tue, 26 Nov 2024 20:21:27 +0000 Subject: [PATCH 5/5] Apply review suggestion --- .../tx/schemeshard/schemeshard__conditional_erase.cpp | 5 +++-- .../schemeshard__operation_consistent_copy_tables.cpp | 9 +++++++-- .../tx/schemeshard/schemeshard__operation_copy_table.cpp | 5 ++++- ydb/core/tx/schemeshard/schemeshard_info_types.h | 2 +- 4 files changed, 15 insertions(+), 6 deletions(-) diff --git a/ydb/core/tx/schemeshard/schemeshard__conditional_erase.cpp b/ydb/core/tx/schemeshard/schemeshard__conditional_erase.cpp index ff7d803a25f3..62a3a7cfad40 100644 --- a/ydb/core/tx/schemeshard/schemeshard__conditional_erase.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__conditional_erase.cpp @@ -230,7 +230,8 @@ struct TSchemeShard::TTxRunConditionalErase: public TSchemeShard::TRwTxBase { } auto index = GetIndex(childPath); - if (index->Type == NKikimrSchemeOp::EIndexTypeGlobalAsync) { + if (index->Type == NKikimrSchemeOp::EIndexTypeGlobalAsync + || index->Type == NKikimrSchemeOp::EIndexTypeGlobalVectorKmeansTree) { continue; } @@ -267,7 +268,7 @@ struct TSchemeShard::TTxRunConditionalErase: public TSchemeShard::TRwTxBase { } static TVector> MakeColumnIds(TTableInfo::TPtr mainTable, TTableIndexInfo::TPtr index, TTableInfo::TPtr indexImplTable) { - Y_ENSURE(index->Type != NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree); + Y_ABORT_UNLESS(index->Type != NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree); TVector> result; THashSet keys; diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_consistent_copy_tables.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_consistent_copy_tables.cpp index fdc9dc4cddac..e00d8842049b 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_consistent_copy_tables.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_consistent_copy_tables.cpp @@ -35,7 +35,7 @@ NKikimrSchemeOp::TModifyScheme CreateIndexTask(NKikimr::NSchemeShard::TTableInde operation->SetName(dst.LeafName()); operation->SetType(indexInfo->Type); - Y_ENSURE(indexInfo->Type != NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree); + Y_ABORT_UNLESS(indexInfo->Type != NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree); for (const auto& keyName: indexInfo->IndexKeys) { *operation->MutableKeyColumnNames()->Add() = keyName; } @@ -175,9 +175,14 @@ bool CreateConsistentCopyTables( } Y_ABORT_UNLESS(srcIndexPath.Base()->PathId == pathId); + TTableIndexInfo::TPtr indexInfo = context.SS->Indexes.at(pathId); + if (indexInfo->Type == NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree) { + result = {CreateReject(nextId, NKikimrScheme::EStatus::StatusInvalidParameter, + "Consistent copy table doesn't support table with vector index")}; + return false; + } Y_VERIFY_S(srcIndexPath.Base()->GetChildren().size() == 1, srcIndexPath.PathString() << " has children " << srcIndexPath.Base()->GetChildren().size() << " but 1 expected"); - TTableIndexInfo::TPtr indexInfo = context.SS->Indexes.at(pathId); result.push_back(CreateNewTableIndex(NextPartId(nextId, result), CreateIndexTask(indexInfo, dstIndexPath))); TString srcImplTableName = srcIndexPath.Base()->GetChildren().begin()->first; diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_copy_table.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_copy_table.cpp index 983a6f940f78..5fe070536e08 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_copy_table.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_copy_table.cpp @@ -862,7 +862,10 @@ TVector CreateCopyTable(TOperationId nextId, const TTxTrans auto operation = schema.MutableCreateTableIndex(); operation->SetName(name); operation->SetType(indexInfo->Type); - Y_ENSURE(indexInfo->Type != NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree); + if (indexInfo->Type == NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree) { + return {CreateReject(nextId, NKikimrScheme::EStatus::StatusInvalidParameter, + "Copy table doesn't support table with vector index")}; + } for (const auto& keyName: indexInfo->IndexKeys) { *operation->MutableKeyColumnNames()->Add() = keyName; } diff --git a/ydb/core/tx/schemeshard/schemeshard_info_types.h b/ydb/core/tx/schemeshard/schemeshard_info_types.h index d8cdce946486..6d446ad44454 100644 --- a/ydb/core/tx/schemeshard/schemeshard_info_types.h +++ b/ydb/core/tx/schemeshard/schemeshard_info_types.h @@ -2402,7 +2402,7 @@ struct TTableIndexInfo : public TSimpleRefCount { return TString{}; } else { TString str{v.SerializeAsString()}; - Y_ENSURE(!str.empty()); + Y_ABORT_UNLESS(!str.empty()); return str; } }, SpecializedIndexDescription);