From 61d6047eb8edad64e2f60d76acb54847e57ada84 Mon Sep 17 00:00:00 2001 From: ijon Date: Tue, 16 Jul 2024 17:04:08 +0300 Subject: [PATCH] schemeshard: preserialize Table.SplitBoundary for describe result (#6648) merged 83a86c2 from main Preserialize table's split boundaries the same way table partitions are. The size of both depend on the same variable: number of shards in the table, but TablePartitions was preserialized (and cached) while Table.SplitBoundaries wasn't. Preserializing all potentially huge parts of DescribeSchemeResult message before it gets to the interconnect saves interconnect actors additional serialization costs. And when partitioning of the huge tables goes through the period of a rapid change that additional serialization causes interconnect to overload. Single shortcoming though: preserialized SplitBoundary is not used (cannot be used) when boundaries of the index tables are requested through describe request on table index. KIKIMR-21686 --- ...ard__operation_alter_continuous_backup.cpp | 2 +- .../schemeshard__operation_copy_table.cpp | 2 +- ydb/core/tx/schemeshard/schemeshard_impl.cpp | 5 +- ydb/core/tx/schemeshard/schemeshard_impl.h | 3 +- .../tx/schemeshard/schemeshard_info_types.cpp | 13 +- .../tx/schemeshard/schemeshard_info_types.h | 7 +- .../schemeshard_path_describer.cpp | 165 ++++++++++-------- 7 files changed, 113 insertions(+), 84 deletions(-) diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_alter_continuous_backup.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_alter_continuous_backup.cpp index 24f9c7c0d0f8..4c7d2f282cad 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_alter_continuous_backup.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_alter_continuous_backup.cpp @@ -83,7 +83,7 @@ TVector CreateAlterContinuousBackup(TOperationId opId, cons const NScheme::TTypeRegistry* typeRegistry = AppData(context.Ctx)->TypeRegistry; NKikimrSchemeOp::TTableDescription schema; - context.SS->DescribeTable(table, typeRegistry, true, false, &schema); + context.SS->DescribeTable(table, typeRegistry, true, &schema); schema.MutablePartitionConfig()->CopyFrom(table->TableDescription.GetPartitionConfig()); TString errStr; diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_copy_table.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_copy_table.cpp index 417e52cc18da..2bf68f59488c 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_copy_table.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_copy_table.cpp @@ -14,7 +14,7 @@ void PrepareScheme(NKikimrSchemeOp::TTableDescription* schema, const TString& na const NScheme::TTypeRegistry* typeRegistry = AppData(context.Ctx)->TypeRegistry; NKikimrSchemeOp::TTableDescription completedSchema; - context.SS->DescribeTable(srcTableInfo, typeRegistry, true, false, &completedSchema); + context.SS->DescribeTable(srcTableInfo, typeRegistry, true, &completedSchema); completedSchema.SetName(name); //inherit all from Src except PartitionConfig, PartitionConfig could be altered diff --git a/ydb/core/tx/schemeshard/schemeshard_impl.cpp b/ydb/core/tx/schemeshard/schemeshard_impl.cpp index 93a3c437fe6a..765401edd1f7 100644 --- a/ydb/core/tx/schemeshard/schemeshard_impl.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_impl.cpp @@ -598,8 +598,9 @@ void TSchemeShard::ClearDescribePathCaches(const TPathElement::TPtr node, bool f } else if (node->PathType == NKikimrSchemeOp::EPathType::EPathTypeTable) { Y_ABORT_UNLESS(Tables.contains(node->PathId)); TTableInfo::TPtr tabletInfo = Tables.at(node->PathId); - tabletInfo->PreSerializedPathDescription.clear(); - tabletInfo->PreSerializedPathDescriptionWithoutRangeKey.clear(); + tabletInfo->PreserializedTablePartitions.clear(); + tabletInfo->PreserializedTablePartitionsNoKeys.clear(); + tabletInfo->PreserializedTableSplitBoundaries.clear(); } } diff --git a/ydb/core/tx/schemeshard/schemeshard_impl.h b/ydb/core/tx/schemeshard/schemeshard_impl.h index e58560745d59..7a9eb9586dd6 100644 --- a/ydb/core/tx/schemeshard/schemeshard_impl.h +++ b/ydb/core/tx/schemeshard/schemeshard_impl.h @@ -1015,7 +1015,7 @@ class TSchemeShard void FillAsyncIndexInfo(const TPathId& tableId, NKikimrTxDataShard::TFlatSchemeTransaction& tx); void DescribeTable(const TTableInfo::TPtr tableInfo, const NScheme::TTypeRegistry* typeRegistry, - bool fillConfig, bool fillBoundaries, NKikimrSchemeOp::TTableDescription* entry) const; + bool fillConfig, NKikimrSchemeOp::TTableDescription* entry) const; void DescribeTableIndex(const TPathId& pathId, const TString& name, bool fillConfig, bool fillBoundaries, NKikimrSchemeOp::TIndexDescription& entry ) const; @@ -1031,7 +1031,6 @@ class TSchemeShard void DescribeReplication(const TPathId& pathId, const TString& name, NKikimrSchemeOp::TReplicationDescription& desc); void DescribeReplication(const TPathId& pathId, const TString& name, TReplicationInfo::TPtr info, NKikimrSchemeOp::TReplicationDescription& desc); void DescribeBlobDepot(const TPathId& pathId, const TString& name, NKikimrSchemeOp::TBlobDepotDescription& desc); - static void FillTableBoundaries(const TTableInfo::TPtr tableInfo, google::protobuf::RepeatedPtrField& boundaries); void Handle(NKikimr::NOlap::NBackground::TEvExecuteGeneralLocalTransaction::TPtr& ev, const TActorContext& ctx); void Handle(NKikimr::NOlap::NBackground::TEvRemoveSession::TPtr& ev, const TActorContext& ctx); diff --git a/ydb/core/tx/schemeshard/schemeshard_info_types.cpp b/ydb/core/tx/schemeshard/schemeshard_info_types.cpp index 54bc391e5c6a..a13ef59e4de1 100644 --- a/ydb/core/tx/schemeshard/schemeshard_info_types.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_info_types.cpp @@ -356,10 +356,10 @@ TTableInfo::TAlterDataPtr TTableInfo::CreateAlterData( const TTableInfo::TColumn& sourceColumn = source->Columns[colId]; if (col.HasDefaultFromSequence()) { - if (sourceColumn.PType.GetTypeId() != NScheme::NTypeIds::Int64 + if (sourceColumn.PType.GetTypeId() != NScheme::NTypeIds::Int64 && NPg::PgTypeIdFromTypeDesc(sourceColumn.PType.GetTypeDesc()) != INT8OID) { - TString sequenceType = sourceColumn.PType.GetTypeId() == NScheme::NTypeIds::Pg - ? NPg::PgTypeNameFromTypeDesc(NPg::TypeDescFromPgTypeId(INT8OID)) + TString sequenceType = sourceColumn.PType.GetTypeId() == NScheme::NTypeIds::Pg + ? NPg::PgTypeNameFromTypeDesc(NPg::TypeDescFromPgTypeId(INT8OID)) : NScheme::TypeName(NScheme::NTypeIds::Int64); errStr = Sprintf( "Sequence value type '%s' must be equal to the column type '%s'", sequenceType.c_str(), @@ -411,7 +411,7 @@ TTableInfo::TAlterDataPtr TTableInfo::CreateAlterData( return nullptr; default: break; - } + } } } else { auto* typeDesc = NPg::TypeDescFromPgTypeName(typeName); @@ -1614,8 +1614,9 @@ void TTableInfo::SetPartitioning(TVector&& newPartitioning) { Stats.PartitionStats.swap(newPartitionStats); Stats.Aggregated = newAggregatedStats; Partitions.swap(newPartitioning); - PreSerializedPathDescription.clear(); - PreSerializedPathDescriptionWithoutRangeKey.clear(); + PreserializedTablePartitions.clear(); + PreserializedTablePartitionsNoKeys.clear(); + PreserializedTableSplitBoundaries.clear(); CondEraseSchedule.clear(); InFlightCondErase.clear(); diff --git a/ydb/core/tx/schemeshard/schemeshard_info_types.h b/ydb/core/tx/schemeshard/schemeshard_info_types.h index b5297bf20037..0d97a320a6ee 100644 --- a/ydb/core/tx/schemeshard/schemeshard_info_types.h +++ b/ydb/core/tx/schemeshard/schemeshard_info_types.h @@ -436,8 +436,11 @@ struct TTableInfo : public TSimpleRefCount { TMap BackupHistory; TMap RestoreHistory; - TString PreSerializedPathDescription; - TString PreSerializedPathDescriptionWithoutRangeKey; + // Preserialized TDescribeSchemeResult with PathDescription.TablePartitions field filled + TString PreserializedTablePartitions; + TString PreserializedTablePartitionsNoKeys; + // Preserialized TDescribeSchemeResult with PathDescription.Table.SplitBoundary field filled + TString PreserializedTableSplitBoundaries; THashMap PerShardPartitionConfig; diff --git a/ydb/core/tx/schemeshard/schemeshard_path_describer.cpp b/ydb/core/tx/schemeshard/schemeshard_path_describer.cpp index c1259db27f56..34ecf178b3bf 100644 --- a/ydb/core/tx/schemeshard/schemeshard_path_describer.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_path_describer.cpp @@ -217,6 +217,68 @@ void TPathDescriber::DescribeDir(const TPath& path) { DescribeChildren(path); } +void FillTableBoundaries( + google::protobuf::RepeatedPtrField* result, + const TTableInfo::TPtr tableInfo +) { + TString errStr; + // Number of split boundaries equals to number of partitions - 1 + result->Reserve(tableInfo->GetPartitions().size() - 1); + for (ui32 pi = 0; pi < tableInfo->GetPartitions().size() - 1; ++pi) { + const auto& p = tableInfo->GetPartitions()[pi]; + TSerializedCellVec endKey(p.EndOfRange); + auto boundary = result->Add()->MutableKeyPrefix(); + for (ui32 ki = 0; ki < endKey.GetCells().size(); ++ki){ + const auto& c = endKey.GetCells()[ki]; + auto type = tableInfo->Columns[tableInfo->KeyColumnIds[ki]].PType; + bool ok = NMiniKQL::CellToValue(type, c, *boundary->AddTuple(), errStr); + Y_ABORT_UNLESS(ok, "Failed to build key tuple at position %" PRIu32 " error: %s", ki, errStr.data()); + } + } +} + +void FillTablePartitions( + google::protobuf::RepeatedPtrField* result, + const TTableInfo::TPtr tableInfo, + const THashMap& shardInfos, + bool includeKeys +) { + result->Reserve(tableInfo->GetPartitions().size()); + for (auto& p : tableInfo->GetPartitions()) { + const auto& tabletId = ui64(shardInfos.at(p.ShardIdx).TabletID); + const auto& key = p.EndOfRange; + + auto part = result->Add(); + part->SetDatashardId(tabletId); + if (includeKeys) { + // Currently we only support uniform partitioning where each range is [start, end) + // +inf as the end of the last range is represented by empty TCell vector + part->SetIsPoint(false); + part->SetIsInclusive(false); + part->SetEndOfRangeKeyPrefix(key); + } + } +} + +const TString& GetSerializedTablePartitions( + const TTableInfo::TPtr tableInfo, + const THashMap& shardInfos, + bool returnRangeKey +) { + TString& cache = (returnRangeKey + ? tableInfo->PreserializedTablePartitions + : tableInfo->PreserializedTablePartitionsNoKeys + ); + + if (cache.empty()) { + NKikimrScheme::TEvDescribeSchemeResult result; + FillTablePartitions(result.MutablePathDescription()->MutableTablePartitions(), tableInfo, shardInfos, returnRangeKey); + Y_PROTOBUF_SUPPRESS_NODISCARD result.SerializeToString(&cache); + } + + return cache; +} + void TPathDescriber::DescribeTable(const TActorContext& ctx, TPathId pathId, TPathElement::TPtr pathEl) { const NScheme::TTypeRegistry* typeRegistry = AppData(ctx)->TypeRegistry; const TTableInfo::TPtr tableInfo = *Self->Tables.FindPtr(pathId); @@ -238,50 +300,30 @@ void TPathDescriber::DescribeTable(const TActorContext& ctx, TPathId pathId, TPa returnRangeKey = Params.GetOptions().GetReturnRangeKey(); } - Self->DescribeTable(tableInfo, typeRegistry, returnConfig, returnBoundaries, entry); + Self->DescribeTable(tableInfo, typeRegistry, returnConfig, entry); entry->SetName(pathEl->Name); - if (returnPartitioning) { - // partitions - if (tableInfo->PreSerializedPathDescription.empty()) { + if (returnBoundaries) { + // split boundaries (split keys without shard's tablet-ids) + if (tableInfo->PreserializedTableSplitBoundaries.empty()) { NKikimrScheme::TEvDescribeSchemeResult preSerializedResult; - NKikimrScheme::TEvDescribeSchemeResult preSerializedResultWithoutRangeKey; - - NKikimrSchemeOp::TPathDescription& pathDescription = *preSerializedResult.MutablePathDescription(); - NKikimrSchemeOp::TPathDescription& pathDescriptionWithoutRangeKey = *preSerializedResultWithoutRangeKey.MutablePathDescription(); - - pathDescription.MutableTablePartitions()->Reserve(tableInfo->GetPartitions().size()); - pathDescriptionWithoutRangeKey.MutableTablePartitions()->Reserve(tableInfo->GetPartitions().size()); - for (auto& p : tableInfo->GetPartitions()) { - auto part = pathDescription.AddTablePartitions(); - auto partWithoutRangeKey = pathDescriptionWithoutRangeKey.AddTablePartitions(); - auto datashardIdx = p.ShardIdx; - auto datashardTabletId = Self->ShardInfos[datashardIdx].TabletID; - // Currently we only support uniform partitioning where each range is [start, end) - // +inf as the end of the last range is represented by empty TCell vector - part->SetDatashardId(ui64(datashardTabletId)); - partWithoutRangeKey->SetDatashardId(ui64(datashardTabletId)); - - part->SetIsPoint(false); - partWithoutRangeKey->SetIsPoint(false); - - part->SetIsInclusive(false); - partWithoutRangeKey->SetIsInclusive(false); - - part->SetEndOfRangeKeyPrefix(p.EndOfRange); - } - Y_PROTOBUF_SUPPRESS_NODISCARD preSerializedResult.SerializeToString(&tableInfo->PreSerializedPathDescription); - Y_PROTOBUF_SUPPRESS_NODISCARD preSerializedResultWithoutRangeKey.SerializeToString(&tableInfo->PreSerializedPathDescriptionWithoutRangeKey); - } - if (returnRangeKey) { - Result->PreSerializedData += tableInfo->PreSerializedPathDescription; - } else { - Result->PreSerializedData += tableInfo->PreSerializedPathDescriptionWithoutRangeKey; - } - if (!pathEl->IsCreateFinished()) { - tableInfo->PreSerializedPathDescription.clear(); // KIKIMR-4337 - tableInfo->PreSerializedPathDescriptionWithoutRangeKey.clear(); + auto& tableDesc = *preSerializedResult.MutablePathDescription()->MutableTable(); + FillTableBoundaries(tableDesc.MutableSplitBoundary(), tableInfo); + Y_PROTOBUF_SUPPRESS_NODISCARD preSerializedResult.SerializeToString(&tableInfo->PreserializedTableSplitBoundaries); } + Result->PreSerializedData += tableInfo->PreserializedTableSplitBoundaries; + } + + if (returnPartitioning) { + // partitions (shard tablet-ids with range keys) + Result->PreSerializedData += GetSerializedTablePartitions(tableInfo, Self->ShardInfos, returnRangeKey); + } + + // KIKIMR-4337: table info is in flux until table is finally created + if (!pathEl->IsCreateFinished()) { + tableInfo->PreserializedTablePartitions.clear(); + tableInfo->PreserializedTablePartitionsNoKeys.clear(); + tableInfo->PreserializedTableSplitBoundaries.clear(); } FillAggregatedStats(*Result->Record.MutablePathDescription(), tableInfo->GetStats()); @@ -1122,8 +1164,12 @@ THolder DescribePath( return DescribePath(self, ctx, pathId, options); } -void TSchemeShard::DescribeTable(const TTableInfo::TPtr tableInfo, const NScheme::TTypeRegistry* typeRegistry, - bool fillConfig, bool fillBoundaries, NKikimrSchemeOp::TTableDescription* entry) const +void TSchemeShard::DescribeTable( + const TTableInfo::TPtr tableInfo, + const NScheme::TTypeRegistry* typeRegistry, + bool fillConfig, + NKikimrSchemeOp::TTableDescription* entry + ) const { Y_UNUSED(typeRegistry); THashMap familyNames; @@ -1192,10 +1238,6 @@ void TSchemeShard::DescribeTable(const TTableInfo::TPtr tableInfo, const NScheme FillPartitionConfig(tableInfo->PartitionConfig(), *entry->MutablePartitionConfig()); } - if (fillBoundaries) { - FillTableBoundaries(tableInfo, *entry->MutableSplitBoundary()); - } - if (tableInfo->HasTTLSettings()) { entry->MutableTTLSettings()->CopyFrom(tableInfo->TTLSettings()); } @@ -1238,23 +1280,23 @@ void TSchemeShard::DescribeTableIndex(const TPathId& pathId, const TString& name *entry.MutableDataColumnNames()->Add() = dataColumns; } - auto* indexPath = PathsById.FindPtr(pathId); + auto indexPath = *PathsById.FindPtr(pathId); Y_ABORT_UNLESS(indexPath); - Y_ABORT_UNLESS((*indexPath)->GetChildren().size() == 1); - const auto& indexImplTablePathId = (*indexPath)->GetChildren().begin()->second; + Y_ABORT_UNLESS(indexPath->GetChildren().size() == 1); + const auto& indexImplTablePathId = indexPath->GetChildren().begin()->second; - auto* tableInfo = Tables.FindPtr(indexImplTablePathId); + auto tableInfo = *Tables.FindPtr(indexImplTablePathId); Y_ABORT_UNLESS(tableInfo); - const auto& tableStats = (*tableInfo)->GetStats().Aggregated; + const auto& tableStats = tableInfo->GetStats().Aggregated; entry.SetDataSize(tableStats.DataSize + tableStats.IndexSize); auto* tableDescription = entry.AddIndexImplTableDescriptions(); if (fillConfig) { - FillPartitionConfig((*tableInfo)->PartitionConfig(), *tableDescription->MutablePartitionConfig()); + FillPartitionConfig(tableInfo->PartitionConfig(), *tableDescription->MutablePartitionConfig()); } if (fillBoundaries) { - FillTableBoundaries(*tableInfo, *tableDescription->MutableSplitBoundary()); + FillTableBoundaries(tableDescription->MutableSplitBoundary(), tableInfo); } } @@ -1401,22 +1443,5 @@ void TSchemeShard::DescribeBlobDepot(const TPathId& pathId, const TString& name, desc.SetTabletId(static_cast(it->second->BlobDepotTabletId)); } -void TSchemeShard::FillTableBoundaries(const TTableInfo::TPtr tableInfo, google::protobuf::RepeatedPtrField& boundaries) { - TString errStr; - // Number of split boundaries equals to number of partitions - 1 - boundaries.Reserve(tableInfo->GetPartitions().size() - 1); - for (ui32 pi = 0; pi < tableInfo->GetPartitions().size() - 1; ++pi) { - const auto& p = tableInfo->GetPartitions()[pi]; - TSerializedCellVec endKey(p.EndOfRange); - auto boundary = boundaries.Add()->MutableKeyPrefix(); - for (ui32 ki = 0; ki < endKey.GetCells().size(); ++ki){ - const auto& c = endKey.GetCells()[ki]; - auto type = tableInfo->Columns[tableInfo->KeyColumnIds[ki]].PType; - bool ok = NMiniKQL::CellToValue(type, c, *boundary->AddTuple(), errStr); - Y_ABORT_UNLESS(ok, "Failed to build key tuple at position %" PRIu32 " error: %s", ki, errStr.data()); - } - } -} - } // NSchemeShard } // NKikimr