diff --git a/ydb/core/mind/hive/hive_impl.cpp b/ydb/core/mind/hive/hive_impl.cpp index 98f031b6ef1e..ae494e2e65b0 100644 --- a/ydb/core/mind/hive/hive_impl.cpp +++ b/ydb/core/mind/hive/hive_impl.cpp @@ -175,14 +175,30 @@ void THive::DeleteTabletWithoutStorage(TLeaderTabletInfo* tablet, TSideEffects& sideEffects.Send(SelfId(), new TEvTabletBase::TEvDeleteTabletResult(NKikimrProto::OK, tablet->Id)); } +TInstant THive::GetAllowedBootingTime() { + auto connectedNodes = TabletCounters->Simple()[NHive::COUNTER_NODES_CONNECTED].Get(); + BLOG_D(connectedNodes << " nodes connected out of " << ExpectedNodes); + if (connectedNodes == 0) { + return {}; + } + TInstant result = LastConnect + MaxTimeBetweenConnects * std::max<i64>(static_cast<i64>(ExpectedNodes) - static_cast<i64>(connectedNodes), 1); + if (connectedNodes < ExpectedNodes) { + result = std::max(result, StartTime() + GetWarmUpBootWaitingPeriod()); + } + result = std::min(result, StartTime() + GetMaxWarmUpPeriod()); + return result; +} + void THive::ExecuteProcessBootQueue(NIceDb::TNiceDb& db, TSideEffects& sideEffects) { TInstant now = TActivationContext::Now(); - TInstant allowed = std::min(LastConnect + GetWarmUpBootWaitingPeriod(), StartTime() + GetMaxWarmUpPeriod()); - if (WarmUp && now < allowed) { - BLOG_D("ProcessBootQueue - last connect was at " << LastConnect << "- not long enough ago"); - ProcessBootQueueScheduled = false; - PostponeProcessBootQueue(allowed - now); - return; + if (WarmUp) { + TInstant allowed = GetAllowedBootingTime(); + if (now < allowed) { + BLOG_D("ProcessBootQueue - waiting until " << allowed << " because of warmup, now: " << now); + ProcessBootQueueScheduled = false; + PostponeProcessBootQueue(allowed - now); + return; + } } BLOG_D("Handle ProcessBootQueue (size: " << BootQueue.BootQueue.size() << ")"); THPTimer bootQueueProcessingTimer; @@ -302,9 +318,11 @@ void THive::ProcessBootQueue() { } void THive::PostponeProcessBootQueue(TDuration after) { - if (!ProcessBootQueuePostponed) { + TInstant 
postponeUntil = TActivationContext::Now() + after; + if (!ProcessBootQueuePostponed || postponeUntil < ProcessBootQueuePostponedUntil) { BLOG_D("PostponeProcessBootQueue (" << after << ")"); ProcessBootQueuePostponed = true; + ProcessBootQueuePostponedUntil = postponeUntil; Schedule(after, new TEvPrivate::TEvPostponeProcessBootQueue()); } } diff --git a/ydb/core/mind/hive/hive_impl.h b/ydb/core/mind/hive/hive_impl.h index e3318ad1e42f..57de13f85f48 100644 --- a/ydb/core/mind/hive/hive_impl.h +++ b/ydb/core/mind/hive/hive_impl.h @@ -362,7 +362,10 @@ class THive : public TActor<THive>, public TTabletExecutedFlat, public THiveShar bool ProcessBootQueueScheduled = false; bool ProcessBootQueuePostponed = false; TInstant LastConnect; + TInstant ProcessBootQueuePostponedUntil; + TDuration MaxTimeBetweenConnects; bool WarmUp; + ui64 ExpectedNodes; THashMap NodesInfo; TTabletCountersBase* TabletCounters; @@ -901,6 +904,7 @@ class THive : public TActor<THive>, public TTabletExecutedFlat, public THiveShar void ScheduleDisconnectNode(THolder event); void DeleteTabletWithoutStorage(TLeaderTabletInfo* tablet); void DeleteTabletWithoutStorage(TLeaderTabletInfo* tablet, TSideEffects& sideEffects); + TInstant GetAllowedBootingTime(); void ScheduleUnlockTabletExecution(TNodeInfo& node); TString DebugDomainsActiveNodes() const; TResourceNormalizedValues GetStDevResourceValues() const; diff --git a/ydb/core/mind/hive/hive_ut.cpp b/ydb/core/mind/hive/hive_ut.cpp index 557759d9a392..ac9271558145 100644 --- a/ydb/core/mind/hive/hive_ut.cpp +++ b/ydb/core/mind/hive/hive_ut.cpp @@ -672,6 +672,7 @@ Y_UNIT_TEST_SUITE(THiveTest) { TMailboxType::Simple, 0, TMailboxType::Simple, 0); TTenantPoolConfig::TPtr tenantPoolConfig = new TTenantPoolConfig(localConfig); + // tenantPoolConfig->AddStaticSlot(DOMAIN_NAME); tenantPoolConfig->AddStaticSlot(tenant); TActorId actorId = runtime.Register( @@ -1877,6 +1878,7 @@ Y_UNIT_TEST_SUITE(THiveTest) { Ctest << "killing tablet " << tabletId << Endl; 
runtime.Register(CreateTabletKiller(tabletId, runtime.GetNodeId(0))); + // runtime.Register(CreateTabletKiller(tabletId, runtime.GetNodeId(1))); waitFor([&]{ return blockedCommits.size() >= 2; }, "at least 2 blocked commits"); diff --git a/ydb/core/mind/hive/monitoring.cpp b/ydb/core/mind/hive/monitoring.cpp index 18e885f3cd7c..8506670edca5 100644 --- a/ydb/core/mind/hive/monitoring.cpp +++ b/ydb/core/mind/hive/monitoring.cpp @@ -1869,7 +1869,7 @@ function fillDataShort(result) { if ("TotalTablets" in result) { var percent = Math.floor(result.RunningTablets * 100 / result.TotalTablets) + '%'; var values = result.RunningTablets + ' of ' + result.TotalTablets; - var warmup = result.Warmup ? "" : ""; + var warmup = result.WarmUp ? "" : ""; $('#runningTablets').html(warmup + percent + ' (' + values + ')'); $('#aliveNodes').html(result.AliveNodes); $('#bootQueue').html(result.BootQueueSize); diff --git a/ydb/core/mind/hive/tx__load_everything.cpp b/ydb/core/mind/hive/tx__load_everything.cpp index 73525a3752bd..3ac0f56e3469 100644 --- a/ydb/core/mind/hive/tx__load_everything.cpp +++ b/ydb/core/mind/hive/tx__load_everything.cpp @@ -723,7 +723,8 @@ class TTxLoadEverything : public TTransactionBase { Self->SetCounterTabletsTotal(tabletsTotal); Self->TabletCounters->Simple()[NHive::COUNTER_SEQUENCE_FREE].Set(Self->Sequencer.FreeSize()); Self->TabletCounters->Simple()[NHive::COUNTER_SEQUENCE_ALLOCATED].Set(Self->Sequencer.AllocatedSequencesSize()); - Self->TabletCounters->Simple()[NHive::COUNTER_NODES_TOTAL].Set(Self->Nodes.size()); + Self->ExpectedNodes = Self->Nodes.size(); + Self->TabletCounters->Simple()[NHive::COUNTER_NODES_TOTAL].Set(Self->ExpectedNodes); Self->MigrationState = NKikimrHive::EMigrationState::MIGRATION_READY; ctx.Send(Self->SelfId(), new TEvPrivate::TEvBootTablets()); diff --git a/ydb/core/mind/hive/tx__status.cpp b/ydb/core/mind/hive/tx__status.cpp index f1e4f7a9d857..2cacf39804a4 100644 --- a/ydb/core/mind/hive/tx__status.cpp +++ 
b/ydb/core/mind/hive/tx__status.cpp @@ -33,7 +33,11 @@ class TTxStatus : public TTransactionBase { } if (Self->WarmUp && node.Statistics.RestartTimestampSize() < Self->GetNodeRestartsToIgnoreInWarmup()) { - Self->LastConnect = TActivationContext::Now(); + TInstant now = TActivationContext::Now(); + if (Self->LastConnect != TInstant{}) { + Self->MaxTimeBetweenConnects = std::max(Self->MaxTimeBetweenConnects, now - Self->LastConnect); + } + Self->LastConnect = now; } if (node.LocationAcquired) { NIceDb::TNiceDb db(txc.DB); diff --git a/ydb/core/protos/config.proto b/ydb/core/protos/config.proto index 844c793c7112..f59151d6430f 100644 --- a/ydb/core/protos/config.proto +++ b/ydb/core/protos/config.proto @@ -1384,9 +1384,9 @@ message THiveConfig { repeated NKikimrTabletBase.TTabletTypes.EType BalancerIgnoreTabletTypes = 49; optional double SpaceUsagePenaltyThreshold = 53 [default = 1.1]; // number > 1 optional double SpaceUsagePenalty = 54 [default = 0.2]; // number <= 1 - optional uint64 WarmUpBootWaitingPeriod = 50 [default = 5000]; // milliseconds + optional uint64 WarmUpBootWaitingPeriod = 50 [default = 30000]; // milliseconds, time to wait for known nodes on cluster restart optional uint64 NodeRestartsToIgnoreInWarmup = 51 [default = 10]; - optional double MaxWarmUpPeriod = 52 [default = 30.0]; // seconds + optional double MaxWarmUpPeriod = 52 [default = 600.0]; // seconds optional bool WarmUpEnabled = 55 [default = true]; optional uint64 EmergencyBalancerInflight = 56 [default = 1]; // tablets optional uint64 MaxMovementsOnEmergencyBalancer = 57 [default = 2];