diff --git a/ydb/core/mind/bscontroller/bsc.cpp b/ydb/core/mind/bscontroller/bsc.cpp index 9ce2d1ca2700..d9ae407390b6 100644 --- a/ydb/core/mind/bscontroller/bsc.cpp +++ b/ydb/core/mind/bscontroller/bsc.cpp @@ -337,7 +337,7 @@ void TBlobStorageController::ValidateInternalState() { Y_ABORT_UNLESS(donor->GetShortVDiskId() == vslot->GetShortVDiskId()); } if (vslot->Group) { - if (vslot->Status == NKikimrBlobStorage::EVDiskStatus::READY) { + if (vslot->GetStatus() == NKikimrBlobStorage::EVDiskStatus::READY) { Y_DEBUG_ABORT_UNLESS(vslot->IsReady || vslot->IsInVSlotReadyTimestampQ()); } else { Y_DEBUG_ABORT_UNLESS(!vslot->IsReady && !vslot->IsInVSlotReadyTimestampQ()); @@ -401,7 +401,7 @@ ui32 TBlobStorageController::GetEventPriority(IEventHandle *ev) { const auto& record = msg->Record; for (const auto& item : record.GetVDiskStatus()) { const TVSlotId vslotId(item.GetNodeId(), item.GetPDiskId(), item.GetVSlotId()); - if (TVSlotInfo *slot = FindVSlot(vslotId); slot && slot->Status > item.GetStatus()) { + if (TVSlotInfo *slot = FindVSlot(vslotId); slot && slot->GetStatus() > item.GetStatus()) { return 1; } else if (const auto it = StaticVSlots.find(vslotId); it != StaticVSlots.end() && it->second.VDiskStatus > item.GetStatus()) { return 1; diff --git a/ydb/core/mind/bscontroller/cmds_box.cpp b/ydb/core/mind/bscontroller/cmds_box.cpp index e1e821c5c643..4b7f53c6beb0 100644 --- a/ydb/core/mind/bscontroller/cmds_box.cpp +++ b/ydb/core/mind/bscontroller/cmds_box.cpp @@ -213,7 +213,7 @@ namespace NKikimr::NBsController { for (const auto& [id, slot] : pdisk->VSlotsOnPDisk) { if (slot->Group) { auto *m = VSlots.FindForUpdate(slot->VSlotId); - m->Status = NKikimrBlobStorage::EVDiskStatus::ERROR; + m->VDiskStatus = NKikimrBlobStorage::EVDiskStatus::ERROR; m->IsReady = false; TGroupInfo *group = Groups.FindForUpdate(slot->Group->ID); GroupFailureModelChanged.insert(slot->Group->ID); diff --git a/ydb/core/mind/bscontroller/cmds_storage_pool.cpp b/ydb/core/mind/bscontroller/cmds_storage_pool.cpp index 516ab802d6ad..01a09938030b 100644 --- a/ydb/core/mind/bscontroller/cmds_storage_pool.cpp +++ b/ydb/core/mind/bscontroller/cmds_storage_pool.cpp @@ -557,7 +557,7 @@ namespace NKikimr::NBsController { x->MutableVDiskMetrics()->CopyFrom(*vslot.VDiskMetrics); x->MutableVDiskMetrics()->ClearVDiskId(); } - x->SetStatus(NKikimrBlobStorage::EVDiskStatus_Name(vslot.VDiskStatus)); + x->SetStatus(NKikimrBlobStorage::EVDiskStatus_Name(vslot.VDiskStatus.value_or(NKikimrBlobStorage::EVDiskStatus::ERROR))); x->SetReady(vslot.ReadySince <= mono); } if (const auto& s = Self.StorageConfig; s.HasBlobStorageConfig()) { @@ -698,7 +698,7 @@ namespace NKikimr::NBsController { TGroupInfo *group = Groups.FindForUpdate(vslot->GroupId); vslot->Mood = TMood::Wipe; - vslot->Status = NKikimrBlobStorage::EVDiskStatus::ERROR; + vslot->VDiskStatus = NKikimrBlobStorage::EVDiskStatus::ERROR; vslot->IsReady = false; GroupFailureModelChanged.insert(group->ID); group->CalculateGroupStatus(); @@ -744,7 +744,7 @@ namespace NKikimr::NBsController { TGroupInfo *group = Groups.FindForUpdate(vslot->GroupId); vslot->Mood = targetMood; - vslot->Status = NKikimrBlobStorage::EVDiskStatus::ERROR; + vslot->VDiskStatus = NKikimrBlobStorage::EVDiskStatus::ERROR; vslot->IsReady = false; GroupFailureModelChanged.insert(group->ID); group->CalculateGroupStatus(); diff --git a/ydb/core/mind/bscontroller/config.cpp b/ydb/core/mind/bscontroller/config.cpp index 4ed5a2cffc5b..b63680720525 100644 --- a/ydb/core/mind/bscontroller/config.cpp +++ b/ydb/core/mind/bscontroller/config.cpp @@ -486,9 +486,9 @@ namespace NKikimr::NBsController { if (!overlay->second || !overlay->second->Group) { // deleted one (overlay->second ? overlay->second : base->second)->DropFromVSlotReadyTimestampQ(); NotReadyVSlotIds.erase(overlay->first); - } else if (overlay->second->Status != NKikimrBlobStorage::EVDiskStatus::READY) { + } else if (overlay->second->GetStatus() != NKikimrBlobStorage::EVDiskStatus::READY) { overlay->second->DropFromVSlotReadyTimestampQ(); - } else if (!base || base->second->Status != NKikimrBlobStorage::EVDiskStatus::READY) { + } else if (!base || base->second->GetStatus() != NKikimrBlobStorage::EVDiskStatus::READY) { overlay->second->PutInVSlotReadyTimestampQ(now); } else { Y_DEBUG_ABORT_UNLESS(overlay->second->IsReady || overlay->second->IsInVSlotReadyTimestampQ()); @@ -998,7 +998,7 @@ namespace NKikimr::NBsController { pb->SetAllocatedSize(vslot.Metrics.GetAllocatedSize()); pb->MutableVDiskMetrics()->CopyFrom(vslot.Metrics); pb->MutableVDiskMetrics()->ClearVDiskId(); - pb->SetStatus(NKikimrBlobStorage::EVDiskStatus_Name(vslot.Status)); + pb->SetStatus(NKikimrBlobStorage::EVDiskStatus_Name(vslot.GetStatus())); for (const TVSlotId& vslotId : vslot.Donors) { auto *item = pb->AddDonors(); Serialize(item->MutableVSlotId(), vslotId); diff --git a/ydb/core/mind/bscontroller/config_fit_groups.cpp b/ydb/core/mind/bscontroller/config_fit_groups.cpp index e72184045d26..0953ad03f97b 100644 --- a/ydb/core/mind/bscontroller/config_fit_groups.cpp +++ b/ydb/core/mind/bscontroller/config_fit_groups.cpp @@ -524,7 +524,7 @@ namespace NKikimr { // also we have to find replicating VSlots on this PDisk and assume they consume up to // max(vslotSize for every slot in group), not their actual AllocatedSize for (const auto& [id, slot] : info.VSlotsOnPDisk) { - if (slot->Group && slot->Status != NKikimrBlobStorage::EVDiskStatus::READY) { + if (slot->Group && slot->GetStatus() != NKikimrBlobStorage::EVDiskStatus::READY) { ui64 maxGroupSlotSize = 0; for (const TVSlotInfo *peer : slot->Group->VDisksInGroup) { maxGroupSlotSize = Max(maxGroupSlotSize, peer->Metrics.GetAllocatedSize()); diff --git a/ydb/core/mind/bscontroller/impl.h b/ydb/core/mind/bscontroller/impl.h index d55e57b34d63..f8a3c9a36703 100644 --- a/ydb/core/mind/bscontroller/impl.h +++ b/ydb/core/mind/bscontroller/impl.h @@ -124,16 +124,17 @@ class TBlobStorageController : public TActor, public TTa TVSlotReadyTimestampQ::iterator VSlotReadyTimestampIter; public: - NKikimrBlobStorage::EVDiskStatus Status = NKikimrBlobStorage::EVDiskStatus::ERROR; + std::optional VDiskStatus; + NHPTimer::STime VDiskStatusTimestamp = GetCycleCountFast(); bool IsReady = false; bool OnlyPhantomsRemain = false; public: void SetStatus(NKikimrBlobStorage::EVDiskStatus status, TMonotonic now, TInstant instant, bool onlyPhantomsRemain) { - if (status != Status) { + if (status != VDiskStatus) { if (status == NKikimrBlobStorage::EVDiskStatus::REPLICATING) { // became "replicating" LastGotReplicating = instant; - } else if (Status == NKikimrBlobStorage::EVDiskStatus::REPLICATING) { // was "replicating" + } else if (VDiskStatus == NKikimrBlobStorage::EVDiskStatus::REPLICATING) { // was "replicating" Y_DEBUG_ABORT_UNLESS(LastGotReplicating != TInstant::Zero()); ReplicationTime += instant - LastGotReplicating; LastGotReplicating = {}; @@ -145,7 +146,7 @@ class TBlobStorageController : public TActor, public TTa LastSeenReady = instant; } - Status = status; + VDiskStatus = status; IsReady = false; if (status == NKikimrBlobStorage::EVDiskStatus::READY) { PutInVSlotReadyTimestampQ(now); @@ -159,6 +160,10 @@ class TBlobStorageController : public TActor, public TTa } } + NKikimrBlobStorage::EVDiskStatus GetStatus() const { + return VDiskStatus.value_or(NKikimrBlobStorage::EVDiskStatus::ERROR); + } + void PutInVSlotReadyTimestampQ(TMonotonic now) { const TMonotonic readyAfter = now + ReadyStablePeriod; // vdisk will be treated as READY one shortly, but not now Y_ABORT_UNLESS(VSlotReadyTimestampIter == TVSlotReadyTimestampQ::iterator()); @@ -291,15 +296,16 @@ class TBlobStorageController : public TActor, public TTa TString GetStatusString() const { TStringStream s; - s << NKikimrBlobStorage::EVDiskStatus_Name(Status); - if (Status == NKikimrBlobStorage::REPLICATING && OnlyPhantomsRemain) { + const auto status = GetStatus(); + s << NKikimrBlobStorage::EVDiskStatus_Name(status); + if (status == NKikimrBlobStorage::REPLICATING && OnlyPhantomsRemain) { s << "/p"; } return s.Str(); } bool IsOperational() const { - return Status >= NKikimrBlobStorage::REPLICATING; + return GetStatus() >= NKikimrBlobStorage::REPLICATING; } void OnCommit(); @@ -2276,7 +2282,7 @@ class TBlobStorageController : public TActor, public TTa histo.IncrementFor(passed.Seconds()); TDuration timeBeingReplicating = slot->ReplicationTime; - if (slot->Status == NKikimrBlobStorage::EVDiskStatus::REPLICATING) { + if (slot->GetStatus() == NKikimrBlobStorage::EVDiskStatus::REPLICATING) { timeBeingReplicating += now - slot->LastGotReplicating; } @@ -2301,7 +2307,8 @@ class TBlobStorageController : public TActor, public TTa const NKikimrBlobStorage::TVDiskKind::EVDiskKind VDiskKind; std::optional VDiskMetrics; - NKikimrBlobStorage::EVDiskStatus VDiskStatus = NKikimrBlobStorage::EVDiskStatus::ERROR; + std::optional VDiskStatus; + NHPTimer::STime VDiskStatusTimestamp = GetCycleCountFast(); TMonotonic ReadySince = TMonotonic::Max(); // when IsReady becomes true for this disk; Max() in non-READY state TStaticVSlotInfo(const NKikimrBlobStorage::TNodeWardenServiceSet::TVDisk& vdisk, @@ -2315,6 +2322,7 @@ class TBlobStorageController : public TActor, public TTa TStaticVSlotInfo& item = it->second; VDiskMetrics = std::move(item.VDiskMetrics); VDiskStatus = item.VDiskStatus; + VDiskStatusTimestamp = item.VDiskStatusTimestamp; ReadySince = item.ReadySince; } } diff --git a/ydb/core/mind/bscontroller/monitoring.cpp b/ydb/core/mind/bscontroller/monitoring.cpp index 9f6fa7597e6d..374955bc70f7 100644 --- a/ydb/core/mind/bscontroller/monitoring.cpp +++ b/ydb/core/mind/bscontroller/monitoring.cpp @@ -1296,7 +1296,7 @@ void TBlobStorageController::RenderVSlotRow(IOutputStream& out, const TVSlotInfo } TABLED() { TDuration time = vslot.ReplicationTime; - if (vslot.Status == NKikimrBlobStorage::EVDiskStatus::REPLICATING) { + if (vslot.GetStatus() == NKikimrBlobStorage::EVDiskStatus::REPLICATING) { time += TActivationContext::Now() - vslot.LastGotReplicating; } out << time; diff --git a/ydb/core/mind/bscontroller/register_node.cpp b/ydb/core/mind/bscontroller/register_node.cpp index 69d05dcdfd8d..8dd84e535250 100644 --- a/ydb/core/mind/bscontroller/register_node.cpp +++ b/ydb/core/mind/bscontroller/register_node.cpp @@ -547,7 +547,7 @@ void TBlobStorageController::OnWardenDisconnected(TNodeId nodeId, TActorId serve updates.push_back({ .VDiskId = it->second->GetVDiskId(), .IsReady = it->second->IsReady, - .VDiskStatus = it->second->Status, + .VDiskStatus = it->second->GetStatus(), }); ScrubState.UpdateVDiskState(&*it->second); SysViewChangedVSlots.insert(it->second->VSlotId); diff --git a/ydb/core/mind/bscontroller/self_heal.cpp b/ydb/core/mind/bscontroller/self_heal.cpp index 2d13a1d86cfd..e75c4ef7333e 100644 --- a/ydb/core/mind/bscontroller/self_heal.cpp +++ b/ydb/core/mind/bscontroller/self_heal.cpp @@ -913,7 +913,7 @@ namespace NKikimr::NBsController { slot->OnlyPhantomsRemain, slot->IsReady, TMonotonic::Zero(), - slot->Status, + slot->GetStatus(), }; } } @@ -960,7 +960,7 @@ namespace NKikimr::NBsController { false, /* OnlyPhantomsRemain */ true, /* IsReady; decision is based on ReadySince */ info.ReadySince, - info.VDiskStatus, + info.VDiskStatus.value_or(NKikimrBlobStorage::EVDiskStatus::ERROR), }; } } @@ -987,7 +987,7 @@ namespace NKikimr::NBsController { const bool was = slot->IsOperational(); if (const TGroupInfo *group = slot->Group) { const bool wasReady = slot->IsReady; - if (slot->Status != m.GetStatus() || slot->OnlyPhantomsRemain != m.GetOnlyPhantomsRemain()) { + if (slot->GetStatus() != m.GetStatus() || slot->OnlyPhantomsRemain != m.GetOnlyPhantomsRemain()) { slot->SetStatus(m.GetStatus(), mono, now, m.GetOnlyPhantomsRemain()); if (slot->IsReady != wasReady) { ScrubState.UpdateVDiskState(slot); @@ -1001,14 +1001,14 @@ namespace NKikimr::NBsController { .VDiskId = vdiskId, .OnlyPhantomsRemain = slot->OnlyPhantomsRemain, .IsReady = slot->IsReady, - .VDiskStatus = slot->Status, + .VDiskStatus = slot->GetStatus(), }); if (!was && slot->IsOperational() && !group->SeenOperational) { groups.insert(const_cast(group)); } SysViewChangedVSlots.insert(vslotId); } - if (slot->Status == NKikimrBlobStorage::EVDiskStatus::READY) { + if (slot->GetStatus() == NKikimrBlobStorage::EVDiskStatus::READY) { // we can release donor slots without further notice then the VDisk is completely replicated; we // intentionally use GetStatus() here instead of IsReady() to prevent waiting for (const TVSlotId& donorVSlotId : slot->Donors) { diff --git a/ydb/core/mind/bscontroller/sys_view.cpp b/ydb/core/mind/bscontroller/sys_view.cpp index 6526402a825c..c2f9f737de45 100644 --- a/ydb/core/mind/bscontroller/sys_view.cpp +++ b/ydb/core/mind/bscontroller/sys_view.cpp @@ -325,7 +325,8 @@ void CopyInfo(NKikimrSysView::TPDiskInfo* info, const THolder status, NHPTimer::STime statusTimestamp, + NKikimrBlobStorage::TVDiskKind::EVDiskKind kind, bool isBeingDeleted) { pb->SetGroupId(vdiskId.GroupID.GetRawId()); pb->SetGroupGeneration(vdiskId.GroupGeneration); pb->SetFailRealm(vdiskId.FailRealm); @@ -337,7 +338,12 @@ void SerializeVSlotInfo(NKikimrSysView::TVSlotInfo *pb, const TVDiskID& vdiskId, if (m.HasAvailableSize()) { pb->SetAvailableSize(m.GetAvailableSize()); } - pb->SetStatusV2(NKikimrBlobStorage::EVDiskStatus_Name(status)); + if (!status && CyclesToDuration(GetCycleCountFast() - statusTimestamp) > TDuration::Seconds(15)) { + status = NKikimrBlobStorage::EVDiskStatus::ERROR; + } + if (status) { + pb->SetStatusV2(NKikimrBlobStorage::EVDiskStatus_Name(*status)); + } pb->SetKind(NKikimrBlobStorage::TVDiskKind::EVDiskKind_Name(kind)); if (isBeingDeleted) { pb->SetIsBeingDeleted(true); @@ -345,8 +351,8 @@ void SerializeVSlotInfo(NKikimrSysView::TVSlotInfo *pb, const TVDiskID& vdiskId, } void CopyInfo(NKikimrSysView::TVSlotInfo* info, const THolder& vSlotInfo) { - SerializeVSlotInfo(info, vSlotInfo->GetVDiskId(), vSlotInfo->Metrics, vSlotInfo->Status, vSlotInfo->Kind, - vSlotInfo->IsBeingDeleted()); + SerializeVSlotInfo(info, vSlotInfo->GetVDiskId(), vSlotInfo->Metrics, vSlotInfo->VDiskStatus, + vSlotInfo->VDiskStatusTimestamp, vSlotInfo->Kind, vSlotInfo->IsBeingDeleted()); } void CopyInfo(NKikimrSysView::TGroupInfo* info, const THolder& groupInfo) { @@ -462,7 +468,7 @@ void TBlobStorageController::UpdateSystemViews() { if (SysViewChangedVSlots.count(vslotId)) { static const NKikimrBlobStorage::TVDiskMetrics zero; SerializeVSlotInfo(&state.VSlots[vslotId], vslot.VDiskId, vslot.VDiskMetrics ? *vslot.VDiskMetrics : zero, - vslot.VDiskStatus, vslot.VDiskKind, false); + vslot.VDiskStatus, vslot.VDiskStatusTimestamp, vslot.VDiskKind, false); } } if (StorageConfig.HasBlobStorageConfig()) { diff --git a/ydb/core/sys_view/ut_kqp.cpp b/ydb/core/sys_view/ut_kqp.cpp index cf8f7f95974d..8191e9b1ff34 100644 --- a/ydb/core/sys_view/ut_kqp.cpp +++ b/ydb/core/sys_view/ut_kqp.cpp @@ -997,7 +997,7 @@ Y_UNIT_TEST_SUITE(SystemView) { check.String("Default"); // Kind check.Uint64(env.GetServer().GetRuntime()->GetNodeId(0)); // NodeId check.Uint64(1u); // PDiskId - check.String("ERROR"); // Status + check.Null(); // Status check.Uint64(0u); // VDisk check.Uint64(1000u); // VSlotId }