Skip to content

Commit dd33ff6

Browse files
authored
Report status for static group (#1116)
1 parent 5665640 commit dd33ff6

File tree

4 files changed

+69
-18
lines changed

4 files changed

+69
-18
lines changed

ydb/core/mind/bscontroller/bsc.cpp

Lines changed: 17 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -75,21 +75,23 @@ void TBlobStorageController::TGroupInfo::CalculateGroupStatus() {
7575
failedByPDisk |= {Topology.get(), slot->GetShortVDiskId()};
7676
}
7777
}
78-
auto deriveStatus = [&](const auto& failed) {
79-
auto& checker = *Topology->QuorumChecker;
80-
if (!failed.GetNumSetItems()) { // all disks of group are operational
81-
return NKikimrBlobStorage::TGroupStatus::FULL;
82-
} else if (!checker.CheckFailModelForGroup(failed)) { // fail model exceeded
83-
return NKikimrBlobStorage::TGroupStatus::DISINTEGRATED;
84-
} else if (checker.IsDegraded(failed)) { // group degraded
85-
return NKikimrBlobStorage::TGroupStatus::DEGRADED;
86-
} else if (failed.GetNumSetItems()) { // group partially available, but not degraded
87-
return NKikimrBlobStorage::TGroupStatus::PARTIAL;
88-
} else {
89-
Y_ABORT("unexpected case");
90-
}
91-
};
92-
Status.MakeWorst(deriveStatus(failed), deriveStatus(failed | failedByPDisk));
78+
Status.MakeWorst(DeriveStatus(Topology.get(), failed), DeriveStatus(Topology.get(), failed | failedByPDisk));
79+
}
80+
}
81+
82+
// Derives the aggregate status of a group from the set of its VDisks that are considered failed.
//
// topology -- topology of the group; its QuorumChecker is used to evaluate the failed set
// failed   -- set of VDisks of the group that are treated as failed
//
// Returns:
//   FULL          -- the failed set is empty
//   DISINTEGRATED -- QuorumChecker reports that the fail model check does not pass for the failed set
//   DEGRADED      -- fail model check passes, but QuorumChecker reports the failed set as degraded
//   PARTIAL       -- at least one VDisk has failed, but the group is not degraded
NKikimrBlobStorage::TGroupStatus::E TBlobStorageController::DeriveStatus(const TBlobStorageGroupInfo::TTopology *topology,
        const TBlobStorageGroupInfo::TGroupVDisks& failed) {
    auto& checker = *topology->QuorumChecker;
    if (!failed.GetNumSetItems()) { // all disks of group are operational
        return NKikimrBlobStorage::TGroupStatus::FULL;
    }
    if (!checker.CheckFailModelForGroup(failed)) { // fail model exceeded
        return NKikimrBlobStorage::TGroupStatus::DISINTEGRATED;
    }
    if (checker.IsDegraded(failed)) { // group degraded
        return NKikimrBlobStorage::TGroupStatus::DEGRADED;
    }
    // The failed set is known to be non-empty here (the empty case has returned above), so the
    // only remaining outcome is a partially available, non-degraded group; no further check of
    // GetNumSetItems() is needed and there is no unreachable "unexpected case" branch.
    return NKikimrBlobStorage::TGroupStatus::PARTIAL;
}
9597

ydb/core/mind/bscontroller/cmds_storage_pool.cpp

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -568,6 +568,7 @@ namespace NKikimr::NBsController {
568568
if (const auto& s = Self.StorageConfig; s.HasBlobStorageConfig()) {
569569
if (const auto& bsConfig = s.GetBlobStorageConfig(); bsConfig.HasServiceSet()) {
570570
const auto& ss = bsConfig.GetServiceSet();
571+
const TMonotonic mono = TActivationContext::Monotonic();
571572
for (const auto& group : ss.GetGroups()) {
572573
auto *x = pb->AddGroup();
573574
x->SetGroupId(group.GetGroupID());
@@ -581,6 +582,45 @@ namespace NKikimr::NBsController {
581582
}
582583
}
583584
}
585+
586+
TStringStream err;
587+
auto info = TBlobStorageGroupInfo::Parse(group, nullptr, &err);
588+
Y_VERIFY_DEBUG_S(info, "failed to parse static group, error# " << err.Str());
589+
if (info) {
590+
const auto *topology = &info->GetTopology();
591+
592+
TBlobStorageGroupInfo::TGroupVDisks failed(topology);
593+
TBlobStorageGroupInfo::TGroupVDisks failedByPDisk(topology);
594+
595+
ui32 realmIdx = 0;
596+
for (const auto& realm : group.GetRings()) {
597+
ui32 domainIdx = 0;
598+
for (const auto& domain : realm.GetFailDomains()) {
599+
ui32 vdiskIdx = 0;
600+
for (const auto& location : domain.GetVDiskLocations()) {
601+
const TVSlotId vslotId(location.GetNodeID(), location.GetPDiskID(), location.GetVDiskSlotID());
602+
const TVDiskIdShort vdiskId(realmIdx, domainIdx, vdiskIdx);
603+
604+
if (const auto it = StaticVSlots.find(vslotId); it != StaticVSlots.end()) {
605+
if (mono <= it->second.ReadySince) { // VDisk can't be treated as READY one
606+
failed |= {topology, vdiskId};
607+
} else if (const TPDiskInfo *pdisk = PDisks.Find(vslotId.ComprisingPDiskId()); !pdisk || !pdisk->HasGoodExpectedStatus()) {
608+
failedByPDisk |= {topology, vdiskId};
609+
}
610+
} else {
611+
failed |= {topology, vdiskId};
612+
}
613+
614+
++vdiskIdx;
615+
}
616+
++domainIdx;
617+
}
618+
++realmIdx;
619+
}
620+
621+
x->SetOperatingStatus(DeriveStatus(topology, failed));
622+
x->SetExpectedStatus(DeriveStatus(topology, failed | failedByPDisk));
623+
}
584624
}
585625
}
586626
}

ydb/core/mind/bscontroller/impl.h

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,9 @@ class TBlobStorageController : public TActor<TBlobStorageController>, public TTa
8282

8383
using TVSlotReadyTimestampQ = std::list<std::pair<TMonotonic, TVSlotInfo*>>;
8484

85+
// VDisk is treated as READY only after it has been reporting READY state for this period without interruption
86+
static constexpr TDuration ReadyStablePeriod = TDuration::Seconds(15);
87+
8588
class TVSlotInfo : public TIndirectReferable<TVSlotInfo> {
8689
public:
8790
using Table = Schema::VSlot;
@@ -121,9 +124,6 @@ class TBlobStorageController : public TActor<TBlobStorageController>, public TTa
121124
TVSlotReadyTimestampQ& VSlotReadyTimestampQ;
122125
TVSlotReadyTimestampQ::iterator VSlotReadyTimestampIter;
123126

124-
// VDisk will be considered READY during this period after reporting its READY state
125-
static constexpr TDuration ReadyStablePeriod = TDuration::Seconds(15);
126-
127127
public:
128128
NKikimrBlobStorage::EVDiskStatus Status = NKikimrBlobStorage::EVDiskStatus::INIT_PENDING;
129129
bool IsReady = false;
@@ -2237,6 +2237,7 @@ class TBlobStorageController : public TActor<TBlobStorageController>, public TTa
22372237

22382238
std::optional<NKikimrBlobStorage::TVDiskMetrics> VDiskMetrics;
22392239
NKikimrBlobStorage::EVDiskStatus VDiskStatus = NKikimrBlobStorage::EVDiskStatus::ERROR;
2240+
TMonotonic ReadySince = TMonotonic::Max(); // when IsReady becomes true for this disk; Max() in non-READY state
22402241

22412242
TStaticVSlotInfo(const NKikimrBlobStorage::TNodeWardenServiceSet::TVDisk& vdisk)
22422243
: VDiskId(VDiskIDFromVDiskID(vdisk.GetVDiskID()))
@@ -2306,6 +2307,9 @@ class TBlobStorageController : public TActor<TBlobStorageController>, public TTa
23062307
const TGroupInfo& group, const TVSlotFinder& finder);
23072308
static void SerializeGroupInfo(NKikimrBlobStorage::TGroupInfo *group, const TGroupInfo& groupInfo,
23082309
const TString& storagePoolName, const TMaybe<TKikimrScopeId>& scopeId);
2310+
2311+
static NKikimrBlobStorage::TGroupStatus::E DeriveStatus(const TBlobStorageGroupInfo::TTopology *topology,
2312+
const TBlobStorageGroupInfo::TGroupVDisks& failed);
23092313
};
23102314

23112315
} //NBsController

ydb/core/mind/bscontroller/self_heal.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -971,6 +971,11 @@ namespace NKikimr::NBsController {
971971
}
972972
if (const auto it = StaticVSlots.find(vslotId); it != StaticVSlots.end() && it->second.VDiskId == vdiskId) {
973973
it->second.VDiskStatus = m.GetStatus();
974+
if (it->second.VDiskStatus == NKikimrBlobStorage::EVDiskStatus::READY) {
975+
it->second.ReadySince = Min(it->second.ReadySince, mono + ReadyStablePeriod);
976+
} else {
977+
it->second.ReadySince = TMonotonic::Max();
978+
}
974979
}
975980
}
976981

0 commit comments

Comments
 (0)