Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 11 additions & 2 deletions ydb/core/health_check/health_check.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,7 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
TString ErasureSpecies;
std::vector<const NKikimrSysView::TVSlotEntry*> VSlots;
ui32 Generation;
bool LayoutCorrect = true;
};

struct TSelfCheckResult {
Expand Down Expand Up @@ -1710,6 +1711,7 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
auto& groupState = GroupState[groupId];
groupState.ErasureSpecies = group.GetInfo().GetErasureSpeciesV2();
groupState.Generation = group.GetInfo().GetGeneration();
groupState.LayoutCorrect = group.GetInfo().GetLayoutCorrect();
StoragePoolState[poolId].Groups.emplace(groupId);
}
for (const auto& vSlot : VSlots->Get()->Record.GetEntries()) {
Expand Down Expand Up @@ -2504,6 +2506,7 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {

class TGroupChecker {
TString ErasureSpecies;
bool LayoutCorrect;
int FailedDisks = 0;
std::array<int, Ydb::Monitoring::StatusFlag::Status_ARRAYSIZE> DisksColors = {};
TStackVec<std::pair<ui32, int>> FailedRealms;
Expand All @@ -2520,7 +2523,10 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
}

public:
TGroupChecker(const TString& erasure) : ErasureSpecies(erasure) {}
TGroupChecker(const TString& erasure, const bool layoutCorrect = true)
: ErasureSpecies(erasure)
, LayoutCorrect(layoutCorrect)
{}

void AddVDiskStatus(Ydb::Monitoring::StatusFlag::Status status, ui32 realm) {
++DisksColors[status];
Expand All @@ -2539,6 +2545,9 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {

void ReportStatus(TSelfCheckContext& context) const {
context.OverallStatus = Ydb::Monitoring::StatusFlag::GREEN;
if (!LayoutCorrect) {
context.ReportStatus(Ydb::Monitoring::StatusFlag::ORANGE, "Group layout is incorrect", ETags::GroupState);
}
if (ErasureSpecies == NONE) {
if (FailedDisks > 0) {
context.ReportStatus(Ydb::Monitoring::StatusFlag::RED, "Group failed", ETags::GroupState, {ETags::VDiskState});
Expand Down Expand Up @@ -2888,7 +2897,7 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
return;
}

TGroupChecker checker(itGroup->second.ErasureSpecies);
TGroupChecker checker(itGroup->second.ErasureSpecies, itGroup->second.LayoutCorrect);
const auto& slots = itGroup->second.VSlots;
for (const auto* slot : slots) {
const auto& slotInfo = slot->GetInfo();
Expand Down
68 changes: 68 additions & 0 deletions ydb/core/health_check/health_check_ut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2113,5 +2113,73 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) {
TestConfigUpdateNodeRestartsPerPeriod(runtime, sender, nodeRestarts / 2, nodeRestarts + 5, nodeId, Ydb::Monitoring::StatusFlag::YELLOW);
TestConfigUpdateNodeRestartsPerPeriod(runtime, sender, nodeRestarts / 5, nodeRestarts / 2, nodeId, Ydb::Monitoring::StatusFlag::ORANGE);
}

Y_UNIT_TEST(LayoutIncorrect) {
TPortManager tp;
ui16 port = tp.GetPort(2134);
ui16 grpcPort = tp.GetPort(2135);
auto settings = TServerSettings(port)
.SetNodeCount(1)
.SetDynamicNodeCount(1)
.SetUseRealThreads(false)
.SetDomainName("Root");

TServer server(settings);
server.EnableGRpc(grpcPort);
TClient client(settings);
TTestActorRuntime& runtime = *server.GetRuntime();
TActorId sender = runtime.AllocateEdgeActor();

auto observerFunc = [&](TAutoPtr<IEventHandle>& ev) {
switch (ev->GetTypeRewrite()) {
case NSysView::TEvSysView::EvGetGroupsResponse: {
auto* x = reinterpret_cast<NSysView::TEvSysView::TEvGetGroupsResponse::TPtr*>(&ev);
auto& record = (*x)->Get()->Record;
for (auto& entry : *record.mutable_entries()) {
entry.mutable_info()->set_layoutcorrect(false);
}

break;
}
}

return TTestActorRuntime::EEventAction::PROCESS;
};
runtime.SetObserverFunc(observerFunc);

TAutoPtr<IEventHandle> handle;
auto *request = new NHealthCheck::TEvSelfCheckRequest;
request->Request.set_return_verbose_status(true);
runtime.Send(new IEventHandle(NHealthCheck::MakeHealthCheckID(), sender, request, 0));
auto result = runtime.GrabEdgeEvent<NHealthCheck::TEvSelfCheckResult>(handle)->Result;

UNIT_ASSERT_VALUES_EQUAL(result.self_check_result(), Ydb::Monitoring::SelfCheck::MAINTENANCE_REQUIRED);
UNIT_ASSERT_VALUES_EQUAL(result.database_status_size(), 1);
const auto &database_status = result.database_status(0);

UNIT_ASSERT_VALUES_EQUAL(database_status.overall(), Ydb::Monitoring::StatusFlag::ORANGE);
UNIT_ASSERT_VALUES_EQUAL(database_status.compute().overall(), Ydb::Monitoring::StatusFlag::GREEN);
UNIT_ASSERT_VALUES_EQUAL(database_status.storage().overall(), Ydb::Monitoring::StatusFlag::ORANGE);
UNIT_ASSERT_VALUES_EQUAL(database_status.storage().pools().size(), 1);
UNIT_ASSERT_VALUES_EQUAL(database_status.storage().pools()[0].overall(), Ydb::Monitoring::StatusFlag::ORANGE);
UNIT_ASSERT_VALUES_EQUAL(database_status.storage().pools()[0].groups().size(), 1);
UNIT_ASSERT_VALUES_EQUAL(database_status.storage().pools()[0].groups()[0].overall(), Ydb::Monitoring::StatusFlag::ORANGE);

for (const auto &issue_log : result.issue_log()) {
if (issue_log.level() == 1 && issue_log.type() == "DATABASE") {
UNIT_ASSERT_VALUES_EQUAL(issue_log.location().database().name(), "/Root");
UNIT_ASSERT_VALUES_EQUAL(issue_log.message(), "Database has storage issues");
} else if (issue_log.level() == 2 && issue_log.type() == "STORAGE") {
UNIT_ASSERT_VALUES_EQUAL(issue_log.location().database().name(), "/Root");
UNIT_ASSERT_VALUES_EQUAL(issue_log.message(), "Storage has no redundancy");
} else if (issue_log.level() == 3 && issue_log.type() == "STORAGE_POOL") {
UNIT_ASSERT_VALUES_EQUAL(issue_log.location().storage().pool().name(), "static");
UNIT_ASSERT_VALUES_EQUAL(issue_log.message(), "Pool has no redundancy");
} else if (issue_log.level() == 4 && issue_log.type() == "STORAGE_GROUP") {
UNIT_ASSERT_VALUES_EQUAL(issue_log.location().storage().pool().name(), "static");
UNIT_ASSERT_VALUES_EQUAL(issue_log.message(), "Group layout is incorrect");
}
}
}
}
}
Loading