Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 45 additions & 0 deletions ydb/core/cms/cms_ut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1808,6 +1808,51 @@ Y_UNIT_TEST_SUITE(TCmsTest) {
env.CheckRejectRequest("user", request3.GetRequestId());
}

Y_UNIT_TEST(AllVDisksEvictionInRack)
{
    // Rack layout produced by the callback below:
    //   Node = [0, 1, 2, 3, 4, 5, 6, 7]
    //   Rack = [1, 1, 2, 2, 3, 3, 4, 4]
    const auto rackByNode = [](ui32 nodeId) {
        NActorsInterconnect::TNodeLocation proto;
        proto.SetRack(ToString(nodeId / 2 + 1));
        return TNodeLocation(proto);
    };

    auto opts = TTestEnvOpts(8)
        .WithSentinel()
        .WithNodeLocationCallback(rackByNode);
    TCmsTestEnv env(opts);
    env.SetLogPriority(NKikimrServices::CMS, NLog::PRI_DEBUG);

    // Issues a RESTART_SERVICES request with VDisk eviction for the node at
    // the given index and expects it to be temporarily disallowed.
    const auto requestEviction = [&env](ui32 nodeIdx) {
        return env.CheckPermissionRequest(
            MakePermissionRequest(
                TRequestOptions("user").WithEvictVDisks(),
                MakeAction(TAction::RESTART_SERVICES, env.GetNodeId(nodeIdx), 600000000, "storage")
            ),
            TStatus::DISALLOW_TEMP // ok, waiting for move VDisks
        );
    };

    // Evict all VDisks from rack 1
    auto firstRequest = requestEviction(0);
    auto secondRequest = requestEviction(1);

    // Check that FAULTY BSC requests are sent
    env.CheckBSCUpdateRequests({ env.GetNodeId(0), env.GetNodeId(1) }, NKikimrBlobStorage::FAULTY);

    // "Move" VDisks from rack 1: flag the fake whiteboard entry as moved and
    // drop its VDisk state records.
    const auto markVDisksMoved = [&env](ui32 nodeIdx) {
        auto& whiteboardInfo = TFakeNodeWhiteboardService::Info[env.GetNodeId(nodeIdx)];
        whiteboardInfo.VDisksMoved = true;
        whiteboardInfo.VDiskStateInfo.clear();
    };
    markVDisksMoved(0);
    markVDisksMoved(1);

    auto* baseConfig = TFakeNodeWhiteboardService::Config
        .MutableResponse()
        ->MutableStatus(0)
        ->MutableBaseConfig();
    env.RegenerateBSConfig(baseConfig, opts);

    // Both pending requests are now expected to be allowed with one permission each.
    auto firstPermission = env.CheckRequest("user", firstRequest.GetRequestId(), false, TStatus::ALLOW, 1);
    auto secondPermission = env.CheckRequest("user", secondRequest.GetRequestId(), false, TStatus::ALLOW, 1);
    env.CheckDonePermission("user", firstPermission.GetPermissions(0).GetId());
    env.CheckDonePermission("user", secondPermission.GetPermissions(0).GetId());
}

Y_UNIT_TEST(EmergencyDuringRollingRestart)
{
TCmsTestEnv env(TTestEnvOpts(8).WithEnableCMSRequestPriorities());
Expand Down
47 changes: 46 additions & 1 deletion ydb/core/cms/cms_ut_common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,17 @@ const bool ENABLE_DETAILED_CMS_LOG = true;
const bool ENABLE_DETAILED_CMS_LOG = false;
#endif

// COMMA lets a type containing a top-level comma be passed as one macro argument.
#define COMMA ,
// Prints the map as [STATUS=[node,node,...],STATUS=[...],...] in map order.
Y_DECLARE_OUT_SPEC(, std::map<NKikimrBlobStorage::EDriveStatus COMMA std::set<ui32>>, o, value) {
    std::vector<TString> entries;
    entries.reserve(value.size());
    for (const auto& item : value) {
        // item.first is the drive status, item.second the set of node ids.
        entries.emplace_back(
            TStringBuilder() << item.first << "=" << '[' << JoinSeq(',', item.second) << ']'
        );
    }
    o << '[' << JoinSeq(',', entries) << ']';
};

namespace NKikimr {
namespace NCmsTest {

Expand Down Expand Up @@ -391,7 +402,7 @@ static NKikimrConfig::TBootstrap GenerateBootstrapConfig(TTestActorRuntime &runt
return res;
}

static void SetupServices(TTestActorRuntime &runtime, const TTestEnvOpts &options) {
static void SetupServices(TTestBasicRuntime &runtime, const TTestEnvOpts &options) {
const ui32 domainsNum = 1;
const ui32 disksInDomain = 1;

Expand Down Expand Up @@ -503,6 +514,7 @@ static void SetupServices(TTestActorRuntime &runtime, const TTestEnvOpts &option
),
0);

runtime.LocationCallback = options.NodeLocationCallback;
runtime.Initialize(app.Unwrap());
auto dnsConfig = new TDynamicNameserviceConfig();
dnsConfig->MaxStaticNodeId = 1000;
Expand Down Expand Up @@ -868,6 +880,39 @@ TCmsTestEnv::CheckRequest(const TString &user,
return rec;
}

void TCmsTestEnv::CheckBSCUpdateRequests(std::set<ui32> expectedNodes,
NKikimrBlobStorage::EDriveStatus expectedStatus)
{
using TBSCRequests = std::map<NKikimrBlobStorage::EDriveStatus, std::set<ui32>>;

TBSCRequests expectedRequests = { {expectedStatus, expectedNodes} };
TBSCRequests actualRequests;

TDispatchOptions options;
options.FinalEvents.emplace_back([&](IEventHandle& ev) {
if (ev.GetTypeRewrite() == TEvBlobStorage::TEvControllerConfigRequest::EventType) {
const auto& request = ev.Get<TEvBlobStorage::TEvControllerConfigRequest>()->Record;
bool foundUpdateDriveCommand = false;
for (const auto& command : request.GetRequest().GetCommand()) {
if (command.HasUpdateDriveStatus()) {
foundUpdateDriveCommand = true;
const auto& update = command.GetUpdateDriveStatus();
actualRequests[update.GetStatus()].insert(update.GetHostKey().GetNodeId());
}
}
return foundUpdateDriveCommand;
}
return false;
});
DispatchEvents(options, TDuration::Minutes(1));

UNIT_ASSERT_C(
actualRequests == expectedRequests,
TStringBuilder() << "Sentinel sent wrong update requests to BSC: "
<< "expected# " << expectedRequests
<< ", actual# " << actualRequests
);
}

void TCmsTestEnv::CheckWalleStoreTaskIsFailed(NCms::TEvCms::TEvStoreWalleTask* req)
{
Expand Down
11 changes: 11 additions & 0 deletions ydb/core/cms/cms_ut_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,9 @@ struct TTestEnvOpts {
bool EnableCMSRequestPriorities;
bool EnableSingleCompositeActionGroup;

using TNodeLocationCallback = std::function<TNodeLocation(ui32)>;
TNodeLocationCallback NodeLocationCallback;

TTestEnvOpts() = default;

TTestEnvOpts(ui32 nodeCount,
Expand Down Expand Up @@ -126,6 +129,12 @@ struct TTestEnvOpts {
EnableCMSRequestPriorities = true;
return *this;
}

// Stores the callback that maps a ui32 to a TNodeLocation and returns *this
// so that option setters can be chained.
TTestEnvOpts& WithNodeLocationCallback(TNodeLocationCallback nodeLocationCallback) {
    // The by-value parameter is already a private copy; swap it into the
    // member instead of copying the std::function a second time. The previous
    // member value ends up in the parameter and is destroyed on return.
    NodeLocationCallback.swap(nodeLocationCallback);
    return *this;
}

};

class TCmsTestEnv : public TTestBasicRuntime {
Expand Down Expand Up @@ -323,6 +332,8 @@ class TCmsTestEnv : public TTestBasicRuntime {
return CheckRequest(user, id, dry, NKikimrCms::MODE_MAX_AVAILABILITY, res, count);
}

void CheckBSCUpdateRequests(std::set<ui32> expectedNodes, NKikimrBlobStorage::EDriveStatus expectedStatus);

void CheckWalleStoreTaskIsFailed(NCms::TEvCms::TEvStoreWalleTask *req);

template <typename... Ts>
Expand Down
9 changes: 9 additions & 0 deletions ydb/core/cms/pdisk_status.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#pragma once

#include <ydb/core/protos/blobstorage_config.pb.h>

namespace NKikimr::NCms {

// PDisk status as used in NCms: an alias for NKikimrBlobStorage::EDriveStatus.
using EPDiskStatus = NKikimrBlobStorage::EDriveStatus;

} // namespace NKikimr::NCms
7 changes: 6 additions & 1 deletion ydb/core/cms/sentinel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,10 @@ void TPDiskStatusComputer::SetForcedStatus(EPDiskStatus status) {
ForcedStatus = status;
}

// Returns true when ForcedStatus currently holds a value.
bool TPDiskStatusComputer::HasForcedStatus() const {
return ForcedStatus.Defined();
}

// Drops any forced status by clearing ForcedStatus.
void TPDiskStatusComputer::ResetForcedStatus() {
ForcedStatus.Clear();
}
Expand Down Expand Up @@ -196,6 +200,7 @@ void TPDiskStatus::DisallowChanging() {

// Constructs the info record: forwards the initial status and state limits to
// the TPDiskStatus base, records the same initial status as ActualStatus, and
// then calls Touch().
TPDiskInfo::TPDiskInfo(EPDiskStatus initialStatus, const ui32& defaultStateLimit, const TLimitsMap& stateLimits)
: TPDiskStatus(initialStatus, defaultStateLimit, stateLimits)
// Start with ActualStatus equal to the status the base was initialised with.
, ActualStatus(initialStatus)
{
Touch();
}
Expand Down Expand Up @@ -898,7 +903,7 @@ class TSentinel: public TActorBootstrapped<TSentinel> {

all.AddPDisk(id);
if (info.IsChanged()) {
if (info.IsNewStatusGood()) {
if (info.IsNewStatusGood() || info.HasForcedStatus()) {
alwaysAllowed.insert(id);
} else {
changed.AddPDisk(id);
Expand Down
7 changes: 3 additions & 4 deletions ydb/core/cms/sentinel_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,14 @@
#include "defs.h"
#include "pdiskid.h"
#include "pdisk_state.h"

#include <ydb/core/protos/blobstorage_config.pb.h>
#include "pdisk_status.h"

#include <util/generic/hash.h>
#include <util/generic/hash_set.h>
#include <util/generic/map.h>

namespace NKikimr::NCms::NSentinel {

using EPDiskStatus = NKikimrBlobStorage::EDriveStatus;
using TLimitsMap = TMap<EPDiskState, ui32>;

class TPDiskStatusComputer {
Expand All @@ -29,6 +27,7 @@ class TPDiskStatusComputer {
void Reset();

void SetForcedStatus(EPDiskStatus status);
bool HasForcedStatus() const;
void ResetForcedStatus();

private:
Expand Down Expand Up @@ -84,7 +83,7 @@ struct TPDiskInfo
using EIgnoreReason = NKikimrCms::TPDiskInfo::EIgnoreReason;

EPDiskStatus ActualStatus = EPDiskStatus::ACTIVE;
EPDiskStatus PrevStatus = EPDiskStatus::ACTIVE;
EPDiskStatus PrevStatus = EPDiskStatus::UNKNOWN;
TInstant LastStatusChange;
bool StatusChangeFailed = false;
// means that this pdisk status change last time was the reason of whole request failure
Expand Down