Skip to content

Commit b53c906

Browse files
SammyVimesUgnineSirdis
authored andcommitted
CMS API per-drive locks (#16064)
Add support for CMS API to address single PDisk for REPLACE_DEVICES action. Since PDisks can be restarted, there is no need to lock and shutdown entire host. (cherry picked from commit 6e6034a)
1 parent 138583e commit b53c906

File tree

8 files changed

+286
-17
lines changed

8 files changed

+286
-17
lines changed

ydb/core/cms/api_adapters.cpp

Lines changed: 53 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -43,14 +43,44 @@ namespace {
4343
}
4444
}
4545

46+
static bool ConvertToPDiskId(const TString &name, TPDiskID &id) {
47+
int size;
48+
49+
if (sscanf(name.data(), "pdisk-%" SCNu32 "-%" SCNu32 "%n", &id.NodeId, &id.DiskId, &size) != 2) {
50+
return false;
51+
}
52+
53+
if (size != static_cast<int>(name.size())) {
54+
return false;
55+
}
56+
57+
return true;
58+
}
59+
4660
void ConvertAction(const NKikimrCms::TAction& cmsAction, Ydb::Maintenance::LockAction& action) {
4761
*action.mutable_duration() = TimeUtil::MicrosecondsToDuration(cmsAction.GetDuration());
4862

49-
ui32 nodeId;
50-
if (TryFromString(cmsAction.GetHost(), nodeId)) {
51-
action.mutable_scope()->set_node_id(nodeId);
63+
if (cmsAction.DevicesSize() > 0) {
64+
Y_ABORT_UNLESS(cmsAction.DevicesSize() == 1);
65+
auto& device = cmsAction.GetDevices()[0];
66+
auto* pdisk = action.mutable_scope()->mutable_pdisk();
67+
TPDiskID id;
68+
if (ConvertToPDiskId(device, id)) {
69+
auto* pdiskId = pdisk->mutable_pdisk_id();
70+
pdiskId->set_node_id(id.NodeId);
71+
pdiskId->set_pdisk_id(id.DiskId);
72+
} else {
73+
auto* pdiskLocation = pdisk->mutable_pdisk_location();
74+
pdiskLocation->set_host(cmsAction.GetHost());
75+
pdiskLocation->set_path(device);
76+
}
5277
} else {
53-
action.mutable_scope()->set_host(cmsAction.GetHost());
78+
ui32 nodeId;
79+
if (TryFromString(cmsAction.GetHost(), nodeId)) {
80+
action.mutable_scope()->set_node_id(nodeId);
81+
} else {
82+
action.mutable_scope()->set_host(cmsAction.GetHost());
83+
}
5484
}
5585
}
5686

@@ -343,6 +373,7 @@ class TCreateMaintenanceTask: public TPermissionResponseProcessor<
343373
switch (scope.scope_case()) {
344374
case Ydb::Maintenance::ActionScope::kNodeId:
345375
case Ydb::Maintenance::ActionScope::kHost:
376+
case Ydb::Maintenance::ActionScope::kPdisk:
346377
return true;
347378
default:
348379
Reply(Ydb::StatusIds::BAD_REQUEST, "Unknown scope");
@@ -399,18 +430,35 @@ class TCreateMaintenanceTask: public TPermissionResponseProcessor<
399430
return true;
400431
}
401432

433+
static void ConvertPDiskId(const Ydb::Maintenance::ActionScope_PDiskId& pdiskId, TString& out) {
434+
out = Sprintf("pdisk-%" PRIu32 "-%" PRIu32, pdiskId.node_id(), pdiskId.pdisk_id());
435+
}
436+
402437
static void ConvertAction(const Ydb::Maintenance::LockAction& action, NKikimrCms::TAction& cmsAction) {
403-
cmsAction.SetType(NKikimrCms::TAction::SHUTDOWN_HOST);
404438
cmsAction.SetDuration(TimeUtil::DurationToMicroseconds(action.duration()));
405439

406440
const auto& scope = action.scope();
407441
switch (scope.scope_case()) {
408442
case Ydb::Maintenance::ActionScope::kNodeId:
443+
cmsAction.SetType(NKikimrCms::TAction::SHUTDOWN_HOST);
409444
cmsAction.SetHost(ToString(scope.node_id()));
410445
break;
411446
case Ydb::Maintenance::ActionScope::kHost:
447+
cmsAction.SetType(NKikimrCms::TAction::SHUTDOWN_HOST);
412448
cmsAction.SetHost(scope.host());
413449
break;
450+
case Ydb::Maintenance::ActionScope::kPdisk: {
451+
cmsAction.SetType(NKikimrCms::TAction::REPLACE_DEVICES);
452+
auto& pdisk = scope.pdisk();
453+
if (pdisk.has_pdisk_id()) {
454+
ConvertPDiskId(pdisk.pdisk_id(), *cmsAction.add_devices());
455+
} else if (pdisk.has_pdisk_location()) {
456+
auto& pdiskLocation = pdisk.pdisk_location();
457+
cmsAction.SetHost(pdiskLocation.host());
458+
*cmsAction.add_devices() = pdiskLocation.path();
459+
}
460+
break;
461+
}
414462
default:
415463
Y_ABORT("unreachable");
416464
}

ydb/core/cms/cluster_info.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -725,6 +725,8 @@ TSet<TLockableItem *> TClusterInfo::FindLockedItems(const NKikimrCms::TAction &a
725725

726726
if (HasPDisk(device))
727727
item = &PDiskRef(device);
728+
else if (HasPDisk(action.GetHost(), device))
729+
item = &PDiskRef(action.GetHost(), device);
728730
else if (HasVDisk(device))
729731
item = &VDiskRef(device);
730732

ydb/core/cms/cluster_info.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -978,6 +978,10 @@ class TClusterInfo : public TThrRefBase {
978978
return PDiskRef(id);
979979
}
980980

981+
TPDiskInfo &PDiskRef(const TString &hostName, const TString &path) {
982+
return PDiskRef(HostNamePathToPDiskId(hostName, path));
983+
}
984+
981985
TVDiskInfo &VDiskRef(const TVDiskID &vdId) {
982986
Y_ABORT_UNLESS(HasVDisk(vdId));
983987
return *VDisks.find(vdId)->second;

ydb/core/cms/cms_maintenance_api_ut.cpp

Lines changed: 90 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ Y_UNIT_TEST_SUITE(TMaintenanceApiTest) {
4141
MakeLockAction(env.GetNodeId(0), TDuration::Minutes(10))
4242
)
4343
);
44-
44+
4545
auto response = env.CheckMaintenanceTaskCreate("task-2", Ydb::StatusIds::SUCCESS,
4646
MakeActionGroup(
4747
MakeLockAction(env.GetNodeId(1), TDuration::Minutes(10)),
@@ -116,6 +116,94 @@ Y_UNIT_TEST_SUITE(TMaintenanceApiTest) {
116116
const auto &a = response.action_group_states(0).action_states(0);
117117
UNIT_ASSERT_VALUES_EQUAL(a.status(), ActionState::ACTION_STATUS_PERFORMED);
118118
}
119+
120+
Y_UNIT_TEST(RequestReplaceDevicePDisk) {
121+
std::function<void(NCms::TPDiskID&, Ydb::Maintenance::ActionScope_PDisk*)> serializeIdFn = [](NCms::TPDiskID& pdiskId, Ydb::Maintenance::ActionScope_PDisk* pdisk) {
122+
auto* id = pdisk->mutable_pdisk_id();
123+
id->set_node_id(pdiskId.NodeId);
124+
id->set_pdisk_id(pdiskId.DiskId);
125+
};
126+
127+
std::function<void(const Ydb::Maintenance::ActionScope_PDisk&, NCms::TPDiskID&)> deserializeIdFn = [](const Ydb::Maintenance::ActionScope_PDisk& pdisk, NCms::TPDiskID& pdiskId) {
128+
auto& id = pdisk.Getpdisk_id();
129+
pdiskId.NodeId = id.node_id();
130+
pdiskId.DiskId = id.pdisk_id();
131+
};
132+
133+
std::function<void(NCms::TPDiskID&, Ydb::Maintenance::ActionScope_PDisk*)> serializeLocationFn = [](NCms::TPDiskID& pdiskId, Ydb::Maintenance::ActionScope_PDisk* pdisk) {
134+
auto* location = pdisk->mutable_pdisk_location();
135+
location->set_host("::1"); // All nodes live on this host.
136+
location->set_path("/" + std::to_string(pdiskId.NodeId) + "/pdisk-" + std::to_string(pdiskId.DiskId) + ".data");
137+
};
138+
139+
std::function<void(const Ydb::Maintenance::ActionScope_PDisk&, NCms::TPDiskID&)> deserializeLocationFn = [](const Ydb::Maintenance::ActionScope_PDisk& pdisk, NCms::TPDiskID& pdiskId) {
140+
auto& location = pdisk.Getpdisk_location();
141+
sscanf(location.path().c_str(), "/%d/pdisk-%d.data", &pdiskId.NodeId, &pdiskId.DiskId);
142+
};
143+
144+
std::vector<std::function<void(NCms::TPDiskID&, Ydb::Maintenance::ActionScope_PDisk*)>> serializeFns = {serializeIdFn, serializeLocationFn};
145+
146+
std::vector<std::function<void(const Ydb::Maintenance::ActionScope_PDisk&, NCms::TPDiskID&)>> deserializeFns = {deserializeIdFn, deserializeLocationFn};
147+
148+
for (size_t i = 0; i < serializeFns.size(); i++) {
149+
auto serializeFn = serializeFns[i];
150+
auto deserializeFn = deserializeFns[i];
151+
152+
TTestEnvOpts envOpts(8);
153+
envOpts.WithSentinel();
154+
155+
TCmsTestEnv env(envOpts);
156+
157+
auto req = std::make_unique<NKikimr::NCms::TEvCms::TEvCreateMaintenanceTaskRequest>();
158+
159+
Ydb::Maintenance::CreateMaintenanceTaskRequest* rec = req->Record.MutableRequest();
160+
161+
req->Record.SetUserSID("user");
162+
163+
auto* options = rec->mutable_task_options();
164+
options->set_availability_mode(::Ydb::Maintenance::AVAILABILITY_MODE_STRONG);
165+
166+
auto* actionGroup = rec->add_action_groups();
167+
auto* action = actionGroup->Addactions();
168+
auto* lockAction = action->mutable_lock_action();
169+
170+
auto* scope = lockAction->mutable_scope();
171+
172+
auto* pdisk = scope->mutable_pdisk();
173+
NCms::TPDiskID disk = env.PDiskId(0);
174+
serializeFn(disk, pdisk);
175+
lockAction->mutable_duration()->set_seconds(60);
176+
177+
env.SendToCms(req.release());
178+
179+
auto response = env.GrabEdgeEvent<NCms::TEvCms::TEvMaintenanceTaskResponse>();
180+
181+
UNIT_ASSERT_VALUES_EQUAL(response->Record.GetStatus(), Ydb::StatusIds_StatusCode_SUCCESS);
182+
183+
{
184+
auto& result = response->Record.GetResult();
185+
UNIT_ASSERT_VALUES_EQUAL(result.action_group_states().size(), 1);
186+
UNIT_ASSERT_VALUES_EQUAL(result.action_group_states(0).action_states().size(), 1);
187+
const auto &actionState = result.action_group_states(0).action_states(0);
188+
189+
UNIT_ASSERT(actionState.has_action());
190+
const auto& action = actionState.action();
191+
UNIT_ASSERT(action.has_lock_action());
192+
const auto& lockAction = action.lock_action();
193+
194+
UNIT_ASSERT(lockAction.has_scope());
195+
const auto& scope = lockAction.scope();
196+
UNIT_ASSERT(scope.has_pdisk());
197+
const auto& pdisk = scope.pdisk();
198+
199+
NCms::TPDiskID resultDisk;
200+
deserializeFn(pdisk, resultDisk);
201+
UNIT_ASSERT_VALUES_EQUAL(resultDisk, disk);
202+
}
203+
204+
env.CheckRequest("user", "user-r-1", false, NKikimrCms::TStatus::TStatus::WRONG_REQUEST);
205+
}
206+
}
119207
}
120208

121-
} // namespace NKikimr::NCmsTest
209+
} // namespace NKikimr::NCmsTest

ydb/core/cms/cms_ut.cpp

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -459,6 +459,94 @@ Y_UNIT_TEST_SUITE(TCmsTest) {
459459
"vdisk-3-1-0-1-0", "vdisk-3-1-0-5-0"));
460460
}
461461

462+
Y_UNIT_TEST(RequestReplaceDevicePDisk)
463+
{
464+
auto opts = TTestEnvOpts(8, 8).WithSentinel().WithDynamicGroups();
465+
TCmsTestEnv env(opts);
466+
467+
env.CheckPermissionRequest("user", false, false, false, true, TStatus::NO_SUCH_DEVICE,
468+
MakeAction(TAction::REPLACE_DEVICES, "::1", 60000000, "/dev/bad/device/path"));
469+
470+
env.CheckPermissionRequest(
471+
MakePermissionRequest(TRequestOptions("user", false, false, false),
472+
MakeAction(TAction::REPLACE_DEVICES, 1, 60000000, env.PDiskName(0, 1))
473+
),
474+
TStatus::ALLOW
475+
);
476+
}
477+
478+
Y_UNIT_TEST(RequestReplaceDevicePDiskByPath)
479+
{
480+
auto opts = TTestEnvOpts(8, 8).WithSentinel().WithDynamicGroups();
481+
TCmsTestEnv env(opts);
482+
483+
auto pdiskId = env.PDiskId(0, 0);
484+
485+
TString pdiskPath = "/" + std::to_string(pdiskId.NodeId) + "/pdisk-" + std::to_string(pdiskId.DiskId) + ".data";
486+
487+
env.CheckPermissionRequest(
488+
MakePermissionRequest(TRequestOptions("user", false, false, false),
489+
MakeAction(TAction::REPLACE_DEVICES, "::1", 60000000, pdiskPath)
490+
),
491+
TStatus::ALLOW
492+
);
493+
}
494+
495+
Y_UNIT_TEST(RequestReplacePDiskDoesntBreakGroup)
496+
{
497+
auto opts = TTestEnvOpts(8, 2).WithSentinel().WithDynamicGroups();
498+
TCmsTestEnv env(opts);
499+
500+
{
501+
auto pdiskId = env.PDiskId(0, 0);
502+
503+
TString pdiskPath = "/" + std::to_string(pdiskId.NodeId) + "/pdisk-" + std::to_string(pdiskId.DiskId) + ".data";
504+
505+
env.CheckPermissionRequest(
506+
MakePermissionRequest(TRequestOptions("user", false, false, false),
507+
MakeAction(TAction::REPLACE_DEVICES, "::1", 60000000, pdiskPath)
508+
),
509+
TStatus::ALLOW
510+
);
511+
}
512+
513+
{
514+
auto pdiskId = env.PDiskId(1, 0);
515+
516+
TString pdiskPath = "/" + std::to_string(pdiskId.NodeId) + "/pdisk-" + std::to_string(pdiskId.DiskId) + ".data";
517+
518+
env.CheckPermissionRequest(
519+
MakePermissionRequest(TRequestOptions("user", false, false, false),
520+
MakeAction(TAction::REPLACE_DEVICES, "::1", 60000000, pdiskPath)
521+
),
522+
TStatus::DISALLOW_TEMP
523+
);
524+
}
525+
}
526+
527+
Y_UNIT_TEST(RequestReplacePDiskConsecutiveWithDone)
528+
{
529+
auto opts = TTestEnvOpts(8, 2).WithSentinel().WithDynamicGroups();
530+
TCmsTestEnv env(opts);
531+
532+
for (ui32 i = 0; i < 8; ++i) {
533+
auto pdiskId = env.PDiskId(i, 0);
534+
535+
TString pdiskPath = "/" + std::to_string(pdiskId.NodeId) + "/pdisk-" + std::to_string(pdiskId.DiskId) + ".data";
536+
537+
auto rec = env.CheckPermissionRequest(
538+
MakePermissionRequest(TRequestOptions("user", false, false, false),
539+
MakeAction(TAction::REPLACE_DEVICES, "::1", 60000000, pdiskPath)
540+
),
541+
TStatus::ALLOW
542+
);
543+
544+
auto pid = rec.GetPermissions(0).GetId();
545+
546+
env.CheckDonePermission("user", pid);
547+
}
548+
}
549+
462550
Y_UNIT_TEST(RequestReplaceManyDevicesOnOneNode)
463551
{
464552
TCmsTestEnv env(16, 3);

0 commit comments

Comments
 (0)