Skip to content

Commit

Permalink
Add possibility to do lite reallocations (#2247)
Browse files Browse the repository at this point in the history
  • Loading branch information
komarevtsev-d authored Oct 9, 2024
1 parent afd77c1 commit a835bd6
Show file tree
Hide file tree
Showing 18 changed files with 317 additions and 29 deletions.
7 changes: 7 additions & 0 deletions cloud/blockstore/config/storage.proto
Original file line number Diff line number Diff line change
Expand Up @@ -1035,4 +1035,11 @@ message TStorageServiceConfig

// Node type
optional string NodeType = 384;

// The volume actor will compare meta before and after reallocation and
// decide whether a lite reallocation is possible.
optional bool AllowLiteDiskReallocations = 385;

// Timeout between disk reallocations.
optional uint32 DiskRegistryDisksNotificationTimeout = 386;
}
1 change: 1 addition & 0 deletions cloud/blockstore/libs/diagnostics/critical_events.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ namespace NCloud::NBlockStore {
xxx(DiskRegistryUnexpectedAffectedDisks) \
xxx(ReadBlockCountMismatch) \
xxx(CancelRoutineIsNotSet) \
xxx(FieldDescriptorNotFound) \
// BLOCKSTORE_IMPOSSIBLE_EVENTS

////////////////////////////////////////////////////////////////////////////////
Expand Down
2 changes: 2 additions & 0 deletions cloud/blockstore/libs/storage/core/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -508,6 +508,8 @@ TDuration MSeconds(ui32 value)
xxx(OptimizeVoidBuffersTransferForReadsEnabled, bool, false )\
xxx(VolumeHistoryCleanupItemCount, ui32, 100'000 )\
xxx(IdleAgentDeployByCmsDelay, TDuration, Hours(1) )\
xxx(AllowLiteDiskReallocations, bool, false )\
xxx(DiskRegistryDisksNotificationTimeout, TDuration, Seconds(5) )\


// BLOCKSTORE_STORAGE_CONFIG_RW
Expand Down
3 changes: 3 additions & 0 deletions cloud/blockstore/libs/storage/core/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -596,6 +596,9 @@ class TStorageConfig
TVector<TString> GetDestructionAllowedOnlyForDisksWithIdPrefixes() const;

TDuration GetIdleAgentDeployByCmsDelay() const;
TDuration GetDiskRegistryDisksNotificationTimeout() const;

bool GetAllowLiteDiskReallocations() const;

TString GetNodeRegistrationToken() const;
ui32 GetNodeRegistrationMaxAttempts() const;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,8 @@ void TDiskRegistryActor::ReallocateDisks(const TActorContext& ctx)

auto request = std::make_unique<TEvDiskRegistryPrivate::TEvNotifyDisksRequest>();

auto deadline = Min(DisksNotificationStartTs, ctx.Now()) + TDuration::Seconds(5);
auto deadline = Min(DisksNotificationStartTs, ctx.Now()) +
Config->GetDiskRegistryDisksNotificationTimeout();
if (deadline > ctx.Now()) {
LOG_INFO(ctx, TBlockStoreComponents::DISK_REGISTRY,
"[%lu] Scheduled disks notification, now: %lu, deadline: %lu",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ target_sources(libs-storage-volume PRIVATE
${CMAKE_SOURCE_DIR}/cloud/blockstore/libs/storage/volume/volume_actor_monitoring_removeclient.cpp
${CMAKE_SOURCE_DIR}/cloud/blockstore/libs/storage/volume/volume_actor_monitoring.cpp
${CMAKE_SOURCE_DIR}/cloud/blockstore/libs/storage/volume/volume_actor_read_history.cpp
${CMAKE_SOURCE_DIR}/cloud/blockstore/libs/storage/volume/volume_actor_read_meta_history.cpp
${CMAKE_SOURCE_DIR}/cloud/blockstore/libs/storage/volume/volume_actor_reallocatedisk.cpp
${CMAKE_SOURCE_DIR}/cloud/blockstore/libs/storage/volume/volume_actor_removeclient.cpp
${CMAKE_SOURCE_DIR}/cloud/blockstore/libs/storage/volume/volume_actor_reset_seqnumber.cpp
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ target_sources(libs-storage-volume PRIVATE
${CMAKE_SOURCE_DIR}/cloud/blockstore/libs/storage/volume/volume_actor_monitoring_removeclient.cpp
${CMAKE_SOURCE_DIR}/cloud/blockstore/libs/storage/volume/volume_actor_monitoring.cpp
${CMAKE_SOURCE_DIR}/cloud/blockstore/libs/storage/volume/volume_actor_read_history.cpp
${CMAKE_SOURCE_DIR}/cloud/blockstore/libs/storage/volume/volume_actor_read_meta_history.cpp
${CMAKE_SOURCE_DIR}/cloud/blockstore/libs/storage/volume/volume_actor_reallocatedisk.cpp
${CMAKE_SOURCE_DIR}/cloud/blockstore/libs/storage/volume/volume_actor_removeclient.cpp
${CMAKE_SOURCE_DIR}/cloud/blockstore/libs/storage/volume/volume_actor_reset_seqnumber.cpp
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ target_sources(libs-storage-volume PRIVATE
${CMAKE_SOURCE_DIR}/cloud/blockstore/libs/storage/volume/volume_actor_monitoring_removeclient.cpp
${CMAKE_SOURCE_DIR}/cloud/blockstore/libs/storage/volume/volume_actor_monitoring.cpp
${CMAKE_SOURCE_DIR}/cloud/blockstore/libs/storage/volume/volume_actor_read_history.cpp
${CMAKE_SOURCE_DIR}/cloud/blockstore/libs/storage/volume/volume_actor_read_meta_history.cpp
${CMAKE_SOURCE_DIR}/cloud/blockstore/libs/storage/volume/volume_actor_reallocatedisk.cpp
${CMAKE_SOURCE_DIR}/cloud/blockstore/libs/storage/volume/volume_actor_removeclient.cpp
${CMAKE_SOURCE_DIR}/cloud/blockstore/libs/storage/volume/volume_actor_reset_seqnumber.cpp
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ target_sources(libs-storage-volume PRIVATE
${CMAKE_SOURCE_DIR}/cloud/blockstore/libs/storage/volume/volume_actor_monitoring_removeclient.cpp
${CMAKE_SOURCE_DIR}/cloud/blockstore/libs/storage/volume/volume_actor_monitoring.cpp
${CMAKE_SOURCE_DIR}/cloud/blockstore/libs/storage/volume/volume_actor_read_history.cpp
${CMAKE_SOURCE_DIR}/cloud/blockstore/libs/storage/volume/volume_actor_read_meta_history.cpp
${CMAKE_SOURCE_DIR}/cloud/blockstore/libs/storage/volume/volume_actor_reallocatedisk.cpp
${CMAKE_SOURCE_DIR}/cloud/blockstore/libs/storage/volume/volume_actor_removeclient.cpp
${CMAKE_SOURCE_DIR}/cloud/blockstore/libs/storage/volume/volume_actor_reset_seqnumber.cpp
Expand Down
6 changes: 6 additions & 0 deletions cloud/blockstore/libs/storage/volume/testlib/test_env.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -545,6 +545,12 @@ TVolumeClient::CreateUpdateShadowDiskStateRequest(
processedBlockCount);
}

std::unique_ptr<TEvVolumePrivate::TEvReadMetaHistoryRequest>
TVolumeClient::CreateReadMetaHistoryRequest()
{
return std::make_unique<TEvVolumePrivate::TEvReadMetaHistoryRequest>();
}

void TVolumeClient::SendRemoteHttpInfo(
const TString& params,
HTTP_METHOD method)
Expand Down
3 changes: 3 additions & 0 deletions cloud/blockstore/libs/storage/volume/testlib/test_env.h
Original file line number Diff line number Diff line change
Expand Up @@ -457,6 +457,9 @@ class TVolumeClient
TEvVolumePrivate::TEvUpdateShadowDiskStateRequest::EReason reason,
ui64 processedBlockCount);

std::unique_ptr<TEvVolumePrivate::TEvReadMetaHistoryRequest>
CreateReadMetaHistoryRequest();

void SendRemoteHttpInfo(
const TString& params,
HTTP_METHOD method);
Expand Down
110 changes: 83 additions & 27 deletions cloud/blockstore/libs/storage/volume/volume_actor_allocatedisk.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ using namespace NActors;
using namespace NKikimr;
using namespace NKikimr::NTabletFlatExecutor;

using MessageDifferencer = google::protobuf::util::MessageDifferencer;

namespace {

////////////////////////////////////////////////////////////////////////////////
Expand Down Expand Up @@ -108,6 +110,57 @@ bool ValidateDevices(
return ok;
}

// Builds a protobuf MessageDifferencer that skips TDeviceConfig::NodeId and
// TRdmaEndpoint::Port when comparing messages.
//
// Returns nullptr (after reporting the FieldDescriptorNotFound critical event)
// if either field descriptor cannot be resolved by name.
std::unique_ptr<MessageDifferencer> CreateNodeIdChangeDifferencer()
{
    // NodeId and the rdma Port are the two fields that will change during a
    // disk agent blue-green deploy.
    const auto* nodeIdField =
        NProto::TDeviceConfig::GetDescriptor()->FindFieldByName("NodeId");
    const auto* rdmaPortField =
        NProto::TRdmaEndpoint::GetDescriptor()->FindFieldByName("Port");

    const bool allFieldsFound = nodeIdField && rdmaPortField;
    if (!allFieldsFound) {
        // Pointer values are logged so that it is visible which of the two
        // lookups has failed.
        ReportFieldDescriptorNotFound(
            TStringBuilder()
            << "Lite reallocation is impossible. nodeIdDescriptor = "
            << static_cast<const void*>(nodeIdField)
            << "; rdmaPortDescriptor = "
            << static_cast<const void*>(rdmaPortField));
        return nullptr;
    }

    auto differencer = std::make_unique<MessageDifferencer>();

    // Fields excluded from the comparison.
    differencer->IgnoreField(nodeIdField);
    differencer->IgnoreField(rdmaPortField);

    // Comparison modes for everything that is not ignored.
    differencer->set_float_comparison(
        MessageDifferencer::FloatComparison::APPROXIMATE);
    differencer->set_message_field_comparison(
        MessageDifferencer::MessageFieldComparison::EQUAL);

    return differencer;
}

// Produces the volume meta that results from applying an UpdateDevices
// transaction to oldMeta.
//
// Starts from a copy of oldMeta and replaces the device list, migrations,
// replicas, fresh device ids, IO mode (with its timestamp) and the
// MuteIOErrors flag with the values from args. All other fields are carried
// over from oldMeta untouched.
//
// NOTE: the repeated payloads (Devices, Migrations, Replicas, FreshDeviceIds)
// are moved out of args, so they must not be read from args afterwards.
NProto::TVolumeMeta CreateNewMeta(
    const NProto::TVolumeMeta& oldMeta,
    TTxVolume::TUpdateDevices& args)
{
    NProto::TVolumeMeta result = oldMeta;

    // Scalar settings.
    result.SetIOMode(args.IOMode);
    result.SetIOModeTs(args.IOModeTs.MicroSeconds());
    result.SetMuteIOErrors(args.MuteIOErrors);

    // Primary device list and migrations.
    *result.MutableDevices() = std::move(args.Devices);
    *result.MutableMigrations() = std::move(args.Migrations);

    // Replicas are rebuilt from scratch: one replica entry per device list.
    result.ClearReplicas();
    for (auto& replicaDevices: args.Replicas) {
        *result.AddReplicas()->MutableDevices() = std::move(replicaDevices);
    }

    // Fresh device ids are rebuilt from scratch as well.
    result.ClearFreshDeviceIds();
    for (auto& deviceId: args.FreshDeviceIds) {
        *result.AddFreshDeviceIds() = std::move(deviceId);
    }

    return result;
}

} // namespace

////////////////////////////////////////////////////////////////////////////////
Expand Down Expand Up @@ -464,49 +517,52 @@ void TVolumeActor::ExecuteUpdateDevices(
TTransactionContext& tx,
TTxVolume::TUpdateDevices& args)
{
Y_UNUSED(ctx);
Y_ABORT_UNLESS(State);

auto newMeta = State->GetMeta();
*newMeta.MutableDevices() = std::move(args.Devices);
*newMeta.MutableMigrations() = std::move(args.Migrations);
newMeta.ClearReplicas();
for (auto& devices: args.Replicas) {
auto* replica = newMeta.AddReplicas();
*replica->MutableDevices() = std::move(devices);
const auto& oldMeta = State->GetMeta();
auto newMeta = CreateNewMeta(oldMeta, args);

Y_DEBUG_ABORT_UNLESS(State->IsDiskRegistryMediaKind());
if (Config->GetAllowLiteDiskReallocations()) {
auto differencer = CreateNodeIdChangeDifferencer();
args.LiteReallocation =
differencer && differencer->Compare(oldMeta, newMeta);
}
newMeta.ClearFreshDeviceIds();
for (auto& freshDeviceId: args.FreshDeviceIds) {
*newMeta.AddFreshDeviceIds() = std::move(freshDeviceId);
}
newMeta.SetIOMode(args.IOMode);
newMeta.SetIOModeTs(args.IOModeTs.MicroSeconds());
newMeta.SetMuteIOErrors(args.MuteIOErrors);

// TODO: reset MigrationIndex here and in UpdateVolumeConfig only if our
// migration or fresh device lists have changed
// NBS-1988
newMeta.SetMigrationIndex(0);

TVolumeMetaHistoryItem metaHistoryItem{ctx.Now(), newMeta};

TVolumeDatabase db(tx.DB);
if (!args.LiteReallocation) {
// TODO: reset MigrationIndex here and in UpdateVolumeConfig only if our
// migration or fresh device lists have changed
// NBS-1988
newMeta.SetMigrationIndex(0);

TVolumeMetaHistoryItem metaHistoryItem{ctx.Now(), newMeta};
db.WriteMetaHistory(State->GetMetaHistory().size(), metaHistoryItem);
State->AddMetaHistory(std::move(metaHistoryItem));
}

db.WriteMeta(newMeta);
db.WriteMetaHistory(State->GetMetaHistory().size(), metaHistoryItem);
State->ResetMeta(std::move(newMeta));
State->AddMetaHistory(std::move(metaHistoryItem));
}

void TVolumeActor::CompleteUpdateDevices(
const TActorContext& ctx,
TTxVolume::TUpdateDevices& args)
{
LOG_INFO(
ctx,
TBlockStoreComponents::VOLUME,
"[%lu] Devices have been updated. DiskId: %s LiteReallocation: %d",
TabletID(),
State->GetDiskId().c_str(),
args.LiteReallocation);

if (auto actorId = State->GetDiskRegistryBasedPartitionActor()) {
if (!args.RequestInfo) {
WaitForPartitions.emplace_back(actorId, nullptr);
} else {
auto requestInfo = std::move(args.RequestInfo);
auto reply = [=] (const auto& ctx, auto error) {
auto reply =
[requestInfo = args.RequestInfo](const auto& ctx, auto error)
{
using TResponse = TEvVolumePrivate::TEvUpdateDevicesResponse;

NCloud::Reply(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -112,4 +112,3 @@ void TVolumeActor::CompleteReadHistory(
}

} // namespace NCloud::NBlockStore::NStorage

Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
#include "volume_actor.h"

#include "volume_database.h"

#include <cloud/blockstore/libs/storage/core/proto_helpers.h>
#include <cloud/blockstore/libs/storage/core/request_info.h>

namespace NCloud::NBlockStore::NStorage {

using namespace NActors;

using namespace NKikimr;
using namespace NKikimr::NTabletFlatExecutor;

////////////////////////////////////////////////////////////////////////////////

// Handles TEvReadMetaHistoryRequest: captures the sender, cookie and call
// context of the incoming event in a request info object, registers it via
// AddTransaction and starts the TReadMetaHistory transaction.
void TVolumeActor::HandleReadMetaHistory(
    const TEvVolumePrivate::TEvReadMetaHistoryRequest::TPtr& ev,
    const NActors::TActorContext& ctx)
{
    auto requestInfo = CreateRequestInfo(
        ev->Sender,
        ev->Cookie,
        ev->Get()->CallContext);

    // The matching RemoveTransaction call is made in CompleteReadMetaHistory.
    AddTransaction(*requestInfo);

    ExecuteTx<TReadMetaHistory>(ctx, std::move(requestInfo));
}

////////////////////////////////////////////////////////////////////////////////

bool TVolumeActor::PrepareReadMetaHistory(
const TActorContext& ctx,
TTransactionContext& tx,
TTxVolume::TReadMetaHistory& args)
{
Y_UNUSED(ctx);

TVolumeDatabase db(tx.DB);
return db.ReadMetaHistory(args.MetaHistory);
}

// Execute stage of the ReadMetaHistory transaction. Intentionally empty: the
// history is read in PrepareReadMetaHistory and the reply is sent in
// CompleteReadMetaHistory, so there is nothing to do here.
void TVolumeActor::ExecuteReadMetaHistory(
    const TActorContext& ctx,
    TTransactionContext& tx,
    TTxVolume::TReadMetaHistory& args)
{
    // None of the arguments are needed; silence unused-parameter warnings.
    Y_UNUSED(args);
    Y_UNUSED(tx);
    Y_UNUSED(ctx);
}

void TVolumeActor::CompleteReadMetaHistory(
const TActorContext& ctx,
TTxVolume::TReadMetaHistory& args)
{
auto response =
std::make_unique<TEvVolumePrivate::TEvReadMetaHistoryResponse>();
response->MetaHistory = std::move(args.MetaHistory);
NCloud::Reply(ctx, *args.RequestInfo, std::move(response));

RemoveTransaction(*args.RequestInfo);
}

} // namespace NCloud::NBlockStore::NStorage
14 changes: 14 additions & 0 deletions cloud/blockstore/libs/storage/volume/volume_events_private.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ namespace NCloud::NBlockStore::NStorage {
xxx(UpdateDevices, __VA_ARGS__) \
xxx(UpdateCheckpointRequest, __VA_ARGS__) \
xxx(UpdateShadowDiskState, __VA_ARGS__) \
xxx(ReadMetaHistory, __VA_ARGS__) \
// BLOCKSTORE_VOLUME_REQUESTS_PRIVATE

////////////////////////////////////////////////////////////////////////////////
Expand Down Expand Up @@ -118,6 +119,19 @@ struct TEvVolumePrivate
TVector<THistoryLogItem> History;
};

//
// ReadMetaHistory
//

// Payload of the ReadMetaHistory request. It carries no parameters.
struct TReadMetaHistoryRequest
{
};

// Payload of the ReadMetaHistory response.
struct TReadMetaHistoryResponse
{
// Volume meta history items returned to the requester.
TVector<TVolumeMetaHistoryItem> MetaHistory;
};

//
// UpdateDevices
//
Expand Down
23 changes: 23 additions & 0 deletions cloud/blockstore/libs/storage/volume/volume_tx.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ namespace NCloud::NBlockStore::NStorage {
xxx(UpdateVolumeParams, __VA_ARGS__) \
xxx(DeleteVolumeParams, __VA_ARGS__) \
xxx(ChangeStorageConfig, __VA_ARGS__) \
xxx(ReadMetaHistory, __VA_ARGS__) \
// BLOCKSTORE_VOLUME_TRANSACTIONS

////////////////////////////////////////////////////////////////////////////////
Expand Down Expand Up @@ -151,6 +152,8 @@ struct TTxVolume
TInstant IOModeTs;
bool MuteIOErrors;

bool LiteReallocation = false;

TUpdateDevices(
TDevices devices,
TMigrations migrations,
Expand Down Expand Up @@ -368,6 +371,26 @@ struct TTxVolume
}
};

//
// Read Meta History
//

struct TReadMetaHistory
{
const TRequestInfoPtr RequestInfo;

TVector<TVolumeMetaHistoryItem> MetaHistory;

explicit TReadMetaHistory(TRequestInfoPtr requestInfo)
: RequestInfo(std::move(requestInfo))
{}

void Clear()
{
MetaHistory.clear();
}
};

//
// SavePartStats
//
Expand Down
Loading

0 comments on commit a835bd6

Please sign in to comment.