diff --git a/cloud/blockstore/apps/client/lib/command.cpp b/cloud/blockstore/apps/client/lib/command.cpp index 865fc5033f..f8085843c6 100644 --- a/cloud/blockstore/apps/client/lib/command.cpp +++ b/cloud/blockstore/apps/client/lib/command.cpp @@ -470,7 +470,10 @@ void TCommand::Init() false); *versionCounter = 1; - RequestStats = CreateClientRequestStats(clientGroup, Timer); + RequestStats = CreateClientRequestStats( + clientGroup, + Timer, + EHistogramCounterOption::ReportMultipleCounters); VolumeStats = CreateVolumeStats( Monitoring, diff --git a/cloud/blockstore/config/diagnostics.proto b/cloud/blockstore/config/diagnostics.proto index 7335664155..43efce182e 100644 --- a/cloud/blockstore/config/diagnostics.proto +++ b/cloud/blockstore/config/diagnostics.proto @@ -203,4 +203,10 @@ message TDiagnosticsConfig // Monitoring data necessary for link generation on monpages. optional TMonitoringUrlData MonitoringUrlData = 47; + + // Report histogram as a set of manually created counters + optional bool ReportHistogramAsMultipleCounters = 48; + + // Report histogram as a single counter (THistogramCounter) + optional bool ReportHistogramAsSingleCounter = 49; } diff --git a/cloud/blockstore/libs/daemon/common/bootstrap.cpp b/cloud/blockstore/libs/daemon/common/bootstrap.cpp index 47818fb187..a0c9669df2 100644 --- a/cloud/blockstore/libs/daemon/common/bootstrap.cpp +++ b/cloud/blockstore/libs/daemon/common/bootstrap.cpp @@ -274,7 +274,10 @@ void TBootstrapBase::Init() } PostponedCriticalEvents.clear(); - RequestStats = CreateServerRequestStats(serverGroup, Timer); + RequestStats = CreateServerRequestStats( + serverGroup, + Timer, + Configs->DiagnosticsConfig->GetHistogramCounterOptions()); if (!VolumeStats) { VolumeStats = CreateVolumeStats( diff --git a/cloud/blockstore/libs/diagnostics/config.cpp b/cloud/blockstore/libs/diagnostics/config.cpp index fc6ff79b56..de8edb1a26 100644 --- a/cloud/blockstore/libs/diagnostics/config.cpp +++ 
b/cloud/blockstore/libs/diagnostics/config.cpp @@ -51,6 +51,8 @@ namespace { xxx(Mirror2SSDDowntimeThreshold, TDuration, TDuration::Seconds(5) )\ xxx(Mirror3SSDDowntimeThreshold, TDuration, TDuration::Seconds(5) )\ xxx(LocalSSDDowntimeThreshold, TDuration, TDuration::Seconds(5) )\ + xxx(ReportHistogramAsMultipleCounters, bool, true )\ + xxx(ReportHistogramAsSingleCounter, bool, false )\ // BLOCKSTORE_DIAGNOSTICS_CONFIG #define BLOCKSTORE_DIAGNOSTICS_DECLARE_CONFIG(name, type, value) \ @@ -175,6 +177,19 @@ TRequestThresholds TDiagnosticsConfig::GetRequestThresholds() const DiagnosticsConfig.GetRequestThresholds()); } +EHistogramCounterOptions TDiagnosticsConfig::GetHistogramCounterOptions() const +{ + EHistogramCounterOptions histogramCounterOptions; + if (GetReportHistogramAsMultipleCounters()) { + histogramCounterOptions |= + EHistogramCounterOption::ReportMultipleCounters; + } + if (GetReportHistogramAsSingleCounter()) { + histogramCounterOptions |= EHistogramCounterOption::ReportSingleCounter; + } + return histogramCounterOptions; +} + void TDiagnosticsConfig::Dump(IOutputStream& out) const { #define BLOCKSTORE_CONFIG_DUMP(name, ...) 
\ diff --git a/cloud/blockstore/libs/diagnostics/config.h b/cloud/blockstore/libs/diagnostics/config.h index 6a73988147..1a012bb52b 100644 --- a/cloud/blockstore/libs/diagnostics/config.h +++ b/cloud/blockstore/libs/diagnostics/config.h @@ -3,7 +3,7 @@ #include "public.h" #include - +#include "cloud/storage/core/libs/diagnostics/histogram_counter_options.h" #include #include @@ -146,7 +146,11 @@ class TDiagnosticsConfig TDuration GetMirror3SSDDowntimeThreshold() const; TDuration GetMirror2SSDDowntimeThreshold() const; TDuration GetLocalSSDDowntimeThreshold() const; + bool GetReportHistogramAsMultipleCounters() const; + bool GetReportHistogramAsSingleCounter() const; + TRequestThresholds GetRequestThresholds() const; + EHistogramCounterOptions GetHistogramCounterOptions() const; void Dump(IOutputStream& out) const; void DumpHtml(IOutputStream& out) const; diff --git a/cloud/blockstore/libs/diagnostics/request_stats.cpp b/cloud/blockstore/libs/diagnostics/request_stats.cpp index 2667a9660a..d9c5f796af 100644 --- a/cloud/blockstore/libs/diagnostics/request_stats.cpp +++ b/cloud/blockstore/libs/diagnostics/request_stats.cpp @@ -192,38 +192,55 @@ class TRequestStats final TRequestStats( TDynamicCountersPtr counters, bool isServerSide, - ITimerPtr timer) + ITimerPtr timer, + EHistogramCounterOptions histogramCounterOptions) : Counters(std::move(counters)) , IsServerSide(isServerSide) - , Total(MakeRequestCounters(timer, + , Total(MakeRequestCounters( + timer, TRequestCounters::EOption::ReportDataPlaneHistogram | - TRequestCounters::EOption::AddSpecialCounters)) - , TotalSSD(MakeRequestCounters(timer, + TRequestCounters::EOption::AddSpecialCounters, + histogramCounterOptions)) + , TotalSSD(MakeRequestCounters( + timer, TRequestCounters::EOption::ReportDataPlaneHistogram | - TRequestCounters::EOption::OnlyReadWriteRequests)) - , TotalHDD(MakeRequestCounters(timer, + TRequestCounters::EOption::OnlyReadWriteRequests, + histogramCounterOptions)) + , 
TotalHDD(MakeRequestCounters( + timer, + TRequestCounters::EOption::ReportDataPlaneHistogram | + TRequestCounters::EOption::OnlyReadWriteRequests, + histogramCounterOptions)) + , TotalSSDNonrepl(MakeRequestCounters( + timer, TRequestCounters::EOption::ReportDataPlaneHistogram | - TRequestCounters::EOption::OnlyReadWriteRequests)) - , TotalSSDNonrepl(MakeRequestCounters(timer, + TRequestCounters::EOption::AddSpecialCounters | + TRequestCounters::EOption::OnlyReadWriteRequests, + histogramCounterOptions)) + , TotalSSDMirror2(MakeRequestCounters( + timer, TRequestCounters::EOption::ReportDataPlaneHistogram | - TRequestCounters::EOption::AddSpecialCounters | - TRequestCounters::EOption::OnlyReadWriteRequests)) - , TotalSSDMirror2(MakeRequestCounters(timer, + TRequestCounters::EOption::AddSpecialCounters | + TRequestCounters::EOption::OnlyReadWriteRequests, + histogramCounterOptions)) + , TotalSSDMirror3(MakeRequestCounters( + timer, TRequestCounters::EOption::ReportDataPlaneHistogram | - TRequestCounters::EOption::AddSpecialCounters | - TRequestCounters::EOption::OnlyReadWriteRequests)) - , TotalSSDMirror3(MakeRequestCounters(timer, + TRequestCounters::EOption::AddSpecialCounters | + TRequestCounters::EOption::OnlyReadWriteRequests, + histogramCounterOptions)) + , TotalSSDLocal(MakeRequestCounters( + timer, TRequestCounters::EOption::ReportDataPlaneHistogram | - TRequestCounters::EOption::AddSpecialCounters | - TRequestCounters::EOption::OnlyReadWriteRequests)) - , TotalSSDLocal(MakeRequestCounters(timer, + TRequestCounters::EOption::AddSpecialCounters | + TRequestCounters::EOption::OnlyReadWriteRequests, + histogramCounterOptions)) + , TotalHDDNonrepl(MakeRequestCounters( + timer, TRequestCounters::EOption::ReportDataPlaneHistogram | - TRequestCounters::EOption::AddSpecialCounters | - TRequestCounters::EOption::OnlyReadWriteRequests)) - , TotalHDDNonrepl(MakeRequestCounters(timer, - TRequestCounters::EOption::ReportDataPlaneHistogram | - 
TRequestCounters::EOption::AddSpecialCounters | - TRequestCounters::EOption::OnlyReadWriteRequests)) + TRequestCounters::EOption::AddSpecialCounters | + TRequestCounters::EOption::OnlyReadWriteRequests, + histogramCounterOptions)) { Total.Register(*Counters); @@ -650,22 +667,26 @@ struct TRequestStatsStub final IRequestStatsPtr CreateClientRequestStats( TDynamicCountersPtr counters, - ITimerPtr timer) + ITimerPtr timer, + EHistogramCounterOptions histogramCounterOptions) { return std::make_shared( std::move(counters), false, - std::move(timer)); + std::move(timer), + histogramCounterOptions); } IRequestStatsPtr CreateServerRequestStats( TDynamicCountersPtr counters, - ITimerPtr timer) + ITimerPtr timer, + EHistogramCounterOptions histogramCounterOptions) { return std::make_shared( std::move(counters), true, - std::move(timer)); + std::move(timer), + histogramCounterOptions); } IRequestStatsPtr CreateRequestStatsStub() diff --git a/cloud/blockstore/libs/diagnostics/request_stats.h b/cloud/blockstore/libs/diagnostics/request_stats.h index 724490ca01..ed258244bc 100644 --- a/cloud/blockstore/libs/diagnostics/request_stats.h +++ b/cloud/blockstore/libs/diagnostics/request_stats.h @@ -75,10 +75,12 @@ struct IRequestStats IRequestStatsPtr CreateClientRequestStats( NMonitoring::TDynamicCountersPtr counters, - ITimerPtr timer); + ITimerPtr timer, + EHistogramCounterOptions histogramCounterOptions); IRequestStatsPtr CreateServerRequestStats( NMonitoring::TDynamicCountersPtr counters, - ITimerPtr timer); + ITimerPtr timer, + EHistogramCounterOptions histogramCounterOptions); IRequestStatsPtr CreateRequestStatsStub(); } // namespace NCloud::NBlockStore diff --git a/cloud/blockstore/libs/diagnostics/request_stats_ut.cpp b/cloud/blockstore/libs/diagnostics/request_stats_ut.cpp index 037ec9a873..8bfee4a32f 100644 --- a/cloud/blockstore/libs/diagnostics/request_stats_ut.cpp +++ b/cloud/blockstore/libs/diagnostics/request_stats_ut.cpp @@ -63,7 +63,8 @@ 
Y_UNIT_TEST_SUITE(TRequestStatsTest) auto monitoring = CreateMonitoringServiceStub(); auto requestStats = CreateServerRequestStats( monitoring->GetCounters(), - CreateWallClockTimer()); + CreateWallClockTimer(), + EHistogramCounterOption::ReportMultipleCounters); auto totalCounters = monitoring ->GetCounters() @@ -228,7 +229,8 @@ Y_UNIT_TEST_SUITE(TRequestStatsTest) auto monitoring = CreateMonitoringServiceStub(); auto requestStats = CreateServerRequestStats( monitoring->GetCounters(), - CreateWallClockTimer()); + CreateWallClockTimer(), + EHistogramCounterOption::ReportMultipleCounters); auto totalCounters = monitoring->GetCounters()->GetSubgroup("request", "WriteBlocks"); @@ -316,7 +318,8 @@ Y_UNIT_TEST_SUITE(TRequestStatsTest) auto monitoring = CreateMonitoringServiceStub(); auto requestStats = CreateServerRequestStats( monitoring->GetCounters(), - CreateWallClockTimer()); + CreateWallClockTimer(), + EHistogramCounterOption::ReportMultipleCounters); auto totalCounters = monitoring ->GetCounters() @@ -422,7 +425,8 @@ Y_UNIT_TEST_SUITE(TRequestStatsTest) auto monitoring = CreateMonitoringServiceStub(); auto requestStats = CreateServerRequestStats( monitoring->GetCounters(), - CreateWallClockTimer()); + CreateWallClockTimer(), + EHistogramCounterOption::ReportMultipleCounters); auto totalCounters = monitoring ->GetCounters() @@ -518,7 +522,8 @@ Y_UNIT_TEST_SUITE(TRequestStatsTest) auto monitoring = CreateMonitoringServiceStub(); auto requestStats = CreateServerRequestStats( monitoring->GetCounters(), - CreateWallClockTimer()); + CreateWallClockTimer(), + EHistogramCounterOption::ReportMultipleCounters); auto totalCounters = monitoring ->GetCounters() @@ -586,7 +591,8 @@ Y_UNIT_TEST_SUITE(TRequestStatsTest) auto monitoring = CreateMonitoringServiceStub(); auto requestStats = CreateServerRequestStats( monitoring->GetCounters(), - CreateWallClockTimer()); + CreateWallClockTimer(), + EHistogramCounterOption::ReportMultipleCounters); unsigned int totalShots = 0; auto 
shoot = [&] (auto mediaKind, unsigned int count) { diff --git a/cloud/blockstore/libs/diagnostics/server_stats_ut.cpp b/cloud/blockstore/libs/diagnostics/server_stats_ut.cpp index a9ec55626e..b0489f5836 100644 --- a/cloud/blockstore/libs/diagnostics/server_stats_ut.cpp +++ b/cloud/blockstore/libs/diagnostics/server_stats_ut.cpp @@ -142,7 +142,10 @@ Y_UNIT_TEST_SUITE(TServerStatsTest) std::make_shared(), monitoring, CreateProfileLogStub(), - CreateServerRequestStats(serverGroup,timer), + CreateServerRequestStats( + serverGroup, + timer, + EHistogramCounterOption::ReportMultipleCounters), std::move(volumeStats) ); @@ -209,7 +212,10 @@ Y_UNIT_TEST_SUITE(TServerStatsTest) std::make_shared(), monitoring, CreateProfileLogStub(), - CreateServerRequestStats(serverGroup,timer), + CreateServerRequestStats( + serverGroup, + timer, + EHistogramCounterOption::ReportMultipleCounters), std::move(volumeStats) ); @@ -242,7 +248,10 @@ Y_UNIT_TEST_SUITE(TServerStatsTest) std::make_shared(), monitoring, CreateProfileLogStub(), - CreateServerRequestStats(serverGroup,timer), + CreateServerRequestStats( + serverGroup, + timer, + EHistogramCounterOption::ReportMultipleCounters), std::move(volumeStats) ); @@ -323,7 +332,10 @@ Y_UNIT_TEST_SUITE(TServerStatsTest) std::make_shared(), monitoring, CreateProfileLogStub(), - CreateServerRequestStats(serverGroup, timer), + CreateServerRequestStats( + serverGroup, + timer, + EHistogramCounterOption::ReportMultipleCounters), std::move(volumeStats) ); diff --git a/cloud/blockstore/libs/diagnostics/stats_helpers.cpp b/cloud/blockstore/libs/diagnostics/stats_helpers.cpp index ce5a023f0a..cb6124306f 100644 --- a/cloud/blockstore/libs/diagnostics/stats_helpers.cpp +++ b/cloud/blockstore/libs/diagnostics/stats_helpers.cpp @@ -9,7 +9,8 @@ namespace NCloud::NBlockStore { TRequestCounters MakeRequestCounters( ITimerPtr timer, - TRequestCounters::EOptions options) + TRequestCounters::EOptions options, + EHistogramCounterOptions histogramCounterOptions) { 
return TRequestCounters( std::move(timer), @@ -24,7 +25,8 @@ TRequestCounters MakeRequestCounters( const auto bt = static_cast(t); return IsNonLocalReadWriteRequest(bt); }, - options + options, + histogramCounterOptions ); } diff --git a/cloud/blockstore/libs/diagnostics/stats_helpers.h b/cloud/blockstore/libs/diagnostics/stats_helpers.h index e6f883a1e6..26fa29a323 100644 --- a/cloud/blockstore/libs/diagnostics/stats_helpers.h +++ b/cloud/blockstore/libs/diagnostics/stats_helpers.h @@ -3,6 +3,7 @@ #include "public.h" #include +#include namespace NCloud::NBlockStore { @@ -10,6 +11,7 @@ namespace NCloud::NBlockStore { TRequestCounters MakeRequestCounters( ITimerPtr timer, - TRequestCounters::EOptions options); + TRequestCounters::EOptions options, + EHistogramCounterOptions histogramCounterOptions); } // namespace NCloud::NBlockStore diff --git a/cloud/blockstore/libs/diagnostics/volume_stats.cpp b/cloud/blockstore/libs/diagnostics/volume_stats.cpp index 923f05ff3b..aa169b4503 100644 --- a/cloud/blockstore/libs/diagnostics/volume_stats.cpp +++ b/cloud/blockstore/libs/diagnostics/volume_stats.cpp @@ -270,12 +270,14 @@ class TVolumeInfo final TVolumeInfo( std::shared_ptr volumeBase, ITimerPtr timer, - TRealInstanceId realInstanceId) + TRealInstanceId realInstanceId, + EHistogramCounterOptions histogramCounterOptions) : VolumeBase(std::move(volumeBase)) , RealInstanceId(std::move(realInstanceId)) , RequestCounters(MakeRequestCounters( std::move(timer), - GetRequestCountersOptions(*VolumeBase))) + GetRequestCountersOptions(*VolumeBase), + histogramCounterOptions)) {} const NProto::TVolume& GetInfo() const override @@ -858,7 +860,8 @@ class TVolumeStats final auto info = std::make_shared( volumeBase, Timer, - realInstanceId); + realInstanceId, + DiagnosticsConfig->GetHistogramCounterOptions()); if (!Counters) { InitCounters(); diff --git a/cloud/blockstore/libs/endpoints_vhost/external_endpoint_stats_ut.cpp 
b/cloud/blockstore/libs/endpoints_vhost/external_endpoint_stats_ut.cpp index d210d0c9d2..b40eaefcf8 100644 --- a/cloud/blockstore/libs/endpoints_vhost/external_endpoint_stats_ut.cpp +++ b/cloud/blockstore/libs/endpoints_vhost/external_endpoint_stats_ut.cpp @@ -124,7 +124,10 @@ struct TFixture std::make_shared(), Monitoring, CreateProfileLogStub(), - CreateServerRequestStats(serverGroup, Timer), + CreateServerRequestStats( + serverGroup, + Timer, + EHistogramCounterOption::ReportMultipleCounters), std::move(volumeStats) ); diff --git a/cloud/blockstore/libs/service_local/compound_storage_ut.cpp b/cloud/blockstore/libs/service_local/compound_storage_ut.cpp index 97a89fafd7..b47b42d79a 100644 --- a/cloud/blockstore/libs/service_local/compound_storage_ut.cpp +++ b/cloud/blockstore/libs/service_local/compound_storage_ut.cpp @@ -176,7 +176,10 @@ IStoragePtr CreateTestStorage( std::make_shared(), monitoring, CreateProfileLogStub(), - CreateServerRequestStats(serverGroup, CreateWallClockTimer()), + CreateServerRequestStats( + serverGroup, + CreateWallClockTimer(), + EHistogramCounterOption::ReportMultipleCounters), CreateVolumeStatsStub()); return CreateCompoundStorage( @@ -700,7 +703,10 @@ Y_UNIT_TEST_SUITE(TCompoundStorageTest) std::make_shared(), monitoring, CreateProfileLogStub(), - CreateServerRequestStats(serverGroup, CreateWallClockTimer()), + CreateServerRequestStats( + serverGroup, + CreateWallClockTimer(), + EHistogramCounterOption::ReportMultipleCounters), CreateVolumeStatsStub()); auto storage = CreateCompoundStorage( diff --git a/cloud/blockstore/libs/service_local/storage_spdk_ut.cpp b/cloud/blockstore/libs/service_local/storage_spdk_ut.cpp index fc35a279d3..b503b0d719 100644 --- a/cloud/blockstore/libs/service_local/storage_spdk_ut.cpp +++ b/cloud/blockstore/libs/service_local/storage_spdk_ut.cpp @@ -247,7 +247,10 @@ IStoragePtr CreateSpdkStorage( std::make_shared(), monitoring, CreateProfileLogStub(), - CreateServerRequestStats(serverGroup, 
CreateWallClockTimer()), + CreateServerRequestStats( + serverGroup, + CreateWallClockTimer(), + EHistogramCounterOption::ReportMultipleCounters), CreateVolumeStatsStub()); auto storageProvider = CreateSpdkStorageProvider( diff --git a/cloud/blockstore/tools/nbd/bootstrap.cpp b/cloud/blockstore/tools/nbd/bootstrap.cpp index 919d8d319a..6ab83df0ea 100644 --- a/cloud/blockstore/tools/nbd/bootstrap.cpp +++ b/cloud/blockstore/tools/nbd/bootstrap.cpp @@ -386,7 +386,10 @@ void TBootstrap::InitControlClient() auto clientGroup = rootGroup->GetSubgroup("component", "client"); - RequestStats = CreateClientRequestStats(clientGroup, Timer); + RequestStats = CreateClientRequestStats( + clientGroup, + Timer, + EHistogramCounterOption::ReportMultipleCounters); VolumeStats = CreateVolumeStats( Monitoring, diff --git a/cloud/blockstore/tools/testing/loadtest/lib/bootstrap.cpp b/cloud/blockstore/tools/testing/loadtest/lib/bootstrap.cpp index 311f34809a..3f230c94d1 100644 --- a/cloud/blockstore/tools/testing/loadtest/lib/bootstrap.cpp +++ b/cloud/blockstore/tools/testing/loadtest/lib/bootstrap.cpp @@ -210,7 +210,10 @@ void TBootstrap::Init() false); *versionCounter = 1; - RequestStats = CreateClientRequestStats(clientGroup, Timer); + RequestStats = CreateClientRequestStats( + clientGroup, + Timer, + EHistogramCounterOption::ReportMultipleCounters); VolumeStats = CreateVolumeStats( Monitoring, diff --git a/cloud/filestore/config/diagnostics.proto b/cloud/filestore/config/diagnostics.proto index cc6eb5e4bd..6883cfa0b0 100644 --- a/cloud/filestore/config/diagnostics.proto +++ b/cloud/filestore/config/diagnostics.proto @@ -97,4 +97,10 @@ message TDiagnosticsConfig // Monitoring data necessary for link generation on monpages. 
optional TMonitoringUrlData MonitoringUrlData = 22; + + // Report histogram as a set of manually created counters + optional bool ReportHistogramAsMultipleCounters = 24; + + // Report histogram as a single counter (THistogramCounter) + optional bool ReportHistogramAsSingleCounter = 25; } diff --git a/cloud/filestore/libs/diagnostics/config.cpp b/cloud/filestore/libs/diagnostics/config.cpp index 4944a7ab74..1340504af8 100644 --- a/cloud/filestore/libs/diagnostics/config.cpp +++ b/cloud/filestore/libs/diagnostics/config.cpp @@ -35,6 +35,8 @@ namespace { xxx(PostponeTimePredictorMaxTime, TDuration, TDuration::Minutes(1) )\ xxx(PostponeTimePredictorPercentage, double, 0.0 )\ xxx(MonitoringUrlData, TMonitoringUrlData, {} )\ + xxx(ReportHistogramAsMultipleCounters, bool, true )\ + xxx(ReportHistogramAsSingleCounter, bool, false )\ // FILESTORE_DIAGNOSTICS_CONFIG #define FILESTORE_DIAGNOSTICS_DECLARE_CONFIG(name, type, value) \ @@ -103,6 +105,19 @@ TRequestThresholds TDiagnosticsConfig::GetRequestThresholds() const DiagnosticsConfig.GetRequestThresholds()); } +EHistogramCounterOptions TDiagnosticsConfig::GetHistogramCounterOptions() const +{ + EHistogramCounterOptions histogramCounterOptions; + if (GetReportHistogramAsMultipleCounters()) { + histogramCounterOptions |= + EHistogramCounterOption::ReportMultipleCounters; + } + if (GetReportHistogramAsSingleCounter()) { + histogramCounterOptions |= EHistogramCounterOption::ReportSingleCounter; + } + return histogramCounterOptions; +} + void TDiagnosticsConfig::Dump(IOutputStream& out) const { #define FILESTORE_CONFIG_DUMP(name, ...) 
\ diff --git a/cloud/filestore/libs/diagnostics/config.h b/cloud/filestore/libs/diagnostics/config.h index 73dc5ab0ec..ef1143dd87 100644 --- a/cloud/filestore/libs/diagnostics/config.h +++ b/cloud/filestore/libs/diagnostics/config.h @@ -3,6 +3,7 @@ #include "public.h" #include +#include "cloud/storage/core/libs/diagnostics/histogram_counter_options.h" #include #include @@ -66,6 +67,10 @@ class TDiagnosticsConfig TMonitoringUrlData GetMonitoringUrlData() const; + bool GetReportHistogramAsMultipleCounters() const; + bool GetReportHistogramAsSingleCounter() const; + EHistogramCounterOptions GetHistogramCounterOptions() const; + void Dump(IOutputStream& out) const; void DumpHtml(IOutputStream& out) const; }; diff --git a/cloud/filestore/libs/diagnostics/request_stats.cpp b/cloud/filestore/libs/diagnostics/request_stats.cpp index eba356c707..cbaf9f8a8e 100644 --- a/cloud/filestore/libs/diagnostics/request_stats.cpp +++ b/cloud/filestore/libs/diagnostics/request_stats.cpp @@ -45,7 +45,8 @@ const auto REQUEST_COUNTERS_OPTIONS = TRequestCountersPtr MakeRequestCounters( ITimerPtr timer, TDynamicCounters& counters, - TRequestCounters::EOptions options) + TRequestCounters::EOptions options, + EHistogramCounterOptions histogramCounterOptions) { auto requestCounters = std::make_shared( std::move(timer), @@ -56,7 +57,8 @@ TRequestCountersPtr MakeRequestCounters( [] (TRequestCounters::TRequestType t) { return IsReadWriteRequest(static_cast(t)); }, - options + options, + histogramCounterOptions ); requestCounters->Register(counters); return requestCounters; @@ -158,21 +160,25 @@ class TRequestStats final TDynamicCountersPtr counters, ITimerPtr timer, TDuration executionTimeThreshold, - TDuration totalTimeThreshold) + TDuration totalTimeThreshold, + EHistogramCounterOptions histogramCounterOptions) : TRequestLogger(executionTimeThreshold, totalTimeThreshold) , RootCounters(std::move(counters)) , TotalCounters(MakeRequestCounters( timer, *RootCounters, - REQUEST_COUNTERS_OPTIONS)) + 
REQUEST_COUNTERS_OPTIONS, + histogramCounterOptions)) , SsdCounters(MakeRequestCounters( timer, *RootCounters->GetSubgroup("type", "ssd"), - REQUEST_COUNTERS_OPTIONS)) + REQUEST_COUNTERS_OPTIONS, + histogramCounterOptions)) , HddCounters(MakeRequestCounters( timer, *RootCounters->GetSubgroup("type", "hdd"), - REQUEST_COUNTERS_OPTIONS)) + REQUEST_COUNTERS_OPTIONS, + histogramCounterOptions)) { auto revisionGroup = RootCounters->GetSubgroup("revision", GetFullVersionString()); @@ -425,7 +431,8 @@ class TFileSystemStats final TDynamicCountersPtr counters, IPostponeTimePredictorPtr predictor, TDuration executionTimeThreshold, - TDuration totalTimeThreshold) + TDuration totalTimeThreshold, + EHistogramCounterOptions histogramCounterOptions) : TRequestLogger{executionTimeThreshold, totalTimeThreshold} , FileSystemId{std::move(fileSystemId)} , ClientId{std::move(clientId)} @@ -433,7 +440,8 @@ class TFileSystemStats final timer, *counters, REQUEST_COUNTERS_OPTIONS - | TRequestCounters::EOption::LazyRequestInitialization)} + | TRequestCounters::EOption::LazyRequestInitialization, + histogramCounterOptions)} , Predictor{std::move(predictor)} , PredictorStats{counters, std::move(timer)} {} @@ -699,7 +707,8 @@ class TRequestStatsRegistry final std::move(totalCounters), Timer, DiagnosticsConfig->GetSlowExecutionTimeRequestThreshold(), - DiagnosticsConfig->GetSlowTotalTimeRequestThreshold()); + DiagnosticsConfig->GetSlowTotalTimeRequestThreshold(), + DiagnosticsConfig->GetHistogramCounterOptions()); FsCounters = RootCounters ->GetSubgroup("component", component + "_fs") @@ -729,7 +738,8 @@ class TRequestStatsRegistry final DiagnosticsConfig->GetPostponeTimePredictorPercentage(), DiagnosticsConfig->GetPostponeTimePredictorMaxTime(), DiagnosticsConfig->GetSlowExecutionTimeRequestThreshold(), - DiagnosticsConfig->GetSlowTotalTimeRequestThreshold()); + DiagnosticsConfig->GetSlowTotalTimeRequestThreshold(), + DiagnosticsConfig->GetHistogramCounterOptions()); it = 
StatsMap.emplace(key, stats).first; stats->Subscribe(RequestStats->GetTotalCounters()); } @@ -837,7 +847,8 @@ class TRequestStatsRegistry final double delayWindowPercentage, TMaybe delayMaxTime, TDuration executionTimeThreshold, - TDuration totalTimeThreshold) const + TDuration totalTimeThreshold, + EHistogramCounterOptions histogramCounterOptions) const { auto predictor = CreatePostponeTimePredictor( timer, @@ -852,7 +863,8 @@ class TRequestStatsRegistry final std::move(counters), std::move(predictor), executionTimeThreshold, - totalTimeThreshold); + totalTimeThreshold, + histogramCounterOptions); } }; diff --git a/cloud/storage/core/libs/diagnostics/histogram_counter_options.h b/cloud/storage/core/libs/diagnostics/histogram_counter_options.h new file mode 100644 index 0000000000..dc48077822 --- /dev/null +++ b/cloud/storage/core/libs/diagnostics/histogram_counter_options.h @@ -0,0 +1,16 @@ +#pragma once + +#include + +namespace NCloud { + +enum class EHistogramCounterOption { + ReportSingleCounter = (1 << 0), + ReportMultipleCounters = (1 << 1), +}; + +Y_DECLARE_FLAGS(EHistogramCounterOptions, EHistogramCounterOption); +Y_DECLARE_OPERATORS_FOR_FLAGS(EHistogramCounterOptions); + +} // namespace NCloud + diff --git a/cloud/storage/core/libs/diagnostics/histogram_types.h b/cloud/storage/core/libs/diagnostics/histogram_types.h index 90fee6a5cc..9d66404506 100644 --- a/cloud/storage/core/libs/diagnostics/histogram_types.h +++ b/cloud/storage/core/libs/diagnostics/histogram_types.h @@ -101,5 +101,10 @@ struct TKbSizeBuckets static TVector MakeNames(); }; +template +inline TVector ConvertToHistBounds(const TBucketsType& buckets) { + return {buckets.begin(), std::prev(buckets.end())}; +} + } // namespace NCloud diff --git a/cloud/storage/core/libs/diagnostics/request_counters.cpp b/cloud/storage/core/libs/diagnostics/request_counters.cpp index fbda7fef49..ee5997b028 100644 --- a/cloud/storage/core/libs/diagnostics/request_counters.cpp +++ 
b/cloud/storage/core/libs/diagnostics/request_counters.cpp
@@ -30,65 +30,69 @@ namespace {
 template
 struct THistBase
 {
-    struct TBucket
-    {
-        double Value;
-        TDynamicCounters::TCounterPtr Counter;
-
-        TBucket(double value = 0)
-            : Value(value)
-            , Counter(new TCounterForPtr(true))
-        {}
-    };
+    const TBucketBounds HistBounds;
+    const EHistogramCounterOptions CounterOptions;
 
-    std::array Buckets;
+    THistogramPtr Hist;
+    std::array Counters;
 
-    THistBase()
+    explicit THistBase(EHistogramCounterOptions counterOptions)
+        : HistBounds(ConvertToHistBounds(TDerived::Buckets))
+        , CounterOptions(counterOptions)
+        , Hist(
+            new THistogramCounter(NMonitoring::ExplicitHistogram(HistBounds)))
     {
-        std::copy(
-            TDerived::Buckets.begin(),
-            TDerived::Buckets.end(),
-            Buckets.begin());
+        std::fill(Counters.begin(), Counters.end(), new TCounterForPtr(true));  // NOTE(review): one placeholder counter is shared by every slot until Register() installs per-bucket counters
     }
 
     void Register(
         TDynamicCounters& counters,
+        const TString& name,
         TCountableBase::EVisibility vis =
             TCountableBase::EVisibility::Public)
     {
-        const auto names = TDerived::MakeNames();
-        for (size_t i = 0; i < Buckets.size(); ++i) {
-            Buckets[i].Counter = counters.GetCounter(names[i], true, vis);
+        auto subgroup = MakeVisibilitySubgroup(
+            counters,
+            "histogram",
+            name,
+            vis);
+        if (CounterOptions & EHistogramCounterOption::ReportSingleCounter) {
+            Hist = subgroup->GetHistogram(name,
+                NMonitoring::ExplicitHistogram(HistBounds),
+                true,
+                vis);
+        }
+        if (CounterOptions & EHistogramCounterOption::ReportMultipleCounters) {
+            const auto names = TDerived::MakeNames();
+            for (size_t i = 0; i < Counters.size(); ++i) {
+                Counters[i] = subgroup->GetCounter(names[i], true, vis);
+            }
         }
     }
 
-    void Increment(double value, ui64 count = 1)
+    void Increment(double value, ui64 count)
     {
-        auto comparer = [] (const TBucket& bucket, double value) {
-            return bucket.Value < value;
-        };
+        Hist->Collect(value, count);
 
         auto it = LowerBound(
-            Buckets.begin(),
-            Buckets.end(),
-            value,
-            comparer);
+            TDerived::Buckets.begin(),
+            TDerived::Buckets.end(),
+            value);
         STORAGE_VERIFY(
-            it != Buckets.end(),
+            it != TDerived::Buckets.end(),
             "Bucket",
             value);
-
-        it->Counter->Add(count);
+        size_t index = std::distance(TDerived::Buckets.begin(), it);
+        Counters[index]->Add(count);
     }
 
     TVector GetBuckets() const
     {
-        TVector result(Reserve(Buckets.size()));
-        for (const auto& bucket: Buckets) {
-            result.emplace_back(
-                bucket.Value,
-                bucket.Counter->Val());
-        }
+        const auto snapshot = Hist->Snapshot();
+        TVector result(Reserve(snapshot->Count()));  // reserve only: a sized vector would keep Count() zeroed entries ahead of the emplace_back'ed ones
+        for (size_t i = 0; i < snapshot->Count(); ++i) {
+            result.emplace_back(snapshot->UpperBound(i), snapshot->Value(i));
+        }
         return result;
     }
 };
@@ -98,6 +102,11 @@ struct THistBase
 struct TTimeHist
     : public THistBase
 {
+    explicit TTimeHist(EHistogramCounterOptions counterOptions)
+        : THistBase(counterOptions)
+    {
+    }
+
     void Increment(TDuration requestTime, ui64 count = 1)
     {
         THistBase::Increment(requestTime.MicroSeconds() / 1000., count);
@@ -109,6 +118,11 @@ struct TTimeHist
 struct TSizeHist
     : public THistBase
 {
+    explicit TSizeHist(EHistogramCounterOptions counterOptions)
+        : THistBase(counterOptions)
+    {
+    }
+
     void Increment(double requestBytes, ui64 count = 1)
     {
         THistBase::Increment(requestBytes / 1024, count);
@@ -281,8 +295,17 @@ struct TRequestCounters::TStatCounters
     TMutex FullInitLock;
     TAtomic FullyInitialized = false;
 
-    explicit TStatCounters(ITimerPtr timer)
-        : MaxTimeCalc(timer)
+    explicit TStatCounters(
+        ITimerPtr timer,
+        EHistogramCounterOptions histogramCounterOptions)
+        : SizeHist(histogramCounterOptions)
+        , TimeHist(histogramCounterOptions)
+        , TimeHistUnaligned(histogramCounterOptions)
+        , ExecutionTimeHist(histogramCounterOptions)
+        , ExecutionTimeHistUnaligned(histogramCounterOptions)
+        , RequestCompletionTimeHist(histogramCounterOptions)
+        , PostponedTimeHist(histogramCounterOptions)
+        , MaxTimeCalc(timer)
         , MaxTotalTimeCalc(timer)
         , MaxSizeCalc(timer)
         , MaxInProgressCalc(timer)
@@ -363,12 +386,10 @@ struct TRequestCounters::TStatCounters
             if (ReportDataPlaneHistogram)
{ auto unalignedClassGroup = counters.GetSubgroup("sizeclass", "Unaligned"); - SizeHist.Register(*counters.GetSubgroup("histogram", "Size")); - TimeHistUnaligned.Register(*unalignedClassGroup->GetSubgroup("histogram", "Time")); - ExecutionTimeHist.Register( - *counters.GetSubgroup("histogram", "ExecutionTime")); - ExecutionTimeHistUnaligned.Register( - *unalignedClassGroup->GetSubgroup("histogram", "ExecutionTime")); + SizeHist.Register(counters, "Size"); + TimeHistUnaligned.Register(*unalignedClassGroup, "Time"); + ExecutionTimeHist.Register(counters, "ExecutionTime"); + ExecutionTimeHistUnaligned.Register(*unalignedClassGroup, "ExecutionTime"); } else { SizePercentiles.Register(*counters.GetSubgroup("percentiles", "Size")); ExecutionTimePercentiles.Register( @@ -382,24 +403,13 @@ struct TRequestCounters::TStatCounters ? TCountableBase::EVisibility::Public : TCountableBase::EVisibility::Private; - PostponedTimeHist.Register(*MakeVisibilitySubgroup( - counters, - "histogram", - "ThrottlerDelay", - visibleHistogram), visibleHistogram); - - TimeHist.Register(*MakeVisibilitySubgroup( - counters, - "histogram", - "Time", - visibleHistogram), visibleHistogram); + PostponedTimeHist.Register(counters, "ThrottlerDelay", visibleHistogram); + TimeHist.Register(counters, "Time", visibleHistogram); // Always enough only percentiles. 
- RequestCompletionTimeHist.Register(*MakeVisibilitySubgroup( - counters, - "histogram", - "RequestCompletionTime", - TCountableBase::EVisibility::Private), + RequestCompletionTimeHist.Register( + counters, + "RequestCompletionTime", TCountableBase::EVisibility::Private); RequestCompletionTimePercentiles.Register( *counters.GetSubgroup("percentiles", "RequestCompletionTime")); @@ -418,9 +428,7 @@ struct TRequestCounters::TStatCounters FastPathHits = counters.GetCounter("FastPathHits", true); } else { if (ReportControlPlaneHistogram) { - TimeHist.Register(*counters.GetSubgroup( - "histogram", - "Time")); + TimeHist.Register(counters, "Time"); } else { TimePercentiles.Register( *counters.GetSubgroup("percentiles", "Time")); @@ -688,7 +696,8 @@ TRequestCounters::TRequestCounters( ui32 requestCount, std::function requestType2Name, std::function isReadWriteRequestType, - EOptions options) + EOptions options, + EHistogramCounterOptions histogramCounterOptions) : RequestType2Name(std::move(requestType2Name)) , IsReadWriteRequestType(std::move(isReadWriteRequestType)) , Options(options) @@ -699,7 +708,7 @@ TRequestCounters::TRequestCounters( CountersByRequest.reserve(requestCount); for (ui32 i = 0; i < requestCount; ++i) { - CountersByRequest.emplace_back(timer); + CountersByRequest.emplace_back(timer, histogramCounterOptions); } } diff --git a/cloud/storage/core/libs/diagnostics/request_counters.h b/cloud/storage/core/libs/diagnostics/request_counters.h index 46841d4d3b..d04fe1b6bd 100644 --- a/cloud/storage/core/libs/diagnostics/request_counters.h +++ b/cloud/storage/core/libs/diagnostics/request_counters.h @@ -3,6 +3,7 @@ #include "public.h" #include +#include #include #include @@ -55,7 +56,8 @@ class TRequestCounters ui32 requestCount, std::function requestType2Name, std::function isReadWriteRequestType, - EOptions options = {}); + EOptions options, + EHistogramCounterOptions histogramCounterOptions); ~TRequestCounters(); void Register(NMonitoring::TDynamicCounters& 
counters); diff --git a/cloud/storage/core/libs/diagnostics/request_counters_ut.cpp b/cloud/storage/core/libs/diagnostics/request_counters_ut.cpp index 059ed3cbae..13e366c518 100644 --- a/cloud/storage/core/libs/diagnostics/request_counters_ut.cpp +++ b/cloud/storage/core/libs/diagnostics/request_counters_ut.cpp @@ -4,6 +4,7 @@ #include #include +#include "cloud/storage/core/libs/diagnostics/histogram_types.h" #include #include @@ -96,24 +97,32 @@ auto IsReadWriteRequest(TRequestCounters::TRequestType t) //////////////////////////////////////////////////////////////////////////////// -auto MakeRequestCounters(TRequestCounters::EOption options = {}) +auto MakeRequestCounters( + TRequestCounters::EOption options = {}, + EHistogramCounterOptions histogramCounterOptions = + EHistogramCounterOption::ReportMultipleCounters) { return TRequestCounters( CreateWallClockTimer(), 2, RequestType2Name, IsReadWriteRequest, - options); + options, + histogramCounterOptions); } -auto MakeRequestCountersPtr(TRequestCounters::EOption options = {}) +auto MakeRequestCountersPtr( + TRequestCounters::EOption options = {}, + EHistogramCounterOptions histogramCounterOptions = + EHistogramCounterOption::ReportMultipleCounters) { return std::make_shared( CreateWallClockTimer(), 2, RequestType2Name, IsReadWriteRequest, - options); + options, + histogramCounterOptions); } } // namespace @@ -669,6 +678,127 @@ Y_UNIT_TEST_SUITE(TRequestCountersTest) UNIT_ASSERT_VALUES_EQUAL(time->Val(), 2); } } + + Y_UNIT_TEST(ShouldReportHistogramAsMultipleSensors) + { + auto warmupTimer = GetCyclesPerMillisecond(); + Y_UNUSED(warmupTimer); + + auto monitoring = CreateMonitoringServiceStub(); + auto counters = MakeRequestCountersPtr( + TRequestCounters::EOption::ReportDataPlaneHistogram, + EHistogramCounterOption::ReportMultipleCounters); + counters->Register(*monitoring->GetCounters()); + + AddRequestStats(*counters, WriteRequestType, { + { 1_KB, TDuration::MilliSeconds(800), TDuration::Zero() }, + { 1_KB, 
TDuration::MilliSeconds(1500), TDuration::Zero() }, + { 1_KB, TDuration::MilliSeconds(2000), TDuration::Zero() }, + { 1_KB, TDuration::MilliSeconds(8000), TDuration::Zero() }, + { 1_KB, TDuration::MilliSeconds(36000), TDuration::Zero() }, + { 1_KB, TDuration::MilliSeconds(100000), TDuration::Zero() }, + }); + + TMap expectedHistogramValues; + for (const auto& bucketName : TRequestMsTimeBuckets::MakeNames()) { + expectedHistogramValues[bucketName] = 0; + } + expectedHistogramValues["1000ms"] = 1; + expectedHistogramValues["2000ms"] = 2; + expectedHistogramValues["10000ms"] = 1; + expectedHistogramValues["Inf"] = 2; + + counters->UpdateStats(); + const auto group = monitoring + ->GetCounters() + ->GetSubgroup("request", "WriteBlocks") + ->GetSubgroup("histogram", "Time"); + + for (const auto& [name, value]: expectedHistogramValues) { + const auto counter = group->FindCounter(name); + UNIT_ASSERT(counter); + UNIT_ASSERT_VALUES_EQUAL(counter->Val(), value); + } + } + + Y_UNIT_TEST(ShouldReportHistogramAsSingleSensor) + { + auto warmupTimer = GetCyclesPerMillisecond(); + Y_UNUSED(warmupTimer); + + auto monitoring = CreateMonitoringServiceStub(); + auto counters = MakeRequestCountersPtr( + TRequestCounters::EOption::ReportDataPlaneHistogram, + EHistogramCounterOption::ReportSingleCounter); + counters->Register(*monitoring->GetCounters()); + + AddRequestStats(*counters, WriteRequestType, { + { 1_KB, TDuration::MilliSeconds(800), TDuration::Zero() }, + { 1_KB, TDuration::MilliSeconds(1500), TDuration::Zero() }, + { 1_KB, TDuration::MilliSeconds(2000), TDuration::Zero() }, + { 1_KB, TDuration::MilliSeconds(8000), TDuration::Zero() }, + { 1_KB, TDuration::MilliSeconds(36000), TDuration::Zero() }, + { 1_KB, TDuration::MilliSeconds(100000), TDuration::Zero() }, + }); + + const TMap expectedHistogramValues = { + { 19, 1 }, // 1000ms + { 20, 2 }, // 2000ms + { 22, 1 }, // 10000ms + { 24, 2 }, // Inf + }; + + counters->UpdateStats(); + + const auto histogram = monitoring + 
->GetCounters() + ->GetSubgroup("request", "WriteBlocks") + ->GetSubgroup("histogram", "Time") + ->FindHistogram("Time"); + UNIT_ASSERT(histogram); + + const auto snapshot = histogram->Snapshot(); + UNIT_ASSERT_VALUES_EQUAL(snapshot->Count(), TRequestMsTimeBuckets::Buckets.size()); + for (size_t bucketId = 0; bucketId < snapshot->Count(); bucketId++) { + auto expectedValue = expectedHistogramValues.contains(bucketId) ? + expectedHistogramValues.at(bucketId) : 0; + UNIT_ASSERT_VALUES_EQUAL(snapshot->Value(bucketId), expectedValue); + } + } + + Y_UNIT_TEST(ShouldNotReportHistogramIfOptionIsNotSet) + { + auto monitoring = CreateMonitoringServiceStub(); + auto counters = MakeRequestCountersPtr( + TRequestCounters::EOption::ReportDataPlaneHistogram, + {}); + counters->Register(*monitoring->GetCounters()); + + AddRequestStats(*counters, WriteRequestType, { + { 1_KB, TDuration::MilliSeconds(800), TDuration::Zero() }, + { 1_KB, TDuration::MilliSeconds(1500), TDuration::Zero() }, + { 1_KB, TDuration::MilliSeconds(2000), TDuration::Zero() }, + { 1_KB, TDuration::MilliSeconds(8000), TDuration::Zero() }, + { 1_KB, TDuration::MilliSeconds(36000), TDuration::Zero() }, + { 1_KB, TDuration::MilliSeconds(100000), TDuration::Zero() }, + }); + + auto counter = monitoring + ->GetCounters() + ->GetSubgroup("request", "WriteBlocks") + ->GetSubgroup("histogram", "Time") + ->FindCounter("1ms"); + + UNIT_ASSERT(!counter); + + auto histogram = monitoring + ->GetCounters() + ->GetSubgroup("request", "WriteBlocks") + ->GetSubgroup("histogram", "Time") + ->FindHistogram("Time"); + + UNIT_ASSERT(!histogram); + } } } // namespace NCloud diff --git a/cloud/vm/blockstore/bootstrap.cpp b/cloud/vm/blockstore/bootstrap.cpp index 0fcb19b4e8..b7c5a51bdd 100644 --- a/cloud/vm/blockstore/bootstrap.cpp +++ b/cloud/vm/blockstore/bootstrap.cpp @@ -158,7 +158,10 @@ void TBootstrap::Init() false); *versionCounter = 1; - RequestStats = CreateClientRequestStats(clientGroup, Timer); + RequestStats = 
CreateClientRequestStats( + clientGroup, + Timer, + EHistogramCounterOption::ReportMultipleCounters); VolumeStats = CreateVolumeStats( Monitoring,