Skip to content

Commit 080882e

Browse files
authored
Fix OLAP stats (#766)
* add tests for check olap stats, controller for act perioad and add filling stats for non-standalone table. * remove useless log * remove useless log * up controller * add test with some tables in store * support stats for table store * remove useless message * add size to stats and small up * fix flapping, up controller, add some log * reduce timeout * add check for primary index * up sender, set immutable var
1 parent 30e189b commit 080882e

File tree

12 files changed

+392
-67
lines changed

12 files changed

+392
-67
lines changed
Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
#include <ydb/core/kqp/ut/common/kqp_ut_common.h>
2+
#include <ydb/core/kqp/ut/common/columnshard.h>
3+
#include <ydb/core/tx/columnshard/hooks/testing/controller.h>
4+
#include <ydb/core/testlib/common_helper.h>
5+
6+
namespace NKikimr {
7+
namespace NKqp {
8+
9+
using namespace NYdb;
10+
using namespace NYdb::NTable;
11+
12+
Y_UNIT_TEST_SUITE(KqpOlapStats) {
13+
constexpr size_t inserted_rows = 1000;
14+
constexpr size_t tables_in_store = 1000;
15+
constexpr size_t size_single_table = 13152;
16+
17+
const TVector<TTestHelper::TColumnSchema> schema = {
18+
TTestHelper::TColumnSchema().SetName("id").SetType(NScheme::NTypeIds::Int32).SetNullable(false),
19+
TTestHelper::TColumnSchema().SetName("resource_id").SetType(NScheme::NTypeIds::Utf8),
20+
TTestHelper::TColumnSchema().SetName("level").SetType(NScheme::NTypeIds::Int32)
21+
};
22+
23+
class TOlapStatsController: public NYDBTest::NColumnShard::TController {
24+
public:
25+
TDuration GetPeriodicWakeupActivationPeriod(const TDuration /*defaultValue*/) const override {
26+
return TDuration::MilliSeconds(10);
27+
}
28+
TDuration GetStatsReportInterval(const TDuration /*defaultValue*/) const override {
29+
return TDuration::MilliSeconds(10);
30+
}
31+
};
32+
33+
Y_UNIT_TEST(AddRowsTableStandalone) {
34+
auto csController = NYDBTest::TControllers::RegisterCSControllerGuard<TOlapStatsController>();
35+
36+
TKikimrSettings runnerSettings;
37+
runnerSettings.WithSampleTables = false;
38+
39+
TTestHelper testHelper(runnerSettings);
40+
41+
TTestHelper::TColumnTable testTable;
42+
43+
testTable.SetName("/Root/ColumnTableTest").SetPrimaryKey({"id"}).SetSharding({"id"}).SetSchema(schema);
44+
testHelper.CreateTable(testTable);
45+
{
46+
TTestHelper::TUpdatesBuilder tableInserter(testTable.GetArrowSchema(schema));
47+
48+
for(size_t i=0; i<inserted_rows; i++) {
49+
tableInserter.AddRow().Add(i).Add("test_res_" + std::to_string(i)).AddNull();
50+
}
51+
52+
testHelper.InsertData(testTable, tableInserter);
53+
}
54+
55+
Sleep(TDuration::Seconds(1));
56+
57+
auto settings = TDescribeTableSettings().WithTableStatistics(true);
58+
auto describeResult = testHelper.GetSession().DescribeTable("/Root/ColumnTableTest", settings).GetValueSync();
59+
60+
UNIT_ASSERT_C(describeResult.IsSuccess(), describeResult.GetIssues().ToString());
61+
62+
const auto& description = describeResult.GetTableDescription();
63+
64+
UNIT_ASSERT_VALUES_EQUAL(inserted_rows, description.GetTableRows());
65+
UNIT_ASSERT_VALUES_EQUAL(size_single_table, description.GetTableSize());
66+
}
67+
68+
Y_UNIT_TEST(AddRowsTableInTableStore) {
69+
auto csController = NYDBTest::TControllers::RegisterCSControllerGuard<TOlapStatsController>();
70+
71+
TKikimrSettings runnerSettings;
72+
runnerSettings.WithSampleTables = false;
73+
74+
TTestHelper testHelper(runnerSettings);
75+
76+
TTestHelper::TColumnTableStore testTableStore;
77+
78+
testTableStore.SetName("/Root/TableStoreTest").SetPrimaryKey({"id"}).SetSchema(schema);
79+
testHelper.CreateTable(testTableStore);
80+
TTestHelper::TColumnTable testTable;
81+
testTable.SetName("/Root/TableStoreTest/ColumnTableTest").SetPrimaryKey({"id"}).SetSharding({"id"}).SetSchema(schema);
82+
testHelper.CreateTable(testTable);
83+
84+
{
85+
TTestHelper::TUpdatesBuilder tableInserter(testTable.GetArrowSchema(schema));
86+
for(size_t i=0; i<inserted_rows; i++) {
87+
tableInserter.AddRow().Add(i).Add("test_res_" + std::to_string(i)).AddNull();
88+
}
89+
testHelper.InsertData(testTable, tableInserter);
90+
}
91+
92+
Sleep(TDuration::Seconds(1));
93+
94+
auto settings = TDescribeTableSettings().WithTableStatistics(true);
95+
auto describeResult = testHelper.GetSession().DescribeTable("/Root/TableStoreTest/ColumnTableTest", settings).GetValueSync();
96+
97+
UNIT_ASSERT_C(describeResult.IsSuccess(), describeResult.GetIssues().ToString());
98+
99+
const auto& description = describeResult.GetTableDescription();
100+
101+
UNIT_ASSERT_VALUES_EQUAL(inserted_rows, description.GetTableRows());
102+
UNIT_ASSERT_VALUES_EQUAL(size_single_table, description.GetTableSize());
103+
}
104+
105+
Y_UNIT_TEST(AddRowsSomeTablesInTableStore) {
106+
auto csController = NYDBTest::TControllers::RegisterCSControllerGuard<TOlapStatsController>();
107+
108+
TKikimrSettings runnerSettings;
109+
runnerSettings.WithSampleTables = false;
110+
111+
TTestHelper testHelper(runnerSettings);
112+
113+
TTestHelper::TColumnTableStore testTableStore;
114+
115+
testTableStore.SetName("/Root/TableStoreTest").SetPrimaryKey({"id"}).SetSchema(schema);
116+
testHelper.CreateTable(testTableStore);
117+
118+
Tests::NCommon::TLoggerInit(testHelper.GetKikimr()).SetPriority(NActors::NLog::PRI_DEBUG).Initialize();
119+
120+
for(size_t t=0; t<tables_in_store; t++) {
121+
TTestHelper::TColumnTable testTable;
122+
testTable.SetName("/Root/TableStoreTest/ColumnTableTest_" + std::to_string(t)).SetPrimaryKey({"id"}).SetSharding({"id"}).SetSchema(schema);
123+
testHelper.CreateTable(testTable);
124+
125+
TTestHelper::TUpdatesBuilder tableInserter(testTable.GetArrowSchema(schema));
126+
for(size_t i=0; i < t+ inserted_rows; i++) {
127+
tableInserter.AddRow().Add(i + t * tables_in_store).Add("test_res_" + std::to_string(i + t * tables_in_store)).AddNull();
128+
}
129+
testHelper.InsertData(testTable, tableInserter);;
130+
}
131+
132+
Sleep(TDuration::Seconds(20));
133+
134+
auto settings = TDescribeTableSettings().WithTableStatistics(true);
135+
for(size_t t=0; t<tables_in_store; t++) {
136+
auto describeResult = testHelper.GetSession().DescribeTable("/Root/TableStoreTest/ColumnTableTest_" + std::to_string(t), settings).GetValueSync();
137+
UNIT_ASSERT_C(describeResult.IsSuccess(), describeResult.GetIssues().ToString());
138+
const auto& description = describeResult.GetTableDescription();
139+
140+
UNIT_ASSERT_VALUES_EQUAL(t + inserted_rows, description.GetTableRows());
141+
}
142+
}
143+
}
144+
145+
} // namespace NKqp
146+
} // namespace NKikimr

ydb/core/kqp/ut/olap/ya.make

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ ELSE()
1313
ENDIF()
1414

1515
SRCS(
16+
kqp_olap_stats_ut.cpp
1617
kqp_olap_ut.cpp
1718
)
1819

ydb/core/protos/tx_datashard.proto

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -806,6 +806,8 @@ message TEvPeriodicTableStats {
806806
optional uint64 TableOwnerId = 12;
807807

808808
optional bool IsDstSplit = 13;
809+
810+
repeated TEvPeriodicTableStats Tables = 14;
809811
}
810812

811813
message TSerializedRowColumnsScheme {

ydb/core/tx/columnshard/columnshard.cpp

Lines changed: 97 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ void TColumnShard::SwitchToWork(const TActorContext& ctx) {
4646
EnqueueProgressTx(ctx);
4747
}
4848
EnqueueBackgroundActivities();
49-
ctx.Schedule(ActivationPeriod, new TEvPrivate::TEvPeriodicWakeup());
49+
ctx.Send(SelfId(), new TEvPrivate::TEvPeriodicWakeup());
5050
}
5151

5252
void TColumnShard::OnActivateExecutor(const TActorContext& ctx) {
@@ -158,7 +158,7 @@ void TColumnShard::Handle(TEvPrivate::TEvPeriodicWakeup::TPtr& ev, const TActorC
158158
SendWaitPlanStep(GetOutdatedStep());
159159

160160
SendPeriodicStats();
161-
ctx.Schedule(ActivationPeriod, new TEvPrivate::TEvPeriodicWakeup());
161+
ctx.Schedule(PeriodicWakeupActivationPeriod, new TEvPrivate::TEvPeriodicWakeup());
162162
}
163163
}
164164

@@ -293,15 +293,106 @@ void TColumnShard::UpdateResourceMetrics(const TActorContext& ctx, const TUsage&
293293
metrics->TryUpdate(ctx);
294294
}
295295

296+
void TColumnShard::ConfigureStats(const NOlap::TColumnEngineStats& indexStats, ::NKikimrTableStats::TTableStats * tabletStats) {
297+
NOlap::TSnapshot lastIndexUpdate = TablesManager.GetPrimaryIndexSafe().LastUpdate();
298+
auto activeIndexStats = indexStats.Active(); // data stats excluding inactive and evicted
299+
300+
if (activeIndexStats.Rows < 0 || activeIndexStats.Bytes < 0) {
301+
LOG_S_WARN("Negative stats counter. Rows: " << activeIndexStats.Rows
302+
<< " Bytes: " << activeIndexStats.Bytes << TabletID());
303+
304+
activeIndexStats.Rows = (activeIndexStats.Rows < 0) ? 0 : activeIndexStats.Rows;
305+
activeIndexStats.Bytes = (activeIndexStats.Bytes < 0) ? 0 : activeIndexStats.Bytes;
306+
}
307+
308+
tabletStats->SetRowCount(activeIndexStats.Rows);
309+
tabletStats->SetDataSize(activeIndexStats.Bytes + TabletCounters->Simple()[COUNTER_COMMITTED_BYTES].Get());
310+
311+
// TODO: we need row/dataSize counters for evicted data (managed by tablet but stored outside)
312+
//tabletStats->SetIndexSize(); // TODO: calc size of internal tables
313+
314+
tabletStats->SetLastAccessTime(LastAccessTime.MilliSeconds());
315+
tabletStats->SetLastUpdateTime(lastIndexUpdate.GetPlanStep());
316+
}
317+
318+
TDuration TColumnShard::GetControllerPeriodicWakeupActivationPeriod() {
319+
return NYDBTest::TControllers::GetColumnShardController()->GetPeriodicWakeupActivationPeriod(TSettings::DefaultPeriodicWakeupActivationPeriod);
320+
}
321+
322+
TDuration TColumnShard::GetControllerStatsReportInterval() {
323+
return NYDBTest::TControllers::GetColumnShardController()->GetStatsReportInterval(TSettings::DefaultStatsReportInterval);
324+
}
325+
326+
void TColumnShard::FillTxTableStats(::NKikimrTableStats::TTableStats* tableStats) const {
327+
tableStats->SetTxRejectedByOverload(TabletCounters->Cumulative()[COUNTER_WRITE_OVERLOAD].Get());
328+
tableStats->SetTxRejectedBySpace(TabletCounters->Cumulative()[COUNTER_OUT_OF_SPACE].Get());
329+
tableStats->SetInFlightTxCount(Executor()->GetStats().TxInFly);
330+
}
331+
332+
void TColumnShard::FillOlapStats(const TActorContext& ctx, std::unique_ptr<TEvDataShard::TEvPeriodicTableStats>& ev) {
333+
ev->Record.SetShardState(2); // NKikimrTxDataShard.EDatashardState.Ready
334+
ev->Record.SetGeneration(Executor()->Generation());
335+
ev->Record.SetRound(StatsReportRound++);
336+
ev->Record.SetNodeId(ctx.ExecutorThread.ActorSystem->NodeId);
337+
ev->Record.SetStartTime(StartTime().MilliSeconds());
338+
if (auto* resourceMetrics = Executor()->GetResourceMetrics()) {
339+
resourceMetrics->Fill(*ev->Record.MutableTabletMetrics());
340+
}
341+
auto* tabletStats = ev->Record.MutableTableStats();
342+
FillTxTableStats(tabletStats);
343+
if (TablesManager.HasPrimaryIndex()) {
344+
const auto& indexStats = TablesManager.MutablePrimaryIndex().GetTotalStats();
345+
ConfigureStats(indexStats, tabletStats);
346+
}
347+
}
348+
349+
void TColumnShard::FillColumnTableStats(const TActorContext& ctx, std::unique_ptr<TEvDataShard::TEvPeriodicTableStats>& ev) {
350+
if (!TablesManager.HasPrimaryIndex()) {
351+
return;
352+
}
353+
const auto& tablesIndexStats = TablesManager.MutablePrimaryIndex().GetStats();
354+
LOG_S_DEBUG("There are stats for " << tablesIndexStats.size() << " tables");
355+
for(const auto& [tableLocalID, columnStats] : tablesIndexStats) {
356+
if (!columnStats) {
357+
LOG_S_ERROR("SendPeriodicStats: empty stats");
358+
continue;
359+
}
360+
361+
auto* periodicTableStats = ev->Record.AddTables();
362+
periodicTableStats->SetDatashardId(TabletID());
363+
periodicTableStats->SetTableLocalId(tableLocalID);
364+
365+
periodicTableStats->SetShardState(2); // NKikimrTxDataShard.EDatashardState.Ready
366+
periodicTableStats->SetGeneration(Executor()->Generation());
367+
periodicTableStats->SetRound(StatsReportRound++);
368+
periodicTableStats->SetNodeId(ctx.ExecutorThread.ActorSystem->NodeId);
369+
periodicTableStats->SetStartTime(StartTime().MilliSeconds());
370+
371+
if (auto* resourceMetrics = Executor()->GetResourceMetrics()) {
372+
resourceMetrics->Fill(*periodicTableStats->MutableTabletMetrics());
373+
}
374+
375+
auto* tableStats = periodicTableStats->MutableTableStats();
376+
FillTxTableStats(tableStats);
377+
ConfigureStats(*columnStats, tableStats);
378+
379+
LOG_S_TRACE("Add stats for table, tableLocalID=" << tableLocalID);
380+
}
381+
}
382+
296383
void TColumnShard::SendPeriodicStats() {
384+
LOG_S_DEBUG("Send periodic stats.");
385+
297386
if (!CurrentSchemeShardId || !OwnerPathId) {
298387
LOG_S_DEBUG("Disabled periodic stats at tablet " << TabletID());
299388
return;
300389
}
301390

302391
const TActorContext& ctx = ActorContext();
303-
TInstant now = TAppData::TimeProvider->Now();
392+
const TInstant now = TAppData::TimeProvider->Now();
393+
304394
if (LastStatsReport + StatsReportInterval > now) {
395+
LOG_S_TRACE("Skip send periodic stats: report interavl = " << StatsReportInterval);
305396
return;
306397
}
307398
LastStatsReport = now;
@@ -313,45 +404,10 @@ void TColumnShard::SendPeriodicStats() {
313404
}
314405

315406
auto ev = std::make_unique<TEvDataShard::TEvPeriodicTableStats>(TabletID(), OwnerPathId);
316-
{
317-
ev->Record.SetShardState(2); // NKikimrTxDataShard.EDatashardState.Ready
318-
ev->Record.SetGeneration(Executor()->Generation());
319-
ev->Record.SetRound(StatsReportRound++);
320-
ev->Record.SetNodeId(ctx.ExecutorThread.ActorSystem->NodeId);
321-
ev->Record.SetStartTime(StartTime().MilliSeconds());
322-
323-
if (auto* resourceMetrics = Executor()->GetResourceMetrics()) {
324-
resourceMetrics->Fill(*ev->Record.MutableTabletMetrics());
325-
}
326407

327-
auto* tabletStats = ev->Record.MutableTableStats();
328-
tabletStats->SetTxRejectedByOverload(TabletCounters->Cumulative()[COUNTER_WRITE_OVERLOAD].Get());
329-
tabletStats->SetTxRejectedBySpace(TabletCounters->Cumulative()[COUNTER_OUT_OF_SPACE].Get());
330-
tabletStats->SetInFlightTxCount(Executor()->GetStats().TxInFly);
331-
332-
if (TablesManager.HasPrimaryIndex()) {
333-
const auto& indexStats = TablesManager.MutablePrimaryIndex().GetTotalStats();
334-
NOlap::TSnapshot lastIndexUpdate = TablesManager.GetPrimaryIndexSafe().LastUpdate();
335-
auto activeIndexStats = indexStats.Active(); // data stats excluding inactive and evicted
336-
337-
if (activeIndexStats.Rows < 0 || activeIndexStats.Bytes < 0) {
338-
LOG_S_WARN("Negative stats counter. Rows: " << activeIndexStats.Rows
339-
<< " Bytes: " << activeIndexStats.Bytes << TabletID());
340-
341-
activeIndexStats.Rows = (activeIndexStats.Rows < 0) ? 0 : activeIndexStats.Rows;
342-
activeIndexStats.Bytes = (activeIndexStats.Bytes < 0) ? 0 : activeIndexStats.Bytes;
343-
}
344-
345-
tabletStats->SetRowCount(activeIndexStats.Rows);
346-
tabletStats->SetDataSize(activeIndexStats.Bytes + TabletCounters->Simple()[COUNTER_COMMITTED_BYTES].Get());
347-
// TODO: we need row/dataSize counters for evicted data (managed by tablet but stored outside)
348-
//tabletStats->SetIndexSize(); // TODO: calc size of internal tables
349-
tabletStats->SetLastAccessTime(LastAccessTime.MilliSeconds());
350-
tabletStats->SetLastUpdateTime(lastIndexUpdate.GetPlanStep());
351-
}
352-
}
353-
354-
LOG_S_DEBUG("Sending periodic stats at tablet " << TabletID());
408+
FillOlapStats(ctx, ev);
409+
FillColumnTableStats(ctx, ev);
410+
355411
NTabletPipe::SendData(ctx, StatsReportPipe, ev.release());
356412
}
357413

ydb/core/tx/columnshard/columnshard_impl.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,8 @@ TColumnShard::TColumnShard(TTabletStorageInfo* info, const TActorId& tablet)
165165
: TActor(&TThis::StateInit)
166166
, TTabletExecutedFlat(info, tablet, nullptr)
167167
, ProgressTxController(std::make_unique<TTxController>(*this))
168+
, PeriodicWakeupActivationPeriod(GetControllerPeriodicWakeupActivationPeriod())
169+
, StatsReportInterval(GetControllerStatsReportInterval())
168170
, StoragesManager(std::make_shared<TStoragesManager>(*this))
169171
, InFlightReadsTracker(StoragesManager)
170172
, TablesManager(StoragesManager, info->TabletID)

ydb/core/tx/columnshard/columnshard_impl.h

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,8 @@ struct TSettings {
6969

7070
static constexpr ui32 MAX_INDEXATIONS_TO_SKIP = 16;
7171
static constexpr TDuration GuaranteeIndexationInterval = TDuration::Seconds(10);
72+
static constexpr TDuration DefaultPeriodicWakeupActivationPeriod = TDuration::Seconds(60);
73+
static constexpr TDuration DefaultStatsReportInterval = TDuration::Seconds(10);
7274
static constexpr i64 GuaranteeIndexationStartBytesLimit = (i64)5 * 1024 * 1024 * 1024;
7375

7476
TControlWrapper BlobWriteGrouppingEnabled;
@@ -389,9 +391,9 @@ class TColumnShard
389391
bool MediatorTimeCastRegistered = false;
390392
TSet<ui64> MediatorTimeCastWaitingSteps;
391393
TDuration MaxReadStaleness = TDuration::Minutes(5); // TODO: Make configurable?
392-
TDuration ActivationPeriod = TDuration::Seconds(60);
394+
const TDuration PeriodicWakeupActivationPeriod;
393395
TDuration FailActivationDelay = TDuration::Seconds(1);
394-
TDuration StatsReportInterval = TDuration::Seconds(10);
396+
const TDuration StatsReportInterval;
395397
TInstant LastAccessTime;
396398
TInstant LastStatsReport;
397399

@@ -482,7 +484,16 @@ class TColumnShard
482484
void UpdateIndexCounters();
483485
void UpdateResourceMetrics(const TActorContext& ctx, const TUsage& usage);
484486
ui64 MemoryUsage() const;
487+
485488
void SendPeriodicStats();
489+
void FillOlapStats(const TActorContext& ctx, std::unique_ptr<TEvDataShard::TEvPeriodicTableStats>& ev);
490+
void FillColumnTableStats(const TActorContext& ctx, std::unique_ptr<TEvDataShard::TEvPeriodicTableStats>& ev);
491+
void ConfigureStats(const NOlap::TColumnEngineStats& indexStats, ::NKikimrTableStats::TTableStats * tabletStats);
492+
void FillTxTableStats(::NKikimrTableStats::TTableStats* tableStats) const;
493+
494+
static TDuration GetControllerPeriodicWakeupActivationPeriod();
495+
static TDuration GetControllerStatsReportInterval();
496+
486497
public:
487498
const std::shared_ptr<NOlap::IStoragesManager>& GetStoragesManager() const {
488499
return StoragesManager;

ydb/core/tx/columnshard/hooks/abstract/abstract.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,12 @@ class ICSController {
9191
virtual TDuration GetGuaranteeIndexationInterval(const TDuration defaultValue) const {
9292
return defaultValue;
9393
}
94+
virtual TDuration GetPeriodicWakeupActivationPeriod(const TDuration defaultValue) const {
95+
return defaultValue;
96+
}
97+
virtual TDuration GetStatsReportInterval(const TDuration defaultValue) const {
98+
return defaultValue;
99+
}
94100
virtual ui64 GetGuaranteeIndexationStartBytesLimit(const ui64 defaultValue) const {
95101
return defaultValue;
96102
}

0 commit comments

Comments
 (0)