Skip to content

Commit

Permalink
Merge 5a0e601 into 44dc0c6
Browse files Browse the repository at this point in the history
  • Loading branch information
kunga authored Feb 22, 2024
2 parents 44dc0c6 + 5a0e601 commit 9d7b6c3
Show file tree
Hide file tree
Showing 7 changed files with 126 additions and 39 deletions.
22 changes: 16 additions & 6 deletions ydb/core/tablet_flat/flat_stat_part.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ class TStatsScreenedPartIterator {

public:
TStatsScreenedPartIterator(TPartView partView, IPages* env, TIntrusiveConstPtr<TKeyCellDefaults> keyDefaults,
TIntrusiveConstPtr<TFrames> small, TIntrusiveConstPtr<TFrames> large)
TIntrusiveConstPtr<TFrames> small, TIntrusiveConstPtr<TFrames> large,
ui64 rowCountResolution, ui64 dataSizeResolution)
: Part(std::move(partView.Part))
, KeyDefaults(std::move(keyDefaults))
, Groups(::Reserve(Part->GroupsCount))
Expand All @@ -31,11 +32,20 @@ class TStatsScreenedPartIterator {
, Large(std::move(large))
, CurrentHole(TScreen::Iter(Screen, CurrentHoleIdx, 0, 1))
{
for (ui32 groupIndex : xrange(Part->GroupsCount)) {
Groups.push_back(CreateStatsPartGroupIterator(Part.Get(), env, TGroupId(groupIndex)));
}
for (ui32 groupIndex : xrange(Part->HistoricGroupsCount)) {
HistoricGroups.push_back(CreateStatsPartGroupIterator(Part.Get(), env, TGroupId(groupIndex, true)));
for (bool historic : {false, true}) {
for (ui32 groupIndex : xrange(historic ? Part->HistoricGroupsCount : Part->GroupsCount)) {
ui64 groupRowCountResolution, groupDataSizeResolution;
if (groupIndex == 0 && Part->GroupsCount > 1) {
// make steps as small as possible because they will affect groups resolution
groupRowCountResolution = groupDataSizeResolution = 0;
} else {
groupRowCountResolution = rowCountResolution;
groupDataSizeResolution = dataSizeResolution;
}

(historic ? HistoricGroups : Groups).push_back(
CreateStatsPartGroupIterator(Part.Get(), env, TGroupId(groupIndex, historic), groupRowCountResolution, groupDataSizeResolution));
}
}
}

Expand Down
46 changes: 32 additions & 14 deletions ydb/core/tablet_flat/flat_stat_part_group_btree_index.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,27 +22,32 @@ class TStatsPartGroupBtreeIndexIterator : public IStatsPartGroupIterator {
TRowId BeginRowId;
TRowId EndRowId;
TCellsIterable BeginKey;
ui64 DataSize;
ui64 BeginDataSize;
ui64 EndDataSize;

TNodeState(TPageId pageId, TRowId beginRowId, TRowId endRowId, TCellsIterable beginKey, ui64 dataSize)
TNodeState(TPageId pageId, TRowId beginRowId, TRowId endRowId, TCellsIterable beginKey, ui64 beginDataSize, ui64 endDataSize)
: PageId(pageId)
, BeginRowId(beginRowId)
, EndRowId(endRowId)
, BeginKey(beginKey)
, DataSize(dataSize)
, BeginDataSize(beginDataSize)
, EndDataSize(endDataSize)
{
}
};

public:
TStatsPartGroupBtreeIndexIterator(const TPart* part, IPages* env, TGroupId groupId)
TStatsPartGroupBtreeIndexIterator(const TPart* part, IPages* env, TGroupId groupId,
ui64 rowCountResolution, ui64 dataSizeResolution)
: Part(part)
, Env(env)
, GroupId(groupId)
, GroupInfo(part->Scheme->GetLayout(groupId))
, Meta(groupId.IsHistoric() ? part->IndexPages.BTreeHistoric[groupId.Index] : part->IndexPages.BTreeGroups[groupId.Index])
, GroupChannel(Part->GetGroupChannel(GroupId))
, NodeIndex(0)
, RowCountResolution(rowCountResolution)
, DataSizeResolution(dataSizeResolution)
{
}

Expand All @@ -51,14 +56,22 @@ class TStatsPartGroupBtreeIndexIterator : public IStatsPartGroupIterator {

bool ready = true;
TVector<TNodeState> nextNodes;
Nodes.emplace_back(Meta.PageId, 0, GetEndRowId(), EmptyKey, Meta.DataSize);
Nodes.emplace_back(Meta.PageId, 0, GetEndRowId(), EmptyKey, 0, Meta.DataSize);

for (ui32 height = 0; height < Meta.LevelCount; height++) {
bool hasChanges = false;

for (auto &nodeState : Nodes) {
if (nodeState.EndRowId - nodeState.BeginRowId <= RowCountResolution
&& nodeState.EndDataSize - nodeState.BeginDataSize <= DataSizeResolution) {
nextNodes.push_back(nodeState); // move current node on the next level as-is
continue; // don't go deeper
}

auto page = Env->TryGetPage(Part, nodeState.PageId);
if (!page) {
ready = false;
continue;
continue; // continue requesting other nodes
}
TBtreeIndexNode node(*page);

Expand All @@ -68,14 +81,20 @@ class TStatsPartGroupBtreeIndexIterator : public IStatsPartGroupIterator {
TRowId beginRowId = pos ? node.GetShortChild(pos - 1).RowCount : nodeState.BeginRowId;
TRowId endRowId = child.RowCount;
TCellsIterable beginKey = pos ? node.GetKeyCellsIterable(pos - 1, GroupInfo.ColsKeyIdx) : nodeState.BeginKey;
ui64 dataSize = child.DataSize;
ui64 beginDataSize = pos ? node.GetShortChild(pos - 1).DataSize : nodeState.BeginDataSize;
ui64 endDataSize = child.DataSize;

nextNodes.emplace_back(child.PageId, beginRowId, endRowId, beginKey, dataSize);
nextNodes.emplace_back(child.PageId, beginRowId, endRowId, beginKey, beginDataSize, endDataSize);
hasChanges = true;
}
}

Nodes.swap(nextNodes);
nextNodes.clear();

if (!hasChanges) {
break; // don't go deeper
}
}

if (!ready) {
Expand All @@ -90,19 +109,16 @@ class TStatsPartGroupBtreeIndexIterator : public IStatsPartGroupIterator {
Y_ABORT_UNLESS(IsValid());

NodeIndex++;

Y_DEBUG_ABORT_UNLESS(NodeIndex == Nodes.size() || Nodes[NodeIndex - 1].EndRowId == Nodes[NodeIndex].BeginRowId);

return DataOrGone();
}

void AddLastDeltaDataSize(TChanneledDataSize& dataSize) override {
Y_DEBUG_ABORT_UNLESS(NodeIndex);
ui64 delta = Nodes[NodeIndex - 1].DataSize;
if (NodeIndex > 1) {
Y_DEBUG_ABORT_UNLESS(delta >= Nodes[NodeIndex - 2].DataSize);
delta -= Nodes[NodeIndex - 2].DataSize;
}
Y_DEBUG_ABORT_UNLESS(Nodes[NodeIndex - 1].EndDataSize >= Nodes[NodeIndex - 1].BeginDataSize);
ui64 delta = Nodes[NodeIndex - 1].EndDataSize - Nodes[NodeIndex - 1].BeginDataSize;
ui8 channel = Part->GetGroupChannel(GroupId);
dataSize.Add(delta, channel);
}
Expand Down Expand Up @@ -151,6 +167,8 @@ class TStatsPartGroupBtreeIndexIterator : public IStatsPartGroupIterator {
ui8 GroupChannel;
ui32 NodeIndex;
TVector<TNodeState> Nodes;
ui64 RowCountResolution;
ui64 DataSizeResolution;
};

}
4 changes: 2 additions & 2 deletions ydb/core/tablet_flat/flat_stat_part_group_iter_create.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@

namespace NKikimr::NTable {

THolder<IStatsPartGroupIterator> CreateStatsPartGroupIterator(const TPart* part, IPages* env, NPage::TGroupId groupId)
THolder<IStatsPartGroupIterator> CreateStatsPartGroupIterator(const TPart* part, IPages* env, NPage::TGroupId groupId, ui64 rowCountResolution, ui64 dataSizeResolution)
{
if (groupId.Index < (groupId.IsHistoric() ? part->IndexPages.BTreeHistoric : part->IndexPages.BTreeGroups).size()) {
return MakeHolder<TStatsPartGroupBtreeIndexIterator>(part, env, groupId);
return MakeHolder<TStatsPartGroupBtreeIndexIterator>(part, env, groupId, rowCountResolution, dataSizeResolution);
} else {
return MakeHolder<TPartIndexIt>(part, env, groupId);
}
Expand Down
2 changes: 1 addition & 1 deletion ydb/core/tablet_flat/flat_stat_part_group_iter_iface.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,6 @@ struct IStatsPartGroupIterator {
virtual ~IStatsPartGroupIterator() = default;
};

THolder<IStatsPartGroupIterator> CreateStatsPartGroupIterator(const TPart* part, IPages* env, NPage::TGroupId groupId);
THolder<IStatsPartGroupIterator> CreateStatsPartGroupIterator(const TPart* part, IPages* env, NPage::TGroupId groupId, ui64 rowCountResolution, ui64 dataSizeResolution);

}
11 changes: 10 additions & 1 deletion ydb/core/tablet_flat/flat_stat_table.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,20 @@ bool BuildStats(const TSubset& subset, TStats& stats, ui64 rowCountResolution, u
TDataStats iteratorStats = { };
TStatsIterator statsIterator(subset.Scheme->Keys);

// Collect the distinct epochs of all flattened parts; the caller-supplied
// resolutions are split between them below.
THashSet<ui64> epochs;
for (const auto& part : subset.Flatten) {
    epochs.insert(part->Epoch.ToCounter());
}
// When subset.Flatten is empty no epoch is inserted and epochs.size() is 0;
// dividing by it would be an integer division by zero, so clamp the divisor to 1.
const ui64 epochCount = epochs.empty() ? 1 : epochs.size();
// if rowCountResolution = 300, 3-leveled SST, let's move each iterator up to 25 rows
ui64 iterRowCountResolution = rowCountResolution / epochCount / 4;
ui64 iterDataSizeResolution = dataSizeResolution / epochCount / 4;

// Make index iterators for all parts
bool started = true;
for (const auto& part : subset.Flatten) {
stats.IndexSize.Add(part->IndexesRawSize, part->Label.Channel());
TAutoPtr<TStatsScreenedPartIterator> iter = new TStatsScreenedPartIterator(part, env, subset.Scheme->Keys, part->Small, part->Large);
TAutoPtr<TStatsScreenedPartIterator> iter = new TStatsScreenedPartIterator(part, env, subset.Scheme->Keys, part->Small, part->Large,
iterRowCountResolution, iterDataSizeResolution);
auto ready = iter->Start();
if (ready == EReady::Page) {
started = false;
Expand Down
8 changes: 4 additions & 4 deletions ydb/core/tablet_flat/flat_table_part_ut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ Y_UNIT_TEST_SUITE(TLegacy) {
TDataStats stats = { };
TTestEnv env;
// TScreenedPartIndexIterator without screen previously was TPartIndexIterator
TStatsScreenedPartIterator idxIter(TPartView{part, nullptr, nullptr}, &env, scheme->Keys, nullptr, nullptr);
TStatsScreenedPartIterator idxIter(TPartView{part, nullptr, nullptr}, &env, scheme->Keys, nullptr, nullptr, 0, 0);
sizes.clear();

UNIT_ASSERT_VALUES_EQUAL(idxIter.Start(), EReady::Data);
Expand Down Expand Up @@ -147,7 +147,7 @@ Y_UNIT_TEST_SUITE(TLegacy) {
TIntrusiveConstPtr<TRowScheme> scheme, TIntrusiveConstPtr<NPage::TFrames> frames) -> std::pair<ui64, ui64> {
TDataStats stats = { };
TTestEnv env;
TStatsScreenedPartIterator idxIter(TPartView{part, screen, nullptr}, &env, scheme->Keys, std::move(frames), nullptr);
TStatsScreenedPartIterator idxIter(TPartView{part, screen, nullptr}, &env, scheme->Keys, std::move(frames), nullptr, 0, 0);

UNIT_ASSERT_VALUES_EQUAL(idxIter.Start(), EReady::Data);
while (idxIter.IsValid()) {
Expand Down Expand Up @@ -308,8 +308,8 @@ Y_UNIT_TEST_SUITE(TLegacy) {
TTestEnv env;
TStatsIterator stIter(lay2.RowScheme()->Keys);
{
auto it1 = MakeHolder<TStatsScreenedPartIterator>(TPartView{eggs2.At(0), screen2, nullptr}, &env, lay2.RowScheme()->Keys, nullptr, nullptr);
auto it2 = MakeHolder<TStatsScreenedPartIterator>(TPartView{eggs1.At(0), screen1, nullptr}, &env, lay2.RowScheme()->Keys, nullptr, nullptr);
auto it1 = MakeHolder<TStatsScreenedPartIterator>(TPartView{eggs2.At(0), screen2, nullptr}, &env, lay2.RowScheme()->Keys, nullptr, nullptr, 0, 0);
auto it2 = MakeHolder<TStatsScreenedPartIterator>(TPartView{eggs1.At(0), screen1, nullptr}, &env, lay2.RowScheme()->Keys, nullptr, nullptr, 0, 0);
UNIT_ASSERT_VALUES_EQUAL(it1->Start(), EReady::Data);
UNIT_ASSERT_VALUES_EQUAL(it2->Start(), EReady::Data);
stIter.Add(std::move(it1));
Expand Down
72 changes: 61 additions & 11 deletions ydb/core/tablet_flat/ut/ut_stat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,18 @@ namespace {
TMap<TGroupId, TSet<TPageId>> Touched;
};

NPage::TConf PageConf(size_t groups, bool writeBTreeIndex) noexcept
NPage::TConf PageConf(size_t groups, bool writeBTreeIndex, bool lowResolution = false) noexcept
{
NPage::TConf conf{ true, 2 * 1024 };

conf.Groups.resize(groups);
for (size_t group : xrange(groups)) {
conf.Group(group).IndexMin = 1024; /* Should cover index buffer grow code */
conf.Group(group).BTreeIndexNodeTargetSize = 512; /* Should cover up/down moves */
if (lowResolution) {
// make more levels
conf.Group(group).BTreeIndexNodeKeysMin = conf.Group(group).BTreeIndexNodeKeysMax = 2;
}
}
conf.SmallEdge = 19; /* Packed to page collection large cell values */
conf.LargeEdge = 29; /* Large values placed to single blobs */
Expand All @@ -57,14 +61,25 @@ namespace {
}
}

template<typename TEnv>
void Check(const TSubset& subset, ui64 expectedRows, ui64 expectedData, ui64 expectedIndex) {
void Check(const TSubset& subset, THistogram histogram, ui64 resolution) {
ui64 additionalErrorRate = 1;
if (subset.Flatten.size() > 1 && subset.Flatten[0]->GroupsCount > 1) {
additionalErrorRate = 2;
}
for (ui32 i = 1; i < histogram.size(); i++) {
auto delta = histogram[i].Value - histogram[i - 1].Value;
UNIT_ASSERT_GE_C(delta, resolution, "Delta = " << delta << " Resolution = " << resolution);
UNIT_ASSERT_LE_C(delta, resolution * additionalErrorRate * 3 / 2, "Delta = " << delta << " Resolution = " << resolution);
}
}

void Check(const TSubset& subset, ui64 expectedRows, ui64 expectedData, ui64 expectedIndex, ui64 rowCountResolution = 531, ui64 dataSizeResolution = 53105) {
TStats stats;
TEnv env;
TTouchEnv env;

const ui32 attempts = 10;
for (ui32 attempt : xrange(attempts)) {
if (NTable::BuildStats(subset, stats, 531, 53105, &env)) {
if (NTable::BuildStats(subset, stats, rowCountResolution, dataSizeResolution, &env)) {
break;
}
UNIT_ASSERT_C(attempt + 1 < attempts, "Too many attempts");
Expand All @@ -77,14 +92,10 @@ namespace {

Cerr << "RowCountHistogram:" << Endl;
Dump(subset, stats.RowCountHistogram);
Check(subset, stats.RowCountHistogram, rowCountResolution);
Cerr << "DataSizeHistogram:" << Endl;
Dump(subset, stats.DataSizeHistogram);
}


void Check(const TSubset& subset, ui64 expectedRows, ui64 expectedData, ui64 expectedIndex) {
Check<TTestEnv>(subset, expectedRows, expectedData, expectedIndex);
Check<TTouchEnv>(subset, expectedRows, expectedData, expectedIndex);
Check(subset, stats.DataSizeHistogram, dataSizeResolution);
}
}

Expand Down Expand Up @@ -246,6 +257,45 @@ Y_UNIT_TEST_SUITE(BuildStats) {
auto subset = TMake(Mass1, PageConf(Mass1.Model->Scheme->Families.size(), false)).Mixed(0, 4, mixer, 0.3);
Check(*subset, 24000, 4054290, 19168);
}

// Builds stats over Mass0 with a B-tree index written in low-resolution mode
// (PageConf's third argument pins BTreeIndexNodeKeysMin/Max to 2 to make more
// index levels). Check receives expectedRows, expectedData, expectedIndex and
// then rowCountResolution = 5310, dataSizeResolution = 531050.
// NOTE(review): Mixed(0, 1, ...) presumably produces a single part — confirm.
Y_UNIT_TEST(Single_LowResolution_BTreeIndex)
{
auto subset = TMake(Mass0, PageConf(Mass0.Model->Scheme->Families.size(), true, true)).Mixed(0, 1, TMixerOne{ });
Check(*subset, 24000, 2106439, 56610, 5310, 531050);
}

// Same low-resolution B-tree configuration over Mass0 (PageConf's third
// argument pins BTreeIndexNodeKeysMin/Max to 2), with two extra Mixed()
// arguments (0, 13). The first part's Slices are dumped to Cerr before
// Check runs with rowCountResolution = 5310, dataSizeResolution = 531050.
// NOTE(review): the trailing 13 presumably controls slicing — confirm against Mixed().
Y_UNIT_TEST(Single_Slices_LowResolution_BTreeIndex)
{
auto subset = TMake(Mass0, PageConf(Mass0.Model->Scheme->Families.size(), true, true)).Mixed(0, 1, TMixerOne{ }, 0, 13);
subset->Flatten.begin()->Slices->Describe(Cerr); Cerr << Endl;
Check(*subset, 12816, 1121048, 56610, 5310, 531050);
}

// Builds stats over Mass1 with a low-resolution B-tree index (PageConf's third
// argument pins BTreeIndexNodeKeysMin/Max to 2). Check receives expectedRows,
// expectedData, expectedIndex, then rowCountResolution = 5310 and
// dataSizeResolution = 531050.
// NOTE(review): Mass1 presumably has several column groups — confirm.
Y_UNIT_TEST(Single_Groups_LowResolution_BTreeIndex)
{
auto subset = TMake(Mass1, PageConf(Mass1.Model->Scheme->Families.size(), true, true)).Mixed(0, 1, TMixerOne{ });
Check(*subset, 24000, 2460139, 29557, 5310, 531050);
}

// Mass1 with a low-resolution B-tree index (PageConf's third argument pins
// BTreeIndexNodeKeysMin/Max to 2) and two extra Mixed() arguments (0, 13).
// The first part's Slices are dumped to Cerr before Check runs with
// rowCountResolution = 5310, dataSizeResolution = 531050.
// NOTE(review): the trailing 13 presumably controls slicing — confirm against Mixed().
Y_UNIT_TEST(Single_Groups_Slices_LowResolution_BTreeIndex)
{
auto subset = TMake(Mass1, PageConf(Mass1.Model->Scheme->Families.size(), true, true)).Mixed(0, 1, TMixerOne{ }, 0, 13);
subset->Flatten.begin()->Slices->Describe(Cerr); Cerr << Endl;
Check(*subset, 10440, 1060798, 29557, 5310, 531050);
}

// Mass1 with a low-resolution B-tree index (PageConf's third argument pins
// BTreeIndexNodeKeysMin/Max to 2) and a fourth Mixed() argument of 0.3.
// Check receives expectedRows, expectedData, expectedIndex, then
// rowCountResolution = 5310 and dataSizeResolution = 531050.
// NOTE(review): 0.3 presumably is the share of rows written with history — confirm.
Y_UNIT_TEST(Single_Groups_History_LowResolution_BTreeIndex)
{
auto subset = TMake(Mass1, PageConf(Mass1.Model->Scheme->Families.size(), true, true)).Mixed(0, 1, TMixerOne{ }, 0.3);
Check(*subset, 24000, 4054050, 42292, 5310, 531050);
}

// Mass1 with a low-resolution B-tree index (PageConf's third argument pins
// BTreeIndexNodeKeysMin/Max to 2) and extra Mixed() arguments 0.3 and 13.
// The first part's Slices are dumped to Cerr before Check runs with
// rowCountResolution = 5310, dataSizeResolution = 531050.
// NOTE(review): 0.3 and 13 presumably control history share and slicing — confirm.
Y_UNIT_TEST(Single_Groups_History_Slices_LowResolution_BTreeIndex)
{
auto subset = TMake(Mass1, PageConf(Mass1.Model->Scheme->Families.size(), true, true)).Mixed(0, 1, TMixerOne{ }, 0.3, 13);
subset->Flatten.begin()->Slices->Describe(Cerr); Cerr << Endl;
Check(*subset, 13570, 2277890, 42292, 5310, 531050);
}
}

}

0 comments on commit 9d7b6c3

Please sign in to comment.