Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable B-Tree index #4417

Merged
merged 6 commits into from
May 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion ydb/core/protos/feature_flags.proto
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ message TFeatureFlags {
optional bool EnableStatistics = 106 [default = false];
optional bool EnableUuidAsPrimaryKey = 107 [default = false];
optional bool EnableTablePgTypes = 108 [default = false];
optional bool EnableLocalDBBtreeIndex = 109 [default = false];
optional bool EnableLocalDBBtreeIndex = 109 [default = true];
optional bool EnablePDiskHighHDDInFlight = 110 [default = false];
optional bool UseVDisksBalancing = 111 [default = false];
optional bool EnableViews = 112 [default = false];
Expand Down
86 changes: 72 additions & 14 deletions ydb/core/tablet_flat/flat_executor_ut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5197,7 +5197,7 @@ Y_UNIT_TEST_SUITE(TFlatTableExecutorIndexLoading) {
}

for (ui64 leadKey = 1; ; leadKey += rowsCount / 10) {
ui64 expectedRowsCount = rowsCount > leadKey ? rowsCount - leadKey + 1 : 0;
ui64 expectedRowsCount = rowsCount >= leadKey ? rowsCount - leadKey + 1 : 0;
auto queueScan = new TEvTestFlatTablet::TEvQueueScan(expectedRowsCount);
queueScan->ReadAhead = {5*10*1024, 10*10*1024};
TVector<TCell> leadKey_ = {TCell::Make(leadKey)};
Expand Down Expand Up @@ -5264,7 +5264,7 @@ Y_UNIT_TEST_SUITE(TFlatTableExecutorIndexLoading) {
}

for (ui64 leadKey = 1; ; leadKey += rowsCount / 10) {
ui64 expectedRowsCount = rowsCount > leadKey ? rowsCount - leadKey + 1 : 0;
ui64 expectedRowsCount = rowsCount >= leadKey ? rowsCount - leadKey + 1 : 0;
auto queueScan = new TEvTestFlatTablet::TEvQueueScan(expectedRowsCount);
queueScan->ReadAhead = {5*10*1024, 10*10*1024};
TVector<TCell> leadKey_ = {TCell::Make(leadKey)};
Expand Down Expand Up @@ -5330,7 +5330,7 @@ Y_UNIT_TEST_SUITE(TFlatTableExecutorIndexLoading) {
}

for (ui64 leadKey = 1; ; leadKey += rowsCount / 10) {
ui64 expectedRowsCount = rowsCount > leadKey ? rowsCount - leadKey + 1 : 0;
ui64 expectedRowsCount = rowsCount >= leadKey ? rowsCount - leadKey + 1 : 0;
auto queueScan = new TEvTestFlatTablet::TEvQueueScan(expectedRowsCount, TRowVersion(2, 0));
queueScan->ReadAhead = {5*10*1024, 10*10*1024};
TVector<TCell> leadKey_ = {TCell::Make(leadKey)};
Expand Down Expand Up @@ -5398,7 +5398,7 @@ Y_UNIT_TEST_SUITE(TFlatTableExecutorIndexLoading) {
}

for (ui64 leadKey = 1; ; leadKey += rowsCount / 10) {
ui64 expectedRowsCount = rowsCount > leadKey ? rowsCount - leadKey + 1 : 0;
ui64 expectedRowsCount = rowsCount >= leadKey ? rowsCount - leadKey + 1 : 0;
auto queueScan = new TEvTestFlatTablet::TEvQueueScan(expectedRowsCount, TRowVersion(2, 0));
queueScan->ReadAhead = {5*10*1024, 10*10*1024};
TVector<TCell> leadKey_ = {TCell::Make(leadKey)};
Expand Down Expand Up @@ -5463,7 +5463,7 @@ Y_UNIT_TEST_SUITE(TFlatTableExecutorIndexLoading) {
}

for (ui64 leadKey = 1; ; leadKey += rowsCount / 10) {
ui64 expectedRowsCount = rowsCount > leadKey ? rowsCount - leadKey + 1 : 0;
ui64 expectedRowsCount = rowsCount >= leadKey ? rowsCount - leadKey + 1 : 0;
auto queueScan = new TEvTestFlatTablet::TEvQueueScan(expectedRowsCount);
queueScan->ReadAhead = {5*10*1024, 10*10*1024};
TVector<TCell> leadKey_ = {TCell::Make(leadKey)};
Expand Down Expand Up @@ -5498,7 +5498,7 @@ Y_UNIT_TEST_SUITE(TFlatTableExecutorIndexLoading) {

auto policy = MakeIntrusive<TCompactionPolicy>();
policy->MinBTreeIndexNodeSize = 128;
env.SendSync(rows.MakeScheme(std::move(policy)));
env.SendSync(rows.MakeScheme(std::move(policy), true));

env.SendSync(rows.MakeRows(rowsCount, 10*1024));

Expand All @@ -5508,29 +5508,29 @@ Y_UNIT_TEST_SUITE(TFlatTableExecutorIndexLoading) {
{ // no read ahead
auto queueScan = new TEvTestFlatTablet::TEvQueueScan(rowsCount);
queueScan->ReadAhead = {1, 1};
queueScan->ExpectedPageFaults = 1028;
queueScan->ExpectedPageFaults = 1032;
env.SendAsync(std::move(queueScan));
env.WaitFor<TEvTestFlatTablet::TEvScanFinished>();
}

{ // small read ahead
auto queueScan = new TEvTestFlatTablet::TEvQueueScan(rowsCount);
queueScan->ReadAhead = {5*10*1024, 10*10*1024};
queueScan->ExpectedPageFaults = 189;
queueScan->ExpectedPageFaults = 191;
env.SendAsync(std::move(queueScan));
env.WaitFor<TEvTestFlatTablet::TEvScanFinished>();
}

{ // infinite read ahead
auto queueScan = new TEvTestFlatTablet::TEvQueueScan(rowsCount);
queueScan->ReadAhead = {Max<ui64>(), Max<ui64>()};
queueScan->ExpectedPageFaults = 5;
queueScan->ExpectedPageFaults = 7;
env.SendAsync(std::move(queueScan));
env.WaitFor<TEvTestFlatTablet::TEvScanFinished>();
}

for (ui64 leadKey = 1; ; leadKey += rowsCount / 10) {
ui64 expectedRowsCount = rowsCount > leadKey ? rowsCount - leadKey + 1 : 0;
ui64 expectedRowsCount = rowsCount >= leadKey ? rowsCount - leadKey + 1 : 0;
auto queueScan = new TEvTestFlatTablet::TEvQueueScan(expectedRowsCount);
queueScan->ReadAhead = {5*10*1024, 10*10*1024};
TVector<TCell> leadKey_ = {TCell::Make(leadKey)};
Expand All @@ -5546,6 +5546,64 @@ Y_UNIT_TEST_SUITE(TFlatTableExecutorIndexLoading) {
env.SendSync(new TEvents::TEvPoison, false, true);
}

// Scans a tiny table (10 rows, built with MakeRows(rowsCount, 10)) with the b-tree index
// feature flag enabled and the flat index feature flag disabled, and checks the number of
// page faults each scan is expected to produce.
// NOTE(review): the "Empty" suffix presumably means the b-tree index of such a small part
// has zero levels — confirm against the part writer.
Y_UNIT_TEST(Scan_Groups_BTreeIndex_Empty) {
TMyEnvBase env;
TRowsModel rows;
const ui32 rowsCount = 10;

// Force the b-tree index on and the flat index off, regardless of the flag defaults.
auto &appData = env->GetAppData();
appData.FeatureFlags.SetEnableLocalDBBtreeIndex(true);
appData.FeatureFlags.SetEnableLocalDBFlatIndex(false);

env->SetLogPriority(NKikimrServices::TABLET_OPS_HOST, NActors::NLog::PRI_DEBUG);

env.FireTablet(env.Edge, env.Tablet, [&env](const TActorId &tablet, TTabletStorageInfo *info) {
return new TTestFlatTablet(env.Edge, tablet, info);
});
env.WaitForWakeUp();
ZeroSharedCache(env);

// Default compaction policy; the meaning of the second argument (true) is defined by
// TRowsModel::MakeScheme, which is not visible here.
env.SendSync(rows.MakeScheme(new TCompactionPolicy(), true));

env.SendSync(rows.MakeRows(rowsCount, 10));

// Compact the table and wait for completion before scanning.
env.SendSync(new NFake::TEvCompact(TRowsModel::TableId));
env.WaitFor<NFake::TEvCompacted>();

// Full scan with the minimal read ahead setting: all rows, 2 page faults expected.
{ // no read ahead
auto queueScan = new TEvTestFlatTablet::TEvQueueScan(rowsCount);
queueScan->ReadAhead = {1, 1};
queueScan->ExpectedPageFaults = 2;
env.SendAsync(std::move(queueScan));
env.WaitFor<TEvTestFlatTablet::TEvScanFinished>();
}

// Same full scan with a larger read ahead window: the expected page faults stay at 2.
{ // read ahead
auto queueScan = new TEvTestFlatTablet::TEvQueueScan(rowsCount);
queueScan->ReadAhead = {5*10*1024, 10*10*1024};
queueScan->ExpectedPageFaults = 2;
env.SendAsync(std::move(queueScan));
env.WaitFor<TEvTestFlatTablet::TEvScanFinished>();
}

// Scan from every lead key 1..rowsCount+1. The last iteration (leadKey == rowsCount + 1)
// expects zero rows and zero page faults and terminates the loop.
for (ui64 leadKey = 1; ; leadKey++) {
// Rows with keys in [leadKey, rowsCount] are expected; none once leadKey exceeds rowsCount.
ui64 expectedRowsCount = rowsCount >= leadKey ? rowsCount - leadKey + 1 : 0;
auto queueScan = new TEvTestFlatTablet::TEvQueueScan(expectedRowsCount);
queueScan->ReadAhead = {5*10*1024, 10*10*1024};
// leadKey_ stays in scope until the scan finishes below.
TVector<TCell> leadKey_ = {TCell::Make(leadKey)};
queueScan->LeadKey = leadKey_;
queueScan->ExpectedPageFaults = expectedRowsCount ? 2 : 0;
env.SendAsync(std::move(queueScan));
env.WaitFor<TEvTestFlatTablet::TEvScanFinished>();
if (!expectedRowsCount) {
break;
}
}

// If we didn't crash, then assume the test succeeded
env.SendSync(new TEvents::TEvPoison, false, true);
}

}

Y_UNIT_TEST_SUITE(TFlatTableExecutorStickyPages) {
Expand Down Expand Up @@ -6064,7 +6122,7 @@ Y_UNIT_TEST_SUITE(TFlatTableExecutorBTreeIndex) {
}
};

Y_UNIT_TEST(EnableLocalDBBtreeIndex_Default) { // uses flat index
Y_UNIT_TEST(EnableLocalDBBtreeIndex_Default) { // uses b-tree index
TMyEnvBase env;
TRowsModel rows;

Expand All @@ -6086,8 +6144,8 @@ Y_UNIT_TEST_SUITE(TFlatTableExecutorBTreeIndex) {
env.SendSync(new NFake::TEvCompact(TRowsModel::TableId));
env.WaitFor<NFake::TEvCompacted>();

// all pages are always kept in shared cache
UNIT_ASSERT_VALUES_EQUAL(counters->ActivePages->Val(), 290);
// all pages are always kept in shared cache (except flat index)
UNIT_ASSERT_VALUES_EQUAL(counters->ActivePages->Val(), 334);

env.SendSync(new NFake::TEvExecute{ new TTxFullScan(readRows, failedAttempts) });
UNIT_ASSERT_VALUES_EQUAL(readRows, 1000);
Expand All @@ -6100,7 +6158,7 @@ Y_UNIT_TEST_SUITE(TFlatTableExecutorBTreeIndex) {
// after restart we have no pages in private cache
env.SendSync(new NFake::TEvExecute{ new TTxFullScan(readRows, failedAttempts) }, true);
UNIT_ASSERT_VALUES_EQUAL(readRows, 1000);
UNIT_ASSERT_VALUES_EQUAL(failedAttempts, 288);
UNIT_ASSERT_VALUES_EQUAL(failedAttempts, 332);
}

Y_UNIT_TEST(EnableLocalDBBtreeIndex_True) { // uses b-tree index
Expand Down
4 changes: 3 additions & 1 deletion ydb/core/tablet_flat/flat_fwd_cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,9 @@ namespace NFwd {
auto& meta = Part->IndexPages.GetBTree(groupId);
Levels.resize(meta.LevelCount + 1);
Levels[0].Queue.push_back({meta.PageId, meta.DataSize});
IndexPageLocator.Add(meta.PageId, GroupId, 0);
if (meta.LevelCount) {
IndexPageLocator.Add(meta.PageId, GroupId, 0);
}
}

~TBTreeIndexCache()
Expand Down
2 changes: 1 addition & 1 deletion ydb/core/tablet_flat/flat_page_conf.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ namespace NPage {

bool Final = true;
bool CutIndexKeys = true;
bool WriteBTreeIndex = false;
bool WriteBTreeIndex = true;
bool WriteFlatIndex = true;
ui32 MaxLargeBlob = 8 * 1024 * 1024 - 8; /* Maximum large blob size */
ui32 LargeEdge = Max<ui32>(); /* External blob edge size */
Expand Down
12 changes: 12 additions & 0 deletions ydb/core/tablet_flat/flat_part_index_iter_flat_index.h
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,18 @@ class TPartGroupFlatIndexIter : public IPartGroupIndexIter, public IStatsPartGro
return Iter.GetRecord()->Cell(GroupInfo.ColsKeyIdx[index]);
}

// Replaces the contents of keyCells with the key cells of the index record the iterator
// currently points at, one cell per entry of GroupInfo.KeyTypes.
// Aborts if Index or Iter is not set.
void GetKeyCells(TSmallVec<TCell>& keyCells) const override {
    keyCells.clear();

    Y_ABORT_UNLESS(Index);
    Y_ABORT_UNLESS(Iter);

    auto currentRecord = Iter.GetRecord();
    const size_t keyCount = GroupInfo.KeyTypes.size();
    for (size_t keyPos = 0; keyPos < keyCount; ++keyPos) {
        // ColsKeyIdx maps the key position to the cell index inside the record.
        keyCells.push_back(currentRecord->Cell(GroupInfo.ColsKeyIdx[keyPos]));
    }
}

const TRecord * GetRecord() const {
Y_ABORT_UNLESS(Index);
Y_ABORT_UNLESS(Iter);
Expand Down
11 changes: 3 additions & 8 deletions ydb/core/tablet_flat/flat_stat_part.h
Original file line number Diff line number Diff line change
Expand Up @@ -190,17 +190,12 @@ class TStatsScreenedPartIterator {
if (!IsValid())
return;

ui32 keyIdx = 0;
// Add columns that are present in the part
if (ui32 keyCellsCount = Groups[0]->GetKeyCellsCount()) {
for (;keyIdx < keyCellsCount; ++keyIdx) {
CurrentKey.push_back(Groups[0]->GetKeyCell(keyIdx));
}
}
Groups[0]->GetKeyCells(CurrentKey);

// Extend with default values if needed
for (;keyIdx < KeyDefaults->Defs.size(); ++keyIdx) {
CurrentKey.push_back(KeyDefaults->Defs[keyIdx]);
for (ui32 index = CurrentKey.size(); index < KeyDefaults->Defs.size(); ++index) {
CurrentKey.push_back(KeyDefaults->Defs[index]);
}
}

Expand Down
10 changes: 10 additions & 0 deletions ydb/core/tablet_flat/flat_stat_part_group_btree_index.h
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,16 @@ class TStatsPartGroupBtreeIndexIter : public IStatsPartGroupIter {
return GetCurrentNode().BeginKey.Iter().At(index);
}

// Replaces the contents of keyCells with the cells of the current node's BeginKey:
// Count() cells, taken by successive Next() calls on a fresh key iterator.
void GetKeyCells(TSmallVec<TCell>& keyCells) const override {
    keyCells.clear();

    auto beginKeyIter = GetCurrentNode().BeginKey.Iter();
    // The count is read once, before any Next() call advances the iterator.
    const auto cellCount = beginKeyIter.Count();
    for (auto remaining = cellCount; remaining > 0; --remaining) {
        keyCells.push_back(beginKeyIter.Next());
    }
}

private:
EReady DataOrGone() const {
return IsValid() ? EReady::Data : EReady::Gone;
Expand Down
1 change: 1 addition & 0 deletions ydb/core/tablet_flat/flat_stat_part_group_iter_iface.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ struct IStatsPartGroupIter {

virtual TPos GetKeyCellsCount() const = 0;
virtual TCell GetKeyCell(TPos index) const = 0;
virtual void GetKeyCells(TSmallVec<TCell>& keyCells) const = 0;

virtual ~IStatsPartGroupIter() = default;
};
Expand Down
2 changes: 1 addition & 1 deletion ydb/core/tablet_flat/test/libs/table/test_wreck.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ namespace NTest {
} else if (cache == EWreck::Forward) {
Y_ABORT_UNLESS(Direction == EDirection::Forward, "ForwardEnv may only be used with forward iteration");

TWrap wrap(eggs, { nullptr, 4 /* worst case: main, next, groups, blobs */, false }, std::forward<TArgs>(args)...);
TWrap wrap(eggs, { nullptr, 10 /* worst case: main, next, groups, blobs, plus b-tree index */, false }, std::forward<TArgs>(args)...);

auto make = []() { return new TForwardEnv(512, 1024); };

Expand Down
3 changes: 1 addition & 2 deletions ydb/core/tablet_flat/ut/ut_btree_index_nodes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -670,8 +670,7 @@ Y_UNIT_TEST_SUITE(TBtreeIndexTPart) {
Y_UNIT_TEST(Conf) {
NPage::TConf conf;

// do not accidentally turn this setting on in trunk
UNIT_ASSERT_VALUES_EQUAL(conf.WriteBTreeIndex, false);
UNIT_ASSERT_VALUES_EQUAL(conf.WriteBTreeIndex, true);
UNIT_ASSERT_VALUES_EQUAL(conf.WriteFlatIndex, true);
}

Expand Down
6 changes: 3 additions & 3 deletions ydb/core/tablet_flat/ut/ut_slice_loader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,7 @@ Y_UNIT_TEST_SUITE(TPartSliceLoader) {
screen = new TScreen(std::move(holes));
}
auto result = RunLoaderTest(Part0(), screen);
UNIT_ASSERT_VALUES_EQUAL_C(result.Pages, (IsBTreeIndex() ? 70 : 1) + IndexTools::CountMainPages(*Part0()), // index + all data pages
UNIT_ASSERT_VALUES_EQUAL_C(result.Pages, (IsBTreeIndex() ? 84 : 1) + IndexTools::CountMainPages(*Part0()), // index + all data pages
"Restoring slice bounds needed " << result.Pages << " extra pages");
}

Expand All @@ -261,7 +261,7 @@ Y_UNIT_TEST_SUITE(TPartSliceLoader) {
screen = new TScreen(std::move(holes));
}
auto result = RunLoaderTest(Part0(), screen);
UNIT_ASSERT_VALUES_EQUAL_C(result.Pages, (IsBTreeIndex() ? 70 : 1) + IndexTools::CountMainPages(*Part0()), // index + all data pages
UNIT_ASSERT_VALUES_EQUAL_C(result.Pages, (IsBTreeIndex() ? 84 : 1) + IndexTools::CountMainPages(*Part0()), // index + all data pages
"Restoring slice bounds needed " << result.Pages << " extra pages");
}

Expand Down Expand Up @@ -290,7 +290,7 @@ Y_UNIT_TEST_SUITE(TPartSliceLoader) {
screen = new TScreen(std::move(holes));
}
auto result = RunLoaderTest(Part0(), screen);
UNIT_ASSERT_VALUES_EQUAL_C(result.Pages, (IsBTreeIndex() ? 70 : 1) + screen->Size(), // index + data pages
UNIT_ASSERT_VALUES_EQUAL_C(result.Pages, (IsBTreeIndex() ? 84 : 1) + screen->Size(), // index + data pages
"Restoring slice bounds needed " << result.Pages <<
" extra pages, expected " << screen->Size());
}
Expand Down
4 changes: 2 additions & 2 deletions ydb/core/tx/datashard/datashard_ut_stats.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -199,11 +199,11 @@ Y_UNIT_TEST_SUITE(DataShardStats) {
UNIT_ASSERT_VALUES_EQUAL(stats.GetTableStats().GetRowCount(), count);
UNIT_ASSERT_VALUES_EQUAL(stats.GetTableStats().GetPartCount(), 1);
UNIT_ASSERT_VALUES_EQUAL(stats.GetTableStats().GetDataSize(), 30100);
UNIT_ASSERT_VALUES_EQUAL(stats.GetTableStats().GetIndexSize(), bTreeIndex ? 193u : 138);
UNIT_ASSERT_VALUES_EQUAL(stats.GetTableStats().GetIndexSize(), bTreeIndex ? 233 : 138);

UNIT_ASSERT_VALUES_EQUAL(stats.GetTableStats().GetChannels()[0].GetChannel(), 1);
UNIT_ASSERT_VALUES_EQUAL(stats.GetTableStats().GetChannels()[0].GetDataSize(), 30100);
UNIT_ASSERT_VALUES_EQUAL(stats.GetTableStats().GetChannels()[0].GetIndexSize(), bTreeIndex ? 193u : 138);
UNIT_ASSERT_VALUES_EQUAL(stats.GetTableStats().GetChannels()[0].GetIndexSize(), bTreeIndex ? 233 : 138);
}

{
Expand Down
3 changes: 2 additions & 1 deletion ydb/core/tx/schemeshard/ut_subdomain/ut_subdomain.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3542,6 +3542,7 @@ Y_UNIT_TEST_SUITE(TStoragePoolsQuotasTest) {
opts.EnablePersistentPartitionStats(true);
opts.EnableBackgroundCompaction(false);
TTestEnv env(runtime, opts);
bool bTreeIndex = runtime.GetAppData().FeatureFlags.GetEnableLocalDBBtreeIndex();

NDataShard::gDbStatsReportInterval = TDuration::Seconds(0);
NDataShard::gDbStatsDataSizeResolution = 1;
Expand Down Expand Up @@ -3669,7 +3670,7 @@ Y_UNIT_TEST_SUITE(TStoragePoolsQuotasTest) {
// The logic of the test expects:
// batchSizes[0] <= batchSizes[1] <= batchSizes[2],
// because rows are never deleted, only updated.
constexpr std::array<ui32, 3> batchSizes = {25, 35, 50};
const std::array<ui32, 3> batchSizes = {25, 35, bTreeIndex ? 60u : 50u};

constexpr const char* longText = "this_text_is_very_long_and_takes_a_lot_of_disk_space";
constexpr const char* middleLengthText = "this_text_is_significantly_shorter";
Expand Down
Loading