Skip to content

Commit

Permalink
Add a new compaction priority that picks file whose overlapping ratio…
Browse files Browse the repository at this point in the history
… is smallest

Summary:
Add a new compaction priority as follows:
For every file, we calculate the total size of the files in the next level that overlap with it, divided by the file's own size. The file with the smallest ratio will be picked first.
My "db_bench --fillrandom" run shows about 5% less compaction than kOldestSmallestSeqFirst when --hard_pending_compaction_bytes_limit is set to a value that keeps the LSM tree in shape. Without limiting hard_pending_compaction_bytes_limit, the improvement is only 1% or 2%.

Test Plan: Add a unit test

Reviewers: andrewkr, kradhakrishnan, anthony, IslamAbdelRahman, yhchiang

Reviewed By: yhchiang

Subscribers: MarkCallaghan, leveldb, dhruba

Differential Revision: https://reviews.facebook.net/D54075
  • Loading branch information
siying committed Feb 11, 2016
1 parent 3dc3d1c commit 92a9ccf
Show file tree
Hide file tree
Showing 5 changed files with 141 additions and 2 deletions.
2 changes: 2 additions & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# Rocksdb Change Log
## Unreleased
### New Features
* Add CompactionPri::kMinOverlappingRatio, a compaction picking mode friendly to write amplification.

## 4.5.0 (2/5/2016)
### Public API Changes
Expand Down
81 changes: 81 additions & 0 deletions db/compaction_picker_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -487,6 +487,87 @@ TEST_F(CompactionPickerTest, NeedsCompactionFIFO) {
}
#endif // ROCKSDB_LITE

// With kMinOverlappingRatio, a level-2 file that overlaps nothing on level 3
// must be chosen ahead of level-2 files that do overlap level-3 data.
TEST_F(CompactionPickerTest, CompactionPriMinOverlapping1) {
  NewVersionStorage(6, kCompactionStyleLevel);
  mutable_cf_options_.compaction_pri = kMinOverlappingRatio;
  mutable_cf_options_.target_file_size_multiplier = 10;
  mutable_cf_options_.target_file_size_base = 10000000;

  // Level 2: four equally sized files.
  Add(2, 6U, "150", "179", 50000000U);
  Add(2, 7U, "180", "220", 50000000U);
  Add(2, 8U, "321", "400", 50000000U);  // No level-3 file covers this range
  Add(2, 9U, "721", "800", 50000000U);

  // Level 3: five equally sized files; none of them touches ["321", "400"].
  Add(3, 26U, "150", "170", 260000000U);
  Add(3, 27U, "171", "179", 260000000U);
  Add(3, 28U, "191", "220", 260000000U);
  Add(3, 29U, "221", "300", 260000000U);
  Add(3, 30U, "750", "900", 260000000U);
  UpdateVersionStorageInfo();

  std::unique_ptr<Compaction> picked(level_compaction_picker.PickCompaction(
      cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_));
  ASSERT_TRUE(picked != nullptr);
  ASSERT_EQ(1U, picked->num_input_files(0));

  // File 8 is expected because it overlaps with 0 files on level 3.
  const uint64_t picked_number = picked->input(0, 0)->fd.GetNumber();
  ASSERT_EQ(8U, picked_number);
}

// With kMinOverlappingRatio and equally sized level-2 files, the file with the
// fewest overlapping bytes on level 3 has the smallest ratio and is picked.
TEST_F(CompactionPickerTest, CompactionPriMinOverlapping2) {
  NewVersionStorage(6, kCompactionStyleLevel);
  mutable_cf_options_.target_file_size_base = 10000000;
  mutable_cf_options_.target_file_size_multiplier = 10;
  mutable_cf_options_.compaction_pri = kMinOverlappingRatio;

  Add(2, 6U, "150", "175",
      60000000U);  // Overlaps with file 26, 27, total size 521M
  Add(2, 7U, "176", "200", 60000000U);  // Overlaps with file 27, 28, total size
                                        // 520M, the smallest overlapping
  Add(2, 8U, "201", "300",
      60000000U);  // Overlaps with file 28, 29, total size 521M

  // Every level-3 file gets its own file number so that the overlap comments
  // above identify exactly one file each.
  Add(3, 25U, "100", "110", 261000000U);  // Precedes every level-2 file
  Add(3, 26U, "150", "170", 261000000U);
  Add(3, 27U, "171", "179", 260000000U);
  Add(3, 28U, "191", "220", 260000000U);
  Add(3, 29U, "221", "300", 261000000U);
  Add(3, 30U, "321", "400", 261000000U);  // Follows every level-2 file
  UpdateVersionStorageInfo();

  std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction(
      cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_));
  ASSERT_TRUE(compaction.get() != nullptr);
  ASSERT_EQ(1U, compaction->num_input_files(0));
  // Picking file 7 because its overlapping ratio is the smallest.
  ASSERT_EQ(7U, compaction->input(0, 0)->fd.GetNumber());
}

// With kMinOverlappingRatio, two level-2 files that overlap the same amount of
// level-3 data are ordered by their own size: the larger file has the smaller
// ratio and is picked.
TEST_F(CompactionPickerTest, CompactionPriMinOverlapping3) {
  NewVersionStorage(6, kCompactionStyleLevel);
  mutable_cf_options_.target_file_size_base = 10000000;
  mutable_cf_options_.target_file_size_multiplier = 10;
  mutable_cf_options_.compaction_pri = kMinOverlappingRatio;

  // Files 7 and 8 overlap with the same level-3 file, but file 8 is larger,
  // so its overlapping ratio is smaller and it will be picked.
  Add(2, 6U, "150", "175", 60000000U);  // Overlaps with file 26, 27
  Add(2, 7U, "176", "200", 60000000U);  // Overlaps with file 28
  Add(2, 8U, "201", "300", 61000000U);  // Overlaps with file 28

  // Every level-3 file gets its own file number so that the overlap comments
  // above identify exactly one file each.
  Add(3, 26U, "160", "165", 260000000U);
  Add(3, 27U, "166", "170", 260000000U);
  Add(3, 28U, "180", "400", 260000000U);
  Add(3, 29U, "401", "500", 260000000U);
  UpdateVersionStorageInfo();

  std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction(
      cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_));
  ASSERT_TRUE(compaction.get() != nullptr);
  ASSERT_EQ(1U, compaction->num_input_files(0));
  // Picking file 8 because its overlapping ratio is the smallest.
  ASSERT_EQ(8U, compaction->input(0, 0)->fd.GetNumber());
}

// This test exhibits the bug where we don't properly reset parent_index in
// PickCompaction()
TEST_F(CompactionPickerTest, ParentIndexResetBug) {
Expand Down
8 changes: 6 additions & 2 deletions db/db_compaction_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2507,8 +2507,12 @@ TEST_P(CompactionPriTest, Test) {
}
}

// NOTE(review): this hunk is reported as "6 additions & 2 deletions", so the
// next two lines appear to be the pre-change instantiation (raw integer
// parameters) that the commit removes, not live code.
INSTANTIATE_TEST_CASE_P(CompactionPriTest, CompactionPriTest,
::testing::Values(0, 1, 2));
// Replacement instantiation: runs CompactionPriTest once for each named
// CompactionPri value, including the newly added kMinOverlappingRatio.
INSTANTIATE_TEST_CASE_P(
CompactionPriTest, CompactionPriTest,
::testing::Values(CompactionPri::kByCompensatedSize,
CompactionPri::kOldestLargestSeqFirst,
CompactionPri::kOldestSmallestSeqFirst,
CompactionPri::kMinOverlappingRatio));

#endif // !defined(ROCKSDB_LITE)
} // namespace rocksdb
Expand Down
45 changes: 45 additions & 0 deletions db/version_set.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1377,6 +1377,47 @@ void VersionStorageInfo::UpdateNumNonEmptyLevels() {
}
}

namespace {
// Reorders `temp` in ascending order of each file's overlapping ratio.
//
// For every entry of `files`, the ratio is the summed size of the files in
// `next_level_files` that overlap it, scaled by 1024 and divided by the
// file's own size (integer arithmetic).
//
// `next_level_files` is walked with a single forward-only cursor shared by
// all entries of `files`.
// NOTE(review): this presumably relies on both vectors being ordered by key
// range -- confirm against the caller.
void SortFileByOverlappingRatio(
    const InternalKeyComparator& icmp, const std::vector<FileMetaData*>& files,
    const std::vector<FileMetaData*>& next_level_files,
    std::vector<Fsize>* temp) {
  // File number -> scaled overlapping ratio.
  std::unordered_map<uint64_t, uint64_t> ratio_by_number;
  auto cursor = next_level_files.begin();
  const auto next_level_end = next_level_files.end();

  for (auto* current : files) {
    // Move past next-level files that end before `current` begins.
    for (; cursor != next_level_end &&
           icmp.Compare((*cursor)->largest, current->smallest) < 0;
         cursor++) {
    }

    // Accumulate the sizes of next-level files that start before `current`
    // ends.
    uint64_t overlap_total = 0;
    for (; cursor != next_level_end &&
           icmp.Compare((*cursor)->smallest, current->largest) < 0;
         cursor++) {
      overlap_total += (*cursor)->fd.file_size;

      if (icmp.Compare((*cursor)->largest, current->largest) > 0) {
        // This next-level file extends beyond the upper boundary of
        // `current`. Leave the cursor on it (break skips the increment) so
        // the following entry of `files` examines it as well.
        break;
      }
    }

    assert(current->fd.file_size != 0);
    const uint64_t scaled_ratio = overlap_total * 1024u / current->fd.file_size;
    ratio_by_number[current->fd.GetNumber()] = scaled_ratio;
  }

  std::sort(temp->begin(), temp->end(),
            [&ratio_by_number](const Fsize& lhs, const Fsize& rhs) -> bool {
              const uint64_t lhs_ratio =
                  ratio_by_number[lhs.file->fd.GetNumber()];
              const uint64_t rhs_ratio =
                  ratio_by_number[rhs.file->fd.GetNumber()];
              return lhs_ratio < rhs_ratio;
            });
}
}  // namespace

void VersionStorageInfo::UpdateFilesByCompactionPri(
const MutableCFOptions& mutable_cf_options) {
if (compaction_style_ == kCompactionStyleFIFO ||
Expand Down Expand Up @@ -1419,6 +1460,10 @@ void VersionStorageInfo::UpdateFilesByCompactionPri(
return f1.file->smallest_seqno < f2.file->smallest_seqno;
});
break;
case kMinOverlappingRatio:
SortFileByOverlappingRatio(*internal_comparator_, files_[level],
files_[level + 1], &temp);
break;
default:
assert(false);
}
Expand Down
7 changes: 7 additions & 0 deletions include/rocksdb/options.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,9 @@ enum CompactionStyle : char {
kCompactionStyleNone = 0x3,
};

// In level-based compaction, it determines which file from a level is
// picked to merge to the next level. We suggest people try
// kMinOverlappingRatio first when tuning their database.
enum CompactionPri : char {
// Slightly prioritize larger files by size compensated by #deletes
kByCompensatedSize = 0x0,
Expand All @@ -90,6 +93,10 @@ enum CompactionPri : char {
// for the longest. If your updates are random across the key space,
// write amplification is slightly better with this option.
kOldestSmallestSeqFirst = 0x2,
// First compact files whose ratio between overlapping size in next level
// and its size is the smallest. It in many cases can optimize write
// amplification.
kMinOverlappingRatio = 0x3,
};

enum class WALRecoveryMode : char {
Expand Down

0 comments on commit 92a9ccf

Please sign in to comment.