Skip to content

Commit

Permalink
SPDB-671: Block Bloom - Part 1
Browse files Browse the repository at this point in the history
  • Loading branch information
udi-speedb committed May 25, 2022
1 parent 6547c56 commit 04c31c2
Show file tree
Hide file tree
Showing 8 changed files with 557 additions and 4 deletions.
63 changes: 63 additions & 0 deletions db/db_bloom_filter_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -599,6 +599,69 @@ TEST_F(DBBloomFilterTest, BloomFilterRate) {
}
}

// Formats `value` as text using the user's preferred locale (the one named by
// the environment), so that any digit grouping defined by that locale -- e.g.
// thousands separators -- is applied to the output.
//
// If the environment names a locale that is not available on this machine,
// std::locale("") throws std::runtime_error. This helper is only used for
// human-readable logging, so in that case the stream simply keeps the locale
// it was constructed with and the value is printed without locale-specific
// grouping instead of aborting the caller.
template <class T>
std::string FormatWithCommas(T value) {
  std::stringstream ss;
  try {
    ss.imbue(std::locale(""));
  } catch (const std::runtime_error&) {
    // Preferred locale is unavailable; fall through with the stream's
    // existing locale.
  }
  ss << std::fixed << value;
  return ss.str();
}

// Manually-run (DISABLED_) measurement of the Speedb block bloom filter's
// false-positive rate.
//
// Steps:
//   1. Write 1M keys plus one very large key to column family 1 and flush.
//   2. Read back every written key and require that it is found.
//   3. Look up 10M keys that were never written and require NOT_FOUND.
//   4. Print the bloom-filter tickers and the derived false-positive rate to
//      stderr. The rate itself is reported, not asserted.
TEST_F(DBBloomFilterTest, DISABLED_SpdbBlockBloomFilterRate) {
  option_config_ = kSpdbBlockBloomFilter;
  Options options = CurrentOptions();
  options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();

  get_perf_context()->EnablePerLevelPerfContext();
  CreateAndReopenWithCF({"pikachu"}, options);

  const int num_keys = 1000 * 1000;
  const int first_key = num_keys;
  const int last_key = first_key + num_keys - 1;
  for (int i = first_key; i <= last_key; i++) {
    ASSERT_OK(Put(1, Key(i), Key(i)));
  }

  // Add a large key to make the file contain a wide range.
  // `1 << 31` does not fit in a 32-bit int and comes out negative on common
  // platforms, so use the largest positive 32-bit int value instead.
  const int very_large_key_val = 0x7fffffff;
  ASSERT_OK(Put(1, Key(very_large_key_val), Key(very_large_key_val)));
  // A failed flush would invalidate every measurement below, so check it.
  ASSERT_OK(Flush(1));

  // Check if they can be found
  std::cerr << "Checking that " << FormatWithCommas(num_keys)
            << " keys that are in the filter are found\n";
  for (int i = first_key; i <= last_key; i++) {
    ASSERT_EQ(Key(i), Get(1, Key(i)));
  }

  // Check if filter is useful
  const int multiplier = 10;
  const int num_not_found_keys = num_keys * multiplier;

  std::cerr << "Checking that " << FormatWithCommas(num_not_found_keys)
            << " keys that are NOT in the filter are NOT found\n";
  const int first_not_found_key = last_key + 1;
  const int last_not_found_key = first_not_found_key + num_not_found_keys - 1;
  for (int not_found_key = first_not_found_key;
       not_found_key <= last_not_found_key; ++not_found_key) {
    // Progress indicator: one line per million lookups.
    if ((not_found_key - first_not_found_key) % 1000000 == 0) {
      std::cerr << not_found_key << " Keys\n";
    }
    ASSERT_EQ("NOT_FOUND", Get(1, Key(not_found_key)));
  }

  std::cerr << "AFTER Get() for " << num_not_found_keys
            << " Keys NOT IN THE DB\n";
  const auto useful = TestGetTickerCount(options, BLOOM_FILTER_USEFUL);
  const auto full_positive =
      TestGetTickerCount(options, BLOOM_FILTER_FULL_POSITIVE);
  const auto true_positive =
      TestGetTickerCount(options, BLOOM_FILTER_FULL_TRUE_POSITIVE);

  std::cerr << "BLOOM_FILTER_USEFUL = " << FormatWithCommas(useful) << '\n';
  std::cerr << "BLOOM_FILTER_FULL_POSITIVE = "
            << FormatWithCommas(full_positive) << '\n';
  std::cerr << "BLOOM_FILTER_FULL_TRUE_POSITIVE = "
            << FormatWithCommas(true_positive) << '\n';

  // False positives are the filter hits that did not turn out to be real
  // keys. The rate is taken over (filter rejections + false positives).
  const auto false_positive = full_positive - true_positive;
  if (false_positive == 0) {
    // Avoid dividing by zero (0/0 or 1/0) when nothing slipped through.
    std::cerr << "FPR = 0 (no false positives were recorded)\n";
  } else {
    const auto fpr =
        false_positive / static_cast<double>(useful + false_positive);
    std::cerr << "FPR = 1 in " << (1 / fpr) << '\n';
  }

  get_perf_context()->Reset();
}

TEST_F(DBBloomFilterTest, BloomFilterCompatibility) {
Options options = CurrentOptions();
options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
Expand Down
10 changes: 10 additions & 0 deletions db/db_test_util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
#include "rocksdb/utilities/object_registry.h"
#include "util/random.h"

#include <iostream>

namespace ROCKSDB_NAMESPACE {

namespace {
Expand Down Expand Up @@ -261,9 +263,14 @@ bool DBTestBase::ChangeFilterOptions() {
option_config_ = kFullFilterWithNewTableReaderForCompactions;
} else if (option_config_ == kFullFilterWithNewTableReaderForCompactions) {
option_config_ = kPartitionedFilterWithNewTableReaderForCompactions;
} else if (option_config_ == kPartitionedFilterWithNewTableReaderForCompactions) {
option_config_ = kSpdbBlockBloomFilter;
} else {
std::cerr << "ChangeFilterOptions - Returns False\n";
return false;
}
std::cerr << "option_config_ = " << option_config_ << '\n';

Destroy(last_options_);

auto options = CurrentOptions();
Expand Down Expand Up @@ -431,6 +438,9 @@ Options DBTestBase::GetOptions(
options.new_table_reader_for_compaction_inputs = true;
options.compaction_readahead_size = 10 * 1024 * 1024;
break;
case kSpdbBlockBloomFilter:
table_options.filter_policy.reset(NewSpdbBlockBloomFilterPolicy());
break;
case kUncompressed:
options.compression = kNoCompression;
break;
Expand Down
2 changes: 2 additions & 0 deletions db/db_test_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -946,6 +946,8 @@ class DBTestBase : public testing::Test {
kUniversalSubcompactions,
kxxHash64Checksum,
kUnorderedWrite,
kSpdbBlockBloomFilter,

// This must be the last line
kEnd,
};
Expand Down
1 change: 1 addition & 0 deletions include/rocksdb/filter_policy.h
Original file line number Diff line number Diff line change
Expand Up @@ -277,5 +277,6 @@ inline const FilterPolicy* NewExperimentalRibbonFilterPolicy(
}

// Factory for the Speedb hybrid filter policy; the definition is not in this
// header.
// NOTE(review): returns a raw pointer -- presumably the caller takes ownership
// (e.g. by handing it to a std::shared_ptr<const FilterPolicy>); confirm
// against the definition.
extern const FilterPolicy* NewSpdbHybridFilterPolicy();
// Factory for the Speedb block bloom filter policy; the definition is not in
// this header.
// NOTE(review): same ownership assumption as above -- confirm.
extern const FilterPolicy* NewSpdbBlockBloomFilterPolicy();

} // namespace ROCKSDB_NAMESPACE
4 changes: 2 additions & 2 deletions include/rocksdb/table.h
Original file line number Diff line number Diff line change
Expand Up @@ -339,8 +339,8 @@ struct BlockBasedTableOptions {
// Many applications will benefit from passing the result of
// NewBloomFilterPolicy() here.
std::shared_ptr<const FilterPolicy> filter_policy{
NewSpdbHybridFilterPolicy()};

// // // NewSpdbHybridFilterPolicy()};
NewSpdbBlockBloomFilterPolicy()};
// If true, place whole keys in the filter (not just prefixes).
// This must generally be true for gets to be efficient.
bool whole_key_filtering = true;
Expand Down
Loading

0 comments on commit 04c31c2

Please sign in to comment.