Skip to content

Commit

Permalink
cache friendly blocked bloomfilter
Browse files Browse the repository at this point in the history
Summary:
By constraining the probes for a key to one or more cache lines, we can
reduce the cache miss rate and thus improve performance. This probably only
makes sense for in-memory workloads, so the option defaults to off.

Numbers and comparisons can be found in the wiki:
https://our.intern.facebook.com/intern/wiki/index.php/Ljin/rocksdb_perf/2014_03_17#Bloom_Filter_Study

Test Plan: Benchmarked this change extensively. Will run `make all check` as well.

Reviewers: haobo, igor, dhruba, sdong, yhchiang

Reviewed By: haobo

CC: leveldb

Differential Revision: https://reviews.facebook.net/D17133
  • Loading branch information
Lei Jin committed Mar 28, 2014
1 parent 10cebec commit 0d755ff
Show file tree
Hide file tree
Showing 9 changed files with 233 additions and 83 deletions.
13 changes: 10 additions & 3 deletions db/db_bench.cc
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,8 @@ DEFINE_int64(read_range, 1, "When ==1 reads use ::Get, when >1 reads use"

DEFINE_bool(use_prefix_blooms, false, "Whether to place prefixes in blooms");

DEFINE_int32(bloom_locality, 0, "Control bloom filter probes locality");

DEFINE_bool(use_prefix_api, false, "Whether to set ReadOptions.prefix for"
" prefixscanrandom. If true, use_prefix_blooms must also be true.");

Expand Down Expand Up @@ -1543,6 +1545,7 @@ class Benchmark {
NewFixedPrefixTransform(FLAGS_prefix_size));
}
options.memtable_prefix_bloom_bits = FLAGS_memtable_bloom_bits;
options.bloom_locality = FLAGS_bloom_locality;
options.max_open_files = FLAGS_open_files;
options.statistics = dbstats;
options.env = FLAGS_env;
Expand Down Expand Up @@ -1916,14 +1919,15 @@ class Benchmark {
Duration duration(FLAGS_duration, reads_);

int64_t found = 0;

int64_t read = 0;
if (FLAGS_use_multiget) { // MultiGet
const long& kpg = FLAGS_keys_per_multiget; // keys per multiget group
long keys_left = reads_;

// Recalculate number of keys per group, and call MultiGet until done
long num_keys;
while(num_keys = std::min(keys_left, kpg), !duration.Done(num_keys)) {
read += num_keys;
found +=
MultiGetRandom(options, num_keys, &thread->rand, FLAGS_num, "");
thread->stats.FinishedSingleOp(db_);
Expand All @@ -1937,8 +1941,9 @@ class Benchmark {
std::string key = GenerateKeyFromInt(k, FLAGS_num);

iter->Seek(key);
read++;
if (iter->Valid() && iter->key().compare(Slice(key)) == 0) {
++found;
found++;
}

thread->stats.FinishedSingleOp(db_);
Expand All @@ -1957,6 +1962,7 @@ class Benchmark {
}

if (FLAGS_read_range < 2) {
read++;
if (db_->Get(options, key, &value).ok()) {
found++;
}
Expand All @@ -1972,6 +1978,7 @@ class Benchmark {
db_->GetApproximateSizes(&range, 1, &sizes);
}

read += FLAGS_read_range;
for (iter->Seek(key);
iter->Valid() && count <= FLAGS_read_range;
++count, iter->Next()) {
Expand All @@ -1992,7 +1999,7 @@ class Benchmark {

char msg[100];
snprintf(msg, sizeof(msg), "(%" PRIu64 " of %" PRIu64 " found)",
found, reads_);
found, read);

thread->stats.AddMessage(msg);

Expand Down
1 change: 1 addition & 0 deletions db/memtable.cc
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ MemTable::MemTable(const InternalKeyComparator& cmp, const Options& options)
assert(!should_flush_);
if (prefix_extractor_ && options.memtable_prefix_bloom_bits > 0) {
prefix_bloom_.reset(new DynamicBloom(options.memtable_prefix_bloom_bits,
options.bloom_locality,
options.memtable_prefix_bloom_probes));
}
}
Expand Down
11 changes: 11 additions & 0 deletions include/rocksdb/options.h
Original file line number Diff line number Diff line change
Expand Up @@ -719,6 +719,17 @@ struct Options {
// number of hash probes per key
uint32_t memtable_prefix_bloom_probes;

// Control locality of bloom filter probes to reduce the cache miss rate.
// This option only applies to memtable prefix bloom and plaintable
// prefix bloom. It essentially limits the max number of cache lines each
// bloom filter check can touch.
// This optimization is turned off when set to 0. The number should never
// be greater than the number of probes. This option can boost performance
// for in-memory workloads but should be used with care since it can cause
// a higher false positive rate.
// Default: 0
uint32_t bloom_locality;

// Maximum number of successive merge operations on a key in the memtable.
//
// When a merge operation is added to the memtable and the maximum number of
Expand Down
2 changes: 2 additions & 0 deletions port/port_posix.h
Original file line number Diff line number Diff line change
Expand Up @@ -480,6 +480,8 @@ inline bool GetHeapProfile(void (*func)(void *, const char *, int), void *arg) {
return false;
}

#define CACHE_LINE_SIZE 64U

} // namespace port
} // namespace rocksdb

Expand Down
4 changes: 2 additions & 2 deletions table/plain_table_reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,7 @@ void PlainTableReader::AllocateIndexAndBloom(int num_prefixes) {
if (options_.prefix_extractor != nullptr) {
uint32_t bloom_total_bits = num_prefixes * kBloomBitsPerKey;
if (bloom_total_bits > 0) {
bloom_.reset(new DynamicBloom(bloom_total_bits));
bloom_.reset(new DynamicBloom(bloom_total_bits, options_.bloom_locality));
}
}

Expand Down Expand Up @@ -388,7 +388,7 @@ Status PlainTableReader::PopulateIndex() {
if (IsTotalOrderMode()) {
uint32_t num_bloom_bits = table_properties_->num_entries * kBloomBitsPerKey;
if (num_bloom_bits > 0) {
bloom_.reset(new DynamicBloom(num_bloom_bits));
bloom_.reset(new DynamicBloom(num_bloom_bits, options_.bloom_locality));
}
}

Expand Down
38 changes: 26 additions & 12 deletions util/dynamic_bloom.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@

#include "dynamic_bloom.h"

#include <algorithm>

#include "port/port.h"
#include "rocksdb/slice.h"
#include "util/hash.h"

Expand All @@ -17,20 +20,31 @@ static uint32_t BloomHash(const Slice& key) {
}

DynamicBloom::DynamicBloom(uint32_t total_bits,
uint32_t (*hash_func)(const Slice& key),
uint32_t num_probes)
: hash_func_(hash_func),
kTotalBits((total_bits + 7) / 8 * 8),
kNumProbes(num_probes) {
assert(hash_func_);
uint32_t cl_per_block,
uint32_t num_probes,
uint32_t (*hash_func)(const Slice& key))
: kBlocked(cl_per_block > 0),
kBitsPerBlock(std::min(cl_per_block, num_probes) * CACHE_LINE_SIZE * 8),
kTotalBits((kBlocked ? (total_bits + kBitsPerBlock - 1) / kBitsPerBlock
* kBitsPerBlock :
total_bits + 7) / 8 * 8),
kNumBlocks(kBlocked ? kTotalBits / kBitsPerBlock : 1),
kNumProbes(num_probes),
hash_func_(hash_func == nullptr ? &BloomHash : hash_func) {
assert(kBlocked ? kTotalBits > 0 : kTotalBits >= kBitsPerBlock);
assert(kNumProbes > 0);
assert(kTotalBits > 0);
data_.reset(new unsigned char[kTotalBits / 8]());
}

DynamicBloom::DynamicBloom(uint32_t total_bits,
uint32_t num_probes)
: DynamicBloom(total_bits, &BloomHash, num_probes) {
uint32_t sz = kTotalBits / 8;
if (kBlocked) {
sz += CACHE_LINE_SIZE - 1;
}
raw_ = new unsigned char[sz]();
if (kBlocked) {
data_ = raw_ + CACHE_LINE_SIZE -
reinterpret_cast<uint64_t>(raw_) % CACHE_LINE_SIZE;
} else {
data_ = raw_;
}
}

} // rocksdb
61 changes: 45 additions & 16 deletions util/dynamic_bloom.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,17 @@ class Slice;
class DynamicBloom {
public:
// total_bits: fixed total bits for the bloom
// hash_func: customized hash function
// num_probes: number of hash probes for a single key
DynamicBloom(uint32_t total_bits,
uint32_t (*hash_func)(const Slice& key),
uint32_t num_probes = 6);
// cl_per_block: block size in cache lines. When this is non-zero, a
// query/set is done within a block to improve cache locality.
// hash_func: customized hash function
explicit DynamicBloom(uint32_t total_bits, uint32_t cl_per_block = 0,
uint32_t num_probes = 6,
uint32_t (*hash_func)(const Slice& key) = nullptr);

explicit DynamicBloom(uint32_t total_bits, uint32_t num_probes = 6);
~DynamicBloom() {
delete[] raw_;
}

// Assuming single threaded access to this function.
void Add(const Slice& key);
Expand All @@ -36,10 +40,15 @@ class DynamicBloom {
bool MayContainHash(uint32_t hash);

private:
uint32_t (*hash_func_)(const Slice& key);
const bool kBlocked;
const uint32_t kBitsPerBlock;
const uint32_t kTotalBits;
const uint32_t kNumBlocks;
const uint32_t kNumProbes;
std::unique_ptr<unsigned char[]> data_;

uint32_t (*hash_func_)(const Slice& key);
unsigned char* data_;
unsigned char* raw_;
};

inline void DynamicBloom::Add(const Slice& key) { AddHash(hash_func_(key)); }
Expand All @@ -50,22 +59,42 @@ inline bool DynamicBloom::MayContain(const Slice& key) {

inline bool DynamicBloom::MayContainHash(uint32_t h) {
const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits
for (uint32_t i = 0; i < kNumProbes; i++) {
const uint32_t bitpos = h % kTotalBits;
if (((data_[bitpos / 8]) & (1 << (bitpos % 8))) == 0) {
return false;
if (kBlocked) {
uint32_t b = ((h >> 11 | (h << 21)) % kNumBlocks) * kBitsPerBlock;
for (uint32_t i = 0; i < kNumProbes; ++i) {
const uint32_t bitpos = b + h % kBitsPerBlock;
if (((data_[bitpos / 8]) & (1 << (bitpos % 8))) == 0) {
return false;
}
h += delta;
}
} else {
for (uint32_t i = 0; i < kNumProbes; ++i) {
const uint32_t bitpos = h % kTotalBits;
if (((data_[bitpos / 8]) & (1 << (bitpos % 8))) == 0) {
return false;
}
h += delta;
}
h += delta;
}
return true;
}

inline void DynamicBloom::AddHash(uint32_t h) {
const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits
for (uint32_t i = 0; i < kNumProbes; i++) {
const uint32_t bitpos = h % kTotalBits;
data_[bitpos / 8] |= (1 << (bitpos % 8));
h += delta;
if (kBlocked) {
uint32_t b = ((h >> 11 | (h << 21)) % kNumBlocks) * kBitsPerBlock;
for (uint32_t i = 0; i < kNumProbes; ++i) {
const uint32_t bitpos = b + h % kBitsPerBlock;
data_[bitpos / 8] |= (1 << (bitpos % 8));
h += delta;
}
} else {
for (uint32_t i = 0; i < kNumProbes; ++i) {
const uint32_t bitpos = h % kTotalBits;
data_[bitpos / 8] |= (1 << (bitpos % 8));
h += delta;
}
}
}

Expand Down
Loading

0 comments on commit 0d755ff

Please sign in to comment.