diff --git a/db/db_bloom_filter_test.cc b/db/db_bloom_filter_test.cc
index 6a5544018..cd8175ee7 100644
--- a/db/db_bloom_filter_test.cc
+++ b/db/db_bloom_filter_test.cc
@@ -599,6 +599,69 @@ TEST_F(DBBloomFilterTest, BloomFilterRate) {
   }
 }
 
+template <typename T>
+std::string FormatWithCommas(T value)
+{
+  std::stringstream ss;
+  ss.imbue(std::locale(""));
+  ss << std::fixed << value;
+  return ss.str();
+}
+
+TEST_F(DBBloomFilterTest, DISABLED_SpdbBlockBloomFilterRate) {
+  option_config_ = kSpdbBlockBloomFilter;
+  Options options = CurrentOptions();
+  options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
+
+  get_perf_context()->EnablePerLevelPerfContext();
+  CreateAndReopenWithCF({"pikachu"}, options);
+
+  int num_keys = 1000 * 1000;
+  int first_key = num_keys;
+  int last_key = first_key + num_keys - 1;
+  for (auto i = first_key; i <= last_key; i++) {
+    ASSERT_OK(Put(1, Key(i), Key(i)));
+  }
+
+  // Add a large key to make the file contain wide range
+  // NOTE: (1 << 31) overflows int (UB); use the max positive int instead.
+  int very_large_key_val = 0x7FFFFFFF;
+  ASSERT_OK(Put(1, Key(very_large_key_val), Key(very_large_key_val)));
+  Flush(1);
+
+  // Check if they can be found
+  std::cerr << "Checking that " << FormatWithCommas(num_keys) << " keys that are in the filter are found\n";
+  for (auto i = first_key; i <= last_key; i++) {
+    ASSERT_EQ(Key(i), Get(1, Key(i)));
+  }
+
+  // Check if filter is useful
+  int multiplier = 10;
+  int num_not_found_keys = num_keys * multiplier;
+
+  std::cerr << "Checking that " << FormatWithCommas(num_not_found_keys) << " keys that are NOT in the filter are NOT found\n";
+  auto first_not_found_key = last_key + 1;
+  auto last_not_found_key = first_not_found_key + num_not_found_keys - 1;
+  for (int not_found_key = first_not_found_key; not_found_key <= last_not_found_key; ++not_found_key) {
+    if ((not_found_key - first_not_found_key) % 1000000 == 0) std::cerr << not_found_key << " Keys\n";
+    ASSERT_EQ("NOT_FOUND", Get(1, Key(not_found_key)));
+  }
+
+  std::cerr << "AFTER Get() for " << num_not_found_keys << " Keys NOT IN THE DB\n";
+  auto useful = TestGetTickerCount(options, BLOOM_FILTER_USEFUL);
+  auto full_positive = TestGetTickerCount(options, BLOOM_FILTER_FULL_POSITIVE);
+  auto true_positive = TestGetTickerCount(options, BLOOM_FILTER_FULL_TRUE_POSITIVE);
+
+  std::cerr << "BLOOM_FILTER_USEFUL = " << FormatWithCommas(useful) << '\n';
+  std::cerr << "BLOOM_FILTER_FULL_POSITIVE = " << FormatWithCommas(full_positive) << '\n';
+  std::cerr << "BLOOM_FILTER_FULL_TRUE_POSITIVE = " << FormatWithCommas(true_positive) << '\n';
+
+  auto false_positive = full_positive - true_positive;
+  auto fpr = false_positive / static_cast<double>(useful + false_positive);
+  std::cerr << "FPR = 1 in " << static_cast<int>(1 / fpr) << '\n';
+
+  get_perf_context()->Reset();
+}
+
 TEST_F(DBBloomFilterTest, BloomFilterCompatibility) {
   Options options = CurrentOptions();
   options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
diff --git a/db/db_test_util.cc b/db/db_test_util.cc
index 4d6cf1004..3d51d91e0 100644
--- a/db/db_test_util.cc
+++ b/db/db_test_util.cc
@@ -16,6 +16,8 @@
 #include "rocksdb/utilities/object_registry.h"
 #include "util/random.h"
 
+#include <iostream>
+
 namespace ROCKSDB_NAMESPACE {
 
 namespace {
@@ -261,9 +263,14 @@ bool DBTestBase::ChangeFilterOptions() {
     option_config_ = kFullFilterWithNewTableReaderForCompactions;
   } else if (option_config_ == kFullFilterWithNewTableReaderForCompactions) {
     option_config_ = kPartitionedFilterWithNewTableReaderForCompactions;
+  } else if (option_config_ == kPartitionedFilterWithNewTableReaderForCompactions) {
+    option_config_ = kSpdbBlockBloomFilter;
   } else {
+    std::cerr << "ChangeFilterOptions - Returns False\n";
     return false;
   }
+  std::cerr << "option_config_ = " << option_config_ << '\n';
+
   Destroy(last_options_);
   auto options = CurrentOptions();
@@ -431,6 +438,9 @@ Options DBTestBase::GetOptions(
       options.new_table_reader_for_compaction_inputs = true;
       options.compaction_readahead_size = 10 * 1024 * 1024;
       break;
+    case kSpdbBlockBloomFilter:
+      table_options.filter_policy.reset(NewSpdbBlockBloomFilterPolicy());
+      break;
     case kUncompressed:
       options.compression = kNoCompression;
       break;
diff --git a/db/db_test_util.h b/db/db_test_util.h
index 6c2cad45c..19a89821a 100644
--- a/db/db_test_util.h
+++ b/db/db_test_util.h
@@ -946,6 +946,8 @@ class DBTestBase : public testing::Test {
     kUniversalSubcompactions,
     kxxHash64Checksum,
     kUnorderedWrite,
+    kSpdbBlockBloomFilter,
+
     // This must be the last line
     kEnd,
   };
diff --git a/include/rocksdb/filter_policy.h b/include/rocksdb/filter_policy.h
index 5e8b7dcfb..337e094e9 100644
--- a/include/rocksdb/filter_policy.h
+++ b/include/rocksdb/filter_policy.h
@@ -277,5 +277,6 @@ inline const FilterPolicy* NewExperimentalRibbonFilterPolicy(
 }
 
 extern const FilterPolicy* NewSpdbHybridFilterPolicy();
+extern const FilterPolicy* NewSpdbBlockBloomFilterPolicy();
 
 }  // namespace ROCKSDB_NAMESPACE
diff --git a/include/rocksdb/table.h b/include/rocksdb/table.h
index 2dbd82725..072e5c31d 100644
--- a/include/rocksdb/table.h
+++ b/include/rocksdb/table.h
@@ -339,8 +339,8 @@ struct BlockBasedTableOptions {
   // Many applications will benefit from passing the result of
   // NewBloomFilterPolicy() here.
   std::shared_ptr<const FilterPolicy> filter_policy{
-      NewSpdbHybridFilterPolicy()};
-
+      // // // NewSpdbHybridFilterPolicy()};
+      NewSpdbBlockBloomFilterPolicy()};
   // If true, place whole keys in the filter (not just prefixes).
   // This must generally be true for gets to be efficient.
   bool whole_key_filtering = true;
diff --git a/table/block_based/filter_policy.cc b/table/block_based/filter_policy.cc
index bcd43edac..b3684f4f2 100644
--- a/table/block_based/filter_policy.cc
+++ b/table/block_based/filter_policy.cc
@@ -10,9 +10,14 @@
 #include "rocksdb/filter_policy.h"
 
 #include <array>
+#include <cmath>
 #include <climits>
 #include <cstring>
 #include <deque>
+#include <algorithm>
+#include <limits>
+#include <memory>
+#include <vector>
 
 #include "rocksdb/slice.h"
 #include "table/block_based/block_based_filter_block.h"
@@ -24,6 +29,9 @@
 #include "util/hash.h"
 #include "util/ribbon_config.h"
 #include "util/ribbon_impl.h"
+#include "util/fastrange.h"
+
+#include <iostream>
 
 namespace ROCKSDB_NAMESPACE {
 
@@ -55,6 +63,7 @@ class XXH3pFilterBitsBuilder : public BuiltinFilterBitsBuilder {
 
   virtual void AddKey(const Slice& key) override {
     uint64_t hash = GetSliceHash64(key);
+
     // Especially with prefixes, it is common to have repetition,
     // though only adjacent repetition, which we want to immediately
     // recognize and collapse for estimating true filter space
@@ -398,6 +407,450 @@ class FastLocalBloomBitsReader : public FilterBitsReader {
   const uint32_t len_bytes_;
 };
 
+
+// #################### SpeedbBlockedBloom implementation ##################
+// TODO: See description in TBD
+
+namespace spdb_bloom {
+  constexpr double BloomBitsPerKey = 23.2;
+  constexpr size_t BatchSizeInBlocks = 128U;
+
+  constexpr size_t InBatchIdxNumBits = std::ceil(std::log2(BatchSizeInBlocks));
+  static_assert(InBatchIdxNumBits <= 8);
+
+  constexpr size_t BlockSizeInBytes = 64U;
+  constexpr size_t BlockSizeInBits = BlockSizeInBytes * 8U;
+
+  constexpr size_t BatchSizeInBytes = BatchSizeInBlocks * BlockSizeInBytes;
+  constexpr size_t NumBitsInBlockBloom = BlockSizeInBits - InBatchIdxNumBits;
+
+  constexpr size_t NumHashFuncs = 16;
+  static_assert(NumHashFuncs % 2 == 0, "NumHashFuncs Must Be Even");
+  constexpr size_t HashSetSize = NumHashFuncs / 2;
+
+  using InBatchBlockIdx = uint8_t;
+  using EntryHashSet = std::array<uint32_t, HashSetSize>;
+
+  EntryHashSet GetHashSetForEntry(uint32_t entry_hash, uint32_t hash_selector) {
+    assert(hash_selector <= 1U);
+
+    EntryHashSet hash_set;
+    static constexpr uint32_t HashSetSeed[] = {uint32_t{0x9e3779b9}, uint32_t{0x52941879}};
+
+    auto seed = HashSetSeed[hash_selector];
+    hash_set[0] = entry_hash * seed;
+    for (auto i = 1U; i < hash_set.size(); ++i) {
+      hash_set[i] = hash_set[i-1] * seed;
+    }
+    return hash_set;
+  }
+
+  // A Bloom Block (Cache-Line)
+  // A contiguous region of 512 bits (64 bytes)
+  // The first InBatchIdxNumBits bits are used to store the in-batch index of the pair block
+
+  namespace block {
+    constexpr uint8_t InBatchIdxMask = (uint8_t {1U} << InBatchIdxNumBits) - 1;
+    constexpr uint8_t FirstByteBitsMask = ~InBatchIdxMask;
+    static_assert((InBatchIdxMask | FirstByteBitsMask) == 0xFF);
+
+    class BuildBlock {
+     public:
+      BuildBlock(char* block_address, bool fetch_cache_line)
+          : block_address_(block_address)
+      {
+        if (fetch_cache_line) {
+          FastLocalBloomImpl::PrefetchCacheLine(block_address_);
+        }
+      }
+
+      void SetBlockIdxOfPair(InBatchBlockIdx pair_batch_block_idx) {
+        assert( ((*block_address_ & InBatchIdxMask) == 0U) ||
+                ((*block_address_ & InBatchIdxMask) == pair_batch_block_idx));
+
+        *block_address_ = (pair_batch_block_idx | (*block_address_ & FirstByteBitsMask));
+      }
+
+      void SetBlockBloomBits(const EntryHashSet& hash_set) {
+        for (const auto& hash: hash_set) {
+          int bitpos = InBatchIdxNumBits + FastRange32(hash, NumBitsInBlockBloom);
+          // bit-within-byte is (bitpos & 7); do NOT mask with InBatchIdxNumBits,
+          // which is only coincidentally equal to 7.
+          block_address_[bitpos >> 3] |= (char{1} << (bitpos & 7));
+        }
+      }
+
+     private:
+      char* const block_address_;
+    };
+
+    class ReadBlock {
+     public:
+      ReadBlock(const char* block_address, bool fetch_cache_line)
+          : block_address_(block_address)
+      {
+        if (fetch_cache_line) {
+          FastLocalBloomImpl::PrefetchCacheLine(block_address_);
+        }
+      }
+
+      uint8_t GetBlockIdxOfPair() const {
+        return static_cast<uint8_t>(*block_address_) & InBatchIdxMask;
+      }
+
+      bool AreAllBlockBloomBitsSet(const EntryHashSet& hash_set) const {
+        for (const auto& hash: hash_set) {
+          int bitpos = InBatchIdxNumBits + FastRange32(hash, NumBitsInBlockBloom);
+          if ((block_address_[bitpos >> 3] & (char{1} << (bitpos & 7))) == 0) {
+            return false;
+          }
+        }
+        return true;
+      }
+
+     private:
+      const char* const block_address_;
+    };
+  }  // namespace block
+
+  inline uint32_t GetContainingBatchIdx(uint32_t block_idx) {
+    return (block_idx / BatchSizeInBlocks);
+  }
+
+  inline uint8_t GetInBatchBlockIdx(uint32_t block_idx) {
+    return (block_idx % BatchSizeInBlocks);
+  }
+
+  inline uint32_t GetHashSelector(uint32_t first_idx, uint32_t second_idx) {
+    assert(first_idx < BatchSizeInBlocks && second_idx < BatchSizeInBlocks);
+    return (first_idx < second_idx)? 0U : 1U;
+  }
+
+  inline uint32_t GetStartBlockIdxOfBatch(uint32_t batch_idx) {
+    return batch_idx * BatchSizeInBlocks;
+  }
+}  // namespace spdb_bloom
+
+class SpeedbBlockBloomBitsBuilder : public XXH3pFilterBitsBuilder {
+ public:
+  // Non-null aggregate_rounding_balance implies optimize_filters_for_memory
+  explicit SpeedbBlockBloomBitsBuilder(
+      const int /*millibits_per_key*/,
+      std::atomic<int64_t>* aggregate_rounding_balance)
+      : XXH3pFilterBitsBuilder(aggregate_rounding_balance)
+      // ,
+      // millibits_per_key_(millibits_per_key)
+  {
+    millibits_per_key_ = spdb_bloom::BloomBitsPerKey * 1000;
+    // assert(millibits_per_key >= 1000);
+  }
+
+ private:
+  struct BlockHistogramInfo {
+    size_t num_keys = 0U;
+    uint32_t original_batch_block_idx = std::numeric_limits<uint32_t>::max();
+
+    bool operator<(const BlockHistogramInfo& other) const {
+      return (num_keys < other.num_keys);
+    }
+  };
+
+  struct PairingInfo {
+    uint32_t secondary_batch_block_idx;
+    uint32_t hash_set_selector;
+  };
+
+  using BatchBlocksHistogram = std::array<BlockHistogramInfo, spdb_bloom::BatchSizeInBlocks>;
+  using BatchPairingInfo = std::array<PairingInfo, spdb_bloom::BatchSizeInBlocks>;
+
+ private:
+  size_t num_blocks_ = 0U;
+  size_t num_batches_ = 0U;
+
+  std::vector<BatchBlocksHistogram> blocks_histogram_;
+  std::vector<BatchPairingInfo> pairing_table_;
+
+  // No Copy allowed
+  SpeedbBlockBloomBitsBuilder(const SpeedbBlockBloomBitsBuilder&) = delete;
+  void operator=(const SpeedbBlockBloomBitsBuilder&) = delete;
+
+  ~SpeedbBlockBloomBitsBuilder() override {}
+
+  void InitVars(size_t num_entries) {
+    // Aligned means to the batch size - Being a multiple of spdb_bloom::BatchSizeInBlocks
+    const size_t UnalignedNumBlocks = num_entries * spdb_bloom::BloomBitsPerKey / spdb_bloom::BlockSizeInBits;
+    // Round UP to a whole number of batches. (std::ceil of an integer
+    // division is a no-op - the division truncates first.)
+    const size_t AlignedNumBlocksMaybeOdd =
+        ((UnalignedNumBlocks + spdb_bloom::BatchSizeInBlocks - 1) / spdb_bloom::BatchSizeInBlocks) * spdb_bloom::BatchSizeInBlocks;
+
+    num_blocks_ = AlignedNumBlocksMaybeOdd + (AlignedNumBlocksMaybeOdd % 2);
+    num_blocks_ = std::max(num_blocks_, spdb_bloom::BatchSizeInBlocks);
+    assert(num_blocks_ % 2 == 0);
+    assert(num_blocks_ % spdb_bloom::BatchSizeInBlocks == 0);
+
+    num_batches_ = num_blocks_ / spdb_bloom::BatchSizeInBlocks;
+    assert(num_batches_ > 0U);
+
+    pairing_table_.resize(num_batches_);
+  }
+
+  virtual Slice Finish(std::unique_ptr<const char[]>* buf) override {
+    const size_t num_entries = hash_entries_.size();
+
+    InitVars(num_entries);
+
+    // const size_t len_with_metadata = CalculateSpace(num_entries);
+    const size_t len_with_metadata = num_blocks_ * spdb_bloom::BlockSizeInBytes + kMetadataLen;
+
+    std::unique_ptr<char[]> mutable_buf;
+    mutable_buf.reset(new char[len_with_metadata]());
+
+    // TODO: Consider the ROCKSDB_MALLOC_USABLE_SIZE code inside AllocateMaybeRounding
+    // // len_with_metadata =
+    // //     AllocateMaybeRounding(len_with_metadata, num_entries, &mutable_buf);
+
+    assert(mutable_buf);
+    assert(len_with_metadata >= kMetadataLen);
+
+    // Max size supported by implementation
+    assert(len_with_metadata <= 0xffffffffU);
+
+    // Compute num_probes after any rounding / adjustments
+    // TODO: NOAM?
+    // // // int num_probes = GetNumProbes(num_entries, len_with_metadata);
+    const int num_probes = spdb_bloom::NumHashFuncs;
+
+    uint32_t len = static_cast<uint32_t>(len_with_metadata - kMetadataLen);
+    if (len > 0) {
+      AddAllEntries(mutable_buf.get(), len, num_probes);
+    }
+
+    assert(hash_entries_.empty());
+
+    // See BloomFilterPolicy::GetBloomBitsReader re: metadata
+    // -1 = Marker for newer Bloom implementations
+    mutable_buf[len] = static_cast<char>(-1);
+    // 1 = Marker for this sub-implementation
+    mutable_buf[len + 1] = static_cast<char>(1);
+    // num_probes (and 0 in upper bits for 64-byte block size)
+    mutable_buf[len + 2] = static_cast<char>(num_probes);
+    // rest of metadata stays zero
+
+    Slice rv(mutable_buf.get(), len_with_metadata);
+    *buf = std::move(mutable_buf);
+    return rv;
+  }
+
+  size_t ApproximateNumEntries(size_t bytes) override {
+    size_t bytes_no_meta =
+        bytes >= kMetadataLen ? RoundDownUsableSpace(bytes) - kMetadataLen : 0;
+    return static_cast<size_t>(uint64_t{8000} * bytes_no_meta /
+                               millibits_per_key_);
+  }
+
+  size_t CalculateSpace(size_t num_entries) override {
+    // If not for cache line blocks in the filter, what would the target
+    // length in bytes be?
+    size_t raw_target_len = static_cast<size_t>(
+        (uint64_t{num_entries} * millibits_per_key_ + 7999) / 8000);
+
+    if (raw_target_len >= size_t{0xffffffc0}) {
+      // Max supported for this data structure implementation
+      raw_target_len = size_t{0xffffffc0};
+    }
+
+    // Round up to nearest multiple of 64 (block size). This adjustment is
+    // used for target FP rate only so that we don't receive complaints about
+    // lower FP rate vs. historic Bloom filter behavior.
+    return ((raw_target_len + 63) & ~size_t{63}) + kMetadataLen;
+  }
+
+  double EstimatedFpRate(size_t keys, size_t len_with_metadata) override {
+    int num_probes = GetNumProbes(keys, len_with_metadata);
+    return FastLocalBloomImpl::EstimatedFpRate(
+        keys, len_with_metadata - kMetadataLen, num_probes, /*hash bits*/ 64);
+  }
+
+ protected:
+  size_t RoundDownUsableSpace(size_t available_size) override {
+    size_t rv = available_size - kMetadataLen;
+
+    if (rv >= size_t{0xffffffc0}) {
+      // Max supported for this data structure implementation
+      rv = size_t{0xffffffc0};
+    }
+
+    // round down to multiple of 64 (block size)
+    rv &= ~size_t{63};
+
+    return rv + kMetadataLen;
+  }
+
+ private:
+  // Compute num_probes after any rounding / adjustments
+  int GetNumProbes(size_t keys, size_t len_with_metadata) {
+    uint64_t millibits = uint64_t{len_with_metadata - kMetadataLen} * 8000;
+    int actual_millibits_per_key =
+        static_cast<int>(millibits / std::max(keys, size_t{1}));
+    // BEGIN XXX/TODO(peterd): preserving old/default behavior for now to
+    // minimize unit test churn. Remove this some time.
+    if (!aggregate_rounding_balance_) {
+      actual_millibits_per_key = millibits_per_key_;
+    }
+    // END XXX/TODO
+    return FastLocalBloomImpl::ChooseNumProbes(actual_millibits_per_key);
+  }
+
+  void InitBlockHistogram() {
+    blocks_histogram_.resize(num_batches_);
+
+    for (auto batch_idx = 0U; batch_idx < blocks_histogram_.size(); ++batch_idx) {
+      for (auto in_batch_block_idx = 0U; in_batch_block_idx < blocks_histogram_[batch_idx].size(); ++in_batch_block_idx) {
+        blocks_histogram_[batch_idx][in_batch_block_idx].original_batch_block_idx = in_batch_block_idx;
+      }
+    }
+  }
+
+  void BuildBlocksHistogram(uint32_t data_len_bytes) {
+    for (auto i = 0U; i < hash_entries_.size(); ++i) {
+      uint64_t h = hash_entries_[i];
+      auto block_idx = FastLocalBloomImpl::HashToCacheLineIdx(Lower32of64(h), data_len_bytes);
+      ++blocks_histogram_[spdb_bloom::GetContainingBatchIdx(block_idx)][spdb_bloom::GetInBatchBlockIdx(block_idx)].num_keys;
+    }
+  }
+
+  void PairBatchBlocks(uint32_t batch_idx) {
+    assert(batch_idx < num_batches_);
+
+    BatchBlocksHistogram& batch_blocks_histrogram = blocks_histogram_[batch_idx];
+
+    std::stable_sort(batch_blocks_histrogram.begin(), batch_blocks_histrogram.end());
+
+    auto& batch_pairing_info = pairing_table_[batch_idx];
+
+    for (auto in_batch_block_idx = 0U; in_batch_block_idx < spdb_bloom::BatchSizeInBlocks; ++in_batch_block_idx) {
+      const auto pair_in_batch_idx = batch_blocks_histrogram.size() - in_batch_block_idx - 1;
+      auto original_batch_block_idx = batch_blocks_histrogram[in_batch_block_idx].original_batch_block_idx;
+
+      batch_pairing_info[original_batch_block_idx].secondary_batch_block_idx =
+          batch_blocks_histrogram[pair_in_batch_idx].original_batch_block_idx;
+      batch_pairing_info[original_batch_block_idx].hash_set_selector =
+          spdb_bloom::GetHashSelector(original_batch_block_idx, batch_blocks_histrogram[pair_in_batch_idx].original_batch_block_idx);
+    }
+  }
+
+  void PairBlocks() {
+    for (auto batch_idx = 0U; batch_idx < num_batches_; ++batch_idx) {
+      PairBatchBlocks(batch_idx);
+    }
+  }
+
+  void BuildBlocks(char* data, uint32_t data_len_bytes) {
+    while (hash_entries_.empty() == false) {
+      uint64_t h = hash_entries_.front();
+      hash_entries_.pop_front();
+
+      const uint32_t primary_block_idx = FastLocalBloomImpl::HashToCacheLineIdx(Lower32of64(h), data_len_bytes);
+      const uint32_t batch_idx = spdb_bloom::GetContainingBatchIdx(primary_block_idx);
+
+      // Primary Block
+      const uint8_t primary_in_batch_block_idx = spdb_bloom::GetInBatchBlockIdx(primary_block_idx);
+      const uint32_t secondary_in_batch_block_idx = pairing_table_[batch_idx][primary_in_batch_block_idx].secondary_batch_block_idx;
+
+      const auto primary_block_hash_selector = pairing_table_[batch_idx][primary_in_batch_block_idx].hash_set_selector;
+      assert(primary_block_hash_selector == 0 || primary_block_hash_selector == 1);
+
+      const uint32_t upper_32_bits_of_hash = Upper32of64(h);
+      const spdb_bloom::EntryHashSet primary_hashes = spdb_bloom::GetHashSetForEntry(Upper32of64(h), primary_block_hash_selector);
+
+      char* const primary_block_address = data + primary_block_idx * spdb_bloom::BlockSizeInBytes;
+      spdb_bloom::block::BuildBlock primary_block(primary_block_address, true);
+      primary_block.SetBlockIdxOfPair(secondary_in_batch_block_idx);
+      primary_block.SetBlockBloomBits(primary_hashes);
+
+      // Secondary Block
+      const uint32_t secondary_block_idx = spdb_bloom::GetStartBlockIdxOfBatch(batch_idx) + secondary_in_batch_block_idx;
+
+      auto secondary_block_hash_selector = 1 - primary_block_hash_selector;
+      assert(secondary_block_hash_selector == pairing_table_[batch_idx][secondary_in_batch_block_idx].hash_set_selector);
+
+      const spdb_bloom::EntryHashSet secondary_hashes = spdb_bloom::GetHashSetForEntry(upper_32_bits_of_hash, secondary_block_hash_selector);
+
+      char* const secondary_block_address = data + secondary_block_idx * spdb_bloom::BlockSizeInBytes;
+      spdb_bloom::block::BuildBlock secondary_block(secondary_block_address, true);
+      secondary_block.SetBlockIdxOfPair(primary_in_batch_block_idx);
+      secondary_block.SetBlockBloomBits(secondary_hashes);
+    }
+  }
+
+  void AddAllEntries(char* data, uint32_t data_len_bytes, int num_probes) {
+    assert(num_probes == spdb_bloom::NumHashFuncs);
+    (void)num_probes;
+
+    InitBlockHistogram();
+    BuildBlocksHistogram(data_len_bytes);
+    PairBlocks();
+    BuildBlocks(data, data_len_bytes);
+  }
+
+  // Target allocation per added key, in thousandths of a bit.
+  int millibits_per_key_;
+};
+
+// TODO: See description in TBD
+class SpeedbBlockBloomBitsReader : public FilterBitsReader {
+ public:
+  SpeedbBlockBloomBitsReader(const char* data, int num_probes, uint32_t len_bytes)
+      : data_(data), num_probes_(num_probes), len_bytes_(len_bytes) {
+    assert(num_probes == spdb_bloom::NumHashFuncs);
+  }
+
+  // No Copy allowed
+  SpeedbBlockBloomBitsReader(const SpeedbBlockBloomBitsReader&) = delete;
+  void operator=(const SpeedbBlockBloomBitsReader&) = delete;
+
+  ~SpeedbBlockBloomBitsReader() override {}
+
+  bool MayMatch(const Slice& key) override {
+    uint64_t h = GetSliceHash64(key);
+
+    uint32_t primary_block_idx = FastLocalBloomImpl::HashToCacheLineIdx(Lower32of64(h), len_bytes_);
+    const char* primary_block_address = data_ + primary_block_idx * spdb_bloom::BlockSizeInBytes;
+
+    spdb_bloom::block::ReadBlock primary_block(primary_block_address, true);
+
+    uint8_t primary_in_batch_block_idx = spdb_bloom::GetInBatchBlockIdx(primary_block_idx);
+    uint8_t secondary_in_batch_block_idx = primary_block.GetBlockIdxOfPair();
+    auto primary_block_hash_selector = spdb_bloom::GetHashSelector(primary_in_batch_block_idx, secondary_in_batch_block_idx);
+
+    const uint32_t upper_32_bits_of_hash = Upper32of64(h);
+    spdb_bloom::EntryHashSet primary_hashes = spdb_bloom::GetHashSetForEntry(upper_32_bits_of_hash, primary_block_hash_selector);
+
+    if (primary_block.AreAllBlockBloomBitsSet(primary_hashes) == false) {
+      return false;
+    }
+
+    uint32_t secondary_block_hash_selector = spdb_bloom::GetHashSelector(secondary_in_batch_block_idx, primary_in_batch_block_idx);
+    assert(primary_block_hash_selector != secondary_block_hash_selector);
+
+    uint32_t batch_idx = spdb_bloom::GetContainingBatchIdx(primary_block_idx);
+    uint32_t secondary_block_idx = spdb_bloom::GetStartBlockIdxOfBatch(batch_idx) + secondary_in_batch_block_idx;
+    const char* secondary_block_address = data_ + secondary_block_idx * spdb_bloom::BlockSizeInBytes;
+
+    spdb_bloom::block::ReadBlock secondary_block(secondary_block_address, true);
+    spdb_bloom::EntryHashSet secondary_hashes = spdb_bloom::GetHashSetForEntry(upper_32_bits_of_hash, secondary_block_hash_selector);
+    return secondary_block.AreAllBlockBloomBitsSet(secondary_hashes);
+  }
+
+  void MayMatch(int num_keys, Slice** keys, bool* may_match) override {
+    for (auto i = 0; i < num_keys; ++i) {
+      may_match[i] = MayMatch(*keys[i]);
+    }
+  }
+
+ private:
+  const char* data_;
+  const int num_probes_;
+  const uint32_t len_bytes_;
+};
+
 // ##################### Ribbon filter implementation ###################
 
 // Implements concept RehasherTypesAndSettings in ribbon_impl.h
@@ -1030,6 +1483,8 @@ static std::string get_filter_config_spec(double bits_per_key,
   switch (mode) {
     case BloomFilterPolicy::kSpdbBloom:
       return "speedb:" + ToString(bits_per_key);
+    case BloomFilterPolicy::kSpdbBlockBloom:
+      return "speedb: Block Bloom";
     case BloomFilterPolicy::kAutoBloom:
     case BloomFilterPolicy::kLegacyBloom:
     case BloomFilterPolicy::kFastLocalBloom:
@@ -1079,7 +1534,8 @@ BloomFilterPolicy::BloomFilterPolicy(double bits_per_key, Mode mode)
 
 BloomFilterPolicy::~BloomFilterPolicy() {}
 
-const char* BloomFilterPolicy::Name() const { return "speedb.HybridFilter"; }
+const char* BloomFilterPolicy::Name() const {
+  return (mode_ == kSpdbBlockBloom) ? "speedb.BlockBloomFilter": "speedb.HybridFilter"; }
 
 const char* BloomFilterPolicy::FilterConfigSpec() const {
   return config_spec_.c_str();
@@ -1170,6 +1626,8 @@ FilterBitsBuilder* BloomFilterPolicy::GetBuilderWithContext(
     case kFastLocalBloom:
       return new FastLocalBloomBitsBuilder(
           millibits_per_key_, offm ? &aggregate_rounding_balance_ : nullptr);
+    case kSpdbBlockBloom:
+      return new SpeedbBlockBloomBitsBuilder(millibits_per_key_, offm ? &aggregate_rounding_balance_ : nullptr);
     case kLegacyBloom:
       if (whole_bits_per_key_ >= 40 && context.info_log &&
           !warned_.load(std::memory_order_relaxed)) {
@@ -1370,6 +1828,8 @@ FilterBitsReader* BloomFilterPolicy::GetBloomBitsReader(
       if (log2_block_bytes == 6) {  // Only block size supported for now
         return new FastLocalBloomBitsReader(contents.data(), num_probes, len);
       }
+    } else if (sub_impl_val == 1) {
+      return new SpeedbBlockBloomBitsReader(contents.data(), num_probes, len);
     }
     // otherwise
     // Reserved / future safe
@@ -1400,6 +1860,10 @@ const FilterPolicy* NewSpdbHybridFilterPolicy() {
   return new BloomFilterPolicy(32, BloomFilterPolicy::kSpdbBloom);
 }
 
+const FilterPolicy* NewSpdbBlockBloomFilterPolicy() {
+  return new BloomFilterPolicy(23.2, BloomFilterPolicy::kSpdbBlockBloom);
+}
+
 class NoFilterPolicy : public FilterPolicy {
  public:
   const char* Name() const override { return kNullptrString.c_str(); }
@@ -1421,6 +1885,8 @@ FilterBuildingContext::FilterBuildingContext(
 
 FilterPolicy::~FilterPolicy() { }
 
+// TODO: WHAT TO DO HERE?
+
 Status FilterPolicy::CreateFromString(
     const ConfigOptions& /*options*/, const std::string& value,
     std::shared_ptr<const FilterPolicy>* policy) {
diff --git a/table/block_based/filter_policy_internal.h b/table/block_based/filter_policy_internal.h
index 2eb2dcb6a..70c3d948e 100644
--- a/table/block_based/filter_policy_internal.h
+++ b/table/block_based/filter_policy_internal.h
@@ -74,6 +74,7 @@ class BloomFilterPolicy : public FilterPolicy {
     // context at build time, including compatibility with format_version.
     kAutoBloom = 100,
     kSpdbBloom = 101,
+    kSpdbBlockBloom = 102
   };
   // All the different underlying implementations that a BloomFilterPolicy
   // might use, as a mode that says "always use this implementation."
diff --git a/util/bloom_impl.h b/util/bloom_impl.h
index 546267d94..d86a4fcaa 100644
--- a/util/bloom_impl.h
+++ b/util/bloom_impl.h
@@ -209,10 +209,20 @@ class FastLocalBloomImpl {
     }
   }
 
+  static inline uint32_t HashToCacheLineIdx(uint32_t h1, uint32_t len_bytes) {
+    return FastRange32(h1, len_bytes >> 6);
+  }
+
+  static inline void PrefetchCacheLine(const char* cacheline_address) {
+    PREFETCH(cacheline_address, 0 /* rw */, 1 /* locality */);
+    PREFETCH(cacheline_address + 63, 0 /* rw */, 1 /* locality */);
+  }
+
   static inline void PrepareHash(uint32_t h1, uint32_t len_bytes,
                                  const char *data,
                                  uint32_t /*out*/ *byte_offset) {
-    uint32_t bytes_to_cache_line = FastRange32(len_bytes >> 6, h1) << 6;
+    // Select a cache line
+    uint32_t bytes_to_cache_line = FastRange32(h1, len_bytes >> 6) << 6;
     PREFETCH(data + bytes_to_cache_line, 0 /* rw */, 1 /* locality */);
     PREFETCH(data + bytes_to_cache_line + 63, 0 /* rw */, 1 /* locality */);
     *byte_offset = bytes_to_cache_line;