From 140c3ee9f72d77a82a87e46dd4c8cebd8c0473b9 Mon Sep 17 00:00:00 2001
From: Udi
Date: Thu, 7 Mar 2024 15:50:56 +0200
Subject: [PATCH 1/2] Support Speedb's Paired Bloom Filter in
 db_bloom_filter_test - With CR Updates

---
 db/db_bloom_filter_test.cc                       |  204 +-
 .../paired_filter/speedb_db_bloom_filter_test.cc | 2728 -----------------
 plugin/speedb/speedb.mk                          |    8 +-
 3 files changed, 165 insertions(+), 2775 deletions(-)
 delete mode 100644 plugin/speedb/paired_filter/speedb_db_bloom_filter_test.cc

diff --git a/db/db_bloom_filter_test.cc b/db/db_bloom_filter_test.cc
index bdeb5706d6..897725d2bf 100644
--- a/db/db_bloom_filter_test.cc
+++ b/db/db_bloom_filter_test.cc
@@ -31,11 +31,6 @@ namespace ROCKSDB_NAMESPACE {
 
-namespace {
-std::shared_ptr<const FilterPolicy> Create(double bits_per_key,
-                                           const std::string& name) {
-  return BloomLikeFilterPolicy::Create(name, bits_per_key);
-}
 const std::string kLegacyBloom = test::LegacyBloomFilterPolicy::kClassName();
 const std::string kFastLocalBloom =
     test::FastLocalBloomFilterPolicy::kClassName();
@@ -43,6 +38,29 @@ const std::string kStandard128Ribbon =
     test::Standard128RibbonFilterPolicy::kClassName();
 const std::string kAutoBloom = BloomFilterPolicy::kClassName();
 const std::string kAutoRibbon = RibbonFilterPolicy::kClassName();
+const std::string kSpeedbPairedBloomFilter = "speedb.PairedBloomFilter";
+
+namespace {
+bool IsPairedBloomFilterName(const std::string& name) {
+  return (name == kSpeedbPairedBloomFilter);
+}
+
+std::shared_ptr<const FilterPolicy> Create(double bits_per_key,
+                                           const std::string& name) {
+  if (IsPairedBloomFilterName(name) == false) {
+    return BloomLikeFilterPolicy::Create(name, bits_per_key);
+  } else {
+    ConfigOptions config_options;
+    config_options.ignore_unsupported_options = false;
+    std::shared_ptr<const FilterPolicy> filter_policy;
+    Status s = FilterPolicy::CreateFromString(
+        config_options,
+        kSpeedbPairedBloomFilter + ":" + std::to_string(bits_per_key),
+        &filter_policy);
+    EXPECT_NE(filter_policy, nullptr);
+    return filter_policy;
+  }
+}
 
 template <typename T>
 T Pop(T& var) {
@@ -63,11 +81,24 @@ class DBBloomFilterTest : public DBTestBase {
       : DBTestBase("db_bloom_filter_test", /*env_do_fsync=*/true) {}
 };
 
+class DBBloomFilterTestWithPairedBloomOnOff
+    : public DBTestBase,
+      public testing::WithParamInterface<bool> {
+ public:
+  DBBloomFilterTestWithPairedBloomOnOff()
+      : DBTestBase("db_bloom_filter_tests", /*env_do_fsync=*/true) {}
+
+  void SetUp() override {
+    use_paired_bloom_ = GetParam();
+  }
+
+  bool use_paired_bloom_ = false;
+};
+
 class DBBloomFilterTestWithParam
     : public DBTestBase,
       public testing::WithParamInterface<
           std::tuple<std::string, bool, uint32_t>> {
-  // public testing::WithParamInterface {
  protected:
   std::string bfp_impl_;
   bool partition_filters_;
@@ -189,7 +220,7 @@ TEST_P(DBBloomFilterTestDefFormatVersion, KeyMayExist) {
       ChangeOptions(kSkipPlainTable | kSkipHashIndex | kSkipFIFOCompaction));
 }
 
-TEST_F(DBBloomFilterTest, GetFilterByPrefixBloomCustomPrefixExtractor) {
+TEST_P(DBBloomFilterTestWithPairedBloomOnOff, GetFilterByPrefixBloomCustomPrefixExtractor) {
   for (bool partition_filters : {true, false}) {
     Options options = last_options_;
     options.prefix_extractor =
         std::make_shared<SliceTransformLimitedDomainGeneric>();
     options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
     get_perf_context()->EnablePerLevelPerfContext();
     BlockBasedTableOptions bbto;
-    bbto.filter_policy.reset(NewBloomFilterPolicy(10));
+    constexpr double bpk = 10;
+    if (use_paired_bloom_) {
+      bbto.filter_policy = Create(bpk, kSpeedbPairedBloomFilter);
+    } else {
bbto.filter_policy.reset(NewBloomFilterPolicy(bpk)); + } + if (partition_filters) { bbto.partition_filters = true; bbto.index_type = BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch; @@ -261,14 +298,20 @@ TEST_F(DBBloomFilterTest, GetFilterByPrefixBloomCustomPrefixExtractor) { } } -TEST_F(DBBloomFilterTest, GetFilterByPrefixBloom) { +TEST_P(DBBloomFilterTestWithPairedBloomOnOff, GetFilterByPrefixBloom) { for (bool partition_filters : {true, false}) { Options options = last_options_; options.prefix_extractor.reset(NewFixedPrefixTransform(8)); options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); get_perf_context()->EnablePerLevelPerfContext(); BlockBasedTableOptions bbto; - bbto.filter_policy.reset(NewBloomFilterPolicy(10)); + constexpr double bpk = 10; + if (use_paired_bloom_) { + bbto.filter_policy = Create(bpk, kSpeedbPairedBloomFilter); + } else { + bbto.filter_policy.reset(NewBloomFilterPolicy(bpk)); + } + if (partition_filters) { bbto.partition_filters = true; bbto.index_type = BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch; @@ -321,7 +364,7 @@ TEST_F(DBBloomFilterTest, GetFilterByPrefixBloom) { } } -TEST_F(DBBloomFilterTest, WholeKeyFilterProp) { +TEST_P(DBBloomFilterTestWithPairedBloomOnOff, WholeKeyFilterProp) { for (bool partition_filters : {true, false}) { Options options = last_options_; options.prefix_extractor.reset(NewFixedPrefixTransform(3)); @@ -329,7 +372,12 @@ TEST_F(DBBloomFilterTest, WholeKeyFilterProp) { get_perf_context()->EnablePerLevelPerfContext(); BlockBasedTableOptions bbto; - bbto.filter_policy.reset(NewBloomFilterPolicy(10)); + constexpr double bpk = 10; + if (use_paired_bloom_) { + bbto.filter_policy = Create(bpk, kSpeedbPairedBloomFilter); + } else { + bbto.filter_policy.reset(NewBloomFilterPolicy(bpk)); + } bbto.whole_key_filtering = false; if (partition_filters) { bbto.partition_filters = true; @@ -593,15 +641,19 @@ TEST_P(DBBloomFilterTestWithParam, BloomFilter) { ASSERT_TRUE(db_->GetMapProperty( handles_[1], DB::Properties::kAggregatedTableProperties, &props)); uint64_t nkeys = N + N / 100; - uint64_t filter_size = ParseUint64(props["filter_size"]); - EXPECT_LE(filter_size, - (partition_filters_ ? 12 : 11) * nkeys / /*bits / byte*/ 8); - if (bfp_impl_ == kAutoRibbon) { - // Sometimes using Ribbon filter which is more space-efficient - EXPECT_GE(filter_size, 7 * nkeys / /*bits / byte*/ 8); - } else { - // Always Bloom - EXPECT_GE(filter_size, 10 * nkeys / /*bits / byte*/ 8); + + // Size calculations for the PairedBloomFilter are tricky => Skip them. + if (IsPairedBloomFilterName(bfp_impl_) == false) { + uint64_t filter_size = ParseUint64(props["filter_size"]); + EXPECT_LE(filter_size, + (partition_filters_ ? 
12 : 11) * nkeys / /*bits / byte*/ 8); + if (bfp_impl_ == kAutoRibbon) { + // Sometimes using Ribbon filter which is more space-efficient + EXPECT_GE(filter_size, 7 * nkeys / /*bits / byte*/ 8); + } else { + // Always Bloom + EXPECT_GE(filter_size, 10 * nkeys / /*bits / byte*/ 8); + } } uint64_t num_filter_entries = ParseUint64(props["num_filter_entries"]); @@ -762,20 +814,33 @@ INSTANTIATE_TEST_CASE_P( ::testing::Values( std::make_tuple(kAutoBloom, true, test::kDefaultFormatVersion), std::make_tuple(kAutoBloom, false, test::kDefaultFormatVersion), - std::make_tuple(kAutoRibbon, false, test::kDefaultFormatVersion))); + std::make_tuple(kAutoRibbon, false, test::kDefaultFormatVersion), + std::make_tuple(kSpeedbPairedBloomFilter, true, + test::kDefaultFormatVersion), + std::make_tuple(kSpeedbPairedBloomFilter, false, + test::kDefaultFormatVersion))); + +INSTANTIATE_TEST_CASE_P(DBBloomFilterTestWithPairedBloomOnOff, DBBloomFilterTestWithPairedBloomOnOff, testing::Bool()); INSTANTIATE_TEST_CASE_P( FormatDef, DBBloomFilterTestWithParam, ::testing::Values( std::make_tuple(kAutoBloom, true, test::kDefaultFormatVersion), std::make_tuple(kAutoBloom, false, test::kDefaultFormatVersion), - std::make_tuple(kAutoRibbon, false, test::kDefaultFormatVersion))); + std::make_tuple(kAutoRibbon, false, test::kDefaultFormatVersion), + std::make_tuple(kSpeedbPairedBloomFilter, true, + test::kDefaultFormatVersion), + std::make_tuple(kSpeedbPairedBloomFilter, false, + test::kDefaultFormatVersion))); INSTANTIATE_TEST_CASE_P( FormatLatest, DBBloomFilterTestWithParam, ::testing::Values(std::make_tuple(kAutoBloom, true, kLatestFormatVersion), std::make_tuple(kAutoBloom, false, kLatestFormatVersion), - std::make_tuple(kAutoRibbon, false, + std::make_tuple(kAutoRibbon, false, kLatestFormatVersion), + std::make_tuple(kSpeedbPairedBloomFilter, true, + kLatestFormatVersion), + std::make_tuple(kSpeedbPairedBloomFilter, false, kLatestFormatVersion))); #endif // !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN) @@ -1036,7 +1101,19 @@ INSTANTIATE_TEST_CASE_P( kStandard128Ribbon, true, true), std::make_tuple(CacheEntryRoleOptions::Decision::kEnabled, kLegacyBloom, - false, false))); + false, false), + + std::make_tuple(CacheEntryRoleOptions::Decision::kDisabled, + kSpeedbPairedBloomFilter, false, false), + + std::make_tuple(CacheEntryRoleOptions::Decision::kEnabled, + kSpeedbPairedBloomFilter, false, false), + std::make_tuple(CacheEntryRoleOptions::Decision::kEnabled, + kSpeedbPairedBloomFilter, false, true), + std::make_tuple(CacheEntryRoleOptions::Decision::kEnabled, + kSpeedbPairedBloomFilter, true, false), + std::make_tuple(CacheEntryRoleOptions::Decision::kEnabled, + kSpeedbPairedBloomFilter, true, true))); // TODO: Speed up this test, and reduce disk space usage (~700MB) // The current test inserts many keys (on the scale of dummy entry size) @@ -1418,7 +1495,10 @@ INSTANTIATE_TEST_CASE_P( std::make_tuple(true, kFastLocalBloom, false), std::make_tuple(true, kFastLocalBloom, true), std::make_tuple(true, kStandard128Ribbon, false), - std::make_tuple(true, kStandard128Ribbon, true))); + std::make_tuple(true, kStandard128Ribbon, true), + std::make_tuple(false, kSpeedbPairedBloomFilter, false), + std::make_tuple(true, kSpeedbPairedBloomFilter, false), + std::make_tuple(true, kSpeedbPairedBloomFilter, true))); TEST_P(DBFilterConstructionCorruptionTestWithParam, DetectCorruption) { Options options = CurrentOptions(); @@ -1793,9 +1873,14 @@ class SliceTransformLimitedDomain : public SliceTransform { } }; 
-TEST_F(DBBloomFilterTest, PrefixExtractorWithFilter1) { +TEST_P(DBBloomFilterTestWithPairedBloomOnOff, PrefixExtractorWithFilter1) { BlockBasedTableOptions bbto; - bbto.filter_policy.reset(ROCKSDB_NAMESPACE::NewBloomFilterPolicy(10)); + constexpr double bpk = 10; + if (use_paired_bloom_) { + bbto.filter_policy = Create(bpk, kSpeedbPairedBloomFilter); + } else { + bbto.filter_policy.reset(NewBloomFilterPolicy(bpk)); + } bbto.whole_key_filtering = false; Options options = CurrentOptions(); @@ -1821,9 +1906,14 @@ TEST_F(DBBloomFilterTest, PrefixExtractorWithFilter1) { ASSERT_EQ(Get("zzzzz_AAAA"), "val5"); } -TEST_F(DBBloomFilterTest, PrefixExtractorWithFilter2) { +TEST_P(DBBloomFilterTestWithPairedBloomOnOff, PrefixExtractorWithFilter2) { BlockBasedTableOptions bbto; - bbto.filter_policy.reset(ROCKSDB_NAMESPACE::NewBloomFilterPolicy(10)); + constexpr double bpk = 10; + if (use_paired_bloom_) { + bbto.filter_policy = Create(bpk, kSpeedbPairedBloomFilter); + } else { + bbto.filter_policy.reset(NewBloomFilterPolicy(bpk)); + } Options options = CurrentOptions(); options.prefix_extractor = std::make_shared(); @@ -2010,6 +2100,7 @@ TEST_P(DBBloomFilterTestVaryPrefixAndFormatVer, PartitionedMultiGet) { options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); BlockBasedTableOptions bbto; bbto.filter_policy.reset(NewBloomFilterPolicy(20)); + bbto.partition_filters = true; bbto.index_type = BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch; bbto.whole_key_filtering = !use_prefix_; @@ -2342,10 +2433,12 @@ INSTANTIATE_TEST_CASE_P( std::make_tuple(kLegacyBloom, true), std::make_tuple(kFastLocalBloom, false), std::make_tuple(kFastLocalBloom, true), - std::make_tuple(kPlainTable, false))); + std::make_tuple(kPlainTable, false), + std::make_tuple(kSpeedbPairedBloomFilter, false), + std::make_tuple(kSpeedbPairedBloomFilter, true))); namespace { -void PrefixScanInit(DBBloomFilterTest* dbtest) { +void PrefixScanInit(DBBloomFilterTestWithPairedBloomOnOff* dbtest) { char buf[100]; std::string keystr; const int small_range_sstfiles = 5; @@ -2395,7 +2488,7 @@ void PrefixScanInit(DBBloomFilterTest* dbtest) { } } // anonymous namespace -TEST_F(DBBloomFilterTest, PrefixScan) { +TEST_P(DBBloomFilterTestWithPairedBloomOnOff, PrefixScan) { while (ChangeFilterOptions()) { int count; Slice prefix; @@ -2422,7 +2515,12 @@ TEST_F(DBBloomFilterTest, PrefixScan) { BlockBasedTableOptions table_options; table_options.no_block_cache = true; - table_options.filter_policy.reset(NewBloomFilterPolicy(10)); + constexpr double bpk = 10; + if (use_paired_bloom_) { + table_options.filter_policy = Create(bpk, kSpeedbPairedBloomFilter); + } else { + table_options.filter_policy.reset(NewBloomFilterPolicy(bpk)); + } table_options.whole_key_filtering = false; options.table_factory.reset(NewBlockBasedTableFactory(table_options)); @@ -2446,6 +2544,8 @@ TEST_F(DBBloomFilterTest, PrefixScan) { } // end of while } +// Speedb Paired Bloom Filters currently do NOT support the 'optimize_filters_for_hits' options => +// This test doesn't cover paired bloom filters TEST_F(DBBloomFilterTest, OptimizeFiltersForHits) { const int kNumKeysPerFlush = 1000; @@ -3078,7 +3178,7 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterOptions) { } } -TEST_F(DBBloomFilterTest, SeekForPrevWithPartitionedFilters) { +TEST_P(DBBloomFilterTestWithPairedBloomOnOff, SeekForPrevWithPartitionedFilters) { Options options = CurrentOptions(); constexpr size_t kNumKeys = 10000; static_assert(kNumKeys <= 10000, "kNumKeys have to be <= 10000"); @@ -3089,7 +3189,12 @@ 
TEST_F(DBBloomFilterTest, SeekForPrevWithPartitionedFilters) { options.prefix_extractor.reset(NewFixedPrefixTransform(kPrefixLength)); options.compression = kNoCompression; BlockBasedTableOptions bbto; - bbto.filter_policy.reset(NewBloomFilterPolicy(50)); + constexpr double bpk = 50; + if (use_paired_bloom_) { + bbto.filter_policy = Create(bpk, kSpeedbPairedBloomFilter); + } else { + bbto.filter_policy.reset(NewBloomFilterPolicy(bpk)); + } bbto.index_shortening = BlockBasedTableOptions::IndexShorteningMode::kNoShortening; bbto.block_size = 128; @@ -3189,9 +3294,14 @@ std::pair HitAndMiss(uint64_t hits, uint64_t misses) { // one of the old obsolete, unnecessary axioms of prefix extraction: // * key.starts_with(prefix(key)) // This axiom is not really needed, and we validate that here. -TEST_F(DBBloomFilterTest, WeirdPrefixExtractorWithFilter1) { +TEST_P(DBBloomFilterTestWithPairedBloomOnOff, WeirdPrefixExtractorWithFilter1) { BlockBasedTableOptions bbto; - bbto.filter_policy.reset(ROCKSDB_NAMESPACE::NewBloomFilterPolicy(10)); + constexpr double bpk = 10; + if (use_paired_bloom_) { + bbto.filter_policy = Create(bpk, kSpeedbPairedBloomFilter); + } else { + bbto.filter_policy.reset(NewBloomFilterPolicy(bpk)); + } bbto.whole_key_filtering = false; Options options = CurrentOptions(); @@ -3257,9 +3367,14 @@ TEST_F(DBBloomFilterTest, WeirdPrefixExtractorWithFilter1) { // one of the old obsolete, unnecessary axioms of prefix extraction: // * Compare(prefix(key), key) <= 0 // This axiom is not really needed, and we validate that here. -TEST_F(DBBloomFilterTest, WeirdPrefixExtractorWithFilter2) { +TEST_P(DBBloomFilterTestWithPairedBloomOnOff, WeirdPrefixExtractorWithFilter2) { BlockBasedTableOptions bbto; - bbto.filter_policy.reset(ROCKSDB_NAMESPACE::NewBloomFilterPolicy(10)); + constexpr double bpk = 10; + if (use_paired_bloom_) { + bbto.filter_policy = Create(bpk, kSpeedbPairedBloomFilter); + } else { + bbto.filter_policy.reset(NewBloomFilterPolicy(bpk)); + } bbto.whole_key_filtering = false; Options options = CurrentOptions(); @@ -3398,9 +3513,14 @@ class NonIdempotentFixed4Transform : public SliceTransform { // * prefix(prefix(key)) == prefix(key) // * If Compare(k1, k2) <= 0, then Compare(prefix(k1), prefix(k2)) <= 0 // This axiom is not really needed, and we validate that here. -TEST_F(DBBloomFilterTest, WeirdPrefixExtractorWithFilter3) { +TEST_P(DBBloomFilterTestWithPairedBloomOnOff, WeirdPrefixExtractorWithFilter3) { BlockBasedTableOptions bbto; - bbto.filter_policy.reset(ROCKSDB_NAMESPACE::NewBloomFilterPolicy(10)); + constexpr double bpk = 10; + if (use_paired_bloom_) { + bbto.filter_policy = Create(bpk, kSpeedbPairedBloomFilter); + } else { + bbto.filter_policy.reset(NewBloomFilterPolicy(bpk)); + } bbto.whole_key_filtering = false; Options options = CurrentOptions(); diff --git a/plugin/speedb/paired_filter/speedb_db_bloom_filter_test.cc b/plugin/speedb/paired_filter/speedb_db_bloom_filter_test.cc deleted file mode 100644 index 2fb18b5737..0000000000 --- a/plugin/speedb/paired_filter/speedb_db_bloom_filter_test.cc +++ /dev/null @@ -1,2728 +0,0 @@ -// Copyright (C) 2022 Speedb Ltd. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include -#include -#include - -#include "cache/cache_reservation_manager.h" -#include "db/db_test_util.h" -#include "options/options_helper.h" -#include "plugin/speedb/paired_filter/speedb_paired_bloom.h" -#include "port/stack_trace.h" -#include "rocksdb/advanced_options.h" -#include "rocksdb/cache.h" -#include "rocksdb/convenience.h" -#include "rocksdb/filter_policy.h" -#include "rocksdb/perf_context.h" -#include "rocksdb/statistics.h" -#include "rocksdb/table.h" -#include "table/block_based/block_based_table_reader.h" -#include "table/block_based/filter_policy_internal.h" -#include "table/format.h" -#include "test_util/testutil.h" -#include "util/string_util.h" - -namespace ROCKSDB_NAMESPACE { - -namespace { -std::shared_ptr Create(double bits_per_key, - const std::string& name) { - if (name == SpdbPairedBloomFilterPolicy::kClassName()) { - return std::make_shared(bits_per_key); - } else { - return nullptr; - } -} -const std::string kSpdbPairedBloom = SpdbPairedBloomFilterPolicy::kClassName(); - -} // namespace - -// DB tests related to Speedb's Paired Block Bloom Filter. - -class DISABLED_SpdbDBBloomFilterTest : public DBTestBase { - public: - DISABLED_SpdbDBBloomFilterTest() - : DBTestBase("speedb_db_bloom_filter_test", /*env_do_fsync=*/true) {} -}; - -class DISABLED_SpdbDBBloomFilterTestWithParam - : public DBTestBase, - public testing::WithParamInterface> { - protected: - bool partition_filters_; - - public: - DISABLED_SpdbDBBloomFilterTestWithParam() - : DBTestBase("speedb_db_bloom_filter_test", /*env_do_fsync=*/true) {} - - ~DISABLED_SpdbDBBloomFilterTestWithParam() override {} - - void SetUp() override { partition_filters_ = std::get<0>(GetParam()); } -}; - -class SpdbDBBloomFilterTestDefFormatVersion - : public DISABLED_SpdbDBBloomFilterTestWithParam {}; - -class SliceTransformLimitedDomainGeneric : public SliceTransform { - static constexpr size_t kPrefixLen = 5U; - - const char* Name() const override { - return "SliceTransformLimitedDomainGeneric"; - } - - Slice Transform(const Slice& src) const override { - return Slice(src.data(), kPrefixLen); - } - - bool InDomain(const Slice& src) const override { - // prefix will be x???? - return src.size() >= kPrefixLen; - } - - bool InRange(const Slice& dst) const override { - // prefix will be x???? - return dst.size() == kPrefixLen; - } -}; - -// KeyMayExist can lead to a few false positives, but not false negatives. 
-// To make test deterministic, use a much larger number of bits per key-20 than -// bits in the key, so that false positives are eliminated -TEST_P(SpdbDBBloomFilterTestDefFormatVersion, KeyMayExist) { - do { - ReadOptions ropts; - std::string value; - anon::OptionsOverride options_override; - options_override.filter_policy = Create(20, kSpdbPairedBloom); - options_override.partition_filters = partition_filters_; - options_override.metadata_block_size = 32; - Options options = CurrentOptions(options_override); - if (partition_filters_) { - auto* table_options = - options.table_factory->GetOptions(); - if (table_options != nullptr && - table_options->index_type != - BlockBasedTableOptions::kTwoLevelIndexSearch) { - // In the current implementation partitioned filters depend on - // partitioned indexes - continue; - } - } - options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - CreateAndReopenWithCF({"pikachu"}, options); - - ASSERT_TRUE(!db_->KeyMayExist(ropts, handles_[1], "a", &value)); - - ASSERT_OK(Put(1, "a", "b")); - bool value_found = false; - ASSERT_TRUE( - db_->KeyMayExist(ropts, handles_[1], "a", &value, &value_found)); - ASSERT_TRUE(value_found); - ASSERT_EQ("b", value); - - ASSERT_OK(Flush(1)); - value.clear(); - - uint64_t numopen = TestGetTickerCount(options, NO_FILE_OPENS); - uint64_t cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD); - ASSERT_TRUE( - db_->KeyMayExist(ropts, handles_[1], "a", &value, &value_found)); - ASSERT_TRUE(!value_found); - // assert that no new files were opened and no new blocks were - // read into block cache. - ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS)); - ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD)); - - ASSERT_OK(Delete(1, "a")); - - numopen = TestGetTickerCount(options, NO_FILE_OPENS); - cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD); - ASSERT_TRUE(!db_->KeyMayExist(ropts, handles_[1], "a", &value)); - ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS)); - ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD)); - - ASSERT_OK(Flush(1)); - ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1], - true /* disallow trivial move */)); - - numopen = TestGetTickerCount(options, NO_FILE_OPENS); - cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD); - ASSERT_TRUE(!db_->KeyMayExist(ropts, handles_[1], "a", &value)); - ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS)); - ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD)); - - ASSERT_OK(Delete(1, "c")); - - numopen = TestGetTickerCount(options, NO_FILE_OPENS); - cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD); - ASSERT_TRUE(!db_->KeyMayExist(ropts, handles_[1], "c", &value)); - ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS)); - ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD)); - - // KeyMayExist function only checks data in block caches, which is not used - // by plain table format. 
- } while ( - ChangeOptions(kSkipPlainTable | kSkipHashIndex | kSkipFIFOCompaction)); -} - -TEST_P(DISABLED_SpdbDBBloomFilterTestWithParam, - GetFilterByPrefixBloomCustomPrefixExtractor) { - Options options = last_options_; - options.prefix_extractor = - std::make_shared(); - options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - get_perf_context()->Reset(); - get_perf_context()->EnablePerLevelPerfContext(); - BlockBasedTableOptions bbto; - bbto.filter_policy.reset(new SpdbPairedBloomFilterPolicy(20)); - if (partition_filters_) { - bbto.partition_filters = true; - bbto.index_type = BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch; - } - bbto.whole_key_filtering = false; - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - DestroyAndReopen(options); - - WriteOptions wo; - ReadOptions ro; - FlushOptions fo; - fo.wait = true; - std::string value; - - ASSERT_OK(dbfull()->Put(wo, "barbarbar", "foo")); - ASSERT_OK(dbfull()->Put(wo, "barbarbar2", "foo2")); - ASSERT_OK(dbfull()->Put(wo, "foofoofoo", "bar")); - - ASSERT_OK(dbfull()->Flush(fo)); - - ASSERT_EQ("foo", Get("barbarbar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0); - ASSERT_EQ( - 0, (*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful); - ASSERT_EQ("foo2", Get("barbarbar2")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0); - ASSERT_EQ( - 0, (*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful); - ASSERT_EQ("NOT_FOUND", Get("barbarbar3")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0); - ASSERT_EQ( - 0, (*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful); - - ASSERT_EQ("NOT_FOUND", Get("barfoofoo")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1); - ASSERT_EQ( - 1, (*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful); - - ASSERT_EQ("NOT_FOUND", Get("foobarbar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 2); - ASSERT_EQ( - 2, (*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful); - - ro.total_order_seek = true; - // NOTE: total_order_seek no longer affects Get() - ASSERT_EQ("NOT_FOUND", Get("foobarbar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 3); - ASSERT_EQ( - 3, (*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful); - - // No bloom on extractor changed - - ASSERT_OK(db_->SetOptions({{"prefix_extractor", "capped:10"}})); - ASSERT_EQ("NOT_FOUND", Get("foobarbar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 3); - ASSERT_EQ( - 3, (*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful); - - get_perf_context()->Reset(); -} - -TEST_P(DISABLED_SpdbDBBloomFilterTestWithParam, GetFilterByPrefixBloom) { - Options options = last_options_; - options.prefix_extractor.reset(NewFixedPrefixTransform(8)); - options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - get_perf_context()->EnablePerLevelPerfContext(); - BlockBasedTableOptions bbto; - bbto.filter_policy.reset(new SpdbPairedBloomFilterPolicy(20)); - if (partition_filters_) { - bbto.partition_filters = true; - bbto.index_type = BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch; - } - bbto.whole_key_filtering = false; - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - DestroyAndReopen(options); - - WriteOptions wo; - ReadOptions ro; - FlushOptions fo; - fo.wait = true; - std::string value; - - ASSERT_OK(dbfull()->Put(wo, "barbarbar", "foo")); - ASSERT_OK(dbfull()->Put(wo, "barbarbar2", 
"foo2")); - ASSERT_OK(dbfull()->Put(wo, "foofoofoo", "bar")); - - ASSERT_OK(dbfull()->Flush(fo)); - - ASSERT_EQ("foo", Get("barbarbar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0); - ASSERT_EQ("foo2", Get("barbarbar2")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0); - ASSERT_EQ("NOT_FOUND", Get("barbarbar3")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0); - - ASSERT_EQ("NOT_FOUND", Get("barfoofoo")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1); - - ASSERT_EQ("NOT_FOUND", Get("foobarbar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 2); - - ro.total_order_seek = true; - // NOTE: total_order_seek no longer affects Get() - ASSERT_EQ("NOT_FOUND", Get("foobarbar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 3); - ASSERT_EQ( - 3, (*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful); - - // No bloom on extractor changed - - ASSERT_OK(db_->SetOptions({{"prefix_extractor", "capped:10"}})); - ASSERT_EQ("NOT_FOUND", Get("foobarbar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 3); - ASSERT_EQ( - 3, (*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful); - - get_perf_context()->Reset(); -} - -TEST_P(DISABLED_SpdbDBBloomFilterTestWithParam, WholeKeyFilterProp) { - for (bool partition_filters : {true, false}) { - Options options = last_options_; - options.prefix_extractor.reset(NewFixedPrefixTransform(3)); - options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - get_perf_context()->EnablePerLevelPerfContext(); - - BlockBasedTableOptions bbto; - bbto.filter_policy.reset(new SpdbPairedBloomFilterPolicy(20)); - bbto.whole_key_filtering = false; - if (partition_filters) { - bbto.partition_filters = true; - bbto.index_type = BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch; - } - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - DestroyAndReopen(options); - - WriteOptions wo; - ReadOptions ro; - FlushOptions fo; - fo.wait = true; - std::string value; - - ASSERT_OK(dbfull()->Put(wo, "foobar", "foo")); - // Needs insert some keys to make sure files are not filtered out by key - // ranges. - ASSERT_OK(dbfull()->Put(wo, "aaa", "")); - ASSERT_OK(dbfull()->Put(wo, "zzz", "")); - ASSERT_OK(dbfull()->Flush(fo)); - - Reopen(options); - ASSERT_EQ("NOT_FOUND", Get("foo")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0); - ASSERT_EQ("NOT_FOUND", Get("bar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1); - ASSERT_EQ("foo", Get("foobar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1); - - // Reopen with whole key filtering enabled and prefix extractor - // NULL. Bloom filter should be off for both of whole key and - // prefix bloom. - bbto.whole_key_filtering = true; - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - options.prefix_extractor.reset(); - Reopen(options); - - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1); - ASSERT_EQ("NOT_FOUND", Get("foo")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1); - ASSERT_EQ("NOT_FOUND", Get("bar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1); - ASSERT_EQ("foo", Get("foobar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1); - // Write DB with only full key filtering. - ASSERT_OK(dbfull()->Put(wo, "foobar", "foo")); - // Needs insert some keys to make sure files are not filtered out by key - // ranges. 
- ASSERT_OK(dbfull()->Put(wo, "aaa", "")); - ASSERT_OK(dbfull()->Put(wo, "zzz", "")); - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - - // Reopen with both of whole key off and prefix extractor enabled. - // Still no bloom filter should be used. - options.prefix_extractor.reset(NewFixedPrefixTransform(3)); - bbto.whole_key_filtering = false; - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - Reopen(options); - - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1); - ASSERT_EQ("NOT_FOUND", Get("foo")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1); - ASSERT_EQ("NOT_FOUND", Get("bar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1); - ASSERT_EQ("foo", Get("foobar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1); - - // Try to create a DB with mixed files: - ASSERT_OK(dbfull()->Put(wo, "foobar", "foo")); - // Needs insert some keys to make sure files are not filtered out by key - // ranges. - ASSERT_OK(dbfull()->Put(wo, "aaa", "")); - ASSERT_OK(dbfull()->Put(wo, "zzz", "")); - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - - options.prefix_extractor.reset(); - bbto.whole_key_filtering = true; - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - Reopen(options); - - // Try to create a DB with mixed files. - ASSERT_OK(dbfull()->Put(wo, "barfoo", "bar")); - // In this case needs insert some keys to make sure files are - // not filtered out by key ranges. - ASSERT_OK(dbfull()->Put(wo, "aaa", "")); - ASSERT_OK(dbfull()->Put(wo, "zzz", "")); - ASSERT_OK(Flush()); - - // Now we have two files: - // File 1: An older file with prefix bloom. - // File 2: A newer file with whole bloom filter. - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1); - ASSERT_EQ("NOT_FOUND", Get("foo")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 2); - ASSERT_EQ("NOT_FOUND", Get("bar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 3); - ASSERT_EQ("foo", Get("foobar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 4); - ASSERT_EQ("bar", Get("barfoo")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 4); - - // Reopen with the same setting: only whole key is used - Reopen(options); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 4); - ASSERT_EQ("NOT_FOUND", Get("foo")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 5); - ASSERT_EQ("NOT_FOUND", Get("bar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 6); - ASSERT_EQ("foo", Get("foobar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 7); - ASSERT_EQ("bar", Get("barfoo")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 7); - - // Restart with both filters are allowed - options.prefix_extractor.reset(NewFixedPrefixTransform(3)); - bbto.whole_key_filtering = true; - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - Reopen(options); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 7); - // File 1 will has it filtered out. - // File 2 will not, as prefix `foo` exists in the file. 
- ASSERT_EQ("NOT_FOUND", Get("foo")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 8); - ASSERT_EQ("NOT_FOUND", Get("bar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 10); - ASSERT_EQ("foo", Get("foobar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 11); - ASSERT_EQ("bar", Get("barfoo")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 11); - - // Restart with only prefix bloom is allowed. - options.prefix_extractor.reset(NewFixedPrefixTransform(3)); - bbto.whole_key_filtering = false; - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - Reopen(options); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 11); - ASSERT_EQ("NOT_FOUND", Get("foo")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 11); - ASSERT_EQ("NOT_FOUND", Get("bar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 12); - ASSERT_EQ("foo", Get("foobar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 12); - ASSERT_EQ("bar", Get("barfoo")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 12); - uint64_t bloom_filter_useful_all_levels = 0; - for (auto& kv : (*(get_perf_context()->level_to_perf_context))) { - if (kv.second.bloom_filter_useful > 0) { - bloom_filter_useful_all_levels += kv.second.bloom_filter_useful; - } - } - ASSERT_EQ(12, bloom_filter_useful_all_levels); - get_perf_context()->Reset(); - } -} - -TEST_P(DISABLED_SpdbDBBloomFilterTestWithParam, BloomFilter) { - do { - Options options = CurrentOptions(); - env_->count_random_reads_ = true; - options.env = env_; - // ChangeCompactOptions() only changes compaction style, which does not - // trigger reset of table_factory - BlockBasedTableOptions table_options; - table_options.no_block_cache = true; - const auto kBpk = 20U; - const auto bytes_per_key = kBpk / 8; - table_options.filter_policy = Create(kBpk, kSpdbPairedBloom); - ASSERT_FALSE(table_options.filter_policy == nullptr); - table_options.partition_filters = partition_filters_; - if (partition_filters_) { - table_options.index_type = - BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch; - } - ASSERT_GE(table_options.format_version, 5U); - // value delta encoding challenged more with index interval > 1 - table_options.index_block_restart_interval = 8; - table_options.metadata_block_size = 32; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - - CreateAndReopenWithCF({"pikachu"}, options); - - // Populate multiple layers - const int N = 10000; - for (int i = 0; i < N; i++) { - ASSERT_OK(Put(1, Key(i), Key(i))); - } - Compact(1, "a", "z"); - for (int i = 0; i < N; i += 100) { - ASSERT_OK(Put(1, Key(i), Key(i))); - } - ASSERT_OK(Flush(1)); - - // Prevent auto compactions triggered by seeks - env_->delay_sstable_sync_.store(true, std::memory_order_release); - - // Lookup present keys. Should rarely read from small sstable. - env_->random_read_counter_.Reset(); - for (int i = 0; i < N; i++) { - ASSERT_EQ(Key(i), Get(1, Key(i))); - } - int reads = env_->random_read_counter_.Read(); - fprintf(stderr, "%d present => %d reads\n", N, reads); - ASSERT_GE(reads, N); - if (partition_filters_) { - // Without block cache, we read an extra partition filter per each - // level*read and a partition index per each read - ASSERT_LE(reads, 4 * N + 2 * N / 100); - } else { - ASSERT_LE(reads, N + 2 * N / 100); - } - - // Lookup present keys. Should rarely read from either sstable. 
- env_->random_read_counter_.Reset(); - for (int i = 0; i < N; i++) { - ASSERT_EQ("NOT_FOUND", Get(1, Key(i) + ".missing")); - } - reads = env_->random_read_counter_.Read(); - fprintf(stderr, "%d missing => %d reads\n", N, reads); - if (partition_filters_) { - // With partitioned filter we read one extra filter per level per each - // missed read. - ASSERT_LE(reads, 2 * N + 3 * N / 100); - } else { - ASSERT_LE(reads, 3 * N / 100); - } - - // Sanity check some table properties - std::map props; - ASSERT_TRUE(db_->GetMapProperty( - handles_[1], DB::Properties::kAggregatedTableProperties, &props)); - uint64_t nkeys = N + N / 100; - uint64_t filter_size = ParseUint64(props["filter_size"]); - // TODO: Our Filter has a min size of 8192 bytes (64 X 128) => The upper - // limit depends on the number of filters - // => Adapt the caclulation - // // // EXPECT_LE(filter_size, - // // // (partition_filters_ ? 12 : 11) * nkeys / /*bits / byte*/ - // 8); Always Bloom - EXPECT_GE(filter_size, static_cast(bytes_per_key * nkeys)); - - uint64_t num_filter_entries = ParseUint64(props["num_filter_entries"]); - EXPECT_EQ(num_filter_entries, nkeys); - - // // // fprintf(stderr, "filter_size:%d, num_filter_entries:%d, - // nkeys:%d\n", (int)filter_size, (int)num_filter_entries, (int)nkeys); - - env_->delay_sstable_sync_.store(false, std::memory_order_release); - Close(); - } while (ChangeCompactOptions()); -} - -namespace { - -class AlwaysTrueBitsBuilder : public FilterBitsBuilder { - public: - void AddKey(const Slice&) override {} - size_t EstimateEntriesAdded() override { return 0U; } - Slice Finish(std::unique_ptr* /* buf */) override { - // Interpreted as "always true" filter (0 probes over 1 byte of - // payload, 5 bytes metadata) - return Slice("\0\0\0\0\0\0", 6); - } - using FilterBitsBuilder::Finish; - size_t ApproximateNumEntries(size_t) override { return SIZE_MAX; } -}; - -class AlwaysTrueFilterPolicy : public ReadOnlyBuiltinFilterPolicy { - public: - explicit AlwaysTrueFilterPolicy(bool skip) : skip_(skip) {} - - FilterBitsBuilder* GetBuilderWithContext( - const FilterBuildingContext&) const override { - if (skip_) { - return nullptr; - } else { - return new AlwaysTrueBitsBuilder(); - } - } - - private: - bool skip_; -}; - -} // namespace - -TEST_P(DISABLED_SpdbDBBloomFilterTestWithParam, - SkipFilterOnEssentiallyZeroBpk) { - constexpr int maxKey = 10; - auto PutFn = [&]() { - int i; - // Put - for (i = 0; i < maxKey; i++) { - ASSERT_OK(Put(Key(i), Key(i))); - } - Flush(); - }; - auto GetFn = [&]() { - int i; - // Get OK - for (i = 0; i < maxKey; i++) { - ASSERT_EQ(Key(i), Get(Key(i))); - } - // Get NotFound - for (; i < maxKey * 2; i++) { - ASSERT_EQ(Get(Key(i)), "NOT_FOUND"); - } - }; - auto PutAndGetFn = [&]() { - PutFn(); - GetFn(); - }; - - std::map props; - const auto& kAggTableProps = DB::Properties::kAggregatedTableProperties; - - Options options = CurrentOptions(); - options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - BlockBasedTableOptions table_options; - table_options.partition_filters = partition_filters_; - if (partition_filters_) { - table_options.index_type = - BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch; - } - - // Test 1: bits per key < 0.5 means skip filters -> no filter - // constructed or read. 
- table_options.filter_policy = Create(0.4, kSpdbPairedBloom); - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - DestroyAndReopen(options); - PutAndGetFn(); - - // Verify no filter access nor contruction - EXPECT_EQ(TestGetTickerCount(options, BLOOM_FILTER_FULL_POSITIVE), 0); - EXPECT_EQ(TestGetTickerCount(options, BLOOM_FILTER_FULL_TRUE_POSITIVE), 0); - - props.clear(); - ASSERT_TRUE(db_->GetMapProperty(kAggTableProps, &props)); - EXPECT_EQ(props["filter_size"], "0"); - - // Test 2: use custom API to skip filters -> no filter constructed - // or read. - table_options.filter_policy.reset( - new AlwaysTrueFilterPolicy(/* skip */ true)); - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - DestroyAndReopen(options); - PutAndGetFn(); - - // Verify no filter access nor construction - EXPECT_EQ(TestGetTickerCount(options, BLOOM_FILTER_FULL_POSITIVE), 0); - EXPECT_EQ(TestGetTickerCount(options, BLOOM_FILTER_FULL_TRUE_POSITIVE), 0); - - props.clear(); - ASSERT_TRUE(db_->GetMapProperty(kAggTableProps, &props)); - EXPECT_EQ(props["filter_size"], "0"); - - // Control test: using an actual filter with 100% FP rate -> the filter - // is constructed and checked on read. - table_options.filter_policy.reset( - new AlwaysTrueFilterPolicy(/* skip */ false)); - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - DestroyAndReopen(options); - PutAndGetFn(); - - // Verify filter is accessed (and constructed) - EXPECT_EQ(TestGetAndResetTickerCount(options, BLOOM_FILTER_FULL_POSITIVE), - maxKey * 2); - EXPECT_EQ( - TestGetAndResetTickerCount(options, BLOOM_FILTER_FULL_TRUE_POSITIVE), - maxKey); - - props.clear(); - ASSERT_TRUE(db_->GetMapProperty(kAggTableProps, &props)); - EXPECT_NE(props["filter_size"], "0"); - - // Test 3 (options test): Able to read existing filters with longstanding - // generated options file entry `filter_policy=rocksdb.BuiltinBloomFilter` - ASSERT_OK(FilterPolicy::CreateFromString(ConfigOptions(), - "rocksdb.BuiltinBloomFilter", - &table_options.filter_policy)); - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - Reopen(options); - GetFn(); - - // Verify filter is accessed - EXPECT_EQ(TestGetAndResetTickerCount(options, BLOOM_FILTER_FULL_POSITIVE), - maxKey * 2); - EXPECT_EQ( - TestGetAndResetTickerCount(options, BLOOM_FILTER_FULL_TRUE_POSITIVE), - maxKey); - - // But new filters are not generated (configuration details unknown) - DestroyAndReopen(options); - PutAndGetFn(); - - // Verify no filter access nor construction - EXPECT_EQ(TestGetTickerCount(options, BLOOM_FILTER_FULL_POSITIVE), 0); - EXPECT_EQ(TestGetTickerCount(options, BLOOM_FILTER_FULL_TRUE_POSITIVE), 0); - - props.clear(); - ASSERT_TRUE(db_->GetMapProperty(kAggTableProps, &props)); - EXPECT_EQ(props["filter_size"], "0"); -} - -INSTANTIATE_TEST_CASE_P(DBBloomFilterTestWithParam, - DISABLED_SpdbDBBloomFilterTestWithParam, - ::testing::Values(false, true)); - -#if !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN) -INSTANTIATE_TEST_CASE_P(FormatDef, SpdbDBBloomFilterTestDefFormatVersion, - ::testing::Values(false, true)); - -INSTANTIATE_TEST_CASE_P(FormatDef, DISABLED_SpdbDBBloomFilterTestWithParam, - ::testing::Values(false, true)); - -INSTANTIATE_TEST_CASE_P(FormatLatest, DISABLED_SpdbDBBloomFilterTestWithParam, - ::testing::Values(false, true)); -#endif // !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN) - -TEST_F(DISABLED_SpdbDBBloomFilterTest, BloomFilterRate) { - while (ChangeFilterOptions()) 
{ - anon::OptionsOverride options_override; - options_override.filter_policy = Create(20, kSpdbPairedBloom); - Options options = CurrentOptions(options_override); - options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - get_perf_context()->EnablePerLevelPerfContext(); - CreateAndReopenWithCF({"pikachu"}, options); - - const int maxKey = 10000; - for (int i = 0; i < maxKey; i++) { - ASSERT_OK(Put(1, Key(i), Key(i))); - } - // Add a large key to make the file contain wide range - ASSERT_OK(Put(1, Key(maxKey + 55555), Key(maxKey + 55555))); - Flush(1); - - // Check if they can be found - for (int i = 0; i < maxKey; i++) { - ASSERT_EQ(Key(i), Get(1, Key(i))); - } - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0); - - // Check if filter is useful - for (int i = 0; i < maxKey; i++) { - ASSERT_EQ("NOT_FOUND", Get(1, Key(i + 33333))); - } - ASSERT_GE(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), maxKey * 0.98); - ASSERT_GE( - (*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful, - maxKey * 0.98); - get_perf_context()->Reset(); - } -} - -namespace { -struct CompatibilityConfig { - std::shared_ptr policy; - bool partitioned; - uint32_t format_version; - - void SetInTableOptions(BlockBasedTableOptions* table_options) { - table_options->filter_policy = policy; - table_options->partition_filters = partitioned; - if (partitioned) { - table_options->index_type = - BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch; - } else { - table_options->index_type = - BlockBasedTableOptions::IndexType::kBinarySearch; - } - table_options->format_version = format_version; - } -}; -// // // // High bits per key -> almost no FPs -// // // std::shared_ptr kCompatibilityBloomPolicy{ -// // // NewBloomFilterPolicy(20)}; -// // // // bloom_before_level=-1 -> always use Ribbon -// // // std::shared_ptr kCompatibilityRibbonPolicy{ -// // // NewRibbonFilterPolicy(20, -1)}; - -// // // std::vector kCompatibilityConfigs = { -// // // {Create(20, kDeprecatedBlock), false, -// // // BlockBasedTableOptions().format_version}, -// // // {kCompatibilityBloomPolicy, false, -// BlockBasedTableOptions().format_version}, -// // // {kCompatibilityBloomPolicy, true, -// BlockBasedTableOptions().format_version}, -// // // {kCompatibilityBloomPolicy, false, /* legacy Bloom */ 4U}, -// // // {kCompatibilityRibbonPolicy, false, -// // // BlockBasedTableOptions().format_version}, -// // // {kCompatibilityRibbonPolicy, true, -// BlockBasedTableOptions().format_version}, -// // // }; -} // namespace - -// // // TEST_F(DISABLED_SpdbDBBloomFilterTest, BloomFilterCompatibility) { -// // // Options options = CurrentOptions(); -// // // options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); -// // // options.level0_file_num_compaction_trigger = -// // // static_cast(kCompatibilityConfigs.size()) + 1; -// // // options.max_open_files = -1; - -// // // Close(); - -// // // // Create one file for each kind of filter. Each file covers a -// distinct key -// // // // range. 
-// // // for (size_t i = 0; i < kCompatibilityConfigs.size(); ++i) { -// // // BlockBasedTableOptions table_options; -// // // kCompatibilityConfigs[i].SetInTableOptions(&table_options); -// // // ASSERT_TRUE(table_options.filter_policy != nullptr); -// // // options.table_factory.reset(NewBlockBasedTableFactory(table_options)); -// // // Reopen(options); - -// // // std::string prefix = ToString(i) + "_"; -// // // ASSERT_OK(Put(prefix + "A", "val")); -// // // ASSERT_OK(Put(prefix + "Z", "val")); -// // // ASSERT_OK(Flush()); -// // // } - -// // // // Test filter is used between each pair of {reader,writer} -// configurations, -// // // // because any built-in FilterPolicy should be able to read filters -// from any -// // // // other built-in FilterPolicy -// // // for (size_t i = 0; i < kCompatibilityConfigs.size(); ++i) { -// // // BlockBasedTableOptions table_options; -// // // kCompatibilityConfigs[i].SetInTableOptions(&table_options); -// // // options.table_factory.reset(NewBlockBasedTableFactory(table_options)); -// // // Reopen(options); -// // // for (size_t j = 0; j < kCompatibilityConfigs.size(); ++j) { -// // // std::string prefix = ToString(j) + "_"; -// // // ASSERT_EQ("val", Get(prefix + "A")); // Filter positive -// // // ASSERT_EQ("val", Get(prefix + "Z")); // Filter positive -// // // // Filter negative, with high probability -// // // ASSERT_EQ("NOT_FOUND", Get(prefix + "Q")); -// // // // FULL_POSITIVE does not include block-based filter case (j == -// 0) -// // // EXPECT_EQ(TestGetAndResetTickerCount(options, -// BLOOM_FILTER_FULL_POSITIVE), -// // // j == 0 ? 0 : 2); -// // // EXPECT_EQ(TestGetAndResetTickerCount(options, -// BLOOM_FILTER_USEFUL), 1); -// // // } -// // // } -// // // } - -/* - * A cache wrapper that tracks peaks and increments of filter - * construction cache reservation. 
- * p0 - * / \ p1 - * / \ /\ - * / \/ \ - * a / b \ - * peaks = {p0, p1} - * increments = {p1-a, p2-b} - */ -class FilterConstructResPeakTrackingCache : public CacheWrapper { - public: - explicit FilterConstructResPeakTrackingCache(std::shared_ptr target) - : CacheWrapper(std::move(target)), - cur_cache_res_(0), - cache_res_peak_(0), - cache_res_increment_(0), - last_peak_tracked_(false), - cache_res_increments_sum_(0) {} - - Status Insert(const Slice& key, ObjectPtr value, - const CacheItemHelper* helper, size_t charge, - Handle** handle = nullptr, - Priority priority = Priority::LOW) override { - Status s = target_->Insert(key, value, helper, charge, handle, priority); - if (helper->del_cb == kNoopDeleterForFilterConstruction) { - if (last_peak_tracked_) { - cache_res_peak_ = 0; - cache_res_increment_ = 0; - last_peak_tracked_ = false; - } - cur_cache_res_ += charge; - cache_res_peak_ = std::max(cache_res_peak_, cur_cache_res_); - cache_res_increment_ += charge; - } - return s; - } - - using Cache::Release; - bool Release(Handle* handle, bool erase_if_last_ref = false) override { - auto helper = GetCacheItemHelper(handle); - if (helper->del_cb == kNoopDeleterForFilterConstruction) { - if (!last_peak_tracked_) { - cache_res_peaks_.push_back(cache_res_peak_); - cache_res_increments_sum_ += cache_res_increment_; - last_peak_tracked_ = true; - } - cur_cache_res_ -= GetCharge(handle); - } - bool is_successful = target_->Release(handle, erase_if_last_ref); - return is_successful; - } - - std::deque GetReservedCachePeaks() { return cache_res_peaks_; } - - std::size_t GetReservedCacheIncrementSum() { - return cache_res_increments_sum_; - } - - static const char* kClassName() { - return "FilterConstructResPeakTrackingCache"; - } - const char* Name() const override { return kClassName(); } - - private: - static const Cache::DeleterFn kNoopDeleterForFilterConstruction; - static const Cache::CacheItemHelper kHelper; - - std::size_t cur_cache_res_; - std::size_t cache_res_peak_; - std::size_t cache_res_increment_; - bool last_peak_tracked_; - std::deque cache_res_peaks_; - std::size_t cache_res_increments_sum_; -}; - -const Cache::CacheItemHelper FilterConstructResPeakTrackingCache::kHelper{ - CacheEntryRole::kFilterConstruction, - FilterConstructResPeakTrackingCache::kNoopDeleterForFilterConstruction}; - -const Cache::DeleterFn - FilterConstructResPeakTrackingCache::kNoopDeleterForFilterConstruction = - CacheReservationManagerImpl:: - TEST_GetCacheItemHelperForRole() - ->del_cb; - -// To align with the type of hash entry being reserved in implementation. -using FilterConstructionReserveMemoryHash = uint64_t; - -class DBFilterConstructionReserveMemoryTestWithParam - : public DBTestBase, - public testing::WithParamInterface< - std::tuple> { - public: - DBFilterConstructionReserveMemoryTestWithParam() - : DBTestBase("db_bloom_filter_tests", - /*env_do_fsync=*/true), - num_key_(0), - charge_filter_construction_(std::get<0>(GetParam())), - partition_filters_(std::get<1>(GetParam())), - detect_filter_construct_corruption_(std::get<2>(GetParam())) { - if (charge_filter_construction_ == - CacheEntryRoleOptions::Decision::kDisabled) { - // For these cases, we only interested in whether filter construction - // cache reservation happens instead of its accuracy. Therefore we don't - // need many keys. 
- num_key_ = 5; - } else if (partition_filters_) { - // For PartitionFilter case, since we set - // table_options.metadata_block_size big enough such that each partition - // trigger at least 1 dummy entry reservation each for hash entries and - // final filter, we need a large number of keys to ensure we have at least - // two partitions. - num_key_ = 18 * - CacheReservationManagerImpl< - CacheEntryRole::kFilterConstruction>::GetDummyEntrySize() / - sizeof(FilterConstructionReserveMemoryHash); - } else { - // For Bloom Filter + FullFilter case, since we design the num_key_ to - // make hash entry cache reservation be a multiple of dummy entries, the - // correct behavior of charging final filter on top of it will trigger at - // least another dummy entry insertion. Therefore we can assert that - // behavior and we don't need a large number of keys to verify we - // indeed charge the final filter for cache reservation, even though final - // filter is a lot smaller than hash entries. - num_key_ = 1 * - CacheReservationManagerImpl< - CacheEntryRole::kFilterConstruction>::GetDummyEntrySize() / - sizeof(FilterConstructionReserveMemoryHash); - - // TODO: Add support for this test for our filter !!!!!!!!!!!!!!!!!! - } - } - - BlockBasedTableOptions GetBlockBasedTableOptions() { - BlockBasedTableOptions table_options; - - // We set cache capacity big enough to prevent cache full for convenience in - // calculation. - constexpr std::size_t kCacheCapacity = 100 * 1024 * 1024; - - table_options.cache_usage_options.options_overrides.insert( - {CacheEntryRole::kFilterConstruction, - {/*.charged = */ charge_filter_construction_}}); - table_options.filter_policy = Create(10, kSpdbPairedBloom); - table_options.partition_filters = partition_filters_; - if (table_options.partition_filters) { - table_options.index_type = - BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch; - // We set table_options.metadata_block_size big enough so that each - // partition trigger at least 1 dummy entry insertion each for hash - // entries and final filter. 
- table_options.metadata_block_size = 409000; - } - table_options.detect_filter_construct_corruption = - detect_filter_construct_corruption_; - - LRUCacheOptions lo; - lo.capacity = kCacheCapacity; - lo.num_shard_bits = 0; // 2^0 shard - lo.strict_capacity_limit = true; - cache_ = std::make_shared( - (NewLRUCache(lo))); - table_options.block_cache = cache_; - - return table_options; - } - - std::size_t GetNumKey() { return num_key_; } - - CacheEntryRoleOptions::Decision ChargeFilterConstructMemory() { - return charge_filter_construction_; - } - - bool PartitionFilters() { return partition_filters_; } - - std::shared_ptr - GetFilterConstructResPeakTrackingCache() { - return cache_; - } - - private: - std::size_t num_key_; - CacheEntryRoleOptions::Decision charge_filter_construction_; - bool partition_filters_; - std::shared_ptr cache_; - bool detect_filter_construct_corruption_; -}; - -INSTANTIATE_TEST_CASE_P( - DBFilterConstructionReserveMemoryTestWithParam, - DBFilterConstructionReserveMemoryTestWithParam, - ::testing::Values( - std::make_tuple(CacheEntryRoleOptions::Decision::kEnabled, false, - false), - std::make_tuple(CacheEntryRoleOptions::Decision::kEnabled, false, true), - std::make_tuple(CacheEntryRoleOptions::Decision::kEnabled, true, false), - std::make_tuple(CacheEntryRoleOptions::Decision::kEnabled, true, - true))); - -// TODO: Speed up this test, and reduce disk space usage (~700MB) -// The current test inserts many keys (on the scale of dummy entry size) -// in order to make small memory user (e.g, final filter, partitioned hash -// entries/filter/banding) , which is proportional to the number of -// keys, big enough so that its cache reservation triggers dummy entry insertion -// and becomes observable in the test. -// -// However, inserting that many keys slows down this test and leaves future -// developers an opportunity to speed it up. -// -// Possible approaches & challenges: -// 1. Use sync point during cache reservation of filter construction -// -// Benefit: It does not rely on triggering dummy entry insertion -// but the sync point to verify small memory user is charged correctly. -// -// Challenge: this approach is intrusive. -// -// 2. Make dummy entry size configurable and set it small in the test -// -// Benefit: It increases the precision of cache reservation and therefore -// small memory usage can still trigger insertion of dummy entry. -// -// Challenge: change CacheReservationManager related APIs and a hack -// might be needed to control the size of dummmy entry of -// CacheReservationManager used in filter construction for testing -// since CacheReservationManager is not exposed at the high level. 
-// -TEST_P(DBFilterConstructionReserveMemoryTestWithParam, ReserveMemory) { - // // // Options options = CurrentOptions(); - // // // // We set write_buffer_size big enough so that in the case where - // there is - // // // // filter construction cache reservation, flush won't be triggered - // before we - // // // // manually trigger it for clean testing - // // // options.write_buffer_size = 640 << 20; - // // // BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); - // // // - // options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - // // // std::shared_ptr cache = - // // // GetFilterConstructResPeakTrackingCache(); - // // // options.create_if_missing = true; - // // // // Disable auto compaction to prevent its unexpected side effect - // // // // to the number of keys per partition designed by us in the test - // // // options.disable_auto_compactions = true; - // // // DestroyAndReopen(options); - // // // int num_key = static_cast(GetNumKey()); - // // // for (int i = 0; i < num_key; i++) { - // // // ASSERT_OK(Put(Key(i), Key(i))); - // // // } - - // // // ASSERT_EQ(cache->GetReservedCacheIncrementSum(), 0) - // // // << "Flush was triggered too early in the test case with filter " - // // // "construction cache reservation - please make sure no flush - // triggered " - // // // "during the key insertions above"; - - // // // ASSERT_OK(Flush()); - - // // // bool reserve_table_builder_memory = ReserveTableBuilderMemory(); - // // // std::string policy = kSpdbPairedBloom; - // // // bool partition_filters = PartitionFilters(); - // // // bool detect_filter_construct_corruption = - // // // table_options.detect_filter_construct_corruption; - - // // // std::deque filter_construction_cache_res_peaks = - // // // cache->GetReservedCachePeaks(); - // // // std::size_t filter_construction_cache_res_increments_sum = - // // // cache->GetReservedCacheIncrementSum(); - - // // // if (!reserve_table_builder_memory) { - // // // EXPECT_EQ(filter_construction_cache_res_peaks.size(), 0); - // // // return; - // // // } - - // // // const std::size_t kDummyEntrySize = CacheReservationManagerImpl< - // // // CacheEntryRole::kFilterConstruction>::GetDummyEntrySize(); - - // // // const std::size_t predicted_hash_entries_cache_res = - // // // num_key * sizeof(FilterConstructionReserveMemoryHash); - // // // ASSERT_EQ(predicted_hash_entries_cache_res % kDummyEntrySize, 0) - // // // << "It's by this test's design that - // predicted_hash_entries_cache_res is " - // // // "a multipe of dummy entry"; - - // // // const std::size_t predicted_hash_entries_cache_res_dummy_entry_num = - // // // predicted_hash_entries_cache_res / kDummyEntrySize; - // // // const std::size_t predicted_final_filter_cache_res = - // // // static_cast( - // // // std::ceil(1.0 * - // predicted_hash_entries_cache_res_dummy_entry_num / 6 * 1)) * - // kDummyEntrySize; - // // // const std::size_t predicted_banding_cache_res = - // // // static_cast( - // // // std::ceil(predicted_hash_entries_cache_res_dummy_entry_num - // * 2.5)) * - // // // kDummyEntrySize; - -#if 0 - if (policy == kFastLocalBloom) { - /* kFastLocalBloom + FullFilter - * p0 - * / \ - * b / \ - * / \ - * / \ - * 0/ \ - * hash entries = b - 0, final filter = p0 - b - * p0 = hash entries + final filter - * - * The test is designed in a way such that the reservation for b is a - * multiple of dummy entries so that reservation for (p0 - b) - * will trigger at least another dummy entry insertion. 
- * - * kFastLocalBloom + FullFilter + - * detect_filter_construct_corruption - * The peak p0 stays the same as - * (kFastLocalBloom + FullFilter) but just lasts - * longer since we release hash entries reservation later. - * - * kFastLocalBloom + PartitionedFilter - * p1 - * / \ - * p0 b'/ \ - * / \ / \ - * b / \ / \ - * / \ / \ - * / a \ - * 0/ \ - * partitioned hash entries1 = b - 0, partitioned hash entries1 = b' - a - * parittioned final filter1 = p0 - b, parittioned final filter2 = p1 - b' - * - * (increment p0 - 0) + (increment p1 - a) - * = partitioned hash entries1 + partitioned hash entries2 - * + parittioned final filter1 + parittioned final filter2 - * = hash entries + final filter - * - * kFastLocalBloom + PartitionedFilter + - * detect_filter_construct_corruption - * The peak p0, p1 stay the same as - * (kFastLocalBloom + PartitionedFilter) but just - * last longer since we release hash entries reservation later. - * - */ - if (!partition_filters) { - EXPECT_EQ(filter_construction_cache_res_peaks.size(), 1) - << "Filter construction cache reservation should have only 1 peak in " - "case: kFastLocalBloom + FullFilter"; - std::size_t filter_construction_cache_res_peak = - filter_construction_cache_res_peaks[0]; - EXPECT_GT(filter_construction_cache_res_peak, - predicted_hash_entries_cache_res) - << "The testing number of hash entries is designed to make hash " - "entries cache reservation be multiples of dummy entries" - " so the correct behavior of charging final filter on top of it" - " should've triggered at least another dummy entry insertion"; - - std::size_t predicted_filter_construction_cache_res_peak = - predicted_hash_entries_cache_res + predicted_final_filter_cache_res; - EXPECT_GE(filter_construction_cache_res_peak, - predicted_filter_construction_cache_res_peak * 0.9); - EXPECT_LE(filter_construction_cache_res_peak, - predicted_filter_construction_cache_res_peak * 1.1); - return; - } else { - EXPECT_GE(filter_construction_cache_res_peaks.size(), 2) - << "Filter construction cache reservation should have multiple peaks " - "in case: kFastLocalBloom + " - "PartitionedFilter"; - std::size_t predicted_filter_construction_cache_res_increments_sum = - predicted_hash_entries_cache_res + predicted_final_filter_cache_res; - EXPECT_GE(filter_construction_cache_res_increments_sum, - predicted_filter_construction_cache_res_increments_sum * 0.9); - EXPECT_LE(filter_construction_cache_res_increments_sum, - predicted_filter_construction_cache_res_increments_sum * 1.1); - return; - } - } -#endif -} - -class DBFilterConstructionCorruptionTestWithParam - : public DBTestBase, - public testing::WithParamInterface< - std::tuple> { - public: - DBFilterConstructionCorruptionTestWithParam() - : DBTestBase("db_bloom_filter_tests", - /*env_do_fsync=*/true) {} - - BlockBasedTableOptions GetBlockBasedTableOptions() { - BlockBasedTableOptions table_options; - table_options.detect_filter_construct_corruption = std::get<0>(GetParam()); - table_options.filter_policy = Create(20, kSpdbPairedBloom); - table_options.partition_filters = std::get<1>(GetParam()); - if (table_options.partition_filters) { - table_options.index_type = - BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch; - // We set table_options.metadata_block_size small enough so we can - // trigger filter partitioning with GetNumKey() amount of keys - table_options.metadata_block_size = 10; - } - - return table_options; - } - - // Return an appropriate amount of keys for testing - // to generate a long filter (i.e, size >= 8 + 
kMetadataLen) - std::size_t GetNumKey() { return 5000; } -}; - -INSTANTIATE_TEST_CASE_P(DBFilterConstructionCorruptionTestWithParam, - DBFilterConstructionCorruptionTestWithParam, - ::testing::Values(std::make_tuple(false, false), - std::make_tuple(true, false), - std::make_tuple(true, true))); - -TEST_P(DBFilterConstructionCorruptionTestWithParam, DetectCorruption) { - Options options = CurrentOptions(); - BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - options.create_if_missing = true; - options.disable_auto_compactions = true; - - DestroyAndReopen(options); - int num_key = static_cast(GetNumKey()); - Status s; - - // Case 1: No corruption in filter construction - for (int i = 0; i < num_key; i++) { - ASSERT_OK(Put(Key(i), Key(i))); - } - s = Flush(); - EXPECT_TRUE(s.ok()); - - // Case 2: Corruption of hash entries in filter construction - for (int i = 0; i < num_key; i++) { - ASSERT_OK(Put(Key(i), Key(i))); - } - - SyncPoint::GetInstance()->SetCallBack( - "XXPH3FilterBitsBuilder::Finish::TamperHashEntries", [&](void* arg) { - std::deque* hash_entries_to_corrupt = - (std::deque*)arg; - assert(!hash_entries_to_corrupt->empty()); - *(hash_entries_to_corrupt->begin()) = - *(hash_entries_to_corrupt->begin()) ^ uint64_t { 1 }; - }); - SyncPoint::GetInstance()->EnableProcessing(); - - s = Flush(); - - if (table_options.detect_filter_construct_corruption) { - EXPECT_TRUE(s.IsCorruption()); - EXPECT_TRUE( - s.ToString().find("Filter's hash entries checksum mismatched") != - std::string::npos); - } else { - EXPECT_TRUE(s.ok()); - } - - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearCallBack( - "XXPH3FilterBitsBuilder::Finish::" - "TamperHashEntries"); - - // Case 3: Corruption of filter content in filter construction - DestroyAndReopen(options); - - for (int i = 0; i < num_key; i++) { - ASSERT_OK(Put(Key(i), Key(i))); - } - - SyncPoint::GetInstance()->SetCallBack( - "XXPH3FilterBitsBuilder::Finish::TamperFilter", [&](void* arg) { - std::pair*, std::size_t>* TEST_arg_pair = - (std::pair*, std::size_t>*)arg; - std::size_t filter_size = TEST_arg_pair->second; - // 5 is the kMetadataLen and - assert(filter_size >= 8 + 5); - std::unique_ptr* filter_content_to_corrupt = - TEST_arg_pair->first; - std::memset(filter_content_to_corrupt->get(), '\0', 8); - }); - SyncPoint::GetInstance()->EnableProcessing(); - - s = Flush(); - - if (table_options.detect_filter_construct_corruption) { - EXPECT_TRUE(s.IsCorruption()); - EXPECT_TRUE(s.ToString().find("Corrupted filter content") != - std::string::npos); - } else { - EXPECT_TRUE(s.ok()); - } - - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearCallBack( - "XXPH3FilterBitsBuilder::Finish::" - "TamperFilter"); -} - -// RocksDB lite does not support dynamic options - -TEST_P(DBFilterConstructionCorruptionTestWithParam, - DynamicallyTurnOnAndOffDetectConstructCorruption) { - Options options = CurrentOptions(); - BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); - // We intend to turn on - // table_options.detect_filter_construct_corruption dynamically - // therefore we override this test parmater's value - table_options.detect_filter_construct_corruption = false; - - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - options.create_if_missing = true; - - int num_key = static_cast(GetNumKey()); - Status s; - - DestroyAndReopen(options); - - // Case 1: 
!table_options.detect_filter_construct_corruption - for (int i = 0; i < num_key; i++) { - ASSERT_OK(Put(Key(i), Key(i))); - } - - SyncPoint::GetInstance()->SetCallBack( - "XXPH3FilterBitsBuilder::Finish::TamperHashEntries", [&](void* arg) { - std::deque* hash_entries_to_corrupt = - (std::deque*)arg; - assert(!hash_entries_to_corrupt->empty()); - *(hash_entries_to_corrupt->begin()) = - *(hash_entries_to_corrupt->begin()) ^ uint64_t { 1 }; - }); - SyncPoint::GetInstance()->EnableProcessing(); - - s = Flush(); - - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearCallBack( - "XXPH3FilterBitsBuilder::Finish::" - "TamperHashEntries"); - - ASSERT_FALSE(table_options.detect_filter_construct_corruption); - EXPECT_TRUE(s.ok()); - - // Case 2: dynamically turn on - // table_options.detect_filter_construct_corruption - ASSERT_OK(db_->SetOptions({{"block_based_table_factory", - "{detect_filter_construct_corruption=true;}"}})); - - for (int i = 0; i < num_key; i++) { - ASSERT_OK(Put(Key(i), Key(i))); - } - - SyncPoint::GetInstance()->SetCallBack( - "XXPH3FilterBitsBuilder::Finish::TamperHashEntries", [&](void* arg) { - std::deque* hash_entries_to_corrupt = - (std::deque*)arg; - assert(!hash_entries_to_corrupt->empty()); - *(hash_entries_to_corrupt->begin()) = - *(hash_entries_to_corrupt->begin()) ^ uint64_t { 1 }; - }); - SyncPoint::GetInstance()->EnableProcessing(); - - s = Flush(); - - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearCallBack( - "XXPH3FilterBitsBuilder::Finish::" - "TamperHashEntries"); - - auto updated_table_options = - db_->GetOptions().table_factory->GetOptions(); - EXPECT_TRUE(updated_table_options->detect_filter_construct_corruption); - EXPECT_TRUE(s.IsCorruption()); - EXPECT_TRUE(s.ToString().find("Filter's hash entries checksum mismatched") != - std::string::npos); - - // Case 3: dynamically turn off - // table_options.detect_filter_construct_corruption - ASSERT_OK(db_->SetOptions({{"block_based_table_factory", - "{detect_filter_construct_corruption=false;}"}})); - updated_table_options = - db_->GetOptions().table_factory->GetOptions(); - EXPECT_FALSE(updated_table_options->detect_filter_construct_corruption); -} - -namespace { -// // // // NOTE: This class is referenced by HISTORY.md as a model for a -// wrapper -// // // // FilterPolicy selecting among configurations based on context. -// // // class LevelAndStyleCustomFilterPolicy : public FilterPolicy { -// // // public: -// // // explicit LevelAndStyleCustomFilterPolicy(int bpk_fifo, int -// bpk_l0_other, -// // // int bpk_otherwise) -// // // : policy_fifo_(NewBloomFilterPolicy(bpk_fifo)), -// // // policy_l0_other_(NewBloomFilterPolicy(bpk_l0_other)), -// // // policy_otherwise_(NewBloomFilterPolicy(bpk_otherwise)) {} - -// // // const char* Name() const override { -// // // return "LevelAndStyleCustomFilterPolicy"; -// // // } - -// // // // OK to use built-in policy name because we are deferring to a -// // // // built-in builder. We aren't changing the serialized format. 
-// // // const char* CompatibilityName() const override { -// // // return policy_fifo_->CompatibilityName(); -// // // } - -// // // FilterBitsBuilder* GetBuilderWithContext( -// // // const FilterBuildingContext& context) const override { -// // // if (context.compaction_style == kCompactionStyleFIFO) { -// // // return policy_fifo_->GetBuilderWithContext(context); -// // // } else if (context.level_at_creation == 0) { -// // // return policy_l0_other_->GetBuilderWithContext(context); -// // // } else { -// // // return policy_otherwise_->GetBuilderWithContext(context); -// // // } -// // // } - -// // // FilterBitsReader* GetFilterBitsReader(const Slice& contents) const -// override { -// // // // OK to defer to any of them; they all can parse built-in filters -// // // // from any settings. -// // // return policy_fifo_->GetFilterBitsReader(contents); -// // // } - -// // // private: -// // // const std::unique_ptr policy_fifo_; -// // // const std::unique_ptr policy_l0_other_; -// // // const std::unique_ptr policy_otherwise_; -// // // }; - -// // // static std::map -// // // table_file_creation_reason_to_string{ -// // // {TableFileCreationReason::kCompaction, "kCompaction"}, -// // // {TableFileCreationReason::kFlush, "kFlush"}, -// // // {TableFileCreationReason::kMisc, "kMisc"}, -// // // {TableFileCreationReason::kRecovery, "kRecovery"}, -// // // }; - -// // // class TestingContextCustomFilterPolicy -// // // : public LevelAndStyleCustomFilterPolicy { -// // // public: -// // // explicit TestingContextCustomFilterPolicy(int bpk_fifo, int -// bpk_l0_other, -// // // int bpk_otherwise) -// // // : LevelAndStyleCustomFilterPolicy(bpk_fifo, bpk_l0_other, -// bpk_otherwise) { -// // // } - -// // // FilterBitsBuilder* GetBuilderWithContext( -// // // const FilterBuildingContext& context) const override { -// // // test_report_ += "cf="; -// // // test_report_ += context.column_family_name; -// // // test_report_ += ",s="; -// // // test_report_ += -// // // OptionsHelper::compaction_style_to_string[context.compaction_style]; -// // // test_report_ += ",n="; -// // // test_report_ += ROCKSDB_NAMESPACE::ToString(context.num_levels); -// // // test_report_ += ",l="; -// // // test_report_ += -// ROCKSDB_NAMESPACE::ToString(context.level_at_creation); -// // // test_report_ += ",b="; -// // // test_report_ += -// ROCKSDB_NAMESPACE::ToString(int{context.is_bottommost}); -// // // test_report_ += ",r="; -// // // test_report_ += -// table_file_creation_reason_to_string[context.reason]; -// // // test_report_ += "\n"; - -// // // return -// LevelAndStyleCustomFilterPolicy::GetBuilderWithContext(context); -// // // } - -// // // std::string DumpTestReport() { -// // // std::string rv; -// // // std::swap(rv, test_report_); -// // // return rv; -// // // } - -// // // private: -// // // mutable std::string test_report_; -// // // }; -} // namespace - -// // // TEST_F(DISABLED_SpdbDBBloomFilterTest, ContextCustomFilterPolicy) { -// // // auto policy = std::make_shared(15, -// 8, 5); -// // // Options options; -// // // for (bool fifo : {true, false}) { -// // // options = CurrentOptions(); -// // // options.max_open_files = fifo ? -1 : options.max_open_files; -// // // options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); -// // // options.compaction_style = -// // // fifo ? 
kCompactionStyleFIFO : kCompactionStyleLevel; - -// // // BlockBasedTableOptions table_options; -// // // table_options.filter_policy = policy; -// // // table_options.format_version = 5; -// // // options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - -// // // TryReopen(options); -// // // CreateAndReopenWithCF({fifo ? "abe" : "bob"}, options); - -// // // const int maxKey = 10000; -// // // for (int i = 0; i < maxKey / 2; i++) { -// // // ASSERT_OK(Put(1, Key(i), Key(i))); -// // // } -// // // // Add a large key to make the file contain wide range -// // // ASSERT_OK(Put(1, Key(maxKey + 55555), Key(maxKey + 55555))); -// // // Flush(1); -// // // EXPECT_EQ(policy->DumpTestReport(), -// // // fifo ? -// "cf=abe,s=kCompactionStyleFIFO,n=1,l=0,b=0,r=kFlush\n" -// // // : -// "cf=bob,s=kCompactionStyleLevel,n=7,l=0,b=0,r=kFlush\n"); - -// // // for (int i = maxKey / 2; i < maxKey; i++) { -// // // ASSERT_OK(Put(1, Key(i), Key(i))); -// // // } -// // // Flush(1); -// // // EXPECT_EQ(policy->DumpTestReport(), -// // // fifo ? -// "cf=abe,s=kCompactionStyleFIFO,n=1,l=0,b=0,r=kFlush\n" -// // // : -// "cf=bob,s=kCompactionStyleLevel,n=7,l=0,b=0,r=kFlush\n"); - -// // // // Check that they can be found -// // // for (int i = 0; i < maxKey; i++) { -// // // ASSERT_EQ(Key(i), Get(1, Key(i))); -// // // } -// // // // Since we have two tables / two filters, we might have Bloom -// checks on -// // // // our queries, but no more than one "useful" per query on a found -// key. -// // // EXPECT_LE(TestGetAndResetTickerCount(options, BLOOM_FILTER_USEFUL), -// maxKey); - -// // // // Check that we have two filters, each about -// // // // fifo: 0.12% FP rate (15 bits per key) -// // // // level: 2.3% FP rate (8 bits per key) -// // // for (int i = 0; i < maxKey; i++) { -// // // ASSERT_EQ("NOT_FOUND", Get(1, Key(i + 33333))); -// // // } -// // // { -// // // auto useful_count = -// // // TestGetAndResetTickerCount(options, BLOOM_FILTER_USEFUL); -// // // EXPECT_GE(useful_count, maxKey * 2 * (fifo ? 0.9980 : 0.975)); -// // // EXPECT_LE(useful_count, maxKey * 2 * (fifo ? 
0.9995 : 0.98)); -// // // } - -// // // if (!fifo) { // FIFO only has L0 -// // // // Full compaction -// // // ASSERT_OK(db_->CompactRange(CompactRangeOptions(), handles_[1], -// nullptr, -// // // nullptr)); -// // // EXPECT_EQ(policy->DumpTestReport(), -// // // "cf=bob,s=kCompactionStyleLevel,n=7,l=1,b=1,r=kCompaction\n"); - -// // // // Check that we now have one filter, about 9.2% FP rate (5 bits -// per key) -// // // for (int i = 0; i < maxKey; i++) { -// // // ASSERT_EQ("NOT_FOUND", Get(1, Key(i + 33333))); -// // // } -// // // { -// // // auto useful_count = -// // // TestGetAndResetTickerCount(options, BLOOM_FILTER_USEFUL); -// // // EXPECT_GE(useful_count, maxKey * 0.90); -// // // EXPECT_LE(useful_count, maxKey * 0.91); -// // // } -// // // } else { -// // // -// // // // Also try external SST file -// // // { -// // // std::string file_path = dbname_ + "/external.sst"; -// // // SstFileWriter sst_file_writer(EnvOptions(), options, -// handles_[1]); -// // // ASSERT_OK(sst_file_writer.Open(file_path)); -// // // ASSERT_OK(sst_file_writer.Put("key", "value")); -// // // ASSERT_OK(sst_file_writer.Finish()); -// // // } -// // // // Note: kCompactionStyleLevel is default, ignored if num_levels -// == -1 -// // // EXPECT_EQ(policy->DumpTestReport(), -// // // "cf=abe,s=kCompactionStyleLevel,n=-1,l=-1,b=0,r=kMisc\n"); -// // // #endif -// // // } - -// // // // Destroy -// // // ASSERT_OK(dbfull()->DropColumnFamily(handles_[1])); -// // // ASSERT_OK(dbfull()->DestroyColumnFamilyHandle(handles_[1])); -// // // handles_[1] = nullptr; -// // // } -// // // } - -class SliceTransformLimitedDomain : public SliceTransform { - const char* Name() const override { return "SliceTransformLimitedDomain"; } - - Slice Transform(const Slice& src) const override { - return Slice(src.data(), 5); - } - - bool InDomain(const Slice& src) const override { - // prefix will be x???? - return src.size() >= 5 && src[0] == 'x'; - } - - bool InRange(const Slice& dst) const override { - // prefix will be x???? 
- return dst.size() == 5 && dst[0] == 'x'; - } -}; - -TEST_F(DISABLED_SpdbDBBloomFilterTest, PrefixExtractorFullFilter) { - BlockBasedTableOptions bbto; - // Full Filter Block - bbto.filter_policy.reset(new SpdbPairedBloomFilterPolicy(20)); - bbto.whole_key_filtering = false; - - Options options = CurrentOptions(); - options.prefix_extractor = std::make_shared(); - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - - DestroyAndReopen(options); - - ASSERT_OK(Put("x1111_AAAA", "val1")); - ASSERT_OK(Put("x1112_AAAA", "val2")); - ASSERT_OK(Put("x1113_AAAA", "val3")); - ASSERT_OK(Put("x1114_AAAA", "val4")); - // Not in domain, wont be added to filter - ASSERT_OK(Put("zzzzz_AAAA", "val5")); - - ASSERT_OK(Flush()); - - ASSERT_EQ(Get("x1111_AAAA"), "val1"); - ASSERT_EQ(Get("x1112_AAAA"), "val2"); - ASSERT_EQ(Get("x1113_AAAA"), "val3"); - ASSERT_EQ(Get("x1114_AAAA"), "val4"); - // Was not added to filter but rocksdb will try to read it from the filter - ASSERT_EQ(Get("zzzzz_AAAA"), "val5"); -} - -TEST_F(DISABLED_SpdbDBBloomFilterTest, PrefixExtractorBlockFilter) { - BlockBasedTableOptions bbto; - // Full Filter Block - bbto.filter_policy.reset(new SpdbPairedBloomFilterPolicy(20)); - Options options = CurrentOptions(); - options.prefix_extractor = std::make_shared(); - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - - DestroyAndReopen(options); - - ASSERT_OK(Put("x1113_AAAA", "val3")); - ASSERT_OK(Put("x1114_AAAA", "val4")); - // Not in domain, wont be added to filter - ASSERT_OK(Put("zzzzz_AAAA", "val1")); - ASSERT_OK(Put("zzzzz_AAAB", "val2")); - ASSERT_OK(Put("zzzzz_AAAC", "val3")); - ASSERT_OK(Put("zzzzz_AAAD", "val4")); - - ASSERT_OK(Flush()); - - std::vector iter_res; - auto iter = db_->NewIterator(ReadOptions()); - // Seek to a key that was not in Domain - for (iter->Seek("zzzzz_AAAA"); iter->Valid(); iter->Next()) { - iter_res.emplace_back(iter->value().ToString()); - } - - std::vector expected_res = {"val1", "val2", "val3", "val4"}; - ASSERT_EQ(iter_res, expected_res); - delete iter; -} - -TEST_F(DISABLED_SpdbDBBloomFilterTest, MemtableWholeKeyBloomFilter) { - // regression test for #2743. 
the range delete tombstones in memtable should - // be added even when Get() skips searching due to its prefix bloom filter - const int kMemtableSize = 1 << 20; // 1MB - const int kMemtablePrefixFilterSize = 1 << 13; // 8KB - const int kPrefixLen = 4; - anon::OptionsOverride options_override; - options_override.filter_policy = Create(20, kSpdbPairedBloom); - Options options = CurrentOptions(options_override); - options.memtable_prefix_bloom_size_ratio = - static_cast(kMemtablePrefixFilterSize) / kMemtableSize; - options.prefix_extractor.reset( - ROCKSDB_NAMESPACE::NewFixedPrefixTransform(kPrefixLen)); - options.write_buffer_size = kMemtableSize; - options.memtable_whole_key_filtering = false; - Reopen(options); - std::string key1("AAAABBBB"); - std::string key2("AAAACCCC"); // not in DB - std::string key3("AAAADDDD"); - std::string key4("AAAAEEEE"); - std::string value1("Value1"); - std::string value3("Value3"); - std::string value4("Value4"); - - ASSERT_OK(Put(key1, value1, WriteOptions())); - - // check memtable bloom stats - ASSERT_EQ("NOT_FOUND", Get(key2)); - ASSERT_EQ(0, get_perf_context()->bloom_memtable_miss_count); - // same prefix, bloom filter false positive - ASSERT_EQ(1, get_perf_context()->bloom_memtable_hit_count); - - // enable whole key bloom filter - options.memtable_whole_key_filtering = true; - Reopen(options); - // check memtable bloom stats - ASSERT_OK(Put(key3, value3, WriteOptions())); - ASSERT_EQ("NOT_FOUND", Get(key2)); - // whole key bloom filter kicks in and determines it's a miss - ASSERT_EQ(1, get_perf_context()->bloom_memtable_miss_count); - ASSERT_EQ(1, get_perf_context()->bloom_memtable_hit_count); - - // verify whole key filtering does not depend on prefix_extractor - options.prefix_extractor.reset(); - Reopen(options); - // check memtable bloom stats - ASSERT_OK(Put(key4, value4, WriteOptions())); - ASSERT_EQ("NOT_FOUND", Get(key2)); - // whole key bloom filter kicks in and determines it's a miss - ASSERT_EQ(2, get_perf_context()->bloom_memtable_miss_count); - ASSERT_EQ(1, get_perf_context()->bloom_memtable_hit_count); -} - -TEST_F(DISABLED_SpdbDBBloomFilterTest, MemtableWholeKeyBloomFilterMultiGet) { - anon::OptionsOverride options_override; - options_override.filter_policy = Create(20, kSpdbPairedBloom); - Options options = CurrentOptions(options_override); - options.memtable_prefix_bloom_size_ratio = 0.015; - options.memtable_whole_key_filtering = true; - Reopen(options); - std::string key1("AA"); - std::string key2("BB"); - std::string key3("CC"); - std::string key4("DD"); - std::string key_not("EE"); - std::string value1("Value1"); - std::string value2("Value2"); - std::string value3("Value3"); - std::string value4("Value4"); - - ASSERT_OK(Put(key1, value1, WriteOptions())); - ASSERT_OK(Put(key2, value2, WriteOptions())); - ASSERT_OK(Flush()); - ASSERT_OK(Put(key3, value3, WriteOptions())); - const Snapshot* snapshot = db_->GetSnapshot(); - ASSERT_OK(Put(key4, value4, WriteOptions())); - - // Delete key2 and key3 - ASSERT_OK( - db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "BA", "CZ")); - - // Read without snapshot - auto results = MultiGet({key_not, key1, key2, key3, key4}); - ASSERT_EQ(results[0], "NOT_FOUND"); - ASSERT_EQ(results[1], value1); - ASSERT_EQ(results[2], "NOT_FOUND"); - ASSERT_EQ(results[3], "NOT_FOUND"); - ASSERT_EQ(results[4], value4); - - // Also check Get - ASSERT_EQ(Get(key1), value1); - ASSERT_EQ(Get(key2), "NOT_FOUND"); - ASSERT_EQ(Get(key3), "NOT_FOUND"); - ASSERT_EQ(Get(key4), value4); - - // Read with snapshot 
- results = MultiGet({key_not, key1, key2, key3, key4}, snapshot); - ASSERT_EQ(results[0], "NOT_FOUND"); - ASSERT_EQ(results[1], value1); - ASSERT_EQ(results[2], value2); - ASSERT_EQ(results[3], value3); - ASSERT_EQ(results[4], "NOT_FOUND"); - - // Also check Get - ASSERT_EQ(Get(key1, snapshot), value1); - ASSERT_EQ(Get(key2, snapshot), value2); - ASSERT_EQ(Get(key3, snapshot), value3); - ASSERT_EQ(Get(key4, snapshot), "NOT_FOUND"); - - db_->ReleaseSnapshot(snapshot); -} - -TEST_F(DISABLED_SpdbDBBloomFilterTest, MemtablePrefixBloomOutOfDomain) { - constexpr size_t kPrefixSize = 8; - const std::string kKey = "key"; - assert(kKey.size() < kPrefixSize); - anon::OptionsOverride options_override; - options_override.filter_policy = Create(20, kSpdbPairedBloom); - Options options = CurrentOptions(options_override); - options.prefix_extractor.reset(NewFixedPrefixTransform(kPrefixSize)); - options.memtable_prefix_bloom_size_ratio = 0.25; - Reopen(options); - ASSERT_OK(Put(kKey, "v")); - ASSERT_EQ("v", Get(kKey)); - std::unique_ptr iter(dbfull()->NewIterator(ReadOptions())); - iter->Seek(kKey); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(kKey, iter->key()); - iter->SeekForPrev(kKey); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(kKey, iter->key()); -} - -namespace { -static const std::string kPlainTable = "test_PlainTableBloom"; -} // namespace - -class BloomStatsTestWithParam - : public DISABLED_SpdbDBBloomFilterTest, - public testing::WithParamInterface> { - public: - BloomStatsTestWithParam() { - partition_filters_ = std::get<1>(GetParam()); - - options_.create_if_missing = true; - options_.prefix_extractor.reset( - ROCKSDB_NAMESPACE::NewFixedPrefixTransform(4)); - options_.memtable_prefix_bloom_size_ratio = - 8.0 * 1024.0 / static_cast(options_.write_buffer_size); - BlockBasedTableOptions table_options; - if (partition_filters_) { - table_options.partition_filters = partition_filters_; - table_options.index_type = - BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch; - } - table_options.filter_policy = Create(10, kSpdbPairedBloom); - options_.table_factory.reset(NewBlockBasedTableFactory(table_options)); - options_.env = env_; - - get_perf_context()->Reset(); - DestroyAndReopen(options_); - } - - ~BloomStatsTestWithParam() override { - get_perf_context()->Reset(); - Destroy(options_); - } - - // Required if inheriting from testing::WithParamInterface<> - static void SetUpTestCase() {} - static void TearDownTestCase() {} - - bool partition_filters_; - Options options_; -}; - -// 1 Insert 2 K-V pairs into DB -// 2 Call Get() for both keys - expext memtable bloom hit stat to be 2 -// 3 Call Get() for nonexisting key - expect memtable bloom miss stat to be 1 -// 4 Call Flush() to create SST -// 5 Call Get() for both keys - expext SST bloom hit stat to be 2 -// 6 Call Get() for nonexisting key - expect SST bloom miss stat to be 1 -// Test both: block and plain SST -TEST_P(BloomStatsTestWithParam, BloomStatsTest) { - std::string key1("AAAA"); - std::string key2("RXDB"); // not in DB - std::string key3("ZBRA"); - std::string value1("Value1"); - std::string value3("Value3"); - - ASSERT_OK(Put(key1, value1, WriteOptions())); - ASSERT_OK(Put(key3, value3, WriteOptions())); - - // check memtable bloom stats - ASSERT_EQ(value1, Get(key1)); - ASSERT_EQ(1, get_perf_context()->bloom_memtable_hit_count); - ASSERT_EQ(value3, Get(key3)); - ASSERT_EQ(2, get_perf_context()->bloom_memtable_hit_count); - ASSERT_EQ(0, get_perf_context()->bloom_memtable_miss_count); - - ASSERT_EQ("NOT_FOUND", Get(key2)); - ASSERT_EQ(1, 
get_perf_context()->bloom_memtable_miss_count); - ASSERT_EQ(2, get_perf_context()->bloom_memtable_hit_count); - - // sanity checks - ASSERT_EQ(0, get_perf_context()->bloom_sst_hit_count); - ASSERT_EQ(0, get_perf_context()->bloom_sst_miss_count); - - Flush(); - - // sanity checks - ASSERT_EQ(0, get_perf_context()->bloom_sst_hit_count); - ASSERT_EQ(0, get_perf_context()->bloom_sst_miss_count); - - // check SST bloom stats - ASSERT_EQ(value1, Get(key1)); - ASSERT_EQ(1, get_perf_context()->bloom_sst_hit_count); - ASSERT_EQ(value3, Get(key3)); - ASSERT_EQ(2, get_perf_context()->bloom_sst_hit_count); - - ASSERT_EQ("NOT_FOUND", Get(key2)); - ASSERT_EQ(1, get_perf_context()->bloom_sst_miss_count); -} - -// Same scenario as in BloomStatsTest but using an iterator -TEST_P(BloomStatsTestWithParam, BloomStatsTestWithIter) { - std::string key1("AAAA"); - std::string key2("RXDB"); // not in DB - std::string key3("ZBRA"); - std::string value1("Value1"); - std::string value3("Value3"); - - ASSERT_OK(Put(key1, value1, WriteOptions())); - ASSERT_OK(Put(key3, value3, WriteOptions())); - - std::unique_ptr iter(dbfull()->NewIterator(ReadOptions())); - - // check memtable bloom stats - iter->Seek(key1); - ASSERT_OK(iter->status()); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(value1, iter->value().ToString()); - ASSERT_EQ(1, get_perf_context()->bloom_memtable_hit_count); - ASSERT_EQ(0, get_perf_context()->bloom_memtable_miss_count); - - iter->Seek(key3); - ASSERT_OK(iter->status()); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(value3, iter->value().ToString()); - ASSERT_EQ(2, get_perf_context()->bloom_memtable_hit_count); - ASSERT_EQ(0, get_perf_context()->bloom_memtable_miss_count); - - iter->Seek(key2); - ASSERT_OK(iter->status()); - ASSERT_TRUE(!iter->Valid()); - ASSERT_EQ(1, get_perf_context()->bloom_memtable_miss_count); - ASSERT_EQ(2, get_perf_context()->bloom_memtable_hit_count); - - Flush(); - - iter.reset(dbfull()->NewIterator(ReadOptions())); - - // Check SST bloom stats - iter->Seek(key1); - ASSERT_OK(iter->status()); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(value1, iter->value().ToString()); - ASSERT_EQ(1, get_perf_context()->bloom_sst_hit_count); - - iter->Seek(key3); - ASSERT_OK(iter->status()); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(value3, iter->value().ToString()); - // The seek doesn't check block-based bloom filter because last index key - // starts with the same prefix we're seeking to. - uint64_t expected_hits = 2; - ASSERT_EQ(expected_hits, get_perf_context()->bloom_sst_hit_count); - - iter->Seek(key2); - ASSERT_OK(iter->status()); - ASSERT_TRUE(!iter->Valid()); - ASSERT_EQ(1, get_perf_context()->bloom_sst_miss_count); - ASSERT_EQ(expected_hits, get_perf_context()->bloom_sst_hit_count); -} - -// // INSTANTIATE_TEST_CASE_P( -// // BloomStatsTestWithParam, BloomStatsTestWithParam, -// // ::testing::Values(false, true)); - -namespace { -void PrefixScanInit(DISABLED_SpdbDBBloomFilterTest* dbtest) { - char buf[100]; - std::string keystr; - const int small_range_sstfiles = 5; - const int big_range_sstfiles = 5; - - // Generate 11 sst files with the following prefix ranges. - // GROUP 0: [0,10] (level 1) - // GROUP 1: [1,2], [2,3], [3,4], [4,5], [5, 6] (level 0) - // GROUP 2: [0,6], [0,7], [0,8], [0,9], [0,10] (level 0) - // - // A seek with the previous API would do 11 random I/Os (to all the - // files). With the new API and a prefix filter enabled, we should - // only do 2 random I/O, to the 2 files containing the key. 
- - // GROUP 0 - snprintf(buf, sizeof(buf), "%02d______:start", 0); - keystr = std::string(buf); - ASSERT_OK(dbtest->Put(keystr, keystr)); - snprintf(buf, sizeof(buf), "%02d______:end", 10); - keystr = std::string(buf); - ASSERT_OK(dbtest->Put(keystr, keystr)); - ASSERT_OK(dbtest->Flush()); - ASSERT_OK(dbtest->dbfull()->CompactRange(CompactRangeOptions(), nullptr, - nullptr)); // move to level 1 - - // GROUP 1 - for (int i = 1; i <= small_range_sstfiles; i++) { - snprintf(buf, sizeof(buf), "%02d______:start", i); - keystr = std::string(buf); - ASSERT_OK(dbtest->Put(keystr, keystr)); - snprintf(buf, sizeof(buf), "%02d______:end", i + 1); - keystr = std::string(buf); - ASSERT_OK(dbtest->Put(keystr, keystr)); - dbtest->Flush(); - } - - // GROUP 2 - for (int i = 1; i <= big_range_sstfiles; i++) { - snprintf(buf, sizeof(buf), "%02d______:start", 0); - keystr = std::string(buf); - ASSERT_OK(dbtest->Put(keystr, keystr)); - snprintf(buf, sizeof(buf), "%02d______:end", small_range_sstfiles + i + 1); - keystr = std::string(buf); - ASSERT_OK(dbtest->Put(keystr, keystr)); - dbtest->Flush(); - } -} -} // namespace - -TEST_F(DISABLED_SpdbDBBloomFilterTest, PrefixScan) { - while (ChangeFilterOptions()) { - int count; - Slice prefix; - Slice key; - char buf[100]; - Iterator* iter; - snprintf(buf, sizeof(buf), "03______:"); - prefix = Slice(buf, 8); - key = Slice(buf, 9); - ASSERT_EQ(key.difference_offset(prefix), 8); - ASSERT_EQ(prefix.difference_offset(key), 8); - // db configs - env_->count_random_reads_ = true; - Options options = CurrentOptions(); - options.env = env_; - options.prefix_extractor.reset(NewFixedPrefixTransform(8)); - options.disable_auto_compactions = true; - options.max_background_compactions = 2; - options.create_if_missing = true; - options.memtable_factory.reset(NewHashSkipListRepFactory(16)); - assert(!options.unordered_write); - // It is incompatible with allow_concurrent_memtable_write=false - options.allow_concurrent_memtable_write = false; - - BlockBasedTableOptions table_options; - table_options.no_block_cache = true; - table_options.filter_policy.reset(new SpdbPairedBloomFilterPolicy(20)); - table_options.whole_key_filtering = false; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - - // 11 RAND I/Os - DestroyAndReopen(options); - PrefixScanInit(this); - count = 0; - env_->random_read_counter_.Reset(); - iter = db_->NewIterator(ReadOptions()); - for (iter->Seek(prefix); iter->Valid(); iter->Next()) { - if (!iter->key().starts_with(prefix)) { - break; - } - count++; - } - ASSERT_OK(iter->status()); - delete iter; - ASSERT_EQ(count, 2); - ASSERT_EQ(env_->random_read_counter_.Read(), 2); - Close(); - } // end of while -} - -// TODO: The filter builder is created always with OFFM = false, both for us and -// rocksdb. Is that how it's supposed to be? 
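// One way to follow up on the TODO above: build table factories that differ
// only in optimize_filters_for_memory and compare the resulting filter sizes.
// This is only a sketch using stock RocksDB option names; NewBloomFilterPolicy
// stands in for the paired policy and the helper name is hypothetical, not
// part of this patch.
#include <memory>
#include "rocksdb/filter_policy.h"
#include "rocksdb/table.h"

namespace ROCKSDB_NAMESPACE {
inline std::shared_ptr<TableFactory> MakeBloomTableFactory(
    double bits_per_key, bool optimize_filters_for_memory) {
  BlockBasedTableOptions bbto;
  bbto.filter_policy.reset(NewBloomFilterPolicy(bits_per_key));
  // The flag the TODO is asking about: false keeps the historical sizing,
  // true lets the builder round filter sizes to allocator-friendly sizes.
  bbto.optimize_filters_for_memory = optimize_filters_for_memory;
  return std::shared_ptr<TableFactory>(NewBlockBasedTableFactory(bbto));
}
}  // namespace ROCKSDB_NAMESPACE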
-TEST_F(DISABLED_SpdbDBBloomFilterTest, OptimizeFiltersForHits) { - Options options = CurrentOptions(); - options.write_buffer_size = 64 * 1024; - options.arena_block_size = 4 * 1024; - options.target_file_size_base = 64 * 1024; - options.level0_file_num_compaction_trigger = 2; - options.level0_slowdown_writes_trigger = 2; - options.level0_stop_writes_trigger = 4; - options.max_bytes_for_level_base = 256 * 1024; - options.max_write_buffer_number = 2; - options.max_background_compactions = 8; - options.max_background_flushes = 8; - options.compression = kNoCompression; - options.compaction_style = kCompactionStyleLevel; - options.level_compaction_dynamic_level_bytes = true; - BlockBasedTableOptions bbto; - bbto.cache_index_and_filter_blocks = true; - bbto.filter_policy.reset(new SpdbPairedBloomFilterPolicy(20)); - bbto.whole_key_filtering = true; - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - options.optimize_filters_for_hits = true; - options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - get_perf_context()->Reset(); - get_perf_context()->EnablePerLevelPerfContext(); - CreateAndReopenWithCF({"mypikachu"}, options); - - int numkeys = 200000; - - // Generate randomly shuffled keys, so the updates are almost - // random. - std::vector keys; - keys.reserve(numkeys); - for (int i = 0; i < numkeys; i += 2) { - keys.push_back(i); - } - RandomShuffle(std::begin(keys), std::end(keys)); - int num_inserted = 0; - for (int key : keys) { - ASSERT_OK(Put(1, Key(key), "val")); - if (++num_inserted % 1000 == 0) { - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - } - } - ASSERT_OK(Put(1, Key(0), "val")); - ASSERT_OK(Put(1, Key(numkeys), "val")); - ASSERT_OK(Flush(1)); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - if (NumTableFilesAtLevel(0, 1) == 0) { - // No Level 0 file. Create one. - ASSERT_OK(Put(1, Key(0), "val")); - ASSERT_OK(Put(1, Key(numkeys), "val")); - ASSERT_OK(Flush(1)); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - } - - for (int i = 1; i < numkeys; i += 2) { - ASSERT_EQ(Get(1, Key(i)), "NOT_FOUND"); - } - - ASSERT_EQ(0, TestGetTickerCount(options, GET_HIT_L0)); - ASSERT_EQ(0, TestGetTickerCount(options, GET_HIT_L1)); - ASSERT_EQ(0, TestGetTickerCount(options, GET_HIT_L2_AND_UP)); - - // Now we have three sorted run, L0, L5 and L6 with most files in L6 have - // no bloom filter. Most keys be checked bloom filters twice. 
- ASSERT_GT(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 65000 * 2); - ASSERT_LT(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 120000 * 2); - uint64_t bloom_filter_useful_all_levels = 0; - for (auto& kv : (*(get_perf_context()->level_to_perf_context))) { - if (kv.second.bloom_filter_useful > 0) { - bloom_filter_useful_all_levels += kv.second.bloom_filter_useful; - } - } - ASSERT_GT(bloom_filter_useful_all_levels, 65000 * 2); - ASSERT_LT(bloom_filter_useful_all_levels, 120000 * 2); - - for (int i = 0; i < numkeys; i += 2) { - ASSERT_EQ(Get(1, Key(i)), "val"); - } - - // Part 2 (read path): rewrite last level with blooms, then verify they get - // cached only if !optimize_filters_for_hits - options.disable_auto_compactions = true; - options.num_levels = 9; - options.optimize_filters_for_hits = false; - options.statistics = CreateDBStatistics(); - bbto.block_cache.reset(); - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - - ReopenWithColumnFamilies({"default", "mypikachu"}, options); - MoveFilesToLevel(7 /* level */, 1 /* column family index */); - - std::string value = Get(1, Key(0)); - uint64_t prev_cache_filter_hits = - TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT); - value = Get(1, Key(0)); - ASSERT_EQ(prev_cache_filter_hits + 1, - TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT)); - - // Now that we know the filter blocks exist in the last level files, see if - // filter caching is skipped for this optimization - options.optimize_filters_for_hits = true; - options.statistics = CreateDBStatistics(); - bbto.block_cache.reset(); - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - - ReopenWithColumnFamilies({"default", "mypikachu"}, options); - - value = Get(1, Key(0)); - ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS)); - ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT)); - ASSERT_EQ(2 /* index and data block */, - TestGetTickerCount(options, BLOCK_CACHE_ADD)); - - // Check filter block ignored for files preloaded during DB::Open() - options.max_open_files = -1; - options.statistics = CreateDBStatistics(); - bbto.block_cache.reset(); - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - - ReopenWithColumnFamilies({"default", "mypikachu"}, options); - - uint64_t prev_cache_filter_misses = - TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS); - prev_cache_filter_hits = TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT); - Get(1, Key(0)); - ASSERT_EQ(prev_cache_filter_misses, - TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS)); - ASSERT_EQ(prev_cache_filter_hits, - TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT)); - - // Check filter block ignored for file trivially-moved to bottom level - bbto.block_cache.reset(); - options.max_open_files = 100; // setting > -1 makes it not preload all files - options.statistics = CreateDBStatistics(); - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - - ReopenWithColumnFamilies({"default", "mypikachu"}, options); - - ASSERT_OK(Put(1, Key(numkeys + 1), "val")); - ASSERT_OK(Flush(1)); - - int32_t trivial_move = 0; - int32_t non_trivial_move = 0; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::BackgroundCompaction:TrivialMove", - [&](void* /*arg*/) { trivial_move++; }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::BackgroundCompaction:NonTrivial", - [&](void* /*arg*/) { non_trivial_move++; }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - CompactRangeOptions 
compact_options; - compact_options.bottommost_level_compaction = - BottommostLevelCompaction::kSkip; - compact_options.change_level = true; - compact_options.target_level = 7; - ASSERT_OK(db_->CompactRange(compact_options, handles_[1], nullptr, nullptr)); - - ASSERT_EQ(trivial_move, 1); - ASSERT_EQ(non_trivial_move, 0); - - prev_cache_filter_hits = TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT); - prev_cache_filter_misses = - TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS); - value = Get(1, Key(numkeys + 1)); - ASSERT_EQ(prev_cache_filter_hits, - TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT)); - ASSERT_EQ(prev_cache_filter_misses, - TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS)); - - // Check filter block not cached for iterator - bbto.block_cache.reset(); - options.statistics = CreateDBStatistics(); - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - - ReopenWithColumnFamilies({"default", "mypikachu"}, options); - - std::unique_ptr iter(db_->NewIterator(ReadOptions(), handles_[1])); - iter->SeekToFirst(); - ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS)); - ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT)); - ASSERT_EQ(2 /* index and data block */, - TestGetTickerCount(options, BLOCK_CACHE_ADD)); - get_perf_context()->Reset(); -} - -int CountIter(std::unique_ptr& iter, const Slice& key) { - int count = 0; - for (iter->Seek(key); iter->Valid(); iter->Next()) { - count++; - } - EXPECT_OK(iter->status()); - return count; -} - -// use iterate_upper_bound to hint compatiability of existing bloom filters. -// The BF is considered compatible if 1) upper bound and seek key transform -// into the same string, or 2) the transformed seek key is of the same length -// as the upper bound and two keys are adjacent according to the comparator. 
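// To make the two compatibility rules above concrete, here is a rough
// illustration (the real check lives in RocksDB's iterator code and uses the
// column family comparator; "adjacent" is approximated here by bumping the
// last byte, which only matches the default bytewise comparator):
#include <string>
#include "rocksdb/slice.h"
#include "rocksdb/slice_transform.h"

namespace ROCKSDB_NAMESPACE {
inline bool UpperBoundHintsPrefixBloomUsable(const SliceTransform& extractor,
                                             const Slice& seek_key,
                                             const Slice& upper_bound) {
  if (!extractor.InDomain(seek_key)) {
    return false;
  }
  Slice seek_prefix = extractor.Transform(seek_key);
  // Rule 1: upper bound and seek key transform into the same string,
  // e.g. capped:4 maps both "abcdxx02" and "abcdy" to "abcd".
  if (extractor.InDomain(upper_bound) &&
      extractor.Transform(upper_bound).compare(seek_prefix) == 0) {
    return true;
  }
  // Rule 2: the transformed seek key has the same length as the upper bound
  // and the two are adjacent, e.g. seek prefix "abcd" vs upper bound "abce".
  if (upper_bound.size() == seek_prefix.size() && !seek_prefix.empty()) {
    std::string next(seek_prefix.data(), seek_prefix.size());
    next.back() = static_cast<char>(next.back() + 1);
    return upper_bound.compare(Slice(next)) == 0;
  }
  return false;
}
}  // namespace ROCKSDB_NAMESPACE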
-TEST_F(DISABLED_SpdbDBBloomFilterTest, DynamicBloomFilterUpperBound) { - auto bfp_impl = kSpdbPairedBloom; - int using_full_builder = true; - Options options; - options.create_if_missing = true; - options.env = CurrentOptions().env; - options.prefix_extractor.reset(NewCappedPrefixTransform(4)); - options.disable_auto_compactions = true; - options.statistics = CreateDBStatistics(); - // Enable prefix bloom for SST files - BlockBasedTableOptions table_options; - table_options.cache_index_and_filter_blocks = true; - table_options.filter_policy = Create(20, bfp_impl); - table_options.index_shortening = BlockBasedTableOptions::IndexShorteningMode:: - kShortenSeparatorsAndSuccessor; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - DestroyAndReopen(options); - - ASSERT_OK(Put("abcdxxx0", "val1")); - ASSERT_OK(Put("abcdxxx1", "val2")); - ASSERT_OK(Put("abcdxxx2", "val3")); - ASSERT_OK(Put("abcdxxx3", "val4")); - ASSERT_OK(dbfull()->Flush(FlushOptions())); - { - // prefix_extractor has not changed, BF will always be read - Slice upper_bound("abce"); - ReadOptions read_options; - read_options.prefix_same_as_start = true; - read_options.iterate_upper_bound = &upper_bound; - std::unique_ptr iter(db_->NewIterator(read_options)); - ASSERT_EQ(CountIter(iter, "abcd0000"), 4); - } - { - Slice upper_bound("abcdzzzz"); - ReadOptions read_options; - read_options.prefix_same_as_start = true; - read_options.iterate_upper_bound = &upper_bound; - std::unique_ptr iter(db_->NewIterator(read_options)); - ASSERT_EQ(CountIter(iter, "abcd0000"), 4); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 2); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0); - } - ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "fixed:5"}})); - ASSERT_EQ(dbfull()->GetOptions().prefix_extractor->AsString(), - "rocksdb.FixedPrefix.5"); - { - // BF changed, [abcdxx00, abce) is a valid bound, will trigger BF read - Slice upper_bound("abce"); - ReadOptions read_options; - read_options.prefix_same_as_start = true; - read_options.iterate_upper_bound = &upper_bound; - std::unique_ptr iter(db_->NewIterator(read_options)); - ASSERT_EQ(CountIter(iter, "abcdxx00"), 4); - // should check bloom filter since upper bound meets requirement - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), - 2 + using_full_builder); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0); - } - { - // [abcdxx01, abcey) is not valid bound since upper bound is too long for - // the BF in SST (capped:4) - Slice upper_bound("abcey"); - ReadOptions read_options; - read_options.prefix_same_as_start = true; - read_options.iterate_upper_bound = &upper_bound; - std::unique_ptr iter(db_->NewIterator(read_options)); - ASSERT_EQ(CountIter(iter, "abcdxx01"), 4); - // should skip bloom filter since upper bound is too long - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), - 2 + using_full_builder); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0); - } - { - // [abcdxx02, abcdy) is a valid bound since the prefix is the same - Slice upper_bound("abcdy"); - ReadOptions read_options; - read_options.prefix_same_as_start = true; - read_options.iterate_upper_bound = &upper_bound; - std::unique_ptr iter(db_->NewIterator(read_options)); - ASSERT_EQ(CountIter(iter, "abcdxx02"), 4); - // should check bloom filter since upper bound matches transformed seek - // key - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), - 2 + 
using_full_builder * 2); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0); - } - { - // [aaaaaaaa, abce) is not a valid bound since 1) they don't share the - // same prefix, 2) the prefixes are not consecutive - Slice upper_bound("abce"); - ReadOptions read_options; - read_options.prefix_same_as_start = true; - read_options.iterate_upper_bound = &upper_bound; - std::unique_ptr iter(db_->NewIterator(read_options)); - ASSERT_EQ(CountIter(iter, "aaaaaaaa"), 0); - // should skip bloom filter since mismatch is found - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), - 2 + using_full_builder * 2); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0); - } - ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "fixed:3"}})); - { - // [abc, abd) is not a valid bound since the upper bound is too short - // for BF (capped:4) - Slice upper_bound("abd"); - ReadOptions read_options; - read_options.prefix_same_as_start = true; - read_options.iterate_upper_bound = &upper_bound; - std::unique_ptr iter(db_->NewIterator(read_options)); - ASSERT_EQ(CountIter(iter, "abc"), 4); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), - 2 + using_full_builder * 2); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0); - } - ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "capped:4"}})); - { - // set back to capped:4 and verify BF is always read - Slice upper_bound("abd"); - ReadOptions read_options; - read_options.prefix_same_as_start = true; - read_options.iterate_upper_bound = &upper_bound; - std::unique_ptr iter(db_->NewIterator(read_options)); - ASSERT_EQ(CountIter(iter, "abc"), 0); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), - 3 + using_full_builder * 2); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 1); - } -} - -// Create multiple SST files each with a different prefix_extractor config, -// verify iterators can read all SST files using the latest config. 
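// The flow exercised by the test below, distilled outside the test harness
// (a sketch: statuses are ignored and the DB path is an assumption):
#include <memory>
#include "rocksdb/db.h"
#include "rocksdb/options.h"
#include "rocksdb/slice_transform.h"

inline void DynamicPrefixExtractorSketch() {
  using namespace ROCKSDB_NAMESPACE;
  Options options;
  options.create_if_missing = true;
  options.prefix_extractor.reset(NewFixedPrefixTransform(1));
  DB* db = nullptr;
  DB::Open(options, "/tmp/dynamic_prefix_sketch", &db);  // path is assumed

  db->Put(WriteOptions(), "foo1", "bar1");
  db->Flush(FlushOptions());  // first SST carries a fixed:1 prefix filter

  // Switch the extractor at runtime: SSTs written from now on use capped:3,
  // while the first SST keeps its fixed:1 filter and is matched per file.
  db->SetOptions({{"prefix_extractor", "capped:3"}});

  db->Put(WriteOptions(), "foo2", "bar2");
  db->Flush(FlushOptions());  // second SST carries a capped:3 prefix filter

  ReadOptions read_options;
  read_options.prefix_same_as_start = true;
  std::unique_ptr<Iterator> it(db->NewIterator(read_options));
  for (it->Seek("foo"); it->Valid(); it->Next()) {
    // Keys from both SSTs are visible under the latest extractor config.
  }
  it.reset();
  delete db;
}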
-TEST_F(DISABLED_SpdbDBBloomFilterTest, DynamicBloomFilterMultipleSST) { - auto bfp_impl = kSpdbPairedBloom; - int using_full_builder = true; - Options options; - options.env = CurrentOptions().env; - options.create_if_missing = true; - options.prefix_extractor.reset(NewFixedPrefixTransform(1)); - options.disable_auto_compactions = true; - options.statistics = CreateDBStatistics(); - // Enable prefix bloom for SST files - BlockBasedTableOptions table_options; - table_options.filter_policy = Create(20, bfp_impl); - table_options.cache_index_and_filter_blocks = true; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - DestroyAndReopen(options); - - Slice upper_bound("foz90000"); - ReadOptions read_options; - read_options.prefix_same_as_start = true; - - // first SST with fixed:1 BF - ASSERT_OK(Put("foo2", "bar2")); - ASSERT_OK(Put("foo", "bar")); - ASSERT_OK(Put("foq1", "bar1")); - ASSERT_OK(Put("fpa", "0")); - dbfull()->Flush(FlushOptions()); - std::unique_ptr iter_old(db_->NewIterator(read_options)); - ASSERT_EQ(CountIter(iter_old, "foo"), 4); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 1); - - ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "capped:3"}})); - ASSERT_EQ(dbfull()->GetOptions().prefix_extractor->AsString(), - "rocksdb.CappedPrefix.3"); - read_options.iterate_upper_bound = &upper_bound; - std::unique_ptr iter(db_->NewIterator(read_options)); - ASSERT_EQ(CountIter(iter, "foo"), 2); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), - 1 + using_full_builder); - ASSERT_EQ(CountIter(iter, "gpk"), 0); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), - 1 + using_full_builder); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0); - - // second SST with capped:3 BF - ASSERT_OK(Put("foo3", "bar3")); - ASSERT_OK(Put("foo4", "bar4")); - ASSERT_OK(Put("foq5", "bar5")); - ASSERT_OK(Put("fpb", "1")); - ASSERT_OK(dbfull()->Flush(FlushOptions())); - { - // BF is cappped:3 now - std::unique_ptr iter_tmp(db_->NewIterator(read_options)); - ASSERT_EQ(CountIter(iter_tmp, "foo"), 4); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), - 2 + using_full_builder * 2); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0); - ASSERT_EQ(CountIter(iter_tmp, "gpk"), 0); - // both counters are incremented because BF is "not changed" for 1 of the - // 2 SST files, so filter is checked once and found no match. 
- ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), - 3 + using_full_builder * 2); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 1); - } - - ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "fixed:2"}})); - ASSERT_EQ(dbfull()->GetOptions().prefix_extractor->AsString(), - "rocksdb.FixedPrefix.2"); - // third SST with fixed:2 BF - ASSERT_OK(Put("foo6", "bar6")); - ASSERT_OK(Put("foo7", "bar7")); - ASSERT_OK(Put("foq8", "bar8")); - ASSERT_OK(Put("fpc", "2")); - ASSERT_OK(dbfull()->Flush(FlushOptions())); - { - // BF is fixed:2 now - std::unique_ptr iter_tmp(db_->NewIterator(read_options)); - ASSERT_EQ(CountIter(iter_tmp, "foo"), 9); - // the first and last BF are checked - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), - 4 + using_full_builder * 3); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 1); - ASSERT_EQ(CountIter(iter_tmp, "gpk"), 0); - // only last BF is checked and not found - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), - 5 + using_full_builder * 3); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 2); - } - - // iter_old can only see the first SST, so checked plus 1 - ASSERT_EQ(CountIter(iter_old, "foo"), 4); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), - 6 + using_full_builder * 3); - // iter was created after the first setoptions call so only full filter - // will check the filter - ASSERT_EQ(CountIter(iter, "foo"), 2); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), - 6 + using_full_builder * 4); - - { - // keys in all three SSTs are visible to iterator - // The range of [foo, foz90000] is compatible with (fixed:1) and (fixed:2) - // so +2 for checked counter - std::unique_ptr iter_all(db_->NewIterator(read_options)); - ASSERT_EQ(CountIter(iter_all, "foo"), 9); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), - 7 + using_full_builder * 5); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 2); - ASSERT_EQ(CountIter(iter_all, "gpk"), 0); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), - 8 + using_full_builder * 5); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 3); - } - ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "capped:3"}})); - ASSERT_EQ(dbfull()->GetOptions().prefix_extractor->AsString(), - "rocksdb.CappedPrefix.3"); - { - std::unique_ptr iter_all(db_->NewIterator(read_options)); - ASSERT_EQ(CountIter(iter_all, "foo"), 6); - // all three SST are checked because the current options has the same as - // the remaining SST (capped:3) - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), - 9 + using_full_builder * 7); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 3); - ASSERT_EQ(CountIter(iter_all, "gpk"), 0); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), - 10 + using_full_builder * 7); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 4); - } - // TODO(Zhongyi): Maybe also need to add Get calls to test point look up? -} - -// Create a new column family in a running DB, change prefix_extractor -// dynamically, verify the iterator created on the new column family behaves -// as expected -// TODO: No filter is created here (in rocksdb's test it's the same) => Why is -// this test in this suite? 
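// For reference, the per-column-family shape of the dynamic change used in
// the test below (a sketch; the column family name is an assumption and
// statuses are ignored):
#include "rocksdb/db.h"
#include "rocksdb/options.h"
#include "rocksdb/slice_transform.h"

inline void PerCfPrefixExtractorSketch(ROCKSDB_NAMESPACE::DB* db) {
  using namespace ROCKSDB_NAMESPACE;
  ColumnFamilyOptions cf_options;
  cf_options.prefix_extractor.reset(NewCappedPrefixTransform(3));
  ColumnFamilyHandle* cf = nullptr;
  db->CreateColumnFamily(cf_options, "cf_sketch", &cf);  // name is assumed

  // Only this column family's extractor changes; others are untouched.
  db->SetOptions(cf, {{"prefix_extractor", "fixed:2"}});

  db->DropColumnFamily(cf);
  db->DestroyColumnFamilyHandle(cf);
}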
-TEST_F(DISABLED_SpdbDBBloomFilterTest, DynamicBloomFilterNewColumnFamily) { - auto bfp_impl = kSpdbPairedBloom; - Options options = CurrentOptions(); - options.create_if_missing = true; - options.prefix_extractor.reset(NewFixedPrefixTransform(1)); - options.disable_auto_compactions = true; - options.statistics = CreateDBStatistics(); - // Enable prefix bloom for SST files - BlockBasedTableOptions table_options; - table_options.cache_index_and_filter_blocks = true; - table_options.filter_policy = Create(20, bfp_impl); - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - CreateAndReopenWithCF({"pikachu0"}, options); - ReadOptions read_options; - read_options.prefix_same_as_start = true; - // create a new CF and set prefix_extractor dynamically - options.prefix_extractor.reset(NewCappedPrefixTransform(3)); - CreateColumnFamilies({"ramen_dojo_0"}, options); - ASSERT_EQ(dbfull()->GetOptions(handles_[2]).prefix_extractor->AsString(), - "rocksdb.CappedPrefix.3"); - ASSERT_OK(Put(2, "foo3", "bar3")); - ASSERT_OK(Put(2, "foo4", "bar4")); - ASSERT_OK(Put(2, "foo5", "bar5")); - ASSERT_OK(Put(2, "foq6", "bar6")); - ASSERT_OK(Put(2, "fpq7", "bar7")); - dbfull()->Flush(FlushOptions()); - { - std::unique_ptr iter(db_->NewIterator(read_options, handles_[2])); - ASSERT_EQ(CountIter(iter, "foo"), 3); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 0); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0); - } - ASSERT_OK( - dbfull()->SetOptions(handles_[2], {{"prefix_extractor", "fixed:2"}})); - ASSERT_EQ(dbfull()->GetOptions(handles_[2]).prefix_extractor->AsString(), - "rocksdb.FixedPrefix.2"); - { - std::unique_ptr iter(db_->NewIterator(read_options, handles_[2])); - ASSERT_EQ(CountIter(iter, "foo"), 4); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 0); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0); - } - ASSERT_OK(dbfull()->DropColumnFamily(handles_[2])); - ASSERT_OK(dbfull()->DestroyColumnFamilyHandle(handles_[2])); - handles_[2] = nullptr; - ASSERT_OK(dbfull()->DropColumnFamily(handles_[1])); - ASSERT_OK(dbfull()->DestroyColumnFamilyHandle(handles_[1])); - handles_[1] = nullptr; -} - -// Verify it's possible to change prefix_extractor at runtime and iterators -// behaves as expected -TEST_F(DISABLED_SpdbDBBloomFilterTest, DynamicBloomFilterOptions) { - auto bfp_impl = kSpdbPairedBloom; - Options options; - options.env = CurrentOptions().env; - options.create_if_missing = true; - options.prefix_extractor.reset(NewFixedPrefixTransform(1)); - options.disable_auto_compactions = true; - options.statistics = CreateDBStatistics(); - // Enable prefix bloom for SST files - BlockBasedTableOptions table_options; - table_options.cache_index_and_filter_blocks = true; - table_options.filter_policy = Create(20, bfp_impl); - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - DestroyAndReopen(options); - - ASSERT_OK(Put("foo2", "bar2")); - ASSERT_OK(Put("foo", "bar")); - ASSERT_OK(Put("foo1", "bar1")); - ASSERT_OK(Put("fpa", "0")); - dbfull()->Flush(FlushOptions()); - ASSERT_OK(Put("foo3", "bar3")); - ASSERT_OK(Put("foo4", "bar4")); - ASSERT_OK(Put("foo5", "bar5")); - ASSERT_OK(Put("fpb", "1")); - dbfull()->Flush(FlushOptions()); - ASSERT_OK(Put("foo6", "bar6")); - ASSERT_OK(Put("foo7", "bar7")); - ASSERT_OK(Put("foo8", "bar8")); - ASSERT_OK(Put("fpc", "2")); - dbfull()->Flush(FlushOptions()); - - ReadOptions read_options; - read_options.prefix_same_as_start = true; - { - 
std::unique_ptr iter(db_->NewIterator(read_options)); - ASSERT_EQ(CountIter(iter, "foo"), 12); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 3); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0); - } - std::unique_ptr iter_old(db_->NewIterator(read_options)); - ASSERT_EQ(CountIter(iter_old, "foo"), 12); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 6); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0); - - ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "capped:3"}})); - ASSERT_EQ(dbfull()->GetOptions().prefix_extractor->AsString(), - "rocksdb.CappedPrefix.3"); - { - std::unique_ptr iter(db_->NewIterator(read_options)); - // "fp*" should be skipped - ASSERT_EQ(CountIter(iter, "foo"), 9); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 6); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0); - } - - // iterator created before should not be affected and see all keys - ASSERT_EQ(CountIter(iter_old, "foo"), 12); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 9); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0); - ASSERT_EQ(CountIter(iter_old, "abc"), 0); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 12); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 3); -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/plugin/speedb/speedb.mk b/plugin/speedb/speedb.mk index 114e5d7f11..04e6abec41 100644 --- a/plugin/speedb/speedb.mk +++ b/plugin/speedb/speedb.mk @@ -27,11 +27,9 @@ speedb_HEADERS = \ speedb_TESTS = \ speedb_customizable_test.cc \ - paired_filter/speedb_db_bloom_filter_test.cc \ pinning_policy/scoped_pinning_policy_test.cc \ -speedb_TESTS = \ - speedb_customizable_test.cc \ - paired_filter/speedb_db_bloom_filter_test.cc \ +speedb_TESTS = \ + speedb_customizable_test.cc \ -speedb_JAVA_TESTS = org.rocksdb.SpeedbFilterTest \ +speedb_JAVA_TESTS = org.rocksdb.SpeedbFilterTest \ From bb86a5bde89dbe2a40d6fb819db036937412e264 Mon Sep 17 00:00:00 2001 From: Udi Date: Thu, 7 Mar 2024 16:20:59 +0200 Subject: [PATCH 2/2] Update HISTORY & add license to db/db_bloom_filter_test.cc --- HISTORY.md | 3 ++- db/db_bloom_filter_test.cc | 35 +++++++++++++++++++++++++---------- 2 files changed, 27 insertions(+), 11 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index efef071696..a28ceefca3 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -5,7 +5,8 @@ ### New Features ### Enhancements -* set the default bucket size of hashspdb to be 400k for best memory use and performance (#854) +* set the default bucket size of hashspdb to be 400k for best memory use and performance (#854). +* Support Speedb's Paired Bloom Filter in db_bloom_filter_test (#810). ### Bug Fixes * LOG Consistency:Display the pinning policy options same as block cache options / metadata cache options (#804). diff --git a/db/db_bloom_filter_test.cc b/db/db_bloom_filter_test.cc index 897725d2bf..d9ce1d59d8 100644 --- a/db/db_bloom_filter_test.cc +++ b/db/db_bloom_filter_test.cc @@ -1,3 +1,17 @@ +// Copyright (C) 2023 Speedb Ltd. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License @@ -84,13 +98,11 @@ class DBBloomFilterTest : public DBTestBase { class DBBloomFilterTestWithPairedBloomOnOff : public DBTestBase, public testing::WithParamInterface { -public: + public: DBBloomFilterTestWithPairedBloomOnOff() : DBTestBase("db_bloom_filter_tests", /*env_do_fsync=*/true) {} - void SetUp() override { - use_paired_bloom_ = GetParam(); - } + void SetUp() override { use_paired_bloom_ = GetParam(); } bool use_paired_bloom_ = false; }; @@ -220,7 +232,8 @@ TEST_P(DBBloomFilterTestDefFormatVersion, KeyMayExist) { ChangeOptions(kSkipPlainTable | kSkipHashIndex | kSkipFIFOCompaction)); } -TEST_P(DBBloomFilterTestWithPairedBloomOnOff, GetFilterByPrefixBloomCustomPrefixExtractor) { +TEST_P(DBBloomFilterTestWithPairedBloomOnOff, + GetFilterByPrefixBloomCustomPrefixExtractor) { for (bool partition_filters : {true, false}) { Options options = last_options_; options.prefix_extractor = @@ -820,7 +833,8 @@ INSTANTIATE_TEST_CASE_P( std::make_tuple(kSpeedbPairedBloomFilter, false, test::kDefaultFormatVersion))); -INSTANTIATE_TEST_CASE_P(DBBloomFilterTestWithPairedBloomOnOff, DBBloomFilterTestWithPairedBloomOnOff, testing::Bool()); +INSTANTIATE_TEST_CASE_P(DBBloomFilterTestWithPairedBloomOnOff, + DBBloomFilterTestWithPairedBloomOnOff, testing::Bool()); INSTANTIATE_TEST_CASE_P( FormatDef, DBBloomFilterTestWithParam, @@ -2544,8 +2558,9 @@ TEST_P(DBBloomFilterTestWithPairedBloomOnOff, PrefixScan) { } // end of while } -// Speedb Paired Bloom Filters currently do NOT support the 'optimize_filters_for_hits' options => -// This test doesn't cover paired bloom filters +// Speedb Paired Bloom Filters currently do NOT support the +// 'optimize_filters_for_hits' options => This test doesn't cover paired bloom +// filters TEST_F(DBBloomFilterTest, OptimizeFiltersForHits) { const int kNumKeysPerFlush = 1000; @@ -3178,7 +3193,8 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterOptions) { } } -TEST_P(DBBloomFilterTestWithPairedBloomOnOff, SeekForPrevWithPartitionedFilters) { +TEST_P(DBBloomFilterTestWithPairedBloomOnOff, + SeekForPrevWithPartitionedFilters) { Options options = CurrentOptions(); constexpr size_t kNumKeys = 10000; static_assert(kNumKeys <= 10000, "kNumKeys have to be <= 10000"); @@ -3623,7 +3639,6 @@ TEST_P(DBBloomFilterTestWithPairedBloomOnOff, WeirdPrefixExtractorWithFilter3) { } } - } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) {