Avoid recompressing cold block in CompressedSecondaryCache #10527

Closed · wants to merge 56 commits
Commits (56)
88c4053  Avoid recompressing cold block (Aug 14, 2022)
437d31e  update (Aug 14, 2022)
98b36e8  update (Aug 15, 2022)
0c5ff08  remove comments. (Aug 15, 2022)
8994688  update (Aug 15, 2022)
63f8bb8  This is a commit without bugs reported by db_stress. (Aug 15, 2022)
920d991  polishing. (Aug 15, 2022)
9d3a72c  update (Aug 15, 2022)
a900e7f  workable commit. (Aug 15, 2022)
ac32842  update unit tests. (Aug 16, 2022)
48b6135  update unit tests. (Aug 16, 2022)
1cfc7ce  Add use_compressed_secondary_cache_ , standalone_pool_ratio, etc. (Aug 17, 2022)
da46184  update Lookup and Promote. (Aug 17, 2022)
d71a192  update (Aug 17, 2022)
b715163  update (Aug 17, 2022)
087f7f1  update (Aug 17, 2022)
597b180  update (Aug 17, 2022)
07e45eb  update (Aug 18, 2022)
59ced4b  update (Aug 18, 2022)
ded5759  update (Aug 19, 2022)
b9bde8a  Update Lookup in fault_injection_secondary_cache.cc (Aug 19, 2022)
c14a653  fix one Release issue in CompressedSecondaryCache::Lookup (Aug 21, 2022)
9d87893  this is a workable commit with extra logs. (Aug 21, 2022)
8c34ff0  remove some extra comments. (Aug 21, 2022)
3069c5d  update (Aug 21, 2022)
941eacc  remove cout. (Aug 21, 2022)
7501a84  use mutext for updating standalone_pool_usage_ (Aug 22, 2022)
b94da9b  update (Aug 22, 2022)
5bf17e3  update the mutex use. (Aug 22, 2022)
0073eee  update (Aug 22, 2022)
7368125  fix a double to size_t conversion warning. (Aug 22, 2022)
5d4763e  update parameter comments. (Aug 22, 2022)
e363895  check Status. (Aug 22, 2022)
3331694  fix lint issues. (Aug 22, 2022)
b79c6d5  fix a lint issue. (Aug 22, 2022)
d8d2fda  Avoid insert a block into sec cache if it is evicted for the first time. (Aug 27, 2022)
88cc137  update 0.3 to 0.2. (Aug 28, 2022)
aaff78b  update (Aug 30, 2022)
b66459b  update unit tests. (Aug 31, 2022)
6f7a477  update blob_source_test. (Aug 31, 2022)
e0ed88f  update db_blob_basic_test (Aug 31, 2022)
9556d50  remove standalone pool. (Sep 2, 2022)
befc26d  update (Sep 2, 2022)
9a1b93b  update blob_source_test (Sep 2, 2022)
d1621ab  update malloc_bin_sizes_. (Sep 3, 2022)
17f2eaa  fix unit tests. (Sep 4, 2022)
cf9c869  avoid split and merge. (Sep 4, 2022)
0b136a7  add HISTORY.md (Sep 4, 2022)
778717f  address the comments. (Sep 7, 2022)
22170f3  fix a crash issue. (Sep 7, 2022)
92e5a50  merge main. (Sep 7, 2022)
b6543b2  update (Sep 7, 2022)
4ed4448  rename erase_handle to advise_erase. (Sep 7, 2022)
f7dca22  address comments. (Sep 7, 2022)
d39ae15  fix a character issue. (Sep 7, 2022)
3c82ce3  address a comment. (Sep 7, 2022)
2 changes: 2 additions & 0 deletions HISTORY.md
@@ -18,6 +18,8 @@

### Behavior Change
* Right now, when the option migration tool (OptionChangeMigration()) migrates to FIFO compaction, it compacts all the data into one single SST file and moves it to L0. This might create a problem for some users: the giant file may soon be deleted to satisfy max_table_files_size, and might cause the DB to be almost empty. We change the behavior so that the files are cut to be smaller, but these files might not follow the data insertion order. With the change, after the migration, migrated data might not be dropped by insertion order by FIFO compaction.
+ * When a block is first found in `CompressedSecondaryCache`, we insert only a dummy block into the primary cache and do not erase the block from `CompressedSecondaryCache`; a standalone handle is returned to the caller. Only if the block is found in `CompressedSecondaryCache` again before the dummy block is evicted do we erase it from `CompressedSecondaryCache` and insert it into the primary cache.
+ * When a block is first evicted from the primary cache to `CompressedSecondaryCache`, we insert only a dummy block in `CompressedSecondaryCache`. Only if it is evicted again before the dummy block is evicted is it treated as a hot block and actually inserted into `CompressedSecondaryCache`.

### New Features
* RocksDB does internal auto prefetching if it notices 2 sequential reads when readahead_size is not specified. A new option `num_file_reads_for_auto_readahead` is added in BlockBasedTableOptions, indicating after how many sequential reads internal auto prefetching should start (default is 2).
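Taken together, the two behavior-change entries above implement a two-touch admission policy on both sides of the cache boundary: a block must be touched twice before the expensive action (promotion to the primary cache, or compression into the secondary cache) is taken. Below is a minimal self-contained sketch of the idea; the type and method names (TwoTouchAdmission, Admit) are the editor's invention, not code from this PR:

#include <string>
#include <unordered_set>

// Editor's sketch of the two-touch admission policy described above: a key
// must be seen twice before its block is actually admitted. In RocksDB the
// "dummy" entries live inside the LRU cache itself and can be evicted, which
// bounds their footprint; this sketch omits that detail.
struct TwoTouchAdmission {
  std::unordered_set<std::string> seen_once_;  // stands in for dummy blocks

  // Returns true when the block should really be admitted.
  bool Admit(const std::string& key) {
    if (seen_once_.erase(key) > 0) {
      return true;  // second touch: admit (compress/promote) the real block
    }
    seen_once_.insert(key);  // first touch: remember via a zero-charge dummy
    return false;
  }
};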
93 changes: 55 additions & 38 deletions cache/compressed_secondary_cache.cc
@@ -22,9 +22,9 @@ CompressedSecondaryCache::CompressedSecondaryCache(
CacheMetadataChargePolicy metadata_charge_policy,
CompressionType compression_type, uint32_t compress_format_version)
: cache_options_(capacity, num_shard_bits, strict_capacity_limit,
- high_pri_pool_ratio, memory_allocator, use_adaptive_mutex,
- metadata_charge_policy, compression_type,
- compress_format_version, low_pri_pool_ratio) {
+ high_pri_pool_ratio, low_pri_pool_ratio, memory_allocator,
+ use_adaptive_mutex, metadata_charge_policy,
+ compression_type, compress_format_version) {
cache_ =
NewLRUCache(capacity, num_shard_bits, strict_capacity_limit,
high_pri_pool_ratio, memory_allocator, use_adaptive_mutex,
@@ -35,58 +35,79 @@ CompressedSecondaryCache::~CompressedSecondaryCache() { cache_.reset(); }

std::unique_ptr<SecondaryCacheResultHandle> CompressedSecondaryCache::Lookup(
const Slice& key, const Cache::CreateCallback& create_cb, bool /*wait*/,
- bool& is_in_sec_cache) {
+ bool advise_erase, bool& is_in_sec_cache) {
std::unique_ptr<SecondaryCacheResultHandle> handle;
is_in_sec_cache = false;
Cache::Handle* lru_handle = cache_->Lookup(key);
if (lru_handle == nullptr) {
- return handle;
+ return nullptr;
}

- CacheValueChunk* handle_value =
-     reinterpret_cast<CacheValueChunk*>(cache_->Value(lru_handle));
- size_t handle_value_charge{0};
- CacheAllocationPtr merged_value =
-     MergeChunksIntoValue(handle_value, handle_value_charge);
[Review comment from a Contributor]
Is the change of removing MergeChunksIntoValue() related?

[Reply from the Author]
Yes. The myshadow AB tests showed consistently better metrics (cache hit rate, CPU, and mem_rss) without the current split and merge functions, so I removed them for now.

+ void* handle_value = cache_->Value(lru_handle);
+ if (handle_value == nullptr) {
+   cache_->Release(lru_handle, /*erase_if_last_ref=*/false);
+   return nullptr;
[Review comment from a Contributor]
Should we release the handle in this case?
+ }

+ CacheAllocationPtr* ptr = reinterpret_cast<CacheAllocationPtr*>(handle_value);

Status s;
void* value{nullptr};
size_t charge{0};
if (cache_options_.compression_type == kNoCompression) {
- s = create_cb(merged_value.get(), handle_value_charge, &value, &charge);
+ s = create_cb(ptr->get(), cache_->GetCharge(lru_handle), &value, &charge);
} else {
UncompressionContext uncompression_context(cache_options_.compression_type);
UncompressionInfo uncompression_info(uncompression_context,
UncompressionDict::GetEmptyDict(),
cache_options_.compression_type);

size_t uncompressed_size{0};
- CacheAllocationPtr uncompressed;
- uncompressed = UncompressData(uncompression_info, (char*)merged_value.get(),
-                               handle_value_charge, &uncompressed_size,
-                               cache_options_.compress_format_version,
-                               cache_options_.memory_allocator.get());
+ CacheAllocationPtr uncompressed = UncompressData(
+     uncompression_info, (char*)ptr->get(), cache_->GetCharge(lru_handle),
+     &uncompressed_size, cache_options_.compress_format_version,
+     cache_options_.memory_allocator.get());

if (!uncompressed) {
- cache_->Release(lru_handle, /* erase_if_last_ref */ true);
- return handle;
+ cache_->Release(lru_handle, /*erase_if_last_ref=*/true);
+ return nullptr;
}
s = create_cb(uncompressed.get(), uncompressed_size, &value, &charge);
}

if (!s.ok()) {
- cache_->Release(lru_handle, /* erase_if_last_ref */ true);
- return handle;
+ cache_->Release(lru_handle, /*erase_if_last_ref=*/true);
+ return nullptr;
}

- cache_->Release(lru_handle, /* erase_if_last_ref */ true);
+ if (advise_erase) {
+   cache_->Release(lru_handle, /*erase_if_last_ref=*/true);
+   // Insert a dummy handle.
+   cache_->Insert(key, /*value=*/nullptr, /*charge=*/0, DeletionCallback)
+       .PermitUncheckedError();
+ } else {
+   is_in_sec_cache = true;
+   cache_->Release(lru_handle, /*erase_if_last_ref=*/false);
+ }
handle.reset(new CompressedSecondaryCacheResultHandle(value, charge));

return handle;
}
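For orientation, here is a hedged sketch of how the primary-cache side is expected to drive the new Lookup() signature. The caller shown is illustrative (PrimaryHasDummy is a hypothetical helper; the real logic lives in the primary-cache tier), not code from this PR:

// Illustrative caller, not the PR's actual code. `sec_cache`, `key`, and
// `create_cb` are assumed to exist in the surrounding context.
bool is_in_sec_cache = false;
const bool advise_erase = PrimaryHasDummy(key);  // hypothetical helper
std::unique_ptr<rocksdb::SecondaryCacheResultHandle> h =
    sec_cache->Lookup(key, create_cb, /*wait=*/true, advise_erase,
                      is_in_sec_cache);
if (h != nullptr) {
  if (is_in_sec_cache) {
    // First hit: the compressed copy stays in the secondary cache and a
    // standalone handle came back; the caller inserts only a dummy into the
    // primary cache so the next hit can promote for real.
  } else {
    // Second hit (advise_erase was true): the secondary cache released its
    // copy and left a dummy behind; insert the full block upstream.
  }
}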

Status CompressedSecondaryCache::Insert(const Slice& key, void* value,
const Cache::CacheItemHelper* helper) {
+ if (value == nullptr) {
+   return Status::InvalidArgument();
+ }
+
+ Cache::Handle* lru_handle = cache_->Lookup(key);
+ if (lru_handle == nullptr) {
+   // Insert a dummy handle if the handle is evicted for the first time.
+   return cache_->Insert(key, /*value=*/nullptr, /*charge=*/0,
+                         DeletionCallback);
+ } else {
+   cache_->Release(lru_handle, /*erase_if_last_ref=*/false);
+ }

size_t size = (*helper->size_cb)(value);
CacheAllocationPtr ptr =
AllocateBlock(size, cache_options_.memory_allocator.get());
@@ -115,12 +136,14 @@ Status CompressedSecondaryCache::Insert(const Slice& key, void* value,
}

val = Slice(compressed_val);
+ size = compressed_val.size();
+ ptr = AllocateBlock(size, cache_options_.memory_allocator.get());
+ memcpy(ptr.get(), compressed_val.data(), size);
}

- size_t charge{0};
- CacheValueChunk* value_chunks_head =
-     SplitValueIntoChunks(val, cache_options_.compression_type, charge);
- return cache_->Insert(key, value_chunks_head, charge, DeletionCallback);
+ CacheAllocationPtr* buf = new CacheAllocationPtr(std::move(ptr));
+
+ return cache_->Insert(key, buf, size, DeletionCallback);
}

void CompressedSecondaryCache::Erase(const Slice& key) { cache_->Erase(key); }
@@ -212,22 +235,16 @@ CacheAllocationPtr CompressedSecondaryCache::MergeChunksIntoValue(

void CompressedSecondaryCache::DeletionCallback(const Slice& /*key*/,
void* obj) {
- CacheValueChunk* chunks_head = reinterpret_cast<CacheValueChunk*>(obj);
- while (chunks_head != nullptr) {
-   CacheValueChunk* tmp_chunk = chunks_head;
-   chunks_head = chunks_head->next;
-   tmp_chunk->Free();
- }
+ delete reinterpret_cast<CacheAllocationPtr*>(obj);
obj = nullptr;
}
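The new Insert()/DeletionCallback() pair above forms one ownership handshake: Insert() moves the buffer into a heap-allocated CacheAllocationPtr and stores the raw wrapper pointer as the cache value, and DeletionCallback() reclaims it. A condensed, editor-annotated view of the two sides (re-stating the diff, not new behavior):

// In Insert(): hand the LRU cache a heap-allocated smart-pointer wrapper.
CacheAllocationPtr* buf = new CacheAllocationPtr(std::move(ptr));
return cache_->Insert(key, buf, size, DeletionCallback);

// In DeletionCallback(): deleting the wrapper runs ~CacheAllocationPtr,
// which returns the block to the configured allocator. For the zero-charge
// dummy entries the stored value is nullptr, and `delete` on a null pointer
// is a no-op, so the same callback safely serves real and dummy handles.
delete reinterpret_cast<CacheAllocationPtr*>(obj);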

std::shared_ptr<SecondaryCache> NewCompressedSecondaryCache(
size_t capacity, int num_shard_bits, bool strict_capacity_limit,
- double high_pri_pool_ratio,
+ double high_pri_pool_ratio, double low_pri_pool_ratio,
std::shared_ptr<MemoryAllocator> memory_allocator, bool use_adaptive_mutex,
CacheMetadataChargePolicy metadata_charge_policy,
- CompressionType compression_type, uint32_t compress_format_version,
- double low_pri_pool_ratio) {
+ CompressionType compression_type, uint32_t compress_format_version) {
return std::make_shared<CompressedSecondaryCache>(
capacity, num_shard_bits, strict_capacity_limit, high_pri_pool_ratio,
low_pri_pool_ratio, memory_allocator, use_adaptive_mutex,
@@ -240,9 +257,9 @@ std::shared_ptr<SecondaryCache> NewCompressedSecondaryCache(
assert(opts.secondary_cache == nullptr);
return NewCompressedSecondaryCache(
opts.capacity, opts.num_shard_bits, opts.strict_capacity_limit,
- opts.high_pri_pool_ratio, opts.memory_allocator, opts.use_adaptive_mutex,
- opts.metadata_charge_policy, opts.compression_type,
- opts.compress_format_version, opts.low_pri_pool_ratio);
+ opts.high_pri_pool_ratio, opts.low_pri_pool_ratio, opts.memory_allocator,
+ opts.use_adaptive_mutex, opts.metadata_charge_policy,
+ opts.compression_type, opts.compress_format_version);
}
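A hedged wiring example for the reordered parameters, using the options-struct overload shown just above. The capacity and compression values are placeholders; the types and factory functions are from the public RocksDB API:

#include <memory>

#include "rocksdb/cache.h"

int main() {
  // Configure the compressed secondary cache; values are illustrative.
  rocksdb::CompressedSecondaryCacheOptions sec_opts;
  sec_opts.capacity = 64 << 20;  // 64 MiB
  sec_opts.compression_type = rocksdb::kLZ4Compression;

  // Attach it to a primary LRU block cache.
  rocksdb::LRUCacheOptions pri_opts;
  pri_opts.capacity = 32 << 20;  // 32 MiB
  pri_opts.secondary_cache = rocksdb::NewCompressedSecondaryCache(sec_opts);
  std::shared_ptr<rocksdb::Cache> block_cache = rocksdb::NewLRUCache(pri_opts);
  return block_cache != nullptr ? 0 : 1;
}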

} // namespace ROCKSDB_NAMESPACE
20 changes: 18 additions & 2 deletions cache/compressed_secondary_cache.h
@@ -45,7 +45,21 @@ class CompressedSecondaryCacheResultHandle : public SecondaryCacheResultHandle {
// The CompressedSecondaryCache is a concrete implementation of
// rocksdb::SecondaryCache.
//
- // Users can also cast a pointer to it and call methods on
+ // When a block is found from CompressedSecondaryCache::Lookup, we check whether
+ // there is a dummy block with the same key in the primary cache.
+ // 1. If the dummy block exists, we erase the block from
+ //    CompressedSecondaryCache and insert it into the primary cache.
+ // 2. If not, we just insert a dummy block into the primary cache
+ //    (charging the actual size of the block) and do not erase the block from
+ //    CompressedSecondaryCache. A standalone handle is returned to the caller.
+ //
+ // When a block is evicted from the primary cache, we check whether
+ // there is a dummy block with the same key in CompressedSecondaryCache.
+ // 1. If the dummy block exists, the block is inserted into
+ //    CompressedSecondaryCache.
+ // 2. If not, we just insert a dummy block (size 0) in CompressedSecondaryCache.
+ //
+ // Users can also cast a pointer to CompressedSecondaryCache and call methods on
// it directly, especially custom methods that may be added
// in the future. For example -
// std::unique_ptr<rocksdb::SecondaryCache> cache =
@@ -72,7 +86,9 @@ class CompressedSecondaryCache : public SecondaryCache {

std::unique_ptr<SecondaryCacheResultHandle> Lookup(
const Slice& key, const Cache::CreateCallback& create_cb, bool /*wait*/,
- bool& is_in_sec_cache) override;
+ bool advise_erase, bool& is_in_sec_cache) override;
[Review comment from a Contributor]
Need a comment here to explain the requirement placed on the Lookup() implementation by advise_erase.

+ bool SupportForceErase() const override { return true; }

void Erase(const Slice& key) override;
