From c6275956e2d73b6aa5023d0b1298a495620289d8 Mon Sep 17 00:00:00 2001 From: Lei Jin Date: Thu, 25 Sep 2014 16:15:23 -0700 Subject: [PATCH] improve memory efficiency of cuckoo reader Summary: When creating a new iterator, instead of storing mapping from key to bucket id for sorting, store only bucket id and read key from mmap file based on the id. This reduces from 20 bytes per entry to only 4 bytes. Test Plan: db_bench Reviewers: igor, yhchiang, sdong Reviewed By: sdong Subscribers: leveldb Differential Revision: https://reviews.facebook.net/D23757 --- include/rocksdb/table.h | 2 + table/cuckoo_table_builder.cc | 7 ++ table/cuckoo_table_reader.cc | 126 +++++++++++++++++++--------------- table/cuckoo_table_reader.h | 1 + 4 files changed, 82 insertions(+), 54 deletions(-) diff --git a/include/rocksdb/table.h b/include/rocksdb/table.h index e8ac6bd6254..4c06c23f7da 100644 --- a/include/rocksdb/table.h +++ b/include/rocksdb/table.h @@ -255,6 +255,8 @@ struct CuckooTablePropertyNames { static const std::string kIdentityAsFirstHash; // Indicate if using module or bit and to calculate hash value static const std::string kUseModuleHash; + // Fixed user key length + static const std::string kUserKeyLength; }; struct CuckooTableOptions { diff --git a/table/cuckoo_table_builder.cc b/table/cuckoo_table_builder.cc index 17184ae2ced..56eb377fac2 100644 --- a/table/cuckoo_table_builder.cc +++ b/table/cuckoo_table_builder.cc @@ -39,6 +39,8 @@ const std::string CuckooTablePropertyNames::kIdentityAsFirstHash = "rocksdb.cuckoo.hash.identityfirst"; const std::string CuckooTablePropertyNames::kUseModuleHash = "rocksdb.cuckoo.hash.usemodule"; +const std::string CuckooTablePropertyNames::kUserKeyLength = + "rocksdb.cuckoo.hash.userkeylength"; // Obtained by running echo rocksdb.table.cuckoo | sha1sum extern const uint64_t kCuckooTableMagicNumber = 0x926789d0c5f17873ull; @@ -280,6 +282,11 @@ Status CuckooTableBuilder::Finish() { CuckooTablePropertyNames::kUseModuleHash].assign( reinterpret_cast(&use_module_hash_), sizeof(use_module_hash_)); + uint32_t user_key_len = static_cast(smallest_user_key_.size()); + properties_.user_collected_properties[ + CuckooTablePropertyNames::kUserKeyLength].assign( + reinterpret_cast(&user_key_len), + sizeof(user_key_len)); // Write meta blocks. MetaIndexBuilder meta_index_builder; diff --git a/table/cuckoo_table_reader.cc b/table/cuckoo_table_reader.cc index 30a8d80791e..8c3f58eac24 100644 --- a/table/cuckoo_table_reader.cc +++ b/table/cuckoo_table_reader.cc @@ -16,6 +16,7 @@ #include #include #include "rocksdb/iterator.h" +#include "rocksdb/table.h" #include "table/meta_blocks.h" #include "table/cuckoo_table_factory.h" #include "util/arena.h" @@ -23,7 +24,8 @@ namespace rocksdb { namespace { - static const uint64_t CACHE_LINE_MASK = ~((uint64_t)CACHE_LINE_SIZE - 1); +const uint64_t CACHE_LINE_MASK = ~((uint64_t)CACHE_LINE_SIZE - 1); +const uint32_t kInvalidIndex = std::numeric_limits::max(); } extern const uint64_t kCuckooTableMagicNumber; @@ -62,6 +64,14 @@ CuckooTableReader::CuckooTableReader( unused_key_ = unused_key->second; key_length_ = props->fixed_key_len; + auto user_key_len = user_props.find(CuckooTablePropertyNames::kUserKeyLength); + if (user_key_len == user_props.end()) { + status_ = Status::Corruption("User key length not found"); + return; + } + user_key_length_ = *reinterpret_cast( + user_key_len->second.data()); + auto value_length = user_props.find(CuckooTablePropertyNames::kValueLength); if (value_length == user_props.end()) { status_ = Status::Corruption("Value length not found"); @@ -104,7 +114,6 @@ CuckooTableReader::CuckooTableReader( } use_module_hash_ = *reinterpret_cast( use_module_hash->second.data()); - fprintf(stderr, "use_module_hash %d\n", use_module_hash_); auto cuckoo_block_size = user_props.find( CuckooTablePropertyNames::kCuckooBlockSize); if (cuckoo_block_size == user_props.end()) { @@ -185,30 +194,39 @@ class CuckooTableIterator : public Iterator { void LoadKeysFromReader(); private: - struct CompareKeys { - CompareKeys(const Comparator* ucomp, const bool last_level) - : ucomp_(ucomp), - is_last_level_(last_level) {} - bool operator()(const std::pair& first, - const std::pair& second) const { - if (is_last_level_) { - return ucomp_->Compare(first.first, second.first) < 0; - } else { - return ucomp_->Compare(ExtractUserKey(first.first), - ExtractUserKey(second.first)) < 0; - } + struct BucketComparator { + BucketComparator(const Slice file_data, const Comparator* ucomp, + uint32_t bucket_len, uint32_t user_key_len, + const Slice target = Slice()) + : file_data_(file_data), + ucomp_(ucomp), + bucket_len_(bucket_len), + user_key_len_(user_key_len), + target_(target) {} + bool operator()(const uint32_t first, const uint32_t second) const { + const char* first_bucket = + (first == kInvalidIndex) ? target_.data() : + &file_data_.data()[first * bucket_len_]; + const char* second_bucket = + (second == kInvalidIndex) ? target_.data() : + &file_data_.data()[second * bucket_len_]; + return ucomp_->Compare(Slice(first_bucket, user_key_len_), + Slice(second_bucket, user_key_len_)) < 0; } - private: + const Slice file_data_; const Comparator* ucomp_; - const bool is_last_level_; + const uint32_t bucket_len_; + const uint32_t user_key_len_; + const Slice target_; }; - const CompareKeys comparator_; + + const BucketComparator bucket_comparator_; void PrepareKVAtCurrIdx(); CuckooTableReader* reader_; Status status_; // Contains a map of keys to bucket_id sorted in key order. - std::vector> key_to_bucket_id_; + std::vector sorted_bucket_ids_; // We assume that the number of items can be stored in uint32 (4 Billion). uint32_t curr_key_idx_; Slice curr_value_; @@ -219,29 +237,31 @@ class CuckooTableIterator : public Iterator { }; CuckooTableIterator::CuckooTableIterator(CuckooTableReader* reader) - : comparator_(reader->ucomp_, reader->is_last_level_), + : bucket_comparator_(reader->file_data_, reader->ucomp_, + reader->bucket_length_, reader->user_key_length_), reader_(reader), - curr_key_idx_(std::numeric_limits::max()) { - key_to_bucket_id_.clear(); + curr_key_idx_(kInvalidIndex) { + sorted_bucket_ids_.clear(); curr_value_.clear(); curr_key_.Clear(); } void CuckooTableIterator::LoadKeysFromReader() { - key_to_bucket_id_.reserve(reader_->GetTableProperties()->num_entries); + sorted_bucket_ids_.reserve(reader_->GetTableProperties()->num_entries); uint64_t num_buckets = reader_->table_size_ + reader_->cuckoo_block_size_ - 1; - for (uint32_t bucket_id = 0; bucket_id < num_buckets; bucket_id++) { - Slice read_key; - status_ = reader_->file_->Read(bucket_id * reader_->bucket_length_, - reader_->key_length_, &read_key, nullptr); - if (read_key != Slice(reader_->unused_key_)) { - key_to_bucket_id_.push_back(std::make_pair(read_key, bucket_id)); + assert(num_buckets < kInvalidIndex); + const char* bucket = reader_->file_data_.data(); + for (uint32_t bucket_id = 0; bucket_id < num_buckets; ++bucket_id) { + if (Slice(bucket, reader_->key_length_) != Slice(reader_->unused_key_)) { + sorted_bucket_ids_.push_back(bucket_id); } + bucket += reader_->bucket_length_; } - assert(key_to_bucket_id_.size() == + assert(sorted_bucket_ids_.size() == reader_->GetTableProperties()->num_entries); - std::sort(key_to_bucket_id_.begin(), key_to_bucket_id_.end(), comparator_); - curr_key_idx_ = key_to_bucket_id_.size(); + std::sort(sorted_bucket_ids_.begin(), sorted_bucket_ids_.end(), + bucket_comparator_); + curr_key_idx_ = kInvalidIndex; } void CuckooTableIterator::SeekToFirst() { @@ -250,25 +270,25 @@ void CuckooTableIterator::SeekToFirst() { } void CuckooTableIterator::SeekToLast() { - curr_key_idx_ = key_to_bucket_id_.size() - 1; + curr_key_idx_ = sorted_bucket_ids_.size() - 1; PrepareKVAtCurrIdx(); } void CuckooTableIterator::Seek(const Slice& target) { - // We assume that the target is an internal key. If this is last level file, - // we need to take only the user key part to seek. - Slice target_to_search = reader_->is_last_level_ ? - ExtractUserKey(target) : target; - auto seek_it = std::lower_bound(key_to_bucket_id_.begin(), - key_to_bucket_id_.end(), - std::make_pair(target_to_search, 0), - comparator_); - curr_key_idx_ = std::distance(key_to_bucket_id_.begin(), seek_it); + const BucketComparator seek_comparator( + reader_->file_data_, reader_->ucomp_, + reader_->bucket_length_, reader_->user_key_length_, + ExtractUserKey(target)); + auto seek_it = std::lower_bound(sorted_bucket_ids_.begin(), + sorted_bucket_ids_.end(), + kInvalidIndex, + seek_comparator); + curr_key_idx_ = std::distance(sorted_bucket_ids_.begin(), seek_it); PrepareKVAtCurrIdx(); } bool CuckooTableIterator::Valid() const { - return curr_key_idx_ < key_to_bucket_id_.size(); + return curr_key_idx_ < sorted_bucket_ids_.size(); } void CuckooTableIterator::PrepareKVAtCurrIdx() { @@ -277,15 +297,17 @@ void CuckooTableIterator::PrepareKVAtCurrIdx() { curr_key_.Clear(); return; } - uint64_t offset = ((uint64_t) key_to_bucket_id_[curr_key_idx_].second - * reader_->bucket_length_) + reader_->key_length_; - status_ = reader_->file_->Read(offset, reader_->value_length_, - &curr_value_, nullptr); + uint32_t id = sorted_bucket_ids_[curr_key_idx_]; + const char* offset = reader_->file_data_.data() + + id * reader_->bucket_length_; if (reader_->is_last_level_) { // Always return internal key. - curr_key_.SetInternalKey( - key_to_bucket_id_[curr_key_idx_].first, 0, kTypeValue); + curr_key_.SetInternalKey(Slice(offset, reader_->user_key_length_), + 0, kTypeValue); + } else { + curr_key_.SetKey(Slice(offset, reader_->key_length_)); } + curr_value_ = Slice(offset + reader_->key_length_, reader_->value_length_); } void CuckooTableIterator::Next() { @@ -300,7 +322,7 @@ void CuckooTableIterator::Next() { void CuckooTableIterator::Prev() { if (curr_key_idx_ == 0) { - curr_key_idx_ = key_to_bucket_id_.size(); + curr_key_idx_ = sorted_bucket_ids_.size(); } if (!Valid()) { curr_value_.clear(); @@ -313,11 +335,7 @@ void CuckooTableIterator::Prev() { Slice CuckooTableIterator::key() const { assert(Valid()); - if (reader_->is_last_level_) { - return curr_key_.GetKey(); - } else { - return key_to_bucket_id_[curr_key_idx_].first; - } + return curr_key_.GetKey(); } Slice CuckooTableIterator::value() const { diff --git a/table/cuckoo_table_reader.h b/table/cuckoo_table_reader.h index 8b3ad4b9114..8f7635cfa55 100644 --- a/table/cuckoo_table_reader.h +++ b/table/cuckoo_table_reader.h @@ -71,6 +71,7 @@ class CuckooTableReader: public TableReader { uint32_t num_hash_func_; std::string unused_key_; uint32_t key_length_; + uint32_t user_key_length_; uint32_t value_length_; uint32_t bucket_length_; uint32_t cuckoo_block_size_;