From f3b753a340fa0492e4931d165ae633cb8d6cbc56 Mon Sep 17 00:00:00 2001 From: canepat <16927169+canepat@users.noreply.github.com> Date: Thu, 19 Sep 2024 17:52:18 +0200 Subject: [PATCH 01/12] db: add BTree index for kv snapshots --- cmd/dev/snapshots.cpp | 51 +++++ .../db/snapshots/common/bitmask_operators.hpp | 48 ++++ silkworm/db/snapshots/index/btree.cpp | 216 ++++++++++++++++++ silkworm/db/snapshots/index/btree.hpp | 104 +++++++++ silkworm/db/snapshots/index/btree_index.cpp | 154 +++++++++++++ silkworm/db/snapshots/index/btree_index.hpp | 113 +++++++++ .../db/snapshots/index/btree_index_test.cpp | 132 +++++++++++ .../rec_split/encoding/elias_fano.hpp | 15 ++ silkworm/db/snapshots/rec_split/rec_split.hpp | 26 +-- silkworm/db/snapshots/seg/decompressor.cpp | 24 +- silkworm/db/snapshots/seg/decompressor.hpp | 37 ++- 11 files changed, 885 insertions(+), 35 deletions(-) create mode 100644 silkworm/db/snapshots/common/bitmask_operators.hpp create mode 100644 silkworm/db/snapshots/index/btree.cpp create mode 100644 silkworm/db/snapshots/index/btree.hpp create mode 100644 silkworm/db/snapshots/index/btree_index.cpp create mode 100644 silkworm/db/snapshots/index/btree_index.hpp create mode 100644 silkworm/db/snapshots/index/btree_index_test.cpp diff --git a/cmd/dev/snapshots.cpp b/cmd/dev/snapshots.cpp index b9633d3b21..32657d639a 100644 --- a/cmd/dev/snapshots.cpp +++ b/cmd/dev/snapshots.cpp @@ -43,6 +43,7 @@ #include #include #include +#include #include #include #include @@ -97,6 +98,7 @@ enum class SnapshotTool { // NOLINT(performance-enum-size) count_headers, create_index, open_index, + open_btree_index, decode_segment, download, lookup_header, @@ -218,6 +220,10 @@ void parse_command_line(int argc, char* argv[], CLI::App& app, SnapshotToolboxSe ->capture_default_str(); } + commands[SnapshotTool::open_btree_index] + ->add_option("--file", snapshot_settings.input_file_path, ".kv file to open with associated .bt file") + ->required() + ->check(CLI::ExistingFile); 
commands[SnapshotTool::recompress] ->add_option("--file", snapshot_settings.input_file_path, ".seg file to decompress and compress again") ->required() @@ -382,6 +388,48 @@ void open_index(const SnapshotSubcommandSettings& settings) { SILK_INFO << "Open index elapsed: " << duration_as(elapsed) << " msec"; } +void open_btree_index(const SnapshotSubcommandSettings& settings) { + ensure(!settings.input_file_path.empty(), "open_btree_index: --file must be specified"); + ensure(settings.input_file_path.extension() == ".kv", "open_btree_index: --file must be .kv file"); + + std::filesystem::path bt_index_file_path = settings.input_file_path; + bt_index_file_path.replace_extension(".bt"); + SILK_INFO << "KV file: " << settings.input_file_path.string() << " BT file: " << bt_index_file_path.string(); + std::chrono::time_point start{std::chrono::steady_clock::now()}; + seg::Decompressor kv_decompressor{settings.input_file_path}; + kv_decompressor.open(); + snapshots::index::BTreeIndex bt_index{kv_decompressor, bt_index_file_path}; + SILK_INFO << "Starting KV scan and BTreeIndex check, total keys: " << bt_index.key_count(); + size_t matching_count{0}, key_count{0}; + bool is_key{true}; + Bytes key, value; + auto kv_iterator = kv_decompressor.begin(); + while (kv_iterator != kv_decompressor.end()) { + if (is_key) { + key = *kv_iterator; + ++key_count; + } else { + value = *kv_iterator; + const auto v = bt_index.get(key, kv_iterator); + SILK_DEBUG << "KV: key=" << to_hex(key) << " value=" << to_hex(value) << " v=" << (v ? 
to_hex(*v) : ""); + ensure(v == value, + [&]() { return "open_btree_index: value mismatch for key=" + to_hex(key) + " position=" + std::to_string(key_count); }); + if (v == value) { + ++matching_count; + } + if (key_count % 10'000'000 == 0) { + SILK_INFO << "BTreeIndex check progress: " << key_count << " different: " << (key_count - matching_count); + } + } + ++kv_iterator; + is_key = !is_key; + } + ensure(key_count == bt_index.key_count(), "open_btree_index: total key count does not match"); + SILK_INFO << "Open btree index matching: " << matching_count << " different: " << (key_count - matching_count); + std::chrono::duration elapsed{std::chrono::steady_clock::now() - start}; + SILK_INFO << "Open btree index elapsed: " << duration_as(elapsed) << " msec"; +} + static TorrentInfoPtrList download_web_seed(const DownloadSettings& settings) { const auto known_config{snapshots::Config::lookup_known_config(settings.chain_id)}; WebSeedClient web_client{/*url_seeds=*/{settings.url_seed}, known_config.preverified_snapshots()}; @@ -872,6 +920,9 @@ int main(int argc, char* argv[]) { case SnapshotTool::open_index: open_index(settings.snapshot_settings); break; + case SnapshotTool::open_btree_index: + open_btree_index(settings.snapshot_settings); + break; case SnapshotTool::decode_segment: decode_segment(settings.snapshot_settings, settings.repetitions); break; diff --git a/silkworm/db/snapshots/common/bitmask_operators.hpp b/silkworm/db/snapshots/common/bitmask_operators.hpp new file mode 100644 index 0000000000..a811c48654 --- /dev/null +++ b/silkworm/db/snapshots/common/bitmask_operators.hpp @@ -0,0 +1,48 @@ +/* + Copyright 2024 The Silkworm Authors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#pragma once + +#include + +namespace silkworm::snapshots { + +template + requires(std::is_enum_v and requires(T e) { + enable_bitmask_operator_or(e); + }) +constexpr auto operator|(const T lhs, const T rhs) { + using underlying = std::underlying_type_t; + return static_cast(static_cast(lhs) | static_cast(rhs)); +} +template + requires(std::is_enum_v and requires(T e) { + enable_bitmask_operator_and(e); + }) +constexpr auto operator&(const T lhs, const T rhs) { + using underlying = std::underlying_type_t; + return static_cast(static_cast(lhs) & static_cast(rhs)); +} +template + requires(std::is_enum_v and requires(T e) { + enable_bitmask_operator_not(e); + }) +constexpr auto operator~(const T t) { + using underlying = std::underlying_type_t; + return static_cast(~static_cast(t)); +} + +} // namespace silkworm::snapshots diff --git a/silkworm/db/snapshots/index/btree.cpp b/silkworm/db/snapshots/index/btree.cpp new file mode 100644 index 0000000000..ec93080c01 --- /dev/null +++ b/silkworm/db/snapshots/index/btree.cpp @@ -0,0 +1,216 @@ +/* + Copyright 2024 The Silkworm Authors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "btree.hpp" + +#include + +#include +#include +#include +#include +#include + +namespace silkworm::snapshots::index { + +//! Smallest shard available for scan instead of binary search +static constexpr auto kDefaultBtreeStartSkip{4}; + +static bool enable_assert_btree_keys() { + bool enabled{false}; + auto environment = boost::this_process::environment(); + const auto env_var = environment["BT_ASSERT_OFFSETS"]; + if (!env_var.empty()) { + enabled = std::stoul(env_var.to_string()) != 0; + } + return enabled; +} + +BTree::BTree(uint64_t num_nodes, + uint64_t fanout, + DataLookup data_lookup, + KeyCompare compare_key, + DataIterator& data_it, + std::span encoded_nodes) + : num_nodes_(num_nodes), + fanout_{fanout}, + data_lookup_{std::move(data_lookup)}, + compare_key_{std::move(compare_key)}, + check_encoded_keys_(enable_assert_btree_keys()) { + if (encoded_nodes.empty()) { + // Build the cache from data using some heuristics + warmup(data_it); + } else { + // Materialize the cache from its encoded representation + decode_nodes(encoded_nodes, data_it); + } +} + +BTree::SeekResult BTree::seek(ByteView seek_key, DataIterator& data_it) { + if (seek_key.empty() && num_nodes_ > 0) { + const auto [found, kv_pair] = data_lookup_(0, data_it); + if (!found) { + return {/*found=*/false, {}, {}, 0}; + } + return {kv_pair.first == seek_key, kv_pair.first, kv_pair.second, 0}; + } + auto [_, left_index, right_index] = binary_search_in_cache(seek_key); // left_index == right_index when key is found + uint64_t median = 0; + while (left_index < right_index) { + if (right_index - left_index <= kDefaultBtreeStartSkip) { // found small range, faster to scan now + const auto [cmp, key] = compare_key_(seek_key, left_index, data_it); + if (cmp == 0) { + right_index = left_index; + break; + } + if (cmp < 0) { // found key is greater than seek_key + if (left_index + 1 < 
num_nodes_) { + ++left_index; + continue; + } + } + right_index = left_index; + break; + } + median = (left_index + right_index) >> 1; + const auto [cmp, key] = compare_key_(seek_key, median, data_it); + if (cmp == 0) { + left_index = right_index = median; + break; + } + if (cmp > 0) { + right_index = median; + } else { + left_index = median + 1; + } + } + if (left_index == right_index) { + median = left_index; + } + const auto [found, kv_pair] = data_lookup_(median, data_it); + if (!found) { + return {/*found=*/false, {}, {}, 0}; + } + return {kv_pair.first == seek_key, kv_pair.first, kv_pair.second, left_index}; +} + +BTree::GetResult BTree::get(ByteView key, DataIterator& data_it) { + if (key.empty() && num_nodes_ > 0) { + const auto [found, kv_pair] = data_lookup_(0, data_it); + if (!found) { + return {/*found=*/false, {}, 0}; + } + return {kv_pair.first == key, kv_pair.first, 0}; + } + auto [_, left_index, right_index] = binary_search_in_cache(key); // left_index == right_index when key is found + uint64_t median = 0; + while (left_index < right_index) { + median = (left_index + right_index) >> 1; + const auto [cmp, k] = compare_key_(key, median, data_it); + switch (cmp) { + case 0: + return {/*found=*/true, k, median}; + case 1: + right_index = median; + break; + case -1: + left_index = median + 1; + break; + default: + SILKWORM_ASSERT(false); + } + } + auto [cmp, k] = compare_key_(key, left_index, data_it); + if (cmp != 0) { + return {/*found=*/false, {}, 0}; + } + return {/*found=*/true, std::move(k), left_index}; +} + +std::pair BTree::Node::from_encoded_data(std::span encoded_node) { + constexpr size_t kEncodedIndexPlusKeyLengthSize{sizeof(uint64_t) + sizeof(uint16_t)}; + ensure(encoded_node.size() >= kEncodedIndexPlusKeyLengthSize, "snapshots::index::BTree invalid encoded node size"); + const auto key_index = endian::load_big_u64(encoded_node.data()); + const auto encoded_key = encoded_node.subspan(sizeof(uint64_t)); + const auto key_length = 
endian::load_big_u16(encoded_key.data()); + const auto encoded_size = kEncodedIndexPlusKeyLengthSize + key_length; + ensure(encoded_node.size() >= encoded_size, "snapshots::index::BTree invalid encoded node size"); + const auto key = encoded_key.subspan(sizeof(uint16_t)); + return {Node{key_index, Bytes{key.data(), key.size()}}, encoded_size}; +} + +void BTree::warmup(DataIterator& data_it) { + if (num_nodes_ == 0) { + return; + } + cache_.reserve(num_nodes_ / fanout_); + + uint64_t cached_bytes{0}; + const size_t step = num_nodes_ < fanout_ ? 1 : fanout_; // cache all keys if less than M + for (size_t i{step}; i < num_nodes_; i += step) { + const size_t data_index = i - 1; + auto [_, key] = compare_key_({}, data_index, data_it); + cache_.emplace_back(Node{data_index, Bytes{key}}); + cached_bytes += sizeof(Node) + key.length(); + } + SILK_DEBUG << "BTree::warmup finished M=" << fanout_ << " N=" << num_nodes_ << " cache_size=" << cached_bytes; +} + +void BTree::decode_nodes(std::span encoded_nodes, DataIterator& data_it) { + ensure(encoded_nodes.size() >= sizeof(uint64_t), "snapshots::index::BTree invalid encoded list of nodes"); + + const uint64_t node_count = endian::load_big_u64(encoded_nodes.data()); + cache_.reserve(node_count); + + size_t data_position{sizeof(uint64_t)}; + for (size_t n{0}; n < node_count; ++n) { + auto [node, node_size] = Node::from_encoded_data(encoded_nodes.subspan(data_position)); + if (check_encoded_keys_) { + const auto [cmp, key] = compare_key_(node.key, node.key_index, data_it); + ensure(cmp == 0, [&]() { return "key mismatch node.key=" + to_hex(node.key) + " key=" + to_hex(key) + + " n=" + std::to_string(n) + " key_index=" + std::to_string(node.key_index); }); + } + cache_.emplace_back(std::move(node)); + data_position += node_size; + } +} + +BTree::BinarySearchResult BTree::binary_search_in_cache(ByteView key) { + uint64_t left_index = 0, right_index = num_nodes_; + uint64_t left_pos = 0, right_pos = cache_.size(); + BTree::Node* 
node{nullptr}; + while (left_pos < right_pos) { + uint64_t median_pos = (left_pos + right_pos) >> 1; + node = &cache_[median_pos]; + switch (node->key.compare(key)) { + case 0: + return {node, node->key_index, node->key_index}; + case 1: + right_pos = median_pos; + right_index = node->key_index; + break; + case -1: + left_pos = median_pos + 1; + left_index = node->key_index; + break; + default: + SILKWORM_ASSERT(false); + } + } + return {node, left_index, right_index}; +} + +} // namespace silkworm::snapshots::index diff --git a/silkworm/db/snapshots/index/btree.hpp b/silkworm/db/snapshots/index/btree.hpp new file mode 100644 index 0000000000..9c2412aea2 --- /dev/null +++ b/silkworm/db/snapshots/index/btree.hpp @@ -0,0 +1,104 @@ +/* + Copyright 2024 The Silkworm Authors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#pragma once + +#include +#include +#include + +#include + +#include "../seg/decompressor.hpp" + +namespace silkworm::snapshots::index { + +class BTree { + public: + using DataIterator = seg::Decompressor::Iterator; + using DataIndex = uint64_t; + using KeyValue = std::pair; + using LookupResult = std::pair; + using DataLookup = std::function; + using CompareResult = std::pair; + using KeyCompare = std::function; + + using SeekResult = std::tuple; + using GetResult = std::tuple; + + BTree(uint64_t num_nodes, + uint64_t fanout, + DataLookup data_lookup, + KeyCompare compare_key, + DataIterator& data_it, + std::span encoded_nodes = {}); + + //! 
\brief Search and return first key-value pair w/ key greater than or equal to \p seek_key + //! \param seek_key the key to look for + //! \param data_it an iterator to the key-value data sequence + //! \return tuple (found, key, value, data index) + //! \verbatim + //! - found is true iff exact key match is encountered + //! - if seek_key is empty, return first key and found=true + //! - if found item.key has \p seek_key as prefix, return found=false and item.key + //! - if key is greater than all keys, return found=false and empty key + //! \endverbatim + SeekResult seek(ByteView seek_key, DataIterator& data_it); + + //! \brief Search and return key equal to the given \p key + //! \param key the key to look for + //! \param data_it an iterator to the key-value data sequence + //! \return tuple (found, key, data index) + //! \verbatim + //! - found is true iff exact key match is encountered + //! \endverbatim + GetResult get(ByteView key, DataIterator& data_it); + + protected: + struct Node { + DataIndex key_index{0}; + Bytes key; + + static std::pair from_encoded_data(std::span encoded_node); + }; + using Nodes = std::vector; + using BinarySearchResult = std::tuple; + + void warmup(DataIterator& data_it); + void decode_nodes(std::span encoded_nodes, DataIterator& data_it); + + BinarySearchResult binary_search_in_cache(ByteView key); + + //! The total number of nodes in the B-Tree index (most of them are only in file, not in cache) + uint64_t num_nodes_; + + //! The number of children for each node in the B-Tree (often identified as M) + uint64_t fanout_; + + //! The function called to obtain data key-value from data index + DataLookup data_lookup_; + + //! The function called to compare keys + KeyCompare compare_key_; + + //! The part of B-Tree nodes held in memory + Nodes cache_; + + //! 
Flag indicating if encoded node keys must be checked against data keys + bool check_encoded_keys_; +}; + +} // namespace silkworm::snapshots::index diff --git a/silkworm/db/snapshots/index/btree_index.cpp b/silkworm/db/snapshots/index/btree_index.cpp new file mode 100644 index 0000000000..cc3ca807b8 --- /dev/null +++ b/silkworm/db/snapshots/index/btree_index.cpp @@ -0,0 +1,154 @@ +/* + Copyright 2024 The Silkworm Authors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "btree_index.hpp" + +#include + +#include + +#include +#include +#include + +namespace silkworm::snapshots::index { + +BTreeIndex::BTreeIndex(seg::Decompressor& data_decompressor, + std::filesystem::path index_file_path, + std::optional index_region, + uint64_t btree_fanout) + : file_path_(std::move(index_file_path)) { + ensure(data_decompressor.is_open(), "BTreeIndex: data decompressor must be opened"); + + // Gracefully handle the case of empty index file before memory mapping to avoid error + if (std::filesystem::file_size(file_path_) == 0) { + return; + } + + // Either use given memory-mapped region or create a new one + memory_file_ = std::make_unique(file_path_, index_region); + SILKWORM_ASSERT(memory_file_->size() > 0); + const auto memory_mapped_range = memory_file_->region(); + + // Read encoded Elias-Fano 32-bit list of integers representing data offsets + data_offsets_ = EliasFanoList32::from_encoded_data(memory_mapped_range); + ensure(data_offsets_->sequence_length() > 0, "BTreeIndex: 
invalid zero-length data offsets"); + + const auto encoded_nodes = memory_mapped_range.subspan(data_offsets_->encoded_data_size()); + + // Let the OS know we're going to read data sequentially now, then restore normal (i.e. unknown) reading behavior + data_decompressor.advise_sequential(); + [[maybe_unused]] auto _ = gsl::finally([&]() { data_decompressor.advise_normal(); }); + auto data_it = data_decompressor.begin(); + + btree_ = std::make_unique( + data_offsets_->sequence_length(), + btree_fanout, + [this](auto data_index, auto& data_it) { return lookup_data(data_index, data_it); }, + [this](auto key, auto data_index, auto& data_it) { return compare_key(key, data_index, data_it); }, + data_it, + encoded_nodes); +} + +BTreeIndex::Cursor::Cursor(BTreeIndex* index, ByteView key, ByteView value, DataIndex data_index, DataIterator data_it) + : index_(index), key_(key), value_(value), data_index_(data_index), data_it_(std::move(data_it)) {} + +std::unique_ptr BTreeIndex::seek(ByteView seek_key, DataIterator data_it) { + const auto [found, key, value, data_index] = btree_->seek(seek_key, data_it); + if (!found) { + return nullptr; + } + if (key.compare(seek_key) >= 0) { + return new_cursor(key, value, data_index, data_it); + } + return new_cursor(ByteView{}, ByteView{}, 0, data_it); +} + +std::optional BTreeIndex::get(ByteView key, DataIterator data_it) { + if (empty()) { + return {}; + } + const auto [key_found, _, data_index] = btree_->get(key, data_it); + if (!key_found) { + return {}; + } + const auto [kv_found, kv] = lookup_data(data_index, data_it); + if (!kv_found) { + return {}; + } + return kv.second; +} + +BTree::LookupResult BTreeIndex::lookup_data(DataIndex data_index, DataIterator data_it) { + if (data_index >= data_offsets_->sequence_length()) { + return {/*found=*/false, {}}; + } + + const auto data_offset = data_offsets_->get(data_index); + data_it.reset(data_offset); + if (!data_it.has_next()) { + throw std::runtime_error{"key not found data_index=" + 
std::to_string(data_index) + " for " + file_path_.string()}; + } + Bytes key; + data_it.next(key); + if (!data_it.has_next()) { + throw std::runtime_error{"value not found data_index=" + std::to_string(data_index) + " for " + file_path_.string()}; + } + Bytes value; + data_it.next(value); + return {/*found=*/true, {key, value}}; +} + +BTree::CompareResult BTreeIndex::compare_key(ByteView key, DataIndex data_index, DataIterator data_it) { + ensure(data_index < data_offsets_->sequence_length(), + [&]() { return "out-of-bounds data_index=" + std::to_string(data_index) + " key=" + to_hex(key); }); + + const auto data_offset = data_offsets_->get(data_index); + data_it.reset(data_offset); + if (!data_it.has_next()) { + throw std::runtime_error{"key not found data_index=" + std::to_string(data_index) + " for " + file_path_.string()}; + } + Bytes data_key; + data_it.next(data_key); + return {data_key.compare(key), data_key}; +} + +std::unique_ptr BTreeIndex::new_cursor(ByteView key, ByteView value, DataIndex data_index, DataIterator data_it) { + return std::unique_ptr{new BTreeIndex::Cursor{this, key, value, data_index, std::move(data_it)}}; +} + +bool BTreeIndex::Cursor::next() { + if (!to_next()) { + return false; + } + const auto [found, kv] = index_->lookup_data(data_index_, data_it_); + if (!found) { + return false; + } + key_ = kv.first; + value_ = kv.second; + return true; +} + +bool BTreeIndex::Cursor::to_next() { + if (data_index_ + 1 == index_->data_offsets_->sequence_length()) { + return false; + } + ++data_index_; + return true; +} + +} // namespace silkworm::snapshots::index diff --git a/silkworm/db/snapshots/index/btree_index.hpp b/silkworm/db/snapshots/index/btree_index.hpp new file mode 100644 index 0000000000..710e7d5eb5 --- /dev/null +++ b/silkworm/db/snapshots/index/btree_index.hpp @@ -0,0 +1,113 @@ +/* + Copyright 2024 The Silkworm Authors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in 
compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#pragma once + +#include +#include +#include +#include + +#include + +#include "../rec_split/encoding/elias_fano.hpp" // TODO(canepat) move to snapshots/common +#include "../seg/decompressor.hpp" +#include "btree.hpp" + +namespace silkworm::snapshots::index { + +using rec_split::encoding::EliasFanoList32; // TODO(canepat) remove after moving + +class BTreeIndex { + public: + static constexpr auto kDefaultFanout{256}; + + using DataIndex = BTree::DataIndex; + using DataIterator = BTree::DataIterator; + + class Cursor { + public: + using iterator_category [[maybe_unused]] = std::input_iterator_tag; + using difference_type = std::ptrdiff_t; + using value_type = std::tuple; + using pointer = value_type*; + using reference = value_type&; + + // reference operator*() { return key_value_index_; } + // pointer operator->() { return &key_value_index_; } + + Cursor& operator++() { + next(); + return *this; + } + + ByteView key() const noexcept { return key_; } + ByteView value() const noexcept { return value_; } + DataIndex data_index() const noexcept { return data_index_; } + + bool next(); + + private: + friend class BTreeIndex; + + Cursor(BTreeIndex* index, ByteView key, ByteView value, DataIndex data_index, DataIterator data_it); + bool to_next(); + + BTreeIndex* index_; + Bytes key_; + Bytes value_; + DataIndex data_index_; + // value_type key_value_index_; + DataIterator data_it_; + }; + + BTreeIndex(seg::Decompressor& data_decompressor, + std::filesystem::path index_file_path, + std::optional index_region = {}, + uint64_t 
btree_fanout = kDefaultFanout); + + //! Return the Elias-Fano encoding of the sequence of key offsets or nullptr if not present + const EliasFanoList32* data_offsets() const { return data_offsets_.get(); } + + //! Is this index empty or not? + bool empty() const { return data_offsets_ ? data_offsets_->sequence_length() == 0 : true; } + + //! Return the number of keys included into this index + size_t key_count() const { return data_offsets_ ? data_offsets_->sequence_length() : 0; }; + + //! Seek and return cursor at position where key >= \p seek_key + //! \param seek_key the given key to seek cursor at + //! \return a cursor positioned at key >= \p seek_key + //! \details if \p seek_key is empty, first key is returned + //! \details if \p seek_key greater than any other key, nullptr is returned + std::unique_ptr seek(ByteView seek_key, DataIterator data_it); + + //! Get the value associated to the given key with exact match + std::optional get(ByteView key, DataIterator data_it); + + private: + std::unique_ptr new_cursor(ByteView key, ByteView value, DataIndex data_index, DataIterator data_it); + + BTree::LookupResult lookup_data(DataIndex data_index, DataIterator data_it); + BTree::CompareResult compare_key(ByteView key, DataIndex data_index, DataIterator data_it); + + std::filesystem::path file_path_; + std::unique_ptr memory_file_; + std::unique_ptr data_offsets_; + std::unique_ptr btree_; +}; + +} // namespace silkworm::snapshots::index diff --git a/silkworm/db/snapshots/index/btree_index_test.cpp b/silkworm/db/snapshots/index/btree_index_test.cpp new file mode 100644 index 0000000000..dbba292945 --- /dev/null +++ b/silkworm/db/snapshots/index/btree_index_test.cpp @@ -0,0 +1,132 @@ +/* + Copyright 2024 The Silkworm Authors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "btree_index.hpp" + +#include +#include +#include +#include + +#include + +#include +#include +#include +#include + +namespace silkworm::snapshots::index { + +using namespace silkworm::test_util; + +using KeyAndValue = std::pair; +using KeysAndValues = std::vector; + +static std::filesystem::path sample_kv_file(const std::filesystem::path& tmp_dir_path, const KeysAndValues& kv_pairs) { + const auto kv_file_path = TemporaryDirectory::get_unique_temporary_path(); + seg::Compressor kv_compressor{kv_file_path, tmp_dir_path}; + for (const auto& kv_pair : kv_pairs) { + kv_compressor.add_word(*from_hex(kv_pair.first), /*is_compressed=*/false); + kv_compressor.add_word(*from_hex(kv_pair.second), /*is_compressed=*/false); + } + seg::Compressor::compress(std::move(kv_compressor)); + return kv_file_path; +} + +static std::filesystem::path sample_bt_index_file(const EliasFanoList32& key_offsets) { + TemporaryFile index_file; + std::stringstream str_stream; + str_stream << key_offsets; + const std::string stream = str_stream.str(); + Bytes ef_bytes{stream.cbegin(), stream.cend()}; + index_file.write(ef_bytes); + return index_file.path(); +} + +TEST_CASE("BTreeIndex", "[db]") { + TemporaryDirectory tmp_dir; + + SECTION("empty") { + const auto kv_file_path = sample_kv_file(tmp_dir.path(), {}); + seg::Decompressor kv_decompressor{kv_file_path}; + kv_decompressor.open(); + + TemporaryFile index_file; + index_file.write(Bytes{}); + BTreeIndex bt_index{kv_decompressor, index_file.path()}; + CHECK(bt_index.empty()); + } + + SECTION("sample KV file") { + // Prepare 
sample uncompressed KV file containing some key-value pairs + const auto kv_file_path = sample_kv_file( + tmp_dir.path(), + { + {"0000000000000000000000000000000000000000", "000a0269024e3c8decd159600000"}, + {"0000000000000000000000000000000000000001", "0008cf2fa48840ba8add0000"}, + {"0000000000000000000000000000000000000002", "0008146c4643c28ed8200000"}, + }); + + // Prepare the BT index for such KV file + // Note: key offsets can be computed from KV file layout + // 000000000000000600000000000000000000000000000000000000000000000801000215030F030D + // 01 + // 0000000000000000000000000000000000000000 <- 1st key, offset 0 + // 03 + // 000A0269024E3C8DECD159600000 + // 01 + // 0000000000000000000000000000000000000001 <- 2nd key, offset 0 + 20 + 1 + 14 + 1 + // 07 + // 0008CF2FA48840BA8ADD0000 + // 01 + // 0000000000000000000000000000000000000002 <- 3rd key, offset 0 + 20 + 1 + 14 + 1 + 20 + 1 + 12 + 1 + // 07 + // 0008146C4643C28ED8200000 + EliasFanoList32 encoded_key_offsets{3, 70}; + encoded_key_offsets.add_offset(0); + encoded_key_offsets.add_offset(0 + 20 + 1 + 14 + 1); + encoded_key_offsets.add_offset(0 + 20 + 1 + 14 + 1 + 20 + 1 + 12 + 1); + encoded_key_offsets.build(); + const auto bt_file_path = sample_bt_index_file(encoded_key_offsets); + + // Open the KV and BT index files + seg::Decompressor kv_decompressor{kv_file_path}; + kv_decompressor.open(); + BTreeIndex bt_index{kv_decompressor, bt_file_path}; + REQUIRE(bt_index.key_count() == 3); + + // Check that all values retrieved through BT index match + size_t key_count{0}; + bool is_key{true}; + Bytes key, value; + auto kv_iterator = kv_decompressor.begin(); + while (kv_iterator != kv_decompressor.end()) { + if (is_key) { + key = *kv_iterator; + ++key_count; + } else { + value = *kv_iterator; + const auto v = bt_index.get(key, kv_iterator); + CHECK(v == value); + } + ++kv_iterator; + is_key = !is_key; + } + CHECK(key_count == bt_index.key_count()); + } +} + +} // namespace silkworm::snapshots::index diff 
--git a/silkworm/db/snapshots/rec_split/encoding/elias_fano.hpp b/silkworm/db/snapshots/rec_split/encoding/elias_fano.hpp index ff147a43d3..2c21eb23a6 100644 --- a/silkworm/db/snapshots/rec_split/encoding/elias_fano.hpp +++ b/silkworm/db/snapshots/rec_split/encoding/elias_fano.hpp @@ -49,6 +49,7 @@ #include #include #include +#include #include #include #include @@ -103,6 +104,18 @@ static void set_bits(std::span bits, const uint64_t start, const uint //! 32-bit Elias-Fano (EF) list that can be used to encode one monotone non-decreasing sequence class EliasFanoList32 { public: + static constexpr std::size_t kCountLength{sizeof(uint64_t)}; + static constexpr std::size_t kULength{sizeof(uint64_t)}; + + //! Create a new 32-bit EF list from the given encoded data (i.e. data plus data header) + static std::unique_ptr from_encoded_data(std::span encoded_data) { + ensure(encoded_data.size() >= kCountLength + kULength, "EliasFanoList32::from_encoded_data data too short"); + const uint64_t count = endian::load_big_u64(encoded_data.data()); + const uint64_t u = endian::load_big_u64(encoded_data.subspan(kCountLength).data()); + const auto remaining_data = encoded_data.subspan(kCountLength + kULength); + return std::make_unique(count, u, remaining_data); + } + //! 
Create an empty new 32-bit EF list prepared for specified sequence length and max offset EliasFanoList32(uint64_t sequence_length, uint64_t max_offset) : count_(sequence_length - 1), @@ -136,6 +149,8 @@ class EliasFanoList32 { [[nodiscard]] const Uint64Sequence& data() const { return data_; } + [[nodiscard]] std::size_t encoded_data_size() const { return kCountLength + kULength + data_.size() * sizeof(uint64_t); } + [[nodiscard]] uint64_t get(uint64_t i) const { uint64_t lower = i * l_; std::size_t idx64 = lower / 64; diff --git a/silkworm/db/snapshots/rec_split/rec_split.hpp b/silkworm/db/snapshots/rec_split/rec_split.hpp index 0cbd97e9e1..7c233474b1 100644 --- a/silkworm/db/snapshots/rec_split/rec_split.hpp +++ b/silkworm/db/snapshots/rec_split/rec_split.hpp @@ -71,6 +71,7 @@ #include #include #include +#include #include #include #include @@ -190,31 +191,6 @@ struct RecSplitSettings { bool less_false_positives{false}; // Flag indicating if existence filter to reduce false-positives is required }; -template - requires(std::is_enum_v and requires(T e) { - enable_bitmask_operator_or(e); - }) -constexpr auto operator|(const T lhs, const T rhs) { - using underlying = std::underlying_type_t; - return static_cast(static_cast(lhs) | static_cast(rhs)); -} -template - requires(std::is_enum_v and requires(T e) { - enable_bitmask_operator_and(e); - }) -constexpr auto operator&(const T lhs, const T rhs) { - using underlying = std::underlying_type_t; - return static_cast(static_cast(lhs) & static_cast(rhs)); -} -template - requires(std::is_enum_v and requires(T e) { - enable_bitmask_operator_not(e); - }) -constexpr auto operator~(const T t) { - using underlying = std::underlying_type_t; - return static_cast(~static_cast(t)); -} - enum class RecSplitFeatures : uint8_t { kNone = 0b0, // no specific feature kEnums = 0b1, // 2-layer index with PHT pointing to enumeration and enumeration pointing to offsets diff --git a/silkworm/db/snapshots/seg/decompressor.cpp 
b/silkworm/db/snapshots/seg/decompressor.cpp index 9733ec3032..384ceb8773 100644 --- a/silkworm/db/snapshots/seg/decompressor.cpp +++ b/silkworm/db/snapshots/seg/decompressor.cpp @@ -335,8 +335,10 @@ class Decompressor::ReadModeGuard { Decompressor::ReadMode old_mode_; }; -Decompressor::Decompressor(std::filesystem::path compressed_path, std::optional compressed_region) - : compressed_path_(std::move(compressed_path)), compressed_region_{compressed_region} {} +Decompressor::Decompressor(std::filesystem::path compressed_path, + std::optional compressed_region, + CompressionKind compression) + : compressed_path_(std::move(compressed_path)), compressed_region_{compressed_region}, compression_(compression) {} Decompressor::~Decompressor() { close(); @@ -344,7 +346,7 @@ Decompressor::~Decompressor() { void Decompressor::open() { compressed_file_ = std::make_unique(compressed_path_, compressed_region_); - auto compressed_file_size = compressed_file_->size(); + const auto compressed_file_size = compressed_file_->size(); if (compressed_file_size < kMinimumFileSize) { throw std::runtime_error("compressed file is too short: " + std::to_string(compressed_file_size)); } @@ -361,6 +363,9 @@ void Decompressor::open() { // Read patterns from compressed file const auto pattern_dict_length = endian::load_big_u64(address + kWordsCountSize + kEmptyWordsCountSize); SILK_TRACE << "Decompress pattern dictionary length: " << pattern_dict_length; + if (pattern_dict_length > compressed_file_size - kMinimumFileSize) { + throw std::runtime_error("compressed file is too short: " + std::to_string(compressed_file_size)); + } const std::size_t patterns_dict_offset{kWordsCountSize + kEmptyWordsCountSize + kDictionaryLengthSize}; read_patterns(ByteView{address + patterns_dict_offset, pattern_dict_length}); @@ -368,6 +373,9 @@ void Decompressor::open() { // Read positions from compressed file const auto position_dict_length = endian::load_big_u64(address + patterns_dict_offset + 
pattern_dict_length); SILK_TRACE << "Decompress position dictionary length: " << position_dict_length; + if (position_dict_length > compressed_file_size - pattern_dict_length - kMinimumFileSize) { + throw std::runtime_error("compressed file is too short: " + std::to_string(compressed_file_size)); + } const std::size_t positions_dict_offset{patterns_dict_offset + pattern_dict_length + kDictionaryLengthSize}; read_positions(ByteView{address + positions_dict_offset, position_dict_length}); @@ -776,6 +784,7 @@ uint64_t Decompressor::Iterator::skip_uncompressed() { } void Decompressor::Iterator::reset(uint64_t data_offset) { + is_next_value_ = false; word_offset_ = data_offset; bit_position_ = 0; } @@ -857,7 +866,14 @@ Decompressor::Iterator& Decompressor::Iterator::operator++() { if (has_next()) { current_word_offset_ = word_offset_; current_word_.clear(); - next(current_word_); + + const auto compression = is_next_value_ ? CompressionKind::kValues : CompressionKind::kKeys; + is_next_value_ = !is_next_value_; + if ((decoder_->compression_ & compression) != CompressionKind::kNone) { + next(current_word_); + } else { + next_uncompressed(current_word_); + } } else { *this = make_end(decoder_); } diff --git a/silkworm/db/snapshots/seg/decompressor.hpp b/silkworm/db/snapshots/seg/decompressor.hpp index 04f0c0abb6..e19633034d 100644 --- a/silkworm/db/snapshots/seg/decompressor.hpp +++ b/silkworm/db/snapshots/seg/decompressor.hpp @@ -29,6 +29,8 @@ #include #include +#include "../common/bitmask_operators.hpp" + namespace silkworm::snapshots::seg { class DecodingTable { @@ -167,6 +169,12 @@ class PositionTable : public DecodingTable { friend std::ostream& operator<<(std::ostream& out, const PositionTable& pt); }; +enum class CompressionKind : uint8_t { + kNone = 0b0, + kKeys = 0b1, + kValues = 0b10, +}; + //! 
Snapshot decoder using modified Condensed Huffman Table (CHT) algorithm class Decompressor { public: @@ -269,11 +277,16 @@ class Decompressor { Bytes current_word_; std::shared_ptr read_mode_guard_; + + //! Flag indicating if next word is key (false) or value (true) + bool is_next_value_{false}; }; static_assert(std::input_or_output_iterator); - explicit Decompressor(std::filesystem::path compressed_path, std::optional compressed_region = {}); + explicit Decompressor(std::filesystem::path compressed_path, + std::optional compressed_region = {}, + CompressionKind compression = CompressionKind::kKeys); ~Decompressor(); Decompressor(Decompressor&&) = default; @@ -297,6 +310,10 @@ class Decompressor { void open(); + void advise_normal() const { compressed_file_->advise_normal(); } + void advise_random() const { compressed_file_->advise_random(); } + void advise_sequential() const { compressed_file_->advise_sequential(); } + //! Get an iterator to the compressed data [[nodiscard]] Iterator make_iterator() const { return Iterator{this, {}}; } @@ -304,11 +321,10 @@ class Decompressor { Iterator begin() const; Iterator end() const { return Iterator::make_end(this); } - /** - * Returns an iterator at a given offset. - * If the offset is invalid it returns end(). - * Seek makes sure that the result starts with a given prefix, otherwise returns end(). - */ + //! \brief Return an iterator at a given \p offset optionally starting with a given \p prefix. + //! \param offset the offset in the data to place iterator at. If the offset is invalid, returns end() + //! \param prefix the prefix which the result should start with + //! \details Makes sure that the result starts with a given prefix, otherwise returns end() Iterator seek(uint64_t offset, ByteView prefix = {}) const; void close(); @@ -320,8 +336,13 @@ class Decompressor { //! The path to the compressed file std::filesystem::path compressed_path_; + + //! 
The memory-mapped region of the compressed file (if already mapped) std::optional compressed_region_; + //! The type of compression for this segment + CompressionKind compression_; + //! The memory-mapped compressed file std::unique_ptr compressed_file_; @@ -344,4 +365,8 @@ class Decompressor { uint64_t words_length_{0}; }; +consteval void enable_bitmask_operator_and(CompressionKind); +consteval void enable_bitmask_operator_or(CompressionKind); +consteval void enable_bitmask_operator_not(CompressionKind); + } // namespace silkworm::snapshots::seg From c705570bc3c6fc64b0d1e13d51dc251d5c46e9ec Mon Sep 17 00:00:00 2001 From: canepat <16927169+canepat@users.noreply.github.com> Date: Thu, 19 Sep 2024 18:05:56 +0200 Subject: [PATCH 02/12] fix shadowing --- silkworm/db/snapshots/index/btree_index.cpp | 12 ++++++------ silkworm/db/snapshots/index/btree_index.hpp | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/silkworm/db/snapshots/index/btree_index.cpp b/silkworm/db/snapshots/index/btree_index.cpp index cc3ca807b8..96295fd5ab 100644 --- a/silkworm/db/snapshots/index/btree_index.cpp +++ b/silkworm/db/snapshots/index/btree_index.cpp @@ -26,12 +26,12 @@ namespace silkworm::snapshots::index { -BTreeIndex::BTreeIndex(seg::Decompressor& data_decompressor, +BTreeIndex::BTreeIndex(seg::Decompressor& kv_decompressor, std::filesystem::path index_file_path, std::optional index_region, uint64_t btree_fanout) : file_path_(std::move(index_file_path)) { - ensure(data_decompressor.is_open(), "BTreeIndex: data decompressor must be opened"); + ensure(kv_decompressor.is_open(), "BTreeIndex: KV file decompressor must be opened"); // Gracefully handle the case of empty index file before memory mapping to avoid error if (std::filesystem::file_size(file_path_) == 0) { @@ -50,16 +50,16 @@ BTreeIndex::BTreeIndex(seg::Decompressor& data_decompressor, const auto encoded_nodes = memory_mapped_range.subspan(data_offsets_->encoded_data_size()); // Let the OS know we're going to 
read data sequentially now, then restore normal (i.e. unknown) reading behavior - data_decompressor.advise_sequential(); - [[maybe_unused]] auto _ = gsl::finally([&]() { data_decompressor.advise_normal(); }); - auto data_it = data_decompressor.begin(); + kv_decompressor.advise_sequential(); + [[maybe_unused]] auto _ = gsl::finally([&]() { kv_decompressor.advise_normal(); }); + auto kv_it = kv_decompressor.begin(); btree_ = std::make_unique( data_offsets_->sequence_length(), btree_fanout, [this](auto data_index, auto& data_it) { return lookup_data(data_index, data_it); }, [this](auto key, auto data_index, auto& data_it) { return compare_key(key, data_index, data_it); }, - data_it, + kv_it, encoded_nodes); } diff --git a/silkworm/db/snapshots/index/btree_index.hpp b/silkworm/db/snapshots/index/btree_index.hpp index 710e7d5eb5..ccb277a74a 100644 --- a/silkworm/db/snapshots/index/btree_index.hpp +++ b/silkworm/db/snapshots/index/btree_index.hpp @@ -74,7 +74,7 @@ class BTreeIndex { DataIterator data_it_; }; - BTreeIndex(seg::Decompressor& data_decompressor, + BTreeIndex(seg::Decompressor& kv_decompressor, std::filesystem::path index_file_path, std::optional index_region = {}, uint64_t btree_fanout = kDefaultFanout); From 1d739f48252b7c4a332d48be186bb7ae68c73bc4 Mon Sep 17 00:00:00 2001 From: canepat <16927169+canepat@users.noreply.github.com> Date: Thu, 19 Sep 2024 21:09:59 +0200 Subject: [PATCH 03/12] fix default compression in decompressor --- silkworm/db/snapshots/seg/decompressor.hpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/silkworm/db/snapshots/seg/decompressor.hpp b/silkworm/db/snapshots/seg/decompressor.hpp index e19633034d..193ec3af51 100644 --- a/silkworm/db/snapshots/seg/decompressor.hpp +++ b/silkworm/db/snapshots/seg/decompressor.hpp @@ -175,6 +175,10 @@ enum class CompressionKind : uint8_t { kValues = 0b10, }; +consteval void enable_bitmask_operator_and(CompressionKind); +consteval void 
enable_bitmask_operator_or(CompressionKind); +consteval void enable_bitmask_operator_not(CompressionKind); + //! Snapshot decoder using modified Condensed Huffman Table (CHT) algorithm class Decompressor { public: @@ -286,7 +290,7 @@ class Decompressor { explicit Decompressor(std::filesystem::path compressed_path, std::optional compressed_region = {}, - CompressionKind compression = CompressionKind::kKeys); + CompressionKind compression = CompressionKind::kKeys | CompressionKind::kValues); ~Decompressor(); Decompressor(Decompressor&&) = default; @@ -365,8 +369,4 @@ class Decompressor { uint64_t words_length_{0}; }; -consteval void enable_bitmask_operator_and(CompressionKind); -consteval void enable_bitmask_operator_or(CompressionKind); -consteval void enable_bitmask_operator_not(CompressionKind); - } // namespace silkworm::snapshots::seg From 2f01aa71ee613a08e91276185e7cb85fc8bfc97c Mon Sep 17 00:00:00 2001 From: canepat <16927169+canepat@users.noreply.github.com> Date: Thu, 19 Sep 2024 23:44:27 +0200 Subject: [PATCH 04/12] improve errors and fix unit test --- silkworm/db/snapshot_decompressor_test.cpp | 44 ++++++++++------------ silkworm/db/snapshots/seg/decompressor.cpp | 4 +- 2 files changed, 22 insertions(+), 26 deletions(-) diff --git a/silkworm/db/snapshot_decompressor_test.cpp b/silkworm/db/snapshot_decompressor_test.cpp index 7edefb6048..4edda0b713 100644 --- a/silkworm/db/snapshot_decompressor_test.cpp +++ b/silkworm/db/snapshot_decompressor_test.cpp @@ -258,35 +258,31 @@ TEST_CASE("Decompressor::open invalid files", "[silkworm][node][seg][decompresso Decompressor decoder{tmp_file.path()}; CHECK_THROWS_MATCHES(decoder.open(), std::runtime_error, Message("compressed file is too short: 31")); } - SECTION("cannot build pattern tree: highest_depth reached zero") { - TemporaryFile tmp_file; - tmp_file.write(*silkworm::from_hex("0x000000000000000C000000000000000400000000000000150309000000000000")); - Decompressor decoder{tmp_file.path()}; - 
CHECK_THROWS_MATCHES(decoder.open(), std::runtime_error, Message("cannot build pattern tree: highest_depth reached zero")); + SECTION("invalid pattern_dict_length for compressed file size: 32") { + TemporaryFile tmp_file1; + tmp_file1.write(*silkworm::from_hex("0x000000000000000C000000000000000400000000000000150309000000000000")); + Decompressor decoder1{tmp_file1.path()}; + CHECK_THROWS_MATCHES(decoder1.open(), std::runtime_error, Message("invalid pattern_dict_length for compressed file size: 32")); + TemporaryFile tmp_file2; + tmp_file2.write(*silkworm::from_hex("0x0000000000000000000000000000000000000000000000010000000000000000")); + Decompressor decoder2{tmp_file2.path()}; + CHECK_THROWS_MATCHES(decoder2.open(), std::runtime_error, Message("invalid pattern_dict_length for compressed file size: 32")); } - SECTION("pattern dict is invalid: data skip failed at 22") { + SECTION("invalid pattern_dict_length for compressed file size: 34") { TemporaryFile tmp_file; tmp_file.write(*silkworm::from_hex("0x000000000000000C00000000000000040000000000000016000000000000000003ff")); Decompressor decoder{tmp_file.path()}; - CHECK_THROWS_MATCHES(decoder.open(), std::runtime_error, Message("pattern dict is invalid: data skip failed at 11")); - } - SECTION("pattern dict is invalid: length read failed at 1") { - TemporaryFile tmp_file; - tmp_file.write(*silkworm::from_hex("0x0000000000000000000000000000000000000000000000010000000000000000")); - Decompressor decoder{tmp_file.path()}; - CHECK_THROWS_MATCHES(decoder.open(), std::runtime_error, Message("pattern dict is invalid: length read failed at 1")); + CHECK_THROWS_MATCHES(decoder.open(), std::runtime_error, Message("invalid pattern_dict_length for compressed file size: 34")); } - SECTION("cannot build position tree: highest_depth reached zero") { - TemporaryFile tmp_file; - tmp_file.write(*silkworm::from_hex("0x000000000000000C0000000000000004000000000000000000000000000000160309")); - Decompressor decoder{tmp_file.path()}; - 
CHECK_THROWS_MATCHES(decoder.open(), std::runtime_error, Message("cannot build position tree: highest_depth reached zero")); - } - SECTION("position dict is invalid: position read failed at 22") { - TemporaryFile tmp_file; - tmp_file.write(*silkworm::from_hex("0x000000000000000C00000000000000040000000000000000000000000000001603ff")); - Decompressor decoder{tmp_file.path()}; - CHECK_THROWS_MATCHES(decoder.open(), std::runtime_error, Message("position dict is invalid: position read failed at 22")); + SECTION("invalid position_dict_length for compressed file size: 34") { + TemporaryFile tmp_file1; + tmp_file1.write(*silkworm::from_hex("0x000000000000000C0000000000000004000000000000000000000000000000160309")); + Decompressor decoder1{tmp_file1.path()}; + CHECK_THROWS_MATCHES(decoder1.open(), std::runtime_error, Message("invalid position_dict_length for compressed file size: 34")); + TemporaryFile tmp_file2; + tmp_file2.write(*silkworm::from_hex("0x000000000000000C00000000000000040000000000000000000000000000001603ff")); + Decompressor decoder2{tmp_file2.path()}; + CHECK_THROWS_MATCHES(decoder2.open(), std::runtime_error, Message("invalid position_dict_length for compressed file size: 34")); } } diff --git a/silkworm/db/snapshots/seg/decompressor.cpp b/silkworm/db/snapshots/seg/decompressor.cpp index 384ceb8773..d7e3fc9bab 100644 --- a/silkworm/db/snapshots/seg/decompressor.cpp +++ b/silkworm/db/snapshots/seg/decompressor.cpp @@ -364,7 +364,7 @@ void Decompressor::open() { const auto pattern_dict_length = endian::load_big_u64(address + kWordsCountSize + kEmptyWordsCountSize); SILK_TRACE << "Decompress pattern dictionary length: " << pattern_dict_length; if (pattern_dict_length > compressed_file_size - kMinimumFileSize) { - throw std::runtime_error("compressed file is too short: " + std::to_string(compressed_file_size)); + throw std::runtime_error("invalid pattern_dict_length for compressed file size: " + std::to_string(compressed_file_size)); } const std::size_t 
patterns_dict_offset{kWordsCountSize + kEmptyWordsCountSize + kDictionaryLengthSize}; @@ -374,7 +374,7 @@ void Decompressor::open() { const auto position_dict_length = endian::load_big_u64(address + patterns_dict_offset + pattern_dict_length); SILK_TRACE << "Decompress position dictionary length: " << position_dict_length; if (position_dict_length > compressed_file_size - pattern_dict_length - kMinimumFileSize) { - throw std::runtime_error("compressed file is too short: " + std::to_string(compressed_file_size)); + throw std::runtime_error("invalid position_dict_length for compressed file size: " + std::to_string(compressed_file_size)); } const std::size_t positions_dict_offset{patterns_dict_offset + pattern_dict_length + kDictionaryLengthSize}; From f77791fb23c008b5d1bfae22645a32ec0df487ee Mon Sep 17 00:00:00 2001 From: canepat <16927169+canepat@users.noreply.github.com> Date: Fri, 20 Sep 2024 12:01:12 +0200 Subject: [PATCH 05/12] unit tests for seek --- silkworm/db/snapshots/index/btree_index.cpp | 5 +- .../db/snapshots/index/btree_index_test.cpp | 127 ++++++++++++------ 2 files changed, 87 insertions(+), 45 deletions(-) diff --git a/silkworm/db/snapshots/index/btree_index.cpp b/silkworm/db/snapshots/index/btree_index.cpp index 96295fd5ab..3171fd96c7 100644 --- a/silkworm/db/snapshots/index/btree_index.cpp +++ b/silkworm/db/snapshots/index/btree_index.cpp @@ -68,13 +68,10 @@ BTreeIndex::Cursor::Cursor(BTreeIndex* index, ByteView key, ByteView value, Data std::unique_ptr BTreeIndex::seek(ByteView seek_key, DataIterator data_it) { const auto [found, key, value, data_index] = btree_->seek(seek_key, data_it); - if (!found) { - return nullptr; - } if (key.compare(seek_key) >= 0) { return new_cursor(key, value, data_index, data_it); } - return new_cursor(ByteView{}, ByteView{}, 0, data_it); + return nullptr; } std::optional BTreeIndex::get(ByteView key, DataIterator data_it) { diff --git a/silkworm/db/snapshots/index/btree_index_test.cpp 
b/silkworm/db/snapshots/index/btree_index_test.cpp index dbba292945..af5f1d379e 100644 --- a/silkworm/db/snapshots/index/btree_index_test.cpp +++ b/silkworm/db/snapshots/index/btree_index_test.cpp @@ -33,9 +33,8 @@ namespace silkworm::snapshots::index { using namespace silkworm::test_util; using KeyAndValue = std::pair; -using KeysAndValues = std::vector; -static std::filesystem::path sample_kv_file(const std::filesystem::path& tmp_dir_path, const KeysAndValues& kv_pairs) { +static std::filesystem::path sample_kv_file(const std::filesystem::path& tmp_dir_path, std::span kv_pairs) { const auto kv_file_path = TemporaryDirectory::get_unique_temporary_path(); seg::Compressor kv_compressor{kv_file_path, tmp_dir_path}; for (const auto& kv_pair : kv_pairs) { @@ -56,58 +55,70 @@ static std::filesystem::path sample_bt_index_file(const EliasFanoList32& key_off return index_file.path(); } +using KvAndBtPaths = std::tuple; + +static KvAndBtPaths sample_3_keys_kv_and_bt_files(const std::filesystem::path& tmp_dir_path) { + // Prepare sample uncompressed KV file containing some key-value pairs + const auto kv_file_path = sample_kv_file( + tmp_dir_path, + std::vector{ + KeyAndValue{"0000000000000000000000000000000000000000"sv, "000a0269024e3c8decd159600000"sv}, + KeyAndValue{"0000000000000000000000000000000000000004"sv, "0008cf2fa48840ba8add0000"sv}, + KeyAndValue{"0000000000000000000000000000000000000008"sv, "0008146c4643c28ed8200000"sv}, + }); + + // Prepare the BT index for such KV file + // Note: key offsets can be computed from KV file layout + // 000000000000000600000000000000000000000000000000000000000000000801000215030F030D + // 01 + // 0000000000000000000000000000000000000000 <- 1st key, offset 0 + // 03 + // 000A0269024E3C8DECD159600000 + // 01 + // 0000000000000000000000000000000000000004 <- 2nd key, offset 0 + 20 + 1 + 14 + 1 + // 07 + // 0008CF2FA48840BA8ADD0000 + // 01 + // 0000000000000000000000000000000000000008 <- 3rd key, offset 0 + 20 + 1 + 14 + 1 + 20 + 1 + 12 
+ 1 + // 07 + // 0008146C4643C28ED8200000 + EliasFanoList32 encoded_key_offsets{3, 70}; + encoded_key_offsets.add_offset(0); + encoded_key_offsets.add_offset(0 + 20 + 1 + 14 + 1); + encoded_key_offsets.add_offset(0 + 20 + 1 + 14 + 1 + 20 + 1 + 12 + 1); + encoded_key_offsets.build(); + const auto bt_file_path = sample_bt_index_file(encoded_key_offsets); + + return {kv_file_path, bt_file_path}; +} + TEST_CASE("BTreeIndex", "[db]") { TemporaryDirectory tmp_dir; SECTION("empty") { + // Prepare empty KV and BT index files const auto kv_file_path = sample_kv_file(tmp_dir.path(), {}); - seg::Decompressor kv_decompressor{kv_file_path}; - kv_decompressor.open(); - TemporaryFile index_file; index_file.write(Bytes{}); + + // Open the KV and BT index files + seg::Decompressor kv_decompressor{kv_file_path}; + kv_decompressor.open(); BTreeIndex bt_index{kv_decompressor, index_file.path()}; CHECK(bt_index.empty()); } - SECTION("sample KV file") { - // Prepare sample uncompressed KV file containing some key-value pairs - const auto kv_file_path = sample_kv_file( - tmp_dir.path(), - { - {"0000000000000000000000000000000000000000", "000a0269024e3c8decd159600000"}, - {"0000000000000000000000000000000000000001", "0008cf2fa48840ba8add0000"}, - {"0000000000000000000000000000000000000002", "0008146c4643c28ed8200000"}, - }); - - // Prepare the BT index for such KV file - // Note: key offsets can be computed from KV file layout - // 000000000000000600000000000000000000000000000000000000000000000801000215030F030D - // 01 - // 0000000000000000000000000000000000000000 <- 1st key, offset 0 - // 03 - // 000A0269024E3C8DECD159600000 - // 01 - // 0000000000000000000000000000000000000001 <- 2nd key, offset 0 + 20 + 1 + 14 + 1 - // 07 - // 0008CF2FA48840BA8ADD0000 - // 01 - // 0000000000000000000000000000000000000002 <- 3rd key, offset 0 + 20 + 1 + 14 + 1 + 20 + 1 + 12 + 1 - // 07 - // 0008146C4643C28ED8200000 - EliasFanoList32 encoded_key_offsets{3, 70}; - encoded_key_offsets.add_offset(0); - 
encoded_key_offsets.add_offset(0 + 20 + 1 + 14 + 1); - encoded_key_offsets.add_offset(0 + 20 + 1 + 14 + 1 + 20 + 1 + 12 + 1); - encoded_key_offsets.build(); - const auto bt_file_path = sample_bt_index_file(encoded_key_offsets); + // Prepare sample uncompressed KV file containing 3 key-value pairs and its BT index file + const auto [kv_file_path, bt_file_path] = sample_3_keys_kv_and_bt_files(tmp_dir.path()); - // Open the KV and BT index files - seg::Decompressor kv_decompressor{kv_file_path}; - kv_decompressor.open(); - BTreeIndex bt_index{kv_decompressor, bt_file_path}; - REQUIRE(bt_index.key_count() == 3); + // Open the KV and BT index files + seg::Decompressor kv_decompressor{kv_file_path}; + kv_decompressor.open(); + BTreeIndex bt_index{kv_decompressor, bt_file_path}; + REQUIRE(!bt_index.empty()); + REQUIRE(bt_index.key_count() == 3); + SECTION("BTreeIndex::get") { // Check that all values retrieved through BT index match size_t key_count{0}; bool is_key{true}; @@ -127,6 +138,40 @@ TEST_CASE("BTreeIndex", "[db]") { } CHECK(key_count == bt_index.key_count()); } + + SECTION("BTreeIndex::seek") { + auto kv_iterator = kv_decompressor.begin(); + + // Seek using exact keys starting from the first one + auto index_it = bt_index.seek(ByteView{}, kv_iterator); + REQUIRE(index_it); + REQUIRE(index_it->key() == *from_hex("0000000000000000000000000000000000000000"sv)); + REQUIRE(index_it->value() == *from_hex("000a0269024e3c8decd159600000"sv)); + REQUIRE(index_it->data_index() == 0); + REQUIRE(index_it->next()); + REQUIRE(index_it->key() == *from_hex("0000000000000000000000000000000000000004"sv)); + REQUIRE(index_it->value() == *from_hex("0008cf2fa48840ba8add0000"sv)); + REQUIRE(index_it->data_index() == 1); + REQUIRE(index_it->next()); + REQUIRE(index_it->key() == *from_hex("0000000000000000000000000000000000000008"sv)); + REQUIRE(index_it->value() == *from_hex("0008146c4643c28ed8200000"sv)); + REQUIRE(index_it->data_index() == 2); + REQUIRE(!index_it->next()); + + // 
Seek using lower keys than existing ones + index_it = bt_index.seek(*from_hex("0000000000000000000000000000000000000003"sv), kv_iterator); + REQUIRE(index_it->key() == *from_hex("0000000000000000000000000000000000000004"sv)); + REQUIRE(index_it->value() == *from_hex("0008cf2fa48840ba8add0000"sv)); + REQUIRE(index_it->data_index() == 1); + index_it = bt_index.seek(*from_hex("0000000000000000000000000000000000000007"sv), kv_iterator); + REQUIRE(index_it->key() == *from_hex("0000000000000000000000000000000000000008"sv)); + REQUIRE(index_it->value() == *from_hex("0008146c4643c28ed8200000"sv)); + REQUIRE(index_it->data_index() == 2); + + // Seek beyond the last key + CHECK(!bt_index.seek(*from_hex("0000000000000000000000000000000000000009"), kv_iterator)); + CHECK(!bt_index.seek(*from_hex("FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF"), kv_iterator)); + } } } // namespace silkworm::snapshots::index From 9205e8f445c345e6b4351a548c37c3061066879e Mon Sep 17 00:00:00 2001 From: canepat <16927169+canepat@users.noreply.github.com> Date: Fri, 20 Sep 2024 14:45:35 +0200 Subject: [PATCH 06/12] use std::optional instead of std::unique_ptr --- silkworm/db/snapshots/index/btree_index.cpp | 8 +++--- silkworm/db/snapshots/index/btree_index.hpp | 31 +++++++-------------- 2 files changed, 14 insertions(+), 25 deletions(-) diff --git a/silkworm/db/snapshots/index/btree_index.cpp b/silkworm/db/snapshots/index/btree_index.cpp index 3171fd96c7..72839e2fdd 100644 --- a/silkworm/db/snapshots/index/btree_index.cpp +++ b/silkworm/db/snapshots/index/btree_index.cpp @@ -66,12 +66,12 @@ BTreeIndex::BTreeIndex(seg::Decompressor& kv_decompressor, BTreeIndex::Cursor::Cursor(BTreeIndex* index, ByteView key, ByteView value, DataIndex data_index, DataIterator data_it) : index_(index), key_(key), value_(value), data_index_(data_index), data_it_(std::move(data_it)) {} -std::unique_ptr BTreeIndex::seek(ByteView seek_key, DataIterator data_it) { +std::optional BTreeIndex::seek(ByteView seek_key, DataIterator 
data_it) { const auto [found, key, value, data_index] = btree_->seek(seek_key, data_it); if (key.compare(seek_key) >= 0) { return new_cursor(key, value, data_index, data_it); } - return nullptr; + return std::nullopt; } std::optional BTreeIndex::get(ByteView key, DataIterator data_it) { @@ -123,8 +123,8 @@ BTree::CompareResult BTreeIndex::compare_key(ByteView key, DataIndex data_index, return {data_key.compare(key), data_key}; } -std::unique_ptr BTreeIndex::new_cursor(ByteView key, ByteView value, DataIndex data_index, DataIterator data_it) { - return std::unique_ptr{new BTreeIndex::Cursor{this, key, value, data_index, std::move(data_it)}}; +BTreeIndex::Cursor BTreeIndex::new_cursor(ByteView key, ByteView value, DataIndex data_index, DataIterator data_it) { + return BTreeIndex::Cursor{this, key, value, data_index, std::move(data_it)}; } bool BTreeIndex::Cursor::next() { diff --git a/silkworm/db/snapshots/index/btree_index.hpp b/silkworm/db/snapshots/index/btree_index.hpp index ccb277a74a..112a79f6cb 100644 --- a/silkworm/db/snapshots/index/btree_index.hpp +++ b/silkworm/db/snapshots/index/btree_index.hpp @@ -40,20 +40,6 @@ class BTreeIndex { class Cursor { public: - using iterator_category [[maybe_unused]] = std::input_iterator_tag; - using difference_type = std::ptrdiff_t; - using value_type = std::tuple; - using pointer = value_type*; - using reference = value_type&; - - // reference operator*() { return key_value_index_; } - // pointer operator->() { return &key_value_index_; } - - Cursor& operator++() { - next(); - return *this; - } - ByteView key() const noexcept { return key_; } ByteView value() const noexcept { return value_; } DataIndex data_index() const noexcept { return data_index_; } @@ -70,7 +56,6 @@ class BTreeIndex { Bytes key_; Bytes value_; DataIndex data_index_; - // value_type key_value_index_; DataIterator data_it_; }; @@ -88,18 +73,22 @@ class BTreeIndex { //! 
Return the number of keys included into this index size_t key_count() const { return data_offsets_ ? data_offsets_->sequence_length() : 0; }; - //! Seek and return cursor at position where key >= \p seek_key - //! \param seek_key the given key to seek cursor at - //! \return a cursor positioned at key >= \p seek_key + //! Seek and return a cursor at position where key >= \p seek_key + //! \param seek_key the given key at which the cursor must be positioned + //! \param data_it an iterator to the key-value data sequence + //! \return a cursor positioned at key >= \p seek_key or std::nullopt //! \details if \p seek_key is empty, first key is returned - //! \details if \p seek_key greater than any other key, nullptr is returned + //! \details if \p seek_key is greater than any other key, std::nullopt is returned std::optional seek(ByteView seek_key, DataIterator data_it); //! Get the value associated to the given key with exact match + //! \param key the data key to match exactly + //! \param data_it an iterator to the key-value data sequence + //!
\return the value associated at \p key or std::nullopt if not found std::optional get(ByteView key, DataIterator data_it); private: - std::unique_ptr new_cursor(ByteView key, ByteView value, DataIndex data_index, DataIterator data_it); + Cursor new_cursor(ByteView key, ByteView value, DataIndex data_index, DataIterator data_it); BTree::LookupResult lookup_data(DataIndex data_index, DataIterator data_it); BTree::CompareResult compare_key(ByteView key, DataIndex data_index, DataIterator data_it); From 33c882a33700afa7462afe096bb7ccf852270057 Mon Sep 17 00:00:00 2001 From: canepat <16927169+canepat@users.noreply.github.com> Date: Mon, 23 Sep 2024 12:09:49 +0200 Subject: [PATCH 07/12] snapshots: move encoding modules to common (#2374) --- .../{rec_split => common}/encoding/elias_fano.hpp | 9 +++++---- .../{rec_split => common}/encoding/elias_fano_test.cpp | 4 ++-- .../{rec_split => common}/encoding/golomb_rice.hpp | 9 +++++---- .../{rec_split => common}/encoding/golomb_rice_test.cpp | 5 ++--- .../{rec_split => common}/encoding/sequence.hpp | 4 ++-- .../{rec_split => common}/encoding/sequence_test.cpp | 4 ++-- .../common/common.hpp => common/encoding/util.hpp} | 4 ++-- silkworm/db/snapshots/index/btree_index.hpp | 4 ++-- .../db/snapshots/rec_split/{common => }/murmur_hash3.cpp | 0 .../db/snapshots/rec_split/{common => }/murmur_hash3.hpp | 0 .../rec_split/{common => }/murmur_hash3_test.cpp | 0 silkworm/db/snapshots/rec_split/rec_split.hpp | 7 ++++--- 12 files changed, 26 insertions(+), 24 deletions(-) rename silkworm/db/snapshots/{rec_split => common}/encoding/elias_fano.hpp (99%) rename silkworm/db/snapshots/{rec_split => common}/encoding/elias_fano_test.cpp (98%) rename silkworm/db/snapshots/{rec_split => common}/encoding/golomb_rice.hpp (97%) rename silkworm/db/snapshots/{rec_split => common}/encoding/golomb_rice_test.cpp (94%) rename silkworm/db/snapshots/{rec_split => common}/encoding/sequence.hpp (95%) rename silkworm/db/snapshots/{rec_split => 
common}/encoding/sequence_test.cpp (94%) rename silkworm/db/snapshots/{rec_split/common/common.hpp => common/encoding/util.hpp} (99%) rename silkworm/db/snapshots/rec_split/{common => }/murmur_hash3.cpp (100%) rename silkworm/db/snapshots/rec_split/{common => }/murmur_hash3.hpp (100%) rename silkworm/db/snapshots/rec_split/{common => }/murmur_hash3_test.cpp (100%) diff --git a/silkworm/db/snapshots/rec_split/encoding/elias_fano.hpp b/silkworm/db/snapshots/common/encoding/elias_fano.hpp similarity index 99% rename from silkworm/db/snapshots/rec_split/encoding/elias_fano.hpp rename to silkworm/db/snapshots/common/encoding/elias_fano.hpp index 2c21eb23a6..c14ef9dd7d 100644 --- a/silkworm/db/snapshots/rec_split/encoding/elias_fano.hpp +++ b/silkworm/db/snapshots/common/encoding/elias_fano.hpp @@ -58,16 +58,17 @@ #include #include #include -#include -#include #include #include +#include "sequence.hpp" +#include "util.hpp" + // EliasFano algo overview https://www.antoniomallia.it/sorted-integers-compression-with-elias-fano-encoding.html // P. Elias. Efficient storage and retrieval by content and address of static files. J. ACM, 21(2):246–260, 1974. // Partitioned Elias-Fano Indexes http://groups.di.unipi.it/~ottavian/files/elias_fano_sigir14.pdf -namespace silkworm::snapshots::rec_split::encoding { +namespace silkworm::snapshots::encoding { //! 
Log2Q = Log2(Quantum) static constexpr uint64_t kLog2q = 8; @@ -569,4 +570,4 @@ class DoubleEliasFanoList16 { } }; -} // namespace silkworm::snapshots::rec_split::encoding +} // namespace silkworm::snapshots::encoding diff --git a/silkworm/db/snapshots/rec_split/encoding/elias_fano_test.cpp b/silkworm/db/snapshots/common/encoding/elias_fano_test.cpp similarity index 98% rename from silkworm/db/snapshots/rec_split/encoding/elias_fano_test.cpp rename to silkworm/db/snapshots/common/encoding/elias_fano_test.cpp index b4322a0fed..12d77f88dd 100644 --- a/silkworm/db/snapshots/rec_split/encoding/elias_fano_test.cpp +++ b/silkworm/db/snapshots/common/encoding/elias_fano_test.cpp @@ -26,7 +26,7 @@ #include #include -namespace silkworm::snapshots::rec_split::encoding { +namespace silkworm::snapshots::encoding { struct EliasFanoList32Test { std::vector offsets; @@ -168,4 +168,4 @@ TEST_CASE("DoubleEliasFanoList16", "[silkworm][recsplit][elias_fano]") { "0000000000000000010000000000000000000000000000000000000000000000")); } -} // namespace silkworm::snapshots::rec_split::encoding +} // namespace silkworm::snapshots::encoding diff --git a/silkworm/db/snapshots/rec_split/encoding/golomb_rice.hpp b/silkworm/db/snapshots/common/encoding/golomb_rice.hpp similarity index 97% rename from silkworm/db/snapshots/rec_split/encoding/golomb_rice.hpp rename to silkworm/db/snapshots/common/encoding/golomb_rice.hpp index 7f6ed5c227..52d079c095 100644 --- a/silkworm/db/snapshots/rec_split/encoding/golomb_rice.hpp +++ b/silkworm/db/snapshots/common/encoding/golomb_rice.hpp @@ -50,11 +50,12 @@ #include #include -#include -#include #include -namespace silkworm::snapshots::rec_split::encoding { +#include "sequence.hpp" +#include "util.hpp" + +namespace silkworm::snapshots::encoding { //! Storage for Golomb-Rice codes of a RecSplit bucket. 
class GolombRiceVector { @@ -253,4 +254,4 @@ class GolombRiceVector { } }; -} // namespace silkworm::snapshots::rec_split::encoding +} // namespace silkworm::snapshots::encoding diff --git a/silkworm/db/snapshots/rec_split/encoding/golomb_rice_test.cpp b/silkworm/db/snapshots/common/encoding/golomb_rice_test.cpp similarity index 94% rename from silkworm/db/snapshots/rec_split/encoding/golomb_rice_test.cpp rename to silkworm/db/snapshots/common/encoding/golomb_rice_test.cpp index 49bd5a56aa..6927b54618 100644 --- a/silkworm/db/snapshots/rec_split/encoding/golomb_rice_test.cpp +++ b/silkworm/db/snapshots/common/encoding/golomb_rice_test.cpp @@ -21,10 +21,9 @@ #include #include -#include #include -namespace silkworm::snapshots::rec_split::encoding { +namespace silkworm::snapshots::encoding { static const std::size_t kGolombRiceTestNumKeys{128}; static const std::size_t kGolombRiceTestNumTrees{1'000}; @@ -86,4 +85,4 @@ TEST_CASE("GolombRiceVector", "[silkworm][recsplit][golomb_rice]") { } } -} // namespace silkworm::snapshots::rec_split::encoding +} // namespace silkworm::snapshots::encoding diff --git a/silkworm/db/snapshots/rec_split/encoding/sequence.hpp b/silkworm/db/snapshots/common/encoding/sequence.hpp similarity index 95% rename from silkworm/db/snapshots/rec_split/encoding/sequence.hpp rename to silkworm/db/snapshots/common/encoding/sequence.hpp index b4a7f9a598..56dc2117fe 100644 --- a/silkworm/db/snapshots/rec_split/encoding/sequence.hpp +++ b/silkworm/db/snapshots/common/encoding/sequence.hpp @@ -25,7 +25,7 @@ #include #include -namespace silkworm::snapshots::rec_split::encoding { +namespace silkworm::snapshots::encoding { template using UnsignedIntegralSequence = std::vector; @@ -64,4 +64,4 @@ std::istream& operator>>(std::istream& is, UnsignedIntegralSequence& s) { return is; } -} // namespace silkworm::snapshots::rec_split::encoding +} // namespace silkworm::snapshots::encoding diff --git a/silkworm/db/snapshots/rec_split/encoding/sequence_test.cpp 
b/silkworm/db/snapshots/common/encoding/sequence_test.cpp similarity index 94% rename from silkworm/db/snapshots/rec_split/encoding/sequence_test.cpp rename to silkworm/db/snapshots/common/encoding/sequence_test.cpp index 9c1325d3ab..442b9f5b4a 100644 --- a/silkworm/db/snapshots/rec_split/encoding/sequence_test.cpp +++ b/silkworm/db/snapshots/common/encoding/sequence_test.cpp @@ -25,7 +25,7 @@ #include #include -namespace silkworm::snapshots::rec_split::encoding { +namespace silkworm::snapshots::encoding { TEST_CASE("Uint64Sequence", "[silkworm][snapshots][recsplit][sequence]") { test_util::SetLogVerbosityGuard guard{log::Level::kNone}; @@ -52,4 +52,4 @@ TEST_CASE("Uint64Sequence: size too big", "[silkworm][snapshots][recsplit][seque CHECK_THROWS_AS((ss >> input_sequence), std::logic_error); } -} // namespace silkworm::snapshots::rec_split::encoding +} // namespace silkworm::snapshots::encoding diff --git a/silkworm/db/snapshots/rec_split/common/common.hpp b/silkworm/db/snapshots/common/encoding/util.hpp similarity index 99% rename from silkworm/db/snapshots/rec_split/common/common.hpp rename to silkworm/db/snapshots/common/encoding/util.hpp index a7376d969b..8b12f3b2d3 100644 --- a/silkworm/db/snapshots/rec_split/common/common.hpp +++ b/silkworm/db/snapshots/common/encoding/util.hpp @@ -58,7 +58,7 @@ #include -namespace silkworm::snapshots::rec_split { +namespace silkworm::snapshots::encoding { using std::memcpy; @@ -206,4 +206,4 @@ inline uint64_t select64(uint64_t x, uint64_t k) { #endif } -} // namespace silkworm::snapshots::rec_split +} // namespace silkworm::snapshots::encoding diff --git a/silkworm/db/snapshots/index/btree_index.hpp b/silkworm/db/snapshots/index/btree_index.hpp index 112a79f6cb..9925ce5d3c 100644 --- a/silkworm/db/snapshots/index/btree_index.hpp +++ b/silkworm/db/snapshots/index/btree_index.hpp @@ -23,13 +23,13 @@ #include -#include "../rec_split/encoding/elias_fano.hpp" // TODO(canepat) move to snapshots/common +#include 
"../common/encoding/elias_fano.hpp" #include "../seg/decompressor.hpp" #include "btree.hpp" namespace silkworm::snapshots::index { -using rec_split::encoding::EliasFanoList32; // TODO(canepat) remove after moving +using encoding::EliasFanoList32; class BTreeIndex { public: diff --git a/silkworm/db/snapshots/rec_split/common/murmur_hash3.cpp b/silkworm/db/snapshots/rec_split/murmur_hash3.cpp similarity index 100% rename from silkworm/db/snapshots/rec_split/common/murmur_hash3.cpp rename to silkworm/db/snapshots/rec_split/murmur_hash3.cpp diff --git a/silkworm/db/snapshots/rec_split/common/murmur_hash3.hpp b/silkworm/db/snapshots/rec_split/murmur_hash3.hpp similarity index 100% rename from silkworm/db/snapshots/rec_split/common/murmur_hash3.hpp rename to silkworm/db/snapshots/rec_split/murmur_hash3.hpp diff --git a/silkworm/db/snapshots/rec_split/common/murmur_hash3_test.cpp b/silkworm/db/snapshots/rec_split/murmur_hash3_test.cpp similarity index 100% rename from silkworm/db/snapshots/rec_split/common/murmur_hash3_test.cpp rename to silkworm/db/snapshots/rec_split/murmur_hash3_test.cpp diff --git a/silkworm/db/snapshots/rec_split/rec_split.hpp b/silkworm/db/snapshots/rec_split/rec_split.hpp index 7c233474b1..b23e4ac505 100644 --- a/silkworm/db/snapshots/rec_split/rec_split.hpp +++ b/silkworm/db/snapshots/rec_split/rec_split.hpp @@ -72,9 +72,9 @@ #include #include #include -#include -#include -#include +#include +#include +#include #include #include #include @@ -87,6 +87,7 @@ namespace silkworm::snapshots::rec_split { using namespace std::chrono; +using encoding::remap16, encoding::remap128; //! Assumed *maximum* size of a bucket. 
Works with high probability up to average bucket size ~2000 static const int kMaxBucketSize = 3000; From bef472f15844019aaac2bb3e8599bd12c785f5eb Mon Sep 17 00:00:00 2001 From: canepat <16927169+canepat@users.noreply.github.com> Date: Sun, 29 Sep 2024 07:40:14 +0200 Subject: [PATCH 08/12] naming and comments --- .../snapshots/common/encoding/elias_fano.hpp | 24 ++++++++++++------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/silkworm/db/snapshots/common/encoding/elias_fano.hpp b/silkworm/db/snapshots/common/encoding/elias_fano.hpp index c14ef9dd7d..bfdae6bd6b 100644 --- a/silkworm/db/snapshots/common/encoding/elias_fano.hpp +++ b/silkworm/db/snapshots/common/encoding/elias_fano.hpp @@ -64,7 +64,11 @@ #include "sequence.hpp" #include "util.hpp" -// EliasFano algo overview https://www.antoniomallia.it/sorted-integers-compression-with-elias-fano-encoding.html +// Elias-Fano encoding is a high bits / low bits representation of a monotonically increasing sequence of N > 0 natural numbers x[i] +// 0 <= x[0] <= x[1] <= ... <= x[N-2] <= x[N-1] <= U +// where U > 0 is an upper bound on the last value. + +// EliasFano algorithm overview https://www.antoniomallia.it/sorted-integers-compression-with-elias-fano-encoding.html // P. Elias. Efficient storage and retrieval by content and address of static files. J. ACM, 21(2):246–260, 1974. // Partitioned Elias-Fano Indexes http://groups.di.unipi.it/~ottavian/files/elias_fano_sigir14.pdf @@ -117,23 +121,25 @@ class EliasFanoList32 { return std::make_unique(count, u, remaining_data); } - //! Create an empty new 32-bit EF list prepared for specified sequence length and max offset - EliasFanoList32(uint64_t sequence_length, uint64_t max_offset) + //! Create an empty new 32-bit EF list prepared for the given data sequence length and max value + //! \param sequence_length the length of the data sequence + //! 
\param max_value the max value in the data sequence + EliasFanoList32(uint64_t sequence_length, uint64_t max_value) : count_(sequence_length - 1), - u_(max_offset + 1), - max_offset_(max_offset) { + u_(max_value + 1), + max_value_(max_value) { ensure(sequence_length > 0, "sequence length is zero"); derive_fields(); } //! Create a new 32-bit EF list from an existing data sequence //! \param count the number of EF data points - //! \param u u + //! \param u the strict upper bound on the EF data points, i.e. max value plus one //! \param data the existing data sequence (portion exceeding the total words will be ignored) EliasFanoList32(uint64_t count, uint64_t u, std::span data) : count_(count), u_(u), - max_offset_(u - 1) { + max_value_(u - 1) { const auto total_words = derive_fields(); SILKWORM_ASSERT(total_words * sizeof(uint64_t) <= data.size()); data = data.subspan(0, total_words * sizeof(uint64_t)); @@ -144,7 +150,7 @@ class EliasFanoList32 { [[nodiscard]] std::size_t count() const { return count_; } - [[nodiscard]] std::size_t max() const { return max_offset_; } + [[nodiscard]] std::size_t max() const { return max_value_; } [[nodiscard]] std::size_t min() const { return get(0); } @@ -272,7 +278,7 @@ class EliasFanoList32 { uint64_t count_{0}; uint64_t u_{0}; uint64_t l_{0}; - uint64_t max_offset_{0}; + uint64_t max_value_{0}; uint64_t i_{0}; Uint64Sequence data_; }; From d850cbc09576c1f533eefdac318d9f106825df04 Mon Sep 17 00:00:00 2001 From: canepat <16927169+canepat@users.noreply.github.com> Date: Sun, 29 Sep 2024 08:09:03 +0200 Subject: [PATCH 09/12] consistent usage of nullopt --- silkworm/db/snapshots/index/btree_index.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/silkworm/db/snapshots/index/btree_index.cpp b/silkworm/db/snapshots/index/btree_index.cpp index 72839e2fdd..f4e15d4fe1 100644 --- a/silkworm/db/snapshots/index/btree_index.cpp +++ b/silkworm/db/snapshots/index/btree_index.cpp @@ -76,15 +76,15 @@ std::optional 
BTreeIndex::seek(ByteView seek_key, DataIterat std::optional BTreeIndex::get(ByteView key, DataIterator data_it) { if (empty()) { - return {}; + return std::nullopt; } const auto [key_found, _, data_index] = btree_->get(key, data_it); if (!key_found) { - return {}; + return std::nullopt; } const auto [kv_found, kv] = lookup_data(data_index, data_it); if (!kv_found) { - return {}; + return std::nullopt; } return kv.second; } From 603b22431d23ffc8e985e2a4954ce44bb4b11b86 Mon Sep 17 00:00:00 2001 From: canepat <16927169+canepat@users.noreply.github.com> Date: Sun, 29 Sep 2024 08:25:42 +0200 Subject: [PATCH 10/12] remove unnecessary madvise calls --- silkworm/db/snapshots/index/btree_index.cpp | 4 ---- silkworm/db/snapshots/seg/decompressor.hpp | 4 ---- 2 files changed, 8 deletions(-) diff --git a/silkworm/db/snapshots/index/btree_index.cpp b/silkworm/db/snapshots/index/btree_index.cpp index f4e15d4fe1..e660d99298 100644 --- a/silkworm/db/snapshots/index/btree_index.cpp +++ b/silkworm/db/snapshots/index/btree_index.cpp @@ -48,10 +48,6 @@ BTreeIndex::BTreeIndex(seg::Decompressor& kv_decompressor, ensure(data_offsets_->sequence_length() > 0, "BTreeIndex: invalid zero-length data offsets"); const auto encoded_nodes = memory_mapped_range.subspan(data_offsets_->encoded_data_size()); - - // Let the OS know we're going to read data sequentially now, then restore normal (i.e. 
unknown) reading behavior - kv_decompressor.advise_sequential(); - [[maybe_unused]] auto _ = gsl::finally([&]() { kv_decompressor.advise_normal(); }); auto kv_it = kv_decompressor.begin(); btree_ = std::make_unique( diff --git a/silkworm/db/snapshots/seg/decompressor.hpp b/silkworm/db/snapshots/seg/decompressor.hpp index 193ec3af51..32b996af13 100644 --- a/silkworm/db/snapshots/seg/decompressor.hpp +++ b/silkworm/db/snapshots/seg/decompressor.hpp @@ -314,10 +314,6 @@ class Decompressor { void open(); - void advise_normal() const { compressed_file_->advise_normal(); } - void advise_random() const { compressed_file_->advise_random(); } - void advise_sequential() const { compressed_file_->advise_sequential(); } - //! Get an iterator to the compressed data [[nodiscard]] Iterator make_iterator() const { return Iterator{this, {}}; } From 92fb1c936dcbbf83479faaa72ecc2ca3d3f91092 Mon Sep 17 00:00:00 2001 From: canepat <16927169+canepat@users.noreply.github.com> Date: Sun, 29 Sep 2024 08:30:20 +0200 Subject: [PATCH 11/12] explicit type for literal constant --- silkworm/db/snapshots/index/btree.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/silkworm/db/snapshots/index/btree.cpp b/silkworm/db/snapshots/index/btree.cpp index ec93080c01..5a3e3dd613 100644 --- a/silkworm/db/snapshots/index/btree.cpp +++ b/silkworm/db/snapshots/index/btree.cpp @@ -27,7 +27,7 @@ namespace silkworm::snapshots::index { //! 
Smallest shard available for scan instead of binary search -static constexpr auto kDefaultBtreeStartSkip{4}; +static constexpr uint64_t kDefaultBtreeStartSkip{4}; static bool enable_assert_btree_keys() { bool enabled{false}; From 49a8a62a099b3e998086012ccbc625fcdc59a91d Mon Sep 17 00:00:00 2001 From: canepat <16927169+canepat@users.noreply.github.com> Date: Mon, 30 Sep 2024 10:22:05 +0200 Subject: [PATCH 12/12] throw for empty index file --- silkworm/db/snapshots/index/btree_index.cpp | 5 +---- silkworm/db/snapshots/index/btree_index.hpp | 5 +---- silkworm/db/snapshots/index/btree_index_test.cpp | 4 +--- 3 files changed, 3 insertions(+), 11 deletions(-) diff --git a/silkworm/db/snapshots/index/btree_index.cpp b/silkworm/db/snapshots/index/btree_index.cpp index e660d99298..47f4d5f9ad 100644 --- a/silkworm/db/snapshots/index/btree_index.cpp +++ b/silkworm/db/snapshots/index/btree_index.cpp @@ -35,7 +35,7 @@ BTreeIndex::BTreeIndex(seg::Decompressor& kv_decompressor, // Gracefully handle the case of empty index file before memory mapping to avoid error if (std::filesystem::file_size(file_path_) == 0) { - return; + throw std::runtime_error("index " + file_path_.filename().string() + " is empty"); } // Either use given memory-mapped region or create a new one @@ -71,9 +71,6 @@ std::optional BTreeIndex::seek(ByteView seek_key, DataIterat } std::optional BTreeIndex::get(ByteView key, DataIterator data_it) { - if (empty()) { - return std::nullopt; - } const auto [key_found, _, data_index] = btree_->get(key, data_it); if (!key_found) { return std::nullopt; diff --git a/silkworm/db/snapshots/index/btree_index.hpp b/silkworm/db/snapshots/index/btree_index.hpp index 9925ce5d3c..2252d4afe9 100644 --- a/silkworm/db/snapshots/index/btree_index.hpp +++ b/silkworm/db/snapshots/index/btree_index.hpp @@ -67,11 +67,8 @@ class BTreeIndex { //! 
Return the Elias-Fano encoding of the sequence of key offsets or nullptr if not present const EliasFanoList32* data_offsets() const { return data_offsets_.get(); } - //! Is this index empty or not? - bool empty() const { return data_offsets_ ? data_offsets_->sequence_length() == 0 : true; } - //! Return the number of keys included into this index - size_t key_count() const { return data_offsets_ ? data_offsets_->sequence_length() : 0; }; + size_t key_count() const { return data_offsets_->sequence_length(); }; //! Seek and return a cursor at position where key >= \p seek_key //! \param seek_key the given key at which the cursor must be seeked diff --git a/silkworm/db/snapshots/index/btree_index_test.cpp b/silkworm/db/snapshots/index/btree_index_test.cpp index af5f1d379e..740f41e4f8 100644 --- a/silkworm/db/snapshots/index/btree_index_test.cpp +++ b/silkworm/db/snapshots/index/btree_index_test.cpp @@ -104,8 +104,7 @@ TEST_CASE("BTreeIndex", "[db]") { // Open the KV and BT index files seg::Decompressor kv_decompressor{kv_file_path}; kv_decompressor.open(); - BTreeIndex bt_index{kv_decompressor, index_file.path()}; - CHECK(bt_index.empty()); + CHECK_THROWS_AS(BTreeIndex(kv_decompressor, index_file.path()), std::runtime_error); } // Prepare sample uncompressed KV file containing 3 key-value pairs and its BT index file @@ -115,7 +114,6 @@ TEST_CASE("BTreeIndex", "[db]") { seg::Decompressor kv_decompressor{kv_file_path}; kv_decompressor.open(); BTreeIndex bt_index{kv_decompressor, bt_file_path}; - REQUIRE(!bt_index.empty()); REQUIRE(bt_index.key_count() == 3); SECTION("BTreeIndex::get") {