diff --git a/silkworm/db/access_layer.cpp b/silkworm/db/access_layer.cpp index da926a3c42..6f889c2992 100644 --- a/silkworm/db/access_layer.cpp +++ b/silkworm/db/access_layer.cpp @@ -1266,29 +1266,15 @@ bool DataModel::read_block_from_snapshot(BlockNum block_num, Block& block) const } std::optional DataModel::read_header_from_snapshot(BlockNum block_num) const { - std::optional block_header; - // We know the header snapshot in advance: find it based on target block number - const auto [segment_and_index, _] = repository_.find_segment(blocks::kHeaderSegmentAndIdxNames, block_num); - if (segment_and_index) { - block_header = HeaderFindByBlockNumQuery{*segment_and_index}.exec(block_num); - } - return block_header; + return HeaderFindByBlockNumQuery{repository_}.exec(block_num); } std::optional DataModel::read_header_from_snapshot(const Hash& hash) const { - std::optional block_header; - // We don't know the header snapshot in advance: search for block hash in each header snapshot in reverse order - for (const auto& bundle_ptr : repository_.view_bundles_reverse()) { - const auto& bundle = *bundle_ptr; - auto segment_and_index = bundle.segment_and_accessor_index(blocks::kHeaderSegmentAndIdxNames); - block_header = HeaderFindByHashQuery{segment_and_index}.exec(hash); - if (block_header) break; - } - return block_header; + return HeaderFindByHashQuery{repository_}.exec(hash); } std::optional DataModel::read_body_for_storage_from_snapshot(BlockNum block_num) const { - return BodyFindByBlockNumMultiQuery{repository_}.exec(block_num); + return BodyFindByBlockNumQuery{repository_}.exec(block_num); } bool DataModel::read_body_from_snapshot(BlockNum block_num, BlockBody& body) const { @@ -1310,14 +1296,7 @@ bool DataModel::read_body_from_snapshot(BlockNum block_num, BlockBody& body) con } bool DataModel::is_body_in_snapshot(BlockNum block_num) const { - // We know the body snapshot in advance: find it based on target block number - const auto [segment_and_index, _] = 
repository_.find_segment(blocks::kBodySegmentAndIdxNames, block_num); - if (segment_and_index) { - const auto stored_body = BodyFindByBlockNumQuery{*segment_and_index}.exec(block_num); - return stored_body.has_value(); - } - - return false; + return BodyFindByBlockNumQuery{repository_}.exec(block_num).has_value(); } bool DataModel::read_transactions_from_snapshot(BlockNum block_num, uint64_t base_txn_id, uint64_t txn_count, std::vector& txs) const { @@ -1325,31 +1304,27 @@ bool DataModel::read_transactions_from_snapshot(BlockNum block_num, uint64_t bas return true; } - const auto [segment_and_index, _] = repository_.find_segment(blocks::kTxnSegmentAndIdxNames, block_num); - if (!segment_and_index) return false; - - txs = TransactionRangeFromIdQuery{*segment_and_index}.exec_into_vector(base_txn_id, txn_count); + auto txs_opt = TransactionRangeFromIdQuery{repository_}.exec(block_num, base_txn_id, txn_count); + if (!txs_opt) return false; + txs = std::move(*txs_opt); return true; } bool DataModel::read_rlp_transactions_from_snapshot(BlockNum block_num, std::vector& rlp_txs) const { - const auto [body_segment_and_index, _] = repository_.find_segment(blocks::kBodySegmentAndIdxNames, block_num); - if (body_segment_and_index) { - auto stored_body = BodyFindByBlockNumQuery{*body_segment_and_index}.exec(block_num); - if (!stored_body) return false; + auto stored_body = BodyFindByBlockNumQuery{repository_}.exec(block_num); + if (!stored_body) return false; + { // Skip first and last *system transactions* in block body const auto base_txn_id{stored_body->base_txn_id + 1}; const auto txn_count{stored_body->txn_count >= 2 ? 
stored_body->txn_count - 2 : stored_body->txn_count}; - if (txn_count == 0) return true; - const auto [tx_segment_and_index, _2] = repository_.find_segment(blocks::kTxnSegmentAndIdxNames, block_num); - if (!tx_segment_and_index) return false; - - rlp_txs = TransactionPayloadRlpRangeFromIdQuery{*tx_segment_and_index}.exec_into_vector(base_txn_id, txn_count); + auto txs_opt = TransactionPayloadRlpRangeFromIdQuery{repository_}.exec(block_num, base_txn_id, txn_count); + if (!txs_opt) return false; + rlp_txs = std::move(*txs_opt); return true; } @@ -1382,7 +1357,7 @@ std::optional DataModel::read_tx_lookup_from_db(const evmc::bytes32& t } std::optional DataModel::read_tx_lookup_from_snapshot(const evmc::bytes32& tx_hash) const { - TransactionBlockNumByTxnHashMultiQuery query{repository_.view_bundles_reverse()}; + TransactionBlockNumByTxnHashQuery query{repository_}; return query.exec(tx_hash); } diff --git a/silkworm/db/access_layer.hpp b/silkworm/db/access_layer.hpp index 2df5e5097c..4930ad1436 100644 --- a/silkworm/db/access_layer.hpp +++ b/silkworm/db/access_layer.hpp @@ -290,7 +290,7 @@ class DataModel { public: DataModel( ROTxn& txn, - snapshots::SnapshotRepository& repository) + const snapshots::SnapshotRepositoryROAccess& repository) : txn_{txn}, repository_{repository} {} @@ -381,7 +381,7 @@ class DataModel { std::optional read_tx_lookup_from_snapshot(const evmc::bytes32& tx_hash) const; ROTxn& txn_; - snapshots::SnapshotRepository& repository_; + const snapshots::SnapshotRepositoryROAccess& repository_; }; class DataModelFactory { diff --git a/silkworm/db/blocks/bodies/body_queries.hpp b/silkworm/db/blocks/bodies/body_queries.hpp index c745fc62d4..e712e96ebe 100644 --- a/silkworm/db/blocks/bodies/body_queries.hpp +++ b/silkworm/db/blocks/bodies/body_queries.hpp @@ -17,29 +17,20 @@ #pragma once #include -#include +#include #include "../schema_config.hpp" #include "body_segment.hpp" namespace silkworm::snapshots { -using BodyFindByBlockNumQuery = FindByIdQuery; - 
-class BodyFindByBlockNumMultiQuery { - public: - // TODO: use a sub-interface of SnapshotRepository - explicit BodyFindByBlockNumMultiQuery(SnapshotRepository& repository) - : repository_{repository} {} +using BodyFindByBlockNumSegmentQuery = FindByIdSegmentQuery; +struct BodyFindByBlockNumQuery : public FindByTimestampMapQuery { + using FindByTimestampMapQuery::FindByTimestampMapQuery; std::optional exec(BlockNum block_num) { - const auto [segment_and_index, _] = repository_.find_segment(db::blocks::kBodySegmentAndIdxNames, block_num); - if (!segment_and_index) return std::nullopt; - return BodyFindByBlockNumQuery{*segment_and_index}.exec(block_num); + return FindByTimestampMapQuery::exec(block_num, block_num); } - - private: - SnapshotRepository& repository_; }; } // namespace silkworm::snapshots diff --git a/silkworm/db/blocks/bodies/body_txs_amount_query.cpp b/silkworm/db/blocks/bodies/body_txs_amount_query.cpp index 819e5d95fe..9f5930bc42 100644 --- a/silkworm/db/blocks/bodies/body_txs_amount_query.cpp +++ b/silkworm/db/blocks/bodies/body_txs_amount_query.cpp @@ -22,10 +22,10 @@ namespace silkworm::snapshots { -BodyTxsAmountQuery::Result BodyTxsAmountQuery::exec() { +BodyTxsAmountSegmentQuery::Result BodyTxsAmountSegmentQuery::exec() { size_t body_count = segment_.item_count(); if (body_count == 0) { - throw std::runtime_error("BodyTxsAmountQuery empty body snapshot: " + segment_.path().path().string()); + throw std::runtime_error("BodyTxsAmountSegmentQuery empty body snapshot: " + segment_.path().path().string()); } BodySegmentReader reader{segment_}; diff --git a/silkworm/db/blocks/bodies/body_txs_amount_query.hpp b/silkworm/db/blocks/bodies/body_txs_amount_query.hpp index 0f57e54647..ef41769eda 100644 --- a/silkworm/db/blocks/bodies/body_txs_amount_query.hpp +++ b/silkworm/db/blocks/bodies/body_txs_amount_query.hpp @@ -22,14 +22,14 @@ namespace silkworm::snapshots { -class BodyTxsAmountQuery { +class BodyTxsAmountSegmentQuery { public: struct Result { 
uint64_t first_tx_id{}; uint64_t count{}; }; - explicit BodyTxsAmountQuery(const segment::SegmentFileReader& segment) : segment_(segment) {} + explicit BodyTxsAmountSegmentQuery(const segment::SegmentFileReader& segment) : segment_(segment) {} Result exec(); diff --git a/silkworm/db/blocks/bodies/body_txs_amount_query_test.cpp b/silkworm/db/blocks/bodies/body_txs_amount_query_test.cpp index ce7a2e5e7b..157667cf42 100644 --- a/silkworm/db/blocks/bodies/body_txs_amount_query_test.cpp +++ b/silkworm/db/blocks/bodies/body_txs_amount_query_test.cpp @@ -25,12 +25,12 @@ namespace silkworm::snapshots { -TEST_CASE("BodyTxsAmountQuery") { +TEST_CASE("BodyTxsAmountSegmentQuery") { TemporaryDirectory tmp_dir; test_util::SampleBodySnapshotFile snapshot_file{tmp_dir.path()}; segment::SegmentFileReader snapshot{snapshot_file.path()}; - BodyTxsAmountQuery query{snapshot}; + BodyTxsAmountSegmentQuery query{snapshot}; auto result = query.exec(); CHECK(result.first_tx_id == 7'341'262); diff --git a/silkworm/db/blocks/headers/header_queries.hpp b/silkworm/db/blocks/headers/header_queries.hpp index 0bb7e0e610..869b1cbea5 100644 --- a/silkworm/db/blocks/headers/header_queries.hpp +++ b/silkworm/db/blocks/headers/header_queries.hpp @@ -18,11 +18,21 @@ #include +#include "../schema_config.hpp" #include "header_segment.hpp" namespace silkworm::snapshots { -using HeaderFindByBlockNumQuery = FindByIdQuery; -using HeaderFindByHashQuery = FindByHashQuery; +using HeaderFindByBlockNumSegmentQuery = FindByIdSegmentQuery; + +struct HeaderFindByBlockNumQuery : public FindByTimestampMapQuery { + using FindByTimestampMapQuery::FindByTimestampMapQuery; + std::optional exec(BlockNum block_num) { + return FindByTimestampMapQuery::exec(block_num, block_num); + } +}; + +using HeaderFindByHashSegmentQuery = FindByHashSegmentQuery; +using HeaderFindByHashQuery = FindMapQuery; } // namespace silkworm::snapshots diff --git a/silkworm/db/blocks/schema_config.hpp b/silkworm/db/blocks/schema_config.hpp index 
0d17dc7887..55bb413482 100644 --- a/silkworm/db/blocks/schema_config.hpp +++ b/silkworm/db/blocks/schema_config.hpp @@ -16,7 +16,6 @@ #pragma once -#include #include #include "../datastore/common/entity_name.hpp" @@ -44,7 +43,7 @@ inline constexpr std::string_view kHeaderSegmentTag = kHeaderSegmentName.name; //! Index header_hash -> block_num -> headers_segment_offset inline constexpr datastore::EntityName kIdxHeaderHashName{"headers.idx"}; inline constexpr std::string_view kIdxHeaderHashTag = kHeaderSegmentTag; -inline constexpr std::array kHeaderSegmentAndIdxNames{ +inline constexpr snapshots::SegmentAndAccessorIndexNames kHeaderSegmentAndIdxNames{ snapshots::Schema::kDefaultEntityName, kHeaderSegmentName, kIdxHeaderHashName, @@ -55,7 +54,7 @@ inline constexpr std::string_view kBodySegmentTag = kBodySegmentName.name; //! Index block_num -> bodies_segment_offset inline constexpr datastore::EntityName kIdxBodyNumberName{"bodies.idx"}; inline constexpr std::string_view kIdxBodyNumberTag = kBodySegmentTag; -inline constexpr std::array kBodySegmentAndIdxNames{ +inline constexpr snapshots::SegmentAndAccessorIndexNames kBodySegmentAndIdxNames{ snapshots::Schema::kDefaultEntityName, kBodySegmentName, kIdxBodyNumberName, @@ -66,7 +65,7 @@ inline constexpr std::string_view kTxnSegmentTag = kTxnSegmentName.name; //! 
Index transaction_hash -> txn_id -> transactions_segment_offset inline constexpr datastore::EntityName kIdxTxnHashName{"transactions.idx"}; inline constexpr std::string_view kIdxTxnHashTag = kTxnSegmentTag; -inline constexpr std::array kTxnSegmentAndIdxNames{ +inline constexpr snapshots::SegmentAndAccessorIndexNames kTxnSegmentAndIdxNames{ snapshots::Schema::kDefaultEntityName, kTxnSegmentName, kIdxTxnHashName, diff --git a/silkworm/db/blocks/transactions/txn_index.cpp b/silkworm/db/blocks/transactions/txn_index.cpp index 617e90ddda..fee674dd1d 100644 --- a/silkworm/db/blocks/transactions/txn_index.cpp +++ b/silkworm/db/blocks/transactions/txn_index.cpp @@ -31,7 +31,7 @@ std::pair TransactionIndex::compute_txs_amount( SnapshotPath bodies_segment_path, std::optional bodies_segment_region) { segment::SegmentFileReader body_segment{std::move(bodies_segment_path), bodies_segment_region}; - auto result = BodyTxsAmountQuery{body_segment}.exec(); + auto result = BodyTxsAmountSegmentQuery{body_segment}.exec(); return {result.first_tx_id, result.count}; } diff --git a/silkworm/db/blocks/transactions/txn_queries.hpp b/silkworm/db/blocks/transactions/txn_queries.hpp index 100a0e817c..36a027bb43 100644 --- a/silkworm/db/blocks/transactions/txn_queries.hpp +++ b/silkworm/db/blocks/transactions/txn_queries.hpp @@ -21,62 +21,57 @@ #include #include #include +#include #include "../schema_config.hpp" #include "txn_segment.hpp" namespace silkworm::snapshots { -using TransactionFindByIdQuery = FindByIdQuery; -using TransactionFindByHashQuery = FindByHashQuery; -using TransactionRangeFromIdQuery = RangeFromIdQuery; -using TransactionPayloadRlpRangeFromIdQuery = RangeFromIdQuery>; +using TransactionFindByIdSegmentQuery = FindByIdSegmentQuery; +using TransactionFindByHashSegmentQuery = FindByHashSegmentQuery; -class TransactionBlockNumByTxnHashQuery { +using TransactionRangeFromIdSegmentQuery = RangeFromIdSegmentQuery; +using TransactionRangeFromIdQuery = FindByTimestampMapQuery; + 
+using TransactionPayloadRlpRangeFromIdSegmentQuery = RangeFromIdSegmentQuery, &db::blocks::kTxnSegmentAndIdxNames>; +using TransactionPayloadRlpRangeFromIdQuery = FindByTimestampMapQuery; + +class TransactionBlockNumByTxnHashSegmentQuery { public: - TransactionBlockNumByTxnHashQuery( + TransactionBlockNumByTxnHashSegmentQuery( const rec_split::AccessorIndex& index, - TransactionFindByHashQuery cross_check_query) + TransactionFindByHashSegmentQuery cross_check_query) : index_(index), cross_check_query_(cross_check_query) {} + explicit TransactionBlockNumByTxnHashSegmentQuery( + const SnapshotBundle& bundle) + : TransactionBlockNumByTxnHashSegmentQuery{ + make(db::blocks::BundleDataRef{*bundle})} {} + std::optional exec(const Hash& hash) { // Lookup the entire txn to check that the retrieved txn hash matches (no way to know if key exists in MPHF) const auto transaction = cross_check_query_.exec(hash); - auto result = transaction ? index_.lookup_ordinal_by_hash(hash) : std::nullopt; + auto result = transaction ? 
index_.lookup_by_key(hash) : std::nullopt; return result; } - private: - const rec_split::AccessorIndex& index_; - TransactionFindByHashQuery cross_check_query_; -}; - -template ::value_type> -class TransactionBlockNumByTxnHashMultiQuery { - public: - explicit TransactionBlockNumByTxnHashMultiQuery(TBundlesView bundles) - : bundles_(std::move(bundles)) {} - - std::optional exec(const Hash& hash) { - for (const TBundle& bundle_ptr : bundles_) { - db::blocks::BundleDataRef bundle{**bundle_ptr}; - const segment::SegmentFileReader& segment = bundle.txn_segment(); - const rec_split::AccessorIndex& idx_txn_hash = bundle.idx_txn_hash(); - const rec_split::AccessorIndex& idx_txn_hash_2_block = bundle.idx_txn_hash_2_block(); - - TransactionFindByHashQuery cross_check_query{{segment, idx_txn_hash}}; - TransactionBlockNumByTxnHashQuery query{idx_txn_hash_2_block, cross_check_query}; - auto block_num = query.exec(hash); - if (block_num) { - return block_num; - } - } - return std::nullopt; + static TransactionBlockNumByTxnHashSegmentQuery make(db::blocks::BundleDataRef bundle) { + TransactionFindByHashSegmentQuery cross_check_query{ + SegmentAndAccessorIndex{ + bundle.txn_segment(), + bundle.idx_txn_hash(), + }, + }; + return {bundle.idx_txn_hash_2_block(), cross_check_query}; } private: - TBundlesView bundles_; + const rec_split::AccessorIndex& index_; + TransactionFindByHashSegmentQuery cross_check_query_; }; +using TransactionBlockNumByTxnHashQuery = FindMapQuery; + } // namespace silkworm::snapshots diff --git a/silkworm/db/blocks/transactions/txn_to_block_index.cpp b/silkworm/db/blocks/transactions/txn_to_block_index.cpp index dc2ed3aa9e..003aeb4493 100644 --- a/silkworm/db/blocks/transactions/txn_to_block_index.cpp +++ b/silkworm/db/blocks/transactions/txn_to_block_index.cpp @@ -20,7 +20,7 @@ namespace silkworm::snapshots { -static IndexInputDataQuery::Iterator::value_type query_entry(TxsAndBodiesQuery::Iterator& it) { +static IndexInputDataQuery::Iterator::value_type 
query_entry(TxsAndBodiesSegmentQuery::Iterator& it) { return { .key_data = it->tx_buffer, .value = it->block_num, @@ -28,12 +28,12 @@ static IndexInputDataQuery::Iterator::value_type query_entry(TxsAndBodiesQuery:: } IndexInputDataQuery::Iterator TransactionToBlockIndexInputDataQuery::begin() { - auto impl_it = std::make_shared(query_.begin()); + auto impl_it = std::make_shared(query_.begin()); return IndexInputDataQuery::Iterator{this, impl_it, query_entry(*impl_it)}; } IndexInputDataQuery::Iterator TransactionToBlockIndexInputDataQuery::end() { - auto impl_it = std::make_shared(query_.end()); + auto impl_it = std::make_shared(query_.end()); return IndexInputDataQuery::Iterator{this, impl_it, query_entry(*impl_it)}; } @@ -43,7 +43,7 @@ size_t TransactionToBlockIndexInputDataQuery::keys_count() { std::pair, IndexInputDataQuery::Iterator::value_type> TransactionToBlockIndexInputDataQuery::next_iterator(std::shared_ptr it_impl) { - auto& it_impl_ref = *reinterpret_cast(it_impl.get()); + auto& it_impl_ref = *reinterpret_cast(it_impl.get()); ++it_impl_ref; return {it_impl, query_entry(it_impl_ref)}; } @@ -51,8 +51,8 @@ TransactionToBlockIndexInputDataQuery::next_iterator(std::shared_ptr it_im bool TransactionToBlockIndexInputDataQuery::equal_iterators( std::shared_ptr lhs_it_impl, std::shared_ptr rhs_it_impl) const { - auto lhs = reinterpret_cast(lhs_it_impl.get()); - auto rhs = reinterpret_cast(rhs_it_impl.get()); + auto lhs = reinterpret_cast(lhs_it_impl.get()); + auto rhs = reinterpret_cast(rhs_it_impl.get()); return (*lhs == *rhs); } @@ -68,7 +68,7 @@ IndexBuilder TransactionToBlockIndex::make( auto descriptor = make_descriptor(segment_path, first_block_num, first_tx_id); - TxsAndBodiesQuery data_query{ + TxsAndBodiesSegmentQuery data_query{ std::move(segment_path), segment_region, std::move(bodies_segment_path), diff --git a/silkworm/db/blocks/transactions/txn_to_block_index.hpp b/silkworm/db/blocks/transactions/txn_to_block_index.hpp index 7b47430362..a94fbcbfdb 
100644 --- a/silkworm/db/blocks/transactions/txn_to_block_index.hpp +++ b/silkworm/db/blocks/transactions/txn_to_block_index.hpp @@ -33,7 +33,7 @@ namespace silkworm::snapshots { class TransactionToBlockIndexInputDataQuery : public IndexInputDataQuery { public: - explicit TransactionToBlockIndexInputDataQuery(TxsAndBodiesQuery query) + explicit TransactionToBlockIndexInputDataQuery(TxsAndBodiesSegmentQuery query) : query_(std::move(query)) {} Iterator begin() override; @@ -43,7 +43,7 @@ class TransactionToBlockIndexInputDataQuery : public IndexInputDataQuery { bool equal_iterators(std::shared_ptr lhs_it_impl, std::shared_ptr rhs_it_impl) const override; private: - TxsAndBodiesQuery query_; + TxsAndBodiesSegmentQuery query_; }; class TransactionToBlockIndex { diff --git a/silkworm/db/blocks/transactions/txs_and_bodies_query.cpp b/silkworm/db/blocks/transactions/txs_and_bodies_query.cpp index bfa887c613..99613c4698 100644 --- a/silkworm/db/blocks/transactions/txs_and_bodies_query.cpp +++ b/silkworm/db/blocks/transactions/txs_and_bodies_query.cpp @@ -24,7 +24,7 @@ namespace silkworm::snapshots { -TxsAndBodiesQuery::Iterator::Iterator( +TxsAndBodiesSegmentQuery::Iterator::Iterator( std::shared_ptr txs_decoder, seg::Decompressor::Iterator tx_it, std::shared_ptr bodies_decoder, @@ -48,7 +48,7 @@ TxsAndBodiesQuery::Iterator::Iterator( value_.tx_buffer = *tx_it_; } -void TxsAndBodiesQuery::Iterator::skip_bodies_until_tx_id(uint64_t tx_id) { +void TxsAndBodiesSegmentQuery::Iterator::skip_bodies_until_tx_id(uint64_t tx_id) { while (!(tx_id < value_.body.base_txn_id + value_.body.txn_count)) { ++body_it_; if (body_it_ == bodies_decoder_->end()) { @@ -60,7 +60,7 @@ void TxsAndBodiesQuery::Iterator::skip_bodies_until_tx_id(uint64_t tx_id) { } } -TxsAndBodiesQuery::Iterator& TxsAndBodiesQuery::Iterator::operator++() { +TxsAndBodiesSegmentQuery::Iterator& TxsAndBodiesSegmentQuery::Iterator::operator++() { // check if already at the end if (!txs_decoder_) { return *this; @@ -92,14 
+92,14 @@ TxsAndBodiesQuery::Iterator& TxsAndBodiesQuery::Iterator::operator++() { return *this; } -bool operator==(const TxsAndBodiesQuery::Iterator& lhs, const TxsAndBodiesQuery::Iterator& rhs) { +bool operator==(const TxsAndBodiesSegmentQuery::Iterator& lhs, const TxsAndBodiesSegmentQuery::Iterator& rhs) { return (lhs.txs_decoder_ == rhs.txs_decoder_) && (!lhs.txs_decoder_ || (lhs.tx_it_ == rhs.tx_it_)) && (lhs.bodies_decoder_ == rhs.bodies_decoder_) && (!lhs.bodies_decoder_ || (lhs.body_it_ == rhs.body_it_)); } -void TxsAndBodiesQuery::Iterator::decode_body_rlp(ByteView body_rlp, BlockBodyForStorage& body) { +void TxsAndBodiesSegmentQuery::Iterator::decode_body_rlp(ByteView body_rlp, BlockBodyForStorage& body) { auto decode_result = decode_stored_block_body(body_rlp, body); if (!decode_result) { std::stringstream error; @@ -112,8 +112,8 @@ void TxsAndBodiesQuery::Iterator::decode_body_rlp(ByteView body_rlp, BlockBodyFo } } -TxsAndBodiesQuery::Iterator TxsAndBodiesQuery::begin() const { - std::string log_title = "TxsAndBodiesQuery for: " + txs_segment_path_.path().string(); +TxsAndBodiesSegmentQuery::Iterator TxsAndBodiesSegmentQuery::begin() const { + std::string log_title = "TxsAndBodiesSegmentQuery for: " + txs_segment_path_.path().string(); auto txs_decoder = std::make_shared(txs_segment_path_.path(), txs_segment_region_); @@ -128,7 +128,7 @@ TxsAndBodiesQuery::Iterator TxsAndBodiesQuery::begin() const { auto bodies_decoder = std::make_shared(bodies_segment_path_.path(), bodies_segment_region_); - TxsAndBodiesQuery::Iterator it{ + TxsAndBodiesSegmentQuery::Iterator it{ txs_decoder, txs_decoder->begin(), bodies_decoder, @@ -146,7 +146,7 @@ TxsAndBodiesQuery::Iterator TxsAndBodiesQuery::begin() const { return it; } -TxsAndBodiesQuery::Iterator TxsAndBodiesQuery::end() const { +TxsAndBodiesSegmentQuery::Iterator TxsAndBodiesSegmentQuery::end() const { return Iterator{ {}, seg::Decompressor::Iterator::make_end(), @@ -155,7 +155,7 @@ TxsAndBodiesQuery::Iterator 
TxsAndBodiesQuery::end() const { std::numeric_limits::max(), first_tx_id_, expected_tx_count_, - "TxsAndBodiesQuery::end", + "TxsAndBodiesSegmentQuery::end", }; } diff --git a/silkworm/db/blocks/transactions/txs_and_bodies_query.hpp b/silkworm/db/blocks/transactions/txs_and_bodies_query.hpp index 8fca05c489..2f0e2ac709 100644 --- a/silkworm/db/blocks/transactions/txs_and_bodies_query.hpp +++ b/silkworm/db/blocks/transactions/txs_and_bodies_query.hpp @@ -32,7 +32,7 @@ namespace silkworm::snapshots { -class TxsAndBodiesQuery { +class TxsAndBodiesSegmentQuery { public: class Iterator { public: @@ -85,7 +85,7 @@ class TxsAndBodiesQuery { static_assert(std::input_or_output_iterator); - TxsAndBodiesQuery( + TxsAndBodiesSegmentQuery( SnapshotPath txs_segment_path, std::optional txs_segment_region, SnapshotPath bodies_segment_path, diff --git a/silkworm/db/cli/snapshots.cpp b/silkworm/db/cli/snapshots.cpp index df8f9189b5..42bfc87287 100644 --- a/silkworm/db/cli/snapshots.cpp +++ b/silkworm/db/cli/snapshots.cpp @@ -431,16 +431,16 @@ void open_index(const SnapshotSubcommandSettings& settings) { if (idx.double_enum_index()) { if (settings.lookup_block_num) { const uint64_t data_id{*settings.lookup_block_num}; - const uint64_t enumeration{data_id - idx.base_data_id()}; - if (enumeration < idx.key_count()) { - SILK_INFO << "Offset by ordinal lookup for " << data_id << ": " << idx.lookup_by_ordinal(enumeration); + auto offset = idx.lookup_by_data_id(data_id); + if (offset) { + SILK_INFO << "Offset by data id lookup for " << data_id << ": " << *offset; } else { - SILK_WARN << "Invalid absolute data number " << data_id << " for ordinal lookup"; + SILK_WARN << "Invalid data id " << data_id; } } else { for (size_t i{0}; i < idx.key_count(); ++i) { if (i % (idx.key_count() / 10) == 0) { - SILK_INFO << "Offset by ordinal lookup for " << i << ": " << idx.lookup_by_ordinal(i) + SILK_INFO << "Offset by ordinal lookup for " << i << ": " << idx.lookup_by_ordinal({i}) << " [existence 
filter: " << int{idx.existence_filter()[i]} << "]"; } } @@ -686,7 +686,7 @@ void lookup_header_by_hash(const SnapshotSubcommandSettings& settings) { for (const auto& bundle_ptr : repository.view_bundles_reverse()) { const auto& bundle = *bundle_ptr; auto segment_and_index = bundle.segment_and_accessor_index(db::blocks::kHeaderSegmentAndIdxNames); - const auto header = HeaderFindByHashQuery{segment_and_index}.exec(*hash); + const auto header = HeaderFindByHashSegmentQuery{segment_and_index}.exec(*hash); if (header) { matching_header = header; matching_snapshot_path = segment_and_index.segment.path(); @@ -714,7 +714,7 @@ void lookup_header_by_number(const SnapshotSubcommandSettings& settings) { auto repository = make_repository(settings.settings); const auto [segment_and_index, _] = repository.find_segment(db::blocks::kHeaderSegmentAndIdxNames, block_num); if (segment_and_index) { - const auto header = HeaderFindByBlockNumQuery{*segment_and_index}.exec(block_num); + const auto header = HeaderFindByBlockNumSegmentQuery{*segment_and_index}.exec(block_num); ensure(header.has_value(), [&]() { return "lookup_header_by_number: " + std::to_string(block_num) + " NOT found in " + segment_and_index->segment.path().filename(); }); SILK_INFO << "Lookup header number: " << block_num << " found in: " << segment_and_index->segment.path().filename(); @@ -754,7 +754,7 @@ void lookup_body_in_one(const SnapshotSubcommandSettings& settings, BlockNum blo rec_split::AccessorIndex idx_body_number{snapshot_path->related_path_ext(db::blocks::kIdxExtension)}; - const auto body = BodyFindByBlockNumQuery{{body_segment, idx_body_number}}.exec(block_num); + const auto body = BodyFindByBlockNumSegmentQuery{{body_segment, idx_body_number}}.exec(block_num); if (body) { SILK_INFO << "Lookup body number: " << block_num << " found in: " << body_segment.path().filename(); if (settings.verbose) { @@ -773,7 +773,7 @@ void lookup_body_in_all(const SnapshotSubcommandSettings& settings, BlockNum blo 
std::chrono::time_point start{std::chrono::steady_clock::now()}; const auto [segment_and_index, _] = repository.find_segment(db::blocks::kBodySegmentAndIdxNames, block_num); if (segment_and_index) { - const auto body = BodyFindByBlockNumQuery{*segment_and_index}.exec(block_num); + const auto body = BodyFindByBlockNumSegmentQuery{*segment_and_index}.exec(block_num); ensure(body.has_value(), [&]() { return "lookup_body: " + std::to_string(block_num) + " NOT found in " + segment_and_index->segment.path().filename(); }); SILK_INFO << "Lookup body number: " << block_num << " found in: " << segment_and_index->segment.path().filename(); @@ -873,7 +873,7 @@ void lookup_txn_by_hash_in_one(const SnapshotSubcommandSettings& settings, const { rec_split::AccessorIndex idx_txn_hash{snapshot_path->related_path_ext(db::blocks::kIdxExtension)}; - const auto transaction = TransactionFindByHashQuery{{txn_segment, idx_txn_hash}}.exec(hash); + const auto transaction = TransactionFindByHashSegmentQuery{{txn_segment, idx_txn_hash}}.exec(hash); if (transaction) { SILK_INFO << "Lookup txn hash: " << hash.to_hex() << " found in: " << txn_segment.path().filename(); if (settings.verbose) { @@ -895,7 +895,7 @@ void lookup_txn_by_hash_in_all(const SnapshotSubcommandSettings& settings, const for (const auto& bundle_ptr : repository.view_bundles_reverse()) { const auto& bundle = *bundle_ptr; auto segment_and_index = bundle.segment_and_accessor_index(db::blocks::kTxnSegmentAndIdxNames); - const auto transaction = TransactionFindByHashQuery{segment_and_index}.exec(hash); + const auto transaction = TransactionFindByHashSegmentQuery{segment_and_index}.exec(hash); if (transaction) { matching_snapshot_path = segment_and_index.segment.path(); if (settings.verbose) { @@ -935,7 +935,7 @@ void lookup_txn_by_id_in_one(const SnapshotSubcommandSettings& settings, uint64_ { rec_split::AccessorIndex idx_txn_hash{snapshot_path->related_path_ext(db::blocks::kIdxExtension)}; - const auto transaction = 
TransactionFindByIdQuery{{txn_segment, idx_txn_hash}}.exec(txn_id); + const auto transaction = TransactionFindByIdSegmentQuery{{txn_segment, idx_txn_hash}}.exec(txn_id); if (transaction) { SILK_INFO << "Lookup txn ID: " << txn_id << " found in: " << txn_segment.path().filename(); if (settings.verbose) { @@ -957,7 +957,7 @@ void lookup_txn_by_id_in_all(const SnapshotSubcommandSettings& settings, uint64_ for (const auto& bundle_ptr : repository.view_bundles_reverse()) { const auto& bundle = *bundle_ptr; auto segment_and_index = bundle.segment_and_accessor_index(db::blocks::kTxnSegmentAndIdxNames); - const auto transaction = TransactionFindByIdQuery{segment_and_index}.exec(txn_id); + const auto transaction = TransactionFindByIdSegmentQuery{segment_and_index}.exec(txn_id); if (transaction) { matching_snapshot_path = segment_and_index.segment.path(); if (settings.verbose) { diff --git a/silkworm/db/datastore/snapshots/basic_queries.hpp b/silkworm/db/datastore/snapshots/basic_queries.hpp index 7d0aa76d4e..880fddc849 100644 --- a/silkworm/db/datastore/snapshots/basic_queries.hpp +++ b/silkworm/db/datastore/snapshots/basic_queries.hpp @@ -18,30 +18,39 @@ #include #include +#include #include #include "segment/segment_reader.hpp" #include "segment_and_accessor_index.hpp" +#include "snapshot_repository_ro_access.hpp" namespace silkworm::snapshots { -template -class BasicQuery { +template < + segment::SegmentReaderConcept TSegmentReader, + const SegmentAndAccessorIndexNames* segment_names> +class BasicSegmentQuery { public: - explicit BasicQuery( + explicit BasicSegmentQuery( const SegmentAndAccessorIndex segment_and_index) : reader_{segment_and_index.segment}, index_{segment_and_index.index} {} + explicit BasicSegmentQuery(const SegmentAndAccessorIndexProvider& bundle) + : BasicSegmentQuery{bundle.segment_and_accessor_index(*segment_names)} {} + protected: TSegmentReader reader_; const rec_split::AccessorIndex& index_; }; -template -struct FindByIdQuery : public BasicQuery { 
- using BasicQuery::BasicQuery; +template < + segment::SegmentReaderConcept TSegmentReader, + const SegmentAndAccessorIndexNames* segment_names> +struct FindByIdSegmentQuery : public BasicSegmentQuery { + using BasicSegmentQuery::BasicSegmentQuery; std::optional exec(uint64_t id) { auto offset = this->index_.lookup_by_data_id(id); @@ -53,12 +62,14 @@ struct FindByIdQuery : public BasicQuery { } }; -template -struct FindByHashQuery : public BasicQuery { - using BasicQuery::BasicQuery; +template < + segment::SegmentReaderConcept TSegmentReader, + const SegmentAndAccessorIndexNames* segment_names> +struct FindByHashSegmentQuery : public BasicSegmentQuery { + using BasicSegmentQuery::BasicSegmentQuery; std::optional exec(const Hash& hash) { - auto offset = this->index_.lookup_by_hash(hash); + auto offset = this->index_.lookup_by_key(hash); if (!offset) { return std::nullopt; } @@ -74,18 +85,63 @@ struct FindByHashQuery : public BasicQuery { } }; -template -struct RangeFromIdQuery : public BasicQuery { - using BasicQuery::BasicQuery; +template < + segment::SegmentReaderConcept TSegmentReader, + const SegmentAndAccessorIndexNames* segment_names> +struct RangeFromIdSegmentQuery : public BasicSegmentQuery { + using BasicSegmentQuery::BasicSegmentQuery; - std::vector exec_into_vector(uint64_t first_id, uint64_t count) { + std::optional> exec(uint64_t first_id, uint64_t count) { auto offset = this->index_.lookup_by_data_id(first_id); if (!offset) { - return {}; + return std::nullopt; } return this->reader_.read_into_vector(*offset, count); } }; +//! Given a TSegmentQuery that returns an optional value, runs it for all bundles and returns the last non-null result. +//! Iterating backwards by default is an optimization assuming that results are often found in the most recent snapshots. +template +struct FindMapQuery { + explicit FindMapQuery(const SnapshotRepositoryROAccess& repository) + : repository_{repository} {} + + auto exec(auto&&... 
args) { + for (const auto& bundle_ptr : repository_.view_bundles_reverse()) { + TSegmentQuery query{*bundle_ptr}; + auto result = query.exec(args...); + if (result) { + return result; + } + } + // std::nullopt + return decltype(std::declval().exec(args...)){}; + } + + protected: + const SnapshotRepositoryROAccess& repository_; +}; + +//! Given a timestamp and a TSegmentQuery, runs it for a bundle located by that timestamp. +template +struct FindByTimestampMapQuery { + explicit FindByTimestampMapQuery(const SnapshotRepositoryROAccess& repository) + : repository_{repository} {} + + auto exec(SnapshotRepositoryROAccess::Timestamp t, auto&&... args) { + auto bundle_ptr = repository_.find_bundle(t); + if (bundle_ptr) { + TSegmentQuery query{*bundle_ptr}; + return query.exec(args...); + } + // std::nullopt + return decltype(std::declval().exec(args...)){}; + } + + protected: + const SnapshotRepositoryROAccess& repository_; +}; + } // namespace silkworm::snapshots diff --git a/silkworm/db/datastore/snapshots/common/util/iterator/map_values_view.hpp b/silkworm/db/datastore/snapshots/common/util/iterator/map_values_view.hpp index 9eec6b57c6..c1ab875a8f 100644 --- a/silkworm/db/datastore/snapshots/common/util/iterator/map_values_view.hpp +++ b/silkworm/db/datastore/snapshots/common/util/iterator/map_values_view.hpp @@ -21,7 +21,7 @@ #include #include -namespace silkworm { +namespace silkworm::map_values_view::fallback { template class MapValuesView : std::ranges::view_interface> { @@ -75,16 +75,44 @@ class MapValuesView : std::ranges::view_interface -auto make_map_values_view(const std::map& map) { - // std::views::values is not present on clang 15 +class MapValuesView : public std::ranges::view_interface> { + public: + using Map = std::map; + + explicit MapValuesView(const Map& map) + : base_view_{std::views::values(map)} {} + + auto begin() const { return base_view_.begin(); } + auto end() const { return base_view_.end(); } + + private: + decltype(std::views::values([]() 
-> const Map& { throw 1; }())) base_view_; +}; + +} // namespace silkworm::map_values_view::builtin + +namespace silkworm { + +// std::views::values is not present on clang 15 #if defined(__clang__) && (__clang_major__ <= 15) && !defined(__apple_build_version__) - return MapValuesView{map}; +using silkworm::map_values_view::fallback::MapValuesView; #elif defined(__clang__) && (__clang_major__ <= 14) && defined(__apple_build_version__) // clang 15 == Apple clang 14 - return MapValuesView{map}; +using silkworm::map_values_view::fallback::MapValuesView; #else - return std::views::values(map); +using silkworm::map_values_view::builtin::MapValuesView; #endif + +template +MapValuesView make_map_values_view(const std::map& map) { + return MapValuesView{map}; } +template +using MapValuesViewReverse = decltype(std::ranges::reverse_view([]() -> MapValuesView&& { throw 1; }())); + } // namespace silkworm diff --git a/silkworm/db/datastore/snapshots/elias_fano/elias_fano.hpp b/silkworm/db/datastore/snapshots/elias_fano/elias_fano.hpp index a83cf9fd54..89927d569a 100644 --- a/silkworm/db/datastore/snapshots/elias_fano/elias_fano.hpp +++ b/silkworm/db/datastore/snapshots/elias_fano/elias_fano.hpp @@ -148,7 +148,10 @@ class EliasFanoList32 { std::copy(data.begin(), data.end(), reinterpret_cast(data_.data())); } - size_t sequence_length() const { return count_ + 1; } + size_t sequence_length() const { + if (u_ == 0) return 0; + return count_ + 1; + } size_t count() const { return count_; } @@ -254,7 +257,13 @@ class EliasFanoList32 { (data_ == other.data_); } + static EliasFanoList32 empty_list() { + return EliasFanoList32{}; + } + private: + EliasFanoList32() {} + uint64_t derive_fields() { l_ = u_ / (count_ + 1) == 0 ? 
0 : 63 ^ static_cast(std::countl_zero(u_ / (count_ + 1))); lower_bits_mask_ = (uint64_t{1} << l_) - 1; diff --git a/silkworm/db/datastore/snapshots/elias_fano/elias_fano_decoder.hpp b/silkworm/db/datastore/snapshots/elias_fano/elias_fano_decoder.hpp index 375f4b190e..41904b1be5 100644 --- a/silkworm/db/datastore/snapshots/elias_fano/elias_fano_decoder.hpp +++ b/silkworm/db/datastore/snapshots/elias_fano/elias_fano_decoder.hpp @@ -24,7 +24,7 @@ namespace silkworm::snapshots::elias_fano { struct EliasFanoDecoder : public snapshots::Decoder { - std::optional value; + EliasFanoList32 value{EliasFanoList32::empty_list()}; ~EliasFanoDecoder() override = default; diff --git a/silkworm/db/datastore/snapshots/elias_fano/elias_fano_decoder_test.cpp b/silkworm/db/datastore/snapshots/elias_fano/elias_fano_decoder_test.cpp index 375868dfd1..94cd60960a 100644 --- a/silkworm/db/datastore/snapshots/elias_fano/elias_fano_decoder_test.cpp +++ b/silkworm/db/datastore/snapshots/elias_fano/elias_fano_decoder_test.cpp @@ -36,7 +36,6 @@ TEST_CASE("EliasFanoDecoder") { EliasFanoDecoder decoder; decoder.decode_word(string_view_to_byte_view(expected_list_str)); - REQUIRE(decoder.value.has_value()); CHECK(decoder.value == expected_list); } diff --git a/silkworm/db/datastore/snapshots/index_builders_factory.hpp b/silkworm/db/datastore/snapshots/index_builders_factory.hpp index fab263cdd4..44139fe6ec 100644 --- a/silkworm/db/datastore/snapshots/index_builders_factory.hpp +++ b/silkworm/db/datastore/snapshots/index_builders_factory.hpp @@ -16,14 +16,11 @@ #pragma once -#include -#include #include #include #include "common/snapshot_path.hpp" #include "index_builder.hpp" -#include "snapshot_bundle.hpp" namespace silkworm::snapshots { diff --git a/silkworm/db/datastore/snapshots/rec_split/accessor_index.hpp b/silkworm/db/datastore/snapshots/rec_split/accessor_index.hpp index 65291c99b4..f89f9e40e3 100644 --- a/silkworm/db/datastore/snapshots/rec_split/accessor_index.hpp +++ 
b/silkworm/db/datastore/snapshots/rec_split/accessor_index.hpp @@ -16,54 +16,33 @@ #pragma once -#include -#include #include -#include -#include - #include "../common/snapshot_path.hpp" #include "rec_split.hpp" namespace silkworm::snapshots::rec_split { -class AccessorIndex { +class AccessorIndex : private RecSplitIndex { public: explicit AccessorIndex( SnapshotPath path, std::optional region = std::nullopt) - : path_{std::move(path)}, - index_{path_.path(), region} { + : RecSplitIndex{path.path(), region}, + path_{std::move(path)} { } - std::optional lookup_by_data_id(uint64_t id) const { - return index_.lookup_by_data_id(id); - } + using RecSplitIndex::lookup_by_data_id; + using RecSplitIndex::lookup_by_key; - std::optional lookup_by_hash(const Hash& hash) const { - return index_.lookup_by_key(hash); - } - - std::optional lookup_ordinal_by_hash(const Hash& hash) const { - auto [result, found] = index_.lookup(hash); - return found ? std::optional{result} : std::nullopt; - } + using RecSplitIndex::base_data_id; + using RecSplitIndex::memory_file_region; const SnapshotPath& path() const { return path_; } const std::filesystem::path& fs_path() const { return path_.path(); } - MemoryMappedRegion memory_file_region() const { - return index_.memory_file_region(); - } - - uint64_t base_data_id() const { - return index_.base_data_id(); - } - private: SnapshotPath path_; - rec_split::RecSplitIndex index_; }; } // namespace silkworm::snapshots::rec_split diff --git a/silkworm/db/datastore/snapshots/rec_split/rec_split.hpp b/silkworm/db/datastore/snapshots/rec_split/rec_split.hpp index d9a831357f..13385a15b1 100644 --- a/silkworm/db/datastore/snapshots/rec_split/rec_split.hpp +++ b/silkworm/db/datastore/snapshots/rec_split/rec_split.hpp @@ -641,14 +641,31 @@ class RecSplit { //! Return the value associated with the given key within the MPHF mapping size_t operator()(const std::string& key) const { return operator()(string_view_to_byte_view(key)); } - //! 
Search result: value position and flag indicating if found or not - using LookupResult = std::pair; + /** + * If RecSplitFeatures::kEnums (double_enum_index_) is enabled: + * Ordinal is an index of an item from the [0, key_count()) interval. + * It is the output of the MPHF mapping, and the input to the EF mapping: + * - MPHF(key) = ordinal; + * - EF(ordinal) = value (offset); + * It can be converted to "data id" using base_data_id(): + * data_id = base_data_id + ordinal + * + * If RecSplitFeatures::kEnums (double_enum_index_) is disabled: + * Ordinal is just the value (offset) output of the MPHF mapping: + * - MPHF(key) = value (offset) = ordinal; + * In this case base_data_id() is not applicable. + */ + struct Ordinal { + uint64_t value{0}; + }; //! Return the value associated with the given key within the index - LookupResult lookup(const std::string& key) const { return lookup(string_view_to_byte_view(key)); } + std::optional lookup_ordinal_by_key(const std::string& key) const { + return lookup_ordinal_by_key(string_view_to_byte_view(key)); + } //! 
Return the value associated with the given key within the index - LookupResult lookup(ByteView key) const { + std::optional lookup_ordinal_by_key(ByteView key) const { const Hash128& hashed_key{murmur_hash_3(key)}; const auto record = operator()(hashed_key); const auto position = 1 + 8 + bytes_per_record_ * (record + 1); @@ -657,15 +674,27 @@ class RecSplit { ensure(position + sizeof(uint64_t) < region.size(), [&]() { return "position: " + std::to_string(position) + " plus 8 exceeds file length"; }); const auto value = endian::load_big_u64(region.data() + position) & record_mask_; - if (less_false_positives_ && value < existence_filter_.size()) { - return {value, existence_filter_.at(value) == static_cast(hashed_key.first)}; + + if (less_false_positives_ && (value < existence_filter_.size()) && + (existence_filter_.at(value) != static_cast(hashed_key.first))) { + return std::nullopt; } - return {value, true}; + + return Ordinal{value}; } //! Return the offset of the i-th element in the index. Perfect hash table lookup is not performed, //! only access to the Elias-Fano structure containing all offsets - size_t lookup_by_ordinal(uint64_t i) const { return ef_offsets_->get(i); } + size_t lookup_by_ordinal(Ordinal ord) const { + SILKWORM_ASSERT(double_enum_index_); + return ef_offsets_->get(ord.value); + } + + std::optional lookup_data_id_by_key(ByteView key) const { + SILKWORM_ASSERT(double_enum_index_); + auto ord = lookup_ordinal_by_key(key); + return ord ? std::optional{ord->value + base_data_id()} : std::nullopt; + } std::optional lookup_by_data_id(uint64_t data_id) const { // check if data_id is not out of range @@ -675,12 +704,13 @@ class RecSplit { return std::nullopt; } - return lookup_by_ordinal(data_id - base_data_id()); + return lookup_by_ordinal(Ordinal{data_id - base_data_id()}); } std::optional lookup_by_key(ByteView key) const { - auto [i, found] = lookup(key); - return found ? 
std::optional{lookup_by_ordinal(i)} : std::nullopt; + auto ord = lookup_ordinal_by_key(key); + if (!ord) return std::nullopt; + return double_enum_index_ ? lookup_by_ordinal(*ord) : std::optional{ord->value}; } //! Return the number of keys used to build the RecSplit instance diff --git a/silkworm/db/datastore/snapshots/rec_split/rec_split_par_test.cpp b/silkworm/db/datastore/snapshots/rec_split/rec_split_par_test.cpp index b423dbd84c..0cd2416fc2 100644 --- a/silkworm/db/datastore/snapshots/rec_split/rec_split_par_test.cpp +++ b/silkworm/db/datastore/snapshots/rec_split/rec_split_par_test.cpp @@ -254,9 +254,9 @@ TEST_CASE("RecSplit8-Par: double index lookup", "[silkworm][node][recsplit]") { RecSplit8 rs2{settings.index_path}; for (size_t i{0}; i < settings.keys_count; ++i) { - const auto [enumeration_index, found] = rs2.lookup("key " + std::to_string(i)); + const auto enumeration_index = rs2.lookup_ordinal_by_key("key " + std::to_string(i)); + REQUIRE(enumeration_index); CHECK(enumeration_index == i); - CHECK(found); CHECK(rs2.lookup_by_ordinal(enumeration_index) == i * 17); } } diff --git a/silkworm/db/datastore/snapshots/rec_split/rec_split_seq_test.cpp b/silkworm/db/datastore/snapshots/rec_split/rec_split_seq_test.cpp index aade164db9..703772c80f 100644 --- a/silkworm/db/datastore/snapshots/rec_split/rec_split_seq_test.cpp +++ b/silkworm/db/datastore/snapshots/rec_split/rec_split_seq_test.cpp @@ -249,9 +249,9 @@ TEST_CASE("RecSplit8: double index lookup", "[silkworm][snapshots][recsplit]") { RecSplit8 rs2{settings.index_path}; for (size_t i{0}; i < settings.keys_count; ++i) { - const auto [enumeration_index, found] = rs2.lookup("key " + std::to_string(i)); + const auto enumeration_index = rs2.lookup_ordinal_by_key("key " + std::to_string(i)); + REQUIRE(enumeration_index); CHECK(enumeration_index == i); - CHECK(found); CHECK(rs2.lookup_by_ordinal(enumeration_index) == i * 17); } } diff --git a/silkworm/db/datastore/snapshots/segment/kv_segment_reader.hpp 
b/silkworm/db/datastore/snapshots/segment/kv_segment_reader.hpp index a0aeba9715..e567020f9c 100644 --- a/silkworm/db/datastore/snapshots/segment/kv_segment_reader.hpp +++ b/silkworm/db/datastore/snapshots/segment/kv_segment_reader.hpp @@ -183,7 +183,6 @@ class KVSegmentReader { std::optional seek_one(uint64_t offset, std::optional hash_prefix = std::nullopt) const { auto it = seek(offset, hash_prefix); - auto& [key, value] = *it; return (it != end()) ? std::optional{it.move_value()} : std::nullopt; } @@ -255,8 +254,78 @@ class KVSegmentKeysReader { std::optional seek_one(uint64_t offset, std::optional hash_prefix = std::nullopt) const { auto it = seek(offset, hash_prefix); - auto& [key, value] = *it; - return (it != end()) ? std::optional{it.move_value()} : std::nullopt; + return (it != end()) ? std::optional{std::move(*it)} : std::nullopt; + } + + const SnapshotPath& path() const { return reader_.path(); } + + private: + const KVSegmentFileReader& reader_; +}; + +template +class KVSegmentValuesReader { + public: + class Iterator { + public: + using value_type = decltype(TValueDecoder::value); + using iterator_category [[maybe_unused]] = std::input_iterator_tag; + using difference_type = std::ptrdiff_t; + using pointer = value_type*; + using reference = value_type&; + + explicit Iterator(KVSegmentFileReader::Iterator it) + : it_(std::move(it)) {} + + reference operator*() const { return value(); } + pointer operator->() const { return &value(); } + + Iterator operator++(int) { return std::exchange(*this, ++Iterator{*this}); } + Iterator& operator++() { + ++it_; + return *this; + } + + Iterator& operator+=(size_t count) { + it_ += count; + return *this; + } + + friend bool operator!=(const Iterator& lhs, const Iterator& rhs) = default; + friend bool operator==(const Iterator& lhs, const Iterator& rhs) = default; + + private: + value_type& value() const { + Decoder& base_value_decoder = *(it_->second); + // dynamic_cast is safe because TValueDecoder was used when 
creating the Iterator + auto& value_decoder = dynamic_cast(base_value_decoder); + return value_decoder.value; + } + + KVSegmentFileReader::Iterator it_; + }; + + static_assert(std::input_iterator); + + using ValueDecoderType = TValueDecoder; + + explicit KVSegmentValuesReader(const KVSegmentFileReader& reader) : reader_(reader) {} + + Iterator begin() const { + return Iterator{reader_.begin({}, std::make_shared())}; + } + + Iterator end() const { + return Iterator{reader_.end()}; + } + + Iterator seek(uint64_t offset, std::optional hash_prefix = std::nullopt) const { + return Iterator{reader_.seek(offset, hash_prefix, {}, std::make_shared())}; + } + + std::optional seek_one(uint64_t offset, std::optional hash_prefix = std::nullopt) const { + auto it = seek(offset, hash_prefix); + return (it != end()) ? std::optional{std::move(*it)} : std::nullopt; } const SnapshotPath& path() const { return reader_.path(); } @@ -275,4 +344,9 @@ concept KVSegmentKeysReaderConcept = std::same_as> || std::derived_from>; +template +concept KVSegmentValuesReaderConcept = + std::same_as> || + std::derived_from>; + } // namespace silkworm::snapshots::segment diff --git a/silkworm/db/datastore/snapshots/segment_and_accessor_index.hpp b/silkworm/db/datastore/snapshots/segment_and_accessor_index.hpp index 77db4e8298..5c3bb657d4 100644 --- a/silkworm/db/datastore/snapshots/segment_and_accessor_index.hpp +++ b/silkworm/db/datastore/snapshots/segment_and_accessor_index.hpp @@ -16,6 +16,9 @@ #pragma once +#include + +#include "../common/entity_name.hpp" #include "rec_split/accessor_index.hpp" #include "segment/segment_reader.hpp" @@ -26,4 +29,12 @@ struct SegmentAndAccessorIndex { const rec_split::AccessorIndex& index; }; +using SegmentAndAccessorIndexNames = std::array; + +struct SegmentAndAccessorIndexProvider { + virtual ~SegmentAndAccessorIndexProvider() = default; + virtual SegmentAndAccessorIndex segment_and_accessor_index( + const SegmentAndAccessorIndexNames& names) const = 0; +}; + } // 
namespace silkworm::snapshots diff --git a/silkworm/db/datastore/snapshots/snapshot_bundle.hpp b/silkworm/db/datastore/snapshots/snapshot_bundle.hpp index a2124c8b3d..ceb2cfa8c9 100644 --- a/silkworm/db/datastore/snapshots/snapshot_bundle.hpp +++ b/silkworm/db/datastore/snapshots/snapshot_bundle.hpp @@ -16,7 +16,6 @@ #pragma once -#include #include #include #include @@ -75,7 +74,7 @@ struct SnapshotBundlePaths { StepRange step_range_; }; -struct SnapshotBundle { +struct SnapshotBundle : public SegmentAndAccessorIndexProvider { using StepRange = datastore::StepRange; SnapshotBundle(StepRange step_range, SnapshotBundleData data) @@ -90,7 +89,7 @@ struct SnapshotBundle { range, open_bundle_data(schema, dir_path, range), } {} - virtual ~SnapshotBundle(); + ~SnapshotBundle() override; SnapshotBundle(SnapshotBundle&&) = default; SnapshotBundle& operator=(SnapshotBundle&&) noexcept = default; @@ -105,7 +104,7 @@ struct SnapshotBundle { datastore::EntityName entity_name, datastore::EntityName index_name) const; SegmentAndAccessorIndex segment_and_accessor_index( - std::array names) const { + const SegmentAndAccessorIndexNames& names) const override { return { segment(names[0], names[1]), accessor_index(names[0], names[2]), diff --git a/silkworm/db/datastore/snapshots/snapshot_repository.cpp b/silkworm/db/datastore/snapshots/snapshot_repository.cpp index 469689a1aa..10e38ae622 100644 --- a/silkworm/db/datastore/snapshots/snapshot_repository.cpp +++ b/silkworm/db/datastore/snapshots/snapshot_repository.cpp @@ -23,6 +23,8 @@ #include #include +#include "index_builders_factory.hpp" + namespace silkworm::snapshots { namespace fs = std::filesystem; @@ -91,9 +93,9 @@ Step SnapshotRepository::max_end_step() const { } std::pair, std::shared_ptr> SnapshotRepository::find_segment( - std::array names, + const SegmentAndAccessorIndexNames& names, Timestamp t) const { - auto bundle = find_bundle(step_converter_->step_from_timestamp(t)); + auto bundle = find_bundle(t); if (bundle) { 
return {bundle->segment_and_accessor_index(names), bundle}; } @@ -154,6 +156,10 @@ void SnapshotRepository::reopen_folder() { << " max block available: " << max_block_available(); } +std::shared_ptr SnapshotRepository::find_bundle(Timestamp t) const { + return find_bundle(step_converter_->step_from_timestamp(t)); +} + std::shared_ptr SnapshotRepository::find_bundle(Step step) const { // Search for target segment in reverse order (from the newest segment to the oldest one) for (const auto& bundle_ptr : this->view_bundles_reverse()) { diff --git a/silkworm/db/datastore/snapshots/snapshot_repository.hpp b/silkworm/db/datastore/snapshots/snapshot_repository.hpp index 7ccc070508..c3fe889f77 100644 --- a/silkworm/db/datastore/snapshots/snapshot_repository.hpp +++ b/silkworm/db/datastore/snapshots/snapshot_repository.hpp @@ -29,22 +29,21 @@ #include "../common/entity_name.hpp" #include "common/snapshot_path.hpp" -#include "common/util/iterator/map_values_view.hpp" -#include "index_builder.hpp" -#include "index_builders_factory.hpp" #include "segment_and_accessor_index.hpp" #include "snapshot_bundle.hpp" +#include "snapshot_repository_ro_access.hpp" namespace silkworm::snapshots { struct IndexBuilder; +struct IndexBuildersFactory; //! Read-only repository for all snapshot files. //! @details Some simplifications are currently in place: //! - all snapshots of given blocks range must exist (to make such range available) //! - gaps in blocks range are not allowed //! 
- segments have [from:to) semantic -class SnapshotRepository { +class SnapshotRepository : public SnapshotRepositoryROAccess { public: using Timestamp = datastore::Timestamp; using Step = datastore::Step; @@ -60,6 +59,8 @@ class SnapshotRepository { SnapshotRepository(SnapshotRepository&&) = default; SnapshotRepository& operator=(SnapshotRepository&&) noexcept = default; + ~SnapshotRepository() override = default; + const std::filesystem::path& path() const { return dir_path_; } const Schema::RepositoryDef& schema() const { return schema_; }; @@ -70,51 +71,31 @@ class SnapshotRepository { //! Replace bundles whose ranges are contained within the given bundle void replace_snapshot_bundles(SnapshotBundle bundle); - size_t bundles_count() const; + size_t bundles_count() const override; - //! All types of .seg and .idx files are available up to this block number - BlockNum max_block_available() const; - Timestamp max_timestamp_available() const; + BlockNum max_block_available() const override; + Timestamp max_timestamp_available() const override; std::vector> missing_indexes() const; void remove_stale_indexes() const; void build_indexes(const SnapshotBundlePaths& bundle) const; - using Bundles = std::map>; - - template - class BundlesView : public std::ranges::view_interface> { - public: - BundlesView( - TBaseView base_view, - std::shared_ptr bundles) - : base_view_(std::move(base_view)), - bundles_(std::move(bundles)) {} - - auto begin() const { return base_view_.begin(); } - auto end() const { return base_view_.end(); } - - private: - TBaseView base_view_; - std::shared_ptr bundles_{}; - }; - - auto view_bundles() const { + BundlesView> view_bundles() const override { std::scoped_lock lock(*bundles_mutex_); return BundlesView{make_map_values_view(*bundles_), bundles_}; } - - auto view_bundles_reverse() const { + BundlesView> view_bundles_reverse() const override { std::scoped_lock lock(*bundles_mutex_); return 
BundlesView{std::ranges::reverse_view(make_map_values_view(*bundles_)), bundles_}; } std::pair, std::shared_ptr> find_segment( - std::array names, - Timestamp t) const; - std::shared_ptr find_bundle(Step step) const; + const SegmentAndAccessorIndexNames& names, + Timestamp t) const override; + std::shared_ptr find_bundle(Timestamp t) const override; + std::shared_ptr find_bundle(Step step) const override; - std::vector> bundles_in_range(StepRange range) const; + std::vector> bundles_in_range(StepRange range) const override; private: Step max_end_step() const; diff --git a/silkworm/db/datastore/snapshots/snapshot_repository_ro_access.hpp b/silkworm/db/datastore/snapshots/snapshot_repository_ro_access.hpp new file mode 100644 index 0000000000..9ce366f3d1 --- /dev/null +++ b/silkworm/db/datastore/snapshots/snapshot_repository_ro_access.hpp @@ -0,0 +1,80 @@ +/* + Copyright 2024 The Silkworm Authors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include "../common/entity_name.hpp" +#include "../common/step.hpp" +#include "common/util/iterator/map_values_view.hpp" +#include "segment_and_accessor_index.hpp" + +namespace silkworm::snapshots { + +struct SnapshotBundle; + +struct SnapshotRepositoryROAccess { + using Timestamp = datastore::Timestamp; + using Step = datastore::Step; + using StepRange = datastore::StepRange; + using Bundles = std::map>; + + template + class BundlesView : public std::ranges::view_interface> { + public: + BundlesView( + TBaseView base_view, + std::shared_ptr bundles) + : base_view_(std::move(base_view)), + bundles_(std::move(bundles)) {} + + auto begin() const { return base_view_.begin(); } + auto end() const { return base_view_.end(); } + + private: + TBaseView base_view_; + std::shared_ptr bundles_{}; + }; + + virtual ~SnapshotRepositoryROAccess() = default; + + virtual size_t bundles_count() const = 0; + + //! All types of .seg and .idx files are available up to this block number + virtual BlockNum max_block_available() const = 0; + //! 
All types of .seg and .idx files are available up to this timestamp + virtual Timestamp max_timestamp_available() const = 0; + + virtual BundlesView> view_bundles() const = 0; + virtual BundlesView> view_bundles_reverse() const = 0; + + virtual std::pair, std::shared_ptr> find_segment( + const SegmentAndAccessorIndexNames& names, + Timestamp t) const = 0; + virtual std::shared_ptr find_bundle(Timestamp t) const = 0; + virtual std::shared_ptr find_bundle(Step step) const = 0; + + virtual std::vector> bundles_in_range(StepRange range) const = 0; +}; + +} // namespace silkworm::snapshots diff --git a/silkworm/db/freezer.cpp b/silkworm/db/freezer.cpp index 16eab2db66..3949e0bf3a 100644 --- a/silkworm/db/freezer.cpp +++ b/silkworm/db/freezer.cpp @@ -61,7 +61,7 @@ static BlockNum get_first_stored_header_num(ROTxn& txn) { } static std::optional get_next_base_txn_id(SnapshotRepository& repository, BlockNum block_num) { - auto body = BodyFindByBlockNumMultiQuery{repository}.exec(block_num); + auto body = BodyFindByBlockNumQuery{repository}.exec(block_num); if (!body) return std::nullopt; return body->base_txn_id + body->txn_count; } diff --git a/silkworm/db/snapshot_repository_test.cpp b/silkworm/db/snapshot_repository_test.cpp index ea597d4417..e8e748ab4f 100644 --- a/silkworm/db/snapshot_repository_test.cpp +++ b/silkworm/db/snapshot_repository_test.cpp @@ -229,7 +229,7 @@ TEST_CASE("SnapshotRepository::find_block_num", "[silkworm][node][snapshot]") { auto repository = make_repository(tmp_dir.path()); - TransactionBlockNumByTxnHashMultiQuery query{repository.view_bundles_reverse()}; + TransactionBlockNumByTxnHashQuery query{repository}; // known block 1'500'012 txn hash auto block_num = query.exec(silkworm::Hash{from_hex("0x2224c39c930355233f11414e9f216f381c1f6b0c32fc77b192128571c2dc9eb9").value()}); diff --git a/silkworm/db/snapshot_test.cpp b/silkworm/db/snapshot_test.cpp index 874e1869f7..91c3c20674 100644 --- a/silkworm/db/snapshot_test.cpp +++ 
b/silkworm/db/snapshot_test.cpp @@ -78,7 +78,7 @@ TEST_CASE("HeaderSnapshot::header_by_number OK", "[silkworm][node][snapshot][ind SegmentFileReader header_segment{header_segment_path}; AccessorIndex idx_header_hash{header_segment_path.related_path_ext(db::blocks::kIdxExtension)}; - HeaderFindByBlockNumQuery header_by_number{{header_segment, idx_header_hash}}; + HeaderFindByBlockNumSegmentQuery header_by_number{{header_segment, idx_header_hash}}; CHECK(!header_by_number.exec(1'500'011)); CHECK(header_by_number.exec(1'500'012)); @@ -118,7 +118,7 @@ TEST_CASE("BodySnapshot::body_by_number OK", "[silkworm][node][snapshot][index]" SegmentFileReader body_segment{body_segment_path}; AccessorIndex idx_body_number{body_segment_path.related_path_ext(db::blocks::kIdxExtension)}; - BodyFindByBlockNumQuery body_by_number{{body_segment, idx_body_number}}; + BodyFindByBlockNumSegmentQuery body_by_number{{body_segment, idx_body_number}}; CHECK(!body_by_number.exec(1'500'011)); CHECK(body_by_number.exec(1'500'012)); @@ -144,7 +144,7 @@ TEST_CASE("TransactionSnapshot::txn_by_id OK", "[silkworm][node][snapshot][index SegmentFileReader txn_segment{txn_segment_path}; AccessorIndex idx_txn_hash{txn_segment_path.related_path_ext(db::blocks::kIdxExtension)}; - TransactionFindByIdQuery txn_by_id{{txn_segment, idx_txn_hash}}; + TransactionFindByIdSegmentQuery txn_by_id{{txn_segment, idx_txn_hash}}; const auto transaction = txn_by_id.exec(7'341'272); CHECK(transaction.has_value()); @@ -170,10 +170,10 @@ TEST_CASE("TransactionSnapshot::block_num_by_txn_hash OK", "[silkworm][node][sna SegmentFileReader txn_segment{txn_segment_path}; AccessorIndex idx_txn_hash{txn_segment_path.related_path_ext(db::blocks::kIdxExtension)}; - TransactionFindByIdQuery txn_by_id{{txn_segment, idx_txn_hash}}; + TransactionFindByIdSegmentQuery txn_by_id{{txn_segment, idx_txn_hash}}; AccessorIndex idx_txn_hash_2_block{txn_segment_path.related_path(std::string{db::blocks::kIdxTxnHash2BlockTag}, 
db::blocks::kIdxExtension)}; - TransactionBlockNumByTxnHashQuery block_num_by_txn_hash{idx_txn_hash_2_block, TransactionFindByHashQuery{{txn_segment, idx_txn_hash}}}; + TransactionBlockNumByTxnHashSegmentQuery block_num_by_txn_hash{idx_txn_hash_2_block, TransactionFindByHashSegmentQuery{{txn_segment, idx_txn_hash}}}; // block 1'500'012: base_txn_id is 7'341'263, txn_count is 7 auto transaction = txn_by_id.exec(7'341'269); // known txn id in block 1'500'012 @@ -208,21 +208,21 @@ TEST_CASE("TransactionSnapshot::txn_range OK", "[silkworm][node][snapshot][index SegmentFileReader txn_segment{txn_segment_path}; AccessorIndex idx_txn_hash{txn_segment_path.related_path_ext(db::blocks::kIdxExtension)}; - TransactionRangeFromIdQuery query{{txn_segment, idx_txn_hash}}; + TransactionRangeFromIdSegmentQuery query{{txn_segment, idx_txn_hash}}; // block 1'500'012: base_txn_id is 7'341'263, txn_count is 7 - CHECK(query.exec_into_vector(7'341'263, 0).empty()); - CHECK(query.exec_into_vector(7'341'263, 7).size() == 7); + CHECK(query.exec(7'341'263, 0)->empty()); + CHECK(query.exec(7'341'263, 7)->size() == 7); // block 1'500'013: base_txn_id is 7'341'272, txn_count is 1 - CHECK(query.exec_into_vector(7'341'272, 0).empty()); - CHECK(query.exec_into_vector(7'341'272, 1).size() == 1); + CHECK(query.exec(7'341'272, 0)->empty()); + CHECK(query.exec(7'341'272, 1)->size() == 1); // invalid base_txn_id returns empty - CHECK(query.exec_into_vector(0, 1).empty()); - CHECK(query.exec_into_vector(10'000'000, 1).empty()); - CHECK(query.exec_into_vector(7'341'261, 1).empty()); // before the first system tx - CHECK(query.exec_into_vector(7'341'274, 1).empty()); // after the last system tx + CHECK_FALSE(query.exec(0, 1)); + CHECK_FALSE(query.exec(10'000'000, 1)); + CHECK_FALSE(query.exec(7'341'261, 1)); // before the first system tx + CHECK_FALSE(query.exec(7'341'274, 1)); // after the last system tx } TEST_CASE("TransactionSnapshot::txn_rlp_range OK", "[silkworm][node][snapshot][index]") { @@ 
-237,21 +237,21 @@ TEST_CASE("TransactionSnapshot::txn_rlp_range OK", "[silkworm][node][snapshot][i SegmentFileReader txn_segment{txn_segment_path}; AccessorIndex idx_txn_hash{txn_segment_path.related_path_ext(db::blocks::kIdxExtension)}; - TransactionPayloadRlpRangeFromIdQuery query{{txn_segment, idx_txn_hash}}; + TransactionPayloadRlpRangeFromIdSegmentQuery query{{txn_segment, idx_txn_hash}}; // block 1'500'012: base_txn_id is 7'341'263, txn_count is 7 - CHECK(query.exec_into_vector(7'341'263, 0).empty()); - CHECK(query.exec_into_vector(7'341'263, 7).size() == 7); + CHECK(query.exec(7'341'263, 0)->empty()); + CHECK(query.exec(7'341'263, 7)->size() == 7); // block 1'500'013: base_txn_id is 7'341'272, txn_count is 1 - CHECK(query.exec_into_vector(7'341'272, 0).empty()); - CHECK(query.exec_into_vector(7'341'272, 1).size() == 1); + CHECK(query.exec(7'341'272, 0)->empty()); + CHECK(query.exec(7'341'272, 1)->size() == 1); // invalid base_txn_id returns empty - CHECK(query.exec_into_vector(0, 1).empty()); - CHECK(query.exec_into_vector(10'000'000, 1).empty()); - CHECK(query.exec_into_vector(7'341'261, 1).empty()); // before the first system tx - CHECK(query.exec_into_vector(7'341'274, 1).empty()); // after the last system tx + CHECK_FALSE(query.exec(0, 1)); + CHECK_FALSE(query.exec(10'000'000, 1)); + CHECK_FALSE(query.exec(7'341'261, 1)); // before the first system tx + CHECK_FALSE(query.exec(7'341'274, 1)); // after the last system tx } TEST_CASE("slice_tx_payload", "[silkworm][node][snapshot]") {