Skip to content

Commit

Permalink
snapshots: queries refactoring (#2625)
Browse files Browse the repository at this point in the history
* switch names: Query -> SegmentQuery, MultiQuery -> Query
* simplify TransactionBlockNumByTxnHashMultiQuery to follow a "multi-bundle query" pattern
* SnapshotRepository read-only interface for queries
* multi-bundle queries: FindMapQuery, FindByTimestampMapQuery
* access_layer DataModel refactoring to use multi-bundle queries
* refactor RecSplitIndex ordinal lookup: improve type safety and naming
* non-null EliasFanoDecoder value
* KVSegmentValuesReader
  • Loading branch information
battlmonstr authored Jan 7, 2025
2 parents 90a6c09 + a28b316 commit e98a2d7
Show file tree
Hide file tree
Showing 34 changed files with 494 additions and 275 deletions.
53 changes: 14 additions & 39 deletions silkworm/db/access_layer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1266,29 +1266,15 @@ bool DataModel::read_block_from_snapshot(BlockNum block_num, Block& block) const
}

std::optional<BlockHeader> DataModel::read_header_from_snapshot(BlockNum block_num) const {
std::optional<BlockHeader> block_header;
// We know the header snapshot in advance: find it based on target block number
const auto [segment_and_index, _] = repository_.find_segment(blocks::kHeaderSegmentAndIdxNames, block_num);
if (segment_and_index) {
block_header = HeaderFindByBlockNumQuery{*segment_and_index}.exec(block_num);
}
return block_header;
return HeaderFindByBlockNumQuery{repository_}.exec(block_num);
}

std::optional<BlockHeader> DataModel::read_header_from_snapshot(const Hash& hash) const {
std::optional<BlockHeader> block_header;
// We don't know the header snapshot in advance: search for block hash in each header snapshot in reverse order
for (const auto& bundle_ptr : repository_.view_bundles_reverse()) {
const auto& bundle = *bundle_ptr;
auto segment_and_index = bundle.segment_and_accessor_index(blocks::kHeaderSegmentAndIdxNames);
block_header = HeaderFindByHashQuery{segment_and_index}.exec(hash);
if (block_header) break;
}
return block_header;
return HeaderFindByHashQuery{repository_}.exec(hash);
}

std::optional<BlockBodyForStorage> DataModel::read_body_for_storage_from_snapshot(BlockNum block_num) const {
return BodyFindByBlockNumMultiQuery{repository_}.exec(block_num);
return BodyFindByBlockNumQuery{repository_}.exec(block_num);
}

bool DataModel::read_body_from_snapshot(BlockNum block_num, BlockBody& body) const {
Expand All @@ -1310,46 +1296,35 @@ bool DataModel::read_body_from_snapshot(BlockNum block_num, BlockBody& body) con
}

bool DataModel::is_body_in_snapshot(BlockNum block_num) const {
// We know the body snapshot in advance: find it based on target block number
const auto [segment_and_index, _] = repository_.find_segment(blocks::kBodySegmentAndIdxNames, block_num);
if (segment_and_index) {
const auto stored_body = BodyFindByBlockNumQuery{*segment_and_index}.exec(block_num);
return stored_body.has_value();
}

return false;
return BodyFindByBlockNumQuery{repository_}.exec(block_num).has_value();
}

bool DataModel::read_transactions_from_snapshot(BlockNum block_num, uint64_t base_txn_id, uint64_t txn_count, std::vector<Transaction>& txs) const {
if (txn_count == 0) {
return true;
}

const auto [segment_and_index, _] = repository_.find_segment(blocks::kTxnSegmentAndIdxNames, block_num);
if (!segment_and_index) return false;

txs = TransactionRangeFromIdQuery{*segment_and_index}.exec_into_vector(base_txn_id, txn_count);
auto txs_opt = TransactionRangeFromIdQuery{repository_}.exec(block_num, base_txn_id, txn_count);
if (!txs_opt) return false;

txs = std::move(*txs_opt);
return true;
}

bool DataModel::read_rlp_transactions_from_snapshot(BlockNum block_num, std::vector<Bytes>& rlp_txs) const {
const auto [body_segment_and_index, _] = repository_.find_segment(blocks::kBodySegmentAndIdxNames, block_num);
if (body_segment_and_index) {
auto stored_body = BodyFindByBlockNumQuery{*body_segment_and_index}.exec(block_num);
if (!stored_body) return false;
auto stored_body = BodyFindByBlockNumQuery{repository_}.exec(block_num);
if (!stored_body) return false;

{
// Skip first and last *system transactions* in block body
const auto base_txn_id{stored_body->base_txn_id + 1};
const auto txn_count{stored_body->txn_count >= 2 ? stored_body->txn_count - 2 : stored_body->txn_count};

if (txn_count == 0) return true;

const auto [tx_segment_and_index, _2] = repository_.find_segment(blocks::kTxnSegmentAndIdxNames, block_num);
if (!tx_segment_and_index) return false;

rlp_txs = TransactionPayloadRlpRangeFromIdQuery{*tx_segment_and_index}.exec_into_vector(base_txn_id, txn_count);
auto txs_opt = TransactionPayloadRlpRangeFromIdQuery{repository_}.exec(block_num, base_txn_id, txn_count);
if (!txs_opt) return false;

rlp_txs = std::move(*txs_opt);
return true;
}

Expand Down Expand Up @@ -1382,7 +1357,7 @@ std::optional<BlockNum> DataModel::read_tx_lookup_from_db(const evmc::bytes32& t
}

std::optional<BlockNum> DataModel::read_tx_lookup_from_snapshot(const evmc::bytes32& tx_hash) const {
TransactionBlockNumByTxnHashMultiQuery query{repository_.view_bundles_reverse()};
TransactionBlockNumByTxnHashQuery query{repository_};
return query.exec(tx_hash);
}

Expand Down
4 changes: 2 additions & 2 deletions silkworm/db/access_layer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@ class DataModel {
public:
DataModel(
ROTxn& txn,
snapshots::SnapshotRepository& repository)
const snapshots::SnapshotRepositoryROAccess& repository)
: txn_{txn},
repository_{repository} {}

Expand Down Expand Up @@ -381,7 +381,7 @@ class DataModel {
std::optional<BlockNum> read_tx_lookup_from_snapshot(const evmc::bytes32& tx_hash) const;

ROTxn& txn_;
snapshots::SnapshotRepository& repository_;
const snapshots::SnapshotRepositoryROAccess& repository_;
};

class DataModelFactory {
Expand Down
19 changes: 5 additions & 14 deletions silkworm/db/blocks/bodies/body_queries.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,29 +17,20 @@
#pragma once

#include <silkworm/db/datastore/snapshots/basic_queries.hpp>
#include <silkworm/db/datastore/snapshots/snapshot_repository.hpp>
#include <silkworm/db/datastore/snapshots/snapshot_repository_ro_access.hpp>

#include "../schema_config.hpp"
#include "body_segment.hpp"

namespace silkworm::snapshots {

using BodyFindByBlockNumQuery = FindByIdQuery<BodySegmentReader>;

class BodyFindByBlockNumMultiQuery {
public:
// TODO: use a sub-interface of SnapshotRepository
explicit BodyFindByBlockNumMultiQuery(SnapshotRepository& repository)
: repository_{repository} {}
using BodyFindByBlockNumSegmentQuery = FindByIdSegmentQuery<BodySegmentReader, &db::blocks::kBodySegmentAndIdxNames>;

struct BodyFindByBlockNumQuery : public FindByTimestampMapQuery<BodyFindByBlockNumSegmentQuery> {
using FindByTimestampMapQuery::FindByTimestampMapQuery;
std::optional<BlockBodyForStorage> exec(BlockNum block_num) {
const auto [segment_and_index, _] = repository_.find_segment(db::blocks::kBodySegmentAndIdxNames, block_num);
if (!segment_and_index) return std::nullopt;
return BodyFindByBlockNumQuery{*segment_and_index}.exec(block_num);
return FindByTimestampMapQuery::exec(block_num, block_num);
}

private:
SnapshotRepository& repository_;
};

} // namespace silkworm::snapshots
4 changes: 2 additions & 2 deletions silkworm/db/blocks/bodies/body_txs_amount_query.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,10 @@

namespace silkworm::snapshots {

BodyTxsAmountQuery::Result BodyTxsAmountQuery::exec() {
BodyTxsAmountSegmentQuery::Result BodyTxsAmountSegmentQuery::exec() {
size_t body_count = segment_.item_count();
if (body_count == 0) {
throw std::runtime_error("BodyTxsAmountQuery empty body snapshot: " + segment_.path().path().string());
throw std::runtime_error("BodyTxsAmountSegmentQuery empty body snapshot: " + segment_.path().path().string());
}

BodySegmentReader reader{segment_};
Expand Down
4 changes: 2 additions & 2 deletions silkworm/db/blocks/bodies/body_txs_amount_query.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,14 @@

namespace silkworm::snapshots {

class BodyTxsAmountQuery {
class BodyTxsAmountSegmentQuery {
public:
struct Result {
uint64_t first_tx_id{};
uint64_t count{};
};

explicit BodyTxsAmountQuery(const segment::SegmentFileReader& segment) : segment_(segment) {}
explicit BodyTxsAmountSegmentQuery(const segment::SegmentFileReader& segment) : segment_(segment) {}

Result exec();

Expand Down
4 changes: 2 additions & 2 deletions silkworm/db/blocks/bodies/body_txs_amount_query_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,12 @@

namespace silkworm::snapshots {

TEST_CASE("BodyTxsAmountQuery") {
TEST_CASE("BodyTxsAmountSegmentQuery") {
TemporaryDirectory tmp_dir;
test_util::SampleBodySnapshotFile snapshot_file{tmp_dir.path()};
segment::SegmentFileReader snapshot{snapshot_file.path()};

BodyTxsAmountQuery query{snapshot};
BodyTxsAmountSegmentQuery query{snapshot};
auto result = query.exec();

CHECK(result.first_tx_id == 7'341'262);
Expand Down
14 changes: 12 additions & 2 deletions silkworm/db/blocks/headers/header_queries.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,21 @@

#include <silkworm/db/datastore/snapshots/basic_queries.hpp>

#include "../schema_config.hpp"
#include "header_segment.hpp"

namespace silkworm::snapshots {

using HeaderFindByBlockNumQuery = FindByIdQuery<HeaderSegmentReader>;
using HeaderFindByHashQuery = FindByHashQuery<HeaderSegmentReader>;
using HeaderFindByBlockNumSegmentQuery = FindByIdSegmentQuery<HeaderSegmentReader, &db::blocks::kHeaderSegmentAndIdxNames>;

// Repository-level query that finds a block header by block number.
// It is FindByTimestampMapQuery specialised with HeaderFindByBlockNumSegmentQuery.
struct HeaderFindByBlockNumQuery : public FindByTimestampMapQuery<HeaderFindByBlockNumSegmentQuery> {
// Inherit the base class constructors as-is.
using FindByTimestampMapQuery::FindByTimestampMapQuery;
// Convenience overload taking a single block number: it passes block_num as
// both arguments of the base class exec.
// NOTE(review): presumably the first argument selects the snapshot covering
// the block and the second is the id looked up inside it — confirm against
// FindByTimestampMapQuery.
std::optional<BlockHeader> exec(BlockNum block_num) {
return FindByTimestampMapQuery::exec(block_num, block_num);
}
};

using HeaderFindByHashSegmentQuery = FindByHashSegmentQuery<HeaderSegmentReader, &db::blocks::kHeaderSegmentAndIdxNames>;
using HeaderFindByHashQuery = FindMapQuery<HeaderFindByHashSegmentQuery>;

} // namespace silkworm::snapshots
7 changes: 3 additions & 4 deletions silkworm/db/blocks/schema_config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@

#pragma once

#include <array>
#include <memory>

#include "../datastore/common/entity_name.hpp"
Expand Down Expand Up @@ -44,7 +43,7 @@ inline constexpr std::string_view kHeaderSegmentTag = kHeaderSegmentName.name;
//! Index header_hash -> block_num -> headers_segment_offset
inline constexpr datastore::EntityName kIdxHeaderHashName{"headers.idx"};
inline constexpr std::string_view kIdxHeaderHashTag = kHeaderSegmentTag;
inline constexpr std::array<datastore::EntityName, 3> kHeaderSegmentAndIdxNames{
inline constexpr snapshots::SegmentAndAccessorIndexNames kHeaderSegmentAndIdxNames{
snapshots::Schema::kDefaultEntityName,
kHeaderSegmentName,
kIdxHeaderHashName,
Expand All @@ -55,7 +54,7 @@ inline constexpr std::string_view kBodySegmentTag = kBodySegmentName.name;
//! Index block_num -> bodies_segment_offset
inline constexpr datastore::EntityName kIdxBodyNumberName{"bodies.idx"};
inline constexpr std::string_view kIdxBodyNumberTag = kBodySegmentTag;
inline constexpr std::array<datastore::EntityName, 3> kBodySegmentAndIdxNames{
inline constexpr snapshots::SegmentAndAccessorIndexNames kBodySegmentAndIdxNames{
snapshots::Schema::kDefaultEntityName,
kBodySegmentName,
kIdxBodyNumberName,
Expand All @@ -66,7 +65,7 @@ inline constexpr std::string_view kTxnSegmentTag = kTxnSegmentName.name;
//! Index transaction_hash -> txn_id -> transactions_segment_offset
inline constexpr datastore::EntityName kIdxTxnHashName{"transactions.idx"};
inline constexpr std::string_view kIdxTxnHashTag = kTxnSegmentTag;
inline constexpr std::array<datastore::EntityName, 3> kTxnSegmentAndIdxNames{
inline constexpr snapshots::SegmentAndAccessorIndexNames kTxnSegmentAndIdxNames{
snapshots::Schema::kDefaultEntityName,
kTxnSegmentName,
kIdxTxnHashName,
Expand Down
2 changes: 1 addition & 1 deletion silkworm/db/blocks/transactions/txn_index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ std::pair<uint64_t, uint64_t> TransactionIndex::compute_txs_amount(
SnapshotPath bodies_segment_path,
std::optional<MemoryMappedRegion> bodies_segment_region) {
segment::SegmentFileReader body_segment{std::move(bodies_segment_path), bodies_segment_region};
auto result = BodyTxsAmountQuery{body_segment}.exec();
auto result = BodyTxsAmountSegmentQuery{body_segment}.exec();
return {result.first_tx_id, result.count};
}

Expand Down
65 changes: 30 additions & 35 deletions silkworm/db/blocks/transactions/txn_queries.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,62 +21,57 @@
#include <silkworm/core/common/bytes.hpp>
#include <silkworm/core/types/hash.hpp>
#include <silkworm/db/datastore/snapshots/basic_queries.hpp>
#include <silkworm/db/datastore/snapshots/snapshot_repository_ro_access.hpp>

#include "../schema_config.hpp"
#include "txn_segment.hpp"

namespace silkworm::snapshots {

using TransactionFindByIdQuery = FindByIdQuery<TransactionSegmentReader>;
using TransactionFindByHashQuery = FindByHashQuery<TransactionSegmentReader>;
using TransactionRangeFromIdQuery = RangeFromIdQuery<TransactionSegmentReader>;
using TransactionPayloadRlpRangeFromIdQuery = RangeFromIdQuery<TransactionSegmentPayloadRlpReader<Bytes>>;
using TransactionFindByIdSegmentQuery = FindByIdSegmentQuery<TransactionSegmentReader, &db::blocks::kTxnSegmentAndIdxNames>;
using TransactionFindByHashSegmentQuery = FindByHashSegmentQuery<TransactionSegmentReader, &db::blocks::kTxnSegmentAndIdxNames>;

class TransactionBlockNumByTxnHashQuery {
using TransactionRangeFromIdSegmentQuery = RangeFromIdSegmentQuery<TransactionSegmentReader, &db::blocks::kTxnSegmentAndIdxNames>;
using TransactionRangeFromIdQuery = FindByTimestampMapQuery<TransactionRangeFromIdSegmentQuery>;

using TransactionPayloadRlpRangeFromIdSegmentQuery = RangeFromIdSegmentQuery<TransactionSegmentPayloadRlpReader<Bytes>, &db::blocks::kTxnSegmentAndIdxNames>;
using TransactionPayloadRlpRangeFromIdQuery = FindByTimestampMapQuery<TransactionPayloadRlpRangeFromIdSegmentQuery>;

class TransactionBlockNumByTxnHashSegmentQuery {
public:
TransactionBlockNumByTxnHashQuery(
TransactionBlockNumByTxnHashSegmentQuery(
const rec_split::AccessorIndex& index,
TransactionFindByHashQuery cross_check_query)
TransactionFindByHashSegmentQuery cross_check_query)
: index_(index),
cross_check_query_(cross_check_query) {}

// Builds the query from a snapshot bundle: wraps *bundle in a
// db::blocks::BundleDataRef and initialises this object from the query
// returned by make().
explicit TransactionBlockNumByTxnHashSegmentQuery(
const SnapshotBundle& bundle)
: TransactionBlockNumByTxnHashSegmentQuery{
make(db::blocks::BundleDataRef{*bundle})} {}

std::optional<BlockNum> exec(const Hash& hash) {
// Lookup the entire txn to check that the retrieved txn hash matches (no way to know if key exists in MPHF)
const auto transaction = cross_check_query_.exec(hash);
auto result = transaction ? index_.lookup_ordinal_by_hash(hash) : std::nullopt;
auto result = transaction ? index_.lookup_by_key(hash) : std::nullopt;
return result;
}

private:
const rec_split::AccessorIndex& index_;
TransactionFindByHashQuery cross_check_query_;
};

template <std::ranges::view TBundlesView, class TBundle = typename std::ranges::iterator_t<TBundlesView>::value_type>
class TransactionBlockNumByTxnHashMultiQuery {
public:
explicit TransactionBlockNumByTxnHashMultiQuery(TBundlesView bundles)
: bundles_(std::move(bundles)) {}

std::optional<BlockNum> exec(const Hash& hash) {
for (const TBundle& bundle_ptr : bundles_) {
db::blocks::BundleDataRef bundle{**bundle_ptr};
const segment::SegmentFileReader& segment = bundle.txn_segment();
const rec_split::AccessorIndex& idx_txn_hash = bundle.idx_txn_hash();
const rec_split::AccessorIndex& idx_txn_hash_2_block = bundle.idx_txn_hash_2_block();

TransactionFindByHashQuery cross_check_query{{segment, idx_txn_hash}};
TransactionBlockNumByTxnHashQuery query{idx_txn_hash_2_block, cross_check_query};
auto block_num = query.exec(hash);
if (block_num) {
return block_num;
}
}
return std::nullopt;
// Factory that assembles the query from a bundle data reference.
// The cross-check query is a TransactionFindByHashSegmentQuery over the
// bundle's transaction segment and its idx_txn_hash index; the returned query
// pairs it with the bundle's idx_txn_hash_2_block index.
static TransactionBlockNumByTxnHashSegmentQuery make(db::blocks::BundleDataRef bundle) {
TransactionFindByHashSegmentQuery cross_check_query{
SegmentAndAccessorIndex{
bundle.txn_segment(),
bundle.idx_txn_hash(),
},
};
// Brace-initialised return: calls the (index, cross_check_query) constructor.
return {bundle.idx_txn_hash_2_block(), cross_check_query};
}

private:
TBundlesView bundles_;
const rec_split::AccessorIndex& index_;
TransactionFindByHashSegmentQuery cross_check_query_;
};

using TransactionBlockNumByTxnHashQuery = FindMapQuery<TransactionBlockNumByTxnHashSegmentQuery>;

} // namespace silkworm::snapshots
Loading

0 comments on commit e98a2d7

Please sign in to comment.