From 41c5f1a35b1879622fb7b812252a88dca99469bc Mon Sep 17 00:00:00 2001
From: Alexey Shekhirin
Date: Tue, 27 Feb 2024 20:53:44 +0000
Subject: [PATCH] WIP: static files (#6444)

Co-authored-by: joshieDo <93316087+joshieDo@users.noreply.github.com>
Co-authored-by: Bjerg
Co-authored-by: Matthias Seitz
Co-authored-by: Oliver Nordbjerg
Co-authored-by: joshieDo
Co-authored-by: Thomas Coratger
---
 .config/nextest.toml | 2 +-
 .github/workflows/hive.yml | 8 +-
 CODEOWNERS | 2 +-
 Cargo.lock | 68 +-
 Cargo.toml | 6 +-
 bin/reth/Cargo.toml | 2 +-
 bin/reth/src/builder.rs | 63 +-
 bin/reth/src/commands/db/clear.rs | 36 +-
 bin/reth/src/commands/db/diff.rs | 4 +-
 bin/reth/src/commands/db/get.rs | 191 ++-
 bin/reth/src/commands/db/list.rs | 6 +-
 bin/reth/src/commands/db/mod.rs | 131 +-
 .../db/{snapshots => static_files}/bench.rs | 23 +-
 .../db/{snapshots => static_files}/headers.rs | 65 +-
 .../db/{snapshots => static_files}/mod.rs | 161 +--
 .../{snapshots => static_files}/receipts.rs | 55 +-
 .../transactions.rs | 60 +-
 bin/reth/src/commands/db/stats.rs | 290 +++++
 .../src/commands/debug_cmd/build_block.rs | 12 +-
 bin/reth/src/commands/debug_cmd/execution.rs | 34 +-
 .../commands/debug_cmd/in_memory_merkle.rs | 23 +-
 bin/reth/src/commands/debug_cmd/merkle.rs | 8 +-
 .../src/commands/debug_cmd/replay_engine.rs | 21 +-
 bin/reth/src/commands/import.rs | 33 +-
 bin/reth/src/commands/init_cmd.rs | 7 +-
 bin/reth/src/commands/p2p/mod.rs | 6 +-
 .../src/commands/recover/storage_tries.rs | 5 +-
 bin/reth/src/commands/stage/drop.rs | 64 +-
 bin/reth/src/commands/stage/dump/execution.rs | 70 +-
 .../commands/stage/dump/hashing_account.rs | 34 +-
 .../commands/stage/dump/hashing_storage.rs | 32 +-
 bin/reth/src/commands/stage/dump/merkle.rs | 40 +-
 bin/reth/src/commands/stage/dump/mod.rs | 72 +-
 bin/reth/src/commands/stage/run.rs | 18 +-
 bin/reth/src/commands/stage/unwind.rs | 2 +-
 bin/reth/src/utils.rs | 22 +-
 book/SUMMARY.md | 6 +-
 book/cli/SUMMARY.md | 6 +-
 book/cli/reth/db.md | 20 +-
 book/cli/reth/db/clear.md | 9 +-
 book/cli/reth/db/clear/mdbx.md | 124 ++
 book/cli/reth/db/clear/static-file.md | 127 ++
 book/cli/reth/db/clear/static_file.md | 127 ++
 book/cli/reth/db/create-static-files.md | 174 +++
 book/cli/reth/db/get.md | 20 +-
 book/cli/reth/db/get/mdbx.md | 133 ++
 book/cli/reth/db/get/static-file.md | 133 ++
 book/cli/reth/db/get/static_file.md | 133 ++
 .../reth/db/{snapshot.md => static_file.md} | 36 +-
 book/cli/reth/db/stats.md | 6 +
 book/cli/reth/node.md | 7 +-
 book/cli/reth/stage/drop.md | 23 +-
 book/cli/reth/stage/dump/account-hashing.md | 6 +-
 book/cli/reth/stage/dump/execution.md | 6 +-
 book/cli/reth/stage/dump/merkle.md | 6 +-
 book/cli/reth/stage/dump/storage-hashing.md | 6 +-
 book/cli/reth/stage/run.md | 35 +-
 book/run/config.md | 17 +-
 book/run/observability.md | 2 +-
 crates/blockchain-tree/src/blockchain_tree.rs | 2 +-
 crates/blockchain-tree/src/externals.rs | 38 +-
 crates/config/src/config.rs | 28 +-
 crates/consensus/beacon/Cargo.toml | 3 +-
 .../beacon/src/engine/hooks/controller.rs | 3 +-
 .../consensus/beacon/src/engine/hooks/mod.rs | 4 +-
 .../beacon/src/engine/hooks/snapshot.rs | 156 ---
 .../beacon/src/engine/hooks/static_file.rs | 163 +++
 crates/consensus/beacon/src/engine/mod.rs | 132 +-
 crates/consensus/beacon/src/engine/sync.rs | 13 +-
 .../consensus/beacon/src/engine/test_utils.rs | 48 +-
 crates/etl/Cargo.toml | 17 +
 crates/etl/src/lib.rs | 264 ++++
 crates/interfaces/src/provider.rs | 23 +-
 crates/net/downloaders/src/bodies/bodies.rs | 14 +-
 crates/net/downloaders/src/bodies/task.rs | 13 +-
crates/net/downloaders/src/file_client.rs | 17 +- crates/net/eth-wire/Cargo.toml | 2 +- crates/node-builder/Cargo.toml | 2 +- crates/node-builder/src/builder.rs | 26 +- crates/node-core/Cargo.toml | 3 +- crates/node-core/src/args/stage_args.rs | 7 +- crates/node-core/src/dirs.rs | 6 +- crates/node-core/src/events/node.rs | 26 + crates/node-core/src/init.rs | 111 +- crates/node-core/src/node_config.rs | 16 +- crates/primitives/Cargo.toml | 2 +- crates/primitives/src/chain/spec.rs | 10 - crates/primitives/src/header.rs | 9 +- crates/primitives/src/lib.rs | 8 +- crates/primitives/src/prune/mod.rs | 6 +- crates/primitives/src/prune/mode.rs | 28 +- crates/primitives/src/prune/segment.rs | 27 +- crates/primitives/src/snapshot/mod.rs | 47 - crates/primitives/src/snapshot/segment.rs | 288 ----- crates/primitives/src/stage/id.rs | 10 +- .../{snapshot => static_file}/compression.rs | 4 +- .../src/{snapshot => static_file}/filters.rs | 12 +- crates/primitives/src/static_file/mod.rs | 54 + crates/primitives/src/static_file/segment.rs | 435 +++++++ crates/prune/Cargo.toml | 2 +- crates/prune/src/builder.rs | 8 +- crates/prune/src/event.rs | 2 + crates/prune/src/pruner.rs | 238 ++-- crates/prune/src/segments/account_history.rs | 4 +- crates/prune/src/segments/headers.rs | 18 +- crates/prune/src/segments/mod.rs | 14 +- crates/prune/src/segments/receipts.rs | 4 +- crates/prune/src/segments/receipts_by_logs.rs | 9 +- crates/prune/src/segments/sender_recovery.rs | 4 +- crates/prune/src/segments/set.rs | 7 +- crates/prune/src/segments/storage_history.rs | 4 +- .../prune/src/segments/transaction_lookup.rs | 4 +- crates/prune/src/segments/transactions.rs | 4 +- crates/rpc/rpc/Cargo.toml | 2 +- crates/snapshot/README.md | 88 -- crates/snapshot/src/error.rs | 25 - crates/snapshot/src/segments/headers.rs | 98 -- crates/snapshot/src/segments/mod.rs | 100 -- crates/snapshot/src/segments/receipts.rs | 84 -- crates/snapshot/src/segments/transactions.rs | 84 -- crates/snapshot/src/snapshotter.rs | 397 ------ crates/stages/Cargo.toml | 6 +- crates/stages/benches/criterion.rs | 107 +- .../stages/benches/setup/account_hashing.rs | 40 +- crates/stages/benches/setup/mod.rs | 22 +- crates/stages/src/error.rs | 45 +- crates/stages/src/lib.rs | 29 +- crates/stages/src/pipeline/builder.rs | 8 +- crates/stages/src/pipeline/mod.rs | 106 +- crates/stages/src/sets.rs | 64 +- crates/stages/src/stages/bodies.rs | 230 +++- crates/stages/src/stages/execution.rs | 521 +++++--- crates/stages/src/stages/finish.rs | 1 + crates/stages/src/stages/hashing_account.rs | 26 +- crates/stages/src/stages/hashing_storage.rs | 11 +- crates/stages/src/stages/headers.rs | 460 ++++--- .../src/stages/index_account_history.rs | 11 +- .../src/stages/index_storage_history.rs | 11 +- crates/stages/src/stages/merkle.rs | 60 +- crates/stages/src/stages/mod.rs | 26 +- crates/stages/src/stages/sender_recovery.rs | 112 +- crates/stages/src/stages/total_difficulty.rs | 314 ----- crates/stages/src/stages/tx_lookup.rs | 242 ++-- crates/stages/src/test_utils/macros.rs | 10 + crates/stages/src/test_utils/mod.rs | 2 +- crates/stages/src/test_utils/test_db.rs | 187 ++- crates/{snapshot => static-file}/Cargo.toml | 7 +- crates/static-file/README.md | 88 ++ crates/static-file/src/event.rs | 19 + crates/{snapshot => static-file}/src/lib.rs | 12 +- crates/static-file/src/segments/headers.rs | 128 ++ crates/static-file/src/segments/mod.rs | 116 ++ crates/static-file/src/segments/receipts.rs | 107 ++ .../static-file/src/segments/transactions.rs | 111 ++ 
.../static-file/src/static_file_producer.rs | 327 +++++ .../codecs/derive/src/compact/generator.rs | 2 +- .../storage/codecs/derive/src/compact/mod.rs | 2 +- crates/storage/db/src/lib.rs | 15 +- crates/storage/db/src/snapshot/masks.rs | 28 - crates/storage/db/src/snapshot/mod.rs | 76 -- .../src/{snapshot => static_file}/cursor.rs | 34 +- .../{snapshot => static_file}/generation.rs | 36 +- .../db/src/{snapshot => static_file}/mask.rs | 24 +- crates/storage/db/src/static_file/masks.rs | 21 + crates/storage/db/src/static_file/mod.rs | 76 ++ .../storage/db/src/tables/codecs/compact.rs | 1 + crates/storage/db/src/tables/raw.rs | 12 + crates/storage/libmdbx-rs/Cargo.toml | 6 +- crates/storage/libmdbx-rs/src/environment.rs | 14 +- crates/storage/libmdbx-rs/src/flags.rs | 2 +- crates/storage/nippy-jar/Cargo.toml | 4 +- crates/storage/nippy-jar/src/error.rs | 2 + crates/storage/nippy-jar/src/lib.rs | 193 +-- crates/storage/nippy-jar/src/writer.rs | 162 ++- crates/storage/provider/Cargo.toml | 1 + .../bundle_state_with_receipts.rs | 61 +- .../provider/src/providers/database/mod.rs | 250 ++-- .../src/providers/database/provider.rs | 318 ++--- crates/storage/provider/src/providers/mod.rs | 79 +- .../src/providers/snapshot/manager.rs | 685 ---------- .../src/providers/state/historical.rs | 169 ++- .../provider/src/providers/state/latest.rs | 55 +- .../{snapshot => static_file}/jar.rs | 75 +- .../src/providers/static_file/manager.rs | 1110 +++++++++++++++++ .../src/providers/static_file/metrics.rs | 90 ++ .../{snapshot => static_file}/mod.rs | 53 +- .../src/providers/static_file/writer.rs | 488 ++++++++ crates/storage/provider/src/test_utils/mod.rs | 5 +- crates/storage/provider/src/traits/mod.rs | 3 + crates/storage/provider/src/traits/stats.rs | 10 + crates/transaction-pool/src/pool/best.rs | 2 +- crates/transaction-pool/src/pool/pending.rs | 4 +- crates/trie/Cargo.toml | 2 +- docs/crates/stages.md | 4 - etc/grafana/dashboards/overview.json | 1 - examples/Cargo.toml | 3 + examples/db-access.rs | 6 +- examples/polygon-p2p/src/chain_cfg.rs | 1 - examples/rpc-db/src/main.rs | 9 +- testing/ef-tests/Cargo.toml | 3 +- testing/ef-tests/src/cases/blockchain_test.rs | 163 ++- 201 files changed, 8855 insertions(+), 5186 deletions(-) rename bin/reth/src/commands/db/{snapshots => static_files}/bench.rs (69%) rename bin/reth/src/commands/db/{snapshots => static_files}/headers.rs (64%) rename bin/reth/src/commands/db/{snapshots => static_files}/mod.rs (54%) rename bin/reth/src/commands/db/{snapshots => static_files}/receipts.rs (68%) rename bin/reth/src/commands/db/{snapshots => static_files}/transactions.rs (69%) create mode 100644 bin/reth/src/commands/db/stats.rs create mode 100644 book/cli/reth/db/clear/mdbx.md create mode 100644 book/cli/reth/db/clear/static-file.md create mode 100644 book/cli/reth/db/clear/static_file.md create mode 100644 book/cli/reth/db/create-static-files.md create mode 100644 book/cli/reth/db/get/mdbx.md create mode 100644 book/cli/reth/db/get/static-file.md create mode 100644 book/cli/reth/db/get/static_file.md rename book/cli/reth/db/{snapshot.md => static_file.md} (83%) delete mode 100644 crates/consensus/beacon/src/engine/hooks/snapshot.rs create mode 100644 crates/consensus/beacon/src/engine/hooks/static_file.rs create mode 100644 crates/etl/Cargo.toml create mode 100644 crates/etl/src/lib.rs delete mode 100644 crates/primitives/src/snapshot/mod.rs delete mode 100644 crates/primitives/src/snapshot/segment.rs rename crates/primitives/src/{snapshot => static_file}/compression.rs (86%) 
rename crates/primitives/src/{snapshot => static_file}/filters.rs (71%) create mode 100644 crates/primitives/src/static_file/mod.rs create mode 100644 crates/primitives/src/static_file/segment.rs delete mode 100644 crates/snapshot/README.md delete mode 100644 crates/snapshot/src/error.rs delete mode 100644 crates/snapshot/src/segments/headers.rs delete mode 100644 crates/snapshot/src/segments/mod.rs delete mode 100644 crates/snapshot/src/segments/receipts.rs delete mode 100644 crates/snapshot/src/segments/transactions.rs delete mode 100644 crates/snapshot/src/snapshotter.rs delete mode 100644 crates/stages/src/stages/total_difficulty.rs rename crates/{snapshot => static-file}/Cargo.toml (83%) create mode 100644 crates/static-file/README.md create mode 100644 crates/static-file/src/event.rs rename crates/{snapshot => static-file}/src/lib.rs (58%) create mode 100644 crates/static-file/src/segments/headers.rs create mode 100644 crates/static-file/src/segments/mod.rs create mode 100644 crates/static-file/src/segments/receipts.rs create mode 100644 crates/static-file/src/segments/transactions.rs create mode 100644 crates/static-file/src/static_file_producer.rs delete mode 100644 crates/storage/db/src/snapshot/masks.rs delete mode 100644 crates/storage/db/src/snapshot/mod.rs rename crates/storage/db/src/{snapshot => static_file}/cursor.rs (77%) rename crates/storage/db/src/{snapshot => static_file}/generation.rs (67%) rename crates/storage/db/src/{snapshot => static_file}/mask.rs (75%) create mode 100644 crates/storage/db/src/static_file/masks.rs create mode 100644 crates/storage/db/src/static_file/mod.rs delete mode 100644 crates/storage/provider/src/providers/snapshot/manager.rs rename crates/storage/provider/src/providers/{snapshot => static_file}/jar.rs (81%) create mode 100644 crates/storage/provider/src/providers/static_file/manager.rs create mode 100644 crates/storage/provider/src/providers/static_file/metrics.rs rename crates/storage/provider/src/providers/{snapshot => static_file}/mod.rs (73%) create mode 100644 crates/storage/provider/src/providers/static_file/writer.rs create mode 100644 crates/storage/provider/src/traits/stats.rs diff --git a/.config/nextest.toml b/.config/nextest.toml index 3542bba5f5e..e107857a351 100644 --- a/.config/nextest.toml +++ b/.config/nextest.toml @@ -4,4 +4,4 @@ slow-timeout = { period = "30s", terminate-after = 4 } [[profile.default.overrides]] filter = "test(general_state_tests)" -slow-timeout = { period = "1m", terminate-after = 4 } +slow-timeout = { period = "1m", terminate-after = 10 } diff --git a/.github/workflows/hive.yml b/.github/workflows/hive.yml index da37528779f..3a31ab58274 100644 --- a/.github/workflows/hive.yml +++ b/.github/workflows/hive.yml @@ -37,8 +37,8 @@ jobs: - name: Checkout hive tests uses: actions/checkout@v4 with: - repository: ethereum/hive - ref: master + repository: joshiedo/hive + ref: reth/disable-pruner path: hivetests - uses: actions/setup-go@v3 @@ -182,8 +182,8 @@ jobs: - name: Checkout hive tests uses: actions/checkout@v4 with: - repository: ethereum/hive - ref: master + repository: joshiedo/hive + ref: reth/disable-pruner path: hivetests - name: Run ${{ matrix.sim }} simulator diff --git a/CODEOWNERS b/CODEOWNERS index 22cc62778dd..7ec66f75978 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -19,5 +19,5 @@ crates/metrics @onbjerg crates/tracing @onbjerg crates/tasks @mattsse crates/prune @shekhirin @joshieDo -crates/snapshot @joshieDo +crates/static-file @joshieDo @shekhirin .github/ @onbjerg @gakonst @DaniPopes diff 
--git a/Cargo.lock b/Cargo.lock index 3dcfacc8e60..d9b756f1c3e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2291,6 +2291,7 @@ name = "ef-tests" version = "0.1.0-alpha.20" dependencies = [ "alloy-rlp", + "rayon", "reth-db", "reth-interfaces", "reth-node-ethereum", @@ -2717,6 +2718,7 @@ dependencies = [ "async-trait", "eyre", "futures", + "jemallocator", "reth-beacon-consensus", "reth-blockchain-tree", "reth-db", @@ -5751,8 +5753,8 @@ dependencies = [ "reth-rpc-engine-api", "reth-rpc-types", "reth-rpc-types-compat", - "reth-snapshot", "reth-stages", + "reth-static-file", "reth-tasks", "reth-tracing", "reth-transaction-pool", @@ -5840,12 +5842,13 @@ dependencies = [ "reth-revm", "reth-rpc-types", "reth-rpc-types-compat", - "reth-snapshot", "reth-stages", + "reth-static-file", "reth-tasks", "reth-tokio-util", "reth-tracing", "schnellru", + "tempfile", "thiserror", "tokio", "tokio-stream", @@ -6139,6 +6142,16 @@ dependencies = [ "tracing", ] +[[package]] +name = "reth-etl" +version = "0.1.0-alpha.20" +dependencies = [ + "rayon", + "reth-db", + "reth-primitives", + "tempfile", +] + [[package]] name = "reth-interfaces" version = "0.1.0-alpha.20" @@ -6339,6 +6352,7 @@ dependencies = [ "memmap2 0.7.1", "ph", "rand 0.8.5", + "reth-primitives", "serde", "sucds", "tempfile", @@ -6383,8 +6397,8 @@ dependencies = [ "reth-revm", "reth-rpc", "reth-rpc-engine-api", - "reth-snapshot", "reth-stages", + "reth-static-file", "reth-tasks", "reth-tracing", "reth-transaction-pool", @@ -6400,6 +6414,7 @@ dependencies = [ "assert_matches", "clap", "const-str", + "derive_more", "dirs-next", "eyre", "futures", @@ -6444,8 +6459,8 @@ dependencies = [ "reth-rpc-engine-api", "reth-rpc-types", "reth-rpc-types-compat", - "reth-snapshot", "reth-stages", + "reth-static-file", "reth-tasks", "reth-tracing", "reth-transaction-pool", @@ -6624,6 +6639,7 @@ dependencies = [ "pin-project", "rand 0.8.5", "rayon", + "reth-codecs", "reth-db", "reth-interfaces", "reth-metrics", @@ -6653,8 +6669,8 @@ dependencies = [ "reth-metrics", "reth-primitives", "reth-provider", - "reth-snapshot", "reth-stages", + "reth-static-file", "reth-tokio-util", "thiserror", "tokio", @@ -6864,24 +6880,6 @@ dependencies = [ "serde_json", ] -[[package]] -name = "reth-snapshot" -version = "0.1.0-alpha.20" -dependencies = [ - "assert_matches", - "clap", - "reth-db", - "reth-interfaces", - "reth-nippy-jar", - "reth-primitives", - "reth-provider", - "reth-stages", - "tempfile", - "thiserror", - "tokio", - "tracing", -] - [[package]] name = "reth-stages" version = "0.1.0-alpha.20" @@ -6905,6 +6903,7 @@ dependencies = [ "reth-db", "reth-downloaders", "reth-eth-wire", + "reth-etl", "reth-interfaces", "reth-metrics", "reth-node-ethereum", @@ -6912,11 +6911,34 @@ dependencies = [ "reth-primitives", "reth-provider", "reth-revm", + "reth-static-file", "reth-tokio-util", "reth-trie", "revm", "serde", "serde_json", + "tempfile", + "thiserror", + "tokio", + "tokio-stream", + "tracing", +] + +[[package]] +name = "reth-static-file" +version = "0.1.0-alpha.20" +dependencies = [ + "assert_matches", + "clap", + "rayon", + "reth-db", + "reth-interfaces", + "reth-nippy-jar", + "reth-primitives", + "reth-provider", + "reth-stages", + "reth-tokio-util", + "tempfile", "thiserror", "tokio", "tokio-stream", diff --git a/Cargo.toml b/Cargo.toml index 872782b3efb..b33dd01f1dd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,6 +8,7 @@ members = [ "crates/consensus/beacon-core/", "crates/consensus/common/", "crates/ethereum-forks/", + "crates/etl", "crates/interfaces/", "crates/metrics/", 
"crates/metrics/metrics-derive/", @@ -41,8 +42,8 @@ members = [ "crates/node-optimism/", "crates/node-core/", "crates/node-api/", - "crates/snapshot/", "crates/stages/", + "crates/static-file/", "crates/storage/codecs/", "crates/storage/codecs/derive/", "crates/storage/db/", @@ -136,6 +137,7 @@ reth-ecies = { path = "crates/net/ecies" } reth-eth-wire = { path = "crates/net/eth-wire" } reth-ethereum-forks = { path = "crates/ethereum-forks" } reth-ethereum-payload-builder = { path = "crates/payload/ethereum" } +reth-etl = { path = "crates/etl" } reth-optimism-payload-builder = { path = "crates/payload/optimism" } reth-interfaces = { path = "crates/interfaces" } reth-ipc = { path = "crates/rpc/ipc" } @@ -161,8 +163,8 @@ reth-rpc-builder = { path = "crates/rpc/rpc-builder" } reth-rpc-engine-api = { path = "crates/rpc/rpc-engine-api" } reth-rpc-types = { path = "crates/rpc/rpc-types" } reth-rpc-types-compat = { path = "crates/rpc/rpc-types-compat" } -reth-snapshot = { path = "crates/snapshot" } reth-stages = { path = "crates/stages" } +reth-static-file = { path = "crates/static-file" } reth-tasks = { path = "crates/tasks" } reth-tokio-util = { path = "crates/tokio-util" } reth-tracing = { path = "crates/tracing" } diff --git a/bin/reth/Cargo.toml b/bin/reth/Cargo.toml index 1d1d4639ff7..afc71932e23 100644 --- a/bin/reth/Cargo.toml +++ b/bin/reth/Cargo.toml @@ -50,7 +50,7 @@ reth-payload-validator.workspace = true reth-basic-payload-builder.workspace = true reth-discv4.workspace = true reth-prune.workspace = true -reth-snapshot = { workspace = true, features = ["clap"] } +reth-static-file = { workspace = true, features = ["clap"] } reth-trie.workspace = true reth-nippy-jar.workspace = true reth-node-api.workspace = true diff --git a/bin/reth/src/builder.rs b/bin/reth/src/builder.rs index 41458ac8f61..59f4b4e2428 100644 --- a/bin/reth/src/builder.rs +++ b/bin/reth/src/builder.rs @@ -6,7 +6,7 @@ use fdlimit::raise_fd_limit; use futures::{future::Either, stream, stream_select, StreamExt}; use reth_auto_seal_consensus::AutoSealBuilder; use reth_beacon_consensus::{ - hooks::{EngineHooks, PruneHook}, + hooks::{EngineHooks, PruneHook, StaticFileHook}, BeaconConsensusEngine, MIN_BLOCKS_FOR_PIPELINE_RUN, }; use reth_blockchain_tree::{config::BlockchainTreeConfig, ShareableBlockchainTree}; @@ -40,6 +40,7 @@ use reth_primitives::format_ether; use reth_provider::{providers::BlockchainProvider, ProviderFactory}; use reth_prune::PrunerBuilder; use reth_rpc_engine_api::EngineApi; +use reth_static_file::StaticFileProducer; use reth_tasks::{TaskExecutor, TaskManager}; use reth_transaction_pool::TransactionPool; use std::{path::PathBuf, sync::Arc}; @@ -127,26 +128,18 @@ impl NodeBuilderWit let prometheus_handle = self.config.install_prometheus_recorder()?; - let mut provider_factory = - ProviderFactory::new(Arc::clone(&self.db), Arc::clone(&self.config.chain)); - - // configure snapshotter - let snapshotter = reth_snapshot::Snapshotter::new( - provider_factory.clone(), - self.data_dir.snapshots_path(), - self.config.chain.snapshot_block_interval, - )?; - - provider_factory = provider_factory.with_snapshots( - self.data_dir.snapshots_path(), - snapshotter.highest_snapshot_receiver(), - )?; + let provider_factory = ProviderFactory::new( + Arc::clone(&self.db), + Arc::clone(&self.config.chain), + self.data_dir.static_files_path(), + )? 
+ .with_static_files_metrics(); self.config.start_metrics_endpoint(prometheus_handle, Arc::clone(&self.db)).await?; debug!(target: "reth::cli", chain=%self.config.chain.chain, genesis=?self.config.chain.genesis_hash(), "Initializing genesis"); - let genesis_hash = init_genesis(Arc::clone(&self.db), self.config.chain.clone())?; + let genesis_hash = init_genesis(provider_factory.clone())?; info!(target: "reth::cli", "{}", self.config.chain.display_hardforks()); @@ -270,6 +263,17 @@ impl NodeBuilderWit }; let max_block = self.config.max_block(&network_client, provider_factory.clone()).await?; + let mut hooks = EngineHooks::new(); + + let mut static_file_producer = StaticFileProducer::new( + provider_factory.clone(), + provider_factory.static_file_provider(), + prune_config.clone().unwrap_or_default().segments, + ); + let static_file_producer_events = static_file_producer.events(); + hooks.add(StaticFileHook::new(static_file_producer.clone(), Box::new(executor.clone()))); + info!(target: "reth::cli", "StaticFileProducer initialized"); + // Configure the pipeline let (mut pipeline, client) = if self.config.dev.dev { info!(target: "reth::cli", "Starting Reth in dev mode"); @@ -301,6 +305,7 @@ impl NodeBuilderWit sync_metrics_tx, prune_config.clone(), max_block, + static_file_producer, evm_config, ) .await?; @@ -323,6 +328,7 @@ impl NodeBuilderWit sync_metrics_tx, prune_config.clone(), max_block, + static_file_producer, evm_config, ) .await?; @@ -333,22 +339,16 @@ impl NodeBuilderWit let pipeline_events = pipeline.events(); let initial_target = self.config.initial_pipeline_target(genesis_hash); - let mut hooks = EngineHooks::new(); - let pruner_events = if let Some(prune_config) = prune_config { - let mut pruner = PrunerBuilder::new(prune_config.clone()) - .max_reorg_depth(tree_config.max_reorg_depth() as usize) - .prune_delete_limit(self.config.chain.prune_delete_limit) - .build(provider_factory, snapshotter.highest_snapshot_receiver()); + let prune_config = prune_config.unwrap_or_default(); + let mut pruner = PrunerBuilder::new(prune_config.clone()) + .max_reorg_depth(tree_config.max_reorg_depth() as usize) + .prune_delete_limit(self.config.chain.prune_delete_limit) + .build(provider_factory.clone()); - let events = pruner.events(); - hooks.add(PruneHook::new(pruner, Box::new(executor.clone()))); - - info!(target: "reth::cli", ?prune_config, "Pruner initialized"); - Either::Left(events) - } else { - Either::Right(stream::empty()) - }; + let pruner_events = pruner.events(); + hooks.add(PruneHook::new(pruner, Box::new(executor.clone()))); + info!(target: "reth::cli", ?prune_config, "Pruner initialized"); // Configure the consensus engine let (beacon_consensus_engine, beacon_engine_handle) = BeaconConsensusEngine::with_channel( @@ -380,7 +380,8 @@ impl NodeBuilderWit } else { Either::Right(stream::empty()) }, - pruner_events.map(Into::into) + pruner_events.map(Into::into), + static_file_producer_events.map(Into::into), ); executor.spawn_critical( "events task", diff --git a/bin/reth/src/commands/db/clear.rs b/bin/reth/src/commands/db/clear.rs index e26f60631af..a7c32cac1b4 100644 --- a/bin/reth/src/commands/db/clear.rs +++ b/bin/reth/src/commands/db/clear.rs @@ -1,27 +1,53 @@ -use clap::Parser; +use clap::{Parser, Subcommand}; use reth_db::{ database::Database, + static_file::iter_static_files, table::Table, transaction::{DbTx, DbTxMut}, TableViewer, Tables, }; +use reth_primitives::{static_file::find_fixed_range, StaticFileSegment}; +use reth_provider::ProviderFactory; /// The arguments for the 
`reth db clear` command #[derive(Parser, Debug)] pub struct Command { - /// Table name - pub table: Tables, + #[clap(subcommand)] + subcommand: Subcommands, } impl Command { /// Execute `db clear` command - pub fn execute(self, db: &DB) -> eyre::Result<()> { - self.table.view(&ClearViewer { db })?; + pub fn execute(self, provider_factory: ProviderFactory) -> eyre::Result<()> { + match self.subcommand { + Subcommands::Mdbx { table } => { + table.view(&ClearViewer { db: provider_factory.db_ref() })? + } + Subcommands::StaticFile { segment } => { + let static_file_provider = provider_factory.static_file_provider(); + let static_files = iter_static_files(static_file_provider.directory())?; + + if let Some(segment_static_files) = static_files.get(&segment) { + for (block_range, _) in segment_static_files { + static_file_provider + .delete_jar(segment, find_fixed_range(block_range.start()))?; + } + } + } + } Ok(()) } } +#[derive(Subcommand, Debug)] +enum Subcommands { + /// Deletes all database table entries + Mdbx { table: Tables }, + /// Deletes all static file segment entries + StaticFile { segment: StaticFileSegment }, +} + struct ClearViewer<'a, DB: Database> { db: &'a DB, } diff --git a/bin/reth/src/commands/db/diff.rs b/bin/reth/src/commands/db/diff.rs index 6ead06a66ca..d36278ca1b4 100644 --- a/bin/reth/src/commands/db/diff.rs +++ b/bin/reth/src/commands/db/diff.rs @@ -57,7 +57,7 @@ impl Command { /// /// The discrepancies and extra elements, along with a brief summary of the diff results are /// then written to a file in the output directory. - pub fn execute(self, tool: &DbTool<'_, DatabaseEnv>) -> eyre::Result<()> { + pub fn execute(self, tool: &DbTool) -> eyre::Result<()> { // open second db let second_db_path: PathBuf = self.secondary_datadir.join("db").into(); let second_db = open_db_read_only( @@ -71,7 +71,7 @@ impl Command { }; for table in tables { - let primary_tx = tool.db.tx()?; + let primary_tx = tool.provider_factory.db_ref().tx()?; let secondary_tx = second_db.tx()?; let output_dir = self.output.clone(); diff --git a/bin/reth/src/commands/db/get.rs b/bin/reth/src/commands/db/get.rs index f39fced266d..1ffe038e96a 100644 --- a/bin/reth/src/commands/db/get.rs +++ b/bin/reth/src/commands/db/get.rs @@ -2,64 +2,152 @@ use crate::utils::DbTool; use clap::Parser; use reth_db::{ database::Database, - table::{DupSort, Table}, - RawKey, RawTable, TableViewer, Tables, + static_file::{ColumnSelectorOne, ColumnSelectorTwo, HeaderMask, ReceiptMask, TransactionMask}, + table::{Decompress, DupSort, Table}, + tables, RawKey, RawTable, Receipts, TableViewer, Transactions, }; +use reth_primitives::{BlockHash, Header, StaticFileSegment}; use tracing::error; /// The arguments for the `reth db get` command #[derive(Parser, Debug)] pub struct Command { - /// The table name - /// - /// NOTE: The dupsort tables are not supported now. 
- pub table: Tables, - - /// The key to get content for - #[arg(value_parser = maybe_json_value_parser)] - pub key: String, - - /// The subkey to get content for - #[arg(value_parser = maybe_json_value_parser)] - pub subkey: Option, - - /// Output bytes instead of human-readable decoded value - #[clap(long)] - pub raw: bool, + #[clap(subcommand)] + subcommand: Subcommand, +} + +#[derive(clap::Subcommand, Debug)] +enum Subcommand { + /// Gets the content of a database table for the given key + Mdbx { + table: tables::Tables, + + /// The key to get content for + #[arg(value_parser = maybe_json_value_parser)] + key: String, + + /// The subkey to get content for + #[arg(value_parser = maybe_json_value_parser)] + subkey: Option, + + /// Output bytes instead of human-readable decoded value + #[clap(long)] + raw: bool, + }, + /// Gets the content of a static file segment for the given key + StaticFile { + segment: StaticFileSegment, + + /// The key to get content for + #[arg(value_parser = maybe_json_value_parser)] + key: String, + + /// Output bytes instead of human-readable decoded value + #[clap(long)] + raw: bool, + }, } impl Command { /// Execute `db get` command - pub fn execute(self, tool: &DbTool<'_, DB>) -> eyre::Result<()> { - self.table.view(&GetValueViewer { tool, args: &self }) - } + pub fn execute(self, tool: &DbTool) -> eyre::Result<()> { + match self.subcommand { + Subcommand::Mdbx { table, key, subkey, raw } => { + table.view(&GetValueViewer { tool, key, subkey, raw })? + } + Subcommand::StaticFile { segment, key, raw } => { + let (key, mask): (u64, _) = match segment { + StaticFileSegment::Headers => { + (table_key::(&key)?, >::MASK) + } + StaticFileSegment::Transactions => ( + table_key::(&key)?, + ::Value>>::MASK, + ), + StaticFileSegment::Receipts => ( + table_key::(&key)?, + ::Value>>::MASK, + ), + }; - /// Get an instance of key for given table - pub fn table_key(&self) -> Result { - assert_eq!(T::TABLE, self.table); - serde_json::from_str::(&self.key).map_err(Into::into) - } + let content = tool.provider_factory.static_file_provider().find_static_file( + segment, + |provider| { + let mut cursor = provider.cursor()?; + cursor.get(key.into(), mask).map(|result| { + result.map(|vec| { + vec.iter().map(|slice| slice.to_vec()).collect::>() + }) + }) + }, + )?; - /// Get an instance of subkey for given dupsort table - fn table_subkey(&self) -> Result { - assert_eq!(T::TABLE, self.table); - serde_json::from_str::(&self.subkey.clone().unwrap_or_default()) - .map_err(Into::into) + match content { + Some(content) => { + if raw { + println!("{:?}", content); + } else { + match segment { + StaticFileSegment::Headers => { + let header = Header::decompress(content[0].as_slice())?; + let block_hash = BlockHash::decompress(content[1].as_slice())?; + println!( + "{}\n{}", + serde_json::to_string_pretty(&header)?, + serde_json::to_string_pretty(&block_hash)? 
+ ); + } + StaticFileSegment::Transactions => { + let transaction = <::Value>::decompress( + content[0].as_slice(), + )?; + println!("{}", serde_json::to_string_pretty(&transaction)?); + } + StaticFileSegment::Receipts => { + let receipt = <::Value>::decompress( + content[0].as_slice(), + )?; + println!("{}", serde_json::to_string_pretty(&receipt)?); + } + } + } + } + None => { + error!(target: "reth::cli", "No content for the given table key."); + } + }; + } + } + + Ok(()) } } +/// Get an instance of key for given table +fn table_key(key: &str) -> Result { + serde_json::from_str::(key).map_err(|e| eyre::eyre!(e)) +} + +/// Get an instance of subkey for given dupsort table +fn table_subkey(subkey: &Option) -> Result { + serde_json::from_str::(&subkey.clone().unwrap_or_default()) + .map_err(|e| eyre::eyre!(e)) +} + struct GetValueViewer<'a, DB: Database> { - tool: &'a DbTool<'a, DB>, - args: &'a Command, + tool: &'a DbTool, + key: String, + subkey: Option, + raw: bool, } impl TableViewer<()> for GetValueViewer<'_, DB> { type Error = eyre::Report; fn view(&self) -> Result<(), Self::Error> { - let key = self.args.table_key::()?; + let key = table_key::(&self.key)?; - let content = if self.args.raw { + let content = if self.raw { self.tool .get::>(RawKey::from(key))? .map(|content| format!("{:?}", content.raw_value())) @@ -81,10 +169,10 @@ impl TableViewer<()> for GetValueViewer<'_, DB> { fn view_dupsort(&self) -> Result<(), Self::Error> { // get a key for given table - let key = self.args.table_key::()?; + let key = table_key::(&self.key)?; // process dupsort table - let subkey = self.args.table_subkey::()?; + let subkey = table_subkey::(&self.subkey)?; match self.tool.get_dup::(key, subkey)? { Some(content) => { @@ -127,17 +215,12 @@ mod tests { #[test] fn parse_numeric_key_args() { - let args = CommandParser::::parse_from(["reth", "Headers", "123"]).args; - assert_eq!(args.table_key::().unwrap(), 123); - - let args = CommandParser::::parse_from([ - "reth", - "HashedAccounts", - "0x0ac361fe774b78f8fc4e86c1916930d150865c3fc2e21dca2e58833557608bac", - ]) - .args; + assert_eq!(table_key::("123").unwrap(), 123); assert_eq!( - args.table_key::().unwrap(), + table_key::( + "\"0x0ac361fe774b78f8fc4e86c1916930d150865c3fc2e21dca2e58833557608bac\"" + ) + .unwrap(), B256::from_str("0x0ac361fe774b78f8fc4e86c1916930d150865c3fc2e21dca2e58833557608bac") .unwrap() ); @@ -145,17 +228,16 @@ mod tests { #[test] fn parse_string_key_args() { - let args = - CommandParser::::parse_from(["reth", "StageCheckpoints", "MerkleExecution"]) - .args; - assert_eq!(args.table_key::().unwrap(), "MerkleExecution"); + assert_eq!( + table_key::("\"MerkleExecution\"").unwrap(), + "MerkleExecution" + ); } #[test] fn parse_json_key_args() { - let args = CommandParser::::parse_from(["reth", "StoragesHistory", r#"{ "address": "0x01957911244e546ce519fbac6f798958fafadb41", "sharded_key": { "key": "0x0000000000000000000000000000000000000000000000000000000000000003", "highest_block_number": 18446744073709551615 } }"#]).args; assert_eq!( - args.table_key::().unwrap(), + table_key::(r#"{ "address": "0x01957911244e546ce519fbac6f798958fafadb41", "sharded_key": { "key": "0x0000000000000000000000000000000000000000000000000000000000000003", "highest_block_number": 18446744073709551615 } }"#).unwrap(), StorageShardedKey::new( Address::from_str("0x01957911244e546ce519fbac6f798958fafadb41").unwrap(), B256::from_str( @@ -169,9 +251,8 @@ mod tests { #[test] fn parse_json_key_for_account_history() { - let args = CommandParser::::parse_from(["reth", 
"AccountsHistory", r#"{ "key": "0x4448e1273fd5a8bfdb9ed111e96889c960eee145", "highest_block_number": 18446744073709551615 }"#]).args; assert_eq!( - args.table_key::().unwrap(), + table_key::(r#"{ "key": "0x4448e1273fd5a8bfdb9ed111e96889c960eee145", "highest_block_number": 18446744073709551615 }"#).unwrap(), ShardedKey::new( Address::from_str("0x4448e1273fd5a8bfdb9ed111e96889c960eee145").unwrap(), 18446744073709551615 diff --git a/bin/reth/src/commands/db/list.rs b/bin/reth/src/commands/db/list.rs index 49cb4affe9f..bd1b6033e37 100644 --- a/bin/reth/src/commands/db/list.rs +++ b/bin/reth/src/commands/db/list.rs @@ -50,7 +50,7 @@ pub struct Command { impl Command { /// Execute `db list` command - pub fn execute(self, tool: &DbTool<'_, DatabaseEnv>) -> eyre::Result<()> { + pub fn execute(self, tool: &DbTool) -> eyre::Result<()> { self.table.view(&ListTableViewer { tool, args: &self }) } @@ -81,7 +81,7 @@ impl Command { } struct ListTableViewer<'a> { - tool: &'a DbTool<'a, DatabaseEnv>, + tool: &'a DbTool, args: &'a Command, } @@ -89,7 +89,7 @@ impl TableViewer<()> for ListTableViewer<'_> { type Error = eyre::Report; fn view(&self) -> Result<(), Self::Error> { - self.tool.db.view(|tx| { + self.tool.provider_factory.db_ref().view(|tx| { let table_db = tx.inner.open_db(Some(self.args.table.name())).wrap_err("Could not open db.")?; let stats = tx.inner.db_stat(&table_db).wrap_err(format!("Could not find table: {}", stringify!($table)))?; let total_entries = stats.entries(); diff --git a/bin/reth/src/commands/db/mod.rs b/bin/reth/src/commands/db/mod.rs index e6f199cd7a6..66ccd24e2ed 100644 --- a/bin/reth/src/commands/db/mod.rs +++ b/bin/reth/src/commands/db/mod.rs @@ -9,18 +9,13 @@ use crate::{ utils::DbTool, }; use clap::{Parser, Subcommand}; -use comfy_table::{Cell, Row, Table as ComfyTable}; -use eyre::WrapErr; -use human_bytes::human_bytes; use reth_db::{ - database::Database, - mdbx, mdbx::DatabaseArguments, open_db, open_db_read_only, version::{get_db_version, DatabaseVersionError, DB_VERSION}, - Tables, }; use reth_primitives::ChainSpec; +use reth_provider::ProviderFactory; use std::{ io::{self, Write}, sync::Arc, @@ -30,7 +25,8 @@ mod clear; mod diff; mod get; mod list; -mod snapshots; +mod static_files; +mod stats; /// DB List TUI mod tui; @@ -71,7 +67,7 @@ pub struct Command { /// `reth db` subcommands pub enum Subcommands { /// Lists all the tables, their entry count and their size - Stats, + Stats(stats::Command), /// Lists the contents of a table List(list::Command), /// Create a diff between two database tables or two entire databases. @@ -86,8 +82,8 @@ pub enum Subcommands { }, /// Deletes all table entries Clear(clear::Command), - /// Snapshots tables from database - Snapshot(snapshots::Command), + /// Creates static files from database tables + CreateStaticFiles(static_files::Command), /// Lists current and local database versions Version, /// Returns the full database path @@ -103,99 +99,26 @@ impl Command { match self.command { // TODO: We'll need to add this on the DB trait. - Subcommands::Stats { .. 
} => { + Subcommands::Stats(command) => { let db = open_db_read_only( &db_path, DatabaseArguments::default().log_level(self.db.log_level), )?; - let tool = DbTool::new(&db, self.chain.clone())?; - let mut stats_table = ComfyTable::new(); - stats_table.load_preset(comfy_table::presets::ASCII_MARKDOWN); - stats_table.set_header([ - "Table Name", - "# Entries", - "Branch Pages", - "Leaf Pages", - "Overflow Pages", - "Total Size", - ]); + let provider_factory = + ProviderFactory::new(db, self.chain.clone(), data_dir.static_files_path())?; - tool.db.view(|tx| { - let mut tables = - Tables::ALL.iter().map(|table| table.name()).collect::>(); - tables.sort(); - let mut total_size = 0; - for table in tables { - let table_db = - tx.inner.open_db(Some(table)).wrap_err("Could not open db.")?; - - let stats = tx - .inner - .db_stat(&table_db) - .wrap_err(format!("Could not find table: {table}"))?; - - // Defaults to 16KB right now but we should - // re-evaluate depending on the DB we end up using - // (e.g. REDB does not have these options as configurable intentionally) - let page_size = stats.page_size() as usize; - let leaf_pages = stats.leaf_pages(); - let branch_pages = stats.branch_pages(); - let overflow_pages = stats.overflow_pages(); - let num_pages = leaf_pages + branch_pages + overflow_pages; - let table_size = page_size * num_pages; - - total_size += table_size; - let mut row = Row::new(); - row.add_cell(Cell::new(table)) - .add_cell(Cell::new(stats.entries())) - .add_cell(Cell::new(branch_pages)) - .add_cell(Cell::new(leaf_pages)) - .add_cell(Cell::new(overflow_pages)) - .add_cell(Cell::new(human_bytes(table_size as f64))); - stats_table.add_row(row); - } - - let max_widths = stats_table.column_max_content_widths(); - - let mut seperator = Row::new(); - for width in max_widths { - seperator.add_cell(Cell::new("-".repeat(width as usize))); - } - stats_table.add_row(seperator); - - let mut row = Row::new(); - row.add_cell(Cell::new("Total DB size")) - .add_cell(Cell::new("")) - .add_cell(Cell::new("")) - .add_cell(Cell::new("")) - .add_cell(Cell::new("")) - .add_cell(Cell::new(human_bytes(total_size as f64))); - stats_table.add_row(row); - - let freelist = tx.inner.env().freelist()?; - let freelist_size = freelist * - tx.inner.db_stat(&mdbx::Database::freelist_db())?.page_size() as usize; - - let mut row = Row::new(); - row.add_cell(Cell::new("Freelist size")) - .add_cell(Cell::new(freelist)) - .add_cell(Cell::new("")) - .add_cell(Cell::new("")) - .add_cell(Cell::new("")) - .add_cell(Cell::new(human_bytes(freelist_size as f64))); - stats_table.add_row(row); - - Ok::<(), eyre::Report>(()) - })??; - - println!("{stats_table}"); + let tool = DbTool::new(provider_factory, self.chain.clone())?; + command.execute(data_dir, &tool)?; } Subcommands::List(command) => { let db = open_db_read_only( &db_path, DatabaseArguments::default().log_level(self.db.log_level), )?; - let tool = DbTool::new(&db, self.chain.clone())?; + let provider_factory = + ProviderFactory::new(db, self.chain.clone(), data_dir.static_files_path())?; + + let tool = DbTool::new(provider_factory, self.chain.clone())?; command.execute(&tool)?; } Subcommands::Diff(command) => { @@ -203,7 +126,10 @@ impl Command { &db_path, DatabaseArguments::default().log_level(self.db.log_level), )?; - let tool = DbTool::new(&db, self.chain.clone())?; + let provider_factory = + ProviderFactory::new(db, self.chain.clone(), data_dir.static_files_path())?; + + let tool = DbTool::new(provider_factory, self.chain.clone())?; command.execute(&tool)?; } 
Subcommands::Get(command) => { @@ -211,7 +137,10 @@ impl Command { &db_path, DatabaseArguments::default().log_level(self.db.log_level), )?; - let tool = DbTool::new(&db, self.chain.clone())?; + let provider_factory = + ProviderFactory::new(db, self.chain.clone(), data_dir.static_files_path())?; + + let tool = DbTool::new(provider_factory, self.chain.clone())?; command.execute(&tool)?; } Subcommands::Drop { force } => { @@ -232,16 +161,22 @@ impl Command { let db = open_db(&db_path, DatabaseArguments::default().log_level(self.db.log_level))?; - let mut tool = DbTool::new(&db, self.chain.clone())?; + let provider_factory = + ProviderFactory::new(db, self.chain.clone(), data_dir.static_files_path())?; + + let mut tool = DbTool::new(provider_factory, self.chain.clone())?; tool.drop(db_path)?; } Subcommands::Clear(command) => { let db = open_db(&db_path, DatabaseArguments::default().log_level(self.db.log_level))?; - command.execute(&db)?; + let provider_factory = + ProviderFactory::new(db, self.chain.clone(), data_dir.static_files_path())?; + + command.execute(provider_factory)?; } - Subcommands::Snapshot(command) => { - command.execute(&db_path, self.db.log_level, self.chain.clone())?; + Subcommands::CreateStaticFiles(command) => { + command.execute(data_dir, self.db.log_level, self.chain.clone())?; } Subcommands::Version => { let local_db_version = match get_db_version(&db_path) { diff --git a/bin/reth/src/commands/db/snapshots/bench.rs b/bin/reth/src/commands/db/static_files/bench.rs similarity index 69% rename from bin/reth/src/commands/db/snapshots/bench.rs rename to bin/reth/src/commands/db/static_files/bench.rs index 928898205f0..d3de628e2b4 100644 --- a/bin/reth/src/commands/db/snapshots/bench.rs +++ b/bin/reth/src/commands/db/static_files/bench.rs @@ -1,7 +1,7 @@ use reth_db::DatabaseEnv; use reth_primitives::{ - snapshot::{Compression, Filters}, - ChainSpec, SnapshotSegment, + static_file::{Compression, Filters}, + StaticFileSegment, }; use reth_provider::{DatabaseProviderRO, ProviderFactory}; use std::{fmt::Debug, sync::Arc, time::Instant}; @@ -16,11 +16,11 @@ pub(crate) enum BenchKind { pub(crate) fn bench( bench_kind: BenchKind, - db: (DatabaseEnv, Arc), - segment: SnapshotSegment, + provider_factory: Arc>, + segment: StaticFileSegment, filters: Filters, compression: Compression, - mut snapshot_method: F1, + mut static_file_method: F1, database_method: F2, ) -> eyre::Result<()> where @@ -28,22 +28,19 @@ where F2: Fn(DatabaseProviderRO) -> eyre::Result, R: Debug + PartialEq, { - let (db, chain) = db; - println!(); println!("############"); println!("## [{segment:?}] [{compression:?}] [{filters:?}] [{bench_kind:?}]"); - let snap_result = { + let static_file_result = { let start = Instant::now(); - let result = snapshot_method()?; + let result = static_file_method()?; let end = start.elapsed().as_micros(); - println!("# snapshot {bench_kind:?} | {end} μs"); + println!("# static file {bench_kind:?} | {end} μs"); result }; let db_result = { - let factory = ProviderFactory::new(db, chain); - let provider = factory.provider()?; + let provider = provider_factory.provider()?; let start = Instant::now(); let result = database_method(provider)?; let end = start.elapsed().as_micros(); @@ -51,7 +48,7 @@ where result }; - assert_eq!(snap_result, db_result); + assert_eq!(static_file_result, db_result); Ok(()) } diff --git a/bin/reth/src/commands/db/snapshots/headers.rs b/bin/reth/src/commands/db/static_files/headers.rs similarity index 64% rename from bin/reth/src/commands/db/snapshots/headers.rs 
rename to bin/reth/src/commands/db/static_files/headers.rs index 6b6f2b11904..452063a89a1 100644 --- a/bin/reth/src/commands/db/snapshots/headers.rs +++ b/bin/reth/src/commands/db/static_files/headers.rs @@ -3,38 +3,27 @@ use super::{ Command, }; use rand::{seq::SliceRandom, Rng}; -use reth_db::{mdbx::DatabaseArguments, open_db_read_only, snapshot::HeaderMask}; -use reth_interfaces::db::LogLevel; +use reth_db::{static_file::HeaderMask, DatabaseEnv}; use reth_primitives::{ - snapshot::{Compression, Filters, InclusionFilter, PerfectHashingFunction}, - BlockHash, ChainSpec, Header, SnapshotSegment, + static_file::{Compression, Filters, InclusionFilter, PerfectHashingFunction}, + BlockHash, Header, StaticFileSegment, }; use reth_provider::{ - providers::SnapshotProvider, BlockNumReader, HeaderProvider, ProviderError, ProviderFactory, - TransactionsProviderExt, -}; -use std::{ - path::{Path, PathBuf}, - sync::Arc, + providers::StaticFileProvider, BlockNumReader, HeaderProvider, ProviderError, ProviderFactory, }; +use std::{ops::RangeInclusive, path::PathBuf, sync::Arc}; impl Command { - pub(crate) fn bench_headers_snapshot( + pub(crate) fn bench_headers_static_file( &self, - db_path: &Path, - log_level: Option, - chain: Arc, + provider_factory: Arc>, compression: Compression, inclusion_filter: InclusionFilter, phf: Option, ) -> eyre::Result<()> { - let db_args = DatabaseArguments::default().log_level(log_level); - - let factory = ProviderFactory::new(open_db_read_only(db_path, db_args)?, chain.clone()); - let provider = factory.provider()?; + let provider = provider_factory.provider()?; let tip = provider.last_block_number()?; - let block_range = - self.block_ranges(tip).first().expect("has been generated before").clone(); + let block_range = *self.block_ranges(tip).first().expect("has been generated before"); let filters = if let Some(phf) = self.with_filters.then_some(phf).flatten() { Filters::WithFilters(inclusion_filter, phf) @@ -42,19 +31,16 @@ impl Command { Filters::WithoutFilters }; - let mut row_indexes = block_range.clone().collect::>(); + let range: RangeInclusive = (&block_range).into(); + let mut row_indexes = range.collect::>(); let mut rng = rand::thread_rng(); - let tx_range = ProviderFactory::new(open_db_read_only(db_path, db_args)?, chain.clone()) - .provider()? 
- .transaction_range_by_block_range(block_range.clone())?; - - let path: PathBuf = SnapshotSegment::Headers - .filename_with_configuration(filters, compression, &block_range, &tx_range) + let path: PathBuf = StaticFileSegment::Headers + .filename_with_configuration(filters, compression, &block_range) .into(); - let provider = SnapshotProvider::new(PathBuf::default())?; + let provider = StaticFileProvider::new(PathBuf::default())?; let jar_provider = provider.get_segment_provider_from_block( - SnapshotSegment::Headers, + StaticFileSegment::Headers, self.from, Some(&path), )?; @@ -63,8 +49,8 @@ impl Command { for bench_kind in [BenchKind::Walk, BenchKind::RandomAll] { bench( bench_kind, - (open_db_read_only(db_path, db_args)?, chain.clone()), - SnapshotSegment::Headers, + provider_factory.clone(), + StaticFileSegment::Headers, filters, compression, || { @@ -94,8 +80,8 @@ impl Command { let num = row_indexes[rng.gen_range(0..row_indexes.len())]; bench( BenchKind::RandomOne, - (open_db_read_only(db_path, db_args)?, chain.clone()), - SnapshotSegment::Headers, + provider_factory.clone(), + StaticFileSegment::Headers, filters, compression, || { @@ -114,16 +100,15 @@ impl Command { // BENCHMARK QUERYING A RANDOM HEADER BY HASH { let num = row_indexes[rng.gen_range(0..row_indexes.len())] as u64; - let header_hash = - ProviderFactory::new(open_db_read_only(db_path, db_args)?, chain.clone()) - .header_by_number(num)? - .ok_or(ProviderError::HeaderNotFound(num.into()))? - .hash_slow(); + let header_hash = provider_factory + .header_by_number(num)? + .ok_or(ProviderError::HeaderNotFound(num.into()))? + .hash_slow(); bench( BenchKind::RandomHash, - (open_db_read_only(db_path, db_args)?, chain.clone()), - SnapshotSegment::Headers, + provider_factory.clone(), + StaticFileSegment::Headers, filters, compression, || { diff --git a/bin/reth/src/commands/db/snapshots/mod.rs b/bin/reth/src/commands/db/static_files/mod.rs similarity index 54% rename from bin/reth/src/commands/db/snapshots/mod.rs rename to bin/reth/src/commands/db/static_files/mod.rs index 410f77b7f7a..eb433ac6143 100644 --- a/bin/reth/src/commands/db/snapshots/mod.rs +++ b/bin/reth/src/commands/db/static_files/mod.rs @@ -9,14 +9,17 @@ use reth_db::{ }; use reth_interfaces::db::LogLevel; use reth_nippy_jar::{NippyJar, NippyJarCursor}; +use reth_node_core::dirs::{ChainPath, DataDirPath}; use reth_primitives::{ - snapshot::{Compression, Filters, InclusionFilter, PerfectHashingFunction, SegmentHeader}, - BlockNumber, ChainSpec, SnapshotSegment, + static_file::{ + Compression, Filters, InclusionFilter, PerfectHashingFunction, SegmentConfig, + SegmentHeader, SegmentRangeInclusive, + }, + BlockNumber, ChainSpec, StaticFileSegment, }; -use reth_provider::{BlockNumReader, ProviderFactory, TransactionsProviderExt}; -use reth_snapshot::{segments as snap_segments, segments::Segment}; +use reth_provider::{BlockNumReader, ProviderFactory}; +use reth_static_file::{segments as static_file_segments, segments::Segment}; use std::{ - ops::RangeInclusive, path::{Path, PathBuf}, sync::Arc, time::{Duration, Instant}, @@ -28,20 +31,20 @@ mod receipts; mod transactions; #[derive(Parser, Debug)] -/// Arguments for the `reth db snapshot` command. +/// Arguments for the `reth db create-static-files` command. pub struct Command { - /// Snapshot segments to generate. - segments: Vec, + /// Static File segments to generate. + segments: Vec, - /// Starting block for the snapshot. + /// Starting block for the static file. 
#[arg(long, short, default_value = "0")] from: BlockNumber, - /// Number of blocks in the snapshot. + /// Number of blocks in the static file. #[arg(long, short, default_value = "500000")] block_interval: u64, - /// Sets the number of snapshots built in parallel. Note: Each parallel build is + /// Sets the number of static files built in parallel. Note: Each parallel build is /// memory-intensive. #[arg( long, short, @@ -50,15 +53,15 @@ pub struct Command { )] parallel: u64, - /// Flag to skip snapshot creation and print snapshot files stats. + /// Flag to skip static file creation and print static files stats. #[arg(long, default_value = "false")] only_stats: bool, - /// Flag to enable database-to-snapshot benchmarking. + /// Flag to enable database-to-static file benchmarking. #[arg(long, default_value = "false")] bench: bool, - /// Flag to skip snapshot creation and only run benchmarks on existing snapshots. + /// Flag to skip static file creation and only run benchmarks on existing static files. #[arg(long, default_value = "false")] only_bench: bool, @@ -76,30 +79,33 @@ pub struct Command { } impl Command { - /// Execute `db snapshot` command + /// Execute `db create-static-files` command pub fn execute( self, - db_path: &Path, + data_dir: ChainPath, log_level: Option, chain: Arc, ) -> eyre::Result<()> { - let all_combinations = - self.segments.iter().cartesian_product(self.compression.iter()).cartesian_product( - if self.phf.is_empty() { - vec![None] - } else { - self.phf.iter().copied().map(Some).collect::>() - }, - ); + let all_combinations = self + .segments + .iter() + .cartesian_product(self.compression.iter().copied()) + .cartesian_product(if self.phf.is_empty() { + vec![None] + } else { + self.phf.iter().copied().map(Some).collect::>() + }); + + let db = open_db_read_only( + data_dir.db_path().as_path(), + DatabaseArguments::default() + .log_level(log_level) + .max_read_transaction_duration(Some(MaxReadTransactionDuration::Unbounded)), + )?; + let provider_factory = + Arc::new(ProviderFactory::new(db, chain.clone(), data_dir.static_files_path())?); { - let db = open_db_read_only( - db_path, - DatabaseArguments::default() - .max_read_transaction_duration(Some(MaxReadTransactionDuration::Unbounded)), - )?; - let factory = Arc::new(ProviderFactory::new(db, chain.clone())); - if !self.only_bench { for ((mode, compression), phf) in all_combinations.clone() { let filters = if let Some(phf) = self.with_filters.then_some(phf).flatten() { @@ -109,17 +115,21 @@ impl Command { }; match mode { - SnapshotSegment::Headers => self.generate_snapshot::( - factory.clone(), - snap_segments::Headers::new(*compression, filters), + StaticFileSegment::Headers => self.generate_static_file::( + provider_factory.clone(), + static_file_segments::Headers, + SegmentConfig { filters, compression }, )?, - SnapshotSegment::Transactions => self.generate_snapshot::( - factory.clone(), - snap_segments::Transactions::new(*compression, filters), - )?, - SnapshotSegment::Receipts => self.generate_snapshot::( - factory.clone(), - snap_segments::Receipts::new(*compression, filters), + StaticFileSegment::Transactions => self + .generate_static_file::( + provider_factory.clone(), + static_file_segments::Transactions, + SegmentConfig { filters, compression }, + )?, + StaticFileSegment::Receipts => self.generate_static_file::( + provider_factory.clone(), + static_file_segments::Receipts, + SegmentConfig { filters, compression }, )?, } } @@ -127,29 +137,23 @@ impl Command { } if self.only_bench || self.bench { - for 
((mode, compression), phf) in all_combinations.clone() { + for ((mode, compression), phf) in all_combinations { match mode { - SnapshotSegment::Headers => self.bench_headers_snapshot( - db_path, - log_level, - chain.clone(), - *compression, + StaticFileSegment::Headers => self.bench_headers_static_file( + provider_factory.clone(), + compression, InclusionFilter::Cuckoo, phf, )?, - SnapshotSegment::Transactions => self.bench_transactions_snapshot( - db_path, - log_level, - chain.clone(), - *compression, + StaticFileSegment::Transactions => self.bench_transactions_static_file( + provider_factory.clone(), + compression, InclusionFilter::Cuckoo, phf, )?, - SnapshotSegment::Receipts => self.bench_receipts_snapshot( - db_path, - log_level, - chain.clone(), - *compression, + StaticFileSegment::Receipts => self.bench_receipts_static_file( + provider_factory.clone(), + compression, InclusionFilter::Cuckoo, phf, )?, @@ -161,30 +165,31 @@ impl Command { } /// Generates successive inclusive block ranges up to the tip starting at `self.from`. - fn block_ranges(&self, tip: BlockNumber) -> Vec> { + fn block_ranges(&self, tip: BlockNumber) -> Vec { let mut from = self.from; let mut ranges = Vec::new(); while from <= tip { let end_range = std::cmp::min(from + self.block_interval - 1, tip); - ranges.push(from..=end_range); + ranges.push(SegmentRangeInclusive::new(from, end_range)); from = end_range + 1; } ranges } - /// Generates snapshots from `self.from` with a `self.block_interval`. Generates them in + /// Generates static files from `self.from` with a `self.block_interval`. Generates them in /// parallel if specified. - fn generate_snapshot( + fn generate_static_file( &self, factory: Arc>, - segment: impl Segment + Send + Sync, + segment: impl Segment + Send + Sync, + config: SegmentConfig, ) -> eyre::Result<()> { let dir = PathBuf::default(); let ranges = self.block_ranges(factory.best_block_number()?); - let mut created_snapshots = vec![]; + let mut created_static_files = vec![]; // Filter/PHF is memory intensive, so we have to limit the parallelism. for block_ranges in ranges.chunks(self.parallel as usize) { @@ -194,34 +199,36 @@ impl Command { let provider = factory.provider()?; if !self.only_stats { - segment.snapshot::(&provider, &dir, block_range.clone())?; + segment.create_static_file_file( + &provider, + dir.as_path(), + config, + block_range.into(), + )?; } - let tx_range = - provider.transaction_range_by_block_range(block_range.clone())?; - - Ok(segment.segment().filename(block_range, &tx_range)) + Ok(segment.segment().filename(block_range)) }) .collect::, eyre::Report>>()?; - created_snapshots.extend(created_files); + created_static_files.extend(created_files); } - self.stats(created_snapshots) + self.stats(created_static_files) } - /// Prints detailed statistics for each snapshot, including loading time. + /// Prints detailed statistics for each static file, including loading time. /// - /// This function loads each snapshot from the provided paths and prints - /// statistics about various aspects of each snapshot, such as filters size, + /// This function loads each static file from the provided paths and prints + /// statistics about various aspects of each static file, such as filters size, /// offset index size, offset list size, and loading time. 
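The `block_ranges` helper above splits `self.from..=tip` into inclusive chunks of `block_interval` blocks, clamping the final chunk at the tip; each chunk becomes one static file. A small self-contained restatement of that arithmetic using plain `u64` ranges (no reth types; `SegmentRangeInclusive` is replaced by `RangeInclusive<u64>` purely for illustration):

    use std::ops::RangeInclusive;

    // Mirrors the chunking in `block_ranges` above.
    fn block_ranges(from: u64, block_interval: u64, tip: u64) -> Vec<RangeInclusive<u64>> {
        let mut start = from;
        let mut ranges = Vec::new();
        while start <= tip {
            // Clamp the last chunk so it never runs past the tip.
            let end = std::cmp::min(start + block_interval - 1, tip);
            ranges.push(start..=end);
            start = end + 1;
        }
        ranges
    }

    fn main() {
        // With the defaults of this command (`from = 0`, `block_interval = 500_000`)
        // and a tip of 1_200_000, three files are produced:
        // 0..=499_999, 500_000..=999_999 and 1_000_000..=1_200_000.
        for range in block_ranges(0, 500_000, 1_200_000) {
            println!("{range:?}");
        }
    }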
- fn stats(&self, snapshots: Vec>) -> eyre::Result<()> { + fn stats(&self, static_files: Vec>) -> eyre::Result<()> { let mut total_filters_size = 0; let mut total_index_size = 0; let mut total_duration = Duration::new(0, 0); let mut total_file_size = 0; - for snap in &snapshots { + for snap in &static_files { let start_time = Instant::now(); let jar = NippyJar::::load(snap.as_ref())?; let _cursor = NippyJarCursor::new(&jar)?; @@ -233,7 +240,7 @@ impl Command { total_duration += duration; total_file_size += file_size; - println!("Snapshot: {:?}", snap.as_ref().file_name()); + println!("StaticFile: {:?}", snap.as_ref().file_name()); println!(" File Size: {:>7}", human_bytes(file_size as f64)); println!(" Filters Size: {:>7}", human_bytes(jar.filter_size() as f64)); println!(" Offset Index Size: {:>7}", human_bytes(jar.offsets_index_size() as f64)); @@ -244,7 +251,7 @@ impl Command { ); } - let avg_duration = total_duration / snapshots.len() as u32; + let avg_duration = total_duration / static_files.len() as u32; println!("Total Filters Size: {:>7}", human_bytes(total_filters_size as f64)); println!("Total Offset Index Size: {:>7}", human_bytes(total_index_size as f64)); diff --git a/bin/reth/src/commands/db/snapshots/receipts.rs b/bin/reth/src/commands/db/static_files/receipts.rs similarity index 68% rename from bin/reth/src/commands/db/snapshots/receipts.rs rename to bin/reth/src/commands/db/static_files/receipts.rs index 203e021d372..ec9f580246c 100644 --- a/bin/reth/src/commands/db/snapshots/receipts.rs +++ b/bin/reth/src/commands/db/static_files/receipts.rs @@ -3,38 +3,28 @@ use super::{ Command, Compression, PerfectHashingFunction, }; use rand::{seq::SliceRandom, Rng}; -use reth_db::{mdbx::DatabaseArguments, open_db_read_only, snapshot::ReceiptMask}; -use reth_interfaces::db::LogLevel; +use reth_db::{static_file::ReceiptMask, DatabaseEnv}; use reth_primitives::{ - snapshot::{Filters, InclusionFilter}, - ChainSpec, Receipt, SnapshotSegment, + static_file::{Filters, InclusionFilter}, + Receipt, StaticFileSegment, }; use reth_provider::{ - providers::SnapshotProvider, BlockNumReader, ProviderError, ProviderFactory, ReceiptProvider, + providers::StaticFileProvider, BlockNumReader, ProviderError, ProviderFactory, ReceiptProvider, TransactionsProvider, TransactionsProviderExt, }; -use std::{ - path::{Path, PathBuf}, - sync::Arc, -}; +use std::{path::PathBuf, sync::Arc}; impl Command { - pub(crate) fn bench_receipts_snapshot( + pub(crate) fn bench_receipts_static_file( &self, - db_path: &Path, - log_level: Option, - chain: Arc, + provider_factory: Arc>, compression: Compression, inclusion_filter: InclusionFilter, phf: Option, ) -> eyre::Result<()> { - let db_args = DatabaseArguments::default().log_level(log_level); - - let factory = ProviderFactory::new(open_db_read_only(db_path, db_args)?, chain.clone()); - let provider = factory.provider()?; + let provider = provider_factory.provider()?; let tip = provider.last_block_number()?; - let block_range = - self.block_ranges(tip).first().expect("has been generated before").clone(); + let block_range = *self.block_ranges(tip).first().expect("has been generated before"); let filters = if let Some(phf) = self.with_filters.then_some(phf).flatten() { Filters::WithFilters(inclusion_filter, phf) @@ -44,19 +34,18 @@ impl Command { let mut rng = rand::thread_rng(); - let tx_range = ProviderFactory::new(open_db_read_only(db_path, db_args)?, chain.clone()) - .provider()? 
- .transaction_range_by_block_range(block_range.clone())?; + let tx_range = + provider_factory.provider()?.transaction_range_by_block_range(block_range.into())?; let mut row_indexes = tx_range.clone().collect::>(); - let path: PathBuf = SnapshotSegment::Receipts - .filename_with_configuration(filters, compression, &block_range, &tx_range) + let path: PathBuf = StaticFileSegment::Receipts + .filename_with_configuration(filters, compression, &block_range) .into(); - let provider = SnapshotProvider::new(PathBuf::default())?; + let provider = StaticFileProvider::new(PathBuf::default())?; let jar_provider = provider.get_segment_provider_from_block( - SnapshotSegment::Receipts, + StaticFileSegment::Receipts, self.from, Some(&path), )?; @@ -65,8 +54,8 @@ impl Command { for bench_kind in [BenchKind::Walk, BenchKind::RandomAll] { bench( bench_kind, - (open_db_read_only(db_path, db_args)?, chain.clone()), - SnapshotSegment::Receipts, + provider_factory.clone(), + StaticFileSegment::Receipts, filters, compression, || { @@ -96,8 +85,8 @@ impl Command { let num = row_indexes[rng.gen_range(0..row_indexes.len())]; bench( BenchKind::RandomOne, - (open_db_read_only(db_path, db_args)?, chain.clone()), - SnapshotSegment::Receipts, + provider_factory.clone(), + StaticFileSegment::Receipts, filters, compression, || { @@ -116,15 +105,15 @@ impl Command { // BENCHMARK QUERYING A RANDOM RECEIPT BY HASH { let num = row_indexes[rng.gen_range(0..row_indexes.len())] as u64; - let tx_hash = ProviderFactory::new(open_db_read_only(db_path, db_args)?, chain.clone()) + let tx_hash = provider_factory .transaction_by_id(num)? .ok_or(ProviderError::ReceiptNotFound(num.into()))? .hash(); bench( BenchKind::RandomHash, - (open_db_read_only(db_path, db_args)?, chain.clone()), - SnapshotSegment::Receipts, + provider_factory, + StaticFileSegment::Receipts, filters, compression, || { diff --git a/bin/reth/src/commands/db/snapshots/transactions.rs b/bin/reth/src/commands/db/static_files/transactions.rs similarity index 69% rename from bin/reth/src/commands/db/snapshots/transactions.rs rename to bin/reth/src/commands/db/static_files/transactions.rs index e7600c92b66..79758c09325 100644 --- a/bin/reth/src/commands/db/snapshots/transactions.rs +++ b/bin/reth/src/commands/db/static_files/transactions.rs @@ -3,38 +3,29 @@ use super::{ Command, Compression, PerfectHashingFunction, }; use rand::{seq::SliceRandom, Rng}; -use reth_db::{mdbx::DatabaseArguments, open_db_read_only, snapshot::TransactionMask}; -use reth_interfaces::db::LogLevel; +use reth_db::{static_file::TransactionMask, DatabaseEnv}; + use reth_primitives::{ - snapshot::{Filters, InclusionFilter}, - ChainSpec, SnapshotSegment, TransactionSignedNoHash, + static_file::{Filters, InclusionFilter}, + StaticFileSegment, TransactionSignedNoHash, }; use reth_provider::{ - providers::SnapshotProvider, BlockNumReader, ProviderError, ProviderFactory, + providers::StaticFileProvider, BlockNumReader, ProviderError, ProviderFactory, TransactionsProvider, TransactionsProviderExt, }; -use std::{ - path::{Path, PathBuf}, - sync::Arc, -}; +use std::{path::PathBuf, sync::Arc}; impl Command { - pub(crate) fn bench_transactions_snapshot( + pub(crate) fn bench_transactions_static_file( &self, - db_path: &Path, - log_level: Option, - chain: Arc, + provider_factory: Arc>, compression: Compression, inclusion_filter: InclusionFilter, phf: Option, ) -> eyre::Result<()> { - let db_args = DatabaseArguments::default().log_level(log_level); - - let factory = ProviderFactory::new(open_db_read_only(db_path, 
db_args)?, chain.clone()); - let provider = factory.provider()?; + let provider = provider_factory.provider()?; let tip = provider.last_block_number()?; - let block_range = - self.block_ranges(tip).first().expect("has been generated before").clone(); + let block_range = *self.block_ranges(tip).first().expect("has been generated before"); let filters = if let Some(phf) = self.with_filters.then_some(phf).flatten() { Filters::WithFilters(inclusion_filter, phf) @@ -44,16 +35,16 @@ impl Command { let mut rng = rand::thread_rng(); - let tx_range = provider.transaction_range_by_block_range(block_range.clone())?; + let tx_range = provider.transaction_range_by_block_range(block_range.into())?; let mut row_indexes = tx_range.clone().collect::>(); - let path: PathBuf = SnapshotSegment::Transactions - .filename_with_configuration(filters, compression, &block_range, &tx_range) + let path: PathBuf = StaticFileSegment::Transactions + .filename_with_configuration(filters, compression, &block_range) .into(); - let provider = SnapshotProvider::new(PathBuf::default())?; + let provider = StaticFileProvider::new(PathBuf::default())?; let jar_provider = provider.get_segment_provider_from_block( - SnapshotSegment::Transactions, + StaticFileSegment::Transactions, self.from, Some(&path), )?; @@ -62,8 +53,8 @@ impl Command { for bench_kind in [BenchKind::Walk, BenchKind::RandomAll] { bench( bench_kind, - (open_db_read_only(db_path, db_args)?, chain.clone()), - SnapshotSegment::Transactions, + provider_factory.clone(), + StaticFileSegment::Transactions, filters, compression, || { @@ -94,8 +85,8 @@ impl Command { let num = row_indexes[rng.gen_range(0..row_indexes.len())]; bench( BenchKind::RandomOne, - (open_db_read_only(db_path, db_args)?, chain.clone()), - SnapshotSegment::Transactions, + provider_factory.clone(), + StaticFileSegment::Transactions, filters, compression, || { @@ -115,16 +106,15 @@ impl Command { // BENCHMARK QUERYING A RANDOM TRANSACTION BY HASH { let num = row_indexes[rng.gen_range(0..row_indexes.len())] as u64; - let transaction_hash = - ProviderFactory::new(open_db_read_only(db_path, db_args)?, chain.clone()) - .transaction_by_id(num)? - .ok_or(ProviderError::TransactionNotFound(num.into()))? - .hash(); + let transaction_hash = provider_factory + .transaction_by_id(num)? + .ok_or(ProviderError::TransactionNotFound(num.into()))? + .hash(); bench( BenchKind::RandomHash, - (open_db_read_only(db_path, db_args)?, chain.clone()), - SnapshotSegment::Transactions, + provider_factory, + StaticFileSegment::Transactions, filters, compression, || { diff --git a/bin/reth/src/commands/db/stats.rs b/bin/reth/src/commands/db/stats.rs new file mode 100644 index 00000000000..09ad3a61b53 --- /dev/null +++ b/bin/reth/src/commands/db/stats.rs @@ -0,0 +1,290 @@ +use crate::utils::DbTool; +use clap::Parser; +use comfy_table::{Cell, Row, Table as ComfyTable}; +use eyre::WrapErr; +use human_bytes::human_bytes; +use itertools::Itertools; +use reth_db::{database::Database, mdbx, static_file::iter_static_files, DatabaseEnv, Tables}; +use reth_node_core::dirs::{ChainPath, DataDirPath}; +use reth_primitives::static_file::{find_fixed_range, SegmentRangeInclusive}; +use reth_provider::providers::StaticFileProvider; +use std::fs::File; + +#[derive(Parser, Debug)] +/// The arguments for the `reth db stats` command +pub struct Command { + /// Show only the total size for static files. + #[arg(long, default_value_t = false)] + only_total_size: bool, + /// Show only the summary per static file segment. 
+ #[arg(long, default_value_t = false)] + summary: bool, +} + +impl Command { + /// Execute `db stats` command + pub fn execute( + self, + data_dir: ChainPath, + tool: &DbTool, + ) -> eyre::Result<()> { + let static_files_stats_table = self.static_files_stats_table(data_dir)?; + println!("{static_files_stats_table}"); + + println!("\n"); + + let db_stats_table = self.db_stats_table(tool)?; + println!("{db_stats_table}"); + + Ok(()) + } + + fn db_stats_table(&self, tool: &DbTool) -> eyre::Result { + let mut table = ComfyTable::new(); + table.load_preset(comfy_table::presets::ASCII_MARKDOWN); + table.set_header([ + "Table Name", + "# Entries", + "Branch Pages", + "Leaf Pages", + "Overflow Pages", + "Total Size", + ]); + + tool.provider_factory.db_ref().view(|tx| { + let mut db_tables = Tables::ALL.iter().map(|table| table.name()).collect::>(); + db_tables.sort(); + let mut total_size = 0; + for db_table in db_tables { + let table_db = tx.inner.open_db(Some(db_table)).wrap_err("Could not open db.")?; + + let stats = tx + .inner + .db_stat(&table_db) + .wrap_err(format!("Could not find table: {db_table}"))?; + + // Defaults to 16KB right now but we should + // re-evaluate depending on the DB we end up using + // (e.g. REDB does not have these options as configurable intentionally) + let page_size = stats.page_size() as usize; + let leaf_pages = stats.leaf_pages(); + let branch_pages = stats.branch_pages(); + let overflow_pages = stats.overflow_pages(); + let num_pages = leaf_pages + branch_pages + overflow_pages; + let table_size = page_size * num_pages; + + total_size += table_size; + let mut row = Row::new(); + row.add_cell(Cell::new(db_table)) + .add_cell(Cell::new(stats.entries())) + .add_cell(Cell::new(branch_pages)) + .add_cell(Cell::new(leaf_pages)) + .add_cell(Cell::new(overflow_pages)) + .add_cell(Cell::new(human_bytes(table_size as f64))); + table.add_row(row); + } + + let max_widths = table.column_max_content_widths(); + let mut seperator = Row::new(); + for width in max_widths { + seperator.add_cell(Cell::new("-".repeat(width as usize))); + } + table.add_row(seperator); + + let mut row = Row::new(); + row.add_cell(Cell::new("Tables")) + .add_cell(Cell::new("")) + .add_cell(Cell::new("")) + .add_cell(Cell::new("")) + .add_cell(Cell::new("")) + .add_cell(Cell::new(human_bytes(total_size as f64))); + table.add_row(row); + + let freelist = tx.inner.env().freelist()?; + let freelist_size = + freelist * tx.inner.db_stat(&mdbx::Database::freelist_db())?.page_size() as usize; + + let mut row = Row::new(); + row.add_cell(Cell::new("Freelist")) + .add_cell(Cell::new(freelist)) + .add_cell(Cell::new("")) + .add_cell(Cell::new("")) + .add_cell(Cell::new("")) + .add_cell(Cell::new(human_bytes(freelist_size as f64))); + table.add_row(row); + + Ok::<(), eyre::Report>(()) + })??; + + Ok(table) + } + + fn static_files_stats_table( + &self, + data_dir: ChainPath, + ) -> eyre::Result { + let mut table = ComfyTable::new(); + table.load_preset(comfy_table::presets::ASCII_MARKDOWN); + + if !self.only_total_size { + table.set_header([ + "Segment", + "Block Range", + "Transaction Range", + "Shape (columns x rows)", + "Data Size", + "Index Size", + "Offsets Size", + "Config Size", + "Total Size", + ]); + } else { + table.set_header([ + "Segment", + "Block Range", + "Transaction Range", + "Shape (columns x rows)", + "Size", + ]); + } + + let static_files = iter_static_files(data_dir.static_files_path())?; + let static_file_provider = StaticFileProvider::new(data_dir.static_files_path())?; + + let mut 
total_data_size = 0; + let mut total_index_size = 0; + let mut total_offsets_size = 0; + let mut total_config_size = 0; + + for (segment, ranges) in static_files.into_iter().sorted_by_key(|(segment, _)| *segment) { + let ( + mut segment_columns, + mut segment_rows, + mut segment_data_size, + mut segment_index_size, + mut segment_offsets_size, + mut segment_config_size, + ) = (0, 0, 0, 0, 0, 0); + + for (block_range, tx_range) in &ranges { + let fixed_block_range = find_fixed_range(block_range.start()); + let jar_provider = static_file_provider + .get_segment_provider(segment, || Some(fixed_block_range), None)? + .expect("something went wrong"); + + let columns = jar_provider.columns(); + let rows = jar_provider.rows(); + let data_size = File::open(jar_provider.data_path()) + .and_then(|file| file.metadata()) + .map(|metadata| metadata.len()) + .unwrap_or_default(); + let index_size = File::open(jar_provider.index_path()) + .and_then(|file| file.metadata()) + .map(|metadata| metadata.len()) + .unwrap_or_default(); + let offsets_size = File::open(jar_provider.offsets_path()) + .and_then(|file| file.metadata()) + .map(|metadata| metadata.len()) + .unwrap_or_default(); + let config_size = File::open(jar_provider.config_path()) + .and_then(|file| file.metadata()) + .map(|metadata| metadata.len()) + .unwrap_or_default(); + + if self.summary { + if segment_columns > 0 { + assert_eq!(segment_columns, columns); + } else { + segment_columns = columns; + } + segment_rows += rows; + segment_data_size += data_size; + segment_index_size += index_size; + segment_offsets_size += offsets_size; + segment_config_size += config_size; + } else { + let mut row = Row::new(); + row.add_cell(Cell::new(segment)) + .add_cell(Cell::new(format!("{block_range}"))) + .add_cell(Cell::new( + tx_range.map_or("N/A".to_string(), |tx_range| format!("{tx_range}")), + )) + .add_cell(Cell::new(format!("{columns} x {rows}"))); + if !self.only_total_size { + row.add_cell(Cell::new(human_bytes(data_size as f64))) + .add_cell(Cell::new(human_bytes(index_size as f64))) + .add_cell(Cell::new(human_bytes(offsets_size as f64))) + .add_cell(Cell::new(human_bytes(config_size as f64))); + } + row.add_cell(Cell::new(human_bytes( + (data_size + index_size + offsets_size + config_size) as f64, + ))); + table.add_row(row); + } + + total_data_size += data_size; + total_index_size += index_size; + total_offsets_size += offsets_size; + total_config_size += config_size; + } + + if self.summary { + let first_ranges = ranges.first().expect("not empty list of ranges"); + let last_ranges = ranges.last().expect("not empty list of ranges"); + + let block_range = + SegmentRangeInclusive::new(first_ranges.0.start(), last_ranges.0.end()); + let tx_range = first_ranges + .1 + .zip(last_ranges.1) + .map(|(first, last)| SegmentRangeInclusive::new(first.start(), last.end())); + + let mut row = Row::new(); + row.add_cell(Cell::new(segment)) + .add_cell(Cell::new(format!("{block_range}"))) + .add_cell(Cell::new( + tx_range.map_or("N/A".to_string(), |tx_range| format!("{tx_range}")), + )) + .add_cell(Cell::new(format!("{segment_columns} x {segment_rows}"))); + if !self.only_total_size { + row.add_cell(Cell::new(human_bytes(segment_data_size as f64))) + .add_cell(Cell::new(human_bytes(segment_index_size as f64))) + .add_cell(Cell::new(human_bytes(segment_offsets_size as f64))) + .add_cell(Cell::new(human_bytes(segment_config_size as f64))); + } + row.add_cell(Cell::new(human_bytes( + (segment_data_size + + segment_index_size + + segment_offsets_size + + 
segment_config_size) as f64, + ))); + table.add_row(row); + } + } + + let max_widths = table.column_max_content_widths(); + let mut seperator = Row::new(); + for width in max_widths { + seperator.add_cell(Cell::new("-".repeat(width as usize))); + } + table.add_row(seperator); + + let mut row = Row::new(); + row.add_cell(Cell::new("Total")) + .add_cell(Cell::new("")) + .add_cell(Cell::new("")) + .add_cell(Cell::new("")); + if !self.only_total_size { + row.add_cell(Cell::new(human_bytes(total_data_size as f64))) + .add_cell(Cell::new(human_bytes(total_index_size as f64))) + .add_cell(Cell::new(human_bytes(total_offsets_size as f64))) + .add_cell(Cell::new(human_bytes(total_config_size as f64))); + } + row.add_cell(Cell::new(human_bytes( + (total_data_size + total_index_size + total_offsets_size + total_config_size) as f64, + ))); + table.add_row(row); + + Ok(table) + } +} diff --git a/bin/reth/src/commands/debug_cmd/build_block.rs b/bin/reth/src/commands/debug_cmd/build_block.rs index beb01873368..67215aa5485 100644 --- a/bin/reth/src/commands/debug_cmd/build_block.rs +++ b/bin/reth/src/commands/debug_cmd/build_block.rs @@ -116,7 +116,11 @@ impl Command { /// /// If the database is empty, returns the genesis block. fn lookup_best_block(&self, db: Arc) -> RethResult> { - let factory = ProviderFactory::new(db, self.chain.clone()); + let factory = ProviderFactory::new( + db, + self.chain.clone(), + self.datadir.unwrap_or_chain_default(self.chain.chain).static_files_path(), + )?; let provider = factory.provider()?; let best_number = @@ -155,7 +159,11 @@ impl Command { // initialize the database let db = Arc::new(init_db(db_path, DatabaseArguments::default().log_level(self.db.log_level))?); - let provider_factory = ProviderFactory::new(Arc::clone(&db), Arc::clone(&self.chain)); + let provider_factory = ProviderFactory::new( + Arc::clone(&db), + Arc::clone(&self.chain), + data_dir.static_files_path(), + )?; let consensus: Arc = Arc::new(BeaconConsensus::new(Arc::clone(&self.chain))); diff --git a/bin/reth/src/commands/debug_cmd/execution.rs b/bin/reth/src/commands/debug_cmd/execution.rs index 542a50b5e67..565852b7d7a 100644 --- a/bin/reth/src/commands/debug_cmd/execution.rs +++ b/bin/reth/src/commands/debug_cmd/execution.rs @@ -27,13 +27,16 @@ use reth_network::{NetworkEvents, NetworkHandle}; use reth_network_api::NetworkInfo; use reth_node_core::init::init_genesis; use reth_node_ethereum::EthEvmConfig; -use reth_primitives::{fs, stage::StageId, BlockHashOrNumber, BlockNumber, ChainSpec, B256}; +use reth_primitives::{ + fs, stage::StageId, BlockHashOrNumber, BlockNumber, ChainSpec, PruneModes, B256, +}; use reth_provider::{BlockExecutionWriter, HeaderSyncMode, ProviderFactory, StageCheckpointReader}; use reth_stages::{ sets::DefaultStages, - stages::{ExecutionStage, ExecutionStageThresholds, SenderRecoveryStage, TotalDifficultyStage}, + stages::{ExecutionStage, ExecutionStageThresholds, SenderRecoveryStage}, Pipeline, StageSet, }; +use reth_static_file::StaticFileProducer; use reth_tasks::TaskExecutor; use std::{ net::{SocketAddr, SocketAddrV4}, @@ -92,6 +95,7 @@ impl Command { consensus: Arc, provider_factory: ProviderFactory, task_executor: &TaskExecutor, + static_file_producer: StaticFileProducer, ) -> eyre::Result> where DB: Database + Unpin + Clone + 'static, @@ -123,11 +127,7 @@ impl Command { header_downloader, body_downloader, factory.clone(), - ) - .set( - TotalDifficultyStage::new(consensus) - .with_commit_threshold(stage_conf.total_difficulty.commit_threshold), - ) + )? 
.set(SenderRecoveryStage { commit_threshold: stage_conf.sender_recovery.commit_threshold, }) @@ -147,7 +147,7 @@ impl Command { config.prune.clone().map(|prune| prune.segments).unwrap_or_default(), )), ) - .build(provider_factory); + .build(provider_factory, static_file_producer); Ok(pipeline) } @@ -170,7 +170,11 @@ impl Command { self.network.discovery.addr, self.network.discovery.port, ))) - .build(ProviderFactory::new(db, self.chain.clone())) + .build(ProviderFactory::new( + db, + self.chain.clone(), + self.datadir.unwrap_or_chain_default(self.chain.chain).static_files_path(), + )?) .start_network() .await?; info!(target: "reth::cli", peer_id = %network.peer_id(), local_addr = %network.local_addr(), "Connected to P2P network"); @@ -206,10 +210,11 @@ impl Command { fs::create_dir_all(&db_path)?; let db = Arc::new(init_db(db_path, DatabaseArguments::default().log_level(self.db.log_level))?); - let provider_factory = ProviderFactory::new(db.clone(), self.chain.clone()); + let provider_factory = + ProviderFactory::new(db.clone(), self.chain.clone(), data_dir.static_files_path())?; debug!(target: "reth::cli", chain=%self.chain.chain, genesis=?self.chain.genesis_hash(), "Initializing genesis"); - init_genesis(db.clone(), self.chain.clone())?; + init_genesis(provider_factory.clone())?; let consensus: Arc = Arc::new(BeaconConsensus::new(Arc::clone(&self.chain))); @@ -226,6 +231,12 @@ impl Command { ) .await?; + let static_file_producer = StaticFileProducer::new( + provider_factory.clone(), + provider_factory.static_file_provider(), + PruneModes::default(), + ); + // Configure the pipeline let fetch_client = network.fetch_client().await?; let mut pipeline = self.build_pipeline( @@ -234,6 +245,7 @@ impl Command { Arc::clone(&consensus), provider_factory.clone(), &ctx.task_executor, + static_file_producer, )?; let provider = provider_factory.provider()?; diff --git a/bin/reth/src/commands/debug_cmd/in_memory_merkle.rs b/bin/reth/src/commands/debug_cmd/in_memory_merkle.rs index fffda86b42c..54af17e97be 100644 --- a/bin/reth/src/commands/debug_cmd/in_memory_merkle.rs +++ b/bin/reth/src/commands/debug_cmd/in_memory_merkle.rs @@ -20,9 +20,8 @@ use reth_network_api::NetworkInfo; use reth_node_ethereum::EthEvmConfig; use reth_primitives::{fs, stage::StageId, BlockHashOrNumber, ChainSpec}; use reth_provider::{ - AccountExtReader, BlockWriter, ExecutorFactory, HashingWriter, HeaderProvider, - LatestStateProviderRef, OriginalValuesKnown, ProviderFactory, StageCheckpointReader, - StorageReader, + AccountExtReader, ExecutorFactory, HashingWriter, HeaderProvider, LatestStateProviderRef, + OriginalValuesKnown, ProviderFactory, StageCheckpointReader, StorageReader, }; use reth_tasks::TaskExecutor; use reth_trie::{updates::TrieKey, StateRoot}; @@ -95,7 +94,11 @@ impl Command { self.network.discovery.addr, self.network.discovery.port, ))) - .build(ProviderFactory::new(db, self.chain.clone())) + .build(ProviderFactory::new( + db, + self.chain.clone(), + self.datadir.unwrap_or_chain_default(self.chain.chain).static_files_path(), + )?) 
.start_network() .await?; info!(target: "reth::cli", peer_id = %network.peer_id(), local_addr = %network.local_addr(), "Connected to P2P network"); @@ -115,7 +118,7 @@ impl Command { // initialize the database let db = Arc::new(init_db(db_path, DatabaseArguments::default().log_level(self.db.log_level))?); - let factory = ProviderFactory::new(&db, self.chain.clone()); + let factory = ProviderFactory::new(&db, self.chain.clone(), data_dir.static_files_path())?; let provider = factory.provider()?; // Look up merkle checkpoint @@ -165,8 +168,10 @@ impl Command { let executor_factory = reth_revm::EvmProcessorFactory::new(self.chain.clone(), EthEvmConfig::default()); - let mut executor = - executor_factory.with_state(LatestStateProviderRef::new(provider.tx_ref())); + let mut executor = executor_factory.with_state(LatestStateProviderRef::new( + provider.tx_ref(), + factory.static_file_provider(), + )); let merkle_block_td = provider.header_td_by_number(merkle_block_number)?.unwrap_or_default(); @@ -192,14 +197,14 @@ impl Command { let provider_rw = factory.provider_rw()?; // Insert block, state and hashes - provider_rw.insert_block( + provider_rw.insert_historical_block( block .clone() .try_seal_with_senders() .map_err(|_| BlockValidationError::SenderRecoveryError)?, None, )?; - block_state.write_to_db(provider_rw.tx_ref(), OriginalValuesKnown::No)?; + block_state.write_to_storage(provider_rw.tx_ref(), None, OriginalValuesKnown::No)?; let storage_lists = provider_rw.changed_storages_with_range(block.number..=block.number)?; let storages = provider_rw.plain_state_storages(storage_lists)?; provider_rw.insert_storage_for_hashing(storages)?; diff --git a/bin/reth/src/commands/debug_cmd/merkle.rs b/bin/reth/src/commands/debug_cmd/merkle.rs index dbc4879938e..8a5cce295a2 100644 --- a/bin/reth/src/commands/debug_cmd/merkle.rs +++ b/bin/reth/src/commands/debug_cmd/merkle.rs @@ -105,7 +105,11 @@ impl Command { self.network.discovery.addr, self.network.discovery.port, ))) - .build(ProviderFactory::new(db, self.chain.clone())) + .build(ProviderFactory::new( + db, + self.chain.clone(), + self.datadir.unwrap_or_chain_default(self.chain.chain).static_files_path(), + )?) 
.start_network() .await?; info!(target: "reth::cli", peer_id = %network.peer_id(), local_addr = %network.local_addr(), "Connected to P2P network"); @@ -125,7 +129,7 @@ impl Command { // initialize the database let db = Arc::new(init_db(db_path, DatabaseArguments::default().log_level(self.db.log_level))?); - let factory = ProviderFactory::new(&db, self.chain.clone()); + let factory = ProviderFactory::new(&db, self.chain.clone(), data_dir.static_files_path())?; let provider_rw = factory.provider_rw()?; // Configure and build network diff --git a/bin/reth/src/commands/debug_cmd/replay_engine.rs b/bin/reth/src/commands/debug_cmd/replay_engine.rs index a18b9a9568a..374844d1bbc 100644 --- a/bin/reth/src/commands/debug_cmd/replay_engine.rs +++ b/bin/reth/src/commands/debug_cmd/replay_engine.rs @@ -25,10 +25,11 @@ use reth_node_ethereum::{EthEngineTypes, EthEvmConfig}; #[cfg(feature = "optimism")] use reth_node_optimism::{OptimismEngineTypes, OptimismEvmConfig}; use reth_payload_builder::{PayloadBuilderHandle, PayloadBuilderService}; -use reth_primitives::{fs, ChainSpec}; +use reth_primitives::{fs, ChainSpec, PruneModes}; use reth_provider::{providers::BlockchainProvider, CanonStateSubscriptions, ProviderFactory}; use reth_revm::EvmProcessorFactory; use reth_stages::Pipeline; +use reth_static_file::StaticFileProducer; use reth_tasks::TaskExecutor; use reth_transaction_pool::noop::NoopTransactionPool; use std::{ @@ -101,7 +102,11 @@ impl Command { self.network.discovery.addr, self.network.discovery.port, ))) - .build(ProviderFactory::new(db, self.chain.clone())) + .build(ProviderFactory::new( + db, + self.chain.clone(), + self.datadir.unwrap_or_chain_default(self.chain.chain).static_files_path(), + )?) .start_network() .await?; info!(target: "reth::cli", peer_id = %network.peer_id(), local_addr = %network.local_addr(), "Connected to P2P network"); @@ -121,7 +126,8 @@ impl Command { // Initialize the database let db = Arc::new(init_db(db_path, DatabaseArguments::default().log_level(self.db.log_level))?); - let provider_factory = ProviderFactory::new(db.clone(), self.chain.clone()); + let provider_factory = + ProviderFactory::new(db.clone(), self.chain.clone(), data_dir.static_files_path())?; let consensus: Arc = Arc::new(BeaconConsensus::new(Arc::clone(&self.chain))); @@ -191,7 +197,14 @@ impl Command { let (consensus_engine_tx, consensus_engine_rx) = mpsc::unbounded_channel(); let (beacon_consensus_engine, beacon_engine_handle) = BeaconConsensusEngine::with_channel( network_client, - Pipeline::builder().build(provider_factory), + Pipeline::builder().build( + provider_factory.clone(), + StaticFileProducer::new( + provider_factory.clone(), + provider_factory.static_file_provider(), + PruneModes::default(), + ), + ), blockchain_db.clone(), Box::new(ctx.task_executor.clone()), Box::new(network), diff --git a/bin/reth/src/commands/import.rs b/bin/reth/src/commands/import.rs index ff0cf6080c4..f492a456d23 100644 --- a/bin/reth/src/commands/import.rs +++ b/bin/reth/src/commands/import.rs @@ -21,12 +21,13 @@ use reth_downloaders::{ use reth_interfaces::consensus::Consensus; use reth_node_core::{events::node::NodeEvent, init::init_genesis}; use reth_node_ethereum::EthEvmConfig; -use reth_primitives::{stage::StageId, ChainSpec, B256}; +use reth_primitives::{stage::StageId, ChainSpec, PruneModes, B256}; use reth_provider::{HeaderSyncMode, ProviderFactory, StageCheckpointReader}; use reth_stages::{ prelude::*, - stages::{ExecutionStage, ExecutionStageThresholds, SenderRecoveryStage, TotalDifficultyStage}, + 
stages::{ExecutionStage, ExecutionStageThresholds, SenderRecoveryStage}, }; +use reth_static_file::StaticFileProducer; use std::{path::PathBuf, sync::Arc}; use tokio::sync::watch; use tracing::{debug, info}; @@ -89,11 +90,12 @@ impl ImportCommand { let db = Arc::new(init_db(db_path, DatabaseArguments::default().log_level(self.db.log_level))?); info!(target: "reth::cli", "Database opened"); - let provider_factory = ProviderFactory::new(db.clone(), self.chain.clone()); + let provider_factory = + ProviderFactory::new(db.clone(), self.chain.clone(), data_dir.static_files_path())?; debug!(target: "reth::cli", chain=%self.chain.chain, genesis=?self.chain.genesis_hash(), "Initializing genesis"); - init_genesis(db.clone(), self.chain.clone())?; + init_genesis(provider_factory.clone())?; let consensus = Arc::new(BeaconConsensus::new(self.chain.clone())); info!(target: "reth::cli", "Consensus engine initialized"); @@ -106,8 +108,20 @@ impl ImportCommand { let tip = file_client.tip().expect("file client has no tip"); info!(target: "reth::cli", "Chain file imported"); + let static_file_producer = StaticFileProducer::new( + provider_factory.clone(), + provider_factory.static_file_provider(), + PruneModes::default(), + ); + let (mut pipeline, events) = self - .build_import_pipeline(config, provider_factory.clone(), &consensus, file_client) + .build_import_pipeline( + config, + provider_factory.clone(), + &consensus, + file_client, + static_file_producer, + ) .await?; // override the tip @@ -142,6 +156,7 @@ impl ImportCommand { provider_factory: ProviderFactory, consensus: &Arc, file_client: Arc, + static_file_producer: StaticFileProducer, ) -> eyre::Result<(Pipeline, impl Stream)> where DB: Database + Clone + Unpin + 'static, @@ -176,11 +191,7 @@ impl ImportCommand { header_downloader, body_downloader, factory.clone(), - ) - .set( - TotalDifficultyStage::new(consensus.clone()) - .with_commit_threshold(config.stages.total_difficulty.commit_threshold), - ) + )? 
.set(SenderRecoveryStage { commit_threshold: config.stages.sender_recovery.commit_threshold, }) @@ -201,7 +212,7 @@ impl ImportCommand { config.prune.map(|prune| prune.segments).unwrap_or_default(), )), ) - .build(provider_factory); + .build(provider_factory, static_file_producer); let events = pipeline.events().map(Into::into); diff --git a/bin/reth/src/commands/init_cmd.rs b/bin/reth/src/commands/init_cmd.rs index 9ea949e6b47..abd3edc5bdd 100644 --- a/bin/reth/src/commands/init_cmd.rs +++ b/bin/reth/src/commands/init_cmd.rs @@ -11,6 +11,7 @@ use clap::Parser; use reth_db::{init_db, mdbx::DatabaseArguments}; use reth_node_core::init::init_genesis; use reth_primitives::ChainSpec; +use reth_provider::ProviderFactory; use std::sync::Arc; use tracing::info; @@ -56,8 +57,12 @@ impl InitCommand { Arc::new(init_db(&db_path, DatabaseArguments::default().log_level(self.db.log_level))?); info!(target: "reth::cli", "Database opened"); + let provider_factory = + ProviderFactory::new(db.clone(), self.chain.clone(), data_dir.static_files_path())?; + info!(target: "reth::cli", "Writing genesis block"); - let hash = init_genesis(db, self.chain)?; + + let hash = init_genesis(provider_factory)?; info!(target: "reth::cli", hash = ?hash, "Genesis block written"); Ok(()) diff --git a/bin/reth/src/commands/p2p/mod.rs b/bin/reth/src/commands/p2p/mod.rs index f307b0dcd0d..0dbcd6c270b 100644 --- a/bin/reth/src/commands/p2p/mod.rs +++ b/bin/reth/src/commands/p2p/mod.rs @@ -131,7 +131,11 @@ impl Command { network_config_builder = self.discovery.apply_to_builder(network_config_builder); let network = network_config_builder - .build(Arc::new(ProviderFactory::new(noop_db, self.chain.clone()))) + .build(Arc::new(ProviderFactory::new( + noop_db, + self.chain.clone(), + data_dir.static_files_path(), + )?)) .start_network() .await?; diff --git a/bin/reth/src/commands/recover/storage_tries.rs b/bin/reth/src/commands/recover/storage_tries.rs index d0ec0281ba2..b0b2550cbb1 100644 --- a/bin/reth/src/commands/recover/storage_tries.rs +++ b/bin/reth/src/commands/recover/storage_tries.rs @@ -50,10 +50,11 @@ impl Command { fs::create_dir_all(&db_path)?; let db = Arc::new(init_db(db_path, Default::default())?); + let factory = ProviderFactory::new(&db, self.chain.clone(), data_dir.static_files_path())?; + debug!(target: "reth::cli", chain=%self.chain.chain, genesis=?self.chain.genesis_hash(), "Initializing genesis"); - init_genesis(db.clone(), self.chain.clone())?; + init_genesis(factory.clone())?; - let factory = ProviderFactory::new(&db, self.chain); let mut provider = factory.provider_rw()?; let best_block = provider.best_block_number()?; let best_header = provider diff --git a/bin/reth/src/commands/stage/drop.rs b/bin/reth/src/commands/stage/drop.rs index 6ff767d7b5a..3a5620331c9 100644 --- a/bin/reth/src/commands/stage/drop.rs +++ b/bin/reth/src/commands/stage/drop.rs @@ -10,12 +10,15 @@ use crate::{ }; use clap::Parser; use reth_db::{ - database::Database, mdbx::DatabaseArguments, open_db, tables, transaction::DbTxMut, DatabaseEnv, + database::Database, mdbx::DatabaseArguments, open_db, static_file::iter_static_files, tables, + transaction::DbTxMut, DatabaseEnv, }; use reth_node_core::init::{insert_genesis_header, insert_genesis_state}; -use reth_primitives::{fs, stage::StageId, ChainSpec}; +use reth_primitives::{ + fs, stage::StageId, static_file::find_fixed_range, ChainSpec, StaticFileSegment, +}; +use reth_provider::ProviderFactory; use std::sync::Arc; -use tracing::info; /// `reth drop-stage` command #[derive(Debug, 
Parser)] @@ -58,11 +61,44 @@ impl Command { let db = open_db(db_path.as_ref(), DatabaseArguments::default().log_level(self.db.log_level))?; + let provider_factory = + ProviderFactory::new(db, self.chain.clone(), data_dir.static_files_path())?; + let static_file_provider = provider_factory.static_file_provider(); + + let tool = DbTool::new(provider_factory, self.chain.clone())?; - let tool = DbTool::new(&db, self.chain.clone())?; + let static_file_segment = match self.stage { + StageEnum::Headers => Some(StaticFileSegment::Headers), + StageEnum::Bodies => Some(StaticFileSegment::Transactions), + StageEnum::Execution => Some(StaticFileSegment::Receipts), + _ => None, + }; + + // Delete static file segment data before inserting the genesis header below + if let Some(static_file_segment) = static_file_segment { + let static_file_provider = tool.provider_factory.static_file_provider(); + let static_files = iter_static_files(static_file_provider.directory())?; + if let Some(segment_static_files) = static_files.get(&static_file_segment) { + for (block_range, _) in segment_static_files { + static_file_provider + .delete_jar(static_file_segment, find_fixed_range(block_range.start()))?; + } + } + } - tool.db.update(|tx| { - match &self.stage { + tool.provider_factory.db_ref().update(|tx| { + match self.stage { + StageEnum::Headers => { + tx.clear::()?; + tx.clear::()?; + tx.clear::()?; + tx.clear::()?; + tx.put::( + StageId::Headers.to_string(), + Default::default(), + )?; + insert_genesis_header::(tx, static_file_provider, self.chain)?; + } StageEnum::Bodies => { tx.clear::()?; tx.clear::()?; @@ -73,7 +109,7 @@ impl Command { StageId::Bodies.to_string(), Default::default(), )?; - insert_genesis_header::(tx, self.chain)?; + insert_genesis_header::(tx, static_file_provider, self.chain)?; } StageEnum::Senders => { tx.clear::()?; @@ -152,25 +188,13 @@ impl Command { Default::default(), )?; } - StageEnum::TotalDifficulty => { - tx.clear::()?; - tx.put::( - StageId::TotalDifficulty.to_string(), - Default::default(), - )?; - insert_genesis_header::(tx, self.chain)?; - } StageEnum::TxLookup => { tx.clear::()?; tx.put::( StageId::TransactionLookup.to_string(), Default::default(), )?; - insert_genesis_header::(tx, self.chain)?; - } - _ => { - info!("Nothing to do for stage {:?}", self.stage); - return Ok(()) + insert_genesis_header::(tx, static_file_provider, self.chain)?; } } diff --git a/bin/reth/src/commands/stage/dump/execution.rs b/bin/reth/src/commands/stage/dump/execution.rs index 5c357e56ee0..3234367fe8a 100644 --- a/bin/reth/src/commands/stage/dump/execution.rs +++ b/bin/reth/src/commands/stage/dump/execution.rs @@ -5,29 +5,38 @@ use reth_db::{ cursor::DbCursorRO, database::Database, table::TableImporter, tables, transaction::DbTx, DatabaseEnv, }; +use reth_node_core::dirs::{ChainPath, DataDirPath}; use reth_node_ethereum::EthEvmConfig; -use reth_primitives::{stage::StageCheckpoint, ChainSpec}; -use reth_provider::ProviderFactory; +use reth_primitives::stage::StageCheckpoint; +use reth_provider::{ChainSpecProvider, ProviderFactory}; use reth_revm::EvmProcessorFactory; use reth_stages::{stages::ExecutionStage, Stage, UnwindInput}; -use std::{path::PathBuf, sync::Arc}; use tracing::info; pub(crate) async fn dump_execution_stage( - db_tool: &DbTool<'_, DB>, + db_tool: &DbTool, from: u64, to: u64, - output_db: &PathBuf, + output_datadir: ChainPath, should_run: bool, ) -> Result<()> { - let (output_db, tip_block_number) = setup(from, to, output_db, db_tool)?; + let (output_db, tip_block_number) = 
setup(from, to, &output_datadir.db_path(), db_tool)?; import_tables_with_range(&output_db, db_tool, from, to)?; unwind_and_copy(db_tool, from, tip_block_number, &output_db).await?; if should_run { - dry_run(db_tool.chain.clone(), output_db, to, from).await?; + dry_run( + ProviderFactory::new( + output_db, + db_tool.chain.clone(), + output_datadir.static_files_path(), + )?, + to, + from, + ) + .await?; } Ok(()) @@ -36,34 +45,50 @@ pub(crate) async fn dump_execution_stage( /// Imports all the tables that can be copied over a range. fn import_tables_with_range( output_db: &DatabaseEnv, - db_tool: &DbTool<'_, DB>, + db_tool: &DbTool, from: u64, to: u64, ) -> eyre::Result<()> { // We're not sharing the transaction in case the memory grows too much. output_db.update(|tx| { - tx.import_table_with_range::(&db_tool.db.tx()?, Some(from), to) + tx.import_table_with_range::( + &db_tool.provider_factory.db_ref().tx()?, + Some(from), + to, + ) })??; output_db.update(|tx| { tx.import_table_with_range::( - &db_tool.db.tx()?, + &db_tool.provider_factory.db_ref().tx()?, Some(from), to, ) })??; output_db.update(|tx| { - tx.import_table_with_range::(&db_tool.db.tx()?, Some(from), to) + tx.import_table_with_range::( + &db_tool.provider_factory.db_ref().tx()?, + Some(from), + to, + ) })??; output_db.update(|tx| { - tx.import_table_with_range::(&db_tool.db.tx()?, Some(from), to) + tx.import_table_with_range::( + &db_tool.provider_factory.db_ref().tx()?, + Some(from), + to, + ) })??; output_db.update(|tx| { - tx.import_table_with_range::(&db_tool.db.tx()?, Some(from), to) + tx.import_table_with_range::( + &db_tool.provider_factory.db_ref().tx()?, + Some(from), + to, + ) })??; // Find range of transactions that need to be copied over - let (from_tx, to_tx) = db_tool.db.view(|read_tx| { + let (from_tx, to_tx) = db_tool.provider_factory.db_ref().view(|read_tx| { let mut read_cursor = read_tx.cursor_read::()?; let (_, from_block) = read_cursor.seek(from)?.ok_or(eyre::eyre!("BlockBody {from} does not exist."))?; @@ -78,7 +103,7 @@ fn import_tables_with_range( output_db.update(|tx| { tx.import_table_with_range::( - &db_tool.db.tx()?, + &db_tool.provider_factory.db_ref().tx()?, Some(from_tx), to_tx, ) @@ -86,7 +111,7 @@ fn import_tables_with_range( output_db.update(|tx| { tx.import_table_with_range::( - &db_tool.db.tx()?, + &db_tool.provider_factory.db_ref().tx()?, Some(from_tx), to_tx, ) @@ -99,13 +124,12 @@ fn import_tables_with_range( /// PlainAccountState safely. There might be some state dependency from an address /// which hasn't been changed in the given range. async fn unwind_and_copy( - db_tool: &DbTool<'_, DB>, + db_tool: &DbTool, from: u64, tip_block_number: u64, output_db: &DatabaseEnv, ) -> eyre::Result<()> { - let factory = ProviderFactory::new(db_tool.db, db_tool.chain.clone()); - let provider = factory.provider_rw()?; + let provider = db_tool.provider_factory.provider_rw()?; let mut exec_stage = ExecutionStage::new_with_factory(EvmProcessorFactory::new( db_tool.chain.clone(), @@ -133,22 +157,20 @@ async fn unwind_and_copy( /// Try to re-execute the stage without committing async fn dry_run( - chain: Arc, - output_db: DB, + output_provider_factory: ProviderFactory, to: u64, from: u64, ) -> eyre::Result<()> { info!(target: "reth::cli", "Executing stage. 
[dry-run]"); - let factory = ProviderFactory::new(&output_db, chain.clone()); let mut exec_stage = ExecutionStage::new_with_factory(EvmProcessorFactory::new( - chain.clone(), + output_provider_factory.chain_spec().clone(), EthEvmConfig::default(), )); let input = reth_stages::ExecInput { target: Some(to), checkpoint: Some(StageCheckpoint::new(from)) }; - exec_stage.execute(&factory.provider_rw()?, input)?; + exec_stage.execute(&output_provider_factory.provider_rw()?, input)?; info!(target: "reth::cli", "Success"); diff --git a/bin/reth/src/commands/stage/dump/hashing_account.rs b/bin/reth/src/commands/stage/dump/hashing_account.rs index be0723b495b..1888f0e303e 100644 --- a/bin/reth/src/commands/stage/dump/hashing_account.rs +++ b/bin/reth/src/commands/stage/dump/hashing_account.rs @@ -2,25 +2,25 @@ use super::setup; use crate::utils::DbTool; use eyre::Result; use reth_db::{database::Database, table::TableImporter, tables, DatabaseEnv}; -use reth_primitives::{stage::StageCheckpoint, BlockNumber, ChainSpec}; +use reth_node_core::dirs::{ChainPath, DataDirPath}; +use reth_primitives::{stage::StageCheckpoint, BlockNumber}; use reth_provider::ProviderFactory; use reth_stages::{stages::AccountHashingStage, Stage, UnwindInput}; -use std::{path::PathBuf, sync::Arc}; use tracing::info; pub(crate) async fn dump_hashing_account_stage( - db_tool: &DbTool<'_, DB>, + db_tool: &DbTool, from: BlockNumber, to: BlockNumber, - output_db: &PathBuf, + output_datadir: ChainPath, should_run: bool, ) -> Result<()> { - let (output_db, tip_block_number) = setup(from, to, output_db, db_tool)?; + let (output_db, tip_block_number) = setup(from, to, &output_datadir.db_path(), db_tool)?; // Import relevant AccountChangeSets output_db.update(|tx| { tx.import_table_with_range::( - &db_tool.db.tx()?, + &db_tool.provider_factory.db_ref().tx()?, Some(from), to, ) @@ -29,7 +29,16 @@ pub(crate) async fn dump_hashing_account_stage( unwind_and_copy(db_tool, from, tip_block_number, &output_db)?; if should_run { - dry_run(db_tool.chain.clone(), output_db, to, from).await?; + dry_run( + ProviderFactory::new( + output_db, + db_tool.chain.clone(), + output_datadir.static_files_path(), + )?, + to, + from, + ) + .await?; } Ok(()) @@ -37,13 +46,12 @@ pub(crate) async fn dump_hashing_account_stage( /// Dry-run an unwind to FROM block and copy the necessary table data to the new database. 
fn unwind_and_copy( - db_tool: &DbTool<'_, DB>, + db_tool: &DbTool, from: u64, tip_block_number: u64, output_db: &DatabaseEnv, ) -> eyre::Result<()> { - let factory = ProviderFactory::new(db_tool.db, db_tool.chain.clone()); - let provider = factory.provider_rw()?; + let provider = db_tool.provider_factory.provider_rw()?; let mut exec_stage = AccountHashingStage::default(); exec_stage.unwind( @@ -63,15 +71,13 @@ fn unwind_and_copy( /// Try to re-execute the stage straightaway async fn dry_run( - chain: Arc, - output_db: DB, + output_provider_factory: ProviderFactory, to: u64, from: u64, ) -> eyre::Result<()> { info!(target: "reth::cli", "Executing stage."); - let factory = ProviderFactory::new(&output_db, chain); - let provider = factory.provider_rw()?; + let provider = output_provider_factory.provider_rw()?; let mut stage = AccountHashingStage { clean_threshold: 1, // Forces hashing from scratch ..Default::default() diff --git a/bin/reth/src/commands/stage/dump/hashing_storage.rs b/bin/reth/src/commands/stage/dump/hashing_storage.rs index c05bc66b260..7f827b25cd7 100644 --- a/bin/reth/src/commands/stage/dump/hashing_storage.rs +++ b/bin/reth/src/commands/stage/dump/hashing_storage.rs @@ -2,25 +2,34 @@ use super::setup; use crate::utils::DbTool; use eyre::Result; use reth_db::{database::Database, table::TableImporter, tables, DatabaseEnv}; -use reth_primitives::{stage::StageCheckpoint, ChainSpec}; +use reth_node_core::dirs::{ChainPath, DataDirPath}; +use reth_primitives::stage::StageCheckpoint; use reth_provider::ProviderFactory; use reth_stages::{stages::StorageHashingStage, Stage, UnwindInput}; -use std::{path::PathBuf, sync::Arc}; use tracing::info; pub(crate) async fn dump_hashing_storage_stage( - db_tool: &DbTool<'_, DB>, + db_tool: &DbTool, from: u64, to: u64, - output_db: &PathBuf, + output_datadir: ChainPath, should_run: bool, ) -> Result<()> { - let (output_db, tip_block_number) = setup(from, to, output_db, db_tool)?; + let (output_db, tip_block_number) = setup(from, to, &output_datadir.db_path(), db_tool)?; unwind_and_copy(db_tool, from, tip_block_number, &output_db)?; if should_run { - dry_run(db_tool.chain.clone(), output_db, to, from).await?; + dry_run( + ProviderFactory::new( + output_db, + db_tool.chain.clone(), + output_datadir.static_files_path(), + )?, + to, + from, + ) + .await?; } Ok(()) @@ -28,13 +37,12 @@ pub(crate) async fn dump_hashing_storage_stage( /// Dry-run an unwind to FROM block and copy the necessary table data to the new database. 
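Elsewhere in this patch (the import command and the debug commands), every pipeline build now also receives a `StaticFileProducer`. The following sketch mirrors the replay-engine wiring above with an empty stage set; the concrete `Arc<DatabaseEnv>` type parameter is an assumption taken from how those commands instantiate their factories.

```rust
use std::sync::Arc;

use reth_db::DatabaseEnv;
use reth_primitives::PruneModes;
use reth_provider::ProviderFactory;
use reth_stages::Pipeline;
use reth_static_file::StaticFileProducer;

/// Builds an (empty) pipeline that still owns a `StaticFileProducer`, so
/// finished segment data can be moved out of the database into static files.
fn empty_pipeline(
    provider_factory: ProviderFactory<Arc<DatabaseEnv>>,
) -> Pipeline<Arc<DatabaseEnv>> {
    let static_file_producer = StaticFileProducer::new(
        provider_factory.clone(),
        provider_factory.static_file_provider(),
        PruneModes::default(),
    );
    Pipeline::builder().build(provider_factory, static_file_producer)
}
```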
fn unwind_and_copy( - db_tool: &DbTool<'_, DB>, + db_tool: &DbTool, from: u64, tip_block_number: u64, output_db: &DatabaseEnv, ) -> eyre::Result<()> { - let factory = ProviderFactory::new(db_tool.db, db_tool.chain.clone()); - let provider = factory.provider_rw()?; + let provider = db_tool.provider_factory.provider_rw()?; let mut exec_stage = StorageHashingStage::default(); @@ -59,15 +67,13 @@ fn unwind_and_copy( /// Try to re-execute the stage straightaway async fn dry_run( - chain: Arc, - output_db: DB, + output_provider_factory: ProviderFactory, to: u64, from: u64, ) -> eyre::Result<()> { info!(target: "reth::cli", "Executing stage."); - let factory = ProviderFactory::new(&output_db, chain); - let provider = factory.provider_rw()?; + let provider = output_provider_factory.provider_rw()?; let mut stage = StorageHashingStage { clean_threshold: 1, // Forces hashing from scratch ..Default::default() diff --git a/bin/reth/src/commands/stage/dump/merkle.rs b/bin/reth/src/commands/stage/dump/merkle.rs index a85303c9d0c..3e6d2e6352c 100644 --- a/bin/reth/src/commands/stage/dump/merkle.rs +++ b/bin/reth/src/commands/stage/dump/merkle.rs @@ -2,8 +2,9 @@ use super::setup; use crate::utils::DbTool; use eyre::Result; use reth_db::{database::Database, table::TableImporter, tables, DatabaseEnv}; +use reth_node_core::dirs::{ChainPath, DataDirPath}; use reth_node_ethereum::EthEvmConfig; -use reth_primitives::{stage::StageCheckpoint, BlockNumber, ChainSpec, PruneModes}; +use reth_primitives::{stage::StageCheckpoint, BlockNumber, PruneModes}; use reth_provider::ProviderFactory; use reth_stages::{ stages::{ @@ -12,25 +13,28 @@ use reth_stages::{ }, Stage, UnwindInput, }; -use std::{path::PathBuf, sync::Arc}; use tracing::info; pub(crate) async fn dump_merkle_stage( - db_tool: &DbTool<'_, DB>, + db_tool: &DbTool, from: BlockNumber, to: BlockNumber, - output_db: &PathBuf, + output_datadir: ChainPath, should_run: bool, ) -> Result<()> { - let (output_db, tip_block_number) = setup(from, to, output_db, db_tool)?; + let (output_db, tip_block_number) = setup(from, to, &output_datadir.db_path(), db_tool)?; output_db.update(|tx| { - tx.import_table_with_range::(&db_tool.db.tx()?, Some(from), to) + tx.import_table_with_range::( + &db_tool.provider_factory.db_ref().tx()?, + Some(from), + to, + ) })??; output_db.update(|tx| { tx.import_table_with_range::( - &db_tool.db.tx()?, + &db_tool.provider_factory.db_ref().tx()?, Some(from), to, ) @@ -39,7 +43,16 @@ pub(crate) async fn dump_merkle_stage( unwind_and_copy(db_tool, (from, to), tip_block_number, &output_db).await?; if should_run { - dry_run(db_tool.chain.clone(), output_db, to, from).await?; + dry_run( + ProviderFactory::new( + output_db, + db_tool.chain.clone(), + output_datadir.static_files_path(), + )?, + to, + from, + ) + .await?; } Ok(()) @@ -47,14 +60,13 @@ pub(crate) async fn dump_merkle_stage( /// Dry-run an unwind to FROM block and copy the necessary table data to the new database. 
async fn unwind_and_copy( - db_tool: &DbTool<'_, DB>, + db_tool: &DbTool, range: (u64, u64), tip_block_number: u64, output_db: &DatabaseEnv, ) -> eyre::Result<()> { let (from, to) = range; - let factory = ProviderFactory::new(db_tool.db, db_tool.chain.clone()); - let provider = factory.provider_rw()?; + let provider = db_tool.provider_factory.provider_rw()?; let unwind = UnwindInput { unwind_to: from, @@ -117,14 +129,12 @@ async fn unwind_and_copy( /// Try to re-execute the stage straightaway async fn dry_run( - chain: Arc, - output_db: DB, + output_provider_factory: ProviderFactory, to: u64, from: u64, ) -> eyre::Result<()> { info!(target: "reth::cli", "Executing stage."); - let factory = ProviderFactory::new(&output_db, chain); - let provider = factory.provider_rw()?; + let provider = output_provider_factory.provider_rw()?; let mut stage = MerkleStage::Execution { // Forces updating the root instead of calculating from scratch diff --git a/bin/reth/src/commands/stage/dump/mod.rs b/bin/reth/src/commands/stage/dump/mod.rs index 2a34e67ee62..f85ce0e8b39 100644 --- a/bin/reth/src/commands/stage/dump/mod.rs +++ b/bin/reth/src/commands/stage/dump/mod.rs @@ -14,7 +14,9 @@ use reth_db::{ cursor::DbCursorRO, database::Database, init_db, table::TableImporter, tables, transaction::DbTx, DatabaseEnv, }; +use reth_node_core::dirs::PlatformPath; use reth_primitives::ChainSpec; +use reth_provider::ProviderFactory; use std::{path::PathBuf, sync::Arc}; use tracing::info; @@ -79,9 +81,9 @@ pub enum Stages { /// Stage command that takes a range #[derive(Debug, Clone, Parser)] pub struct StageCommand { - /// The path to the new database folder. + /// The path to the new datadir folder. #[arg(long, value_name = "OUTPUT_PATH", verbatim_doc_comment)] - output_db: PathBuf, + output_datadir: PlatformPath, /// From which block. #[arg(long, short)] @@ -104,22 +106,53 @@ impl Command { info!(target: "reth::cli", path = ?db_path, "Opening database"); let db = Arc::new(init_db(db_path, DatabaseArguments::default().log_level(self.db.log_level))?); + let provider_factory = + ProviderFactory::new(db, self.chain.clone(), data_dir.static_files_path())?; + info!(target: "reth::cli", "Database opened"); - let tool = DbTool::new(&db, self.chain.clone())?; + let tool = DbTool::new(provider_factory, self.chain.clone())?; match &self.command { - Stages::Execution(StageCommand { output_db, from, to, dry_run, .. }) => { - dump_execution_stage(&tool, *from, *to, output_db, *dry_run).await? + Stages::Execution(StageCommand { output_datadir, from, to, dry_run, .. }) => { + dump_execution_stage( + &tool, + *from, + *to, + output_datadir.with_chain(self.chain.chain), + *dry_run, + ) + .await? } - Stages::StorageHashing(StageCommand { output_db, from, to, dry_run, .. }) => { - dump_hashing_storage_stage(&tool, *from, *to, output_db, *dry_run).await? + Stages::StorageHashing(StageCommand { output_datadir, from, to, dry_run, .. }) => { + dump_hashing_storage_stage( + &tool, + *from, + *to, + output_datadir.with_chain(self.chain.chain), + *dry_run, + ) + .await? } - Stages::AccountHashing(StageCommand { output_db, from, to, dry_run, .. }) => { - dump_hashing_account_stage(&tool, *from, *to, output_db, *dry_run).await? + Stages::AccountHashing(StageCommand { output_datadir, from, to, dry_run, .. }) => { + dump_hashing_account_stage( + &tool, + *from, + *to, + output_datadir.with_chain(self.chain.chain), + *dry_run, + ) + .await? } - Stages::Merkle(StageCommand { output_db, from, to, dry_run, .. 
}) => { - dump_merkle_stage(&tool, *from, *to, output_db, *dry_run).await? + Stages::Merkle(StageCommand { output_datadir, from, to, dry_run, .. }) => { + dump_merkle_stage( + &tool, + *from, + *to, + output_datadir.with_chain(self.chain.chain), + *dry_run, + ) + .await? } } @@ -133,24 +166,27 @@ pub(crate) fn setup( from: u64, to: u64, output_db: &PathBuf, - db_tool: &DbTool<'_, DB>, + db_tool: &DbTool, ) -> eyre::Result<(DatabaseEnv, u64)> { assert!(from < to, "FROM block should be bigger than TO block."); info!(target: "reth::cli", ?output_db, "Creating separate db"); - let output_db = init_db(output_db, Default::default())?; + let output_datadir = init_db(output_db, Default::default())?; - output_db.update(|tx| { + output_datadir.update(|tx| { tx.import_table_with_range::( - &db_tool.db.tx()?, + &db_tool.provider_factory.db_ref().tx()?, Some(from - 1), to + 1, ) })??; - let (tip_block_number, _) = - db_tool.db.view(|tx| tx.cursor_read::()?.last())??.expect("some"); + let (tip_block_number, _) = db_tool + .provider_factory + .db_ref() + .view(|tx| tx.cursor_read::()?.last())?? + .expect("some"); - Ok((output_db, tip_block_number)) + Ok((output_datadir, tip_block_number)) } diff --git a/bin/reth/src/commands/stage/run.rs b/bin/reth/src/commands/stage/run.rs index 931cd901dae..5cd3ded0ba8 100644 --- a/bin/reth/src/commands/stage/run.rs +++ b/bin/reth/src/commands/stage/run.rs @@ -28,7 +28,7 @@ use reth_stages::{ }, ExecInput, ExecOutput, Stage, StageExt, UnwindInput, UnwindOutput, }; -use std::{any::Any, net::SocketAddr, path::PathBuf, sync::Arc}; +use std::{any::Any, net::SocketAddr, path::PathBuf, sync::Arc, time::Instant}; use tracing::*; /// `reth stage` command @@ -131,7 +131,11 @@ impl Command { Arc::new(init_db(db_path, DatabaseArguments::default().log_level(self.db.log_level))?); info!(target: "reth::cli", "Database opened"); - let factory = ProviderFactory::new(Arc::clone(&db), self.chain.clone()); + let factory = ProviderFactory::new( + Arc::clone(&db), + self.chain.clone(), + data_dir.static_files_path(), + )?; let mut provider_rw = factory.provider_rw()?; if let Some(listen_addr) = self.metrics { @@ -169,8 +173,11 @@ impl Command { let default_peers_path = data_dir.known_peers_path(); - let provider_factory = - Arc::new(ProviderFactory::new(db.clone(), self.chain.clone())); + let provider_factory = Arc::new(ProviderFactory::new( + db.clone(), + self.chain.clone(), + data_dir.static_files_path(), + )?); let network = self .network @@ -273,6 +280,8 @@ impl Command { checkpoint: Some(checkpoint.with_block_number(self.from)), }; + let start = Instant::now(); + info!(target: "reth::cli", stage = %self.stage, "Executing stage"); loop { exec_stage.execute_ready(input).await?; let ExecOutput { checkpoint, done } = exec_stage.execute(&provider_rw, input)?; @@ -291,6 +300,7 @@ impl Command { break } } + info!(target: "reth::cli", stage = %self.stage, time = ?start.elapsed(), "Finished stage"); Ok(()) } diff --git a/bin/reth/src/commands/stage/unwind.rs b/bin/reth/src/commands/stage/unwind.rs index a5b568e4f8a..44d4c2845fa 100644 --- a/bin/reth/src/commands/stage/unwind.rs +++ b/bin/reth/src/commands/stage/unwind.rs @@ -68,7 +68,7 @@ impl Command { eyre::bail!("Cannot unwind genesis block") } - let factory = ProviderFactory::new(&db, self.chain.clone()); + let factory = ProviderFactory::new(&db, self.chain.clone(), data_dir.static_files_path())?; let provider = factory.provider_rw()?; let blocks_and_execution = provider diff --git a/bin/reth/src/utils.rs b/bin/reth/src/utils.rs index 
1e95f5a53ba..aa463816d09 100644 --- a/bin/reth/src/utils.rs +++ b/bin/reth/src/utils.rs @@ -10,6 +10,7 @@ use reth_db::{ DatabaseError, RawTable, TableRawRow, }; use reth_primitives::{fs, ChainSpec}; +use reth_provider::ProviderFactory; use std::{path::Path, rc::Rc, sync::Arc}; use tracing::info; @@ -24,17 +25,17 @@ pub use reth_node_core::utils::*; /// Wrapper over DB that implements many useful DB queries. #[derive(Debug)] -pub struct DbTool<'a, DB: Database> { - /// The database that the db tool will use. - pub db: &'a DB, +pub struct DbTool { + /// The provider factory that the db tool will use. + pub provider_factory: ProviderFactory, /// The [ChainSpec] that the db tool will use. pub chain: Arc, } -impl<'a, DB: Database> DbTool<'a, DB> { +impl DbTool { /// Takes a DB where the tables have already been created. - pub fn new(db: &'a DB, chain: Arc) -> eyre::Result { - Ok(Self { db, chain }) + pub fn new(provider_factory: ProviderFactory, chain: Arc) -> eyre::Result { + Ok(Self { provider_factory, chain }) } /// Grabs the contents of the table within a certain index range and places the @@ -50,7 +51,7 @@ impl<'a, DB: Database> DbTool<'a, DB> { let mut hits = 0; - let data = self.db.view(|tx| { + let data = self.provider_factory.db_ref().view(|tx| { let mut cursor = tx.cursor_read::>().expect("Was not able to obtain a cursor."); @@ -118,12 +119,13 @@ impl<'a, DB: Database> DbTool<'a, DB> { /// Grabs the content of the table for the given key pub fn get(&self, key: T::Key) -> Result> { - self.db.view(|tx| tx.get::(key))?.map_err(|e| eyre::eyre!(e)) + self.provider_factory.db_ref().view(|tx| tx.get::(key))?.map_err(|e| eyre::eyre!(e)) } /// Grabs the content of the DupSort table for the given key and subkey pub fn get_dup(&self, key: T::Key, subkey: T::SubKey) -> Result> { - self.db + self.provider_factory + .db_ref() .view(|tx| tx.cursor_dup_read::()?.seek_by_key_subkey(key, subkey))? .map_err(|e| eyre::eyre!(e)) } @@ -138,7 +140,7 @@ impl<'a, DB: Database> DbTool<'a, DB> { /// Drops the provided table from the database. 
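With `DbTool` now owning a `ProviderFactory`, table reads go through `db_ref().view(..)`. A small sketch of that access pattern, lifted from the `DbTool::get` change above; the `Headers` table's key/value types (`u64` → `Header`) are assumed to match mainline reth:

```rust
use crate::utils::DbTool;
use reth_db::{database::Database, tables, transaction::DbTx};
use reth_primitives::Header;

/// Reads a single header through the reworked `DbTool`, which wraps a
/// `ProviderFactory` instead of borrowing a database handle directly.
fn read_header<DB: Database>(
    tool: &DbTool<DB>,
    number: u64,
) -> eyre::Result<Option<Header>> {
    // Same pattern as `DbTool::get` above: open a read-only view on the
    // database underlying the wrapped provider factory and fetch one key.
    tool.provider_factory
        .db_ref()
        .view(|tx| tx.get::<tables::Headers>(number))?
        .map_err(|e| eyre::eyre!(e))
}
```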
pub fn drop_table(&mut self) -> Result<()> { - self.db.update(|tx| tx.clear::())??; + self.provider_factory.db_ref().update(|tx| tx.clear::())??; Ok(()) } } diff --git a/book/SUMMARY.md b/book/SUMMARY.md index 1422cf1b706..ffd5f67e0bd 100644 --- a/book/SUMMARY.md +++ b/book/SUMMARY.md @@ -37,9 +37,13 @@ - [`reth db list`](./cli/reth/db/list.md) - [`reth db diff`](./cli/reth/db/diff.md) - [`reth db get`](./cli/reth/db/get.md) + - [`reth db get mdbx`](./cli/reth/db/get/mdbx.md) + - [`reth db get static-file`](./cli/reth/db/get/static-file.md) - [`reth db drop`](./cli/reth/db/drop.md) - [`reth db clear`](./cli/reth/db/clear.md) - - [`reth db snapshot`](./cli/reth/db/snapshot.md) + - [`reth db clear mdbx`](./cli/reth/db/clear/mdbx.md) + - [`reth db clear static-file`](./cli/reth/db/clear/static-file.md) + - [`reth db create-static-files`](./cli/reth/db/create-static-files.md) - [`reth db version`](./cli/reth/db/version.md) - [`reth db path`](./cli/reth/db/path.md) - [`reth stage`](./cli/reth/stage.md) diff --git a/book/cli/SUMMARY.md b/book/cli/SUMMARY.md index 898ddeb8681..07711434e31 100644 --- a/book/cli/SUMMARY.md +++ b/book/cli/SUMMARY.md @@ -8,9 +8,13 @@ - [`reth db list`](./reth/db/list.md) - [`reth db diff`](./reth/db/diff.md) - [`reth db get`](./reth/db/get.md) + - [`reth db get mdbx`](./reth/db/get/mdbx.md) + - [`reth db get static-file`](./reth/db/get/static-file.md) - [`reth db drop`](./reth/db/drop.md) - [`reth db clear`](./reth/db/clear.md) - - [`reth db snapshot`](./reth/db/snapshot.md) + - [`reth db clear mdbx`](./reth/db/clear/mdbx.md) + - [`reth db clear static-file`](./reth/db/clear/static-file.md) + - [`reth db create-static-files`](./reth/db/create-static-files.md) - [`reth db version`](./reth/db/version.md) - [`reth db path`](./reth/db/path.md) - [`reth stage`](./reth/stage.md) diff --git a/book/cli/reth/db.md b/book/cli/reth/db.md index 158b4b726a4..7c72730ae16 100644 --- a/book/cli/reth/db.md +++ b/book/cli/reth/db.md @@ -7,16 +7,16 @@ $ reth db --help Usage: reth db [OPTIONS] Commands: - stats Lists all the tables, their entry count and their size - list Lists the contents of a table - diff Create a diff between two database tables or two entire databases - get Gets the content of a table for the given key - drop Deletes all database entries - clear Deletes all table entries - snapshot Snapshots tables from database - version Lists current and local database versions - path Returns the full database path - help Print this message or the help of the given subcommand(s) + stats Lists all the tables, their entry count and their size + list Lists the contents of a table + diff Create a diff between two database tables or two entire databases + get Gets the content of a table for the given key + drop Deletes all database entries + clear Deletes all table entries + create-static-files Creates static files from database tables + version Lists current and local database versions + path Returns the full database path + help Print this message or the help of the given subcommand(s) Options: --datadir diff --git a/book/cli/reth/db/clear.md b/book/cli/reth/db/clear.md index c8dd59583fe..f69e29b6062 100644 --- a/book/cli/reth/db/clear.md +++ b/book/cli/reth/db/clear.md @@ -4,11 +4,12 @@ Deletes all table entries ```bash $ reth db clear --help -Usage: reth db clear [OPTIONS] +Usage: reth db clear [OPTIONS] -Arguments: -
- Table name +Commands: + mdbx Deletes all database table entries + static-file Deletes all static file segment entries + help Print this message or the help of the given subcommand(s) Options: --datadir diff --git a/book/cli/reth/db/clear/mdbx.md b/book/cli/reth/db/clear/mdbx.md new file mode 100644 index 00000000000..e16697d395a --- /dev/null +++ b/book/cli/reth/db/clear/mdbx.md @@ -0,0 +1,124 @@ +# reth db clear mdbx + +Deletes all database table entries + +```bash +$ reth db clear mdbx --help +Usage: reth db clear mdbx [OPTIONS] <TABLE>
+ +Arguments: + <TABLE>
+ + +Options: + --datadir + The path to the data dir for all reth files and subdirectories. + + Defaults to the OS-specific data directory: + + - Linux: `$XDG_DATA_HOME/reth/` or `$HOME/.local/share/reth/` + - Windows: `{FOLDERID_RoamingAppData}/reth/` + - macOS: `$HOME/Library/Application Support/reth/` + + [default: default] + + --chain + The chain this node is running. + Possible values are either a built-in chain or the path to a chain specification file. + + Built-in chains: + mainnet, sepolia, goerli, holesky, dev + + [default: mainnet] + + --instance + Add a new instance of a node. + + Configures the ports of the node to avoid conflicts with the defaults. This is useful for running multiple nodes on the same machine. + + Max number of instances is 200. It is chosen in a way so that it's not possible to have port numbers that conflict with each other. + + Changes to the following port numbers: - DISCOVERY_PORT: default + `instance` - 1 - AUTH_PORT: default + `instance` * 100 - 100 - HTTP_RPC_PORT: default - `instance` + 1 - WS_RPC_PORT: default + `instance` * 2 - 2 + + [default: 1] + + -h, --help + Print help (see a summary with '-h') + +Logging: + --log.stdout.format + The format to use for logs written to stdout + + [default: terminal] + + Possible values: + - json: Represents JSON formatting for logs. This format outputs log records as JSON objects, making it suitable for structured logging + - log-fmt: Represents logfmt (key=value) formatting for logs. This format is concise and human-readable, typically used in command-line applications + - terminal: Represents terminal-friendly formatting for logs + + --log.stdout.filter + The filter to use for logs written to stdout + + [default: ] + + --log.file.format + The format to use for logs written to the log file + + [default: terminal] + + Possible values: + - json: Represents JSON formatting for logs. This format outputs log records as JSON objects, making it suitable for structured logging + - log-fmt: Represents logfmt (key=value) formatting for logs. This format is concise and human-readable, typically used in command-line applications + - terminal: Represents terminal-friendly formatting for logs + + --log.file.filter + The filter to use for logs written to the log file + + [default: debug] + + --log.file.directory + The path to put log files in + + [default: /logs] + + --log.file.max-size + The maximum size (in MB) of one log file + + [default: 200] + + --log.file.max-files + The maximum amount of log files that will be stored. If set to 0, background file logging is disabled + + [default: 5] + + --log.journald + Write logs to journald + + --log.journald.filter + The filter to use for logs written to journald + + [default: error] + + --color + Sets whether or not the formatter emits ANSI terminal escape codes for colors and other text formatting + + [default: always] + + Possible values: + - always: Colors on + - auto: Colors on + - never: Colors off + +Display: + -v, --verbosity... + Set the minimum log level. + + -v Errors + -vv Warnings + -vvv Info + -vvvv Debug + -vvvvv Traces (warning: very verbose!) 
+ + -q, --quiet + Silence all log output +``` \ No newline at end of file diff --git a/book/cli/reth/db/clear/static-file.md b/book/cli/reth/db/clear/static-file.md new file mode 100644 index 00000000000..c41158b7af5 --- /dev/null +++ b/book/cli/reth/db/clear/static-file.md @@ -0,0 +1,127 @@ +# reth db clear static-file + +Deletes all static file segment entries + +```bash +$ reth db clear static-file --help +Usage: reth db clear static-file [OPTIONS] + +Arguments: + + Possible values: + - headers: Static File segment responsible for the `CanonicalHeaders`, `Headers`, `HeaderTerminalDifficulties` tables + - transactions: Static File segment responsible for the `Transactions` table + - receipts: Static File segment responsible for the `Receipts` table + +Options: + --datadir + The path to the data dir for all reth files and subdirectories. + + Defaults to the OS-specific data directory: + + - Linux: `$XDG_DATA_HOME/reth/` or `$HOME/.local/share/reth/` + - Windows: `{FOLDERID_RoamingAppData}/reth/` + - macOS: `$HOME/Library/Application Support/reth/` + + [default: default] + + --chain + The chain this node is running. + Possible values are either a built-in chain or the path to a chain specification file. + + Built-in chains: + mainnet, sepolia, goerli, holesky, dev + + [default: mainnet] + + --instance + Add a new instance of a node. + + Configures the ports of the node to avoid conflicts with the defaults. This is useful for running multiple nodes on the same machine. + + Max number of instances is 200. It is chosen in a way so that it's not possible to have port numbers that conflict with each other. + + Changes to the following port numbers: - DISCOVERY_PORT: default + `instance` - 1 - AUTH_PORT: default + `instance` * 100 - 100 - HTTP_RPC_PORT: default - `instance` + 1 - WS_RPC_PORT: default + `instance` * 2 - 2 + + [default: 1] + + -h, --help + Print help (see a summary with '-h') + +Logging: + --log.stdout.format + The format to use for logs written to stdout + + [default: terminal] + + Possible values: + - json: Represents JSON formatting for logs. This format outputs log records as JSON objects, making it suitable for structured logging + - log-fmt: Represents logfmt (key=value) formatting for logs. This format is concise and human-readable, typically used in command-line applications + - terminal: Represents terminal-friendly formatting for logs + + --log.stdout.filter + The filter to use for logs written to stdout + + [default: ] + + --log.file.format + The format to use for logs written to the log file + + [default: terminal] + + Possible values: + - json: Represents JSON formatting for logs. This format outputs log records as JSON objects, making it suitable for structured logging + - log-fmt: Represents logfmt (key=value) formatting for logs. This format is concise and human-readable, typically used in command-line applications + - terminal: Represents terminal-friendly formatting for logs + + --log.file.filter + The filter to use for logs written to the log file + + [default: debug] + + --log.file.directory + The path to put log files in + + [default: /logs] + + --log.file.max-size + The maximum size (in MB) of one log file + + [default: 200] + + --log.file.max-files + The maximum amount of log files that will be stored. 
If set to 0, background file logging is disabled + + [default: 5] + + --log.journald + Write logs to journald + + --log.journald.filter + The filter to use for logs written to journald + + [default: error] + + --color + Sets whether or not the formatter emits ANSI terminal escape codes for colors and other text formatting + + [default: always] + + Possible values: + - always: Colors on + - auto: Colors on + - never: Colors off + +Display: + -v, --verbosity... + Set the minimum log level. + + -v Errors + -vv Warnings + -vvv Info + -vvvv Debug + -vvvvv Traces (warning: very verbose!) + + -q, --quiet + Silence all log output +``` \ No newline at end of file diff --git a/book/cli/reth/db/clear/static_file.md b/book/cli/reth/db/clear/static_file.md new file mode 100644 index 00000000000..2c503dd714a --- /dev/null +++ b/book/cli/reth/db/clear/static_file.md @@ -0,0 +1,127 @@ +# reth db clear static-file + +Deletes all static_file segment entries + +```bash +$ reth db clear static-file --help +Usage: reth db clear static-file [OPTIONS] + +Arguments: + + Possible values: + - headers: StaticFile segment responsible for the `CanonicalHeaders`, `Headers`, `HeaderTerminalDifficulties` tables + - transactions: StaticFile segment responsible for the `Transactions` table + - receipts: StaticFile segment responsible for the `Receipts` table + +Options: + --datadir + The path to the data dir for all reth files and subdirectories. + + Defaults to the OS-specific data directory: + + - Linux: `$XDG_DATA_HOME/reth/` or `$HOME/.local/share/reth/` + - Windows: `{FOLDERID_RoamingAppData}/reth/` + - macOS: `$HOME/Library/Application Support/reth/` + + [default: default] + + --chain + The chain this node is running. + Possible values are either a built-in chain or the path to a chain specification file. + + Built-in chains: + mainnet, sepolia, goerli, holesky, dev + + [default: mainnet] + + --instance + Add a new instance of a node. + + Configures the ports of the node to avoid conflicts with the defaults. This is useful for running multiple nodes on the same machine. + + Max number of instances is 200. It is chosen in a way so that it's not possible to have port numbers that conflict with each other. + + Changes to the following port numbers: - DISCOVERY_PORT: default + `instance` - 1 - AUTH_PORT: default + `instance` * 100 - 100 - HTTP_RPC_PORT: default - `instance` + 1 - WS_RPC_PORT: default + `instance` * 2 - 2 + + [default: 1] + + -h, --help + Print help (see a summary with '-h') + +Logging: + --log.stdout.format + The format to use for logs written to stdout + + [default: terminal] + + Possible values: + - json: Represents JSON formatting for logs. This format outputs log records as JSON objects, making it suitable for structured logging + - log-fmt: Represents logfmt (key=value) formatting for logs. This format is concise and human-readable, typically used in command-line applications + - terminal: Represents terminal-friendly formatting for logs + + --log.stdout.filter + The filter to use for logs written to stdout + + [default: ] + + --log.file.format + The format to use for logs written to the log file + + [default: terminal] + + Possible values: + - json: Represents JSON formatting for logs. This format outputs log records as JSON objects, making it suitable for structured logging + - log-fmt: Represents logfmt (key=value) formatting for logs. 
This format is concise and human-readable, typically used in command-line applications + - terminal: Represents terminal-friendly formatting for logs + + --log.file.filter + The filter to use for logs written to the log file + + [default: debug] + + --log.file.directory + The path to put log files in + + [default: /logs] + + --log.file.max-size + The maximum size (in MB) of one log file + + [default: 200] + + --log.file.max-files + The maximum amount of log files that will be stored. If set to 0, background file logging is disabled + + [default: 5] + + --log.journald + Write logs to journald + + --log.journald.filter + The filter to use for logs written to journald + + [default: error] + + --color + Sets whether or not the formatter emits ANSI terminal escape codes for colors and other text formatting + + [default: always] + + Possible values: + - always: Colors on + - auto: Colors on + - never: Colors off + +Display: + -v, --verbosity... + Set the minimum log level. + + -v Errors + -vv Warnings + -vvv Info + -vvvv Debug + -vvvvv Traces (warning: very verbose!) + + -q, --quiet + Silence all log output +``` \ No newline at end of file diff --git a/book/cli/reth/db/create-static-files.md b/book/cli/reth/db/create-static-files.md new file mode 100644 index 00000000000..01094f925d8 --- /dev/null +++ b/book/cli/reth/db/create-static-files.md @@ -0,0 +1,174 @@ +# reth db create-static-files + +Creates static files from database tables + +```bash +$ reth db create-static-files --help +Usage: reth db create-static-files [OPTIONS] [SEGMENTS]... + +Arguments: + [SEGMENTS]... + Static File segments to generate + + Possible values: + - headers: Static File segment responsible for the `CanonicalHeaders`, `Headers`, `HeaderTerminalDifficulties` tables + - transactions: Static File segment responsible for the `Transactions` table + - receipts: Static File segment responsible for the `Receipts` table + +Options: + --datadir + The path to the data dir for all reth files and subdirectories. + + Defaults to the OS-specific data directory: + + - Linux: `$XDG_DATA_HOME/reth/` or `$HOME/.local/share/reth/` + - Windows: `{FOLDERID_RoamingAppData}/reth/` + - macOS: `$HOME/Library/Application Support/reth/` + + [default: default] + + -f, --from + Starting block for the static file + + [default: 0] + + -b, --block-interval + Number of blocks in the static file + + [default: 500000] + + --chain + The chain this node is running. + Possible values are either a built-in chain or the path to a chain specification file. + + Built-in chains: + mainnet, sepolia, goerli, holesky, dev + + [default: mainnet] + + -p, --parallel + Sets the number of static files built in parallel. 
Note: Each parallel build is memory-intensive + + [default: 1] + + --only-stats + Flag to skip static file creation and print static files stats + + --bench + Flag to enable database-to-static file benchmarking + + --only-bench + Flag to skip static file creation and only run benchmarks on existing static files + + -c, --compression + Compression algorithms to use + + [default: uncompressed] + + Possible values: + - lz4: LZ4 compression algorithm + - zstd: Zstandard (Zstd) compression algorithm + - zstd-with-dictionary: Zstandard (Zstd) compression algorithm with a dictionary + - uncompressed: No compression + + --with-filters + Flag to enable inclusion list filters and PHFs + + --phf + Specifies the perfect hashing function to use + + Possible values: + - fmph: Fingerprint-Based Minimal Perfect Hash Function + - go-fmph: Fingerprint-Based Minimal Perfect Hash Function with Group Optimization + + --instance + Add a new instance of a node. + + Configures the ports of the node to avoid conflicts with the defaults. This is useful for running multiple nodes on the same machine. + + Max number of instances is 200. It is chosen in a way so that it's not possible to have port numbers that conflict with each other. + + Changes to the following port numbers: - DISCOVERY_PORT: default + `instance` - 1 - AUTH_PORT: default + `instance` * 100 - 100 - HTTP_RPC_PORT: default - `instance` + 1 - WS_RPC_PORT: default + `instance` * 2 - 2 + + [default: 1] + + -h, --help + Print help (see a summary with '-h') + +Logging: + --log.stdout.format + The format to use for logs written to stdout + + [default: terminal] + + Possible values: + - json: Represents JSON formatting for logs. This format outputs log records as JSON objects, making it suitable for structured logging + - log-fmt: Represents logfmt (key=value) formatting for logs. This format is concise and human-readable, typically used in command-line applications + - terminal: Represents terminal-friendly formatting for logs + + --log.stdout.filter + The filter to use for logs written to stdout + + [default: ] + + --log.file.format + The format to use for logs written to the log file + + [default: terminal] + + Possible values: + - json: Represents JSON formatting for logs. This format outputs log records as JSON objects, making it suitable for structured logging + - log-fmt: Represents logfmt (key=value) formatting for logs. This format is concise and human-readable, typically used in command-line applications + - terminal: Represents terminal-friendly formatting for logs + + --log.file.filter + The filter to use for logs written to the log file + + [default: debug] + + --log.file.directory + The path to put log files in + + [default: /logs] + + --log.file.max-size + The maximum size (in MB) of one log file + + [default: 200] + + --log.file.max-files + The maximum amount of log files that will be stored. If set to 0, background file logging is disabled + + [default: 5] + + --log.journald + Write logs to journald + + --log.journald.filter + The filter to use for logs written to journald + + [default: error] + + --color + Sets whether or not the formatter emits ANSI terminal escape codes for colors and other text formatting + + [default: always] + + Possible values: + - always: Colors on + - auto: Colors on + - never: Colors off + +Display: + -v, --verbosity... + Set the minimum log level. + + -v Errors + -vv Warnings + -vvv Info + -vvvv Debug + -vvvvv Traces (warning: very verbose!) 
+ + -q, --quiet + Silence all log output +``` \ No newline at end of file diff --git a/book/cli/reth/db/get.md b/book/cli/reth/db/get.md index 17b155eaaae..de2f83b56c3 100644 --- a/book/cli/reth/db/get.md +++ b/book/cli/reth/db/get.md @@ -4,19 +4,12 @@ Gets the content of a table for the given key ```bash $ reth db get --help -Usage: reth db get [OPTIONS] <TABLE> <KEY>
[SUBKEY] +Usage: reth db get [OPTIONS] <COMMAND> -Arguments: - <TABLE>
- The table name - - NOTE: The dupsort tables are not supported now. - - - The key to get content for - - [SUBKEY] - The subkey to get content for +Commands: + mdbx Gets the content of a database table for the given key + static-file Gets the content of a static file segment for the given key + help Print this message or the help of the given subcommand(s) Options: --datadir @@ -30,9 +23,6 @@ Options: [default: default] - --raw - Output bytes instead of human-readable decoded value - --chain The chain this node is running. Possible values are either a built-in chain or the path to a chain specification file. diff --git a/book/cli/reth/db/get/mdbx.md b/book/cli/reth/db/get/mdbx.md new file mode 100644 index 00000000000..bf6f0749463 --- /dev/null +++ b/book/cli/reth/db/get/mdbx.md @@ -0,0 +1,133 @@ +# reth db get mdbx + +Gets the content of a database table for the given key + +```bash +$ reth db get mdbx --help +Usage: reth db get mdbx [OPTIONS]
<TABLE> <KEY> [SUBKEY] + +Arguments: + <TABLE>
+ + + + The key to get content for + + [SUBKEY] + The subkey to get content for + +Options: + --datadir + The path to the data dir for all reth files and subdirectories. + + Defaults to the OS-specific data directory: + + - Linux: `$XDG_DATA_HOME/reth/` or `$HOME/.local/share/reth/` + - Windows: `{FOLDERID_RoamingAppData}/reth/` + - macOS: `$HOME/Library/Application Support/reth/` + + [default: default] + + --raw + Output bytes instead of human-readable decoded value + + --chain + The chain this node is running. + Possible values are either a built-in chain or the path to a chain specification file. + + Built-in chains: + mainnet, sepolia, goerli, holesky, dev + + [default: mainnet] + + --instance + Add a new instance of a node. + + Configures the ports of the node to avoid conflicts with the defaults. This is useful for running multiple nodes on the same machine. + + Max number of instances is 200. It is chosen in a way so that it's not possible to have port numbers that conflict with each other. + + Changes to the following port numbers: - DISCOVERY_PORT: default + `instance` - 1 - AUTH_PORT: default + `instance` * 100 - 100 - HTTP_RPC_PORT: default - `instance` + 1 - WS_RPC_PORT: default + `instance` * 2 - 2 + + [default: 1] + + -h, --help + Print help (see a summary with '-h') + +Logging: + --log.stdout.format + The format to use for logs written to stdout + + [default: terminal] + + Possible values: + - json: Represents JSON formatting for logs. This format outputs log records as JSON objects, making it suitable for structured logging + - log-fmt: Represents logfmt (key=value) formatting for logs. This format is concise and human-readable, typically used in command-line applications + - terminal: Represents terminal-friendly formatting for logs + + --log.stdout.filter + The filter to use for logs written to stdout + + [default: ] + + --log.file.format + The format to use for logs written to the log file + + [default: terminal] + + Possible values: + - json: Represents JSON formatting for logs. This format outputs log records as JSON objects, making it suitable for structured logging + - log-fmt: Represents logfmt (key=value) formatting for logs. This format is concise and human-readable, typically used in command-line applications + - terminal: Represents terminal-friendly formatting for logs + + --log.file.filter + The filter to use for logs written to the log file + + [default: debug] + + --log.file.directory + The path to put log files in + + [default: /logs] + + --log.file.max-size + The maximum size (in MB) of one log file + + [default: 200] + + --log.file.max-files + The maximum amount of log files that will be stored. If set to 0, background file logging is disabled + + [default: 5] + + --log.journald + Write logs to journald + + --log.journald.filter + The filter to use for logs written to journald + + [default: error] + + --color + Sets whether or not the formatter emits ANSI terminal escape codes for colors and other text formatting + + [default: always] + + Possible values: + - always: Colors on + - auto: Colors on + - never: Colors off + +Display: + -v, --verbosity... + Set the minimum log level. + + -v Errors + -vv Warnings + -vvv Info + -vvvv Debug + -vvvvv Traces (warning: very verbose!) 
+ + -q, --quiet + Silence all log output +``` \ No newline at end of file diff --git a/book/cli/reth/db/get/static-file.md b/book/cli/reth/db/get/static-file.md new file mode 100644 index 00000000000..a6addeffb8f --- /dev/null +++ b/book/cli/reth/db/get/static-file.md @@ -0,0 +1,133 @@ +# reth db get static-file + +Gets the content of a static file segment for the given key + +```bash +$ reth db get static-file --help +Usage: reth db get static-file [OPTIONS] + +Arguments: + + Possible values: + - headers: Static File segment responsible for the `CanonicalHeaders`, `Headers`, `HeaderTerminalDifficulties` tables + - transactions: Static File segment responsible for the `Transactions` table + - receipts: Static File segment responsible for the `Receipts` table + + + The key to get content for + +Options: + --datadir + The path to the data dir for all reth files and subdirectories. + + Defaults to the OS-specific data directory: + + - Linux: `$XDG_DATA_HOME/reth/` or `$HOME/.local/share/reth/` + - Windows: `{FOLDERID_RoamingAppData}/reth/` + - macOS: `$HOME/Library/Application Support/reth/` + + [default: default] + + --raw + Output bytes instead of human-readable decoded value + + --chain + The chain this node is running. + Possible values are either a built-in chain or the path to a chain specification file. + + Built-in chains: + mainnet, sepolia, goerli, holesky, dev + + [default: mainnet] + + --instance + Add a new instance of a node. + + Configures the ports of the node to avoid conflicts with the defaults. This is useful for running multiple nodes on the same machine. + + Max number of instances is 200. It is chosen in a way so that it's not possible to have port numbers that conflict with each other. + + Changes to the following port numbers: - DISCOVERY_PORT: default + `instance` - 1 - AUTH_PORT: default + `instance` * 100 - 100 - HTTP_RPC_PORT: default - `instance` + 1 - WS_RPC_PORT: default + `instance` * 2 - 2 + + [default: 1] + + -h, --help + Print help (see a summary with '-h') + +Logging: + --log.stdout.format + The format to use for logs written to stdout + + [default: terminal] + + Possible values: + - json: Represents JSON formatting for logs. This format outputs log records as JSON objects, making it suitable for structured logging + - log-fmt: Represents logfmt (key=value) formatting for logs. This format is concise and human-readable, typically used in command-line applications + - terminal: Represents terminal-friendly formatting for logs + + --log.stdout.filter + The filter to use for logs written to stdout + + [default: ] + + --log.file.format + The format to use for logs written to the log file + + [default: terminal] + + Possible values: + - json: Represents JSON formatting for logs. This format outputs log records as JSON objects, making it suitable for structured logging + - log-fmt: Represents logfmt (key=value) formatting for logs. This format is concise and human-readable, typically used in command-line applications + - terminal: Represents terminal-friendly formatting for logs + + --log.file.filter + The filter to use for logs written to the log file + + [default: debug] + + --log.file.directory + The path to put log files in + + [default: /logs] + + --log.file.max-size + The maximum size (in MB) of one log file + + [default: 200] + + --log.file.max-files + The maximum amount of log files that will be stored. 
If set to 0, background file logging is disabled + + [default: 5] + + --log.journald + Write logs to journald + + --log.journald.filter + The filter to use for logs written to journald + + [default: error] + + --color + Sets whether or not the formatter emits ANSI terminal escape codes for colors and other text formatting + + [default: always] + + Possible values: + - always: Colors on + - auto: Colors on + - never: Colors off + +Display: + -v, --verbosity... + Set the minimum log level. + + -v Errors + -vv Warnings + -vvv Info + -vvvv Debug + -vvvvv Traces (warning: very verbose!) + + -q, --quiet + Silence all log output +``` \ No newline at end of file diff --git a/book/cli/reth/db/get/static_file.md b/book/cli/reth/db/get/static_file.md new file mode 100644 index 00000000000..12df536f22b --- /dev/null +++ b/book/cli/reth/db/get/static_file.md @@ -0,0 +1,133 @@ +# reth db get static-file + +Gets the content of a static_file segment for the given key + +```bash +$ reth db get static-file --help +Usage: reth db get static-file [OPTIONS] + +Arguments: + + Possible values: + - headers: StaticFile segment responsible for the `CanonicalHeaders`, `Headers`, `HeaderTerminalDifficulties` tables + - transactions: StaticFile segment responsible for the `Transactions` table + - receipts: StaticFile segment responsible for the `Receipts` table + + + The key to get content for + +Options: + --datadir + The path to the data dir for all reth files and subdirectories. + + Defaults to the OS-specific data directory: + + - Linux: `$XDG_DATA_HOME/reth/` or `$HOME/.local/share/reth/` + - Windows: `{FOLDERID_RoamingAppData}/reth/` + - macOS: `$HOME/Library/Application Support/reth/` + + [default: default] + + --raw + Output bytes instead of human-readable decoded value + + --chain + The chain this node is running. + Possible values are either a built-in chain or the path to a chain specification file. + + Built-in chains: + mainnet, sepolia, goerli, holesky, dev + + [default: mainnet] + + --instance + Add a new instance of a node. + + Configures the ports of the node to avoid conflicts with the defaults. This is useful for running multiple nodes on the same machine. + + Max number of instances is 200. It is chosen in a way so that it's not possible to have port numbers that conflict with each other. + + Changes to the following port numbers: - DISCOVERY_PORT: default + `instance` - 1 - AUTH_PORT: default + `instance` * 100 - 100 - HTTP_RPC_PORT: default - `instance` + 1 - WS_RPC_PORT: default + `instance` * 2 - 2 + + [default: 1] + + -h, --help + Print help (see a summary with '-h') + +Logging: + --log.stdout.format + The format to use for logs written to stdout + + [default: terminal] + + Possible values: + - json: Represents JSON formatting for logs. This format outputs log records as JSON objects, making it suitable for structured logging + - log-fmt: Represents logfmt (key=value) formatting for logs. This format is concise and human-readable, typically used in command-line applications + - terminal: Represents terminal-friendly formatting for logs + + --log.stdout.filter + The filter to use for logs written to stdout + + [default: ] + + --log.file.format + The format to use for logs written to the log file + + [default: terminal] + + Possible values: + - json: Represents JSON formatting for logs. This format outputs log records as JSON objects, making it suitable for structured logging + - log-fmt: Represents logfmt (key=value) formatting for logs. 
This format is concise and human-readable, typically used in command-line applications + - terminal: Represents terminal-friendly formatting for logs + + --log.file.filter + The filter to use for logs written to the log file + + [default: debug] + + --log.file.directory + The path to put log files in + + [default: /logs] + + --log.file.max-size + The maximum size (in MB) of one log file + + [default: 200] + + --log.file.max-files + The maximum amount of log files that will be stored. If set to 0, background file logging is disabled + + [default: 5] + + --log.journald + Write logs to journald + + --log.journald.filter + The filter to use for logs written to journald + + [default: error] + + --color + Sets whether or not the formatter emits ANSI terminal escape codes for colors and other text formatting + + [default: always] + + Possible values: + - always: Colors on + - auto: Colors on + - never: Colors off + +Display: + -v, --verbosity... + Set the minimum log level. + + -v Errors + -vv Warnings + -vvv Info + -vvvv Debug + -vvvvv Traces (warning: very verbose!) + + -q, --quiet + Silence all log output +``` \ No newline at end of file diff --git a/book/cli/reth/db/snapshot.md b/book/cli/reth/db/static_file.md similarity index 83% rename from book/cli/reth/db/snapshot.md rename to book/cli/reth/db/static_file.md index a612de81c21..a6f965075bd 100644 --- a/book/cli/reth/db/snapshot.md +++ b/book/cli/reth/db/static_file.md @@ -1,19 +1,19 @@ -# reth db snapshot +# reth db static-file -Snapshots tables from database +StaticFiles tables from database ```bash -$ reth db snapshot --help -Usage: reth db snapshot [OPTIONS] [SEGMENTS]... +$ reth db static-file --help +Usage: reth db static-file [OPTIONS] [SEGMENTS]... Arguments: [SEGMENTS]... - Snapshot segments to generate + StaticFile segments to generate Possible values: - - headers: Snapshot segment responsible for the `CanonicalHeaders`, `Headers`, `HeaderTerminalDifficulties` tables - - transactions: Snapshot segment responsible for the `Transactions` table - - receipts: Snapshot segment responsible for the `Receipts` table + - headers: StaticFile segment responsible for the `CanonicalHeaders`, `Headers`, `HeaderTerminalDifficulties` tables + - transactions: StaticFile segment responsible for the `Transactions` table + - receipts: StaticFile segment responsible for the `Receipts` table Options: --datadir @@ -28,13 +28,13 @@ Options: [default: default] -f, --from - Starting block for the snapshot - + Starting block for the static_file + [default: 0] -b, --block-interval - Number of blocks in the snapshot - + Number of blocks in the static_file + [default: 500000] --chain @@ -47,18 +47,18 @@ Options: [default: mainnet] -p, --parallel - Sets the number of snapshots built in parallel. Note: Each parallel build is memory-intensive - + Sets the number of static files built in parallel. 
Note: Each parallel build is memory-intensive + [default: 1] --only-stats - Flag to skip snapshot creation and print snapshot files stats + Flag to skip static_file creation and print static_file files stats --bench - Flag to enable database-to-snapshot benchmarking + Flag to enable database-to-static_file benchmarking --only-bench - Flag to skip snapshot creation and only run benchmarks on existing snapshots + Flag to skip static_file creation and only run benchmarks on existing static files -c, --compression Compression algorithms to use @@ -69,7 +69,7 @@ Options: - lz4: LZ4 compression algorithm - zstd: Zstandard (Zstd) compression algorithm - zstd-with-dictionary: Zstandard (Zstd) compression algorithm with a dictionary - - uncompressed: No compression, uncompressed snapshot + - uncompressed: No compression, uncompressed static_file --with-filters Flag to enable inclusion list filters and PHFs diff --git a/book/cli/reth/db/stats.md b/book/cli/reth/db/stats.md index 8fa61d1a807..dea5e3d058c 100644 --- a/book/cli/reth/db/stats.md +++ b/book/cli/reth/db/stats.md @@ -18,6 +18,9 @@ Options: [default: default] + --only-total-size + Show only the total size for static files + --chain The chain this node is running. Possible values are either a built-in chain or the path to a chain specification file. @@ -27,6 +30,9 @@ Options: [default: mainnet] + --summary + Show only the summary per static file segment + --instance Add a new instance of a node. diff --git a/book/cli/reth/node.md b/book/cli/reth/node.md index 655861ba43a..80072c1ab47 100644 --- a/book/cli/reth/node.md +++ b/book/cli/reth/node.md @@ -131,14 +131,14 @@ Networking: --pooled-tx-response-soft-limit Soft limit for the byte size of a [`PooledTransactions`](reth_eth_wire::PooledTransactions) response on assembling a [`GetPooledTransactions`](reth_eth_wire::GetPooledTransactions) request. Spec'd at 2 MiB. - + . - + [default: 2097152] --pooled-tx-pack-soft-limit Default soft limit for the byte size of a [`PooledTransactions`](reth_eth_wire::PooledTransactions) response on assembling a [`GetPooledTransactions`](reth_eth_wire::PooledTransactions) request. This defaults to less than the [`SOFT_LIMIT_BYTE_SIZE_POOLED_TRANSACTIONS_RESPONSE`], at 2 MiB, used when assembling a [`PooledTransactions`](reth_eth_wire::PooledTransactions) response. 
Default is 128 KiB - + [default: 131072] RPC: @@ -533,4 +533,3 @@ Display: -q, --quiet Silence all log output -``` \ No newline at end of file diff --git a/book/cli/reth/stage/drop.md b/book/cli/reth/stage/drop.md index d6c912b4cc2..88a0197990d 100644 --- a/book/cli/reth/stage/drop.md +++ b/book/cli/reth/stage/drop.md @@ -57,18 +57,17 @@ Database: Possible values: - - headers: The headers stage within the pipeline - - bodies: The bodies stage within the pipeline - - senders: The senders stage within the pipeline - - execution: The execution stage within the pipeline - - account-hashing: The account hashing stage within the pipeline - - storage-hashing: The storage hashing stage within the pipeline - - hashing: The hashing stage within the pipeline - - merkle: The Merkle stage within the pipeline - - tx-lookup: The transaction lookup stage within the pipeline - - account-history: The account history stage within the pipeline - - storage-history: The storage history stage within the pipeline - - total-difficulty: The total difficulty stage within the pipeline + - headers: The headers stage within the pipeline + - bodies: The bodies stage within the pipeline + - senders: The senders stage within the pipeline + - execution: The execution stage within the pipeline + - account-hashing: The account hashing stage within the pipeline + - storage-hashing: The storage hashing stage within the pipeline + - hashing: The hashing stage within the pipeline + - merkle: The Merkle stage within the pipeline + - tx-lookup: The transaction lookup stage within the pipeline + - account-history: The account history stage within the pipeline + - storage-history: The storage history stage within the pipeline Logging: --log.stdout.format diff --git a/book/cli/reth/stage/dump/account-hashing.md b/book/cli/reth/stage/dump/account-hashing.md index 4a575388c32..c8b6069fad5 100644 --- a/book/cli/reth/stage/dump/account-hashing.md +++ b/book/cli/reth/stage/dump/account-hashing.md @@ -4,11 +4,11 @@ AccountHashing stage ```bash $ reth stage dump account-hashing --help -Usage: reth stage dump account-hashing [OPTIONS] --output-db --from --to +Usage: reth stage dump account-hashing [OPTIONS] --output-datadir --from --to Options: - --output-db - The path to the new database folder. + --output-datadir + The path to the new datadir folder. -f, --from From which block diff --git a/book/cli/reth/stage/dump/execution.md b/book/cli/reth/stage/dump/execution.md index 4d6b0ce5d8e..8ff064a70cc 100644 --- a/book/cli/reth/stage/dump/execution.md +++ b/book/cli/reth/stage/dump/execution.md @@ -4,11 +4,11 @@ Execution stage ```bash $ reth stage dump execution --help -Usage: reth stage dump execution [OPTIONS] --output-db --from --to +Usage: reth stage dump execution [OPTIONS] --output-datadir --from --to Options: - --output-db - The path to the new database folder. + --output-datadir + The path to the new datadir folder. -f, --from From which block diff --git a/book/cli/reth/stage/dump/merkle.md b/book/cli/reth/stage/dump/merkle.md index c5c04b68371..ec5d142c472 100644 --- a/book/cli/reth/stage/dump/merkle.md +++ b/book/cli/reth/stage/dump/merkle.md @@ -4,11 +4,11 @@ Merkle stage ```bash $ reth stage dump merkle --help -Usage: reth stage dump merkle [OPTIONS] --output-db --from --to +Usage: reth stage dump merkle [OPTIONS] --output-datadir --from --to Options: - --output-db - The path to the new database folder. + --output-datadir + The path to the new datadir folder. 
-f, --from From which block diff --git a/book/cli/reth/stage/dump/storage-hashing.md b/book/cli/reth/stage/dump/storage-hashing.md index 9223b445b4d..6a45c5d1ab9 100644 --- a/book/cli/reth/stage/dump/storage-hashing.md +++ b/book/cli/reth/stage/dump/storage-hashing.md @@ -4,11 +4,11 @@ StorageHashing stage ```bash $ reth stage dump storage-hashing --help -Usage: reth stage dump storage-hashing [OPTIONS] --output-db --from --to +Usage: reth stage dump storage-hashing [OPTIONS] --output-datadir --from --to Options: - --output-db - The path to the new database folder. + --output-datadir + The path to the new datadir folder. -f, --from From which block diff --git a/book/cli/reth/stage/run.md b/book/cli/reth/stage/run.md index c27b0f457ba..a7d8d61bcd2 100644 --- a/book/cli/reth/stage/run.md +++ b/book/cli/reth/stage/run.md @@ -11,18 +11,17 @@ Arguments: The name of the stage to run Possible values: - - headers: The headers stage within the pipeline - - bodies: The bodies stage within the pipeline - - senders: The senders stage within the pipeline - - execution: The execution stage within the pipeline - - account-hashing: The account hashing stage within the pipeline - - storage-hashing: The storage hashing stage within the pipeline - - hashing: The hashing stage within the pipeline - - merkle: The Merkle stage within the pipeline - - tx-lookup: The transaction lookup stage within the pipeline - - account-history: The account history stage within the pipeline - - storage-history: The storage history stage within the pipeline - - total-difficulty: The total difficulty stage within the pipeline + - headers: The headers stage within the pipeline + - bodies: The bodies stage within the pipeline + - senders: The senders stage within the pipeline + - execution: The execution stage within the pipeline + - account-hashing: The account hashing stage within the pipeline + - storage-hashing: The storage hashing stage within the pipeline + - hashing: The hashing stage within the pipeline + - merkle: The Merkle stage within the pipeline + - tx-lookup: The transaction lookup stage within the pipeline + - account-history: The account history stage within the pipeline + - storage-history: The storage history stage within the pipeline Options: --config @@ -152,6 +151,18 @@ Networking: --max-inbound-peers Maximum number of inbound requests. default: 30 + --pooled-tx-response-soft-limit + Soft limit for the byte size of a [`PooledTransactions`](reth_eth_wire::PooledTransactions) response on assembling a [`GetPooledTransactions`](reth_eth_wire::GetPooledTransactions) request. Spec'd at 2 MiB. + + . + + [default: 2097152] + + --pooled-tx-pack-soft-limit + Default soft limit for the byte size of a [`PooledTransactions`](reth_eth_wire::PooledTransactions) response on assembling a [`GetPooledTransactions`](reth_eth_wire::PooledTransactions) request. This defaults to less than the [`SOFT_LIMIT_BYTE_SIZE_POOLED_TRANSACTIONS_RESPONSE`], at 2 MiB, used when assembling a [`PooledTransactions`](reth_eth_wire::PooledTransactions) response. Default is 128 KiB + + [default: 131072] + Database: --db.log-level Database logging level. 
Levels higher than "notice" require a debug build diff --git a/book/run/config.md b/book/run/config.md index d5889c6a22a..9c770dae73e 100644 --- a/book/run/config.md +++ b/book/run/config.md @@ -12,7 +12,6 @@ The configuration file contains the following sections: - [`[stages]`](#the-stages-section) -- Configuration of the individual sync stages - [`headers`](#headers) - - [`total_difficulty`](#total_difficulty) - [`bodies`](#bodies) - [`sender_recovery`](#sender_recovery) - [`execution`](#execution) @@ -64,20 +63,6 @@ downloader_request_limit = 1000 commit_threshold = 10000 ``` -### `total_difficulty` - -The total difficulty stage calculates the total difficulty reached for each header in the chain. - -```toml -[stages.total_difficulty] -# The amount of headers to calculate the total difficulty for -# before writing the results to disk. -# -# Lower thresholds correspond to more frequent disk I/O (writes), -# but lowers memory usage -commit_threshold = 100000 -``` - ### `bodies` The bodies section controls both the behavior of the bodies stage, which download historical block bodies, as well as the primary downloader that fetches block bodies over P2P. @@ -207,7 +192,7 @@ The transaction lookup stage builds an index of transaction hashes to their sequ # # Lower thresholds correspond to more frequent disk I/O (writes), # but lowers memory usage -commit_threshold = 5000000 +chunk_size = 5000000 ``` ### `index_account_history` diff --git a/book/run/observability.md b/book/run/observability.md index 4ab2951805e..39d485e1f36 100644 --- a/book/run/observability.md +++ b/book/run/observability.md @@ -12,7 +12,7 @@ Now, as the node is running, you can `curl` the endpoint you provided to the `-- curl 127.0.0.1:9001 ``` -The response from this is quite descriptive, but it can be a bit verbose. Plus, it's just a snapshot of the metrics at the time that you `curl`ed the endpoint. +The response from this is quite descriptive, but it can be a bit verbose. Plus, it's just a static_file of the metrics at the time that you `curl`ed the endpoint. You can run the following command in a separate terminal to periodically poll the endpoint, and just print the values (without the header text) to the terminal: diff --git a/crates/blockchain-tree/src/blockchain_tree.rs b/crates/blockchain-tree/src/blockchain_tree.rs index 36d238caed8..72ef5dbfc01 100644 --- a/crates/blockchain-tree/src/blockchain_tree.rs +++ b/crates/blockchain-tree/src/blockchain_tree.rs @@ -1294,7 +1294,7 @@ mod tests { let provider = factory.provider_rw().unwrap(); provider - .insert_block( + .insert_historical_block( genesis.try_seal_with_senders().expect("invalid tx signature in genesis"), None, ) diff --git a/crates/blockchain-tree/src/externals.rs b/crates/blockchain-tree/src/externals.rs index 150a09c6677..5a288271e76 100644 --- a/crates/blockchain-tree/src/externals.rs +++ b/crates/blockchain-tree/src/externals.rs @@ -1,9 +1,11 @@ //! Blockchain tree externals. -use reth_db::{cursor::DbCursorRO, database::Database, tables, transaction::DbTx}; +use reth_db::{ + cursor::DbCursorRO, database::Database, static_file::HeaderMask, tables, transaction::DbTx, +}; use reth_interfaces::{consensus::Consensus, RethResult}; -use reth_primitives::{BlockHash, BlockNumber}; -use reth_provider::ProviderFactory; +use reth_primitives::{BlockHash, BlockNumber, StaticFileSegment}; +use reth_provider::{ProviderFactory, StatsReader}; use std::{collections::BTreeMap, sync::Arc}; /// A container for external components. 
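The second `externals.rs` hunk below merges the newest canonical hashes read from the database with those read from static files, since pipeline sync writes headers directly to static files while live sync writes to the database, so either source may hold the most recent blocks. A minimal standalone sketch of that merge-and-truncate step, using plain `std` collections and stand-in type aliases (`BlockNumber`, `BlockHash`) rather than reth's provider API:

```rust
use std::collections::BTreeMap;

// Stand-in aliases for illustration only; reth defines its own primitive types.
type BlockNumber = u64;
type BlockHash = [u8; 32];

/// Merges hashes read from the database with hashes read from static files and
/// keeps only the `num_hashes` most recent (highest block number) entries.
fn merge_latest_hashes(
    db_hashes: BTreeMap<BlockNumber, BlockHash>,
    static_file_hashes: BTreeMap<BlockNumber, BlockHash>,
    num_hashes: usize,
) -> BTreeMap<BlockNumber, BlockHash> {
    // Either source may be ahead of the other, so combine both maps first;
    // a block number present in both simply resolves to the same hash.
    let mut hashes = db_hashes;
    hashes.extend(static_file_hashes);
    // BTreeMap iterates in ascending key order, so reversing and taking
    // `num_hashes` yields the newest blocks regardless of their source.
    hashes.into_iter().rev().take(num_hashes).collect()
}
```

Keying everything by block number in a `BTreeMap` is what lets the final `rev().take(num_hashes)` select the highest block numbers no matter which store they came from, which is the same shape the hunk below follows.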
@@ -44,13 +46,39 @@ impl TreeExternals { &self, num_hashes: usize, ) -> RethResult> { - Ok(self + // Fetch the latest canonical hashes from the database + let mut hashes = self .provider_factory .provider()? .tx_ref() .cursor_read::()? .walk_back(None)? .take(num_hashes) - .collect::, _>>()?) + .collect::, _>>()?; + + // Fetch the same number of latest canonical hashes from the static_files and merge them + // with the database hashes. It is needed due to the fact that we're writing + // directly to static_files in pipeline sync, but to the database in live sync, + // which means that the latest canonical hashes in the static file might be more recent + // than in the database, and vice versa, or even some ranges of the latest + // `num_hashes` blocks may be in database, and some ranges in static_files. + let static_file_provider = self.provider_factory.static_file_provider(); + let total_headers = static_file_provider.count_entries::()? as u64; + if total_headers > 0 { + let range = + total_headers.saturating_sub(1).saturating_sub(num_hashes as u64)..total_headers; + + hashes.extend(range.clone().zip(static_file_provider.fetch_range_with_predicate( + StaticFileSegment::Headers, + range, + |cursor, number| cursor.get_one::>(number.into()), + |_| true, + )?)); + } + + // We may have fetched more than `num_hashes` hashes, so we need to truncate the result to + // the requested number. + let hashes = hashes.into_iter().rev().take(num_hashes).collect(); + Ok(hashes) } } diff --git a/crates/config/src/config.rs b/crates/config/src/config.rs index 99401bd1e52..cc3741fc5c1 100644 --- a/crates/config/src/config.rs +++ b/crates/config/src/config.rs @@ -52,8 +52,6 @@ impl Config { pub struct StageConfig { /// Header stage configuration. pub headers: HeadersConfig, - /// Total Difficulty stage configuration - pub total_difficulty: TotalDifficultyConfig, /// Body stage configuration. pub bodies: BodiesConfig, /// Sender Recovery stage configuration. @@ -107,21 +105,6 @@ impl Default for HeadersConfig { } } -/// Total difficulty stage configuration -#[derive(Debug, Clone, Copy, Deserialize, PartialEq, Serialize)] -#[serde(default)] -pub struct TotalDifficultyConfig { - /// The maximum number of total difficulty entries to sum up before committing progress to the - /// database. - pub commit_threshold: u64, -} - -impl Default for TotalDifficultyConfig { - fn default() -> Self { - Self { commit_threshold: 100_000 } - } -} - /// Body stage configuration. #[derive(Debug, Clone, Copy, Deserialize, PartialEq, Serialize)] #[serde(default)] @@ -242,13 +225,13 @@ impl Default for MerkleConfig { #[derive(Debug, Clone, Copy, Deserialize, PartialEq, Serialize)] #[serde(default)] pub struct TransactionLookupConfig { - /// The maximum number of transactions to process before committing progress to the database. - pub commit_threshold: u64, + /// The maximum number of transactions to process before writing to disk. 
+ pub chunk_size: u64, } impl Default for TransactionLookupConfig { fn default() -> Self { - Self { commit_threshold: 5_000_000 } + Self { chunk_size: 5_000_000 } } } @@ -359,9 +342,6 @@ downloader_max_buffered_responses = 100 downloader_request_limit = 1000 commit_threshold = 10000 -[stages.total_difficulty] -commit_threshold = 100000 - [stages.bodies] downloader_request_limit = 200 downloader_stream_batch_size = 1000 @@ -388,7 +368,7 @@ commit_threshold = 100000 clean_threshold = 50000 [stages.transaction_lookup] -commit_threshold = 5000000 +chunk_size = 5000000 [stages.index_account_history] commit_threshold = 100000 diff --git a/crates/consensus/beacon/Cargo.toml b/crates/consensus/beacon/Cargo.toml index 9672b98ca8e..a6407240ba0 100644 --- a/crates/consensus/beacon/Cargo.toml +++ b/crates/consensus/beacon/Cargo.toml @@ -23,7 +23,7 @@ reth-tasks.workspace = true reth-payload-builder.workspace = true reth-payload-validator.workspace = true reth-prune.workspace = true -reth-snapshot.workspace = true +reth-static-file.workspace = true reth-tokio-util.workspace = true reth-node-api.workspace = true @@ -59,6 +59,7 @@ reth-node-ethereum.workspace = true reth-node-optimism.workspace = true assert_matches.workspace = true +tempfile.workspace = true [features] optimism = [ diff --git a/crates/consensus/beacon/src/engine/hooks/controller.rs b/crates/consensus/beacon/src/engine/hooks/controller.rs index 77e3ed2701c..33ae74c83e9 100644 --- a/crates/consensus/beacon/src/engine/hooks/controller.rs +++ b/crates/consensus/beacon/src/engine/hooks/controller.rs @@ -10,7 +10,6 @@ use tracing::debug; #[derive(Debug)] pub(crate) struct PolledHook { - #[allow(dead_code)] pub(crate) name: &'static str, pub(crate) event: EngineHookEvent, pub(crate) db_access_level: EngineHookDBAccessLevel, @@ -151,6 +150,8 @@ impl EngineHooksController { ); return Poll::Ready(Ok(result)) + } else { + debug!(target: "consensus::engine::hooks", hook = hook.name(), "Next hook is not ready"); } Poll::Pending diff --git a/crates/consensus/beacon/src/engine/hooks/mod.rs b/crates/consensus/beacon/src/engine/hooks/mod.rs index 69101db1f69..ce149717ada 100644 --- a/crates/consensus/beacon/src/engine/hooks/mod.rs +++ b/crates/consensus/beacon/src/engine/hooks/mod.rs @@ -11,8 +11,8 @@ pub(crate) use controller::{EngineHooksController, PolledHook}; mod prune; pub use prune::PruneHook; -mod snapshot; -pub use snapshot::SnapshotHook; +mod static_file; +pub use static_file::StaticFileHook; /// Collection of [engine hooks][`EngineHook`]. #[derive(Default)] diff --git a/crates/consensus/beacon/src/engine/hooks/snapshot.rs b/crates/consensus/beacon/src/engine/hooks/snapshot.rs deleted file mode 100644 index 780812818e4..00000000000 --- a/crates/consensus/beacon/src/engine/hooks/snapshot.rs +++ /dev/null @@ -1,156 +0,0 @@ -//! Snapshot hook for the engine implementation. - -use crate::{ - engine::hooks::{EngineContext, EngineHook, EngineHookError, EngineHookEvent}, - hooks::EngineHookDBAccessLevel, -}; -use futures::FutureExt; -use reth_db::database::Database; -use reth_interfaces::{RethError, RethResult}; -use reth_primitives::BlockNumber; -use reth_snapshot::{Snapshotter, SnapshotterError, SnapshotterWithResult}; -use reth_tasks::TaskSpawner; -use std::task::{ready, Context, Poll}; -use tokio::sync::oneshot; - -/// Manages snapshotting under the control of the engine. -/// -/// This type controls the [Snapshotter]. -#[derive(Debug)] -pub struct SnapshotHook { - /// The current state of the snapshotter. 
- state: SnapshotterState, - /// The type that can spawn the snapshotter task. - task_spawner: Box, -} - -impl SnapshotHook { - /// Create a new instance - pub fn new(snapshotter: Snapshotter, task_spawner: Box) -> Self { - Self { state: SnapshotterState::Idle(Some(snapshotter)), task_spawner } - } - - /// Advances the snapshotter state. - /// - /// This checks for the result in the channel, or returns pending if the snapshotter is idle. - fn poll_snapshotter(&mut self, cx: &mut Context<'_>) -> Poll> { - let result = match self.state { - SnapshotterState::Idle(_) => return Poll::Pending, - SnapshotterState::Running(ref mut fut) => { - ready!(fut.poll_unpin(cx)) - } - }; - - let event = match result { - Ok((snapshotter, result)) => { - self.state = SnapshotterState::Idle(Some(snapshotter)); - - match result { - Ok(_) => EngineHookEvent::Finished(Ok(())), - Err(err) => EngineHookEvent::Finished(Err(err.into())), - } - } - Err(_) => { - // failed to receive the snapshotter - EngineHookEvent::Finished(Err(EngineHookError::ChannelClosed)) - } - }; - - Poll::Ready(Ok(event)) - } - - /// This will try to spawn the snapshotter if it is idle: - /// 1. Check if snapshotting is needed through [Snapshotter::get_snapshot_targets] and then - /// [SnapshotTargets::any](reth_snapshot::SnapshotTargets::any). - /// 2. - /// 1. If snapshotting is needed, pass snapshot request to the [Snapshotter::run] and spawn - /// it in a separate task. Set snapshotter state to [SnapshotterState::Running]. - /// 2. If snapshotting is not needed, set snapshotter state back to - /// [SnapshotterState::Idle]. - /// - /// If snapshotter is already running, do nothing. - fn try_spawn_snapshotter( - &mut self, - finalized_block_number: BlockNumber, - ) -> RethResult> { - Ok(match &mut self.state { - SnapshotterState::Idle(snapshotter) => { - let Some(mut snapshotter) = snapshotter.take() else { return Ok(None) }; - - let targets = snapshotter.get_snapshot_targets(finalized_block_number)?; - - // Check if the snapshotting of any data has been requested. - if targets.any() { - let (tx, rx) = oneshot::channel(); - self.task_spawner.spawn_critical_blocking( - "snapshotter task", - Box::pin(async move { - let result = snapshotter.run(targets); - let _ = tx.send((snapshotter, result)); - }), - ); - self.state = SnapshotterState::Running(rx); - - Some(EngineHookEvent::Started) - } else { - self.state = SnapshotterState::Idle(Some(snapshotter)); - Some(EngineHookEvent::NotReady) - } - } - SnapshotterState::Running(_) => None, - }) - } -} - -impl EngineHook for SnapshotHook { - fn name(&self) -> &'static str { - "Snapshot" - } - - fn poll( - &mut self, - cx: &mut Context<'_>, - ctx: EngineContext, - ) -> Poll> { - let Some(finalized_block_number) = ctx.finalized_block_number else { - return Poll::Ready(Ok(EngineHookEvent::NotReady)) - }; - - // Try to spawn a snapshotter - match self.try_spawn_snapshotter(finalized_block_number)? { - Some(EngineHookEvent::NotReady) => return Poll::Pending, - Some(event) => return Poll::Ready(Ok(event)), - None => (), - } - - // Poll snapshotter and check its status - self.poll_snapshotter(cx) - } - - fn db_access_level(&self) -> EngineHookDBAccessLevel { - EngineHookDBAccessLevel::ReadOnly - } -} - -/// The possible snapshotter states within the sync controller. -/// -/// [SnapshotterState::Idle] means that the snapshotter is currently idle. -/// [SnapshotterState::Running] means that the snapshotter is currently running. -#[derive(Debug)] -enum SnapshotterState { - /// Snapshotter is idle. 
- Idle(Option>), - /// Snapshotter is running and waiting for a response - Running(oneshot::Receiver>), -} - -impl From for EngineHookError { - fn from(err: SnapshotterError) -> Self { - match err { - SnapshotterError::InconsistentData(_) => EngineHookError::Internal(Box::new(err)), - SnapshotterError::Interface(err) => err.into(), - SnapshotterError::Database(err) => RethError::Database(err).into(), - SnapshotterError::Provider(err) => RethError::Provider(err).into(), - } - } -} diff --git a/crates/consensus/beacon/src/engine/hooks/static_file.rs b/crates/consensus/beacon/src/engine/hooks/static_file.rs new file mode 100644 index 00000000000..ee4309cc0b7 --- /dev/null +++ b/crates/consensus/beacon/src/engine/hooks/static_file.rs @@ -0,0 +1,163 @@ +//! StaticFile hook for the engine implementation. + +use crate::{ + engine::hooks::{EngineContext, EngineHook, EngineHookError, EngineHookEvent}, + hooks::EngineHookDBAccessLevel, +}; +use futures::FutureExt; +use reth_db::database::Database; +use reth_interfaces::RethResult; +use reth_primitives::{static_file::HighestStaticFiles, BlockNumber}; +use reth_static_file::{StaticFileProducer, StaticFileProducerWithResult}; +use reth_tasks::TaskSpawner; +use std::task::{ready, Context, Poll}; +use tokio::sync::oneshot; +use tracing::trace; + +/// Manages producing static files under the control of the engine. +/// +/// This type controls the [StaticFileProducer]. +#[derive(Debug)] +pub struct StaticFileHook { + /// The current state of the static_file_producer. + state: StaticFileProducerState, + /// The type that can spawn the static_file_producer task. + task_spawner: Box, +} + +impl StaticFileHook { + /// Create a new instance + pub fn new( + static_file_producer: StaticFileProducer, + task_spawner: Box, + ) -> Self { + Self { state: StaticFileProducerState::Idle(Some(static_file_producer)), task_spawner } + } + + /// Advances the static_file_producer state. + /// + /// This checks for the result in the channel, or returns pending if the static_file_producer is + /// idle. + fn poll_static_file_producer( + &mut self, + cx: &mut Context<'_>, + ) -> Poll> { + let result = match self.state { + StaticFileProducerState::Idle(_) => return Poll::Pending, + StaticFileProducerState::Running(ref mut fut) => { + ready!(fut.poll_unpin(cx)) + } + }; + + let event = match result { + Ok((static_file_producer, result)) => { + self.state = StaticFileProducerState::Idle(Some(static_file_producer)); + + match result { + Ok(_) => EngineHookEvent::Finished(Ok(())), + Err(err) => EngineHookEvent::Finished(Err(err.into())), + } + } + Err(_) => { + // failed to receive the static_file_producer + EngineHookEvent::Finished(Err(EngineHookError::ChannelClosed)) + } + }; + + Poll::Ready(Ok(event)) + } + + /// This will try to spawn the static_file_producer if it is idle: + /// 1. Check if producing static files is needed through + /// [StaticFileProducer::get_static_file_targets] and then + /// [StaticFileTargets::any](reth_static_file::StaticFileTargets::any). + /// 2. + /// 1. If producing static files is needed, pass static file request to the + /// [StaticFileProducer::run] and spawn it in a separate task. Set static file producer + /// state to [StaticFileProducerState::Running]. + /// 2. If producing static files is not needed, set static file producer state back to + /// [StaticFileProducerState::Idle]. + /// + /// If static_file_producer is already running, do nothing. 
+ fn try_spawn_static_file_producer( + &mut self, + finalized_block_number: BlockNumber, + ) -> RethResult> { + Ok(match &mut self.state { + StaticFileProducerState::Idle(static_file_producer) => { + let Some(mut static_file_producer) = static_file_producer.take() else { + trace!(target: "consensus::engine::hooks::static_file", "StaticFileProducer is already running but the state is idle"); + return Ok(None); + }; + + let targets = static_file_producer.get_static_file_targets(HighestStaticFiles { + headers: Some(finalized_block_number), + receipts: Some(finalized_block_number), + transactions: Some(finalized_block_number), + })?; + + // Check if the moving data to static files has been requested. + if targets.any() { + let (tx, rx) = oneshot::channel(); + self.task_spawner.spawn_critical_blocking( + "static_file_producer task", + Box::pin(async move { + let result = static_file_producer.run(targets); + let _ = tx.send((static_file_producer, result)); + }), + ); + self.state = StaticFileProducerState::Running(rx); + + Some(EngineHookEvent::Started) + } else { + self.state = StaticFileProducerState::Idle(Some(static_file_producer)); + Some(EngineHookEvent::NotReady) + } + } + StaticFileProducerState::Running(_) => None, + }) + } +} + +impl EngineHook for StaticFileHook { + fn name(&self) -> &'static str { + "StaticFile" + } + + fn poll( + &mut self, + cx: &mut Context<'_>, + ctx: EngineContext, + ) -> Poll> { + let Some(finalized_block_number) = ctx.finalized_block_number else { + trace!(target: "consensus::engine::hooks::static_file", ?ctx, "Finalized block number is not available"); + return Poll::Pending; + }; + + // Try to spawn a static_file_producer + match self.try_spawn_static_file_producer(finalized_block_number)? { + Some(EngineHookEvent::NotReady) => return Poll::Pending, + Some(event) => return Poll::Ready(Ok(event)), + None => (), + } + + // Poll static_file_producer and check its status + self.poll_static_file_producer(cx) + } + + fn db_access_level(&self) -> EngineHookDBAccessLevel { + EngineHookDBAccessLevel::ReadOnly + } +} + +/// The possible static_file_producer states within the sync controller. +/// +/// [StaticFileProducerState::Idle] means that the static file producer is currently idle. +/// [StaticFileProducerState::Running] means that the static file producer is currently running. +#[derive(Debug)] +enum StaticFileProducerState { + /// [StaticFileProducer] is idle. + Idle(Option>), + /// [StaticFileProducer] is running and waiting for a response + Running(oneshot::Receiver>), +} diff --git a/crates/consensus/beacon/src/engine/mod.rs b/crates/consensus/beacon/src/engine/mod.rs index a7c9e9fdc4b..5ae78a7ab5d 100644 --- a/crates/consensus/beacon/src/engine/mod.rs +++ b/crates/consensus/beacon/src/engine/mod.rs @@ -361,6 +361,9 @@ where warn!( target: "consensus::engine", hook = %hook.name(), + head_block_hash = ?state.head_block_hash, + safe_block_hash = ?state.safe_block_hash, + finalized_block_hash = ?state.finalized_block_hash, "Hook is in progress, skipping forkchoice update. \ This may affect the performance of your node as a validator." 
); @@ -1502,7 +1505,9 @@ where debug!(target: "consensus::engine", hash=?new_head.hash(), number=new_head.number, "Canonicalized new head"); // we can update the FCU blocks - let _ = self.update_canon_chain(new_head, &target); + if let Err(err) = self.update_canon_chain(new_head, &target) { + debug!(target: "consensus::engine", ?err, ?target, "Failed to update the canonical chain tracker"); + } // we're no longer syncing self.sync_state_updater.update_sync_state(SyncState::Idle); @@ -1704,9 +1709,18 @@ where None } - fn on_hook_result(&self, result: PolledHook) -> Result<(), BeaconConsensusEngineError> { - if result.db_access_level.is_read_write() { - match result.event { + fn on_hook_result(&self, polled_hook: PolledHook) -> Result<(), BeaconConsensusEngineError> { + if let EngineHookEvent::Finished(Err(error)) = &polled_hook.event { + error!( + target: "consensus::engine", + name = %polled_hook.name, + ?error, + "Hook finished with error" + ) + } + + if polled_hook.db_access_level.is_read_write() { + match polled_hook.event { EngineHookEvent::NotReady => {} EngineHookEvent::Started => { // If the hook has read-write access to the database, it means that the engine @@ -1889,9 +1903,7 @@ mod tests { }; use assert_matches::assert_matches; use reth_interfaces::test_utils::generators::{self, Rng}; - use reth_primitives::{ - stage::StageCheckpoint, ChainSpec, ChainSpecBuilder, B256, MAINNET, U256, - }; + use reth_primitives::{stage::StageCheckpoint, ChainSpecBuilder, MAINNET}; use reth_provider::{BlockWriter, ProviderFactory}; use reth_rpc_types::engine::{ForkchoiceState, ForkchoiceUpdated, PayloadStatus}; use reth_rpc_types_compat::engine::payload::try_block_to_payload_v1; @@ -2064,12 +2076,10 @@ mod tests { } fn insert_blocks<'a, DB: Database>( - db: DB, - chain: Arc, + provider_factory: ProviderFactory, mut blocks: impl Iterator, ) { - let factory = ProviderFactory::new(db, chain); - let provider = factory.provider_rw().unwrap(); + let provider = provider_factory.provider_rw().unwrap(); blocks .try_for_each(|b| { provider @@ -2085,8 +2095,9 @@ mod tests { mod fork_choice_updated { use super::*; - use reth_db::{tables, transaction::DbTxMut}; + use reth_db::{tables, test_utils::create_test_static_files_dir, transaction::DbTxMut}; use reth_interfaces::test_utils::generators::random_block; + use reth_primitives::U256; use reth_rpc_types::engine::ForkchoiceUpdateError; #[tokio::test] @@ -2139,7 +2150,15 @@ mod tests { let genesis = random_block(&mut rng, 0, None, None, Some(0)); let block1 = random_block(&mut rng, 1, Some(genesis.hash()), None, Some(0)); - insert_blocks(env.db.as_ref(), chain_spec.clone(), [&genesis, &block1].into_iter()); + insert_blocks( + ProviderFactory::new( + env.db.as_ref(), + chain_spec.clone(), + create_test_static_files_dir(), + ) + .expect("create provider factory with static_files"), + [&genesis, &block1].into_iter(), + ); env.db .update(|tx| { tx.put::( @@ -2189,7 +2208,15 @@ mod tests { let genesis = random_block(&mut rng, 0, None, None, Some(0)); let block1 = random_block(&mut rng, 1, Some(genesis.hash()), None, Some(0)); - insert_blocks(env.db.as_ref(), chain_spec.clone(), [&genesis, &block1].into_iter()); + insert_blocks( + ProviderFactory::new( + env.db.as_ref(), + chain_spec.clone(), + create_test_static_files_dir(), + ) + .expect("create provider factory with static_files"), + [&genesis, &block1].into_iter(), + ); let mut engine_rx = spawn_consensus_engine(consensus_engine); @@ -2205,7 +2232,15 @@ mod tests { let invalid_rx = 
env.send_forkchoice_updated(next_forkchoice_state).await; // Insert next head immediately after sending forkchoice update - insert_blocks(env.db.as_ref(), chain_spec.clone(), [&next_head].into_iter()); + insert_blocks( + ProviderFactory::new( + env.db.as_ref(), + chain_spec.clone(), + create_test_static_files_dir(), + ) + .expect("create provider factory with static_files"), + [&next_head].into_iter(), + ); let expected_result = ForkchoiceUpdated::from_status(PayloadStatusEnum::Syncing); assert_matches!(invalid_rx, Ok(result) => assert_eq!(result, expected_result)); @@ -2239,7 +2274,15 @@ mod tests { let genesis = random_block(&mut rng, 0, None, None, Some(0)); let block1 = random_block(&mut rng, 1, Some(genesis.hash()), None, Some(0)); - insert_blocks(env.db.as_ref(), chain_spec.clone(), [&genesis, &block1].into_iter()); + insert_blocks( + ProviderFactory::new( + env.db.as_ref(), + chain_spec.clone(), + create_test_static_files_dir(), + ) + .expect("create provider factory with static_files"), + [&genesis, &block1].into_iter(), + ); let engine = spawn_consensus_engine(consensus_engine); @@ -2287,8 +2330,12 @@ mod tests { block3.header.set_difficulty(U256::from(1)); insert_blocks( - env.db.as_ref(), - chain_spec.clone(), + ProviderFactory::new( + env.db.as_ref(), + chain_spec.clone(), + create_test_static_files_dir(), + ) + .expect("create provider factory with static_files"), [&genesis, &block1, &block2, &block3].into_iter(), ); @@ -2330,7 +2377,15 @@ mod tests { let genesis = random_block(&mut rng, 0, None, None, Some(0)); let block1 = random_block(&mut rng, 1, Some(genesis.hash()), None, Some(0)); - insert_blocks(env.db.as_ref(), chain_spec.clone(), [&genesis, &block1].into_iter()); + insert_blocks( + ProviderFactory::new( + env.db.as_ref(), + chain_spec.clone(), + create_test_static_files_dir(), + ) + .expect("create provider factory with static_files"), + [&genesis, &block1].into_iter(), + ); let _engine = spawn_consensus_engine(consensus_engine); @@ -2352,10 +2407,11 @@ mod tests { mod new_payload { use super::*; + use reth_db::test_utils::create_test_static_files_dir; use reth_interfaces::test_utils::generators::random_block; use reth_primitives::{ genesis::{Genesis, GenesisAllocator}, - Hardfork, + Hardfork, U256, }; use reth_provider::test_utils::blocks::BlockChainTestData; @@ -2426,8 +2482,12 @@ mod tests { let block1 = random_block(&mut rng, 1, Some(genesis.hash()), None, Some(0)); let block2 = random_block(&mut rng, 2, Some(block1.hash()), None, Some(0)); insert_blocks( - env.db.as_ref(), - chain_spec.clone(), + ProviderFactory::new( + env.db.as_ref(), + chain_spec.clone(), + create_test_static_files_dir(), + ) + .expect("create provider factory with static_files"), [&genesis, &block1, &block2].into_iter(), ); @@ -2492,7 +2552,15 @@ mod tests { // TODO: add transactions that transfer from the alloc accounts, generating the new // block tx and state root - insert_blocks(env.db.as_ref(), chain_spec.clone(), [&genesis, &block1].into_iter()); + insert_blocks( + ProviderFactory::new( + env.db.as_ref(), + chain_spec.clone(), + create_test_static_files_dir(), + ) + .expect("create provider factory with static_files"), + [&genesis, &block1].into_iter(), + ); let mut engine_rx = spawn_consensus_engine(consensus_engine); @@ -2530,7 +2598,15 @@ mod tests { let genesis = random_block(&mut rng, 0, None, None, Some(0)); - insert_blocks(env.db.as_ref(), chain_spec.clone(), [&genesis].into_iter()); + insert_blocks( + ProviderFactory::new( + env.db.as_ref(), + chain_spec.clone(), + 
create_test_static_files_dir(), + ) + .expect("create provider factory with static_files"), + [&genesis].into_iter(), + ); let mut engine_rx = spawn_consensus_engine(consensus_engine); @@ -2589,8 +2665,12 @@ mod tests { .build(); insert_blocks( - env.db.as_ref(), - chain_spec.clone(), + ProviderFactory::new( + env.db.as_ref(), + chain_spec.clone(), + create_test_static_files_dir(), + ) + .expect("create provider factory with static_files"), [&data.genesis, &block1].into_iter(), ); diff --git a/crates/consensus/beacon/src/engine/sync.rs b/crates/consensus/beacon/src/engine/sync.rs index 6b25643e95a..c6bd452be5d 100644 --- a/crates/consensus/beacon/src/engine/sync.rs +++ b/crates/consensus/beacon/src/engine/sync.rs @@ -398,13 +398,14 @@ mod tests { use reth_interfaces::{p2p::either::EitherDownloader, test_utils::TestFullBlockClient}; use reth_primitives::{ constants::ETHEREUM_BLOCK_GAS_LIMIT, stage::StageCheckpoint, BlockBody, ChainSpecBuilder, - Header, SealedHeader, MAINNET, + Header, PruneModes, SealedHeader, MAINNET, }; use reth_provider::{ test_utils::{create_test_provider_factory_with_chain_spec, TestExecutorFactory}, BundleStateWithReceipts, }; use reth_stages::{test_utils::TestStages, ExecOutput, StageError}; + use reth_static_file::StaticFileProducer; use reth_tasks::TokioTaskExecutor; use std::{collections::VecDeque, future::poll_fn, ops::Range}; use tokio::sync::watch; @@ -465,7 +466,15 @@ mod tests { pipeline = pipeline.with_max_block(max_block); } - pipeline.build(create_test_provider_factory_with_chain_spec(chain_spec)) + let provider_factory = create_test_provider_factory_with_chain_spec(chain_spec); + + let static_file_producer = StaticFileProducer::new( + provider_factory.clone(), + provider_factory.static_file_provider(), + PruneModes::default(), + ); + + pipeline.build(provider_factory, static_file_producer) } } diff --git a/crates/consensus/beacon/src/engine/test_utils.rs b/crates/consensus/beacon/src/engine/test_utils.rs index 3541a84ceed..e491b1fe33e 100644 --- a/crates/consensus/beacon/src/engine/test_utils.rs +++ b/crates/consensus/beacon/src/engine/test_utils.rs @@ -6,10 +6,7 @@ use crate::{ use reth_blockchain_tree::{ config::BlockchainTreeConfig, externals::TreeExternals, BlockchainTree, ShareableBlockchainTree, }; -use reth_db::{ - test_utils::{create_test_rw_db, TempDatabase}, - DatabaseEnv as DE, -}; +use reth_db::{test_utils::TempDatabase, DatabaseEnv as DE}; type DatabaseEnv = TempDatabase; use reth_downloaders::{ bodies::bodies::BodiesDownloaderBuilder, @@ -24,10 +21,11 @@ use reth_interfaces::{ }; use reth_node_ethereum::{EthEngineTypes, EthEvmConfig}; use reth_payload_builder::test_utils::spawn_test_payload_service; -use reth_primitives::{BlockNumber, ChainSpec, B256}; +use reth_primitives::{BlockNumber, ChainSpec, PruneModes, B256}; use reth_provider::{ - providers::BlockchainProvider, test_utils::TestExecutorFactory, BundleStateWithReceipts, - ExecutorFactory, HeaderSyncMode, ProviderFactory, PrunableBlockExecutor, + providers::BlockchainProvider, + test_utils::{create_test_provider_factory_with_chain_spec, TestExecutorFactory}, + BundleStateWithReceipts, ExecutorFactory, HeaderSyncMode, PrunableBlockExecutor, }; use reth_prune::Pruner; use reth_revm::EvmProcessorFactory; @@ -35,6 +33,7 @@ use reth_rpc_types::engine::{ CancunPayloadFields, ExecutionPayload, ForkchoiceState, ForkchoiceUpdated, PayloadStatus, }; use reth_stages::{sets::DefaultStages, test_utils::TestStages, ExecOutput, Pipeline, StageError}; +use reth_static_file::StaticFileProducer; use 
reth_tasks::TokioTaskExecutor; use std::{collections::VecDeque, sync::Arc}; use tokio::sync::{oneshot, watch}; @@ -348,9 +347,8 @@ where /// Builds the test consensus engine into a `TestConsensusEngine` and `TestEnv`. pub fn build(self) -> (TestBeaconConsensusEngine, TestEnv>) { reth_tracing::init_test_tracing(); - let db = create_test_rw_db(); let provider_factory = - ProviderFactory::new(db.clone(), self.base_config.chain_spec.clone()); + create_test_provider_factory_with_chain_spec(self.base_config.chain_spec.clone()); let consensus: Arc = match self.base_config.consensus { TestConsensusConfig::Real => { @@ -380,6 +378,12 @@ where )), }; + let static_file_producer = StaticFileProducer::new( + provider_factory.clone(), + provider_factory.static_file_provider(), + PruneModes::default(), + ); + // Setup pipeline let (tip_tx, tip_rx) = watch::channel(B256::default()); let mut pipeline = match self.base_config.pipeline_config { @@ -395,14 +399,17 @@ where .build(client.clone(), consensus.clone(), provider_factory.clone()) .into_task(); - Pipeline::builder().add_stages(DefaultStages::new( - ProviderFactory::new(db.clone(), self.base_config.chain_spec.clone()), - HeaderSyncMode::Tip(tip_rx.clone()), - Arc::clone(&consensus), - header_downloader, - body_downloader, - executor_factory.clone(), - )) + Pipeline::builder().add_stages( + DefaultStages::new( + provider_factory.clone(), + HeaderSyncMode::Tip(tip_rx.clone()), + Arc::clone(&consensus), + header_downloader, + body_downloader, + executor_factory.clone(), + ) + .expect("should build"), + ) } }; @@ -410,7 +417,7 @@ where pipeline = pipeline.with_max_block(max_block); } - let pipeline = pipeline.build(provider_factory.clone()); + let pipeline = pipeline.build(provider_factory.clone(), static_file_producer); // Setup blockchain tree let externals = TreeExternals::new(provider_factory.clone(), consensus, executor_factory); @@ -423,12 +430,11 @@ where BlockchainProvider::with_latest(provider_factory.clone(), tree, latest); let pruner = Pruner::new( - provider_factory, + provider_factory.clone(), vec![], 5, self.base_config.chain_spec.prune_delete_limit, config.max_reorg_depth() as usize, - watch::channel(None).1, ); let mut hooks = EngineHooks::new(); @@ -453,7 +459,7 @@ where engine.sync.set_max_block(max_block) } - (engine, TestEnv::new(db, tip_rx, handle)) + (engine, TestEnv::new(provider_factory.db_ref().clone(), tip_rx, handle)) } } diff --git a/crates/etl/Cargo.toml b/crates/etl/Cargo.toml new file mode 100644 index 00000000000..07af6c72968 --- /dev/null +++ b/crates/etl/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "reth-etl" +version.workspace = true +edition.workspace = true +rust-version.workspace = true +license.workspace = true +homepage.workspace = true +repository.workspace = true +exclude.workspace = true + +[dependencies] +tempfile.workspace = true +reth-db.workspace = true +rayon.workspace = true + +[dev-dependencies] +reth-primitives.workspace = true diff --git a/crates/etl/src/lib.rs b/crates/etl/src/lib.rs new file mode 100644 index 00000000000..0fc865b98d2 --- /dev/null +++ b/crates/etl/src/lib.rs @@ -0,0 +1,264 @@ +//! ETL data collector. +//! +//! This crate is useful for dumping unsorted data into temporary files and iterating on their +//! sorted representation later on. +//! +//! This has multiple uses, such as optimizing database inserts (for Btree based databases) and +//! memory management (as it moves the buffer to disk instead of memory). 
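+//!
+//! A minimal usage sketch (illustrative only; it mirrors the `etl_hashes` test at the
+//! bottom of this file and uses the `Collector` API defined below):
+//!
+//! ```ignore
+//! use std::sync::Arc;
+//!
+//! use reth_etl::Collector;
+//! use reth_primitives::{TxHash, TxNumber};
+//! use tempfile::TempDir;
+//!
+//! // Sort and flush to a temporary file whenever the in-memory buffer exceeds 1024 bytes.
+//! let mut collector = Collector::new(Arc::new(TempDir::new()?), 1024);
+//! collector.insert(TxHash::random(), 0 as TxNumber);
+//! collector.insert(TxHash::random(), 1 as TxNumber);
+//!
+//! // Entries come back sorted by encoded key, already encoded and compressed.
+//! for entry in collector.iter()? {
+//!     let (encoded_key, compressed_value) = entry?;
+//!     // insert into the database, build an index, ...
+//! }
+//! ```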
+ +#![doc( + html_logo_url = "https://raw.githubusercontent.com/paradigmxyz/reth/main/assets/reth-docs.png", + html_favicon_url = "https://avatars0.githubusercontent.com/u/97369466?s=256", + issue_tracker_base_url = "https://github.com/paradigmxyz/reth/issues/" +)] +#![warn(missing_debug_implementations, missing_docs, unreachable_pub, rustdoc::all)] +#![deny(unused_must_use, rust_2018_idioms)] +#![cfg_attr(docsrs, feature(doc_cfg, doc_auto_cfg))] + +use std::{ + cmp::Reverse, + collections::BinaryHeap, + io::{BufReader, BufWriter, Read, Seek, SeekFrom, Write}, + path::Path, + sync::Arc, +}; + +use rayon::prelude::*; +use reth_db::table::{Compress, Encode, Key, Value}; +use tempfile::{NamedTempFile, TempDir}; + +/// An ETL (extract, transform, load) data collector. +/// +/// Data is pushed (extract) to the collector which internally flushes the data in a sorted +/// (transform) manner to files of some specified capacity. +/// +/// The data can later be iterated over (load) in a sorted manner. +#[derive(Debug)] +pub struct Collector +where + K: Encode + Ord, + V: Compress, + ::Encoded: std::fmt::Debug, + ::Compressed: std::fmt::Debug, +{ + /// Directory for temporary file storage + dir: Arc, + /// Collection of temporary ETL files + files: Vec, + /// Current buffer size in bytes + buffer_size_bytes: usize, + /// Maximum buffer capacity in bytes, triggers flush when reached + buffer_capacity_bytes: usize, + /// In-memory buffer storing encoded and compressed key-value pairs + buffer: Vec<(::Encoded, ::Compressed)>, + /// Total number of elements in the collector, including all files + len: usize, +} + +impl Collector +where + K: Key, + V: Value, + ::Encoded: Ord + std::fmt::Debug, + ::Compressed: Ord + std::fmt::Debug, +{ + /// Create a new collector in a specific temporary directory with some capacity. + /// + /// Once the capacity (in bytes) is reached, the data is sorted and flushed to disk. + pub fn new(dir: Arc, buffer_capacity_bytes: usize) -> Self { + Self { + dir, + buffer_size_bytes: 0, + files: Vec::new(), + buffer_capacity_bytes, + buffer: Vec::new(), + len: 0, + } + } + + /// Returns number of elements currently in the collector. + pub fn len(&self) -> usize { + self.len + } + + /// Returns `true` if there are currently no elements in the collector. + pub fn is_empty(&self) -> bool { + self.len == 0 + } + + /// Insert an entry into the collector. + pub fn insert(&mut self, key: K, value: V) { + let key = key.encode(); + let value = value.compress(); + self.buffer_size_bytes += key.as_ref().len() + value.as_ref().len(); + self.buffer.push((key, value)); + if self.buffer_size_bytes > self.buffer_capacity_bytes { + self.flush(); + } + self.len += 1; + } + + fn flush(&mut self) { + self.buffer_size_bytes = 0; + self.buffer.par_sort_unstable_by(|a, b| a.0.cmp(&b.0)); + let mut buf = Vec::with_capacity(self.buffer.len()); + std::mem::swap(&mut buf, &mut self.buffer); + self.files.push(EtlFile::new(self.dir.path(), buf).expect("could not flush data to disk")) + } + + /// Returns an iterator over the collector data. + /// + /// The items of the iterator are sorted across all underlying files. + /// + /// # Note + /// + /// The keys and values have been pre-encoded, meaning they *SHOULD NOT* be encoded or + /// compressed again. 
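+    ///
+    /// A short sketch of the intended call pattern (illustrative):
+    ///
+    /// ```ignore
+    /// // given a `Collector` that has had entries inserted:
+    /// let mut iter = collector.iter()?;
+    /// // `peek` exposes the smallest remaining (key, value) pair without consuming it.
+    /// let _smallest = iter.peek();
+    /// for entry in iter {
+    ///     let (encoded_key, compressed_value) = entry?;
+    /// }
+    /// ```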
+ pub fn iter(&mut self) -> std::io::Result> { + // Flush the remaining items to disk + if self.buffer_size_bytes > 0 { + self.flush(); + } + + let mut heap = BinaryHeap::new(); + for (current_id, file) in self.files.iter_mut().enumerate() { + if let Some((current_key, current_value)) = file.read_next()? { + heap.push((Reverse((current_key, current_value)), current_id)); + } + } + + Ok(EtlIter { heap, files: &mut self.files }) + } +} + +/// `EtlIter` is an iterator for traversing through sorted key-value pairs in a collection of ETL +/// files. These files are created using the [`Collector`] and contain data where keys are encoded +/// and values are compressed. +/// +/// This iterator returns each key-value pair in ascending order based on the key. +/// It is particularly designed to efficiently handle large datasets by employing a binary heap for +/// managing the iteration order. +#[derive(Debug)] +pub struct EtlIter<'a> { + /// Heap managing the next items to be iterated. + #[allow(clippy::type_complexity)] + heap: BinaryHeap<(Reverse<(Vec, Vec)>, usize)>, + /// Reference to the vector of ETL files being iterated over. + files: &'a mut Vec, +} + +impl<'a> EtlIter<'a> { + /// Peeks into the next element + pub fn peek(&self) -> Option<&(Vec, Vec)> { + self.heap.peek().map(|(Reverse(entry), _)| entry) + } +} + +impl<'a> Iterator for EtlIter<'a> { + type Item = std::io::Result<(Vec, Vec)>; + + fn next(&mut self) -> Option { + // Get the next sorted entry from the heap + let (Reverse(entry), id) = self.heap.pop()?; + + // Populate the heap with the next entry from the same file + match self.files[id].read_next() { + Ok(Some((key, value))) => { + self.heap.push((Reverse((key, value)), id)); + Some(Ok(entry)) + } + Ok(None) => Some(Ok(entry)), + err => err.transpose(), + } + } +} + +/// A temporary ETL file. +#[derive(Debug)] +struct EtlFile { + file: BufReader, + len: usize, +} + +impl EtlFile { + /// Create a new file with the given data (which should be pre-sorted) at the given path. + /// + /// The file will be a temporary file. + pub(crate) fn new(dir: &Path, buffer: Vec<(K, V)>) -> std::io::Result + where + Self: Sized, + K: AsRef<[u8]>, + V: AsRef<[u8]>, + { + let file = NamedTempFile::new_in(dir)?; + let mut w = BufWriter::new(file); + for entry in &buffer { + let k = entry.0.as_ref(); + let v = entry.1.as_ref(); + + w.write_all(&k.len().to_be_bytes())?; + w.write_all(&v.len().to_be_bytes())?; + w.write_all(k)?; + w.write_all(v)?; + } + + let mut file = BufReader::new(w.into_inner()?); + file.seek(SeekFrom::Start(0))?; + let len = buffer.len(); + Ok(Self { file, len }) + } + + /// Read the next entry in the file. + /// + /// Can return error if it reaches EOF before filling the internal buffers. 
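+    ///
+    /// Entries are read back in the layout written by [`EtlFile::new`]: a big-endian `usize`
+    /// key length, a big-endian `usize` value length, followed by the raw key and value bytes.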
+ pub(crate) fn read_next(&mut self) -> std::io::Result, Vec)>> { + if self.len == 0 { + return Ok(None) + } + + let mut buffer_key_length = [0; 8]; + let mut buffer_value_length = [0; 8]; + + self.file.read_exact(&mut buffer_key_length)?; + self.file.read_exact(&mut buffer_value_length)?; + + let key_length = usize::from_be_bytes(buffer_key_length); + let value_length = usize::from_be_bytes(buffer_value_length); + let mut key = vec![0; key_length]; + let mut value = vec![0; value_length]; + + self.file.read_exact(&mut key)?; + self.file.read_exact(&mut value)?; + + self.len -= 1; + + Ok(Some((key, value))) + } +} + +#[cfg(test)] +mod tests { + use reth_primitives::{TxHash, TxNumber}; + + use super::*; + + #[test] + fn etl_hashes() { + let mut entries: Vec<_> = + (0..10_000).map(|id| (TxHash::random(), id as TxNumber)).collect(); + + let mut collector = Collector::new(Arc::new(TempDir::new().unwrap()), 1024); + for (k, v) in entries.clone() { + collector.insert(k, v); + } + entries.sort_unstable_by_key(|entry| entry.0); + + for (id, entry) in collector.iter().unwrap().enumerate() { + let expected = entries[id]; + assert_eq!( + entry.unwrap(), + (expected.0.encode().to_vec(), expected.1.compress().to_vec()) + ); + } + } +} diff --git a/crates/interfaces/src/provider.rs b/crates/interfaces/src/provider.rs index 5e8a6a02431..513e3efe9d9 100644 --- a/crates/interfaces/src/provider.rs +++ b/crates/interfaces/src/provider.rs @@ -1,5 +1,5 @@ use reth_primitives::{ - Address, BlockHash, BlockHashOrNumber, BlockNumber, GotExpected, SnapshotSegment, + Address, BlockHash, BlockHashOrNumber, BlockNumber, GotExpected, StaticFileSegment, TxHashOrNumber, TxNumber, B256, U256, }; use std::path::PathBuf; @@ -113,15 +113,18 @@ pub enum ProviderError { /// Provider does not support this particular request. #[error("this provider does not support this request")] UnsupportedProvider, - /// Snapshot file is not found at specified path. - #[error("not able to find {0} snapshot file at {1}")] - MissingSnapshotPath(SnapshotSegment, PathBuf), - /// Snapshot file is not found for requested block. - #[error("not able to find {0} snapshot file for block number {1}")] - MissingSnapshotBlock(SnapshotSegment, BlockNumber), - /// Snapshot file is not found for requested transaction. - #[error("not able to find {0} snapshot file for transaction id {1}")] - MissingSnapshotTx(SnapshotSegment, TxNumber), + /// Static File is not found at specified path. + #[error("not able to find {0} static file at {1}")] + MissingStaticFilePath(StaticFileSegment, PathBuf), + /// Static File is not found for requested block. + #[error("not able to find {0} static file for block number {1}")] + MissingStaticFileBlock(StaticFileSegment, BlockNumber), + /// Static File is not found for requested transaction. + #[error("unable to find {0} static file for transaction id {1}")] + MissingStaticFileTx(StaticFileSegment, TxNumber), + /// Static File is finalized and cannot be written to. + #[error("unable to write block #{1} to finalized static file {0}")] + FinalizedStaticFile(StaticFileSegment, BlockNumber), /// Error encountered when the block number conversion from U256 to u64 causes an overflow. 
#[error("failed to convert block number U256 to u64: {0}")] BlockNumberOverflow(U256), diff --git a/crates/net/downloaders/src/bodies/bodies.rs b/crates/net/downloaders/src/bodies/bodies.rs index 627b5ea9d24..2d436acf798 100644 --- a/crates/net/downloaders/src/bodies/bodies.rs +++ b/crates/net/downloaders/src/bodies/bodies.rs @@ -596,7 +596,7 @@ mod tests { test_utils::{generate_bodies, TestBodiesClient}, }; use assert_matches::assert_matches; - use reth_db::test_utils::create_test_rw_db; + use reth_db::test_utils::{create_test_rw_db, create_test_static_files_dir}; use reth_interfaces::test_utils::{generators, generators::random_block_range, TestConsensus}; use reth_primitives::{BlockBody, B256, MAINNET}; use reth_provider::ProviderFactory; @@ -618,7 +618,7 @@ mod tests { let mut downloader = BodiesDownloaderBuilder::default().build( client.clone(), Arc::new(TestConsensus::default()), - ProviderFactory::new(db, MAINNET.clone()), + ProviderFactory::new(db, MAINNET.clone(), create_test_static_files_dir()).unwrap(), ); downloader.set_download_range(0..=19).expect("failed to set download range"); @@ -657,7 +657,7 @@ mod tests { BodiesDownloaderBuilder::default().with_request_limit(request_limit).build( client.clone(), Arc::new(TestConsensus::default()), - ProviderFactory::new(db, MAINNET.clone()), + ProviderFactory::new(db, MAINNET.clone(), create_test_static_files_dir()).unwrap(), ); downloader.set_download_range(0..=199).expect("failed to set download range"); @@ -686,7 +686,7 @@ mod tests { .build( client.clone(), Arc::new(TestConsensus::default()), - ProviderFactory::new(db, MAINNET.clone()), + ProviderFactory::new(db, MAINNET.clone(), create_test_static_files_dir()).unwrap(), ); let mut range_start = 0; @@ -716,7 +716,7 @@ mod tests { let mut downloader = BodiesDownloaderBuilder::default().with_stream_batch_size(100).build( client.clone(), Arc::new(TestConsensus::default()), - ProviderFactory::new(db, MAINNET.clone()), + ProviderFactory::new(db, MAINNET.clone(), create_test_static_files_dir()).unwrap(), ); // Set and download the first range @@ -756,7 +756,7 @@ mod tests { .build( client.clone(), Arc::new(TestConsensus::default()), - ProviderFactory::new(db, MAINNET.clone()), + ProviderFactory::new(db, MAINNET.clone(), create_test_static_files_dir()).unwrap(), ); // Set and download the entire range @@ -787,7 +787,7 @@ mod tests { .build( client.clone(), Arc::new(TestConsensus::default()), - ProviderFactory::new(db, MAINNET.clone()), + ProviderFactory::new(db, MAINNET.clone(), create_test_static_files_dir()).unwrap(), ); // Download the requested range diff --git a/crates/net/downloaders/src/bodies/task.rs b/crates/net/downloaders/src/bodies/task.rs index cbae7628f1a..2424101db52 100644 --- a/crates/net/downloaders/src/bodies/task.rs +++ b/crates/net/downloaders/src/bodies/task.rs @@ -169,20 +169,18 @@ mod tests { test_utils::{generate_bodies, TestBodiesClient}, }; use assert_matches::assert_matches; - use reth_db::test_utils::create_test_rw_db; use reth_interfaces::{p2p::error::DownloadError, test_utils::TestConsensus}; - use reth_primitives::MAINNET; - use reth_provider::ProviderFactory; + use reth_provider::test_utils::create_test_provider_factory; use std::sync::Arc; #[tokio::test(flavor = "multi_thread")] async fn download_one_by_one_on_task() { reth_tracing::init_test_tracing(); - let db = create_test_rw_db(); + let factory = create_test_provider_factory(); let (headers, mut bodies) = generate_bodies(0..=19); - insert_headers(db.db(), &headers); + 
insert_headers(factory.db_ref().db(), &headers); let client = Arc::new( TestBodiesClient::default().with_bodies(bodies.clone()).with_should_delay(true), @@ -190,7 +188,7 @@ mod tests { let downloader = BodiesDownloaderBuilder::default().build( client.clone(), Arc::new(TestConsensus::default()), - ProviderFactory::new(db, MAINNET.clone()), + factory, ); let mut downloader = TaskDownloader::spawn(downloader); @@ -208,11 +206,10 @@ mod tests { async fn set_download_range_error_returned() { reth_tracing::init_test_tracing(); - let db = create_test_rw_db(); let downloader = BodiesDownloaderBuilder::default().build( Arc::new(TestBodiesClient::default()), Arc::new(TestConsensus::default()), - ProviderFactory::new(db, MAINNET.clone()), + create_test_provider_factory(), ); let mut downloader = TaskDownloader::spawn(downloader); diff --git a/crates/net/downloaders/src/file_client.rs b/crates/net/downloaders/src/file_client.rs index 0e4efb90054..073640427bf 100644 --- a/crates/net/downloaders/src/file_client.rs +++ b/crates/net/downloaders/src/file_client.rs @@ -241,7 +241,6 @@ mod tests { }; use assert_matches::assert_matches; use futures_util::stream::StreamExt; - use reth_db::test_utils::create_test_rw_db; use reth_interfaces::{ p2p::{ bodies::downloader::BodyDownloader, @@ -249,17 +248,17 @@ mod tests { }, test_utils::TestConsensus, }; - use reth_primitives::{SealedHeader, MAINNET}; - use reth_provider::ProviderFactory; + use reth_primitives::SealedHeader; + use reth_provider::test_utils::create_test_provider_factory; use std::sync::Arc; #[tokio::test] async fn streams_bodies_from_buffer() { // Generate some random blocks - let db = create_test_rw_db(); + let factory = create_test_provider_factory(); let (headers, mut bodies) = generate_bodies(0..=19); - insert_headers(db.db(), &headers); + insert_headers(factory.db_ref().db(), &headers); // create an empty file let file = tempfile::tempfile().unwrap(); @@ -269,7 +268,7 @@ mod tests { let mut downloader = BodiesDownloaderBuilder::default().build( client.clone(), Arc::new(TestConsensus::default()), - ProviderFactory::new(db, MAINNET.clone()), + factory, ); downloader.set_download_range(0..=19).expect("failed to set download range"); @@ -338,19 +337,19 @@ mod tests { #[tokio::test] async fn test_download_bodies_from_file() { // Generate some random blocks - let db = create_test_rw_db(); + let factory = create_test_provider_factory(); let (file, headers, mut bodies) = generate_bodies_file(0..=19).await; // now try to read them back let client = Arc::new(FileClient::from_file(file).await.unwrap()); // insert headers in db for the bodies downloader - insert_headers(db.db(), &headers); + insert_headers(factory.db_ref().db(), &headers); let mut downloader = BodiesDownloaderBuilder::default().build( client.clone(), Arc::new(TestConsensus::default()), - ProviderFactory::new(db, MAINNET.clone()), + factory, ); downloader.set_download_range(0..=19).expect("failed to set download range"); diff --git a/crates/net/eth-wire/Cargo.toml b/crates/net/eth-wire/Cargo.toml index 7bf89f3b2c3..bab06af474d 100644 --- a/crates/net/eth-wire/Cargo.toml +++ b/crates/net/eth-wire/Cargo.toml @@ -25,7 +25,7 @@ reth-metrics.workspace = true metrics.workspace = true bytes.workspace = true -derive_more = "0.99.17" +derive_more.workspace = true thiserror.workspace = true serde = { workspace = true, optional = true } tokio = { workspace = true, features = ["full"] } diff --git a/crates/node-builder/Cargo.toml b/crates/node-builder/Cargo.toml index c9e122e2c41..a7375ba3934 100644 
--- a/crates/node-builder/Cargo.toml +++ b/crates/node-builder/Cargo.toml @@ -29,7 +29,7 @@ reth-transaction-pool.workspace = true reth-tasks.workspace = true reth-tracing.workspace = true reth-interfaces.workspace = true -reth-snapshot.workspace = true +reth-static-file.workspace = true reth-prune.workspace = true reth-stages.workspace = true reth-config.workspace = true diff --git a/crates/node-builder/src/builder.rs b/crates/node-builder/src/builder.rs index 45e074c37ba..ed2db5df48d 100644 --- a/crates/node-builder/src/builder.rs +++ b/crates/node-builder/src/builder.rs @@ -350,22 +350,22 @@ where info!(target: "reth::cli", "Database opened"); - let mut provider_factory = - ProviderFactory::new(database.clone(), Arc::clone(&config.chain)); - - // configure snapshotter - let snapshotter = reth_snapshot::Snapshotter::new( - provider_factory.clone(), - data_dir.snapshots_path(), - config.chain.snapshot_block_interval, + let provider_factory = ProviderFactory::new( + database.clone(), + Arc::clone(&config.chain), + data_dir.static_files_path(), )?; - provider_factory = provider_factory - .with_snapshots(data_dir.snapshots_path(), snapshotter.highest_snapshot_receiver())?; + // configure static_file_producer + let static_file_producer = reth_static_file::StaticFileProducer::new( + provider_factory.clone(), + provider_factory.static_file_provider(), + config.prune_config()?.unwrap_or_default().segments, + ); debug!(target: "reth::cli", chain=%config.chain.chain, genesis=?config.chain.genesis_hash(), "Initializing genesis"); - let genesis_hash = init_genesis(database.clone(), config.chain.clone())?; + let genesis_hash = init_genesis(provider_factory.clone())?; info!(target: "reth::cli", "{}", config.chain.display_hardforks()); @@ -471,6 +471,7 @@ where sync_metrics_tx, prune_config.clone(), max_block, + static_file_producer, evm_config, ) .await?; @@ -492,6 +493,7 @@ where sync_metrics_tx, prune_config.clone(), max_block, + static_file_producer, evm_config, ) .await?; @@ -508,7 +510,7 @@ where let mut pruner = PrunerBuilder::new(prune_config.clone()) .max_reorg_depth(tree_config.max_reorg_depth() as usize) .prune_delete_limit(config.chain.prune_delete_limit) - .build(provider_factory, snapshotter.highest_snapshot_receiver()); + .build(provider_factory); let events = pruner.events(); hooks.add(PruneHook::new(pruner, Box::new(executor.clone()))); diff --git a/crates/node-core/Cargo.toml b/crates/node-core/Cargo.toml index f252e048310..308ae00b15b 100644 --- a/crates/node-core/Cargo.toml +++ b/crates/node-core/Cargo.toml @@ -44,7 +44,7 @@ reth-stages.workspace = true reth-prune.workspace = true reth-blockchain-tree.workspace = true revm-inspectors.workspace = true -reth-snapshot.workspace = true +reth-static-file.workspace = true reth-eth-wire.workspace = true # `optimism` feature @@ -69,6 +69,7 @@ thiserror.workspace = true const-str = "0.5.6" rand.workspace = true pin-project.workspace = true +derive_more.workspace = true # io dirs-next = "2.0.0" diff --git a/crates/node-core/src/args/stage_args.rs b/crates/node-core/src/args/stage_args.rs index 46618ff2f72..d90eabcfc1d 100644 --- a/crates/node-core/src/args/stage_args.rs +++ b/crates/node-core/src/args/stage_args.rs @@ -1,9 +1,10 @@ //! Shared arguments related to stages +use derive_more::Display; /// Represents a specific stage within the data pipeline. /// /// Different stages within the pipeline have dedicated functionalities and operations. 
-#[derive(Debug, Clone, Copy, Eq, PartialEq, PartialOrd, Ord, clap::ValueEnum)] +#[derive(Debug, Clone, Copy, Eq, PartialEq, PartialOrd, Ord, clap::ValueEnum, Display)] pub enum StageEnum { /// The headers stage within the pipeline. /// @@ -49,8 +50,4 @@ pub enum StageEnum { /// /// Manages historical data related to storage. StorageHistory, - /// The total difficulty stage within the pipeline. - /// - /// Handles computations and data related to total difficulty. - TotalDifficulty, } diff --git a/crates/node-core/src/dirs.rs b/crates/node-core/src/dirs.rs index b4e6d8955bb..affcb7f8a6d 100644 --- a/crates/node-core/src/dirs.rs +++ b/crates/node-core/src/dirs.rs @@ -282,9 +282,9 @@ impl ChainPath { self.0.join("db").into() } - /// Returns the path to the snapshots directory for this chain. - pub fn snapshots_path(&self) -> PathBuf { - self.0.join("snapshots").into() + /// Returns the path to the static_files directory for this chain. + pub fn static_files_path(&self) -> PathBuf { + self.0.join("static_files").into() } /// Returns the path to the reth p2p secret key for this chain. diff --git a/crates/node-core/src/events/node.rs b/crates/node-core/src/events/node.rs index 00b5b7fb54d..0de1987a7c6 100644 --- a/crates/node-core/src/events/node.rs +++ b/crates/node-core/src/events/node.rs @@ -14,6 +14,7 @@ use reth_primitives::{ }; use reth_prune::PrunerEvent; use reth_stages::{ExecOutput, PipelineEvent}; +use reth_static_file::StaticFileProducerEvent; use std::{ fmt::{Display, Formatter}, future::Future, @@ -233,11 +234,25 @@ impl NodeState { fn handle_pruner_event(&self, event: PrunerEvent) { match event { + PrunerEvent::Started { tip_block_number } => { + info!(tip_block_number, "Pruner started"); + } PrunerEvent::Finished { tip_block_number, elapsed, stats } => { info!(tip_block_number, ?elapsed, ?stats, "Pruner finished"); } } } + + fn handle_static_file_producer_event(&self, event: StaticFileProducerEvent) { + match event { + StaticFileProducerEvent::Started { targets } => { + info!(?targets, "Static File Producer started"); + } + StaticFileProducerEvent::Finished { targets, elapsed } => { + info!(?targets, ?elapsed, "Static File Producer finished"); + } + } + } } impl NodeState { @@ -282,6 +297,8 @@ pub enum NodeEvent { ConsensusLayerHealth(ConsensusLayerHealthEvent), /// A pruner event Pruner(PrunerEvent), + /// A static_file_producer event + StaticFileProducer(StaticFileProducerEvent), } impl From for NodeEvent { @@ -314,6 +331,12 @@ impl From for NodeEvent { } } +impl From for NodeEvent { + fn from(event: StaticFileProducerEvent) -> Self { + NodeEvent::StaticFileProducer(event) + } +} + /// Displays relevant information to the user from components of the node, and periodically /// displays the high-level status of the node. pub async fn handle_events( @@ -430,6 +453,9 @@ where NodeEvent::Pruner(event) => { this.state.handle_pruner_event(event); } + NodeEvent::StaticFileProducer(event) => { + this.state.handle_static_file_producer_event(event); + } } } diff --git a/crates/node-core/src/init.rs b/crates/node-core/src/init.rs index 406c701ef4a..8b7a0c08f2e 100644 --- a/crates/node-core/src/init.rs +++ b/crates/node-core/src/init.rs @@ -1,19 +1,20 @@ //! Reth genesis initialization utility functions. 
use reth_db::{ - cursor::DbCursorRO, database::Database, tables, transaction::{DbTx, DbTxMut}, }; use reth_interfaces::{db::DatabaseError, provider::ProviderResult}; use reth_primitives::{ - stage::StageId, Account, Bytecode, ChainSpec, Receipts, StorageEntry, B256, U256, + stage::StageId, Account, Bytecode, ChainSpec, Receipts, StaticFileSegment, StorageEntry, B256, + U256, }; use reth_provider::{ bundle_state::{BundleStateInit, RevertsInit}, - BundleStateWithReceipts, DatabaseProviderRW, HashingWriter, HistoryWriter, OriginalValuesKnown, - ProviderError, ProviderFactory, + providers::{StaticFileProvider, StaticFileWriter}, + BlockHashReader, BundleStateWithReceipts, ChainSpecProvider, DatabaseProviderRW, HashingWriter, + HistoryWriter, OriginalValuesKnown, ProviderError, ProviderFactory, }; use std::{ collections::{BTreeMap, HashMap}, @@ -46,40 +47,39 @@ impl From for InitDatabaseError { } /// Write the genesis block if it has not already been written -pub fn init_genesis( - db: DB, - chain: Arc, -) -> Result { - let genesis = chain.genesis(); +pub fn init_genesis(factory: ProviderFactory) -> Result { + let chain = factory.chain_spec(); + let genesis = chain.genesis(); let hash = chain.genesis_hash(); - let tx = db.tx()?; - if let Some((_, db_hash)) = tx.cursor_read::()?.first()? { - if db_hash == hash { - debug!("Genesis already written, skipping."); - return Ok(hash) - } + // Check if we already have the genesis header or if we have the wrong one. + match factory.block_hash(0) { + Ok(None) | Err(ProviderError::MissingStaticFileBlock(StaticFileSegment::Headers, 0)) => {} + Ok(Some(block_hash)) => { + if block_hash == hash { + debug!("Genesis already written, skipping."); + return Ok(hash) + } - return Err(InitDatabaseError::GenesisHashMismatch { - chainspec_hash: hash, - database_hash: db_hash, - }) + return Err(InitDatabaseError::GenesisHashMismatch { + chainspec_hash: hash, + database_hash: block_hash, + }) + } + Err(e) => return Err(e.into()), } - drop(tx); debug!("Writing genesis block."); // use transaction to insert genesis header - let factory = ProviderFactory::new(&db, chain.clone()); let provider_rw = factory.provider_rw()?; insert_genesis_hashes(&provider_rw, genesis)?; insert_genesis_history(&provider_rw, genesis)?; - provider_rw.commit()?; // Insert header - let tx = db.tx_mut()?; - insert_genesis_header::(&tx, chain.clone())?; + let tx = provider_rw.into_tx(); + insert_genesis_header::(&tx, factory.static_file_provider(), chain.clone())?; insert_genesis_state::(&tx, genesis)?; @@ -89,6 +89,7 @@ pub fn init_genesis( } tx.commit()?; + Ok(hash) } @@ -153,14 +154,14 @@ pub fn insert_genesis_state( 0, ); - bundle.write_to_db(tx, OriginalValuesKnown::Yes)?; + bundle.write_to_storage(tx, None, OriginalValuesKnown::Yes)?; Ok(()) } /// Inserts hashes for the genesis state. pub fn insert_genesis_hashes( - provider: &DatabaseProviderRW<&DB>, + provider: &DatabaseProviderRW, genesis: &reth_primitives::Genesis, ) -> ProviderResult<()> { // insert and hash accounts to hashing table @@ -187,7 +188,7 @@ pub fn insert_genesis_hashes( /// Inserts history indices for genesis accounts and storage. pub fn insert_genesis_history( - provider: &DatabaseProviderRW<&DB>, + provider: &DatabaseProviderRW, genesis: &reth_primitives::Genesis, ) -> ProviderResult<()> { let account_transitions = @@ -208,15 +209,24 @@ pub fn insert_genesis_history( /// Inserts header for the genesis state.
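+///
+/// With static files enabled, the header itself is appended to the headers static file segment
+/// (unless it is already present there), and only the remaining per-block index entries are
+/// written to the database transaction.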
pub fn insert_genesis_header( tx: &::TXMut, + static_file_provider: StaticFileProvider, chain: Arc, ) -> ProviderResult<()> { let (header, block_hash) = chain.sealed_genesis_header().split(); - tx.put::(0, block_hash)?; + match static_file_provider.block_hash(0) { + Ok(None) | Err(ProviderError::MissingStaticFileBlock(StaticFileSegment::Headers, 0)) => { + let (difficulty, hash) = (header.difficulty, block_hash); + let mut writer = static_file_provider.latest_writer(StaticFileSegment::Headers)?; + writer.append_header(header, difficulty, hash)?; + writer.commit()?; + } + Ok(Some(_)) => {} + Err(e) => return Err(e), + } + tx.put::(block_hash, 0)?; tx.put::(0, Default::default())?; - tx.put::(0, header.difficulty.into())?; - tx.put::(0, header)?; Ok(()) } @@ -226,15 +236,16 @@ mod tests { use super::*; use reth_db::{ + cursor::DbCursorRO, models::{storage_sharded_key::StorageShardedKey, ShardedKey}, table::{Table, TableRow}, - test_utils::create_test_rw_db, DatabaseEnv, }; use reth_primitives::{ Address, Chain, ForkTimestamps, Genesis, GenesisAccount, IntegerList, GOERLI, GOERLI_GENESIS_HASH, MAINNET, MAINNET_GENESIS_HASH, SEPOLIA, SEPOLIA_GENESIS_HASH, }; + use reth_provider::test_utils::create_test_provider_factory_with_chain_spec; fn collect_table_entries( tx: &::TX, @@ -248,8 +259,8 @@ mod tests { #[test] fn success_init_genesis_mainnet() { - let db = create_test_rw_db(); - let genesis_hash = init_genesis(db, MAINNET.clone()).unwrap(); + let genesis_hash = + init_genesis(create_test_provider_factory_with_chain_spec(MAINNET.clone())).unwrap(); // actual, expected assert_eq!(genesis_hash, MAINNET_GENESIS_HASH); @@ -257,8 +268,8 @@ mod tests { #[test] fn success_init_genesis_goerli() { - let db = create_test_rw_db(); - let genesis_hash = init_genesis(db, GOERLI.clone()).unwrap(); + let genesis_hash = + init_genesis(create_test_provider_factory_with_chain_spec(GOERLI.clone())).unwrap(); // actual, expected assert_eq!(genesis_hash, GOERLI_GENESIS_HASH); @@ -266,8 +277,8 @@ mod tests { #[test] fn success_init_genesis_sepolia() { - let db = create_test_rw_db(); - let genesis_hash = init_genesis(db, SEPOLIA.clone()).unwrap(); + let genesis_hash = + init_genesis(create_test_provider_factory_with_chain_spec(SEPOLIA.clone())).unwrap(); // actual, expected assert_eq!(genesis_hash, SEPOLIA_GENESIS_HASH); @@ -275,11 +286,19 @@ mod tests { #[test] fn fail_init_inconsistent_db() { - let db = create_test_rw_db(); - init_genesis(db.clone(), SEPOLIA.clone()).unwrap(); + let factory = create_test_provider_factory_with_chain_spec(SEPOLIA.clone()); + let static_file_provider = factory.static_file_provider(); + init_genesis(factory.clone()).unwrap(); // Try to init db with a different genesis block - let genesis_hash = init_genesis(db, MAINNET.clone()); + let genesis_hash = init_genesis( + ProviderFactory::new( + factory.into_db(), + MAINNET.clone(), + static_file_provider.path().into(), + ) + .unwrap(), + ); assert_eq!( genesis_hash.unwrap_err(), @@ -321,13 +340,15 @@ mod tests { ..Default::default() }); - let db = create_test_rw_db(); - init_genesis(db.clone(), chain_spec).unwrap(); + let factory = create_test_provider_factory_with_chain_spec(chain_spec); + init_genesis(factory.clone()).unwrap(); + + let provider = factory.provider().unwrap(); - let tx = db.tx().expect("failed to init tx"); + let tx = provider.tx_ref(); assert_eq!( - collect_table_entries::, tables::AccountsHistory>(&tx) + collect_table_entries::, tables::AccountsHistory>(tx) .expect("failed to collect"), vec![ 
(ShardedKey::new(address_with_balance, u64::MAX), IntegerList::new([0]).unwrap()), @@ -336,7 +357,7 @@ mod tests { ); assert_eq!( - collect_table_entries::, tables::StoragesHistory>(&tx) + collect_table_entries::, tables::StoragesHistory>(tx) .expect("failed to collect"), vec![( StorageShardedKey::new(address_with_storage, storage_key, u64::MAX), diff --git a/crates/node-core/src/node_config.rs b/crates/node-core/src/node_config.rs index b7317b745dd..1b5a8da2a74 100644 --- a/crates/node-core/src/node_config.rs +++ b/crates/node-core/src/node_config.rs @@ -57,10 +57,11 @@ use reth_stages::{ stages::{ AccountHashingStage, ExecutionStage, ExecutionStageThresholds, IndexAccountHistoryStage, IndexStorageHistoryStage, MerkleStage, SenderRecoveryStage, StorageHashingStage, - TotalDifficultyStage, TransactionLookupStage, + TransactionLookupStage, }, MetricEvent, }; +use reth_static_file::StaticFileProducer; use reth_tasks::TaskExecutor; use reth_transaction_pool::{ blobstore::{DiskFileBlobStore, DiskFileBlobStoreConfig}, @@ -546,6 +547,7 @@ impl NodeConfig { metrics_tx: reth_stages::MetricEventsSender, prune_config: Option, max_block: Option, + static_file_producer: StaticFileProducer, evm_config: EvmConfig, ) -> eyre::Result> where @@ -573,6 +575,7 @@ impl NodeConfig { self.debug.continuous, metrics_tx, prune_config, + static_file_producer, evm_config, ) .await?; @@ -794,6 +797,7 @@ impl NodeConfig { continuous: bool, metrics_tx: reth_stages::MetricEventsSender, prune_config: Option, + static_file_producer: StaticFileProducer, evm_config: EvmConfig, ) -> eyre::Result> where @@ -843,11 +847,7 @@ impl NodeConfig { header_downloader, body_downloader, factory.clone(), - ) - .set( - TotalDifficultyStage::new(consensus) - .with_commit_threshold(stage_config.total_difficulty.commit_threshold), - ) + )? 
.set(SenderRecoveryStage { commit_threshold: stage_config.sender_recovery.commit_threshold, }) @@ -879,7 +879,7 @@ impl NodeConfig { )) .set(MerkleStage::new_execution(stage_config.merkle.clean_threshold)) .set(TransactionLookupStage::new( - stage_config.transaction_lookup.commit_threshold, + stage_config.transaction_lookup.chunk_size, prune_modes.transaction_lookup, )) .set(IndexAccountHistoryStage::new( @@ -891,7 +891,7 @@ impl NodeConfig { prune_modes.storage_history, )), ) - .build(provider_factory); + .build(provider_factory, static_file_producer); Ok(pipeline) } diff --git a/crates/primitives/Cargo.toml b/crates/primitives/Cargo.toml index b3aaf5cb9b8..dd2bb80d075 100644 --- a/crates/primitives/Cargo.toml +++ b/crates/primitives/Cargo.toml @@ -41,7 +41,7 @@ tracing.workspace = true bytes.workspace = true byteorder = "1" clap = { workspace = true, features = ["derive"], optional = true } -derive_more = "0.99" +derive_more.workspace = true itertools.workspace = true modular-bitfield = "0.11.2" num_enum = "0.7" diff --git a/crates/primitives/src/chain/spec.rs b/crates/primitives/src/chain/spec.rs index 8caaf16cc33..a1b535cafdc 100644 --- a/crates/primitives/src/chain/spec.rs +++ b/crates/primitives/src/chain/spec.rs @@ -67,7 +67,6 @@ pub static MAINNET: Lazy> = Lazy::new(|| { )), base_fee_params: BaseFeeParamsKind::Constant(BaseFeeParams::ethereum()), prune_delete_limit: 3500, - snapshot_block_interval: 500_000, } .into() }); @@ -111,7 +110,6 @@ pub static GOERLI: Lazy> = Lazy::new(|| { )), base_fee_params: BaseFeeParamsKind::Constant(BaseFeeParams::ethereum()), prune_delete_limit: 1700, - snapshot_block_interval: 1_000_000, } .into() }); @@ -159,7 +157,6 @@ pub static SEPOLIA: Lazy> = Lazy::new(|| { )), base_fee_params: BaseFeeParamsKind::Constant(BaseFeeParams::ethereum()), prune_delete_limit: 1700, - snapshot_block_interval: 1_000_000, } .into() }); @@ -202,7 +199,6 @@ pub static HOLESKY: Lazy> = Lazy::new(|| { )), base_fee_params: BaseFeeParamsKind::Constant(BaseFeeParams::ethereum()), prune_delete_limit: 1700, - snapshot_block_interval: 1_000_000, } .into() }); @@ -296,7 +292,6 @@ pub static BASE_SEPOLIA: Lazy> = Lazy::new(|| { .into(), ), prune_delete_limit: 1700, - snapshot_block_interval: 1_000_000, ..Default::default() } .into() @@ -351,7 +346,6 @@ pub static BASE_MAINNET: Lazy> = Lazy::new(|| { .into(), ), prune_delete_limit: 1700, - snapshot_block_interval: 1_000_000, ..Default::default() } .into() @@ -502,9 +496,6 @@ pub struct ChainSpec { /// data coming in. #[serde(default)] pub prune_delete_limit: usize, - - /// The block interval for creating snapshots. Each snapshot will have that much blocks in it. - pub snapshot_block_interval: u64, } impl Default for ChainSpec { @@ -519,7 +510,6 @@ impl Default for ChainSpec { deposit_contract: Default::default(), base_fee_params: BaseFeeParamsKind::Constant(BaseFeeParams::ethereum()), prune_delete_limit: MAINNET.prune_delete_limit, - snapshot_block_interval: Default::default(), } } } diff --git a/crates/primitives/src/header.rs b/crates/primitives/src/header.rs index 2733a6bd765..5777a96b711 100644 --- a/crates/primitives/src/header.rs +++ b/crates/primitives/src/header.rs @@ -663,13 +663,14 @@ pub enum HeaderValidationError { /// A [`Header`] that is sealed at a precalculated hash, use [`SealedHeader::unseal()`] if you want /// to modify header. 
-#[add_arbitrary_tests(rlp)] -#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] +#[main_codec(no_arbitrary)] +#[add_arbitrary_tests(rlp, compact)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct SealedHeader { - /// Locked Header fields. - header: Header, /// Locked Header hash. hash: BlockHash, + /// Locked Header fields. + header: Header, } impl SealedHeader { diff --git a/crates/primitives/src/lib.rs b/crates/primitives/src/lib.rs index fb35cb2b1cc..e304b773b4f 100644 --- a/crates/primitives/src/lib.rs +++ b/crates/primitives/src/lib.rs @@ -37,8 +37,8 @@ mod receipt; /// Helpers for working with revm pub mod revm; pub mod serde_helper; -pub mod snapshot; pub mod stage; +pub mod static_file; mod storage; /// Helpers for working with transactions pub mod transaction; @@ -71,11 +71,11 @@ pub use net::{ }; pub use peer::{PeerId, WithPeerId}; pub use prune::{ - PruneCheckpoint, PruneMode, PruneModes, PruneProgress, PruneSegment, PruneSegmentError, - ReceiptsLogPruneConfig, MINIMUM_PRUNING_DISTANCE, + PruneCheckpoint, PruneMode, PruneModes, PruneProgress, PrunePurpose, PruneSegment, + PruneSegmentError, ReceiptsLogPruneConfig, MINIMUM_PRUNING_DISTANCE, }; pub use receipt::{Receipt, ReceiptWithBloom, ReceiptWithBloomRef, Receipts}; -pub use snapshot::SnapshotSegment; +pub use static_file::StaticFileSegment; pub use storage::StorageEntry; #[cfg(feature = "c-kzg")] diff --git a/crates/primitives/src/prune/mod.rs b/crates/primitives/src/prune/mod.rs index 76144007255..b11aef43263 100644 --- a/crates/primitives/src/prune/mod.rs +++ b/crates/primitives/src/prune/mod.rs @@ -6,7 +6,7 @@ mod target; use crate::{Address, BlockNumber}; pub use checkpoint::PruneCheckpoint; pub use mode::PruneMode; -pub use segment::{PruneSegment, PruneSegmentError}; +pub use segment::{PrunePurpose, PruneSegment, PruneSegmentError}; use serde::{Deserialize, Serialize}; use std::collections::BTreeMap; pub use target::{PruneModes, MINIMUM_PRUNING_DISTANCE}; @@ -53,7 +53,7 @@ impl ReceiptsLogPruneConfig { // Reminder, that we increment because the [`BlockNumber`] key of the new map should be // viewed as `PruneMode::Before(block)` let block = (pruned_block + 1).max( - mode.prune_target_block(tip, PruneSegment::ContractLogs)? + mode.prune_target_block(tip, PruneSegment::ContractLogs, PrunePurpose::User)? .map(|(block, _)| block) .unwrap_or_default() + 1, @@ -76,7 +76,7 @@ impl ReceiptsLogPruneConfig { for (_, mode) in self.0.iter() { if let PruneMode::Distance(_) = mode { if let Some((block, _)) = - mode.prune_target_block(tip, PruneSegment::ContractLogs)? + mode.prune_target_block(tip, PruneSegment::ContractLogs, PrunePurpose::User)? { lowest = Some(lowest.unwrap_or(u64::MAX).min(block)); } diff --git a/crates/primitives/src/prune/mode.rs b/crates/primitives/src/prune/mode.rs index 2dd04473407..c32f66d35d1 100644 --- a/crates/primitives/src/prune/mode.rs +++ b/crates/primitives/src/prune/mode.rs @@ -1,4 +1,4 @@ -use crate::{BlockNumber, PruneSegment, PruneSegmentError}; +use crate::{prune::segment::PrunePurpose, BlockNumber, PruneSegment, PruneSegmentError}; use reth_codecs::{main_codec, Compact}; /// Prune mode. @@ -15,21 +15,29 @@ pub enum PruneMode { } impl PruneMode { + /// Prune blocks up to the specified block number. The specified block number is also pruned. + /// + /// This acts as `PruneMode::Before(block_number + 1)`. 
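+    ///
+    /// For example (illustrative):
+    ///
+    /// ```ignore
+    /// // "Prune everything up to and including block 100" is the same as
+    /// // "prune everything before block 101".
+    /// assert_eq!(PruneMode::before_inclusive(100), PruneMode::Before(101));
+    /// ```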
+ pub fn before_inclusive(block_number: BlockNumber) -> Self { + Self::Before(block_number + 1) + } + /// Returns the block up to which pruning needs to be done for this variant, inclusive, /// according to the provided tip. pub fn prune_target_block( &self, tip: BlockNumber, segment: PruneSegment, + purpose: PrunePurpose, ) -> Result, PruneSegmentError> { let result = match self { - PruneMode::Full if segment.min_blocks() == 0 => Some((tip, *self)), + PruneMode::Full if segment.min_blocks(purpose) == 0 => Some((tip, *self)), PruneMode::Distance(distance) if *distance > tip => None, // Nothing to prune yet - PruneMode::Distance(distance) if *distance >= segment.min_blocks() => { + PruneMode::Distance(distance) if *distance >= segment.min_blocks(purpose) => { Some((tip - distance, *self)) } PruneMode::Before(n) if *n > tip => None, // Nothing to prune yet - PruneMode::Before(n) if tip - n >= segment.min_blocks() => Some((n - 1, *self)), + PruneMode::Before(n) if tip - n >= segment.min_blocks(purpose) => Some((n - 1, *self)), _ => return Err(PruneSegmentError::Configuration(segment)), }; Ok(result) @@ -64,7 +72,9 @@ impl Default for PruneMode { #[cfg(test)] mod tests { - use crate::{prune::PruneMode, PruneSegment, PruneSegmentError, MINIMUM_PRUNING_DISTANCE}; + use crate::{ + prune::PruneMode, PrunePurpose, PruneSegment, PruneSegmentError, MINIMUM_PRUNING_DISTANCE, + }; use assert_matches::assert_matches; use serde::Deserialize; @@ -79,8 +89,8 @@ mod tests { // Nothing to prune (PruneMode::Distance(tip + 1), Ok(None)), ( - PruneMode::Distance(segment.min_blocks() + 1), - Ok(Some(tip - (segment.min_blocks() + 1))), + PruneMode::Distance(segment.min_blocks(PrunePurpose::User) + 1), + Ok(Some(tip - (segment.min_blocks(PrunePurpose::User) + 1))), ), // Nothing to prune (PruneMode::Before(tip + 1), Ok(None)), @@ -97,7 +107,7 @@ mod tests { for (index, (mode, expected_result)) in tests.into_iter().enumerate() { assert_eq!( - mode.prune_target_block(tip, segment), + mode.prune_target_block(tip, segment, PrunePurpose::User), expected_result.map(|r| r.map(|b| (b, mode))), "Test {} failed", index + 1, @@ -106,7 +116,7 @@ mod tests { // Test for a scenario where there are no minimum blocks and Full can be used assert_eq!( - PruneMode::Full.prune_target_block(tip, PruneSegment::Transactions), + PruneMode::Full.prune_target_block(tip, PruneSegment::Transactions, PrunePurpose::User), Ok(Some((tip, PruneMode::Full))), ); } diff --git a/crates/primitives/src/prune/segment.rs b/crates/primitives/src/prune/segment.rs index 964a18e1fda..d88cd0befbe 100644 --- a/crates/primitives/src/prune/segment.rs +++ b/crates/primitives/src/prune/segment.rs @@ -28,18 +28,41 @@ pub enum PruneSegment { impl PruneSegment { /// Returns the minimum number of blocks to be left in the database for this segment. - pub fn min_blocks(&self) -> u64 { + pub fn min_blocks(&self, purpose: PrunePurpose) -> u64 { match self { Self::SenderRecovery | Self::TransactionLookup | Self::Headers | Self::Transactions => { 0 } - Self::Receipts | Self::ContractLogs | Self::AccountHistory | Self::StorageHistory => { + Self::Receipts if purpose.is_static_file() => 0, + Self::ContractLogs | Self::AccountHistory | Self::StorageHistory => { MINIMUM_PRUNING_DISTANCE } + Self::Receipts => MINIMUM_PRUNING_DISTANCE, } } } +/// Prune purpose. +#[derive(Debug, Clone, Copy)] +pub enum PrunePurpose { + /// Prune data according to user configuration. + User, + /// Prune data up to the highest block already copied to static files.
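+    ///
+    /// Data that has already been copied into static files can be deleted from the database
+    /// right away, which is why [`PruneSegment::min_blocks`] returns `0` for the receipts
+    /// segment when this purpose is used.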
+ StaticFile, +} + +impl PrunePurpose { + /// Returns true if the purpose is [`PrunePurpose::User`]. + pub fn is_user(self) -> bool { + matches!(self, Self::User) + } + + /// Returns true if the purpose is [`PrunePurpose::StaticFile`]. + pub fn is_static_file(self) -> bool { + matches!(self, Self::StaticFile) + } +} + /// PruneSegment error type. #[derive(Debug, Error, PartialEq, Eq, Clone)] pub enum PruneSegmentError { diff --git a/crates/primitives/src/snapshot/mod.rs b/crates/primitives/src/snapshot/mod.rs deleted file mode 100644 index bbfcec1d288..00000000000 --- a/crates/primitives/src/snapshot/mod.rs +++ /dev/null @@ -1,47 +0,0 @@ -//! Snapshot primitives. - -mod compression; -mod filters; -mod segment; - -use alloy_primitives::BlockNumber; -pub use compression::Compression; -pub use filters::{Filters, InclusionFilter, PerfectHashingFunction}; -pub use segment::{SegmentConfig, SegmentHeader, SnapshotSegment}; - -/// Default snapshot block count. -pub const BLOCKS_PER_SNAPSHOT: u64 = 500_000; - -/// Highest snapshotted block numbers, per data part. -#[derive(Debug, Clone, Copy, Default, Eq, PartialEq)] -pub struct HighestSnapshots { - /// Highest snapshotted block of headers, inclusive. - /// If [`None`], no snapshot is available. - pub headers: Option, - /// Highest snapshotted block of receipts, inclusive. - /// If [`None`], no snapshot is available. - pub receipts: Option, - /// Highest snapshotted block of transactions, inclusive. - /// If [`None`], no snapshot is available. - pub transactions: Option, -} - -impl HighestSnapshots { - /// Returns the highest snapshot if it exists for a segment - pub fn highest(&self, segment: SnapshotSegment) -> Option { - match segment { - SnapshotSegment::Headers => self.headers, - SnapshotSegment::Transactions => self.transactions, - SnapshotSegment::Receipts => self.receipts, - } - } - - /// Returns a mutable reference to a snapshot segment - pub fn as_mut(&mut self, segment: SnapshotSegment) -> &mut Option { - match segment { - SnapshotSegment::Headers => &mut self.headers, - SnapshotSegment::Transactions => &mut self.transactions, - SnapshotSegment::Receipts => &mut self.receipts, - } - } -} diff --git a/crates/primitives/src/snapshot/segment.rs b/crates/primitives/src/snapshot/segment.rs deleted file mode 100644 index 2f5b1442efd..00000000000 --- a/crates/primitives/src/snapshot/segment.rs +++ /dev/null @@ -1,288 +0,0 @@ -use crate::{ - snapshot::{Compression, Filters, InclusionFilter}, - BlockNumber, TxNumber, -}; -use derive_more::Display; -use serde::{Deserialize, Serialize}; -use std::{ffi::OsStr, ops::RangeInclusive, str::FromStr}; -use strum::{AsRefStr, EnumIter, EnumString}; - -#[derive( - Debug, - Copy, - Clone, - Eq, - PartialEq, - Hash, - Ord, - PartialOrd, - Deserialize, - Serialize, - EnumString, - EnumIter, - AsRefStr, - Display, -)] -#[cfg_attr(feature = "clap", derive(clap::ValueEnum))] -/// Segment of the data that can be snapshotted. -pub enum SnapshotSegment { - #[strum(serialize = "headers")] - /// Snapshot segment responsible for the `CanonicalHeaders`, `Headers`, - /// `HeaderTerminalDifficulties` tables. - Headers, - #[strum(serialize = "transactions")] - /// Snapshot segment responsible for the `Transactions` table. - Transactions, - #[strum(serialize = "receipts")] - /// Snapshot segment responsible for the `Receipts` table. - Receipts, -} - -impl SnapshotSegment { - /// Returns the default configuration of the segment. 
- pub const fn config(&self) -> SegmentConfig { - let default_config = SegmentConfig { - filters: Filters::WithFilters( - InclusionFilter::Cuckoo, - super::PerfectHashingFunction::Fmph, - ), - compression: Compression::Lz4, - }; - - match self { - SnapshotSegment::Headers => default_config, - SnapshotSegment::Transactions => default_config, - SnapshotSegment::Receipts => default_config, - } - } - - /// Returns the default file name for the provided segment and range. - pub fn filename( - &self, - block_range: &RangeInclusive, - tx_range: &RangeInclusive, - ) -> String { - // ATTENTION: if changing the name format, be sure to reflect those changes in - // [`Self::parse_filename`]. - format!( - "snapshot_{}_{}_{}_{}_{}", - self.as_ref(), - block_range.start(), - block_range.end(), - tx_range.start(), - tx_range.end(), - ) - } - - /// Returns file name for the provided segment and range, alongisde filters, compression. - pub fn filename_with_configuration( - &self, - filters: Filters, - compression: Compression, - block_range: &RangeInclusive, - tx_range: &RangeInclusive, - ) -> String { - let prefix = self.filename(block_range, tx_range); - - let filters_name = match filters { - Filters::WithFilters(inclusion_filter, phf) => { - format!("{}-{}", inclusion_filter.as_ref(), phf.as_ref()) - } - Filters::WithoutFilters => "none".to_string(), - }; - - // ATTENTION: if changing the name format, be sure to reflect those changes in - // [`Self::parse_filename`.] - format!("{prefix}_{}_{}", filters_name, compression.as_ref()) - } - - /// Parses a filename into a `SnapshotSegment` and its corresponding block and transaction - /// ranges. - /// - /// The filename is expected to follow the format: - /// "snapshot_{segment}_{block_start}_{block_end}_{tx_start}_{tx_end}". This function checks - /// for the correct prefix ("snapshot"), and then parses the segment and the inclusive - /// ranges for blocks and transactions. It ensures that the start of each range is less than the - /// end. - /// - /// # Returns - /// - `Some((segment, block_range, tx_range))` if parsing is successful and all conditions are - /// met. - /// - `None` if any condition fails, such as an incorrect prefix, parsing error, or invalid - /// range. - /// - /// # Note - /// This function is tightly coupled with the naming convention defined in [`Self::filename`]. - /// Any changes in the filename format in `filename` should be reflected here. - pub fn parse_filename( - name: &OsStr, - ) -> Option<(Self, RangeInclusive, RangeInclusive)> { - let mut parts = name.to_str()?.split('_'); - if parts.next() != Some("snapshot") { - return None - } - - let segment = Self::from_str(parts.next()?).ok()?; - let (block_start, block_end) = (parts.next()?.parse().ok()?, parts.next()?.parse().ok()?); - let (tx_start, tx_end) = (parts.next()?.parse().ok()?, parts.next()?.parse().ok()?); - - if block_start >= block_end || tx_start > tx_end { - return None - } - - Some((segment, block_start..=block_end, tx_start..=tx_end)) - } -} - -/// A segment header that contains information common to all segments. Used for storage. -#[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Hash)] -pub struct SegmentHeader { - /// Block range of the snapshot segment - block_range: RangeInclusive, - /// Transaction range of the snapshot segment - tx_range: RangeInclusive, - /// Segment type - segment: SnapshotSegment, -} - -impl SegmentHeader { - /// Returns [`SegmentHeader`]. 
- pub fn new( - block_range: RangeInclusive, - tx_range: RangeInclusive, - segment: SnapshotSegment, - ) -> Self { - Self { block_range, tx_range, segment } - } - - /// Returns the transaction range. - pub fn tx_range(&self) -> &RangeInclusive { - &self.tx_range - } - - /// Returns the block range. - pub fn block_range(&self) -> &RangeInclusive { - &self.block_range - } - - /// Returns the first block number of the segment. - pub fn block_start(&self) -> BlockNumber { - *self.block_range.start() - } - - /// Returns the last block number of the segment. - pub fn block_end(&self) -> BlockNumber { - *self.block_range.end() - } - - /// Returns the first transaction number of the segment. - pub fn tx_start(&self) -> TxNumber { - *self.tx_range.start() - } - - /// Returns the row offset which depends on whether the segment is block or transaction based. - pub fn start(&self) -> u64 { - match self.segment { - SnapshotSegment::Headers => self.block_start(), - SnapshotSegment::Transactions | SnapshotSegment::Receipts => self.tx_start(), - } - } -} - -/// Configuration used on the segment. -#[derive(Debug, Clone, Copy)] -pub struct SegmentConfig { - /// Inclusion filters used on the segment - pub filters: Filters, - /// Compression used on the segment - pub compression: Compression, -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_filename() { - let test_vectors = [ - (SnapshotSegment::Headers, 2..=30, 0..=1, "snapshot_headers_2_30_0_1", None), - ( - SnapshotSegment::Receipts, - 30..=300, - 110..=1000, - "snapshot_receipts_30_300_110_1000", - None, - ), - ( - SnapshotSegment::Transactions, - 1_123_233..=11_223_233, - 1_123_233..=2_123_233, - "snapshot_transactions_1123233_11223233_1123233_2123233", - None, - ), - ( - SnapshotSegment::Headers, - 2..=30, - 0..=1, - "snapshot_headers_2_30_0_1_cuckoo-fmph_lz4", - Some(( - Compression::Lz4, - Filters::WithFilters( - InclusionFilter::Cuckoo, - crate::snapshot::PerfectHashingFunction::Fmph, - ), - )), - ), - ( - SnapshotSegment::Headers, - 2..=30, - 0..=1, - "snapshot_headers_2_30_0_1_cuckoo-fmph_zstd", - Some(( - Compression::Zstd, - Filters::WithFilters( - InclusionFilter::Cuckoo, - crate::snapshot::PerfectHashingFunction::Fmph, - ), - )), - ), - ( - SnapshotSegment::Headers, - 2..=30, - 0..=1, - "snapshot_headers_2_30_0_1_cuckoo-fmph_zstd-dict", - Some(( - Compression::ZstdWithDictionary, - Filters::WithFilters( - InclusionFilter::Cuckoo, - crate::snapshot::PerfectHashingFunction::Fmph, - ), - )), - ), - ]; - - for (segment, block_range, tx_range, filename, configuration) in test_vectors { - if let Some((compression, filters)) = configuration { - assert_eq!( - segment.filename_with_configuration( - filters, - compression, - &block_range, - &tx_range - ), - filename - ); - } else { - assert_eq!(segment.filename(&block_range, &tx_range), filename); - } - - assert_eq!( - SnapshotSegment::parse_filename(OsStr::new(filename)), - Some((segment, block_range, tx_range)) - ); - } - - assert_eq!(SnapshotSegment::parse_filename(OsStr::new("snapshot_headers_2_30_3_2")), None); - assert_eq!(SnapshotSegment::parse_filename(OsStr::new("snapshot_headers_2_30_1")), None); - } -} diff --git a/crates/primitives/src/stage/id.rs b/crates/primitives/src/stage/id.rs index ac44a5e46d9..df92bd112c7 100644 --- a/crates/primitives/src/stage/id.rs +++ b/crates/primitives/src/stage/id.rs @@ -3,10 +3,10 @@ /// For custom stages, use [`StageId::Other`] #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] pub enum StageId { + /// Static File stage in the process. 
+ StaticFile, /// Header stage in the process. Headers, - /// Total difficulty stage in the process. - TotalDifficulty, /// Bodies stage in the process. Bodies, /// Sender recovery stage in the process. @@ -36,8 +36,8 @@ pub enum StageId { impl StageId { /// All supported Stages pub const ALL: [StageId; 13] = [ + StageId::StaticFile, StageId::Headers, - StageId::TotalDifficulty, StageId::Bodies, StageId::SenderRecovery, StageId::Execution, @@ -54,8 +54,8 @@ impl StageId { /// Return stage id formatted as string. pub fn as_str(&self) -> &str { match self { + StageId::StaticFile => "StaticFile", StageId::Headers => "Headers", - StageId::TotalDifficulty => "TotalDifficulty", StageId::Bodies => "Bodies", StageId::SenderRecovery => "SenderRecovery", StageId::Execution => "Execution", @@ -94,8 +94,8 @@ mod tests { #[test] fn stage_id_as_string() { + assert_eq!(StageId::StaticFile.to_string(), "StaticFile"); assert_eq!(StageId::Headers.to_string(), "Headers"); - assert_eq!(StageId::TotalDifficulty.to_string(), "TotalDifficulty"); assert_eq!(StageId::Bodies.to_string(), "Bodies"); assert_eq!(StageId::SenderRecovery.to_string(), "SenderRecovery"); assert_eq!(StageId::Execution.to_string(), "Execution"); diff --git a/crates/primitives/src/snapshot/compression.rs b/crates/primitives/src/static_file/compression.rs similarity index 86% rename from crates/primitives/src/snapshot/compression.rs rename to crates/primitives/src/static_file/compression.rs index 2d5599c2cda..f1a64a50140 100644 --- a/crates/primitives/src/snapshot/compression.rs +++ b/crates/primitives/src/static_file/compression.rs @@ -1,6 +1,6 @@ use strum::AsRefStr; -/// Snapshot compression types. +/// Static File compression types. #[derive(Debug, Copy, Clone, Default, AsRefStr)] #[cfg_attr(feature = "clap", derive(clap::ValueEnum))] pub enum Compression { @@ -13,7 +13,7 @@ pub enum Compression { /// Zstandard (Zstd) compression algorithm with a dictionary. #[strum(serialize = "zstd-dict")] ZstdWithDictionary, - /// No compression, uncompressed snapshot. + /// No compression. #[strum(serialize = "uncompressed")] #[default] Uncompressed, diff --git a/crates/primitives/src/snapshot/filters.rs b/crates/primitives/src/static_file/filters.rs similarity index 71% rename from crates/primitives/src/snapshot/filters.rs rename to crates/primitives/src/static_file/filters.rs index 3443d474706..cc844468e54 100644 --- a/crates/primitives/src/snapshot/filters.rs +++ b/crates/primitives/src/static_file/filters.rs @@ -1,16 +1,16 @@ use strum::AsRefStr; #[derive(Debug, Copy, Clone)] -/// Snapshot filters. +/// Static File filters. pub enum Filters { - /// Snapshot uses filters with [InclusionFilter] and [PerfectHashingFunction]. + /// Static File uses filters with [InclusionFilter] and [PerfectHashingFunction]. WithFilters(InclusionFilter, PerfectHashingFunction), - /// Snapshot doesn't use any filters. + /// Static File doesn't use any filters. WithoutFilters, } impl Filters { - /// Returns `true` if snapshot uses filters. + /// Returns `true` if static file uses filters. pub const fn has_filters(&self) -> bool { matches!(self, Self::WithFilters(_, _)) } @@ -18,7 +18,7 @@ impl Filters { #[derive(Debug, Copy, Clone, AsRefStr)] #[cfg_attr(feature = "clap", derive(clap::ValueEnum))] -/// Snapshot inclusion filter. Also see [Filters]. +/// Static File inclusion filter. Also see [Filters]. 
pub enum InclusionFilter { #[strum(serialize = "cuckoo")] /// Cuckoo filter @@ -27,7 +27,7 @@ pub enum InclusionFilter { #[derive(Debug, Copy, Clone, AsRefStr)] #[cfg_attr(feature = "clap", derive(clap::ValueEnum))] -/// Snapshot perfect hashing function. Also see [Filters]. +/// Static File perfect hashing function. Also see [Filters]. pub enum PerfectHashingFunction { #[strum(serialize = "fmph")] /// Fingerprint-Based Minimal Perfect Hash Function diff --git a/crates/primitives/src/static_file/mod.rs b/crates/primitives/src/static_file/mod.rs new file mode 100644 index 00000000000..fe15bd1c759 --- /dev/null +++ b/crates/primitives/src/static_file/mod.rs @@ -0,0 +1,54 @@ +//! StaticFile primitives. + +mod compression; +mod filters; +mod segment; + +use alloy_primitives::BlockNumber; +pub use compression::Compression; +pub use filters::{Filters, InclusionFilter, PerfectHashingFunction}; +pub use segment::{SegmentConfig, SegmentHeader, SegmentRangeInclusive, StaticFileSegment}; + +/// Default static file block count. +pub const BLOCKS_PER_STATIC_FILE: u64 = 500_000; + +/// Highest static file block numbers, per data part. +#[derive(Debug, Clone, Copy, Default, Eq, PartialEq)] +pub struct HighestStaticFiles { + /// Highest static file block of headers, inclusive. + /// If [`None`], no static file is available. + pub headers: Option, + /// Highest static file block of receipts, inclusive. + /// If [`None`], no static file is available. + pub receipts: Option, + /// Highest static file block of transactions, inclusive. + /// If [`None`], no static file is available. + pub transactions: Option, +} + +impl HighestStaticFiles { + /// Returns the highest static file if it exists for a segment + pub fn highest(&self, segment: StaticFileSegment) -> Option { + match segment { + StaticFileSegment::Headers => self.headers, + StaticFileSegment::Transactions => self.transactions, + StaticFileSegment::Receipts => self.receipts, + } + } + + /// Returns a mutable reference to a static file segment + pub fn as_mut(&mut self, segment: StaticFileSegment) -> &mut Option { + match segment { + StaticFileSegment::Headers => &mut self.headers, + StaticFileSegment::Transactions => &mut self.transactions, + StaticFileSegment::Receipts => &mut self.receipts, + } + } +} + +/// Each static file has a fixed number of blocks. This gives out the range where the requested +/// block is positioned. Used for segment filename. +pub fn find_fixed_range(block: BlockNumber) -> SegmentRangeInclusive { + let start = (block / BLOCKS_PER_STATIC_FILE) * BLOCKS_PER_STATIC_FILE; + SegmentRangeInclusive::new(start, start + BLOCKS_PER_STATIC_FILE - 1) +} diff --git a/crates/primitives/src/static_file/segment.rs b/crates/primitives/src/static_file/segment.rs new file mode 100644 index 00000000000..8f9e3e08af2 --- /dev/null +++ b/crates/primitives/src/static_file/segment.rs @@ -0,0 +1,435 @@ +use crate::{ + static_file::{Compression, Filters, InclusionFilter}, + BlockNumber, TxNumber, +}; +use derive_more::Display; +use serde::{Deserialize, Serialize}; +use std::{ops::RangeInclusive, str::FromStr}; +use strum::{AsRefStr, EnumIter, EnumString}; + +#[derive( + Debug, + Copy, + Clone, + Eq, + PartialEq, + Hash, + Ord, + PartialOrd, + Deserialize, + Serialize, + EnumString, + EnumIter, + AsRefStr, + Display, +)] +#[cfg_attr(feature = "clap", derive(clap::ValueEnum))] +/// Segment of the data that can be moved to static files. 
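A minimal sketch (not part of this patch) of the `find_fixed_range` helper added in `static_file/mod.rs` above, assuming the `reth_primitives::static_file` exports from this change; it maps a block number to the fixed 500_000-block window its static file covers:

```rust
use reth_primitives::static_file::{find_fixed_range, BLOCKS_PER_STATIC_FILE};

fn main() {
    // Block 1_250_000 falls into the third window.
    let range = find_fixed_range(1_250_000);
    assert_eq!(range.start(), 1_000_000);
    assert_eq!(range.end(), 1_499_999);

    // Block 0 falls into the very first window.
    assert_eq!(find_fixed_range(0).start(), 0);
    assert_eq!(find_fixed_range(0).end(), BLOCKS_PER_STATIC_FILE - 1);
}
```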
+pub enum StaticFileSegment { + #[strum(serialize = "headers")] + /// Static File segment responsible for the `CanonicalHeaders`, `Headers`, + /// `HeaderTerminalDifficulties` tables. + Headers, + #[strum(serialize = "transactions")] + /// Static File segment responsible for the `Transactions` table. + Transactions, + #[strum(serialize = "receipts")] + /// Static File segment responsible for the `Receipts` table. + Receipts, +} + +impl StaticFileSegment { + /// Returns the segment as a string. + pub const fn as_str(&self) -> &'static str { + match self { + StaticFileSegment::Headers => "headers", + StaticFileSegment::Transactions => "transactions", + StaticFileSegment::Receipts => "receipts", + } + } + + /// Returns the default configuration of the segment. + pub const fn config(&self) -> SegmentConfig { + let default_config = SegmentConfig { + filters: Filters::WithFilters( + InclusionFilter::Cuckoo, + super::PerfectHashingFunction::Fmph, + ), + compression: Compression::Lz4, + }; + + match self { + StaticFileSegment::Headers => default_config, + StaticFileSegment::Transactions => default_config, + StaticFileSegment::Receipts => default_config, + } + } + + /// Returns the number of columns for the segment + pub const fn columns(&self) -> usize { + match self { + StaticFileSegment::Headers => 3, + StaticFileSegment::Transactions => 1, + StaticFileSegment::Receipts => 1, + } + } + + /// Returns the default file name for the provided segment and range. + pub fn filename(&self, block_range: &SegmentRangeInclusive) -> String { + // ATTENTION: if changing the name format, be sure to reflect those changes in + // [`Self::parse_filename`]. + format!("static_file_{}_{}_{}", self.as_ref(), block_range.start(), block_range.end()) + } + + /// Returns file name for the provided segment and range, alongisde filters, compression. + pub fn filename_with_configuration( + &self, + filters: Filters, + compression: Compression, + block_range: &SegmentRangeInclusive, + ) -> String { + let prefix = self.filename(block_range); + + let filters_name = match filters { + Filters::WithFilters(inclusion_filter, phf) => { + format!("{}-{}", inclusion_filter.as_ref(), phf.as_ref()) + } + Filters::WithoutFilters => "none".to_string(), + }; + + // ATTENTION: if changing the name format, be sure to reflect those changes in + // [`Self::parse_filename`.] + format!("{prefix}_{}_{}", filters_name, compression.as_ref()) + } + + /// Parses a filename into a `StaticFileSegment` and its expected block range. + /// + /// The filename is expected to follow the format: + /// "static_file_{segment}_{block_start}_{block_end}". This function checks + /// for the correct prefix ("static_file"), and then parses the segment and the inclusive + /// ranges for blocks. It ensures that the start of each range is less than or equal to the + /// end. + /// + /// # Returns + /// - `Some((segment, block_range))` if parsing is successful and all conditions are met. + /// - `None` if any condition fails, such as an incorrect prefix, parsing error, or invalid + /// range. + /// + /// # Note + /// This function is tightly coupled with the naming convention defined in [`Self::filename`]. + /// Any changes in the filename format in `filename` should be reflected here. 
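A minimal sketch (not part of this patch) of the filename convention described above and its round trip through `parse_filename`, assuming the exports from this change:

```rust
use reth_primitives::static_file::{SegmentRangeInclusive, StaticFileSegment};

fn main() {
    let range = SegmentRangeInclusive::new(0, 499_999);

    // "static_file_{segment}_{block_start}_{block_end}"
    let name = StaticFileSegment::Headers.filename(&range);
    assert_eq!(name, "static_file_headers_0_499999");

    // Parsing recovers the segment and its expected block range.
    assert_eq!(
        StaticFileSegment::parse_filename(&name),
        Some((StaticFileSegment::Headers, range))
    );
}
```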
+ pub fn parse_filename(name: &str) -> Option<(Self, SegmentRangeInclusive)> { + let mut parts = name.split('_'); + if !(parts.next() == Some("static") && parts.next() == Some("file")) { + return None + } + + let segment = Self::from_str(parts.next()?).ok()?; + let (block_start, block_end) = (parts.next()?.parse().ok()?, parts.next()?.parse().ok()?); + + if block_start > block_end { + return None + } + + Some((segment, SegmentRangeInclusive::new(block_start, block_end))) + } + + /// Returns `true` if the segment is `StaticFileSegment::Headers`. + pub fn is_headers(&self) -> bool { + matches!(self, StaticFileSegment::Headers) + } +} + +/// A segment header that contains information common to all segments. Used for storage. +#[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Hash, Clone)] +pub struct SegmentHeader { + /// Defines the expected block range for a static file segment. This attribute is crucial for + /// scenarios where the file contains no data, allowing for a representation beyond a + /// simple `start..=start` range. It ensures clarity in differentiating between an empty file + /// and a file with a single block numbered 0. + expected_block_range: SegmentRangeInclusive, + /// Block range of data on the static file segment + block_range: Option, + /// Transaction range of data of the static file segment + tx_range: Option, + /// Segment type + segment: StaticFileSegment, +} + +impl SegmentHeader { + /// Returns [`SegmentHeader`]. + pub fn new( + expected_block_range: SegmentRangeInclusive, + block_range: Option, + tx_range: Option, + segment: StaticFileSegment, + ) -> Self { + Self { expected_block_range, block_range, tx_range, segment } + } + + /// Returns the static file segment kind. + pub fn segment(&self) -> StaticFileSegment { + self.segment + } + + /// Returns the block range. + pub fn block_range(&self) -> Option<&SegmentRangeInclusive> { + self.block_range.as_ref() + } + + /// Returns the transaction range. + pub fn tx_range(&self) -> Option<&SegmentRangeInclusive> { + self.tx_range.as_ref() + } + + /// The expected block start of the segment. + pub fn expected_block_start(&self) -> BlockNumber { + self.expected_block_range.start() + } + + /// The expected block end of the segment. + pub fn expected_block_end(&self) -> BlockNumber { + self.expected_block_range.end() + } + + /// Returns the first block number of the segment. + pub fn block_start(&self) -> Option { + self.block_range.as_ref().map(|b| b.start()) + } + + /// Returns the last block number of the segment. + pub fn block_end(&self) -> Option { + self.block_range.as_ref().map(|b| b.end()) + } + + /// Returns the first transaction number of the segment. + pub fn tx_start(&self) -> Option { + self.tx_range.as_ref().map(|t| t.start()) + } + + /// Returns the last transaction number of the segment. + pub fn tx_end(&self) -> Option { + self.tx_range.as_ref().map(|t| t.end()) + } + + /// Number of transactions. + pub fn tx_len(&self) -> Option { + self.tx_range.as_ref().map(|r| (r.end() + 1) - r.start()) + } + + /// Number of blocks. 
+ pub fn block_len(&self) -> Option { + self.block_range.as_ref().map(|r| (r.end() + 1) - r.start()) + } + + /// Increments block end range depending on segment + pub fn increment_block(&mut self) -> BlockNumber { + if let Some(block_range) = &mut self.block_range { + block_range.end += 1; + block_range.end + } else { + self.block_range = Some(SegmentRangeInclusive::new( + self.expected_block_start(), + self.expected_block_start(), + )); + self.expected_block_start() + } + } + + /// Increments tx end range depending on segment + pub fn increment_tx(&mut self) { + match self.segment { + StaticFileSegment::Headers => (), + StaticFileSegment::Transactions | StaticFileSegment::Receipts => { + if let Some(tx_range) = &mut self.tx_range { + tx_range.end += 1; + } else { + self.tx_range = Some(SegmentRangeInclusive::new(0, 0)); + } + } + } + } + + /// Removes `num` elements from end of tx or block range. + pub fn prune(&mut self, num: u64) { + match self.segment { + StaticFileSegment::Headers => { + if let Some(range) = &mut self.block_range { + if num > range.end { + self.block_range = None; + } else { + range.end = range.end.saturating_sub(num); + } + }; + } + StaticFileSegment::Transactions | StaticFileSegment::Receipts => { + if let Some(range) = &mut self.tx_range { + if num > range.end { + self.tx_range = None; + } else { + range.end = range.end.saturating_sub(num); + } + }; + } + }; + } + + /// Sets a new block_range. + pub fn set_block_range(&mut self, block_start: BlockNumber, block_end: BlockNumber) { + if let Some(block_range) = &mut self.block_range { + block_range.start = block_start; + block_range.end = block_end; + } else { + self.block_range = Some(SegmentRangeInclusive::new(block_start, block_end)) + } + } + + /// Sets a new tx_range. + pub fn set_tx_range(&mut self, tx_start: TxNumber, tx_end: TxNumber) { + if let Some(tx_range) = &mut self.tx_range { + tx_range.start = tx_start; + tx_range.end = tx_end; + } else { + self.tx_range = Some(SegmentRangeInclusive::new(tx_start, tx_end)) + } + } + + /// Returns the row offset which depends on whether the segment is block or transaction based. + pub fn start(&self) -> Option { + match self.segment { + StaticFileSegment::Headers => self.block_start(), + StaticFileSegment::Transactions | StaticFileSegment::Receipts => self.tx_start(), + } + } +} + +/// Configuration used on the segment. +#[derive(Debug, Clone, Copy)] +pub struct SegmentConfig { + /// Inclusion filters used on the segment + pub filters: Filters, + /// Compression used on the segment + pub compression: Compression, +} + +/// Helper type to handle segment transaction and block INCLUSIVE ranges. +/// +/// They can be modified on a hot loop, which makes the `std::ops::RangeInclusive` a poor fit. 
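A minimal sketch (not part of this patch) of how `SegmentHeader` distinguishes the expected block range of a static file from the data actually written into it, assuming the types added above; the range values are arbitrary:

```rust
use reth_primitives::{
    static_file::{SegmentHeader, SegmentRangeInclusive},
    StaticFileSegment,
};

fn main() {
    // A freshly created headers segment is expected to cover 0..=499_999 but holds no data yet.
    let mut header = SegmentHeader::new(
        SegmentRangeInclusive::new(0, 499_999),
        None,
        None,
        StaticFileSegment::Headers,
    );
    assert_eq!(header.block_len(), None);

    // Appending the first block initializes the data range at the expected start.
    header.increment_block();
    assert_eq!(header.block_start(), Some(0));
    assert_eq!(header.block_len(), Some(1));
}
```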
+#[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Hash, Clone, Copy)] +pub struct SegmentRangeInclusive { + start: u64, + end: u64, +} + +impl SegmentRangeInclusive { + /// Creates a new [`SegmentRangeInclusive`] + pub fn new(start: u64, end: u64) -> Self { + Self { start, end } + } + + /// Start of the inclusive range + pub fn start(&self) -> u64 { + self.start + } + + /// End of the inclusive range + pub fn end(&self) -> u64 { + self.end + } +} + +impl std::fmt::Display for SegmentRangeInclusive { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}..={}", self.start, self.end) + } +} + +impl From> for SegmentRangeInclusive { + fn from(value: RangeInclusive) -> Self { + SegmentRangeInclusive { start: *value.start(), end: *value.end() } + } +} + +impl From<&SegmentRangeInclusive> for RangeInclusive { + fn from(value: &SegmentRangeInclusive) -> Self { + value.start()..=value.end() + } +} + +impl From for RangeInclusive { + fn from(value: SegmentRangeInclusive) -> Self { + (&value).into() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_filename() { + let test_vectors = [ + (StaticFileSegment::Headers, 2..=30, "static_file_headers_2_30", None), + (StaticFileSegment::Receipts, 30..=300, "static_file_receipts_30_300", None), + ( + StaticFileSegment::Transactions, + 1_123_233..=11_223_233, + "static_file_transactions_1123233_11223233", + None, + ), + ( + StaticFileSegment::Headers, + 2..=30, + "static_file_headers_2_30_cuckoo-fmph_lz4", + Some(( + Compression::Lz4, + Filters::WithFilters( + InclusionFilter::Cuckoo, + crate::static_file::PerfectHashingFunction::Fmph, + ), + )), + ), + ( + StaticFileSegment::Headers, + 2..=30, + "static_file_headers_2_30_cuckoo-fmph_zstd", + Some(( + Compression::Zstd, + Filters::WithFilters( + InclusionFilter::Cuckoo, + crate::static_file::PerfectHashingFunction::Fmph, + ), + )), + ), + ( + StaticFileSegment::Headers, + 2..=30, + "static_file_headers_2_30_cuckoo-fmph_zstd-dict", + Some(( + Compression::ZstdWithDictionary, + Filters::WithFilters( + InclusionFilter::Cuckoo, + crate::static_file::PerfectHashingFunction::Fmph, + ), + )), + ), + ]; + + for (segment, block_range, filename, configuration) in test_vectors { + let block_range: SegmentRangeInclusive = block_range.into(); + if let Some((compression, filters)) = configuration { + assert_eq!( + segment.filename_with_configuration(filters, compression, &block_range,), + filename + ); + } else { + assert_eq!(segment.filename(&block_range), filename); + } + + assert_eq!(StaticFileSegment::parse_filename(filename), Some((segment, block_range))); + } + + assert_eq!(StaticFileSegment::parse_filename("static_file_headers_2"), None); + assert_eq!(StaticFileSegment::parse_filename("static_file_headers_"), None); + } +} diff --git a/crates/prune/Cargo.toml b/crates/prune/Cargo.toml index 750cd84d6bf..6699c567bc2 100644 --- a/crates/prune/Cargo.toml +++ b/crates/prune/Cargo.toml @@ -17,7 +17,7 @@ reth-primitives.workspace = true reth-db.workspace = true reth-provider.workspace = true reth-interfaces.workspace = true -reth-snapshot.workspace = true +reth-static-file.workspace = true reth-tokio-util.workspace = true reth-config.workspace = true diff --git a/crates/prune/src/builder.rs b/crates/prune/src/builder.rs index 7ccf665d98a..5836688bf0f 100644 --- a/crates/prune/src/builder.rs +++ b/crates/prune/src/builder.rs @@ -3,7 +3,6 @@ use reth_config::PruneConfig; use reth_db::database::Database; use reth_primitives::{PruneModes, MAINNET}; use 
reth_provider::ProviderFactory; -use reth_snapshot::HighestSnapshotsTracker; /// Contains the information required to build a pruner #[derive(Debug, Clone, PartialEq, Eq)] @@ -53,11 +52,7 @@ impl PrunerBuilder { } /// Builds a [Pruner] from the current configuration. - pub fn build( - self, - provider_factory: ProviderFactory, - highest_snapshots_rx: HighestSnapshotsTracker, - ) -> Pruner { + pub fn build(self, provider_factory: ProviderFactory) -> Pruner { let segments = SegmentSet::::from_prune_modes(self.segments); Pruner::new( @@ -66,7 +61,6 @@ impl PrunerBuilder { self.block_interval, self.prune_delete_limit, self.max_reorg_depth, - highest_snapshots_rx, ) } } diff --git a/crates/prune/src/event.rs b/crates/prune/src/event.rs index 7599b809b60..c2c3808d137 100644 --- a/crates/prune/src/event.rs +++ b/crates/prune/src/event.rs @@ -4,6 +4,8 @@ use std::{collections::BTreeMap, time::Duration}; /// An event emitted by a [Pruner][crate::Pruner]. #[derive(Debug, PartialEq, Eq, Clone)] pub enum PrunerEvent { + /// Emitted when pruner started running. + Started { tip_block_number: BlockNumber }, /// Emitted when pruner finished running. Finished { tip_block_number: BlockNumber, diff --git a/crates/prune/src/pruner.rs b/crates/prune/src/pruner.rs index 649c3b0925c..459383204f1 100644 --- a/crates/prune/src/pruner.rs +++ b/crates/prune/src/pruner.rs @@ -6,13 +6,14 @@ use crate::{ Metrics, PrunerError, PrunerEvent, }; use reth_db::database::Database; -use reth_primitives::{BlockNumber, PruneMode, PruneProgress, PruneSegment}; -use reth_provider::{ProviderFactory, PruneCheckpointReader}; -use reth_snapshot::HighestSnapshotsTracker; +use reth_primitives::{ + BlockNumber, PruneMode, PruneProgress, PrunePurpose, PruneSegment, StaticFileSegment, +}; +use reth_provider::{DatabaseProviderRW, ProviderFactory, PruneCheckpointReader}; use reth_tokio_util::EventListeners; -use std::{collections::BTreeMap, sync::Arc, time::Instant}; +use std::{collections::BTreeMap, time::Instant}; use tokio_stream::wrappers::UnboundedReceiverStream; -use tracing::{debug, trace}; +use tracing::debug; /// Result of [Pruner::run] execution. pub type PrunerResult = Result; @@ -20,11 +21,13 @@ pub type PrunerResult = Result; /// The pruner type itself with the result of [Pruner::run] pub type PrunerWithResult = (Pruner, PrunerResult); +type PrunerStats = BTreeMap; + /// Pruning routine. Main pruning logic happens in [Pruner::run]. #[derive(Debug)] pub struct Pruner { provider_factory: ProviderFactory, - segments: Vec>>, + segments: Vec>>, /// Minimum pruning interval measured in blocks. All prune segments are checked and, if needed, /// pruned, when the chain advances by the specified number of blocks. min_block_interval: usize, @@ -37,8 +40,6 @@ pub struct Pruner { /// Maximum number of blocks to be pruned per run, as an additional restriction to /// `previous_tip_block_number`. prune_max_blocks_per_run: usize, - #[allow(dead_code)] - highest_snapshots_tracker: HighestSnapshotsTracker, metrics: Metrics, listeners: EventListeners, } @@ -47,11 +48,10 @@ impl Pruner { /// Creates a new [Pruner]. 
pub fn new( provider_factory: ProviderFactory, - segments: Vec>>, + segments: Vec>>, min_block_interval: usize, delete_limit: usize, prune_max_blocks_per_run: usize, - highest_snapshots_tracker: HighestSnapshotsTracker, ) -> Self { Self { provider_factory, @@ -60,13 +60,12 @@ impl Pruner { previous_tip_block_number: None, delete_limit, prune_max_blocks_per_run, - highest_snapshots_tracker, metrics: Metrics::default(), listeners: Default::default(), } } - /// Listen for events on the prune. + /// Listen for events on the pruner. pub fn events(&mut self) -> UnboundedReceiverStream { self.listeners.new_listener() } @@ -76,20 +75,14 @@ impl Pruner { if tip_block_number == 0 { self.previous_tip_block_number = Some(tip_block_number); - trace!(target: "pruner", %tip_block_number, "Nothing to prune yet"); + debug!(target: "pruner", %tip_block_number, "Nothing to prune yet"); return Ok(PruneProgress::Finished) } - trace!(target: "pruner", %tip_block_number, "Pruner started"); - let start = Instant::now(); - - let provider = self.provider_factory.provider_rw()?; + self.listeners.notify(PrunerEvent::Started { tip_block_number }); - let mut done = true; - let mut stats = BTreeMap::new(); - - // TODO(alexey): prune snapshotted segments of data (headers, transactions) - let highest_snapshots = *self.highest_snapshots_tracker.borrow(); + debug!(target: "pruner", %tip_block_number, "Pruner started"); + let start = Instant::now(); // Multiply `self.delete_limit` (number of rows to delete per block) by number of blocks // since last pruner run. `self.previous_tip_block_number` is close to @@ -106,34 +99,80 @@ impl Pruner { tip_block_number.saturating_sub(previous_tip_block_number) as usize })) .min(self.prune_max_blocks_per_run); - let mut delete_limit = self.delete_limit * blocks_since_last_run; + let delete_limit = self.delete_limit * blocks_since_last_run; + + let provider = self.provider_factory.provider_rw()?; + let (stats, delete_limit, progress) = + self.prune_segments(&provider, tip_block_number, delete_limit)?; + provider.commit()?; + + self.previous_tip_block_number = Some(tip_block_number); + + let elapsed = start.elapsed(); + self.metrics.duration_seconds.record(elapsed); + + debug!( + target: "pruner", + %tip_block_number, + ?elapsed, + %delete_limit, + ?progress, + ?stats, + "Pruner finished" + ); - for segment in &self.segments { + self.listeners.notify(PrunerEvent::Finished { tip_block_number, elapsed, stats }); + + Ok(progress) + } + + /// Prunes the segments that the [Pruner] was initialized with, and the segments that needs to + /// be pruned according to the highest static_files. + /// + /// Returns [PrunerStats], `delete_limit` that remained after pruning all segments, and + /// [PruneProgress]. + fn prune_segments( + &mut self, + provider: &DatabaseProviderRW, + tip_block_number: BlockNumber, + mut delete_limit: usize, + ) -> Result<(PrunerStats, usize, PruneProgress), PrunerError> { + let static_file_segments = self.static_file_segments(); + let segments = static_file_segments + .iter() + .map(|segment| (segment, PrunePurpose::StaticFile)) + .chain(self.segments.iter().map(|segment| (segment, PrunePurpose::User))); + + let mut done = true; + let mut stats = PrunerStats::new(); + + for (segment, purpose) in segments { if delete_limit == 0 { break } if let Some((to_block, prune_mode)) = segment .mode() - .map(|mode| mode.prune_target_block(tip_block_number, segment.segment())) + .map(|mode| mode.prune_target_block(tip_block_number, segment.segment(), purpose)) .transpose()? 
.flatten() { - trace!( + debug!( target: "pruner", segment = ?segment.segment(), + ?purpose, %to_block, ?prune_mode, - "Got target block to prune" + "Segment pruning started" ); let segment_start = Instant::now(); let previous_checkpoint = provider.get_prune_checkpoint(segment.segment())?; let output = segment - .prune(&provider, PruneInput { previous_checkpoint, to_block, delete_limit })?; + .prune(provider, PruneInput { previous_checkpoint, to_block, delete_limit })?; if let Some(checkpoint) = output.checkpoint { segment - .save_checkpoint(&provider, checkpoint.as_prune_checkpoint(prune_mode))?; + .save_checkpoint(provider, checkpoint.as_prune_checkpoint(prune_mode))?; } self.metrics .get_prune_segment_metrics(segment.segment()) @@ -147,112 +186,70 @@ impl Pruner { done = done && output.done; delete_limit = delete_limit.saturating_sub(output.pruned); - stats.insert( - segment.segment(), - (PruneProgress::from_done(output.done), output.pruned), - ); - } else { - trace!(target: "pruner", segment = ?segment.segment(), "No target block to prune"); - } - } - if let Some(snapshots) = highest_snapshots { - if let (Some(to_block), true) = (snapshots.headers, delete_limit > 0) { - let prune_mode = PruneMode::Before(to_block + 1); - trace!( + debug!( target: "pruner", - prune_segment = ?PruneSegment::Headers, + segment = ?segment.segment(), + ?purpose, %to_block, ?prune_mode, - "Got target block to prune" + %output.pruned, + "Segment pruning finished" ); - let segment_start = Instant::now(); - let segment = segments::Headers::new(prune_mode); - let previous_checkpoint = provider.get_prune_checkpoint(PruneSegment::Headers)?; - let output = segment - .prune(&provider, PruneInput { previous_checkpoint, to_block, delete_limit })?; - if let Some(checkpoint) = output.checkpoint { - segment - .save_checkpoint(&provider, checkpoint.as_prune_checkpoint(prune_mode))?; + if output.pruned > 0 { + stats.insert( + segment.segment(), + (PruneProgress::from_done(output.done), output.pruned), + ); } - self.metrics - .get_prune_segment_metrics(PruneSegment::Headers) - .duration_seconds - .record(segment_start.elapsed()); - - done = done && output.done; - delete_limit = delete_limit.saturating_sub(output.pruned); - stats.insert( - PruneSegment::Headers, - (PruneProgress::from_done(output.done), output.pruned), - ); + } else { + debug!(target: "pruner", segment = ?segment.segment(), ?purpose, "Nothing to prune for the segment"); } + } - if let (Some(to_block), true) = (snapshots.transactions, delete_limit > 0) { - let prune_mode = PruneMode::Before(to_block + 1); - trace!( - target: "pruner", - prune_segment = ?PruneSegment::Transactions, - %to_block, - ?prune_mode, - "Got target block to prune" - ); + Ok((stats, delete_limit, PruneProgress::from_done(done))) + } - let segment_start = Instant::now(); - let segment = segments::Transactions::new(prune_mode); - let previous_checkpoint = provider.get_prune_checkpoint(PruneSegment::Headers)?; - let output = segment - .prune(&provider, PruneInput { previous_checkpoint, to_block, delete_limit })?; - if let Some(checkpoint) = output.checkpoint { - segment - .save_checkpoint(&provider, checkpoint.as_prune_checkpoint(prune_mode))?; - } - self.metrics - .get_prune_segment_metrics(PruneSegment::Transactions) - .duration_seconds - .record(segment_start.elapsed()); + /// Returns pre-configured segments that needs to be pruned according to the highest + /// static_files for [PruneSegment::Transactions], [PruneSegment::Headers] and + /// [PruneSegment::Receipts]. 
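A minimal sketch (not part of this patch) of the prune mode derived for a segment whose data already lives in static files, assuming the `before_inclusive` constructor added earlier in this change; `highest_block` is a placeholder for the value the static file provider reports for the segment:

```rust
use reth_primitives::{PruneMode, PrunePurpose, PruneSegment};

fn main() {
    // Placeholder for the highest headers block available in static files.
    let highest_block = 1_499_999;
    let tip = 1_600_000;

    // Everything up to and including the highest static-file block is now served from
    // static files, so it can be deleted from the database.
    let mode = PruneMode::before_inclusive(highest_block);
    assert_eq!(
        mode.prune_target_block(tip, PruneSegment::Headers, PrunePurpose::StaticFile),
        Ok(Some((highest_block, mode))),
    );
}
```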
+ fn static_file_segments(&self) -> Vec>> { + let mut segments = Vec::>>::new(); - done = done && output.done; - delete_limit = delete_limit.saturating_sub(output.pruned); - stats.insert( - PruneSegment::Transactions, - (PruneProgress::from_done(output.done), output.pruned), - ); - } - } - - provider.commit()?; - self.previous_tip_block_number = Some(tip_block_number); + let static_file_provider = self.provider_factory.static_file_provider(); - let elapsed = start.elapsed(); - self.metrics.duration_seconds.record(elapsed); + if let Some(to_block) = + static_file_provider.get_highest_static_file_block(StaticFileSegment::Transactions) + { + segments + .push(Box::new(segments::Transactions::new(PruneMode::before_inclusive(to_block)))) + } - trace!( - target: "pruner", - %tip_block_number, - ?elapsed, - %delete_limit, - %done, - ?stats, - "Pruner finished" - ); + if let Some(to_block) = + static_file_provider.get_highest_static_file_block(StaticFileSegment::Headers) + { + segments.push(Box::new(segments::Headers::new(PruneMode::before_inclusive(to_block)))) + } - self.listeners.notify(PrunerEvent::Finished { tip_block_number, elapsed, stats }); + if let Some(to_block) = + static_file_provider.get_highest_static_file_block(StaticFileSegment::Receipts) + { + segments.push(Box::new(segments::Receipts::new(PruneMode::before_inclusive(to_block)))) + } - Ok(PruneProgress::from_done(done)) + segments } /// Returns `true` if the pruning is needed at the provided tip block number. /// This determined by the check against minimum pruning interval and last pruned block number. pub fn is_pruning_needed(&self, tip_block_number: BlockNumber) -> bool { - if self.previous_tip_block_number.map_or(true, |previous_tip_block_number| { - // Saturating subtraction is needed for the case when the chain was reverted, meaning - // current block number might be less than the previous tip block number. - // If that's the case, no pruning is needed as outdated data is also reverted. - tip_block_number.saturating_sub(previous_tip_block_number) >= - self.min_block_interval as u64 - }) { + // Saturating subtraction is needed for the case when the chain was reverted, meaning + // current block number might be less than the previous tip block number. + // If that's the case, no pruning is needed as outdated data is also reverted. 
+ if tip_block_number.saturating_sub(self.previous_tip_block_number.unwrap_or_default()) >= + self.min_block_interval as u64 + { debug!( target: "pruner", previous_tip_block_number = ?self.previous_tip_block_number, @@ -269,20 +266,21 @@ impl Pruner { #[cfg(test)] mod tests { use crate::Pruner; - use reth_db::test_utils::create_test_rw_db; + use reth_db::test_utils::{create_test_rw_db, create_test_static_files_dir}; use reth_primitives::MAINNET; use reth_provider::ProviderFactory; - use tokio::sync::watch; #[test] fn is_pruning_needed() { let db = create_test_rw_db(); - let provider_factory = ProviderFactory::new(db, MAINNET.clone()); - let mut pruner = Pruner::new(provider_factory, vec![], 5, 0, 5, watch::channel(None).1); + let provider_factory = + ProviderFactory::new(db, MAINNET.clone(), create_test_static_files_dir()) + .expect("create provide factory with static_files"); + let mut pruner = Pruner::new(provider_factory, vec![], 5, 0, 5); // No last pruned block number was set before let first_block_number = 1; - assert!(pruner.is_pruning_needed(first_block_number)); + assert!(!pruner.is_pruning_needed(first_block_number)); pruner.previous_tip_block_number = Some(first_block_number); // Tip block number delta is >= than min block interval diff --git a/crates/prune/src/segments/account_history.rs b/crates/prune/src/segments/account_history.rs index cf6b6e6046d..a18897640ba 100644 --- a/crates/prune/src/segments/account_history.rs +++ b/crates/prune/src/segments/account_history.rs @@ -90,7 +90,7 @@ mod tests { }; use reth_primitives::{BlockNumber, PruneCheckpoint, PruneMode, PruneSegment, B256}; use reth_provider::PruneCheckpointReader; - use reth_stages::test_utils::TestStageDB; + use reth_stages::test_utils::{StorageKind, TestStageDB}; use std::{collections::BTreeMap, ops::AddAssign}; #[test] @@ -99,7 +99,7 @@ mod tests { let mut rng = generators::rng(); let blocks = random_block_range(&mut rng, 1..=5000, B256::ZERO, 0..1); - db.insert_blocks(blocks.iter(), None).expect("insert blocks"); + db.insert_blocks(blocks.iter(), StorageKind::Database(None)).expect("insert blocks"); let accounts = random_eoa_accounts(&mut rng, 2).into_iter().collect::>(); diff --git a/crates/prune/src/segments/headers.rs b/crates/prune/src/segments/headers.rs index de6bcf9b187..2da19137589 100644 --- a/crates/prune/src/segments/headers.rs +++ b/crates/prune/src/segments/headers.rs @@ -52,17 +52,13 @@ impl Segment for Headers { } let results = [ - self.prune_table::( - provider, - block_range.clone(), - delete_limit, - )?, self.prune_table::(provider, block_range.clone(), delete_limit)?, self.prune_table::( provider, - block_range, + block_range.clone(), delete_limit, )?, + self.prune_table::(provider, block_range, delete_limit)?, ]; if !results.iter().map(|(_, _, last_pruned_block)| last_pruned_block).all_equal() { @@ -116,9 +112,9 @@ impl Headers { mod tests { use crate::segments::{Headers, PruneInput, PruneOutput, Segment}; use assert_matches::assert_matches; - use reth_db::tables; + use reth_db::{tables, transaction::DbTx}; use reth_interfaces::test_utils::{generators, generators::random_header_range}; - use reth_primitives::{BlockNumber, PruneCheckpoint, PruneMode, PruneSegment, B256}; + use reth_primitives::{BlockNumber, PruneCheckpoint, PruneMode, PruneSegment, B256, U256}; use reth_provider::PruneCheckpointReader; use reth_stages::test_utils::TestStageDB; @@ -128,7 +124,11 @@ mod tests { let mut rng = generators::rng(); let headers = random_header_range(&mut rng, 0..100, B256::ZERO); - 
db.insert_headers_with_td(headers.iter()).expect("insert headers"); + let tx = db.factory.provider_rw().unwrap().into_tx(); + for header in headers.iter() { + TestStageDB::insert_header(None, &tx, header, U256::ZERO).unwrap(); + } + tx.commit().unwrap(); assert_eq!(db.table::().unwrap().len(), headers.len()); assert_eq!(db.table::().unwrap().len(), headers.len()); diff --git a/crates/prune/src/segments/mod.rs b/crates/prune/src/segments/mod.rs index 339c4e01374..5e644e227e1 100644 --- a/crates/prune/src/segments/mod.rs +++ b/crates/prune/src/segments/mod.rs @@ -95,10 +95,18 @@ impl PruneInput { .unwrap_or(0); let to_tx_number = match provider.block_body_indices(self.to_block)? { - Some(body) => body, + Some(body) => { + let last_tx = body.last_tx_num(); + if last_tx + body.tx_count() == 0 { + // Prevents a scenario where the pruner correctly starts at a finalized block, + // but the first transaction (tx_num = 0) only appears on an unfinalized one. + // Should only happen on a test/hive scenario. + return Ok(None) + } + last_tx + } None => return Ok(None), - } - .last_tx_num(); + }; let range = from_tx_number..=to_tx_number; if range.is_empty() { diff --git a/crates/prune/src/segments/receipts.rs b/crates/prune/src/segments/receipts.rs index fdd4d0402e4..d1ce5324e6a 100644 --- a/crates/prune/src/segments/receipts.rs +++ b/crates/prune/src/segments/receipts.rs @@ -99,7 +99,7 @@ mod tests { }; use reth_primitives::{BlockNumber, PruneCheckpoint, PruneMode, PruneSegment, TxNumber, B256}; use reth_provider::PruneCheckpointReader; - use reth_stages::test_utils::TestStageDB; + use reth_stages::test_utils::{StorageKind, TestStageDB}; use std::ops::Sub; #[test] @@ -108,7 +108,7 @@ mod tests { let mut rng = generators::rng(); let blocks = random_block_range(&mut rng, 1..=10, B256::ZERO, 2..3); - db.insert_blocks(blocks.iter(), None).expect("insert blocks"); + db.insert_blocks(blocks.iter(), StorageKind::Database(None)).expect("insert blocks"); let mut receipts = Vec::new(); for block in &blocks { diff --git a/crates/prune/src/segments/receipts_by_logs.rs b/crates/prune/src/segments/receipts_by_logs.rs index 984072b3076..efcbfe76110 100644 --- a/crates/prune/src/segments/receipts_by_logs.rs +++ b/crates/prune/src/segments/receipts_by_logs.rs @@ -4,7 +4,8 @@ use crate::{ }; use reth_db::{database::Database, tables}; use reth_primitives::{ - PruneCheckpoint, PruneMode, PruneSegment, ReceiptsLogPruneConfig, MINIMUM_PRUNING_DISTANCE, + PruneCheckpoint, PruneMode, PrunePurpose, PruneSegment, ReceiptsLogPruneConfig, + MINIMUM_PRUNING_DISTANCE, }; use reth_provider::{BlockReader, DatabaseProviderRW, PruneCheckpointWriter, TransactionsProvider}; use tracing::{instrument, trace}; @@ -39,7 +40,7 @@ impl Segment for ReceiptsByLogs { // for the other receipts it's as if they had a `PruneMode::Distance()` of // `MINIMUM_PRUNING_DISTANCE`. let to_block = PruneMode::Distance(MINIMUM_PRUNING_DISTANCE) - .prune_target_block(input.to_block, PruneSegment::ContractLogs)? + .prune_target_block(input.to_block, PruneSegment::ContractLogs, PrunePurpose::User)? 
.map(|(bn, _)| bn) .unwrap_or_default(); @@ -217,7 +218,7 @@ mod tests { }; use reth_primitives::{PruneMode, PruneSegment, ReceiptsLogPruneConfig, B256}; use reth_provider::{PruneCheckpointReader, TransactionsProvider}; - use reth_stages::test_utils::TestStageDB; + use reth_stages::test_utils::{StorageKind, TestStageDB}; use std::collections::BTreeMap; #[test] @@ -232,7 +233,7 @@ mod tests { random_block_range(&mut rng, (tip - 100 + 1)..=tip, B256::ZERO, 1..5), ] .concat(); - db.insert_blocks(blocks.iter(), None).expect("insert blocks"); + db.insert_blocks(blocks.iter(), StorageKind::Database(None)).expect("insert blocks"); let mut receipts = Vec::new(); diff --git a/crates/prune/src/segments/sender_recovery.rs b/crates/prune/src/segments/sender_recovery.rs index 421ff77173e..0684fbd37bd 100644 --- a/crates/prune/src/segments/sender_recovery.rs +++ b/crates/prune/src/segments/sender_recovery.rs @@ -81,7 +81,7 @@ mod tests { use reth_interfaces::test_utils::{generators, generators::random_block_range}; use reth_primitives::{BlockNumber, PruneCheckpoint, PruneMode, PruneSegment, TxNumber, B256}; use reth_provider::PruneCheckpointReader; - use reth_stages::test_utils::TestStageDB; + use reth_stages::test_utils::{StorageKind, TestStageDB}; use std::ops::Sub; #[test] @@ -90,7 +90,7 @@ mod tests { let mut rng = generators::rng(); let blocks = random_block_range(&mut rng, 1..=10, B256::ZERO, 2..3); - db.insert_blocks(blocks.iter(), None).expect("insert blocks"); + db.insert_blocks(blocks.iter(), StorageKind::Database(None)).expect("insert blocks"); let mut transaction_senders = Vec::new(); for block in &blocks { diff --git a/crates/prune/src/segments/set.rs b/crates/prune/src/segments/set.rs index 5fdfdf58077..7978bd4e583 100644 --- a/crates/prune/src/segments/set.rs +++ b/crates/prune/src/segments/set.rs @@ -4,12 +4,11 @@ use crate::segments::{ }; use reth_db::database::Database; use reth_primitives::PruneModes; -use std::sync::Arc; /// Collection of [Segment]. Thread-safe, allocated on the heap. #[derive(Debug)] pub struct SegmentSet { - inner: Vec>>, + inner: Vec>>, } impl SegmentSet { @@ -20,7 +19,7 @@ impl SegmentSet { /// Adds new [Segment] to collection. pub fn segment + 'static>(mut self, segment: S) -> Self { - self.inner.push(Arc::new(segment)); + self.inner.push(Box::new(segment)); self } @@ -33,7 +32,7 @@ impl SegmentSet { } /// Consumes [SegmentSet] and returns a [Vec]. 
- pub fn into_vec(self) -> Vec>> { + pub fn into_vec(self) -> Vec>> { self.inner } diff --git a/crates/prune/src/segments/storage_history.rs b/crates/prune/src/segments/storage_history.rs index 7c0da3b8413..eba8d172424 100644 --- a/crates/prune/src/segments/storage_history.rs +++ b/crates/prune/src/segments/storage_history.rs @@ -94,7 +94,7 @@ mod tests { }; use reth_primitives::{BlockNumber, PruneCheckpoint, PruneMode, PruneSegment, B256}; use reth_provider::PruneCheckpointReader; - use reth_stages::test_utils::TestStageDB; + use reth_stages::test_utils::{StorageKind, TestStageDB}; use std::{collections::BTreeMap, ops::AddAssign}; #[test] @@ -103,7 +103,7 @@ mod tests { let mut rng = generators::rng(); let blocks = random_block_range(&mut rng, 0..=5000, B256::ZERO, 0..1); - db.insert_blocks(blocks.iter(), None).expect("insert blocks"); + db.insert_blocks(blocks.iter(), StorageKind::Database(None)).expect("insert blocks"); let accounts = random_eoa_accounts(&mut rng, 2).into_iter().collect::>(); diff --git a/crates/prune/src/segments/transaction_lookup.rs b/crates/prune/src/segments/transaction_lookup.rs index 6b1dfc8d2c3..37943144834 100644 --- a/crates/prune/src/segments/transaction_lookup.rs +++ b/crates/prune/src/segments/transaction_lookup.rs @@ -104,7 +104,7 @@ mod tests { use reth_interfaces::test_utils::{generators, generators::random_block_range}; use reth_primitives::{BlockNumber, PruneCheckpoint, PruneMode, PruneSegment, TxNumber, B256}; use reth_provider::PruneCheckpointReader; - use reth_stages::test_utils::TestStageDB; + use reth_stages::test_utils::{StorageKind, TestStageDB}; use std::ops::Sub; #[test] @@ -113,7 +113,7 @@ mod tests { let mut rng = generators::rng(); let blocks = random_block_range(&mut rng, 1..=10, B256::ZERO, 2..3); - db.insert_blocks(blocks.iter(), None).expect("insert blocks"); + db.insert_blocks(blocks.iter(), StorageKind::Database(None)).expect("insert blocks"); let mut tx_hash_numbers = Vec::new(); for block in &blocks { diff --git a/crates/prune/src/segments/transactions.rs b/crates/prune/src/segments/transactions.rs index 7155cd8888a..3c2ac425536 100644 --- a/crates/prune/src/segments/transactions.rs +++ b/crates/prune/src/segments/transactions.rs @@ -80,7 +80,7 @@ mod tests { use reth_interfaces::test_utils::{generators, generators::random_block_range}; use reth_primitives::{BlockNumber, PruneCheckpoint, PruneMode, PruneSegment, TxNumber, B256}; use reth_provider::PruneCheckpointReader; - use reth_stages::test_utils::TestStageDB; + use reth_stages::test_utils::{StorageKind, TestStageDB}; use std::ops::Sub; #[test] @@ -89,7 +89,7 @@ mod tests { let mut rng = generators::rng(); let blocks = random_block_range(&mut rng, 1..=100, B256::ZERO, 2..3); - db.insert_blocks(blocks.iter(), None).expect("insert blocks"); + db.insert_blocks(blocks.iter(), StorageKind::Database(None)).expect("insert blocks"); let transactions = blocks.iter().flat_map(|block| &block.body).collect::>(); diff --git a/crates/rpc/rpc/Cargo.toml b/crates/rpc/rpc/Cargo.toml index 517c7ca1574..566fa90eec3 100644 --- a/crates/rpc/rpc/Cargo.toml +++ b/crates/rpc/rpc/Cargo.toml @@ -81,7 +81,7 @@ tracing.workspace = true tracing-futures = "0.2" schnellru.workspace = true futures.workspace = true -derive_more = "0.99" +derive_more.workspace = true lazy_static = "*" [dev-dependencies] diff --git a/crates/snapshot/README.md b/crates/snapshot/README.md deleted file mode 100644 index 6056bbf9f0a..00000000000 --- a/crates/snapshot/README.md +++ /dev/null @@ -1,88 +0,0 @@ -# Snapshot - -## Overview 
- -Data that has reached a finalized state and won't undergo further changes (essentially frozen) should be read without concerns of modification. This makes it unsuitable for traditional databases. - -This crate aims to copy this data from the current database to multiple static files, aggregated by block ranges. At every 500_000th block new static files are created. - -Below are two diagrams illustrating the processes of creating static files (custom format: `NippyJar`) and querying them. A glossary is also provided to explain the different (linked) components involved in these processes. - -
- Creation diagram (Snapshotter) - -```mermaid -graph TD; - I("BLOCK_HEIGHT % 500_000 == 0")--triggers-->SP(Snapshotter) - SP --> |triggers| SH["create_snapshot(block_range, SnapshotSegment::Headers)"] - SP --> |triggers| ST["create_snapshot(block_range, SnapshotSegment::Transactions)"] - SP --> |triggers| SR["create_snapshot(block_range, SnapshotSegment::Receipts)"] - SP --> |triggers| ETC["create_snapshot(block_range, ...)"] - SH --> CS["create_snapshot::< T >(DatabaseCursor)"] - ST --> CS - SR --> CS - ETC --> CS - CS --> |create| IF(NippyJar::InclusionFilters) - CS -- iterates --> DC(DatabaseCursor) -->HN{HasNext} - HN --> |true| NJC(NippyJar::Compression) - NJC --> HN - NJC --store--> NJ - HN --> |false| NJ - IF --store--> NJ(NippyJar) - NJ --freeze--> F(File) - F--"on success"--> SP1(Snapshotter) - SP1 --"sends BLOCK_HEIGHT"--> HST(HighestSnapshotTracker) - HST --"read by"-->Pruner - HST --"read by"-->DatabaseProvider - HST --"read by"-->SnapsotProvider - HST --"read by"-->ProviderFactory - -``` -
- - -
-  Query diagram (Provider)
-
-```mermaid
-graph TD;
-    RPC-->P
-    P("Provider::header(block_number)")-->PF(ProviderFactory)
-    PF--shares-->SP1("Arc(SnapshotProvider)")
-    SP1--shares-->PD(DatabaseProvider)
-    PF--creates-->PD
-    PD--check `HighestSnapshotTracker`-->PD
-    PD-->DC1{block_number > highest snapshot block}
-    DC1 --> |true| PD1("DatabaseProvider::header(block_number)")
-    DC1 --> |false| ASP("SnapshotProvider::header(block_number)")
-    PD1 --> MDBX
-    ASP --find correct jar and creates--> JP("SnapshotJarProvider::header(block_number)")
-    JP --"creates"-->SC(SnapshotCursor)
-    SC --".get_one< HeaderMask< Header > >(number)"--->NJC("NippyJarCursor")
-    NJC--".row_by_number(row_index, mask)"-->NJ[NippyJar]
-    NJ--"&[u8]"-->NJC
-    NJC--"&[u8]"-->SC
-    SC--"Header"--> JP
-    JP--"Header"--> ASP
-```
-
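The query path above is essentially a dispatch on the requested block number: anything at or below the highest snapshotted block is served from a snapshot file, everything newer from the database. A hedged sketch of that decision follows; `HighestSnapshots`, `read_from_snapshot`, and `read_from_database` are simplified stand-ins rather than the crate's real types and methods.

```rust
/// Simplified stand-in for the tracked highest snapshotted block per segment.
#[derive(Clone, Copy, Default)]
struct HighestSnapshots {
    headers: Option<u64>,
}

/// Returns the raw header bytes for `block_number`, routing the read either
/// to a snapshot file or to the database depending on the tracked height.
fn header_bytes(block_number: u64, highest: HighestSnapshots) -> Option<Vec<u8>> {
    match highest.headers {
        // Block is already frozen: serve it from the snapshot (jar) file.
        Some(h) if block_number <= h => read_from_snapshot(block_number),
        // Otherwise the data only exists in the database (MDBX).
        _ => read_from_database(block_number),
    }
}

fn read_from_snapshot(_block_number: u64) -> Option<Vec<u8>> {
    None // stand-in for a NippyJar row lookup by number
}

fn read_from_database(_block_number: u64) -> Option<Vec<u8>> {
    None // stand-in for an MDBX cursor lookup
}
```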
- - -### Glossary -In descending order of abstraction hierarchy: - -[`Snapshotter`](../../crates/snapshot/src/snapshotter.rs#L20): A `reth` background service that **copies** data from the database to new snapshot files when the block height reaches a certain threshold (e.g., `500_000th`). Upon completion, it dispatches a notification about the higher snapshotted block to `HighestSnapshotTracker` channel. **It DOES NOT remove data from the database.** - -[`HighestSnapshotTracker`](../../crates/snapshot/src/snapshotter.rs#L22): A channel utilized by `Snapshotter` to announce the newest snapshot block to all components with a listener: `Pruner` (to know which additional tables can be pruned) and `DatabaseProvider` (to know which data can be queried from the snapshots). - -[`SnapshotProvider`](../../crates/storage/provider/src/providers/snapshot/manager.rs#L15) A provider similar to `DatabaseProvider`, **managing all existing snapshot files** and selecting the optimal one (by range and segment type) to fulfill a request. **A single instance is shared across all components and should be instantiated only once within `ProviderFactory`**. An immutable reference is given everytime `ProviderFactory` creates a new `DatabaseProvider`. - -[`SnapshotJarProvider`](../../crates/storage/provider/src/providers/snapshot/jar.rs#L42) A provider similar to `DatabaseProvider` that provides access to a **single snapshot file**. - -[`SnapshotCursor`](../../crates/storage/db/src/snapshot/cursor.rs#L12) An elevated abstraction of `NippyJarCursor` for simplified access. It associates the bitmasks with type decoding. For instance, `cursor.get_two::>(tx_number)` would yield `Tx` and `Signature`, eliminating the need to manage masks or invoke a decoder/decompressor. - -[`SnapshotSegment`](../../crates/primitives/src/snapshot/segment.rs#L10) Each snapshot file only contains data of a specific segment, e.g., `Headers`, `Transactions`, or `Receipts`. - -[`NippyJarCursor`](../../crates/storage/nippy-jar/src/cursor.rs#L12) Accessor of data in a `NippyJar` file. It enables queries either by row number (e.g., block number 1) or by a predefined key not part of the file (e.g., transaction hashes). If a file has multiple columns (e.g., `Tx | TxSender | Signature`), and one wishes to access only one of the column values, this can be accomplished by bitmasks. (e.g., for `TxSender`, the mask would be `0b010`). - -[`NippyJar`](../../crates/storage/nippy-jar/src/lib.rs#57) A create-only file format. No data can be appended after creation. It supports multiple columns, compression (e.g., Zstd (with and without dictionaries), lz4, uncompressed) and inclusion filters (e.g., cuckoo filter: `is hash X part of this dataset`). Snapshots are organized by block ranges. (e.g., `TransactionSnapshot_499_999.jar` contains a transaction per row for all transactions from block `0` to block `499_999`). For more check the struct documentation. diff --git a/crates/snapshot/src/error.rs b/crates/snapshot/src/error.rs deleted file mode 100644 index 302803835cf..00000000000 --- a/crates/snapshot/src/error.rs +++ /dev/null @@ -1,25 +0,0 @@ -use reth_db::DatabaseError; -use reth_interfaces::RethError; -use reth_provider::ProviderError; -use thiserror::Error; - -/// Error returned by [crate::Snapshotter::run] -#[derive(Error, Debug)] -/// Errors that can occur during snapshotting. -pub enum SnapshotterError { - /// Inconsistent data error. - #[error("inconsistent data: {0}")] - InconsistentData(&'static str), - - /// Error related to the interface. 
- #[error(transparent)] - Interface(#[from] RethError), - - /// Error related to the database. - #[error(transparent)] - Database(#[from] DatabaseError), - - /// Error related to the provider. - #[error(transparent)] - Provider(#[from] ProviderError), -} diff --git a/crates/snapshot/src/segments/headers.rs b/crates/snapshot/src/segments/headers.rs deleted file mode 100644 index feb2b1f2936..00000000000 --- a/crates/snapshot/src/segments/headers.rs +++ /dev/null @@ -1,98 +0,0 @@ -use crate::segments::{prepare_jar, Segment, SegmentHeader}; -use reth_db::{ - cursor::DbCursorRO, database::Database, snapshot::create_snapshot_T1_T2_T3, tables, - transaction::DbTx, RawKey, RawTable, -}; -use reth_interfaces::provider::ProviderResult; -use reth_primitives::{ - snapshot::{Compression, Filters, SegmentConfig}, - BlockNumber, SnapshotSegment, -}; -use reth_provider::DatabaseProviderRO; -use std::{ops::RangeInclusive, path::Path}; - -/// Snapshot segment responsible for [SnapshotSegment::Headers] part of data. -#[derive(Debug)] -pub struct Headers { - config: SegmentConfig, -} - -impl Headers { - /// Creates new instance of [Headers] snapshot segment. - pub fn new(compression: Compression, filters: Filters) -> Self { - Self { config: SegmentConfig { compression, filters } } - } -} - -impl Default for Headers { - fn default() -> Self { - Self { config: SnapshotSegment::Headers.config() } - } -} - -impl Segment for Headers { - fn segment(&self) -> SnapshotSegment { - SnapshotSegment::Headers - } - - fn snapshot( - &self, - provider: &DatabaseProviderRO, - directory: impl AsRef, - range: RangeInclusive, - ) -> ProviderResult<()> { - let range_len = range.clone().count(); - let mut jar = prepare_jar::( - provider, - directory, - self.segment(), - self.config, - range.clone(), - range_len, - || { - Ok([ - self.dataset_for_compression::( - provider, &range, range_len, - )?, - self.dataset_for_compression::( - provider, &range, range_len, - )?, - self.dataset_for_compression::( - provider, &range, range_len, - )?, - ]) - }, - )?; - - // Generate list of hashes for filters & PHF - let mut cursor = provider.tx_ref().cursor_read::>()?; - let mut hashes = None; - if self.config.filters.has_filters() { - hashes = Some( - cursor - .walk(Some(RawKey::from(*range.start())))? - .take(range_len) - .map(|row| row.map(|(_key, value)| value.into_value()).map_err(|e| e.into())), - ); - } - - create_snapshot_T1_T2_T3::< - tables::Headers, - tables::HeaderTerminalDifficulties, - tables::CanonicalHeaders, - BlockNumber, - SegmentHeader, - >( - provider.tx_ref(), - range, - None, - // We already prepared the dictionary beforehand - None::>>>, - hashes, - range_len, - &mut jar, - )?; - - Ok(()) - } -} diff --git a/crates/snapshot/src/segments/mod.rs b/crates/snapshot/src/segments/mod.rs deleted file mode 100644 index 68b1b81b099..00000000000 --- a/crates/snapshot/src/segments/mod.rs +++ /dev/null @@ -1,100 +0,0 @@ -//! Snapshot segment implementations and utilities. 
- -mod transactions; -pub use transactions::Transactions; - -mod headers; -pub use headers::Headers; - -mod receipts; -pub use receipts::Receipts; - -use reth_db::{ - cursor::DbCursorRO, database::Database, table::Table, transaction::DbTx, RawKey, RawTable, -}; -use reth_interfaces::provider::ProviderResult; -use reth_nippy_jar::NippyJar; -use reth_primitives::{ - snapshot::{ - Compression, Filters, InclusionFilter, PerfectHashingFunction, SegmentConfig, SegmentHeader, - }, - BlockNumber, SnapshotSegment, -}; -use reth_provider::{DatabaseProviderRO, TransactionsProviderExt}; -use std::{ops::RangeInclusive, path::Path}; - -pub(crate) type Rows = [Vec>; COLUMNS]; - -/// A segment represents a snapshotting of some portion of the data. -pub trait Segment: Default { - /// Snapshot data using the provided range. The `directory` parameter determines the snapshot - /// file's save location. - fn snapshot( - &self, - provider: &DatabaseProviderRO, - directory: impl AsRef, - range: RangeInclusive, - ) -> ProviderResult<()>; - - /// Returns this struct's [`SnapshotSegment`]. - fn segment(&self) -> SnapshotSegment; - - /// Generates the dataset to train a zstd dictionary with the most recent rows (at most 1000). - fn dataset_for_compression>( - &self, - provider: &DatabaseProviderRO, - range: &RangeInclusive, - range_len: usize, - ) -> ProviderResult>> { - let mut cursor = provider.tx_ref().cursor_read::>()?; - Ok(cursor - .walk_back(Some(RawKey::from(*range.end())))? - .take(range_len.min(1000)) - .map(|row| row.map(|(_key, value)| value.into_value()).expect("should exist")) - .collect::>()) - } -} - -/// Returns a [`NippyJar`] according to the desired configuration. The `directory` parameter -/// determines the snapshot file's save location. -pub(crate) fn prepare_jar( - provider: &DatabaseProviderRO, - directory: impl AsRef, - segment: SnapshotSegment, - segment_config: SegmentConfig, - block_range: RangeInclusive, - total_rows: usize, - prepare_compression: impl Fn() -> ProviderResult>, -) -> ProviderResult> { - let tx_range = provider.transaction_range_by_block_range(block_range.clone())?; - let mut nippy_jar = NippyJar::new( - COLUMNS, - &directory.as_ref().join(segment.filename(&block_range, &tx_range).as_str()), - SegmentHeader::new(block_range, tx_range, segment), - ); - - nippy_jar = match segment_config.compression { - Compression::Lz4 => nippy_jar.with_lz4(), - Compression::Zstd => nippy_jar.with_zstd(false, 0), - Compression::ZstdWithDictionary => { - let dataset = prepare_compression()?; - - nippy_jar = nippy_jar.with_zstd(true, 5_000_000); - nippy_jar.prepare_compression(dataset.to_vec())?; - nippy_jar - } - Compression::Uncompressed => nippy_jar, - }; - - if let Filters::WithFilters(inclusion_filter, phf) = segment_config.filters { - nippy_jar = match inclusion_filter { - InclusionFilter::Cuckoo => nippy_jar.with_cuckoo_filter(total_rows), - }; - nippy_jar = match phf { - PerfectHashingFunction::Fmph => nippy_jar.with_fmph(), - PerfectHashingFunction::GoFmph => nippy_jar.with_gofmph(), - }; - } - - Ok(nippy_jar) -} diff --git a/crates/snapshot/src/segments/receipts.rs b/crates/snapshot/src/segments/receipts.rs deleted file mode 100644 index 5c5a4811297..00000000000 --- a/crates/snapshot/src/segments/receipts.rs +++ /dev/null @@ -1,84 +0,0 @@ -use crate::segments::{prepare_jar, Segment}; -use reth_db::{database::Database, snapshot::create_snapshot_T1, tables}; -use reth_interfaces::provider::ProviderResult; -use reth_primitives::{ - snapshot::{Compression, Filters, SegmentConfig, 
SegmentHeader}, - BlockNumber, SnapshotSegment, TxNumber, -}; -use reth_provider::{DatabaseProviderRO, TransactionsProviderExt}; -use std::{ops::RangeInclusive, path::Path}; - -/// Snapshot segment responsible for [SnapshotSegment::Receipts] part of data. -#[derive(Debug)] -pub struct Receipts { - config: SegmentConfig, -} - -impl Receipts { - /// Creates new instance of [Receipts] snapshot segment. - pub fn new(compression: Compression, filters: Filters) -> Self { - Self { config: SegmentConfig { compression, filters } } - } -} - -impl Default for Receipts { - fn default() -> Self { - Self { config: SnapshotSegment::Receipts.config() } - } -} - -impl Segment for Receipts { - fn segment(&self) -> SnapshotSegment { - SnapshotSegment::Receipts - } - - fn snapshot( - &self, - provider: &DatabaseProviderRO, - directory: impl AsRef, - block_range: RangeInclusive, - ) -> ProviderResult<()> { - let tx_range = provider.transaction_range_by_block_range(block_range.clone())?; - let tx_range_len = tx_range.clone().count(); - - let mut jar = prepare_jar::( - provider, - directory, - self.segment(), - self.config, - block_range, - tx_range_len, - || { - Ok([self.dataset_for_compression::( - provider, - &tx_range, - tx_range_len, - )?]) - }, - )?; - - // Generate list of hashes for filters & PHF - let mut hashes = None; - if self.config.filters.has_filters() { - hashes = Some( - provider - .transaction_hashes_by_range(*tx_range.start()..(*tx_range.end() + 1))? - .into_iter() - .map(|(tx, _)| Ok(tx)), - ); - } - - create_snapshot_T1::( - provider.tx_ref(), - tx_range, - None, - // We already prepared the dictionary beforehand - None::>>>, - hashes, - tx_range_len, - &mut jar, - )?; - - Ok(()) - } -} diff --git a/crates/snapshot/src/segments/transactions.rs b/crates/snapshot/src/segments/transactions.rs deleted file mode 100644 index ea936bd95bf..00000000000 --- a/crates/snapshot/src/segments/transactions.rs +++ /dev/null @@ -1,84 +0,0 @@ -use crate::segments::{prepare_jar, Segment}; -use reth_db::{database::Database, snapshot::create_snapshot_T1, tables}; -use reth_interfaces::provider::ProviderResult; -use reth_primitives::{ - snapshot::{Compression, Filters, SegmentConfig, SegmentHeader}, - BlockNumber, SnapshotSegment, TxNumber, -}; -use reth_provider::{DatabaseProviderRO, TransactionsProviderExt}; -use std::{ops::RangeInclusive, path::Path}; - -/// Snapshot segment responsible for [SnapshotSegment::Transactions] part of data. -#[derive(Debug)] -pub struct Transactions { - config: SegmentConfig, -} - -impl Transactions { - /// Creates new instance of [Transactions] snapshot segment. 
- pub fn new(compression: Compression, filters: Filters) -> Self { - Self { config: SegmentConfig { compression, filters } } - } -} - -impl Default for Transactions { - fn default() -> Self { - Self { config: SnapshotSegment::Transactions.config() } - } -} - -impl Segment for Transactions { - fn segment(&self) -> SnapshotSegment { - SnapshotSegment::Transactions - } - - fn snapshot( - &self, - provider: &DatabaseProviderRO, - directory: impl AsRef, - block_range: RangeInclusive, - ) -> ProviderResult<()> { - let tx_range = provider.transaction_range_by_block_range(block_range.clone())?; - let tx_range_len = tx_range.clone().count(); - - let mut jar = prepare_jar::( - provider, - directory, - self.segment(), - self.config, - block_range, - tx_range_len, - || { - Ok([self.dataset_for_compression::( - provider, - &tx_range, - tx_range_len, - )?]) - }, - )?; - - // Generate list of hashes for filters & PHF - let mut hashes = None; - if self.config.filters.has_filters() { - hashes = Some( - provider - .transaction_hashes_by_range(*tx_range.start()..(*tx_range.end() + 1))? - .into_iter() - .map(|(tx, _)| Ok(tx)), - ); - } - - create_snapshot_T1::( - provider.tx_ref(), - tx_range, - None, - // We already prepared the dictionary beforehand - None::>>>, - hashes, - tx_range_len, - &mut jar, - )?; - - Ok(()) - } -} diff --git a/crates/snapshot/src/snapshotter.rs b/crates/snapshot/src/snapshotter.rs deleted file mode 100644 index 993c4c14099..00000000000 --- a/crates/snapshot/src/snapshotter.rs +++ /dev/null @@ -1,397 +0,0 @@ -//! Support for snapshotting. - -use crate::{segments, segments::Segment, SnapshotterError}; -use reth_db::{database::Database, snapshot::iter_snapshots}; -use reth_interfaces::{RethError, RethResult}; -use reth_primitives::{snapshot::HighestSnapshots, BlockNumber, TxNumber}; -use reth_provider::{BlockReader, DatabaseProviderRO, ProviderFactory, TransactionsProviderExt}; -use std::{ - collections::HashMap, - ops::RangeInclusive, - path::{Path, PathBuf}, -}; -use tokio::sync::watch; -use tracing::warn; - -/// Result of [Snapshotter::run] execution. -pub type SnapshotterResult = Result; - -/// The snapshotter type itself with the result of [Snapshotter::run] -pub type SnapshotterWithResult = (Snapshotter, SnapshotterResult); - -/// Snapshots are initially created in `{...}/datadir/snapshots/temp` and moved once finished. This -/// directory is cleaned up on every booting up of the node. -const TEMPORARY_SUBDIRECTORY: &str = "temp"; - -/// Snapshotting routine. Main snapshotting logic happens in [Snapshotter::run]. -#[derive(Debug)] -pub struct Snapshotter { - /// Provider factory - provider_factory: ProviderFactory, - /// Directory where snapshots are located - snapshots_path: PathBuf, - /// Highest snapshotted block numbers for each segment - highest_snapshots: HighestSnapshots, - /// Channel sender to notify other components of the new highest snapshots - highest_snapshots_notifier: watch::Sender>, - /// Channel receiver to be cloned and shared that already comes with the newest value - highest_snapshots_tracker: HighestSnapshotsTracker, - /// Block interval after which the snapshot is taken. - block_interval: u64, -} - -/// Tracker for the latest [`HighestSnapshots`] value. -pub type HighestSnapshotsTracker = watch::Receiver>; - -/// Snapshot targets, per data part, measured in [`BlockNumber`] and [`TxNumber`], if applicable. 
-#[derive(Debug, Clone, Eq, PartialEq)] -pub struct SnapshotTargets { - headers: Option>, - receipts: Option<(RangeInclusive, RangeInclusive)>, - transactions: Option<(RangeInclusive, RangeInclusive)>, -} - -impl SnapshotTargets { - /// Returns `true` if any of the targets are [Some]. - pub fn any(&self) -> bool { - self.headers.is_some() || self.receipts.is_some() || self.transactions.is_some() - } - - /// Returns `true` if all targets are either [None] or multiple of `block_interval`. - fn is_multiple_of_block_interval(&self, block_interval: u64) -> bool { - [ - self.headers.as_ref(), - self.receipts.as_ref().map(|(blocks, _)| blocks), - self.transactions.as_ref().map(|(blocks, _)| blocks), - ] - .iter() - .all(|blocks| blocks.map_or(true, |blocks| (blocks.end() + 1) % block_interval == 0)) - } - - // Returns `true` if all targets are either [`None`] or has beginning of the range equal to the - // highest snapshot. - fn is_contiguous_to_highest_snapshots(&self, snapshots: HighestSnapshots) -> bool { - [ - (self.headers.as_ref(), snapshots.headers), - (self.receipts.as_ref().map(|(blocks, _)| blocks), snapshots.receipts), - (self.transactions.as_ref().map(|(blocks, _)| blocks), snapshots.transactions), - ] - .iter() - .all(|(target, highest)| { - target.map_or(true, |block_number| { - highest.map_or(*block_number.start() == 0, |previous_block_number| { - *block_number.start() == previous_block_number + 1 - }) - }) - }) - } -} - -impl Snapshotter { - /// Creates a new [Snapshotter]. - pub fn new( - provider_factory: ProviderFactory, - snapshots_path: impl AsRef, - block_interval: u64, - ) -> RethResult { - let (highest_snapshots_notifier, highest_snapshots_tracker) = watch::channel(None); - - let mut snapshotter = Self { - provider_factory, - snapshots_path: snapshots_path.as_ref().into(), - highest_snapshots: HighestSnapshots::default(), - highest_snapshots_notifier, - highest_snapshots_tracker, - block_interval, - }; - - snapshotter.create_directory()?; - snapshotter.update_highest_snapshots_tracker()?; - - Ok(snapshotter) - } - - /// Ensures the snapshots directory and its temporary subdirectory are properly set up. - /// - /// This function performs the following actions: - /// 1. If `datadir/snapshots` does not exist, it creates it. - /// 2. Ensures `datadir/snapshots/temp` exists and is empty. - /// - /// The `temp` subdirectory is where snapshots are initially created before being - /// moved to their final location within `datadir/snapshots`. - fn create_directory(&self) -> RethResult<()> { - let temporary_path = self.snapshots_path.join(TEMPORARY_SUBDIRECTORY); - - if !self.snapshots_path.exists() { - reth_primitives::fs::create_dir_all(&self.snapshots_path)?; - } else if temporary_path.exists() { - reth_primitives::fs::remove_dir_all(&temporary_path)?; - } - - reth_primitives::fs::create_dir_all(temporary_path)?; - - Ok(()) - } - - #[cfg(test)] - fn set_highest_snapshots_from_targets(&mut self, targets: &SnapshotTargets) { - if let Some(block_number) = &targets.headers { - self.highest_snapshots.headers = Some(*block_number.end()); - } - if let Some((block_number, _)) = &targets.receipts { - self.highest_snapshots.receipts = Some(*block_number.end()); - } - if let Some((block_number, _)) = &targets.transactions { - self.highest_snapshots.transactions = Some(*block_number.end()); - } - } - - /// Looks into the snapshot directory to find the highest snapshotted block of each segment, and - /// notifies every tracker. 
- fn update_highest_snapshots_tracker(&mut self) -> RethResult<()> { - // It walks over the directory and parses the snapshot filenames extracting - // `SnapshotSegment` and their inclusive range. It then takes the maximum block - // number for each specific segment. - for (segment, ranges) in - iter_snapshots(&self.snapshots_path).map_err(|err| RethError::Provider(err.into()))? - { - for (block_range, _) in ranges { - let max_segment_block = self.highest_snapshots.as_mut(segment); - if max_segment_block.map_or(true, |block| block < *block_range.end()) { - *max_segment_block = Some(*block_range.end()); - } - } - } - - let _ = self.highest_snapshots_notifier.send(Some(self.highest_snapshots)).map_err(|_| { - warn!(target: "snapshot", "Highest snapshots channel closed"); - }); - - Ok(()) - } - - /// Returns a new [`HighestSnapshotsTracker`]. - pub fn highest_snapshot_receiver(&self) -> HighestSnapshotsTracker { - self.highest_snapshots_tracker.clone() - } - - /// Run the snapshotter - pub fn run(&mut self, targets: SnapshotTargets) -> SnapshotterResult { - debug_assert!(targets.is_multiple_of_block_interval(self.block_interval)); - debug_assert!(targets.is_contiguous_to_highest_snapshots(self.highest_snapshots)); - - self.run_segment::(targets.receipts.clone().map(|(range, _)| range))?; - - self.run_segment::( - targets.transactions.clone().map(|(range, _)| range), - )?; - - self.run_segment::(targets.headers.clone())?; - - self.update_highest_snapshots_tracker()?; - - Ok(targets) - } - - /// Run the snapshotter for one segment. - /// - /// It first builds the snapshot in a **temporary directory** inside the snapshots directory. If - /// for some reason the node is terminated during the snapshot process, it will be cleaned - /// up on boot (on [`Snapshotter::new`]) and the snapshot process restarted from scratch for - /// this block range and segment. - /// - /// If it succeeds, then we move the snapshot file from the temporary directory to its main one. - fn run_segment( - &self, - block_range: Option>, - ) -> RethResult<()> { - if let Some(block_range) = block_range { - let temp = self.snapshots_path.join(TEMPORARY_SUBDIRECTORY); - let provider = self.provider_factory.provider()?; - let tx_range = provider.transaction_range_by_block_range(block_range.clone())?; - let segment = S::default(); - let filename = segment.segment().filename(&block_range, &tx_range); - - segment.snapshot::(&provider, temp.clone(), block_range)?; - - reth_primitives::fs::rename(temp.join(&filename), self.snapshots_path.join(filename))?; - } - Ok(()) - } - - /// Returns a snapshot targets at the provided finalized block number, respecting the block - /// interval. The target is determined by the check against last snapshots. 
- pub fn get_snapshot_targets( - &self, - finalized_block_number: BlockNumber, - ) -> RethResult { - let provider = self.provider_factory.provider()?; - - // Round down `finalized_block_number` to a multiple of `block_interval` - let to_block_number = finalized_block_number.saturating_sub( - // Adjust for 0-indexed block numbers - (finalized_block_number + 1) % self.block_interval, - ); - - // Calculate block ranges to snapshot - let headers_block_range = - self.get_snapshot_target_block_range(to_block_number, self.highest_snapshots.headers); - let receipts_block_range = - self.get_snapshot_target_block_range(to_block_number, self.highest_snapshots.receipts); - let transactions_block_range = self - .get_snapshot_target_block_range(to_block_number, self.highest_snapshots.transactions); - - // Calculate transaction ranges to snapshot - let mut block_to_tx_number_cache = HashMap::default(); - let receipts_tx_range = self.get_snapshot_target_tx_range( - &provider, - &mut block_to_tx_number_cache, - self.highest_snapshots.receipts, - &receipts_block_range, - )?; - let transactions_tx_range = self.get_snapshot_target_tx_range( - &provider, - &mut block_to_tx_number_cache, - self.highest_snapshots.transactions, - &transactions_block_range, - )?; - - Ok(SnapshotTargets { - headers: headers_block_range - .size_hint() - .1 - .expect("finalized block should be >= last headers snapshot") - .ge(&(self.block_interval as usize)) - .then_some(headers_block_range), - receipts: receipts_block_range - .size_hint() - .1 - .expect("finalized block should be >= last receipts snapshot") - .ge(&(self.block_interval as usize)) - .then_some((receipts_block_range, receipts_tx_range)), - transactions: transactions_block_range - .size_hint() - .1 - .expect("finalized block should be >= last transactions snapshot") - .ge(&(self.block_interval as usize)) - .then_some((transactions_block_range, transactions_tx_range)), - }) - } - - fn get_snapshot_target_block_range( - &self, - to_block_number: BlockNumber, - highest_snapshot: Option, - ) -> RangeInclusive { - let highest_snapshot = highest_snapshot.map_or(0, |block_number| block_number + 1); - highest_snapshot..=(highest_snapshot + self.block_interval - 1).min(to_block_number) - } - - fn get_snapshot_target_tx_range( - &self, - provider: &DatabaseProviderRO, - block_to_tx_number_cache: &mut HashMap, - highest_snapshot: Option, - block_range: &RangeInclusive, - ) -> RethResult> { - let from_tx_number = if let Some(block_number) = highest_snapshot { - *block_to_tx_number_cache.entry(block_number).or_insert( - provider - .block_body_indices(block_number)? - .ok_or(RethError::Custom( - "Block body indices for highest snapshot not found".to_string(), - ))? - .next_tx_num(), - ) - } else { - 0 - }; - - let to_tx_number = *block_to_tx_number_cache.entry(*block_range.end()).or_insert( - provider - .block_body_indices(*block_range.end())? - .ok_or(RethError::Custom( - "Block body indices for block range end not found".to_string(), - ))? 
- .last_tx_num(), - ); - Ok(from_tx_number..=to_tx_number) - } -} - -#[cfg(test)] -mod tests { - use crate::{snapshotter::SnapshotTargets, Snapshotter}; - use assert_matches::assert_matches; - use reth_interfaces::{ - test_utils::{generators, generators::random_block_range}, - RethError, - }; - use reth_primitives::{snapshot::HighestSnapshots, B256}; - use reth_stages::test_utils::TestStageDB; - - #[test] - fn new() { - let db = TestStageDB::default(); - let snapshots_dir = tempfile::TempDir::new().unwrap(); - let snapshotter = Snapshotter::new(db.factory, snapshots_dir.into_path(), 2).unwrap(); - - assert_eq!( - *snapshotter.highest_snapshot_receiver().borrow(), - Some(HighestSnapshots::default()) - ); - } - - #[test] - fn get_snapshot_targets() { - let db = TestStageDB::default(); - let snapshots_dir = tempfile::TempDir::new().unwrap(); - let mut rng = generators::rng(); - - let blocks = random_block_range(&mut rng, 0..=3, B256::ZERO, 2..3); - db.insert_blocks(blocks.iter(), None).expect("insert blocks"); - - let mut snapshotter = Snapshotter::new(db.factory, snapshots_dir.into_path(), 2).unwrap(); - - // Snapshot targets has data per part up to the passed finalized block number, - // respecting the block interval - let targets = snapshotter.get_snapshot_targets(1).expect("get snapshot targets"); - assert_eq!( - targets, - SnapshotTargets { - headers: Some(0..=1), - receipts: Some((0..=1, 0..=3)), - transactions: Some((0..=1, 0..=3)) - } - ); - assert!(targets.is_multiple_of_block_interval(snapshotter.block_interval)); - assert!(targets.is_contiguous_to_highest_snapshots(snapshotter.highest_snapshots)); - // Imitate snapshotter run according to the targets which updates the last snapshots state - snapshotter.set_highest_snapshots_from_targets(&targets); - - // Nothing to snapshot, last snapshots state of snapshotter doesn't pass the thresholds - assert_eq!( - snapshotter.get_snapshot_targets(2), - Ok(SnapshotTargets { headers: None, receipts: None, transactions: None }) - ); - - // Snapshot targets has data per part up to the passed finalized block number, - // respecting the block interval - let targets = snapshotter.get_snapshot_targets(5).expect("get snapshot targets"); - assert_eq!( - targets, - SnapshotTargets { - headers: Some(2..=3), - receipts: Some((2..=3, 4..=7)), - transactions: Some((2..=3, 4..=7)) - } - ); - assert!(targets.is_multiple_of_block_interval(snapshotter.block_interval)); - assert!(targets.is_contiguous_to_highest_snapshots(snapshotter.highest_snapshots)); - // Imitate snapshotter run according to the targets which updates the last snapshots state - snapshotter.set_highest_snapshots_from_targets(&targets); - - // Block body indices not found - assert_matches!(snapshotter.get_snapshot_targets(5), Err(RethError::Custom(_))); - } -} diff --git a/crates/stages/Cargo.toml b/crates/stages/Cargo.toml index 337d582e22d..fe6a109dace 100644 --- a/crates/stages/Cargo.toml +++ b/crates/stages/Cargo.toml @@ -26,6 +26,8 @@ reth-codecs.workspace = true reth-provider.workspace = true reth-trie.workspace = true reth-tokio-util.workspace = true +reth-etl.workspace = true +reth-static-file.workspace = true # revm revm.workspace = true @@ -41,6 +43,7 @@ tracing.workspace = true # io serde.workspace = true +tempfile.workspace = true # metrics reth-metrics.workspace = true @@ -66,6 +69,7 @@ reth-node-optimism.workspace = true reth-blockchain-tree.workspace = true reth-revm.workspace = true reth-trie = { workspace = true, features = ["test-utils"] } +reth-provider = { workspace = true, 
features = ["test-utils"] } alloy-rlp.workspace = true itertools.workspace = true @@ -82,7 +86,7 @@ criterion = { workspace = true, features = ["async_futures"] } serde_json.workspace = true [features] -test-utils = ["reth-interfaces/test-utils", "reth-db/test-utils"] +test-utils = ["reth-interfaces/test-utils", "reth-db/test-utils", "reth-provider/test-utils"] [[bench]] name = "criterion" diff --git a/crates/stages/benches/criterion.rs b/crates/stages/benches/criterion.rs index e9354503d27..eb668ab74f9 100644 --- a/crates/stages/benches/criterion.rs +++ b/crates/stages/benches/criterion.rs @@ -5,14 +5,14 @@ use criterion::{ }; use pprof::criterion::{Output, PProfProfiler}; use reth_db::{test_utils::TempDatabase, DatabaseEnv}; -use reth_interfaces::test_utils::TestConsensus; -use reth_primitives::stage::StageCheckpoint; + +use reth_primitives::{stage::StageCheckpoint, BlockNumber}; use reth_stages::{ - stages::{MerkleStage, SenderRecoveryStage, TotalDifficultyStage, TransactionLookupStage}, + stages::{MerkleStage, SenderRecoveryStage, TransactionLookupStage}, test_utils::TestStageDB, ExecInput, Stage, StageExt, UnwindInput, }; -use std::{path::PathBuf, sync::Arc}; +use std::{ops::RangeInclusive, sync::Arc}; mod setup; use setup::StageRange; @@ -20,7 +20,7 @@ use setup::StageRange; criterion_group! { name = benches; config = Criterion::default().with_profiler(PProfProfiler::new(1000, Output::Flamegraph(None))); - targets = transaction_lookup, account_hashing, senders, total_difficulty, merkle + targets = transaction_lookup, account_hashing, senders, merkle } criterion_main!(benches); @@ -33,16 +33,9 @@ fn account_hashing(c: &mut Criterion) { group.sample_size(10); let num_blocks = 10_000; - let (path, stage, execution_range) = setup::prepare_account_hashing(num_blocks); + let (db, stage, range) = setup::prepare_account_hashing(num_blocks); - measure_stage_with_path( - path, - &mut group, - setup::stage_unwind, - stage, - execution_range, - "AccountHashing".to_string(), - ); + measure_stage(&mut group, &db, setup::stage_unwind, stage, range, "AccountHashing".to_string()); } fn senders(c: &mut Criterion) { @@ -50,11 +43,13 @@ fn senders(c: &mut Criterion) { // don't need to run each stage for that many times group.sample_size(10); + let db = setup::txs_testdata(DEFAULT_NUM_BLOCKS); + for batch in [1000usize, 10_000, 100_000, 250_000] { let stage = SenderRecoveryStage { commit_threshold: DEFAULT_NUM_BLOCKS }; let label = format!("SendersRecovery-batch-{batch}"); - measure_stage(&mut group, setup::stage_unwind, stage, 0..DEFAULT_NUM_BLOCKS, label); + measure_stage(&mut group, &db, setup::stage_unwind, stage, 0..=DEFAULT_NUM_BLOCKS, label); } } @@ -64,29 +59,15 @@ fn transaction_lookup(c: &mut Criterion) { group.sample_size(10); let stage = TransactionLookupStage::new(DEFAULT_NUM_BLOCKS, None); - measure_stage( - &mut group, - setup::stage_unwind, - stage, - 0..DEFAULT_NUM_BLOCKS, - "TransactionLookup".to_string(), - ); -} - -fn total_difficulty(c: &mut Criterion) { - let mut group = c.benchmark_group("Stages"); - group.measurement_time(std::time::Duration::from_millis(2000)); - group.warm_up_time(std::time::Duration::from_millis(2000)); - // don't need to run each stage for that many times - group.sample_size(10); - let stage = TotalDifficultyStage::new(Arc::new(TestConsensus::default())); + let db = setup::txs_testdata(DEFAULT_NUM_BLOCKS); measure_stage( &mut group, + &db, setup::stage_unwind, stage, - 0..DEFAULT_NUM_BLOCKS, - "TotalDifficulty".to_string(), + 0..=DEFAULT_NUM_BLOCKS, + 
"TransactionLookup".to_string(), ); } @@ -95,44 +76,58 @@ fn merkle(c: &mut Criterion) { // don't need to run each stage for that many times group.sample_size(10); + let db = setup::txs_testdata(DEFAULT_NUM_BLOCKS); + let stage = MerkleStage::Both { clean_threshold: u64::MAX }; measure_stage( &mut group, + &db, setup::unwind_hashes, stage, - 1..DEFAULT_NUM_BLOCKS, + 1..=DEFAULT_NUM_BLOCKS, "Merkle-incremental".to_string(), ); let stage = MerkleStage::Both { clean_threshold: 0 }; measure_stage( &mut group, + &db, setup::unwind_hashes, stage, - 1..DEFAULT_NUM_BLOCKS, + 1..=DEFAULT_NUM_BLOCKS, "Merkle-fullhash".to_string(), ); } -fn measure_stage_with_path( - path: PathBuf, +fn measure_stage( group: &mut BenchmarkGroup<'_, WallTime>, + db: &TestStageDB, setup: F, stage: S, - stage_range: StageRange, + block_interval: RangeInclusive, label: String, ) where S: Clone + Stage>>, F: Fn(S, &TestStageDB, StageRange), { - let db = TestStageDB::new(&path); + let stage_range = ( + ExecInput { + target: Some(*block_interval.end()), + checkpoint: Some(StageCheckpoint::new(*block_interval.start())), + }, + UnwindInput { + checkpoint: StageCheckpoint::new(*block_interval.end()), + unwind_to: *block_interval.start(), + bad_block: None, + }, + ); let (input, _) = stage_range; group.bench_function(label, move |b| { b.to_async(FuturesExecutor).iter_with_setup( || { // criterion setup does not support async, so we have to use our own runtime - setup(stage.clone(), &db, stage_range) + setup(stage.clone(), db, stage_range) }, |_| async { let mut stage = stage.clone(); @@ -147,35 +142,3 @@ fn measure_stage_with_path( ) }); } - -fn measure_stage( - group: &mut BenchmarkGroup<'_, WallTime>, - setup: F, - stage: S, - block_interval: std::ops::Range, - label: String, -) where - S: Clone + Stage>>, - F: Fn(S, &TestStageDB, StageRange), -{ - let path = setup::txs_testdata(block_interval.end); - - measure_stage_with_path( - path, - group, - setup, - stage, - ( - ExecInput { - target: Some(block_interval.end), - checkpoint: Some(StageCheckpoint::new(block_interval.start)), - }, - UnwindInput { - checkpoint: StageCheckpoint::new(block_interval.end), - unwind_to: block_interval.start, - bad_block: None, - }, - ), - label, - ); -} diff --git a/crates/stages/benches/setup/account_hashing.rs b/crates/stages/benches/setup/account_hashing.rs index 569481c2a41..d300265355e 100644 --- a/crates/stages/benches/setup/account_hashing.rs +++ b/crates/stages/benches/setup/account_hashing.rs @@ -1,15 +1,15 @@ #![allow(unreachable_pub)] -use super::{constants, StageRange}; + +use super::constants; use reth_db::{ cursor::DbCursorRO, database::Database, tables, transaction::DbTx, DatabaseError as DbError, }; -use reth_primitives::{fs, stage::StageCheckpoint}; +use reth_primitives::{fs, stage::StageCheckpoint, BlockNumber}; use reth_stages::{ stages::{AccountHashingStage, SeedOpts}, test_utils::TestStageDB, - ExecInput, UnwindInput, }; -use std::path::{Path, PathBuf}; +use std::{ops::RangeInclusive, path::Path}; /// Prepares a database for [`AccountHashingStage`] /// If the environment variable [`constants::ACCOUNT_HASHING_DB`] is set, it will use that one and @@ -17,20 +17,22 @@ use std::path::{Path, PathBuf}; /// generate its own random data. /// /// Returns the path to the database file, stage and range of stage execution if it exists. 
-pub fn prepare_account_hashing(num_blocks: u64) -> (PathBuf, AccountHashingStage, StageRange) { - let (path, stage_range) = match std::env::var(constants::ACCOUNT_HASHING_DB) { +pub fn prepare_account_hashing( + num_blocks: u64, +) -> (TestStageDB, AccountHashingStage, RangeInclusive) { + let (db, stage_range) = match std::env::var(constants::ACCOUNT_HASHING_DB) { Ok(db) => { let path = Path::new(&db).to_path_buf(); let range = find_stage_range(&path); - (path, range) + (TestStageDB::new(&path), range) } Err(_) => generate_testdata_db(num_blocks), }; - (path, AccountHashingStage::default(), stage_range) + (db, AccountHashingStage::default(), stage_range) } -fn find_stage_range(db: &Path) -> StageRange { +fn find_stage_range(db: &Path) -> RangeInclusive { let mut stage_range = None; TestStageDB::new(db) .factory @@ -40,13 +42,7 @@ fn find_stage_range(db: &Path) -> StageRange { let from = cursor.first()?.unwrap().0; let to = StageCheckpoint::new(cursor.last()?.unwrap().0); - stage_range = Some(( - ExecInput { - target: Some(to.block_number), - checkpoint: Some(StageCheckpoint::new(from)), - }, - UnwindInput { unwind_to: from, checkpoint: to, bad_block: None }, - )); + stage_range = Some(from..=to.block_number); Ok::<(), DbError>(()) }) .unwrap() @@ -55,19 +51,21 @@ fn find_stage_range(db: &Path) -> StageRange { stage_range.expect("Could not find the stage range from the external DB.") } -fn generate_testdata_db(num_blocks: u64) -> (PathBuf, StageRange) { +fn generate_testdata_db(num_blocks: u64) -> (TestStageDB, RangeInclusive) { let opts = SeedOpts { blocks: 0..=num_blocks, accounts: 100_000, txs: 100..150 }; let path = Path::new(env!("CARGO_MANIFEST_DIR")).join("testdata").join("account-hashing-bench"); + let exists = path.exists(); + let db = TestStageDB::new(&path); - if !path.exists() { + if !exists { // create the dirs fs::create_dir_all(&path).unwrap(); println!("Account Hashing testdata not found, generating to {:?}", path.display()); - let db = TestStageDB::new(&path); let provider = db.factory.provider_rw().unwrap(); - let _accounts = AccountHashingStage::seed(&provider, opts); + let _accounts = AccountHashingStage::seed(&provider, opts.clone()); provider.commit().expect("failed to commit"); } - (path, (ExecInput { target: Some(num_blocks), ..Default::default() }, UnwindInput::default())) + + (db, opts.blocks) } diff --git a/crates/stages/benches/setup/mod.rs b/crates/stages/benches/setup/mod.rs index f5f7e54ed14..b63ab63cb63 100644 --- a/crates/stages/benches/setup/mod.rs +++ b/crates/stages/benches/setup/mod.rs @@ -17,15 +17,11 @@ use reth_interfaces::test_utils::{ use reth_primitives::{fs, Account, Address, SealedBlock, B256, U256}; use reth_stages::{ stages::{AccountHashingStage, StorageHashingStage}, - test_utils::TestStageDB, + test_utils::{StorageKind, TestStageDB}, ExecInput, Stage, UnwindInput, }; use reth_trie::StateRoot; -use std::{ - collections::BTreeMap, - path::{Path, PathBuf}, - sync::Arc, -}; +use std::{collections::BTreeMap, path::Path, sync::Arc}; mod constants; @@ -84,8 +80,7 @@ pub(crate) fn unwind_hashes>>>( // Helper for generating testdata for the benchmarks. // Returns the path to the database file. 
-pub(crate) fn txs_testdata(num_blocks: u64) -> PathBuf { - let path = Path::new(env!("CARGO_MANIFEST_DIR")).join("testdata").join("txs-bench"); +pub(crate) fn txs_testdata(num_blocks: u64) -> TestStageDB { let txs_range = 100..150; // number of storage changes per transition @@ -101,11 +96,14 @@ pub(crate) fn txs_testdata(num_blocks: u64) -> PathBuf { // rng let mut rng = generators::rng(); - if !path.exists() { + let path = Path::new(env!("CARGO_MANIFEST_DIR")).join("testdata").join("txs-bench"); + let exists = path.exists(); + let db = TestStageDB::new(&path); + + if !exists { // create the dirs fs::create_dir_all(&path).unwrap(); println!("Transactions testdata not found, generating to {:?}", path.display()); - let db = TestStageDB::new(&path); let accounts: BTreeMap = concat([ random_eoa_accounts(&mut rng, n_eoa), @@ -167,7 +165,7 @@ pub(crate) fn txs_testdata(num_blocks: u64) -> PathBuf { updated_header.state_root = root; *last_block = SealedBlock { header: updated_header.seal_slow(), ..cloned_last }; - db.insert_blocks(blocks.iter(), None).unwrap(); + db.insert_blocks(blocks.iter(), StorageKind::Static).unwrap(); // initialize TD db.commit(|tx| { @@ -177,5 +175,5 @@ pub(crate) fn txs_testdata(num_blocks: u64) -> PathBuf { .unwrap(); } - path + db } diff --git a/crates/stages/src/error.rs b/crates/stages/src/error.rs index 170f592ec83..e8a5e3a71ff 100644 --- a/crates/stages/src/error.rs +++ b/crates/stages/src/error.rs @@ -3,7 +3,7 @@ use reth_interfaces::{ consensus, db::DatabaseError as DbError, executor, p2p::error::DownloadError, provider::ProviderError, RethError, }; -use reth_primitives::SealedHeader; +use reth_primitives::{BlockNumber, SealedHeader, StaticFileSegment, TxNumber}; use thiserror::Error; use tokio::sync::mpsc::error::SendError; @@ -76,6 +76,36 @@ pub enum StageError { /// rely on external downloaders #[error("invalid download response: {0}")] Download(#[from] DownloadError), + /// Database is ahead of static file data. + #[error("missing static file data for block number: {number}", number = block.number)] + MissingStaticFileData { + /// Starting block with missing data. + block: Box, + /// Static File segment + segment: StaticFileSegment, + }, + /// Unrecoverable inconsistency error related to a transaction number in a static file segment. + #[error( + "inconsistent transaction number for {segment}. db: {database}, static_file: {static_file}" + )] + InconsistentTxNumber { + /// Static File segment where this error was encountered. + segment: StaticFileSegment, + /// Expected database transaction number. + database: TxNumber, + /// Expected static file transaction number. + static_file: TxNumber, + }, + /// Unrecoverable inconsistency error related to a block number in a static file segment. + #[error("inconsistent block number for {segment}. db: {database}, static_file: {static_file}")] + InconsistentBlockNumber { + /// Static File segment where this error was encountered. + segment: StaticFileSegment, + /// Expected database block number. + database: BlockNumber, + /// Expected static file block number. + static_file: BlockNumber, + }, /// Internal error #[error(transparent)] Internal(#[from] RethError), @@ -104,11 +134,20 @@ impl StageError { StageError::MissingDownloadBuffer | StageError::MissingSyncGap | StageError::ChannelClosed | + StageError::InconsistentBlockNumber { .. } | + StageError::InconsistentTxNumber { .. 
} | + StageError::Internal(_) | StageError::Fatal(_) ) } } +impl From for StageError { + fn from(source: std::io::Error) -> Self { + StageError::Fatal(Box::new(source)) + } +} + /// A pipeline execution error. #[derive(Error, Debug)] pub enum PipelineError { @@ -124,7 +163,7 @@ pub enum PipelineError { /// The pipeline encountered an error while trying to send an event. #[error("pipeline encountered an error while trying to send an event")] Channel(#[from] Box>), - /// The stage encountered an internal error. + /// Internal error #[error(transparent)] - Internal(Box), + Internal(#[from] RethError), } diff --git a/crates/stages/src/lib.rs b/crates/stages/src/lib.rs index 0ad60c6219c..c2bceceee2b 100644 --- a/crates/stages/src/lib.rs +++ b/crates/stages/src/lib.rs @@ -18,7 +18,7 @@ //! # use reth_interfaces::consensus::Consensus; //! # use reth_interfaces::test_utils::{TestBodiesClient, TestConsensus, TestHeadersClient}; //! # use reth_revm::EvmProcessorFactory; -//! # use reth_primitives::{PeerId, MAINNET, B256}; +//! # use reth_primitives::{PeerId, MAINNET, B256, PruneModes}; //! # use reth_stages::Pipeline; //! # use reth_stages::sets::DefaultStages; //! # use tokio::sync::watch; @@ -26,6 +26,7 @@ //! # use reth_provider::ProviderFactory; //! # use reth_provider::HeaderSyncMode; //! # use reth_provider::test_utils::create_test_provider_factory; +//! # use reth_static_file::StaticFileProducer; //! # //! # let chain_spec = MAINNET.clone(); //! # let consensus: Arc = Arc::new(TestConsensus::default()); @@ -41,19 +42,27 @@ //! # ); //! # let (tip_tx, tip_rx) = watch::channel(B256::default()); //! # let executor_factory = EvmProcessorFactory::new(chain_spec.clone(), EthEvmConfig::default()); +//! # let static_file_producer = StaticFileProducer::new( +//! # provider_factory.clone(), +//! # provider_factory.static_file_provider(), +//! # PruneModes::default() +//! # ); //! // Create a pipeline that can fully sync //! # let pipeline = //! Pipeline::builder() //! .with_tip_sender(tip_tx) -//! .add_stages(DefaultStages::new( -//! provider_factory.clone(), -//! HeaderSyncMode::Tip(tip_rx), -//! consensus, -//! headers_downloader, -//! bodies_downloader, -//! executor_factory, -//! )) -//! .build(provider_factory); +//! .add_stages( +//! DefaultStages::new( +//! provider_factory.clone(), +//! HeaderSyncMode::Tip(tip_rx), +//! consensus, +//! headers_downloader, +//! bodies_downloader, +//! executor_factory, +//! ) +//! .unwrap(), +//! ) +//! .build(provider_factory, static_file_producer); //! ``` //! //! ## Feature Flags diff --git a/crates/stages/src/pipeline/builder.rs b/crates/stages/src/pipeline/builder.rs index 3e160577fdd..e76f76c604c 100644 --- a/crates/stages/src/pipeline/builder.rs +++ b/crates/stages/src/pipeline/builder.rs @@ -2,6 +2,7 @@ use crate::{pipeline::BoxedStage, MetricEventsSender, Pipeline, Stage, StageSet} use reth_db::database::Database; use reth_primitives::{stage::StageId, BlockNumber, B256}; use reth_provider::ProviderFactory; +use reth_static_file::StaticFileProducer; use tokio::sync::watch; /// Builds a [`Pipeline`]. @@ -67,12 +68,17 @@ where } /// Builds the final [`Pipeline`] using the given database. 
- pub fn build(self, provider_factory: ProviderFactory) -> Pipeline { + pub fn build( + self, + provider_factory: ProviderFactory, + static_file_producer: StaticFileProducer, + ) -> Pipeline { let Self { stages, max_block, tip_tx, metrics_tx } = self; Pipeline { provider_factory, stages, max_block, + static_file_producer, tip_tx, listeners: Default::default(), progress: Default::default(), diff --git a/crates/stages/src/pipeline/mod.rs b/crates/stages/src/pipeline/mod.rs index 5ef0a3f6a99..40d010f4860 100644 --- a/crates/stages/src/pipeline/mod.rs +++ b/crates/stages/src/pipeline/mod.rs @@ -3,12 +3,17 @@ use crate::{ }; use futures_util::Future; use reth_db::database::Database; +use reth_interfaces::RethResult; use reth_primitives::{ constants::BEACON_CONSENSUS_REORG_UNWIND_DEPTH, stage::{StageCheckpoint, StageId}, + static_file::HighestStaticFiles, BlockNumber, B256, }; -use reth_provider::{ProviderFactory, StageCheckpointReader, StageCheckpointWriter}; +use reth_provider::{ + providers::StaticFileWriter, ProviderFactory, StageCheckpointReader, StageCheckpointWriter, +}; +use reth_static_file::StaticFileProducer; use reth_tokio_util::EventListeners; use std::pin::Pin; use tokio::sync::watch; @@ -66,6 +71,7 @@ pub struct Pipeline { stages: Vec>, /// The maximum block number to sync to. max_block: Option, + static_file_producer: StaticFileProducer, /// All listeners for events the pipeline emits. listeners: EventListeners, /// Keeps track of the progress of the pipeline. @@ -177,6 +183,8 @@ where /// pipeline (for example the `Finish` stage). Or [ControlFlow::Unwind] of the stage that caused /// the unwind. pub async fn run_loop(&mut self) -> Result { + self.produce_static_files()?; + let mut previous_stage = None; for stage_index in 0..self.stages.len() { let stage = &self.stages[stage_index]; @@ -212,6 +220,33 @@ where Ok(self.progress.next_ctrl()) } + /// Run [static file producer](StaticFileProducer) and move all data from the database to static + /// files for corresponding [segments](reth_primitives::static_file::StaticFileSegment), + /// according to their [stage checkpoints](StageCheckpoint): + /// - [StaticFileSegment::Headers](reth_primitives::static_file::StaticFileSegment::Headers) -> + /// [StageId::Headers] + /// - [StaticFileSegment::Receipts](reth_primitives::static_file::StaticFileSegment::Receipts) + /// -> [StageId::Execution] + /// - [StaticFileSegment::Transactions](reth_primitives::static_file::StaticFileSegment::Transactions) + /// -> [StageId::Bodies] + fn produce_static_files(&mut self) -> RethResult<()> { + let provider = self.provider_factory.provider()?; + let targets = self.static_file_producer.get_static_file_targets(HighestStaticFiles { + headers: provider + .get_stage_checkpoint(StageId::Headers)? + .map(|checkpoint| checkpoint.block_number), + receipts: provider + .get_stage_checkpoint(StageId::Execution)? + .map(|checkpoint| checkpoint.block_number), + transactions: provider + .get_stage_checkpoint(StageId::Bodies)? + .map(|checkpoint| checkpoint.block_number), + })?; + self.static_file_producer.run(targets)?; + + Ok(()) + } + /// Unwind the stages to the target block. /// /// If the unwind is due to a bad block the number of that block should be specified. 
@@ -279,7 +314,9 @@ where self.listeners .notify(PipelineEvent::Unwound { stage_id, result: unwind_output }); + self.provider_factory.static_file_provider().commit()?; provider_rw.commit()?; + provider_rw = self.provider_factory.provider_rw()?; } Err(err) => { @@ -371,6 +408,7 @@ where result: out.clone(), }); + self.provider_factory.static_file_provider().commit()?; provider_rw.commit()?; if done { @@ -428,6 +466,7 @@ fn on_stage_error( StageId::MerkleExecute, prev_checkpoint.unwrap_or_default(), )?; + factory.static_file_provider().commit()?; provider_rw.commit()?; // We unwind because of a validation error. If the unwind itself @@ -457,6 +496,16 @@ fn on_stage_error( })) } } + } else if let StageError::MissingStaticFileData { block, segment } = err { + error!( + target: "sync::pipeline", + stage = %stage_id, + bad_block = %block.number, + segment = %segment, + "Stage is missing static file data." + ); + + Ok(Some(ControlFlow::Unwind { target: block.number - 1, bad_block: block })) } else if err.is_fatal() { error!(target: "sync::pipeline", stage = %stage_id, "Stage encountered a fatal error: {err}"); Err(err.into()) @@ -492,6 +541,7 @@ mod tests { provider::ProviderError, test_utils::{generators, generators::random_header}, }; + use reth_primitives::PruneModes; use reth_provider::test_utils::create_test_provider_factory; use tokio_stream::StreamExt; @@ -537,7 +587,14 @@ mod tests { .add_exec(Ok(ExecOutput { checkpoint: StageCheckpoint::new(10), done: true })), ) .with_max_block(10) - .build(provider_factory); + .build( + provider_factory.clone(), + StaticFileProducer::new( + provider_factory.clone(), + provider_factory.static_file_provider(), + PruneModes::default(), + ), + ); let events = pipeline.events(); // Run pipeline @@ -597,7 +654,14 @@ mod tests { .add_unwind(Ok(UnwindOutput { checkpoint: StageCheckpoint::new(1) })), ) .with_max_block(10) - .build(provider_factory); + .build( + provider_factory.clone(), + StaticFileProducer::new( + provider_factory.clone(), + provider_factory.static_file_provider(), + PruneModes::default(), + ), + ); let events = pipeline.events(); // Run pipeline @@ -704,7 +768,14 @@ mod tests { .add_exec(Ok(ExecOutput { checkpoint: StageCheckpoint::new(10), done: true })), ) .with_max_block(10) - .build(provider_factory); + .build( + provider_factory.clone(), + StaticFileProducer::new( + provider_factory.clone(), + provider_factory.static_file_provider(), + PruneModes::default(), + ), + ); let events = pipeline.events(); // Run pipeline @@ -801,7 +872,14 @@ mod tests { .add_exec(Ok(ExecOutput { checkpoint: StageCheckpoint::new(10), done: true })), ) .with_max_block(10) - .build(provider_factory); + .build( + provider_factory.clone(), + StaticFileProducer::new( + provider_factory.clone(), + provider_factory.static_file_provider(), + PruneModes::default(), + ), + ); let events = pipeline.events(); // Run pipeline @@ -881,7 +959,14 @@ mod tests { .add_exec(Ok(ExecOutput { checkpoint: StageCheckpoint::new(10), done: true })), ) .with_max_block(10) - .build(provider_factory); + .build( + provider_factory.clone(), + StaticFileProducer::new( + provider_factory.clone(), + provider_factory.static_file_provider(), + PruneModes::default(), + ), + ); let result = pipeline.run().await; assert_matches!(result, Ok(())); @@ -891,7 +976,14 @@ mod tests { .add_stage(TestStage::new(StageId::Other("Fatal")).add_exec(Err( StageError::DatabaseIntegrity(ProviderError::BlockBodyIndicesNotFound(5)), ))) - .build(provider_factory); + .build( + provider_factory.clone(), + 
StaticFileProducer::new( + provider_factory.clone(), + provider_factory.static_file_provider(), + PruneModes::default(), + ), + ); let result = pipeline.run().await; assert_matches!( result, diff --git a/crates/stages/src/sets.rs b/crates/stages/src/sets.rs index 7879c20d86b..1b029b11e53 100644 --- a/crates/stages/src/sets.rs +++ b/crates/stages/src/sets.rs @@ -13,14 +13,22 @@ //! # use reth_stages::Pipeline; //! # use reth_stages::sets::{OfflineStages}; //! # use reth_revm::EvmProcessorFactory; -//! # use reth_primitives::MAINNET; +//! # use reth_primitives::{PruneModes, MAINNET}; //! # use reth_node_ethereum::EthEvmConfig; //! # use reth_provider::test_utils::create_test_provider_factory; +//! # use reth_static_file::StaticFileProducer; //! //! # let executor_factory = EvmProcessorFactory::new(MAINNET.clone(), EthEvmConfig::default()); //! # let provider_factory = create_test_provider_factory(); +//! # let static_file_producer = StaticFileProducer::new( +//! provider_factory.clone(), +//! provider_factory.static_file_provider(), +//! PruneModes::default(), +//! ); //! // Build a pipeline with all offline stages. -//! # let pipeline = Pipeline::builder().add_stages(OfflineStages::new(executor_factory)).build(provider_factory); +//! # let pipeline = Pipeline::builder() +//! .add_stages(OfflineStages::new(executor_factory)) +//! .build(provider_factory, static_file_producer); //! ``` //! //! ```ignore @@ -42,9 +50,9 @@ use crate::{ stages::{ AccountHashingStage, BodyStage, ExecutionStage, FinishStage, HeaderStage, IndexAccountHistoryStage, IndexStorageHistoryStage, MerkleStage, SenderRecoveryStage, - StorageHashingStage, TotalDifficultyStage, TransactionLookupStage, + StorageHashingStage, TransactionLookupStage, }, - StageSet, StageSetBuilder, + StageError, StageSet, StageSetBuilder, }; use reth_db::database::Database; use reth_interfaces::{ @@ -53,6 +61,7 @@ use reth_interfaces::{ }; use reth_provider::{ExecutorFactory, HeaderSyncGapProvider, HeaderSyncMode}; use std::sync::Arc; +use tempfile::TempDir; /// A set containing all stages to run a fully syncing instance of reth. /// @@ -64,7 +73,6 @@ use std::sync::Arc; /// /// This expands to the following series of stages: /// - [`HeaderStage`] -/// - [`TotalDifficultyStage`] /// - [`BodyStage`] /// - [`SenderRecoveryStage`] /// - [`ExecutionStage`] @@ -93,20 +101,21 @@ impl DefaultStages { header_downloader: H, body_downloader: B, executor_factory: EF, - ) -> Self + ) -> Result where EF: ExecutorFactory, { - Self { + Ok(Self { online: OnlineStages::new( provider, header_mode, consensus, header_downloader, body_downloader, + Arc::new(TempDir::new()?), ), executor_factory, - } + }) } } @@ -119,17 +128,20 @@ where default_offline: StageSetBuilder, executor_factory: EF, ) -> StageSetBuilder { - default_offline.add_set(OfflineStages::new(executor_factory)).add_stage(FinishStage) + StageSetBuilder::default() + .add_set(default_offline) + .add_set(OfflineStages::new(executor_factory)) + .add_stage(FinishStage) } } -impl StageSet for DefaultStages +impl StageSet for DefaultStages where - DB: Database, Provider: HeaderSyncGapProvider + 'static, H: HeaderDownloader + 'static, B: BodyDownloader + 'static, EF: ExecutorFactory, + DB: Database + 'static, { fn builder(self) -> StageSetBuilder { Self::add_offline_stages(self.online.builder(), self.executor_factory) @@ -152,6 +164,8 @@ pub struct OnlineStages { header_downloader: H, /// The block body downloader body_downloader: B, + /// Temporary directory for ETL usage on headers stage. 
+ temp_dir: Arc, } impl OnlineStages { @@ -162,8 +176,9 @@ impl OnlineStages { consensus: Arc, header_downloader: H, body_downloader: B, + temp_dir: Arc, ) -> Self { - Self { provider, header_mode, consensus, header_downloader, body_downloader } + Self { provider, header_mode, consensus, header_downloader, body_downloader, temp_dir } } } @@ -177,12 +192,8 @@ where pub fn builder_with_headers( headers: HeaderStage, body_downloader: B, - consensus: Arc, ) -> StageSetBuilder { - StageSetBuilder::default() - .add_stage(headers) - .add_stage(TotalDifficultyStage::new(consensus.clone())) - .add_stage(BodyStage::new(body_downloader)) + StageSetBuilder::default().add_stage(headers).add_stage(BodyStage::new(body_downloader)) } /// Create a new builder using the given bodies stage. @@ -192,10 +203,16 @@ where mode: HeaderSyncMode, header_downloader: H, consensus: Arc, + temp_dir: Arc, ) -> StageSetBuilder { StageSetBuilder::default() - .add_stage(HeaderStage::new(provider, header_downloader, mode)) - .add_stage(TotalDifficultyStage::new(consensus.clone())) + .add_stage(HeaderStage::new( + provider, + header_downloader, + mode, + consensus.clone(), + temp_dir.clone(), + )) .add_stage(bodies) } } @@ -209,8 +226,13 @@ where { fn builder(self) -> StageSetBuilder { StageSetBuilder::default() - .add_stage(HeaderStage::new(self.provider, self.header_downloader, self.header_mode)) - .add_stage(TotalDifficultyStage::new(self.consensus.clone())) + .add_stage(HeaderStage::new( + self.provider, + self.header_downloader, + self.header_mode, + self.consensus.clone(), + self.temp_dir.clone(), + )) .add_stage(BodyStage::new(self.body_downloader)) } } diff --git a/crates/stages/src/stages/bodies.rs b/crates/stages/src/stages/bodies.rs index 672af2b49bd..459eace7207 100644 --- a/crates/stages/src/stages/bodies.rs +++ b/crates/stages/src/stages/bodies.rs @@ -5,13 +5,21 @@ use reth_db::{ database::Database, models::{StoredBlockBodyIndices, StoredBlockOmmers, StoredBlockWithdrawals}, tables, - transaction::{DbTx, DbTxMut}, - DatabaseError, + transaction::DbTxMut, +}; +use reth_interfaces::{ + p2p::bodies::{downloader::BodyDownloader, response::BlockResponse}, + provider::ProviderResult, +}; +use reth_primitives::{ + stage::{EntitiesCheckpoint, StageCheckpoint, StageId}, + StaticFileSegment, +}; +use reth_provider::{providers::StaticFileWriter, DatabaseProviderRW, HeaderProvider, StatsReader}; +use std::{ + cmp::Ordering, + task::{ready, Context, Poll}, }; -use reth_interfaces::p2p::bodies::{downloader::BodyDownloader, response::BlockResponse}; -use reth_primitives::stage::{EntitiesCheckpoint, StageCheckpoint, StageId}; -use reth_provider::DatabaseProviderRW; -use std::task::{ready, Context, Poll}; use tracing::*; // TODO(onbjerg): Metrics and events (gradual status for e.g. CLI) @@ -109,13 +117,48 @@ impl Stage for BodyStage { // Cursors used to write bodies, ommers and transactions let tx = provider.tx_ref(); let mut block_indices_cursor = tx.cursor_write::()?; - let mut tx_cursor = tx.cursor_write::()?; let mut tx_block_cursor = tx.cursor_write::()?; let mut ommers_cursor = tx.cursor_write::()?; let mut withdrawals_cursor = tx.cursor_write::()?; - // Get id for the next tx_num or zero if there are no transactions. - let mut next_tx_num = tx_cursor.last()?.map(|(id, _)| id + 1).unwrap_or_default(); + // Get id for the next tx_num of zero if there are no transactions. 
+ let mut next_tx_num = tx_block_cursor.last()?.map(|(id, _)| id + 1).unwrap_or_default(); + + let static_file_provider = provider.static_file_provider(); + let mut static_file_producer = + static_file_provider.get_writer(from_block, StaticFileSegment::Transactions)?; + + // Make sure Transactions static file is at the same height. If it's further, this + // input execution was interrupted previously and we need to unwind the static file. + let next_static_file_tx_num = static_file_provider + .get_highest_static_file_tx(StaticFileSegment::Transactions) + .map(|id| id + 1) + .unwrap_or_default(); + + match next_static_file_tx_num.cmp(&next_tx_num) { + // If static files are ahead, then we didn't reach the database commit in a previous + // stage run. So, our only solution is to unwind the static files and proceed from the + // database expected height. + Ordering::Greater => static_file_producer + .prune_transactions(next_static_file_tx_num - next_tx_num, from_block - 1)?, + // If static files are behind, then there was some corruption or loss of files. This + // error will trigger an unwind, that will bring the database to the same height as the + // static files. + Ordering::Less => { + let last_block = static_file_provider + .get_highest_static_file_block(StaticFileSegment::Transactions) + .unwrap_or_default(); + + let missing_block = + Box::new(provider.sealed_header(last_block + 1)?.unwrap_or_default()); + + return Err(StageError::MissingStaticFileData { + block: missing_block, + segment: StaticFileSegment::Transactions, + }) + } + Ordering::Equal => {} + } debug!(target: "sync::stages::bodies", stage_progress = from_block, target = to_block, start_tx_id = next_tx_num, "Commencing sync"); @@ -133,6 +176,23 @@ impl Stage for BodyStage { BlockResponse::Empty(_) => 0, }, }; + + // Increment block on static file header. + if block_number > 0 { + let appended_block_number = + static_file_producer.increment_block(StaticFileSegment::Transactions)?; + + if appended_block_number != block_number { + // This scenario indicates a critical error in the logic of adding new + // items. It should be treated as an `expect()` failure. + return Err(StageError::InconsistentBlockNumber { + segment: StaticFileSegment::Transactions, + database: block_number, + static_file: appended_block_number, + }) + } + } + match response { BlockResponse::Full(block) => { // write transaction block index @@ -142,8 +202,19 @@ impl Stage for BodyStage { // Write transactions for transaction in block.body { - // Append the transaction - tx_cursor.append(next_tx_num, transaction.into())?; + let appended_tx_number = static_file_producer + .append_transaction(next_tx_num, transaction.into())?; + + if appended_tx_number != next_tx_num { + // This scenario indicates a critical error in the logic of adding new + // items. It should be treated as an `expect()` failure. + return Err(StageError::InconsistentTxNumber { + segment: StaticFileSegment::Transactions, + database: next_tx_num, + static_file: appended_tx_number, + }) + } + // Increment transaction id for each transaction. 
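To make the height reconciliation above concrete, here is a minimal, std-only restatement of the three-way decision between the static-file height and the database height. The enum and function names are illustrative only; the real code prunes through the static-file writer and returns `StageError::MissingStaticFileData` rather than a value:

```rust
use std::cmp::Ordering;

/// Outcome of comparing the next expected transaction number in static files
/// against the next expected transaction number in the database.
#[derive(Debug, PartialEq)]
enum Reconcile {
    /// Static files got ahead (a previous run committed the static file but not the
    /// database): drop the surplus transactions from the static file.
    PruneStaticFile { extra_txs: u64 },
    /// The database is ahead, i.e. static-file data is missing: the stage must error
    /// so the pipeline unwinds the database down to the static-file height.
    UnwindDatabase,
    /// Both stores agree; nothing to do.
    InSync,
}

fn reconcile(next_static_file_tx: u64, next_db_tx: u64) -> Reconcile {
    match next_static_file_tx.cmp(&next_db_tx) {
        Ordering::Greater => {
            Reconcile::PruneStaticFile { extra_txs: next_static_file_tx - next_db_tx }
        }
        Ordering::Less => Reconcile::UnwindDatabase,
        Ordering::Equal => Reconcile::InSync,
    }
}

fn main() {
    assert_eq!(reconcile(12, 10), Reconcile::PruneStaticFile { extra_txs: 2 });
    assert_eq!(reconcile(8, 10), Reconcile::UnwindDatabase);
    assert_eq!(reconcile(10, 10), Reconcile::InSync);
}
```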
next_tx_num += 1; } @@ -190,10 +261,10 @@ impl Stage for BodyStage { ) -> Result { self.buffer.take(); + let static_file_provider = provider.static_file_provider(); let tx = provider.tx_ref(); // Cursors to unwind bodies, ommers let mut body_cursor = tx.cursor_write::()?; - let mut transaction_cursor = tx.cursor_write::()?; let mut ommers_cursor = tx.cursor_write::()?; let mut withdrawals_cursor = tx.cursor_write::()?; // Cursors to unwind transitions @@ -222,18 +293,41 @@ impl Stage for BodyStage { tx_block_cursor.delete_current()?; } - // Delete all transactions that belong to this block - for tx_id in block_meta.tx_num_range() { - // First delete the transaction - if transaction_cursor.seek_exact(tx_id)?.is_some() { - transaction_cursor.delete_current()?; - } - } - // Delete the current body value rev_walker.delete_current()?; } + let mut static_file_producer = + static_file_provider.latest_writer(StaticFileSegment::Transactions)?; + + // Unwind from static files. Get the current last expected transaction from DB, and match it + // on static file + let db_tx_num = + body_cursor.last()?.map(|(_, block_meta)| block_meta.last_tx_num()).unwrap_or_default(); + let static_file_tx_num: u64 = static_file_provider + .get_highest_static_file_tx(StaticFileSegment::Transactions) + .unwrap_or_default(); + + // If there are more transactions on database, then we are missing static file data and we + // need to unwind further. + if db_tx_num > static_file_tx_num { + let last_block = static_file_provider + .get_highest_static_file_block(StaticFileSegment::Transactions) + .unwrap_or_default(); + + let missing_block = + Box::new(provider.sealed_header(last_block + 1)?.unwrap_or_default()); + + return Err(StageError::MissingStaticFileData { + block: missing_block, + segment: StaticFileSegment::Transactions, + }) + } + + // Unwinds static file + static_file_producer + .prune_transactions(static_file_tx_num.saturating_sub(db_tx_num), input.unwind_to)?; + Ok(UnwindOutput { checkpoint: StageCheckpoint::new(input.unwind_to) .with_entities_stage_checkpoint(stage_checkpoint(provider)?), @@ -246,10 +340,10 @@ impl Stage for BodyStage { // progress in gas as a proxy to size. Execution stage uses a similar approach. fn stage_checkpoint( provider: &DatabaseProviderRW, -) -> Result { +) -> ProviderResult { Ok(EntitiesCheckpoint { - processed: provider.tx_ref().entries::()? as u64, - total: provider.tx_ref().entries::()? as u64, + processed: provider.count_entries::()? as u64, + total: (provider.count_entries::()? 
as u64).saturating_sub(1), }) } @@ -289,6 +383,7 @@ mod tests { // Check that we only synced around `batch_size` blocks even though the number of blocks // synced by the previous stage is higher let output = rx.await.unwrap(); + runner.db().factory.static_file_provider().commit().unwrap(); assert_matches!( output, Ok(ExecOutput { checkpoint: StageCheckpoint { @@ -325,6 +420,7 @@ mod tests { // Check that we synced all blocks successfully, even though our `batch_size` allows us to // sync more (if there were more headers) let output = rx.await.unwrap(); + runner.db().factory.static_file_provider().commit().unwrap(); assert_matches!( output, Ok(ExecOutput { @@ -362,6 +458,7 @@ mod tests { // Check that we synced at least 10 blocks let first_run = rx.await.unwrap(); + runner.db().factory.static_file_provider().commit().unwrap(); assert_matches!( first_run, Ok(ExecOutput { checkpoint: StageCheckpoint { @@ -382,6 +479,7 @@ mod tests { // Check that we synced more blocks let output = rx.await.unwrap(); + runner.db().factory.static_file_provider().commit().unwrap(); assert_matches!( output, Ok(ExecOutput { checkpoint: StageCheckpoint { @@ -422,6 +520,7 @@ mod tests { // Check that we synced all blocks successfully, even though our `batch_size` allows us to // sync more (if there were more headers) let output = rx.await.unwrap(); + runner.db().factory.static_file_provider().commit().unwrap(); assert_matches!( output, Ok(ExecOutput { checkpoint: StageCheckpoint { @@ -439,16 +538,12 @@ mod tests { .expect("Written block data invalid"); // Delete a transaction - runner - .db() - .commit(|tx| { - let mut tx_cursor = tx.cursor_write::()?; - tx_cursor.last()?.expect("Could not read last transaction"); - tx_cursor.delete_current()?; - Ok(()) - }) - .expect("Could not delete a transaction"); - + let static_file_provider = runner.db().factory.static_file_provider(); + { + let mut static_file_producer = + static_file_provider.latest_writer(StaticFileSegment::Transactions).unwrap(); + static_file_producer.prune_transactions(1, checkpoint.block_number).unwrap(); + } // Unwind all of it let unwind_to = 1; let input = UnwindInput { bad_block: None, checkpoint, unwind_to }; @@ -480,6 +575,7 @@ mod tests { use reth_db::{ cursor::DbCursorRO, models::{StoredBlockBodyIndices, StoredBlockOmmers}, + static_file::HeaderMask, tables, test_utils::TempDatabase, transaction::{DbTx, DbTxMut}, @@ -501,8 +597,13 @@ mod tests { generators::{random_block_range, random_signed_tx}, }, }; - use reth_primitives::{BlockBody, BlockNumber, SealedBlock, SealedHeader, TxNumber, B256}; - use reth_provider::ProviderFactory; + use reth_primitives::{ + BlockBody, BlockHash, BlockNumber, Header, SealedBlock, SealedHeader, + StaticFileSegment, TxNumber, B256, + }; + use reth_provider::{ + providers::StaticFileWriter, HeaderProvider, ProviderFactory, TransactionsProvider, + }; use std::{ collections::{HashMap, VecDeque}, ops::RangeInclusive, @@ -571,24 +672,38 @@ mod tests { fn seed_execution(&mut self, input: ExecInput) -> Result { let start = input.checkpoint().block_number; let end = input.target(); + + let static_file_provider = self.db.factory.static_file_provider(); + let mut rng = generators::rng(); - let blocks = random_block_range(&mut rng, start..=end, GENESIS_HASH, 0..2); + + // Static files do not support gaps in headers, so we need to generate 0 to end + let blocks = random_block_range(&mut rng, 0..=end, GENESIS_HASH, 0..2); self.db.insert_headers_with_td(blocks.iter().map(|block| &block.header))?; - if let Some(progress) = 
blocks.first() { + if let Some(progress) = blocks.get(start as usize) { // Insert last progress data - self.db.commit(|tx| { + { + let tx = self.db.factory.provider_rw()?.into_tx(); + let mut static_file_producer = static_file_provider + .get_writer(start, StaticFileSegment::Transactions)?; + let body = StoredBlockBodyIndices { first_tx_num: 0, tx_count: progress.body.len() as u64, }; + + static_file_producer.set_block_range(0..=progress.number); + body.tx_num_range().try_for_each(|tx_num| { let transaction = random_signed_tx(&mut rng); - tx.put::(tx_num, transaction.into()) + static_file_producer + .append_transaction(tx_num, transaction.into()) + .map(|_| ()) })?; if body.tx_count != 0 { tx.put::( - body.first_tx_num(), + body.last_tx_num(), progress.number, )?; } @@ -601,8 +716,10 @@ mod tests { StoredBlockOmmers { ommers: progress.ommers.clone() }, )?; } - Ok(()) - })?; + + static_file_producer.commit()?; + tx.commit()?; + } } self.set_responses(blocks.iter().map(body_by_hash).collect()); Ok(blocks) @@ -663,12 +780,12 @@ mod tests { prev_progress: BlockNumber, highest_block: BlockNumber, ) -> Result<(), TestRunnerError> { + let static_file_provider = self.db.factory.static_file_provider(); + self.db.query(|tx| { // Acquire cursors on body related tables - let mut headers_cursor = tx.cursor_read::()?; let mut bodies_cursor = tx.cursor_read::()?; let mut ommers_cursor = tx.cursor_read::()?; - let mut transaction_cursor = tx.cursor_read::()?; let mut tx_block_cursor = tx.cursor_read::()?; let first_body_key = match bodies_cursor.first()? { @@ -678,6 +795,7 @@ mod tests { let mut prev_number: Option = None; + for entry in bodies_cursor.walk(Some(first_body_key))? { let (number, body) = entry?; @@ -695,7 +813,7 @@ mod tests { "We wrote a block body outside of our synced range. Found block with number {number}, highest block according to stage is {highest_block}", ); - let (_, header) = headers_cursor.seek_exact(number)?.expect("to be present"); + let header = static_file_provider.header_by_number(number)?.expect("to be present"); // Validate that ommers exist if any let stored_ommers = ommers_cursor.seek_exact(number)?; if header.ommers_hash_is_empty() { @@ -712,11 +830,9 @@ mod tests { } for tx_id in body.tx_num_range() { - let tx_entry = transaction_cursor.seek_exact(tx_id)?; - assert!(tx_entry.is_some(), "Transaction is missing."); + assert!(static_file_provider.transaction_by_id(tx_id)?.is_some(), "Transaction is missing."); } - prev_number = Some(number); } Ok(()) @@ -775,16 +891,14 @@ mod tests { &mut self, range: RangeInclusive, ) -> DownloadResult<()> { - let provider = self.provider_factory.provider()?; - let mut header_cursor = provider.tx_ref().cursor_read::()?; - - let mut canonical_cursor = - provider.tx_ref().cursor_read::()?; - let walker = canonical_cursor.walk_range(range)?; - - for entry in walker { - let (num, hash) = entry?; - let (_, header) = header_cursor.seek_exact(num)?.expect("missing header"); + let static_file_provider = self.provider_factory.static_file_provider(); + + for header in static_file_provider.fetch_range_iter( + StaticFileSegment::Headers, + *range.start()..*range.end() + 1, + |cursor, number| cursor.get_two::>(number.into()), + )? 
{ + let (header, hash) = header?; self.headers.push_back(header.seal(hash)); } diff --git a/crates/stages/src/stages/execution.rs b/crates/stages/src/stages/execution.rs index bcac2b3428d..724603e41c0 100644 --- a/crates/stages/src/stages/execution.rs +++ b/crates/stages/src/stages/execution.rs @@ -7,21 +7,23 @@ use reth_db::{ cursor::{DbCursorRO, DbCursorRW, DbDupCursorRO}, database::Database, models::BlockNumberAddress, + static_file::HeaderMask, tables, transaction::{DbTx, DbTxMut}, }; -use reth_interfaces::db::DatabaseError; use reth_primitives::{ stage::{ CheckpointBlockRange, EntitiesCheckpoint, ExecutionCheckpoint, StageCheckpoint, StageId, }, - BlockNumber, Header, PruneModes, U256, + BlockNumber, Header, PruneModes, StaticFileSegment, U256, }; use reth_provider::{ + providers::{StaticFileProvider, StaticFileProviderRWRefMut, StaticFileWriter}, BlockReader, DatabaseProviderRW, ExecutorFactory, HeaderProvider, LatestStateProviderRef, - OriginalValuesKnown, ProviderError, TransactionVariant, + OriginalValuesKnown, ProviderError, StatsReader, TransactionVariant, }; use std::{ + cmp::Ordering, ops::RangeInclusive, time::{Duration, Instant}, }; @@ -121,17 +123,26 @@ impl ExecutionStage { let start_block = input.next_block(); let max_block = input.target(); let prune_modes = self.adjust_prune_modes(provider, start_block, max_block)?; + let static_file_provider = provider.static_file_provider(); + + // We only use static files for Receipts, if there is no receipt pruning of any kind. + let mut static_file_producer = None; + if self.prune_modes.receipts.is_none() && self.prune_modes.receipts_log_filter.is_empty() { + static_file_producer = Some(prepare_static_file_producer(provider, start_block)?); + } // Build executor - let mut executor = - self.executor_factory.with_state(LatestStateProviderRef::new(provider.tx_ref())); + let mut executor = self.executor_factory.with_state(LatestStateProviderRef::new( + provider.tx_ref(), + provider.static_file_provider().clone(), + )); executor.set_prune_modes(prune_modes); executor.set_tip(max_block); // Progress tracking let mut stage_progress = start_block; let mut stage_checkpoint = - execution_checkpoint(provider, start_block, max_block, input.checkpoint())?; + execution_checkpoint(static_file_provider, start_block, max_block, input.checkpoint())?; let mut fetch_block_duration = Duration::default(); let mut execution_duration = Duration::default(); @@ -196,7 +207,11 @@ impl ExecutionStage { let time = Instant::now(); // write output - state.write_to_db(provider.tx_ref(), OriginalValuesKnown::Yes)?; + state.write_to_storage( + provider.tx_ref(), + static_file_producer, + OriginalValuesKnown::Yes, + )?; let db_write_duration = time.elapsed(); debug!( target: "sync::stages::execution", @@ -236,7 +251,7 @@ impl ExecutionStage { // If we're not executing MerkleStage from scratch (by threshold or first-sync), then erase // changeset related pruning configurations if !(max_block - start_block > self.external_clean_threshold || - provider.tx_ref().entries::()?.is_zero()) + provider.count_entries::()?.is_zero()) { prune_modes.account_history = None; prune_modes.storage_history = None; @@ -245,12 +260,12 @@ impl ExecutionStage { } } -fn execution_checkpoint( - provider: &DatabaseProviderRW, +fn execution_checkpoint( + provider: &StaticFileProvider, start_block: BlockNumber, max_block: BlockNumber, checkpoint: StageCheckpoint, -) -> Result { +) -> Result { Ok(match checkpoint.execution_stage_checkpoint() { // If checkpoint block range fully matches our 
range, // we take the previously used stage checkpoint as-is. @@ -312,15 +327,20 @@ fn execution_checkpoint( }) } -fn calculate_gas_used_from_headers( - provider: &DatabaseProviderRW, +fn calculate_gas_used_from_headers( + provider: &StaticFileProvider, range: RangeInclusive, -) -> Result { +) -> Result { let mut gas_total = 0; let start = Instant::now(); - for entry in provider.tx_ref().cursor_read::()?.walk_range(range.clone())? { - let (_, Header { gas_used, .. }) = entry?; + + for entry in provider.fetch_range_iter( + StaticFileSegment::Headers, + *range.start()..*range.end() + 1, + |cursor, number| cursor.get_one::>(number.into()), + )? { + let Header { gas_used, .. } = entry?; gas_total += gas_used; } @@ -420,17 +440,37 @@ impl Stage for ExecutionStage { let mut stage_checkpoint = input.checkpoint.execution_stage_checkpoint(); // Unwind all receipts for transactions in the block range - let mut cursor = tx.cursor_write::()?; - let mut reverse_walker = cursor.walk_back(None)?; + if self.prune_modes.receipts.is_none() && self.prune_modes.receipts_log_filter.is_empty() { + // We only use static files for Receipts, if there is no receipt pruning of any kind. - while let Some(Ok((tx_number, receipt))) = reverse_walker.next() { - if tx_number < first_tx_num { - break - } - reverse_walker.delete_current()?; + // prepare_static_file_producer does a consistency check that will unwind static files + // if the expected highest receipt in the files is higher than the database. + // Which is essentially what happens here when we unwind this stage. + let _static_file_producer = prepare_static_file_producer(provider, *range.start())?; + // Update the checkpoint. if let Some(stage_checkpoint) = stage_checkpoint.as_mut() { - stage_checkpoint.progress.processed -= receipt.cumulative_gas_used; + for block_number in range { + stage_checkpoint.progress.processed -= provider + .block_by_number(block_number)? + .ok_or_else(|| ProviderError::BlockNotFound(block_number.into()))? + .gas_used; + } + } + } else { + // We database for Receipts, if there is any kind of receipt pruning/filtering. + let mut cursor = tx.cursor_write::()?; + let mut reverse_walker = cursor.walk_back(None)?; + + while let Some(Ok((tx_number, receipt))) = reverse_walker.next() { + if tx_number < first_tx_num { + break + } + reverse_walker.delete_current()?; + + if let Some(stage_checkpoint) = stage_checkpoint.as_mut() { + stage_checkpoint.progress.processed -= receipt.cumulative_gas_used; + } } } @@ -493,22 +533,83 @@ impl ExecutionStageThresholds { } } +/// Returns a `StaticFileProviderRWRefMut` static file producer after performing a consistency +/// check. +/// +/// This function compares the highest receipt number recorded in the database with that in the +/// static file to detect any discrepancies due to unexpected shutdowns or database rollbacks. **If +/// the height in the static file is higher**, it rolls back (unwinds) the static file. +/// **Conversely, if the height in the database is lower**, it triggers a rollback in the database +/// (by returning [`StageError`]) until the heights in both the database and static file match. +fn prepare_static_file_producer<'a, 'b, DB: Database>( + provider: &'b DatabaseProviderRW, + start_block: u64, +) -> Result, StageError> +where + 'b: 'a, +{ + // Get next expected receipt number + let tx = provider.tx_ref(); + let next_receipt_num = tx + .cursor_read::()? + .seek_exact(start_block)? 
+ .map(|(_, value)| value.first_tx_num) + .unwrap_or(0); + + // Get next expected receipt number in static files + let static_file_provider = provider.static_file_provider(); + let next_static_file_receipt_num = static_file_provider + .get_highest_static_file_tx(StaticFileSegment::Receipts) + .map(|num| num + 1) + .unwrap_or(0); + + let mut static_file_producer = + static_file_provider.get_writer(start_block, StaticFileSegment::Receipts)?; + + // Check if we had any unexpected shutdown after committing to static files, but + // NOT committing to database. + match next_static_file_receipt_num.cmp(&next_receipt_num) { + Ordering::Greater => static_file_producer.prune_receipts( + next_static_file_receipt_num - next_receipt_num, + start_block.saturating_sub(1), + )?, + Ordering::Less => { + let last_block = static_file_provider + .get_highest_static_file_block(StaticFileSegment::Receipts) + .unwrap_or(0); + + let missing_block = Box::new( + tx.get::(last_block + 1)?.unwrap_or_default().seal_slow(), + ); + + return Err(StageError::MissingStaticFileData { + block: missing_block, + segment: StaticFileSegment::Receipts, + }) + } + Ordering::Equal => {} + } + + Ok(static_file_producer) +} + #[cfg(test)] mod tests { use super::*; use crate::test_utils::TestStageDB; use alloy_rlp::Decodable; use assert_matches::assert_matches; - use reth_db::{models::AccountBeforeTx, test_utils::create_test_rw_db}; + use reth_db::models::AccountBeforeTx; use reth_interfaces::executor::BlockValidationError; use reth_node_ethereum::EthEvmConfig; use reth_primitives::{ - address, hex_literal::hex, keccak256, stage::StageUnitCheckpoint, Account, Bytecode, - ChainSpecBuilder, SealedBlock, StorageEntry, B256, MAINNET, + address, hex_literal::hex, keccak256, stage::StageUnitCheckpoint, Account, Address, + Bytecode, ChainSpecBuilder, PruneMode, ReceiptsLogPruneConfig, SealedBlock, StorageEntry, + B256, }; - use reth_provider::{AccountReader, BlockWriter, ProviderFactory, ReceiptProvider}; + use reth_provider::{test_utils::create_test_provider_factory, AccountReader, ReceiptProvider}; use reth_revm::EvmProcessorFactory; - use std::sync::Arc; + use std::{collections::BTreeMap, sync::Arc}; fn stage() -> ExecutionStage> { let executor_factory = EvmProcessorFactory::new( @@ -530,9 +631,7 @@ mod tests { #[test] fn execution_checkpoint_matches() { - let state_db = create_test_rw_db(); - let factory = ProviderFactory::new(state_db.as_ref(), MAINNET.clone()); - let tx = factory.provider_rw().unwrap(); + let factory = create_test_provider_factory(); let previous_stage_checkpoint = ExecutionCheckpoint { block_range: CheckpointBlockRange { from: 0, to: 0 }, @@ -544,7 +643,7 @@ mod tests { }; let stage_checkpoint = execution_checkpoint( - &tx, + &factory.static_file_provider(), previous_stage_checkpoint.block_range.from, previous_stage_checkpoint.block_range.to, previous_checkpoint, @@ -555,8 +654,7 @@ mod tests { #[test] fn execution_checkpoint_precedes() { - let state_db = create_test_rw_db(); - let factory = ProviderFactory::new(state_db.as_ref(), MAINNET.clone()); + let factory = create_test_provider_factory(); let provider = factory.provider_rw().unwrap(); let mut genesis_rlp = 
hex!("f901faf901f5a00000000000000000000000000000000000000000000000000000000000000000a01dcc4de8dec75d7aab85b567b6ccd41ad312451b948a7413f0a142fd40d49347942adc25665018aa1fe0e6bc666dac8fc2697ff9baa045571b40ae66ca7480791bbb2887286e4e4c4b1b298b191c889d6959023a32eda056e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421a056e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421b901000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000083020000808502540be400808000a00000000000000000000000000000000000000000000000000000000000000000880000000000000000c0c0").as_slice(); @@ -564,7 +662,7 @@ mod tests { let mut block_rlp = hex!("f90262f901f9a075c371ba45999d87f4542326910a11af515897aebce5265d3f6acd1f1161f82fa01dcc4de8dec75d7aab85b567b6ccd41ad312451b948a7413f0a142fd40d49347942adc25665018aa1fe0e6bc666dac8fc2697ff9baa098f2dcd87c8ae4083e7017a05456c14eea4b1db2032126e27b3b1563d57d7cc0a08151d548273f6683169524b66ca9fe338b9ce42bc3540046c828fd939ae23bcba03f4e5c2ec5b2170b711d97ee755c160457bb58d8daa338e835ec02ae6860bbabb901000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000083020000018502540be40082a8798203e800a00000000000000000000000000000000000000000000000000000000000000000880000000000000000f863f861800a8405f5e10094100000000000000000000000000000000000000080801ba07e09e26678ed4fac08a249ebe8ed680bf9051a5e14ad223e4b2b9d26e0208f37a05f6e3f188e3e6eab7d7d3b6568f5eac7d687b08d307d3154ccd8c87b4630509bc0").as_slice(); let block = SealedBlock::decode(&mut block_rlp).unwrap(); provider - .insert_block( + .insert_historical_block( genesis .try_seal_with_senders() .map_err(|_| BlockValidationError::SenderRecoveryError) @@ -572,7 +670,15 @@ mod tests { None, ) .unwrap(); - provider.insert_block(block.clone().try_seal_with_senders().unwrap(), None).unwrap(); + provider + .insert_historical_block(block.clone().try_seal_with_senders().unwrap(), None) + .unwrap(); + provider + .static_file_provider() + .latest_writer(StaticFileSegment::Headers) + .unwrap() + .commit() + .unwrap(); provider.commit().unwrap(); let previous_stage_checkpoint = ExecutionCheckpoint { @@ -584,8 +690,8 @@ mod tests { stage_checkpoint: Some(StageUnitCheckpoint::Execution(previous_stage_checkpoint)), }; - let provider = factory.provider_rw().unwrap(); - let stage_checkpoint = execution_checkpoint(&provider, 1, 1, previous_checkpoint); + let stage_checkpoint = + execution_checkpoint(&factory.static_file_provider(), 1, 1, previous_checkpoint); assert_matches!(stage_checkpoint, Ok(ExecutionCheckpoint { block_range: CheckpointBlockRange { from: 1, to: 1 }, @@ -599,16 +705,23 @@ mod tests { #[test] fn 
execution_checkpoint_recalculate_full_previous_some() { - let state_db = create_test_rw_db(); - let factory = ProviderFactory::new(state_db.as_ref(), MAINNET.clone()); + let factory = create_test_provider_factory(); let provider = factory.provider_rw().unwrap(); let mut genesis_rlp = hex!("f901faf901f5a00000000000000000000000000000000000000000000000000000000000000000a01dcc4de8dec75d7aab85b567b6ccd41ad312451b948a7413f0a142fd40d49347942adc25665018aa1fe0e6bc666dac8fc2697ff9baa045571b40ae66ca7480791bbb2887286e4e4c4b1b298b191c889d6959023a32eda056e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421a056e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421b901000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000083020000808502540be400808000a00000000000000000000000000000000000000000000000000000000000000000880000000000000000c0c0").as_slice(); let genesis = SealedBlock::decode(&mut genesis_rlp).unwrap(); let mut block_rlp = hex!("f90262f901f9a075c371ba45999d87f4542326910a11af515897aebce5265d3f6acd1f1161f82fa01dcc4de8dec75d7aab85b567b6ccd41ad312451b948a7413f0a142fd40d49347942adc25665018aa1fe0e6bc666dac8fc2697ff9baa098f2dcd87c8ae4083e7017a05456c14eea4b1db2032126e27b3b1563d57d7cc0a08151d548273f6683169524b66ca9fe338b9ce42bc3540046c828fd939ae23bcba03f4e5c2ec5b2170b711d97ee755c160457bb58d8daa338e835ec02ae6860bbabb901000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000083020000018502540be40082a8798203e800a00000000000000000000000000000000000000000000000000000000000000000880000000000000000f863f861800a8405f5e10094100000000000000000000000000000000000000080801ba07e09e26678ed4fac08a249ebe8ed680bf9051a5e14ad223e4b2b9d26e0208f37a05f6e3f188e3e6eab7d7d3b6568f5eac7d687b08d307d3154ccd8c87b4630509bc0").as_slice(); let block = SealedBlock::decode(&mut block_rlp).unwrap(); - provider.insert_block(genesis.try_seal_with_senders().unwrap(), None).unwrap(); - provider.insert_block(block.clone().try_seal_with_senders().unwrap(), None).unwrap(); + provider.insert_historical_block(genesis.try_seal_with_senders().unwrap(), None).unwrap(); + provider + .insert_historical_block(block.clone().try_seal_with_senders().unwrap(), None) + .unwrap(); + provider + .static_file_provider() + .latest_writer(StaticFileSegment::Headers) + .unwrap() + .commit() + .unwrap(); provider.commit().unwrap(); let previous_stage_checkpoint = ExecutionCheckpoint { @@ -620,8 +733,8 @@ mod tests { stage_checkpoint: Some(StageUnitCheckpoint::Execution(previous_stage_checkpoint)), }; - let provider = factory.provider_rw().unwrap(); - let stage_checkpoint = execution_checkpoint(&provider, 1, 1, previous_checkpoint); + let stage_checkpoint 
= + execution_checkpoint(&factory.static_file_provider(), 1, 1, previous_checkpoint); assert_matches!(stage_checkpoint, Ok(ExecutionCheckpoint { block_range: CheckpointBlockRange { from: 1, to: 1 }, @@ -635,22 +748,29 @@ mod tests { #[test] fn execution_checkpoint_recalculate_full_previous_none() { - let state_db = create_test_rw_db(); - let factory = ProviderFactory::new(state_db.as_ref(), MAINNET.clone()); + let factory = create_test_provider_factory(); let provider = factory.provider_rw().unwrap(); let mut genesis_rlp = hex!("f901faf901f5a00000000000000000000000000000000000000000000000000000000000000000a01dcc4de8dec75d7aab85b567b6ccd41ad312451b948a7413f0a142fd40d49347942adc25665018aa1fe0e6bc666dac8fc2697ff9baa045571b40ae66ca7480791bbb2887286e4e4c4b1b298b191c889d6959023a32eda056e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421a056e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421b901000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000083020000808502540be400808000a00000000000000000000000000000000000000000000000000000000000000000880000000000000000c0c0").as_slice(); let genesis = SealedBlock::decode(&mut genesis_rlp).unwrap(); let mut block_rlp = hex!("f90262f901f9a075c371ba45999d87f4542326910a11af515897aebce5265d3f6acd1f1161f82fa01dcc4de8dec75d7aab85b567b6ccd41ad312451b948a7413f0a142fd40d49347942adc25665018aa1fe0e6bc666dac8fc2697ff9baa098f2dcd87c8ae4083e7017a05456c14eea4b1db2032126e27b3b1563d57d7cc0a08151d548273f6683169524b66ca9fe338b9ce42bc3540046c828fd939ae23bcba03f4e5c2ec5b2170b711d97ee755c160457bb58d8daa338e835ec02ae6860bbabb901000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000083020000018502540be40082a8798203e800a00000000000000000000000000000000000000000000000000000000000000000880000000000000000f863f861800a8405f5e10094100000000000000000000000000000000000000080801ba07e09e26678ed4fac08a249ebe8ed680bf9051a5e14ad223e4b2b9d26e0208f37a05f6e3f188e3e6eab7d7d3b6568f5eac7d687b08d307d3154ccd8c87b4630509bc0").as_slice(); let block = SealedBlock::decode(&mut block_rlp).unwrap(); - provider.insert_block(genesis.try_seal_with_senders().unwrap(), None).unwrap(); - provider.insert_block(block.clone().try_seal_with_senders().unwrap(), None).unwrap(); + provider.insert_historical_block(genesis.try_seal_with_senders().unwrap(), None).unwrap(); + provider + .insert_historical_block(block.clone().try_seal_with_senders().unwrap(), None) + .unwrap(); + provider + .static_file_provider() + .latest_writer(StaticFileSegment::Headers) + .unwrap() + .commit() + .unwrap(); provider.commit().unwrap(); let previous_checkpoint = StageCheckpoint { block_number: 1, stage_checkpoint: None 
}; - let provider = factory.provider_rw().unwrap(); - let stage_checkpoint = execution_checkpoint(&provider, 1, 1, previous_checkpoint); + let stage_checkpoint = + execution_checkpoint(&factory.static_file_provider(), 1, 1, previous_checkpoint); assert_matches!(stage_checkpoint, Ok(ExecutionCheckpoint { block_range: CheckpointBlockRange { from: 1, to: 1 }, @@ -665,16 +785,23 @@ mod tests { async fn sanity_execution_of_block() { // TODO cleanup the setup after https://github.com/paradigmxyz/reth/issues/332 // is merged as it has similar framework - let state_db = create_test_rw_db(); - let factory = ProviderFactory::new(state_db.as_ref(), MAINNET.clone()); + let factory = create_test_provider_factory(); let provider = factory.provider_rw().unwrap(); let input = ExecInput { target: Some(1), checkpoint: None }; let mut genesis_rlp = hex!("f901faf901f5a00000000000000000000000000000000000000000000000000000000000000000a01dcc4de8dec75d7aab85b567b6ccd41ad312451b948a7413f0a142fd40d49347942adc25665018aa1fe0e6bc666dac8fc2697ff9baa045571b40ae66ca7480791bbb2887286e4e4c4b1b298b191c889d6959023a32eda056e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421a056e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421b901000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000083020000808502540be400808000a00000000000000000000000000000000000000000000000000000000000000000880000000000000000c0c0").as_slice(); let genesis = SealedBlock::decode(&mut genesis_rlp).unwrap(); let mut block_rlp = hex!("f90262f901f9a075c371ba45999d87f4542326910a11af515897aebce5265d3f6acd1f1161f82fa01dcc4de8dec75d7aab85b567b6ccd41ad312451b948a7413f0a142fd40d49347942adc25665018aa1fe0e6bc666dac8fc2697ff9baa098f2dcd87c8ae4083e7017a05456c14eea4b1db2032126e27b3b1563d57d7cc0a08151d548273f6683169524b66ca9fe338b9ce42bc3540046c828fd939ae23bcba03f4e5c2ec5b2170b711d97ee755c160457bb58d8daa338e835ec02ae6860bbabb901000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000083020000018502540be40082a8798203e800a00000000000000000000000000000000000000000000000000000000000000000880000000000000000f863f861800a8405f5e10094100000000000000000000000000000000000000080801ba07e09e26678ed4fac08a249ebe8ed680bf9051a5e14ad223e4b2b9d26e0208f37a05f6e3f188e3e6eab7d7d3b6568f5eac7d687b08d307d3154ccd8c87b4630509bc0").as_slice(); let block = SealedBlock::decode(&mut block_rlp).unwrap(); - provider.insert_block(genesis.try_seal_with_senders().unwrap(), None).unwrap(); - provider.insert_block(block.clone().try_seal_with_senders().unwrap(), None).unwrap(); + provider.insert_historical_block(genesis.try_seal_with_senders().unwrap(), None).unwrap(); + provider + 
.insert_historical_block(block.clone().try_seal_with_senders().unwrap(), None) + .unwrap(); + provider + .static_file_provider() + .latest_writer(StaticFileSegment::Headers) + .unwrap() + .commit() + .unwrap(); provider.commit().unwrap(); // insert pre state @@ -701,69 +828,101 @@ mod tests { db_tx.put::(code_hash, Bytecode::new_raw(code.to_vec().into())).unwrap(); provider.commit().unwrap(); - let provider = factory.provider_rw().unwrap(); - let mut execution_stage: ExecutionStage> = stage(); - let output = execution_stage.execute(&provider, input).unwrap(); - provider.commit().unwrap(); - assert_matches!(output, ExecOutput { - checkpoint: StageCheckpoint { - block_number: 1, - stage_checkpoint: Some(StageUnitCheckpoint::Execution(ExecutionCheckpoint { - block_range: CheckpointBlockRange { - from: 1, - to: 1, - }, - progress: EntitiesCheckpoint { - processed, - total - } - })) - }, - done: true - } if processed == total && total == block.gas_used); - - let provider = factory.provider().unwrap(); - - // check post state - let account1 = address!("1000000000000000000000000000000000000000"); - let account1_info = - Account { balance: U256::ZERO, nonce: 0x00, bytecode_hash: Some(code_hash) }; - let account2 = address!("2adc25665018aa1fe0e6bc666dac8fc2697ff9ba"); - let account2_info = Account { - balance: U256::from(0x1bc16d674ece94bau128), - nonce: 0x00, - bytecode_hash: None, - }; - let account3 = address!("a94f5374fce5edbc8e2a8697c15331677e6ebf0b"); - let account3_info = Account { - balance: U256::from(0x3635c9adc5de996b46u128), - nonce: 0x01, - bytecode_hash: None, - }; + // execute - // assert accounts - assert_eq!( - provider.basic_account(account1), - Ok(Some(account1_info)), - "Post changed of a account" - ); - assert_eq!( - provider.basic_account(account2), - Ok(Some(account2_info)), - "Post changed of a account" - ); - assert_eq!( - provider.basic_account(account3), - Ok(Some(account3_info)), - "Post changed of a account" - ); - // assert storage - // Get on dupsort would return only first value. This is good enough for this test. - assert_eq!( - provider.tx_ref().get::(account1), - Ok(Some(StorageEntry { key: B256::with_last_byte(1), value: U256::from(2) })), - "Post changed of a account" - ); + // If there is a pruning configuration, then it's forced to use the database. + // This way we test both cases. 
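The two test modes exercised below hinge on a single predicate: receipts are written to static files only when no receipt pruning or log filtering is configured, otherwise they stay in the database. A small stand-in sketch of that rule (field names are invented for the sketch, not the real `PruneModes` API):

```rust
/// Stand-in for the receipt-destination decision made by the execution stage.
#[derive(Default)]
struct ReceiptPruneConfig {
    receipts: Option<u64>,     // Some(_) models a configured receipt prune mode
    log_filter_entries: usize, // models entries in the receipts log filter
}

fn receipts_use_static_files(cfg: &ReceiptPruneConfig) -> bool {
    // Static files are only used when no receipt pruning/filtering is configured.
    cfg.receipts.is_none() && cfg.log_filter_entries == 0
}

fn main() {
    // Archive-style node: no receipt pruning, so receipts go to static files.
    assert!(receipts_use_static_files(&ReceiptPruneConfig::default()));
    // Any pruning/filtering forces the database path, which the second test mode exercises.
    assert!(!receipts_use_static_files(&ReceiptPruneConfig {
        receipts: None,
        log_filter_entries: 1,
    }));
}
```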
+ let modes = [None, Some(PruneModes::none())]; + let random_filter = + ReceiptsLogPruneConfig(BTreeMap::from([(Address::random(), PruneMode::Full)])); + + // Tests node with database and node with static files + for mut mode in modes { + let provider = factory.provider_rw().unwrap(); + + if let Some(mode) = &mut mode { + // Simulating a full node where we write receipts to database + mode.receipts_log_filter = random_filter.clone(); + } + + let mut execution_stage: ExecutionStage> = stage(); + execution_stage.prune_modes = mode.clone().unwrap_or_default(); + + let output = execution_stage.execute(&provider, input).unwrap(); + provider.commit().unwrap(); + + assert_matches!(output, ExecOutput { + checkpoint: StageCheckpoint { + block_number: 1, + stage_checkpoint: Some(StageUnitCheckpoint::Execution(ExecutionCheckpoint { + block_range: CheckpointBlockRange { + from: 1, + to: 1, + }, + progress: EntitiesCheckpoint { + processed, + total + } + })) + }, + done: true + } if processed == total && total == block.gas_used); + + let provider = factory.provider().unwrap(); + + // check post state + let account1 = address!("1000000000000000000000000000000000000000"); + let account1_info = + Account { balance: U256::ZERO, nonce: 0x00, bytecode_hash: Some(code_hash) }; + let account2 = address!("2adc25665018aa1fe0e6bc666dac8fc2697ff9ba"); + let account2_info = Account { + balance: U256::from(0x1bc16d674ece94bau128), + nonce: 0x00, + bytecode_hash: None, + }; + let account3 = address!("a94f5374fce5edbc8e2a8697c15331677e6ebf0b"); + let account3_info = Account { + balance: U256::from(0x3635c9adc5de996b46u128), + nonce: 0x01, + bytecode_hash: None, + }; + + // assert accounts + assert_eq!( + provider.basic_account(account1), + Ok(Some(account1_info)), + "Post changed of a account" + ); + assert_eq!( + provider.basic_account(account2), + Ok(Some(account2_info)), + "Post changed of a account" + ); + assert_eq!( + provider.basic_account(account3), + Ok(Some(account3_info)), + "Post changed of a account" + ); + // assert storage + // Get on dupsort would return only first value. This is good enough for this test. 
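As context for the `processed == total && total == block.gas_used` assertions in this test: the execution checkpoint measures progress in units of gas rather than blocks. A tiny illustrative restatement of that bookkeeping (field and method names are made up for the sketch):

```rust
/// Gas-denominated progress tracking, mirroring the idea behind the execution
/// stage's entities checkpoint.
struct EntitiesProgress {
    processed: u64,
    total: u64,
}

impl EntitiesProgress {
    /// Each executed block advances progress by the gas it consumed.
    fn record_block(&mut self, gas_used: u64) {
        self.processed += gas_used;
    }
    fn done(&self) -> bool {
        self.processed >= self.total
    }
}

fn main() {
    let mut progress = EntitiesProgress { processed: 0, total: 100_000 };
    progress.record_block(60_000);
    progress.record_block(40_000);
    assert!(progress.done());
}
```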
+ assert_eq!( + provider.tx_ref().get::(account1), + Ok(Some(StorageEntry { key: B256::with_last_byte(1), value: U256::from(2) })), + "Post changed of a account" + ); + + let provider = factory.provider_rw().unwrap(); + let mut stage = stage(); + stage.prune_modes = mode.unwrap_or_default(); + + let _result = stage + .unwind( + &provider, + UnwindInput { checkpoint: output.checkpoint, unwind_to: 0, bad_block: None }, + ) + .unwrap(); + provider.commit().unwrap(); + } } #[tokio::test] @@ -771,16 +930,23 @@ mod tests { // TODO cleanup the setup after https://github.com/paradigmxyz/reth/issues/332 // is merged as it has similar framework - let state_db = create_test_rw_db(); - let factory = ProviderFactory::new(state_db.as_ref(), MAINNET.clone()); + let factory = create_test_provider_factory(); let provider = factory.provider_rw().unwrap(); let input = ExecInput { target: Some(1), checkpoint: None }; let mut genesis_rlp = hex!("f901faf901f5a00000000000000000000000000000000000000000000000000000000000000000a01dcc4de8dec75d7aab85b567b6ccd41ad312451b948a7413f0a142fd40d49347942adc25665018aa1fe0e6bc666dac8fc2697ff9baa045571b40ae66ca7480791bbb2887286e4e4c4b1b298b191c889d6959023a32eda056e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421a056e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421b901000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000083020000808502540be400808000a00000000000000000000000000000000000000000000000000000000000000000880000000000000000c0c0").as_slice(); let genesis = SealedBlock::decode(&mut genesis_rlp).unwrap(); let mut block_rlp = hex!("f90262f901f9a075c371ba45999d87f4542326910a11af515897aebce5265d3f6acd1f1161f82fa01dcc4de8dec75d7aab85b567b6ccd41ad312451b948a7413f0a142fd40d49347942adc25665018aa1fe0e6bc666dac8fc2697ff9baa098f2dcd87c8ae4083e7017a05456c14eea4b1db2032126e27b3b1563d57d7cc0a08151d548273f6683169524b66ca9fe338b9ce42bc3540046c828fd939ae23bcba03f4e5c2ec5b2170b711d97ee755c160457bb58d8daa338e835ec02ae6860bbabb901000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000083020000018502540be40082a8798203e800a00000000000000000000000000000000000000000000000000000000000000000880000000000000000f863f861800a8405f5e10094100000000000000000000000000000000000000080801ba07e09e26678ed4fac08a249ebe8ed680bf9051a5e14ad223e4b2b9d26e0208f37a05f6e3f188e3e6eab7d7d3b6568f5eac7d687b08d307d3154ccd8c87b4630509bc0").as_slice(); let block = SealedBlock::decode(&mut block_rlp).unwrap(); - provider.insert_block(genesis.try_seal_with_senders().unwrap(), None).unwrap(); - provider.insert_block(block.clone().try_seal_with_senders().unwrap(), None).unwrap(); + 
provider.insert_historical_block(genesis.try_seal_with_senders().unwrap(), None).unwrap(); + provider + .insert_historical_block(block.clone().try_seal_with_senders().unwrap(), None) + .unwrap(); + provider + .static_file_provider() + .latest_writer(StaticFileSegment::Headers) + .unwrap() + .commit() + .unwrap(); provider.commit().unwrap(); // variables @@ -802,44 +968,77 @@ mod tests { provider.commit().unwrap(); // execute - let provider = factory.provider_rw().unwrap(); - let mut execution_stage = stage(); - let result = execution_stage.execute(&provider, input).unwrap(); - provider.commit().unwrap(); - - let provider = factory.provider_rw().unwrap(); - let mut stage = stage(); - let result = stage - .unwind( - &provider, - UnwindInput { checkpoint: result.checkpoint, unwind_to: 0, bad_block: None }, - ) - .unwrap(); - - assert_matches!(result, UnwindOutput { - checkpoint: StageCheckpoint { - block_number: 0, - stage_checkpoint: Some(StageUnitCheckpoint::Execution(ExecutionCheckpoint { - block_range: CheckpointBlockRange { - from: 1, - to: 1, - }, - progress: EntitiesCheckpoint { - processed: 0, - total - } - })) + let mut provider = factory.provider_rw().unwrap(); + + // If there is a pruning configuration, then it's forced to use the database. + // This way we test both cases. + let modes = [None, Some(PruneModes::none())]; + let random_filter = + ReceiptsLogPruneConfig(BTreeMap::from([(Address::random(), PruneMode::Full)])); + + // Tests node with database and node with static files + for mut mode in modes { + if let Some(mode) = &mut mode { + // Simulating a full node where we write receipts to database + mode.receipts_log_filter = random_filter.clone(); } - } if total == block.gas_used); - // assert unwind stage - assert_eq!(provider.basic_account(acc1), Ok(Some(acc1_info)), "Pre changed of a account"); - assert_eq!(provider.basic_account(acc2), Ok(Some(acc2_info)), "Post changed of a account"); + // Test Execution + let mut execution_stage = stage(); + execution_stage.prune_modes = mode.clone().unwrap_or_default(); + + let result = execution_stage.execute(&provider, input).unwrap(); + provider.commit().unwrap(); - let miner_acc = address!("2adc25665018aa1fe0e6bc666dac8fc2697ff9ba"); - assert_eq!(provider.basic_account(miner_acc), Ok(None), "Third account should be unwound"); + // Test Unwind + provider = factory.provider_rw().unwrap(); + let mut stage = stage(); + stage.prune_modes = mode.unwrap_or_default(); - assert_eq!(provider.receipt(0), Ok(None), "First receipt should be unwound"); + let result = stage + .unwind( + &provider, + UnwindInput { checkpoint: result.checkpoint, unwind_to: 0, bad_block: None }, + ) + .unwrap(); + + assert_matches!(result, UnwindOutput { + checkpoint: StageCheckpoint { + block_number: 0, + stage_checkpoint: Some(StageUnitCheckpoint::Execution(ExecutionCheckpoint { + block_range: CheckpointBlockRange { + from: 1, + to: 1, + }, + progress: EntitiesCheckpoint { + processed: 0, + total + } + })) + } + } if total == block.gas_used); + + // assert unwind stage + assert_eq!( + provider.basic_account(acc1), + Ok(Some(acc1_info)), + "Pre changed of a account" + ); + assert_eq!( + provider.basic_account(acc2), + Ok(Some(acc2_info)), + "Post changed of a account" + ); + + let miner_acc = address!("2adc25665018aa1fe0e6bc666dac8fc2697ff9ba"); + assert_eq!( + provider.basic_account(miner_acc), + Ok(None), + "Third account should be unwound" + ); + + assert_eq!(provider.receipt(0), Ok(None), "First receipt should be unwound"); + } } #[tokio::test] @@ -851,8 
+1050,16 @@ mod tests { let genesis = SealedBlock::decode(&mut genesis_rlp).unwrap(); let mut block_rlp = hex!("f9025ff901f7a0c86e8cc0310ae7c531c758678ddbfd16fc51c8cef8cec650b032de9869e8b94fa01dcc4de8dec75d7aab85b567b6ccd41ad312451b948a7413f0a142fd40d49347942adc25665018aa1fe0e6bc666dac8fc2697ff9baa050554882fbbda2c2fd93fdc466db9946ea262a67f7a76cc169e714f105ab583da00967f09ef1dfed20c0eacfaa94d5cd4002eda3242ac47eae68972d07b106d192a0e3c8b47fbfc94667ef4cceb17e5cc21e3b1eebd442cebb27f07562b33836290db90100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000008302000001830f42408238108203e800a00000000000000000000000000000000000000000000000000000000000000000880000000000000000f862f860800a83061a8094095e7baea6a6c7c4c2dfeb977efac326af552d8780801ba072ed817487b84ba367d15d2f039b5fc5f087d0a8882fbdf73e8cb49357e1ce30a0403d800545b8fc544f92ce8124e2255f8c3c6af93f28243a120585d4c4c6a2a3c0").as_slice(); let block = SealedBlock::decode(&mut block_rlp).unwrap(); - provider.insert_block(genesis.try_seal_with_senders().unwrap(), None).unwrap(); - provider.insert_block(block.clone().try_seal_with_senders().unwrap(), None).unwrap(); + provider.insert_historical_block(genesis.try_seal_with_senders().unwrap(), None).unwrap(); + provider + .insert_historical_block(block.clone().try_seal_with_senders().unwrap(), None) + .unwrap(); + provider + .static_file_provider() + .latest_writer(StaticFileSegment::Headers) + .unwrap() + .commit() + .unwrap(); provider.commit().unwrap(); // variables diff --git a/crates/stages/src/stages/finish.rs b/crates/stages/src/stages/finish.rs index 341be77dd1e..e0e0057c3c7 100644 --- a/crates/stages/src/stages/finish.rs +++ b/crates/stages/src/stages/finish.rs @@ -45,6 +45,7 @@ mod tests { generators::{random_header, random_header_range}, }; use reth_primitives::SealedHeader; + use reth_provider::providers::StaticFileWriter; stage_test_suite_ext!(FinishTestRunner, finish); diff --git a/crates/stages/src/stages/hashing_account.rs b/crates/stages/src/stages/hashing_account.rs index 7e500dca20f..13b8b4a5348 100644 --- a/crates/stages/src/stages/hashing_account.rs +++ b/crates/stages/src/stages/hashing_account.rs @@ -8,7 +8,7 @@ use reth_db::{ transaction::{DbTx, DbTxMut}, RawKey, RawTable, }; -use reth_interfaces::db::DatabaseError; +use reth_interfaces::provider::ProviderResult; use reth_primitives::{ keccak256, stage::{ @@ -16,7 +16,7 @@ use reth_primitives::{ StageId, }, }; -use reth_provider::{AccountExtReader, DatabaseProviderRW, HashingWriter}; +use reth_provider::{AccountExtReader, DatabaseProviderRW, HashingWriter, StatsReader}; use std::{ cmp::max, fmt::Debug, @@ -88,15 +88,21 @@ impl AccountHashingStage { generators::{random_block_range, random_eoa_accounts}, }; use reth_primitives::{Account, B256, U256}; - use reth_provider::BlockWriter; + use reth_provider::providers::StaticFileWriter; let mut rng = generators::rng(); let blocks = random_block_range(&mut rng, opts.blocks.clone(), B256::ZERO, opts.txs); for block in blocks { - provider.insert_block(block.try_seal_with_senders().unwrap(), None).unwrap(); + 
provider.insert_historical_block(block.try_seal_with_senders().unwrap(), None).unwrap(); } + provider + .static_file_provider() + .latest_writer(reth_primitives::StaticFileSegment::Headers) + .unwrap() + .commit() + .unwrap(); let mut accounts = random_eoa_accounts(&mut rng, opts.accounts); { // Account State generator @@ -289,10 +295,10 @@ impl Stage for AccountHashingStage { fn stage_checkpoint_progress( provider: &DatabaseProviderRW, -) -> Result { +) -> ProviderResult { Ok(EntitiesCheckpoint { - processed: provider.tx_ref().entries::()? as u64, - total: provider.tx_ref().entries::()? as u64, + processed: provider.count_entries::()? as u64, + total: provider.count_entries::()? as u64, }) } @@ -300,10 +306,12 @@ fn stage_checkpoint_progress( mod tests { use super::*; use crate::test_utils::{ - stage_test_suite_ext, ExecuteStageTestRunner, TestRunnerError, UnwindStageTestRunner, + stage_test_suite_ext, ExecuteStageTestRunner, StageTestRunner, TestRunnerError, + UnwindStageTestRunner, }; use assert_matches::assert_matches; use reth_primitives::{stage::StageUnitCheckpoint, Account, U256}; + use reth_provider::providers::StaticFileWriter; use test_utils::*; stage_test_suite_ext!(AccountHashingTestRunner, account_hashing); @@ -435,7 +443,7 @@ mod tests { mod test_utils { use super::*; - use crate::test_utils::{StageTestRunner, TestStageDB}; + use crate::test_utils::TestStageDB; use reth_primitives::Address; pub(crate) struct AccountHashingTestRunner { diff --git a/crates/stages/src/stages/hashing_storage.rs b/crates/stages/src/stages/hashing_storage.rs index f396001c395..c2a18df2994 100644 --- a/crates/stages/src/stages/hashing_storage.rs +++ b/crates/stages/src/stages/hashing_storage.rs @@ -7,7 +7,7 @@ use reth_db::{ tables, transaction::{DbTx, DbTxMut}, }; -use reth_interfaces::db::DatabaseError; +use reth_interfaces::provider::ProviderResult; use reth_primitives::{ keccak256, stage::{ @@ -16,7 +16,7 @@ use reth_primitives::{ }, StorageEntry, }; -use reth_provider::{DatabaseProviderRW, HashingWriter, StorageReader}; +use reth_provider::{DatabaseProviderRW, HashingWriter, StatsReader, StorageReader}; use std::{collections::BTreeMap, fmt::Debug}; use tracing::*; @@ -214,10 +214,10 @@ impl Stage for StorageHashingStage { fn stage_checkpoint_progress( provider: &DatabaseProviderRW, -) -> Result { +) -> ProviderResult { Ok(EntitiesCheckpoint { - processed: provider.tx_ref().entries::()? as u64, - total: provider.tx_ref().entries::()? as u64, + processed: provider.count_entries::()? as u64, + total: provider.count_entries::()? 
as u64, }) } @@ -239,6 +239,7 @@ mod tests { generators::{random_block_range, random_contract_account_range}, }; use reth_primitives::{stage::StageUnitCheckpoint, Address, SealedBlock, B256, U256}; + use reth_provider::providers::StaticFileWriter; stage_test_suite_ext!(StorageHashingTestRunner, storage_hashing); diff --git a/crates/stages/src/stages/headers.rs b/crates/stages/src/stages/headers.rs index 83a212dcb4d..d34ffa46ba8 100644 --- a/crates/stages/src/stages/headers.rs +++ b/crates/stages/src/stages/headers.rs @@ -1,12 +1,16 @@ -use crate::{ExecInput, ExecOutput, Stage, StageError, UnwindInput, UnwindOutput}; +use crate::{BlockErrorKind, ExecInput, ExecOutput, Stage, StageError, UnwindInput, UnwindOutput}; use futures_util::StreamExt; +use reth_codecs::Compact; use reth_db::{ cursor::{DbCursorRO, DbCursorRW}, database::Database, tables, - transaction::{DbTx, DbTxMut}, + transaction::DbTxMut, + RawKey, RawTable, RawValue, }; +use reth_etl::Collector; use reth_interfaces::{ + consensus::Consensus, p2p::headers::{downloader::HeaderDownloader, error::HeadersDownloaderError}, provider::ProviderError, }; @@ -14,10 +18,18 @@ use reth_primitives::{ stage::{ CheckpointBlockRange, EntitiesCheckpoint, HeadersCheckpoint, StageCheckpoint, StageId, }, - BlockHashOrNumber, BlockNumber, SealedHeader, + BlockHash, BlockNumber, SealedHeader, StaticFileSegment, }; -use reth_provider::{DatabaseProviderRW, HeaderSyncGap, HeaderSyncGapProvider, HeaderSyncMode}; -use std::task::{ready, Context, Poll}; +use reth_provider::{ + providers::{StaticFileProvider, StaticFileWriter}, + BlockHashReader, DatabaseProviderRW, HeaderProvider, HeaderSyncGap, HeaderSyncGapProvider, + HeaderSyncMode, +}; +use std::{ + sync::Arc, + task::{ready, Context, Poll}, +}; +use tempfile::TempDir; use tracing::*; /// The headers stage. @@ -41,10 +53,16 @@ pub struct HeaderStage { downloader: Downloader, /// The sync mode for the stage. mode: HeaderSyncMode, + /// Consensus client implementation + consensus: Arc, /// Current sync gap. sync_gap: Option, - /// Header buffer. - buffer: Option>, + /// ETL collector with HeaderHash -> BlockNumber + hash_collector: Collector, + /// ETL collector with BlockNumber -> SealedHeader + header_collector: Collector, + /// Returns true if the ETL collector has all necessary headers to fill the gap. + is_etl_ready: bool, } // === impl HeaderStage === @@ -54,56 +72,121 @@ where Downloader: HeaderDownloader, { /// Create a new header stage - pub fn new(database: Provider, downloader: Downloader, mode: HeaderSyncMode) -> Self { - Self { provider: database, downloader, mode, sync_gap: None, buffer: None } - } - - fn is_stage_done( - &self, - tx: &::TXMut, - checkpoint: u64, - ) -> Result { - let mut header_cursor = tx.cursor_read::()?; - let (head_num, _) = header_cursor - .seek_exact(checkpoint)? - .ok_or_else(|| ProviderError::HeaderNotFound(checkpoint.into()))?; - // Check if the next entry is congruent - Ok(header_cursor.next()?.map(|(next_num, _)| head_num + 1 == next_num).unwrap_or_default()) + pub fn new( + database: Provider, + downloader: Downloader, + mode: HeaderSyncMode, + consensus: Arc, + tempdir: Arc, + ) -> Self { + Self { + provider: database, + downloader, + mode, + consensus, + sync_gap: None, + hash_collector: Collector::new(tempdir.clone(), 100 * (1024 * 1024)), + header_collector: Collector::new(tempdir, 100 * (1024 * 1024)), + is_etl_ready: false, + } } - /// Write downloaded headers to the given transaction + /// Write downloaded headers to the given transaction from ETL. 
/// - /// Note: this writes the headers with rising block numbers. + /// Writes to the following tables: + /// [`tables::Headers`], [`tables::CanonicalHeaders`], [`tables::HeaderTerminalDifficulties`] + /// and [`tables::HeaderNumbers`]. fn write_headers( - &self, + &mut self, tx: &::TXMut, - headers: Vec, - ) -> Result, StageError> { - trace!(target: "sync::stages::headers", len = headers.len(), "writing headers"); - - let mut cursor_header = tx.cursor_write::()?; - let mut cursor_canonical = tx.cursor_write::()?; + static_file_provider: StaticFileProvider, + ) -> Result { + let total_headers = self.header_collector.len(); + + info!(target: "sync::stages::headers", total = total_headers, "Writing headers"); + + // Consistency check of expected headers in static files vs DB is done on provider::sync_gap + // when poll_execute_ready is polled. + let mut last_header_number = static_file_provider + .get_highest_static_file_block(StaticFileSegment::Headers) + .unwrap_or_default(); + + // Find the latest total difficulty + let mut td = static_file_provider + .header_td_by_number(last_header_number)? + .ok_or(ProviderError::TotalDifficultyNotFound(last_header_number))?; + + // Although headers were downloaded in reverse order, the collector iterates it in ascending + // order + let mut writer = static_file_provider.latest_writer(StaticFileSegment::Headers)?; + let interval = (total_headers / 10).max(1); + for (index, header) in self.header_collector.iter()?.enumerate() { + let (_, header_buf) = header?; + + if index > 0 && index % interval == 0 { + info!(target: "sync::stages::headers", progress = %format!("{:.2}%", (index as f64 / total_headers as f64) * 100.0), "Writing headers"); + } - let mut latest = None; - // Since the headers were returned in descending order, - // iterate them in the reverse order - for header in headers.into_iter().rev() { + let (sealed_header, _) = SealedHeader::from_compact(&header_buf, header_buf.len()); + let (header, header_hash) = sealed_header.split(); if header.number == 0 { continue } + last_header_number = header.number; - let header_hash = header.hash(); - let header_number = header.number; - let header = header.unseal(); - latest = Some(header.number); + // Increase total difficulty + td += header.difficulty; + + // Header validation + self.consensus.validate_header_with_total_difficulty(&header, td).map_err(|error| { + StageError::Block { + block: Box::new(header.clone().seal(header_hash)), + error: BlockErrorKind::Validation(error), + } + })?; - // NOTE: HeaderNumbers are not sorted and can't be inserted with cursor. - tx.put::(header_hash, header_number)?; - cursor_header.insert(header_number, header)?; - cursor_canonical.insert(header_number, header_hash)?; + // Append to Headers segment + writer.append_header(header, td, header_hash)?; } - Ok(latest) + info!(target: "sync::stages::headers", total = total_headers, "Writing header hash index"); + + let mut cursor_header_numbers = tx.cursor_write::>()?; + let mut first_sync = false; + + // If we only have the genesis block hash, then we are at first sync, and we can remove it, + // add it to the collector and use tx.append on all hashes. + if let Some((hash, block_number)) = cursor_header_numbers.last()? { + if block_number.value()? == 0 { + self.hash_collector.insert(hash.key()?, 0); + cursor_header_numbers.delete_current()?; + first_sync = true; + } + } + + // Since ETL sorts all entries by hashes, we are either appending (first sync) or inserting + // in order (further syncs). 
+ for (index, hash_to_number) in self.hash_collector.iter()?.enumerate() { + let (hash, number) = hash_to_number?; + + if index > 0 && index % interval == 0 { + info!(target: "sync::stages::headers", progress = %format!("{:.2}%", (index as f64 / total_headers as f64) * 100.0), "Writing headers hash index"); + } + + if first_sync { + cursor_header_numbers.append( + RawKey::::from_vec(hash), + RawValue::::from_vec(number), + )?; + } else { + cursor_header_numbers.insert( + RawKey::::from_vec(hash), + RawValue::::from_vec(number), + )?; + } + } + + Ok(last_header_number) } } @@ -125,14 +208,8 @@ where ) -> Poll> { let current_checkpoint = input.checkpoint(); - // Return if buffer already has some items. - if self.buffer.is_some() { - // TODO: review - trace!( - target: "sync::stages::headers", - checkpoint = %current_checkpoint.block_number, - "Buffer is not empty" - ); + // Return if stage has already completed the gap on the ETL files + if self.is_etl_ready { return Poll::Ready(Ok(())) } @@ -149,27 +226,42 @@ where target = ?tip, "Target block already reached" ); + self.is_etl_ready = true; return Poll::Ready(Ok(())) } debug!(target: "sync::stages::headers", ?tip, head = ?gap.local_head.hash(), "Commencing sync"); + let local_head_number = gap.local_head.number; // let the downloader know what to sync - self.downloader.update_sync_gap(gap.local_head, gap.target); - - let result = match ready!(self.downloader.poll_next_unpin(cx)) { - Some(Ok(headers)) => { - info!(target: "sync::stages::headers", len = headers.len(), "Received headers"); - self.buffer = Some(headers); - Ok(()) - } - Some(Err(HeadersDownloaderError::DetachedHead { local_head, header, error })) => { - error!(target: "sync::stages::headers", %error, "Cannot attach header to head"); - Err(StageError::DetachedHead { local_head, header, error }) + self.downloader.update_sync_gap(gap.local_head, gap.target.clone()); + + // We only want to stop once we have all the headers on ETL filespace (disk). + loop { + match ready!(self.downloader.poll_next_unpin(cx)) { + Some(Ok(headers)) => { + info!(target: "sync::stages::headers", total = headers.len(), from_block = headers.first().map(|h| h.number), to_block = headers.last().map(|h| h.number), "Received headers"); + for header in headers { + let header_number = header.number; + + self.hash_collector.insert(header.hash(), header_number); + self.header_collector.insert(header_number, header); + + // Headers are downloaded in reverse, so if we reach here, we know we have + // filled the gap. 
+ if header_number == local_head_number + 1 { + self.is_etl_ready = true; + return Poll::Ready(Ok(())) + } + } + } + Some(Err(HeadersDownloaderError::DetachedHead { local_head, header, error })) => { + error!(target: "sync::stages::headers", %error, "Cannot attach header to head"); + return Poll::Ready(Err(StageError::DetachedHead { local_head, header, error })) + } + None => return Poll::Ready(Err(StageError::ChannelClosed)), } - None => Err(StageError::ChannelClosed), - }; - Poll::Ready(result) + } } /// Download the headers in reverse order (falling block numbers) @@ -181,99 +273,41 @@ where ) -> Result { let current_checkpoint = input.checkpoint(); - let gap = self.sync_gap.clone().ok_or(StageError::MissingSyncGap)?; - if gap.is_closed() { + if self.sync_gap.as_ref().ok_or(StageError::MissingSyncGap)?.is_closed() { + self.is_etl_ready = false; return Ok(ExecOutput::done(current_checkpoint)) } - let local_head = gap.local_head.number; - let tip = gap.target.tip(); + // We should be here only after we have downloaded all headers into the disk buffer (ETL). + if !self.is_etl_ready { + return Err(StageError::MissingDownloadBuffer) + } - let downloaded_headers = self.buffer.take().ok_or(StageError::MissingDownloadBuffer)?; - let tip_block_number = match tip { - // If tip is hash and it equals to the first downloaded header's hash, we can use - // the block number of this header as tip. - BlockHashOrNumber::Hash(hash) => downloaded_headers - .first() - .and_then(|header| (header.hash() == hash).then_some(header.number)), - // If tip is number, we can just grab it and not resolve using downloaded headers. - BlockHashOrNumber::Number(number) => Some(number), - }; + // Reset flag + self.is_etl_ready = false; - // Since we're syncing headers in batches, gap tip will move in reverse direction towards - // our local head with every iteration. To get the actual target block number we're - // syncing towards, we need to take into account already synced headers from the database. - // It is `None`, if tip didn't change and we're still downloading headers for previously - // calculated gap. - let tx = provider.tx_ref(); - let target_block_number = if let Some(tip_block_number) = tip_block_number { - let local_max_block_number = tx - .cursor_read::()? - .last()? - .map(|(canonical_block, _)| canonical_block); - - Some(tip_block_number.max(local_max_block_number.unwrap_or_default())) - } else { - None - }; + // Write the headers and related tables to DB from ETL space + let to_be_processed = self.hash_collector.len() as u64; + let last_header_number = + self.write_headers::(provider.tx_ref(), provider.static_file_provider().clone())?; - let mut stage_checkpoint = match current_checkpoint.headers_stage_checkpoint() { - // If checkpoint block range matches our range, we take the previously used - // stage checkpoint as-is. - Some(stage_checkpoint) - if stage_checkpoint.block_range.from == input.checkpoint().block_number => - { - stage_checkpoint - } - // Otherwise, we're on the first iteration of new gap sync, so we recalculate the number - // of already processed and total headers. - // `target_block_number` is guaranteed to be `Some`, because on the first iteration - // we download the header for missing tip and use its block number. 
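Stepping back from the removed bookkeeping above: the header-number index write earlier in this hunk, and the transaction-lookup write later in this patch, reduce to the same pattern. Once the ETL collector hands keys back in sorted order, an effectively empty table can be populated with `append`, while later syncs fall back to `insert`. A condensed sketch, assuming the RawTable cursor API used in this diff:

    use reth_db::{
        cursor::DbCursorRW, tables, transaction::DbTxMut, DatabaseError, RawKey, RawTable, RawValue,
    };
    use reth_primitives::{BlockHash, BlockNumber};

    /// Writes a hash -> number index from entries already sorted by hash.
    fn write_hash_index<TX: DbTxMut>(
        tx: &TX,
        sorted: impl Iterator<Item = (Vec<u8>, Vec<u8>)>,
        append_only: bool,
    ) -> Result<(), DatabaseError> {
        let mut cursor = tx.cursor_write::<RawTable<tables::HeaderNumbers>>()?;
        for (hash, number) in sorted {
            let key = RawKey::<BlockHash>::from_vec(hash);
            let value = RawValue::<BlockNumber>::from_vec(number);
            if append_only {
                // First sync: the table is empty, so sorted keys can simply be appended.
                cursor.append(key, value)?;
            } else {
                // Later syncs: existing entries may interleave, so insert in order instead.
                cursor.insert(key, value)?;
            }
        }
        Ok(())
    }
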
- _ => { - let target = target_block_number.expect("No downloaded header for tip found"); + Ok(ExecOutput { + checkpoint: StageCheckpoint::new(last_header_number).with_headers_stage_checkpoint( HeadersCheckpoint { block_range: CheckpointBlockRange { from: input.checkpoint().block_number, - to: target, + to: last_header_number, }, progress: EntitiesCheckpoint { - // Set processed to the local head block number + number - // of block already filled in the gap. - processed: local_head + (target - tip_block_number.unwrap_or_default()), - total: target, + processed: input.checkpoint().block_number + to_be_processed, + total: last_header_number, }, - } - } - }; - - // Total headers can be updated if we received new tip from the network, and need to fill - // the local gap. - if let Some(target_block_number) = target_block_number { - stage_checkpoint.progress.total = target_block_number; - } - stage_checkpoint.progress.processed += downloaded_headers.len() as u64; - - // Write the headers to db - self.write_headers::(tx, downloaded_headers)?.unwrap_or_default(); - - if self.is_stage_done::(tx, current_checkpoint.block_number)? { - let checkpoint = current_checkpoint.block_number.max( - tx.cursor_read::()? - .last()? - .map(|(num, _)| num) - .unwrap_or_default(), - ); - Ok(ExecOutput { - checkpoint: StageCheckpoint::new(checkpoint) - .with_headers_stage_checkpoint(stage_checkpoint), - done: true, - }) - } else { - Ok(ExecOutput { - checkpoint: current_checkpoint.with_headers_stage_checkpoint(stage_checkpoint), - done: false, - }) - } + }, + ), + // We only reach here if all headers have been downloaded by ETL, and pushed to DB all + // in one stage run. + done: true, + }) } /// Unwind the stage. @@ -282,23 +316,30 @@ where provider: &DatabaseProviderRW, input: UnwindInput, ) -> Result { - self.buffer.take(); self.sync_gap.take(); - provider.unwind_table_by_walker::( - input.unwind_to + 1, - )?; - provider.unwind_table_by_num::(input.unwind_to)?; - let unwound_headers = provider.unwind_table_by_num::(input.unwind_to)?; + let static_file_provider = provider.static_file_provider(); + let highest_block = static_file_provider + .get_highest_static_file_block(StaticFileSegment::Headers) + .unwrap_or_default(); + let unwound_headers = highest_block - input.unwind_to; + + for block in (input.unwind_to + 1)..=highest_block { + let header_hash = static_file_provider + .block_hash(block)? 
+ .ok_or(ProviderError::HeaderNotFound(block.into()))?; + + provider.tx_ref().delete::(header_hash, None)?; + } + + let mut writer = static_file_provider.latest_writer(StaticFileSegment::Headers)?; + writer.prune_headers(unwound_headers)?; let stage_checkpoint = input.checkpoint.headers_stage_checkpoint().map(|stage_checkpoint| HeadersCheckpoint { block_range: stage_checkpoint.block_range, progress: EntitiesCheckpoint { - processed: stage_checkpoint - .progress - .processed - .saturating_sub(unwound_headers as u64), + processed: stage_checkpoint.progress.processed.saturating_sub(unwound_headers), total: stage_checkpoint.progress.total, }, }); @@ -335,9 +376,7 @@ mod tests { generators, generators::random_header_range, TestConsensus, TestHeaderDownloader, TestHeadersClient, }; - use reth_primitives::U256; - use reth_provider::{BlockHashReader, BlockNumReader, HeaderProvider}; - use std::sync::Arc; + use reth_provider::BlockNumReader; use tokio::sync::watch; pub(crate) struct HeadersTestRunner { @@ -345,6 +384,7 @@ mod tests { channel: (watch::Sender, watch::Receiver), downloader_factory: Box D + Send + Sync + 'static>, db: TestStageDB, + consensus: Arc, } impl Default for HeadersTestRunner { @@ -353,6 +393,7 @@ mod tests { Self { client: client.clone(), channel: watch::channel(B256::ZERO), + consensus: Arc::new(TestConsensus::default()), downloader_factory: Box::new(move || { TestHeaderDownloader::new( client.clone(), @@ -378,6 +419,8 @@ mod tests { self.db.factory.clone(), (*self.downloader_factory)(), HeaderSyncMode::Tip(self.channel.1.clone()), + self.consensus.clone(), + Arc::new(TempDir::new().unwrap()), ) } } @@ -388,15 +431,9 @@ mod tests { fn seed_execution(&mut self, input: ExecInput) -> Result { let mut rng = generators::rng(); let start = input.checkpoint().block_number; - let head = random_header(&mut rng, start, None); - self.db.insert_headers(std::iter::once(&head))?; - // patch td table for `update_head` call - self.db.commit(|tx| { - Ok(tx.put::( - head.number, - U256::ZERO.into(), - )?) - })?; + let headers = random_header_range(&mut rng, 0..start + 1, B256::ZERO); + let head = headers.last().cloned().unwrap(); + self.db.insert_headers_with_td(headers.iter())?; // use previous checkpoint as seed size let end = input.target.unwrap_or_default() + 1; @@ -420,8 +457,11 @@ mod tests { match output { Some(output) if output.checkpoint.block_number > initial_checkpoint => { let provider = self.db.factory.provider()?; - for block_num in (initial_checkpoint..output.checkpoint.block_number).rev() - { + let mut td = provider + .header_td_by_number(initial_checkpoint.saturating_sub(1))? 
+ .unwrap_or_default(); + + for block_num in initial_checkpoint..output.checkpoint.block_number { // look up the header hash let hash = provider.block_hash(block_num)?.expect("no header hash"); @@ -433,6 +473,13 @@ mod tests { assert!(header.is_some()); let header = header.unwrap().seal_slow(); assert_eq!(header.hash(), hash); + + // validate the header total difficulty + td += header.difficulty; + assert_eq!( + provider.header_td_by_number(block_num)?.map(Into::into), + Some(td) + ); } } _ => self.check_no_header_entry_above(initial_checkpoint)?, @@ -472,6 +519,7 @@ mod tests { .build(client.clone(), Arc::new(TestConsensus::default())) }), db: TestStageDB::default(), + consensus: Arc::new(TestConsensus::default()), } } } @@ -485,6 +533,10 @@ mod tests { .ensure_no_entry_above_by_value::(block, |val| val)?; self.db.ensure_no_entry_above::(block, |key| key)?; self.db.ensure_no_entry_above::(block, |key| key)?; + self.db.ensure_no_entry_above::( + block, + |num| num, + )?; Ok(()) } @@ -515,6 +567,7 @@ mod tests { runner.send_tip(tip.hash()); let result = rx.await.unwrap(); + runner.db().factory.static_file_provider().commit().unwrap(); assert_matches!(result, Ok(ExecOutput { checkpoint: StageCheckpoint { block_number, stage_checkpoint: Some(StageUnitCheckpoint::Headers(HeadersCheckpoint { @@ -530,69 +583,8 @@ mod tests { }, done: true }) if block_number == tip.number && from == checkpoint && to == previous_stage && // -1 because we don't need to download the local head - processed == checkpoint + headers.len() as u64 - 1 && total == tip.number); - assert!(runner.validate_execution(input, result.ok()).is_ok(), "validation failed"); - } - - /// Execute the stage in two steps - #[tokio::test] - async fn execute_from_previous_checkpoint() { - let mut runner = HeadersTestRunner::with_linear_downloader(); - // pick range that's larger than the configured headers batch size - let (checkpoint, previous_stage) = (600, 1200); - let mut input = ExecInput { - target: Some(previous_stage), - checkpoint: Some(StageCheckpoint::new(checkpoint)), - }; - let headers = runner.seed_execution(input).expect("failed to seed execution"); - let rx = runner.execute(input); - - runner.client.extend(headers.iter().rev().map(|h| h.clone().unseal())).await; - - // skip `after_execution` hook for linear downloader - let tip = headers.last().unwrap(); - runner.send_tip(tip.hash()); - - let result = rx.await.unwrap(); - assert_matches!(result, Ok(ExecOutput { checkpoint: StageCheckpoint { - block_number, - stage_checkpoint: Some(StageUnitCheckpoint::Headers(HeadersCheckpoint { - block_range: CheckpointBlockRange { - from, - to - }, - progress: EntitiesCheckpoint { - processed, - total, - } - })) - }, done: false }) if block_number == checkpoint && - from == checkpoint && to == previous_stage && - processed == checkpoint + 500 && total == tip.number); - - runner.client.clear().await; - runner.client.extend(headers.iter().take(101).map(|h| h.clone().unseal()).rev()).await; - input.checkpoint = Some(result.unwrap().checkpoint); - - let rx = runner.execute(input); - let result = rx.await.unwrap(); - - assert_matches!(result, Ok(ExecOutput { checkpoint: StageCheckpoint { - block_number, - stage_checkpoint: Some(StageUnitCheckpoint::Headers(HeadersCheckpoint { - block_range: CheckpointBlockRange { - from, - to - }, - progress: EntitiesCheckpoint { - processed, - total, - } - })) - }, done: true }) if block_number == tip.number && - from == checkpoint && to == previous_stage && - // -1 because we don't need to download the local 
head - processed == checkpoint + headers.len() as u64 - 1 && total == tip.number); + processed == checkpoint + headers.len() as u64 - 1 && total == tip.number + ); assert!(runner.validate_execution(input, result.ok()).is_ok(), "validation failed"); } } diff --git a/crates/stages/src/stages/index_account_history.rs b/crates/stages/src/stages/index_account_history.rs index 71c9c33e27e..dab5eb3218a 100644 --- a/crates/stages/src/stages/index_account_history.rs +++ b/crates/stages/src/stages/index_account_history.rs @@ -2,7 +2,7 @@ use crate::{ExecInput, ExecOutput, Stage, StageError, UnwindInput, UnwindOutput} use reth_db::database::Database; use reth_primitives::{ stage::{StageCheckpoint, StageId}, - PruneCheckpoint, PruneMode, PruneSegment, + PruneCheckpoint, PruneMode, PrunePurpose, PruneSegment, }; use reth_provider::{ AccountExtReader, DatabaseProviderRW, HistoryWriter, PruneCheckpointReader, @@ -49,7 +49,13 @@ impl Stage for IndexAccountHistoryStage { ) -> Result { if let Some((target_prunable_block, prune_mode)) = self .prune_mode - .map(|mode| mode.prune_target_block(input.target(), PruneSegment::AccountHistory)) + .map(|mode| { + mode.prune_target_block( + input.target(), + PruneSegment::AccountHistory, + PrunePurpose::User, + ) + }) .transpose()? .flatten() { @@ -123,6 +129,7 @@ mod tests { generators::{random_block_range, random_changeset_range, random_contract_account_range}, }; use reth_primitives::{address, Address, BlockNumber, B256}; + use reth_provider::providers::StaticFileWriter; use std::collections::BTreeMap; const ADDRESS: Address = address!("0000000000000000000000000000000000000001"); diff --git a/crates/stages/src/stages/index_storage_history.rs b/crates/stages/src/stages/index_storage_history.rs index 7f2a9d154ff..b6d79583db2 100644 --- a/crates/stages/src/stages/index_storage_history.rs +++ b/crates/stages/src/stages/index_storage_history.rs @@ -2,7 +2,7 @@ use crate::{ExecInput, ExecOutput, Stage, StageError, UnwindInput, UnwindOutput} use reth_db::{database::Database, models::BlockNumberAddress}; use reth_primitives::{ stage::{StageCheckpoint, StageId}, - PruneCheckpoint, PruneMode, PruneSegment, + PruneCheckpoint, PruneMode, PrunePurpose, PruneSegment, }; use reth_provider::{ DatabaseProviderRW, HistoryWriter, PruneCheckpointReader, PruneCheckpointWriter, StorageReader, @@ -48,7 +48,13 @@ impl Stage for IndexStorageHistoryStage { ) -> Result { if let Some((target_prunable_block, prune_mode)) = self .prune_mode - .map(|mode| mode.prune_target_block(input.target(), PruneSegment::StorageHistory)) + .map(|mode| { + mode.prune_target_block( + input.target(), + PruneSegment::StorageHistory, + PrunePurpose::User, + ) + }) .transpose()? 
.flatten() { @@ -121,6 +127,7 @@ mod tests { generators::{random_block_range, random_changeset_range, random_contract_account_range}, }; use reth_primitives::{address, b256, Address, BlockNumber, StorageEntry, B256, U256}; + use reth_provider::providers::StaticFileWriter; use std::collections::BTreeMap; const ADDRESS: Address = address!("0000000000000000000000000000000000000001"); diff --git a/crates/stages/src/stages/merkle.rs b/crates/stages/src/stages/merkle.rs index 8866262e737..b2761267b8d 100644 --- a/crates/stages/src/stages/merkle.rs +++ b/crates/stages/src/stages/merkle.rs @@ -12,7 +12,8 @@ use reth_primitives::{ BlockNumber, GotExpected, SealedHeader, B256, }; use reth_provider::{ - DatabaseProviderRW, HeaderProvider, ProviderError, StageCheckpointReader, StageCheckpointWriter, + DatabaseProviderRW, HeaderProvider, ProviderError, StageCheckpointReader, + StageCheckpointWriter, StatsReader, }; use reth_trie::{IntermediateStateRootState, StateRoot, StateRootProgress}; use std::fmt::Debug; @@ -184,8 +185,8 @@ impl Stage for MerkleStage { } .unwrap_or(EntitiesCheckpoint { processed: 0, - total: (provider.tx_ref().entries::()? + - provider.tx_ref().entries::()?) + total: (provider.count_entries::()? + + provider.count_entries::()?) as u64, }); @@ -230,8 +231,8 @@ impl Stage for MerkleStage { .map_err(|e| StageError::Fatal(Box::new(e)))?; updates.flush(provider.tx_ref())?; - let total_hashed_entries = (provider.tx_ref().entries::()? + - provider.tx_ref().entries::()?) + let total_hashed_entries = (provider.count_entries::()? + + provider.count_entries::()?) as u64; let entities_checkpoint = EntitiesCheckpoint { @@ -336,8 +337,8 @@ fn validate_state_root( mod tests { use super::*; use crate::test_utils::{ - stage_test_suite_ext, ExecuteStageTestRunner, StageTestRunner, TestRunnerError, - TestStageDB, UnwindStageTestRunner, + stage_test_suite_ext, ExecuteStageTestRunner, StageTestRunner, StorageKind, + TestRunnerError, TestStageDB, UnwindStageTestRunner, }; use assert_matches::assert_matches; use reth_db::cursor::{DbCursorRO, DbCursorRW, DbDupCursorRO}; @@ -347,7 +348,10 @@ mod tests { random_block, random_block_range, random_changeset_range, random_contract_account_range, }, }; - use reth_primitives::{keccak256, stage::StageUnitCheckpoint, SealedBlock, StorageEntry, U256}; + use reth_primitives::{ + keccak256, stage::StageUnitCheckpoint, SealedBlock, StaticFileSegment, StorageEntry, U256, + }; + use reth_provider::providers::StaticFileWriter; use reth_trie::test_utils::{state_root, state_root_prehashed}; use std::collections::BTreeMap; @@ -466,6 +470,17 @@ mod tests { let end = input.target(); let mut rng = generators::rng(); + let mut preblocks = vec![]; + if stage_progress > 0 { + preblocks.append(&mut random_block_range( + &mut rng, + 0..=stage_progress - 1, + B256::ZERO, + 0..1, + )); + self.db.insert_blocks(preblocks.iter(), StorageKind::Static)?; + } + let num_of_accounts = 31; let accounts = random_contract_account_range(&mut rng, &mut (0..num_of_accounts)) .into_iter() @@ -475,8 +490,13 @@ mod tests { accounts.iter().map(|(addr, acc)| (*addr, (*acc, std::iter::empty()))), )?; - let SealedBlock { header, body, ommers, withdrawals } = - random_block(&mut rng, stage_progress, None, Some(0), None); + let SealedBlock { header, body, ommers, withdrawals } = random_block( + &mut rng, + stage_progress, + preblocks.last().map(|b| b.hash()), + Some(0), + None, + ); let mut header = header.unseal(); header.state_root = state_root( @@ -490,7 +510,8 @@ mod tests { let head_hash = 
sealed_head.hash(); let mut blocks = vec![sealed_head]; blocks.extend(random_block_range(&mut rng, start..=end, head_hash, 0..3)); - self.db.insert_blocks(blocks.iter(), None)?; + let last_block = blocks.last().cloned().unwrap(); + self.db.insert_blocks(blocks.iter(), StorageKind::Static)?; let (transitions, final_state) = random_changeset_range( &mut rng, @@ -527,13 +548,16 @@ mod tests { Ok(state_root_prehashed(accounts.into_iter())) })?; - let last_block_number = end; - self.db.commit(|tx| { - let mut last_header = tx.get::(last_block_number)?.unwrap(); - last_header.state_root = root; - tx.put::(last_block_number, last_header)?; - Ok(()) - })?; + let static_file_provider = self.db.factory.static_file_provider(); + let mut writer = + static_file_provider.latest_writer(StaticFileSegment::Headers).unwrap(); + let mut last_header = last_block.header().clone(); + last_header.state_root = root; + + let hash = last_header.hash_slow(); + writer.prune_headers(1).unwrap(); + writer.append_header(last_header, U256::ZERO, hash).unwrap(); + writer.commit().unwrap(); Ok(blocks) } diff --git a/crates/stages/src/stages/mod.rs b/crates/stages/src/stages/mod.rs index a48b8d2d516..0f260d90a73 100644 --- a/crates/stages/src/stages/mod.rs +++ b/crates/stages/src/stages/mod.rs @@ -18,8 +18,6 @@ mod index_storage_history; mod merkle; /// The sender recovery stage. mod sender_recovery; -/// The total difficulty stage -mod total_difficulty; /// The transaction lookup stage mod tx_lookup; @@ -33,7 +31,6 @@ pub use index_account_history::*; pub use index_storage_history::*; pub use merkle::*; pub use sender_recovery::*; -pub use total_difficulty::*; pub use tx_lookup::*; #[cfg(test)] @@ -53,10 +50,11 @@ mod tests { use reth_node_ethereum::EthEvmConfig; use reth_primitives::{ address, hex_literal::hex, keccak256, Account, Bytecode, ChainSpecBuilder, PruneMode, - PruneModes, SealedBlock, U256, + PruneModes, SealedBlock, StaticFileSegment, U256, }; use reth_provider::{ - AccountExtReader, BlockWriter, ProviderFactory, ReceiptProvider, StorageReader, + providers::StaticFileWriter, AccountExtReader, ProviderFactory, ReceiptProvider, + StorageReader, }; use reth_revm::EvmProcessorFactory; use std::sync::Arc; @@ -73,8 +71,12 @@ mod tests { let genesis = SealedBlock::decode(&mut genesis_rlp).unwrap(); let mut block_rlp = 
hex!("f90262f901f9a075c371ba45999d87f4542326910a11af515897aebce5265d3f6acd1f1161f82fa01dcc4de8dec75d7aab85b567b6ccd41ad312451b948a7413f0a142fd40d49347942adc25665018aa1fe0e6bc666dac8fc2697ff9baa098f2dcd87c8ae4083e7017a05456c14eea4b1db2032126e27b3b1563d57d7cc0a08151d548273f6683169524b66ca9fe338b9ce42bc3540046c828fd939ae23bcba03f4e5c2ec5b2170b711d97ee755c160457bb58d8daa338e835ec02ae6860bbabb901000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000083020000018502540be40082a8798203e800a00000000000000000000000000000000000000000000000000000000000000000880000000000000000f863f861800a8405f5e10094100000000000000000000000000000000000000080801ba07e09e26678ed4fac08a249ebe8ed680bf9051a5e14ad223e4b2b9d26e0208f37a05f6e3f188e3e6eab7d7d3b6568f5eac7d687b08d307d3154ccd8c87b4630509bc0").as_slice(); let block = SealedBlock::decode(&mut block_rlp).unwrap(); - provider_rw.insert_block(genesis.try_seal_with_senders().unwrap(), None).unwrap(); - provider_rw.insert_block(block.clone().try_seal_with_senders().unwrap(), None).unwrap(); + provider_rw + .insert_historical_block(genesis.try_seal_with_senders().unwrap(), None) + .unwrap(); + provider_rw + .insert_historical_block(block.clone().try_seal_with_senders().unwrap(), None) + .unwrap(); // Fill with bogus blocks to respect PruneMode distance. let mut head = block.hash(); @@ -82,8 +84,16 @@ mod tests { for block_number in 2..=tip { let nblock = random_block(&mut rng, block_number, Some(head), Some(0), Some(0)); head = nblock.hash(); - provider_rw.insert_block(nblock.try_seal_with_senders().unwrap(), None).unwrap(); + provider_rw + .insert_historical_block(nblock.try_seal_with_senders().unwrap(), None) + .unwrap(); } + provider_rw + .static_file_provider() + .latest_writer(StaticFileSegment::Headers) + .unwrap() + .commit() + .unwrap(); provider_rw.commit().unwrap(); // insert pre state diff --git a/crates/stages/src/stages/sender_recovery.rs b/crates/stages/src/stages/sender_recovery.rs index fcbe313748d..79e4263661d 100644 --- a/crates/stages/src/stages/sender_recovery.rs +++ b/crates/stages/src/stages/sender_recovery.rs @@ -1,22 +1,22 @@ use crate::{BlockErrorKind, ExecInput, ExecOutput, Stage, StageError, UnwindInput, UnwindOutput}; -use itertools::Itertools; use reth_db::{ - cursor::{DbCursorRO, DbCursorRW}, + cursor::DbCursorRW, database::Database, + static_file::TransactionMask, tables, transaction::{DbTx, DbTxMut}, - DatabaseError, RawKey, RawTable, RawValue, }; use reth_interfaces::consensus; use reth_primitives::{ keccak256, stage::{EntitiesCheckpoint, StageCheckpoint, StageId}, - Address, PruneSegment, TransactionSignedNoHash, TxNumber, + Address, PruneSegment, StaticFileSegment, TransactionSignedNoHash, TxNumber, }; use reth_provider::{ BlockReader, DatabaseProviderRW, HeaderProvider, ProviderError, PruneCheckpointReader, + StatsReader, }; -use std::{fmt::Debug, sync::mpsc}; +use std::{fmt::Debug, ops::Range, sync::mpsc}; use thiserror::Error; use tracing::*; @@ -81,46 +81,49 @@ impl Stage for SenderRecoveryStage { // Acquire the cursor for inserting elements let mut senders_cursor = 
tx.cursor_write::()?; - // Acquire the cursor over the transactions - let mut tx_cursor = tx.cursor_read::>()?; - // Walk the transactions from start to end index (inclusive) - let raw_tx_range = RawKey::new(tx_range.start)..RawKey::new(tx_range.end); - let tx_walker = tx_cursor.walk_range(raw_tx_range)?; - // Iterate over transactions in chunks info!(target: "sync::stages::sender_recovery", ?tx_range, "Recovering senders"); - // channels used to return result of sender recovery. - let mut channels = Vec::new(); - // Spawn recovery jobs onto the default rayon threadpool and send the result through the // channel. // - // We try to evenly divide the transactions to recover across all threads in the threadpool. - // Chunks are submitted instead of individual transactions to reduce the overhead of work - // stealing in the threadpool workers. - let chunk_size = self.commit_threshold as usize / rayon::current_num_threads(); - // prevents an edge case - // where the chunk size is either 0 or too small - // to gain anything from using more than 1 thread - let chunk_size = chunk_size.max(16); - - for chunk in &tx_walker.chunks(chunk_size) { + // Transactions are different size, so chunks will not all take the same processing time. If + // chunks are too big, there will be idle threads waiting for work. Choosing an + // arbitrary smaller value to make sure it doesn't happen. + let chunk_size = 100; + + let chunks = (tx_range.start..tx_range.end) + .step_by(chunk_size as usize) + .map(|start| start..std::cmp::min(start + chunk_size as u64, tx_range.end)) + .collect::>>(); + + let mut channels = Vec::with_capacity(chunks.len()); + for chunk_range in chunks { // An _unordered_ channel to receive results from a rayon job let (recovered_senders_tx, recovered_senders_rx) = mpsc::channel(); channels.push(recovered_senders_rx); - // Note: Unfortunate side-effect of how chunk is designed in itertools (it is not Send) - let chunk: Vec<_> = chunk.collect(); - // Spawn the sender recovery task onto the global rayon pool - // This task will send the results through the channel after it recovered the senders. + let static_file_provider = provider.static_file_provider().clone(); + + // Spawn the task onto the global rayon pool + // This task will send the results through the channel after it has read the transaction + // and calculated the sender. rayon::spawn(move || { let mut rlp_buf = Vec::with_capacity(128); - for entry in chunk { - rlp_buf.clear(); - let recovery_result = recover_sender(entry, &mut rlp_buf); - let _ = recovered_senders_tx.send(recovery_result); - } + let _ = static_file_provider.fetch_range_with_predicate( + StaticFileSegment::Transactions, + chunk_range, + |cursor, number| { + Ok(cursor + .get_one::>(number.into())? 
+ .map(|tx| { + rlp_buf.clear(); + let _ = recovered_senders_tx + .send(recover_sender((number, tx), &mut rlp_buf)); + })) + }, + |_| true, + ); }); } @@ -186,15 +189,11 @@ impl Stage for SenderRecoveryStage { } } +#[inline] fn recover_sender( - entry: Result<(RawKey, RawValue), DatabaseError>, + (tx_id, tx): (TxNumber, TransactionSignedNoHash), rlp_buf: &mut Vec, ) -> Result<(u64, Address), Box> { - let (tx_id, transaction) = - entry.map_err(|e| Box::new(SenderRecoveryStageError::StageError(e.into())))?; - let tx_id = tx_id.key().expect("key to be formated"); - - let tx = transaction.value().expect("value to be formated"); tx.transaction.encode_without_signature(rlp_buf); // We call [Signature::recover_signer_unchecked] because transactions run in the pipeline are @@ -221,9 +220,8 @@ fn stage_checkpoint( // If `TransactionSenders` table was pruned, we will have a number of entries in it not // matching the actual number of processed transactions. To fix that, we add the // number of pruned `TransactionSenders` entries. - processed: provider.tx_ref().entries::()? as u64 + - pruned_entries, - total: provider.tx_ref().entries::()? as u64, + processed: provider.count_entries::()? as u64 + pruned_entries, + total: provider.count_entries::()? as u64, }) } @@ -249,6 +247,7 @@ struct FailedSenderRecoveryError { #[cfg(test)] mod tests { use assert_matches::assert_matches; + use reth_db::cursor::DbCursorRO; use reth_interfaces::test_utils::{ generators, generators::{random_block, random_block_range}, @@ -257,12 +256,12 @@ mod tests { stage::StageUnitCheckpoint, BlockNumber, PruneCheckpoint, PruneMode, SealedBlock, TransactionSigned, B256, }; - use reth_provider::{PruneCheckpointWriter, TransactionsProvider}; + use reth_provider::{providers::StaticFileWriter, PruneCheckpointWriter, TransactionsProvider}; use super::*; use crate::test_utils::{ - stage_test_suite_ext, ExecuteStageTestRunner, StageTestRunner, TestRunnerError, - TestStageDB, UnwindStageTestRunner, + stage_test_suite_ext, ExecuteStageTestRunner, StageTestRunner, StorageKind, + TestRunnerError, TestStageDB, UnwindStageTestRunner, }; stage_test_suite_ext!(SenderRecoveryTestRunner, sender_recovery); @@ -293,7 +292,10 @@ mod tests { ) }) .collect::>(); - runner.db.insert_blocks(blocks.iter(), None).expect("failed to insert blocks"); + runner + .db + .insert_blocks(blocks.iter(), StorageKind::Static) + .expect("failed to insert blocks"); let rx = runner.execute(input); @@ -327,9 +329,17 @@ mod tests { // Manually seed once with full input range let seed = random_block_range(&mut rng, stage_progress + 1..=previous_stage, B256::ZERO, 0..4); // set tx count range high enough to hit the threshold - runner.db.insert_blocks(seed.iter(), None).expect("failed to seed execution"); - - let total_transactions = runner.db.table::().unwrap().len() as u64; + runner + .db + .insert_blocks(seed.iter(), StorageKind::Static) + .expect("failed to seed execution"); + + let total_transactions = runner + .db + .factory + .static_file_provider() + .count_entries::() + .unwrap() as u64; let first_input = ExecInput { target: Some(previous_stage), @@ -389,7 +399,7 @@ mod tests { let mut rng = generators::rng(); let blocks = random_block_range(&mut rng, 0..=100, B256::ZERO, 0..10); - db.insert_blocks(blocks.iter(), None).expect("insert blocks"); + db.insert_blocks(blocks.iter(), StorageKind::Static).expect("insert blocks"); let max_pruned_block = 30; let max_processed_block = 70; @@ -503,7 +513,7 @@ mod tests { let end = input.target(); let blocks = 
random_block_range(&mut rng, stage_progress..=end, B256::ZERO, 0..2); - self.db.insert_blocks(blocks.iter(), None)?; + self.db.insert_blocks(blocks.iter(), StorageKind::Static)?; Ok(blocks) } diff --git a/crates/stages/src/stages/total_difficulty.rs b/crates/stages/src/stages/total_difficulty.rs deleted file mode 100644 index eccac5181e7..00000000000 --- a/crates/stages/src/stages/total_difficulty.rs +++ /dev/null @@ -1,314 +0,0 @@ -use crate::{BlockErrorKind, ExecInput, ExecOutput, Stage, StageError, UnwindInput, UnwindOutput}; -use reth_db::{ - cursor::{DbCursorRO, DbCursorRW}, - database::Database, - tables, - transaction::{DbTx, DbTxMut}, - DatabaseError, -}; -use reth_interfaces::{consensus::Consensus, provider::ProviderError}; -use reth_primitives::{ - stage::{EntitiesCheckpoint, StageCheckpoint, StageId}, - U256, -}; -use reth_provider::DatabaseProviderRW; -use std::sync::Arc; -use tracing::*; - -/// The total difficulty stage. -/// -/// This stage walks over inserted headers and computes total difficulty -/// at each block. The entries are inserted into -/// [`HeaderTerminalDifficulties`][reth_db::tables::HeaderTerminalDifficulties] table. -#[derive(Debug, Clone)] -pub struct TotalDifficultyStage { - /// Consensus client implementation - consensus: Arc, - /// The number of table entries to commit at once - commit_threshold: u64, -} - -impl TotalDifficultyStage { - /// Create a new total difficulty stage - pub fn new(consensus: Arc) -> Self { - Self { consensus, commit_threshold: 100_000 } - } - - /// Set a commit threshold on total difficulty stage - pub fn with_commit_threshold(mut self, commit_threshold: u64) -> Self { - self.commit_threshold = commit_threshold; - self - } -} - -impl Stage for TotalDifficultyStage { - /// Return the id of the stage - fn id(&self) -> StageId { - StageId::TotalDifficulty - } - - /// Write total difficulty entries - fn execute( - &mut self, - provider: &DatabaseProviderRW, - input: ExecInput, - ) -> Result { - let tx = provider.tx_ref(); - if input.target_reached() { - return Ok(ExecOutput::done(input.checkpoint())) - } - - let (range, is_final_range) = input.next_block_range_with_threshold(self.commit_threshold); - let (start_block, end_block) = range.clone().into_inner(); - - debug!(target: "sync::stages::total_difficulty", start_block, end_block, "Commencing sync"); - - // Acquire cursor over total difficulty and headers tables - let mut cursor_td = tx.cursor_write::()?; - let mut cursor_headers = tx.cursor_read::()?; - - // Get latest total difficulty - let last_header_number = input.checkpoint().block_number; - let last_entry = cursor_td - .seek_exact(last_header_number)? - .ok_or(ProviderError::TotalDifficultyNotFound(last_header_number))?; - - let mut td: U256 = last_entry.1.into(); - debug!(target: "sync::stages::total_difficulty", ?td, block_number = last_header_number, "Last total difficulty entry"); - - // Walk over newly inserted headers, update & insert td - for entry in cursor_headers.walk_range(range)? { - let (block_number, header) = entry?; - td += header.difficulty; - - self.consensus.validate_header_with_total_difficulty(&header, td).map_err(|error| { - StageError::Block { - block: Box::new(header.seal_slow()), - error: BlockErrorKind::Validation(error), - } - })?; - cursor_td.append(block_number, td.into())?; - } - - Ok(ExecOutput { - checkpoint: StageCheckpoint::new(end_block) - .with_entities_stage_checkpoint(stage_checkpoint(provider)?), - done: is_final_range, - }) - } - - /// Unwind the stage. 
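Returning briefly to the sender-recovery hunk above (the rest of the deleted total-difficulty stage follows), each rayon task now reads its chunk of transactions straight from the static files instead of walking a database cursor. A rough sketch of that read path; it assumes `fetch_range_with_predicate` collects the values produced by its closure, which is how the call in this diff appears to use it:

    use std::ops::Range;

    use reth_db::static_file::TransactionMask;
    use reth_interfaces::provider::ProviderResult;
    use reth_primitives::{StaticFileSegment, TransactionSignedNoHash, TxNumber};
    use reth_provider::providers::StaticFileProvider;

    /// Reads one chunk of transactions from the Transactions static file segment.
    fn read_tx_chunk(
        static_file_provider: &StaticFileProvider,
        chunk_range: Range<TxNumber>,
    ) -> ProviderResult<Vec<(TxNumber, TransactionSignedNoHash)>> {
        static_file_provider.fetch_range_with_predicate(
            StaticFileSegment::Transactions,
            chunk_range,
            |cursor, number| {
                // TransactionMask selects only the transaction column for this row.
                Ok(cursor
                    .get_one::<TransactionMask<TransactionSignedNoHash>>(number.into())?
                    .map(|tx| (number, tx)))
            },
            // No early exit: keep fetching the whole chunk.
            |_| true,
        )
    }
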
- fn unwind( - &mut self, - provider: &DatabaseProviderRW, - input: UnwindInput, - ) -> Result { - let (_, unwind_to, _) = input.unwind_block_range_with_threshold(self.commit_threshold); - - provider.unwind_table_by_num::(unwind_to)?; - - Ok(UnwindOutput { - checkpoint: StageCheckpoint::new(unwind_to) - .with_entities_stage_checkpoint(stage_checkpoint(provider)?), - }) - } -} - -fn stage_checkpoint( - provider: &DatabaseProviderRW, -) -> Result { - Ok(EntitiesCheckpoint { - processed: provider.tx_ref().entries::()? as u64, - total: provider.tx_ref().entries::()? as u64, - }) -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::test_utils::{ - stage_test_suite_ext, ExecuteStageTestRunner, StageTestRunner, TestRunnerError, - TestStageDB, UnwindStageTestRunner, - }; - use assert_matches::assert_matches; - use reth_interfaces::test_utils::{ - generators, - generators::{random_header, random_header_range}, - TestConsensus, - }; - use reth_primitives::{stage::StageUnitCheckpoint, BlockNumber, SealedHeader}; - use reth_provider::HeaderProvider; - - stage_test_suite_ext!(TotalDifficultyTestRunner, total_difficulty); - - #[tokio::test] - async fn execute_with_intermediate_commit() { - let threshold = 50; - let (stage_progress, previous_stage) = (1000, 1100); // input exceeds threshold - - let mut runner = TotalDifficultyTestRunner::default(); - runner.set_threshold(threshold); - - let first_input = ExecInput { - target: Some(previous_stage), - checkpoint: Some(StageCheckpoint::new(stage_progress)), - }; - - // Seed only once with full input range - runner.seed_execution(first_input).expect("failed to seed execution"); - - // Execute first time - let result = runner.execute(first_input).await.unwrap(); - let expected_progress = stage_progress + threshold; - assert_matches!( - result, - Ok(ExecOutput { checkpoint: StageCheckpoint { - block_number, - stage_checkpoint: Some(StageUnitCheckpoint::Entities(EntitiesCheckpoint { - processed, - total - })) - }, done: false }) if block_number == expected_progress && processed == 1 + threshold && - total == runner.db.table::().unwrap().len() as u64 - ); - - // Execute second time - let second_input = ExecInput { - target: Some(previous_stage), - checkpoint: Some(StageCheckpoint::new(expected_progress)), - }; - let result = runner.execute(second_input).await.unwrap(); - assert_matches!( - result, - Ok(ExecOutput { checkpoint: StageCheckpoint { - block_number, - stage_checkpoint: Some(StageUnitCheckpoint::Entities(EntitiesCheckpoint { - processed, - total - })) - }, done: true }) if block_number == previous_stage && processed == total && - total == runner.db.table::().unwrap().len() as u64 - ); - - assert!(runner.validate_execution(first_input, result.ok()).is_ok(), "validation failed"); - } - - struct TotalDifficultyTestRunner { - db: TestStageDB, - consensus: Arc, - commit_threshold: u64, - } - - impl Default for TotalDifficultyTestRunner { - fn default() -> Self { - Self { - db: Default::default(), - consensus: Arc::new(TestConsensus::default()), - commit_threshold: 500, - } - } - } - - impl StageTestRunner for TotalDifficultyTestRunner { - type S = TotalDifficultyStage; - - fn db(&self) -> &TestStageDB { - &self.db - } - - fn stage(&self) -> Self::S { - TotalDifficultyStage { - consensus: self.consensus.clone(), - commit_threshold: self.commit_threshold, - } - } - } - - impl ExecuteStageTestRunner for TotalDifficultyTestRunner { - type Seed = Vec; - - fn seed_execution(&mut self, input: ExecInput) -> Result { - let mut rng = generators::rng(); - let 
start = input.checkpoint().block_number; - let head = random_header(&mut rng, start, None); - self.db.insert_headers(std::iter::once(&head))?; - self.db.commit(|tx| { - let td: U256 = tx - .cursor_read::()? - .last()? - .map(|(_, v)| v) - .unwrap_or_default() - .into(); - tx.put::( - head.number, - (td + head.difficulty).into(), - )?; - Ok(()) - })?; - - // use previous progress as seed size - let end = input.target.unwrap_or_default() + 1; - - if start + 1 >= end { - return Ok(Vec::default()) - } - - let mut headers = random_header_range(&mut rng, start + 1..end, head.hash()); - self.db.insert_headers(headers.iter())?; - headers.insert(0, head); - Ok(headers) - } - - /// Validate stored headers - fn validate_execution( - &self, - input: ExecInput, - output: Option, - ) -> Result<(), TestRunnerError> { - let initial_stage_progress = input.checkpoint().block_number; - match output { - Some(output) if output.checkpoint.block_number > initial_stage_progress => { - let provider = self.db.factory.provider()?; - - let mut header_cursor = provider.tx_ref().cursor_read::()?; - let (_, mut current_header) = header_cursor - .seek_exact(initial_stage_progress)? - .expect("no initial header"); - let mut td: U256 = provider - .header_td_by_number(initial_stage_progress)? - .expect("no initial td"); - - while let Some((next_key, next_header)) = header_cursor.next()? { - assert_eq!(current_header.number + 1, next_header.number); - td += next_header.difficulty; - assert_eq!( - provider.header_td_by_number(next_key)?.map(Into::into), - Some(td) - ); - current_header = next_header; - } - } - _ => self.check_no_td_above(initial_stage_progress)?, - }; - Ok(()) - } - } - - impl UnwindStageTestRunner for TotalDifficultyTestRunner { - fn validate_unwind(&self, input: UnwindInput) -> Result<(), TestRunnerError> { - self.check_no_td_above(input.unwind_to) - } - } - - impl TotalDifficultyTestRunner { - fn check_no_td_above(&self, block: BlockNumber) -> Result<(), TestRunnerError> { - self.db - .ensure_no_entry_above::(block, |num| num)?; - Ok(()) - } - - fn set_threshold(&mut self, new_threshold: u64) { - self.commit_threshold = new_threshold; - } - } -} diff --git a/crates/stages/src/stages/tx_lookup.rs b/crates/stages/src/stages/tx_lookup.rs index 63b6527d5a5..a619fe709b5 100644 --- a/crates/stages/src/stages/tx_lookup.rs +++ b/crates/stages/src/stages/tx_lookup.rs @@ -1,20 +1,24 @@ use crate::{ExecInput, ExecOutput, Stage, StageError, UnwindInput, UnwindOutput}; -use rayon::prelude::*; +use num_traits::Zero; use reth_db::{ cursor::{DbCursorRO, DbCursorRW}, database::Database, tables, transaction::{DbTx, DbTxMut}, + RawKey, RawValue, }; +use reth_etl::Collector; use reth_interfaces::provider::ProviderError; use reth_primitives::{ stage::{EntitiesCheckpoint, StageCheckpoint, StageId}, - PruneCheckpoint, PruneMode, PruneSegment, + PruneCheckpoint, PruneMode, PrunePurpose, PruneSegment, TxHash, TxNumber, }; use reth_provider::{ - BlockReader, DatabaseProviderRW, PruneCheckpointReader, PruneCheckpointWriter, - TransactionsProviderExt, + BlockReader, DatabaseProviderRW, PruneCheckpointReader, PruneCheckpointWriter, StatsReader, + TransactionsProvider, TransactionsProviderExt, }; +use std::sync::Arc; +use tempfile::TempDir; use tracing::*; /// The transaction lookup stage. @@ -23,23 +27,26 @@ use tracing::*; /// block to the corresponding `BlockNumber` at each block. This is written to the /// [`tables::TransactionHashNumbers`] This is used for looking up changesets via the transaction /// hash. 
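One recurring change worth spelling out before the lookup stage body below: the `stage_checkpoint` helpers in this patch now go through `StatsReader::count_entries` instead of `tx_ref().entries()`, presumably so counts can be answered by the provider, which also knows about static files, rather than by the raw transaction alone. A simplified, hypothetical reconstruction of such a helper; the real lookup helper also adds back pruned entries, and the table names here are illustrative:

    use reth_db::{database::Database, tables};
    use reth_interfaces::provider::ProviderResult;
    use reth_primitives::stage::EntitiesCheckpoint;
    use reth_provider::{DatabaseProviderRW, StatsReader};

    /// Illustrative checkpoint helper: entity counts go through the provider, not the raw tx.
    fn stage_checkpoint<DB: Database>(
        provider: &DatabaseProviderRW<DB>,
    ) -> ProviderResult<EntitiesCheckpoint> {
        Ok(EntitiesCheckpoint {
            processed: provider.count_entries::<tables::TransactionHashNumbers>()? as u64,
            total: provider.count_entries::<tables::Transactions>()? as u64,
        })
    }
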
+/// +/// It uses [`reth_etl::Collector`] to collect all entries before finally writing them to disk. #[derive(Debug, Clone)] pub struct TransactionLookupStage { - /// The number of lookup entries to commit at once - commit_threshold: u64, + /// The maximum number of lookup entries to hold in memory before pushing them to + /// [`reth_etl::Collector`]. + chunk_size: u64, prune_mode: Option, } impl Default for TransactionLookupStage { fn default() -> Self { - Self { commit_threshold: 5_000_000, prune_mode: None } + Self { chunk_size: 5_000_000, prune_mode: None } } } impl TransactionLookupStage { /// Create new instance of [TransactionLookupStage]. - pub fn new(commit_threshold: u64, prune_mode: Option) -> Self { - Self { commit_threshold, prune_mode } + pub fn new(chunk_size: u64, prune_mode: Option) -> Self { + Self { chunk_size, prune_mode } } } @@ -57,7 +64,13 @@ impl Stage for TransactionLookupStage { ) -> Result { if let Some((target_prunable_block, prune_mode)) = self .prune_mode - .map(|mode| mode.prune_target_block(input.target(), PruneSegment::TransactionLookup)) + .map(|mode| { + mode.prune_target_block( + input.target(), + PruneSegment::TransactionLookup, + PrunePurpose::User, + ) + }) .transpose()? .flatten() { @@ -87,43 +100,73 @@ impl Stage for TransactionLookupStage { return Ok(ExecOutput::done(input.checkpoint())) } - let (tx_range, block_range, is_final_range) = - input.next_block_range_with_transaction_threshold(provider, self.commit_threshold)?; - let end_block = *block_range.end(); + // 500MB temporary files + let mut hash_collector: Collector = + Collector::new(Arc::new(TempDir::new()?), 500 * (1024 * 1024)); - debug!(target: "sync::stages::transaction_lookup", ?tx_range, "Updating transaction lookup"); + debug!( + target: "sync::stages::transaction_lookup", + tx_range = ?input.checkpoint().block_number..=input.target(), + "Updating transaction lookup" + ); - let mut tx_list = provider.transaction_hashes_by_range(tx_range)?; + loop { + let (tx_range, block_range, is_final_range) = + input.next_block_range_with_transaction_threshold(provider, self.chunk_size)?; - // Sort before inserting the reverse lookup for hash -> tx_id. - tx_list.par_sort_unstable_by(|txa, txb| txa.0.cmp(&txb.0)); + let end_block = *block_range.end(); - let tx = provider.tx_ref(); - let mut txhash_cursor = tx.cursor_write::()?; - - // If the last inserted element in the database is equal or bigger than the first - // in our set, then we need to insert inside the DB. If it is smaller then last - // element in the DB, we can append to the DB. - // Append probably only ever happens during sync, on the first table insertion. - let insert = tx_list - .first() - .zip(txhash_cursor.last()?) - .map(|((first, _), (last, _))| first <= &last) - .unwrap_or_default(); - // if txhash_cursor.last() is None we will do insert. `zip` would return none if any item is - // none. if it is some and if first is smaller than last, we will do append. - for (tx_hash, id) in tx_list { - if insert { - txhash_cursor.insert(tx_hash, id)?; - } else { - txhash_cursor.append(tx_hash, id)?; + debug!(target: "sync::stages::transaction_lookup", ?tx_range, "Calculating transaction hashes"); + + for (key, value) in provider.transaction_hashes_by_range(tx_range)? 
{ + hash_collector.insert(key, value); + } + + input.checkpoint = Some( + StageCheckpoint::new(end_block) + .with_entities_stage_checkpoint(stage_checkpoint(provider)?), + ); + + if is_final_range { + let append_only = + provider.count_entries::()?.is_zero(); + let mut txhash_cursor = provider + .tx_ref() + .cursor_write::>()?; + + let total_hashes = hash_collector.len(); + let interval = (total_hashes / 10).max(1); + for (index, hash_to_number) in hash_collector.iter()?.enumerate() { + let (hash, number) = hash_to_number?; + if index > 0 && index % interval == 0 { + debug!( + target: "sync::stages::transaction_lookup", + ?append_only, + progress = format!("{:.2}%", (index as f64 / total_hashes as f64) * 100.0), + "Inserting hashes" + ); + } + + if append_only { + txhash_cursor.append( + RawKey::::from_vec(hash), + RawValue::::from_vec(number), + )?; + } else { + txhash_cursor.insert( + RawKey::::from_vec(hash), + RawValue::::from_vec(number), + )?; + } + } + break } } Ok(ExecOutput { - checkpoint: StageCheckpoint::new(end_block) + checkpoint: StageCheckpoint::new(input.target()) .with_entities_stage_checkpoint(stage_checkpoint(provider)?), - done: is_final_range, + done: true, }) } @@ -134,12 +177,12 @@ impl Stage for TransactionLookupStage { input: UnwindInput, ) -> Result { let tx = provider.tx_ref(); - let (range, unwind_to, _) = input.unwind_block_range_with_threshold(self.commit_threshold); + let (range, unwind_to, _) = input.unwind_block_range_with_threshold(self.chunk_size); // Cursors to unwind tx hash to number let mut body_cursor = tx.cursor_read::()?; let mut tx_hash_number_cursor = tx.cursor_write::()?; - let mut transaction_cursor = tx.cursor_read::()?; + let static_file_provider = provider.static_file_provider(); let mut rev_walker = body_cursor.walk_back(Some(*range.end()))?; while let Some((number, body)) = rev_walker.next().transpose()? { if number <= unwind_to { @@ -149,7 +192,7 @@ impl Stage for TransactionLookupStage { // Delete all transactions that belong to this block for tx_id in body.tx_num_range() { // First delete the transaction and hash to id mapping - if let Some((_, transaction)) = transaction_cursor.seek_exact(tx_id)? { + if let Some(transaction) = static_file_provider.transaction_by_id(tx_id)? { if tx_hash_number_cursor.seek_exact(transaction.hash())?.is_some() { tx_hash_number_cursor.delete_current()?; } @@ -177,9 +220,9 @@ fn stage_checkpoint( // If `TransactionHashNumbers` table was pruned, we will have a number of entries in it not // matching the actual number of processed transactions. To fix that, we add the // number of pruned `TransactionHashNumbers` entries. - processed: provider.tx_ref().entries::()? as u64 + + processed: provider.count_entries::()? as u64 + pruned_entries, - total: provider.tx_ref().entries::()? as u64, + total: provider.count_entries::()? 
as u64, }) } @@ -187,8 +230,8 @@ fn stage_checkpoint( mod tests { use super::*; use crate::test_utils::{ - stage_test_suite_ext, ExecuteStageTestRunner, StageTestRunner, TestRunnerError, - TestStageDB, UnwindStageTestRunner, + stage_test_suite_ext, ExecuteStageTestRunner, StageTestRunner, StorageKind, + TestRunnerError, TestStageDB, UnwindStageTestRunner, }; use assert_matches::assert_matches; use reth_interfaces::test_utils::{ @@ -196,7 +239,7 @@ mod tests { generators::{random_block, random_block_range}, }; use reth_primitives::{stage::StageUnitCheckpoint, BlockNumber, SealedBlock, B256}; - use reth_provider::TransactionsProvider; + use reth_provider::providers::StaticFileWriter; use std::ops::Sub; // Implement stage test suite. @@ -227,7 +270,10 @@ mod tests { ) }) .collect::>(); - runner.db.insert_blocks(blocks.iter(), None).expect("failed to insert blocks"); + runner + .db + .insert_blocks(blocks.iter(), StorageKind::Static) + .expect("failed to insert blocks"); let rx = runner.execute(input); @@ -243,83 +289,13 @@ mod tests { total })) }, done: true }) if block_number == previous_stage && processed == total && - total == runner.db.table::().unwrap().len() as u64 + total == runner.db.factory.static_file_provider().count_entries::().unwrap() as u64 ); // Validate the stage execution assert!(runner.validate_execution(input, result.ok()).is_ok(), "execution validation"); } - /// Execute the stage twice with input range that exceeds the commit threshold - #[tokio::test] - async fn execute_intermediate_commit_transaction_lookup() { - let threshold = 50; - let mut runner = TransactionLookupTestRunner::default(); - runner.set_commit_threshold(threshold); - let (stage_progress, previous_stage) = (1000, 1100); // input exceeds threshold - let first_input = ExecInput { - target: Some(previous_stage), - checkpoint: Some(StageCheckpoint::new(stage_progress)), - }; - let mut rng = generators::rng(); - - // Seed only once with full input range - let seed = - random_block_range(&mut rng, stage_progress + 1..=previous_stage, B256::ZERO, 0..4); // set tx count range high enough to hit the threshold - runner.db.insert_blocks(seed.iter(), None).expect("failed to seed execution"); - - let total_txs = runner.db.table::().unwrap().len() as u64; - - // Execute first time - let result = runner.execute(first_input).await.unwrap(); - let mut tx_count = 0; - let expected_progress = seed - .iter() - .find(|x| { - tx_count += x.body.len(); - tx_count as u64 > threshold - }) - .map(|x| x.number) - .unwrap_or(previous_stage); - assert_matches!(result, Ok(_)); - assert_eq!( - result.unwrap(), - ExecOutput { - checkpoint: StageCheckpoint::new(expected_progress).with_entities_stage_checkpoint( - EntitiesCheckpoint { - processed: runner - .db - .table::() - .unwrap() - .len() as u64, - total: total_txs - } - ), - done: false - } - ); - - // Execute second time to completion - runner.set_commit_threshold(u64::MAX); - let second_input = ExecInput { - target: Some(previous_stage), - checkpoint: Some(StageCheckpoint::new(expected_progress)), - }; - let result = runner.execute(second_input).await.unwrap(); - assert_matches!(result, Ok(_)); - assert_eq!( - result.as_ref().unwrap(), - &ExecOutput { - checkpoint: StageCheckpoint::new(previous_stage).with_entities_stage_checkpoint( - EntitiesCheckpoint { processed: total_txs, total: total_txs } - ), - done: true - } - ); - - assert!(runner.validate_execution(first_input, result.ok()).is_ok(), "validation failed"); - } - #[tokio::test] async fn 
execute_pruned_transaction_lookup() { let (previous_stage, prune_target, stage_progress) = (500, 400, 100); @@ -335,7 +311,10 @@ mod tests { // Seed only once with full input range let seed = random_block_range(&mut rng, stage_progress + 1..=previous_stage, B256::ZERO, 0..2); - runner.db.insert_blocks(seed.iter(), None).expect("failed to seed execution"); + runner + .db + .insert_blocks(seed.iter(), StorageKind::Static) + .expect("failed to seed execution"); runner.set_prune_mode(PruneMode::Before(prune_target)); @@ -353,7 +332,7 @@ mod tests { total })) }, done: true }) if block_number == previous_stage && processed == total && - total == runner.db.table::().unwrap().len() as u64 + total == runner.db.factory.static_file_provider().count_entries::().unwrap() as u64 ); // Validate the stage execution @@ -366,7 +345,7 @@ mod tests { let mut rng = generators::rng(); let blocks = random_block_range(&mut rng, 0..=100, B256::ZERO, 0..10); - db.insert_blocks(blocks.iter(), None).expect("insert blocks"); + db.insert_blocks(blocks.iter(), StorageKind::Static).expect("insert blocks"); let max_pruned_block = 30; let max_processed_block = 70; @@ -417,21 +396,17 @@ mod tests { struct TransactionLookupTestRunner { db: TestStageDB, - commit_threshold: u64, + chunk_size: u64, prune_mode: Option, } impl Default for TransactionLookupTestRunner { fn default() -> Self { - Self { db: TestStageDB::default(), commit_threshold: 1000, prune_mode: None } + Self { db: TestStageDB::default(), chunk_size: 1000, prune_mode: None } } } impl TransactionLookupTestRunner { - fn set_commit_threshold(&mut self, threshold: u64) { - self.commit_threshold = threshold; - } - fn set_prune_mode(&mut self, prune_mode: PruneMode) { self.prune_mode = Some(prune_mode); } @@ -474,10 +449,7 @@ mod tests { } fn stage(&self) -> Self::S { - TransactionLookupStage { - commit_threshold: self.commit_threshold, - prune_mode: self.prune_mode, - } + TransactionLookupStage { chunk_size: self.chunk_size, prune_mode: self.prune_mode } } } @@ -490,7 +462,7 @@ mod tests { let mut rng = generators::rng(); let blocks = random_block_range(&mut rng, stage_progress + 1..=end, B256::ZERO, 0..2); - self.db.insert_blocks(blocks.iter(), None)?; + self.db.insert_blocks(blocks.iter(), StorageKind::Static)?; Ok(blocks) } @@ -506,7 +478,11 @@ mod tests { if let Some((target_prunable_block, _)) = self .prune_mode .map(|mode| { - mode.prune_target_block(input.target(), PruneSegment::TransactionLookup) + mode.prune_target_block( + input.target(), + PruneSegment::TransactionLookup, + PrunePurpose::User, + ) }) .transpose() .expect("prune target block for transaction lookup") diff --git a/crates/stages/src/test_utils/macros.rs b/crates/stages/src/test_utils/macros.rs index 8cc3e9cda1b..0ffb16f0861 100644 --- a/crates/stages/src/test_utils/macros.rs +++ b/crates/stages/src/test_utils/macros.rs @@ -13,6 +13,8 @@ macro_rules! stage_test_suite { // Run stage execution let result = runner.execute(input).await; + runner.db().factory.static_file_provider().commit().unwrap(); + // Check that the result is returned and the stage does not panic. // The return result with empty db is stage-specific. assert_matches::assert_matches!(result, Ok(_)); @@ -44,6 +46,8 @@ macro_rules! stage_test_suite { // Assert the successful result let result = rx.await.unwrap(); + runner.db().factory.static_file_provider().commit().unwrap(); + assert_matches::assert_matches!( result, Ok(ExecOutput { done, checkpoint }) @@ -72,6 +76,8 @@ macro_rules! 
stage_test_suite { // Run stage unwind let rx = runner.unwind(input).await; + runner.db().factory.static_file_provider().commit().unwrap(); + assert_matches::assert_matches!( rx, Ok(UnwindOutput { checkpoint }) if checkpoint.block_number == input.unwind_to @@ -104,6 +110,8 @@ macro_rules! stage_test_suite { // Assert the successful execution result let result = rx.await.unwrap(); + runner.db().factory.static_file_provider().commit().unwrap(); + assert_matches::assert_matches!( result, Ok(ExecOutput { done, checkpoint }) @@ -171,6 +179,8 @@ macro_rules! stage_test_suite_ext { // Assert the successful result let result = rx.await.unwrap(); + runner.db().factory.static_file_provider().commit().unwrap(); + assert_matches::assert_matches!( result, Ok(ExecOutput { done, checkpoint }) diff --git a/crates/stages/src/test_utils/mod.rs b/crates/stages/src/test_utils/mod.rs index f48aa46d500..dd788bca74e 100644 --- a/crates/stages/src/test_utils/mod.rs +++ b/crates/stages/src/test_utils/mod.rs @@ -13,7 +13,7 @@ pub(crate) use runner::{ }; mod test_db; -pub use test_db::TestStageDB; +pub use test_db::{StorageKind, TestStageDB}; mod stage; pub use stage::TestStage; diff --git a/crates/stages/src/test_utils/test_db.rs b/crates/stages/src/test_utils/test_db.rs index 7115a7c7722..c349137f4a4 100644 --- a/crates/stages/src/test_utils/test_db.rs +++ b/crates/stages/src/test_utils/test_db.rs @@ -5,7 +5,9 @@ use reth_db::{ models::{AccountBeforeTx, StoredBlockBodyIndices}, table::Table, tables, - test_utils::{create_test_rw_db, create_test_rw_db_with_path, TempDatabase}, + test_utils::{ + create_test_rw_db, create_test_rw_db_with_path, create_test_static_files_dir, TempDatabase, + }, transaction::{DbTx, DbTxMut}, DatabaseEnv, DatabaseError as DbError, }; @@ -14,7 +16,10 @@ use reth_primitives::{ keccak256, Account, Address, BlockNumber, Receipt, SealedBlock, SealedHeader, StorageEntry, TxHash, TxNumber, B256, MAINNET, U256, }; -use reth_provider::{HistoryWriter, ProviderFactory}; +use reth_provider::{ + providers::{StaticFileProviderRWRefMut, StaticFileWriter}, + HistoryWriter, ProviderError, ProviderFactory, +}; use std::{collections::BTreeMap, path::Path, sync::Arc}; /// Test database that is used for testing stage implementations. @@ -26,13 +31,27 @@ pub struct TestStageDB { impl Default for TestStageDB { /// Create a new instance of [TestStageDB] fn default() -> Self { - Self { factory: ProviderFactory::new(create_test_rw_db(), MAINNET.clone()) } + Self { + factory: ProviderFactory::new( + create_test_rw_db(), + MAINNET.clone(), + create_test_static_files_dir(), + ) + .unwrap(), + } } } impl TestStageDB { pub fn new(path: &Path) -> Self { - Self { factory: ProviderFactory::new(create_test_rw_db_with_path(path), MAINNET.clone()) } + Self { + factory: ProviderFactory::new( + create_test_rw_db_with_path(path), + MAINNET.clone(), + create_test_static_files_dir(), + ) + .unwrap(), + } } /// Invoke a callback with transaction committing it afterwards @@ -112,75 +131,126 @@ impl TestStageDB { }) } - /// Inserts a single [SealedHeader] into the corresponding tables of the headers stage. - fn insert_header(tx: &TX, header: &SealedHeader) -> Result<(), DbError> { - tx.put::(header.number, header.hash())?; + /// Insert header to static file if `writer` exists, otherwise to DB. 
+ pub fn insert_header( + writer: Option<&mut StaticFileProviderRWRefMut<'_>>, + tx: &TX, + header: &SealedHeader, + td: U256, + ) -> ProviderResult<()> { + if let Some(writer) = writer { + writer.append_header(header.header().clone(), td, header.hash())?; + } else { + tx.put::(header.number, header.hash())?; + tx.put::(header.number, td.into())?; + tx.put::(header.number, header.header().clone())?; + } + tx.put::(header.hash(), header.number)?; - tx.put::(header.number, header.clone().unseal()) + Ok(()) + } + + fn insert_headers_inner<'a, I, const TD: bool>(&self, headers: I) -> ProviderResult<()> + where + I: Iterator, + { + let provider = self.factory.static_file_provider(); + let mut writer = provider.latest_writer(reth_primitives::StaticFileSegment::Headers)?; + let tx = self.factory.provider_rw()?.into_tx(); + let mut td = U256::ZERO; + + for header in headers { + if TD { + td += header.difficulty; + } + Self::insert_header(Some(&mut writer), &tx, header, td)?; + } + + writer.commit()?; + tx.commit()?; + + Ok(()) } - /// Insert ordered collection of [SealedHeader] into the corresponding tables + /// Insert ordered collection of [SealedHeader] into the corresponding static file and tables /// that are supposed to be populated by the headers stage. pub fn insert_headers<'a, I>(&self, headers: I) -> ProviderResult<()> where I: Iterator, { - self.commit(|tx| { - Ok(headers.into_iter().try_for_each(|header| Self::insert_header(tx, header))?) - }) + self.insert_headers_inner::(headers) } - /// Inserts total difficulty of headers into the corresponding tables. + /// Inserts total difficulty of headers into the corresponding static file and tables. /// /// Superset functionality of [TestStageDB::insert_headers]. pub fn insert_headers_with_td<'a, I>(&self, headers: I) -> ProviderResult<()> where I: Iterator, { - self.commit(|tx| { - let mut td = U256::ZERO; - headers.into_iter().try_for_each(|header| { - Self::insert_header(tx, header)?; - td += header.difficulty; - Ok(tx.put::(header.number, td.into())?) - }) - }) + self.insert_headers_inner::(headers) } /// Insert ordered collection of [SealedBlock] into corresponding tables. /// Superset functionality of [TestStageDB::insert_headers]. /// + /// If tx_offset is set to `None`, then transactions will be stored on static files, otherwise + /// database. + /// /// Assumes that there's a single transition for each transaction (i.e. no block rewards). - pub fn insert_blocks<'a, I>(&self, blocks: I, tx_offset: Option) -> ProviderResult<()> + pub fn insert_blocks<'a, I>(&self, blocks: I, storage_kind: StorageKind) -> ProviderResult<()> where I: Iterator, { - self.commit(|tx| { - let mut next_tx_num = tx_offset.unwrap_or_default(); - - blocks.into_iter().try_for_each(|block| { - Self::insert_header(tx, &block.header)?; - // Insert into body tables. 
- let block_body_indices = StoredBlockBodyIndices { - first_tx_num: next_tx_num, - tx_count: block.body.len() as u64, - }; - - if !block.body.is_empty() { - tx.put::( - block_body_indices.last_tx_num(), - block.number, - )?; + let provider = self.factory.static_file_provider(); + + let mut txs_writer = storage_kind.is_static().then(|| { + provider.latest_writer(reth_primitives::StaticFileSegment::Transactions).unwrap() + }); + + let mut headers_writer = + provider.latest_writer(reth_primitives::StaticFileSegment::Headers)?; + let tx = self.factory.provider_rw().unwrap().into_tx(); + + let mut next_tx_num = storage_kind.tx_offset(); + blocks.into_iter().try_for_each(|block| { + Self::insert_header(Some(&mut headers_writer), &tx, &block.header, U256::ZERO)?; + + // Insert into body tables. + let block_body_indices = StoredBlockBodyIndices { + first_tx_num: next_tx_num, + tx_count: block.body.len() as u64, + }; + + if !block.body.is_empty() { + tx.put::( + block_body_indices.last_tx_num(), + block.number, + )?; + } + tx.put::(block.number, block_body_indices)?; + + let res = block.body.iter().try_for_each(|body_tx| { + if let Some(txs_writer) = &mut txs_writer { + txs_writer.append_transaction(next_tx_num, body_tx.clone().into())?; + } else { + tx.put::(next_tx_num, body_tx.clone().into())? } - tx.put::(block.number, block_body_indices)?; + next_tx_num += 1; + Ok::<(), ProviderError>(()) + }); - block.body.iter().try_for_each(|body_tx| { - tx.put::(next_tx_num, body_tx.clone().into())?; - next_tx_num += 1; - Ok(()) - }) - }) - }) + if let Some(txs_writer) = &mut txs_writer { + txs_writer.increment_block(reth_primitives::StaticFileSegment::Transactions)?; + } + res + })?; + + tx.commit()?; + if let Some(txs_writer) = &mut txs_writer { + txs_writer.commit()?; + } + headers_writer.commit() } pub fn insert_tx_hash_numbers(&self, tx_hash_numbers: I) -> ProviderResult<()> @@ -319,3 +389,28 @@ impl TestStageDB { Ok(()) } } + +/// Used to identify where to store data when setting up a test. 
+#[derive(Debug)] +pub enum StorageKind { + Database(Option), + Static, +} + +impl StorageKind { + #[allow(dead_code)] + fn is_database(&self) -> bool { + matches!(self, Self::Database(_)) + } + + fn is_static(&self) -> bool { + matches!(self, Self::Static) + } + + fn tx_offset(&self) -> u64 { + if let Self::Database(offset) = self { + return offset.unwrap_or_default() + } + 0 + } +} diff --git a/crates/snapshot/Cargo.toml b/crates/static-file/Cargo.toml similarity index 83% rename from crates/snapshot/Cargo.toml rename to crates/static-file/Cargo.toml index a082c01135e..8c7128455cb 100644 --- a/crates/snapshot/Cargo.toml +++ b/crates/static-file/Cargo.toml @@ -1,12 +1,12 @@ [package] -name = "reth-snapshot" +name = "reth-static-file" version.workspace = true edition.workspace = true rust-version.workspace = true license.workspace = true homepage.workspace = true repository.workspace = true -description = "Snapshotting implementation" +description = "Static file producer implementation" [lints] workspace = true @@ -18,14 +18,17 @@ reth-db.workspace = true reth-provider.workspace = true reth-interfaces.workspace = true reth-nippy-jar.workspace = true +reth-tokio-util.workspace = true # async tokio = { workspace = true, features = ["sync"] } +tokio-stream.workspace = true # misc thiserror.workspace = true tracing.workspace = true clap = { workspace = true, features = ["derive"], optional = true } +rayon.workspace = true [dev-dependencies] # reth diff --git a/crates/static-file/README.md b/crates/static-file/README.md new file mode 100644 index 00000000000..1d455475a59 --- /dev/null +++ b/crates/static-file/README.md @@ -0,0 +1,88 @@ +# StaticFile + +## Overview + +Data that has reached a finalized state and won't undergo further changes (essentially frozen) should be read without concerns of modification. This makes it unsuitable for traditional databases. + +This crate aims to copy this data from the current database to multiple static files, aggregated by block ranges. At every 500_000th block new static files are created. + +Below are two diagrams illustrating the processes of creating static files (custom format: `NippyJar`) and querying them. A glossary is also provided to explain the different (linked) components involved in these processes. + +
+ Creation diagram (StaticFileProducer) + +```mermaid +graph TD; + I("BLOCK_HEIGHT % 500_000 == 0")--triggers-->SP(StaticFileProducer) + SP --> |triggers| SH["create_static_file(block_range, StaticFileSegment::Headers)"] + SP --> |triggers| ST["create_static_file(block_range, StaticFileSegment::Transactions)"] + SP --> |triggers| SR["create_static_file(block_range, StaticFileSegment::Receipts)"] + SP --> |triggers| ETC["create_static_file(block_range, ...)"] + SH --> CS["create_static_file::< T >(DatabaseCursor)"] + ST --> CS + SR --> CS + ETC --> CS + CS --> |create| IF(NippyJar::InclusionFilters) + CS -- iterates --> DC(DatabaseCursor) -->HN{HasNext} + HN --> |true| NJC(NippyJar::Compression) + NJC --> HN + NJC --store--> NJ + HN --> |false| NJ + IF --store--> NJ(NippyJar) + NJ --freeze--> F(File) + F--"on success"--> SP1(StaticFileProducer) + SP1 --"sends BLOCK_HEIGHT"--> HST(HighestStaticFileTracker) + HST --"read by"-->Pruner + HST --"read by"-->DatabaseProvider + HST --"read by"-->StaticFileProvider + HST --"read by"-->ProviderFactory + +``` +
+ + +
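To make the creation flow above concrete, here is a minimal, illustrative sketch of how a caller might drive it with the `StaticFileProducer` API introduced later in this diff. It assumes an already-initialized `ProviderFactory` (named `factory` here) and that all segments up to `finalized_block` should be moved; it is a sketch, not code from this change.

```rust
use reth_db::database::Database;
use reth_interfaces::RethResult;
use reth_primitives::{static_file::HighestStaticFiles, BlockNumber, PruneModes};
use reth_provider::ProviderFactory;
use reth_static_file::StaticFileProducer;

/// Move all segments up to `finalized_block` into static files.
fn produce_static_files<DB: Database>(
    factory: ProviderFactory<DB>,
    finalized_block: BlockNumber,
) -> RethResult<()> {
    let static_file_provider = factory.static_file_provider();
    let mut producer =
        StaticFileProducer::new(factory, static_file_provider, PruneModes::default());

    // Per-segment block ranges that still need to be copied, relative to what
    // already exists in static files.
    let targets = producer.get_static_file_targets(HighestStaticFiles {
        headers: Some(finalized_block),
        receipts: Some(finalized_block),
        transactions: Some(finalized_block),
    })?;

    // Copies data only; pruning the now-redundant database entries is handled
    // separately by the pruner.
    if targets.any() {
        producer.run(targets)?;
    }
    Ok(())
}
```

The producer also exposes `events()` (emitting `Started`/`Finished`), which callers can subscribe to for progress reporting.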
+ Query diagram (Provider) + +```mermaid +graph TD; + RPC-->P + P("Provider::header(block_number)")-->PF(ProviderFactory) + PF--shares-->SP1("Arc(StaticFileProvider)") + SP1--shares-->PD(DatabaseProvider) + PF--creates-->PD + PD--check `HighestStaticFileTracker`-->PD + PD-->DC1{block_number > highest static_file block} + DC1 --> |true| PD1("DatabaseProvider::header(block_number)") + DC1 --> |false| ASP("StaticFileProvider::header(block_number)") + PD1 --> MDBX + ASP --find correct jar and creates--> JP("StaticFileJarProvider::header(block_number)") + JP --"creates"-->SC(StaticFileCursor) + SC --".get_one< HeaderMask< Header > >(number)"--->NJC("NippyJarCursor") + NJC--".row_by_number(row_index, mask)"-->NJ[NippyJar] + NJ--"&[u8]"-->NJC + NJC--"&[u8]"-->SC + SC--"Header"--> JP + JP--"Header"--> ASP +``` +
+ + +### Glossary +In descending order of abstraction hierarchy: + +[`StaticFileProducer`](../../crates/static_file/src/static_file_producer.rs#L20): A `reth` background service that **copies** data from the database to new static-file files when the block height reaches a certain threshold (e.g., `500_000th`). Upon completion, it dispatches a notification about the highest static file block to the `HighestStaticFileTracker` channel. **It DOES NOT remove data from the database.** + +[`HighestStaticFileTracker`](../../crates/static_file/src/static_file_producer.rs#L22): A channel utilized by `StaticFileProducer` to announce the newest static_file block to all components with a listener: `Pruner` (to know which additional tables can be pruned) and `DatabaseProvider` (to know which data can be queried from the static files). + +[`StaticFileProvider`](../../crates/storage/provider/src/providers/static_file/manager.rs#L15) A provider similar to `DatabaseProvider`, **managing all existing static_file files** and selecting the optimal one (by range and segment type) to fulfill a request. **A single instance is shared across all components and should be instantiated only once within `ProviderFactory`**. An immutable reference is given every time `ProviderFactory` creates a new `DatabaseProvider`. + +[`StaticFileJarProvider`](../../crates/storage/provider/src/providers/static_file/jar.rs#L42) A provider similar to `DatabaseProvider` that provides access to a **single static_file file**. + +[`StaticFileCursor`](../../crates/storage/db/src/static_file/cursor.rs#L12) An elevated abstraction of `NippyJarCursor` for simplified access. It associates the bitmasks with type decoding. For instance, `cursor.get_two::< TransactionMask< Tx, Signature > >(tx_number)` would yield `Tx` and `Signature`, eliminating the need to manage masks or invoke a decoder/decompressor. + +[`StaticFileSegment`](../../crates/primitives/src/static_file/segment.rs#L10) Each static_file file only contains data of a specific segment, e.g., `Headers`, `Transactions`, or `Receipts`. + +[`NippyJarCursor`](../../crates/storage/nippy-jar/src/cursor.rs#L12) Accessor of data in a `NippyJar` file. It enables queries either by row number (e.g., block number 1) or by a predefined key not part of the file (e.g., transaction hashes). If a file has multiple columns (e.g., `Tx | TxSender | Signature`), and one wishes to access only one of the column values, this can be accomplished by bitmasks. (e.g., for `TxSender`, the mask would be `0b010`). + +[`NippyJar`](../../crates/storage/nippy-jar/src/lib.rs#L57) A create-only file format. No data can be appended after creation. It supports multiple columns, compression (e.g., Zstd (with and without dictionaries), lz4, uncompressed) and inclusion filters (e.g., cuckoo filter: `is hash X part of this dataset`). StaticFiles are organized by block ranges. (e.g., `TransactionStaticFile_499_999.jar` contains a transaction per row for all transactions from block `0` to block `499_999`). For more, check the struct documentation. diff --git a/crates/static-file/src/event.rs b/crates/static-file/src/event.rs new file mode 100644 index 00000000000..1a2ca31b207 --- /dev/null +++ b/crates/static-file/src/event.rs @@ -0,0 +1,19 @@ +use crate::StaticFileTargets; +use std::time::Duration; + +/// An event emitted by a [StaticFileProducer][crate::StaticFileProducer]. +#[derive(Debug, PartialEq, Eq, Clone)] +pub enum StaticFileProducerEvent { + /// Emitted when static file producer started running.
+ Started { + /// Targets that will be moved to static files + targets: StaticFileTargets, + }, + /// Emitted when static file producer finished running. + Finished { + /// Targets that were moved to static files + targets: StaticFileTargets, + /// Time it took to run the static file producer + elapsed: Duration, + }, +} diff --git a/crates/snapshot/src/lib.rs b/crates/static-file/src/lib.rs similarity index 58% rename from crates/snapshot/src/lib.rs rename to crates/static-file/src/lib.rs index 1673aa0b8e5..2c6c11dfd4f 100644 --- a/crates/snapshot/src/lib.rs +++ b/crates/static-file/src/lib.rs @@ -1,4 +1,4 @@ -//! Snapshotting implementation. +//! Static file producer implementation. #![doc( html_logo_url = "https://raw.githubusercontent.com/paradigmxyz/reth/main/assets/reth-docs.png", @@ -7,11 +7,11 @@ )] #![cfg_attr(docsrs, feature(doc_cfg, doc_auto_cfg))] -mod error; +mod event; pub mod segments; -mod snapshotter; +mod static_file_producer; -pub use error::SnapshotterError; -pub use snapshotter::{ - HighestSnapshotsTracker, SnapshotTargets, Snapshotter, SnapshotterResult, SnapshotterWithResult, +pub use event::StaticFileProducerEvent; +pub use static_file_producer::{ + StaticFileProducer, StaticFileProducerResult, StaticFileProducerWithResult, StaticFileTargets, }; diff --git a/crates/static-file/src/segments/headers.rs b/crates/static-file/src/segments/headers.rs new file mode 100644 index 00000000000..960b95897d5 --- /dev/null +++ b/crates/static-file/src/segments/headers.rs @@ -0,0 +1,128 @@ +use crate::segments::{dataset_for_compression, prepare_jar, Segment, SegmentHeader}; +use reth_db::{ + cursor::DbCursorRO, database::Database, static_file::create_static_file_T1_T2_T3, tables, + transaction::DbTx, RawKey, RawTable, +}; +use reth_interfaces::provider::ProviderResult; +use reth_primitives::{static_file::SegmentConfig, BlockNumber, StaticFileSegment}; +use reth_provider::{ + providers::{StaticFileProvider, StaticFileWriter}, + DatabaseProviderRO, +}; +use std::{ops::RangeInclusive, path::Path}; + +/// Static File segment responsible for [StaticFileSegment::Headers] part of data. 
+#[derive(Debug, Default)] +pub struct Headers; + +impl Segment for Headers { + fn segment(&self) -> StaticFileSegment { + StaticFileSegment::Headers + } + + fn copy_to_static_files( + &self, + provider: DatabaseProviderRO, + static_file_provider: StaticFileProvider, + block_range: RangeInclusive, + ) -> ProviderResult<()> { + let mut static_file_writer = + static_file_provider.get_writer(*block_range.start(), StaticFileSegment::Headers)?; + + let mut headers_cursor = provider.tx_ref().cursor_read::()?; + let headers_walker = headers_cursor.walk_range(block_range.clone())?; + + let mut header_td_cursor = + provider.tx_ref().cursor_read::()?; + let header_td_walker = header_td_cursor.walk_range(block_range.clone())?; + + let mut canonical_headers_cursor = + provider.tx_ref().cursor_read::()?; + let canonical_headers_walker = canonical_headers_cursor.walk_range(block_range)?; + + for ((header_entry, header_td_entry), canonical_header_entry) in + headers_walker.zip(header_td_walker).zip(canonical_headers_walker) + { + let (header_block, header) = header_entry?; + let (header_td_block, header_td) = header_td_entry?; + let (canonical_header_block, canonical_header) = canonical_header_entry?; + + debug_assert_eq!(header_block, header_td_block); + debug_assert_eq!(header_td_block, canonical_header_block); + + let _static_file_block = + static_file_writer.append_header(header, header_td.0, canonical_header)?; + debug_assert_eq!(_static_file_block, header_block); + } + + Ok(()) + } + + fn create_static_file_file( + &self, + provider: &DatabaseProviderRO, + directory: &Path, + config: SegmentConfig, + block_range: RangeInclusive, + ) -> ProviderResult<()> { + let range_len = block_range.clone().count(); + let jar = prepare_jar::( + provider, + directory, + StaticFileSegment::Headers, + config, + block_range.clone(), + range_len, + || { + Ok([ + dataset_for_compression::( + provider, + &block_range, + range_len, + )?, + dataset_for_compression::( + provider, + &block_range, + range_len, + )?, + dataset_for_compression::( + provider, + &block_range, + range_len, + )?, + ]) + }, + )?; + + // Generate list of hashes for filters & PHF + let mut cursor = provider.tx_ref().cursor_read::>()?; + let mut hashes = None; + if config.filters.has_filters() { + hashes = Some( + cursor + .walk(Some(RawKey::from(*block_range.start())))? + .take(range_len) + .map(|row| row.map(|(_key, value)| value.into_value()).map_err(|e| e.into())), + ); + } + + create_static_file_T1_T2_T3::< + tables::Headers, + tables::HeaderTerminalDifficulties, + tables::CanonicalHeaders, + BlockNumber, + SegmentHeader, + >( + provider.tx_ref(), + block_range, + None, + // We already prepared the dictionary beforehand + None::>>>, + hashes, + range_len, + jar, + )?; + + Ok(()) + } +} diff --git a/crates/static-file/src/segments/mod.rs b/crates/static-file/src/segments/mod.rs new file mode 100644 index 00000000000..7cad895aed7 --- /dev/null +++ b/crates/static-file/src/segments/mod.rs @@ -0,0 +1,116 @@ +//! StaticFile segment implementations and utilities. 
+ +mod transactions; +pub use transactions::Transactions; + +mod headers; +pub use headers::Headers; + +mod receipts; +pub use receipts::Receipts; + +use reth_db::{ + cursor::DbCursorRO, database::Database, table::Table, transaction::DbTx, RawKey, RawTable, +}; +use reth_interfaces::provider::ProviderResult; +use reth_nippy_jar::NippyJar; +use reth_primitives::{ + static_file::{ + find_fixed_range, Compression, Filters, InclusionFilter, PerfectHashingFunction, + SegmentConfig, SegmentHeader, + }, + BlockNumber, StaticFileSegment, +}; +use reth_provider::{providers::StaticFileProvider, DatabaseProviderRO, TransactionsProviderExt}; +use std::{ops::RangeInclusive, path::Path}; + +pub(crate) type Rows = [Vec>; COLUMNS]; + +/// A segment represents moving some portion of the data to static files. +pub trait Segment: Send + Sync { + /// Returns the [`StaticFileSegment`]. + fn segment(&self) -> StaticFileSegment; + + /// Move data to static files for the provided block range. [StaticFileProvider] will handle the + /// management of and writing to files. + fn copy_to_static_files( + &self, + provider: DatabaseProviderRO, + static_file_provider: StaticFileProvider, + block_range: RangeInclusive, + ) -> ProviderResult<()>; + + /// Create a static file of data for the provided block range. The `directory` parameter + /// determines the static file's save location. + fn create_static_file_file( + &self, + provider: &DatabaseProviderRO, + directory: &Path, + config: SegmentConfig, + block_range: RangeInclusive, + ) -> ProviderResult<()>; +} + +/// Returns a [`NippyJar`] according to the desired configuration. The `directory` parameter +/// determines the static file's save location. +pub(crate) fn prepare_jar( + provider: &DatabaseProviderRO, + directory: impl AsRef, + segment: StaticFileSegment, + segment_config: SegmentConfig, + block_range: RangeInclusive, + total_rows: usize, + prepare_compression: impl Fn() -> ProviderResult>, +) -> ProviderResult> { + let tx_range = match segment { + StaticFileSegment::Headers => None, + StaticFileSegment::Receipts | StaticFileSegment::Transactions => { + Some(provider.transaction_range_by_block_range(block_range.clone())?.into()) + } + }; + + let mut nippy_jar = NippyJar::new( + COLUMNS, + &directory.as_ref().join(segment.filename(&find_fixed_range(*block_range.end())).as_str()), + SegmentHeader::new(block_range.clone().into(), Some(block_range.into()), tx_range, segment), + ); + + nippy_jar = match segment_config.compression { + Compression::Lz4 => nippy_jar.with_lz4(), + Compression::Zstd => nippy_jar.with_zstd(false, 0), + Compression::ZstdWithDictionary => { + let dataset = prepare_compression()?; + + nippy_jar = nippy_jar.with_zstd(true, 5_000_000); + nippy_jar.prepare_compression(dataset.to_vec())?; + nippy_jar + } + Compression::Uncompressed => nippy_jar, + }; + + if let Filters::WithFilters(inclusion_filter, phf) = segment_config.filters { + nippy_jar = match inclusion_filter { + InclusionFilter::Cuckoo => nippy_jar.with_cuckoo_filter(total_rows), + }; + nippy_jar = match phf { + PerfectHashingFunction::Fmph => nippy_jar.with_fmph(), + PerfectHashingFunction::GoFmph => nippy_jar.with_gofmph(), + }; + } + + Ok(nippy_jar) +} + +/// Generates the dataset to train a zstd dictionary with the most recent rows (at most 1000). 
+pub(crate) fn dataset_for_compression>( + provider: &DatabaseProviderRO, + range: &RangeInclusive, + range_len: usize, +) -> ProviderResult>> { + let mut cursor = provider.tx_ref().cursor_read::>()?; + Ok(cursor + .walk_back(Some(RawKey::from(*range.end())))? + .take(range_len.min(1000)) + .map(|row| row.map(|(_key, value)| value.into_value()).expect("should exist")) + .collect::>()) +} diff --git a/crates/static-file/src/segments/receipts.rs b/crates/static-file/src/segments/receipts.rs new file mode 100644 index 00000000000..5934edf8768 --- /dev/null +++ b/crates/static-file/src/segments/receipts.rs @@ -0,0 +1,107 @@ +use crate::segments::{dataset_for_compression, prepare_jar, Segment}; +use reth_db::{ + cursor::DbCursorRO, database::Database, static_file::create_static_file_T1, tables, + transaction::DbTx, +}; +use reth_interfaces::provider::{ProviderError, ProviderResult}; +use reth_primitives::{ + static_file::{SegmentConfig, SegmentHeader}, + BlockNumber, StaticFileSegment, TxNumber, +}; +use reth_provider::{ + providers::{StaticFileProvider, StaticFileWriter}, + BlockReader, DatabaseProviderRO, TransactionsProviderExt, +}; +use std::{ops::RangeInclusive, path::Path}; + +/// Static File segment responsible for [StaticFileSegment::Receipts] part of data. +#[derive(Debug, Default)] +pub struct Receipts; + +impl Segment for Receipts { + fn segment(&self) -> StaticFileSegment { + StaticFileSegment::Receipts + } + + fn copy_to_static_files( + &self, + provider: DatabaseProviderRO, + static_file_provider: StaticFileProvider, + block_range: RangeInclusive, + ) -> ProviderResult<()> { + let mut static_file_writer = + static_file_provider.get_writer(*block_range.start(), StaticFileSegment::Receipts)?; + + for block in block_range { + let _static_file_block = + static_file_writer.increment_block(StaticFileSegment::Receipts)?; + debug_assert_eq!(_static_file_block, block); + + let block_body_indices = provider + .block_body_indices(block)? + .ok_or(ProviderError::BlockBodyIndicesNotFound(block))?; + + let mut receipts_cursor = provider.tx_ref().cursor_read::()?; + let receipts_walker = receipts_cursor.walk_range(block_body_indices.tx_num_range())?; + + for entry in receipts_walker { + let (tx_number, receipt) = entry?; + + static_file_writer.append_receipt(tx_number, receipt)?; + } + } + + Ok(()) + } + + fn create_static_file_file( + &self, + provider: &DatabaseProviderRO, + directory: &Path, + config: SegmentConfig, + block_range: RangeInclusive, + ) -> ProviderResult<()> { + let tx_range = provider.transaction_range_by_block_range(block_range.clone())?; + let tx_range_len = tx_range.clone().count(); + + let jar = prepare_jar::( + provider, + directory, + StaticFileSegment::Receipts, + config, + block_range, + tx_range_len, + || { + Ok([dataset_for_compression::( + provider, + &tx_range, + tx_range_len, + )?]) + }, + )?; + + // Generate list of hashes for filters & PHF + let mut hashes = None; + if config.filters.has_filters() { + hashes = Some( + provider + .transaction_hashes_by_range(*tx_range.start()..(*tx_range.end() + 1))? 
+ .into_iter() + .map(|(tx, _)| Ok(tx)), + ); + } + + create_static_file_T1::( + provider.tx_ref(), + tx_range, + None, + // We already prepared the dictionary beforehand + None::>>>, + hashes, + tx_range_len, + jar, + )?; + + Ok(()) + } +} diff --git a/crates/static-file/src/segments/transactions.rs b/crates/static-file/src/segments/transactions.rs new file mode 100644 index 00000000000..b8a6928b320 --- /dev/null +++ b/crates/static-file/src/segments/transactions.rs @@ -0,0 +1,111 @@ +use crate::segments::{dataset_for_compression, prepare_jar, Segment}; +use reth_db::{ + cursor::DbCursorRO, database::Database, static_file::create_static_file_T1, tables, + transaction::DbTx, +}; +use reth_interfaces::provider::{ProviderError, ProviderResult}; +use reth_primitives::{ + static_file::{SegmentConfig, SegmentHeader}, + BlockNumber, StaticFileSegment, TxNumber, +}; +use reth_provider::{ + providers::{StaticFileProvider, StaticFileWriter}, + BlockReader, DatabaseProviderRO, TransactionsProviderExt, +}; +use std::{ops::RangeInclusive, path::Path}; + +/// Static File segment responsible for [StaticFileSegment::Transactions] part of data. +#[derive(Debug, Default)] +pub struct Transactions; + +impl Segment for Transactions { + fn segment(&self) -> StaticFileSegment { + StaticFileSegment::Transactions + } + + /// Write transactions from database table [tables::Transactions] to static files with segment + /// [StaticFileSegment::Transactions] for the provided block range. + fn copy_to_static_files( + &self, + provider: DatabaseProviderRO, + static_file_provider: StaticFileProvider, + block_range: RangeInclusive, + ) -> ProviderResult<()> { + let mut static_file_writer = static_file_provider + .get_writer(*block_range.start(), StaticFileSegment::Transactions)?; + + for block in block_range { + let _static_file_block = + static_file_writer.increment_block(StaticFileSegment::Transactions)?; + debug_assert_eq!(_static_file_block, block); + + let block_body_indices = provider + .block_body_indices(block)? + .ok_or(ProviderError::BlockBodyIndicesNotFound(block))?; + + let mut transactions_cursor = + provider.tx_ref().cursor_read::()?; + let transactions_walker = + transactions_cursor.walk_range(block_body_indices.tx_num_range())?; + + for entry in transactions_walker { + let (tx_number, transaction) = entry?; + + static_file_writer.append_transaction(tx_number, transaction)?; + } + } + + Ok(()) + } + + fn create_static_file_file( + &self, + provider: &DatabaseProviderRO, + directory: &Path, + config: SegmentConfig, + block_range: RangeInclusive, + ) -> ProviderResult<()> { + let tx_range = provider.transaction_range_by_block_range(block_range.clone())?; + let tx_range_len = tx_range.clone().count(); + + let jar = prepare_jar::( + provider, + directory, + StaticFileSegment::Transactions, + config, + block_range, + tx_range_len, + || { + Ok([dataset_for_compression::( + provider, + &tx_range, + tx_range_len, + )?]) + }, + )?; + + // Generate list of hashes for filters & PHF + let mut hashes = None; + if config.filters.has_filters() { + hashes = Some( + provider + .transaction_hashes_by_range(*tx_range.start()..(*tx_range.end() + 1))? 
+ .into_iter() + .map(|(tx, _)| Ok(tx)), + ); + } + + create_static_file_T1::( + provider.tx_ref(), + tx_range, + None, + // We already prepared the dictionary beforehand + None::>>>, + hashes, + tx_range_len, + jar, + )?; + + Ok(()) + } +} diff --git a/crates/static-file/src/static_file_producer.rs b/crates/static-file/src/static_file_producer.rs new file mode 100644 index 00000000000..52b115e9fee --- /dev/null +++ b/crates/static-file/src/static_file_producer.rs @@ -0,0 +1,327 @@ +//! Support for producing static files. + +use crate::{segments, segments::Segment, StaticFileProducerEvent}; +use rayon::prelude::*; +use reth_db::database::Database; +use reth_interfaces::RethResult; +use reth_primitives::{static_file::HighestStaticFiles, BlockNumber, PruneModes}; +use reth_provider::{ + providers::{StaticFileProvider, StaticFileWriter}, + ProviderFactory, +}; +use reth_tokio_util::EventListeners; +use std::{ops::RangeInclusive, time::Instant}; +use tokio_stream::wrappers::UnboundedReceiverStream; +use tracing::{debug, trace}; + +/// Result of [StaticFileProducer::run] execution. +pub type StaticFileProducerResult = RethResult; + +/// The [StaticFileProducer] instance itself with the result of [StaticFileProducer::run] +pub type StaticFileProducerWithResult = (StaticFileProducer, StaticFileProducerResult); + +/// Static File producer routine. See [StaticFileProducer::run] for more detailed description. +#[derive(Debug, Clone)] +pub struct StaticFileProducer { + /// Provider factory + provider_factory: ProviderFactory, + /// Static File provider + static_file_provider: StaticFileProvider, + /// Pruning configuration for every part of the data that can be pruned. Set by user, and + /// needed in [StaticFileProducer] to prevent attempting to move prunable data to static files. + /// See [StaticFileProducer::get_static_file_targets]. + prune_modes: PruneModes, + listeners: EventListeners, +} + +/// Static File targets, per data part, measured in [`BlockNumber`]. +#[derive(Debug, Clone, Eq, PartialEq)] +pub struct StaticFileTargets { + headers: Option>, + receipts: Option>, + transactions: Option>, +} + +impl StaticFileTargets { + /// Returns `true` if any of the targets are [Some]. + pub fn any(&self) -> bool { + self.headers.is_some() || self.receipts.is_some() || self.transactions.is_some() + } + + // Returns `true` if all targets are either [`None`] or has beginning of the range equal to the + // highest static_file. + fn is_contiguous_to_highest_static_files(&self, static_files: HighestStaticFiles) -> bool { + [ + (self.headers.as_ref(), static_files.headers), + (self.receipts.as_ref(), static_files.receipts), + (self.transactions.as_ref(), static_files.transactions), + ] + .iter() + .all(|(target_block_range, highest_static_fileted_block)| { + target_block_range.map_or(true, |target_block_range| { + *target_block_range.start() == + highest_static_fileted_block.map_or(0, |highest_static_fileted_block| { + highest_static_fileted_block + 1 + }) + }) + }) + } +} + +impl StaticFileProducer { + /// Creates a new [StaticFileProducer]. + pub fn new( + provider_factory: ProviderFactory, + static_file_provider: StaticFileProvider, + prune_modes: PruneModes, + ) -> Self { + Self { provider_factory, static_file_provider, prune_modes, listeners: Default::default() } + } + + /// Listen for events on the static_file_producer. + pub fn events(&mut self) -> UnboundedReceiverStream { + self.listeners.new_listener() + } + + /// Run the static_file_producer. 
+ /// + /// For each [Some] target in [StaticFileTargets], initializes a corresponding [Segment] and + /// runs it with the provided block range using [StaticFileProvider] and a read-only + /// database transaction from [ProviderFactory]. All segments are run in parallel. + /// + /// NOTE: it doesn't delete the data from database, and the actual deleting (aka pruning) logic + /// lives in the `prune` crate. + pub fn run(&mut self, targets: StaticFileTargets) -> StaticFileProducerResult { + debug_assert!(targets.is_contiguous_to_highest_static_files( + self.static_file_provider.get_highest_static_files() + )); + + self.listeners.notify(StaticFileProducerEvent::Started { targets: targets.clone() }); + + debug!(target: "static_file", ?targets, "StaticFileProducer started"); + let start = Instant::now(); + + let mut segments = Vec::<(Box>, RangeInclusive)>::new(); + + if let Some(block_range) = targets.transactions.clone() { + segments.push((Box::new(segments::Transactions), block_range)); + } + if let Some(block_range) = targets.headers.clone() { + segments.push((Box::new(segments::Headers), block_range)); + } + if let Some(block_range) = targets.receipts.clone() { + segments.push((Box::new(segments::Receipts), block_range)); + } + + segments.par_iter().try_for_each(|(segment, block_range)| -> RethResult<()> { + debug!(target: "static_file", segment = %segment.segment(), ?block_range, "StaticFileProducer segment"); + let start = Instant::now(); + + // Create a new database transaction on every segment to prevent long-lived read-only + // transactions + let provider = self.provider_factory.provider()?.disable_long_read_transaction_safety(); + segment.copy_to_static_files(provider, self.static_file_provider.clone(), block_range.clone())?; + + let elapsed = start.elapsed(); // TODO(alexey): track in metrics + debug!(target: "static_file", segment = %segment.segment(), ?block_range, ?elapsed, "Finished StaticFileProducer segment"); + + Ok(()) + })?; + + self.static_file_provider.commit()?; + for (segment, block_range) in segments { + self.static_file_provider.update_index(segment.segment(), Some(*block_range.end()))?; + } + + let elapsed = start.elapsed(); // TODO(alexey): track in metrics + debug!(target: "static_file", ?targets, ?elapsed, "StaticFileProducer finished"); + + self.listeners + .notify(StaticFileProducerEvent::Finished { targets: targets.clone(), elapsed }); + + Ok(targets) + } + + /// Returns a static file targets at the provided finalized block numbers per segment. + /// The target is determined by the check against highest static_files using + /// [StaticFileProvider::get_highest_static_files]. 
+ pub fn get_static_file_targets( + &self, + finalized_block_numbers: HighestStaticFiles, + ) -> RethResult { + let highest_static_files = self.static_file_provider.get_highest_static_files(); + + let targets = StaticFileTargets { + headers: finalized_block_numbers.headers.and_then(|finalized_block_number| { + self.get_static_file_target(highest_static_files.headers, finalized_block_number) + }), + // StaticFile receipts only if they're not pruned according to the user configuration + receipts: if self.prune_modes.receipts.is_none() && + self.prune_modes.receipts_log_filter.is_empty() + { + finalized_block_numbers.receipts.and_then(|finalized_block_number| { + self.get_static_file_target( + highest_static_files.receipts, + finalized_block_number, + ) + }) + } else { + None + }, + transactions: finalized_block_numbers.transactions.and_then(|finalized_block_number| { + self.get_static_file_target( + highest_static_files.transactions, + finalized_block_number, + ) + }), + }; + + trace!( + target: "static_file", + ?finalized_block_numbers, + ?highest_static_files, + ?targets, + any = %targets.any(), + "StaticFile targets" + ); + + Ok(targets) + } + + fn get_static_file_target( + &self, + highest_static_file: Option, + finalized_block_number: BlockNumber, + ) -> Option> { + let range = highest_static_file.map_or(0, |block| block + 1)..=finalized_block_number; + (!range.is_empty()).then_some(range) + } +} + +#[cfg(test)] +mod tests { + use crate::{static_file_producer::StaticFileTargets, StaticFileProducer}; + use assert_matches::assert_matches; + use reth_db::{database::Database, transaction::DbTx}; + use reth_interfaces::{ + provider::ProviderError, + test_utils::{ + generators, + generators::{random_block_range, random_receipt}, + }, + RethError, + }; + use reth_primitives::{ + static_file::HighestStaticFiles, PruneModes, StaticFileSegment, B256, U256, + }; + use reth_provider::providers::StaticFileWriter; + use reth_stages::test_utils::{StorageKind, TestStageDB}; + + #[test] + fn run() { + let mut rng = generators::rng(); + + let db = TestStageDB::default(); + + let blocks = random_block_range(&mut rng, 0..=3, B256::ZERO, 2..3); + db.insert_blocks(blocks.iter(), StorageKind::Database(None)).expect("insert blocks"); + // Unwind headers from static_files and manually insert them into the database, so we're + // able to check that static_file_producer works + db.factory + .static_file_provider() + .latest_writer(StaticFileSegment::Headers) + .expect("get static file writer for headers") + .prune_headers(blocks.len() as u64) + .expect("prune headers"); + let tx = db.factory.db_ref().tx_mut().expect("init tx"); + blocks.iter().for_each(|block| { + TestStageDB::insert_header(None, &tx, &block.header, U256::ZERO) + .expect("insert block header"); + }); + tx.commit().expect("commit tx"); + + let mut receipts = Vec::new(); + for block in &blocks { + for transaction in &block.body { + receipts + .push((receipts.len() as u64, random_receipt(&mut rng, transaction, Some(0)))); + } + } + db.insert_receipts(receipts).expect("insert receipts"); + + let provider_factory = db.factory; + let static_file_provider = provider_factory.static_file_provider(); + + let mut static_file_producer = StaticFileProducer::new( + provider_factory, + static_file_provider.clone(), + PruneModes::default(), + ); + + let targets = static_file_producer + .get_static_file_targets(HighestStaticFiles { + headers: Some(1), + receipts: Some(1), + transactions: Some(1), + }) + .expect("get static file targets"); + assert_eq!( + targets, 
+ StaticFileTargets { + headers: Some(0..=1), + receipts: Some(0..=1), + transactions: Some(0..=1) + } + ); + assert_matches!(static_file_producer.run(targets), Ok(_)); + assert_eq!( + static_file_provider.get_highest_static_files(), + HighestStaticFiles { headers: Some(1), receipts: Some(1), transactions: Some(1) } + ); + + let targets = static_file_producer + .get_static_file_targets(HighestStaticFiles { + headers: Some(3), + receipts: Some(3), + transactions: Some(3), + }) + .expect("get static file targets"); + assert_eq!( + targets, + StaticFileTargets { + headers: Some(2..=3), + receipts: Some(2..=3), + transactions: Some(2..=3) + } + ); + assert_matches!(static_file_producer.run(targets), Ok(_)); + assert_eq!( + static_file_provider.get_highest_static_files(), + HighestStaticFiles { headers: Some(3), receipts: Some(3), transactions: Some(3) } + ); + + let targets = static_file_producer + .get_static_file_targets(HighestStaticFiles { + headers: Some(4), + receipts: Some(4), + transactions: Some(4), + }) + .expect("get static file targets"); + assert_eq!( + targets, + StaticFileTargets { + headers: Some(4..=4), + receipts: Some(4..=4), + transactions: Some(4..=4) + } + ); + assert_matches!( + static_file_producer.run(targets), + Err(RethError::Provider(ProviderError::BlockBodyIndicesNotFound(4))) + ); + assert_eq!( + static_file_provider.get_highest_static_files(), + HighestStaticFiles { headers: Some(3), receipts: Some(3), transactions: Some(3) } + ); + } +} diff --git a/crates/storage/codecs/derive/src/compact/generator.rs b/crates/storage/codecs/derive/src/compact/generator.rs index 370d74eec2a..8cd9070bb4b 100644 --- a/crates/storage/codecs/derive/src/compact/generator.rs +++ b/crates/storage/codecs/derive/src/compact/generator.rs @@ -52,7 +52,7 @@ pub fn generate_from_to(ident: &Ident, fields: &FieldList, is_zstd: bool) -> Tok /// Generates code to implement the `Compact` trait method `to_compact`. fn generate_from_compact(fields: &FieldList, ident: &Ident, is_zstd: bool) -> TokenStream2 { let mut lines = vec![]; - let mut known_types = vec!["B256", "Address", "Bloom", "Vec", "TxHash"]; + let mut known_types = vec!["B256", "Address", "Bloom", "Vec", "TxHash", "BlockHash"]; // Only types without `Bytes` should be added here. It's currently manually added, since // it's hard to figure out with derive_macro which types have Bytes fields. 
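As a side note on the producer targets logic shown earlier in this diff: the per-segment range rule used by `get_static_file_target` can be restated as a small self-contained sketch. The function and the numbers in the assertions below are hypothetical and only mirror the behavior exercised by the test above.

```rust
use std::ops::RangeInclusive;

type BlockNumber = u64;

/// Next range to move into static files: one block past the highest block
/// already in static files (or block 0 if there is none), up to and including
/// the finalized block. An empty range means there is nothing to do.
fn static_file_target(
    highest_static_file: Option<BlockNumber>,
    finalized_block_number: BlockNumber,
) -> Option<RangeInclusive<BlockNumber>> {
    let range = highest_static_file.map_or(0, |block| block + 1)..=finalized_block_number;
    (!range.is_empty()).then_some(range)
}

fn main() {
    // Nothing in static files yet: target everything up to the finalized block.
    assert_eq!(static_file_target(None, 1), Some(0..=1));
    // Blocks 0..=1 are already static: only 2..=3 remains.
    assert_eq!(static_file_target(Some(1), 3), Some(2..=3));
    // Already caught up: no target.
    assert_eq!(static_file_target(Some(3), 3), None);
}
```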
diff --git a/crates/storage/codecs/derive/src/compact/mod.rs b/crates/storage/codecs/derive/src/compact/mod.rs index 7e1ed9b42d4..39aaa242f2a 100644 --- a/crates/storage/codecs/derive/src/compact/mod.rs +++ b/crates/storage/codecs/derive/src/compact/mod.rs @@ -143,7 +143,7 @@ fn should_use_alt_impl(ftype: &String, segment: &syn::PathSegment) -> bool { if let (Some(path), 1) = (arg_path.path.segments.first(), arg_path.path.segments.len()) { - if ["B256", "Address", "Address", "Bloom", "TxHash"] + if ["B256", "Address", "Address", "Bloom", "TxHash", "BlockHash"] .contains(&path.ident.to_string().as_str()) { return true diff --git a/crates/storage/db/src/lib.rs b/crates/storage/db/src/lib.rs index 2bbd9edfff7..ea260eaebf8 100644 --- a/crates/storage/db/src/lib.rs +++ b/crates/storage/db/src/lib.rs @@ -67,7 +67,7 @@ pub mod abstraction; mod implementation; mod metrics; -pub mod snapshot; +pub mod static_file; pub mod tables; mod utils; pub mod version; @@ -98,7 +98,7 @@ pub fn init_db>(path: P, args: DatabaseArguments) -> eyre::Result let rpath = path.as_ref(); if is_database_empty(rpath) { - std::fs::create_dir_all(rpath) + reth_primitives::fs::create_dir_all(rpath) .wrap_err_with(|| format!("Could not create database directory {}", rpath.display()))?; create_db_version_file(rpath)?; } else { @@ -163,6 +163,8 @@ pub mod test_utils { pub const ERROR_DB_OPEN: &str = "Not able to open the database file."; /// Error during database creation pub const ERROR_DB_CREATION: &str = "Not able to create the database file."; + /// Error during database creation + pub const ERROR_STATIC_FILES_CREATION: &str = "Not able to create the static file path."; /// Error during table creation pub const ERROR_TABLE_CREATION: &str = "Not able to create tables in the database."; /// Error during tempdir creation @@ -225,6 +227,15 @@ pub mod test_utils { } } + /// Create static_files path for testing + pub fn create_test_static_files_dir() -> PathBuf { + let path = tempdir_path(); + let emsg = format!("{}: {:?}", ERROR_STATIC_FILES_CREATION, path); + + reth_primitives::fs::create_dir_all(path.clone()).expect(&emsg); + path + } + /// Get a temporary directory path to use for the database pub fn tempdir_path() -> PathBuf { let builder = tempfile::Builder::new().prefix("reth-test-").rand_bytes(8).tempdir(); diff --git a/crates/storage/db/src/snapshot/masks.rs b/crates/storage/db/src/snapshot/masks.rs deleted file mode 100644 index 2bc7bb416a7..00000000000 --- a/crates/storage/db/src/snapshot/masks.rs +++ /dev/null @@ -1,28 +0,0 @@ -use super::{ReceiptMask, TransactionMask}; -use crate::{ - add_snapshot_mask, - snapshot::mask::{ColumnSelectorOne, ColumnSelectorTwo, HeaderMask}, - table::Table, - CanonicalHeaders, HeaderTerminalDifficulties, Receipts, Transactions, -}; -use reth_primitives::{BlockHash, Header}; - -// HEADER MASKS - -add_snapshot_mask!(HeaderMask, Header, 0b001); -add_snapshot_mask!(HeaderMask, ::Value, 0b010); -add_snapshot_mask!(HeaderMask, BlockHash, 0b100); - -add_snapshot_mask!(HeaderMask, Header, BlockHash, 0b101); -add_snapshot_mask!( - HeaderMask, - ::Value, - ::Value, - 0b110 -); - -// RECEIPT MASKS -add_snapshot_mask!(ReceiptMask, ::Value, 0b1); - -// TRANSACTION MASKS -add_snapshot_mask!(TransactionMask, ::Value, 0b1); diff --git a/crates/storage/db/src/snapshot/mod.rs b/crates/storage/db/src/snapshot/mod.rs deleted file mode 100644 index 0856466d23a..00000000000 --- a/crates/storage/db/src/snapshot/mod.rs +++ /dev/null @@ -1,76 +0,0 @@ -//! 
reth's snapshot database table import and access - -mod generation; -use std::{ - collections::{hash_map::Entry, HashMap}, - ops::RangeInclusive, - path::Path, -}; - -pub use generation::*; - -mod cursor; -pub use cursor::SnapshotCursor; - -mod mask; -pub use mask::*; -use reth_nippy_jar::{NippyJar, NippyJarError}; -use reth_primitives::{snapshot::SegmentHeader, BlockNumber, SnapshotSegment, TxNumber}; - -mod masks; - -/// Alias type for a map of [`SnapshotSegment`] and sorted lists of existing snapshot ranges. -type SortedSnapshots = - HashMap, RangeInclusive)>>; - -/// Given the snapshots directory path, it returns a list over the existing snapshots organized by -/// [`SnapshotSegment`]. Each segment has a sorted list of block ranges and transaction ranges. -pub fn iter_snapshots(path: impl AsRef) -> Result { - let mut static_files = SortedSnapshots::default(); - let entries = reth_primitives::fs::read_dir(path.as_ref()) - .map_err(|err| NippyJarError::Custom(err.to_string()))? - .filter_map(Result::ok) - .collect::>(); - - for entry in entries { - if entry.metadata().map_or(false, |metadata| metadata.is_file()) { - if let Some((segment, block_range, tx_range)) = - SnapshotSegment::parse_filename(&entry.file_name()) - { - let ranges = (block_range, tx_range); - match static_files.entry(segment) { - Entry::Occupied(mut entry) => { - entry.get_mut().push(ranges); - } - Entry::Vacant(entry) => { - entry.insert(vec![ranges]); - } - } - } - } - } - - for (segment, range_list) in static_files.iter_mut() { - // Sort by block end range. - range_list.sort_by(|a, b| a.0.end().cmp(b.0.end())); - - if let Some((block_range, tx_range)) = range_list.pop() { - // The highest height static file filename might not be indicative of its actual - // block_range, so we need to read its actual configuration. - let jar = NippyJar::::load( - &path.as_ref().join(segment.filename(&block_range, &tx_range)), - )?; - - if &tx_range != jar.user_header().tx_range() { - // TODO(joshie): rename - } - - range_list.push(( - jar.user_header().block_range().clone(), - jar.user_header().tx_range().clone(), - )); - } - } - - Ok(static_files) -} diff --git a/crates/storage/db/src/snapshot/cursor.rs b/crates/storage/db/src/static_file/cursor.rs similarity index 77% rename from crates/storage/db/src/snapshot/cursor.rs rename to crates/storage/db/src/static_file/cursor.rs index f778b39a03e..237cbe4518d 100644 --- a/crates/storage/db/src/snapshot/cursor.rs +++ b/crates/storage/db/src/static_file/cursor.rs @@ -3,23 +3,23 @@ use crate::table::Decompress; use derive_more::{Deref, DerefMut}; use reth_interfaces::provider::ProviderResult; use reth_nippy_jar::{DataReader, NippyJar, NippyJarCursor}; -use reth_primitives::{snapshot::SegmentHeader, B256}; +use reth_primitives::{static_file::SegmentHeader, B256}; use std::sync::Arc; -/// Cursor of a snapshot segment. +/// Cursor of a static file segment. #[derive(Debug, Deref, DerefMut)] -pub struct SnapshotCursor<'a>(NippyJarCursor<'a, SegmentHeader>); +pub struct StaticFileCursor<'a>(NippyJarCursor<'a, SegmentHeader>); -impl<'a> SnapshotCursor<'a> { - /// Returns a new [`SnapshotCursor`]. +impl<'a> StaticFileCursor<'a> { + /// Returns a new [`StaticFileCursor`]. pub fn new(jar: &'a NippyJar, reader: Arc) -> ProviderResult { Ok(Self(NippyJarCursor::with_reader(jar, reader)?)) } /// Returns the current `BlockNumber` or `TxNumber` of the cursor depending on the kind of - /// snapshot segment. 
- pub fn number(&self) -> u64 { - self.row_index() + self.jar().user_header().start() + /// static file segment. + pub fn number(&self) -> Option { + self.jar().user_header().start().map(|start| self.row_index() + start) } /// Gets a row of values. @@ -28,15 +28,21 @@ impl<'a> SnapshotCursor<'a> { key_or_num: KeyOrNumber<'_>, mask: usize, ) -> ProviderResult>> { + if self.jar().rows() == 0 { + return Ok(None) + } + let row = match key_or_num { KeyOrNumber::Key(k) => self.row_by_key_with_cols(k, mask), - KeyOrNumber::Number(n) => { - let offset = self.jar().user_header().start(); - if offset > n { - return Ok(None) + KeyOrNumber::Number(n) => match self.jar().user_header().start() { + Some(offset) => { + if offset > n { + return Ok(None) + } + self.row_by_number_with_cols((n - offset) as usize, mask) } - self.row_by_number_with_cols((n - offset) as usize, mask) - } + None => Ok(None), + }, }?; Ok(row) diff --git a/crates/storage/db/src/snapshot/generation.rs b/crates/storage/db/src/static_file/generation.rs similarity index 67% rename from crates/storage/db/src/snapshot/generation.rs rename to crates/storage/db/src/static_file/generation.rs index ea1c1e65431..0c667e1075f 100644 --- a/crates/storage/db/src/snapshot/generation.rs +++ b/crates/storage/db/src/static_file/generation.rs @@ -10,16 +10,16 @@ use reth_nippy_jar::{ColumnResult, NippyJar, NippyJarHeader, PHFKey}; use reth_tracing::tracing::*; use std::{error::Error as StdError, ops::RangeInclusive}; -/// Macro that generates snapshot creation functions that take an arbitratry number of [`Table`] and -/// creates a [`NippyJar`] file out of their [`Table::Value`]. Each list of [`Table::Value`] from a -/// table is a column of values. +/// Macro that generates static file creation functions that take an arbitratry number of [`Table`] +/// and creates a [`NippyJar`] file out of their [`Table::Value`]. Each list of [`Table::Value`] +/// from a table is a column of values. /// /// Has membership filter set and compression dictionary support. -macro_rules! generate_snapshot_func { +macro_rules! generate_static_file_func { ($(($($tbl:ident),+)),+ $(,)? ) => { $( paste::item! { - /// Creates a snapshot from specified tables. Each table's `Value` iterator represents a column. + /// Creates a static file from specified tables. Each table's `Value` iterator represents a column. /// /// **Ensure the range contains the same number of rows.** /// @@ -29,9 +29,9 @@ macro_rules! generate_snapshot_func { /// * `keys`: Iterator of keys (eg. `TxHash` or `BlockHash`) with length equal to `row_count` and ordered by future column insertion from `range`. /// * `dict_compression_set`: Sets of column data for compression dictionaries. Max size is 2GB. Row count is independent. /// * `row_count`: Total rows to add to `NippyJar`. Must match row count in `range`. - /// * `nippy_jar`: Snapshot object responsible for file generation. + /// * `nippy_jar`: Static File object responsible for file generation. #[allow(non_snake_case)] - pub fn []< + pub fn []< $($tbl: Table,)+ K, H: NippyJarHeader @@ -43,27 +43,27 @@ macro_rules! 
generate_snapshot_func { dict_compression_set: Option>>>, keys: Option>>, row_count: usize, - nippy_jar: &mut NippyJar + mut nippy_jar: NippyJar ) -> ProviderResult<()> where K: Key + Copy { let additional = additional.unwrap_or_default(); - debug!(target: "reth::snapshot", ?range, "Creating snapshot {:?} and {} more columns.", vec![$($tbl::NAME,)+], additional.len()); + debug!(target: "reth::static_file", ?range, "Creating static file {:?} and {} more columns.", vec![$($tbl::NAME,)+], additional.len()); let range: RangeInclusive> = RawKey::new(*range.start())..=RawKey::new(*range.end()); // Create PHF and Filter if required if let Some(keys) = keys { - debug!(target: "reth::snapshot", "Calculating Filter, PHF and offset index list"); + debug!(target: "reth::static_file", "Calculating Filter, PHF and offset index list"); nippy_jar.prepare_index(keys, row_count)?; - debug!(target: "reth::snapshot", "Filter, PHF and offset index list calculated."); + debug!(target: "reth::static_file", "Filter, PHF and offset index list calculated."); } // Create compression dictionaries if required if let Some(data_sets) = dict_compression_set { - debug!(target: "reth::snapshot", "Creating compression dictionaries."); + debug!(target: "reth::static_file", "Creating compression dictionaries."); nippy_jar.prepare_compression(data_sets)?; - debug!(target: "reth::snapshot", "Compression dictionaries created."); + debug!(target: "reth::static_file", "Compression dictionaries created."); } // Creates the cursors for the columns @@ -80,17 +80,17 @@ macro_rules! generate_snapshot_func { )+ - // Create the snapshot from the data + // Create the static file from the data let col_iterators: Vec,_>>>> = vec![ $(Box::new([< $tbl _iter>]),)+ ]; - debug!(target: "reth::snapshot", jar=?nippy_jar, "Generating snapshot file."); + debug!(target: "reth::static_file", jar=?nippy_jar, "Generating static file."); - nippy_jar.freeze(col_iterators.into_iter().chain(additional).collect(), row_count as u64)?; + let nippy_jar = nippy_jar.freeze(col_iterators.into_iter().chain(additional).collect(), row_count as u64)?; - debug!(target: "reth::snapshot", jar=?nippy_jar, "Snapshot file generated."); + debug!(target: "reth::static_file", jar=?nippy_jar, "Static file generated."); Ok(()) } @@ -99,4 +99,4 @@ macro_rules! generate_snapshot_func { }; } -generate_snapshot_func!((T1), (T1, T2), (T1, T2, T3), (T1, T2, T3, T4), (T1, T2, T3, T4, T5),); +generate_static_file_func!((T1), (T1, T2), (T1, T2, T3), (T1, T2, T3, T4), (T1, T2, T3, T4, T5),); diff --git a/crates/storage/db/src/snapshot/mask.rs b/crates/storage/db/src/static_file/mask.rs similarity index 75% rename from crates/storage/db/src/snapshot/mask.rs rename to crates/storage/db/src/static_file/mask.rs index 7b8cb016772..de5932ea9ac 100644 --- a/crates/storage/db/src/snapshot/mask.rs +++ b/crates/storage/db/src/static_file/mask.rs @@ -4,14 +4,14 @@ use crate::table::Decompress; /// /// #### Explanation: /// -/// A `NippyJar` snapshot row can contain multiple column values. To specify the column values +/// A `NippyJar` static file row can contain multiple column values. To specify the column values /// to be read, a mask is utilized. /// -/// For example, a snapshot with three columns, if the first and last columns are queried, the mask -/// `0b101` would be passed. To select only the second column, the mask `0b010` would be used. +/// For example, a static file with three columns, if the first and last columns are queried, the +/// mask `0b101` would be passed. 
To select only the second column, the mask `0b010` would be used. /// -/// Since each snapshot has its own column distribution, different wrapper types are necessary. For -/// instance, `B256` might be the third column in the `Header` segment, while being the second +/// Since each static file has its own column distribution, different wrapper types are necessary. +/// For instance, `B256` might be the third column in the `Header` segment, while being the second /// column in another segment. Hence, `Mask` would only be applicable to one of these /// scenarios. /// @@ -24,7 +24,7 @@ macro_rules! add_segments { ($($segment:tt),+) => { paste::paste! { $( - #[doc = concat!("Mask for ", stringify!($segment), " snapshot segment. See [`Mask`] for more.")] + #[doc = concat!("Mask for ", stringify!($segment), " static file segment. See [`Mask`] for more.")] #[derive(Debug)] pub struct [<$segment Mask>](Mask); )+ @@ -37,7 +37,7 @@ add_segments!(Header, Receipt, Transaction); pub trait ColumnSelectorOne { /// First desired column value type FIRST: Decompress; - /// Mask to obtain desired values, should correspond to the order of columns in a snapshot. + /// Mask to obtain desired values, should correspond to the order of columns in a static_file. const MASK: usize; } @@ -47,7 +47,7 @@ pub trait ColumnSelectorTwo { type FIRST: Decompress; /// Second desired column value type SECOND: Decompress; - /// Mask to obtain desired values, should correspond to the order of columns in a snapshot. + /// Mask to obtain desired values, should correspond to the order of columns in a static_file. const MASK: usize; } @@ -59,13 +59,13 @@ pub trait ColumnSelectorThree { type SECOND: Decompress; /// Third desired column value type THIRD: Decompress; - /// Mask to obtain desired values, should correspond to the order of columns in a snapshot. + /// Mask to obtain desired values, should correspond to the order of columns in a static_file. const MASK: usize; } #[macro_export] -/// Add mask to select `N` column values from a specific snapshot segment row. -macro_rules! add_snapshot_mask { +/// Add mask to select `N` column values from a specific static file segment row. +macro_rules! add_static_file_mask { ($mask_struct:tt, $type1:ty, $mask:expr) => { impl ColumnSelectorOne for $mask_struct<$type1> { type FIRST = $type1; @@ -80,7 +80,7 @@ macro_rules! 
add_snapshot_mask { } }; ($mask_struct:tt, $type1:ty, $type2:ty, $type3:ty, $mask:expr) => { - impl ColumnSelectorTwo for $mask_struct<$type1, $type2, $type3> { + impl ColumnSelectorThree for $mask_struct<$type1, $type2, $type3> { type FIRST = $type1; type SECOND = $type2; type THIRD = $type3; diff --git a/crates/storage/db/src/static_file/masks.rs b/crates/storage/db/src/static_file/masks.rs new file mode 100644 index 00000000000..ab3e0d99e12 --- /dev/null +++ b/crates/storage/db/src/static_file/masks.rs @@ -0,0 +1,21 @@ +use super::{ReceiptMask, TransactionMask}; +use crate::{ + add_static_file_mask, + static_file::mask::{ColumnSelectorOne, ColumnSelectorTwo, HeaderMask}, + table::Table, + HeaderTerminalDifficulties, Receipts, Transactions, +}; +use reth_primitives::{BlockHash, Header}; + +// HEADER MASKS +add_static_file_mask!(HeaderMask, Header, 0b001); +add_static_file_mask!(HeaderMask, ::Value, 0b010); +add_static_file_mask!(HeaderMask, BlockHash, 0b100); +add_static_file_mask!(HeaderMask, Header, BlockHash, 0b101); +add_static_file_mask!(HeaderMask, ::Value, BlockHash, 0b110); + +// RECEIPT MASKS +add_static_file_mask!(ReceiptMask, ::Value, 0b1); + +// TRANSACTION MASKS +add_static_file_mask!(TransactionMask, ::Value, 0b1); diff --git a/crates/storage/db/src/static_file/mod.rs b/crates/storage/db/src/static_file/mod.rs new file mode 100644 index 00000000000..eed27e0de95 --- /dev/null +++ b/crates/storage/db/src/static_file/mod.rs @@ -0,0 +1,76 @@ +//! reth's static file database table import and access + +mod generation; +use std::{ + collections::{hash_map::Entry, HashMap}, + path::Path, +}; + +pub use generation::*; + +mod cursor; +pub use cursor::StaticFileCursor; + +mod mask; +pub use mask::*; +use reth_nippy_jar::{NippyJar, NippyJarError}; +use reth_primitives::{ + static_file::{SegmentHeader, SegmentRangeInclusive}, + StaticFileSegment, +}; + +mod masks; + +/// Alias type for a map of [`StaticFileSegment`] and sorted lists of existing static file ranges. +type SortedStaticFiles = + HashMap)>>; + +/// Given the static_files directory path, it returns a list over the existing static_files +/// organized by [`StaticFileSegment`]. Each segment has a sorted list of block ranges and +/// transaction ranges as presented in the file configuration. +pub fn iter_static_files(path: impl AsRef) -> Result { + let path = path.as_ref(); + if !path.exists() { + reth_primitives::fs::create_dir_all(path) + .map_err(|err| NippyJarError::Custom(err.to_string()))?; + } + + let mut static_files = SortedStaticFiles::default(); + let entries = reth_primitives::fs::read_dir(path) + .map_err(|err| NippyJarError::Custom(err.to_string()))? + .filter_map(Result::ok) + .collect::>(); + + for entry in entries { + if entry.metadata().map_or(false, |metadata| metadata.is_file()) { + if let Some((segment, _)) = + StaticFileSegment::parse_filename(&entry.file_name().to_string_lossy()) + { + let jar = NippyJar::::load(&entry.path())?; + + let (block_range, tx_range) = ( + jar.user_header().block_range().copied(), + jar.user_header().tx_range().copied(), + ); + + if let Some(block_range) = block_range { + match static_files.entry(segment) { + Entry::Occupied(mut entry) => { + entry.get_mut().push((block_range, tx_range)); + } + Entry::Vacant(entry) => { + entry.insert(vec![(block_range, tx_range)]); + } + } + } + } + } + } + + for (_, range_list) in static_files.iter_mut() { + // Sort by block end range. 
+ range_list.sort_by(|a, b| a.0.end().cmp(&b.0.end())); + } + + Ok(static_files) +} diff --git a/crates/storage/db/src/tables/codecs/compact.rs b/crates/storage/db/src/tables/codecs/compact.rs index f31e61026e6..38722eb4903 100644 --- a/crates/storage/db/src/tables/codecs/compact.rs +++ b/crates/storage/db/src/tables/codecs/compact.rs @@ -29,6 +29,7 @@ macro_rules! impl_compression_for_compact { } impl_compression_for_compact!( + SealedHeader, Header, Account, Log, diff --git a/crates/storage/db/src/tables/raw.rs b/crates/storage/db/src/tables/raw.rs index 58c6b4e0621..90d4b96aec1 100644 --- a/crates/storage/db/src/tables/raw.rs +++ b/crates/storage/db/src/tables/raw.rs @@ -53,6 +53,12 @@ impl RawKey { Self { key: K::encode(key).into(), _phantom: std::marker::PhantomData } } + /// Creates a raw key from an existing `Vec`. Useful when we already have the encoded + /// key. + pub fn from_vec(vec: Vec) -> Self { + Self { key: vec, _phantom: std::marker::PhantomData } + } + /// Returns the decoded value. pub fn key(&self) -> Result { K::decode(&self.key) @@ -112,6 +118,12 @@ impl RawValue { Self { value: V::compress(value).into(), _phantom: std::marker::PhantomData } } + /// Creates a raw value from an existing `Vec`. Useful when we already have the encoded + /// value. + pub fn from_vec(vec: Vec) -> Self { + Self { value: vec, _phantom: std::marker::PhantomData } + } + /// Returns the decompressed value. pub fn value(&self) -> Result { V::decompress(&self.value) diff --git a/crates/storage/libmdbx-rs/Cargo.toml b/crates/storage/libmdbx-rs/Cargo.toml index e06eb089acc..2330b6f79e4 100644 --- a/crates/storage/libmdbx-rs/Cargo.toml +++ b/crates/storage/libmdbx-rs/Cargo.toml @@ -17,13 +17,13 @@ name = "reth_libmdbx" [dependencies] bitflags.workspace = true byteorder = "1" -derive_more = "0.99" +derive_more.workspace = true indexmap = "2" libc = "0.2" parking_lot.workspace = true thiserror.workspace = true dashmap = { version = "5.5.3", features = ["inline"], optional = true } -tracing = { workspace = true, optional = true } +tracing.workspace = true ffi = { package = "reth-mdbx-sys", path = "./mdbx-sys" } @@ -33,7 +33,7 @@ libffi = "3.2.0" [features] default = [] return-borrowed = [] -read-tx-timeouts = ["dashmap", "dashmap/inline", "tracing"] +read-tx-timeouts = ["dashmap", "dashmap/inline"] [dev-dependencies] pprof = { workspace = true, features = ["flamegraph", "frame-pointer", "criterion"] } diff --git a/crates/storage/libmdbx-rs/src/environment.rs b/crates/storage/libmdbx-rs/src/environment.rs index fd133021008..91bf80edbf3 100644 --- a/crates/storage/libmdbx-rs/src/environment.rs +++ b/crates/storage/libmdbx-rs/src/environment.rs @@ -20,6 +20,7 @@ use std::{ thread::sleep, time::Duration, }; +use tracing::warn; /// The default maximum duration of a read transaction. #[cfg(feature = "read-tx-timeouts")] @@ -96,6 +97,7 @@ impl Environment { /// Create a read-write transaction for use with the environment. This method will block while /// there are any other read-write transactions open on the environment. 
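The `iter_static_files` helper introduced above returns, per segment, the block and transaction ranges found on disk. A hedged usage sketch follows; the re-export path and the printable element types are assumptions, only the function itself is taken from the hunk above:

    use std::path::Path;

    use reth_db::static_file::iter_static_files; // module path is an assumption
    use reth_nippy_jar::NippyJarError;

    /// Print which block/tx ranges exist per static-file segment in `dir`.
    fn print_static_file_ranges(dir: &Path) -> Result<(), NippyJarError> {
        let per_segment = iter_static_files(dir)?;
        for (segment, ranges) in &per_segment {
            for (block_range, tx_range) in ranges {
                println!("{segment:?}: blocks {block_range:?}, txs {tx_range:?}");
            }
        }
        Ok(())
    }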
pub fn begin_rw_txn(&self) -> Result> { + let mut warned = false; let txn = loop { let (tx, rx) = sync_channel(0); self.txn_manager().send_message(TxnManagerMessage::Begin { @@ -105,6 +107,10 @@ impl Environment { }); let res = rx.recv().unwrap(); if let Err(Error::Busy) = &res { + if !warned { + warned = true; + warn!(target: "libmdbx", "Process stalled, awaiting read-write transaction lock."); + } sleep(Duration::from_millis(250)); continue } @@ -937,7 +943,8 @@ mod tests { .open(tempdir.path()) .unwrap(); - // Insert some data in the database, so the read transaction can lock on the snapshot of it + // Insert some data in the database, so the read transaction can lock on the static file of + // it { let tx = env.begin_rw_txn().unwrap(); let db = tx.open_db(None).unwrap(); @@ -950,7 +957,8 @@ mod tests { // Create a read transaction let _tx_ro = env.begin_ro_txn().unwrap(); - // Change previously inserted data, so the read transaction would use the previous snapshot + // Change previously inserted data, so the read transaction would use the previous static + // file { let tx = env.begin_rw_txn().unwrap(); let db = tx.open_db(None).unwrap(); @@ -961,7 +969,7 @@ mod tests { } // Insert more data in the database, so we hit the DB size limit error, and MDBX tries to - // kick long-lived readers and delete their snapshots + // kick long-lived readers and delete their static_files { let tx = env.begin_rw_txn().unwrap(); let db = tx.open_db(None).unwrap(); diff --git a/crates/storage/libmdbx-rs/src/flags.rs b/crates/storage/libmdbx-rs/src/flags.rs index ad88c1fbedc..f984ffcaf02 100644 --- a/crates/storage/libmdbx-rs/src/flags.rs +++ b/crates/storage/libmdbx-rs/src/flags.rs @@ -25,7 +25,7 @@ pub enum SyncMode { /// /// [SyncMode::UtterlyNoSync] the [SyncMode::SafeNoSync] flag disable similarly flush system /// buffers to disk when committing a transaction. But there is a huge difference in how - /// are recycled the MVCC snapshots corresponding to previous "steady" transactions (see + /// are recycled the MVCC static_files corresponding to previous "steady" transactions (see /// below). 
/// /// With [crate::EnvironmentKind::WriteMap] the [SyncMode::SafeNoSync] instructs MDBX to use diff --git a/crates/storage/nippy-jar/Cargo.toml b/crates/storage/nippy-jar/Cargo.toml index fb7fc4ae7d9..7ed18e6a659 100644 --- a/crates/storage/nippy-jar/Cargo.toml +++ b/crates/storage/nippy-jar/Cargo.toml @@ -15,6 +15,8 @@ workspace = true name = "reth_nippy_jar" [dependencies] +# reth +reth-primitives.workspace = true # filter ph = "0.8.0" @@ -33,7 +35,7 @@ serde = { version = "1.0", features = ["derive"] } tracing = "0.1.0" anyhow = "1.0" thiserror.workspace = true -derive_more = "0.99" +derive_more.workspace = true [dev-dependencies] rand = { version = "0.8", features = ["small_rng"] } diff --git a/crates/storage/nippy-jar/src/error.rs b/crates/storage/nippy-jar/src/error.rs index 760a9446ddf..c769f0db863 100644 --- a/crates/storage/nippy-jar/src/error.rs +++ b/crates/storage/nippy-jar/src/error.rs @@ -7,6 +7,8 @@ pub enum NippyJarError { Internal(#[from] Box), #[error(transparent)] Disconnect(#[from] std::io::Error), + #[error(transparent)] + FileSystem(#[from] reth_primitives::fs::FsPathError), #[error("{0}")] Custom(String), #[error(transparent)] diff --git a/crates/storage/nippy-jar/src/lib.rs b/crates/storage/nippy-jar/src/lib.rs index 46b7dc26bc6..f7b0c7b31a7 100644 --- a/crates/storage/nippy-jar/src/lib.rs +++ b/crates/storage/nippy-jar/src/lib.rs @@ -206,6 +206,16 @@ impl NippyJar { &self.user_header } + /// Gets total columns in jar. + pub fn columns(&self) -> usize { + self.columns + } + + /// Gets total rows in jar. + pub fn rows(&self) -> usize { + self.rows + } + /// Returns the size of inclusion filter pub fn filter_size(&self) -> usize { self.size() @@ -232,7 +242,9 @@ impl NippyJar { /// **The user must ensure the header type matches the one used during the jar's creation.** pub fn load(path: &Path) -> Result { // Read [`Self`] located at the data file. - let config_file = File::open(path.with_extension(CONFIG_FILE_EXTENSION))?; + let config_path = path.with_extension(CONFIG_FILE_EXTENSION); + let config_file = File::open(&config_path) + .map_err(|err| reth_primitives::fs::FsPathError::open(err, config_path))?; let mut obj: Self = bincode::deserialize_from(&config_file)?; obj.path = path.to_path_buf(); @@ -269,6 +281,21 @@ impl NippyJar { self.path.with_extension(CONFIG_FILE_EXTENSION) } + /// Deletes from disk this [`NippyJar`] alongside every satellite file. + pub fn delete(self) -> Result<(), NippyJarError> { + // TODO(joshie): ensure consistency on unexpected shutdown + + for path in + [self.data_path().into(), self.index_path(), self.offsets_path(), self.config_path()] + { + if path.exists() { + reth_primitives::fs::remove_file(path)?; + } + } + + Ok(()) + } + /// Returns a [`DataReader`] of the data and offset file pub fn open_data_reader(&self) -> Result { DataReader::new(self.data_path()) @@ -338,14 +365,17 @@ impl NippyJar { /// Writes all data and configuration to a file and the offset index to another. 
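The new `NippyJar::delete` helper shown above removes a jar together with every satellite file (config, offsets, index). A minimal sketch of loading and deleting a header-less jar, assuming the `load_without_header` constructor that the tests further down use:

    use std::path::Path;

    use reth_nippy_jar::{NippyJar, NippyJarError};

    /// Remove a static file jar plus its config, offsets and index files.
    fn remove_jar(path: &Path) -> Result<(), NippyJarError> {
        let jar = NippyJar::load_without_header(path)?;
        // `delete` consumes the jar and unlinks the data file and all satellite files.
        jar.delete()
    }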
pub fn freeze( - &mut self, + mut self, columns: Vec>>>, total_rows: u64, - ) -> Result<(), NippyJarError> { + ) -> Result { self.check_before_freeze(&columns)?; debug!(target: "nippy-jar", path=?self.data_path(), "Opening data file."); + // Write phf, filter and offset index to file + self.freeze_filters()?; + // Creates the writer, data and offsets file let mut writer = NippyJarWriter::new(self)?; @@ -355,12 +385,9 @@ impl NippyJar { // Flushes configuration and offsets to disk writer.commit()?; - // Write phf, filter and offset index to file - self.freeze_filters()?; - - debug!(target: "nippy-jar", jar=?self, "Finished writing data."); + debug!(target: "nippy-jar", ?writer, "Finished writing data."); - Ok(()) + Ok(writer.into_jar()) } /// Freezes [`PerfectHashingFunction`], [`InclusionFilter`] and the offset index to file. @@ -428,9 +455,9 @@ impl PerfectHashingFunction for NippyJar { } } -/// Manages the reading of snapshot data using memory-mapped files. +/// Manages the reading of static file data using memory-mapped files. /// -/// Holds file and mmap descriptors of the data and offsets files of a snapshot. +/// Holds file and mmap descriptors of the data and offsets files of a static_file. #[derive(Debug)] pub struct DataReader { /// Data file descriptor. Needs to be kept alive as long as `data_mmap` handle. @@ -558,15 +585,21 @@ mod tests { let num_rows = col1.len() as u64; let file_path = tempfile::NamedTempFile::new().unwrap(); - let mut nippy = NippyJar::new_without_header(num_columns, file_path.path()); - assert!(matches!(NippyJar::set_keys(&mut nippy, &col1), Err(NippyJarError::PHFMissing))); + let create_nippy = || -> NippyJar<()> { + let mut nippy = NippyJar::new_without_header(num_columns, file_path.path()); + assert!(matches!( + NippyJar::set_keys(&mut nippy, &col1), + Err(NippyJarError::PHFMissing) + )); + nippy + }; - let check_phf = |nippy: &mut NippyJar<_>| { + let check_phf = |mut nippy: NippyJar<_>| { assert!(matches!( - NippyJar::get_index(nippy, &col1[0]), + NippyJar::get_index(&nippy, &col1[0]), Err(NippyJarError::PHFMissingKeys) )); - assert!(NippyJar::set_keys(nippy, &col1).is_ok()); + assert!(NippyJar::set_keys(&mut nippy, &col1).is_ok()); let collect_indexes = |nippy: &NippyJar<_>| -> Vec { col1.iter() @@ -575,12 +608,12 @@ mod tests { }; // Ensure all indexes are unique - let indexes = collect_indexes(nippy); + let indexes = collect_indexes(&nippy); assert_eq!(indexes.iter().collect::>().len(), indexes.len()); // Ensure reproducibility - assert!(NippyJar::set_keys(nippy, &col1).is_ok()); - assert_eq!(indexes, collect_indexes(nippy)); + assert!(NippyJar::set_keys(&mut nippy, &col1).is_ok()); + assert_eq!(indexes, collect_indexes(&nippy)); // Ensure that loaded phf provides the same function outputs nippy.prepare_index(clone_with_result(&col1), col1.len()).unwrap(); @@ -593,12 +626,10 @@ mod tests { }; // fmph bytes size for 100 values of 32 bytes: 54 - nippy = nippy.with_fmph(); - check_phf(&mut nippy); + check_phf(create_nippy().with_fmph()); // fmph bytes size for 100 values of 32 bytes: 46 - nippy = nippy.with_gofmph(); - check_phf(&mut nippy); + check_phf(create_nippy().with_gofmph()); } #[test] @@ -631,7 +662,9 @@ mod tests { assert!(InclusionFilter::add(&mut nippy, &col1[2]).is_ok()); assert!(InclusionFilter::add(&mut nippy, &col1[3]).is_ok()); - nippy.freeze(vec![clone_with_result(&col1), clone_with_result(&col2)], num_rows).unwrap(); + let nippy = nippy + .freeze(vec![clone_with_result(&col1), clone_with_result(&col2)], num_rows) + .unwrap(); let mut 
loaded_nippy = NippyJar::load_without_header(file_path.path()).unwrap(); loaded_nippy.load_filters().unwrap(); @@ -675,6 +708,10 @@ mod tests { Err(NippyJarError::CompressorNotReady) )); + let mut nippy = + NippyJar::new_without_header(num_columns, file_path.path()).with_zstd(true, 5000); + assert!(nippy.compressor().is_some()); + nippy.prepare_compression(vec![col1.clone(), col2.clone()]).unwrap(); if let Some(Compressors::Zstd(zstd)) = &nippy.compressor() { @@ -684,7 +721,9 @@ mod tests { )); } - nippy.freeze(vec![clone_with_result(&col1), clone_with_result(&col2)], num_rows).unwrap(); + let nippy = nippy + .freeze(vec![clone_with_result(&col1), clone_with_result(&col2)], num_rows) + .unwrap(); let mut loaded_nippy = NippyJar::load_without_header(file_path.path()).unwrap(); loaded_nippy.load_filters().unwrap(); @@ -724,10 +763,12 @@ mod tests { let nippy = NippyJar::new_without_header(num_columns, file_path.path()); assert!(nippy.compressor().is_none()); - let mut nippy = NippyJar::new_without_header(num_columns, file_path.path()).with_lz4(); + let nippy = NippyJar::new_without_header(num_columns, file_path.path()).with_lz4(); assert!(nippy.compressor().is_some()); - nippy.freeze(vec![clone_with_result(&col1), clone_with_result(&col2)], num_rows).unwrap(); + let nippy = nippy + .freeze(vec![clone_with_result(&col1), clone_with_result(&col2)], num_rows) + .unwrap(); let mut loaded_nippy = NippyJar::load_without_header(file_path.path()).unwrap(); loaded_nippy.load_filters().unwrap(); @@ -760,11 +801,13 @@ mod tests { let nippy = NippyJar::new_without_header(num_columns, file_path.path()); assert!(nippy.compressor().is_none()); - let mut nippy = + let nippy = NippyJar::new_without_header(num_columns, file_path.path()).with_zstd(false, 5000); assert!(nippy.compressor().is_some()); - nippy.freeze(vec![clone_with_result(&col1), clone_with_result(&col2)], num_rows).unwrap(); + let nippy = nippy + .freeze(vec![clone_with_result(&col1), clone_with_result(&col2)], num_rows) + .unwrap(); let mut loaded_nippy = NippyJar::load_without_header(file_path.path()).unwrap(); loaded_nippy.load_filters().unwrap(); @@ -903,7 +946,7 @@ mod tests { let mut data = col1.iter().zip(col2.iter()).enumerate().collect::>(); data.shuffle(&mut rand::thread_rng()); - // Imagine `Blocks` snapshot file has two columns: `Block | StoredWithdrawals` + // Imagine `Blocks` static file has two columns: `Block | StoredWithdrawals` const BLOCKS_FULL_MASK: usize = 0b11; // Read both columns @@ -1047,7 +1090,7 @@ mod tests { col1: &[Vec], col2: &[Vec], ) { - let mut nippy = NippyJar::load_without_header(file_path).unwrap(); + let nippy = NippyJar::load_without_header(file_path).unwrap(); // Set the baseline that should be unwinded to let initial_rows = nippy.rows; @@ -1059,7 +1102,7 @@ mod tests { assert!(initial_offset_size > 0); // Appends a third row - let mut writer = NippyJarWriter::new(&mut nippy).unwrap(); + let mut writer = NippyJarWriter::new(nippy).unwrap(); writer.append_column(Some(Ok(&col1[2]))).unwrap(); writer.append_column(Some(Ok(&col2[2]))).unwrap(); @@ -1073,7 +1116,7 @@ mod tests { // Simulate an unexpected shutdown of the writer, before it can finish commit() drop(writer); - let mut nippy = NippyJar::load_without_header(file_path).unwrap(); + let nippy = NippyJar::load_without_header(file_path).unwrap(); assert_eq!(initial_rows, nippy.rows); // Data was written successfuly @@ -1090,21 +1133,20 @@ mod tests { // Writer will execute a consistency check and verify first that the offset list on disk // doesn't match 
the nippy.rows, and prune it. Then, it will prune the data file // accordingly as well. - let _writer = NippyJarWriter::new(&mut nippy).unwrap(); - assert_eq!(initial_rows, nippy.rows); + let writer = NippyJarWriter::new(nippy).unwrap(); + assert_eq!(initial_rows, writer.rows()); assert_eq!( initial_offset_size, - File::open(nippy.offsets_path()).unwrap().metadata().unwrap().len() as usize + File::open(writer.offsets_path()).unwrap().metadata().unwrap().len() as usize ); assert_eq!( initial_data_size, - File::open(nippy.data_path()).unwrap().metadata().unwrap().len() as usize + File::open(writer.data_path()).unwrap().metadata().unwrap().len() as usize ); - assert_eq!(initial_rows, nippy.rows); } fn test_append_consistency_no_commit(file_path: &Path, col1: &[Vec], col2: &[Vec]) { - let mut nippy = NippyJar::load_without_header(file_path).unwrap(); + let nippy = NippyJar::load_without_header(file_path).unwrap(); // Set the baseline that should be unwinded to let initial_rows = nippy.rows; @@ -1117,14 +1159,14 @@ mod tests { // Appends a third row, so we have an offset list in memory, which is not flushed to disk, // while the data has been. - let mut writer = NippyJarWriter::new(&mut nippy).unwrap(); + let mut writer = NippyJarWriter::new(nippy).unwrap(); writer.append_column(Some(Ok(&col1[2]))).unwrap(); writer.append_column(Some(Ok(&col2[2]))).unwrap(); // Simulate an unexpected shutdown of the writer, before it can call commit() drop(writer); - let mut nippy = NippyJar::load_without_header(file_path).unwrap(); + let nippy = NippyJar::load_without_header(file_path).unwrap(); assert_eq!(initial_rows, nippy.rows); // Data was written successfuly @@ -1140,13 +1182,12 @@ mod tests { // Writer will execute a consistency check and verify that the data file has more data than // it should, and resets it to the last offset of the list (on disk here) - let _writer = NippyJarWriter::new(&mut nippy).unwrap(); - assert_eq!(initial_rows, nippy.rows); + let writer = NippyJarWriter::new(nippy).unwrap(); + assert_eq!(initial_rows, writer.rows()); assert_eq!( initial_data_size, - File::open(nippy.data_path()).unwrap().metadata().unwrap().len() as usize + File::open(writer.data_path()).unwrap().metadata().unwrap().len() as usize ); - assert_eq!(initial_rows, nippy.rows); } fn append_two_rows(num_columns: usize, file_path: &Path, col1: &[Vec], col2: &[Vec]) { @@ -1157,7 +1198,7 @@ mod tests { assert_eq!(nippy.max_row_size, 0); assert_eq!(nippy.rows, 0); - let mut writer = NippyJarWriter::new(&mut nippy).unwrap(); + let mut writer = NippyJarWriter::new(nippy).unwrap(); assert_eq!(writer.column(), 0); writer.append_column(Some(Ok(&col1[0]))).unwrap(); @@ -1173,26 +1214,26 @@ mod tests { let expected_data_file_size = *writer.offsets().last().unwrap(); writer.commit().unwrap(); - assert_eq!(nippy.max_row_size, col1[0].len() + col2[0].len()); - assert_eq!(nippy.rows, 1); + assert_eq!(writer.max_row_size(), col1[0].len() + col2[0].len()); + assert_eq!(writer.rows(), 1); assert_eq!( - File::open(nippy.offsets_path()).unwrap().metadata().unwrap().len(), + File::open(writer.offsets_path()).unwrap().metadata().unwrap().len(), 1 + num_columns as u64 * 8 + 8 ); assert_eq!( - File::open(nippy.data_path()).unwrap().metadata().unwrap().len(), + File::open(writer.data_path()).unwrap().metadata().unwrap().len(), expected_data_file_size ); } // Load and add 1 row { - let mut nippy = NippyJar::load_without_header(file_path).unwrap(); + let nippy = NippyJar::load_without_header(file_path).unwrap(); // Check if it was committed 
successfuly assert_eq!(nippy.max_row_size, col1[0].len() + col2[0].len()); assert_eq!(nippy.rows, 1); - let mut writer = NippyJarWriter::new(&mut nippy).unwrap(); + let mut writer = NippyJarWriter::new(nippy).unwrap(); assert_eq!(writer.column(), 0); writer.append_column(Some(Ok(&col1[1]))).unwrap(); @@ -1208,22 +1249,22 @@ mod tests { let expected_data_file_size = *writer.offsets().last().unwrap(); writer.commit().unwrap(); - assert_eq!(nippy.max_row_size, col1[0].len() + col2[0].len()); - assert_eq!(nippy.rows, 2); + assert_eq!(writer.max_row_size(), col1[0].len() + col2[0].len()); + assert_eq!(writer.rows(), 2); assert_eq!( - File::open(nippy.offsets_path()).unwrap().metadata().unwrap().len(), - 1 + nippy.rows as u64 * num_columns as u64 * 8 + 8 + File::open(writer.offsets_path()).unwrap().metadata().unwrap().len(), + 1 + writer.rows() as u64 * num_columns as u64 * 8 + 8 ); assert_eq!( - File::open(nippy.data_path()).unwrap().metadata().unwrap().len(), + File::open(writer.data_path()).unwrap().metadata().unwrap().len(), expected_data_file_size ); } } fn prune_rows(num_columns: usize, file_path: &Path, col1: &[Vec], col2: &[Vec]) { - let mut nippy = NippyJar::load_without_header(file_path).unwrap(); - let mut writer = NippyJarWriter::new(&mut nippy).unwrap(); + let nippy = NippyJar::load_without_header(file_path).unwrap(); + let mut writer = NippyJarWriter::new(nippy).unwrap(); // Appends a third row, so we have an offset list in memory, which is not flushed to disk writer.append_column(Some(Ok(&col1[2]))).unwrap(); @@ -1231,32 +1272,38 @@ mod tests { // This should prune from the on-memory offset list and ondisk offset list writer.prune_rows(2).unwrap(); - assert_eq!(nippy.rows, 1); + assert_eq!(writer.rows(), 1); assert_eq!( - File::open(nippy.offsets_path()).unwrap().metadata().unwrap().len(), - 1 + nippy.rows as u64 * num_columns as u64 * 8 + 8 + File::open(writer.offsets_path()).unwrap().metadata().unwrap().len(), + 1 + writer.rows() as u64 * num_columns as u64 * 8 + 8 ); let expected_data_size = col1[0].len() + col2[0].len(); assert_eq!( - File::open(nippy.data_path()).unwrap().metadata().unwrap().len() as usize, + File::open(writer.data_path()).unwrap().metadata().unwrap().len() as usize, expected_data_size ); - let data_reader = nippy.open_data_reader().unwrap(); - // there are only two valid offsets. so index 2 actually represents the expected file - // data size. - assert_eq!(data_reader.offset(2), expected_data_size as u64); + let nippy = NippyJar::load_without_header(file_path).unwrap(); + { + let data_reader = nippy.open_data_reader().unwrap(); + // there are only two valid offsets. so index 2 actually represents the expected file + // data size. + assert_eq!(data_reader.offset(2), expected_data_size as u64); + } // This should prune from the ondisk offset list and clear the jar. 
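The consistency checks exercised by these tests boil down to one rule: trust the last committed offset and truncate anything beyond it. A self-contained, std-only sketch of that idea (not the reth implementation):

    use std::{fs::OpenOptions, io, path::Path};

    /// If the data file is longer than the last committed offset says it should be,
    /// truncate it back; bytes past that point were never committed and cannot be
    /// trusted after an unexpected shutdown.
    fn heal_data_file(data_path: &Path, last_committed_offset: u64) -> io::Result<()> {
        let file = OpenOptions::new().write(true).open(data_path)?;
        if file.metadata()?.len() > last_committed_offset {
            file.set_len(last_committed_offset)?;
        }
        Ok(())
    }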
- let mut writer = NippyJarWriter::new(&mut nippy).unwrap(); + let mut writer = NippyJarWriter::new(nippy).unwrap(); writer.prune_rows(1).unwrap(); - assert_eq!(nippy.rows, 0); - assert_eq!(nippy.max_row_size, 0); - assert_eq!(File::open(nippy.data_path()).unwrap().metadata().unwrap().len() as usize, 0); + assert_eq!(writer.rows(), 0); + assert_eq!(writer.max_row_size(), 0); + assert_eq!(File::open(writer.data_path()).unwrap().metadata().unwrap().len() as usize, 0); // Only the byte that indicates how many bytes per offset should be left - assert_eq!(File::open(nippy.offsets_path()).unwrap().metadata().unwrap().len() as usize, 1); + assert_eq!( + File::open(writer.offsets_path()).unwrap().metadata().unwrap().len() as usize, + 1 + ); } fn simulate_interrupted_prune( @@ -1265,7 +1312,7 @@ mod tests { num_rows: u64, missing_offsets: u64, ) { - let mut nippy = NippyJar::load_without_header(file_path).unwrap(); + let nippy = NippyJar::load_without_header(file_path).unwrap(); let reader = nippy.open_data_reader().unwrap(); let offsets_file = OpenOptions::new().read(true).write(true).open(nippy.offsets_path()).unwrap(); @@ -1284,6 +1331,6 @@ mod tests { data_file.set_len(data_len - 32 * missing_offsets).unwrap(); // runs the consistency check. - let _ = NippyJarWriter::new(&mut nippy).unwrap(); + let _ = NippyJarWriter::new(nippy).unwrap(); } } diff --git a/crates/storage/nippy-jar/src/writer.rs b/crates/storage/nippy-jar/src/writer.rs index b8de6454c27..8ab8bd47b4b 100644 --- a/crates/storage/nippy-jar/src/writer.rs +++ b/crates/storage/nippy-jar/src/writer.rs @@ -1,9 +1,8 @@ use crate::{compression::Compression, ColumnResult, NippyJar, NippyJarError, NippyJarHeader}; use std::{ cmp::Ordering, - fmt, fs::{File, OpenOptions}, - io::{Read, Seek, SeekFrom, Write}, + io::{BufWriter, Read, Seek, SeekFrom, Write}, path::Path, }; @@ -23,14 +22,15 @@ const OFFSET_SIZE_BYTES: u64 = 8; /// /// ## Data file layout /// The data file is represented just as a sequence of bytes of data without any delimiters -pub struct NippyJarWriter<'a, H> { - /// Reference to the associated [`NippyJar`], containing all necessary configurations for data +#[derive(Debug)] +pub struct NippyJarWriter { + /// Associated [`NippyJar`], containing all necessary configurations for data /// handling. - jar: &'a mut NippyJar, + jar: NippyJar, /// File handle to where the data is stored. - data_file: File, + data_file: BufWriter, /// File handle to where the offsets are stored. - offsets_file: File, + offsets_file: BufWriter, /// Temporary buffer to reuse when compressing data. tmp_buf: Vec, /// Used to find the maximum uncompressed size of a row in a jar. @@ -41,21 +41,19 @@ pub struct NippyJarWriter<'a, H> { column: usize, } -impl fmt::Debug for NippyJarWriter<'_, H> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("NippyJarWriter").finish_non_exhaustive() - } -} - -impl<'a, H: NippyJarHeader> NippyJarWriter<'a, H> { - pub fn new(jar: &'a mut NippyJar) -> Result { +impl NippyJarWriter { + /// Creates a [`NippyJarWriter`] from [`NippyJar`]. 
+ pub fn new(mut jar: NippyJar) -> Result { let (data_file, offsets_file, is_created) = Self::create_or_open_files(jar.data_path(), &jar.offsets_path())?; + // Makes sure we don't have dangling data and offset files + jar.freeze_config()?; + let mut writer = Self { jar, - data_file, - offsets_file, + data_file: BufWriter::new(data_file), + offsets_file: BufWriter::new(offsets_file), tmp_buf: Vec::with_capacity(1_000_000), uncompressed_row_size: 0, offsets: Vec::with_capacity(1_000_000), @@ -66,35 +64,56 @@ impl<'a, H: NippyJarHeader> NippyJarWriter<'a, H> { // changes if necessary. if !is_created { writer.check_consistency_and_heal()?; + writer.commit()?; } Ok(writer) } + /// Returns a reference to `H` of [`NippyJar`] + pub fn user_header(&self) -> &H { + &self.jar.user_header + } + + /// Returns a mutable reference to `H` of [`NippyJar`] + pub fn user_header_mut(&mut self) -> &mut H { + &mut self.jar.user_header + } + + /// Gets total writter rows in jar. + pub fn rows(&self) -> usize { + self.jar.rows() + } + + /// Consumes the writer and returns the associated [`NippyJar`]. + pub fn into_jar(self) -> NippyJar { + self.jar + } + fn create_or_open_files( data: &Path, offsets: &Path, ) -> Result<(File, File, bool), NippyJarError> { let is_created = !data.exists() || !offsets.exists(); - let mut data_file = if !data.exists() { - File::create(data)? - } else { - OpenOptions::new().read(true).write(true).open(data)? - }; + if !data.exists() { + // File::create is write-only (no reading possible) + File::create(data)?; + } + + let mut data_file = OpenOptions::new().read(true).write(true).open(data)?; data_file.seek(SeekFrom::End(0))?; - let mut offsets_file = if !offsets.exists() { - let mut offsets = File::create(offsets)?; + if !offsets.exists() { + // File::create is write-only (no reading possible) + File::create(offsets)?; + } - // First byte of the offset file is the size of one offset in bytes - offsets.write_all(&[OFFSET_SIZE_BYTES as u8])?; - offsets.sync_all()?; + let mut offsets_file = OpenOptions::new().read(true).write(true).open(offsets)?; - offsets - } else { - OpenOptions::new().read(true).write(true).open(offsets)? 
- }; + // First byte of the offset file is the size of one offset in bytes + offsets_file.write_all(&[OFFSET_SIZE_BYTES as u8])?; + offsets_file.sync_all()?; offsets_file.seek(SeekFrom::End(0))?; Ok((data_file, offsets_file, is_created)) @@ -118,7 +137,7 @@ impl<'a, H: NippyJarHeader> NippyJarWriter<'a, H> { let expected_offsets_file_size = 1 + // first byte is the size of one offset OFFSET_SIZE_BYTES * self.jar.rows as u64 * self.jar.columns as u64 + // `offset size * num rows * num columns` OFFSET_SIZE_BYTES; // expected size of the data file - let actual_offsets_file_size = self.offsets_file.metadata()?.len(); + let actual_offsets_file_size = self.offsets_file.get_ref().metadata()?.len(); // Offsets configuration wasn't properly committed match expected_offsets_file_size.cmp(&actual_offsets_file_size) { @@ -126,7 +145,7 @@ impl<'a, H: NippyJarHeader> NippyJarWriter<'a, H> { // Happened during an appending job // TODO: ideally we could truncate until the last offset of the last column of the // last row inserted - self.offsets_file.set_len(expected_offsets_file_size)?; + self.offsets_file.get_mut().set_len(expected_offsets_file_size)?; } Ordering::Greater => { // Happened during a pruning job @@ -145,14 +164,14 @@ impl<'a, H: NippyJarHeader> NippyJarWriter<'a, H> { // last offset should match the data_file_len let last_offset = reader.reverse_offset(0)?; - let data_file_len = self.data_file.metadata()?.len(); + let data_file_len = self.data_file.get_ref().metadata()?.len(); // Offset list wasn't properly committed match last_offset.cmp(&data_file_len) { Ordering::Less => { // Happened during an appending job, so we need to truncate the data, since there's // no way to recover it. - self.data_file.set_len(last_offset)?; + self.data_file.get_mut().set_len(last_offset)?; } Ordering::Greater => { // Happened during a pruning job, so we need to reverse iterate offsets until we @@ -160,12 +179,13 @@ impl<'a, H: NippyJarHeader> NippyJarWriter<'a, H> { for index in 0..reader.offsets_count()? { let offset = reader.reverse_offset(index + 1)?; if offset == data_file_len { - self.offsets_file.set_len( - self.offsets_file - .metadata()? - .len() - .saturating_sub(OFFSET_SIZE_BYTES * (index as u64 + 1)), - )?; + let new_len = self + .offsets_file + .get_ref() + .metadata()? + .len() + .saturating_sub(OFFSET_SIZE_BYTES * (index as u64 + 1)); + self.offsets_file.get_mut().set_len(new_len)?; drop(reader); @@ -229,11 +249,11 @@ impl<'a, H: NippyJarHeader> NippyJarWriter<'a, H> { self.offsets.push(self.data_file.stream_position()?); } - self.write_column(value.as_ref())?; + let written = self.write_column(value.as_ref())?; // Last offset represents the size of the data file if no more data is to be // appended. Otherwise, represents the offset of the next data item. - self.offsets.push(self.data_file.stream_position()?); + self.offsets.push(self.offsets.last().expect("qed") + written as u64); } None => { return Err(NippyJarError::UnexpectedMissingValue( @@ -248,15 +268,17 @@ impl<'a, H: NippyJarHeader> NippyJarWriter<'a, H> { } /// Writes column to data file. 
If it's the last column of the row, call `finalize_row()` - fn write_column(&mut self, value: &[u8]) -> Result<(), NippyJarError> { + fn write_column(&mut self, value: &[u8]) -> Result { self.uncompressed_row_size += value.len(); - if let Some(compression) = &self.jar.compressor { + let len = if let Some(compression) = &self.jar.compressor { let before = self.tmp_buf.len(); let len = compression.compress_to(value, &mut self.tmp_buf)?; self.data_file.write_all(&self.tmp_buf[before..before + len])?; + len } else { self.data_file.write_all(value)?; - } + value.len() + }; self.column += 1; @@ -264,11 +286,14 @@ impl<'a, H: NippyJarHeader> NippyJarWriter<'a, H> { self.finalize_row(); } - Ok(()) + Ok(len) } /// Prunes rows from data and offsets file and updates its configuration on disk pub fn prune_rows(&mut self, num_rows: usize) -> Result<(), NippyJarError> { + self.offsets_file.flush()?; + self.data_file.flush()?; + // Each column of a row is one offset let num_offsets = num_rows * self.jar.columns; @@ -283,13 +308,13 @@ impl<'a, H: NippyJarHeader> NippyJarWriter<'a, H> { self.offsets.truncate(self.offsets.len() - offsets_prune_count); // Truncate the data file to the new length - self.data_file.set_len(new_len)?; + self.data_file.get_mut().set_len(new_len)?; } // Prune from on-disk offset list if there are still rows left to prune if remaining_to_prune > 0 { // Get the current length of the on-disk offset file - let length = self.offsets_file.metadata()?.len(); + let length = self.offsets_file.get_ref().metadata()?.len(); // Handle non-empty offset file if length > 1 { @@ -308,8 +333,8 @@ impl<'a, H: NippyJarHeader> NippyJarWriter<'a, H> { // If all rows are to be pruned if new_num_offsets <= 1 { // <= 1 because the one offset would actually be the expected file data size - self.offsets_file.set_len(1)?; - self.data_file.set_len(0)?; + self.offsets_file.get_mut().set_len(1)?; + self.data_file.get_mut().set_len(0)?; } else { // Calculate the new length for the on-disk offset list let new_len = 1 + new_num_offsets * OFFSET_SIZE_BYTES; @@ -318,20 +343,20 @@ impl<'a, H: NippyJarHeader> NippyJarWriter<'a, H> { .seek(SeekFrom::Start(new_len.saturating_sub(OFFSET_SIZE_BYTES)))?; // Read the last offset value let mut last_offset = [0u8; OFFSET_SIZE_BYTES as usize]; - self.offsets_file.read_exact(&mut last_offset)?; + self.offsets_file.get_ref().read_exact(&mut last_offset)?; let last_offset = u64::from_le_bytes(last_offset); // Update the lengths of both the offsets and data files - self.offsets_file.set_len(new_len)?; - self.data_file.set_len(last_offset)?; + self.offsets_file.get_mut().set_len(new_len)?; + self.data_file.get_mut().set_len(last_offset)?; } } else { return Err(NippyJarError::InvalidPruning(0, remaining_to_prune as u64)) } } - self.offsets_file.sync_all()?; - self.data_file.sync_all()?; + self.offsets_file.get_ref().sync_all()?; + self.data_file.get_ref().sync_all()?; self.offsets_file.seek(SeekFrom::End(0))?; self.data_file.seek(SeekFrom::End(0))?; @@ -358,7 +383,8 @@ impl<'a, H: NippyJarHeader> NippyJarWriter<'a, H> { /// Commits configuration and offsets to disk. It drains the internal offset list. pub fn commit(&mut self) -> Result<(), NippyJarError> { - self.data_file.sync_all()?; + self.data_file.flush()?; + self.data_file.get_ref().sync_all()?; self.commit_offsets()?; @@ -374,11 +400,11 @@ impl<'a, H: NippyJarHeader> NippyJarWriter<'a, H> { // `append_column()` works alongside commit. So we need to skip it. 
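Putting the reworked writer API together (ownership of the jar, `append_column`, `commit`, `into_jar`, all shown in the hunks above), a hedged usage sketch for a single-column jar might look like the following; crate paths and the one-column layout are assumptions:

    use std::path::Path;

    use reth_nippy_jar::{NippyJar, NippyJarError, NippyJarWriter};

    /// Append one row to a single-column, header-less jar and hand the jar back.
    fn append_one_row(path: &Path, value: &[u8]) -> Result<NippyJar<()>, NippyJarError> {
        let jar = NippyJar::load_without_header(path)?;
        // The writer now takes the jar by value instead of a mutable reference.
        let mut writer = NippyJarWriter::new(jar)?;
        // With one column per row, a single `append_column` completes the row.
        writer.append_column(Some(Ok(value)))?;
        // Flush data and offsets, then persist the configuration.
        writer.commit()?;
        // Recover ownership of the jar for further use.
        Ok(writer.into_jar())
    }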
let mut last_offset_ondisk = None; - if self.offsets_file.metadata()?.len() > 1 { + if self.offsets_file.get_ref().metadata()?.len() > 1 { self.offsets_file.seek(SeekFrom::End(-(OFFSET_SIZE_BYTES as i64)))?; let mut buf = [0u8; OFFSET_SIZE_BYTES as usize]; - self.offsets_file.read_exact(&mut buf)?; + self.offsets_file.get_ref().read_exact(&mut buf)?; last_offset_ondisk = Some(u64::from_le_bytes(buf)); } @@ -393,11 +419,17 @@ impl<'a, H: NippyJarHeader> NippyJarWriter<'a, H> { } self.offsets_file.write_all(&offset.to_le_bytes())?; } - self.offsets_file.sync_all()?; + self.offsets_file.flush()?; + self.offsets_file.get_ref().sync_all()?; Ok(()) } + #[cfg(test)] + pub fn max_row_size(&self) -> usize { + self.jar.max_row_size + } + #[cfg(test)] pub fn column(&self) -> usize { self.column @@ -412,4 +444,14 @@ impl<'a, H: NippyJarHeader> NippyJarWriter<'a, H> { pub fn offsets_mut(&mut self) -> &mut Vec { &mut self.offsets } + + #[cfg(test)] + pub fn offsets_path(&self) -> std::path::PathBuf { + self.jar.offsets_path() + } + + #[cfg(test)] + pub fn data_path(&self) -> &Path { + self.jar.data_path() + } } diff --git a/crates/storage/provider/Cargo.toml b/crates/storage/provider/Cargo.toml index bc442a5d002..8b11fe69f66 100644 --- a/crates/storage/provider/Cargo.toml +++ b/crates/storage/provider/Cargo.toml @@ -18,6 +18,7 @@ reth-interfaces.workspace = true reth-db.workspace = true reth-trie.workspace = true reth-nippy-jar.workspace = true +reth-codecs.workspace = true reth-node-api.workspace = true revm.workspace = true diff --git a/crates/storage/provider/src/bundle_state/bundle_state_with_receipts.rs b/crates/storage/provider/src/bundle_state/bundle_state_with_receipts.rs index b6be30d91b5..7b6e5f1fecc 100644 --- a/crates/storage/provider/src/bundle_state/bundle_state_with_receipts.rs +++ b/crates/storage/provider/src/bundle_state/bundle_state_with_receipts.rs @@ -1,15 +1,15 @@ -use crate::{StateChanges, StateReverts}; +use crate::{providers::StaticFileProviderRWRefMut, StateChanges, StateReverts}; use reth_db::{ cursor::{DbCursorRO, DbCursorRW}, tables, transaction::{DbTx, DbTxMut}, }; -use reth_interfaces::db::DatabaseError; +use reth_interfaces::provider::{ProviderError, ProviderResult}; use reth_primitives::{ logs_bloom, revm::compat::{into_reth_acc, into_revm_acc}, - Account, Address, BlockNumber, Bloom, Bytecode, Log, Receipt, Receipts, StorageEntry, B256, - U256, + Account, Address, BlockNumber, Bloom, Bytecode, Log, Receipt, Receipts, StaticFileSegment, + StorageEntry, B256, U256, }; use reth_trie::HashedPostState; use revm::{ @@ -285,15 +285,21 @@ impl BundleStateWithReceipts { std::mem::swap(&mut self.bundle, &mut other) } - /// Write the [BundleStateWithReceipts] to the database. + /// Write the [BundleStateWithReceipts] to database and receipts to either database or static + /// files if `static_file_producer` is `Some`. It should be none if there is any kind of + /// pruning/filtering over the receipts. /// - /// `is_value_known` should be set to `Not` if the [BundleStateWithReceipts] has some of its - /// state detached, This would make some original values not known. - pub fn write_to_db( + /// `omit_changed_check` should be set to true of bundle has some of it data + /// detached, This would make some original values not known. 
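Before the `write_to_storage` signature itself (next hunk), a hedged sketch of the behaviour its doc comment describes: receipts stay in the database when no static-file producer is supplied, while passing `Some(producer)` routes them to static files instead. Crate paths and the generic bounds are assumptions taken from the hunk below:

    use reth_db::transaction::{DbTx, DbTxMut};
    use reth_interfaces::provider::ProviderResult;
    use reth_provider::{BundleStateWithReceipts, OriginalValuesKnown};

    /// Persist a bundle state with receipts kept in the database: passing `None` for the
    /// static-file producer is the right choice whenever receipts may be pruned/filtered.
    fn persist_to_db_only<TX: DbTx + DbTxMut>(
        bundle: BundleStateWithReceipts,
        tx: &TX,
    ) -> ProviderResult<()> {
        bundle.write_to_storage(tx, None, OriginalValuesKnown::Yes)
    }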
+ pub fn write_to_storage( self, tx: &TX, + mut static_file_producer: Option>, is_value_known: OriginalValuesKnown, - ) -> Result<(), DatabaseError> { + ) -> ProviderResult<()> + where + TX: DbTxMut + DbTx, + { let (plain_state, reverts) = self.bundle.into_plain_state_and_reverts(is_value_known); StateReverts(reverts).write_to_db(tx, self.first_block)?; @@ -303,15 +309,22 @@ impl BundleStateWithReceipts { let mut receipts_cursor = tx.cursor_write::()?; for (idx, receipts) in self.receipts.into_iter().enumerate() { - if !receipts.is_empty() { - let block_number = self.first_block + idx as u64; - let (_, body_indices) = - bodies_cursor.seek_exact(block_number)?.unwrap_or_else(|| { - let last_available = bodies_cursor.last().ok().flatten().map(|(number, _)| number); - panic!("body indices for block {block_number} must exist. last available block number: {last_available:?}"); - }); - - let first_tx_index = body_indices.first_tx_num(); + let block_number = self.first_block + idx as u64; + let first_tx_index = bodies_cursor + .seek_exact(block_number)? + .map(|(_, indices)| indices.first_tx_num()) + .ok_or_else(|| ProviderError::BlockBodyIndicesNotFound(block_number))?; + + if let Some(static_file_producer) = &mut static_file_producer { + // Increment block on static file header. + static_file_producer.increment_block(StaticFileSegment::Receipts)?; + + for (tx_idx, receipt) in receipts.into_iter().enumerate() { + let receipt = receipt + .expect("receipt should not be filtered when saving to static files."); + static_file_producer.append_receipt(first_tx_index + tx_idx as u64, receipt)?; + } + } else if !receipts.is_empty() { for (tx_idx, receipt) in receipts.into_iter().enumerate() { if let Some(receipt) = receipt { receipts_cursor.append(first_tx_index + tx_idx as u64, receipt)?; @@ -549,7 +562,7 @@ mod tests { state.merge_transitions(BundleRetention::Reverts); BundleStateWithReceipts::new(state.take_bundle(), Receipts::new(), 1) - .write_to_db(provider.tx_ref(), OriginalValuesKnown::Yes) + .write_to_storage(provider.tx_ref(), None, OriginalValuesKnown::Yes) .expect("Could not write bundle state to DB"); // Check plain storage state @@ -647,7 +660,7 @@ mod tests { state.merge_transitions(BundleRetention::Reverts); BundleStateWithReceipts::new(state.take_bundle(), Receipts::new(), 2) - .write_to_db(provider.tx_ref(), OriginalValuesKnown::Yes) + .write_to_storage(provider.tx_ref(), None, OriginalValuesKnown::Yes) .expect("Could not write bundle state to DB"); assert_eq!( @@ -711,7 +724,7 @@ mod tests { )])); init_state.merge_transitions(BundleRetention::Reverts); BundleStateWithReceipts::new(init_state.take_bundle(), Receipts::new(), 0) - .write_to_db(provider.tx_ref(), OriginalValuesKnown::Yes) + .write_to_storage(provider.tx_ref(), None, OriginalValuesKnown::Yes) .expect("Could not write init bundle state to DB"); let mut state = State::builder().with_bundle_update().build(); @@ -856,7 +869,7 @@ mod tests { let bundle = state.take_bundle(); BundleStateWithReceipts::new(bundle, Receipts::new(), 1) - .write_to_db(provider.tx_ref(), OriginalValuesKnown::Yes) + .write_to_storage(provider.tx_ref(), None, OriginalValuesKnown::Yes) .expect("Could not write bundle state to DB"); let mut storage_changeset_cursor = provider @@ -1019,7 +1032,7 @@ mod tests { )])); init_state.merge_transitions(BundleRetention::Reverts); BundleStateWithReceipts::new(init_state.take_bundle(), Receipts::new(), 0) - .write_to_db(provider.tx_ref(), OriginalValuesKnown::Yes) + .write_to_storage(provider.tx_ref(), None, 
OriginalValuesKnown::Yes) .expect("Could not write init bundle state to DB"); let mut state = State::builder().with_bundle_update().build(); @@ -1064,7 +1077,7 @@ mod tests { // Commit block #1 changes to the database. state.merge_transitions(BundleRetention::Reverts); BundleStateWithReceipts::new(state.take_bundle(), Receipts::new(), 1) - .write_to_db(provider.tx_ref(), OriginalValuesKnown::Yes) + .write_to_storage(provider.tx_ref(), None, OriginalValuesKnown::Yes) .expect("Could not write bundle state to DB"); let mut storage_changeset_cursor = provider diff --git a/crates/storage/provider/src/providers/database/mod.rs b/crates/storage/provider/src/providers/database/mod.rs index 06d36237d1e..77c1f21ea19 100644 --- a/crates/storage/provider/src/providers/database/mod.rs +++ b/crates/storage/provider/src/providers/database/mod.rs @@ -1,8 +1,9 @@ use crate::{ providers::{ state::{historical::HistoricalStateProvider, latest::LatestStateProvider}, - SnapshotProvider, + StaticFileProvider, }, + to_range, traits::{BlockSource, ReceiptProvider}, BlockHashReader, BlockNumReader, BlockReader, ChainSpecProvider, EvmEnvProvider, HeaderProvider, HeaderSyncGap, HeaderSyncGapProvider, HeaderSyncMode, ProviderError, @@ -13,12 +14,11 @@ use reth_db::{database::Database, init_db, models::StoredBlockBodyIndices, Datab use reth_interfaces::{provider::ProviderResult, RethError, RethResult}; use reth_node_api::ConfigureEvmEnv; use reth_primitives::{ - snapshot::HighestSnapshots, stage::{StageCheckpoint, StageId}, Address, Block, BlockHash, BlockHashOrNumber, BlockNumber, BlockWithSenders, ChainInfo, ChainSpec, Header, PruneCheckpoint, PruneSegment, Receipt, SealedBlock, SealedBlockWithSenders, - SealedHeader, TransactionMeta, TransactionSigned, TransactionSignedNoHash, TxHash, TxNumber, - Withdrawal, Withdrawals, B256, U256, + SealedHeader, StaticFileSegment, TransactionMeta, TransactionSigned, TransactionSignedNoHash, + TxHash, TxNumber, Withdrawal, Withdrawals, B256, U256, }; use revm::primitives::{BlockEnv, CfgEnvWithHandlerCfg}; use std::{ @@ -26,7 +26,6 @@ use std::{ path::{Path, PathBuf}, sync::Arc, }; -use tokio::sync::watch; use tracing::trace; mod metrics; @@ -38,30 +37,28 @@ use reth_db::mdbx::DatabaseArguments; /// A common provider that fetches data from a database. /// /// This provider implements most provider or provider factory traits. -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct ProviderFactory { /// Database db: DB, /// Chain spec chain_spec: Arc, - /// Snapshot Provider - snapshot_provider: Option>, -} - -impl Clone for ProviderFactory { - fn clone(&self) -> Self { - Self { - db: self.db.clone(), - chain_spec: Arc::clone(&self.chain_spec), - snapshot_provider: self.snapshot_provider.clone(), - } - } + /// Static File Provider + static_file_provider: StaticFileProvider, } impl ProviderFactory { /// Create new database provider factory. - pub fn new(db: DB, chain_spec: Arc) -> Self { - Self { db, chain_spec, snapshot_provider: None } + pub fn new( + db: DB, + chain_spec: Arc, + static_files_path: PathBuf, + ) -> RethResult> { + Ok(Self { + db, + chain_spec, + static_file_provider: StaticFileProvider::new(static_files_path)?, + }) } /// Create new database provider by passing a path. 
[`ProviderFactory`] will own the database @@ -70,31 +67,36 @@ impl ProviderFactory { path: P, chain_spec: Arc, args: DatabaseArguments, + static_files_path: PathBuf, ) -> RethResult> { Ok(ProviderFactory:: { db: init_db(path, args).map_err(|e| RethError::Custom(e.to_string()))?, chain_spec, - snapshot_provider: None, + static_file_provider: StaticFileProvider::new(static_files_path)?, }) } - /// Database provider that comes with a shared snapshot provider. - pub fn with_snapshots( - mut self, - snapshots_path: PathBuf, - highest_snapshot_tracker: watch::Receiver>, - ) -> ProviderResult { - self.snapshot_provider = Some(Arc::new( - SnapshotProvider::new(snapshots_path)? - .with_highest_tracker(Some(highest_snapshot_tracker)), - )); - Ok(self) + /// Enables metrics on the static file provider. + pub fn with_static_files_metrics(mut self) -> Self { + self.static_file_provider = self.static_file_provider.with_metrics(); + self } /// Returns reference to the underlying database. pub fn db_ref(&self) -> &DB { &self.db } + + /// Returns static file provider + pub fn static_file_provider(&self) -> StaticFileProvider { + self.static_file_provider.clone() + } + + #[cfg(any(test, feature = "test-utils"))] + /// Consumes Self and returns DB + pub fn into_db(self) -> DB { + self.db + } } impl ProviderFactory { @@ -103,13 +105,11 @@ impl ProviderFactory { /// [`BlockHashReader`]. This may fail if the inner read database transaction fails to open. #[track_caller] pub fn provider(&self) -> ProviderResult> { - let mut provider = DatabaseProvider::new(self.db.tx()?, self.chain_spec.clone()); - - if let Some(snapshot_provider) = &self.snapshot_provider { - provider = provider.with_snapshot_provider(snapshot_provider.clone()); - } - - Ok(provider) + Ok(DatabaseProvider::new( + self.db.tx()?, + self.chain_spec.clone(), + self.static_file_provider.clone(), + )) } /// Returns a provider with a created `DbTxMut` inside, which allows fetching and updating @@ -118,20 +118,18 @@ impl ProviderFactory { /// open. #[track_caller] pub fn provider_rw(&self) -> ProviderResult> { - let mut provider = DatabaseProvider::new_rw(self.db.tx_mut()?, self.chain_spec.clone()); - - if let Some(snapshot_provider) = &self.snapshot_provider { - provider = provider.with_snapshot_provider(snapshot_provider.clone()); - } - - Ok(DatabaseProviderRW(provider)) + Ok(DatabaseProviderRW(DatabaseProvider::new_rw( + self.db.tx_mut()?, + self.chain_spec.clone(), + self.static_file_provider.clone(), + ))) } /// Storage provider for latest block #[track_caller] pub fn latest(&self) -> ProviderResult { trace!(target: "providers::db", "Returning latest state provider"); - Ok(Box::new(LatestStateProvider::new(self.db.tx()?))) + Ok(Box::new(LatestStateProvider::new(self.db.tx()?, self.static_file_provider()))) } /// Storage provider for state at that given block @@ -143,7 +141,10 @@ impl ProviderFactory { if block_number == provider.best_block_number().unwrap_or_default() && block_number == provider.last_block_number().unwrap_or_default() { - return Ok(Box::new(LatestStateProvider::new(provider.into_tx()))) + return Ok(Box::new(LatestStateProvider::new( + provider.into_tx(), + self.static_file_provider(), + ))) } // +1 as the changeset that we want is the one that was applied after this block. 
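Constructing the factory now requires a static-files directory as a third argument and is fallible, per the `new` signature above. A hedged sketch; crate paths and the `static_files` directory name are assumptions:

    use std::{path::PathBuf, sync::Arc};

    use reth_db::database::Database;
    use reth_interfaces::RethResult;
    use reth_primitives::ChainSpec;
    use reth_provider::ProviderFactory;

    /// Open a provider factory backed by `db`, with static files under `<datadir>/static_files`.
    fn open_factory<DB: Database>(
        db: DB,
        chain_spec: Arc<ChainSpec>,
        datadir: PathBuf,
    ) -> RethResult<ProviderFactory<DB>> {
        ProviderFactory::new(db, chain_spec, datadir.join("static_files"))
    }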
@@ -154,7 +155,11 @@ impl ProviderFactory { let storage_history_prune_checkpoint = provider.get_prune_checkpoint(PruneSegment::StorageHistory)?; - let mut state_provider = HistoricalStateProvider::new(provider.into_tx(), block_number); + let mut state_provider = HistoricalStateProvider::new( + provider.into_tx(), + block_number, + self.static_file_provider(), + ); // If we pruned account or storage history, we can't return state on every historical block. // Instead, we should cap it at the latest prune checkpoint for corresponding prune segment. @@ -217,7 +222,12 @@ impl HeaderProvider for ProviderFactory { } fn header_by_number(&self, num: BlockNumber) -> ProviderResult> { - self.provider()?.header_by_number(num) + self.static_file_provider.get_with_static_file_or_database( + StaticFileSegment::Headers, + num, + |static_file| static_file.header_by_number(num), + || self.provider()?.header_by_number(num), + ) } fn header_td(&self, hash: &BlockHash) -> ProviderResult> { @@ -225,22 +235,44 @@ impl HeaderProvider for ProviderFactory { } fn header_td_by_number(&self, number: BlockNumber) -> ProviderResult> { - self.provider()?.header_td_by_number(number) + if let Some(td) = self.chain_spec.final_paris_total_difficulty(number) { + // if this block is higher than the final paris(merge) block, return the final paris + // difficulty + return Ok(Some(td)) + } + + self.static_file_provider.get_with_static_file_or_database( + StaticFileSegment::Headers, + number, + |static_file| static_file.header_td_by_number(number), + || self.provider()?.header_td_by_number(number), + ) } fn headers_range(&self, range: impl RangeBounds) -> ProviderResult> { - self.provider()?.headers_range(range) + self.static_file_provider.get_range_with_static_file_or_database( + StaticFileSegment::Headers, + to_range(range), + |static_file, range, _| static_file.headers_range(range), + |range, _| self.provider()?.headers_range(range), + |_| true, + ) } fn sealed_header(&self, number: BlockNumber) -> ProviderResult> { - self.provider()?.sealed_header(number) + self.static_file_provider.get_with_static_file_or_database( + StaticFileSegment::Headers, + number, + |static_file| static_file.sealed_header(number), + || self.provider()?.sealed_header(number), + ) } fn sealed_headers_range( &self, range: impl RangeBounds, ) -> ProviderResult> { - self.provider()?.sealed_headers_range(range) + self.sealed_headers_while(range, |_| true) } fn sealed_headers_while( @@ -248,13 +280,24 @@ impl HeaderProvider for ProviderFactory { range: impl RangeBounds, predicate: impl FnMut(&SealedHeader) -> bool, ) -> ProviderResult> { - self.provider()?.sealed_headers_while(range, predicate) + self.static_file_provider.get_range_with_static_file_or_database( + StaticFileSegment::Headers, + to_range(range), + |static_file, range, predicate| static_file.sealed_headers_while(range, predicate), + |range, predicate| self.provider()?.sealed_headers_while(range, predicate), + predicate, + ) } } impl BlockHashReader for ProviderFactory { fn block_hash(&self, number: u64) -> ProviderResult> { - self.provider()?.block_hash(number) + self.static_file_provider.get_with_static_file_or_database( + StaticFileSegment::Headers, + number, + |static_file| static_file.block_hash(number), + || self.provider()?.block_hash(number), + ) } fn canonical_hashes_range( @@ -262,7 +305,13 @@ impl BlockHashReader for ProviderFactory { start: BlockNumber, end: BlockNumber, ) -> ProviderResult> { - self.provider()?.canonical_hashes_range(start, end) + 
self.static_file_provider.get_range_with_static_file_or_database( + StaticFileSegment::Headers, + start..end, + |static_file, range, _| static_file.canonical_hashes_range(range.start, range.end), + |range, _| self.provider()?.canonical_hashes_range(range.start, range.end), + |_| true, + ) } } @@ -335,14 +384,24 @@ impl TransactionsProvider for ProviderFactory { } fn transaction_by_id(&self, id: TxNumber) -> ProviderResult> { - self.provider()?.transaction_by_id(id) + self.static_file_provider.get_with_static_file_or_database( + StaticFileSegment::Transactions, + id, + |static_file| static_file.transaction_by_id(id), + || self.provider()?.transaction_by_id(id), + ) } fn transaction_by_id_no_hash( &self, id: TxNumber, ) -> ProviderResult> { - self.provider()?.transaction_by_id_no_hash(id) + self.static_file_provider.get_with_static_file_or_database( + StaticFileSegment::Transactions, + id, + |static_file| static_file.transaction_by_id_no_hash(id), + || self.provider()?.transaction_by_id_no_hash(id), + ) } fn transaction_by_hash(&self, hash: TxHash) -> ProviderResult> { @@ -395,7 +454,12 @@ impl TransactionsProvider for ProviderFactory { impl ReceiptProvider for ProviderFactory { fn receipt(&self, id: TxNumber) -> ProviderResult> { - self.provider()?.receipt(id) + self.static_file_provider.get_with_static_file_or_database( + StaticFileSegment::Receipts, + id, + |static_file| static_file.receipt(id), + || self.provider()?.receipt(id), + ) } fn receipt_by_hash(&self, hash: TxHash) -> ProviderResult> { @@ -410,7 +474,13 @@ impl ReceiptProvider for ProviderFactory { &self, range: impl RangeBounds, ) -> ProviderResult> { - self.provider()?.receipts_by_tx_range(range) + self.static_file_provider.get_range_with_static_file_or_database( + StaticFileSegment::Receipts, + to_range(range), + |static_file, range, _| static_file.receipts_by_tx_range(range), + |range, _| self.provider()?.receipts_by_tx_range(range), + |_| true, + ) } } @@ -528,13 +598,17 @@ impl PruneCheckpointReader for ProviderFactory { mod tests { use super::ProviderFactory; use crate::{ - test_utils::create_test_provider_factory, BlockHashReader, BlockNumReader, BlockWriter, - HeaderSyncGapProvider, HeaderSyncMode, TransactionsProvider, + providers::StaticFileWriter, test_utils::create_test_provider_factory, BlockHashReader, + BlockNumReader, BlockWriter, HeaderSyncGapProvider, HeaderSyncMode, TransactionsProvider, }; use alloy_rlp::Decodable; use assert_matches::assert_matches; use rand::Rng; - use reth_db::{tables, test_utils::ERROR_TEMPDIR, transaction::DbTxMut, DatabaseEnv}; + use reth_db::{ + tables, + test_utils::{create_test_static_files_dir, ERROR_TEMPDIR}, + DatabaseEnv, + }; use reth_interfaces::{ provider::ProviderError, test_utils::{ @@ -544,7 +618,8 @@ mod tests { RethError, }; use reth_primitives::{ - hex_literal::hex, ChainSpecBuilder, PruneMode, PruneModes, SealedBlock, TxNumber, B256, + hex_literal::hex, ChainSpecBuilder, PruneMode, PruneModes, SealedBlock, StaticFileSegment, + TxNumber, B256, U256, }; use std::{ops::RangeInclusive, sync::Arc}; use tokio::sync::watch; @@ -582,6 +657,7 @@ mod tests { tempfile::TempDir::new().expect(ERROR_TEMPDIR).into_path(), Arc::new(chain_spec), Default::default(), + create_test_static_files_dir(), ) .unwrap(); @@ -685,8 +761,6 @@ mod tests { // Genesis let checkpoint = 0; let head = random_header(&mut rng, 0, None); - let gap_fill = random_header(&mut rng, 1, Some(head.hash())); - let gap_tip = random_header(&mut rng, 2, Some(gap_fill.hash())); // Empty database assert_matches!( @@ 
-696,46 +770,14 @@ mod tests { ); // Checkpoint and no gap - provider - .tx_ref() - .put::(head.number, head.hash()) - .expect("failed to write canonical"); - provider - .tx_ref() - .put::(head.number, head.clone().unseal()) - .expect("failed to write header"); + let mut static_file_writer = + provider.static_file_provider().latest_writer(StaticFileSegment::Headers).unwrap(); + static_file_writer.append_header(head.header().clone(), U256::ZERO, head.hash()).unwrap(); + static_file_writer.commit().unwrap(); + drop(static_file_writer); let gap = provider.sync_gap(mode.clone(), checkpoint).unwrap(); assert_eq!(gap.local_head, head); assert_eq!(gap.target.tip(), consensus_tip.into()); - - // Checkpoint and gap - provider - .tx_ref() - .put::(gap_tip.number, gap_tip.hash()) - .expect("failed to write canonical"); - provider - .tx_ref() - .put::(gap_tip.number, gap_tip.clone().unseal()) - .expect("failed to write header"); - - let gap = provider.sync_gap(mode.clone(), checkpoint).unwrap(); - assert_eq!(gap.local_head, head); - assert_eq!(gap.target.tip(), gap_tip.parent_hash.into()); - - // Checkpoint and gap closed - provider - .tx_ref() - .put::(gap_fill.number, gap_fill.hash()) - .expect("failed to write canonical"); - provider - .tx_ref() - .put::(gap_fill.number, gap_fill.clone().unseal()) - .expect("failed to write header"); - - assert_matches!( - provider.sync_gap(mode, checkpoint), - Err(RethError::Provider(ProviderError::InconsistentHeaderGap)) - ); } } diff --git a/crates/storage/provider/src/providers/database/provider.rs b/crates/storage/provider/src/providers/database/provider.rs index f43cdc85714..f9a2d37d0ea 100644 --- a/crates/storage/provider/src/providers/database/provider.rs +++ b/crates/storage/provider/src/providers/database/provider.rs @@ -1,6 +1,6 @@ use crate::{ bundle_state::{BundleStateInit, BundleStateWithReceipts, HashedStateChanges, RevertsInit}, - providers::{database::metrics, SnapshotProvider}, + providers::{database::metrics, static_file::StaticFileWriter, StaticFileProvider}, to_range, traits::{ AccountExtReader, BlockSource, ChangeSetReader, ReceiptProvider, StageCheckpointWriter, @@ -8,7 +8,7 @@ use crate::{ AccountReader, BlockExecutionWriter, BlockHashReader, BlockNumReader, BlockReader, BlockWriter, Chain, EvmEnvProvider, HashingWriter, HeaderProvider, HeaderSyncGap, HeaderSyncGapProvider, HeaderSyncMode, HistoryWriter, OriginalValuesKnown, ProviderError, PruneCheckpointReader, - PruneCheckpointWriter, StageCheckpointReader, StorageReader, TransactionVariant, + PruneCheckpointWriter, StageCheckpointReader, StatsReader, StorageReader, TransactionVariant, TransactionsProvider, TransactionsProviderExt, WithdrawalsProvider, }; use itertools::{izip, Itertools}; @@ -28,7 +28,7 @@ use reth_db::{ use reth_interfaces::{ p2p::headers::downloader::SyncTarget, provider::{ProviderResult, RootMismatch}, - RethError, RethResult, + RethResult, }; use reth_node_api::ConfigureEvmEnv; use reth_primitives::{ @@ -38,7 +38,7 @@ use reth_primitives::{ trie::Nibbles, Account, Address, Block, BlockHash, BlockHashOrNumber, BlockNumber, BlockWithSenders, ChainInfo, ChainSpec, GotExpected, Hardfork, Head, Header, PruneCheckpoint, PruneModes, - PruneSegment, Receipt, SealedBlock, SealedBlockWithSenders, SealedHeader, SnapshotSegment, + PruneSegment, Receipt, SealedBlock, SealedBlockWithSenders, SealedHeader, StaticFileSegment, StorageEntry, TransactionMeta, TransactionSigned, TransactionSignedEcRecovered, TransactionSignedNoHash, TxHash, TxNumber, Withdrawal, Withdrawals, B256, U256, 
}; @@ -49,6 +49,7 @@ use reth_trie::{ }; use revm::primitives::{BlockEnv, CfgEnvWithHandlerCfg, SpecId}; use std::{ + cmp::Ordering, collections::{hash_map, BTreeMap, BTreeSet, HashMap, HashSet}, fmt::Debug, ops::{Bound, Deref, DerefMut, Range, RangeBounds, RangeInclusive}, @@ -82,7 +83,7 @@ impl DerefMut for DatabaseProviderRW { } impl DatabaseProviderRW { - /// Commit database transaction + /// Commit database transaction and static file if it exists. pub fn commit(self) -> ProviderResult { self.0.commit() } @@ -101,15 +102,25 @@ pub struct DatabaseProvider { tx: TX, /// Chain spec chain_spec: Arc, - /// Snapshot provider - #[allow(dead_code)] - snapshot_provider: Option>, + /// Static File provider + static_file_provider: StaticFileProvider, +} + +impl DatabaseProvider { + /// Returns a static file provider + pub fn static_file_provider(&self) -> &StaticFileProvider { + &self.static_file_provider + } } impl DatabaseProvider { /// Creates a provider with an inner read-write transaction. - pub fn new_rw(tx: TX, chain_spec: Arc) -> Self { - Self { tx, chain_spec, snapshot_provider: None } + pub fn new_rw( + tx: TX, + chain_spec: Arc, + static_file_provider: StaticFileProvider, + ) -> Self { + Self { tx, chain_spec, static_file_provider } } } @@ -153,6 +164,29 @@ impl DatabaseProvider { } } +impl DatabaseProvider { + #[cfg(any(test, feature = "test-utils"))] + /// Inserts an historical block. Used for setting up test environments + pub fn insert_historical_block( + &self, + block: SealedBlockWithSenders, + prune_modes: Option<&PruneModes>, + ) -> ProviderResult { + let ttd = if block.number == 0 { + block.difficulty + } else { + let parent_block_number = block.number - 1; + let parent_ttd = self.header_td_by_number(parent_block_number)?.unwrap_or_default(); + parent_ttd + block.difficulty + }; + + let mut writer = self.static_file_provider.latest_writer(StaticFileSegment::Headers)?; + writer.append_header(block.header.as_ref().clone(), ttd, block.hash())?; + + self.insert_block(block, prune_modes) + } +} + /// For a given key, unwind all history shards that are below the given block number. /// /// S - Sharded key subtype. @@ -203,14 +237,12 @@ where impl DatabaseProvider { /// Creates a provider with an inner read-only transaction. - pub fn new(tx: TX, chain_spec: Arc) -> Self { - Self { tx, chain_spec, snapshot_provider: None } - } - - /// Creates a new [`Self`] with access to a [`SnapshotProvider`]. - pub fn with_snapshot_provider(mut self, snapshot_provider: Arc) -> Self { - self.snapshot_provider = Some(snapshot_provider); - self + pub fn new( + tx: TX, + chain_spec: Arc, + static_file_provider: StaticFileProvider, + ) -> Self { + Self { tx, chain_spec, static_file_provider } } /// Consume `DbTx` or `DbTxMut`. @@ -250,96 +282,6 @@ impl DatabaseProvider { self } - /// Gets data within a specified range, potentially spanning different snapshots and database. - /// - /// # Arguments - /// * `segment` - The segment of the snapshot to query. - /// * `block_range` - The range of data to fetch. - /// * `fetch_from_snapshot` - A function to fetch data from the snapshot. - /// * `fetch_from_database` - A function to fetch data from the database. - /// * `predicate` - A function used to evaluate each item in the fetched data. Fetching is - /// terminated when this function returns false, thereby filtering the data based on the - /// provided condition. 
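The removed helper documented above, and its replacement `StaticFileProvider::get_range_with_static_file_or_database`, both follow the same split: serve the prefix of the requested range that static files already cover, then fall back to the database for the remainder. A minimal sketch of that logic, using plain closures and u64 ranges instead of reth's actual signatures (which also thread the predicate through both fetch closures):

use std::ops::Range;

// Illustrative sketch only: serve `range` from static files up to `highest_static_file`
// (inclusive), then fetch whatever is left from the database.
fn get_range_split<T>(
    mut range: Range<u64>,
    highest_static_file: Option<u64>,
    fetch_from_static_file: impl Fn(Range<u64>) -> Vec<T>,
    fetch_from_database: impl Fn(Range<u64>) -> Vec<T>,
) -> Vec<T> {
    let mut data = Vec::new();

    if let Some(upper_bound) = highest_static_file {
        if range.start <= upper_bound {
            // Static files cover [range.start, upper_bound]; take at most that much.
            let end = range.end.min(upper_bound + 1);
            data.extend(fetch_from_static_file(range.start..end));
            range.start = end;
        }
    }

    // Anything newer than the static files still lives in the database.
    if range.end > range.start {
        data.extend(fetch_from_database(range));
    }

    data
}

// Point-lookup counterpart (compare `get_with_snapshot` above and the new
// `get_with_static_file_or_database`): read from the static file when it covers `number`,
// otherwise fall back to the database.
fn get_split<T>(
    number: u64,
    highest_static_file: Option<u64>,
    fetch_from_static_file: impl Fn() -> Option<T>,
    fetch_from_database: impl Fn() -> Option<T>,
) -> Option<T> {
    if highest_static_file.map_or(false, |upper_bound| upper_bound >= number) {
        fetch_from_static_file()
    } else {
        fetch_from_database()
    }
}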
- fn get_range_with_snapshot( - &self, - segment: SnapshotSegment, - mut block_or_tx_range: Range, - fetch_from_snapshot: FS, - mut fetch_from_database: FD, - mut predicate: P, - ) -> ProviderResult> - where - FS: Fn(&SnapshotProvider, Range, &mut P) -> ProviderResult>, - FD: FnMut(Range, P) -> ProviderResult>, - P: FnMut(&T) -> bool, - { - let mut data = Vec::new(); - - if let Some(snapshot_provider) = &self.snapshot_provider { - // If there is, check the maximum block or transaction number of the segment. - if let Some(snapshot_upper_bound) = match segment { - SnapshotSegment::Headers => snapshot_provider.get_highest_snapshot_block(segment), - SnapshotSegment::Transactions | SnapshotSegment::Receipts => { - snapshot_provider.get_highest_snapshot_tx(segment) - } - } { - if block_or_tx_range.start <= snapshot_upper_bound { - let end = block_or_tx_range.end.min(snapshot_upper_bound + 1); - data.extend(fetch_from_snapshot( - snapshot_provider, - block_or_tx_range.start..end, - &mut predicate, - )?); - block_or_tx_range.start = end; - } - } - } - - if block_or_tx_range.end > block_or_tx_range.start { - data.extend(fetch_from_database(block_or_tx_range, predicate)?) - } - - Ok(data) - } - - /// Retrieves data from the database or snapshot, wherever it's available. - /// - /// # Arguments - /// * `segment` - The segment of the snapshot to check against. - /// * `index_key` - Requested index key, usually a block or transaction number. - /// * `fetch_from_snapshot` - A closure that defines how to fetch the data from the snapshot - /// provider. - /// * `fetch_from_database` - A closure that defines how to fetch the data from the database - /// when the snapshot doesn't contain the required data or is not available. - fn get_with_snapshot( - &self, - segment: SnapshotSegment, - number: u64, - fetch_from_snapshot: FS, - fetch_from_database: FD, - ) -> ProviderResult> - where - FS: Fn(&SnapshotProvider) -> ProviderResult>, - FD: Fn() -> ProviderResult>, - { - if let Some(provider) = &self.snapshot_provider { - // If there is, check the maximum block or transaction number of the segment. - let snapshot_upper_bound = match segment { - SnapshotSegment::Headers => provider.get_highest_snapshot_block(segment), - SnapshotSegment::Transactions | SnapshotSegment::Receipts => { - provider.get_highest_snapshot_tx(segment) - } - }; - - if snapshot_upper_bound - .map_or(false, |snapshot_upper_bound| snapshot_upper_bound >= number) - { - return fetch_from_snapshot(provider) - } - } - fetch_from_database() - } - fn transactions_by_tx_range_with_cursor( &self, range: impl RangeBounds, @@ -348,10 +290,10 @@ impl DatabaseProvider { where C: DbCursorRO, { - self.get_range_with_snapshot( - SnapshotSegment::Transactions, + self.static_file_provider.get_range_with_static_file_or_database( + StaticFileSegment::Transactions, to_range(range), - |snapshot, range, _| snapshot.transactions_by_tx_range(range), + |static_file, range, _| static_file.transactions_by_tx_range(range), |range, _| self.cursor_collect(cursor, range), |_| true, ) @@ -1038,45 +980,38 @@ impl HeaderSyncGapProvider for DatabaseProvider { mode: HeaderSyncMode, highest_uninterrupted_block: BlockNumber, ) -> RethResult { - // Create a cursor over canonical header hashes - let mut cursor = self.tx.cursor_read::()?; - let mut header_cursor = self.tx.cursor_read::()?; + let static_file_provider = self.static_file_provider(); - // Get head hash and reposition the cursor - let (head_num, head_hash) = cursor - .seek_exact(highest_uninterrupted_block)? 
+ // Make sure Headers static file is at the same height. If it's further, this + // input execution was interrupted previously and we need to unwind the static file. + let next_static_file_block_num = static_file_provider + .get_highest_static_file_block(StaticFileSegment::Headers) + .map(|id| id + 1) + .unwrap_or_default(); + let next_block = highest_uninterrupted_block + 1; + + match next_static_file_block_num.cmp(&next_block) { + // The node shutdown between an executed static file commit and before the database + // commit, so we need to unwind the static files. + Ordering::Greater => { + let mut static_file_producer = + static_file_provider.latest_writer(StaticFileSegment::Headers)?; + static_file_producer.prune_headers(next_static_file_block_num - next_block)? + } + Ordering::Less => { + // There's either missing or corrupted files. + return Err(ProviderError::HeaderNotFound(next_static_file_block_num.into()).into()) + } + Ordering::Equal => {} + } + + let local_head = static_file_provider + .sealed_header(highest_uninterrupted_block)? .ok_or_else(|| ProviderError::HeaderNotFound(highest_uninterrupted_block.into()))?; - // Construct head - let (_, head) = header_cursor - .seek_exact(head_num)? - .ok_or_else(|| ProviderError::HeaderNotFound(head_num.into()))?; - let local_head = head.seal(head_hash); - - // Look up the next header - let next_header = cursor - .next()? - .map(|(next_num, next_hash)| -> Result { - let (_, next) = header_cursor - .seek_exact(next_num)? - .ok_or_else(|| ProviderError::HeaderNotFound(next_num.into()))?; - Ok(next.seal(next_hash)) - }) - .transpose()?; - - // Decide the tip or error out on invalid input. - // If the next element found in the cursor is not the "expected" next block per our current - // checkpoint, then there is a gap in the database and we should start downloading in - // reverse from there. Else, it should use whatever the forkchoice state reports. 
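Because headers are now committed to the static file before the corresponding database transaction, the rewritten `sync_gap` above first reconciles the two heights: if the Headers static file is ahead of the stage checkpoint, the surplus headers are pruned from it; if it is behind, files are missing or corrupted and an error is returned. A hedged sketch of that comparison, returning the number of headers to prune instead of calling the real writer's `prune_headers`:

use std::cmp::Ordering;

// Illustrative sketch only: the error value and the "headers to prune" return value stand in
// for ProviderError::HeaderNotFound and the static-file writer's prune_headers call.
fn reconcile_static_file_height(
    highest_static_file_block: Option<u64>,
    checkpoint: u64,
) -> Result<u64, String> {
    let next_static_file_block = highest_static_file_block.map(|b| b + 1).unwrap_or_default();
    let next_block = checkpoint + 1;

    match next_static_file_block.cmp(&next_block) {
        // The node stopped after the static-file commit but before the database commit:
        // unwind the extra headers from the static file.
        Ordering::Greater => Ok(next_static_file_block - next_block),
        // The static file is behind the checkpoint: data is missing or corrupted.
        Ordering::Less => Err(format!("header {next_static_file_block} not found")),
        // Heights agree: nothing to unwind.
        Ordering::Equal => Ok(0),
    }
}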
- let target = match next_header { - Some(header) if highest_uninterrupted_block + 1 != header.number => { - SyncTarget::Gap(header) - } - None => match mode { - HeaderSyncMode::Tip(rx) => SyncTarget::Tip(*rx.borrow()), - HeaderSyncMode::Continuous => SyncTarget::TipNum(head_num + 1), - }, - _ => return Err(ProviderError::InconsistentHeaderGap.into()), + let target = match mode { + HeaderSyncMode::Tip(rx) => SyncTarget::Tip(*rx.borrow()), + HeaderSyncMode::Continuous => SyncTarget::TipNum(highest_uninterrupted_block + 1), }; Ok(HeaderSyncGap { local_head, target }) @@ -1093,10 +1028,10 @@ impl HeaderProvider for DatabaseProvider { } fn header_by_number(&self, num: BlockNumber) -> ProviderResult> { - self.get_with_snapshot( - SnapshotSegment::Headers, + self.static_file_provider.get_with_static_file_or_database( + StaticFileSegment::Headers, num, - |snapshot| snapshot.header_by_number(num), + |static_file| static_file.header_by_number(num), || Ok(self.tx.get::(num)?), ) } @@ -1116,29 +1051,29 @@ impl HeaderProvider for DatabaseProvider { return Ok(Some(td)) } - self.get_with_snapshot( - SnapshotSegment::Headers, + self.static_file_provider.get_with_static_file_or_database( + StaticFileSegment::Headers, number, - |snapshot| snapshot.header_td_by_number(number), + |static_file| static_file.header_td_by_number(number), || Ok(self.tx.get::(number)?.map(|td| td.0)), ) } fn headers_range(&self, range: impl RangeBounds) -> ProviderResult> { - self.get_range_with_snapshot( - SnapshotSegment::Headers, + self.static_file_provider.get_range_with_static_file_or_database( + StaticFileSegment::Headers, to_range(range), - |snapshot, range, _| snapshot.headers_range(range), + |static_file, range, _| static_file.headers_range(range), |range, _| self.cursor_read_collect::(range).map_err(Into::into), |_| true, ) } fn sealed_header(&self, number: BlockNumber) -> ProviderResult> { - self.get_with_snapshot( - SnapshotSegment::Headers, + self.static_file_provider.get_with_static_file_or_database( + StaticFileSegment::Headers, number, - |snapshot| snapshot.sealed_header(number), + |static_file| static_file.sealed_header(number), || { if let Some(header) = self.header_by_number(number)? { let hash = self @@ -1157,10 +1092,10 @@ impl HeaderProvider for DatabaseProvider { range: impl RangeBounds, predicate: impl FnMut(&SealedHeader) -> bool, ) -> ProviderResult> { - self.get_range_with_snapshot( - SnapshotSegment::Headers, + self.static_file_provider.get_range_with_static_file_or_database( + StaticFileSegment::Headers, to_range(range), - |snapshot, range, predicate| snapshot.sealed_headers_while(range, predicate), + |static_file, range, predicate| static_file.sealed_headers_while(range, predicate), |range, mut predicate| { let mut headers = vec![]; for entry in self.tx.cursor_read::()?.walk_range(range)? 
{ @@ -1183,10 +1118,10 @@ impl HeaderProvider for DatabaseProvider { impl BlockHashReader for DatabaseProvider { fn block_hash(&self, number: u64) -> ProviderResult> { - self.get_with_snapshot( - SnapshotSegment::Headers, + self.static_file_provider.get_with_static_file_or_database( + StaticFileSegment::Headers, number, - |snapshot| snapshot.block_hash(number), + |static_file| static_file.block_hash(number), || Ok(self.tx.get::(number)?), ) } @@ -1196,10 +1131,10 @@ impl BlockHashReader for DatabaseProvider { start: BlockNumber, end: BlockNumber, ) -> ProviderResult> { - self.get_range_with_snapshot( - SnapshotSegment::Headers, + self.static_file_provider.get_range_with_static_file_or_database( + StaticFileSegment::Headers, start..end, - |snapshot, range, _| snapshot.canonical_hashes_range(range.start, range.end), + |static_file, range, _| static_file.canonical_hashes_range(range.start, range.end), |range, _| { self.cursor_read_collect::(range).map_err(Into::into) }, @@ -1419,10 +1354,10 @@ impl TransactionsProviderExt for DatabaseProvider { &self, tx_range: Range, ) -> ProviderResult> { - self.get_range_with_snapshot( - SnapshotSegment::Transactions, + self.static_file_provider.get_range_with_static_file_or_database( + StaticFileSegment::Transactions, tx_range, - |snapshot, range, _| snapshot.transaction_hashes_by_range(range), + |static_file, range, _| static_file.transaction_hashes_by_range(range), |tx_range, _| { let mut tx_cursor = self.tx.cursor_read::()?; let tx_range_size = tx_range.clone().count(); @@ -1487,10 +1422,10 @@ impl TransactionsProvider for DatabaseProvider { } fn transaction_by_id(&self, id: TxNumber) -> ProviderResult> { - self.get_with_snapshot( - SnapshotSegment::Transactions, + self.static_file_provider.get_with_static_file_or_database( + StaticFileSegment::Transactions, id, - |snapshot| snapshot.transaction_by_id(id), + |static_file| static_file.transaction_by_id(id), || Ok(self.tx.get::(id)?.map(Into::into)), ) } @@ -1499,10 +1434,10 @@ impl TransactionsProvider for DatabaseProvider { &self, id: TxNumber, ) -> ProviderResult> { - self.get_with_snapshot( - SnapshotSegment::Transactions, + self.static_file_provider.get_with_static_file_or_database( + StaticFileSegment::Transactions, id, - |snapshot| snapshot.transaction_by_id_no_hash(id), + |static_file| static_file.transaction_by_id_no_hash(id), || Ok(self.tx.get::(id)?), ) } @@ -1640,10 +1575,10 @@ impl TransactionsProvider for DatabaseProvider { impl ReceiptProvider for DatabaseProvider { fn receipt(&self, id: TxNumber) -> ProviderResult> { - self.get_with_snapshot( - SnapshotSegment::Receipts, + self.static_file_provider.get_with_static_file_or_database( + StaticFileSegment::Receipts, id, - |snapshot| snapshot.receipt(id), + |static_file| static_file.receipt(id), || Ok(self.tx.get::(id)?), ) } @@ -1674,10 +1609,10 @@ impl ReceiptProvider for DatabaseProvider { &self, range: impl RangeBounds, ) -> ProviderResult> { - self.get_range_with_snapshot( - SnapshotSegment::Receipts, + self.static_file_provider.get_range_with_static_file_or_database( + StaticFileSegment::Receipts, to_range(range), - |snapshot, range, _| snapshot.receipts_by_tx_range(range), + |static_file, range, _| static_file.receipts_by_tx_range(range), |range, _| self.cursor_read_collect::(range).map_err(Into::into), |_| true, ) @@ -2393,7 +2328,7 @@ impl BlockWriter for DatabaseProvider { let mut next_tx_num = self .tx - .cursor_read::()? + .cursor_read::()? .last()? 
.map(|(n, _)| n + 1) .unwrap_or_default(); @@ -2512,7 +2447,7 @@ impl BlockWriter for DatabaseProvider { // Write state and changesets to the database. // Must be written after blocks because of the receipt lookup. - state.write_to_db(self.tx_ref(), OriginalValuesKnown::No)?; + state.write_to_storage(self.tx_ref(), None, OriginalValuesKnown::No)?; durations_recorder.record_relative(metrics::Action::InsertState); // insert hashes and intermediate merkle nodes @@ -2554,6 +2489,19 @@ impl PruneCheckpointWriter for DatabaseProvider { } } +impl StatsReader for DatabaseProvider { + fn count_entries(&self) -> ProviderResult { + let db_entries = self.tx.entries::()?; + let static_file_entries = match self.static_file_provider.count_entries::() { + Ok(entries) => entries, + Err(ProviderError::UnsupportedProvider) => 0, + Err(err) => return Err(err), + }; + + Ok(db_entries + static_file_entries) + } +} + fn range_size_hint(range: &impl RangeBounds) -> Option { let start = match range.start_bound().cloned() { Bound::Included(start) => start, diff --git a/crates/storage/provider/src/providers/mod.rs b/crates/storage/provider/src/providers/mod.rs index 56bb9145431..0fbde23f132 100644 --- a/crates/storage/provider/src/providers/mod.rs +++ b/crates/storage/provider/src/providers/mod.rs @@ -39,8 +39,11 @@ pub use state::{ mod bundle_state_provider; mod chain_info; mod database; -mod snapshot; -pub use snapshot::{SnapshotJarProvider, SnapshotProvider}; +mod static_file; +pub use static_file::{ + StaticFileJarProvider, StaticFileProvider, StaticFileProviderRW, StaticFileProviderRWRefMut, + StaticFileWriter, +}; mod state; use crate::{providers::chain_info::ChainInfoTracker, traits::BlockSource}; pub use bundle_state_provider::BundleStateProvider; @@ -131,34 +134,34 @@ where Tree: Send + Sync, { fn header(&self, block_hash: &BlockHash) -> ProviderResult> { - self.database.provider()?.header(block_hash) + self.database.header(block_hash) } fn header_by_number(&self, num: BlockNumber) -> ProviderResult> { - self.database.provider()?.header_by_number(num) + self.database.header_by_number(num) } fn header_td(&self, hash: &BlockHash) -> ProviderResult> { - self.database.provider()?.header_td(hash) + self.database.header_td(hash) } fn header_td_by_number(&self, number: BlockNumber) -> ProviderResult> { - self.database.provider()?.header_td_by_number(number) + self.database.header_td_by_number(number) } fn headers_range(&self, range: impl RangeBounds) -> ProviderResult> { - self.database.provider()?.headers_range(range) + self.database.headers_range(range) } fn sealed_header(&self, number: BlockNumber) -> ProviderResult> { - self.database.provider()?.sealed_header(number) + self.database.sealed_header(number) } fn sealed_headers_range( &self, range: impl RangeBounds, ) -> ProviderResult> { - self.database.provider()?.sealed_headers_range(range) + self.database.sealed_headers_range(range) } fn sealed_headers_while( @@ -166,7 +169,7 @@ where range: impl RangeBounds, predicate: impl FnMut(&SealedHeader) -> bool, ) -> ProviderResult> { - self.database.provider()?.sealed_headers_while(range, predicate) + self.database.sealed_headers_while(range, predicate) } } @@ -176,7 +179,7 @@ where Tree: Send + Sync, { fn block_hash(&self, number: u64) -> ProviderResult> { - self.database.provider()?.block_hash(number) + self.database.block_hash(number) } fn canonical_hashes_range( @@ -184,7 +187,7 @@ where start: BlockNumber, end: BlockNumber, ) -> ProviderResult> { - self.database.provider()?.canonical_hashes_range(start, end) + 
self.database.canonical_hashes_range(start, end) } } @@ -202,11 +205,11 @@ where } fn last_block_number(&self) -> ProviderResult { - self.database.provider()?.last_block_number() + self.database.last_block_number() } fn block_number(&self, hash: B256) -> ProviderResult> { - self.database.provider()?.block_number(hash) + self.database.block_number(hash) } } @@ -237,7 +240,7 @@ where let block = match source { BlockSource::Any => { // check database first - let mut block = self.database.provider()?.block_by_hash(hash)?; + let mut block = self.database.block_by_hash(hash)?; if block.is_none() { // Note: it's fine to return the unsealed block because the caller already has // the hash @@ -246,7 +249,7 @@ where block } BlockSource::Pending => self.tree.block_by_hash(hash).map(|block| block.unseal()), - BlockSource::Database => self.database.provider()?.block_by_hash(hash)?, + BlockSource::Database => self.database.block_by_hash(hash)?, }; Ok(block) @@ -255,7 +258,7 @@ where fn block(&self, id: BlockHashOrNumber) -> ProviderResult> { match id { BlockHashOrNumber::Hash(hash) => self.find_block_by_hash(hash, BlockSource::Any), - BlockHashOrNumber::Number(num) => self.database.provider()?.block_by_number(num), + BlockHashOrNumber::Number(num) => self.database.block_by_number(num), } } @@ -272,14 +275,14 @@ where } fn ommers(&self, id: BlockHashOrNumber) -> ProviderResult>> { - self.database.provider()?.ommers(id) + self.database.ommers(id) } fn block_body_indices( &self, number: BlockNumber, ) -> ProviderResult> { - self.database.provider()?.block_body_indices(number) + self.database.block_body_indices(number) } /// Returns the block with senders with matching number or hash from database. @@ -293,11 +296,11 @@ where id: BlockHashOrNumber, transaction_kind: TransactionVariant, ) -> ProviderResult> { - self.database.provider()?.block_with_senders(id, transaction_kind) + self.database.block_with_senders(id, transaction_kind) } fn block_range(&self, range: RangeInclusive) -> ProviderResult> { - self.database.provider()?.block_range(range) + self.database.block_range(range) } } @@ -307,65 +310,65 @@ where Tree: BlockchainTreeViewer + Send + Sync, { fn transaction_id(&self, tx_hash: TxHash) -> ProviderResult> { - self.database.provider()?.transaction_id(tx_hash) + self.database.transaction_id(tx_hash) } fn transaction_by_id(&self, id: TxNumber) -> ProviderResult> { - self.database.provider()?.transaction_by_id(id) + self.database.transaction_by_id(id) } fn transaction_by_id_no_hash( &self, id: TxNumber, ) -> ProviderResult> { - self.database.provider()?.transaction_by_id_no_hash(id) + self.database.transaction_by_id_no_hash(id) } fn transaction_by_hash(&self, hash: TxHash) -> ProviderResult> { - self.database.provider()?.transaction_by_hash(hash) + self.database.transaction_by_hash(hash) } fn transaction_by_hash_with_meta( &self, tx_hash: TxHash, ) -> ProviderResult> { - self.database.provider()?.transaction_by_hash_with_meta(tx_hash) + self.database.transaction_by_hash_with_meta(tx_hash) } fn transaction_block(&self, id: TxNumber) -> ProviderResult> { - self.database.provider()?.transaction_block(id) + self.database.transaction_block(id) } fn transactions_by_block( &self, id: BlockHashOrNumber, ) -> ProviderResult>> { - self.database.provider()?.transactions_by_block(id) + self.database.transactions_by_block(id) } fn transactions_by_block_range( &self, range: impl RangeBounds, ) -> ProviderResult>> { - self.database.provider()?.transactions_by_block_range(range) + 
self.database.transactions_by_block_range(range) } fn transactions_by_tx_range( &self, range: impl RangeBounds, ) -> ProviderResult> { - self.database.provider()?.transactions_by_tx_range(range) + self.database.transactions_by_tx_range(range) } fn senders_by_tx_range( &self, range: impl RangeBounds, ) -> ProviderResult> { - self.database.provider()?.senders_by_tx_range(range) + self.database.senders_by_tx_range(range) } fn transaction_sender(&self, id: TxNumber) -> ProviderResult> { - self.database.provider()?.transaction_sender(id) + self.database.transaction_sender(id) } } @@ -375,22 +378,22 @@ where Tree: Send + Sync, { fn receipt(&self, id: TxNumber) -> ProviderResult> { - self.database.provider()?.receipt(id) + self.database.receipt(id) } fn receipt_by_hash(&self, hash: TxHash) -> ProviderResult> { - self.database.provider()?.receipt_by_hash(hash) + self.database.receipt_by_hash(hash) } fn receipts_by_block(&self, block: BlockHashOrNumber) -> ProviderResult>> { - self.database.provider()?.receipts_by_block(block) + self.database.receipts_by_block(block) } fn receipts_by_tx_range( &self, range: impl RangeBounds, ) -> ProviderResult> { - self.database.provider()?.receipts_by_tx_range(range) + self.database.receipts_by_tx_range(range) } } impl ReceiptProviderIdExt for BlockchainProvider @@ -431,11 +434,11 @@ where id: BlockHashOrNumber, timestamp: u64, ) -> ProviderResult> { - self.database.provider()?.withdrawals_by_block(id, timestamp) + self.database.withdrawals_by_block(id, timestamp) } fn latest_withdrawal(&self) -> ProviderResult> { - self.database.provider()?.latest_withdrawal() + self.database.latest_withdrawal() } } diff --git a/crates/storage/provider/src/providers/snapshot/manager.rs b/crates/storage/provider/src/providers/snapshot/manager.rs deleted file mode 100644 index c46cab32567..00000000000 --- a/crates/storage/provider/src/providers/snapshot/manager.rs +++ /dev/null @@ -1,685 +0,0 @@ -use super::{LoadedJar, SnapshotJarProvider}; -use crate::{ - to_range, BlockHashReader, BlockNumReader, BlockReader, BlockSource, HeaderProvider, - ReceiptProvider, TransactionVariant, TransactionsProvider, TransactionsProviderExt, - WithdrawalsProvider, -}; -use dashmap::DashMap; -use parking_lot::RwLock; -use reth_db::{ - codecs::CompactU256, - models::StoredBlockBodyIndices, - snapshot::{iter_snapshots, HeaderMask, ReceiptMask, SnapshotCursor, TransactionMask}, -}; -use reth_interfaces::provider::{ProviderError, ProviderResult}; -use reth_nippy_jar::NippyJar; -use reth_primitives::{ - snapshot::HighestSnapshots, Address, Block, BlockHash, BlockHashOrNumber, BlockNumber, - BlockWithSenders, ChainInfo, Header, Receipt, SealedBlock, SealedBlockWithSenders, - SealedHeader, SnapshotSegment, TransactionMeta, TransactionSigned, TransactionSignedNoHash, - TxHash, TxNumber, Withdrawal, Withdrawals, B256, U256, -}; -use std::{ - collections::{hash_map::Entry, BTreeMap, HashMap}, - ops::{Range, RangeBounds, RangeInclusive}, - path::{Path, PathBuf}, -}; -use tokio::sync::watch; - -/// Alias type for a map that can be queried for transaction/block ranges from a block/transaction -/// segment respectively. It uses `BlockNumber` to represent the block end of a snapshot range or -/// `TxNumber` to represent the transaction end of a snapshot range. -/// -/// Can be in one of the two formats: -/// - `HashMap>>` -/// - `HashMap>>` -type SegmentRanges = HashMap>>; - -/// [`SnapshotProvider`] manages all existing [`SnapshotJarProvider`]. 
-#[derive(Debug, Default)] -pub struct SnapshotProvider { - /// Maintains a map which allows for concurrent access to different `NippyJars`, over different - /// segments and ranges. - map: DashMap<(BlockNumber, SnapshotSegment), LoadedJar>, - /// Available snapshot transaction ranges on disk indexed by max blocks. - snapshots_block_index: RwLock, - /// Available snapshot block ranges on disk indexed by max transactions. - snapshots_tx_index: RwLock, - /// Tracks the highest snapshot of every segment. - highest_tracker: Option>>, - /// Directory where snapshots are located - path: PathBuf, - /// Whether [`SnapshotJarProvider`] loads filters into memory. If not, `by_hash` queries won't - /// be able to be queried directly. - load_filters: bool, -} - -impl SnapshotProvider { - /// Creates a new [`SnapshotProvider`]. - pub fn new(path: impl AsRef) -> ProviderResult { - let provider = Self { - map: Default::default(), - snapshots_block_index: Default::default(), - snapshots_tx_index: Default::default(), - highest_tracker: None, - path: path.as_ref().to_path_buf(), - load_filters: false, - }; - - provider.update_index()?; - Ok(provider) - } - - /// Loads filters into memory when creating a [`SnapshotJarProvider`]. - pub fn with_filters(mut self) -> Self { - self.load_filters = true; - self - } - - /// Adds a highest snapshot tracker to the provider - pub fn with_highest_tracker( - mut self, - highest_tracker: Option>>, - ) -> Self { - self.highest_tracker = highest_tracker; - self - } - - /// Gets the [`SnapshotJarProvider`] of the requested segment and block. - pub fn get_segment_provider_from_block( - &self, - segment: SnapshotSegment, - block: BlockNumber, - path: Option<&Path>, - ) -> ProviderResult> { - self.get_segment_provider( - segment, - || self.get_segment_ranges_from_block(segment, block), - path, - )? - .ok_or_else(|| ProviderError::MissingSnapshotBlock(segment, block)) - } - - /// Gets the [`SnapshotJarProvider`] of the requested segment and transaction. - pub fn get_segment_provider_from_transaction( - &self, - segment: SnapshotSegment, - tx: TxNumber, - path: Option<&Path>, - ) -> ProviderResult> { - self.get_segment_provider( - segment, - || self.get_segment_ranges_from_transaction(segment, tx), - path, - )? - .ok_or_else(|| ProviderError::MissingSnapshotTx(segment, tx)) - } - - /// Gets the [`SnapshotJarProvider`] of the requested segment and block or transaction. - pub fn get_segment_provider( - &self, - segment: SnapshotSegment, - fn_ranges: impl Fn() -> Option<(RangeInclusive, RangeInclusive)>, - path: Option<&Path>, - ) -> ProviderResult>> { - // If we have a path, then get the block range and transaction range from its name. - // Otherwise, check `self.available_snapshots` - let snapshot_ranges = match path { - Some(path) => { - SnapshotSegment::parse_filename(path.file_name().ok_or_else(|| { - ProviderError::MissingSnapshotPath(segment, path.to_path_buf()) - })?) - .and_then(|(parsed_segment, block_range, tx_range)| { - if parsed_segment == segment { - return Some((block_range, tx_range)) - } - None - }) - } - None => fn_ranges(), - }; - - // Return cached `LoadedJar` or insert it for the first time, and then, return it. - if let Some((block_range, tx_range)) = snapshot_ranges { - return Ok(Some(self.get_or_create_jar_provider(segment, &block_range, &tx_range)?)) - } - - Ok(None) - } - - /// Given a segment, block range and transaction range it returns a cached - /// [`SnapshotJarProvider`]. TODO: we should check the size and pop N if there's too many. 
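As the doc comment above notes, jar providers are cached: one loaded `NippyJar` is kept per segment and block range, keyed by the range's end block, so repeated reads reuse the already memory-mapped file. A reduced sketch of that get-or-insert step, using a std `HashMap` in place of the concurrent `DashMap` and a stub in place of the loaded jar:

use std::collections::HashMap;

// Stand-in for a loaded, memory-mapped jar file; the path format below is part of the stub.
struct LoadedJar {
    path: String,
}

// Illustrative sketch only: return the cached jar for `(block_end, segment)`, loading and
// inserting it on first access. The real code uses DashMap's entry API and NippyJar::load.
fn get_or_create_jar<'a>(
    cache: &'a mut HashMap<(u64, &'static str), LoadedJar>,
    segment: &'static str,
    block_end: u64,
) -> &'a LoadedJar {
    cache
        .entry((block_end, segment))
        .or_insert_with(|| LoadedJar { path: format!("{segment}_{block_end}.jar") })
}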
- fn get_or_create_jar_provider( - &self, - segment: SnapshotSegment, - block_range: &RangeInclusive, - tx_range: &RangeInclusive, - ) -> ProviderResult> { - let key = (*block_range.end(), segment); - let entry = match self.map.entry(key) { - dashmap::mapref::entry::Entry::Occupied(entry) => entry.into_ref(), - dashmap::mapref::entry::Entry::Vacant(entry) => { - let path = self.path.join(segment.filename(block_range, tx_range)); - let mut jar = NippyJar::load(&path)?; - if self.load_filters { - jar.load_filters()?; - } - let loaded_jar = LoadedJar::new(jar)?; - entry.insert(loaded_jar) - } - }; - Ok(entry.downgrade().into()) - } - - /// Gets a snapshot segment's block range and transaction range from the provider inner block - /// index. - fn get_segment_ranges_from_block( - &self, - segment: SnapshotSegment, - block: u64, - ) -> Option<(RangeInclusive, RangeInclusive)> { - let snapshots = self.snapshots_block_index.read(); - let segment_snapshots = snapshots.get(&segment)?; - - // It's more probable that the request comes from a newer block height, so we iterate - // the snapshots in reverse. - let mut snapshots_rev_iter = segment_snapshots.iter().rev().peekable(); - - while let Some((block_end, tx_range)) = snapshots_rev_iter.next() { - if block > *block_end { - // request block is higher than highest snapshot block - return None - } - // `unwrap_or(0) is safe here as it sets block_start to 0 if the iterator is empty, - // indicating the lowest height snapshot has been reached. - let block_start = - snapshots_rev_iter.peek().map(|(block_end, _)| *block_end + 1).unwrap_or(0); - if block_start <= block { - return Some((block_start..=*block_end, tx_range.clone())) - } - } - None - } - - /// Gets a snapshot segment's block range and transaction range from the provider inner - /// transaction index. - fn get_segment_ranges_from_transaction( - &self, - segment: SnapshotSegment, - tx: u64, - ) -> Option<(RangeInclusive, RangeInclusive)> { - let snapshots = self.snapshots_tx_index.read(); - let segment_snapshots = snapshots.get(&segment)?; - - // It's more probable that the request comes from a newer tx height, so we iterate - // the snapshots in reverse. - let mut snapshots_rev_iter = segment_snapshots.iter().rev().peekable(); - - while let Some((tx_end, block_range)) = snapshots_rev_iter.next() { - if tx > *tx_end { - // request tx is higher than highest snapshot tx - return None - } - let tx_start = snapshots_rev_iter.peek().map(|(tx_end, _)| *tx_end + 1).unwrap_or(0); - if tx_start <= tx { - return Some((block_range.clone(), tx_start..=*tx_end)) - } - } - None - } - - /// Updates the inner transaction and block index - pub fn update_index(&self) -> ProviderResult<()> { - let mut block_index = self.snapshots_block_index.write(); - let mut tx_index = self.snapshots_tx_index.write(); - - for (segment, ranges) in iter_snapshots(&self.path)? { - for (block_range, tx_range) in ranges { - let block_end = *block_range.end(); - let tx_end = *tx_range.end(); - - match tx_index.entry(segment) { - Entry::Occupied(mut index) => { - index.get_mut().insert(tx_end, block_range); - } - Entry::Vacant(index) => { - index.insert(BTreeMap::from([(tx_end, block_range)])); - } - }; - - match block_index.entry(segment) { - Entry::Occupied(mut index) => { - index.get_mut().insert(block_end, tx_range); - } - Entry::Vacant(index) => { - index.insert(BTreeMap::from([(block_end, tx_range)])); - } - }; - } - } - - Ok(()) - } - - /// Gets the highest snapshot block if it exists for a snapshot segment. 
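The removed `get_segment_ranges_from_block` above (and its transaction counterpart) resolve which file holds a given block by walking the per-segment index newest-first, on the assumption that most queries target recent heights; the static-file index keeps the same shape. A simplified sketch of that reverse walk over a BTreeMap keyed by each file's highest block:

use std::collections::BTreeMap;
use std::ops::RangeInclusive;

// Illustrative sketch only: given an index of `highest block in file -> per-file data`,
// find the inclusive block range of the file containing `block`, walking newest-first.
fn find_block_range<V>(index: &BTreeMap<u64, V>, block: u64) -> Option<RangeInclusive<u64>> {
    let mut rev = index.keys().rev().peekable();

    while let Some(&block_end) = rev.next() {
        if block > block_end {
            // Requested block is above the highest indexed file.
            return None;
        }
        // The previous (older) file ends just below this one; 0 if this is the oldest file.
        let block_start = rev.peek().map(|&&prev_end| prev_end + 1).unwrap_or(0);
        if block_start <= block {
            return Some(block_start..=block_end);
        }
    }
    None
}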
- pub fn get_highest_snapshot_block(&self, segment: SnapshotSegment) -> Option { - self.snapshots_block_index - .read() - .get(&segment) - .and_then(|index| index.last_key_value().map(|(last_block, _)| *last_block)) - } - - /// Gets the highest snapshotted transaction. - pub fn get_highest_snapshot_tx(&self, segment: SnapshotSegment) -> Option { - self.snapshots_tx_index - .read() - .get(&segment) - .and_then(|index| index.last_key_value().map(|(last_tx, _)| *last_tx)) - } - - /// Iterates through segment snapshots in reverse order, executing a function until it returns - /// some object. Useful for finding objects by [`TxHash`] or [`BlockHash`]. - pub fn find_snapshot( - &self, - segment: SnapshotSegment, - func: impl Fn(SnapshotJarProvider<'_>) -> ProviderResult>, - ) -> ProviderResult> { - let snapshots = self.snapshots_block_index.read(); - if let Some(segment_snapshots) = snapshots.get(&segment) { - // It's more probable that the request comes from a newer block height, so we iterate - // the snapshots in reverse. - let mut snapshots_rev_iter = segment_snapshots.iter().rev().peekable(); - - while let Some((block_end, tx_range)) = snapshots_rev_iter.next() { - // `unwrap_or(0) is safe here as it sets block_start to 0 if the iterator - // is empty, indicating the lowest height snapshot has been reached. - let block_start = - snapshots_rev_iter.peek().map(|(block_end, _)| *block_end + 1).unwrap_or(0); - - if let Some(res) = func(self.get_or_create_jar_provider( - segment, - &(block_start..=*block_end), - tx_range, - )?)? { - return Ok(Some(res)) - } - } - } - - Ok(None) - } - - /// Fetches data within a specified range across multiple snapshot files. - /// - /// This function iteratively retrieves data using `get_fn` for each item in the given range. - /// It continues fetching until the end of the range is reached or the provided `predicate` - /// returns false. - pub fn fetch_range( - &self, - segment: SnapshotSegment, - range: Range, - get_fn: F, - mut predicate: P, - ) -> ProviderResult> - where - F: Fn(&mut SnapshotCursor<'_>, u64) -> ProviderResult>, - P: FnMut(&T) -> bool, - { - let get_provider = |start: u64| match segment { - SnapshotSegment::Headers => self.get_segment_provider_from_block(segment, start, None), - SnapshotSegment::Transactions | SnapshotSegment::Receipts => { - self.get_segment_provider_from_transaction(segment, start, None) - } - }; - - let mut result = Vec::with_capacity((range.end - range.start).min(100) as usize); - let mut provider = get_provider(range.start)?; - let mut cursor = provider.cursor()?; - - // advances number in range - 'outer: for number in range { - // advances snapshot files if `get_fn` returns None - 'inner: loop { - match get_fn(&mut cursor, number)? { - Some(res) => { - if !predicate(&res) { - break 'outer - } - result.push(res); - break 'inner - } - None => { - provider = get_provider(number)?; - cursor = provider.cursor()?; - } - } - } - } - - Ok(result) - } -} - -impl HeaderProvider for SnapshotProvider { - fn header(&self, block_hash: &BlockHash) -> ProviderResult> { - self.find_snapshot(SnapshotSegment::Headers, |jar_provider| { - Ok(jar_provider - .cursor()? - .get_two::>(block_hash.into())? - .and_then(|(header, hash)| { - if &hash == block_hash { - return Some(header) - } - None - })) - }) - } - - fn header_by_number(&self, num: BlockNumber) -> ProviderResult> { - self.get_segment_provider_from_block(SnapshotSegment::Headers, num, None)? 
- .header_by_number(num) - } - - fn header_td(&self, block_hash: &BlockHash) -> ProviderResult> { - self.find_snapshot(SnapshotSegment::Headers, |jar_provider| { - Ok(jar_provider - .cursor()? - .get_two::>(block_hash.into())? - .and_then(|(td, hash)| (&hash == block_hash).then_some(td.0))) - }) - } - - fn header_td_by_number(&self, num: BlockNumber) -> ProviderResult> { - self.get_segment_provider_from_block(SnapshotSegment::Headers, num, None)? - .header_td_by_number(num) - } - - fn headers_range(&self, range: impl RangeBounds) -> ProviderResult> { - self.fetch_range( - SnapshotSegment::Headers, - to_range(range), - |cursor, number| cursor.get_one::>(number.into()), - |_| true, - ) - } - - fn sealed_header(&self, num: BlockNumber) -> ProviderResult> { - self.get_segment_provider_from_block(SnapshotSegment::Headers, num, None)? - .sealed_header(num) - } - - fn sealed_headers_while( - &self, - range: impl RangeBounds, - predicate: impl FnMut(&SealedHeader) -> bool, - ) -> ProviderResult> { - self.fetch_range( - SnapshotSegment::Headers, - to_range(range), - |cursor, number| { - Ok(cursor - .get_two::>(number.into())? - .map(|(header, hash)| header.seal(hash))) - }, - predicate, - ) - } -} - -impl BlockHashReader for SnapshotProvider { - fn block_hash(&self, num: u64) -> ProviderResult> { - self.get_segment_provider_from_block(SnapshotSegment::Headers, num, None)?.block_hash(num) - } - - fn canonical_hashes_range( - &self, - start: BlockNumber, - end: BlockNumber, - ) -> ProviderResult> { - self.fetch_range( - SnapshotSegment::Headers, - start..end, - |cursor, number| cursor.get_one::>(number.into()), - |_| true, - ) - } -} - -impl ReceiptProvider for SnapshotProvider { - fn receipt(&self, num: TxNumber) -> ProviderResult> { - self.get_segment_provider_from_transaction(SnapshotSegment::Receipts, num, None)? - .receipt(num) - } - - fn receipt_by_hash(&self, hash: TxHash) -> ProviderResult> { - if let Some(num) = self.transaction_id(hash)? { - return self.receipt(num) - } - Ok(None) - } - - fn receipts_by_block(&self, _block: BlockHashOrNumber) -> ProviderResult>> { - unreachable!() - } - - fn receipts_by_tx_range( - &self, - range: impl RangeBounds, - ) -> ProviderResult> { - self.fetch_range( - SnapshotSegment::Receipts, - to_range(range), - |cursor, number| cursor.get_one::>(number.into()), - |_| true, - ) - } -} - -impl TransactionsProviderExt for SnapshotProvider { - fn transaction_hashes_by_range( - &self, - tx_range: Range, - ) -> ProviderResult> { - self.fetch_range( - SnapshotSegment::Transactions, - tx_range, - |cursor, number| { - let tx = - cursor.get_one::>(number.into())?; - Ok(tx.map(|tx| (tx.hash(), cursor.number()))) - }, - |_| true, - ) - } -} - -impl TransactionsProvider for SnapshotProvider { - fn transaction_id(&self, tx_hash: TxHash) -> ProviderResult> { - self.find_snapshot(SnapshotSegment::Transactions, |jar_provider| { - let mut cursor = jar_provider.cursor()?; - if cursor - .get_one::>((&tx_hash).into())? - .and_then(|tx| (tx.hash() == tx_hash).then_some(tx)) - .is_some() - { - Ok(Some(cursor.number())) - } else { - Ok(None) - } - }) - } - - fn transaction_by_id(&self, num: TxNumber) -> ProviderResult> { - self.get_segment_provider_from_transaction(SnapshotSegment::Transactions, num, None)? - .transaction_by_id(num) - } - - fn transaction_by_id_no_hash( - &self, - num: TxNumber, - ) -> ProviderResult> { - self.get_segment_provider_from_transaction(SnapshotSegment::Transactions, num, None)? 
- .transaction_by_id_no_hash(num) - } - - fn transaction_by_hash(&self, hash: TxHash) -> ProviderResult> { - self.find_snapshot(SnapshotSegment::Transactions, |jar_provider| { - Ok(jar_provider - .cursor()? - .get_one::>((&hash).into())? - .map(|tx| tx.with_hash()) - .and_then(|tx| (tx.hash_ref() == &hash).then_some(tx))) - }) - } - - fn transaction_by_hash_with_meta( - &self, - _hash: TxHash, - ) -> ProviderResult> { - // Required data not present in snapshots - Err(ProviderError::UnsupportedProvider) - } - - fn transaction_block(&self, _id: TxNumber) -> ProviderResult> { - // Required data not present in snapshots - Err(ProviderError::UnsupportedProvider) - } - - fn transactions_by_block( - &self, - _block_id: BlockHashOrNumber, - ) -> ProviderResult>> { - // Required data not present in snapshots - Err(ProviderError::UnsupportedProvider) - } - - fn transactions_by_block_range( - &self, - _range: impl RangeBounds, - ) -> ProviderResult>> { - // Required data not present in snapshots - Err(ProviderError::UnsupportedProvider) - } - - fn senders_by_tx_range( - &self, - range: impl RangeBounds, - ) -> ProviderResult> { - let txes = self.transactions_by_tx_range(range)?; - TransactionSignedNoHash::recover_signers(&txes, txes.len()) - .ok_or(ProviderError::SenderRecoveryError) - } - - fn transactions_by_tx_range( - &self, - range: impl RangeBounds, - ) -> ProviderResult> { - self.fetch_range( - SnapshotSegment::Transactions, - to_range(range), - |cursor, number| { - cursor.get_one::>(number.into()) - }, - |_| true, - ) - } - - fn transaction_sender(&self, id: TxNumber) -> ProviderResult> { - Ok(self.transaction_by_id_no_hash(id)?.and_then(|tx| tx.recover_signer())) - } -} - -/* Cannot be successfully implemented but must exist for trait requirements */ - -impl BlockNumReader for SnapshotProvider { - fn chain_info(&self) -> ProviderResult { - // Required data not present in snapshots - Err(ProviderError::UnsupportedProvider) - } - - fn best_block_number(&self) -> ProviderResult { - // Required data not present in snapshots - Err(ProviderError::UnsupportedProvider) - } - - fn last_block_number(&self) -> ProviderResult { - // Required data not present in snapshots - Err(ProviderError::UnsupportedProvider) - } - - fn block_number(&self, _hash: B256) -> ProviderResult> { - // Required data not present in snapshots - Err(ProviderError::UnsupportedProvider) - } -} - -impl BlockReader for SnapshotProvider { - fn find_block_by_hash( - &self, - _hash: B256, - _source: BlockSource, - ) -> ProviderResult> { - // Required data not present in snapshots - Err(ProviderError::UnsupportedProvider) - } - - fn block(&self, _id: BlockHashOrNumber) -> ProviderResult> { - // Required data not present in snapshots - Err(ProviderError::UnsupportedProvider) - } - - fn pending_block(&self) -> ProviderResult> { - // Required data not present in snapshots - Err(ProviderError::UnsupportedProvider) - } - - fn pending_block_with_senders(&self) -> ProviderResult> { - // Required data not present in snapshots - Err(ProviderError::UnsupportedProvider) - } - - fn pending_block_and_receipts(&self) -> ProviderResult)>> { - // Required data not present in snapshots - Err(ProviderError::UnsupportedProvider) - } - - fn ommers(&self, _id: BlockHashOrNumber) -> ProviderResult>> { - // Required data not present in snapshots - Err(ProviderError::UnsupportedProvider) - } - - fn block_body_indices(&self, _num: u64) -> ProviderResult> { - // Required data not present in snapshots - Err(ProviderError::UnsupportedProvider) - } - - fn 
block_with_senders( - &self, - _id: BlockHashOrNumber, - _transaction_kind: TransactionVariant, - ) -> ProviderResult> { - // Required data not present in snapshots - Err(ProviderError::UnsupportedProvider) - } - - fn block_range(&self, _range: RangeInclusive) -> ProviderResult> { - // Required data not present in snapshots - Err(ProviderError::UnsupportedProvider) - } -} - -impl WithdrawalsProvider for SnapshotProvider { - fn withdrawals_by_block( - &self, - _id: BlockHashOrNumber, - _timestamp: u64, - ) -> ProviderResult> { - // Required data not present in snapshots - Err(ProviderError::UnsupportedProvider) - } - - fn latest_withdrawal(&self) -> ProviderResult> { - // Required data not present in snapshots - Err(ProviderError::UnsupportedProvider) - } -} diff --git a/crates/storage/provider/src/providers/state/historical.rs b/crates/storage/provider/src/providers/state/historical.rs index 497e126c254..55b1bec1d91 100644 --- a/crates/storage/provider/src/providers/state/historical.rs +++ b/crates/storage/provider/src/providers/state/historical.rs @@ -1,6 +1,7 @@ use crate::{ - providers::state::macros::delegate_provider_impls, AccountReader, BlockHashReader, - BundleStateWithReceipts, ProviderError, StateProvider, StateRootProvider, + providers::{state::macros::delegate_provider_impls, StaticFileProvider}, + AccountReader, BlockHashReader, BundleStateWithReceipts, ProviderError, StateProvider, + StateRootProvider, }; use reth_db::{ cursor::{DbCursorRO, DbDupCursorRO}, @@ -13,7 +14,7 @@ use reth_db::{ use reth_interfaces::provider::ProviderResult; use reth_primitives::{ constants::EPOCH_SLOTS, trie::AccountProof, Account, Address, BlockNumber, Bytecode, - StorageKey, StorageValue, B256, + StaticFileSegment, StorageKey, StorageValue, B256, }; use reth_trie::{updates::TrieUpdates, HashedPostState}; use std::fmt::Debug; @@ -37,6 +38,8 @@ pub struct HistoricalStateProviderRef<'b, TX: DbTx> { block_number: BlockNumber, /// Lowest blocks at which different parts of the state are available. lowest_available_blocks: LowestAvailableBlocks, + /// Static File provider + static_file_provider: StaticFileProvider, } #[derive(Debug, Eq, PartialEq)] @@ -49,8 +52,12 @@ pub enum HistoryInfo { impl<'b, TX: DbTx> HistoricalStateProviderRef<'b, TX> { /// Create new StateProvider for historical block number - pub fn new(tx: &'b TX, block_number: BlockNumber) -> Self { - Self { tx, block_number, lowest_available_blocks: Default::default() } + pub fn new( + tx: &'b TX, + block_number: BlockNumber, + static_file_provider: StaticFileProvider, + ) -> Self { + Self { tx, block_number, lowest_available_blocks: Default::default(), static_file_provider } } /// Create new StateProvider for historical block number and lowest block numbers at which @@ -59,8 +66,9 @@ impl<'b, TX: DbTx> HistoricalStateProviderRef<'b, TX> { tx: &'b TX, block_number: BlockNumber, lowest_available_blocks: LowestAvailableBlocks, + static_file_provider: StaticFileProvider, ) -> Self { - Self { tx, block_number, lowest_available_blocks } + Self { tx, block_number, lowest_available_blocks, static_file_provider } } /// Lookup an account in the AccountsHistory table @@ -105,10 +113,14 @@ impl<'b, TX: DbTx> HistoricalStateProviderRef<'b, TX> { return Err(ProviderError::StateAtBlockPruned(self.block_number)) } - let (tip, _) = self + let tip = self .tx .cursor_read::()? .last()? 
+ .map(|(tip, _)| tip) + .or_else(|| { + self.static_file_provider.get_highest_static_file_block(StaticFileSegment::Headers) + }) .ok_or(ProviderError::BestBlockNotFound)?; if tip.saturating_sub(self.block_number) > EPOCH_SLOTS { @@ -211,7 +223,12 @@ impl<'b, TX: DbTx> AccountReader for HistoricalStateProviderRef<'b, TX> { impl<'b, TX: DbTx> BlockHashReader for HistoricalStateProviderRef<'b, TX> { /// Get block hash by number. fn block_hash(&self, number: u64) -> ProviderResult> { - self.tx.get::(number).map_err(Into::into) + self.static_file_provider.get_with_static_file_or_database( + StaticFileSegment::Headers, + number, + |static_file| static_file.block_hash(number), + || Ok(self.tx.get::(number)?), + ) } fn canonical_hashes_range( @@ -219,16 +236,23 @@ impl<'b, TX: DbTx> BlockHashReader for HistoricalStateProviderRef<'b, TX> { start: BlockNumber, end: BlockNumber, ) -> ProviderResult> { - let range = start..end; - self.tx - .cursor_read::() - .map(|mut cursor| { - cursor - .walk_range(range)? - .map(|result| result.map(|(_, hash)| hash).map_err(Into::into)) - .collect::>>() - })? - .map_err(Into::into) + self.static_file_provider.get_range_with_static_file_or_database( + StaticFileSegment::Headers, + start..end, + |static_file, range, _| static_file.canonical_hashes_range(range.start, range.end), + |range, _| { + self.tx + .cursor_read::() + .map(|mut cursor| { + cursor + .walk_range(range)? + .map(|result| result.map(|(_, hash)| hash).map_err(Into::into)) + .collect::>>() + })? + .map_err(Into::into) + }, + |_| true, + ) } } @@ -303,12 +327,18 @@ pub struct HistoricalStateProvider { block_number: BlockNumber, /// Lowest blocks at which different parts of the state are available. lowest_available_blocks: LowestAvailableBlocks, + /// Static File provider + static_file_provider: StaticFileProvider, } impl HistoricalStateProvider { /// Create new StateProvider for historical block number - pub fn new(tx: TX, block_number: BlockNumber) -> Self { - Self { tx, block_number, lowest_available_blocks: Default::default() } + pub fn new( + tx: TX, + block_number: BlockNumber, + static_file_provider: StaticFileProvider, + ) -> Self { + Self { tx, block_number, lowest_available_blocks: Default::default(), static_file_provider } } /// Set the lowest block number at which the account history is available. 
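Since recent canonical headers may now exist only in static files, the tip lookup above reads the last entry of the CanonicalHeaders table and, when that table is empty, falls back to the highest block recorded in the Headers static file before applying the EPOCH_SLOTS distance check. A tiny sketch of that fallback, with plain Options standing in for the cursor and provider calls:

// Illustrative sketch only: pick the tip from the database if present, otherwise from the
// highest block recorded in the Headers static file.
// e.g. resolve_tip(None, Some(17_000_000)) == Ok(17_000_000)
fn resolve_tip(
    last_db_canonical_block: Option<u64>,
    highest_static_file_block: Option<u64>,
) -> Result<u64, &'static str> {
    last_db_canonical_block
        .or(highest_static_file_block)
        .ok_or("best block not found")
}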
@@ -336,6 +366,7 @@ impl HistoricalStateProvider { &self.tx, self.block_number, self.lowest_available_blocks, + self.static_file_provider.clone(), ) } } @@ -375,13 +406,12 @@ impl LowestAvailableBlocks { mod tests { use crate::{ providers::state::historical::{HistoryInfo, LowestAvailableBlocks}, + test_utils::create_test_provider_factory, AccountReader, HistoricalStateProvider, HistoricalStateProviderRef, StateProvider, }; use reth_db::{ - database::Database, models::{storage_sharded_key::StorageShardedKey, AccountBeforeTx, ShardedKey}, tables, - test_utils::create_test_rw_db, transaction::{DbTx, DbTxMut}, BlockNumberList, }; @@ -400,8 +430,9 @@ mod tests { #[test] fn history_provider_get_account() { - let db = create_test_rw_db(); - let tx = db.tx_mut().unwrap(); + let factory = create_test_provider_factory(); + let tx = factory.provider_rw().unwrap().into_tx(); + let static_file_provider = factory.static_file_provider(); tx.put::( ShardedKey { key: ADDRESS, highest_block_number: 7 }, @@ -461,54 +492,73 @@ mod tests { tx.put::(HIGHER_ADDRESS, higher_acc_plain).unwrap(); tx.commit().unwrap(); - let tx = db.tx().unwrap(); + let tx = factory.provider().unwrap().into_tx(); // run - assert_eq!(HistoricalStateProviderRef::new(&tx, 1).basic_account(ADDRESS), Ok(None)); assert_eq!( - HistoricalStateProviderRef::new(&tx, 2).basic_account(ADDRESS), + HistoricalStateProviderRef::new(&tx, 1, static_file_provider.clone()) + .basic_account(ADDRESS) + .clone(), + Ok(None) + ); + assert_eq!( + HistoricalStateProviderRef::new(&tx, 2, static_file_provider.clone()) + .basic_account(ADDRESS), Ok(Some(acc_at3)) ); assert_eq!( - HistoricalStateProviderRef::new(&tx, 3).basic_account(ADDRESS), + HistoricalStateProviderRef::new(&tx, 3, static_file_provider.clone()) + .basic_account(ADDRESS), Ok(Some(acc_at3)) ); assert_eq!( - HistoricalStateProviderRef::new(&tx, 4).basic_account(ADDRESS), + HistoricalStateProviderRef::new(&tx, 4, static_file_provider.clone()) + .basic_account(ADDRESS), Ok(Some(acc_at7)) ); assert_eq!( - HistoricalStateProviderRef::new(&tx, 7).basic_account(ADDRESS), + HistoricalStateProviderRef::new(&tx, 7, static_file_provider.clone()) + .basic_account(ADDRESS), Ok(Some(acc_at7)) ); assert_eq!( - HistoricalStateProviderRef::new(&tx, 9).basic_account(ADDRESS), + HistoricalStateProviderRef::new(&tx, 9, static_file_provider.clone()) + .basic_account(ADDRESS), Ok(Some(acc_at10)) ); assert_eq!( - HistoricalStateProviderRef::new(&tx, 10).basic_account(ADDRESS), + HistoricalStateProviderRef::new(&tx, 10, static_file_provider.clone()) + .basic_account(ADDRESS), Ok(Some(acc_at10)) ); assert_eq!( - HistoricalStateProviderRef::new(&tx, 11).basic_account(ADDRESS), + HistoricalStateProviderRef::new(&tx, 11, static_file_provider.clone()) + .basic_account(ADDRESS), Ok(Some(acc_at15)) ); assert_eq!( - HistoricalStateProviderRef::new(&tx, 16).basic_account(ADDRESS), + HistoricalStateProviderRef::new(&tx, 16, static_file_provider.clone()) + .basic_account(ADDRESS), Ok(Some(acc_plain)) ); - assert_eq!(HistoricalStateProviderRef::new(&tx, 1).basic_account(HIGHER_ADDRESS), Ok(None)); assert_eq!( - HistoricalStateProviderRef::new(&tx, 1000).basic_account(HIGHER_ADDRESS), + HistoricalStateProviderRef::new(&tx, 1, static_file_provider.clone()) + .basic_account(HIGHER_ADDRESS), + Ok(None) + ); + assert_eq!( + HistoricalStateProviderRef::new(&tx, 1000, static_file_provider.clone()) + .basic_account(HIGHER_ADDRESS), Ok(Some(higher_acc_plain)) ); } #[test] fn history_provider_get_storage() { - let db = create_test_rw_db(); 
- let tx = db.tx_mut().unwrap(); + let factory = create_test_provider_factory(); + let tx = factory.provider_rw().unwrap().into_tx(); + let static_file_provider = factory.static_file_provider(); tx.put::( StorageShardedKey { @@ -555,52 +605,66 @@ mod tests { tx.put::(HIGHER_ADDRESS, higher_entry_plain).unwrap(); tx.commit().unwrap(); - let tx = db.tx().unwrap(); + let tx = factory.provider().unwrap().into_tx(); // run - assert_eq!(HistoricalStateProviderRef::new(&tx, 0).storage(ADDRESS, STORAGE), Ok(None)); assert_eq!( - HistoricalStateProviderRef::new(&tx, 3).storage(ADDRESS, STORAGE), + HistoricalStateProviderRef::new(&tx, 0, static_file_provider.clone()) + .storage(ADDRESS, STORAGE), + Ok(None) + ); + assert_eq!( + HistoricalStateProviderRef::new(&tx, 3, static_file_provider.clone()) + .storage(ADDRESS, STORAGE), Ok(Some(U256::ZERO)) ); assert_eq!( - HistoricalStateProviderRef::new(&tx, 4).storage(ADDRESS, STORAGE), + HistoricalStateProviderRef::new(&tx, 4, static_file_provider.clone()) + .storage(ADDRESS, STORAGE), Ok(Some(entry_at7.value)) ); assert_eq!( - HistoricalStateProviderRef::new(&tx, 7).storage(ADDRESS, STORAGE), + HistoricalStateProviderRef::new(&tx, 7, static_file_provider.clone()) + .storage(ADDRESS, STORAGE), Ok(Some(entry_at7.value)) ); assert_eq!( - HistoricalStateProviderRef::new(&tx, 9).storage(ADDRESS, STORAGE), + HistoricalStateProviderRef::new(&tx, 9, static_file_provider.clone()) + .storage(ADDRESS, STORAGE), Ok(Some(entry_at10.value)) ); assert_eq!( - HistoricalStateProviderRef::new(&tx, 10).storage(ADDRESS, STORAGE), + HistoricalStateProviderRef::new(&tx, 10, static_file_provider.clone()) + .storage(ADDRESS, STORAGE), Ok(Some(entry_at10.value)) ); assert_eq!( - HistoricalStateProviderRef::new(&tx, 11).storage(ADDRESS, STORAGE), + HistoricalStateProviderRef::new(&tx, 11, static_file_provider.clone()) + .storage(ADDRESS, STORAGE), Ok(Some(entry_at15.value)) ); assert_eq!( - HistoricalStateProviderRef::new(&tx, 16).storage(ADDRESS, STORAGE), + HistoricalStateProviderRef::new(&tx, 16, static_file_provider.clone()) + .storage(ADDRESS, STORAGE), Ok(Some(entry_plain.value)) ); assert_eq!( - HistoricalStateProviderRef::new(&tx, 1).storage(HIGHER_ADDRESS, STORAGE), + HistoricalStateProviderRef::new(&tx, 1, static_file_provider.clone()) + .storage(HIGHER_ADDRESS, STORAGE), Ok(None) ); assert_eq!( - HistoricalStateProviderRef::new(&tx, 1000).storage(HIGHER_ADDRESS, STORAGE), + HistoricalStateProviderRef::new(&tx, 1000, static_file_provider) + .storage(HIGHER_ADDRESS, STORAGE), Ok(Some(higher_entry_plain.value)) ); } #[test] fn history_provider_unavailable() { - let db = create_test_rw_db(); - let tx = db.tx().unwrap(); + let factory = create_test_provider_factory(); + let tx = factory.provider_rw().unwrap().into_tx(); + let static_file_provider = factory.static_file_provider(); // provider block_number < lowest available block number, // i.e. 
state at provider block is pruned @@ -611,6 +675,7 @@ mod tests { account_history_block_number: Some(3), storage_history_block_number: Some(3), }, + static_file_provider.clone(), ); assert_eq!( provider.account_history_lookup(ADDRESS), @@ -630,6 +695,7 @@ mod tests { account_history_block_number: Some(2), storage_history_block_number: Some(2), }, + static_file_provider.clone(), ); assert_eq!(provider.account_history_lookup(ADDRESS), Ok(HistoryInfo::MaybeInPlainState)); assert_eq!( @@ -646,6 +712,7 @@ mod tests { account_history_block_number: Some(1), storage_history_block_number: Some(1), }, + static_file_provider.clone(), ); assert_eq!(provider.account_history_lookup(ADDRESS), Ok(HistoryInfo::MaybeInPlainState)); assert_eq!( diff --git a/crates/storage/provider/src/providers/state/latest.rs b/crates/storage/provider/src/providers/state/latest.rs index 51616bee8c7..29441f22057 100644 --- a/crates/storage/provider/src/providers/state/latest.rs +++ b/crates/storage/provider/src/providers/state/latest.rs @@ -1,6 +1,6 @@ use crate::{ - providers::state::macros::delegate_provider_impls, AccountReader, BlockHashReader, - BundleStateWithReceipts, StateProvider, StateRootProvider, + providers::{state::macros::delegate_provider_impls, StaticFileProvider}, + AccountReader, BlockHashReader, BundleStateWithReceipts, StateProvider, StateRootProvider, }; use reth_db::{ cursor::{DbCursorRO, DbDupCursorRO}, @@ -9,7 +9,8 @@ use reth_db::{ }; use reth_interfaces::provider::{ProviderError, ProviderResult}; use reth_primitives::{ - trie::AccountProof, Account, Address, BlockNumber, Bytecode, StorageKey, StorageValue, B256, + trie::AccountProof, Account, Address, BlockNumber, Bytecode, StaticFileSegment, StorageKey, + StorageValue, B256, }; use reth_trie::{proof::Proof, updates::TrieUpdates}; @@ -18,12 +19,14 @@ use reth_trie::{proof::Proof, updates::TrieUpdates}; pub struct LatestStateProviderRef<'b, TX: DbTx> { /// database transaction db: &'b TX, + /// Static File provider + static_file_provider: StaticFileProvider, } impl<'b, TX: DbTx> LatestStateProviderRef<'b, TX> { /// Create new state provider - pub fn new(db: &'b TX) -> Self { - Self { db } + pub fn new(db: &'b TX, static_file_provider: StaticFileProvider) -> Self { + Self { db, static_file_provider } } } @@ -37,7 +40,12 @@ impl<'b, TX: DbTx> AccountReader for LatestStateProviderRef<'b, TX> { impl<'b, TX: DbTx> BlockHashReader for LatestStateProviderRef<'b, TX> { /// Get block hash by number. fn block_hash(&self, number: u64) -> ProviderResult> { - self.db.get::(number).map_err(Into::into) + self.static_file_provider.get_with_static_file_or_database( + StaticFileSegment::Headers, + number, + |static_file| static_file.block_hash(number), + || Ok(self.db.get::(number)?), + ) } fn canonical_hashes_range( @@ -45,16 +53,23 @@ impl<'b, TX: DbTx> BlockHashReader for LatestStateProviderRef<'b, TX> { start: BlockNumber, end: BlockNumber, ) -> ProviderResult> { - let range = start..end; - self.db - .cursor_read::() - .map(|mut cursor| { - cursor - .walk_range(range)? - .map(|result| result.map(|(_, hash)| hash).map_err(Into::into)) - .collect::>>() - })? - .map_err(Into::into) + self.static_file_provider.get_range_with_static_file_or_database( + StaticFileSegment::Headers, + start..end, + |static_file, range, _| static_file.canonical_hashes_range(range.start, range.end), + |range, _| { + self.db + .cursor_read::() + .map(|mut cursor| { + cursor + .walk_range(range)? + .map(|result| result.map(|(_, hash)| hash).map_err(Into::into)) + .collect::>>() + })? 
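The block_hash override above follows the static-file-first, database-fallback dispatch used throughout this patch. A minimal sketch of that decision, with u64 keys and String errors standing in for the real StaticFileSegment and ProviderResult types (names here are illustrative, not the provider's API):

    fn get_with_static_file_or_database<T>(
        highest_static_file: Option<u64>,
        number: u64,
        fetch_from_static_file: impl Fn() -> Result<Option<T>, String>,
        fetch_from_database: impl Fn() -> Result<Option<T>, String>,
    ) -> Result<Option<T>, String> {
        // Only consult the static file when its highest entry covers `number`;
        // otherwise fall through to the database closure.
        if highest_static_file.map_or(false, |highest| highest >= number) {
            return fetch_from_static_file();
        }
        fetch_from_database()
    }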
+ .map_err(Into::into) + }, + |_| true, + ) } } @@ -110,18 +125,20 @@ impl<'b, TX: DbTx> StateProvider for LatestStateProviderRef<'b, TX> { pub struct LatestStateProvider { /// database transaction db: TX, + /// Static File provider + static_file_provider: StaticFileProvider, } impl LatestStateProvider { /// Create new state provider - pub fn new(db: TX) -> Self { - Self { db } + pub fn new(db: TX, static_file_provider: StaticFileProvider) -> Self { + Self { db, static_file_provider } } /// Returns a new provider that takes the `TX` as reference #[inline(always)] fn as_ref(&self) -> LatestStateProviderRef<'_, TX> { - LatestStateProviderRef::new(&self.db) + LatestStateProviderRef::new(&self.db, self.static_file_provider.clone()) } } diff --git a/crates/storage/provider/src/providers/snapshot/jar.rs b/crates/storage/provider/src/providers/static_file/jar.rs similarity index 81% rename from crates/storage/provider/src/providers/snapshot/jar.rs rename to crates/storage/provider/src/providers/static_file/jar.rs index 7766c2906ad..92bc0bce6fb 100644 --- a/crates/storage/provider/src/providers/snapshot/jar.rs +++ b/crates/storage/provider/src/providers/static_file/jar.rs @@ -1,58 +1,81 @@ -use super::LoadedJarRef; +use super::{ + metrics::{StaticFileProviderMetrics, StaticFileProviderOperation}, + LoadedJarRef, +}; use crate::{ to_range, BlockHashReader, BlockNumReader, HeaderProvider, ReceiptProvider, TransactionsProvider, }; use reth_db::{ codecs::CompactU256, - snapshot::{HeaderMask, ReceiptMask, SnapshotCursor, TransactionMask}, + static_file::{HeaderMask, ReceiptMask, StaticFileCursor, TransactionMask}, }; use reth_interfaces::provider::{ProviderError, ProviderResult}; use reth_primitives::{ Address, BlockHash, BlockHashOrNumber, BlockNumber, ChainInfo, Header, Receipt, SealedHeader, TransactionMeta, TransactionSigned, TransactionSignedNoHash, TxHash, TxNumber, B256, U256, }; -use std::ops::{Deref, RangeBounds}; +use std::{ + ops::{Deref, RangeBounds}, + sync::Arc, +}; /// Provider over a specific `NippyJar` and range. #[derive(Debug)] -pub struct SnapshotJarProvider<'a> { - /// Main snapshot segment +pub struct StaticFileJarProvider<'a> { + /// Main static file segment jar: LoadedJarRef<'a>, - /// Another kind of snapshot segment to help query data from the main one. + /// Another kind of static file segment to help query data from the main one. auxiliar_jar: Option>, + metrics: Option>, } -impl<'a> Deref for SnapshotJarProvider<'a> { +impl<'a> Deref for StaticFileJarProvider<'a> { type Target = LoadedJarRef<'a>; fn deref(&self) -> &Self::Target { &self.jar } } -impl<'a> From> for SnapshotJarProvider<'a> { +impl<'a> From> for StaticFileJarProvider<'a> { fn from(value: LoadedJarRef<'a>) -> Self { - SnapshotJarProvider { jar: value, auxiliar_jar: None } + StaticFileJarProvider { jar: value, auxiliar_jar: None, metrics: None } } } -impl<'a> SnapshotJarProvider<'a> { +impl<'a> StaticFileJarProvider<'a> { /// Provides a cursor for more granular data access. 
- pub fn cursor<'b>(&'b self) -> ProviderResult> + pub fn cursor<'b>(&'b self) -> ProviderResult> where 'b: 'a, { - SnapshotCursor::new(self.value(), self.mmap_handle()) + let result = StaticFileCursor::new(self.value(), self.mmap_handle())?; + + if let Some(metrics) = &self.metrics { + metrics.record_segment_operation( + self.segment(), + StaticFileProviderOperation::InitCursor, + None, + ); + } + + Ok(result) } - /// Adds a new auxiliar snapshot to help query data from the main one - pub fn with_auxiliar(mut self, auxiliar_jar: SnapshotJarProvider<'a>) -> Self { + /// Adds a new auxiliar static file to help query data from the main one + pub fn with_auxiliar(mut self, auxiliar_jar: StaticFileJarProvider<'a>) -> Self { self.auxiliar_jar = Some(Box::new(auxiliar_jar)); self } + + /// Enables metrics on the provider. + pub fn with_metrics(mut self, metrics: Arc) -> Self { + self.metrics = Some(metrics); + self + } } -impl<'a> HeaderProvider for SnapshotJarProvider<'a> { +impl<'a> HeaderProvider for StaticFileJarProvider<'a> { fn header(&self, block_hash: &BlockHash) -> ProviderResult> { Ok(self .cursor()? @@ -124,7 +147,7 @@ impl<'a> HeaderProvider for SnapshotJarProvider<'a> { } } -impl<'a> BlockHashReader for SnapshotJarProvider<'a> { +impl<'a> BlockHashReader for StaticFileJarProvider<'a> { fn block_hash(&self, number: u64) -> ProviderResult> { self.cursor()?.get_one::>(number.into()) } @@ -146,7 +169,7 @@ impl<'a> BlockHashReader for SnapshotJarProvider<'a> { } } -impl<'a> BlockNumReader for SnapshotJarProvider<'a> { +impl<'a> BlockNumReader for StaticFileJarProvider<'a> { fn chain_info(&self) -> ProviderResult { // Information on live database Err(ProviderError::UnsupportedProvider) @@ -167,17 +190,17 @@ impl<'a> BlockNumReader for SnapshotJarProvider<'a> { Ok(cursor .get_one::>((&hash).into())? - .and_then(|res| (res == hash).then(|| cursor.number()))) + .and_then(|res| (res == hash).then(|| cursor.number()).flatten())) } } -impl<'a> TransactionsProvider for SnapshotJarProvider<'a> { +impl<'a> TransactionsProvider for StaticFileJarProvider<'a> { fn transaction_id(&self, hash: TxHash) -> ProviderResult> { let mut cursor = self.cursor()?; Ok(cursor .get_one::>((&hash).into())? - .and_then(|res| (res.hash() == hash).then(|| cursor.number()))) + .and_then(|res| (res.hash() == hash).then(|| cursor.number()).flatten())) } fn transaction_by_id(&self, num: TxNumber) -> ProviderResult> { @@ -218,7 +241,7 @@ impl<'a> TransactionsProvider for SnapshotJarProvider<'a> { &self, _block_id: BlockHashOrNumber, ) -> ProviderResult>> { - // Related to indexing tables. Live database should get the tx_range and call snapshot + // Related to indexing tables. Live database should get the tx_range and call static file // provider with `transactions_by_tx_range` instead. Err(ProviderError::UnsupportedProvider) } @@ -227,7 +250,7 @@ impl<'a> TransactionsProvider for SnapshotJarProvider<'a> { &self, _range: impl RangeBounds, ) -> ProviderResult>> { - // Related to indexing tables. Live database should get the tx_range and call snapshot + // Related to indexing tables. Live database should get the tx_range and call static file // provider with `transactions_by_tx_range` instead. 
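The `.flatten()` calls added above suggest that `cursor.number()` now returns an `Option` rather than a bare number, in which case `bool::then` would otherwise nest the options. A small illustration, with a closure standing in for the cursor call:

    fn matching_number(hash_matches: bool, number: impl Fn() -> Option<u64>) -> Option<u64> {
        // then(..) yields Option<Option<u64>> when the closure itself returns an
        // Option, so one level has to be flattened away.
        hash_matches.then(|| number()).flatten()
    }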
Err(ProviderError::UnsupportedProvider) } @@ -267,14 +290,14 @@ impl<'a> TransactionsProvider for SnapshotJarProvider<'a> { } } -impl<'a> ReceiptProvider for SnapshotJarProvider<'a> { +impl<'a> ReceiptProvider for StaticFileJarProvider<'a> { fn receipt(&self, num: TxNumber) -> ProviderResult> { self.cursor()?.get_one::>(num.into()) } fn receipt_by_hash(&self, hash: TxHash) -> ProviderResult> { - if let Some(tx_snapshot) = &self.auxiliar_jar { - if let Some(num) = tx_snapshot.transaction_id(hash)? { + if let Some(tx_static_file) = &self.auxiliar_jar { + if let Some(num) = tx_static_file.transaction_id(hash)? { return self.receipt(num) } } @@ -282,7 +305,7 @@ impl<'a> ReceiptProvider for SnapshotJarProvider<'a> { } fn receipts_by_block(&self, _block: BlockHashOrNumber) -> ProviderResult>> { - // Related to indexing tables. Snapshot should get the tx_range and call snapshot + // Related to indexing tables. StaticFile should get the tx_range and call static file // provider with `receipt()` instead for each Err(ProviderError::UnsupportedProvider) } diff --git a/crates/storage/provider/src/providers/static_file/manager.rs b/crates/storage/provider/src/providers/static_file/manager.rs new file mode 100644 index 00000000000..1d5a36bd5f1 --- /dev/null +++ b/crates/storage/provider/src/providers/static_file/manager.rs @@ -0,0 +1,1110 @@ +use super::{ + metrics::StaticFileProviderMetrics, LoadedJar, StaticFileJarProvider, StaticFileProviderRW, + StaticFileProviderRWRefMut, BLOCKS_PER_STATIC_FILE, +}; +use crate::{ + to_range, BlockHashReader, BlockNumReader, BlockReader, BlockSource, HeaderProvider, + ReceiptProvider, StatsReader, TransactionVariant, TransactionsProvider, + TransactionsProviderExt, WithdrawalsProvider, +}; +use dashmap::{mapref::entry::Entry as DashMapEntry, DashMap}; +use parking_lot::RwLock; +use reth_db::{ + codecs::CompactU256, + models::StoredBlockBodyIndices, + static_file::{iter_static_files, HeaderMask, ReceiptMask, StaticFileCursor, TransactionMask}, + table::Table, + tables, +}; +use reth_interfaces::provider::{ProviderError, ProviderResult}; +use reth_nippy_jar::NippyJar; +use reth_primitives::{ + keccak256, + static_file::{find_fixed_range, HighestStaticFiles, SegmentHeader, SegmentRangeInclusive}, + Address, Block, BlockHash, BlockHashOrNumber, BlockNumber, BlockWithSenders, ChainInfo, Header, + Receipt, SealedBlock, SealedBlockWithSenders, SealedHeader, StaticFileSegment, TransactionMeta, + TransactionSigned, TransactionSignedNoHash, TxHash, TxNumber, Withdrawal, Withdrawals, B256, + U256, +}; +use std::{ + collections::{hash_map::Entry, BTreeMap, HashMap}, + ops::{Deref, Range, RangeBounds, RangeInclusive}, + path::{Path, PathBuf}, + sync::{mpsc, Arc}, +}; +use tracing::warn; + +/// Alias type for a map that can be queried for block ranges from a transaction +/// segment respectively. It uses `TxNumber` to represent the transaction end of a static file +/// range. +type SegmentRanges = HashMap>; + +/// [`StaticFileProvider`] manages all existing [`StaticFileJarProvider`]. +#[derive(Debug, Default, Clone)] +pub struct StaticFileProvider(pub(crate) Arc); + +impl StaticFileProvider { + /// Creates a new [`StaticFileProvider`]. 
+ pub fn new(path: impl AsRef) -> ProviderResult { + let provider = Self(Arc::new(StaticFileProviderInner::new(path)?)); + provider.initialize_index()?; + Ok(provider) + } +} + +impl Deref for StaticFileProvider { + type Target = StaticFileProviderInner; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +/// [`StaticFileProviderInner`] manages all existing [`StaticFileJarProvider`]. +#[derive(Debug, Default)] +pub struct StaticFileProviderInner { + /// Maintains a map which allows for concurrent access to different `NippyJars`, over different + /// segments and ranges. + map: DashMap<(BlockNumber, StaticFileSegment), LoadedJar>, + /// Max static file block for each segment + static_files_max_block: RwLock>, + /// Available static file block ranges on disk indexed by max transactions. + static_files_tx_index: RwLock, + /// Directory where static_files are located + path: PathBuf, + /// Whether [`StaticFileJarProvider`] loads filters into memory. If not, `by_hash` queries + /// won't be able to be queried directly. + load_filters: bool, + /// Maintains a map of StaticFile writers for each [`StaticFileSegment`] + writers: DashMap, + metrics: Option>, +} + +impl StaticFileProviderInner { + /// Creates a new [`StaticFileProviderInner`]. + fn new(path: impl AsRef) -> ProviderResult { + let provider = Self { + map: Default::default(), + writers: Default::default(), + static_files_max_block: Default::default(), + static_files_tx_index: Default::default(), + path: path.as_ref().to_path_buf(), + load_filters: false, + metrics: None, + }; + + Ok(provider) + } +} + +impl StaticFileProvider { + /// Loads filters into memory when creating a [`StaticFileJarProvider`]. + pub fn with_filters(self) -> Self { + let mut provider = + Arc::try_unwrap(self.0).expect("should be called when initializing only"); + provider.load_filters = true; + Self(Arc::new(provider)) + } + + /// Enables metrics on the [`StaticFileProvider`]. + pub fn with_metrics(self) -> Self { + let mut provider = + Arc::try_unwrap(self.0).expect("should be called when initializing only"); + provider.metrics = Some(Arc::new(StaticFileProviderMetrics::default())); + Self(Arc::new(provider)) + } + + /// Gets the [`StaticFileJarProvider`] of the requested segment and block. + pub fn get_segment_provider_from_block( + &self, + segment: StaticFileSegment, + block: BlockNumber, + path: Option<&Path>, + ) -> ProviderResult> { + self.get_segment_provider( + segment, + || self.get_segment_ranges_from_block(segment, block), + path, + )? + .ok_or_else(|| ProviderError::MissingStaticFileBlock(segment, block)) + } + + /// Gets the [`StaticFileJarProvider`] of the requested segment and transaction. + pub fn get_segment_provider_from_transaction( + &self, + segment: StaticFileSegment, + tx: TxNumber, + path: Option<&Path>, + ) -> ProviderResult> { + self.get_segment_provider( + segment, + || self.get_segment_ranges_from_transaction(segment, tx), + path, + )? + .ok_or_else(|| ProviderError::MissingStaticFileTx(segment, tx)) + } + + /// Gets the [`StaticFileJarProvider`] of the requested segment and block or transaction. + /// + /// `fn_range` should make sure the range goes through `find_fixed_range`. + pub fn get_segment_provider( + &self, + segment: StaticFileSegment, + fn_range: impl Fn() -> Option, + path: Option<&Path>, + ) -> ProviderResult>> { + // If we have a path, then get the block range from its name. 
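StaticFileProvider is a thin newtype over an Arc'd inner struct with a Deref impl, so clones are cheap and all of them share the same jar map, indexes and writers. A minimal sketch of that shape (Provider and Inner are illustrative names):

    use std::{ops::Deref, path::PathBuf, sync::Arc};

    #[derive(Debug, Default, Clone)]
    struct Provider(Arc<Inner>);

    #[derive(Debug, Default)]
    struct Inner {
        path: PathBuf,
    }

    impl Deref for Provider {
        type Target = Inner;

        fn deref(&self) -> &Self::Target {
            &self.0
        }
    }

    fn shares_state(provider: &Provider) -> PathBuf {
        // Cloning only bumps the Arc; every clone observes the same inner state.
        let clone = provider.clone();
        clone.path.clone()
    }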
+ // Otherwise, check `self.available_static_files` + let block_range = match path { + Some(path) => StaticFileSegment::parse_filename( + &path + .file_name() + .ok_or_else(|| { + ProviderError::MissingStaticFilePath(segment, path.to_path_buf()) + })? + .to_string_lossy(), + ) + .and_then(|(parsed_segment, block_range)| { + if parsed_segment == segment { + return Some(block_range) + } + None + }), + None => fn_range(), + }; + + // Return cached `LoadedJar` or insert it for the first time, and then, return it. + if let Some(block_range) = block_range { + return Ok(Some(self.get_or_create_jar_provider(segment, &block_range)?)) + } + + Ok(None) + } + + /// Given a segment and block range it removes the cached provider from the map. + pub fn remove_cached_provider( + &self, + segment: StaticFileSegment, + fixed_block_range_end: BlockNumber, + ) { + self.map.remove(&(fixed_block_range_end, segment)); + } + + /// Given a segment and block range it deletes the jar and all files associated with it. + /// + /// CAUTION: destructive. Deletes files on disk. + pub fn delete_jar( + &self, + segment: StaticFileSegment, + fixed_block_range: SegmentRangeInclusive, + ) -> ProviderResult<()> { + let key = (fixed_block_range.end(), segment); + let jar = if let Some((_, jar)) = self.map.remove(&key) { + jar.jar + } else { + let mut jar = NippyJar::::load( + &self.path.join(segment.filename(&fixed_block_range)), + )?; + if self.load_filters { + jar.load_filters()?; + } + jar + }; + + jar.delete()?; + + let mut segment_max_block = None; + if fixed_block_range.start() > 0 { + segment_max_block = Some(fixed_block_range.start() - 1) + }; + self.update_index(segment, segment_max_block)?; + + Ok(()) + } + + /// Given a segment and block range it returns a cached + /// [`StaticFileJarProvider`]. TODO(joshie): we should check the size and pop N if there's too + /// many. + fn get_or_create_jar_provider( + &self, + segment: StaticFileSegment, + fixed_block_range: &SegmentRangeInclusive, + ) -> ProviderResult> { + let key = (fixed_block_range.end(), segment); + + // Avoid using `entry` directly to avoid a write lock in the common case. + let mut provider: StaticFileJarProvider<'_> = if let Some(jar) = self.map.get(&key) { + jar.into() + } else { + let path = self.path.join(segment.filename(fixed_block_range)); + let mut jar = NippyJar::load(&path)?; + if self.load_filters { + jar.load_filters()?; + } + + self.map.entry(key).insert(LoadedJar::new(jar)?).downgrade().into() + }; + + if let Some(metrics) = &self.metrics { + provider = provider.with_metrics(metrics.clone()); + } + Ok(provider) + } + + /// Gets a static file segment's block range from the provider inner block + /// index. + fn get_segment_ranges_from_block( + &self, + segment: StaticFileSegment, + block: u64, + ) -> Option { + self.static_files_max_block + .read() + .get(&segment) + .filter(|max| **max >= block) + .map(|_| find_fixed_range(block)) + } + + /// Gets a static file segment's fixed block range from the provider inner + /// transaction index. + fn get_segment_ranges_from_transaction( + &self, + segment: StaticFileSegment, + tx: u64, + ) -> Option { + let static_files = self.static_files_tx_index.read(); + let segment_static_files = static_files.get(&segment)?; + + // It's more probable that the request comes from a newer tx height, so we iterate + // the static_files in reverse. 
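Both range lookups here resolve a block to the fixed per-file range it belongs to via find_fixed_range. Assuming files are cut every BLOCKS_PER_STATIC_FILE blocks (500_000 in mod.rs below), the mapping is presumably equivalent to the following sketch, with a plain tuple standing in for SegmentRangeInclusive:

    const BLOCKS_PER_STATIC_FILE: u64 = 500_000;

    // Maps a block number to the inclusive block range of the file that holds it.
    fn find_fixed_range(block: u64) -> (u64, u64) {
        let start = (block / BLOCKS_PER_STATIC_FILE) * BLOCKS_PER_STATIC_FILE;
        (start, start + BLOCKS_PER_STATIC_FILE - 1)
    }

    // e.g. block 1_250_000 falls into the third file:
    // assert_eq!(find_fixed_range(1_250_000), (1_000_000, 1_499_999));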
+ let mut static_files_rev_iter = segment_static_files.iter().rev().peekable(); + + while let Some((tx_end, block_range)) = static_files_rev_iter.next() { + if tx > *tx_end { + // request tx is higher than highest static file tx + return None + } + let tx_start = static_files_rev_iter.peek().map(|(tx_end, _)| *tx_end + 1).unwrap_or(0); + if tx_start <= tx { + return Some(find_fixed_range(block_range.end())) + } + } + None + } + + /// Updates the inner transaction and block indexes alongside the internal cached providers in + /// `self.map`. + /// + /// Any entry higher than `segment_max_block` will be deleted from the previous structures. + /// + /// If `segment_max_block` is None it means there's no static file for this segment. + pub fn update_index( + &self, + segment: StaticFileSegment, + segment_max_block: Option, + ) -> ProviderResult<()> { + let mut max_block = self.static_files_max_block.write(); + let mut tx_index = self.static_files_tx_index.write(); + + match segment_max_block { + Some(segment_max_block) => { + // Update the max block for the segment + max_block.insert(segment, segment_max_block); + let fixed_range = find_fixed_range(segment_max_block); + + let jar = NippyJar::::load( + &self.path.join(segment.filename(&fixed_range)), + )?; + + // Updates the tx index by first removing all entries which have a higher + // block_start than our current static file. + if let Some(tx_range) = jar.user_header().tx_range() { + let tx_end = tx_range.end(); + + // Current block range has the same block start as `fixed_range``, but block end + // might be different if we are still filling this static file. + if let Some(current_block_range) = jar.user_header().block_range().copied() { + // Considering that `update_index` is called when we either append/truncate, + // we are sure that we are handling the latest data + // points. + // + // Here we remove every entry of the index that has a block start higher or + // equal than our current one. This is important in the case + // that we prune a lot of rows resulting in a file (and thus + // a higher block range) deletion. + tx_index + .entry(segment) + .and_modify(|index| { + index.retain(|_, block_range| { + block_range.start() < fixed_range.start() + }); + index.insert(tx_end, current_block_range); + }) + .or_insert_with(|| BTreeMap::from([(tx_end, current_block_range)])); + } + } else if let Some(1) = tx_index.get(&segment).map(|index| index.len()) { + // Only happens if we unwind all the txs/receipts from the first static file. + // Should only happen in test scenarios. + if jar.user_header().expected_block_start() == 0 && + matches!( + segment, + StaticFileSegment::Receipts | StaticFileSegment::Transactions + ) + { + tx_index.remove(&segment); + } + } + + // Update the cached provider. + self.map.insert((fixed_range.end(), segment), LoadedJar::new(jar)?); + + // Delete any cached provider that no longer has an associated jar. + self.map.retain(|(end, seg), _| !(*seg == segment && *end > fixed_range.end())); + } + None => { + tx_index.remove(&segment); + max_block.remove(&segment); + } + }; + + Ok(()) + } + + /// Initializes the inner transaction and block index + pub fn initialize_index(&self) -> ProviderResult<()> { + let mut max_block = self.static_files_max_block.write(); + let mut tx_index = self.static_files_tx_index.write(); + + tx_index.clear(); + + for (segment, ranges) in iter_static_files(&self.path)? 
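The transaction index walked above maps the last tx number of each static file to its block range, which is why a reverse walk can recover the file containing an arbitrary tx. A self-contained sketch of that lookup over a plain BTreeMap, with the stored value reduced to the block-range end:

    use std::collections::BTreeMap;

    fn block_range_end_for_tx(index: &BTreeMap<u64, u64>, tx: u64) -> Option<u64> {
        // Entries are keyed by the highest tx of each file, so iterate newest first.
        let mut rev = index.iter().rev().peekable();
        while let Some((tx_end, block_range_end)) = rev.next() {
            if tx > *tx_end {
                // Higher than anything stored in static files.
                return None;
            }
            // The previous (older) file's end + 1 is this file's first tx.
            let tx_start = rev.peek().map(|(end, _)| **end + 1).unwrap_or(0);
            if tx_start <= tx {
                return Some(*block_range_end);
            }
        }
        None
    }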
{ + // Update last block for each segment + if let Some((block_range, _)) = ranges.last() { + max_block.insert(segment, block_range.end()); + } + + // Update tx -> block_range index + for (block_range, tx_range) in ranges { + if let Some(tx_range) = tx_range { + let tx_end = tx_range.end(); + + match tx_index.entry(segment) { + Entry::Occupied(mut index) => { + index.get_mut().insert(tx_end, block_range); + } + Entry::Vacant(index) => { + index.insert(BTreeMap::from([(tx_end, block_range)])); + } + }; + } + } + } + + Ok(()) + } + + /// Gets the highest static file block if it exists for a static file segment. + pub fn get_highest_static_file_block(&self, segment: StaticFileSegment) -> Option { + self.static_files_max_block.read().get(&segment).copied() + } + + /// Gets the highest static file transaction. + pub fn get_highest_static_file_tx(&self, segment: StaticFileSegment) -> Option { + self.static_files_tx_index + .read() + .get(&segment) + .and_then(|index| index.last_key_value().map(|(last_tx, _)| *last_tx)) + } + + /// Gets the highest static file block for all segments. + pub fn get_highest_static_files(&self) -> HighestStaticFiles { + HighestStaticFiles { + headers: self.get_highest_static_file_block(StaticFileSegment::Headers), + receipts: self.get_highest_static_file_block(StaticFileSegment::Receipts), + transactions: self.get_highest_static_file_block(StaticFileSegment::Transactions), + } + } + + /// Iterates through segment static_files in reverse order, executing a function until it + /// returns some object. Useful for finding objects by [`TxHash`] or [`BlockHash`]. + pub fn find_static_file( + &self, + segment: StaticFileSegment, + func: impl Fn(StaticFileJarProvider<'_>) -> ProviderResult>, + ) -> ProviderResult> { + if let Some(highest_block) = self.get_highest_static_file_block(segment) { + let mut range = find_fixed_range(highest_block); + while range.end() > 0 { + if let Some(res) = func(self.get_or_create_jar_provider(segment, &range)?)? { + return Ok(Some(res)) + } + range = SegmentRangeInclusive::new( + range.start().saturating_sub(BLOCKS_PER_STATIC_FILE), + range.end().saturating_sub(BLOCKS_PER_STATIC_FILE), + ); + } + } + + Ok(None) + } + + /// Fetches data within a specified range across multiple static files. + /// + /// This function iteratively retrieves data using `get_fn` for each item in the given range. + /// It continues fetching until the end of the range is reached or the provided `predicate` + /// returns false. + pub fn fetch_range_with_predicate( + &self, + segment: StaticFileSegment, + range: Range, + mut get_fn: F, + mut predicate: P, + ) -> ProviderResult> + where + F: FnMut(&mut StaticFileCursor<'_>, u64) -> ProviderResult>, + P: FnMut(&T) -> bool, + { + let get_provider = |start: u64| match segment { + StaticFileSegment::Headers => { + self.get_segment_provider_from_block(segment, start, None) + } + StaticFileSegment::Transactions | StaticFileSegment::Receipts => { + self.get_segment_provider_from_transaction(segment, start, None) + } + }; + + let mut result = Vec::with_capacity((range.end - range.start).min(100) as usize); + let mut provider = get_provider(range.start)?; + let mut cursor = provider.cursor()?; + + // advances number in range + 'outer: for number in range { + // The `retrying` flag ensures a single retry attempt per `number`. If `get_fn` fails to + // access data in two different static files, it halts further attempts by returning + // an error, effectively preventing infinite retry loops. 
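The retry described here boils down to: on a miss, reopen the provider for that number once and try again; a second miss for the same number is treated as a hard error rather than an endless loop. A simplified, self-contained version of that control flow, with closures standing in for the cursor and provider plumbing:

    fn fetch_one<T>(
        mut lookup: impl FnMut(u64) -> Option<T>,
        mut reopen: impl FnMut(u64),
        number: u64,
    ) -> Result<T, String> {
        let mut retrying = false;
        loop {
            match lookup(number) {
                Some(value) => return Ok(value),
                // Second miss for the same number: give up instead of looping.
                None if retrying => return Err(format!("missing entry {number}")),
                None => {
                    // First miss: switch to the static file that should hold `number`.
                    reopen(number);
                    retrying = true;
                }
            }
        }
    }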
+ let mut retrying = false; + + // advances static files if `get_fn` returns None + 'inner: loop { + match get_fn(&mut cursor, number)? { + Some(res) => { + if !predicate(&res) { + break 'outer + } + result.push(res); + break 'inner + } + None => { + if retrying { + warn!( + target: "provider::static_file", + ?segment, + ?number, + "Could not find block or tx number on a range request" + ); + + let err = if segment.is_headers() { + ProviderError::MissingStaticFileBlock(segment, number) + } else { + ProviderError::MissingStaticFileTx(segment, number) + }; + return Err(err) + } + provider = get_provider(number)?; + cursor = provider.cursor()?; + retrying = true; + } + } + } + } + + Ok(result) + } + + /// Fetches data within a specified range across multiple static files. + /// + /// Returns an iterator over the data + pub fn fetch_range_iter<'a, T, F>( + &'a self, + segment: StaticFileSegment, + range: Range, + get_fn: F, + ) -> ProviderResult> + 'a> + where + F: Fn(&mut StaticFileCursor<'_>, u64) -> ProviderResult> + 'a, + T: std::fmt::Debug, + { + let get_provider = move |start: u64| match segment { + StaticFileSegment::Headers => { + self.get_segment_provider_from_block(segment, start, None) + } + StaticFileSegment::Transactions | StaticFileSegment::Receipts => { + self.get_segment_provider_from_transaction(segment, start, None) + } + }; + let mut provider = get_provider(range.start)?; + + Ok(range.filter_map(move |number| { + match get_fn(&mut provider.cursor().ok()?, number).transpose() { + Some(result) => Some(result), + None => { + provider = get_provider(number).ok()?; + get_fn(&mut provider.cursor().ok()?, number).transpose() + } + } + })) + } + + /// Returns directory where static_files are located. + pub fn directory(&self) -> &Path { + &self.path + } + + /// Retrieves data from the database or static file, wherever it's available. + /// + /// # Arguments + /// * `segment` - The segment of the static file to check against. + /// * `index_key` - Requested index key, usually a block or transaction number. + /// * `fetch_from_static_file` - A closure that defines how to fetch the data from the static + /// file provider. + /// * `fetch_from_database` - A closure that defines how to fetch the data from the database + /// when the static file doesn't contain the required data or is not available. + pub fn get_with_static_file_or_database( + &self, + segment: StaticFileSegment, + number: u64, + fetch_from_static_file: FS, + fetch_from_database: FD, + ) -> ProviderResult> + where + FS: Fn(&StaticFileProvider) -> ProviderResult>, + FD: Fn() -> ProviderResult>, + { + // If there is, check the maximum block or transaction number of the segment. + let static_file_upper_bound = match segment { + StaticFileSegment::Headers => self.get_highest_static_file_block(segment), + StaticFileSegment::Transactions | StaticFileSegment::Receipts => { + self.get_highest_static_file_tx(segment) + } + }; + + if static_file_upper_bound + .map_or(false, |static_file_upper_bound| static_file_upper_bound >= number) + { + return fetch_from_static_file(self) + } + fetch_from_database() + } + + /// Gets data within a specified range, potentially spanning different static_files and + /// database. + /// + /// # Arguments + /// * `segment` - The segment of the static file to query. + /// * `block_range` - The range of data to fetch. + /// * `fetch_from_static_file` - A function to fetch data from the static_file. + /// * `fetch_from_database` - A function to fetch data from the database. 
+ /// * `predicate` - A function used to evaluate each item in the fetched data. Fetching is + /// terminated when this function returns false, thereby filtering the data based on the + /// provided condition. + pub fn get_range_with_static_file_or_database( + &self, + segment: StaticFileSegment, + mut block_or_tx_range: Range, + fetch_from_static_file: FS, + mut fetch_from_database: FD, + mut predicate: P, + ) -> ProviderResult> + where + FS: Fn(&StaticFileProvider, Range, &mut P) -> ProviderResult>, + FD: FnMut(Range, P) -> ProviderResult>, + P: FnMut(&T) -> bool, + { + let mut data = Vec::new(); + + // If there is, check the maximum block or transaction number of the segment. + if let Some(static_file_upper_bound) = match segment { + StaticFileSegment::Headers => self.get_highest_static_file_block(segment), + StaticFileSegment::Transactions | StaticFileSegment::Receipts => { + self.get_highest_static_file_tx(segment) + } + } { + if block_or_tx_range.start <= static_file_upper_bound { + let end = block_or_tx_range.end.min(static_file_upper_bound + 1); + data.extend(fetch_from_static_file( + self, + block_or_tx_range.start..end, + &mut predicate, + )?); + block_or_tx_range.start = end; + } + } + + if block_or_tx_range.end > block_or_tx_range.start { + data.extend(fetch_from_database(block_or_tx_range, predicate)?) + } + + Ok(data) + } + + #[cfg(any(test, feature = "test-utils"))] + /// Returns static_files directory + pub fn path(&self) -> &Path { + &self.path + } +} + +/// Helper trait to manage different [`StaticFileProviderRW`] of an `Arc ProviderResult>; + + /// Returns a mutable reference to a [`StaticFileProviderRW`] of the latest + /// [`StaticFileSegment`]. + fn latest_writer( + &self, + segment: StaticFileSegment, + ) -> ProviderResult>; + + /// Commits all changes of all [`StaticFileProviderRW`] of all [`StaticFileSegment`]. + fn commit(&self) -> ProviderResult<()>; +} + +impl StaticFileWriter for StaticFileProvider { + fn get_writer( + &self, + block: BlockNumber, + segment: StaticFileSegment, + ) -> ProviderResult> { + tracing::trace!(target: "providers::static_file", ?block, ?segment, "Getting static file writer."); + Ok(match self.writers.entry(segment) { + DashMapEntry::Occupied(entry) => entry.into_ref(), + DashMapEntry::Vacant(entry) => { + let writer = StaticFileProviderRW::new( + segment, + block, + Arc::downgrade(&self.0), + self.metrics.clone(), + )?; + entry.insert(writer) + } + }) + } + + fn latest_writer( + &self, + segment: StaticFileSegment, + ) -> ProviderResult> { + self.get_writer(self.get_highest_static_file_block(segment).unwrap_or_default(), segment) + } + + fn commit(&self) -> ProviderResult<()> { + for mut writer in self.writers.iter_mut() { + writer.commit()?; + } + Ok(()) + } +} + +impl HeaderProvider for StaticFileProvider { + fn header(&self, block_hash: &BlockHash) -> ProviderResult> { + self.find_static_file(StaticFileSegment::Headers, |jar_provider| { + Ok(jar_provider + .cursor()? + .get_two::>(block_hash.into())? + .and_then(|(header, hash)| { + if &hash == block_hash { + return Some(header) + } + None + })) + }) + } + + fn header_by_number(&self, num: BlockNumber) -> ProviderResult> { + self.get_segment_provider_from_block(StaticFileSegment::Headers, num, None)? + .header_by_number(num) + } + + fn header_td(&self, block_hash: &BlockHash) -> ProviderResult> { + self.find_static_file(StaticFileSegment::Headers, |jar_provider| { + Ok(jar_provider + .cursor()? + .get_two::>(block_hash.into())? 
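get_range_with_static_file_or_database, documented just above, effectively splits the requested range in two: the prefix covered by static files and the remainder served from the database. That split, taken in isolation and with the provider types stripped away, looks roughly like:

    use std::ops::Range;

    // `upper` is the segment's highest block/tx number available in static files.
    fn split_range(
        mut range: Range<u64>,
        upper: Option<u64>,
    ) -> (Option<Range<u64>>, Option<Range<u64>>) {
        let mut static_part = None;
        if let Some(upper) = upper {
            if range.start <= upper {
                // Static files can serve up to and including `upper`.
                let end = range.end.min(upper + 1);
                static_part = Some(range.start..end);
                range.start = end;
            }
        }
        // Whatever is left over is fetched from the database.
        let db_part = (range.end > range.start).then_some(range);
        (static_part, db_part)
    }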
+ .and_then(|(td, hash)| (&hash == block_hash).then_some(td.0))) + }) + } + + fn header_td_by_number(&self, num: BlockNumber) -> ProviderResult> { + self.get_segment_provider_from_block(StaticFileSegment::Headers, num, None)? + .header_td_by_number(num) + } + + fn headers_range(&self, range: impl RangeBounds) -> ProviderResult> { + self.fetch_range_with_predicate( + StaticFileSegment::Headers, + to_range(range), + |cursor, number| cursor.get_one::>(number.into()), + |_| true, + ) + } + + fn sealed_header(&self, num: BlockNumber) -> ProviderResult> { + self.get_segment_provider_from_block(StaticFileSegment::Headers, num, None)? + .sealed_header(num) + } + + fn sealed_headers_while( + &self, + range: impl RangeBounds, + predicate: impl FnMut(&SealedHeader) -> bool, + ) -> ProviderResult> { + self.fetch_range_with_predicate( + StaticFileSegment::Headers, + to_range(range), + |cursor, number| { + Ok(cursor + .get_two::>(number.into())? + .map(|(header, hash)| header.seal(hash))) + }, + predicate, + ) + } +} + +impl BlockHashReader for StaticFileProvider { + fn block_hash(&self, num: u64) -> ProviderResult> { + self.get_segment_provider_from_block(StaticFileSegment::Headers, num, None)?.block_hash(num) + } + + fn canonical_hashes_range( + &self, + start: BlockNumber, + end: BlockNumber, + ) -> ProviderResult> { + self.fetch_range_with_predicate( + StaticFileSegment::Headers, + start..end, + |cursor, number| cursor.get_one::>(number.into()), + |_| true, + ) + } +} + +impl ReceiptProvider for StaticFileProvider { + fn receipt(&self, num: TxNumber) -> ProviderResult> { + self.get_segment_provider_from_transaction(StaticFileSegment::Receipts, num, None)? + .receipt(num) + } + + fn receipt_by_hash(&self, hash: TxHash) -> ProviderResult> { + if let Some(num) = self.transaction_id(hash)? { + return self.receipt(num) + } + Ok(None) + } + + fn receipts_by_block(&self, _block: BlockHashOrNumber) -> ProviderResult>> { + unreachable!() + } + + fn receipts_by_tx_range( + &self, + range: impl RangeBounds, + ) -> ProviderResult> { + self.fetch_range_with_predicate( + StaticFileSegment::Receipts, + to_range(range), + |cursor, number| cursor.get_one::>(number.into()), + |_| true, + ) + } +} + +impl TransactionsProviderExt for StaticFileProvider { + fn transaction_hashes_by_range( + &self, + tx_range: Range, + ) -> ProviderResult> { + let tx_range_size = (tx_range.end - tx_range.start) as usize; + + // Transactions are different size, so chunks will not all take the same processing time. If + // chunks are too big, there will be idle threads waiting for work. Choosing an + // arbitrary smaller value to make sure it doesn't happen. + let chunk_size = 100; + + let chunks = (tx_range.start..tx_range.end) + .step_by(chunk_size) + .map(|start| start..std::cmp::min(start + chunk_size as u64, tx_range.end)) + .collect::>>(); + let mut channels = Vec::with_capacity(chunk_size); + + #[inline] + fn calculate_hash( + entry: (TxNumber, TransactionSignedNoHash), + rlp_buf: &mut Vec, + ) -> Result<(B256, TxNumber), Box> { + let (tx_id, tx) = entry; + tx.transaction.encode_with_signature(&tx.signature, rlp_buf, false); + Ok((keccak256(rlp_buf), tx_id)) + } + + for chunk_range in chunks { + let (channel_tx, channel_rx) = mpsc::channel(); + channels.push(channel_rx); + + let manager = self.clone(); + + // Spawn the task onto the global rayon pool + // This task will send the results through the channel after it has calculated + // the hash. 
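transaction_hashes_by_range fans the work out chunk by chunk: each rayon task hashes one chunk and streams its results back over a dedicated mpsc channel, and the caller drains the channels afterwards. A reduced sketch of that fan-out; it assumes the rayon crate, as the surrounding code does, and replaces the RLP encode plus keccak256 step with a placeholder hash:

    use std::sync::mpsc;

    fn parallel_hash(ids: Vec<u64>, chunk_size: usize) -> Vec<(u64, u64)> {
        let mut channels = Vec::new();
        for chunk in ids.chunks(chunk_size) {
            let (sender, receiver) = mpsc::channel();
            channels.push(receiver);
            let chunk = chunk.to_vec();
            // One task per chunk on the global rayon pool.
            rayon::spawn(move || {
                for id in chunk {
                    // Placeholder "hash"; the real task keccak256-hashes the RLP encoding.
                    let _ = sender.send((id, id.wrapping_mul(0x9E37_79B9_7F4A_7C15)));
                }
            });
        }
        let mut out = Vec::with_capacity(ids.len());
        // Drain channels in chunk order; each closes once its task drops the sender.
        for receiver in channels {
            while let Ok(pair) = receiver.recv() {
                out.push(pair);
            }
        }
        out
    }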
+ rayon::spawn(move || { + let mut rlp_buf = Vec::with_capacity(128); + let _ = manager.fetch_range_with_predicate( + StaticFileSegment::Transactions, + chunk_range, + |cursor, number| { + Ok(cursor + .get_one::>(number.into())? + .map(|transaction| { + rlp_buf.clear(); + let _ = channel_tx + .send(calculate_hash((number, transaction), &mut rlp_buf)); + })) + }, + |_| true, + ); + }); + } + + let mut tx_list = Vec::with_capacity(tx_range_size); + + // Iterate over channels and append the tx hashes unsorted + for channel in channels { + while let Ok(tx) = channel.recv() { + let (tx_hash, tx_id) = tx.map_err(|boxed| *boxed)?; + tx_list.push((tx_hash, tx_id)); + } + } + + Ok(tx_list) + } +} + +impl TransactionsProvider for StaticFileProvider { + fn transaction_id(&self, tx_hash: TxHash) -> ProviderResult> { + self.find_static_file(StaticFileSegment::Transactions, |jar_provider| { + let mut cursor = jar_provider.cursor()?; + if cursor + .get_one::>((&tx_hash).into())? + .and_then(|tx| (tx.hash() == tx_hash).then_some(tx)) + .is_some() + { + Ok(cursor.number()) + } else { + Ok(None) + } + }) + } + + fn transaction_by_id(&self, num: TxNumber) -> ProviderResult> { + self.get_segment_provider_from_transaction(StaticFileSegment::Transactions, num, None)? + .transaction_by_id(num) + } + + fn transaction_by_id_no_hash( + &self, + num: TxNumber, + ) -> ProviderResult> { + self.get_segment_provider_from_transaction(StaticFileSegment::Transactions, num, None)? + .transaction_by_id_no_hash(num) + } + + fn transaction_by_hash(&self, hash: TxHash) -> ProviderResult> { + self.find_static_file(StaticFileSegment::Transactions, |jar_provider| { + Ok(jar_provider + .cursor()? + .get_one::>((&hash).into())? + .map(|tx| tx.with_hash()) + .and_then(|tx| (tx.hash_ref() == &hash).then_some(tx))) + }) + } + + fn transaction_by_hash_with_meta( + &self, + _hash: TxHash, + ) -> ProviderResult> { + // Required data not present in static_files + Err(ProviderError::UnsupportedProvider) + } + + fn transaction_block(&self, _id: TxNumber) -> ProviderResult> { + // Required data not present in static_files + Err(ProviderError::UnsupportedProvider) + } + + fn transactions_by_block( + &self, + _block_id: BlockHashOrNumber, + ) -> ProviderResult>> { + // Required data not present in static_files + Err(ProviderError::UnsupportedProvider) + } + + fn transactions_by_block_range( + &self, + _range: impl RangeBounds, + ) -> ProviderResult>> { + // Required data not present in static_files + Err(ProviderError::UnsupportedProvider) + } + + fn senders_by_tx_range( + &self, + range: impl RangeBounds, + ) -> ProviderResult> { + let txes = self.transactions_by_tx_range(range)?; + TransactionSignedNoHash::recover_signers(&txes, txes.len()) + .ok_or(ProviderError::SenderRecoveryError) + } + + fn transactions_by_tx_range( + &self, + range: impl RangeBounds, + ) -> ProviderResult> { + self.fetch_range_with_predicate( + StaticFileSegment::Transactions, + to_range(range), + |cursor, number| { + cursor.get_one::>(number.into()) + }, + |_| true, + ) + } + + fn transaction_sender(&self, id: TxNumber) -> ProviderResult> { + Ok(self.transaction_by_id_no_hash(id)?.and_then(|tx| tx.recover_signer())) + } +} + +/* Cannot be successfully implemented but must exist for trait requirements */ + +impl BlockNumReader for StaticFileProvider { + fn chain_info(&self) -> ProviderResult { + // Required data not present in static_files + Err(ProviderError::UnsupportedProvider) + } + + fn best_block_number(&self) -> ProviderResult { + // Required data not present in 
static_files + Err(ProviderError::UnsupportedProvider) + } + + fn last_block_number(&self) -> ProviderResult { + // Required data not present in static_files + Err(ProviderError::UnsupportedProvider) + } + + fn block_number(&self, _hash: B256) -> ProviderResult> { + // Required data not present in static_files + Err(ProviderError::UnsupportedProvider) + } +} + +impl BlockReader for StaticFileProvider { + fn find_block_by_hash( + &self, + _hash: B256, + _source: BlockSource, + ) -> ProviderResult> { + // Required data not present in static_files + Err(ProviderError::UnsupportedProvider) + } + + fn block(&self, _id: BlockHashOrNumber) -> ProviderResult> { + // Required data not present in static_files + Err(ProviderError::UnsupportedProvider) + } + + fn pending_block(&self) -> ProviderResult> { + // Required data not present in static_files + Err(ProviderError::UnsupportedProvider) + } + + fn pending_block_with_senders(&self) -> ProviderResult> { + // Required data not present in static_files + Err(ProviderError::UnsupportedProvider) + } + + fn pending_block_and_receipts(&self) -> ProviderResult)>> { + // Required data not present in static_files + Err(ProviderError::UnsupportedProvider) + } + + fn ommers(&self, _id: BlockHashOrNumber) -> ProviderResult>> { + // Required data not present in static_files + Err(ProviderError::UnsupportedProvider) + } + + fn block_body_indices(&self, _num: u64) -> ProviderResult> { + // Required data not present in static_files + Err(ProviderError::UnsupportedProvider) + } + + fn block_with_senders( + &self, + _id: BlockHashOrNumber, + _transaction_kind: TransactionVariant, + ) -> ProviderResult> { + // Required data not present in static_files + Err(ProviderError::UnsupportedProvider) + } + + fn block_range(&self, _range: RangeInclusive) -> ProviderResult> { + // Required data not present in static_files + Err(ProviderError::UnsupportedProvider) + } +} + +impl WithdrawalsProvider for StaticFileProvider { + fn withdrawals_by_block( + &self, + _id: BlockHashOrNumber, + _timestamp: u64, + ) -> ProviderResult> { + // Required data not present in static_files + Err(ProviderError::UnsupportedProvider) + } + + fn latest_withdrawal(&self) -> ProviderResult> { + // Required data not present in static_files + Err(ProviderError::UnsupportedProvider) + } +} + +impl StatsReader for StaticFileProvider { + fn count_entries(&self) -> ProviderResult { + match T::NAME { + tables::CanonicalHeaders::NAME | + tables::Headers::NAME | + tables::HeaderTerminalDifficulties::NAME => Ok(self + .get_highest_static_file_block(StaticFileSegment::Headers) + .map(|block| block + 1) + .unwrap_or_default() + as usize), + tables::Receipts::NAME => Ok(self + .get_highest_static_file_tx(StaticFileSegment::Receipts) + .map(|receipts| receipts + 1) + .unwrap_or_default() as usize), + tables::Transactions::NAME => Ok(self + .get_highest_static_file_tx(StaticFileSegment::Transactions) + .map(|txs| txs + 1) + .unwrap_or_default() as usize), + _ => Err(ProviderError::UnsupportedProvider), + } + } +} diff --git a/crates/storage/provider/src/providers/static_file/metrics.rs b/crates/storage/provider/src/providers/static_file/metrics.rs new file mode 100644 index 00000000000..497620b64b8 --- /dev/null +++ b/crates/storage/provider/src/providers/static_file/metrics.rs @@ -0,0 +1,90 @@ +use std::{collections::HashMap, time::Duration}; + +use itertools::Itertools; +use metrics::{Counter, Histogram}; +use reth_metrics::Metrics; +use reth_primitives::StaticFileSegment; +use strum::{EnumIter, IntoEnumIterator}; 
+ +/// Metrics for the static file provider. +#[derive(Debug)] +pub struct StaticFileProviderMetrics { + segment_operations: HashMap< + (StaticFileSegment, StaticFileProviderOperation), + StaticFileProviderOperationMetrics, + >, +} + +impl Default for StaticFileProviderMetrics { + fn default() -> Self { + Self { + segment_operations: StaticFileSegment::iter() + .cartesian_product(StaticFileProviderOperation::iter()) + .map(|(segment, operation)| { + ( + (segment, operation), + StaticFileProviderOperationMetrics::new_with_labels(&[ + ("segment", segment.as_str()), + ("operation", operation.as_str()), + ]), + ) + }) + .collect(), + } + } +} + +impl StaticFileProviderMetrics { + pub(crate) fn record_segment_operation( + &self, + segment: StaticFileSegment, + operation: StaticFileProviderOperation, + duration: Option, + ) { + self.segment_operations + .get(&(segment, operation)) + .expect("segment operation metrics should exist") + .calls_total + .increment(1); + + if let Some(duration) = duration { + self.segment_operations + .get(&(segment, operation)) + .expect("segment operation metrics should exist") + .write_duration_seconds + .record(duration.as_secs_f64()); + } + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, EnumIter)] +pub(crate) enum StaticFileProviderOperation { + InitCursor, + OpenWriter, + Append, + Prune, + IncrementBlock, + CommitWriter, +} + +impl StaticFileProviderOperation { + const fn as_str(&self) -> &'static str { + match self { + Self::InitCursor => "init-cursor", + Self::OpenWriter => "open-writer", + Self::Append => "append", + Self::Prune => "prune", + Self::IncrementBlock => "increment-block", + Self::CommitWriter => "commit-writer", + } + } +} + +#[derive(Metrics)] +#[metrics(scope = "static_files.jar_provider")] +pub(crate) struct StaticFileProviderOperationMetrics { + /// Total number of static file jar provider operations made. + calls_total: Counter, + /// The time it took to execute the static file jar provider operation that writes data. + write_duration_seconds: Histogram, +} diff --git a/crates/storage/provider/src/providers/snapshot/mod.rs b/crates/storage/provider/src/providers/static_file/mod.rs similarity index 73% rename from crates/storage/provider/src/providers/snapshot/mod.rs rename to crates/storage/provider/src/providers/static_file/mod.rs index e1e34522a8e..be1db10b15b 100644 --- a/crates/storage/provider/src/providers/snapshot/mod.rs +++ b/crates/storage/provider/src/providers/static_file/mod.rs @@ -1,18 +1,25 @@ mod manager; -pub use manager::SnapshotProvider; +pub use manager::{StaticFileProvider, StaticFileWriter}; mod jar; -pub use jar::SnapshotJarProvider; +pub use jar::StaticFileJarProvider; + +mod writer; +pub use writer::{StaticFileProviderRW, StaticFileProviderRWRefMut}; + +mod metrics; use reth_interfaces::provider::ProviderResult; use reth_nippy_jar::NippyJar; -use reth_primitives::{snapshot::SegmentHeader, SnapshotSegment}; +use reth_primitives::{static_file::SegmentHeader, StaticFileSegment}; use std::{ops::Deref, sync::Arc}; +const BLOCKS_PER_STATIC_FILE: u64 = 500_000; + /// Alias type for each specific `NippyJar`. -type LoadedJarRef<'a> = dashmap::mapref::one::Ref<'a, (u64, SnapshotSegment), LoadedJar>; +type LoadedJarRef<'a> = dashmap::mapref::one::Ref<'a, (u64, StaticFileSegment), LoadedJar>; -/// Helper type to reuse an associated snapshot mmap handle on created cursors. +/// Helper type to reuse an associated static file mmap handle on created cursors. 
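The metrics module above eagerly builds one metric handle per (segment, operation) pair, so recording can simply look the pair up and expect it to exist. Reduced to plain enums and integer counters, the pre-population looks like the following sketch; the real code derives the pairs with strum's EnumIter and itertools' cartesian_product:

    use std::collections::HashMap;

    #[derive(Clone, Copy, PartialEq, Eq, Hash)]
    enum Segment { Headers, Transactions, Receipts }

    #[derive(Clone, Copy, PartialEq, Eq, Hash)]
    enum Operation { Append, Prune, Commit }

    fn build_metrics() -> HashMap<(Segment, Operation), u64> {
        let segments = [Segment::Headers, Segment::Transactions, Segment::Receipts];
        let operations = [Operation::Append, Operation::Prune, Operation::Commit];
        let mut map = HashMap::new();
        for segment in segments {
            for operation in operations {
                // Counter starts at zero; record-time lookups can expect(..) the entry.
                map.insert((segment, operation), 0u64);
            }
        }
        map
    }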
#[derive(Debug)] pub struct LoadedJar { jar: NippyJar, @@ -29,6 +36,10 @@ impl LoadedJar { fn mmap_handle(&self) -> Arc { self.mmap_handle.clone() } + + fn segment(&self) -> StaticFileSegment { + self.jar.user_header().segment() + } } impl Deref for LoadedJar { @@ -45,25 +56,31 @@ mod tests { use rand::seq::SliceRandom; use reth_db::{ cursor::DbCursorRO, - snapshot::create_snapshot_T1_T2_T3, + static_file::create_static_file_T1_T2_T3, transaction::{DbTx, DbTxMut}, CanonicalHeaders, HeaderNumbers, HeaderTerminalDifficulties, Headers, RawTable, }; use reth_interfaces::test_utils::generators::{self, random_header_range}; - use reth_primitives::{BlockNumber, B256, U256}; + use reth_primitives::{static_file::find_fixed_range, BlockNumber, B256, U256}; #[test] fn test_snap() { // Ranges let row_count = 100u64; let range = 0..=(row_count - 1); - let segment_header = - SegmentHeader::new(range.clone(), range.clone(), SnapshotSegment::Headers); + let segment_header = SegmentHeader::new( + range.clone().into(), + Some(range.clone().into()), + Some(range.clone().into()), + StaticFileSegment::Headers, + ); // Data sources let factory = create_test_provider_factory(); - let snap_path = tempfile::tempdir().unwrap(); - let snap_file = snap_path.path().join(SnapshotSegment::Headers.filename(&range, &range)); + let static_files_path = tempfile::tempdir().unwrap(); + let static_file = static_files_path + .path() + .join(StaticFileSegment::Headers.filename(&find_fixed_range(*range.end()))); // Setup data let mut headers = random_header_range( @@ -86,12 +103,12 @@ mod tests { } provider_rw.commit().unwrap(); - // Create Snapshot + // Create StaticFile { let with_compression = true; let with_filter = true; - let mut nippy_jar = NippyJar::new(3, snap_file.as_path(), segment_header); + let mut nippy_jar = NippyJar::new(3, static_file.as_path(), segment_header); if with_compression { nippy_jar = nippy_jar.with_zstd(false, 0); @@ -115,24 +132,22 @@ mod tests { .unwrap() .map(|row| row.map(|(_key, value)| value.into_value()).map_err(|e| e.into())); - create_snapshot_T1_T2_T3::< + create_static_file_T1_T2_T3::< Headers, HeaderTerminalDifficulties, CanonicalHeaders, BlockNumber, SegmentHeader, - >( - tx, range, None, none_vec, Some(hashes), row_count as usize, &mut nippy_jar - ) + >(tx, range, None, none_vec, Some(hashes), row_count as usize, nippy_jar) .unwrap(); } // Use providers to query Header data and compare if it matches { let db_provider = factory.provider().unwrap(); - let manager = SnapshotProvider::new(snap_path.path()).unwrap().with_filters(); + let manager = StaticFileProvider::new(static_files_path.path()).unwrap().with_filters(); let jar_provider = manager - .get_segment_provider_from_block(SnapshotSegment::Headers, 0, Some(&snap_file)) + .get_segment_provider_from_block(StaticFileSegment::Headers, 0, Some(&static_file)) .unwrap(); assert!(!headers.is_empty()); diff --git a/crates/storage/provider/src/providers/static_file/writer.rs b/crates/storage/provider/src/providers/static_file/writer.rs new file mode 100644 index 00000000000..b30f81bcd81 --- /dev/null +++ b/crates/storage/provider/src/providers/static_file/writer.rs @@ -0,0 +1,488 @@ +use crate::providers::static_file::metrics::StaticFileProviderOperation; + +use super::{ + manager::StaticFileProviderInner, metrics::StaticFileProviderMetrics, StaticFileProvider, +}; +use dashmap::mapref::one::RefMut; +use reth_codecs::Compact; +use reth_db::codecs::CompactU256; +use reth_interfaces::provider::{ProviderError, ProviderResult}; +use 
reth_nippy_jar::{NippyJar, NippyJarError, NippyJarWriter}; +use reth_primitives::{ + static_file::{find_fixed_range, SegmentHeader, SegmentRangeInclusive}, + BlockHash, BlockNumber, Header, Receipt, StaticFileSegment, TransactionSignedNoHash, TxNumber, + U256, +}; +use std::{ + path::{Path, PathBuf}, + sync::{Arc, Weak}, + time::Instant, +}; +use tracing::debug; + +/// Mutable reference to a dashmap element of [`StaticFileProviderRW`]. +pub type StaticFileProviderRWRefMut<'a> = RefMut<'a, StaticFileSegment, StaticFileProviderRW>; + +#[derive(Debug)] +/// Extends `StaticFileProvider` with writing capabilities +pub struct StaticFileProviderRW { + /// Reference back to the provider. We need [Weak] here because [StaticFileProviderRW] is + /// stored in a [dashmap::DashMap] inside the parent [StaticFileProvider].which is an [Arc]. + /// If we were to use an [Arc] here, we would create a reference cycle. + reader: Weak, + writer: NippyJarWriter, + data_path: PathBuf, + buf: Vec, + metrics: Option>, +} + +impl StaticFileProviderRW { + /// Creates a new [`StaticFileProviderRW`] for a [`StaticFileSegment`]. + pub fn new( + segment: StaticFileSegment, + block: BlockNumber, + reader: Weak, + metrics: Option>, + ) -> ProviderResult { + let (writer, data_path) = Self::open(segment, block, reader.clone(), metrics.clone())?; + Ok(Self { writer, data_path, buf: Vec::with_capacity(100), reader, metrics }) + } + + fn open( + segment: StaticFileSegment, + block: u64, + reader: Weak, + metrics: Option>, + ) -> ProviderResult<(NippyJarWriter, PathBuf)> { + let start = Instant::now(); + + let static_file_provider = Self::upgrade_provider_to_strong_reference(&reader); + + let block_range = find_fixed_range(block); + let (jar, path) = match static_file_provider.get_segment_provider_from_block( + segment, + block_range.start(), + None, + ) { + Ok(provider) => (NippyJar::load(provider.data_path())?, provider.data_path().into()), + Err(ProviderError::MissingStaticFileBlock(_, _)) => { + let path = static_file_provider.directory().join(segment.filename(&block_range)); + (create_jar(segment, &path, block_range), path) + } + Err(err) => return Err(err), + }; + + let result = match NippyJarWriter::new(jar) { + Ok(writer) => Ok((writer, path)), + Err(NippyJarError::FrozenJar) => { + // This static file has been frozen, so we should + Err(ProviderError::FinalizedStaticFile(segment, block)) + } + Err(e) => Err(e.into()), + }?; + + if let Some(metrics) = &metrics { + metrics.record_segment_operation( + segment, + StaticFileProviderOperation::OpenWriter, + Some(start.elapsed()), + ); + } + + Ok(result) + } + + /// Commits configuration changes to disk and updates the reader index with the new changes. + pub fn commit(&mut self) -> ProviderResult<()> { + let start = Instant::now(); + + // Commits offsets and new user_header to disk + self.writer.commit()?; + + if let Some(metrics) = &self.metrics { + metrics.record_segment_operation( + self.writer.user_header().segment(), + StaticFileProviderOperation::CommitWriter, + Some(start.elapsed()), + ); + } + + debug!( + target: "provider::static_file", + segment = ?self.writer.user_header().segment(), + path = ?self.data_path, + duration = ?start.elapsed(), + "Commit" + ); + + self.update_index()?; + + Ok(()) + } + + /// Updates the `self.reader` internal index. + fn update_index(&self) -> ProviderResult<()> { + // We find the maximum block of the segment by checking this writer's last block. 
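The Weak back-reference called out in the struct comment exists because the writer is stored inside the provider it points back to; strong references in both directions would never be freed. A minimal illustration of that shape and of the upgrade-on-use pattern (a Mutex'd Vec stands in for the DashMap, and names are illustrative):

    use std::sync::{Arc, Mutex, Weak};

    struct Inner {
        writers: Mutex<Vec<Writer>>,
    }

    struct Writer {
        // Weak, not Arc: the provider owns the writer, so a strong reference
        // back to the provider would form a cycle and leak both.
        reader: Weak<Inner>,
    }

    impl Writer {
        fn reader(&self) -> Arc<Inner> {
            self.reader.upgrade().expect("provider outlives its writers")
        }
    }

    fn attach_writer(provider: &Arc<Inner>) {
        let writer = Writer { reader: Arc::downgrade(provider) };
        provider.writers.lock().unwrap().push(writer);
    }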
+ // + // However if there's no block range (because there's no data), we try to calculate it by + // substracting 1 from the expected block start, resulting on the last block of the + // previous file. + // + // If that expected block start is 0, then it means that there's no actual block data, and + // there's no block data in static files. + let segment_max_block = match self.writer.user_header().block_range() { + Some(block_range) => Some(block_range.end()), + None => { + if self.writer.user_header().expected_block_start() > 0 { + Some(self.writer.user_header().expected_block_start() - 1) + } else { + None + } + } + }; + + self.reader().update_index(self.writer.user_header().segment(), segment_max_block) + } + + /// Allows to increment the [`SegmentHeader`] end block. It will commit the current static file, + /// and create the next one if we are past the end range. + /// + /// Returns the current [`BlockNumber`] as seen in the static file. + pub fn increment_block(&mut self, segment: StaticFileSegment) -> ProviderResult { + let start = Instant::now(); + if let Some(last_block) = self.writer.user_header().block_end() { + // We have finished the previous static file and must freeze it + if last_block == self.writer.user_header().expected_block_end() { + // Commits offsets and new user_header to disk + self.commit()?; + + // Opens the new static file + let (writer, data_path) = + Self::open(segment, last_block + 1, self.reader.clone(), self.metrics.clone())?; + self.writer = writer; + self.data_path = data_path; + + *self.writer.user_header_mut() = + SegmentHeader::new(find_fixed_range(last_block + 1), None, None, segment); + } + } + + let block = self.writer.user_header_mut().increment_block(); + if let Some(metrics) = &self.metrics { + metrics.record_segment_operation( + segment, + StaticFileProviderOperation::IncrementBlock, + Some(start.elapsed()), + ); + } + + Ok(block) + } + + /// Truncates a number of rows from disk. It deletes and loads an older static file if block + /// goes beyond the start of the current block range. + /// + /// **last_block** should be passed only with transaction based segments. + /// + /// # Note + /// Commits to the configuration file at the end. 
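increment_block above rolls the writer over to the next fixed range once the current file is full: commit, open the file that starts right after the old end, and reset its header. The decision itself, stripped of the SegmentHeader bookkeeping, is roughly the following; this is a simplification, not the writer's actual API:

    // Returns the next block to write and whether a new file must be opened first.
    fn next_block(last_block: Option<u64>, expected_end: u64, blocks_per_file: u64) -> (u64, bool) {
        match last_block {
            // Current file is full: the next block starts a fresh fixed range.
            Some(last) if last == expected_end => (last + 1, true),
            // Still filling the current file.
            Some(last) => (last + 1, false),
            // Empty file: first block of its range.
            None => (expected_end + 1 - blocks_per_file, false),
        }
    }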
+    fn truncate(
+        &mut self,
+        segment: StaticFileSegment,
+        mut num_rows: u64,
+        last_block: Option<BlockNumber>,
+    ) -> ProviderResult<()> {
+        while num_rows > 0 {
+            let len = match segment {
+                StaticFileSegment::Headers => {
+                    self.writer.user_header().block_len().unwrap_or_default()
+                }
+                StaticFileSegment::Transactions | StaticFileSegment::Receipts => {
+                    self.writer.user_header().tx_len().unwrap_or_default()
+                }
+            };
+
+            if num_rows >= len {
+                // If there are more rows to delete than this static file contains, then just
+                // delete the whole file and go to the next static file
+                let previous_snap = self.data_path.clone();
+                let block_start = self.writer.user_header().expected_block_start();
+
+                if block_start != 0 {
+                    let (writer, data_path) = Self::open(
+                        segment,
+                        self.writer.user_header().expected_block_start() - 1,
+                        self.reader.clone(),
+                        self.metrics.clone(),
+                    )?;
+                    self.writer = writer;
+                    self.data_path = data_path;
+
+                    NippyJar::<SegmentHeader>::load(&previous_snap)?.delete()?;
+                } else {
+                    // Update `SegmentHeader`
+                    self.writer.user_header_mut().prune(len);
+                    self.writer.prune_rows(len as usize)?;
+                    break
+                }
+
+                num_rows -= len;
+            } else {
+                // Update `SegmentHeader`
+                self.writer.user_header_mut().prune(num_rows);
+
+                // Truncate data
+                self.writer.prune_rows(num_rows as usize)?;
+                num_rows = 0;
+            }
+        }
+
+        // Only Transactions and Receipts
+        if let Some(last_block) = last_block {
+            let header = self.writer.user_header_mut();
+            header.set_block_range(header.expected_block_start(), last_block);
+        }
+
+        // Commits new changes to disk.
+        self.commit()?;
+
+        Ok(())
+    }
+
+    /// Appends a column to the static file.
+    fn append_column<T: Compact>(&mut self, column: T) -> ProviderResult<()> {
+        self.buf.clear();
+        column.to_compact(&mut self.buf);
+
+        self.writer.append_column(Some(Ok(&self.buf)))?;
+        Ok(())
+    }
+
+    /// Appends to a tx-number-based static file.
+    ///
+    /// Returns the current [`TxNumber`] as seen in the static file.
+    fn append_with_tx_number<V: Compact>(
+        &mut self,
+        segment: StaticFileSegment,
+        tx_num: TxNumber,
+        value: V,
+    ) -> ProviderResult<TxNumber> {
+        debug_assert!(self.writer.user_header().segment() == segment);
+
+        if self.writer.user_header().tx_range().is_none() {
+            self.writer.user_header_mut().set_tx_range(tx_num, tx_num);
+        } else {
+            self.writer.user_header_mut().increment_tx();
+        }
+
+        self.append_column(value)?;
+
+        Ok(self.writer.user_header().tx_end().expect("qed"))
+    }
+
+    /// Appends a header to the static file.
+    ///
+    /// It **CALLS** `increment_block()` since the number of headers is equal to the number of
+    /// blocks.
+    ///
+    /// Returns the current [`BlockNumber`] as seen in the static file.
+    pub fn append_header(
+        &mut self,
+        header: Header,
+        terminal_difficulty: U256,
+        hash: BlockHash,
+    ) -> ProviderResult<BlockNumber> {
+        let start = Instant::now();
+
+        debug_assert!(self.writer.user_header().segment() == StaticFileSegment::Headers);
+
+        let block_number = self.increment_block(StaticFileSegment::Headers)?;
+
+        self.append_column(header)?;
+        self.append_column(CompactU256::from(terminal_difficulty))?;
+        self.append_column(hash)?;
+
+        if let Some(metrics) = &self.metrics {
+            metrics.record_segment_operation(
+                StaticFileSegment::Headers,
+                StaticFileProviderOperation::Append,
+                Some(start.elapsed()),
+            );
+        }
+
+        Ok(block_number)
+    }
+
+    /// Appends a transaction to the static file.
+    ///
+    /// It **DOES NOT CALL** `increment_block()`; that should be handled elsewhere. There might be
+    /// empty blocks, and this function wouldn't be called for them.
+    ///
+    /// Returns the current [`TxNumber`] as seen in the static file.
+    pub fn append_transaction(
+        &mut self,
+        tx_num: TxNumber,
+        tx: TransactionSignedNoHash,
+    ) -> ProviderResult<TxNumber> {
+        let start = Instant::now();
+
+        let result = self.append_with_tx_number(StaticFileSegment::Transactions, tx_num, tx)?;
+
+        if let Some(metrics) = &self.metrics {
+            metrics.record_segment_operation(
+                StaticFileSegment::Transactions,
+                StaticFileProviderOperation::Append,
+                Some(start.elapsed()),
+            );
+        }
+
+        Ok(result)
+    }
+
+    /// Appends a receipt to the static file.
+    ///
+    /// It **DOES NOT** call `increment_block()`; that should be handled elsewhere. There might be
+    /// empty blocks, and this function wouldn't be called for them.
+    ///
+    /// Returns the current [`TxNumber`] as seen in the static file.
+    pub fn append_receipt(
+        &mut self,
+        tx_num: TxNumber,
+        receipt: Receipt,
+    ) -> ProviderResult<TxNumber> {
+        let start = Instant::now();
+
+        let result = self.append_with_tx_number(StaticFileSegment::Receipts, tx_num, receipt)?;
+
+        if let Some(metrics) = &self.metrics {
+            metrics.record_segment_operation(
+                StaticFileSegment::Receipts,
+                StaticFileProviderOperation::Append,
+                Some(start.elapsed()),
+            );
+        }
+
+        Ok(result)
+    }
+
+    /// Removes the last `number` of transactions from static files.
+    ///
+    /// # Note
+    /// Commits to the configuration file at the end.
+    pub fn prune_transactions(
+        &mut self,
+        number: u64,
+        last_block: BlockNumber,
+    ) -> ProviderResult<()> {
+        let start = Instant::now();
+
+        let segment = StaticFileSegment::Transactions;
+        debug_assert!(self.writer.user_header().segment() == segment);
+
+        self.truncate(segment, number, Some(last_block))?;
+
+        if let Some(metrics) = &self.metrics {
+            metrics.record_segment_operation(
+                StaticFileSegment::Transactions,
+                StaticFileProviderOperation::Prune,
+                Some(start.elapsed()),
+            );
+        }
+
+        Ok(())
+    }
+
+    /// Prunes `to_delete` receipts from the static files.
+    ///
+    /// # Note
+    /// Commits to the configuration file at the end.
+    pub fn prune_receipts(
+        &mut self,
+        to_delete: u64,
+        last_block: BlockNumber,
+    ) -> ProviderResult<()> {
+        let start = Instant::now();
+
+        let segment = StaticFileSegment::Receipts;
+        debug_assert!(self.writer.user_header().segment() == segment);
+
+        self.truncate(segment, to_delete, Some(last_block))?;
+
+        if let Some(metrics) = &self.metrics {
+            metrics.record_segment_operation(
+                StaticFileSegment::Receipts,
+                StaticFileProviderOperation::Prune,
+                Some(start.elapsed()),
+            );
+        }
+
+        Ok(())
+    }
+
+    /// Prunes `to_delete` headers from the static files.
+    ///
+    /// # Note
+    /// Commits to the configuration file at the end.
+    pub fn prune_headers(&mut self, to_delete: u64) -> ProviderResult<()> {
+        let start = Instant::now();
+
+        let segment = StaticFileSegment::Headers;
+        debug_assert!(self.writer.user_header().segment() == segment);
+
+        self.truncate(segment, to_delete, None)?;
+
+        if let Some(metrics) = &self.metrics {
+            metrics.record_segment_operation(
+                StaticFileSegment::Headers,
+                StaticFileProviderOperation::Prune,
+                Some(start.elapsed()),
+            );
+        }
+
+        Ok(())
+    }
+
+    fn reader(&self) -> StaticFileProvider {
+        Self::upgrade_provider_to_strong_reference(&self.reader)
+    }
+
+    /// Upgrades a weak reference of [`StaticFileProviderInner`] to a strong reference
+    /// [`StaticFileProvider`].
+    ///
+    /// # Panics
+    ///
+    /// Panics if the parent [`StaticFileProvider`] is fully dropped while the child writer is
+    /// still active. In reality, it's impossible to detach the [`StaticFileProviderRW`] from the
+    /// [`StaticFileProvider`].
+    fn upgrade_provider_to_strong_reference(
+        provider: &Weak<StaticFileProviderInner>,
+    ) -> StaticFileProvider {
+        provider.upgrade().map(StaticFileProvider).expect("StaticFileProvider is dropped")
+    }
+
+    #[cfg(any(test, feature = "test-utils"))]
+    /// Helper function to override the block range for testing.
+    pub fn set_block_range(&mut self, block_range: std::ops::RangeInclusive<BlockNumber>) {
+        self.writer.user_header_mut().set_block_range(*block_range.start(), *block_range.end())
+    }
+}
+
+fn create_jar(
+    segment: StaticFileSegment,
+    path: &Path,
+    expected_block_range: SegmentRangeInclusive,
+) -> NippyJar<SegmentHeader> {
+    let mut jar = NippyJar::new(
+        segment.columns(),
+        path,
+        SegmentHeader::new(expected_block_range, None, None, segment),
+    );
+
+    // Transactions and Receipts already have the compression scheme used natively in their
+    // encoding (zstd dictionary).
+    if segment.is_headers() {
+        jar = jar.with_lz4();
+    }
+
+    jar
+}
diff --git a/crates/storage/provider/src/test_utils/mod.rs b/crates/storage/provider/src/test_utils/mod.rs
index 0da47c47940..7857b8c2144 100644
--- a/crates/storage/provider/src/test_utils/mod.rs
+++ b/crates/storage/provider/src/test_utils/mod.rs
@@ -1,6 +1,6 @@
 use crate::ProviderFactory;
 use reth_db::{
-    test_utils::{create_test_rw_db, TempDatabase},
+    test_utils::{create_test_rw_db, create_test_static_files_dir, TempDatabase},
     DatabaseEnv,
 };
 use reth_primitives::{ChainSpec, MAINNET};
@@ -27,5 +27,6 @@ pub fn create_test_provider_factory_with_chain_spec(
     chain_spec: Arc,
 ) -> ProviderFactory>> {
     let db = create_test_rw_db();
-    ProviderFactory::new(db, chain_spec)
+    ProviderFactory::new(db, chain_spec, create_test_static_files_dir())
+        .expect("create provider factory with static_files")
 }
diff --git a/crates/storage/provider/src/traits/mod.rs b/crates/storage/provider/src/traits/mod.rs
index 1260534784d..360fe97c06a 100644
--- a/crates/storage/provider/src/traits/mod.rs
+++ b/crates/storage/provider/src/traits/mod.rs
@@ -71,3 +71,6 @@ pub use prune_checkpoint::{PruneCheckpointReader, PruneCheckpointWriter};
 
 mod database_provider;
 pub use database_provider::DatabaseProviderFactory;
+
+mod stats;
+pub use stats::StatsReader;
diff --git a/crates/storage/provider/src/traits/stats.rs b/crates/storage/provider/src/traits/stats.rs
new file mode 100644
index 00000000000..dece75e287b
--- /dev/null
+++ b/crates/storage/provider/src/traits/stats.rs
@@ -0,0 +1,10 @@
+use reth_db::table::Table;
+use reth_interfaces::provider::ProviderResult;
+
+/// The trait for fetching provider statistics.
+#[auto_impl::auto_impl(&, Arc)]
+pub trait StatsReader: Send + Sync {
+    /// Fetch the number of entries in the corresponding [Table]. Depending on the provider, it may
+    /// route to different data sources other than [Table].
+    fn count_entries<T: Table>(&self) -> ProviderResult<usize>;
+}
diff --git a/crates/transaction-pool/src/pool/best.rs b/crates/transaction-pool/src/pool/best.rs
index 74d3e295fb4..5e870de2b81 100644
--- a/crates/transaction-pool/src/pool/best.rs
+++ b/crates/transaction-pool/src/pool/best.rs
@@ -85,7 +85,7 @@ pub(crate) struct BestTransactions {
     /// There might be the case where a yielded transactions is invalid, this will track it.
     pub(crate) invalid: HashSet,
     /// Used to receive any new pending transactions that have been added to the pool after this
-    /// iterator was snapshotted
+    /// iterator was created
     ///
     /// These new pending transactions are inserted into this iterator's pool before yielding the
     /// next value
diff --git a/crates/transaction-pool/src/pool/pending.rs b/crates/transaction-pool/src/pool/pending.rs
index 9476f3c6cb1..90ae13cd644 100644
--- a/crates/transaction-pool/src/pool/pending.rs
+++ b/crates/transaction-pool/src/pool/pending.rs
@@ -51,7 +51,7 @@ pub struct PendingPool {
     /// See also [`PoolTransaction::size`](crate::traits::PoolTransaction::size).
     size_of: SizeTracker,
     /// Used to broadcast new transactions that have been added to the PendingPool to existing
-    /// snapshots of this pool.
+    /// iterators of this pool.
     new_transaction_notifier: broadcast::Sender>,
 }
 
@@ -309,7 +309,7 @@ impl PendingPool {
         self.update_independents_and_highest_nonces(&tx, &tx_id);
         self.all.insert(tx.clone());
 
-        // send the new transaction to any existing pendingpool snapshot iterators
+        // send the new transaction to any existing pendingpool iterators
         if self.new_transaction_notifier.receiver_count() > 0 {
             let _ = self.new_transaction_notifier.send(tx.clone());
         }
diff --git a/crates/trie/Cargo.toml b/crates/trie/Cargo.toml
index 280189eada0..a6bb0bbc322 100644
--- a/crates/trie/Cargo.toml
+++ b/crates/trie/Cargo.toml
@@ -27,7 +27,7 @@ tracing.workspace = true
 
 # misc
 thiserror.workspace = true
-derive_more = "0.99"
+derive_more.workspace = true
 auto_impl = "1"
 
 # test-utils
diff --git a/docs/crates/stages.md b/docs/crates/stages.md
index 1ea64aaab14..8e3de4a044a 100644
--- a/docs/crates/stages.md
+++ b/docs/crates/stages.md
@@ -94,10 +94,6 @@ This process continues until all of the headers have been downloaded and written
-## TotalDifficultyStage -* TODO: explain stage -
- ## BodyStage Once the `HeaderStage` completes successfully, the `BodyStage` will start execution. The body stage downloads block bodies for all of the new block headers that were stored locally in the database. The `BodyStage` first determines which block bodies to download by checking if the block body has an ommers hash and transaction root. diff --git a/etc/grafana/dashboards/overview.json b/etc/grafana/dashboards/overview.json index 867657bf879..19e9b7e26e8 100644 --- a/etc/grafana/dashboards/overview.json +++ b/etc/grafana/dashboards/overview.json @@ -670,7 +670,6 @@ "MerkleUnwind": 5, "SenderRecovery": 3, "StorageHashing": 7, - "TotalDifficulty": 1, "TransactionLookup": 9 } } diff --git a/examples/Cargo.toml b/examples/Cargo.toml index 92bb0f1f1ea..4b50d33573c 100644 --- a/examples/Cargo.toml +++ b/examples/Cargo.toml @@ -27,6 +27,8 @@ futures.workspace = true async-trait.workspace = true tokio.workspace = true +jemallocator = { version = "0.5.0", features = ["profiling"] } + [[example]] name = "db-access" path = "db-access.rs" @@ -38,3 +40,4 @@ path = "network.rs" [[example]] name = "network-txpool" path = "network-txpool.rs" + diff --git a/examples/db-access.rs b/examples/db-access.rs index 22883483846..6edfc6afe16 100644 --- a/examples/db-access.rs +++ b/examples/db-access.rs @@ -18,12 +18,14 @@ fn main() -> eyre::Result<()> { // Opens a RO handle to the database file. // TODO: Should be able to do `ProviderFactory::new_with_db_path_ro(...)` instead of // doing in 2 steps. - let db = open_db_read_only(Path::new(&std::env::var("RETH_DB_PATH")?), Default::default())?; + let db_path = std::env::var("RETH_DB_PATH")?; + let db_path = Path::new(&db_path); + let db = open_db_read_only(db_path.join("db").as_path(), Default::default())?; // Instantiate a provider factory for Ethereum mainnet using the provided DB. // TODO: Should the DB version include the spec so that you do not need to specify it here? let spec = ChainSpecBuilder::mainnet().build(); - let factory = ProviderFactory::new(db, spec.into()); + let factory = ProviderFactory::new(db, spec.into(), db_path.join("static_files"))?; // This call opens a RO transaction on the database. To write to the DB you'd need to call // the `provider_rw` function and look for the `Writer` variants of the traits. diff --git a/examples/polygon-p2p/src/chain_cfg.rs b/examples/polygon-p2p/src/chain_cfg.rs index 034e5b482db..5a1fadb5342 100644 --- a/examples/polygon-p2p/src/chain_cfg.rs +++ b/examples/polygon-p2p/src/chain_cfg.rs @@ -27,7 +27,6 @@ pub(crate) fn polygon_chain_spec() -> Arc { ]), deposit_contract: None, base_fee_params: reth_primitives::BaseFeeParamsKind::Constant(BaseFeeParams::ethereum()), - snapshot_block_interval: 500_000, prune_delete_limit: 0, } .into() diff --git a/examples/rpc-db/src/main.rs b/examples/rpc-db/src/main.rs index 79b801e02bb..13fbc622372 100644 --- a/examples/rpc-db/src/main.rs +++ b/examples/rpc-db/src/main.rs @@ -36,12 +36,11 @@ pub mod myrpc_ext; #[tokio::main] async fn main() -> eyre::Result<()> { // 1. 
Setup the DB - let db = Arc::new(open_db_read_only( - Path::new(&std::env::var("RETH_DB_PATH")?), - Default::default(), - )?); + let db_path = std::env::var("RETH_DB_PATH")?; + let db_path = Path::new(&db_path); + let db = Arc::new(open_db_read_only(db_path.join("db").as_path(), Default::default())?); let spec = Arc::new(ChainSpecBuilder::mainnet().build()); - let factory = ProviderFactory::new(db.clone(), spec.clone()); + let factory = ProviderFactory::new(db.clone(), spec.clone(), db_path.join("static_files"))?; // 2. Setup the blockchain provider using only the database provider and a noop for the tree to // satisfy trait bounds. Tree is not used in this example since we are only operating on the diff --git a/testing/ef-tests/Cargo.toml b/testing/ef-tests/Cargo.toml index 35480d7e8bb..c418863777f 100644 --- a/testing/ef-tests/Cargo.toml +++ b/testing/ef-tests/Cargo.toml @@ -17,7 +17,7 @@ ef-tests = [] [dependencies] reth-primitives.workspace = true reth-db = { workspace = true, features = ["mdbx", "test-utils"] } -reth-provider.workspace = true +reth-provider = { workspace = true, features = ["test-utils"] } reth-stages.workspace = true reth-interfaces.workspace = true reth-revm.workspace = true @@ -29,3 +29,4 @@ walkdir = "2.3.3" serde = "1.0.163" serde_json.workspace = true thiserror.workspace = true +rayon.workspace = true diff --git a/testing/ef-tests/src/cases/blockchain_test.rs b/testing/ef-tests/src/cases/blockchain_test.rs index f2a894cc3fb..3af21076cd7 100644 --- a/testing/ef-tests/src/cases/blockchain_test.rs +++ b/testing/ef-tests/src/cases/blockchain_test.rs @@ -5,10 +5,11 @@ use crate::{ Case, Error, Suite, }; use alloy_rlp::Decodable; -use reth_db::test_utils::create_test_rw_db; +use rayon::iter::{ParallelBridge, ParallelIterator}; +use reth_db::test_utils::{create_test_rw_db, create_test_static_files_dir}; use reth_node_ethereum::EthEvmConfig; -use reth_primitives::{BlockBody, SealedBlock}; -use reth_provider::{BlockWriter, HashingWriter, ProviderFactory}; +use reth_primitives::{BlockBody, SealedBlock, StaticFileSegment}; +use reth_provider::{providers::StaticFileWriter, HashingWriter, ProviderFactory}; use reth_stages::{stages::ExecutionStage, ExecInput, Stage}; use std::{collections::BTreeMap, fs, path::Path, sync::Arc}; @@ -64,83 +65,107 @@ impl Case for BlockchainTestCase { } // Iterate through test cases, filtering by the network type to exclude specific forks. - for case in self.tests.values().filter(|case| { - !matches!( - case.network, - ForkSpec::ByzantiumToConstantinopleAt5 | - ForkSpec::Constantinople | - ForkSpec::ConstantinopleFix | - ForkSpec::MergeEOF | - ForkSpec::MergeMeterInitCode | - ForkSpec::MergePush0 | - ForkSpec::Unknown - ) - }) { - // Create a new test database and initialize a provider for the test case. - let db = create_test_rw_db(); - let provider = ProviderFactory::new(db.as_ref(), Arc::new(case.network.clone().into())) + self.tests + .values() + .filter(|case| { + !matches!( + case.network, + ForkSpec::ByzantiumToConstantinopleAt5 | + ForkSpec::Constantinople | + ForkSpec::ConstantinopleFix | + ForkSpec::MergeEOF | + ForkSpec::MergeMeterInitCode | + ForkSpec::MergePush0 | + ForkSpec::Unknown + ) + }) + .par_bridge() + .try_for_each(|case| { + // Create a new test database and initialize a provider for the test case. + let db = create_test_rw_db(); + let static_files_dir = create_test_static_files_dir(); + let provider = ProviderFactory::new( + db.as_ref(), + Arc::new(case.network.clone().into()), + static_files_dir.clone(), + )? 
.provider_rw() .unwrap(); - // Insert initial test state into the provider. - provider - .insert_block( - SealedBlock::new( - case.genesis_block_header.clone().into(), - BlockBody::default(), - ) - .try_seal_with_senders() - .unwrap(), - None, - ) - .map_err(|err| Error::RethError(err.into()))?; - case.pre.write_to_db(provider.tx_ref())?; - - // Decode and insert blocks, creating a chain of blocks for the test case. - let last_block = case.blocks.iter().try_fold(None, |_, block| { - let decoded = SealedBlock::decode(&mut block.rlp.as_ref())?; + // Insert initial test state into the provider. provider - .insert_block(decoded.clone().try_seal_with_senders().unwrap(), None) + .insert_historical_block( + SealedBlock::new( + case.genesis_block_header.clone().into(), + BlockBody::default(), + ) + .try_seal_with_senders() + .unwrap(), + None, + ) .map_err(|err| Error::RethError(err.into()))?; - Ok::, Error>(Some(decoded)) - })?; + case.pre.write_to_db(provider.tx_ref())?; - // Execute the execution stage using the EVM processor factory for the test case - // network. - let _ = ExecutionStage::new_with_factory(reth_revm::EvmProcessorFactory::new( - Arc::new(case.network.clone().into()), - EthEvmConfig::default(), - )) - .execute( - &provider, - ExecInput { target: last_block.as_ref().map(|b| b.number), checkpoint: None }, - ); - - // Validate the post-state for the test case. - match (&case.post_state, &case.post_state_hash) { - (Some(state), None) => { - // Validate accounts in the state against the provider's database. - for (&address, account) in state.iter() { - account.assert_db(address, provider.tx_ref())?; - } - } - (None, Some(expected_state_root)) => { - // Insert state hashes into the provider based on the expected state root. - let last_block = last_block.unwrap_or_default(); + // Decode and insert blocks, creating a chain of blocks for the test case. + let last_block = case.blocks.iter().try_fold(None, |_, block| { + let decoded = SealedBlock::decode(&mut block.rlp.as_ref())?; provider - .insert_hashes( - 0..=last_block.number, - last_block.hash(), - *expected_state_root, + .insert_historical_block( + decoded.clone().try_seal_with_senders().unwrap(), + None, ) .map_err(|err| Error::RethError(err.into()))?; + Ok::, Error>(Some(decoded)) + })?; + provider + .static_file_provider() + .latest_writer(StaticFileSegment::Headers) + .unwrap() + .commit() + .unwrap(); + + // Execute the execution stage using the EVM processor factory for the test case + // network. + let _ = ExecutionStage::new_with_factory(reth_revm::EvmProcessorFactory::new( + Arc::new(case.network.clone().into()), + EthEvmConfig::default(), + )) + .execute( + &provider, + ExecInput { target: last_block.as_ref().map(|b| b.number), checkpoint: None }, + ); + + // Validate the post-state for the test case. + match (&case.post_state, &case.post_state_hash) { + (Some(state), None) => { + // Validate accounts in the state against the provider's database. + for (&address, account) in state.iter() { + account.assert_db(address, provider.tx_ref())?; + } + } + (None, Some(expected_state_root)) => { + // Insert state hashes into the provider based on the expected state root. + let last_block = last_block.unwrap_or_default(); + provider + .insert_hashes( + 0..=last_block.number, + last_block.hash(), + *expected_state_root, + ) + .map_err(|err| Error::RethError(err.into()))?; + } + _ => return Err(Error::MissingPostState), } - _ => return Err(Error::MissingPostState), - } - // Drop the provider without committing to the database. 
- drop(provider); - } + // Drop the provider without committing to the database. + drop(provider); + // TODO: replace with `tempdir` usage, so the temp directory is removed + // automatically when the variable goes out of scope + reth_primitives::fs::remove_dir_all(static_files_dir) + .expect("Failed to remove static files directory"); + + Ok(()) + })?; Ok(()) }
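
For context on how the pieces introduced in this patch fit together, here is a minimal sketch of driving the new static-file writer from a `ProviderFactory`, modeled on the `testing/ef-tests` changes above. The `MAINNET` chain spec, the `eyre` error handling, and the commented-out `append_header` call are illustrative assumptions, not part of the patch itself.

use reth_db::test_utils::{create_test_rw_db, create_test_static_files_dir};
use reth_primitives::{StaticFileSegment, MAINNET};
use reth_provider::{providers::StaticFileWriter, ProviderFactory};

fn main() -> eyre::Result<()> {
    // Throwaway database plus a directory for static files, mirroring the ef-tests setup.
    let db = create_test_rw_db();
    let static_files_dir = create_test_static_files_dir();
    let factory = ProviderFactory::new(db, MAINNET.clone(), static_files_dir)?;
    let provider = factory.provider_rw()?;

    // Fetch (or create) the writer for the Headers segment. As described in the writer code
    // above, `append_header` calls `increment_block()` internally, while `commit()` flushes the
    // NippyJar offsets and the updated `SegmentHeader` to disk and refreshes the reader index.
    let mut writer = provider.static_file_provider().latest_writer(StaticFileSegment::Headers)?;
    // writer.append_header(header, terminal_difficulty, hash)?;
    writer.commit()?;

    Ok(())
}

The same pattern applies to the Transactions and Receipts segments via `append_transaction` and `append_receipt`, with the caveat noted in the writer docs that block increments must then be handled by the caller, since empty blocks never reach those functions.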